On Thu, Jul 15, 2021 at 8:15 AM Alan Maguire alan.maguire@oracle.com wrote:
Add a BTF dumper for typed data, so that the user can dump a typed version of the data provided.
The API is
int btf_dump__dump_type_data(struct btf_dump *d, __u32 id, void *data, size_t data_sz, const struct btf_dump_type_data_opts *opts);
...where the id is the BTF id of the data pointed to by the "void *" argument; for example the BTF id of "struct sk_buff" for a "struct skb *" data pointer. Options supported are
- a starting indent level (indent_lvl)
- a user-specified indent string which will be printed once per indent level; if NULL, tab is chosen but any string <= 32 chars can be provided.
- a set of boolean options to control dump display, similar to those used for BPF helper bpf_snprintf_btf(). Options are - compact : omit newlines and other indentation - skip_names: omit member names - emit_zeroes: show zero-value members
Default output format is identical to that dumped by bpf_snprintf_btf(), for example a "struct sk_buff" representation would look like this:
struct sk_buff){ (union){ (struct){ .next = (struct sk_buff *)0xffffffffffffffff, .prev = (struct sk_buff *)0xffffffffffffffff, (union){ .dev = (struct net_device *)0xffffffffffffffff, .dev_scratch = (long unsigned int)18446744073709551615, }, }, ...
If the data structure is larger than the *data_sz* number of bytes that are available in *data*, as much of the data as possible will be dumped and -E2BIG will be returned. This is useful as tracers will sometimes not be able to capture all of the data associated with a type; for example a "struct task_struct" is ~16k. Being able to specify that only a subset is available is important for such cases. On success, the amount of data dumped is returned.
Signed-off-by: Alan Maguire alan.maguire@oracle.com
Ok, this looks great. I think I found a few residual problems, so please see comments below and address them. But I'm inclined to land this patch set as is because it's in a good shape already, and it is pretty, so it's hard and time-consuming to weed through minor (at this point) changes between versions. So please send follow-up patch(es) with fixes. Hopefully soon enough before the libbpf release. Thanks a lot for working on this and persevering, this is a great API!
I'll apply a patch set to bpf-next when it will open up for new patches. Thanks.
tools/lib/bpf/btf.h | 19 ++ tools/lib/bpf/btf_dump.c | 819 ++++++++++++++++++++++++++++++++++++++++++++++- tools/lib/bpf/libbpf.map | 1 + 3 files changed, 834 insertions(+), 5 deletions(-)
I also wanted to call out this ^^ versus:
a) initial kernel-sharing version:
18 files changed, 3236 insertions(+), 1319 deletions(-)
b) initial libbpf-only version:
6 files changed, 1251 insertions(+), 3 deletions(-)
And the API actually gained in supported features and correctness.
[...]
+union float_data {
long double ld;double d;float f;+};
clever
+static int btf_dump_float_data(struct btf_dump *d,
const struct btf_type *t,__u32 type_id,const void *data)+{
const union float_data *flp = data;union float_data fl;int sz = t->size;/* handle unaligned data; copy to local union */if (((uintptr_t)data) % sz) {memcpy(&fl, data, sz);flp = &fl;}switch (sz) {case 16:btf_dump_type_values(d, "%Lf", flp->ld);break;case 8:btf_dump_type_values(d, "%lf", flp->d);break;case 4:btf_dump_type_values(d, "%f", flp->f);break;default:pr_warn("unexpected size %d for id [%u]\n", sz, type_id);return -EINVAL;}return 0;+}
[...]
+static int btf_dump_array_data(struct btf_dump *d,
const struct btf_type *t,__u32 id,const void *data)+{
const struct btf_array *array = btf_array(t);const struct btf_type *elem_type;__u32 i, elem_size = 0, elem_type_id;bool is_array_member;elem_type_id = array->type;elem_type = skip_mods_and_typedefs(d->btf, elem_type_id, NULL);elem_size = btf__resolve_size(d->btf, elem_type_id);if (elem_size <= 0) {pr_warn("unexpected elem size %d for array type [%u]\n", elem_size, id);return -EINVAL;}if (btf_is_int(elem_type)) {/** BTF_INT_CHAR encoding never seems to be set for* char arrays, so if size is 1 and element is* printable as a char, we'll do that.*/if (elem_size == 1)d->typed_dump->is_array_char = true;}/* note that we increment depth before calling btf_dump_print() below;* this is intentional. btf_dump_data_newline() will not print a* newline for depth 0 (since this leaves us with trailing newlines* at the end of typed display), so depth is incremented first.* For similar reasons, we decrement depth before showing the closing* parenthesis.*/d->typed_dump->depth++;btf_dump_printf(d, "[%s", btf_dump_data_newline(d));/* may be a multidimensional array, so store current "is array member"* status so we can restore it correctly later.*/is_array_member = d->typed_dump->is_array_member;d->typed_dump->is_array_member = true;for (i = 0; i < array->nelems; i++, data += elem_size) {if (d->typed_dump->is_array_terminated)break;
I suspect this logic breaks for multi-dimensional char arrays. Please check and add follow-up tests and fixes, no need to address that in this patch set, you've suffered enough.
btf_dump_dump_type_data(d, NULL, elem_type, elem_type_id, data, 0, 0);}d->typed_dump->is_array_member = is_array_member;d->typed_dump->depth--;btf_dump_data_pfx(d);btf_dump_type_values(d, "]");return 0;+}
+static int btf_dump_struct_data(struct btf_dump *d,
const struct btf_type *t,__u32 id,const void *data)+{
const struct btf_member *m = btf_members(t);__u16 n = btf_vlen(t);int i, err;/* note that we increment depth before calling btf_dump_print() below;* this is intentional. btf_dump_data_newline() will not print a* newline for depth 0 (since this leaves us with trailing newlines* at the end of typed display), so depth is incremented first.* For similar reasons, we decrement depth before showing the closing* parenthesis.*/
ah, ok, I see. I sort of randomly stumbled on this from a purely aesthetic reasons, but I'm happy we clarified this because it's completely non-obvious
d->typed_dump->depth++;btf_dump_printf(d, "{%s", btf_dump_data_newline(d));for (i = 0; i < n; i++, m++) {const struct btf_type *mtype;const char *mname;__u32 moffset;__u8 bit_sz;mtype = btf__type_by_id(d->btf, m->type);mname = btf_name_of(d, m->name_off);moffset = btf_member_bit_offset(t, i);bit_sz = btf_member_bitfield_size(t, i);err = btf_dump_dump_type_data(d, mname, mtype, m->type, data + moffset / 8,moffset % 8, bit_sz);if (err < 0)return err;}d->typed_dump->depth--;btf_dump_data_pfx(d);btf_dump_type_values(d, "}");return err;+}
+static int btf_dump_ptr_data(struct btf_dump *d,
const struct btf_type *t,__u32 id,const void *data)+{
btf_dump_type_values(d, "%p", *(void **)data);
Wait, you fixed pointer zero checking logic and misaligned reads for ints/floats, but none of that for actually printing pointers?... Please send a follow-up fix.
return 0;+}
+static int btf_dump_get_enum_value(struct btf_dump *d,
const struct btf_type *t,const void *data,__u32 id,__s64 *value)+{
int sz = t->size;/* handle unaligned enum value */if (((uintptr_t)data) % sz) {
nit: probably worth a small helper with obvious name to avoid extra comments and all those ((()))
*value = (__s64)btf_dump_bitfield_get_data(d, t, data, 0, 0);return 0;}
[...]
elem_type_id = array->type;elem_size = btf__resolve_size(d->btf, elem_type_id);elem_type = skip_mods_and_typedefs(d->btf, elem_type_id, NULL);ischar = btf_is_int(elem_type) && elem_size == 1;/* check all elements; if _any_ element is nonzero, all* of array is displayed. We make an exception however* for char arrays where the first element is 0; these* are considered zeroed also, even if later elements are* non-zero because the string is terminated.*/for (i = 0; i < array->nelems; i++) {if (i == 0 && ischar && *(char *)data == 0)return -ENODATA;
same here, this might be too aggressive for something like char a[2][10] ?
err = btf_dump_type_data_check_zero(d, elem_type,elem_type_id,data +(i * elem_size),bits_offset, 0);if (err != -ENODATA)return err;}return -ENODATA;}case BTF_KIND_STRUCT:case BTF_KIND_UNION: {const struct btf_member *m = btf_members(t);__u16 n = btf_vlen(t);/* if any struct/union member is non-zero, the struct/union* is considered non-zero and dumped.*/for (i = 0; i < n; i++, m++) {const struct btf_type *mtype;__u32 moffset;mtype = btf__type_by_id(d->btf, m->type);moffset = btf_member_bit_offset(t, i);/* btf_int_bits() does not store member bitfield size;* bitfield size needs to be stored here so int display* of member can retrieve it.*/bit_sz = btf_member_bitfield_size(t, i);err = btf_dump_type_data_check_zero(d, mtype, m->type, data + moffset / 8,moffset % 8, bit_sz);if (err != ENODATA)return err;}return -ENODATA;}case BTF_KIND_ENUM:if (btf_dump_get_enum_value(d, t, data, id, &value))return 0;
why not propagating error here?
if (value == 0)return -ENODATA;return 0;default:return 0;}+}
[...]
case BTF_KIND_ARRAY:err = btf_dump_array_data(d, t, id, data);break;case BTF_KIND_STRUCT:case BTF_KIND_UNION:err = btf_dump_struct_data(d, t, id, data);break;case BTF_KIND_ENUM:/* handle bitfield and int enum values */if (bit_sz) {unsigned __int128 print_num;__s64 enum_val;print_num = btf_dump_bitfield_get_data(d, t, data, bits_offset, bit_sz);enum_val = (__s64)print_num;err = btf_dump_enum_data(d, t, id, &enum_val);
this is broken on big-endian, no? Basically almost always it will be printing either 0, -1 or 0xffffffff?..
} elseerr = btf_dump_enum_data(d, t, id, data);break;case BTF_KIND_VAR:err = btf_dump_var_data(d, t, id, data);break;case BTF_KIND_DATASEC:err = btf_dump_datasec_data(d, t, id, data);break;default:pr_warn("unexpected kind [%u] for id [%u]\n",BTF_INFO_KIND(t->info), id);return -EINVAL;}if (err < 0)return err;return size;+}
+int btf_dump__dump_type_data(struct btf_dump *d, __u32 id,
const void *data, size_t data_sz,const struct btf_dump_type_data_opts *opts)+{
const struct btf_type *t;int ret;if (!OPTS_VALID(opts, btf_dump_type_data_opts))return libbpf_err(-EINVAL);t = btf__type_by_id(d->btf, id);if (!t)return libbpf_err(-ENOENT);d->typed_dump = calloc(1, sizeof(struct btf_dump_data));
just realized this doesn't have to be calloc()'ed, it can be on the stack zero-initialized variable; feel free to switch in the follow up as well
if (!d->typed_dump)return libbpf_err(-ENOMEM);
then we won't need to handle this at all
d->typed_dump->data_end = data + data_sz;d->typed_dump->indent_lvl = OPTS_GET(opts, indent_level, 0);/* default indent string is a tab */if (!opts->indent_str)d->typed_dump->indent_str[0] = '\t';
[...]