The BTF dumper code currently displays arrays of characters as just that - arrays, with each character formatted individually. Sometimes this is what makes sense, but it's nice to be able to treat that array as a string.
This change adds a special case to the btf_dump functionality to allow arrays of single-byte integer values to be printed as character strings. Characters for which isprint() returns false are printed as hex-escaped values. This is enabled when the new ".emit_strings" is set to 1 in the btf_dump_type_data_opts structure.
As an example, here's what it looks like to dump the string "hello" using a few different field values for btf_dump_type_data_opts (.compact = 1):
- .emit_strings = 0, .skip_names = 0: (char[6])['h','e','l','l','o',]
- .emit_strings = 0, .skip_names = 1: ['h','e','l','l','o',]
- .emit_strings = 1, .skip_names = 0: (char[6])"hello"
- .emit_strings = 1, .skip_names = 1: "hello"
Here's the string "h\xff", dumped with .compact = 1 and .skip_names = 1:
- .emit_strings = 0: ['h',-1,]
- .emit_strings = 1: "h\xff"
Signed-off-by: Blake Jones blakejones@google.com --- tools/lib/bpf/btf.h | 3 ++- tools/lib/bpf/btf_dump.c | 44 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 45 insertions(+), 2 deletions(-)
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index 4392451d634b..ccfd905f03df 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -326,9 +326,10 @@ struct btf_dump_type_data_opts { bool compact; /* no newlines/indentation */ bool skip_names; /* skip member/type names */ bool emit_zeroes; /* show 0-valued fields */ + bool emit_strings; /* print char arrays as strings */ size_t :0; }; -#define btf_dump_type_data_opts__last_field emit_zeroes +#define btf_dump_type_data_opts__last_field emit_strings
LIBBPF_API int btf_dump__dump_type_data(struct btf_dump *d, __u32 id, diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index 460c3e57fadb..336a6646e0fa 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -68,6 +68,7 @@ struct btf_dump_data { bool compact; bool skip_names; bool emit_zeroes; + bool emit_strings; __u8 indent_lvl; /* base indent level */ char indent_str[BTF_DATA_INDENT_STR_LEN]; /* below are used during iteration */ @@ -2028,6 +2029,43 @@ static int btf_dump_var_data(struct btf_dump *d, return btf_dump_dump_type_data(d, NULL, t, type_id, data, 0, 0); }
+static int btf_dump_string_data(struct btf_dump *d, + const struct btf_type *t, + __u32 id, + const void *data) +{ + const struct btf_array *array = btf_array(t); + __u32 i; + + btf_dump_data_pfx(d); + btf_dump_printf(d, "\""); + + for (i = 0; i < array->nelems; i++, data++) { + char c; + + if (data >= d->typed_dump->data_end) + return -E2BIG; + + c = *(char *)data; + if (c == '\0') { + /* + * When printing character arrays as strings, NUL bytes + * are always treated as string terminators; they are + * never printed. + */ + break; + } + if (isprint(c)) + btf_dump_printf(d, "%c", c); + else + btf_dump_printf(d, "\\x%02x", *(__u8 *)data); + } + + btf_dump_printf(d, "\""); + + return 0; +} + static int btf_dump_array_data(struct btf_dump *d, const struct btf_type *t, __u32 id, @@ -2055,8 +2093,11 @@ static int btf_dump_array_data(struct btf_dump *d, * char arrays, so if size is 1 and element is * printable as a char, we'll do that. */ - if (elem_size == 1) + if (elem_size == 1) { + if (d->typed_dump->emit_strings) + return btf_dump_string_data(d, t, id, data); d->typed_dump->is_array_char = true; + } }
/* note that we increment depth before calling btf_dump_print() below; @@ -2544,6 +2585,7 @@ int btf_dump__dump_type_data(struct btf_dump *d, __u32 id, d->typed_dump->compact = OPTS_GET(opts, compact, false); d->typed_dump->skip_names = OPTS_GET(opts, skip_names, false); d->typed_dump->emit_zeroes = OPTS_GET(opts, emit_zeroes, false); + d->typed_dump->emit_strings = OPTS_GET(opts, emit_strings, false);
ret = btf_dump_dump_type_data(d, NULL, t, id, data, 0, 0);
When this mode is turned on, "emit_zeroes" and "compact" have no effect, and embedded NUL characters always terminate printing of an array.
Signed-off-by: Blake Jones blakejones@google.com --- .../selftests/bpf/prog_tests/btf_dump.c | 106 ++++++++++++++++++ 1 file changed, 106 insertions(+)
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c index c0a776feec23..2fde118d04c8 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c @@ -879,6 +879,110 @@ static void test_btf_dump_var_data(struct btf *btf, struct btf_dump *d, "static int bpf_cgrp_storage_busy = (int)2", 2); }
+struct btf_dump_string_ctx { + struct btf *btf; + struct btf_dump *d; + char *str; + struct btf_dump_type_data_opts *opts; + int array_id; +}; + +static int btf_dump_one_string(struct btf_dump_string_ctx *ctx, + char *ptr, size_t ptr_sz, + const char *expected_val) +{ + size_t type_sz; + int ret; + + ctx->str[0] = '\0'; + type_sz = btf__resolve_size(ctx->btf, ctx->array_id); + ret = btf_dump__dump_type_data(ctx->d, ctx->array_id, ptr, ptr_sz, ctx->opts); + if (type_sz <= ptr_sz) { + if (!ASSERT_EQ(ret, type_sz, "failed/unexpected type_sz")) + return -EINVAL; + } else { + if (!ASSERT_EQ(ret, -E2BIG, "failed to return -E2BIG")) + return -EINVAL; + } + if (!ASSERT_STREQ(ctx->str, expected_val, "ensure expected/actual match")) + return -EFAULT; + return 0; +} + +static void btf_dump_strings(struct btf_dump_string_ctx *ctx) +{ + struct btf_dump_type_data_opts *opts = ctx->opts; + + opts->emit_strings = true; + + opts->compact = true; + opts->emit_zeroes = false; + + opts->skip_names = false; + btf_dump_one_string(ctx, "foo", 4, "(char[4])\"foo\""); + + opts->skip_names = true; + btf_dump_one_string(ctx, "foo", 4, "\"foo\""); + + /* This should have no effect. */ + opts->emit_zeroes = false; + btf_dump_one_string(ctx, "foo", 4, "\"foo\""); + + /* This should have no effect. */ + opts->compact = false; + btf_dump_one_string(ctx, "foo", 4, "\"foo\""); + + /* Non-printable characters come out as hex. */ + btf_dump_one_string(ctx, "fo\xff", 4, "\"fo\\xff\""); + btf_dump_one_string(ctx, "fo\x7", 4, "\"fo\\x07\""); + + /* Should get printed properly even though there's no NUL. */ + char food[4] = { 'f', 'o', 'o', 'd' }; + + btf_dump_one_string(ctx, food, 4, "\"food\""); + + /* The embedded NUL should terminate the string. */ + char embed[4] = { 'f', 'o', '\0', 'd' }; + + btf_dump_one_string(ctx, embed, 4, "\"fo\""); +} + +static void test_btf_dump_string_data(void) +{ + struct test_ctx t = {}; + char str[STRSIZE]; + struct btf_dump *d; + DECLARE_LIBBPF_OPTS(btf_dump_type_data_opts, opts); + struct btf_dump_string_ctx ctx; + int char_id, int_id, array_id; + + if (test_ctx__init(&t)) + return; + + d = btf_dump__new(t.btf, btf_dump_snprintf, str, NULL); + if (!ASSERT_OK_PTR(d, "could not create BTF dump")) + return; + + /* Generate BTF for a four-element char array. */ + char_id = btf__add_int(t.btf, "char", 1, BTF_INT_CHAR); + ASSERT_EQ(char_id, 1, "char_id"); + int_id = btf__add_int(t.btf, "int", 4, BTF_INT_SIGNED); + ASSERT_EQ(int_id, 2, "int_id"); + array_id = btf__add_array(t.btf, int_id, char_id, 4); + ASSERT_EQ(array_id, 3, "array_id"); + + ctx.btf = t.btf; + ctx.d = d; + ctx.str = str; + ctx.opts = &opts; + ctx.array_id = array_id; + + btf_dump_strings(&ctx); + + btf_dump__free(d); + test_ctx__free(&t); +} + static void test_btf_datasec(struct btf *btf, struct btf_dump *d, char *str, const char *name, const char *expected_val, void *data, size_t data_sz) @@ -970,6 +1074,8 @@ void test_btf_dump() { test_btf_dump_struct_data(btf, d, str); if (test__start_subtest("btf_dump: var_data")) test_btf_dump_var_data(btf, d, str); + if (test__start_subtest("btf_dump: string_data")) + test_btf_dump_string_data(); btf_dump__free(d); btf__free(btf);
On Mon, Jun 02, 2025 at 09:48:12PM -0700, Blake Jones wrote:
SNIP
diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index 460c3e57fadb..336a6646e0fa 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -68,6 +68,7 @@ struct btf_dump_data { bool compact; bool skip_names; bool emit_zeroes;
+ bool emit_strings;
__u8 indent_lvl; /* base indent level */ char indent_str[BTF_DATA_INDENT_STR_LEN]; /* below are used during iteration */
@@ -2028,6 +2029,43 @@ static int btf_dump_var_data(struct btf_dump *d, return btf_dump_dump_type_data(d, NULL, t, type_id, data, 0, 0); } +static int btf_dump_string_data(struct btf_dump *d,
const struct btf_type *t,
__u32 id,
const void *data)
+{
+ const struct btf_array *array = btf_array(t);
+ __u32 i;
+ btf_dump_data_pfx(d);
+ btf_dump_printf(d, "\"");
+ for (i = 0; i < array->nelems; i++, data++) {
char c;
if (data >= d->typed_dump->data_end)
return -E2BIG;
Curious: is this just a string array without a NUL-terminating byte? Should we just print the closing '"' and return 0 instead of returning an E2BIG error?
thanks, jirka
c = *(char *)data;
if (c == '\0') {
/*
* When printing character arrays as strings, NUL bytes
* are always treated as string terminators; they are
* never printed.
*/
break;
}
if (isprint(c))
btf_dump_printf(d, "%c", c);
else
btf_dump_printf(d, "\\x%02x", *(__u8 *)data);
+ }
+ btf_dump_printf(d, "\"");
+ return 0;
+}
static int btf_dump_array_data(struct btf_dump *d, const struct btf_type *t, __u32 id, @@ -2055,8 +2093,11 @@ static int btf_dump_array_data(struct btf_dump *d, * char arrays, so if size is 1 and element is * printable as a char, we'll do that. */
- if (elem_size == 1)
+ if (elem_size == 1) {
+ if (d->typed_dump->emit_strings)
+ return btf_dump_string_data(d, t, id, data);
d->typed_dump->is_array_char = true;
+ }
}
/* note that we increment depth before calling btf_dump_print() below; @@ -2544,6 +2585,7 @@ int btf_dump__dump_type_data(struct btf_dump *d, __u32 id, d->typed_dump->compact = OPTS_GET(opts, compact, false); d->typed_dump->skip_names = OPTS_GET(opts, skip_names, false); d->typed_dump->emit_zeroes = OPTS_GET(opts, emit_zeroes, false);
+ d->typed_dump->emit_strings = OPTS_GET(opts, emit_strings, false);
ret = btf_dump_dump_type_data(d, NULL, t, id, data, 0, 0); -- 2.49.0.1204.g71687c7c1d-goog
On Tue, Jun 3, 2025 at 6:18 AM Jiri Olsa olsajiri@gmail.com wrote:
+static int btf_dump_string_data(struct btf_dump *d,
const struct btf_type *t,
__u32 id,
const void *data)
+{
const struct btf_array *array = btf_array(t);
__u32 i;
btf_dump_data_pfx(d);
btf_dump_printf(d, "\"");
for (i = 0; i < array->nelems; i++, data++) {
char c;
if (data >= d->typed_dump->data_end)
return -E2BIG;
Curious: is this just a string array without a NUL-terminating byte? Should we just print the closing '"' and return 0 instead of returning an E2BIG error?
Good question. That E2BIG error would happen, for example, if we tried to print the array "{ 'a', 'b', 'c' }" when the type was "char[4]". I'd say your proposed behavior would be consistent with the semantic of ".emit_strings should display strings in an intuitively useful way", and I'd be in favor of doing that (replacing "return -E2BIG" with "break"). If others agree (specifically Andrii, who had comments about the semantics yesterday), I'll make that change.
Blake
On Tue, Jun 3, 2025 at 8:39 AM Blake Jones blakejones@google.com wrote:
On Tue, Jun 3, 2025 at 6:18 AM Jiri Olsa olsajiri@gmail.com wrote:
+static int btf_dump_string_data(struct btf_dump *d,
const struct btf_type *t,
__u32 id,
const void *data)
+{
const struct btf_array *array = btf_array(t);
__u32 i;
btf_dump_data_pfx(d);
btf_dump_printf(d, "\"");
for (i = 0; i < array->nelems; i++, data++) {
char c;
if (data >= d->typed_dump->data_end)
return -E2BIG;
Curious: is this just a string array without a NUL-terminating byte? Should we just print the closing '"' and return 0 instead of returning an E2BIG error?
Good question. That E2BIG error would happen, for example, if we tried to print the array "{ 'a', 'b', 'c' }" when the type was "char[4]".
Exactly, data is truncated, we have to return E2BIG. But I think that is checked earlier with btf_dump_type_data_check_overflow(), so we probably don't need to do this here?
Please add tests with truncated data so we know for sure?
I'd say your proposed behavior would be consistent with the semantic of ".emit_strings should display strings in an intuitively useful way",
It still should follow the overall contract, so I think E2BIG is justified for truncated data.
But there is also a bit of a quirk. If a string is not zero-terminated, we actually don't distinguish it in any way. Would it make sense to detect this and still print it as an array of individual characters? It's clearly not a valid C string at that point, so emit_strings doesn't have to apply. WDYT? The implementation would be simple -- find zero in an array, if found - emit everything up to that point as string, if not - emit character array?
and I'd be in favor of doing that (replacing "return -E2BIG" with "break"). If others agree (specifically Andrii, who had comments about the semantics yesterday), I'll make that change.
Blake
Hi Andrii,
On Tue, Jun 3, 2025 at 11:39 AM Andrii Nakryiko andrii.nakryiko@gmail.com wrote:
Good question. That E2BIG error would happen, for example, if we tried to print the array "{ 'a', 'b', 'c' }" when the type was "char[4]".
Exactly, data is truncated, we have to return E2BIG. But I think that is checked earlier with btf_dump_type_data_check_overflow(), so we probably don't need to do this here?
btf_dump_type_data_check_overflow() only looks at INT, FLOAT, PTR, ENUM, and ENUM64 types: https://elixir.bootlin.com/linux/v6.15/source/tools/lib/bpf/btf_dump.c#L2304...
So we still need to do this manually for this ARRAY type.
Please add tests with truncated data so we know for sure?
I've added tests; see below.
I'd say your proposed behavior would be consistent with the semantic of ".emit_strings should display strings in an intuitively useful way",
It still should follow the overall contract, so I think E2BIG is justified for truncated data.
But there is also a bit of a quirk. If a string is not zero-terminated, we actually don't distinguish it in any way. Would it make sense to detect this and still print it as an array of individual characters? It's clearly not a valid C string at that point, so emit_strings doesn't have to apply. WDYT? The implementation would be simple -- find zero in an array, if found - emit everything up to that point as string, if not - emit character array?
I don't have strong feelings one way or another, so I've just implemented this. btf_dump_array_data() now keeps going and does its current behavior if btf_dump_string_data() hit an error. In practice, btf_dump_array_data() does *not* return E2BIG if the provided array is too big for the type; it just displays the first N elements of the array and then returns. I don't plan to change this behavior.
Updated patches coming shortly.
Blake
linux-kselftest-mirror@lists.linaro.org