Thanks for review, Andrii.
One question, should I add bpf_{per, this}_cpu_ptr() to the bpf_base_func_proto() in kernel/bpf/helpers.c?
On Fri, Sep 4, 2020 at 1:04 PM Andrii Nakryiko andrii.nakryiko@gmail.com wrote:
On Thu, Sep 3, 2020 at 3:35 PM Hao Luo haoluo@google.com wrote:
Add bpf_per_cpu_ptr() to help bpf programs access percpu vars. bpf_per_cpu_ptr() has the same semantic as per_cpu_ptr() in the kernel except that it may return NULL. This happens when the cpu parameter is out of range. So the caller must check the returned value.
Acked-by: Andrii Nakryiko andriin@fb.com Signed-off-by: Hao Luo haoluo@google.com
include/linux/bpf.h | 3 ++ include/linux/btf.h | 11 ++++++ include/uapi/linux/bpf.h | 17 +++++++++ kernel/bpf/btf.c | 10 ------ kernel/bpf/verifier.c | 66 +++++++++++++++++++++++++++++++--- kernel/trace/bpf_trace.c | 18 ++++++++++ tools/include/uapi/linux/bpf.h | 17 +++++++++ 7 files changed, 128 insertions(+), 14 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c6d9f2c444f4..6b2034f7665e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -292,6 +292,7 @@ enum bpf_arg_type { ARG_PTR_TO_ALLOC_MEM, /* pointer to dynamically allocated memory */ ARG_PTR_TO_ALLOC_MEM_OR_NULL, /* pointer to dynamically allocated memory or NULL */ ARG_CONST_ALLOC_SIZE_OR_ZERO, /* number of allocated bytes requested */
ARG_PTR_TO_PERCPU_BTF_ID, /* pointer to in-kernel percpu type */
};
/* type of values returned from helper functions */ @@ -305,6 +306,7 @@ enum bpf_return_type { RET_PTR_TO_SOCK_COMMON_OR_NULL, /* returns a pointer to a sock_common or NULL */ RET_PTR_TO_ALLOC_MEM_OR_NULL, /* returns a pointer to dynamically allocated memory or NULL */ RET_PTR_TO_BTF_ID_OR_NULL, /* returns a pointer to a btf_id or NULL */
RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL, /* returns a pointer to a valid memory or a btf_id or NULL */
};
/* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs @@ -385,6 +387,7 @@ enum bpf_reg_type { PTR_TO_RDONLY_BUF_OR_NULL, /* reg points to a readonly buffer or NULL */ PTR_TO_RDWR_BUF, /* reg points to a read/write buffer */ PTR_TO_RDWR_BUF_OR_NULL, /* reg points to a read/write buffer or NULL */
PTR_TO_PERCPU_BTF_ID, /* reg points to percpu kernel type */
};
/* The information passed from prog-specific *_is_valid_access diff --git a/include/linux/btf.h b/include/linux/btf.h index 592373d359b9..07b7de1c05b0 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -71,6 +71,11 @@ btf_resolve_size(const struct btf *btf, const struct btf_type *type, i < btf_type_vlen(struct_type); \ i++, member++)
+#define for_each_vsi(i, struct_type, member) \
datasec_type?
Hmmm, right. It seems to come when copy-pasted from "for_each_member".
for (i = 0, member = btf_type_var_secinfo(struct_type); \
i < btf_type_vlen(struct_type); \
i++, member++)
static inline bool btf_type_is_ptr(const struct btf_type *t) { return BTF_INFO_KIND(t->info) == BTF_KIND_PTR; @@ -155,6 +160,12 @@ static inline const struct btf_member *btf_type_member(const struct btf_type *t) return (const struct btf_member *)(t + 1); }
+static inline const struct btf_var_secinfo *btf_type_var_secinfo(
const struct btf_type *t)
+{
return (const struct btf_var_secinfo *)(t + 1);
+}
#ifdef CONFIG_BPF_SYSCALL const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id); const char *btf_name_by_offset(const struct btf *btf, u32 offset); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index ab00ad9b32e5..d0ec94d5bdbf 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3596,6 +3596,22 @@ union bpf_attr {
the data in *dst*. This is a wrapper of copy_from_user().
Return
0 on success, or a negative error in case of failure.
- void *bpf_per_cpu_ptr(const void *percpu_ptr, u32 cpu)
Description
Take a pointer to a percpu ksym, *percpu_ptr*, and return a
pointer to the percpu kernel variable on *cpu*. A ksym is an
extern variable decorated with '__ksym'. For ksym, there is a
global var (either static or global) defined of the same name
in the kernel. The ksym is percpu if the global var is percpu.
The returned pointer points to the global percpu var on *cpu*.
bpf_per_cpu_ptr() has the same semantic as per_cpu_ptr() in the
kernel, except that bpf_per_cpu_ptr() may return NULL. This
happens if *cpu* is larger than nr_cpu_ids. The caller of
bpf_per_cpu_ptr() must check the returned value.
Return
A generic pointer pointing to the kernel percpu variable on *cpu*.
Or NULL, if *cpu* is invalid.
Ack.
*/ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3747,6 +3763,7 @@ union bpf_attr { FN(inode_storage_delete), \ FN(d_path), \ FN(copy_from_user), \
FN(bpf_per_cpu_ptr), \ /* */
[...]
@@ -4003,6 +4008,15 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg, if (type != expected_type) goto err_type; }
} else if (arg_type == ARG_PTR_TO_PERCPU_BTF_ID) {
expected_type = PTR_TO_PERCPU_BTF_ID;
if (type != expected_type)
goto err_type;
if (!reg->btf_id) {
verbose(env, "Helper has invalid btf_id in R%d\n", regno);
return -EACCES;
}
meta->ret_btf_id = reg->btf_id; } else if (arg_type == ARG_PTR_TO_BTF_ID) { bool ids_match = false;
@@ -5002,6 +5016,30 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL; regs[BPF_REG_0].id = ++env->id_gen; regs[BPF_REG_0].mem_size = meta.mem_size;
} else if (fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL) {
Given this is internal implementation detail, this return type is fine, but I'm wondering if it would be better to just make PTR_TO_BTF_ID to allow not just structs? E.g., if we have an int, just allow reading those 4 bytes.
Not sure what the implications are in terms of implementation, but conceptually that shouldn't be a problem, given we do have BTF type ID describing size and all.
Yeah. Totally agree. I looked at it initially. My take is PTR_TO_BTF_ID is meant for struct types. It required some code refactoring to break this assumption. I can add it to my TODO list and investigate it if this makes more sense.
const struct btf_type *t;
mark_reg_known_zero(env, regs, BPF_REG_0);
t = btf_type_skip_modifiers(btf_vmlinux, meta.ret_btf_id, NULL);
if (!btf_type_is_struct(t)) {
u32 tsize;
const struct btf_type *ret;
const char *tname;
/* resolve the type size of ksym. */
ret = btf_resolve_size(btf_vmlinux, t, &tsize);
if (IS_ERR(ret)) {
tname = btf_name_by_offset(btf_vmlinux, t->name_off);
verbose(env, "unable to resolve the size of type '%s': %ld\n",
tname, PTR_ERR(ret));
return -EINVAL;
}
regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL;
regs[BPF_REG_0].mem_size = tsize;
} else {
regs[BPF_REG_0].type = PTR_TO_BTF_ID_OR_NULL;
regs[BPF_REG_0].btf_id = meta.ret_btf_id;
} } else if (fn->ret_type == RET_PTR_TO_BTF_ID_OR_NULL) { int ret_btf_id;
[...]
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index b2a5380eb187..d474c1530f87 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1144,6 +1144,22 @@ static const struct bpf_func_proto bpf_d_path_proto = { .allowed = bpf_d_path_allowed, };
+BPF_CALL_2(bpf_per_cpu_ptr, const void *, ptr, u32, cpu) +{
if (cpu >= nr_cpu_ids)
return 0;
return (u64)per_cpu_ptr(ptr, cpu);
not sure, but on 32-bit arches this might cause compilation warning, case to (unsigned long) instead?
Ah, I see, good catch! Will fix, thanks.
+}
+static const struct bpf_func_proto bpf_per_cpu_ptr_proto = {
.func = bpf_per_cpu_ptr,
.gpl_only = false,
.ret_type = RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL,
.arg1_type = ARG_PTR_TO_PERCPU_BTF_ID,
.arg2_type = ARG_ANYTHING,
+};
[...]