From: Jiri Olsa <jolsa@kernel.org>
commit f2c67a3e60d1071b65848efaa8c3b66c363dd025 upstream.
The nesting protection in bpf_perf_event_output relies on disabled preemption, which is guaranteed for kprobes and tracepoints.
However, bpf_perf_event_output can also be called from the uprobes context through the bpf_prog_run_array_sleepable function, which disables migration but keeps preemption enabled.
This can cause a task to be preempted by another one inside the nesting protection and eventually lead to two tasks using the same perf_sample_data buffer, causing crashes like:
  kernel tried to execute NX-protected page - exploit attempt? (uid: 0)
  BUG: unable to handle page fault for address: ffffffff82be3eea
  ...
  Call Trace:
   ? __die+0x1f/0x70
   ? page_fault_oops+0x176/0x4d0
   ? exc_page_fault+0x132/0x230
   ? asm_exc_page_fault+0x22/0x30
   ? perf_output_sample+0x12b/0x910
   ? perf_event_output+0xd0/0x1d0
   ? bpf_perf_event_output+0x162/0x1d0
   ? bpf_prog_c6271286d9a4c938_krava1+0x76/0x87
   ? __uprobe_perf_func+0x12b/0x540
   ? uprobe_dispatcher+0x2c4/0x430
   ? uprobe_notify_resume+0x2da/0xce0
   ? atomic_notifier_call_chain+0x7b/0x110
   ? exit_to_user_mode_prepare+0x13e/0x290
   ? irqentry_exit_to_user_mode+0x5/0x30
   ? asm_exc_int3+0x35/0x40
Fixing this by disabling preemption in bpf_perf_event_output.
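To make the failure mode concrete, here is a minimal user space sketch of the same per-CPU nesting scheme (all names are invented for the example; MAX_NEST stands in for ARRAY_SIZE(sds->sds), and the plain ++/-- stand in for this_cpu_inc_return()/this_cpu_dec()). It steps by hand through the interleaving that enabled preemption allows: task A takes a slot and is preempted mid-output, task B takes the next slot and is preempted too, task A finishes and decrements the counter, and the next invocation is handed the slot task B is still using:

#include <stdio.h>

#define MAX_NEST 3

/* Models the per-CPU state: a nesting counter and one buffer per level. */
static int nest_level;
static char sample_bufs[MAX_NEST][16];

/* Take a slot: bump the counter and hand out the matching buffer. */
static char *get_slot(const char *who)
{
	int level = ++nest_level;

	if (level > MAX_NEST)
		return NULL;
	printf("%s gets slot %d\n", who, level - 1);
	return sample_bufs[level - 1];
}

/* Release a slot: just drop one nesting level. */
static void put_slot(const char *who)
{
	nest_level--;
	printf("%s releases a nesting level (now %d)\n", who, nest_level);
}

int main(void)
{
	char *a = get_slot("task A");	/* slot 0, A preempted mid-output */
	char *b = get_slot("task B");	/* slot 1, B preempted too */
	put_slot("task A");		/* A resumes, finishes, counter back to 1 */
	char *c = get_slot("task A");	/* next invocation: slot 1 again */

	printf("two users share a buffer: %s\n", b == c ? "yes" : "no");
	(void)a;
	return 0;
}

With preemption disabled across the whole section, the increment, the buffer use, and the decrement cannot interleave with another task on the same CPU, so a slot stays private to the invocation it was handed to.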
Cc: stable@vger.kernel.org
Fixes: 8c7dcb84e3b7 ("bpf: implement sleepable uprobes by chaining gps")
Acked-by: Hou Tao <houtao1@huawei.com>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Link: https://lore.kernel.org/r/20230725084206.580930-2-jolsa@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/trace/bpf_trace.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -661,8 +661,7 @@ static DEFINE_PER_CPU(int, bpf_trace_nes
 BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *,
 	   map, u64, flags, void *, data, u64, size)
 {
-	struct bpf_trace_sample_data *sds = this_cpu_ptr(&bpf_trace_sds);
-	int nest_level = this_cpu_inc_return(bpf_trace_nest_level);
+	struct bpf_trace_sample_data *sds;
 	struct perf_raw_record raw = {
 		.frag = {
 			.size = size,
@@ -670,7 +669,11 @@ BPF_CALL_5(bpf_perf_event_output, struct
 		},
 	};
 	struct perf_sample_data *sd;
-	int err;
+	int nest_level, err;
+
+	preempt_disable();
+	sds = this_cpu_ptr(&bpf_trace_sds);
+	nest_level = this_cpu_inc_return(bpf_trace_nest_level);
 
 	if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(sds->sds))) {
 		err = -EBUSY;
@@ -688,9 +691,9 @@ BPF_CALL_5(bpf_perf_event_output, struct
 	perf_sample_save_raw_data(sd, &raw);
 
 	err = __bpf_perf_event_output(regs, map, flags, sd);
-
 out:
 	this_cpu_dec(bpf_trace_nest_level);
+	preempt_enable();
 	return err;
 }
 