On Thu, Nov 18, 2021 at 09:18:52AM +0100, Peter Zijlstra wrote:
On Thu, Nov 18, 2021 at 09:06:27AM +0100, Peter Zijlstra wrote:
On Wed, Nov 17, 2021 at 03:50:17PM -0800, Linus Torvalds wrote:
I really don't think the WCHAN code should use unwinders at all. It's too damn fragile, and it's too easily triggered from user space.
On x86, esp. with ORC, it pretty much has to. The thing is, the ORC unwinder has been very stable so far. I'm guessing there's some really stupid thing going on, like for example trying to unwind a freed stack.
I *just* managed to reproduce, so let me go have a poke.
Confirmed, with the below it no longer reproduces. Now, let me go undo that and fix the unwinder to not explode while trying to unwind nothing.
OK, so the bug is firmly with 5d1ceb3969b6 ("x86: Fix __get_wchan() for !STACKTRACE") which lost the try_get_task_stack() that stack_trace_*() does.
We can ofc trivially re-instate that, but I'm now running with the below which I suppose is a better fix, hmm?
(obv I still need to look at the other two unwinders)
--- diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index e6f7592790af..9261ff1343cf 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -352,8 +352,14 @@ static bool deref_stack_reg(struct unwind_state *state, unsigned long addr, if (!stack_access_ok(state, addr, sizeof(long))) return false;
- *val = READ_ONCE_NOCHECK(*(unsigned long *)addr); + pagefault_disable(); + __get_kernel_nofault(val, addr, unsigned long, Efault); + pagefault_enable(); return true; + +Efault: + pagefault_enable(); + return false; }
static bool deref_stack_regs(struct unwind_state *state, unsigned long addr, @@ -367,9 +373,16 @@ static bool deref_stack_regs(struct unwind_state *state, unsigned long addr, if (!stack_access_ok(state, addr, sizeof(struct pt_regs))) return false;
- *ip = READ_ONCE_NOCHECK(regs->ip); - *sp = READ_ONCE_NOCHECK(regs->sp); + pagefault_disable(); + __get_kernel_nofault(ip, &regs->ip, unsigned long, Efault); + __get_kernel_nofault(sp, &regs->sp, unsigned long, Efault); + pagefault_enable(); + return true; + +Efault: + pagefault_enable(); + return false; }
static bool deref_stack_iret_regs(struct unwind_state *state, unsigned long addr, @@ -380,9 +393,16 @@ static bool deref_stack_iret_regs(struct unwind_state *state, unsigned long addr if (!stack_access_ok(state, addr, IRET_FRAME_SIZE)) return false;
- *ip = READ_ONCE_NOCHECK(regs->ip); - *sp = READ_ONCE_NOCHECK(regs->sp); + pagefault_disable(); + __get_kernel_nofault(ip, &regs->ip, unsigned long, Efault); + __get_kernel_nofault(sp, &regs->sp, unsigned long, Efault); + pagefault_enable(); + return true; + +Efault: + pagefault_enable(); + return false; }
/* @@ -396,22 +416,27 @@ static bool deref_stack_iret_regs(struct unwind_state *state, unsigned long addr static bool get_reg(struct unwind_state *state, unsigned int reg_off, unsigned long *val) { - unsigned int reg = reg_off/8; - if (!state->regs) return false;
+ pagefault_disable(); if (state->full_regs) { - *val = READ_ONCE_NOCHECK(((unsigned long *)state->regs)[reg]); + __get_kernel_nofault(val, (void *)state->regs + reg_off, unsigned long, Efault); + pagefault_enable(); return true; }
if (state->prev_regs) { - *val = READ_ONCE_NOCHECK(((unsigned long *)state->prev_regs)[reg]); + __get_kernel_nofault(val, (void *)state->prev_regs + reg_off, unsigned long, Efault); + pagefault_enable(); return true; }
return false; + +Efault: + pagefault_enable(); + return false; }
bool unwind_next_frame(struct unwind_state *state) @@ -673,8 +698,12 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task, struct inactive_task_frame *frame = (void *)task->thread.sp;
state->sp = task->thread.sp + sizeof(*frame); - state->bp = READ_ONCE_NOCHECK(frame->bp); - state->ip = READ_ONCE_NOCHECK(frame->ret_addr); + + pagefault_disable(); + __get_kernel_nofault(&state->bp, &frame->bp, unsigned long, Efault); + __get_kernel_nofault(&state->ip, &frame->ret_addr, unsigned long, Efault); + pagefault_enable(); + state->signal = (void *)state->ip == ret_from_fork; }
@@ -713,6 +742,8 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
return;
+Efault: + pagefault_enable(); err: state->error = true; the_end: