From: Xu Kuohai xukuohai@huawei.com
After commit 3f50f132d840 ("bpf: Verifier, do explicit ALU32 bounds tracking"), the following bpf prog is rejected:
0: (61) r2 = *(u32 *)(r1 +0) ; R2_w=pkt(off=0,r=0,imm=0) 1: (61) r3 = *(u32 *)(r1 +4) ; R3_w=pkt_end(off=0,imm=0) 2: (bf) r1 = r2 3: (07) r1 += 1 4: (2d) if r1 > r3 goto pc+8 5: (71) r1 = *(u8 *)(r2 +0) ; R1_w=scalar(umax=255,var_off=(0x0; 0xff)) 6: (18) r0 = 0x7fffffffffffff10 8: (0f) r1 += r0 ; R1_w=scalar(umin=0x7fffffffffffff10,umax=0x800000000000000f) 9: (18) r0 = 0x8000000000000000 11: (07) r0 += 1 12: (ad) if r0 < r1 goto pc-2 13: (b7) r0 = 0 14: (95) exit
And the verifier log says:
[...]
from 12 to 11: R0_w=-9223372036854775794 R1=scalar(umin=9223372036854775823,umax=9223372036854775823,var_off=(0x8000000000000000; 0xffffffff)) 11: (07) r0 += 1 ; R0_w=-9223372036854775793 12: (ad) if r0 < r1 goto pc-2 ; R0_w=-9223372036854775793 R1=scalar(umin=9223372036854775823,umax=9223372036854775823,var_off=(0x8000000000000000; 0xffffffff)) 13: safe
from 12 to 11: R0_w=-9223372036854775793 R1=scalar(umin=9223372036854775824,umax=9223372036854775823,var_off=(0x8000000000000000; 0xffffffff)) 11: (07) r0 += 1 ; R0_w=-9223372036854775792 12: (ad) if r0 < r1 goto pc-2 ; R0_w=-9223372036854775792 R1=scalar(umin=9223372036854775824,umax=9223372036854775823,var_off=(0x8000000000000000; 0xffffffff)) 13: safe
[...]
What can be seen here is that r1->umin grows blindly and becomes bigger than r1->umax. The reason is because the loop does not terminate, when r0 increases to r1->umax_value, the following code in reg_set_min_max() sets r1->umin_value to r1->umax_value + 1 blindly:
case BPF_JGT: { if (is_jmp32) { [...] } else { u64 false_umax = opcode == BPF_JGT ? val : val - 1; u64 true_umin = opcode == BPF_JGT ? val + 1 : val;
false_reg->umax_value = min(false_reg->umax_value, false_umax); true_reg->umin_value = max(true_reg->umin_value, true_umin); } break; }
Why the loop does not terminate is because tnum_is_const(src_reg->var_off) always returns false, causing is_branch_taken() to be skipped:
if (src_reg->type == SCALAR_VALUE && !is_jmp32 && tnum_is_const(src_reg->var_off)) { pred = is_branch_taken(dst_reg, // could not reach here src_reg->var_off.value, opcode, is_jmp32); }
Why tnum_is_const(src_reg->var_off) always returns false is because r1->umin_value starts increasing from 0x7fffffffffffff10, always bigger than U32_MAX, causing the __reg_combine_64_into_32() to mark the lower 32 bits unbounded, i.e. not a constant.
To fix it: 1. avoid increasing reg lower bound to a value bigger than the upper bound, or decreasing reg upper bound to a value smaller than the lower bound. 2. set 32-bit min/max values to the lower 32 bits of the 64-bit min/max values when the 64-bit min/max values are equal.
Fixes: 3f50f132d840 ("bpf: Verifier, do explicit ALU32 bounds tracking") Signed-off-by: Xu Kuohai xukuohai@huawei.com --- kernel/bpf/verifier.c | 143 +++++++++++++++++++++++++++--------------- 1 file changed, 93 insertions(+), 50 deletions(-)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 2bbd89279070..b775b50353d6 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2223,14 +2223,21 @@ static bool __reg64_bound_u32(u64 a)
static void __reg_combine_64_into_32(struct bpf_reg_state *reg) { + s64 smin = reg->smin_value; + s64 smax = reg->smax_value; + u64 umin = reg->umin_value; + u64 umax = reg->umax_value; + __mark_reg32_unbounded(reg); - if (__reg64_bound_s32(reg->smin_value) && __reg64_bound_s32(reg->smax_value)) { - reg->s32_min_value = (s32)reg->smin_value; - reg->s32_max_value = (s32)reg->smax_value; + if ((__reg64_bound_s32(smin) && __reg64_bound_s32(smax)) || + smin == smax) { + reg->s32_min_value = (s32)smin; + reg->s32_max_value = (s32)smax; } - if (__reg64_bound_u32(reg->umin_value) && __reg64_bound_u32(reg->umax_value)) { - reg->u32_min_value = (u32)reg->umin_value; - reg->u32_max_value = (u32)reg->umax_value; + if ((__reg64_bound_u32(umin) && __reg64_bound_u32(umax)) || + umin == umax) { + reg->u32_min_value = (u32)umin; + reg->u32_max_value = (u32)umax; } reg_bounds_sync(reg); } @@ -12828,6 +12835,62 @@ static int is_pkt_ptr_branch_taken(struct bpf_reg_state *dst_reg, return -1; }
+static void reg_inc_u32_min(struct bpf_reg_state *reg, u32 val) +{ + reg->u32_min_value = max(reg->u32_min_value, val); + if (reg->u32_min_value > reg->u32_max_value) + reg->u32_min_value = reg->u32_max_value; +} + +static void reg_dec_u32_max(struct bpf_reg_state *reg, u32 val) +{ + reg->u32_max_value = min(reg->u32_max_value, val); + if (reg->u32_max_value < reg->u32_min_value) + reg->u32_max_value = reg->u32_min_value; +} + +static void reg_inc_s32_min(struct bpf_reg_state *reg, s32 val) +{ + reg->s32_min_value = max(reg->s32_min_value, val); + if (reg->s32_min_value > reg->s32_max_value) + reg->s32_min_value = reg->s32_max_value; +} + +static void reg_dec_s32_max(struct bpf_reg_state *reg, s32 val) +{ + reg->s32_max_value = min(reg->s32_max_value, val); + if (reg->s32_max_value < reg->s32_min_value) + reg->s32_max_value = reg->s32_min_value; +} + +static void reg_inc_u64_min(struct bpf_reg_state *reg, u64 val) +{ + reg->umin_value = max(reg->umin_value, val); + if (reg->umin_value > reg->umax_value) + reg->umin_value = reg->umax_value; +} + +static void reg_dec_u64_max(struct bpf_reg_state *reg, u64 val) +{ + reg->umax_value = min(reg->umax_value, val); + if (reg->umax_value < reg->umin_value) + reg->umax_value = reg->umin_value; +} + +static void reg_inc_s64_min(struct bpf_reg_state *reg, s64 val) +{ + reg->smin_value = max(reg->smin_value, val); + if (reg->smin_value > reg->smax_value) + reg->smin_value = reg->smax_value; +} + +static void reg_dec_s64_max(struct bpf_reg_state *reg, s64 val) +{ + reg->smax_value = min(reg->smax_value, val); + if (reg->smax_value < reg->smin_value) + reg->smax_value = reg->smin_value; +} + /* Adjusts the register min/max values in the case that the dst_reg is the * variable register that we are working on, and src_reg is a constant or we're * simply doing a BPF_K check. @@ -12898,76 +12961,56 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg, case BPF_JGE: case BPF_JGT: { - if (is_jmp32) { - u32 false_umax = opcode == BPF_JGT ? val32 : val32 - 1; - u32 true_umin = opcode == BPF_JGT ? val32 + 1 : val32; + bool neq = (opcode == BPF_JGT);
- false_reg->u32_max_value = min(false_reg->u32_max_value, - false_umax); - true_reg->u32_min_value = max(true_reg->u32_min_value, - true_umin); + if (is_jmp32) { + reg_dec_u32_max(false_reg, neq ? val32 : val32 - 1); + reg_inc_u32_min(true_reg, neq ? val32 + 1 : val32); } else { - u64 false_umax = opcode == BPF_JGT ? val : val - 1; - u64 true_umin = opcode == BPF_JGT ? val + 1 : val; - - false_reg->umax_value = min(false_reg->umax_value, false_umax); - true_reg->umin_value = max(true_reg->umin_value, true_umin); + reg_dec_u64_max(false_reg, neq ? val : val - 1); + reg_inc_u64_min(true_reg, neq ? val + 1 : val); } break; } case BPF_JSGE: case BPF_JSGT: { - if (is_jmp32) { - s32 false_smax = opcode == BPF_JSGT ? sval32 : sval32 - 1; - s32 true_smin = opcode == BPF_JSGT ? sval32 + 1 : sval32; + bool neq = (opcode == BPF_JSGT);
- false_reg->s32_max_value = min(false_reg->s32_max_value, false_smax); - true_reg->s32_min_value = max(true_reg->s32_min_value, true_smin); + if (is_jmp32) { + reg_dec_s32_max(false_reg, neq ? sval32 : sval32 - 1); + reg_inc_s32_min(true_reg, neq ? sval32 + 1 : sval32); } else { - s64 false_smax = opcode == BPF_JSGT ? sval : sval - 1; - s64 true_smin = opcode == BPF_JSGT ? sval + 1 : sval; - - false_reg->smax_value = min(false_reg->smax_value, false_smax); - true_reg->smin_value = max(true_reg->smin_value, true_smin); + reg_dec_s64_max(false_reg, neq ? sval : sval - 1); + reg_inc_s64_min(true_reg, neq ? sval + 1 : sval); } break; } case BPF_JLE: case BPF_JLT: { - if (is_jmp32) { - u32 false_umin = opcode == BPF_JLT ? val32 : val32 + 1; - u32 true_umax = opcode == BPF_JLT ? val32 - 1 : val32; + bool neq = (opcode == BPF_JLT);
- false_reg->u32_min_value = max(false_reg->u32_min_value, - false_umin); - true_reg->u32_max_value = min(true_reg->u32_max_value, - true_umax); + if (is_jmp32) { + reg_inc_u32_min(false_reg, neq ? val32 : val32 + 1); + reg_dec_u32_max(true_reg, neq ? val32 - 1 : val32); } else { - u64 false_umin = opcode == BPF_JLT ? val : val + 1; - u64 true_umax = opcode == BPF_JLT ? val - 1 : val; - - false_reg->umin_value = max(false_reg->umin_value, false_umin); - true_reg->umax_value = min(true_reg->umax_value, true_umax); + reg_inc_u64_min(false_reg, neq ? val : val + 1); + reg_dec_u64_max(true_reg, neq ? val - 1 : val); } break; } case BPF_JSLE: case BPF_JSLT: { - if (is_jmp32) { - s32 false_smin = opcode == BPF_JSLT ? sval32 : sval32 + 1; - s32 true_smax = opcode == BPF_JSLT ? sval32 - 1 : sval32; + bool neq = (opcode == BPF_JSLT);
- false_reg->s32_min_value = max(false_reg->s32_min_value, false_smin); - true_reg->s32_max_value = min(true_reg->s32_max_value, true_smax); + if (is_jmp32) { + reg_inc_s32_min(false_reg, neq ? sval32 : sval32 + 1); + reg_dec_s32_max(true_reg, neq ? sval32 - 1 : sval32); } else { - s64 false_smin = opcode == BPF_JSLT ? sval : sval + 1; - s64 true_smax = opcode == BPF_JSLT ? sval - 1 : sval; - - false_reg->smin_value = max(false_reg->smin_value, false_smin); - true_reg->smax_value = min(true_reg->smax_value, true_smax); + reg_inc_s64_min(false_reg, neq ? sval : sval + 1); + reg_dec_s64_max(true_reg, neq ? sval - 1 : sval); } break; }