A maliciously corrupted filesystem can contain cycles in the h-tree stored inside a directory. That can easily lead to the kernel corrupting tree nodes that were already verified under its hands while doing a node split and consequently accessing unallocated memory. Fix the problem by verifying traversed block numbers are unique.
CC: stable@vger.kernel.org Signed-off-by: Jan Kara jack@suse.cz --- fs/ext4/namei.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-)
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index ca6ee9940599..06441ad6104d 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -777,12 +777,14 @@ static struct dx_frame * dx_probe(struct ext4_filename *fname, struct inode *dir, struct dx_hash_info *hinfo, struct dx_frame *frame_in) { - unsigned count, indirect; + unsigned count, indirect, level, i; struct dx_entry *at, *entries, *p, *q, *m; struct dx_root *root; struct dx_frame *frame = frame_in; struct dx_frame *ret_err = ERR_PTR(ERR_BAD_DX_DIR); u32 hash; + ext4_lblk_t block; + ext4_lblk_t blocks[EXT4_HTREE_LEVEL];
memset(frame_in, 0, EXT4_HTREE_LEVEL * sizeof(frame_in[0])); frame->bh = ext4_read_dirblock(dir, 0, INDEX); @@ -854,6 +856,8 @@ dx_probe(struct ext4_filename *fname, struct inode *dir, }
dxtrace(printk("Look up %x", hash)); + level = 0; + blocks[0] = 0; while (1) { count = dx_get_count(entries); if (!count || count > dx_get_limit(entries)) { @@ -882,15 +886,27 @@ dx_probe(struct ext4_filename *fname, struct inode *dir, dx_get_block(at))); frame->entries = entries; frame->at = at; - if (!indirect--) + + block = dx_get_block(at); + for (i = 0; i <= level; i++) { + if (blocks[i] == block) { + ext4_warning_inode(dir, + "dx entry: tree cycle block %u points back to block %u", + blocks[level], block); + goto fail; + } + } + blocks[++level] = block; + if (level > indirect) return frame; frame++; - frame->bh = ext4_read_dirblock(dir, dx_get_block(at), INDEX); + frame->bh = ext4_read_dirblock(dir, block, INDEX); if (IS_ERR(frame->bh)) { ret_err = (struct dx_frame *) frame->bh; frame->bh = NULL; goto fail; } + entries = ((struct dx_node *) frame->bh->b_data)->entries;
if (dx_get_limit(entries) != dx_node_limit(dir)) { @@ -1278,7 +1294,7 @@ static int dx_make_map(struct inode *dir, struct buffer_head *bh, count++; cond_resched(); } - de = ext4_next_entry(de, blocksize); + de = ext4_next_entry(de, dir->i_sb->s_blocksize); } return count; }
On Thu, Apr 28, 2022 at 08:31:38PM +0200, Jan Kara wrote:
A maliciously corrupted filesystem can contain cycles in the h-tree stored inside a directory. That can easily lead to the kernel corrupting tree nodes that were already verified under its hands while doing a node split and consequently accessing unallocated memory. Fix the problem by verifying traversed block numbers are unique.
CC: stable@vger.kernel.org Signed-off-by: Jan Kara jack@suse.cz
When I tried applying this patch, I got this crash:
ext4/052 23s ... [19:28:41][ 2.683407] run fstests ext4/052 at 2022-05-17 19:28:41 [ 5.433672] Kernel panic - not syncing: stack-protector: Kernel stack is corrupted in: dx_probe+0x629/0x630 [ 5.434449] CPU: 0 PID: 2468 Comm: dirstress Not tainted 5.18.0-rc5-xfstests-00019-g204e6b4d4cc1 #610 [ 5.435012] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 [ 5.435501] Call Trace: [ 5.435659] <TASK> [ 5.435791] dump_stack_lvl+0x34/0x44 [ 5.436027] panic+0x107/0x28f [ 5.436221] ? dx_probe+0x629/0x630 [ 5.436430] __stack_chk_fail+0x10/0x10 [ 5.436663] dx_probe+0x629/0x630 [ 5.436869] ext4_dx_add_entry+0x54/0x700 [ 5.437176] ext4_add_entry+0x38d/0x4e0 [ 5.437421] ext4_add_nondir+0x2b/0xc0 [ 5.437647] ext4_symlink+0x1c5/0x390 [ 5.437869] vfs_symlink+0x184/0x220 [ 5.438095] do_symlinkat+0x7a/0x110 [ 5.438313] __x64_sys_symlink+0x38/0x40 [ 5.438548] do_syscall_64+0x38/0x90 [ 5.438762] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 5.439070] RIP: 0033:0x7f8137109a97 [ 5.439285] Code: f0 ff ff 73 01 c3 48 8b 0d f6 d3 0c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 b8 58 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d c9 d3 0c 00 f7 d8 64 89 01 48 [ 5.440374] RSP: 002b:00007ffc514a2428 EFLAGS: 00000246 ORIG_RAX: 0000000000000058 [ 5.440820] RAX: ffffffffffffffda RBX: 0000000000035d4a RCX: 00007f8137109a97 [ 5.441278] RDX: 0000000000000000 RSI: 00007ffc514a2450 RDI: 00007ffc514a2450 [ 5.441734] RBP: 0000000000000003 R08: 0000000000000000 R09: 0000000000000013 [ 5.442153] R10: 00007ffc514a21c2 R11: 0000000000000246 R12: 0000000000061a80 [ 5.442571] R13: 0000000000000000 R14: 00007ffc514a2450 R15: 00007ffc514a2c50 [ 5.442989] </TASK> [ 5.443289] Kernel Offset: disabled [ 5.443517] Rebooting in 5 seconds..
Applying just the first patch series (plus the patch hunk from this commit needed so that the first patch compiles) does not result in a crash, so the problem is clearly with this change.
Looking more closely at ext4/052 which tests the large_dir feature, and then looking at the patch, I suspect the fix which is needed is:
- ext4_lblk_t blocks[EXT4_HTREE_LEVEL];
needs to be
ext4_lblk_t blocks[EXT4_HTREE_LEVEL + 1];
Otherwise on large htree which is 3 levels deep, this
blocks[++level] = block;
is going to end up smashing the stack.
Jan, do you agree?
- Ted
On Tue 17-05-22 19:55:47, Theodore Ts'o wrote:
On Thu, Apr 28, 2022 at 08:31:38PM +0200, Jan Kara wrote:
A maliciously corrupted filesystem can contain cycles in the h-tree stored inside a directory. That can easily lead to the kernel corrupting tree nodes that were already verified under its hands while doing a node split and consequently accessing unallocated memory. Fix the problem by verifying traversed block numbers are unique.
CC: stable@vger.kernel.org Signed-off-by: Jan Kara jack@suse.cz
When I tried applying this patch, I got this crash:
ext4/052 23s ... [19:28:41][ 2.683407] run fstests ext4/052 at 2022-05-17 19:28:41 [ 5.433672] Kernel panic - not syncing: stack-protector: Kernel stack is corrupted in: dx_probe+0x629/0x630 [ 5.434449] CPU: 0 PID: 2468 Comm: dirstress Not tainted 5.18.0-rc5-xfstests-00019-g204e6b4d4cc1 #610 [ 5.435012] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 [ 5.435501] Call Trace: [ 5.435659] <TASK> [ 5.435791] dump_stack_lvl+0x34/0x44 [ 5.436027] panic+0x107/0x28f [ 5.436221] ? dx_probe+0x629/0x630 [ 5.436430] __stack_chk_fail+0x10/0x10 [ 5.436663] dx_probe+0x629/0x630 [ 5.436869] ext4_dx_add_entry+0x54/0x700 [ 5.437176] ext4_add_entry+0x38d/0x4e0 [ 5.437421] ext4_add_nondir+0x2b/0xc0 [ 5.437647] ext4_symlink+0x1c5/0x390 [ 5.437869] vfs_symlink+0x184/0x220 [ 5.438095] do_symlinkat+0x7a/0x110 [ 5.438313] __x64_sys_symlink+0x38/0x40 [ 5.438548] do_syscall_64+0x38/0x90 [ 5.438762] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 5.439070] RIP: 0033:0x7f8137109a97 [ 5.439285] Code: f0 ff ff 73 01 c3 48 8b 0d f6 d3 0c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 b8 58 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d c9 d3 0c 00 f7 d8 64 89 01 48 [ 5.440374] RSP: 002b:00007ffc514a2428 EFLAGS: 00000246 ORIG_RAX: 0000000000000058 [ 5.440820] RAX: ffffffffffffffda RBX: 0000000000035d4a RCX: 00007f8137109a97 [ 5.441278] RDX: 0000000000000000 RSI: 00007ffc514a2450 RDI: 00007ffc514a2450 [ 5.441734] RBP: 0000000000000003 R08: 0000000000000000 R09: 0000000000000013 [ 5.442153] R10: 00007ffc514a21c2 R11: 0000000000000246 R12: 0000000000061a80 [ 5.442571] R13: 0000000000000000 R14: 00007ffc514a2450 R15: 00007ffc514a2c50 [ 5.442989] </TASK> [ 5.443289] Kernel Offset: disabled [ 5.443517] Rebooting in 5 seconds..
Applying just the first patch series (plus the patch hunk from this commit needed so that the first patch compiles) does not result in a crash, so the problem is clearly with this change.
I was wondering why my testing didn't catch this and the reason was that my test VM had a version of xfstests which had ext4/052 test but somehow the 'tests/ext4/group' file didn't contain that test (and a few other newer ones) so it didn't get executed. Not sure how that broken group file got there but anyway it's fixed now and indeed ext4/052 test crashes for me as well.
Looking more closely at ext4/052 which tests the large_dir feature, and then looking at the patch, I suspect the fix which is needed is:
- ext4_lblk_t blocks[EXT4_HTREE_LEVEL];
needs to be
ext4_lblk_t blocks[EXT4_HTREE_LEVEL + 1];
Otherwise on large htree which is 3 levels deep, this
blocks[++level] = block;
is going to end up smashing the stack.
Jan, do you agree?
Yes, thanks for debugging this! But I'd prefer a fix like:
if (++level > indirect) return frame; blocks[level] = block;
because the store of the leaf block into 'blocks' array is just bogus. I'll send V2 with both the issues fixed.
Honza
linux-stable-mirror@lists.linaro.org