From: Baokun Li libaokun1@huawei.com
[ Upstream commit 9e8e819f8f272c4e5dcd0bd6c7450e36481ed139 ]
When setting values of type unsigned int through sysfs, we use kstrtoul() to parse it and then truncate part of it as the final set value, when the set value is greater than UINT_MAX, the set value will not match what we see because of the truncation. As follows:
$ echo 4294967296 > /sys/fs/ext4/sda/mb_max_linear_groups $ cat /sys/fs/ext4/sda/mb_max_linear_groups 0
So we use kstrtouint() to parse the attr_pointer_ui type to avoid the inconsistency described above. In addition, a judgment is added to avoid setting s_resv_clusters less than 0.
Signed-off-by: Baokun Li libaokun1@huawei.com Reviewed-by: Jan Kara jack@suse.cz Link: https://lore.kernel.org/r/20240319113325.3110393-2-libaokun1@huawei.com Signed-off-by: Theodore Ts'o tytso@mit.edu Stable-dep-of: 13df4d44a3aa ("ext4: fix slab-out-of-bounds in ext4_mb_find_good_group_avg_frag_lists()") Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Baokun Li libaokun1@huawei.com --- fs/ext4/sysfs.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c index 6d332dff79dd..ca820620b974 100644 --- a/fs/ext4/sysfs.c +++ b/fs/ext4/sysfs.c @@ -104,7 +104,7 @@ static ssize_t reserved_clusters_store(struct ext4_sb_info *sbi, int ret;
ret = kstrtoull(skip_spaces(buf), 0, &val); - if (ret || val >= clusters) + if (ret || val >= clusters || (s64)val < 0) return -EINVAL;
atomic64_set(&sbi->s_resv_clusters, val); @@ -451,7 +451,8 @@ static ssize_t ext4_attr_store(struct kobject *kobj, s_kobj); struct ext4_attr *a = container_of(attr, struct ext4_attr, attr); void *ptr = calc_ptr(a, sbi); - unsigned long t; + unsigned int t; + unsigned long lt; int ret;
switch (a->attr_id) { @@ -460,7 +461,7 @@ static ssize_t ext4_attr_store(struct kobject *kobj, case attr_pointer_ui: if (!ptr) return 0; - ret = kstrtoul(skip_spaces(buf), 0, &t); + ret = kstrtouint(skip_spaces(buf), 0, &t); if (ret) return ret; if (a->attr_ptr == ptr_ext4_super_block_offset) @@ -471,10 +472,10 @@ static ssize_t ext4_attr_store(struct kobject *kobj, case attr_pointer_ul: if (!ptr) return 0; - ret = kstrtoul(skip_spaces(buf), 0, &t); + ret = kstrtoul(skip_spaces(buf), 0, <); if (ret) return ret; - *((unsigned long *) ptr) = t; + *((unsigned long *) ptr) = lt; return len; case attr_inode_readahead: return inode_readahead_blks_store(sbi, buf, len);
From: Baokun Li libaokun1@huawei.com
[ Upstream commit 13df4d44a3aaabe61cd01d277b6ee23ead2a5206 ]
We can trigger a slab-out-of-bounds with the following commands:
mkfs.ext4 -F /dev/$disk 10G mount /dev/$disk /tmp/test echo 2147483647 > /sys/fs/ext4/$disk/mb_group_prealloc echo test > /tmp/test/file && sync
================================================================== BUG: KASAN: slab-out-of-bounds in ext4_mb_find_good_group_avg_frag_lists+0x8a/0x200 [ext4] Read of size 8 at addr ffff888121b9d0f0 by task kworker/u2:0/11 CPU: 0 PID: 11 Comm: kworker/u2:0 Tainted: GL 6.7.0-next-20240118 #521 Call Trace: dump_stack_lvl+0x2c/0x50 kasan_report+0xb6/0xf0 ext4_mb_find_good_group_avg_frag_lists+0x8a/0x200 [ext4] ext4_mb_regular_allocator+0x19e9/0x2370 [ext4] ext4_mb_new_blocks+0x88a/0x1370 [ext4] ext4_ext_map_blocks+0x14f7/0x2390 [ext4] ext4_map_blocks+0x569/0xea0 [ext4] ext4_do_writepages+0x10f6/0x1bc0 [ext4] [...] ==================================================================
The flow of issue triggering is as follows:
// Set s_mb_group_prealloc to 2147483647 via sysfs ext4_mb_new_blocks ext4_mb_normalize_request ext4_mb_normalize_group_request ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc ext4_mb_regular_allocator ext4_mb_choose_next_group ext4_mb_choose_next_group_best_avail mb_avg_fragment_size_order order = fls(len) - 2 = 29 ext4_mb_find_good_group_avg_frag_lists frag_list = &sbi->s_mb_avg_fragment_size[order] if (list_empty(frag_list)) // Trigger SOOB!
At 4k block size, the length of the s_mb_avg_fragment_size list is 14, but an oversized s_mb_group_prealloc is set, causing slab-out-of-bounds to be triggered by an attempt to access an element at index 29.
Add a new attr_id attr_clusters_in_group with values in the range [0, sbi->s_clusters_per_group] and declare mb_group_prealloc as that type to fix the issue. In addition avoid returning an order from mb_avg_fragment_size_order() greater than MB_NUM_ORDERS(sb) and reduce some useless loops.
Fixes: 7e170922f06b ("ext4: Add allocation criteria 1.5 (CR1_5)") CC: stable@vger.kernel.org Signed-off-by: Baokun Li libaokun1@huawei.com Reviewed-by: Jan Kara jack@suse.cz Reviewed-by: Ojaswin Mujoo ojaswin@linux.ibm.com Link: https://lore.kernel.org/r/20240319113325.3110393-5-libaokun1@huawei.com Signed-off-by: Theodore Ts'o tytso@mit.edu Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Baokun Li libaokun1@huawei.com --- fs/ext4/mballoc.c | 4 ++++ fs/ext4/sysfs.c | 15 ++++++++++++++- 2 files changed, 18 insertions(+), 1 deletion(-)
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 714f83632e3f..66b5a68b0254 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -831,6 +831,8 @@ static int mb_avg_fragment_size_order(struct super_block *sb, ext4_grpblk_t len) return 0; if (order == MB_NUM_ORDERS(sb)) order--; + if (WARN_ON_ONCE(order > MB_NUM_ORDERS(sb))) + order = MB_NUM_ORDERS(sb) - 1; return order; }
@@ -1008,6 +1010,8 @@ static void ext4_mb_choose_next_group_best_avail(struct ext4_allocation_context * goal length. */ order = fls(ac->ac_g_ex.fe_len) - 1; + if (WARN_ON_ONCE(order - 1 > MB_NUM_ORDERS(ac->ac_sb))) + order = MB_NUM_ORDERS(ac->ac_sb); min_order = order - sbi->s_mb_best_avail_max_trim_order; if (min_order < 0) min_order = 0; diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c index ca820620b974..d65dccb44ed5 100644 --- a/fs/ext4/sysfs.c +++ b/fs/ext4/sysfs.c @@ -29,6 +29,7 @@ typedef enum { attr_trigger_test_error, attr_first_error_time, attr_last_error_time, + attr_clusters_in_group, attr_feature, attr_pointer_ui, attr_pointer_ul, @@ -207,13 +208,14 @@ EXT4_ATTR_FUNC(sra_exceeded_retry_limit, 0444);
EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, inode_readahead, ext4_sb_info, s_inode_readahead_blks); +EXT4_ATTR_OFFSET(mb_group_prealloc, 0644, clusters_in_group, + ext4_sb_info, s_mb_group_prealloc); EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal); EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats); EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan); EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan); EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs); EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request); -EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc); EXT4_RW_ATTR_SBI_UI(mb_max_linear_groups, s_mb_max_linear_groups); EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb); EXT4_ATTR(trigger_fs_error, 0200, trigger_test_error); @@ -392,6 +394,7 @@ static ssize_t ext4_attr_show(struct kobject *kobj, (unsigned long long) percpu_counter_sum(&sbi->s_sra_exceeded_retry_limit)); case attr_inode_readahead: + case attr_clusters_in_group: case attr_pointer_ui: if (!ptr) return 0; @@ -469,6 +472,16 @@ static ssize_t ext4_attr_store(struct kobject *kobj, else *((unsigned int *) ptr) = t; return len; + case attr_clusters_in_group: + if (!ptr) + return 0; + ret = kstrtouint(skip_spaces(buf), 0, &t); + if (ret) + return ret; + if (t > sbi->s_clusters_per_group) + return -EINVAL; + *((unsigned int *) ptr) = t; + return len; case attr_pointer_ul: if (!ptr) return 0;
On 2024/6/19 20:19, libaokun@huaweicloud.com wrote:
From: Baokun Li libaokun1@huawei.com
[ Upstream commit 13df4d44a3aaabe61cd01d277b6ee23ead2a5206 ]
We can trigger a slab-out-of-bounds with the following commands:
mkfs.ext4 -F /dev/$disk 10G mount /dev/$disk /tmp/test echo 2147483647 > /sys/fs/ext4/$disk/mb_group_prealloc echo test > /tmp/test/file && sync
================================================================== BUG: KASAN: slab-out-of-bounds in ext4_mb_find_good_group_avg_frag_lists+0x8a/0x200 [ext4] Read of size 8 at addr ffff888121b9d0f0 by task kworker/u2:0/11 CPU: 0 PID: 11 Comm: kworker/u2:0 Tainted: GL 6.7.0-next-20240118 #521 Call Trace: dump_stack_lvl+0x2c/0x50 kasan_report+0xb6/0xf0 ext4_mb_find_good_group_avg_frag_lists+0x8a/0x200 [ext4] ext4_mb_regular_allocator+0x19e9/0x2370 [ext4] ext4_mb_new_blocks+0x88a/0x1370 [ext4] ext4_ext_map_blocks+0x14f7/0x2390 [ext4] ext4_map_blocks+0x569/0xea0 [ext4] ext4_do_writepages+0x10f6/0x1bc0 [ext4] [...] ==================================================================
The flow of issue triggering is as follows:
// Set s_mb_group_prealloc to 2147483647 via sysfs ext4_mb_new_blocks ext4_mb_normalize_request ext4_mb_normalize_group_request ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc ext4_mb_regular_allocator ext4_mb_choose_next_group ext4_mb_choose_next_group_best_avail mb_avg_fragment_size_order order = fls(len) - 2 = 29 ext4_mb_find_good_group_avg_frag_lists frag_list = &sbi->s_mb_avg_fragment_size[order] if (list_empty(frag_list)) // Trigger SOOB!
At 4k block size, the length of the s_mb_avg_fragment_size list is 14, but an oversized s_mb_group_prealloc is set, causing slab-out-of-bounds to be triggered by an attempt to access an element at index 29.
Add a new attr_id attr_clusters_in_group with values in the range [0, sbi->s_clusters_per_group] and declare mb_group_prealloc as that type to fix the issue. In addition avoid returning an order from mb_avg_fragment_size_order() greater than MB_NUM_ORDERS(sb) and reduce some useless loops.
Fixes: 7e170922f06b ("ext4: Add allocation criteria 1.5 (CR1_5)") CC: stable@vger.kernel.org Signed-off-by: Baokun Li libaokun1@huawei.com Reviewed-by: Jan Kara jack@suse.cz Reviewed-by: Ojaswin Mujoo ojaswin@linux.ibm.com Link: https://lore.kernel.org/r/20240319113325.3110393-5-libaokun1@huawei.com Signed-off-by: Theodore Ts'o tytso@mit.edu Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Baokun Li libaokun1@huawei.com
fs/ext4/mballoc.c | 4 ++++ fs/ext4/sysfs.c | 15 ++++++++++++++- 2 files changed, 18 insertions(+), 1 deletion(-)
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 714f83632e3f..66b5a68b0254 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -831,6 +831,8 @@ static int mb_avg_fragment_size_order(struct super_block *sb, ext4_grpblk_t len) return 0; if (order == MB_NUM_ORDERS(sb)) order--;
- if (WARN_ON_ONCE(order > MB_NUM_ORDERS(sb)))
return order; }order = MB_NUM_ORDERS(sb) - 1;
@@ -1008,6 +1010,8 @@ static void ext4_mb_choose_next_group_best_avail(struct ext4_allocation_context * goal length. */ order = fls(ac->ac_g_ex.fe_len) - 1;
- if (WARN_ON_ONCE(order - 1 > MB_NUM_ORDERS(ac->ac_sb)))
min_order = order - sbi->s_mb_best_avail_max_trim_order; if (min_order < 0) min_order = 0;order = MB_NUM_ORDERS(ac->ac_sb);
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c index ca820620b974..d65dccb44ed5 100644 --- a/fs/ext4/sysfs.c +++ b/fs/ext4/sysfs.c @@ -29,6 +29,7 @@ typedef enum { attr_trigger_test_error, attr_first_error_time, attr_last_error_time,
- attr_clusters_in_group, attr_feature, attr_pointer_ui, attr_pointer_ul,
@@ -207,13 +208,14 @@ EXT4_ATTR_FUNC(sra_exceeded_retry_limit, 0444); EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, inode_readahead, ext4_sb_info, s_inode_readahead_blks); +EXT4_ATTR_OFFSET(mb_group_prealloc, 0644, clusters_in_group,
EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal); EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats); EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan); EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan); EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs); EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request);ext4_sb_info, s_mb_group_prealloc);
-EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc); EXT4_RW_ATTR_SBI_UI(mb_max_linear_groups, s_mb_max_linear_groups); EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb); EXT4_ATTR(trigger_fs_error, 0200, trigger_test_error); @@ -392,6 +394,7 @@ static ssize_t ext4_attr_show(struct kobject *kobj, (unsigned long long) percpu_counter_sum(&sbi->s_sra_exceeded_retry_limit)); case attr_inode_readahead:
- case attr_clusters_in_group: case attr_pointer_ui: if (!ptr) return 0;
@@ -469,6 +472,16 @@ static ssize_t ext4_attr_store(struct kobject *kobj, else *((unsigned int *) ptr) = t; return len;
- case attr_clusters_in_group:
Hi Greg,
if (!ptr)
return 0;
I've found that the commit that eventually gets merged in doesn't have this judgment.
6.6: 677ff4589f15 ("ext4: fix slab-out-of-bounds in ext4_mb_find_good_group_avg_frag_lists()") 6.9: b829687ae122 ("ext4: fix slab-out-of-bounds in ext4_mb_find_good_group_avg_frag_lists()")
This may result in a null pointer dereference.
History: https://lore.kernel.org/all/0d620010-c6b4-4f80-a835-451813f957e3@huawei.com/ https://lore.kernel.org/all/20240619121952.3508695-2-libaokun@huaweicloud.co... https://lore.kernel.org/all/20240625085542.189183696@linuxfoundation.org/
Regards, Baokun
ret = kstrtouint(skip_spaces(buf), 0, &t);
if (ret)
return ret;
if (t > sbi->s_clusters_per_group)
return -EINVAL;
*((unsigned int *) ptr) = t;
case attr_pointer_ul: if (!ptr) return 0;return len;
On Tue, Jul 16, 2024 at 10:11:55AM +0800, Baokun Li wrote:
On 2024/6/19 20:19, libaokun@huaweicloud.com wrote:
From: Baokun Li libaokun1@huawei.com
[ Upstream commit 13df4d44a3aaabe61cd01d277b6ee23ead2a5206 ]
We can trigger a slab-out-of-bounds with the following commands:
mkfs.ext4 -F /dev/$disk 10G mount /dev/$disk /tmp/test echo 2147483647 > /sys/fs/ext4/$disk/mb_group_prealloc echo test > /tmp/test/file && sync
================================================================== BUG: KASAN: slab-out-of-bounds in ext4_mb_find_good_group_avg_frag_lists+0x8a/0x200 [ext4] Read of size 8 at addr ffff888121b9d0f0 by task kworker/u2:0/11 CPU: 0 PID: 11 Comm: kworker/u2:0 Tainted: GL 6.7.0-next-20240118 #521 Call Trace: dump_stack_lvl+0x2c/0x50 kasan_report+0xb6/0xf0 ext4_mb_find_good_group_avg_frag_lists+0x8a/0x200 [ext4] ext4_mb_regular_allocator+0x19e9/0x2370 [ext4] ext4_mb_new_blocks+0x88a/0x1370 [ext4] ext4_ext_map_blocks+0x14f7/0x2390 [ext4] ext4_map_blocks+0x569/0xea0 [ext4] ext4_do_writepages+0x10f6/0x1bc0 [ext4] [...] ==================================================================
The flow of issue triggering is as follows:
// Set s_mb_group_prealloc to 2147483647 via sysfs ext4_mb_new_blocks ext4_mb_normalize_request ext4_mb_normalize_group_request ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc ext4_mb_regular_allocator ext4_mb_choose_next_group ext4_mb_choose_next_group_best_avail mb_avg_fragment_size_order order = fls(len) - 2 = 29 ext4_mb_find_good_group_avg_frag_lists frag_list = &sbi->s_mb_avg_fragment_size[order] if (list_empty(frag_list)) // Trigger SOOB!
At 4k block size, the length of the s_mb_avg_fragment_size list is 14, but an oversized s_mb_group_prealloc is set, causing slab-out-of-bounds to be triggered by an attempt to access an element at index 29.
Add a new attr_id attr_clusters_in_group with values in the range [0, sbi->s_clusters_per_group] and declare mb_group_prealloc as that type to fix the issue. In addition avoid returning an order from mb_avg_fragment_size_order() greater than MB_NUM_ORDERS(sb) and reduce some useless loops.
Fixes: 7e170922f06b ("ext4: Add allocation criteria 1.5 (CR1_5)") CC: stable@vger.kernel.org Signed-off-by: Baokun Li libaokun1@huawei.com Reviewed-by: Jan Kara jack@suse.cz Reviewed-by: Ojaswin Mujoo ojaswin@linux.ibm.com Link: https://lore.kernel.org/r/20240319113325.3110393-5-libaokun1@huawei.com Signed-off-by: Theodore Ts'o tytso@mit.edu Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Baokun Li libaokun1@huawei.com
fs/ext4/mballoc.c | 4 ++++ fs/ext4/sysfs.c | 15 ++++++++++++++- 2 files changed, 18 insertions(+), 1 deletion(-)
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 714f83632e3f..66b5a68b0254 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -831,6 +831,8 @@ static int mb_avg_fragment_size_order(struct super_block *sb, ext4_grpblk_t len) return 0; if (order == MB_NUM_ORDERS(sb)) order--;
- if (WARN_ON_ONCE(order > MB_NUM_ORDERS(sb)))
return order; }order = MB_NUM_ORDERS(sb) - 1;
@@ -1008,6 +1010,8 @@ static void ext4_mb_choose_next_group_best_avail(struct ext4_allocation_context * goal length. */ order = fls(ac->ac_g_ex.fe_len) - 1;
- if (WARN_ON_ONCE(order - 1 > MB_NUM_ORDERS(ac->ac_sb)))
min_order = order - sbi->s_mb_best_avail_max_trim_order; if (min_order < 0) min_order = 0;order = MB_NUM_ORDERS(ac->ac_sb);
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c index ca820620b974..d65dccb44ed5 100644 --- a/fs/ext4/sysfs.c +++ b/fs/ext4/sysfs.c @@ -29,6 +29,7 @@ typedef enum { attr_trigger_test_error, attr_first_error_time, attr_last_error_time,
- attr_clusters_in_group, attr_feature, attr_pointer_ui, attr_pointer_ul,
@@ -207,13 +208,14 @@ EXT4_ATTR_FUNC(sra_exceeded_retry_limit, 0444); EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, inode_readahead, ext4_sb_info, s_inode_readahead_blks); +EXT4_ATTR_OFFSET(mb_group_prealloc, 0644, clusters_in_group,
EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal); EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats); EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan); EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan); EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs); EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request);ext4_sb_info, s_mb_group_prealloc);
-EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc); EXT4_RW_ATTR_SBI_UI(mb_max_linear_groups, s_mb_max_linear_groups); EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb); EXT4_ATTR(trigger_fs_error, 0200, trigger_test_error); @@ -392,6 +394,7 @@ static ssize_t ext4_attr_show(struct kobject *kobj, (unsigned long long) percpu_counter_sum(&sbi->s_sra_exceeded_retry_limit)); case attr_inode_readahead:
- case attr_clusters_in_group: case attr_pointer_ui: if (!ptr) return 0;
@@ -469,6 +472,16 @@ static ssize_t ext4_attr_store(struct kobject *kobj, else *((unsigned int *) ptr) = t; return len;
- case attr_clusters_in_group:
Hi Greg,
if (!ptr)
return 0;
I've found that the commit that eventually gets merged in doesn't have this judgment.
6.6: 677ff4589f15 ("ext4: fix slab-out-of-bounds in ext4_mb_find_good_group_avg_frag_lists()") 6.9: b829687ae122 ("ext4: fix slab-out-of-bounds in ext4_mb_find_good_group_avg_frag_lists()")
This may result in a null pointer dereference.
History: https://lore.kernel.org/all/0d620010-c6b4-4f80-a835-451813f957e3@huawei.com/ https://lore.kernel.org/all/20240619121952.3508695-2-libaokun@huaweicloud.co... https://lore.kernel.org/all/20240625085542.189183696@linuxfoundation.org/
I don't understand, can you send a patch that fixes this?
Or should we just revert the original commits? If so, what commit ids need to be reverted?
thanks,
greg k-h
On 2024/7/16 15:49, Greg KH wrote:
On Tue, Jul 16, 2024 at 10:11:55AM +0800, Baokun Li wrote:
On 2024/6/19 20:19, libaokun@huaweicloud.com wrote:
From: Baokun Li libaokun1@huawei.com
[ Upstream commit 13df4d44a3aaabe61cd01d277b6ee23ead2a5206 ]
We can trigger a slab-out-of-bounds with the following commands:
mkfs.ext4 -F /dev/$disk 10G mount /dev/$disk /tmp/test echo 2147483647 > /sys/fs/ext4/$disk/mb_group_prealloc echo test > /tmp/test/file && sync
================================================================== BUG: KASAN: slab-out-of-bounds in ext4_mb_find_good_group_avg_frag_lists+0x8a/0x200 [ext4] Read of size 8 at addr ffff888121b9d0f0 by task kworker/u2:0/11 CPU: 0 PID: 11 Comm: kworker/u2:0 Tainted: GL 6.7.0-next-20240118 #521 Call Trace: dump_stack_lvl+0x2c/0x50 kasan_report+0xb6/0xf0 ext4_mb_find_good_group_avg_frag_lists+0x8a/0x200 [ext4] ext4_mb_regular_allocator+0x19e9/0x2370 [ext4] ext4_mb_new_blocks+0x88a/0x1370 [ext4] ext4_ext_map_blocks+0x14f7/0x2390 [ext4] ext4_map_blocks+0x569/0xea0 [ext4] ext4_do_writepages+0x10f6/0x1bc0 [ext4] [...] ==================================================================
The flow of issue triggering is as follows:
// Set s_mb_group_prealloc to 2147483647 via sysfs ext4_mb_new_blocks ext4_mb_normalize_request ext4_mb_normalize_group_request ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc ext4_mb_regular_allocator ext4_mb_choose_next_group ext4_mb_choose_next_group_best_avail mb_avg_fragment_size_order order = fls(len) - 2 = 29 ext4_mb_find_good_group_avg_frag_lists frag_list = &sbi->s_mb_avg_fragment_size[order] if (list_empty(frag_list)) // Trigger SOOB!
At 4k block size, the length of the s_mb_avg_fragment_size list is 14, but an oversized s_mb_group_prealloc is set, causing slab-out-of-bounds to be triggered by an attempt to access an element at index 29.
Add a new attr_id attr_clusters_in_group with values in the range [0, sbi->s_clusters_per_group] and declare mb_group_prealloc as that type to fix the issue. In addition avoid returning an order from mb_avg_fragment_size_order() greater than MB_NUM_ORDERS(sb) and reduce some useless loops.
Fixes: 7e170922f06b ("ext4: Add allocation criteria 1.5 (CR1_5)") CC: stable@vger.kernel.org Signed-off-by: Baokun Li libaokun1@huawei.com Reviewed-by: Jan Kara jack@suse.cz Reviewed-by: Ojaswin Mujoo ojaswin@linux.ibm.com Link: https://lore.kernel.org/r/20240319113325.3110393-5-libaokun1@huawei.com Signed-off-by: Theodore Ts'o tytso@mit.edu Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Baokun Li libaokun1@huawei.com
fs/ext4/mballoc.c | 4 ++++ fs/ext4/sysfs.c | 15 ++++++++++++++- 2 files changed, 18 insertions(+), 1 deletion(-)
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 714f83632e3f..66b5a68b0254 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -831,6 +831,8 @@ static int mb_avg_fragment_size_order(struct super_block *sb, ext4_grpblk_t len) return 0; if (order == MB_NUM_ORDERS(sb)) order--;
- if (WARN_ON_ONCE(order > MB_NUM_ORDERS(sb)))
return order; }order = MB_NUM_ORDERS(sb) - 1;
@@ -1008,6 +1010,8 @@ static void ext4_mb_choose_next_group_best_avail(struct ext4_allocation_context * goal length. */ order = fls(ac->ac_g_ex.fe_len) - 1;
- if (WARN_ON_ONCE(order - 1 > MB_NUM_ORDERS(ac->ac_sb)))
min_order = order - sbi->s_mb_best_avail_max_trim_order; if (min_order < 0) min_order = 0;order = MB_NUM_ORDERS(ac->ac_sb);
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c index ca820620b974..d65dccb44ed5 100644 --- a/fs/ext4/sysfs.c +++ b/fs/ext4/sysfs.c @@ -29,6 +29,7 @@ typedef enum { attr_trigger_test_error, attr_first_error_time, attr_last_error_time,
- attr_clusters_in_group, attr_feature, attr_pointer_ui, attr_pointer_ul,
@@ -207,13 +208,14 @@ EXT4_ATTR_FUNC(sra_exceeded_retry_limit, 0444); EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, inode_readahead, ext4_sb_info, s_inode_readahead_blks); +EXT4_ATTR_OFFSET(mb_group_prealloc, 0644, clusters_in_group,
EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal); EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats); EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan); EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan); EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs); EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request);ext4_sb_info, s_mb_group_prealloc);
-EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc); EXT4_RW_ATTR_SBI_UI(mb_max_linear_groups, s_mb_max_linear_groups); EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb); EXT4_ATTR(trigger_fs_error, 0200, trigger_test_error); @@ -392,6 +394,7 @@ static ssize_t ext4_attr_show(struct kobject *kobj, (unsigned long long) percpu_counter_sum(&sbi->s_sra_exceeded_retry_limit)); case attr_inode_readahead:
- case attr_clusters_in_group: case attr_pointer_ui: if (!ptr) return 0;
@@ -469,6 +472,16 @@ static ssize_t ext4_attr_store(struct kobject *kobj, else *((unsigned int *) ptr) = t; return len;
- case attr_clusters_in_group:
Hi Greg,
if (!ptr)
return 0;
I've found that the commit that eventually gets merged in doesn't have this judgment.
6.6: 677ff4589f15 ("ext4: fix slab-out-of-bounds in ext4_mb_find_good_group_avg_frag_lists()") 6.9: b829687ae122 ("ext4: fix slab-out-of-bounds in ext4_mb_find_good_group_avg_frag_lists()")
This may result in a null pointer dereference.
History: https://lore.kernel.org/all/0d620010-c6b4-4f80-a835-451813f957e3@huawei.com/ https://lore.kernel.org/all/20240619121952.3508695-2-libaokun@huaweicloud.co... https://lore.kernel.org/all/20240625085542.189183696@linuxfoundation.org/
I don't understand, can you send a patch that fixes this?
Sure! And the information about CVE-2024-40955 needs to be updated.
Or should we just revert the original commits? If so, what commit ids need to be reverted?
thanks,
greg k-h
This patch was first adapted to stable by Sasha, then I realized there was a small problem with the adaptation. https://lore.kernel.org/all/0d620010-c6b4-4f80-a835-451813f957e3@huawei.com/
After communicating with you I sent a v2 version, then you dropped the previous patch and I didn't pay attention to it anymore. https://lore.kernel.org/all/20240619121952.3508695-2-libaokun@huaweicloud.co...
Until this patch became CVE-2024-40955: https://lore.kernel.org/all/2024071224-CVE-2024-40955-43e2@gregkh/
My colleague is responsible for analyzing this CVE and backporting the patch from the stale branch. I was reviewing the patch and realized that the patch that ended up being merged in stable was wrong.
Regards, Baokun
linux-stable-mirror@lists.linaro.org