The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From c2d1b3aae33605a61cbab445d8ae1c708ccd2698 Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov(a)suse.com>
Date: Mon, 25 Mar 2019 14:31:21 +0200
Subject: [PATCH] btrfs: Honour FITRIM range constraints during free space trim
Up until now trimming the freespace was done irrespective of what the
arguments of the FITRIM ioctl were. For example fstrim's -o/-l arguments
will be entirely ignored. Fix it by correctly handling those paramter.
This requires breaking if the found freespace extent is after the end of
the passed range as well as completing trim after trimming
fstrim_range::len bytes.
Fixes: 499f377f49f0 ("btrfs: iterate over unused chunk space in FITRIM")
CC: stable(a)vger.kernel.org # 4.4+
Signed-off-by: Nikolay Borisov <nborisov(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index aa52b0995fba..c5f9e8359c6f 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -11309,9 +11309,9 @@ int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info,
* held back allocations.
*/
static int btrfs_trim_free_extents(struct btrfs_device *device,
- u64 minlen, u64 *trimmed)
+ struct fstrim_range *range, u64 *trimmed)
{
- u64 start = 0, len = 0;
+ u64 start = range->start, len = 0;
int ret;
*trimmed = 0;
@@ -11354,8 +11354,8 @@ static int btrfs_trim_free_extents(struct btrfs_device *device,
if (!trans)
up_read(&fs_info->commit_root_sem);
- ret = find_free_dev_extent_start(trans, device, minlen, start,
- &start, &len);
+ ret = find_free_dev_extent_start(trans, device, range->minlen,
+ start, &start, &len);
if (trans) {
up_read(&fs_info->commit_root_sem);
btrfs_put_transaction(trans);
@@ -11368,6 +11368,16 @@ static int btrfs_trim_free_extents(struct btrfs_device *device,
break;
}
+ /* If we are out of the passed range break */
+ if (start > range->start + range->len - 1) {
+ mutex_unlock(&fs_info->chunk_mutex);
+ ret = 0;
+ break;
+ }
+
+ start = max(range->start, start);
+ len = min(range->len, len);
+
ret = btrfs_issue_discard(device->bdev, start, len, &bytes);
mutex_unlock(&fs_info->chunk_mutex);
@@ -11377,6 +11387,10 @@ static int btrfs_trim_free_extents(struct btrfs_device *device,
start += len;
*trimmed += bytes;
+ /* We've trimmed enough */
+ if (*trimmed >= range->len)
+ break;
+
if (fatal_signal_pending(current)) {
ret = -ERESTARTSYS;
break;
@@ -11460,8 +11474,7 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
mutex_lock(&fs_info->fs_devices->device_list_mutex);
devices = &fs_info->fs_devices->devices;
list_for_each_entry(device, devices, dev_list) {
- ret = btrfs_trim_free_extents(device, range->minlen,
- &group_trimmed);
+ ret = btrfs_trim_free_extents(device, range, &group_trimmed);
if (ret) {
dev_failed++;
dev_ret = ret;
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From c2d1b3aae33605a61cbab445d8ae1c708ccd2698 Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov(a)suse.com>
Date: Mon, 25 Mar 2019 14:31:21 +0200
Subject: [PATCH] btrfs: Honour FITRIM range constraints during free space trim
Up until now trimming the freespace was done irrespective of what the
arguments of the FITRIM ioctl were. For example fstrim's -o/-l arguments
will be entirely ignored. Fix it by correctly handling those paramter.
This requires breaking if the found freespace extent is after the end of
the passed range as well as completing trim after trimming
fstrim_range::len bytes.
Fixes: 499f377f49f0 ("btrfs: iterate over unused chunk space in FITRIM")
CC: stable(a)vger.kernel.org # 4.4+
Signed-off-by: Nikolay Borisov <nborisov(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index aa52b0995fba..c5f9e8359c6f 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -11309,9 +11309,9 @@ int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info,
* held back allocations.
*/
static int btrfs_trim_free_extents(struct btrfs_device *device,
- u64 minlen, u64 *trimmed)
+ struct fstrim_range *range, u64 *trimmed)
{
- u64 start = 0, len = 0;
+ u64 start = range->start, len = 0;
int ret;
*trimmed = 0;
@@ -11354,8 +11354,8 @@ static int btrfs_trim_free_extents(struct btrfs_device *device,
if (!trans)
up_read(&fs_info->commit_root_sem);
- ret = find_free_dev_extent_start(trans, device, minlen, start,
- &start, &len);
+ ret = find_free_dev_extent_start(trans, device, range->minlen,
+ start, &start, &len);
if (trans) {
up_read(&fs_info->commit_root_sem);
btrfs_put_transaction(trans);
@@ -11368,6 +11368,16 @@ static int btrfs_trim_free_extents(struct btrfs_device *device,
break;
}
+ /* If we are out of the passed range break */
+ if (start > range->start + range->len - 1) {
+ mutex_unlock(&fs_info->chunk_mutex);
+ ret = 0;
+ break;
+ }
+
+ start = max(range->start, start);
+ len = min(range->len, len);
+
ret = btrfs_issue_discard(device->bdev, start, len, &bytes);
mutex_unlock(&fs_info->chunk_mutex);
@@ -11377,6 +11387,10 @@ static int btrfs_trim_free_extents(struct btrfs_device *device,
start += len;
*trimmed += bytes;
+ /* We've trimmed enough */
+ if (*trimmed >= range->len)
+ break;
+
if (fatal_signal_pending(current)) {
ret = -ERESTARTSYS;
break;
@@ -11460,8 +11474,7 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
mutex_lock(&fs_info->fs_devices->device_list_mutex);
devices = &fs_info->fs_devices->devices;
list_for_each_entry(device, devices, dev_list) {
- ret = btrfs_trim_free_extents(device, range->minlen,
- &group_trimmed);
+ ret = btrfs_trim_free_extents(device, range, &group_trimmed);
if (ret) {
dev_failed++;
dev_ret = ret;
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 537f38f019fa0b762dbb4c0fc95d7fcce9db8e2d Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov(a)suse.com>
Date: Thu, 14 Mar 2019 09:52:35 +0200
Subject: [PATCH] btrfs: Correctly free extent buffer in case
btree_read_extent_buffer_pages fails
If a an eb fails to be read for whatever reason - it's corrupted on disk
and parent transid/key validations fail or IO for eb pages fail then
this buffer must be removed from the buffer cache. Currently the code
calls free_extent_buffer if an error occurs. Unfortunately this doesn't
achieve the desired behavior since btrfs_find_create_tree_block returns
with eb->refs == 2.
On the other hand free_extent_buffer will only decrement the refs once
leaving it added to the buffer cache radix tree. This enables later
code to look up the buffer from the cache and utilize it potentially
leading to a crash.
The correct way to free the buffer is call free_extent_buffer_stale.
This function will correctly call atomic_dec explicitly for the buffer
and subsequently call release_extent_buffer which will decrement the
final reference thus correctly remove the invalid buffer from buffer
cache. This change affects only newly allocated buffers since they have
eb->refs == 2.
Link: https://bugzilla.kernel.org/show_bug.cgi?id=202755
Reported-by: Jungyeon <jungyeon(a)gatech.edu>
CC: stable(a)vger.kernel.org # 4.4+
Signed-off-by: Nikolay Borisov <nborisov(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 46b368d84aa3..ea44cf136131 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1021,13 +1021,18 @@ void readahead_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr)
{
struct extent_buffer *buf = NULL;
struct inode *btree_inode = fs_info->btree_inode;
+ int ret;
buf = btrfs_find_create_tree_block(fs_info, bytenr);
if (IS_ERR(buf))
return;
- read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree,
- buf, WAIT_NONE, 0);
- free_extent_buffer(buf);
+
+ ret = read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, buf,
+ WAIT_NONE, 0);
+ if (ret < 0)
+ free_extent_buffer_stale(buf);
+ else
+ free_extent_buffer(buf);
}
int reada_tree_block_flagged(struct btrfs_fs_info *fs_info, u64 bytenr,
@@ -1047,12 +1052,12 @@ int reada_tree_block_flagged(struct btrfs_fs_info *fs_info, u64 bytenr,
ret = read_extent_buffer_pages(io_tree, buf, WAIT_PAGE_LOCK,
mirror_num);
if (ret) {
- free_extent_buffer(buf);
+ free_extent_buffer_stale(buf);
return ret;
}
if (test_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags)) {
- free_extent_buffer(buf);
+ free_extent_buffer_stale(buf);
return -EIO;
} else if (extent_buffer_uptodate(buf)) {
*eb = buf;
@@ -1106,7 +1111,7 @@ struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
ret = btree_read_extent_buffer_pages(fs_info, buf, parent_transid,
level, first_key);
if (ret) {
- free_extent_buffer(buf);
+ free_extent_buffer_stale(buf);
return ERR_PTR(ret);
}
return buf;
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 537f38f019fa0b762dbb4c0fc95d7fcce9db8e2d Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov(a)suse.com>
Date: Thu, 14 Mar 2019 09:52:35 +0200
Subject: [PATCH] btrfs: Correctly free extent buffer in case
btree_read_extent_buffer_pages fails
If a an eb fails to be read for whatever reason - it's corrupted on disk
and parent transid/key validations fail or IO for eb pages fail then
this buffer must be removed from the buffer cache. Currently the code
calls free_extent_buffer if an error occurs. Unfortunately this doesn't
achieve the desired behavior since btrfs_find_create_tree_block returns
with eb->refs == 2.
On the other hand free_extent_buffer will only decrement the refs once
leaving it added to the buffer cache radix tree. This enables later
code to look up the buffer from the cache and utilize it potentially
leading to a crash.
The correct way to free the buffer is call free_extent_buffer_stale.
This function will correctly call atomic_dec explicitly for the buffer
and subsequently call release_extent_buffer which will decrement the
final reference thus correctly remove the invalid buffer from buffer
cache. This change affects only newly allocated buffers since they have
eb->refs == 2.
Link: https://bugzilla.kernel.org/show_bug.cgi?id=202755
Reported-by: Jungyeon <jungyeon(a)gatech.edu>
CC: stable(a)vger.kernel.org # 4.4+
Signed-off-by: Nikolay Borisov <nborisov(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 46b368d84aa3..ea44cf136131 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1021,13 +1021,18 @@ void readahead_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr)
{
struct extent_buffer *buf = NULL;
struct inode *btree_inode = fs_info->btree_inode;
+ int ret;
buf = btrfs_find_create_tree_block(fs_info, bytenr);
if (IS_ERR(buf))
return;
- read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree,
- buf, WAIT_NONE, 0);
- free_extent_buffer(buf);
+
+ ret = read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, buf,
+ WAIT_NONE, 0);
+ if (ret < 0)
+ free_extent_buffer_stale(buf);
+ else
+ free_extent_buffer(buf);
}
int reada_tree_block_flagged(struct btrfs_fs_info *fs_info, u64 bytenr,
@@ -1047,12 +1052,12 @@ int reada_tree_block_flagged(struct btrfs_fs_info *fs_info, u64 bytenr,
ret = read_extent_buffer_pages(io_tree, buf, WAIT_PAGE_LOCK,
mirror_num);
if (ret) {
- free_extent_buffer(buf);
+ free_extent_buffer_stale(buf);
return ret;
}
if (test_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags)) {
- free_extent_buffer(buf);
+ free_extent_buffer_stale(buf);
return -EIO;
} else if (extent_buffer_uptodate(buf)) {
*eb = buf;
@@ -1106,7 +1111,7 @@ struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
ret = btree_read_extent_buffer_pages(fs_info, buf, parent_transid,
level, first_key);
if (ret) {
- free_extent_buffer(buf);
+ free_extent_buffer_stale(buf);
return ERR_PTR(ret);
}
return buf;
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 537f38f019fa0b762dbb4c0fc95d7fcce9db8e2d Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov(a)suse.com>
Date: Thu, 14 Mar 2019 09:52:35 +0200
Subject: [PATCH] btrfs: Correctly free extent buffer in case
btree_read_extent_buffer_pages fails
If a an eb fails to be read for whatever reason - it's corrupted on disk
and parent transid/key validations fail or IO for eb pages fail then
this buffer must be removed from the buffer cache. Currently the code
calls free_extent_buffer if an error occurs. Unfortunately this doesn't
achieve the desired behavior since btrfs_find_create_tree_block returns
with eb->refs == 2.
On the other hand free_extent_buffer will only decrement the refs once
leaving it added to the buffer cache radix tree. This enables later
code to look up the buffer from the cache and utilize it potentially
leading to a crash.
The correct way to free the buffer is call free_extent_buffer_stale.
This function will correctly call atomic_dec explicitly for the buffer
and subsequently call release_extent_buffer which will decrement the
final reference thus correctly remove the invalid buffer from buffer
cache. This change affects only newly allocated buffers since they have
eb->refs == 2.
Link: https://bugzilla.kernel.org/show_bug.cgi?id=202755
Reported-by: Jungyeon <jungyeon(a)gatech.edu>
CC: stable(a)vger.kernel.org # 4.4+
Signed-off-by: Nikolay Borisov <nborisov(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 46b368d84aa3..ea44cf136131 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1021,13 +1021,18 @@ void readahead_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr)
{
struct extent_buffer *buf = NULL;
struct inode *btree_inode = fs_info->btree_inode;
+ int ret;
buf = btrfs_find_create_tree_block(fs_info, bytenr);
if (IS_ERR(buf))
return;
- read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree,
- buf, WAIT_NONE, 0);
- free_extent_buffer(buf);
+
+ ret = read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, buf,
+ WAIT_NONE, 0);
+ if (ret < 0)
+ free_extent_buffer_stale(buf);
+ else
+ free_extent_buffer(buf);
}
int reada_tree_block_flagged(struct btrfs_fs_info *fs_info, u64 bytenr,
@@ -1047,12 +1052,12 @@ int reada_tree_block_flagged(struct btrfs_fs_info *fs_info, u64 bytenr,
ret = read_extent_buffer_pages(io_tree, buf, WAIT_PAGE_LOCK,
mirror_num);
if (ret) {
- free_extent_buffer(buf);
+ free_extent_buffer_stale(buf);
return ret;
}
if (test_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags)) {
- free_extent_buffer(buf);
+ free_extent_buffer_stale(buf);
return -EIO;
} else if (extent_buffer_uptodate(buf)) {
*eb = buf;
@@ -1106,7 +1111,7 @@ struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
ret = btree_read_extent_buffer_pages(fs_info, buf, parent_transid,
level, first_key);
if (ret) {
- free_extent_buffer(buf);
+ free_extent_buffer_stale(buf);
return ERR_PTR(ret);
}
return buf;
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 7bc04c5c2cc467c5b40f2b03ba08da174a0d5fa7 Mon Sep 17 00:00:00 2001
From: Barret Rhoden <brho(a)google.com>
Date: Thu, 25 Apr 2019 11:55:50 -0400
Subject: [PATCH] ext4: fix use-after-free race with debug_want_extra_isize
When remounting with debug_want_extra_isize, we were not performing the
same checks that we do during a normal mount. That allowed us to set a
value for s_want_extra_isize that reached outside the s_inode_size.
Fixes: e2b911c53584 ("ext4: clean up feature test macros with predicate functions")
Reported-by: syzbot+f584efa0ac7213c226b7(a)syzkaller.appspotmail.com
Reviewed-by: Jan Kara <jack(a)suse.cz>
Signed-off-by: Barret Rhoden <brho(a)google.com>
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
Cc: stable(a)vger.kernel.org
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 6ed4eb81e674..184944d4d8d1 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3513,6 +3513,37 @@ int ext4_calculate_overhead(struct super_block *sb)
return 0;
}
+static void ext4_clamp_want_extra_isize(struct super_block *sb)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct ext4_super_block *es = sbi->s_es;
+
+ /* determine the minimum size of new large inodes, if present */
+ if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE &&
+ sbi->s_want_extra_isize == 0) {
+ sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
+ EXT4_GOOD_OLD_INODE_SIZE;
+ if (ext4_has_feature_extra_isize(sb)) {
+ if (sbi->s_want_extra_isize <
+ le16_to_cpu(es->s_want_extra_isize))
+ sbi->s_want_extra_isize =
+ le16_to_cpu(es->s_want_extra_isize);
+ if (sbi->s_want_extra_isize <
+ le16_to_cpu(es->s_min_extra_isize))
+ sbi->s_want_extra_isize =
+ le16_to_cpu(es->s_min_extra_isize);
+ }
+ }
+ /* Check if enough inode space is available */
+ if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize >
+ sbi->s_inode_size) {
+ sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
+ EXT4_GOOD_OLD_INODE_SIZE;
+ ext4_msg(sb, KERN_INFO,
+ "required extra inode space not available");
+ }
+}
+
static void ext4_set_resv_clusters(struct super_block *sb)
{
ext4_fsblk_t resv_clusters;
@@ -4387,30 +4418,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
} else if (ret)
goto failed_mount4a;
- /* determine the minimum size of new large inodes, if present */
- if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE &&
- sbi->s_want_extra_isize == 0) {
- sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
- EXT4_GOOD_OLD_INODE_SIZE;
- if (ext4_has_feature_extra_isize(sb)) {
- if (sbi->s_want_extra_isize <
- le16_to_cpu(es->s_want_extra_isize))
- sbi->s_want_extra_isize =
- le16_to_cpu(es->s_want_extra_isize);
- if (sbi->s_want_extra_isize <
- le16_to_cpu(es->s_min_extra_isize))
- sbi->s_want_extra_isize =
- le16_to_cpu(es->s_min_extra_isize);
- }
- }
- /* Check if enough inode space is available */
- if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize >
- sbi->s_inode_size) {
- sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
- EXT4_GOOD_OLD_INODE_SIZE;
- ext4_msg(sb, KERN_INFO, "required extra inode space not"
- "available");
- }
+ ext4_clamp_want_extra_isize(sb);
ext4_set_resv_clusters(sb);
@@ -5194,6 +5202,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
goto restore_opts;
}
+ ext4_clamp_want_extra_isize(sb);
+
if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^
test_opt(sb, JOURNAL_CHECKSUM)) {
ext4_msg(sb, KERN_ERR, "changing journal_checksum "
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 7bc04c5c2cc467c5b40f2b03ba08da174a0d5fa7 Mon Sep 17 00:00:00 2001
From: Barret Rhoden <brho(a)google.com>
Date: Thu, 25 Apr 2019 11:55:50 -0400
Subject: [PATCH] ext4: fix use-after-free race with debug_want_extra_isize
When remounting with debug_want_extra_isize, we were not performing the
same checks that we do during a normal mount. That allowed us to set a
value for s_want_extra_isize that reached outside the s_inode_size.
Fixes: e2b911c53584 ("ext4: clean up feature test macros with predicate functions")
Reported-by: syzbot+f584efa0ac7213c226b7(a)syzkaller.appspotmail.com
Reviewed-by: Jan Kara <jack(a)suse.cz>
Signed-off-by: Barret Rhoden <brho(a)google.com>
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
Cc: stable(a)vger.kernel.org
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 6ed4eb81e674..184944d4d8d1 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3513,6 +3513,37 @@ int ext4_calculate_overhead(struct super_block *sb)
return 0;
}
+static void ext4_clamp_want_extra_isize(struct super_block *sb)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct ext4_super_block *es = sbi->s_es;
+
+ /* determine the minimum size of new large inodes, if present */
+ if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE &&
+ sbi->s_want_extra_isize == 0) {
+ sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
+ EXT4_GOOD_OLD_INODE_SIZE;
+ if (ext4_has_feature_extra_isize(sb)) {
+ if (sbi->s_want_extra_isize <
+ le16_to_cpu(es->s_want_extra_isize))
+ sbi->s_want_extra_isize =
+ le16_to_cpu(es->s_want_extra_isize);
+ if (sbi->s_want_extra_isize <
+ le16_to_cpu(es->s_min_extra_isize))
+ sbi->s_want_extra_isize =
+ le16_to_cpu(es->s_min_extra_isize);
+ }
+ }
+ /* Check if enough inode space is available */
+ if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize >
+ sbi->s_inode_size) {
+ sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
+ EXT4_GOOD_OLD_INODE_SIZE;
+ ext4_msg(sb, KERN_INFO,
+ "required extra inode space not available");
+ }
+}
+
static void ext4_set_resv_clusters(struct super_block *sb)
{
ext4_fsblk_t resv_clusters;
@@ -4387,30 +4418,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
} else if (ret)
goto failed_mount4a;
- /* determine the minimum size of new large inodes, if present */
- if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE &&
- sbi->s_want_extra_isize == 0) {
- sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
- EXT4_GOOD_OLD_INODE_SIZE;
- if (ext4_has_feature_extra_isize(sb)) {
- if (sbi->s_want_extra_isize <
- le16_to_cpu(es->s_want_extra_isize))
- sbi->s_want_extra_isize =
- le16_to_cpu(es->s_want_extra_isize);
- if (sbi->s_want_extra_isize <
- le16_to_cpu(es->s_min_extra_isize))
- sbi->s_want_extra_isize =
- le16_to_cpu(es->s_min_extra_isize);
- }
- }
- /* Check if enough inode space is available */
- if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize >
- sbi->s_inode_size) {
- sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
- EXT4_GOOD_OLD_INODE_SIZE;
- ext4_msg(sb, KERN_INFO, "required extra inode space not"
- "available");
- }
+ ext4_clamp_want_extra_isize(sb);
ext4_set_resv_clusters(sb);
@@ -5194,6 +5202,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
goto restore_opts;
}
+ ext4_clamp_want_extra_isize(sb);
+
if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^
test_opt(sb, JOURNAL_CHECKSUM)) {
ext4_msg(sb, KERN_ERR, "changing journal_checksum "
Hello,
We ran automated tests on a patchset that was proposed for merging into this
kernel tree. The patches were applied to:
Kernel repo: git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
Commit: 7cb9c5d341b9 - Linux 5.1.3
The results of these automated tests are provided below.
Overall result: FAILED (see details below)
Merge: OK
Compile: OK
Tests: FAILED
One or more kernel tests failed:
We hope that these logs can help you find the problem quickly. For the full
detail on our testing procedures, please scroll to the bottom of this message.
Please reply to this email if you have any questions about the tests that we
ran or if you have any suggestions on how to make future tests more effective.
,-. ,-.
( C ) ( K ) Continuous
`-',-.`-' Kernel
( I ) Integration
`-'
______________________________________________________________________________
Merge testing
-------------
We cloned this repository and checked out the following commit:
Repo: git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
Commit: 7cb9c5d341b9 - Linux 5.1.3
We then merged the patchset with `git am`:
locking-rwsem-prevent-decrement-of-reader-count-befo.patch
x86-speculation-mds-revert-cpu-buffer-clear-on-double-fault-exit.patch
x86-speculation-mds-improve-cpu-buffer-clear-documentation.patch
objtool-fix-function-fallthrough-detection.patch
arm64-dts-rockchip-fix-io-domain-voltage-setting-of-apio5-on-rockpro64.patch
arm64-dts-rockchip-disable-dcmds-on-rk3399-s-emmc-controller.patch
arm-dts-qcom-ipq4019-enlarge-pcie-bar-range.patch
arm-dts-exynos-fix-interrupt-for-shared-eints-on-exynos5260.patch
arm-dts-exynos-fix-audio-routing-on-odroid-xu3.patch
arm-dts-exynos-fix-audio-microphone-routing-on-odroid-xu3.patch
mmc-sdhci-of-arasan-add-dts-property-to-disable-dcmds.patch
arm-exynos-fix-a-leaked-reference-by-adding-missing-of_node_put.patch
power-supply-axp288_charger-fix-unchecked-return-value.patch
power-supply-axp288_fuel_gauge-add-acepc-t8-and-t11-mini-pcs-to-the-blacklist.patch
arm64-mmap-ensure-file-offset-is-treated-as-unsigned.patch
arm64-arch_timer-ensure-counter-register-reads-occur-with-seqlock-held.patch
arm64-compat-reduce-address-limit.patch
arm64-clear-osdlr_el1-on-cpu-boot.patch
arm64-save-and-restore-osdlr_el1-across-suspend-resume.patch
sched-x86-save-flags-on-context-switch.patch
x86-mce-add-an-mce-record-filtering-function.patch
x86-mce-amd-don-t-report-l1-btb-mca-errors-on-some-family-17h-models.patch
crypto-crypto4xx-fix-ctr-aes-missing-output-iv.patch
crypto-crypto4xx-fix-cfb-and-ofb-overran-dst-buffer-issues.patch
crypto-salsa20-don-t-access-already-freed-walk.iv.patch
crypto-lrw-don-t-access-already-freed-walk.iv.patch
crypto-chacha-generic-fix-use-as-arm64-no-neon-fallback.patch
crypto-chacha20poly1305-set-cra_name-correctly.patch
crypto-ccm-fix-incompatibility-between-ccm-and-ccm_base.patch
crypto-ccp-do-not-free-psp_master-when-platform_init-fails.patch
crypto-vmx-fix-copy-paste-error-in-ctr-mode.patch
crypto-skcipher-don-t-warn-on-unprocessed-data-after-slow-walk-step.patch
crypto-crct10dif-generic-fix-use-via-crypto_shash_digest.patch
crypto-x86-crct10dif-pcl-fix-use-via-crypto_shash_digest.patch
crypto-arm64-gcm-aes-ce-fix-no-neon-fallback-code.patch
crypto-gcm-fix-incompatibility-between-gcm-and-gcm_base.patch
crypto-rockchip-update-iv-buffer-to-contain-the-next-iv.patch
crypto-caam-qi2-fix-zero-length-buffer-dma-mapping.patch
crypto-caam-qi2-fix-dma-mapping-of-stack-memory.patch
crypto-caam-qi2-generate-hash-keys-in-place.patch
crypto-arm-aes-neonbs-don-t-access-already-freed-walk.iv.patch
crypto-arm64-aes-neonbs-don-t-access-already-freed-walk.iv.patch
Compile testing
---------------
We compiled the kernel for 4 architectures:
aarch64:
build options: -j25 INSTALL_MOD_STRIP=1 targz-pkg
configuration: https://artifacts.cki-project.org/builds/aarch64/kernel-stable_queue_5.1-aa…
kernel build: https://artifacts.cki-project.org/builds/aarch64/kernel-stable_queue_5.1-aa…
ppc64le:
build options: -j25 INSTALL_MOD_STRIP=1 targz-pkg
configuration: https://artifacts.cki-project.org/builds/ppc64le/kernel-stable_queue_5.1-pp…
kernel build: https://artifacts.cki-project.org/builds/ppc64le/kernel-stable_queue_5.1-pp…
s390x:
build options: -j25 INSTALL_MOD_STRIP=1 targz-pkg
configuration: https://artifacts.cki-project.org/builds/s390x/kernel-stable_queue_5.1-s390…
kernel build: https://artifacts.cki-project.org/builds/s390x/kernel-stable_queue_5.1-s390…
x86_64:
build options: -j25 INSTALL_MOD_STRIP=1 targz-pkg
configuration: https://artifacts.cki-project.org/builds/x86_64/kernel-stable_queue_5.1-x86…
kernel build: https://artifacts.cki-project.org/builds/x86_64/kernel-stable_queue_5.1-x86…
Hardware testing
----------------
We booted each kernel and ran the following tests:
aarch64:
✅ Boot test [0]
✅ LTP lite [1]
✅ AMTU (Abstract Machine Test Utility) [2]
✅ audit: audit testsuite test [3]
✅ httpd: mod_ssl smoke sanity [4]
✅ iotop: sanity [5]
✅ tuned: tune-processes-through-perf [6]
✅ Usex - version 1.9-29 [7]
✅ stress: stress-ng [8]
✅ Boot test [0]
✅ selinux-policy: serge-testsuite [9]
ppc64le:
✅ Boot test [0]
✅ LTP lite [1]
✅ AMTU (Abstract Machine Test Utility) [2]
✅ audit: audit testsuite test [3]
✅ httpd: mod_ssl smoke sanity [4]
✅ iotop: sanity [5]
✅ tuned: tune-processes-through-perf [6]
✅ Usex - version 1.9-29 [7]
✅ stress: stress-ng [8]
✅ Boot test [0]
✅ selinux-policy: serge-testsuite [9]
s390x:
✅ Boot test [0]
✅ LTP lite [1]
✅ audit: audit testsuite test [3]
✅ httpd: mod_ssl smoke sanity [4]
✅ iotop: sanity [5]
✅ tuned: tune-processes-through-perf [6]
✅ Usex - version 1.9-29 [7]
✅ stress: stress-ng [8]
✅ Boot test [0]
✅ selinux-policy: serge-testsuite [9]
x86_64:
✅ Boot test [0]
✅ selinux-policy: serge-testsuite [9]
✅ Boot test [0]
✅ LTP lite [1]
✅ AMTU (Abstract Machine Test Utility) [2]
✅ audit: audit testsuite test [3]
✅ httpd: mod_ssl smoke sanity [4]
✅ iotop: sanity [5]
✅ tuned: tune-processes-through-perf [6]
✅ Usex - version 1.9-29 [7]
✅ stress: stress-ng [8]
Test source:
[0]: https://github.com/CKI-project/tests-beaker/archive/master.zip#distribution…
[1]: https://github.com/CKI-project/tests-beaker/archive/master.zip#distribution…
[2]: https://github.com/CKI-project/tests-beaker/archive/master.zip#misc/amtu
[3]: https://github.com/CKI-project/tests-beaker/archive/master.zip#packages/aud…
[4]: https://github.com/CKI-project/tests-beaker/archive/master.zip#packages/htt…
[5]: https://github.com/CKI-project/tests-beaker/archive/master.zip#packages/iot…
[6]: https://github.com/CKI-project/tests-beaker/archive/master.zip#packages/tun…
[7]: https://github.com/CKI-project/tests-beaker/archive/master.zip#standards/us…
[8]: https://github.com/CKI-project/tests-beaker/archive/master.zip#stress/stres…
[9]: https://github.com/CKI-project/tests-beaker/archive/master.zip#/packages/se…