From: Kees Cook <keescook(a)chromium.org>
commit 398d5843c03261a2b68730f2f00643826bcec6ba upstream.
The kernel is globally removing the ambiguous 0-length and 1-element
arrays in favor of flexible arrays, so that we can gain both compile-time
and run-time array bounds checking[1].
While struct fealist is defined as a "fake" flexible array (via a
1-element array), it is only used for examination of the first array
element. Walking the list is performed separately, so there is no reason
to treat the "list" member of struct fealist as anything other than a
single entry. Adjust the struct and code to match.
Additionally, struct fea uses the "name" member either as a dynamic
string, or is manually calculated from the start of the struct. Redefine
the member as a flexible array.
No machine code output differences are produced after these changes.
[1] For lots of details, see both:
https://docs.kernel.org/process/deprecated.html#zero-length-and-one-element…https://people.kernel.org/kees/bounded-flexible-arrays-in-c
Cc: Steve French <sfrench(a)samba.org>
Cc: Paulo Alcantara <pc(a)cjr.nz>
Cc: Ronnie Sahlberg <lsahlber(a)redhat.com>
Cc: Shyam Prasad N <sprasad(a)microsoft.com>
Cc: Tom Talpey <tom(a)talpey.com>
Cc: linux-cifs(a)vger.kernel.org
Cc: samba-technical(a)lists.samba.org
Signed-off-by: Kees Cook <keescook(a)chromium.org>
Signed-off-by: Steve French <stfrench(a)microsoft.com>
[ vt: Tested to not break build on x86_64 over v6.1.78. Bug report at [1]. ]
Link: https://lore.kernel.org/all/qjyfz2xftsbch6aozgplxyjfyqnuhn7j44udrucls4pqa5e…
Cc: stable(a)vger.kernel.org # 6.1
Signed-off-by: Vitaly Chikunov <vt(a)altlinux.org>
---
fs/smb/client/cifspdu.h | 4 ++--
fs/smb/client/cifssmb.c | 16 ++++++++--------
2 files changed, 10 insertions(+), 10 deletions(-)
diff --git a/fs/smb/client/cifspdu.h b/fs/smb/client/cifspdu.h
index 97bb1838555b..96ed0a4a2ce2 100644
--- a/fs/smb/client/cifspdu.h
+++ b/fs/smb/client/cifspdu.h
@@ -2593,7 +2593,7 @@ struct fea {
unsigned char EA_flags;
__u8 name_len;
__le16 value_len;
- char name[1];
+ char name[];
/* optionally followed by value */
} __attribute__((packed));
/* flags for _FEA.fEA */
@@ -2601,7 +2601,7 @@ struct fea {
struct fealist {
__le32 list_len;
- struct fea list[1];
+ struct fea list;
} __attribute__((packed));
/* used to hold an arbitrary blob of data */
diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c
index 67c5fc2b2db9..784fc5ba2c44 100644
--- a/fs/smb/client/cifssmb.c
+++ b/fs/smb/client/cifssmb.c
@@ -5697,7 +5697,7 @@ CIFSSMBQAllEAs(const unsigned int xid, struct cifs_tcon *tcon,
/* account for ea list len */
list_len -= 4;
- temp_fea = ea_response_data->list;
+ temp_fea = &ea_response_data->list;
temp_ptr = (char *)temp_fea;
while (list_len > 0) {
unsigned int name_len;
@@ -5812,7 +5812,7 @@ CIFSSMBSetEA(const unsigned int xid, struct cifs_tcon *tcon,
else
name_len = strnlen(ea_name, 255);
- count = sizeof(*parm_data) + ea_value_len + name_len;
+ count = sizeof(*parm_data) + 1 + ea_value_len + name_len;
pSMB->MaxParameterCount = cpu_to_le16(2);
/* BB find max SMB PDU from sess */
pSMB->MaxDataCount = cpu_to_le16(1000);
@@ -5836,14 +5836,14 @@ CIFSSMBSetEA(const unsigned int xid, struct cifs_tcon *tcon,
byte_count = 3 /* pad */ + params + count;
pSMB->DataCount = cpu_to_le16(count);
parm_data->list_len = cpu_to_le32(count);
- parm_data->list[0].EA_flags = 0;
+ parm_data->list.EA_flags = 0;
/* we checked above that name len is less than 255 */
- parm_data->list[0].name_len = (__u8)name_len;
+ parm_data->list.name_len = (__u8)name_len;
/* EA names are always ASCII */
if (ea_name)
- strncpy(parm_data->list[0].name, ea_name, name_len);
- parm_data->list[0].name[name_len] = 0;
- parm_data->list[0].value_len = cpu_to_le16(ea_value_len);
+ strncpy(parm_data->list.name, ea_name, name_len);
+ parm_data->list.name[name_len] = '\0';
+ parm_data->list.value_len = cpu_to_le16(ea_value_len);
/* caller ensures that ea_value_len is less than 64K but
we need to ensure that it fits within the smb */
@@ -5851,7 +5851,7 @@ CIFSSMBSetEA(const unsigned int xid, struct cifs_tcon *tcon,
negotiated SMB buffer size BB */
/* if (ea_value_len > buffer_size - 512 (enough for header)) */
if (ea_value_len)
- memcpy(parm_data->list[0].name+name_len+1,
+ memcpy(parm_data->list.name + name_len + 1,
ea_value, ea_value_len);
pSMB->TotalDataCount = pSMB->DataCount;
--
2.42.1
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x 5571e41ec6e56e35f34ae9f5b3a335ef510e0ade
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024021802-crunchy-presoak-d1f4@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
5571e41ec6e5 ("btrfs: don't drop extent_map for free space inode on write error")
4c0c8cfc8433 ("btrfs: move btrfs_drop_extent_cache() to extent_map.c")
cef7820d6abf ("btrfs: fix missed extent on fsync after dropping extent maps")
570eb97bace8 ("btrfs: unify the lock/unlock extent variants")
dbbf49928f2e ("btrfs: remove the wake argument from clear_extent_bits")
e3974c669472 ("btrfs: move core extent_io_tree functions to extent-io-tree.c")
38830018387e ("btrfs: move a few exported extent_io_tree helpers to extent-io-tree.c")
04eba8932392 ("btrfs: temporarily export and then move extent state helpers")
91af24e48474 ("btrfs: temporarily export and move core extent_io_tree tree functions")
6962541e964f ("btrfs: move btrfs_debug_check_extent_io_range into extent-io-tree.c")
ec39e39bbf97 ("btrfs: export wait_extent_bit")
a66318872c41 ("btrfs: move simple extent bit helpers out of extent_io.c")
ad795329574c ("btrfs: convert BUG_ON(EXTENT_BIT_LOCKED) checks to ASSERT's")
83cf709a89fb ("btrfs: move extent state init and alloc functions to their own file")
c45379a20fbc ("btrfs: temporarily export alloc_extent_state helpers")
a40246e8afc0 ("btrfs: separate out the eb and extent state leak helpers")
a62a3bd9546b ("btrfs: separate out the extent state and extent buffer init code")
87c11705cc94 ("btrfs: convert the io_failure_tree to a plain rb_tree")
a2061748052c ("btrfs: unexport internal failrec functions")
0d0a762c419a ("btrfs: rename clean_io_failure and remove extraneous args")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 5571e41ec6e56e35f34ae9f5b3a335ef510e0ade Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef(a)toxicpanda.com>
Date: Wed, 31 Jan 2024 14:27:25 -0500
Subject: [PATCH] btrfs: don't drop extent_map for free space inode on write
error
While running the CI for an unrelated change I hit the following panic
with generic/648 on btrfs_holes_spacecache.
assertion failed: block_start != EXTENT_MAP_HOLE, in fs/btrfs/extent_io.c:1385
------------[ cut here ]------------
kernel BUG at fs/btrfs/extent_io.c:1385!
invalid opcode: 0000 [#1] PREEMPT SMP NOPTI
CPU: 1 PID: 2695096 Comm: fsstress Kdump: loaded Tainted: G W 6.8.0-rc2+ #1
RIP: 0010:__extent_writepage_io.constprop.0+0x4c1/0x5c0
Call Trace:
<TASK>
extent_write_cache_pages+0x2ac/0x8f0
extent_writepages+0x87/0x110
do_writepages+0xd5/0x1f0
filemap_fdatawrite_wbc+0x63/0x90
__filemap_fdatawrite_range+0x5c/0x80
btrfs_fdatawrite_range+0x1f/0x50
btrfs_write_out_cache+0x507/0x560
btrfs_write_dirty_block_groups+0x32a/0x420
commit_cowonly_roots+0x21b/0x290
btrfs_commit_transaction+0x813/0x1360
btrfs_sync_file+0x51a/0x640
__x64_sys_fdatasync+0x52/0x90
do_syscall_64+0x9c/0x190
entry_SYSCALL_64_after_hwframe+0x6e/0x76
This happens because we fail to write out the free space cache in one
instance, come back around and attempt to write it again. However on
the second pass through we go to call btrfs_get_extent() on the inode to
get the extent mapping. Because this is a new block group, and with the
free space inode we always search the commit root to avoid deadlocking
with the tree, we find nothing and return a EXTENT_MAP_HOLE for the
requested range.
This happens because the first time we try to write the space cache out
we hit an error, and on an error we drop the extent mapping. This is
normal for normal files, but the free space cache inode is special. We
always expect the extent map to be correct. Thus the second time
through we end up with a bogus extent map.
Since we're deprecating this feature, the most straightforward way to
fix this is to simply skip dropping the extent map range for this failed
range.
I shortened the test by using error injection to stress the area to make
it easier to reproduce. With this patch in place we no longer panic
with my error injection test.
CC: stable(a)vger.kernel.org # 4.14+
Reviewed-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: Josef Bacik <josef(a)toxicpanda.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 7bcc1c03437a..d232eca1bbee 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3184,8 +3184,23 @@ int btrfs_finish_one_ordered(struct btrfs_ordered_extent *ordered_extent)
unwritten_start += logical_len;
clear_extent_uptodate(io_tree, unwritten_start, end, NULL);
- /* Drop extent maps for the part of the extent we didn't write. */
- btrfs_drop_extent_map_range(inode, unwritten_start, end, false);
+ /*
+ * Drop extent maps for the part of the extent we didn't write.
+ *
+ * We have an exception here for the free_space_inode, this is
+ * because when we do btrfs_get_extent() on the free space inode
+ * we will search the commit root. If this is a new block group
+ * we won't find anything, and we will trip over the assert in
+ * writepage where we do ASSERT(em->block_start !=
+ * EXTENT_MAP_HOLE).
+ *
+ * Theoretically we could also skip this for any NOCOW extent as
+ * we don't mess with the extent map tree in the NOCOW case, but
+ * for now simply skip this if we are the free space inode.
+ */
+ if (!btrfs_is_free_space_inode(inode))
+ btrfs_drop_extent_map_range(inode, unwritten_start,
+ end, false);
/*
* If the ordered extent had an IOERR or something else went
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 5571e41ec6e56e35f34ae9f5b3a335ef510e0ade
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024021800-willed-chug-3616@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
5571e41ec6e5 ("btrfs: don't drop extent_map for free space inode on write error")
4c0c8cfc8433 ("btrfs: move btrfs_drop_extent_cache() to extent_map.c")
cef7820d6abf ("btrfs: fix missed extent on fsync after dropping extent maps")
570eb97bace8 ("btrfs: unify the lock/unlock extent variants")
dbbf49928f2e ("btrfs: remove the wake argument from clear_extent_bits")
e3974c669472 ("btrfs: move core extent_io_tree functions to extent-io-tree.c")
38830018387e ("btrfs: move a few exported extent_io_tree helpers to extent-io-tree.c")
04eba8932392 ("btrfs: temporarily export and then move extent state helpers")
91af24e48474 ("btrfs: temporarily export and move core extent_io_tree tree functions")
6962541e964f ("btrfs: move btrfs_debug_check_extent_io_range into extent-io-tree.c")
ec39e39bbf97 ("btrfs: export wait_extent_bit")
a66318872c41 ("btrfs: move simple extent bit helpers out of extent_io.c")
ad795329574c ("btrfs: convert BUG_ON(EXTENT_BIT_LOCKED) checks to ASSERT's")
83cf709a89fb ("btrfs: move extent state init and alloc functions to their own file")
c45379a20fbc ("btrfs: temporarily export alloc_extent_state helpers")
a40246e8afc0 ("btrfs: separate out the eb and extent state leak helpers")
a62a3bd9546b ("btrfs: separate out the extent state and extent buffer init code")
87c11705cc94 ("btrfs: convert the io_failure_tree to a plain rb_tree")
a2061748052c ("btrfs: unexport internal failrec functions")
0d0a762c419a ("btrfs: rename clean_io_failure and remove extraneous args")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 5571e41ec6e56e35f34ae9f5b3a335ef510e0ade Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef(a)toxicpanda.com>
Date: Wed, 31 Jan 2024 14:27:25 -0500
Subject: [PATCH] btrfs: don't drop extent_map for free space inode on write
error
While running the CI for an unrelated change I hit the following panic
with generic/648 on btrfs_holes_spacecache.
assertion failed: block_start != EXTENT_MAP_HOLE, in fs/btrfs/extent_io.c:1385
------------[ cut here ]------------
kernel BUG at fs/btrfs/extent_io.c:1385!
invalid opcode: 0000 [#1] PREEMPT SMP NOPTI
CPU: 1 PID: 2695096 Comm: fsstress Kdump: loaded Tainted: G W 6.8.0-rc2+ #1
RIP: 0010:__extent_writepage_io.constprop.0+0x4c1/0x5c0
Call Trace:
<TASK>
extent_write_cache_pages+0x2ac/0x8f0
extent_writepages+0x87/0x110
do_writepages+0xd5/0x1f0
filemap_fdatawrite_wbc+0x63/0x90
__filemap_fdatawrite_range+0x5c/0x80
btrfs_fdatawrite_range+0x1f/0x50
btrfs_write_out_cache+0x507/0x560
btrfs_write_dirty_block_groups+0x32a/0x420
commit_cowonly_roots+0x21b/0x290
btrfs_commit_transaction+0x813/0x1360
btrfs_sync_file+0x51a/0x640
__x64_sys_fdatasync+0x52/0x90
do_syscall_64+0x9c/0x190
entry_SYSCALL_64_after_hwframe+0x6e/0x76
This happens because we fail to write out the free space cache in one
instance, come back around and attempt to write it again. However on
the second pass through we go to call btrfs_get_extent() on the inode to
get the extent mapping. Because this is a new block group, and with the
free space inode we always search the commit root to avoid deadlocking
with the tree, we find nothing and return a EXTENT_MAP_HOLE for the
requested range.
This happens because the first time we try to write the space cache out
we hit an error, and on an error we drop the extent mapping. This is
normal for normal files, but the free space cache inode is special. We
always expect the extent map to be correct. Thus the second time
through we end up with a bogus extent map.
Since we're deprecating this feature, the most straightforward way to
fix this is to simply skip dropping the extent map range for this failed
range.
I shortened the test by using error injection to stress the area to make
it easier to reproduce. With this patch in place we no longer panic
with my error injection test.
CC: stable(a)vger.kernel.org # 4.14+
Reviewed-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: Josef Bacik <josef(a)toxicpanda.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 7bcc1c03437a..d232eca1bbee 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3184,8 +3184,23 @@ int btrfs_finish_one_ordered(struct btrfs_ordered_extent *ordered_extent)
unwritten_start += logical_len;
clear_extent_uptodate(io_tree, unwritten_start, end, NULL);
- /* Drop extent maps for the part of the extent we didn't write. */
- btrfs_drop_extent_map_range(inode, unwritten_start, end, false);
+ /*
+ * Drop extent maps for the part of the extent we didn't write.
+ *
+ * We have an exception here for the free_space_inode, this is
+ * because when we do btrfs_get_extent() on the free space inode
+ * we will search the commit root. If this is a new block group
+ * we won't find anything, and we will trip over the assert in
+ * writepage where we do ASSERT(em->block_start !=
+ * EXTENT_MAP_HOLE).
+ *
+ * Theoretically we could also skip this for any NOCOW extent as
+ * we don't mess with the extent map tree in the NOCOW case, but
+ * for now simply skip this if we are the free space inode.
+ */
+ if (!btrfs_is_free_space_inode(inode))
+ btrfs_drop_extent_map_range(inode, unwritten_start,
+ end, false);
/*
* If the ordered extent had an IOERR or something else went
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 5571e41ec6e56e35f34ae9f5b3a335ef510e0ade
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024021858-sharpie-diffusive-52e1@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
5571e41ec6e5 ("btrfs: don't drop extent_map for free space inode on write error")
4c0c8cfc8433 ("btrfs: move btrfs_drop_extent_cache() to extent_map.c")
cef7820d6abf ("btrfs: fix missed extent on fsync after dropping extent maps")
570eb97bace8 ("btrfs: unify the lock/unlock extent variants")
dbbf49928f2e ("btrfs: remove the wake argument from clear_extent_bits")
e3974c669472 ("btrfs: move core extent_io_tree functions to extent-io-tree.c")
38830018387e ("btrfs: move a few exported extent_io_tree helpers to extent-io-tree.c")
04eba8932392 ("btrfs: temporarily export and then move extent state helpers")
91af24e48474 ("btrfs: temporarily export and move core extent_io_tree tree functions")
6962541e964f ("btrfs: move btrfs_debug_check_extent_io_range into extent-io-tree.c")
ec39e39bbf97 ("btrfs: export wait_extent_bit")
a66318872c41 ("btrfs: move simple extent bit helpers out of extent_io.c")
ad795329574c ("btrfs: convert BUG_ON(EXTENT_BIT_LOCKED) checks to ASSERT's")
83cf709a89fb ("btrfs: move extent state init and alloc functions to their own file")
c45379a20fbc ("btrfs: temporarily export alloc_extent_state helpers")
a40246e8afc0 ("btrfs: separate out the eb and extent state leak helpers")
a62a3bd9546b ("btrfs: separate out the extent state and extent buffer init code")
87c11705cc94 ("btrfs: convert the io_failure_tree to a plain rb_tree")
a2061748052c ("btrfs: unexport internal failrec functions")
0d0a762c419a ("btrfs: rename clean_io_failure and remove extraneous args")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 5571e41ec6e56e35f34ae9f5b3a335ef510e0ade Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef(a)toxicpanda.com>
Date: Wed, 31 Jan 2024 14:27:25 -0500
Subject: [PATCH] btrfs: don't drop extent_map for free space inode on write
error
While running the CI for an unrelated change I hit the following panic
with generic/648 on btrfs_holes_spacecache.
assertion failed: block_start != EXTENT_MAP_HOLE, in fs/btrfs/extent_io.c:1385
------------[ cut here ]------------
kernel BUG at fs/btrfs/extent_io.c:1385!
invalid opcode: 0000 [#1] PREEMPT SMP NOPTI
CPU: 1 PID: 2695096 Comm: fsstress Kdump: loaded Tainted: G W 6.8.0-rc2+ #1
RIP: 0010:__extent_writepage_io.constprop.0+0x4c1/0x5c0
Call Trace:
<TASK>
extent_write_cache_pages+0x2ac/0x8f0
extent_writepages+0x87/0x110
do_writepages+0xd5/0x1f0
filemap_fdatawrite_wbc+0x63/0x90
__filemap_fdatawrite_range+0x5c/0x80
btrfs_fdatawrite_range+0x1f/0x50
btrfs_write_out_cache+0x507/0x560
btrfs_write_dirty_block_groups+0x32a/0x420
commit_cowonly_roots+0x21b/0x290
btrfs_commit_transaction+0x813/0x1360
btrfs_sync_file+0x51a/0x640
__x64_sys_fdatasync+0x52/0x90
do_syscall_64+0x9c/0x190
entry_SYSCALL_64_after_hwframe+0x6e/0x76
This happens because we fail to write out the free space cache in one
instance, come back around and attempt to write it again. However on
the second pass through we go to call btrfs_get_extent() on the inode to
get the extent mapping. Because this is a new block group, and with the
free space inode we always search the commit root to avoid deadlocking
with the tree, we find nothing and return a EXTENT_MAP_HOLE for the
requested range.
This happens because the first time we try to write the space cache out
we hit an error, and on an error we drop the extent mapping. This is
normal for normal files, but the free space cache inode is special. We
always expect the extent map to be correct. Thus the second time
through we end up with a bogus extent map.
Since we're deprecating this feature, the most straightforward way to
fix this is to simply skip dropping the extent map range for this failed
range.
I shortened the test by using error injection to stress the area to make
it easier to reproduce. With this patch in place we no longer panic
with my error injection test.
CC: stable(a)vger.kernel.org # 4.14+
Reviewed-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: Josef Bacik <josef(a)toxicpanda.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 7bcc1c03437a..d232eca1bbee 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3184,8 +3184,23 @@ int btrfs_finish_one_ordered(struct btrfs_ordered_extent *ordered_extent)
unwritten_start += logical_len;
clear_extent_uptodate(io_tree, unwritten_start, end, NULL);
- /* Drop extent maps for the part of the extent we didn't write. */
- btrfs_drop_extent_map_range(inode, unwritten_start, end, false);
+ /*
+ * Drop extent maps for the part of the extent we didn't write.
+ *
+ * We have an exception here for the free_space_inode, this is
+ * because when we do btrfs_get_extent() on the free space inode
+ * we will search the commit root. If this is a new block group
+ * we won't find anything, and we will trip over the assert in
+ * writepage where we do ASSERT(em->block_start !=
+ * EXTENT_MAP_HOLE).
+ *
+ * Theoretically we could also skip this for any NOCOW extent as
+ * we don't mess with the extent map tree in the NOCOW case, but
+ * for now simply skip this if we are the free space inode.
+ */
+ if (!btrfs_is_free_space_inode(inode))
+ btrfs_drop_extent_map_range(inode, unwritten_start,
+ end, false);
/*
* If the ordered extent had an IOERR or something else went
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 5571e41ec6e56e35f34ae9f5b3a335ef510e0ade
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024021856-vigorous-supper-1ca1@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
5571e41ec6e5 ("btrfs: don't drop extent_map for free space inode on write error")
4c0c8cfc8433 ("btrfs: move btrfs_drop_extent_cache() to extent_map.c")
cef7820d6abf ("btrfs: fix missed extent on fsync after dropping extent maps")
570eb97bace8 ("btrfs: unify the lock/unlock extent variants")
dbbf49928f2e ("btrfs: remove the wake argument from clear_extent_bits")
e3974c669472 ("btrfs: move core extent_io_tree functions to extent-io-tree.c")
38830018387e ("btrfs: move a few exported extent_io_tree helpers to extent-io-tree.c")
04eba8932392 ("btrfs: temporarily export and then move extent state helpers")
91af24e48474 ("btrfs: temporarily export and move core extent_io_tree tree functions")
6962541e964f ("btrfs: move btrfs_debug_check_extent_io_range into extent-io-tree.c")
ec39e39bbf97 ("btrfs: export wait_extent_bit")
a66318872c41 ("btrfs: move simple extent bit helpers out of extent_io.c")
ad795329574c ("btrfs: convert BUG_ON(EXTENT_BIT_LOCKED) checks to ASSERT's")
83cf709a89fb ("btrfs: move extent state init and alloc functions to their own file")
c45379a20fbc ("btrfs: temporarily export alloc_extent_state helpers")
a40246e8afc0 ("btrfs: separate out the eb and extent state leak helpers")
a62a3bd9546b ("btrfs: separate out the extent state and extent buffer init code")
87c11705cc94 ("btrfs: convert the io_failure_tree to a plain rb_tree")
a2061748052c ("btrfs: unexport internal failrec functions")
0d0a762c419a ("btrfs: rename clean_io_failure and remove extraneous args")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 5571e41ec6e56e35f34ae9f5b3a335ef510e0ade Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef(a)toxicpanda.com>
Date: Wed, 31 Jan 2024 14:27:25 -0500
Subject: [PATCH] btrfs: don't drop extent_map for free space inode on write
error
While running the CI for an unrelated change I hit the following panic
with generic/648 on btrfs_holes_spacecache.
assertion failed: block_start != EXTENT_MAP_HOLE, in fs/btrfs/extent_io.c:1385
------------[ cut here ]------------
kernel BUG at fs/btrfs/extent_io.c:1385!
invalid opcode: 0000 [#1] PREEMPT SMP NOPTI
CPU: 1 PID: 2695096 Comm: fsstress Kdump: loaded Tainted: G W 6.8.0-rc2+ #1
RIP: 0010:__extent_writepage_io.constprop.0+0x4c1/0x5c0
Call Trace:
<TASK>
extent_write_cache_pages+0x2ac/0x8f0
extent_writepages+0x87/0x110
do_writepages+0xd5/0x1f0
filemap_fdatawrite_wbc+0x63/0x90
__filemap_fdatawrite_range+0x5c/0x80
btrfs_fdatawrite_range+0x1f/0x50
btrfs_write_out_cache+0x507/0x560
btrfs_write_dirty_block_groups+0x32a/0x420
commit_cowonly_roots+0x21b/0x290
btrfs_commit_transaction+0x813/0x1360
btrfs_sync_file+0x51a/0x640
__x64_sys_fdatasync+0x52/0x90
do_syscall_64+0x9c/0x190
entry_SYSCALL_64_after_hwframe+0x6e/0x76
This happens because we fail to write out the free space cache in one
instance, come back around and attempt to write it again. However on
the second pass through we go to call btrfs_get_extent() on the inode to
get the extent mapping. Because this is a new block group, and with the
free space inode we always search the commit root to avoid deadlocking
with the tree, we find nothing and return a EXTENT_MAP_HOLE for the
requested range.
This happens because the first time we try to write the space cache out
we hit an error, and on an error we drop the extent mapping. This is
normal for normal files, but the free space cache inode is special. We
always expect the extent map to be correct. Thus the second time
through we end up with a bogus extent map.
Since we're deprecating this feature, the most straightforward way to
fix this is to simply skip dropping the extent map range for this failed
range.
I shortened the test by using error injection to stress the area to make
it easier to reproduce. With this patch in place we no longer panic
with my error injection test.
CC: stable(a)vger.kernel.org # 4.14+
Reviewed-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: Josef Bacik <josef(a)toxicpanda.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 7bcc1c03437a..d232eca1bbee 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3184,8 +3184,23 @@ int btrfs_finish_one_ordered(struct btrfs_ordered_extent *ordered_extent)
unwritten_start += logical_len;
clear_extent_uptodate(io_tree, unwritten_start, end, NULL);
- /* Drop extent maps for the part of the extent we didn't write. */
- btrfs_drop_extent_map_range(inode, unwritten_start, end, false);
+ /*
+ * Drop extent maps for the part of the extent we didn't write.
+ *
+ * We have an exception here for the free_space_inode, this is
+ * because when we do btrfs_get_extent() on the free space inode
+ * we will search the commit root. If this is a new block group
+ * we won't find anything, and we will trip over the assert in
+ * writepage where we do ASSERT(em->block_start !=
+ * EXTENT_MAP_HOLE).
+ *
+ * Theoretically we could also skip this for any NOCOW extent as
+ * we don't mess with the extent map tree in the NOCOW case, but
+ * for now simply skip this if we are the free space inode.
+ */
+ if (!btrfs_is_free_space_inode(inode))
+ btrfs_drop_extent_map_range(inode, unwritten_start,
+ end, false);
/*
* If the ordered extent had an IOERR or something else went
Hi,
there's been a bug in btrfs space reservation since 6.7 that is now affecting
quite some users. I'd like to ask to add the fix right after it got merged to
Linus' tree so it can possibly be released in 6.7.5.
All apply cleanly on top of current 6.7.x tree. Thanks.
1693d5442c458ae8d5b0d58463b873cd879569ed
f4a9f219411f318ae60d6ff7f129082a75686c6c
12c5128f101bfa47a08e4c0e1a75cfa2d0872bcd
2f6397e448e689adf57e6788c90f913abd7e1af8
Short ids with subjects:
1693d5442c45 btrfs: add and use helper to check if block group is used
f4a9f219411f btrfs: do not delete unused block group if it may be used soon
12c5128f101b btrfs: add new unused block groups to the list of unused block groups
2f6397e448e6 btrfs: don't refill whole delayed refs block reserve when starting transaction
[adding the stable team]
On 05.02.24 18:07, Yang Shi wrote:
> On Sat, Feb 3, 2024 at 1:24 AM Thorsten Leemhuis
> <regressions(a)leemhuis.info> wrote:
>> On 18.01.24 14:35, Yang Shi wrote:
>>>
>>> The commit efa7df3e3bb5 ("mm: align larger anonymous mappings on THP
>>> boundaries") caused two issues [1] [2] reported on 32 bit system or compat
>>> userspace.
>>>
>>> It doesn't make too much sense to force huge page alignment on 32 bit
>>> system due to the constrained virtual address space.
>>>
>>> [1] https://lore.kernel.org/linux-mm/CAHbLzkqa1SCBA10yjWTtA2mKCsoK5+M1BthSDL8RO…
>>> [2] https://lore.kernel.org/linux-mm/CAHbLzkqa1SCBA10yjWTtA2mKCsoK5+M1BthSDL8RO…
>>
>> [FWIW, this is now 4ef9ad19e17676 ("mm: huge_memory: don't force huge
>> page alignment on 32 bit") in mainline]
>>
>> Quick question: it it okay to ask Greg to pick this up for linux-6.7.y
>> series?
>
> Yes, definitely. Thanks for following up.
In that case: Greg, could you please consider picking up 4ef9ad19e17676
("mm: huge_memory: don't force huge page alignment on 32 bit") for the
next linux-6.7 rc round? tia!
Ohh, and btw: you might also want to pick up c4608d1bf7c653 ("mm: mmap:
map MAP_STACK to VM_NOHUGEPAGE") if you haven't already done so: its
stable tag contains a typo, hence I guess your scripts might have missed
it (I only noticed that by chance).
Ciao, Thorsten
>> I'm wondering because Jiri's report ([1] in above quote) sounded like
>> this is something that will affect and annoy quite a few people with the
>> linux-6.7.y.
>>
>> Ciao, Thorsten
>>
>>> Fixes: efa7df3e3bb5 ("mm: align larger anonymous mappings on THP boundaries")
>>> Reported-by: Jiri Slaby <jirislaby(a)kernel.org>
>>> Reported-by: Suren Baghdasaryan <surenb(a)google.com>
>>> Tested-by: Jiri Slaby <jirislaby(a)kernel.org>
>>> Tested-by: Suren Baghdasaryan <surenb(a)google.com>
>>> Cc: Rik van Riel <riel(a)surriel.com>
>>> Cc: Matthew Wilcox <willy(a)infradead.org>
>>> Cc: Christopher Lameter <cl(a)linux.com>
>>> Signed-off-by: Yang Shi <yang(a)os.amperecomputing.com>
>>> ---
>>> mm/huge_memory.c | 9 +++++++++
>>> 1 file changed, 9 insertions(+)
>>>
>>> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
>>> index 94ef5c02b459..e9fbaccbe0c0 100644
>>> --- a/mm/huge_memory.c
>>> +++ b/mm/huge_memory.c
>>> @@ -37,6 +37,7 @@
>>> #include <linux/page_owner.h>
>>> #include <linux/sched/sysctl.h>
>>> #include <linux/memory-tiers.h>
>>> +#include <linux/compat.h>
>>>
>>> #include <asm/tlb.h>
>>> #include <asm/pgalloc.h>
>>> @@ -811,6 +812,14 @@ static unsigned long __thp_get_unmapped_area(struct file *filp,
>>> loff_t off_align = round_up(off, size);
>>> unsigned long len_pad, ret;
>>>
>>> + /*
>>> + * It doesn't make too much sense to froce huge page alignment on
>>> + * 32 bit system or compat userspace due to the contrained virtual
>>> + * address space and address entropy.
>>> + */
>>> + if (IS_ENABLED(CONFIG_32BIT) || in_compat_syscall())
>>> + return 0;
>>> +
>>> if (off_end <= off_align || (off_end - off_align) < size)
>>> return 0;
>>>
>
>
Xtensa has two-argument MAKE_PC_FROM_RA macro to convert a0 to an actual
return address because when windowed ABI is used call{,x}{4,8,12}
opcodes stuff encoded window size into the top 2 bits of the register
that becomes a return address in the called function. Second argument of
that macro is supposed to be an address having these 2 topmost bits set
correctly, but the comment suggested that that could be the stack
address. However the stack doesn't have to be in the same 1GByte region
as the code, especially in noMMU XIP configurations.
Fix the comment and use either _text or regs->pc as the second argument
for the MAKE_PC_FROM_RA macro.
Cc: stable(a)vger.kernel.org
Signed-off-by: Max Filippov <jcmvbkbc(a)gmail.com>
---
arch/xtensa/include/asm/processor.h | 8 ++++----
arch/xtensa/include/asm/ptrace.h | 2 +-
arch/xtensa/kernel/process.c | 5 +++--
arch/xtensa/kernel/stacktrace.c | 3 ++-
4 files changed, 10 insertions(+), 8 deletions(-)
diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h
index d008a153a2b9..7ed1a2085bd7 100644
--- a/arch/xtensa/include/asm/processor.h
+++ b/arch/xtensa/include/asm/processor.h
@@ -115,9 +115,9 @@
#define MAKE_RA_FOR_CALL(ra,ws) (((ra) & 0x3fffffff) | (ws) << 30)
/* Convert return address to a valid pc
- * Note: We assume that the stack pointer is in the same 1GB ranges as the ra
+ * Note: 'text' is the address within the same 1GB range as the ra
*/
-#define MAKE_PC_FROM_RA(ra,sp) (((ra) & 0x3fffffff) | ((sp) & 0xc0000000))
+#define MAKE_PC_FROM_RA(ra, text) (((ra) & 0x3fffffff) | ((unsigned long)(text) & 0xc0000000))
#elif defined(__XTENSA_CALL0_ABI__)
@@ -127,9 +127,9 @@
#define MAKE_RA_FOR_CALL(ra, ws) (ra)
/* Convert return address to a valid pc
- * Note: We assume that the stack pointer is in the same 1GB ranges as the ra
+ * Note: 'text' is not used as 'ra' is always the full address
*/
-#define MAKE_PC_FROM_RA(ra, sp) (ra)
+#define MAKE_PC_FROM_RA(ra, text) (ra)
#else
#error Unsupported Xtensa ABI
diff --git a/arch/xtensa/include/asm/ptrace.h b/arch/xtensa/include/asm/ptrace.h
index a270467556dc..86c70117371b 100644
--- a/arch/xtensa/include/asm/ptrace.h
+++ b/arch/xtensa/include/asm/ptrace.h
@@ -87,7 +87,7 @@ struct pt_regs {
# define user_mode(regs) (((regs)->ps & 0x00000020)!=0)
# define instruction_pointer(regs) ((regs)->pc)
# define return_pointer(regs) (MAKE_PC_FROM_RA((regs)->areg[0], \
- (regs)->areg[1]))
+ (regs)->pc))
# ifndef CONFIG_SMP
# define profile_pc(regs) instruction_pointer(regs)
diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c
index a815577d25fd..7bd66677f7b6 100644
--- a/arch/xtensa/kernel/process.c
+++ b/arch/xtensa/kernel/process.c
@@ -47,6 +47,7 @@
#include <asm/asm-offsets.h>
#include <asm/regs.h>
#include <asm/hw_breakpoint.h>
+#include <asm/sections.h>
#include <asm/traps.h>
extern void ret_from_fork(void);
@@ -380,7 +381,7 @@ unsigned long __get_wchan(struct task_struct *p)
int count = 0;
sp = p->thread.sp;
- pc = MAKE_PC_FROM_RA(p->thread.ra, p->thread.sp);
+ pc = MAKE_PC_FROM_RA(p->thread.ra, _text);
do {
if (sp < stack_page + sizeof(struct task_struct) ||
@@ -392,7 +393,7 @@ unsigned long __get_wchan(struct task_struct *p)
/* Stack layout: sp-4: ra, sp-3: sp' */
- pc = MAKE_PC_FROM_RA(SPILL_SLOT(sp, 0), sp);
+ pc = MAKE_PC_FROM_RA(SPILL_SLOT(sp, 0), _text);
sp = SPILL_SLOT(sp, 1);
} while (count++ < 16);
return 0;
diff --git a/arch/xtensa/kernel/stacktrace.c b/arch/xtensa/kernel/stacktrace.c
index 831ffb648bda..ed324fdf2a2f 100644
--- a/arch/xtensa/kernel/stacktrace.c
+++ b/arch/xtensa/kernel/stacktrace.c
@@ -13,6 +13,7 @@
#include <linux/stacktrace.h>
#include <asm/ftrace.h>
+#include <asm/sections.h>
#include <asm/stacktrace.h>
#include <asm/traps.h>
#include <linux/uaccess.h>
@@ -189,7 +190,7 @@ void walk_stackframe(unsigned long *sp,
if (a1 <= (unsigned long)sp)
break;
- frame.pc = MAKE_PC_FROM_RA(a0, a1);
+ frame.pc = MAKE_PC_FROM_RA(a0, _text);
frame.sp = a1;
if (fn(&frame, data))
--
2.39.2
I didn't think to mark these for stable in the commits, but they
definitely should go into the stable queue, since it's a known
mis-compilation of the kvm nested guest code with gcc-11 otherwise.
The bug technically affects other gcc versions too, but apparently not
so that we'd actually notice.
It's two commits:
4356e9f841f7 ("work around gcc bugs with 'asm goto' with outputs")
68fb3ca0e408 ("update workarounds for gcc "asm goto" issue")
where the first one works around the problem, and the second one
("update") just ends up pinpointing exactly which gcc versions are
affected so that future gcc releases won't get the unnecessary
workaround.
Technically only the first one really needs to go into stable. The
second one is more of a judgement call - do you want to match
upstream, and do you care about the (very slight) code generation
improvement with updated gcc versions?
Linus