commit 1645c283a87c61f84b2bffd81f50724df959b11a upstream.
[BUG]
There is a bug report that ntfs2btrfs had a bug that it can lead to
transaction abort and the filesystem flips to read-only.
[CAUSE]
For inline backref items, kernel has a strict requirement for their
ordered, they must follow the following rules:
- All btrfs_extent_inline_ref::type should be in an ascending order
- Within the same type, the items should follow a descending order by
their sequence number
For EXTENT_DATA_REF type, the sequence number is result from
hash_extent_data_ref().
For other types, their sequence numbers are
btrfs_extent_inline_ref::offset.
Thus if there is any code not following above rules, the resulted
inline backrefs can prevent the kernel to locate the needed inline
backref and lead to transaction abort.
[FIX]
Ntrfs2btrfs has already fixed the problem, and btrfs-progs has added the
ability to detect such problems.
For kernel, let's be more noisy and be more specific about the order, so
that the next time kernel hits such problem we would reject it in the
first place, without leading to transaction abort.
Cc: stable(a)vger.kernel.org # 6.6
Link: https://github.com/kdave/btrfs-progs/pull/622
Reviewed-by: Josef Bacik <josef(a)toxicpanda.com>
[ Fix a conflict due to header cleanup. ]
Signed-off-by: Qu Wenruo <wqu(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
---
fs/btrfs/tree-checker.c | 39 +++++++++++++++++++++++++++++++++++++++
1 file changed, 39 insertions(+)
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
index cc6bc5985120..5d6cfa618dc4 100644
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -29,6 +29,7 @@
#include "accessors.h"
#include "file-item.h"
#include "inode-item.h"
+#include "extent-tree.h"
/*
* Error message should follow the following format:
@@ -1274,6 +1275,8 @@ static int check_extent_item(struct extent_buffer *leaf,
unsigned long ptr; /* Current pointer inside inline refs */
unsigned long end; /* Extent item end */
const u32 item_size = btrfs_item_size(leaf, slot);
+ u8 last_type = 0;
+ u64 last_seq = U64_MAX;
u64 flags;
u64 generation;
u64 total_refs; /* Total refs in btrfs_extent_item */
@@ -1320,6 +1323,18 @@ static int check_extent_item(struct extent_buffer *leaf,
* 2.2) Ref type specific data
* Either using btrfs_extent_inline_ref::offset, or specific
* data structure.
+ *
+ * All above inline items should follow the order:
+ *
+ * - All btrfs_extent_inline_ref::type should be in an ascending
+ * order
+ *
+ * - Within the same type, the items should follow a descending
+ * order by their sequence number. The sequence number is
+ * determined by:
+ * * btrfs_extent_inline_ref::offset for all types other than
+ * EXTENT_DATA_REF
+ * * hash_extent_data_ref() for EXTENT_DATA_REF
*/
if (unlikely(item_size < sizeof(*ei))) {
extent_err(leaf, slot,
@@ -1401,6 +1416,7 @@ static int check_extent_item(struct extent_buffer *leaf,
struct btrfs_extent_inline_ref *iref;
struct btrfs_extent_data_ref *dref;
struct btrfs_shared_data_ref *sref;
+ u64 seq;
u64 dref_offset;
u64 inline_offset;
u8 inline_type;
@@ -1414,6 +1430,7 @@ static int check_extent_item(struct extent_buffer *leaf,
iref = (struct btrfs_extent_inline_ref *)ptr;
inline_type = btrfs_extent_inline_ref_type(leaf, iref);
inline_offset = btrfs_extent_inline_ref_offset(leaf, iref);
+ seq = inline_offset;
if (unlikely(ptr + btrfs_extent_inline_ref_size(inline_type) > end)) {
extent_err(leaf, slot,
"inline ref item overflows extent item, ptr %lu iref size %u end %lu",
@@ -1444,6 +1461,10 @@ static int check_extent_item(struct extent_buffer *leaf,
case BTRFS_EXTENT_DATA_REF_KEY:
dref = (struct btrfs_extent_data_ref *)(&iref->offset);
dref_offset = btrfs_extent_data_ref_offset(leaf, dref);
+ seq = hash_extent_data_ref(
+ btrfs_extent_data_ref_root(leaf, dref),
+ btrfs_extent_data_ref_objectid(leaf, dref),
+ btrfs_extent_data_ref_offset(leaf, dref));
if (unlikely(!IS_ALIGNED(dref_offset,
fs_info->sectorsize))) {
extent_err(leaf, slot,
@@ -1470,6 +1491,24 @@ static int check_extent_item(struct extent_buffer *leaf,
inline_type);
return -EUCLEAN;
}
+ if (inline_type < last_type) {
+ extent_err(leaf, slot,
+ "inline ref out-of-order: has type %u, prev type %u",
+ inline_type, last_type);
+ return -EUCLEAN;
+ }
+ /* Type changed, allow the sequence starts from U64_MAX again. */
+ if (inline_type > last_type)
+ last_seq = U64_MAX;
+ if (seq > last_seq) {
+ extent_err(leaf, slot,
+"inline ref out-of-order: has type %u offset %llu seq 0x%llx, prev type %u seq 0x%llx",
+ inline_type, inline_offset, seq,
+ last_type, last_seq);
+ return -EUCLEAN;
+ }
+ last_type = inline_type;
+ last_seq = seq;
ptr += btrfs_extent_inline_ref_size(inline_type);
}
/* No padding is allowed */
--
2.45.2
From: Baokun Li <libaokun1(a)huawei.com>
When commit 13df4d44a3aa ("ext4: fix slab-out-of-bounds in
ext4_mb_find_good_group_avg_frag_lists()") was backported to stable, the
commit f536808adcc3 ("ext4: refactor out ext4_generic_attr_store()") that
uniformly determines if the ptr is null is not merged in, so it needs to
be judged whether ptr is null or not in each case of the switch, otherwise
null pointer dereferencing may occur.
Signed-off-by: Baokun Li <libaokun1(a)huawei.com>
---
fs/ext4/sysfs.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index 63cbda3700ea..d65dccb44ed5 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -473,6 +473,8 @@ static ssize_t ext4_attr_store(struct kobject *kobj,
*((unsigned int *) ptr) = t;
return len;
case attr_clusters_in_group:
+ if (!ptr)
+ return 0;
ret = kstrtouint(skip_spaces(buf), 0, &t);
if (ret)
return ret;
--
2.39.2
commit a9e1ddc09ca55746079cc479aa3eb6411f0d99d4 upstream.
Syzbot reported that in rename directory operation on broken directory on
nilfs2, __block_write_begin_int() called to prepare block write may fail
BUG_ON check for access exceeding the folio/page size.
This is because nilfs_dotdot(), which gets parent directory reference
entry ("..") of the directory to be moved or renamed, does not check
consistency enough, and may return location exceeding folio/page size for
broken directories.
Fix this issue by checking required directory entries ("." and "..") in
the first chunk of the directory in nilfs_dotdot().
Link: https://lkml.kernel.org/r/20240628165107.9006-1-konishi.ryusuke@gmail.com
Signed-off-by: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Reported-by: syzbot+d3abed1ad3d367fa2627(a)syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=d3abed1ad3d367fa2627
Fixes: 2ba466d74ed7 ("nilfs2: directory entry operations")
Tested-by: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
Please apply this patch to the stable trees indicated by the subject
prefix instead of the patch that failed.
This patch is tailored to take page/folio conversion into account and
can be applied to these stable trees.
Also, all the builds and tests I did on each stable tree passed.
Thanks,
Ryusuke Konishi
fs/nilfs2/dir.c | 32 ++++++++++++++++++++++++++++++--
1 file changed, 30 insertions(+), 2 deletions(-)
diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c
index 51c982ad9608..53e4e63c607e 100644
--- a/fs/nilfs2/dir.c
+++ b/fs/nilfs2/dir.c
@@ -396,11 +396,39 @@ nilfs_find_entry(struct inode *dir, const struct qstr *qstr,
struct nilfs_dir_entry *nilfs_dotdot(struct inode *dir, struct page **p)
{
- struct nilfs_dir_entry *de = nilfs_get_page(dir, 0, p);
+ struct page *page;
+ struct nilfs_dir_entry *de, *next_de;
+ size_t limit;
+ char *msg;
+ de = nilfs_get_page(dir, 0, &page);
if (IS_ERR(de))
return NULL;
- return nilfs_next_entry(de);
+
+ limit = nilfs_last_byte(dir, 0); /* is a multiple of chunk size */
+ if (unlikely(!limit || le64_to_cpu(de->inode) != dir->i_ino ||
+ !nilfs_match(1, ".", de))) {
+ msg = "missing '.'";
+ goto fail;
+ }
+
+ next_de = nilfs_next_entry(de);
+ /*
+ * If "next_de" has not reached the end of the chunk, there is
+ * at least one more record. Check whether it matches "..".
+ */
+ if (unlikely((char *)next_de == (char *)de + nilfs_chunk_size(dir) ||
+ !nilfs_match(2, "..", next_de))) {
+ msg = "missing '..'";
+ goto fail;
+ }
+ *p = page;
+ return next_de;
+
+fail:
+ nilfs_error(dir->i_sb, "directory #%lu %s", dir->i_ino, msg);
+ nilfs_put_page(page);
+ return NULL;
}
ino_t nilfs_inode_by_name(struct inode *dir, const struct qstr *qstr)
--
2.43.5
We're seeing a GPU HANG issue on a CHV platform, which was caused by
bac24f59f454 ("drm/i915/execlists: Enable coarse preemption boundaries for gen8").
Gen8 platform has only timeslice and doesn't support a preemption mechanism
as engines do not have a preemption timer and doesn't send an irq if the
preemption timeout expires. So, add a fix to not consider preemption
during dequeuing for gen8 platforms.
v2: Simplify can_preempt() function (Tvrtko Ursulin)
v3:
- Inside need_preempt(), condition of can_preempt() is not required
as simplified can_preempt() is enough. (Chris Wilson)
Fixes: bac24f59f454 ("drm/i915/execlists: Enable coarse preemption boundaries for gen8")
Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/11396
Suggested-by: Andi Shyti <andi.shyti(a)intel.com>
Signed-off-by: Nitin Gote <nitin.r.gote(a)intel.com>
Cc: Chris Wilson <chris.p.wilson(a)linux.intel.com>
CC: <stable(a)vger.kernel.org> # v5.2+
---
drivers/gpu/drm/i915/gt/intel_execlists_submission.c | 6 +-----
1 file changed, 1 insertion(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index 21829439e686..72090f52fb85 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -3315,11 +3315,7 @@ static void remove_from_engine(struct i915_request *rq)
static bool can_preempt(struct intel_engine_cs *engine)
{
- if (GRAPHICS_VER(engine->i915) > 8)
- return true;
-
- /* GPGPU on bdw requires extra w/a; not implemented */
- return engine->class != RENDER_CLASS;
+ return GRAPHICS_VER(engine->i915) > 8;
}
static void kick_execlists(const struct i915_request *rq, int prio)
--
2.25.1
From: Xiubo Li <xiubli(a)redhat.com>
If a client sends out a cap update dropping caps with the prior 'seq'
just before an incoming cap revoke request, then the client may drop
the revoke because it believes it's already released the requested
capabilities.
This causes the MDS to wait indefinitely for the client to respond
to the revoke. It's therefore always a good idea to ack the cap
revoke request with the bumped up 'seq'.
Currently if the cap->issued equals to the newcaps the check_caps()
will do nothing, we should force flush the caps.
Cc: stable(a)vger.kernel.org
Link: https://tracker.ceph.com/issues/61782
Signed-off-by: Xiubo Li <xiubli(a)redhat.com>
---
fs/ceph/caps.c | 16 ++++++++++++----
fs/ceph/super.h | 7 ++++---
2 files changed, 16 insertions(+), 7 deletions(-)
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 24c31f795938..ba5809cf8f02 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -2024,6 +2024,8 @@ bool __ceph_should_report_size(struct ceph_inode_info *ci)
* CHECK_CAPS_AUTHONLY - we should only check the auth cap
* CHECK_CAPS_FLUSH - we should flush any dirty caps immediately, without
* further delay.
+ * CHECK_CAPS_FLUSH_FORCE - we should flush any caps immediately, without
+ * further delay.
*/
void ceph_check_caps(struct ceph_inode_info *ci, int flags)
{
@@ -2105,7 +2107,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags)
}
doutc(cl, "%p %llx.%llx file_want %s used %s dirty %s "
- "flushing %s issued %s revoking %s retain %s %s%s%s\n",
+ "flushing %s issued %s revoking %s retain %s %s%s%s%s\n",
inode, ceph_vinop(inode), ceph_cap_string(file_wanted),
ceph_cap_string(used), ceph_cap_string(ci->i_dirty_caps),
ceph_cap_string(ci->i_flushing_caps),
@@ -2113,7 +2115,8 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags)
ceph_cap_string(retain),
(flags & CHECK_CAPS_AUTHONLY) ? " AUTHONLY" : "",
(flags & CHECK_CAPS_FLUSH) ? " FLUSH" : "",
- (flags & CHECK_CAPS_NOINVAL) ? " NOINVAL" : "");
+ (flags & CHECK_CAPS_NOINVAL) ? " NOINVAL" : "",
+ (flags & CHECK_CAPS_FLUSH_FORCE) ? " FLUSH_FORCE" : "");
/*
* If we no longer need to hold onto old our caps, and we may
@@ -2223,6 +2226,9 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags)
goto ack;
}
+ if (flags & CHECK_CAPS_FLUSH_FORCE)
+ goto ack;
+
/* things we might delay */
if ((cap->issued & ~retain) == 0)
continue; /* nope, all good */
@@ -3518,6 +3524,7 @@ static void handle_cap_grant(struct inode *inode,
bool queue_invalidate = false;
bool deleted_inode = false;
bool fill_inline = false;
+ int flags = 0;
/*
* If there is at least one crypto block then we'll trust
@@ -3751,6 +3758,7 @@ static void handle_cap_grant(struct inode *inode,
/* don't let check_caps skip sending a response to MDS for revoke msgs */
if (le32_to_cpu(grant->op) == CEPH_CAP_OP_REVOKE) {
cap->mds_wanted = 0;
+ flags |= CHECK_CAPS_FLUSH_FORCE;
if (cap == ci->i_auth_cap)
check_caps = 1; /* check auth cap only */
else
@@ -3806,9 +3814,9 @@ static void handle_cap_grant(struct inode *inode,
mutex_unlock(&session->s_mutex);
if (check_caps == 1)
- ceph_check_caps(ci, CHECK_CAPS_AUTHONLY | CHECK_CAPS_NOINVAL);
+ ceph_check_caps(ci, flags | CHECK_CAPS_AUTHONLY | CHECK_CAPS_NOINVAL);
else if (check_caps == 2)
- ceph_check_caps(ci, CHECK_CAPS_NOINVAL);
+ ceph_check_caps(ci, flags | CHECK_CAPS_NOINVAL);
}
/*
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index b0b368ed3018..831e8ec4d5da 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -200,9 +200,10 @@ struct ceph_cap {
struct list_head caps_item;
};
-#define CHECK_CAPS_AUTHONLY 1 /* only check auth cap */
-#define CHECK_CAPS_FLUSH 2 /* flush any dirty caps */
-#define CHECK_CAPS_NOINVAL 4 /* don't invalidate pagecache */
+#define CHECK_CAPS_AUTHONLY 1 /* only check auth cap */
+#define CHECK_CAPS_FLUSH 2 /* flush any dirty caps */
+#define CHECK_CAPS_NOINVAL 4 /* don't invalidate pagecache */
+#define CHECK_CAPS_FLUSH_FORCE 8 /* force flush any caps */
struct ceph_cap_flush {
u64 tid;
--
2.45.1
From: Baokun Li <libaokun1(a)huawei.com>
[ Upstream commit 9e8e819f8f272c4e5dcd0bd6c7450e36481ed139 ]
When setting values of type unsigned int through sysfs, we use kstrtoul()
to parse it and then truncate part of it as the final set value, when the
set value is greater than UINT_MAX, the set value will not match what we
see because of the truncation. As follows:
$ echo 4294967296 > /sys/fs/ext4/sda/mb_max_linear_groups
$ cat /sys/fs/ext4/sda/mb_max_linear_groups
0
So we use kstrtouint() to parse the attr_pointer_ui type to avoid the
inconsistency described above. In addition, a judgment is added to avoid
setting s_resv_clusters less than 0.
Signed-off-by: Baokun Li <libaokun1(a)huawei.com>
Reviewed-by: Jan Kara <jack(a)suse.cz>
Link: https://lore.kernel.org/r/20240319113325.3110393-2-libaokun1@huawei.com
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
Stable-dep-of: 13df4d44a3aa ("ext4: fix slab-out-of-bounds in ext4_mb_find_good_group_avg_frag_lists()")
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
Signed-off-by: Baokun Li <libaokun1(a)huawei.com>
---
fs/ext4/sysfs.c | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index 6d332dff79dd..ca820620b974 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -104,7 +104,7 @@ static ssize_t reserved_clusters_store(struct ext4_sb_info *sbi,
int ret;
ret = kstrtoull(skip_spaces(buf), 0, &val);
- if (ret || val >= clusters)
+ if (ret || val >= clusters || (s64)val < 0)
return -EINVAL;
atomic64_set(&sbi->s_resv_clusters, val);
@@ -451,7 +451,8 @@ static ssize_t ext4_attr_store(struct kobject *kobj,
s_kobj);
struct ext4_attr *a = container_of(attr, struct ext4_attr, attr);
void *ptr = calc_ptr(a, sbi);
- unsigned long t;
+ unsigned int t;
+ unsigned long lt;
int ret;
switch (a->attr_id) {
@@ -460,7 +461,7 @@ static ssize_t ext4_attr_store(struct kobject *kobj,
case attr_pointer_ui:
if (!ptr)
return 0;
- ret = kstrtoul(skip_spaces(buf), 0, &t);
+ ret = kstrtouint(skip_spaces(buf), 0, &t);
if (ret)
return ret;
if (a->attr_ptr == ptr_ext4_super_block_offset)
@@ -471,10 +472,10 @@ static ssize_t ext4_attr_store(struct kobject *kobj,
case attr_pointer_ul:
if (!ptr)
return 0;
- ret = kstrtoul(skip_spaces(buf), 0, &t);
+ ret = kstrtoul(skip_spaces(buf), 0, <);
if (ret)
return ret;
- *((unsigned long *) ptr) = t;
+ *((unsigned long *) ptr) = lt;
return len;
case attr_inode_readahead:
return inode_readahead_blks_store(sbi, buf, len);
--
2.39.2
V2 main changes:
- Add Rob's reviewed-by in the binding patch.
- Re-name the error out labels and new RXWM macro.
- In #3 patch, add one fix tag, and CC stable kernel.
Based on i.MX8QM HSIO PHY driver, refine i.MX8QM SATA driver by using PHY
interface.
[PATCH v2 1/4] dt-bindings: ata: Add i.MX8QM AHCI compatible string
[PATCH v2 2/4] ata: ahci_imx: Clean up code by using i.MX8Q HSIO PHY
[PATCH v2 3/4] ata: ahci_imx: Enlarge RX water mark for i.MX8QM SATA
[PATCH v2 4/4] ata: ahci_imx: Correct the email address
Documentation/devicetree/bindings/ata/imx-sata.yaml | 47 +++++++++++
drivers/ata/ahci_imx.c | 406 ++++++++++++++++++++++++-----------------------------------------------------------------
2 files changed, 155 insertions(+), 298 deletions(-)