Commit a4fdd9762272 ("iommu: Use flush queue capability") hid the
IOMMU_DOMAIN_DMA_FQ domain type from domain allocation. A check was
introduced in iommu_dma_init_domain() to fall back to a plain DMA
domain if FQ is not supported, but this check runs too late: by that
point, devices have already been attached to the IOMMU, and the IOMMU
driver might not expect FQ domains at ops->attach_dev() time.

Instead, clamp FQ domains to plain DMA at device attach time if the
driver does not support them, rather than waiting until later.
This regressed apple-dart in v6.5.
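For context, the late fallback being front-loaded here sits in
iommu_dma_init_domain(); roughly (paraphrased from
drivers/iommu/dma-iommu.c as of v6.5, not part of this diff):

	/* If the FQ fails we can simply fall back to strict mode */
	if (domain->type == IOMMU_DOMAIN_DMA_FQ &&
	    (!device_iommu_capable(dev, IOMMU_CAP_DEFERRED_FLUSH) ||
	     iommu_dma_init_fq(domain)))
		domain->type = IOMMU_DOMAIN_DMA;

By this point __iommu_attach_device() has already handed the FQ-typed
domain to ops->attach_dev(), which is what trips up apple-dart.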
Cc: regressions@lists.linux.dev
Cc: stable@vger.kernel.org
Fixes: a4fdd9762272 ("iommu: Use flush queue capability")
Signed-off-by: Hector Martin <marcan@marcan.st>
---
drivers/iommu/iommu.c | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 3bfc56df4f78..12464eaa8d91 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -2039,6 +2039,15 @@ static int __iommu_attach_device(struct iommu_domain *domain,
if (unlikely(domain->ops->attach_dev == NULL))
return -ENODEV;
+ /*
+ * Ensure we do not try to attach devices to FQ domains if the
+ * IOMMU does not support them. We can safely fall back to
+ * non-FQ.
+ */
+ if (domain->type == IOMMU_DOMAIN_DMA_FQ &&
+ !device_iommu_capable(dev, IOMMU_CAP_DEFERRED_FLUSH))
+ domain->type = IOMMU_DOMAIN_DMA;
+
ret = domain->ops->attach_dev(domain, dev);
if (ret)
return ret;
---
base-commit: ce9ecca0238b140b88f43859b211c9fdfd8e5b70
change-id: 20230922-iommu-type-regression-25b4f43df770
Best regards,
--
Hector Martin <marcan@marcan.st>
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x 7fda67e8c3ab6069f75888f67958a6d30454a9f6
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable@vger.kernel.org>' --in-reply-to '2023092055-disband-unveiling-f6cc@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
7fda67e8c3ab ("ext4: fix rec_len verify error")
46c116b920eb ("ext4: verify dir block before splitting it")
f036adb39976 ("ext4: rename "dirent_csum" functions to use "dirblock"")
b886ee3e778e ("ext4: Support case-insensitive file name lookups")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7fda67e8c3ab6069f75888f67958a6d30454a9f6 Mon Sep 17 00:00:00 2001
From: Shida Zhang <zhangshida@kylinos.cn>
Date: Thu, 3 Aug 2023 14:09:38 +0800
Subject: [PATCH] ext4: fix rec_len verify error
With the configuration PAGE_SIZE 64k and filesystem blocksize 64k,
a problem occurred when more than 13 million files were directly created
under a directory:
EXT4-fs error (device xx): ext4_dx_csum_set:492: inode #xxxx: comm xxxxx: dir seems corrupt? Run e2fsck -D.
EXT4-fs error (device xx): ext4_dx_csum_verify:463: inode #xxxx: comm xxxxx: dir seems corrupt? Run e2fsck -D.
EXT4-fs error (device xx): dx_probe:856: inode #xxxx: block 8188: comm xxxxx: Directory index failed checksum
When enough files are created, fake_dirent->rec_len becomes 0xffff,
which does not equal the blocksize 65536, i.e. 0x10000.
This does not happen when the blocksize is 4k: there, once enough
files are created, fake_dirent->rec_len becomes 0x1000, which does
equal the blocksize 4k, i.e. 0x1000.
The problem stems from the limitation of the 16-bit on-disk field
when the blocksize is set to 64k: a rec_len equal to the blocksize
cannot be stored directly.
The helpers ext4_rec_len_{from,to}_disk have already been introduced
to convert between the encoded and the plain form of rec_len.
So fix this by using the helper here, and convert all the other raw
rec_len accesses in this file too.
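For reference, the decode helper handles the 64k case by encoding a
full-block rec_len specially; a sketch paraphrased from fs/ext4/ext4.h
(exact form may differ between kernel versions):

static inline unsigned int
ext4_rec_len_from_disk(__le16 dlen, unsigned blocksize)
{
	unsigned len = le16_to_cpu(dlen);

#if (PAGE_SIZE >= 65536)
	/* 0xffff (EXT4_MAX_REC_LEN) stands in for a 64k rec_len,
	 * which cannot be represented in the 16-bit on-disk field. */
	if (len == EXT4_MAX_REC_LEN || len == 0)
		return blocksize;
	return (len & 65532) | ((len & 3) << 16);
#else
	return len;
#endif
}

This is why comparing the raw le16_to_cpu() value against a 64k
blocksize can never succeed: the helper must be used on both sides.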
Cc: stable@kernel.org
Fixes: dbe89444042a ("ext4: Calculate and verify checksums for htree nodes")
Suggested-by: Andreas Dilger <adilger@dilger.ca>
Suggested-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Shida Zhang <zhangshida@kylinos.cn>
Reviewed-by: Andreas Dilger <adilger@dilger.ca>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Link: https://lore.kernel.org/r/20230803060938.1929759-1-zhangshida@kylinos.cn
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index c0f0b4e2413b..c1ceccab05f5 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -343,17 +343,17 @@ static struct ext4_dir_entry_tail *get_dirent_tail(struct inode *inode,
struct buffer_head *bh)
{
struct ext4_dir_entry_tail *t;
+ int blocksize = EXT4_BLOCK_SIZE(inode->i_sb);
#ifdef PARANOID
struct ext4_dir_entry *d, *top;
d = (struct ext4_dir_entry *)bh->b_data;
top = (struct ext4_dir_entry *)(bh->b_data +
- (EXT4_BLOCK_SIZE(inode->i_sb) -
- sizeof(struct ext4_dir_entry_tail)));
- while (d < top && d->rec_len)
+ (blocksize - sizeof(struct ext4_dir_entry_tail)));
+ while (d < top && ext4_rec_len_from_disk(d->rec_len, blocksize))
d = (struct ext4_dir_entry *)(((void *)d) +
- le16_to_cpu(d->rec_len));
+ ext4_rec_len_from_disk(d->rec_len, blocksize));
if (d != top)
return NULL;
@@ -364,7 +364,8 @@ static struct ext4_dir_entry_tail *get_dirent_tail(struct inode *inode,
#endif
if (t->det_reserved_zero1 ||
- le16_to_cpu(t->det_rec_len) != sizeof(struct ext4_dir_entry_tail) ||
+ (ext4_rec_len_from_disk(t->det_rec_len, blocksize) !=
+ sizeof(struct ext4_dir_entry_tail)) ||
t->det_reserved_zero2 ||
t->det_reserved_ft != EXT4_FT_DIR_CSUM)
return NULL;
@@ -445,13 +446,14 @@ static struct dx_countlimit *get_dx_countlimit(struct inode *inode,
struct ext4_dir_entry *dp;
struct dx_root_info *root;
int count_offset;
+ int blocksize = EXT4_BLOCK_SIZE(inode->i_sb);
+ unsigned int rlen = ext4_rec_len_from_disk(dirent->rec_len, blocksize);
- if (le16_to_cpu(dirent->rec_len) == EXT4_BLOCK_SIZE(inode->i_sb))
+ if (rlen == blocksize)
count_offset = 8;
- else if (le16_to_cpu(dirent->rec_len) == 12) {
+ else if (rlen == 12) {
dp = (struct ext4_dir_entry *)(((void *)dirent) + 12);
- if (le16_to_cpu(dp->rec_len) !=
- EXT4_BLOCK_SIZE(inode->i_sb) - 12)
+ if (ext4_rec_len_from_disk(dp->rec_len, blocksize) != blocksize - 12)
return NULL;
root = (struct dx_root_info *)(((void *)dp + 12));
if (root->reserved_zero ||
@@ -1315,6 +1317,7 @@ static int dx_make_map(struct inode *dir, struct buffer_head *bh,
unsigned int buflen = bh->b_size;
char *base = bh->b_data;
struct dx_hash_info h = *hinfo;
+ int blocksize = EXT4_BLOCK_SIZE(dir->i_sb);
if (ext4_has_metadata_csum(dir->i_sb))
buflen -= sizeof(struct ext4_dir_entry_tail);
@@ -1335,11 +1338,12 @@ static int dx_make_map(struct inode *dir, struct buffer_head *bh,
map_tail--;
map_tail->hash = h.hash;
map_tail->offs = ((char *) de - base)>>2;
- map_tail->size = le16_to_cpu(de->rec_len);
+ map_tail->size = ext4_rec_len_from_disk(de->rec_len,
+ blocksize);
count++;
cond_resched();
}
- de = ext4_next_entry(de, dir->i_sb->s_blocksize);
+ de = ext4_next_entry(de, blocksize);
}
return count;
}
From: Zheng Yejian <zhengyejian1@huawei.com>
The 'bytes' info in file 'per_cpu/cpu<X>/stats' means the number of
bytes in the cpu buffer that have not been consumed. However,
currently, after consuming data by reading file 'trace_pipe', the
'bytes' info is not updated as expected.
# cat per_cpu/cpu0/stats
entries: 0
overrun: 0
commit overrun: 0
bytes: 568 <--- 'bytes' is problematic !!!
oldest event ts: 8651.371479
now ts: 8653.912224
dropped events: 0
read events: 8
The root cause is incorrect accounting of cpu_buffer->read_bytes. To fix it:
1. When accounting 'read_bytes', count the consumed event in rb_advance_reader();
2. When accounting 'entries_bytes', exclude the discarded padding event that
is smaller than the minimum size, because it is invisible to the reader. Then
use rb_page_commit() instead of BUF_PAGE_SIZE where accounting for
page-based reads, removes, and overruns.
Also correct the comment of ring_buffer_bytes_cpu() in this patch (see the
sketch of that function below).
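For reference, the reported 'bytes' value is the difference between the
produced and consumed byte counters; a sketch of ring_buffer_bytes_cpu(),
paraphrased from kernel/trace/ring_buffer.c:

u64 ring_buffer_bytes_cpu(struct trace_buffer *buffer, int cpu)
{
	struct ring_buffer_per_cpu *cpu_buffer;

	if (!cpumask_test_cpu(cpu, buffer->cpumask))
		return 0;

	cpu_buffer = buffer->buffers[cpu];
	/* Bytes produced minus bytes consumed: if read_bytes is
	 * under-counted, 'bytes' stays stale after a read. */
	return local_read(&cpu_buffer->entries_bytes) -
	       cpu_buffer->read_bytes;
}

So an event consumed via rb_advance_reader() that never bumps read_bytes
leaves 'bytes' nonzero even once the buffer has been drained.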
Link: https://lore.kernel.org/linux-trace-kernel/20230921125425.1708423-1-zhengye…
Cc: stable@vger.kernel.org
Fixes: c64e148a3be3 ("trace: Add ring buffer stats to measure rate of events")
Signed-off-by: Zheng Yejian <zhengyejian1@huawei.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
kernel/trace/ring_buffer.c | 28 +++++++++++++++-------------
1 file changed, 15 insertions(+), 13 deletions(-)
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index a1651edc48d5..28daf0ce95c5 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -354,6 +354,11 @@ static void rb_init_page(struct buffer_data_page *bpage)
local_set(&bpage->commit, 0);
}
+static __always_inline unsigned int rb_page_commit(struct buffer_page *bpage)
+{
+ return local_read(&bpage->page->commit);
+}
+
static void free_buffer_page(struct buffer_page *bpage)
{
free_page((unsigned long)bpage->page);
@@ -2003,7 +2008,7 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages)
* Increment overrun to account for the lost events.
*/
local_add(page_entries, &cpu_buffer->overrun);
- local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes);
+ local_sub(rb_page_commit(to_remove_page), &cpu_buffer->entries_bytes);
local_inc(&cpu_buffer->pages_lost);
}
@@ -2367,11 +2372,6 @@ rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer)
cpu_buffer->reader_page->read);
}
-static __always_inline unsigned rb_page_commit(struct buffer_page *bpage)
-{
- return local_read(&bpage->page->commit);
-}
-
static struct ring_buffer_event *
rb_iter_head_event(struct ring_buffer_iter *iter)
{
@@ -2517,7 +2517,7 @@ rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer,
* the counters.
*/
local_add(entries, &cpu_buffer->overrun);
- local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes);
+ local_sub(rb_page_commit(next_page), &cpu_buffer->entries_bytes);
local_inc(&cpu_buffer->pages_lost);
/*
@@ -2660,9 +2660,6 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
event = __rb_page_index(tail_page, tail);
- /* account for padding bytes */
- local_add(BUF_PAGE_SIZE - tail, &cpu_buffer->entries_bytes);
-
/*
* Save the original length to the meta data.
* This will be used by the reader to add lost event
@@ -2676,7 +2673,8 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
* write counter enough to allow another writer to slip
* in on this page.
* We put in a discarded commit instead, to make sure
- * that this space is not used again.
+ * that this space is not used again, and this space will
+ * not be accounted into 'entries_bytes'.
*
* If we are less than the minimum size, we don't need to
* worry about it.
@@ -2701,6 +2699,9 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
/* time delta must be non zero */
event->time_delta = 1;
+ /* account for padding bytes */
+ local_add(BUF_PAGE_SIZE - tail, &cpu_buffer->entries_bytes);
+
/* Make sure the padding is visible before the tail_page->write update */
smp_wmb();
@@ -4215,7 +4216,7 @@ u64 ring_buffer_oldest_event_ts(struct trace_buffer *buffer, int cpu)
EXPORT_SYMBOL_GPL(ring_buffer_oldest_event_ts);
/**
- * ring_buffer_bytes_cpu - get the number of bytes consumed in a cpu buffer
+ * ring_buffer_bytes_cpu - get the number of bytes unconsumed in a cpu buffer
* @buffer: The ring buffer
* @cpu: The per CPU buffer to read from.
*/
@@ -4723,6 +4724,7 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
length = rb_event_length(event);
cpu_buffer->reader_page->read += length;
+ cpu_buffer->read_bytes += length;
}
static void rb_advance_iter(struct ring_buffer_iter *iter)
@@ -5816,7 +5818,7 @@ int ring_buffer_read_page(struct trace_buffer *buffer,
} else {
/* update the entry counter */
cpu_buffer->read += rb_page_entries(reader);
- cpu_buffer->read_bytes += BUF_PAGE_SIZE;
+ cpu_buffer->read_bytes += rb_page_commit(reader);
/* swap the pages */
rb_init_page(bpage);
--
2.40.1