The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 689a71493bd2f31c024f8c0395f85a1fd4b2138e Mon Sep 17 00:00:00 2001
From: Coiby Xu <coxu(a)redhat.com>
Date: Thu, 14 Jul 2022 21:40:24 +0800
Subject: [PATCH] kexec: clean up arch_kexec_kernel_verify_sig
Before commit 105e10e2cf1c ("kexec_file: drop weak attribute from
functions"), there was already no arch-specific implementation
of arch_kexec_kernel_verify_sig. With weak attribute dropped by that
commit, arch_kexec_kernel_verify_sig is completely useless. So clean it
up.
Note later patches are dependent on this patch so it should be backported
to the stable tree as well.
Cc: stable(a)vger.kernel.org
Suggested-by: Eric W. Biederman <ebiederm(a)xmission.com>
Reviewed-by: Michal Suchanek <msuchanek(a)suse.de>
Acked-by: Baoquan He <bhe(a)redhat.com>
Signed-off-by: Coiby Xu <coxu(a)redhat.com>
[zohar(a)linux.ibm.com: reworded patch description "Note"]
Link: https://lore.kernel.org/linux-integrity/20220714134027.394370-1-coxu@redhat…
Signed-off-by: Mimi Zohar <zohar(a)linux.ibm.com>
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 8107606ad1e8..7f710fb3712b 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -212,11 +212,6 @@ static inline void *arch_kexec_kernel_image_load(struct kimage *image)
}
#endif
-#ifdef CONFIG_KEXEC_SIG
-int arch_kexec_kernel_verify_sig(struct kimage *image, void *buf,
- unsigned long buf_len);
-#endif
-
extern int kexec_add_buffer(struct kexec_buf *kbuf);
int kexec_locate_mem_hole(struct kexec_buf *kbuf);
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index 0c27c81351ee..6dc1294c90fc 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -81,24 +81,6 @@ int kexec_image_post_load_cleanup_default(struct kimage *image)
return image->fops->cleanup(image->image_loader_data);
}
-#ifdef CONFIG_KEXEC_SIG
-static int kexec_image_verify_sig_default(struct kimage *image, void *buf,
- unsigned long buf_len)
-{
- if (!image->fops || !image->fops->verify_sig) {
- pr_debug("kernel loader does not support signature verification.\n");
- return -EKEYREJECTED;
- }
-
- return image->fops->verify_sig(buf, buf_len);
-}
-
-int arch_kexec_kernel_verify_sig(struct kimage *image, void *buf, unsigned long buf_len)
-{
- return kexec_image_verify_sig_default(image, buf, buf_len);
-}
-#endif
-
/*
* Free up memory used by kernel, initrd, and command line. This is temporary
* memory allocation which is not needed any more after these buffers have
@@ -141,13 +123,24 @@ void kimage_file_post_load_cleanup(struct kimage *image)
}
#ifdef CONFIG_KEXEC_SIG
+static int kexec_image_verify_sig(struct kimage *image, void *buf,
+ unsigned long buf_len)
+{
+ if (!image->fops || !image->fops->verify_sig) {
+ pr_debug("kernel loader does not support signature verification.\n");
+ return -EKEYREJECTED;
+ }
+
+ return image->fops->verify_sig(buf, buf_len);
+}
+
static int
kimage_validate_signature(struct kimage *image)
{
int ret;
- ret = arch_kexec_kernel_verify_sig(image, image->kernel_buf,
- image->kernel_buf_len);
+ ret = kexec_image_verify_sig(image, image->kernel_buf,
+ image->kernel_buf_len);
if (ret) {
if (sig_enforce) {
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 689a71493bd2f31c024f8c0395f85a1fd4b2138e Mon Sep 17 00:00:00 2001
From: Coiby Xu <coxu(a)redhat.com>
Date: Thu, 14 Jul 2022 21:40:24 +0800
Subject: [PATCH] kexec: clean up arch_kexec_kernel_verify_sig
Before commit 105e10e2cf1c ("kexec_file: drop weak attribute from
functions"), there was already no arch-specific implementation
of arch_kexec_kernel_verify_sig. With weak attribute dropped by that
commit, arch_kexec_kernel_verify_sig is completely useless. So clean it
up.
Note later patches are dependent on this patch so it should be backported
to the stable tree as well.
Cc: stable(a)vger.kernel.org
Suggested-by: Eric W. Biederman <ebiederm(a)xmission.com>
Reviewed-by: Michal Suchanek <msuchanek(a)suse.de>
Acked-by: Baoquan He <bhe(a)redhat.com>
Signed-off-by: Coiby Xu <coxu(a)redhat.com>
[zohar(a)linux.ibm.com: reworded patch description "Note"]
Link: https://lore.kernel.org/linux-integrity/20220714134027.394370-1-coxu@redhat…
Signed-off-by: Mimi Zohar <zohar(a)linux.ibm.com>
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 8107606ad1e8..7f710fb3712b 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -212,11 +212,6 @@ static inline void *arch_kexec_kernel_image_load(struct kimage *image)
}
#endif
-#ifdef CONFIG_KEXEC_SIG
-int arch_kexec_kernel_verify_sig(struct kimage *image, void *buf,
- unsigned long buf_len);
-#endif
-
extern int kexec_add_buffer(struct kexec_buf *kbuf);
int kexec_locate_mem_hole(struct kexec_buf *kbuf);
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index 0c27c81351ee..6dc1294c90fc 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -81,24 +81,6 @@ int kexec_image_post_load_cleanup_default(struct kimage *image)
return image->fops->cleanup(image->image_loader_data);
}
-#ifdef CONFIG_KEXEC_SIG
-static int kexec_image_verify_sig_default(struct kimage *image, void *buf,
- unsigned long buf_len)
-{
- if (!image->fops || !image->fops->verify_sig) {
- pr_debug("kernel loader does not support signature verification.\n");
- return -EKEYREJECTED;
- }
-
- return image->fops->verify_sig(buf, buf_len);
-}
-
-int arch_kexec_kernel_verify_sig(struct kimage *image, void *buf, unsigned long buf_len)
-{
- return kexec_image_verify_sig_default(image, buf, buf_len);
-}
-#endif
-
/*
* Free up memory used by kernel, initrd, and command line. This is temporary
* memory allocation which is not needed any more after these buffers have
@@ -141,13 +123,24 @@ void kimage_file_post_load_cleanup(struct kimage *image)
}
#ifdef CONFIG_KEXEC_SIG
+static int kexec_image_verify_sig(struct kimage *image, void *buf,
+ unsigned long buf_len)
+{
+ if (!image->fops || !image->fops->verify_sig) {
+ pr_debug("kernel loader does not support signature verification.\n");
+ return -EKEYREJECTED;
+ }
+
+ return image->fops->verify_sig(buf, buf_len);
+}
+
static int
kimage_validate_signature(struct kimage *image)
{
int ret;
- ret = arch_kexec_kernel_verify_sig(image, image->kernel_buf,
- image->kernel_buf_len);
+ ret = kexec_image_verify_sig(image, image->kernel_buf,
+ image->kernel_buf_len);
if (ret) {
if (sig_enforce) {
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b8ac29b40183a6038919768b5d189c9bd91ce9b4 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason(a)zx2c4.com>
Date: Sun, 17 Jul 2022 23:53:34 +0200
Subject: [PATCH] timekeeping: contribute wall clock to rng on time change
The rng's random_init() function contributes the real time to the rng at
boot time, so that events can at least start in relation to something
particular in the real world. But this clock might not yet be set at that
point in boot, so nothing is contributed. In addition, the relation
between minor clock changes from, say, NTP, and the cycle counter is
potentially useful entropic data.
This commit addresses this by mixing in a time stamp on calls to
settimeofday and adjtimex. No entropy is credited in doing so, so it
doesn't make initialization faster, but it is still useful input to
have.
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Cc: stable(a)vger.kernel.org
Reviewed-by: Thomas Gleixner <tglx(a)linutronix.de>
Reviewed-by: Eric Biggers <ebiggers(a)google.com>
Signed-off-by: Jason A. Donenfeld <Jason(a)zx2c4.com>
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 8e4b3c32fcf9..f72b9f1de178 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -23,6 +23,7 @@
#include <linux/pvclock_gtod.h>
#include <linux/compiler.h>
#include <linux/audit.h>
+#include <linux/random.h>
#include "tick-internal.h"
#include "ntp_internal.h"
@@ -1343,8 +1344,10 @@ int do_settimeofday64(const struct timespec64 *ts)
/* Signal hrtimers about time change */
clock_was_set(CLOCK_SET_WALL);
- if (!ret)
+ if (!ret) {
audit_tk_injoffset(ts_delta);
+ add_device_randomness(ts, sizeof(*ts));
+ }
return ret;
}
@@ -2430,6 +2433,7 @@ int do_adjtimex(struct __kernel_timex *txc)
ret = timekeeping_validate_timex(txc);
if (ret)
return ret;
+ add_device_randomness(txc, sizeof(*txc));
if (txc->modes & ADJ_SETOFFSET) {
struct timespec64 delta;
@@ -2447,6 +2451,7 @@ int do_adjtimex(struct __kernel_timex *txc)
audit_ntp_init(&ad);
ktime_get_real_ts64(&ts);
+ add_device_randomness(&ts, sizeof(ts));
raw_spin_lock_irqsave(&timekeeper_lock, flags);
write_seqcount_begin(&tk_core.seq);
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b8ac29b40183a6038919768b5d189c9bd91ce9b4 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason(a)zx2c4.com>
Date: Sun, 17 Jul 2022 23:53:34 +0200
Subject: [PATCH] timekeeping: contribute wall clock to rng on time change
The rng's random_init() function contributes the real time to the rng at
boot time, so that events can at least start in relation to something
particular in the real world. But this clock might not yet be set at that
point in boot, so nothing is contributed. In addition, the relation
between minor clock changes from, say, NTP, and the cycle counter is
potentially useful entropic data.
This commit addresses this by mixing in a time stamp on calls to
settimeofday and adjtimex. No entropy is credited in doing so, so it
doesn't make initialization faster, but it is still useful input to
have.
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Cc: stable(a)vger.kernel.org
Reviewed-by: Thomas Gleixner <tglx(a)linutronix.de>
Reviewed-by: Eric Biggers <ebiggers(a)google.com>
Signed-off-by: Jason A. Donenfeld <Jason(a)zx2c4.com>
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 8e4b3c32fcf9..f72b9f1de178 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -23,6 +23,7 @@
#include <linux/pvclock_gtod.h>
#include <linux/compiler.h>
#include <linux/audit.h>
+#include <linux/random.h>
#include "tick-internal.h"
#include "ntp_internal.h"
@@ -1343,8 +1344,10 @@ int do_settimeofday64(const struct timespec64 *ts)
/* Signal hrtimers about time change */
clock_was_set(CLOCK_SET_WALL);
- if (!ret)
+ if (!ret) {
audit_tk_injoffset(ts_delta);
+ add_device_randomness(ts, sizeof(*ts));
+ }
return ret;
}
@@ -2430,6 +2433,7 @@ int do_adjtimex(struct __kernel_timex *txc)
ret = timekeeping_validate_timex(txc);
if (ret)
return ret;
+ add_device_randomness(txc, sizeof(*txc));
if (txc->modes & ADJ_SETOFFSET) {
struct timespec64 delta;
@@ -2447,6 +2451,7 @@ int do_adjtimex(struct __kernel_timex *txc)
audit_ntp_init(&ad);
ktime_get_real_ts64(&ts);
+ add_device_randomness(&ts, sizeof(ts));
raw_spin_lock_irqsave(&timekeeper_lock, flags);
write_seqcount_begin(&tk_core.seq);
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b8ac29b40183a6038919768b5d189c9bd91ce9b4 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason(a)zx2c4.com>
Date: Sun, 17 Jul 2022 23:53:34 +0200
Subject: [PATCH] timekeeping: contribute wall clock to rng on time change
The rng's random_init() function contributes the real time to the rng at
boot time, so that events can at least start in relation to something
particular in the real world. But this clock might not yet be set at that
point in boot, so nothing is contributed. In addition, the relation
between minor clock changes from, say, NTP, and the cycle counter is
potentially useful entropic data.
This commit addresses this by mixing in a time stamp on calls to
settimeofday and adjtimex. No entropy is credited in doing so, so it
doesn't make initialization faster, but it is still useful input to
have.
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Cc: stable(a)vger.kernel.org
Reviewed-by: Thomas Gleixner <tglx(a)linutronix.de>
Reviewed-by: Eric Biggers <ebiggers(a)google.com>
Signed-off-by: Jason A. Donenfeld <Jason(a)zx2c4.com>
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 8e4b3c32fcf9..f72b9f1de178 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -23,6 +23,7 @@
#include <linux/pvclock_gtod.h>
#include <linux/compiler.h>
#include <linux/audit.h>
+#include <linux/random.h>
#include "tick-internal.h"
#include "ntp_internal.h"
@@ -1343,8 +1344,10 @@ int do_settimeofday64(const struct timespec64 *ts)
/* Signal hrtimers about time change */
clock_was_set(CLOCK_SET_WALL);
- if (!ret)
+ if (!ret) {
audit_tk_injoffset(ts_delta);
+ add_device_randomness(ts, sizeof(*ts));
+ }
return ret;
}
@@ -2430,6 +2433,7 @@ int do_adjtimex(struct __kernel_timex *txc)
ret = timekeeping_validate_timex(txc);
if (ret)
return ret;
+ add_device_randomness(txc, sizeof(*txc));
if (txc->modes & ADJ_SETOFFSET) {
struct timespec64 delta;
@@ -2447,6 +2451,7 @@ int do_adjtimex(struct __kernel_timex *txc)
audit_ntp_init(&ad);
ktime_get_real_ts64(&ts);
+ add_device_randomness(&ts, sizeof(ts));
raw_spin_lock_irqsave(&timekeeper_lock, flags);
write_seqcount_begin(&tk_core.seq);
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 3534e5a5ed2997ca1b00f44a0378a075bd05e8a3 Mon Sep 17 00:00:00 2001
From: Luo Meng <luomeng12(a)huawei.com>
Date: Thu, 14 Jul 2022 19:28:25 +0800
Subject: [PATCH] dm thin: fix use-after-free crash in
dm_sm_register_threshold_callback
Fault inject on pool metadata device reports:
BUG: KASAN: use-after-free in dm_pool_register_metadata_threshold+0x40/0x80
Read of size 8 at addr ffff8881b9d50068 by task dmsetup/950
CPU: 7 PID: 950 Comm: dmsetup Tainted: G W 5.19.0-rc6 #1
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-1.fc33 04/01/2014
Call Trace:
<TASK>
dump_stack_lvl+0x34/0x44
print_address_description.constprop.0.cold+0xeb/0x3f4
kasan_report.cold+0xe6/0x147
dm_pool_register_metadata_threshold+0x40/0x80
pool_ctr+0xa0a/0x1150
dm_table_add_target+0x2c8/0x640
table_load+0x1fd/0x430
ctl_ioctl+0x2c4/0x5a0
dm_ctl_ioctl+0xa/0x10
__x64_sys_ioctl+0xb3/0xd0
do_syscall_64+0x35/0x80
entry_SYSCALL_64_after_hwframe+0x46/0xb0
This can be easily reproduced using:
echo offline > /sys/block/sda/device/state
dd if=/dev/zero of=/dev/mapper/thin bs=4k count=10
dmsetup load pool --table "0 20971520 thin-pool /dev/sda /dev/sdb 128 0 0"
If a metadata commit fails, the transaction will be aborted and the
metadata space maps will be destroyed. If a DM table reload then
happens for this failed thin-pool, a use-after-free will occur in
dm_sm_register_threshold_callback (called from
dm_pool_register_metadata_threshold).
Fix this in dm_pool_register_metadata_threshold() by returning the
-EINVAL error if the thin-pool is in fail mode. Also fail pool_ctr()
with a new error message: "Error registering metadata threshold".
Fixes: ac8c3f3df65e4 ("dm thin: generate event when metadata threshold passed")
Cc: stable(a)vger.kernel.org
Reported-by: Hulk Robot <hulkci(a)huawei.com>
Signed-off-by: Luo Meng <luomeng12(a)huawei.com>
Signed-off-by: Mike Snitzer <snitzer(a)kernel.org>
diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c
index 2db7030aba00..a27395c8621f 100644
--- a/drivers/md/dm-thin-metadata.c
+++ b/drivers/md/dm-thin-metadata.c
@@ -2045,10 +2045,13 @@ int dm_pool_register_metadata_threshold(struct dm_pool_metadata *pmd,
dm_sm_threshold_fn fn,
void *context)
{
- int r;
+ int r = -EINVAL;
pmd_write_lock_in_core(pmd);
- r = dm_sm_register_threshold_callback(pmd->metadata_sm, threshold, fn, context);
+ if (!pmd->fail_io) {
+ r = dm_sm_register_threshold_callback(pmd->metadata_sm,
+ threshold, fn, context);
+ }
pmd_write_unlock(pmd);
return r;
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 84c083f76673..e76c96c760a9 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -3375,8 +3375,10 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
calc_metadata_threshold(pt),
metadata_low_callback,
pool);
- if (r)
+ if (r) {
+ ti->error = "Error registering metadata threshold";
goto out_flags_changed;
+ }
dm_pool_register_pre_commit_callback(pool->pmd,
metadata_pre_commit_callback, pool);
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 3534e5a5ed2997ca1b00f44a0378a075bd05e8a3 Mon Sep 17 00:00:00 2001
From: Luo Meng <luomeng12(a)huawei.com>
Date: Thu, 14 Jul 2022 19:28:25 +0800
Subject: [PATCH] dm thin: fix use-after-free crash in
dm_sm_register_threshold_callback
Fault inject on pool metadata device reports:
BUG: KASAN: use-after-free in dm_pool_register_metadata_threshold+0x40/0x80
Read of size 8 at addr ffff8881b9d50068 by task dmsetup/950
CPU: 7 PID: 950 Comm: dmsetup Tainted: G W 5.19.0-rc6 #1
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-1.fc33 04/01/2014
Call Trace:
<TASK>
dump_stack_lvl+0x34/0x44
print_address_description.constprop.0.cold+0xeb/0x3f4
kasan_report.cold+0xe6/0x147
dm_pool_register_metadata_threshold+0x40/0x80
pool_ctr+0xa0a/0x1150
dm_table_add_target+0x2c8/0x640
table_load+0x1fd/0x430
ctl_ioctl+0x2c4/0x5a0
dm_ctl_ioctl+0xa/0x10
__x64_sys_ioctl+0xb3/0xd0
do_syscall_64+0x35/0x80
entry_SYSCALL_64_after_hwframe+0x46/0xb0
This can be easily reproduced using:
echo offline > /sys/block/sda/device/state
dd if=/dev/zero of=/dev/mapper/thin bs=4k count=10
dmsetup load pool --table "0 20971520 thin-pool /dev/sda /dev/sdb 128 0 0"
If a metadata commit fails, the transaction will be aborted and the
metadata space maps will be destroyed. If a DM table reload then
happens for this failed thin-pool, a use-after-free will occur in
dm_sm_register_threshold_callback (called from
dm_pool_register_metadata_threshold).
Fix this in dm_pool_register_metadata_threshold() by returning the
-EINVAL error if the thin-pool is in fail mode. Also fail pool_ctr()
with a new error message: "Error registering metadata threshold".
Fixes: ac8c3f3df65e4 ("dm thin: generate event when metadata threshold passed")
Cc: stable(a)vger.kernel.org
Reported-by: Hulk Robot <hulkci(a)huawei.com>
Signed-off-by: Luo Meng <luomeng12(a)huawei.com>
Signed-off-by: Mike Snitzer <snitzer(a)kernel.org>
diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c
index 2db7030aba00..a27395c8621f 100644
--- a/drivers/md/dm-thin-metadata.c
+++ b/drivers/md/dm-thin-metadata.c
@@ -2045,10 +2045,13 @@ int dm_pool_register_metadata_threshold(struct dm_pool_metadata *pmd,
dm_sm_threshold_fn fn,
void *context)
{
- int r;
+ int r = -EINVAL;
pmd_write_lock_in_core(pmd);
- r = dm_sm_register_threshold_callback(pmd->metadata_sm, threshold, fn, context);
+ if (!pmd->fail_io) {
+ r = dm_sm_register_threshold_callback(pmd->metadata_sm,
+ threshold, fn, context);
+ }
pmd_write_unlock(pmd);
return r;
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 84c083f76673..e76c96c760a9 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -3375,8 +3375,10 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
calc_metadata_threshold(pt),
metadata_low_callback,
pool);
- if (r)
+ if (r) {
+ ti->error = "Error registering metadata threshold";
goto out_flags_changed;
+ }
dm_pool_register_pre_commit_callback(pool->pmd,
metadata_pre_commit_callback, pool);
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 3534e5a5ed2997ca1b00f44a0378a075bd05e8a3 Mon Sep 17 00:00:00 2001
From: Luo Meng <luomeng12(a)huawei.com>
Date: Thu, 14 Jul 2022 19:28:25 +0800
Subject: [PATCH] dm thin: fix use-after-free crash in
dm_sm_register_threshold_callback
Fault inject on pool metadata device reports:
BUG: KASAN: use-after-free in dm_pool_register_metadata_threshold+0x40/0x80
Read of size 8 at addr ffff8881b9d50068 by task dmsetup/950
CPU: 7 PID: 950 Comm: dmsetup Tainted: G W 5.19.0-rc6 #1
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-1.fc33 04/01/2014
Call Trace:
<TASK>
dump_stack_lvl+0x34/0x44
print_address_description.constprop.0.cold+0xeb/0x3f4
kasan_report.cold+0xe6/0x147
dm_pool_register_metadata_threshold+0x40/0x80
pool_ctr+0xa0a/0x1150
dm_table_add_target+0x2c8/0x640
table_load+0x1fd/0x430
ctl_ioctl+0x2c4/0x5a0
dm_ctl_ioctl+0xa/0x10
__x64_sys_ioctl+0xb3/0xd0
do_syscall_64+0x35/0x80
entry_SYSCALL_64_after_hwframe+0x46/0xb0
This can be easily reproduced using:
echo offline > /sys/block/sda/device/state
dd if=/dev/zero of=/dev/mapper/thin bs=4k count=10
dmsetup load pool --table "0 20971520 thin-pool /dev/sda /dev/sdb 128 0 0"
If a metadata commit fails, the transaction will be aborted and the
metadata space maps will be destroyed. If a DM table reload then
happens for this failed thin-pool, a use-after-free will occur in
dm_sm_register_threshold_callback (called from
dm_pool_register_metadata_threshold).
Fix this in dm_pool_register_metadata_threshold() by returning the
-EINVAL error if the thin-pool is in fail mode. Also fail pool_ctr()
with a new error message: "Error registering metadata threshold".
Fixes: ac8c3f3df65e4 ("dm thin: generate event when metadata threshold passed")
Cc: stable(a)vger.kernel.org
Reported-by: Hulk Robot <hulkci(a)huawei.com>
Signed-off-by: Luo Meng <luomeng12(a)huawei.com>
Signed-off-by: Mike Snitzer <snitzer(a)kernel.org>
diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c
index 2db7030aba00..a27395c8621f 100644
--- a/drivers/md/dm-thin-metadata.c
+++ b/drivers/md/dm-thin-metadata.c
@@ -2045,10 +2045,13 @@ int dm_pool_register_metadata_threshold(struct dm_pool_metadata *pmd,
dm_sm_threshold_fn fn,
void *context)
{
- int r;
+ int r = -EINVAL;
pmd_write_lock_in_core(pmd);
- r = dm_sm_register_threshold_callback(pmd->metadata_sm, threshold, fn, context);
+ if (!pmd->fail_io) {
+ r = dm_sm_register_threshold_callback(pmd->metadata_sm,
+ threshold, fn, context);
+ }
pmd_write_unlock(pmd);
return r;
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 84c083f76673..e76c96c760a9 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -3375,8 +3375,10 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
calc_metadata_threshold(pt),
metadata_low_callback,
pool);
- if (r)
+ if (r) {
+ ti->error = "Error registering metadata threshold";
goto out_flags_changed;
+ }
dm_pool_register_pre_commit_callback(pool->pmd,
metadata_pre_commit_callback, pool);
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7dad24db59d2d2803576f2e3645728866a056dab Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka(a)redhat.com>
Date: Sun, 24 Jul 2022 14:33:52 -0400
Subject: [PATCH] dm raid: fix address sanitizer warning in raid_resume
There is a KASAN warning in raid_resume when running the lvm test
lvconvert-raid.sh. The reason for the warning is that mddev->raid_disks
is greater than rs->raid_disks, so the loop touches one entry beyond
the allocated length.
Cc: stable(a)vger.kernel.org
Signed-off-by: Mikulas Patocka <mpatocka(a)redhat.com>
Signed-off-by: Mike Snitzer <snitzer(a)kernel.org>
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index f41fd3cf5c7d..e6a9b8cb22d3 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -3817,7 +3817,7 @@ static void attempt_restore_of_faulty_devices(struct raid_set *rs)
memset(cleared_failed_devices, 0, sizeof(cleared_failed_devices));
- for (i = 0; i < mddev->raid_disks; i++) {
+ for (i = 0; i < rs->raid_disks; i++) {
r = &rs->dev[i].rdev;
/* HM FIXME: enhance journal device recovery processing */
if (test_bit(Journal, &r->flags))
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 65f8b80053a1b2fd602daa6814e62d6fa90e5e9b Mon Sep 17 00:00:00 2001
From: Jan Kara <jack(a)suse.cz>
Date: Tue, 12 Jul 2022 12:54:24 +0200
Subject: [PATCH] ext4: fix race when reusing xattr blocks
When ext4_xattr_block_set() decides to remove xattr block the following
race can happen:
CPU1                                    CPU2
ext4_xattr_block_set()                  ext4_xattr_release_block()
  new_bh = ext4_xattr_block_cache_find()
                                          lock_buffer(bh);
                                          ref = le32_to_cpu(BHDR(bh)->h_refcount);
                                          if (ref == 1) {
                                            ...
                                            mb_cache_entry_delete();
                                            unlock_buffer(bh);
                                            ext4_free_blocks();
                                              ...
                                              ext4_forget(..., bh, ...);
                                                jbd2_journal_revoke(..., bh);
  ext4_journal_get_write_access(..., new_bh, ...)
    do_get_write_access()
      jbd2_journal_cancel_revoke(..., new_bh);
Later the code in ext4_xattr_block_set() finds out the block got freed
and cancels reuse of the block but the revoke stays canceled and so in
case of block reuse and journal replay the filesystem can get corrupted.
If the race works out slightly differently, we can also hit assertions
in the jbd2 code.
Fix the problem by making sure that once matching mbcache entry is
found, code dropping the last xattr block reference (or trying to modify
xattr block in place) waits until the mbcache entry reference is
dropped. This way code trying to reuse xattr block is protected from
someone trying to drop the last reference to xattr block.
Reported-and-tested-by: Ritesh Harjani <ritesh.list(a)gmail.com>
CC: stable(a)vger.kernel.org
Fixes: 82939d7999df ("ext4: convert to mbcache2")
Signed-off-by: Jan Kara <jack(a)suse.cz>
Link: https://lore.kernel.org/r/20220712105436.32204-5-jack@suse.cz
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index a25942a74929..533216e80fa2 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -439,9 +439,16 @@ static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
/* Remove entry from mbcache when EA inode is getting evicted */
void ext4_evict_ea_inode(struct inode *inode)
{
- if (EA_INODE_CACHE(inode))
- mb_cache_entry_delete(EA_INODE_CACHE(inode),
- ext4_xattr_inode_get_hash(inode), inode->i_ino);
+ struct mb_cache_entry *oe;
+
+ if (!EA_INODE_CACHE(inode))
+ return;
+ /* Wait for entry to get unused so that we can remove it */
+ while ((oe = mb_cache_entry_delete_or_get(EA_INODE_CACHE(inode),
+ ext4_xattr_inode_get_hash(inode), inode->i_ino))) {
+ mb_cache_entry_wait_unused(oe);
+ mb_cache_entry_put(EA_INODE_CACHE(inode), oe);
+ }
}
static int
@@ -1229,6 +1236,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
if (error)
goto out;
+retry_ref:
lock_buffer(bh);
hash = le32_to_cpu(BHDR(bh)->h_hash);
ref = le32_to_cpu(BHDR(bh)->h_refcount);
@@ -1238,9 +1246,18 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
* This must happen under buffer lock for
* ext4_xattr_block_set() to reliably detect freed block
*/
- if (ea_block_cache)
- mb_cache_entry_delete(ea_block_cache, hash,
- bh->b_blocknr);
+ if (ea_block_cache) {
+ struct mb_cache_entry *oe;
+
+ oe = mb_cache_entry_delete_or_get(ea_block_cache, hash,
+ bh->b_blocknr);
+ if (oe) {
+ unlock_buffer(bh);
+ mb_cache_entry_wait_unused(oe);
+ mb_cache_entry_put(ea_block_cache, oe);
+ goto retry_ref;
+ }
+ }
get_bh(bh);
unlock_buffer(bh);
@@ -1867,9 +1884,20 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
* ext4_xattr_block_set() to reliably detect modified
* block
*/
- if (ea_block_cache)
- mb_cache_entry_delete(ea_block_cache, hash,
- bs->bh->b_blocknr);
+ if (ea_block_cache) {
+ struct mb_cache_entry *oe;
+
+ oe = mb_cache_entry_delete_or_get(ea_block_cache,
+ hash, bs->bh->b_blocknr);
+ if (oe) {
+ /*
+ * Xattr block is getting reused. Leave
+ * it alone.
+ */
+ mb_cache_entry_put(ea_block_cache, oe);
+ goto clone_block;
+ }
+ }
ea_bdebug(bs->bh, "modifying in-place");
error = ext4_xattr_set_entry(i, s, handle, inode,
true /* is_block */);
@@ -1885,6 +1913,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
goto cleanup;
goto inserted;
}
+clone_block:
unlock_buffer(bs->bh);
ea_bdebug(bs->bh, "cloning");
s->base = kmemdup(BHDR(bs->bh), bs->bh->b_size, GFP_NOFS);
@@ -1990,18 +2019,13 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
lock_buffer(new_bh);
/*
* We have to be careful about races with
- * freeing, rehashing or adding references to
- * xattr block. Once we hold buffer lock xattr
- * block's state is stable so we can check
- * whether the block got freed / rehashed or
- * not. Since we unhash mbcache entry under
- * buffer lock when freeing / rehashing xattr
- * block, checking whether entry is still
- * hashed is reliable. Same rules hold for
- * e_reusable handling.
+ * adding references to xattr block. Once we
+ * hold buffer lock xattr block's state is
+ * stable so we can check the additional
+ * reference fits.
*/
- if (hlist_bl_unhashed(&ce->e_hash_list) ||
- !ce->e_reusable) {
+ ref = le32_to_cpu(BHDR(new_bh)->h_refcount) + 1;
+ if (ref > EXT4_XATTR_REFCOUNT_MAX) {
/*
* Undo everything and check mbcache
* again.
@@ -2016,9 +2040,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
new_bh = NULL;
goto inserted;
}
- ref = le32_to_cpu(BHDR(new_bh)->h_refcount) + 1;
BHDR(new_bh)->h_refcount = cpu_to_le32(ref);
- if (ref >= EXT4_XATTR_REFCOUNT_MAX)
+ if (ref == EXT4_XATTR_REFCOUNT_MAX)
ce->e_reusable = 0;
ea_bdebug(new_bh, "reusing; refcount now=%d",
ref);
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 65f8b80053a1b2fd602daa6814e62d6fa90e5e9b Mon Sep 17 00:00:00 2001
From: Jan Kara <jack(a)suse.cz>
Date: Tue, 12 Jul 2022 12:54:24 +0200
Subject: [PATCH] ext4: fix race when reusing xattr blocks
When ext4_xattr_block_set() decides to remove xattr block the following
race can happen:
CPU1                                    CPU2
ext4_xattr_block_set()                  ext4_xattr_release_block()
  new_bh = ext4_xattr_block_cache_find()
                                        lock_buffer(bh);
                                        ref = le32_to_cpu(BHDR(bh)->h_refcount);
                                        if (ref == 1) {
                                          ...
                                          mb_cache_entry_delete();
                                          unlock_buffer(bh);
                                          ext4_free_blocks();
                                            ...
                                            ext4_forget(..., bh, ...);
                                              jbd2_journal_revoke(..., bh);
  ext4_journal_get_write_access(..., new_bh, ...)
    do_get_write_access()
      jbd2_journal_cancel_revoke(..., new_bh);
Later the code in ext4_xattr_block_set() finds out the block got freed
and cancels the reuse of the block, but the revoke stays canceled, and so in
case of block reuse and journal replay the filesystem can get corrupted.
If the race works out slightly differently, we can also hit assertions
in the jbd2 code.
Fix the problem by making sure that once matching mbcache entry is
found, code dropping the last xattr block reference (or trying to modify
xattr block in place) waits until the mbcache entry reference is
dropped. This way code trying to reuse xattr block is protected from
someone trying to drop the last reference to xattr block.
Reported-and-tested-by: Ritesh Harjani <ritesh.list(a)gmail.com>
CC: stable(a)vger.kernel.org
Fixes: 82939d7999df ("ext4: convert to mbcache2")
Signed-off-by: Jan Kara <jack(a)suse.cz>
Link: https://lore.kernel.org/r/20220712105436.32204-5-jack@suse.cz
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index a25942a74929..533216e80fa2 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -439,9 +439,16 @@ static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
/* Remove entry from mbcache when EA inode is getting evicted */
void ext4_evict_ea_inode(struct inode *inode)
{
- if (EA_INODE_CACHE(inode))
- mb_cache_entry_delete(EA_INODE_CACHE(inode),
- ext4_xattr_inode_get_hash(inode), inode->i_ino);
+ struct mb_cache_entry *oe;
+
+ if (!EA_INODE_CACHE(inode))
+ return;
+ /* Wait for entry to get unused so that we can remove it */
+ while ((oe = mb_cache_entry_delete_or_get(EA_INODE_CACHE(inode),
+ ext4_xattr_inode_get_hash(inode), inode->i_ino))) {
+ mb_cache_entry_wait_unused(oe);
+ mb_cache_entry_put(EA_INODE_CACHE(inode), oe);
+ }
}
static int
@@ -1229,6 +1236,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
if (error)
goto out;
+retry_ref:
lock_buffer(bh);
hash = le32_to_cpu(BHDR(bh)->h_hash);
ref = le32_to_cpu(BHDR(bh)->h_refcount);
@@ -1238,9 +1246,18 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
* This must happen under buffer lock for
* ext4_xattr_block_set() to reliably detect freed block
*/
- if (ea_block_cache)
- mb_cache_entry_delete(ea_block_cache, hash,
- bh->b_blocknr);
+ if (ea_block_cache) {
+ struct mb_cache_entry *oe;
+
+ oe = mb_cache_entry_delete_or_get(ea_block_cache, hash,
+ bh->b_blocknr);
+ if (oe) {
+ unlock_buffer(bh);
+ mb_cache_entry_wait_unused(oe);
+ mb_cache_entry_put(ea_block_cache, oe);
+ goto retry_ref;
+ }
+ }
get_bh(bh);
unlock_buffer(bh);
@@ -1867,9 +1884,20 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
* ext4_xattr_block_set() to reliably detect modified
* block
*/
- if (ea_block_cache)
- mb_cache_entry_delete(ea_block_cache, hash,
- bs->bh->b_blocknr);
+ if (ea_block_cache) {
+ struct mb_cache_entry *oe;
+
+ oe = mb_cache_entry_delete_or_get(ea_block_cache,
+ hash, bs->bh->b_blocknr);
+ if (oe) {
+ /*
+ * Xattr block is getting reused. Leave
+ * it alone.
+ */
+ mb_cache_entry_put(ea_block_cache, oe);
+ goto clone_block;
+ }
+ }
ea_bdebug(bs->bh, "modifying in-place");
error = ext4_xattr_set_entry(i, s, handle, inode,
true /* is_block */);
@@ -1885,6 +1913,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
goto cleanup;
goto inserted;
}
+clone_block:
unlock_buffer(bs->bh);
ea_bdebug(bs->bh, "cloning");
s->base = kmemdup(BHDR(bs->bh), bs->bh->b_size, GFP_NOFS);
@@ -1990,18 +2019,13 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
lock_buffer(new_bh);
/*
* We have to be careful about races with
- * freeing, rehashing or adding references to
- * xattr block. Once we hold buffer lock xattr
- * block's state is stable so we can check
- * whether the block got freed / rehashed or
- * not. Since we unhash mbcache entry under
- * buffer lock when freeing / rehashing xattr
- * block, checking whether entry is still
- * hashed is reliable. Same rules hold for
- * e_reusable handling.
+ * adding references to xattr block. Once we
+ * hold buffer lock xattr block's state is
+ * stable so we can check the additional
+ * reference fits.
*/
- if (hlist_bl_unhashed(&ce->e_hash_list) ||
- !ce->e_reusable) {
+ ref = le32_to_cpu(BHDR(new_bh)->h_refcount) + 1;
+ if (ref > EXT4_XATTR_REFCOUNT_MAX) {
/*
* Undo everything and check mbcache
* again.
@@ -2016,9 +2040,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
new_bh = NULL;
goto inserted;
}
- ref = le32_to_cpu(BHDR(new_bh)->h_refcount) + 1;
BHDR(new_bh)->h_refcount = cpu_to_le32(ref);
- if (ref >= EXT4_XATTR_REFCOUNT_MAX)
+ if (ref == EXT4_XATTR_REFCOUNT_MAX)
ce->e_reusable = 0;
ea_bdebug(new_bh, "reusing; refcount now=%d",
ref);
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 65f8b80053a1b2fd602daa6814e62d6fa90e5e9b Mon Sep 17 00:00:00 2001
From: Jan Kara <jack(a)suse.cz>
Date: Tue, 12 Jul 2022 12:54:24 +0200
Subject: [PATCH] ext4: fix race when reusing xattr blocks
When ext4_xattr_block_set() decides to remove xattr block the following
race can happen:
CPU1                                    CPU2
ext4_xattr_block_set()                  ext4_xattr_release_block()
  new_bh = ext4_xattr_block_cache_find()
                                        lock_buffer(bh);
                                        ref = le32_to_cpu(BHDR(bh)->h_refcount);
                                        if (ref == 1) {
                                          ...
                                          mb_cache_entry_delete();
                                          unlock_buffer(bh);
                                          ext4_free_blocks();
                                            ...
                                            ext4_forget(..., bh, ...);
                                              jbd2_journal_revoke(..., bh);
  ext4_journal_get_write_access(..., new_bh, ...)
    do_get_write_access()
      jbd2_journal_cancel_revoke(..., new_bh);
Later the code in ext4_xattr_block_set() finds out the block got freed
and cancels the reuse of the block, but the revoke stays canceled, and so in
case of block reuse and journal replay the filesystem can get corrupted.
If the race works out slightly differently, we can also hit assertions
in the jbd2 code.
Fix the problem by making sure that once matching mbcache entry is
found, code dropping the last xattr block reference (or trying to modify
xattr block in place) waits until the mbcache entry reference is
dropped. This way code trying to reuse xattr block is protected from
someone trying to drop the last reference to xattr block.
Reported-and-tested-by: Ritesh Harjani <ritesh.list(a)gmail.com>
CC: stable(a)vger.kernel.org
Fixes: 82939d7999df ("ext4: convert to mbcache2")
Signed-off-by: Jan Kara <jack(a)suse.cz>
Link: https://lore.kernel.org/r/20220712105436.32204-5-jack@suse.cz
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index a25942a74929..533216e80fa2 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -439,9 +439,16 @@ static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
/* Remove entry from mbcache when EA inode is getting evicted */
void ext4_evict_ea_inode(struct inode *inode)
{
- if (EA_INODE_CACHE(inode))
- mb_cache_entry_delete(EA_INODE_CACHE(inode),
- ext4_xattr_inode_get_hash(inode), inode->i_ino);
+ struct mb_cache_entry *oe;
+
+ if (!EA_INODE_CACHE(inode))
+ return;
+ /* Wait for entry to get unused so that we can remove it */
+ while ((oe = mb_cache_entry_delete_or_get(EA_INODE_CACHE(inode),
+ ext4_xattr_inode_get_hash(inode), inode->i_ino))) {
+ mb_cache_entry_wait_unused(oe);
+ mb_cache_entry_put(EA_INODE_CACHE(inode), oe);
+ }
}
static int
@@ -1229,6 +1236,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
if (error)
goto out;
+retry_ref:
lock_buffer(bh);
hash = le32_to_cpu(BHDR(bh)->h_hash);
ref = le32_to_cpu(BHDR(bh)->h_refcount);
@@ -1238,9 +1246,18 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
* This must happen under buffer lock for
* ext4_xattr_block_set() to reliably detect freed block
*/
- if (ea_block_cache)
- mb_cache_entry_delete(ea_block_cache, hash,
- bh->b_blocknr);
+ if (ea_block_cache) {
+ struct mb_cache_entry *oe;
+
+ oe = mb_cache_entry_delete_or_get(ea_block_cache, hash,
+ bh->b_blocknr);
+ if (oe) {
+ unlock_buffer(bh);
+ mb_cache_entry_wait_unused(oe);
+ mb_cache_entry_put(ea_block_cache, oe);
+ goto retry_ref;
+ }
+ }
get_bh(bh);
unlock_buffer(bh);
@@ -1867,9 +1884,20 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
* ext4_xattr_block_set() to reliably detect modified
* block
*/
- if (ea_block_cache)
- mb_cache_entry_delete(ea_block_cache, hash,
- bs->bh->b_blocknr);
+ if (ea_block_cache) {
+ struct mb_cache_entry *oe;
+
+ oe = mb_cache_entry_delete_or_get(ea_block_cache,
+ hash, bs->bh->b_blocknr);
+ if (oe) {
+ /*
+ * Xattr block is getting reused. Leave
+ * it alone.
+ */
+ mb_cache_entry_put(ea_block_cache, oe);
+ goto clone_block;
+ }
+ }
ea_bdebug(bs->bh, "modifying in-place");
error = ext4_xattr_set_entry(i, s, handle, inode,
true /* is_block */);
@@ -1885,6 +1913,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
goto cleanup;
goto inserted;
}
+clone_block:
unlock_buffer(bs->bh);
ea_bdebug(bs->bh, "cloning");
s->base = kmemdup(BHDR(bs->bh), bs->bh->b_size, GFP_NOFS);
@@ -1990,18 +2019,13 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
lock_buffer(new_bh);
/*
* We have to be careful about races with
- * freeing, rehashing or adding references to
- * xattr block. Once we hold buffer lock xattr
- * block's state is stable so we can check
- * whether the block got freed / rehashed or
- * not. Since we unhash mbcache entry under
- * buffer lock when freeing / rehashing xattr
- * block, checking whether entry is still
- * hashed is reliable. Same rules hold for
- * e_reusable handling.
+ * adding references to xattr block. Once we
+ * hold buffer lock xattr block's state is
+ * stable so we can check the additional
+ * reference fits.
*/
- if (hlist_bl_unhashed(&ce->e_hash_list) ||
- !ce->e_reusable) {
+ ref = le32_to_cpu(BHDR(new_bh)->h_refcount) + 1;
+ if (ref > EXT4_XATTR_REFCOUNT_MAX) {
/*
* Undo everything and check mbcache
* again.
@@ -2016,9 +2040,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
new_bh = NULL;
goto inserted;
}
- ref = le32_to_cpu(BHDR(new_bh)->h_refcount) + 1;
BHDR(new_bh)->h_refcount = cpu_to_le32(ref);
- if (ref >= EXT4_XATTR_REFCOUNT_MAX)
+ if (ref == EXT4_XATTR_REFCOUNT_MAX)
ce->e_reusable = 0;
ea_bdebug(new_bh, "reusing; refcount now=%d",
ref);
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 65f8b80053a1b2fd602daa6814e62d6fa90e5e9b Mon Sep 17 00:00:00 2001
From: Jan Kara <jack(a)suse.cz>
Date: Tue, 12 Jul 2022 12:54:24 +0200
Subject: [PATCH] ext4: fix race when reusing xattr blocks
When ext4_xattr_block_set() decides to remove xattr block the following
race can happen:
CPU1                                    CPU2
ext4_xattr_block_set()                  ext4_xattr_release_block()
  new_bh = ext4_xattr_block_cache_find()
                                        lock_buffer(bh);
                                        ref = le32_to_cpu(BHDR(bh)->h_refcount);
                                        if (ref == 1) {
                                          ...
                                          mb_cache_entry_delete();
                                          unlock_buffer(bh);
                                          ext4_free_blocks();
                                            ...
                                            ext4_forget(..., bh, ...);
                                              jbd2_journal_revoke(..., bh);
  ext4_journal_get_write_access(..., new_bh, ...)
    do_get_write_access()
      jbd2_journal_cancel_revoke(..., new_bh);
Later the code in ext4_xattr_block_set() finds out the block got freed
and cancels the reuse of the block, but the revoke stays canceled, and so in
case of block reuse and journal replay the filesystem can get corrupted.
If the race works out slightly differently, we can also hit assertions
in the jbd2 code.
Fix the problem by making sure that once matching mbcache entry is
found, code dropping the last xattr block reference (or trying to modify
xattr block in place) waits until the mbcache entry reference is
dropped. This way code trying to reuse xattr block is protected from
someone trying to drop the last reference to xattr block.
Reported-and-tested-by: Ritesh Harjani <ritesh.list(a)gmail.com>
CC: stable(a)vger.kernel.org
Fixes: 82939d7999df ("ext4: convert to mbcache2")
Signed-off-by: Jan Kara <jack(a)suse.cz>
Link: https://lore.kernel.org/r/20220712105436.32204-5-jack@suse.cz
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index a25942a74929..533216e80fa2 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -439,9 +439,16 @@ static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
/* Remove entry from mbcache when EA inode is getting evicted */
void ext4_evict_ea_inode(struct inode *inode)
{
- if (EA_INODE_CACHE(inode))
- mb_cache_entry_delete(EA_INODE_CACHE(inode),
- ext4_xattr_inode_get_hash(inode), inode->i_ino);
+ struct mb_cache_entry *oe;
+
+ if (!EA_INODE_CACHE(inode))
+ return;
+ /* Wait for entry to get unused so that we can remove it */
+ while ((oe = mb_cache_entry_delete_or_get(EA_INODE_CACHE(inode),
+ ext4_xattr_inode_get_hash(inode), inode->i_ino))) {
+ mb_cache_entry_wait_unused(oe);
+ mb_cache_entry_put(EA_INODE_CACHE(inode), oe);
+ }
}
static int
@@ -1229,6 +1236,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
if (error)
goto out;
+retry_ref:
lock_buffer(bh);
hash = le32_to_cpu(BHDR(bh)->h_hash);
ref = le32_to_cpu(BHDR(bh)->h_refcount);
@@ -1238,9 +1246,18 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
* This must happen under buffer lock for
* ext4_xattr_block_set() to reliably detect freed block
*/
- if (ea_block_cache)
- mb_cache_entry_delete(ea_block_cache, hash,
- bh->b_blocknr);
+ if (ea_block_cache) {
+ struct mb_cache_entry *oe;
+
+ oe = mb_cache_entry_delete_or_get(ea_block_cache, hash,
+ bh->b_blocknr);
+ if (oe) {
+ unlock_buffer(bh);
+ mb_cache_entry_wait_unused(oe);
+ mb_cache_entry_put(ea_block_cache, oe);
+ goto retry_ref;
+ }
+ }
get_bh(bh);
unlock_buffer(bh);
@@ -1867,9 +1884,20 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
* ext4_xattr_block_set() to reliably detect modified
* block
*/
- if (ea_block_cache)
- mb_cache_entry_delete(ea_block_cache, hash,
- bs->bh->b_blocknr);
+ if (ea_block_cache) {
+ struct mb_cache_entry *oe;
+
+ oe = mb_cache_entry_delete_or_get(ea_block_cache,
+ hash, bs->bh->b_blocknr);
+ if (oe) {
+ /*
+ * Xattr block is getting reused. Leave
+ * it alone.
+ */
+ mb_cache_entry_put(ea_block_cache, oe);
+ goto clone_block;
+ }
+ }
ea_bdebug(bs->bh, "modifying in-place");
error = ext4_xattr_set_entry(i, s, handle, inode,
true /* is_block */);
@@ -1885,6 +1913,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
goto cleanup;
goto inserted;
}
+clone_block:
unlock_buffer(bs->bh);
ea_bdebug(bs->bh, "cloning");
s->base = kmemdup(BHDR(bs->bh), bs->bh->b_size, GFP_NOFS);
@@ -1990,18 +2019,13 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
lock_buffer(new_bh);
/*
* We have to be careful about races with
- * freeing, rehashing or adding references to
- * xattr block. Once we hold buffer lock xattr
- * block's state is stable so we can check
- * whether the block got freed / rehashed or
- * not. Since we unhash mbcache entry under
- * buffer lock when freeing / rehashing xattr
- * block, checking whether entry is still
- * hashed is reliable. Same rules hold for
- * e_reusable handling.
+ * adding references to xattr block. Once we
+ * hold buffer lock xattr block's state is
+ * stable so we can check the additional
+ * reference fits.
*/
- if (hlist_bl_unhashed(&ce->e_hash_list) ||
- !ce->e_reusable) {
+ ref = le32_to_cpu(BHDR(new_bh)->h_refcount) + 1;
+ if (ref > EXT4_XATTR_REFCOUNT_MAX) {
/*
* Undo everything and check mbcache
* again.
@@ -2016,9 +2040,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
new_bh = NULL;
goto inserted;
}
- ref = le32_to_cpu(BHDR(new_bh)->h_refcount) + 1;
BHDR(new_bh)->h_refcount = cpu_to_le32(ref);
- if (ref >= EXT4_XATTR_REFCOUNT_MAX)
+ if (ref == EXT4_XATTR_REFCOUNT_MAX)
ce->e_reusable = 0;
ea_bdebug(new_bh, "reusing; refcount now=%d",
ref);
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 65f8b80053a1b2fd602daa6814e62d6fa90e5e9b Mon Sep 17 00:00:00 2001
From: Jan Kara <jack(a)suse.cz>
Date: Tue, 12 Jul 2022 12:54:24 +0200
Subject: [PATCH] ext4: fix race when reusing xattr blocks
When ext4_xattr_block_set() decides to remove xattr block the following
race can happen:
CPU1                                    CPU2
ext4_xattr_block_set()                  ext4_xattr_release_block()
  new_bh = ext4_xattr_block_cache_find()
                                        lock_buffer(bh);
                                        ref = le32_to_cpu(BHDR(bh)->h_refcount);
                                        if (ref == 1) {
                                          ...
                                          mb_cache_entry_delete();
                                          unlock_buffer(bh);
                                          ext4_free_blocks();
                                            ...
                                            ext4_forget(..., bh, ...);
                                              jbd2_journal_revoke(..., bh);
  ext4_journal_get_write_access(..., new_bh, ...)
    do_get_write_access()
      jbd2_journal_cancel_revoke(..., new_bh);
Later the code in ext4_xattr_block_set() finds out the block got freed
and cancels the reuse of the block, but the revoke stays canceled, and so in
case of block reuse and journal replay the filesystem can get corrupted.
If the race works out slightly differently, we can also hit assertions
in the jbd2 code.
Fix the problem by making sure that once matching mbcache entry is
found, code dropping the last xattr block reference (or trying to modify
xattr block in place) waits until the mbcache entry reference is
dropped. This way code trying to reuse xattr block is protected from
someone trying to drop the last reference to xattr block.
Reported-and-tested-by: Ritesh Harjani <ritesh.list(a)gmail.com>
CC: stable(a)vger.kernel.org
Fixes: 82939d7999df ("ext4: convert to mbcache2")
Signed-off-by: Jan Kara <jack(a)suse.cz>
Link: https://lore.kernel.org/r/20220712105436.32204-5-jack@suse.cz
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index a25942a74929..533216e80fa2 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -439,9 +439,16 @@ static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
/* Remove entry from mbcache when EA inode is getting evicted */
void ext4_evict_ea_inode(struct inode *inode)
{
- if (EA_INODE_CACHE(inode))
- mb_cache_entry_delete(EA_INODE_CACHE(inode),
- ext4_xattr_inode_get_hash(inode), inode->i_ino);
+ struct mb_cache_entry *oe;
+
+ if (!EA_INODE_CACHE(inode))
+ return;
+ /* Wait for entry to get unused so that we can remove it */
+ while ((oe = mb_cache_entry_delete_or_get(EA_INODE_CACHE(inode),
+ ext4_xattr_inode_get_hash(inode), inode->i_ino))) {
+ mb_cache_entry_wait_unused(oe);
+ mb_cache_entry_put(EA_INODE_CACHE(inode), oe);
+ }
}
static int
@@ -1229,6 +1236,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
if (error)
goto out;
+retry_ref:
lock_buffer(bh);
hash = le32_to_cpu(BHDR(bh)->h_hash);
ref = le32_to_cpu(BHDR(bh)->h_refcount);
@@ -1238,9 +1246,18 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
* This must happen under buffer lock for
* ext4_xattr_block_set() to reliably detect freed block
*/
- if (ea_block_cache)
- mb_cache_entry_delete(ea_block_cache, hash,
- bh->b_blocknr);
+ if (ea_block_cache) {
+ struct mb_cache_entry *oe;
+
+ oe = mb_cache_entry_delete_or_get(ea_block_cache, hash,
+ bh->b_blocknr);
+ if (oe) {
+ unlock_buffer(bh);
+ mb_cache_entry_wait_unused(oe);
+ mb_cache_entry_put(ea_block_cache, oe);
+ goto retry_ref;
+ }
+ }
get_bh(bh);
unlock_buffer(bh);
@@ -1867,9 +1884,20 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
* ext4_xattr_block_set() to reliably detect modified
* block
*/
- if (ea_block_cache)
- mb_cache_entry_delete(ea_block_cache, hash,
- bs->bh->b_blocknr);
+ if (ea_block_cache) {
+ struct mb_cache_entry *oe;
+
+ oe = mb_cache_entry_delete_or_get(ea_block_cache,
+ hash, bs->bh->b_blocknr);
+ if (oe) {
+ /*
+ * Xattr block is getting reused. Leave
+ * it alone.
+ */
+ mb_cache_entry_put(ea_block_cache, oe);
+ goto clone_block;
+ }
+ }
ea_bdebug(bs->bh, "modifying in-place");
error = ext4_xattr_set_entry(i, s, handle, inode,
true /* is_block */);
@@ -1885,6 +1913,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
goto cleanup;
goto inserted;
}
+clone_block:
unlock_buffer(bs->bh);
ea_bdebug(bs->bh, "cloning");
s->base = kmemdup(BHDR(bs->bh), bs->bh->b_size, GFP_NOFS);
@@ -1990,18 +2019,13 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
lock_buffer(new_bh);
/*
* We have to be careful about races with
- * freeing, rehashing or adding references to
- * xattr block. Once we hold buffer lock xattr
- * block's state is stable so we can check
- * whether the block got freed / rehashed or
- * not. Since we unhash mbcache entry under
- * buffer lock when freeing / rehashing xattr
- * block, checking whether entry is still
- * hashed is reliable. Same rules hold for
- * e_reusable handling.
+ * adding references to xattr block. Once we
+ * hold buffer lock xattr block's state is
+ * stable so we can check the additional
+ * reference fits.
*/
- if (hlist_bl_unhashed(&ce->e_hash_list) ||
- !ce->e_reusable) {
+ ref = le32_to_cpu(BHDR(new_bh)->h_refcount) + 1;
+ if (ref > EXT4_XATTR_REFCOUNT_MAX) {
/*
* Undo everything and check mbcache
* again.
@@ -2016,9 +2040,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
new_bh = NULL;
goto inserted;
}
- ref = le32_to_cpu(BHDR(new_bh)->h_refcount) + 1;
BHDR(new_bh)->h_refcount = cpu_to_le32(ref);
- if (ref >= EXT4_XATTR_REFCOUNT_MAX)
+ if (ref == EXT4_XATTR_REFCOUNT_MAX)
ce->e_reusable = 0;
ea_bdebug(new_bh, "reusing; refcount now=%d",
ref);
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 6bc0d63dad7f9f54d381925ee855b402f652fa39 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack(a)suse.cz>
Date: Tue, 12 Jul 2022 12:54:22 +0200
Subject: [PATCH] ext4: remove EA inode entry from mbcache on inode eviction
Currently we remove EA inode from mbcache as soon as its xattr refcount
drops to zero. However there can be pending attempts to reuse the inode
and thus refcount handling code has to handle the situation when
refcount increases from zero anyway. So save some work and just keep EA
inode in mbcache until it is getting evicted. At that moment we are sure
following iget() of EA inode will fail anyway (or wait for eviction to
finish and load things from the disk again) and so removing mbcache
entry at that moment is fine and simplifies the code a bit.
CC: stable(a)vger.kernel.org
Fixes: 82939d7999df ("ext4: convert to mbcache2")
Signed-off-by: Jan Kara <jack(a)suse.cz>
Link: https://lore.kernel.org/r/20220712105436.32204-3-jack@suse.cz
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 7db52defcb16..8204c59bdd1d 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -177,6 +177,8 @@ void ext4_evict_inode(struct inode *inode)
trace_ext4_evict_inode(inode);
+ if (EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)
+ ext4_evict_ea_inode(inode);
if (inode->i_nlink) {
/*
* When journalling data dirty buffers are tracked only in the
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index c42b3e0d2d94..d92d50de5a01 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -436,6 +436,14 @@ static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
return err;
}
+/* Remove entry from mbcache when EA inode is getting evicted */
+void ext4_evict_ea_inode(struct inode *inode)
+{
+ if (EA_INODE_CACHE(inode))
+ mb_cache_entry_delete(EA_INODE_CACHE(inode),
+ ext4_xattr_inode_get_hash(inode), inode->i_ino);
+}
+
static int
ext4_xattr_inode_verify_hashes(struct inode *ea_inode,
struct ext4_xattr_entry *entry, void *buffer,
@@ -976,10 +984,8 @@ int __ext4_xattr_set_credits(struct super_block *sb, struct inode *inode,
static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
int ref_change)
{
- struct mb_cache *ea_inode_cache = EA_INODE_CACHE(ea_inode);
struct ext4_iloc iloc;
s64 ref_count;
- u32 hash;
int ret;
inode_lock(ea_inode);
@@ -1002,14 +1008,6 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
set_nlink(ea_inode, 1);
ext4_orphan_del(handle, ea_inode);
-
- if (ea_inode_cache) {
- hash = ext4_xattr_inode_get_hash(ea_inode);
- mb_cache_entry_create(ea_inode_cache,
- GFP_NOFS, hash,
- ea_inode->i_ino,
- true /* reusable */);
- }
}
} else {
WARN_ONCE(ref_count < 0, "EA inode %lu ref_count=%lld",
@@ -1022,12 +1020,6 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
clear_nlink(ea_inode);
ext4_orphan_add(handle, ea_inode);
-
- if (ea_inode_cache) {
- hash = ext4_xattr_inode_get_hash(ea_inode);
- mb_cache_entry_delete(ea_inode_cache, hash,
- ea_inode->i_ino);
- }
}
}
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index e29101168733..824faf0b15a8 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -191,6 +191,7 @@ extern void ext4_xattr_inode_array_free(struct ext4_xattr_inode_array *array);
extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
struct ext4_inode *raw_inode, handle_t *handle);
+extern void ext4_evict_ea_inode(struct inode *inode);
extern const struct xattr_handler *ext4_xattr_handlers[];
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From fd7e672ea98b95b9d4c9dae316639f03c16a749d Mon Sep 17 00:00:00 2001
From: Baokun Li <libaokun1(a)huawei.com>
Date: Thu, 16 Jun 2022 10:13:58 +0800
Subject: [PATCH] ext4: correct the misjudgment in ext4_iget_extra_inode
Use the EXT4_INODE_HAS_XATTR_SPACE macro to more accurately
determine whether the inode has xattr space.
Cc: stable(a)kernel.org
Signed-off-by: Baokun Li <libaokun1(a)huawei.com>
Reviewed-by: Ritesh Harjani (IBM) <ritesh.list(a)gmail.com>
Reviewed-by: Jan Kara <jack(a)suse.cz>
Link: https://lore.kernel.org/r/20220616021358.2504451-5-libaokun1@huawei.com
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 4bbce86b4ab0..641c9af91641 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4692,8 +4692,7 @@ static inline int ext4_iget_extra_inode(struct inode *inode,
__le32 *magic = (void *)raw_inode +
EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize;
- if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize + sizeof(__le32) <=
- EXT4_INODE_SIZE(inode->i_sb) &&
+ if (EXT4_INODE_HAS_XATTR_SPACE(inode) &&
*magic == cpu_to_le32(EXT4_XATTR_MAGIC)) {
ext4_set_inode_state(inode, EXT4_STATE_XATTR);
return ext4_find_inline_data_nolock(inode);
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 65f8ea4cd57dbd46ea13b41dc8bac03176b04233 Mon Sep 17 00:00:00 2001
From: Lukas Czerner <lczerner(a)redhat.com>
Date: Mon, 4 Jul 2022 16:27:20 +0200
Subject: [PATCH] ext4: check if directory block is within i_size
Currently ext4 directory handling code implicitly assumes that the
directory blocks are always within the i_size. In fact ext4_append()
will attempt to allocate next directory block based solely on i_size and
the i_size is then appropriately increased after a successful
allocation.
However, for this to work it requires i_size to be correct. If, for any
reason, the directory inode i_size is corrupted in a way that the
directory tree refers to a valid directory block past i_size, we could
end up corrupting parts of the directory tree structure by overwriting
already used directory blocks when modifying the directory.
Fix it by catching the corruption early in __ext4_read_dirblock().
Addresses Red-Hat-Bugzilla: #2070205
CVE: CVE-2022-1184
Signed-off-by: Lukas Czerner <lczerner(a)redhat.com>
Cc: stable(a)vger.kernel.org
Reviewed-by: Andreas Dilger <adilger(a)dilger.ca>
Link: https://lore.kernel.org/r/20220704142721.157985-1-lczerner@redhat.com
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 1c6725ecca1a..7fced54e2891 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -110,6 +110,13 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode,
struct ext4_dir_entry *dirent;
int is_dx_block = 0;
+ if (block >= inode->i_size) {
+ ext4_error_inode(inode, func, line, block,
+ "Attempting to read directory block (%u) that is past i_size (%llu)",
+ block, inode->i_size);
+ return ERR_PTR(-EFSCORRUPTED);
+ }
+
if (ext4_simulate_fail(inode->i_sb, EXT4_SIM_DIRBLOCK_EIO))
bh = ERR_PTR(-EIO);
else
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 65f8ea4cd57dbd46ea13b41dc8bac03176b04233 Mon Sep 17 00:00:00 2001
From: Lukas Czerner <lczerner(a)redhat.com>
Date: Mon, 4 Jul 2022 16:27:20 +0200
Subject: [PATCH] ext4: check if directory block is within i_size
Currently ext4 directory handling code implicitly assumes that the
directory blocks are always within the i_size. In fact ext4_append()
will attempt to allocate next directory block based solely on i_size and
the i_size is then appropriately increased after a successful
allocation.
However, for this to work it requires i_size to be correct. If, for any
reason, the directory inode i_size is corrupted in a way that the
directory tree refers to a valid directory block past i_size, we could
end up corrupting parts of the directory tree structure by overwriting
already used directory blocks when modifying the directory.
Fix it by catching the corruption early in __ext4_read_dirblock().
Addresses Red-Hat-Bugzilla: #2070205
CVE: CVE-2022-1184
Signed-off-by: Lukas Czerner <lczerner(a)redhat.com>
Cc: stable(a)vger.kernel.org
Reviewed-by: Andreas Dilger <adilger(a)dilger.ca>
Link: https://lore.kernel.org/r/20220704142721.157985-1-lczerner@redhat.com
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 1c6725ecca1a..7fced54e2891 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -110,6 +110,13 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode,
struct ext4_dir_entry *dirent;
int is_dx_block = 0;
+ if (block >= inode->i_size) {
+ ext4_error_inode(inode, func, line, block,
+ "Attempting to read directory block (%u) that is past i_size (%llu)",
+ block, inode->i_size);
+ return ERR_PTR(-EFSCORRUPTED);
+ }
+
if (ext4_simulate_fail(inode->i_sb, EXT4_SIM_DIRBLOCK_EIO))
bh = ERR_PTR(-EIO);
else
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 65f8ea4cd57dbd46ea13b41dc8bac03176b04233 Mon Sep 17 00:00:00 2001
From: Lukas Czerner <lczerner(a)redhat.com>
Date: Mon, 4 Jul 2022 16:27:20 +0200
Subject: [PATCH] ext4: check if directory block is within i_size
Currently ext4 directory handling code implicitly assumes that the
directory blocks are always within the i_size. In fact ext4_append()
will attempt to allocate the next directory block based solely on i_size and
the i_size is then appropriately increased after a successful
allocation.
However, for this to work it requires i_size to be correct. If, for any
reason, the directory inode i_size is corrupted in a way that the
directory tree refers to a valid directory block past i_size, we could
end up corrupting parts of the directory tree structure by overwriting
already used directory blocks when modifying the directory.
Fix it by catching the corruption early in __ext4_read_dirblock().
Addresses Red-Hat-Bugzilla: #2070205
CVE: CVE-2022-1184
Signed-off-by: Lukas Czerner <lczerner(a)redhat.com>
Cc: stable(a)vger.kernel.org
Reviewed-by: Andreas Dilger <adilger(a)dilger.ca>
Link: https://lore.kernel.org/r/20220704142721.157985-1-lczerner@redhat.com
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 1c6725ecca1a..7fced54e2891 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -110,6 +110,13 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode,
struct ext4_dir_entry *dirent;
int is_dx_block = 0;
+ if (block >= inode->i_size) {
+ ext4_error_inode(inode, func, line, block,
+ "Attempting to read directory block (%u) that is past i_size (%llu)",
+ block, inode->i_size);
+ return ERR_PTR(-EFSCORRUPTED);
+ }
+
if (ext4_simulate_fail(inode->i_sb, EXT4_SIM_DIRBLOCK_EIO))
bh = ERR_PTR(-EIO);
else
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 65f8ea4cd57dbd46ea13b41dc8bac03176b04233 Mon Sep 17 00:00:00 2001
From: Lukas Czerner <lczerner(a)redhat.com>
Date: Mon, 4 Jul 2022 16:27:20 +0200
Subject: [PATCH] ext4: check if directory block is within i_size
Currently ext4 directory handling code implicitly assumes that the
directory blocks are always within the i_size. In fact ext4_append()
will attempt to allocate the next directory block based solely on i_size and
the i_size is then appropriately increased after a successful
allocation.
However, for this to work it requires i_size to be correct. If, for any
reason, the directory inode i_size is corrupted in a way that the
directory tree refers to a valid directory block past i_size, we could
end up corrupting parts of the directory tree structure by overwriting
already used directory blocks when modifying the directory.
Fix it by catching the corruption early in __ext4_read_dirblock().
Addresses Red-Hat-Bugzilla: #2070205
CVE: CVE-2022-1184
Signed-off-by: Lukas Czerner <lczerner(a)redhat.com>
Cc: stable(a)vger.kernel.org
Reviewed-by: Andreas Dilger <adilger(a)dilger.ca>
Link: https://lore.kernel.org/r/20220704142721.157985-1-lczerner@redhat.com
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 1c6725ecca1a..7fced54e2891 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -110,6 +110,13 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode,
struct ext4_dir_entry *dirent;
int is_dx_block = 0;
+ if (block >= inode->i_size) {
+ ext4_error_inode(inode, func, line, block,
+ "Attempting to read directory block (%u) that is past i_size (%llu)",
+ block, inode->i_size);
+ return ERR_PTR(-EFSCORRUPTED);
+ }
+
if (ext4_simulate_fail(inode->i_sb, EXT4_SIM_DIRBLOCK_EIO))
bh = ERR_PTR(-EIO);
else
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e26b04c4c91925dba57324db177a24e18e2d0013 Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov(a)suse.com>
Date: Thu, 23 Jun 2022 10:55:47 +0300
Subject: [PATCH] btrfs: properly flag filesystem with
BTRFS_FEATURE_INCOMPAT_BIG_METADATA
Commit 6f93e834fa7c seemingly inadvertently moved the code responsible
for flagging the filesystem as having BIG_METADATA to a place where
setting the flag was essentially lost. This means that
filesystems created with kernels containing this bug (starting with 5.15)
can potentially be mounted by older (pre-3.4) kernels. In reality
chances for this happening are low because there are other incompat
flags introduced in the meantime. Still, the correct behavior is to set
INCOMPAT_BIG_METADATA flag and persist this in the superblock.
Fixes: 6f93e834fa7c ("btrfs: fix upper limit for max_inline for page size 64K")
CC: stable(a)vger.kernel.org # 5.4+
Reviewed-by: Qu Wenruo <wqu(a)suse.com>
Signed-off-by: Nikolay Borisov <nborisov(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 243bd7bd79cd..e12fd3abd689 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3484,16 +3484,6 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
*/
fs_info->compress_type = BTRFS_COMPRESS_ZLIB;
- /*
- * Flag our filesystem as having big metadata blocks if they are bigger
- * than the page size.
- */
- if (btrfs_super_nodesize(disk_super) > PAGE_SIZE) {
- if (!(features & BTRFS_FEATURE_INCOMPAT_BIG_METADATA))
- btrfs_info(fs_info,
- "flagging fs with big metadata feature");
- features |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA;
- }
/* Set up fs_info before parsing mount options */
nodesize = btrfs_super_nodesize(disk_super);
@@ -3534,6 +3524,17 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
if (features & BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA)
btrfs_info(fs_info, "has skinny extents");
+ /*
+ * Flag our filesystem as having big metadata blocks if they are bigger
+ * than the page size.
+ */
+ if (btrfs_super_nodesize(disk_super) > PAGE_SIZE) {
+ if (!(features & BTRFS_FEATURE_INCOMPAT_BIG_METADATA))
+ btrfs_info(fs_info,
+ "flagging fs with big metadata feature");
+ features |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA;
+ }
+
/*
* mixed block groups end up with duplicate but slightly offset
* extent buffers for the same range. It leads to corruptions
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e26b04c4c91925dba57324db177a24e18e2d0013 Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov(a)suse.com>
Date: Thu, 23 Jun 2022 10:55:47 +0300
Subject: [PATCH] btrfs: properly flag filesystem with
BTRFS_FEATURE_INCOMPAT_BIG_METADATA
Commit 6f93e834fa7c seemingly inadvertently moved the code responsible
for flagging the filesystem as having BIG_METADATA to a place where
setting the flag was essentially lost. This means that
filesystems created with kernels containing this bug (starting with 5.15)
can potentially be mounted by older (pre-3.4) kernels. In reality
chances for this happening are low because there are other incompat
flags introduced in the meantime. Still, the correct behavior is to set
INCOMPAT_BIG_METADATA flag and persist this in the superblock.
Fixes: 6f93e834fa7c ("btrfs: fix upper limit for max_inline for page size 64K")
CC: stable(a)vger.kernel.org # 5.4+
Reviewed-by: Qu Wenruo <wqu(a)suse.com>
Signed-off-by: Nikolay Borisov <nborisov(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 243bd7bd79cd..e12fd3abd689 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3484,16 +3484,6 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
*/
fs_info->compress_type = BTRFS_COMPRESS_ZLIB;
- /*
- * Flag our filesystem as having big metadata blocks if they are bigger
- * than the page size.
- */
- if (btrfs_super_nodesize(disk_super) > PAGE_SIZE) {
- if (!(features & BTRFS_FEATURE_INCOMPAT_BIG_METADATA))
- btrfs_info(fs_info,
- "flagging fs with big metadata feature");
- features |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA;
- }
/* Set up fs_info before parsing mount options */
nodesize = btrfs_super_nodesize(disk_super);
@@ -3534,6 +3524,17 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
if (features & BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA)
btrfs_info(fs_info, "has skinny extents");
+ /*
+ * Flag our filesystem as having big metadata blocks if they are bigger
+ * than the page size.
+ */
+ if (btrfs_super_nodesize(disk_super) > PAGE_SIZE) {
+ if (!(features & BTRFS_FEATURE_INCOMPAT_BIG_METADATA))
+ btrfs_info(fs_info,
+ "flagging fs with big metadata feature");
+ features |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA;
+ }
+
/*
* mixed block groups end up with duplicate but slightly offset
* extent buffers for the same range. It leads to corruptions
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f31f09f6be1c6c1a673e0566e258281a7bbaaa51 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef(a)toxicpanda.com>
Date: Mon, 13 Jun 2022 15:09:48 -0400
Subject: [PATCH] btrfs: tree-log: make the return value for log syncing
consistent
Currently we will return 1 or -EAGAIN if we decide we need to commit
the transaction rather than sync the log. In practice this doesn't
really matter, we interpret any !0 and !BTRFS_NO_LOG_SYNC as needing to
commit the transaction. However this makes it hard to figure out what
the correct thing to do is.
Fix this up by defining BTRFS_LOG_FORCE_COMMIT and using this in all the
places where we want to force the transaction to be committed.
CC: stable(a)vger.kernel.org # 5.15+
Reviewed-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: Josef Bacik <josef(a)toxicpanda.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 9dfde1af8a64..89c6d7ff1987 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -2308,7 +2308,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
btrfs_release_log_ctx_extents(&ctx);
if (ret < 0) {
/* Fallthrough and commit/free transaction. */
- ret = 1;
+ ret = BTRFS_LOG_FORCE_COMMIT;
}
/* we've logged all the items and now have a consistent
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 1201f083d4db..d898ba13285f 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -171,7 +171,7 @@ static int start_log_trans(struct btrfs_trans_handle *trans,
int index = (root->log_transid + 1) % 2;
if (btrfs_need_log_full_commit(trans)) {
- ret = -EAGAIN;
+ ret = BTRFS_LOG_FORCE_COMMIT;
goto out;
}
@@ -194,7 +194,7 @@ static int start_log_trans(struct btrfs_trans_handle *trans,
* writing.
*/
if (zoned && !created) {
- ret = -EAGAIN;
+ ret = BTRFS_LOG_FORCE_COMMIT;
goto out;
}
@@ -3121,7 +3121,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
/* bail out if we need to do a full commit */
if (btrfs_need_log_full_commit(trans)) {
- ret = -EAGAIN;
+ ret = BTRFS_LOG_FORCE_COMMIT;
mutex_unlock(&root->log_mutex);
goto out;
}
@@ -3222,7 +3222,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
}
btrfs_wait_tree_log_extents(log, mark);
mutex_unlock(&log_root_tree->log_mutex);
- ret = -EAGAIN;
+ ret = BTRFS_LOG_FORCE_COMMIT;
goto out;
}
@@ -3261,7 +3261,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
blk_finish_plug(&plug);
btrfs_wait_tree_log_extents(log, mark);
mutex_unlock(&log_root_tree->log_mutex);
- ret = -EAGAIN;
+ ret = BTRFS_LOG_FORCE_COMMIT;
goto out_wake_log_root;
}
@@ -5848,7 +5848,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
inode_only == LOG_INODE_ALL &&
inode->last_unlink_trans >= trans->transid) {
btrfs_set_log_full_commit(trans);
- ret = 1;
+ ret = BTRFS_LOG_FORCE_COMMIT;
goto out_unlock;
}
@@ -6562,12 +6562,12 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
bool log_dentries = false;
if (btrfs_test_opt(fs_info, NOTREELOG)) {
- ret = 1;
+ ret = BTRFS_LOG_FORCE_COMMIT;
goto end_no_trans;
}
if (btrfs_root_refs(&root->root_item) == 0) {
- ret = 1;
+ ret = BTRFS_LOG_FORCE_COMMIT;
goto end_no_trans;
}
@@ -6665,7 +6665,7 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
end_trans:
if (ret < 0) {
btrfs_set_log_full_commit(trans);
- ret = 1;
+ ret = BTRFS_LOG_FORCE_COMMIT;
}
if (ret)
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h
index 1620f8170629..57ab5f3b8dc7 100644
--- a/fs/btrfs/tree-log.h
+++ b/fs/btrfs/tree-log.h
@@ -12,6 +12,9 @@
/* return value for btrfs_log_dentry_safe that means we don't need to log it at all */
#define BTRFS_NO_LOG_SYNC 256
+/* We can't use the tree log for whatever reason, force a transaction commit */
+#define BTRFS_LOG_FORCE_COMMIT (1)
+
struct btrfs_log_ctx {
int log_ret;
int log_transid;
The following commit has been merged into the x86/urgent branch of tip:
Commit-ID: 72cbc8f04fe2fa93443c0fcccb7ad91dfea3d9ce
Gitweb: https://git.kernel.org/tip/72cbc8f04fe2fa93443c0fcccb7ad91dfea3d9ce
Author: Jan Beulich <jbeulich(a)suse.com>
AuthorDate: Thu, 28 Apr 2022 16:50:29 +02:00
Committer: Borislav Petkov <bp(a)suse.de>
CommitterDate: Mon, 15 Aug 2022 10:51:23 +02:00
x86/PAT: Have pat_enabled() properly reflect state when running on Xen
After the commit named in the Fixes: tag, pat_enabled() returns false (because
of PAT initialization being suppressed in the absence of MTRRs being
announced to be available).
This has become a problem: the i915 driver now fails to initialize when
running PV on Xen (i915_gem_object_pin_map() is where I located the
induced failure), and its error handling is flaky enough to (at least
sometimes) result in a hung system.
Yet even beyond that problem the keying of the use of WC mappings to
pat_enabled() (see arch_can_pci_mmap_wc()) means that in particular
graphics frame buffer accesses would have been quite a bit less optimal
than possible.
Arrange for the function to return true in such environments, without
undermining the rest of PAT MSR management logic considering PAT to be
disabled: specifically, no writes to the PAT MSR should occur.
For the new boolean to live in .init.data, init_cache_modes() also needs
moving to .init.text (where it could/should have lived already before).
[ bp: This is the "small fix" variant for stable. It'll get replaced
with a proper PAT and MTRR detection split upstream but that is too
involved for a stable backport.
- additional touchups to commit msg. Use cpu_feature_enabled(). ]
Fixes: bdd8b6c98239 ("drm/i915: replace X86_FEATURE_PAT with pat_enabled()")
Signed-off-by: Jan Beulich <jbeulich(a)suse.com>
Signed-off-by: Borislav Petkov <bp(a)suse.de>
Acked-by: Ingo Molnar <mingo(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Cc: Juergen Gross <jgross(a)suse.com>
Cc: Lucas De Marchi <lucas.demarchi(a)intel.com>
Link: https://lore.kernel.org/r/9385fa60-fa5d-f559-a137-6608408f88b0@suse.com
---
arch/x86/mm/pat/memtype.c | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c
index d5ef64d..66a209f 100644
--- a/arch/x86/mm/pat/memtype.c
+++ b/arch/x86/mm/pat/memtype.c
@@ -62,6 +62,7 @@
static bool __read_mostly pat_bp_initialized;
static bool __read_mostly pat_disabled = !IS_ENABLED(CONFIG_X86_PAT);
+static bool __initdata pat_force_disabled = !IS_ENABLED(CONFIG_X86_PAT);
static bool __read_mostly pat_bp_enabled;
static bool __read_mostly pat_cm_initialized;
@@ -86,6 +87,7 @@ void pat_disable(const char *msg_reason)
static int __init nopat(char *str)
{
pat_disable("PAT support disabled via boot option.");
+ pat_force_disabled = true;
return 0;
}
early_param("nopat", nopat);
@@ -272,7 +274,7 @@ static void pat_ap_init(u64 pat)
wrmsrl(MSR_IA32_CR_PAT, pat);
}
-void init_cache_modes(void)
+void __init init_cache_modes(void)
{
u64 pat = 0;
@@ -313,6 +315,12 @@ void init_cache_modes(void)
*/
pat = PAT(0, WB) | PAT(1, WT) | PAT(2, UC_MINUS) | PAT(3, UC) |
PAT(4, WB) | PAT(5, WT) | PAT(6, UC_MINUS) | PAT(7, UC);
+ } else if (!pat_force_disabled && cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) {
+ /*
+ * Clearly PAT is enabled underneath. Allow pat_enabled() to
+ * reflect this.
+ */
+ pat_bp_enabled = true;
}
__init_cache_modes(pat);
This bug is marked as fixed by commit:
net: core: netlink: add helper refcount dec and lock function
net: sched: add helper function to take reference to Qdisc
net: sched: extend Qdisc with rcu
net: sched: rename qdisc_destroy() to qdisc_put()
net: sched: use Qdisc rcu API instead of relying on rtnl lock
But I can't find it in any tested tree for more than 90 days.
Is it a correct commit? Please update it by replying:
#syz fix: exact-commit-title
Until then the bug is still considered open and
new crashes with the same signature are ignored.
Hi Sasha,
On 8/13/22 5:50 PM, Sasha Levin wrote:
> This is a note to let you know that I've just added the patch titled
>
> genirq: GENERIC_IRQ_IPI depends on SMP
>
> to the 5.4-stable tree which can be found at:
> http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
>
> The filename of the patch is:
> genirq-generic_irq_ipi-depends-on-smp.patch
> and it can be found in the queue-5.4 subdirectory.
>
> If you, or anyone else, feels it should not be added to the stable tree,
> please let <stable(a)vger.kernel.org> know about it.
This commit should not be backported further than 8190cc572981
("irqchip/mips-gic: Only register IPI domain when SMP is enabled"), which it
depends on. It looks like that commit only went back to 5.10.
Regards,
Samuel
> commit 1ac66168f6a589c3f91104eb692fab83bae9ed73
> Author: Samuel Holland <samuel(a)sholland.org>
> Date: Fri Jul 1 15:00:50 2022 -0500
>
> genirq: GENERIC_IRQ_IPI depends on SMP
>
> [ Upstream commit 0f5209fee90b4544c58b4278d944425292789967 ]
>
> The generic IPI code depends on the IRQ affinity mask being allocated
> and initialized. This will not be the case if SMP is disabled. Fix up
> the remaining driver that selected GENERIC_IRQ_IPI in a non-SMP config.
>
> Reported-by: kernel test robot <lkp(a)intel.com>
> Signed-off-by: Samuel Holland <samuel(a)sholland.org>
> Signed-off-by: Marc Zyngier <maz(a)kernel.org>
> Link: https://lore.kernel.org/r/20220701200056.46555-3-samuel@sholland.org
> Signed-off-by: Sasha Levin <sashal(a)kernel.org>
>
> diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
> index 20f44ef9c4c9..e50b5516bbef 100644
> --- a/drivers/irqchip/Kconfig
> +++ b/drivers/irqchip/Kconfig
> @@ -178,7 +178,7 @@ config MADERA_IRQ
> config IRQ_MIPS_CPU
> bool
> select GENERIC_IRQ_CHIP
> - select GENERIC_IRQ_IPI if SYS_SUPPORTS_MULTITHREADING
> + select GENERIC_IRQ_IPI if SMP && SYS_SUPPORTS_MULTITHREADING
> select IRQ_DOMAIN
> select IRQ_DOMAIN_HIERARCHY if GENERIC_IRQ_IPI
> select GENERIC_IRQ_EFFECTIVE_AFF_MASK
> diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig
> index 4e11120265c7..3a8a631044f0 100644
> --- a/kernel/irq/Kconfig
> +++ b/kernel/irq/Kconfig
> @@ -81,6 +81,7 @@ config IRQ_FASTEOI_HIERARCHY_HANDLERS
> # Generic IRQ IPI support
> config GENERIC_IRQ_IPI
> bool
> + depends on SMP
> select IRQ_DOMAIN_HIERARCHY
>
> # Generic MSI interrupt support
>
From: Nadav Amit <namit(a)vmware.com>
When kprobes emulates JNG/JNLE instructions on x86 it uses the wrong
condition. For JNG (opcode: 0F 8E), according to Intel SDM, the jump is
performed if (ZF == 1 or SF != OF). However the kernel emulation
currently uses 'and' instead of 'or'.
As a result, setting a kprobe on JNG/JNLE might cause the kernel to
behave incorrectly whenever the kprobe is hit.
Fix by changing the 'and' to 'or'.
Cc: Masami Hiramatsu <mhiramat(a)kernel.org>
Cc: Peter Zijlstra (Intel) <peterz(a)infradead.org>
Cc: Andy Lutomirski <luto(a)kernel.org>
Cc: stable(a)vger.kernel.org
Fixes: 6256e668b7af ("x86/kprobes: Use int3 instead of debug trap for single-step")
Signed-off-by: Nadav Amit <namit(a)vmware.com>
---
arch/x86/kernel/kprobes/core.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 74167dc5f55e..4c3c27b6aea3 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -505,7 +505,7 @@ static void kprobe_emulate_jcc(struct kprobe *p, struct pt_regs *regs)
match = ((regs->flags & X86_EFLAGS_SF) >> X86_EFLAGS_SF_BIT) ^
((regs->flags & X86_EFLAGS_OF) >> X86_EFLAGS_OF_BIT);
if (p->ainsn.jcc.type >= 0xe)
- match = match && (regs->flags & X86_EFLAGS_ZF);
+ match = match || (regs->flags & X86_EFLAGS_ZF);
}
__kprobe_emulate_jmp(p, regs, (match && !invert) || (!match && invert));
}
--
2.25.1
When we are not connected to a channel, sending a channel "switch"
announcement doesn't make any sense.
The BSS list is empty in that case. This causes the for loop in
cfg80211_get_bss() to be bypassed, so the function returns NULL
(check line 1424 of net/wireless/scan.c), causing the WARN_ON()
in ieee80211_ibss_csa_beacon() to get triggered (check line 500
of net/mac80211/ibss.c), which was consequently reported on the
syzkaller dashboard.
Thus, check if we have an existing connection before generating
the CSA beacon in ieee80211_ibss_finish_csa().
Fixes: cd7760e62c2a ("mac80211: add support for CSA in IBSS mode")
Bug report: https://syzkaller.appspot.com/bug?id=05603ef4ae8926761b678d2939a3b2ad28ab9c…
Reported-by: syzbot+b6c9fe29aefe68e4ad34(a)syzkaller.appspotmail.com
Cc: stable(a)vger.kernel.org
Signed-off-by: Siddh Raman Pant <code(a)siddh.me>
---
The fixes commit is old, and syzkaller shows the problem exists for
4.19 and 4.14 as well, so CC'd stable list.
net/mac80211/ibss.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index d56890e3fabb..9b283bbc7bb4 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -530,6 +530,10 @@ int ieee80211_ibss_finish_csa(struct ieee80211_sub_if_data *sdata)
sdata_assert_lock(sdata);
+ /* When not connected/joined, sending CSA doesn't make sense. */
+ if (ifibss->state != IEEE80211_IBSS_MLME_JOINED)
+ return -ENOLINK;
+
/* update cfg80211 bss information with the new channel */
if (!is_zero_ether_addr(ifibss->bssid)) {
cbss = cfg80211_get_bss(sdata->local->hw.wiphy,
--
2.35.1
Concurrent accesses to the tpm chip are prevented by allowing only a
single thread at a time to obtain a tpm chip reference through
tpm_try_get_ops(). However, the tpm's suspend function does not use
this mechanism, so when the tpm api is called by a kthread which
does not get frozen on suspend (such as the hw_random kthread)
it's possible that the tpm is used when already in suspend, or
in use while in the process of suspending.
This is seen on certain ChromeOS platforms - low-probability warnings
are generated during suspend. In this case, the tpm attempted to read data
from a tpm chip on an already-suspended bus.
i2c_designware i2c_designware.1: Transfer while suspended
Fix:
1. prevent concurrent execution of tpm accesses and suspend/
resume, by letting suspend/resume grab the tpm_mutex.
2. before commencing a tpm access, check if the tpm chip is already
suspended. Fail with -EAGAIN if so.
Tested by running 6000 suspend/resume cycles back-to-back on a
ChromeOS "brya" device. The intermittent warnings reliably
disappear after applying this patch. No system issues were observed.
Cc: <stable(a)vger.kernel.org>
Fixes: e891db1a18bf ("tpm: turn on TPM on suspend for TPM 1.x")
Signed-off-by: Sven van Ashbrook <svenva(a)chromium.org>
---
drivers/char/tpm/tpm-interface.c | 16 ++++++++++++++++
include/linux/tpm.h | 2 ++
2 files changed, 18 insertions(+)
diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c
index 1621ce818705..16ca490fd483 100644
--- a/drivers/char/tpm/tpm-interface.c
+++ b/drivers/char/tpm/tpm-interface.c
@@ -82,6 +82,11 @@ static ssize_t tpm_try_transmit(struct tpm_chip *chip, void *buf, size_t bufsiz)
return -E2BIG;
}
+ if (chip->is_suspended) {
+ dev_info(&chip->dev, "blocking transmit while suspended\n");
+ return -EAGAIN;
+ }
+
rc = chip->ops->send(chip, buf, count);
if (rc < 0) {
if (rc != -EPIPE)
@@ -394,6 +399,8 @@ int tpm_pm_suspend(struct device *dev)
if (!chip)
return -ENODEV;
+ mutex_lock(&chip->tpm_mutex);
+
if (chip->flags & TPM_CHIP_FLAG_ALWAYS_POWERED)
goto suspended;
@@ -411,6 +418,11 @@ int tpm_pm_suspend(struct device *dev)
}
suspended:
+ if (!rc)
+ chip->is_suspended = true;
+
+ mutex_unlock(&chip->tpm_mutex);
+
return rc;
}
EXPORT_SYMBOL_GPL(tpm_pm_suspend);
@@ -426,6 +438,10 @@ int tpm_pm_resume(struct device *dev)
if (chip == NULL)
return -ENODEV;
+ mutex_lock(&chip->tpm_mutex);
+ chip->is_suspended = false;
+ mutex_unlock(&chip->tpm_mutex);
+
return 0;
}
EXPORT_SYMBOL_GPL(tpm_pm_resume);
diff --git a/include/linux/tpm.h b/include/linux/tpm.h
index d7c67581929f..0fbc1a43ae80 100644
--- a/include/linux/tpm.h
+++ b/include/linux/tpm.h
@@ -131,6 +131,8 @@ struct tpm_chip {
int dev_num; /* /dev/tpm# */
unsigned long is_open; /* only one allowed */
+ bool is_suspended;
+
char hwrng_name[64];
struct hwrng hwrng;
--
2.37.1.559.g78731f0fdb-goog
From: Liang He <windhl(a)126.com>
[ Upstream commit d24d7bb2cd947676f9b71fb944d045e09b8b282f ]
In soc_info(), of_find_node_by_type() will return a node pointer
with refcount incremented. We should use of_node_put() when it is
not used anymore.
Acked-by: Timur Tabi <timur(a)kernel.org>
Signed-off-by: Liang He <windhl(a)126.com>
Link: https://lore.kernel.org/r/20220618060850.4058525-1-windhl@126.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/tty/serial/ucc_uart.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/tty/serial/ucc_uart.c b/drivers/tty/serial/ucc_uart.c
index 481eb2989a1e..ed1658b61e54 100644
--- a/drivers/tty/serial/ucc_uart.c
+++ b/drivers/tty/serial/ucc_uart.c
@@ -1143,6 +1143,8 @@ static unsigned int soc_info(unsigned int *rev_h, unsigned int *rev_l)
/* No compatible property, so try the name. */
soc_string = np->name;
+ of_node_put(np);
+
/* Extract the SOC number from the "PowerPC," string */
if ((sscanf(soc_string, "PowerPC,%u", &soc) != 1) || !soc)
return 0;
--
2.35.1
From: Liang He <windhl(a)126.com>
[ Upstream commit d24d7bb2cd947676f9b71fb944d045e09b8b282f ]
In soc_info(), of_find_node_by_type() will return a node pointer
with refcount incremented. We should use of_node_put() when it is
not used anymore.
Acked-by: Timur Tabi <timur(a)kernel.org>
Signed-off-by: Liang He <windhl(a)126.com>
Link: https://lore.kernel.org/r/20220618060850.4058525-1-windhl@126.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/tty/serial/ucc_uart.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/tty/serial/ucc_uart.c b/drivers/tty/serial/ucc_uart.c
index 55b702775786..40b8e414f48f 100644
--- a/drivers/tty/serial/ucc_uart.c
+++ b/drivers/tty/serial/ucc_uart.c
@@ -1143,6 +1143,8 @@ static unsigned int soc_info(unsigned int *rev_h, unsigned int *rev_l)
/* No compatible property, so try the name. */
soc_string = np->name;
+ of_node_put(np);
+
/* Extract the SOC number from the "PowerPC," string */
if ((sscanf(soc_string, "PowerPC,%u", &soc) != 1) || !soc)
return 0;
--
2.35.1
From: Guenter Roeck <linux(a)roeck-us.net>
[ Upstream commit 0cc011c576aaa4de505046f7a6c90933d7c749a9 ]
In some circumstances, attempts are made to add entries to or to remove
entries from an uninitialized list. A prime example is
amdgpu_bo_vm_destroy(): It is indirectly called from
ttm_bo_init_reserved() if that function fails, and tries to remove an
entry from a list. However, that list is only initialized in
amdgpu_bo_create_vm() after the call to ttm_bo_init_reserved() returned
success. This results in crashes such as
BUG: kernel NULL pointer dereference, address: 0000000000000000
#PF: supervisor read access in kernel mode
#PF: error_code(0x0000) - not-present page
PGD 0 P4D 0
Oops: 0000 [#1] PREEMPT SMP NOPTI
CPU: 1 PID: 1479 Comm: chrome Not tainted 5.10.110-15768-g29a72e65dae5
Hardware name: Google Grunt/Grunt, BIOS Google_Grunt.11031.149.0 07/15/2020
RIP: 0010:__list_del_entry_valid+0x26/0x7d
...
Call Trace:
amdgpu_bo_vm_destroy+0x48/0x8b
ttm_bo_init_reserved+0x1d7/0x1e0
amdgpu_bo_create+0x212/0x476
? amdgpu_bo_user_destroy+0x23/0x23
? kmem_cache_alloc+0x60/0x271
amdgpu_bo_create_vm+0x40/0x7d
amdgpu_vm_pt_create+0xe8/0x24b
...
Check if the list's prev and next pointers are NULL to catch such problems.
Link: https://lkml.kernel.org/r/20220531222951.92073-1-linux@roeck-us.net
Signed-off-by: Guenter Roeck <linux(a)roeck-us.net>
Cc: Steven Rostedt <rostedt(a)goodmis.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
lib/list_debug.c | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/lib/list_debug.c b/lib/list_debug.c
index 5d5424b51b74..413daa72a3d8 100644
--- a/lib/list_debug.c
+++ b/lib/list_debug.c
@@ -20,7 +20,11 @@
bool __list_add_valid(struct list_head *new, struct list_head *prev,
struct list_head *next)
{
- if (CHECK_DATA_CORRUPTION(next->prev != prev,
+ if (CHECK_DATA_CORRUPTION(prev == NULL,
+ "list_add corruption. prev is NULL.\n") ||
+ CHECK_DATA_CORRUPTION(next == NULL,
+ "list_add corruption. next is NULL.\n") ||
+ CHECK_DATA_CORRUPTION(next->prev != prev,
"list_add corruption. next->prev should be prev (%px), but was %px. (next=%px).\n",
prev, next->prev, next) ||
CHECK_DATA_CORRUPTION(prev->next != next,
@@ -42,7 +46,11 @@ bool __list_del_entry_valid(struct list_head *entry)
prev = entry->prev;
next = entry->next;
- if (CHECK_DATA_CORRUPTION(next == LIST_POISON1,
+ if (CHECK_DATA_CORRUPTION(next == NULL,
+ "list_del corruption, %px->next is NULL\n", entry) ||
+ CHECK_DATA_CORRUPTION(prev == NULL,
+ "list_del corruption, %px->prev is NULL\n", entry) ||
+ CHECK_DATA_CORRUPTION(next == LIST_POISON1,
"list_del corruption, %px->next is LIST_POISON1 (%px)\n",
entry, LIST_POISON1) ||
CHECK_DATA_CORRUPTION(prev == LIST_POISON2,
--
2.35.1
From: Guenter Roeck <linux(a)roeck-us.net>
[ Upstream commit 0cc011c576aaa4de505046f7a6c90933d7c749a9 ]
In some circumstances, attempts are made to add entries to or to remove
entries from an uninitialized list. A prime example is
amdgpu_bo_vm_destroy(): It is indirectly called from
ttm_bo_init_reserved() if that function fails, and tries to remove an
entry from a list. However, that list is only initialized in
amdgpu_bo_create_vm() after the call to ttm_bo_init_reserved() returned
success. This results in crashes such as
BUG: kernel NULL pointer dereference, address: 0000000000000000
#PF: supervisor read access in kernel mode
#PF: error_code(0x0000) - not-present page
PGD 0 P4D 0
Oops: 0000 [#1] PREEMPT SMP NOPTI
CPU: 1 PID: 1479 Comm: chrome Not tainted 5.10.110-15768-g29a72e65dae5
Hardware name: Google Grunt/Grunt, BIOS Google_Grunt.11031.149.0 07/15/2020
RIP: 0010:__list_del_entry_valid+0x26/0x7d
...
Call Trace:
amdgpu_bo_vm_destroy+0x48/0x8b
ttm_bo_init_reserved+0x1d7/0x1e0
amdgpu_bo_create+0x212/0x476
? amdgpu_bo_user_destroy+0x23/0x23
? kmem_cache_alloc+0x60/0x271
amdgpu_bo_create_vm+0x40/0x7d
amdgpu_vm_pt_create+0xe8/0x24b
...
Check if the list's prev and next pointers are NULL to catch such problems.
Link: https://lkml.kernel.org/r/20220531222951.92073-1-linux@roeck-us.net
Signed-off-by: Guenter Roeck <linux(a)roeck-us.net>
Cc: Steven Rostedt <rostedt(a)goodmis.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
lib/list_debug.c | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/lib/list_debug.c b/lib/list_debug.c
index 5d5424b51b74..413daa72a3d8 100644
--- a/lib/list_debug.c
+++ b/lib/list_debug.c
@@ -20,7 +20,11 @@
bool __list_add_valid(struct list_head *new, struct list_head *prev,
struct list_head *next)
{
- if (CHECK_DATA_CORRUPTION(next->prev != prev,
+ if (CHECK_DATA_CORRUPTION(prev == NULL,
+ "list_add corruption. prev is NULL.\n") ||
+ CHECK_DATA_CORRUPTION(next == NULL,
+ "list_add corruption. next is NULL.\n") ||
+ CHECK_DATA_CORRUPTION(next->prev != prev,
"list_add corruption. next->prev should be prev (%px), but was %px. (next=%px).\n",
prev, next->prev, next) ||
CHECK_DATA_CORRUPTION(prev->next != next,
@@ -42,7 +46,11 @@ bool __list_del_entry_valid(struct list_head *entry)
prev = entry->prev;
next = entry->next;
- if (CHECK_DATA_CORRUPTION(next == LIST_POISON1,
+ if (CHECK_DATA_CORRUPTION(next == NULL,
+ "list_del corruption, %px->next is NULL\n", entry) ||
+ CHECK_DATA_CORRUPTION(prev == NULL,
+ "list_del corruption, %px->prev is NULL\n", entry) ||
+ CHECK_DATA_CORRUPTION(next == LIST_POISON1,
"list_del corruption, %px->next is LIST_POISON1 (%px)\n",
entry, LIST_POISON1) ||
CHECK_DATA_CORRUPTION(prev == LIST_POISON2,
--
2.35.1
From: Guenter Roeck <linux(a)roeck-us.net>
[ Upstream commit 0cc011c576aaa4de505046f7a6c90933d7c749a9 ]
In some circumstances, attempts are made to add entries to or to remove
entries from an uninitialized list. A prime example is
amdgpu_bo_vm_destroy(): It is indirectly called from
ttm_bo_init_reserved() if that function fails, and tries to remove an
entry from a list. However, that list is only initialized in
amdgpu_bo_create_vm() after the call to ttm_bo_init_reserved() returned
success. This results in crashes such as
BUG: kernel NULL pointer dereference, address: 0000000000000000
#PF: supervisor read access in kernel mode
#PF: error_code(0x0000) - not-present page
PGD 0 P4D 0
Oops: 0000 [#1] PREEMPT SMP NOPTI
CPU: 1 PID: 1479 Comm: chrome Not tainted 5.10.110-15768-g29a72e65dae5
Hardware name: Google Grunt/Grunt, BIOS Google_Grunt.11031.149.0 07/15/2020
RIP: 0010:__list_del_entry_valid+0x26/0x7d
...
Call Trace:
amdgpu_bo_vm_destroy+0x48/0x8b
ttm_bo_init_reserved+0x1d7/0x1e0
amdgpu_bo_create+0x212/0x476
? amdgpu_bo_user_destroy+0x23/0x23
? kmem_cache_alloc+0x60/0x271
amdgpu_bo_create_vm+0x40/0x7d
amdgpu_vm_pt_create+0xe8/0x24b
...
Check if the list's prev and next pointers are NULL to catch such problems.
Link: https://lkml.kernel.org/r/20220531222951.92073-1-linux@roeck-us.net
Signed-off-by: Guenter Roeck <linux(a)roeck-us.net>
Cc: Steven Rostedt <rostedt(a)goodmis.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
lib/list_debug.c | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/lib/list_debug.c b/lib/list_debug.c
index 5d5424b51b74..413daa72a3d8 100644
--- a/lib/list_debug.c
+++ b/lib/list_debug.c
@@ -20,7 +20,11 @@
bool __list_add_valid(struct list_head *new, struct list_head *prev,
struct list_head *next)
{
- if (CHECK_DATA_CORRUPTION(next->prev != prev,
+ if (CHECK_DATA_CORRUPTION(prev == NULL,
+ "list_add corruption. prev is NULL.\n") ||
+ CHECK_DATA_CORRUPTION(next == NULL,
+ "list_add corruption. next is NULL.\n") ||
+ CHECK_DATA_CORRUPTION(next->prev != prev,
"list_add corruption. next->prev should be prev (%px), but was %px. (next=%px).\n",
prev, next->prev, next) ||
CHECK_DATA_CORRUPTION(prev->next != next,
@@ -42,7 +46,11 @@ bool __list_del_entry_valid(struct list_head *entry)
prev = entry->prev;
next = entry->next;
- if (CHECK_DATA_CORRUPTION(next == LIST_POISON1,
+ if (CHECK_DATA_CORRUPTION(next == NULL,
+ "list_del corruption, %px->next is NULL\n", entry) ||
+ CHECK_DATA_CORRUPTION(prev == NULL,
+ "list_del corruption, %px->prev is NULL\n", entry) ||
+ CHECK_DATA_CORRUPTION(next == LIST_POISON1,
"list_del corruption, %px->next is LIST_POISON1 (%px)\n",
entry, LIST_POISON1) ||
CHECK_DATA_CORRUPTION(prev == LIST_POISON2,
--
2.35.1
From: Guenter Roeck <linux(a)roeck-us.net>
[ Upstream commit 0cc011c576aaa4de505046f7a6c90933d7c749a9 ]
In some circumstances, attempts are made to add entries to or to remove
entries from an uninitialized list. A prime example is
amdgpu_bo_vm_destroy(): It is indirectly called from
ttm_bo_init_reserved() if that function fails, and tries to remove an
entry from a list. However, that list is only initialized in
amdgpu_bo_create_vm() after the call to ttm_bo_init_reserved() returned
success. This results in crashes such as
BUG: kernel NULL pointer dereference, address: 0000000000000000
#PF: supervisor read access in kernel mode
#PF: error_code(0x0000) - not-present page
PGD 0 P4D 0
Oops: 0000 [#1] PREEMPT SMP NOPTI
CPU: 1 PID: 1479 Comm: chrome Not tainted 5.10.110-15768-g29a72e65dae5
Hardware name: Google Grunt/Grunt, BIOS Google_Grunt.11031.149.0 07/15/2020
RIP: 0010:__list_del_entry_valid+0x26/0x7d
...
Call Trace:
amdgpu_bo_vm_destroy+0x48/0x8b
ttm_bo_init_reserved+0x1d7/0x1e0
amdgpu_bo_create+0x212/0x476
? amdgpu_bo_user_destroy+0x23/0x23
? kmem_cache_alloc+0x60/0x271
amdgpu_bo_create_vm+0x40/0x7d
amdgpu_vm_pt_create+0xe8/0x24b
...
Check if the list's prev and next pointers are NULL to catch such problems.
Link: https://lkml.kernel.org/r/20220531222951.92073-1-linux@roeck-us.net
Signed-off-by: Guenter Roeck <linux(a)roeck-us.net>
Cc: Steven Rostedt <rostedt(a)goodmis.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
lib/list_debug.c | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/lib/list_debug.c b/lib/list_debug.c
index 9daa3fb9d1cd..d98d43f80958 100644
--- a/lib/list_debug.c
+++ b/lib/list_debug.c
@@ -20,7 +20,11 @@
bool __list_add_valid(struct list_head *new, struct list_head *prev,
struct list_head *next)
{
- if (CHECK_DATA_CORRUPTION(next->prev != prev,
+ if (CHECK_DATA_CORRUPTION(prev == NULL,
+ "list_add corruption. prev is NULL.\n") ||
+ CHECK_DATA_CORRUPTION(next == NULL,
+ "list_add corruption. next is NULL.\n") ||
+ CHECK_DATA_CORRUPTION(next->prev != prev,
"list_add corruption. next->prev should be prev (%px), but was %px. (next=%px).\n",
prev, next->prev, next) ||
CHECK_DATA_CORRUPTION(prev->next != next,
@@ -42,7 +46,11 @@ bool __list_del_entry_valid(struct list_head *entry)
prev = entry->prev;
next = entry->next;
- if (CHECK_DATA_CORRUPTION(next == LIST_POISON1,
+ if (CHECK_DATA_CORRUPTION(next == NULL,
+ "list_del corruption, %px->next is NULL\n", entry) ||
+ CHECK_DATA_CORRUPTION(prev == NULL,
+ "list_del corruption, %px->prev is NULL\n", entry) ||
+ CHECK_DATA_CORRUPTION(next == LIST_POISON1,
"list_del corruption, %px->next is LIST_POISON1 (%px)\n",
entry, LIST_POISON1) ||
CHECK_DATA_CORRUPTION(prev == LIST_POISON2,
--
2.35.1
From: Tao Jin <tao-j(a)outlook.com>
[ Upstream commit 54eed5c7b938dc4ef6b14d4ee048bbdafdbce352 ]
The trackpad of the given device sends continuous report of pointers
status as per wxn8 spec. However, the spec does not clarify what happens when
the fingers are lifted so fast that, between two consecutive report
frames, the number of fingers on the pad drops from >=2 to 0. The second-to-last report
contains >=2 fingers with tip state 1 and the last report contains only
1 finger with tip state 0. Although this happens infrequently, a
quick fix will improve the consistency to 100%. A quick fix is to
disable MT_QUIRK_ALWAYS_VALID and enable MT_QUIRK_NOT_SEEN_MEANS_UP.
Test for hid-tools is added in [1]
In addition to this, I2C device 04CA:00B1 may also need similar class
but with MT_QUIRK_FORCE_MULTI_INPUT disabled (but it does not harm to
enable it on non-multi-input device either). The respective owner has
been notified and a patch may be coming soon after testing.
[1]: https://gitlab.freedesktop.org/libevdev/hid-tools/-/merge_requests/130
Signed-off-by: Tao Jin <tao-j(a)outlook.com>
Signed-off-by: Jiri Kosina <jkosina(a)suse.cz>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/hid/hid-multitouch.c | 13 ++++++++++++-
1 file changed, 12 insertions(+), 1 deletion(-)
diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c
index f382444dc2db..a14c48de4446 100644
--- a/drivers/hid/hid-multitouch.c
+++ b/drivers/hid/hid-multitouch.c
@@ -194,6 +194,7 @@ static void mt_post_parse(struct mt_device *td, struct mt_application *app);
#define MT_CLS_WIN_8_FORCE_MULTI_INPUT 0x0015
#define MT_CLS_WIN_8_DISABLE_WAKEUP 0x0016
#define MT_CLS_WIN_8_NO_STICKY_FINGERS 0x0017
+#define MT_CLS_WIN_8_FORCE_MULTI_INPUT_NSMU 0x0018
/* vendor specific classes */
#define MT_CLS_3M 0x0101
@@ -286,6 +287,15 @@ static const struct mt_class mt_classes[] = {
MT_QUIRK_WIN8_PTP_BUTTONS |
MT_QUIRK_FORCE_MULTI_INPUT,
.export_all_inputs = true },
+ { .name = MT_CLS_WIN_8_FORCE_MULTI_INPUT_NSMU,
+ .quirks = MT_QUIRK_IGNORE_DUPLICATES |
+ MT_QUIRK_HOVERING |
+ MT_QUIRK_CONTACT_CNT_ACCURATE |
+ MT_QUIRK_STICKY_FINGERS |
+ MT_QUIRK_WIN8_PTP_BUTTONS |
+ MT_QUIRK_FORCE_MULTI_INPUT |
+ MT_QUIRK_NOT_SEEN_MEANS_UP,
+ .export_all_inputs = true },
{ .name = MT_CLS_WIN_8_DISABLE_WAKEUP,
.quirks = MT_QUIRK_ALWAYS_VALID |
MT_QUIRK_IGNORE_DUPLICATES |
@@ -783,6 +793,7 @@ static int mt_touch_input_mapping(struct hid_device *hdev, struct hid_input *hi,
case HID_DG_CONFIDENCE:
if ((cls->name == MT_CLS_WIN_8 ||
cls->name == MT_CLS_WIN_8_FORCE_MULTI_INPUT ||
+ cls->name == MT_CLS_WIN_8_FORCE_MULTI_INPUT_NSMU ||
cls->name == MT_CLS_WIN_8_DISABLE_WAKEUP) &&
(field->application == HID_DG_TOUCHPAD ||
field->application == HID_DG_TOUCHSCREEN))
@@ -2033,7 +2044,7 @@ static const struct hid_device_id mt_devices[] = {
USB_DEVICE_ID_LENOVO_X1_TAB3) },
/* Lenovo X12 TAB Gen 1 */
- { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT,
+ { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT_NSMU,
HID_DEVICE(BUS_USB, HID_GROUP_MULTITOUCH_WIN_8,
USB_VENDOR_ID_LENOVO,
USB_DEVICE_ID_LENOVO_X12_TAB) },
--
2.35.1
From: Gil Fine <gil.fine(a)intel.com>
[ Upstream commit 5fd6b9a5cbe63fea4c490fee8af34144a139a266 ]
In case of uni-directional time sync, TMU handshake is
initiated by upstream router. In case of bi-directional
time sync, TMU handshake is initiated by downstream router.
In order to handle correctly the case of uni-directional mode,
we avoid changing the upstream router's rate to off,
because it might have another downstream router plugged that is set to
uni-directional mode (and we don't want to change its mode).
Instead, we always change downstream router's rate.
Signed-off-by: Gil Fine <gil.fine(a)intel.com>
Signed-off-by: Mika Westerberg <mika.westerberg(a)linux.intel.com>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/thunderbolt/tmu.c | 13 +++++++------
1 file changed, 7 insertions(+), 6 deletions(-)
diff --git a/drivers/thunderbolt/tmu.c b/drivers/thunderbolt/tmu.c
index e4a07a26f693..93ba1d00335b 100644
--- a/drivers/thunderbolt/tmu.c
+++ b/drivers/thunderbolt/tmu.c
@@ -359,13 +359,14 @@ int tb_switch_tmu_disable(struct tb_switch *sw)
* In case of uni-directional time sync, TMU handshake is
* initiated by upstream router. In case of bi-directional
* time sync, TMU handshake is initiated by downstream router.
- * Therefore, we change the rate to off in the respective
- * router.
+ * We change downstream router's rate to off for both uni/bidir
+ * cases although it is needed only for the bi-directional mode.
+ * We avoid changing upstream router's mode since it might
+ * have another downstream router plugged, that is set to
+ * uni-directional mode and we don't want to change it's TMU
+ * mode.
*/
- if (unidirectional)
- tb_switch_tmu_rate_write(parent, TB_SWITCH_TMU_RATE_OFF);
- else
- tb_switch_tmu_rate_write(sw, TB_SWITCH_TMU_RATE_OFF);
+ tb_switch_tmu_rate_write(sw, TB_SWITCH_TMU_RATE_OFF);
tb_port_tmu_time_sync_disable(up);
ret = tb_port_tmu_time_sync_disable(down);
--
2.35.1
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7ee951acd31a88f941fd6535fbdee3a1567f1d63 Mon Sep 17 00:00:00 2001
From: Phil Auld <pauld(a)redhat.com>
Date: Fri, 15 Jul 2022 09:49:24 -0400
Subject: [PATCH] drivers/base: fix userspace break from using bin_attributes
for cpumap and cpulist
Using bin_attributes with a 0 size causes fstat and friends to return that
0 size. This breaks userspace code that retrieves the size before reading
the file. Rather than reverting 75bd50fa841 ("drivers/base/node.c: use
bin_attribute to break the size limitation of cpumap ABI") let's put in a
size value at compile time.
For cpulist the maximum size is on the order of
NR_CPUS * (ceil(log10(NR_CPUS)) + 1)/2
which for 8192 is 20480 (8192 * 5)/2. In order to get near that you'd need
a system with every other CPU on one node. For example: (0,2,4,6, ... ).
To simplify the math and support larger NR_CPUS in the future we are using
(NR_CPUS * 7)/2. We also set it to a min of PAGE_SIZE to retain the older
behavior for smaller NR_CPUS.
For the cpumap file the size works out to be NR_CPUS/4 + NR_CPUS/32 - 1
(or NR_CPUS * 9/32 - 1) including the ","s.
Add a set of macros for these values to cpumask.h so they can be used in
multiple places. Apply these to the handful of such files in
drivers/base/topology.c as well as node.c.
As an example, on an 80 cpu 4-node system (NR_CPUS == 8192):
before:
-r--r--r--. 1 root root 0 Jul 12 14:08 system/node/node0/cpulist
-r--r--r--. 1 root root 0 Jul 11 17:25 system/node/node0/cpumap
after:
-r--r--r--. 1 root root 28672 Jul 13 11:32 system/node/node0/cpulist
-r--r--r--. 1 root root 4096 Jul 13 11:31 system/node/node0/cpumap
CONFIG_NR_CPUS = 16384
-r--r--r--. 1 root root 57344 Jul 13 14:03 system/node/node0/cpulist
-r--r--r--. 1 root root 4607 Jul 13 14:02 system/node/node0/cpumap
The actual number of cpus doesn't matter for the reported size since they
are based on NR_CPUS.
Fixes: 75bd50fa841d ("drivers/base/node.c: use bin_attribute to break the size limitation of cpumap ABI")
Fixes: bb9ec13d156e ("topology: use bin_attribute to break the size limitation of cpumap ABI")
Cc: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Cc: "Rafael J. Wysocki" <rafael(a)kernel.org>
Cc: Yury Norov <yury.norov(a)gmail.com>
Cc: stable(a)vger.kernel.org
Acked-by: Yury Norov <yury.norov(a)gmail.com> (for include/linux/cpumask.h)
Signed-off-by: Phil Auld <pauld(a)redhat.com>
Link: https://lore.kernel.org/r/20220715134924.3466194-1-pauld@redhat.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/base/node.c b/drivers/base/node.c
index 0ac6376ef7a1..eb0f43784c2b 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -45,7 +45,7 @@ static inline ssize_t cpumap_read(struct file *file, struct kobject *kobj,
return n;
}
-static BIN_ATTR_RO(cpumap, 0);
+static BIN_ATTR_RO(cpumap, CPUMAP_FILE_MAX_BYTES);
static inline ssize_t cpulist_read(struct file *file, struct kobject *kobj,
struct bin_attribute *attr, char *buf,
@@ -66,7 +66,7 @@ static inline ssize_t cpulist_read(struct file *file, struct kobject *kobj,
return n;
}
-static BIN_ATTR_RO(cpulist, 0);
+static BIN_ATTR_RO(cpulist, CPULIST_FILE_MAX_BYTES);
/**
* struct node_access_nodes - Access class device to hold user visible
diff --git a/drivers/base/topology.c b/drivers/base/topology.c
index ac6ad9ab67f9..89f98be5c5b9 100644
--- a/drivers/base/topology.c
+++ b/drivers/base/topology.c
@@ -62,47 +62,47 @@ define_id_show_func(ppin, "0x%llx");
static DEVICE_ATTR_ADMIN_RO(ppin);
define_siblings_read_func(thread_siblings, sibling_cpumask);
-static BIN_ATTR_RO(thread_siblings, 0);
-static BIN_ATTR_RO(thread_siblings_list, 0);
+static BIN_ATTR_RO(thread_siblings, CPUMAP_FILE_MAX_BYTES);
+static BIN_ATTR_RO(thread_siblings_list, CPULIST_FILE_MAX_BYTES);
define_siblings_read_func(core_cpus, sibling_cpumask);
-static BIN_ATTR_RO(core_cpus, 0);
-static BIN_ATTR_RO(core_cpus_list, 0);
+static BIN_ATTR_RO(core_cpus, CPUMAP_FILE_MAX_BYTES);
+static BIN_ATTR_RO(core_cpus_list, CPULIST_FILE_MAX_BYTES);
define_siblings_read_func(core_siblings, core_cpumask);
-static BIN_ATTR_RO(core_siblings, 0);
-static BIN_ATTR_RO(core_siblings_list, 0);
+static BIN_ATTR_RO(core_siblings, CPUMAP_FILE_MAX_BYTES);
+static BIN_ATTR_RO(core_siblings_list, CPULIST_FILE_MAX_BYTES);
#ifdef TOPOLOGY_CLUSTER_SYSFS
define_siblings_read_func(cluster_cpus, cluster_cpumask);
-static BIN_ATTR_RO(cluster_cpus, 0);
-static BIN_ATTR_RO(cluster_cpus_list, 0);
+static BIN_ATTR_RO(cluster_cpus, CPUMAP_FILE_MAX_BYTES);
+static BIN_ATTR_RO(cluster_cpus_list, CPULIST_FILE_MAX_BYTES);
#endif
#ifdef TOPOLOGY_DIE_SYSFS
define_siblings_read_func(die_cpus, die_cpumask);
-static BIN_ATTR_RO(die_cpus, 0);
-static BIN_ATTR_RO(die_cpus_list, 0);
+static BIN_ATTR_RO(die_cpus, CPUMAP_FILE_MAX_BYTES);
+static BIN_ATTR_RO(die_cpus_list, CPULIST_FILE_MAX_BYTES);
#endif
define_siblings_read_func(package_cpus, core_cpumask);
-static BIN_ATTR_RO(package_cpus, 0);
-static BIN_ATTR_RO(package_cpus_list, 0);
+static BIN_ATTR_RO(package_cpus, CPUMAP_FILE_MAX_BYTES);
+static BIN_ATTR_RO(package_cpus_list, CPULIST_FILE_MAX_BYTES);
#ifdef TOPOLOGY_BOOK_SYSFS
define_id_show_func(book_id, "%d");
static DEVICE_ATTR_RO(book_id);
define_siblings_read_func(book_siblings, book_cpumask);
-static BIN_ATTR_RO(book_siblings, 0);
-static BIN_ATTR_RO(book_siblings_list, 0);
+static BIN_ATTR_RO(book_siblings, CPUMAP_FILE_MAX_BYTES);
+static BIN_ATTR_RO(book_siblings_list, CPULIST_FILE_MAX_BYTES);
#endif
#ifdef TOPOLOGY_DRAWER_SYSFS
define_id_show_func(drawer_id, "%d");
static DEVICE_ATTR_RO(drawer_id);
define_siblings_read_func(drawer_siblings, drawer_cpumask);
-static BIN_ATTR_RO(drawer_siblings, 0);
-static BIN_ATTR_RO(drawer_siblings_list, 0);
+static BIN_ATTR_RO(drawer_siblings, CPUMAP_FILE_MAX_BYTES);
+static BIN_ATTR_RO(drawer_siblings_list, CPULIST_FILE_MAX_BYTES);
#endif
static struct bin_attribute *bin_attrs[] = {
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index fe29ac7cc469..4592d0845941 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -1071,4 +1071,22 @@ cpumap_print_list_to_buf(char *buf, const struct cpumask *mask,
[0] = 1UL \
} }
+/*
+ * Provide a valid theoretical max size for cpumap and cpulist sysfs files
+ * to avoid breaking userspace which may allocate a buffer based on the size
+ * reported by e.g. fstat.
+ *
+ * for cpumap NR_CPUS * 9/32 - 1 should be an exact length.
+ *
+ * For cpulist 7 is (ceil(log10(NR_CPUS)) + 1) allowing for NR_CPUS to be up
+ * to 2 orders of magnitude larger than 8192. And then we divide by 2 to
+ * cover a worst-case of every other cpu being on one of two nodes for a
+ * very large NR_CPUS.
+ *
+ * Use PAGE_SIZE as a minimum for smaller configurations.
+ */
+#define CPUMAP_FILE_MAX_BYTES ((((NR_CPUS * 9)/32 - 1) > PAGE_SIZE) \
+ ? (NR_CPUS * 9)/32 - 1 : PAGE_SIZE)
+#define CPULIST_FILE_MAX_BYTES (((NR_CPUS * 7)/2 > PAGE_SIZE) ? (NR_CPUS * 7)/2 : PAGE_SIZE)
+
#endif /* __LINUX_CPUMASK_H */
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 436d219069628f0f0ed27f606224d4ee02a0ca17 Mon Sep 17 00:00:00 2001
From: Werner Sembach <wse(a)tuxedocomputers.com>
Date: Fri, 8 Jul 2022 13:17:38 -0700
Subject: [PATCH] Input: i8042 - add additional TUXEDO devices to i8042 quirk
tables
A lot of modern Clevo barebones have touchpad and/or keyboard issues after
suspend fixable with nomux + reset + noloop + nopnp. Luckily, none of them
have an external PS/2 port so this can safely be set for all of them.
I'm not entirely sure if every device listed really needs all four quirks,
but after testing and production use, no negative effects could be
observed when setting all four.
Signed-off-by: Werner Sembach <wse(a)tuxedocomputers.com>
Reviewed-by: Hans de Goede <hdegoede(a)redhat.com>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/20220708161005.1251929-2-wse@tuxedocomputers.com
Signed-off-by: Dmitry Torokhov <dmitry.torokhov(a)gmail.com>
diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h
index 50b090e77fca..5204a7dd61d4 100644
--- a/drivers/input/serio/i8042-x86ia64io.h
+++ b/drivers/input/serio/i8042-x86ia64io.h
@@ -900,14 +900,6 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = {
},
.driver_data = (void *)(SERIO_QUIRK_NOMUX)
},
- {
- /* Clevo P650RS, 650RP6, Sager NP8152-S, and others */
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Notebook"),
- DMI_MATCH(DMI_PRODUCT_NAME, "P65xRP"),
- },
- .driver_data = (void *)(SERIO_QUIRK_RESET_ALWAYS)
- },
{
/* OQO Model 01 */
.matches = {
@@ -1162,6 +1154,74 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = {
.driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
},
+ {
+ /*
+ * This is only a partial board_name and might be followed by
+ * another letter or number. DMI_MATCH however does do partial
+ * matching.
+ */
+ .matches = {
+ DMI_MATCH(DMI_PRODUCT_NAME, "P65xH"),
+ },
+ .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
+ SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ },
+ {
+ /* Clevo P650RS, 650RP6, Sager NP8152-S, and others */
+ .matches = {
+ DMI_MATCH(DMI_PRODUCT_NAME, "P65xRP"),
+ },
+ .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
+ SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ },
+ {
+ /*
+ * This is only a partial board_name and might be followed by
+ * another letter or number. DMI_MATCH however does do partial
+ * matching.
+ */
+ .matches = {
+ DMI_MATCH(DMI_PRODUCT_NAME, "P65_P67H"),
+ },
+ .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
+ SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ },
+ {
+ /*
+ * This is only a partial board_name and might be followed by
+ * another letter or number. DMI_MATCH however does do partial
+ * matching.
+ */
+ .matches = {
+ DMI_MATCH(DMI_PRODUCT_NAME, "P65_67RP"),
+ },
+ .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
+ SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ },
+ {
+ /*
+ * This is only a partial board_name and might be followed by
+ * another letter or number. DMI_MATCH however does do partial
+ * matching.
+ */
+ .matches = {
+ DMI_MATCH(DMI_PRODUCT_NAME, "P65_67RS"),
+ },
+ .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
+ SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ },
+ {
+ /*
+ * This is only a partial board_name and might be followed by
+ * another letter or number. DMI_MATCH however does do partial
+ * matching.
+ */
+ .matches = {
+ DMI_MATCH(DMI_PRODUCT_NAME, "P67xRP"),
+ },
+ .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
+ SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ },
{
.matches = {
DMI_MATCH(DMI_BOARD_NAME, "PB50_70DFx,DDx"),
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 436d219069628f0f0ed27f606224d4ee02a0ca17 Mon Sep 17 00:00:00 2001
From: Werner Sembach <wse(a)tuxedocomputers.com>
Date: Fri, 8 Jul 2022 13:17:38 -0700
Subject: [PATCH] Input: i8042 - add additional TUXEDO devices to i8042 quirk
tables
A lot of modern Clevo barebones have touchpad and/or keyboard issues after
suspend fixable with nomux + reset + noloop + nopnp. Luckily, none of them
have an external PS/2 port so this can safely be set for all of them.
I'm not entirely sure if every device listed really needs all four quirks,
but after testing and production use, no negative effects could be
observed when setting all four.
Signed-off-by: Werner Sembach <wse(a)tuxedocomputers.com>
Reviewed-by: Hans de Goede <hdegoede(a)redhat.com>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/20220708161005.1251929-2-wse@tuxedocomputers.com
Signed-off-by: Dmitry Torokhov <dmitry.torokhov(a)gmail.com>
diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h
index 50b090e77fca..5204a7dd61d4 100644
--- a/drivers/input/serio/i8042-x86ia64io.h
+++ b/drivers/input/serio/i8042-x86ia64io.h
@@ -900,14 +900,6 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = {
},
.driver_data = (void *)(SERIO_QUIRK_NOMUX)
},
- {
- /* Clevo P650RS, 650RP6, Sager NP8152-S, and others */
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Notebook"),
- DMI_MATCH(DMI_PRODUCT_NAME, "P65xRP"),
- },
- .driver_data = (void *)(SERIO_QUIRK_RESET_ALWAYS)
- },
{
/* OQO Model 01 */
.matches = {
@@ -1162,6 +1154,74 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = {
.driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
},
+ {
+ /*
+ * This is only a partial board_name and might be followed by
+ * another letter or number. DMI_MATCH however does do partial
+ * matching.
+ */
+ .matches = {
+ DMI_MATCH(DMI_PRODUCT_NAME, "P65xH"),
+ },
+ .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
+ SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ },
+ {
+ /* Clevo P650RS, 650RP6, Sager NP8152-S, and others */
+ .matches = {
+ DMI_MATCH(DMI_PRODUCT_NAME, "P65xRP"),
+ },
+ .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
+ SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ },
+ {
+ /*
+ * This is only a partial board_name and might be followed by
+ * another letter or number. DMI_MATCH however does do partial
+ * matching.
+ */
+ .matches = {
+ DMI_MATCH(DMI_PRODUCT_NAME, "P65_P67H"),
+ },
+ .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
+ SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ },
+ {
+ /*
+ * This is only a partial board_name and might be followed by
+ * another letter or number. DMI_MATCH however does do partial
+ * matching.
+ */
+ .matches = {
+ DMI_MATCH(DMI_PRODUCT_NAME, "P65_67RP"),
+ },
+ .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
+ SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ },
+ {
+ /*
+ * This is only a partial board_name and might be followed by
+ * another letter or number. DMI_MATCH however does do partial
+ * matching.
+ */
+ .matches = {
+ DMI_MATCH(DMI_PRODUCT_NAME, "P65_67RS"),
+ },
+ .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
+ SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ },
+ {
+ /*
+ * This is only a partial board_name and might be followed by
+ * another letter or number. DMI_MATCH however does do partial
+ * matching.
+ */
+ .matches = {
+ DMI_MATCH(DMI_PRODUCT_NAME, "P67xRP"),
+ },
+ .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
+ SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ },
{
.matches = {
DMI_MATCH(DMI_BOARD_NAME, "PB50_70DFx,DDx"),
The patch below does not apply to the 5.18-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 436d219069628f0f0ed27f606224d4ee02a0ca17 Mon Sep 17 00:00:00 2001
From: Werner Sembach <wse(a)tuxedocomputers.com>
Date: Fri, 8 Jul 2022 13:17:38 -0700
Subject: [PATCH] Input: i8042 - add additional TUXEDO devices to i8042 quirk
tables
A lot of modern Clevo barebones have touchpad and/or keyboard issues after
suspend fixable with nomux + reset + noloop + nopnp. Luckily, none of them
have an external PS/2 port so this can safely be set for all of them.
I'm not entirely sure if every device listed really needs all four quirks,
but after testing and production use, no negative effects could be
observed when setting all four.
Signed-off-by: Werner Sembach <wse(a)tuxedocomputers.com>
Reviewed-by: Hans de Goede <hdegoede(a)redhat.com>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/20220708161005.1251929-2-wse@tuxedocomputers.com
Signed-off-by: Dmitry Torokhov <dmitry.torokhov(a)gmail.com>
diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h
index 50b090e77fca..5204a7dd61d4 100644
--- a/drivers/input/serio/i8042-x86ia64io.h
+++ b/drivers/input/serio/i8042-x86ia64io.h
@@ -900,14 +900,6 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = {
},
.driver_data = (void *)(SERIO_QUIRK_NOMUX)
},
- {
- /* Clevo P650RS, 650RP6, Sager NP8152-S, and others */
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Notebook"),
- DMI_MATCH(DMI_PRODUCT_NAME, "P65xRP"),
- },
- .driver_data = (void *)(SERIO_QUIRK_RESET_ALWAYS)
- },
{
/* OQO Model 01 */
.matches = {
@@ -1162,6 +1154,74 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = {
.driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
},
+ {
+ /*
+ * This is only a partial board_name and might be followed by
+ * another letter or number. DMI_MATCH however does do partial
+ * matching.
+ */
+ .matches = {
+ DMI_MATCH(DMI_PRODUCT_NAME, "P65xH"),
+ },
+ .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
+ SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ },
+ {
+ /* Clevo P650RS, 650RP6, Sager NP8152-S, and others */
+ .matches = {
+ DMI_MATCH(DMI_PRODUCT_NAME, "P65xRP"),
+ },
+ .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
+ SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ },
+ {
+ /*
+ * This is only a partial board_name and might be followed by
+ * another letter or number. DMI_MATCH however does do partial
+ * matching.
+ */
+ .matches = {
+ DMI_MATCH(DMI_PRODUCT_NAME, "P65_P67H"),
+ },
+ .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
+ SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ },
+ {
+ /*
+ * This is only a partial board_name and might be followed by
+ * another letter or number. DMI_MATCH however does do partial
+ * matching.
+ */
+ .matches = {
+ DMI_MATCH(DMI_PRODUCT_NAME, "P65_67RP"),
+ },
+ .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
+ SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ },
+ {
+ /*
+ * This is only a partial board_name and might be followed by
+ * another letter or number. DMI_MATCH however does do partial
+ * matching.
+ */
+ .matches = {
+ DMI_MATCH(DMI_PRODUCT_NAME, "P65_67RS"),
+ },
+ .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
+ SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ },
+ {
+ /*
+ * This is only a partial board_name and might be followed by
+ * another letter or number. DMI_MATCH however does do partial
+ * matching.
+ */
+ .matches = {
+ DMI_MATCH(DMI_PRODUCT_NAME, "P67xRP"),
+ },
+ .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
+ SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ },
{
.matches = {
DMI_MATCH(DMI_BOARD_NAME, "PB50_70DFx,DDx"),
The patch below does not apply to the 5.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 436d219069628f0f0ed27f606224d4ee02a0ca17 Mon Sep 17 00:00:00 2001
From: Werner Sembach <wse(a)tuxedocomputers.com>
Date: Fri, 8 Jul 2022 13:17:38 -0700
Subject: [PATCH] Input: i8042 - add additional TUXEDO devices to i8042 quirk
tables
A lot of modern Clevo barebones have touchpad and/or keyboard issues after
suspend fixable with nomux + reset + noloop + nopnp. Luckily, none of them
have an external PS/2 port so this can safely be set for all of them.
I'm not entirely sure if every device listed really needs all four quirks,
but after testing and production use, no negative effects could be
observed when setting all four.
Signed-off-by: Werner Sembach <wse(a)tuxedocomputers.com>
Reviewed-by: Hans de Goede <hdegoede(a)redhat.com>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/20220708161005.1251929-2-wse@tuxedocomputers.com
Signed-off-by: Dmitry Torokhov <dmitry.torokhov(a)gmail.com>
diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h
index 50b090e77fca..5204a7dd61d4 100644
--- a/drivers/input/serio/i8042-x86ia64io.h
+++ b/drivers/input/serio/i8042-x86ia64io.h
@@ -900,14 +900,6 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = {
},
.driver_data = (void *)(SERIO_QUIRK_NOMUX)
},
- {
- /* Clevo P650RS, 650RP6, Sager NP8152-S, and others */
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Notebook"),
- DMI_MATCH(DMI_PRODUCT_NAME, "P65xRP"),
- },
- .driver_data = (void *)(SERIO_QUIRK_RESET_ALWAYS)
- },
{
/* OQO Model 01 */
.matches = {
@@ -1162,6 +1154,74 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = {
.driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
},
+ {
+ /*
+ * This is only a partial board_name and might be followed by
+ * another letter or number. DMI_MATCH however does do partial
+ * matching.
+ */
+ .matches = {
+ DMI_MATCH(DMI_PRODUCT_NAME, "P65xH"),
+ },
+ .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
+ SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ },
+ {
+ /* Clevo P650RS, 650RP6, Sager NP8152-S, and others */
+ .matches = {
+ DMI_MATCH(DMI_PRODUCT_NAME, "P65xRP"),
+ },
+ .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
+ SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ },
+ {
+ /*
+ * This is only a partial board_name and might be followed by
+ * another letter or number. DMI_MATCH however does do partial
+ * matching.
+ */
+ .matches = {
+ DMI_MATCH(DMI_PRODUCT_NAME, "P65_P67H"),
+ },
+ .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
+ SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ },
+ {
+ /*
+ * This is only a partial board_name and might be followed by
+ * another letter or number. DMI_MATCH however does do partial
+ * matching.
+ */
+ .matches = {
+ DMI_MATCH(DMI_PRODUCT_NAME, "P65_67RP"),
+ },
+ .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
+ SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ },
+ {
+ /*
+ * This is only a partial board_name and might be followed by
+ * another letter or number. DMI_MATCH however does do partial
+ * matching.
+ */
+ .matches = {
+ DMI_MATCH(DMI_PRODUCT_NAME, "P65_67RS"),
+ },
+ .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
+ SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ },
+ {
+ /*
+ * This is only a partial board_name and might be followed by
+ * another letter or number. DMI_MATCH however does do partial
+ * matching.
+ */
+ .matches = {
+ DMI_MATCH(DMI_PRODUCT_NAME, "P67xRP"),
+ },
+ .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
+ SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ },
{
.matches = {
DMI_MATCH(DMI_BOARD_NAME, "PB50_70DFx,DDx"),
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From cc18cc5e82033d406f54144ad6f8092206004684 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence(a)gmail.com>
Date: Thu, 4 Aug 2022 15:13:46 +0100
Subject: [PATCH] io_uring: mem-account pbuf buckets
Potentially, someone may create as many pbuf buckets as there are indexes
in an xarray, without any other restriction bounding our memory usage.
Put the memory needed for the buckets under memory accounting.
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Pavel Begunkov <asml.silence(a)gmail.com>
Link: https://lore.kernel.org/r/d34c452e45793e978d26e2606211ec9070d329ea.16596223…
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c
index e538fa7cb727..a73f40a4cfe6 100644
--- a/io_uring/kbuf.c
+++ b/io_uring/kbuf.c
@@ -436,7 +436,7 @@ int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags)
bl = io_buffer_get_list(ctx, p->bgid);
if (unlikely(!bl)) {
- bl = kzalloc(sizeof(*bl), GFP_KERNEL);
+ bl = kzalloc(sizeof(*bl), GFP_KERNEL_ACCOUNT);
if (!bl) {
ret = -ENOMEM;
goto err;
The patch below does not apply to the 5.18-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From cc18cc5e82033d406f54144ad6f8092206004684 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence(a)gmail.com>
Date: Thu, 4 Aug 2022 15:13:46 +0100
Subject: [PATCH] io_uring: mem-account pbuf buckets
Potentially, someone may create as many pbuf buckets as there are indexes
in an xarray, without any other restriction bounding our memory usage.
Put the memory needed for the buckets under memory accounting.
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Pavel Begunkov <asml.silence(a)gmail.com>
Link: https://lore.kernel.org/r/d34c452e45793e978d26e2606211ec9070d329ea.16596223…
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c
index e538fa7cb727..a73f40a4cfe6 100644
--- a/io_uring/kbuf.c
+++ b/io_uring/kbuf.c
@@ -436,7 +436,7 @@ int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags)
bl = io_buffer_get_list(ctx, p->bgid);
if (unlikely(!bl)) {
- bl = kzalloc(sizeof(*bl), GFP_KERNEL);
+ bl = kzalloc(sizeof(*bl), GFP_KERNEL_ACCOUNT);
if (!bl) {
ret = -ENOMEM;
goto err;
The patch below does not apply to the 5.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From cc18cc5e82033d406f54144ad6f8092206004684 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence(a)gmail.com>
Date: Thu, 4 Aug 2022 15:13:46 +0100
Subject: [PATCH] io_uring: mem-account pbuf buckets
Potentially, someone may create as many pbuf buckets as there are indexes
in an xarray, without any other restriction bounding our memory usage.
Put the memory needed for the buckets under memory accounting.
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Pavel Begunkov <asml.silence(a)gmail.com>
Link: https://lore.kernel.org/r/d34c452e45793e978d26e2606211ec9070d329ea.16596223…
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c
index e538fa7cb727..a73f40a4cfe6 100644
--- a/io_uring/kbuf.c
+++ b/io_uring/kbuf.c
@@ -436,7 +436,7 @@ int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags)
bl = io_buffer_get_list(ctx, p->bgid);
if (unlikely(!bl)) {
- bl = kzalloc(sizeof(*bl), GFP_KERNEL);
+ bl = kzalloc(sizeof(*bl), GFP_KERNEL_ACCOUNT);
if (!bl) {
ret = -ENOMEM;
goto err;
The patch below does not apply to the 5.18-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f482aa98652795846cc55da98ebe331eb74f3d0b Mon Sep 17 00:00:00 2001
From: Peilin Ye <peilin.ye(a)bytedance.com>
Date: Wed, 3 Aug 2022 15:23:43 -0700
Subject: [PATCH] audit, io_uring, io-wq: Fix memory leak in io_sq_thread() and
io_wqe_worker()
Currently @audit_context is allocated twice for io_uring workers:
1. copy_process() calls audit_alloc();
2. io_sq_thread() or io_wqe_worker() calls audit_alloc_kernel() (which
is effectively audit_alloc()) and overwrites @audit_context,
causing:
BUG: memory leak
unreferenced object 0xffff888144547400 (size 1024):
<...>
hex dump (first 32 bytes):
00 00 00 00 00 00 00 00 01 00 00 00 00 00 00 00 ................
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
backtrace:
[<ffffffff8135cfc3>] audit_alloc+0x133/0x210
[<ffffffff81239e63>] copy_process+0xcd3/0x2340
[<ffffffff8123b5f3>] create_io_thread+0x63/0x90
[<ffffffff81686604>] create_io_worker+0xb4/0x230
[<ffffffff81686f68>] io_wqe_enqueue+0x248/0x3b0
[<ffffffff8167663a>] io_queue_iowq+0xba/0x200
[<ffffffff816768b3>] io_queue_async+0x113/0x180
[<ffffffff816840df>] io_req_task_submit+0x18f/0x1a0
[<ffffffff816841cd>] io_apoll_task_func+0xdd/0x120
[<ffffffff8167d49f>] tctx_task_work+0x11f/0x570
[<ffffffff81272c4e>] task_work_run+0x7e/0xc0
[<ffffffff8125a688>] get_signal+0xc18/0xf10
[<ffffffff8111645b>] arch_do_signal_or_restart+0x2b/0x730
[<ffffffff812ea44e>] exit_to_user_mode_prepare+0x5e/0x180
[<ffffffff844ae1b2>] syscall_exit_to_user_mode+0x12/0x20
[<ffffffff844a7e80>] do_syscall_64+0x40/0x80
Then,
3. io_sq_thread() or io_wqe_worker() frees @audit_context using
audit_free();
4. do_exit() eventually calls audit_free() again, which is okay
because audit_free() does a NULL check.
As suggested by Paul Moore, fix it by deleting audit_alloc_kernel() and
redundant audit_free() calls.
Fixes: 5bd2182d58e9 ("audit,io_uring,io-wq: add some basic audit support to io_uring")
Suggested-by: Paul Moore <paul(a)paul-moore.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Peilin Ye <peilin.ye(a)bytedance.com>
Acked-by: Paul Moore <paul(a)paul-moore.com>
Link: https://lore.kernel.org/r/20220803222343.31673-1-yepeilin.cs@gmail.com
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/include/linux/audit.h b/include/linux/audit.h
index 00f7a80f1a3e..3608992848d3 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -285,7 +285,6 @@ static inline int audit_signal_info(int sig, struct task_struct *t)
/* These are defined in auditsc.c */
/* Public API */
extern int audit_alloc(struct task_struct *task);
-extern int audit_alloc_kernel(struct task_struct *task);
extern void __audit_free(struct task_struct *task);
extern void __audit_uring_entry(u8 op);
extern void __audit_uring_exit(int success, long code);
@@ -578,10 +577,6 @@ static inline int audit_alloc(struct task_struct *task)
{
return 0;
}
-static inline int audit_alloc_kernel(struct task_struct *task)
-{
- return 0;
-}
static inline void audit_free(struct task_struct *task)
{ }
static inline void audit_uring_entry(u8 op)
diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c
index 77df5b43bf52..c6536d4b2da0 100644
--- a/io_uring/io-wq.c
+++ b/io_uring/io-wq.c
@@ -624,8 +624,6 @@ static int io_wqe_worker(void *data)
snprintf(buf, sizeof(buf), "iou-wrk-%d", wq->task->pid);
set_task_comm(current, buf);
- audit_alloc_kernel(current);
-
while (!test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
long ret;
@@ -660,7 +658,6 @@ static int io_wqe_worker(void *data)
if (test_bit(IO_WQ_BIT_EXIT, &wq->state))
io_worker_handle_work(worker);
- audit_free(current);
io_worker_exit(worker);
return 0;
}
diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c
index 76d4d70c733a..559652380672 100644
--- a/io_uring/sqpoll.c
+++ b/io_uring/sqpoll.c
@@ -235,8 +235,6 @@ static int io_sq_thread(void *data)
set_cpus_allowed_ptr(current, cpu_online_mask);
current->flags |= PF_NO_SETAFFINITY;
- audit_alloc_kernel(current);
-
mutex_lock(&sqd->lock);
while (1) {
bool cap_entries, sqt_spin = false;
@@ -310,8 +308,6 @@ static int io_sq_thread(void *data)
io_run_task_work();
mutex_unlock(&sqd->lock);
- audit_free(current);
-
complete(&sqd->exited);
do_exit(0);
}
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 3a8c9d744800..dd8d9ab747c3 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1073,31 +1073,6 @@ int audit_alloc(struct task_struct *tsk)
return 0;
}
-/**
- * audit_alloc_kernel - allocate an audit_context for a kernel task
- * @tsk: the kernel task
- *
- * Similar to the audit_alloc() function, but intended for kernel private
- * threads. Returns zero on success, negative values on failure.
- */
-int audit_alloc_kernel(struct task_struct *tsk)
-{
- /*
- * At the moment we are just going to call into audit_alloc() to
- * simplify the code, but there two things to keep in mind with this
- * approach:
- *
- * 1. Filtering internal kernel tasks is a bit laughable in almost all
- * cases, but there is at least one case where there is a benefit:
- * the '-a task,never' case allows the admin to effectively disable
- * task auditing at runtime.
- *
- * 2. The {set,clear}_task_syscall_work() ops likely have zero effect
- * on these internal kernel tasks, but they probably don't hurt either.
- */
- return audit_alloc(tsk);
-}
-
static inline void audit_free_context(struct audit_context *context)
{
/* resetting is extra work, but it is likely just noise */
The patch below does not apply to the 5.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f482aa98652795846cc55da98ebe331eb74f3d0b Mon Sep 17 00:00:00 2001
From: Peilin Ye <peilin.ye(a)bytedance.com>
Date: Wed, 3 Aug 2022 15:23:43 -0700
Subject: [PATCH] audit, io_uring, io-wq: Fix memory leak in io_sq_thread() and
io_wqe_worker()
Currently @audit_context is allocated twice for io_uring workers:
1. copy_process() calls audit_alloc();
2. io_sq_thread() or io_wqe_worker() calls audit_alloc_kernel() (which
is effectively audit_alloc()) and overwrites @audit_context,
causing:
BUG: memory leak
unreferenced object 0xffff888144547400 (size 1024):
<...>
hex dump (first 32 bytes):
00 00 00 00 00 00 00 00 01 00 00 00 00 00 00 00 ................
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
backtrace:
[<ffffffff8135cfc3>] audit_alloc+0x133/0x210
[<ffffffff81239e63>] copy_process+0xcd3/0x2340
[<ffffffff8123b5f3>] create_io_thread+0x63/0x90
[<ffffffff81686604>] create_io_worker+0xb4/0x230
[<ffffffff81686f68>] io_wqe_enqueue+0x248/0x3b0
[<ffffffff8167663a>] io_queue_iowq+0xba/0x200
[<ffffffff816768b3>] io_queue_async+0x113/0x180
[<ffffffff816840df>] io_req_task_submit+0x18f/0x1a0
[<ffffffff816841cd>] io_apoll_task_func+0xdd/0x120
[<ffffffff8167d49f>] tctx_task_work+0x11f/0x570
[<ffffffff81272c4e>] task_work_run+0x7e/0xc0
[<ffffffff8125a688>] get_signal+0xc18/0xf10
[<ffffffff8111645b>] arch_do_signal_or_restart+0x2b/0x730
[<ffffffff812ea44e>] exit_to_user_mode_prepare+0x5e/0x180
[<ffffffff844ae1b2>] syscall_exit_to_user_mode+0x12/0x20
[<ffffffff844a7e80>] do_syscall_64+0x40/0x80
Then,
3. io_sq_thread() or io_wqe_worker() frees @audit_context using
audit_free();
4. do_exit() eventually calls audit_free() again, which is okay
because audit_free() does a NULL check.
As suggested by Paul Moore, fix it by deleting audit_alloc_kernel() and
redundant audit_free() calls.
Fixes: 5bd2182d58e9 ("audit,io_uring,io-wq: add some basic audit support to io_uring")
Suggested-by: Paul Moore <paul(a)paul-moore.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Peilin Ye <peilin.ye(a)bytedance.com>
Acked-by: Paul Moore <paul(a)paul-moore.com>
Link: https://lore.kernel.org/r/20220803222343.31673-1-yepeilin.cs@gmail.com
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/include/linux/audit.h b/include/linux/audit.h
index 00f7a80f1a3e..3608992848d3 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -285,7 +285,6 @@ static inline int audit_signal_info(int sig, struct task_struct *t)
/* These are defined in auditsc.c */
/* Public API */
extern int audit_alloc(struct task_struct *task);
-extern int audit_alloc_kernel(struct task_struct *task);
extern void __audit_free(struct task_struct *task);
extern void __audit_uring_entry(u8 op);
extern void __audit_uring_exit(int success, long code);
@@ -578,10 +577,6 @@ static inline int audit_alloc(struct task_struct *task)
{
return 0;
}
-static inline int audit_alloc_kernel(struct task_struct *task)
-{
- return 0;
-}
static inline void audit_free(struct task_struct *task)
{ }
static inline void audit_uring_entry(u8 op)
diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c
index 77df5b43bf52..c6536d4b2da0 100644
--- a/io_uring/io-wq.c
+++ b/io_uring/io-wq.c
@@ -624,8 +624,6 @@ static int io_wqe_worker(void *data)
snprintf(buf, sizeof(buf), "iou-wrk-%d", wq->task->pid);
set_task_comm(current, buf);
- audit_alloc_kernel(current);
-
while (!test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
long ret;
@@ -660,7 +658,6 @@ static int io_wqe_worker(void *data)
if (test_bit(IO_WQ_BIT_EXIT, &wq->state))
io_worker_handle_work(worker);
- audit_free(current);
io_worker_exit(worker);
return 0;
}
diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c
index 76d4d70c733a..559652380672 100644
--- a/io_uring/sqpoll.c
+++ b/io_uring/sqpoll.c
@@ -235,8 +235,6 @@ static int io_sq_thread(void *data)
set_cpus_allowed_ptr(current, cpu_online_mask);
current->flags |= PF_NO_SETAFFINITY;
- audit_alloc_kernel(current);
-
mutex_lock(&sqd->lock);
while (1) {
bool cap_entries, sqt_spin = false;
@@ -310,8 +308,6 @@ static int io_sq_thread(void *data)
io_run_task_work();
mutex_unlock(&sqd->lock);
- audit_free(current);
-
complete(&sqd->exited);
do_exit(0);
}
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 3a8c9d744800..dd8d9ab747c3 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1073,31 +1073,6 @@ int audit_alloc(struct task_struct *tsk)
return 0;
}
-/**
- * audit_alloc_kernel - allocate an audit_context for a kernel task
- * @tsk: the kernel task
- *
- * Similar to the audit_alloc() function, but intended for kernel private
- * threads. Returns zero on success, negative values on failure.
- */
-int audit_alloc_kernel(struct task_struct *tsk)
-{
- /*
- * At the moment we are just going to call into audit_alloc() to
- * simplify the code, but there two things to keep in mind with this
- * approach:
- *
- * 1. Filtering internal kernel tasks is a bit laughable in almost all
- * cases, but there is at least one case where there is a benefit:
- * the '-a task,never' case allows the admin to effectively disable
- * task auditing at runtime.
- *
- * 2. The {set,clear}_task_syscall_work() ops likely have zero effect
- * on these internal kernel tasks, but they probably don't hurt either.
- */
- return audit_alloc(tsk);
-}
-
static inline void audit_free_context(struct audit_context *context)
{
/* resetting is extra work, but it is likely just noise */
Hi Sasha and others,
On Samstag, 13. August 2022 23:21:42 CEST Sasha Levin wrote:
> This is a note to let you know that I've just added the patch titled
>
> ARM: dts: qcom: msm8974-FP2: Add supplies for remoteprocs
>
> to the 5.18-stable tree which can be found at:
>
> http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum
> mary
>
> The filename of the patch is:
> arm-dts-qcom-msm8974-fp2-add-supplies-for-remoteproc.patch
> and it can be found in the queue-5.18 subdirectory.
>
> If you, or anyone else, feels it should not be added to the stable tree,
> please let <stable(a)vger.kernel.org> know about it.
>
While I don't know the policy for backporting major dts changes, I don't think
backporting all the msm8974 cleanup to anything older is really worth it.
I also don't think there's actually a user of msm8974 stable, all the activity
I know of happens on the latest mainline plus some extra patches on top for
extra functionality that cannot be upstreamed yet.
Regards
Luca
>
>
> commit d5e29d1fc785b60f8ac337c1a36b908a228b01ac
> Author: Luca Weiss <luca(a)z3ntu.xyz>
> Date: Thu Apr 21 23:42:43 2022 +0200
>
> ARM: dts: qcom: msm8974-FP2: Add supplies for remoteprocs
>
> [ Upstream commit fb5e339fb1bc9eb7f34b341d995e4ab39c03588e ]
>
> Those were removed from msm8974.dtsi as part of a recent cleanup commit,
> so add them back for FP2.
>
> Signed-off-by: Luca Weiss <luca(a)z3ntu.xyz>
> Signed-off-by: Bjorn Andersson <bjorn.andersson(a)linaro.org>
> Link: https://lore.kernel.org/r/20220421214243.352469-3-luca@z3ntu.xyz
> Signed-off-by: Sasha Levin <sashal(a)kernel.org>
>
> diff --git a/arch/arm/boot/dts/qcom-msm8974-fairphone-fp2.dts
> b/arch/arm/boot/dts/qcom-msm8974-fairphone-fp2.dts index
> d6799a1b820b..32975f56f896 100644
> --- a/arch/arm/boot/dts/qcom-msm8974-fairphone-fp2.dts
> +++ b/arch/arm/boot/dts/qcom-msm8974-fairphone-fp2.dts
> @@ -131,6 +131,17 @@ wcnss {
> };
> };
>
> +&remoteproc_adsp {
> + cx-supply = <&pm8841_s2>;
> +};
> +
> +&remoteproc_mss {
> + cx-supply = <&pm8841_s2>;
> + mss-supply = <&pm8841_s3>;
> + mx-supply = <&pm8841_s1>;
> + pll-supply = <&pm8941_l12>;
> +};
> +
> &rpm_requests {
> pm8841-regulators {
> compatible = "qcom,rpm-pm8841-regulators";
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 118b0c863c8f5629cc5271fc24d72d926e0715d9 Mon Sep 17 00:00:00 2001
From: Arun Easi <aeasi(a)marvell.com>
Date: Wed, 15 Jun 2022 22:35:04 -0700
Subject: [PATCH] scsi: qla2xxx: Fix losing target when it reappears during
delete
FC target disappeared during port perturbation tests due to a race that
tramples target state. Fix the issue by adding state checks before
proceeding.
Link: https://lore.kernel.org/r/20220616053508.27186-8-njavali@marvell.com
Fixes: 44c57f205876 ("scsi: qla2xxx: Changes to support FCP2 Target")
Cc: stable(a)vger.kernel.org
Signed-off-by: Arun Easi <aeasi(a)marvell.com>
Signed-off-by: Nilesh Javali <njavali(a)marvell.com>
Signed-off-by: Martin K. Petersen <martin.petersen(a)oracle.com>
diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c
index 8b87fefda423..feca9e44b21c 100644
--- a/drivers/scsi/qla2xxx/qla_attr.c
+++ b/drivers/scsi/qla2xxx/qla_attr.c
@@ -2714,17 +2714,24 @@ qla2x00_dev_loss_tmo_callbk(struct fc_rport *rport)
if (!fcport)
return;
- /* Now that the rport has been deleted, set the fcport state to
- FCS_DEVICE_DEAD */
- qla2x00_set_fcport_state(fcport, FCS_DEVICE_DEAD);
+
+ /*
+ * Now that the rport has been deleted, set the fcport state to
+ * FCS_DEVICE_DEAD, if the fcport is still lost.
+ */
+ if (fcport->scan_state != QLA_FCPORT_FOUND)
+ qla2x00_set_fcport_state(fcport, FCS_DEVICE_DEAD);
/*
* Transport has effectively 'deleted' the rport, clear
* all local references.
*/
spin_lock_irqsave(host->host_lock, flags);
- fcport->rport = fcport->drport = NULL;
- *((fc_port_t **)rport->dd_data) = NULL;
+ /* Confirm port has not reappeared before clearing pointers. */
+ if (rport->port_state != FC_PORTSTATE_ONLINE) {
+ fcport->rport = fcport->drport = NULL;
+ *((fc_port_t **)rport->dd_data) = NULL;
+ }
spin_unlock_irqrestore(host->host_lock, flags);
if (test_bit(ABORT_ISP_ACTIVE, &fcport->vha->dpc_flags))
@@ -2757,9 +2764,12 @@ qla2x00_terminate_rport_io(struct fc_rport *rport)
/*
* At this point all fcport's software-states are cleared. Perform any
* final cleanup of firmware resources (PCBs and XCBs).
+ *
+ * Attempt to cleanup only lost devices.
*/
if (fcport->loop_id != FC_NO_LOOP_ID) {
- if (IS_FWI2_CAPABLE(fcport->vha->hw)) {
+ if (IS_FWI2_CAPABLE(fcport->vha->hw) &&
+ fcport->scan_state != QLA_FCPORT_FOUND) {
if (fcport->loop_id != FC_NO_LOOP_ID)
fcport->logout_on_delete = 1;
@@ -2769,7 +2779,7 @@ qla2x00_terminate_rport_io(struct fc_rport *rport)
__LINE__);
qlt_schedule_sess_for_deletion(fcport);
}
- } else {
+ } else if (!IS_FWI2_CAPABLE(fcport->vha->hw)) {
qla2x00_port_logout(fcport->vha, fcport);
}
}
The introduction of 'feature_persistent' caused two bugs. The first is a
wrong overwrite of 'vbd->feature_gnt_persistent' in 'blkback', caused by
caching the parameter value at the wrong place, and the second is an
unintended behavioral change that could break previous dynamic
frontend/backend persistent feature support changes. This patchset fixes
both issues.
Changes from v3
(https://lore.kernel.org/xen-devel/20220715175521.126649-1-sj@kernel.org/)
- Split 'blkback' patch for each of the two issues
- Add 'Reported-by: Andrii Chepurnyi <andrii.chepurnyi82(a)gmail.com>'
Changes from v2
(https://lore.kernel.org/xen-devel/20220714224410.51147-1-sj@kernel.org/)
- Keep the behavioral change of v1
- Update blkfront's counterpart to follow the changed behavior
- Update documents for the changed behavior
Changes from v1
(https://lore.kernel.org/xen-devel/20220106091013.126076-1-mheyne@amazon.de/)
- Avoid the behavioral change
(https://lore.kernel.org/xen-devel/20220121102309.27802-1-sj@kernel.org/)
- Rebase on latest xen/tip/linux-next
- Re-work by SeongJae Park <sj(a)kernel.org>
- Cc stable@
Maximilian Heyne (1):
xen-blkback: Apply 'feature_persistent' parameter when connect
SeongJae Park (2):
xen-blkback: fix persistent grants negotiation
xen-blkfront: Apply 'feature_persistent' parameter when connect
.../ABI/testing/sysfs-driver-xen-blkback | 2 +-
.../ABI/testing/sysfs-driver-xen-blkfront | 2 +-
drivers/block/xen-blkback/xenbus.c | 20 ++++++++-----------
drivers/block/xen-blkfront.c | 4 +---
4 files changed, 11 insertions(+), 17 deletions(-)
--
2.25.1
commit dc4d31684974d140250f3ee612c3f0cab13b3146 upstream.
[BUG]
If we have a btrfs image with dirty log, along with an unsupported RO
compatible flag:
log_root 30474240
...
compat_flags 0x0
compat_ro_flags 0x40000003
( FREE_SPACE_TREE |
FREE_SPACE_TREE_VALID |
unknown flag: 0x40000000 )
Then even if we can only mount it RO, we will still cause metadata
update for log replay:
BTRFS info (device dm-1): flagging fs with big metadata feature
BTRFS info (device dm-1): using free space tree
BTRFS info (device dm-1): has skinny extents
BTRFS info (device dm-1): start tree-log replay
This is definitely against the RO compat flag requirement.
[CAUSE]
An unsupported RO compat flag only forces us to mount RO, but we still do
log replay for a plain RO mount.
Thus we end up replaying the log and updating metadata.
This can be very problematic for a new RO compat flag. For example, an
older kernel cannot understand the v2 cache, so if we allow metadata
updates on an RO mount we can invalidate/corrupt the v2 cache.
[FIX]
Just reject the mount unless rescue=nologreplay is provided:
BTRFS error (device dm-1): cannot replay dirty log with unsupported compat_ro features (0x40000000), try rescue=nologreplay
We don't want to set rescue=nologreplay directly, as this would make the
end user read old data and cause confusion.
Since such a case is really rare, it is fine to just reject the
mount with an error message, which also includes the proper workaround.
CC: stable(a)vger.kernel.org #4.9 4.14
Signed-off-by: Qu Wenruo <wqu(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
---
fs/btrfs/disk-io.c | 14 ++++++++++++++
1 file changed, 14 insertions(+)
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 46ecb7405af1..b83e96f51a5f 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2774,6 +2774,20 @@ int open_ctree(struct super_block *sb,
err = -EINVAL;
goto fail_alloc;
}
+ /*
+ * We have unsupported RO compat features, although RO mounted, we
+ * should not cause any metadata write, including log replay.
+ * Or we could screw up whatever the new feature requires.
+ */
+ if (unlikely(features && btrfs_super_log_root(disk_super) &&
+ !btrfs_test_opt(fs_info, NOLOGREPLAY))) {
+ btrfs_err(fs_info,
+"cannot replay dirty log with unsupported compat_ro features (0x%llx), try rescue=nologreplay",
+ features);
+ err = -EINVAL;
+ goto fail_alloc;
+ }
+
max_active = fs_info->thread_pool_size;
--
2.37.1
commit dc4d31684974d140250f3ee612c3f0cab13b3146 upstream.
[BUG]
If we have a btrfs image with dirty log, along with an unsupported RO
compatible flag:
log_root 30474240
...
compat_flags 0x0
compat_ro_flags 0x40000003
( FREE_SPACE_TREE |
FREE_SPACE_TREE_VALID |
unknown flag: 0x40000000 )
Then even if we can only mount it RO, we will still cause metadata
update for log replay:
BTRFS info (device dm-1): flagging fs with big metadata feature
BTRFS info (device dm-1): using free space tree
BTRFS info (device dm-1): has skinny extents
BTRFS info (device dm-1): start tree-log replay
This is definitely against the RO compat flag requirement.
[CAUSE]
An unsupported RO compat flag only forces us to mount RO, but we still do
log replay for a plain RO mount.
Thus we end up replaying the log and updating metadata.
This can be very problematic for a new RO compat flag. For example, an
older kernel cannot understand the v2 cache, so if we allow metadata
updates on an RO mount we can invalidate/corrupt the v2 cache.
[FIX]
Just reject the mount unless rescue=nologreplay is provided:
BTRFS error (device dm-1): cannot replay dirty log with unsupported compat_ro features (0x40000000), try rescue=nologreplay
We don't want to set rescue=nologreplay directly, as this would make the
end user read old data and cause confusion.
Since such a case is really rare, it is fine to just reject the
mount with an error message, which also includes the proper workaround.
CC: stable(a)vger.kernel.org #4.19
Signed-off-by: Qu Wenruo <wqu(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
---
fs/btrfs/disk-io.c | 14 ++++++++++++++
1 file changed, 14 insertions(+)
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index c9fd018dcf76..98f87cc47433 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2920,6 +2920,20 @@ int open_ctree(struct super_block *sb,
err = -EINVAL;
goto fail_alloc;
}
+ /*
+ * We have unsupported RO compat features, although RO mounted, we
+ * should not cause any metadata write, including log replay.
+ * Or we could screw up whatever the new feature requires.
+ */
+ if (unlikely(features && btrfs_super_log_root(disk_super) &&
+ !btrfs_test_opt(fs_info, NOLOGREPLAY))) {
+ btrfs_err(fs_info,
+"cannot replay dirty log with unsupported compat_ro features (0x%llx), try rescue=nologreplay",
+ features);
+ err = -EINVAL;
+ goto fail_alloc;
+ }
+
ret = btrfs_init_workqueues(fs_info, fs_devices);
if (ret) {
--
2.37.1
commit dc4d31684974d140250f3ee612c3f0cab13b3146 upstream.
[BUG]
If we have a btrfs image with dirty log, along with an unsupported RO
compatible flag:
log_root 30474240
...
compat_flags 0x0
compat_ro_flags 0x40000003
( FREE_SPACE_TREE |
FREE_SPACE_TREE_VALID |
unknown flag: 0x40000000 )
Then even if we can only mount it RO, we will still cause metadata
update for log replay:
BTRFS info (device dm-1): flagging fs with big metadata feature
BTRFS info (device dm-1): using free space tree
BTRFS info (device dm-1): has skinny extents
BTRFS info (device dm-1): start tree-log replay
This is definitely against the RO compat flag requirement.
[CAUSE]
An unsupported RO compat flag only forces us to mount RO, but we still do
log replay for a plain RO mount.
Thus we end up replaying the log and updating metadata.
This can be very problematic for a new RO compat flag. For example, an
older kernel cannot understand the v2 cache, so if we allow metadata
updates on an RO mount we can invalidate/corrupt the v2 cache.
[FIX]
Just reject the mount unless rescue=nologreplay is provided:
BTRFS error (device dm-1): cannot replay dirty log with unsupported compat_ro features (0x40000000), try rescue=nologreplay
We don't want to set rescue=nologreplay directly, as this would make the
end user read old data and cause confusion.
Since such a case is really rare, it is fine to just reject the
mount with an error message, which also includes the proper workaround.
CC: stable(a)vger.kernel.org #5.4
Signed-off-by: Qu Wenruo <wqu(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
---
fs/btrfs/disk-io.c | 14 ++++++++++++++
1 file changed, 14 insertions(+)
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index a4b3e6f6bf02..b94d68035c5d 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2970,6 +2970,20 @@ int open_ctree(struct super_block *sb,
err = -EINVAL;
goto fail_csum;
}
+ /*
+ * We have unsupported RO compat features, although RO mounted, we
+ * should not cause any metadata write, including log replay.
+ * Or we could screw up whatever the new feature requires.
+ */
+ if (unlikely(features && btrfs_super_log_root(disk_super) &&
+ !btrfs_test_opt(fs_info, NOLOGREPLAY))) {
+ btrfs_err(fs_info,
+"cannot replay dirty log with unsupported compat_ro features (0x%llx), try rescue=nologreplay",
+ features);
+ err = -EINVAL;
+ goto fail_alloc;
+ }
+
ret = btrfs_init_workqueues(fs_info, fs_devices);
if (ret) {
--
2.37.1
commit dc4d31684974d140250f3ee612c3f0cab13b3146 upstream.
[BUG]
If we have a btrfs image with dirty log, along with an unsupported RO
compatible flag:
log_root 30474240
...
compat_flags 0x0
compat_ro_flags 0x40000003
( FREE_SPACE_TREE |
FREE_SPACE_TREE_VALID |
unknown flag: 0x40000000 )
Then even if we can only mount it RO, we will still cause metadata
update for log replay:
BTRFS info (device dm-1): flagging fs with big metadata feature
BTRFS info (device dm-1): using free space tree
BTRFS info (device dm-1): has skinny extents
BTRFS info (device dm-1): start tree-log replay
This definitely violates the RO compat flag requirement.
[CAUSE]
The RO compat flag only forces us to do an RO mount, but we will still do log
replay for a plain RO mount.
Thus this will result in us doing log replay and updating metadata.
This can be very problematic for a new RO compat flag. For example, an older
kernel cannot understand the v2 cache, and if we allow metadata updates on
an RO mount we may invalidate/corrupt the v2 cache.
[FIX]
Just reject the mount unless rescue=nologreplay is provided:
BTRFS error (device dm-1): cannot replay dirty log with unsupported compat_ro features (0x40000000), try rescue=nologreplay
We don't want to set rescue=nologreplay directly, as this would make the
end user read the old data, and cause confusion.
Since such a case is really rare, we're mostly fine to just reject the
mount with an error message, which also includes the proper workaround.
CC: stable(a)vger.kernel.org #5.10
Signed-off-by: Qu Wenruo <wqu(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
---
fs/btrfs/disk-io.c | 14 ++++++++++++++
1 file changed, 14 insertions(+)
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 35acdab56a1c..2c7e50980a70 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3104,6 +3104,20 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
err = -EINVAL;
goto fail_alloc;
}
+ /*
+ * We have unsupported RO compat features, although RO mounted, we
+ * should not cause any metadata write, including log replay.
+ * Or we could screw up whatever the new feature requires.
+ */
+ if (unlikely(features && btrfs_super_log_root(disk_super) &&
+ !btrfs_test_opt(fs_info, NOLOGREPLAY))) {
+ btrfs_err(fs_info,
+"cannot replay dirty log with unsupported compat_ro features (0x%llx), try rescue=nologreplay",
+ features);
+ err = -EINVAL;
+ goto fail_alloc;
+ }
+
ret = btrfs_init_workqueues(fs_info, fs_devices);
if (ret) {
--
2.37.1
When an SMB client opens a file in a ksmbd share with O_TRUNC, the DOS attribute
xattr is removed along with the data in the file. This causes the FSCTL_SET_SPARSE
request from the client to fail because ksmbd can't update the DOS attribute
after setting ATTR_SPARSE_FILE. This patch also fixes the xfstests generic/469
test.
Fixes: e2f34481b24d ("cifsd: add server-side procedures for SMB3")
Cc: stable(a)vger.kernel.org
Signed-off-by: Namjae Jeon <linkinjeon(a)kernel.org>
---
v2:
- don't remove other xattr classes either.
- add fixes and stable tags.
fs/ksmbd/smb2pdu.c | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c
index a136d5e4943b..c2daef28a214 100644
--- a/fs/ksmbd/smb2pdu.c
+++ b/fs/ksmbd/smb2pdu.c
@@ -2330,10 +2330,9 @@ static int smb2_remove_smb_xattrs(struct path *path)
name += strlen(name) + 1) {
ksmbd_debug(SMB, "%s, len %zd\n", name, strlen(name));
- if (strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) &&
- strncmp(&name[XATTR_USER_PREFIX_LEN], DOS_ATTRIBUTE_PREFIX,
- DOS_ATTRIBUTE_PREFIX_LEN) &&
- strncmp(&name[XATTR_USER_PREFIX_LEN], STREAM_PREFIX, STREAM_PREFIX_LEN))
+ if (strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) ||
+ (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) &&
+ strncmp(&name[XATTR_USER_PREFIX_LEN], STREAM_PREFIX, STREAM_PREFIX_LEN)))
continue;
err = ksmbd_vfs_remove_xattr(user_ns, path->dentry, name);
--
2.25.1
Was this in the way of something else?
---------- Forwarded message ----------
Date: Sat, 13 Aug 2022 17:59:45 -0400
From: Sasha Levin <sashal(a)kernel.org>
To: stable-commits(a)vger.kernel.org, Julia.Lawall(a)inria.fr
Cc: Michael Ellerman <mpe(a)ellerman.id.au>, Nicholas Piggin <npiggin(a)gmail.com>,
Christophe Leroy <christophe.leroy(a)csgroup.eu>,
Herbert Xu <herbert(a)gondor.apana.org.au>,
David S. Miller <davem(a)davemloft.net>, Russell Currey <ruscur(a)russell.cc>,
Oliver O'Halloran <oohall(a)gmail.com>, Anatolij Gustschin <agust(a)denx.de>,
Scott Wood <oss(a)buserror.net>, Arnd Bergmann <arnd(a)arndb.de>,
Benjamin Herrenschmidt <benh(a)kernel.crashing.org>,
Frederic Barrat <fbarrat(a)linux.ibm.com>,
Andrew Donnellan <ajd(a)linux.ibm.com>, Geoff Levand <geoff(a)infradead.org>
Subject: Patch "powerpc: fix typos in comments" has been added to the
5.18-stable tree
This is a note to let you know that I've just added the patch titled
powerpc: fix typos in comments
to the 5.18-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
powerpc-fix-typos-in-comments.patch
and it can be found in the queue-5.18 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
commit 819592fc0685424ad22f92a0e3d86602d07d1a23
Author: Julia Lawall <Julia.Lawall(a)inria.fr>
Date: Sat Apr 30 20:56:54 2022 +0200
powerpc: fix typos in comments
[ Upstream commit 1fd02f6605b855b4af2883f29a2abc88bdf17857 ]
Various spelling mistakes in comments.
Detected with the help of Coccinelle.
Signed-off-by: Julia Lawall <Julia.Lawall(a)inria.fr>
Reviewed-by: Joel Stanley <joel(a)jms.id.au>
Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au>
Link: https://lore.kernel.org/r/20220430185654.5855-1-Julia.Lawall@inria.fr
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
diff --git a/arch/powerpc/boot/cuboot-hotfoot.c b/arch/powerpc/boot/cuboot-hotfoot.c
index 888a6b9bfead..0e5532f855d6 100644
--- a/arch/powerpc/boot/cuboot-hotfoot.c
+++ b/arch/powerpc/boot/cuboot-hotfoot.c
@@ -70,7 +70,7 @@ static void hotfoot_fixups(void)
printf("Fixing devtree for 4M Flash\n");
- /* First fix up the base addresse */
+ /* First fix up the base address */
getprop(devp, "reg", regs, sizeof(regs));
regs[0] = 0;
regs[1] = 0xffc00000;
diff --git a/arch/powerpc/crypto/aes-spe-glue.c b/arch/powerpc/crypto/aes-spe-glue.c
index c2b23b69d7b1..e8dfe9fb0266 100644
--- a/arch/powerpc/crypto/aes-spe-glue.c
+++ b/arch/powerpc/crypto/aes-spe-glue.c
@@ -404,7 +404,7 @@ static int ppc_xts_decrypt(struct skcipher_request *req)
/*
* Algorithm definitions. Disabling alignment (cra_alignmask=0) was chosen
- * because the e500 platform can handle unaligned reads/writes very efficently.
+ * because the e500 platform can handle unaligned reads/writes very efficiently.
* This improves IPsec thoughput by another few percent. Additionally we assume
* that AES context is always aligned to at least 8 bytes because it is created
* with kmalloc() in the crypto infrastructure
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index ae0fdef0ac11..2a271a6d6924 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -2025,7 +2025,7 @@ static struct cpu_spec * __init setup_cpu_spec(unsigned long offset,
* oprofile_cpu_type already has a value, then we are
* possibly overriding a real PVR with a logical one,
* and, in that case, keep the current value for
- * oprofile_cpu_type. Futhermore, let's ensure that the
+ * oprofile_cpu_type. Furthermore, let's ensure that the
* fix for the PMAO bug is enabled on compatibility mode.
*/
if (old.oprofile_cpu_type != NULL) {
diff --git a/arch/powerpc/kernel/dawr.c b/arch/powerpc/kernel/dawr.c
index 64e423d2fe0f..30d4eca88d17 100644
--- a/arch/powerpc/kernel/dawr.c
+++ b/arch/powerpc/kernel/dawr.c
@@ -27,7 +27,7 @@ int set_dawr(int nr, struct arch_hw_breakpoint *brk)
dawrx |= (brk->type & (HW_BRK_TYPE_PRIV_ALL)) >> 3;
/*
* DAWR length is stored in field MDR bits 48:53. Matches range in
- * doublewords (64 bits) baised by -1 eg. 0b000000=1DW and
+ * doublewords (64 bits) biased by -1 eg. 0b000000=1DW and
* 0b111111=64DW.
* brk->hw_len is in bytes.
* This aligns up to double word size, shifts and does the bias.
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 28bb1e7263a6..ab316e155ea9 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -1329,7 +1329,7 @@ int eeh_pe_set_option(struct eeh_pe *pe, int option)
/*
* EEH functionality could possibly be disabled, just
- * return error for the case. And the EEH functinality
+ * return error for the case. And the EEH functionality
* isn't expected to be disabled on one specific PE.
*/
switch (option) {
@@ -1804,7 +1804,7 @@ static int eeh_debugfs_break_device(struct pci_dev *pdev)
* PE freeze. Using the in_8() accessor skips the eeh detection hook
* so the freeze hook so the EEH Detection machinery won't be
* triggered here. This is to match the usual behaviour of EEH
- * where the HW will asyncronously freeze a PE and it's up to
+ * where the HW will asynchronously freeze a PE and it's up to
* the kernel to notice and deal with it.
*
* 3. Turn Memory space back on. This is more important for VFs
diff --git a/arch/powerpc/kernel/eeh_event.c b/arch/powerpc/kernel/eeh_event.c
index a7a8dc182efb..c23a454af08a 100644
--- a/arch/powerpc/kernel/eeh_event.c
+++ b/arch/powerpc/kernel/eeh_event.c
@@ -143,7 +143,7 @@ int __eeh_send_failure_event(struct eeh_pe *pe)
int eeh_send_failure_event(struct eeh_pe *pe)
{
/*
- * If we've manually supressed recovery events via debugfs
+ * If we've manually suppressed recovery events via debugfs
* then just drop it on the floor.
*/
if (eeh_debugfs_no_recover) {
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index dc2350b288cf..aa29b9b7920f 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -1665,8 +1665,8 @@ int __init setup_fadump(void)
}
/*
* Use subsys_initcall_sync() here because there is dependency with
- * crash_save_vmcoreinfo_init(), which mush run first to ensure vmcoreinfo initialization
- * is done before regisering with f/w.
+ * crash_save_vmcoreinfo_init(), which must run first to ensure vmcoreinfo initialization
+ * is done before registering with f/w.
*/
subsys_initcall_sync(setup_fadump);
#else /* !CONFIG_PRESERVE_FA_DUMP */
diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c
index a0432ef46967..e25b796682cc 100644
--- a/arch/powerpc/kernel/module_32.c
+++ b/arch/powerpc/kernel/module_32.c
@@ -99,7 +99,7 @@ static unsigned long get_plt_size(const Elf32_Ehdr *hdr,
/* Sort the relocation information based on a symbol and
* addend key. This is a stable O(n*log n) complexity
- * alogrithm but it will reduce the complexity of
+ * algorithm but it will reduce the complexity of
* count_relocs() to linear complexity O(n)
*/
sort((void *)hdr + sechdrs[i].sh_offset,
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index 794720530442..2cce576edbc5 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -194,7 +194,7 @@ static unsigned long get_stubs_size(const Elf64_Ehdr *hdr,
/* Sort the relocation information based on a symbol and
* addend key. This is a stable O(n*log n) complexity
- * alogrithm but it will reduce the complexity of
+ * algorithm but it will reduce the complexity of
* count_relocs() to linear complexity O(n)
*/
sort((void *)sechdrs[i].sh_addr,
@@ -361,7 +361,7 @@ static inline int create_ftrace_stub(struct ppc64_stub_entry *entry,
entry->jump[1] |= PPC_HA(reladdr);
entry->jump[2] |= PPC_LO(reladdr);
- /* Eventhough we don't use funcdata in the stub, it's needed elsewhere. */
+ /* Even though we don't use funcdata in the stub, it's needed elsewhere. */
entry->funcdata = func_desc(addr);
entry->magic = STUB_MAGIC;
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index 8bc9cf62cd93..9a97a93bd11c 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -1688,7 +1688,7 @@ EXPORT_SYMBOL_GPL(pcibios_scan_phb);
static void fixup_hide_host_resource_fsl(struct pci_dev *dev)
{
int i, class = dev->class >> 8;
- /* When configured as agent, programing interface = 1 */
+ /* When configured as agent, programming interface = 1 */
int prog_if = dev->class & 0xf;
if ((class == PCI_CLASS_PROCESSOR_POWERPC ||
diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c
index c3024f104765..6f2b0cc1ddd6 100644
--- a/arch/powerpc/kernel/pci_of_scan.c
+++ b/arch/powerpc/kernel/pci_of_scan.c
@@ -244,7 +244,7 @@ EXPORT_SYMBOL(of_create_pci_dev);
* @dev: pci_dev structure for the bridge
*
* of_scan_bus() calls this routine for each PCI bridge that it finds, and
- * this routine in turn call of_scan_bus() recusively to scan for more child
+ * this routine in turn call of_scan_bus() recursively to scan for more child
* devices.
*/
void of_scan_pci_bridge(struct pci_dev *dev)
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 9be279469a85..3940db48db77 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -307,7 +307,7 @@ static void __giveup_vsx(struct task_struct *tsk)
unsigned long msr = tsk->thread.regs->msr;
/*
- * We should never be ssetting MSR_VSX without also setting
+ * We should never be setting MSR_VSX without also setting
* MSR_FP and MSR_VEC
*/
WARN_ON((msr & MSR_VSX) && !((msr & MSR_FP) && (msr & MSR_VEC)));
@@ -645,7 +645,7 @@ static void do_break_handler(struct pt_regs *regs)
return;
}
- /* Otherwise findout which DAWR caused exception and disable it. */
+ /* Otherwise find out which DAWR caused exception and disable it. */
wp_get_instr_detail(regs, &instr, &type, &size, &ea);
for (i = 0; i < nr_wp_slots(); i++) {
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 0ac5faacc909..ace861ec4c4c 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -3416,7 +3416,7 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
*
* PowerMacs use a different mechanism to spin CPUs
*
- * (This must be done after instanciating RTAS)
+ * (This must be done after instantiating RTAS)
*/
if (of_platform != PLATFORM_POWERMAC)
prom_hold_cpus();
diff --git a/arch/powerpc/kernel/ptrace/ptrace-view.c b/arch/powerpc/kernel/ptrace/ptrace-view.c
index f15bc78caf71..076d867412c7 100644
--- a/arch/powerpc/kernel/ptrace/ptrace-view.c
+++ b/arch/powerpc/kernel/ptrace/ptrace-view.c
@@ -174,7 +174,7 @@ int ptrace_get_reg(struct task_struct *task, int regno, unsigned long *data)
/*
* softe copies paca->irq_soft_mask variable state. Since irq_soft_mask is
- * no more used as a flag, lets force usr to alway see the softe value as 1
+ * no more used as a flag, lets force usr to always see the softe value as 1
* which means interrupts are not soft disabled.
*/
if (IS_ENABLED(CONFIG_PPC64) && regno == PT_SOFTE) {
diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c
index a99179d83538..bc817a5619d6 100644
--- a/arch/powerpc/kernel/rtas_flash.c
+++ b/arch/powerpc/kernel/rtas_flash.c
@@ -120,7 +120,7 @@ static struct kmem_cache *flash_block_cache = NULL;
/*
* Local copy of the flash block list.
*
- * The rtas_firmware_flash_list varable will be
+ * The rtas_firmware_flash_list variable will be
* set once the data is fully read.
*
* For convenience as we build the list we use virtual addrs,
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 518ae5aa9410..3acf2782acdf 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -279,7 +279,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
proc_freq / 1000000, proc_freq % 1000000);
/* If we are a Freescale core do a simple check so
- * we dont have to keep adding cases in the future */
+ * we don't have to keep adding cases in the future */
if (PVR_VER(pvr) & 0x8000) {
switch (PVR_VER(pvr)) {
case 0x8000: /* 7441/7450/7451, Voyager */
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 73d483b07ff3..858fc13b8c51 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -123,7 +123,7 @@ static long notrace __unsafe_setup_sigcontext(struct sigcontext __user *sc,
#endif
struct pt_regs *regs = tsk->thread.regs;
unsigned long msr = regs->msr;
- /* Force usr to alway see softe as 1 (interrupts enabled) */
+ /* Force usr to always see softe as 1 (interrupts enabled) */
unsigned long softe = 0x1;
BUG_ON(tsk != current);
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index de0f6f09a5dd..a69df557e2b7 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -1102,7 +1102,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
DBG("smp_prepare_cpus\n");
/*
- * setup_cpu may need to be called on the boot cpu. We havent
+ * setup_cpu may need to be called on the boot cpu. We haven't
* spun any cpus up but lets be paranoid.
*/
BUG_ON(boot_cpuid != smp_processor_id());
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index f80cce0e3899..4bf757ebe13d 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -829,7 +829,7 @@ static void __read_persistent_clock(struct timespec64 *ts)
static int first = 1;
ts->tv_nsec = 0;
- /* XXX this is a litle fragile but will work okay in the short term */
+ /* XXX this is a little fragile but will work okay in the short term */
if (first) {
first = 0;
if (ppc_md.time_init)
@@ -974,7 +974,7 @@ void secondary_cpu_time_init(void)
*/
start_cpu_decrementer();
- /* FIME: Should make unrelatred change to move snapshot_timebase
+ /* FIME: Should make unrelated change to move snapshot_timebase
* call here ! */
register_decrementer_clockevent(smp_processor_id());
}
diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
index bfc27496fe7e..7d28b9553654 100644
--- a/arch/powerpc/kernel/watchdog.c
+++ b/arch/powerpc/kernel/watchdog.c
@@ -56,7 +56,7 @@
* solved by also having a SMP watchdog where all CPUs check all other
* CPUs heartbeat.
*
- * The SMP checker can detect lockups on other CPUs. A gobal "pending"
+ * The SMP checker can detect lockups on other CPUs. A global "pending"
* cpumask is kept, containing all CPUs which enable the watchdog. Each
* CPU clears their pending bit in their heartbeat timer. When the bitmask
* becomes empty, the last CPU to clear its pending bit updates a global
diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c
index 6cc7793b8420..c29c639551fe 100644
--- a/arch/powerpc/kexec/core_64.c
+++ b/arch/powerpc/kexec/core_64.c
@@ -406,7 +406,7 @@ static int __init export_htab_values(void)
if (!node)
return -ENODEV;
- /* remove any stale propertys so ours can be found */
+ /* remove any stale properties so ours can be found */
of_remove_property(node, of_find_property(node, htab_base_prop.name, NULL));
of_remove_property(node, of_find_property(node, htab_size_prop.name, NULL));
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 0aeb51738ca9..1137c4df726c 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -58,7 +58,7 @@ struct kvm_resize_hpt {
/* Possible values and their usage:
* <0 an error occurred during allocation,
* -EBUSY allocation is in the progress,
- * 0 allocation made successfuly.
+ * 0 allocation made successfully.
*/
int error;
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
index fdeda6a9cff4..fdcc7b287dd8 100644
--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -453,7 +453,7 @@ static long kvmppc_rm_ua_to_hpa(struct kvm_vcpu *vcpu, unsigned long mmu_seq,
* we are doing this on secondary cpus and current task there
* is not the hypervisor. Also this is safe against THP in the
* host, because an IPI to primary thread will wait for the secondary
- * to exit which will agains result in the below page table walk
+ * to exit which will again result in the below page table walk
* to finish.
*/
/* an rmap lock won't make it safe. because that just ensure hash
diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
index fdb57be71aa6..5bbfb2eed127 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -268,7 +268,7 @@ int kvmppc_core_emulate_op_pr(struct kvm_vcpu *vcpu,
/*
* add rules to fit in ISA specification regarding TM
- * state transistion in TM disable/Suspended state,
+ * state transition in TM disable/Suspended state,
* and target TM state is TM inactive(00) state. (the
* change should be suppressed).
*/
diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c b/arch/powerpc/kvm/book3s_hv_p9_entry.c
index a28e5b3daabd..ac38c1cad378 100644
--- a/arch/powerpc/kvm/book3s_hv_p9_entry.c
+++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c
@@ -379,7 +379,7 @@ void restore_p9_host_os_sprs(struct kvm_vcpu *vcpu,
{
/*
* current->thread.xxx registers must all be restored to host
- * values before a potential context switch, othrewise the context
+ * values before a potential context switch, otherwise the context
* switch itself will overwrite current->thread.xxx with the values
* from the guest SPRs.
*/
diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c
index 36f2314c58e5..598006301620 100644
--- a/arch/powerpc/kvm/book3s_hv_uvmem.c
+++ b/arch/powerpc/kvm/book3s_hv_uvmem.c
@@ -120,7 +120,7 @@ static DEFINE_SPINLOCK(kvmppc_uvmem_bitmap_lock);
* content is un-encrypted.
*
* (c) Normal - The GFN is a normal. The GFN is associated with
- * a normal VM. The contents of the GFN is accesible to
+ * a normal VM. The contents of the GFN is accessible to
* the Hypervisor. Its content is never encrypted.
*
* States of a VM.
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 7bf9e6ca5c2d..d6abed6e51e6 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -1287,7 +1287,7 @@ int kvmppc_handle_exit_pr(struct kvm_vcpu *vcpu, unsigned int exit_nr)
/* Get last sc for papr */
if (vcpu->arch.papr_enabled) {
- /* The sc instuction points SRR0 to the next inst */
+ /* The sc instruction points SRR0 to the next inst */
emul = kvmppc_get_last_inst(vcpu, INST_SC, &last_sc);
if (emul != EMULATE_DONE) {
kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) - 4);
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index ab6d37d78c62..589a8f257120 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -462,7 +462,7 @@ static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
* new guy. We cannot assume that the rejected interrupt is less
* favored than the new one, and thus doesn't need to be delivered,
* because by the time we exit icp_try_to_deliver() the target
- * processor may well have alrady consumed & completed it, and thus
+ * processor may well have already consumed & completed it, and thus
* the rejected interrupt might actually be already acceptable.
*/
if (icp_try_to_deliver(icp, new_irq, state->priority, &reject)) {
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index c0ce5531d9bc..24d434f1f012 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -124,7 +124,7 @@ void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu)
* interrupt might have fired and be on its way to the
* host queue while we mask it, and if we unmask it
* early enough (re-cede right away), there is a
- * theorical possibility that it fires again, thus
+ * theoretical possibility that it fires again, thus
* landing in the target queue more than once which is
* a big no-no.
*
@@ -622,7 +622,7 @@ static int xive_target_interrupt(struct kvm *kvm,
/*
* Targetting rules: In order to avoid losing track of
- * pending interrupts accross mask and unmask, which would
+ * pending interrupts across mask and unmask, which would
* allow queue overflows, we implement the following rules:
*
* - Unless it was never enabled (or we run out of capacity)
@@ -1073,7 +1073,7 @@ int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq,
/*
* If old_p is set, the interrupt is pending, we switch it to
* PQ=11. This will force a resend in the host so the interrupt
- * isn't lost to whatver host driver may pick it up
+ * isn't lost to whatever host driver may pick it up
*/
if (state->old_p)
xive_vm_esb_load(state->pt_data, XIVE_ESB_SET_PQ_11);
diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c
index fa0d8dbbe484..4ff1372e48d4 100644
--- a/arch/powerpc/kvm/e500mc.c
+++ b/arch/powerpc/kvm/e500mc.c
@@ -309,7 +309,7 @@ static int kvmppc_core_vcpu_create_e500mc(struct kvm_vcpu *vcpu)
BUILD_BUG_ON(offsetof(struct kvmppc_vcpu_e500, vcpu) != 0);
vcpu_e500 = to_e500(vcpu);
- /* Invalid PIR value -- this LPID dosn't have valid state on any cpu */
+ /* Invalid PIR value -- this LPID doesn't have valid state on any cpu */
vcpu->arch.oldpir = 0xffffffff;
err = kvmppc_e500_tlb_init(vcpu_e500);
diff --git a/arch/powerpc/mm/book3s64/hash_pgtable.c b/arch/powerpc/mm/book3s64/hash_pgtable.c
index 7ce8914992e3..2e0cad5817ba 100644
--- a/arch/powerpc/mm/book3s64/hash_pgtable.c
+++ b/arch/powerpc/mm/book3s64/hash_pgtable.c
@@ -377,7 +377,7 @@ int hash__has_transparent_hugepage(void)
if (mmu_psize_defs[MMU_PAGE_16M].shift != PMD_SHIFT)
return 0;
/*
- * We need to make sure that we support 16MB hugepage in a segement
+ * We need to make sure that we support 16MB hugepage in a segment
* with base page size 64K or 4K. We only enable THP with a PAGE_SIZE
* of 64K.
*/
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index 985cabdd7f67..5b69b271707e 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -1343,7 +1343,7 @@ static int subpage_protection(struct mm_struct *mm, unsigned long ea)
spp >>= 30 - 2 * ((ea >> 12) & 0xf);
/*
- * 0 -> full premission
+ * 0 -> full permission
* 1 -> Read only
* 2 -> no access.
* We return the flag that need to be cleared.
@@ -1664,7 +1664,7 @@ DEFINE_INTERRUPT_HANDLER(do_hash_fault)
err = hash_page_mm(mm, ea, access, TRAP(regs), flags);
if (unlikely(err < 0)) {
- // failed to instert a hash PTE due to an hypervisor error
+ // failed to insert a hash PTE due to an hypervisor error
if (user_mode(regs)) {
if (IS_ENABLED(CONFIG_PPC_SUBPAGE_PROT) && err == -2)
_exception(SIGSEGV, regs, SEGV_ACCERR, ea);
diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c
index 052e6590f84f..071bb66c3ad9 100644
--- a/arch/powerpc/mm/book3s64/pgtable.c
+++ b/arch/powerpc/mm/book3s64/pgtable.c
@@ -331,7 +331,7 @@ static pmd_t *__alloc_for_pmdcache(struct mm_struct *mm)
spin_lock(&mm->page_table_lock);
/*
* If we find pgtable_page set, we return
- * the allocated page with single fragement
+ * the allocated page with single fragment
* count.
*/
if (likely(!mm->context.pmd_frag)) {
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index def04631a74d..db2f3d193448 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -359,7 +359,7 @@ static void __init radix_init_pgtable(void)
if (!cpu_has_feature(CPU_FTR_HVMODE) &&
cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) {
/*
- * Older versions of KVM on these machines perfer if the
+ * Older versions of KVM on these machines prefer if the
* guest only uses the low 19 PID bits.
*/
mmu_pid_bits = 19;
diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
index 7724af19ed7e..dda51fef2d2e 100644
--- a/arch/powerpc/mm/book3s64/radix_tlb.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
@@ -397,7 +397,7 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
/*
* Workaround the fact that the "ric" argument to __tlbie_pid
- * must be a compile-time contraint to match the "i" constraint
+ * must be a compile-time constraint to match the "i" constraint
* in the asm statement.
*/
switch (ric) {
diff --git a/arch/powerpc/mm/book3s64/slb.c b/arch/powerpc/mm/book3s64/slb.c
index 81091b9587f6..6956f637a38c 100644
--- a/arch/powerpc/mm/book3s64/slb.c
+++ b/arch/powerpc/mm/book3s64/slb.c
@@ -347,7 +347,7 @@ void slb_setup_new_exec(void)
/*
* We have no good place to clear the slb preload cache on exec,
* flush_thread is about the earliest arch hook but that happens
- * after we switch to the mm and have aleady preloaded the SLBEs.
+ * after we switch to the mm and have already preloaded the SLBEs.
*
* For the most part that's probably okay to use entries from the
* previous exec, they will age out if unused. It may turn out to
@@ -615,7 +615,7 @@ static void slb_cache_update(unsigned long esid_data)
} else {
/*
* Our cache is full and the current cache content strictly
- * doesn't indicate the active SLB conents. Bump the ptr
+ * doesn't indicate the active SLB contents. Bump the ptr
* so that switch_slb() will ignore the cache.
*/
local_paca->slb_cache_ptr = SLB_CACHE_ENTRIES + 1;
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 83c0ee9fbf05..2e11952057f8 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -111,7 +111,7 @@ static int __meminit vmemmap_populated(unsigned long vmemmap_addr, int vmemmap_m
}
/*
- * vmemmap virtual address space management does not have a traditonal page
+ * vmemmap virtual address space management does not have a traditional page
* table to track which virtual struct pages are backed by physical mapping.
* The virtual to physical mappings are tracked in a simple linked list
* format. 'vmemmap_list' maintains the entire vmemmap physical mapping at
@@ -128,7 +128,7 @@ static struct vmemmap_backing *next;
/*
* The same pointer 'next' tracks individual chunks inside the allocated
- * full page during the boot time and again tracks the freeed nodes during
+ * full page during the boot time and again tracks the freed nodes during
* runtime. It is racy but it does not happen as they are separated by the
* boot process. Will create problem if some how we have memory hotplug
* operation during boot !!
diff --git a/arch/powerpc/mm/nohash/book3e_hugetlbpage.c b/arch/powerpc/mm/nohash/book3e_hugetlbpage.c
index 8b88be91b622..307ca919d393 100644
--- a/arch/powerpc/mm/nohash/book3e_hugetlbpage.c
+++ b/arch/powerpc/mm/nohash/book3e_hugetlbpage.c
@@ -142,7 +142,7 @@ book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea, pte_t pte)
tsize = shift - 10;
/*
* We can't be interrupted while we're setting up the MAS
- * regusters or after we've confirmed that no tlb exists.
+ * registers or after we've confirmed that no tlb exists.
*/
local_irq_save(flags);
diff --git a/arch/powerpc/mm/nohash/kaslr_booke.c b/arch/powerpc/mm/nohash/kaslr_booke.c
index 5f81c076621f..37eb8d80bd53 100644
--- a/arch/powerpc/mm/nohash/kaslr_booke.c
+++ b/arch/powerpc/mm/nohash/kaslr_booke.c
@@ -311,7 +311,7 @@ static unsigned long __init kaslr_choose_location(void *dt_ptr, phys_addr_t size
ram = map_mem_in_cams(ram, CONFIG_LOWMEM_CAM_NUM, true, true);
linear_sz = min_t(unsigned long, ram, SZ_512M);
- /* If the linear size is smaller than 64M, do not randmize */
+ /* If the linear size is smaller than 64M, do not randomize */
if (linear_sz < SZ_64M)
return 0;
diff --git a/arch/powerpc/mm/pgtable-frag.c b/arch/powerpc/mm/pgtable-frag.c
index 97ae4935da79..20652daa1d7e 100644
--- a/arch/powerpc/mm/pgtable-frag.c
+++ b/arch/powerpc/mm/pgtable-frag.c
@@ -83,7 +83,7 @@ static pte_t *__alloc_for_ptecache(struct mm_struct *mm, int kernel)
spin_lock(&mm->page_table_lock);
/*
* If we find pgtable_page set, we return
- * the allocated page with single fragement
+ * the allocated page with single fragment
* count.
*/
if (likely(!pte_frag_get(&mm->context))) {
diff --git a/arch/powerpc/perf/8xx-pmu.c b/arch/powerpc/perf/8xx-pmu.c
index 4738c4dbf567..308a2e40d7be 100644
--- a/arch/powerpc/perf/8xx-pmu.c
+++ b/arch/powerpc/perf/8xx-pmu.c
@@ -157,7 +157,7 @@ static void mpc8xx_pmu_del(struct perf_event *event, int flags)
mpc8xx_pmu_read(event);
- /* If it was the last user, stop counting to avoid useles overhead */
+ /* If it was the last user, stop counting to avoid useless overhead */
switch (event_type(event)) {
case PERF_8xx_ID_CPU_CYCLES:
break;
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 3adc08254875..03c64a0195df 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -1142,7 +1142,7 @@ static u64 check_and_compute_delta(u64 prev, u64 val)
/*
* POWER7 can roll back counter values, if the new value is smaller
* than the previous value it will cause the delta and the counter to
- * have bogus values unless we rolled a counter over. If a coutner is
+ * have bogus values unless we rolled a counter over. If a counter is
* rolled back, it will be smaller, but within 256, which is the maximum
* number of events to rollback at once. If we detect a rollback
* return 0. This can lead to a small lack of precision in the
@@ -2052,7 +2052,7 @@ static int power_pmu_event_init(struct perf_event *event)
/*
* PMU config registers have fields that are
* reserved and some specific values for bit fields are reserved.
- * For ex., MMCRA[61:62] is Randome Sampling Mode (SM)
+ * For ex., MMCRA[61:62] is Random Sampling Mode (SM)
* and value of 0b11 to this field is reserved.
* Check for invalid values in attr.config.
*/
@@ -2442,7 +2442,7 @@ static void __perf_event_interrupt(struct pt_regs *regs)
}
/*
- * During system wide profling or while specific CPU is monitored for an
+ * During system wide profiling or while specific CPU is monitored for an
* event, some corner cases could cause PMC to overflow in idle path. This
* will trigger a PMI after waking up from idle. Since counter values are _not_
* saved/restored in idle path, can lead to below "Can't find PMC" message.
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index 526d4b767534..498f1a2f7658 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -521,7 +521,7 @@ static int nest_imc_event_init(struct perf_event *event)
/*
* Nest HW counter memory resides in a per-chip reserve-memory (HOMER).
- * Get the base memory addresss for this cpu.
+ * Get the base memory address for this cpu.
*/
chip_id = cpu_to_chip_id(event->cpu);
@@ -674,7 +674,7 @@ static int ppc_core_imc_cpu_offline(unsigned int cpu)
/*
* Check whether core_imc is registered. We could end up here
* if the cpuhotplug callback registration fails. i.e, callback
- * invokes the offline path for all sucessfully registered cpus.
+ * invokes the offline path for all successfully registered cpus.
* At this stage, core_imc pmu will not be registered and we
* should return here.
*
diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c
index bb5d64862bc9..42abbcfc73da 100644
--- a/arch/powerpc/perf/isa207-common.c
+++ b/arch/powerpc/perf/isa207-common.c
@@ -82,11 +82,11 @@ static unsigned long sdar_mod_val(u64 event)
static void mmcra_sdar_mode(u64 event, unsigned long *mmcra)
{
/*
- * MMCRA[SDAR_MODE] specifices how the SDAR should be updated in
- * continous sampling mode.
+ * MMCRA[SDAR_MODE] specifies how the SDAR should be updated in
+ * continuous sampling mode.
*
* Incase of Power8:
- * MMCRA[SDAR_MODE] will be programmed as "0b01" for continous sampling
+ * MMCRA[SDAR_MODE] will be programmed as "0b01" for continuous sampling
* mode and will be un-changed when setting MMCRA[63] (Marked events).
*
* Incase of Power9/power10:
diff --git a/arch/powerpc/platforms/512x/clock-commonclk.c b/arch/powerpc/platforms/512x/clock-commonclk.c
index 0b03d812baae..0652c7e69225 100644
--- a/arch/powerpc/platforms/512x/clock-commonclk.c
+++ b/arch/powerpc/platforms/512x/clock-commonclk.c
@@ -663,7 +663,7 @@ static void __init mpc512x_clk_setup_mclk(struct mclk_setup_data *entry, size_t
* the PSC/MSCAN/SPDIF (serial drivers et al) need the MCLK
* for their bitrate
* - in the absence of "aliases" for clocks we need to create
- * individial 'struct clk' items for whatever might get
+ * individual 'struct clk' items for whatever might get
* referenced or looked up, even if several of those items are
* identical from the logical POV (their rate value)
* - for easier future maintenance and for better reflection of
diff --git a/arch/powerpc/platforms/512x/mpc512x_shared.c b/arch/powerpc/platforms/512x/mpc512x_shared.c
index e3411663edad..96d9cf49560d 100644
--- a/arch/powerpc/platforms/512x/mpc512x_shared.c
+++ b/arch/powerpc/platforms/512x/mpc512x_shared.c
@@ -289,7 +289,7 @@ static void __init mpc512x_setup_diu(void)
/*
* We do not allocate and configure new area for bitmap buffer
- * because it would requere copying bitmap data (splash image)
+ * because it would require copying bitmap data (splash image)
* and so negatively affect boot time. Instead we reserve the
* already configured frame buffer area so that it won't be
* destroyed. The starting address of the area to reserve and
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_common.c b/arch/powerpc/platforms/52xx/mpc52xx_common.c
index 565e3a83dc9e..60aa6015e284 100644
--- a/arch/powerpc/platforms/52xx/mpc52xx_common.c
+++ b/arch/powerpc/platforms/52xx/mpc52xx_common.c
@@ -308,7 +308,7 @@ int mpc5200_psc_ac97_gpio_reset(int psc_number)
spin_lock_irqsave(&gpio_lock, flags);
- /* Reconfiure pin-muxing to gpio */
+ /* Reconfigure pin-muxing to gpio */
mux = in_be32(&simple_gpio->port_config);
out_be32(&simple_gpio->port_config, mux & (~gpio));
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
index f862b48b4824..7252d992ca9f 100644
--- a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
+++ b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
@@ -398,7 +398,7 @@ static int mpc52xx_gpt_do_start(struct mpc52xx_gpt_priv *gpt, u64 period,
set |= MPC52xx_GPT_MODE_CONTINUOUS;
/* Determine the number of clocks in the requested period. 64 bit
- * arithmatic is done here to preserve the precision until the value
+ * arithmetic is done here to preserve the precision until the value
* is scaled back down into the u32 range. Period is in 'ns', bus
* frequency is in Hz. */
clocks = period * (u64)gpt->ipb_freq;
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c b/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c
index b91ebebd9ff2..54dfc63809d3 100644
--- a/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c
+++ b/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c
@@ -104,7 +104,7 @@ static void mpc52xx_lpbfifo_kick(struct mpc52xx_lpbfifo_request *req)
*
* Configure the watermarks so DMA will always complete correctly.
* It may be worth experimenting with the ALARM value to see if
- * there is a performance impacit. However, if it is wrong there
+ * there is a performance impact. However, if it is wrong there
* is a risk of DMA not transferring the last chunk of data
*/
if (write) {
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_cds.c b/arch/powerpc/platforms/85xx/mpc85xx_cds.c
index 5bd487030256..ad0b88ecfd0c 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_cds.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_cds.c
@@ -151,7 +151,7 @@ static void __init mpc85xx_cds_pci_irq_fixup(struct pci_dev *dev)
*/
case PCI_DEVICE_ID_VIA_82C586_2:
/* There are two USB controllers.
- * Identify them by functon number
+ * Identify them by function number
*/
if (PCI_FUNC(dev->devfn) == 3)
dev->irq = 11;
diff --git a/arch/powerpc/platforms/86xx/gef_ppc9a.c b/arch/powerpc/platforms/86xx/gef_ppc9a.c
index 44bbbc535e1d..884da08806ce 100644
--- a/arch/powerpc/platforms/86xx/gef_ppc9a.c
+++ b/arch/powerpc/platforms/86xx/gef_ppc9a.c
@@ -180,7 +180,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NEC, PCI_DEVICE_ID_NEC_USB,
*
* This function is called to determine whether the BSP is compatible with the
* supplied device-tree, which is assumed to be the correct one for the actual
- * board. It is expected thati, in the future, a kernel may support multiple
+ * board. It is expected that, in the future, a kernel may support multiple
* boards.
*/
static int __init gef_ppc9a_probe(void)
diff --git a/arch/powerpc/platforms/86xx/gef_sbc310.c b/arch/powerpc/platforms/86xx/gef_sbc310.c
index 46d6d3d4957a..baaf1ab07016 100644
--- a/arch/powerpc/platforms/86xx/gef_sbc310.c
+++ b/arch/powerpc/platforms/86xx/gef_sbc310.c
@@ -167,7 +167,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NEC, PCI_DEVICE_ID_NEC_USB,
*
* This function is called to determine whether the BSP is compatible with the
* supplied device-tree, which is assumed to be the correct one for the actual
- * board. It is expected thati, in the future, a kernel may support multiple
+ * board. It is expected that, in the future, a kernel may support multiple
* boards.
*/
static int __init gef_sbc310_probe(void)
diff --git a/arch/powerpc/platforms/86xx/gef_sbc610.c b/arch/powerpc/platforms/86xx/gef_sbc610.c
index acf2c6c3c1eb..120caf6af71d 100644
--- a/arch/powerpc/platforms/86xx/gef_sbc610.c
+++ b/arch/powerpc/platforms/86xx/gef_sbc610.c
@@ -157,7 +157,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NEC, PCI_DEVICE_ID_NEC_USB,
*
* This function is called to determine whether the BSP is compatible with the
* supplied device-tree, which is assumed to be the correct one for the actual
- * board. It is expected thati, in the future, a kernel may support multiple
+ * board. It is expected that, in the future, a kernel may support multiple
* boards.
*/
static int __init gef_sbc610_probe(void)
diff --git a/arch/powerpc/platforms/book3s/vas-api.c b/arch/powerpc/platforms/book3s/vas-api.c
index f9a1615b74da..c0799fb26b6d 100644
--- a/arch/powerpc/platforms/book3s/vas-api.c
+++ b/arch/powerpc/platforms/book3s/vas-api.c
@@ -30,7 +30,7 @@
*
* where "vas_copy" and "vas_paste" are defined in copy-paste.h.
* copy/paste returns to the user space directly. So refer NX hardware
- * documententation for exact copy/paste usage and completion / error
+ * documentation for exact copy/paste usage and completion / error
* conditions.
*/
diff --git a/arch/powerpc/platforms/cell/cbe_regs.c b/arch/powerpc/platforms/cell/cbe_regs.c
index 1c4c53bec66c..03512a41bd7e 100644
--- a/arch/powerpc/platforms/cell/cbe_regs.c
+++ b/arch/powerpc/platforms/cell/cbe_regs.c
@@ -23,7 +23,7 @@
* Current implementation uses "cpu" nodes. We build our own mapping
* array of cpu numbers to cpu nodes locally for now to allow interrupt
* time code to have a fast path rather than call of_get_cpu_node(). If
- * we implement cpu hotplug, we'll have to install an appropriate norifier
+ * we implement cpu hotplug, we'll have to install an appropriate notifier
* in order to release references to the cpu going away
*/
static struct cbe_regs_map
diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c
index 25e726bf0172..3f141cf5e580 100644
--- a/arch/powerpc/platforms/cell/iommu.c
+++ b/arch/powerpc/platforms/cell/iommu.c
@@ -582,7 +582,7 @@ static int cell_of_bus_notify(struct notifier_block *nb, unsigned long action,
{
struct device *dev = data;
- /* We are only intereted in device addition */
+ /* We are only interested in device addition */
if (action != BUS_NOTIFY_ADD_DEVICE)
return 0;
diff --git a/arch/powerpc/platforms/cell/spider-pci.c b/arch/powerpc/platforms/cell/spider-pci.c
index a1c293f42a1f..3a2ea8376e32 100644
--- a/arch/powerpc/platforms/cell/spider-pci.c
+++ b/arch/powerpc/platforms/cell/spider-pci.c
@@ -81,7 +81,7 @@ static int __init spiderpci_pci_setup_chip(struct pci_controller *phb,
/*
* On CellBlade, we can't know that which XDR memory is used by
* kmalloc() to allocate dummy_page_va.
- * In order to imporve the performance, the XDR which is used to
+ * In order to improve the performance, the XDR which is used to
* allocate dummy_page_va is the nearest the spider-pci.
* We have to select the CBE which is the nearest the spider-pci
* to allocate memory from the best XDR, but I don't know that
diff --git a/arch/powerpc/platforms/cell/spu_manage.c b/arch/powerpc/platforms/cell/spu_manage.c
index ddf8742f09a3..080ed2d2c682 100644
--- a/arch/powerpc/platforms/cell/spu_manage.c
+++ b/arch/powerpc/platforms/cell/spu_manage.c
@@ -457,7 +457,7 @@ static void __init init_affinity_node(int cbe)
/*
* Walk through each phandle in vicinity property of the spu
- * (tipically two vicinity phandles per spe node)
+ * (typically two vicinity phandles per spe node)
*/
for (i = 0; i < (lenp / sizeof(phandle)); i++) {
if (vic_handles[i] == avoid_ph)
diff --git a/arch/powerpc/platforms/powermac/low_i2c.c b/arch/powerpc/platforms/powermac/low_i2c.c
index df89d916236d..9aded0188ce8 100644
--- a/arch/powerpc/platforms/powermac/low_i2c.c
+++ b/arch/powerpc/platforms/powermac/low_i2c.c
@@ -1472,7 +1472,7 @@ int __init pmac_i2c_init(void)
smu_i2c_probe();
#endif
- /* Now add plaform functions for some known devices */
+ /* Now add platform functions for some known devices */
pmac_i2c_devscan(pmac_i2c_dev_create);
return 0;
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index 89e22c460ebf..33f7b959c810 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -390,7 +390,7 @@ static struct eeh_dev *pnv_eeh_probe(struct pci_dev *pdev)
* should be blocked until PE reset. MMIO access is dropped
* by hardware certainly. In order to drop PCI config requests,
* one more flag (EEH_PE_CFG_RESTRICTED) is introduced, which
- * will be checked in the backend for PE state retrival. If
+ * will be checked in the backend for PE state retrieval. If
* the PE becomes frozen for the first time and the flag has
* been set for the PE, we will set EEH_PE_CFG_BLOCKED for
* that PE to block its config space.
@@ -981,7 +981,7 @@ static int pnv_eeh_do_af_flr(struct pci_dn *pdn, int option)
case EEH_RESET_FUNDAMENTAL:
/*
* Wait for Transaction Pending bit to clear. A word-aligned
- * test is used, so we use the conrol offset rather than status
+ * test is used, so we use the control offset rather than status
* and shift the test bit to match.
*/
pnv_eeh_wait_for_pending(pdn, "AF",
@@ -1048,7 +1048,7 @@ static int pnv_eeh_reset(struct eeh_pe *pe, int option)
* frozen state during PE reset. However, the good idea here from
* benh is to keep frozen state before we get PE reset done completely
* (until BAR restore). With the frozen state, HW drops illegal IO
- * or MMIO access, which can incur recrusive frozen PE during PE
+ * or MMIO access, which can incur recursive frozen PE during PE
* reset. The side effect is that EEH core has to clear the frozen
* state explicitly after BAR restore.
*/
@@ -1095,8 +1095,8 @@ static int pnv_eeh_reset(struct eeh_pe *pe, int option)
* bus is behind a hotplug slot and it will use the slot provided
* reset methods to prevent spurious hotplug events during the reset.
*
- * Fundemental resets need to be handled internally to EEH since the
- * PCI core doesn't really have a concept of a fundemental reset,
+ * Fundamental resets need to be handled internally to EEH since the
+ * PCI core doesn't really have a concept of a fundamental reset,
* mainly because there's no standard way to generate one. Only a
* few devices require an FRESET so it should be fine.
*/
diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index a6677a111aca..6f94b808dd39 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -112,7 +112,7 @@ static int __init pnv_save_sprs_for_deep_states(void)
if (rc != 0)
return rc;
- /* Only p8 needs to set extra HID regiters */
+ /* Only p8 needs to set extra HID registers */
if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
uint64_t hid1_val = mfspr(SPRN_HID1);
uint64_t hid4_val = mfspr(SPRN_HID4);
@@ -1204,7 +1204,7 @@ static void __init pnv_arch300_idle_init(void)
* The idle code does not deal with TB loss occurring
* in a shallower state than SPR loss, so force it to
* behave like SPRs are lost if TB is lost. POWER9 would
- * never encouter this, but a POWER8 core would if it
+ * never encounter this, but a POWER8 core would if it
* implemented the stop instruction. So this is for forward
* compatibility.
*/
diff --git a/arch/powerpc/platforms/powernv/ocxl.c b/arch/powerpc/platforms/powernv/ocxl.c
index 28b009b46464..27c936075031 100644
--- a/arch/powerpc/platforms/powernv/ocxl.c
+++ b/arch/powerpc/platforms/powernv/ocxl.c
@@ -289,7 +289,7 @@ int pnv_ocxl_get_pasid_count(struct pci_dev *dev, int *count)
* be used by a function depends on how many functions exist
* on the device. The NPU needs to be configured to know how
* many bits are available to PASIDs and how many are to be
- * used by the function BDF indentifier.
+ * used by the function BDF identifier.
*
* We only support one AFU-carrying function for now.
*/
diff --git a/arch/powerpc/platforms/powernv/opal-fadump.c b/arch/powerpc/platforms/powernv/opal-fadump.c
index 9d74d3950a52..c1bcd2d4826e 100644
--- a/arch/powerpc/platforms/powernv/opal-fadump.c
+++ b/arch/powerpc/platforms/powernv/opal-fadump.c
@@ -206,7 +206,7 @@ static u64 opal_fadump_init_mem_struct(struct fw_dump *fadump_conf)
opal_fdm->region_cnt = cpu_to_be16(reg_cnt);
/*
- * Kernel metadata is passed to f/w and retrieved in capture kerenl.
+ * Kernel metadata is passed to f/w and retrieved in capture kernel.
* So, use it to save fadump header address instead of calculating it.
*/
opal_fdm->fadumphdr_addr = cpu_to_be64(be64_to_cpu(opal_fdm->rgn[0].dest) +
diff --git a/arch/powerpc/platforms/powernv/opal-lpc.c b/arch/powerpc/platforms/powernv/opal-lpc.c
index 5390c888db16..d129d6d45a50 100644
--- a/arch/powerpc/platforms/powernv/opal-lpc.c
+++ b/arch/powerpc/platforms/powernv/opal-lpc.c
@@ -197,7 +197,7 @@ static ssize_t lpc_debug_read(struct file *filp, char __user *ubuf,
/*
* Select access size based on count and alignment and
- * access type. IO and MEM only support byte acceses,
+ * access type. IO and MEM only support byte accesses,
* FW supports all 3.
*/
len = 1;
diff --git a/arch/powerpc/platforms/powernv/opal-memory-errors.c b/arch/powerpc/platforms/powernv/opal-memory-errors.c
index 1e8e17df9ce8..a1754a28265d 100644
--- a/arch/powerpc/platforms/powernv/opal-memory-errors.c
+++ b/arch/powerpc/platforms/powernv/opal-memory-errors.c
@@ -82,7 +82,7 @@ static DECLARE_WORK(mem_error_work, mem_error_handler);
/*
* opal_memory_err_event - notifier handler that queues up the opal message
- * to be preocessed later.
+ * to be processed later.
*/
static int opal_memory_err_event(struct notifier_block *nb,
unsigned long msg_type, void *msg)
diff --git a/arch/powerpc/platforms/powernv/pci-sriov.c b/arch/powerpc/platforms/powernv/pci-sriov.c
index 04155aaaadb1..fe3d111b881c 100644
--- a/arch/powerpc/platforms/powernv/pci-sriov.c
+++ b/arch/powerpc/platforms/powernv/pci-sriov.c
@@ -699,7 +699,7 @@ static int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
return -ENOSPC;
}
- /* allocate a contigious block of PEs for our VFs */
+ /* allocate a contiguous block of PEs for our VFs */
base_pe = pnv_ioda_alloc_pe(phb, num_vfs);
if (!base_pe) {
pci_err(pdev, "Unable to allocate PEs for %d VFs\n", num_vfs);
diff --git a/arch/powerpc/platforms/ps3/mm.c b/arch/powerpc/platforms/ps3/mm.c
index 5ce924611b94..63ef61ed7597 100644
--- a/arch/powerpc/platforms/ps3/mm.c
+++ b/arch/powerpc/platforms/ps3/mm.c
@@ -364,7 +364,7 @@ static void __maybe_unused _dma_dump_region(const struct ps3_dma_region *r,
* @bus_addr: Starting ioc bus address of the area to map.
* @len: Length in bytes of the area to map.
* @link: A struct list_head used with struct ps3_dma_region.chunk_list, the
- * list of all chuncks owned by the region.
+ * list of all chunks owned by the region.
*
* This implementation uses a very simple dma page manager
* based on the dma_chunk structure. This scheme assumes
diff --git a/arch/powerpc/platforms/ps3/system-bus.c b/arch/powerpc/platforms/ps3/system-bus.c
index b637bf292047..2502e9b17df4 100644
--- a/arch/powerpc/platforms/ps3/system-bus.c
+++ b/arch/powerpc/platforms/ps3/system-bus.c
@@ -601,7 +601,7 @@ static dma_addr_t ps3_ioc0_map_page(struct device *_dev, struct page *page,
iopte_flag |= CBE_IOPTE_PP_W | CBE_IOPTE_SO_RW;
break;
default:
- /* not happned */
+ /* not happened */
BUG();
}
result = ps3_dma_map(dev->d_region, (unsigned long)ptr, size,
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index 09fafcf2d3a0..f0b2bca750da 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -510,7 +510,7 @@ static int pseries_eeh_set_option(struct eeh_pe *pe, int option)
int ret = 0;
/*
- * When we're enabling or disabling EEH functioality on
+ * When we're enabling or disabling EEH functionality on
* the particular PE, the PE config address is possibly
* unavailable. Therefore, we have to figure it out from
* the FDT node.
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 4d991cf840d9..7639e7355df2 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -1430,7 +1430,7 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn)
pci->table_group->tables[1] = newtbl;
- /* Keep default DMA window stuct if removed */
+ /* Keep default DMA window struct if removed */
if (default_win_removed) {
tbl->it_size = 0;
vfree(tbl->it_map);
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index f27735f623ba..27bed0dd866e 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -658,7 +658,7 @@ static resource_size_t pseries_get_iov_fw_value(struct pci_dev *dev, int resno,
*/
num_res = of_read_number(&indexes[NUM_RES_PROPERTY], 1);
if (resno >= num_res)
- return 0; /* or an errror */
+ return 0; /* or an error */
i = START_OF_ENTRIES + NEXT_ENTRY * resno;
switch (value) {
@@ -762,7 +762,7 @@ static void pseries_pci_fixup_iov_resources(struct pci_dev *pdev)
if (!pdev->is_physfn)
return;
- /*Firmware must support open sriov otherwise dont configure*/
+ /*Firmware must support open sriov otherwise don't configure*/
indexes = of_get_property(dn, "ibm,open-sriov-vf-bar-info", NULL);
if (indexes)
of_pci_parse_iov_addrs(pdev, indexes);
diff --git a/arch/powerpc/platforms/pseries/vas-sysfs.c b/arch/powerpc/platforms/pseries/vas-sysfs.c
index ec65586cbeb3..241c84374045 100644
--- a/arch/powerpc/platforms/pseries/vas-sysfs.c
+++ b/arch/powerpc/platforms/pseries/vas-sysfs.c
@@ -76,7 +76,7 @@ struct vas_sysfs_entry {
* Create sysfs interface:
* /sys/devices/vas/vas0/gzip/default_capabilities
* This directory contains the following VAS GZIP capabilities
- * for the defaule credit type.
+ * for the default credit type.
* /sys/devices/vas/vas0/gzip/default_capabilities/nr_total_credits
* Total number of default credits assigned to the LPAR which
* can be changed with DLPAR operation.
diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c
index ec643bbdb67f..500a1fc4a1d7 100644
--- a/arch/powerpc/platforms/pseries/vas.c
+++ b/arch/powerpc/platforms/pseries/vas.c
@@ -801,7 +801,7 @@ int vas_reconfig_capabilties(u8 type, int new_nr_creds)
atomic_set(&caps->nr_total_credits, new_nr_creds);
/*
* The total number of available credits may be decreased or
- * inceased with DLPAR operation. Means some windows have to be
+ * increased with DLPAR operation. Means some windows have to be
* closed / reopened. Hold the vas_pseries_mutex so that the
* the user space can not open new windows.
*/
diff --git a/arch/powerpc/sysdev/fsl_lbc.c b/arch/powerpc/sysdev/fsl_lbc.c
index 1985e067e952..18acfb4e82af 100644
--- a/arch/powerpc/sysdev/fsl_lbc.c
+++ b/arch/powerpc/sysdev/fsl_lbc.c
@@ -37,7 +37,7 @@ EXPORT_SYMBOL(fsl_lbc_ctrl_dev);
*
* This function converts a base address of lbc into the right format for the
* BR register. If the SOC has eLBC then it returns 32bit physical address
- * else it convers a 34bit local bus physical address to correct format of
+ * else it converts a 34bit local bus physical address to correct format of
* 32bit address for BR register (Example: MPC8641).
*/
u32 fsl_lbc_addr(phys_addr_t addr_base)
diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c
index ef49ead8bf2e..3c430a6a6a4e 100644
--- a/arch/powerpc/sysdev/fsl_pci.c
+++ b/arch/powerpc/sysdev/fsl_pci.c
@@ -218,7 +218,7 @@ static void setup_pci_atmu(struct pci_controller *hose)
* windows have implemented the default target value as 0xf
* for CCSR space.In all Freescale legacy devices the target
* of 0xf is reserved for local memory space. 9132 Rev1.0
- * now has local mempry space mapped to target 0x0 instead of
+ * now has local memory space mapped to target 0x0 instead of
* 0xf. Hence adding a workaround to remove the target 0xf
* defined for memory space from Inbound window attributes.
*/
diff --git a/arch/powerpc/sysdev/ge/ge_pic.c b/arch/powerpc/sysdev/ge/ge_pic.c
index 02553a8ce191..413b375c4d28 100644
--- a/arch/powerpc/sysdev/ge/ge_pic.c
+++ b/arch/powerpc/sysdev/ge/ge_pic.c
@@ -150,7 +150,7 @@ static struct irq_chip gef_pic_chip = {
};
-/* When an interrupt is being configured, this call allows some flexibilty
+/* When an interrupt is being configured, this call allows some flexibility
* in deciding which irq_chip structure is used
*/
static int gef_pic_host_map(struct irq_domain *h, unsigned int virq,
diff --git a/arch/powerpc/sysdev/mpic_msgr.c b/arch/powerpc/sysdev/mpic_msgr.c
index 36ec0bdd8b63..a25413826b63 100644
--- a/arch/powerpc/sysdev/mpic_msgr.c
+++ b/arch/powerpc/sysdev/mpic_msgr.c
@@ -99,7 +99,7 @@ void mpic_msgr_disable(struct mpic_msgr *msgr)
EXPORT_SYMBOL_GPL(mpic_msgr_disable);
/* The following three functions are used to compute the order and number of
- * the message register blocks. They are clearly very inefficent. However,
+ * the message register blocks. They are clearly very inefficient. However,
* they are called *only* a few times during device initialization.
*/
static unsigned int mpic_msgr_number_of_blocks(void)
diff --git a/arch/powerpc/sysdev/mpic_msi.c b/arch/powerpc/sysdev/mpic_msi.c
index f412d6ad0b66..9936c014ac7d 100644
--- a/arch/powerpc/sysdev/mpic_msi.c
+++ b/arch/powerpc/sysdev/mpic_msi.c
@@ -37,7 +37,7 @@ static int __init mpic_msi_reserve_u3_hwirqs(struct mpic *mpic)
/* Reserve source numbers we know are reserved in the HW.
*
* This is a bit of a mix of U3 and U4 reserves but that's going
- * to work fine, we have plenty enugh numbers left so let's just
+ * to work fine, we have plenty enough numbers left so let's just
* mark anything we don't like reserved.
*/
for (i = 0; i < 8; i++)
diff --git a/arch/powerpc/sysdev/mpic_timer.c b/arch/powerpc/sysdev/mpic_timer.c
index 444e9ce42d0a..b2f0a73e8f93 100644
--- a/arch/powerpc/sysdev/mpic_timer.c
+++ b/arch/powerpc/sysdev/mpic_timer.c
@@ -255,7 +255,7 @@ EXPORT_SYMBOL(mpic_start_timer);
/**
* mpic_stop_timer - stop hardware timer
- * @handle: the timer to be stoped
+ * @handle: the timer to be stopped
*
* The timer periodically generates an interrupt. Unless user stops the timer.
*/
diff --git a/arch/powerpc/sysdev/mpic_u3msi.c b/arch/powerpc/sysdev/mpic_u3msi.c
index 3f4841dfefb5..73d129594078 100644
--- a/arch/powerpc/sysdev/mpic_u3msi.c
+++ b/arch/powerpc/sysdev/mpic_u3msi.c
@@ -78,7 +78,7 @@ static u64 find_u4_magic_addr(struct pci_dev *pdev, unsigned int hwirq)
/* U4 PCIe MSIs need to write to the special register in
* the bridge that generates interrupts. There should be
- * theorically a register at 0xf8005000 where you just write
+ * theoretically a register at 0xf8005000 where you just write
* the MSI number and that triggers the right interrupt, but
* unfortunately, this is busted in HW, the bridge endian swaps
* the value and hits the wrong nibble in the register.
diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c
index f940428ad13f..45f72fc715fc 100644
--- a/arch/powerpc/sysdev/xive/native.c
+++ b/arch/powerpc/sysdev/xive/native.c
@@ -617,7 +617,7 @@ bool __init xive_native_init(void)
xive_tima_os = r.start;
- /* Grab size of provisionning pages */
+ /* Grab size of provisioning pages */
xive_parse_provisioning(np);
/* Switch the XIVE to exploitation mode */
diff --git a/arch/powerpc/xmon/ppc-opc.c b/arch/powerpc/xmon/ppc-opc.c
index dfb80810b16c..0774d711453e 100644
--- a/arch/powerpc/xmon/ppc-opc.c
+++ b/arch/powerpc/xmon/ppc-opc.c
@@ -408,7 +408,7 @@ const struct powerpc_operand powerpc_operands[] =
#define FXM4 FXM + 1
{ 0xff, 12, insert_fxm, extract_fxm,
PPC_OPERAND_OPTIONAL | PPC_OPERAND_OPTIONAL_VALUE},
- /* If the FXM4 operand is ommitted, use the sentinel value -1. */
+ /* If the FXM4 operand is omitted, use the sentinel value -1. */
{ -1, -1, NULL, NULL, 0},
/* The IMM20 field in an LI instruction. */
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index fd72753e8ad5..27da7d5c2024 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -2024,7 +2024,7 @@ static void dump_206_sprs(void)
if (!cpu_has_feature(CPU_FTR_ARCH_206))
return;
- /* Actually some of these pre-date 2.06, but whatevs */
+ /* Actually some of these pre-date 2.06, but whatever */
printf("srr0 = %.16lx srr1 = %.16lx dsisr = %.8lx\n",
mfspr(SPRN_SRR0), mfspr(SPRN_SRR1), mfspr(SPRN_DSISR));
The quilt patch titled
Subject: dma/pool: do not complain if DMA pool is not allocated
has been removed from the -mm tree. Its filename was
dma-pool-do-not-complain-if-dma-pool-is-not-allocated.patch
This patch was dropped because an alternative patch was or shall be merged
------------------------------------------------------
From: Michal Hocko <mhocko(a)suse.com>
Subject: dma/pool: do not complain if DMA pool is not allocated
Date: Tue, 9 Aug 2022 17:37:59 +0200
We have a system complaining about order-10 allocation for the DMA pool.
[ 14.017417][ T1] swapper/0: page allocation failure: order:10, mode:0xcc1(GFP_KERNEL|GFP_DMA), nodemask=(null),cpuset=/,mems_allowed=0-7
[ 14.017429][ T1] CPU: 4 PID: 1 Comm: swapper/0 Not tainted 5.14.21-150400.22-default #1 SLE15-SP4 0b6a6578ade2de5c4a0b916095dff44f76ef1704
[ 14.017434][ T1] Hardware name: XXXX
[ 14.017437][ T1] Call Trace:
[ 14.017444][ T1] <TASK>
[ 14.017449][ T1] dump_stack_lvl+0x45/0x57
[ 14.017469][ T1] warn_alloc+0xfe/0x160
[ 14.017490][ T1] __alloc_pages_slowpath.constprop.112+0xc27/0xc60
[ 14.017497][ T1] ? rdinit_setup+0x2b/0x2b
[ 14.017509][ T1] ? rdinit_setup+0x2b/0x2b
[ 14.017512][ T1] __alloc_pages+0x2d5/0x320
[ 14.017517][ T1] alloc_page_interleave+0xf/0x70
[ 14.017531][ T1] atomic_pool_expand+0x4a/0x200
[ 14.017541][ T1] ? rdinit_setup+0x2b/0x2b
[ 14.017544][ T1] __dma_atomic_pool_init+0x44/0x90
[ 14.017556][ T1] dma_atomic_pool_init+0xad/0x13f
[ 14.017560][ T1] ? __dma_atomic_pool_init+0x90/0x90
[ 14.017562][ T1] do_one_initcall+0x41/0x200
[ 14.017581][ T1] kernel_init_freeable+0x236/0x298
[ 14.017589][ T1] ? rest_init+0xd0/0xd0
[ 14.017596][ T1] kernel_init+0x16/0x120
[ 14.017599][ T1] ret_from_fork+0x22/0x30
[ 14.017604][ T1] </TASK>
[...]
[ 14.018026][ T1] Node 0 DMA free:160kB boost:0kB min:0kB low:0kB high:0kB reserved_highatomic:0KB active_anon:0kB inactive_anon:0kB active_file:0kB inactive_file:0kB unevictable:0kB writepending:0kB present:15996kB managed:15360kB mlocked:0kB bounce:0kB free_pcp:0kB local_pcp:0kB free_cma:0kB
[ 14.018035][ T1] lowmem_reserve[]: 0 0 0 0 0
[ 14.018339][ T1] Node 0 DMA: 0*4kB 0*8kB 0*16kB 1*32kB (U) 0*64kB 1*128kB (U) 0*256kB 0*512kB 0*1024kB 0*2048kB 0*4096kB = 160kB
The usable memory in the DMA zone is obviously too small for the pool
pre-allocation. The allocation failure raises concern among admins because
it is perceived as an error state.
In fact, the preallocation itself doesn't expose any actual problem. It is
not even clear whether anybody is ever going to use this pool. If so, a
warning will be triggered anyway.
Silence the warning to prevent confusion and bug reports.
Link: https://lkml.kernel.org/r/YvJ/V2bor9Q3P6ov@dhcp22.suse.cz
Signed-off-by: Michal Hocko <mhocko(a)suse.com>
Reviewed-by: Baoquan He <bhe(a)redhat.com>
Cc: Christoph Hellwig <hch(a)lst.de>
Cc: John Donnelly <john.p.donnelly(a)oracle.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
kernel/dma/pool.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/kernel/dma/pool.c~dma-pool-do-not-complain-if-dma-pool-is-not-allocated
+++ a/kernel/dma/pool.c
@@ -205,7 +205,7 @@ static int __init dma_atomic_pool_init(v
ret = -ENOMEM;
if (has_managed_dma()) {
atomic_pool_dma = __dma_atomic_pool_init(atomic_pool_size,
- GFP_KERNEL | GFP_DMA);
+ GFP_KERNEL | GFP_DMA | __GFP_NOWARN);
if (!atomic_pool_dma)
ret = -ENOMEM;
}
_
Patches currently in -mm which might be from mhocko(a)suse.com are
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f323ef3a0d49e147365284bc1f02212e617b7f09 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo(a)netfilter.org>
Date: Mon, 8 Aug 2022 19:30:07 +0200
Subject: [PATCH] netfilter: nf_tables: disallow jump to implicit chain from
set element
Extend struct nft_data_desc to add a flag field that specifies
nft_data_init() is being called for set element data.
Use it to disallow jumps to an implicit chain from a set element; a jump
to such a chain is only allowed via an immediate expression.
Fixes: d0e2c7de92c7 ("netfilter: nf_tables: add NFT_CHAIN_BINDING")
Signed-off-by: Pablo Neira Ayuso <pablo(a)netfilter.org>
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 1554f1e7215b..99aae36c04b9 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -221,10 +221,15 @@ struct nft_ctx {
bool report;
};
+enum nft_data_desc_flags {
+ NFT_DATA_DESC_SETELEM = (1 << 0),
+};
+
struct nft_data_desc {
enum nft_data_types type;
unsigned int size;
unsigned int len;
+ unsigned int flags;
};
int nft_data_init(const struct nft_ctx *ctx, struct nft_data *data,
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 05896765c68f..460b0925ea60 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -5226,6 +5226,7 @@ static int nft_setelem_parse_data(struct nft_ctx *ctx, struct nft_set *set,
desc->type = dtype;
desc->size = NFT_DATA_VALUE_MAXLEN;
desc->len = set->dlen;
+ desc->flags = NFT_DATA_DESC_SETELEM;
return nft_data_init(ctx, data, desc, attr);
}
@@ -9665,6 +9666,9 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
return PTR_ERR(chain);
if (nft_is_base_chain(chain))
return -EOPNOTSUPP;
+ if (desc->flags & NFT_DATA_DESC_SETELEM &&
+ chain->flags & NFT_CHAIN_BINDING)
+ return -EINVAL;
chain->use++;
data->verdict.chain = chain;
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f323ef3a0d49e147365284bc1f02212e617b7f09 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo(a)netfilter.org>
Date: Mon, 8 Aug 2022 19:30:07 +0200
Subject: [PATCH] netfilter: nf_tables: disallow jump to implicit chain from
set element
Extend struct nft_data_desc to add a flag field that specifies
nft_data_init() is being called for set element data.
Use it to disallow jumps to an implicit chain from a set element; a jump
to such a chain is only allowed via an immediate expression.
Fixes: d0e2c7de92c7 ("netfilter: nf_tables: add NFT_CHAIN_BINDING")
Signed-off-by: Pablo Neira Ayuso <pablo(a)netfilter.org>
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 1554f1e7215b..99aae36c04b9 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -221,10 +221,15 @@ struct nft_ctx {
bool report;
};
+enum nft_data_desc_flags {
+ NFT_DATA_DESC_SETELEM = (1 << 0),
+};
+
struct nft_data_desc {
enum nft_data_types type;
unsigned int size;
unsigned int len;
+ unsigned int flags;
};
int nft_data_init(const struct nft_ctx *ctx, struct nft_data *data,
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 05896765c68f..460b0925ea60 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -5226,6 +5226,7 @@ static int nft_setelem_parse_data(struct nft_ctx *ctx, struct nft_set *set,
desc->type = dtype;
desc->size = NFT_DATA_VALUE_MAXLEN;
desc->len = set->dlen;
+ desc->flags = NFT_DATA_DESC_SETELEM;
return nft_data_init(ctx, data, desc, attr);
}
@@ -9665,6 +9666,9 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
return PTR_ERR(chain);
if (nft_is_base_chain(chain))
return -EOPNOTSUPP;
+ if (desc->flags & NFT_DATA_DESC_SETELEM &&
+ chain->flags & NFT_CHAIN_BINDING)
+ return -EINVAL;
chain->use++;
data->verdict.chain = chain;
The patch below does not apply to the 5.18-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f323ef3a0d49e147365284bc1f02212e617b7f09 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo(a)netfilter.org>
Date: Mon, 8 Aug 2022 19:30:07 +0200
Subject: [PATCH] netfilter: nf_tables: disallow jump to implicit chain from
set element
Extend struct nft_data_desc to add a flag field that specifies
nft_data_init() is being called for set element data.
Use it to disallow jumps to an implicit chain from a set element; a jump
to such a chain is only allowed via an immediate expression.
Fixes: d0e2c7de92c7 ("netfilter: nf_tables: add NFT_CHAIN_BINDING")
Signed-off-by: Pablo Neira Ayuso <pablo(a)netfilter.org>
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 1554f1e7215b..99aae36c04b9 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -221,10 +221,15 @@ struct nft_ctx {
bool report;
};
+enum nft_data_desc_flags {
+ NFT_DATA_DESC_SETELEM = (1 << 0),
+};
+
struct nft_data_desc {
enum nft_data_types type;
unsigned int size;
unsigned int len;
+ unsigned int flags;
};
int nft_data_init(const struct nft_ctx *ctx, struct nft_data *data,
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 05896765c68f..460b0925ea60 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -5226,6 +5226,7 @@ static int nft_setelem_parse_data(struct nft_ctx *ctx, struct nft_set *set,
desc->type = dtype;
desc->size = NFT_DATA_VALUE_MAXLEN;
desc->len = set->dlen;
+ desc->flags = NFT_DATA_DESC_SETELEM;
return nft_data_init(ctx, data, desc, attr);
}
@@ -9665,6 +9666,9 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
return PTR_ERR(chain);
if (nft_is_base_chain(chain))
return -EOPNOTSUPP;
+ if (desc->flags & NFT_DATA_DESC_SETELEM &&
+ chain->flags & NFT_CHAIN_BINDING)
+ return -EINVAL;
chain->use++;
data->verdict.chain = chain;
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 470ee20e069a6d05ae549f7d0ef2bdbcee6a81b2 Mon Sep 17 00:00:00 2001
From: Thadeu Lima de Souza Cascardo <cascardo(a)canonical.com>
Date: Tue, 9 Aug 2022 14:01:46 -0300
Subject: [PATCH] netfilter: nf_tables: do not allow SET_ID to refer to another
table
When looking up a set in the same batch by its ID, a set from a
different table can be used.
Then, when the table is removed, a reference to the set may be kept after
the set is freed, leading to a potential use-after-free.
When looking for sets by ID, use the table that was used for the lookup by
name, and only return sets belonging to that same table.
This fixes CVE-2022-2586, also reported as ZDI-CAN-17470.
Reported-by: Team Orca of Sea Security (@seasecresponse)
Fixes: 958bee14d071 ("netfilter: nf_tables: use new transaction infrastructure to handle sets")
Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo(a)canonical.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Pablo Neira Ayuso <pablo(a)netfilter.org>
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 3b09e13b9b5c..41c529b0001c 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -3842,6 +3842,7 @@ static struct nft_set *nft_set_lookup_byhandle(const struct nft_table *table,
}
static struct nft_set *nft_set_lookup_byid(const struct net *net,
+ const struct nft_table *table,
const struct nlattr *nla, u8 genmask)
{
struct nftables_pernet *nft_net = nft_pernet(net);
@@ -3853,6 +3854,7 @@ static struct nft_set *nft_set_lookup_byid(const struct net *net,
struct nft_set *set = nft_trans_set(trans);
if (id == nft_trans_set_id(trans) &&
+ set->table == table &&
nft_active_genmask(set, genmask))
return set;
}
@@ -3873,7 +3875,7 @@ struct nft_set *nft_set_lookup_global(const struct net *net,
if (!nla_set_id)
return set;
- set = nft_set_lookup_byid(net, nla_set_id, genmask);
+ set = nft_set_lookup_byid(net, table, nla_set_id, genmask);
}
return set;
}
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 470ee20e069a6d05ae549f7d0ef2bdbcee6a81b2 Mon Sep 17 00:00:00 2001
From: Thadeu Lima de Souza Cascardo <cascardo(a)canonical.com>
Date: Tue, 9 Aug 2022 14:01:46 -0300
Subject: [PATCH] netfilter: nf_tables: do not allow SET_ID to refer to another
table
When looking up a set in the same batch by its ID, a set from a
different table can be used.
Then, when the table is removed, a reference to the set may be kept after
the set is freed, leading to a potential use-after-free.
When looking for sets by ID, use the table that was used for the lookup by
name, and only return sets belonging to that same table.
This fixes CVE-2022-2586, also reported as ZDI-CAN-17470.
Reported-by: Team Orca of Sea Security (@seasecresponse)
Fixes: 958bee14d071 ("netfilter: nf_tables: use new transaction infrastructure to handle sets")
Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo(a)canonical.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Pablo Neira Ayuso <pablo(a)netfilter.org>
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 3b09e13b9b5c..41c529b0001c 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -3842,6 +3842,7 @@ static struct nft_set *nft_set_lookup_byhandle(const struct nft_table *table,
}
static struct nft_set *nft_set_lookup_byid(const struct net *net,
+ const struct nft_table *table,
const struct nlattr *nla, u8 genmask)
{
struct nftables_pernet *nft_net = nft_pernet(net);
@@ -3853,6 +3854,7 @@ static struct nft_set *nft_set_lookup_byid(const struct net *net,
struct nft_set *set = nft_trans_set(trans);
if (id == nft_trans_set_id(trans) &&
+ set->table == table &&
nft_active_genmask(set, genmask))
return set;
}
@@ -3873,7 +3875,7 @@ struct nft_set *nft_set_lookup_global(const struct net *net,
if (!nla_set_id)
return set;
- set = nft_set_lookup_byid(net, nla_set_id, genmask);
+ set = nft_set_lookup_byid(net, table, nla_set_id, genmask);
}
return set;
}
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 8affe37c525d800a2628c4ecfaed13b77dc5634a Mon Sep 17 00:00:00 2001
From: Michael Grzeschik <m.grzeschik(a)pengutronix.de>
Date: Mon, 4 Jul 2022 16:18:12 +0200
Subject: [PATCH] usb: dwc3: gadget: fix high speed multiplier setting
For high-speed transfers, the prepare_one_trb function calculates the
multiplier setting for the TRB based on the length parameter of the TRB
currently being prepared. This is wrong: for TRBs with an sg list,
the length of the actual request has to be used instead.
Fixes: 40d829fb2ec6 ("usb: dwc3: gadget: Correct ISOC DATA PIDs for short packets")
Cc: stable <stable(a)kernel.org>
Signed-off-by: Michael Grzeschik <m.grzeschik(a)pengutronix.de>
Link: https://lore.kernel.org/r/20220704141812.1532306-3-m.grzeschik@pengutronix.…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index dcd8fc209ccd..4366c45c28cf 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -1265,10 +1265,10 @@ static void dwc3_prepare_one_trb(struct dwc3_ep *dep,
unsigned int mult = 2;
unsigned int maxp = usb_endpoint_maxp(ep->desc);
- if (trb_length <= (2 * maxp))
+ if (req->request.length <= (2 * maxp))
mult--;
- if (trb_length <= maxp)
+ if (req->request.length <= maxp)
mult--;
trb->size |= DWC3_TRB_SIZE_PCM1(mult);
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 8affe37c525d800a2628c4ecfaed13b77dc5634a Mon Sep 17 00:00:00 2001
From: Michael Grzeschik <m.grzeschik(a)pengutronix.de>
Date: Mon, 4 Jul 2022 16:18:12 +0200
Subject: [PATCH] usb: dwc3: gadget: fix high speed multiplier setting
For high-speed transfers, the prepare_one_trb function calculates the
multiplier setting for the TRB based on the length parameter of the TRB
currently being prepared. This is wrong: for TRBs with an sg list,
the length of the actual request has to be used instead.
Fixes: 40d829fb2ec6 ("usb: dwc3: gadget: Correct ISOC DATA PIDs for short packets")
Cc: stable <stable(a)kernel.org>
Signed-off-by: Michael Grzeschik <m.grzeschik(a)pengutronix.de>
Link: https://lore.kernel.org/r/20220704141812.1532306-3-m.grzeschik@pengutronix.…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index dcd8fc209ccd..4366c45c28cf 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -1265,10 +1265,10 @@ static void dwc3_prepare_one_trb(struct dwc3_ep *dep,
unsigned int mult = 2;
unsigned int maxp = usb_endpoint_maxp(ep->desc);
- if (trb_length <= (2 * maxp))
+ if (req->request.length <= (2 * maxp))
mult--;
- if (trb_length <= maxp)
+ if (req->request.length <= maxp)
mult--;
trb->size |= DWC3_TRB_SIZE_PCM1(mult);
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 8affe37c525d800a2628c4ecfaed13b77dc5634a Mon Sep 17 00:00:00 2001
From: Michael Grzeschik <m.grzeschik(a)pengutronix.de>
Date: Mon, 4 Jul 2022 16:18:12 +0200
Subject: [PATCH] usb: dwc3: gadget: fix high speed multiplier setting
For high-speed transfers, the prepare_one_trb function calculates the
multiplier setting for the TRB based on the length parameter of the TRB
currently being prepared. This is wrong: for TRBs with an sg list,
the length of the actual request has to be used instead.
Fixes: 40d829fb2ec6 ("usb: dwc3: gadget: Correct ISOC DATA PIDs for short packets")
Cc: stable <stable(a)kernel.org>
Signed-off-by: Michael Grzeschik <m.grzeschik(a)pengutronix.de>
Link: https://lore.kernel.org/r/20220704141812.1532306-3-m.grzeschik@pengutronix.…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index dcd8fc209ccd..4366c45c28cf 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -1265,10 +1265,10 @@ static void dwc3_prepare_one_trb(struct dwc3_ep *dep,
unsigned int mult = 2;
unsigned int maxp = usb_endpoint_maxp(ep->desc);
- if (trb_length <= (2 * maxp))
+ if (req->request.length <= (2 * maxp))
mult--;
- if (trb_length <= maxp)
+ if (req->request.length <= maxp)
mult--;
trb->size |= DWC3_TRB_SIZE_PCM1(mult);
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 8affe37c525d800a2628c4ecfaed13b77dc5634a Mon Sep 17 00:00:00 2001
From: Michael Grzeschik <m.grzeschik(a)pengutronix.de>
Date: Mon, 4 Jul 2022 16:18:12 +0200
Subject: [PATCH] usb: dwc3: gadget: fix high speed multiplier setting
For high-speed transfers, the prepare_one_trb function calculates the
multiplier setting for the TRB based on the length parameter of the TRB
currently being prepared. This is wrong: for TRBs with an sg list,
the length of the actual request has to be used instead.
Fixes: 40d829fb2ec6 ("usb: dwc3: gadget: Correct ISOC DATA PIDs for short packets")
Cc: stable <stable(a)kernel.org>
Signed-off-by: Michael Grzeschik <m.grzeschik(a)pengutronix.de>
Link: https://lore.kernel.org/r/20220704141812.1532306-3-m.grzeschik@pengutronix.…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index dcd8fc209ccd..4366c45c28cf 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -1265,10 +1265,10 @@ static void dwc3_prepare_one_trb(struct dwc3_ep *dep,
unsigned int mult = 2;
unsigned int maxp = usb_endpoint_maxp(ep->desc);
- if (trb_length <= (2 * maxp))
+ if (req->request.length <= (2 * maxp))
mult--;
- if (trb_length <= maxp)
+ if (req->request.length <= maxp)
mult--;
trb->size |= DWC3_TRB_SIZE_PCM1(mult);
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 23385cec5f354794dadced7f28c31da7ae3eb54c Mon Sep 17 00:00:00 2001
From: Michael Grzeschik <m.grzeschik(a)pengutronix.de>
Date: Mon, 4 Jul 2022 16:18:11 +0200
Subject: [PATCH] usb: dwc3: gadget: refactor dwc3_repare_one_trb
The function __dwc3_prepare_one_trb has many parameters. Since it is
only used in dwc3_prepare_one_trb there is no point in keeping the
function. We merge both functions and get rid of the big list of
parameters.
Fixes: 40d829fb2ec6 ("usb: dwc3: gadget: Correct ISOC DATA PIDs for short packets")
Cc: stable <stable(a)kernel.org>
Signed-off-by: Michael Grzeschik <m.grzeschik(a)pengutronix.de>
Link: https://lore.kernel.org/r/20220704141812.1532306-2-m.grzeschik@pengutronix.…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index a944c7a6c83a..dcd8fc209ccd 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -1183,17 +1183,49 @@ static u32 dwc3_calc_trbs_left(struct dwc3_ep *dep)
return trbs_left;
}
-static void __dwc3_prepare_one_trb(struct dwc3_ep *dep, struct dwc3_trb *trb,
- dma_addr_t dma, unsigned int length, unsigned int chain,
- unsigned int node, unsigned int stream_id,
- unsigned int short_not_ok, unsigned int no_interrupt,
- unsigned int is_last, bool must_interrupt)
+/**
+ * dwc3_prepare_one_trb - setup one TRB from one request
+ * @dep: endpoint for which this request is prepared
+ * @req: dwc3_request pointer
+ * @trb_length: buffer size of the TRB
+ * @chain: should this TRB be chained to the next?
+ * @node: only for isochronous endpoints. First TRB needs different type.
+ * @use_bounce_buffer: set to use bounce buffer
+ * @must_interrupt: set to interrupt on TRB completion
+ */
+static void dwc3_prepare_one_trb(struct dwc3_ep *dep,
+ struct dwc3_request *req, unsigned int trb_length,
+ unsigned int chain, unsigned int node, bool use_bounce_buffer,
+ bool must_interrupt)
{
+ struct dwc3_trb *trb;
+ dma_addr_t dma;
+ unsigned int stream_id = req->request.stream_id;
+ unsigned int short_not_ok = req->request.short_not_ok;
+ unsigned int no_interrupt = req->request.no_interrupt;
+ unsigned int is_last = req->request.is_last;
struct dwc3 *dwc = dep->dwc;
struct usb_gadget *gadget = dwc->gadget;
enum usb_device_speed speed = gadget->speed;
- trb->size = DWC3_TRB_SIZE_LENGTH(length);
+ if (use_bounce_buffer)
+ dma = dep->dwc->bounce_addr;
+ else if (req->request.num_sgs > 0)
+ dma = sg_dma_address(req->start_sg);
+ else
+ dma = req->request.dma;
+
+ trb = &dep->trb_pool[dep->trb_enqueue];
+
+ if (!req->trb) {
+ dwc3_gadget_move_started_request(req);
+ req->trb = trb;
+ req->trb_dma = dwc3_trb_dma_offset(dep, trb);
+ }
+
+ req->num_trbs++;
+
+ trb->size = DWC3_TRB_SIZE_LENGTH(trb_length);
trb->bpl = lower_32_bits(dma);
trb->bph = upper_32_bits(dma);
@@ -1233,10 +1265,10 @@ static void __dwc3_prepare_one_trb(struct dwc3_ep *dep, struct dwc3_trb *trb,
unsigned int mult = 2;
unsigned int maxp = usb_endpoint_maxp(ep->desc);
- if (length <= (2 * maxp))
+ if (trb_length <= (2 * maxp))
mult--;
- if (length <= maxp)
+ if (trb_length <= maxp)
mult--;
trb->size |= DWC3_TRB_SIZE_PCM1(mult);
@@ -1310,50 +1342,6 @@ static void __dwc3_prepare_one_trb(struct dwc3_ep *dep, struct dwc3_trb *trb,
trace_dwc3_prepare_trb(dep, trb);
}
-/**
- * dwc3_prepare_one_trb - setup one TRB from one request
- * @dep: endpoint for which this request is prepared
- * @req: dwc3_request pointer
- * @trb_length: buffer size of the TRB
- * @chain: should this TRB be chained to the next?
- * @node: only for isochronous endpoints. First TRB needs different type.
- * @use_bounce_buffer: set to use bounce buffer
- * @must_interrupt: set to interrupt on TRB completion
- */
-static void dwc3_prepare_one_trb(struct dwc3_ep *dep,
- struct dwc3_request *req, unsigned int trb_length,
- unsigned int chain, unsigned int node, bool use_bounce_buffer,
- bool must_interrupt)
-{
- struct dwc3_trb *trb;
- dma_addr_t dma;
- unsigned int stream_id = req->request.stream_id;
- unsigned int short_not_ok = req->request.short_not_ok;
- unsigned int no_interrupt = req->request.no_interrupt;
- unsigned int is_last = req->request.is_last;
-
- if (use_bounce_buffer)
- dma = dep->dwc->bounce_addr;
- else if (req->request.num_sgs > 0)
- dma = sg_dma_address(req->start_sg);
- else
- dma = req->request.dma;
-
- trb = &dep->trb_pool[dep->trb_enqueue];
-
- if (!req->trb) {
- dwc3_gadget_move_started_request(req);
- req->trb = trb;
- req->trb_dma = dwc3_trb_dma_offset(dep, trb);
- }
-
- req->num_trbs++;
-
- __dwc3_prepare_one_trb(dep, trb, dma, trb_length, chain, node,
- stream_id, short_not_ok, no_interrupt, is_last,
- must_interrupt);
-}
-
static bool dwc3_needs_extra_trb(struct dwc3_ep *dep, struct dwc3_request *req)
{
unsigned int maxp = usb_endpoint_maxp(dep->endpoint.desc);
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 23385cec5f354794dadced7f28c31da7ae3eb54c Mon Sep 17 00:00:00 2001
From: Michael Grzeschik <m.grzeschik(a)pengutronix.de>
Date: Mon, 4 Jul 2022 16:18:11 +0200
Subject: [PATCH] usb: dwc3: gadget: refactor dwc3_repare_one_trb
The function __dwc3_prepare_one_trb has many parameters. Since it is
only used in dwc3_prepare_one_trb there is no point in keeping the
function. We merge both functions and get rid of the big list of
parameters.
Fixes: 40d829fb2ec6 ("usb: dwc3: gadget: Correct ISOC DATA PIDs for short packets")
Cc: stable <stable(a)kernel.org>
Signed-off-by: Michael Grzeschik <m.grzeschik(a)pengutronix.de>
Link: https://lore.kernel.org/r/20220704141812.1532306-2-m.grzeschik@pengutronix.…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index a944c7a6c83a..dcd8fc209ccd 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -1183,17 +1183,49 @@ static u32 dwc3_calc_trbs_left(struct dwc3_ep *dep)
return trbs_left;
}
-static void __dwc3_prepare_one_trb(struct dwc3_ep *dep, struct dwc3_trb *trb,
- dma_addr_t dma, unsigned int length, unsigned int chain,
- unsigned int node, unsigned int stream_id,
- unsigned int short_not_ok, unsigned int no_interrupt,
- unsigned int is_last, bool must_interrupt)
+/**
+ * dwc3_prepare_one_trb - setup one TRB from one request
+ * @dep: endpoint for which this request is prepared
+ * @req: dwc3_request pointer
+ * @trb_length: buffer size of the TRB
+ * @chain: should this TRB be chained to the next?
+ * @node: only for isochronous endpoints. First TRB needs different type.
+ * @use_bounce_buffer: set to use bounce buffer
+ * @must_interrupt: set to interrupt on TRB completion
+ */
+static void dwc3_prepare_one_trb(struct dwc3_ep *dep,
+ struct dwc3_request *req, unsigned int trb_length,
+ unsigned int chain, unsigned int node, bool use_bounce_buffer,
+ bool must_interrupt)
{
+ struct dwc3_trb *trb;
+ dma_addr_t dma;
+ unsigned int stream_id = req->request.stream_id;
+ unsigned int short_not_ok = req->request.short_not_ok;
+ unsigned int no_interrupt = req->request.no_interrupt;
+ unsigned int is_last = req->request.is_last;
struct dwc3 *dwc = dep->dwc;
struct usb_gadget *gadget = dwc->gadget;
enum usb_device_speed speed = gadget->speed;
- trb->size = DWC3_TRB_SIZE_LENGTH(length);
+ if (use_bounce_buffer)
+ dma = dep->dwc->bounce_addr;
+ else if (req->request.num_sgs > 0)
+ dma = sg_dma_address(req->start_sg);
+ else
+ dma = req->request.dma;
+
+ trb = &dep->trb_pool[dep->trb_enqueue];
+
+ if (!req->trb) {
+ dwc3_gadget_move_started_request(req);
+ req->trb = trb;
+ req->trb_dma = dwc3_trb_dma_offset(dep, trb);
+ }
+
+ req->num_trbs++;
+
+ trb->size = DWC3_TRB_SIZE_LENGTH(trb_length);
trb->bpl = lower_32_bits(dma);
trb->bph = upper_32_bits(dma);
@@ -1233,10 +1265,10 @@ static void __dwc3_prepare_one_trb(struct dwc3_ep *dep, struct dwc3_trb *trb,
unsigned int mult = 2;
unsigned int maxp = usb_endpoint_maxp(ep->desc);
- if (length <= (2 * maxp))
+ if (trb_length <= (2 * maxp))
mult--;
- if (length <= maxp)
+ if (trb_length <= maxp)
mult--;
trb->size |= DWC3_TRB_SIZE_PCM1(mult);
@@ -1310,50 +1342,6 @@ static void __dwc3_prepare_one_trb(struct dwc3_ep *dep, struct dwc3_trb *trb,
trace_dwc3_prepare_trb(dep, trb);
}
-/**
- * dwc3_prepare_one_trb - setup one TRB from one request
- * @dep: endpoint for which this request is prepared
- * @req: dwc3_request pointer
- * @trb_length: buffer size of the TRB
- * @chain: should this TRB be chained to the next?
- * @node: only for isochronous endpoints. First TRB needs different type.
- * @use_bounce_buffer: set to use bounce buffer
- * @must_interrupt: set to interrupt on TRB completion
- */
-static void dwc3_prepare_one_trb(struct dwc3_ep *dep,
- struct dwc3_request *req, unsigned int trb_length,
- unsigned int chain, unsigned int node, bool use_bounce_buffer,
- bool must_interrupt)
-{
- struct dwc3_trb *trb;
- dma_addr_t dma;
- unsigned int stream_id = req->request.stream_id;
- unsigned int short_not_ok = req->request.short_not_ok;
- unsigned int no_interrupt = req->request.no_interrupt;
- unsigned int is_last = req->request.is_last;
-
- if (use_bounce_buffer)
- dma = dep->dwc->bounce_addr;
- else if (req->request.num_sgs > 0)
- dma = sg_dma_address(req->start_sg);
- else
- dma = req->request.dma;
-
- trb = &dep->trb_pool[dep->trb_enqueue];
-
- if (!req->trb) {
- dwc3_gadget_move_started_request(req);
- req->trb = trb;
- req->trb_dma = dwc3_trb_dma_offset(dep, trb);
- }
-
- req->num_trbs++;
-
- __dwc3_prepare_one_trb(dep, trb, dma, trb_length, chain, node,
- stream_id, short_not_ok, no_interrupt, is_last,
- must_interrupt);
-}
-
static bool dwc3_needs_extra_trb(struct dwc3_ep *dep, struct dwc3_request *req)
{
unsigned int maxp = usb_endpoint_maxp(dep->endpoint.desc);
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 23385cec5f354794dadced7f28c31da7ae3eb54c Mon Sep 17 00:00:00 2001
From: Michael Grzeschik <m.grzeschik(a)pengutronix.de>
Date: Mon, 4 Jul 2022 16:18:11 +0200
Subject: [PATCH] usb: dwc3: gadget: refactor dwc3_repare_one_trb
The function __dwc3_prepare_one_trb has many parameters. Since it is
only used in dwc3_prepare_one_trb there is no point in keeping the
function. We merge both functions and get rid of the big list of
parameters.
Fixes: 40d829fb2ec6 ("usb: dwc3: gadget: Correct ISOC DATA PIDs for short packets")
Cc: stable <stable(a)kernel.org>
Signed-off-by: Michael Grzeschik <m.grzeschik(a)pengutronix.de>
Link: https://lore.kernel.org/r/20220704141812.1532306-2-m.grzeschik@pengutronix.…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index a944c7a6c83a..dcd8fc209ccd 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -1183,17 +1183,49 @@ static u32 dwc3_calc_trbs_left(struct dwc3_ep *dep)
return trbs_left;
}
-static void __dwc3_prepare_one_trb(struct dwc3_ep *dep, struct dwc3_trb *trb,
- dma_addr_t dma, unsigned int length, unsigned int chain,
- unsigned int node, unsigned int stream_id,
- unsigned int short_not_ok, unsigned int no_interrupt,
- unsigned int is_last, bool must_interrupt)
+/**
+ * dwc3_prepare_one_trb - setup one TRB from one request
+ * @dep: endpoint for which this request is prepared
+ * @req: dwc3_request pointer
+ * @trb_length: buffer size of the TRB
+ * @chain: should this TRB be chained to the next?
+ * @node: only for isochronous endpoints. First TRB needs different type.
+ * @use_bounce_buffer: set to use bounce buffer
+ * @must_interrupt: set to interrupt on TRB completion
+ */
+static void dwc3_prepare_one_trb(struct dwc3_ep *dep,
+ struct dwc3_request *req, unsigned int trb_length,
+ unsigned int chain, unsigned int node, bool use_bounce_buffer,
+ bool must_interrupt)
{
+ struct dwc3_trb *trb;
+ dma_addr_t dma;
+ unsigned int stream_id = req->request.stream_id;
+ unsigned int short_not_ok = req->request.short_not_ok;
+ unsigned int no_interrupt = req->request.no_interrupt;
+ unsigned int is_last = req->request.is_last;
struct dwc3 *dwc = dep->dwc;
struct usb_gadget *gadget = dwc->gadget;
enum usb_device_speed speed = gadget->speed;
- trb->size = DWC3_TRB_SIZE_LENGTH(length);
+ if (use_bounce_buffer)
+ dma = dep->dwc->bounce_addr;
+ else if (req->request.num_sgs > 0)
+ dma = sg_dma_address(req->start_sg);
+ else
+ dma = req->request.dma;
+
+ trb = &dep->trb_pool[dep->trb_enqueue];
+
+ if (!req->trb) {
+ dwc3_gadget_move_started_request(req);
+ req->trb = trb;
+ req->trb_dma = dwc3_trb_dma_offset(dep, trb);
+ }
+
+ req->num_trbs++;
+
+ trb->size = DWC3_TRB_SIZE_LENGTH(trb_length);
trb->bpl = lower_32_bits(dma);
trb->bph = upper_32_bits(dma);
@@ -1233,10 +1265,10 @@ static void __dwc3_prepare_one_trb(struct dwc3_ep *dep, struct dwc3_trb *trb,
unsigned int mult = 2;
unsigned int maxp = usb_endpoint_maxp(ep->desc);
- if (length <= (2 * maxp))
+ if (trb_length <= (2 * maxp))
mult--;
- if (length <= maxp)
+ if (trb_length <= maxp)
mult--;
trb->size |= DWC3_TRB_SIZE_PCM1(mult);
@@ -1310,50 +1342,6 @@ static void __dwc3_prepare_one_trb(struct dwc3_ep *dep, struct dwc3_trb *trb,
trace_dwc3_prepare_trb(dep, trb);
}
-/**
- * dwc3_prepare_one_trb - setup one TRB from one request
- * @dep: endpoint for which this request is prepared
- * @req: dwc3_request pointer
- * @trb_length: buffer size of the TRB
- * @chain: should this TRB be chained to the next?
- * @node: only for isochronous endpoints. First TRB needs different type.
- * @use_bounce_buffer: set to use bounce buffer
- * @must_interrupt: set to interrupt on TRB completion
- */
-static void dwc3_prepare_one_trb(struct dwc3_ep *dep,
- struct dwc3_request *req, unsigned int trb_length,
- unsigned int chain, unsigned int node, bool use_bounce_buffer,
- bool must_interrupt)
-{
- struct dwc3_trb *trb;
- dma_addr_t dma;
- unsigned int stream_id = req->request.stream_id;
- unsigned int short_not_ok = req->request.short_not_ok;
- unsigned int no_interrupt = req->request.no_interrupt;
- unsigned int is_last = req->request.is_last;
-
- if (use_bounce_buffer)
- dma = dep->dwc->bounce_addr;
- else if (req->request.num_sgs > 0)
- dma = sg_dma_address(req->start_sg);
- else
- dma = req->request.dma;
-
- trb = &dep->trb_pool[dep->trb_enqueue];
-
- if (!req->trb) {
- dwc3_gadget_move_started_request(req);
- req->trb = trb;
- req->trb_dma = dwc3_trb_dma_offset(dep, trb);
- }
-
- req->num_trbs++;
-
- __dwc3_prepare_one_trb(dep, trb, dma, trb_length, chain, node,
- stream_id, short_not_ok, no_interrupt, is_last,
- must_interrupt);
-}
-
static bool dwc3_needs_extra_trb(struct dwc3_ep *dep, struct dwc3_request *req)
{
unsigned int maxp = usb_endpoint_maxp(dep->endpoint.desc);
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 23385cec5f354794dadced7f28c31da7ae3eb54c Mon Sep 17 00:00:00 2001
From: Michael Grzeschik <m.grzeschik(a)pengutronix.de>
Date: Mon, 4 Jul 2022 16:18:11 +0200
Subject: [PATCH] usb: dwc3: gadget: refactor dwc3_prepare_one_trb
The function __dwc3_prepare_one_trb has many parameters. Since it is
only used in dwc3_prepare_one_trb there is no point in keeping the
function. We merge both functions and get rid of the big list of
parameters.
Fixes: 40d829fb2ec6 ("usb: dwc3: gadget: Correct ISOC DATA PIDs for short packets")
Cc: stable <stable(a)kernel.org>
Signed-off-by: Michael Grzeschik <m.grzeschik(a)pengutronix.de>
Link: https://lore.kernel.org/r/20220704141812.1532306-2-m.grzeschik@pengutronix.…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index a944c7a6c83a..dcd8fc209ccd 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -1183,17 +1183,49 @@ static u32 dwc3_calc_trbs_left(struct dwc3_ep *dep)
return trbs_left;
}
-static void __dwc3_prepare_one_trb(struct dwc3_ep *dep, struct dwc3_trb *trb,
- dma_addr_t dma, unsigned int length, unsigned int chain,
- unsigned int node, unsigned int stream_id,
- unsigned int short_not_ok, unsigned int no_interrupt,
- unsigned int is_last, bool must_interrupt)
+/**
+ * dwc3_prepare_one_trb - setup one TRB from one request
+ * @dep: endpoint for which this request is prepared
+ * @req: dwc3_request pointer
+ * @trb_length: buffer size of the TRB
+ * @chain: should this TRB be chained to the next?
+ * @node: only for isochronous endpoints. First TRB needs different type.
+ * @use_bounce_buffer: set to use bounce buffer
+ * @must_interrupt: set to interrupt on TRB completion
+ */
+static void dwc3_prepare_one_trb(struct dwc3_ep *dep,
+ struct dwc3_request *req, unsigned int trb_length,
+ unsigned int chain, unsigned int node, bool use_bounce_buffer,
+ bool must_interrupt)
{
+ struct dwc3_trb *trb;
+ dma_addr_t dma;
+ unsigned int stream_id = req->request.stream_id;
+ unsigned int short_not_ok = req->request.short_not_ok;
+ unsigned int no_interrupt = req->request.no_interrupt;
+ unsigned int is_last = req->request.is_last;
struct dwc3 *dwc = dep->dwc;
struct usb_gadget *gadget = dwc->gadget;
enum usb_device_speed speed = gadget->speed;
- trb->size = DWC3_TRB_SIZE_LENGTH(length);
+ if (use_bounce_buffer)
+ dma = dep->dwc->bounce_addr;
+ else if (req->request.num_sgs > 0)
+ dma = sg_dma_address(req->start_sg);
+ else
+ dma = req->request.dma;
+
+ trb = &dep->trb_pool[dep->trb_enqueue];
+
+ if (!req->trb) {
+ dwc3_gadget_move_started_request(req);
+ req->trb = trb;
+ req->trb_dma = dwc3_trb_dma_offset(dep, trb);
+ }
+
+ req->num_trbs++;
+
+ trb->size = DWC3_TRB_SIZE_LENGTH(trb_length);
trb->bpl = lower_32_bits(dma);
trb->bph = upper_32_bits(dma);
@@ -1233,10 +1265,10 @@ static void __dwc3_prepare_one_trb(struct dwc3_ep *dep, struct dwc3_trb *trb,
unsigned int mult = 2;
unsigned int maxp = usb_endpoint_maxp(ep->desc);
- if (length <= (2 * maxp))
+ if (trb_length <= (2 * maxp))
mult--;
- if (length <= maxp)
+ if (trb_length <= maxp)
mult--;
trb->size |= DWC3_TRB_SIZE_PCM1(mult);
@@ -1310,50 +1342,6 @@ static void __dwc3_prepare_one_trb(struct dwc3_ep *dep, struct dwc3_trb *trb,
trace_dwc3_prepare_trb(dep, trb);
}
-/**
- * dwc3_prepare_one_trb - setup one TRB from one request
- * @dep: endpoint for which this request is prepared
- * @req: dwc3_request pointer
- * @trb_length: buffer size of the TRB
- * @chain: should this TRB be chained to the next?
- * @node: only for isochronous endpoints. First TRB needs different type.
- * @use_bounce_buffer: set to use bounce buffer
- * @must_interrupt: set to interrupt on TRB completion
- */
-static void dwc3_prepare_one_trb(struct dwc3_ep *dep,
- struct dwc3_request *req, unsigned int trb_length,
- unsigned int chain, unsigned int node, bool use_bounce_buffer,
- bool must_interrupt)
-{
- struct dwc3_trb *trb;
- dma_addr_t dma;
- unsigned int stream_id = req->request.stream_id;
- unsigned int short_not_ok = req->request.short_not_ok;
- unsigned int no_interrupt = req->request.no_interrupt;
- unsigned int is_last = req->request.is_last;
-
- if (use_bounce_buffer)
- dma = dep->dwc->bounce_addr;
- else if (req->request.num_sgs > 0)
- dma = sg_dma_address(req->start_sg);
- else
- dma = req->request.dma;
-
- trb = &dep->trb_pool[dep->trb_enqueue];
-
- if (!req->trb) {
- dwc3_gadget_move_started_request(req);
- req->trb = trb;
- req->trb_dma = dwc3_trb_dma_offset(dep, trb);
- }
-
- req->num_trbs++;
-
- __dwc3_prepare_one_trb(dep, trb, dma, trb_length, chain, node,
- stream_id, short_not_ok, no_interrupt, is_last,
- must_interrupt);
-}
-
static bool dwc3_needs_extra_trb(struct dwc3_ep *dep, struct dwc3_request *req)
{
unsigned int maxp = usb_endpoint_maxp(dep->endpoint.desc);
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From dd8de84b57b02ba9c1fe530a6d916c0853f136bd Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy(a)csgroup.eu>
Date: Tue, 28 Jun 2022 16:43:35 +0200
Subject: [PATCH] powerpc/ptdump: Fix display of RW pages on FSL_BOOK3E
On FSL_BOOK3E, _PAGE_RW is defined with two bits, one for user and one
for supervisor. As soon as one of the two bits is set, the page has
to be displayed as RW. But the way it is implemented today requires both
bits to be set in order to display it as RW.
Instead of displaying RW when _PAGE_RW bits are set and R otherwise,
reverse the logic and display R when _PAGE_RW bits are all 0 and
RW otherwise.
This change has no impact on other platforms as _PAGE_RW is a single
bit on all of them.
Fixes: 8eb07b187000 ("powerpc/mm: Dump linux pagetables")
Cc: stable(a)vger.kernel.org
Signed-off-by: Christophe Leroy <christophe.leroy(a)csgroup.eu>
Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au>
Link: https://lore.kernel.org/r/0c33b96317811edf691e81698aaee8fa45ec3449.16564273…
diff --git a/arch/powerpc/mm/ptdump/shared.c b/arch/powerpc/mm/ptdump/shared.c
index 03607ab90c66..f884760ca5cf 100644
--- a/arch/powerpc/mm/ptdump/shared.c
+++ b/arch/powerpc/mm/ptdump/shared.c
@@ -17,9 +17,9 @@ static const struct flag_info flag_array[] = {
.clear = " ",
}, {
.mask = _PAGE_RW,
- .val = _PAGE_RW,
- .set = "rw",
- .clear = "r ",
+ .val = 0,
+ .set = "r ",
+ .clear = "rw",
}, {
.mask = _PAGE_EXEC,
.val = _PAGE_EXEC,
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 3dc96bba65f53daa217f0a8f43edad145286a8f5 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack(a)suse.cz>
Date: Tue, 12 Jul 2022 12:54:21 +0200
Subject: [PATCH] mbcache: add functions to delete entry if unused
Add function mb_cache_entry_delete_or_get() to delete mbcache entry if
it is unused and also add a function to wait for entry to become unused
- mb_cache_entry_wait_unused(). We do not share code between the two
deleting functions as one of them will go away soon.
CC: stable(a)vger.kernel.org
Fixes: 82939d7999df ("ext4: convert to mbcache2")
Signed-off-by: Jan Kara <jack(a)suse.cz>
Link: https://lore.kernel.org/r/20220712105436.32204-2-jack@suse.cz
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
diff --git a/fs/mbcache.c b/fs/mbcache.c
index cfc28129fb6f..2010bc80a3f2 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -11,7 +11,7 @@
/*
* Mbcache is a simple key-value store. Keys need not be unique, however
* key-value pairs are expected to be unique (we use this fact in
- * mb_cache_entry_delete()).
+ * mb_cache_entry_delete_or_get()).
*
* Ext2 and ext4 use this cache for deduplication of extended attribute blocks.
* Ext4 also uses it for deduplication of xattr values stored in inodes.
@@ -125,6 +125,19 @@ void __mb_cache_entry_free(struct mb_cache_entry *entry)
}
EXPORT_SYMBOL(__mb_cache_entry_free);
+/*
+ * mb_cache_entry_wait_unused - wait to be the last user of the entry
+ *
+ * @entry - entry to work on
+ *
+ * Wait to be the last user of the entry.
+ */
+void mb_cache_entry_wait_unused(struct mb_cache_entry *entry)
+{
+ wait_var_event(&entry->e_refcnt, atomic_read(&entry->e_refcnt) <= 3);
+}
+EXPORT_SYMBOL(mb_cache_entry_wait_unused);
+
static struct mb_cache_entry *__entry_find(struct mb_cache *cache,
struct mb_cache_entry *entry,
u32 key)
@@ -217,7 +230,7 @@ struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key,
}
EXPORT_SYMBOL(mb_cache_entry_get);
-/* mb_cache_entry_delete - remove a cache entry
+/* mb_cache_entry_delete - try to remove a cache entry
* @cache - cache we work with
* @key - key
* @value - value
@@ -254,6 +267,55 @@ void mb_cache_entry_delete(struct mb_cache *cache, u32 key, u64 value)
}
EXPORT_SYMBOL(mb_cache_entry_delete);
+/* mb_cache_entry_delete_or_get - remove a cache entry if it has no users
+ * @cache - cache we work with
+ * @key - key
+ * @value - value
+ *
+ * Remove entry from cache @cache with key @key and value @value. The removal
+ * happens only if the entry is unused. The function returns NULL in case the
+ * entry was successfully removed or there's no entry in cache. Otherwise the
+ * function grabs reference of the entry that we failed to delete because it
+ * still has users and return it.
+ */
+struct mb_cache_entry *mb_cache_entry_delete_or_get(struct mb_cache *cache,
+ u32 key, u64 value)
+{
+ struct hlist_bl_node *node;
+ struct hlist_bl_head *head;
+ struct mb_cache_entry *entry;
+
+ head = mb_cache_entry_head(cache, key);
+ hlist_bl_lock(head);
+ hlist_bl_for_each_entry(entry, node, head, e_hash_list) {
+ if (entry->e_key == key && entry->e_value == value) {
+ if (atomic_read(&entry->e_refcnt) > 2) {
+ atomic_inc(&entry->e_refcnt);
+ hlist_bl_unlock(head);
+ return entry;
+ }
+ /* We keep hash list reference to keep entry alive */
+ hlist_bl_del_init(&entry->e_hash_list);
+ hlist_bl_unlock(head);
+ spin_lock(&cache->c_list_lock);
+ if (!list_empty(&entry->e_list)) {
+ list_del_init(&entry->e_list);
+ if (!WARN_ONCE(cache->c_entry_count == 0,
+ "mbcache: attempt to decrement c_entry_count past zero"))
+ cache->c_entry_count--;
+ atomic_dec(&entry->e_refcnt);
+ }
+ spin_unlock(&cache->c_list_lock);
+ mb_cache_entry_put(cache, entry);
+ return NULL;
+ }
+ }
+ hlist_bl_unlock(head);
+
+ return NULL;
+}
+EXPORT_SYMBOL(mb_cache_entry_delete_or_get);
+
/* mb_cache_entry_touch - cache entry got used
* @cache - cache the entry belongs to
* @entry - entry that got used
diff --git a/include/linux/mbcache.h b/include/linux/mbcache.h
index 20f1e3ff6013..8eca7f25c432 100644
--- a/include/linux/mbcache.h
+++ b/include/linux/mbcache.h
@@ -30,15 +30,23 @@ void mb_cache_destroy(struct mb_cache *cache);
int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
u64 value, bool reusable);
void __mb_cache_entry_free(struct mb_cache_entry *entry);
+void mb_cache_entry_wait_unused(struct mb_cache_entry *entry);
static inline int mb_cache_entry_put(struct mb_cache *cache,
struct mb_cache_entry *entry)
{
- if (!atomic_dec_and_test(&entry->e_refcnt))
+ unsigned int cnt = atomic_dec_return(&entry->e_refcnt);
+
+ if (cnt > 0) {
+ if (cnt <= 3)
+ wake_up_var(&entry->e_refcnt);
return 0;
+ }
__mb_cache_entry_free(entry);
return 1;
}
+struct mb_cache_entry *mb_cache_entry_delete_or_get(struct mb_cache *cache,
+ u32 key, u64 value);
void mb_cache_entry_delete(struct mb_cache *cache, u32 key, u64 value);
struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key,
u64 value);
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From a7209541239e5dd44d981289e5f9059222d40fd1 Mon Sep 17 00:00:00 2001
From: Narendra Hadke <nhadke(a)marvell.com>
Date: Tue, 26 Jul 2022 11:12:21 +0200
Subject: [PATCH] serial: mvebu-uart: uart2 error bits clearing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
For mvebu uart2, error bits are not cleared on buffer read.
This causes interrupt loop and system hang.
Cc: stable(a)vger.kernel.org
Reviewed-by: Yi Guo <yi.guo(a)cavium.com>
Reviewed-by: Nadav Haklai <nadavh(a)marvell.com>
Signed-off-by: Narendra Hadke <nhadke(a)marvell.com>
Signed-off-by: Pali Rohár <pali(a)kernel.org>
Link: https://lore.kernel.org/r/20220726091221.12358-1-pali@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/tty/serial/mvebu-uart.c b/drivers/tty/serial/mvebu-uart.c
index 0429c2a54290..ff61a8d00014 100644
--- a/drivers/tty/serial/mvebu-uart.c
+++ b/drivers/tty/serial/mvebu-uart.c
@@ -265,6 +265,7 @@ static void mvebu_uart_rx_chars(struct uart_port *port, unsigned int status)
struct tty_port *tport = &port->state->port;
unsigned char ch = 0;
char flag = 0;
+ int ret;
do {
if (status & STAT_RX_RDY(port)) {
@@ -277,6 +278,16 @@ static void mvebu_uart_rx_chars(struct uart_port *port, unsigned int status)
port->icount.parity++;
}
+ /*
+ * For UART2, error bits are not cleared on buffer read.
+ * This causes interrupt loop and system hang.
+ */
+ if (IS_EXTENDED(port) && (status & STAT_BRK_ERR)) {
+ ret = readl(port->membase + UART_STAT);
+ ret |= STAT_BRK_ERR;
+ writel(ret, port->membase + UART_STAT);
+ }
+
if (status & STAT_BRK_DET) {
port->icount.brk++;
status &= ~(STAT_FRM_ERR | STAT_PAR_ERR);
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From a7209541239e5dd44d981289e5f9059222d40fd1 Mon Sep 17 00:00:00 2001
From: Narendra Hadke <nhadke(a)marvell.com>
Date: Tue, 26 Jul 2022 11:12:21 +0200
Subject: [PATCH] serial: mvebu-uart: uart2 error bits clearing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
For mvebu uart2, error bits are not cleared on buffer read.
This causes interrupt loop and system hang.
Cc: stable(a)vger.kernel.org
Reviewed-by: Yi Guo <yi.guo(a)cavium.com>
Reviewed-by: Nadav Haklai <nadavh(a)marvell.com>
Signed-off-by: Narendra Hadke <nhadke(a)marvell.com>
Signed-off-by: Pali Rohár <pali(a)kernel.org>
Link: https://lore.kernel.org/r/20220726091221.12358-1-pali@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/tty/serial/mvebu-uart.c b/drivers/tty/serial/mvebu-uart.c
index 0429c2a54290..ff61a8d00014 100644
--- a/drivers/tty/serial/mvebu-uart.c
+++ b/drivers/tty/serial/mvebu-uart.c
@@ -265,6 +265,7 @@ static void mvebu_uart_rx_chars(struct uart_port *port, unsigned int status)
struct tty_port *tport = &port->state->port;
unsigned char ch = 0;
char flag = 0;
+ int ret;
do {
if (status & STAT_RX_RDY(port)) {
@@ -277,6 +278,16 @@ static void mvebu_uart_rx_chars(struct uart_port *port, unsigned int status)
port->icount.parity++;
}
+ /*
+ * For UART2, error bits are not cleared on buffer read.
+ * This causes interrupt loop and system hang.
+ */
+ if (IS_EXTENDED(port) && (status & STAT_BRK_ERR)) {
+ ret = readl(port->membase + UART_STAT);
+ ret |= STAT_BRK_ERR;
+ writel(ret, port->membase + UART_STAT);
+ }
+
if (status & STAT_BRK_DET) {
port->icount.brk++;
status &= ~(STAT_FRM_ERR | STAT_PAR_ERR);
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 3dc96bba65f53daa217f0a8f43edad145286a8f5 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack(a)suse.cz>
Date: Tue, 12 Jul 2022 12:54:21 +0200
Subject: [PATCH] mbcache: add functions to delete entry if unused
Add function mb_cache_entry_delete_or_get() to delete mbcache entry if
it is unused and also add a function to wait for entry to become unused
- mb_cache_entry_wait_unused(). We do not share code between the two
deleting functions as one of them will go away soon.
CC: stable(a)vger.kernel.org
Fixes: 82939d7999df ("ext4: convert to mbcache2")
Signed-off-by: Jan Kara <jack(a)suse.cz>
Link: https://lore.kernel.org/r/20220712105436.32204-2-jack@suse.cz
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
diff --git a/fs/mbcache.c b/fs/mbcache.c
index cfc28129fb6f..2010bc80a3f2 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -11,7 +11,7 @@
/*
* Mbcache is a simple key-value store. Keys need not be unique, however
* key-value pairs are expected to be unique (we use this fact in
- * mb_cache_entry_delete()).
+ * mb_cache_entry_delete_or_get()).
*
* Ext2 and ext4 use this cache for deduplication of extended attribute blocks.
* Ext4 also uses it for deduplication of xattr values stored in inodes.
@@ -125,6 +125,19 @@ void __mb_cache_entry_free(struct mb_cache_entry *entry)
}
EXPORT_SYMBOL(__mb_cache_entry_free);
+/*
+ * mb_cache_entry_wait_unused - wait to be the last user of the entry
+ *
+ * @entry - entry to work on
+ *
+ * Wait to be the last user of the entry.
+ */
+void mb_cache_entry_wait_unused(struct mb_cache_entry *entry)
+{
+ wait_var_event(&entry->e_refcnt, atomic_read(&entry->e_refcnt) <= 3);
+}
+EXPORT_SYMBOL(mb_cache_entry_wait_unused);
+
static struct mb_cache_entry *__entry_find(struct mb_cache *cache,
struct mb_cache_entry *entry,
u32 key)
@@ -217,7 +230,7 @@ struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key,
}
EXPORT_SYMBOL(mb_cache_entry_get);
-/* mb_cache_entry_delete - remove a cache entry
+/* mb_cache_entry_delete - try to remove a cache entry
* @cache - cache we work with
* @key - key
* @value - value
@@ -254,6 +267,55 @@ void mb_cache_entry_delete(struct mb_cache *cache, u32 key, u64 value)
}
EXPORT_SYMBOL(mb_cache_entry_delete);
+/* mb_cache_entry_delete_or_get - remove a cache entry if it has no users
+ * @cache - cache we work with
+ * @key - key
+ * @value - value
+ *
+ * Remove entry from cache @cache with key @key and value @value. The removal
+ * happens only if the entry is unused. The function returns NULL in case the
+ * entry was successfully removed or there's no entry in cache. Otherwise the
+ * function grabs reference of the entry that we failed to delete because it
+ * still has users and return it.
+ */
+struct mb_cache_entry *mb_cache_entry_delete_or_get(struct mb_cache *cache,
+ u32 key, u64 value)
+{
+ struct hlist_bl_node *node;
+ struct hlist_bl_head *head;
+ struct mb_cache_entry *entry;
+
+ head = mb_cache_entry_head(cache, key);
+ hlist_bl_lock(head);
+ hlist_bl_for_each_entry(entry, node, head, e_hash_list) {
+ if (entry->e_key == key && entry->e_value == value) {
+ if (atomic_read(&entry->e_refcnt) > 2) {
+ atomic_inc(&entry->e_refcnt);
+ hlist_bl_unlock(head);
+ return entry;
+ }
+ /* We keep hash list reference to keep entry alive */
+ hlist_bl_del_init(&entry->e_hash_list);
+ hlist_bl_unlock(head);
+ spin_lock(&cache->c_list_lock);
+ if (!list_empty(&entry->e_list)) {
+ list_del_init(&entry->e_list);
+ if (!WARN_ONCE(cache->c_entry_count == 0,
+ "mbcache: attempt to decrement c_entry_count past zero"))
+ cache->c_entry_count--;
+ atomic_dec(&entry->e_refcnt);
+ }
+ spin_unlock(&cache->c_list_lock);
+ mb_cache_entry_put(cache, entry);
+ return NULL;
+ }
+ }
+ hlist_bl_unlock(head);
+
+ return NULL;
+}
+EXPORT_SYMBOL(mb_cache_entry_delete_or_get);
+
/* mb_cache_entry_touch - cache entry got used
* @cache - cache the entry belongs to
* @entry - entry that got used
diff --git a/include/linux/mbcache.h b/include/linux/mbcache.h
index 20f1e3ff6013..8eca7f25c432 100644
--- a/include/linux/mbcache.h
+++ b/include/linux/mbcache.h
@@ -30,15 +30,23 @@ void mb_cache_destroy(struct mb_cache *cache);
int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
u64 value, bool reusable);
void __mb_cache_entry_free(struct mb_cache_entry *entry);
+void mb_cache_entry_wait_unused(struct mb_cache_entry *entry);
static inline int mb_cache_entry_put(struct mb_cache *cache,
struct mb_cache_entry *entry)
{
- if (!atomic_dec_and_test(&entry->e_refcnt))
+ unsigned int cnt = atomic_dec_return(&entry->e_refcnt);
+
+ if (cnt > 0) {
+ if (cnt <= 3)
+ wake_up_var(&entry->e_refcnt);
return 0;
+ }
__mb_cache_entry_free(entry);
return 1;
}
+struct mb_cache_entry *mb_cache_entry_delete_or_get(struct mb_cache *cache,
+ u32 key, u64 value);
void mb_cache_entry_delete(struct mb_cache *cache, u32 key, u64 value);
struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key,
u64 value);
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e151db8ecfb019b7da31d076130a794574c89f6f Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka(a)redhat.com>
Date: Sun, 24 Jul 2022 14:26:12 -0400
Subject: [PATCH] md-raid: destroy the bitmap after destroying the thread
When we ran the lvm test "shell/integrity-blocksize-3.sh" on a kernel with
kasan, we got failure in write_page.
The reason for the failure is that md_bitmap_destroy is called before
destroying the thread and the thread may be waiting in the function
write_page for the bio to complete. When the thread finishes waiting, it
executes "if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))", which
triggers the kasan warning.
Note that the commit 48df498daf62 that caused this bug claims that it is
needed for md-cluster; you should check md-cluster and possibly find
another bugfix for it.
BUG: KASAN: use-after-free in write_page+0x18d/0x680 [md_mod]
Read of size 8 at addr ffff889162030c78 by task mdX_raid1/5539
CPU: 10 PID: 5539 Comm: mdX_raid1 Not tainted 5.19.0-rc2 #1
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014
Call Trace:
<TASK>
dump_stack_lvl+0x34/0x44
print_report.cold+0x45/0x57a
? __lock_text_start+0x18/0x18
? write_page+0x18d/0x680 [md_mod]
kasan_report+0xa8/0xe0
? write_page+0x18d/0x680 [md_mod]
kasan_check_range+0x13f/0x180
write_page+0x18d/0x680 [md_mod]
? super_sync+0x4d5/0x560 [dm_raid]
? md_bitmap_file_kick+0xa0/0xa0 [md_mod]
? rs_set_dev_and_array_sectors+0x2e0/0x2e0 [dm_raid]
? mutex_trylock+0x120/0x120
? preempt_count_add+0x6b/0xc0
? preempt_count_sub+0xf/0xc0
md_update_sb+0x707/0xe40 [md_mod]
md_reap_sync_thread+0x1b2/0x4a0 [md_mod]
md_check_recovery+0x533/0x960 [md_mod]
raid1d+0xc8/0x2a20 [raid1]
? var_wake_function+0xe0/0xe0
? psi_group_change+0x411/0x500
? preempt_count_sub+0xf/0xc0
? _raw_spin_lock_irqsave+0x78/0xc0
? __lock_text_start+0x18/0x18
? raid1_end_read_request+0x2a0/0x2a0 [raid1]
? preempt_count_sub+0xf/0xc0
? _raw_spin_unlock_irqrestore+0x19/0x40
? del_timer_sync+0xa9/0x100
? try_to_del_timer_sync+0xc0/0xc0
? _raw_spin_lock_irqsave+0x78/0xc0
? __lock_text_start+0x18/0x18
? __list_del_entry_valid+0x68/0xa0
? finish_wait+0xa3/0x100
md_thread+0x161/0x260 [md_mod]
? unregister_md_personality+0xa0/0xa0 [md_mod]
? _raw_spin_lock_irqsave+0x78/0xc0
? prepare_to_wait_event+0x2c0/0x2c0
? unregister_md_personality+0xa0/0xa0 [md_mod]
kthread+0x148/0x180
? kthread_complete_and_exit+0x20/0x20
ret_from_fork+0x1f/0x30
</TASK>
Allocated by task 5522:
kasan_save_stack+0x1e/0x40
__kasan_kmalloc+0x80/0xa0
md_bitmap_create+0xa8/0xe80 [md_mod]
md_run+0x777/0x1300 [md_mod]
raid_ctr+0x249c/0x4a30 [dm_raid]
dm_table_add_target+0x2b0/0x620 [dm_mod]
table_load+0x1c8/0x400 [dm_mod]
ctl_ioctl+0x29e/0x560 [dm_mod]
dm_compat_ctl_ioctl+0x7/0x20 [dm_mod]
__do_compat_sys_ioctl+0xfa/0x160
do_syscall_64+0x90/0xc0
entry_SYSCALL_64_after_hwframe+0x46/0xb0
Freed by task 5680:
kasan_save_stack+0x1e/0x40
kasan_set_track+0x21/0x40
kasan_set_free_info+0x20/0x40
__kasan_slab_free+0xf7/0x140
kfree+0x80/0x240
md_bitmap_free+0x1c3/0x280 [md_mod]
__md_stop+0x21/0x120 [md_mod]
md_stop+0x9/0x40 [md_mod]
raid_dtr+0x1b/0x40 [dm_raid]
dm_table_destroy+0x98/0x1e0 [dm_mod]
__dm_destroy+0x199/0x360 [dm_mod]
dev_remove+0x10c/0x160 [dm_mod]
ctl_ioctl+0x29e/0x560 [dm_mod]
dm_compat_ctl_ioctl+0x7/0x20 [dm_mod]
__do_compat_sys_ioctl+0xfa/0x160
do_syscall_64+0x90/0xc0
entry_SYSCALL_64_after_hwframe+0x46/0xb0
Signed-off-by: Mikulas Patocka <mpatocka(a)redhat.com>
Cc: stable(a)vger.kernel.org
Fixes: 48df498daf62 ("md: move bitmap_destroy to the beginning of __md_stop")
Signed-off-by: Song Liu <song(a)kernel.org>
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 6e82df21623d..35b895813c88 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -6238,11 +6238,11 @@ static void mddev_detach(struct mddev *mddev)
static void __md_stop(struct mddev *mddev)
{
struct md_personality *pers = mddev->pers;
- md_bitmap_destroy(mddev);
mddev_detach(mddev);
/* Ensure ->event_work is done */
if (mddev->event_work.func)
flush_workqueue(md_misc_wq);
+ md_bitmap_destroy(mddev);
spin_lock(&mddev->lock);
mddev->pers = NULL;
spin_unlock(&mddev->lock);
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e151db8ecfb019b7da31d076130a794574c89f6f Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka(a)redhat.com>
Date: Sun, 24 Jul 2022 14:26:12 -0400
Subject: [PATCH] md-raid: destroy the bitmap after destroying the thread
When we ran the lvm test "shell/integrity-blocksize-3.sh" on a kernel with
kasan, we got failure in write_page.
The reason for the failure is that md_bitmap_destroy is called before
destroying the thread and the thread may be waiting in the function
write_page for the bio to complete. When the thread finishes waiting, it
executes "if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))", which
triggers the kasan warning.
Note that the commit 48df498daf62 that caused this bug claims that it is
needed for md-cluster; you should check md-cluster and possibly find
another bugfix for it.
BUG: KASAN: use-after-free in write_page+0x18d/0x680 [md_mod]
Read of size 8 at addr ffff889162030c78 by task mdX_raid1/5539
CPU: 10 PID: 5539 Comm: mdX_raid1 Not tainted 5.19.0-rc2 #1
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014
Call Trace:
<TASK>
dump_stack_lvl+0x34/0x44
print_report.cold+0x45/0x57a
? __lock_text_start+0x18/0x18
? write_page+0x18d/0x680 [md_mod]
kasan_report+0xa8/0xe0
? write_page+0x18d/0x680 [md_mod]
kasan_check_range+0x13f/0x180
write_page+0x18d/0x680 [md_mod]
? super_sync+0x4d5/0x560 [dm_raid]
? md_bitmap_file_kick+0xa0/0xa0 [md_mod]
? rs_set_dev_and_array_sectors+0x2e0/0x2e0 [dm_raid]
? mutex_trylock+0x120/0x120
? preempt_count_add+0x6b/0xc0
? preempt_count_sub+0xf/0xc0
md_update_sb+0x707/0xe40 [md_mod]
md_reap_sync_thread+0x1b2/0x4a0 [md_mod]
md_check_recovery+0x533/0x960 [md_mod]
raid1d+0xc8/0x2a20 [raid1]
? var_wake_function+0xe0/0xe0
? psi_group_change+0x411/0x500
? preempt_count_sub+0xf/0xc0
? _raw_spin_lock_irqsave+0x78/0xc0
? __lock_text_start+0x18/0x18
? raid1_end_read_request+0x2a0/0x2a0 [raid1]
? preempt_count_sub+0xf/0xc0
? _raw_spin_unlock_irqrestore+0x19/0x40
? del_timer_sync+0xa9/0x100
? try_to_del_timer_sync+0xc0/0xc0
? _raw_spin_lock_irqsave+0x78/0xc0
? __lock_text_start+0x18/0x18
? __list_del_entry_valid+0x68/0xa0
? finish_wait+0xa3/0x100
md_thread+0x161/0x260 [md_mod]
? unregister_md_personality+0xa0/0xa0 [md_mod]
? _raw_spin_lock_irqsave+0x78/0xc0
? prepare_to_wait_event+0x2c0/0x2c0
? unregister_md_personality+0xa0/0xa0 [md_mod]
kthread+0x148/0x180
? kthread_complete_and_exit+0x20/0x20
ret_from_fork+0x1f/0x30
</TASK>
Allocated by task 5522:
kasan_save_stack+0x1e/0x40
__kasan_kmalloc+0x80/0xa0
md_bitmap_create+0xa8/0xe80 [md_mod]
md_run+0x777/0x1300 [md_mod]
raid_ctr+0x249c/0x4a30 [dm_raid]
dm_table_add_target+0x2b0/0x620 [dm_mod]
table_load+0x1c8/0x400 [dm_mod]
ctl_ioctl+0x29e/0x560 [dm_mod]
dm_compat_ctl_ioctl+0x7/0x20 [dm_mod]
__do_compat_sys_ioctl+0xfa/0x160
do_syscall_64+0x90/0xc0
entry_SYSCALL_64_after_hwframe+0x46/0xb0
Freed by task 5680:
kasan_save_stack+0x1e/0x40
kasan_set_track+0x21/0x40
kasan_set_free_info+0x20/0x40
__kasan_slab_free+0xf7/0x140
kfree+0x80/0x240
md_bitmap_free+0x1c3/0x280 [md_mod]
__md_stop+0x21/0x120 [md_mod]
md_stop+0x9/0x40 [md_mod]
raid_dtr+0x1b/0x40 [dm_raid]
dm_table_destroy+0x98/0x1e0 [dm_mod]
__dm_destroy+0x199/0x360 [dm_mod]
dev_remove+0x10c/0x160 [dm_mod]
ctl_ioctl+0x29e/0x560 [dm_mod]
dm_compat_ctl_ioctl+0x7/0x20 [dm_mod]
__do_compat_sys_ioctl+0xfa/0x160
do_syscall_64+0x90/0xc0
entry_SYSCALL_64_after_hwframe+0x46/0xb0
Signed-off-by: Mikulas Patocka <mpatocka(a)redhat.com>
Cc: stable(a)vger.kernel.org
Fixes: 48df498daf62 ("md: move bitmap_destroy to the beginning of __md_stop")
Signed-off-by: Song Liu <song(a)kernel.org>
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 6e82df21623d..35b895813c88 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -6238,11 +6238,11 @@ static void mddev_detach(struct mddev *mddev)
static void __md_stop(struct mddev *mddev)
{
struct md_personality *pers = mddev->pers;
- md_bitmap_destroy(mddev);
mddev_detach(mddev);
/* Ensure ->event_work is done */
if (mddev->event_work.func)
flush_workqueue(md_misc_wq);
+ md_bitmap_destroy(mddev);
spin_lock(&mddev->lock);
mddev->pers = NULL;
spin_unlock(&mddev->lock);
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e151db8ecfb019b7da31d076130a794574c89f6f Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka(a)redhat.com>
Date: Sun, 24 Jul 2022 14:26:12 -0400
Subject: [PATCH] md-raid: destroy the bitmap after destroying the thread
When we ran the lvm test "shell/integrity-blocksize-3.sh" on a kernel with
kasan, we got a failure in write_page.
The reason for the failure is that md_bitmap_destroy is called before
destroying the thread and the thread may be waiting in the function
write_page for the bio to complete. When the thread finishes waiting, it
executes "if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))", which
triggers the kasan warning.
Note that the commit 48df498daf62 that caused this bug claims that it is
needed for md-cluster; you should check md-cluster and possibly find
another bugfix for it.
BUG: KASAN: use-after-free in write_page+0x18d/0x680 [md_mod]
Read of size 8 at addr ffff889162030c78 by task mdX_raid1/5539
CPU: 10 PID: 5539 Comm: mdX_raid1 Not tainted 5.19.0-rc2 #1
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014
Call Trace:
<TASK>
dump_stack_lvl+0x34/0x44
print_report.cold+0x45/0x57a
? __lock_text_start+0x18/0x18
? write_page+0x18d/0x680 [md_mod]
kasan_report+0xa8/0xe0
? write_page+0x18d/0x680 [md_mod]
kasan_check_range+0x13f/0x180
write_page+0x18d/0x680 [md_mod]
? super_sync+0x4d5/0x560 [dm_raid]
? md_bitmap_file_kick+0xa0/0xa0 [md_mod]
? rs_set_dev_and_array_sectors+0x2e0/0x2e0 [dm_raid]
? mutex_trylock+0x120/0x120
? preempt_count_add+0x6b/0xc0
? preempt_count_sub+0xf/0xc0
md_update_sb+0x707/0xe40 [md_mod]
md_reap_sync_thread+0x1b2/0x4a0 [md_mod]
md_check_recovery+0x533/0x960 [md_mod]
raid1d+0xc8/0x2a20 [raid1]
? var_wake_function+0xe0/0xe0
? psi_group_change+0x411/0x500
? preempt_count_sub+0xf/0xc0
? _raw_spin_lock_irqsave+0x78/0xc0
? __lock_text_start+0x18/0x18
? raid1_end_read_request+0x2a0/0x2a0 [raid1]
? preempt_count_sub+0xf/0xc0
? _raw_spin_unlock_irqrestore+0x19/0x40
? del_timer_sync+0xa9/0x100
? try_to_del_timer_sync+0xc0/0xc0
? _raw_spin_lock_irqsave+0x78/0xc0
? __lock_text_start+0x18/0x18
? __list_del_entry_valid+0x68/0xa0
? finish_wait+0xa3/0x100
md_thread+0x161/0x260 [md_mod]
? unregister_md_personality+0xa0/0xa0 [md_mod]
? _raw_spin_lock_irqsave+0x78/0xc0
? prepare_to_wait_event+0x2c0/0x2c0
? unregister_md_personality+0xa0/0xa0 [md_mod]
kthread+0x148/0x180
? kthread_complete_and_exit+0x20/0x20
ret_from_fork+0x1f/0x30
</TASK>
Allocated by task 5522:
kasan_save_stack+0x1e/0x40
__kasan_kmalloc+0x80/0xa0
md_bitmap_create+0xa8/0xe80 [md_mod]
md_run+0x777/0x1300 [md_mod]
raid_ctr+0x249c/0x4a30 [dm_raid]
dm_table_add_target+0x2b0/0x620 [dm_mod]
table_load+0x1c8/0x400 [dm_mod]
ctl_ioctl+0x29e/0x560 [dm_mod]
dm_compat_ctl_ioctl+0x7/0x20 [dm_mod]
__do_compat_sys_ioctl+0xfa/0x160
do_syscall_64+0x90/0xc0
entry_SYSCALL_64_after_hwframe+0x46/0xb0
Freed by task 5680:
kasan_save_stack+0x1e/0x40
kasan_set_track+0x21/0x40
kasan_set_free_info+0x20/0x40
__kasan_slab_free+0xf7/0x140
kfree+0x80/0x240
md_bitmap_free+0x1c3/0x280 [md_mod]
__md_stop+0x21/0x120 [md_mod]
md_stop+0x9/0x40 [md_mod]
raid_dtr+0x1b/0x40 [dm_raid]
dm_table_destroy+0x98/0x1e0 [dm_mod]
__dm_destroy+0x199/0x360 [dm_mod]
dev_remove+0x10c/0x160 [dm_mod]
ctl_ioctl+0x29e/0x560 [dm_mod]
dm_compat_ctl_ioctl+0x7/0x20 [dm_mod]
__do_compat_sys_ioctl+0xfa/0x160
do_syscall_64+0x90/0xc0
entry_SYSCALL_64_after_hwframe+0x46/0xb0
Signed-off-by: Mikulas Patocka <mpatocka(a)redhat.com>
Cc: stable(a)vger.kernel.org
Fixes: 48df498daf62 ("md: move bitmap_destroy to the beginning of __md_stop")
Signed-off-by: Song Liu <song(a)kernel.org>
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 6e82df21623d..35b895813c88 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -6238,11 +6238,11 @@ static void mddev_detach(struct mddev *mddev)
static void __md_stop(struct mddev *mddev)
{
struct md_personality *pers = mddev->pers;
- md_bitmap_destroy(mddev);
mddev_detach(mddev);
/* Ensure ->event_work is done */
if (mddev->event_work.func)
flush_workqueue(md_misc_wq);
+ md_bitmap_destroy(mddev);
spin_lock(&mddev->lock);
mddev->pers = NULL;
spin_unlock(&mddev->lock);
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 2fdbb8dd01556e1501132b5ad3826e8f71e24a8b Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi(a)redhat.com>
Date: Fri, 22 Apr 2022 15:48:53 +0200
Subject: [PATCH] fuse: fix deadlock between atomic O_TRUNC and page
invalidation
fuse_finish_open() will be called with FUSE_NOWRITE set in case of atomic
O_TRUNC open(), so commit 76224355db75 ("fuse: truncate pagecache on
atomic_o_trunc") replaced invalidate_inode_pages2() by truncate_pagecache()
in such a case to avoid the A-A deadlock. However, we found another A-B-B-A
deadlock related to the case above, which causes the xfstests
generic/464 testcase to hang in our virtio-fs test environment.
For example, consider two processes concurrently opening the same file, one
with O_TRUNC and the other without O_TRUNC. The deadlock case is described
below: if open(O_TRUNC) has already called set_nowrite (acquired A) and is
trying to lock a page (acquiring B), open() could already hold the page lock
(acquired B) and be waiting on the page writeback (acquiring A). This would
lead to a deadlock.
open(O_TRUNC)
----------------------------------------------------------------
fuse_open_common
inode_lock [C acquire]
fuse_set_nowrite [A acquire]
fuse_finish_open
truncate_pagecache
lock_page [B acquire]
truncate_inode_page
unlock_page [B release]
fuse_release_nowrite [A release]
inode_unlock [C release]
----------------------------------------------------------------
open()
----------------------------------------------------------------
fuse_open_common
fuse_finish_open
invalidate_inode_pages2
lock_page [B acquire]
fuse_launder_page
fuse_wait_on_page_writeback [A acquire & release]
unlock_page [B release]
----------------------------------------------------------------
Besides this case, all calls of invalidate_inode_pages2() and
invalidate_inode_pages2_range() in fuse code also can deadlock with
open(O_TRUNC).
Fix by moving the truncate_pagecache() call outside the nowrite protected
region. The nowrite protection is only for delayed writeback
(writeback_cache) case, where inode lock does not protect against
truncation racing with writes on the server. Write syscalls racing with
page cache truncation still get the inode lock protection.
This patch also changes the order of filemap_invalidate_lock()
vs. fuse_set_nowrite() in fuse_open_common(). This new order matches the
order found in fuse_file_fallocate() and fuse_do_setattr().
Reported-by: Jiachen Zhang <zhangjiachen.jaycee(a)bytedance.com>
Tested-by: Jiachen Zhang <zhangjiachen.jaycee(a)bytedance.com>
Fixes: e4648309b85a ("fuse: truncate pending writes on O_TRUNC")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Miklos Szeredi <mszeredi(a)redhat.com>
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 74303d6e987b..a93d675a726a 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -537,6 +537,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
struct fuse_file *ff;
void *security_ctx = NULL;
u32 security_ctxlen;
+ bool trunc = flags & O_TRUNC;
/* Userspace expects S_IFREG in create mode */
BUG_ON((mode & S_IFMT) != S_IFREG);
@@ -561,7 +562,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
inarg.mode = mode;
inarg.umask = current_umask();
- if (fm->fc->handle_killpriv_v2 && (flags & O_TRUNC) &&
+ if (fm->fc->handle_killpriv_v2 && trunc &&
!(flags & O_EXCL) && !capable(CAP_FSETID)) {
inarg.open_flags |= FUSE_OPEN_KILL_SUIDGID;
}
@@ -623,6 +624,10 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
} else {
file->private_data = ff;
fuse_finish_open(inode, file);
+ if (fm->fc->atomic_o_trunc && trunc)
+ truncate_pagecache(inode, 0);
+ else if (!(ff->open_flags & FOPEN_KEEP_CACHE))
+ invalidate_inode_pages2(inode->i_mapping);
}
return err;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 60885ff9157c..dfee142bca5c 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -210,13 +210,9 @@ void fuse_finish_open(struct inode *inode, struct file *file)
fi->attr_version = atomic64_inc_return(&fc->attr_version);
i_size_write(inode, 0);
spin_unlock(&fi->lock);
- truncate_pagecache(inode, 0);
file_update_time(file);
fuse_invalidate_attr_mask(inode, FUSE_STATX_MODSIZE);
- } else if (!(ff->open_flags & FOPEN_KEEP_CACHE)) {
- invalidate_inode_pages2(inode->i_mapping);
}
-
if ((file->f_mode & FMODE_WRITE) && fc->writeback_cache)
fuse_link_write_file(file);
}
@@ -239,30 +235,38 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
if (err)
return err;
- if (is_wb_truncate || dax_truncate) {
+ if (is_wb_truncate || dax_truncate)
inode_lock(inode);
- fuse_set_nowrite(inode);
- }
if (dax_truncate) {
filemap_invalidate_lock(inode->i_mapping);
err = fuse_dax_break_layouts(inode, 0, 0);
if (err)
- goto out;
+ goto out_inode_unlock;
}
+ if (is_wb_truncate || dax_truncate)
+ fuse_set_nowrite(inode);
+
err = fuse_do_open(fm, get_node_id(inode), file, isdir);
if (!err)
fuse_finish_open(inode, file);
-out:
+ if (is_wb_truncate || dax_truncate)
+ fuse_release_nowrite(inode);
+ if (!err) {
+ struct fuse_file *ff = file->private_data;
+
+ if (fc->atomic_o_trunc && (file->f_flags & O_TRUNC))
+ truncate_pagecache(inode, 0);
+ else if (!(ff->open_flags & FOPEN_KEEP_CACHE))
+ invalidate_inode_pages2(inode->i_mapping);
+ }
if (dax_truncate)
filemap_invalidate_unlock(inode->i_mapping);
-
- if (is_wb_truncate | dax_truncate) {
- fuse_release_nowrite(inode);
+out_inode_unlock:
+ if (is_wb_truncate || dax_truncate)
inode_unlock(inode);
- }
return err;
}
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 2fdbb8dd01556e1501132b5ad3826e8f71e24a8b Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi(a)redhat.com>
Date: Fri, 22 Apr 2022 15:48:53 +0200
Subject: [PATCH] fuse: fix deadlock between atomic O_TRUNC and page
invalidation
fuse_finish_open() will be called with FUSE_NOWRITE set in case of atomic
O_TRUNC open(), so commit 76224355db75 ("fuse: truncate pagecache on
atomic_o_trunc") replaced invalidate_inode_pages2() by truncate_pagecache()
in such a case to avoid the A-A deadlock. However, we found another A-B-B-A
deadlock related to the case above, which causes the xfstests
generic/464 testcase to hang in our virtio-fs test environment.
For example, consider two processes concurrently opening the same file, one
with O_TRUNC and the other without O_TRUNC. The deadlock case is described
below: if open(O_TRUNC) has already called set_nowrite (acquired A) and is
trying to lock a page (acquiring B), open() could already hold the page lock
(acquired B) and be waiting on the page writeback (acquiring A). This would
lead to a deadlock.
open(O_TRUNC)
----------------------------------------------------------------
fuse_open_common
inode_lock [C acquire]
fuse_set_nowrite [A acquire]
fuse_finish_open
truncate_pagecache
lock_page [B acquire]
truncate_inode_page
unlock_page [B release]
fuse_release_nowrite [A release]
inode_unlock [C release]
----------------------------------------------------------------
open()
----------------------------------------------------------------
fuse_open_common
fuse_finish_open
invalidate_inode_pages2
lock_page [B acquire]
fuse_launder_page
fuse_wait_on_page_writeback [A acquire & release]
unlock_page [B release]
----------------------------------------------------------------
Besides this case, all calls of invalidate_inode_pages2() and
invalidate_inode_pages2_range() in fuse code also can deadlock with
open(O_TRUNC).
Fix by moving the truncate_pagecache() call outside the nowrite protected
region. The nowrite protection is only for delayed writeback
(writeback_cache) case, where inode lock does not protect against
truncation racing with writes on the server. Write syscalls racing with
page cache truncation still get the inode lock protection.
This patch also changes the order of filemap_invalidate_lock()
vs. fuse_set_nowrite() in fuse_open_common(). This new order matches the
order found in fuse_file_fallocate() and fuse_do_setattr().
Reported-by: Jiachen Zhang <zhangjiachen.jaycee(a)bytedance.com>
Tested-by: Jiachen Zhang <zhangjiachen.jaycee(a)bytedance.com>
Fixes: e4648309b85a ("fuse: truncate pending writes on O_TRUNC")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Miklos Szeredi <mszeredi(a)redhat.com>
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 74303d6e987b..a93d675a726a 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -537,6 +537,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
struct fuse_file *ff;
void *security_ctx = NULL;
u32 security_ctxlen;
+ bool trunc = flags & O_TRUNC;
/* Userspace expects S_IFREG in create mode */
BUG_ON((mode & S_IFMT) != S_IFREG);
@@ -561,7 +562,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
inarg.mode = mode;
inarg.umask = current_umask();
- if (fm->fc->handle_killpriv_v2 && (flags & O_TRUNC) &&
+ if (fm->fc->handle_killpriv_v2 && trunc &&
!(flags & O_EXCL) && !capable(CAP_FSETID)) {
inarg.open_flags |= FUSE_OPEN_KILL_SUIDGID;
}
@@ -623,6 +624,10 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
} else {
file->private_data = ff;
fuse_finish_open(inode, file);
+ if (fm->fc->atomic_o_trunc && trunc)
+ truncate_pagecache(inode, 0);
+ else if (!(ff->open_flags & FOPEN_KEEP_CACHE))
+ invalidate_inode_pages2(inode->i_mapping);
}
return err;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 60885ff9157c..dfee142bca5c 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -210,13 +210,9 @@ void fuse_finish_open(struct inode *inode, struct file *file)
fi->attr_version = atomic64_inc_return(&fc->attr_version);
i_size_write(inode, 0);
spin_unlock(&fi->lock);
- truncate_pagecache(inode, 0);
file_update_time(file);
fuse_invalidate_attr_mask(inode, FUSE_STATX_MODSIZE);
- } else if (!(ff->open_flags & FOPEN_KEEP_CACHE)) {
- invalidate_inode_pages2(inode->i_mapping);
}
-
if ((file->f_mode & FMODE_WRITE) && fc->writeback_cache)
fuse_link_write_file(file);
}
@@ -239,30 +235,38 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
if (err)
return err;
- if (is_wb_truncate || dax_truncate) {
+ if (is_wb_truncate || dax_truncate)
inode_lock(inode);
- fuse_set_nowrite(inode);
- }
if (dax_truncate) {
filemap_invalidate_lock(inode->i_mapping);
err = fuse_dax_break_layouts(inode, 0, 0);
if (err)
- goto out;
+ goto out_inode_unlock;
}
+ if (is_wb_truncate || dax_truncate)
+ fuse_set_nowrite(inode);
+
err = fuse_do_open(fm, get_node_id(inode), file, isdir);
if (!err)
fuse_finish_open(inode, file);
-out:
+ if (is_wb_truncate || dax_truncate)
+ fuse_release_nowrite(inode);
+ if (!err) {
+ struct fuse_file *ff = file->private_data;
+
+ if (fc->atomic_o_trunc && (file->f_flags & O_TRUNC))
+ truncate_pagecache(inode, 0);
+ else if (!(ff->open_flags & FOPEN_KEEP_CACHE))
+ invalidate_inode_pages2(inode->i_mapping);
+ }
if (dax_truncate)
filemap_invalidate_unlock(inode->i_mapping);
-
- if (is_wb_truncate | dax_truncate) {
- fuse_release_nowrite(inode);
+out_inode_unlock:
+ if (is_wb_truncate || dax_truncate)
inode_unlock(inode);
- }
return err;
}
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 2fdbb8dd01556e1501132b5ad3826e8f71e24a8b Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi(a)redhat.com>
Date: Fri, 22 Apr 2022 15:48:53 +0200
Subject: [PATCH] fuse: fix deadlock between atomic O_TRUNC and page
invalidation
fuse_finish_open() will be called with FUSE_NOWRITE set in case of atomic
O_TRUNC open(), so commit 76224355db75 ("fuse: truncate pagecache on
atomic_o_trunc") replaced invalidate_inode_pages2() by truncate_pagecache()
in such a case to avoid the A-A deadlock. However, we found another A-B-B-A
deadlock related to the case above, which causes the xfstests
generic/464 testcase to hang in our virtio-fs test environment.
For example, consider two processes concurrently opening the same file, one
with O_TRUNC and the other without O_TRUNC. The deadlock case is described
below: if open(O_TRUNC) has already called set_nowrite (acquired A) and is
trying to lock a page (acquiring B), open() could already hold the page lock
(acquired B) and be waiting on the page writeback (acquiring A). This would
lead to a deadlock.
open(O_TRUNC)
----------------------------------------------------------------
fuse_open_common
inode_lock [C acquire]
fuse_set_nowrite [A acquire]
fuse_finish_open
truncate_pagecache
lock_page [B acquire]
truncate_inode_page
unlock_page [B release]
fuse_release_nowrite [A release]
inode_unlock [C release]
----------------------------------------------------------------
open()
----------------------------------------------------------------
fuse_open_common
fuse_finish_open
invalidate_inode_pages2
lock_page [B acquire]
fuse_launder_page
fuse_wait_on_page_writeback [A acquire & release]
unlock_page [B release]
----------------------------------------------------------------
Besides this case, all calls of invalidate_inode_pages2() and
invalidate_inode_pages2_range() in fuse code also can deadlock with
open(O_TRUNC).
Fix by moving the truncate_pagecache() call outside the nowrite protected
region. The nowrite protection is only for delayed writeback
(writeback_cache) case, where inode lock does not protect against
truncation racing with writes on the server. Write syscalls racing with
page cache truncation still get the inode lock protection.
This patch also changes the order of filemap_invalidate_lock()
vs. fuse_set_nowrite() in fuse_open_common(). This new order matches the
order found in fuse_file_fallocate() and fuse_do_setattr().
Reported-by: Jiachen Zhang <zhangjiachen.jaycee(a)bytedance.com>
Tested-by: Jiachen Zhang <zhangjiachen.jaycee(a)bytedance.com>
Fixes: e4648309b85a ("fuse: truncate pending writes on O_TRUNC")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Miklos Szeredi <mszeredi(a)redhat.com>
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 74303d6e987b..a93d675a726a 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -537,6 +537,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
struct fuse_file *ff;
void *security_ctx = NULL;
u32 security_ctxlen;
+ bool trunc = flags & O_TRUNC;
/* Userspace expects S_IFREG in create mode */
BUG_ON((mode & S_IFMT) != S_IFREG);
@@ -561,7 +562,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
inarg.mode = mode;
inarg.umask = current_umask();
- if (fm->fc->handle_killpriv_v2 && (flags & O_TRUNC) &&
+ if (fm->fc->handle_killpriv_v2 && trunc &&
!(flags & O_EXCL) && !capable(CAP_FSETID)) {
inarg.open_flags |= FUSE_OPEN_KILL_SUIDGID;
}
@@ -623,6 +624,10 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
} else {
file->private_data = ff;
fuse_finish_open(inode, file);
+ if (fm->fc->atomic_o_trunc && trunc)
+ truncate_pagecache(inode, 0);
+ else if (!(ff->open_flags & FOPEN_KEEP_CACHE))
+ invalidate_inode_pages2(inode->i_mapping);
}
return err;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 60885ff9157c..dfee142bca5c 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -210,13 +210,9 @@ void fuse_finish_open(struct inode *inode, struct file *file)
fi->attr_version = atomic64_inc_return(&fc->attr_version);
i_size_write(inode, 0);
spin_unlock(&fi->lock);
- truncate_pagecache(inode, 0);
file_update_time(file);
fuse_invalidate_attr_mask(inode, FUSE_STATX_MODSIZE);
- } else if (!(ff->open_flags & FOPEN_KEEP_CACHE)) {
- invalidate_inode_pages2(inode->i_mapping);
}
-
if ((file->f_mode & FMODE_WRITE) && fc->writeback_cache)
fuse_link_write_file(file);
}
@@ -239,30 +235,38 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
if (err)
return err;
- if (is_wb_truncate || dax_truncate) {
+ if (is_wb_truncate || dax_truncate)
inode_lock(inode);
- fuse_set_nowrite(inode);
- }
if (dax_truncate) {
filemap_invalidate_lock(inode->i_mapping);
err = fuse_dax_break_layouts(inode, 0, 0);
if (err)
- goto out;
+ goto out_inode_unlock;
}
+ if (is_wb_truncate || dax_truncate)
+ fuse_set_nowrite(inode);
+
err = fuse_do_open(fm, get_node_id(inode), file, isdir);
if (!err)
fuse_finish_open(inode, file);
-out:
+ if (is_wb_truncate || dax_truncate)
+ fuse_release_nowrite(inode);
+ if (!err) {
+ struct fuse_file *ff = file->private_data;
+
+ if (fc->atomic_o_trunc && (file->f_flags & O_TRUNC))
+ truncate_pagecache(inode, 0);
+ else if (!(ff->open_flags & FOPEN_KEEP_CACHE))
+ invalidate_inode_pages2(inode->i_mapping);
+ }
if (dax_truncate)
filemap_invalidate_unlock(inode->i_mapping);
-
- if (is_wb_truncate | dax_truncate) {
- fuse_release_nowrite(inode);
+out_inode_unlock:
+ if (is_wb_truncate || dax_truncate)
inode_unlock(inode);
- }
return err;
}
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 2fdbb8dd01556e1501132b5ad3826e8f71e24a8b Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi(a)redhat.com>
Date: Fri, 22 Apr 2022 15:48:53 +0200
Subject: [PATCH] fuse: fix deadlock between atomic O_TRUNC and page
invalidation
fuse_finish_open() will be called with FUSE_NOWRITE set in case of atomic
O_TRUNC open(), so commit 76224355db75 ("fuse: truncate pagecache on
atomic_o_trunc") replaced invalidate_inode_pages2() by truncate_pagecache()
in such a case to avoid the A-A deadlock. However, we found another A-B-B-A
deadlock related to the case above, which causes the xfstests
generic/464 testcase to hang in our virtio-fs test environment.
For example, consider two processes concurrently opening the same file, one
with O_TRUNC and the other without O_TRUNC. The deadlock case is described
below: if open(O_TRUNC) has already called set_nowrite (acquired A) and is
trying to lock a page (acquiring B), open() could already hold the page lock
(acquired B) and be waiting on the page writeback (acquiring A). This would
lead to a deadlock.
open(O_TRUNC)
----------------------------------------------------------------
fuse_open_common
inode_lock [C acquire]
fuse_set_nowrite [A acquire]
fuse_finish_open
truncate_pagecache
lock_page [B acquire]
truncate_inode_page
unlock_page [B release]
fuse_release_nowrite [A release]
inode_unlock [C release]
----------------------------------------------------------------
open()
----------------------------------------------------------------
fuse_open_common
fuse_finish_open
invalidate_inode_pages2
lock_page [B acquire]
fuse_launder_page
fuse_wait_on_page_writeback [A acquire & release]
unlock_page [B release]
----------------------------------------------------------------
Besides this case, all calls of invalidate_inode_pages2() and
invalidate_inode_pages2_range() in fuse code also can deadlock with
open(O_TRUNC).
Fix by moving the truncate_pagecache() call outside the nowrite protected
region. The nowrite protection is only for delayed writeback
(writeback_cache) case, where inode lock does not protect against
truncation racing with writes on the server. Write syscalls racing with
page cache truncation still get the inode lock protection.
This patch also changes the order of filemap_invalidate_lock()
vs. fuse_set_nowrite() in fuse_open_common(). This new order matches the
order found in fuse_file_fallocate() and fuse_do_setattr().
Reported-by: Jiachen Zhang <zhangjiachen.jaycee(a)bytedance.com>
Tested-by: Jiachen Zhang <zhangjiachen.jaycee(a)bytedance.com>
Fixes: e4648309b85a ("fuse: truncate pending writes on O_TRUNC")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Miklos Szeredi <mszeredi(a)redhat.com>
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 74303d6e987b..a93d675a726a 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -537,6 +537,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
struct fuse_file *ff;
void *security_ctx = NULL;
u32 security_ctxlen;
+ bool trunc = flags & O_TRUNC;
/* Userspace expects S_IFREG in create mode */
BUG_ON((mode & S_IFMT) != S_IFREG);
@@ -561,7 +562,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
inarg.mode = mode;
inarg.umask = current_umask();
- if (fm->fc->handle_killpriv_v2 && (flags & O_TRUNC) &&
+ if (fm->fc->handle_killpriv_v2 && trunc &&
!(flags & O_EXCL) && !capable(CAP_FSETID)) {
inarg.open_flags |= FUSE_OPEN_KILL_SUIDGID;
}
@@ -623,6 +624,10 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
} else {
file->private_data = ff;
fuse_finish_open(inode, file);
+ if (fm->fc->atomic_o_trunc && trunc)
+ truncate_pagecache(inode, 0);
+ else if (!(ff->open_flags & FOPEN_KEEP_CACHE))
+ invalidate_inode_pages2(inode->i_mapping);
}
return err;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 60885ff9157c..dfee142bca5c 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -210,13 +210,9 @@ void fuse_finish_open(struct inode *inode, struct file *file)
fi->attr_version = atomic64_inc_return(&fc->attr_version);
i_size_write(inode, 0);
spin_unlock(&fi->lock);
- truncate_pagecache(inode, 0);
file_update_time(file);
fuse_invalidate_attr_mask(inode, FUSE_STATX_MODSIZE);
- } else if (!(ff->open_flags & FOPEN_KEEP_CACHE)) {
- invalidate_inode_pages2(inode->i_mapping);
}
-
if ((file->f_mode & FMODE_WRITE) && fc->writeback_cache)
fuse_link_write_file(file);
}
@@ -239,30 +235,38 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
if (err)
return err;
- if (is_wb_truncate || dax_truncate) {
+ if (is_wb_truncate || dax_truncate)
inode_lock(inode);
- fuse_set_nowrite(inode);
- }
if (dax_truncate) {
filemap_invalidate_lock(inode->i_mapping);
err = fuse_dax_break_layouts(inode, 0, 0);
if (err)
- goto out;
+ goto out_inode_unlock;
}
+ if (is_wb_truncate || dax_truncate)
+ fuse_set_nowrite(inode);
+
err = fuse_do_open(fm, get_node_id(inode), file, isdir);
if (!err)
fuse_finish_open(inode, file);
-out:
+ if (is_wb_truncate || dax_truncate)
+ fuse_release_nowrite(inode);
+ if (!err) {
+ struct fuse_file *ff = file->private_data;
+
+ if (fc->atomic_o_trunc && (file->f_flags & O_TRUNC))
+ truncate_pagecache(inode, 0);
+ else if (!(ff->open_flags & FOPEN_KEEP_CACHE))
+ invalidate_inode_pages2(inode->i_mapping);
+ }
if (dax_truncate)
filemap_invalidate_unlock(inode->i_mapping);
-
- if (is_wb_truncate | dax_truncate) {
- fuse_release_nowrite(inode);
+out_inode_unlock:
+ if (is_wb_truncate || dax_truncate)
inode_unlock(inode);
- }
return err;
}
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 2fdbb8dd01556e1501132b5ad3826e8f71e24a8b Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi(a)redhat.com>
Date: Fri, 22 Apr 2022 15:48:53 +0200
Subject: [PATCH] fuse: fix deadlock between atomic O_TRUNC and page
invalidation
fuse_finish_open() will be called with FUSE_NOWRITE set in case of atomic
O_TRUNC open(), so commit 76224355db75 ("fuse: truncate pagecache on
atomic_o_trunc") replaced invalidate_inode_pages2() by truncate_pagecache()
in such a case to avoid the A-A deadlock. However, we found another A-B-B-A
deadlock related to the case above, which causes the xfstests
generic/464 testcase to hang in our virtio-fs test environment.
For example, consider two processes concurrently opening the same file, one
with O_TRUNC and the other without O_TRUNC. The deadlock case is described
below: if open(O_TRUNC) has already called set_nowrite (acquired A) and is
trying to lock a page (acquiring B), open() could already hold the page lock
(acquired B) and be waiting on the page writeback (acquiring A). This would
lead to a deadlock.
open(O_TRUNC)
----------------------------------------------------------------
fuse_open_common
inode_lock [C acquire]
fuse_set_nowrite [A acquire]
fuse_finish_open
truncate_pagecache
lock_page [B acquire]
truncate_inode_page
unlock_page [B release]
fuse_release_nowrite [A release]
inode_unlock [C release]
----------------------------------------------------------------
open()
----------------------------------------------------------------
fuse_open_common
fuse_finish_open
invalidate_inode_pages2
lock_page [B acquire]
fuse_launder_page
fuse_wait_on_page_writeback [A acquire & release]
unlock_page [B release]
----------------------------------------------------------------
Besides this case, all calls of invalidate_inode_pages2() and
invalidate_inode_pages2_range() in fuse code also can deadlock with
open(O_TRUNC).
Fix by moving the truncate_pagecache() call outside the nowrite protected
region. The nowrite protection is only for delayed writeback
(writeback_cache) case, where inode lock does not protect against
truncation racing with writes on the server. Write syscalls racing with
page cache truncation still get the inode lock protection.
This patch also changes the order of filemap_invalidate_lock()
vs. fuse_set_nowrite() in fuse_open_common(). This new order matches the
order found in fuse_file_fallocate() and fuse_do_setattr().
Reported-by: Jiachen Zhang <zhangjiachen.jaycee(a)bytedance.com>
Tested-by: Jiachen Zhang <zhangjiachen.jaycee(a)bytedance.com>
Fixes: e4648309b85a ("fuse: truncate pending writes on O_TRUNC")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Miklos Szeredi <mszeredi(a)redhat.com>
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 74303d6e987b..a93d675a726a 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -537,6 +537,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
struct fuse_file *ff;
void *security_ctx = NULL;
u32 security_ctxlen;
+ bool trunc = flags & O_TRUNC;
/* Userspace expects S_IFREG in create mode */
BUG_ON((mode & S_IFMT) != S_IFREG);
@@ -561,7 +562,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
inarg.mode = mode;
inarg.umask = current_umask();
- if (fm->fc->handle_killpriv_v2 && (flags & O_TRUNC) &&
+ if (fm->fc->handle_killpriv_v2 && trunc &&
!(flags & O_EXCL) && !capable(CAP_FSETID)) {
inarg.open_flags |= FUSE_OPEN_KILL_SUIDGID;
}
@@ -623,6 +624,10 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
} else {
file->private_data = ff;
fuse_finish_open(inode, file);
+ if (fm->fc->atomic_o_trunc && trunc)
+ truncate_pagecache(inode, 0);
+ else if (!(ff->open_flags & FOPEN_KEEP_CACHE))
+ invalidate_inode_pages2(inode->i_mapping);
}
return err;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 60885ff9157c..dfee142bca5c 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -210,13 +210,9 @@ void fuse_finish_open(struct inode *inode, struct file *file)
fi->attr_version = atomic64_inc_return(&fc->attr_version);
i_size_write(inode, 0);
spin_unlock(&fi->lock);
- truncate_pagecache(inode, 0);
file_update_time(file);
fuse_invalidate_attr_mask(inode, FUSE_STATX_MODSIZE);
- } else if (!(ff->open_flags & FOPEN_KEEP_CACHE)) {
- invalidate_inode_pages2(inode->i_mapping);
}
-
if ((file->f_mode & FMODE_WRITE) && fc->writeback_cache)
fuse_link_write_file(file);
}
@@ -239,30 +235,38 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
if (err)
return err;
- if (is_wb_truncate || dax_truncate) {
+ if (is_wb_truncate || dax_truncate)
inode_lock(inode);
- fuse_set_nowrite(inode);
- }
if (dax_truncate) {
filemap_invalidate_lock(inode->i_mapping);
err = fuse_dax_break_layouts(inode, 0, 0);
if (err)
- goto out;
+ goto out_inode_unlock;
}
+ if (is_wb_truncate || dax_truncate)
+ fuse_set_nowrite(inode);
+
err = fuse_do_open(fm, get_node_id(inode), file, isdir);
if (!err)
fuse_finish_open(inode, file);
-out:
+ if (is_wb_truncate || dax_truncate)
+ fuse_release_nowrite(inode);
+ if (!err) {
+ struct fuse_file *ff = file->private_data;
+
+ if (fc->atomic_o_trunc && (file->f_flags & O_TRUNC))
+ truncate_pagecache(inode, 0);
+ else if (!(ff->open_flags & FOPEN_KEEP_CACHE))
+ invalidate_inode_pages2(inode->i_mapping);
+ }
if (dax_truncate)
filemap_invalidate_unlock(inode->i_mapping);
-
- if (is_wb_truncate | dax_truncate) {
- fuse_release_nowrite(inode);
+out_inode_unlock:
+ if (is_wb_truncate || dax_truncate)
inode_unlock(inode);
- }
return err;
}
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 2fdbb8dd01556e1501132b5ad3826e8f71e24a8b Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi(a)redhat.com>
Date: Fri, 22 Apr 2022 15:48:53 +0200
Subject: [PATCH] fuse: fix deadlock between atomic O_TRUNC and page
invalidation
fuse_finish_open() will be called with FUSE_NOWRITE set in case of atomic
O_TRUNC open(), so commit 76224355db75 ("fuse: truncate pagecache on
atomic_o_trunc") replaced invalidate_inode_pages2() by truncate_pagecache()
in such a case to avoid the A-A deadlock. However, we found another A-B-B-A
deadlock related to the case above, which causes the xfstests
generic/464 testcase to hang in our virtio-fs test environment.
For example, consider two processes concurrently opening the same file, one
with O_TRUNC and the other without O_TRUNC. The deadlock case is described
below: if open(O_TRUNC) has already called set_nowrite (acquired A) and is
trying to lock a page (acquiring B), open() could already hold the page lock
(acquired B) and be waiting on the page writeback (acquiring A). This would
lead to a deadlock.
open(O_TRUNC)
----------------------------------------------------------------
fuse_open_common
inode_lock [C acquire]
fuse_set_nowrite [A acquire]
fuse_finish_open
truncate_pagecache
lock_page [B acquire]
truncate_inode_page
unlock_page [B release]
fuse_release_nowrite [A release]
inode_unlock [C release]
----------------------------------------------------------------
open()
----------------------------------------------------------------
fuse_open_common
fuse_finish_open
invalidate_inode_pages2
lock_page [B acquire]
fuse_launder_page
fuse_wait_on_page_writeback [A acquire & release]
unlock_page [B release]
----------------------------------------------------------------
Besides this case, all calls of invalidate_inode_pages2() and
invalidate_inode_pages2_range() in fuse code also can deadlock with
open(O_TRUNC).
Fix by moving the truncate_pagecache() call outside the nowrite protected
region. The nowrite protection is only for delayed writeback
(writeback_cache) case, where inode lock does not protect against
truncation racing with writes on the server. Write syscalls racing with
page cache truncation still get the inode lock protection.
This patch also changes the order of filemap_invalidate_lock()
vs. fuse_set_nowrite() in fuse_open_common(). This new order matches the
order found in fuse_file_fallocate() and fuse_do_setattr().
Reported-by: Jiachen Zhang <zhangjiachen.jaycee(a)bytedance.com>
Tested-by: Jiachen Zhang <zhangjiachen.jaycee(a)bytedance.com>
Fixes: e4648309b85a ("fuse: truncate pending writes on O_TRUNC")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Miklos Szeredi <mszeredi(a)redhat.com>
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 74303d6e987b..a93d675a726a 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -537,6 +537,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
struct fuse_file *ff;
void *security_ctx = NULL;
u32 security_ctxlen;
+ bool trunc = flags & O_TRUNC;
/* Userspace expects S_IFREG in create mode */
BUG_ON((mode & S_IFMT) != S_IFREG);
@@ -561,7 +562,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
inarg.mode = mode;
inarg.umask = current_umask();
- if (fm->fc->handle_killpriv_v2 && (flags & O_TRUNC) &&
+ if (fm->fc->handle_killpriv_v2 && trunc &&
!(flags & O_EXCL) && !capable(CAP_FSETID)) {
inarg.open_flags |= FUSE_OPEN_KILL_SUIDGID;
}
@@ -623,6 +624,10 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
} else {
file->private_data = ff;
fuse_finish_open(inode, file);
+ if (fm->fc->atomic_o_trunc && trunc)
+ truncate_pagecache(inode, 0);
+ else if (!(ff->open_flags & FOPEN_KEEP_CACHE))
+ invalidate_inode_pages2(inode->i_mapping);
}
return err;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 60885ff9157c..dfee142bca5c 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -210,13 +210,9 @@ void fuse_finish_open(struct inode *inode, struct file *file)
fi->attr_version = atomic64_inc_return(&fc->attr_version);
i_size_write(inode, 0);
spin_unlock(&fi->lock);
- truncate_pagecache(inode, 0);
file_update_time(file);
fuse_invalidate_attr_mask(inode, FUSE_STATX_MODSIZE);
- } else if (!(ff->open_flags & FOPEN_KEEP_CACHE)) {
- invalidate_inode_pages2(inode->i_mapping);
}
-
if ((file->f_mode & FMODE_WRITE) && fc->writeback_cache)
fuse_link_write_file(file);
}
@@ -239,30 +235,38 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
if (err)
return err;
- if (is_wb_truncate || dax_truncate) {
+ if (is_wb_truncate || dax_truncate)
inode_lock(inode);
- fuse_set_nowrite(inode);
- }
if (dax_truncate) {
filemap_invalidate_lock(inode->i_mapping);
err = fuse_dax_break_layouts(inode, 0, 0);
if (err)
- goto out;
+ goto out_inode_unlock;
}
+ if (is_wb_truncate || dax_truncate)
+ fuse_set_nowrite(inode);
+
err = fuse_do_open(fm, get_node_id(inode), file, isdir);
if (!err)
fuse_finish_open(inode, file);
-out:
+ if (is_wb_truncate || dax_truncate)
+ fuse_release_nowrite(inode);
+ if (!err) {
+ struct fuse_file *ff = file->private_data;
+
+ if (fc->atomic_o_trunc && (file->f_flags & O_TRUNC))
+ truncate_pagecache(inode, 0);
+ else if (!(ff->open_flags & FOPEN_KEEP_CACHE))
+ invalidate_inode_pages2(inode->i_mapping);
+ }
if (dax_truncate)
filemap_invalidate_unlock(inode->i_mapping);
-
- if (is_wb_truncate | dax_truncate) {
- fuse_release_nowrite(inode);
+out_inode_unlock:
+ if (is_wb_truncate || dax_truncate)
inode_unlock(inode);
- }
return err;
}
As per the hardware datasheet, it is recommended that we check the device
status before reading the devid assigned by auto-enumeration.
Without this patch we see SoundWire devices with invalid enumeration
addresses on the bus.
Cc: stable(a)vger.kernel.org
Fixes: a6e6581942ca ("soundwire: qcom: add auto enumeration support")
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla(a)linaro.org>
---
drivers/soundwire/qcom.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/drivers/soundwire/qcom.c b/drivers/soundwire/qcom.c
index dbf793e7e5d7..9df970eeca45 100644
--- a/drivers/soundwire/qcom.c
+++ b/drivers/soundwire/qcom.c
@@ -480,6 +480,10 @@ static int qcom_swrm_enumerate(struct sdw_bus *bus)
char *buf1 = (char *)&val1, *buf2 = (char *)&val2;
for (i = 1; i <= SDW_MAX_DEVICES; i++) {
+ /* do not continue if the status is Not Present */
+ if (!ctrl->status[i])
+ continue;
+
/*SCP_Devid5 - Devid 4*/
ctrl->reg_read(ctrl, SWRM_ENUMERATOR_SLAVE_DEV_ID_1(i), &val1);
--
2.25.1
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 6c96a3c7d49593ef15805f5e497601c87695abc9 Mon Sep 17 00:00:00 2001
From: Bikash Hazarika <bhazarika(a)marvell.com>
Date: Tue, 12 Jul 2022 22:20:38 -0700
Subject: [PATCH] scsi: qla2xxx: Zero undefined mailbox IN registers
While requesting a new mailbox command, the driver does not write any data to
unused registers. Initialize the unused register value to zero while
requesting a new mailbox command to prevent stale entry access by firmware.
Link: https://lore.kernel.org/r/20220713052045.10683-4-njavali@marvell.com
Cc: stable(a)vger.kernel.org
Reviewed-by: Himanshu Madhani <himanshu.madhani(a)oracle.com>
Signed-off-by: Bikash Hazarika <bhazarika(a)marvell.com>
Signed-off-by: Quinn Tran <qutran(a)marvell.com>
Signed-off-by: Nilesh Javali <njavali(a)marvell.com>
Signed-off-by: Martin K. Petersen <martin.petersen(a)oracle.com>
diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c
index 643fa0052f5a..9a3f832c49ef 100644
--- a/drivers/scsi/qla2xxx/qla_mbx.c
+++ b/drivers/scsi/qla2xxx/qla_mbx.c
@@ -238,6 +238,8 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp)
ql_dbg(ql_dbg_mbx, vha, 0x1112,
"mbox[%d]<-0x%04x\n", cnt, *iptr);
wrt_reg_word(optr, *iptr);
+ } else {
+ wrt_reg_word(optr, 0);
}
mboxes >>= 1;
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 6c96a3c7d49593ef15805f5e497601c87695abc9 Mon Sep 17 00:00:00 2001
From: Bikash Hazarika <bhazarika(a)marvell.com>
Date: Tue, 12 Jul 2022 22:20:38 -0700
Subject: [PATCH] scsi: qla2xxx: Zero undefined mailbox IN registers
While requesting a new mailbox command, the driver does not write any data to
unused registers. Initialize the unused register value to zero while
requesting a new mailbox command to prevent stale entry access by firmware.
Link: https://lore.kernel.org/r/20220713052045.10683-4-njavali@marvell.com
Cc: stable(a)vger.kernel.org
Reviewed-by: Himanshu Madhani <himanshu.madhani(a)oracle.com>
Signed-off-by: Bikash Hazarika <bhazarika(a)marvell.com>
Signed-off-by: Quinn Tran <qutran(a)marvell.com>
Signed-off-by: Nilesh Javali <njavali(a)marvell.com>
Signed-off-by: Martin K. Petersen <martin.petersen(a)oracle.com>
diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c
index 643fa0052f5a..9a3f832c49ef 100644
--- a/drivers/scsi/qla2xxx/qla_mbx.c
+++ b/drivers/scsi/qla2xxx/qla_mbx.c
@@ -238,6 +238,8 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp)
ql_dbg(ql_dbg_mbx, vha, 0x1112,
"mbox[%d]<-0x%04x\n", cnt, *iptr);
wrt_reg_word(optr, *iptr);
+ } else {
+ wrt_reg_word(optr, 0);
}
mboxes >>= 1;
The patch below was submitted to be applied to the 5.19-stable tree.
I fail to see how this patch meets the stable kernel rules as found at
Documentation/process/stable-kernel-rules.rst.
I could be totally wrong, and if so, please respond to
<stable(a)vger.kernel.org> and let me know why this patch should be
applied. Otherwise, it is now dropped from my patch queues, never to be
seen again.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1ccad27716ecad1fd58c35e579bedb81fa5e1ad5 Mon Sep 17 00:00:00 2001
From: Bikash Hazarika <bhazarika(a)marvell.com>
Date: Tue, 12 Jul 2022 22:20:44 -0700
Subject: [PATCH] scsi: qla2xxx: Update manufacturer details
Update manufacturer details to indicate Marvell Semiconductors.
Link: https://lore.kernel.org/r/20220713052045.10683-10-njavali@marvell.com
Cc: stable(a)vger.kernel.org
Reviewed-by: Himanshu Madhani <himanshu.madhani(a)oracle.com>
Signed-off-by: Bikash Hazarika <bhazarika(a)marvell.com>
Signed-off-by: Nilesh Javali <njavali(a)marvell.com>
Signed-off-by: Martin K. Petersen <martin.petersen(a)oracle.com>
diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
index 91c8fedc8ffa..3ec6a200942e 100644
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -78,7 +78,7 @@ typedef union {
#include "qla_nvme.h"
#define QLA2XXX_DRIVER_NAME "qla2xxx"
#define QLA2XXX_APIDEV "ql2xapidev"
-#define QLA2XXX_MANUFACTURER "QLogic Corporation"
+#define QLA2XXX_MANUFACTURER "Marvell Semiconductor, Inc."
/*
* We have MAILBOX_REGISTER_COUNT sized arrays in a few places,
diff --git a/drivers/scsi/qla2xxx/qla_gs.c b/drivers/scsi/qla2xxx/qla_gs.c
index 7ca734337000..64ab070b8716 100644
--- a/drivers/scsi/qla2xxx/qla_gs.c
+++ b/drivers/scsi/qla2xxx/qla_gs.c
@@ -1616,7 +1616,7 @@ qla2x00_hba_attributes(scsi_qla_host_t *vha, void *entries,
eiter->type = cpu_to_be16(FDMI_HBA_MANUFACTURER);
alen = scnprintf(
eiter->a.manufacturer, sizeof(eiter->a.manufacturer),
- "%s", "QLogic Corporation");
+ "%s", QLA2XXX_MANUFACTURER);
alen += FDMI_ATTR_ALIGNMENT(alen);
alen += FDMI_ATTR_TYPELEN(eiter);
eiter->len = cpu_to_be16(alen);
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 3455607fd7be10b449f5135c00dc306b85dc0d21 Mon Sep 17 00:00:00 2001
From: Tony Battersby <tonyb(a)cybernetics.com>
Date: Mon, 11 Jul 2022 10:51:32 -0400
Subject: [PATCH] scsi: sg: Allow waiting for commands to complete on removed
device
When a SCSI device is removed while in active use, currently sg will
immediately return -ENODEV on any attempt to wait for active commands that
were sent before the removal. This is problematic for commands that use
SG_FLAG_DIRECT_IO since the data buffer may still be in use by the kernel
when userspace frees or reuses it after getting ENODEV, leading to
corrupted userspace memory (in the case of READ-type commands) or corrupted
data being sent to the device (in the case of WRITE-type commands). This
has been seen in practice when logging out of an iscsi_tcp session, where
the iSCSI driver may still be processing commands after the device has been
marked for removal.
Change the policy to allow userspace to wait for active sg commands even
when the device is being removed. Return -ENODEV only when there are no
more responses to read.
Link: https://lore.kernel.org/r/5ebea46f-fe83-2d0b-233d-d0dcb362dd0a@cybernetics.…
Cc: <stable(a)vger.kernel.org>
Acked-by: Douglas Gilbert <dgilbert(a)interlog.com>
Signed-off-by: Tony Battersby <tonyb(a)cybernetics.com>
Signed-off-by: Martin K. Petersen <martin.petersen(a)oracle.com>
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 118c7b4a8af2..340b050ad28d 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -195,7 +195,7 @@ static void sg_link_reserve(Sg_fd * sfp, Sg_request * srp, int size);
static void sg_unlink_reserve(Sg_fd * sfp, Sg_request * srp);
static Sg_fd *sg_add_sfp(Sg_device * sdp);
static void sg_remove_sfp(struct kref *);
-static Sg_request *sg_get_rq_mark(Sg_fd * sfp, int pack_id);
+static Sg_request *sg_get_rq_mark(Sg_fd * sfp, int pack_id, bool *busy);
static Sg_request *sg_add_request(Sg_fd * sfp);
static int sg_remove_request(Sg_fd * sfp, Sg_request * srp);
static Sg_device *sg_get_dev(int dev);
@@ -444,6 +444,7 @@ sg_read(struct file *filp, char __user *buf, size_t count, loff_t * ppos)
Sg_fd *sfp;
Sg_request *srp;
int req_pack_id = -1;
+ bool busy;
sg_io_hdr_t *hp;
struct sg_header *old_hdr;
int retval;
@@ -466,20 +467,16 @@ sg_read(struct file *filp, char __user *buf, size_t count, loff_t * ppos)
if (retval)
return retval;
- srp = sg_get_rq_mark(sfp, req_pack_id);
+ srp = sg_get_rq_mark(sfp, req_pack_id, &busy);
if (!srp) { /* now wait on packet to arrive */
- if (atomic_read(&sdp->detaching))
- return -ENODEV;
if (filp->f_flags & O_NONBLOCK)
return -EAGAIN;
retval = wait_event_interruptible(sfp->read_wait,
- (atomic_read(&sdp->detaching) ||
- (srp = sg_get_rq_mark(sfp, req_pack_id))));
- if (atomic_read(&sdp->detaching))
- return -ENODEV;
- if (retval)
- /* -ERESTARTSYS as signal hit process */
- return retval;
+ ((srp = sg_get_rq_mark(sfp, req_pack_id, &busy)) ||
+ (!busy && atomic_read(&sdp->detaching))));
+ if (!srp)
+ /* signal or detaching */
+ return retval ? retval : -ENODEV;
}
if (srp->header.interface_id != '\0')
return sg_new_read(sfp, buf, count, srp);
@@ -940,9 +937,7 @@ sg_ioctl_common(struct file *filp, Sg_device *sdp, Sg_fd *sfp,
if (result < 0)
return result;
result = wait_event_interruptible(sfp->read_wait,
- (srp_done(sfp, srp) || atomic_read(&sdp->detaching)));
- if (atomic_read(&sdp->detaching))
- return -ENODEV;
+ srp_done(sfp, srp));
write_lock_irq(&sfp->rq_list_lock);
if (srp->done) {
srp->done = 2;
@@ -2079,19 +2074,28 @@ sg_unlink_reserve(Sg_fd * sfp, Sg_request * srp)
}
static Sg_request *
-sg_get_rq_mark(Sg_fd * sfp, int pack_id)
+sg_get_rq_mark(Sg_fd * sfp, int pack_id, bool *busy)
{
Sg_request *resp;
unsigned long iflags;
+ *busy = false;
write_lock_irqsave(&sfp->rq_list_lock, iflags);
list_for_each_entry(resp, &sfp->rq_list, entry) {
- /* look for requests that are ready + not SG_IO owned */
- if ((1 == resp->done) && (!resp->sg_io_owned) &&
+ /* look for requests that are not SG_IO owned */
+ if ((!resp->sg_io_owned) &&
((-1 == pack_id) || (resp->header.pack_id == pack_id))) {
- resp->done = 2; /* guard against other readers */
- write_unlock_irqrestore(&sfp->rq_list_lock, iflags);
- return resp;
+ switch (resp->done) {
+ case 0: /* request active */
+ *busy = true;
+ break;
+ case 1: /* request done; response ready to return */
+ resp->done = 2; /* guard against other readers */
+ write_unlock_irqrestore(&sfp->rq_list_lock, iflags);
+ return resp;
+ case 2: /* response already being returned */
+ break;
+ }
}
}
write_unlock_irqrestore(&sfp->rq_list_lock, iflags);
@@ -2145,6 +2149,15 @@ sg_remove_request(Sg_fd * sfp, Sg_request * srp)
res = 1;
}
write_unlock_irqrestore(&sfp->rq_list_lock, iflags);
+
+ /*
+ * If the device is detaching, wakeup any readers in case we just
+ * removed the last response, which would leave nothing for them to
+ * return other than -ENODEV.
+ */
+ if (unlikely(atomic_read(&sfp->parentdp->detaching)))
+ wake_up_interruptible_all(&sfp->read_wait);
+
return res;
}
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 3455607fd7be10b449f5135c00dc306b85dc0d21 Mon Sep 17 00:00:00 2001
From: Tony Battersby <tonyb(a)cybernetics.com>
Date: Mon, 11 Jul 2022 10:51:32 -0400
Subject: [PATCH] scsi: sg: Allow waiting for commands to complete on removed
device
When a SCSI device is removed while in active use, currently sg will
immediately return -ENODEV on any attempt to wait for active commands that
were sent before the removal. This is problematic for commands that use
SG_FLAG_DIRECT_IO since the data buffer may still be in use by the kernel
when userspace frees or reuses it after getting ENODEV, leading to
corrupted userspace memory (in the case of READ-type commands) or corrupted
data being sent to the device (in the case of WRITE-type commands). This
has been seen in practice when logging out of an iscsi_tcp session, where
the iSCSI driver may still be processing commands after the device has been
marked for removal.
Change the policy to allow userspace to wait for active sg commands even
when the device is being removed. Return -ENODEV only when there are no
more responses to read.
Link: https://lore.kernel.org/r/5ebea46f-fe83-2d0b-233d-d0dcb362dd0a@cybernetics.…
Cc: <stable(a)vger.kernel.org>
Acked-by: Douglas Gilbert <dgilbert(a)interlog.com>
Signed-off-by: Tony Battersby <tonyb(a)cybernetics.com>
Signed-off-by: Martin K. Petersen <martin.petersen(a)oracle.com>
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 118c7b4a8af2..340b050ad28d 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -195,7 +195,7 @@ static void sg_link_reserve(Sg_fd * sfp, Sg_request * srp, int size);
static void sg_unlink_reserve(Sg_fd * sfp, Sg_request * srp);
static Sg_fd *sg_add_sfp(Sg_device * sdp);
static void sg_remove_sfp(struct kref *);
-static Sg_request *sg_get_rq_mark(Sg_fd * sfp, int pack_id);
+static Sg_request *sg_get_rq_mark(Sg_fd * sfp, int pack_id, bool *busy);
static Sg_request *sg_add_request(Sg_fd * sfp);
static int sg_remove_request(Sg_fd * sfp, Sg_request * srp);
static Sg_device *sg_get_dev(int dev);
@@ -444,6 +444,7 @@ sg_read(struct file *filp, char __user *buf, size_t count, loff_t * ppos)
Sg_fd *sfp;
Sg_request *srp;
int req_pack_id = -1;
+ bool busy;
sg_io_hdr_t *hp;
struct sg_header *old_hdr;
int retval;
@@ -466,20 +467,16 @@ sg_read(struct file *filp, char __user *buf, size_t count, loff_t * ppos)
if (retval)
return retval;
- srp = sg_get_rq_mark(sfp, req_pack_id);
+ srp = sg_get_rq_mark(sfp, req_pack_id, &busy);
if (!srp) { /* now wait on packet to arrive */
- if (atomic_read(&sdp->detaching))
- return -ENODEV;
if (filp->f_flags & O_NONBLOCK)
return -EAGAIN;
retval = wait_event_interruptible(sfp->read_wait,
- (atomic_read(&sdp->detaching) ||
- (srp = sg_get_rq_mark(sfp, req_pack_id))));
- if (atomic_read(&sdp->detaching))
- return -ENODEV;
- if (retval)
- /* -ERESTARTSYS as signal hit process */
- return retval;
+ ((srp = sg_get_rq_mark(sfp, req_pack_id, &busy)) ||
+ (!busy && atomic_read(&sdp->detaching))));
+ if (!srp)
+ /* signal or detaching */
+ return retval ? retval : -ENODEV;
}
if (srp->header.interface_id != '\0')
return sg_new_read(sfp, buf, count, srp);
@@ -940,9 +937,7 @@ sg_ioctl_common(struct file *filp, Sg_device *sdp, Sg_fd *sfp,
if (result < 0)
return result;
result = wait_event_interruptible(sfp->read_wait,
- (srp_done(sfp, srp) || atomic_read(&sdp->detaching)));
- if (atomic_read(&sdp->detaching))
- return -ENODEV;
+ srp_done(sfp, srp));
write_lock_irq(&sfp->rq_list_lock);
if (srp->done) {
srp->done = 2;
@@ -2079,19 +2074,28 @@ sg_unlink_reserve(Sg_fd * sfp, Sg_request * srp)
}
static Sg_request *
-sg_get_rq_mark(Sg_fd * sfp, int pack_id)
+sg_get_rq_mark(Sg_fd * sfp, int pack_id, bool *busy)
{
Sg_request *resp;
unsigned long iflags;
+ *busy = false;
write_lock_irqsave(&sfp->rq_list_lock, iflags);
list_for_each_entry(resp, &sfp->rq_list, entry) {
- /* look for requests that are ready + not SG_IO owned */
- if ((1 == resp->done) && (!resp->sg_io_owned) &&
+ /* look for requests that are not SG_IO owned */
+ if ((!resp->sg_io_owned) &&
((-1 == pack_id) || (resp->header.pack_id == pack_id))) {
- resp->done = 2; /* guard against other readers */
- write_unlock_irqrestore(&sfp->rq_list_lock, iflags);
- return resp;
+ switch (resp->done) {
+ case 0: /* request active */
+ *busy = true;
+ break;
+ case 1: /* request done; response ready to return */
+ resp->done = 2; /* guard against other readers */
+ write_unlock_irqrestore(&sfp->rq_list_lock, iflags);
+ return resp;
+ case 2: /* response already being returned */
+ break;
+ }
}
}
write_unlock_irqrestore(&sfp->rq_list_lock, iflags);
@@ -2145,6 +2149,15 @@ sg_remove_request(Sg_fd * sfp, Sg_request * srp)
res = 1;
}
write_unlock_irqrestore(&sfp->rq_list_lock, iflags);
+
+ /*
+ * If the device is detaching, wakeup any readers in case we just
+ * removed the last response, which would leave nothing for them to
+ * return other than -ENODEV.
+ */
+ if (unlikely(atomic_read(&sfp->parentdp->detaching)))
+ wake_up_interruptible_all(&sfp->read_wait);
+
return res;
}
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 3455607fd7be10b449f5135c00dc306b85dc0d21 Mon Sep 17 00:00:00 2001
From: Tony Battersby <tonyb(a)cybernetics.com>
Date: Mon, 11 Jul 2022 10:51:32 -0400
Subject: [PATCH] scsi: sg: Allow waiting for commands to complete on removed
device
When a SCSI device is removed while in active use, currently sg will
immediately return -ENODEV on any attempt to wait for active commands that
were sent before the removal. This is problematic for commands that use
SG_FLAG_DIRECT_IO since the data buffer may still be in use by the kernel
when userspace frees or reuses it after getting ENODEV, leading to
corrupted userspace memory (in the case of READ-type commands) or corrupted
data being sent to the device (in the case of WRITE-type commands). This
has been seen in practice when logging out of an iscsi_tcp session, where
the iSCSI driver may still be processing commands after the device has been
marked for removal.
Change the policy to allow userspace to wait for active sg commands even
when the device is being removed. Return -ENODEV only when there are no
more responses to read.
Link: https://lore.kernel.org/r/5ebea46f-fe83-2d0b-233d-d0dcb362dd0a@cybernetics.…
Cc: <stable(a)vger.kernel.org>
Acked-by: Douglas Gilbert <dgilbert(a)interlog.com>
Signed-off-by: Tony Battersby <tonyb(a)cybernetics.com>
Signed-off-by: Martin K. Petersen <martin.petersen(a)oracle.com>
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 118c7b4a8af2..340b050ad28d 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -195,7 +195,7 @@ static void sg_link_reserve(Sg_fd * sfp, Sg_request * srp, int size);
static void sg_unlink_reserve(Sg_fd * sfp, Sg_request * srp);
static Sg_fd *sg_add_sfp(Sg_device * sdp);
static void sg_remove_sfp(struct kref *);
-static Sg_request *sg_get_rq_mark(Sg_fd * sfp, int pack_id);
+static Sg_request *sg_get_rq_mark(Sg_fd * sfp, int pack_id, bool *busy);
static Sg_request *sg_add_request(Sg_fd * sfp);
static int sg_remove_request(Sg_fd * sfp, Sg_request * srp);
static Sg_device *sg_get_dev(int dev);
@@ -444,6 +444,7 @@ sg_read(struct file *filp, char __user *buf, size_t count, loff_t * ppos)
Sg_fd *sfp;
Sg_request *srp;
int req_pack_id = -1;
+ bool busy;
sg_io_hdr_t *hp;
struct sg_header *old_hdr;
int retval;
@@ -466,20 +467,16 @@ sg_read(struct file *filp, char __user *buf, size_t count, loff_t * ppos)
if (retval)
return retval;
- srp = sg_get_rq_mark(sfp, req_pack_id);
+ srp = sg_get_rq_mark(sfp, req_pack_id, &busy);
if (!srp) { /* now wait on packet to arrive */
- if (atomic_read(&sdp->detaching))
- return -ENODEV;
if (filp->f_flags & O_NONBLOCK)
return -EAGAIN;
retval = wait_event_interruptible(sfp->read_wait,
- (atomic_read(&sdp->detaching) ||
- (srp = sg_get_rq_mark(sfp, req_pack_id))));
- if (atomic_read(&sdp->detaching))
- return -ENODEV;
- if (retval)
- /* -ERESTARTSYS as signal hit process */
- return retval;
+ ((srp = sg_get_rq_mark(sfp, req_pack_id, &busy)) ||
+ (!busy && atomic_read(&sdp->detaching))));
+ if (!srp)
+ /* signal or detaching */
+ return retval ? retval : -ENODEV;
}
if (srp->header.interface_id != '\0')
return sg_new_read(sfp, buf, count, srp);
@@ -940,9 +937,7 @@ sg_ioctl_common(struct file *filp, Sg_device *sdp, Sg_fd *sfp,
if (result < 0)
return result;
result = wait_event_interruptible(sfp->read_wait,
- (srp_done(sfp, srp) || atomic_read(&sdp->detaching)));
- if (atomic_read(&sdp->detaching))
- return -ENODEV;
+ srp_done(sfp, srp));
write_lock_irq(&sfp->rq_list_lock);
if (srp->done) {
srp->done = 2;
@@ -2079,19 +2074,28 @@ sg_unlink_reserve(Sg_fd * sfp, Sg_request * srp)
}
static Sg_request *
-sg_get_rq_mark(Sg_fd * sfp, int pack_id)
+sg_get_rq_mark(Sg_fd * sfp, int pack_id, bool *busy)
{
Sg_request *resp;
unsigned long iflags;
+ *busy = false;
write_lock_irqsave(&sfp->rq_list_lock, iflags);
list_for_each_entry(resp, &sfp->rq_list, entry) {
- /* look for requests that are ready + not SG_IO owned */
- if ((1 == resp->done) && (!resp->sg_io_owned) &&
+ /* look for requests that are not SG_IO owned */
+ if ((!resp->sg_io_owned) &&
((-1 == pack_id) || (resp->header.pack_id == pack_id))) {
- resp->done = 2; /* guard against other readers */
- write_unlock_irqrestore(&sfp->rq_list_lock, iflags);
- return resp;
+ switch (resp->done) {
+ case 0: /* request active */
+ *busy = true;
+ break;
+ case 1: /* request done; response ready to return */
+ resp->done = 2; /* guard against other readers */
+ write_unlock_irqrestore(&sfp->rq_list_lock, iflags);
+ return resp;
+ case 2: /* response already being returned */
+ break;
+ }
}
}
write_unlock_irqrestore(&sfp->rq_list_lock, iflags);
@@ -2145,6 +2149,15 @@ sg_remove_request(Sg_fd * sfp, Sg_request * srp)
res = 1;
}
write_unlock_irqrestore(&sfp->rq_list_lock, iflags);
+
+ /*
+ * If the device is detaching, wakeup any readers in case we just
+ * removed the last response, which would leave nothing for them to
+ * return other than -ENODEV.
+ */
+ if (unlikely(atomic_read(&sfp->parentdp->detaching)))
+ wake_up_interruptible_all(&sfp->read_wait);
+
return res;
}
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 0948a9c5386095baae4012190a6b65aba684a907 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021(a)gmail.com>
Date: Fri, 1 Jul 2022 14:14:17 -0700
Subject: [PATCH] scsi: lpfc: Remove extra atomic_inc on cmd_pending in
queuecommand after VMID
VMID introduced an extra increment of cmd_pending, causing double-counting
of the I/O. The normal increment is performed in lpfc_get_scsi_buf.
Link: https://lore.kernel.org/r/20220701211425.2708-5-jsmart2021@gmail.com
Fixes: 33c79741deaf ("scsi: lpfc: vmid: Introduce VMID in I/O path")
Cc: <stable(a)vger.kernel.org> # v5.14+
Co-developed-by: Justin Tee <justin.tee(a)broadcom.com>
Signed-off-by: Justin Tee <justin.tee(a)broadcom.com>
Signed-off-by: James Smart <jsmart2021(a)gmail.com>
Signed-off-by: Martin K. Petersen <martin.petersen(a)oracle.com>
diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c
index ba5e4016262e..084c0f9fdc3a 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.c
+++ b/drivers/scsi/lpfc/lpfc_scsi.c
@@ -5456,7 +5456,6 @@ lpfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd)
cur_iocbq->cmd_flag |= LPFC_IO_VMID;
}
}
- atomic_inc(&ndlp->cmd_pending);
#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
if (unlikely(phba->hdwqstat_on & LPFC_CHECK_SCSI_IO))
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 0b9ba6135d7f18b82f3d8bebb55ded725ba88e0e Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason(a)zx2c4.com>
Date: Wed, 13 Jul 2022 01:12:21 +0200
Subject: [PATCH] um: seed rng using host OS rng
UML generally does not provide access to special CPU instructions like
RDRAND, and execution tends to be rather deterministic, with no real
hardware interrupts, making good randomness really very hard, if not
altogether impossible. Not only is this a security eyebrow raiser, but
it's also quite annoying when trying to do various pieces of UML-based
automation that takes a long time to boot, if ever.
Fix this by trivially calling getrandom() in the host and using that
seed as "bootloader randomness", which initializes the rng immediately
at UML boot.
The old behavior can be restored the same way as on any other arch, by
way of CONFIG_TRUST_BOOTLOADER_RANDOMNESS=n or
random.trust_bootloader=0. So seen from that perspective, this just
makes UML act like other archs, which is positive in its own right.
Additionally, wire up arch_get_random_{int,long}() in the same way, so
that reseeds can also make use of the host RNG, controllable by
CONFIG_TRUST_CPU_RANDOMNESS and random.trust_cpu, per usual.
Cc: stable(a)vger.kernel.org
Acked-by: Johannes Berg <johannes(a)sipsolutions.net>
Acked-By: Anton Ivanov <anton.ivanov(a)cambridgegreys.com>
Signed-off-by: Jason A. Donenfeld <Jason(a)zx2c4.com>
diff --git a/arch/um/include/asm/archrandom.h b/arch/um/include/asm/archrandom.h
new file mode 100644
index 000000000000..2f24cb96391d
--- /dev/null
+++ b/arch/um/include/asm/archrandom.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_UM_ARCHRANDOM_H__
+#define __ASM_UM_ARCHRANDOM_H__
+
+#include <linux/types.h>
+
+/* This is from <os.h>, but better not to #include that in a global header here. */
+ssize_t os_getrandom(void *buf, size_t len, unsigned int flags);
+
+static inline bool __must_check arch_get_random_long(unsigned long *v)
+{
+ return os_getrandom(v, sizeof(*v), 0) == sizeof(*v);
+}
+
+static inline bool __must_check arch_get_random_int(unsigned int *v)
+{
+ return os_getrandom(v, sizeof(*v), 0) == sizeof(*v);
+}
+
+static inline bool __must_check arch_get_random_seed_long(unsigned long *v)
+{
+ return false;
+}
+
+static inline bool __must_check arch_get_random_seed_int(unsigned int *v)
+{
+ return false;
+}
+
+#endif
diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
index fafde1d5416e..0df646c6651e 100644
--- a/arch/um/include/shared/os.h
+++ b/arch/um/include/shared/os.h
@@ -11,6 +11,12 @@
#include <irq_user.h>
#include <longjmp.h>
#include <mm_id.h>
+/* This is to get size_t */
+#ifndef __UM_HOST__
+#include <linux/types.h>
+#else
+#include <sys/types.h>
+#endif
#define CATCH_EINTR(expr) while ((errno = 0, ((expr) < 0)) && (errno == EINTR))
@@ -243,6 +249,7 @@ extern void stack_protections(unsigned long address);
extern int raw(int fd);
extern void setup_machinename(char *machine_out);
extern void setup_hostinfo(char *buf, int len);
+extern ssize_t os_getrandom(void *buf, size_t len, unsigned int flags);
extern void os_dump_core(void) __attribute__ ((noreturn));
extern void um_early_printk(const char *s, unsigned int n);
extern void os_fix_helper_signals(void);
diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
index 0760e24f2eba..74f3efd96bd4 100644
--- a/arch/um/kernel/um_arch.c
+++ b/arch/um/kernel/um_arch.c
@@ -16,6 +16,7 @@
#include <linux/sched/task.h>
#include <linux/kmsg_dump.h>
#include <linux/suspend.h>
+#include <linux/random.h>
#include <asm/processor.h>
#include <asm/cpufeature.h>
@@ -406,6 +407,8 @@ int __init __weak read_initrd(void)
void __init setup_arch(char **cmdline_p)
{
+ u8 rng_seed[32];
+
stack_protections((unsigned long) &init_thread_info);
setup_physmem(uml_physmem, uml_reserved, physmem_size, highmem);
mem_total_pages(physmem_size, iomem_size, highmem);
@@ -416,6 +419,11 @@ void __init setup_arch(char **cmdline_p)
strlcpy(boot_command_line, command_line, COMMAND_LINE_SIZE);
*cmdline_p = command_line;
setup_hostinfo(host_info, sizeof host_info);
+
+ if (os_getrandom(rng_seed, sizeof(rng_seed), 0) == sizeof(rng_seed)) {
+ add_bootloader_randomness(rng_seed, sizeof(rng_seed));
+ memzero_explicit(rng_seed, sizeof(rng_seed));
+ }
}
void __init check_bugs(void)
diff --git a/arch/um/os-Linux/util.c b/arch/um/os-Linux/util.c
index 41297ec404bf..fc0f2a9dee5a 100644
--- a/arch/um/os-Linux/util.c
+++ b/arch/um/os-Linux/util.c
@@ -14,6 +14,7 @@
#include <sys/wait.h>
#include <sys/mman.h>
#include <sys/utsname.h>
+#include <sys/random.h>
#include <init.h>
#include <os.h>
@@ -96,6 +97,11 @@ static inline void __attribute__ ((noreturn)) uml_abort(void)
exit(127);
}
+ssize_t os_getrandom(void *buf, size_t len, unsigned int flags)
+{
+ return getrandom(buf, len, flags);
+}
+
/*
* UML helper threads must not handle SIGWINCH/INT/TERM
*/
The patch below does not apply to the 5.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7ec4cdb321738d44ae5d405e7b6ac73dfbf99caa Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel(a)I-love.SAKURA.ne.jp>
Date: Mon, 25 Jul 2022 22:49:25 +0900
Subject: [PATCH] mtd: core: check partition before dereference
syzbot is reporting a NULL pointer dereference in mtd_check_of_node() [1],
because the mtdram test device (CONFIG_MTD_MTDRAM) is not a partition.
Link: https://syzkaller.appspot.com/bug?extid=fe013f55a2814a9e8cfd [1]
Reported-by: syzbot <syzbot+fe013f55a2814a9e8cfd(a)syzkaller.appspotmail.com>
Reported-by: kernel test robot <oliver.sang(a)intel.com>
Fixes: ad9b10d1eaada169 ("mtd: core: introduce of support for dynamic partitions")
Signed-off-by: Tetsuo Handa <penguin-kernel(a)I-love.SAKURA.ne.jp>
CC: stable(a)vger.kernel.org
Signed-off-by: Richard Weinberger <richard(a)nod.at>
diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index 6fafea80fd98..a9b8be9f40dc 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -559,6 +559,8 @@ static void mtd_check_of_node(struct mtd_info *mtd)
return;
/* Check if a partitions node exist */
+ if (!mtd_is_partition(mtd))
+ return;
parent = mtd->parent;
parent_dn = dev_of_node(&parent->dev);
if (!parent_dn)
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e16eceea863b417fd328588b1be1a79de0bc937f Mon Sep 17 00:00:00 2001
From: Olga Kitaina <okitain(a)gmail.com>
Date: Tue, 28 Jun 2022 21:18:24 +0530
Subject: [PATCH] mtd: rawnand: arasan: Fix clock rate in NV-DDR
According to the Arasan NAND controller spec, the flash clock rate for SDR
must be <= 100 MHz, while for NV-DDR it must be the same as the rate of the
CLK line for the mode. The driver previously always set 100 MHz for NV-DDR,
which would result in incorrect behavior for NV-DDR modes 0-4.
The appropriate clock rate can be calculated from the NV-DDR timing
parameters as 1/tCK, or for rates measured in picoseconds,
10^12 / nand_nvddr_timings->tCK_min.
Fixes: 197b88fecc50 ("mtd: rawnand: arasan: Add new Arasan NAND controller")
CC: stable(a)vger.kernel.org # 5.8+
Signed-off-by: Olga Kitaina <okitain(a)gmail.com>
Signed-off-by: Amit Kumar Mahapatra <amit.kumar-mahapatra(a)xilinx.com>
Signed-off-by: Miquel Raynal <miquel.raynal(a)bootlin.com>
Link: https://lore.kernel.org/linux-mtd/20220628154824.12222-3-amit.kumar-mahapat…
diff --git a/drivers/mtd/nand/raw/arasan-nand-controller.c b/drivers/mtd/nand/raw/arasan-nand-controller.c
index c5264fa223c4..296fb16c8dc3 100644
--- a/drivers/mtd/nand/raw/arasan-nand-controller.c
+++ b/drivers/mtd/nand/raw/arasan-nand-controller.c
@@ -1043,7 +1043,13 @@ static int anfc_setup_interface(struct nand_chip *chip, int target,
DQS_BUFF_SEL_OUT(dqs_mode);
}
- anand->clk = ANFC_XLNX_SDR_DFLT_CORE_CLK;
+ if (nand_interface_is_sdr(conf)) {
+ anand->clk = ANFC_XLNX_SDR_DFLT_CORE_CLK;
+ } else {
+ /* ONFI timings are defined in picoseconds */
+ anand->clk = div_u64((u64)NSEC_PER_SEC * 1000,
+ conf->timings.nvddr.tCK_min);
+ }
/*
* Due to a hardware bug in the ZynqMP SoC, SDR timing modes 0-1 work
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From dc4d31684974d140250f3ee612c3f0cab13b3146 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu(a)suse.com>
Date: Tue, 7 Jun 2022 19:48:24 +0800
Subject: [PATCH] btrfs: reject log replay if there is unsupported RO compat
flag
[BUG]
If we have a btrfs image with dirty log, along with an unsupported RO
compatible flag:
log_root 30474240
...
compat_flags 0x0
compat_ro_flags 0x40000003
( FREE_SPACE_TREE |
FREE_SPACE_TREE_VALID |
unknown flag: 0x40000000 )
Then even if we can only mount it RO, we will still cause metadata
update for log replay:
BTRFS info (device dm-1): flagging fs with big metadata feature
BTRFS info (device dm-1): using free space tree
BTRFS info (device dm-1): has skinny extents
BTRFS info (device dm-1): start tree-log replay
This is definitely against the RO compat flag requirement.
[CAUSE]
An RO compat flag only forces us to do an RO mount, but we will still do
log replay for a plain RO mount.
Thus we end up doing log replay and updating metadata.
This can be very problematic for a new RO compat flag; for example, an older
kernel cannot understand the v2 cache, and if we allow metadata updates on
an RO mount we can invalidate/corrupt the v2 cache.
[FIX]
Just reject the mount unless rescue=nologreplay is provided:
BTRFS error (device dm-1): cannot replay dirty log with unsupported compat_ro features (0x40000000), try rescue=nologreplay
We don't want to set rescue=nologreplay directly, as this would make the
end user read the old data, and cause confusion.
Since such a case is really rare, we're mostly fine to just reject the
mount with an error message, which also includes the proper workaround.
CC: stable(a)vger.kernel.org #4.9+
Signed-off-by: Qu Wenruo <wqu(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index ed1d92b370db..32b88a227734 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3556,6 +3556,20 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
err = -EINVAL;
goto fail_alloc;
}
+ /*
+ * We have unsupported RO compat features, although RO mounted, we
+ * should not cause any metadata write, including log replay.
+ * Or we could screw up whatever the new feature requires.
+ */
+ if (unlikely(features && btrfs_super_log_root(disk_super) &&
+ !btrfs_test_opt(fs_info, NOLOGREPLAY))) {
+ btrfs_err(fs_info,
+"cannot replay dirty log with unsupported compat_ro features (0x%llx), try rescue=nologreplay",
+ features);
+ err = -EINVAL;
+ goto fail_alloc;
+ }
+
if (sectorsize < PAGE_SIZE) {
struct btrfs_subpage_info *subpage_info;
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From dc4d31684974d140250f3ee612c3f0cab13b3146 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu(a)suse.com>
Date: Tue, 7 Jun 2022 19:48:24 +0800
Subject: [PATCH] btrfs: reject log replay if there is unsupported RO compat
flag
[BUG]
If we have a btrfs image with dirty log, along with an unsupported RO
compatible flag:
log_root 30474240
...
compat_flags 0x0
compat_ro_flags 0x40000003
( FREE_SPACE_TREE |
FREE_SPACE_TREE_VALID |
unknown flag: 0x40000000 )
Then even if we can only mount it RO, we will still cause metadata
update for log replay:
BTRFS info (device dm-1): flagging fs with big metadata feature
BTRFS info (device dm-1): using free space tree
BTRFS info (device dm-1): has skinny extents
BTRFS info (device dm-1): start tree-log replay
This is definitely against the RO compat flag requirement.
[CAUSE]
An RO compat flag only forces us to do an RO mount, but we will still do
log replay for a plain RO mount.
Thus we end up doing log replay and updating metadata.
This can be very problematic for a new RO compat flag; for example, an older
kernel cannot understand the v2 cache, and if we allow metadata updates on
an RO mount we can invalidate/corrupt the v2 cache.
[FIX]
Just reject the mount unless rescue=nologreplay is provided:
BTRFS error (device dm-1): cannot replay dirty log with unsupported compat_ro features (0x40000000), try rescue=nologreplay
We don't want to set rescue=nologreplay directly, as this would make the
end user read the old data, and cause confusion.
Since such a case is really rare, we're mostly fine to just reject the
mount with an error message, which also includes the proper workaround.
CC: stable(a)vger.kernel.org #4.9+
Signed-off-by: Qu Wenruo <wqu(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index ed1d92b370db..32b88a227734 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3556,6 +3556,20 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
err = -EINVAL;
goto fail_alloc;
}
+ /*
+ * We have unsupported RO compat features, although RO mounted, we
+ * should not cause any metadata write, including log replay.
+ * Or we could screw up whatever the new feature requires.
+ */
+ if (unlikely(features && btrfs_super_log_root(disk_super) &&
+ !btrfs_test_opt(fs_info, NOLOGREPLAY))) {
+ btrfs_err(fs_info,
+"cannot replay dirty log with unsupported compat_ro features (0x%llx), try rescue=nologreplay",
+ features);
+ err = -EINVAL;
+ goto fail_alloc;
+ }
+
if (sectorsize < PAGE_SIZE) {
struct btrfs_subpage_info *subpage_info;
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From dc4d31684974d140250f3ee612c3f0cab13b3146 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu(a)suse.com>
Date: Tue, 7 Jun 2022 19:48:24 +0800
Subject: [PATCH] btrfs: reject log replay if there is unsupported RO compat
flag
[BUG]
If we have a btrfs image with dirty log, along with an unsupported RO
compatible flag:
log_root 30474240
...
compat_flags 0x0
compat_ro_flags 0x40000003
( FREE_SPACE_TREE |
FREE_SPACE_TREE_VALID |
unknown flag: 0x40000000 )
Then even if we can only mount it RO, we will still cause metadata
update for log replay:
BTRFS info (device dm-1): flagging fs with big metadata feature
BTRFS info (device dm-1): using free space tree
BTRFS info (device dm-1): has skinny extents
BTRFS info (device dm-1): start tree-log replay
This is definitely against the RO compat flag requirement.
[CAUSE]
An RO compat flag only forces us to do an RO mount, but we will still do
log replay for a plain RO mount.
Thus we end up doing log replay and updating metadata.
This can be very problematic for a new RO compat flag; for example, an older
kernel cannot understand the v2 cache, and if we allow metadata updates on
an RO mount we can invalidate/corrupt the v2 cache.
[FIX]
Just reject the mount unless rescue=nologreplay is provided:
BTRFS error (device dm-1): cannot replay dirty log with unsupported compat_ro features (0x40000000), try rescue=nologreplay
We don't want to set rescue=nologreplay directly, as this would make the
end user read the old data, and cause confusion.
Since such a case is really rare, we're mostly fine to just reject the
mount with an error message, which also includes the proper workaround.
CC: stable(a)vger.kernel.org #4.9+
Signed-off-by: Qu Wenruo <wqu(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index ed1d92b370db..32b88a227734 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3556,6 +3556,20 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
err = -EINVAL;
goto fail_alloc;
}
+ /*
+ * We have unsupported RO compat features, although RO mounted, we
+ * should not cause any metadata write, including log replay.
+ * Or we could screw up whatever the new feature requires.
+ */
+ if (unlikely(features && btrfs_super_log_root(disk_super) &&
+ !btrfs_test_opt(fs_info, NOLOGREPLAY))) {
+ btrfs_err(fs_info,
+"cannot replay dirty log with unsupported compat_ro features (0x%llx), try rescue=nologreplay",
+ features);
+ err = -EINVAL;
+ goto fail_alloc;
+ }
+
if (sectorsize < PAGE_SIZE) {
struct btrfs_subpage_info *subpage_info;
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From dc4d31684974d140250f3ee612c3f0cab13b3146 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu(a)suse.com>
Date: Tue, 7 Jun 2022 19:48:24 +0800
Subject: [PATCH] btrfs: reject log replay if there is unsupported RO compat
flag
[BUG]
If we have a btrfs image with dirty log, along with an unsupported RO
compatible flag:
log_root 30474240
...
compat_flags 0x0
compat_ro_flags 0x40000003
( FREE_SPACE_TREE |
FREE_SPACE_TREE_VALID |
unknown flag: 0x40000000 )
Then even if we can only mount it RO, we will still cause metadata
update for log replay:
BTRFS info (device dm-1): flagging fs with big metadata feature
BTRFS info (device dm-1): using free space tree
BTRFS info (device dm-1): has skinny extents
BTRFS info (device dm-1): start tree-log replay
This is definitely against the RO compat flag requirement.
[CAUSE]
An RO compat flag only forces us to do an RO mount, but we will still do
log replay for a plain RO mount.
Thus we end up doing log replay and updating metadata.
This can be very problematic for a new RO compat flag; for example, an older
kernel cannot understand the v2 cache, and if we allow metadata updates on
an RO mount we can invalidate/corrupt the v2 cache.
[FIX]
Just reject the mount unless rescue=nologreplay is provided:
BTRFS error (device dm-1): cannot replay dirty log with unsupported compat_ro features (0x40000000), try rescue=nologreplay
We don't want to set rescue=nologreplay directly, as this would make the
end user read the old data, and cause confusion.
Since such a case is really rare, we're mostly fine to just reject the
mount with an error message, which also includes the proper workaround.
CC: stable(a)vger.kernel.org #4.9+
Signed-off-by: Qu Wenruo <wqu(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index ed1d92b370db..32b88a227734 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3556,6 +3556,20 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
err = -EINVAL;
goto fail_alloc;
}
+ /*
+ * We have unsupported RO compat features, although RO mounted, we
+ * should not cause any metadata write, including log replay.
+ * Or we could screw up whatever the new feature requires.
+ */
+ if (unlikely(features && btrfs_super_log_root(disk_super) &&
+ !btrfs_test_opt(fs_info, NOLOGREPLAY))) {
+ btrfs_err(fs_info,
+"cannot replay dirty log with unsupported compat_ro features (0x%llx), try rescue=nologreplay",
+ features);
+ err = -EINVAL;
+ goto fail_alloc;
+ }
+
if (sectorsize < PAGE_SIZE) {
struct btrfs_subpage_info *subpage_info;
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From dc4d31684974d140250f3ee612c3f0cab13b3146 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu(a)suse.com>
Date: Tue, 7 Jun 2022 19:48:24 +0800
Subject: [PATCH] btrfs: reject log replay if there is unsupported RO compat
flag
[BUG]
If we have a btrfs image with dirty log, along with an unsupported RO
compatible flag:
log_root 30474240
...
compat_flags 0x0
compat_ro_flags 0x40000003
( FREE_SPACE_TREE |
FREE_SPACE_TREE_VALID |
unknown flag: 0x40000000 )
Then even if we can only mount it RO, we will still cause metadata
update for log replay:
BTRFS info (device dm-1): flagging fs with big metadata feature
BTRFS info (device dm-1): using free space tree
BTRFS info (device dm-1): has skinny extents
BTRFS info (device dm-1): start tree-log replay
This is definitely against the RO compat flag requirement.
[CAUSE]
An RO compat flag only forces us to do an RO mount, but we will still do
log replay for a plain RO mount.
Thus we end up doing log replay and updating metadata.
This can be very problematic for a new RO compat flag; for example, an older
kernel cannot understand the v2 cache, and if we allow metadata updates on
an RO mount we can invalidate/corrupt the v2 cache.
[FIX]
Just reject the mount unless rescue=nologreplay is provided:
BTRFS error (device dm-1): cannot replay dirty log with unsupported compat_ro features (0x40000000), try rescue=nologreplay
We don't want to set rescue=nologreplay directly, as this would make the
end user read the old data, and cause confusion.
Since such a case is really rare, we're mostly fine to just reject the
mount with an error message, which also includes the proper workaround.
CC: stable(a)vger.kernel.org #4.9+
Signed-off-by: Qu Wenruo <wqu(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index ed1d92b370db..32b88a227734 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3556,6 +3556,20 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
err = -EINVAL;
goto fail_alloc;
}
+ /*
+ * We have unsupported RO compat features, although RO mounted, we
+ * should not cause any metadata write, including log replay.
+ * Or we could screw up whatever the new feature requires.
+ */
+ if (unlikely(features && btrfs_super_log_root(disk_super) &&
+ !btrfs_test_opt(fs_info, NOLOGREPLAY))) {
+ btrfs_err(fs_info,
+"cannot replay dirty log with unsupported compat_ro features (0x%llx), try rescue=nologreplay",
+ features);
+ err = -EINVAL;
+ goto fail_alloc;
+ }
+
if (sectorsize < PAGE_SIZE) {
struct btrfs_subpage_info *subpage_info;
From: Daniel Borkmann <daniel(a)iogearbox.net>
[ Upstream commit 5ccb071e97fbd9ffe623a0d3977cc6d013bee93c ]
Commit aaac3ba95e4c ("bpf: charge user for creation of BPF maps and
programs") made a wrong assumption of charging against prog->pages.
Unlike map->pages, prog->pages are still subject to change when we
need to expand the program through bpf_prog_realloc().
This can for example happen during verification stage when we need to
expand and rewrite parts of the program. Should the required space
cross a page boundary, then prog->pages is not the same anymore as
its original value that we used to bpf_prog_charge_memlock() on. Thus,
we'll hit a wrap-around during bpf_prog_uncharge_memlock() when prog
is freed eventually. I noticed this because, despite having unlimited
memlock, programs suddenly refused to load with an EPERM error due to
insufficient memlock.
There are two ways to fix this issue. One would be to add a cached
variable to struct bpf_prog that takes a snapshot of prog->pages at the
time of charging. The other approach is to also account for resizes. I
chose to go with the latter for a couple of reasons: i) We want accounting
rather to be more accurate instead of further fooling limits, ii) adding
yet another page counter on struct bpf_prog would also be a waste just
for this purpose. We also do want to charge as early as possible to
avoid going into the verifier just to find out later on that we crossed
limits. The only place that needs to be fixed is bpf_prog_realloc(),
since only here we expand the program, so we try to account for the
needed delta and should we fail, call-sites check for outcome anyway.
On cBPF to eBPF migrations, we don't grab a reference to the user as
they are charged differently. With that in place, my test case worked
fine.
Fixes: aaac3ba95e4c ("bpf: charge user for creation of BPF maps and programs")
Signed-off-by: Daniel Borkmann <daniel(a)iogearbox.net>
Acked-by: Alexei Starovoitov <ast(a)kernel.org>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
[Quentin: backport to 4.9: Adjust context in bpf.h ]
Signed-off-by: Quentin Monnet <quentin(a)isovalent.com>
---
This fix was merged in Linux 4.10 but never backported to 4.9. The
overflow has been occurring regularly when running Cilium's CI tests on
kernel 4.9, so I would like to submit this patch for consideration to
the 4.9 stable branch.
The initial patch applied with a minor conflict on include/linux/bpf.h,
due to unprivileged_ebpf_enabled() backported in 6481835a9a5b
("x86/speculation: Include unprivileged eBPF status in Spectre v2
mitigation reporting")
---
include/linux/bpf.h | 11 +++++++++++
kernel/bpf/core.c | 16 +++++++++++++---
kernel/bpf/syscall.c | 36 ++++++++++++++++++++++++++++--------
3 files changed, 52 insertions(+), 11 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index fe520d40597f..7a1e6d3d0fd9 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -246,6 +246,8 @@ struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type);
struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i);
struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog);
void bpf_prog_put(struct bpf_prog *prog);
+int __bpf_prog_charge(struct user_struct *user, u32 pages);
+void __bpf_prog_uncharge(struct user_struct *user, u32 pages);
struct bpf_map *bpf_map_get_with_uref(u32 ufd);
struct bpf_map *__bpf_map_get(struct fd f);
@@ -328,6 +330,15 @@ static inline struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog)
return ERR_PTR(-EOPNOTSUPP);
}
+static inline int __bpf_prog_charge(struct user_struct *user, u32 pages)
+{
+ return 0;
+}
+
+static inline void __bpf_prog_uncharge(struct user_struct *user, u32 pages)
+{
+}
+
static inline bool unprivileged_ebpf_enabled(void)
{
return false;
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 9976703f2dbf..5aeadf79e05e 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -107,19 +107,29 @@ struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
gfp_t gfp_flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO |
gfp_extra_flags;
struct bpf_prog *fp;
+ u32 pages, delta;
+ int ret;
BUG_ON(fp_old == NULL);
size = round_up(size, PAGE_SIZE);
- if (size <= fp_old->pages * PAGE_SIZE)
+ pages = size / PAGE_SIZE;
+ if (pages <= fp_old->pages)
return fp_old;
+ delta = pages - fp_old->pages;
+ ret = __bpf_prog_charge(fp_old->aux->user, delta);
+ if (ret)
+ return NULL;
+
fp = __vmalloc(size, gfp_flags, PAGE_KERNEL);
- if (fp != NULL) {
+ if (fp == NULL) {
+ __bpf_prog_uncharge(fp_old->aux->user, delta);
+ } else {
kmemcheck_annotate_bitfield(fp, meta);
memcpy(fp, fp_old, fp_old->pages * PAGE_SIZE);
- fp->pages = size / PAGE_SIZE;
+ fp->pages = pages;
fp->aux->prog = fp;
/* We keep fp->aux from fp_old around in the new
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index e30ad1be6841..e0d4e210b1a1 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -581,19 +581,39 @@ static void free_used_maps(struct bpf_prog_aux *aux)
kfree(aux->used_maps);
}
+int __bpf_prog_charge(struct user_struct *user, u32 pages)
+{
+ unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+ unsigned long user_bufs;
+
+ if (user) {
+ user_bufs = atomic_long_add_return(pages, &user->locked_vm);
+ if (user_bufs > memlock_limit) {
+ atomic_long_sub(pages, &user->locked_vm);
+ return -EPERM;
+ }
+ }
+
+ return 0;
+}
+
+void __bpf_prog_uncharge(struct user_struct *user, u32 pages)
+{
+ if (user)
+ atomic_long_sub(pages, &user->locked_vm);
+}
+
static int bpf_prog_charge_memlock(struct bpf_prog *prog)
{
struct user_struct *user = get_current_user();
- unsigned long memlock_limit;
+ int ret;
- memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
-
- atomic_long_add(prog->pages, &user->locked_vm);
- if (atomic_long_read(&user->locked_vm) > memlock_limit) {
- atomic_long_sub(prog->pages, &user->locked_vm);
+ ret = __bpf_prog_charge(user, prog->pages);
+ if (ret) {
free_uid(user);
- return -EPERM;
+ return ret;
}
+
prog->aux->user = user;
return 0;
}
@@ -602,7 +622,7 @@ static void bpf_prog_uncharge_memlock(struct bpf_prog *prog)
{
struct user_struct *user = prog->aux->user;
- atomic_long_sub(prog->pages, &user->locked_vm);
+ __bpf_prog_uncharge(user, prog->pages);
free_uid(user);
}
--
2.25.1
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 4c46091ee985ae84c60c5e95055d779fcd291d87 Mon Sep 17 00:00:00 2001
From: Tadeusz Struk <tadeusz.struk(a)linaro.org>
Date: Tue, 17 May 2022 11:04:20 -0700
Subject: [PATCH] bpf: Fix KASAN use-after-free Read in compute_effective_progs
Syzbot found a Use After Free bug in compute_effective_progs().
The reproducer creates a number of BPF links, and causes a fault
injected alloc to fail, while calling bpf_link_detach on them.
Link detach triggers the link to be freed by bpf_link_free(),
which calls __cgroup_bpf_detach() and update_effective_progs().
If the memory allocation in this function fails, the function restores
the pointer to the bpf_cgroup_link on the cgroup list, but the memory
gets freed just after it returns. After this, every subsequent call to
update_effective_progs() causes this already deallocated pointer to be
dereferenced in prog_list_length(), and triggers KASAN UAF error.
To fix this issue don't preserve the pointer to the prog or link in the
list, but remove it and replace it with a dummy prog without shrinking
the table. The subsequent call to __cgroup_bpf_detach() or
__cgroup_bpf_detach() will correct it.
Fixes: af6eea57437a ("bpf: Implement bpf_link-based cgroup BPF program attachment")
Reported-by: <syzbot+f264bffdfbd5614f3bb2(a)syzkaller.appspotmail.com>
Signed-off-by: Tadeusz Struk <tadeusz.struk(a)linaro.org>
Signed-off-by: Andrii Nakryiko <andrii(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Link: https://syzkaller.appspot.com/bug?id=8ebf179a95c2a2670f7cf1ba62429ec044369d…
Link: https://lore.kernel.org/bpf/20220517180420.87954-1-tadeusz.struk@linaro.org
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index afb414b26d01..7a394f7c205c 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -720,6 +720,60 @@ static struct bpf_prog_list *find_detach_entry(struct list_head *progs,
return ERR_PTR(-ENOENT);
}
+/**
+ * purge_effective_progs() - After compute_effective_progs fails to alloc new
+ * cgrp->bpf.inactive table we can recover by
+ * recomputing the array in place.
+ *
+ * @cgrp: The cgroup whose descendants to traverse
+ * @prog: A program to detach or NULL
+ * @link: A link to detach or NULL
+ * @atype: Type of detach operation
+ */
+static void purge_effective_progs(struct cgroup *cgrp, struct bpf_prog *prog,
+ struct bpf_cgroup_link *link,
+ enum cgroup_bpf_attach_type atype)
+{
+ struct cgroup_subsys_state *css;
+ struct bpf_prog_array *progs;
+ struct bpf_prog_list *pl;
+ struct list_head *head;
+ struct cgroup *cg;
+ int pos;
+
+ /* recompute effective prog array in place */
+ css_for_each_descendant_pre(css, &cgrp->self) {
+ struct cgroup *desc = container_of(css, struct cgroup, self);
+
+ if (percpu_ref_is_zero(&desc->bpf.refcnt))
+ continue;
+
+ /* find position of link or prog in effective progs array */
+ for (pos = 0, cg = desc; cg; cg = cgroup_parent(cg)) {
+ if (pos && !(cg->bpf.flags[atype] & BPF_F_ALLOW_MULTI))
+ continue;
+
+ head = &cg->bpf.progs[atype];
+ list_for_each_entry(pl, head, node) {
+ if (!prog_list_prog(pl))
+ continue;
+ if (pl->prog == prog && pl->link == link)
+ goto found;
+ pos++;
+ }
+ }
+found:
+ BUG_ON(!cg);
+ progs = rcu_dereference_protected(
+ desc->bpf.effective[atype],
+ lockdep_is_held(&cgroup_mutex));
+
+ /* Remove the program from the array */
+ WARN_ONCE(bpf_prog_array_delete_safe_at(progs, pos),
+ "Failed to purge a prog from array at index %d", pos);
+ }
+}
+
/**
* __cgroup_bpf_detach() - Detach the program or link from a cgroup, and
* propagate the change to descendants
@@ -739,7 +793,6 @@ static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
struct bpf_prog_list *pl;
struct list_head *progs;
u32 flags;
- int err;
atype = to_cgroup_bpf_attach_type(type);
if (atype < 0)
@@ -761,9 +814,12 @@ static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
pl->prog = NULL;
pl->link = NULL;
- err = update_effective_progs(cgrp, atype);
- if (err)
- goto cleanup;
+ if (update_effective_progs(cgrp, atype)) {
+ /* if update effective array failed, replace the prog with a dummy prog */
+ pl->prog = old_prog;
+ pl->link = link;
+ purge_effective_progs(cgrp, old_prog, link, atype);
+ }
/* now can actually delete it from this cgroup list */
list_del(&pl->node);
@@ -775,12 +831,6 @@ static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
bpf_prog_put(old_prog);
static_branch_dec(&cgroup_bpf_enabled_key[atype]);
return 0;
-
-cleanup:
- /* restore back prog or link */
- pl->prog = old_prog;
- pl->link = link;
- return err;
}
static int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
The new vkms virtual display code is atomic so there is
no need to call drm_helper_disable_unused_functions()
when it is enabled. Doing so can result in a segfault.
When the driver switched from the old virtual display code
to the new atomic virtual display code, it was missed that
we enable virtual display unconditionally under SR-IOV
so the checks here missed that case. Add the missing
check for SR-IOV.
There is no equivalent of this patch for Linus' tree
because the relevant code no longer exists. This patch
is only relevant to kernels 5.15 and 5.16.
Fixes: 84ec374bd580 ("drm/amdgpu: create amdgpu_vkms (v4)")
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
Cc: stable(a)vger.kernel.org # 5.15.x
Cc: hgoffin(a)amazon.com
---
drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
index cd0acbea75da..d58ab9deb028 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
@@ -341,7 +341,8 @@ int amdgpu_fbdev_init(struct amdgpu_device *adev)
}
/* disable all the possible outputs/crtcs before entering KMS mode */
- if (!amdgpu_device_has_dc_support(adev) && !amdgpu_virtual_display)
+ if (!amdgpu_device_has_dc_support(adev) && !amdgpu_virtual_display &&
+ !amdgpu_sriov_vf(adev))
drm_helper_disable_unused_functions(adev_to_drm(adev));
drm_fb_helper_initial_config(&rfbdev->helper, bpp_sel);
--
2.35.3
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From cb50813998b5aed924323b1b46471e8c60b26692 Mon Sep 17 00:00:00 2001
From: Mario Kleiner <mario.kleiner.de(a)gmail.com>
Date: Mon, 11 Jul 2022 19:39:28 +0200
Subject: [PATCH] drm/amd/display: Only use depth 36 bpp linebuffers on DCN
display engines.
Various DCE versions had trouble with 36 bpp lb depth, requiring fixes,
last time in commit 353ca0fa5630 ("drm/amd/display: Fix 10bit 4K display
on CIK GPUs") for DCE-8. So far >= DCE-11.2 was considered ok, but now I
found out that on DCE-11.2 it causes dithering when there shouldn't be
any, so identity pixel passthrough with identity gamma LUTs doesn't work
when it should. This breaks various important neuroscience applications,
as reported to me by scientific users of Polaris cards under Ubuntu 22.04
with Linux 5.15, and confirmed by testing it myself on DCE-11.2.
Let's only use depth 36 for DCN engines, where my testing showed that it
is both necessary for high color precision output, e.g., RGBA16 fb's,
and not harmful, as far as more than one year in real-world use showed.
DCE engines seem to work fine for high precision output at 30 bpp, so
this ("famous last words") depth 30 should hopefully fix all known problems
without introducing new ones.
Successfully retested on DCE-11.2 Polaris and DCN-1.0 Raven Ridge on
top of Linux 5.19.0-rc2 + drm-next.
Fixes: 353ca0fa5630 ("drm/amd/display: Fix 10bit 4K display on CIK GPUs")
Signed-off-by: Mario Kleiner <mario.kleiner.de(a)gmail.com>
Tested-by: Mario Kleiner <mario.kleiner.de(a)gmail.com>
Cc: stable(a)vger.kernel.org # 5.14.0
Cc: Alex Deucher <alexander.deucher(a)amd.com>
Cc: Harry Wentland <harry.wentland(a)amd.com>
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
index 2a701c583332..e33df231e9d2 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
@@ -1156,12 +1156,13 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx)
* on certain displays, such as the Sharp 4k. 36bpp is needed
* to support SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616 and
* SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616 with actual > 10 bpc
- * precision on at least DCN display engines. However, at least
- * Carrizo with DCE_VERSION_11_0 does not like 36 bpp lb depth,
- * so use only 30 bpp on DCE_VERSION_11_0. Testing with DCE 11.2 and 8.3
- * did not show such problems, so this seems to be the exception.
+ * precision on DCN display engines, but apparently not for DCE, as
+ * far as testing on DCE-11.2 and DCE-8 showed. Various DCE parts have
+ * problems: Carrizo with DCE_VERSION_11_0 does not like 36 bpp lb depth,
+ * neither do DCE-8 at 4k resolution, or DCE-11.2 (broken identity pixel
+ * passthrough). Therefore only use 36 bpp on DCN where it is actually needed.
*/
- if (plane_state->ctx->dce_version > DCE_VERSION_11_0)
+ if (plane_state->ctx->dce_version > DCE_VERSION_MAX)
pipe_ctx->plane_res.scl_data.lb_params.depth = LB_PIXEL_DEPTH_36BPP;
else
pipe_ctx->plane_res.scl_data.lb_params.depth = LB_PIXEL_DEPTH_30BPP;
The patch below does not apply to the 5.18-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From cb50813998b5aed924323b1b46471e8c60b26692 Mon Sep 17 00:00:00 2001
From: Mario Kleiner <mario.kleiner.de(a)gmail.com>
Date: Mon, 11 Jul 2022 19:39:28 +0200
Subject: [PATCH] drm/amd/display: Only use depth 36 bpp linebuffers on DCN
display engines.
Various DCE versions had trouble with 36 bpp lb depth, requiring fixes,
last time in commit 353ca0fa5630 ("drm/amd/display: Fix 10bit 4K display
on CIK GPUs") for DCE-8. So far >= DCE-11.2 was considered ok, but now I
found out that on DCE-11.2 it causes dithering when there shouldn't be
any, so identity pixel passthrough with identity gamma LUTs doesn't work
when it should. This breaks various important neuroscience applications,
as reported to me by scientific users of Polaris cards under Ubuntu 22.04
with Linux 5.15, and confirmed by testing it myself on DCE-11.2.
Let's only use depth 36 for DCN engines, where my testing showed that it
is both necessary for high color precision output, e.g., RGBA16 fb's,
and not harmful, as far as more than one year in real-world use showed.
DCE engines seem to work fine for high precision output at 30 bpp, so
this ("famous last words") depth 30 should hopefully fix all known problems
without introducing new ones.
Successfully retested on DCE-11.2 Polaris and DCN-1.0 Raven Ridge on
top of Linux 5.19.0-rc2 + drm-next.
Fixes: 353ca0fa5630 ("drm/amd/display: Fix 10bit 4K display on CIK GPUs")
Signed-off-by: Mario Kleiner <mario.kleiner.de(a)gmail.com>
Tested-by: Mario Kleiner <mario.kleiner.de(a)gmail.com>
Cc: stable(a)vger.kernel.org # 5.14.0
Cc: Alex Deucher <alexander.deucher(a)amd.com>
Cc: Harry Wentland <harry.wentland(a)amd.com>
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
index 2a701c583332..e33df231e9d2 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
@@ -1156,12 +1156,13 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx)
* on certain displays, such as the Sharp 4k. 36bpp is needed
* to support SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616 and
* SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616 with actual > 10 bpc
- * precision on at least DCN display engines. However, at least
- * Carrizo with DCE_VERSION_11_0 does not like 36 bpp lb depth,
- * so use only 30 bpp on DCE_VERSION_11_0. Testing with DCE 11.2 and 8.3
- * did not show such problems, so this seems to be the exception.
+ * precision on DCN display engines, but apparently not for DCE, as
+ * far as testing on DCE-11.2 and DCE-8 showed. Various DCE parts have
+ * problems: Carrizo with DCE_VERSION_11_0 does not like 36 bpp lb depth,
+ * neither do DCE-8 at 4k resolution, or DCE-11.2 (broken identity pixel
+ * passthrough). Therefore only use 36 bpp on DCN where it is actually needed.
*/
- if (plane_state->ctx->dce_version > DCE_VERSION_11_0)
+ if (plane_state->ctx->dce_version > DCE_VERSION_MAX)
pipe_ctx->plane_res.scl_data.lb_params.depth = LB_PIXEL_DEPTH_36BPP;
else
pipe_ctx->plane_res.scl_data.lb_params.depth = LB_PIXEL_DEPTH_30BPP;
The patch below does not apply to the 5.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From cb50813998b5aed924323b1b46471e8c60b26692 Mon Sep 17 00:00:00 2001
From: Mario Kleiner <mario.kleiner.de(a)gmail.com>
Date: Mon, 11 Jul 2022 19:39:28 +0200
Subject: [PATCH] drm/amd/display: Only use depth 36 bpp linebuffers on DCN
display engines.
Various DCE versions had trouble with 36 bpp lb depth, requiring fixes,
last time in commit 353ca0fa5630 ("drm/amd/display: Fix 10bit 4K display
on CIK GPUs") for DCE-8. So far >= DCE-11.2 was considered ok, but now I
found out that on DCE-11.2 it causes dithering when there shouldn't be
any, so identity pixel passthrough with identity gamma LUTs doesn't work
when it should. This breaks various important neuroscience applications,
as reported to me by scientific users of Polaris cards under Ubuntu 22.04
with Linux 5.15, and confirmed by testing it myself on DCE-11.2.
Let's only use depth 36 for DCN engines, where my testing showed that it
is both necessary for high color precision output, e.g., RGBA16 fb's,
and not harmful, as far as more than one year in real-world use showed.
DCE engines seem to work fine for high precision output at 30 bpp, so
this ("famous last words") depth 30 should hopefully fix all known problems
without introducing new ones.
Successfully retested on DCE-11.2 Polaris and DCN-1.0 Raven Ridge on
top of Linux 5.19.0-rc2 + drm-next.
Fixes: 353ca0fa5630 ("drm/amd/display: Fix 10bit 4K display on CIK GPUs")
Signed-off-by: Mario Kleiner <mario.kleiner.de(a)gmail.com>
Tested-by: Mario Kleiner <mario.kleiner.de(a)gmail.com>
Cc: stable(a)vger.kernel.org # 5.14.0
Cc: Alex Deucher <alexander.deucher(a)amd.com>
Cc: Harry Wentland <harry.wentland(a)amd.com>
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
index 2a701c583332..e33df231e9d2 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
@@ -1156,12 +1156,13 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx)
* on certain displays, such as the Sharp 4k. 36bpp is needed
* to support SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616 and
* SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616 with actual > 10 bpc
- * precision on at least DCN display engines. However, at least
- * Carrizo with DCE_VERSION_11_0 does not like 36 bpp lb depth,
- * so use only 30 bpp on DCE_VERSION_11_0. Testing with DCE 11.2 and 8.3
- * did not show such problems, so this seems to be the exception.
+ * precision on DCN display engines, but apparently not for DCE, as
+ * far as testing on DCE-11.2 and DCE-8 showed. Various DCE parts have
+ * problems: Carrizo with DCE_VERSION_11_0 does not like 36 bpp lb depth,
+ * neither do DCE-8 at 4k resolution, or DCE-11.2 (broken identity pixel
+ * passthrough). Therefore only use 36 bpp on DCN where it is actually needed.
*/
- if (plane_state->ctx->dce_version > DCE_VERSION_11_0)
+ if (plane_state->ctx->dce_version > DCE_VERSION_MAX)
pipe_ctx->plane_res.scl_data.lb_params.depth = LB_PIXEL_DEPTH_36BPP;
else
pipe_ctx->plane_res.scl_data.lb_params.depth = LB_PIXEL_DEPTH_30BPP;
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From c7860cbee9989882d2908682526a5ef617523cfe Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko <dmitry.osipenko(a)collabora.com>
Date: Wed, 29 Jun 2022 01:42:39 +0300
Subject: [PATCH] drm/tegra: Fix vmapping of prime buffers
The code assumes that Tegra GEM is permanently vmapped, which is not
true for the scattered buffers. After converting Tegra video decoder
driver to V4L API, we're now getting a BUG_ON from dma-buf core on playing
video using libvdpau-tegra on T30+ because tegra_gem_prime_vmap() sets
vaddr to NULL. Older pre-V4L video decoder driver wasn't vmapping dma-bufs.
Fix it by actually vmapping the exported GEMs.
Cc: stable(a)vger.kernel.org
Signed-off-by: Dmitry Osipenko <dmitry.osipenko(a)collabora.com>
Signed-off-by: Thierry Reding <treding(a)nvidia.com>
diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c
index 7c7dd84e6db8..81991090adcc 100644
--- a/drivers/gpu/drm/tegra/gem.c
+++ b/drivers/gpu/drm/tegra/gem.c
@@ -704,14 +704,23 @@ static int tegra_gem_prime_vmap(struct dma_buf *buf, struct iosys_map *map)
{
struct drm_gem_object *gem = buf->priv;
struct tegra_bo *bo = to_tegra_bo(gem);
+ void *vaddr;
- iosys_map_set_vaddr(map, bo->vaddr);
+ vaddr = tegra_bo_mmap(&bo->base);
+ if (IS_ERR(vaddr))
+ return PTR_ERR(vaddr);
+
+ iosys_map_set_vaddr(map, vaddr);
return 0;
}
static void tegra_gem_prime_vunmap(struct dma_buf *buf, struct iosys_map *map)
{
+ struct drm_gem_object *gem = buf->priv;
+ struct tegra_bo *bo = to_tegra_bo(gem);
+
+ tegra_bo_munmap(&bo->base, map->vaddr);
}
static const struct dma_buf_ops tegra_gem_prime_dmabuf_ops = {
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 53c26181950ddc3c8ace3c0939c89e9c4d8deeb9 Mon Sep 17 00:00:00 2001
From: Lyude Paul <lyude(a)redhat.com>
Date: Thu, 14 Jul 2022 13:42:33 -0400
Subject: [PATCH] drm/nouveau/acpi: Don't print error when we get -EINPROGRESS
from pm_runtime
Since this isn't actually a failure.
Signed-off-by: Lyude Paul <lyude(a)redhat.com>
Reviewed-by: David Airlie <airlied(a)linux.ie>
Fixes: 79e765ad665d ("drm/nouveau/drm/nouveau: Prevent handling ACPI HPD events too early")
Cc: <stable(a)vger.kernel.org> # v4.19+
Link: https://patchwork.freedesktop.org/patch/msgid/20220714174234.949259-2-lyude…
diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c
index 2cd0932b3d68..9f5a45f24e5b 100644
--- a/drivers/gpu/drm/nouveau/nouveau_display.c
+++ b/drivers/gpu/drm/nouveau/nouveau_display.c
@@ -537,7 +537,7 @@ nouveau_display_acpi_ntfy(struct notifier_block *nb, unsigned long val,
* it's own hotplug events.
*/
pm_runtime_put_autosuspend(drm->dev->dev);
- } else if (ret == 0) {
+ } else if (ret == 0 || ret == -EINPROGRESS) {
/* We've started resuming the GPU already, so
* it will handle scheduling a full reprobe
* itself
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 53c26181950ddc3c8ace3c0939c89e9c4d8deeb9 Mon Sep 17 00:00:00 2001
From: Lyude Paul <lyude(a)redhat.com>
Date: Thu, 14 Jul 2022 13:42:33 -0400
Subject: [PATCH] drm/nouveau/acpi: Don't print error when we get -EINPROGRESS
from pm_runtime
Since this isn't actually a failure.
Signed-off-by: Lyude Paul <lyude(a)redhat.com>
Reviewed-by: David Airlie <airlied(a)linux.ie>
Fixes: 79e765ad665d ("drm/nouveau/drm/nouveau: Prevent handling ACPI HPD events too early")
Cc: <stable(a)vger.kernel.org> # v4.19+
Link: https://patchwork.freedesktop.org/patch/msgid/20220714174234.949259-2-lyude…
diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c
index 2cd0932b3d68..9f5a45f24e5b 100644
--- a/drivers/gpu/drm/nouveau/nouveau_display.c
+++ b/drivers/gpu/drm/nouveau/nouveau_display.c
@@ -537,7 +537,7 @@ nouveau_display_acpi_ntfy(struct notifier_block *nb, unsigned long val,
* it's own hotplug events.
*/
pm_runtime_put_autosuspend(drm->dev->dev);
- } else if (ret == 0) {
+ } else if (ret == 0 || ret == -EINPROGRESS) {
/* We've started resuming the GPU already, so
* it will handle scheduling a full reprobe
* itself