From userspace, spawning a new process with, for example,
posix_spawn(), only allows the user to work with
the scheduling priority value defined by POSIX
in the sched_param struct.
However, sched_setparam() and similar syscalls lead to
__sched_setscheduler() which rejects any new value
for the priority other than 0 for non-RT schedule classes,
a behavior kept since Linux 2.6 or earlier.
Linux translates the usage of the sched_param struct
into it's own internal sched_attr struct during the syscall,
but the user has no way to manage the other values
within the sched_attr struct using only POSIX functions.
The only other way to adjust niceness while using posix_spawn()
would be to set the value after the process has started,
but this introduces the risk of the process being dead
before the next syscall can set the priority after the fact.
To resolve this, allow the use of the priority value
originally from the POSIX sched_param struct in order to
set the niceness value instead of rejecting the priority value.
Edit the sched_get_priority_*() POSIX syscalls
in order to reflect the range of values accepted.
Cc: stable(a)vger.kernel.org # Apply to kernel/sched/core.c
Signed-off-by: Michael Pratt <mcpratt(a)pm.me>
---
kernel/sched/syscalls.c | 21 +++++++++++++++++++--
1 file changed, 19 insertions(+), 2 deletions(-)
diff --git a/kernel/sched/syscalls.c b/kernel/sched/syscalls.c
index ae1b42775ef9..52c02b80f037 100644
--- a/kernel/sched/syscalls.c
+++ b/kernel/sched/syscalls.c
@@ -853,6 +853,19 @@ static int _sched_setscheduler(struct task_struct *p, int policy,
attr.sched_policy = policy;
}
+ if (attr.sched_priority > MAX_PRIO-1)
+ return -EINVAL;
+
+ /*
+ * If priority is set for SCHED_NORMAL or SCHED_BATCH,
+ * set the niceness instead, but only for user calls.
+ */
+ if (check && attr.sched_priority > MAX_RT_PRIO-1 &&
+ ((policy != SETPARAM_POLICY && fair_policy(policy)) || fair_policy(p->policy))) {
+ attr.sched_nice = PRIO_TO_NICE(attr.sched_priority);
+ attr.sched_priority = 0;
+ }
+
return __sched_setscheduler(p, &attr, check, true);
}
/**
@@ -1598,9 +1611,11 @@ SYSCALL_DEFINE1(sched_get_priority_max, int, policy)
case SCHED_RR:
ret = MAX_RT_PRIO-1;
break;
- case SCHED_DEADLINE:
case SCHED_NORMAL:
case SCHED_BATCH:
+ ret = MAX_PRIO-1;
+ break;
+ case SCHED_DEADLINE:
case SCHED_IDLE:
ret = 0;
break;
@@ -1625,9 +1640,11 @@ SYSCALL_DEFINE1(sched_get_priority_min, int, policy)
case SCHED_RR:
ret = 1;
break;
- case SCHED_DEADLINE:
case SCHED_NORMAL:
case SCHED_BATCH:
+ ret = MAX_RT_PRIO;
+ break;
+ case SCHED_DEADLINE:
case SCHED_IDLE:
ret = 0;
}
base-commit: 5be63fc19fcaa4c236b307420483578a56986a37
--
2.30.2
Hello,
commit 8b4865cd904650cbed7f2407e653934c621b8127 is marked for backport
to stable. The patch does apply cleanly to 6.11.y, but not to earlier
versions. In reply to this mail I send a backport for 6.10.y and 6.6.y.
The 6.6.y patch also applies to 6.1.y. I didn't test earlier stable
branches.
Best regards
Uwe
On 10/6/24 18:18, Sasha Levin wrote:
> This is a note to let you know that I've just added the patch titled
>
> ata: pata_serverworks: Do not use the term blacklist
>
> to the 6.11-stable tree which can be found at:
> http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
>
> The filename of the patch is:
> ata-pata_serverworks-do-not-use-the-term-blacklist.patch
> and it can be found in the queue-6.11 subdirectory.
>
> If you, or anyone else, feels it should not be added to the stable tree,
> please let <stable(a)vger.kernel.org> know about it.
Hm... again, what exactly does this commit fix? :-/
> commit 106fce3509096c391ee57d0482d1e857e3dfb6fb
> Author: Damien Le Moal <dlemoal(a)kernel.org>
> Date: Fri Jul 26 10:58:36 2024 +0900
>
> ata: pata_serverworks: Do not use the term blacklist
>
> [ Upstream commit 858048568c9e3887d8b19e101ee72f129d65cb15 ]
>
> Let's not use the term blacklist in the function
> serverworks_osb4_filter() documentation comment and rather simply refer
> to what that function looks at: the list of devices with groken UDMA5.
>
> While at it, also constify the values of the csb_bad_ata100 array.
>
> Of note is that all of this should probably be handled using libata
> quirk mechanism but it is unclear if these UDMA5 quirks are specific
> to this controller only.
>
> Signed-off-by: Damien Le Moal <dlemoal(a)kernel.org>
> Reviewed-by: Niklas Cassel <cassel(a)kernel.org>
> Reviewed-by: Igor Pylypiv <ipylypiv(a)google.com>
> Signed-off-by: Sasha Levin <sashal(a)kernel.org>
>
> diff --git a/drivers/ata/pata_serverworks.c b/drivers/ata/pata_serverworks.c
> index 549ff24a98231..4edddf6bcc150 100644
> --- a/drivers/ata/pata_serverworks.c
> +++ b/drivers/ata/pata_serverworks.c
> @@ -46,10 +46,11 @@
> #define SVWKS_CSB5_REVISION_NEW 0x92 /* min PCI_REVISION_ID for UDMA5 (A2.0) */
> #define SVWKS_CSB6_REVISION 0xa0 /* min PCI_REVISION_ID for UDMA4 (A1.0) */
>
> -/* Seagate Barracuda ATA IV Family drives in UDMA mode 5
> - * can overrun their FIFOs when used with the CSB5 */
> -
> -static const char *csb_bad_ata100[] = {
> +/*
> + * Seagate Barracuda ATA IV Family drives in UDMA mode 5
> + * can overrun their FIFOs when used with the CSB5.
> + */
> +static const char * const csb_bad_ata100[] = {
> "ST320011A",
> "ST340016A",
> "ST360021A",
> @@ -163,10 +164,11 @@ static unsigned int serverworks_osb4_filter(struct ata_device *adev, unsigned in
> * @adev: ATA device
> * @mask: Mask of proposed modes
> *
> - * Check the blacklist and disable UDMA5 if matched
> + * Check the list of devices with broken UDMA5 and
> + * disable UDMA5 if matched.
> */
> -
> -static unsigned int serverworks_csb_filter(struct ata_device *adev, unsigned int mask)
> +static unsigned int serverworks_csb_filter(struct ata_device *adev,
> + unsigned int mask)
> {
> const char *p;
> char model_num[ATA_ID_PROD_LEN + 1];
MBR, Sergey
Purpose is to add ad3552r AXI DAC (fpga-based) support.
The "ad3552r" AXI IP, a variant of the generic "DAC" AXI IP,
has been created to reach the maximum speed (33MUPS) supported
from the ad3552r. To obtain the maximum transfer rate, a custom
IP core module has been implemented with a QSPI interface with
DDR (Double Data Rate) mode.
The design is actually using the DAC backend since the register
map is the same of the generic DAC IP, except for some customized
bitfields. For this reason, a new "compatible" has been added
in adi-axi-dac.c.
Also, backend has been extended with all the needed functions
for this use case, keeping the names gneric.
The following patch is actually applying to linux-iio/testing.
---
Changes in v2:
- use unsigned int on bus_reg_read/write
- add a compatible in axi-dac backend for the ad3552r DAC IP
- minor code alignment fixes
- fix a return value not checked
- change devicetree structure setting ad3552r-axi as a backend
subnode
- add synchronous_mode_available in the ABI doc
Changes in v3:
- changing AXI backend approach using a dac ip compatible
- fdt bindings updates accordingly
- fdt, ad3552r device must be a subnode of the backend
- allow probe of child devices
- passing QSPI bus access function by platform data
- move synchronous mode as a fdt parameter
- reorganizing defines in proper patches
- fix make dt_binding_check errors
- fix ad3552r maximum SPI speed
- fix samplerate calulcation
- minor code style fixes
Changes in v4:
- fix Kconfig
- fix backend documentation
- driver renamed to a more gneric "high speed" (ad3552r-hs)
- restyled axi-dac register names
- removed synchronous support, dead code
(could be added in the future with David sugestions if needed)
- renaming backend buffer enable/disable calls
- using model_data in common code
- using devm_add_action_or_reset
- minor code style fixes
Signed-off-by: Angelo Dureghello <adureghello(a)baylibre.com>
---
Angelo Dureghello (11):
iio: dac: adi-axi-dac: update register names
iio: dac: adi-axi-dac: fix wrong register bitfield
dt-bindings: iio: dac: adi-axi-dac: add ad3552r axi variant
dt-bindings: iio: dac: ad3552r: fix maximum spi speed
dt-bindings: iio: dac: ad3552r: add iio backend support
iio: backend: extend features
iio: dac: adi-axi-dac: extend features
iio: dac: ad3552r: changes to use FIELD_PREP
iio: dac: ad3552r: extract common code (no changes in behavior intended)
iio: dac: ad3552r: add high-speed platform driver
iio: dac: adi-axi-dac: add registering of child fdt node
.../devicetree/bindings/iio/dac/adi,ad3552r.yaml | 9 +-
.../devicetree/bindings/iio/dac/adi,axi-dac.yaml | 49 +-
drivers/iio/dac/Kconfig | 14 +
drivers/iio/dac/Makefile | 3 +-
drivers/iio/dac/ad3552r-common.c | 170 +++++++
drivers/iio/dac/ad3552r-hs.c | 528 +++++++++++++++++++++
drivers/iio/dac/ad3552r.c | 461 +++---------------
drivers/iio/dac/ad3552r.h | 207 ++++++++
drivers/iio/dac/adi-axi-dac.c | 477 ++++++++++++++++---
drivers/iio/industrialio-backend.c | 79 +++
include/linux/iio/backend.h | 17 +
include/linux/platform_data/ad3552r-hs.h | 18 +
12 files changed, 1563 insertions(+), 469 deletions(-)
---
base-commit: c81ca31b5191ef48b5e5fb2545fde7dd436c2bd5
change-id: 20241003-wip-bl-ad3552r-axi-v0-iio-testing-aedec3e91ff7
Best regards,
--
Angelo Dureghello <adureghello(a)baylibre.com>
x86_android_tablet_remove() frees the pdevs[] array, so it should not
be used after calling x86_android_tablet_remove().
When platform_device_register() fails, store the pdevs[x] PTR_ERR() value
into the local ret variable before calling x86_android_tablet_remove()
to avoid using pdevs[] after it has been freed.
Fixes: 5eba0141206e ("platform/x86: x86-android-tablets: Add support for instantiating platform-devs")
Fixes: e2200d3f26da ("platform/x86: x86-android-tablets: Add gpio_keys support to x86_android_tablet_init()")
Cc: stable(a)vger.kernel.org
Reported-by: Aleksandr Burakov <a.burakov(a)rosalinux.ru>
Closes: https://lore.kernel.org/platform-driver-x86/20240917120458.7300-1-a.burakov…
Signed-off-by: Hans de Goede <hdegoede(a)redhat.com>
---
drivers/platform/x86/x86-android-tablets/core.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/drivers/platform/x86/x86-android-tablets/core.c b/drivers/platform/x86/x86-android-tablets/core.c
index 1427a9a39008..ef572b90e06b 100644
--- a/drivers/platform/x86/x86-android-tablets/core.c
+++ b/drivers/platform/x86/x86-android-tablets/core.c
@@ -390,8 +390,9 @@ static __init int x86_android_tablet_probe(struct platform_device *pdev)
for (i = 0; i < pdev_count; i++) {
pdevs[i] = platform_device_register_full(&dev_info->pdev_info[i]);
if (IS_ERR(pdevs[i])) {
+ ret = PTR_ERR(pdevs[i]);
x86_android_tablet_remove(pdev);
- return PTR_ERR(pdevs[i]);
+ return ret;
}
}
@@ -443,8 +444,9 @@ static __init int x86_android_tablet_probe(struct platform_device *pdev)
PLATFORM_DEVID_AUTO,
&pdata, sizeof(pdata));
if (IS_ERR(pdevs[pdev_count])) {
+ ret = PTR_ERR(pdevs[pdev_count]);
x86_android_tablet_remove(pdev);
- return PTR_ERR(pdevs[pdev_count]);
+ return ret;
}
pdev_count++;
}
--
2.46.2
Hello,
I have been investigating a read performance regression of dm-snapshot
on top of loopback in which the read time for a dd command increased
from 2min to 40min. I bisected the issue to dc5fc361d89 ("block:
attempt direct issue of plug list"). I blktraced before and after this
commit and the main difference I saw was that before this commit, when
the performance was good, there were a lot of IO unplugs on the loop
dev. After this commit, I saw 0 IO unplugs.
On the mainline, I was also able to bisect to a commit which fixed
this issue: 667ea36378cf ("loop: don't set QUEUE_FLAG_NOMERGES"). I
also blktraced before and after this commit, and unsurprisingly, the
main difference was that commit resulted in IO merges whereas
previously there were none being.
I don't totally understand what is going on with the first commit
which introduced the issue but I'd guess some modifying of the plug
list behavior resulted in IO not getting merged/grouped but when we
enabled QUEUE_FLAG_NOMERGES, we were then able to optimize through
this mechanism. Buuuut 2min->40min seems like a huge performance drop
just from merged vs non-merged IO, no? So perhaps it's more
complicated than that...
dc5fc361d89 -> 5.16
667ea36378c -> 6.11
6.6.y and 6.1.y and were both experiencing the performance issue. I
tried porting 667ea36378 to these branches; it applied cleanly and
resolved the issue for both. So perhaps we should consider it for the
stable trees, but it'd be great if someone from the block layer could
chime in with a better idea of what's going on here.
- leah
Hi,
"Panel replay" is a power saving technology that some display panels
support.
During 6.11 development we had to turn off panel replay due to reports
of regressions. Those regressions have now been fixed, and I've
confirmed that panel replay works again properly on 6.11 with the
following series of commits.
Can you please consider taking these back to 6.11.y so that users with
these panels can get their power savings back?
commit b68417613d41 ("drm/amd/display: Disable replay if VRR capability
is false")
commit f91a9af09dea ("drm/amd/display: Fix VRR cannot enable")
commit df18a4de9e77 ("drm/amd/display: Reset VRR config during resume")
commit be64336307a6 ("drm/amd/display: Re-enable panel replay feature")
Thanks!
From: Jann Horn <jannh(a)google.com>
commit 4f5a100f87f32cb65d4bb1ad282a08c92f6f591e upstream.
The F2FS ioctls for starting and committing atomic writes check for
inode_owner_or_capable(), but this does not give LSMs like SELinux or
Landlock an opportunity to deny the write access - if the caller's FSUID
matches the inode's UID, inode_owner_or_capable() immediately returns true.
There are scenarios where LSMs want to deny a process the ability to write
particular files, even files that the FSUID of the process owns; but this
can currently partially be bypassed using atomic write ioctls in two ways:
- F2FS_IOC_START_ATOMIC_REPLACE + F2FS_IOC_COMMIT_ATOMIC_WRITE can
truncate an inode to size 0
- F2FS_IOC_START_ATOMIC_WRITE + F2FS_IOC_ABORT_ATOMIC_WRITE can revert
changes another process concurrently made to a file
Fix it by requiring FMODE_WRITE for these operations, just like for
F2FS_IOC_MOVE_RANGE. Since any legitimate caller should only be using these
ioctls when intending to write into the file, that seems unlikely to break
anything.
Fixes: 88b88a667971 ("f2fs: support atomic writes")
Cc: stable(a)vger.kernel.org
Signed-off-by: Jann Horn <jannh(a)google.com>
Reviewed-by: Chao Yu <chao(a)kernel.org>
Reviewed-by: Eric Biggers <ebiggers(a)google.com>
Signed-off-by: Jaegeuk Kim <jaegeuk(a)kernel.org>
Signed-off-by: Eric Biggers <ebiggers(a)google.com>
---
fs/f2fs/file.c | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 62f2521cd955e..2982cc5b37d78 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -2103,10 +2103,13 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct inode *pinode;
loff_t isize;
int ret;
+ if (!(filp->f_mode & FMODE_WRITE))
+ return -EBADF;
+
if (!inode_owner_or_capable(mnt_userns, inode))
return -EACCES;
if (!S_ISREG(inode->i_mode))
return -EINVAL;
@@ -2200,10 +2203,13 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
{
struct inode *inode = file_inode(filp);
struct user_namespace *mnt_userns = file_mnt_user_ns(filp);
int ret;
+ if (!(filp->f_mode & FMODE_WRITE))
+ return -EBADF;
+
if (!inode_owner_or_capable(mnt_userns, inode))
return -EACCES;
ret = mnt_want_write_file(filp);
if (ret)
@@ -2232,10 +2238,13 @@ static int f2fs_ioc_abort_atomic_write(struct file *filp)
{
struct inode *inode = file_inode(filp);
struct user_namespace *mnt_userns = file_mnt_user_ns(filp);
int ret;
+ if (!(filp->f_mode & FMODE_WRITE))
+ return -EBADF;
+
if (!inode_owner_or_capable(mnt_userns, inode))
return -EACCES;
ret = mnt_want_write_file(filp);
if (ret)
base-commit: aa4cd140bba57b7064b4c7a7141bebd336d32087
--
2.47.0.rc0.187.ge670bccf7e-goog
Like the various 14" Asus ExpertBook B2 B2402* models there are also
4 variants of the 15" Asus ExpertBook B2 B2502* models:
B2502CBA: 12th gen Intel CPU, non flip
B2502FBA: 12th gen Intel CPU, flip
B2502CVA: 13th gen Intel CPU, non flip
B2502FVA: 13th gen Intel CPU, flip
Currently there already are DMI quirks for the B2502CBA, B2502FBA and
B2502CVA models. Asus website shows that there also is a B2502FVA.
Rather then adding a 4th quirk fold the 3 existing quirks into a single
quirk covering B2502* to also cover the last model while at the same time
reducing the number of quirks.
Cc: All applicable <stable(a)vger.kernel.org>
Signed-off-by: Hans de Goede <hdegoede(a)redhat.com>
---
drivers/acpi/resource.c | 18 ++----------------
1 file changed, 2 insertions(+), 16 deletions(-)
diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c
index cbe51ae6ae25..0eb52e372467 100644
--- a/drivers/acpi/resource.c
+++ b/drivers/acpi/resource.c
@@ -490,24 +490,10 @@ static const struct dmi_system_id irq1_level_low_skip_override[] = {
},
},
{
- /* Asus ExpertBook B2502 */
+ /* Asus ExpertBook B2502 (B2502CBA / B2502FBA / B2502CVA / B2502FVA) */
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
- DMI_MATCH(DMI_BOARD_NAME, "B2502CBA"),
- },
- },
- {
- /* Asus ExpertBook B2502FBA */
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
- DMI_MATCH(DMI_BOARD_NAME, "B2502FBA"),
- },
- },
- {
- /* Asus ExpertBook B2502CVA */
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
- DMI_MATCH(DMI_BOARD_NAME, "B2502CVA"),
+ DMI_MATCH(DMI_BOARD_NAME, "B2502"),
},
},
{
--
2.46.2
The Asus ExpertBook B2402CBA / B2402FBA are the non flip / flip versions
of the 14" Asus ExpertBook B2 with 12th gen Intel processors.
It has been reported that the B2402FVA which is the 14" Asus ExpertBook
B2 flip with 13th gen Intel processors needs to skip the IRQ override too.
And looking at Asus website there also is a B2402CVA which is the non flip
model with 13th gen Intel processors.
Summarizing the following 4 models of the Asus ExpertBook B2 are known:
B2402CBA: 12th gen Intel CPU, non flip
B2402FBA: 12th gen Intel CPU, flip
B2402CVA: 13th gen Intel CPU, non flip
B2402FVA: 13th gen Intel CPU, flip
Fold the 2 existing quirks for the B2402CBA and B2402FBA into a single
quirk covering B2402* to also cover the 2 other models while at the same
time reducing the number of quirks.
Reported-by: Stefan Blum <stefan.blum(a)gmail.com>
Closes: https://lore.kernel.org/platform-driver-x86/a983e6d5-c7ab-4758-be9b-7dcfc1b…
Cc: All applicable <stable(a)vger.kernel.org>
Signed-off-by: Hans de Goede <hdegoede(a)redhat.com>
---
drivers/acpi/resource.c | 11 ++---------
1 file changed, 2 insertions(+), 9 deletions(-)
diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c
index 3d74ebe9dbd8..cbe51ae6ae25 100644
--- a/drivers/acpi/resource.c
+++ b/drivers/acpi/resource.c
@@ -483,17 +483,10 @@ static const struct dmi_system_id irq1_level_low_skip_override[] = {
},
},
{
- /* Asus ExpertBook B2402CBA */
+ /* Asus ExpertBook B2402 (B2402CBA / B2402FBA / B2402CVA / B2402FVA) */
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
- DMI_MATCH(DMI_BOARD_NAME, "B2402CBA"),
- },
- },
- {
- /* Asus ExpertBook B2402FBA */
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
- DMI_MATCH(DMI_BOARD_NAME, "B2402FBA"),
+ DMI_MATCH(DMI_BOARD_NAME, "B2402"),
},
},
{
--
2.46.2
Attaching SST PCI device to VM causes "BUG: KASAN: slab-out-of-bounds".
kasan report:
[ 19.411889] ==================================================================
[ 19.413702] BUG: KASAN: slab-out-of-bounds in _isst_if_get_pci_dev+0x3d5/0x400 [isst_if_common]
[ 19.415634] Read of size 8 at addr ffff888829e65200 by task cpuhp/16/113
[ 19.417368]
[ 19.418627] CPU: 16 PID: 113 Comm: cpuhp/16 Tainted: G E 6.9.0 #10
[ 19.420435] Hardware name: VMware, Inc. VMware20,1/440BX Desktop Reference Platform, BIOS VMW201.00V.20192059.B64.2207280713 07/28/2022
[ 19.422687] Call Trace:
[ 19.424091] <TASK>
[ 19.425448] dump_stack_lvl+0x5d/0x80
[ 19.426963] ? _isst_if_get_pci_dev+0x3d5/0x400 [isst_if_common]
[ 19.428694] print_report+0x19d/0x52e
[ 19.430206] ? __pfx__raw_spin_lock_irqsave+0x10/0x10
[ 19.431837] ? _isst_if_get_pci_dev+0x3d5/0x400 [isst_if_common]
[ 19.433539] kasan_report+0xf0/0x170
[ 19.435019] ? _isst_if_get_pci_dev+0x3d5/0x400 [isst_if_common]
[ 19.436709] _isst_if_get_pci_dev+0x3d5/0x400 [isst_if_common]
[ 19.438379] ? __pfx_sched_clock_cpu+0x10/0x10
[ 19.439910] isst_if_cpu_online+0x406/0x58f [isst_if_common]
[ 19.441573] ? __pfx_isst_if_cpu_online+0x10/0x10 [isst_if_common]
[ 19.443263] ? ttwu_queue_wakelist+0x2c1/0x360
[ 19.444797] cpuhp_invoke_callback+0x221/0xec0
[ 19.446337] cpuhp_thread_fun+0x21b/0x610
[ 19.447814] ? __pfx_cpuhp_thread_fun+0x10/0x10
[ 19.449354] smpboot_thread_fn+0x2e7/0x6e0
[ 19.450859] ? __pfx_smpboot_thread_fn+0x10/0x10
[ 19.452405] kthread+0x29c/0x350
[ 19.453817] ? __pfx_kthread+0x10/0x10
[ 19.455253] ret_from_fork+0x31/0x70
[ 19.456685] ? __pfx_kthread+0x10/0x10
[ 19.458114] ret_from_fork_asm+0x1a/0x30
[ 19.459573] </TASK>
[ 19.460853]
[ 19.462055] Allocated by task 1198:
[ 19.463410] kasan_save_stack+0x30/0x50
[ 19.464788] kasan_save_track+0x14/0x30
[ 19.466139] __kasan_kmalloc+0xaa/0xb0
[ 19.467465] __kmalloc+0x1cd/0x470
[ 19.468748] isst_if_cdev_register+0x1da/0x350 [isst_if_common]
[ 19.470233] isst_if_mbox_init+0x108/0xff0 [isst_if_mbox_msr]
[ 19.471670] do_one_initcall+0xa4/0x380
[ 19.472903] do_init_module+0x238/0x760
[ 19.474105] load_module+0x5239/0x6f00
[ 19.475285] init_module_from_file+0xd1/0x130
[ 19.476506] idempotent_init_module+0x23b/0x650
[ 19.477725] __x64_sys_finit_module+0xbe/0x130
[ 19.476506] idempotent_init_module+0x23b/0x650
[ 19.477725] __x64_sys_finit_module+0xbe/0x130
[ 19.478920] do_syscall_64+0x82/0x160
[ 19.480036] entry_SYSCALL_64_after_hwframe+0x76/0x7e
[ 19.481292]
[ 19.482205] The buggy address belongs to the object at ffff888829e65000
which belongs to the cache kmalloc-512 of size 512
[ 19.484818] The buggy address is located 0 bytes to the right of
allocated 512-byte region [ffff888829e65000, ffff888829e65200)
[ 19.487447]
[ 19.488328] The buggy address belongs to the physical page:
[ 19.489569] page: refcount:1 mapcount:0 mapping:0000000000000000 index:0xffff888829e60c00 pfn:0x829e60
[ 19.491140] head: order:3 entire_mapcount:0 nr_pages_mapped:0 pincount:0
[ 19.492466] anon flags: 0x57ffffc0000840(slab|head|node=1|zone=2|lastcpupid=0x1fffff)
[ 19.493914] page_type: 0xffffffff()
[ 19.494988] raw: 0057ffffc0000840 ffff88810004cc80 0000000000000000 0000000000000001
[ 19.496451] raw: ffff888829e60c00 0000000080200018 00000001ffffffff 0000000000000000
[ 19.497906] head: 0057ffffc0000840 ffff88810004cc80 0000000000000000 0000000000000001
[ 19.499379] head: ffff888829e60c00 0000000080200018 00000001ffffffff 0000000000000000
[ 19.500844] head: 0057ffffc0000003 ffffea0020a79801 ffffea0020a79848 00000000ffffffff
[ 19.502316] head: 0000000800000000 0000000000000000 00000000ffffffff 0000000000000000
[ 19.503784] page dumped because: kasan: bad access detected
[ 19.505058]
[ 19.505970] Memory state around the buggy address:
[ 19.507172] ffff888829e65100: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[ 19.508599] ffff888829e65180: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[ 19.510013] >ffff888829e65200: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[ 19.510014] ^
[ 19.510016] ffff888829e65280: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[ 19.510018] ffff888829e65300: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[ 19.515367] ==================================================================
The reason for this error is physical_package_ids assigned by VMware VMM
are not continuous and have gaps. This will cause value returned by
topology_physical_package_id() to be more than topology_max_packages().
Here the allocation uses topology_max_packages(). The call to
topology_max_packages() returns maximum logical package ID not physical
ID. Hence use topology_logical_package_id() instead of
topology_physical_package_id().
Fixes: 9a1aac8a96dc ("platform/x86: ISST: PUNIT device mapping with Sub-NUMA clustering")
Signed-off-by: Zach Wade <zachwade.k(a)gmail.com>
---
drivers/platform/x86/intel/speed_select_if/isst_if_common.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/platform/x86/intel/speed_select_if/isst_if_common.c b/drivers/platform/x86/intel/speed_select_if/isst_if_common.c
index 10e21563fa46..030c33070b84 100644
--- a/drivers/platform/x86/intel/speed_select_if/isst_if_common.c
+++ b/drivers/platform/x86/intel/speed_select_if/isst_if_common.c
@@ -316,7 +316,9 @@ static struct pci_dev *_isst_if_get_pci_dev(int cpu, int bus_no, int dev, int fn
cpu >= nr_cpu_ids || cpu >= num_possible_cpus())
return NULL;
- pkg_id = topology_physical_package_id(cpu);
+ pkg_id = topology_logical_package_id(cpu);
+ if (pkg_id >= topology_max_packages())
+ return NULL;
bus_number = isst_cpu_info[cpu].bus_info[bus_no];
if (bus_number < 0)
--
2.46.0
Changes in v5:
- Fixes smatch CI splat
- Link to v4: https://lore.kernel.org/r/20241003-b4-master-24-11-25-ov08x40-v4-0-7ee2c45f…
Changes in v4:
- Drops link-frequencies from properties: as discussed here:
https://lore.kernel.org/r/Zv6STSKeNNlT83ux@kekkonen.localdomain
- Link to v3: https://lore.kernel.org/r/20241002-b4-master-24-11-25-ov08x40-v3-0-483bcdcf…
Changes in v3:
- Drops assigned-clock-* from description retains in example - Sakari,
Krzysztof
- Updates example fake clock names to ov08x40_* instead of copy/paste
ov9282_clk -> ov08x40_clk, ov9282_clk_parent -> ov08x40_clk_parent - bod
- Link to v2: https://lore.kernel.org/r/20241001-b4-master-24-11-25-ov08x40-v2-0-e478976b…
Changes in v2:
- Drops "-" in ovti,ov08x40.yaml after description: - Rob
- Adds ":" after first line of description text - Rob
- dts -> DT in commit log - Rob
- Removes dependency on 'xvclk' as a name in yaml
and driver - Sakari
- Uses assigned-clock, assigned-clock-parents and assigned-clock-rates -
Sakari
- Drops clock-frequency - Sakarai, Krzysztof
- Drops dovdd-supply, avdd-supply, dvdd-supply and reset-gpios
as required, its perfectly possible not to have the reset GPIO or the
power rails under control of the SoC. - bod
- Link to v1: https://lore.kernel.org/r/20240926-b4-master-24-11-25-ov08x40-v1-0-e4d5fbd3…
V1:
This series brings fixes and updates to ov08x40 which allows for use of
this sensor on the Qualcomm x1e80100 CRD but also on any other dts based
system.
Firstly there's a fix for the pseudo burst mode code that was added in
8f667d202384 ("media: ov08x40: Reduce start streaming time"). Not every I2C
controller can handle an arbitrary sized write, this is the case on
Qualcomm CAMSS/CCI I2C sensor interfaces which limit the transaction size
and communicate this limit via I2C quirks. A simple fix to optionally break
up the large submitted burst into chunks not exceeding adapter->quirk size
fixes.
Secondly then is addition of a yaml description for the ov08x40 and
extension of the driver to support OF probe and powering on of the power
rails from the driver instead of from ACPI.
Once done the sensor works without further modification on the Qualcomm
x1e80100 CRD.
Signed-off-by: Bryan O'Donoghue <bryan.odonoghue(a)linaro.org>
---
Bryan O'Donoghue (4):
media: ov08x40: Fix burst write sequence
media: dt-bindings: Add OmniVision OV08X40
media: ov08x40: Rename ext_clk to xvclk
media: ov08x40: Add OF probe support
.../bindings/media/i2c/ovti,ov08x40.yaml | 114 +++++++++++++
drivers/media/i2c/ov08x40.c | 181 ++++++++++++++++++---
2 files changed, 271 insertions(+), 24 deletions(-)
---
base-commit: 2b7275670032a98cba266bd1b8905f755b3e650f
change-id: 20240926-b4-master-24-11-25-ov08x40-c6f477aaa6a4
Best regards,
--
Bryan O'Donoghue <bryan.odonoghue(a)linaro.org>
This series is aimed at fixing a soundness issue with how dynamically
allocated LockClassKeys are handled. Currently, LockClassKeys can be
used without being Pin'd, which can break lockdep since it relies on
address stability. Similarly, these keys are not automatically
(de)registered with lockdep.
At the suggestion of Alice Ryhl, this series includes a patch for
-stable kernels that disables dynamically allocated keys. This prevents
backported patches from using the unsound implementation.
Currently, this series requires that all dynamically allocated
LockClassKeys have a lifetime of 'static (i.e., they must be leaked
after allocation). This is because Lock does not currently keep a
reference to the LockClassKey, instead passing it to C via FFI. This
causes a problem because the rust compiler would allow creating a
'static Lock with a 'a LockClassKey (with 'a < 'static) while C would
expect the LockClassKey to live as long as the lock. This problem
represents an avenue for future work.
---
Changes from RFC:
- Split into two commits so that dynamically allocated LockClassKeys are
removed from stable kernels. (Thanks Alice Ryhl)
- Extract calls to C lockdep functions into helpers so things build
properly when LOCKDEP=n. (Thanks Benno Lossin)
- Remove extraneous `get_ref()` calls. (Thanks Benno Lossin)
- Provide better documentation for `new_dynamic()`. (Thanks Benno
Lossin)
- Ran rustfmt to fix formatting and some extraneous changes. (Thanks
Alice Ryhl and Benno Lossin)
- Link to RFC: https://lore.kernel.org/r/20240905-rust-lockdep-v1-1-d2c9c21aa8b2@gmail.com
---
Mitchell Levy (2):
rust: lockdep: Remove support for dynamically allocated LockClassKeys
rust: lockdep: Use Pin for all LockClassKey usages
rust/helpers/helpers.c | 1 +
rust/helpers/sync.c | 13 +++++++++++++
rust/kernel/lib.rs | 2 +-
rust/kernel/sync.rs | 34 ++++++++++++++++++++++++----------
rust/kernel/sync/condvar.rs | 11 +++++++----
rust/kernel/sync/lock.rs | 4 ++--
rust/kernel/workqueue.rs | 2 +-
7 files changed, 49 insertions(+), 18 deletions(-)
---
base-commit: 9852d85ec9d492ebef56dc5f229416c925758edc
change-id: 20240905-rust-lockdep-d3e30521c8ba
Best regards,
--
Mitchell Levy <levymitchell0(a)gmail.com>
>From: Zhaoyang Huang <zhaoyang.huang(a)unisoc.com>
>
>[ Upstream commit 03e02b94171b1985dd0aa184296fe94425b855a3 ]
>
>This patch is inspired by a code review of fs codes which aims at folio's extra
>refcnt that could introduce unwanted behavious when judging refcnt, such
>as[1].That is, the folio passed to mapping_evict_folio carries the refcnts from
>find_lock_entries, page_cache, corresponding to PTEs and folio's private if has.
>However, current code doesn't take the refcnt for folio's private which could
>have mapping_evict_folio miss the one to only PTE and lead to call
>filemap_release_folio wrongly.
>
>[1]
>long mapping_evict_folio(struct address_space *mapping, struct folio *folio)
>{ ...
>//current code will misjudge here if there is one pte on the folio which is be
>deemed as the one as folio's private
> if (folio_ref_count(folio) >
> folio_nr_pages(folio) + folio_has_private(folio) +
>1)
> return 0;
> if (!filemap_release_folio(folio, 0))
> return 0;
>
> return remove_mapping(mapping, folio); }
>
>Signed-off-by: Zhaoyang Huang <zhaoyang.huang(a)unisoc.com>
>Signed-off-by: Anna Schumaker <anna.schumaker(a)oracle.com>
>Signed-off-by: Sasha Levin <sashal(a)kernel.org>
Sorry, this patch should be dropped since Trond has explained that there is no need to grab refcnt here as nfs_page has hold the refcount on this folio.
>---
> fs/nfs/write.c | 6 ++----
> 1 file changed, 2 insertions(+), 4 deletions(-)
>
>diff --git a/fs/nfs/write.c b/fs/nfs/write.c index
>d074d0ceb4f01..80c6ded5f74c6 100644
>--- a/fs/nfs/write.c
>+++ b/fs/nfs/write.c
>@@ -772,8 +772,7 @@ static void nfs_inode_add_request(struct nfs_page
>*req)
> nfs_lock_request(req);
> spin_lock(&mapping->i_private_lock);
> set_bit(PG_MAPPED, &req->wb_flags);
>- folio_set_private(folio);
>- folio->private = req;
>+ folio_attach_private(folio, req);
> spin_unlock(&mapping->i_private_lock);
> atomic_long_inc(&nfsi->nrequests);
> /* this a head request for a page group - mark it as having an @@
>-797,8 +796,7 @@ static void nfs_inode_remove_request(struct nfs_page
>*req)
>
> spin_lock(&mapping->i_private_lock);
> if (likely(folio)) {
>- folio->private = NULL;
>- folio_clear_private(folio);
>+ folio_detach_private(folio);
> clear_bit(PG_MAPPED,
>&req->wb_head->wb_flags);
> }
> spin_unlock(&mapping->i_private_lock);
>--
>2.43.0
The patch titled
Subject: selftests/mm: replace atomic_bool with pthread_barrier_t
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
selftests-mm-replace-atomic_bool-with-pthread_barrier_t.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Edward Liaw <edliaw(a)google.com>
Subject: selftests/mm: replace atomic_bool with pthread_barrier_t
Date: Thu, 3 Oct 2024 21:17:10 +0000
Patch series "selftests/mm: fix deadlock after pthread_create".
On Android arm, pthread_create followed by a fork caused a deadlock in the
case where the fork required work to be completed by the created thread.
Update the synchronization primitive to use pthread_barrier instead of
atomic_bool.
Apply the same fix to the wp-fork-with-event test.
This patch (of 2):
Swap synchronization primitive with pthread_barrier, so that stdatomic.h
does not need to be included.
The synchronization is needed on Android ARM64; we see a deadlock with
pthread_create when the parent thread races forward before the child has a
chance to start doing work.
Link: https://lkml.kernel.org/r/20241003211716.371786-1-edliaw@google.com
Link: https://lkml.kernel.org/r/20241003211716.371786-2-edliaw@google.com
Fixes: 8c864371b2a1 ("selftests/mm: fix ARM related issue with fork after
pthread_create")
Signed-off-by: Edward Liaw <edliaw(a)google.com>
Cc: Lokesh Gidra <lokeshgidra(a)google.com>
Cc: Peter Xu <peterx(a)redhat.com>
Cc: Shuah Khan <shuah(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
tools/testing/selftests/mm/uffd-common.c | 5 +++--
tools/testing/selftests/mm/uffd-common.h | 3 +--
tools/testing/selftests/mm/uffd-unit-tests.c | 14 ++++++++------
3 files changed, 12 insertions(+), 10 deletions(-)
--- a/tools/testing/selftests/mm/uffd-common.c~selftests-mm-replace-atomic_bool-with-pthread_barrier_t
+++ a/tools/testing/selftests/mm/uffd-common.c
@@ -18,7 +18,7 @@ bool test_uffdio_wp = true;
unsigned long long *count_verify;
uffd_test_ops_t *uffd_test_ops;
uffd_test_case_ops_t *uffd_test_case_ops;
-atomic_bool ready_for_fork;
+pthread_barrier_t ready_for_fork;
static int uffd_mem_fd_create(off_t mem_size, bool hugetlb)
{
@@ -519,7 +519,8 @@ void *uffd_poll_thread(void *arg)
pollfd[1].fd = pipefd[cpu*2];
pollfd[1].events = POLLIN;
- ready_for_fork = true;
+ /* Ready for parent thread to fork */
+ pthread_barrier_wait(&ready_for_fork);
for (;;) {
ret = poll(pollfd, 2, -1);
--- a/tools/testing/selftests/mm/uffd-common.h~selftests-mm-replace-atomic_bool-with-pthread_barrier_t
+++ a/tools/testing/selftests/mm/uffd-common.h
@@ -33,7 +33,6 @@
#include <inttypes.h>
#include <stdint.h>
#include <sys/random.h>
-#include <stdatomic.h>
#include "../kselftest.h"
#include "vm_util.h"
@@ -105,7 +104,7 @@ extern bool map_shared;
extern bool test_uffdio_wp;
extern unsigned long long *count_verify;
extern volatile bool test_uffdio_copy_eexist;
-extern atomic_bool ready_for_fork;
+extern pthread_barrier_t ready_for_fork;
extern uffd_test_ops_t anon_uffd_test_ops;
extern uffd_test_ops_t shmem_uffd_test_ops;
--- a/tools/testing/selftests/mm/uffd-unit-tests.c~selftests-mm-replace-atomic_bool-with-pthread_barrier_t
+++ a/tools/testing/selftests/mm/uffd-unit-tests.c
@@ -774,7 +774,7 @@ static void uffd_sigbus_test_common(bool
char c;
struct uffd_args args = { 0 };
- ready_for_fork = false;
+ pthread_barrier_init(&ready_for_fork, NULL, 2);
fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);
@@ -791,8 +791,9 @@ static void uffd_sigbus_test_common(bool
if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args))
err("uffd_poll_thread create");
- while (!ready_for_fork)
- ; /* Wait for the poll_thread to start executing before forking */
+ /* Wait for child thread to start before forking */
+ pthread_barrier_wait(&ready_for_fork);
+ pthread_barrier_destroy(&ready_for_fork);
pid = fork();
if (pid < 0)
@@ -833,7 +834,7 @@ static void uffd_events_test_common(bool
char c;
struct uffd_args args = { 0 };
- ready_for_fork = false;
+ pthread_barrier_init(&ready_for_fork, NULL, 2);
fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);
if (uffd_register(uffd, area_dst, nr_pages * page_size,
@@ -844,8 +845,9 @@ static void uffd_events_test_common(bool
if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args))
err("uffd_poll_thread create");
- while (!ready_for_fork)
- ; /* Wait for the poll_thread to start executing before forking */
+ /* Wait for child thread to start before forking */
+ pthread_barrier_wait(&ready_for_fork);
+ pthread_barrier_destroy(&ready_for_fork);
pid = fork();
if (pid < 0)
_
Patches currently in -mm which might be from edliaw(a)google.com are
selftests-mm-replace-atomic_bool-with-pthread_barrier_t.patch
selftests-mm-fix-deadlock-for-fork-after-pthread_create-on-arm.patch
From: Aaron Thompson <dev(a)aaront.org>
If iso_init() has been called, iso_exit() must be called on module
unload. Without that, the struct proto that iso_init() registered with
proto_register() becomes invalid, which could cause unpredictable
problems later. In my case, with CONFIG_LIST_HARDENED and
CONFIG_BUG_ON_DATA_CORRUPTION enabled, loading the module again usually
triggers this BUG():
list_add corruption. next->prev should be prev (ffffffffb5355fd0),
but was 0000000000000068. (next=ffffffffc0a010d0).
------------[ cut here ]------------
kernel BUG at lib/list_debug.c:29!
Oops: invalid opcode: 0000 [#1] PREEMPT SMP PTI
CPU: 1 PID: 4159 Comm: modprobe Not tainted 6.10.11-4+bt2-ao-desktop #1
RIP: 0010:__list_add_valid_or_report+0x61/0xa0
...
__list_add_valid_or_report+0x61/0xa0
proto_register+0x299/0x320
hci_sock_init+0x16/0xc0 [bluetooth]
bt_init+0x68/0xd0 [bluetooth]
__pfx_bt_init+0x10/0x10 [bluetooth]
do_one_initcall+0x80/0x2f0
do_init_module+0x8b/0x230
__do_sys_init_module+0x15f/0x190
do_syscall_64+0x68/0x110
...
Cc: stable(a)vger.kernel.org
Fixes: ccf74f2390d6 ("Bluetooth: Add BTPROTO_ISO socket type")
Signed-off-by: Aaron Thompson <dev(a)aaront.org>
---
net/bluetooth/mgmt.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index e4f564d6f6fb..78a164fab3e1 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -10487,6 +10487,7 @@ int mgmt_init(void)
void mgmt_exit(void)
{
+ iso_exit();
hci_mgmt_chan_unregister(&chan);
}
--
2.39.5
From: Dominique Martinet <asmadeus(a)codewreck.org>
commit c898afdc15645efb555acb6d85b484eb40a45409 upstream.
Fix a use-after-free on dentry's d_fsdata fid list when a thread
looks up a fid through dentry while another thread unlinks it:
UAF thread:
refcount_t: addition on 0; use-after-free.
p9_fid_get linux/./include/net/9p/client.h:262
v9fs_fid_find+0x236/0x280 linux/fs/9p/fid.c:129
v9fs_fid_lookup_with_uid linux/fs/9p/fid.c:181
v9fs_fid_lookup+0xbf/0xc20 linux/fs/9p/fid.c:314
v9fs_vfs_getattr_dotl+0xf9/0x360 linux/fs/9p/vfs_inode_dotl.c:400
vfs_statx+0xdd/0x4d0 linux/fs/stat.c:248
Freed by:
p9_fid_destroy (inlined)
p9_client_clunk+0xb0/0xe0 linux/net/9p/client.c:1456
p9_fid_put linux/./include/net/9p/client.h:278
v9fs_dentry_release+0xb5/0x140 linux/fs/9p/vfs_dentry.c:55
v9fs_remove+0x38f/0x620 linux/fs/9p/vfs_inode.c:518
vfs_unlink+0x29a/0x810 linux/fs/namei.c:4335
The problem is that d_fsdata was not accessed under d_lock, because
d_release() normally is only called once the dentry is otherwise no
longer accessible but since we also call it explicitly in v9fs_remove
that lock is required:
move the hlist out of the dentry under lock then unref its fids once
they are no longer accessible.
Fixes: 154372e67d40 ("fs/9p: fix create-unlink-getattr idiom")
Cc: stable(a)vger.kernel.org
Reported-by: Meysam Firouzi
Reported-by: Amirmohammad Eftekhar
Reviewed-by: Christian Schoenebeck <linux_oss(a)crudebyte.com>
Message-ID: <20240521122947.1080227-1-asmadeus(a)codewreck.org>
Signed-off-by: Dominique Martinet <asmadeus(a)codewreck.org>
[Samasth: backport to 5.15.y]
Signed-off-by: Samasth Norway Ananda <samasth.norway.ananda(a)oracle.com>
Conflicts:
fs/9p/vfs_dentry.c
keep p9_client_clunk instead of introducing the helper function
p9_fid_get/put from commit b48dbb998d70 ("9p fid refcount: add
p9_fid_get/put wrappers")
---
This is a fix for CVE-2024-39463, minor conflict resolution involved.
---
fs/9p/vfs_dentry.c | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
index c2736af97884f..afcf4711ac2a8 100644
--- a/fs/9p/vfs_dentry.c
+++ b/fs/9p/vfs_dentry.c
@@ -52,12 +52,17 @@ static int v9fs_cached_dentry_delete(const struct dentry *dentry)
static void v9fs_dentry_release(struct dentry *dentry)
{
struct hlist_node *p, *n;
+ struct hlist_head head;
p9_debug(P9_DEBUG_VFS, " dentry: %pd (%p)\n",
dentry, dentry);
- hlist_for_each_safe(p, n, (struct hlist_head *)&dentry->d_fsdata)
+
+ spin_lock(&dentry->d_lock);
+ hlist_move_list((struct hlist_head *)&dentry->d_fsdata, &head);
+ spin_unlock(&dentry->d_lock);
+
+ hlist_for_each_safe(p, n, &head)
p9_client_clunk(hlist_entry(p, struct p9_fid, dlist));
- dentry->d_fsdata = NULL;
}
static int v9fs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
--
2.46.0
The patch titled
Subject: mm: fix null pointer dereference in pfnmap_lockdep_assert
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
fixes-null-pointer-dereference-in-pfnmap_lockdep_assert.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Manas <manas18244(a)iiitd.ac.in>
Subject: mm: fix null pointer dereference in pfnmap_lockdep_assert
Date: Fri, 04 Oct 2024 23:12:16 +0530
syzbot has pointed to a possible null pointer dereference in
pfnmap_lockdep_assert. vm_file member of vm_area_struct is being
dereferenced without any checks.
This fix assigns mapping only if vm_file is not NULL.
Peter said (https://lkml.kernel.org/r/Zv8HRA5mnhVevBN_@x1n):
: If I read the stack right, the crash was before mmap() of the new vma
: happens, instead it's during unmap() of one existing vma which existed and
: overlapped with the new vma's mapping range:
:
: follow_phys arch/x86/mm/pat/memtype.c:956 [inline]
: get_pat_info+0x182/0x3f0 arch/x86/mm/pat/memtype.c:988
: untrack_pfn+0x327/0x640 arch/x86/mm/pat/memtype.c:1101
: unmap_single_vma+0x1f6/0x2b0 mm/memory.c:1834
: unmap_vmas+0x3cc/0x5f0 mm/memory.c:1900
: unmap_region+0x214/0x380 mm/vma.c:354 <--------------- here
: mmap_region+0x22f9/0x2990 mm/mmap.c:1573
: do_mmap+0x8f0/0x1000 mm/mmap.c:496
: vm_mmap_pgoff+0x1dd/0x3d0 mm/util.c:588
: ksys_mmap_pgoff+0x4eb/0x720 mm/mmap.c:542
: do_syscall_x64 arch/x86/entry/common.c:52 [inline]
: do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83
: entry_SYSCALL_64_after_hwframe+0x77/0x7f
:
: It looks like the vma that was overwritten by the new file vma mapping
: could be a VM_PFNMAP vma (I'm guessing vvar or something similar..),
: that's where untrack_pfn() got kicked off. In this case, the vma being
: overwritten and to be unmapped can have ->vm_file==NULL (while ->vm_ops
: non-NULL; /me looking at __install_special_mapping()).
Link: https://lkml.kernel.org/r/20241004-fix-null-deref-v4-1-d0a8ec01ac85@iiitd.a…
Reported-by: syzbot+093d096417e7038a689b(a)syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=093d096417e7038a689b
Cc: Anup Sharma <anupnewsmail(a)gmail.com>
Cc: Shuah Khan <skhan(a)linuxfoundation.org>
Cc: Matthew Wilcox <willy(a)infradead.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/memory.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
--- a/mm/memory.c~fixes-null-pointer-dereference-in-pfnmap_lockdep_assert
+++ a/mm/memory.c
@@ -6355,10 +6355,13 @@ static inline void pfnmap_args_setup(str
static inline void pfnmap_lockdep_assert(struct vm_area_struct *vma)
{
#ifdef CONFIG_LOCKDEP
- struct address_space *mapping = vma->vm_file->f_mapping;
+ struct address_space *mapping = NULL;
+
+ if (vma->vm_file)
+ mapping = vma->vm_file->f_mapping;
if (mapping)
- lockdep_assert(lockdep_is_held(&vma->vm_file->f_mapping->i_mmap_rwsem) ||
+ lockdep_assert(lockdep_is_held(&mapping->i_mmap_rwsem) ||
lockdep_is_held(&vma->vm_mm->mmap_lock));
else
lockdep_assert(lockdep_is_held(&vma->vm_mm->mmap_lock));
_
Patches currently in -mm which might be from manas18244(a)iiitd.ac.in are
fixes-null-pointer-dereference-in-pfnmap_lockdep_assert.patch
Stop spamming the logs with errors about missing mechanism for setting
the so called download (or dump) mode for users that have not requested
that feature to be enabled in the first place.
This avoids the follow error being logged on boot as well as on
shutdown when the feature it not available and download mode has not
been enabled on the kernel command line:
qcom_scm firmware:scm: No available mechanism for setting download mode
Fixes: 79cb2cb8d89b ("firmware: qcom: scm: Disable SDI and write no dump to dump mode")
Fixes: 781d32d1c970 ("firmware: qcom_scm: Clear download bit during reboot")
Cc: Mukesh Ojha <quic_mojha(a)quicinc.com>
Cc: stable(a)vger.kernel.org # 6.4
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
---
drivers/firmware/qcom/qcom_scm.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/firmware/qcom/qcom_scm.c b/drivers/firmware/qcom/qcom_scm.c
index 10986cb11ec0..e2ac595902ed 100644
--- a/drivers/firmware/qcom/qcom_scm.c
+++ b/drivers/firmware/qcom/qcom_scm.c
@@ -545,7 +545,7 @@ static void qcom_scm_set_download_mode(u32 dload_mode)
} else if (__qcom_scm_is_call_available(__scm->dev, QCOM_SCM_SVC_BOOT,
QCOM_SCM_BOOT_SET_DLOAD_MODE)) {
ret = __qcom_scm_set_dload_mode(__scm->dev, !!dload_mode);
- } else {
+ } else if (dload_mode) {
dev_err(__scm->dev,
"No available mechanism for setting download mode\n");
}
--
2.45.2
Since, two suspend-resume cycles are required to enter hibernate and,
since we only need to enable idle optimizations in the first cycle
(which is pretty much equivalent to s2idle). We can check in_s0ix, to
prevent the system from entering idle optimizations before it actually
enters hibernate (from display's perspective).
Cc: stable(a)vger.kernel.org # 6.10+
Signed-off-by: Hamza Mahfooz <hamza.mahfooz(a)amd.com>
---
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 4651b884d8d9..546a168a2fbf 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -2996,10 +2996,11 @@ static int dm_suspend(struct amdgpu_ip_block *ip_block)
hpd_rx_irq_work_suspend(dm);
- if (adev->dm.dc->caps.ips_support)
- dc_allow_idle_optimizations(adev->dm.dc, true);
-
dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D3);
+
+ if (dm->dc->caps.ips_support && adev->in_s0ix)
+ dc_allow_idle_optimizations(dm->dc, true);
+
dc_dmub_srv_set_power_state(dm->dc->ctx->dmub_srv, DC_ACPI_CM_POWER_STATE_D3);
return 0;
--
2.46.0
From: Jann Horn <jannh(a)google.com>
commit 4f5a100f87f32cb65d4bb1ad282a08c92f6f591e upstream.
The F2FS ioctls for starting and committing atomic writes check for
inode_owner_or_capable(), but this does not give LSMs like SELinux or
Landlock an opportunity to deny the write access - if the caller's FSUID
matches the inode's UID, inode_owner_or_capable() immediately returns true.
There are scenarios where LSMs want to deny a process the ability to write
particular files, even files that the FSUID of the process owns; but this
can currently partially be bypassed using atomic write ioctls in two ways:
- F2FS_IOC_START_ATOMIC_REPLACE + F2FS_IOC_COMMIT_ATOMIC_WRITE can
truncate an inode to size 0
- F2FS_IOC_START_ATOMIC_WRITE + F2FS_IOC_ABORT_ATOMIC_WRITE can revert
changes another process concurrently made to a file
Fix it by requiring FMODE_WRITE for these operations, just like for
F2FS_IOC_MOVE_RANGE. Since any legitimate caller should only be using these
ioctls when intending to write into the file, that seems unlikely to break
anything.
Fixes: 88b88a667971 ("f2fs: support atomic writes")
Cc: stable(a)vger.kernel.org
Signed-off-by: Jann Horn <jannh(a)google.com>
Reviewed-by: Chao Yu <chao(a)kernel.org>
Reviewed-by: Eric Biggers <ebiggers(a)google.com>
Signed-off-by: Jaegeuk Kim <jaegeuk(a)kernel.org>
Signed-off-by: Eric Biggers <ebiggers(a)google.com>
---
fs/f2fs/file.c | 15 +++++++++++++++
1 file changed, 15 insertions(+)
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 043ce96ac1270..0cc2f41e81243 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -1709,10 +1709,13 @@ static int f2fs_ioc_getversion(struct file *filp, unsigned long arg)
static int f2fs_ioc_start_atomic_write(struct file *filp)
{
struct inode *inode = file_inode(filp);
int ret;
+ if (!(filp->f_mode & FMODE_WRITE))
+ return -EBADF;
+
if (!inode_owner_or_capable(inode))
return -EACCES;
if (!S_ISREG(inode->i_mode))
return -EINVAL;
@@ -1766,10 +1769,13 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
static int f2fs_ioc_commit_atomic_write(struct file *filp)
{
struct inode *inode = file_inode(filp);
int ret;
+ if (!(filp->f_mode & FMODE_WRITE))
+ return -EBADF;
+
if (!inode_owner_or_capable(inode))
return -EACCES;
ret = mnt_want_write_file(filp);
if (ret)
@@ -1811,10 +1817,13 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
static int f2fs_ioc_start_volatile_write(struct file *filp)
{
struct inode *inode = file_inode(filp);
int ret;
+ if (!(filp->f_mode & FMODE_WRITE))
+ return -EBADF;
+
if (!inode_owner_or_capable(inode))
return -EACCES;
if (!S_ISREG(inode->i_mode))
return -EINVAL;
@@ -1846,10 +1855,13 @@ static int f2fs_ioc_start_volatile_write(struct file *filp)
static int f2fs_ioc_release_volatile_write(struct file *filp)
{
struct inode *inode = file_inode(filp);
int ret;
+ if (!(filp->f_mode & FMODE_WRITE))
+ return -EBADF;
+
if (!inode_owner_or_capable(inode))
return -EACCES;
ret = mnt_want_write_file(filp);
if (ret)
@@ -1875,10 +1887,13 @@ static int f2fs_ioc_release_volatile_write(struct file *filp)
static int f2fs_ioc_abort_volatile_write(struct file *filp)
{
struct inode *inode = file_inode(filp);
int ret;
+ if (!(filp->f_mode & FMODE_WRITE))
+ return -EBADF;
+
if (!inode_owner_or_capable(inode))
return -EACCES;
ret = mnt_want_write_file(filp);
if (ret)
base-commit: de2cffe297563c815c840cfa14b77a0868b61e53
--
2.47.0.rc0.187.ge670bccf7e-goog
From: Jann Horn <jannh(a)google.com>
commit 4f5a100f87f32cb65d4bb1ad282a08c92f6f591e upstream.
The F2FS ioctls for starting and committing atomic writes check for
inode_owner_or_capable(), but this does not give LSMs like SELinux or
Landlock an opportunity to deny the write access - if the caller's FSUID
matches the inode's UID, inode_owner_or_capable() immediately returns true.
There are scenarios where LSMs want to deny a process the ability to write
particular files, even files that the FSUID of the process owns; but this
can currently partially be bypassed using atomic write ioctls in two ways:
- F2FS_IOC_START_ATOMIC_REPLACE + F2FS_IOC_COMMIT_ATOMIC_WRITE can
truncate an inode to size 0
- F2FS_IOC_START_ATOMIC_WRITE + F2FS_IOC_ABORT_ATOMIC_WRITE can revert
changes another process concurrently made to a file
Fix it by requiring FMODE_WRITE for these operations, just like for
F2FS_IOC_MOVE_RANGE. Since any legitimate caller should only be using these
ioctls when intending to write into the file, that seems unlikely to break
anything.
Fixes: 88b88a667971 ("f2fs: support atomic writes")
Cc: stable(a)vger.kernel.org
Signed-off-by: Jann Horn <jannh(a)google.com>
Reviewed-by: Chao Yu <chao(a)kernel.org>
Reviewed-by: Eric Biggers <ebiggers(a)google.com>
Signed-off-by: Jaegeuk Kim <jaegeuk(a)kernel.org>
Signed-off-by: Eric Biggers <ebiggers(a)google.com>
---
fs/f2fs/file.c | 15 +++++++++++++++
1 file changed, 15 insertions(+)
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 2330600dbe02e..738d65abde510 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -1855,10 +1855,13 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
struct inode *inode = file_inode(filp);
struct f2fs_inode_info *fi = F2FS_I(inode);
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
int ret;
+ if (!(filp->f_mode & FMODE_WRITE))
+ return -EBADF;
+
if (!inode_owner_or_capable(inode))
return -EACCES;
if (!S_ISREG(inode->i_mode))
return -EINVAL;
@@ -1921,10 +1924,13 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
static int f2fs_ioc_commit_atomic_write(struct file *filp)
{
struct inode *inode = file_inode(filp);
int ret;
+ if (!(filp->f_mode & FMODE_WRITE))
+ return -EBADF;
+
if (!inode_owner_or_capable(inode))
return -EACCES;
ret = mnt_want_write_file(filp);
if (ret)
@@ -1963,10 +1969,13 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
static int f2fs_ioc_start_volatile_write(struct file *filp)
{
struct inode *inode = file_inode(filp);
int ret;
+ if (!(filp->f_mode & FMODE_WRITE))
+ return -EBADF;
+
if (!inode_owner_or_capable(inode))
return -EACCES;
if (!S_ISREG(inode->i_mode))
return -EINVAL;
@@ -1998,10 +2007,13 @@ static int f2fs_ioc_start_volatile_write(struct file *filp)
static int f2fs_ioc_release_volatile_write(struct file *filp)
{
struct inode *inode = file_inode(filp);
int ret;
+ if (!(filp->f_mode & FMODE_WRITE))
+ return -EBADF;
+
if (!inode_owner_or_capable(inode))
return -EACCES;
ret = mnt_want_write_file(filp);
if (ret)
@@ -2027,10 +2039,13 @@ static int f2fs_ioc_release_volatile_write(struct file *filp)
static int f2fs_ioc_abort_volatile_write(struct file *filp)
{
struct inode *inode = file_inode(filp);
int ret;
+ if (!(filp->f_mode & FMODE_WRITE))
+ return -EBADF;
+
if (!inode_owner_or_capable(inode))
return -EACCES;
ret = mnt_want_write_file(filp);
if (ret)
base-commit: 661f109c057497c8baf507a2562ceb9f9fb3cbc2
--
2.47.0.rc0.187.ge670bccf7e-goog
From: Jann Horn <jannh(a)google.com>
commit 4f5a100f87f32cb65d4bb1ad282a08c92f6f591e upstream.
The F2FS ioctls for starting and committing atomic writes check for
inode_owner_or_capable(), but this does not give LSMs like SELinux or
Landlock an opportunity to deny the write access - if the caller's FSUID
matches the inode's UID, inode_owner_or_capable() immediately returns true.
There are scenarios where LSMs want to deny a process the ability to write
particular files, even files that the FSUID of the process owns; but this
can currently partially be bypassed using atomic write ioctls in two ways:
- F2FS_IOC_START_ATOMIC_REPLACE + F2FS_IOC_COMMIT_ATOMIC_WRITE can
truncate an inode to size 0
- F2FS_IOC_START_ATOMIC_WRITE + F2FS_IOC_ABORT_ATOMIC_WRITE can revert
changes another process concurrently made to a file
Fix it by requiring FMODE_WRITE for these operations, just like for
F2FS_IOC_MOVE_RANGE. Since any legitimate caller should only be using these
ioctls when intending to write into the file, that seems unlikely to break
anything.
Fixes: 88b88a667971 ("f2fs: support atomic writes")
Cc: stable(a)vger.kernel.org
Signed-off-by: Jann Horn <jannh(a)google.com>
Reviewed-by: Chao Yu <chao(a)kernel.org>
Reviewed-by: Eric Biggers <ebiggers(a)google.com>
Signed-off-by: Jaegeuk Kim <jaegeuk(a)kernel.org>
Signed-off-by: Eric Biggers <ebiggers(a)google.com>
---
fs/f2fs/file.c | 15 +++++++++++++++
1 file changed, 15 insertions(+)
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 50514962771a1..e25788e643bbd 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -2047,10 +2047,13 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
struct inode *inode = file_inode(filp);
struct f2fs_inode_info *fi = F2FS_I(inode);
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
int ret;
+ if (!(filp->f_mode & FMODE_WRITE))
+ return -EBADF;
+
if (!inode_owner_or_capable(inode))
return -EACCES;
if (!S_ISREG(inode->i_mode))
return -EINVAL;
@@ -2117,10 +2120,13 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
static int f2fs_ioc_commit_atomic_write(struct file *filp)
{
struct inode *inode = file_inode(filp);
int ret;
+ if (!(filp->f_mode & FMODE_WRITE))
+ return -EBADF;
+
if (!inode_owner_or_capable(inode))
return -EACCES;
ret = mnt_want_write_file(filp);
if (ret)
@@ -2159,10 +2165,13 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
static int f2fs_ioc_start_volatile_write(struct file *filp)
{
struct inode *inode = file_inode(filp);
int ret;
+ if (!(filp->f_mode & FMODE_WRITE))
+ return -EBADF;
+
if (!inode_owner_or_capable(inode))
return -EACCES;
if (!S_ISREG(inode->i_mode))
return -EINVAL;
@@ -2194,10 +2203,13 @@ static int f2fs_ioc_start_volatile_write(struct file *filp)
static int f2fs_ioc_release_volatile_write(struct file *filp)
{
struct inode *inode = file_inode(filp);
int ret;
+ if (!(filp->f_mode & FMODE_WRITE))
+ return -EBADF;
+
if (!inode_owner_or_capable(inode))
return -EACCES;
ret = mnt_want_write_file(filp);
if (ret)
@@ -2223,10 +2235,13 @@ static int f2fs_ioc_release_volatile_write(struct file *filp)
static int f2fs_ioc_abort_volatile_write(struct file *filp)
{
struct inode *inode = file_inode(filp);
int ret;
+ if (!(filp->f_mode & FMODE_WRITE))
+ return -EBADF;
+
if (!inode_owner_or_capable(inode))
return -EACCES;
ret = mnt_want_write_file(filp);
if (ret)
base-commit: ceb091e2c4ccf93b1ee0e0e8a202476a433784ff
--
2.47.0.rc0.187.ge670bccf7e-goog
From: Jann Horn <jannh(a)google.com>
commit 4f5a100f87f32cb65d4bb1ad282a08c92f6f591e upstream.
The F2FS ioctls for starting and committing atomic writes check for
inode_owner_or_capable(), but this does not give LSMs like SELinux or
Landlock an opportunity to deny the write access - if the caller's FSUID
matches the inode's UID, inode_owner_or_capable() immediately returns true.
There are scenarios where LSMs want to deny a process the ability to write
particular files, even files that the FSUID of the process owns; but this
can currently partially be bypassed using atomic write ioctls in two ways:
- F2FS_IOC_START_ATOMIC_REPLACE + F2FS_IOC_COMMIT_ATOMIC_WRITE can
truncate an inode to size 0
- F2FS_IOC_START_ATOMIC_WRITE + F2FS_IOC_ABORT_ATOMIC_WRITE can revert
changes another process concurrently made to a file
Fix it by requiring FMODE_WRITE for these operations, just like for
F2FS_IOC_MOVE_RANGE. Since any legitimate caller should only be using these
ioctls when intending to write into the file, that seems unlikely to break
anything.
Fixes: 88b88a667971 ("f2fs: support atomic writes")
Cc: stable(a)vger.kernel.org
Signed-off-by: Jann Horn <jannh(a)google.com>
Reviewed-by: Chao Yu <chao(a)kernel.org>
Reviewed-by: Eric Biggers <ebiggers(a)google.com>
Signed-off-by: Jaegeuk Kim <jaegeuk(a)kernel.org>
Signed-off-by: Eric Biggers <ebiggers(a)google.com>
---
fs/f2fs/file.c | 15 +++++++++++++++
1 file changed, 15 insertions(+)
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index be9536815e50d..fd369db1e47b5 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -2005,10 +2005,13 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
struct inode *inode = file_inode(filp);
struct f2fs_inode_info *fi = F2FS_I(inode);
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
int ret;
+ if (!(filp->f_mode & FMODE_WRITE))
+ return -EBADF;
+
if (!inode_owner_or_capable(&init_user_ns, inode))
return -EACCES;
if (!S_ISREG(inode->i_mode))
return -EINVAL;
@@ -2075,10 +2078,13 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
static int f2fs_ioc_commit_atomic_write(struct file *filp)
{
struct inode *inode = file_inode(filp);
int ret;
+ if (!(filp->f_mode & FMODE_WRITE))
+ return -EBADF;
+
if (!inode_owner_or_capable(&init_user_ns, inode))
return -EACCES;
ret = mnt_want_write_file(filp);
if (ret)
@@ -2117,10 +2123,13 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
static int f2fs_ioc_start_volatile_write(struct file *filp)
{
struct inode *inode = file_inode(filp);
int ret;
+ if (!(filp->f_mode & FMODE_WRITE))
+ return -EBADF;
+
if (!inode_owner_or_capable(&init_user_ns, inode))
return -EACCES;
if (!S_ISREG(inode->i_mode))
return -EINVAL;
@@ -2152,10 +2161,13 @@ static int f2fs_ioc_start_volatile_write(struct file *filp)
static int f2fs_ioc_release_volatile_write(struct file *filp)
{
struct inode *inode = file_inode(filp);
int ret;
+ if (!(filp->f_mode & FMODE_WRITE))
+ return -EBADF;
+
if (!inode_owner_or_capable(&init_user_ns, inode))
return -EACCES;
ret = mnt_want_write_file(filp);
if (ret)
@@ -2181,10 +2193,13 @@ static int f2fs_ioc_release_volatile_write(struct file *filp)
static int f2fs_ioc_abort_volatile_write(struct file *filp)
{
struct inode *inode = file_inode(filp);
int ret;
+ if (!(filp->f_mode & FMODE_WRITE))
+ return -EBADF;
+
if (!inode_owner_or_capable(&init_user_ns, inode))
return -EACCES;
ret = mnt_want_write_file(filp);
if (ret)
base-commit: 3a5928702e7120f83f703fd566082bfb59f1a57e
--
2.47.0.rc0.187.ge670bccf7e-goog
Commit c938ab4da0eb ("net: phy: Manual remove LEDs to ensure correct
ordering") correctly fixed a problem with using devm_ but missed
removing the LED entry from the LEDs list.
This cause kernel panic on specific scenario where the port for the PHY
is torn down and up and the kmod for the PHY is removed.
On setting the port down the first time, the assosiacted LEDs are
correctly unregistered. The associated kmod for the PHY is now removed.
The kmod is now added again and the port is now put up, the associated LED
are registered again.
On putting the port down again for the second time after these step, the
LED list now have 4 elements. With the first 2 already unregistered
previously and the 2 new one registered again.
This cause a kernel panic as the first 2 element should have been
removed.
Fix this by correctly removing the element when LED is unregistered.
Reported-by: Daniel Golle <daniel(a)makrotopia.org>
Tested-by: Daniel Golle <daniel(a)makrotopia.org>
Cc: stable(a)vger.kernel.org
Fixes: c938ab4da0eb ("net: phy: Manual remove LEDs to ensure correct ordering")
Signed-off-by: Christian Marangi <ansuelsmth(a)gmail.com>
---
drivers/net/phy/phy_device.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 560e338b307a..499797646580 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -3326,10 +3326,11 @@ static __maybe_unused int phy_led_hw_is_supported(struct led_classdev *led_cdev,
static void phy_leds_unregister(struct phy_device *phydev)
{
- struct phy_led *phyled;
+ struct phy_led *phyled, *tmp;
- list_for_each_entry(phyled, &phydev->leds, list) {
+ list_for_each_entry_safe(phyled, tmp, &phydev->leds, list) {
led_classdev_unregister(&phyled->led_cdev);
+ list_del(&phyled->list);
}
}
--
2.45.2
From: Wei Li <liwei391(a)huawei.com>
There is another found exception that the "timerlat/1" thread was
scheduled on CPU0, and lead to timer corruption finally:
```
ODEBUG: init active (active state 0) object: ffff888237c2e108 object type: hrtimer hint: timerlat_irq+0x0/0x220
WARNING: CPU: 0 PID: 426 at lib/debugobjects.c:518 debug_print_object+0x7d/0xb0
Modules linked in:
CPU: 0 UID: 0 PID: 426 Comm: timerlat/1 Not tainted 6.11.0-rc7+ #45
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014
RIP: 0010:debug_print_object+0x7d/0xb0
...
Call Trace:
<TASK>
? __warn+0x7c/0x110
? debug_print_object+0x7d/0xb0
? report_bug+0xf1/0x1d0
? prb_read_valid+0x17/0x20
? handle_bug+0x3f/0x70
? exc_invalid_op+0x13/0x60
? asm_exc_invalid_op+0x16/0x20
? debug_print_object+0x7d/0xb0
? debug_print_object+0x7d/0xb0
? __pfx_timerlat_irq+0x10/0x10
__debug_object_init+0x110/0x150
hrtimer_init+0x1d/0x60
timerlat_main+0xab/0x2d0
? __pfx_timerlat_main+0x10/0x10
kthread+0xb7/0xe0
? __pfx_kthread+0x10/0x10
ret_from_fork+0x2d/0x40
? __pfx_kthread+0x10/0x10
ret_from_fork_asm+0x1a/0x30
</TASK>
```
After tracing the scheduling event, it was discovered that the migration
of the "timerlat/1" thread was performed during thread creation. Further
analysis confirmed that it is because the CPU online processing for
osnoise is implemented through workers, which is asynchronous with the
offline processing. When the worker was scheduled to create a thread, the
CPU may has already been removed from the cpu_online_mask during the offline
process, resulting in the inability to select the right CPU:
T1 | T2
[CPUHP_ONLINE] | cpu_device_down()
osnoise_hotplug_workfn() |
| cpus_write_lock()
| takedown_cpu(1)
| cpus_write_unlock()
[CPUHP_OFFLINE] |
cpus_read_lock() |
start_kthread(1) |
cpus_read_unlock() |
To fix this, skip online processing if the CPU is already offline.
Cc: stable(a)vger.kernel.org
Cc: Masami Hiramatsu <mhiramat(a)kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers(a)efficios.com>
Link: https://lore.kernel.org/20240924094515.3561410-4-liwei391@huawei.com
Fixes: c8895e271f79 ("trace/osnoise: Support hotplug operations")
Signed-off-by: Wei Li <liwei391(a)huawei.com>
Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
---
kernel/trace/trace_osnoise.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c
index e22567174dd3..a50ed23bee77 100644
--- a/kernel/trace/trace_osnoise.c
+++ b/kernel/trace/trace_osnoise.c
@@ -2097,6 +2097,8 @@ static void osnoise_hotplug_workfn(struct work_struct *dummy)
mutex_lock(&interface_lock);
cpus_read_lock();
+ if (!cpu_online(cpu))
+ goto out_unlock;
if (!cpumask_test_cpu(cpu, &osnoise_cpumask))
goto out_unlock;
--
2.45.2
From: Wei Li <liwei391(a)huawei.com>
osnoise_hotplug_workfn() is the asynchronous online callback for
"trace/osnoise:online". It may be congested when a CPU goes online and
offline repeatedly and is invoked for multiple times after a certain
online.
This will lead to kthread leak and timer corruption. Add a check
in start_kthread() to prevent this situation.
Cc: stable(a)vger.kernel.org
Cc: Masami Hiramatsu <mhiramat(a)kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers(a)efficios.com>
Link: https://lore.kernel.org/20240924094515.3561410-2-liwei391@huawei.com
Fixes: c8895e271f79 ("trace/osnoise: Support hotplug operations")
Signed-off-by: Wei Li <liwei391(a)huawei.com>
Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
---
kernel/trace/trace_osnoise.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c
index 1439064f65d6..d1a539913a5f 100644
--- a/kernel/trace/trace_osnoise.c
+++ b/kernel/trace/trace_osnoise.c
@@ -2007,6 +2007,10 @@ static int start_kthread(unsigned int cpu)
void *main = osnoise_main;
char comm[24];
+ /* Do not start a new thread if it is already running */
+ if (per_cpu(per_cpu_osnoise_var, cpu).kthread)
+ return 0;
+
if (timerlat_enabled()) {
snprintf(comm, 24, "timerlat/%d", cpu);
main = timerlat_main;
@@ -2061,11 +2065,10 @@ static int start_per_cpu_kthreads(void)
if (cpumask_test_and_clear_cpu(cpu, &kthread_cpumask)) {
struct task_struct *kthread;
- kthread = per_cpu(per_cpu_osnoise_var, cpu).kthread;
+ kthread = xchg_relaxed(&(per_cpu(per_cpu_osnoise_var, cpu).kthread), NULL);
if (!WARN_ON(!kthread))
kthread_stop(kthread);
}
- per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL;
}
for_each_cpu(cpu, current_mask) {
--
2.45.2
From: Eder Zulian <ezulian(a)redhat.com>
The help text in osnoise top and timerlat top had some minor errors
and omissions. The -d option was missing the 's' (second) abbreviation and
the error message for '-d' used '-D'.
Cc: stable(a)vger.kernel.org
Fixes: 1eceb2fc2ca54 ("rtla/osnoise: Add osnoise top mode")
Fixes: a828cd18bc4ad ("rtla: Add timerlat tool and timelart top mode")
Link: https://lore.kernel.org/20240813155831.384446-1-ezulian@redhat.com
Suggested-by: Tomas Glozar <tglozar(a)redhat.com>
Reviewed-by: Tomas Glozar <tglozar(a)redhat.com>
Signed-off-by: Eder Zulian <ezulian(a)redhat.com>
Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
---
tools/tracing/rtla/src/osnoise_top.c | 2 +-
tools/tracing/rtla/src/timerlat_top.c | 4 ++--
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/tools/tracing/rtla/src/osnoise_top.c b/tools/tracing/rtla/src/osnoise_top.c
index 2f756628613d..30e3853076a0 100644
--- a/tools/tracing/rtla/src/osnoise_top.c
+++ b/tools/tracing/rtla/src/osnoise_top.c
@@ -442,7 +442,7 @@ struct osnoise_top_params *osnoise_top_parse_args(int argc, char **argv)
case 'd':
params->duration = parse_seconds_duration(optarg);
if (!params->duration)
- osnoise_top_usage(params, "Invalid -D duration\n");
+ osnoise_top_usage(params, "Invalid -d duration\n");
break;
case 'e':
tevent = trace_event_alloc(optarg);
diff --git a/tools/tracing/rtla/src/timerlat_top.c b/tools/tracing/rtla/src/timerlat_top.c
index 8c16419fe22a..210b0f533534 100644
--- a/tools/tracing/rtla/src/timerlat_top.c
+++ b/tools/tracing/rtla/src/timerlat_top.c
@@ -459,7 +459,7 @@ static void timerlat_top_usage(char *usage)
" -c/--cpus cpus: run the tracer only on the given cpus",
" -H/--house-keeping cpus: run rtla control threads only on the given cpus",
" -C/--cgroup[=cgroup_name]: set cgroup, if no cgroup_name is passed, the rtla's cgroup will be inherited",
- " -d/--duration time[m|h|d]: duration of the session in seconds",
+ " -d/--duration time[s|m|h|d]: duration of the session",
" -D/--debug: print debug info",
" --dump-tasks: prints the task running on all CPUs if stop conditions are met (depends on !--no-aa)",
" -t/--trace[file]: save the stopped trace to [file|timerlat_trace.txt]",
@@ -613,7 +613,7 @@ static struct timerlat_top_params
case 'd':
params->duration = parse_seconds_duration(optarg);
if (!params->duration)
- timerlat_top_usage("Invalid -D duration\n");
+ timerlat_top_usage("Invalid -d duration\n");
break;
case 'e':
tevent = trace_event_alloc(optarg);
--
2.45.2
Hi Vladimir!
On Fri, 2024-10-04 at 14:57 +0300, Vladimir Oltean wrote:
> On Fri, Oct 04, 2024 at 01:36:54PM +0200, A. Sverdlin wrote:
> > From: Anatolij Gustschin <agust(a)denx.de>
> >
> > Accessing device registers seems to be not reliable, the chip
> > revision is sometimes detected wrongly (0 instead of expected 1).
> >
> > Ensure that the chip reset is performed via reset GPIO and then
> > wait for 'Device Ready' status in HW_CFG register before doing
> > any register initializations.
> >
> > Signed-off-by: Anatolij Gustschin <agust(a)denx.de>
> > [alex: reworked using read_poll_timeout()]
> > Signed-off-by: Alexander Sverdlin <alexander.sverdlin(a)siemens.com>
> > ---
>
> Reviewed-by: Vladimir Oltean <olteanv(a)gmail.com>
>
> I see you're missing a Fixes: tag (meaning in this case: backport down
> to all stable tree containing this commit). I think you can just post it
> as a reply to this email, without resending, and it should get picked up
> by maintainers through the same mechanism as Reviewed-by: tags.
Well, the only meaningful would be the very first commit for lan9303:
Fixes: a1292595e006 ("net: dsa: add new DSA switch driver for the SMSC-LAN9303")
Cc: <stable(a)vger.kernel.org>
--
Alexander Sverdlin
Siemens AG
www.siemens.com
The value of an arithmetic expression period_ns * 1000 is subject
to overflow due to a failure to cast operands to a larger data
type before performing arithmetic
Found by Linux Verification Center (linuxtesting.org) with SVACE.
Fixes: 3e90b1c7ebe9 ("staging: comedi: ni_tio: tidy up ni_tio_set_clock_src() and helpers")
Cc: <stable(a)vger.kernel.org> # v5.15+
Reviewed-by: Ian Abbott <abbotti(a)mev.co.uk>
Signed-off-by: Denis Arefev <arefev(a)swemel.ru>
Signed-off-by: Ian Abbott <abbotti(a)mev.co.uk>
---
V1 -> V2: Oh, good point. It should be 1000ULL.
drivers/comedi/drivers/ni_tio.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/comedi/drivers/ni_tio.c b/drivers/comedi/drivers/ni_tio.c
index da6826d77e60..acc914903c70 100644
--- a/drivers/comedi/drivers/ni_tio.c
+++ b/drivers/comedi/drivers/ni_tio.c
@@ -800,7 +800,7 @@ static int ni_tio_set_clock_src(struct ni_gpct *counter,
GI_PRESCALE_X2(counter_dev->variant) |
GI_PRESCALE_X8(counter_dev->variant), bits);
}
- counter->clock_period_ps = period_ns * 1000;
+ counter->clock_period_ps = period_ns * 1000ULL;
ni_tio_set_sync_mode(counter);
return 0;
}
--
2.25.1
The patch titled
Subject: nilfs2: propagate directory read errors from nilfs_find_entry()
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
nilfs2-propagate-directory-read-errors-from-nilfs_find_entry.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Subject: nilfs2: propagate directory read errors from nilfs_find_entry()
Date: Fri, 4 Oct 2024 12:35:31 +0900
Syzbot reported that a task hang occurs in vcs_open() during a fuzzing
test for nilfs2.
The root cause of this problem is that in nilfs_find_entry(), which
searches for directory entries, ignores errors when loading a directory
page/folio via nilfs_get_folio() fails.
If the filesystem images is corrupted, and the i_size of the directory
inode is large, and the directory page/folio is successfully read but
fails the sanity check, for example when it is zero-filled,
nilfs_check_folio() may continue to spit out error messages in bursts.
Fix this issue by propagating the error to the callers when loading a
page/folio fails in nilfs_find_entry().
The current interface of nilfs_find_entry() and its callers is outdated
and cannot propagate error codes such as -EIO and -ENOMEM returned via
nilfs_find_entry(), so fix it together.
Link: https://lkml.kernel.org/r/20241004033640.6841-1-konishi.ryusuke@gmail.com
Fixes: 2ba466d74ed7 ("nilfs2: directory entry operations")
Signed-off-by: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Reported-by: Lizhi Xu <lizhi.xu(a)windriver.com>
Closes: https://lkml.kernel.org/r/20240927013806.3577931-1-lizhi.xu@windriver.com
Reported-by: syzbot+8a192e8d090fa9a31135(a)syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=8a192e8d090fa9a31135
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/nilfs2/dir.c | 48 ++++++++++++++++++++++----------------------
fs/nilfs2/namei.c | 39 +++++++++++++++++++++++------------
fs/nilfs2/nilfs.h | 2 -
3 files changed, 52 insertions(+), 37 deletions(-)
--- a/fs/nilfs2/dir.c~nilfs2-propagate-directory-read-errors-from-nilfs_find_entry
+++ a/fs/nilfs2/dir.c
@@ -289,7 +289,7 @@ static int nilfs_readdir(struct file *fi
* The folio is mapped and unlocked. When the caller is finished with
* the entry, it should call folio_release_kmap().
*
- * On failure, returns NULL and the caller should ignore foliop.
+ * On failure, returns an error pointer and the caller should ignore foliop.
*/
struct nilfs_dir_entry *nilfs_find_entry(struct inode *dir,
const struct qstr *qstr, struct folio **foliop)
@@ -312,22 +312,24 @@ struct nilfs_dir_entry *nilfs_find_entry
do {
char *kaddr = nilfs_get_folio(dir, n, foliop);
- if (!IS_ERR(kaddr)) {
- de = (struct nilfs_dir_entry *)kaddr;
- kaddr += nilfs_last_byte(dir, n) - reclen;
- while ((char *) de <= kaddr) {
- if (de->rec_len == 0) {
- nilfs_error(dir->i_sb,
- "zero-length directory entry");
- folio_release_kmap(*foliop, kaddr);
- goto out;
- }
- if (nilfs_match(namelen, name, de))
- goto found;
- de = nilfs_next_entry(de);
+ if (IS_ERR(kaddr))
+ return ERR_CAST(kaddr);
+
+ de = (struct nilfs_dir_entry *)kaddr;
+ kaddr += nilfs_last_byte(dir, n) - reclen;
+ while ((char *)de <= kaddr) {
+ if (de->rec_len == 0) {
+ nilfs_error(dir->i_sb,
+ "zero-length directory entry");
+ folio_release_kmap(*foliop, kaddr);
+ goto out;
}
- folio_release_kmap(*foliop, kaddr);
+ if (nilfs_match(namelen, name, de))
+ goto found;
+ de = nilfs_next_entry(de);
}
+ folio_release_kmap(*foliop, kaddr);
+
if (++n >= npages)
n = 0;
/* next folio is past the blocks we've got */
@@ -340,7 +342,7 @@ struct nilfs_dir_entry *nilfs_find_entry
}
} while (n != start);
out:
- return NULL;
+ return ERR_PTR(-ENOENT);
found:
ei->i_dir_start_lookup = n;
@@ -384,18 +386,18 @@ fail:
return NULL;
}
-ino_t nilfs_inode_by_name(struct inode *dir, const struct qstr *qstr)
+int nilfs_inode_by_name(struct inode *dir, const struct qstr *qstr, ino_t *ino)
{
- ino_t res = 0;
struct nilfs_dir_entry *de;
struct folio *folio;
de = nilfs_find_entry(dir, qstr, &folio);
- if (de) {
- res = le64_to_cpu(de->inode);
- folio_release_kmap(folio, de);
- }
- return res;
+ if (IS_ERR(de))
+ return PTR_ERR(de);
+
+ *ino = le64_to_cpu(de->inode);
+ folio_release_kmap(folio, de);
+ return 0;
}
void nilfs_set_link(struct inode *dir, struct nilfs_dir_entry *de,
--- a/fs/nilfs2/namei.c~nilfs2-propagate-directory-read-errors-from-nilfs_find_entry
+++ a/fs/nilfs2/namei.c
@@ -55,12 +55,20 @@ nilfs_lookup(struct inode *dir, struct d
{
struct inode *inode;
ino_t ino;
+ int res;
if (dentry->d_name.len > NILFS_NAME_LEN)
return ERR_PTR(-ENAMETOOLONG);
- ino = nilfs_inode_by_name(dir, &dentry->d_name);
- inode = ino ? nilfs_iget(dir->i_sb, NILFS_I(dir)->i_root, ino) : NULL;
+ res = nilfs_inode_by_name(dir, &dentry->d_name, &ino);
+ if (res) {
+ if (res != -ENOENT)
+ return ERR_PTR(res);
+ inode = NULL;
+ } else {
+ inode = nilfs_iget(dir->i_sb, NILFS_I(dir)->i_root, ino);
+ }
+
return d_splice_alias(inode, dentry);
}
@@ -263,10 +271,11 @@ static int nilfs_do_unlink(struct inode
struct folio *folio;
int err;
- err = -ENOENT;
de = nilfs_find_entry(dir, &dentry->d_name, &folio);
- if (!de)
+ if (IS_ERR(de)) {
+ err = PTR_ERR(de);
goto out;
+ }
inode = d_inode(dentry);
err = -EIO;
@@ -362,10 +371,11 @@ static int nilfs_rename(struct mnt_idmap
if (unlikely(err))
return err;
- err = -ENOENT;
old_de = nilfs_find_entry(old_dir, &old_dentry->d_name, &old_folio);
- if (!old_de)
+ if (IS_ERR(old_de)) {
+ err = PTR_ERR(old_de);
goto out;
+ }
if (S_ISDIR(old_inode->i_mode)) {
err = -EIO;
@@ -382,10 +392,12 @@ static int nilfs_rename(struct mnt_idmap
if (dir_de && !nilfs_empty_dir(new_inode))
goto out_dir;
- err = -ENOENT;
- new_de = nilfs_find_entry(new_dir, &new_dentry->d_name, &new_folio);
- if (!new_de)
+ new_de = nilfs_find_entry(new_dir, &new_dentry->d_name,
+ &new_folio);
+ if (IS_ERR(new_de)) {
+ err = PTR_ERR(new_de);
goto out_dir;
+ }
nilfs_set_link(new_dir, new_de, new_folio, old_inode);
folio_release_kmap(new_folio, new_de);
nilfs_mark_inode_dirty(new_dir);
@@ -440,12 +452,13 @@ out:
*/
static struct dentry *nilfs_get_parent(struct dentry *child)
{
- unsigned long ino;
+ ino_t ino;
+ int res;
struct nilfs_root *root;
- ino = nilfs_inode_by_name(d_inode(child), &dotdot_name);
- if (!ino)
- return ERR_PTR(-ENOENT);
+ res = nilfs_inode_by_name(d_inode(child), &dotdot_name, &ino);
+ if (res)
+ return ERR_PTR(res);
root = NILFS_I(d_inode(child))->i_root;
--- a/fs/nilfs2/nilfs.h~nilfs2-propagate-directory-read-errors-from-nilfs_find_entry
+++ a/fs/nilfs2/nilfs.h
@@ -254,7 +254,7 @@ static inline __u32 nilfs_mask_flags(umo
/* dir.c */
int nilfs_add_link(struct dentry *, struct inode *);
-ino_t nilfs_inode_by_name(struct inode *, const struct qstr *);
+int nilfs_inode_by_name(struct inode *dir, const struct qstr *qstr, ino_t *ino);
int nilfs_make_empty(struct inode *, struct inode *);
struct nilfs_dir_entry *nilfs_find_entry(struct inode *, const struct qstr *,
struct folio **);
_
Patches currently in -mm which might be from konishi.ryusuke(a)gmail.com are
nilfs2-propagate-directory-read-errors-from-nilfs_find_entry.patch
The quilt patch titled
Subject: secretmem: disable memfd_secret() if arch cannot set direct map
has been removed from the -mm tree. Its filename was
secretmem-disable-memfd_secret-if-arch-cannot-set-direct-map.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Patrick Roy <roypat(a)amazon.co.uk>
Subject: secretmem: disable memfd_secret() if arch cannot set direct map
Date: Tue, 1 Oct 2024 09:00:41 +0100
Return -ENOSYS from memfd_secret() syscall if !can_set_direct_map(). This
is the case for example on some arm64 configurations, where marking 4k
PTEs in the direct map not present can only be done if the direct map is
set up at 4k granularity in the first place (as ARM's break-before-make
semantics do not easily allow breaking apart large/gigantic pages).
More precisely, on arm64 systems with !can_set_direct_map(),
set_direct_map_invalid_noflush() is a no-op, however it returns success
(0) instead of an error. This means that memfd_secret will seemingly
"work" (e.g. syscall succeeds, you can mmap the fd and fault in pages),
but it does not actually achieve its goal of removing its memory from the
direct map.
Note that with this patch, memfd_secret() will start erroring on systems
where can_set_direct_map() returns false (arm64 with
CONFIG_RODATA_FULL_DEFAULT_ENABLED=n, CONFIG_DEBUG_PAGEALLOC=n and
CONFIG_KFENCE=n), but that still seems better than the current silent
failure. Since CONFIG_RODATA_FULL_DEFAULT_ENABLED defaults to 'y', most
arm64 systems actually have a working memfd_secret() and aren't be
affected.
From going through the iterations of the original memfd_secret patch
series, it seems that disabling the syscall in these scenarios was the
intended behavior [1] (preferred over having
set_direct_map_invalid_noflush return an error as that would result in
SIGBUSes at page-fault time), however the check for it got dropped between
v16 [2] and v17 [3], when secretmem moved away from CMA allocations.
[1]: https://lore.kernel.org/lkml/20201124164930.GK8537@kernel.org/
[2]: https://lore.kernel.org/lkml/20210121122723.3446-11-rppt@kernel.org/#t
[3]: https://lore.kernel.org/lkml/20201125092208.12544-10-rppt@kernel.org/
Link: https://lkml.kernel.org/r/20241001080056.784735-1-roypat@amazon.co.uk
Fixes: 1507f51255c9 ("mm: introduce memfd_secret system call to create "secret" memory areas")
Signed-off-by: Patrick Roy <roypat(a)amazon.co.uk>
Reviewed-by: Mike Rapoport (Microsoft) <rppt(a)kernel.org>
Cc: Alexander Graf <graf(a)amazon.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: James Gowans <jgowans(a)amazon.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/secretmem.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
--- a/mm/secretmem.c~secretmem-disable-memfd_secret-if-arch-cannot-set-direct-map
+++ a/mm/secretmem.c
@@ -238,7 +238,7 @@ SYSCALL_DEFINE1(memfd_secret, unsigned i
/* make sure local flags do not confict with global fcntl.h */
BUILD_BUG_ON(SECRETMEM_FLAGS_MASK & O_CLOEXEC);
- if (!secretmem_enable)
+ if (!secretmem_enable || !can_set_direct_map())
return -ENOSYS;
if (flags & ~(SECRETMEM_FLAGS_MASK | O_CLOEXEC))
@@ -280,7 +280,7 @@ static struct file_system_type secretmem
static int __init secretmem_init(void)
{
- if (!secretmem_enable)
+ if (!secretmem_enable || !can_set_direct_map())
return 0;
secretmem_mnt = kern_mount(&secretmem_fs);
_
Patches currently in -mm which might be from roypat(a)amazon.co.uk are
The quilt patch titled
Subject: fs/proc/kcore.c: allow translation of physical memory addresses
has been removed from the -mm tree. Its filename was
fs-proc-kcorec-allow-translation-of-physical-memory-addresses.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Alexander Gordeev <agordeev(a)linux.ibm.com>
Subject: fs/proc/kcore.c: allow translation of physical memory addresses
Date: Mon, 30 Sep 2024 14:21:19 +0200
When /proc/kcore is read an attempt to read the first two pages results in
HW-specific page swap on s390 and another (so called prefix) pages are
accessed instead. That leads to a wrong read.
Allow architecture-specific translation of memory addresses using
kc_xlate_dev_mem_ptr() and kc_unxlate_dev_mem_ptr() callbacks similarily
to /dev/mem xlate_dev_mem_ptr() and unxlate_dev_mem_ptr() callbacks. That
way an architecture can deal with specific physical memory ranges.
Re-use the existing /dev/mem callback implementation on s390, which
handles the described prefix pages swapping correctly.
For other architectures the default callback is basically NOP. It is
expected the condition (vaddr == __va(__pa(vaddr))) always holds true for
KCORE_RAM memory type.
Link: https://lkml.kernel.org/r/20240930122119.1651546-1-agordeev@linux.ibm.com
Signed-off-by: Alexander Gordeev <agordeev(a)linux.ibm.com>
Suggested-by: Heiko Carstens <hca(a)linux.ibm.com>
Cc: Vasily Gorbik <gor(a)linux.ibm.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
arch/s390/include/asm/io.h | 2 +
fs/proc/kcore.c | 36 +++++++++++++++++++++++++++++++++--
2 files changed, 36 insertions(+), 2 deletions(-)
--- a/arch/s390/include/asm/io.h~fs-proc-kcorec-allow-translation-of-physical-memory-addresses
+++ a/arch/s390/include/asm/io.h
@@ -16,8 +16,10 @@
#include <asm/pci_io.h>
#define xlate_dev_mem_ptr xlate_dev_mem_ptr
+#define kc_xlate_dev_mem_ptr xlate_dev_mem_ptr
void *xlate_dev_mem_ptr(phys_addr_t phys);
#define unxlate_dev_mem_ptr unxlate_dev_mem_ptr
+#define kc_unxlate_dev_mem_ptr unxlate_dev_mem_ptr
void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr);
#define IO_SPACE_LIMIT 0
--- a/fs/proc/kcore.c~fs-proc-kcorec-allow-translation-of-physical-memory-addresses
+++ a/fs/proc/kcore.c
@@ -50,6 +50,20 @@ static struct proc_dir_entry *proc_root_
#define kc_offset_to_vaddr(o) ((o) + PAGE_OFFSET)
#endif
+#ifndef kc_xlate_dev_mem_ptr
+#define kc_xlate_dev_mem_ptr kc_xlate_dev_mem_ptr
+static inline void *kc_xlate_dev_mem_ptr(phys_addr_t phys)
+{
+ return __va(phys);
+}
+#endif
+#ifndef kc_unxlate_dev_mem_ptr
+#define kc_unxlate_dev_mem_ptr kc_unxlate_dev_mem_ptr
+static inline void kc_unxlate_dev_mem_ptr(phys_addr_t phys, void *virt)
+{
+}
+#endif
+
static LIST_HEAD(kclist_head);
static DECLARE_RWSEM(kclist_lock);
static int kcore_need_update = 1;
@@ -471,6 +485,8 @@ static ssize_t read_kcore_iter(struct ki
while (buflen) {
struct page *page;
unsigned long pfn;
+ phys_addr_t phys;
+ void *__start;
/*
* If this is the first iteration or the address is not within
@@ -537,7 +553,8 @@ static ssize_t read_kcore_iter(struct ki
}
break;
case KCORE_RAM:
- pfn = __pa(start) >> PAGE_SHIFT;
+ phys = __pa(start);
+ pfn = phys >> PAGE_SHIFT;
page = pfn_to_online_page(pfn);
/*
@@ -557,13 +574,28 @@ static ssize_t read_kcore_iter(struct ki
fallthrough;
case KCORE_VMEMMAP:
case KCORE_TEXT:
+ if (m->type == KCORE_RAM) {
+ __start = kc_xlate_dev_mem_ptr(phys);
+ if (!__start) {
+ ret = -ENOMEM;
+ if (iov_iter_zero(tsz, iter) != tsz)
+ ret = -EFAULT;
+ goto out;
+ }
+ } else {
+ __start = (void *)start;
+ }
+
/*
* Sadly we must use a bounce buffer here to be able to
* make use of copy_from_kernel_nofault(), as these
* memory regions might not always be mapped on all
* architectures.
*/
- if (copy_from_kernel_nofault(buf, (void *)start, tsz)) {
+ ret = copy_from_kernel_nofault(buf, __start, tsz);
+ if (m->type == KCORE_RAM)
+ kc_unxlate_dev_mem_ptr(phys, __start);
+ if (ret) {
if (iov_iter_zero(tsz, iter) != tsz) {
ret = -EFAULT;
goto out;
_
Patches currently in -mm which might be from agordeev(a)linux.ibm.com are
The quilt patch titled
Subject: selftests/mm: fix incorrect buffer->mirror size in hmm2 double_map test
has been removed from the -mm tree. Its filename was
selftests-mm-fixed-incorrect-buffer-mirror-size-in-hmm2-double_map-test.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Donet Tom <donettom(a)linux.ibm.com>
Subject: selftests/mm: fix incorrect buffer->mirror size in hmm2 double_map test
Date: Fri, 27 Sep 2024 00:07:52 -0500
The hmm2 double_map test was failing due to an incorrect buffer->mirror
size. The buffer->mirror size was 6, while buffer->ptr size was 6 *
PAGE_SIZE. The test failed because the kernel's copy_to_user function was
attempting to copy a 6 * PAGE_SIZE buffer to buffer->mirror. Since the
size of buffer->mirror was incorrect, copy_to_user failed.
This patch corrects the buffer->mirror size to 6 * PAGE_SIZE.
Test Result without this patch
==============================
# RUN hmm2.hmm2_device_private.double_map ...
# hmm-tests.c:1680:double_map:Expected ret (-14) == 0 (0)
# double_map: Test terminated by assertion
# FAIL hmm2.hmm2_device_private.double_map
not ok 53 hmm2.hmm2_device_private.double_map
Test Result with this patch
===========================
# RUN hmm2.hmm2_device_private.double_map ...
# OK hmm2.hmm2_device_private.double_map
ok 53 hmm2.hmm2_device_private.double_map
Link: https://lkml.kernel.org/r/20240927050752.51066-1-donettom@linux.ibm.com
Fixes: fee9f6d1b8df ("mm/hmm/test: add selftests for HMM")
Signed-off-by: Donet Tom <donettom(a)linux.ibm.com>
Reviewed-by: Muhammad Usama Anjum <usama.anjum(a)collabora.com>
Cc: J��r��me Glisse <jglisse(a)redhat.com>
Cc: Kees Cook <keescook(a)chromium.org>
Cc: Mark Brown <broonie(a)kernel.org>
Cc: Przemek Kitszel <przemyslaw.kitszel(a)intel.com>
Cc: Ritesh Harjani (IBM) <ritesh.list(a)gmail.com>
Cc: Shuah Khan <shuah(a)kernel.org>
Cc: Ralph Campbell <rcampbell(a)nvidia.com>
Cc: Jason Gunthorpe <jgg(a)mellanox.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
tools/testing/selftests/mm/hmm-tests.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/tools/testing/selftests/mm/hmm-tests.c~selftests-mm-fixed-incorrect-buffer-mirror-size-in-hmm2-double_map-test
+++ a/tools/testing/selftests/mm/hmm-tests.c
@@ -1657,7 +1657,7 @@ TEST_F(hmm2, double_map)
buffer->fd = -1;
buffer->size = size;
- buffer->mirror = malloc(npages);
+ buffer->mirror = malloc(size);
ASSERT_NE(buffer->mirror, NULL);
/* Reserve a range of addresses. */
_
Patches currently in -mm which might be from donettom(a)linux.ibm.com are
The quilt patch titled
Subject: device-dax: correct pgoff align in dax_set_mapping()
has been removed from the -mm tree. Its filename was
device-dax-correct-pgoff-align-in-dax_set_mapping.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: "Kun(llfl)" <llfl(a)linux.alibaba.com>
Subject: device-dax: correct pgoff align in dax_set_mapping()
Date: Fri, 27 Sep 2024 15:45:09 +0800
pgoff should be aligned using ALIGN_DOWN() instead of ALIGN(). Otherwise,
vmf->address not aligned to fault_size will be aligned to the next
alignment, that can result in memory failure getting the wrong address.
It's a subtle situation that only can be observed in
page_mapped_in_vma() after the page is page fault handled by
dev_dax_huge_fault. Generally, there is little chance to perform
page_mapped_in_vma in dev-dax's page unless in specific error injection
to the dax device to trigger an MCE - memory-failure. In that case,
page_mapped_in_vma() will be triggered to determine which task is
accessing the failure address and kill that task in the end.
We used self-developed dax device (which is 2M aligned mapping) , to
perform error injection to random address. It turned out that error
injected to non-2M-aligned address was causing endless MCE until panic.
Because page_mapped_in_vma() kept resulting wrong address and the task
accessing the failure address was never killed properly:
[ 3783.719419] Memory failure: 0x200c9742: recovery action for dax page:
Recovered
[ 3784.049006] mce: Uncorrected hardware memory error in user-access at
200c9742380
[ 3784.049190] Memory failure: 0x200c9742: recovery action for dax page:
Recovered
[ 3784.448042] mce: Uncorrected hardware memory error in user-access at
200c9742380
[ 3784.448186] Memory failure: 0x200c9742: recovery action for dax page:
Recovered
[ 3784.792026] mce: Uncorrected hardware memory error in user-access at
200c9742380
[ 3784.792179] Memory failure: 0x200c9742: recovery action for dax page:
Recovered
[ 3785.162502] mce: Uncorrected hardware memory error in user-access at
200c9742380
[ 3785.162633] Memory failure: 0x200c9742: recovery action for dax page:
Recovered
[ 3785.461116] mce: Uncorrected hardware memory error in user-access at
200c9742380
[ 3785.461247] Memory failure: 0x200c9742: recovery action for dax page:
Recovered
[ 3785.764730] mce: Uncorrected hardware memory error in user-access at
200c9742380
[ 3785.764859] Memory failure: 0x200c9742: recovery action for dax page:
Recovered
[ 3786.042128] mce: Uncorrected hardware memory error in user-access at
200c9742380
[ 3786.042259] Memory failure: 0x200c9742: recovery action for dax page:
Recovered
[ 3786.464293] mce: Uncorrected hardware memory error in user-access at
200c9742380
[ 3786.464423] Memory failure: 0x200c9742: recovery action for dax page:
Recovered
[ 3786.818090] mce: Uncorrected hardware memory error in user-access at
200c9742380
[ 3786.818217] Memory failure: 0x200c9742: recovery action for dax page:
Recovered
[ 3787.085297] mce: Uncorrected hardware memory error in user-access at
200c9742380
[ 3787.085424] Memory failure: 0x200c9742: recovery action for dax page:
Recovered
It took us several weeks to pinpoint this problem,�� but we eventually
used bpftrace to trace the page fault and mce address and successfully
identified the issue.
Joao added:
; Likely we never reproduce in production because we always pin
: device-dax regions in the region align they provide (Qemu does
: similarly with prealloc in hugetlb/file backed memory). I think this
: bug requires that we touch *unpinned* device-dax regions unaligned to
: the device-dax selected alignment (page size i.e. 4K/2M/1G)
Link: https://lkml.kernel.org/r/23c02a03e8d666fef11bbe13e85c69c8b4ca0624.17274216…
Fixes: b9b5777f09be ("device-dax: use ALIGN() for determining pgoff")
Signed-off-by: Kun(llfl) <llfl(a)linux.alibaba.com>
Tested-by: JianXiong Zhao <zhaojianxiong.zjx(a)alibaba-inc.com>
Reviewed-by: Joao Martins <joao.m.martins(a)oracle.com>
Cc: Dan Williams <dan.j.williams(a)intel.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
drivers/dax/device.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/drivers/dax/device.c~device-dax-correct-pgoff-align-in-dax_set_mapping
+++ a/drivers/dax/device.c
@@ -86,7 +86,7 @@ static void dax_set_mapping(struct vm_fa
nr_pages = 1;
pgoff = linear_page_index(vmf->vma,
- ALIGN(vmf->address, fault_size));
+ ALIGN_DOWN(vmf->address, fault_size));
for (i = 0; i < nr_pages; i++) {
struct page *page = pfn_to_page(pfn_t_to_pfn(pfn) + i);
_
Patches currently in -mm which might be from llfl(a)linux.alibaba.com are
The quilt patch titled
Subject: kthread: unpark only parked kthread
has been removed from the -mm tree. Its filename was
kthread-unpark-only-parked-kthread.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Frederic Weisbecker <frederic(a)kernel.org>
Subject: kthread: unpark only parked kthread
Date: Fri, 13 Sep 2024 23:46:34 +0200
Calling into kthread unparking unconditionally is mostly harmless when
the kthread is already unparked. The wake up is then simply ignored
because the target is not in TASK_PARKED state.
However if the kthread is per CPU, the wake up is preceded by a call
to kthread_bind() which expects the task to be inactive and in
TASK_PARKED state, which obviously isn't the case if it is unparked.
As a result, calling kthread_stop() on an unparked per-cpu kthread
triggers such a warning:
WARNING: CPU: 0 PID: 11 at kernel/kthread.c:525 __kthread_bind_mask kernel/kthread.c:525
<TASK>
kthread_stop+0x17a/0x630 kernel/kthread.c:707
destroy_workqueue+0x136/0xc40 kernel/workqueue.c:5810
wg_destruct+0x1e2/0x2e0 drivers/net/wireguard/device.c:257
netdev_run_todo+0xe1a/0x1000 net/core/dev.c:10693
default_device_exit_batch+0xa14/0xa90 net/core/dev.c:11769
ops_exit_list net/core/net_namespace.c:178 [inline]
cleanup_net+0x89d/0xcc0 net/core/net_namespace.c:640
process_one_work kernel/workqueue.c:3231 [inline]
process_scheduled_works+0xa2c/0x1830 kernel/workqueue.c:3312
worker_thread+0x86d/0xd70 kernel/workqueue.c:3393
kthread+0x2f0/0x390 kernel/kthread.c:389
ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:147
ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244
</TASK>
Fix this with skipping unecessary unparking while stopping a kthread.
Link: https://lkml.kernel.org/r/20240913214634.12557-1-frederic@kernel.org
Fixes: 5c25b5ff89f0 ("workqueue: Tag bound workers with KTHREAD_IS_PER_CPU")
Signed-off-by: Frederic Weisbecker <frederic(a)kernel.org>
Reported-by: syzbot+943d34fa3cf2191e3068(a)syzkaller.appspotmail.com
Tested-by: syzbot+943d34fa3cf2191e3068(a)syzkaller.appspotmail.com
Suggested-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Hillf Danton <hdanton(a)sina.com>
Cc: Tejun Heo <tj(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
kernel/kthread.c | 2 ++
1 file changed, 2 insertions(+)
--- a/kernel/kthread.c~kthread-unpark-only-parked-kthread
+++ a/kernel/kthread.c
@@ -623,6 +623,8 @@ void kthread_unpark(struct task_struct *
{
struct kthread *kthread = to_kthread(k);
+ if (!test_bit(KTHREAD_SHOULD_PARK, &kthread->flags))
+ return;
/*
* Newly created kthread was parked when the CPU was offline.
* The binding was lost and we need to set it again.
_
Patches currently in -mm which might be from frederic(a)kernel.org are
(resending, sorry - I'd forgotten to add the mailing list)
After commit 379a58caa199 ("scsi: fnic: Move fnic_fnic_flush_tx() to a work
queue"), it can happen that a work item is sent to an uninitialized work
queue. This may has the effect that the item being queued is never
actually queued, and any further actions depending on it will not proceed.
The following warning is observed while the fnic driver is loaded:
kernel: WARNING: CPU: 11 PID: 0 at ../kernel/workqueue.c:1524 __queue_work+0x373/0x410
kernel: <IRQ>
kernel: queue_work_on+0x3a/0x50
kernel: fnic_wq_copy_cmpl_handler+0x54a/0x730 [fnic 62fbff0c42e7fb825c60a55cde2fb91facb2ed24]
kernel: fnic_isr_msix_wq_copy+0x2d/0x60 [fnic 62fbff0c42e7fb825c60a55cde2fb91facb2ed24]
kernel: __handle_irq_event_percpu+0x36/0x1a0
kernel: handle_irq_event_percpu+0x30/0x70
kernel: handle_irq_event+0x34/0x60
kernel: handle_edge_irq+0x7e/0x1a0
kernel: __common_interrupt+0x3b/0xb0
kernel: common_interrupt+0x58/0xa0
kernel: </IRQ>
It has been observed that this may break the rediscovery of fibre channel
devices after a temporary fabric failure.
This patch fixes it by moving the work queue initialization out of
an if block in fnic_probe().
Signed-off-by: Martin Wilck <mwilck(a)suse.com>
Fixes: 379a58caa199 ("scsi: fnic: Move fnic_fnic_flush_tx() to a work queue")
Cc: stable(a)vger.kernel.org
---
drivers/scsi/fnic/fnic_main.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/scsi/fnic/fnic_main.c b/drivers/scsi/fnic/fnic_main.c
index 0044717d4486..adec0df24bc4 100644
--- a/drivers/scsi/fnic/fnic_main.c
+++ b/drivers/scsi/fnic/fnic_main.c
@@ -830,7 +830,6 @@ static int fnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
spin_lock_init(&fnic->vlans_lock);
INIT_WORK(&fnic->fip_frame_work, fnic_handle_fip_frame);
INIT_WORK(&fnic->event_work, fnic_handle_event);
- INIT_WORK(&fnic->flush_work, fnic_flush_tx);
skb_queue_head_init(&fnic->fip_frame_queue);
INIT_LIST_HEAD(&fnic->evlist);
INIT_LIST_HEAD(&fnic->vlans);
@@ -948,6 +947,7 @@ static int fnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
INIT_WORK(&fnic->link_work, fnic_handle_link);
INIT_WORK(&fnic->frame_work, fnic_handle_frame);
+ INIT_WORK(&fnic->flush_work, fnic_flush_tx);
skb_queue_head_init(&fnic->frame_queue);
skb_queue_head_init(&fnic->tx_queue);
--
2.46.1
From: Aaron Thompson <dev(a)aaront.org>
If bt_debugfs is not created successfully, which happens if either
CONFIG_DEBUG_FS or CONFIG_DEBUG_FS_ALLOW_ALL is unset, then iso_init()
returns early and does not set iso_inited to true. This means that a
subsequent call to iso_init() will result in duplicate calls to
proto_register(), bt_sock_register(), etc.
With CONFIG_LIST_HARDENED and CONFIG_BUG_ON_DATA_CORRUPTION enabled, the
duplicate call to proto_register() triggers this BUG():
list_add double add: new=ffffffffc0b280d0, prev=ffffffffbab56250,
next=ffffffffc0b280d0.
------------[ cut here ]------------
kernel BUG at lib/list_debug.c:35!
Oops: invalid opcode: 0000 [#1] PREEMPT SMP PTI
CPU: 2 PID: 887 Comm: bluetoothd Not tainted 6.10.11-1-ao-desktop #1
RIP: 0010:__list_add_valid_or_report+0x9a/0xa0
...
__list_add_valid_or_report+0x9a/0xa0
proto_register+0x2b5/0x340
iso_init+0x23/0x150 [bluetooth]
set_iso_socket_func+0x68/0x1b0 [bluetooth]
kmem_cache_free+0x308/0x330
hci_sock_sendmsg+0x990/0x9e0 [bluetooth]
__sock_sendmsg+0x7b/0x80
sock_write_iter+0x9a/0x110
do_iter_readv_writev+0x11d/0x220
vfs_writev+0x180/0x3e0
do_writev+0xca/0x100
...
This change removes the early return. The check for iso_debugfs being
NULL was unnecessary, it is always NULL when iso_inited is false.
Cc: stable(a)vger.kernel.org
Fixes: ccf74f2390d6 ("Bluetooth: Add BTPROTO_ISO socket type")
Signed-off-by: Aaron Thompson <dev(a)aaront.org>
---
net/bluetooth/iso.c | 6 +-----
1 file changed, 1 insertion(+), 5 deletions(-)
diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c
index d5e00d0dd1a0..c9eefb43bf47 100644
--- a/net/bluetooth/iso.c
+++ b/net/bluetooth/iso.c
@@ -2301,13 +2301,9 @@ int iso_init(void)
hci_register_cb(&iso_cb);
- if (IS_ERR_OR_NULL(bt_debugfs))
- return 0;
-
- if (!iso_debugfs) {
+ if (!IS_ERR_OR_NULL(bt_debugfs))
iso_debugfs = debugfs_create_file("iso", 0444, bt_debugfs,
NULL, &iso_debugfs_fops);
- }
iso_inited = true;
--
2.39.5
Oliver reports that the kvm_has_feat() helper is not behaviing as
expected for negative feature. On investigation, the main issue
seems to be caused by the following construct:
(id##_##fld##_SIGNED ? \
get_idreg_field_signed(kvm, id, fld) : \
get_idreg_field_unsigned(kvm, id, fld))
where one side of the expression evaluates as something signed,
and the other as something unsigned. In retrospect, this is totally
braindead, as the compiler converts this into an unsigned expression.
When compared to something that is 0, the test is simply elided.
Epic fail. Similar issue exists in the expand_field_sign() macro.
The correct way to handle this is to chose between signed and unsigned
comparisons, so that both sides of the ternary expression are of the
same type (bool).
In order to keep the code readable (sort of), we introduce new
comparison primitives taking an operator as a parameter, and
rewrite the kvm_has_feat*() helpers in terms of these primitives.
Fixes: c62d7a23b947 ("KVM: arm64: Add feature checking helpers")
Reported-by: Oliver Upton <oliver.upton(a)linux.dev>
Tested-by: Oliver Upton <oliver.upton(a)linux.dev>
Signed-off-by: Marc Zyngier <maz(a)kernel.org>
Cc: stable(a)vger.kernel.org
---
arch/arm64/include/asm/kvm_host.h | 25 +++++++++++++------------
1 file changed, 13 insertions(+), 12 deletions(-)
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index aab1e59aa91e..e9e9b57782e4 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -1490,11 +1490,6 @@ void kvm_set_vm_id_reg(struct kvm *kvm, u32 reg, u64 val);
sign_extend64(__val, id##_##fld##_WIDTH - 1); \
})
-#define expand_field_sign(id, fld, val) \
- (id##_##fld##_SIGNED ? \
- __expand_field_sign_signed(id, fld, val) : \
- __expand_field_sign_unsigned(id, fld, val))
-
#define get_idreg_field_unsigned(kvm, id, fld) \
({ \
u64 __val = kvm_read_vm_id_reg((kvm), SYS_##id); \
@@ -1510,20 +1505,26 @@ void kvm_set_vm_id_reg(struct kvm *kvm, u32 reg, u64 val);
#define get_idreg_field_enum(kvm, id, fld) \
get_idreg_field_unsigned(kvm, id, fld)
-#define get_idreg_field(kvm, id, fld) \
+#define kvm_cmp_feat_signed(kvm, id, fld, op, limit) \
+ (get_idreg_field_signed((kvm), id, fld) op __expand_field_sign_signed(id, fld, limit))
+
+#define kvm_cmp_feat_unsigned(kvm, id, fld, op, limit) \
+ (get_idreg_field_unsigned((kvm), id, fld) op __expand_field_sign_unsigned(id, fld, limit))
+
+#define kvm_cmp_feat(kvm, id, fld, op, limit) \
(id##_##fld##_SIGNED ? \
- get_idreg_field_signed(kvm, id, fld) : \
- get_idreg_field_unsigned(kvm, id, fld))
+ kvm_cmp_feat_signed(kvm, id, fld, op, limit) : \
+ kvm_cmp_feat_unsigned(kvm, id, fld, op, limit))
#define kvm_has_feat(kvm, id, fld, limit) \
- (get_idreg_field((kvm), id, fld) >= expand_field_sign(id, fld, limit))
+ kvm_cmp_feat(kvm, id, fld, >=, limit)
#define kvm_has_feat_enum(kvm, id, fld, val) \
- (get_idreg_field_unsigned((kvm), id, fld) == __expand_field_sign_unsigned(id, fld, val))
+ kvm_cmp_feat_unsigned(kvm, id, fld, ==, val)
#define kvm_has_feat_range(kvm, id, fld, min, max) \
- (get_idreg_field((kvm), id, fld) >= expand_field_sign(id, fld, min) && \
- get_idreg_field((kvm), id, fld) <= expand_field_sign(id, fld, max))
+ (kvm_cmp_feat(kvm, id, fld, >=, min) && \
+ kvm_cmp_feat(kvm, id, fld, <=, max))
/* Check for a given level of PAuth support */
#define kvm_has_pauth(k, l) \
--
2.39.2
With the introduction of commit e42ac1418055 ("bpf: Check unsupported ops
from the bpf_struct_ops's cfi_stubs"), a HID-BPF struct_ops containing
a .hid_hw_request() or a .hid_hw_output_report() was failing to load
as the cfi stubs were not defined.
Fix that by defining those simple static functions and restore HID-BPF
functionality.
This was detected with the HID selftests suddenly failing on Linus' tree.
Cc: stable(a)vger.kernel.org # v6.11+
Fixes: 9286675a2aed ("HID: bpf: add HID-BPF hooks for hid_hw_output_report")
Fixes: 8bd0488b5ea5 ("HID: bpf: add HID-BPF hooks for hid_hw_raw_requests")
Signed-off-by: Benjamin Tissoires <bentiss(a)kernel.org>
---
Hi,
This commit should directly go in Linus tree before we start creating
topic branches for 6.13 given that the CI is now failing on our HID
master branch.
Cheers,
Benjamin
---
drivers/hid/bpf/hid_bpf_struct_ops.c | 14 ++++++++++++++
1 file changed, 14 insertions(+)
diff --git a/drivers/hid/bpf/hid_bpf_struct_ops.c b/drivers/hid/bpf/hid_bpf_struct_ops.c
index cd696c59ba0f..702c22fae136 100644
--- a/drivers/hid/bpf/hid_bpf_struct_ops.c
+++ b/drivers/hid/bpf/hid_bpf_struct_ops.c
@@ -276,9 +276,23 @@ static int __hid_bpf_rdesc_fixup(struct hid_bpf_ctx *ctx)
return 0;
}
+static int __hid_bpf_hw_request(struct hid_bpf_ctx *ctx, unsigned char reportnum,
+ enum hid_report_type rtype, enum hid_class_request reqtype,
+ u64 source)
+{
+ return 0;
+}
+
+static int __hid_bpf_hw_output_report(struct hid_bpf_ctx *ctx, u64 source)
+{
+ return 0;
+}
+
static struct hid_bpf_ops __bpf_hid_bpf_ops = {
.hid_device_event = __hid_bpf_device_event,
.hid_rdesc_fixup = __hid_bpf_rdesc_fixup,
+ .hid_hw_request = __hid_bpf_hw_request,
+ .hid_hw_output_report = __hid_bpf_hw_output_report,
};
static struct bpf_struct_ops bpf_hid_bpf_ops = {
---
base-commit: 13882369ceb9b0953f9f5ff8563bbccfd80d0ffd
change-id: 20240927-fix-hid-bpf-stubs-f80591a673c2
Best regards,
--
Benjamin Tissoires <bentiss(a)kernel.org>
The existing code moves VF to the same namespace as the synthetic device
during netvsc_register_vf(). But, if the synthetic device is moved to a
new namespace after the VF registration, the VF won't be moved together.
To make the behavior more consistent, add a namespace check to netvsc_open(),
and move the VF if it is not in the same namespace.
Cc: stable(a)vger.kernel.org
Fixes: c0a41b887ce6 ("hv_netvsc: move VF to same namespace as netvsc device")
Signed-off-by: Haiyang Zhang <haiyangz(a)microsoft.com>
---
drivers/net/hyperv/netvsc_drv.c | 13 +++++++++++++
1 file changed, 13 insertions(+)
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 153b97f8ec0d..9caade092524 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -134,6 +134,19 @@ static int netvsc_open(struct net_device *net)
}
if (vf_netdev) {
+ if (!net_eq(dev_net(net), dev_net(vf_netdev))) {
+ ret = dev_change_net_namespace(vf_netdev, dev_net(net),
+ "eth%d");
+ if (ret)
+ netdev_err(vf_netdev,
+ "Cannot move to same namespace as %s: %d\n",
+ net->name, ret);
+ else
+ netdev_info(vf_netdev,
+ "Moved VF to namespace with: %s\n",
+ net->name);
+ }
+
/* Setting synthetic device up transparently sets
* slave as up. If open fails, then slave will be
* still be offline (and not used).
--
2.34.1
During fuzz testing, the following issue was discovered:
BUG: KMSAN: kernel-infoleak in _copy_to_iter+0x598/0x2a30
_copy_to_iter+0x598/0x2a30
__skb_datagram_iter+0x168/0x1060
skb_copy_datagram_iter+0x5b/0x220
netlink_recvmsg+0x362/0x1700
sock_recvmsg+0x2dc/0x390
__sys_recvfrom+0x381/0x6d0
__x64_sys_recvfrom+0x130/0x200
x64_sys_call+0x32c8/0x3cc0
do_syscall_64+0xd8/0x1c0
entry_SYSCALL_64_after_hwframe+0x79/0x81
Uninit was stored to memory at:
copy_to_user_state_extra+0xcc1/0x1e00
dump_one_state+0x28c/0x5f0
xfrm_state_walk+0x548/0x11e0
xfrm_dump_sa+0x1e0/0x840
netlink_dump+0x943/0x1c40
__netlink_dump_start+0x746/0xdb0
xfrm_user_rcv_msg+0x429/0xc00
netlink_rcv_skb+0x613/0x780
xfrm_netlink_rcv+0x77/0xc0
netlink_unicast+0xe90/0x1280
netlink_sendmsg+0x126d/0x1490
__sock_sendmsg+0x332/0x3d0
____sys_sendmsg+0x863/0xc30
___sys_sendmsg+0x285/0x3e0
__x64_sys_sendmsg+0x2d6/0x560
x64_sys_call+0x1316/0x3cc0
do_syscall_64+0xd8/0x1c0
entry_SYSCALL_64_after_hwframe+0x79/0x81
Uninit was created at:
__kmalloc+0x571/0xd30
attach_auth+0x106/0x3e0
xfrm_add_sa+0x2aa0/0x4230
xfrm_user_rcv_msg+0x832/0xc00
netlink_rcv_skb+0x613/0x780
xfrm_netlink_rcv+0x77/0xc0
netlink_unicast+0xe90/0x1280
netlink_sendmsg+0x126d/0x1490
__sock_sendmsg+0x332/0x3d0
____sys_sendmsg+0x863/0xc30
___sys_sendmsg+0x285/0x3e0
__x64_sys_sendmsg+0x2d6/0x560
x64_sys_call+0x1316/0x3cc0
do_syscall_64+0xd8/0x1c0
entry_SYSCALL_64_after_hwframe+0x79/0x81
Bytes 328-379 of 732 are uninitialized
Memory access of size 732 starts at ffff88800e18e000
Data copied to user address 00007ff30f48aff0
CPU: 2 PID: 18167 Comm: syz-executor.0 Not tainted 6.8.11 #1
Hardware name:
QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014
Fixes copying of xfrm algorithms where some random
data of the structure fields can end up in userspace.
Padding in structures may be filled with random (possibly sensitve)
data and should never be given directly to user-space.
A similar issue was resolved in the commit
8222d5910dae ("xfrm: Zero padding when dumping algos and encap")
Found by Linux Verification Center (linuxtesting.org) with Syzkaller.
Fixes: c7a5899eb26e ("xfrm: redact SA secret with lockdown confidentiality")
Cc: stable(a)vger.kernel.org
Co-developed-by: Boris Tonofa <b.tonofa(a)ideco.ru>
Signed-off-by: Boris Tonofa <b.tonofa(a)ideco.ru>
Signed-off-by: Petr Vaganov <p.vaganov(a)ideco.ru>
---
net/xfrm/xfrm_user.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 55f039ec3d59..97faeb3574ea 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1098,7 +1098,9 @@ static int copy_to_user_auth(struct xfrm_algo_auth *auth, struct sk_buff *skb)
if (!nla)
return -EMSGSIZE;
ap = nla_data(nla);
- memcpy(ap, auth, sizeof(struct xfrm_algo_auth));
+ strscpy_pad(ap->alg_name, auth->alg_name, sizeof(sizeof(ap->alg_name)));
+ ap->alg_key_len = auth->alg_key_len;
+ ap->alg_trunc_len = auth->alg_trunc_len;
if (redact_secret && auth->alg_key_len)
memset(ap->alg_key, 0, (auth->alg_key_len + 7) / 8);
else
--
2.46.1
Changes in v4:
- Drops link-frequencies from properties: as discussed here:
https://lore.kernel.org/r/Zv6STSKeNNlT83ux@kekkonen.localdomain
- Link to v3: https://lore.kernel.org/r/20241002-b4-master-24-11-25-ov08x40-v3-0-483bcdcf…
Changes in v3:
- Drops assigned-clock-* from description retains in example - Sakari,
Krzysztof
- Updates example fake clock names to ov08x40_* instead of copy/paste
ov9282_clk -> ov08x40_clk, ov9282_clk_parent -> ov08x40_clk_parent - bod
- Link to v2: https://lore.kernel.org/r/20241001-b4-master-24-11-25-ov08x40-v2-0-e478976b…
Changes in v2:
- Drops "-" in ovti,ov08x40.yaml after description: - Rob
- Adds ":" after first line of description text - Rob
- dts -> DT in commit log - Rob
- Removes dependency on 'xvclk' as a name in yaml
and driver - Sakari
- Uses assigned-clock, assigned-clock-parents and assigned-clock-rates -
Sakari
- Drops clock-frequency - Sakarai, Krzysztof
- Drops dovdd-supply, avdd-supply, dvdd-supply and reset-gpios
as required, its perfectly possible not to have the reset GPIO or the
power rails under control of the SoC. - bod
- Link to v1: https://lore.kernel.org/r/20240926-b4-master-24-11-25-ov08x40-v1-0-e4d5fbd3…
V1:
This series brings fixes and updates to ov08x40 which allows for use of
this sensor on the Qualcomm x1e80100 CRD but also on any other dts based
system.
Firstly there's a fix for the pseudo burst mode code that was added in
8f667d202384 ("media: ov08x40: Reduce start streaming time"). Not every I2C
controller can handle an arbitrary sized write, this is the case on
Qualcomm CAMSS/CCI I2C sensor interfaces which limit the transaction size
and communicate this limit via I2C quirks. A simple fix to optionally break
up the large submitted burst into chunks not exceeding adapter->quirk size
fixes.
Secondly then is addition of a yaml description for the ov08x40 and
extension of the driver to support OF probe and powering on of the power
rails from the driver instead of from ACPI.
Once done the sensor works without further modification on the Qualcomm
x1e80100 CRD.
Signed-off-by: Bryan O'Donoghue <bryan.odonoghue(a)linaro.org>
---
Bryan O'Donoghue (4):
media: ov08x40: Fix burst write sequence
media: dt-bindings: Add OmniVision OV08X40
media: ov08x40: Rename ext_clk to xvclk
media: ov08x40: Add OF probe support
.../bindings/media/i2c/ovti,ov08x40.yaml | 114 +++++++++++++
drivers/media/i2c/ov08x40.c | 179 ++++++++++++++++++---
2 files changed, 270 insertions(+), 23 deletions(-)
---
base-commit: 2b7275670032a98cba266bd1b8905f755b3e650f
change-id: 20240926-b4-master-24-11-25-ov08x40-c6f477aaa6a4
Best regards,
--
Bryan O'Donoghue <bryan.odonoghue(a)linaro.org>
tpm2_load_null() has weak and broken error handling:
- The return value of tpm2_create_primary() is ignored.
- Leaks TPM return codes from tpm2_load_context() to the caller.
- If the key name comparison succeeds returns previous error
instead of zero to the caller.
Implement a proper error rollback.
Cc: stable(a)vger.kernel.org # v6.10+
Fixes: eb24c9788cd9 ("tpm: disable the TPM if NULL name changes")
Signed-off-by: Jarkko Sakkinen <jarkko(a)kernel.org>
---
v5:
- Fix the TPM error code leak from tpm2_load_context().
v4:
- No changes.
v3:
- Update log messages. Previously the log message incorrectly stated
on load failure that integrity check had been failed, even tho the
check is done *after* the load operation.
v2:
- Refined the commit message.
- Reverted tpm2_create_primary() changes. They are not required if
tmp_null_key is used as the parameter.
---
drivers/char/tpm/tpm2-sessions.c | 43 +++++++++++++++++---------------
1 file changed, 23 insertions(+), 20 deletions(-)
diff --git a/drivers/char/tpm/tpm2-sessions.c b/drivers/char/tpm/tpm2-sessions.c
index 0f09ac33ae99..a856adef18d3 100644
--- a/drivers/char/tpm/tpm2-sessions.c
+++ b/drivers/char/tpm/tpm2-sessions.c
@@ -915,33 +915,36 @@ static int tpm2_parse_start_auth_session(struct tpm2_auth *auth,
static int tpm2_load_null(struct tpm_chip *chip, u32 *null_key)
{
- int rc;
unsigned int offset = 0; /* dummy offset for null seed context */
u8 name[SHA256_DIGEST_SIZE + 2];
+ u32 tmp_null_key;
+ int rc;
rc = tpm2_load_context(chip, chip->null_key_context, &offset,
- null_key);
- if (rc != -EINVAL)
- return rc;
+ &tmp_null_key);
+ if (rc != -EINVAL) {
+ if (!rc)
+ *null_key = tmp_null_key;
+ goto err;
+ }
- /* an integrity failure may mean the TPM has been reset */
- dev_err(&chip->dev, "NULL key integrity failure!\n");
- /* check the null name against what we know */
- tpm2_create_primary(chip, TPM2_RH_NULL, NULL, name);
- if (memcmp(name, chip->null_key_name, sizeof(name)) == 0)
- /* name unchanged, assume transient integrity failure */
- return rc;
- /*
- * Fatal TPM failure: the NULL seed has actually changed, so
- * the TPM must have been illegally reset. All in-kernel TPM
- * operations will fail because the NULL primary can't be
- * loaded to salt the sessions, but disable the TPM anyway so
- * userspace programmes can't be compromised by it.
- */
- dev_err(&chip->dev, "NULL name has changed, disabling TPM due to interference\n");
+ rc = tpm2_create_primary(chip, TPM2_RH_NULL, &tmp_null_key, name);
+ if (rc)
+ goto err;
+
+ /* Return the null key if the name has not been changed: */
+ if (memcmp(name, chip->null_key_name, sizeof(name)) == 0) {
+ *null_key = tmp_null_key;
+ return 0;
+ }
+
+ /* Deduce from the name change TPM interference: */
+ dev_err(&chip->dev, "the null key integrity check failedh\n");
+ tpm2_flush_context(chip, tmp_null_key);
chip->flags |= TPM_CHIP_FLAG_DISABLE;
- return rc;
+err:
+ return rc ? -ENODEV : rc;
}
/**
--
2.46.1
We use Kconfig to select the kernel stack size, doubling the default
size if KASAN is enabled.
But that actually only works if KASAN is selected from the beginning,
meaning that if KASAN config is added later (for example using
menuconfig), CONFIG_THREAD_SIZE_ORDER won't be updated, keeping the
default size, which is not enough for KASAN as reported in [1].
So fix this by moving the logic to compute the right kernel stack into a
header.
Fixes: a7555f6b62e7 ("riscv: stack: Add config of thread stack size")
Reported-by: syzbot+ba9eac24453387a9d502(a)syzkaller.appspotmail.com
Closes: https://lore.kernel.org/all/000000000000eb301906222aadc2@google.com/ [1]
Cc: stable(a)vger.kernel.org
Signed-off-by: Alexandre Ghiti <alexghiti(a)rivosinc.com>
---
arch/riscv/Kconfig | 3 +--
arch/riscv/include/asm/thread_info.h | 7 ++++++-
2 files changed, 7 insertions(+), 3 deletions(-)
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index ccbfd28f4982..b65846d02622 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -759,8 +759,7 @@ config IRQ_STACKS
config THREAD_SIZE_ORDER
int "Kernel stack size (in power-of-two numbers of page size)" if VMAP_STACK && EXPERT
range 0 4
- default 1 if 32BIT && !KASAN
- default 3 if 64BIT && KASAN
+ default 1 if 32BIT
default 2
help
Specify the Pages of thread stack size (from 4KB to 64KB), which also
diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h
index fca5c6be2b81..385b43211a71 100644
--- a/arch/riscv/include/asm/thread_info.h
+++ b/arch/riscv/include/asm/thread_info.h
@@ -13,7 +13,12 @@
#include <linux/sizes.h>
/* thread information allocation */
-#define THREAD_SIZE_ORDER CONFIG_THREAD_SIZE_ORDER
+#ifdef CONFIG_KASAN
+#define KASAN_STACK_ORDER 1
+#else
+#define KASAN_STACK_ORDER 0
+#endif
+#define THREAD_SIZE_ORDER (CONFIG_THREAD_SIZE_ORDER + KASAN_STACK_ORDER)
#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
/*
--
2.39.2
img_info->mhi_buf should be freed on error path in mhi_alloc_bhie_table().
This error case is rare but still needs to be fixed.
Found by Linux Verification Center (linuxtesting.org).
Fixes: 3000f85b8f47 ("bus: mhi: core: Add support for basic PM operations")
Cc: stable(a)vger.kernel.org
Signed-off-by: Fedor Pchelkin <pchelkin(a)ispras.ru>
---
v2: add missing Cc: stable, as Greg Kroah-Hartman's bot reported
drivers/bus/mhi/host/boot.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/bus/mhi/host/boot.c b/drivers/bus/mhi/host/boot.c
index edc0ec5a0933..738dcd11b66f 100644
--- a/drivers/bus/mhi/host/boot.c
+++ b/drivers/bus/mhi/host/boot.c
@@ -357,6 +357,7 @@ int mhi_alloc_bhie_table(struct mhi_controller *mhi_cntrl,
for (--i, --mhi_buf; i >= 0; i--, mhi_buf--)
dma_free_coherent(mhi_cntrl->cntrl_dev, mhi_buf->len,
mhi_buf->buf, mhi_buf->dma_addr);
+ kfree(img_info->mhi_buf);
error_alloc_mhi_buf:
kfree(img_info);
--
2.39.2
Brandon reports sporadic, non-sensical spikes in cumulative pressure
time (total=) when reading cpu.pressure at a high rate. This is due to
a race condition between reader aggregation and tasks changing states.
While it affects all states and all resources captured by PSI, in
practice it most likely triggers with CPU pressure, since scheduling
events are so frequent compared to other resource events.
The race context is the live snooping of ongoing stalls during a
pressure read. The read aggregates per-cpu records for stalls that
have concluded, but will also incorporate ad-hoc the duration of any
active state that hasn't been recorded yet. This is important to get
timely measurements of ongoing stalls. Those ad-hoc samples are
calculated on-the-fly up to the current time on that CPU; since the
stall hasn't concluded, it's expected that this is the minimum amount
of stall time that will enter the per-cpu records once it does.
The problem is that the path that concludes the state uses a CPU clock
read that is not synchronized against aggregators; the clock is read
outside of the seqlock protection. This allows aggregators to race and
snoop a stall with a longer duration than will actually be recorded.
With the recorded stall time being less than the last snapshot
remembered by the aggregator, a subsequent sample will underflow and
observe a bogus delta value, resulting in an erratic jump in pressure.
Fix this by moving the clock read of the state change into the seqlock
protection. This ensures no aggregation can snoop live stalls past the
time that's recorded when the state concludes.
Reported-by: Brandon Duffany <brandon(a)buildbuddy.io>
Link: https://bugzilla.kernel.org/show_bug.cgi?id=219194
Link: https://lore.kernel.org/lkml/20240827121851.GB438928@cmpxchg.org/
Fixes: df77430639c9 ("psi: Reduce calls to sched_clock() in psi")
Cc: stable(a)vger.kernel.org
Signed-off-by: Johannes Weiner <hannes(a)cmpxchg.org>
Reviewed-by: Chengming Zhou <chengming.zhou(a)linux.dev>
---
kernel/sched/psi.c | 26 ++++++++++++--------------
1 file changed, 12 insertions(+), 14 deletions(-)
diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
index 020d58967d4e..84dad1511d1e 100644
--- a/kernel/sched/psi.c
+++ b/kernel/sched/psi.c
@@ -769,12 +769,13 @@ static void record_times(struct psi_group_cpu *groupc, u64 now)
}
static void psi_group_change(struct psi_group *group, int cpu,
- unsigned int clear, unsigned int set, u64 now,
+ unsigned int clear, unsigned int set,
bool wake_clock)
{
struct psi_group_cpu *groupc;
unsigned int t, m;
u32 state_mask;
+ u64 now;
lockdep_assert_rq_held(cpu_rq(cpu));
groupc = per_cpu_ptr(group->pcpu, cpu);
@@ -789,6 +790,7 @@ static void psi_group_change(struct psi_group *group, int cpu,
* SOME and FULL time these may have resulted in.
*/
write_seqcount_begin(&groupc->seq);
+ now = cpu_clock(cpu);
/*
* Start with TSK_ONCPU, which doesn't have a corresponding
@@ -899,18 +901,15 @@ void psi_task_change(struct task_struct *task, int clear, int set)
{
int cpu = task_cpu(task);
struct psi_group *group;
- u64 now;
if (!task->pid)
return;
psi_flags_change(task, clear, set);
- now = cpu_clock(cpu);
-
group = task_psi_group(task);
do {
- psi_group_change(group, cpu, clear, set, now, true);
+ psi_group_change(group, cpu, clear, set, true);
} while ((group = group->parent));
}
@@ -919,7 +918,6 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next,
{
struct psi_group *group, *common = NULL;
int cpu = task_cpu(prev);
- u64 now = cpu_clock(cpu);
if (next->pid) {
psi_flags_change(next, 0, TSK_ONCPU);
@@ -936,7 +934,7 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next,
break;
}
- psi_group_change(group, cpu, 0, TSK_ONCPU, now, true);
+ psi_group_change(group, cpu, 0, TSK_ONCPU, true);
} while ((group = group->parent));
}
@@ -974,7 +972,7 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next,
do {
if (group == common)
break;
- psi_group_change(group, cpu, clear, set, now, wake_clock);
+ psi_group_change(group, cpu, clear, set, wake_clock);
} while ((group = group->parent));
/*
@@ -986,7 +984,7 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next,
if ((prev->psi_flags ^ next->psi_flags) & ~TSK_ONCPU) {
clear &= ~TSK_ONCPU;
for (; group; group = group->parent)
- psi_group_change(group, cpu, clear, set, now, wake_clock);
+ psi_group_change(group, cpu, clear, set, wake_clock);
}
}
}
@@ -997,8 +995,8 @@ void psi_account_irqtime(struct rq *rq, struct task_struct *curr, struct task_st
int cpu = task_cpu(curr);
struct psi_group *group;
struct psi_group_cpu *groupc;
- u64 now, irq;
s64 delta;
+ u64 irq;
if (static_branch_likely(&psi_disabled))
return;
@@ -1011,7 +1009,6 @@ void psi_account_irqtime(struct rq *rq, struct task_struct *curr, struct task_st
if (prev && task_psi_group(prev) == group)
return;
- now = cpu_clock(cpu);
irq = irq_time_read(cpu);
delta = (s64)(irq - rq->psi_irq_time);
if (delta < 0)
@@ -1019,12 +1016,15 @@ void psi_account_irqtime(struct rq *rq, struct task_struct *curr, struct task_st
rq->psi_irq_time = irq;
do {
+ u64 now;
+
if (!group->enabled)
continue;
groupc = per_cpu_ptr(group->pcpu, cpu);
write_seqcount_begin(&groupc->seq);
+ now = cpu_clock(cpu);
record_times(groupc, now);
groupc->times[PSI_IRQ_FULL] += delta;
@@ -1223,11 +1223,9 @@ void psi_cgroup_restart(struct psi_group *group)
for_each_possible_cpu(cpu) {
struct rq *rq = cpu_rq(cpu);
struct rq_flags rf;
- u64 now;
rq_lock_irq(rq, &rf);
- now = cpu_clock(cpu);
- psi_group_change(group, cpu, 0, 0, now, true);
+ psi_group_change(group, cpu, 0, 0, true);
rq_unlock_irq(rq, &rf);
}
}
--
2.46.2
From: Willem de Bruijn <willemb(a)google.com>
Detect gso fraglist skbs with corrupted geometry (see below) and
pass these to skb_segment instead of skb_segment_list, as the first
can segment them correctly.
Valid SKB_GSO_FRAGLIST skbs
- consist of two or more segments
- the head_skb holds the protocol headers plus first gso_size
- one or more frag_list skbs hold exactly one segment
- all but the last must be gso_size
Optional datapath hooks such as NAT and BPF (bpf_skb_pull_data) can
modify these skbs, breaking these invariants.
In extreme cases they pull all data into skb linear. For UDP, this
causes a NULL ptr deref in __udpv4_gso_segment_list_csum at
udp_hdr(seg->next)->dest.
Detect invalid geometry due to pull, by checking head_skb size.
Don't just drop, as this may blackhole a destination. Convert to be
able to pass to regular skb_segment.
Link: https://lore.kernel.org/netdev/20240428142913.18666-1-shiming.cheng@mediate…
Fixes: 9fd1ff5d2ac7 ("udp: Support UDP fraglist GRO/GSO.")
Signed-off-by: Willem de Bruijn <willemb(a)google.com>
Cc: stable(a)vger.kernel.org
---
v1->v2
- update Fixes tag to point to udp specific patch
- reinit uh->check to pseudo csum as required by CHECKSUM_PARTIAL
---
net/ipv4/udp_offload.c | 22 ++++++++++++++++++++--
1 file changed, 20 insertions(+), 2 deletions(-)
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index d842303587af..a5be6e4ed326 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -296,8 +296,26 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
return NULL;
}
- if (skb_shinfo(gso_skb)->gso_type & SKB_GSO_FRAGLIST)
- return __udp_gso_segment_list(gso_skb, features, is_ipv6);
+ if (skb_shinfo(gso_skb)->gso_type & SKB_GSO_FRAGLIST) {
+ /* Detect modified geometry and pass those to skb_segment. */
+ if (skb_pagelen(gso_skb) - sizeof(*uh) == skb_shinfo(gso_skb)->gso_size)
+ return __udp_gso_segment_list(gso_skb, features, is_ipv6);
+
+ /* Setup csum, as fraglist skips this in udp4_gro_receive. */
+ gso_skb->csum_start = skb_transport_header(gso_skb) - gso_skb->head;
+ gso_skb->csum_offset = offsetof(struct udphdr, check);
+ gso_skb->ip_summed = CHECKSUM_PARTIAL;
+
+ uh = udp_hdr(gso_skb);
+ if (is_ipv6)
+ uh->check = ~udp_v6_check(gso_skb->len,
+ &ipv6_hdr(gso_skb)->saddr,
+ &ipv6_hdr(gso_skb)->daddr, 0);
+ else
+ uh->check = ~udp_v4_check(gso_skb->len,
+ ip_hdr(gso_skb)->saddr,
+ ip_hdr(gso_skb)->daddr, 0);
+ }
skb_pull(gso_skb, sizeof(*uh));
--
2.46.1.824.gd892dcdcdd-goog
When sending data using DMA at high baudrate (4 Mbdps in local test case) to
a device with small RX buffer which keeps asserting RTS after every received
byte, it is possible that the iMX UART driver would not recognize the falling
edge of RTS input signal and get stuck, unable to transmit any more data.
This condition happens when the following sequence of events occur:
- imx_uart_mctrl_check() is called at some point and takes a snapshot of UART
control signal status into sport->old_status using imx_uart_get_hwmctrl().
The RTSS/TIOCM_CTS bit is of interest here (*).
- DMA transfer occurs, the remote device asserts RTS signal after each byte.
The i.MX UART driver recognizes each such RTS signal change, raises an
interrupt with USR1 register RTSD bit set, which leads to invocation of
__imx_uart_rtsint(), which calls uart_handle_cts_change().
- If the RTS signal is deasserted, uart_handle_cts_change() clears
port->hw_stopped and unblocks the port for further data transfers.
- If the RTS is asserted, uart_handle_cts_change() sets port->hw_stopped
and blocks the port for further data transfers. This may occur as the
last interrupt of a transfer, which means port->hw_stopped remains set
and the port remains blocked (**).
- Any further data transfer attempts will trigger imx_uart_mctrl_check(),
which will read current status of UART control signals by calling
imx_uart_get_hwmctrl() (***) and compare it with sport->old_status .
- If current status differs from sport->old_status for RTS signal,
uart_handle_cts_change() is called and possibly unblocks the port
by clearing port->hw_stopped .
- If current status does not differ from sport->old_status for RTS
signal, no action occurs. This may occur in case prior snapshot (*)
was taken before any transfer so the RTS is deasserted, current
snapshot (***) was taken after a transfer and therefore RTS is
deasserted again, which means current status and sport->old_status
are identical. In case (**) triggered when RTS got asserted, and
made port->hw_stopped set, the port->hw_stopped will remain set
because no change on RTS line is recognized by this driver and
uart_handle_cts_change() is not called from here to unblock the
port->hw_stopped.
Update sport->old_status in __imx_uart_rtsint() accordingly to make
imx_uart_mctrl_check() detect such RTS change. Note that TIOCM_CAR
and TIOCM_RI bits in sport->old_status do not suffer from this problem.
Fixes: ceca629e0b48 ("[ARM] 2971/1: i.MX uart handle rts irq")
Reviewed-by: Esben Haabendal <esben(a)geanix.com>
Signed-off-by: Marek Vasut <marex(a)denx.de>
---
Cc: Christoph Niedermaier <cniedermaier(a)dh-electronics.com>
Cc: Christophe JAILLET <christophe.jaillet(a)wanadoo.fr>
Cc: Esben Haabendal <esben(a)geanix.com>
Cc: Fabio Estevam <festevam(a)gmail.com>
Cc: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Cc: Jiri Slaby <jirislaby(a)kernel.org>
Cc: Lino Sanfilippo <l.sanfilippo(a)kunbus.com>
Cc: Pengutronix Kernel Team <kernel(a)pengutronix.de>
Cc: Rasmus Villemoes <linux(a)rasmusvillemoes.dk>
Cc: Rickard x Andersson <rickaran(a)axis.com>
Cc: Sascha Hauer <s.hauer(a)pengutronix.de>
Cc: Shawn Guo <shawnguo(a)kernel.org>
Cc: Stefan Eichenberger <stefan.eichenberger(a)toradex.com>
Cc: Uwe Kleine-König <u.kleine-koenig(a)baylibre.com>
Cc: imx(a)lists.linux.dev
Cc: linux-arm-kernel(a)lists.infradead.org
Cc: linux-serial(a)vger.kernel.org
Cc: stable(a)vger.kernel.org
---
V2: - Add code comment
- Add RB from Esben
---
drivers/tty/serial/imx.c | 15 +++++++++++++++
1 file changed, 15 insertions(+)
diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c
index 67d4a72eda770..90974d338f3c0 100644
--- a/drivers/tty/serial/imx.c
+++ b/drivers/tty/serial/imx.c
@@ -762,6 +762,21 @@ static irqreturn_t __imx_uart_rtsint(int irq, void *dev_id)
imx_uart_writel(sport, USR1_RTSD, USR1);
usr1 = imx_uart_readl(sport, USR1) & USR1_RTSS;
+ /*
+ * Update sport->old_status here, so any follow-up calls to
+ * imx_uart_mctrl_check() will be able to recognize that RTS
+ * state changed since last imx_uart_mctrl_check() call.
+ *
+ * In case RTS has been detected as asserted here and later on
+ * deasserted by the time imx_uart_mctrl_check() was called,
+ * imx_uart_mctrl_check() can detect the RTS state change and
+ * trigger uart_handle_cts_change() to unblock the port for
+ * further TX transfers.
+ */
+ if (usr1 & USR1_RTSS)
+ sport->old_status |= TIOCM_CTS;
+ else
+ sport->old_status &= ~TIOCM_CTS;
uart_handle_cts_change(&sport->port, usr1);
wake_up_interruptible(&sport->port.state->port.delta_msr_wait);
--
2.45.2
Hi
In downstream Debian we got a report from Eric Degenetais, in
https://bugs.debian.org/1081833 that after the update to the 6.1.106
based version, there were regular cracks in HDMI sound during
playback.
Eric was able to bisec the issue down to
92afcc310038ebe5d66c689bb0bf418f5451201c in the v6.1.y series which
got applied in 6.1.104.
Cf. https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1081833#47
#regzbot introduced: 92afcc310038ebe5d66c689bb0bf418f5451201c
#regzbot link: https://bugs.debian.org/1081833
It should be noted that Eric as well tried more recent stable series
as well, in particular did test as well 6.10.6 based version back on
20th september, and the issue was reproducible there as well.
Is there anything else we can try to provide?
Regards,
Salvatore
Hi,
Arguably the most important block stable patch I don't see in the
most recent review series sent out, which is odd because it's
certainly marked with fixes and a stable tag. It's this one:
commit e3accac1a976e65491a9b9fba82ce8ddbd3d2389
Author: Damien Le Moal <dlemoal(a)kernel.org>
Date: Tue Sep 17 22:32:31 2024 +0900
block: Fix elv_iosched_local_module handling of "none" scheduler
and it really must go into -stable asap as it's fixing a real issue
that I've had multiple users email me about. Can we get this added
to the current 6.11-stable series so we don't miss another release?
It's also quite possible that I'm blind and it is indeed in the queue
or already there, but for the life of me I can't see it.
--
Jens Axboe
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x e6a3531dd542cb127c8de32ab1e54a48ae19962b
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100247-friction-answering-6c42@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
e6a3531dd542 ("dm-verity: restart or panic on an I/O error")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e6a3531dd542cb127c8de32ab1e54a48ae19962b Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka(a)redhat.com>
Date: Tue, 24 Sep 2024 15:18:29 +0200
Subject: [PATCH] dm-verity: restart or panic on an I/O error
Maxim Suhanov reported that dm-verity doesn't crash if an I/O error
happens. In theory, this could be used to subvert security, because an
attacker can create sectors that return error with the Write Uncorrectable
command. Some programs may misbehave if they have to deal with EIO.
This commit fixes dm-verity, so that if "panic_on_corruption" or
"restart_on_corruption" was specified and an I/O error happens, the
machine will panic or restart.
This commit also changes kernel_restart to emergency_restart -
kernel_restart calls reboot notifiers and these reboot notifiers may wait
for the bio that failed. emergency_restart doesn't call the notifiers.
Reported-by: Maxim Suhanov <dfirblog(a)gmail.com>
Signed-off-by: Mikulas Patocka <mpatocka(a)redhat.com>
Cc: stable(a)vger.kernel.org
diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
index cf659c8feb29..a95c1b9cc5b5 100644
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c
@@ -272,8 +272,10 @@ static int verity_handle_err(struct dm_verity *v, enum verity_block_type type,
if (v->mode == DM_VERITY_MODE_LOGGING)
return 0;
- if (v->mode == DM_VERITY_MODE_RESTART)
- kernel_restart("dm-verity device corrupted");
+ if (v->mode == DM_VERITY_MODE_RESTART) {
+ pr_emerg("dm-verity device corrupted\n");
+ emergency_restart();
+ }
if (v->mode == DM_VERITY_MODE_PANIC)
panic("dm-verity device corrupted");
@@ -596,6 +598,23 @@ static void verity_finish_io(struct dm_verity_io *io, blk_status_t status)
if (!static_branch_unlikely(&use_bh_wq_enabled) || !io->in_bh)
verity_fec_finish_io(io);
+ if (unlikely(status != BLK_STS_OK) &&
+ unlikely(!(bio->bi_opf & REQ_RAHEAD)) &&
+ !verity_is_system_shutting_down()) {
+ if (v->mode == DM_VERITY_MODE_RESTART ||
+ v->mode == DM_VERITY_MODE_PANIC)
+ DMERR_LIMIT("%s has error: %s", v->data_dev->name,
+ blk_status_to_str(status));
+
+ if (v->mode == DM_VERITY_MODE_RESTART) {
+ pr_emerg("dm-verity device corrupted\n");
+ emergency_restart();
+ }
+
+ if (v->mode == DM_VERITY_MODE_PANIC)
+ panic("dm-verity device corrupted");
+ }
+
bio_endio(bio);
}
Hi,
some of our customers (Proxmox VE) are seeing issues with file
corruptions when accessing contents located on CephFS via the in-kernel
Ceph client [0,1], we managed to reproduce this regression on kernels up
to the latest 6.11-rc6.
Accessing the same content on the CephFS using the FUSE client or the
in-kernel ceph client with older kernels (Ubuntu kernel on v6.5) does
not show file corruptions.
Unfortunately the corruption is hard to reproduce, seemingly only a
small subset of files is affected. However, once a file is affected, the
issue is persistent and can easily be reproduced.
Bisection with the reproducer points to this commit:
"92b6cc5d: netfs: Add iov_iters to (sub)requests to describe various
buffers"
Description of the issue:
A file was copied from local filesystem to cephfs via:
```
cp /tmp/proxmox-backup-server_3.2-1.iso
/mnt/pve/cephfs/proxmox-backup-server_3.2-1.iso
```
* sha256sum on local
filesystem:`1d19698e8f7e769cf0a0dcc7ba0018ef5416c5ec495d5e61313f9c84a4237607
/tmp/proxmox-backup-server_3.2-1.iso`
* sha256sum on cephfs with kernel up to above commit:
`1d19698e8f7e769cf0a0dcc7ba0018ef5416c5ec495d5e61313f9c84a4237607
/mnt/pve/cephfs/proxmox-backup-server_3.2-1.iso`
* sha256sum on cephfs with kernel after above commit:
`89ad3620bf7b1e0913b534516cfbe48580efbaec944b79951e2c14e5e551f736
/mnt/pve/cephfs/proxmox-backup-server_3.2-1.iso`
* removing and/or recopying the file does not change the issue, the
corrupt checksum remains the same.
* accessing the same file from different clients results in the same
output: the one with above patch applied do show the incorrect checksum,
ones without the patch show the correct checksum.
* the issue persists even across reboot of the ceph cluster and/or clients.
* the file is indeed corrupt after reading, as verified by a `cmp -b`.
Interestingly, the first 4M contain the correct data, the following 4M
are read as all zeros, which differs from the original data.
* the issue is related to the readahead size: mounting the cephfs with a
`rasize=0` makes the issue disappear, same is true for sizes up to 128k
(please note that the ranges as initially reported on the mailing list
[3] are not correct for rasize [0..128k] the file is not corrupted).
In the bugtracker issue [4] I attached a ftrace with "*ceph*" as filter
while performing a read on the latest kernel 6.11-rc6 while performing
```
dd if=/mnt/pve/cephfs/proxmox-backup-server_3.2-1.iso of=/tmp/test.out
bs=8M count=1
```
the relevant part shown by task `dd-26192`.
Please let me know if I can provide further information or debug outputs
in order to narrow down the issue.
[0] https://forum.proxmox.com/threads/78340/post-676129
[1] https://forum.proxmox.com/threads/149249/
[2] https://forum.proxmox.com/threads/151291/
[3]
https://lore.kernel.org/lkml/db686d0c-2f27-47c8-8c14-26969433b13b@proxmox.c…
[4] https://bugzilla.kernel.org/show_bug.cgi?id=219237
#regzbot introduced: 92b6cc5d
Regards,
Christian Ebner
It is possible that an interrupt is disabled and masked at the same time.
When the interrupt is enabled again by enable_irq(), only plic_irq_enable()
is called, not plic_irq_unmask(). The interrupt remains masked and never
raises.
An example where interrupt is both disabled and masked is when
handle_fasteoi_irq() is the handler, and IRQS_ONESHOT is set. The interrupt
handler:
1. Mask the interrupt
2. Handle the interrupt
3. Check if interrupt is still enabled, and unmask it (see
cond_unmask_eoi_irq())
If another task disables the interrupt in the middle of the above steps,
the interrupt will not get unmasked, and will remain masked when it is
enabled in the future.
The problem is occasionally observed when PREEMPT_RT is enabled, because
PREEMPT_RT add the IRQS_ONESHOT flag. But PREEMPT_RT only makes the
problem more likely to appear, the bug has been around since
commit a1706a1c5062 ("irqchip/sifive-plic: Separate the enable and mask
operations").
Fix it by unmasking interrupt in plic_irq_enable().
Fixes: a1706a1c5062 ("irqchip/sifive-plic: Separate the enable and mask operations").
Signed-off-by: Nam Cao <namcao(a)linutronix.de>
Cc: stable(a)vger.kernel.org
---
drivers/irqchip/irq-sifive-plic.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c
index 2f6ef5c495bd..0efbf14ec9fa 100644
--- a/drivers/irqchip/irq-sifive-plic.c
+++ b/drivers/irqchip/irq-sifive-plic.c
@@ -128,6 +128,9 @@ static inline void plic_irq_toggle(const struct cpumask *mask,
static void plic_irq_enable(struct irq_data *d)
{
+ struct plic_priv *priv = irq_data_get_irq_chip_data(d);
+
+ writel(1, priv->regs + PRIORITY_BASE + d->hwirq * PRIORITY_PER_ID);
plic_irq_toggle(irq_data_get_effective_affinity_mask(d), d, 1);
}
--
2.39.5
Changes in v3:
- Drops assigned-clock-* from description retains in example - Sakari,
Krzysztof
- Updates example fake clock names to ov08x40_* instead of copy/paste
ov9282_clk -> ov08x40_clk, ov9282_clk_parent -> ov08x40_clk_parent - bod
- Link to v2: https://lore.kernel.org/r/20241001-b4-master-24-11-25-ov08x40-v2-0-e478976b…
Changes in v2:
- Drops "-" in ovti,ov08x40.yaml after description: - Rob
- Adds ":" after first line of description text - Rob
- dts -> DT in commit log - Rob
- Removes dependency on 'xvclk' as a name in yaml
and driver - Sakari
- Uses assigned-clock, assigned-clock-parents and assigned-clock-rates -
Sakari
- Drops clock-frequency - Sakarai, Krzysztof
- Drops dovdd-supply, avdd-supply, dvdd-supply and reset-gpios
as required, its perfectly possible not to have the reset GPIO or the
power rails under control of the SoC. - bod
- Link to v1: https://lore.kernel.org/r/20240926-b4-master-24-11-25-ov08x40-v1-0-e4d5fbd3…
V1:
This series brings fixes and updates to ov08x40 which allows for use of
this sensor on the Qualcomm x1e80100 CRD but also on any other dts based
system.
Firstly there's a fix for the pseudo burst mode code that was added in
8f667d202384 ("media: ov08x40: Reduce start streaming time"). Not every I2C
controller can handle an arbitrary sized write, this is the case on
Qualcomm CAMSS/CCI I2C sensor interfaces which limit the transaction size
and communicate this limit via I2C quirks. A simple fix to optionally break
up the large submitted burst into chunks not exceeding adapter->quirk size
fixes.
Secondly then is addition of a yaml description for the ov08x40 and
extension of the driver to support OF probe and powering on of the power
rails from the driver instead of from ACPI.
Once done the sensor works without further modification on the Qualcomm
x1e80100 CRD.
Signed-off-by: Bryan O'Donoghue <bryan.odonoghue(a)linaro.org>
---
Bryan O'Donoghue (4):
media: ov08x40: Fix burst write sequence
media: dt-bindings: Add OmniVision OV08X40
media: ov08x40: Rename ext_clk to xvclk
media: ov08x40: Add OF probe support
.../bindings/media/i2c/ovti,ov08x40.yaml | 116 +++++++++++++
drivers/media/i2c/ov08x40.c | 179 ++++++++++++++++++---
2 files changed, 272 insertions(+), 23 deletions(-)
---
base-commit: 2b7275670032a98cba266bd1b8905f755b3e650f
change-id: 20240926-b4-master-24-11-25-ov08x40-c6f477aaa6a4
Best regards,
--
Bryan O'Donoghue <bryan.odonoghue(a)linaro.org>
Hi,
Subject: [PATCH net] netkit: Assign missing bpf_net_context
commit id: 157f29152b61ca41809dd7ead29f5733adeced19
kernel versions: 6.11
The patch was sent before 6.11 was cut final, but it didn't make it.
We are seeing kernel crashes on 6.11.1 without this commit, with the
exact signature that the patch fixes.
Thank you.
Regards,
Maksym
commit 41130c32f3a18fcc930316da17f3a5f3bc326aa1 upstream
This was not marked as stable (but has a Fixes: tag), but causes the USB stack
to crash with 6.1.x kernels.
The patch does not apply when cherry-picking because of the context.
Should I send the patch again with updated context so that it applies without
conflicts?
Best regards,
Georg
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x d4fc4d01471528da8a9797a065982e05090e1d81
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100100-emperor-thespian-397f@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
d4fc4d014715 ("x86/tdx: Fix "in-kernel MMIO" check")
859e63b789d6 ("x86/tdx: Convert shared memory back to private on kexec")
533c67e63584 ("mm/mglru: add dummy pmd_dirty()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d4fc4d01471528da8a9797a065982e05090e1d81 Mon Sep 17 00:00:00 2001
From: "Alexey Gladkov (Intel)" <legion(a)kernel.org>
Date: Fri, 13 Sep 2024 19:05:56 +0200
Subject: [PATCH] x86/tdx: Fix "in-kernel MMIO" check
TDX only supports kernel-initiated MMIO operations. The handle_mmio()
function checks if the #VE exception occurred in the kernel and rejects
the operation if it did not.
However, userspace can deceive the kernel into performing MMIO on its
behalf. For example, if userspace can point a syscall to an MMIO address,
syscall does get_user() or put_user() on it, triggering MMIO #VE. The
kernel will treat the #VE as in-kernel MMIO.
Ensure that the target MMIO address is within the kernel before decoding
instruction.
Fixes: 31d58c4e557d ("x86/tdx: Handle in-kernel MMIO")
Signed-off-by: Alexey Gladkov (Intel) <legion(a)kernel.org>
Signed-off-by: Dave Hansen <dave.hansen(a)linux.intel.com>
Reviewed-by: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Acked-by: Dave Hansen <dave.hansen(a)linux.intel.com>
Cc:stable@vger.kernel.org
Link: https://lore.kernel.org/all/565a804b80387970460a4ebc67c88d1380f61ad1.172623…
diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
index da8b66dce0da..327c45c5013f 100644
--- a/arch/x86/coco/tdx/tdx.c
+++ b/arch/x86/coco/tdx/tdx.c
@@ -16,6 +16,7 @@
#include <asm/insn-eval.h>
#include <asm/pgtable.h>
#include <asm/set_memory.h>
+#include <asm/traps.h>
/* MMIO direction */
#define EPT_READ 0
@@ -433,6 +434,11 @@ static int handle_mmio(struct pt_regs *regs, struct ve_info *ve)
return -EINVAL;
}
+ if (!fault_in_kernel_space(ve->gla)) {
+ WARN_ONCE(1, "Access to userspace address is not supported");
+ return -EINVAL;
+ }
+
/*
* Reject EPT violation #VEs that split pages.
*
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x d4fc4d01471528da8a9797a065982e05090e1d81
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100104-everybody-retrace-89ed@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
d4fc4d014715 ("x86/tdx: Fix "in-kernel MMIO" check")
859e63b789d6 ("x86/tdx: Convert shared memory back to private on kexec")
533c67e63584 ("mm/mglru: add dummy pmd_dirty()")
df57721f9a63 ("Merge tag 'x86_shstk_for_6.6-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d4fc4d01471528da8a9797a065982e05090e1d81 Mon Sep 17 00:00:00 2001
From: "Alexey Gladkov (Intel)" <legion(a)kernel.org>
Date: Fri, 13 Sep 2024 19:05:56 +0200
Subject: [PATCH] x86/tdx: Fix "in-kernel MMIO" check
TDX only supports kernel-initiated MMIO operations. The handle_mmio()
function checks if the #VE exception occurred in the kernel and rejects
the operation if it did not.
However, userspace can deceive the kernel into performing MMIO on its
behalf. For example, if userspace can point a syscall to an MMIO address,
syscall does get_user() or put_user() on it, triggering MMIO #VE. The
kernel will treat the #VE as in-kernel MMIO.
Ensure that the target MMIO address is within the kernel before decoding
instruction.
Fixes: 31d58c4e557d ("x86/tdx: Handle in-kernel MMIO")
Signed-off-by: Alexey Gladkov (Intel) <legion(a)kernel.org>
Signed-off-by: Dave Hansen <dave.hansen(a)linux.intel.com>
Reviewed-by: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Acked-by: Dave Hansen <dave.hansen(a)linux.intel.com>
Cc:stable@vger.kernel.org
Link: https://lore.kernel.org/all/565a804b80387970460a4ebc67c88d1380f61ad1.172623…
diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
index da8b66dce0da..327c45c5013f 100644
--- a/arch/x86/coco/tdx/tdx.c
+++ b/arch/x86/coco/tdx/tdx.c
@@ -16,6 +16,7 @@
#include <asm/insn-eval.h>
#include <asm/pgtable.h>
#include <asm/set_memory.h>
+#include <asm/traps.h>
/* MMIO direction */
#define EPT_READ 0
@@ -433,6 +434,11 @@ static int handle_mmio(struct pt_regs *regs, struct ve_info *ve)
return -EINVAL;
}
+ if (!fault_in_kernel_space(ve->gla)) {
+ WARN_ONCE(1, "Access to userspace address is not supported");
+ return -EINVAL;
+ }
+
/*
* Reject EPT violation #VEs that split pages.
*
When sending data using DMA at high baudrate (4 Mbdps in local test case) to
a device with small RX buffer which keeps asserting RTS after every received
byte, it is possible that the iMX UART driver would not recognize the falling
edge of RTS input signal and get stuck, unable to transmit any more data.
This condition happens when the following sequence of events occur:
- imx_uart_mctrl_check() is called at some point and takes a snapshot of UART
control signal status into sport->old_status using imx_uart_get_hwmctrl().
The RTSS/TIOCM_CTS bit is of interest here (*).
- DMA transfer occurs, the remote device asserts RTS signal after each byte.
The i.MX UART driver recognizes each such RTS signal change, raises an
interrupt with USR1 register RTSD bit set, which leads to invocation of
__imx_uart_rtsint(), which calls uart_handle_cts_change().
- If the RTS signal is deasserted, uart_handle_cts_change() clears
port->hw_stopped and unblocks the port for further data transfers.
- If the RTS is asserted, uart_handle_cts_change() sets port->hw_stopped
and blocks the port for further data transfers. This may occur as the
last interrupt of a transfer, which means port->hw_stopped remains set
and the port remains blocked (**).
- Any further data transfer attempts will trigger imx_uart_mctrl_check(),
which will read current status of UART control signals by calling
imx_uart_get_hwmctrl() (***) and compare it with sport->old_status .
- If current status differs from sport->old_status for RTS signal,
uart_handle_cts_change() is called and possibly unblocks the port
by clearing port->hw_stopped .
- If current status does not differ from sport->old_status for RTS
signal, no action occurs. This may occur in case prior snapshot (*)
was taken before any transfer so the RTS is deasserted, current
snapshot (***) was taken after a transfer and therefore RTS is
deasserted again, which means current status and sport->old_status
are identical. In case (**) triggered when RTS got asserted, and
made port->hw_stopped set, the port->hw_stopped will remain set
because no change on RTS line is recognized by this driver and
uart_handle_cts_change() is not called from here to unblock the
port->hw_stopped.
Update sport->old_status in __imx_uart_rtsint() accordingly to make
imx_uart_mctrl_check() detect such RTS change. Note that TIOCM_CAR
and TIOCM_RI bits in sport->old_status do not suffer from this problem.
Fixes: ceca629e0b48 ("[ARM] 2971/1: i.MX uart handle rts irq")
Signed-off-by: Marek Vasut <marex(a)denx.de>
---
Cc: "Uwe Kleine-König" <u.kleine-koenig(a)pengutronix.de>
Cc: Christoph Niedermaier <cniedermaier(a)dh-electronics.com>
Cc: Christophe JAILLET <christophe.jaillet(a)wanadoo.fr>
Cc: Esben Haabendal <esben(a)geanix.com>
Cc: Fabio Estevam <festevam(a)gmail.com>
Cc: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Cc: Jiri Slaby <jirislaby(a)kernel.org>
Cc: Lino Sanfilippo <l.sanfilippo(a)kunbus.com>
Cc: Pengutronix Kernel Team <kernel(a)pengutronix.de>
Cc: Rasmus Villemoes <linux(a)rasmusvillemoes.dk>
Cc: Rickard x Andersson <rickaran(a)axis.com>
Cc: Sascha Hauer <s.hauer(a)pengutronix.de>
Cc: Shawn Guo <shawnguo(a)kernel.org>
Cc: Stefan Eichenberger <stefan.eichenberger(a)toradex.com>
Cc: imx(a)lists.linux.dev
Cc: linux-arm-kernel(a)lists.infradead.org
Cc: linux-serial(a)vger.kernel.org
Cc: stable(a)vger.kernel.org
---
drivers/tty/serial/imx.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c
index 67d4a72eda770..3ad7f42790ef9 100644
--- a/drivers/tty/serial/imx.c
+++ b/drivers/tty/serial/imx.c
@@ -762,6 +762,10 @@ static irqreturn_t __imx_uart_rtsint(int irq, void *dev_id)
imx_uart_writel(sport, USR1_RTSD, USR1);
usr1 = imx_uart_readl(sport, USR1) & USR1_RTSS;
+ if (usr1 & USR1_RTSS)
+ sport->old_status |= TIOCM_CTS;
+ else
+ sport->old_status &= ~TIOCM_CTS;
uart_handle_cts_change(&sport->port, usr1);
wake_up_interruptible(&sport->port.state->port.delta_msr_wait);
--
2.45.2
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x 73aeab373557fa6ee4ae0b742c6211ccd9859280
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100234-dizzy-backlands-30eb@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
73aeab373557 ("block, bfq: fix procress reference leakage for bfqq in merge chain")
9778369a2d6c ("block, bfq: split sync bfq_queues on a per-actuator basis")
246cf66e300b ("block, bfq: fix uaf for bfqq in bfq_exit_icq_bfqq")
337366e02b37 ("block, bfq: replace 0/1 with false/true in bic apis")
64dc8c732f5c ("block, bfq: fix possible uaf for 'bfqq->bic'")
dc469ba2e790 ("block/bfq: Use the new blk_opf_t type")
4e54a2493e58 ("bfq: Get rid of __bio_blkcg() usage")
09f871868080 ("bfq: Track whether bfq_group is still online")
ea591cd4eb27 ("bfq: Update cgroup information before merging bio")
3bc5e683c67d ("bfq: Split shared queues on move between cgroups")
f6bad159f5d5 ("block/bfq-iosched.c: use "false" rather than "BLK_RW_ASYNC"")
a0725c22cd84 ("bfq: use bfq_bic_lookup in bfq_limit_depth")
1f18b7005b49 ("bfq: Limit waker detection in time")
76f1df88bbc2 ("bfq: Limit number of requests consumed by each cgroup")
44dfa279f117 ("bfq: Store full bitmap depth in bfq_data")
ae0f1a732f4a ("blk-mq: Stop using pointers for blk_mq_tags bitmap tags")
e155b0c238b2 ("blk-mq: Use shared tags for shared sbitmap support")
645db34e5050 ("blk-mq: Refactor and rename blk_mq_free_map_and_{requests->rqs}()")
63064be150e4 ("blk-mq: Add blk_mq_alloc_map_and_rqs()")
a7e7388dced4 ("blk-mq: Add blk_mq_tag_update_sched_shared_sbitmap()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 73aeab373557fa6ee4ae0b742c6211ccd9859280 Mon Sep 17 00:00:00 2001
From: Yu Kuai <yukuai3(a)huawei.com>
Date: Mon, 9 Sep 2024 21:41:49 +0800
Subject: [PATCH] block, bfq: fix procress reference leakage for bfqq in merge
chain
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Original state:
Process 1 Process 2 Process 3 Process 4
(BIC1) (BIC2) (BIC3) (BIC4)
Λ | | |
\--------------\ \-------------\ \-------------\|
V V V
bfqq1--------->bfqq2---------->bfqq3----------->bfqq4
ref 0 1 2 4
After commit 0e456dba86c7 ("block, bfq: choose the last bfqq from merge
chain in bfq_setup_cooperator()"), if P1 issues a new IO:
Without the patch:
Process 1 Process 2 Process 3 Process 4
(BIC1) (BIC2) (BIC3) (BIC4)
Λ | | |
\------------------------------\ \-------------\|
V V
bfqq1--------->bfqq2---------->bfqq3----------->bfqq4
ref 0 0 2 4
bfqq3 will be used to handle IO from P1, this is not expected, IO
should be redirected to bfqq4;
With the patch:
-------------------------------------------
| |
Process 1 Process 2 Process 3 | Process 4
(BIC1) (BIC2) (BIC3) | (BIC4)
| | | |
\-------------\ \-------------\|
V V
bfqq1--------->bfqq2---------->bfqq3----------->bfqq4
ref 0 0 2 4
IO is redirected to bfqq4, however, procress reference of bfqq3 is still
2, while there is only P2 using it.
Fix the problem by calling bfq_merge_bfqqs() for each bfqq in the merge
chain. Also change bfqq_merge_bfqqs() to return new_bfqq to simplify
code.
Fixes: 0e456dba86c7 ("block, bfq: choose the last bfqq from merge chain in bfq_setup_cooperator()")
Signed-off-by: Yu Kuai <yukuai3(a)huawei.com>
Link: https://lore.kernel.org/r/20240909134154.954924-3-yukuai1@huaweicloud.com
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index d5d39974c674..f4192d5411d2 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -3129,10 +3129,12 @@ void bfq_release_process_ref(struct bfq_data *bfqd, struct bfq_queue *bfqq)
bfq_put_queue(bfqq);
}
-static void
-bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
- struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
+static struct bfq_queue *bfq_merge_bfqqs(struct bfq_data *bfqd,
+ struct bfq_io_cq *bic,
+ struct bfq_queue *bfqq)
{
+ struct bfq_queue *new_bfqq = bfqq->new_bfqq;
+
bfq_log_bfqq(bfqd, bfqq, "merging with queue %lu",
(unsigned long)new_bfqq->pid);
/* Save weight raising and idle window of the merged queues */
@@ -3226,6 +3228,8 @@ bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
bfq_reassign_last_bfqq(bfqq, new_bfqq);
bfq_release_process_ref(bfqd, bfqq);
+
+ return new_bfqq;
}
static bool bfq_allow_bio_merge(struct request_queue *q, struct request *rq,
@@ -3261,14 +3265,8 @@ static bool bfq_allow_bio_merge(struct request_queue *q, struct request *rq,
* fulfilled, i.e., bic can be redirected to new_bfqq
* and bfqq can be put.
*/
- bfq_merge_bfqqs(bfqd, bfqd->bio_bic, bfqq,
- new_bfqq);
- /*
- * If we get here, bio will be queued into new_queue,
- * so use new_bfqq to decide whether bio and rq can be
- * merged.
- */
- bfqq = new_bfqq;
+ while (bfqq != new_bfqq)
+ bfqq = bfq_merge_bfqqs(bfqd, bfqd->bio_bic, bfqq);
/*
* Change also bqfd->bio_bfqq, as
@@ -5705,9 +5703,7 @@ bfq_do_early_stable_merge(struct bfq_data *bfqd, struct bfq_queue *bfqq,
* state before killing it.
*/
bfqq->bic = bic;
- bfq_merge_bfqqs(bfqd, bic, bfqq, new_bfqq);
-
- return new_bfqq;
+ return bfq_merge_bfqqs(bfqd, bic, bfqq);
}
/*
@@ -6162,6 +6158,7 @@ static bool __bfq_insert_request(struct bfq_data *bfqd, struct request *rq)
bool waiting, idle_timer_disabled = false;
if (new_bfqq) {
+ struct bfq_queue *old_bfqq = bfqq;
/*
* Release the request's reference to the old bfqq
* and make sure one is taken to the shared queue.
@@ -6178,18 +6175,18 @@ static bool __bfq_insert_request(struct bfq_data *bfqd, struct request *rq)
* new_bfqq.
*/
if (bic_to_bfqq(RQ_BIC(rq), true,
- bfq_actuator_index(bfqd, rq->bio)) == bfqq)
- bfq_merge_bfqqs(bfqd, RQ_BIC(rq),
- bfqq, new_bfqq);
+ bfq_actuator_index(bfqd, rq->bio)) == bfqq) {
+ while (bfqq != new_bfqq)
+ bfqq = bfq_merge_bfqqs(bfqd, RQ_BIC(rq), bfqq);
+ }
- bfq_clear_bfqq_just_created(bfqq);
+ bfq_clear_bfqq_just_created(old_bfqq);
/*
* rq is about to be enqueued into new_bfqq,
* release rq reference on bfqq
*/
- bfq_put_queue(bfqq);
+ bfq_put_queue(old_bfqq);
rq->elv.priv[1] = new_bfqq;
- bfqq = new_bfqq;
}
bfq_update_io_thinktime(bfqd, bfqq);
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 73aeab373557fa6ee4ae0b742c6211ccd9859280
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100232-unheated-trailing-4528@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
73aeab373557 ("block, bfq: fix procress reference leakage for bfqq in merge chain")
9778369a2d6c ("block, bfq: split sync bfq_queues on a per-actuator basis")
246cf66e300b ("block, bfq: fix uaf for bfqq in bfq_exit_icq_bfqq")
337366e02b37 ("block, bfq: replace 0/1 with false/true in bic apis")
64dc8c732f5c ("block, bfq: fix possible uaf for 'bfqq->bic'")
dc469ba2e790 ("block/bfq: Use the new blk_opf_t type")
4e54a2493e58 ("bfq: Get rid of __bio_blkcg() usage")
09f871868080 ("bfq: Track whether bfq_group is still online")
ea591cd4eb27 ("bfq: Update cgroup information before merging bio")
3bc5e683c67d ("bfq: Split shared queues on move between cgroups")
f6bad159f5d5 ("block/bfq-iosched.c: use "false" rather than "BLK_RW_ASYNC"")
a0725c22cd84 ("bfq: use bfq_bic_lookup in bfq_limit_depth")
1f18b7005b49 ("bfq: Limit waker detection in time")
76f1df88bbc2 ("bfq: Limit number of requests consumed by each cgroup")
44dfa279f117 ("bfq: Store full bitmap depth in bfq_data")
ae0f1a732f4a ("blk-mq: Stop using pointers for blk_mq_tags bitmap tags")
e155b0c238b2 ("blk-mq: Use shared tags for shared sbitmap support")
645db34e5050 ("blk-mq: Refactor and rename blk_mq_free_map_and_{requests->rqs}()")
63064be150e4 ("blk-mq: Add blk_mq_alloc_map_and_rqs()")
a7e7388dced4 ("blk-mq: Add blk_mq_tag_update_sched_shared_sbitmap()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 73aeab373557fa6ee4ae0b742c6211ccd9859280 Mon Sep 17 00:00:00 2001
From: Yu Kuai <yukuai3(a)huawei.com>
Date: Mon, 9 Sep 2024 21:41:49 +0800
Subject: [PATCH] block, bfq: fix procress reference leakage for bfqq in merge
chain
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Original state:
Process 1 Process 2 Process 3 Process 4
(BIC1) (BIC2) (BIC3) (BIC4)
Λ | | |
\--------------\ \-------------\ \-------------\|
V V V
bfqq1--------->bfqq2---------->bfqq3----------->bfqq4
ref 0 1 2 4
After commit 0e456dba86c7 ("block, bfq: choose the last bfqq from merge
chain in bfq_setup_cooperator()"), if P1 issues a new IO:
Without the patch:
Process 1 Process 2 Process 3 Process 4
(BIC1) (BIC2) (BIC3) (BIC4)
Λ | | |
\------------------------------\ \-------------\|
V V
bfqq1--------->bfqq2---------->bfqq3----------->bfqq4
ref 0 0 2 4
bfqq3 will be used to handle IO from P1, this is not expected, IO
should be redirected to bfqq4;
With the patch:
-------------------------------------------
| |
Process 1 Process 2 Process 3 | Process 4
(BIC1) (BIC2) (BIC3) | (BIC4)
| | | |
\-------------\ \-------------\|
V V
bfqq1--------->bfqq2---------->bfqq3----------->bfqq4
ref 0 0 2 4
IO is redirected to bfqq4, however, procress reference of bfqq3 is still
2, while there is only P2 using it.
Fix the problem by calling bfq_merge_bfqqs() for each bfqq in the merge
chain. Also change bfqq_merge_bfqqs() to return new_bfqq to simplify
code.
Fixes: 0e456dba86c7 ("block, bfq: choose the last bfqq from merge chain in bfq_setup_cooperator()")
Signed-off-by: Yu Kuai <yukuai3(a)huawei.com>
Link: https://lore.kernel.org/r/20240909134154.954924-3-yukuai1@huaweicloud.com
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index d5d39974c674..f4192d5411d2 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -3129,10 +3129,12 @@ void bfq_release_process_ref(struct bfq_data *bfqd, struct bfq_queue *bfqq)
bfq_put_queue(bfqq);
}
-static void
-bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
- struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
+static struct bfq_queue *bfq_merge_bfqqs(struct bfq_data *bfqd,
+ struct bfq_io_cq *bic,
+ struct bfq_queue *bfqq)
{
+ struct bfq_queue *new_bfqq = bfqq->new_bfqq;
+
bfq_log_bfqq(bfqd, bfqq, "merging with queue %lu",
(unsigned long)new_bfqq->pid);
/* Save weight raising and idle window of the merged queues */
@@ -3226,6 +3228,8 @@ bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
bfq_reassign_last_bfqq(bfqq, new_bfqq);
bfq_release_process_ref(bfqd, bfqq);
+
+ return new_bfqq;
}
static bool bfq_allow_bio_merge(struct request_queue *q, struct request *rq,
@@ -3261,14 +3265,8 @@ static bool bfq_allow_bio_merge(struct request_queue *q, struct request *rq,
* fulfilled, i.e., bic can be redirected to new_bfqq
* and bfqq can be put.
*/
- bfq_merge_bfqqs(bfqd, bfqd->bio_bic, bfqq,
- new_bfqq);
- /*
- * If we get here, bio will be queued into new_queue,
- * so use new_bfqq to decide whether bio and rq can be
- * merged.
- */
- bfqq = new_bfqq;
+ while (bfqq != new_bfqq)
+ bfqq = bfq_merge_bfqqs(bfqd, bfqd->bio_bic, bfqq);
/*
* Change also bqfd->bio_bfqq, as
@@ -5705,9 +5703,7 @@ bfq_do_early_stable_merge(struct bfq_data *bfqd, struct bfq_queue *bfqq,
* state before killing it.
*/
bfqq->bic = bic;
- bfq_merge_bfqqs(bfqd, bic, bfqq, new_bfqq);
-
- return new_bfqq;
+ return bfq_merge_bfqqs(bfqd, bic, bfqq);
}
/*
@@ -6162,6 +6158,7 @@ static bool __bfq_insert_request(struct bfq_data *bfqd, struct request *rq)
bool waiting, idle_timer_disabled = false;
if (new_bfqq) {
+ struct bfq_queue *old_bfqq = bfqq;
/*
* Release the request's reference to the old bfqq
* and make sure one is taken to the shared queue.
@@ -6178,18 +6175,18 @@ static bool __bfq_insert_request(struct bfq_data *bfqd, struct request *rq)
* new_bfqq.
*/
if (bic_to_bfqq(RQ_BIC(rq), true,
- bfq_actuator_index(bfqd, rq->bio)) == bfqq)
- bfq_merge_bfqqs(bfqd, RQ_BIC(rq),
- bfqq, new_bfqq);
+ bfq_actuator_index(bfqd, rq->bio)) == bfqq) {
+ while (bfqq != new_bfqq)
+ bfqq = bfq_merge_bfqqs(bfqd, RQ_BIC(rq), bfqq);
+ }
- bfq_clear_bfqq_just_created(bfqq);
+ bfq_clear_bfqq_just_created(old_bfqq);
/*
* rq is about to be enqueued into new_bfqq,
* release rq reference on bfqq
*/
- bfq_put_queue(bfqq);
+ bfq_put_queue(old_bfqq);
rq->elv.priv[1] = new_bfqq;
- bfqq = new_bfqq;
}
bfq_update_io_thinktime(bfqd, bfqq);
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 73aeab373557fa6ee4ae0b742c6211ccd9859280
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100231-difficult-nail-04b1@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
73aeab373557 ("block, bfq: fix procress reference leakage for bfqq in merge chain")
9778369a2d6c ("block, bfq: split sync bfq_queues on a per-actuator basis")
246cf66e300b ("block, bfq: fix uaf for bfqq in bfq_exit_icq_bfqq")
337366e02b37 ("block, bfq: replace 0/1 with false/true in bic apis")
64dc8c732f5c ("block, bfq: fix possible uaf for 'bfqq->bic'")
dc469ba2e790 ("block/bfq: Use the new blk_opf_t type")
4e54a2493e58 ("bfq: Get rid of __bio_blkcg() usage")
09f871868080 ("bfq: Track whether bfq_group is still online")
ea591cd4eb27 ("bfq: Update cgroup information before merging bio")
3bc5e683c67d ("bfq: Split shared queues on move between cgroups")
f6bad159f5d5 ("block/bfq-iosched.c: use "false" rather than "BLK_RW_ASYNC"")
a0725c22cd84 ("bfq: use bfq_bic_lookup in bfq_limit_depth")
1f18b7005b49 ("bfq: Limit waker detection in time")
76f1df88bbc2 ("bfq: Limit number of requests consumed by each cgroup")
44dfa279f117 ("bfq: Store full bitmap depth in bfq_data")
ae0f1a732f4a ("blk-mq: Stop using pointers for blk_mq_tags bitmap tags")
e155b0c238b2 ("blk-mq: Use shared tags for shared sbitmap support")
645db34e5050 ("blk-mq: Refactor and rename blk_mq_free_map_and_{requests->rqs}()")
63064be150e4 ("blk-mq: Add blk_mq_alloc_map_and_rqs()")
a7e7388dced4 ("blk-mq: Add blk_mq_tag_update_sched_shared_sbitmap()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 73aeab373557fa6ee4ae0b742c6211ccd9859280 Mon Sep 17 00:00:00 2001
From: Yu Kuai <yukuai3(a)huawei.com>
Date: Mon, 9 Sep 2024 21:41:49 +0800
Subject: [PATCH] block, bfq: fix procress reference leakage for bfqq in merge
chain
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Original state:
Process 1 Process 2 Process 3 Process 4
(BIC1) (BIC2) (BIC3) (BIC4)
Λ | | |
\--------------\ \-------------\ \-------------\|
V V V
bfqq1--------->bfqq2---------->bfqq3----------->bfqq4
ref 0 1 2 4
After commit 0e456dba86c7 ("block, bfq: choose the last bfqq from merge
chain in bfq_setup_cooperator()"), if P1 issues a new IO:
Without the patch:
Process 1 Process 2 Process 3 Process 4
(BIC1) (BIC2) (BIC3) (BIC4)
Λ | | |
\------------------------------\ \-------------\|
V V
bfqq1--------->bfqq2---------->bfqq3----------->bfqq4
ref 0 0 2 4
bfqq3 will be used to handle IO from P1, this is not expected, IO
should be redirected to bfqq4;
With the patch:
-------------------------------------------
| |
Process 1 Process 2 Process 3 | Process 4
(BIC1) (BIC2) (BIC3) | (BIC4)
| | | |
\-------------\ \-------------\|
V V
bfqq1--------->bfqq2---------->bfqq3----------->bfqq4
ref 0 0 2 4
IO is redirected to bfqq4, however, procress reference of bfqq3 is still
2, while there is only P2 using it.
Fix the problem by calling bfq_merge_bfqqs() for each bfqq in the merge
chain. Also change bfqq_merge_bfqqs() to return new_bfqq to simplify
code.
Fixes: 0e456dba86c7 ("block, bfq: choose the last bfqq from merge chain in bfq_setup_cooperator()")
Signed-off-by: Yu Kuai <yukuai3(a)huawei.com>
Link: https://lore.kernel.org/r/20240909134154.954924-3-yukuai1@huaweicloud.com
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index d5d39974c674..f4192d5411d2 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -3129,10 +3129,12 @@ void bfq_release_process_ref(struct bfq_data *bfqd, struct bfq_queue *bfqq)
bfq_put_queue(bfqq);
}
-static void
-bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
- struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
+static struct bfq_queue *bfq_merge_bfqqs(struct bfq_data *bfqd,
+ struct bfq_io_cq *bic,
+ struct bfq_queue *bfqq)
{
+ struct bfq_queue *new_bfqq = bfqq->new_bfqq;
+
bfq_log_bfqq(bfqd, bfqq, "merging with queue %lu",
(unsigned long)new_bfqq->pid);
/* Save weight raising and idle window of the merged queues */
@@ -3226,6 +3228,8 @@ bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
bfq_reassign_last_bfqq(bfqq, new_bfqq);
bfq_release_process_ref(bfqd, bfqq);
+
+ return new_bfqq;
}
static bool bfq_allow_bio_merge(struct request_queue *q, struct request *rq,
@@ -3261,14 +3265,8 @@ static bool bfq_allow_bio_merge(struct request_queue *q, struct request *rq,
* fulfilled, i.e., bic can be redirected to new_bfqq
* and bfqq can be put.
*/
- bfq_merge_bfqqs(bfqd, bfqd->bio_bic, bfqq,
- new_bfqq);
- /*
- * If we get here, bio will be queued into new_queue,
- * so use new_bfqq to decide whether bio and rq can be
- * merged.
- */
- bfqq = new_bfqq;
+ while (bfqq != new_bfqq)
+ bfqq = bfq_merge_bfqqs(bfqd, bfqd->bio_bic, bfqq);
/*
* Change also bqfd->bio_bfqq, as
@@ -5705,9 +5703,7 @@ bfq_do_early_stable_merge(struct bfq_data *bfqd, struct bfq_queue *bfqq,
* state before killing it.
*/
bfqq->bic = bic;
- bfq_merge_bfqqs(bfqd, bic, bfqq, new_bfqq);
-
- return new_bfqq;
+ return bfq_merge_bfqqs(bfqd, bic, bfqq);
}
/*
@@ -6162,6 +6158,7 @@ static bool __bfq_insert_request(struct bfq_data *bfqd, struct request *rq)
bool waiting, idle_timer_disabled = false;
if (new_bfqq) {
+ struct bfq_queue *old_bfqq = bfqq;
/*
* Release the request's reference to the old bfqq
* and make sure one is taken to the shared queue.
@@ -6178,18 +6175,18 @@ static bool __bfq_insert_request(struct bfq_data *bfqd, struct request *rq)
* new_bfqq.
*/
if (bic_to_bfqq(RQ_BIC(rq), true,
- bfq_actuator_index(bfqd, rq->bio)) == bfqq)
- bfq_merge_bfqqs(bfqd, RQ_BIC(rq),
- bfqq, new_bfqq);
+ bfq_actuator_index(bfqd, rq->bio)) == bfqq) {
+ while (bfqq != new_bfqq)
+ bfqq = bfq_merge_bfqqs(bfqd, RQ_BIC(rq), bfqq);
+ }
- bfq_clear_bfqq_just_created(bfqq);
+ bfq_clear_bfqq_just_created(old_bfqq);
/*
* rq is about to be enqueued into new_bfqq,
* release rq reference on bfqq
*/
- bfq_put_queue(bfqq);
+ bfq_put_queue(old_bfqq);
rq->elv.priv[1] = new_bfqq;
- bfqq = new_bfqq;
}
bfq_update_io_thinktime(bfqd, bfqq);
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 73aeab373557fa6ee4ae0b742c6211ccd9859280
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100230-epilogue-hush-6a49@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
73aeab373557 ("block, bfq: fix procress reference leakage for bfqq in merge chain")
9778369a2d6c ("block, bfq: split sync bfq_queues on a per-actuator basis")
246cf66e300b ("block, bfq: fix uaf for bfqq in bfq_exit_icq_bfqq")
337366e02b37 ("block, bfq: replace 0/1 with false/true in bic apis")
64dc8c732f5c ("block, bfq: fix possible uaf for 'bfqq->bic'")
dc469ba2e790 ("block/bfq: Use the new blk_opf_t type")
4e54a2493e58 ("bfq: Get rid of __bio_blkcg() usage")
09f871868080 ("bfq: Track whether bfq_group is still online")
ea591cd4eb27 ("bfq: Update cgroup information before merging bio")
3bc5e683c67d ("bfq: Split shared queues on move between cgroups")
f6bad159f5d5 ("block/bfq-iosched.c: use "false" rather than "BLK_RW_ASYNC"")
a0725c22cd84 ("bfq: use bfq_bic_lookup in bfq_limit_depth")
1f18b7005b49 ("bfq: Limit waker detection in time")
76f1df88bbc2 ("bfq: Limit number of requests consumed by each cgroup")
44dfa279f117 ("bfq: Store full bitmap depth in bfq_data")
ae0f1a732f4a ("blk-mq: Stop using pointers for blk_mq_tags bitmap tags")
e155b0c238b2 ("blk-mq: Use shared tags for shared sbitmap support")
645db34e5050 ("blk-mq: Refactor and rename blk_mq_free_map_and_{requests->rqs}()")
63064be150e4 ("blk-mq: Add blk_mq_alloc_map_and_rqs()")
a7e7388dced4 ("blk-mq: Add blk_mq_tag_update_sched_shared_sbitmap()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 73aeab373557fa6ee4ae0b742c6211ccd9859280 Mon Sep 17 00:00:00 2001
From: Yu Kuai <yukuai3(a)huawei.com>
Date: Mon, 9 Sep 2024 21:41:49 +0800
Subject: [PATCH] block, bfq: fix procress reference leakage for bfqq in merge
chain
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Original state:
Process 1 Process 2 Process 3 Process 4
(BIC1) (BIC2) (BIC3) (BIC4)
Λ | | |
\--------------\ \-------------\ \-------------\|
V V V
bfqq1--------->bfqq2---------->bfqq3----------->bfqq4
ref 0 1 2 4
After commit 0e456dba86c7 ("block, bfq: choose the last bfqq from merge
chain in bfq_setup_cooperator()"), if P1 issues a new IO:
Without the patch:
Process 1 Process 2 Process 3 Process 4
(BIC1) (BIC2) (BIC3) (BIC4)
Λ | | |
\------------------------------\ \-------------\|
V V
bfqq1--------->bfqq2---------->bfqq3----------->bfqq4
ref 0 0 2 4
bfqq3 will be used to handle IO from P1, this is not expected, IO
should be redirected to bfqq4;
With the patch:
-------------------------------------------
| |
Process 1 Process 2 Process 3 | Process 4
(BIC1) (BIC2) (BIC3) | (BIC4)
| | | |
\-------------\ \-------------\|
V V
bfqq1--------->bfqq2---------->bfqq3----------->bfqq4
ref 0 0 2 4
IO is redirected to bfqq4, however, procress reference of bfqq3 is still
2, while there is only P2 using it.
Fix the problem by calling bfq_merge_bfqqs() for each bfqq in the merge
chain. Also change bfqq_merge_bfqqs() to return new_bfqq to simplify
code.
Fixes: 0e456dba86c7 ("block, bfq: choose the last bfqq from merge chain in bfq_setup_cooperator()")
Signed-off-by: Yu Kuai <yukuai3(a)huawei.com>
Link: https://lore.kernel.org/r/20240909134154.954924-3-yukuai1@huaweicloud.com
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index d5d39974c674..f4192d5411d2 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -3129,10 +3129,12 @@ void bfq_release_process_ref(struct bfq_data *bfqd, struct bfq_queue *bfqq)
bfq_put_queue(bfqq);
}
-static void
-bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
- struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
+static struct bfq_queue *bfq_merge_bfqqs(struct bfq_data *bfqd,
+ struct bfq_io_cq *bic,
+ struct bfq_queue *bfqq)
{
+ struct bfq_queue *new_bfqq = bfqq->new_bfqq;
+
bfq_log_bfqq(bfqd, bfqq, "merging with queue %lu",
(unsigned long)new_bfqq->pid);
/* Save weight raising and idle window of the merged queues */
@@ -3226,6 +3228,8 @@ bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
bfq_reassign_last_bfqq(bfqq, new_bfqq);
bfq_release_process_ref(bfqd, bfqq);
+
+ return new_bfqq;
}
static bool bfq_allow_bio_merge(struct request_queue *q, struct request *rq,
@@ -3261,14 +3265,8 @@ static bool bfq_allow_bio_merge(struct request_queue *q, struct request *rq,
* fulfilled, i.e., bic can be redirected to new_bfqq
* and bfqq can be put.
*/
- bfq_merge_bfqqs(bfqd, bfqd->bio_bic, bfqq,
- new_bfqq);
- /*
- * If we get here, bio will be queued into new_queue,
- * so use new_bfqq to decide whether bio and rq can be
- * merged.
- */
- bfqq = new_bfqq;
+ while (bfqq != new_bfqq)
+ bfqq = bfq_merge_bfqqs(bfqd, bfqd->bio_bic, bfqq);
/*
* Change also bqfd->bio_bfqq, as
@@ -5705,9 +5703,7 @@ bfq_do_early_stable_merge(struct bfq_data *bfqd, struct bfq_queue *bfqq,
* state before killing it.
*/
bfqq->bic = bic;
- bfq_merge_bfqqs(bfqd, bic, bfqq, new_bfqq);
-
- return new_bfqq;
+ return bfq_merge_bfqqs(bfqd, bic, bfqq);
}
/*
@@ -6162,6 +6158,7 @@ static bool __bfq_insert_request(struct bfq_data *bfqd, struct request *rq)
bool waiting, idle_timer_disabled = false;
if (new_bfqq) {
+ struct bfq_queue *old_bfqq = bfqq;
/*
* Release the request's reference to the old bfqq
* and make sure one is taken to the shared queue.
@@ -6178,18 +6175,18 @@ static bool __bfq_insert_request(struct bfq_data *bfqd, struct request *rq)
* new_bfqq.
*/
if (bic_to_bfqq(RQ_BIC(rq), true,
- bfq_actuator_index(bfqd, rq->bio)) == bfqq)
- bfq_merge_bfqqs(bfqd, RQ_BIC(rq),
- bfqq, new_bfqq);
+ bfq_actuator_index(bfqd, rq->bio)) == bfqq) {
+ while (bfqq != new_bfqq)
+ bfqq = bfq_merge_bfqqs(bfqd, RQ_BIC(rq), bfqq);
+ }
- bfq_clear_bfqq_just_created(bfqq);
+ bfq_clear_bfqq_just_created(old_bfqq);
/*
* rq is about to be enqueued into new_bfqq,
* release rq reference on bfqq
*/
- bfq_put_queue(bfqq);
+ bfq_put_queue(old_bfqq);
rq->elv.priv[1] = new_bfqq;
- bfqq = new_bfqq;
}
bfq_update_io_thinktime(bfqd, bfqq);
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 73aeab373557fa6ee4ae0b742c6211ccd9859280
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100229-lazy-atonable-fd03@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
73aeab373557 ("block, bfq: fix procress reference leakage for bfqq in merge chain")
9778369a2d6c ("block, bfq: split sync bfq_queues on a per-actuator basis")
246cf66e300b ("block, bfq: fix uaf for bfqq in bfq_exit_icq_bfqq")
337366e02b37 ("block, bfq: replace 0/1 with false/true in bic apis")
64dc8c732f5c ("block, bfq: fix possible uaf for 'bfqq->bic'")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 73aeab373557fa6ee4ae0b742c6211ccd9859280 Mon Sep 17 00:00:00 2001
From: Yu Kuai <yukuai3(a)huawei.com>
Date: Mon, 9 Sep 2024 21:41:49 +0800
Subject: [PATCH] block, bfq: fix procress reference leakage for bfqq in merge
chain
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Original state:
Process 1 Process 2 Process 3 Process 4
(BIC1) (BIC2) (BIC3) (BIC4)
Λ | | |
\--------------\ \-------------\ \-------------\|
V V V
bfqq1--------->bfqq2---------->bfqq3----------->bfqq4
ref 0 1 2 4
After commit 0e456dba86c7 ("block, bfq: choose the last bfqq from merge
chain in bfq_setup_cooperator()"), if P1 issues a new IO:
Without the patch:
Process 1 Process 2 Process 3 Process 4
(BIC1) (BIC2) (BIC3) (BIC4)
Λ | | |
\------------------------------\ \-------------\|
V V
bfqq1--------->bfqq2---------->bfqq3----------->bfqq4
ref 0 0 2 4
bfqq3 will be used to handle IO from P1, this is not expected, IO
should be redirected to bfqq4;
With the patch:
-------------------------------------------
| |
Process 1 Process 2 Process 3 | Process 4
(BIC1) (BIC2) (BIC3) | (BIC4)
| | | |
\-------------\ \-------------\|
V V
bfqq1--------->bfqq2---------->bfqq3----------->bfqq4
ref 0 0 2 4
IO is redirected to bfqq4, however, procress reference of bfqq3 is still
2, while there is only P2 using it.
Fix the problem by calling bfq_merge_bfqqs() for each bfqq in the merge
chain. Also change bfqq_merge_bfqqs() to return new_bfqq to simplify
code.
Fixes: 0e456dba86c7 ("block, bfq: choose the last bfqq from merge chain in bfq_setup_cooperator()")
Signed-off-by: Yu Kuai <yukuai3(a)huawei.com>
Link: https://lore.kernel.org/r/20240909134154.954924-3-yukuai1@huaweicloud.com
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index d5d39974c674..f4192d5411d2 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -3129,10 +3129,12 @@ void bfq_release_process_ref(struct bfq_data *bfqd, struct bfq_queue *bfqq)
bfq_put_queue(bfqq);
}
-static void
-bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
- struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
+static struct bfq_queue *bfq_merge_bfqqs(struct bfq_data *bfqd,
+ struct bfq_io_cq *bic,
+ struct bfq_queue *bfqq)
{
+ struct bfq_queue *new_bfqq = bfqq->new_bfqq;
+
bfq_log_bfqq(bfqd, bfqq, "merging with queue %lu",
(unsigned long)new_bfqq->pid);
/* Save weight raising and idle window of the merged queues */
@@ -3226,6 +3228,8 @@ bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
bfq_reassign_last_bfqq(bfqq, new_bfqq);
bfq_release_process_ref(bfqd, bfqq);
+
+ return new_bfqq;
}
static bool bfq_allow_bio_merge(struct request_queue *q, struct request *rq,
@@ -3261,14 +3265,8 @@ static bool bfq_allow_bio_merge(struct request_queue *q, struct request *rq,
* fulfilled, i.e., bic can be redirected to new_bfqq
* and bfqq can be put.
*/
- bfq_merge_bfqqs(bfqd, bfqd->bio_bic, bfqq,
- new_bfqq);
- /*
- * If we get here, bio will be queued into new_queue,
- * so use new_bfqq to decide whether bio and rq can be
- * merged.
- */
- bfqq = new_bfqq;
+ while (bfqq != new_bfqq)
+ bfqq = bfq_merge_bfqqs(bfqd, bfqd->bio_bic, bfqq);
/*
* Change also bqfd->bio_bfqq, as
@@ -5705,9 +5703,7 @@ bfq_do_early_stable_merge(struct bfq_data *bfqd, struct bfq_queue *bfqq,
* state before killing it.
*/
bfqq->bic = bic;
- bfq_merge_bfqqs(bfqd, bic, bfqq, new_bfqq);
-
- return new_bfqq;
+ return bfq_merge_bfqqs(bfqd, bic, bfqq);
}
/*
@@ -6162,6 +6158,7 @@ static bool __bfq_insert_request(struct bfq_data *bfqd, struct request *rq)
bool waiting, idle_timer_disabled = false;
if (new_bfqq) {
+ struct bfq_queue *old_bfqq = bfqq;
/*
* Release the request's reference to the old bfqq
* and make sure one is taken to the shared queue.
@@ -6178,18 +6175,18 @@ static bool __bfq_insert_request(struct bfq_data *bfqd, struct request *rq)
* new_bfqq.
*/
if (bic_to_bfqq(RQ_BIC(rq), true,
- bfq_actuator_index(bfqd, rq->bio)) == bfqq)
- bfq_merge_bfqqs(bfqd, RQ_BIC(rq),
- bfqq, new_bfqq);
+ bfq_actuator_index(bfqd, rq->bio)) == bfqq) {
+ while (bfqq != new_bfqq)
+ bfqq = bfq_merge_bfqqs(bfqd, RQ_BIC(rq), bfqq);
+ }
- bfq_clear_bfqq_just_created(bfqq);
+ bfq_clear_bfqq_just_created(old_bfqq);
/*
* rq is about to be enqueued into new_bfqq,
* release rq reference on bfqq
*/
- bfq_put_queue(bfqq);
+ bfq_put_queue(old_bfqq);
rq->elv.priv[1] = new_bfqq;
- bfqq = new_bfqq;
}
bfq_update_io_thinktime(bfqd, bfqq);
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x 1ba0403ac6447f2d63914fb760c44a3b19c44eaf
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100223-gutter-exchange-0c2b@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
1ba0403ac644 ("block, bfq: fix uaf for accessing waker_bfqq after splitting")
fd571df0ac5b ("block, bfq: turn bfqq_data into an array in bfq_io_cq")
a61230470c8c ("block, bfq: move io_cq-persistent bfqq data into a dedicated struct")
9778369a2d6c ("block, bfq: split sync bfq_queues on a per-actuator basis")
246cf66e300b ("block, bfq: fix uaf for bfqq in bfq_exit_icq_bfqq")
337366e02b37 ("block, bfq: replace 0/1 with false/true in bic apis")
64dc8c732f5c ("block, bfq: fix possible uaf for 'bfqq->bic'")
a1795c2ccb1e ("bfq: fix waker_bfqq inconsistency crash")
dc469ba2e790 ("block/bfq: Use the new blk_opf_t type")
f950667356ce ("bfq: Relax waker detection for shared queues")
4e54a2493e58 ("bfq: Get rid of __bio_blkcg() usage")
09f871868080 ("bfq: Track whether bfq_group is still online")
ea591cd4eb27 ("bfq: Update cgroup information before merging bio")
3bc5e683c67d ("bfq: Split shared queues on move between cgroups")
70456e5210f4 ("bfq: Avoid false marking of bic as stably merged")
f6bad159f5d5 ("block/bfq-iosched.c: use "false" rather than "BLK_RW_ASYNC"")
a0725c22cd84 ("bfq: use bfq_bic_lookup in bfq_limit_depth")
1f18b7005b49 ("bfq: Limit waker detection in time")
76f1df88bbc2 ("bfq: Limit number of requests consumed by each cgroup")
44dfa279f117 ("bfq: Store full bitmap depth in bfq_data")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1ba0403ac6447f2d63914fb760c44a3b19c44eaf Mon Sep 17 00:00:00 2001
From: Yu Kuai <yukuai3(a)huawei.com>
Date: Mon, 9 Sep 2024 21:41:48 +0800
Subject: [PATCH] block, bfq: fix uaf for accessing waker_bfqq after splitting
After commit 42c306ed7233 ("block, bfq: don't break merge chain in
bfq_split_bfqq()"), if the current procress is the last holder of bfqq,
the bfqq can be freed after bfq_split_bfqq(). Hence recored the bfqq and
then access bfqq->waker_bfqq may trigger UAF. What's more, the waker_bfqq
may in the merge chain of bfqq, hence just recored waker_bfqq is still
not safe.
Fix the problem by adding a helper bfq_waker_bfqq() to check if
bfqq->waker_bfqq is in the merge chain, and current procress is the only
holder.
Fixes: 42c306ed7233 ("block, bfq: don't break merge chain in bfq_split_bfqq()")
Signed-off-by: Yu Kuai <yukuai3(a)huawei.com>
Link: https://lore.kernel.org/r/20240909134154.954924-2-yukuai1@huaweicloud.com
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index d1bf2b8a3576..d5d39974c674 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -6825,6 +6825,31 @@ static void bfq_prepare_request(struct request *rq)
rq->elv.priv[0] = rq->elv.priv[1] = NULL;
}
+static struct bfq_queue *bfq_waker_bfqq(struct bfq_queue *bfqq)
+{
+ struct bfq_queue *new_bfqq = bfqq->new_bfqq;
+ struct bfq_queue *waker_bfqq = bfqq->waker_bfqq;
+
+ if (!waker_bfqq)
+ return NULL;
+
+ while (new_bfqq) {
+ if (new_bfqq == waker_bfqq) {
+ /*
+ * If waker_bfqq is in the merge chain, and current
+ * is the only procress.
+ */
+ if (bfqq_process_refs(waker_bfqq) == 1)
+ return NULL;
+ break;
+ }
+
+ new_bfqq = new_bfqq->new_bfqq;
+ }
+
+ return waker_bfqq;
+}
+
/*
* If needed, init rq, allocate bfq data structures associated with
* rq, and increment reference counters in the destination bfq_queue
@@ -6886,7 +6911,7 @@ static struct bfq_queue *bfq_init_rq(struct request *rq)
/* If the queue was seeky for too long, break it apart. */
if (bfq_bfqq_coop(bfqq) && bfq_bfqq_split_coop(bfqq) &&
!bic->bfqq_data[a_idx].stably_merged) {
- struct bfq_queue *old_bfqq = bfqq;
+ struct bfq_queue *waker_bfqq = bfq_waker_bfqq(bfqq);
/* Update bic before losing reference to bfqq */
if (bfq_bfqq_in_large_burst(bfqq))
@@ -6906,7 +6931,7 @@ static struct bfq_queue *bfq_init_rq(struct request *rq)
bfqq_already_existing = true;
if (!bfqq_already_existing) {
- bfqq->waker_bfqq = old_bfqq->waker_bfqq;
+ bfqq->waker_bfqq = waker_bfqq;
bfqq->tentative_waker_bfqq = NULL;
/*
@@ -6916,7 +6941,7 @@ static struct bfq_queue *bfq_init_rq(struct request *rq)
* woken_list of the waker. See
* bfq_check_waker for details.
*/
- if (bfqq->waker_bfqq)
+ if (waker_bfqq)
hlist_add_head(&bfqq->woken_list_node,
&bfqq->waker_bfqq->woken_list);
}
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 1ba0403ac6447f2d63914fb760c44a3b19c44eaf
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100222-aliens-ergonomic-4ce2@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
1ba0403ac644 ("block, bfq: fix uaf for accessing waker_bfqq after splitting")
fd571df0ac5b ("block, bfq: turn bfqq_data into an array in bfq_io_cq")
a61230470c8c ("block, bfq: move io_cq-persistent bfqq data into a dedicated struct")
9778369a2d6c ("block, bfq: split sync bfq_queues on a per-actuator basis")
246cf66e300b ("block, bfq: fix uaf for bfqq in bfq_exit_icq_bfqq")
337366e02b37 ("block, bfq: replace 0/1 with false/true in bic apis")
64dc8c732f5c ("block, bfq: fix possible uaf for 'bfqq->bic'")
a1795c2ccb1e ("bfq: fix waker_bfqq inconsistency crash")
dc469ba2e790 ("block/bfq: Use the new blk_opf_t type")
f950667356ce ("bfq: Relax waker detection for shared queues")
4e54a2493e58 ("bfq: Get rid of __bio_blkcg() usage")
09f871868080 ("bfq: Track whether bfq_group is still online")
ea591cd4eb27 ("bfq: Update cgroup information before merging bio")
3bc5e683c67d ("bfq: Split shared queues on move between cgroups")
70456e5210f4 ("bfq: Avoid false marking of bic as stably merged")
f6bad159f5d5 ("block/bfq-iosched.c: use "false" rather than "BLK_RW_ASYNC"")
a0725c22cd84 ("bfq: use bfq_bic_lookup in bfq_limit_depth")
1f18b7005b49 ("bfq: Limit waker detection in time")
76f1df88bbc2 ("bfq: Limit number of requests consumed by each cgroup")
44dfa279f117 ("bfq: Store full bitmap depth in bfq_data")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1ba0403ac6447f2d63914fb760c44a3b19c44eaf Mon Sep 17 00:00:00 2001
From: Yu Kuai <yukuai3(a)huawei.com>
Date: Mon, 9 Sep 2024 21:41:48 +0800
Subject: [PATCH] block, bfq: fix uaf for accessing waker_bfqq after splitting
After commit 42c306ed7233 ("block, bfq: don't break merge chain in
bfq_split_bfqq()"), if the current procress is the last holder of bfqq,
the bfqq can be freed after bfq_split_bfqq(). Hence recored the bfqq and
then access bfqq->waker_bfqq may trigger UAF. What's more, the waker_bfqq
may in the merge chain of bfqq, hence just recored waker_bfqq is still
not safe.
Fix the problem by adding a helper bfq_waker_bfqq() to check if
bfqq->waker_bfqq is in the merge chain, and current procress is the only
holder.
Fixes: 42c306ed7233 ("block, bfq: don't break merge chain in bfq_split_bfqq()")
Signed-off-by: Yu Kuai <yukuai3(a)huawei.com>
Link: https://lore.kernel.org/r/20240909134154.954924-2-yukuai1@huaweicloud.com
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index d1bf2b8a3576..d5d39974c674 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -6825,6 +6825,31 @@ static void bfq_prepare_request(struct request *rq)
rq->elv.priv[0] = rq->elv.priv[1] = NULL;
}
+static struct bfq_queue *bfq_waker_bfqq(struct bfq_queue *bfqq)
+{
+ struct bfq_queue *new_bfqq = bfqq->new_bfqq;
+ struct bfq_queue *waker_bfqq = bfqq->waker_bfqq;
+
+ if (!waker_bfqq)
+ return NULL;
+
+ while (new_bfqq) {
+ if (new_bfqq == waker_bfqq) {
+ /*
+ * If waker_bfqq is in the merge chain, and current
+ * is the only procress.
+ */
+ if (bfqq_process_refs(waker_bfqq) == 1)
+ return NULL;
+ break;
+ }
+
+ new_bfqq = new_bfqq->new_bfqq;
+ }
+
+ return waker_bfqq;
+}
+
/*
* If needed, init rq, allocate bfq data structures associated with
* rq, and increment reference counters in the destination bfq_queue
@@ -6886,7 +6911,7 @@ static struct bfq_queue *bfq_init_rq(struct request *rq)
/* If the queue was seeky for too long, break it apart. */
if (bfq_bfqq_coop(bfqq) && bfq_bfqq_split_coop(bfqq) &&
!bic->bfqq_data[a_idx].stably_merged) {
- struct bfq_queue *old_bfqq = bfqq;
+ struct bfq_queue *waker_bfqq = bfq_waker_bfqq(bfqq);
/* Update bic before losing reference to bfqq */
if (bfq_bfqq_in_large_burst(bfqq))
@@ -6906,7 +6931,7 @@ static struct bfq_queue *bfq_init_rq(struct request *rq)
bfqq_already_existing = true;
if (!bfqq_already_existing) {
- bfqq->waker_bfqq = old_bfqq->waker_bfqq;
+ bfqq->waker_bfqq = waker_bfqq;
bfqq->tentative_waker_bfqq = NULL;
/*
@@ -6916,7 +6941,7 @@ static struct bfq_queue *bfq_init_rq(struct request *rq)
* woken_list of the waker. See
* bfq_check_waker for details.
*/
- if (bfqq->waker_bfqq)
+ if (waker_bfqq)
hlist_add_head(&bfqq->woken_list_node,
&bfqq->waker_bfqq->woken_list);
}
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 1ba0403ac6447f2d63914fb760c44a3b19c44eaf
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100221-radar-unethical-0eaa@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
1ba0403ac644 ("block, bfq: fix uaf for accessing waker_bfqq after splitting")
fd571df0ac5b ("block, bfq: turn bfqq_data into an array in bfq_io_cq")
a61230470c8c ("block, bfq: move io_cq-persistent bfqq data into a dedicated struct")
9778369a2d6c ("block, bfq: split sync bfq_queues on a per-actuator basis")
246cf66e300b ("block, bfq: fix uaf for bfqq in bfq_exit_icq_bfqq")
337366e02b37 ("block, bfq: replace 0/1 with false/true in bic apis")
64dc8c732f5c ("block, bfq: fix possible uaf for 'bfqq->bic'")
a1795c2ccb1e ("bfq: fix waker_bfqq inconsistency crash")
dc469ba2e790 ("block/bfq: Use the new blk_opf_t type")
f950667356ce ("bfq: Relax waker detection for shared queues")
4e54a2493e58 ("bfq: Get rid of __bio_blkcg() usage")
09f871868080 ("bfq: Track whether bfq_group is still online")
ea591cd4eb27 ("bfq: Update cgroup information before merging bio")
3bc5e683c67d ("bfq: Split shared queues on move between cgroups")
70456e5210f4 ("bfq: Avoid false marking of bic as stably merged")
f6bad159f5d5 ("block/bfq-iosched.c: use "false" rather than "BLK_RW_ASYNC"")
a0725c22cd84 ("bfq: use bfq_bic_lookup in bfq_limit_depth")
1f18b7005b49 ("bfq: Limit waker detection in time")
76f1df88bbc2 ("bfq: Limit number of requests consumed by each cgroup")
44dfa279f117 ("bfq: Store full bitmap depth in bfq_data")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1ba0403ac6447f2d63914fb760c44a3b19c44eaf Mon Sep 17 00:00:00 2001
From: Yu Kuai <yukuai3(a)huawei.com>
Date: Mon, 9 Sep 2024 21:41:48 +0800
Subject: [PATCH] block, bfq: fix uaf for accessing waker_bfqq after splitting
After commit 42c306ed7233 ("block, bfq: don't break merge chain in
bfq_split_bfqq()"), if the current procress is the last holder of bfqq,
the bfqq can be freed after bfq_split_bfqq(). Hence recored the bfqq and
then access bfqq->waker_bfqq may trigger UAF. What's more, the waker_bfqq
may in the merge chain of bfqq, hence just recored waker_bfqq is still
not safe.
Fix the problem by adding a helper bfq_waker_bfqq() to check if
bfqq->waker_bfqq is in the merge chain, and current procress is the only
holder.
Fixes: 42c306ed7233 ("block, bfq: don't break merge chain in bfq_split_bfqq()")
Signed-off-by: Yu Kuai <yukuai3(a)huawei.com>
Link: https://lore.kernel.org/r/20240909134154.954924-2-yukuai1@huaweicloud.com
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index d1bf2b8a3576..d5d39974c674 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -6825,6 +6825,31 @@ static void bfq_prepare_request(struct request *rq)
rq->elv.priv[0] = rq->elv.priv[1] = NULL;
}
+static struct bfq_queue *bfq_waker_bfqq(struct bfq_queue *bfqq)
+{
+ struct bfq_queue *new_bfqq = bfqq->new_bfqq;
+ struct bfq_queue *waker_bfqq = bfqq->waker_bfqq;
+
+ if (!waker_bfqq)
+ return NULL;
+
+ while (new_bfqq) {
+ if (new_bfqq == waker_bfqq) {
+ /*
+ * If waker_bfqq is in the merge chain, and current
+ * is the only procress.
+ */
+ if (bfqq_process_refs(waker_bfqq) == 1)
+ return NULL;
+ break;
+ }
+
+ new_bfqq = new_bfqq->new_bfqq;
+ }
+
+ return waker_bfqq;
+}
+
/*
* If needed, init rq, allocate bfq data structures associated with
* rq, and increment reference counters in the destination bfq_queue
@@ -6886,7 +6911,7 @@ static struct bfq_queue *bfq_init_rq(struct request *rq)
/* If the queue was seeky for too long, break it apart. */
if (bfq_bfqq_coop(bfqq) && bfq_bfqq_split_coop(bfqq) &&
!bic->bfqq_data[a_idx].stably_merged) {
- struct bfq_queue *old_bfqq = bfqq;
+ struct bfq_queue *waker_bfqq = bfq_waker_bfqq(bfqq);
/* Update bic before losing reference to bfqq */
if (bfq_bfqq_in_large_burst(bfqq))
@@ -6906,7 +6931,7 @@ static struct bfq_queue *bfq_init_rq(struct request *rq)
bfqq_already_existing = true;
if (!bfqq_already_existing) {
- bfqq->waker_bfqq = old_bfqq->waker_bfqq;
+ bfqq->waker_bfqq = waker_bfqq;
bfqq->tentative_waker_bfqq = NULL;
/*
@@ -6916,7 +6941,7 @@ static struct bfq_queue *bfq_init_rq(struct request *rq)
* woken_list of the waker. See
* bfq_check_waker for details.
*/
- if (bfqq->waker_bfqq)
+ if (waker_bfqq)
hlist_add_head(&bfqq->woken_list_node,
&bfqq->waker_bfqq->woken_list);
}
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 1ba0403ac6447f2d63914fb760c44a3b19c44eaf
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100221-pang-residue-68ad@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
1ba0403ac644 ("block, bfq: fix uaf for accessing waker_bfqq after splitting")
fd571df0ac5b ("block, bfq: turn bfqq_data into an array in bfq_io_cq")
a61230470c8c ("block, bfq: move io_cq-persistent bfqq data into a dedicated struct")
9778369a2d6c ("block, bfq: split sync bfq_queues on a per-actuator basis")
246cf66e300b ("block, bfq: fix uaf for bfqq in bfq_exit_icq_bfqq")
337366e02b37 ("block, bfq: replace 0/1 with false/true in bic apis")
64dc8c732f5c ("block, bfq: fix possible uaf for 'bfqq->bic'")
a1795c2ccb1e ("bfq: fix waker_bfqq inconsistency crash")
dc469ba2e790 ("block/bfq: Use the new blk_opf_t type")
f950667356ce ("bfq: Relax waker detection for shared queues")
4e54a2493e58 ("bfq: Get rid of __bio_blkcg() usage")
09f871868080 ("bfq: Track whether bfq_group is still online")
ea591cd4eb27 ("bfq: Update cgroup information before merging bio")
3bc5e683c67d ("bfq: Split shared queues on move between cgroups")
70456e5210f4 ("bfq: Avoid false marking of bic as stably merged")
f6bad159f5d5 ("block/bfq-iosched.c: use "false" rather than "BLK_RW_ASYNC"")
a0725c22cd84 ("bfq: use bfq_bic_lookup in bfq_limit_depth")
1f18b7005b49 ("bfq: Limit waker detection in time")
76f1df88bbc2 ("bfq: Limit number of requests consumed by each cgroup")
44dfa279f117 ("bfq: Store full bitmap depth in bfq_data")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1ba0403ac6447f2d63914fb760c44a3b19c44eaf Mon Sep 17 00:00:00 2001
From: Yu Kuai <yukuai3(a)huawei.com>
Date: Mon, 9 Sep 2024 21:41:48 +0800
Subject: [PATCH] block, bfq: fix uaf for accessing waker_bfqq after splitting
After commit 42c306ed7233 ("block, bfq: don't break merge chain in
bfq_split_bfqq()"), if the current procress is the last holder of bfqq,
the bfqq can be freed after bfq_split_bfqq(). Hence recored the bfqq and
then access bfqq->waker_bfqq may trigger UAF. What's more, the waker_bfqq
may in the merge chain of bfqq, hence just recored waker_bfqq is still
not safe.
Fix the problem by adding a helper bfq_waker_bfqq() to check if
bfqq->waker_bfqq is in the merge chain, and current procress is the only
holder.
Fixes: 42c306ed7233 ("block, bfq: don't break merge chain in bfq_split_bfqq()")
Signed-off-by: Yu Kuai <yukuai3(a)huawei.com>
Link: https://lore.kernel.org/r/20240909134154.954924-2-yukuai1@huaweicloud.com
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index d1bf2b8a3576..d5d39974c674 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -6825,6 +6825,31 @@ static void bfq_prepare_request(struct request *rq)
rq->elv.priv[0] = rq->elv.priv[1] = NULL;
}
+static struct bfq_queue *bfq_waker_bfqq(struct bfq_queue *bfqq)
+{
+ struct bfq_queue *new_bfqq = bfqq->new_bfqq;
+ struct bfq_queue *waker_bfqq = bfqq->waker_bfqq;
+
+ if (!waker_bfqq)
+ return NULL;
+
+ while (new_bfqq) {
+ if (new_bfqq == waker_bfqq) {
+ /*
+ * If waker_bfqq is in the merge chain, and current
+ * is the only procress.
+ */
+ if (bfqq_process_refs(waker_bfqq) == 1)
+ return NULL;
+ break;
+ }
+
+ new_bfqq = new_bfqq->new_bfqq;
+ }
+
+ return waker_bfqq;
+}
+
/*
* If needed, init rq, allocate bfq data structures associated with
* rq, and increment reference counters in the destination bfq_queue
@@ -6886,7 +6911,7 @@ static struct bfq_queue *bfq_init_rq(struct request *rq)
/* If the queue was seeky for too long, break it apart. */
if (bfq_bfqq_coop(bfqq) && bfq_bfqq_split_coop(bfqq) &&
!bic->bfqq_data[a_idx].stably_merged) {
- struct bfq_queue *old_bfqq = bfqq;
+ struct bfq_queue *waker_bfqq = bfq_waker_bfqq(bfqq);
/* Update bic before losing reference to bfqq */
if (bfq_bfqq_in_large_burst(bfqq))
@@ -6906,7 +6931,7 @@ static struct bfq_queue *bfq_init_rq(struct request *rq)
bfqq_already_existing = true;
if (!bfqq_already_existing) {
- bfqq->waker_bfqq = old_bfqq->waker_bfqq;
+ bfqq->waker_bfqq = waker_bfqq;
bfqq->tentative_waker_bfqq = NULL;
/*
@@ -6916,7 +6941,7 @@ static struct bfq_queue *bfq_init_rq(struct request *rq)
* woken_list of the waker. See
* bfq_check_waker for details.
*/
- if (bfqq->waker_bfqq)
+ if (waker_bfqq)
hlist_add_head(&bfqq->woken_list_node,
&bfqq->waker_bfqq->woken_list);
}
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 1ba0403ac6447f2d63914fb760c44a3b19c44eaf
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100220-swagger-freehand-6d91@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
1ba0403ac644 ("block, bfq: fix uaf for accessing waker_bfqq after splitting")
fd571df0ac5b ("block, bfq: turn bfqq_data into an array in bfq_io_cq")
a61230470c8c ("block, bfq: move io_cq-persistent bfqq data into a dedicated struct")
9778369a2d6c ("block, bfq: split sync bfq_queues on a per-actuator basis")
246cf66e300b ("block, bfq: fix uaf for bfqq in bfq_exit_icq_bfqq")
337366e02b37 ("block, bfq: replace 0/1 with false/true in bic apis")
64dc8c732f5c ("block, bfq: fix possible uaf for 'bfqq->bic'")
a1795c2ccb1e ("bfq: fix waker_bfqq inconsistency crash")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1ba0403ac6447f2d63914fb760c44a3b19c44eaf Mon Sep 17 00:00:00 2001
From: Yu Kuai <yukuai3(a)huawei.com>
Date: Mon, 9 Sep 2024 21:41:48 +0800
Subject: [PATCH] block, bfq: fix uaf for accessing waker_bfqq after splitting
After commit 42c306ed7233 ("block, bfq: don't break merge chain in
bfq_split_bfqq()"), if the current procress is the last holder of bfqq,
the bfqq can be freed after bfq_split_bfqq(). Hence recored the bfqq and
then access bfqq->waker_bfqq may trigger UAF. What's more, the waker_bfqq
may in the merge chain of bfqq, hence just recored waker_bfqq is still
not safe.
Fix the problem by adding a helper bfq_waker_bfqq() to check if
bfqq->waker_bfqq is in the merge chain, and current procress is the only
holder.
Fixes: 42c306ed7233 ("block, bfq: don't break merge chain in bfq_split_bfqq()")
Signed-off-by: Yu Kuai <yukuai3(a)huawei.com>
Link: https://lore.kernel.org/r/20240909134154.954924-2-yukuai1@huaweicloud.com
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index d1bf2b8a3576..d5d39974c674 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -6825,6 +6825,31 @@ static void bfq_prepare_request(struct request *rq)
rq->elv.priv[0] = rq->elv.priv[1] = NULL;
}
+static struct bfq_queue *bfq_waker_bfqq(struct bfq_queue *bfqq)
+{
+ struct bfq_queue *new_bfqq = bfqq->new_bfqq;
+ struct bfq_queue *waker_bfqq = bfqq->waker_bfqq;
+
+ if (!waker_bfqq)
+ return NULL;
+
+ while (new_bfqq) {
+ if (new_bfqq == waker_bfqq) {
+ /*
+ * If waker_bfqq is in the merge chain, and current
+ * is the only procress.
+ */
+ if (bfqq_process_refs(waker_bfqq) == 1)
+ return NULL;
+ break;
+ }
+
+ new_bfqq = new_bfqq->new_bfqq;
+ }
+
+ return waker_bfqq;
+}
+
/*
* If needed, init rq, allocate bfq data structures associated with
* rq, and increment reference counters in the destination bfq_queue
@@ -6886,7 +6911,7 @@ static struct bfq_queue *bfq_init_rq(struct request *rq)
/* If the queue was seeky for too long, break it apart. */
if (bfq_bfqq_coop(bfqq) && bfq_bfqq_split_coop(bfqq) &&
!bic->bfqq_data[a_idx].stably_merged) {
- struct bfq_queue *old_bfqq = bfqq;
+ struct bfq_queue *waker_bfqq = bfq_waker_bfqq(bfqq);
/* Update bic before losing reference to bfqq */
if (bfq_bfqq_in_large_burst(bfqq))
@@ -6906,7 +6931,7 @@ static struct bfq_queue *bfq_init_rq(struct request *rq)
bfqq_already_existing = true;
if (!bfqq_already_existing) {
- bfqq->waker_bfqq = old_bfqq->waker_bfqq;
+ bfqq->waker_bfqq = waker_bfqq;
bfqq->tentative_waker_bfqq = NULL;
/*
@@ -6916,7 +6941,7 @@ static struct bfq_queue *bfq_init_rq(struct request *rq)
* woken_list of the waker. See
* bfq_check_waker for details.
*/
- if (bfqq->waker_bfqq)
+ if (waker_bfqq)
hlist_add_head(&bfqq->woken_list_node,
&bfqq->waker_bfqq->woken_list);
}
The patch below does not apply to the 6.11-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.11.y
git checkout FETCH_HEAD
git cherry-pick -x 5fe6e308abaea082c20fbf2aa5df8e14495622cf
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100247-spray-enjoyable-b1d0@gregkh' --subject-prefix 'PATCH 6.11.y' HEAD^..
Possible dependencies:
5fe6e308abae ("bpf: Fix use-after-free in bpf_uprobe_multi_link_attach()")
3c83a9ad0295 ("uprobes: make uprobe_register() return struct uprobe *")
e04332ebc8ac ("uprobes: kill uprobe_register_refctr()")
db61e6a4eee5 ("selftests/bpf: fix uprobe.path leak in bpf_testmod")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 5fe6e308abaea082c20fbf2aa5df8e14495622cf Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg(a)redhat.com>
Date: Tue, 13 Aug 2024 17:25:24 +0200
Subject: [PATCH] bpf: Fix use-after-free in bpf_uprobe_multi_link_attach()
If bpf_link_prime() fails, bpf_uprobe_multi_link_attach() goes to the
error_free label and frees the array of bpf_uprobe's without calling
bpf_uprobe_unregister().
This leaks bpf_uprobe->uprobe and worse, this frees bpf_uprobe->consumer
without removing it from the uprobe->consumers list.
Fixes: 89ae89f53d20 ("bpf: Add multi uprobe link")
Closes: https://lore.kernel.org/all/000000000000382d39061f59f2dd@google.com/
Reported-by: syzbot+f7a1c2c2711e4a780f19(a)syzkaller.appspotmail.com
Signed-off-by: Oleg Nesterov <oleg(a)redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz(a)infradead.org>
Acked-by: Andrii Nakryiko <andrii(a)kernel.org>
Acked-by: Jiri Olsa <jolsa(a)kernel.org>
Tested-by: syzbot+f7a1c2c2711e4a780f19(a)syzkaller.appspotmail.com
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/20240813152524.GA7292@redhat.com
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 4e391daafa64..90cd30e9723e 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -3484,17 +3484,20 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
&uprobes[i].consumer);
if (IS_ERR(uprobes[i].uprobe)) {
err = PTR_ERR(uprobes[i].uprobe);
- bpf_uprobe_unregister(uprobes, i);
- goto error_free;
+ link->cnt = i;
+ goto error_unregister;
}
}
err = bpf_link_prime(&link->link, &link_primer);
if (err)
- goto error_free;
+ goto error_unregister;
return bpf_link_settle(&link_primer);
+error_unregister:
+ bpf_uprobe_unregister(uprobes, link->cnt);
+
error_free:
kvfree(uprobes);
kfree(link);
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x e6a3531dd542cb127c8de32ab1e54a48ae19962b
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100250-mortuary-cogwheel-4144@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
e6a3531dd542 ("dm-verity: restart or panic on an I/O error")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e6a3531dd542cb127c8de32ab1e54a48ae19962b Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka(a)redhat.com>
Date: Tue, 24 Sep 2024 15:18:29 +0200
Subject: [PATCH] dm-verity: restart or panic on an I/O error
Maxim Suhanov reported that dm-verity doesn't crash if an I/O error
happens. In theory, this could be used to subvert security, because an
attacker can create sectors that return error with the Write Uncorrectable
command. Some programs may misbehave if they have to deal with EIO.
This commit fixes dm-verity, so that if "panic_on_corruption" or
"restart_on_corruption" was specified and an I/O error happens, the
machine will panic or restart.
This commit also changes kernel_restart to emergency_restart -
kernel_restart calls reboot notifiers and these reboot notifiers may wait
for the bio that failed. emergency_restart doesn't call the notifiers.
Reported-by: Maxim Suhanov <dfirblog(a)gmail.com>
Signed-off-by: Mikulas Patocka <mpatocka(a)redhat.com>
Cc: stable(a)vger.kernel.org
diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
index cf659c8feb29..a95c1b9cc5b5 100644
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c
@@ -272,8 +272,10 @@ static int verity_handle_err(struct dm_verity *v, enum verity_block_type type,
if (v->mode == DM_VERITY_MODE_LOGGING)
return 0;
- if (v->mode == DM_VERITY_MODE_RESTART)
- kernel_restart("dm-verity device corrupted");
+ if (v->mode == DM_VERITY_MODE_RESTART) {
+ pr_emerg("dm-verity device corrupted\n");
+ emergency_restart();
+ }
if (v->mode == DM_VERITY_MODE_PANIC)
panic("dm-verity device corrupted");
@@ -596,6 +598,23 @@ static void verity_finish_io(struct dm_verity_io *io, blk_status_t status)
if (!static_branch_unlikely(&use_bh_wq_enabled) || !io->in_bh)
verity_fec_finish_io(io);
+ if (unlikely(status != BLK_STS_OK) &&
+ unlikely(!(bio->bi_opf & REQ_RAHEAD)) &&
+ !verity_is_system_shutting_down()) {
+ if (v->mode == DM_VERITY_MODE_RESTART ||
+ v->mode == DM_VERITY_MODE_PANIC)
+ DMERR_LIMIT("%s has error: %s", v->data_dev->name,
+ blk_status_to_str(status));
+
+ if (v->mode == DM_VERITY_MODE_RESTART) {
+ pr_emerg("dm-verity device corrupted\n");
+ emergency_restart();
+ }
+
+ if (v->mode == DM_VERITY_MODE_PANIC)
+ panic("dm-verity device corrupted");
+ }
+
bio_endio(bio);
}
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x e6a3531dd542cb127c8de32ab1e54a48ae19962b
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100249-snorkel-jockey-0dd7@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
e6a3531dd542 ("dm-verity: restart or panic on an I/O error")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e6a3531dd542cb127c8de32ab1e54a48ae19962b Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka(a)redhat.com>
Date: Tue, 24 Sep 2024 15:18:29 +0200
Subject: [PATCH] dm-verity: restart or panic on an I/O error
Maxim Suhanov reported that dm-verity doesn't crash if an I/O error
happens. In theory, this could be used to subvert security, because an
attacker can create sectors that return error with the Write Uncorrectable
command. Some programs may misbehave if they have to deal with EIO.
This commit fixes dm-verity, so that if "panic_on_corruption" or
"restart_on_corruption" was specified and an I/O error happens, the
machine will panic or restart.
This commit also changes kernel_restart to emergency_restart -
kernel_restart calls reboot notifiers and these reboot notifiers may wait
for the bio that failed. emergency_restart doesn't call the notifiers.
Reported-by: Maxim Suhanov <dfirblog(a)gmail.com>
Signed-off-by: Mikulas Patocka <mpatocka(a)redhat.com>
Cc: stable(a)vger.kernel.org
diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
index cf659c8feb29..a95c1b9cc5b5 100644
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c
@@ -272,8 +272,10 @@ static int verity_handle_err(struct dm_verity *v, enum verity_block_type type,
if (v->mode == DM_VERITY_MODE_LOGGING)
return 0;
- if (v->mode == DM_VERITY_MODE_RESTART)
- kernel_restart("dm-verity device corrupted");
+ if (v->mode == DM_VERITY_MODE_RESTART) {
+ pr_emerg("dm-verity device corrupted\n");
+ emergency_restart();
+ }
if (v->mode == DM_VERITY_MODE_PANIC)
panic("dm-verity device corrupted");
@@ -596,6 +598,23 @@ static void verity_finish_io(struct dm_verity_io *io, blk_status_t status)
if (!static_branch_unlikely(&use_bh_wq_enabled) || !io->in_bh)
verity_fec_finish_io(io);
+ if (unlikely(status != BLK_STS_OK) &&
+ unlikely(!(bio->bi_opf & REQ_RAHEAD)) &&
+ !verity_is_system_shutting_down()) {
+ if (v->mode == DM_VERITY_MODE_RESTART ||
+ v->mode == DM_VERITY_MODE_PANIC)
+ DMERR_LIMIT("%s has error: %s", v->data_dev->name,
+ blk_status_to_str(status));
+
+ if (v->mode == DM_VERITY_MODE_RESTART) {
+ pr_emerg("dm-verity device corrupted\n");
+ emergency_restart();
+ }
+
+ if (v->mode == DM_VERITY_MODE_PANIC)
+ panic("dm-verity device corrupted");
+ }
+
bio_endio(bio);
}
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x e6a3531dd542cb127c8de32ab1e54a48ae19962b
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100248-blitz-lapdog-06dc@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
e6a3531dd542 ("dm-verity: restart or panic on an I/O error")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e6a3531dd542cb127c8de32ab1e54a48ae19962b Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka(a)redhat.com>
Date: Tue, 24 Sep 2024 15:18:29 +0200
Subject: [PATCH] dm-verity: restart or panic on an I/O error
Maxim Suhanov reported that dm-verity doesn't crash if an I/O error
happens. In theory, this could be used to subvert security, because an
attacker can create sectors that return error with the Write Uncorrectable
command. Some programs may misbehave if they have to deal with EIO.
This commit fixes dm-verity, so that if "panic_on_corruption" or
"restart_on_corruption" was specified and an I/O error happens, the
machine will panic or restart.
This commit also changes kernel_restart to emergency_restart -
kernel_restart calls reboot notifiers and these reboot notifiers may wait
for the bio that failed. emergency_restart doesn't call the notifiers.
Reported-by: Maxim Suhanov <dfirblog(a)gmail.com>
Signed-off-by: Mikulas Patocka <mpatocka(a)redhat.com>
Cc: stable(a)vger.kernel.org
diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
index cf659c8feb29..a95c1b9cc5b5 100644
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c
@@ -272,8 +272,10 @@ static int verity_handle_err(struct dm_verity *v, enum verity_block_type type,
if (v->mode == DM_VERITY_MODE_LOGGING)
return 0;
- if (v->mode == DM_VERITY_MODE_RESTART)
- kernel_restart("dm-verity device corrupted");
+ if (v->mode == DM_VERITY_MODE_RESTART) {
+ pr_emerg("dm-verity device corrupted\n");
+ emergency_restart();
+ }
if (v->mode == DM_VERITY_MODE_PANIC)
panic("dm-verity device corrupted");
@@ -596,6 +598,23 @@ static void verity_finish_io(struct dm_verity_io *io, blk_status_t status)
if (!static_branch_unlikely(&use_bh_wq_enabled) || !io->in_bh)
verity_fec_finish_io(io);
+ if (unlikely(status != BLK_STS_OK) &&
+ unlikely(!(bio->bi_opf & REQ_RAHEAD)) &&
+ !verity_is_system_shutting_down()) {
+ if (v->mode == DM_VERITY_MODE_RESTART ||
+ v->mode == DM_VERITY_MODE_PANIC)
+ DMERR_LIMIT("%s has error: %s", v->data_dev->name,
+ blk_status_to_str(status));
+
+ if (v->mode == DM_VERITY_MODE_RESTART) {
+ pr_emerg("dm-verity device corrupted\n");
+ emergency_restart();
+ }
+
+ if (v->mode == DM_VERITY_MODE_PANIC)
+ panic("dm-verity device corrupted");
+ }
+
bio_endio(bio);
}
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x e6a3531dd542cb127c8de32ab1e54a48ae19962b
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100248-cycling-unseemly-7286@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
e6a3531dd542 ("dm-verity: restart or panic on an I/O error")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e6a3531dd542cb127c8de32ab1e54a48ae19962b Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka(a)redhat.com>
Date: Tue, 24 Sep 2024 15:18:29 +0200
Subject: [PATCH] dm-verity: restart or panic on an I/O error
Maxim Suhanov reported that dm-verity doesn't crash if an I/O error
happens. In theory, this could be used to subvert security, because an
attacker can create sectors that return error with the Write Uncorrectable
command. Some programs may misbehave if they have to deal with EIO.
This commit fixes dm-verity, so that if "panic_on_corruption" or
"restart_on_corruption" was specified and an I/O error happens, the
machine will panic or restart.
This commit also changes kernel_restart to emergency_restart -
kernel_restart calls reboot notifiers and these reboot notifiers may wait
for the bio that failed. emergency_restart doesn't call the notifiers.
Reported-by: Maxim Suhanov <dfirblog(a)gmail.com>
Signed-off-by: Mikulas Patocka <mpatocka(a)redhat.com>
Cc: stable(a)vger.kernel.org
diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
index cf659c8feb29..a95c1b9cc5b5 100644
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c
@@ -272,8 +272,10 @@ static int verity_handle_err(struct dm_verity *v, enum verity_block_type type,
if (v->mode == DM_VERITY_MODE_LOGGING)
return 0;
- if (v->mode == DM_VERITY_MODE_RESTART)
- kernel_restart("dm-verity device corrupted");
+ if (v->mode == DM_VERITY_MODE_RESTART) {
+ pr_emerg("dm-verity device corrupted\n");
+ emergency_restart();
+ }
if (v->mode == DM_VERITY_MODE_PANIC)
panic("dm-verity device corrupted");
@@ -596,6 +598,23 @@ static void verity_finish_io(struct dm_verity_io *io, blk_status_t status)
if (!static_branch_unlikely(&use_bh_wq_enabled) || !io->in_bh)
verity_fec_finish_io(io);
+ if (unlikely(status != BLK_STS_OK) &&
+ unlikely(!(bio->bi_opf & REQ_RAHEAD)) &&
+ !verity_is_system_shutting_down()) {
+ if (v->mode == DM_VERITY_MODE_RESTART ||
+ v->mode == DM_VERITY_MODE_PANIC)
+ DMERR_LIMIT("%s has error: %s", v->data_dev->name,
+ blk_status_to_str(status));
+
+ if (v->mode == DM_VERITY_MODE_RESTART) {
+ pr_emerg("dm-verity device corrupted\n");
+ emergency_restart();
+ }
+
+ if (v->mode == DM_VERITY_MODE_PANIC)
+ panic("dm-verity device corrupted");
+ }
+
bio_endio(bio);
}
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x 1d4a1adbed2582444aaf97671858b7d12915bd05
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100220-hazard-sharper-437c@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
1d4a1adbed25 ("i2c: xiic: Try re-initialization on bus busy timeout")
ee1691d0ae10 ("i2c: xiic: improve error message when transfer fails to start")
d663d93bb47e ("i2c: xiic: xiic_xfer(): Fix runtime PM leak on error path")
294b29f15469 ("i2c: xiic: Fix RX IRQ busy check")
c119e7d00c91 ("i2c: xiic: Fix broken locking on tx_msg")
9e3b184b3b4f ("i2c: xiic: Support forcing single-master in DT")
9106e45ceaaf ("i2c: xiic: Improve struct memory alignment")
0a9336ee133d ("i2c: xiic: Change code alignment to 1 space only")
b4c119dbc300 ("i2c: xiic: Add timeout to the rx fifo wait loop")
bcc156e2289d ("i2c: xiic: Fix kerneldoc warnings")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1d4a1adbed2582444aaf97671858b7d12915bd05 Mon Sep 17 00:00:00 2001
From: Robert Hancock <robert.hancock(a)calian.com>
Date: Wed, 11 Sep 2024 22:16:53 +0200
Subject: [PATCH] i2c: xiic: Try re-initialization on bus busy timeout
In the event that the I2C bus was powered down when the I2C controller
driver loads, or some spurious pulses occur on the I2C bus, it's
possible that the controller detects a spurious I2C "start" condition.
In this situation it may continue to report the bus is busy indefinitely
and block the controller from working.
The "single-master" DT flag can be specified to disable bus busy checks
entirely, but this may not be safe to use in situations where other I2C
masters may potentially exist.
In the event that the controller reports "bus busy" for too long when
starting a transaction, we can try reinitializing the controller to see
if the busy condition clears. This allows recovering from this scenario.
Fixes: e1d5b6598cdc ("i2c: Add support for Xilinx XPS IIC Bus Interface")
Signed-off-by: Robert Hancock <robert.hancock(a)calian.com>
Cc: <stable(a)vger.kernel.org> # v2.6.34+
Reviewed-by: Manikanta Guntupalli <manikanta.guntupalli(a)amd.com>
Acked-by: Michal Simek <michal.simek(a)amd.com>
Signed-off-by: Andi Shyti <andi.shyti(a)kernel.org>
diff --git a/drivers/i2c/busses/i2c-xiic.c b/drivers/i2c/busses/i2c-xiic.c
index bd6e107472b7..4c89aad02451 100644
--- a/drivers/i2c/busses/i2c-xiic.c
+++ b/drivers/i2c/busses/i2c-xiic.c
@@ -843,23 +843,11 @@ static int xiic_bus_busy(struct xiic_i2c *i2c)
return (sr & XIIC_SR_BUS_BUSY_MASK) ? -EBUSY : 0;
}
-static int xiic_busy(struct xiic_i2c *i2c)
+static int xiic_wait_not_busy(struct xiic_i2c *i2c)
{
int tries = 3;
int err;
- if (i2c->tx_msg || i2c->rx_msg)
- return -EBUSY;
-
- /* In single master mode bus can only be busy, when in use by this
- * driver. If the register indicates bus being busy for some reason we
- * should ignore it, since bus will never be released and i2c will be
- * stuck forever.
- */
- if (i2c->singlemaster) {
- return 0;
- }
-
/* for instance if previous transfer was terminated due to TX error
* it might be that the bus is on it's way to become available
* give it at most 3 ms to wake
@@ -1103,13 +1091,36 @@ static int xiic_start_xfer(struct xiic_i2c *i2c, struct i2c_msg *msgs, int num)
mutex_lock(&i2c->lock);
- ret = xiic_busy(i2c);
- if (ret) {
+ if (i2c->tx_msg || i2c->rx_msg) {
dev_err(i2c->adap.dev.parent,
"cannot start a transfer while busy\n");
+ ret = -EBUSY;
goto out;
}
+ /* In single master mode bus can only be busy, when in use by this
+ * driver. If the register indicates bus being busy for some reason we
+ * should ignore it, since bus will never be released and i2c will be
+ * stuck forever.
+ */
+ if (!i2c->singlemaster) {
+ ret = xiic_wait_not_busy(i2c);
+ if (ret) {
+ /* If the bus is stuck in a busy state, such as due to spurious low
+ * pulses on the bus causing a false start condition to be detected,
+ * then try to recover by re-initializing the controller and check
+ * again if the bus is still busy.
+ */
+ dev_warn(i2c->adap.dev.parent, "I2C bus busy timeout, reinitializing\n");
+ ret = xiic_reinit(i2c);
+ if (ret)
+ goto out;
+ ret = xiic_wait_not_busy(i2c);
+ if (ret)
+ goto out;
+ }
+ }
+
i2c->tx_msg = msgs;
i2c->rx_msg = NULL;
i2c->nmsgs = num;
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 1d4a1adbed2582444aaf97671858b7d12915bd05
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100219-purveyor-waffle-a828@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
1d4a1adbed25 ("i2c: xiic: Try re-initialization on bus busy timeout")
ee1691d0ae10 ("i2c: xiic: improve error message when transfer fails to start")
d663d93bb47e ("i2c: xiic: xiic_xfer(): Fix runtime PM leak on error path")
294b29f15469 ("i2c: xiic: Fix RX IRQ busy check")
c119e7d00c91 ("i2c: xiic: Fix broken locking on tx_msg")
9e3b184b3b4f ("i2c: xiic: Support forcing single-master in DT")
9106e45ceaaf ("i2c: xiic: Improve struct memory alignment")
0a9336ee133d ("i2c: xiic: Change code alignment to 1 space only")
b4c119dbc300 ("i2c: xiic: Add timeout to the rx fifo wait loop")
bcc156e2289d ("i2c: xiic: Fix kerneldoc warnings")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1d4a1adbed2582444aaf97671858b7d12915bd05 Mon Sep 17 00:00:00 2001
From: Robert Hancock <robert.hancock(a)calian.com>
Date: Wed, 11 Sep 2024 22:16:53 +0200
Subject: [PATCH] i2c: xiic: Try re-initialization on bus busy timeout
In the event that the I2C bus was powered down when the I2C controller
driver loads, or some spurious pulses occur on the I2C bus, it's
possible that the controller detects a spurious I2C "start" condition.
In this situation it may continue to report the bus is busy indefinitely
and block the controller from working.
The "single-master" DT flag can be specified to disable bus busy checks
entirely, but this may not be safe to use in situations where other I2C
masters may potentially exist.
In the event that the controller reports "bus busy" for too long when
starting a transaction, we can try reinitializing the controller to see
if the busy condition clears. This allows recovering from this scenario.
Fixes: e1d5b6598cdc ("i2c: Add support for Xilinx XPS IIC Bus Interface")
Signed-off-by: Robert Hancock <robert.hancock(a)calian.com>
Cc: <stable(a)vger.kernel.org> # v2.6.34+
Reviewed-by: Manikanta Guntupalli <manikanta.guntupalli(a)amd.com>
Acked-by: Michal Simek <michal.simek(a)amd.com>
Signed-off-by: Andi Shyti <andi.shyti(a)kernel.org>
diff --git a/drivers/i2c/busses/i2c-xiic.c b/drivers/i2c/busses/i2c-xiic.c
index bd6e107472b7..4c89aad02451 100644
--- a/drivers/i2c/busses/i2c-xiic.c
+++ b/drivers/i2c/busses/i2c-xiic.c
@@ -843,23 +843,11 @@ static int xiic_bus_busy(struct xiic_i2c *i2c)
return (sr & XIIC_SR_BUS_BUSY_MASK) ? -EBUSY : 0;
}
-static int xiic_busy(struct xiic_i2c *i2c)
+static int xiic_wait_not_busy(struct xiic_i2c *i2c)
{
int tries = 3;
int err;
- if (i2c->tx_msg || i2c->rx_msg)
- return -EBUSY;
-
- /* In single master mode bus can only be busy, when in use by this
- * driver. If the register indicates bus being busy for some reason we
- * should ignore it, since bus will never be released and i2c will be
- * stuck forever.
- */
- if (i2c->singlemaster) {
- return 0;
- }
-
/* for instance if previous transfer was terminated due to TX error
* it might be that the bus is on it's way to become available
* give it at most 3 ms to wake
@@ -1103,13 +1091,36 @@ static int xiic_start_xfer(struct xiic_i2c *i2c, struct i2c_msg *msgs, int num)
mutex_lock(&i2c->lock);
- ret = xiic_busy(i2c);
- if (ret) {
+ if (i2c->tx_msg || i2c->rx_msg) {
dev_err(i2c->adap.dev.parent,
"cannot start a transfer while busy\n");
+ ret = -EBUSY;
goto out;
}
+ /* In single master mode bus can only be busy, when in use by this
+ * driver. If the register indicates bus being busy for some reason we
+ * should ignore it, since bus will never be released and i2c will be
+ * stuck forever.
+ */
+ if (!i2c->singlemaster) {
+ ret = xiic_wait_not_busy(i2c);
+ if (ret) {
+ /* If the bus is stuck in a busy state, such as due to spurious low
+ * pulses on the bus causing a false start condition to be detected,
+ * then try to recover by re-initializing the controller and check
+ * again if the bus is still busy.
+ */
+ dev_warn(i2c->adap.dev.parent, "I2C bus busy timeout, reinitializing\n");
+ ret = xiic_reinit(i2c);
+ if (ret)
+ goto out;
+ ret = xiic_wait_not_busy(i2c);
+ if (ret)
+ goto out;
+ }
+ }
+
i2c->tx_msg = msgs;
i2c->rx_msg = NULL;
i2c->nmsgs = num;
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 1d4a1adbed2582444aaf97671858b7d12915bd05
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100218-unlovable-upon-1c43@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
1d4a1adbed25 ("i2c: xiic: Try re-initialization on bus busy timeout")
ee1691d0ae10 ("i2c: xiic: improve error message when transfer fails to start")
d663d93bb47e ("i2c: xiic: xiic_xfer(): Fix runtime PM leak on error path")
294b29f15469 ("i2c: xiic: Fix RX IRQ busy check")
c119e7d00c91 ("i2c: xiic: Fix broken locking on tx_msg")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1d4a1adbed2582444aaf97671858b7d12915bd05 Mon Sep 17 00:00:00 2001
From: Robert Hancock <robert.hancock(a)calian.com>
Date: Wed, 11 Sep 2024 22:16:53 +0200
Subject: [PATCH] i2c: xiic: Try re-initialization on bus busy timeout
In the event that the I2C bus was powered down when the I2C controller
driver loads, or some spurious pulses occur on the I2C bus, it's
possible that the controller detects a spurious I2C "start" condition.
In this situation it may continue to report the bus is busy indefinitely
and block the controller from working.
The "single-master" DT flag can be specified to disable bus busy checks
entirely, but this may not be safe to use in situations where other I2C
masters may potentially exist.
In the event that the controller reports "bus busy" for too long when
starting a transaction, we can try reinitializing the controller to see
if the busy condition clears. This allows recovering from this scenario.
Fixes: e1d5b6598cdc ("i2c: Add support for Xilinx XPS IIC Bus Interface")
Signed-off-by: Robert Hancock <robert.hancock(a)calian.com>
Cc: <stable(a)vger.kernel.org> # v2.6.34+
Reviewed-by: Manikanta Guntupalli <manikanta.guntupalli(a)amd.com>
Acked-by: Michal Simek <michal.simek(a)amd.com>
Signed-off-by: Andi Shyti <andi.shyti(a)kernel.org>
diff --git a/drivers/i2c/busses/i2c-xiic.c b/drivers/i2c/busses/i2c-xiic.c
index bd6e107472b7..4c89aad02451 100644
--- a/drivers/i2c/busses/i2c-xiic.c
+++ b/drivers/i2c/busses/i2c-xiic.c
@@ -843,23 +843,11 @@ static int xiic_bus_busy(struct xiic_i2c *i2c)
return (sr & XIIC_SR_BUS_BUSY_MASK) ? -EBUSY : 0;
}
-static int xiic_busy(struct xiic_i2c *i2c)
+static int xiic_wait_not_busy(struct xiic_i2c *i2c)
{
int tries = 3;
int err;
- if (i2c->tx_msg || i2c->rx_msg)
- return -EBUSY;
-
- /* In single master mode bus can only be busy, when in use by this
- * driver. If the register indicates bus being busy for some reason we
- * should ignore it, since bus will never be released and i2c will be
- * stuck forever.
- */
- if (i2c->singlemaster) {
- return 0;
- }
-
/* for instance if previous transfer was terminated due to TX error
* it might be that the bus is on it's way to become available
* give it at most 3 ms to wake
@@ -1103,13 +1091,36 @@ static int xiic_start_xfer(struct xiic_i2c *i2c, struct i2c_msg *msgs, int num)
mutex_lock(&i2c->lock);
- ret = xiic_busy(i2c);
- if (ret) {
+ if (i2c->tx_msg || i2c->rx_msg) {
dev_err(i2c->adap.dev.parent,
"cannot start a transfer while busy\n");
+ ret = -EBUSY;
goto out;
}
+ /* In single master mode bus can only be busy, when in use by this
+ * driver. If the register indicates bus being busy for some reason we
+ * should ignore it, since bus will never be released and i2c will be
+ * stuck forever.
+ */
+ if (!i2c->singlemaster) {
+ ret = xiic_wait_not_busy(i2c);
+ if (ret) {
+ /* If the bus is stuck in a busy state, such as due to spurious low
+ * pulses on the bus causing a false start condition to be detected,
+ * then try to recover by re-initializing the controller and check
+ * again if the bus is still busy.
+ */
+ dev_warn(i2c->adap.dev.parent, "I2C bus busy timeout, reinitializing\n");
+ ret = xiic_reinit(i2c);
+ if (ret)
+ goto out;
+ ret = xiic_wait_not_busy(i2c);
+ if (ret)
+ goto out;
+ }
+ }
+
i2c->tx_msg = msgs;
i2c->rx_msg = NULL;
i2c->nmsgs = num;
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 1d4a1adbed2582444aaf97671858b7d12915bd05
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100217-schematic-oil-69e2@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
1d4a1adbed25 ("i2c: xiic: Try re-initialization on bus busy timeout")
ee1691d0ae10 ("i2c: xiic: improve error message when transfer fails to start")
d663d93bb47e ("i2c: xiic: xiic_xfer(): Fix runtime PM leak on error path")
294b29f15469 ("i2c: xiic: Fix RX IRQ busy check")
c119e7d00c91 ("i2c: xiic: Fix broken locking on tx_msg")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1d4a1adbed2582444aaf97671858b7d12915bd05 Mon Sep 17 00:00:00 2001
From: Robert Hancock <robert.hancock(a)calian.com>
Date: Wed, 11 Sep 2024 22:16:53 +0200
Subject: [PATCH] i2c: xiic: Try re-initialization on bus busy timeout
In the event that the I2C bus was powered down when the I2C controller
driver loads, or some spurious pulses occur on the I2C bus, it's
possible that the controller detects a spurious I2C "start" condition.
In this situation it may continue to report the bus is busy indefinitely
and block the controller from working.
The "single-master" DT flag can be specified to disable bus busy checks
entirely, but this may not be safe to use in situations where other I2C
masters may potentially exist.
In the event that the controller reports "bus busy" for too long when
starting a transaction, we can try reinitializing the controller to see
if the busy condition clears. This allows recovering from this scenario.
Fixes: e1d5b6598cdc ("i2c: Add support for Xilinx XPS IIC Bus Interface")
Signed-off-by: Robert Hancock <robert.hancock(a)calian.com>
Cc: <stable(a)vger.kernel.org> # v2.6.34+
Reviewed-by: Manikanta Guntupalli <manikanta.guntupalli(a)amd.com>
Acked-by: Michal Simek <michal.simek(a)amd.com>
Signed-off-by: Andi Shyti <andi.shyti(a)kernel.org>
diff --git a/drivers/i2c/busses/i2c-xiic.c b/drivers/i2c/busses/i2c-xiic.c
index bd6e107472b7..4c89aad02451 100644
--- a/drivers/i2c/busses/i2c-xiic.c
+++ b/drivers/i2c/busses/i2c-xiic.c
@@ -843,23 +843,11 @@ static int xiic_bus_busy(struct xiic_i2c *i2c)
return (sr & XIIC_SR_BUS_BUSY_MASK) ? -EBUSY : 0;
}
-static int xiic_busy(struct xiic_i2c *i2c)
+static int xiic_wait_not_busy(struct xiic_i2c *i2c)
{
int tries = 3;
int err;
- if (i2c->tx_msg || i2c->rx_msg)
- return -EBUSY;
-
- /* In single master mode bus can only be busy, when in use by this
- * driver. If the register indicates bus being busy for some reason we
- * should ignore it, since bus will never be released and i2c will be
- * stuck forever.
- */
- if (i2c->singlemaster) {
- return 0;
- }
-
/* for instance if previous transfer was terminated due to TX error
* it might be that the bus is on it's way to become available
* give it at most 3 ms to wake
@@ -1103,13 +1091,36 @@ static int xiic_start_xfer(struct xiic_i2c *i2c, struct i2c_msg *msgs, int num)
mutex_lock(&i2c->lock);
- ret = xiic_busy(i2c);
- if (ret) {
+ if (i2c->tx_msg || i2c->rx_msg) {
dev_err(i2c->adap.dev.parent,
"cannot start a transfer while busy\n");
+ ret = -EBUSY;
goto out;
}
+ /* In single master mode bus can only be busy, when in use by this
+ * driver. If the register indicates bus being busy for some reason we
+ * should ignore it, since bus will never be released and i2c will be
+ * stuck forever.
+ */
+ if (!i2c->singlemaster) {
+ ret = xiic_wait_not_busy(i2c);
+ if (ret) {
+ /* If the bus is stuck in a busy state, such as due to spurious low
+ * pulses on the bus causing a false start condition to be detected,
+ * then try to recover by re-initializing the controller and check
+ * again if the bus is still busy.
+ */
+ dev_warn(i2c->adap.dev.parent, "I2C bus busy timeout, reinitializing\n");
+ ret = xiic_reinit(i2c);
+ if (ret)
+ goto out;
+ ret = xiic_wait_not_busy(i2c);
+ if (ret)
+ goto out;
+ }
+ }
+
i2c->tx_msg = msgs;
i2c->rx_msg = NULL;
i2c->nmsgs = num;
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 1d4a1adbed2582444aaf97671858b7d12915bd05
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100216-baboon-arson-e49d@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
1d4a1adbed25 ("i2c: xiic: Try re-initialization on bus busy timeout")
ee1691d0ae10 ("i2c: xiic: improve error message when transfer fails to start")
d663d93bb47e ("i2c: xiic: xiic_xfer(): Fix runtime PM leak on error path")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1d4a1adbed2582444aaf97671858b7d12915bd05 Mon Sep 17 00:00:00 2001
From: Robert Hancock <robert.hancock(a)calian.com>
Date: Wed, 11 Sep 2024 22:16:53 +0200
Subject: [PATCH] i2c: xiic: Try re-initialization on bus busy timeout
In the event that the I2C bus was powered down when the I2C controller
driver loads, or some spurious pulses occur on the I2C bus, it's
possible that the controller detects a spurious I2C "start" condition.
In this situation it may continue to report the bus is busy indefinitely
and block the controller from working.
The "single-master" DT flag can be specified to disable bus busy checks
entirely, but this may not be safe to use in situations where other I2C
masters may potentially exist.
In the event that the controller reports "bus busy" for too long when
starting a transaction, we can try reinitializing the controller to see
if the busy condition clears. This allows recovering from this scenario.
Fixes: e1d5b6598cdc ("i2c: Add support for Xilinx XPS IIC Bus Interface")
Signed-off-by: Robert Hancock <robert.hancock(a)calian.com>
Cc: <stable(a)vger.kernel.org> # v2.6.34+
Reviewed-by: Manikanta Guntupalli <manikanta.guntupalli(a)amd.com>
Acked-by: Michal Simek <michal.simek(a)amd.com>
Signed-off-by: Andi Shyti <andi.shyti(a)kernel.org>
diff --git a/drivers/i2c/busses/i2c-xiic.c b/drivers/i2c/busses/i2c-xiic.c
index bd6e107472b7..4c89aad02451 100644
--- a/drivers/i2c/busses/i2c-xiic.c
+++ b/drivers/i2c/busses/i2c-xiic.c
@@ -843,23 +843,11 @@ static int xiic_bus_busy(struct xiic_i2c *i2c)
return (sr & XIIC_SR_BUS_BUSY_MASK) ? -EBUSY : 0;
}
-static int xiic_busy(struct xiic_i2c *i2c)
+static int xiic_wait_not_busy(struct xiic_i2c *i2c)
{
int tries = 3;
int err;
- if (i2c->tx_msg || i2c->rx_msg)
- return -EBUSY;
-
- /* In single master mode bus can only be busy, when in use by this
- * driver. If the register indicates bus being busy for some reason we
- * should ignore it, since bus will never be released and i2c will be
- * stuck forever.
- */
- if (i2c->singlemaster) {
- return 0;
- }
-
/* for instance if previous transfer was terminated due to TX error
* it might be that the bus is on it's way to become available
* give it at most 3 ms to wake
@@ -1103,13 +1091,36 @@ static int xiic_start_xfer(struct xiic_i2c *i2c, struct i2c_msg *msgs, int num)
mutex_lock(&i2c->lock);
- ret = xiic_busy(i2c);
- if (ret) {
+ if (i2c->tx_msg || i2c->rx_msg) {
dev_err(i2c->adap.dev.parent,
"cannot start a transfer while busy\n");
+ ret = -EBUSY;
goto out;
}
+ /* In single master mode bus can only be busy, when in use by this
+ * driver. If the register indicates bus being busy for some reason we
+ * should ignore it, since bus will never be released and i2c will be
+ * stuck forever.
+ */
+ if (!i2c->singlemaster) {
+ ret = xiic_wait_not_busy(i2c);
+ if (ret) {
+ /* If the bus is stuck in a busy state, such as due to spurious low
+ * pulses on the bus causing a false start condition to be detected,
+ * then try to recover by re-initializing the controller and check
+ * again if the bus is still busy.
+ */
+ dev_warn(i2c->adap.dev.parent, "I2C bus busy timeout, reinitializing\n");
+ ret = xiic_reinit(i2c);
+ if (ret)
+ goto out;
+ ret = xiic_wait_not_busy(i2c);
+ if (ret)
+ goto out;
+ }
+ }
+
i2c->tx_msg = msgs;
i2c->rx_msg = NULL;
i2c->nmsgs = num;
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 1d4a1adbed2582444aaf97671858b7d12915bd05
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100216-tingling-finicky-658f@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
1d4a1adbed25 ("i2c: xiic: Try re-initialization on bus busy timeout")
ee1691d0ae10 ("i2c: xiic: improve error message when transfer fails to start")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1d4a1adbed2582444aaf97671858b7d12915bd05 Mon Sep 17 00:00:00 2001
From: Robert Hancock <robert.hancock(a)calian.com>
Date: Wed, 11 Sep 2024 22:16:53 +0200
Subject: [PATCH] i2c: xiic: Try re-initialization on bus busy timeout
In the event that the I2C bus was powered down when the I2C controller
driver loads, or some spurious pulses occur on the I2C bus, it's
possible that the controller detects a spurious I2C "start" condition.
In this situation it may continue to report the bus is busy indefinitely
and block the controller from working.
The "single-master" DT flag can be specified to disable bus busy checks
entirely, but this may not be safe to use in situations where other I2C
masters may potentially exist.
In the event that the controller reports "bus busy" for too long when
starting a transaction, we can try reinitializing the controller to see
if the busy condition clears. This allows recovering from this scenario.
Fixes: e1d5b6598cdc ("i2c: Add support for Xilinx XPS IIC Bus Interface")
Signed-off-by: Robert Hancock <robert.hancock(a)calian.com>
Cc: <stable(a)vger.kernel.org> # v2.6.34+
Reviewed-by: Manikanta Guntupalli <manikanta.guntupalli(a)amd.com>
Acked-by: Michal Simek <michal.simek(a)amd.com>
Signed-off-by: Andi Shyti <andi.shyti(a)kernel.org>
diff --git a/drivers/i2c/busses/i2c-xiic.c b/drivers/i2c/busses/i2c-xiic.c
index bd6e107472b7..4c89aad02451 100644
--- a/drivers/i2c/busses/i2c-xiic.c
+++ b/drivers/i2c/busses/i2c-xiic.c
@@ -843,23 +843,11 @@ static int xiic_bus_busy(struct xiic_i2c *i2c)
return (sr & XIIC_SR_BUS_BUSY_MASK) ? -EBUSY : 0;
}
-static int xiic_busy(struct xiic_i2c *i2c)
+static int xiic_wait_not_busy(struct xiic_i2c *i2c)
{
int tries = 3;
int err;
- if (i2c->tx_msg || i2c->rx_msg)
- return -EBUSY;
-
- /* In single master mode bus can only be busy, when in use by this
- * driver. If the register indicates bus being busy for some reason we
- * should ignore it, since bus will never be released and i2c will be
- * stuck forever.
- */
- if (i2c->singlemaster) {
- return 0;
- }
-
/* for instance if previous transfer was terminated due to TX error
* it might be that the bus is on it's way to become available
* give it at most 3 ms to wake
@@ -1103,13 +1091,36 @@ static int xiic_start_xfer(struct xiic_i2c *i2c, struct i2c_msg *msgs, int num)
mutex_lock(&i2c->lock);
- ret = xiic_busy(i2c);
- if (ret) {
+ if (i2c->tx_msg || i2c->rx_msg) {
dev_err(i2c->adap.dev.parent,
"cannot start a transfer while busy\n");
+ ret = -EBUSY;
goto out;
}
+ /* In single master mode bus can only be busy, when in use by this
+ * driver. If the register indicates bus being busy for some reason we
+ * should ignore it, since bus will never be released and i2c will be
+ * stuck forever.
+ */
+ if (!i2c->singlemaster) {
+ ret = xiic_wait_not_busy(i2c);
+ if (ret) {
+ /* If the bus is stuck in a busy state, such as due to spurious low
+ * pulses on the bus causing a false start condition to be detected,
+ * then try to recover by re-initializing the controller and check
+ * again if the bus is still busy.
+ */
+ dev_warn(i2c->adap.dev.parent, "I2C bus busy timeout, reinitializing\n");
+ ret = xiic_reinit(i2c);
+ if (ret)
+ goto out;
+ ret = xiic_wait_not_busy(i2c);
+ if (ret)
+ goto out;
+ }
+ }
+
i2c->tx_msg = msgs;
i2c->rx_msg = NULL;
i2c->nmsgs = num;
The patch below does not apply to the 6.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.10.y
git checkout FETCH_HEAD
git cherry-pick -x 1d4a1adbed2582444aaf97671858b7d12915bd05
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100215-lagged-definite-9910@gregkh' --subject-prefix 'PATCH 6.10.y' HEAD^..
Possible dependencies:
1d4a1adbed25 ("i2c: xiic: Try re-initialization on bus busy timeout")
ee1691d0ae10 ("i2c: xiic: improve error message when transfer fails to start")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1d4a1adbed2582444aaf97671858b7d12915bd05 Mon Sep 17 00:00:00 2001
From: Robert Hancock <robert.hancock(a)calian.com>
Date: Wed, 11 Sep 2024 22:16:53 +0200
Subject: [PATCH] i2c: xiic: Try re-initialization on bus busy timeout
In the event that the I2C bus was powered down when the I2C controller
driver loads, or some spurious pulses occur on the I2C bus, it's
possible that the controller detects a spurious I2C "start" condition.
In this situation it may continue to report the bus is busy indefinitely
and block the controller from working.
The "single-master" DT flag can be specified to disable bus busy checks
entirely, but this may not be safe to use in situations where other I2C
masters may potentially exist.
In the event that the controller reports "bus busy" for too long when
starting a transaction, we can try reinitializing the controller to see
if the busy condition clears. This allows recovering from this scenario.
Fixes: e1d5b6598cdc ("i2c: Add support for Xilinx XPS IIC Bus Interface")
Signed-off-by: Robert Hancock <robert.hancock(a)calian.com>
Cc: <stable(a)vger.kernel.org> # v2.6.34+
Reviewed-by: Manikanta Guntupalli <manikanta.guntupalli(a)amd.com>
Acked-by: Michal Simek <michal.simek(a)amd.com>
Signed-off-by: Andi Shyti <andi.shyti(a)kernel.org>
diff --git a/drivers/i2c/busses/i2c-xiic.c b/drivers/i2c/busses/i2c-xiic.c
index bd6e107472b7..4c89aad02451 100644
--- a/drivers/i2c/busses/i2c-xiic.c
+++ b/drivers/i2c/busses/i2c-xiic.c
@@ -843,23 +843,11 @@ static int xiic_bus_busy(struct xiic_i2c *i2c)
return (sr & XIIC_SR_BUS_BUSY_MASK) ? -EBUSY : 0;
}
-static int xiic_busy(struct xiic_i2c *i2c)
+static int xiic_wait_not_busy(struct xiic_i2c *i2c)
{
int tries = 3;
int err;
- if (i2c->tx_msg || i2c->rx_msg)
- return -EBUSY;
-
- /* In single master mode bus can only be busy, when in use by this
- * driver. If the register indicates bus being busy for some reason we
- * should ignore it, since bus will never be released and i2c will be
- * stuck forever.
- */
- if (i2c->singlemaster) {
- return 0;
- }
-
/* for instance if previous transfer was terminated due to TX error
* it might be that the bus is on it's way to become available
* give it at most 3 ms to wake
@@ -1103,13 +1091,36 @@ static int xiic_start_xfer(struct xiic_i2c *i2c, struct i2c_msg *msgs, int num)
mutex_lock(&i2c->lock);
- ret = xiic_busy(i2c);
- if (ret) {
+ if (i2c->tx_msg || i2c->rx_msg) {
dev_err(i2c->adap.dev.parent,
"cannot start a transfer while busy\n");
+ ret = -EBUSY;
goto out;
}
+ /* In single master mode bus can only be busy, when in use by this
+ * driver. If the register indicates bus being busy for some reason we
+ * should ignore it, since bus will never be released and i2c will be
+ * stuck forever.
+ */
+ if (!i2c->singlemaster) {
+ ret = xiic_wait_not_busy(i2c);
+ if (ret) {
+ /* If the bus is stuck in a busy state, such as due to spurious low
+ * pulses on the bus causing a false start condition to be detected,
+ * then try to recover by re-initializing the controller and check
+ * again if the bus is still busy.
+ */
+ dev_warn(i2c->adap.dev.parent, "I2C bus busy timeout, reinitializing\n");
+ ret = xiic_reinit(i2c);
+ if (ret)
+ goto out;
+ ret = xiic_wait_not_busy(i2c);
+ if (ret)
+ goto out;
+ }
+ }
+
i2c->tx_msg = msgs;
i2c->rx_msg = NULL;
i2c->nmsgs = num;
The patch below does not apply to the 6.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.10.y
git checkout FETCH_HEAD
git cherry-pick -x 95599ef684d01136a8b77c16a7c853496786e173
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100206-thread-uncorrupt-0ea0@gregkh' --subject-prefix 'PATCH 6.10.y' HEAD^..
Possible dependencies:
95599ef684d0 ("mm/codetag: fix pgalloc_tag_split()")
cf54f310d0d3 ("mm/hugetlb: use __GFP_COMP for gigantic folios")
c0f398c3b2cf ("mm/hugetlb_vmemmap: batch HVO work when demoting")
fbc90c042cd1 ("Merge tag 'mm-stable-2024-07-21-14-50' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 95599ef684d01136a8b77c16a7c853496786e173 Mon Sep 17 00:00:00 2001
From: Yu Zhao <yuzhao(a)google.com>
Date: Thu, 5 Sep 2024 22:21:07 -0600
Subject: [PATCH] mm/codetag: fix pgalloc_tag_split()
The current assumption is that a large folio can only be split into
order-0 folios. That is not the case for hugeTLB demotion, nor for THP
split: see commit c010d47f107f ("mm: thp: split huge page to any lower
order pages").
When a large folio is split into ones of a lower non-zero order, only the
new head pages should be tagged. Tagging tail pages can cause imbalanced
"calls" counters, since only head pages are untagged by pgalloc_tag_sub()
and the "calls" counts on tail pages are leaked, e.g.,
# echo 2048kB >/sys/kernel/mm/hugepages/hugepages-1048576kB/demote_size
# echo 700 >/sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages
# time echo 700 >/sys/kernel/mm/hugepages/hugepages-1048576kB/demote
# echo 0 >/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
# grep alloc_gigantic_folio /proc/allocinfo
Before this patch:
0 549427200 mm/hugetlb.c:1549 func:alloc_gigantic_folio
real 0m2.057s
user 0m0.000s
sys 0m2.051s
After this patch:
0 0 mm/hugetlb.c:1549 func:alloc_gigantic_folio
real 0m1.711s
user 0m0.000s
sys 0m1.704s
Not tagging tail pages also improves the splitting time, e.g., by about
15% when demoting 1GB hugeTLB folios to 2MB ones, as shown above.
Link: https://lkml.kernel.org/r/20240906042108.1150526-2-yuzhao@google.com
Fixes: be25d1d4e822 ("mm: create new codetag references during page splitting")
Signed-off-by: Yu Zhao <yuzhao(a)google.com>
Acked-by: Suren Baghdasaryan <surenb(a)google.com>
Cc: Kent Overstreet <kent.overstreet(a)linux.dev>
Cc: Muchun Song <muchun.song(a)linux.dev>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/include/linux/mm.h b/include/linux/mm.h
index b0ff06d18c71..6bb778cbaabf 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -4084,4 +4084,34 @@ void vma_pgtable_walk_end(struct vm_area_struct *vma);
int reserve_mem_find_by_name(const char *name, phys_addr_t *start, phys_addr_t *size);
+#ifdef CONFIG_MEM_ALLOC_PROFILING
+static inline void pgalloc_tag_split(struct folio *folio, int old_order, int new_order)
+{
+ int i;
+ struct alloc_tag *tag;
+ unsigned int nr_pages = 1 << new_order;
+
+ if (!mem_alloc_profiling_enabled())
+ return;
+
+ tag = pgalloc_tag_get(&folio->page);
+ if (!tag)
+ return;
+
+ for (i = nr_pages; i < (1 << old_order); i += nr_pages) {
+ union codetag_ref *ref = get_page_tag_ref(folio_page(folio, i));
+
+ if (ref) {
+ /* Set new reference to point to the original tag */
+ alloc_tag_ref_set(ref, tag);
+ put_page_tag_ref(ref);
+ }
+ }
+}
+#else /* !CONFIG_MEM_ALLOC_PROFILING */
+static inline void pgalloc_tag_split(struct folio *folio, int old_order, int new_order)
+{
+}
+#endif /* CONFIG_MEM_ALLOC_PROFILING */
+
#endif /* _LINUX_MM_H */
diff --git a/include/linux/pgalloc_tag.h b/include/linux/pgalloc_tag.h
index 207f0c83c8e9..59a3deb792a8 100644
--- a/include/linux/pgalloc_tag.h
+++ b/include/linux/pgalloc_tag.h
@@ -80,36 +80,6 @@ static inline void pgalloc_tag_sub(struct page *page, unsigned int nr)
}
}
-static inline void pgalloc_tag_split(struct page *page, unsigned int nr)
-{
- int i;
- struct page_ext *first_page_ext;
- struct page_ext *page_ext;
- union codetag_ref *ref;
- struct alloc_tag *tag;
-
- if (!mem_alloc_profiling_enabled())
- return;
-
- first_page_ext = page_ext = page_ext_get(page);
- if (unlikely(!page_ext))
- return;
-
- ref = codetag_ref_from_page_ext(page_ext);
- if (!ref->ct)
- goto out;
-
- tag = ct_to_alloc_tag(ref->ct);
- page_ext = page_ext_next(page_ext);
- for (i = 1; i < nr; i++) {
- /* Set new reference to point to the original tag */
- alloc_tag_ref_set(codetag_ref_from_page_ext(page_ext), tag);
- page_ext = page_ext_next(page_ext);
- }
-out:
- page_ext_put(first_page_ext);
-}
-
static inline struct alloc_tag *pgalloc_tag_get(struct page *page)
{
struct alloc_tag *tag = NULL;
@@ -142,7 +112,6 @@ static inline void clear_page_tag_ref(struct page *page) {}
static inline void pgalloc_tag_add(struct page *page, struct task_struct *task,
unsigned int nr) {}
static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) {}
-static inline void pgalloc_tag_split(struct page *page, unsigned int nr) {}
static inline struct alloc_tag *pgalloc_tag_get(struct page *page) { return NULL; }
static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr) {}
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index f15f7faf2a63..cc2872f12030 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -3226,7 +3226,7 @@ static void __split_huge_page(struct page *page, struct list_head *list,
/* Caller disabled irqs, so they are still disabled here */
split_page_owner(head, order, new_order);
- pgalloc_tag_split(head, 1 << order);
+ pgalloc_tag_split(folio, order, new_order);
/* See comment in __split_huge_page_tail() */
if (folio_test_anon(folio)) {
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 3faf5aad142d..a8624c07d8bf 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3778,7 +3778,7 @@ static long demote_free_hugetlb_folios(struct hstate *src, struct hstate *dst,
list_del(&folio->lru);
split_page_owner(&folio->page, huge_page_order(src), huge_page_order(dst));
- pgalloc_tag_split(&folio->page, 1 << huge_page_order(src));
+ pgalloc_tag_split(folio, huge_page_order(src), huge_page_order(dst));
for (i = 0; i < pages_per_huge_page(src); i += pages_per_huge_page(dst)) {
struct page *page = folio_page(folio, i);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 74f13f676985..874e006f3d1c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2776,7 +2776,7 @@ void split_page(struct page *page, unsigned int order)
for (i = 1; i < (1 << order); i++)
set_page_refcounted(page + i);
split_page_owner(page, order, 0);
- pgalloc_tag_split(page, 1 << order);
+ pgalloc_tag_split(page_folio(page), order, 0);
split_page_memcg(page, order, 0);
}
EXPORT_SYMBOL_GPL(split_page);
@@ -4974,7 +4974,7 @@ static void *make_alloc_exact(unsigned long addr, unsigned int order,
struct page *last = page + nr;
split_page_owner(page, order, 0);
- pgalloc_tag_split(page, 1 << order);
+ pgalloc_tag_split(page_folio(page), order, 0);
split_page_memcg(page, order, 0);
while (page < --last)
set_page_refcounted(last);
The patch below does not apply to the 6.11-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.11.y
git checkout FETCH_HEAD
git cherry-pick -x 95599ef684d01136a8b77c16a7c853496786e173
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100205-snowsuit-agony-4387@gregkh' --subject-prefix 'PATCH 6.11.y' HEAD^..
Possible dependencies:
95599ef684d0 ("mm/codetag: fix pgalloc_tag_split()")
cf54f310d0d3 ("mm/hugetlb: use __GFP_COMP for gigantic folios")
c0f398c3b2cf ("mm/hugetlb_vmemmap: batch HVO work when demoting")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 95599ef684d01136a8b77c16a7c853496786e173 Mon Sep 17 00:00:00 2001
From: Yu Zhao <yuzhao(a)google.com>
Date: Thu, 5 Sep 2024 22:21:07 -0600
Subject: [PATCH] mm/codetag: fix pgalloc_tag_split()
The current assumption is that a large folio can only be split into
order-0 folios. That is not the case for hugeTLB demotion, nor for THP
split: see commit c010d47f107f ("mm: thp: split huge page to any lower
order pages").
When a large folio is split into ones of a lower non-zero order, only the
new head pages should be tagged. Tagging tail pages can cause imbalanced
"calls" counters, since only head pages are untagged by pgalloc_tag_sub()
and the "calls" counts on tail pages are leaked, e.g.,
# echo 2048kB >/sys/kernel/mm/hugepages/hugepages-1048576kB/demote_size
# echo 700 >/sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages
# time echo 700 >/sys/kernel/mm/hugepages/hugepages-1048576kB/demote
# echo 0 >/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
# grep alloc_gigantic_folio /proc/allocinfo
Before this patch:
0 549427200 mm/hugetlb.c:1549 func:alloc_gigantic_folio
real 0m2.057s
user 0m0.000s
sys 0m2.051s
After this patch:
0 0 mm/hugetlb.c:1549 func:alloc_gigantic_folio
real 0m1.711s
user 0m0.000s
sys 0m1.704s
Not tagging tail pages also improves the splitting time, e.g., by about
15% when demoting 1GB hugeTLB folios to 2MB ones, as shown above.
Link: https://lkml.kernel.org/r/20240906042108.1150526-2-yuzhao@google.com
Fixes: be25d1d4e822 ("mm: create new codetag references during page splitting")
Signed-off-by: Yu Zhao <yuzhao(a)google.com>
Acked-by: Suren Baghdasaryan <surenb(a)google.com>
Cc: Kent Overstreet <kent.overstreet(a)linux.dev>
Cc: Muchun Song <muchun.song(a)linux.dev>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/include/linux/mm.h b/include/linux/mm.h
index b0ff06d18c71..6bb778cbaabf 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -4084,4 +4084,34 @@ void vma_pgtable_walk_end(struct vm_area_struct *vma);
int reserve_mem_find_by_name(const char *name, phys_addr_t *start, phys_addr_t *size);
+#ifdef CONFIG_MEM_ALLOC_PROFILING
+static inline void pgalloc_tag_split(struct folio *folio, int old_order, int new_order)
+{
+ int i;
+ struct alloc_tag *tag;
+ unsigned int nr_pages = 1 << new_order;
+
+ if (!mem_alloc_profiling_enabled())
+ return;
+
+ tag = pgalloc_tag_get(&folio->page);
+ if (!tag)
+ return;
+
+ for (i = nr_pages; i < (1 << old_order); i += nr_pages) {
+ union codetag_ref *ref = get_page_tag_ref(folio_page(folio, i));
+
+ if (ref) {
+ /* Set new reference to point to the original tag */
+ alloc_tag_ref_set(ref, tag);
+ put_page_tag_ref(ref);
+ }
+ }
+}
+#else /* !CONFIG_MEM_ALLOC_PROFILING */
+static inline void pgalloc_tag_split(struct folio *folio, int old_order, int new_order)
+{
+}
+#endif /* CONFIG_MEM_ALLOC_PROFILING */
+
#endif /* _LINUX_MM_H */
diff --git a/include/linux/pgalloc_tag.h b/include/linux/pgalloc_tag.h
index 207f0c83c8e9..59a3deb792a8 100644
--- a/include/linux/pgalloc_tag.h
+++ b/include/linux/pgalloc_tag.h
@@ -80,36 +80,6 @@ static inline void pgalloc_tag_sub(struct page *page, unsigned int nr)
}
}
-static inline void pgalloc_tag_split(struct page *page, unsigned int nr)
-{
- int i;
- struct page_ext *first_page_ext;
- struct page_ext *page_ext;
- union codetag_ref *ref;
- struct alloc_tag *tag;
-
- if (!mem_alloc_profiling_enabled())
- return;
-
- first_page_ext = page_ext = page_ext_get(page);
- if (unlikely(!page_ext))
- return;
-
- ref = codetag_ref_from_page_ext(page_ext);
- if (!ref->ct)
- goto out;
-
- tag = ct_to_alloc_tag(ref->ct);
- page_ext = page_ext_next(page_ext);
- for (i = 1; i < nr; i++) {
- /* Set new reference to point to the original tag */
- alloc_tag_ref_set(codetag_ref_from_page_ext(page_ext), tag);
- page_ext = page_ext_next(page_ext);
- }
-out:
- page_ext_put(first_page_ext);
-}
-
static inline struct alloc_tag *pgalloc_tag_get(struct page *page)
{
struct alloc_tag *tag = NULL;
@@ -142,7 +112,6 @@ static inline void clear_page_tag_ref(struct page *page) {}
static inline void pgalloc_tag_add(struct page *page, struct task_struct *task,
unsigned int nr) {}
static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) {}
-static inline void pgalloc_tag_split(struct page *page, unsigned int nr) {}
static inline struct alloc_tag *pgalloc_tag_get(struct page *page) { return NULL; }
static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr) {}
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index f15f7faf2a63..cc2872f12030 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -3226,7 +3226,7 @@ static void __split_huge_page(struct page *page, struct list_head *list,
/* Caller disabled irqs, so they are still disabled here */
split_page_owner(head, order, new_order);
- pgalloc_tag_split(head, 1 << order);
+ pgalloc_tag_split(folio, order, new_order);
/* See comment in __split_huge_page_tail() */
if (folio_test_anon(folio)) {
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 3faf5aad142d..a8624c07d8bf 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3778,7 +3778,7 @@ static long demote_free_hugetlb_folios(struct hstate *src, struct hstate *dst,
list_del(&folio->lru);
split_page_owner(&folio->page, huge_page_order(src), huge_page_order(dst));
- pgalloc_tag_split(&folio->page, 1 << huge_page_order(src));
+ pgalloc_tag_split(folio, huge_page_order(src), huge_page_order(dst));
for (i = 0; i < pages_per_huge_page(src); i += pages_per_huge_page(dst)) {
struct page *page = folio_page(folio, i);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 74f13f676985..874e006f3d1c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2776,7 +2776,7 @@ void split_page(struct page *page, unsigned int order)
for (i = 1; i < (1 << order); i++)
set_page_refcounted(page + i);
split_page_owner(page, order, 0);
- pgalloc_tag_split(page, 1 << order);
+ pgalloc_tag_split(page_folio(page), order, 0);
split_page_memcg(page, order, 0);
}
EXPORT_SYMBOL_GPL(split_page);
@@ -4974,7 +4974,7 @@ static void *make_alloc_exact(unsigned long addr, unsigned int order,
struct page *last = page + nr;
split_page_owner(page, order, 0);
- pgalloc_tag_split(page, 1 << order);
+ pgalloc_tag_split(page_folio(page), order, 0);
split_page_memcg(page, order, 0);
while (page < --last)
set_page_refcounted(last);
The patch below does not apply to the 6.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.10.y
git checkout FETCH_HEAD
git cherry-pick -x 5fe6e308abaea082c20fbf2aa5df8e14495622cf
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100250-celibacy-doubling-466e@gregkh' --subject-prefix 'PATCH 6.10.y' HEAD^..
Possible dependencies:
5fe6e308abae ("bpf: Fix use-after-free in bpf_uprobe_multi_link_attach()")
3c83a9ad0295 ("uprobes: make uprobe_register() return struct uprobe *")
e04332ebc8ac ("uprobes: kill uprobe_register_refctr()")
db61e6a4eee5 ("selftests/bpf: fix uprobe.path leak in bpf_testmod")
f42a58ffb8bb ("selftests/bpf: Add uretprobe syscall test for regs changes")
3e8e25761a40 ("selftests/bpf: Add uretprobe syscall test for regs integrity")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 5fe6e308abaea082c20fbf2aa5df8e14495622cf Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg(a)redhat.com>
Date: Tue, 13 Aug 2024 17:25:24 +0200
Subject: [PATCH] bpf: Fix use-after-free in bpf_uprobe_multi_link_attach()
If bpf_link_prime() fails, bpf_uprobe_multi_link_attach() goes to the
error_free label and frees the array of bpf_uprobe's without calling
bpf_uprobe_unregister().
This leaks bpf_uprobe->uprobe and worse, this frees bpf_uprobe->consumer
without removing it from the uprobe->consumers list.
Fixes: 89ae89f53d20 ("bpf: Add multi uprobe link")
Closes: https://lore.kernel.org/all/000000000000382d39061f59f2dd@google.com/
Reported-by: syzbot+f7a1c2c2711e4a780f19(a)syzkaller.appspotmail.com
Signed-off-by: Oleg Nesterov <oleg(a)redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz(a)infradead.org>
Acked-by: Andrii Nakryiko <andrii(a)kernel.org>
Acked-by: Jiri Olsa <jolsa(a)kernel.org>
Tested-by: syzbot+f7a1c2c2711e4a780f19(a)syzkaller.appspotmail.com
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/20240813152524.GA7292@redhat.com
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 4e391daafa64..90cd30e9723e 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -3484,17 +3484,20 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
&uprobes[i].consumer);
if (IS_ERR(uprobes[i].uprobe)) {
err = PTR_ERR(uprobes[i].uprobe);
- bpf_uprobe_unregister(uprobes, i);
- goto error_free;
+ link->cnt = i;
+ goto error_unregister;
}
}
err = bpf_link_prime(&link->link, &link_primer);
if (err)
- goto error_free;
+ goto error_unregister;
return bpf_link_settle(&link_primer);
+error_unregister:
+ bpf_uprobe_unregister(uprobes, link->cnt);
+
error_free:
kvfree(uprobes);
kfree(link);
The patch below does not apply to the 6.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.10.y
git checkout FETCH_HEAD
git cherry-pick -x 3a987b88a42593875f6345188ca33731c7df728c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100128-prison-ploy-dfd6@gregkh' --subject-prefix 'PATCH 6.10.y' HEAD^..
Possible dependencies:
3a987b88a425 ("debugfs show actual source in /proc/mounts")
49abee5991e1 ("debugfs: Convert to new uid/gid option parsing helpers")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 3a987b88a42593875f6345188ca33731c7df728c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marc=20Aur=C3=A8le=20La=20France?= <tsi(a)tuyoix.net>
Date: Sat, 10 Aug 2024 13:25:27 -0600
Subject: [PATCH] debugfs show actual source in /proc/mounts
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
After its conversion to the new mount API, debugfs displays "none" in
/proc/mounts instead of the actual source. Fix this by recognising its
"source" mount option.
Signed-off-by: Marc Aurèle La France <tsi(a)tuyoix.net>
Link: https://lore.kernel.org/r/e439fae2-01da-234b-75b9-2a7951671e27@tuyoix.net
Fixes: a20971c18752 ("vfs: Convert debugfs to use the new mount API")
Cc: stable(a)vger.kernel.org # 6.10.x: 49abee5991e1: debugfs: Convert to new uid/gid option parsing helpers
Signed-off-by: Christian Brauner <brauner(a)kernel.org>
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 91521576f500..66d9b3b4c588 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -89,12 +89,14 @@ enum {
Opt_uid,
Opt_gid,
Opt_mode,
+ Opt_source,
};
static const struct fs_parameter_spec debugfs_param_specs[] = {
fsparam_gid ("gid", Opt_gid),
fsparam_u32oct ("mode", Opt_mode),
fsparam_uid ("uid", Opt_uid),
+ fsparam_string ("source", Opt_source),
{}
};
@@ -126,6 +128,12 @@ static int debugfs_parse_param(struct fs_context *fc, struct fs_parameter *param
case Opt_mode:
opts->mode = result.uint_32 & S_IALLUGO;
break;
+ case Opt_source:
+ if (fc->source)
+ return invalfc(fc, "Multiple sources specified");
+ fc->source = param->string;
+ param->string = NULL;
+ break;
/*
* We might like to report bad mount options here;
* but traditionally debugfs has ignored all mount options
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x f011c9cf04c06f16b24f583d313d3c012e589e50
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100131-number-deface-36a6@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
f011c9cf04c0 ("io_uring/sqpoll: do not allow pinning outside of cpuset")
17437f311490 ("io_uring: move SQPOLL related handling into its own file")
59915143e89f ("io_uring: move timeout opcodes and handling into its own file")
e418bbc97bff ("io_uring: move our reference counting into a header")
36404b09aa60 ("io_uring: move msg_ring into its own file")
f9ead18c1058 ("io_uring: split network related opcodes into its own file")
e0da14def1ee ("io_uring: move statx handling to its own file")
a9c210cebe13 ("io_uring: move epoll handler to its own file")
4cf90495281b ("io_uring: add a dummy -EOPNOTSUPP prep handler")
99f15d8d6136 ("io_uring: move uring_cmd handling to its own file")
cd40cae29ef8 ("io_uring: split out open/close operations")
453b329be5ea ("io_uring: separate out file table handling code")
f4c163dd7d4b ("io_uring: split out fadvise/madvise operations")
0d5847274037 ("io_uring: split out fs related sync/fallocate functions")
531113bbd5bf ("io_uring: split out splice related operations")
11aeb71406dd ("io_uring: split out filesystem related operations")
e28683bdfc2f ("io_uring: move nop into its own file")
5e2a18d93fec ("io_uring: move xattr related opcodes to its own file")
97b388d70b53 ("io_uring: handle completions in the core")
de23077eda61 ("io_uring: set completion results upfront")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f011c9cf04c06f16b24f583d313d3c012e589e50 Mon Sep 17 00:00:00 2001
From: Felix Moessbauer <felix.moessbauer(a)siemens.com>
Date: Mon, 9 Sep 2024 17:00:36 +0200
Subject: [PATCH] io_uring/sqpoll: do not allow pinning outside of cpuset
The submit queue polling threads are userland threads that just never
exit to the userland. When creating the thread with IORING_SETUP_SQ_AFF,
the affinity of the poller thread is set to the cpu specified in
sq_thread_cpu. However, this CPU can be outside of the cpuset defined
by the cgroup cpuset controller. This violates the rules defined by the
cpuset controller and is a potential issue for realtime applications.
In b7ed6d8ffd6 we fixed the default affinity of the poller thread, in
case no explicit pinning is required by inheriting the one of the
creating task. In case of explicit pinning, the check is more
complicated, as also a cpu outside of the parent cpumask is allowed.
We implemented this by using cpuset_cpus_allowed (that has support for
cgroup cpusets) and testing if the requested cpu is in the set.
Fixes: 37d1e2e3642e ("io_uring: move SQPOLL thread io-wq forked worker")
Cc: stable(a)vger.kernel.org # 6.1+
Signed-off-by: Felix Moessbauer <felix.moessbauer(a)siemens.com>
Link: https://lore.kernel.org/r/20240909150036.55921-1-felix.moessbauer@siemens.c…
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c
index e545bf240d35..272df9d00f45 100644
--- a/io_uring/sqpoll.c
+++ b/io_uring/sqpoll.c
@@ -10,6 +10,7 @@
#include <linux/slab.h>
#include <linux/audit.h>
#include <linux/security.h>
+#include <linux/cpuset.h>
#include <linux/io_uring.h>
#include <uapi/linux/io_uring.h>
@@ -460,10 +461,12 @@ __cold int io_sq_offload_create(struct io_ring_ctx *ctx,
return 0;
if (p->flags & IORING_SETUP_SQ_AFF) {
+ struct cpumask allowed_mask;
int cpu = p->sq_thread_cpu;
ret = -EINVAL;
- if (cpu >= nr_cpu_ids || !cpu_online(cpu))
+ cpuset_cpus_allowed(current, &allowed_mask);
+ if (!cpumask_test_cpu(cpu, &allowed_mask))
goto err_sqpoll;
sqd->sq_cpu = cpu;
} else {
Hi Sasha,
I've been getting emails from your bots...
I sent two pulls to Linus for 6.13-rc1:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?…https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?…
In these, I'm not sure there's actually much valid stable material. I
didn't mark anything as Cc: stable(a)vger.kernel.org, I don't think.
As such, can you make sure none of those get backported?
Alternatively, if you do have reason to want to pick some of these,
can you be clear with what and why, and actually carefully decide
which ones and which dependencies are required as such in a
non-automated way?
Thanks,
Jason
The only user (the mesa gallium driver) is already assuming explicit
synchronization and doing the export/import dance on shared BOs. The
only reason we were registering ourselves as writers on external BOs
is because Xe, which was the reference back when we developed Panthor,
was doing so. Turns out Xe was wrong, and we really want bookkeep on
all registered fences, so userspace can explicitly upgrade those to
read/write when needed.
Fixes: 4bdca1150792 ("drm/panthor: Add the driver frontend block")
Cc: Matthew Brost <matthew.brost(a)intel.com>
Cc: Simona Vetter <simona.vetter(a)ffwll.ch>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Boris Brezillon <boris.brezillon(a)collabora.com>
---
drivers/gpu/drm/panthor/panthor_sched.c | 7 +------
1 file changed, 1 insertion(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c
index 9a0ff48f7061..41260cf4beb8 100644
--- a/drivers/gpu/drm/panthor/panthor_sched.c
+++ b/drivers/gpu/drm/panthor/panthor_sched.c
@@ -3423,13 +3423,8 @@ void panthor_job_update_resvs(struct drm_exec *exec, struct drm_sched_job *sched
{
struct panthor_job *job = container_of(sched_job, struct panthor_job, base);
- /* Still not sure why we want USAGE_WRITE for external objects, since I
- * was assuming this would be handled through explicit syncs being imported
- * to external BOs with DMA_BUF_IOCTL_IMPORT_SYNC_FILE, but other drivers
- * seem to pass DMA_RESV_USAGE_WRITE, so there must be a good reason.
- */
panthor_vm_update_resvs(job->group->vm, exec, &sched_job->s_fence->finished,
- DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_WRITE);
+ DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
}
void panthor_sched_unplug(struct panthor_device *ptdev)
--
2.46.0
If deferred operations are pending, we want to wait for those to
land before declaring the queue blocked on a SYNC_WAIT. We need
this to deal with the case where the sync object is signalled through
a deferred SYNC_{ADD,SET} from the same queue. If we don't do that
and the group gets scheduled out before the deferred SYNC_{SET,ADD}
is executed, we'll end up with a timeout, because no external
SYNC_{SET,ADD} will make the scheduler reconsider the group for
execution.
Fixes: de8548813824 ("drm/panthor: Add the scheduler logical block")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Boris Brezillon <boris.brezillon(a)collabora.com>
---
drivers/gpu/drm/panthor/panthor_sched.c | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c
index 41260cf4beb8..201d5e7a921e 100644
--- a/drivers/gpu/drm/panthor/panthor_sched.c
+++ b/drivers/gpu/drm/panthor/panthor_sched.c
@@ -1103,7 +1103,13 @@ cs_slot_sync_queue_state_locked(struct panthor_device *ptdev, u32 csg_id, u32 cs
list_move_tail(&group->wait_node,
&group->ptdev->scheduler->groups.waiting);
}
- group->blocked_queues |= BIT(cs_id);
+
+ /* The queue is only blocked if there's no deferred operation
+ * pending, which can be checked through the scoreboard status.
+ */
+ if (!cs_iface->output->status_scoreboards)
+ group->blocked_queues |= BIT(cs_id);
+
queue->syncwait.gpu_va = cs_iface->output->status_wait_sync_ptr;
queue->syncwait.ref = cs_iface->output->status_wait_sync_value;
status_wait_cond = cs_iface->output->status_wait & CS_STATUS_WAIT_SYNC_COND_MASK;
--
2.46.0
The group variable can't be used to retrieve ptdev in our second loop,
because it points to the previously iterated list_head, not a valid
group. Get the ptdev object from the scheduler instead.
Cc: <stable(a)vger.kernel.org>
Fixes: d72f049087d4 ("drm/panthor: Allow driver compilation")
Reported-by: kernel test robot <lkp(a)intel.com>
Reported-by: Julia Lawall <julia.lawall(a)inria.fr>
Closes: https://lore.kernel.org/r/202409302306.UDikqa03-lkp@intel.com/
Signed-off-by: Boris Brezillon <boris.brezillon(a)collabora.com>
---
drivers/gpu/drm/panthor/panthor_sched.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c
index 201d5e7a921e..24ff91c084e4 100644
--- a/drivers/gpu/drm/panthor/panthor_sched.c
+++ b/drivers/gpu/drm/panthor/panthor_sched.c
@@ -2052,6 +2052,7 @@ static void
tick_ctx_cleanup(struct panthor_scheduler *sched,
struct panthor_sched_tick_ctx *ctx)
{
+ struct panthor_device *ptdev = sched->ptdev;
struct panthor_group *group, *tmp;
u32 i;
@@ -2060,7 +2061,7 @@ tick_ctx_cleanup(struct panthor_scheduler *sched,
/* If everything went fine, we should only have groups
* to be terminated in the old_groups lists.
*/
- drm_WARN_ON(&group->ptdev->base, !ctx->csg_upd_failed_mask &&
+ drm_WARN_ON(&ptdev->base, !ctx->csg_upd_failed_mask &&
group_can_run(group));
if (!group_can_run(group)) {
@@ -2083,7 +2084,7 @@ tick_ctx_cleanup(struct panthor_scheduler *sched,
/* If everything went fine, the groups to schedule lists should
* be empty.
*/
- drm_WARN_ON(&group->ptdev->base,
+ drm_WARN_ON(&ptdev->base,
!ctx->csg_upd_failed_mask && !list_empty(&ctx->groups[i]));
list_for_each_entry_safe(group, tmp, &ctx->groups[i], run_node) {
--
2.46.0
SC7280 is a Qualcomm SoC. This series adds support to
bring up the CSIPHY, CSID, VFE/RDI interfaces in SC7280.
SC7280 provides
- 3 x VFE, 3 RDI per VFE
- 2 x VFE Lite, 4 RDI per VFE
- 3 x CSID
- 2 x CSID Lite
- 5 x CSI PHY
The changes are verified on SC7280 qcs6490-rb3gen2-vision board,
the base dts for qcs6490-rb3gen2 is:
https://lore.kernel.org/all/20231103184655.23555-1-quic_kbajaj@quicinc.com/
V1 for this series: https://lore.kernel.org/linux-arm-msm/20240629-camss_first_post_linux_next-…
Changes in V2:
1) Improved indentation/formatting.
2) Removed _src clocks and misleading code comments.
3) Added name fields for power domains and csid register offset in DTSI.
4) Dropped minItems field from YAML file.
5) Listed changes in alphabetical order.
6) Updated description and commit text to reflect changes
7) Changed the compatible string from imx412 to imx577.
8) Added board-specific enablement changes in the newly created vision
board DTSI file.
9) Fixed bug encountered during testing.
10) Moved logically independent changes to a new/seprate patch.
11) Removed cci0 as no sensor is on this port and MCLK2, which was a
copy-paste error from the RB5 board reference.
12) Added power rails, referencing the RB5 board.
13) Discarded Patch 5/6 completely (not required).
14) Removed unused enums.
To: Robert Foss <rfoss(a)kernel.org>
To: Todor Tomov <todor.too(a)gmail.com>
To: Bryan O'Donoghue <bryan.odonoghue(a)linaro.org>
To: Mauro Carvalho Chehab <mchehab(a)kernel.org>
To: Rob Herring <robh(a)kernel.org>
To: Krzysztof Kozlowski <krzk+dt(a)kernel.org>
To: Conor Dooley <conor+dt(a)kernel.org>
To: Kapatrala Syed <akapatra(a)quicinc.com>
To: Hariram Purushothaman <hariramp(a)quicinc.com>
To: Bjorn Andersson <andersson(a)kernel.org>
To: Konrad Dybcio <konradybcio(a)kernel.org>
To: Hans Verkuil <hverkuil-cisco(a)xs4all.nl>
To: cros-qcom-dts-watchers(a)chromium.org
To: Catalin Marinas <catalin.marinas(a)arm.com>
To: Will Deacon <will(a)kernel.org>
Cc: linux-arm-msm(a)vger.kernel.org
Cc: linux-media(a)vger.kernel.org
Cc: devicetree(a)vger.kernel.org
Cc: linux-kernel(a)vger.kernel.org
Cc: linux-arm-kernel(a)lists.infradead.org
Test-by: Vikram Sharma <quic_vikramsa(a)quicinc.com>
Signed-off-by: Vikram Sharma <quic_vikramsa(a)quicinc.com>
---
Suresh Vankadara (1):
media: qcom: camss: Add support for camss driver on SC7280
Vikram Sharma (9):
media: dt-bindings: media: camss: Add qcom,sc7280-camss binding
media: dt-bindings: media: qcs6490-rb3gen2-vision-mezzanine: Add dt bindings
media: qcom: camss: Fix potential crash if domain attach fails
media: qcom: camss: Sort CAMSS version enums and compatible strings
media: qcom: camss: Add camss_link_entities_v2
arm64: dts: qcom: sc7280: Add support for camss
arm64: dts: qcom: qcs6490-rb3gen2-vision-mezzanine: Enable IMX577 sensor
arm64: dts: qcom: sc7280: Add default and suspend states for GPIO
arm64: defconfig: Enable camcc driver for SC7280
Documentation/devicetree/bindings/arm/qcom.yaml | 1 +
.../bindings/media/qcom,sc7280-camss.yaml | 441 +++++++++++++++++++++
arch/arm64/boot/dts/qcom/Makefile | 1 +
.../dts/qcom/qcs6490-rb3gen2-vision-mezzanine.dts | 61 +++
arch/arm64/boot/dts/qcom/sc7280.dtsi | 208 ++++++++++
arch/arm64/configs/defconfig | 1 +
drivers/media/platform/qcom/camss/camss-csid.c | 1 -
.../platform/qcom/camss/camss-csiphy-3ph-1-0.c | 13 +-
drivers/media/platform/qcom/camss/camss-csiphy.c | 5 +
drivers/media/platform/qcom/camss/camss-csiphy.h | 1 +
drivers/media/platform/qcom/camss/camss-vfe.c | 8 +-
drivers/media/platform/qcom/camss/camss.c | 400 ++++++++++++++++++-
drivers/media/platform/qcom/camss/camss.h | 1 +
13 files changed, 1131 insertions(+), 11 deletions(-)
---
base-commit: fdadd93817f124fd0ea6ef251d4a1068b7feceba
change-id: 20240904-camss_on_sc7280_rb3gen2_vision_v2_patches-15c195fb3f12
Best regards,
--
Vikram Sharma <quic_vikramsa(a)quicinc.com>
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 0199d2f2bd8cd97b310f7ed82a067247d7456029
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100153-unpaired-charity-9b6f@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
0199d2f2bd8c ("PCI: xilinx-nwl: Fix off-by-one in INTx IRQ handler")
e56427068a8d ("PCI: xilinx-nwl: Use irq_data_get_irq_chip_data()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 0199d2f2bd8cd97b310f7ed82a067247d7456029 Mon Sep 17 00:00:00 2001
From: Sean Anderson <sean.anderson(a)linux.dev>
Date: Fri, 31 May 2024 12:13:32 -0400
Subject: [PATCH] PCI: xilinx-nwl: Fix off-by-one in INTx IRQ handler
MSGF_LEG_MASK is laid out with INTA in bit 0, INTB in bit 1, INTC in bit 2,
and INTD in bit 3. Hardware IRQ numbers start at 0, and we register
PCI_NUM_INTX IRQs. So to enable INTA (aka hwirq 0) we should set bit 0.
Remove the subtraction of one.
This bug would cause INTx interrupts not to be delivered, as enabling INTB
would actually enable INTA, and enabling INTA wouldn't enable anything at
all. It is likely that this got overlooked for so long since most PCIe
hardware uses MSIs. This fixes the following UBSAN error:
UBSAN: shift-out-of-bounds in ../drivers/pci/controller/pcie-xilinx-nwl.c:389:11
shift exponent 18446744073709551615 is too large for 32-bit type 'int'
CPU: 1 PID: 61 Comm: kworker/u10:1 Not tainted 6.6.20+ #268
Hardware name: xlnx,zynqmp (DT)
Workqueue: events_unbound deferred_probe_work_func
Call trace:
dump_backtrace (arch/arm64/kernel/stacktrace.c:235)
show_stack (arch/arm64/kernel/stacktrace.c:242)
dump_stack_lvl (lib/dump_stack.c:107)
dump_stack (lib/dump_stack.c:114)
__ubsan_handle_shift_out_of_bounds (lib/ubsan.c:218 lib/ubsan.c:387)
nwl_unmask_leg_irq (drivers/pci/controller/pcie-xilinx-nwl.c:389 (discriminator 1))
irq_enable (kernel/irq/internals.h:234 kernel/irq/chip.c:170 kernel/irq/chip.c:439 kernel/irq/chip.c:432 kernel/irq/chip.c:345)
__irq_startup (kernel/irq/internals.h:239 kernel/irq/chip.c:180 kernel/irq/chip.c:250)
irq_startup (kernel/irq/chip.c:270)
__setup_irq (kernel/irq/manage.c:1800)
request_threaded_irq (kernel/irq/manage.c:2206)
pcie_pme_probe (include/linux/interrupt.h:168 drivers/pci/pcie/pme.c:348)
Fixes: 9a181e1093af ("PCI: xilinx-nwl: Modify IRQ chip for legacy interrupts")
Link: https://lore.kernel.org/r/20240531161337.864994-3-sean.anderson@linux.dev
Signed-off-by: Sean Anderson <sean.anderson(a)linux.dev>
Signed-off-by: Bjorn Helgaas <bhelgaas(a)google.com>
Cc: stable(a)vger.kernel.org
diff --git a/drivers/pci/controller/pcie-xilinx-nwl.c b/drivers/pci/controller/pcie-xilinx-nwl.c
index 0408f4d612b5..437927e3bcca 100644
--- a/drivers/pci/controller/pcie-xilinx-nwl.c
+++ b/drivers/pci/controller/pcie-xilinx-nwl.c
@@ -371,7 +371,7 @@ static void nwl_mask_intx_irq(struct irq_data *data)
u32 mask;
u32 val;
- mask = 1 << (data->hwirq - 1);
+ mask = 1 << data->hwirq;
raw_spin_lock_irqsave(&pcie->leg_mask_lock, flags);
val = nwl_bridge_readl(pcie, MSGF_LEG_MASK);
nwl_bridge_writel(pcie, (val & (~mask)), MSGF_LEG_MASK);
@@ -385,7 +385,7 @@ static void nwl_unmask_intx_irq(struct irq_data *data)
u32 mask;
u32 val;
- mask = 1 << (data->hwirq - 1);
+ mask = 1 << data->hwirq;
raw_spin_lock_irqsave(&pcie->leg_mask_lock, flags);
val = nwl_bridge_readl(pcie, MSGF_LEG_MASK);
nwl_bridge_writel(pcie, (val | mask), MSGF_LEG_MASK);
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x 78cb66caa6ab5385ac2090f1aae5f3c19e08f522
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100129-herring-nutcase-68d7@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
78cb66caa6ab ("hwrng: mtk - Use devm_pm_runtime_enable")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 78cb66caa6ab5385ac2090f1aae5f3c19e08f522 Mon Sep 17 00:00:00 2001
From: Guoqing Jiang <guoqing.jiang(a)canonical.com>
Date: Mon, 26 Aug 2024 15:04:15 +0800
Subject: [PATCH] hwrng: mtk - Use devm_pm_runtime_enable
Replace pm_runtime_enable with the devres-enabled version which
can trigger pm_runtime_disable.
Otherwise, the below appears during reload driver.
mtk_rng 1020f000.rng: Unbalanced pm_runtime_enable!
Fixes: 81d2b34508c6 ("hwrng: mtk - add runtime PM support")
Cc: <stable(a)vger.kernel.org>
Suggested-by: Chen-Yu Tsai <wenst(a)chromium.org>
Signed-off-by: Guoqing Jiang <guoqing.jiang(a)canonical.com>
Signed-off-by: Herbert Xu <herbert(a)gondor.apana.org.au>
diff --git a/drivers/char/hw_random/mtk-rng.c b/drivers/char/hw_random/mtk-rng.c
index aa993753ab12..1e3048f2bb38 100644
--- a/drivers/char/hw_random/mtk-rng.c
+++ b/drivers/char/hw_random/mtk-rng.c
@@ -142,7 +142,7 @@ static int mtk_rng_probe(struct platform_device *pdev)
dev_set_drvdata(&pdev->dev, priv);
pm_runtime_set_autosuspend_delay(&pdev->dev, RNG_AUTOSUSPEND_TIMEOUT);
pm_runtime_use_autosuspend(&pdev->dev);
- pm_runtime_enable(&pdev->dev);
+ devm_pm_runtime_enable(&pdev->dev);
dev_info(&pdev->dev, "registered RNG driver\n");
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x 1b0e32753d8550908dff8982410357b5114be78c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100113-satchel-circulate-81a1@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
1b0e32753d85 ("ARM: dts: imx6ul-geam: fix fsl,pins property in tscgrp pinctrl")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1b0e32753d8550908dff8982410357b5114be78c Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski(a)linaro.org>
Date: Sat, 31 Aug 2024 12:11:28 +0200
Subject: [PATCH] ARM: dts: imx6ul-geam: fix fsl,pins property in tscgrp
pinctrl
The property is "fsl,pins", not "fsl,pin". Wrong property means the pin
configuration was not applied. Fixes dtbs_check warnings:
imx6ul-geam.dtb: pinctrl@20e0000: tscgrp: 'fsl,pins' is a required property
imx6ul-geam.dtb: pinctrl@20e0000: tscgrp: 'fsl,pin' does not match any of the regexes: 'pinctrl-[0-9]+'
Cc: stable(a)vger.kernel.org
Fixes: a58e4e608bc8 ("ARM: dts: imx6ul-geam: Add Engicam IMX6UL GEA M6UL initial support")
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski(a)linaro.org>
Reviewed-by: Michael Trimarchi <michael(a)amarulasolutions.com>
Signed-off-by: Shawn Guo <shawnguo(a)kernel.org>
diff --git a/arch/arm/boot/dts/nxp/imx/imx6ul-geam.dts b/arch/arm/boot/dts/nxp/imx/imx6ul-geam.dts
index cdbb8c435cd6..601d89b904cd 100644
--- a/arch/arm/boot/dts/nxp/imx/imx6ul-geam.dts
+++ b/arch/arm/boot/dts/nxp/imx/imx6ul-geam.dts
@@ -365,7 +365,7 @@ MX6UL_PAD_ENET1_RX_ER__PWM8_OUT 0x110b0
};
pinctrl_tsc: tscgrp {
- fsl,pin = <
+ fsl,pins = <
MX6UL_PAD_GPIO1_IO01__GPIO1_IO01 0xb0
MX6UL_PAD_GPIO1_IO02__GPIO1_IO02 0xb0
MX6UL_PAD_GPIO1_IO03__GPIO1_IO03 0xb0
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 1b0e32753d8550908dff8982410357b5114be78c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100112-cyclic-aqua-915b@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
1b0e32753d85 ("ARM: dts: imx6ul-geam: fix fsl,pins property in tscgrp pinctrl")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1b0e32753d8550908dff8982410357b5114be78c Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski(a)linaro.org>
Date: Sat, 31 Aug 2024 12:11:28 +0200
Subject: [PATCH] ARM: dts: imx6ul-geam: fix fsl,pins property in tscgrp
pinctrl
The property is "fsl,pins", not "fsl,pin". Wrong property means the pin
configuration was not applied. Fixes dtbs_check warnings:
imx6ul-geam.dtb: pinctrl@20e0000: tscgrp: 'fsl,pins' is a required property
imx6ul-geam.dtb: pinctrl@20e0000: tscgrp: 'fsl,pin' does not match any of the regexes: 'pinctrl-[0-9]+'
Cc: stable(a)vger.kernel.org
Fixes: a58e4e608bc8 ("ARM: dts: imx6ul-geam: Add Engicam IMX6UL GEA M6UL initial support")
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski(a)linaro.org>
Reviewed-by: Michael Trimarchi <michael(a)amarulasolutions.com>
Signed-off-by: Shawn Guo <shawnguo(a)kernel.org>
diff --git a/arch/arm/boot/dts/nxp/imx/imx6ul-geam.dts b/arch/arm/boot/dts/nxp/imx/imx6ul-geam.dts
index cdbb8c435cd6..601d89b904cd 100644
--- a/arch/arm/boot/dts/nxp/imx/imx6ul-geam.dts
+++ b/arch/arm/boot/dts/nxp/imx/imx6ul-geam.dts
@@ -365,7 +365,7 @@ MX6UL_PAD_ENET1_RX_ER__PWM8_OUT 0x110b0
};
pinctrl_tsc: tscgrp {
- fsl,pin = <
+ fsl,pins = <
MX6UL_PAD_GPIO1_IO01__GPIO1_IO01 0xb0
MX6UL_PAD_GPIO1_IO02__GPIO1_IO02 0xb0
MX6UL_PAD_GPIO1_IO03__GPIO1_IO03 0xb0
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 1b0e32753d8550908dff8982410357b5114be78c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100111-lilac-underarm-be95@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
1b0e32753d85 ("ARM: dts: imx6ul-geam: fix fsl,pins property in tscgrp pinctrl")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1b0e32753d8550908dff8982410357b5114be78c Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski(a)linaro.org>
Date: Sat, 31 Aug 2024 12:11:28 +0200
Subject: [PATCH] ARM: dts: imx6ul-geam: fix fsl,pins property in tscgrp
pinctrl
The property is "fsl,pins", not "fsl,pin". Wrong property means the pin
configuration was not applied. Fixes dtbs_check warnings:
imx6ul-geam.dtb: pinctrl@20e0000: tscgrp: 'fsl,pins' is a required property
imx6ul-geam.dtb: pinctrl@20e0000: tscgrp: 'fsl,pin' does not match any of the regexes: 'pinctrl-[0-9]+'
Cc: stable(a)vger.kernel.org
Fixes: a58e4e608bc8 ("ARM: dts: imx6ul-geam: Add Engicam IMX6UL GEA M6UL initial support")
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski(a)linaro.org>
Reviewed-by: Michael Trimarchi <michael(a)amarulasolutions.com>
Signed-off-by: Shawn Guo <shawnguo(a)kernel.org>
diff --git a/arch/arm/boot/dts/nxp/imx/imx6ul-geam.dts b/arch/arm/boot/dts/nxp/imx/imx6ul-geam.dts
index cdbb8c435cd6..601d89b904cd 100644
--- a/arch/arm/boot/dts/nxp/imx/imx6ul-geam.dts
+++ b/arch/arm/boot/dts/nxp/imx/imx6ul-geam.dts
@@ -365,7 +365,7 @@ MX6UL_PAD_ENET1_RX_ER__PWM8_OUT 0x110b0
};
pinctrl_tsc: tscgrp {
- fsl,pin = <
+ fsl,pins = <
MX6UL_PAD_GPIO1_IO01__GPIO1_IO01 0xb0
MX6UL_PAD_GPIO1_IO02__GPIO1_IO02 0xb0
MX6UL_PAD_GPIO1_IO03__GPIO1_IO03 0xb0
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 1b0e32753d8550908dff8982410357b5114be78c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100111-rocking-clatter-0ff3@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
1b0e32753d85 ("ARM: dts: imx6ul-geam: fix fsl,pins property in tscgrp pinctrl")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1b0e32753d8550908dff8982410357b5114be78c Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski(a)linaro.org>
Date: Sat, 31 Aug 2024 12:11:28 +0200
Subject: [PATCH] ARM: dts: imx6ul-geam: fix fsl,pins property in tscgrp
pinctrl
The property is "fsl,pins", not "fsl,pin". Wrong property means the pin
configuration was not applied. Fixes dtbs_check warnings:
imx6ul-geam.dtb: pinctrl@20e0000: tscgrp: 'fsl,pins' is a required property
imx6ul-geam.dtb: pinctrl@20e0000: tscgrp: 'fsl,pin' does not match any of the regexes: 'pinctrl-[0-9]+'
Cc: stable(a)vger.kernel.org
Fixes: a58e4e608bc8 ("ARM: dts: imx6ul-geam: Add Engicam IMX6UL GEA M6UL initial support")
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski(a)linaro.org>
Reviewed-by: Michael Trimarchi <michael(a)amarulasolutions.com>
Signed-off-by: Shawn Guo <shawnguo(a)kernel.org>
diff --git a/arch/arm/boot/dts/nxp/imx/imx6ul-geam.dts b/arch/arm/boot/dts/nxp/imx/imx6ul-geam.dts
index cdbb8c435cd6..601d89b904cd 100644
--- a/arch/arm/boot/dts/nxp/imx/imx6ul-geam.dts
+++ b/arch/arm/boot/dts/nxp/imx/imx6ul-geam.dts
@@ -365,7 +365,7 @@ MX6UL_PAD_ENET1_RX_ER__PWM8_OUT 0x110b0
};
pinctrl_tsc: tscgrp {
- fsl,pin = <
+ fsl,pins = <
MX6UL_PAD_GPIO1_IO01__GPIO1_IO01 0xb0
MX6UL_PAD_GPIO1_IO02__GPIO1_IO02 0xb0
MX6UL_PAD_GPIO1_IO03__GPIO1_IO03 0xb0
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 1b0e32753d8550908dff8982410357b5114be78c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100110-profane-wriggle-076a@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
1b0e32753d85 ("ARM: dts: imx6ul-geam: fix fsl,pins property in tscgrp pinctrl")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1b0e32753d8550908dff8982410357b5114be78c Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski(a)linaro.org>
Date: Sat, 31 Aug 2024 12:11:28 +0200
Subject: [PATCH] ARM: dts: imx6ul-geam: fix fsl,pins property in tscgrp
pinctrl
The property is "fsl,pins", not "fsl,pin". Wrong property means the pin
configuration was not applied. Fixes dtbs_check warnings:
imx6ul-geam.dtb: pinctrl@20e0000: tscgrp: 'fsl,pins' is a required property
imx6ul-geam.dtb: pinctrl@20e0000: tscgrp: 'fsl,pin' does not match any of the regexes: 'pinctrl-[0-9]+'
Cc: stable(a)vger.kernel.org
Fixes: a58e4e608bc8 ("ARM: dts: imx6ul-geam: Add Engicam IMX6UL GEA M6UL initial support")
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski(a)linaro.org>
Reviewed-by: Michael Trimarchi <michael(a)amarulasolutions.com>
Signed-off-by: Shawn Guo <shawnguo(a)kernel.org>
diff --git a/arch/arm/boot/dts/nxp/imx/imx6ul-geam.dts b/arch/arm/boot/dts/nxp/imx/imx6ul-geam.dts
index cdbb8c435cd6..601d89b904cd 100644
--- a/arch/arm/boot/dts/nxp/imx/imx6ul-geam.dts
+++ b/arch/arm/boot/dts/nxp/imx/imx6ul-geam.dts
@@ -365,7 +365,7 @@ MX6UL_PAD_ENET1_RX_ER__PWM8_OUT 0x110b0
};
pinctrl_tsc: tscgrp {
- fsl,pin = <
+ fsl,pins = <
MX6UL_PAD_GPIO1_IO01__GPIO1_IO01 0xb0
MX6UL_PAD_GPIO1_IO02__GPIO1_IO02 0xb0
MX6UL_PAD_GPIO1_IO03__GPIO1_IO03 0xb0
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 12736adc43b7cd5cb83f274f8f37b0f89d107c97
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100148-payday-steadfast-3ba9@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
12736adc43b7 ("dt-bindings: spi: nxp-fspi: add imx8ulp support")
18ab9e9e8889 ("dt-bindings: spi: nxp-fspi: support i.MX93 and i.MX95")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 12736adc43b7cd5cb83f274f8f37b0f89d107c97 Mon Sep 17 00:00:00 2001
From: Haibo Chen <haibo.chen(a)nxp.com>
Date: Thu, 5 Sep 2024 17:43:35 +0800
Subject: [PATCH] dt-bindings: spi: nxp-fspi: add imx8ulp support
The flexspi on imx8ulp only has 16 number of LUTs, it is different
with flexspi on other imx SoC which has 32 number of LUTs.
Fixes: ef89fd56bdfc ("arm64: dts: imx8ulp: add flexspi node")
Cc: stable(a)kernel.org
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski(a)linaro.org>
Signed-off-by: Haibo Chen <haibo.chen(a)nxp.com>
Reviewed-by: Frank Li <Frank.Li(a)nxp.com>
Link: https://patch.msgid.link/20240905094338.1986871-2-haibo.chen@nxp.com
Signed-off-by: Mark Brown <broonie(a)kernel.org>
diff --git a/Documentation/devicetree/bindings/spi/spi-nxp-fspi.yaml b/Documentation/devicetree/bindings/spi/spi-nxp-fspi.yaml
index 4a5f41bde00f..902db92da832 100644
--- a/Documentation/devicetree/bindings/spi/spi-nxp-fspi.yaml
+++ b/Documentation/devicetree/bindings/spi/spi-nxp-fspi.yaml
@@ -21,6 +21,7 @@ properties:
- nxp,imx8mm-fspi
- nxp,imx8mp-fspi
- nxp,imx8qxp-fspi
+ - nxp,imx8ulp-fspi
- nxp,lx2160a-fspi
- items:
- enum:
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x 63dff3e48871b0583be5032ff8fb7260c349a18c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100134-brutishly-unlinked-b242@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
63dff3e48871 ("lsm: add the inode_free_security_rcu() LSM implementation hook")
4de2f084fbff ("ima: Make it independent from 'integrity' LSM")
75a323e604fc ("evm: Make it independent from 'integrity' LSM")
923831117611 ("evm: Move to LSM infrastructure")
84594c9ecdca ("ima: Move IMA-Appraisal to LSM infrastructure")
cd3cec0a02c7 ("ima: Move to LSM infrastructure")
06cca5110774 ("integrity: Move integrity_kernel_module_request() to IMA")
b8d997032a46 ("security: Introduce key_post_create_or_update hook")
2d705d802414 ("security: Introduce inode_post_remove_acl hook")
8b9d0b825c65 ("security: Introduce inode_post_set_acl hook")
a7811e34d100 ("security: Introduce inode_post_create_tmpfile hook")
f09068b5a114 ("security: Introduce file_release hook")
8f46ff5767b0 ("security: Introduce file_post_open hook")
dae52cbf5887 ("security: Introduce inode_post_removexattr hook")
77fa6f314f03 ("security: Introduce inode_post_setattr hook")
314a8dc728d0 ("security: Align inode_setattr hook definition with EVM")
779cb1947e27 ("evm: Align evm_inode_post_setxattr() definition with LSM infrastructure")
2b6a4054f8c2 ("evm: Align evm_inode_setxattr() definition with LSM infrastructure")
784111d0093e ("evm: Align evm_inode_post_setattr() definition with LSM infrastructure")
fec5f85e468d ("ima: Align ima_post_read_file() definition with LSM infrastructure")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 63dff3e48871b0583be5032ff8fb7260c349a18c Mon Sep 17 00:00:00 2001
From: Paul Moore <paul(a)paul-moore.com>
Date: Tue, 9 Jul 2024 19:43:06 -0400
Subject: [PATCH] lsm: add the inode_free_security_rcu() LSM implementation
hook
The LSM framework has an existing inode_free_security() hook which
is used by LSMs that manage state associated with an inode, but
due to the use of RCU to protect the inode, special care must be
taken to ensure that the LSMs do not fully release the inode state
until it is safe from a RCU perspective.
This patch implements a new inode_free_security_rcu() implementation
hook which is called when it is safe to free the LSM's internal inode
state. Unfortunately, this new hook does not have access to the inode
itself as it may already be released, so the existing
inode_free_security() hook is retained for those LSMs which require
access to the inode.
Cc: stable(a)vger.kernel.org
Reported-by: syzbot+5446fbf332b0602ede0b(a)syzkaller.appspotmail.com
Closes: https://lore.kernel.org/r/00000000000076ba3b0617f65cc8@google.com
Signed-off-by: Paul Moore <paul(a)paul-moore.com>
diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
index 63e2656d1d56..520730fe2d94 100644
--- a/include/linux/lsm_hook_defs.h
+++ b/include/linux/lsm_hook_defs.h
@@ -114,6 +114,7 @@ LSM_HOOK(int, 0, path_notify, const struct path *path, u64 mask,
unsigned int obj_type)
LSM_HOOK(int, 0, inode_alloc_security, struct inode *inode)
LSM_HOOK(void, LSM_RET_VOID, inode_free_security, struct inode *inode)
+LSM_HOOK(void, LSM_RET_VOID, inode_free_security_rcu, void *inode_security)
LSM_HOOK(int, -EOPNOTSUPP, inode_init_security, struct inode *inode,
struct inode *dir, const struct qstr *qstr, struct xattr *xattrs,
int *xattr_count)
diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h
index c51e24d24d1e..3c323ca213d4 100644
--- a/security/integrity/ima/ima.h
+++ b/security/integrity/ima/ima.h
@@ -223,7 +223,7 @@ static inline void ima_inode_set_iint(const struct inode *inode,
struct ima_iint_cache *ima_iint_find(struct inode *inode);
struct ima_iint_cache *ima_inode_get(struct inode *inode);
-void ima_inode_free(struct inode *inode);
+void ima_inode_free_rcu(void *inode_security);
void __init ima_iintcache_init(void);
extern const int read_idmap[];
diff --git a/security/integrity/ima/ima_iint.c b/security/integrity/ima/ima_iint.c
index e23412a2c56b..00b249101f98 100644
--- a/security/integrity/ima/ima_iint.c
+++ b/security/integrity/ima/ima_iint.c
@@ -109,22 +109,18 @@ struct ima_iint_cache *ima_inode_get(struct inode *inode)
}
/**
- * ima_inode_free - Called on inode free
- * @inode: Pointer to the inode
+ * ima_inode_free_rcu - Called to free an inode via a RCU callback
+ * @inode_security: The inode->i_security pointer
*
- * Free the iint associated with an inode.
+ * Free the IMA data associated with an inode.
*/
-void ima_inode_free(struct inode *inode)
+void ima_inode_free_rcu(void *inode_security)
{
- struct ima_iint_cache *iint;
+ struct ima_iint_cache **iint_p = inode_security + ima_blob_sizes.lbs_inode;
- if (!IS_IMA(inode))
- return;
-
- iint = ima_iint_find(inode);
- ima_inode_set_iint(inode, NULL);
-
- ima_iint_free(iint);
+ /* *iint_p should be NULL if !IS_IMA(inode) */
+ if (*iint_p)
+ ima_iint_free(*iint_p);
}
static void ima_iint_init_once(void *foo)
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index f04f43af651c..5b3394864b21 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -1193,7 +1193,7 @@ static struct security_hook_list ima_hooks[] __ro_after_init = {
#ifdef CONFIG_INTEGRITY_ASYMMETRIC_KEYS
LSM_HOOK_INIT(kernel_module_request, ima_kernel_module_request),
#endif
- LSM_HOOK_INIT(inode_free_security, ima_inode_free),
+ LSM_HOOK_INIT(inode_free_security_rcu, ima_inode_free_rcu),
};
static const struct lsm_id ima_lsmid = {
diff --git a/security/landlock/fs.c b/security/landlock/fs.c
index 7877a64cc6b8..0804f76a67be 100644
--- a/security/landlock/fs.c
+++ b/security/landlock/fs.c
@@ -1207,13 +1207,16 @@ static int current_check_refer_path(struct dentry *const old_dentry,
/* Inode hooks */
-static void hook_inode_free_security(struct inode *const inode)
+static void hook_inode_free_security_rcu(void *inode_security)
{
+ struct landlock_inode_security *inode_sec;
+
/*
* All inodes must already have been untied from their object by
* release_inode() or hook_sb_delete().
*/
- WARN_ON_ONCE(landlock_inode(inode)->object);
+ inode_sec = inode_security + landlock_blob_sizes.lbs_inode;
+ WARN_ON_ONCE(inode_sec->object);
}
/* Super-block hooks */
@@ -1637,7 +1640,7 @@ static int hook_file_ioctl_compat(struct file *file, unsigned int cmd,
}
static struct security_hook_list landlock_hooks[] __ro_after_init = {
- LSM_HOOK_INIT(inode_free_security, hook_inode_free_security),
+ LSM_HOOK_INIT(inode_free_security_rcu, hook_inode_free_security_rcu),
LSM_HOOK_INIT(sb_delete, hook_sb_delete),
LSM_HOOK_INIT(sb_mount, hook_sb_mount),
diff --git a/security/security.c b/security/security.c
index b316e6586be2..611d3c124ba6 100644
--- a/security/security.c
+++ b/security/security.c
@@ -1609,9 +1609,8 @@ int security_inode_alloc(struct inode *inode)
static void inode_free_by_rcu(struct rcu_head *head)
{
- /*
- * The rcu head is at the start of the inode blob
- */
+ /* The rcu head is at the start of the inode blob */
+ call_void_hook(inode_free_security_rcu, head);
kmem_cache_free(lsm_inode_cache, head);
}
@@ -1619,23 +1618,24 @@ static void inode_free_by_rcu(struct rcu_head *head)
* security_inode_free() - Free an inode's LSM blob
* @inode: the inode
*
- * Deallocate the inode security structure and set @inode->i_security to NULL.
+ * Release any LSM resources associated with @inode, although due to the
+ * inode's RCU protections it is possible that the resources will not be
+ * fully released until after the current RCU grace period has elapsed.
+ *
+ * It is important for LSMs to note that despite being present in a call to
+ * security_inode_free(), @inode may still be referenced in a VFS path walk
+ * and calls to security_inode_permission() may be made during, or after,
+ * a call to security_inode_free(). For this reason the inode->i_security
+ * field is released via a call_rcu() callback and any LSMs which need to
+ * retain inode state for use in security_inode_permission() should only
+ * release that state in the inode_free_security_rcu() LSM hook callback.
*/
void security_inode_free(struct inode *inode)
{
call_void_hook(inode_free_security, inode);
- /*
- * The inode may still be referenced in a path walk and
- * a call to security_inode_permission() can be made
- * after inode_free_security() is called. Ideally, the VFS
- * wouldn't do this, but fixing that is a much harder
- * job. For now, simply free the i_security via RCU, and
- * leave the current inode->i_security pointer intact.
- * The inode will be freed after the RCU grace period too.
- */
- if (inode->i_security)
- call_rcu((struct rcu_head *)inode->i_security,
- inode_free_by_rcu);
+ if (!inode->i_security)
+ return;
+ call_rcu((struct rcu_head *)inode->i_security, inode_free_by_rcu);
}
/**
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 63dff3e48871b0583be5032ff8fb7260c349a18c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100133-antivirus-stuffing-da34@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
63dff3e48871 ("lsm: add the inode_free_security_rcu() LSM implementation hook")
4de2f084fbff ("ima: Make it independent from 'integrity' LSM")
75a323e604fc ("evm: Make it independent from 'integrity' LSM")
923831117611 ("evm: Move to LSM infrastructure")
84594c9ecdca ("ima: Move IMA-Appraisal to LSM infrastructure")
cd3cec0a02c7 ("ima: Move to LSM infrastructure")
06cca5110774 ("integrity: Move integrity_kernel_module_request() to IMA")
b8d997032a46 ("security: Introduce key_post_create_or_update hook")
2d705d802414 ("security: Introduce inode_post_remove_acl hook")
8b9d0b825c65 ("security: Introduce inode_post_set_acl hook")
a7811e34d100 ("security: Introduce inode_post_create_tmpfile hook")
f09068b5a114 ("security: Introduce file_release hook")
8f46ff5767b0 ("security: Introduce file_post_open hook")
dae52cbf5887 ("security: Introduce inode_post_removexattr hook")
77fa6f314f03 ("security: Introduce inode_post_setattr hook")
314a8dc728d0 ("security: Align inode_setattr hook definition with EVM")
779cb1947e27 ("evm: Align evm_inode_post_setxattr() definition with LSM infrastructure")
2b6a4054f8c2 ("evm: Align evm_inode_setxattr() definition with LSM infrastructure")
784111d0093e ("evm: Align evm_inode_post_setattr() definition with LSM infrastructure")
fec5f85e468d ("ima: Align ima_post_read_file() definition with LSM infrastructure")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 63dff3e48871b0583be5032ff8fb7260c349a18c Mon Sep 17 00:00:00 2001
From: Paul Moore <paul(a)paul-moore.com>
Date: Tue, 9 Jul 2024 19:43:06 -0400
Subject: [PATCH] lsm: add the inode_free_security_rcu() LSM implementation
hook
The LSM framework has an existing inode_free_security() hook which
is used by LSMs that manage state associated with an inode, but
due to the use of RCU to protect the inode, special care must be
taken to ensure that the LSMs do not fully release the inode state
until it is safe from a RCU perspective.
This patch implements a new inode_free_security_rcu() implementation
hook which is called when it is safe to free the LSM's internal inode
state. Unfortunately, this new hook does not have access to the inode
itself as it may already be released, so the existing
inode_free_security() hook is retained for those LSMs which require
access to the inode.
Cc: stable(a)vger.kernel.org
Reported-by: syzbot+5446fbf332b0602ede0b(a)syzkaller.appspotmail.com
Closes: https://lore.kernel.org/r/00000000000076ba3b0617f65cc8@google.com
Signed-off-by: Paul Moore <paul(a)paul-moore.com>
diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
index 63e2656d1d56..520730fe2d94 100644
--- a/include/linux/lsm_hook_defs.h
+++ b/include/linux/lsm_hook_defs.h
@@ -114,6 +114,7 @@ LSM_HOOK(int, 0, path_notify, const struct path *path, u64 mask,
unsigned int obj_type)
LSM_HOOK(int, 0, inode_alloc_security, struct inode *inode)
LSM_HOOK(void, LSM_RET_VOID, inode_free_security, struct inode *inode)
+LSM_HOOK(void, LSM_RET_VOID, inode_free_security_rcu, void *inode_security)
LSM_HOOK(int, -EOPNOTSUPP, inode_init_security, struct inode *inode,
struct inode *dir, const struct qstr *qstr, struct xattr *xattrs,
int *xattr_count)
diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h
index c51e24d24d1e..3c323ca213d4 100644
--- a/security/integrity/ima/ima.h
+++ b/security/integrity/ima/ima.h
@@ -223,7 +223,7 @@ static inline void ima_inode_set_iint(const struct inode *inode,
struct ima_iint_cache *ima_iint_find(struct inode *inode);
struct ima_iint_cache *ima_inode_get(struct inode *inode);
-void ima_inode_free(struct inode *inode);
+void ima_inode_free_rcu(void *inode_security);
void __init ima_iintcache_init(void);
extern const int read_idmap[];
diff --git a/security/integrity/ima/ima_iint.c b/security/integrity/ima/ima_iint.c
index e23412a2c56b..00b249101f98 100644
--- a/security/integrity/ima/ima_iint.c
+++ b/security/integrity/ima/ima_iint.c
@@ -109,22 +109,18 @@ struct ima_iint_cache *ima_inode_get(struct inode *inode)
}
/**
- * ima_inode_free - Called on inode free
- * @inode: Pointer to the inode
+ * ima_inode_free_rcu - Called to free an inode via a RCU callback
+ * @inode_security: The inode->i_security pointer
*
- * Free the iint associated with an inode.
+ * Free the IMA data associated with an inode.
*/
-void ima_inode_free(struct inode *inode)
+void ima_inode_free_rcu(void *inode_security)
{
- struct ima_iint_cache *iint;
+ struct ima_iint_cache **iint_p = inode_security + ima_blob_sizes.lbs_inode;
- if (!IS_IMA(inode))
- return;
-
- iint = ima_iint_find(inode);
- ima_inode_set_iint(inode, NULL);
-
- ima_iint_free(iint);
+ /* *iint_p should be NULL if !IS_IMA(inode) */
+ if (*iint_p)
+ ima_iint_free(*iint_p);
}
static void ima_iint_init_once(void *foo)
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index f04f43af651c..5b3394864b21 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -1193,7 +1193,7 @@ static struct security_hook_list ima_hooks[] __ro_after_init = {
#ifdef CONFIG_INTEGRITY_ASYMMETRIC_KEYS
LSM_HOOK_INIT(kernel_module_request, ima_kernel_module_request),
#endif
- LSM_HOOK_INIT(inode_free_security, ima_inode_free),
+ LSM_HOOK_INIT(inode_free_security_rcu, ima_inode_free_rcu),
};
static const struct lsm_id ima_lsmid = {
diff --git a/security/landlock/fs.c b/security/landlock/fs.c
index 7877a64cc6b8..0804f76a67be 100644
--- a/security/landlock/fs.c
+++ b/security/landlock/fs.c
@@ -1207,13 +1207,16 @@ static int current_check_refer_path(struct dentry *const old_dentry,
/* Inode hooks */
-static void hook_inode_free_security(struct inode *const inode)
+static void hook_inode_free_security_rcu(void *inode_security)
{
+ struct landlock_inode_security *inode_sec;
+
/*
* All inodes must already have been untied from their object by
* release_inode() or hook_sb_delete().
*/
- WARN_ON_ONCE(landlock_inode(inode)->object);
+ inode_sec = inode_security + landlock_blob_sizes.lbs_inode;
+ WARN_ON_ONCE(inode_sec->object);
}
/* Super-block hooks */
@@ -1637,7 +1640,7 @@ static int hook_file_ioctl_compat(struct file *file, unsigned int cmd,
}
static struct security_hook_list landlock_hooks[] __ro_after_init = {
- LSM_HOOK_INIT(inode_free_security, hook_inode_free_security),
+ LSM_HOOK_INIT(inode_free_security_rcu, hook_inode_free_security_rcu),
LSM_HOOK_INIT(sb_delete, hook_sb_delete),
LSM_HOOK_INIT(sb_mount, hook_sb_mount),
diff --git a/security/security.c b/security/security.c
index b316e6586be2..611d3c124ba6 100644
--- a/security/security.c
+++ b/security/security.c
@@ -1609,9 +1609,8 @@ int security_inode_alloc(struct inode *inode)
static void inode_free_by_rcu(struct rcu_head *head)
{
- /*
- * The rcu head is at the start of the inode blob
- */
+ /* The rcu head is at the start of the inode blob */
+ call_void_hook(inode_free_security_rcu, head);
kmem_cache_free(lsm_inode_cache, head);
}
@@ -1619,23 +1618,24 @@ static void inode_free_by_rcu(struct rcu_head *head)
* security_inode_free() - Free an inode's LSM blob
* @inode: the inode
*
- * Deallocate the inode security structure and set @inode->i_security to NULL.
+ * Release any LSM resources associated with @inode, although due to the
+ * inode's RCU protections it is possible that the resources will not be
+ * fully released until after the current RCU grace period has elapsed.
+ *
+ * It is important for LSMs to note that despite being present in a call to
+ * security_inode_free(), @inode may still be referenced in a VFS path walk
+ * and calls to security_inode_permission() may be made during, or after,
+ * a call to security_inode_free(). For this reason the inode->i_security
+ * field is released via a call_rcu() callback and any LSMs which need to
+ * retain inode state for use in security_inode_permission() should only
+ * release that state in the inode_free_security_rcu() LSM hook callback.
*/
void security_inode_free(struct inode *inode)
{
call_void_hook(inode_free_security, inode);
- /*
- * The inode may still be referenced in a path walk and
- * a call to security_inode_permission() can be made
- * after inode_free_security() is called. Ideally, the VFS
- * wouldn't do this, but fixing that is a much harder
- * job. For now, simply free the i_security via RCU, and
- * leave the current inode->i_security pointer intact.
- * The inode will be freed after the RCU grace period too.
- */
- if (inode->i_security)
- call_rcu((struct rcu_head *)inode->i_security,
- inode_free_by_rcu);
+ if (!inode->i_security)
+ return;
+ call_rcu((struct rcu_head *)inode->i_security, inode_free_by_rcu);
}
/**
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 63dff3e48871b0583be5032ff8fb7260c349a18c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100132-kung-gibberish-1f5b@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
63dff3e48871 ("lsm: add the inode_free_security_rcu() LSM implementation hook")
4de2f084fbff ("ima: Make it independent from 'integrity' LSM")
75a323e604fc ("evm: Make it independent from 'integrity' LSM")
923831117611 ("evm: Move to LSM infrastructure")
84594c9ecdca ("ima: Move IMA-Appraisal to LSM infrastructure")
cd3cec0a02c7 ("ima: Move to LSM infrastructure")
06cca5110774 ("integrity: Move integrity_kernel_module_request() to IMA")
b8d997032a46 ("security: Introduce key_post_create_or_update hook")
2d705d802414 ("security: Introduce inode_post_remove_acl hook")
8b9d0b825c65 ("security: Introduce inode_post_set_acl hook")
a7811e34d100 ("security: Introduce inode_post_create_tmpfile hook")
f09068b5a114 ("security: Introduce file_release hook")
8f46ff5767b0 ("security: Introduce file_post_open hook")
dae52cbf5887 ("security: Introduce inode_post_removexattr hook")
77fa6f314f03 ("security: Introduce inode_post_setattr hook")
314a8dc728d0 ("security: Align inode_setattr hook definition with EVM")
779cb1947e27 ("evm: Align evm_inode_post_setxattr() definition with LSM infrastructure")
2b6a4054f8c2 ("evm: Align evm_inode_setxattr() definition with LSM infrastructure")
784111d0093e ("evm: Align evm_inode_post_setattr() definition with LSM infrastructure")
fec5f85e468d ("ima: Align ima_post_read_file() definition with LSM infrastructure")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 63dff3e48871b0583be5032ff8fb7260c349a18c Mon Sep 17 00:00:00 2001
From: Paul Moore <paul(a)paul-moore.com>
Date: Tue, 9 Jul 2024 19:43:06 -0400
Subject: [PATCH] lsm: add the inode_free_security_rcu() LSM implementation
hook
The LSM framework has an existing inode_free_security() hook which
is used by LSMs that manage state associated with an inode, but
due to the use of RCU to protect the inode, special care must be
taken to ensure that the LSMs do not fully release the inode state
until it is safe from a RCU perspective.
This patch implements a new inode_free_security_rcu() implementation
hook which is called when it is safe to free the LSM's internal inode
state. Unfortunately, this new hook does not have access to the inode
itself as it may already be released, so the existing
inode_free_security() hook is retained for those LSMs which require
access to the inode.
Cc: stable(a)vger.kernel.org
Reported-by: syzbot+5446fbf332b0602ede0b(a)syzkaller.appspotmail.com
Closes: https://lore.kernel.org/r/00000000000076ba3b0617f65cc8@google.com
Signed-off-by: Paul Moore <paul(a)paul-moore.com>
diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
index 63e2656d1d56..520730fe2d94 100644
--- a/include/linux/lsm_hook_defs.h
+++ b/include/linux/lsm_hook_defs.h
@@ -114,6 +114,7 @@ LSM_HOOK(int, 0, path_notify, const struct path *path, u64 mask,
unsigned int obj_type)
LSM_HOOK(int, 0, inode_alloc_security, struct inode *inode)
LSM_HOOK(void, LSM_RET_VOID, inode_free_security, struct inode *inode)
+LSM_HOOK(void, LSM_RET_VOID, inode_free_security_rcu, void *inode_security)
LSM_HOOK(int, -EOPNOTSUPP, inode_init_security, struct inode *inode,
struct inode *dir, const struct qstr *qstr, struct xattr *xattrs,
int *xattr_count)
diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h
index c51e24d24d1e..3c323ca213d4 100644
--- a/security/integrity/ima/ima.h
+++ b/security/integrity/ima/ima.h
@@ -223,7 +223,7 @@ static inline void ima_inode_set_iint(const struct inode *inode,
struct ima_iint_cache *ima_iint_find(struct inode *inode);
struct ima_iint_cache *ima_inode_get(struct inode *inode);
-void ima_inode_free(struct inode *inode);
+void ima_inode_free_rcu(void *inode_security);
void __init ima_iintcache_init(void);
extern const int read_idmap[];
diff --git a/security/integrity/ima/ima_iint.c b/security/integrity/ima/ima_iint.c
index e23412a2c56b..00b249101f98 100644
--- a/security/integrity/ima/ima_iint.c
+++ b/security/integrity/ima/ima_iint.c
@@ -109,22 +109,18 @@ struct ima_iint_cache *ima_inode_get(struct inode *inode)
}
/**
- * ima_inode_free - Called on inode free
- * @inode: Pointer to the inode
+ * ima_inode_free_rcu - Called to free an inode via a RCU callback
+ * @inode_security: The inode->i_security pointer
*
- * Free the iint associated with an inode.
+ * Free the IMA data associated with an inode.
*/
-void ima_inode_free(struct inode *inode)
+void ima_inode_free_rcu(void *inode_security)
{
- struct ima_iint_cache *iint;
+ struct ima_iint_cache **iint_p = inode_security + ima_blob_sizes.lbs_inode;
- if (!IS_IMA(inode))
- return;
-
- iint = ima_iint_find(inode);
- ima_inode_set_iint(inode, NULL);
-
- ima_iint_free(iint);
+ /* *iint_p should be NULL if !IS_IMA(inode) */
+ if (*iint_p)
+ ima_iint_free(*iint_p);
}
static void ima_iint_init_once(void *foo)
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index f04f43af651c..5b3394864b21 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -1193,7 +1193,7 @@ static struct security_hook_list ima_hooks[] __ro_after_init = {
#ifdef CONFIG_INTEGRITY_ASYMMETRIC_KEYS
LSM_HOOK_INIT(kernel_module_request, ima_kernel_module_request),
#endif
- LSM_HOOK_INIT(inode_free_security, ima_inode_free),
+ LSM_HOOK_INIT(inode_free_security_rcu, ima_inode_free_rcu),
};
static const struct lsm_id ima_lsmid = {
diff --git a/security/landlock/fs.c b/security/landlock/fs.c
index 7877a64cc6b8..0804f76a67be 100644
--- a/security/landlock/fs.c
+++ b/security/landlock/fs.c
@@ -1207,13 +1207,16 @@ static int current_check_refer_path(struct dentry *const old_dentry,
/* Inode hooks */
-static void hook_inode_free_security(struct inode *const inode)
+static void hook_inode_free_security_rcu(void *inode_security)
{
+ struct landlock_inode_security *inode_sec;
+
/*
* All inodes must already have been untied from their object by
* release_inode() or hook_sb_delete().
*/
- WARN_ON_ONCE(landlock_inode(inode)->object);
+ inode_sec = inode_security + landlock_blob_sizes.lbs_inode;
+ WARN_ON_ONCE(inode_sec->object);
}
/* Super-block hooks */
@@ -1637,7 +1640,7 @@ static int hook_file_ioctl_compat(struct file *file, unsigned int cmd,
}
static struct security_hook_list landlock_hooks[] __ro_after_init = {
- LSM_HOOK_INIT(inode_free_security, hook_inode_free_security),
+ LSM_HOOK_INIT(inode_free_security_rcu, hook_inode_free_security_rcu),
LSM_HOOK_INIT(sb_delete, hook_sb_delete),
LSM_HOOK_INIT(sb_mount, hook_sb_mount),
diff --git a/security/security.c b/security/security.c
index b316e6586be2..611d3c124ba6 100644
--- a/security/security.c
+++ b/security/security.c
@@ -1609,9 +1609,8 @@ int security_inode_alloc(struct inode *inode)
static void inode_free_by_rcu(struct rcu_head *head)
{
- /*
- * The rcu head is at the start of the inode blob
- */
+ /* The rcu head is at the start of the inode blob */
+ call_void_hook(inode_free_security_rcu, head);
kmem_cache_free(lsm_inode_cache, head);
}
@@ -1619,23 +1618,24 @@ static void inode_free_by_rcu(struct rcu_head *head)
* security_inode_free() - Free an inode's LSM blob
* @inode: the inode
*
- * Deallocate the inode security structure and set @inode->i_security to NULL.
+ * Release any LSM resources associated with @inode, although due to the
+ * inode's RCU protections it is possible that the resources will not be
+ * fully released until after the current RCU grace period has elapsed.
+ *
+ * It is important for LSMs to note that despite being present in a call to
+ * security_inode_free(), @inode may still be referenced in a VFS path walk
+ * and calls to security_inode_permission() may be made during, or after,
+ * a call to security_inode_free(). For this reason the inode->i_security
+ * field is released via a call_rcu() callback and any LSMs which need to
+ * retain inode state for use in security_inode_permission() should only
+ * release that state in the inode_free_security_rcu() LSM hook callback.
*/
void security_inode_free(struct inode *inode)
{
call_void_hook(inode_free_security, inode);
- /*
- * The inode may still be referenced in a path walk and
- * a call to security_inode_permission() can be made
- * after inode_free_security() is called. Ideally, the VFS
- * wouldn't do this, but fixing that is a much harder
- * job. For now, simply free the i_security via RCU, and
- * leave the current inode->i_security pointer intact.
- * The inode will be freed after the RCU grace period too.
- */
- if (inode->i_security)
- call_rcu((struct rcu_head *)inode->i_security,
- inode_free_by_rcu);
+ if (!inode->i_security)
+ return;
+ call_rcu((struct rcu_head *)inode->i_security, inode_free_by_rcu);
}
/**
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 63dff3e48871b0583be5032ff8fb7260c349a18c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100131-concerned-unashamed-5020@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
63dff3e48871 ("lsm: add the inode_free_security_rcu() LSM implementation hook")
4de2f084fbff ("ima: Make it independent from 'integrity' LSM")
75a323e604fc ("evm: Make it independent from 'integrity' LSM")
923831117611 ("evm: Move to LSM infrastructure")
84594c9ecdca ("ima: Move IMA-Appraisal to LSM infrastructure")
cd3cec0a02c7 ("ima: Move to LSM infrastructure")
06cca5110774 ("integrity: Move integrity_kernel_module_request() to IMA")
b8d997032a46 ("security: Introduce key_post_create_or_update hook")
2d705d802414 ("security: Introduce inode_post_remove_acl hook")
8b9d0b825c65 ("security: Introduce inode_post_set_acl hook")
a7811e34d100 ("security: Introduce inode_post_create_tmpfile hook")
f09068b5a114 ("security: Introduce file_release hook")
8f46ff5767b0 ("security: Introduce file_post_open hook")
dae52cbf5887 ("security: Introduce inode_post_removexattr hook")
77fa6f314f03 ("security: Introduce inode_post_setattr hook")
314a8dc728d0 ("security: Align inode_setattr hook definition with EVM")
779cb1947e27 ("evm: Align evm_inode_post_setxattr() definition with LSM infrastructure")
2b6a4054f8c2 ("evm: Align evm_inode_setxattr() definition with LSM infrastructure")
784111d0093e ("evm: Align evm_inode_post_setattr() definition with LSM infrastructure")
fec5f85e468d ("ima: Align ima_post_read_file() definition with LSM infrastructure")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 63dff3e48871b0583be5032ff8fb7260c349a18c Mon Sep 17 00:00:00 2001
From: Paul Moore <paul(a)paul-moore.com>
Date: Tue, 9 Jul 2024 19:43:06 -0400
Subject: [PATCH] lsm: add the inode_free_security_rcu() LSM implementation
hook
The LSM framework has an existing inode_free_security() hook which
is used by LSMs that manage state associated with an inode, but
due to the use of RCU to protect the inode, special care must be
taken to ensure that the LSMs do not fully release the inode state
until it is safe from a RCU perspective.
This patch implements a new inode_free_security_rcu() implementation
hook which is called when it is safe to free the LSM's internal inode
state. Unfortunately, this new hook does not have access to the inode
itself as it may already be released, so the existing
inode_free_security() hook is retained for those LSMs which require
access to the inode.
Cc: stable(a)vger.kernel.org
Reported-by: syzbot+5446fbf332b0602ede0b(a)syzkaller.appspotmail.com
Closes: https://lore.kernel.org/r/00000000000076ba3b0617f65cc8@google.com
Signed-off-by: Paul Moore <paul(a)paul-moore.com>
diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
index 63e2656d1d56..520730fe2d94 100644
--- a/include/linux/lsm_hook_defs.h
+++ b/include/linux/lsm_hook_defs.h
@@ -114,6 +114,7 @@ LSM_HOOK(int, 0, path_notify, const struct path *path, u64 mask,
unsigned int obj_type)
LSM_HOOK(int, 0, inode_alloc_security, struct inode *inode)
LSM_HOOK(void, LSM_RET_VOID, inode_free_security, struct inode *inode)
+LSM_HOOK(void, LSM_RET_VOID, inode_free_security_rcu, void *inode_security)
LSM_HOOK(int, -EOPNOTSUPP, inode_init_security, struct inode *inode,
struct inode *dir, const struct qstr *qstr, struct xattr *xattrs,
int *xattr_count)
diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h
index c51e24d24d1e..3c323ca213d4 100644
--- a/security/integrity/ima/ima.h
+++ b/security/integrity/ima/ima.h
@@ -223,7 +223,7 @@ static inline void ima_inode_set_iint(const struct inode *inode,
struct ima_iint_cache *ima_iint_find(struct inode *inode);
struct ima_iint_cache *ima_inode_get(struct inode *inode);
-void ima_inode_free(struct inode *inode);
+void ima_inode_free_rcu(void *inode_security);
void __init ima_iintcache_init(void);
extern const int read_idmap[];
diff --git a/security/integrity/ima/ima_iint.c b/security/integrity/ima/ima_iint.c
index e23412a2c56b..00b249101f98 100644
--- a/security/integrity/ima/ima_iint.c
+++ b/security/integrity/ima/ima_iint.c
@@ -109,22 +109,18 @@ struct ima_iint_cache *ima_inode_get(struct inode *inode)
}
/**
- * ima_inode_free - Called on inode free
- * @inode: Pointer to the inode
+ * ima_inode_free_rcu - Called to free an inode via a RCU callback
+ * @inode_security: The inode->i_security pointer
*
- * Free the iint associated with an inode.
+ * Free the IMA data associated with an inode.
*/
-void ima_inode_free(struct inode *inode)
+void ima_inode_free_rcu(void *inode_security)
{
- struct ima_iint_cache *iint;
+ struct ima_iint_cache **iint_p = inode_security + ima_blob_sizes.lbs_inode;
- if (!IS_IMA(inode))
- return;
-
- iint = ima_iint_find(inode);
- ima_inode_set_iint(inode, NULL);
-
- ima_iint_free(iint);
+ /* *iint_p should be NULL if !IS_IMA(inode) */
+ if (*iint_p)
+ ima_iint_free(*iint_p);
}
static void ima_iint_init_once(void *foo)
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index f04f43af651c..5b3394864b21 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -1193,7 +1193,7 @@ static struct security_hook_list ima_hooks[] __ro_after_init = {
#ifdef CONFIG_INTEGRITY_ASYMMETRIC_KEYS
LSM_HOOK_INIT(kernel_module_request, ima_kernel_module_request),
#endif
- LSM_HOOK_INIT(inode_free_security, ima_inode_free),
+ LSM_HOOK_INIT(inode_free_security_rcu, ima_inode_free_rcu),
};
static const struct lsm_id ima_lsmid = {
diff --git a/security/landlock/fs.c b/security/landlock/fs.c
index 7877a64cc6b8..0804f76a67be 100644
--- a/security/landlock/fs.c
+++ b/security/landlock/fs.c
@@ -1207,13 +1207,16 @@ static int current_check_refer_path(struct dentry *const old_dentry,
/* Inode hooks */
-static void hook_inode_free_security(struct inode *const inode)
+static void hook_inode_free_security_rcu(void *inode_security)
{
+ struct landlock_inode_security *inode_sec;
+
/*
* All inodes must already have been untied from their object by
* release_inode() or hook_sb_delete().
*/
- WARN_ON_ONCE(landlock_inode(inode)->object);
+ inode_sec = inode_security + landlock_blob_sizes.lbs_inode;
+ WARN_ON_ONCE(inode_sec->object);
}
/* Super-block hooks */
@@ -1637,7 +1640,7 @@ static int hook_file_ioctl_compat(struct file *file, unsigned int cmd,
}
static struct security_hook_list landlock_hooks[] __ro_after_init = {
- LSM_HOOK_INIT(inode_free_security, hook_inode_free_security),
+ LSM_HOOK_INIT(inode_free_security_rcu, hook_inode_free_security_rcu),
LSM_HOOK_INIT(sb_delete, hook_sb_delete),
LSM_HOOK_INIT(sb_mount, hook_sb_mount),
diff --git a/security/security.c b/security/security.c
index b316e6586be2..611d3c124ba6 100644
--- a/security/security.c
+++ b/security/security.c
@@ -1609,9 +1609,8 @@ int security_inode_alloc(struct inode *inode)
static void inode_free_by_rcu(struct rcu_head *head)
{
- /*
- * The rcu head is at the start of the inode blob
- */
+ /* The rcu head is at the start of the inode blob */
+ call_void_hook(inode_free_security_rcu, head);
kmem_cache_free(lsm_inode_cache, head);
}
@@ -1619,23 +1618,24 @@ static void inode_free_by_rcu(struct rcu_head *head)
* security_inode_free() - Free an inode's LSM blob
* @inode: the inode
*
- * Deallocate the inode security structure and set @inode->i_security to NULL.
+ * Release any LSM resources associated with @inode, although due to the
+ * inode's RCU protections it is possible that the resources will not be
+ * fully released until after the current RCU grace period has elapsed.
+ *
+ * It is important for LSMs to note that despite being present in a call to
+ * security_inode_free(), @inode may still be referenced in a VFS path walk
+ * and calls to security_inode_permission() may be made during, or after,
+ * a call to security_inode_free(). For this reason the inode->i_security
+ * field is released via a call_rcu() callback and any LSMs which need to
+ * retain inode state for use in security_inode_permission() should only
+ * release that state in the inode_free_security_rcu() LSM hook callback.
*/
void security_inode_free(struct inode *inode)
{
call_void_hook(inode_free_security, inode);
- /*
- * The inode may still be referenced in a path walk and
- * a call to security_inode_permission() can be made
- * after inode_free_security() is called. Ideally, the VFS
- * wouldn't do this, but fixing that is a much harder
- * job. For now, simply free the i_security via RCU, and
- * leave the current inode->i_security pointer intact.
- * The inode will be freed after the RCU grace period too.
- */
- if (inode->i_security)
- call_rcu((struct rcu_head *)inode->i_security,
- inode_free_by_rcu);
+ if (!inode->i_security)
+ return;
+ call_rcu((struct rcu_head *)inode->i_security, inode_free_by_rcu);
}
/**
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 63dff3e48871b0583be5032ff8fb7260c349a18c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100130-superior-viewpoint-e91a@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
63dff3e48871 ("lsm: add the inode_free_security_rcu() LSM implementation hook")
4de2f084fbff ("ima: Make it independent from 'integrity' LSM")
75a323e604fc ("evm: Make it independent from 'integrity' LSM")
923831117611 ("evm: Move to LSM infrastructure")
84594c9ecdca ("ima: Move IMA-Appraisal to LSM infrastructure")
cd3cec0a02c7 ("ima: Move to LSM infrastructure")
06cca5110774 ("integrity: Move integrity_kernel_module_request() to IMA")
b8d997032a46 ("security: Introduce key_post_create_or_update hook")
2d705d802414 ("security: Introduce inode_post_remove_acl hook")
8b9d0b825c65 ("security: Introduce inode_post_set_acl hook")
a7811e34d100 ("security: Introduce inode_post_create_tmpfile hook")
f09068b5a114 ("security: Introduce file_release hook")
8f46ff5767b0 ("security: Introduce file_post_open hook")
dae52cbf5887 ("security: Introduce inode_post_removexattr hook")
77fa6f314f03 ("security: Introduce inode_post_setattr hook")
314a8dc728d0 ("security: Align inode_setattr hook definition with EVM")
779cb1947e27 ("evm: Align evm_inode_post_setxattr() definition with LSM infrastructure")
2b6a4054f8c2 ("evm: Align evm_inode_setxattr() definition with LSM infrastructure")
784111d0093e ("evm: Align evm_inode_post_setattr() definition with LSM infrastructure")
fec5f85e468d ("ima: Align ima_post_read_file() definition with LSM infrastructure")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 63dff3e48871b0583be5032ff8fb7260c349a18c Mon Sep 17 00:00:00 2001
From: Paul Moore <paul(a)paul-moore.com>
Date: Tue, 9 Jul 2024 19:43:06 -0400
Subject: [PATCH] lsm: add the inode_free_security_rcu() LSM implementation
hook
The LSM framework has an existing inode_free_security() hook which
is used by LSMs that manage state associated with an inode, but
due to the use of RCU to protect the inode, special care must be
taken to ensure that the LSMs do not fully release the inode state
until it is safe from a RCU perspective.
This patch implements a new inode_free_security_rcu() implementation
hook which is called when it is safe to free the LSM's internal inode
state. Unfortunately, this new hook does not have access to the inode
itself as it may already be released, so the existing
inode_free_security() hook is retained for those LSMs which require
access to the inode.
Cc: stable(a)vger.kernel.org
Reported-by: syzbot+5446fbf332b0602ede0b(a)syzkaller.appspotmail.com
Closes: https://lore.kernel.org/r/00000000000076ba3b0617f65cc8@google.com
Signed-off-by: Paul Moore <paul(a)paul-moore.com>
diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
index 63e2656d1d56..520730fe2d94 100644
--- a/include/linux/lsm_hook_defs.h
+++ b/include/linux/lsm_hook_defs.h
@@ -114,6 +114,7 @@ LSM_HOOK(int, 0, path_notify, const struct path *path, u64 mask,
unsigned int obj_type)
LSM_HOOK(int, 0, inode_alloc_security, struct inode *inode)
LSM_HOOK(void, LSM_RET_VOID, inode_free_security, struct inode *inode)
+LSM_HOOK(void, LSM_RET_VOID, inode_free_security_rcu, void *inode_security)
LSM_HOOK(int, -EOPNOTSUPP, inode_init_security, struct inode *inode,
struct inode *dir, const struct qstr *qstr, struct xattr *xattrs,
int *xattr_count)
diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h
index c51e24d24d1e..3c323ca213d4 100644
--- a/security/integrity/ima/ima.h
+++ b/security/integrity/ima/ima.h
@@ -223,7 +223,7 @@ static inline void ima_inode_set_iint(const struct inode *inode,
struct ima_iint_cache *ima_iint_find(struct inode *inode);
struct ima_iint_cache *ima_inode_get(struct inode *inode);
-void ima_inode_free(struct inode *inode);
+void ima_inode_free_rcu(void *inode_security);
void __init ima_iintcache_init(void);
extern const int read_idmap[];
diff --git a/security/integrity/ima/ima_iint.c b/security/integrity/ima/ima_iint.c
index e23412a2c56b..00b249101f98 100644
--- a/security/integrity/ima/ima_iint.c
+++ b/security/integrity/ima/ima_iint.c
@@ -109,22 +109,18 @@ struct ima_iint_cache *ima_inode_get(struct inode *inode)
}
/**
- * ima_inode_free - Called on inode free
- * @inode: Pointer to the inode
+ * ima_inode_free_rcu - Called to free an inode via a RCU callback
+ * @inode_security: The inode->i_security pointer
*
- * Free the iint associated with an inode.
+ * Free the IMA data associated with an inode.
*/
-void ima_inode_free(struct inode *inode)
+void ima_inode_free_rcu(void *inode_security)
{
- struct ima_iint_cache *iint;
+ struct ima_iint_cache **iint_p = inode_security + ima_blob_sizes.lbs_inode;
- if (!IS_IMA(inode))
- return;
-
- iint = ima_iint_find(inode);
- ima_inode_set_iint(inode, NULL);
-
- ima_iint_free(iint);
+ /* *iint_p should be NULL if !IS_IMA(inode) */
+ if (*iint_p)
+ ima_iint_free(*iint_p);
}
static void ima_iint_init_once(void *foo)
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index f04f43af651c..5b3394864b21 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -1193,7 +1193,7 @@ static struct security_hook_list ima_hooks[] __ro_after_init = {
#ifdef CONFIG_INTEGRITY_ASYMMETRIC_KEYS
LSM_HOOK_INIT(kernel_module_request, ima_kernel_module_request),
#endif
- LSM_HOOK_INIT(inode_free_security, ima_inode_free),
+ LSM_HOOK_INIT(inode_free_security_rcu, ima_inode_free_rcu),
};
static const struct lsm_id ima_lsmid = {
diff --git a/security/landlock/fs.c b/security/landlock/fs.c
index 7877a64cc6b8..0804f76a67be 100644
--- a/security/landlock/fs.c
+++ b/security/landlock/fs.c
@@ -1207,13 +1207,16 @@ static int current_check_refer_path(struct dentry *const old_dentry,
/* Inode hooks */
-static void hook_inode_free_security(struct inode *const inode)
+static void hook_inode_free_security_rcu(void *inode_security)
{
+ struct landlock_inode_security *inode_sec;
+
/*
* All inodes must already have been untied from their object by
* release_inode() or hook_sb_delete().
*/
- WARN_ON_ONCE(landlock_inode(inode)->object);
+ inode_sec = inode_security + landlock_blob_sizes.lbs_inode;
+ WARN_ON_ONCE(inode_sec->object);
}
/* Super-block hooks */
@@ -1637,7 +1640,7 @@ static int hook_file_ioctl_compat(struct file *file, unsigned int cmd,
}
static struct security_hook_list landlock_hooks[] __ro_after_init = {
- LSM_HOOK_INIT(inode_free_security, hook_inode_free_security),
+ LSM_HOOK_INIT(inode_free_security_rcu, hook_inode_free_security_rcu),
LSM_HOOK_INIT(sb_delete, hook_sb_delete),
LSM_HOOK_INIT(sb_mount, hook_sb_mount),
diff --git a/security/security.c b/security/security.c
index b316e6586be2..611d3c124ba6 100644
--- a/security/security.c
+++ b/security/security.c
@@ -1609,9 +1609,8 @@ int security_inode_alloc(struct inode *inode)
static void inode_free_by_rcu(struct rcu_head *head)
{
- /*
- * The rcu head is at the start of the inode blob
- */
+ /* The rcu head is at the start of the inode blob */
+ call_void_hook(inode_free_security_rcu, head);
kmem_cache_free(lsm_inode_cache, head);
}
@@ -1619,23 +1618,24 @@ static void inode_free_by_rcu(struct rcu_head *head)
* security_inode_free() - Free an inode's LSM blob
* @inode: the inode
*
- * Deallocate the inode security structure and set @inode->i_security to NULL.
+ * Release any LSM resources associated with @inode, although due to the
+ * inode's RCU protections it is possible that the resources will not be
+ * fully released until after the current RCU grace period has elapsed.
+ *
+ * It is important for LSMs to note that despite being present in a call to
+ * security_inode_free(), @inode may still be referenced in a VFS path walk
+ * and calls to security_inode_permission() may be made during, or after,
+ * a call to security_inode_free(). For this reason the inode->i_security
+ * field is released via a call_rcu() callback and any LSMs which need to
+ * retain inode state for use in security_inode_permission() should only
+ * release that state in the inode_free_security_rcu() LSM hook callback.
*/
void security_inode_free(struct inode *inode)
{
call_void_hook(inode_free_security, inode);
- /*
- * The inode may still be referenced in a path walk and
- * a call to security_inode_permission() can be made
- * after inode_free_security() is called. Ideally, the VFS
- * wouldn't do this, but fixing that is a much harder
- * job. For now, simply free the i_security via RCU, and
- * leave the current inode->i_security pointer intact.
- * The inode will be freed after the RCU grace period too.
- */
- if (inode->i_security)
- call_rcu((struct rcu_head *)inode->i_security,
- inode_free_by_rcu);
+ if (!inode->i_security)
+ return;
+ call_rcu((struct rcu_head *)inode->i_security, inode_free_by_rcu);
}
/**
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 63dff3e48871b0583be5032ff8fb7260c349a18c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100129-undertook-rebuff-1fdd@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
63dff3e48871 ("lsm: add the inode_free_security_rcu() LSM implementation hook")
4de2f084fbff ("ima: Make it independent from 'integrity' LSM")
75a323e604fc ("evm: Make it independent from 'integrity' LSM")
923831117611 ("evm: Move to LSM infrastructure")
84594c9ecdca ("ima: Move IMA-Appraisal to LSM infrastructure")
cd3cec0a02c7 ("ima: Move to LSM infrastructure")
06cca5110774 ("integrity: Move integrity_kernel_module_request() to IMA")
b8d997032a46 ("security: Introduce key_post_create_or_update hook")
2d705d802414 ("security: Introduce inode_post_remove_acl hook")
8b9d0b825c65 ("security: Introduce inode_post_set_acl hook")
a7811e34d100 ("security: Introduce inode_post_create_tmpfile hook")
f09068b5a114 ("security: Introduce file_release hook")
8f46ff5767b0 ("security: Introduce file_post_open hook")
dae52cbf5887 ("security: Introduce inode_post_removexattr hook")
77fa6f314f03 ("security: Introduce inode_post_setattr hook")
314a8dc728d0 ("security: Align inode_setattr hook definition with EVM")
779cb1947e27 ("evm: Align evm_inode_post_setxattr() definition with LSM infrastructure")
2b6a4054f8c2 ("evm: Align evm_inode_setxattr() definition with LSM infrastructure")
784111d0093e ("evm: Align evm_inode_post_setattr() definition with LSM infrastructure")
fec5f85e468d ("ima: Align ima_post_read_file() definition with LSM infrastructure")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 63dff3e48871b0583be5032ff8fb7260c349a18c Mon Sep 17 00:00:00 2001
From: Paul Moore <paul(a)paul-moore.com>
Date: Tue, 9 Jul 2024 19:43:06 -0400
Subject: [PATCH] lsm: add the inode_free_security_rcu() LSM implementation
hook
The LSM framework has an existing inode_free_security() hook which
is used by LSMs that manage state associated with an inode, but
due to the use of RCU to protect the inode, special care must be
taken to ensure that the LSMs do not fully release the inode state
until it is safe from a RCU perspective.
This patch implements a new inode_free_security_rcu() implementation
hook which is called when it is safe to free the LSM's internal inode
state. Unfortunately, this new hook does not have access to the inode
itself as it may already be released, so the existing
inode_free_security() hook is retained for those LSMs which require
access to the inode.
Cc: stable(a)vger.kernel.org
Reported-by: syzbot+5446fbf332b0602ede0b(a)syzkaller.appspotmail.com
Closes: https://lore.kernel.org/r/00000000000076ba3b0617f65cc8@google.com
Signed-off-by: Paul Moore <paul(a)paul-moore.com>
diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
index 63e2656d1d56..520730fe2d94 100644
--- a/include/linux/lsm_hook_defs.h
+++ b/include/linux/lsm_hook_defs.h
@@ -114,6 +114,7 @@ LSM_HOOK(int, 0, path_notify, const struct path *path, u64 mask,
unsigned int obj_type)
LSM_HOOK(int, 0, inode_alloc_security, struct inode *inode)
LSM_HOOK(void, LSM_RET_VOID, inode_free_security, struct inode *inode)
+LSM_HOOK(void, LSM_RET_VOID, inode_free_security_rcu, void *inode_security)
LSM_HOOK(int, -EOPNOTSUPP, inode_init_security, struct inode *inode,
struct inode *dir, const struct qstr *qstr, struct xattr *xattrs,
int *xattr_count)
diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h
index c51e24d24d1e..3c323ca213d4 100644
--- a/security/integrity/ima/ima.h
+++ b/security/integrity/ima/ima.h
@@ -223,7 +223,7 @@ static inline void ima_inode_set_iint(const struct inode *inode,
struct ima_iint_cache *ima_iint_find(struct inode *inode);
struct ima_iint_cache *ima_inode_get(struct inode *inode);
-void ima_inode_free(struct inode *inode);
+void ima_inode_free_rcu(void *inode_security);
void __init ima_iintcache_init(void);
extern const int read_idmap[];
diff --git a/security/integrity/ima/ima_iint.c b/security/integrity/ima/ima_iint.c
index e23412a2c56b..00b249101f98 100644
--- a/security/integrity/ima/ima_iint.c
+++ b/security/integrity/ima/ima_iint.c
@@ -109,22 +109,18 @@ struct ima_iint_cache *ima_inode_get(struct inode *inode)
}
/**
- * ima_inode_free - Called on inode free
- * @inode: Pointer to the inode
+ * ima_inode_free_rcu - Called to free an inode via a RCU callback
+ * @inode_security: The inode->i_security pointer
*
- * Free the iint associated with an inode.
+ * Free the IMA data associated with an inode.
*/
-void ima_inode_free(struct inode *inode)
+void ima_inode_free_rcu(void *inode_security)
{
- struct ima_iint_cache *iint;
+ struct ima_iint_cache **iint_p = inode_security + ima_blob_sizes.lbs_inode;
- if (!IS_IMA(inode))
- return;
-
- iint = ima_iint_find(inode);
- ima_inode_set_iint(inode, NULL);
-
- ima_iint_free(iint);
+ /* *iint_p should be NULL if !IS_IMA(inode) */
+ if (*iint_p)
+ ima_iint_free(*iint_p);
}
static void ima_iint_init_once(void *foo)
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index f04f43af651c..5b3394864b21 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -1193,7 +1193,7 @@ static struct security_hook_list ima_hooks[] __ro_after_init = {
#ifdef CONFIG_INTEGRITY_ASYMMETRIC_KEYS
LSM_HOOK_INIT(kernel_module_request, ima_kernel_module_request),
#endif
- LSM_HOOK_INIT(inode_free_security, ima_inode_free),
+ LSM_HOOK_INIT(inode_free_security_rcu, ima_inode_free_rcu),
};
static const struct lsm_id ima_lsmid = {
diff --git a/security/landlock/fs.c b/security/landlock/fs.c
index 7877a64cc6b8..0804f76a67be 100644
--- a/security/landlock/fs.c
+++ b/security/landlock/fs.c
@@ -1207,13 +1207,16 @@ static int current_check_refer_path(struct dentry *const old_dentry,
/* Inode hooks */
-static void hook_inode_free_security(struct inode *const inode)
+static void hook_inode_free_security_rcu(void *inode_security)
{
+ struct landlock_inode_security *inode_sec;
+
/*
* All inodes must already have been untied from their object by
* release_inode() or hook_sb_delete().
*/
- WARN_ON_ONCE(landlock_inode(inode)->object);
+ inode_sec = inode_security + landlock_blob_sizes.lbs_inode;
+ WARN_ON_ONCE(inode_sec->object);
}
/* Super-block hooks */
@@ -1637,7 +1640,7 @@ static int hook_file_ioctl_compat(struct file *file, unsigned int cmd,
}
static struct security_hook_list landlock_hooks[] __ro_after_init = {
- LSM_HOOK_INIT(inode_free_security, hook_inode_free_security),
+ LSM_HOOK_INIT(inode_free_security_rcu, hook_inode_free_security_rcu),
LSM_HOOK_INIT(sb_delete, hook_sb_delete),
LSM_HOOK_INIT(sb_mount, hook_sb_mount),
diff --git a/security/security.c b/security/security.c
index b316e6586be2..611d3c124ba6 100644
--- a/security/security.c
+++ b/security/security.c
@@ -1609,9 +1609,8 @@ int security_inode_alloc(struct inode *inode)
static void inode_free_by_rcu(struct rcu_head *head)
{
- /*
- * The rcu head is at the start of the inode blob
- */
+ /* The rcu head is at the start of the inode blob */
+ call_void_hook(inode_free_security_rcu, head);
kmem_cache_free(lsm_inode_cache, head);
}
@@ -1619,23 +1618,24 @@ static void inode_free_by_rcu(struct rcu_head *head)
* security_inode_free() - Free an inode's LSM blob
* @inode: the inode
*
- * Deallocate the inode security structure and set @inode->i_security to NULL.
+ * Release any LSM resources associated with @inode, although due to the
+ * inode's RCU protections it is possible that the resources will not be
+ * fully released until after the current RCU grace period has elapsed.
+ *
+ * It is important for LSMs to note that despite being present in a call to
+ * security_inode_free(), @inode may still be referenced in a VFS path walk
+ * and calls to security_inode_permission() may be made during, or after,
+ * a call to security_inode_free(). For this reason the inode->i_security
+ * field is released via a call_rcu() callback and any LSMs which need to
+ * retain inode state for use in security_inode_permission() should only
+ * release that state in the inode_free_security_rcu() LSM hook callback.
*/
void security_inode_free(struct inode *inode)
{
call_void_hook(inode_free_security, inode);
- /*
- * The inode may still be referenced in a path walk and
- * a call to security_inode_permission() can be made
- * after inode_free_security() is called. Ideally, the VFS
- * wouldn't do this, but fixing that is a much harder
- * job. For now, simply free the i_security via RCU, and
- * leave the current inode->i_security pointer intact.
- * The inode will be freed after the RCU grace period too.
- */
- if (inode->i_security)
- call_rcu((struct rcu_head *)inode->i_security,
- inode_free_by_rcu);
+ if (!inode->i_security)
+ return;
+ call_rcu((struct rcu_head *)inode->i_security, inode_free_by_rcu);
}
/**
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 9a22b2812393d93d84358a760c347c21939029a6
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100110-smokiness-detract-b410@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
9a22b2812393 ("padata: use integer wrap around to prevent deadlock on seq_nr overflow")
57ddfecc72a6 ("padata: Fix list iterator in padata_do_serial()")
f601c725a6ac ("padata: remove padata_parallel_queue")
4611ce224688 ("padata: allocate work structures for parallel jobs from a pool")
f1b192b117cd ("padata: initialize earlier")
305dacf77952 ("padata: remove exit routine")
bfcdcef8c8e3 ("padata: update documentation")
3facced7aeed ("padata: remove reorder_objects")
91a71d612128 ("padata: remove cpumask change notifier")
894c9ef9780c ("padata: validate cpumask without removed CPU during offline")
bbefa1dd6a6d ("crypto: pcrypt - Avoid deadlock by using per-instance padata queues")
07928d9bfc81 ("padata: Remove broken queue flushing")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 9a22b2812393d93d84358a760c347c21939029a6 Mon Sep 17 00:00:00 2001
From: VanGiang Nguyen <vangiang.nguyen(a)rohde-schwarz.com>
Date: Fri, 9 Aug 2024 06:21:42 +0000
Subject: [PATCH] padata: use integer wrap around to prevent deadlock on seq_nr
overflow
When submitting more than 2^32 padata objects to padata_do_serial, the
current sorting implementation incorrectly sorts padata objects with
overflowed seq_nr, causing them to be placed before existing objects in
the reorder list. This leads to a deadlock in the serialization process
as padata_find_next cannot match padata->seq_nr and pd->processed
because the padata instance with overflowed seq_nr will be selected
next.
To fix this, we use an unsigned integer wrap around to correctly sort
padata objects in scenarios with integer overflow.
Fixes: bfde23ce200e ("padata: unbind parallel jobs from specific CPUs")
Cc: <stable(a)vger.kernel.org>
Co-developed-by: Christian Gafert <christian.gafert(a)rohde-schwarz.com>
Signed-off-by: Christian Gafert <christian.gafert(a)rohde-schwarz.com>
Co-developed-by: Max Ferger <max.ferger(a)rohde-schwarz.com>
Signed-off-by: Max Ferger <max.ferger(a)rohde-schwarz.com>
Signed-off-by: Van Giang Nguyen <vangiang.nguyen(a)rohde-schwarz.com>
Acked-by: Daniel Jordan <daniel.m.jordan(a)oracle.com>
Signed-off-by: Herbert Xu <herbert(a)gondor.apana.org.au>
diff --git a/kernel/padata.c b/kernel/padata.c
index 53f4bc912712..222bccd0c96b 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -404,7 +404,8 @@ void padata_do_serial(struct padata_priv *padata)
/* Sort in ascending order of sequence number. */
list_for_each_prev(pos, &reorder->list) {
cur = list_entry(pos, struct padata_priv, list);
- if (cur->seq_nr < padata->seq_nr)
+ /* Compare by difference to consider integer wrap around */
+ if ((signed int)(cur->seq_nr - padata->seq_nr) < 0)
break;
}
list_add(&padata->list, pos);
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x a309320ddbac6b1583224fcb6bacd424bcf8637f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100154-maternity-snowfield-cbf4@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
a309320ddbac ("cpuidle: riscv-sbi: Use scoped device node handling to fix missing of_node_put")
24760e43c6a6 ("cpuidle: Adjust includes to remove of_device.h")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From a309320ddbac6b1583224fcb6bacd424bcf8637f Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski(a)linaro.org>
Date: Tue, 20 Aug 2024 11:40:22 +0200
Subject: [PATCH] cpuidle: riscv-sbi: Use scoped device node handling to fix
missing of_node_put
Two return statements in sbi_cpuidle_dt_init_states() did not drop the
OF node reference count. Solve the issue and simplify entire error
handling with scoped/cleanup.h.
Fixes: 6abf32f1d9c5 ("cpuidle: Add RISC-V SBI CPU idle driver")
Cc: All applicable <stable(a)vger.kernel.org>
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski(a)linaro.org>
Reviewed-by: Anup Patel <anup(a)brainfault.org>
Link: https://patch.msgid.link/20240820094023.61155-1-krzysztof.kozlowski@linaro.…
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
diff --git a/drivers/cpuidle/cpuidle-riscv-sbi.c b/drivers/cpuidle/cpuidle-riscv-sbi.c
index a6e123dfe394..5bb3401220d2 100644
--- a/drivers/cpuidle/cpuidle-riscv-sbi.c
+++ b/drivers/cpuidle/cpuidle-riscv-sbi.c
@@ -8,6 +8,7 @@
#define pr_fmt(fmt) "cpuidle-riscv-sbi: " fmt
+#include <linux/cleanup.h>
#include <linux/cpuhotplug.h>
#include <linux/cpuidle.h>
#include <linux/cpumask.h>
@@ -236,19 +237,16 @@ static int sbi_cpuidle_dt_init_states(struct device *dev,
{
struct sbi_cpuidle_data *data = per_cpu_ptr(&sbi_cpuidle_data, cpu);
struct device_node *state_node;
- struct device_node *cpu_node;
u32 *states;
int i, ret;
- cpu_node = of_cpu_device_node_get(cpu);
+ struct device_node *cpu_node __free(device_node) = of_cpu_device_node_get(cpu);
if (!cpu_node)
return -ENODEV;
states = devm_kcalloc(dev, state_count, sizeof(*states), GFP_KERNEL);
- if (!states) {
- ret = -ENOMEM;
- goto fail;
- }
+ if (!states)
+ return -ENOMEM;
/* Parse SBI specific details from state DT nodes */
for (i = 1; i < state_count; i++) {
@@ -264,10 +262,8 @@ static int sbi_cpuidle_dt_init_states(struct device *dev,
pr_debug("sbi-state %#x index %d\n", states[i], i);
}
- if (i != state_count) {
- ret = -ENODEV;
- goto fail;
- }
+ if (i != state_count)
+ return -ENODEV;
/* Initialize optional data, used for the hierarchical topology. */
ret = sbi_dt_cpu_init_topology(drv, data, state_count, cpu);
@@ -277,10 +273,7 @@ static int sbi_cpuidle_dt_init_states(struct device *dev,
/* Store states in the per-cpu struct. */
data->states = states;
-fail:
- of_node_put(cpu_node);
-
- return ret;
+ return 0;
}
static void sbi_cpuidle_deinit_cpu(int cpu)
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x 4f5a100f87f32cb65d4bb1ad282a08c92f6f591e
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100136-reanalyze-footwork-21f0@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
4f5a100f87f3 ("f2fs: Require FMODE_WRITE for atomic write ioctls")
01beba7957a2 ("fs: port inode_owner_or_capable() to mnt_idmap")
f2d40141d5d9 ("fs: port inode_init_owner() to mnt_idmap")
4609e1f18e19 ("fs: port ->permission() to pass mnt_idmap")
8782a9aea3ab ("fs: port ->fileattr_set() to pass mnt_idmap")
13e83a4923be ("fs: port ->set_acl() to pass mnt_idmap")
77435322777d ("fs: port ->get_acl() to pass mnt_idmap")
011e2b717b1b ("fs: port ->tmpfile() to pass mnt_idmap")
5ebb29bee8d5 ("fs: port ->mknod() to pass mnt_idmap")
c54bd91e9eab ("fs: port ->mkdir() to pass mnt_idmap")
7a77db95511c ("fs: port ->symlink() to pass mnt_idmap")
6c960e68aaed ("fs: port ->create() to pass mnt_idmap")
b74d24f7a74f ("fs: port ->getattr() to pass mnt_idmap")
c1632a0f1120 ("fs: port ->setattr() to pass mnt_idmap")
abf08576afe3 ("fs: port vfs_*() helpers to struct mnt_idmap")
6022ec6ee2c3 ("Merge tag 'ntfs3_for_6.2' of https://github.com/Paragon-Software-Group/linux-ntfs3")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 4f5a100f87f32cb65d4bb1ad282a08c92f6f591e Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh(a)google.com>
Date: Tue, 6 Aug 2024 16:07:16 +0200
Subject: [PATCH] f2fs: Require FMODE_WRITE for atomic write ioctls
The F2FS ioctls for starting and committing atomic writes check for
inode_owner_or_capable(), but this does not give LSMs like SELinux or
Landlock an opportunity to deny the write access - if the caller's FSUID
matches the inode's UID, inode_owner_or_capable() immediately returns true.
There are scenarios where LSMs want to deny a process the ability to write
particular files, even files that the FSUID of the process owns; but this
can currently partially be bypassed using atomic write ioctls in two ways:
- F2FS_IOC_START_ATOMIC_REPLACE + F2FS_IOC_COMMIT_ATOMIC_WRITE can
truncate an inode to size 0
- F2FS_IOC_START_ATOMIC_WRITE + F2FS_IOC_ABORT_ATOMIC_WRITE can revert
changes another process concurrently made to a file
Fix it by requiring FMODE_WRITE for these operations, just like for
F2FS_IOC_MOVE_RANGE. Since any legitimate caller should only be using these
ioctls when intending to write into the file, that seems unlikely to break
anything.
Fixes: 88b88a667971 ("f2fs: support atomic writes")
Cc: stable(a)vger.kernel.org
Signed-off-by: Jann Horn <jannh(a)google.com>
Reviewed-by: Chao Yu <chao(a)kernel.org>
Reviewed-by: Eric Biggers <ebiggers(a)google.com>
Signed-off-by: Jaegeuk Kim <jaegeuk(a)kernel.org>
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index bddcb2cd945a..2c591fbc75a9 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -2131,6 +2131,9 @@ static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate)
loff_t isize;
int ret;
+ if (!(filp->f_mode & FMODE_WRITE))
+ return -EBADF;
+
if (!inode_owner_or_capable(idmap, inode))
return -EACCES;
@@ -2239,6 +2242,9 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
struct mnt_idmap *idmap = file_mnt_idmap(filp);
int ret;
+ if (!(filp->f_mode & FMODE_WRITE))
+ return -EBADF;
+
if (!inode_owner_or_capable(idmap, inode))
return -EACCES;
@@ -2271,6 +2277,9 @@ static int f2fs_ioc_abort_atomic_write(struct file *filp)
struct mnt_idmap *idmap = file_mnt_idmap(filp);
int ret;
+ if (!(filp->f_mode & FMODE_WRITE))
+ return -EBADF;
+
if (!inode_owner_or_capable(idmap, inode))
return -EACCES;
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 4f5a100f87f32cb65d4bb1ad282a08c92f6f591e
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100135-magenta-resigned-365f@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
4f5a100f87f3 ("f2fs: Require FMODE_WRITE for atomic write ioctls")
01beba7957a2 ("fs: port inode_owner_or_capable() to mnt_idmap")
f2d40141d5d9 ("fs: port inode_init_owner() to mnt_idmap")
4609e1f18e19 ("fs: port ->permission() to pass mnt_idmap")
8782a9aea3ab ("fs: port ->fileattr_set() to pass mnt_idmap")
13e83a4923be ("fs: port ->set_acl() to pass mnt_idmap")
77435322777d ("fs: port ->get_acl() to pass mnt_idmap")
011e2b717b1b ("fs: port ->tmpfile() to pass mnt_idmap")
5ebb29bee8d5 ("fs: port ->mknod() to pass mnt_idmap")
c54bd91e9eab ("fs: port ->mkdir() to pass mnt_idmap")
7a77db95511c ("fs: port ->symlink() to pass mnt_idmap")
6c960e68aaed ("fs: port ->create() to pass mnt_idmap")
b74d24f7a74f ("fs: port ->getattr() to pass mnt_idmap")
c1632a0f1120 ("fs: port ->setattr() to pass mnt_idmap")
abf08576afe3 ("fs: port vfs_*() helpers to struct mnt_idmap")
6022ec6ee2c3 ("Merge tag 'ntfs3_for_6.2' of https://github.com/Paragon-Software-Group/linux-ntfs3")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 4f5a100f87f32cb65d4bb1ad282a08c92f6f591e Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh(a)google.com>
Date: Tue, 6 Aug 2024 16:07:16 +0200
Subject: [PATCH] f2fs: Require FMODE_WRITE for atomic write ioctls
The F2FS ioctls for starting and committing atomic writes check for
inode_owner_or_capable(), but this does not give LSMs like SELinux or
Landlock an opportunity to deny the write access - if the caller's FSUID
matches the inode's UID, inode_owner_or_capable() immediately returns true.
There are scenarios where LSMs want to deny a process the ability to write
particular files, even files that the FSUID of the process owns; but this
can currently partially be bypassed using atomic write ioctls in two ways:
- F2FS_IOC_START_ATOMIC_REPLACE + F2FS_IOC_COMMIT_ATOMIC_WRITE can
truncate an inode to size 0
- F2FS_IOC_START_ATOMIC_WRITE + F2FS_IOC_ABORT_ATOMIC_WRITE can revert
changes another process concurrently made to a file
Fix it by requiring FMODE_WRITE for these operations, just like for
F2FS_IOC_MOVE_RANGE. Since any legitimate caller should only be using these
ioctls when intending to write into the file, that seems unlikely to break
anything.
Fixes: 88b88a667971 ("f2fs: support atomic writes")
Cc: stable(a)vger.kernel.org
Signed-off-by: Jann Horn <jannh(a)google.com>
Reviewed-by: Chao Yu <chao(a)kernel.org>
Reviewed-by: Eric Biggers <ebiggers(a)google.com>
Signed-off-by: Jaegeuk Kim <jaegeuk(a)kernel.org>
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index bddcb2cd945a..2c591fbc75a9 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -2131,6 +2131,9 @@ static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate)
loff_t isize;
int ret;
+ if (!(filp->f_mode & FMODE_WRITE))
+ return -EBADF;
+
if (!inode_owner_or_capable(idmap, inode))
return -EACCES;
@@ -2239,6 +2242,9 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
struct mnt_idmap *idmap = file_mnt_idmap(filp);
int ret;
+ if (!(filp->f_mode & FMODE_WRITE))
+ return -EBADF;
+
if (!inode_owner_or_capable(idmap, inode))
return -EACCES;
@@ -2271,6 +2277,9 @@ static int f2fs_ioc_abort_atomic_write(struct file *filp)
struct mnt_idmap *idmap = file_mnt_idmap(filp);
int ret;
+ if (!(filp->f_mode & FMODE_WRITE))
+ return -EBADF;
+
if (!inode_owner_or_capable(idmap, inode))
return -EACCES;
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 4f5a100f87f32cb65d4bb1ad282a08c92f6f591e
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100134-blustery-wisplike-6b11@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
4f5a100f87f3 ("f2fs: Require FMODE_WRITE for atomic write ioctls")
01beba7957a2 ("fs: port inode_owner_or_capable() to mnt_idmap")
f2d40141d5d9 ("fs: port inode_init_owner() to mnt_idmap")
4609e1f18e19 ("fs: port ->permission() to pass mnt_idmap")
8782a9aea3ab ("fs: port ->fileattr_set() to pass mnt_idmap")
13e83a4923be ("fs: port ->set_acl() to pass mnt_idmap")
77435322777d ("fs: port ->get_acl() to pass mnt_idmap")
011e2b717b1b ("fs: port ->tmpfile() to pass mnt_idmap")
5ebb29bee8d5 ("fs: port ->mknod() to pass mnt_idmap")
c54bd91e9eab ("fs: port ->mkdir() to pass mnt_idmap")
7a77db95511c ("fs: port ->symlink() to pass mnt_idmap")
6c960e68aaed ("fs: port ->create() to pass mnt_idmap")
b74d24f7a74f ("fs: port ->getattr() to pass mnt_idmap")
c1632a0f1120 ("fs: port ->setattr() to pass mnt_idmap")
abf08576afe3 ("fs: port vfs_*() helpers to struct mnt_idmap")
6022ec6ee2c3 ("Merge tag 'ntfs3_for_6.2' of https://github.com/Paragon-Software-Group/linux-ntfs3")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 4f5a100f87f32cb65d4bb1ad282a08c92f6f591e Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh(a)google.com>
Date: Tue, 6 Aug 2024 16:07:16 +0200
Subject: [PATCH] f2fs: Require FMODE_WRITE for atomic write ioctls
The F2FS ioctls for starting and committing atomic writes check for
inode_owner_or_capable(), but this does not give LSMs like SELinux or
Landlock an opportunity to deny the write access - if the caller's FSUID
matches the inode's UID, inode_owner_or_capable() immediately returns true.
There are scenarios where LSMs want to deny a process the ability to write
particular files, even files that the FSUID of the process owns; but this
can currently partially be bypassed using atomic write ioctls in two ways:
- F2FS_IOC_START_ATOMIC_REPLACE + F2FS_IOC_COMMIT_ATOMIC_WRITE can
truncate an inode to size 0
- F2FS_IOC_START_ATOMIC_WRITE + F2FS_IOC_ABORT_ATOMIC_WRITE can revert
changes another process concurrently made to a file
Fix it by requiring FMODE_WRITE for these operations, just like for
F2FS_IOC_MOVE_RANGE. Since any legitimate caller should only be using these
ioctls when intending to write into the file, that seems unlikely to break
anything.
Fixes: 88b88a667971 ("f2fs: support atomic writes")
Cc: stable(a)vger.kernel.org
Signed-off-by: Jann Horn <jannh(a)google.com>
Reviewed-by: Chao Yu <chao(a)kernel.org>
Reviewed-by: Eric Biggers <ebiggers(a)google.com>
Signed-off-by: Jaegeuk Kim <jaegeuk(a)kernel.org>
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index bddcb2cd945a..2c591fbc75a9 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -2131,6 +2131,9 @@ static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate)
loff_t isize;
int ret;
+ if (!(filp->f_mode & FMODE_WRITE))
+ return -EBADF;
+
if (!inode_owner_or_capable(idmap, inode))
return -EACCES;
@@ -2239,6 +2242,9 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
struct mnt_idmap *idmap = file_mnt_idmap(filp);
int ret;
+ if (!(filp->f_mode & FMODE_WRITE))
+ return -EBADF;
+
if (!inode_owner_or_capable(idmap, inode))
return -EACCES;
@@ -2271,6 +2277,9 @@ static int f2fs_ioc_abort_atomic_write(struct file *filp)
struct mnt_idmap *idmap = file_mnt_idmap(filp);
int ret;
+ if (!(filp->f_mode & FMODE_WRITE))
+ return -EBADF;
+
if (!inode_owner_or_capable(idmap, inode))
return -EACCES;
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 4f5a100f87f32cb65d4bb1ad282a08c92f6f591e
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100133-islamic-matriarch-4cee@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
4f5a100f87f3 ("f2fs: Require FMODE_WRITE for atomic write ioctls")
01beba7957a2 ("fs: port inode_owner_or_capable() to mnt_idmap")
f2d40141d5d9 ("fs: port inode_init_owner() to mnt_idmap")
4609e1f18e19 ("fs: port ->permission() to pass mnt_idmap")
8782a9aea3ab ("fs: port ->fileattr_set() to pass mnt_idmap")
13e83a4923be ("fs: port ->set_acl() to pass mnt_idmap")
77435322777d ("fs: port ->get_acl() to pass mnt_idmap")
011e2b717b1b ("fs: port ->tmpfile() to pass mnt_idmap")
5ebb29bee8d5 ("fs: port ->mknod() to pass mnt_idmap")
c54bd91e9eab ("fs: port ->mkdir() to pass mnt_idmap")
7a77db95511c ("fs: port ->symlink() to pass mnt_idmap")
6c960e68aaed ("fs: port ->create() to pass mnt_idmap")
b74d24f7a74f ("fs: port ->getattr() to pass mnt_idmap")
c1632a0f1120 ("fs: port ->setattr() to pass mnt_idmap")
abf08576afe3 ("fs: port vfs_*() helpers to struct mnt_idmap")
6022ec6ee2c3 ("Merge tag 'ntfs3_for_6.2' of https://github.com/Paragon-Software-Group/linux-ntfs3")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 4f5a100f87f32cb65d4bb1ad282a08c92f6f591e Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh(a)google.com>
Date: Tue, 6 Aug 2024 16:07:16 +0200
Subject: [PATCH] f2fs: Require FMODE_WRITE for atomic write ioctls
The F2FS ioctls for starting and committing atomic writes check for
inode_owner_or_capable(), but this does not give LSMs like SELinux or
Landlock an opportunity to deny the write access - if the caller's FSUID
matches the inode's UID, inode_owner_or_capable() immediately returns true.
There are scenarios where LSMs want to deny a process the ability to write
particular files, even files that the FSUID of the process owns; but this
can currently partially be bypassed using atomic write ioctls in two ways:
- F2FS_IOC_START_ATOMIC_REPLACE + F2FS_IOC_COMMIT_ATOMIC_WRITE can
truncate an inode to size 0
- F2FS_IOC_START_ATOMIC_WRITE + F2FS_IOC_ABORT_ATOMIC_WRITE can revert
changes another process concurrently made to a file
Fix it by requiring FMODE_WRITE for these operations, just like for
F2FS_IOC_MOVE_RANGE. Since any legitimate caller should only be using these
ioctls when intending to write into the file, that seems unlikely to break
anything.
Fixes: 88b88a667971 ("f2fs: support atomic writes")
Cc: stable(a)vger.kernel.org
Signed-off-by: Jann Horn <jannh(a)google.com>
Reviewed-by: Chao Yu <chao(a)kernel.org>
Reviewed-by: Eric Biggers <ebiggers(a)google.com>
Signed-off-by: Jaegeuk Kim <jaegeuk(a)kernel.org>
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index bddcb2cd945a..2c591fbc75a9 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -2131,6 +2131,9 @@ static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate)
loff_t isize;
int ret;
+ if (!(filp->f_mode & FMODE_WRITE))
+ return -EBADF;
+
if (!inode_owner_or_capable(idmap, inode))
return -EACCES;
@@ -2239,6 +2242,9 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
struct mnt_idmap *idmap = file_mnt_idmap(filp);
int ret;
+ if (!(filp->f_mode & FMODE_WRITE))
+ return -EBADF;
+
if (!inode_owner_or_capable(idmap, inode))
return -EACCES;
@@ -2271,6 +2277,9 @@ static int f2fs_ioc_abort_atomic_write(struct file *filp)
struct mnt_idmap *idmap = file_mnt_idmap(filp);
int ret;
+ if (!(filp->f_mode & FMODE_WRITE))
+ return -EBADF;
+
if (!inode_owner_or_capable(idmap, inode))
return -EACCES;
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 4f5a100f87f32cb65d4bb1ad282a08c92f6f591e
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100132-panorama-legacy-223d@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
4f5a100f87f3 ("f2fs: Require FMODE_WRITE for atomic write ioctls")
01beba7957a2 ("fs: port inode_owner_or_capable() to mnt_idmap")
f2d40141d5d9 ("fs: port inode_init_owner() to mnt_idmap")
4609e1f18e19 ("fs: port ->permission() to pass mnt_idmap")
8782a9aea3ab ("fs: port ->fileattr_set() to pass mnt_idmap")
13e83a4923be ("fs: port ->set_acl() to pass mnt_idmap")
77435322777d ("fs: port ->get_acl() to pass mnt_idmap")
011e2b717b1b ("fs: port ->tmpfile() to pass mnt_idmap")
5ebb29bee8d5 ("fs: port ->mknod() to pass mnt_idmap")
c54bd91e9eab ("fs: port ->mkdir() to pass mnt_idmap")
7a77db95511c ("fs: port ->symlink() to pass mnt_idmap")
6c960e68aaed ("fs: port ->create() to pass mnt_idmap")
b74d24f7a74f ("fs: port ->getattr() to pass mnt_idmap")
c1632a0f1120 ("fs: port ->setattr() to pass mnt_idmap")
abf08576afe3 ("fs: port vfs_*() helpers to struct mnt_idmap")
6022ec6ee2c3 ("Merge tag 'ntfs3_for_6.2' of https://github.com/Paragon-Software-Group/linux-ntfs3")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 4f5a100f87f32cb65d4bb1ad282a08c92f6f591e Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh(a)google.com>
Date: Tue, 6 Aug 2024 16:07:16 +0200
Subject: [PATCH] f2fs: Require FMODE_WRITE for atomic write ioctls
The F2FS ioctls for starting and committing atomic writes check for
inode_owner_or_capable(), but this does not give LSMs like SELinux or
Landlock an opportunity to deny the write access - if the caller's FSUID
matches the inode's UID, inode_owner_or_capable() immediately returns true.
There are scenarios where LSMs want to deny a process the ability to write
particular files, even files that the FSUID of the process owns; but this
can currently partially be bypassed using atomic write ioctls in two ways:
- F2FS_IOC_START_ATOMIC_REPLACE + F2FS_IOC_COMMIT_ATOMIC_WRITE can
truncate an inode to size 0
- F2FS_IOC_START_ATOMIC_WRITE + F2FS_IOC_ABORT_ATOMIC_WRITE can revert
changes another process concurrently made to a file
Fix it by requiring FMODE_WRITE for these operations, just like for
F2FS_IOC_MOVE_RANGE. Since any legitimate caller should only be using these
ioctls when intending to write into the file, that seems unlikely to break
anything.
Fixes: 88b88a667971 ("f2fs: support atomic writes")
Cc: stable(a)vger.kernel.org
Signed-off-by: Jann Horn <jannh(a)google.com>
Reviewed-by: Chao Yu <chao(a)kernel.org>
Reviewed-by: Eric Biggers <ebiggers(a)google.com>
Signed-off-by: Jaegeuk Kim <jaegeuk(a)kernel.org>
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index bddcb2cd945a..2c591fbc75a9 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -2131,6 +2131,9 @@ static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate)
loff_t isize;
int ret;
+ if (!(filp->f_mode & FMODE_WRITE))
+ return -EBADF;
+
if (!inode_owner_or_capable(idmap, inode))
return -EACCES;
@@ -2239,6 +2242,9 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
struct mnt_idmap *idmap = file_mnt_idmap(filp);
int ret;
+ if (!(filp->f_mode & FMODE_WRITE))
+ return -EBADF;
+
if (!inode_owner_or_capable(idmap, inode))
return -EACCES;
@@ -2271,6 +2277,9 @@ static int f2fs_ioc_abort_atomic_write(struct file *filp)
struct mnt_idmap *idmap = file_mnt_idmap(filp);
int ret;
+ if (!(filp->f_mode & FMODE_WRITE))
+ return -EBADF;
+
if (!inode_owner_or_capable(idmap, inode))
return -EACCES;
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x 1cade98cf6415897bf9342ee451cc5b40b58c638
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100148-utensil-cornbread-3a99@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
1cade98cf641 ("f2fs: fix several potential integer overflows in file offsets")
e7547daccd6a ("f2fs: refactor extent_cache to support for read and more")
749d543c0d45 ("f2fs: remove unnecessary __init_extent_tree")
3bac20a8f011 ("f2fs: move internal functions into extent_cache.c")
12607c1ba763 ("f2fs: specify extent cache for read explicitly")
544b53dadc20 ("f2fs: code clean and fix a type error")
a834aa3ec95b ("f2fs: add "c_len" into trace_f2fs_update_extent_tree_range for compressed file")
07725adc55c0 ("f2fs: fix race condition on setting FI_NO_EXTENT flag")
01fc4b9a6ed8 ("f2fs: use onstack pages instead of pvec")
4f8219f8aa17 ("f2fs: intorduce f2fs_all_cluster_page_ready")
054cb2891b9c ("f2fs: replace F2FS_I(inode) and sbi by the local variable")
3db1de0e582c ("f2fs: change the current atomic write way")
71419129625a ("f2fs: give priority to select unpinned section for foreground GC")
a9163b947ae8 ("f2fs: write checkpoint during FG_GC")
2aaf51dd39af ("f2fs: fix dereference of stale list iterator after loop body")
642c0969916e ("f2fs: don't set GC_FAILURE_PIN for background GC")
a22bb5526d7d ("f2fs: check pinfile in gc_data_segment() in advance")
6b1f86f8e9c7 ("Merge tag 'folio-5.18b' of git://git.infradead.org/users/willy/pagecache")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1cade98cf6415897bf9342ee451cc5b40b58c638 Mon Sep 17 00:00:00 2001
From: Nikita Zhandarovich <n.zhandarovich(a)fintech.ru>
Date: Wed, 24 Jul 2024 10:28:38 -0700
Subject: [PATCH] f2fs: fix several potential integer overflows in file offsets
When dealing with large extents and calculating file offsets by
summing up according extent offsets and lengths of unsigned int type,
one may encounter possible integer overflow if the values are
big enough.
Prevent this from happening by expanding one of the addends to
(pgoff_t) type.
Found by Linux Verification Center (linuxtesting.org) with static
analysis tool SVACE.
Fixes: d323d005ac4a ("f2fs: support file defragment")
Cc: stable(a)vger.kernel.org
Signed-off-by: Nikita Zhandarovich <n.zhandarovich(a)fintech.ru>
Reviewed-by: Chao Yu <chao(a)kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk(a)kernel.org>
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index fd1fc06359ee..62ac440d9416 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -366,7 +366,7 @@ static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
static void __drop_largest_extent(struct extent_tree *et,
pgoff_t fofs, unsigned int len)
{
- if (fofs < et->largest.fofs + et->largest.len &&
+ if (fofs < (pgoff_t)et->largest.fofs + et->largest.len &&
fofs + len > et->largest.fofs) {
et->largest.len = 0;
et->largest_updated = true;
@@ -456,7 +456,7 @@ static bool __lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
if (type == EX_READ &&
et->largest.fofs <= pgofs &&
- et->largest.fofs + et->largest.len > pgofs) {
+ (pgoff_t)et->largest.fofs + et->largest.len > pgofs) {
*ei = et->largest;
ret = true;
stat_inc_largest_node_hit(sbi);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 168f08507004..c598cfe5e0ed 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -2710,7 +2710,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
* block addresses are continuous.
*/
if (f2fs_lookup_read_extent_cache(inode, pg_start, &ei)) {
- if (ei.fofs + ei.len >= pg_end)
+ if ((pgoff_t)ei.fofs + ei.len >= pg_end)
goto out;
}
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 1cade98cf6415897bf9342ee451cc5b40b58c638
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100147-unsolved-revision-357a@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
1cade98cf641 ("f2fs: fix several potential integer overflows in file offsets")
e7547daccd6a ("f2fs: refactor extent_cache to support for read and more")
749d543c0d45 ("f2fs: remove unnecessary __init_extent_tree")
3bac20a8f011 ("f2fs: move internal functions into extent_cache.c")
12607c1ba763 ("f2fs: specify extent cache for read explicitly")
544b53dadc20 ("f2fs: code clean and fix a type error")
a834aa3ec95b ("f2fs: add "c_len" into trace_f2fs_update_extent_tree_range for compressed file")
07725adc55c0 ("f2fs: fix race condition on setting FI_NO_EXTENT flag")
01fc4b9a6ed8 ("f2fs: use onstack pages instead of pvec")
4f8219f8aa17 ("f2fs: intorduce f2fs_all_cluster_page_ready")
054cb2891b9c ("f2fs: replace F2FS_I(inode) and sbi by the local variable")
3db1de0e582c ("f2fs: change the current atomic write way")
71419129625a ("f2fs: give priority to select unpinned section for foreground GC")
a9163b947ae8 ("f2fs: write checkpoint during FG_GC")
2aaf51dd39af ("f2fs: fix dereference of stale list iterator after loop body")
642c0969916e ("f2fs: don't set GC_FAILURE_PIN for background GC")
a22bb5526d7d ("f2fs: check pinfile in gc_data_segment() in advance")
6b1f86f8e9c7 ("Merge tag 'folio-5.18b' of git://git.infradead.org/users/willy/pagecache")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1cade98cf6415897bf9342ee451cc5b40b58c638 Mon Sep 17 00:00:00 2001
From: Nikita Zhandarovich <n.zhandarovich(a)fintech.ru>
Date: Wed, 24 Jul 2024 10:28:38 -0700
Subject: [PATCH] f2fs: fix several potential integer overflows in file offsets
When dealing with large extents and calculating file offsets by
summing up according extent offsets and lengths of unsigned int type,
one may encounter possible integer overflow if the values are
big enough.
Prevent this from happening by expanding one of the addends to
(pgoff_t) type.
Found by Linux Verification Center (linuxtesting.org) with static
analysis tool SVACE.
Fixes: d323d005ac4a ("f2fs: support file defragment")
Cc: stable(a)vger.kernel.org
Signed-off-by: Nikita Zhandarovich <n.zhandarovich(a)fintech.ru>
Reviewed-by: Chao Yu <chao(a)kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk(a)kernel.org>
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index fd1fc06359ee..62ac440d9416 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -366,7 +366,7 @@ static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
static void __drop_largest_extent(struct extent_tree *et,
pgoff_t fofs, unsigned int len)
{
- if (fofs < et->largest.fofs + et->largest.len &&
+ if (fofs < (pgoff_t)et->largest.fofs + et->largest.len &&
fofs + len > et->largest.fofs) {
et->largest.len = 0;
et->largest_updated = true;
@@ -456,7 +456,7 @@ static bool __lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
if (type == EX_READ &&
et->largest.fofs <= pgofs &&
- et->largest.fofs + et->largest.len > pgofs) {
+ (pgoff_t)et->largest.fofs + et->largest.len > pgofs) {
*ei = et->largest;
ret = true;
stat_inc_largest_node_hit(sbi);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 168f08507004..c598cfe5e0ed 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -2710,7 +2710,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
* block addresses are continuous.
*/
if (f2fs_lookup_read_extent_cache(inode, pg_start, &ei)) {
- if (ei.fofs + ei.len >= pg_end)
+ if ((pgoff_t)ei.fofs + ei.len >= pg_end)
goto out;
}
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 1cade98cf6415897bf9342ee451cc5b40b58c638
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100147-untaxed-viscosity-628e@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
1cade98cf641 ("f2fs: fix several potential integer overflows in file offsets")
e7547daccd6a ("f2fs: refactor extent_cache to support for read and more")
749d543c0d45 ("f2fs: remove unnecessary __init_extent_tree")
3bac20a8f011 ("f2fs: move internal functions into extent_cache.c")
12607c1ba763 ("f2fs: specify extent cache for read explicitly")
544b53dadc20 ("f2fs: code clean and fix a type error")
a834aa3ec95b ("f2fs: add "c_len" into trace_f2fs_update_extent_tree_range for compressed file")
07725adc55c0 ("f2fs: fix race condition on setting FI_NO_EXTENT flag")
01fc4b9a6ed8 ("f2fs: use onstack pages instead of pvec")
4f8219f8aa17 ("f2fs: intorduce f2fs_all_cluster_page_ready")
054cb2891b9c ("f2fs: replace F2FS_I(inode) and sbi by the local variable")
3db1de0e582c ("f2fs: change the current atomic write way")
71419129625a ("f2fs: give priority to select unpinned section for foreground GC")
a9163b947ae8 ("f2fs: write checkpoint during FG_GC")
2aaf51dd39af ("f2fs: fix dereference of stale list iterator after loop body")
642c0969916e ("f2fs: don't set GC_FAILURE_PIN for background GC")
a22bb5526d7d ("f2fs: check pinfile in gc_data_segment() in advance")
6b1f86f8e9c7 ("Merge tag 'folio-5.18b' of git://git.infradead.org/users/willy/pagecache")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1cade98cf6415897bf9342ee451cc5b40b58c638 Mon Sep 17 00:00:00 2001
From: Nikita Zhandarovich <n.zhandarovich(a)fintech.ru>
Date: Wed, 24 Jul 2024 10:28:38 -0700
Subject: [PATCH] f2fs: fix several potential integer overflows in file offsets
When dealing with large extents and calculating file offsets by
summing up according extent offsets and lengths of unsigned int type,
one may encounter possible integer overflow if the values are
big enough.
Prevent this from happening by expanding one of the addends to
(pgoff_t) type.
Found by Linux Verification Center (linuxtesting.org) with static
analysis tool SVACE.
Fixes: d323d005ac4a ("f2fs: support file defragment")
Cc: stable(a)vger.kernel.org
Signed-off-by: Nikita Zhandarovich <n.zhandarovich(a)fintech.ru>
Reviewed-by: Chao Yu <chao(a)kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk(a)kernel.org>
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index fd1fc06359ee..62ac440d9416 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -366,7 +366,7 @@ static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
static void __drop_largest_extent(struct extent_tree *et,
pgoff_t fofs, unsigned int len)
{
- if (fofs < et->largest.fofs + et->largest.len &&
+ if (fofs < (pgoff_t)et->largest.fofs + et->largest.len &&
fofs + len > et->largest.fofs) {
et->largest.len = 0;
et->largest_updated = true;
@@ -456,7 +456,7 @@ static bool __lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
if (type == EX_READ &&
et->largest.fofs <= pgofs &&
- et->largest.fofs + et->largest.len > pgofs) {
+ (pgoff_t)et->largest.fofs + et->largest.len > pgofs) {
*ei = et->largest;
ret = true;
stat_inc_largest_node_hit(sbi);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 168f08507004..c598cfe5e0ed 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -2710,7 +2710,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
* block addresses are continuous.
*/
if (f2fs_lookup_read_extent_cache(inode, pg_start, &ei)) {
- if (ei.fofs + ei.len >= pg_end)
+ if ((pgoff_t)ei.fofs + ei.len >= pg_end)
goto out;
}
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 1cade98cf6415897bf9342ee451cc5b40b58c638
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100146-petite-uncoated-28c9@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
1cade98cf641 ("f2fs: fix several potential integer overflows in file offsets")
e7547daccd6a ("f2fs: refactor extent_cache to support for read and more")
749d543c0d45 ("f2fs: remove unnecessary __init_extent_tree")
3bac20a8f011 ("f2fs: move internal functions into extent_cache.c")
12607c1ba763 ("f2fs: specify extent cache for read explicitly")
544b53dadc20 ("f2fs: code clean and fix a type error")
a834aa3ec95b ("f2fs: add "c_len" into trace_f2fs_update_extent_tree_range for compressed file")
07725adc55c0 ("f2fs: fix race condition on setting FI_NO_EXTENT flag")
01fc4b9a6ed8 ("f2fs: use onstack pages instead of pvec")
4f8219f8aa17 ("f2fs: intorduce f2fs_all_cluster_page_ready")
054cb2891b9c ("f2fs: replace F2FS_I(inode) and sbi by the local variable")
3db1de0e582c ("f2fs: change the current atomic write way")
71419129625a ("f2fs: give priority to select unpinned section for foreground GC")
a9163b947ae8 ("f2fs: write checkpoint during FG_GC")
2aaf51dd39af ("f2fs: fix dereference of stale list iterator after loop body")
642c0969916e ("f2fs: don't set GC_FAILURE_PIN for background GC")
a22bb5526d7d ("f2fs: check pinfile in gc_data_segment() in advance")
6b1f86f8e9c7 ("Merge tag 'folio-5.18b' of git://git.infradead.org/users/willy/pagecache")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1cade98cf6415897bf9342ee451cc5b40b58c638 Mon Sep 17 00:00:00 2001
From: Nikita Zhandarovich <n.zhandarovich(a)fintech.ru>
Date: Wed, 24 Jul 2024 10:28:38 -0700
Subject: [PATCH] f2fs: fix several potential integer overflows in file offsets
When dealing with large extents and calculating file offsets by
summing up according extent offsets and lengths of unsigned int type,
one may encounter possible integer overflow if the values are
big enough.
Prevent this from happening by expanding one of the addends to
(pgoff_t) type.
Found by Linux Verification Center (linuxtesting.org) with static
analysis tool SVACE.
Fixes: d323d005ac4a ("f2fs: support file defragment")
Cc: stable(a)vger.kernel.org
Signed-off-by: Nikita Zhandarovich <n.zhandarovich(a)fintech.ru>
Reviewed-by: Chao Yu <chao(a)kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk(a)kernel.org>
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index fd1fc06359ee..62ac440d9416 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -366,7 +366,7 @@ static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
static void __drop_largest_extent(struct extent_tree *et,
pgoff_t fofs, unsigned int len)
{
- if (fofs < et->largest.fofs + et->largest.len &&
+ if (fofs < (pgoff_t)et->largest.fofs + et->largest.len &&
fofs + len > et->largest.fofs) {
et->largest.len = 0;
et->largest_updated = true;
@@ -456,7 +456,7 @@ static bool __lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
if (type == EX_READ &&
et->largest.fofs <= pgofs &&
- et->largest.fofs + et->largest.len > pgofs) {
+ (pgoff_t)et->largest.fofs + et->largest.len > pgofs) {
*ei = et->largest;
ret = true;
stat_inc_largest_node_hit(sbi);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 168f08507004..c598cfe5e0ed 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -2710,7 +2710,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
* block addresses are continuous.
*/
if (f2fs_lookup_read_extent_cache(inode, pg_start, &ei)) {
- if (ei.fofs + ei.len >= pg_end)
+ if ((pgoff_t)ei.fofs + ei.len >= pg_end)
goto out;
}
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 7ee85f5515e86a4e2a2f51969795920733912bad
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100131-uncharted-flyaway-8acd@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
7ee85f5515e8 ("btrfs: fix race setting file private on concurrent lseek using same fd")
68539bd0e73b ("btrfs: update comment for struct btrfs_inode::lock")
398fb9131f31 ("btrfs: reorder btrfs_inode to fill gaps")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7ee85f5515e86a4e2a2f51969795920733912bad Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Tue, 3 Sep 2024 10:55:36 +0100
Subject: [PATCH] btrfs: fix race setting file private on concurrent lseek
using same fd
When doing concurrent lseek(2) system calls against the same file
descriptor, using multiple threads belonging to the same process, we have
a short time window where a race happens and can result in a memory leak.
The race happens like this:
1) A program opens a file descriptor for a file and then spawns two
threads (with the pthreads library for example), lets call them
task A and task B;
2) Task A calls lseek with SEEK_DATA or SEEK_HOLE and ends up at
file.c:find_desired_extent() while holding a read lock on the inode;
3) At the start of find_desired_extent(), it extracts the file's
private_data pointer into a local variable named 'private', which has
a value of NULL;
4) Task B also calls lseek with SEEK_DATA or SEEK_HOLE, locks the inode
in shared mode and enters file.c:find_desired_extent(), where it also
extracts file->private_data into its local variable 'private', which
has a NULL value;
5) Because it saw a NULL file private, task A allocates a private
structure and assigns to the file structure;
6) Task B also saw a NULL file private so it also allocates its own file
private and then assigns it to the same file structure, since both
tasks are using the same file descriptor.
At this point we leak the private structure allocated by task A.
Besides the memory leak, there's also the detail that both tasks end up
using the same cached state record in the private structure (struct
btrfs_file_private::llseek_cached_state), which can result in a
use-after-free problem since one task can free it while the other is
still using it (only one task took a reference count on it). Also, sharing
the cached state is not a good idea since it could result in incorrect
results in the future - right now it should not be a problem because it
end ups being used only in extent-io-tree.c:count_range_bits() where we do
range validation before using the cached state.
Fix this by protecting the private assignment and check of a file while
holding the inode's spinlock and keep track of the task that allocated
the private, so that it's used only by that task in order to prevent
user-after-free issues with the cached state record as well as potentially
using it incorrectly in the future.
Fixes: 3c32c7212f16 ("btrfs: use cached state when looking for delalloc ranges with lseek")
CC: stable(a)vger.kernel.org # 6.6+
Reviewed-by: Josef Bacik <josef(a)toxicpanda.com>
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 9a4b7c119318..e152fde888fc 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -152,6 +152,7 @@ struct btrfs_inode {
* logged_trans), to access/update delalloc_bytes, new_delalloc_bytes,
* defrag_bytes, disk_i_size, outstanding_extents, csum_bytes and to
* update the VFS' inode number of bytes used.
+ * Also protects setting struct file::private_data.
*/
spinlock_t lock;
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 1a44fb9845e3..317a3712270f 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -463,6 +463,8 @@ struct btrfs_file_private {
void *filldir_buf;
u64 last_index;
struct extent_state *llseek_cached_state;
+ /* Task that allocated this structure. */
+ struct task_struct *owner_task;
};
static inline u32 BTRFS_LEAF_DATA_SIZE(const struct btrfs_fs_info *info)
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index c5e36f58eb07..4fb521d91b06 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -3485,7 +3485,7 @@ static bool find_desired_extent_in_hole(struct btrfs_inode *inode, int whence,
static loff_t find_desired_extent(struct file *file, loff_t offset, int whence)
{
struct btrfs_inode *inode = BTRFS_I(file->f_mapping->host);
- struct btrfs_file_private *private = file->private_data;
+ struct btrfs_file_private *private;
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct extent_state *cached_state = NULL;
struct extent_state **delalloc_cached_state;
@@ -3513,7 +3513,19 @@ static loff_t find_desired_extent(struct file *file, loff_t offset, int whence)
inode_get_bytes(&inode->vfs_inode) == i_size)
return i_size;
- if (!private) {
+ spin_lock(&inode->lock);
+ private = file->private_data;
+ spin_unlock(&inode->lock);
+
+ if (private && private->owner_task != current) {
+ /*
+ * Not allocated by us, don't use it as its cached state is used
+ * by the task that allocated it and we don't want neither to
+ * mess with it nor get incorrect results because it reflects an
+ * invalid state for the current task.
+ */
+ private = NULL;
+ } else if (!private) {
private = kzalloc(sizeof(*private), GFP_KERNEL);
/*
* No worries if memory allocation failed.
@@ -3521,7 +3533,23 @@ static loff_t find_desired_extent(struct file *file, loff_t offset, int whence)
* lseek SEEK_HOLE/DATA calls to a file when there's delalloc,
* so everything will still be correct.
*/
- file->private_data = private;
+ if (private) {
+ bool free = false;
+
+ private->owner_task = current;
+
+ spin_lock(&inode->lock);
+ if (file->private_data)
+ free = true;
+ else
+ file->private_data = private;
+ spin_unlock(&inode->lock);
+
+ if (free) {
+ kfree(private);
+ private = NULL;
+ }
+ }
}
if (private)
Ensure we serialize with completion side to prevent UAF with fence going
out of scope on the stack, since we have no clue if it will fire after
the timeout before we can erase from the xa. Also we have some dependent
loads and stores for which we need the correct ordering, and we lack the
needed barriers. Fix this by grabbing the ct->lock after the wait, which
is also held by the completion side.
v2 (Badal):
- Also print done after acquiring the lock and seeing timeout.
Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs")
Signed-off-by: Matthew Auld <matthew.auld(a)intel.com>
Cc: Matthew Brost <matthew.brost(a)intel.com>
Cc: Badal Nilawar <badal.nilawar(a)intel.com>
Cc: <stable(a)vger.kernel.org> # v6.8+
---
drivers/gpu/drm/xe/xe_guc_ct.c | 21 ++++++++++++++++++---
1 file changed, 18 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index 4b95f75b1546..44263b3cd8c7 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -903,16 +903,26 @@ static int guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len,
}
ret = wait_event_timeout(ct->g2h_fence_wq, g2h_fence.done, HZ);
+
+ /*
+ * Ensure we serialize with completion side to prevent UAF with fence going out of scope on
+ * the stack, since we have no clue if it will fire after the timeout before we can erase
+ * from the xa. Also we have some dependent loads and stores below for which we need the
+ * correct ordering, and we lack the needed barriers.
+ */
+ mutex_lock(&ct->lock);
if (!ret) {
- xe_gt_err(gt, "Timed out wait for G2H, fence %u, action %04x",
- g2h_fence.seqno, action[0]);
+ xe_gt_err(gt, "Timed out wait for G2H, fence %u, action %04x, done %s",
+ g2h_fence.seqno, action[0], str_yes_no(g2h_fence.done));
xa_erase_irq(&ct->fence_lookup, g2h_fence.seqno);
+ mutex_unlock(&ct->lock);
return -ETIME;
}
if (g2h_fence.retry) {
xe_gt_dbg(gt, "H2G action %#x retrying: reason %#x\n",
action[0], g2h_fence.reason);
+ mutex_unlock(&ct->lock);
goto retry;
}
if (g2h_fence.fail) {
@@ -921,7 +931,12 @@ static int guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len,
ret = -EIO;
}
- return ret > 0 ? response_buffer ? g2h_fence.response_len : g2h_fence.response_data : ret;
+ if (ret > 0)
+ ret = response_buffer ? g2h_fence.response_len : g2h_fence.response_data;
+
+ mutex_unlock(&ct->lock);
+
+ return ret;
}
/**
--
2.46.2
Hi,
as discussed in [1], this is a manual backport of the remaining two
patches to let the io worker threads respect the affinites defined by
the cgroup of the process.
In 6.1 one worker is created per NUMA node, while in da64d6db3bd3
("io_uring: One wqe per wq") this is changed to only have a single worker.
As this patch is pretty invasive, Jens and me agreed to not backport it.
Instead we now limit the workers cpuset to the cpus that are in the
intersection between what the cgroup allows and what the NUMA node has.
This leaves the question what to do in case the intersection is empty:
To be backwarts compatible, we allow this case, but restrict the cpumask
of the poller to the cpuset defined by the cgroup. We further believe
this is a reasonable decision, as da64d6db3bd3 drops the NUMA awareness
anyways.
[1] https://lore.kernel.org/lkml/ec01745a-b102-4f6e-abc9-abd636d36319@kernel.dk
Best regards,
Felix Moessbauer
Siemens AG
Felix Moessbauer (2):
io_uring/io-wq: do not allow pinning outside of cpuset
io_uring/io-wq: inherit cpuset of cgroup in io worker
io_uring/io-wq.c | 33 ++++++++++++++++++++++++++-------
1 file changed, 26 insertions(+), 7 deletions(-)
--
2.39.2
From: Sasha Levin <sashal(a)kernel.org>
Date: Mon, 30 Sep 2024 19:08:34 -0400
> This is a note to let you know that I've just added the patch titled
>
> idpf: enable WB_ON_ITR
>
> to the 6.11-stable tree which can be found at:
> http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
>
> The filename of the patch is:
> idpf-enable-wb_on_itr.patch
> and it can be found in the queue-6.11 subdirectory.
>
> If you, or anyone else, feels it should not be added to the stable tree,
> please let <stable(a)vger.kernel.org> know about it.
Hi,
This commit should not be applied standalone and requires relatively big
prerequisites from the series it was sent in, so I'd suggest just
dropping it from stable.
>
>
>
> commit 31ffcb4f7329c601cd6e065d80e4485a468e9980
> Author: Joshua Hay <joshua.a.hay(a)intel.com>
> Date: Wed Sep 4 17:47:48 2024 +0200
>
> idpf: enable WB_ON_ITR
>
> [ Upstream commit 9c4a27da0ecc4080dfcd63903dd94f01ba1399dd ]
>
> Tell hardware to write back completed descriptors even when interrupts
> are disabled. Otherwise, descriptors might not be written back until
> the hardware can flush a full cacheline of descriptors. This can cause
> unnecessary delays when traffic is light (or even trigger Tx queue
> timeout).
Thanks,
Olek
Hi all,
As some of you have noticed, there's a TON of failure messages being
sent out for AMD gpu driver commits that are tagged for stable
backports. In short, you all are doing something really wrong with how
you are tagging these.
Please fix it up to NOT have duplicates in multiple branches that end up
in Linus's tree at different times. Or if you MUST do that, then give
us a chance to figure out that it IS a duplicate. As-is, it's not
working at all, and I think I need to just drop all patches for this
driver that are tagged for stable going forward and rely on you all to
provide a proper set of backported fixes when you say they are needed.
Again, what you are doing today is NOT ok and is broken. Please fix.
greg k-h
V2:
Changes in v2:
- Drops "-" in ovti,ov08x40.yaml after description: - Rob
- Adds ":" after first line of description text - Rob
- dts -> DT in commit log - Rob
- Removes dependency on 'xvclk' as a name in yaml
and driver - Sakari
- Uses assigned-clock, assigned-clock-parents and assigned-clock-rates -
Sakari
- Drops clock-frequency - Sakarai, Krzysztof
- Drops dovdd-supply, avdd-supply, dvdd-supply and reset-gpios
as required, its perfectly possible not to have the reset GPIO or the
power rails under control of the SoC. - bod
- Link to v1: https://lore.kernel.org/r/20240926-b4-master-24-11-25-ov08x40-v1-0-e4d5fbd3…
V1:
This series brings fixes and updates to ov08x40 which allows for use of
this sensor on the Qualcomm x1e80100 CRD but also on any other dts based
system.
Firstly there's a fix for the pseudo burst mode code that was added in
8f667d202384 ("media: ov08x40: Reduce start streaming time"). Not every I2C
controller can handle an arbitrary sized write, this is the case on
Qualcomm CAMSS/CCI I2C sensor interfaces which limit the transaction size
and communicate this limit via I2C quirks. A simple fix to optionally break
up the large submitted burst into chunks not exceeding adapter->quirk size
fixes.
Secondly then is addition of a yaml description for the ov08x40 and
extension of the driver to support OF probe and powering on of the power
rails from the driver instead of from ACPI.
Once done the sensor works without further modification on the Qualcomm
x1e80100 CRD.
Signed-off-by: Bryan O'Donoghue <bryan.odonoghue(a)linaro.org>
---
Bryan O'Donoghue (4):
media: ov08x40: Fix burst write sequence
media: dt-bindings: Add OmniVision OV08X40
media: ov08x40: Rename ext_clk to xvclk
media: ov08x40: Add OF probe support
.../bindings/media/i2c/ovti,ov08x40.yaml | 120 ++++++++++++++
drivers/media/i2c/ov08x40.c | 179 ++++++++++++++++++---
2 files changed, 276 insertions(+), 23 deletions(-)
---
base-commit: 2b7275670032a98cba266bd1b8905f755b3e650f
change-id: 20240926-b4-master-24-11-25-ov08x40-c6f477aaa6a4
Best regards,
--
Bryan O'Donoghue <bryan.odonoghue(a)linaro.org>
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 77b0b98bb743f5d04d8f995ba1936e1143689d4a
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100117-venture-unloving-3135@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
77b0b98bb743 ("btrfs: subpage: fix the bitmap dump which can cause bitmap corruption")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 77b0b98bb743f5d04d8f995ba1936e1143689d4a Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu(a)suse.com>
Date: Fri, 30 Aug 2024 16:35:48 +0930
Subject: [PATCH] btrfs: subpage: fix the bitmap dump which can cause bitmap
corruption
In commit 75258f20fb70 ("btrfs: subpage: dump extra subpage bitmaps for
debug") an internal macro GET_SUBPAGE_BITMAP() is introduced to grab the
bitmap of each attribute.
But that commit is using bitmap_cut() which will do the left shift of
the larger bitmap, causing incorrect values.
Thankfully this bitmap_cut() is only called for debug usage, and so far
it's not yet causing problem.
Fix it to use bitmap_read() to only grab the desired sub-bitmap.
Fixes: 75258f20fb70 ("btrfs: subpage: dump extra subpage bitmaps for debug")
CC: stable(a)vger.kernel.org # 6.6+
Signed-off-by: Qu Wenruo <wqu(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/subpage.c b/fs/btrfs/subpage.c
index 631d96f1e905..f8795c3d2270 100644
--- a/fs/btrfs/subpage.c
+++ b/fs/btrfs/subpage.c
@@ -902,8 +902,14 @@ void btrfs_folio_end_all_writers(const struct btrfs_fs_info *fs_info, struct fol
}
#define GET_SUBPAGE_BITMAP(subpage, subpage_info, name, dst) \
- bitmap_cut(dst, subpage->bitmaps, 0, \
- subpage_info->name##_offset, subpage_info->bitmap_nr_bits)
+{ \
+ const int bitmap_nr_bits = subpage_info->bitmap_nr_bits; \
+ \
+ ASSERT(bitmap_nr_bits < BITS_PER_LONG); \
+ *dst = bitmap_read(subpage->bitmaps, \
+ subpage_info->name##_offset, \
+ bitmap_nr_bits); \
+}
void __cold btrfs_subpage_dump_bitmap(const struct btrfs_fs_info *fs_info,
struct folio *folio, u64 start, u32 len)
From: Gui-Dong Han <hanguidong02(a)outlook.com>
This patch addresses a reference count handling issue in the
ice_dpll_init_rclk_pins() function. The function calls ice_dpll_get_pins(),
which increments the reference count of the relevant resources. However,
if the condition WARN_ON((!vsi || !vsi->netdev)) is met, the function
currently returns an error without properly releasing the resources
acquired by ice_dpll_get_pins(), leading to a reference count leak.
To resolve this, the check has been moved to the top of the function. This
ensures that the function verifies the state before any resources are
acquired, avoiding the need for additional resource management in the
error path.
This bug was identified by an experimental static analysis tool developed
by our team. The tool specializes in analyzing reference count operations
and detecting potential issues where resources are not properly managed.
In this case, the tool flagged the missing release operation as a
potential problem, which led to the development of this patch.
Fixes: d7999f5ea64b ("ice: implement dpll interface to control cgu")
Cc: stable(a)vger.kernel.org
Signed-off-by: Gui-Dong Han <hanguidong02(a)outlook.com>
Reviewed-by: Simon Horman <horms(a)kernel.org>
Tested-by: Pucha Himasekhar Reddy <himasekharx.reddy.pucha(a)intel.com> (A Contingent worker at Intel)
Signed-off-by: Tony Nguyen <anthony.l.nguyen(a)intel.com>
---
drivers/net/ethernet/intel/ice/ice_dpll.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ethernet/intel/ice/ice_dpll.c b/drivers/net/ethernet/intel/ice/ice_dpll.c
index cd95705d1e7f..8b6dc4d54fdc 100644
--- a/drivers/net/ethernet/intel/ice/ice_dpll.c
+++ b/drivers/net/ethernet/intel/ice/ice_dpll.c
@@ -1843,6 +1843,8 @@ ice_dpll_init_rclk_pins(struct ice_pf *pf, struct ice_dpll_pin *pin,
struct dpll_pin *parent;
int ret, i;
+ if (WARN_ON((!vsi || !vsi->netdev)))
+ return -EINVAL;
ret = ice_dpll_get_pins(pf, pin, start_idx, ICE_DPLL_RCLK_NUM_PER_PF,
pf->dplls.clock_id);
if (ret)
@@ -1858,8 +1860,6 @@ ice_dpll_init_rclk_pins(struct ice_pf *pf, struct ice_dpll_pin *pin,
if (ret)
goto unregister_pins;
}
- if (WARN_ON((!vsi || !vsi->netdev)))
- return -EINVAL;
dpll_netdev_pin_set(vsi->netdev, pf->dplls.rclk.pin);
return 0;
--
2.42.0
The patch below does not apply to the 6.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.10.y
git checkout FETCH_HEAD
git cherry-pick -x 874b6476fa888e79e41daf7653384c8d70927b90
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100115-support-data-8f0e@gregkh' --subject-prefix 'PATCH 6.10.y' HEAD^..
Possible dependencies:
874b6476fa88 ("thermal: sysfs: Add sanity checks for trip temperature and hysteresis")
107280e1371f ("thermal: sysfs: Refine the handling of trip hysteresis changes")
afd84fb10ced ("thermal: sysfs: Get to trips via attribute pointers")
0728c810873e ("thermal: trip: Pass trip pointer to .set_trip_temp() thermal zone callback")
a52641bc6293 ("thermal: trip: Use READ_ONCE() for lockless access to trip properties")
f41f23b0cae1 ("thermal: trip: Use common set of trip type names")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 874b6476fa888e79e41daf7653384c8d70927b90 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki(a)intel.com>
Date: Mon, 26 Aug 2024 18:21:59 +0200
Subject: [PATCH] thermal: sysfs: Add sanity checks for trip temperature and
hysteresis
Add sanity checks for new trip temperature and hysteresis values to
trip_point_temp_store() and trip_point_hyst_store() to prevent trip
point threshold from falling below THERMAL_TEMP_INVALID.
However, still allow user space to pass THERMAL_TEMP_INVALID as the
new trip temperature value to invalidate the trip if necessary.
Also allow the hysteresis to be updated when the temperature is invalid
to allow user space to avoid having to adjust hysteresis after a valid
temperature has been set, but in that case just change the value and do
nothing else.
Fixes: be0a3600aa1e ("thermal: sysfs: Rework the handling of trip point updates")
Cc: 6.8+ <stable(a)vger.kernel.org> # 6.8+
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
Reviewed-by: Lukasz Luba <lukasz.luba(a)arm.com>
Link: https://patch.msgid.link/12528772.O9o76ZdvQC@rjwysocki.net
diff --git a/drivers/thermal/thermal_sysfs.c b/drivers/thermal/thermal_sysfs.c
index b740b60032ee..197ae1262466 100644
--- a/drivers/thermal/thermal_sysfs.c
+++ b/drivers/thermal/thermal_sysfs.c
@@ -111,18 +111,26 @@ trip_point_temp_store(struct device *dev, struct device_attribute *attr,
mutex_lock(&tz->lock);
- if (temp != trip->temperature) {
- if (tz->ops.set_trip_temp) {
- ret = tz->ops.set_trip_temp(tz, trip, temp);
- if (ret)
- goto unlock;
- }
+ if (temp == trip->temperature)
+ goto unlock;
- thermal_zone_set_trip_temp(tz, trip, temp);
-
- __thermal_zone_device_update(tz, THERMAL_TRIP_CHANGED);
+ /* Arrange the condition to avoid integer overflows. */
+ if (temp != THERMAL_TEMP_INVALID &&
+ temp <= trip->hysteresis + THERMAL_TEMP_INVALID) {
+ ret = -EINVAL;
+ goto unlock;
}
+ if (tz->ops.set_trip_temp) {
+ ret = tz->ops.set_trip_temp(tz, trip, temp);
+ if (ret)
+ goto unlock;
+ }
+
+ thermal_zone_set_trip_temp(tz, trip, temp);
+
+ __thermal_zone_device_update(tz, THERMAL_TRIP_CHANGED);
+
unlock:
mutex_unlock(&tz->lock);
@@ -152,15 +160,33 @@ trip_point_hyst_store(struct device *dev, struct device_attribute *attr,
mutex_lock(&tz->lock);
- if (hyst != trip->hysteresis) {
- thermal_zone_set_trip_hyst(tz, trip, hyst);
+ if (hyst == trip->hysteresis)
+ goto unlock;
- __thermal_zone_device_update(tz, THERMAL_TRIP_CHANGED);
+ /*
+ * Allow the hysteresis to be updated when the temperature is invalid
+ * to allow user space to avoid having to adjust hysteresis after a
+ * valid temperature has been set, but in that case just change the
+ * value and do nothing else.
+ */
+ if (trip->temperature == THERMAL_TEMP_INVALID) {
+ WRITE_ONCE(trip->hysteresis, hyst);
+ goto unlock;
}
+ if (trip->temperature - hyst <= THERMAL_TEMP_INVALID) {
+ ret = -EINVAL;
+ goto unlock;
+ }
+
+ thermal_zone_set_trip_hyst(tz, trip, hyst);
+
+ __thermal_zone_device_update(tz, THERMAL_TRIP_CHANGED);
+
+unlock:
mutex_unlock(&tz->lock);
- return count;
+ return ret ? ret : count;
}
static ssize_t
The patch below does not apply to the 6.11-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.11.y
git checkout FETCH_HEAD
git cherry-pick -x 874b6476fa888e79e41daf7653384c8d70927b90
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100114-compare-delivery-d2c9@gregkh' --subject-prefix 'PATCH 6.11.y' HEAD^..
Possible dependencies:
874b6476fa88 ("thermal: sysfs: Add sanity checks for trip temperature and hysteresis")
107280e1371f ("thermal: sysfs: Refine the handling of trip hysteresis changes")
afd84fb10ced ("thermal: sysfs: Get to trips via attribute pointers")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 874b6476fa888e79e41daf7653384c8d70927b90 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki(a)intel.com>
Date: Mon, 26 Aug 2024 18:21:59 +0200
Subject: [PATCH] thermal: sysfs: Add sanity checks for trip temperature and
hysteresis
Add sanity checks for new trip temperature and hysteresis values to
trip_point_temp_store() and trip_point_hyst_store() to prevent trip
point threshold from falling below THERMAL_TEMP_INVALID.
However, still allow user space to pass THERMAL_TEMP_INVALID as the
new trip temperature value to invalidate the trip if necessary.
Also allow the hysteresis to be updated when the temperature is invalid
to allow user space to avoid having to adjust hysteresis after a valid
temperature has been set, but in that case just change the value and do
nothing else.
Fixes: be0a3600aa1e ("thermal: sysfs: Rework the handling of trip point updates")
Cc: 6.8+ <stable(a)vger.kernel.org> # 6.8+
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
Reviewed-by: Lukasz Luba <lukasz.luba(a)arm.com>
Link: https://patch.msgid.link/12528772.O9o76ZdvQC@rjwysocki.net
diff --git a/drivers/thermal/thermal_sysfs.c b/drivers/thermal/thermal_sysfs.c
index b740b60032ee..197ae1262466 100644
--- a/drivers/thermal/thermal_sysfs.c
+++ b/drivers/thermal/thermal_sysfs.c
@@ -111,18 +111,26 @@ trip_point_temp_store(struct device *dev, struct device_attribute *attr,
mutex_lock(&tz->lock);
- if (temp != trip->temperature) {
- if (tz->ops.set_trip_temp) {
- ret = tz->ops.set_trip_temp(tz, trip, temp);
- if (ret)
- goto unlock;
- }
+ if (temp == trip->temperature)
+ goto unlock;
- thermal_zone_set_trip_temp(tz, trip, temp);
-
- __thermal_zone_device_update(tz, THERMAL_TRIP_CHANGED);
+ /* Arrange the condition to avoid integer overflows. */
+ if (temp != THERMAL_TEMP_INVALID &&
+ temp <= trip->hysteresis + THERMAL_TEMP_INVALID) {
+ ret = -EINVAL;
+ goto unlock;
}
+ if (tz->ops.set_trip_temp) {
+ ret = tz->ops.set_trip_temp(tz, trip, temp);
+ if (ret)
+ goto unlock;
+ }
+
+ thermal_zone_set_trip_temp(tz, trip, temp);
+
+ __thermal_zone_device_update(tz, THERMAL_TRIP_CHANGED);
+
unlock:
mutex_unlock(&tz->lock);
@@ -152,15 +160,33 @@ trip_point_hyst_store(struct device *dev, struct device_attribute *attr,
mutex_lock(&tz->lock);
- if (hyst != trip->hysteresis) {
- thermal_zone_set_trip_hyst(tz, trip, hyst);
+ if (hyst == trip->hysteresis)
+ goto unlock;
- __thermal_zone_device_update(tz, THERMAL_TRIP_CHANGED);
+ /*
+ * Allow the hysteresis to be updated when the temperature is invalid
+ * to allow user space to avoid having to adjust hysteresis after a
+ * valid temperature has been set, but in that case just change the
+ * value and do nothing else.
+ */
+ if (trip->temperature == THERMAL_TEMP_INVALID) {
+ WRITE_ONCE(trip->hysteresis, hyst);
+ goto unlock;
}
+ if (trip->temperature - hyst <= THERMAL_TEMP_INVALID) {
+ ret = -EINVAL;
+ goto unlock;
+ }
+
+ thermal_zone_set_trip_hyst(tz, trip, hyst);
+
+ __thermal_zone_device_update(tz, THERMAL_TRIP_CHANGED);
+
+unlock:
mutex_unlock(&tz->lock);
- return count;
+ return ret ? ret : count;
}
static ssize_t
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 477d81a1c47a1b79b9c08fc92b5dea3c5143800b
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100139-outfit-agenda-4b7e@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
477d81a1c47a ("x86/entry: Remove unwanted instrumentation in common_interrupt()")
90f357208200 ("x86/idtentry: Incorporate definitions/declarations of the FRED entries")
5b51e1db9bdc ("x86/entry: Convert device interrupts to inline stack switching")
569dd8b4eb7e ("x86/entry: Convert system vectors to irq stack macro")
a0cfc74d0b00 ("x86/irq: Provide macro for inlining irq stack switching")
951c2a51ae75 ("x86/irq/64: Adjust the per CPU irq stack pointer by 8")
e7f890017971 ("x86/irq: Sanitize irq stack tracking")
b6be002bcd1d ("x86/entry: Move nmi entry/exit into common code")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 477d81a1c47a1b79b9c08fc92b5dea3c5143800b Mon Sep 17 00:00:00 2001
From: Dmitry Vyukov <dvyukov(a)google.com>
Date: Tue, 11 Jun 2024 09:50:30 +0200
Subject: [PATCH] x86/entry: Remove unwanted instrumentation in
common_interrupt()
common_interrupt() and related variants call kvm_set_cpu_l1tf_flush_l1d(),
which is neither marked noinstr nor __always_inline.
So compiler puts it out of line and adds instrumentation to it. Since the
call is inside of instrumentation_begin/end(), objtool does not warn about
it.
The manifestation is that KCOV produces spurious coverage in
kvm_set_cpu_l1tf_flush_l1d() in random places because the call happens when
preempt count is not yet updated to say that the kernel is in an interrupt.
Mark kvm_set_cpu_l1tf_flush_l1d() as __always_inline and move it out of the
instrumentation_begin/end() section. It only calls __this_cpu_write()
which is already safe to call in noinstr contexts.
Fixes: 6368558c3710 ("x86/entry: Provide IDTENTRY_SYSVEC")
Signed-off-by: Dmitry Vyukov <dvyukov(a)google.com>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Reviewed-by: Alexander Potapenko <glider(a)google.com>
Acked-by: Peter Zijlstra (Intel) <peterz(a)infradead.org>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/all/3f9a1de9e415fcb53d07dc9e19fa8481bb021b1b.171809…
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index c67fa6ad098a..6ffa8b75f4cd 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -69,7 +69,11 @@ extern u64 arch_irq_stat(void);
#define local_softirq_pending_ref pcpu_hot.softirq_pending
#if IS_ENABLED(CONFIG_KVM_INTEL)
-static inline void kvm_set_cpu_l1tf_flush_l1d(void)
+/*
+ * This function is called from noinstr interrupt contexts
+ * and must be inlined to not get instrumentation.
+ */
+static __always_inline void kvm_set_cpu_l1tf_flush_l1d(void)
{
__this_cpu_write(irq_stat.kvm_cpu_l1tf_flush_l1d, 1);
}
@@ -84,7 +88,7 @@ static __always_inline bool kvm_get_cpu_l1tf_flush_l1d(void)
return __this_cpu_read(irq_stat.kvm_cpu_l1tf_flush_l1d);
}
#else /* !IS_ENABLED(CONFIG_KVM_INTEL) */
-static inline void kvm_set_cpu_l1tf_flush_l1d(void) { }
+static __always_inline void kvm_set_cpu_l1tf_flush_l1d(void) { }
#endif /* IS_ENABLED(CONFIG_KVM_INTEL) */
#endif /* _ASM_X86_HARDIRQ_H */
diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h
index d4f24499b256..ad5c68f0509d 100644
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -212,8 +212,8 @@ __visible noinstr void func(struct pt_regs *regs, \
irqentry_state_t state = irqentry_enter(regs); \
u32 vector = (u32)(u8)error_code; \
\
+ kvm_set_cpu_l1tf_flush_l1d(); \
instrumentation_begin(); \
- kvm_set_cpu_l1tf_flush_l1d(); \
run_irq_on_irqstack_cond(__##func, regs, vector); \
instrumentation_end(); \
irqentry_exit(regs, state); \
@@ -250,7 +250,6 @@ static void __##func(struct pt_regs *regs); \
\
static __always_inline void instr_##func(struct pt_regs *regs) \
{ \
- kvm_set_cpu_l1tf_flush_l1d(); \
run_sysvec_on_irqstack_cond(__##func, regs); \
} \
\
@@ -258,6 +257,7 @@ __visible noinstr void func(struct pt_regs *regs) \
{ \
irqentry_state_t state = irqentry_enter(regs); \
\
+ kvm_set_cpu_l1tf_flush_l1d(); \
instrumentation_begin(); \
instr_##func (regs); \
instrumentation_end(); \
@@ -288,7 +288,6 @@ static __always_inline void __##func(struct pt_regs *regs); \
static __always_inline void instr_##func(struct pt_regs *regs) \
{ \
__irq_enter_raw(); \
- kvm_set_cpu_l1tf_flush_l1d(); \
__##func (regs); \
__irq_exit_raw(); \
} \
@@ -297,6 +296,7 @@ __visible noinstr void func(struct pt_regs *regs) \
{ \
irqentry_state_t state = irqentry_enter(regs); \
\
+ kvm_set_cpu_l1tf_flush_l1d(); \
instrumentation_begin(); \
instr_##func (regs); \
instrumentation_end(); \
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 477d81a1c47a1b79b9c08fc92b5dea3c5143800b
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100138-marital-stumbling-53bc@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
477d81a1c47a ("x86/entry: Remove unwanted instrumentation in common_interrupt()")
90f357208200 ("x86/idtentry: Incorporate definitions/declarations of the FRED entries")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 477d81a1c47a1b79b9c08fc92b5dea3c5143800b Mon Sep 17 00:00:00 2001
From: Dmitry Vyukov <dvyukov(a)google.com>
Date: Tue, 11 Jun 2024 09:50:30 +0200
Subject: [PATCH] x86/entry: Remove unwanted instrumentation in
common_interrupt()
common_interrupt() and related variants call kvm_set_cpu_l1tf_flush_l1d(),
which is neither marked noinstr nor __always_inline.
So compiler puts it out of line and adds instrumentation to it. Since the
call is inside of instrumentation_begin/end(), objtool does not warn about
it.
The manifestation is that KCOV produces spurious coverage in
kvm_set_cpu_l1tf_flush_l1d() in random places because the call happens when
preempt count is not yet updated to say that the kernel is in an interrupt.
Mark kvm_set_cpu_l1tf_flush_l1d() as __always_inline and move it out of the
instrumentation_begin/end() section. It only calls __this_cpu_write()
which is already safe to call in noinstr contexts.
Fixes: 6368558c3710 ("x86/entry: Provide IDTENTRY_SYSVEC")
Signed-off-by: Dmitry Vyukov <dvyukov(a)google.com>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Reviewed-by: Alexander Potapenko <glider(a)google.com>
Acked-by: Peter Zijlstra (Intel) <peterz(a)infradead.org>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/all/3f9a1de9e415fcb53d07dc9e19fa8481bb021b1b.171809…
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index c67fa6ad098a..6ffa8b75f4cd 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -69,7 +69,11 @@ extern u64 arch_irq_stat(void);
#define local_softirq_pending_ref pcpu_hot.softirq_pending
#if IS_ENABLED(CONFIG_KVM_INTEL)
-static inline void kvm_set_cpu_l1tf_flush_l1d(void)
+/*
+ * This function is called from noinstr interrupt contexts
+ * and must be inlined to not get instrumentation.
+ */
+static __always_inline void kvm_set_cpu_l1tf_flush_l1d(void)
{
__this_cpu_write(irq_stat.kvm_cpu_l1tf_flush_l1d, 1);
}
@@ -84,7 +88,7 @@ static __always_inline bool kvm_get_cpu_l1tf_flush_l1d(void)
return __this_cpu_read(irq_stat.kvm_cpu_l1tf_flush_l1d);
}
#else /* !IS_ENABLED(CONFIG_KVM_INTEL) */
-static inline void kvm_set_cpu_l1tf_flush_l1d(void) { }
+static __always_inline void kvm_set_cpu_l1tf_flush_l1d(void) { }
#endif /* IS_ENABLED(CONFIG_KVM_INTEL) */
#endif /* _ASM_X86_HARDIRQ_H */
diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h
index d4f24499b256..ad5c68f0509d 100644
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -212,8 +212,8 @@ __visible noinstr void func(struct pt_regs *regs, \
irqentry_state_t state = irqentry_enter(regs); \
u32 vector = (u32)(u8)error_code; \
\
+ kvm_set_cpu_l1tf_flush_l1d(); \
instrumentation_begin(); \
- kvm_set_cpu_l1tf_flush_l1d(); \
run_irq_on_irqstack_cond(__##func, regs, vector); \
instrumentation_end(); \
irqentry_exit(regs, state); \
@@ -250,7 +250,6 @@ static void __##func(struct pt_regs *regs); \
\
static __always_inline void instr_##func(struct pt_regs *regs) \
{ \
- kvm_set_cpu_l1tf_flush_l1d(); \
run_sysvec_on_irqstack_cond(__##func, regs); \
} \
\
@@ -258,6 +257,7 @@ __visible noinstr void func(struct pt_regs *regs) \
{ \
irqentry_state_t state = irqentry_enter(regs); \
\
+ kvm_set_cpu_l1tf_flush_l1d(); \
instrumentation_begin(); \
instr_##func (regs); \
instrumentation_end(); \
@@ -288,7 +288,6 @@ static __always_inline void __##func(struct pt_regs *regs); \
static __always_inline void instr_##func(struct pt_regs *regs) \
{ \
__irq_enter_raw(); \
- kvm_set_cpu_l1tf_flush_l1d(); \
__##func (regs); \
__irq_exit_raw(); \
} \
@@ -297,6 +296,7 @@ __visible noinstr void func(struct pt_regs *regs) \
{ \
irqentry_state_t state = irqentry_enter(regs); \
\
+ kvm_set_cpu_l1tf_flush_l1d(); \
instrumentation_begin(); \
instr_##func (regs); \
instrumentation_end(); \
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 477d81a1c47a1b79b9c08fc92b5dea3c5143800b
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100137-overdraft-spongy-2e3f@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
477d81a1c47a ("x86/entry: Remove unwanted instrumentation in common_interrupt()")
90f357208200 ("x86/idtentry: Incorporate definitions/declarations of the FRED entries")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 477d81a1c47a1b79b9c08fc92b5dea3c5143800b Mon Sep 17 00:00:00 2001
From: Dmitry Vyukov <dvyukov(a)google.com>
Date: Tue, 11 Jun 2024 09:50:30 +0200
Subject: [PATCH] x86/entry: Remove unwanted instrumentation in
common_interrupt()
common_interrupt() and related variants call kvm_set_cpu_l1tf_flush_l1d(),
which is neither marked noinstr nor __always_inline.
So compiler puts it out of line and adds instrumentation to it. Since the
call is inside of instrumentation_begin/end(), objtool does not warn about
it.
The manifestation is that KCOV produces spurious coverage in
kvm_set_cpu_l1tf_flush_l1d() in random places because the call happens when
preempt count is not yet updated to say that the kernel is in an interrupt.
Mark kvm_set_cpu_l1tf_flush_l1d() as __always_inline and move it out of the
instrumentation_begin/end() section. It only calls __this_cpu_write()
which is already safe to call in noinstr contexts.
Fixes: 6368558c3710 ("x86/entry: Provide IDTENTRY_SYSVEC")
Signed-off-by: Dmitry Vyukov <dvyukov(a)google.com>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Reviewed-by: Alexander Potapenko <glider(a)google.com>
Acked-by: Peter Zijlstra (Intel) <peterz(a)infradead.org>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/all/3f9a1de9e415fcb53d07dc9e19fa8481bb021b1b.171809…
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index c67fa6ad098a..6ffa8b75f4cd 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -69,7 +69,11 @@ extern u64 arch_irq_stat(void);
#define local_softirq_pending_ref pcpu_hot.softirq_pending
#if IS_ENABLED(CONFIG_KVM_INTEL)
-static inline void kvm_set_cpu_l1tf_flush_l1d(void)
+/*
+ * This function is called from noinstr interrupt contexts
+ * and must be inlined to not get instrumentation.
+ */
+static __always_inline void kvm_set_cpu_l1tf_flush_l1d(void)
{
__this_cpu_write(irq_stat.kvm_cpu_l1tf_flush_l1d, 1);
}
@@ -84,7 +88,7 @@ static __always_inline bool kvm_get_cpu_l1tf_flush_l1d(void)
return __this_cpu_read(irq_stat.kvm_cpu_l1tf_flush_l1d);
}
#else /* !IS_ENABLED(CONFIG_KVM_INTEL) */
-static inline void kvm_set_cpu_l1tf_flush_l1d(void) { }
+static __always_inline void kvm_set_cpu_l1tf_flush_l1d(void) { }
#endif /* IS_ENABLED(CONFIG_KVM_INTEL) */
#endif /* _ASM_X86_HARDIRQ_H */
diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h
index d4f24499b256..ad5c68f0509d 100644
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -212,8 +212,8 @@ __visible noinstr void func(struct pt_regs *regs, \
irqentry_state_t state = irqentry_enter(regs); \
u32 vector = (u32)(u8)error_code; \
\
+ kvm_set_cpu_l1tf_flush_l1d(); \
instrumentation_begin(); \
- kvm_set_cpu_l1tf_flush_l1d(); \
run_irq_on_irqstack_cond(__##func, regs, vector); \
instrumentation_end(); \
irqentry_exit(regs, state); \
@@ -250,7 +250,6 @@ static void __##func(struct pt_regs *regs); \
\
static __always_inline void instr_##func(struct pt_regs *regs) \
{ \
- kvm_set_cpu_l1tf_flush_l1d(); \
run_sysvec_on_irqstack_cond(__##func, regs); \
} \
\
@@ -258,6 +257,7 @@ __visible noinstr void func(struct pt_regs *regs) \
{ \
irqentry_state_t state = irqentry_enter(regs); \
\
+ kvm_set_cpu_l1tf_flush_l1d(); \
instrumentation_begin(); \
instr_##func (regs); \
instrumentation_end(); \
@@ -288,7 +288,6 @@ static __always_inline void __##func(struct pt_regs *regs); \
static __always_inline void instr_##func(struct pt_regs *regs) \
{ \
__irq_enter_raw(); \
- kvm_set_cpu_l1tf_flush_l1d(); \
__##func (regs); \
__irq_exit_raw(); \
} \
@@ -297,6 +296,7 @@ __visible noinstr void func(struct pt_regs *regs) \
{ \
irqentry_state_t state = irqentry_enter(regs); \
\
+ kvm_set_cpu_l1tf_flush_l1d(); \
instrumentation_begin(); \
instr_##func (regs); \
instrumentation_end(); \
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 477d81a1c47a1b79b9c08fc92b5dea3c5143800b
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100136-phrase-smashing-4e41@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
477d81a1c47a ("x86/entry: Remove unwanted instrumentation in common_interrupt()")
90f357208200 ("x86/idtentry: Incorporate definitions/declarations of the FRED entries")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 477d81a1c47a1b79b9c08fc92b5dea3c5143800b Mon Sep 17 00:00:00 2001
From: Dmitry Vyukov <dvyukov(a)google.com>
Date: Tue, 11 Jun 2024 09:50:30 +0200
Subject: [PATCH] x86/entry: Remove unwanted instrumentation in
common_interrupt()
common_interrupt() and related variants call kvm_set_cpu_l1tf_flush_l1d(),
which is neither marked noinstr nor __always_inline.
So compiler puts it out of line and adds instrumentation to it. Since the
call is inside of instrumentation_begin/end(), objtool does not warn about
it.
The manifestation is that KCOV produces spurious coverage in
kvm_set_cpu_l1tf_flush_l1d() in random places because the call happens when
preempt count is not yet updated to say that the kernel is in an interrupt.
Mark kvm_set_cpu_l1tf_flush_l1d() as __always_inline and move it out of the
instrumentation_begin/end() section. It only calls __this_cpu_write()
which is already safe to call in noinstr contexts.
Fixes: 6368558c3710 ("x86/entry: Provide IDTENTRY_SYSVEC")
Signed-off-by: Dmitry Vyukov <dvyukov(a)google.com>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Reviewed-by: Alexander Potapenko <glider(a)google.com>
Acked-by: Peter Zijlstra (Intel) <peterz(a)infradead.org>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/all/3f9a1de9e415fcb53d07dc9e19fa8481bb021b1b.171809…
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index c67fa6ad098a..6ffa8b75f4cd 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -69,7 +69,11 @@ extern u64 arch_irq_stat(void);
#define local_softirq_pending_ref pcpu_hot.softirq_pending
#if IS_ENABLED(CONFIG_KVM_INTEL)
-static inline void kvm_set_cpu_l1tf_flush_l1d(void)
+/*
+ * This function is called from noinstr interrupt contexts
+ * and must be inlined to not get instrumentation.
+ */
+static __always_inline void kvm_set_cpu_l1tf_flush_l1d(void)
{
__this_cpu_write(irq_stat.kvm_cpu_l1tf_flush_l1d, 1);
}
@@ -84,7 +88,7 @@ static __always_inline bool kvm_get_cpu_l1tf_flush_l1d(void)
return __this_cpu_read(irq_stat.kvm_cpu_l1tf_flush_l1d);
}
#else /* !IS_ENABLED(CONFIG_KVM_INTEL) */
-static inline void kvm_set_cpu_l1tf_flush_l1d(void) { }
+static __always_inline void kvm_set_cpu_l1tf_flush_l1d(void) { }
#endif /* IS_ENABLED(CONFIG_KVM_INTEL) */
#endif /* _ASM_X86_HARDIRQ_H */
diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h
index d4f24499b256..ad5c68f0509d 100644
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -212,8 +212,8 @@ __visible noinstr void func(struct pt_regs *regs, \
irqentry_state_t state = irqentry_enter(regs); \
u32 vector = (u32)(u8)error_code; \
\
+ kvm_set_cpu_l1tf_flush_l1d(); \
instrumentation_begin(); \
- kvm_set_cpu_l1tf_flush_l1d(); \
run_irq_on_irqstack_cond(__##func, regs, vector); \
instrumentation_end(); \
irqentry_exit(regs, state); \
@@ -250,7 +250,6 @@ static void __##func(struct pt_regs *regs); \
\
static __always_inline void instr_##func(struct pt_regs *regs) \
{ \
- kvm_set_cpu_l1tf_flush_l1d(); \
run_sysvec_on_irqstack_cond(__##func, regs); \
} \
\
@@ -258,6 +257,7 @@ __visible noinstr void func(struct pt_regs *regs) \
{ \
irqentry_state_t state = irqentry_enter(regs); \
\
+ kvm_set_cpu_l1tf_flush_l1d(); \
instrumentation_begin(); \
instr_##func (regs); \
instrumentation_end(); \
@@ -288,7 +288,6 @@ static __always_inline void __##func(struct pt_regs *regs); \
static __always_inline void instr_##func(struct pt_regs *regs) \
{ \
__irq_enter_raw(); \
- kvm_set_cpu_l1tf_flush_l1d(); \
__##func (regs); \
__irq_exit_raw(); \
} \
@@ -297,6 +296,7 @@ __visible noinstr void func(struct pt_regs *regs) \
{ \
irqentry_state_t state = irqentry_enter(regs); \
\
+ kvm_set_cpu_l1tf_flush_l1d(); \
instrumentation_begin(); \
instr_##func (regs); \
instrumentation_end(); \
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x b53f09ecd602d7b8b7da83b0890cbac500b6a9b9
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100101-huskiness-tray-09c8@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
b53f09ecd602 ("ACPI: resource: Do IRQ override on MECHREV GM7XG0M")
6eaf375a5a98 ("ACPI: resource: Do IRQ override on GMxBGxx (XMG APEX 17 M23)")
424009ab2030 ("ACPI: resource: Drop .ident values from dmi_system_id tables")
d37273af0e42 ("ACPI: resource: Consolidate IRQ trigger-type override DMI tables")
c1ed72171ed5 ("ACPI: resource: Skip IRQ override on ASUS ExpertBook B1402CBA")
453b014e2c29 ("ACPI: resource: Fix IRQ override quirk for PCSpecialist Elimina Pro 16 M")
56fec0051a69 ("ACPI: resource: Add IRQ override quirk for PCSpecialist Elimina Pro 16 M")
9728ac221160 ("ACPI: resource: Always use MADT override IRQ settings for all legacy non i8042 IRQs")
2d331a6ac481 ("ACPI: resource: revert "Remove "Zen" specific match and quirks"")
a9c4a912b7dc ("ACPI: resource: Remove "Zen" specific match and quirks")
71a485624c4c ("ACPI: resource: Add IRQ override quirk for LG UltraPC 17U70P")
05cda427126f ("ACPI: resource: Skip IRQ override on ASUS ExpertBook B1502CBA")
2d0ab14634a2 ("ACPI: resource: Add Medion S17413 to IRQ override quirk")
65eb2867f5bf ("ACPI: resource: Skip IRQ override on Asus Expertbook B2402FBA")
17bb7046e7ce ("ACPI: resource: Do IRQ override on all TongFang GMxRGxx")
cb18703c1797 ("ACPI: resource: Add IRQ overrides for MAINGEAR Vector Pro 2 models")
77c724888238 ("ACPI: resource: Skip IRQ override on Asus Expertbook B2402CBA")
7203481fd12b ("ACPI: resource: Add Asus ExpertBook B2502 to Asus quirks")
f3cb9b740869 ("ACPI: resource: do IRQ override on Lenovo 14ALC7")
7592b79ba4a9 ("ACPI: resource: do IRQ override on XMG Core 15")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b53f09ecd602d7b8b7da83b0890cbac500b6a9b9 Mon Sep 17 00:00:00 2001
From: Li Chen <me(a)linux.beauty>
Date: Sat, 3 Aug 2024 16:13:18 +0800
Subject: [PATCH] ACPI: resource: Do IRQ override on MECHREV GM7XG0M
Listed device need the override for the keyboard to work.
Fixes: 9946e39fe8d0 ("ACPI: resource: skip IRQ override on AMD Zen platforms")
Cc: All applicable <stable(a)vger.kernel.org>
Signed-off-by: Li Chen <me(a)linux.beauty>
Link: https://patch.msgid.link/87y15e6n35.wl-me@linux.beauty
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c
index df5d5a554b38..aa9990507f34 100644
--- a/drivers/acpi/resource.c
+++ b/drivers/acpi/resource.c
@@ -554,6 +554,12 @@ static const struct dmi_system_id irq1_level_low_skip_override[] = {
* to have a working keyboard.
*/
static const struct dmi_system_id irq1_edge_low_force_override[] = {
+ {
+ /* MECHREV Jiaolong17KS Series GM7XG0M */
+ .matches = {
+ DMI_MATCH(DMI_BOARD_NAME, "GM7XG0M"),
+ },
+ },
{
/* XMG APEX 17 (M23) */
.matches = {
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x b53f09ecd602d7b8b7da83b0890cbac500b6a9b9
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100101-area-scarce-20d5@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
b53f09ecd602 ("ACPI: resource: Do IRQ override on MECHREV GM7XG0M")
6eaf375a5a98 ("ACPI: resource: Do IRQ override on GMxBGxx (XMG APEX 17 M23)")
424009ab2030 ("ACPI: resource: Drop .ident values from dmi_system_id tables")
d37273af0e42 ("ACPI: resource: Consolidate IRQ trigger-type override DMI tables")
c1ed72171ed5 ("ACPI: resource: Skip IRQ override on ASUS ExpertBook B1402CBA")
453b014e2c29 ("ACPI: resource: Fix IRQ override quirk for PCSpecialist Elimina Pro 16 M")
56fec0051a69 ("ACPI: resource: Add IRQ override quirk for PCSpecialist Elimina Pro 16 M")
9728ac221160 ("ACPI: resource: Always use MADT override IRQ settings for all legacy non i8042 IRQs")
2d331a6ac481 ("ACPI: resource: revert "Remove "Zen" specific match and quirks"")
a9c4a912b7dc ("ACPI: resource: Remove "Zen" specific match and quirks")
71a485624c4c ("ACPI: resource: Add IRQ override quirk for LG UltraPC 17U70P")
05cda427126f ("ACPI: resource: Skip IRQ override on ASUS ExpertBook B1502CBA")
2d0ab14634a2 ("ACPI: resource: Add Medion S17413 to IRQ override quirk")
65eb2867f5bf ("ACPI: resource: Skip IRQ override on Asus Expertbook B2402FBA")
17bb7046e7ce ("ACPI: resource: Do IRQ override on all TongFang GMxRGxx")
cb18703c1797 ("ACPI: resource: Add IRQ overrides for MAINGEAR Vector Pro 2 models")
77c724888238 ("ACPI: resource: Skip IRQ override on Asus Expertbook B2402CBA")
7203481fd12b ("ACPI: resource: Add Asus ExpertBook B2502 to Asus quirks")
f3cb9b740869 ("ACPI: resource: do IRQ override on Lenovo 14ALC7")
7592b79ba4a9 ("ACPI: resource: do IRQ override on XMG Core 15")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b53f09ecd602d7b8b7da83b0890cbac500b6a9b9 Mon Sep 17 00:00:00 2001
From: Li Chen <me(a)linux.beauty>
Date: Sat, 3 Aug 2024 16:13:18 +0800
Subject: [PATCH] ACPI: resource: Do IRQ override on MECHREV GM7XG0M
Listed device need the override for the keyboard to work.
Fixes: 9946e39fe8d0 ("ACPI: resource: skip IRQ override on AMD Zen platforms")
Cc: All applicable <stable(a)vger.kernel.org>
Signed-off-by: Li Chen <me(a)linux.beauty>
Link: https://patch.msgid.link/87y15e6n35.wl-me@linux.beauty
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c
index df5d5a554b38..aa9990507f34 100644
--- a/drivers/acpi/resource.c
+++ b/drivers/acpi/resource.c
@@ -554,6 +554,12 @@ static const struct dmi_system_id irq1_level_low_skip_override[] = {
* to have a working keyboard.
*/
static const struct dmi_system_id irq1_edge_low_force_override[] = {
+ {
+ /* MECHREV Jiaolong17KS Series GM7XG0M */
+ .matches = {
+ DMI_MATCH(DMI_BOARD_NAME, "GM7XG0M"),
+ },
+ },
{
/* XMG APEX 17 (M23) */
.matches = {
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x b53f09ecd602d7b8b7da83b0890cbac500b6a9b9
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100100-deflator-mouth-8525@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
b53f09ecd602 ("ACPI: resource: Do IRQ override on MECHREV GM7XG0M")
6eaf375a5a98 ("ACPI: resource: Do IRQ override on GMxBGxx (XMG APEX 17 M23)")
424009ab2030 ("ACPI: resource: Drop .ident values from dmi_system_id tables")
d37273af0e42 ("ACPI: resource: Consolidate IRQ trigger-type override DMI tables")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b53f09ecd602d7b8b7da83b0890cbac500b6a9b9 Mon Sep 17 00:00:00 2001
From: Li Chen <me(a)linux.beauty>
Date: Sat, 3 Aug 2024 16:13:18 +0800
Subject: [PATCH] ACPI: resource: Do IRQ override on MECHREV GM7XG0M
Listed device need the override for the keyboard to work.
Fixes: 9946e39fe8d0 ("ACPI: resource: skip IRQ override on AMD Zen platforms")
Cc: All applicable <stable(a)vger.kernel.org>
Signed-off-by: Li Chen <me(a)linux.beauty>
Link: https://patch.msgid.link/87y15e6n35.wl-me@linux.beauty
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c
index df5d5a554b38..aa9990507f34 100644
--- a/drivers/acpi/resource.c
+++ b/drivers/acpi/resource.c
@@ -554,6 +554,12 @@ static const struct dmi_system_id irq1_level_low_skip_override[] = {
* to have a working keyboard.
*/
static const struct dmi_system_id irq1_edge_low_force_override[] = {
+ {
+ /* MECHREV Jiaolong17KS Series GM7XG0M */
+ .matches = {
+ DMI_MATCH(DMI_BOARD_NAME, "GM7XG0M"),
+ },
+ },
{
/* XMG APEX 17 (M23) */
.matches = {
On Mon, Sep 30, 2024 at 08:28:59PM -0400, Sasha Levin wrote:
> This is a note to let you know that I've just added the patch titled
>
> Input: ps2-gpio - use IRQF_NO_AUTOEN flag in request_irq()
>
> to the 5.15-stable tree which can be found at:
> http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
>
> The filename of the patch is:
> input-ps2-gpio-use-irqf_no_autoen-flag-in-request_ir.patch
> and it can be found in the queue-5.15 subdirectory.
>
> If you, or anyone else, feels it should not be added to the stable tree,
> please let <stable(a)vger.kernel.org> know about it.
For the love of God, why? Why does this pure cleanup type of change
needs to be in stable?
>
>
>
> commit 2d007ddec282076923c4d84d6b12858b9f44594a
> Author: Jinjie Ruan <ruanjinjie(a)huawei.com>
> Date: Thu Sep 12 11:30:13 2024 +0800
>
> Input: ps2-gpio - use IRQF_NO_AUTOEN flag in request_irq()
>
> [ Upstream commit dcd18a3fb1228409dfc24373c5c6868a655810b0 ]
>
> disable_irq() after request_irq() still has a time gap in which
> interrupts can come. request_irq() with IRQF_NO_AUTOEN flag will
> disable IRQ auto-enable when request IRQ.
>
> Fixes: 9ee0a0558819 ("Input: PS/2 gpio bit banging driver for serio bus")
> Signed-off-by: Jinjie Ruan <ruanjinjie(a)huawei.com>
> Acked-by: Danilo Krummrich <dakr(a)kernel.org>
> Link: https://lore.kernel.org/r/20240912033013.2610949-1-ruanjinjie@huawei.com
> Signed-off-by: Dmitry Torokhov <dmitry.torokhov(a)gmail.com>
> Signed-off-by: Sasha Levin <sashal(a)kernel.org>
>
> diff --git a/drivers/input/serio/ps2-gpio.c b/drivers/input/serio/ps2-gpio.c
> index 8970b49ea09a2..b0238a8b5c210 100644
> --- a/drivers/input/serio/ps2-gpio.c
> +++ b/drivers/input/serio/ps2-gpio.c
> @@ -374,16 +374,14 @@ static int ps2_gpio_probe(struct platform_device *pdev)
> }
>
> error = devm_request_irq(dev, drvdata->irq, ps2_gpio_irq,
> - IRQF_NO_THREAD, DRIVER_NAME, drvdata);
> + IRQF_NO_THREAD | IRQF_NO_AUTOEN, DRIVER_NAME,
> + drvdata);
> if (error) {
> dev_err(dev, "failed to request irq %d: %d\n",
> drvdata->irq, error);
> goto err_free_serio;
> }
>
> - /* Keep irq disabled until serio->open is called. */
> - disable_irq(drvdata->irq);
> -
> serio->id.type = SERIO_8042;
> serio->open = ps2_gpio_open;
> serio->close = ps2_gpio_close;
--
Dmitry
The patch below does not apply to the 6.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.10.y
git checkout FETCH_HEAD
git cherry-pick -x 63d14d974d3d82d0feeae8c73b055d330051e1b4
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100118-preteen-unsuited-4428@gregkh' --subject-prefix 'PATCH 6.10.y' HEAD^..
Possible dependencies:
63d14d974d3d ("serial: qcom-geni: fix polled console corruption")
cc4a0e5754a1 ("serial: qcom-geni: fix console corruption")
f97cdbbf187f ("serial: qcom-geni: fix false console tx restart")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 63d14d974d3d82d0feeae8c73b055d330051e1b4 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro(a)kernel.org>
Date: Fri, 6 Sep 2024 15:13:36 +0200
Subject: [PATCH] serial: qcom-geni: fix polled console corruption
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The polled UART operations are used by the kernel debugger (KDB, KGDB),
which can interrupt the kernel at any point in time. The current
Qualcomm GENI implementation does not really work when there is on-going
serial output as it inadvertently "hijacks" the current tx command,
which can result in both the initial debugger output being corrupted as
well as the corruption of any on-going serial output (up to 4k
characters) when execution resumes:
0190: abcdefghijklmnopqrstuvwxyz0123456789 0190: abcdefghijklmnopqrstuvwxyz0123456789
0191: abcdefghijklmnop[ 50.825552] sysrq: DEBUG
qrstuvwxyz0123456789 0191: abcdefghijklmnopqrstuvwxyz0123456789
Entering kdb (current=0xffff53510b4cd280, pid 640) on processor 2 due to Keyboard Entry
[2]kdb> go
omlji3h3h2g2g1f1f0e0ezdzdycycxbxbwawav :t72r2rp
o9n976k5j5j4i4i3h3h2g2g1f1f0e0ezdzdycycxbxbwawavu:t7t8s8s8r2r2q0q0p
o9n9n8ml6k6k5j5j4i4i3h3h2g2g1f1f0e0ezdzdycycxbxbwawav v u:u:t9t0s4s4rq0p
o9n9n8m8m7l7l6k6k5j5j40q0p p o
o9n9n8m8m7l7l6k6k5j5j4i4i3h3h2g2g1f1f0e0ezdzdycycxbxbwawav :t8t9s4s4r4r4q0q0p
Fix this by making sure that the polled output implementation waits for
the tx fifo to drain before cancelling any on-going longer transfers. As
the polled code cannot take any locks, leave the state variables as they
are and instead make sure that the interrupt handler always starts a new
tx command when there is data in the write buffer.
Since the debugger can interrupt the interrupt handler when it is
writing data to the tx fifo, it is currently not possible to fully
prevent losing up to 64 bytes of tty output on resume.
Fixes: c4f528795d1a ("tty: serial: msm_geni_serial: Add serial driver support for GENI based QUP")
Cc: stable(a)vger.kernel.org # 4.17
Reviewed-by: Douglas Anderson <dianders(a)chromium.org>
Tested-by: Nícolas F. R. A. Prado <nfraprado(a)collabora.com>
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
Link: https://lore.kernel.org/r/20240906131336.23625-9-johan+linaro@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c
index f23fd0ac3cfd..6f0db310cf69 100644
--- a/drivers/tty/serial/qcom_geni_serial.c
+++ b/drivers/tty/serial/qcom_geni_serial.c
@@ -145,6 +145,7 @@ static const struct uart_ops qcom_geni_uart_pops;
static struct uart_driver qcom_geni_console_driver;
static struct uart_driver qcom_geni_uart_driver;
+static void __qcom_geni_serial_cancel_tx_cmd(struct uart_port *uport);
static void qcom_geni_serial_cancel_tx_cmd(struct uart_port *uport);
static inline struct qcom_geni_serial_port *to_dev_port(struct uart_port *uport)
@@ -384,13 +385,14 @@ static int qcom_geni_serial_get_char(struct uart_port *uport)
static void qcom_geni_serial_poll_put_char(struct uart_port *uport,
unsigned char c)
{
- writel(DEF_TX_WM, uport->membase + SE_GENI_TX_WATERMARK_REG);
+ if (qcom_geni_serial_main_active(uport)) {
+ qcom_geni_serial_poll_tx_done(uport);
+ __qcom_geni_serial_cancel_tx_cmd(uport);
+ }
+
writel(M_CMD_DONE_EN, uport->membase + SE_GENI_M_IRQ_CLEAR);
qcom_geni_serial_setup_tx(uport, 1);
- WARN_ON(!qcom_geni_serial_poll_bit(uport, SE_GENI_M_IRQ_STATUS,
- M_TX_FIFO_WATERMARK_EN, true));
writel(c, uport->membase + SE_GENI_TX_FIFOn);
- writel(M_TX_FIFO_WATERMARK_EN, uport->membase + SE_GENI_M_IRQ_CLEAR);
qcom_geni_serial_poll_tx_done(uport);
}
#endif
@@ -677,13 +679,10 @@ static void qcom_geni_serial_stop_tx_fifo(struct uart_port *uport)
writel(irq_en, uport->membase + SE_GENI_M_IRQ_EN);
}
-static void qcom_geni_serial_cancel_tx_cmd(struct uart_port *uport)
+static void __qcom_geni_serial_cancel_tx_cmd(struct uart_port *uport)
{
struct qcom_geni_serial_port *port = to_dev_port(uport);
- if (!qcom_geni_serial_main_active(uport))
- return;
-
geni_se_cancel_m_cmd(&port->se);
if (!qcom_geni_serial_poll_bit(uport, SE_GENI_M_IRQ_STATUS,
M_CMD_CANCEL_EN, true)) {
@@ -693,6 +692,16 @@ static void qcom_geni_serial_cancel_tx_cmd(struct uart_port *uport)
writel(M_CMD_ABORT_EN, uport->membase + SE_GENI_M_IRQ_CLEAR);
}
writel(M_CMD_CANCEL_EN, uport->membase + SE_GENI_M_IRQ_CLEAR);
+}
+
+static void qcom_geni_serial_cancel_tx_cmd(struct uart_port *uport)
+{
+ struct qcom_geni_serial_port *port = to_dev_port(uport);
+
+ if (!qcom_geni_serial_main_active(uport))
+ return;
+
+ __qcom_geni_serial_cancel_tx_cmd(uport);
port->tx_remaining = 0;
port->tx_queued = 0;
@@ -919,7 +928,7 @@ static void qcom_geni_serial_handle_tx_fifo(struct uart_port *uport,
if (!chunk)
goto out_write_wakeup;
- if (!port->tx_remaining) {
+ if (!active) {
qcom_geni_serial_setup_tx(uport, pending);
port->tx_remaining = pending;
port->tx_queued = 0;
The patch below does not apply to the 6.11-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.11.y
git checkout FETCH_HEAD
git cherry-pick -x 63d14d974d3d82d0feeae8c73b055d330051e1b4
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100117-cannabis-glory-6c7d@gregkh' --subject-prefix 'PATCH 6.11.y' HEAD^..
Possible dependencies:
63d14d974d3d ("serial: qcom-geni: fix polled console corruption")
cc4a0e5754a1 ("serial: qcom-geni: fix console corruption")
f97cdbbf187f ("serial: qcom-geni: fix false console tx restart")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 63d14d974d3d82d0feeae8c73b055d330051e1b4 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro(a)kernel.org>
Date: Fri, 6 Sep 2024 15:13:36 +0200
Subject: [PATCH] serial: qcom-geni: fix polled console corruption
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The polled UART operations are used by the kernel debugger (KDB, KGDB),
which can interrupt the kernel at any point in time. The current
Qualcomm GENI implementation does not really work when there is on-going
serial output as it inadvertently "hijacks" the current tx command,
which can result in both the initial debugger output being corrupted as
well as the corruption of any on-going serial output (up to 4k
characters) when execution resumes:
0190: abcdefghijklmnopqrstuvwxyz0123456789 0190: abcdefghijklmnopqrstuvwxyz0123456789
0191: abcdefghijklmnop[ 50.825552] sysrq: DEBUG
qrstuvwxyz0123456789 0191: abcdefghijklmnopqrstuvwxyz0123456789
Entering kdb (current=0xffff53510b4cd280, pid 640) on processor 2 due to Keyboard Entry
[2]kdb> go
omlji3h3h2g2g1f1f0e0ezdzdycycxbxbwawav :t72r2rp
o9n976k5j5j4i4i3h3h2g2g1f1f0e0ezdzdycycxbxbwawavu:t7t8s8s8r2r2q0q0p
o9n9n8ml6k6k5j5j4i4i3h3h2g2g1f1f0e0ezdzdycycxbxbwawav v u:u:t9t0s4s4rq0p
o9n9n8m8m7l7l6k6k5j5j40q0p p o
o9n9n8m8m7l7l6k6k5j5j4i4i3h3h2g2g1f1f0e0ezdzdycycxbxbwawav :t8t9s4s4r4r4q0q0p
Fix this by making sure that the polled output implementation waits for
the tx fifo to drain before cancelling any on-going longer transfers. As
the polled code cannot take any locks, leave the state variables as they
are and instead make sure that the interrupt handler always starts a new
tx command when there is data in the write buffer.
Since the debugger can interrupt the interrupt handler when it is
writing data to the tx fifo, it is currently not possible to fully
prevent losing up to 64 bytes of tty output on resume.
Fixes: c4f528795d1a ("tty: serial: msm_geni_serial: Add serial driver support for GENI based QUP")
Cc: stable(a)vger.kernel.org # 4.17
Reviewed-by: Douglas Anderson <dianders(a)chromium.org>
Tested-by: Nícolas F. R. A. Prado <nfraprado(a)collabora.com>
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
Link: https://lore.kernel.org/r/20240906131336.23625-9-johan+linaro@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c
index f23fd0ac3cfd..6f0db310cf69 100644
--- a/drivers/tty/serial/qcom_geni_serial.c
+++ b/drivers/tty/serial/qcom_geni_serial.c
@@ -145,6 +145,7 @@ static const struct uart_ops qcom_geni_uart_pops;
static struct uart_driver qcom_geni_console_driver;
static struct uart_driver qcom_geni_uart_driver;
+static void __qcom_geni_serial_cancel_tx_cmd(struct uart_port *uport);
static void qcom_geni_serial_cancel_tx_cmd(struct uart_port *uport);
static inline struct qcom_geni_serial_port *to_dev_port(struct uart_port *uport)
@@ -384,13 +385,14 @@ static int qcom_geni_serial_get_char(struct uart_port *uport)
static void qcom_geni_serial_poll_put_char(struct uart_port *uport,
unsigned char c)
{
- writel(DEF_TX_WM, uport->membase + SE_GENI_TX_WATERMARK_REG);
+ if (qcom_geni_serial_main_active(uport)) {
+ qcom_geni_serial_poll_tx_done(uport);
+ __qcom_geni_serial_cancel_tx_cmd(uport);
+ }
+
writel(M_CMD_DONE_EN, uport->membase + SE_GENI_M_IRQ_CLEAR);
qcom_geni_serial_setup_tx(uport, 1);
- WARN_ON(!qcom_geni_serial_poll_bit(uport, SE_GENI_M_IRQ_STATUS,
- M_TX_FIFO_WATERMARK_EN, true));
writel(c, uport->membase + SE_GENI_TX_FIFOn);
- writel(M_TX_FIFO_WATERMARK_EN, uport->membase + SE_GENI_M_IRQ_CLEAR);
qcom_geni_serial_poll_tx_done(uport);
}
#endif
@@ -677,13 +679,10 @@ static void qcom_geni_serial_stop_tx_fifo(struct uart_port *uport)
writel(irq_en, uport->membase + SE_GENI_M_IRQ_EN);
}
-static void qcom_geni_serial_cancel_tx_cmd(struct uart_port *uport)
+static void __qcom_geni_serial_cancel_tx_cmd(struct uart_port *uport)
{
struct qcom_geni_serial_port *port = to_dev_port(uport);
- if (!qcom_geni_serial_main_active(uport))
- return;
-
geni_se_cancel_m_cmd(&port->se);
if (!qcom_geni_serial_poll_bit(uport, SE_GENI_M_IRQ_STATUS,
M_CMD_CANCEL_EN, true)) {
@@ -693,6 +692,16 @@ static void qcom_geni_serial_cancel_tx_cmd(struct uart_port *uport)
writel(M_CMD_ABORT_EN, uport->membase + SE_GENI_M_IRQ_CLEAR);
}
writel(M_CMD_CANCEL_EN, uport->membase + SE_GENI_M_IRQ_CLEAR);
+}
+
+static void qcom_geni_serial_cancel_tx_cmd(struct uart_port *uport)
+{
+ struct qcom_geni_serial_port *port = to_dev_port(uport);
+
+ if (!qcom_geni_serial_main_active(uport))
+ return;
+
+ __qcom_geni_serial_cancel_tx_cmd(uport);
port->tx_remaining = 0;
port->tx_queued = 0;
@@ -919,7 +928,7 @@ static void qcom_geni_serial_handle_tx_fifo(struct uart_port *uport,
if (!chunk)
goto out_write_wakeup;
- if (!port->tx_remaining) {
+ if (!active) {
qcom_geni_serial_setup_tx(uport, pending);
port->tx_remaining = pending;
port->tx_queued = 0;
The patch below does not apply to the 6.11-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.11.y
git checkout FETCH_HEAD
git cherry-pick -x cc4a0e5754a16bbc1e215c091349a7c83a2c5e14
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100139-sterilize-freckles-6bc2@gregkh' --subject-prefix 'PATCH 6.11.y' HEAD^..
Possible dependencies:
cc4a0e5754a1 ("serial: qcom-geni: fix console corruption")
f97cdbbf187f ("serial: qcom-geni: fix false console tx restart")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From cc4a0e5754a16bbc1e215c091349a7c83a2c5e14 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro(a)kernel.org>
Date: Fri, 6 Sep 2024 15:13:34 +0200
Subject: [PATCH] serial: qcom-geni: fix console corruption
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The Qualcomm serial console implementation is broken and can lose
characters when the serial port is also used for tty output.
Specifically, the console code only waits for the current tx command to
complete when all data has already been written to the fifo. When there
are on-going longer transfers this often means that console output is
lost when the console code inadvertently "hijacks" the current tx
command instead of starting a new one.
This can, for example, be observed during boot when console output that
should have been interspersed with init output is truncated:
[ 9.462317] qcom-snps-eusb2-hsphy fde000.phy: Registered Qcom-eUSB2 phy
[ OK ] Found device KBG50ZNS256G KIOXIA Wi[ 9.471743ndows.
[ 9.539915] xhci-hcd xhci-hcd.0.auto: xHCI Host Controller
Add a new state variable to track how much data has been written to the
fifo and use it to determine when the fifo and shift register are both
empty. This is needed since there is currently no other known way to
determine when the shift register is empty.
This in turn allows the console code to interrupt long transfers without
losing data.
Note that the oops-in-progress case is similarly broken as it does not
cancel any active command and also waits for the wrong status flag when
attempting to drain the fifo (TX_FIFO_NOT_EMPTY_EN is only set when
cancelling a command leaves data in the fifo).
Fixes: c4f528795d1a ("tty: serial: msm_geni_serial: Add serial driver support for GENI based QUP")
Fixes: a1fee899e5be ("tty: serial: qcom_geni_serial: Fix softlock")
Fixes: 9e957a155005 ("serial: qcom-geni: Don't cancel/abort if we can't get the port lock")
Cc: stable(a)vger.kernel.org # 4.17
Reviewed-by: Douglas Anderson <dianders(a)chromium.org>
Tested-by: Nícolas F. R. A. Prado <nfraprado(a)collabora.com>
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
Link: https://lore.kernel.org/r/20240906131336.23625-7-johan+linaro@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c
index 7bbd70c30620..f8f6e9466b40 100644
--- a/drivers/tty/serial/qcom_geni_serial.c
+++ b/drivers/tty/serial/qcom_geni_serial.c
@@ -131,6 +131,7 @@ struct qcom_geni_serial_port {
bool brk;
unsigned int tx_remaining;
+ unsigned int tx_queued;
int wakeup_irq;
bool rx_tx_swap;
bool cts_rts_swap;
@@ -144,6 +145,8 @@ static const struct uart_ops qcom_geni_uart_pops;
static struct uart_driver qcom_geni_console_driver;
static struct uart_driver qcom_geni_uart_driver;
+static void qcom_geni_serial_cancel_tx_cmd(struct uart_port *uport);
+
static inline struct qcom_geni_serial_port *to_dev_port(struct uart_port *uport)
{
return container_of(uport, struct qcom_geni_serial_port, uport);
@@ -393,6 +396,14 @@ static void qcom_geni_serial_poll_put_char(struct uart_port *uport,
#endif
#ifdef CONFIG_SERIAL_QCOM_GENI_CONSOLE
+static void qcom_geni_serial_drain_fifo(struct uart_port *uport)
+{
+ struct qcom_geni_serial_port *port = to_dev_port(uport);
+
+ qcom_geni_serial_poll_bitfield(uport, SE_GENI_M_GP_LENGTH, GP_LENGTH,
+ port->tx_queued);
+}
+
static void qcom_geni_serial_wr_char(struct uart_port *uport, unsigned char ch)
{
struct qcom_geni_private_data *private_data = uport->private_data;
@@ -468,7 +479,6 @@ static void qcom_geni_serial_console_write(struct console *co, const char *s,
struct qcom_geni_serial_port *port;
bool locked = true;
unsigned long flags;
- u32 geni_status;
WARN_ON(co->index < 0 || co->index >= GENI_UART_CONS_PORTS);
@@ -482,34 +492,20 @@ static void qcom_geni_serial_console_write(struct console *co, const char *s,
else
uart_port_lock_irqsave(uport, &flags);
- geni_status = readl(uport->membase + SE_GENI_STATUS);
+ if (qcom_geni_serial_main_active(uport)) {
+ /* Wait for completion or drain FIFO */
+ if (!locked || port->tx_remaining == 0)
+ qcom_geni_serial_poll_tx_done(uport);
+ else
+ qcom_geni_serial_drain_fifo(uport);
- if (!locked) {
- /*
- * We can only get here if an oops is in progress then we were
- * unable to get the lock. This means we can't safely access
- * our state variables like tx_remaining. About the best we
- * can do is wait for the FIFO to be empty before we start our
- * transfer, so we'll do that.
- */
- qcom_geni_serial_poll_bit(uport, SE_GENI_M_IRQ_STATUS,
- M_TX_FIFO_NOT_EMPTY_EN, false);
- } else if ((geni_status & M_GENI_CMD_ACTIVE) && !port->tx_remaining) {
- /*
- * It seems we can't interrupt existing transfers if all data
- * has been sent, in which case we need to look for done first.
- */
- qcom_geni_serial_poll_tx_done(uport);
+ qcom_geni_serial_cancel_tx_cmd(uport);
}
__qcom_geni_serial_console_write(uport, s, count);
-
- if (locked) {
- if (port->tx_remaining)
- qcom_geni_serial_setup_tx(uport, port->tx_remaining);
+ if (locked)
uart_port_unlock_irqrestore(uport, flags);
- }
}
static void handle_rx_console(struct uart_port *uport, u32 bytes, bool drop)
@@ -690,6 +686,7 @@ static void qcom_geni_serial_cancel_tx_cmd(struct uart_port *uport)
writel(M_CMD_CANCEL_EN, uport->membase + SE_GENI_M_IRQ_CLEAR);
port->tx_remaining = 0;
+ port->tx_queued = 0;
}
static void qcom_geni_serial_handle_rx_fifo(struct uart_port *uport, bool drop)
@@ -916,6 +913,7 @@ static void qcom_geni_serial_handle_tx_fifo(struct uart_port *uport,
if (!port->tx_remaining) {
qcom_geni_serial_setup_tx(uport, pending);
port->tx_remaining = pending;
+ port->tx_queued = 0;
irq_en = readl(uport->membase + SE_GENI_M_IRQ_EN);
if (!(irq_en & M_TX_FIFO_WATERMARK_EN))
@@ -924,6 +922,7 @@ static void qcom_geni_serial_handle_tx_fifo(struct uart_port *uport,
}
qcom_geni_serial_send_chunk_fifo(uport, chunk);
+ port->tx_queued += chunk;
/*
* The tx fifo watermark is level triggered and latched. Though we had
The patch below does not apply to the 6.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.10.y
git checkout FETCH_HEAD
git cherry-pick -x cc4a0e5754a16bbc1e215c091349a7c83a2c5e14
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100138-family-anvil-4319@gregkh' --subject-prefix 'PATCH 6.10.y' HEAD^..
Possible dependencies:
cc4a0e5754a1 ("serial: qcom-geni: fix console corruption")
f97cdbbf187f ("serial: qcom-geni: fix false console tx restart")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From cc4a0e5754a16bbc1e215c091349a7c83a2c5e14 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro(a)kernel.org>
Date: Fri, 6 Sep 2024 15:13:34 +0200
Subject: [PATCH] serial: qcom-geni: fix console corruption
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The Qualcomm serial console implementation is broken and can lose
characters when the serial port is also used for tty output.
Specifically, the console code only waits for the current tx command to
complete when all data has already been written to the fifo. When there
are on-going longer transfers this often means that console output is
lost when the console code inadvertently "hijacks" the current tx
command instead of starting a new one.
This can, for example, be observed during boot when console output that
should have been interspersed with init output is truncated:
[ 9.462317] qcom-snps-eusb2-hsphy fde000.phy: Registered Qcom-eUSB2 phy
[ OK ] Found device KBG50ZNS256G KIOXIA Wi[ 9.471743ndows.
[ 9.539915] xhci-hcd xhci-hcd.0.auto: xHCI Host Controller
Add a new state variable to track how much data has been written to the
fifo and use it to determine when the fifo and shift register are both
empty. This is needed since there is currently no other known way to
determine when the shift register is empty.
This in turn allows the console code to interrupt long transfers without
losing data.
Note that the oops-in-progress case is similarly broken as it does not
cancel any active command and also waits for the wrong status flag when
attempting to drain the fifo (TX_FIFO_NOT_EMPTY_EN is only set when
cancelling a command leaves data in the fifo).
Fixes: c4f528795d1a ("tty: serial: msm_geni_serial: Add serial driver support for GENI based QUP")
Fixes: a1fee899e5be ("tty: serial: qcom_geni_serial: Fix softlock")
Fixes: 9e957a155005 ("serial: qcom-geni: Don't cancel/abort if we can't get the port lock")
Cc: stable(a)vger.kernel.org # 4.17
Reviewed-by: Douglas Anderson <dianders(a)chromium.org>
Tested-by: Nícolas F. R. A. Prado <nfraprado(a)collabora.com>
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
Link: https://lore.kernel.org/r/20240906131336.23625-7-johan+linaro@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c
index 7bbd70c30620..f8f6e9466b40 100644
--- a/drivers/tty/serial/qcom_geni_serial.c
+++ b/drivers/tty/serial/qcom_geni_serial.c
@@ -131,6 +131,7 @@ struct qcom_geni_serial_port {
bool brk;
unsigned int tx_remaining;
+ unsigned int tx_queued;
int wakeup_irq;
bool rx_tx_swap;
bool cts_rts_swap;
@@ -144,6 +145,8 @@ static const struct uart_ops qcom_geni_uart_pops;
static struct uart_driver qcom_geni_console_driver;
static struct uart_driver qcom_geni_uart_driver;
+static void qcom_geni_serial_cancel_tx_cmd(struct uart_port *uport);
+
static inline struct qcom_geni_serial_port *to_dev_port(struct uart_port *uport)
{
return container_of(uport, struct qcom_geni_serial_port, uport);
@@ -393,6 +396,14 @@ static void qcom_geni_serial_poll_put_char(struct uart_port *uport,
#endif
#ifdef CONFIG_SERIAL_QCOM_GENI_CONSOLE
+static void qcom_geni_serial_drain_fifo(struct uart_port *uport)
+{
+ struct qcom_geni_serial_port *port = to_dev_port(uport);
+
+ qcom_geni_serial_poll_bitfield(uport, SE_GENI_M_GP_LENGTH, GP_LENGTH,
+ port->tx_queued);
+}
+
static void qcom_geni_serial_wr_char(struct uart_port *uport, unsigned char ch)
{
struct qcom_geni_private_data *private_data = uport->private_data;
@@ -468,7 +479,6 @@ static void qcom_geni_serial_console_write(struct console *co, const char *s,
struct qcom_geni_serial_port *port;
bool locked = true;
unsigned long flags;
- u32 geni_status;
WARN_ON(co->index < 0 || co->index >= GENI_UART_CONS_PORTS);
@@ -482,34 +492,20 @@ static void qcom_geni_serial_console_write(struct console *co, const char *s,
else
uart_port_lock_irqsave(uport, &flags);
- geni_status = readl(uport->membase + SE_GENI_STATUS);
+ if (qcom_geni_serial_main_active(uport)) {
+ /* Wait for completion or drain FIFO */
+ if (!locked || port->tx_remaining == 0)
+ qcom_geni_serial_poll_tx_done(uport);
+ else
+ qcom_geni_serial_drain_fifo(uport);
- if (!locked) {
- /*
- * We can only get here if an oops is in progress then we were
- * unable to get the lock. This means we can't safely access
- * our state variables like tx_remaining. About the best we
- * can do is wait for the FIFO to be empty before we start our
- * transfer, so we'll do that.
- */
- qcom_geni_serial_poll_bit(uport, SE_GENI_M_IRQ_STATUS,
- M_TX_FIFO_NOT_EMPTY_EN, false);
- } else if ((geni_status & M_GENI_CMD_ACTIVE) && !port->tx_remaining) {
- /*
- * It seems we can't interrupt existing transfers if all data
- * has been sent, in which case we need to look for done first.
- */
- qcom_geni_serial_poll_tx_done(uport);
+ qcom_geni_serial_cancel_tx_cmd(uport);
}
__qcom_geni_serial_console_write(uport, s, count);
-
- if (locked) {
- if (port->tx_remaining)
- qcom_geni_serial_setup_tx(uport, port->tx_remaining);
+ if (locked)
uart_port_unlock_irqrestore(uport, flags);
- }
}
static void handle_rx_console(struct uart_port *uport, u32 bytes, bool drop)
@@ -690,6 +686,7 @@ static void qcom_geni_serial_cancel_tx_cmd(struct uart_port *uport)
writel(M_CMD_CANCEL_EN, uport->membase + SE_GENI_M_IRQ_CLEAR);
port->tx_remaining = 0;
+ port->tx_queued = 0;
}
static void qcom_geni_serial_handle_rx_fifo(struct uart_port *uport, bool drop)
@@ -916,6 +913,7 @@ static void qcom_geni_serial_handle_tx_fifo(struct uart_port *uport,
if (!port->tx_remaining) {
qcom_geni_serial_setup_tx(uport, pending);
port->tx_remaining = pending;
+ port->tx_queued = 0;
irq_en = readl(uport->membase + SE_GENI_M_IRQ_EN);
if (!(irq_en & M_TX_FIFO_WATERMARK_EN))
@@ -924,6 +922,7 @@ static void qcom_geni_serial_handle_tx_fifo(struct uart_port *uport,
}
qcom_geni_serial_send_chunk_fifo(uport, chunk);
+ port->tx_queued += chunk;
/*
* The tx fifo watermark is level triggered and latched. Though we had