The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
bd97a59da6a8 ("nvme-auth: use workqueue dedicated to authentication")
aa36d711e945 ("nvme-auth: convert dhchap_auth_list to an array")
8d1c1904e947 ("nvme-auth: clear sensitive info right after authentication completes")
e481fc0a3777 ("nvme-auth: guarantee dhchap buffers under memory pressure")
b7d604cae8f6 ("nvme-auth: don't keep long lived 4k dhchap buffer")
bfc4068e1e55 ("nvme-auth: remove redundant if statement")
01604350e145 ("nvme-auth: don't override ctrl keys before validation")
193a8c7e5f1a ("nvme-auth: don't ignore key generation failures when initializing ctrl keys")
f6b182fbd5c6 ("nvme-auth: remove redundant buffer deallocations")
0c999e69c40a ("nvme-auth: rename authentication work elements")
0a7ce375f83f ("nvme-auth: rename __nvme_auth_[reset|free] to nvme_auth[reset|free]_dhchap")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From bd97a59da6a866e3dee5d2a2d582ec71dbbc84cd Mon Sep 17 00:00:00 2001
From: Shin'ichiro Kawasaki <shinichiro.kawasaki(a)wdc.com>
Date: Tue, 31 Jan 2023 18:26:44 +0900
Subject: [PATCH] nvme-auth: use workqueue dedicated to authentication
NVMe In-Band authentication uses two kinds of works: chap->auth_work and
ctrl->dhchap_auth_work. The latter work flushes or cancels the former
work. However, the both works are queued to the same workqueue nvme-wq.
It results in the lockdep WARNING as follows:
WARNING: possible recursive locking detected
6.2.0-rc4+ #1 Not tainted
--------------------------------------------
kworker/u16:7/69 is trying to acquire lock:
ffff902d52e65548 ((wq_completion)nvme-wq){+.+.}-{0:0}, at: start_flush_work+0x2c5/0x380
but task is already holding lock:
ffff902d52e65548 ((wq_completion)nvme-wq){+.+.}-{0:0}, at: process_one_work+0x210/0x410
To avoid the WARNING, introduce a new workqueue nvme-auth-wq dedicated
to chap->auth_work.
Reported-by: Daniel Wagner <dwagner(a)suse.de>
Link: https://lore.kernel.org/linux-nvme/20230130110802.paafkiipmitwtnwr@carbon.l…
Fixes: f50fff73d620 ("nvme: implement In-Band authentication")
Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawasaki(a)wdc.com>
Tested-by: Daniel Wagner <dwagner(a)suse.de>
Reviewed-by: Hannes Reinecke <hare(a)suse.de>
Signed-off-by: Christoph Hellwig <hch(a)lst.de>
diff --git a/drivers/nvme/host/auth.c b/drivers/nvme/host/auth.c
index 4424f53a8a0a..b57630d1d3b8 100644
--- a/drivers/nvme/host/auth.c
+++ b/drivers/nvme/host/auth.c
@@ -45,6 +45,8 @@ struct nvme_dhchap_queue_context {
int sess_key_len;
};
+struct workqueue_struct *nvme_auth_wq;
+
#define nvme_auth_flags_from_qid(qid) \
(qid == 0) ? 0 : BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_RESERVED
#define nvme_auth_queue_from_qid(ctrl, qid) \
@@ -866,7 +868,7 @@ int nvme_auth_negotiate(struct nvme_ctrl *ctrl, int qid)
chap = &ctrl->dhchap_ctxs[qid];
cancel_work_sync(&chap->auth_work);
- queue_work(nvme_wq, &chap->auth_work);
+ queue_work(nvme_auth_wq, &chap->auth_work);
return 0;
}
EXPORT_SYMBOL_GPL(nvme_auth_negotiate);
@@ -1008,10 +1010,15 @@ EXPORT_SYMBOL_GPL(nvme_auth_free);
int __init nvme_init_auth(void)
{
+ nvme_auth_wq = alloc_workqueue("nvme-auth-wq",
+ WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
+ if (!nvme_auth_wq)
+ return -ENOMEM;
+
nvme_chap_buf_cache = kmem_cache_create("nvme-chap-buf-cache",
CHAP_BUF_SIZE, 0, SLAB_HWCACHE_ALIGN, NULL);
if (!nvme_chap_buf_cache)
- return -ENOMEM;
+ goto err_destroy_workqueue;
nvme_chap_buf_pool = mempool_create(16, mempool_alloc_slab,
mempool_free_slab, nvme_chap_buf_cache);
@@ -1021,6 +1028,8 @@ int __init nvme_init_auth(void)
return 0;
err_destroy_chap_buf_cache:
kmem_cache_destroy(nvme_chap_buf_cache);
+err_destroy_workqueue:
+ destroy_workqueue(nvme_auth_wq);
return -ENOMEM;
}
@@ -1028,4 +1037,5 @@ void __exit nvme_exit_auth(void)
{
mempool_destroy(nvme_chap_buf_pool);
kmem_cache_destroy(nvme_chap_buf_cache);
+ destroy_workqueue(nvme_auth_wq);
}
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
fc546faa5595 ("powerpc/kexec_file: Count hot-pluggable memory in FDT estimate")
7294194b47e9 ("powerpc/kexec_file: Fix division by zero in extra size estimation")
340a4a9f8773 ("powerpc: Take in account addition CPU node when building kexec FDT")
886db32398ab ("powerpc/kexec_file: Restore FDT size estimation for kdump kernel")
3c985d31ad66 ("powerpc: Use common of_kexec_alloc_and_setup_fdt()")
e6635bab530d ("powerpc: Use ELF fields defined in 'struct kimage'")
2377c92e37fe ("powerpc/kexec_file: fix FDT size estimation for kdump kernel")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From fc546faa559538fb312c77e055243ece18ab3288 Mon Sep 17 00:00:00 2001
From: Sourabh Jain <sourabhjain(a)linux.ibm.com>
Date: Tue, 31 Jan 2023 08:36:15 +0530
Subject: [PATCH] powerpc/kexec_file: Count hot-pluggable memory in FDT
estimate
On Systems where online memory is lesser compared to max memory, the
kexec_file_load system call may fail to load the kdump kernel with the
below errors:
"Failed to update fdt with linux,drconf-usable-memory property"
"Error setting up usable-memory property for kdump kernel"
This happens because the size estimation for usable memory properties
for the kdump kernel's FDT is based on the online memory whereas the
usable memory properties include max memory. In short, the hot-pluggable
memory is not accounted for while estimating the size of the usable
memory properties.
The issue is addressed by calculating usable memory property size using
max hotplug address instead of the last online memory address.
Fixes: 2377c92e37fe ("powerpc/kexec_file: fix FDT size estimation for kdump kernel")
Signed-off-by: Sourabh Jain <sourabhjain(a)linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au>
Link: https://lore.kernel.org/r/20230131030615.729894-1-sourabhjain@linux.ibm.com
diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c
index 19d084682bc2..52085751f5f4 100644
--- a/arch/powerpc/kexec/file_load_64.c
+++ b/arch/powerpc/kexec/file_load_64.c
@@ -990,7 +990,7 @@ unsigned int kexec_extra_fdt_size_ppc64(struct kimage *image)
* number of usable memory entries and use for FDT size estimation.
*/
if (drmem_lmb_size()) {
- usm_entries = ((memblock_end_of_DRAM() / drmem_lmb_size()) +
+ usm_entries = ((memory_hotplug_max() / drmem_lmb_size()) +
(2 * (resource_size(&crashk_res) / drmem_lmb_size())));
extra_size = (unsigned int)(usm_entries * sizeof(u64));
} else {
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
fc546faa5595 ("powerpc/kexec_file: Count hot-pluggable memory in FDT estimate")
7294194b47e9 ("powerpc/kexec_file: Fix division by zero in extra size estimation")
340a4a9f8773 ("powerpc: Take in account addition CPU node when building kexec FDT")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From fc546faa559538fb312c77e055243ece18ab3288 Mon Sep 17 00:00:00 2001
From: Sourabh Jain <sourabhjain(a)linux.ibm.com>
Date: Tue, 31 Jan 2023 08:36:15 +0530
Subject: [PATCH] powerpc/kexec_file: Count hot-pluggable memory in FDT
estimate
On Systems where online memory is lesser compared to max memory, the
kexec_file_load system call may fail to load the kdump kernel with the
below errors:
"Failed to update fdt with linux,drconf-usable-memory property"
"Error setting up usable-memory property for kdump kernel"
This happens because the size estimation for usable memory properties
for the kdump kernel's FDT is based on the online memory whereas the
usable memory properties include max memory. In short, the hot-pluggable
memory is not accounted for while estimating the size of the usable
memory properties.
The issue is addressed by calculating usable memory property size using
max hotplug address instead of the last online memory address.
Fixes: 2377c92e37fe ("powerpc/kexec_file: fix FDT size estimation for kdump kernel")
Signed-off-by: Sourabh Jain <sourabhjain(a)linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au>
Link: https://lore.kernel.org/r/20230131030615.729894-1-sourabhjain@linux.ibm.com
diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c
index 19d084682bc2..52085751f5f4 100644
--- a/arch/powerpc/kexec/file_load_64.c
+++ b/arch/powerpc/kexec/file_load_64.c
@@ -990,7 +990,7 @@ unsigned int kexec_extra_fdt_size_ppc64(struct kimage *image)
* number of usable memory entries and use for FDT size estimation.
*/
if (drmem_lmb_size()) {
- usm_entries = ((memblock_end_of_DRAM() / drmem_lmb_size()) +
+ usm_entries = ((memory_hotplug_max() / drmem_lmb_size()) +
(2 * (resource_size(&crashk_res) / drmem_lmb_size())));
extra_size = (unsigned int)(usm_entries * sizeof(u64));
} else {
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
fc546faa5595 ("powerpc/kexec_file: Count hot-pluggable memory in FDT estimate")
7294194b47e9 ("powerpc/kexec_file: Fix division by zero in extra size estimation")
340a4a9f8773 ("powerpc: Take in account addition CPU node when building kexec FDT")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From fc546faa559538fb312c77e055243ece18ab3288 Mon Sep 17 00:00:00 2001
From: Sourabh Jain <sourabhjain(a)linux.ibm.com>
Date: Tue, 31 Jan 2023 08:36:15 +0530
Subject: [PATCH] powerpc/kexec_file: Count hot-pluggable memory in FDT
estimate
On Systems where online memory is lesser compared to max memory, the
kexec_file_load system call may fail to load the kdump kernel with the
below errors:
"Failed to update fdt with linux,drconf-usable-memory property"
"Error setting up usable-memory property for kdump kernel"
This happens because the size estimation for usable memory properties
for the kdump kernel's FDT is based on the online memory whereas the
usable memory properties include max memory. In short, the hot-pluggable
memory is not accounted for while estimating the size of the usable
memory properties.
The issue is addressed by calculating usable memory property size using
max hotplug address instead of the last online memory address.
Fixes: 2377c92e37fe ("powerpc/kexec_file: fix FDT size estimation for kdump kernel")
Signed-off-by: Sourabh Jain <sourabhjain(a)linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au>
Link: https://lore.kernel.org/r/20230131030615.729894-1-sourabhjain@linux.ibm.com
diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c
index 19d084682bc2..52085751f5f4 100644
--- a/arch/powerpc/kexec/file_load_64.c
+++ b/arch/powerpc/kexec/file_load_64.c
@@ -990,7 +990,7 @@ unsigned int kexec_extra_fdt_size_ppc64(struct kimage *image)
* number of usable memory entries and use for FDT size estimation.
*/
if (drmem_lmb_size()) {
- usm_entries = ((memblock_end_of_DRAM() / drmem_lmb_size()) +
+ usm_entries = ((memory_hotplug_max() / drmem_lmb_size()) +
(2 * (resource_size(&crashk_res) / drmem_lmb_size())));
extra_size = (unsigned int)(usm_entries * sizeof(u64));
} else {
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
98d0219e043e ("powerpc/64s/radix: Fix crash with unaligned relocated kernel")
b150a4d12b91 ("powerpc/vmlinux.lds: Add an explicit symbol for the SRWX boundary")
331771e836e6 ("powerpc/vmlinux.lds: Ensure STRICT_ALIGN_SIZE is at least page aligned")
2a0fb3c155c9 ("powerpc/32: Set an IBAT covering up to _einittext during init")
c4a22611bf6c ("powerpc/603: Use SPRN_SDR1 to store the pgdir phys address")
035b19a15a98 ("powerpc/32s: Always map kernel text and rodata with BATs")
11522448e641 ("powerpc/603: Always fault when _PAGE_ACCESSED is not set")
850659392abc ("powerpc/reg.h: delete duplicated word")
850659392abc ("powerpc/reg.h: delete duplicated word")
850659392abc ("powerpc/reg.h: delete duplicated word")
850659392abc ("powerpc/reg.h: delete duplicated word")
850659392abc ("powerpc/reg.h: delete duplicated word")
850659392abc ("powerpc/reg.h: delete duplicated word")
850659392abc ("powerpc/reg.h: delete duplicated word")
850659392abc ("powerpc/reg.h: delete duplicated word")
850659392abc ("powerpc/reg.h: delete duplicated word")
850659392abc ("powerpc/reg.h: delete duplicated word")
850659392abc ("powerpc/reg.h: delete duplicated word")
850659392abc ("powerpc/reg.h: delete duplicated word")
850659392abc ("powerpc/reg.h: delete duplicated word")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 98d0219e043e09013e883eacde3b93e0b2bf944d Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe(a)ellerman.id.au>
Date: Tue, 10 Jan 2023 23:47:52 +1100
Subject: [PATCH] powerpc/64s/radix: Fix crash with unaligned relocated kernel
If a relocatable kernel is loaded at an address that is not 2MB aligned
and told not to relocate to zero, the kernel can crash due to
mark_rodata_ro() incorrectly changing some read-write data to read-only.
Scenarios where the misalignment can occur are when the kernel is
loaded by kdump or using the RELOCATABLE_TEST config option.
Example crash with the kernel loaded at 5MB:
Run /sbin/init as init process
BUG: Unable to handle kernel data access on write at 0xc000000000452000
Faulting instruction address: 0xc0000000005b6730
Oops: Kernel access of bad area, sig: 11 [#1]
LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA pSeries
CPU: 1 PID: 1 Comm: init Not tainted 6.2.0-rc1-00011-g349188be4841 #166
Hardware name: IBM pSeries (emulated by qemu) POWER9 (raw) 0x4e1202 0xf000005 of:SLOF,git-5b4c5a hv:linux,kvm pSeries
NIP: c0000000005b6730 LR: c000000000ae9ab8 CTR: 0000000000000380
REGS: c000000004503250 TRAP: 0300 Not tainted (6.2.0-rc1-00011-g349188be4841)
MSR: 8000000000009033 <SF,EE,ME,IR,DR,RI,LE> CR: 44288480 XER: 00000000
CFAR: c0000000005b66ec DAR: c000000000452000 DSISR: 0a000000 IRQMASK: 0
...
NIP memset+0x68/0x104
LR zero_user_segments.constprop.0+0xa8/0xf0
Call Trace:
ext4_mpage_readpages+0x7f8/0x830
ext4_readahead+0x48/0x60
read_pages+0xb8/0x380
page_cache_ra_unbounded+0x19c/0x250
filemap_fault+0x58c/0xae0
__do_fault+0x60/0x100
__handle_mm_fault+0x1230/0x1a40
handle_mm_fault+0x120/0x300
___do_page_fault+0x20c/0xa80
do_page_fault+0x30/0xc0
data_access_common_virt+0x210/0x220
This happens because mark_rodata_ro() tries to change permissions on the
range _stext..__end_rodata, but _stext sits in the middle of the 2MB
page from 4MB to 6MB:
radix-mmu: Mapped 0x0000000000000000-0x0000000000200000 with 2.00 MiB pages (exec)
radix-mmu: Mapped 0x0000000000200000-0x0000000000400000 with 2.00 MiB pages
radix-mmu: Mapped 0x0000000000400000-0x0000000002400000 with 2.00 MiB pages (exec)
The logic that changes the permissions assumes the linear mapping was
split correctly at boot, so it marks the entire 2MB page read-only. That
leads to the write fault above.
To fix it, the boot time mapping logic needs to consider that if the
kernel is running at a non-zero address then _stext is a boundary where
it must split the mapping.
That leads to the mapping being split correctly, allowing the rodata
permission change to take happen correctly, with no spillover:
radix-mmu: Mapped 0x0000000000000000-0x0000000000200000 with 2.00 MiB pages (exec)
radix-mmu: Mapped 0x0000000000200000-0x0000000000400000 with 2.00 MiB pages
radix-mmu: Mapped 0x0000000000400000-0x0000000000500000 with 64.0 KiB pages
radix-mmu: Mapped 0x0000000000500000-0x0000000000600000 with 64.0 KiB pages (exec)
radix-mmu: Mapped 0x0000000000600000-0x0000000002400000 with 2.00 MiB pages (exec)
If the kernel is loaded at a 2MB aligned address, the mapping continues
to use 2MB pages as before:
radix-mmu: Mapped 0x0000000000000000-0x0000000000200000 with 2.00 MiB pages (exec)
radix-mmu: Mapped 0x0000000000200000-0x0000000000400000 with 2.00 MiB pages
radix-mmu: Mapped 0x0000000000400000-0x0000000002c00000 with 2.00 MiB pages (exec)
radix-mmu: Mapped 0x0000000002c00000-0x0000000100000000 with 2.00 MiB pages
Fixes: c55d7b5e6426 ("powerpc: Remove STRICT_KERNEL_RWX incompatibility with RELOCATABLE")
Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au>
Link: https://lore.kernel.org/r/20230110124753.1325426-1-mpe@ellerman.id.au
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index cac727b01799..5a2384ed1727 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -262,6 +262,17 @@ print_mapping(unsigned long start, unsigned long end, unsigned long size, bool e
static unsigned long next_boundary(unsigned long addr, unsigned long end)
{
#ifdef CONFIG_STRICT_KERNEL_RWX
+ unsigned long stext_phys;
+
+ stext_phys = __pa_symbol(_stext);
+
+ // Relocatable kernel running at non-zero real address
+ if (stext_phys != 0) {
+ // Start of relocated kernel text is a rodata boundary
+ if (addr < stext_phys)
+ return stext_phys;
+ }
+
if (addr < __pa_symbol(__srwx_boundary))
return __pa_symbol(__srwx_boundary);
#endif
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
98d0219e043e ("powerpc/64s/radix: Fix crash with unaligned relocated kernel")
b150a4d12b91 ("powerpc/vmlinux.lds: Add an explicit symbol for the SRWX boundary")
331771e836e6 ("powerpc/vmlinux.lds: Ensure STRICT_ALIGN_SIZE is at least page aligned")
2a0fb3c155c9 ("powerpc/32: Set an IBAT covering up to _einittext during init")
c4a22611bf6c ("powerpc/603: Use SPRN_SDR1 to store the pgdir phys address")
035b19a15a98 ("powerpc/32s: Always map kernel text and rodata with BATs")
11522448e641 ("powerpc/603: Always fault when _PAGE_ACCESSED is not set")
850659392abc ("powerpc/reg.h: delete duplicated word")
850659392abc ("powerpc/reg.h: delete duplicated word")
850659392abc ("powerpc/reg.h: delete duplicated word")
850659392abc ("powerpc/reg.h: delete duplicated word")
850659392abc ("powerpc/reg.h: delete duplicated word")
850659392abc ("powerpc/reg.h: delete duplicated word")
850659392abc ("powerpc/reg.h: delete duplicated word")
850659392abc ("powerpc/reg.h: delete duplicated word")
850659392abc ("powerpc/reg.h: delete duplicated word")
850659392abc ("powerpc/reg.h: delete duplicated word")
850659392abc ("powerpc/reg.h: delete duplicated word")
850659392abc ("powerpc/reg.h: delete duplicated word")
850659392abc ("powerpc/reg.h: delete duplicated word")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 98d0219e043e09013e883eacde3b93e0b2bf944d Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe(a)ellerman.id.au>
Date: Tue, 10 Jan 2023 23:47:52 +1100
Subject: [PATCH] powerpc/64s/radix: Fix crash with unaligned relocated kernel
If a relocatable kernel is loaded at an address that is not 2MB aligned
and told not to relocate to zero, the kernel can crash due to
mark_rodata_ro() incorrectly changing some read-write data to read-only.
Scenarios where the misalignment can occur are when the kernel is
loaded by kdump or using the RELOCATABLE_TEST config option.
Example crash with the kernel loaded at 5MB:
Run /sbin/init as init process
BUG: Unable to handle kernel data access on write at 0xc000000000452000
Faulting instruction address: 0xc0000000005b6730
Oops: Kernel access of bad area, sig: 11 [#1]
LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA pSeries
CPU: 1 PID: 1 Comm: init Not tainted 6.2.0-rc1-00011-g349188be4841 #166
Hardware name: IBM pSeries (emulated by qemu) POWER9 (raw) 0x4e1202 0xf000005 of:SLOF,git-5b4c5a hv:linux,kvm pSeries
NIP: c0000000005b6730 LR: c000000000ae9ab8 CTR: 0000000000000380
REGS: c000000004503250 TRAP: 0300 Not tainted (6.2.0-rc1-00011-g349188be4841)
MSR: 8000000000009033 <SF,EE,ME,IR,DR,RI,LE> CR: 44288480 XER: 00000000
CFAR: c0000000005b66ec DAR: c000000000452000 DSISR: 0a000000 IRQMASK: 0
...
NIP memset+0x68/0x104
LR zero_user_segments.constprop.0+0xa8/0xf0
Call Trace:
ext4_mpage_readpages+0x7f8/0x830
ext4_readahead+0x48/0x60
read_pages+0xb8/0x380
page_cache_ra_unbounded+0x19c/0x250
filemap_fault+0x58c/0xae0
__do_fault+0x60/0x100
__handle_mm_fault+0x1230/0x1a40
handle_mm_fault+0x120/0x300
___do_page_fault+0x20c/0xa80
do_page_fault+0x30/0xc0
data_access_common_virt+0x210/0x220
This happens because mark_rodata_ro() tries to change permissions on the
range _stext..__end_rodata, but _stext sits in the middle of the 2MB
page from 4MB to 6MB:
radix-mmu: Mapped 0x0000000000000000-0x0000000000200000 with 2.00 MiB pages (exec)
radix-mmu: Mapped 0x0000000000200000-0x0000000000400000 with 2.00 MiB pages
radix-mmu: Mapped 0x0000000000400000-0x0000000002400000 with 2.00 MiB pages (exec)
The logic that changes the permissions assumes the linear mapping was
split correctly at boot, so it marks the entire 2MB page read-only. That
leads to the write fault above.
To fix it, the boot time mapping logic needs to consider that if the
kernel is running at a non-zero address then _stext is a boundary where
it must split the mapping.
That leads to the mapping being split correctly, allowing the rodata
permission change to take happen correctly, with no spillover:
radix-mmu: Mapped 0x0000000000000000-0x0000000000200000 with 2.00 MiB pages (exec)
radix-mmu: Mapped 0x0000000000200000-0x0000000000400000 with 2.00 MiB pages
radix-mmu: Mapped 0x0000000000400000-0x0000000000500000 with 64.0 KiB pages
radix-mmu: Mapped 0x0000000000500000-0x0000000000600000 with 64.0 KiB pages (exec)
radix-mmu: Mapped 0x0000000000600000-0x0000000002400000 with 2.00 MiB pages (exec)
If the kernel is loaded at a 2MB aligned address, the mapping continues
to use 2MB pages as before:
radix-mmu: Mapped 0x0000000000000000-0x0000000000200000 with 2.00 MiB pages (exec)
radix-mmu: Mapped 0x0000000000200000-0x0000000000400000 with 2.00 MiB pages
radix-mmu: Mapped 0x0000000000400000-0x0000000002c00000 with 2.00 MiB pages (exec)
radix-mmu: Mapped 0x0000000002c00000-0x0000000100000000 with 2.00 MiB pages
Fixes: c55d7b5e6426 ("powerpc: Remove STRICT_KERNEL_RWX incompatibility with RELOCATABLE")
Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au>
Link: https://lore.kernel.org/r/20230110124753.1325426-1-mpe@ellerman.id.au
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index cac727b01799..5a2384ed1727 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -262,6 +262,17 @@ print_mapping(unsigned long start, unsigned long end, unsigned long size, bool e
static unsigned long next_boundary(unsigned long addr, unsigned long end)
{
#ifdef CONFIG_STRICT_KERNEL_RWX
+ unsigned long stext_phys;
+
+ stext_phys = __pa_symbol(_stext);
+
+ // Relocatable kernel running at non-zero real address
+ if (stext_phys != 0) {
+ // Start of relocated kernel text is a rodata boundary
+ if (addr < stext_phys)
+ return stext_phys;
+ }
+
if (addr < __pa_symbol(__srwx_boundary))
return __pa_symbol(__srwx_boundary);
#endif
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
98d0219e043e ("powerpc/64s/radix: Fix crash with unaligned relocated kernel")
b150a4d12b91 ("powerpc/vmlinux.lds: Add an explicit symbol for the SRWX boundary")
331771e836e6 ("powerpc/vmlinux.lds: Ensure STRICT_ALIGN_SIZE is at least page aligned")
2a0fb3c155c9 ("powerpc/32: Set an IBAT covering up to _einittext during init")
c4a22611bf6c ("powerpc/603: Use SPRN_SDR1 to store the pgdir phys address")
035b19a15a98 ("powerpc/32s: Always map kernel text and rodata with BATs")
11522448e641 ("powerpc/603: Always fault when _PAGE_ACCESSED is not set")
850659392abc ("powerpc/reg.h: delete duplicated word")
850659392abc ("powerpc/reg.h: delete duplicated word")
850659392abc ("powerpc/reg.h: delete duplicated word")
850659392abc ("powerpc/reg.h: delete duplicated word")
850659392abc ("powerpc/reg.h: delete duplicated word")
850659392abc ("powerpc/reg.h: delete duplicated word")
850659392abc ("powerpc/reg.h: delete duplicated word")
850659392abc ("powerpc/reg.h: delete duplicated word")
850659392abc ("powerpc/reg.h: delete duplicated word")
850659392abc ("powerpc/reg.h: delete duplicated word")
850659392abc ("powerpc/reg.h: delete duplicated word")
850659392abc ("powerpc/reg.h: delete duplicated word")
850659392abc ("powerpc/reg.h: delete duplicated word")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 98d0219e043e09013e883eacde3b93e0b2bf944d Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe(a)ellerman.id.au>
Date: Tue, 10 Jan 2023 23:47:52 +1100
Subject: [PATCH] powerpc/64s/radix: Fix crash with unaligned relocated kernel
If a relocatable kernel is loaded at an address that is not 2MB aligned
and told not to relocate to zero, the kernel can crash due to
mark_rodata_ro() incorrectly changing some read-write data to read-only.
Scenarios where the misalignment can occur are when the kernel is
loaded by kdump or using the RELOCATABLE_TEST config option.
Example crash with the kernel loaded at 5MB:
Run /sbin/init as init process
BUG: Unable to handle kernel data access on write at 0xc000000000452000
Faulting instruction address: 0xc0000000005b6730
Oops: Kernel access of bad area, sig: 11 [#1]
LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA pSeries
CPU: 1 PID: 1 Comm: init Not tainted 6.2.0-rc1-00011-g349188be4841 #166
Hardware name: IBM pSeries (emulated by qemu) POWER9 (raw) 0x4e1202 0xf000005 of:SLOF,git-5b4c5a hv:linux,kvm pSeries
NIP: c0000000005b6730 LR: c000000000ae9ab8 CTR: 0000000000000380
REGS: c000000004503250 TRAP: 0300 Not tainted (6.2.0-rc1-00011-g349188be4841)
MSR: 8000000000009033 <SF,EE,ME,IR,DR,RI,LE> CR: 44288480 XER: 00000000
CFAR: c0000000005b66ec DAR: c000000000452000 DSISR: 0a000000 IRQMASK: 0
...
NIP memset+0x68/0x104
LR zero_user_segments.constprop.0+0xa8/0xf0
Call Trace:
ext4_mpage_readpages+0x7f8/0x830
ext4_readahead+0x48/0x60
read_pages+0xb8/0x380
page_cache_ra_unbounded+0x19c/0x250
filemap_fault+0x58c/0xae0
__do_fault+0x60/0x100
__handle_mm_fault+0x1230/0x1a40
handle_mm_fault+0x120/0x300
___do_page_fault+0x20c/0xa80
do_page_fault+0x30/0xc0
data_access_common_virt+0x210/0x220
This happens because mark_rodata_ro() tries to change permissions on the
range _stext..__end_rodata, but _stext sits in the middle of the 2MB
page from 4MB to 6MB:
radix-mmu: Mapped 0x0000000000000000-0x0000000000200000 with 2.00 MiB pages (exec)
radix-mmu: Mapped 0x0000000000200000-0x0000000000400000 with 2.00 MiB pages
radix-mmu: Mapped 0x0000000000400000-0x0000000002400000 with 2.00 MiB pages (exec)
The logic that changes the permissions assumes the linear mapping was
split correctly at boot, so it marks the entire 2MB page read-only. That
leads to the write fault above.
To fix it, the boot time mapping logic needs to consider that if the
kernel is running at a non-zero address then _stext is a boundary where
it must split the mapping.
That leads to the mapping being split correctly, allowing the rodata
permission change to take happen correctly, with no spillover:
radix-mmu: Mapped 0x0000000000000000-0x0000000000200000 with 2.00 MiB pages (exec)
radix-mmu: Mapped 0x0000000000200000-0x0000000000400000 with 2.00 MiB pages
radix-mmu: Mapped 0x0000000000400000-0x0000000000500000 with 64.0 KiB pages
radix-mmu: Mapped 0x0000000000500000-0x0000000000600000 with 64.0 KiB pages (exec)
radix-mmu: Mapped 0x0000000000600000-0x0000000002400000 with 2.00 MiB pages (exec)
If the kernel is loaded at a 2MB aligned address, the mapping continues
to use 2MB pages as before:
radix-mmu: Mapped 0x0000000000000000-0x0000000000200000 with 2.00 MiB pages (exec)
radix-mmu: Mapped 0x0000000000200000-0x0000000000400000 with 2.00 MiB pages
radix-mmu: Mapped 0x0000000000400000-0x0000000002c00000 with 2.00 MiB pages (exec)
radix-mmu: Mapped 0x0000000002c00000-0x0000000100000000 with 2.00 MiB pages
Fixes: c55d7b5e6426 ("powerpc: Remove STRICT_KERNEL_RWX incompatibility with RELOCATABLE")
Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au>
Link: https://lore.kernel.org/r/20230110124753.1325426-1-mpe@ellerman.id.au
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index cac727b01799..5a2384ed1727 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -262,6 +262,17 @@ print_mapping(unsigned long start, unsigned long end, unsigned long size, bool e
static unsigned long next_boundary(unsigned long addr, unsigned long end)
{
#ifdef CONFIG_STRICT_KERNEL_RWX
+ unsigned long stext_phys;
+
+ stext_phys = __pa_symbol(_stext);
+
+ // Relocatable kernel running at non-zero real address
+ if (stext_phys != 0) {
+ // Start of relocated kernel text is a rodata boundary
+ if (addr < stext_phys)
+ return stext_phys;
+ }
+
if (addr < __pa_symbol(__srwx_boundary))
return __pa_symbol(__srwx_boundary);
#endif
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
98d0219e043e ("powerpc/64s/radix: Fix crash with unaligned relocated kernel")
b150a4d12b91 ("powerpc/vmlinux.lds: Add an explicit symbol for the SRWX boundary")
331771e836e6 ("powerpc/vmlinux.lds: Ensure STRICT_ALIGN_SIZE is at least page aligned")
2a0fb3c155c9 ("powerpc/32: Set an IBAT covering up to _einittext during init")
c4a22611bf6c ("powerpc/603: Use SPRN_SDR1 to store the pgdir phys address")
035b19a15a98 ("powerpc/32s: Always map kernel text and rodata with BATs")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 98d0219e043e09013e883eacde3b93e0b2bf944d Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe(a)ellerman.id.au>
Date: Tue, 10 Jan 2023 23:47:52 +1100
Subject: [PATCH] powerpc/64s/radix: Fix crash with unaligned relocated kernel
If a relocatable kernel is loaded at an address that is not 2MB aligned
and told not to relocate to zero, the kernel can crash due to
mark_rodata_ro() incorrectly changing some read-write data to read-only.
Scenarios where the misalignment can occur are when the kernel is
loaded by kdump or using the RELOCATABLE_TEST config option.
Example crash with the kernel loaded at 5MB:
Run /sbin/init as init process
BUG: Unable to handle kernel data access on write at 0xc000000000452000
Faulting instruction address: 0xc0000000005b6730
Oops: Kernel access of bad area, sig: 11 [#1]
LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA pSeries
CPU: 1 PID: 1 Comm: init Not tainted 6.2.0-rc1-00011-g349188be4841 #166
Hardware name: IBM pSeries (emulated by qemu) POWER9 (raw) 0x4e1202 0xf000005 of:SLOF,git-5b4c5a hv:linux,kvm pSeries
NIP: c0000000005b6730 LR: c000000000ae9ab8 CTR: 0000000000000380
REGS: c000000004503250 TRAP: 0300 Not tainted (6.2.0-rc1-00011-g349188be4841)
MSR: 8000000000009033 <SF,EE,ME,IR,DR,RI,LE> CR: 44288480 XER: 00000000
CFAR: c0000000005b66ec DAR: c000000000452000 DSISR: 0a000000 IRQMASK: 0
...
NIP memset+0x68/0x104
LR zero_user_segments.constprop.0+0xa8/0xf0
Call Trace:
ext4_mpage_readpages+0x7f8/0x830
ext4_readahead+0x48/0x60
read_pages+0xb8/0x380
page_cache_ra_unbounded+0x19c/0x250
filemap_fault+0x58c/0xae0
__do_fault+0x60/0x100
__handle_mm_fault+0x1230/0x1a40
handle_mm_fault+0x120/0x300
___do_page_fault+0x20c/0xa80
do_page_fault+0x30/0xc0
data_access_common_virt+0x210/0x220
This happens because mark_rodata_ro() tries to change permissions on the
range _stext..__end_rodata, but _stext sits in the middle of the 2MB
page from 4MB to 6MB:
radix-mmu: Mapped 0x0000000000000000-0x0000000000200000 with 2.00 MiB pages (exec)
radix-mmu: Mapped 0x0000000000200000-0x0000000000400000 with 2.00 MiB pages
radix-mmu: Mapped 0x0000000000400000-0x0000000002400000 with 2.00 MiB pages (exec)
The logic that changes the permissions assumes the linear mapping was
split correctly at boot, so it marks the entire 2MB page read-only. That
leads to the write fault above.
To fix it, the boot time mapping logic needs to consider that if the
kernel is running at a non-zero address then _stext is a boundary where
it must split the mapping.
That leads to the mapping being split correctly, allowing the rodata
permission change to take happen correctly, with no spillover:
radix-mmu: Mapped 0x0000000000000000-0x0000000000200000 with 2.00 MiB pages (exec)
radix-mmu: Mapped 0x0000000000200000-0x0000000000400000 with 2.00 MiB pages
radix-mmu: Mapped 0x0000000000400000-0x0000000000500000 with 64.0 KiB pages
radix-mmu: Mapped 0x0000000000500000-0x0000000000600000 with 64.0 KiB pages (exec)
radix-mmu: Mapped 0x0000000000600000-0x0000000002400000 with 2.00 MiB pages (exec)
If the kernel is loaded at a 2MB aligned address, the mapping continues
to use 2MB pages as before:
radix-mmu: Mapped 0x0000000000000000-0x0000000000200000 with 2.00 MiB pages (exec)
radix-mmu: Mapped 0x0000000000200000-0x0000000000400000 with 2.00 MiB pages
radix-mmu: Mapped 0x0000000000400000-0x0000000002c00000 with 2.00 MiB pages (exec)
radix-mmu: Mapped 0x0000000002c00000-0x0000000100000000 with 2.00 MiB pages
Fixes: c55d7b5e6426 ("powerpc: Remove STRICT_KERNEL_RWX incompatibility with RELOCATABLE")
Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au>
Link: https://lore.kernel.org/r/20230110124753.1325426-1-mpe@ellerman.id.au
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index cac727b01799..5a2384ed1727 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -262,6 +262,17 @@ print_mapping(unsigned long start, unsigned long end, unsigned long size, bool e
static unsigned long next_boundary(unsigned long addr, unsigned long end)
{
#ifdef CONFIG_STRICT_KERNEL_RWX
+ unsigned long stext_phys;
+
+ stext_phys = __pa_symbol(_stext);
+
+ // Relocatable kernel running at non-zero real address
+ if (stext_phys != 0) {
+ // Start of relocated kernel text is a rodata boundary
+ if (addr < stext_phys)
+ return stext_phys;
+ }
+
if (addr < __pa_symbol(__srwx_boundary))
return __pa_symbol(__srwx_boundary);
#endif
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
98d0219e043e ("powerpc/64s/radix: Fix crash with unaligned relocated kernel")
b150a4d12b91 ("powerpc/vmlinux.lds: Add an explicit symbol for the SRWX boundary")
331771e836e6 ("powerpc/vmlinux.lds: Ensure STRICT_ALIGN_SIZE is at least page aligned")
2a0fb3c155c9 ("powerpc/32: Set an IBAT covering up to _einittext during init")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 98d0219e043e09013e883eacde3b93e0b2bf944d Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe(a)ellerman.id.au>
Date: Tue, 10 Jan 2023 23:47:52 +1100
Subject: [PATCH] powerpc/64s/radix: Fix crash with unaligned relocated kernel
If a relocatable kernel is loaded at an address that is not 2MB aligned
and told not to relocate to zero, the kernel can crash due to
mark_rodata_ro() incorrectly changing some read-write data to read-only.
Scenarios where the misalignment can occur are when the kernel is
loaded by kdump or using the RELOCATABLE_TEST config option.
Example crash with the kernel loaded at 5MB:
Run /sbin/init as init process
BUG: Unable to handle kernel data access on write at 0xc000000000452000
Faulting instruction address: 0xc0000000005b6730
Oops: Kernel access of bad area, sig: 11 [#1]
LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA pSeries
CPU: 1 PID: 1 Comm: init Not tainted 6.2.0-rc1-00011-g349188be4841 #166
Hardware name: IBM pSeries (emulated by qemu) POWER9 (raw) 0x4e1202 0xf000005 of:SLOF,git-5b4c5a hv:linux,kvm pSeries
NIP: c0000000005b6730 LR: c000000000ae9ab8 CTR: 0000000000000380
REGS: c000000004503250 TRAP: 0300 Not tainted (6.2.0-rc1-00011-g349188be4841)
MSR: 8000000000009033 <SF,EE,ME,IR,DR,RI,LE> CR: 44288480 XER: 00000000
CFAR: c0000000005b66ec DAR: c000000000452000 DSISR: 0a000000 IRQMASK: 0
...
NIP memset+0x68/0x104
LR zero_user_segments.constprop.0+0xa8/0xf0
Call Trace:
ext4_mpage_readpages+0x7f8/0x830
ext4_readahead+0x48/0x60
read_pages+0xb8/0x380
page_cache_ra_unbounded+0x19c/0x250
filemap_fault+0x58c/0xae0
__do_fault+0x60/0x100
__handle_mm_fault+0x1230/0x1a40
handle_mm_fault+0x120/0x300
___do_page_fault+0x20c/0xa80
do_page_fault+0x30/0xc0
data_access_common_virt+0x210/0x220
This happens because mark_rodata_ro() tries to change permissions on the
range _stext..__end_rodata, but _stext sits in the middle of the 2MB
page from 4MB to 6MB:
radix-mmu: Mapped 0x0000000000000000-0x0000000000200000 with 2.00 MiB pages (exec)
radix-mmu: Mapped 0x0000000000200000-0x0000000000400000 with 2.00 MiB pages
radix-mmu: Mapped 0x0000000000400000-0x0000000002400000 with 2.00 MiB pages (exec)
The logic that changes the permissions assumes the linear mapping was
split correctly at boot, so it marks the entire 2MB page read-only. That
leads to the write fault above.
To fix it, the boot time mapping logic needs to consider that if the
kernel is running at a non-zero address then _stext is a boundary where
it must split the mapping.
That leads to the mapping being split correctly, allowing the rodata
permission change to take happen correctly, with no spillover:
radix-mmu: Mapped 0x0000000000000000-0x0000000000200000 with 2.00 MiB pages (exec)
radix-mmu: Mapped 0x0000000000200000-0x0000000000400000 with 2.00 MiB pages
radix-mmu: Mapped 0x0000000000400000-0x0000000000500000 with 64.0 KiB pages
radix-mmu: Mapped 0x0000000000500000-0x0000000000600000 with 64.0 KiB pages (exec)
radix-mmu: Mapped 0x0000000000600000-0x0000000002400000 with 2.00 MiB pages (exec)
If the kernel is loaded at a 2MB aligned address, the mapping continues
to use 2MB pages as before:
radix-mmu: Mapped 0x0000000000000000-0x0000000000200000 with 2.00 MiB pages (exec)
radix-mmu: Mapped 0x0000000000200000-0x0000000000400000 with 2.00 MiB pages
radix-mmu: Mapped 0x0000000000400000-0x0000000002c00000 with 2.00 MiB pages (exec)
radix-mmu: Mapped 0x0000000002c00000-0x0000000100000000 with 2.00 MiB pages
Fixes: c55d7b5e6426 ("powerpc: Remove STRICT_KERNEL_RWX incompatibility with RELOCATABLE")
Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au>
Link: https://lore.kernel.org/r/20230110124753.1325426-1-mpe@ellerman.id.au
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index cac727b01799..5a2384ed1727 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -262,6 +262,17 @@ print_mapping(unsigned long start, unsigned long end, unsigned long size, bool e
static unsigned long next_boundary(unsigned long addr, unsigned long end)
{
#ifdef CONFIG_STRICT_KERNEL_RWX
+ unsigned long stext_phys;
+
+ stext_phys = __pa_symbol(_stext);
+
+ // Relocatable kernel running at non-zero real address
+ if (stext_phys != 0) {
+ // Start of relocated kernel text is a rodata boundary
+ if (addr < stext_phys)
+ return stext_phys;
+ }
+
if (addr < __pa_symbol(__srwx_boundary))
return __pa_symbol(__srwx_boundary);
#endif
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
bffb7d9d1a3d ("iio:adc:twl6030: Enable measurement of VAC")
f804bd0dc286 ("iio:adc:twl6030: Enable measurements of VUSB, VBAT and others")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From bffb7d9d1a3dbd09e083b88aefd093b3b10abbfb Mon Sep 17 00:00:00 2001
From: Andreas Kemnade <andreas(a)kemnade.info>
Date: Sat, 17 Dec 2022 23:13:05 +0100
Subject: [PATCH] iio:adc:twl6030: Enable measurement of VAC
VAC needs to be wired up to produce proper measurements,
without this change only near zero values are reported.
Reported-by: kernel test robot <lkp(a)intel.com>
Reported-by: Julia Lawall <julia.lawall(a)lip6.fr>
Fixes: 1696f36482e7 ("iio: twl6030-gpadc: TWL6030, TWL6032 GPADC driver")
Signed-off-by: Andreas Kemnade <andreas(a)kemnade.info>
Link: https://lore.kernel.org/r/20221217221305.671117-1-andreas@kemnade.info
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
diff --git a/drivers/iio/adc/twl6030-gpadc.c b/drivers/iio/adc/twl6030-gpadc.c
index 40438e5b4970..32873fb5f367 100644
--- a/drivers/iio/adc/twl6030-gpadc.c
+++ b/drivers/iio/adc/twl6030-gpadc.c
@@ -952,7 +952,7 @@ static int twl6030_gpadc_probe(struct platform_device *pdev)
}
ret = twl_i2c_write_u8(TWL6030_MODULE_ID0,
- VBAT_MEAS | BB_MEAS | BB_MEAS,
+ VBAT_MEAS | BB_MEAS | VAC_MEAS,
TWL6030_MISC1);
if (ret < 0) {
dev_err(dev, "failed to wire up inputs\n");
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
bffb7d9d1a3d ("iio:adc:twl6030: Enable measurement of VAC")
f804bd0dc286 ("iio:adc:twl6030: Enable measurements of VUSB, VBAT and others")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From bffb7d9d1a3dbd09e083b88aefd093b3b10abbfb Mon Sep 17 00:00:00 2001
From: Andreas Kemnade <andreas(a)kemnade.info>
Date: Sat, 17 Dec 2022 23:13:05 +0100
Subject: [PATCH] iio:adc:twl6030: Enable measurement of VAC
VAC needs to be wired up to produce proper measurements,
without this change only near zero values are reported.
Reported-by: kernel test robot <lkp(a)intel.com>
Reported-by: Julia Lawall <julia.lawall(a)lip6.fr>
Fixes: 1696f36482e7 ("iio: twl6030-gpadc: TWL6030, TWL6032 GPADC driver")
Signed-off-by: Andreas Kemnade <andreas(a)kemnade.info>
Link: https://lore.kernel.org/r/20221217221305.671117-1-andreas@kemnade.info
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
diff --git a/drivers/iio/adc/twl6030-gpadc.c b/drivers/iio/adc/twl6030-gpadc.c
index 40438e5b4970..32873fb5f367 100644
--- a/drivers/iio/adc/twl6030-gpadc.c
+++ b/drivers/iio/adc/twl6030-gpadc.c
@@ -952,7 +952,7 @@ static int twl6030_gpadc_probe(struct platform_device *pdev)
}
ret = twl_i2c_write_u8(TWL6030_MODULE_ID0,
- VBAT_MEAS | BB_MEAS | BB_MEAS,
+ VBAT_MEAS | BB_MEAS | VAC_MEAS,
TWL6030_MISC1);
if (ret < 0) {
dev_err(dev, "failed to wire up inputs\n");
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
bffb7d9d1a3d ("iio:adc:twl6030: Enable measurement of VAC")
f804bd0dc286 ("iio:adc:twl6030: Enable measurements of VUSB, VBAT and others")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From bffb7d9d1a3dbd09e083b88aefd093b3b10abbfb Mon Sep 17 00:00:00 2001
From: Andreas Kemnade <andreas(a)kemnade.info>
Date: Sat, 17 Dec 2022 23:13:05 +0100
Subject: [PATCH] iio:adc:twl6030: Enable measurement of VAC
VAC needs to be wired up to produce proper measurements,
without this change only near zero values are reported.
Reported-by: kernel test robot <lkp(a)intel.com>
Reported-by: Julia Lawall <julia.lawall(a)lip6.fr>
Fixes: 1696f36482e7 ("iio: twl6030-gpadc: TWL6030, TWL6032 GPADC driver")
Signed-off-by: Andreas Kemnade <andreas(a)kemnade.info>
Link: https://lore.kernel.org/r/20221217221305.671117-1-andreas@kemnade.info
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
diff --git a/drivers/iio/adc/twl6030-gpadc.c b/drivers/iio/adc/twl6030-gpadc.c
index 40438e5b4970..32873fb5f367 100644
--- a/drivers/iio/adc/twl6030-gpadc.c
+++ b/drivers/iio/adc/twl6030-gpadc.c
@@ -952,7 +952,7 @@ static int twl6030_gpadc_probe(struct platform_device *pdev)
}
ret = twl_i2c_write_u8(TWL6030_MODULE_ID0,
- VBAT_MEAS | BB_MEAS | BB_MEAS,
+ VBAT_MEAS | BB_MEAS | VAC_MEAS,
TWL6030_MISC1);
if (ret < 0) {
dev_err(dev, "failed to wire up inputs\n");
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
bffb7d9d1a3d ("iio:adc:twl6030: Enable measurement of VAC")
f804bd0dc286 ("iio:adc:twl6030: Enable measurements of VUSB, VBAT and others")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From bffb7d9d1a3dbd09e083b88aefd093b3b10abbfb Mon Sep 17 00:00:00 2001
From: Andreas Kemnade <andreas(a)kemnade.info>
Date: Sat, 17 Dec 2022 23:13:05 +0100
Subject: [PATCH] iio:adc:twl6030: Enable measurement of VAC
VAC needs to be wired up to produce proper measurements,
without this change only near zero values are reported.
Reported-by: kernel test robot <lkp(a)intel.com>
Reported-by: Julia Lawall <julia.lawall(a)lip6.fr>
Fixes: 1696f36482e7 ("iio: twl6030-gpadc: TWL6030, TWL6032 GPADC driver")
Signed-off-by: Andreas Kemnade <andreas(a)kemnade.info>
Link: https://lore.kernel.org/r/20221217221305.671117-1-andreas@kemnade.info
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
diff --git a/drivers/iio/adc/twl6030-gpadc.c b/drivers/iio/adc/twl6030-gpadc.c
index 40438e5b4970..32873fb5f367 100644
--- a/drivers/iio/adc/twl6030-gpadc.c
+++ b/drivers/iio/adc/twl6030-gpadc.c
@@ -952,7 +952,7 @@ static int twl6030_gpadc_probe(struct platform_device *pdev)
}
ret = twl_i2c_write_u8(TWL6030_MODULE_ID0,
- VBAT_MEAS | BB_MEAS | BB_MEAS,
+ VBAT_MEAS | BB_MEAS | VAC_MEAS,
TWL6030_MISC1);
if (ret < 0) {
dev_err(dev, "failed to wire up inputs\n");
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
f30d4968e9ae ("bpf: Do not reject when the stack read size is different from the tracked scalar size")
354e8f1970f8 ("bpf: Support <8-byte scalar spill and refill")
27113c59b6d0 ("bpf: Check the other end of slot_type for STACK_SPILL")
2039f26f3aca ("bpf: Fix leakage due to insufficient speculative store bypass mitigation")
01f810ace9ed ("bpf: Allow variable-offset stack access")
cd17d38f8b28 ("bpf: Permits pointers on stack for helper calls")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f30d4968e9aee737e174fc97942af46cfb49b484 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai(a)fb.com>
Date: Mon, 1 Nov 2021 23:45:35 -0700
Subject: [PATCH] bpf: Do not reject when the stack read size is different from
the tracked scalar size
Below is a simplified case from a report in bcc [0]:
r4 = 20
*(u32 *)(r10 -4) = r4
*(u32 *)(r10 -8) = r4 /* r4 state is tracked */
r4 = *(u64 *)(r10 -8) /* Read more than the tracked 32bit scalar.
* verifier rejects as 'corrupted spill memory'.
*/
After commit 354e8f1970f8 ("bpf: Support <8-byte scalar spill and refill"),
the 8-byte aligned 32bit spill is also tracked by the verifier and the
register state is stored.
However, if 8 bytes are read from the stack instead of the tracked 4 byte
scalar, then verifier currently rejects the program as "corrupted spill
memory". This patch fixes this case by allowing it to read but marks the
register as unknown.
Also note that, if the prog is trying to corrupt/leak an earlier spilled
pointer by spilling another <8 bytes register on top, this has already
been rejected in the check_stack_write_fixed_off().
[0] https://github.com/iovisor/bcc/pull/3683
Fixes: 354e8f1970f8 ("bpf: Support <8-byte scalar spill and refill")
Reported-by: Hengqi Chen <hengqi.chen(a)gmail.com>
Reported-by: Yonghong Song <yhs(a)gmail.com>
Signed-off-by: Martin KaFai Lau <kafai(a)fb.com>
Signed-off-by: Daniel Borkmann <daniel(a)iogearbox.net>
Tested-by: Hengqi Chen <hengqi.chen(a)gmail.com>
Acked-by: Yonghong Song <yhs(a)fb.com>
Link: https://lore.kernel.org/bpf/20211102064535.316018-1-kafai@fb.com
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index f0dca726ebfd..5f8d9128860a 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -3088,9 +3088,12 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
reg = ®_state->stack[spi].spilled_ptr;
if (is_spilled_reg(®_state->stack[spi])) {
- if (size != BPF_REG_SIZE) {
- u8 scalar_size = 0;
+ u8 spill_size = 1;
+
+ for (i = BPF_REG_SIZE - 1; i > 0 && stype[i - 1] == STACK_SPILL; i--)
+ spill_size++;
+ if (size != BPF_REG_SIZE || spill_size != BPF_REG_SIZE) {
if (reg->type != SCALAR_VALUE) {
verbose_linfo(env, env->insn_idx, "; ");
verbose(env, "invalid size of register fill\n");
@@ -3101,10 +3104,7 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
if (dst_regno < 0)
return 0;
- for (i = BPF_REG_SIZE; i > 0 && stype[i - 1] == STACK_SPILL; i--)
- scalar_size++;
-
- if (!(off % BPF_REG_SIZE) && size == scalar_size) {
+ if (!(off % BPF_REG_SIZE) && size == spill_size) {
/* The earlier check_reg_arg() has decided the
* subreg_def for this insn. Save it first.
*/
@@ -3128,12 +3128,6 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
return 0;
}
- for (i = 1; i < BPF_REG_SIZE; i++) {
- if (stype[(slot - i) % BPF_REG_SIZE] != STACK_SPILL) {
- verbose(env, "corrupted spill memory\n");
- return -EACCES;
- }
- }
if (dst_regno >= 0) {
/* restore register state from stack */
Hi Sasha!
Dne ponedeljek, 06. februar 2023 ob 14:45:06 CET je Sasha Levin napisal(a):
> This is a note to let you know that I've just added the patch titled
>
> bus: sunxi-rsb: Fix error handling in sunxi_rsb_init()
>
> to the 5.10-stable tree which can be found at:
>
> http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum
> mary
>
> The filename of the patch is:
> bus-sunxi-rsb-fix-error-handling-in-sunxi_rsb_init.patch
> and it can be found in the queue-5.10 subdirectory.
>
> If you, or anyone else, feels it should not be added to the stable tree,
> please let <stable(a)vger.kernel.org> know about it.
>
>
>
> commit ad954fdfb62b7541a93ce1a12da025a8f698d8a8
> Author: Yuan Can <yuancan(a)huawei.com>
> Date: Wed Nov 23 09:42:00 2022 +0000
>
> bus: sunxi-rsb: Fix error handling in sunxi_rsb_init()
>
> [ Upstream commit f71eaf2708be7831428eacae7db25d8ec6b8b4c5 ]
>
> The sunxi_rsb_init() returns the platform_driver_register() directly
> without checking its return value, if platform_driver_register() failed,
> the sunxi_rsb_bus is not unregistered.
> Fix by unregister sunxi_rsb_bus when platform_driver_register() failed.
>
> Fixes: d787dcdb9c8f ("bus: sunxi-rsb: Add driver for Allwinner Reduced
> Serial Bus") Signed-off-by: Yuan Can <yuancan(a)huawei.com>
> Reviewed-by: Jernej Skrabec <jernej.skrabec(a)gmail.com>
> Link:
> https://lore.kernel.org/r/20221123094200.12036-1-yuancan@huawei.com
> Signed-off-by: Jernej Skrabec <jernej.skrabec(a)gmail.com>
> Signed-off-by: Sasha Levin <sashal(a)kernel.org>
>
> diff --git a/drivers/bus/sunxi-rsb.c b/drivers/bus/sunxi-rsb.c
> index f8c29b888e6b..98cbb18f17fa 100644
> --- a/drivers/bus/sunxi-rsb.c
> +++ b/drivers/bus/sunxi-rsb.c
> @@ -781,7 +781,13 @@ static int __init sunxi_rsb_init(void)
> return ret;
> }
>
> - return platform_driver_register(&sunxi_rsb_driver);
> + ret = platform_driver_register(&sunxi_rsb_driver);
> + if (ret) {
> + bus_unregister(&sunxi_rsb_bus);
> + return ret;
> + }
> +
> + return 0;
> }
> module_init(sunxi_rsb_init);
>
> diff --git a/sound/soc/intel/boards/bytcr_rt5651.c
> b/sound/soc/intel/boards/bytcr_rt5651.c index bf8b87d45cb0..2c76f0abeeca
> 100644
> --- a/sound/soc/intel/boards/bytcr_rt5651.c
> +++ b/sound/soc/intel/boards/bytcr_rt5651.c
> @@ -918,7 +918,6 @@ static int snd_byt_rt5651_mc_probe(struct
> platform_device *pdev) if (adev) {
> snprintf(byt_rt5651_codec_name,
sizeof(byt_rt5651_codec_name),
> "i2c-%s", acpi_dev_name(adev));
> - put_device(&adev->dev);
> byt_rt5651_dais[dai_index].codecs->name =
byt_rt5651_codec_name;
> } else {
> dev_err(&pdev->dev, "Error cannot find '%s' dev\n",
mach->id);
> @@ -927,6 +926,7 @@ static int snd_byt_rt5651_mc_probe(struct
> platform_device *pdev)
>
> codec_dev = bus_find_device_by_name(&i2c_bus_type, NULL,
>
byt_rt5651_codec_name);
> + acpi_dev_put(adev);
> if (!codec_dev)
> return -EPROBE_DEFER;
Above bytcr_rt5651.c changes are unrelated to original commit. Did you merge
two commits by mistake?
Best regards,
Jernej
After the qmp phy driver was split it looks like 5.15.y stable kernels
aren't getting fixes like commit 7a7d86d14d07 ("phy: qcom-qmp-combo: fix
broken power on") which is tagged for stable 5.10. Trogdor boards use
the qmp phy on 5.15.y kernels, so I backported the fixes I could find
that looked like we may possibly trip over at some point.
USB and DP work on my Trogdor.Lazor board with this set.
Changes from v2 (https://lore.kernel.org/r/20230113204548.578798-1-swboyd@chromium.org):
* Keep conditional that can't be removed from last patch
* Rebase to latest 5.15.y stable tree
Changes from v1 (https://lore.kernel.org/r/20230113005405.3992011-1-swboyd@chromium.org):
* New patch for memleak on probe deferal to avoid compat issues
* Update "fix broken power on" patch for pcie/ufs phy
Johan Hovold (5):
phy: qcom-qmp-combo: disable runtime PM on unbind
phy: qcom-qmp-combo: fix memleak on probe deferral
phy: qcom-qmp-usb: fix memleak on probe deferral
phy: qcom-qmp-combo: fix broken power on
phy: qcom-qmp-combo: fix runtime suspend
drivers/phy/qualcomm/phy-qcom-qmp.c | 89 ++++++++++++++++++++---------
1 file changed, 61 insertions(+), 28 deletions(-)
Cc: Johan Hovold <johan+linaro(a)kernel.org>
Cc: Dmitry Baryshkov <dmitry.baryshkov(a)linaro.org>
Cc: Vinod Koul <vkoul(a)kernel.org>
base-commit: 9cf4111cdf9420fa99792ae16c8de23242bb2e0b
--
https://chromeos.dev
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
cc755b4377b0 ("ASoC: SOF: keep prepare/unprepare widgets in sink path")
0ad84b11f2f8 ("ASoC: SOF: sof-audio: skip prepare/unprepare if swidget is NULL")
7d2a67e02549 ("ASoC: SOF: sof-audio: unprepare when swidget->use_count > 0")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From cc755b4377b0520d594ae573497cf0824baea648 Mon Sep 17 00:00:00 2001
From: Bard Liao <yung-chuan.liao(a)linux.intel.com>
Date: Wed, 18 Jan 2023 12:12:55 +0200
Subject: [PATCH] ASoC: SOF: keep prepare/unprepare widgets in sink path
The existing code return when a widget doesn't need to
prepare/unprepare. This will prevent widgets in the sink path from being
prepared/unprepared.
Cc: <stable(a)vger.kernel.org> # 6.1
Link: https://github.com/thesofproject/linux/issues/4021
Signed-off-by: Bard Liao <yung-chuan.liao(a)linux.intel.com>
Reviewed-by: Ranjani Sridharan <ranjani.sridharan(a)linux.intel.com>
Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart(a)linux.intel.com>
Reviewed-by: Rander Wang <rander.wang(a)intel.com>
Signed-off-by: Peter Ujfalusi <peter.ujfalusi(a)linux.intel.com>
Link: https://lore.kernel.org/r/20230118101255.29139-4-peter.ujfalusi@linux.intel…
Signed-off-by: Mark Brown <broonie(a)kernel.org>
diff --git a/sound/soc/sof/sof-audio.c b/sound/soc/sof/sof-audio.c
index 8c114e6a23c6..ff716bfbcb67 100644
--- a/sound/soc/sof/sof-audio.c
+++ b/sound/soc/sof/sof-audio.c
@@ -271,9 +271,9 @@ sof_unprepare_widgets_in_path(struct snd_sof_dev *sdev, struct snd_soc_dapm_widg
struct snd_sof_widget *swidget = widget->dobj.private;
struct snd_soc_dapm_path *p;
- /* return if the widget is in use or if it is already unprepared */
+ /* skip if the widget is in use or if it is already unprepared */
if (!swidget || !swidget->prepared || swidget->use_count > 0)
- return;
+ goto sink_unprepare;
if (widget_ops[widget->id].ipc_unprepare)
/* unprepare the source widget */
@@ -281,6 +281,7 @@ sof_unprepare_widgets_in_path(struct snd_sof_dev *sdev, struct snd_soc_dapm_widg
swidget->prepared = false;
+sink_unprepare:
/* unprepare all widgets in the sink paths */
snd_soc_dapm_widget_for_each_sink_path(widget, p) {
if (!p->walking && p->sink->dobj.private) {
From: Jan Kara <jack(a)suse.cz>
commit c1ad35dd0548ce947d97aaf92f7f2f9a202951cf upstream
udf_write_fi() uses lengthOfImpUse of the entry it is writing to.
However this field has not yet been initialized so it either contains
completely bogus value or value from last directory entry at that place.
In either case this is wrong and can lead to filesystem corruption or
kernel crashes.
This patch deviates from the original upstream patch because in the original
upstream patch, udf_get_fi_ident(sfi) was being used instead of (uint8_t *)sfi->fileIdent + liu
as the first arg to memcpy at line 77 and line 81. Those subsequent lines have been
replaced with what the upstream patch passes in to memcpy.
Reported-by: butt3rflyh4ck <butterflyhuangxx(a)gmail.com>
CC: stable(a)vger.kernel.org
Fixes: 979a6e28dd96 ("udf: Get rid of 0-length arrays in struct fileIdentDesc")
Signed-off-by: Jan Kara <jack(a)suse.cz>
Signed-off-by: Nobel Barakat <nobelbarakat(a)google.com>
---
fs/udf/namei.c | 9 ++++-----
1 file changed, 4 insertions(+), 5 deletions(-)
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 77b6d89b9bcd..cbd6ad54a23b 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -74,12 +74,11 @@ int udf_write_fi(struct inode *inode, struct fileIdentDesc *cfi,
if (fileident) {
if (adinicb || (offset + lfi < 0)) {
- memcpy((uint8_t *)sfi->fileIdent + liu, fileident, lfi);
+ memcpy(sfi->impUse + liu, fileident, lfi);
} else if (offset >= 0) {
memcpy(fibh->ebh->b_data + offset, fileident, lfi);
} else {
- memcpy((uint8_t *)sfi->fileIdent + liu, fileident,
- -offset);
+ memcpy(sfi->impUse + liu, fileident, -offset);
memcpy(fibh->ebh->b_data, fileident - offset,
lfi + offset);
}
@@ -88,11 +87,11 @@ int udf_write_fi(struct inode *inode, struct fileIdentDesc *cfi,
offset += lfi;
if (adinicb || (offset + padlen < 0)) {
- memset((uint8_t *)sfi->padding + liu + lfi, 0x00, padlen);
+ memset(sfi->impUse + liu + lfi, 0x00, padlen);
} else if (offset >= 0) {
memset(fibh->ebh->b_data + offset, 0x00, padlen);
} else {
- memset((uint8_t *)sfi->padding + liu + lfi, 0x00, -offset);
+ memset(sfi->impUse + liu + lfi, 0x00, -offset);
memset(fibh->ebh->b_data, 0x00, padlen + offset);
}
--
2.39.1.519.gcb327c4b5f-goog
From: Johan Hovold <johan+linaro(a)kernel.org>
commit c7b98de745cffdceefc077ad5cf9cda032ef8959 upstream.
Drop the confused runtime-suspend type check which effectively broke
runtime PM if the DP child node happens to be parsed before the USB
child node during probe (e.g. due to order of child nodes in the
devicetree).
Instead use the new driver data USB PHY pointer to access the USB
configuration and resources.
Fixes: 52e013d0bffa ("phy: qcom-qmp: Add support for DP in USB3+DP combo phy")
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov(a)linaro.org>
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
Link: https://lore.kernel.org/r/20221114081346.5116-6-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul(a)kernel.org>
Signed-off-by: Stephen Boyd <swboyd(a)chromium.org>
---
drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 12 ++----------
1 file changed, 2 insertions(+), 10 deletions(-)
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
index adcda7762acf..816829105135 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
@@ -2296,15 +2296,11 @@ static void qmp_combo_disable_autonomous_mode(struct qmp_phy *qphy)
static int __maybe_unused qmp_combo_runtime_suspend(struct device *dev)
{
struct qcom_qmp *qmp = dev_get_drvdata(dev);
- struct qmp_phy *qphy = qmp->phys[0];
+ struct qmp_phy *qphy = qmp->usb_phy;
const struct qmp_phy_cfg *cfg = qphy->cfg;
dev_vdbg(dev, "Suspending QMP phy, mode:%d\n", qphy->mode);
- /* Supported only for USB3 PHY and luckily USB3 is the first phy */
- if (cfg->type != PHY_TYPE_USB3)
- return 0;
-
if (!qmp->init_count) {
dev_vdbg(dev, "PHY not initialized, bailing out\n");
return 0;
@@ -2321,16 +2317,12 @@ static int __maybe_unused qmp_combo_runtime_suspend(struct device *dev)
static int __maybe_unused qmp_combo_runtime_resume(struct device *dev)
{
struct qcom_qmp *qmp = dev_get_drvdata(dev);
- struct qmp_phy *qphy = qmp->phys[0];
+ struct qmp_phy *qphy = qmp->usb_phy;
const struct qmp_phy_cfg *cfg = qphy->cfg;
int ret = 0;
dev_vdbg(dev, "Resuming QMP phy, mode:%d\n", qphy->mode);
- /* Supported only for USB3 PHY and luckily USB3 is the first phy */
- if (cfg->type != PHY_TYPE_USB3)
- return 0;
-
if (!qmp->init_count) {
dev_vdbg(dev, "PHY not initialized, bailing out\n");
return 0;
base-commit: 68a95455c153f8adc513e5b688f4b348daa7c1b1
--
https://chromeos.dev
From: Zhang Xiaoxu <zhangxiaoxu5(a)huawei.com>
commit 9181f40fb2952fd59ecb75e7158620c9c669eee3 upstream.
If rdma receive buffer allocate failed, should call rpcrdma_regbuf_free()
to free the send buffer, otherwise, the buffer data will be leaked.
Fixes: bb93a1ae2bf4 ("xprtrdma: Allocate req's regbufs at xprt create time")
Signed-off-by: Zhang Xiaoxu <zhangxiaoxu5(a)huawei.com>
Signed-off-by: Trond Myklebust <trond.myklebust(a)hammerspace.com>
[Harshit: Backport to 5.4.y]
Also make the same change for 'req->rl_rdmabuf' at the same time as
this will also have the same memory leak problem as 'req->rl_sendbuf'
(This is because commit b78de1dca00376aaba7a58bb5fe21c1606524abe is not
in 5.4.y)
Signed-off-by: Harshit Mogalapalli <harshit.m.mogalapalli(a)oracle.com>
---
Conflict resolution: Replace kfree(req->rl_sendbuf) with the correct free
function rpcrdma_regbuf_free(req->rl_sendbuf) in out4 label.
Testing: Only compile and boot tested.
Thanks to Vegard for pointing out the similar problem with
'req->rl_rdmabuf'
Previously the backport had some problems and was reverted [1]
[1] https://lore.kernel.org/all/20230203101029.850099165@linuxfoundation.org/
---
net/sunrpc/xprtrdma/verbs.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 0f4d39fdb48f..cfae1a871578 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -1034,9 +1034,9 @@ struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, size_t size,
return req;
out4:
- kfree(req->rl_sendbuf);
+ rpcrdma_regbuf_free(req->rl_sendbuf);
out3:
- kfree(req->rl_rdmabuf);
+ rpcrdma_regbuf_free(req->rl_rdmabuf);
out2:
kfree(req);
out1:
--
2.31.1
From: Andrea Righi <andrea.righi(a)canonical.com>
Commit ebc5951eea499314f6fbbde20e295f1345c67330 upstream.
[ This fixes a performance issue we're seeing in AWS instances when
running swapoff and using the global readahead algorithm. For a
particular instance configuration, Without this fix I/O throughput
is very low during swapoff (about 15 MB/s) with this patch is
reaches 500 MB/s. Tested swapoff with different workloads with
this patch applied. 5.10 onwards already have this fix ]
In unuse_pte_range() we blindly swap-in pages without checking if the
swap entry is already present in the swap cache.
By doing this, the hit/miss ratio used by the swap readahead heuristic
is not properly updated and this leads to non-optimal performance during
swapoff.
Tracing the distribution of the readahead size returned by the swap
readahead heuristic during swapoff shows that a small readahead size is
used most of the time as if we had only misses (this happens both with
cluster and vma readahead), for example:
r::swapin_nr_pages(unsigned long offset):unsigned long:$retval
COUNT EVENT
36948 $retval = 8
44151 $retval = 4
49290 $retval = 1
527771 $retval = 2
Checking if the swap entry is present in the swap cache, instead, allows
to properly update the readahead statistics and the heuristic behaves in a
better way during swapoff, selecting a bigger readahead size:
r::swapin_nr_pages(unsigned long offset):unsigned long:$retval
COUNT EVENT
1618 $retval = 1
4960 $retval = 2
41315 $retval = 4
103521 $retval = 8
In terms of swapoff performance the result is the following:
Testing environment
===================
- Host:
CPU: 1.8GHz Intel Core i7-8565U (quad-core, 8MB cache)
HDD: PC401 NVMe SK hynix 512GB
MEM: 16GB
- Guest (kvm):
8GB of RAM
virtio block driver
16GB swap file on ext4 (/swapfile)
Test case
=========
- allocate 85% of memory
- `systemctl hibernate` to force all the pages to be swapped-out to the
swap file
- resume the system
- measure the time that swapoff takes to complete:
# /usr/bin/time swapoff /swapfile
Result (swapoff time)
======
5.6 vanilla 5.6 w/ this patch
----------- -----------------
cluster-readahead 22.09s 12.19s
vma-readahead 18.20s 15.33s
Conclusion
==========
The specific use case this patch is addressing is to improve swapoff
performance in cloud environments when a VM has been hibernated, resumed
and all the memory needs to be forced back to RAM by disabling swap.
This change allows to better exploits the advantages of the readahead
heuristic during swapoff and this improvement allows to to speed up the
resume process of such VMs.
[andrea.righi(a)canonical.com: update changelog]
Link: http://lkml.kernel.org/r/20200418084705.GA147642@xps-13
Signed-off-by: Andrea Righi <andrea.righi(a)canonical.com>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Reviewed-by: "Huang, Ying" <ying.huang(a)intel.com>
Cc: Minchan Kim <minchan(a)kernel.org>
Cc: Anchal Agarwal <anchalag(a)amazon.com>
Cc: Hugh Dickins <hughd(a)google.com>
Cc: Vineeth Remanan Pillai <vpillai(a)digitalocean.com>
Cc: Kelley Nielsen <kelleynnn(a)gmail.com>
Link: http://lkml.kernel.org/r/20200416180132.GB3352@xps-13
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
Signed-off-by: Luiz Capitulino <luizcap(a)amazon.com>
---
mm/swapfile.c | 12 ++++++++----
1 file changed, 8 insertions(+), 4 deletions(-)
Add missing SOB.
diff --git a/mm/swapfile.c b/mm/swapfile.c
index f6964212c6c8..fe5995c38ea4 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1950,10 +1950,14 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
pte_unmap(pte);
swap_map = &si->swap_map[offset];
- vmf.vma = vma;
- vmf.address = addr;
- vmf.pmd = pmd;
- page = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE, &vmf);
+ page = lookup_swap_cache(entry, vma, addr);
+ if (!page) {
+ vmf.vma = vma;
+ vmf.address = addr;
+ vmf.pmd = pmd;
+ page = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE,
+ &vmf);
+ }
if (!page) {
if (*swap_map == 0 || *swap_map == SWAP_MAP_BAD)
goto try_next;
--
2.38.1
Build ID on arm64 is broken if CONFIG_MODVERSIONS=y
on 5.4, 5.10, and 5.15
Greg mentioned to submit for _all_ branches here:
https://lore.kernel.org/all/Y9N9B5e7HZhFN7nl@kroah.com/
so here is 6.1's series.
This 6.1 series is not strictly necessary, as the problem
does not present itself in the current 6.1 stable kernels.
However, 6.1 would be broken with inclusion of either:
994b7ac1697b ("arm64: remove special treatment for the link order of head.o")
2348e6bf4421 ("riscv: remove special treatment for the link order of head.o")
Should we just include these as well?
Both the above are listed as Fixes for:
[PATCH 6.1 fix build id for arm64 1/5] arch: fix broken BuildID for arm64 and riscv
Applying this series to 6.1 is probably best to remain consistent.
4.19 and 4.14 are not susceptible to the underlying quirk of
ld (at least in my study of the code to date).
I've build tested on {x86_64, arm64, riscv, powerpc, s390, sh}
Simple test case:
$ readelf -n vmlinux | grep "Build ID"
*NOTE* to following is not in mainline, yet:
[PATCH 6.1 fix build id for arm64 5/5] sh: define RUNTIME_DISCARD_EXIT
https://lore.kernel.org/all/9166a8abdc0f979e50377e61780a4bba1dfa2f52.167451…
But it is now queued in akpm's tree:
https://lore.kernel.org/all/20230127230702.81FA0C433D2@smtp.kernel.org/
Thank you Andrew!
Regards,
--Tom
Masahiro Yamada (2):
arch: fix broken BuildID for arm64 and riscv
s390: define RUNTIME_DISCARD_EXIT to fix link error with GNU ld < 2.36
Michael Ellerman (2):
powerpc/vmlinux.lds: Define RUNTIME_DISCARD_EXIT
powerpc/vmlinux.lds: Don't discard .rela* for relocatable builds
Tom Saeger (1):
sh: define RUNTIME_DISCARD_EXIT
arch/powerpc/kernel/vmlinux.lds.S | 6 +++++-
arch/s390/kernel/vmlinux.lds.S | 2 ++
arch/sh/kernel/vmlinux.lds.S | 2 ++
include/asm-generic/vmlinux.lds.h | 5 +++++
4 files changed, 14 insertions(+), 1 deletion(-)
base-commit: 93f875a8526a291005e7f38478079526c843cbec
--
2.39.1
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
db3546d58b5a ("nvmem: core: fix cell removal on error")
b985f4cba6db ("nvmem: add support for cell info")
c7235ee3f4b8 ("nvmem: remove the global cell list")
c1de7f43bd84 ("nvmem: use kref")
fa72d847d68d ("nvmem: check the return value of nvmem_add_cells()")
1852183e142e ("nvmem: use list_for_each_entry_safe in nvmem_device_remove_all_cells()")
d7b9fd1669d4 ("nvmem: provide nvmem_dev_name()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From db3546d58b5a0fa581d9c9f2bdc2856fa6c5e43e Mon Sep 17 00:00:00 2001
From: Michael Walle <michael(a)walle.cc>
Date: Fri, 27 Jan 2023 10:40:13 +0000
Subject: [PATCH] nvmem: core: fix cell removal on error
nvmem_add_cells() could return an error after some cells are already
added to the provider. In this case, the added cells are not removed.
Remove any registered cells if nvmem_add_cells() fails.
Fixes: fa72d847d68d7 ("nvmem: check the return value of nvmem_add_cells()")
Cc: stable(a)vger.kernel.org
Signed-off-by: Michael Walle <michael(a)walle.cc>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla(a)linaro.org>
Link: https://lore.kernel.org/r/20230127104015.23839-9-srinivas.kandagatla@linaro…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c
index cbe5df99db82..563116405b3d 100644
--- a/drivers/nvmem/core.c
+++ b/drivers/nvmem/core.c
@@ -847,7 +847,7 @@ struct nvmem_device *nvmem_register(const struct nvmem_config *config)
if (config->cells) {
rval = nvmem_add_cells(nvmem, config->cells, config->ncells);
if (rval)
- goto err_teardown_compat;
+ goto err_remove_cells;
}
rval = nvmem_add_cells_from_table(nvmem);
@@ -870,7 +870,6 @@ struct nvmem_device *nvmem_register(const struct nvmem_config *config)
err_remove_cells:
nvmem_device_remove_all_cells(nvmem);
-err_teardown_compat:
if (config->compat)
nvmem_sysfs_remove_compat(nvmem, config);
err_put_device:
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
ab3428cfd9aa ("nvmem: core: fix registration vs use race")
560181d3ace6 ("nvmem: core: fix cleanup after dev_set_name()")
569653f022a2 ("nvmem: core: remove nvmem_config wp_gpio")
3bd747c7ea13 ("nvmem: core: initialise nvmem->id early")
5544e90c8126 ("nvmem: core: add error handling for dev_set_name")
bd1244561fa2 ("nvmem: core: Fix memleak in nvmem_register()")
f6c052afe6f8 ("nvmem: core: Fix a conflict between MTD and NVMEM on wp-gpios property")
de0534df9347 ("nvmem: core: fix error handling while validating keepout regions")
1333a6779501 ("nvmem: core: allow specifying of_node")
fd3bb8f54a88 ("nvmem: core: Add support for keepout regions")
1eb51d6a4fce ("nvmem: switch to simpler IDA interface")
731aa3fae813 ("nvmem: core: add support to auto devid")
b96fc5416b09 ("nvmem: ensure sysfs writes handle write-protect pin")
844003052719 ("nvmem: core: remove nvmem_sysfs_get_groups()")
664f0549380c ("nvmem: core: use is_bin_visible for permissions")
f60442ddc40c ("nvmem: core: use device_register and device_unregister")
e6de179d7a88 ("nvmem: core: add root_only member to nvmem device struct")
3c91ef69a3e9 ("nvmem: check for NULL reg_read and reg_write before dereferencing")
a9c3766cb19c ("nvmem: release the write-protect pin")
f7d8d7dcd978 ("nvmem: fix memory leak in error path")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ab3428cfd9aa2f3463ee4b2909b5bb2193bd0c4a Mon Sep 17 00:00:00 2001
From: "Russell King (Oracle)" <rmk+kernel(a)armlinux.org.uk>
Date: Fri, 27 Jan 2023 10:40:11 +0000
Subject: [PATCH] nvmem: core: fix registration vs use race
The i.MX6 CPU frequency driver sometimes fails to register at boot time
due to nvmem_cell_read_u32() sporadically returning -ENOENT.
This happens because there is a window where __nvmem_device_get() in
of_nvmem_cell_get() is able to return the nvmem device, but as cells
have been setup, nvmem_find_cell_entry_by_node() returns NULL.
The occurs because the nvmem core registration code violates one of the
fundamental principles of kernel programming: do not publish data
structures before their setup is complete.
Fix this by making nvmem core code conform with this principle.
Fixes: eace75cfdcf7 ("nvmem: Add a simple NVMEM framework for nvmem providers")
Cc: stable(a)vger.kernel.org
Signed-off-by: Russell King (Oracle) <rmk+kernel(a)armlinux.org.uk>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla(a)linaro.org>
Link: https://lore.kernel.org/r/20230127104015.23839-7-srinivas.kandagatla@linaro…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c
index ac77a019aed7..e92c6f1aadbb 100644
--- a/drivers/nvmem/core.c
+++ b/drivers/nvmem/core.c
@@ -832,22 +832,16 @@ struct nvmem_device *nvmem_register(const struct nvmem_config *config)
nvmem->dev.groups = nvmem_dev_groups;
#endif
- dev_dbg(&nvmem->dev, "Registering nvmem device %s\n", config->name);
-
- rval = device_add(&nvmem->dev);
- if (rval)
- goto err_put_device;
-
if (nvmem->nkeepout) {
rval = nvmem_validate_keepouts(nvmem);
if (rval)
- goto err_device_del;
+ goto err_put_device;
}
if (config->compat) {
rval = nvmem_sysfs_setup_compat(nvmem, config);
if (rval)
- goto err_device_del;
+ goto err_put_device;
}
if (config->cells) {
@@ -864,6 +858,12 @@ struct nvmem_device *nvmem_register(const struct nvmem_config *config)
if (rval)
goto err_remove_cells;
+ dev_dbg(&nvmem->dev, "Registering nvmem device %s\n", config->name);
+
+ rval = device_add(&nvmem->dev);
+ if (rval)
+ goto err_remove_cells;
+
blocking_notifier_call_chain(&nvmem_notifier, NVMEM_ADD, nvmem);
return nvmem;
@@ -873,8 +873,6 @@ struct nvmem_device *nvmem_register(const struct nvmem_config *config)
err_teardown_compat:
if (config->compat)
nvmem_sysfs_remove_compat(nvmem, config);
-err_device_del:
- device_del(&nvmem->dev);
err_put_device:
put_device(&nvmem->dev);
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
ab3428cfd9aa ("nvmem: core: fix registration vs use race")
560181d3ace6 ("nvmem: core: fix cleanup after dev_set_name()")
569653f022a2 ("nvmem: core: remove nvmem_config wp_gpio")
3bd747c7ea13 ("nvmem: core: initialise nvmem->id early")
5544e90c8126 ("nvmem: core: add error handling for dev_set_name")
bd1244561fa2 ("nvmem: core: Fix memleak in nvmem_register()")
f6c052afe6f8 ("nvmem: core: Fix a conflict between MTD and NVMEM on wp-gpios property")
de0534df9347 ("nvmem: core: fix error handling while validating keepout regions")
1333a6779501 ("nvmem: core: allow specifying of_node")
fd3bb8f54a88 ("nvmem: core: Add support for keepout regions")
1eb51d6a4fce ("nvmem: switch to simpler IDA interface")
731aa3fae813 ("nvmem: core: add support to auto devid")
b96fc5416b09 ("nvmem: ensure sysfs writes handle write-protect pin")
844003052719 ("nvmem: core: remove nvmem_sysfs_get_groups()")
664f0549380c ("nvmem: core: use is_bin_visible for permissions")
f60442ddc40c ("nvmem: core: use device_register and device_unregister")
e6de179d7a88 ("nvmem: core: add root_only member to nvmem device struct")
3c91ef69a3e9 ("nvmem: check for NULL reg_read and reg_write before dereferencing")
a9c3766cb19c ("nvmem: release the write-protect pin")
f7d8d7dcd978 ("nvmem: fix memory leak in error path")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ab3428cfd9aa2f3463ee4b2909b5bb2193bd0c4a Mon Sep 17 00:00:00 2001
From: "Russell King (Oracle)" <rmk+kernel(a)armlinux.org.uk>
Date: Fri, 27 Jan 2023 10:40:11 +0000
Subject: [PATCH] nvmem: core: fix registration vs use race
The i.MX6 CPU frequency driver sometimes fails to register at boot time
due to nvmem_cell_read_u32() sporadically returning -ENOENT.
This happens because there is a window where __nvmem_device_get() in
of_nvmem_cell_get() is able to return the nvmem device, but as cells
have been setup, nvmem_find_cell_entry_by_node() returns NULL.
The occurs because the nvmem core registration code violates one of the
fundamental principles of kernel programming: do not publish data
structures before their setup is complete.
Fix this by making nvmem core code conform with this principle.
Fixes: eace75cfdcf7 ("nvmem: Add a simple NVMEM framework for nvmem providers")
Cc: stable(a)vger.kernel.org
Signed-off-by: Russell King (Oracle) <rmk+kernel(a)armlinux.org.uk>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla(a)linaro.org>
Link: https://lore.kernel.org/r/20230127104015.23839-7-srinivas.kandagatla@linaro…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c
index ac77a019aed7..e92c6f1aadbb 100644
--- a/drivers/nvmem/core.c
+++ b/drivers/nvmem/core.c
@@ -832,22 +832,16 @@ struct nvmem_device *nvmem_register(const struct nvmem_config *config)
nvmem->dev.groups = nvmem_dev_groups;
#endif
- dev_dbg(&nvmem->dev, "Registering nvmem device %s\n", config->name);
-
- rval = device_add(&nvmem->dev);
- if (rval)
- goto err_put_device;
-
if (nvmem->nkeepout) {
rval = nvmem_validate_keepouts(nvmem);
if (rval)
- goto err_device_del;
+ goto err_put_device;
}
if (config->compat) {
rval = nvmem_sysfs_setup_compat(nvmem, config);
if (rval)
- goto err_device_del;
+ goto err_put_device;
}
if (config->cells) {
@@ -864,6 +858,12 @@ struct nvmem_device *nvmem_register(const struct nvmem_config *config)
if (rval)
goto err_remove_cells;
+ dev_dbg(&nvmem->dev, "Registering nvmem device %s\n", config->name);
+
+ rval = device_add(&nvmem->dev);
+ if (rval)
+ goto err_remove_cells;
+
blocking_notifier_call_chain(&nvmem_notifier, NVMEM_ADD, nvmem);
return nvmem;
@@ -873,8 +873,6 @@ struct nvmem_device *nvmem_register(const struct nvmem_config *config)
err_teardown_compat:
if (config->compat)
nvmem_sysfs_remove_compat(nvmem, config);
-err_device_del:
- device_del(&nvmem->dev);
err_put_device:
put_device(&nvmem->dev);
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
ab3428cfd9aa ("nvmem: core: fix registration vs use race")
560181d3ace6 ("nvmem: core: fix cleanup after dev_set_name()")
569653f022a2 ("nvmem: core: remove nvmem_config wp_gpio")
3bd747c7ea13 ("nvmem: core: initialise nvmem->id early")
5544e90c8126 ("nvmem: core: add error handling for dev_set_name")
bd1244561fa2 ("nvmem: core: Fix memleak in nvmem_register()")
f6c052afe6f8 ("nvmem: core: Fix a conflict between MTD and NVMEM on wp-gpios property")
de0534df9347 ("nvmem: core: fix error handling while validating keepout regions")
1333a6779501 ("nvmem: core: allow specifying of_node")
fd3bb8f54a88 ("nvmem: core: Add support for keepout regions")
1eb51d6a4fce ("nvmem: switch to simpler IDA interface")
731aa3fae813 ("nvmem: core: add support to auto devid")
b96fc5416b09 ("nvmem: ensure sysfs writes handle write-protect pin")
844003052719 ("nvmem: core: remove nvmem_sysfs_get_groups()")
664f0549380c ("nvmem: core: use is_bin_visible for permissions")
f60442ddc40c ("nvmem: core: use device_register and device_unregister")
e6de179d7a88 ("nvmem: core: add root_only member to nvmem device struct")
3c91ef69a3e9 ("nvmem: check for NULL reg_read and reg_write before dereferencing")
a9c3766cb19c ("nvmem: release the write-protect pin")
f7d8d7dcd978 ("nvmem: fix memory leak in error path")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ab3428cfd9aa2f3463ee4b2909b5bb2193bd0c4a Mon Sep 17 00:00:00 2001
From: "Russell King (Oracle)" <rmk+kernel(a)armlinux.org.uk>
Date: Fri, 27 Jan 2023 10:40:11 +0000
Subject: [PATCH] nvmem: core: fix registration vs use race
The i.MX6 CPU frequency driver sometimes fails to register at boot time
due to nvmem_cell_read_u32() sporadically returning -ENOENT.
This happens because there is a window where __nvmem_device_get() in
of_nvmem_cell_get() is able to return the nvmem device, but as cells
have been setup, nvmem_find_cell_entry_by_node() returns NULL.
The occurs because the nvmem core registration code violates one of the
fundamental principles of kernel programming: do not publish data
structures before their setup is complete.
Fix this by making nvmem core code conform with this principle.
Fixes: eace75cfdcf7 ("nvmem: Add a simple NVMEM framework for nvmem providers")
Cc: stable(a)vger.kernel.org
Signed-off-by: Russell King (Oracle) <rmk+kernel(a)armlinux.org.uk>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla(a)linaro.org>
Link: https://lore.kernel.org/r/20230127104015.23839-7-srinivas.kandagatla@linaro…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c
index ac77a019aed7..e92c6f1aadbb 100644
--- a/drivers/nvmem/core.c
+++ b/drivers/nvmem/core.c
@@ -832,22 +832,16 @@ struct nvmem_device *nvmem_register(const struct nvmem_config *config)
nvmem->dev.groups = nvmem_dev_groups;
#endif
- dev_dbg(&nvmem->dev, "Registering nvmem device %s\n", config->name);
-
- rval = device_add(&nvmem->dev);
- if (rval)
- goto err_put_device;
-
if (nvmem->nkeepout) {
rval = nvmem_validate_keepouts(nvmem);
if (rval)
- goto err_device_del;
+ goto err_put_device;
}
if (config->compat) {
rval = nvmem_sysfs_setup_compat(nvmem, config);
if (rval)
- goto err_device_del;
+ goto err_put_device;
}
if (config->cells) {
@@ -864,6 +858,12 @@ struct nvmem_device *nvmem_register(const struct nvmem_config *config)
if (rval)
goto err_remove_cells;
+ dev_dbg(&nvmem->dev, "Registering nvmem device %s\n", config->name);
+
+ rval = device_add(&nvmem->dev);
+ if (rval)
+ goto err_remove_cells;
+
blocking_notifier_call_chain(&nvmem_notifier, NVMEM_ADD, nvmem);
return nvmem;
@@ -873,8 +873,6 @@ struct nvmem_device *nvmem_register(const struct nvmem_config *config)
err_teardown_compat:
if (config->compat)
nvmem_sysfs_remove_compat(nvmem, config);
-err_device_del:
- device_del(&nvmem->dev);
err_put_device:
put_device(&nvmem->dev);
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
ab3428cfd9aa ("nvmem: core: fix registration vs use race")
560181d3ace6 ("nvmem: core: fix cleanup after dev_set_name()")
569653f022a2 ("nvmem: core: remove nvmem_config wp_gpio")
3bd747c7ea13 ("nvmem: core: initialise nvmem->id early")
5544e90c8126 ("nvmem: core: add error handling for dev_set_name")
bd1244561fa2 ("nvmem: core: Fix memleak in nvmem_register()")
f6c052afe6f8 ("nvmem: core: Fix a conflict between MTD and NVMEM on wp-gpios property")
de0534df9347 ("nvmem: core: fix error handling while validating keepout regions")
1333a6779501 ("nvmem: core: allow specifying of_node")
fd3bb8f54a88 ("nvmem: core: Add support for keepout regions")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ab3428cfd9aa2f3463ee4b2909b5bb2193bd0c4a Mon Sep 17 00:00:00 2001
From: "Russell King (Oracle)" <rmk+kernel(a)armlinux.org.uk>
Date: Fri, 27 Jan 2023 10:40:11 +0000
Subject: [PATCH] nvmem: core: fix registration vs use race
The i.MX6 CPU frequency driver sometimes fails to register at boot time
due to nvmem_cell_read_u32() sporadically returning -ENOENT.
This happens because there is a window where __nvmem_device_get() in
of_nvmem_cell_get() is able to return the nvmem device, but as cells
have been setup, nvmem_find_cell_entry_by_node() returns NULL.
The occurs because the nvmem core registration code violates one of the
fundamental principles of kernel programming: do not publish data
structures before their setup is complete.
Fix this by making nvmem core code conform with this principle.
Fixes: eace75cfdcf7 ("nvmem: Add a simple NVMEM framework for nvmem providers")
Cc: stable(a)vger.kernel.org
Signed-off-by: Russell King (Oracle) <rmk+kernel(a)armlinux.org.uk>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla(a)linaro.org>
Link: https://lore.kernel.org/r/20230127104015.23839-7-srinivas.kandagatla@linaro…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c
index ac77a019aed7..e92c6f1aadbb 100644
--- a/drivers/nvmem/core.c
+++ b/drivers/nvmem/core.c
@@ -832,22 +832,16 @@ struct nvmem_device *nvmem_register(const struct nvmem_config *config)
nvmem->dev.groups = nvmem_dev_groups;
#endif
- dev_dbg(&nvmem->dev, "Registering nvmem device %s\n", config->name);
-
- rval = device_add(&nvmem->dev);
- if (rval)
- goto err_put_device;
-
if (nvmem->nkeepout) {
rval = nvmem_validate_keepouts(nvmem);
if (rval)
- goto err_device_del;
+ goto err_put_device;
}
if (config->compat) {
rval = nvmem_sysfs_setup_compat(nvmem, config);
if (rval)
- goto err_device_del;
+ goto err_put_device;
}
if (config->cells) {
@@ -864,6 +858,12 @@ struct nvmem_device *nvmem_register(const struct nvmem_config *config)
if (rval)
goto err_remove_cells;
+ dev_dbg(&nvmem->dev, "Registering nvmem device %s\n", config->name);
+
+ rval = device_add(&nvmem->dev);
+ if (rval)
+ goto err_remove_cells;
+
blocking_notifier_call_chain(&nvmem_notifier, NVMEM_ADD, nvmem);
return nvmem;
@@ -873,8 +873,6 @@ struct nvmem_device *nvmem_register(const struct nvmem_config *config)
err_teardown_compat:
if (config->compat)
nvmem_sysfs_remove_compat(nvmem, config);
-err_device_del:
- device_del(&nvmem->dev);
err_put_device:
put_device(&nvmem->dev);
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
ab3428cfd9aa ("nvmem: core: fix registration vs use race")
560181d3ace6 ("nvmem: core: fix cleanup after dev_set_name()")
569653f022a2 ("nvmem: core: remove nvmem_config wp_gpio")
3bd747c7ea13 ("nvmem: core: initialise nvmem->id early")
5544e90c8126 ("nvmem: core: add error handling for dev_set_name")
bd1244561fa2 ("nvmem: core: Fix memleak in nvmem_register()")
f6c052afe6f8 ("nvmem: core: Fix a conflict between MTD and NVMEM on wp-gpios property")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ab3428cfd9aa2f3463ee4b2909b5bb2193bd0c4a Mon Sep 17 00:00:00 2001
From: "Russell King (Oracle)" <rmk+kernel(a)armlinux.org.uk>
Date: Fri, 27 Jan 2023 10:40:11 +0000
Subject: [PATCH] nvmem: core: fix registration vs use race
The i.MX6 CPU frequency driver sometimes fails to register at boot time
due to nvmem_cell_read_u32() sporadically returning -ENOENT.
This happens because there is a window where __nvmem_device_get() in
of_nvmem_cell_get() is able to return the nvmem device, but as cells
have been setup, nvmem_find_cell_entry_by_node() returns NULL.
The occurs because the nvmem core registration code violates one of the
fundamental principles of kernel programming: do not publish data
structures before their setup is complete.
Fix this by making nvmem core code conform with this principle.
Fixes: eace75cfdcf7 ("nvmem: Add a simple NVMEM framework for nvmem providers")
Cc: stable(a)vger.kernel.org
Signed-off-by: Russell King (Oracle) <rmk+kernel(a)armlinux.org.uk>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla(a)linaro.org>
Link: https://lore.kernel.org/r/20230127104015.23839-7-srinivas.kandagatla@linaro…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c
index ac77a019aed7..e92c6f1aadbb 100644
--- a/drivers/nvmem/core.c
+++ b/drivers/nvmem/core.c
@@ -832,22 +832,16 @@ struct nvmem_device *nvmem_register(const struct nvmem_config *config)
nvmem->dev.groups = nvmem_dev_groups;
#endif
- dev_dbg(&nvmem->dev, "Registering nvmem device %s\n", config->name);
-
- rval = device_add(&nvmem->dev);
- if (rval)
- goto err_put_device;
-
if (nvmem->nkeepout) {
rval = nvmem_validate_keepouts(nvmem);
if (rval)
- goto err_device_del;
+ goto err_put_device;
}
if (config->compat) {
rval = nvmem_sysfs_setup_compat(nvmem, config);
if (rval)
- goto err_device_del;
+ goto err_put_device;
}
if (config->cells) {
@@ -864,6 +858,12 @@ struct nvmem_device *nvmem_register(const struct nvmem_config *config)
if (rval)
goto err_remove_cells;
+ dev_dbg(&nvmem->dev, "Registering nvmem device %s\n", config->name);
+
+ rval = device_add(&nvmem->dev);
+ if (rval)
+ goto err_remove_cells;
+
blocking_notifier_call_chain(&nvmem_notifier, NVMEM_ADD, nvmem);
return nvmem;
@@ -873,8 +873,6 @@ struct nvmem_device *nvmem_register(const struct nvmem_config *config)
err_teardown_compat:
if (config->compat)
nvmem_sysfs_remove_compat(nvmem, config);
-err_device_del:
- device_del(&nvmem->dev);
err_put_device:
put_device(&nvmem->dev);
Intel IOMMU driver implements IOTLB flush queue with domain selective
or PASID selective invalidations. In this case there's no need to track
IOVA page range and sync IOTLBs, which may cause significant performance
hit.
This patch adds a check to avoid IOVA gather page and IOTLB sync for
the lazy path.
The performance difference on Sapphire Rapids 100Gb NIC is improved by
the following (as measured by iperf send):
w/o this fix~48 Gbits/s. with this fix ~54 Gbits/s
Cc: <stable(a)vger.kernel.org>
Tested-by: Sanjay Kumar <sanjay.k.kumar(a)intel.com>
Signed-off-by: Sanjay Kumar <sanjay.k.kumar(a)intel.com>
Signed-off-by: Jacob Pan <jacob.jun.pan(a)linux.intel.com>
---
drivers/iommu/intel/iommu.c | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index b4878c7ac008..705a1c66691a 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -4352,7 +4352,13 @@ static size_t intel_iommu_unmap(struct iommu_domain *domain,
if (dmar_domain->max_addr == iova + size)
dmar_domain->max_addr = iova;
- iommu_iotlb_gather_add_page(domain, gather, iova, size);
+ /*
+ * We do not use page-selective IOTLB invalidation in flush queue,
+ * There is no need to track page and sync iotlb. Domain-selective or
+ * PASID-selective validation are used in the flush queue.
+ */
+ if (!gather->queued)
+ iommu_iotlb_gather_add_page(domain, gather, iova, size);
return size;
}
--
2.25.1
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
3f6c02fa712b ("serial: stm32: Merge hard IRQ and threaded IRQ handling into single IRQ handler")
6333a4850621 ("serial: stm32: push DMA RX data before suspending")
6eeb348c8482 ("serial: stm32: terminate / restart DMA transfer at suspend / resume")
e0abc903deea ("serial: stm32: rework RX dma initialization and release")
d1ec8a2eabe9 ("serial: stm32: update throttle and unthrottle ops for dma mode")
33bb2f6ac308 ("serial: stm32: rework RX over DMA")
cc58d0a3f0a4 ("serial: stm32: re-introduce an irq flag condition in usart_receive_chars")
59bd4eedf118 ("serial: stm32: use the defined variable to simplify code")
a7770a4bfcf4 ("serial: stm32: defer probe for dma devices")
cea37afd28f1 ("serial: stm32: defer sysrq processing")
e359b4411c28 ("serial: stm32: fix threaded interrupt handling")
3d530017bef1 ("serial: stm32: update wakeup IRQ management")
c0f3332cb5f2 ("serial: stm32: clean wakeup handling in serial_suspend")
1631eeeaf084 ("serial: stm32: rework wakeup management")
9f77d19207a0 ("serial: stm32: add FIFO flush when port is closed")
12761869f0ef ("serial: stm32: fix wake-up flag handling")
ad7676812437 ("serial: stm32: fix a deadlock condition with wakeup event")
25a8e7611da5 ("serial: stm32: fix TX and RX FIFO thresholds")
f4518a8a75f5 ("serial: stm32: fix startup by enabling usart for reception")
87fd0741d6dc ("serial: stm32: fix probe and remove order for dma")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 3f6c02fa712bd453871877fe1d1969625617471e Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex(a)denx.de>
Date: Fri, 20 Jan 2023 17:03:32 +0100
Subject: [PATCH] serial: stm32: Merge hard IRQ and threaded IRQ handling into
single IRQ handler
Requesting an interrupt with IRQF_ONESHOT will run the primary handler
in the hard-IRQ context even in the force-threaded mode. The
force-threaded mode is used by PREEMPT_RT in order to avoid acquiring
sleeping locks (spinlock_t) in hard-IRQ context. This combination
makes it impossible and leads to "sleeping while atomic" warnings.
Use one interrupt handler for both handlers (primary and secondary)
and drop the IRQF_ONESHOT flag which is not needed.
Fixes: e359b4411c283 ("serial: stm32: fix threaded interrupt handling")
Reviewed-by: Sebastian Andrzej Siewior <bigeasy(a)linutronix.de>
Tested-by: Valentin Caron <valentin.caron(a)foss.st.com> # V3
Signed-off-by: Marek Vasut <marex(a)denx.de>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/20230120160332.57930-1-marex@denx.de
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/tty/serial/stm32-usart.c b/drivers/tty/serial/stm32-usart.c
index a1490033aa16..409e91d6829a 100644
--- a/drivers/tty/serial/stm32-usart.c
+++ b/drivers/tty/serial/stm32-usart.c
@@ -797,25 +797,11 @@ static irqreturn_t stm32_usart_interrupt(int irq, void *ptr)
spin_unlock(&port->lock);
}
- if (stm32_usart_rx_dma_enabled(port))
- return IRQ_WAKE_THREAD;
- else
- return IRQ_HANDLED;
-}
-
-static irqreturn_t stm32_usart_threaded_interrupt(int irq, void *ptr)
-{
- struct uart_port *port = ptr;
- struct tty_port *tport = &port->state->port;
- struct stm32_port *stm32_port = to_stm32_port(port);
- unsigned int size;
- unsigned long flags;
-
/* Receiver timeout irq for DMA RX */
- if (!stm32_port->throttled) {
- spin_lock_irqsave(&port->lock, flags);
+ if (stm32_usart_rx_dma_enabled(port) && !stm32_port->throttled) {
+ spin_lock(&port->lock);
size = stm32_usart_receive_chars(port, false);
- uart_unlock_and_check_sysrq_irqrestore(port, flags);
+ uart_unlock_and_check_sysrq(port);
if (size)
tty_flip_buffer_push(tport);
}
@@ -1015,10 +1001,8 @@ static int stm32_usart_startup(struct uart_port *port)
u32 val;
int ret;
- ret = request_threaded_irq(port->irq, stm32_usart_interrupt,
- stm32_usart_threaded_interrupt,
- IRQF_ONESHOT | IRQF_NO_SUSPEND,
- name, port);
+ ret = request_irq(port->irq, stm32_usart_interrupt,
+ IRQF_NO_SUSPEND, name, port);
if (ret)
return ret;
@@ -1601,13 +1585,6 @@ static int stm32_usart_of_dma_rx_probe(struct stm32_port *stm32port,
struct dma_slave_config config;
int ret;
- /*
- * Using DMA and threaded handler for the console could lead to
- * deadlocks.
- */
- if (uart_console(port))
- return -ENODEV;
-
stm32port->rx_buf = dma_alloc_coherent(dev, RX_BUF_L,
&stm32port->rx_dma_buf,
GFP_KERNEL);
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
3f6c02fa712b ("serial: stm32: Merge hard IRQ and threaded IRQ handling into single IRQ handler")
6333a4850621 ("serial: stm32: push DMA RX data before suspending")
6eeb348c8482 ("serial: stm32: terminate / restart DMA transfer at suspend / resume")
e0abc903deea ("serial: stm32: rework RX dma initialization and release")
d1ec8a2eabe9 ("serial: stm32: update throttle and unthrottle ops for dma mode")
33bb2f6ac308 ("serial: stm32: rework RX over DMA")
cc58d0a3f0a4 ("serial: stm32: re-introduce an irq flag condition in usart_receive_chars")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 3f6c02fa712bd453871877fe1d1969625617471e Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex(a)denx.de>
Date: Fri, 20 Jan 2023 17:03:32 +0100
Subject: [PATCH] serial: stm32: Merge hard IRQ and threaded IRQ handling into
single IRQ handler
Requesting an interrupt with IRQF_ONESHOT will run the primary handler
in the hard-IRQ context even in the force-threaded mode. The
force-threaded mode is used by PREEMPT_RT in order to avoid acquiring
sleeping locks (spinlock_t) in hard-IRQ context. This combination
makes it impossible and leads to "sleeping while atomic" warnings.
Use one interrupt handler for both handlers (primary and secondary)
and drop the IRQF_ONESHOT flag which is not needed.
Fixes: e359b4411c283 ("serial: stm32: fix threaded interrupt handling")
Reviewed-by: Sebastian Andrzej Siewior <bigeasy(a)linutronix.de>
Tested-by: Valentin Caron <valentin.caron(a)foss.st.com> # V3
Signed-off-by: Marek Vasut <marex(a)denx.de>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/20230120160332.57930-1-marex@denx.de
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/tty/serial/stm32-usart.c b/drivers/tty/serial/stm32-usart.c
index a1490033aa16..409e91d6829a 100644
--- a/drivers/tty/serial/stm32-usart.c
+++ b/drivers/tty/serial/stm32-usart.c
@@ -797,25 +797,11 @@ static irqreturn_t stm32_usart_interrupt(int irq, void *ptr)
spin_unlock(&port->lock);
}
- if (stm32_usart_rx_dma_enabled(port))
- return IRQ_WAKE_THREAD;
- else
- return IRQ_HANDLED;
-}
-
-static irqreturn_t stm32_usart_threaded_interrupt(int irq, void *ptr)
-{
- struct uart_port *port = ptr;
- struct tty_port *tport = &port->state->port;
- struct stm32_port *stm32_port = to_stm32_port(port);
- unsigned int size;
- unsigned long flags;
-
/* Receiver timeout irq for DMA RX */
- if (!stm32_port->throttled) {
- spin_lock_irqsave(&port->lock, flags);
+ if (stm32_usart_rx_dma_enabled(port) && !stm32_port->throttled) {
+ spin_lock(&port->lock);
size = stm32_usart_receive_chars(port, false);
- uart_unlock_and_check_sysrq_irqrestore(port, flags);
+ uart_unlock_and_check_sysrq(port);
if (size)
tty_flip_buffer_push(tport);
}
@@ -1015,10 +1001,8 @@ static int stm32_usart_startup(struct uart_port *port)
u32 val;
int ret;
- ret = request_threaded_irq(port->irq, stm32_usart_interrupt,
- stm32_usart_threaded_interrupt,
- IRQF_ONESHOT | IRQF_NO_SUSPEND,
- name, port);
+ ret = request_irq(port->irq, stm32_usart_interrupt,
+ IRQF_NO_SUSPEND, name, port);
if (ret)
return ret;
@@ -1601,13 +1585,6 @@ static int stm32_usart_of_dma_rx_probe(struct stm32_port *stm32port,
struct dma_slave_config config;
int ret;
- /*
- * Using DMA and threaded handler for the console could lead to
- * deadlocks.
- */
- if (uart_console(port))
- return -ENODEV;
-
stm32port->rx_buf = dma_alloc_coherent(dev, RX_BUF_L,
&stm32port->rx_dma_buf,
GFP_KERNEL);
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
31352811e13d ("serial: 8250_dma: Fix DMA Rx completion race")
56dc5074cbec ("serial: 8250_dma: Rearm DMA Rx if more data is pending")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 31352811e13dc2313f101b890fd4b1ce760b5fe7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen(a)linux.intel.com>
Date: Mon, 30 Jan 2023 13:48:40 +0200
Subject: [PATCH] serial: 8250_dma: Fix DMA Rx completion race
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
__dma_rx_complete() is called from two places:
- Through the DMA completion callback dma_rx_complete()
- From serial8250_rx_dma_flush() after IIR_RLSI or IIR_RX_TIMEOUT
The former does not hold port's lock during __dma_rx_complete() which
allows these two to race and potentially insert the same data twice.
Extend port's lock coverage in dma_rx_complete() to prevent the race
and check if the DMA Rx is still pending completion before calling
into __dma_rx_complete().
Reported-by: Gilles BULOZ <gilles.buloz(a)kontron.com>
Tested-by: Gilles BULOZ <gilles.buloz(a)kontron.com>
Fixes: 9ee4b83e51f7 ("serial: 8250: Add support for dmaengine")
Cc: stable(a)vger.kernel.org
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen(a)linux.intel.com>
Link: https://lore.kernel.org/r/20230130114841.25749-2-ilpo.jarvinen@linux.intel.…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/tty/serial/8250/8250_dma.c b/drivers/tty/serial/8250/8250_dma.c
index 37d6af2ec427..5594883a96f8 100644
--- a/drivers/tty/serial/8250/8250_dma.c
+++ b/drivers/tty/serial/8250/8250_dma.c
@@ -62,9 +62,14 @@ static void dma_rx_complete(void *param)
struct uart_8250_dma *dma = p->dma;
unsigned long flags;
- __dma_rx_complete(p);
-
spin_lock_irqsave(&p->port.lock, flags);
+ if (dma->rx_running)
+ __dma_rx_complete(p);
+
+ /*
+ * Cannot be combined with the previous check because __dma_rx_complete()
+ * changes dma->rx_running.
+ */
if (!dma->rx_running && (serial_lsr_in(p) & UART_LSR_DR))
p->dma->rx_dma(p);
spin_unlock_irqrestore(&p->port.lock, flags);
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
31352811e13d ("serial: 8250_dma: Fix DMA Rx completion race")
56dc5074cbec ("serial: 8250_dma: Rearm DMA Rx if more data is pending")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 31352811e13dc2313f101b890fd4b1ce760b5fe7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen(a)linux.intel.com>
Date: Mon, 30 Jan 2023 13:48:40 +0200
Subject: [PATCH] serial: 8250_dma: Fix DMA Rx completion race
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
__dma_rx_complete() is called from two places:
- Through the DMA completion callback dma_rx_complete()
- From serial8250_rx_dma_flush() after IIR_RLSI or IIR_RX_TIMEOUT
The former does not hold port's lock during __dma_rx_complete() which
allows these two to race and potentially insert the same data twice.
Extend port's lock coverage in dma_rx_complete() to prevent the race
and check if the DMA Rx is still pending completion before calling
into __dma_rx_complete().
Reported-by: Gilles BULOZ <gilles.buloz(a)kontron.com>
Tested-by: Gilles BULOZ <gilles.buloz(a)kontron.com>
Fixes: 9ee4b83e51f7 ("serial: 8250: Add support for dmaengine")
Cc: stable(a)vger.kernel.org
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen(a)linux.intel.com>
Link: https://lore.kernel.org/r/20230130114841.25749-2-ilpo.jarvinen@linux.intel.…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/tty/serial/8250/8250_dma.c b/drivers/tty/serial/8250/8250_dma.c
index 37d6af2ec427..5594883a96f8 100644
--- a/drivers/tty/serial/8250/8250_dma.c
+++ b/drivers/tty/serial/8250/8250_dma.c
@@ -62,9 +62,14 @@ static void dma_rx_complete(void *param)
struct uart_8250_dma *dma = p->dma;
unsigned long flags;
- __dma_rx_complete(p);
-
spin_lock_irqsave(&p->port.lock, flags);
+ if (dma->rx_running)
+ __dma_rx_complete(p);
+
+ /*
+ * Cannot be combined with the previous check because __dma_rx_complete()
+ * changes dma->rx_running.
+ */
if (!dma->rx_running && (serial_lsr_in(p) & UART_LSR_DR))
p->dma->rx_dma(p);
spin_unlock_irqrestore(&p->port.lock, flags);
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
31352811e13d ("serial: 8250_dma: Fix DMA Rx completion race")
56dc5074cbec ("serial: 8250_dma: Rearm DMA Rx if more data is pending")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 31352811e13dc2313f101b890fd4b1ce760b5fe7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen(a)linux.intel.com>
Date: Mon, 30 Jan 2023 13:48:40 +0200
Subject: [PATCH] serial: 8250_dma: Fix DMA Rx completion race
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
__dma_rx_complete() is called from two places:
- Through the DMA completion callback dma_rx_complete()
- From serial8250_rx_dma_flush() after IIR_RLSI or IIR_RX_TIMEOUT
The former does not hold port's lock during __dma_rx_complete() which
allows these two to race and potentially insert the same data twice.
Extend port's lock coverage in dma_rx_complete() to prevent the race
and check if the DMA Rx is still pending completion before calling
into __dma_rx_complete().
Reported-by: Gilles BULOZ <gilles.buloz(a)kontron.com>
Tested-by: Gilles BULOZ <gilles.buloz(a)kontron.com>
Fixes: 9ee4b83e51f7 ("serial: 8250: Add support for dmaengine")
Cc: stable(a)vger.kernel.org
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen(a)linux.intel.com>
Link: https://lore.kernel.org/r/20230130114841.25749-2-ilpo.jarvinen@linux.intel.…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/tty/serial/8250/8250_dma.c b/drivers/tty/serial/8250/8250_dma.c
index 37d6af2ec427..5594883a96f8 100644
--- a/drivers/tty/serial/8250/8250_dma.c
+++ b/drivers/tty/serial/8250/8250_dma.c
@@ -62,9 +62,14 @@ static void dma_rx_complete(void *param)
struct uart_8250_dma *dma = p->dma;
unsigned long flags;
- __dma_rx_complete(p);
-
spin_lock_irqsave(&p->port.lock, flags);
+ if (dma->rx_running)
+ __dma_rx_complete(p);
+
+ /*
+ * Cannot be combined with the previous check because __dma_rx_complete()
+ * changes dma->rx_running.
+ */
if (!dma->rx_running && (serial_lsr_in(p) & UART_LSR_DR))
p->dma->rx_dma(p);
spin_unlock_irqrestore(&p->port.lock, flags);
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
31352811e13d ("serial: 8250_dma: Fix DMA Rx completion race")
56dc5074cbec ("serial: 8250_dma: Rearm DMA Rx if more data is pending")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 31352811e13dc2313f101b890fd4b1ce760b5fe7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen(a)linux.intel.com>
Date: Mon, 30 Jan 2023 13:48:40 +0200
Subject: [PATCH] serial: 8250_dma: Fix DMA Rx completion race
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
__dma_rx_complete() is called from two places:
- Through the DMA completion callback dma_rx_complete()
- From serial8250_rx_dma_flush() after IIR_RLSI or IIR_RX_TIMEOUT
The former does not hold port's lock during __dma_rx_complete() which
allows these two to race and potentially insert the same data twice.
Extend port's lock coverage in dma_rx_complete() to prevent the race
and check if the DMA Rx is still pending completion before calling
into __dma_rx_complete().
Reported-by: Gilles BULOZ <gilles.buloz(a)kontron.com>
Tested-by: Gilles BULOZ <gilles.buloz(a)kontron.com>
Fixes: 9ee4b83e51f7 ("serial: 8250: Add support for dmaengine")
Cc: stable(a)vger.kernel.org
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen(a)linux.intel.com>
Link: https://lore.kernel.org/r/20230130114841.25749-2-ilpo.jarvinen@linux.intel.…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/tty/serial/8250/8250_dma.c b/drivers/tty/serial/8250/8250_dma.c
index 37d6af2ec427..5594883a96f8 100644
--- a/drivers/tty/serial/8250/8250_dma.c
+++ b/drivers/tty/serial/8250/8250_dma.c
@@ -62,9 +62,14 @@ static void dma_rx_complete(void *param)
struct uart_8250_dma *dma = p->dma;
unsigned long flags;
- __dma_rx_complete(p);
-
spin_lock_irqsave(&p->port.lock, flags);
+ if (dma->rx_running)
+ __dma_rx_complete(p);
+
+ /*
+ * Cannot be combined with the previous check because __dma_rx_complete()
+ * changes dma->rx_running.
+ */
if (!dma->rx_running && (serial_lsr_in(p) & UART_LSR_DR))
p->dma->rx_dma(p);
spin_unlock_irqrestore(&p->port.lock, flags);
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
31352811e13d ("serial: 8250_dma: Fix DMA Rx completion race")
56dc5074cbec ("serial: 8250_dma: Rearm DMA Rx if more data is pending")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 31352811e13dc2313f101b890fd4b1ce760b5fe7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen(a)linux.intel.com>
Date: Mon, 30 Jan 2023 13:48:40 +0200
Subject: [PATCH] serial: 8250_dma: Fix DMA Rx completion race
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
__dma_rx_complete() is called from two places:
- Through the DMA completion callback dma_rx_complete()
- From serial8250_rx_dma_flush() after IIR_RLSI or IIR_RX_TIMEOUT
The former does not hold port's lock during __dma_rx_complete() which
allows these two to race and potentially insert the same data twice.
Extend port's lock coverage in dma_rx_complete() to prevent the race
and check if the DMA Rx is still pending completion before calling
into __dma_rx_complete().
Reported-by: Gilles BULOZ <gilles.buloz(a)kontron.com>
Tested-by: Gilles BULOZ <gilles.buloz(a)kontron.com>
Fixes: 9ee4b83e51f7 ("serial: 8250: Add support for dmaengine")
Cc: stable(a)vger.kernel.org
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen(a)linux.intel.com>
Link: https://lore.kernel.org/r/20230130114841.25749-2-ilpo.jarvinen@linux.intel.…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/tty/serial/8250/8250_dma.c b/drivers/tty/serial/8250/8250_dma.c
index 37d6af2ec427..5594883a96f8 100644
--- a/drivers/tty/serial/8250/8250_dma.c
+++ b/drivers/tty/serial/8250/8250_dma.c
@@ -62,9 +62,14 @@ static void dma_rx_complete(void *param)
struct uart_8250_dma *dma = p->dma;
unsigned long flags;
- __dma_rx_complete(p);
-
spin_lock_irqsave(&p->port.lock, flags);
+ if (dma->rx_running)
+ __dma_rx_complete(p);
+
+ /*
+ * Cannot be combined with the previous check because __dma_rx_complete()
+ * changes dma->rx_running.
+ */
if (!dma->rx_running && (serial_lsr_in(p) & UART_LSR_DR))
p->dma->rx_dma(p);
spin_unlock_irqrestore(&p->port.lock, flags);
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
31352811e13d ("serial: 8250_dma: Fix DMA Rx completion race")
56dc5074cbec ("serial: 8250_dma: Rearm DMA Rx if more data is pending")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 31352811e13dc2313f101b890fd4b1ce760b5fe7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen(a)linux.intel.com>
Date: Mon, 30 Jan 2023 13:48:40 +0200
Subject: [PATCH] serial: 8250_dma: Fix DMA Rx completion race
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
__dma_rx_complete() is called from two places:
- Through the DMA completion callback dma_rx_complete()
- From serial8250_rx_dma_flush() after IIR_RLSI or IIR_RX_TIMEOUT
The former does not hold port's lock during __dma_rx_complete() which
allows these two to race and potentially insert the same data twice.
Extend port's lock coverage in dma_rx_complete() to prevent the race
and check if the DMA Rx is still pending completion before calling
into __dma_rx_complete().
Reported-by: Gilles BULOZ <gilles.buloz(a)kontron.com>
Tested-by: Gilles BULOZ <gilles.buloz(a)kontron.com>
Fixes: 9ee4b83e51f7 ("serial: 8250: Add support for dmaengine")
Cc: stable(a)vger.kernel.org
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen(a)linux.intel.com>
Link: https://lore.kernel.org/r/20230130114841.25749-2-ilpo.jarvinen@linux.intel.…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/tty/serial/8250/8250_dma.c b/drivers/tty/serial/8250/8250_dma.c
index 37d6af2ec427..5594883a96f8 100644
--- a/drivers/tty/serial/8250/8250_dma.c
+++ b/drivers/tty/serial/8250/8250_dma.c
@@ -62,9 +62,14 @@ static void dma_rx_complete(void *param)
struct uart_8250_dma *dma = p->dma;
unsigned long flags;
- __dma_rx_complete(p);
-
spin_lock_irqsave(&p->port.lock, flags);
+ if (dma->rx_running)
+ __dma_rx_complete(p);
+
+ /*
+ * Cannot be combined with the previous check because __dma_rx_complete()
+ * changes dma->rx_running.
+ */
if (!dma->rx_running && (serial_lsr_in(p) & UART_LSR_DR))
p->dma->rx_dma(p);
spin_unlock_irqrestore(&p->port.lock, flags);
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
57e9af7831dc ("serial: 8250_dma: Fix DMA Rx rearm race")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 57e9af7831dcf211c5c689c2a6f209f4abdf0bce Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen(a)linux.intel.com>
Date: Mon, 30 Jan 2023 13:48:41 +0200
Subject: [PATCH] serial: 8250_dma: Fix DMA Rx rearm race
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
As DMA Rx can be completed from two places, it is possible that DMA Rx
completes before DMA completion callback had a chance to complete it.
Once the previous DMA Rx has been completed, a new one can be started
on the next UART interrupt. The following race is possible
(uart_unlock_and_check_sysrq_irqrestore() replaced with
spin_unlock_irqrestore() for simplicity/clarity):
CPU0 CPU1
dma_rx_complete()
serial8250_handle_irq()
spin_lock_irqsave(&port->lock)
handle_rx_dma()
serial8250_rx_dma_flush()
__dma_rx_complete()
dma->rx_running = 0
// Complete DMA Rx
spin_unlock_irqrestore(&port->lock)
serial8250_handle_irq()
spin_lock_irqsave(&port->lock)
handle_rx_dma()
serial8250_rx_dma()
dma->rx_running = 1
// Setup a new DMA Rx
spin_unlock_irqrestore(&port->lock)
spin_lock_irqsave(&port->lock)
// sees dma->rx_running = 1
__dma_rx_complete()
dma->rx_running = 0
// Incorrectly complete
// running DMA Rx
This race seems somewhat theoretical to occur for real but handle it
correctly regardless. Check what is the DMA status before complething
anything in __dma_rx_complete().
Reported-by: Gilles BULOZ <gilles.buloz(a)kontron.com>
Tested-by: Gilles BULOZ <gilles.buloz(a)kontron.com>
Fixes: 9ee4b83e51f7 ("serial: 8250: Add support for dmaengine")
Cc: stable(a)vger.kernel.org
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen(a)linux.intel.com>
Link: https://lore.kernel.org/r/20230130114841.25749-3-ilpo.jarvinen@linux.intel.…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/tty/serial/8250/8250_dma.c b/drivers/tty/serial/8250/8250_dma.c
index 5594883a96f8..7fa66501792d 100644
--- a/drivers/tty/serial/8250/8250_dma.c
+++ b/drivers/tty/serial/8250/8250_dma.c
@@ -43,15 +43,23 @@ static void __dma_rx_complete(struct uart_8250_port *p)
struct uart_8250_dma *dma = p->dma;
struct tty_port *tty_port = &p->port.state->port;
struct dma_tx_state state;
+ enum dma_status dma_status;
int count;
- dma->rx_running = 0;
- dmaengine_tx_status(dma->rxchan, dma->rx_cookie, &state);
+ /*
+ * New DMA Rx can be started during the completion handler before it
+ * could acquire port's lock and it might still be ongoing. Don't to
+ * anything in such case.
+ */
+ dma_status = dmaengine_tx_status(dma->rxchan, dma->rx_cookie, &state);
+ if (dma_status == DMA_IN_PROGRESS)
+ return;
count = dma->rx_size - state.residue;
tty_insert_flip_string(tty_port, dma->rx_buf, count);
p->port.icount.rx += count;
+ dma->rx_running = 0;
tty_flip_buffer_push(tty_port);
}
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
57e9af7831dc ("serial: 8250_dma: Fix DMA Rx rearm race")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 57e9af7831dcf211c5c689c2a6f209f4abdf0bce Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen(a)linux.intel.com>
Date: Mon, 30 Jan 2023 13:48:41 +0200
Subject: [PATCH] serial: 8250_dma: Fix DMA Rx rearm race
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
As DMA Rx can be completed from two places, it is possible that DMA Rx
completes before DMA completion callback had a chance to complete it.
Once the previous DMA Rx has been completed, a new one can be started
on the next UART interrupt. The following race is possible
(uart_unlock_and_check_sysrq_irqrestore() replaced with
spin_unlock_irqrestore() for simplicity/clarity):
CPU0 CPU1
dma_rx_complete()
serial8250_handle_irq()
spin_lock_irqsave(&port->lock)
handle_rx_dma()
serial8250_rx_dma_flush()
__dma_rx_complete()
dma->rx_running = 0
// Complete DMA Rx
spin_unlock_irqrestore(&port->lock)
serial8250_handle_irq()
spin_lock_irqsave(&port->lock)
handle_rx_dma()
serial8250_rx_dma()
dma->rx_running = 1
// Setup a new DMA Rx
spin_unlock_irqrestore(&port->lock)
spin_lock_irqsave(&port->lock)
// sees dma->rx_running = 1
__dma_rx_complete()
dma->rx_running = 0
// Incorrectly complete
// running DMA Rx
This race seems somewhat theoretical to occur for real but handle it
correctly regardless. Check what is the DMA status before complething
anything in __dma_rx_complete().
Reported-by: Gilles BULOZ <gilles.buloz(a)kontron.com>
Tested-by: Gilles BULOZ <gilles.buloz(a)kontron.com>
Fixes: 9ee4b83e51f7 ("serial: 8250: Add support for dmaengine")
Cc: stable(a)vger.kernel.org
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen(a)linux.intel.com>
Link: https://lore.kernel.org/r/20230130114841.25749-3-ilpo.jarvinen@linux.intel.…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/tty/serial/8250/8250_dma.c b/drivers/tty/serial/8250/8250_dma.c
index 5594883a96f8..7fa66501792d 100644
--- a/drivers/tty/serial/8250/8250_dma.c
+++ b/drivers/tty/serial/8250/8250_dma.c
@@ -43,15 +43,23 @@ static void __dma_rx_complete(struct uart_8250_port *p)
struct uart_8250_dma *dma = p->dma;
struct tty_port *tty_port = &p->port.state->port;
struct dma_tx_state state;
+ enum dma_status dma_status;
int count;
- dma->rx_running = 0;
- dmaengine_tx_status(dma->rxchan, dma->rx_cookie, &state);
+ /*
+ * New DMA Rx can be started during the completion handler before it
+ * could acquire port's lock and it might still be ongoing. Don't to
+ * anything in such case.
+ */
+ dma_status = dmaengine_tx_status(dma->rxchan, dma->rx_cookie, &state);
+ if (dma_status == DMA_IN_PROGRESS)
+ return;
count = dma->rx_size - state.residue;
tty_insert_flip_string(tty_port, dma->rx_buf, count);
p->port.icount.rx += count;
+ dma->rx_running = 0;
tty_flip_buffer_push(tty_port);
}
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
57e9af7831dc ("serial: 8250_dma: Fix DMA Rx rearm race")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 57e9af7831dcf211c5c689c2a6f209f4abdf0bce Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen(a)linux.intel.com>
Date: Mon, 30 Jan 2023 13:48:41 +0200
Subject: [PATCH] serial: 8250_dma: Fix DMA Rx rearm race
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
As DMA Rx can be completed from two places, it is possible that DMA Rx
completes before DMA completion callback had a chance to complete it.
Once the previous DMA Rx has been completed, a new one can be started
on the next UART interrupt. The following race is possible
(uart_unlock_and_check_sysrq_irqrestore() replaced with
spin_unlock_irqrestore() for simplicity/clarity):
CPU0 CPU1
dma_rx_complete()
serial8250_handle_irq()
spin_lock_irqsave(&port->lock)
handle_rx_dma()
serial8250_rx_dma_flush()
__dma_rx_complete()
dma->rx_running = 0
// Complete DMA Rx
spin_unlock_irqrestore(&port->lock)
serial8250_handle_irq()
spin_lock_irqsave(&port->lock)
handle_rx_dma()
serial8250_rx_dma()
dma->rx_running = 1
// Setup a new DMA Rx
spin_unlock_irqrestore(&port->lock)
spin_lock_irqsave(&port->lock)
// sees dma->rx_running = 1
__dma_rx_complete()
dma->rx_running = 0
// Incorrectly complete
// running DMA Rx
This race seems somewhat theoretical to occur for real but handle it
correctly regardless. Check what is the DMA status before complething
anything in __dma_rx_complete().
Reported-by: Gilles BULOZ <gilles.buloz(a)kontron.com>
Tested-by: Gilles BULOZ <gilles.buloz(a)kontron.com>
Fixes: 9ee4b83e51f7 ("serial: 8250: Add support for dmaengine")
Cc: stable(a)vger.kernel.org
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen(a)linux.intel.com>
Link: https://lore.kernel.org/r/20230130114841.25749-3-ilpo.jarvinen@linux.intel.…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/tty/serial/8250/8250_dma.c b/drivers/tty/serial/8250/8250_dma.c
index 5594883a96f8..7fa66501792d 100644
--- a/drivers/tty/serial/8250/8250_dma.c
+++ b/drivers/tty/serial/8250/8250_dma.c
@@ -43,15 +43,23 @@ static void __dma_rx_complete(struct uart_8250_port *p)
struct uart_8250_dma *dma = p->dma;
struct tty_port *tty_port = &p->port.state->port;
struct dma_tx_state state;
+ enum dma_status dma_status;
int count;
- dma->rx_running = 0;
- dmaengine_tx_status(dma->rxchan, dma->rx_cookie, &state);
+ /*
+ * New DMA Rx can be started during the completion handler before it
+ * could acquire port's lock and it might still be ongoing. Don't to
+ * anything in such case.
+ */
+ dma_status = dmaengine_tx_status(dma->rxchan, dma->rx_cookie, &state);
+ if (dma_status == DMA_IN_PROGRESS)
+ return;
count = dma->rx_size - state.residue;
tty_insert_flip_string(tty_port, dma->rx_buf, count);
p->port.icount.rx += count;
+ dma->rx_running = 0;
tty_flip_buffer_push(tty_port);
}
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
57e9af7831dc ("serial: 8250_dma: Fix DMA Rx rearm race")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 57e9af7831dcf211c5c689c2a6f209f4abdf0bce Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen(a)linux.intel.com>
Date: Mon, 30 Jan 2023 13:48:41 +0200
Subject: [PATCH] serial: 8250_dma: Fix DMA Rx rearm race
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
As DMA Rx can be completed from two places, it is possible that DMA Rx
completes before DMA completion callback had a chance to complete it.
Once the previous DMA Rx has been completed, a new one can be started
on the next UART interrupt. The following race is possible
(uart_unlock_and_check_sysrq_irqrestore() replaced with
spin_unlock_irqrestore() for simplicity/clarity):
CPU0 CPU1
dma_rx_complete()
serial8250_handle_irq()
spin_lock_irqsave(&port->lock)
handle_rx_dma()
serial8250_rx_dma_flush()
__dma_rx_complete()
dma->rx_running = 0
// Complete DMA Rx
spin_unlock_irqrestore(&port->lock)
serial8250_handle_irq()
spin_lock_irqsave(&port->lock)
handle_rx_dma()
serial8250_rx_dma()
dma->rx_running = 1
// Setup a new DMA Rx
spin_unlock_irqrestore(&port->lock)
spin_lock_irqsave(&port->lock)
// sees dma->rx_running = 1
__dma_rx_complete()
dma->rx_running = 0
// Incorrectly complete
// running DMA Rx
This race seems somewhat theoretical to occur for real but handle it
correctly regardless. Check what is the DMA status before complething
anything in __dma_rx_complete().
Reported-by: Gilles BULOZ <gilles.buloz(a)kontron.com>
Tested-by: Gilles BULOZ <gilles.buloz(a)kontron.com>
Fixes: 9ee4b83e51f7 ("serial: 8250: Add support for dmaengine")
Cc: stable(a)vger.kernel.org
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen(a)linux.intel.com>
Link: https://lore.kernel.org/r/20230130114841.25749-3-ilpo.jarvinen@linux.intel.…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/tty/serial/8250/8250_dma.c b/drivers/tty/serial/8250/8250_dma.c
index 5594883a96f8..7fa66501792d 100644
--- a/drivers/tty/serial/8250/8250_dma.c
+++ b/drivers/tty/serial/8250/8250_dma.c
@@ -43,15 +43,23 @@ static void __dma_rx_complete(struct uart_8250_port *p)
struct uart_8250_dma *dma = p->dma;
struct tty_port *tty_port = &p->port.state->port;
struct dma_tx_state state;
+ enum dma_status dma_status;
int count;
- dma->rx_running = 0;
- dmaengine_tx_status(dma->rxchan, dma->rx_cookie, &state);
+ /*
+ * New DMA Rx can be started during the completion handler before it
+ * could acquire port's lock and it might still be ongoing. Don't to
+ * anything in such case.
+ */
+ dma_status = dmaengine_tx_status(dma->rxchan, dma->rx_cookie, &state);
+ if (dma_status == DMA_IN_PROGRESS)
+ return;
count = dma->rx_size - state.residue;
tty_insert_flip_string(tty_port, dma->rx_buf, count);
p->port.icount.rx += count;
+ dma->rx_running = 0;
tty_flip_buffer_push(tty_port);
}
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
57e9af7831dc ("serial: 8250_dma: Fix DMA Rx rearm race")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 57e9af7831dcf211c5c689c2a6f209f4abdf0bce Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen(a)linux.intel.com>
Date: Mon, 30 Jan 2023 13:48:41 +0200
Subject: [PATCH] serial: 8250_dma: Fix DMA Rx rearm race
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
As DMA Rx can be completed from two places, it is possible that DMA Rx
completes before DMA completion callback had a chance to complete it.
Once the previous DMA Rx has been completed, a new one can be started
on the next UART interrupt. The following race is possible
(uart_unlock_and_check_sysrq_irqrestore() replaced with
spin_unlock_irqrestore() for simplicity/clarity):
CPU0 CPU1
dma_rx_complete()
serial8250_handle_irq()
spin_lock_irqsave(&port->lock)
handle_rx_dma()
serial8250_rx_dma_flush()
__dma_rx_complete()
dma->rx_running = 0
// Complete DMA Rx
spin_unlock_irqrestore(&port->lock)
serial8250_handle_irq()
spin_lock_irqsave(&port->lock)
handle_rx_dma()
serial8250_rx_dma()
dma->rx_running = 1
// Setup a new DMA Rx
spin_unlock_irqrestore(&port->lock)
spin_lock_irqsave(&port->lock)
// sees dma->rx_running = 1
__dma_rx_complete()
dma->rx_running = 0
// Incorrectly complete
// running DMA Rx
This race seems somewhat theoretical to occur for real but handle it
correctly regardless. Check what is the DMA status before complething
anything in __dma_rx_complete().
Reported-by: Gilles BULOZ <gilles.buloz(a)kontron.com>
Tested-by: Gilles BULOZ <gilles.buloz(a)kontron.com>
Fixes: 9ee4b83e51f7 ("serial: 8250: Add support for dmaengine")
Cc: stable(a)vger.kernel.org
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen(a)linux.intel.com>
Link: https://lore.kernel.org/r/20230130114841.25749-3-ilpo.jarvinen@linux.intel.…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/tty/serial/8250/8250_dma.c b/drivers/tty/serial/8250/8250_dma.c
index 5594883a96f8..7fa66501792d 100644
--- a/drivers/tty/serial/8250/8250_dma.c
+++ b/drivers/tty/serial/8250/8250_dma.c
@@ -43,15 +43,23 @@ static void __dma_rx_complete(struct uart_8250_port *p)
struct uart_8250_dma *dma = p->dma;
struct tty_port *tty_port = &p->port.state->port;
struct dma_tx_state state;
+ enum dma_status dma_status;
int count;
- dma->rx_running = 0;
- dmaengine_tx_status(dma->rxchan, dma->rx_cookie, &state);
+ /*
+ * New DMA Rx can be started during the completion handler before it
+ * could acquire port's lock and it might still be ongoing. Don't to
+ * anything in such case.
+ */
+ dma_status = dmaengine_tx_status(dma->rxchan, dma->rx_cookie, &state);
+ if (dma_status == DMA_IN_PROGRESS)
+ return;
count = dma->rx_size - state.residue;
tty_insert_flip_string(tty_port, dma->rx_buf, count);
p->port.icount.rx += count;
+ dma->rx_running = 0;
tty_flip_buffer_push(tty_port);
}
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
57e9af7831dc ("serial: 8250_dma: Fix DMA Rx rearm race")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 57e9af7831dcf211c5c689c2a6f209f4abdf0bce Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen(a)linux.intel.com>
Date: Mon, 30 Jan 2023 13:48:41 +0200
Subject: [PATCH] serial: 8250_dma: Fix DMA Rx rearm race
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
As DMA Rx can be completed from two places, it is possible that DMA Rx
completes before DMA completion callback had a chance to complete it.
Once the previous DMA Rx has been completed, a new one can be started
on the next UART interrupt. The following race is possible
(uart_unlock_and_check_sysrq_irqrestore() replaced with
spin_unlock_irqrestore() for simplicity/clarity):
CPU0 CPU1
dma_rx_complete()
serial8250_handle_irq()
spin_lock_irqsave(&port->lock)
handle_rx_dma()
serial8250_rx_dma_flush()
__dma_rx_complete()
dma->rx_running = 0
// Complete DMA Rx
spin_unlock_irqrestore(&port->lock)
serial8250_handle_irq()
spin_lock_irqsave(&port->lock)
handle_rx_dma()
serial8250_rx_dma()
dma->rx_running = 1
// Setup a new DMA Rx
spin_unlock_irqrestore(&port->lock)
spin_lock_irqsave(&port->lock)
// sees dma->rx_running = 1
__dma_rx_complete()
dma->rx_running = 0
// Incorrectly complete
// running DMA Rx
This race seems somewhat theoretical to occur for real but handle it
correctly regardless. Check what is the DMA status before complething
anything in __dma_rx_complete().
Reported-by: Gilles BULOZ <gilles.buloz(a)kontron.com>
Tested-by: Gilles BULOZ <gilles.buloz(a)kontron.com>
Fixes: 9ee4b83e51f7 ("serial: 8250: Add support for dmaengine")
Cc: stable(a)vger.kernel.org
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen(a)linux.intel.com>
Link: https://lore.kernel.org/r/20230130114841.25749-3-ilpo.jarvinen@linux.intel.…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/tty/serial/8250/8250_dma.c b/drivers/tty/serial/8250/8250_dma.c
index 5594883a96f8..7fa66501792d 100644
--- a/drivers/tty/serial/8250/8250_dma.c
+++ b/drivers/tty/serial/8250/8250_dma.c
@@ -43,15 +43,23 @@ static void __dma_rx_complete(struct uart_8250_port *p)
struct uart_8250_dma *dma = p->dma;
struct tty_port *tty_port = &p->port.state->port;
struct dma_tx_state state;
+ enum dma_status dma_status;
int count;
- dma->rx_running = 0;
- dmaengine_tx_status(dma->rxchan, dma->rx_cookie, &state);
+ /*
+ * New DMA Rx can be started during the completion handler before it
+ * could acquire port's lock and it might still be ongoing. Don't to
+ * anything in such case.
+ */
+ dma_status = dmaengine_tx_status(dma->rxchan, dma->rx_cookie, &state);
+ if (dma_status == DMA_IN_PROGRESS)
+ return;
count = dma->rx_size - state.residue;
tty_insert_flip_string(tty_port, dma->rx_buf, count);
p->port.icount.rx += count;
+ dma->rx_running = 0;
tty_flip_buffer_push(tty_port);
}
The patch below does not apply to the all-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 57e9af7831dcf211c5c689c2a6f209f4abdf0bce Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen(a)linux.intel.com>
Date: Mon, 30 Jan 2023 13:48:41 +0200
Subject: [PATCH] serial: 8250_dma: Fix DMA Rx rearm race
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
As DMA Rx can be completed from two places, it is possible that DMA Rx
completes before DMA completion callback had a chance to complete it.
Once the previous DMA Rx has been completed, a new one can be started
on the next UART interrupt. The following race is possible
(uart_unlock_and_check_sysrq_irqrestore() replaced with
spin_unlock_irqrestore() for simplicity/clarity):
CPU0 CPU1
dma_rx_complete()
serial8250_handle_irq()
spin_lock_irqsave(&port->lock)
handle_rx_dma()
serial8250_rx_dma_flush()
__dma_rx_complete()
dma->rx_running = 0
// Complete DMA Rx
spin_unlock_irqrestore(&port->lock)
serial8250_handle_irq()
spin_lock_irqsave(&port->lock)
handle_rx_dma()
serial8250_rx_dma()
dma->rx_running = 1
// Setup a new DMA Rx
spin_unlock_irqrestore(&port->lock)
spin_lock_irqsave(&port->lock)
// sees dma->rx_running = 1
__dma_rx_complete()
dma->rx_running = 0
// Incorrectly complete
// running DMA Rx
This race seems somewhat theoretical to occur for real but handle it
correctly regardless. Check what is the DMA status before complething
anything in __dma_rx_complete().
Reported-by: Gilles BULOZ <gilles.buloz(a)kontron.com>
Tested-by: Gilles BULOZ <gilles.buloz(a)kontron.com>
Fixes: 9ee4b83e51f7 ("serial: 8250: Add support for dmaengine")
Cc: stable(a)vger.kernel.org
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen(a)linux.intel.com>
Link: https://lore.kernel.org/r/20230130114841.25749-3-ilpo.jarvinen@linux.intel.…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/tty/serial/8250/8250_dma.c b/drivers/tty/serial/8250/8250_dma.c
index 5594883a96f8..7fa66501792d 100644
--- a/drivers/tty/serial/8250/8250_dma.c
+++ b/drivers/tty/serial/8250/8250_dma.c
@@ -43,15 +43,23 @@ static void __dma_rx_complete(struct uart_8250_port *p)
struct uart_8250_dma *dma = p->dma;
struct tty_port *tty_port = &p->port.state->port;
struct dma_tx_state state;
+ enum dma_status dma_status;
int count;
- dma->rx_running = 0;
- dmaengine_tx_status(dma->rxchan, dma->rx_cookie, &state);
+ /*
+ * New DMA Rx can be started during the completion handler before it
+ * could acquire port's lock and it might still be ongoing. Don't to
+ * anything in such case.
+ */
+ dma_status = dmaengine_tx_status(dma->rxchan, dma->rx_cookie, &state);
+ if (dma_status == DMA_IN_PROGRESS)
+ return;
count = dma->rx_size - state.residue;
tty_insert_flip_string(tty_port, dma->rx_buf, count);
p->port.icount.rx += count;
+ dma->rx_running = 0;
tty_flip_buffer_push(tty_port);
}
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
73bdf65ea748 ("migrate: hugetlb: check for hugetlb shared PMD in node migration")
7ce82f4c3f3e ("mm/migration: return errno when isolate_huge_page failed")
1b7f7e58decc ("mm/gup: Convert check_and_migrate_movable_pages() to use a folio")
f9f38f78c5d5 ("mm: refactor check_and_migrate_movable_pages")
5ac95884a784 ("mm/migrate: enable returning precise migrate_pages() success count")
c5b5a3dd2c1f ("mm: thp: refactor NUMA fault handling")
5db4f15c4fd7 ("mm: memory: add orig_pmd to struct vm_fault")
8f34f1eac382 ("mm/userfaultfd: fix uffd-wp special cases for fork()")
25182f05ffed ("mm,hwpoison: fix race with hugetlb page allocation")
f68749ec342b ("mm/gup: longterm pin migration cleanup")
d1e153fea2a8 ("mm/gup: migrate pinned pages out of movable zone")
1a08ae36cf8b ("mm cma: rename PF_MEMALLOC_NOCMA to PF_MEMALLOC_PIN")
6e7f34ebb8d2 ("mm/gup: check for isolation errors")
f0f4463837da ("mm/gup: return an error on migration failure")
83c02c23d074 ("mm/gup: check every subpage of a compound page during isolation")
c991ffef7bce ("mm/gup: don't pin migrated cma pages in movable zone")
7ee820ee7238 ("Revert "mm: migrate: skip shared exec THP for NUMA balancing"")
ae37c7ff79f1 ("mm: make alloc_contig_range handle in-use hugetlb pages")
369fa227c219 ("mm: make alloc_contig_range handle free hugetlb pages")
c2ad7a1ffeaf ("mm,compaction: let isolate_migratepages_{range,block} return error codes")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 73bdf65ea74857d7fb2ec3067a3cec0e261b1462 Mon Sep 17 00:00:00 2001
From: Mike Kravetz <mike.kravetz(a)oracle.com>
Date: Thu, 26 Jan 2023 14:27:21 -0800
Subject: [PATCH] migrate: hugetlb: check for hugetlb shared PMD in node
migration
migrate_pages/mempolicy semantics state that CAP_SYS_NICE is required to
move pages shared with another process to a different node. page_mapcount
> 1 is being used to determine if a hugetlb page is shared. However, a
hugetlb page will have a mapcount of 1 if mapped by multiple processes via
a shared PMD. As a result, hugetlb pages shared by multiple processes and
mapped with a shared PMD can be moved by a process without CAP_SYS_NICE.
To fix, check for a shared PMD if mapcount is 1. If a shared PMD is found
consider the page shared.
Link: https://lkml.kernel.org/r/20230126222721.222195-3-mike.kravetz@oracle.com
Fixes: e2d8cf405525 ("migrate: add hugepage migration code to migrate_pages()")
Signed-off-by: Mike Kravetz <mike.kravetz(a)oracle.com>
Acked-by: Peter Xu <peterx(a)redhat.com>
Acked-by: David Hildenbrand <david(a)redhat.com>
Cc: James Houghton <jthoughton(a)google.com>
Cc: Matthew Wilcox <willy(a)infradead.org>
Cc: Michal Hocko <mhocko(a)suse.com>
Cc: Muchun Song <songmuchun(a)bytedance.com>
Cc: Naoya Horiguchi <naoya.horiguchi(a)linux.dev>
Cc: Vishal Moola (Oracle) <vishal.moola(a)gmail.com>
Cc: Yang Shi <shy828301(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 02c8a712282f..f940395667c8 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -600,7 +600,8 @@ static int queue_pages_hugetlb(pte_t *pte, unsigned long hmask,
/* With MPOL_MF_MOVE, we migrate only unshared hugepage. */
if (flags & (MPOL_MF_MOVE_ALL) ||
- (flags & MPOL_MF_MOVE && page_mapcount(page) == 1)) {
+ (flags & MPOL_MF_MOVE && page_mapcount(page) == 1 &&
+ !hugetlb_pmd_shared(pte))) {
if (isolate_hugetlb(page, qp->pagelist) &&
(flags & MPOL_MF_STRICT))
/*
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
73bdf65ea748 ("migrate: hugetlb: check for hugetlb shared PMD in node migration")
7ce82f4c3f3e ("mm/migration: return errno when isolate_huge_page failed")
1b7f7e58decc ("mm/gup: Convert check_and_migrate_movable_pages() to use a folio")
f9f38f78c5d5 ("mm: refactor check_and_migrate_movable_pages")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 73bdf65ea74857d7fb2ec3067a3cec0e261b1462 Mon Sep 17 00:00:00 2001
From: Mike Kravetz <mike.kravetz(a)oracle.com>
Date: Thu, 26 Jan 2023 14:27:21 -0800
Subject: [PATCH] migrate: hugetlb: check for hugetlb shared PMD in node
migration
migrate_pages/mempolicy semantics state that CAP_SYS_NICE is required to
move pages shared with another process to a different node. page_mapcount
> 1 is being used to determine if a hugetlb page is shared. However, a
hugetlb page will have a mapcount of 1 if mapped by multiple processes via
a shared PMD. As a result, hugetlb pages shared by multiple processes and
mapped with a shared PMD can be moved by a process without CAP_SYS_NICE.
To fix, check for a shared PMD if mapcount is 1. If a shared PMD is found
consider the page shared.
Link: https://lkml.kernel.org/r/20230126222721.222195-3-mike.kravetz@oracle.com
Fixes: e2d8cf405525 ("migrate: add hugepage migration code to migrate_pages()")
Signed-off-by: Mike Kravetz <mike.kravetz(a)oracle.com>
Acked-by: Peter Xu <peterx(a)redhat.com>
Acked-by: David Hildenbrand <david(a)redhat.com>
Cc: James Houghton <jthoughton(a)google.com>
Cc: Matthew Wilcox <willy(a)infradead.org>
Cc: Michal Hocko <mhocko(a)suse.com>
Cc: Muchun Song <songmuchun(a)bytedance.com>
Cc: Naoya Horiguchi <naoya.horiguchi(a)linux.dev>
Cc: Vishal Moola (Oracle) <vishal.moola(a)gmail.com>
Cc: Yang Shi <shy828301(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 02c8a712282f..f940395667c8 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -600,7 +600,8 @@ static int queue_pages_hugetlb(pte_t *pte, unsigned long hmask,
/* With MPOL_MF_MOVE, we migrate only unshared hugepage. */
if (flags & (MPOL_MF_MOVE_ALL) ||
- (flags & MPOL_MF_MOVE && page_mapcount(page) == 1)) {
+ (flags & MPOL_MF_MOVE && page_mapcount(page) == 1 &&
+ !hugetlb_pmd_shared(pte))) {
if (isolate_hugetlb(page, qp->pagelist) &&
(flags & MPOL_MF_STRICT))
/*
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
023f47a8250c ("mm/khugepaged: fix ->anon_vma race")
34488399fa08 ("mm/madvise: add file and shmem support to MADV_COLLAPSE")
58ac9a8993a1 ("mm/khugepaged: attempt to map file/shmem-backed pte-mapped THPs by pmds")
780a4b6fb865 ("mm/khugepaged: check compound_order() in collapse_pte_mapped_thp()")
b26e27015ec9 ("mm: thp: convert to use common struct mm_slot")
685405020b9f ("mm/khugepaged: stop using vma linked list")
7d2c4385c341 ("mm/khugepaged: rename prefix of shared collapse functions")
7d8faaf15545 ("mm/madvise: introduce MADV_COLLAPSE sync hugepage collapse")
507228044236 ("mm/khugepaged: record SCAN_PMD_MAPPED when scan_pmd() finds hugepage")
a7f4e6e4c47c ("mm/thp: add flag to enforce sysfs THP in hugepage_vma_check()")
50ad2f24b3b4 ("mm/khugepaged: propagate enum scan_result codes back to callers")
9710a78ab2ae ("mm/khugepaged: dedup and simplify hugepage alloc and charging")
34d6b470ab9c ("mm/khugepaged: add struct collapse_control")
c6a7f445a272 ("mm: khugepaged: don't carry huge page to the next loop for !CONFIG_NUMA")
1064026bab9f ("mm: khugepaged: reorg some khugepaged helpers")
7da4e2cb8b1f ("mm: thp: kill __transhuge_page_enabled()")
9fec51689ff6 ("mm: thp: kill transparent_hugepage_active()")
f707fa493784 ("mm: khugepaged: better comments for anon vma check in hugepage_vma_revalidate")
4fa6893faeaa ("mm: thp: consolidate vma size check to transhuge_vma_suitable")
66137fb34a4b ("mm: khugepaged: check THP flag in hugepage_vma_check()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 023f47a8250c6bdb4aebe744db4bf7f73414028b Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh(a)google.com>
Date: Wed, 11 Jan 2023 14:33:51 +0100
Subject: [PATCH] mm/khugepaged: fix ->anon_vma race
If an ->anon_vma is attached to the VMA, collapse_and_free_pmd() requires
it to be locked.
Page table traversal is allowed under any one of the mmap lock, the
anon_vma lock (if the VMA is associated with an anon_vma), and the
mapping lock (if the VMA is associated with a mapping); and so to be
able to remove page tables, we must hold all three of them.
retract_page_tables() bails out if an ->anon_vma is attached, but does
this check before holding the mmap lock (as the comment above the check
explains).
If we racily merged an existing ->anon_vma (shared with a child
process) from a neighboring VMA, subsequent rmap traversals on pages
belonging to the child will be able to see the page tables that we are
concurrently removing while assuming that nothing else can access them.
Repeat the ->anon_vma check once we hold the mmap lock to ensure that
there really is no concurrent page table access.
Hitting this bug causes a lockdep warning in collapse_and_free_pmd(),
in the line "lockdep_assert_held_write(&vma->anon_vma->root->rwsem)".
It can also lead to use-after-free access.
Link: https://lore.kernel.org/linux-mm/CAG48ez3434wZBKFFbdx4M9j6eUwSUVPd4dxhzW_k_…
Link: https://lkml.kernel.org/r/20230111133351.807024-1-jannh@google.com
Fixes: f3f0e1d2150b ("khugepaged: add support of collapse for tmpfs/shmem pages")
Signed-off-by: Jann Horn <jannh(a)google.com>
Reported-by: Zach O'Keefe <zokeefe(a)google.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov(a)intel.linux.com>
Reviewed-by: Yang Shi <shy828301(a)gmail.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 79be13133322..935aa8b71d1c 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1642,7 +1642,7 @@ static int retract_page_tables(struct address_space *mapping, pgoff_t pgoff,
* has higher cost too. It would also probably require locking
* the anon_vma.
*/
- if (vma->anon_vma) {
+ if (READ_ONCE(vma->anon_vma)) {
result = SCAN_PAGE_ANON;
goto next;
}
@@ -1670,6 +1670,18 @@ static int retract_page_tables(struct address_space *mapping, pgoff_t pgoff,
result = SCAN_PTE_MAPPED_HUGEPAGE;
if ((cc->is_khugepaged || is_target) &&
mmap_write_trylock(mm)) {
+ /*
+ * Re-check whether we have an ->anon_vma, because
+ * collapse_and_free_pmd() requires that either no
+ * ->anon_vma exists or the anon_vma is locked.
+ * We already checked ->anon_vma above, but that check
+ * is racy because ->anon_vma can be populated under the
+ * mmap lock in read mode.
+ */
+ if (vma->anon_vma) {
+ result = SCAN_PAGE_ANON;
+ goto unlock_next;
+ }
/*
* When a vma is registered with uffd-wp, we can't
* recycle the pmd pgtable because there can be pte
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
023f47a8250c ("mm/khugepaged: fix ->anon_vma race")
34488399fa08 ("mm/madvise: add file and shmem support to MADV_COLLAPSE")
58ac9a8993a1 ("mm/khugepaged: attempt to map file/shmem-backed pte-mapped THPs by pmds")
780a4b6fb865 ("mm/khugepaged: check compound_order() in collapse_pte_mapped_thp()")
b26e27015ec9 ("mm: thp: convert to use common struct mm_slot")
685405020b9f ("mm/khugepaged: stop using vma linked list")
7d2c4385c341 ("mm/khugepaged: rename prefix of shared collapse functions")
7d8faaf15545 ("mm/madvise: introduce MADV_COLLAPSE sync hugepage collapse")
507228044236 ("mm/khugepaged: record SCAN_PMD_MAPPED when scan_pmd() finds hugepage")
a7f4e6e4c47c ("mm/thp: add flag to enforce sysfs THP in hugepage_vma_check()")
50ad2f24b3b4 ("mm/khugepaged: propagate enum scan_result codes back to callers")
9710a78ab2ae ("mm/khugepaged: dedup and simplify hugepage alloc and charging")
34d6b470ab9c ("mm/khugepaged: add struct collapse_control")
c6a7f445a272 ("mm: khugepaged: don't carry huge page to the next loop for !CONFIG_NUMA")
1064026bab9f ("mm: khugepaged: reorg some khugepaged helpers")
7da4e2cb8b1f ("mm: thp: kill __transhuge_page_enabled()")
9fec51689ff6 ("mm: thp: kill transparent_hugepage_active()")
f707fa493784 ("mm: khugepaged: better comments for anon vma check in hugepage_vma_revalidate")
4fa6893faeaa ("mm: thp: consolidate vma size check to transhuge_vma_suitable")
66137fb34a4b ("mm: khugepaged: check THP flag in hugepage_vma_check()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 023f47a8250c6bdb4aebe744db4bf7f73414028b Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh(a)google.com>
Date: Wed, 11 Jan 2023 14:33:51 +0100
Subject: [PATCH] mm/khugepaged: fix ->anon_vma race
If an ->anon_vma is attached to the VMA, collapse_and_free_pmd() requires
it to be locked.
Page table traversal is allowed under any one of the mmap lock, the
anon_vma lock (if the VMA is associated with an anon_vma), and the
mapping lock (if the VMA is associated with a mapping); and so to be
able to remove page tables, we must hold all three of them.
retract_page_tables() bails out if an ->anon_vma is attached, but does
this check before holding the mmap lock (as the comment above the check
explains).
If we racily merged an existing ->anon_vma (shared with a child
process) from a neighboring VMA, subsequent rmap traversals on pages
belonging to the child will be able to see the page tables that we are
concurrently removing while assuming that nothing else can access them.
Repeat the ->anon_vma check once we hold the mmap lock to ensure that
there really is no concurrent page table access.
Hitting this bug causes a lockdep warning in collapse_and_free_pmd(),
in the line "lockdep_assert_held_write(&vma->anon_vma->root->rwsem)".
It can also lead to use-after-free access.
Link: https://lore.kernel.org/linux-mm/CAG48ez3434wZBKFFbdx4M9j6eUwSUVPd4dxhzW_k_…
Link: https://lkml.kernel.org/r/20230111133351.807024-1-jannh@google.com
Fixes: f3f0e1d2150b ("khugepaged: add support of collapse for tmpfs/shmem pages")
Signed-off-by: Jann Horn <jannh(a)google.com>
Reported-by: Zach O'Keefe <zokeefe(a)google.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov(a)intel.linux.com>
Reviewed-by: Yang Shi <shy828301(a)gmail.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 79be13133322..935aa8b71d1c 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1642,7 +1642,7 @@ static int retract_page_tables(struct address_space *mapping, pgoff_t pgoff,
* has higher cost too. It would also probably require locking
* the anon_vma.
*/
- if (vma->anon_vma) {
+ if (READ_ONCE(vma->anon_vma)) {
result = SCAN_PAGE_ANON;
goto next;
}
@@ -1670,6 +1670,18 @@ static int retract_page_tables(struct address_space *mapping, pgoff_t pgoff,
result = SCAN_PTE_MAPPED_HUGEPAGE;
if ((cc->is_khugepaged || is_target) &&
mmap_write_trylock(mm)) {
+ /*
+ * Re-check whether we have an ->anon_vma, because
+ * collapse_and_free_pmd() requires that either no
+ * ->anon_vma exists or the anon_vma is locked.
+ * We already checked ->anon_vma above, but that check
+ * is racy because ->anon_vma can be populated under the
+ * mmap lock in read mode.
+ */
+ if (vma->anon_vma) {
+ result = SCAN_PAGE_ANON;
+ goto unlock_next;
+ }
/*
* When a vma is registered with uffd-wp, we can't
* recycle the pmd pgtable because there can be pte
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
023f47a8250c ("mm/khugepaged: fix ->anon_vma race")
34488399fa08 ("mm/madvise: add file and shmem support to MADV_COLLAPSE")
58ac9a8993a1 ("mm/khugepaged: attempt to map file/shmem-backed pte-mapped THPs by pmds")
780a4b6fb865 ("mm/khugepaged: check compound_order() in collapse_pte_mapped_thp()")
b26e27015ec9 ("mm: thp: convert to use common struct mm_slot")
685405020b9f ("mm/khugepaged: stop using vma linked list")
7d2c4385c341 ("mm/khugepaged: rename prefix of shared collapse functions")
7d8faaf15545 ("mm/madvise: introduce MADV_COLLAPSE sync hugepage collapse")
507228044236 ("mm/khugepaged: record SCAN_PMD_MAPPED when scan_pmd() finds hugepage")
a7f4e6e4c47c ("mm/thp: add flag to enforce sysfs THP in hugepage_vma_check()")
50ad2f24b3b4 ("mm/khugepaged: propagate enum scan_result codes back to callers")
9710a78ab2ae ("mm/khugepaged: dedup and simplify hugepage alloc and charging")
34d6b470ab9c ("mm/khugepaged: add struct collapse_control")
c6a7f445a272 ("mm: khugepaged: don't carry huge page to the next loop for !CONFIG_NUMA")
1064026bab9f ("mm: khugepaged: reorg some khugepaged helpers")
7da4e2cb8b1f ("mm: thp: kill __transhuge_page_enabled()")
9fec51689ff6 ("mm: thp: kill transparent_hugepage_active()")
f707fa493784 ("mm: khugepaged: better comments for anon vma check in hugepage_vma_revalidate")
4fa6893faeaa ("mm: thp: consolidate vma size check to transhuge_vma_suitable")
66137fb34a4b ("mm: khugepaged: check THP flag in hugepage_vma_check()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 023f47a8250c6bdb4aebe744db4bf7f73414028b Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh(a)google.com>
Date: Wed, 11 Jan 2023 14:33:51 +0100
Subject: [PATCH] mm/khugepaged: fix ->anon_vma race
If an ->anon_vma is attached to the VMA, collapse_and_free_pmd() requires
it to be locked.
Page table traversal is allowed under any one of the mmap lock, the
anon_vma lock (if the VMA is associated with an anon_vma), and the
mapping lock (if the VMA is associated with a mapping); and so to be
able to remove page tables, we must hold all three of them.
retract_page_tables() bails out if an ->anon_vma is attached, but does
this check before holding the mmap lock (as the comment above the check
explains).
If we racily merged an existing ->anon_vma (shared with a child
process) from a neighboring VMA, subsequent rmap traversals on pages
belonging to the child will be able to see the page tables that we are
concurrently removing while assuming that nothing else can access them.
Repeat the ->anon_vma check once we hold the mmap lock to ensure that
there really is no concurrent page table access.
Hitting this bug causes a lockdep warning in collapse_and_free_pmd(),
in the line "lockdep_assert_held_write(&vma->anon_vma->root->rwsem)".
It can also lead to use-after-free access.
Link: https://lore.kernel.org/linux-mm/CAG48ez3434wZBKFFbdx4M9j6eUwSUVPd4dxhzW_k_…
Link: https://lkml.kernel.org/r/20230111133351.807024-1-jannh@google.com
Fixes: f3f0e1d2150b ("khugepaged: add support of collapse for tmpfs/shmem pages")
Signed-off-by: Jann Horn <jannh(a)google.com>
Reported-by: Zach O'Keefe <zokeefe(a)google.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov(a)intel.linux.com>
Reviewed-by: Yang Shi <shy828301(a)gmail.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 79be13133322..935aa8b71d1c 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1642,7 +1642,7 @@ static int retract_page_tables(struct address_space *mapping, pgoff_t pgoff,
* has higher cost too. It would also probably require locking
* the anon_vma.
*/
- if (vma->anon_vma) {
+ if (READ_ONCE(vma->anon_vma)) {
result = SCAN_PAGE_ANON;
goto next;
}
@@ -1670,6 +1670,18 @@ static int retract_page_tables(struct address_space *mapping, pgoff_t pgoff,
result = SCAN_PTE_MAPPED_HUGEPAGE;
if ((cc->is_khugepaged || is_target) &&
mmap_write_trylock(mm)) {
+ /*
+ * Re-check whether we have an ->anon_vma, because
+ * collapse_and_free_pmd() requires that either no
+ * ->anon_vma exists or the anon_vma is locked.
+ * We already checked ->anon_vma above, but that check
+ * is racy because ->anon_vma can be populated under the
+ * mmap lock in read mode.
+ */
+ if (vma->anon_vma) {
+ result = SCAN_PAGE_ANON;
+ goto unlock_next;
+ }
/*
* When a vma is registered with uffd-wp, we can't
* recycle the pmd pgtable because there can be pte
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
023f47a8250c ("mm/khugepaged: fix ->anon_vma race")
34488399fa08 ("mm/madvise: add file and shmem support to MADV_COLLAPSE")
58ac9a8993a1 ("mm/khugepaged: attempt to map file/shmem-backed pte-mapped THPs by pmds")
780a4b6fb865 ("mm/khugepaged: check compound_order() in collapse_pte_mapped_thp()")
b26e27015ec9 ("mm: thp: convert to use common struct mm_slot")
685405020b9f ("mm/khugepaged: stop using vma linked list")
7d2c4385c341 ("mm/khugepaged: rename prefix of shared collapse functions")
7d8faaf15545 ("mm/madvise: introduce MADV_COLLAPSE sync hugepage collapse")
507228044236 ("mm/khugepaged: record SCAN_PMD_MAPPED when scan_pmd() finds hugepage")
a7f4e6e4c47c ("mm/thp: add flag to enforce sysfs THP in hugepage_vma_check()")
50ad2f24b3b4 ("mm/khugepaged: propagate enum scan_result codes back to callers")
9710a78ab2ae ("mm/khugepaged: dedup and simplify hugepage alloc and charging")
34d6b470ab9c ("mm/khugepaged: add struct collapse_control")
c6a7f445a272 ("mm: khugepaged: don't carry huge page to the next loop for !CONFIG_NUMA")
1064026bab9f ("mm: khugepaged: reorg some khugepaged helpers")
7da4e2cb8b1f ("mm: thp: kill __transhuge_page_enabled()")
9fec51689ff6 ("mm: thp: kill transparent_hugepage_active()")
f707fa493784 ("mm: khugepaged: better comments for anon vma check in hugepage_vma_revalidate")
4fa6893faeaa ("mm: thp: consolidate vma size check to transhuge_vma_suitable")
66137fb34a4b ("mm: khugepaged: check THP flag in hugepage_vma_check()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 023f47a8250c6bdb4aebe744db4bf7f73414028b Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh(a)google.com>
Date: Wed, 11 Jan 2023 14:33:51 +0100
Subject: [PATCH] mm/khugepaged: fix ->anon_vma race
If an ->anon_vma is attached to the VMA, collapse_and_free_pmd() requires
it to be locked.
Page table traversal is allowed under any one of the mmap lock, the
anon_vma lock (if the VMA is associated with an anon_vma), and the
mapping lock (if the VMA is associated with a mapping); and so to be
able to remove page tables, we must hold all three of them.
retract_page_tables() bails out if an ->anon_vma is attached, but does
this check before holding the mmap lock (as the comment above the check
explains).
If we racily merged an existing ->anon_vma (shared with a child
process) from a neighboring VMA, subsequent rmap traversals on pages
belonging to the child will be able to see the page tables that we are
concurrently removing while assuming that nothing else can access them.
Repeat the ->anon_vma check once we hold the mmap lock to ensure that
there really is no concurrent page table access.
Hitting this bug causes a lockdep warning in collapse_and_free_pmd(),
in the line "lockdep_assert_held_write(&vma->anon_vma->root->rwsem)".
It can also lead to use-after-free access.
Link: https://lore.kernel.org/linux-mm/CAG48ez3434wZBKFFbdx4M9j6eUwSUVPd4dxhzW_k_…
Link: https://lkml.kernel.org/r/20230111133351.807024-1-jannh@google.com
Fixes: f3f0e1d2150b ("khugepaged: add support of collapse for tmpfs/shmem pages")
Signed-off-by: Jann Horn <jannh(a)google.com>
Reported-by: Zach O'Keefe <zokeefe(a)google.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov(a)intel.linux.com>
Reviewed-by: Yang Shi <shy828301(a)gmail.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 79be13133322..935aa8b71d1c 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1642,7 +1642,7 @@ static int retract_page_tables(struct address_space *mapping, pgoff_t pgoff,
* has higher cost too. It would also probably require locking
* the anon_vma.
*/
- if (vma->anon_vma) {
+ if (READ_ONCE(vma->anon_vma)) {
result = SCAN_PAGE_ANON;
goto next;
}
@@ -1670,6 +1670,18 @@ static int retract_page_tables(struct address_space *mapping, pgoff_t pgoff,
result = SCAN_PTE_MAPPED_HUGEPAGE;
if ((cc->is_khugepaged || is_target) &&
mmap_write_trylock(mm)) {
+ /*
+ * Re-check whether we have an ->anon_vma, because
+ * collapse_and_free_pmd() requires that either no
+ * ->anon_vma exists or the anon_vma is locked.
+ * We already checked ->anon_vma above, but that check
+ * is racy because ->anon_vma can be populated under the
+ * mmap lock in read mode.
+ */
+ if (vma->anon_vma) {
+ result = SCAN_PAGE_ANON;
+ goto unlock_next;
+ }
/*
* When a vma is registered with uffd-wp, we can't
* recycle the pmd pgtable because there can be pte
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
023f47a8250c ("mm/khugepaged: fix ->anon_vma race")
34488399fa08 ("mm/madvise: add file and shmem support to MADV_COLLAPSE")
58ac9a8993a1 ("mm/khugepaged: attempt to map file/shmem-backed pte-mapped THPs by pmds")
780a4b6fb865 ("mm/khugepaged: check compound_order() in collapse_pte_mapped_thp()")
b26e27015ec9 ("mm: thp: convert to use common struct mm_slot")
685405020b9f ("mm/khugepaged: stop using vma linked list")
7d2c4385c341 ("mm/khugepaged: rename prefix of shared collapse functions")
7d8faaf15545 ("mm/madvise: introduce MADV_COLLAPSE sync hugepage collapse")
507228044236 ("mm/khugepaged: record SCAN_PMD_MAPPED when scan_pmd() finds hugepage")
a7f4e6e4c47c ("mm/thp: add flag to enforce sysfs THP in hugepage_vma_check()")
50ad2f24b3b4 ("mm/khugepaged: propagate enum scan_result codes back to callers")
9710a78ab2ae ("mm/khugepaged: dedup and simplify hugepage alloc and charging")
34d6b470ab9c ("mm/khugepaged: add struct collapse_control")
c6a7f445a272 ("mm: khugepaged: don't carry huge page to the next loop for !CONFIG_NUMA")
1064026bab9f ("mm: khugepaged: reorg some khugepaged helpers")
7da4e2cb8b1f ("mm: thp: kill __transhuge_page_enabled()")
9fec51689ff6 ("mm: thp: kill transparent_hugepage_active()")
f707fa493784 ("mm: khugepaged: better comments for anon vma check in hugepage_vma_revalidate")
4fa6893faeaa ("mm: thp: consolidate vma size check to transhuge_vma_suitable")
66137fb34a4b ("mm: khugepaged: check THP flag in hugepage_vma_check()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 023f47a8250c6bdb4aebe744db4bf7f73414028b Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh(a)google.com>
Date: Wed, 11 Jan 2023 14:33:51 +0100
Subject: [PATCH] mm/khugepaged: fix ->anon_vma race
If an ->anon_vma is attached to the VMA, collapse_and_free_pmd() requires
it to be locked.
Page table traversal is allowed under any one of the mmap lock, the
anon_vma lock (if the VMA is associated with an anon_vma), and the
mapping lock (if the VMA is associated with a mapping); and so to be
able to remove page tables, we must hold all three of them.
retract_page_tables() bails out if an ->anon_vma is attached, but does
this check before holding the mmap lock (as the comment above the check
explains).
If we racily merged an existing ->anon_vma (shared with a child
process) from a neighboring VMA, subsequent rmap traversals on pages
belonging to the child will be able to see the page tables that we are
concurrently removing while assuming that nothing else can access them.
Repeat the ->anon_vma check once we hold the mmap lock to ensure that
there really is no concurrent page table access.
Hitting this bug causes a lockdep warning in collapse_and_free_pmd(),
in the line "lockdep_assert_held_write(&vma->anon_vma->root->rwsem)".
It can also lead to use-after-free access.
Link: https://lore.kernel.org/linux-mm/CAG48ez3434wZBKFFbdx4M9j6eUwSUVPd4dxhzW_k_…
Link: https://lkml.kernel.org/r/20230111133351.807024-1-jannh@google.com
Fixes: f3f0e1d2150b ("khugepaged: add support of collapse for tmpfs/shmem pages")
Signed-off-by: Jann Horn <jannh(a)google.com>
Reported-by: Zach O'Keefe <zokeefe(a)google.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov(a)intel.linux.com>
Reviewed-by: Yang Shi <shy828301(a)gmail.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 79be13133322..935aa8b71d1c 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1642,7 +1642,7 @@ static int retract_page_tables(struct address_space *mapping, pgoff_t pgoff,
* has higher cost too. It would also probably require locking
* the anon_vma.
*/
- if (vma->anon_vma) {
+ if (READ_ONCE(vma->anon_vma)) {
result = SCAN_PAGE_ANON;
goto next;
}
@@ -1670,6 +1670,18 @@ static int retract_page_tables(struct address_space *mapping, pgoff_t pgoff,
result = SCAN_PTE_MAPPED_HUGEPAGE;
if ((cc->is_khugepaged || is_target) &&
mmap_write_trylock(mm)) {
+ /*
+ * Re-check whether we have an ->anon_vma, because
+ * collapse_and_free_pmd() requires that either no
+ * ->anon_vma exists or the anon_vma is locked.
+ * We already checked ->anon_vma above, but that check
+ * is racy because ->anon_vma can be populated under the
+ * mmap lock in read mode.
+ */
+ if (vma->anon_vma) {
+ result = SCAN_PAGE_ANON;
+ goto unlock_next;
+ }
/*
* When a vma is registered with uffd-wp, we can't
* recycle the pmd pgtable because there can be pte
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
7294194b47e9 ("powerpc/kexec_file: Fix division by zero in extra size estimation")
340a4a9f8773 ("powerpc: Take in account addition CPU node when building kexec FDT")
886db32398ab ("powerpc/kexec_file: Restore FDT size estimation for kdump kernel")
3c985d31ad66 ("powerpc: Use common of_kexec_alloc_and_setup_fdt()")
e6635bab530d ("powerpc: Use ELF fields defined in 'struct kimage'")
2377c92e37fe ("powerpc/kexec_file: fix FDT size estimation for kdump kernel")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7294194b47e994753a86eee8cf1c61f3f36458a3 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe(a)ellerman.id.au>
Date: Mon, 30 Jan 2023 12:47:07 +1100
Subject: [PATCH] powerpc/kexec_file: Fix division by zero in extra size
estimation
In kexec_extra_fdt_size_ppc64() there's logic to estimate how much
extra space will be needed in the device tree for some memory related
properties.
That logic uses the size of RAM divided by drmem_lmb_size() to do the
estimation. However drmem_lmb_size() can be zero if the machine has no
hotpluggable memory configured, which is the case when booting with qemu
and no maxmem=x parameter is passed (the default).
The division by zero is reported by UBSAN, and can also lead to an
overflow and a warning from kvmalloc, and kdump kernel loading fails:
WARNING: CPU: 0 PID: 133 at mm/util.c:596 kvmalloc_node+0x15c/0x160
Modules linked in:
CPU: 0 PID: 133 Comm: kexec Not tainted 6.2.0-rc5-03455-g07358bd97810 #223
Hardware name: IBM pSeries (emulated by qemu) POWER9 (raw) 0x4e1200 0xf000005 of:SLOF,git-dd0dca pSeries
NIP: c00000000041ff4c LR: c00000000041fe58 CTR: 0000000000000000
REGS: c0000000096ef750 TRAP: 0700 Not tainted (6.2.0-rc5-03455-g07358bd97810)
MSR: 800000000282b033 <SF,VEC,VSX,EE,FP,ME,IR,DR,RI,LE> CR: 24248242 XER: 2004011e
CFAR: c00000000041fed0 IRQMASK: 0
...
NIP kvmalloc_node+0x15c/0x160
LR kvmalloc_node+0x68/0x160
Call Trace:
kvmalloc_node+0x68/0x160 (unreliable)
of_kexec_alloc_and_setup_fdt+0xb8/0x7d0
elf64_load+0x25c/0x4a0
kexec_image_load_default+0x58/0x80
sys_kexec_file_load+0x5c0/0x920
system_call_exception+0x128/0x330
system_call_vectored_common+0x15c/0x2ec
To fix it, skip the calculation if drmem_lmb_size() is zero.
Fixes: 2377c92e37fe ("powerpc/kexec_file: fix FDT size estimation for kdump kernel")
Cc: stable(a)vger.kernel.org # v5.12+
Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au>
Link: https://lore.kernel.org/r/20230130014707.541110-1-mpe@ellerman.id.au
diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c
index af8854f9eae3..19d084682bc2 100644
--- a/arch/powerpc/kexec/file_load_64.c
+++ b/arch/powerpc/kexec/file_load_64.c
@@ -989,10 +989,13 @@ unsigned int kexec_extra_fdt_size_ppc64(struct kimage *image)
* linux,drconf-usable-memory properties. Get an approximate on the
* number of usable memory entries and use for FDT size estimation.
*/
- usm_entries = ((memblock_end_of_DRAM() / drmem_lmb_size()) +
- (2 * (resource_size(&crashk_res) / drmem_lmb_size())));
-
- extra_size = (unsigned int)(usm_entries * sizeof(u64));
+ if (drmem_lmb_size()) {
+ usm_entries = ((memblock_end_of_DRAM() / drmem_lmb_size()) +
+ (2 * (resource_size(&crashk_res) / drmem_lmb_size())));
+ extra_size = (unsigned int)(usm_entries * sizeof(u64));
+ } else {
+ extra_size = 0;
+ }
/*
* Get the number of CPU nodes in the current DT. This allows to
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
7294194b47e9 ("powerpc/kexec_file: Fix division by zero in extra size estimation")
340a4a9f8773 ("powerpc: Take in account addition CPU node when building kexec FDT")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7294194b47e994753a86eee8cf1c61f3f36458a3 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe(a)ellerman.id.au>
Date: Mon, 30 Jan 2023 12:47:07 +1100
Subject: [PATCH] powerpc/kexec_file: Fix division by zero in extra size
estimation
In kexec_extra_fdt_size_ppc64() there's logic to estimate how much
extra space will be needed in the device tree for some memory related
properties.
That logic uses the size of RAM divided by drmem_lmb_size() to do the
estimation. However drmem_lmb_size() can be zero if the machine has no
hotpluggable memory configured, which is the case when booting with qemu
and no maxmem=x parameter is passed (the default).
The division by zero is reported by UBSAN, and can also lead to an
overflow and a warning from kvmalloc, and kdump kernel loading fails:
WARNING: CPU: 0 PID: 133 at mm/util.c:596 kvmalloc_node+0x15c/0x160
Modules linked in:
CPU: 0 PID: 133 Comm: kexec Not tainted 6.2.0-rc5-03455-g07358bd97810 #223
Hardware name: IBM pSeries (emulated by qemu) POWER9 (raw) 0x4e1200 0xf000005 of:SLOF,git-dd0dca pSeries
NIP: c00000000041ff4c LR: c00000000041fe58 CTR: 0000000000000000
REGS: c0000000096ef750 TRAP: 0700 Not tainted (6.2.0-rc5-03455-g07358bd97810)
MSR: 800000000282b033 <SF,VEC,VSX,EE,FP,ME,IR,DR,RI,LE> CR: 24248242 XER: 2004011e
CFAR: c00000000041fed0 IRQMASK: 0
...
NIP kvmalloc_node+0x15c/0x160
LR kvmalloc_node+0x68/0x160
Call Trace:
kvmalloc_node+0x68/0x160 (unreliable)
of_kexec_alloc_and_setup_fdt+0xb8/0x7d0
elf64_load+0x25c/0x4a0
kexec_image_load_default+0x58/0x80
sys_kexec_file_load+0x5c0/0x920
system_call_exception+0x128/0x330
system_call_vectored_common+0x15c/0x2ec
To fix it, skip the calculation if drmem_lmb_size() is zero.
Fixes: 2377c92e37fe ("powerpc/kexec_file: fix FDT size estimation for kdump kernel")
Cc: stable(a)vger.kernel.org # v5.12+
Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au>
Link: https://lore.kernel.org/r/20230130014707.541110-1-mpe@ellerman.id.au
diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c
index af8854f9eae3..19d084682bc2 100644
--- a/arch/powerpc/kexec/file_load_64.c
+++ b/arch/powerpc/kexec/file_load_64.c
@@ -989,10 +989,13 @@ unsigned int kexec_extra_fdt_size_ppc64(struct kimage *image)
* linux,drconf-usable-memory properties. Get an approximate on the
* number of usable memory entries and use for FDT size estimation.
*/
- usm_entries = ((memblock_end_of_DRAM() / drmem_lmb_size()) +
- (2 * (resource_size(&crashk_res) / drmem_lmb_size())));
-
- extra_size = (unsigned int)(usm_entries * sizeof(u64));
+ if (drmem_lmb_size()) {
+ usm_entries = ((memblock_end_of_DRAM() / drmem_lmb_size()) +
+ (2 * (resource_size(&crashk_res) / drmem_lmb_size())));
+ extra_size = (unsigned int)(usm_entries * sizeof(u64));
+ } else {
+ extra_size = 0;
+ }
/*
* Get the number of CPU nodes in the current DT. This allows to
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
7294194b47e9 ("powerpc/kexec_file: Fix division by zero in extra size estimation")
340a4a9f8773 ("powerpc: Take in account addition CPU node when building kexec FDT")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7294194b47e994753a86eee8cf1c61f3f36458a3 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe(a)ellerman.id.au>
Date: Mon, 30 Jan 2023 12:47:07 +1100
Subject: [PATCH] powerpc/kexec_file: Fix division by zero in extra size
estimation
In kexec_extra_fdt_size_ppc64() there's logic to estimate how much
extra space will be needed in the device tree for some memory related
properties.
That logic uses the size of RAM divided by drmem_lmb_size() to do the
estimation. However drmem_lmb_size() can be zero if the machine has no
hotpluggable memory configured, which is the case when booting with qemu
and no maxmem=x parameter is passed (the default).
The division by zero is reported by UBSAN, and can also lead to an
overflow and a warning from kvmalloc, and kdump kernel loading fails:
WARNING: CPU: 0 PID: 133 at mm/util.c:596 kvmalloc_node+0x15c/0x160
Modules linked in:
CPU: 0 PID: 133 Comm: kexec Not tainted 6.2.0-rc5-03455-g07358bd97810 #223
Hardware name: IBM pSeries (emulated by qemu) POWER9 (raw) 0x4e1200 0xf000005 of:SLOF,git-dd0dca pSeries
NIP: c00000000041ff4c LR: c00000000041fe58 CTR: 0000000000000000
REGS: c0000000096ef750 TRAP: 0700 Not tainted (6.2.0-rc5-03455-g07358bd97810)
MSR: 800000000282b033 <SF,VEC,VSX,EE,FP,ME,IR,DR,RI,LE> CR: 24248242 XER: 2004011e
CFAR: c00000000041fed0 IRQMASK: 0
...
NIP kvmalloc_node+0x15c/0x160
LR kvmalloc_node+0x68/0x160
Call Trace:
kvmalloc_node+0x68/0x160 (unreliable)
of_kexec_alloc_and_setup_fdt+0xb8/0x7d0
elf64_load+0x25c/0x4a0
kexec_image_load_default+0x58/0x80
sys_kexec_file_load+0x5c0/0x920
system_call_exception+0x128/0x330
system_call_vectored_common+0x15c/0x2ec
To fix it, skip the calculation if drmem_lmb_size() is zero.
Fixes: 2377c92e37fe ("powerpc/kexec_file: fix FDT size estimation for kdump kernel")
Cc: stable(a)vger.kernel.org # v5.12+
Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au>
Link: https://lore.kernel.org/r/20230130014707.541110-1-mpe@ellerman.id.au
diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c
index af8854f9eae3..19d084682bc2 100644
--- a/arch/powerpc/kexec/file_load_64.c
+++ b/arch/powerpc/kexec/file_load_64.c
@@ -989,10 +989,13 @@ unsigned int kexec_extra_fdt_size_ppc64(struct kimage *image)
* linux,drconf-usable-memory properties. Get an approximate on the
* number of usable memory entries and use for FDT size estimation.
*/
- usm_entries = ((memblock_end_of_DRAM() / drmem_lmb_size()) +
- (2 * (resource_size(&crashk_res) / drmem_lmb_size())));
-
- extra_size = (unsigned int)(usm_entries * sizeof(u64));
+ if (drmem_lmb_size()) {
+ usm_entries = ((memblock_end_of_DRAM() / drmem_lmb_size()) +
+ (2 * (resource_size(&crashk_res) / drmem_lmb_size())));
+ extra_size = (unsigned int)(usm_entries * sizeof(u64));
+ } else {
+ extra_size = 0;
+ }
/*
* Get the number of CPU nodes in the current DT. This allows to
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
ac86f547ca10 ("mm: memcg: fix NULL pointer in mem_cgroup_track_foreign_dirty_slowpath()")
203a31516616 ("mm/writeback: Add __folio_mark_dirty()")
9d8053fc7a21 ("mm/memcg: Convert mem_cgroup_track_foreign_dirty_slowpath() to folio")
a49d0c507759 ("mm/writeback: Add folio_wait_stable()")
490e016f229a ("mm/writeback: Add folio_wait_writeback()")
4268b48077e5 ("mm/filemap: Add folio_end_writeback()")
575ced1c8b0d ("mm/swap: Add folio_rotate_reclaimable()")
4e1364286d0a ("mm/filemap: Add folio_unlock()")
2f52578f9c64 ("mm/util: Add folio_mapping() and folio_file_mapping()")
3a6b2162005f ("mm: move page dirtying prototypes from mm.h")
6e1cae881a06 ("mm/writeback: move __set_page_dirty() to core mm")
34ebcce79324 ("fs: unexport __set_page_dirty")
87e378974975 ("mm/page-writeback: use __this_cpu_inc() in account_page_dirtied()")
9620ad86d0e3 ("afs: Re-enable freezing once a page fault is interrupted")
d479960e44f2 ("mm: disable LRU pagevec during the migration temporarily")
198fba4137a1 ("mm/mmzone.h: fix existing kernel-doc comments and link them to core-api")
a87132a22991 ("mm/doc: add mm.h and mm_types.h to the mm-api document")
842ca547f706 ("mm: move page_mapping_file to pagemap.h")
5cbf03985c67 ("afs: Use new netfs lib read helper API")
dc4191841d09 ("afs: Use the fs operation ops to handle FetchData completion")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ac86f547ca1002aec2ef66b9e64d03f45bbbfbb9 Mon Sep 17 00:00:00 2001
From: Kefeng Wang <wangkefeng.wang(a)huawei.com>
Date: Sun, 29 Jan 2023 12:09:45 +0800
Subject: [PATCH] mm: memcg: fix NULL pointer in
mem_cgroup_track_foreign_dirty_slowpath()
As commit 18365225f044 ("hwpoison, memcg: forcibly uncharge LRU pages"),
hwpoison will forcibly uncharg a LRU hwpoisoned page, the folio_memcg
could be NULl, then, mem_cgroup_track_foreign_dirty_slowpath() could
occurs a NULL pointer dereference, let's do not record the foreign
writebacks for folio memcg is null in mem_cgroup_track_foreign_dirty() to
fix it.
Link: https://lkml.kernel.org/r/20230129040945.180629-1-wangkefeng.wang@huawei.com
Fixes: 97b27821b485 ("writeback, memcg: Implement foreign dirty flushing")
Signed-off-by: Kefeng Wang <wangkefeng.wang(a)huawei.com>
Reported-by: Ma Wupeng <mawupeng1(a)huawei.com>
Tested-by: Miko Larsson <mikoxyzzz(a)gmail.com>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Cc: Jan Kara <jack(a)suse.cz>
Cc: Jens Axboe <axboe(a)kernel.dk>
Cc: Kefeng Wang <wangkefeng.wang(a)huawei.com>
Cc: Ma Wupeng <mawupeng1(a)huawei.com>
Cc: Naoya Horiguchi <naoya.horiguchi(a)nec.com>
Cc: Shakeel Butt <shakeelb(a)google.com>
Cc: Tejun Heo <tj(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index d3c8203cab6c..85dc9b88ea37 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -1666,10 +1666,13 @@ void mem_cgroup_track_foreign_dirty_slowpath(struct folio *folio,
static inline void mem_cgroup_track_foreign_dirty(struct folio *folio,
struct bdi_writeback *wb)
{
+ struct mem_cgroup *memcg;
+
if (mem_cgroup_disabled())
return;
- if (unlikely(&folio_memcg(folio)->css != wb->memcg_css))
+ memcg = folio_memcg(folio);
+ if (unlikely(memcg && &memcg->css != wb->memcg_css))
mem_cgroup_track_foreign_dirty_slowpath(folio, wb);
}
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
ac86f547ca10 ("mm: memcg: fix NULL pointer in mem_cgroup_track_foreign_dirty_slowpath()")
203a31516616 ("mm/writeback: Add __folio_mark_dirty()")
9d8053fc7a21 ("mm/memcg: Convert mem_cgroup_track_foreign_dirty_slowpath() to folio")
a49d0c507759 ("mm/writeback: Add folio_wait_stable()")
490e016f229a ("mm/writeback: Add folio_wait_writeback()")
4268b48077e5 ("mm/filemap: Add folio_end_writeback()")
575ced1c8b0d ("mm/swap: Add folio_rotate_reclaimable()")
4e1364286d0a ("mm/filemap: Add folio_unlock()")
2f52578f9c64 ("mm/util: Add folio_mapping() and folio_file_mapping()")
3a6b2162005f ("mm: move page dirtying prototypes from mm.h")
6e1cae881a06 ("mm/writeback: move __set_page_dirty() to core mm")
34ebcce79324 ("fs: unexport __set_page_dirty")
87e378974975 ("mm/page-writeback: use __this_cpu_inc() in account_page_dirtied()")
9620ad86d0e3 ("afs: Re-enable freezing once a page fault is interrupted")
d479960e44f2 ("mm: disable LRU pagevec during the migration temporarily")
198fba4137a1 ("mm/mmzone.h: fix existing kernel-doc comments and link them to core-api")
a87132a22991 ("mm/doc: add mm.h and mm_types.h to the mm-api document")
842ca547f706 ("mm: move page_mapping_file to pagemap.h")
5cbf03985c67 ("afs: Use new netfs lib read helper API")
dc4191841d09 ("afs: Use the fs operation ops to handle FetchData completion")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ac86f547ca1002aec2ef66b9e64d03f45bbbfbb9 Mon Sep 17 00:00:00 2001
From: Kefeng Wang <wangkefeng.wang(a)huawei.com>
Date: Sun, 29 Jan 2023 12:09:45 +0800
Subject: [PATCH] mm: memcg: fix NULL pointer in
mem_cgroup_track_foreign_dirty_slowpath()
As commit 18365225f044 ("hwpoison, memcg: forcibly uncharge LRU pages"),
hwpoison will forcibly uncharg a LRU hwpoisoned page, the folio_memcg
could be NULl, then, mem_cgroup_track_foreign_dirty_slowpath() could
occurs a NULL pointer dereference, let's do not record the foreign
writebacks for folio memcg is null in mem_cgroup_track_foreign_dirty() to
fix it.
Link: https://lkml.kernel.org/r/20230129040945.180629-1-wangkefeng.wang@huawei.com
Fixes: 97b27821b485 ("writeback, memcg: Implement foreign dirty flushing")
Signed-off-by: Kefeng Wang <wangkefeng.wang(a)huawei.com>
Reported-by: Ma Wupeng <mawupeng1(a)huawei.com>
Tested-by: Miko Larsson <mikoxyzzz(a)gmail.com>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Cc: Jan Kara <jack(a)suse.cz>
Cc: Jens Axboe <axboe(a)kernel.dk>
Cc: Kefeng Wang <wangkefeng.wang(a)huawei.com>
Cc: Ma Wupeng <mawupeng1(a)huawei.com>
Cc: Naoya Horiguchi <naoya.horiguchi(a)nec.com>
Cc: Shakeel Butt <shakeelb(a)google.com>
Cc: Tejun Heo <tj(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index d3c8203cab6c..85dc9b88ea37 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -1666,10 +1666,13 @@ void mem_cgroup_track_foreign_dirty_slowpath(struct folio *folio,
static inline void mem_cgroup_track_foreign_dirty(struct folio *folio,
struct bdi_writeback *wb)
{
+ struct mem_cgroup *memcg;
+
if (mem_cgroup_disabled())
return;
- if (unlikely(&folio_memcg(folio)->css != wb->memcg_css))
+ memcg = folio_memcg(folio);
+ if (unlikely(memcg && &memcg->css != wb->memcg_css))
mem_cgroup_track_foreign_dirty_slowpath(folio, wb);
}
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
ac86f547ca10 ("mm: memcg: fix NULL pointer in mem_cgroup_track_foreign_dirty_slowpath()")
203a31516616 ("mm/writeback: Add __folio_mark_dirty()")
9d8053fc7a21 ("mm/memcg: Convert mem_cgroup_track_foreign_dirty_slowpath() to folio")
a49d0c507759 ("mm/writeback: Add folio_wait_stable()")
490e016f229a ("mm/writeback: Add folio_wait_writeback()")
4268b48077e5 ("mm/filemap: Add folio_end_writeback()")
575ced1c8b0d ("mm/swap: Add folio_rotate_reclaimable()")
4e1364286d0a ("mm/filemap: Add folio_unlock()")
2f52578f9c64 ("mm/util: Add folio_mapping() and folio_file_mapping()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ac86f547ca1002aec2ef66b9e64d03f45bbbfbb9 Mon Sep 17 00:00:00 2001
From: Kefeng Wang <wangkefeng.wang(a)huawei.com>
Date: Sun, 29 Jan 2023 12:09:45 +0800
Subject: [PATCH] mm: memcg: fix NULL pointer in
mem_cgroup_track_foreign_dirty_slowpath()
As commit 18365225f044 ("hwpoison, memcg: forcibly uncharge LRU pages"),
hwpoison will forcibly uncharg a LRU hwpoisoned page, the folio_memcg
could be NULl, then, mem_cgroup_track_foreign_dirty_slowpath() could
occurs a NULL pointer dereference, let's do not record the foreign
writebacks for folio memcg is null in mem_cgroup_track_foreign_dirty() to
fix it.
Link: https://lkml.kernel.org/r/20230129040945.180629-1-wangkefeng.wang@huawei.com
Fixes: 97b27821b485 ("writeback, memcg: Implement foreign dirty flushing")
Signed-off-by: Kefeng Wang <wangkefeng.wang(a)huawei.com>
Reported-by: Ma Wupeng <mawupeng1(a)huawei.com>
Tested-by: Miko Larsson <mikoxyzzz(a)gmail.com>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Cc: Jan Kara <jack(a)suse.cz>
Cc: Jens Axboe <axboe(a)kernel.dk>
Cc: Kefeng Wang <wangkefeng.wang(a)huawei.com>
Cc: Ma Wupeng <mawupeng1(a)huawei.com>
Cc: Naoya Horiguchi <naoya.horiguchi(a)nec.com>
Cc: Shakeel Butt <shakeelb(a)google.com>
Cc: Tejun Heo <tj(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index d3c8203cab6c..85dc9b88ea37 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -1666,10 +1666,13 @@ void mem_cgroup_track_foreign_dirty_slowpath(struct folio *folio,
static inline void mem_cgroup_track_foreign_dirty(struct folio *folio,
struct bdi_writeback *wb)
{
+ struct mem_cgroup *memcg;
+
if (mem_cgroup_disabled())
return;
- if (unlikely(&folio_memcg(folio)->css != wb->memcg_css))
+ memcg = folio_memcg(folio);
+ if (unlikely(memcg && &memcg->css != wb->memcg_css))
mem_cgroup_track_foreign_dirty_slowpath(folio, wb);
}
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
0b1d60d6dd9e ("riscv: Fix build with CONFIG_CC_OPTIMIZE_FOR_SIZE=y")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 0b1d60d6dd9e2e867cc6e4277d73ea5a7ff2d4d0 Mon Sep 17 00:00:00 2001
From: Samuel Holland <samuel(a)sholland.org>
Date: Thu, 22 Sep 2022 01:09:58 -0500
Subject: [PATCH] riscv: Fix build with CONFIG_CC_OPTIMIZE_FOR_SIZE=y
commit 8eb060e10185 ("arch/riscv: add Zihintpause support") broke
building with CONFIG_CC_OPTIMIZE_FOR_SIZE enabled (gcc 11.1.0):
CC arch/riscv/kernel/vdso/vgettimeofday.o
In file included from <command-line>:
./arch/riscv/include/asm/jump_label.h: In function 'cpu_relax':
././include/linux/compiler_types.h:285:33: warning: 'asm' operand 0 probably does not match constraints
285 | #define asm_volatile_goto(x...) asm goto(x)
| ^~~
./arch/riscv/include/asm/jump_label.h:41:9: note: in expansion of macro 'asm_volatile_goto'
41 | asm_volatile_goto(
| ^~~~~~~~~~~~~~~~~
././include/linux/compiler_types.h:285:33: error: impossible constraint in 'asm'
285 | #define asm_volatile_goto(x...) asm goto(x)
| ^~~
./arch/riscv/include/asm/jump_label.h:41:9: note: in expansion of macro 'asm_volatile_goto'
41 | asm_volatile_goto(
| ^~~~~~~~~~~~~~~~~
make[1]: *** [scripts/Makefile.build:249: arch/riscv/kernel/vdso/vgettimeofday.o] Error 1
make: *** [arch/riscv/Makefile:128: vdso_prepare] Error 2
Having a static branch in cpu_relax() is problematic because that
function is widely inlined, including in some quite complex functions
like in the VDSO. A quick measurement shows this static branch is
responsible by itself for around 40% of the jump table.
Drop the static branch, which ends up being the same number of
instructions anyway. If Zihintpause is supported, we trade the nop from
the static branch for a div. If Zihintpause is unsupported, we trade the
jump from the static branch for (what gets interpreted as) a nop.
Fixes: 8eb060e10185 ("arch/riscv: add Zihintpause support")
Signed-off-by: Samuel Holland <samuel(a)sholland.org>
Reviewed-by: Conor Dooley <conor.dooley(a)microchip.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Palmer Dabbelt <palmer(a)rivosinc.com>
diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h
index 3c8a5ca95c72..3bf10a8e665a 100644
--- a/arch/riscv/include/asm/hwcap.h
+++ b/arch/riscv/include/asm/hwcap.h
@@ -67,7 +67,6 @@ enum riscv_isa_ext_id {
*/
enum riscv_isa_ext_key {
RISCV_ISA_EXT_KEY_FPU, /* For 'F' and 'D' */
- RISCV_ISA_EXT_KEY_ZIHINTPAUSE,
RISCV_ISA_EXT_KEY_MAX,
};
@@ -87,8 +86,6 @@ static __always_inline int riscv_isa_ext2key(int num)
return RISCV_ISA_EXT_KEY_FPU;
case RISCV_ISA_EXT_d:
return RISCV_ISA_EXT_KEY_FPU;
- case RISCV_ISA_EXT_ZIHINTPAUSE:
- return RISCV_ISA_EXT_KEY_ZIHINTPAUSE;
default:
return -EINVAL;
}
diff --git a/arch/riscv/include/asm/vdso/processor.h b/arch/riscv/include/asm/vdso/processor.h
index 1e4f8b4aef79..789bdb8211a2 100644
--- a/arch/riscv/include/asm/vdso/processor.h
+++ b/arch/riscv/include/asm/vdso/processor.h
@@ -4,30 +4,25 @@
#ifndef __ASSEMBLY__
-#include <linux/jump_label.h>
#include <asm/barrier.h>
-#include <asm/hwcap.h>
static inline void cpu_relax(void)
{
- if (!static_branch_likely(&riscv_isa_ext_keys[RISCV_ISA_EXT_KEY_ZIHINTPAUSE])) {
#ifdef __riscv_muldiv
- int dummy;
- /* In lieu of a halt instruction, induce a long-latency stall. */
- __asm__ __volatile__ ("div %0, %0, zero" : "=r" (dummy));
+ int dummy;
+ /* In lieu of a halt instruction, induce a long-latency stall. */
+ __asm__ __volatile__ ("div %0, %0, zero" : "=r" (dummy));
#endif
- } else {
- /*
- * Reduce instruction retirement.
- * This assumes the PC changes.
- */
+ /*
+ * Reduce instruction retirement.
+ * This assumes the PC changes.
+ */
#ifdef __riscv_zihintpause
- __asm__ __volatile__ ("pause");
+ __asm__ __volatile__ ("pause");
#else
- /* Encoding of the pause instruction */
- __asm__ __volatile__ (".4byte 0x100000F");
+ /* Encoding of the pause instruction */
+ __asm__ __volatile__ (".4byte 0x100000F");
#endif
- }
barrier();
}
From: Jan Kara <jack(a)suse.cz>
commit c1ad35dd0548ce947d97aaf92f7f2f9a202951cf upstream
udf_write_fi() uses lengthOfImpUse of the entry it is writing to.
However this field has not yet been initialized so it either contains
completely bogus value or value from last directory entry at that place.
In either case this is wrong and can lead to filesystem corruption or
kernel crashes.
This patch deviates from the original upstream patch because in the original
upstream patch, udf_get_fi_ident(sfi) was being used instead of (uint8_t *)sfi->fileIdent + liu
as the first arg to memcpy at line 77 and line 81. Those subsequent lines have been
replaced with what the upstream patch passes in to memcpy.
Reported-by: butt3rflyh4ck <butterflyhuangxx(a)gmail.com>
CC: stable(a)vger.kernel.org
Fixes: 979a6e28dd96 ("udf: Get rid of 0-length arrays in struct fileIdentDesc")
Signed-off-by: Jan Kara <jack(a)suse.cz>
Signed-off-by: Nobel Barakat <nobelbarakat(a)google.com>
---
fs/udf/namei.c | 9 ++++-----
1 file changed, 4 insertions(+), 5 deletions(-)
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 77b6d89b9bcd..cbd6ad54a23b 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -74,12 +74,11 @@ int udf_write_fi(struct inode *inode, struct fileIdentDesc *cfi,
if (fileident) {
if (adinicb || (offset + lfi < 0)) {
- memcpy((uint8_t *)sfi->fileIdent + liu, fileident, lfi);
+ memcpy(sfi->impUse + liu, fileident, lfi);
} else if (offset >= 0) {
memcpy(fibh->ebh->b_data + offset, fileident, lfi);
} else {
- memcpy((uint8_t *)sfi->fileIdent + liu, fileident,
- -offset);
+ memcpy(sfi->impUse + liu, fileident, -offset);
memcpy(fibh->ebh->b_data, fileident - offset,
lfi + offset);
}
@@ -88,11 +87,11 @@ int udf_write_fi(struct inode *inode, struct fileIdentDesc *cfi,
offset += lfi;
if (adinicb || (offset + padlen < 0)) {
- memset((uint8_t *)sfi->padding + liu + lfi, 0x00, padlen);
+ memset(sfi->impUse + liu + lfi, 0x00, padlen);
} else if (offset >= 0) {
memset(fibh->ebh->b_data + offset, 0x00, padlen);
} else {
- memset((uint8_t *)sfi->padding + liu + lfi, 0x00, -offset);
+ memset(sfi->impUse + liu + lfi, 0x00, -offset);
memset(fibh->ebh->b_data, 0x00, padlen + offset);
}
--
2.39.1.519.gcb327c4b5f-goog
Hi Greetings...,
How are you doing today, I hope this email finds you in good health. You have not responded to my previous emails to you regarding Mr. Husson.
Kindly acknowledge my proposal and let me know what your decisions are, if you are taking the offer.
Get back to me as soon as you can for further details.
Best regards,
Mr. Chaoxiang Genghis.
The following commit has been merged into the locking/urgent branch of tip:
Commit-ID: db370a8b9f67ae5f17e3d5482493294467784504
Gitweb: https://git.kernel.org/tip/db370a8b9f67ae5f17e3d5482493294467784504
Author: Wander Lairson Costa <wander(a)redhat.com>
AuthorDate: Thu, 02 Feb 2023 09:30:20 -03:00
Committer: Thomas Gleixner <tglx(a)linutronix.de>
CommitterDate: Mon, 06 Feb 2023 14:49:13 +01:00
rtmutex: Ensure that the top waiter is always woken up
Let L1 and L2 be two spinlocks.
Let T1 be a task holding L1 and blocked on L2. T1, currently, is the top
waiter of L2.
Let T2 be the task holding L2.
Let T3 be a task trying to acquire L1.
The following events will lead to a state in which the wait queue of L2
isn't empty, but no task actually holds the lock.
T1 T2 T3
== == ==
spin_lock(L1)
| raw_spin_lock(L1->wait_lock)
| rtlock_slowlock_locked(L1)
| | task_blocks_on_rt_mutex(L1, T3)
| | | orig_waiter->lock = L1
| | | orig_waiter->task = T3
| | | raw_spin_unlock(L1->wait_lock)
| | | rt_mutex_adjust_prio_chain(T1, L1, L2, orig_waiter, T3)
spin_unlock(L2) | | | |
| rt_mutex_slowunlock(L2) | | | |
| | raw_spin_lock(L2->wait_lock) | | | |
| | wakeup(T1) | | | |
| | raw_spin_unlock(L2->wait_lock) | | | |
| | | | waiter = T1->pi_blocked_on
| | | | waiter == rt_mutex_top_waiter(L2)
| | | | waiter->task == T1
| | | | raw_spin_lock(L2->wait_lock)
| | | | dequeue(L2, waiter)
| | | | update_prio(waiter, T1)
| | | | enqueue(L2, waiter)
| | | | waiter != rt_mutex_top_waiter(L2)
| | | | L2->owner == NULL
| | | | wakeup(T1)
| | | | raw_spin_unlock(L2->wait_lock)
T1 wakes up
T1 != top_waiter(L2)
schedule_rtlock()
If the deadline of T1 is updated before the call to update_prio(), and the
new deadline is greater than the deadline of the second top waiter, then
after the requeue, T1 is no longer the top waiter, and the wrong task is
woken up which will then go back to sleep because it is not the top waiter.
This can be reproduced in PREEMPT_RT with stress-ng:
while true; do
stress-ng --sched deadline --sched-period 1000000000 \
--sched-runtime 800000000 --sched-deadline \
1000000000 --mmapfork 23 -t 20
done
A similar issue was pointed out by Thomas versus the cases where the top
waiter drops out early due to a signal or timeout, which is a general issue
for all regular rtmutex use cases, e.g. futex.
The problematic code is in rt_mutex_adjust_prio_chain():
// Save the top waiter before dequeue/enqueue
prerequeue_top_waiter = rt_mutex_top_waiter(lock);
rt_mutex_dequeue(lock, waiter);
waiter_update_prio(waiter, task);
rt_mutex_enqueue(lock, waiter);
// Lock has no owner?
if (!rt_mutex_owner(lock)) {
// Top waiter changed
----> if (prerequeue_top_waiter != rt_mutex_top_waiter(lock))
----> wake_up_state(waiter->task, waiter->wake_state);
This only takes the case into account where @waiter is the new top waiter
due to the requeue operation.
But it fails to handle the case where @waiter is not longer the top
waiter due to the requeue operation.
Ensure that the new top waiter is woken up so in all cases so it can take
over the ownerless lock.
[ tglx: Amend changelog, add Fixes tag ]
Fixes: c014ef69b3ac ("locking/rtmutex: Add wake_state to rt_mutex_waiter")
Signed-off-by: Wander Lairson Costa <wander(a)redhat.com>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/20230117172649.52465-1-wander@redhat.com
Link: https://lore.kernel.org/r/20230202123020.14844-1-wander@redhat.com
---
kernel/locking/rtmutex.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 010cf4e..728f434 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -901,8 +901,9 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
* then we need to wake the new top waiter up to try
* to get the lock.
*/
- if (prerequeue_top_waiter != rt_mutex_top_waiter(lock))
- wake_up_state(waiter->task, waiter->wake_state);
+ top_waiter = rt_mutex_top_waiter(lock);
+ if (prerequeue_top_waiter != top_waiter)
+ wake_up_state(top_waiter->task, top_waiter->wake_state);
raw_spin_unlock_irq(&lock->wait_lock);
return 0;
}
This is the start of the stable review cycle for the 6.1.10 release.
There are 28 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Sun, 05 Feb 2023 10:09:58 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v6.x/stable-review/patch-6.1.10-rc1…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-6.1.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 6.1.10-rc1
Jeremy Kerr <jk(a)codeconstruct.com.au>
net: mctp: purge receive queues on sk destruction
Miguel Ojeda <ojeda(a)kernel.org>
rust: print: avoid evaluating arguments in `pr_*` macros in `unsafe` blocks
Yan Zhai <yan(a)cloudflare.com>
net: fix NULL pointer in skb_segment_list
Mario Limonciello <mario.limonciello(a)amd.com>
gpiolib-acpi: Don't set GPIOs for wakeup in S3 mode
Mario Limonciello <mario.limonciello(a)amd.com>
gpiolib: acpi: Add a ignore wakeup quirk for Clevo NL5xRU
Janne Grunau <j(a)jannau.net>
nvme-apple: only reset the controller when RTKit is running
Paulo Alcantara <pc(a)cjr.nz>
cifs: fix return of uninitialized rc in dfs_cache_update_tgthint()
Mario Limonciello <mario.limonciello(a)amd.com>
gpiolib: acpi: Allow ignoring wake capability on pins that aren't in _AEI
Hui Wang <hui.wang(a)canonical.com>
dmaengine: imx-sdma: Fix a possible memory leak in sdma_transfer_init
Roderick Colenbrander <roderick(a)gaikai.com>
HID: playstation: sanity check DualSense calibration data.
José Expósito <jose.exposito89(a)gmail.com>
HID: uclogic: Add support for XP-PEN Deco 01 V2
Heiko Carstens <hca(a)linux.ibm.com>
s390: workaround invalid gcc-11 out of bounds read warning
Pavel Begunkov <asml.silence(a)gmail.com>
block: fix hctx checks for batch allocation
Hans de Goede <hdegoede(a)redhat.com>
ACPI: video: Add backlight=native DMI quirk for Acer Aspire 4810T
Jinyang He <hejinyang(a)loongson.cn>
LoongArch: Get frame info in unwind_start() when regs is not available
Yu Kuai <yukuai3(a)huawei.com>
blk-cgroup: fix missing pd_online_fn() while activating policy
Jingbo Xu <jefflexu(a)linux.alibaba.com>
erofs: clean up parsing of fscache related options
Mark Brown <broonie(a)kernel.org>
kselftest: Fix error message for unconfigured LLVM builds
Arnd Bergmann <arnd(a)arndb.de>
ARM: omap1: fix building gpio15xx
Dominik Kobinski <dominikkobinski314(a)gmail.com>
arm64: dts: msm8994-angler: fix the memory map
Sriram R <quic_srirrama(a)quicinc.com>
mac80211: Fix MLO address translation for multiple bss case
Siddh Raman Pant <code(a)siddh.me>
erofs/zmap.c: Fix incorrect offset calculation
Hao Sun <sunhao.th(a)gmail.com>
bpf: Skip task with pid=1 in send_signal_common()
Cristian Marussi <cristian.marussi(a)arm.com>
firmware: arm_scmi: Clear stale xfer->hdr.status
Krzysztof Kozlowski <krzysztof.kozlowski(a)linaro.org>
arm64: dts: imx8mq-thor96: fix no-mmc property for SDHCI
Geert Uytterhoeven <geert+renesas(a)glider.be>
arm64: dts: freescale: Fix pca954x i2c-mux node names
Geert Uytterhoeven <geert+renesas(a)glider.be>
ARM: dts: vf610: Fix pca9548 i2c-mux node names
Geert Uytterhoeven <geert+renesas(a)glider.be>
ARM: dts: imx: Fix pca9547 i2c-mux node name
-------------
Diffstat:
Makefile | 4 +--
arch/arm/boot/dts/imx53-ppd.dts | 2 +-
arch/arm/boot/dts/vf610-zii-dev-rev-b.dts | 2 +-
arch/arm/boot/dts/vf610-zii-dev-rev-c.dts | 2 +-
arch/arm/mach-omap1/gpio15xx.c | 1 +
arch/arm64/boot/dts/freescale/fsl-ls1012a-qds.dts | 2 +-
arch/arm64/boot/dts/freescale/fsl-ls1043a-qds.dts | 2 +-
arch/arm64/boot/dts/freescale/fsl-ls1046a-qds.dts | 2 +-
arch/arm64/boot/dts/freescale/fsl-ls1088a-qds.dts | 2 +-
arch/arm64/boot/dts/freescale/fsl-ls1088a-rdb.dts | 2 +-
.../arm64/boot/dts/freescale/fsl-ls1088a-ten64.dts | 2 +-
arch/arm64/boot/dts/freescale/fsl-ls208xa-qds.dtsi | 2 +-
arch/arm64/boot/dts/freescale/fsl-ls208xa-rdb.dtsi | 2 +-
.../arm64/boot/dts/freescale/fsl-lx2160a-cex7.dtsi | 2 +-
.../boot/dts/freescale/imx8mm-nitrogen-r2.dts | 2 +-
arch/arm64/boot/dts/freescale/imx8mq-nitrogen.dts | 4 +--
arch/arm64/boot/dts/freescale/imx8mq-thor96.dts | 4 +--
arch/arm64/boot/dts/freescale/imx8qxp-mek.dts | 2 +-
.../dts/qcom/msm8994-huawei-angler-rev-101.dts | 19 +++++++++++--
arch/loongarch/kernel/process.c | 12 ++------
arch/loongarch/kernel/unwind_guess.c | 6 ++++
arch/loongarch/kernel/unwind_prologue.c | 16 +++++++++--
arch/s390/kernel/setup.c | 5 ++--
block/blk-cgroup.c | 4 +++
block/blk-mq.c | 6 +++-
drivers/acpi/video_detect.c | 8 ++++++
drivers/dma/imx-sdma.c | 4 ++-
drivers/firmware/arm_scmi/driver.c | 2 ++
drivers/gpio/gpiolib-acpi.c | 20 ++++++++++++--
drivers/hid/hid-ids.h | 1 +
drivers/hid/hid-playstation.c | 32 ++++++++++++++++++++++
drivers/hid/hid-uclogic-core.c | 2 ++
drivers/hid/hid-uclogic-params.c | 2 ++
drivers/nvme/host/apple.c | 6 ++--
fs/cifs/dfs_cache.c | 6 ++--
fs/erofs/super.c | 13 ++++-----
fs/erofs/zmap.c | 10 +++++--
kernel/trace/bpf_trace.c | 3 ++
net/core/skbuff.c | 5 ++--
net/mac80211/rx.c | 3 ++
net/mctp/af_mctp.c | 6 ++++
rust/kernel/print.rs | 29 ++++++++++++--------
tools/testing/selftests/lib.mk | 2 +-
43 files changed, 190 insertions(+), 73 deletions(-)
On some Logitech mice, such as the G903, and possibly the G403, the HID
events are generated on a different interface to the HID++ one.
If we enable hi-res through the HID++ interface, the HID interface
wouldn't know anything about it, and handle the events as if they were
regular scroll events, making the mouse unusable.
Disable hi-res scrolling on those devices until we implement scroll
events through HID++.
Signed-off-by: Bastien Nocera <hadess(a)hadess.net>
Tested-by: Tobias Klausmann <klausman(a)schwarzvogel.de>
Link: https://bugzilla.kernel.org/show_bug.cgi?id=216885
Fixes: 908d325e1665 ("HID: logitech-hidpp: Detect hi-res scrolling support")
Cc: stable(a)vger.kernel.org
---
drivers/hid/hid-logitech-hidpp.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c
index abf2c95e4d0b..9c1ee8e91e0c 100644
--- a/drivers/hid/hid-logitech-hidpp.c
+++ b/drivers/hid/hid-logitech-hidpp.c
@@ -3978,7 +3978,8 @@ static void hidpp_connect_event(struct hidpp_device *hidpp)
}
hidpp_initialize_battery(hidpp);
- hidpp_initialize_hires_scroll(hidpp);
+ if (!hid_is_usb(hidpp->hid_dev))
+ hidpp_initialize_hires_scroll(hidpp);
/* forward current battery state */
if (hidpp->capabilities & HIDPP_CAPABILITY_HIDPP10_BATTERY) {
--
2.39.1
From: Louis Rannou <lrannou(a)baylibre.com>
spi_nor_set_erase_type() was used either to set or to mask out an erase
type. When we used it to mask out an erase type a shift-out-of-bounds
was hit:
UBSAN: shift-out-of-bounds in drivers/mtd/spi-nor/core.c:2237:24
shift exponent 4294967295 is too large for 32-bit type 'int'
The setting of the size_{shift, mask} and of the opcode are unnecessary
when the erase size is zero, as throughout the code just the erase size
is considered to determine whether an erase type is supported or not.
Setting the opcode to 0xFF was wrong too as nobody guarantees that 0xFF
is an unused opcode. Thus when masking out an erase type, just set the
erase size to zero. This will fix the shift-out-of-bounds.
Fixes: 5390a8df769e ("mtd: spi-nor: add support to non-uniform SFDP SPI NOR flash memories")
Cc: stable(a)vger.kernel.org
Reported-by: Alexander Stein <Alexander.Stein(a)tq-group.com>
Signed-off-by: Louis Rannou <lrannou(a)baylibre.com>
[ta: refine changes, new commit message, fix compilation error]
Signed-off-by: Tudor Ambarus <tudor.ambarus(a)linaro.org>
---
drivers/mtd/spi-nor/core.c | 9 +++++++++
drivers/mtd/spi-nor/core.h | 1 +
drivers/mtd/spi-nor/sfdp.c | 4 ++--
3 files changed, 12 insertions(+), 2 deletions(-)
diff --git a/drivers/mtd/spi-nor/core.c b/drivers/mtd/spi-nor/core.c
index 247d1014879a..22cb18b6c941 100644
--- a/drivers/mtd/spi-nor/core.c
+++ b/drivers/mtd/spi-nor/core.c
@@ -2025,6 +2025,15 @@ void spi_nor_set_erase_type(struct spi_nor_erase_type *erase, u32 size,
erase->size_mask = (1 << erase->size_shift) - 1;
}
+/**
+ * spi_nor_mask_erase_type() - mask out an SPI NOR erase type
+ * @erase: pointer to a structure that describes an SPI NOR erase type
+ */
+void spi_nor_mask_erase_type(struct spi_nor_erase_type *erase)
+{
+ erase->size = 0;
+}
+
/**
* spi_nor_init_uniform_erase_map() - Initialize uniform erase map
* @map: the erase map of the SPI NOR
diff --git a/drivers/mtd/spi-nor/core.h b/drivers/mtd/spi-nor/core.h
index f6d012e1f681..25423225c29d 100644
--- a/drivers/mtd/spi-nor/core.h
+++ b/drivers/mtd/spi-nor/core.h
@@ -681,6 +681,7 @@ void spi_nor_set_pp_settings(struct spi_nor_pp_command *pp, u8 opcode,
void spi_nor_set_erase_type(struct spi_nor_erase_type *erase, u32 size,
u8 opcode);
+void spi_nor_mask_erase_type(struct spi_nor_erase_type *erase);
struct spi_nor_erase_region *
spi_nor_region_next(struct spi_nor_erase_region *region);
void spi_nor_init_uniform_erase_map(struct spi_nor_erase_map *map,
diff --git a/drivers/mtd/spi-nor/sfdp.c b/drivers/mtd/spi-nor/sfdp.c
index fd4daf8fa5df..298ab5e53a8c 100644
--- a/drivers/mtd/spi-nor/sfdp.c
+++ b/drivers/mtd/spi-nor/sfdp.c
@@ -875,7 +875,7 @@ static int spi_nor_init_non_uniform_erase_map(struct spi_nor *nor,
*/
for (i = 0; i < SNOR_ERASE_TYPE_MAX; i++)
if (!(regions_erase_type & BIT(erase[i].idx)))
- spi_nor_set_erase_type(&erase[i], 0, 0xFF);
+ spi_nor_mask_erase_type(&erase[i]);
return 0;
}
@@ -1089,7 +1089,7 @@ static int spi_nor_parse_4bait(struct spi_nor *nor,
erase_type[i].opcode = (dwords[SFDP_DWORD(2)] >>
erase_type[i].idx * 8) & 0xFF;
else
- spi_nor_set_erase_type(&erase_type[i], 0u, 0xFF);
+ spi_nor_mask_erase_type(&erase_type[i]);
}
/*
--
2.34.1
This is an automatic generated email to let you know that the following patch were queued:
Subject: media: ipu3-cio2: Fix PM runtime usage_count in driver unbind
Author: Sakari Ailus <sakari.ailus(a)linux.intel.com>
Date: Wed Dec 21 09:30:11 2022 +0100
Get the PM runtime usage_count and forbid PM runtime at driver unbind. The
opposite is being done in probe() already.
Fixes: commit c2a6a07afe4a ("media: intel-ipu3: cio2: add new MIPI-CSI2 driver")
Cc: stable(a)vger.kernel.org # for >= 4.16
Signed-off-by: Sakari Ailus <sakari.ailus(a)linux.intel.com>
Reviewed-by: Bingbu Cao <bingbu.cao(a)intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab(a)kernel.org>
drivers/media/pci/intel/ipu3/ipu3-cio2-main.c | 3 +++
1 file changed, 3 insertions(+)
---
diff --git a/drivers/media/pci/intel/ipu3/ipu3-cio2-main.c b/drivers/media/pci/intel/ipu3/ipu3-cio2-main.c
index 390bd5ea3472..3b76a9d0383a 100644
--- a/drivers/media/pci/intel/ipu3/ipu3-cio2-main.c
+++ b/drivers/media/pci/intel/ipu3/ipu3-cio2-main.c
@@ -1843,6 +1843,9 @@ static void cio2_pci_remove(struct pci_dev *pci_dev)
v4l2_device_unregister(&cio2->v4l2_dev);
media_device_cleanup(&cio2->media_dev);
mutex_destroy(&cio2->lock);
+
+ pm_runtime_forbid(&pci_dev->dev);
+ pm_runtime_get_noresume(&pci_dev->dev);
}
static int __maybe_unused cio2_runtime_suspend(struct device *dev)
From: Eric Biggers <ebiggers(a)google.com>
When writing a page from an encrypted file that is using
filesystem-layer encryption (not inline encryption), f2fs encrypts the
pagecache page into a bounce page, then writes the bounce page.
It also passes the bounce page to wbc_account_cgroup_owner(). That's
incorrect, because the bounce page is a newly allocated temporary page
that doesn't have the memory cgroup of the original pagecache page.
This makes wbc_account_cgroup_owner() not account the I/O to the owner
of the pagecache page as it should.
Fix this by always passing the pagecache page to
wbc_account_cgroup_owner().
Fixes: 578c647879f7 ("f2fs: implement cgroup writeback support")
Cc: stable(a)vger.kernel.org
Reported-by: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Signed-off-by: Eric Biggers <ebiggers(a)google.com>
---
fs/f2fs/data.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 754841bce389f..8a636500db0ef 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -739,7 +739,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
}
if (fio->io_wbc && !is_read_io(fio->op))
- wbc_account_cgroup_owner(fio->io_wbc, page, PAGE_SIZE);
+ wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE);
inc_page_count(fio->sbi, is_read_io(fio->op) ?
__read_io_type(page) : WB_DATA_TYPE(fio->page));
@@ -949,7 +949,7 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio)
}
if (fio->io_wbc)
- wbc_account_cgroup_owner(fio->io_wbc, page, PAGE_SIZE);
+ wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE);
inc_page_count(fio->sbi, WB_DATA_TYPE(page));
@@ -1023,7 +1023,7 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
}
if (fio->io_wbc)
- wbc_account_cgroup_owner(fio->io_wbc, bio_page, PAGE_SIZE);
+ wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE);
io->last_block_in_bio = fio->new_blkaddr;
base-commit: de6b3a5e09b29c014bd04044b023896107cfa2ee
--
2.39.1
This series backports patches in order to resolve the issue discussed here:
https://lore.kernel.org/selinux/389334fe-6e12-96b2-6ce9-9f0e8fcb85bf@huawei…
This required backporting the non-blocking LSM policy update mechanism
prerequisite patches.
Paul Moore has reviewed the non-blocking LSM policy update backport,
saying the SELinux part look good. The patchset was also reviewed by Mimi
Zohar
and everything looks ok.
I've tested the patches against the said issue and can confirm that the
issue is fixed. I've also run the LTP testcases on the fixed IMA and all
tests are passed.
This is a re-send of the original patchset as the original patchset
might have a faulty cover letter. The original patchset could be found
here:
https://patchwork.kernel.org/project/linux-integrity/list/?series=709367
GUO Zihua (1):
ima: Handle -ESTALE returned by ima_filter_rule_match()
Janne Karhunen (2):
LSM: switch to blocking policy update notifiers
ima: use the lsm policy update notifier
drivers/infiniband/core/device.c | 4 +-
include/linux/security.h | 12 +--
security/integrity/ima/ima.h | 2 +
security/integrity/ima/ima_main.c | 8 ++
security/integrity/ima/ima_policy.c | 151 ++++++++++++++++++++++------
security/security.c | 23 +++--
security/selinux/hooks.c | 2 +-
security/selinux/selinuxfs.c | 2 +-
8 files changed, 155 insertions(+), 49 deletions(-)
--
2.17.1
--
Hello,
I tried e-mailing you more than twice but my email bounced back
failure, Note this, soonest you receive this email revert to me before
I deliver the message it's importunate, pressing, crucial. Await your
response.
Best regards
Dr. Moustapha Sanon
According to schematics it is PF15 and not PF14 (MIC_SW_EN).
Seems as if it was hidden and not noticed during testing since
there is no sound DT node.
Fixes: 158c774d3c64 ("MIPS: Ingenic: Add missing nodes for Ingenic SoCs and boards.")
Cc: stable(a)vger.kernel.org
Signed-off-by: H. Nikolaus Schaller <hns(a)goldelico.com>
Acked-by: Paul Cercueil <paul(a)crapouillou.net>
---
arch/mips/boot/dts/ingenic/ci20.dts | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/mips/boot/dts/ingenic/ci20.dts b/arch/mips/boot/dts/ingenic/ci20.dts
index 9819abb2465dd..a276488c0f752 100644
--- a/arch/mips/boot/dts/ingenic/ci20.dts
+++ b/arch/mips/boot/dts/ingenic/ci20.dts
@@ -115,7 +115,7 @@ otg_power: fixedregulator@2 {
regulator-min-microvolt = <5000000>;
regulator-max-microvolt = <5000000>;
- gpio = <&gpf 14 GPIO_ACTIVE_LOW>;
+ gpio = <&gpf 15 GPIO_ACTIVE_LOW>;
enable-active-high;
};
};
--
2.38.1
Hallo gelukkige vriend,
Hallo mijn gelukkige vriend, ik ben de heer Charles W. Jackson Jr., de
megawinnaar van $ 344,6 miljoen in de Mega Millions Jackpot, ik doneer
aan 5 willekeurige individuen, als je deze e-mail ontvangt, is je e-mail
geselecteerd na een draaibal . Ik heb het grootste deel van mijn
vermogen verdeeld over een aantal goede doelen en organisaties. Ik heb
vrijwillig besloten om de som van $ 3 miljoen USD aan jou of je
organisatie te doneren als een van de geselecteerde 5, je kunt mijn
winst verifiëren via de onderstaande YouTube-pagina.
BEKIJK MIJ HIER: https://www.youtube.com/watch?v=0MUR8QEIMQI
Deze donatie van $ 3 miljoen USD is gedaan om u in staat te stellen uw
persoonlijke problemen te versterken en genereus de hand te reiken aan
de minst bevoorrechte, verweesde en liefdadigheidsorganisaties in uw
gemeenschap.
Wat voor mij het belangrijkste is, is dat u de gedoneerde fondsen op de
beste manier toewijst die u, uw familie, vrienden ten goede komt en om
de behoeftigen in uw directe gemeenschap te helpen.
DIT IS JE DONATIECODE: DON207152
Stuur je DONATIECODE naar deze e-mail door simpelweg zo snel mogelijk op
antwoord op dit bericht te klikken, zodat we de donatieprocedure snel
kunnen afronden.
E-mailadres contact: charlesjacksonj1(a)gmail.com
Ik hoop u en uw gezin dit jaar gelukkig te maken.
Groeten,
Charles W.Jackson Jr
blit_x and blit_y are u32, so fbcon currently cannot support fonts
larger than 32x32.
The 32x32 case also needs shifting an unsigned int, to properly set bit
31, otherwise we get "UBSAN: shift-out-of-bounds in fbcon_set_font",
as reported on:
http://lore.kernel.org/all/IA1PR07MB98308653E259A6F2CE94A4AFABCE9@IA1PR07MB…
Kernel Branch: 6.2.0-rc5-next-20230124
Kernel config: https://drive.google.com/file/d/1F-LszDAizEEH0ZX0HcSR06v5q8FPl2Uv/view?usp=…
Reproducer: https://drive.google.com/file/d/1mP1jcLBY7vWCNM60OMf-ogw-urQRjNrm/view?usp=…
Reported-by: Sanan Hasanov <sanan.hasanov(a)Knights.ucf.edu>
Signed-off-by: Samuel Thibault <samuel.thibault(a)ens-lyon.org>
Fixes: 2d2699d98492 ("fbcon: font setting should check limitation of driver")
Cc: stable(a)vger.kernel.org
---
v1 -> v2:
- Use BIT macro instead of fixing bit test by hand.
- Add Fixes and Cc: stable headers.
Index: linux-6.0/drivers/video/fbdev/core/fbcon.c
===================================================================
--- linux-6.0.orig/drivers/video/fbdev/core/fbcon.c
+++ linux-6.0/drivers/video/fbdev/core/fbcon.c
@@ -2489,9 +2489,12 @@ static int fbcon_set_font(struct vc_data
h > FBCON_SWAP(info->var.rotate, info->var.yres, info->var.xres))
return -EINVAL;
+ if (font->width > 32 || font->height > 32)
+ return -EINVAL;
+
/* Make sure drawing engine can handle the font */
- if (!(info->pixmap.blit_x & (1 << (font->width - 1))) ||
- !(info->pixmap.blit_y & (1 << (font->height - 1))))
+ if (!(info->pixmap.blit_x & BIT(font->width - 1)) ||
+ !(info->pixmap.blit_y & BIT(font->height - 1)))
return -EINVAL;
/* Make sure driver can handle the font length */
This is the start of the stable review cycle for the 5.10.167 release.
There are 9 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Sun, 05 Feb 2023 10:09:58 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v5.x/stable-review/patch-5.10.167-r…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-5.10.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 5.10.167-rc1
Yan Zhai <yan(a)cloudflare.com>
net: fix NULL pointer in skb_segment_list
Soenke Huster <soenke.huster(a)eknoes.de>
Bluetooth: fix null ptr deref on hci_sync_conn_complete_evt
Dave Hansen <dave.hansen(a)intel.com>
ACPI: processor idle: Practically limit "Dummy wait" workaround to old Intel systems
Hui Wang <hui.wang(a)canonical.com>
dmaengine: imx-sdma: Fix a possible memory leak in sdma_transfer_init
Yu Kuai <yukuai3(a)huawei.com>
blk-cgroup: fix missing pd_online_fn() while activating policy
Hao Sun <sunhao.th(a)gmail.com>
bpf: Skip task with pid=1 in send_signal_common()
Krzysztof Kozlowski <krzysztof.kozlowski(a)linaro.org>
arm64: dts: imx8mq-thor96: fix no-mmc property for SDHCI
Geert Uytterhoeven <geert+renesas(a)glider.be>
ARM: dts: vf610: Fix pca9548 i2c-mux node names
Geert Uytterhoeven <geert+renesas(a)glider.be>
ARM: dts: imx: Fix pca9547 i2c-mux node name
-------------
Diffstat:
Makefile | 4 ++--
arch/arm/boot/dts/imx53-ppd.dts | 2 +-
arch/arm/boot/dts/vf610-zii-dev-rev-b.dts | 2 +-
arch/arm/boot/dts/vf610-zii-dev-rev-c.dts | 2 +-
arch/arm64/boot/dts/freescale/imx8mq-thor96.dts | 4 ++--
block/blk-cgroup.c | 4 ++++
drivers/acpi/processor_idle.c | 23 ++++++++++++++++++++---
drivers/dma/imx-sdma.c | 4 +++-
kernel/trace/bpf_trace.c | 3 +++
net/bluetooth/hci_event.c | 13 +++++++++++++
net/core/skbuff.c | 5 ++---
11 files changed, 52 insertions(+), 14 deletions(-)
Dear.Friend
I am Mrs. Anain kaimova. I am sending this brief letter to solicit
your partnership to transfer a sum of 11.9 Million Dollars into your
reliable account as my business partner. However, it's my urgent need
for foreign partner that made me to contact you for this transaction.
Further details of the transfer will be forwarded to you if you are
ready to assist me.
Best Regards.
Mrs.Anain kaimova
The quilt patch titled
Subject: aio: fix mremap after fork null-deref
has been removed from the -mm tree. Its filename was
aio-fix-mremap-after-fork-null-deref.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Seth Jenkins <sethjenkins(a)google.com>
Subject: aio: fix mremap after fork null-deref
Date: Tue, 31 Jan 2023 12:25:55 -0500
Commit e4a0d3e720e7 ("aio: Make it possible to remap aio ring") introduced
a null-deref if mremap is called on an old aio mapping after fork as
mm->ioctx_table will be set to NULL.
[jmoyer(a)redhat.com: fix 80 column issue]
Link: https://lkml.kernel.org/r/x49sffq4nvg.fsf@segfault.boston.devel.redhat.com
Fixes: e4a0d3e720e7 ("aio: Make it possible to remap aio ring")
Signed-off-by: Seth Jenkins <sethjenkins(a)google.com>
Signed-off-by: Jeff Moyer <jmoyer(a)redhat.com>
Cc: Alexander Viro <viro(a)zeniv.linux.org.uk>
Cc: Benjamin LaHaise <bcrl(a)kvack.org>
Cc: Jann Horn <jannh(a)google.com>
Cc: Pavel Emelyanov <xemul(a)parallels.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
--- a/fs/aio.c~aio-fix-mremap-after-fork-null-deref
+++ a/fs/aio.c
@@ -361,6 +361,9 @@ static int aio_ring_mremap(struct vm_are
spin_lock(&mm->ioctx_lock);
rcu_read_lock();
table = rcu_dereference(mm->ioctx_table);
+ if (!table)
+ goto out_unlock;
+
for (i = 0; i < table->nr; i++) {
struct kioctx *ctx;
@@ -374,6 +377,7 @@ static int aio_ring_mremap(struct vm_are
}
}
+out_unlock:
rcu_read_unlock();
spin_unlock(&mm->ioctx_lock);
return res;
_
Patches currently in -mm which might be from sethjenkins(a)google.com are
From: Peter Chen <peter.chen(a)nxp.com>
commit 4bb4fc0dbfa23acab9b762949b91ffd52106fe4b upstream
With this change, there will be a wakeup entry at /sys/../power/wakeup,
and the user could use this entry to choose whether enable xhci wakeup
features (wake up system from suspend) or not.
Tested-by: Matthias Kaehlcke <mka(a)chromium.org>
Reviewed-by: Matthias Kaehlcke <mka(a)chromium.org>
Signed-off-by: Peter Chen <peter.chen(a)nxp.com>
Signed-off-by: Mathias Nyman <mathias.nyman(a)linux.intel.com>
Link: https://lore.kernel.org/r/20200918131752.16488-6-mathias.nyman@linux.intel.…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Signed-off-by: Florian Fainelli <f.fainelli(a)gmail.com>
---
drivers/usb/host/xhci-plat.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c
index 2a73592908e1..3d20fe9c415f 100644
--- a/drivers/usb/host/xhci-plat.c
+++ b/drivers/usb/host/xhci-plat.c
@@ -252,7 +252,7 @@ static int xhci_plat_probe(struct platform_device *pdev)
*priv = *priv_match;
}
- device_wakeup_enable(hcd->self.controller);
+ device_set_wakeup_capable(&pdev->dev, true);
xhci->clk = clk;
xhci->main_hcd = hcd;
--
2.34.1
On 2/3/23 16:42, Vinayak Hegde wrote:
> Hi Everyone,
> I was going through A Beginner's Guide to Linux Kernel Development (LFD103) course and was trying out a few things
> mistakenly sending this mail.
>
Hi and welcome to LKML!
Some netiquette tips:
* Don't top-post when replying; reply inline with appropriate context
instead. Some people (like me) tends to cut quoted reply below
if you top-post.
* Don't send HTML emails - many kernel development lists (including
LKML) don't like them for being common spam method.
* Wait for at least a day before replying - people may respond to
your message at different pace.
* Use git-send-email(1) to submit patches (see
Documentation/process/submitting-patches.rst for how to do that).
Regarding your patch, I think Greg has already bumped SUBLEVEL
whenever new stable release is made, so no need to send separate
patch just for that.
Thanks.
--
An old man doll... just what I always wanted! - Clara
From: Andrea Righi <andrea.righi(a)canonical.com>
Commit ebc5951eea499314f6fbbde20e295f1345c67330 upstream.
[ This fixes a performance issue we're seeing in AWS instances when
running swapoff and using the global readahead algorithm. For a
particular instance configuration, Without this fix I/O throughput
is very low during swapoff (about 15 MB/s) with this patch is
reaches 500 MB/s. Tested swapoff with different workloads with
this patch applied. 5.10 onwards already have this fix ]
In unuse_pte_range() we blindly swap-in pages without checking if the
swap entry is already present in the swap cache.
By doing this, the hit/miss ratio used by the swap readahead heuristic
is not properly updated and this leads to non-optimal performance during
swapoff.
Tracing the distribution of the readahead size returned by the swap
readahead heuristic during swapoff shows that a small readahead size is
used most of the time as if we had only misses (this happens both with
cluster and vma readahead), for example:
r::swapin_nr_pages(unsigned long offset):unsigned long:$retval
COUNT EVENT
36948 $retval = 8
44151 $retval = 4
49290 $retval = 1
527771 $retval = 2
Checking if the swap entry is present in the swap cache, instead, allows
to properly update the readahead statistics and the heuristic behaves in a
better way during swapoff, selecting a bigger readahead size:
r::swapin_nr_pages(unsigned long offset):unsigned long:$retval
COUNT EVENT
1618 $retval = 1
4960 $retval = 2
41315 $retval = 4
103521 $retval = 8
In terms of swapoff performance the result is the following:
Testing environment
===================
- Host:
CPU: 1.8GHz Intel Core i7-8565U (quad-core, 8MB cache)
HDD: PC401 NVMe SK hynix 512GB
MEM: 16GB
- Guest (kvm):
8GB of RAM
virtio block driver
16GB swap file on ext4 (/swapfile)
Test case
=========
- allocate 85% of memory
- `systemctl hibernate` to force all the pages to be swapped-out to the
swap file
- resume the system
- measure the time that swapoff takes to complete:
# /usr/bin/time swapoff /swapfile
Result (swapoff time)
======
5.6 vanilla 5.6 w/ this patch
----------- -----------------
cluster-readahead 22.09s 12.19s
vma-readahead 18.20s 15.33s
Conclusion
==========
The specific use case this patch is addressing is to improve swapoff
performance in cloud environments when a VM has been hibernated, resumed
and all the memory needs to be forced back to RAM by disabling swap.
This change allows to better exploits the advantages of the readahead
heuristic during swapoff and this improvement allows to to speed up the
resume process of such VMs.
[andrea.righi(a)canonical.com: update changelog]
Link: http://lkml.kernel.org/r/20200418084705.GA147642@xps-13
Signed-off-by: Andrea Righi <andrea.righi(a)canonical.com>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Reviewed-by: "Huang, Ying" <ying.huang(a)intel.com>
Cc: Minchan Kim <minchan(a)kernel.org>
Cc: Anchal Agarwal <anchalag(a)amazon.com>
Cc: Hugh Dickins <hughd(a)google.com>
Cc: Vineeth Remanan Pillai <vpillai(a)digitalocean.com>
Cc: Kelley Nielsen <kelleynnn(a)gmail.com>
Link: http://lkml.kernel.org/r/20200416180132.GB3352@xps-13
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
---
mm/swapfile.c | 12 ++++++++----
1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/mm/swapfile.c b/mm/swapfile.c
index f6964212c6c8..fe5995c38ea4 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1950,10 +1950,14 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
pte_unmap(pte);
swap_map = &si->swap_map[offset];
- vmf.vma = vma;
- vmf.address = addr;
- vmf.pmd = pmd;
- page = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE, &vmf);
+ page = lookup_swap_cache(entry, vma, addr);
+ if (!page) {
+ vmf.vma = vma;
+ vmf.address = addr;
+ vmf.pmd = pmd;
+ page = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE,
+ &vmf);
+ }
if (!page) {
if (*swap_map == 0 || *swap_map == SWAP_MAP_BAD)
goto try_next;
--
2.38.1
Hi,
I see a number of build failures in v4.19.y.queue.
Building arm:allmodconfig ... failed
--------------
drivers/memory/atmel-sdramc.c: In function 'atmel_ramc_probe':
drivers/memory/atmel-sdramc.c:62:23: error: implicit declaration of function 'devm_clk_get_enabled'
Building arm:imx_v6_v7_defconfig ... failed
--------------
Error log:
drivers/mmc/host/sdhci-esdhc-imx.c: In function 'sdhci_esdhc_imx_hwinit':
drivers/mmc/host/sdhci-esdhc-imx.c:1187:31: error: implicit declaration of function 'cqhci_readl'
drivers/mmc/host/sdhci-esdhc-imx.c:1187:52: error: 'CQHCI_IS' undeclared (first use in this function)
1187 | tmp = cqhci_readl(cq_host, CQHCI_IS);
drivers/mmc/host/sdhci-esdhc-imx.c:1188:25: error: implicit declaration of function 'cqhci_writel'; did you mean 'sdhci_writel'? [-Werror=implicit-function-declaration]
1188 | cqhci_writel(cq_host, tmp, CQHCI_IS);
drivers/mmc/host/sdhci-esdhc-imx.c:1189:47: error: 'CQHCI_HALT' undeclared (first use in this function)
1189 | cqhci_writel(cq_host, CQHCI_HALT, CQHCI_CTL);
| ^~~~~~~~~~
drivers/mmc/host/sdhci-esdhc-imx.c:1189:59: error: 'CQHCI_CTL' undeclared (first use in this function)
1189 | cqhci_writel(cq_host, CQHCI_HALT, CQHCI_CTL);
Other builds fail with the same errors.
Guenter