The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
bd97a59da6a8 ("nvme-auth: use workqueue dedicated to authentication")
aa36d711e945 ("nvme-auth: convert dhchap_auth_list to an array")
8d1c1904e947 ("nvme-auth: clear sensitive info right after authentication completes")
e481fc0a3777 ("nvme-auth: guarantee dhchap buffers under memory pressure")
b7d604cae8f6 ("nvme-auth: don't keep long lived 4k dhchap buffer")
bfc4068e1e55 ("nvme-auth: remove redundant if statement")
01604350e145 ("nvme-auth: don't override ctrl keys before validation")
193a8c7e5f1a ("nvme-auth: don't ignore key generation failures when initializing ctrl keys")
f6b182fbd5c6 ("nvme-auth: remove redundant buffer deallocations")
0c999e69c40a ("nvme-auth: rename authentication work elements")
0a7ce375f83f ("nvme-auth: rename __nvme_auth_[reset|free] to nvme_auth[reset|free]_dhchap")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From bd97a59da6a866e3dee5d2a2d582ec71dbbc84cd Mon Sep 17 00:00:00 2001
From: Shin'ichiro Kawasaki <shinichiro.kawasaki(a)wdc.com>
Date: Tue, 31 Jan 2023 18:26:44 +0900
Subject: [PATCH] nvme-auth: use workqueue dedicated to authentication
NVMe In-Band authentication uses two kinds of works: chap->auth_work and
ctrl->dhchap_auth_work. The latter work flushes or cancels the former
work. However, the both works are queued to the same workqueue nvme-wq.
It results in the lockdep WARNING as follows:
WARNING: possible recursive locking detected
6.2.0-rc4+ #1 Not tainted
--------------------------------------------
kworker/u16:7/69 is trying to acquire lock:
ffff902d52e65548 ((wq_completion)nvme-wq){+.+.}-{0:0}, at: start_flush_work+0x2c5/0x380
but task is already holding lock:
ffff902d52e65548 ((wq_completion)nvme-wq){+.+.}-{0:0}, at: process_one_work+0x210/0x410
To avoid the WARNING, introduce a new workqueue nvme-auth-wq dedicated
to chap->auth_work.
Reported-by: Daniel Wagner <dwagner(a)suse.de>
Link: https://lore.kernel.org/linux-nvme/20230130110802.paafkiipmitwtnwr@carbon.l…
Fixes: f50fff73d620 ("nvme: implement In-Band authentication")
Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawasaki(a)wdc.com>
Tested-by: Daniel Wagner <dwagner(a)suse.de>
Reviewed-by: Hannes Reinecke <hare(a)suse.de>
Signed-off-by: Christoph Hellwig <hch(a)lst.de>
diff --git a/drivers/nvme/host/auth.c b/drivers/nvme/host/auth.c
index 4424f53a8a0a..b57630d1d3b8 100644
--- a/drivers/nvme/host/auth.c
+++ b/drivers/nvme/host/auth.c
@@ -45,6 +45,8 @@ struct nvme_dhchap_queue_context {
int sess_key_len;
};
+struct workqueue_struct *nvme_auth_wq;
+
#define nvme_auth_flags_from_qid(qid) \
(qid == 0) ? 0 : BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_RESERVED
#define nvme_auth_queue_from_qid(ctrl, qid) \
@@ -866,7 +868,7 @@ int nvme_auth_negotiate(struct nvme_ctrl *ctrl, int qid)
chap = &ctrl->dhchap_ctxs[qid];
cancel_work_sync(&chap->auth_work);
- queue_work(nvme_wq, &chap->auth_work);
+ queue_work(nvme_auth_wq, &chap->auth_work);
return 0;
}
EXPORT_SYMBOL_GPL(nvme_auth_negotiate);
@@ -1008,10 +1010,15 @@ EXPORT_SYMBOL_GPL(nvme_auth_free);
int __init nvme_init_auth(void)
{
+ nvme_auth_wq = alloc_workqueue("nvme-auth-wq",
+ WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
+ if (!nvme_auth_wq)
+ return -ENOMEM;
+
nvme_chap_buf_cache = kmem_cache_create("nvme-chap-buf-cache",
CHAP_BUF_SIZE, 0, SLAB_HWCACHE_ALIGN, NULL);
if (!nvme_chap_buf_cache)
- return -ENOMEM;
+ goto err_destroy_workqueue;
nvme_chap_buf_pool = mempool_create(16, mempool_alloc_slab,
mempool_free_slab, nvme_chap_buf_cache);
@@ -1021,6 +1028,8 @@ int __init nvme_init_auth(void)
return 0;
err_destroy_chap_buf_cache:
kmem_cache_destroy(nvme_chap_buf_cache);
+err_destroy_workqueue:
+ destroy_workqueue(nvme_auth_wq);
return -ENOMEM;
}
@@ -1028,4 +1037,5 @@ void __exit nvme_exit_auth(void)
{
mempool_destroy(nvme_chap_buf_pool);
kmem_cache_destroy(nvme_chap_buf_cache);
+ destroy_workqueue(nvme_auth_wq);
}
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
fc546faa5595 ("powerpc/kexec_file: Count hot-pluggable memory in FDT estimate")
7294194b47e9 ("powerpc/kexec_file: Fix division by zero in extra size estimation")
340a4a9f8773 ("powerpc: Take in account addition CPU node when building kexec FDT")
886db32398ab ("powerpc/kexec_file: Restore FDT size estimation for kdump kernel")
3c985d31ad66 ("powerpc: Use common of_kexec_alloc_and_setup_fdt()")
e6635bab530d ("powerpc: Use ELF fields defined in 'struct kimage'")
2377c92e37fe ("powerpc/kexec_file: fix FDT size estimation for kdump kernel")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From fc546faa559538fb312c77e055243ece18ab3288 Mon Sep 17 00:00:00 2001
From: Sourabh Jain <sourabhjain(a)linux.ibm.com>
Date: Tue, 31 Jan 2023 08:36:15 +0530
Subject: [PATCH] powerpc/kexec_file: Count hot-pluggable memory in FDT
estimate
On Systems where online memory is lesser compared to max memory, the
kexec_file_load system call may fail to load the kdump kernel with the
below errors:
"Failed to update fdt with linux,drconf-usable-memory property"
"Error setting up usable-memory property for kdump kernel"
This happens because the size estimation for usable memory properties
for the kdump kernel's FDT is based on the online memory whereas the
usable memory properties include max memory. In short, the hot-pluggable
memory is not accounted for while estimating the size of the usable
memory properties.
The issue is addressed by calculating usable memory property size using
max hotplug address instead of the last online memory address.
Fixes: 2377c92e37fe ("powerpc/kexec_file: fix FDT size estimation for kdump kernel")
Signed-off-by: Sourabh Jain <sourabhjain(a)linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au>
Link: https://lore.kernel.org/r/20230131030615.729894-1-sourabhjain@linux.ibm.com
diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c
index 19d084682bc2..52085751f5f4 100644
--- a/arch/powerpc/kexec/file_load_64.c
+++ b/arch/powerpc/kexec/file_load_64.c
@@ -990,7 +990,7 @@ unsigned int kexec_extra_fdt_size_ppc64(struct kimage *image)
* number of usable memory entries and use for FDT size estimation.
*/
if (drmem_lmb_size()) {
- usm_entries = ((memblock_end_of_DRAM() / drmem_lmb_size()) +
+ usm_entries = ((memory_hotplug_max() / drmem_lmb_size()) +
(2 * (resource_size(&crashk_res) / drmem_lmb_size())));
extra_size = (unsigned int)(usm_entries * sizeof(u64));
} else {
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
fc546faa5595 ("powerpc/kexec_file: Count hot-pluggable memory in FDT estimate")
7294194b47e9 ("powerpc/kexec_file: Fix division by zero in extra size estimation")
340a4a9f8773 ("powerpc: Take in account addition CPU node when building kexec FDT")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From fc546faa559538fb312c77e055243ece18ab3288 Mon Sep 17 00:00:00 2001
From: Sourabh Jain <sourabhjain(a)linux.ibm.com>
Date: Tue, 31 Jan 2023 08:36:15 +0530
Subject: [PATCH] powerpc/kexec_file: Count hot-pluggable memory in FDT
estimate
On Systems where online memory is lesser compared to max memory, the
kexec_file_load system call may fail to load the kdump kernel with the
below errors:
"Failed to update fdt with linux,drconf-usable-memory property"
"Error setting up usable-memory property for kdump kernel"
This happens because the size estimation for usable memory properties
for the kdump kernel's FDT is based on the online memory whereas the
usable memory properties include max memory. In short, the hot-pluggable
memory is not accounted for while estimating the size of the usable
memory properties.
The issue is addressed by calculating usable memory property size using
max hotplug address instead of the last online memory address.
Fixes: 2377c92e37fe ("powerpc/kexec_file: fix FDT size estimation for kdump kernel")
Signed-off-by: Sourabh Jain <sourabhjain(a)linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au>
Link: https://lore.kernel.org/r/20230131030615.729894-1-sourabhjain@linux.ibm.com
diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c
index 19d084682bc2..52085751f5f4 100644
--- a/arch/powerpc/kexec/file_load_64.c
+++ b/arch/powerpc/kexec/file_load_64.c
@@ -990,7 +990,7 @@ unsigned int kexec_extra_fdt_size_ppc64(struct kimage *image)
* number of usable memory entries and use for FDT size estimation.
*/
if (drmem_lmb_size()) {
- usm_entries = ((memblock_end_of_DRAM() / drmem_lmb_size()) +
+ usm_entries = ((memory_hotplug_max() / drmem_lmb_size()) +
(2 * (resource_size(&crashk_res) / drmem_lmb_size())));
extra_size = (unsigned int)(usm_entries * sizeof(u64));
} else {
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
fc546faa5595 ("powerpc/kexec_file: Count hot-pluggable memory in FDT estimate")
7294194b47e9 ("powerpc/kexec_file: Fix division by zero in extra size estimation")
340a4a9f8773 ("powerpc: Take in account addition CPU node when building kexec FDT")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From fc546faa559538fb312c77e055243ece18ab3288 Mon Sep 17 00:00:00 2001
From: Sourabh Jain <sourabhjain(a)linux.ibm.com>
Date: Tue, 31 Jan 2023 08:36:15 +0530
Subject: [PATCH] powerpc/kexec_file: Count hot-pluggable memory in FDT
estimate
On Systems where online memory is lesser compared to max memory, the
kexec_file_load system call may fail to load the kdump kernel with the
below errors:
"Failed to update fdt with linux,drconf-usable-memory property"
"Error setting up usable-memory property for kdump kernel"
This happens because the size estimation for usable memory properties
for the kdump kernel's FDT is based on the online memory whereas the
usable memory properties include max memory. In short, the hot-pluggable
memory is not accounted for while estimating the size of the usable
memory properties.
The issue is addressed by calculating usable memory property size using
max hotplug address instead of the last online memory address.
Fixes: 2377c92e37fe ("powerpc/kexec_file: fix FDT size estimation for kdump kernel")
Signed-off-by: Sourabh Jain <sourabhjain(a)linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au>
Link: https://lore.kernel.org/r/20230131030615.729894-1-sourabhjain@linux.ibm.com
diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c
index 19d084682bc2..52085751f5f4 100644
--- a/arch/powerpc/kexec/file_load_64.c
+++ b/arch/powerpc/kexec/file_load_64.c
@@ -990,7 +990,7 @@ unsigned int kexec_extra_fdt_size_ppc64(struct kimage *image)
* number of usable memory entries and use for FDT size estimation.
*/
if (drmem_lmb_size()) {
- usm_entries = ((memblock_end_of_DRAM() / drmem_lmb_size()) +
+ usm_entries = ((memory_hotplug_max() / drmem_lmb_size()) +
(2 * (resource_size(&crashk_res) / drmem_lmb_size())));
extra_size = (unsigned int)(usm_entries * sizeof(u64));
} else {
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
98d0219e043e ("powerpc/64s/radix: Fix crash with unaligned relocated kernel")
b150a4d12b91 ("powerpc/vmlinux.lds: Add an explicit symbol for the SRWX boundary")
331771e836e6 ("powerpc/vmlinux.lds: Ensure STRICT_ALIGN_SIZE is at least page aligned")
2a0fb3c155c9 ("powerpc/32: Set an IBAT covering up to _einittext during init")
c4a22611bf6c ("powerpc/603: Use SPRN_SDR1 to store the pgdir phys address")
035b19a15a98 ("powerpc/32s: Always map kernel text and rodata with BATs")
11522448e641 ("powerpc/603: Always fault when _PAGE_ACCESSED is not set")
850659392abc ("powerpc/reg.h: delete duplicated word")
850659392abc ("powerpc/reg.h: delete duplicated word")
850659392abc ("powerpc/reg.h: delete duplicated word")
850659392abc ("powerpc/reg.h: delete duplicated word")
850659392abc ("powerpc/reg.h: delete duplicated word")
850659392abc ("powerpc/reg.h: delete duplicated word")
850659392abc ("powerpc/reg.h: delete duplicated word")
850659392abc ("powerpc/reg.h: delete duplicated word")
850659392abc ("powerpc/reg.h: delete duplicated word")
850659392abc ("powerpc/reg.h: delete duplicated word")
850659392abc ("powerpc/reg.h: delete duplicated word")
850659392abc ("powerpc/reg.h: delete duplicated word")
850659392abc ("powerpc/reg.h: delete duplicated word")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 98d0219e043e09013e883eacde3b93e0b2bf944d Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe(a)ellerman.id.au>
Date: Tue, 10 Jan 2023 23:47:52 +1100
Subject: [PATCH] powerpc/64s/radix: Fix crash with unaligned relocated kernel
If a relocatable kernel is loaded at an address that is not 2MB aligned
and told not to relocate to zero, the kernel can crash due to
mark_rodata_ro() incorrectly changing some read-write data to read-only.
Scenarios where the misalignment can occur are when the kernel is
loaded by kdump or using the RELOCATABLE_TEST config option.
Example crash with the kernel loaded at 5MB:
Run /sbin/init as init process
BUG: Unable to handle kernel data access on write at 0xc000000000452000
Faulting instruction address: 0xc0000000005b6730
Oops: Kernel access of bad area, sig: 11 [#1]
LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA pSeries
CPU: 1 PID: 1 Comm: init Not tainted 6.2.0-rc1-00011-g349188be4841 #166
Hardware name: IBM pSeries (emulated by qemu) POWER9 (raw) 0x4e1202 0xf000005 of:SLOF,git-5b4c5a hv:linux,kvm pSeries
NIP: c0000000005b6730 LR: c000000000ae9ab8 CTR: 0000000000000380
REGS: c000000004503250 TRAP: 0300 Not tainted (6.2.0-rc1-00011-g349188be4841)
MSR: 8000000000009033 <SF,EE,ME,IR,DR,RI,LE> CR: 44288480 XER: 00000000
CFAR: c0000000005b66ec DAR: c000000000452000 DSISR: 0a000000 IRQMASK: 0
...
NIP memset+0x68/0x104
LR zero_user_segments.constprop.0+0xa8/0xf0
Call Trace:
ext4_mpage_readpages+0x7f8/0x830
ext4_readahead+0x48/0x60
read_pages+0xb8/0x380
page_cache_ra_unbounded+0x19c/0x250
filemap_fault+0x58c/0xae0
__do_fault+0x60/0x100
__handle_mm_fault+0x1230/0x1a40
handle_mm_fault+0x120/0x300
___do_page_fault+0x20c/0xa80
do_page_fault+0x30/0xc0
data_access_common_virt+0x210/0x220
This happens because mark_rodata_ro() tries to change permissions on the
range _stext..__end_rodata, but _stext sits in the middle of the 2MB
page from 4MB to 6MB:
radix-mmu: Mapped 0x0000000000000000-0x0000000000200000 with 2.00 MiB pages (exec)
radix-mmu: Mapped 0x0000000000200000-0x0000000000400000 with 2.00 MiB pages
radix-mmu: Mapped 0x0000000000400000-0x0000000002400000 with 2.00 MiB pages (exec)
The logic that changes the permissions assumes the linear mapping was
split correctly at boot, so it marks the entire 2MB page read-only. That
leads to the write fault above.
To fix it, the boot time mapping logic needs to consider that if the
kernel is running at a non-zero address then _stext is a boundary where
it must split the mapping.
That leads to the mapping being split correctly, allowing the rodata
permission change to take happen correctly, with no spillover:
radix-mmu: Mapped 0x0000000000000000-0x0000000000200000 with 2.00 MiB pages (exec)
radix-mmu: Mapped 0x0000000000200000-0x0000000000400000 with 2.00 MiB pages
radix-mmu: Mapped 0x0000000000400000-0x0000000000500000 with 64.0 KiB pages
radix-mmu: Mapped 0x0000000000500000-0x0000000000600000 with 64.0 KiB pages (exec)
radix-mmu: Mapped 0x0000000000600000-0x0000000002400000 with 2.00 MiB pages (exec)
If the kernel is loaded at a 2MB aligned address, the mapping continues
to use 2MB pages as before:
radix-mmu: Mapped 0x0000000000000000-0x0000000000200000 with 2.00 MiB pages (exec)
radix-mmu: Mapped 0x0000000000200000-0x0000000000400000 with 2.00 MiB pages
radix-mmu: Mapped 0x0000000000400000-0x0000000002c00000 with 2.00 MiB pages (exec)
radix-mmu: Mapped 0x0000000002c00000-0x0000000100000000 with 2.00 MiB pages
Fixes: c55d7b5e6426 ("powerpc: Remove STRICT_KERNEL_RWX incompatibility with RELOCATABLE")
Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au>
Link: https://lore.kernel.org/r/20230110124753.1325426-1-mpe@ellerman.id.au
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index cac727b01799..5a2384ed1727 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -262,6 +262,17 @@ print_mapping(unsigned long start, unsigned long end, unsigned long size, bool e
static unsigned long next_boundary(unsigned long addr, unsigned long end)
{
#ifdef CONFIG_STRICT_KERNEL_RWX
+ unsigned long stext_phys;
+
+ stext_phys = __pa_symbol(_stext);
+
+ // Relocatable kernel running at non-zero real address
+ if (stext_phys != 0) {
+ // Start of relocated kernel text is a rodata boundary
+ if (addr < stext_phys)
+ return stext_phys;
+ }
+
if (addr < __pa_symbol(__srwx_boundary))
return __pa_symbol(__srwx_boundary);
#endif