From: Xu Lu <luxu.kernel@bytedance.com>
[ Upstream commit f754f27e98f88428aaf6be6e00f5cbce97f62d4b ]
In the sparse vmemmap model, the virtual address of vmemmap is calculated as: ((struct page *)VMEMMAP_START - (phys_ram_base >> PAGE_SHIFT)), and the va of a pfn's struct page can then be calculated with an offset: (vmemmap + (pfn)).
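To make the bias concrete, here is a small stand-alone C model of the same arithmetic (not kernel code: VMEMMAP_START is a made-up demo value and struct page is reduced to a stub):

    /* Demo of the vmemmap bias: &vmemmap[pfn] yields the struct page va. */
    #include <stdio.h>
    #include <stdint.h>

    #define PAGE_SHIFT    12
    #define VMEMMAP_START 0xffff8d8000000000UL /* arbitrary demo value */

    struct page { unsigned long flags; };

    int main(void)
    {
            uint64_t phys_ram_base = 0x82000000UL;
            /* Bias vmemmap down so the first RAM pfn maps to VMEMMAP_START. */
            struct page *vmemmap =
                    (struct page *)VMEMMAP_START - (phys_ram_base >> PAGE_SHIFT);
            uint64_t pfn = phys_ram_base >> PAGE_SHIFT;

            printf("struct page for pfn 0x%lx is at %p\n",
                   (unsigned long)pfn, (void *)(vmemmap + pfn));
            return 0;
    }

For pfn == (phys_ram_base >> PAGE_SHIFT) the bias and the offset cancel, so the first struct page lands exactly at VMEMMAP_START.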
However, when initializing struct pages, the kernel actually starts from the first page of the section that phys_ram_base belongs to. If that first page's pfn is not (phys_ram_base >> PAGE_SHIFT), we get a va below VMEMMAP_START when calculating the va for its struct page.
For example, if phys_ram_base starts from 0x82000000 with pfn 0x82000, the first page in the same section is actually pfn 0x80000. During init_unavailable_range(), we will initialize struct page for pfn 0x80000 with virtual address ((struct page *)VMEMMAP_START - 0x2000), which is below VMEMMAP_START as well as PCI_IO_END.
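Worked through with these numbers (a sketch assuming the RV64 section size of 2^27 bytes, i.e. SECTION_SIZE_BITS = 27, and 4 KiB pages; offsets are in units of struct page):

    vmemmap           = (struct page *)VMEMMAP_START - 0x82000
    section start     = round_down(0x82000000, 1 << 27) = 0x80000000  ->  pfn 0x80000
    &vmemmap[0x80000] = (struct page *)VMEMMAP_START + (0x80000 - 0x82000)
                      = (struct page *)VMEMMAP_START - 0x2000   (below VMEMMAP_START)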
This commit fixes the bug by introducing a new variable, 'vmemmap_start_pfn', that is aligned down to the memory section size, and using it instead of phys_ram_base to calculate the vmemmap address.
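As a stand-alone sketch of the fixed calculation (mirroring the init.c hunk below; PAGE_SHIFT and the RV64 SECTION_SIZE_BITS of 27 are assumptions taken from the kernel headers, and round_down() is expanded into its bit-masking form):

    /* Sketch of the fix: align the vmemmap bias down to a section boundary. */
    #include <stdio.h>
    #include <stdint.h>

    #define PAGE_SHIFT         12
    #define SECTION_SIZE_BITS  27   /* RV64 value from asm/sparsemem.h */
    #define VMEMMAP_ADDR_ALIGN (1ULL << SECTION_SIZE_BITS)

    int main(void)
    {
            uint64_t phys_ram_base = 0x82000000UL;
            /* round_down() in the kernel masks off the low bits: */
            uint64_t vmemmap_start_pfn =
                    (phys_ram_base & ~(VMEMMAP_ADDR_ALIGN - 1)) >> PAGE_SHIFT;

            /* Prints 0x80000: &vmemmap[0x80000] now equals VMEMMAP_START,
             * so no struct page va can fall below VMEMMAP_START. */
            printf("vmemmap_start_pfn = 0x%lx\n",
                   (unsigned long)vmemmap_start_pfn);
            return 0;
    }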
Fixes: a11dd49dcb93 ("riscv: Sparse-Memory/vmemmap out-of-bounds fix")
Signed-off-by: Xu Lu <luxu.kernel@bytedance.com>
Reviewed-by: Alexandre Ghiti <alexghiti@rivosinc.com>
Tested-by: Björn Töpel <bjorn@rivosinc.com>
Reviewed-by: Björn Töpel <bjorn@rivosinc.com>
Link: https://lore.kernel.org/r/20241209122617.53341-1-luxu.kernel@bytedance.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
Signed-off-by: Zhaoyang Li <lizy04@hust.edu.cn>
---
 arch/riscv/include/asm/page.h    |  1 +
 arch/riscv/include/asm/pgtable.h |  2 +-
 arch/riscv/mm/init.c             | 17 ++++++++++++++++-
 3 files changed, 18 insertions(+), 2 deletions(-)
diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
index 86048c60f700..fd861ac47a89 100644
--- a/arch/riscv/include/asm/page.h
+++ b/arch/riscv/include/asm/page.h
@@ -115,6 +115,7 @@ struct kernel_mapping {
 
 extern struct kernel_mapping kernel_map;
 extern phys_addr_t phys_ram_base;
+extern unsigned long vmemmap_start_pfn;
 
 #define is_kernel_mapping(x)	\
 	((x) >= kernel_map.virt_addr && (x) < (kernel_map.virt_addr + kernel_map.size))
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 7d1688f850c3..bb19a643c5c2 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -79,7 +79,7 @@
  * Define vmemmap for pfn_to_page & page_to_pfn calls. Needed if kernel
  * is configured with CONFIG_SPARSEMEM_VMEMMAP enabled.
  */
-#define vmemmap		((struct page *)VMEMMAP_START - (phys_ram_base >> PAGE_SHIFT))
+#define vmemmap		((struct page *)VMEMMAP_START - vmemmap_start_pfn)
 
 #define PCI_IO_SIZE      SZ_16M
 #define PCI_IO_END       VMEMMAP_START
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index ba2210b553f9..72b3462babbf 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -22,6 +22,7 @@
 #include <linux/hugetlb.h>
 
 #include <asm/fixmap.h>
+#include <asm/sparsemem.h>
 #include <asm/tlbflush.h>
 #include <asm/sections.h>
 #include <asm/soc.h>
@@ -52,6 +53,13 @@ EXPORT_SYMBOL(pgtable_l5_enabled);
 phys_addr_t phys_ram_base __ro_after_init;
 EXPORT_SYMBOL(phys_ram_base);
 
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+#define VMEMMAP_ADDR_ALIGN	(1ULL << SECTION_SIZE_BITS)
+
+unsigned long vmemmap_start_pfn __ro_after_init;
+EXPORT_SYMBOL(vmemmap_start_pfn);
+#endif
+
 unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]
 							__page_aligned_bss;
 EXPORT_SYMBOL(empty_zero_page);
@@ -210,8 +218,12 @@ static void __init setup_bootmem(void)
 	memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start);
 
 	phys_ram_end = memblock_end_of_DRAM();
-	if (!IS_ENABLED(CONFIG_XIP_KERNEL))
+	if (!IS_ENABLED(CONFIG_XIP_KERNEL)) {
 		phys_ram_base = memblock_start_of_DRAM();
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+		vmemmap_start_pfn = round_down(phys_ram_base, VMEMMAP_ADDR_ALIGN) >> PAGE_SHIFT;
+#endif
+}
 	/*
 	 * Reserve physical address space that would be mapped to virtual
 	 * addresses greater than (void *)(-PAGE_SIZE) because:
@@ -946,6 +958,9 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
 	kernel_map.xiprom_sz = (uintptr_t)(&_exiprom) - (uintptr_t)(&_xiprom);
 
 	phys_ram_base = CONFIG_PHYS_RAM_BASE;
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+	vmemmap_start_pfn = round_down(phys_ram_base, VMEMMAP_ADDR_ALIGN) >> PAGE_SHIFT;
+#endif
 	kernel_map.phys_addr = (uintptr_t)CONFIG_PHYS_RAM_BASE;
 	kernel_map.size = (uintptr_t)(&_end) - (uintptr_t)(&_start);
[ Sasha's backport helper bot ]
Hi,
✅ All tests passed successfully. No issues detected. No action required from the submitter.
The upstream commit SHA1 provided is correct: f754f27e98f88428aaf6be6e00f5cbce97f62d4b
WARNING: Author mismatch between patch and upstream commit:
Backport author: Zhaoyang Li <lizy04@hust.edu.cn>
Commit author: Xu Lu <luxu.kernel@bytedance.com>
Status in newer kernel trees:
6.14.y | Present (exact SHA1)
6.12.y | Present (different SHA1: d2bd51954ac8)
6.6.y  | Present (different SHA1: a4a7ac3d2660)
Note: The patch differs from the upstream commit:
---
1:  f754f27e98f88 ! 1:  0281b720e72e4 riscv: mm: Fix the out of bound issue of vmemmap address
    @@ Metadata
      ## Commit message ##
         riscv: mm: Fix the out of bound issue of vmemmap address
    
    +    [ Upstream commit f754f27e98f88428aaf6be6e00f5cbce97f62d4b ]
    +
         In sparse vmemmap model, the virtual address of vmemmap is calculated
         as: ((struct page *)VMEMMAP_START - (phys_ram_base >> PAGE_SHIFT)).
         And the struct page's va can be calculated with an offset:
    @@ Commit message
         Reviewed-by: Björn Töpel <bjorn@rivosinc.com>
         Link: https://lore.kernel.org/r/20241209122617.53341-1-luxu.kernel@bytedance.com
         Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
    +    Signed-off-by: Zhaoyang Li <lizy04@hust.edu.cn>
    
      ## arch/riscv/include/asm/page.h ##
      @@ arch/riscv/include/asm/page.h: struct kernel_mapping {
    
      @@ arch/riscv/include/asm/pgtable.h
    
      ## arch/riscv/mm/init.c ##
      @@
    - #include <asm/pgtable.h>
    - #include <asm/sections.h>
    - #include <asm/soc.h>
    + #include <linux/hugetlb.h>
    +
    + #include <asm/fixmap.h>
     +#include <asm/sparsemem.h>
      #include <asm/tlbflush.h>
    -
    - #include "../kernel/head.h"
    + #include <asm/sections.h>
    + #include <asm/soc.h>
    
      @@ arch/riscv/mm/init.c: EXPORT_SYMBOL(pgtable_l5_enabled);
       phys_addr_t phys_ram_base __ro_after_init;
       EXPORT_SYMBOL(phys_ram_base);
    
      @@ arch/riscv/mm/init.c: EXPORT_SYMBOL(pgtable_l5_enabled);
       							__page_aligned_bss;
       EXPORT_SYMBOL(empty_zero_page);
    
      @@ arch/riscv/mm/init.c: static void __init setup_bootmem(void)
    - 	 * Make sure we align the start of the memory on a PMD boundary so that
    - 	 * at worst, we map the linear mapping with PMD mappings.
    - 	 */
    + 	memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start);
    +
    + 	phys_ram_end = memblock_end_of_DRAM();
     -	if (!IS_ENABLED(CONFIG_XIP_KERNEL))
     +	if (!IS_ENABLED(CONFIG_XIP_KERNEL)) {
    - 		phys_ram_base = memblock_start_of_DRAM() & PMD_MASK;
    + 		phys_ram_base = memblock_start_of_DRAM();
     +#ifdef CONFIG_SPARSEMEM_VMEMMAP
     +		vmemmap_start_pfn = round_down(phys_ram_base, VMEMMAP_ADDR_ALIGN) >> PAGE_SHIFT;
     +#endif
    -+	}
    -
    ++}
      	/*
    - 	 * In 64-bit, any use of __va/__pa before this point is wrong as we
    + 	 * Reserve physical address space that would be mapped to virtual
    + 	 * addresses greater than (void *)(-PAGE_SIZE) because:
    
      @@ arch/riscv/mm/init.c: asmlinkage void __init setup_vm(uintptr_t dtb_pa)
       	kernel_map.xiprom_sz = (uintptr_t)(&_exiprom) - (uintptr_t)(&_xiprom);
---
Results of testing on various branches:
| Branch                    | Patch Apply | Build Test |
|---------------------------|-------------|------------|
| stable/linux-6.1.y        | Success     | Success    |