Make sv39 the default address space for mmap as some applications currently depend on this assumption. The RISC-V specification enforces that bits outside of the virtual address range are not used, so restricting the size of the default address space as such should be temporary. A hint address passed to mmap will cause the largest address space that fits entirely into the hint to be used. If the hint is less than or equal to 1<<38, a 39-bit address will be used. After an address space is completely full, the next smallest address space will be used.
Documentation is also added to the RISC-V virtual memory section to explain these changes.
Charlie Jenkins (2): RISC-V: mm: Restrict address space for sv39,sv48,sv57 RISC-V: mm: Update documentation and include test
Documentation/riscv/vm-layout.rst | 20 ++++++++ arch/riscv/include/asm/elf.h | 2 +- arch/riscv/include/asm/pgtable.h | 21 ++++++-- arch/riscv/include/asm/processor.h | 41 +++++++++++++--- tools/testing/selftests/riscv/Makefile | 2 +- tools/testing/selftests/riscv/mm/Makefile | 22 +++++++++ .../selftests/riscv/mm/testcases/mmap.c | 49 +++++++++++++++++++ 7 files changed, 144 insertions(+), 13 deletions(-) create mode 100644 tools/testing/selftests/riscv/mm/Makefile create mode 100644 tools/testing/selftests/riscv/mm/testcases/mmap.c
base-commit: eef509789cecdce895020682192d32e8bac790e8
Make sv39 the default address space for mmap as some applications currently depend on this assumption. The RISC-V specification enforces that bits outside of the virtual address range are not used, so restricting the size of the default address space as such should be temporary. A hint address passed to mmap will cause the largest address space that fits entirely into the hint to be used. If the hint is less than or equal to 1<<38, a 39-bit address will be used. After an address space is completely full, the next smallest address space will be used.
Signed-off-by: Charlie Jenkins charlie@rivosinc.com --- arch/riscv/include/asm/elf.h | 2 +- arch/riscv/include/asm/pgtable.h | 13 +++++++++- arch/riscv/include/asm/processor.h | 41 +++++++++++++++++++++++++----- 3 files changed, 47 insertions(+), 9 deletions(-)
diff --git a/arch/riscv/include/asm/elf.h b/arch/riscv/include/asm/elf.h index 30e7d2455960..1b57f13a1afd 100644 --- a/arch/riscv/include/asm/elf.h +++ b/arch/riscv/include/asm/elf.h @@ -49,7 +49,7 @@ extern bool compat_elf_check_arch(Elf32_Ehdr *hdr); * the loader. We need to make sure that it is out of the way of the program * that it will "exec", and that there is sufficient room for the brk. */ -#define ELF_ET_DYN_BASE ((TASK_SIZE / 3) * 2) +#define ELF_ET_DYN_BASE ((DEFAULT_MAP_WINDOW / 3) * 2)
#ifdef CONFIG_64BIT #ifdef CONFIG_COMPAT diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 75970ee2bda2..e83912e97870 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -57,18 +57,29 @@ #define MODULES_END (PFN_ALIGN((unsigned long)&_start)) #endif
+ /* * Roughly size the vmemmap space to be large enough to fit enough * struct pages to map half the virtual address space. Then * position vmemmap directly below the VMALLOC region. */ #ifdef CONFIG_64BIT +#define VA_BITS_SV39 39 +#define VA_BITS_SV48 48 +#define VA_BITS_SV57 57 + +#define VA_USER_SV39 (UL(1) << (VA_BITS_SV39 - 1)) +#define VA_USER_SV48 (UL(1) << (VA_BITS_SV48 - 1)) +#define VA_USER_SV57 (UL(1) << (VA_BITS_SV57 - 1)) + #define VA_BITS (pgtable_l5_enabled ? \ - 57 : (pgtable_l4_enabled ? 48 : 39)) + VA_BITS_SV57 : (pgtable_l4_enabled ? VA_BITS_SV48 : VA_BITS_SV39)) #else #define VA_BITS 32 #endif
+#define DEFAULT_VA_BITS ((VA_BITS >= VA_BITS_SV39) ? VA_BITS_SV39 : VA_BITS) + #define VMEMMAP_SHIFT \ (VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT) #define VMEMMAP_SIZE BIT(VMEMMAP_SHIFT) diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h index 6fb8bbec8459..019dcd4ecae4 100644 --- a/arch/riscv/include/asm/processor.h +++ b/arch/riscv/include/asm/processor.h @@ -12,20 +12,47 @@
#include <asm/ptrace.h>
-/* - * This decides where the kernel will search for a free chunk of vm - * space during mmap's. - */ -#define TASK_UNMAPPED_BASE PAGE_ALIGN(TASK_SIZE / 3) - -#define STACK_TOP TASK_SIZE #ifdef CONFIG_64BIT +#define DEFAULT_MAP_WINDOW (UL(1) << (DEFAULT_VA_BITS - 1)) #define STACK_TOP_MAX TASK_SIZE_64 + +#define arch_get_mmap_end(addr, len, flags) \ + ((addr) == 0 || (addr) >= VA_USER_SV57 ? STACK_TOP_MAX : \ + (((addr) >= VA_USER_SV48) && (VA_BITS >= VA_BITS_SV48)) ? \ + VA_USER_SV48 : \ + VA_USER_SV39) + +#define arch_get_mmap_base(addr, base) \ + (((addr >= VA_USER_SV57) && (VA_BITS >= VA_BITS_SV57)) ? \ + base + STACK_TOP_MAX - DEFAULT_MAP_WINDOW : \ + (((addr) >= VA_USER_SV48) && (VA_BITS >= VA_BITS_SV48)) ? \ + base + VA_USER_SV48 - DEFAULT_MAP_WINDOW : \ + base) + #else +#define DEFAULT_MAP_WINDOW TASK_SIZE #define STACK_TOP_MAX TASK_SIZE + +#define arch_get_mmap_end(addr, len, flags) \ + ((addr) > DEFAULT_MAP_WINDOW ? STACK_TOP_MAX : DEFAULT_MAP_WINDOW) + +#define arch_get_mmap_base(addr, base) \ + ((addr > DEFAULT_MAP_WINDOW) ? \ + base + STACK_TOP_MAX - DEFAULT_MAP_WINDOW : \ + base) + #endif #define STACK_ALIGN 16
+ +#define STACK_TOP DEFAULT_MAP_WINDOW + +/* + * This decides where the kernel will search for a free chunk of vm + * space during mmap's. + */ +#define TASK_UNMAPPED_BASE PAGE_ALIGN(DEFAULT_MAP_WINDOW / 3) + #ifndef __ASSEMBLY__
struct task_struct;
On 27 Jun 2023, at 23:21, Charlie Jenkins charlie@rivosinc.com wrote:
Make sv39 the default address space for mmap as some applications currently depend on this assumption.
They are just plain wrong too. Sv48 was in even Priv v1.10 (the first spec where satp was named as such and contained the mode, rather than requiring M-mode’s help in configuring virtual memory), predating the ratified v1.11 spec. A 39-bit address space is pathetic and has implications for ASLR.
I strongly suggest applications be forced to support at least Sv48, which is totally reasonable given the address space sizes used by other architectures. Sv57 is more disruptive to some runtimes, though ideally even that would be free for the kernel to use rather than committing to not using it for the default uABI.
Jess
The RISC-V specification enforces that bits outside of the virtual address range are not used, so restricting the size of the default address space as such should be temporary. A hint address passed to mmap will cause the largest address space that fits entirely into the hint to be used. If the hint is less than or equal to 1<<38, a 39-bit address will be used. After an address space is completely full, the next smallest address space will be used.
Signed-off-by: Charlie Jenkins charlie@rivosinc.com
arch/riscv/include/asm/elf.h | 2 +- arch/riscv/include/asm/pgtable.h | 13 +++++++++- arch/riscv/include/asm/processor.h | 41 +++++++++++++++++++++++++----- 3 files changed, 47 insertions(+), 9 deletions(-)
diff --git a/arch/riscv/include/asm/elf.h b/arch/riscv/include/asm/elf.h index 30e7d2455960..1b57f13a1afd 100644 --- a/arch/riscv/include/asm/elf.h +++ b/arch/riscv/include/asm/elf.h @@ -49,7 +49,7 @@ extern bool compat_elf_check_arch(Elf32_Ehdr *hdr);
- the loader. We need to make sure that it is out of the way of the program
- that it will "exec", and that there is sufficient room for the brk.
*/ -#define ELF_ET_DYN_BASE ((TASK_SIZE / 3) * 2) +#define ELF_ET_DYN_BASE ((DEFAULT_MAP_WINDOW / 3) * 2)
#ifdef CONFIG_64BIT #ifdef CONFIG_COMPAT diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 75970ee2bda2..e83912e97870 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -57,18 +57,29 @@ #define MODULES_END (PFN_ALIGN((unsigned long)&_start)) #endif
/*
- Roughly size the vmemmap space to be large enough to fit enough
- struct pages to map half the virtual address space. Then
- position vmemmap directly below the VMALLOC region.
*/ #ifdef CONFIG_64BIT +#define VA_BITS_SV39 39 +#define VA_BITS_SV48 48 +#define VA_BITS_SV57 57
+#define VA_USER_SV39 (UL(1) << (VA_BITS_SV39 - 1)) +#define VA_USER_SV48 (UL(1) << (VA_BITS_SV48 - 1)) +#define VA_USER_SV57 (UL(1) << (VA_BITS_SV57 - 1))
#define VA_BITS (pgtable_l5_enabled ? \
- 57 : (pgtable_l4_enabled ? 48 : 39))
- VA_BITS_SV57 : (pgtable_l4_enabled ? VA_BITS_SV48 : VA_BITS_SV39))
#else #define VA_BITS 32 #endif
+#define DEFAULT_VA_BITS ((VA_BITS >= VA_BITS_SV39) ? VA_BITS_SV39 : VA_BITS)
#define VMEMMAP_SHIFT \ (VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT) #define VMEMMAP_SIZE BIT(VMEMMAP_SHIFT) diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h index 6fb8bbec8459..019dcd4ecae4 100644 --- a/arch/riscv/include/asm/processor.h +++ b/arch/riscv/include/asm/processor.h @@ -12,20 +12,47 @@
#include <asm/ptrace.h>
-/*
- This decides where the kernel will search for a free chunk of vm
- space during mmap's.
- */
-#define TASK_UNMAPPED_BASE PAGE_ALIGN(TASK_SIZE / 3)
-#define STACK_TOP TASK_SIZE #ifdef CONFIG_64BIT +#define DEFAULT_MAP_WINDOW (UL(1) << (DEFAULT_VA_BITS - 1)) #define STACK_TOP_MAX TASK_SIZE_64
+#define arch_get_mmap_end(addr, len, flags) \
- ((addr) == 0 || (addr) >= VA_USER_SV57 ? STACK_TOP_MAX : \
- (((addr) >= VA_USER_SV48) && (VA_BITS >= VA_BITS_SV48)) ? \
- VA_USER_SV48 : \
- VA_USER_SV39)
+#define arch_get_mmap_base(addr, base) \
- (((addr >= VA_USER_SV57) && (VA_BITS >= VA_BITS_SV57)) ? \
- base + STACK_TOP_MAX - DEFAULT_MAP_WINDOW : \
- (((addr) >= VA_USER_SV48) && (VA_BITS >= VA_BITS_SV48)) ? \
- base + VA_USER_SV48 - DEFAULT_MAP_WINDOW : \
- base)
#else +#define DEFAULT_MAP_WINDOW TASK_SIZE #define STACK_TOP_MAX TASK_SIZE
+#define arch_get_mmap_end(addr, len, flags) \
- ((addr) > DEFAULT_MAP_WINDOW ? STACK_TOP_MAX : DEFAULT_MAP_WINDOW)
+#define arch_get_mmap_base(addr, base) \
- ((addr > DEFAULT_MAP_WINDOW) ? \
- base + STACK_TOP_MAX - DEFAULT_MAP_WINDOW : \
- base)
#endif #define STACK_ALIGN 16
+#define STACK_TOP DEFAULT_MAP_WINDOW
+/*
- This decides where the kernel will search for a free chunk of vm
- space during mmap's.
- */
+#define TASK_UNMAPPED_BASE PAGE_ALIGN(DEFAULT_MAP_WINDOW / 3)
#ifndef __ASSEMBLY__
struct task_struct;
2.34.1
linux-riscv mailing list linux-riscv@lists.infradead.org http://lists.infradead.org/mailman/listinfo/linux-riscv
On Tue, 27 Jun 2023 15:32:36 PDT (-0700), jrtc27@jrtc27.com wrote:
On 27 Jun 2023, at 23:21, Charlie Jenkins charlie@rivosinc.com wrote:
Make sv39 the default address space for mmap as some applications currently depend on this assumption.
They are just plain wrong too. Sv48 was in even Priv v1.10 (the first spec where satp was named as such and contained the mode, rather than requiring M-mode’s help in configuring virtual memory), predating the ratified v1.11 spec. A 39-bit address space is pathetic and has implications for ASLR.
I strongly suggest applications be forced to support at least Sv48, which is totally reasonable given the address space sizes used by other architectures. Sv57 is more disruptive to some runtimes, though ideally even that would be free for the kernel to use rather than committing to not using it for the default uABI.
Go and OpenJDK both broke when we expanded the VA width. I don't like it either, but if the change breaks userspace then it's a regression and we have to live with the bug.
Jess
The RISC-V specification enforces that bits outside of the virtual address range are not used, so restricting the size of the default address space as such should be temporary. A hint address passed to mmap will cause the largest address space that fits entirely into the hint to be used. If the hint is less than or equal to 1<<38, a 39-bit address will be used. After an address space is completely full, the next smallest address space will be used.
Signed-off-by: Charlie Jenkins charlie@rivosinc.com
arch/riscv/include/asm/elf.h | 2 +- arch/riscv/include/asm/pgtable.h | 13 +++++++++- arch/riscv/include/asm/processor.h | 41 +++++++++++++++++++++++++----- 3 files changed, 47 insertions(+), 9 deletions(-)
diff --git a/arch/riscv/include/asm/elf.h b/arch/riscv/include/asm/elf.h index 30e7d2455960..1b57f13a1afd 100644 --- a/arch/riscv/include/asm/elf.h +++ b/arch/riscv/include/asm/elf.h @@ -49,7 +49,7 @@ extern bool compat_elf_check_arch(Elf32_Ehdr *hdr);
- the loader. We need to make sure that it is out of the way of the program
- that it will "exec", and that there is sufficient room for the brk.
*/ -#define ELF_ET_DYN_BASE ((TASK_SIZE / 3) * 2) +#define ELF_ET_DYN_BASE ((DEFAULT_MAP_WINDOW / 3) * 2)
#ifdef CONFIG_64BIT #ifdef CONFIG_COMPAT diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 75970ee2bda2..e83912e97870 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -57,18 +57,29 @@ #define MODULES_END (PFN_ALIGN((unsigned long)&_start)) #endif
/*
- Roughly size the vmemmap space to be large enough to fit enough
- struct pages to map half the virtual address space. Then
- position vmemmap directly below the VMALLOC region.
*/ #ifdef CONFIG_64BIT +#define VA_BITS_SV39 39 +#define VA_BITS_SV48 48 +#define VA_BITS_SV57 57
+#define VA_USER_SV39 (UL(1) << (VA_BITS_SV39 - 1)) +#define VA_USER_SV48 (UL(1) << (VA_BITS_SV48 - 1)) +#define VA_USER_SV57 (UL(1) << (VA_BITS_SV57 - 1))
#define VA_BITS (pgtable_l5_enabled ? \
- 57 : (pgtable_l4_enabled ? 48 : 39))
- VA_BITS_SV57 : (pgtable_l4_enabled ? VA_BITS_SV48 : VA_BITS_SV39))
#else #define VA_BITS 32 #endif
+#define DEFAULT_VA_BITS ((VA_BITS >= VA_BITS_SV39) ? VA_BITS_SV39 : VA_BITS)
#define VMEMMAP_SHIFT \ (VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT) #define VMEMMAP_SIZE BIT(VMEMMAP_SHIFT) diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h index 6fb8bbec8459..019dcd4ecae4 100644 --- a/arch/riscv/include/asm/processor.h +++ b/arch/riscv/include/asm/processor.h @@ -12,20 +12,47 @@
#include <asm/ptrace.h>
-/*
- This decides where the kernel will search for a free chunk of vm
- space during mmap's.
- */
-#define TASK_UNMAPPED_BASE PAGE_ALIGN(TASK_SIZE / 3)
-#define STACK_TOP TASK_SIZE #ifdef CONFIG_64BIT +#define DEFAULT_MAP_WINDOW (UL(1) << (DEFAULT_VA_BITS - 1)) #define STACK_TOP_MAX TASK_SIZE_64
+#define arch_get_mmap_end(addr, len, flags) \
- ((addr) == 0 || (addr) >= VA_USER_SV57 ? STACK_TOP_MAX : \
- (((addr) >= VA_USER_SV48) && (VA_BITS >= VA_BITS_SV48)) ? \
- VA_USER_SV48 : \
- VA_USER_SV39)
+#define arch_get_mmap_base(addr, base) \
- (((addr >= VA_USER_SV57) && (VA_BITS >= VA_BITS_SV57)) ? \
- base + STACK_TOP_MAX - DEFAULT_MAP_WINDOW : \
- (((addr) >= VA_USER_SV48) && (VA_BITS >= VA_BITS_SV48)) ? \
- base + VA_USER_SV48 - DEFAULT_MAP_WINDOW : \
- base)
#else +#define DEFAULT_MAP_WINDOW TASK_SIZE #define STACK_TOP_MAX TASK_SIZE
+#define arch_get_mmap_end(addr, len, flags) \
- ((addr) > DEFAULT_MAP_WINDOW ? STACK_TOP_MAX : DEFAULT_MAP_WINDOW)
+#define arch_get_mmap_base(addr, base) \
- ((addr > DEFAULT_MAP_WINDOW) ? \
- base + STACK_TOP_MAX - DEFAULT_MAP_WINDOW : \
- base)
#endif #define STACK_ALIGN 16
+#define STACK_TOP DEFAULT_MAP_WINDOW
+/*
- This decides where the kernel will search for a free chunk of vm
- space during mmap's.
- */
+#define TASK_UNMAPPED_BASE PAGE_ALIGN(DEFAULT_MAP_WINDOW / 3)
#ifndef __ASSEMBLY__
struct task_struct;
2.34.1
linux-riscv mailing list linux-riscv@lists.infradead.org http://lists.infradead.org/mailman/listinfo/linux-riscv
On 6/28/23 02:36, Palmer Dabbelt wrote:
On Tue, 27 Jun 2023 15:32:36 PDT (-0700), jrtc27@jrtc27.com wrote:
On 27 Jun 2023, at 23:21, Charlie Jenkins charlie@rivosinc.com wrote:
Make sv39 the default address space for mmap as some applications currently depend on this assumption.
They are just plain wrong too. Sv48 was in even Priv v1.10 (the first spec where satp was named as such and contained the mode, rather than requiring M-mode’s help in configuring virtual memory), predating the ratified v1.11 spec. A 39-bit address space is pathetic and has implications for ASLR.
I strongly suggest applications be forced to support at least Sv48, which is totally reasonable given the address space sizes used by other architectures. Sv57 is more disruptive to some runtimes, though ideally even that would be free for the kernel to use rather than committing to not using it for the default uABI.
Go and OpenJDK both broke when we expanded the VA width. I don't like it either, but if the change breaks userspace then it's a regression and we have to live with the bug.
Have we debugged this ? do we at least know why they break ? Just disabling Sv48/57 by default for everyone because some userspace apps break doesn't seem the correct approach, it seems more like a bug in userspace IMHO.
Yes, a default of 38 bits of userspace is small. I would be interested in the opinions of other people on whether it would be acceptable to have the default be sv48 and require applications that prefer fewer bits to specify so with the given mmap hinting.
On Wed, Jun 28, 2023 at 5:09 AM Charlie Jenkins charlie@rivosinc.com wrote:
Yes, a default of 38 bits of userspace is small. I would be interested in the opinions of other people on whether it would be acceptable to have the default be sv48 and require applications that prefer fewer bits to specify so with the given mmap hinting.
I think sv48 is a reasonable default instead of sv39. We should fallback to sv39 only if the underlying host does not support sv48.
Regards, Anup
On Wed, Jun 28, 2023 at 06:04:41PM +0530, Anup Patel wrote:
On Wed, Jun 28, 2023 at 5:09 AM Charlie Jenkins charlie@rivosinc.com wrote:
Yes, a default of 38 bits of userspace is small. I would be interested in the opinions of other people on whether it would be acceptable to have the default be sv48 and require applications that prefer fewer bits to specify so with the given mmap hinting.
I think sv48 is a reasonable default instead of sv39. We should fallback to sv39 only if the underlying host does not support sv48.
Regards, Anup
I did some research and it appears that Java does work on sv48, but not on sv57. Using the v6.4 kernel I was able to successfully run OpenJDK on both sv39 and sv48, but on sv57 there is a SIGSEGV error on QEMU. Relevant JDK discussion can be seen here https://mail.openjdk.org/pipermail/hotspot-dev/2022-November/067298.html. Go similarly appears to work even on sv57 according to https://go-review.googlesource.com/c/go/+/409055. I have not tried Go myself.
The point of contention here, I believe, is that in v6.4 the highest address space available will be used, causing all of these applications that do not work properly in sv57 to fail when testing in sv57 environments. Given that these applications seem to work in sv48, it seems reasonable to default to sv48, unless there is an abundance of additional applications that are unhappy with this.
Using the hint mechanism to mmap will then allow users to change the address space to sv57 if required. It should be possible to allow users to use sv39 if they need it using the same mechanism, but reducing the address space instead of growing it will require more thought from me to implement.
Thanks, Charlie
Add documentation explaining the behavior of mmap. Include a simple test that ensures that mmap returns an address less than the hint address while there are still addresses available.
Signed-off-by: Charlie Jenkins charlie@rivosinc.com --- Documentation/riscv/vm-layout.rst | 20 ++++++++ arch/riscv/include/asm/pgtable.h | 8 +-- tools/testing/selftests/riscv/Makefile | 2 +- tools/testing/selftests/riscv/mm/Makefile | 22 +++++++++ .../selftests/riscv/mm/testcases/mmap.c | 49 +++++++++++++++++++ 5 files changed, 97 insertions(+), 4 deletions(-) create mode 100644 tools/testing/selftests/riscv/mm/Makefile create mode 100644 tools/testing/selftests/riscv/mm/testcases/mmap.c
diff --git a/Documentation/riscv/vm-layout.rst b/Documentation/riscv/vm-layout.rst index 5462c84f4723..365a481be90b 100644 --- a/Documentation/riscv/vm-layout.rst +++ b/Documentation/riscv/vm-layout.rst @@ -133,3 +133,23 @@ RISC-V Linux Kernel SV57 ffffffff00000000 | -4 GB | ffffffff7fffffff | 2 GB | modules, BPF ffffffff80000000 | -2 GB | ffffffffffffffff | 2 GB | kernel __________________|____________|__________________|_________|____________________________________________________________ + + +Userspace VAs +-------------------- +To maintain compatibility with software that relies on the VA space +with a maximum of 39-bits, the kernel will, by default, return virtual +return virtual addresses to userspace from a 39-bit range. + +Software can "opt-in" to receiving VAs from other VA space by providing +a hint address to mmap. A call to mmap is guaranteed to return an address +that will not override the unset left-aligned bits in the hint address, +unless there is no space left in the address space. If there is no space +available in the requested address space, an address in the next smallest +available address space will be returned. + +For example, in order to obtain 48-bit VA space, a hint address greater than +:code:`1 << 38` must be provided. Note that this is 38 due to sv39 userspace +ending at :code:`1 << 38` with the addresses beyond this and up to :code:`1 << 39` +being reserved for the kernel. Similarly, to obtain 57-bit VA space addresses, a +hint address greater than or equal to :code:`1 << 47` must be provided. diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index e83912e97870..93e7233d5d2a 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -841,14 +841,16 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte) * Task size is 0x4000000000 for RV64 or 0x9fc00000 for RV32. * Note that PGDIR_SIZE must evenly divide TASK_SIZE. * Task size is: - * - 0x9fc00000 (~2.5GB) for RV32. 
- * - 0x4000000000 ( 256GB) for RV64 using SV39 mmu - * - 0x800000000000 ( 128TB) for RV64 using SV48 mmu + * - 0x9fc00000 (~2.5GB) for RV32. + * - 0x4000000000 ( 256GB) for RV64 using SV39 mmu + * - 0x800000000000 ( 128TB) for RV64 using SV48 mmu + * - 0x100000000000000 ( 64PB) for RV64 using SV57 mmu * * Note that PGDIR_SIZE must evenly divide TASK_SIZE since "RISC-V * Instruction Set Manual Volume II: Privileged Architecture" states that * "load and store effective addresses, which are 64bits, must have bits * 63–48 all equal to bit 47, or else a page-fault exception will occur." + * Similarly for SV57, bits 63–57 must be equal to bit 56. */ #ifdef CONFIG_64BIT #define TASK_SIZE_64 (PGDIR_SIZE * PTRS_PER_PGD / 2) diff --git a/tools/testing/selftests/riscv/Makefile b/tools/testing/selftests/riscv/Makefile index 32a72902d045..0fee58f990ae 100644 --- a/tools/testing/selftests/riscv/Makefile +++ b/tools/testing/selftests/riscv/Makefile @@ -5,7 +5,7 @@ ARCH ?= $(shell uname -m 2>/dev/null || echo not)
ifneq (,$(filter $(ARCH),riscv)) -RISCV_SUBTARGETS ?= hwprobe +RISCV_SUBTARGETS ?= hwprobe mm else RISCV_SUBTARGETS := endif diff --git a/tools/testing/selftests/riscv/mm/Makefile b/tools/testing/selftests/riscv/mm/Makefile new file mode 100644 index 000000000000..4aadf04401db --- /dev/null +++ b/tools/testing/selftests/riscv/mm/Makefile @@ -0,0 +1,22 @@ +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2019 ARM Limited +# Originally tools/testing/selftests/arm64/signal + +# Additional include paths needed by kselftest.h and local headers +CFLAGS += -D_GNU_SOURCE -std=gnu99 -I. + +SRCS := $(filter-out testcases/testcases.c,$(wildcard testcases/*.c)) +PROGS := $(patsubst %.c,%,$(SRCS)) + +# Generated binaries to be installed by top KSFT script +TEST_GEN_PROGS := $(notdir $(PROGS)) + +# Get Kernel headers installed and use them. + +# Including KSFT lib.mk here will also mangle the TEST_GEN_PROGS list +# to account for any OUTPUT target-dirs optionally provided by +# the toplevel makefile +include ../../lib.mk + +$(TEST_GEN_PROGS): $(PROGS) + cp $(PROGS) $(OUTPUT)/ \ No newline at end of file diff --git a/tools/testing/selftests/riscv/mm/testcases/mmap.c b/tools/testing/selftests/riscv/mm/testcases/mmap.c new file mode 100644 index 000000000000..aa37a623b93f --- /dev/null +++ b/tools/testing/selftests/riscv/mm/testcases/mmap.c @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <sys/mman.h> +#include "../../kselftest_harness.h" + +TEST(sv57_test) +{ + // Only works on 64 bit + #if __riscv_xlen == 64 + // Place all of the hint addresses on the boundaries of mmap + // sv39, sv48, sv57 + // User addresses end at 1<<38, 1<<47, 1<<56 respectively + void *on_37_bits = (void *) (1UL << 37); + void *on_38_bits = (void *) (1UL << 38); + void *on_46_bits = (void *) (1UL << 46); + void *on_47_bits = (void *) (1UL << 47); + void *on_55_bits = (void *) (1UL << 55); + void *on_56_bits = (void *) (1UL << 56); + + int prot = PROT_READ | PROT_WRITE; + int flags = 
MAP_PRIVATE | MAP_ANONYMOUS; + + int *no_hint = mmap(NULL, 5*sizeof(int), prot, flags, 0, 0); + int *on_37_addr = mmap(on_37_bits, 5*sizeof(int), prot, flags, 0, 0); + int *on_38_addr = mmap(on_38_bits, 5*sizeof(int), prot, flags, 0, 0); + int *on_46_addr = mmap(on_46_bits, 5*sizeof(int), prot, flags, 0, 0); + int *on_47_addr = mmap(on_47_bits, 5*sizeof(int), prot, flags, 0, 0); + int *on_55_addr = mmap(on_55_bits, 5*sizeof(int), prot, flags, 0, 0); + int *on_56_addr = mmap(on_56_bits, 5*sizeof(int), prot, flags, 0, 0); + + EXPECT_NE(no_hint, MAP_FAILED); + EXPECT_NE(on_37_addr, MAP_FAILED); + EXPECT_NE(on_38_addr, MAP_FAILED); + EXPECT_NE(on_46_addr, MAP_FAILED); + EXPECT_NE(on_47_addr, MAP_FAILED); + EXPECT_NE(on_55_addr, MAP_FAILED); + EXPECT_NE(on_56_addr, MAP_FAILED); + + EXPECT_LT((unsigned long) no_hint, 1UL << 38); + EXPECT_LT((unsigned long) on_37_addr, 1UL << 38); + EXPECT_LT((unsigned long) on_38_addr, 1UL << 38); + EXPECT_LT((unsigned long) on_46_addr, 1UL << 38); + EXPECT_LT((unsigned long) on_47_addr, 1UL << 47); + EXPECT_LT((unsigned long) on_55_addr, 1UL << 47); + EXPECT_LT((unsigned long) on_56_addr, 1UL << 57); + #else + #endif +} + +TEST_HARNESS_MAIN
On 6/28/23 3:21 AM, Charlie Jenkins wrote:
Add documentation explaining the behavior of mmap. Include a simple test that ensures that mmap returns an address less than the hint address while there are still addresses available.
Signed-off-by: Charlie Jenkins charlie@rivosinc.com
Documentation/riscv/vm-layout.rst | 20 ++++++++ arch/riscv/include/asm/pgtable.h | 8 +-- tools/testing/selftests/riscv/Makefile | 2 +- tools/testing/selftests/riscv/mm/Makefile | 22 +++++++++ .../selftests/riscv/mm/testcases/mmap.c | 49 +++++++++++++++++++
Please add a .gitignore file and specify the artifacts there i.e., mmap.
5 files changed, 97 insertions(+), 4 deletions(-) create mode 100644 tools/testing/selftests/riscv/mm/Makefile create mode 100644 tools/testing/selftests/riscv/mm/testcases/mmap.c
diff --git a/Documentation/riscv/vm-layout.rst b/Documentation/riscv/vm-layout.rst index 5462c84f4723..365a481be90b 100644 --- a/Documentation/riscv/vm-layout.rst +++ b/Documentation/riscv/vm-layout.rst @@ -133,3 +133,23 @@ RISC-V Linux Kernel SV57 ffffffff00000000 | -4 GB | ffffffff7fffffff | 2 GB | modules, BPF ffffffff80000000 | -2 GB | ffffffffffffffff | 2 GB | kernel __________________|____________|__________________|_________|____________________________________________________________
+Userspace VAs +-------------------- +To maintain compatibility with software that relies on the VA space +with a maximum of 39-bits, the kernel will, by default, return virtual +addresses to userspace from a 39-bit range.
+Software can "opt-in" to receiving VAs from other VA space by providing +a hint address to mmap. A call to mmap is guaranteed to return an address +that will not override the unset left-aligned bits in the hint address, +unless there is no space left in the address space. If there is no space +available in the requested address space, an address in the next smallest +available address space will be returned.
+For example, in order to obtain 48-bit VA space, a hint address greater than +:code:`1 << 38` must be provided. Note that this is 38 due to sv39 userspace +ending at :code:`1 << 38` with the addresses beyond this and up to :code:`1 << 39` +being reserved for the kernel. Similarly, to obtain 57-bit VA space addresses, a +hint address greater than or equal to :code:`1 << 47` must be provided. diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index e83912e97870..93e7233d5d2a 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -841,14 +841,16 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte)
- Task size is 0x4000000000 for RV64 or 0x9fc00000 for RV32.
- Note that PGDIR_SIZE must evenly divide TASK_SIZE.
- Task size is:
0x9fc00000 (~2.5GB) for RV32.
- 0x4000000000 ( 256GB) for RV64 using SV39 mmu
- 0x800000000000 ( 128TB) for RV64 using SV48 mmu
0x9fc00000 (~2.5GB) for RV32.
0x4000000000 ( 256GB) for RV64 using SV39 mmu
- 0x800000000000 ( 128TB) for RV64 using SV48 mmu
- 0x100000000000000 ( 64PB) for RV64 using SV57 mmu
- Note that PGDIR_SIZE must evenly divide TASK_SIZE since "RISC-V
- Instruction Set Manual Volume II: Privileged Architecture" states that
- "load and store effective addresses, which are 64bits, must have bits
- 63–48 all equal to bit 47, or else a page-fault exception will occur."
*/
- Similarly for SV57, bits 63–57 must be equal to bit 56.
#ifdef CONFIG_64BIT #define TASK_SIZE_64 (PGDIR_SIZE * PTRS_PER_PGD / 2) diff --git a/tools/testing/selftests/riscv/Makefile b/tools/testing/selftests/riscv/Makefile index 32a72902d045..0fee58f990ae 100644 --- a/tools/testing/selftests/riscv/Makefile +++ b/tools/testing/selftests/riscv/Makefile @@ -5,7 +5,7 @@ ARCH ?= $(shell uname -m 2>/dev/null || echo not) ifneq (,$(filter $(ARCH),riscv)) -RISCV_SUBTARGETS ?= hwprobe +RISCV_SUBTARGETS ?= hwprobe mm else RISCV_SUBTARGETS := endif diff --git a/tools/testing/selftests/riscv/mm/Makefile b/tools/testing/selftests/riscv/mm/Makefile new file mode 100644 index 000000000000..4aadf04401db --- /dev/null +++ b/tools/testing/selftests/riscv/mm/Makefile @@ -0,0 +1,22 @@ +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2019 ARM Limited +# Originally tools/testing/selftests/arm64/signal
+# Additional include paths needed by kselftest.h and local headers +CFLAGS += -D_GNU_SOURCE -std=gnu99 -I.
+SRCS := $(filter-out testcases/testcases.c,$(wildcard testcases/*.c)) +PROGS := $(patsubst %.c,%,$(SRCS))
+# Generated binaries to be installed by top KSFT script +TEST_GEN_PROGS := $(notdir $(PROGS))
+# Get Kernel headers installed and use them.
+# Including KSFT lib.mk here will also mangle the TEST_GEN_PROGS list +# to account for any OUTPUT target-dirs optionally provided by +# the toplevel makefile +include ../../lib.mk
+$(TEST_GEN_PROGS): $(PROGS)
- cp $(PROGS) $(OUTPUT)/
\ No newline at end of file diff --git a/tools/testing/selftests/riscv/mm/testcases/mmap.c b/tools/testing/selftests/riscv/mm/testcases/mmap.c new file mode 100644 index 000000000000..aa37a623b93f --- /dev/null +++ b/tools/testing/selftests/riscv/mm/testcases/mmap.c @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <sys/mman.h> +#include "../../kselftest_harness.h"
+TEST(sv57_test) +{
// Only works on 64 bit
#if __riscv_xlen == 64
// Place all of the hint addresses on the boundaries of mmap
// sv39, sv48, sv57
// User addresses end at 1<<38, 1<<47, 1<<56 respectively
void *on_37_bits = (void *) (1UL << 37);
void *on_38_bits = (void *) (1UL << 38);
void *on_46_bits = (void *) (1UL << 46);
void *on_47_bits = (void *) (1UL << 47);
void *on_55_bits = (void *) (1UL << 55);
void *on_56_bits = (void *) (1UL << 56);
int prot = PROT_READ | PROT_WRITE;
int flags = MAP_PRIVATE | MAP_ANONYMOUS;
int *no_hint = mmap(NULL, 5*sizeof(int), prot, flags, 0, 0);
int *on_37_addr = mmap(on_37_bits, 5*sizeof(int), prot, flags, 0, 0);
int *on_38_addr = mmap(on_38_bits, 5*sizeof(int), prot, flags, 0, 0);
int *on_46_addr = mmap(on_46_bits, 5*sizeof(int), prot, flags, 0, 0);
int *on_47_addr = mmap(on_47_bits, 5*sizeof(int), prot, flags, 0, 0);
int *on_55_addr = mmap(on_55_bits, 5*sizeof(int), prot, flags, 0, 0);
int *on_56_addr = mmap(on_56_bits, 5*sizeof(int), prot, flags, 0, 0);
EXPECT_NE(no_hint, MAP_FAILED);
EXPECT_NE(on_37_addr, MAP_FAILED);
EXPECT_NE(on_38_addr, MAP_FAILED);
EXPECT_NE(on_46_addr, MAP_FAILED);
EXPECT_NE(on_47_addr, MAP_FAILED);
EXPECT_NE(on_55_addr, MAP_FAILED);
EXPECT_NE(on_56_addr, MAP_FAILED);
EXPECT_LT((unsigned long) no_hint, 1UL << 38);
EXPECT_LT((unsigned long) on_37_addr, 1UL << 38);
EXPECT_LT((unsigned long) on_38_addr, 1UL << 38);
EXPECT_LT((unsigned long) on_46_addr, 1UL << 38);
EXPECT_LT((unsigned long) on_47_addr, 1UL << 47);
EXPECT_LT((unsigned long) on_55_addr, 1UL << 47);
EXPECT_LT((unsigned long) on_56_addr, 1UL << 57);
#else
#endif
+}
+TEST_HARNESS_MAIN
linux-kselftest-mirror@lists.linaro.org