This patch reverts the change to the meaning of the addr parameter of the mmap syscall introduced by the previous commit add2cc6b6515 ("RISC-V: mm: Restrict address space for sv39,sv48,sv57") from patch [1], which treats the hint address as the upper bound of the address returned by mmap. However, some userspace software assumes that mmap will attempt to create the mapping at the hint address if possible, even without MAP_FIXED set. Such software will always take its fallback path because the returned address is not the same as the hint, which may lead to some performance overhead. Other ISAs such as x86, arm64, and powerpc face the same issue: their userspace virtual address space is larger than 48-bit, and userspace software may use the most significant bits beyond 48-bit to store some information. Still, these ISAs did not change the meaning of the hint address; they only limit the address space to 48-bit when the hint address does not go beyond the default map window.
Thus, this patch aligns the behavior of the mmap syscall on RISC-V sv57-capable systems with x86, arm64, and powerpc by only limiting the address space to DEFAULT_MAP_WINDOW, which is defined to be no larger than 47-bit. If a user program wants to use the sv57 address space, it can call mmap with a hint address larger than BIT(47), as is already documented for x86 and arm64. This code is copied from the powerpc kernel source code.
[1]. https://lore.kernel.org/r/20230809232218.849726-2-charlie@rivosinc.com
Signed-off-by: Yangyu Chen cyy@cyyself.name --- arch/riscv/include/asm/processor.h | 38 ++++++------------------------ 1 file changed, 7 insertions(+), 31 deletions(-)
diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h index a8509cc31ab2..bc604669f18e 100644 --- a/arch/riscv/include/asm/processor.h +++ b/arch/riscv/include/asm/processor.h @@ -18,37 +18,13 @@ #define DEFAULT_MAP_WINDOW (UL(1) << (MMAP_VA_BITS - 1)) #define STACK_TOP_MAX TASK_SIZE
-#define arch_get_mmap_end(addr, len, flags) \ -({ \ - unsigned long mmap_end; \ - typeof(addr) _addr = (addr); \ - if ((_addr) == 0 || (IS_ENABLED(CONFIG_COMPAT) && is_compat_task())) \ - mmap_end = STACK_TOP_MAX; \ - else if ((_addr) >= VA_USER_SV57) \ - mmap_end = STACK_TOP_MAX; \ - else if ((((_addr) >= VA_USER_SV48)) && (VA_BITS >= VA_BITS_SV48)) \ - mmap_end = VA_USER_SV48; \ - else \ - mmap_end = VA_USER_SV39; \ - mmap_end; \ -}) - -#define arch_get_mmap_base(addr, base) \ -({ \ - unsigned long mmap_base; \ - typeof(addr) _addr = (addr); \ - typeof(base) _base = (base); \ - unsigned long rnd_gap = DEFAULT_MAP_WINDOW - (_base); \ - if ((_addr) == 0 || (IS_ENABLED(CONFIG_COMPAT) && is_compat_task())) \ - mmap_base = (_base); \ - else if (((_addr) >= VA_USER_SV57) && (VA_BITS >= VA_BITS_SV57)) \ - mmap_base = VA_USER_SV57 - rnd_gap; \ - else if ((((_addr) >= VA_USER_SV48)) && (VA_BITS >= VA_BITS_SV48)) \ - mmap_base = VA_USER_SV48 - rnd_gap; \ - else \ - mmap_base = VA_USER_SV39 - rnd_gap; \ - mmap_base; \ -}) +#define arch_get_mmap_end(addr, len, flags) \ + (((addr) > DEFAULT_MAP_WINDOW) || \ + (((flags) & MAP_FIXED) && ((addr) + (len) > DEFAULT_MAP_WINDOW)) ? TASK_SIZE : \ + DEFAULT_MAP_WINDOW) + +#define arch_get_mmap_base(addr, base) \ + (((addr) > DEFAULT_MAP_WINDOW) ? (base) + TASK_SIZE - DEFAULT_MAP_WINDOW : (base))
#else #define DEFAULT_MAP_WINDOW TASK_SIZE