On 05/07/2023 20:59, Charlie Jenkins wrote:
Add documentation explaining the behavior of mmap. Include a simple test that ensures that mmap returns an address less than the hint address while there are still addresses available.
Signed-off-by: Charlie Jenkins charlie@rivosinc.com
Documentation/riscv/vm-layout.rst | 22 +++++++++ arch/riscv/include/asm/pgtable.h | 8 +-- tools/testing/selftests/riscv/Makefile | 2 +- tools/testing/selftests/riscv/mm/.gitignore | 1 + tools/testing/selftests/riscv/mm/Makefile | 21 ++++++++ .../selftests/riscv/mm/testcases/mmap.c | 49 +++++++++++++++++++ 6 files changed, 99 insertions(+), 4 deletions(-) create mode 100644 tools/testing/selftests/riscv/mm/.gitignore create mode 100644 tools/testing/selftests/riscv/mm/Makefile create mode 100644 tools/testing/selftests/riscv/mm/testcases/mmap.c
diff --git a/Documentation/riscv/vm-layout.rst b/Documentation/riscv/vm-layout.rst index 5462c84f4723..a610c68c9f3f 100644 --- a/Documentation/riscv/vm-layout.rst +++ b/Documentation/riscv/vm-layout.rst @@ -133,3 +133,25 @@ RISC-V Linux Kernel SV57 ffffffff00000000 | -4 GB | ffffffff7fffffff | 2 GB | modules, BPF ffffffff80000000 | -2 GB | ffffffffffffffff | 2 GB | kernel __________________|____________|__________________|_________|____________________________________________________________
+Userspace VAs +-------------------- +To maintain compatibility with software that relies on the VA space +with a maximum of 39-bits, the kernel will, by default, return virtual +addresses to userspace from a 48-bit range (sv48).
Hmmm weird, you say that applications that require a 38-bit address space will be given a 47-bit address by default?
This +default behavior is achieved by passing 0 into the hint address parameter +of mmap.
+Software can "opt-in" to receiving VAs from other VA space by providing +a hint address to mmap. A call to mmap is guaranteed to return an address +that will not override the unset left-aligned bits in the hint address, +unless there is no space left in the address space. If there is no space +available in the requested address space, an address in the next smallest +available address space will be returned.
+For example, in order to obtain 48-bit VA space, a hint address greater than +:code:`1 << 38` must be provided. Note that this is 38 due to sv39 userspace +ending at :code:`1 << 38` with the addresses beyond this and up to :code:`1 << 39`
Not "up to", since the MSBs will actually all be set for the kernel address space, so that would be up to (1 << 64) - 1.
+being reserved for the kernel. Similarly, to obtain 57-bit VA space addresses, a +hint address greater than or equal to :code:`1 << 47` must be provided. diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 752e210c7547..5ac973193fab 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -841,14 +841,16 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte)
- Task size is 0x4000000000 for RV64 or 0x9fc00000 for RV32.
- Note that PGDIR_SIZE must evenly divide TASK_SIZE.
- Task size is:
0x9fc00000 (~2.5GB) for RV32.
- 0x4000000000 ( 256GB) for RV64 using SV39 mmu
- 0x800000000000 ( 128TB) for RV64 using SV48 mmu
0x9fc00000 (~2.5GB) for RV32.
0x4000000000 ( 256GB) for RV64 using SV39 mmu
- 0x800000000000 ( 128TB) for RV64 using SV48 mmu
- 0x100000000000000 ( 64PB) for RV64 using SV57 mmu
- Note that PGDIR_SIZE must evenly divide TASK_SIZE since "RISC-V
- Instruction Set Manual Volume II: Privileged Architecture" states that
- "load and store effective addresses, which are 64bits, must have bits
- 63–48 all equal to bit 47, or else a page-fault exception will occur."
- Similarly for SV57, bits 63–57 must be equal to bit 56.
This change should go in its own patch. And you should split this patch into the documentation update and the addition of the test, both are independent.
*/ #ifdef CONFIG_64BIT #define TASK_SIZE_64 (PGDIR_SIZE * PTRS_PER_PGD / 2) diff --git a/tools/testing/selftests/riscv/Makefile b/tools/testing/selftests/riscv/Makefile index 32a72902d045..0fee58f990ae 100644 --- a/tools/testing/selftests/riscv/Makefile +++ b/tools/testing/selftests/riscv/Makefile @@ -5,7 +5,7 @@ ARCH ?= $(shell uname -m 2>/dev/null || echo not) ifneq (,$(filter $(ARCH),riscv)) -RISCV_SUBTARGETS ?= hwprobe +RISCV_SUBTARGETS ?= hwprobe mm else RISCV_SUBTARGETS := endif diff --git a/tools/testing/selftests/riscv/mm/.gitignore b/tools/testing/selftests/riscv/mm/.gitignore new file mode 100644 index 000000000000..022ea0a3f7df --- /dev/null +++ b/tools/testing/selftests/riscv/mm/.gitignore @@ -0,0 +1 @@ +mmap \ No newline at end of file diff --git a/tools/testing/selftests/riscv/mm/Makefile b/tools/testing/selftests/riscv/mm/Makefile new file mode 100644 index 000000000000..d41a0b3d2ca2 --- /dev/null +++ b/tools/testing/selftests/riscv/mm/Makefile @@ -0,0 +1,21 @@ +# SPDX-License-Identifier: GPL-2.0 +# Originally tools/testing/selftests/arm64/signal
+# Additional include paths needed by kselftest.h and local headers +CFLAGS += -D_GNU_SOURCE -std=gnu99 -I.
+SRCS := $(filter-out testcases/testcases.c,$(wildcard testcases/*.c)) +PROGS := $(patsubst %.c,%,$(SRCS))
+# Generated binaries to be installed by top KSFT script +TEST_GEN_PROGS := $(notdir $(PROGS))
+# Get Kernel headers installed and use them.
+# Including KSFT lib.mk here will also mangle the TEST_GEN_PROGS list +# to account for any OUTPUT target-dirs optionally provided by +# the toplevel makefile +include ../../lib.mk
+$(TEST_GEN_PROGS): $(PROGS)
- cp $(PROGS) $(OUTPUT)/
\ No newline at end of file diff --git a/tools/testing/selftests/riscv/mm/testcases/mmap.c b/tools/testing/selftests/riscv/mm/testcases/mmap.c new file mode 100644 index 000000000000..781576f4c14b --- /dev/null +++ b/tools/testing/selftests/riscv/mm/testcases/mmap.c @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <sys/mman.h> +#include "../../kselftest_harness.h"
+TEST(sv57_test) +{
	/*
	 * Check which user address space (sv39, sv48 or sv57) mmap() picks
	 * for a mapping, depending on the hint address it is given.
	 */
	// Only works on 64 bit
#if __riscv_xlen == 64
	// Place all of the hint addresses on the boundaries of mmap
	// sv39, sv48, sv57
	// User addresses end at 1<<38, 1<<47, 1<<56 respectively
	void *on_37_bits = (void *) (1UL << 37);
	void *on_38_bits = (void *) (1UL << 38);
	void *on_46_bits = (void *) (1UL << 46);
	void *on_47_bits = (void *) (1UL << 47);
	void *on_55_bits = (void *) (1UL << 55);
	void *on_56_bits = (void *) (1UL << 56);

	int prot = PROT_READ | PROT_WRITE;
	int flags = MAP_PRIVATE | MAP_ANONYMOUS;

	// Anonymous mappings have no backing file; pass fd = -1 as
	// recommended for portability rather than a real descriptor (0).
	int *no_hint = mmap(NULL, 5*sizeof(int), prot, flags, -1, 0);
	int *on_37_addr = mmap(on_37_bits, 5*sizeof(int), prot, flags, -1, 0);
	int *on_38_addr = mmap(on_38_bits, 5*sizeof(int), prot, flags, -1, 0);
	int *on_46_addr = mmap(on_46_bits, 5*sizeof(int), prot, flags, -1, 0);
	int *on_47_addr = mmap(on_47_bits, 5*sizeof(int), prot, flags, -1, 0);
	int *on_55_addr = mmap(on_55_bits, 5*sizeof(int), prot, flags, -1, 0);
	int *on_56_addr = mmap(on_56_bits, 5*sizeof(int), prot, flags, -1, 0);

	// Every request must succeed before its address is inspected
	EXPECT_NE(no_hint, MAP_FAILED);
	EXPECT_NE(on_37_addr, MAP_FAILED);
	EXPECT_NE(on_38_addr, MAP_FAILED);
	EXPECT_NE(on_46_addr, MAP_FAILED);
	EXPECT_NE(on_47_addr, MAP_FAILED);
	EXPECT_NE(on_55_addr, MAP_FAILED);
	EXPECT_NE(on_56_addr, MAP_FAILED);

	// No hint: the address must come from the sv48 user range
	EXPECT_LT((unsigned long) no_hint, 1UL << 47);
	// NOTE(review): hints of 1<<46 and 1<<55 are expected to land in the
	// next smaller address space (sv39 and sv48) - confirm against the
	// kernel's mmap hint handling.
	EXPECT_LT((unsigned long) on_37_addr, 1UL << 38);
	EXPECT_LT((unsigned long) on_38_addr, 1UL << 38);
	EXPECT_LT((unsigned long) on_46_addr, 1UL << 38);
	EXPECT_LT((unsigned long) on_47_addr, 1UL << 47);
	EXPECT_LT((unsigned long) on_55_addr, 1UL << 47);
	// sv57 user addresses end at 1<<56, so that is the tightest valid bound
	EXPECT_LT((unsigned long) on_56_addr, 1UL << 56);
#endif
+}
+TEST_HARNESS_MAIN