Let's test some basic functionality using /dev/mem. These tests will implicitly cover some PAT (Page Attribute Handling) handling on x86.
These tests will only run when /dev/mem access to the first two pages in physical address space is possible and allowed; otherwise, the tests are skipped.
On current x86-64 with PAT inside a VM, all tests pass:
TAP version 13 1..6 # Starting 6 tests from 1 test cases. # RUN pfnmap.madvise_disallowed ... # OK pfnmap.madvise_disallowed ok 1 pfnmap.madvise_disallowed # RUN pfnmap.munmap_split ... # OK pfnmap.munmap_split ok 2 pfnmap.munmap_split # RUN pfnmap.mremap_fixed ... # OK pfnmap.mremap_fixed ok 3 pfnmap.mremap_fixed # RUN pfnmap.mremap_shrink ... # OK pfnmap.mremap_shrink ok 4 pfnmap.mremap_shrink # RUN pfnmap.mremap_expand ... # OK pfnmap.mremap_expand ok 5 pfnmap.mremap_expand # RUN pfnmap.fork ... # OK pfnmap.fork ok 6 pfnmap.fork # PASSED: 6 / 6 tests passed. # Totals: pass:6 fail:0 xfail:0 xpass:0 skip:0 error:0
However, we are able to trigger:
[ 27.888251] x86/PAT: pfnmap:1790 freeing invalid memtype [mem 0x00000000-0x00000fff]
There are probably more things worth testing in the future, such as MAP_PRIVATE handling. But this set of tests is sufficient to cover most of the things we will rework regarding PAT handling.
Cc: Andrew Morton akpm@linux-foundation.org Cc: Shuah Khan shuah@kernel.org Cc: Lorenzo Stoakes lorenzo.stoakes@oracle.com Cc: Ingo Molnar mingo@redhat.com Cc: Peter Xu peterx@redhat.com Cc: Dev Jain dev.jain@arm.com Signed-off-by: David Hildenbrand david@redhat.com ---
Hopefully I didn't miss any review feedback.
v1 -> v2: * Rewrite using kselftest_harness, which simplifies a lot of things * Add to .gitignore and run_vmtests.sh * Register signal handler on demand * Use volatile trick to force a read (not factoring out FORCE_READ just yet) * Drop mprotect() test case * Add some more comments why we test certain things * Use NULL for mmap() first parameter instead of 0 * Smaller fixes
--- tools/testing/selftests/mm/.gitignore | 1 + tools/testing/selftests/mm/Makefile | 1 + tools/testing/selftests/mm/pfnmap.c | 196 ++++++++++++++++++++++ tools/testing/selftests/mm/run_vmtests.sh | 4 + 4 files changed, 202 insertions(+) create mode 100644 tools/testing/selftests/mm/pfnmap.c
diff --git a/tools/testing/selftests/mm/.gitignore b/tools/testing/selftests/mm/.gitignore index 91db34941a143..824266982aa36 100644 --- a/tools/testing/selftests/mm/.gitignore +++ b/tools/testing/selftests/mm/.gitignore @@ -20,6 +20,7 @@ mremap_test on-fault-limit transhuge-stress pagemap_ioctl +pfnmap *.tmp* protection_keys protection_keys_32 diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile index ad4d6043a60f0..ae6f994d3add7 100644 --- a/tools/testing/selftests/mm/Makefile +++ b/tools/testing/selftests/mm/Makefile @@ -84,6 +84,7 @@ TEST_GEN_FILES += mremap_test TEST_GEN_FILES += mseal_test TEST_GEN_FILES += on-fault-limit TEST_GEN_FILES += pagemap_ioctl +TEST_GEN_FILES += pfnmap TEST_GEN_FILES += thuge-gen TEST_GEN_FILES += transhuge-stress TEST_GEN_FILES += uffd-stress diff --git a/tools/testing/selftests/mm/pfnmap.c b/tools/testing/selftests/mm/pfnmap.c new file mode 100644 index 0000000000000..8a9d19b6020c7 --- /dev/null +++ b/tools/testing/selftests/mm/pfnmap.c @@ -0,0 +1,196 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Basic VM_PFNMAP tests relying on mmap() of '/dev/mem' + * + * Copyright 2025, Red Hat, Inc. + * + * Author(s): David Hildenbrand david@redhat.com + */ +#define _GNU_SOURCE +#include <stdlib.h> +#include <string.h> +#include <stdint.h> +#include <unistd.h> +#include <errno.h> +#include <fcntl.h> +#include <signal.h> +#include <setjmp.h> +#include <linux/mman.h> +#include <sys/mman.h> +#include <sys/wait.h> + +#include "../kselftest_harness.h" +#include "vm_util.h" + +static sigjmp_buf sigjmp_buf_env; + +static void signal_handler(int sig) +{ + siglongjmp(sigjmp_buf_env, -EFAULT); +} + +static int test_read_access(char *addr, size_t size, size_t pagesize) +{ + size_t offs; + int ret; + + if (signal(SIGSEGV, signal_handler) == SIG_ERR) + return -EINVAL; + + ret = sigsetjmp(sigjmp_buf_env, 1); + if (!ret) { + for (offs = 0; offs < size; offs += pagesize) + /* Force a read that the compiler cannot optimize out. */ + *((volatile char *)(addr + offs)); + } + if (signal(SIGSEGV, signal_handler) == SIG_ERR) + return -EINVAL; + + return ret; +} + +FIXTURE(pfnmap) +{ + size_t pagesize; + int dev_mem_fd; + char *addr1; + size_t size1; + char *addr2; + size_t size2; +}; + +FIXTURE_SETUP(pfnmap) +{ + self->pagesize = getpagesize(); + + self->dev_mem_fd = open("/dev/mem", O_RDONLY); + if (self->dev_mem_fd < 0) + SKIP(return, "Cannot open '/dev/mem'\n"); + + /* We'll require the first two pages throughout our tests ... */ + self->size1 = self->pagesize * 2; + self->addr1 = mmap(NULL, self->size1, PROT_READ, MAP_SHARED, + self->dev_mem_fd, 0); + if (self->addr1 == MAP_FAILED) + SKIP(return, "Cannot mmap '/dev/mem'\n"); + + /* ... and want to be able to read from them. */ + if (test_read_access(self->addr1, self->size1, self->pagesize)) + SKIP(return, "Cannot read-access mmap'ed '/dev/mem'\n"); + + self->size2 = 0; + self->addr2 = MAP_FAILED; +} + +FIXTURE_TEARDOWN(pfnmap) +{ + if (self->addr2 != MAP_FAILED) + munmap(self->addr2, self->size2); + if (self->addr1 != MAP_FAILED) + munmap(self->addr1, self->size1); + if (self->dev_mem_fd >= 0) + close(self->dev_mem_fd); +} + +TEST_F(pfnmap, madvise_disallowed) +{ + int advices[] = { + MADV_DONTNEED, + MADV_DONTNEED_LOCKED, + MADV_FREE, + MADV_WIPEONFORK, + MADV_COLD, + MADV_PAGEOUT, + MADV_POPULATE_READ, + MADV_POPULATE_WRITE, + }; + int i; + + /* All these advices must be rejected. */ + for (i = 0; i < ARRAY_SIZE(advices); i++) { + EXPECT_LT(madvise(self->addr1, self->pagesize, advices[i]), 0); + EXPECT_EQ(errno, EINVAL); + } +} + +TEST_F(pfnmap, munmap_split) +{ + /* + * Unmap the first page. This munmap() call is not really expected to + * fail, but we might be able to trigger other internal issues. + */ + ASSERT_EQ(munmap(self->addr1, self->pagesize), 0); + + /* + * Remap the first page while the second page is still mapped. This + * makes sure that any PAT tracking on x86 will allow for mmap()'ing + * a page again while some parts of the first mmap() are still + * around. + */ + self->size2 = self->pagesize; + self->addr2 = mmap(NULL, self->pagesize, PROT_READ, MAP_SHARED, + self->dev_mem_fd, 0); + ASSERT_NE(self->addr2, MAP_FAILED); +} + +TEST_F(pfnmap, mremap_fixed) +{ + char *ret; + + /* Reserve a destination area. */ + self->size2 = self->size1; + self->addr2 = mmap(NULL, self->size2, PROT_READ, MAP_ANON | MAP_PRIVATE, + -1, 0); + ASSERT_NE(self->addr2, MAP_FAILED); + + /* mremap() over our destination. */ + ret = mremap(self->addr1, self->size1, self->size2, + MREMAP_FIXED | MREMAP_MAYMOVE, self->addr2); + ASSERT_NE(ret, MAP_FAILED); +} + +TEST_F(pfnmap, mremap_shrink) +{ + char *ret; + + /* Shrinking is expected to work. */ + ret = mremap(self->addr1, self->size1, self->size1 - self->pagesize, 0); + ASSERT_NE(ret, MAP_FAILED); +} + +TEST_F(pfnmap, mremap_expand) +{ + /* + * Growing is not expected to work, and getting it right would + * be challenging. So this test primarily serves as an early warning + * that something that probably should never work suddenly works. + */ + self->size2 = self->size1 + self->pagesize; + self->addr2 = mremap(self->addr1, self->size1, self->size2, MREMAP_MAYMOVE); + ASSERT_EQ(self->addr2, MAP_FAILED); +} + +TEST_F(pfnmap, fork) +{ + pid_t pid; + int ret; + + /* fork() a child and test if the child can access the pages. */ + pid = fork(); + ASSERT_GE(pid, 0); + + if (!pid) { + EXPECT_EQ(test_read_access(self->addr1, self->size1, + self->pagesize), 0); + exit(0); + } + + wait(&ret); + if (WIFEXITED(ret)) + ret = WEXITSTATUS(ret); + else + ret = -EINVAL; + ASSERT_EQ(ret, 0); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index 188b125bf1f6b..dddd1dd8af145 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -63,6 +63,8 @@ separated by spaces: test soft dirty page bit semantics - pagemap test pagemap_scan IOCTL +- pfnmap + tests for VM_PFNMAP handling - cow test copy-on-write semantics - thp @@ -472,6 +474,8 @@ fi
CATEGORY="pagemap" run_test ./pagemap_ioctl
+CATEGORY="pfnmap" run_test ./pfnmap + # COW tests CATEGORY="cow" run_test ./cow
On Fri, May 09, 2025 at 05:30:32PM +0200, David Hildenbrand wrote:
Let's test some basic functionality using /dev/mem. These tests will implicitly cover some PAT (Page Attribute Handling) handling on x86.
These tests will only run when /dev/mem access to the first two pages in physical address space is possible and allowed; otherwise, the tests are skipped.
On current x86-64 with PAT inside a VM, all tests pass:
TAP version 13 1..6 # Starting 6 tests from 1 test cases. # RUN pfnmap.madvise_disallowed ... # OK pfnmap.madvise_disallowed ok 1 pfnmap.madvise_disallowed # RUN pfnmap.munmap_split ... # OK pfnmap.munmap_split ok 2 pfnmap.munmap_split # RUN pfnmap.mremap_fixed ... # OK pfnmap.mremap_fixed ok 3 pfnmap.mremap_fixed # RUN pfnmap.mremap_shrink ... # OK pfnmap.mremap_shrink ok 4 pfnmap.mremap_shrink # RUN pfnmap.mremap_expand ... # OK pfnmap.mremap_expand ok 5 pfnmap.mremap_expand # RUN pfnmap.fork ... # OK pfnmap.fork ok 6 pfnmap.fork # PASSED: 6 / 6 tests passed. # Totals: pass:6 fail:0 xfail:0 xpass:0 skip:0 error:0
However, we are able to trigger:
[ 27.888251] x86/PAT: pfnmap:1790 freeing invalid memtype [mem 0x00000000-0x00000fff]
There are probably more things worth testing in the future, such as MAP_PRIVATE handling. But this set of tests is sufficient to cover most of the things we will rework regarding PAT handling.
Cc: Andrew Morton akpm@linux-foundation.org Cc: Shuah Khan shuah@kernel.org Cc: Lorenzo Stoakes lorenzo.stoakes@oracle.com Cc: Ingo Molnar mingo@redhat.com Cc: Peter Xu peterx@redhat.com Cc: Dev Jain dev.jain@arm.com Signed-off-by: David Hildenbrand david@redhat.com
Nice, big improvement!
Reviewed-by: Lorenzo Stoakes lorenzo.stoakes@oracle.com
Hopefully I didn't miss any review feedback.
All good afaict! :) the only thing would be to make the siglongjmp() conditional but it's not a big deal.
v1 -> v2:
- Rewrite using kselftest_harness, which simplifies a lot of things
- Add to .gitignore and run_vmtests.sh
- Register signal handler on demand
- Use volatile trick to force a read (not factoring out FORCE_READ just yet)
- Drop mprotect() test case
- Add some more comments why we test certain things
- Use NULL for mmap() first parameter instead of 0
- Smaller fixes
tools/testing/selftests/mm/.gitignore | 1 + tools/testing/selftests/mm/Makefile | 1 + tools/testing/selftests/mm/pfnmap.c | 196 ++++++++++++++++++++++ tools/testing/selftests/mm/run_vmtests.sh | 4 + 4 files changed, 202 insertions(+) create mode 100644 tools/testing/selftests/mm/pfnmap.c
diff --git a/tools/testing/selftests/mm/.gitignore b/tools/testing/selftests/mm/.gitignore index 91db34941a143..824266982aa36 100644 --- a/tools/testing/selftests/mm/.gitignore +++ b/tools/testing/selftests/mm/.gitignore @@ -20,6 +20,7 @@ mremap_test on-fault-limit transhuge-stress pagemap_ioctl +pfnmap *.tmp* protection_keys protection_keys_32 diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile index ad4d6043a60f0..ae6f994d3add7 100644 --- a/tools/testing/selftests/mm/Makefile +++ b/tools/testing/selftests/mm/Makefile @@ -84,6 +84,7 @@ TEST_GEN_FILES += mremap_test TEST_GEN_FILES += mseal_test TEST_GEN_FILES += on-fault-limit TEST_GEN_FILES += pagemap_ioctl +TEST_GEN_FILES += pfnmap TEST_GEN_FILES += thuge-gen TEST_GEN_FILES += transhuge-stress TEST_GEN_FILES += uffd-stress diff --git a/tools/testing/selftests/mm/pfnmap.c b/tools/testing/selftests/mm/pfnmap.c new file mode 100644 index 0000000000000..8a9d19b6020c7 --- /dev/null +++ b/tools/testing/selftests/mm/pfnmap.c @@ -0,0 +1,196 @@ +// SPDX-License-Identifier: GPL-2.0-only +/*
- Basic VM_PFNMAP tests relying on mmap() of '/dev/mem'
- Copyright 2025, Red Hat, Inc.
- Author(s): David Hildenbrand david@redhat.com
- */
+#define _GNU_SOURCE +#include <stdlib.h> +#include <string.h> +#include <stdint.h> +#include <unistd.h> +#include <errno.h> +#include <fcntl.h> +#include <signal.h> +#include <setjmp.h> +#include <linux/mman.h> +#include <sys/mman.h> +#include <sys/wait.h>
+#include "../kselftest_harness.h" +#include "vm_util.h"
+static sigjmp_buf sigjmp_buf_env;
+static void signal_handler(int sig) +{
- siglongjmp(sigjmp_buf_env, -EFAULT);
+}
+static int test_read_access(char *addr, size_t size, size_t pagesize) +{
- size_t offs;
- int ret;
- if (signal(SIGSEGV, signal_handler) == SIG_ERR)
return -EINVAL;
- ret = sigsetjmp(sigjmp_buf_env, 1);
- if (!ret) {
for (offs = 0; offs < size; offs += pagesize)
/* Force a read that the compiler cannot optimize out. */
*((volatile char *)(addr + offs));
- }
- if (signal(SIGSEGV, signal_handler) == SIG_ERR)
return -EINVAL;
- return ret;
+}
+FIXTURE(pfnmap) +{
- size_t pagesize;
- int dev_mem_fd;
- char *addr1;
- size_t size1;
- char *addr2;
- size_t size2;
+};
+FIXTURE_SETUP(pfnmap) +{
- self->pagesize = getpagesize();
- self->dev_mem_fd = open("/dev/mem", O_RDONLY);
- if (self->dev_mem_fd < 0)
SKIP(return, "Cannot open '/dev/mem'\n");
- /* We'll require the first two pages throughout our tests ... */
- self->size1 = self->pagesize * 2;
- self->addr1 = mmap(NULL, self->size1, PROT_READ, MAP_SHARED,
self->dev_mem_fd, 0);
- if (self->addr1 == MAP_FAILED)
SKIP(return, "Cannot mmap '/dev/mem'\n");
- /* ... and want to be able to read from them. */
- if (test_read_access(self->addr1, self->size1, self->pagesize))
SKIP(return, "Cannot read-access mmap'ed '/dev/mem'\n");
- self->size2 = 0;
- self->addr2 = MAP_FAILED;
+}
+FIXTURE_TEARDOWN(pfnmap) +{
- if (self->addr2 != MAP_FAILED)
munmap(self->addr2, self->size2);
- if (self->addr1 != MAP_FAILED)
munmap(self->addr1, self->size1);
- if (self->dev_mem_fd >= 0)
close(self->dev_mem_fd);
+}
+TEST_F(pfnmap, madvise_disallowed) +{
- int advices[] = {
MADV_DONTNEED,
MADV_DONTNEED_LOCKED,
MADV_FREE,
MADV_WIPEONFORK,
MADV_COLD,
MADV_PAGEOUT,
MADV_POPULATE_READ,
MADV_POPULATE_WRITE,
- };
- int i;
- /* All these advices must be rejected. */
- for (i = 0; i < ARRAY_SIZE(advices); i++) {
EXPECT_LT(madvise(self->addr1, self->pagesize, advices[i]), 0);
EXPECT_EQ(errno, EINVAL);
- }
+}
+TEST_F(pfnmap, munmap_split) +{
- /*
* Unmap the first page. This munmap() call is not really expected to
* fail, but we might be able to trigger other internal issues.
*/
- ASSERT_EQ(munmap(self->addr1, self->pagesize), 0);
- /*
* Remap the first page while the second page is still mapped. This
* makes sure that any PAT tracking on x86 will allow for mmap()'ing
* a page again while some parts of the first mmap() are still
* around.
*/
- self->size2 = self->pagesize;
- self->addr2 = mmap(NULL, self->pagesize, PROT_READ, MAP_SHARED,
self->dev_mem_fd, 0);
- ASSERT_NE(self->addr2, MAP_FAILED);
+}
+TEST_F(pfnmap, mremap_fixed) +{
- char *ret;
- /* Reserve a destination area. */
- self->size2 = self->size1;
- self->addr2 = mmap(NULL, self->size2, PROT_READ, MAP_ANON | MAP_PRIVATE,
-1, 0);
- ASSERT_NE(self->addr2, MAP_FAILED);
- /* mremap() over our destination. */
- ret = mremap(self->addr1, self->size1, self->size2,
MREMAP_FIXED | MREMAP_MAYMOVE, self->addr2);
- ASSERT_NE(ret, MAP_FAILED);
+}
+TEST_F(pfnmap, mremap_shrink) +{
- char *ret;
- /* Shrinking is expected to work. */
- ret = mremap(self->addr1, self->size1, self->size1 - self->pagesize, 0);
- ASSERT_NE(ret, MAP_FAILED);
+}
+TEST_F(pfnmap, mremap_expand) +{
- /*
* Growing is not expected to work, and getting it right would
* be challenging. So this test primarily serves as an early warning
* that something that probably should never work suddenly works.
*/
- self->size2 = self->size1 + self->pagesize;
- self->addr2 = mremap(self->addr1, self->size1, self->size2, MREMAP_MAYMOVE);
- ASSERT_EQ(self->addr2, MAP_FAILED);
+}
+TEST_F(pfnmap, fork) +{
- pid_t pid;
- int ret;
- /* fork() a child and test if the child can access the pages. */
- pid = fork();
- ASSERT_GE(pid, 0);
- if (!pid) {
EXPECT_EQ(test_read_access(self->addr1, self->size1,
self->pagesize), 0);
exit(0);
- }
- wait(&ret);
- if (WIFEXITED(ret))
ret = WEXITSTATUS(ret);
- else
ret = -EINVAL;
- ASSERT_EQ(ret, 0);
+}
+TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index 188b125bf1f6b..dddd1dd8af145 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -63,6 +63,8 @@ separated by spaces: test soft dirty page bit semantics
- pagemap test pagemap_scan IOCTL
+- pfnmap
- tests for VM_PFNMAP handling
- cow test copy-on-write semantics
- thp
@@ -472,6 +474,8 @@ fi CATEGORY="pagemap" run_test ./pagemap_ioctl +CATEGORY="pfnmap" run_test ./pfnmap
# COW tests CATEGORY="cow" run_test ./cow -- 2.49.0
On 09.05.25 17:55, Lorenzo Stoakes wrote:
On Fri, May 09, 2025 at 05:30:32PM +0200, David Hildenbrand wrote:
Let's test some basic functionality using /dev/mem. These tests will implicitly cover some PAT (Page Attribute Handling) handling on x86.
These tests will only run when /dev/mem access to the first two pages in physical address space is possible and allowed; otherwise, the tests are skipped.
On current x86-64 with PAT inside a VM, all tests pass:
TAP version 13 1..6 # Starting 6 tests from 1 test cases. # RUN pfnmap.madvise_disallowed ... # OK pfnmap.madvise_disallowed ok 1 pfnmap.madvise_disallowed # RUN pfnmap.munmap_split ... # OK pfnmap.munmap_split ok 2 pfnmap.munmap_split # RUN pfnmap.mremap_fixed ... # OK pfnmap.mremap_fixed ok 3 pfnmap.mremap_fixed # RUN pfnmap.mremap_shrink ... # OK pfnmap.mremap_shrink ok 4 pfnmap.mremap_shrink # RUN pfnmap.mremap_expand ... # OK pfnmap.mremap_expand ok 5 pfnmap.mremap_expand # RUN pfnmap.fork ... # OK pfnmap.fork ok 6 pfnmap.fork # PASSED: 6 / 6 tests passed. # Totals: pass:6 fail:0 xfail:0 xpass:0 skip:0 error:0
However, we are able to trigger:
[ 27.888251] x86/PAT: pfnmap:1790 freeing invalid memtype [mem 0x00000000-0x00000fff]
There are probably more things worth testing in the future, such as MAP_PRIVATE handling. But this set of tests is sufficient to cover most of the things we will rework regarding PAT handling.
Cc: Andrew Morton akpm@linux-foundation.org Cc: Shuah Khan shuah@kernel.org Cc: Lorenzo Stoakes lorenzo.stoakes@oracle.com Cc: Ingo Molnar mingo@redhat.com Cc: Peter Xu peterx@redhat.com Cc: Dev Jain dev.jain@arm.com Signed-off-by: David Hildenbrand david@redhat.com
Nice, big improvement!
Reviewed-by: Lorenzo Stoakes lorenzo.stoakes@oracle.com
Thanks! It was worth spending the time on using the harness.
The FIXTURE_TEARDOWN() stuff is really confusing. It's not actually required to teardown most stuff (unless you create files in setup etc), because all tests are executed in a fork'ed child, where fd's, mappings, ... will go away immediately afterwards during the exit().
I still implemented FIXTURE_TEARDOWN (like everybody else), because maybe the manual teardown can find other issues not triggered during exit().
On Mon, May 12, 2025 at 10:18:05AM +0200, David Hildenbrand wrote:
On 09.05.25 17:55, Lorenzo Stoakes wrote:
On Fri, May 09, 2025 at 05:30:32PM +0200, David Hildenbrand wrote:
Let's test some basic functionality using /dev/mem. These tests will implicitly cover some PAT (Page Attribute Handling) handling on x86.
These tests will only run when /dev/mem access to the first two pages in physical address space is possible and allowed; otherwise, the tests are skipped.
On current x86-64 with PAT inside a VM, all tests pass:
TAP version 13 1..6 # Starting 6 tests from 1 test cases. # RUN pfnmap.madvise_disallowed ... # OK pfnmap.madvise_disallowed ok 1 pfnmap.madvise_disallowed # RUN pfnmap.munmap_split ... # OK pfnmap.munmap_split ok 2 pfnmap.munmap_split # RUN pfnmap.mremap_fixed ... # OK pfnmap.mremap_fixed ok 3 pfnmap.mremap_fixed # RUN pfnmap.mremap_shrink ... # OK pfnmap.mremap_shrink ok 4 pfnmap.mremap_shrink # RUN pfnmap.mremap_expand ... # OK pfnmap.mremap_expand ok 5 pfnmap.mremap_expand # RUN pfnmap.fork ... # OK pfnmap.fork ok 6 pfnmap.fork # PASSED: 6 / 6 tests passed. # Totals: pass:6 fail:0 xfail:0 xpass:0 skip:0 error:0
However, we are able to trigger:
[ 27.888251] x86/PAT: pfnmap:1790 freeing invalid memtype [mem 0x00000000-0x00000fff]
There are probably more things worth testing in the future, such as MAP_PRIVATE handling. But this set of tests is sufficient to cover most of the things we will rework regarding PAT handling.
Cc: Andrew Morton akpm@linux-foundation.org Cc: Shuah Khan shuah@kernel.org Cc: Lorenzo Stoakes lorenzo.stoakes@oracle.com Cc: Ingo Molnar mingo@redhat.com Cc: Peter Xu peterx@redhat.com Cc: Dev Jain dev.jain@arm.com Signed-off-by: David Hildenbrand david@redhat.com
Nice, big improvement!
Reviewed-by: Lorenzo Stoakes lorenzo.stoakes@oracle.com
Thanks! It was worth spending the time on using the harness.
The FIXTURE_TEARDOWN() stuff is really confusing. It's not actually required to teardown most stuff (unless you create files in setup etc), because all tests are executed in a fork'ed child, where fd's, mappings, ... will go away immediately afterwards during the exit().
Yeah, it's maybe not always necessary, but stil handy, and at least allows for strict cleanup/separation between tests.
And having things structured this way makes life much easier in many other regards, as you have one place for it, you don't have to manually fiddle with test counts etc. etc.
Overall I think it's a big win :)
I still implemented FIXTURE_TEARDOWN (like everybody else), because maybe the manual teardown can find other issues not triggered during exit().
Ack!
-- Cheers,
David / dhildenb
Cheers, Lorenzo
Hi David,
On 09/05/2025 16:30, David Hildenbrand wrote:
Let's test some basic functionality using /dev/mem. These tests will implicitly cover some PAT (Page Attribute Handling) handling on x86.
These tests will only run when /dev/mem access to the first two pages in physical address space is possible and allowed; otherwise, the tests are skipped.
We are seeing really horrible RAS errors with this test when run on arm64 tx2 machine. Based solely on reviewing the code, I think the problem is that tx2 doesn't have anything at phys address 0, so test_read_access() is trying to put trasactions out to a bad address on the bus.
tx2 /proc/iomem:
$ sudo cat /proc/iomem 30000000-37ffffff : PCI ECAM 38000000-3fffffff : PCI ECAM 40000000-5fffffff : PCI Bus 0000:00 ...
Whereas my x86 box has some reserved memory:
$ sudo cat /proc/iomem 00000000-00000fff : Reserved 00001000-0003dfff : System RAM ...
I think perhaps the only safe way to handle this is to parse /proc/iomem for a region of "System RAM" that is at least 2 pages then use that for your read tests. This would also solve the hypothetical issue of reading something that has read size effects.
I also spotted a few nits while reading the code...
On current x86-64 with PAT inside a VM, all tests pass:
TAP version 13 1..6 # Starting 6 tests from 1 test cases. # RUN pfnmap.madvise_disallowed ... # OK pfnmap.madvise_disallowed ok 1 pfnmap.madvise_disallowed # RUN pfnmap.munmap_split ... # OK pfnmap.munmap_split ok 2 pfnmap.munmap_split # RUN pfnmap.mremap_fixed ... # OK pfnmap.mremap_fixed ok 3 pfnmap.mremap_fixed # RUN pfnmap.mremap_shrink ... # OK pfnmap.mremap_shrink ok 4 pfnmap.mremap_shrink # RUN pfnmap.mremap_expand ... # OK pfnmap.mremap_expand ok 5 pfnmap.mremap_expand # RUN pfnmap.fork ... # OK pfnmap.fork ok 6 pfnmap.fork # PASSED: 6 / 6 tests passed. # Totals: pass:6 fail:0 xfail:0 xpass:0 skip:0 error:0
However, we are able to trigger:
[ 27.888251] x86/PAT: pfnmap:1790 freeing invalid memtype [mem 0x00000000-0x00000fff]
There are probably more things worth testing in the future, such as MAP_PRIVATE handling. But this set of tests is sufficient to cover most of the things we will rework regarding PAT handling.
Cc: Andrew Morton akpm@linux-foundation.org Cc: Shuah Khan shuah@kernel.org Cc: Lorenzo Stoakes lorenzo.stoakes@oracle.com Cc: Ingo Molnar mingo@redhat.com Cc: Peter Xu peterx@redhat.com Cc: Dev Jain dev.jain@arm.com Signed-off-by: David Hildenbrand david@redhat.com
Hopefully I didn't miss any review feedback.
v1 -> v2:
- Rewrite using kselftest_harness, which simplifies a lot of things
- Add to .gitignore and run_vmtests.sh
- Register signal handler on demand
- Use volatile trick to force a read (not factoring out FORCE_READ just yet)
- Drop mprotect() test case
- Add some more comments why we test certain things
- Use NULL for mmap() first parameter instead of 0
- Smaller fixes
tools/testing/selftests/mm/.gitignore | 1 + tools/testing/selftests/mm/Makefile | 1 + tools/testing/selftests/mm/pfnmap.c | 196 ++++++++++++++++++++++ tools/testing/selftests/mm/run_vmtests.sh | 4 + 4 files changed, 202 insertions(+) create mode 100644 tools/testing/selftests/mm/pfnmap.c
diff --git a/tools/testing/selftests/mm/.gitignore b/tools/testing/selftests/mm/.gitignore index 91db34941a143..824266982aa36 100644 --- a/tools/testing/selftests/mm/.gitignore +++ b/tools/testing/selftests/mm/.gitignore @@ -20,6 +20,7 @@ mremap_test on-fault-limit transhuge-stress pagemap_ioctl +pfnmap *.tmp* protection_keys protection_keys_32 diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile index ad4d6043a60f0..ae6f994d3add7 100644 --- a/tools/testing/selftests/mm/Makefile +++ b/tools/testing/selftests/mm/Makefile @@ -84,6 +84,7 @@ TEST_GEN_FILES += mremap_test TEST_GEN_FILES += mseal_test TEST_GEN_FILES += on-fault-limit TEST_GEN_FILES += pagemap_ioctl +TEST_GEN_FILES += pfnmap TEST_GEN_FILES += thuge-gen TEST_GEN_FILES += transhuge-stress TEST_GEN_FILES += uffd-stress diff --git a/tools/testing/selftests/mm/pfnmap.c b/tools/testing/selftests/mm/pfnmap.c new file mode 100644 index 0000000000000..8a9d19b6020c7 --- /dev/null +++ b/tools/testing/selftests/mm/pfnmap.c @@ -0,0 +1,196 @@ +// SPDX-License-Identifier: GPL-2.0-only +/*
- Basic VM_PFNMAP tests relying on mmap() of '/dev/mem'
- Copyright 2025, Red Hat, Inc.
- Author(s): David Hildenbrand david@redhat.com
- */
+#define _GNU_SOURCE +#include <stdlib.h> +#include <string.h> +#include <stdint.h> +#include <unistd.h> +#include <errno.h> +#include <fcntl.h> +#include <signal.h> +#include <setjmp.h> +#include <linux/mman.h> +#include <sys/mman.h> +#include <sys/wait.h>
+#include "../kselftest_harness.h" +#include "vm_util.h"
+static sigjmp_buf sigjmp_buf_env;
+static void signal_handler(int sig) +{
- siglongjmp(sigjmp_buf_env, -EFAULT);
+}
+static int test_read_access(char *addr, size_t size, size_t pagesize) +{
- size_t offs;
- int ret;
- if (signal(SIGSEGV, signal_handler) == SIG_ERR)
return -EINVAL;
- ret = sigsetjmp(sigjmp_buf_env, 1);
- if (!ret) {
for (offs = 0; offs < size; offs += pagesize)
/* Force a read that the compiler cannot optimize out. */
*((volatile char *)(addr + offs));
- }
- if (signal(SIGSEGV, signal_handler) == SIG_ERR)
I think you mean: if (signal(SIGSEGV, SIG_DFL) == SIG_ERR)
Otherwise your signal_handler will remain installed, right?
return -EINVAL;
- return ret;
+}
+FIXTURE(pfnmap) +{
- size_t pagesize;
- int dev_mem_fd;
- char *addr1;
- size_t size1;
- char *addr2;
- size_t size2;
+};
+FIXTURE_SETUP(pfnmap) +{
- self->pagesize = getpagesize();
- self->dev_mem_fd = open("/dev/mem", O_RDONLY);
- if (self->dev_mem_fd < 0)
SKIP(return, "Cannot open '/dev/mem'\n");
- /* We'll require the first two pages throughout our tests ... */
- self->size1 = self->pagesize * 2;
- self->addr1 = mmap(NULL, self->size1, PROT_READ, MAP_SHARED,
self->dev_mem_fd, 0);
- if (self->addr1 == MAP_FAILED)
SKIP(return, "Cannot mmap '/dev/mem'\n");
- /* ... and want to be able to read from them. */
- if (test_read_access(self->addr1, self->size1, self->pagesize))
SKIP(return, "Cannot read-access mmap'ed '/dev/mem'\n");
- self->size2 = 0;
- self->addr2 = MAP_FAILED;
Do you need to init all the params in FIXTURE(pfnmap) to their "safe" values before any possible early returns? We don't want FIXTURE_TEARDOWN(pfnmap) getting confused.
Thanks, Ryan
+}
+FIXTURE_TEARDOWN(pfnmap) +{
- if (self->addr2 != MAP_FAILED)
munmap(self->addr2, self->size2);
- if (self->addr1 != MAP_FAILED)
munmap(self->addr1, self->size1);
- if (self->dev_mem_fd >= 0)
close(self->dev_mem_fd);
+}
+TEST_F(pfnmap, madvise_disallowed) +{
- int advices[] = {
MADV_DONTNEED,
MADV_DONTNEED_LOCKED,
MADV_FREE,
MADV_WIPEONFORK,
MADV_COLD,
MADV_PAGEOUT,
MADV_POPULATE_READ,
MADV_POPULATE_WRITE,
- };
- int i;
- /* All these advices must be rejected. */
- for (i = 0; i < ARRAY_SIZE(advices); i++) {
EXPECT_LT(madvise(self->addr1, self->pagesize, advices[i]), 0);
EXPECT_EQ(errno, EINVAL);
- }
+}
+TEST_F(pfnmap, munmap_split) +{
- /*
* Unmap the first page. This munmap() call is not really expected to
* fail, but we might be able to trigger other internal issues.
*/
- ASSERT_EQ(munmap(self->addr1, self->pagesize), 0);
- /*
* Remap the first page while the second page is still mapped. This
* makes sure that any PAT tracking on x86 will allow for mmap()'ing
* a page again while some parts of the first mmap() are still
* around.
*/
- self->size2 = self->pagesize;
- self->addr2 = mmap(NULL, self->pagesize, PROT_READ, MAP_SHARED,
self->dev_mem_fd, 0);
- ASSERT_NE(self->addr2, MAP_FAILED);
+}
+TEST_F(pfnmap, mremap_fixed) +{
- char *ret;
- /* Reserve a destination area. */
- self->size2 = self->size1;
- self->addr2 = mmap(NULL, self->size2, PROT_READ, MAP_ANON | MAP_PRIVATE,
-1, 0);
- ASSERT_NE(self->addr2, MAP_FAILED);
- /* mremap() over our destination. */
- ret = mremap(self->addr1, self->size1, self->size2,
MREMAP_FIXED | MREMAP_MAYMOVE, self->addr2);
- ASSERT_NE(ret, MAP_FAILED);
+}
+TEST_F(pfnmap, mremap_shrink) +{
- char *ret;
- /* Shrinking is expected to work. */
- ret = mremap(self->addr1, self->size1, self->size1 - self->pagesize, 0);
- ASSERT_NE(ret, MAP_FAILED);
+}
+TEST_F(pfnmap, mremap_expand) +{
- /*
* Growing is not expected to work, and getting it right would
* be challenging. So this test primarily serves as an early warning
* that something that probably should never work suddenly works.
*/
- self->size2 = self->size1 + self->pagesize;
- self->addr2 = mremap(self->addr1, self->size1, self->size2, MREMAP_MAYMOVE);
- ASSERT_EQ(self->addr2, MAP_FAILED);
+}
+TEST_F(pfnmap, fork) +{
- pid_t pid;
- int ret;
- /* fork() a child and test if the child can access the pages. */
- pid = fork();
- ASSERT_GE(pid, 0);
- if (!pid) {
EXPECT_EQ(test_read_access(self->addr1, self->size1,
self->pagesize), 0);
exit(0);
- }
- wait(&ret);
- if (WIFEXITED(ret))
ret = WEXITSTATUS(ret);
- else
ret = -EINVAL;
- ASSERT_EQ(ret, 0);
+}
+TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index 188b125bf1f6b..dddd1dd8af145 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -63,6 +63,8 @@ separated by spaces: test soft dirty page bit semantics
- pagemap test pagemap_scan IOCTL
+- pfnmap
- tests for VM_PFNMAP handling
- cow test copy-on-write semantics
- thp
@@ -472,6 +474,8 @@ fi CATEGORY="pagemap" run_test ./pagemap_ioctl +CATEGORY="pfnmap" run_test ./pfnmap
# COW tests CATEGORY="cow" run_test ./cow
On 28.05.25 12:34, Ryan Roberts wrote:
Hi David,
On 09/05/2025 16:30, David Hildenbrand wrote:
Let's test some basic functionality using /dev/mem. These tests will implicitly cover some PAT (Page Attribute Handling) handling on x86.
These tests will only run when /dev/mem access to the first two pages in physical address space is possible and allowed; otherwise, the tests are skipped.
We are seeing really horrible RAS errors with this test when run on arm64 tx2 machine. Based solely on reviewing the code, I think the problem is that tx2 doesn't have anything at phys address 0, so test_read_access() is trying to put trasactions out to a bad address on the bus.
tx2 /proc/iomem:
$ sudo cat /proc/iomem 30000000-37ffffff : PCI ECAM 38000000-3fffffff : PCI ECAM 40000000-5fffffff : PCI Bus 0000:00 ...
Whereas my x86 box has some reserved memory:
$ sudo cat /proc/iomem 00000000-00000fff : Reserved 00001000-0003dfff : System RAM ...
A quick fix would be to make this test specific to x86 (the only one I tested on). We should always have the lower two pages IIRC (BIOS stuff etc).
I think perhaps the only safe way to handle this is to parse /proc/iomem for a region of "System RAM" that is at least 2 pages then use that for your read tests. This would also solve the hypothetical issue of reading something that has read size effects.
That sounds also plausible yes. I somehow remembered that mmap() would fail if "there is nothing".
I also spotted a few nits while reading the code...
On current x86-64 with PAT inside a VM, all tests pass:
TAP version 13 1..6 # Starting 6 tests from 1 test cases. # RUN pfnmap.madvise_disallowed ... # OK pfnmap.madvise_disallowed ok 1 pfnmap.madvise_disallowed # RUN pfnmap.munmap_split ... # OK pfnmap.munmap_split ok 2 pfnmap.munmap_split # RUN pfnmap.mremap_fixed ... # OK pfnmap.mremap_fixed ok 3 pfnmap.mremap_fixed # RUN pfnmap.mremap_shrink ... # OK pfnmap.mremap_shrink ok 4 pfnmap.mremap_shrink # RUN pfnmap.mremap_expand ... # OK pfnmap.mremap_expand ok 5 pfnmap.mremap_expand # RUN pfnmap.fork ... # OK pfnmap.fork ok 6 pfnmap.fork # PASSED: 6 / 6 tests passed. # Totals: pass:6 fail:0 xfail:0 xpass:0 skip:0 error:0
However, we are able to trigger:
[ 27.888251] x86/PAT: pfnmap:1790 freeing invalid memtype [mem 0x00000000-0x00000fff]
There are probably more things worth testing in the future, such
as>> MAP_PRIVATE handling. But this set of tests is sufficient to cover most of
the things we will rework regarding PAT handling.
Cc: Andrew Morton akpm@linux-foundation.org Cc: Shuah Khan shuah@kernel.org Cc: Lorenzo Stoakes lorenzo.stoakes@oracle.com Cc: Ingo Molnar mingo@redhat.com Cc: Peter Xu peterx@redhat.com Cc: Dev Jain dev.jain@arm.com Signed-off-by: David Hildenbrand david@redhat.com
Hopefully I didn't miss any review feedback.
v1 -> v2:
- Rewrite using kselftest_harness, which simplifies a lot of things
- Add to .gitignore and run_vmtests.sh
- Register signal handler on demand
- Use volatile trick to force a read (not factoring out FORCE_READ just yet)
- Drop mprotect() test case
- Add some more comments why we test certain things
- Use NULL for mmap() first parameter instead of 0
- Smaller fixes
tools/testing/selftests/mm/.gitignore | 1 + tools/testing/selftests/mm/Makefile | 1 + tools/testing/selftests/mm/pfnmap.c | 196 ++++++++++++++++++++++ tools/testing/selftests/mm/run_vmtests.sh | 4 + 4 files changed, 202 insertions(+) create mode 100644 tools/testing/selftests/mm/pfnmap.c
diff --git a/tools/testing/selftests/mm/.gitignore b/tools/testing/selftests/mm/.gitignore index 91db34941a143..824266982aa36 100644 --- a/tools/testing/selftests/mm/.gitignore +++ b/tools/testing/selftests/mm/.gitignore @@ -20,6 +20,7 @@ mremap_test on-fault-limit transhuge-stress pagemap_ioctl +pfnmap *.tmp* protection_keys protection_keys_32 diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile index ad4d6043a60f0..ae6f994d3add7 100644 --- a/tools/testing/selftests/mm/Makefile +++ b/tools/testing/selftests/mm/Makefile @@ -84,6 +84,7 @@ TEST_GEN_FILES += mremap_test TEST_GEN_FILES += mseal_test TEST_GEN_FILES += on-fault-limit TEST_GEN_FILES += pagemap_ioctl +TEST_GEN_FILES += pfnmap TEST_GEN_FILES += thuge-gen TEST_GEN_FILES += transhuge-stress TEST_GEN_FILES += uffd-stress diff --git a/tools/testing/selftests/mm/pfnmap.c b/tools/testing/selftests/mm/pfnmap.c new file mode 100644 index 0000000000000..8a9d19b6020c7 --- /dev/null +++ b/tools/testing/selftests/mm/pfnmap.c @@ -0,0 +1,196 @@ +// SPDX-License-Identifier: GPL-2.0-only +/*
- Basic VM_PFNMAP tests relying on mmap() of '/dev/mem'
- Copyright 2025, Red Hat, Inc.
- Author(s): David Hildenbrand david@redhat.com
- */
+#define _GNU_SOURCE +#include <stdlib.h> +#include <string.h> +#include <stdint.h> +#include <unistd.h> +#include <errno.h> +#include <fcntl.h> +#include <signal.h> +#include <setjmp.h> +#include <linux/mman.h> +#include <sys/mman.h> +#include <sys/wait.h>
+#include "../kselftest_harness.h" +#include "vm_util.h"
+static sigjmp_buf sigjmp_buf_env;
+static void signal_handler(int sig) +{
- siglongjmp(sigjmp_buf_env, -EFAULT);
+}
+static int test_read_access(char *addr, size_t size, size_t pagesize) +{
- size_t offs;
- int ret;
- if (signal(SIGSEGV, signal_handler) == SIG_ERR)
return -EINVAL;
- ret = sigsetjmp(sigjmp_buf_env, 1);
- if (!ret) {
for (offs = 0; offs < size; offs += pagesize)
/* Force a read that the compiler cannot optimize out. */
*((volatile char *)(addr + offs));
- }
- if (signal(SIGSEGV, signal_handler) == SIG_ERR)
I think you mean: if (signal(SIGSEGV, SIG_DFL) == SIG_ERR)
Otherwise your signal_handler will remain installed, right?
Yeah, copy and past error.
return -EINVAL;
- return ret;
+}
+FIXTURE(pfnmap) +{
- size_t pagesize;
- int dev_mem_fd;
- char *addr1;
- size_t size1;
- char *addr2;
- size_t size2;
+};
+FIXTURE_SETUP(pfnmap) +{
- self->pagesize = getpagesize();
- self->dev_mem_fd = open("/dev/mem", O_RDONLY);
- if (self->dev_mem_fd < 0)
SKIP(return, "Cannot open '/dev/mem'\n");
- /* We'll require the first two pages throughout our tests ... */
- self->size1 = self->pagesize * 2;
- self->addr1 = mmap(NULL, self->size1, PROT_READ, MAP_SHARED,
self->dev_mem_fd, 0);
- if (self->addr1 == MAP_FAILED)
SKIP(return, "Cannot mmap '/dev/mem'\n");
- /* ... and want to be able to read from them. */
- if (test_read_access(self->addr1, self->size1, self->pagesize))
SKIP(return, "Cannot read-access mmap'ed '/dev/mem'\n");
- self->size2 = 0;
- self->addr2 = MAP_FAILED;
Do you need to init all the params in FIXTURE(pfnmap) to their "safe" values before any possible early returns? We don't want FIXTURE_TEARDOWN(pfnmap) getting confused.
If FIXTURE_SETUP fails, we'll exit immediately. See __TEST_F_IMPL().
Note that all tests are executed in a fork'ed child process, so quitting early (or not even having FIXTURE_TEARDOWN in most cases ...) does not really matter.
On 28.05.25 12:44, David Hildenbrand wrote:
On 28.05.25 12:34, Ryan Roberts wrote:
Hi David,
On 09/05/2025 16:30, David Hildenbrand wrote:
Let's test some basic functionality using /dev/mem. These tests will implicitly cover some PAT (Page Attribute Handling) handling on x86.
These tests will only run when /dev/mem access to the first two pages in physical address space is possible and allowed; otherwise, the tests are skipped.
We are seeing really horrible RAS errors with this test when run on arm64 tx2 machine. Based solely on reviewing the code, I think the problem is that tx2 doesn't have anything at phys address 0, so test_read_access() is trying to put trasactions out to a bad address on the bus.
tx2 /proc/iomem:
$ sudo cat /proc/iomem 30000000-37ffffff : PCI ECAM 38000000-3fffffff : PCI ECAM 40000000-5fffffff : PCI Bus 0000:00 ...
Whereas my x86 box has some reserved memory:
$ sudo cat /proc/iomem 00000000-00000fff : Reserved 00001000-0003dfff : System RAM ...
A quick fix would be to make this test specific to x86 (the only one I tested on). We should always have the lower two pages IIRC (BIOS stuff etc).
I think perhaps the only safe way to handle this is to parse /proc/iomem for a region of "System RAM" that is at least 2 pages then use that for your read tests. This would also solve the hypothetical issue of reading something that has read size effects.
That sounds also plausible yes. I somehow remembered that mmap() would fail if "there is nothing".
Ah, my memory comes back, we perform checks only with CONFIG_STRICT_DEVMEM.
On 28/05/2025 11:48, David Hildenbrand wrote:
On 28.05.25 12:44, David Hildenbrand wrote:
On 28.05.25 12:34, Ryan Roberts wrote:
Hi David,
On 09/05/2025 16:30, David Hildenbrand wrote:
Let's test some basic functionality using /dev/mem. These tests will implicitly cover some PAT (Page Attribute Handling) handling on x86.
These tests will only run when /dev/mem access to the first two pages in physical address space is possible and allowed; otherwise, the tests are skipped.
We are seeing really horrible RAS errors with this test when run on arm64 tx2 machine. Based solely on reviewing the code, I think the problem is that tx2 doesn't have anything at phys address 0, so test_read_access() is trying to put trasactions out to a bad address on the bus.
tx2 /proc/iomem:
$ sudo cat /proc/iomem 30000000-37ffffff : PCI ECAM 38000000-3fffffff : PCI ECAM 40000000-5fffffff : PCI Bus 0000:00 ...
Whereas my x86 box has some reserved memory:
$ sudo cat /proc/iomem 00000000-00000fff : Reserved 00001000-0003dfff : System RAM ...
A quick fix would be to make this test specific to x86 (the only one I tested on). We should always have the lower two pages IIRC (BIOS stuff etc).
I'm not sure how far along this patch is? I'm guessing mm-stable? Perhaps you can do the quick fix, then I'd be happy to make this more robust for arm64 later?
I think perhaps the only safe way to handle this is to parse /proc/iomem for a region of "System RAM" that is at least 2 pages then use that for your read tests. This would also solve the hypothetical issue of reading something that has read size effects.
That sounds also plausible yes. I somehow remembered that mmap() would fail if "there is nothing".
Ah, my memory comes back, we perform checks only with CONFIG_STRICT_DEVMEM.
Ahh makes sense. I guess our config doesn't include this. I just checked the RAS error and it is for PA 0. So I'm confident that what I describe above is definitely what is happening.
On 28.05.25 12:53, Ryan Roberts wrote:
On 28/05/2025 11:48, David Hildenbrand wrote:
On 28.05.25 12:44, David Hildenbrand wrote:
On 28.05.25 12:34, Ryan Roberts wrote:
Hi David,
On 09/05/2025 16:30, David Hildenbrand wrote:
Let's test some basic functionality using /dev/mem. These tests will implicitly cover some PAT (Page Attribute Handling) handling on x86.
These tests will only run when /dev/mem access to the first two pages in physical address space is possible and allowed; otherwise, the tests are skipped.
We are seeing really horrible RAS errors with this test when run on arm64 tx2 machine. Based solely on reviewing the code, I think the problem is that tx2 doesn't have anything at phys address 0, so test_read_access() is trying to put trasactions out to a bad address on the bus.
tx2 /proc/iomem:
$ sudo cat /proc/iomem 30000000-37ffffff : PCI ECAM 38000000-3fffffff : PCI ECAM 40000000-5fffffff : PCI Bus 0000:00 ...
Whereas my x86 box has some reserved memory:
$ sudo cat /proc/iomem 00000000-00000fff : Reserved 00001000-0003dfff : System RAM ...
A quick fix would be to make this test specific to x86 (the only one I tested on). We should always have the lower two pages IIRC (BIOS stuff etc).
I'm not sure how far along this patch is? I'm guessing mm-stable? Perhaps you can do the quick fix, then I'd be happy to make this more robust for arm64 later?
Already hacking on the parsing :)
On 28.05.25 12:53, Ryan Roberts wrote:
On 28/05/2025 11:48, David Hildenbrand wrote:
On 28.05.25 12:44, David Hildenbrand wrote:
On 28.05.25 12:34, Ryan Roberts wrote:
Hi David,
On 09/05/2025 16:30, David Hildenbrand wrote:
Let's test some basic functionality using /dev/mem. These tests will implicitly cover some PAT (Page Attribute Handling) handling on x86.
These tests will only run when /dev/mem access to the first two pages in physical address space is possible and allowed; otherwise, the tests are skipped.
We are seeing really horrible RAS errors with this test when run on arm64 tx2 machine. Based solely on reviewing the code, I think the problem is that tx2 doesn't have anything at phys address 0, so test_read_access() is trying to put trasactions out to a bad address on the bus.
tx2 /proc/iomem:
$ sudo cat /proc/iomem 30000000-37ffffff : PCI ECAM 38000000-3fffffff : PCI ECAM 40000000-5fffffff : PCI Bus 0000:00 ...
Whereas my x86 box has some reserved memory:
$ sudo cat /proc/iomem 00000000-00000fff : Reserved 00001000-0003dfff : System RAM ...
A quick fix would be to make this test specific to x86 (the only one I tested on). We should always have the lower two pages IIRC (BIOS stuff etc).
I'm not sure how far along this patch is? I'm guessing mm-stable? Perhaps you can do the quick fix, then I'd be happy to make this more robust for arm64 later?
Can you give the following a quick test on that machine? Then, I can send it as a proper patch later.
From 40fea063f2fcf1474fb47cb9aebdb04fd825032b Mon Sep 17 00:00:00 2001 From: David Hildenbrand david@redhat.com Date: Wed, 28 May 2025 14:35:23 +0200 Subject: [PATCH] selftests/mm: two fixes for the pfnmap test
When unregistering the signal handler, we have to pass SIG_DFL, and blindly reading from PFN 0 and PFN 1 seems to be problematic on !x86 systems. In particularly, on arm64 tx2 machines where noting resides at these physical memory locations, we can generate RAS errors.
Let's fix it by scanning /proc/iomem for actual "System RAM".
Reported-by: Ryan Roberts ryan.roberts@arm.com Closes: https://lore.kernel.org/all/232960c2-81db-47ca-a337-38c4bce5f997@arm.com/T/#... Fixes: 2616b370323a ("selftests/mm: add simple VM_PFNMAP tests based on mmap'ing /dev/mem") Signed-off-by: David Hildenbrand david@redhat.com --- tools/testing/selftests/mm/pfnmap.c | 61 +++++++++++++++++++++++++++-- 1 file changed, 57 insertions(+), 4 deletions(-)
diff --git a/tools/testing/selftests/mm/pfnmap.c b/tools/testing/selftests/mm/pfnmap.c index 8a9d19b6020c7..4943927a7d1ea 100644 --- a/tools/testing/selftests/mm/pfnmap.c +++ b/tools/testing/selftests/mm/pfnmap.c @@ -12,6 +12,8 @@ #include <stdint.h> #include <unistd.h> #include <errno.h> +#include <stdio.h> +#include <ctype.h> #include <fcntl.h> #include <signal.h> #include <setjmp.h> @@ -43,14 +45,62 @@ static int test_read_access(char *addr, size_t size, size_t pagesize) /* Force a read that the compiler cannot optimize out. */ *((volatile char *)(addr + offs)); } - if (signal(SIGSEGV, signal_handler) == SIG_ERR) + if (signal(SIGSEGV, SIG_DFL) == SIG_ERR) return -EINVAL;
return ret; }
+static int find_ram_target(off_t *phys_addr, + unsigned long pagesize) +{ + unsigned long long start, end; + char line[80], *end_ptr; + FILE *file; + + /* Search /proc/iomem for the first suitable "System RAM" range. */ + file = fopen("/proc/iomem", "r"); + if (!file) + return -errno; + + while (fgets(line, sizeof(line), file)) { + /* Ignore any child nodes. */ + if (!isalnum(line[0])) + continue; + + if (!strstr(line, "System RAM\n")) + continue; + + start = strtoull(line, &end_ptr, 16); + /* Skip over the "-" */ + end_ptr++; + /* Make end "exclusive". */ + end = strtoull(end_ptr, NULL, 16) + 1; + + /* Actual addresses are not exported */ + if (!start && !end) + break; + + /* We need full pages. */ + start = (start + pagesize - 1) & ~(pagesize - 1); + end &= ~(pagesize - 1); + + if (start != (off_t)start) + break; + + /* We need two pages. */ + if (end > start + 2 * pagesize) { + fclose(file); + *phys_addr = start; + return 0; + } + } + return -ENOENT; +} + FIXTURE(pfnmap) { + off_t phys_addr; size_t pagesize; int dev_mem_fd; char *addr1; @@ -63,14 +113,17 @@ FIXTURE_SETUP(pfnmap) { self->pagesize = getpagesize();
+ /* We'll require two physical pages throughout our tests ... */ + if (find_ram_target(&self->phys_addr, self->pagesize)) + SKIP(return, "Cannot find ram target in '/dev/iomem'\n"); + self->dev_mem_fd = open("/dev/mem", O_RDONLY); if (self->dev_mem_fd < 0) SKIP(return, "Cannot open '/dev/mem'\n");
- /* We'll require the first two pages throughout our tests ... */ self->size1 = self->pagesize * 2; self->addr1 = mmap(NULL, self->size1, PROT_READ, MAP_SHARED, - self->dev_mem_fd, 0); + self->dev_mem_fd, self->phys_addr); if (self->addr1 == MAP_FAILED) SKIP(return, "Cannot mmap '/dev/mem'\n");
@@ -129,7 +182,7 @@ TEST_F(pfnmap, munmap_split) */ self->size2 = self->pagesize; self->addr2 = mmap(NULL, self->pagesize, PROT_READ, MAP_SHARED, - self->dev_mem_fd, 0); + self->dev_mem_fd, self->phys_addr); ASSERT_NE(self->addr2, MAP_FAILED); }
On 28/05/2025 13:40, David Hildenbrand wrote:
On 28.05.25 12:53, Ryan Roberts wrote:
On 28/05/2025 11:48, David Hildenbrand wrote:
On 28.05.25 12:44, David Hildenbrand wrote:
On 28.05.25 12:34, Ryan Roberts wrote:
Hi David,
On 09/05/2025 16:30, David Hildenbrand wrote:
Let's test some basic functionality using /dev/mem. These tests will implicitly cover some PAT (Page Attribute Handling) handling on x86.
These tests will only run when /dev/mem access to the first two pages in physical address space is possible and allowed; otherwise, the tests are skipped.
We are seeing really horrible RAS errors with this test when run on arm64 tx2 machine. Based solely on reviewing the code, I think the problem is that tx2 doesn't have anything at phys address 0, so test_read_access() is trying to put trasactions out to a bad address on the bus.
tx2 /proc/iomem:
$ sudo cat /proc/iomem 30000000-37ffffff : PCI ECAM 38000000-3fffffff : PCI ECAM 40000000-5fffffff : PCI Bus 0000:00 ...
Whereas my x86 box has some reserved memory:
$ sudo cat /proc/iomem 00000000-00000fff : Reserved 00001000-0003dfff : System RAM ...
A quick fix would be to make this test specific to x86 (the only one I tested on). We should always have the lower two pages IIRC (BIOS stuff etc).
I'm not sure how far along this patch is? I'm guessing mm-stable? Perhaps you can do the quick fix, then I'd be happy to make this more robust for arm64 later?
Can you give the following a quick test on that machine? Then, I can send it as a proper patch later.
The machine in question is part of our CI infra, so not easy for me to run an ad-hoc test. I've asked Aishwarya if it's possible to queue up a CI job with the patch, but that will involve running the whole test run I think, so probably will take a couple of days to turn around.
FWIW, the change looks good to me:
Reviewed-by: Ryan Roberts ryan.roberts@arm.com
From 40fea063f2fcf1474fb47cb9aebdb04fd825032b Mon Sep 17 00:00:00 2001 From: David Hildenbrand david@redhat.com Date: Wed, 28 May 2025 14:35:23 +0200 Subject: [PATCH] selftests/mm: two fixes for the pfnmap test
When unregistering the signal handler, we have to pass SIG_DFL, and blindly reading from PFN 0 and PFN 1 seems to be problematic on !x86 systems. In particularly, on arm64 tx2 machines where noting resides at these physical memory locations, we can generate RAS errors.
Let's fix it by scanning /proc/iomem for actual "System RAM".
Reported-by: Ryan Roberts ryan.roberts@arm.com Closes: https://lore.kernel.org/all/232960c2-81db-47ca- a337-38c4bce5f997@arm.com/T/#u Fixes: 2616b370323a ("selftests/mm: add simple VM_PFNMAP tests based on mmap'ing /dev/mem") Signed-off-by: David Hildenbrand david@redhat.com
tools/testing/selftests/mm/pfnmap.c | 61 +++++++++++++++++++++++++++-- 1 file changed, 57 insertions(+), 4 deletions(-)
diff --git a/tools/testing/selftests/mm/pfnmap.c b/tools/testing/selftests/mm/ pfnmap.c index 8a9d19b6020c7..4943927a7d1ea 100644 --- a/tools/testing/selftests/mm/pfnmap.c +++ b/tools/testing/selftests/mm/pfnmap.c @@ -12,6 +12,8 @@ #include <stdint.h> #include <unistd.h> #include <errno.h> +#include <stdio.h> +#include <ctype.h> #include <fcntl.h> #include <signal.h> #include <setjmp.h> @@ -43,14 +45,62 @@ static int test_read_access(char *addr, size_t size, size_t pagesize) /* Force a read that the compiler cannot optimize out. */ *((volatile char *)(addr + offs)); } - if (signal(SIGSEGV, signal_handler) == SIG_ERR) + if (signal(SIGSEGV, SIG_DFL) == SIG_ERR) return -EINVAL; return ret; } +static int find_ram_target(off_t *phys_addr, + unsigned long pagesize) +{ + unsigned long long start, end; + char line[80], *end_ptr; + FILE *file;
+ /* Search /proc/iomem for the first suitable "System RAM" range. */ + file = fopen("/proc/iomem", "r"); + if (!file) + return -errno;
+ while (fgets(line, sizeof(line), file)) { + /* Ignore any child nodes. */ + if (!isalnum(line[0])) + continue;
+ if (!strstr(line, "System RAM\n")) + continue;
+ start = strtoull(line, &end_ptr, 16); + /* Skip over the "-" */ + end_ptr++; + /* Make end "exclusive". */ + end = strtoull(end_ptr, NULL, 16) + 1;
+ /* Actual addresses are not exported */ + if (!start && !end) + break;
+ /* We need full pages. */ + start = (start + pagesize - 1) & ~(pagesize - 1); + end &= ~(pagesize - 1);
+ if (start != (off_t)start) + break;
+ /* We need two pages. */ + if (end > start + 2 * pagesize) { + fclose(file); + *phys_addr = start; + return 0; + } + } + return -ENOENT; +}
FIXTURE(pfnmap) { + off_t phys_addr; size_t pagesize; int dev_mem_fd; char *addr1; @@ -63,14 +113,17 @@ FIXTURE_SETUP(pfnmap) { self->pagesize = getpagesize(); + /* We'll require two physical pages throughout our tests ... */ + if (find_ram_target(&self->phys_addr, self->pagesize)) + SKIP(return, "Cannot find ram target in '/dev/iomem'\n");
self->dev_mem_fd = open("/dev/mem", O_RDONLY); if (self->dev_mem_fd < 0) SKIP(return, "Cannot open '/dev/mem'\n"); - /* We'll require the first two pages throughout our tests ... */ self->size1 = self->pagesize * 2; self->addr1 = mmap(NULL, self->size1, PROT_READ, MAP_SHARED, - self->dev_mem_fd, 0); + self->dev_mem_fd, self->phys_addr); if (self->addr1 == MAP_FAILED) SKIP(return, "Cannot mmap '/dev/mem'\n"); @@ -129,7 +182,7 @@ TEST_F(pfnmap, munmap_split) */ self->size2 = self->pagesize; self->addr2 = mmap(NULL, self->pagesize, PROT_READ, MAP_SHARED, - self->dev_mem_fd, 0); + self->dev_mem_fd, self->phys_addr); ASSERT_NE(self->addr2, MAP_FAILED); }
Hi David,
I applied the patch on next-20250515 and ran the test. The issue is no longer observed, and the test completed successfully. I can confirm that the patch resolves the problem.
Tested-by: Aishwarya TCV aishwarya.tcv@arm.com
Thanks, Aishwarya
On 28.05.25 20:23, Aishwarya wrote:
Hi David,
I applied the patch on next-20250515 and ran the test. The issue is no longer observed, and the test completed successfully. I can confirm that the patch resolves the problem.
Tested-by: Aishwarya TCV aishwarya.tcv@arm.com
Thanks a bunch for the fast re-test!
linux-kselftest-mirror@lists.linaro.org