Memory protection keys enables an application to protect its address space from inadvertent access by its own code.
This feature is now enabled on powerpc architecture and integrated in 4.16-rc1. The patches move the selftests to arch neutral directory and enhance their test coverage.
Test ---- Verified for correctness on powerpc and on x86 architectures(using EC2 ubuntu VM instance).
History: -------
version 12: (1) fixed the offset of pkey field in the siginfo structure for x86_64 and powerpc. And tries to use the actual field if the headers have it defined. version 11: (1) fixed a deadlock in the ptrace testcase.
version 10 and prior: (1) moved the testcase to arch neutral directory (2) split the changes into incremental patches.
Ram Pai (21): selftests/x86: Move protecton key selftest to arch neutral directory selftests/vm: rename all references to pkru to a generic name selftests/vm: move generic definitions to header file selftests/vm: typecast the pkey register selftests/vm: generic function to handle shadow key register selftests/vm: fix the wrong assert in pkey_disable_set() selftests/vm: fixed bugs in pkey_disable_clear() selftests/vm: clear the bits in shadow reg when a pkey is freed. selftests/vm: fix alloc_random_pkey() to make it really random selftests/vm: introduce two arch independent abstraction selftests/vm: pkey register should match shadow pkey selftests/vm: generic cleanup selftests/vm: powerpc implementation for generic abstraction selftests/vm: clear the bits in shadow reg when a pkey is freed. selftests/vm: powerpc implementation to check support for pkey selftests/vm: fix an assertion in test_pkey_alloc_exhaust() selftests/vm: associate key on a mapped page and detect access violation selftests/vm: associate key on a mapped page and detect write violation selftests/vm: detect write violation on a mapped access-denied-key page selftests/vm: testcases must restore pkey-permissions selftests/vm: sub-page allocator
Thiago Jung Bauermann (1): selftests/vm: Fix deadlock in protection_keys.c
tools/testing/selftests/vm/Makefile | 1 + tools/testing/selftests/vm/pkey-helpers.h | 434 ++++++++ tools/testing/selftests/vm/protection_keys.c | 1471 +++++++++++++++++++++++++ tools/testing/selftests/x86/Makefile | 2 +- tools/testing/selftests/x86/pkey-helpers.h | 223 ---- tools/testing/selftests/x86/protection_keys.c | 1407 ----------------------- 6 files changed, 1907 insertions(+), 1631 deletions(-) create mode 100644 tools/testing/selftests/vm/pkey-helpers.h create mode 100644 tools/testing/selftests/vm/protection_keys.c delete mode 100644 tools/testing/selftests/x86/pkey-helpers.h delete mode 100644 tools/testing/selftests/x86/protection_keys.c
-- To unsubscribe from this list: send the line "unsubscribe linux-kselftest" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
cc: Dave Hansen dave.hansen@intel.com cc: Florian Weimer fweimer@redhat.com Signed-off-by: Ram Pai linuxram@us.ibm.com --- tools/testing/selftests/vm/Makefile | 1 + tools/testing/selftests/vm/pkey-helpers.h | 223 ++++ tools/testing/selftests/vm/protection_keys.c | 1407 +++++++++++++++++++++++++ tools/testing/selftests/x86/Makefile | 2 +- tools/testing/selftests/x86/pkey-helpers.h | 223 ---- tools/testing/selftests/x86/protection_keys.c | 1407 ------------------------- 6 files changed, 1632 insertions(+), 1631 deletions(-) create mode 100644 tools/testing/selftests/vm/pkey-helpers.h create mode 100644 tools/testing/selftests/vm/protection_keys.c delete mode 100644 tools/testing/selftests/x86/pkey-helpers.h delete mode 100644 tools/testing/selftests/x86/protection_keys.c
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index fdefa22..9788a58 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -20,6 +20,7 @@ TEST_GEN_FILES += transhuge-stress TEST_GEN_FILES += userfaultfd TEST_GEN_FILES += va_128TBswitch TEST_GEN_FILES += virtual_address_range +TEST_GEN_FILES += protection_keys
TEST_PROGS := run_vmtests
diff --git a/tools/testing/selftests/vm/pkey-helpers.h b/tools/testing/selftests/vm/pkey-helpers.h new file mode 100644 index 0000000..b3cb767 --- /dev/null +++ b/tools/testing/selftests/vm/pkey-helpers.h @@ -0,0 +1,223 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _PKEYS_HELPER_H +#define _PKEYS_HELPER_H +#define _GNU_SOURCE +#include <string.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdint.h> +#include <stdbool.h> +#include <signal.h> +#include <assert.h> +#include <stdlib.h> +#include <ucontext.h> +#include <sys/mman.h> + +#define NR_PKEYS 16 +#define PKRU_BITS_PER_PKEY 2 + +#ifndef DEBUG_LEVEL +#define DEBUG_LEVEL 0 +#endif +#define DPRINT_IN_SIGNAL_BUF_SIZE 4096 +extern int dprint_in_signal; +extern char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE]; +static inline void sigsafe_printf(const char *format, ...) +{ + va_list ap; + + va_start(ap, format); + if (!dprint_in_signal) { + vprintf(format, ap); + } else { + int ret; + int len = vsnprintf(dprint_in_signal_buffer, + DPRINT_IN_SIGNAL_BUF_SIZE, + format, ap); + /* + * len is amount that would have been printed, + * but actual write is truncated at BUF_SIZE. + */ + if (len > DPRINT_IN_SIGNAL_BUF_SIZE) + len = DPRINT_IN_SIGNAL_BUF_SIZE; + ret = write(1, dprint_in_signal_buffer, len); + if (ret < 0) + abort(); + } + va_end(ap); +} +#define dprintf_level(level, args...) do { \ + if (level <= DEBUG_LEVEL) \ + sigsafe_printf(args); \ + fflush(NULL); \ +} while (0) +#define dprintf0(args...) dprintf_level(0, args) +#define dprintf1(args...) dprintf_level(1, args) +#define dprintf2(args...) dprintf_level(2, args) +#define dprintf3(args...) dprintf_level(3, args) +#define dprintf4(args...) dprintf_level(4, args) + +extern unsigned int shadow_pkru; +static inline unsigned int __rdpkru(void) +{ + unsigned int eax, edx; + unsigned int ecx = 0; + unsigned int pkru; + + asm volatile(".byte 0x0f,0x01,0xee\n\t" + : "=a" (eax), "=d" (edx) + : "c" (ecx)); + pkru = eax; + return pkru; +} + +static inline unsigned int _rdpkru(int line) +{ + unsigned int pkru = __rdpkru(); + + dprintf4("rdpkru(line=%d) pkru: %x shadow: %x\n", + line, pkru, shadow_pkru); + assert(pkru == shadow_pkru); + + return pkru; +} + +#define rdpkru() _rdpkru(__LINE__) + +static inline void __wrpkru(unsigned int pkru) +{ + unsigned int eax = pkru; + unsigned int ecx = 0; + unsigned int edx = 0; + + dprintf4("%s() changing %08x to %08x\n", __func__, __rdpkru(), pkru); + asm volatile(".byte 0x0f,0x01,0xef\n\t" + : : "a" (eax), "c" (ecx), "d" (edx)); + assert(pkru == __rdpkru()); +} + +static inline void wrpkru(unsigned int pkru) +{ + dprintf4("%s() changing %08x to %08x\n", __func__, __rdpkru(), pkru); + /* will do the shadow check for us: */ + rdpkru(); + __wrpkru(pkru); + shadow_pkru = pkru; + dprintf4("%s(%08x) pkru: %08x\n", __func__, pkru, __rdpkru()); +} + +/* + * These are technically racy. since something could + * change PKRU between the read and the write. + */ +static inline void __pkey_access_allow(int pkey, int do_allow) +{ + unsigned int pkru = rdpkru(); + int bit = pkey * 2; + + if (do_allow) + pkru &= (1<<bit); + else + pkru |= (1<<bit); + + dprintf4("pkru now: %08x\n", rdpkru()); + wrpkru(pkru); +} + +static inline void __pkey_write_allow(int pkey, int do_allow_write) +{ + long pkru = rdpkru(); + int bit = pkey * 2 + 1; + + if (do_allow_write) + pkru &= (1<<bit); + else + pkru |= (1<<bit); + + wrpkru(pkru); + dprintf4("pkru now: %08x\n", rdpkru()); +} + +#define PROT_PKEY0 0x10 /* protection key value (bit 0) */ +#define PROT_PKEY1 0x20 /* protection key value (bit 1) */ +#define PROT_PKEY2 0x40 /* protection key value (bit 2) */ +#define PROT_PKEY3 0x80 /* protection key value (bit 3) */ + +#define PAGE_SIZE 4096 +#define MB (1<<20) + +static inline void __cpuid(unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) +{ + /* ecx is often an input as well as an output. */ + asm volatile( + "cpuid;" + : "=a" (*eax), + "=b" (*ebx), + "=c" (*ecx), + "=d" (*edx) + : "0" (*eax), "2" (*ecx)); +} + +/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx) */ +#define X86_FEATURE_PKU (1<<3) /* Protection Keys for Userspace */ +#define X86_FEATURE_OSPKE (1<<4) /* OS Protection Keys Enable */ + +static inline int cpu_has_pku(void) +{ + unsigned int eax; + unsigned int ebx; + unsigned int ecx; + unsigned int edx; + + eax = 0x7; + ecx = 0x0; + __cpuid(&eax, &ebx, &ecx, &edx); + + if (!(ecx & X86_FEATURE_PKU)) { + dprintf2("cpu does not have PKU\n"); + return 0; + } + if (!(ecx & X86_FEATURE_OSPKE)) { + dprintf2("cpu does not have OSPKE\n"); + return 0; + } + return 1; +} + +#define XSTATE_PKRU_BIT (9) +#define XSTATE_PKRU 0x200 + +int pkru_xstate_offset(void) +{ + unsigned int eax; + unsigned int ebx; + unsigned int ecx; + unsigned int edx; + int xstate_offset; + int xstate_size; + unsigned long XSTATE_CPUID = 0xd; + int leaf; + + /* assume that XSTATE_PKRU is set in XCR0 */ + leaf = XSTATE_PKRU_BIT; + { + eax = XSTATE_CPUID; + ecx = leaf; + __cpuid(&eax, &ebx, &ecx, &edx); + + if (leaf == XSTATE_PKRU_BIT) { + xstate_offset = ebx; + xstate_size = eax; + } + } + + if (xstate_size == 0) { + printf("could not find size/offset of PKRU in xsave state\n"); + return 0; + } + + return xstate_offset; +} + +#endif /* _PKEYS_HELPER_H */ diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c new file mode 100644 index 0000000..bc1b073 --- /dev/null +++ b/tools/testing/selftests/vm/protection_keys.c @@ -0,0 +1,1407 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Tests x86 Memory Protection Keys (see Documentation/x86/protection-keys.txt) + * + * There are examples in here of: + * * how to set protection keys on memory + * * how to set/clear bits in PKRU (the rights register) + * * how to handle SEGV_PKRU signals and extract pkey-relevant + * information from the siginfo + * + * Things to add: + * make sure KSM and KSM COW breaking works + * prefault pages in at malloc, or not + * protect MPX bounds tables with protection keys? + * make sure VMA splitting/merging is working correctly + * OOMs can destroy mm->mmap (see exit_mmap()), so make sure it is immune to pkeys + * look for pkey "leaks" where it is still set on a VMA but "freed" back to the kernel + * do a plain mprotect() to a mprotect_pkey() area and make sure the pkey sticks + * + * Compile like this: + * gcc -o protection_keys -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm + * gcc -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm + */ +#define _GNU_SOURCE +#include <errno.h> +#include <linux/futex.h> +#include <sys/time.h> +#include <sys/syscall.h> +#include <string.h> +#include <stdio.h> +#include <stdint.h> +#include <stdbool.h> +#include <signal.h> +#include <assert.h> +#include <stdlib.h> +#include <ucontext.h> +#include <sys/mman.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <sys/ptrace.h> +#include <setjmp.h> + +#include "pkey-helpers.h" + +int iteration_nr = 1; +int test_nr; + +unsigned int shadow_pkru; + +#define HPAGE_SIZE (1UL<<21) +#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x))) +#define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1)) +#define ALIGN_DOWN(x, align_to) ((x) & ~((align_to)-1)) +#define ALIGN_PTR_UP(p, ptr_align_to) ((typeof(p))ALIGN_UP((unsigned long)(p), ptr_align_to)) +#define ALIGN_PTR_DOWN(p, ptr_align_to) ((typeof(p))ALIGN_DOWN((unsigned long)(p), ptr_align_to)) +#define __stringify_1(x...) #x +#define __stringify(x...) __stringify_1(x) + +#define PTR_ERR_ENOTSUP ((void *)-ENOTSUP) + +int dprint_in_signal; +char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE]; + +extern void abort_hooks(void); +#define pkey_assert(condition) do { \ + if (!(condition)) { \ + dprintf0("assert() at %s::%d test_nr: %d iteration: %d\n", \ + __FILE__, __LINE__, \ + test_nr, iteration_nr); \ + dprintf0("errno at assert: %d", errno); \ + abort_hooks(); \ + assert(condition); \ + } \ +} while (0) +#define raw_assert(cond) assert(cond) + +void cat_into_file(char *str, char *file) +{ + int fd = open(file, O_RDWR); + int ret; + + dprintf2("%s(): writing '%s' to '%s'\n", __func__, str, file); + /* + * these need to be raw because they are called under + * pkey_assert() + */ + raw_assert(fd >= 0); + ret = write(fd, str, strlen(str)); + if (ret != strlen(str)) { + perror("write to file failed"); + fprintf(stderr, "filename: '%s' str: '%s'\n", file, str); + raw_assert(0); + } + close(fd); +} + +#if CONTROL_TRACING > 0 +static int warned_tracing; +int tracing_root_ok(void) +{ + if (geteuid() != 0) { + if (!warned_tracing) + fprintf(stderr, "WARNING: not run as root, " + "can not do tracing control\n"); + warned_tracing = 1; + return 0; + } + return 1; +} +#endif + +void tracing_on(void) +{ +#if CONTROL_TRACING > 0 +#define TRACEDIR "/sys/kernel/debug/tracing" + char pidstr[32]; + + if (!tracing_root_ok()) + return; + + sprintf(pidstr, "%d", getpid()); + cat_into_file("0", TRACEDIR "/tracing_on"); + cat_into_file("\n", TRACEDIR "/trace"); + if (1) { + cat_into_file("function_graph", TRACEDIR "/current_tracer"); + cat_into_file("1", TRACEDIR "/options/funcgraph-proc"); + } else { + cat_into_file("nop", TRACEDIR "/current_tracer"); + } + cat_into_file(pidstr, TRACEDIR "/set_ftrace_pid"); + cat_into_file("1", TRACEDIR "/tracing_on"); + dprintf1("enabled tracing\n"); +#endif +} + +void tracing_off(void) +{ +#if CONTROL_TRACING > 0 + if (!tracing_root_ok()) + return; + cat_into_file("0", "/sys/kernel/debug/tracing/tracing_on"); +#endif +} + +void abort_hooks(void) +{ + fprintf(stderr, "running %s()...\n", __func__); + tracing_off(); +#ifdef SLEEP_ON_ABORT + sleep(SLEEP_ON_ABORT); +#endif +} + +static inline void __page_o_noops(void) +{ + /* 8-bytes of instruction * 512 bytes = 1 page */ + asm(".rept 512 ; nopl 0x7eeeeeee(%eax) ; .endr"); +} + +/* + * This attempts to have roughly a page of instructions followed by a few + * instructions that do a write, and another page of instructions. That + * way, we are pretty sure that the write is in the second page of + * instructions and has at least a page of padding behind it. + * + * *That* lets us be sure to madvise() away the write instruction, which + * will then fault, which makes sure that the fault code handles + * execute-only memory properly. + */ +__attribute__((__aligned__(PAGE_SIZE))) +void lots_o_noops_around_write(int *write_to_me) +{ + dprintf3("running %s()\n", __func__); + __page_o_noops(); + /* Assume this happens in the second page of instructions: */ + *write_to_me = __LINE__; + /* pad out by another page: */ + __page_o_noops(); + dprintf3("%s() done\n", __func__); +} + +/* Define some kernel-like types */ +#define u8 uint8_t +#define u16 uint16_t +#define u32 uint32_t +#define u64 uint64_t + +#ifdef __i386__ + +#ifndef SYS_mprotect_key +# define SYS_mprotect_key 380 +#endif +#ifndef SYS_pkey_alloc +# define SYS_pkey_alloc 381 +# define SYS_pkey_free 382 +#endif +#define REG_IP_IDX REG_EIP +#define si_pkey_offset 0x14 + +#else + +#ifndef SYS_mprotect_key +# define SYS_mprotect_key 329 +#endif +#ifndef SYS_pkey_alloc +# define SYS_pkey_alloc 330 +# define SYS_pkey_free 331 +#endif +#define REG_IP_IDX REG_RIP +#define si_pkey_offset 0x20 + +#endif + +void dump_mem(void *dumpme, int len_bytes) +{ + char *c = (void *)dumpme; + int i; + + for (i = 0; i < len_bytes; i += sizeof(u64)) { + u64 *ptr = (u64 *)(c + i); + dprintf1("dump[%03d][@%p]: %016jx\n", i, ptr, *ptr); + } +} + +#define SEGV_BNDERR 3 /* failed address bound checks */ +#define SEGV_PKUERR 4 + +static char *si_code_str(int si_code) +{ + if (si_code == SEGV_MAPERR) + return "SEGV_MAPERR"; + if (si_code == SEGV_ACCERR) + return "SEGV_ACCERR"; + if (si_code == SEGV_BNDERR) + return "SEGV_BNDERR"; + if (si_code == SEGV_PKUERR) + return "SEGV_PKUERR"; + return "UNKNOWN"; +} + +int pkru_faults; +int last_si_pkey = -1; +void signal_handler(int signum, siginfo_t *si, void *vucontext) +{ + ucontext_t *uctxt = vucontext; + int trapno; + unsigned long ip; + char *fpregs; + u32 *pkru_ptr; + u64 siginfo_pkey; + u32 *si_pkey_ptr; + int pkru_offset; + fpregset_t fpregset; + + dprint_in_signal = 1; + dprintf1(">>>>===============SIGSEGV============================\n"); + dprintf1("%s()::%d, pkru: 0x%x shadow: %x\n", __func__, __LINE__, + __rdpkru(), shadow_pkru); + + trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO]; + ip = uctxt->uc_mcontext.gregs[REG_IP_IDX]; + fpregset = uctxt->uc_mcontext.fpregs; + fpregs = (void *)fpregset; + + dprintf2("%s() trapno: %d ip: 0x%lx info->si_code: %s/%d\n", __func__, + trapno, ip, si_code_str(si->si_code), si->si_code); +#ifdef __i386__ + /* + * 32-bit has some extra padding so that userspace can tell whether + * the XSTATE header is present in addition to the "legacy" FPU + * state. We just assume that it is here. + */ + fpregs += 0x70; +#endif + pkru_offset = pkru_xstate_offset(); + pkru_ptr = (void *)(&fpregs[pkru_offset]); + + dprintf1("siginfo: %p\n", si); + dprintf1(" fpregs: %p\n", fpregs); + /* + * If we got a PKRU fault, we *HAVE* to have at least one bit set in + * here. + */ + dprintf1("pkru_xstate_offset: %d\n", pkru_xstate_offset()); + if (DEBUG_LEVEL > 4) + dump_mem(pkru_ptr - 128, 256); + pkey_assert(*pkru_ptr); + + si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset); + dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr); + dump_mem(si_pkey_ptr - 8, 24); + siginfo_pkey = *si_pkey_ptr; + pkey_assert(siginfo_pkey < NR_PKEYS); + last_si_pkey = siginfo_pkey; + + if ((si->si_code == SEGV_MAPERR) || + (si->si_code == SEGV_ACCERR) || + (si->si_code == SEGV_BNDERR)) { + printf("non-PK si_code, exiting...\n"); + exit(4); + } + + dprintf1("signal pkru from xsave: %08x\n", *pkru_ptr); + /* need __rdpkru() version so we do not do shadow_pkru checking */ + dprintf1("signal pkru from pkru: %08x\n", __rdpkru()); + dprintf1("pkey from siginfo: %jx\n", siginfo_pkey); + *(u64 *)pkru_ptr = 0x00000000; + dprintf1("WARNING: set PRKU=0 to allow faulting instruction to continue\n"); + pkru_faults++; + dprintf1("<<<<==================================================\n"); + return; + if (trapno == 14) { + fprintf(stderr, + "ERROR: In signal handler, page fault, trapno = %d, ip = %016lx\n", + trapno, ip); + fprintf(stderr, "si_addr %p\n", si->si_addr); + fprintf(stderr, "REG_ERR: %lx\n", + (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]); + exit(1); + } else { + fprintf(stderr, "unexpected trap %d! at 0x%lx\n", trapno, ip); + fprintf(stderr, "si_addr %p\n", si->si_addr); + fprintf(stderr, "REG_ERR: %lx\n", + (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]); + exit(2); + } + dprint_in_signal = 0; +} + +int wait_all_children(void) +{ + int status; + return waitpid(-1, &status, 0); +} + +void sig_chld(int x) +{ + dprint_in_signal = 1; + dprintf2("[%d] SIGCHLD: %d\n", getpid(), x); + dprint_in_signal = 0; +} + +void setup_sigsegv_handler(void) +{ + int r, rs; + struct sigaction newact; + struct sigaction oldact; + + /* #PF is mapped to sigsegv */ + int signum = SIGSEGV; + + newact.sa_handler = 0; + newact.sa_sigaction = signal_handler; + + /*sigset_t - signals to block while in the handler */ + /* get the old signal mask. */ + rs = sigprocmask(SIG_SETMASK, 0, &newact.sa_mask); + pkey_assert(rs == 0); + + /* call sa_sigaction, not sa_handler*/ + newact.sa_flags = SA_SIGINFO; + + newact.sa_restorer = 0; /* void(*)(), obsolete */ + r = sigaction(signum, &newact, &oldact); + r = sigaction(SIGALRM, &newact, &oldact); + pkey_assert(r == 0); +} + +void setup_handlers(void) +{ + signal(SIGCHLD, &sig_chld); + setup_sigsegv_handler(); +} + +pid_t fork_lazy_child(void) +{ + pid_t forkret; + + forkret = fork(); + pkey_assert(forkret >= 0); + dprintf3("[%d] fork() ret: %d\n", getpid(), forkret); + + if (!forkret) { + /* in the child */ + while (1) { + dprintf1("child sleeping...\n"); + sleep(30); + } + } + return forkret; +} + +void davecmp(void *_a, void *_b, int len) +{ + int i; + unsigned long *a = _a; + unsigned long *b = _b; + + for (i = 0; i < len / sizeof(*a); i++) { + if (a[i] == b[i]) + continue; + + dprintf3("[%3d]: a: %016lx b: %016lx\n", i, a[i], b[i]); + } +} + +void dumpit(char *f) +{ + int fd = open(f, O_RDONLY); + char buf[100]; + int nr_read; + + dprintf2("maps fd: %d\n", fd); + do { + nr_read = read(fd, &buf[0], sizeof(buf)); + write(1, buf, nr_read); + } while (nr_read > 0); + close(fd); +} + +#define PKEY_DISABLE_ACCESS 0x1 +#define PKEY_DISABLE_WRITE 0x2 + +u32 pkey_get(int pkey, unsigned long flags) +{ + u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE); + u32 pkru = __rdpkru(); + u32 shifted_pkru; + u32 masked_pkru; + + dprintf1("%s(pkey=%d, flags=%lx) = %x / %d\n", + __func__, pkey, flags, 0, 0); + dprintf2("%s() raw pkru: %x\n", __func__, pkru); + + shifted_pkru = (pkru >> (pkey * PKRU_BITS_PER_PKEY)); + dprintf2("%s() shifted_pkru: %x\n", __func__, shifted_pkru); + masked_pkru = shifted_pkru & mask; + dprintf2("%s() masked pkru: %x\n", __func__, masked_pkru); + /* + * shift down the relevant bits to the lowest two, then + * mask off all the other high bits. + */ + return masked_pkru; +} + +int pkey_set(int pkey, unsigned long rights, unsigned long flags) +{ + u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE); + u32 old_pkru = __rdpkru(); + u32 new_pkru; + + /* make sure that 'rights' only contains the bits we expect: */ + assert(!(rights & ~mask)); + + /* copy old pkru */ + new_pkru = old_pkru; + /* mask out bits from pkey in old value: */ + new_pkru &= ~(mask << (pkey * PKRU_BITS_PER_PKEY)); + /* OR in new bits for pkey: */ + new_pkru |= (rights << (pkey * PKRU_BITS_PER_PKEY)); + + __wrpkru(new_pkru); + + dprintf3("%s(pkey=%d, rights=%lx, flags=%lx) = %x pkru now: %x old_pkru: %x\n", + __func__, pkey, rights, flags, 0, __rdpkru(), old_pkru); + return 0; +} + +void pkey_disable_set(int pkey, int flags) +{ + unsigned long syscall_flags = 0; + int ret; + int pkey_rights; + u32 orig_pkru = rdpkru(); + + dprintf1("START->%s(%d, 0x%x)\n", __func__, + pkey, flags); + pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)); + + pkey_rights = pkey_get(pkey, syscall_flags); + + dprintf1("%s(%d) pkey_get(%d): %x\n", __func__, + pkey, pkey, pkey_rights); + pkey_assert(pkey_rights >= 0); + + pkey_rights |= flags; + + ret = pkey_set(pkey, pkey_rights, syscall_flags); + assert(!ret); + /*pkru and flags have the same format */ + shadow_pkru |= flags << (pkey * 2); + dprintf1("%s(%d) shadow: 0x%x\n", __func__, pkey, shadow_pkru); + + pkey_assert(ret >= 0); + + pkey_rights = pkey_get(pkey, syscall_flags); + dprintf1("%s(%d) pkey_get(%d): %x\n", __func__, + pkey, pkey, pkey_rights); + + dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru()); + if (flags) + pkey_assert(rdpkru() > orig_pkru); + dprintf1("END<---%s(%d, 0x%x)\n", __func__, + pkey, flags); +} + +void pkey_disable_clear(int pkey, int flags) +{ + unsigned long syscall_flags = 0; + int ret; + int pkey_rights = pkey_get(pkey, syscall_flags); + u32 orig_pkru = rdpkru(); + + pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)); + + dprintf1("%s(%d) pkey_get(%d): %x\n", __func__, + pkey, pkey, pkey_rights); + pkey_assert(pkey_rights >= 0); + + pkey_rights |= flags; + + ret = pkey_set(pkey, pkey_rights, 0); + /* pkru and flags have the same format */ + shadow_pkru &= ~(flags << (pkey * 2)); + pkey_assert(ret >= 0); + + pkey_rights = pkey_get(pkey, syscall_flags); + dprintf1("%s(%d) pkey_get(%d): %x\n", __func__, + pkey, pkey, pkey_rights); + + dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru()); + if (flags) + assert(rdpkru() > orig_pkru); +} + +void pkey_write_allow(int pkey) +{ + pkey_disable_clear(pkey, PKEY_DISABLE_WRITE); +} +void pkey_write_deny(int pkey) +{ + pkey_disable_set(pkey, PKEY_DISABLE_WRITE); +} +void pkey_access_allow(int pkey) +{ + pkey_disable_clear(pkey, PKEY_DISABLE_ACCESS); +} +void pkey_access_deny(int pkey) +{ + pkey_disable_set(pkey, PKEY_DISABLE_ACCESS); +} + +int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, + unsigned long pkey) +{ + int sret; + + dprintf2("%s(0x%p, %zx, prot=%lx, pkey=%lx)\n", __func__, + ptr, size, orig_prot, pkey); + + errno = 0; + sret = syscall(SYS_mprotect_key, ptr, size, orig_prot, pkey); + if (errno) { + dprintf2("SYS_mprotect_key sret: %d\n", sret); + dprintf2("SYS_mprotect_key prot: 0x%lx\n", orig_prot); + dprintf2("SYS_mprotect_key failed, errno: %d\n", errno); + if (DEBUG_LEVEL >= 2) + perror("SYS_mprotect_pkey"); + } + return sret; +} + +int sys_pkey_alloc(unsigned long flags, unsigned long init_val) +{ + int ret = syscall(SYS_pkey_alloc, flags, init_val); + dprintf1("%s(flags=%lx, init_val=%lx) syscall ret: %d errno: %d\n", + __func__, flags, init_val, ret, errno); + return ret; +} + +int alloc_pkey(void) +{ + int ret; + unsigned long init_val = 0x0; + + dprintf1("alloc_pkey()::%d, pkru: 0x%x shadow: %x\n", + __LINE__, __rdpkru(), shadow_pkru); + ret = sys_pkey_alloc(0, init_val); + /* + * pkey_alloc() sets PKRU, so we need to reflect it in + * shadow_pkru: + */ + dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", + __LINE__, ret, __rdpkru(), shadow_pkru); + if (ret) { + /* clear both the bits: */ + shadow_pkru &= ~(0x3 << (ret * 2)); + dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", + __LINE__, ret, __rdpkru(), shadow_pkru); + /* + * move the new state in from init_val + * (remember, we cheated and init_val == pkru format) + */ + shadow_pkru |= (init_val << (ret * 2)); + } + dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", + __LINE__, ret, __rdpkru(), shadow_pkru); + dprintf1("alloc_pkey()::%d errno: %d\n", __LINE__, errno); + /* for shadow checking: */ + rdpkru(); + dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", + __LINE__, ret, __rdpkru(), shadow_pkru); + return ret; +} + +int sys_pkey_free(unsigned long pkey) +{ + int ret = syscall(SYS_pkey_free, pkey); + dprintf1("%s(pkey=%ld) syscall ret: %d\n", __func__, pkey, ret); + return ret; +} + +/* + * I had a bug where pkey bits could be set by mprotect() but + * not cleared. This ensures we get lots of random bit sets + * and clears on the vma and pte pkey bits. + */ +int alloc_random_pkey(void) +{ + int max_nr_pkey_allocs; + int ret; + int i; + int alloced_pkeys[NR_PKEYS]; + int nr_alloced = 0; + int random_index; + memset(alloced_pkeys, 0, sizeof(alloced_pkeys)); + + /* allocate every possible key and make a note of which ones we got */ + max_nr_pkey_allocs = NR_PKEYS; + max_nr_pkey_allocs = 1; + for (i = 0; i < max_nr_pkey_allocs; i++) { + int new_pkey = alloc_pkey(); + if (new_pkey < 0) + break; + alloced_pkeys[nr_alloced++] = new_pkey; + } + + pkey_assert(nr_alloced > 0); + /* select a random one out of the allocated ones */ + random_index = rand() % nr_alloced; + ret = alloced_pkeys[random_index]; + /* now zero it out so we don't free it next */ + alloced_pkeys[random_index] = 0; + + /* go through the allocated ones that we did not want and free them */ + for (i = 0; i < nr_alloced; i++) { + int free_ret; + if (!alloced_pkeys[i]) + continue; + free_ret = sys_pkey_free(alloced_pkeys[i]); + pkey_assert(!free_ret); + } + dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__, + __LINE__, ret, __rdpkru(), shadow_pkru); + return ret; +} + +int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, + unsigned long pkey) +{ + int nr_iterations = random() % 100; + int ret; + + while (0) { + int rpkey = alloc_random_pkey(); + ret = sys_mprotect_pkey(ptr, size, orig_prot, pkey); + dprintf1("sys_mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n", + ptr, size, orig_prot, pkey, ret); + if (nr_iterations-- < 0) + break; + + dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__, + __LINE__, ret, __rdpkru(), shadow_pkru); + sys_pkey_free(rpkey); + dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__, + __LINE__, ret, __rdpkru(), shadow_pkru); + } + pkey_assert(pkey < NR_PKEYS); + + ret = sys_mprotect_pkey(ptr, size, orig_prot, pkey); + dprintf1("mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n", + ptr, size, orig_prot, pkey, ret); + pkey_assert(!ret); + dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__, + __LINE__, ret, __rdpkru(), shadow_pkru); + return ret; +} + +struct pkey_malloc_record { + void *ptr; + long size; +}; +struct pkey_malloc_record *pkey_malloc_records; +long nr_pkey_malloc_records; +void record_pkey_malloc(void *ptr, long size) +{ + long i; + struct pkey_malloc_record *rec = NULL; + + for (i = 0; i < nr_pkey_malloc_records; i++) { + rec = &pkey_malloc_records[i]; + /* find a free record */ + if (rec) + break; + } + if (!rec) { + /* every record is full */ + size_t old_nr_records = nr_pkey_malloc_records; + size_t new_nr_records = (nr_pkey_malloc_records * 2 + 1); + size_t new_size = new_nr_records * sizeof(struct pkey_malloc_record); + dprintf2("new_nr_records: %zd\n", new_nr_records); + dprintf2("new_size: %zd\n", new_size); + pkey_malloc_records = realloc(pkey_malloc_records, new_size); + pkey_assert(pkey_malloc_records != NULL); + rec = &pkey_malloc_records[nr_pkey_malloc_records]; + /* + * realloc() does not initialize memory, so zero it from + * the first new record all the way to the end. + */ + for (i = 0; i < new_nr_records - old_nr_records; i++) + memset(rec + i, 0, sizeof(*rec)); + } + dprintf3("filling malloc record[%d/%p]: {%p, %ld}\n", + (int)(rec - pkey_malloc_records), rec, ptr, size); + rec->ptr = ptr; + rec->size = size; + nr_pkey_malloc_records++; +} + +void free_pkey_malloc(void *ptr) +{ + long i; + int ret; + dprintf3("%s(%p)\n", __func__, ptr); + for (i = 0; i < nr_pkey_malloc_records; i++) { + struct pkey_malloc_record *rec = &pkey_malloc_records[i]; + dprintf4("looking for ptr %p at record[%ld/%p]: {%p, %ld}\n", + ptr, i, rec, rec->ptr, rec->size); + if ((ptr < rec->ptr) || + (ptr >= rec->ptr + rec->size)) + continue; + + dprintf3("found ptr %p at record[%ld/%p]: {%p, %ld}\n", + ptr, i, rec, rec->ptr, rec->size); + nr_pkey_malloc_records--; + ret = munmap(rec->ptr, rec->size); + dprintf3("munmap ret: %d\n", ret); + pkey_assert(!ret); + dprintf3("clearing rec->ptr, rec: %p\n", rec); + rec->ptr = NULL; + dprintf3("done clearing rec->ptr, rec: %p\n", rec); + return; + } + pkey_assert(false); +} + + +void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey) +{ + void *ptr; + int ret; + + rdpkru(); + dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__, + size, prot, pkey); + pkey_assert(pkey < NR_PKEYS); + ptr = mmap(NULL, size, prot, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + pkey_assert(ptr != (void *)-1); + ret = mprotect_pkey((void *)ptr, PAGE_SIZE, prot, pkey); + pkey_assert(!ret); + record_pkey_malloc(ptr, size); + rdpkru(); + + dprintf1("%s() for pkey %d @ %p\n", __func__, pkey, ptr); + return ptr; +} + +void *malloc_pkey_anon_huge(long size, int prot, u16 pkey) +{ + int ret; + void *ptr; + + dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__, + size, prot, pkey); + /* + * Guarantee we can fit at least one huge page in the resulting + * allocation by allocating space for 2: + */ + size = ALIGN_UP(size, HPAGE_SIZE * 2); + ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + pkey_assert(ptr != (void *)-1); + record_pkey_malloc(ptr, size); + mprotect_pkey(ptr, size, prot, pkey); + + dprintf1("unaligned ptr: %p\n", ptr); + ptr = ALIGN_PTR_UP(ptr, HPAGE_SIZE); + dprintf1(" aligned ptr: %p\n", ptr); + ret = madvise(ptr, HPAGE_SIZE, MADV_HUGEPAGE); + dprintf1("MADV_HUGEPAGE ret: %d\n", ret); + ret = madvise(ptr, HPAGE_SIZE, MADV_WILLNEED); + dprintf1("MADV_WILLNEED ret: %d\n", ret); + memset(ptr, 0, HPAGE_SIZE); + + dprintf1("mmap()'d thp for pkey %d @ %p\n", pkey, ptr); + return ptr; +} + +int hugetlb_setup_ok; +#define GET_NR_HUGE_PAGES 10 +void setup_hugetlbfs(void) +{ + int err; + int fd; + char buf[] = "123"; + + if (geteuid() != 0) { + fprintf(stderr, "WARNING: not run as root, can not do hugetlb test\n"); + return; + } + + cat_into_file(__stringify(GET_NR_HUGE_PAGES), "/proc/sys/vm/nr_hugepages"); + + /* + * Now go make sure that we got the pages and that they + * are 2M pages. Someone might have made 1G the default. + */ + fd = open("/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages", O_RDONLY); + if (fd < 0) { + perror("opening sysfs 2M hugetlb config"); + return; + } + + /* -1 to guarantee leaving the trailing \0 */ + err = read(fd, buf, sizeof(buf)-1); + close(fd); + if (err <= 0) { + perror("reading sysfs 2M hugetlb config"); + return; + } + + if (atoi(buf) != GET_NR_HUGE_PAGES) { + fprintf(stderr, "could not confirm 2M pages, got: '%s' expected %d\n", + buf, GET_NR_HUGE_PAGES); + return; + } + + hugetlb_setup_ok = 1; +} + +void *malloc_pkey_hugetlb(long size, int prot, u16 pkey) +{ + void *ptr; + int flags = MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB; + + if (!hugetlb_setup_ok) + return PTR_ERR_ENOTSUP; + + dprintf1("doing %s(%ld, %x, %x)\n", __func__, size, prot, pkey); + size = ALIGN_UP(size, HPAGE_SIZE * 2); + pkey_assert(pkey < NR_PKEYS); + ptr = mmap(NULL, size, PROT_NONE, flags, -1, 0); + pkey_assert(ptr != (void *)-1); + mprotect_pkey(ptr, size, prot, pkey); + + record_pkey_malloc(ptr, size); + + dprintf1("mmap()'d hugetlbfs for pkey %d @ %p\n", pkey, ptr); + return ptr; +} + +void *malloc_pkey_mmap_dax(long size, int prot, u16 pkey) +{ + void *ptr; + int fd; + + dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__, + size, prot, pkey); + pkey_assert(pkey < NR_PKEYS); + fd = open("/dax/foo", O_RDWR); + pkey_assert(fd >= 0); + + ptr = mmap(0, size, prot, MAP_SHARED, fd, 0); + pkey_assert(ptr != (void *)-1); + + mprotect_pkey(ptr, size, prot, pkey); + + record_pkey_malloc(ptr, size); + + dprintf1("mmap()'d for pkey %d @ %p\n", pkey, ptr); + close(fd); + return ptr; +} + +void *(*pkey_malloc[])(long size, int prot, u16 pkey) = { + + malloc_pkey_with_mprotect, + malloc_pkey_anon_huge, + malloc_pkey_hugetlb +/* can not do direct with the pkey_mprotect() API: + malloc_pkey_mmap_direct, + malloc_pkey_mmap_dax, +*/ +}; + +void *malloc_pkey(long size, int prot, u16 pkey) +{ + void *ret; + static int malloc_type; + int nr_malloc_types = ARRAY_SIZE(pkey_malloc); + + pkey_assert(pkey < NR_PKEYS); + + while (1) { + pkey_assert(malloc_type < nr_malloc_types); + + ret = pkey_malloc[malloc_type](size, prot, pkey); + pkey_assert(ret != (void *)-1); + + malloc_type++; + if (malloc_type >= nr_malloc_types) + malloc_type = (random()%nr_malloc_types); + + /* try again if the malloc_type we tried is unsupported */ + if (ret == PTR_ERR_ENOTSUP) + continue; + + break; + } + + dprintf3("%s(%ld, prot=%x, pkey=%x) returning: %p\n", __func__, + size, prot, pkey, ret); + return ret; +} + +int last_pkru_faults; +void expected_pk_fault(int pkey) +{ + dprintf2("%s(): last_pkru_faults: %d pkru_faults: %d\n", + __func__, last_pkru_faults, pkru_faults); + dprintf2("%s(%d): last_si_pkey: %d\n", __func__, pkey, last_si_pkey); + pkey_assert(last_pkru_faults + 1 == pkru_faults); + pkey_assert(last_si_pkey == pkey); + /* + * The signal handler shold have cleared out PKRU to let the + * test program continue. We now have to restore it. + */ + if (__rdpkru() != 0) + pkey_assert(0); + + __wrpkru(shadow_pkru); + dprintf1("%s() set PKRU=%x to restore state after signal nuked it\n", + __func__, shadow_pkru); + last_pkru_faults = pkru_faults; + last_si_pkey = -1; +} + +void do_not_expect_pk_fault(void) +{ + pkey_assert(last_pkru_faults == pkru_faults); +} + +int test_fds[10] = { -1 }; +int nr_test_fds; +void __save_test_fd(int fd) +{ + pkey_assert(fd >= 0); + pkey_assert(nr_test_fds < ARRAY_SIZE(test_fds)); + test_fds[nr_test_fds] = fd; + nr_test_fds++; +} + +int get_test_read_fd(void) +{ + int test_fd = open("/etc/passwd", O_RDONLY); + __save_test_fd(test_fd); + return test_fd; +} + +void close_test_fds(void) +{ + int i; + + for (i = 0; i < nr_test_fds; i++) { + if (test_fds[i] < 0) + continue; + close(test_fds[i]); + test_fds[i] = -1; + } + nr_test_fds = 0; +} + +#define barrier() __asm__ __volatile__("": : :"memory") +__attribute__((noinline)) int read_ptr(int *ptr) +{ + /* + * Keep GCC from optimizing this away somehow + */ + barrier(); + return *ptr; +} + +void test_read_of_write_disabled_region(int *ptr, u16 pkey) +{ + int ptr_contents; + + dprintf1("disabling write access to PKEY[1], doing read\n"); + pkey_write_deny(pkey); + ptr_contents = read_ptr(ptr); + dprintf1("*ptr: %d\n", ptr_contents); + dprintf1("\n"); +} +void test_read_of_access_disabled_region(int *ptr, u16 pkey) +{ + int ptr_contents; + + dprintf1("disabling access to PKEY[%02d], doing read @ %p\n", pkey, ptr); + rdpkru(); + pkey_access_deny(pkey); + ptr_contents = read_ptr(ptr); + dprintf1("*ptr: %d\n", ptr_contents); + expected_pk_fault(pkey); +} +void test_write_of_write_disabled_region(int *ptr, u16 pkey) +{ + dprintf1("disabling write access to PKEY[%02d], doing write\n", pkey); + pkey_write_deny(pkey); + *ptr = __LINE__; + expected_pk_fault(pkey); +} +void test_write_of_access_disabled_region(int *ptr, u16 pkey) +{ + dprintf1("disabling access to PKEY[%02d], doing write\n", pkey); + pkey_access_deny(pkey); + *ptr = __LINE__; + expected_pk_fault(pkey); +} +void test_kernel_write_of_access_disabled_region(int *ptr, u16 pkey) +{ + int ret; + int test_fd = get_test_read_fd(); + + dprintf1("disabling access to PKEY[%02d], " + "having kernel read() to buffer\n", pkey); + pkey_access_deny(pkey); + ret = read(test_fd, ptr, 1); + dprintf1("read ret: %d\n", ret); + pkey_assert(ret); +} +void test_kernel_write_of_write_disabled_region(int *ptr, u16 pkey) +{ + int ret; + int test_fd = get_test_read_fd(); + + pkey_write_deny(pkey); + ret = read(test_fd, ptr, 100); + dprintf1("read ret: %d\n", ret); + if (ret < 0 && (DEBUG_LEVEL > 0)) + perror("verbose read result (OK for this to be bad)"); + pkey_assert(ret); +} + +void test_kernel_gup_of_access_disabled_region(int *ptr, u16 pkey) +{ + int pipe_ret, vmsplice_ret; + struct iovec iov; + int pipe_fds[2]; + + pipe_ret = pipe(pipe_fds); + + pkey_assert(pipe_ret == 0); + dprintf1("disabling access to PKEY[%02d], " + "having kernel vmsplice from buffer\n", pkey); + pkey_access_deny(pkey); + iov.iov_base = ptr; + iov.iov_len = PAGE_SIZE; + vmsplice_ret = vmsplice(pipe_fds[1], &iov, 1, SPLICE_F_GIFT); + dprintf1("vmsplice() ret: %d\n", vmsplice_ret); + pkey_assert(vmsplice_ret == -1); + + close(pipe_fds[0]); + close(pipe_fds[1]); +} + +void test_kernel_gup_write_to_write_disabled_region(int *ptr, u16 pkey) +{ + int ignored = 0xdada; + int futex_ret; + int some_int = __LINE__; + + dprintf1("disabling write to PKEY[%02d], " + "doing futex gunk in buffer\n", pkey); + *ptr = some_int; + pkey_write_deny(pkey); + futex_ret = syscall(SYS_futex, ptr, FUTEX_WAIT, some_int-1, NULL, + &ignored, ignored); + if (DEBUG_LEVEL > 0) + perror("futex"); + dprintf1("futex() ret: %d\n", futex_ret); +} + +/* Assumes that all pkeys other than 'pkey' are unallocated */ +void test_pkey_syscalls_on_non_allocated_pkey(int *ptr, u16 pkey) +{ + int err; + int i; + + /* Note: 0 is the default pkey, so don't mess with it */ + for (i = 1; i < NR_PKEYS; i++) { + if (pkey == i) + continue; + + dprintf1("trying get/set/free to non-allocated pkey: %2d\n", i); + err = sys_pkey_free(i); + pkey_assert(err); + + err = sys_pkey_free(i); + pkey_assert(err); + + err = sys_mprotect_pkey(ptr, PAGE_SIZE, PROT_READ, i); + pkey_assert(err); + } +} + +/* Assumes that all pkeys other than 'pkey' are unallocated */ +void test_pkey_syscalls_bad_args(int *ptr, u16 pkey) +{ + int err; + int bad_pkey = NR_PKEYS+99; + + /* pass a known-invalid pkey in: */ + err = sys_mprotect_pkey(ptr, PAGE_SIZE, PROT_READ, bad_pkey); + pkey_assert(err); +} + +/* Assumes that all pkeys other than 'pkey' are unallocated */ +void test_pkey_alloc_exhaust(int *ptr, u16 pkey) +{ + int err; + int allocated_pkeys[NR_PKEYS] = {0}; + int nr_allocated_pkeys = 0; + int i; + + for (i = 0; i < NR_PKEYS*2; i++) { + int new_pkey; + dprintf1("%s() alloc loop: %d\n", __func__, i); + new_pkey = alloc_pkey(); + dprintf4("%s()::%d, err: %d pkru: 0x%x shadow: 0x%x\n", __func__, + __LINE__, err, __rdpkru(), shadow_pkru); + rdpkru(); /* for shadow checking */ + dprintf2("%s() errno: %d ENOSPC: %d\n", __func__, errno, ENOSPC); + if ((new_pkey == -1) && (errno == ENOSPC)) { + dprintf2("%s() failed to allocate pkey after %d tries\n", + __func__, nr_allocated_pkeys); + break; + } + pkey_assert(nr_allocated_pkeys < NR_PKEYS); + allocated_pkeys[nr_allocated_pkeys++] = new_pkey; + } + + dprintf3("%s()::%d\n", __func__, __LINE__); + + /* + * ensure it did not reach the end of the loop without + * failure: + */ + pkey_assert(i < NR_PKEYS*2); + + /* + * There are 16 pkeys supported in hardware. One is taken + * up for the default (0) and another can be taken up by + * an execute-only mapping. Ensure that we can allocate + * at least 14 (16-2). + */ + pkey_assert(i >= NR_PKEYS-2); + + for (i = 0; i < nr_allocated_pkeys; i++) { + err = sys_pkey_free(allocated_pkeys[i]); + pkey_assert(!err); + rdpkru(); /* for shadow checking */ + } +} + +void test_ptrace_of_child(int *ptr, u16 pkey) +{ + __attribute__((__unused__)) int peek_result; + pid_t child_pid; + void *ignored = 0; + long ret; + int status; + /* + * This is the "control" for our little expermient. Make sure + * we can always access it when ptracing. + */ + int *plain_ptr_unaligned = malloc(HPAGE_SIZE); + int *plain_ptr = ALIGN_PTR_UP(plain_ptr_unaligned, PAGE_SIZE); + + /* + * Fork a child which is an exact copy of this process, of course. + * That means we can do all of our tests via ptrace() and then plain + * memory access and ensure they work differently. + */ + child_pid = fork_lazy_child(); + dprintf1("[%d] child pid: %d\n", getpid(), child_pid); + + ret = ptrace(PTRACE_ATTACH, child_pid, ignored, ignored); + if (ret) + perror("attach"); + dprintf1("[%d] attach ret: %ld %d\n", getpid(), ret, __LINE__); + pkey_assert(ret != -1); + ret = waitpid(child_pid, &status, WUNTRACED); + if ((ret != child_pid) || !(WIFSTOPPED(status))) { + fprintf(stderr, "weird waitpid result %ld stat %x\n", + ret, status); + pkey_assert(0); + } + dprintf2("waitpid ret: %ld\n", ret); + dprintf2("waitpid status: %d\n", status); + + pkey_access_deny(pkey); + pkey_write_deny(pkey); + + /* Write access, untested for now: + ret = ptrace(PTRACE_POKEDATA, child_pid, peek_at, data); + pkey_assert(ret != -1); + dprintf1("poke at %p: %ld\n", peek_at, ret); + */ + + /* + * Try to access the pkey-protected "ptr" via ptrace: + */ + ret = ptrace(PTRACE_PEEKDATA, child_pid, ptr, ignored); + /* expect it to work, without an error: */ + pkey_assert(ret != -1); + /* Now access from the current task, and expect an exception: */ + peek_result = read_ptr(ptr); + expected_pk_fault(pkey); + + /* + * Try to access the NON-pkey-protected "plain_ptr" via ptrace: + */ + ret = ptrace(PTRACE_PEEKDATA, child_pid, plain_ptr, ignored); + /* expect it to work, without an error: */ + pkey_assert(ret != -1); + /* Now access from the current task, and expect NO exception: */ + peek_result = read_ptr(plain_ptr); + do_not_expect_pk_fault(); + + ret = ptrace(PTRACE_DETACH, child_pid, ignored, 0); + pkey_assert(ret != -1); + + ret = kill(child_pid, SIGKILL); + pkey_assert(ret != -1); + + wait(&status); + + free(plain_ptr_unaligned); +} + +void test_executing_on_unreadable_memory(int *ptr, u16 pkey) +{ + void *p1; + int scratch; + int ptr_contents; + int ret; + + p1 = ALIGN_PTR_UP(&lots_o_noops_around_write, PAGE_SIZE); + dprintf3("&lots_o_noops: %p\n", &lots_o_noops_around_write); + /* lots_o_noops_around_write should be page-aligned already */ + assert(p1 == &lots_o_noops_around_write); + + /* Point 'p1' at the *second* page of the function: */ + p1 += PAGE_SIZE; + + madvise(p1, PAGE_SIZE, MADV_DONTNEED); + lots_o_noops_around_write(&scratch); + ptr_contents = read_ptr(p1); + dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); + + ret = mprotect_pkey(p1, PAGE_SIZE, PROT_EXEC, (u64)pkey); + pkey_assert(!ret); + pkey_access_deny(pkey); + + dprintf2("pkru: %x\n", rdpkru()); + + /* + * Make sure this is an *instruction* fault + */ + madvise(p1, PAGE_SIZE, MADV_DONTNEED); + lots_o_noops_around_write(&scratch); + do_not_expect_pk_fault(); + ptr_contents = read_ptr(p1); + dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); + expected_pk_fault(pkey); +} + +void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey) +{ + int size = PAGE_SIZE; + int sret; + + if (cpu_has_pku()) { + dprintf1("SKIP: %s: no CPU support\n", __func__); + return; + } + + sret = syscall(SYS_mprotect_key, ptr, size, PROT_READ, pkey); + pkey_assert(sret < 0); +} + +void (*pkey_tests[])(int *ptr, u16 pkey) = { + test_read_of_write_disabled_region, + test_read_of_access_disabled_region, + test_write_of_write_disabled_region, + test_write_of_access_disabled_region, + test_kernel_write_of_access_disabled_region, + test_kernel_write_of_write_disabled_region, + test_kernel_gup_of_access_disabled_region, + test_kernel_gup_write_to_write_disabled_region, + test_executing_on_unreadable_memory, + test_ptrace_of_child, + test_pkey_syscalls_on_non_allocated_pkey, + test_pkey_syscalls_bad_args, + test_pkey_alloc_exhaust, +}; + +void run_tests_once(void) +{ + int *ptr; + int prot = PROT_READ|PROT_WRITE; + + for (test_nr = 0; test_nr < ARRAY_SIZE(pkey_tests); test_nr++) { + int pkey; + int orig_pkru_faults = pkru_faults; + + dprintf1("======================\n"); + dprintf1("test %d preparing...\n", test_nr); + + tracing_on(); + pkey = alloc_random_pkey(); + dprintf1("test %d starting with pkey: %d\n", test_nr, pkey); + ptr = malloc_pkey(PAGE_SIZE, prot, pkey); + dprintf1("test %d starting...\n", test_nr); + pkey_tests[test_nr](ptr, pkey); + dprintf1("freeing test memory: %p\n", ptr); + free_pkey_malloc(ptr); + sys_pkey_free(pkey); + + dprintf1("pkru_faults: %d\n", pkru_faults); + dprintf1("orig_pkru_faults: %d\n", orig_pkru_faults); + + tracing_off(); + close_test_fds(); + + printf("test %2d PASSED (iteration %d)\n", test_nr, iteration_nr); + dprintf1("======================\n\n"); + } + iteration_nr++; +} + +void pkey_setup_shadow(void) +{ + shadow_pkru = __rdpkru(); +} + +int main(void) +{ + int nr_iterations = 22; + + setup_handlers(); + + printf("has pku: %d\n", cpu_has_pku()); + + if (!cpu_has_pku()) { + int size = PAGE_SIZE; + int *ptr; + + printf("running PKEY tests for unsupported CPU/OS\n"); + + ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + assert(ptr != (void *)-1); + test_mprotect_pkey_on_unsupported_cpu(ptr, 1); + exit(0); + } + + pkey_setup_shadow(); + printf("startup pkru: %x\n", rdpkru()); + setup_hugetlbfs(); + + while (nr_iterations-- > 0) + run_tests_once(); + + printf("done (all tests OK)\n"); + return 0; +} diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index 10ca46d..fb234e6 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -7,7 +7,7 @@ include ../lib.mk
TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall test_mremap_vdso \ check_initial_reg_state sigreturn ldt_gdt iopl mpx-mini-test ioperm \ - protection_keys test_vdso test_vsyscall + test_vdso test_vsyscall TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \ test_FCMOV test_FCOMI test_FISTTP \ vdso_restorer diff --git a/tools/testing/selftests/x86/pkey-helpers.h b/tools/testing/selftests/x86/pkey-helpers.h deleted file mode 100644 index b3cb767..0000000 --- a/tools/testing/selftests/x86/pkey-helpers.h +++ /dev/null @@ -1,223 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _PKEYS_HELPER_H -#define _PKEYS_HELPER_H -#define _GNU_SOURCE -#include <string.h> -#include <stdarg.h> -#include <stdio.h> -#include <stdint.h> -#include <stdbool.h> -#include <signal.h> -#include <assert.h> -#include <stdlib.h> -#include <ucontext.h> -#include <sys/mman.h> - -#define NR_PKEYS 16 -#define PKRU_BITS_PER_PKEY 2 - -#ifndef DEBUG_LEVEL -#define DEBUG_LEVEL 0 -#endif -#define DPRINT_IN_SIGNAL_BUF_SIZE 4096 -extern int dprint_in_signal; -extern char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE]; -static inline void sigsafe_printf(const char *format, ...) -{ - va_list ap; - - va_start(ap, format); - if (!dprint_in_signal) { - vprintf(format, ap); - } else { - int ret; - int len = vsnprintf(dprint_in_signal_buffer, - DPRINT_IN_SIGNAL_BUF_SIZE, - format, ap); - /* - * len is amount that would have been printed, - * but actual write is truncated at BUF_SIZE. - */ - if (len > DPRINT_IN_SIGNAL_BUF_SIZE) - len = DPRINT_IN_SIGNAL_BUF_SIZE; - ret = write(1, dprint_in_signal_buffer, len); - if (ret < 0) - abort(); - } - va_end(ap); -} -#define dprintf_level(level, args...) do { \ - if (level <= DEBUG_LEVEL) \ - sigsafe_printf(args); \ - fflush(NULL); \ -} while (0) -#define dprintf0(args...) dprintf_level(0, args) -#define dprintf1(args...) dprintf_level(1, args) -#define dprintf2(args...) dprintf_level(2, args) -#define dprintf3(args...) dprintf_level(3, args) -#define dprintf4(args...) dprintf_level(4, args) - -extern unsigned int shadow_pkru; -static inline unsigned int __rdpkru(void) -{ - unsigned int eax, edx; - unsigned int ecx = 0; - unsigned int pkru; - - asm volatile(".byte 0x0f,0x01,0xee\n\t" - : "=a" (eax), "=d" (edx) - : "c" (ecx)); - pkru = eax; - return pkru; -} - -static inline unsigned int _rdpkru(int line) -{ - unsigned int pkru = __rdpkru(); - - dprintf4("rdpkru(line=%d) pkru: %x shadow: %x\n", - line, pkru, shadow_pkru); - assert(pkru == shadow_pkru); - - return pkru; -} - -#define rdpkru() _rdpkru(__LINE__) - -static inline void __wrpkru(unsigned int pkru) -{ - unsigned int eax = pkru; - unsigned int ecx = 0; - unsigned int edx = 0; - - dprintf4("%s() changing %08x to %08x\n", __func__, __rdpkru(), pkru); - asm volatile(".byte 0x0f,0x01,0xef\n\t" - : : "a" (eax), "c" (ecx), "d" (edx)); - assert(pkru == __rdpkru()); -} - -static inline void wrpkru(unsigned int pkru) -{ - dprintf4("%s() changing %08x to %08x\n", __func__, __rdpkru(), pkru); - /* will do the shadow check for us: */ - rdpkru(); - __wrpkru(pkru); - shadow_pkru = pkru; - dprintf4("%s(%08x) pkru: %08x\n", __func__, pkru, __rdpkru()); -} - -/* - * These are technically racy. since something could - * change PKRU between the read and the write. - */ -static inline void __pkey_access_allow(int pkey, int do_allow) -{ - unsigned int pkru = rdpkru(); - int bit = pkey * 2; - - if (do_allow) - pkru &= (1<<bit); - else - pkru |= (1<<bit); - - dprintf4("pkru now: %08x\n", rdpkru()); - wrpkru(pkru); -} - -static inline void __pkey_write_allow(int pkey, int do_allow_write) -{ - long pkru = rdpkru(); - int bit = pkey * 2 + 1; - - if (do_allow_write) - pkru &= (1<<bit); - else - pkru |= (1<<bit); - - wrpkru(pkru); - dprintf4("pkru now: %08x\n", rdpkru()); -} - -#define PROT_PKEY0 0x10 /* protection key value (bit 0) */ -#define PROT_PKEY1 0x20 /* protection key value (bit 1) */ -#define PROT_PKEY2 0x40 /* protection key value (bit 2) */ -#define PROT_PKEY3 0x80 /* protection key value (bit 3) */ - -#define PAGE_SIZE 4096 -#define MB (1<<20) - -static inline void __cpuid(unsigned int *eax, unsigned int *ebx, - unsigned int *ecx, unsigned int *edx) -{ - /* ecx is often an input as well as an output. */ - asm volatile( - "cpuid;" - : "=a" (*eax), - "=b" (*ebx), - "=c" (*ecx), - "=d" (*edx) - : "0" (*eax), "2" (*ecx)); -} - -/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx) */ -#define X86_FEATURE_PKU (1<<3) /* Protection Keys for Userspace */ -#define X86_FEATURE_OSPKE (1<<4) /* OS Protection Keys Enable */ - -static inline int cpu_has_pku(void) -{ - unsigned int eax; - unsigned int ebx; - unsigned int ecx; - unsigned int edx; - - eax = 0x7; - ecx = 0x0; - __cpuid(&eax, &ebx, &ecx, &edx); - - if (!(ecx & X86_FEATURE_PKU)) { - dprintf2("cpu does not have PKU\n"); - return 0; - } - if (!(ecx & X86_FEATURE_OSPKE)) { - dprintf2("cpu does not have OSPKE\n"); - return 0; - } - return 1; -} - -#define XSTATE_PKRU_BIT (9) -#define XSTATE_PKRU 0x200 - -int pkru_xstate_offset(void) -{ - unsigned int eax; - unsigned int ebx; - unsigned int ecx; - unsigned int edx; - int xstate_offset; - int xstate_size; - unsigned long XSTATE_CPUID = 0xd; - int leaf; - - /* assume that XSTATE_PKRU is set in XCR0 */ - leaf = XSTATE_PKRU_BIT; - { - eax = XSTATE_CPUID; - ecx = leaf; - __cpuid(&eax, &ebx, &ecx, &edx); - - if (leaf == XSTATE_PKRU_BIT) { - xstate_offset = ebx; - xstate_size = eax; - } - } - - if (xstate_size == 0) { - printf("could not find size/offset of PKRU in xsave state\n"); - return 0; - } - - return xstate_offset; -} - -#endif /* _PKEYS_HELPER_H */ diff --git a/tools/testing/selftests/x86/protection_keys.c b/tools/testing/selftests/x86/protection_keys.c deleted file mode 100644 index bc1b073..0000000 --- a/tools/testing/selftests/x86/protection_keys.c +++ /dev/null @@ -1,1407 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Tests x86 Memory Protection Keys (see Documentation/x86/protection-keys.txt) - * - * There are examples in here of: - * * how to set protection keys on memory - * * how to set/clear bits in PKRU (the rights register) - * * how to handle SEGV_PKRU signals and extract pkey-relevant - * information from the siginfo - * - * Things to add: - * make sure KSM and KSM COW breaking works - * prefault pages in at malloc, or not - * protect MPX bounds tables with protection keys? - * make sure VMA splitting/merging is working correctly - * OOMs can destroy mm->mmap (see exit_mmap()), so make sure it is immune to pkeys - * look for pkey "leaks" where it is still set on a VMA but "freed" back to the kernel - * do a plain mprotect() to a mprotect_pkey() area and make sure the pkey sticks - * - * Compile like this: - * gcc -o protection_keys -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm - * gcc -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm - */ -#define _GNU_SOURCE -#include <errno.h> -#include <linux/futex.h> -#include <sys/time.h> -#include <sys/syscall.h> -#include <string.h> -#include <stdio.h> -#include <stdint.h> -#include <stdbool.h> -#include <signal.h> -#include <assert.h> -#include <stdlib.h> -#include <ucontext.h> -#include <sys/mman.h> -#include <sys/types.h> -#include <sys/wait.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <unistd.h> -#include <sys/ptrace.h> -#include <setjmp.h> - -#include "pkey-helpers.h" - -int iteration_nr = 1; -int test_nr; - -unsigned int shadow_pkru; - -#define HPAGE_SIZE (1UL<<21) -#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x))) -#define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1)) -#define ALIGN_DOWN(x, align_to) ((x) & ~((align_to)-1)) -#define ALIGN_PTR_UP(p, ptr_align_to) ((typeof(p))ALIGN_UP((unsigned long)(p), ptr_align_to)) -#define ALIGN_PTR_DOWN(p, ptr_align_to) ((typeof(p))ALIGN_DOWN((unsigned long)(p), ptr_align_to)) -#define __stringify_1(x...) #x -#define __stringify(x...) __stringify_1(x) - -#define PTR_ERR_ENOTSUP ((void *)-ENOTSUP) - -int dprint_in_signal; -char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE]; - -extern void abort_hooks(void); -#define pkey_assert(condition) do { \ - if (!(condition)) { \ - dprintf0("assert() at %s::%d test_nr: %d iteration: %d\n", \ - __FILE__, __LINE__, \ - test_nr, iteration_nr); \ - dprintf0("errno at assert: %d", errno); \ - abort_hooks(); \ - assert(condition); \ - } \ -} while (0) -#define raw_assert(cond) assert(cond) - -void cat_into_file(char *str, char *file) -{ - int fd = open(file, O_RDWR); - int ret; - - dprintf2("%s(): writing '%s' to '%s'\n", __func__, str, file); - /* - * these need to be raw because they are called under - * pkey_assert() - */ - raw_assert(fd >= 0); - ret = write(fd, str, strlen(str)); - if (ret != strlen(str)) { - perror("write to file failed"); - fprintf(stderr, "filename: '%s' str: '%s'\n", file, str); - raw_assert(0); - } - close(fd); -} - -#if CONTROL_TRACING > 0 -static int warned_tracing; -int tracing_root_ok(void) -{ - if (geteuid() != 0) { - if (!warned_tracing) - fprintf(stderr, "WARNING: not run as root, " - "can not do tracing control\n"); - warned_tracing = 1; - return 0; - } - return 1; -} -#endif - -void tracing_on(void) -{ -#if CONTROL_TRACING > 0 -#define TRACEDIR "/sys/kernel/debug/tracing" - char pidstr[32]; - - if (!tracing_root_ok()) - return; - - sprintf(pidstr, "%d", getpid()); - cat_into_file("0", TRACEDIR "/tracing_on"); - cat_into_file("\n", TRACEDIR "/trace"); - if (1) { - cat_into_file("function_graph", TRACEDIR "/current_tracer"); - cat_into_file("1", TRACEDIR "/options/funcgraph-proc"); - } else { - cat_into_file("nop", TRACEDIR "/current_tracer"); - } - cat_into_file(pidstr, TRACEDIR "/set_ftrace_pid"); - cat_into_file("1", TRACEDIR "/tracing_on"); - dprintf1("enabled tracing\n"); -#endif -} - -void tracing_off(void) -{ -#if CONTROL_TRACING > 0 - if (!tracing_root_ok()) - return; - cat_into_file("0", "/sys/kernel/debug/tracing/tracing_on"); -#endif -} - -void abort_hooks(void) -{ - fprintf(stderr, "running %s()...\n", __func__); - tracing_off(); -#ifdef SLEEP_ON_ABORT - sleep(SLEEP_ON_ABORT); -#endif -} - -static inline void __page_o_noops(void) -{ - /* 8-bytes of instruction * 512 bytes = 1 page */ - asm(".rept 512 ; nopl 0x7eeeeeee(%eax) ; .endr"); -} - -/* - * This attempts to have roughly a page of instructions followed by a few - * instructions that do a write, and another page of instructions. That - * way, we are pretty sure that the write is in the second page of - * instructions and has at least a page of padding behind it. - * - * *That* lets us be sure to madvise() away the write instruction, which - * will then fault, which makes sure that the fault code handles - * execute-only memory properly. - */ -__attribute__((__aligned__(PAGE_SIZE))) -void lots_o_noops_around_write(int *write_to_me) -{ - dprintf3("running %s()\n", __func__); - __page_o_noops(); - /* Assume this happens in the second page of instructions: */ - *write_to_me = __LINE__; - /* pad out by another page: */ - __page_o_noops(); - dprintf3("%s() done\n", __func__); -} - -/* Define some kernel-like types */ -#define u8 uint8_t -#define u16 uint16_t -#define u32 uint32_t -#define u64 uint64_t - -#ifdef __i386__ - -#ifndef SYS_mprotect_key -# define SYS_mprotect_key 380 -#endif -#ifndef SYS_pkey_alloc -# define SYS_pkey_alloc 381 -# define SYS_pkey_free 382 -#endif -#define REG_IP_IDX REG_EIP -#define si_pkey_offset 0x14 - -#else - -#ifndef SYS_mprotect_key -# define SYS_mprotect_key 329 -#endif -#ifndef SYS_pkey_alloc -# define SYS_pkey_alloc 330 -# define SYS_pkey_free 331 -#endif -#define REG_IP_IDX REG_RIP -#define si_pkey_offset 0x20 - -#endif - -void dump_mem(void *dumpme, int len_bytes) -{ - char *c = (void *)dumpme; - int i; - - for (i = 0; i < len_bytes; i += sizeof(u64)) { - u64 *ptr = (u64 *)(c + i); - dprintf1("dump[%03d][@%p]: %016jx\n", i, ptr, *ptr); - } -} - -#define SEGV_BNDERR 3 /* failed address bound checks */ -#define SEGV_PKUERR 4 - -static char *si_code_str(int si_code) -{ - if (si_code == SEGV_MAPERR) - return "SEGV_MAPERR"; - if (si_code == SEGV_ACCERR) - return "SEGV_ACCERR"; - if (si_code == SEGV_BNDERR) - return "SEGV_BNDERR"; - if (si_code == SEGV_PKUERR) - return "SEGV_PKUERR"; - return "UNKNOWN"; -} - -int pkru_faults; -int last_si_pkey = -1; -void signal_handler(int signum, siginfo_t *si, void *vucontext) -{ - ucontext_t *uctxt = vucontext; - int trapno; - unsigned long ip; - char *fpregs; - u32 *pkru_ptr; - u64 siginfo_pkey; - u32 *si_pkey_ptr; - int pkru_offset; - fpregset_t fpregset; - - dprint_in_signal = 1; - dprintf1(">>>>===============SIGSEGV============================\n"); - dprintf1("%s()::%d, pkru: 0x%x shadow: %x\n", __func__, __LINE__, - __rdpkru(), shadow_pkru); - - trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO]; - ip = uctxt->uc_mcontext.gregs[REG_IP_IDX]; - fpregset = uctxt->uc_mcontext.fpregs; - fpregs = (void *)fpregset; - - dprintf2("%s() trapno: %d ip: 0x%lx info->si_code: %s/%d\n", __func__, - trapno, ip, si_code_str(si->si_code), si->si_code); -#ifdef __i386__ - /* - * 32-bit has some extra padding so that userspace can tell whether - * the XSTATE header is present in addition to the "legacy" FPU - * state. We just assume that it is here. - */ - fpregs += 0x70; -#endif - pkru_offset = pkru_xstate_offset(); - pkru_ptr = (void *)(&fpregs[pkru_offset]); - - dprintf1("siginfo: %p\n", si); - dprintf1(" fpregs: %p\n", fpregs); - /* - * If we got a PKRU fault, we *HAVE* to have at least one bit set in - * here. - */ - dprintf1("pkru_xstate_offset: %d\n", pkru_xstate_offset()); - if (DEBUG_LEVEL > 4) - dump_mem(pkru_ptr - 128, 256); - pkey_assert(*pkru_ptr); - - si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset); - dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr); - dump_mem(si_pkey_ptr - 8, 24); - siginfo_pkey = *si_pkey_ptr; - pkey_assert(siginfo_pkey < NR_PKEYS); - last_si_pkey = siginfo_pkey; - - if ((si->si_code == SEGV_MAPERR) || - (si->si_code == SEGV_ACCERR) || - (si->si_code == SEGV_BNDERR)) { - printf("non-PK si_code, exiting...\n"); - exit(4); - } - - dprintf1("signal pkru from xsave: %08x\n", *pkru_ptr); - /* need __rdpkru() version so we do not do shadow_pkru checking */ - dprintf1("signal pkru from pkru: %08x\n", __rdpkru()); - dprintf1("pkey from siginfo: %jx\n", siginfo_pkey); - *(u64 *)pkru_ptr = 0x00000000; - dprintf1("WARNING: set PRKU=0 to allow faulting instruction to continue\n"); - pkru_faults++; - dprintf1("<<<<==================================================\n"); - return; - if (trapno == 14) { - fprintf(stderr, - "ERROR: In signal handler, page fault, trapno = %d, ip = %016lx\n", - trapno, ip); - fprintf(stderr, "si_addr %p\n", si->si_addr); - fprintf(stderr, "REG_ERR: %lx\n", - (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]); - exit(1); - } else { - fprintf(stderr, "unexpected trap %d! at 0x%lx\n", trapno, ip); - fprintf(stderr, "si_addr %p\n", si->si_addr); - fprintf(stderr, "REG_ERR: %lx\n", - (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]); - exit(2); - } - dprint_in_signal = 0; -} - -int wait_all_children(void) -{ - int status; - return waitpid(-1, &status, 0); -} - -void sig_chld(int x) -{ - dprint_in_signal = 1; - dprintf2("[%d] SIGCHLD: %d\n", getpid(), x); - dprint_in_signal = 0; -} - -void setup_sigsegv_handler(void) -{ - int r, rs; - struct sigaction newact; - struct sigaction oldact; - - /* #PF is mapped to sigsegv */ - int signum = SIGSEGV; - - newact.sa_handler = 0; - newact.sa_sigaction = signal_handler; - - /*sigset_t - signals to block while in the handler */ - /* get the old signal mask. */ - rs = sigprocmask(SIG_SETMASK, 0, &newact.sa_mask); - pkey_assert(rs == 0); - - /* call sa_sigaction, not sa_handler*/ - newact.sa_flags = SA_SIGINFO; - - newact.sa_restorer = 0; /* void(*)(), obsolete */ - r = sigaction(signum, &newact, &oldact); - r = sigaction(SIGALRM, &newact, &oldact); - pkey_assert(r == 0); -} - -void setup_handlers(void) -{ - signal(SIGCHLD, &sig_chld); - setup_sigsegv_handler(); -} - -pid_t fork_lazy_child(void) -{ - pid_t forkret; - - forkret = fork(); - pkey_assert(forkret >= 0); - dprintf3("[%d] fork() ret: %d\n", getpid(), forkret); - - if (!forkret) { - /* in the child */ - while (1) { - dprintf1("child sleeping...\n"); - sleep(30); - } - } - return forkret; -} - -void davecmp(void *_a, void *_b, int len) -{ - int i; - unsigned long *a = _a; - unsigned long *b = _b; - - for (i = 0; i < len / sizeof(*a); i++) { - if (a[i] == b[i]) - continue; - - dprintf3("[%3d]: a: %016lx b: %016lx\n", i, a[i], b[i]); - } -} - -void dumpit(char *f) -{ - int fd = open(f, O_RDONLY); - char buf[100]; - int nr_read; - - dprintf2("maps fd: %d\n", fd); - do { - nr_read = read(fd, &buf[0], sizeof(buf)); - write(1, buf, nr_read); - } while (nr_read > 0); - close(fd); -} - -#define PKEY_DISABLE_ACCESS 0x1 -#define PKEY_DISABLE_WRITE 0x2 - -u32 pkey_get(int pkey, unsigned long flags) -{ - u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE); - u32 pkru = __rdpkru(); - u32 shifted_pkru; - u32 masked_pkru; - - dprintf1("%s(pkey=%d, flags=%lx) = %x / %d\n", - __func__, pkey, flags, 0, 0); - dprintf2("%s() raw pkru: %x\n", __func__, pkru); - - shifted_pkru = (pkru >> (pkey * PKRU_BITS_PER_PKEY)); - dprintf2("%s() shifted_pkru: %x\n", __func__, shifted_pkru); - masked_pkru = shifted_pkru & mask; - dprintf2("%s() masked pkru: %x\n", __func__, masked_pkru); - /* - * shift down the relevant bits to the lowest two, then - * mask off all the other high bits. - */ - return masked_pkru; -} - -int pkey_set(int pkey, unsigned long rights, unsigned long flags) -{ - u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE); - u32 old_pkru = __rdpkru(); - u32 new_pkru; - - /* make sure that 'rights' only contains the bits we expect: */ - assert(!(rights & ~mask)); - - /* copy old pkru */ - new_pkru = old_pkru; - /* mask out bits from pkey in old value: */ - new_pkru &= ~(mask << (pkey * PKRU_BITS_PER_PKEY)); - /* OR in new bits for pkey: */ - new_pkru |= (rights << (pkey * PKRU_BITS_PER_PKEY)); - - __wrpkru(new_pkru); - - dprintf3("%s(pkey=%d, rights=%lx, flags=%lx) = %x pkru now: %x old_pkru: %x\n", - __func__, pkey, rights, flags, 0, __rdpkru(), old_pkru); - return 0; -} - -void pkey_disable_set(int pkey, int flags) -{ - unsigned long syscall_flags = 0; - int ret; - int pkey_rights; - u32 orig_pkru = rdpkru(); - - dprintf1("START->%s(%d, 0x%x)\n", __func__, - pkey, flags); - pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)); - - pkey_rights = pkey_get(pkey, syscall_flags); - - dprintf1("%s(%d) pkey_get(%d): %x\n", __func__, - pkey, pkey, pkey_rights); - pkey_assert(pkey_rights >= 0); - - pkey_rights |= flags; - - ret = pkey_set(pkey, pkey_rights, syscall_flags); - assert(!ret); - /*pkru and flags have the same format */ - shadow_pkru |= flags << (pkey * 2); - dprintf1("%s(%d) shadow: 0x%x\n", __func__, pkey, shadow_pkru); - - pkey_assert(ret >= 0); - - pkey_rights = pkey_get(pkey, syscall_flags); - dprintf1("%s(%d) pkey_get(%d): %x\n", __func__, - pkey, pkey, pkey_rights); - - dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru()); - if (flags) - pkey_assert(rdpkru() > orig_pkru); - dprintf1("END<---%s(%d, 0x%x)\n", __func__, - pkey, flags); -} - -void pkey_disable_clear(int pkey, int flags) -{ - unsigned long syscall_flags = 0; - int ret; - int pkey_rights = pkey_get(pkey, syscall_flags); - u32 orig_pkru = rdpkru(); - - pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)); - - dprintf1("%s(%d) pkey_get(%d): %x\n", __func__, - pkey, pkey, pkey_rights); - pkey_assert(pkey_rights >= 0); - - pkey_rights |= flags; - - ret = pkey_set(pkey, pkey_rights, 0); - /* pkru and flags have the same format */ - shadow_pkru &= ~(flags << (pkey * 2)); - pkey_assert(ret >= 0); - - pkey_rights = pkey_get(pkey, syscall_flags); - dprintf1("%s(%d) pkey_get(%d): %x\n", __func__, - pkey, pkey, pkey_rights); - - dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru()); - if (flags) - assert(rdpkru() > orig_pkru); -} - -void pkey_write_allow(int pkey) -{ - pkey_disable_clear(pkey, PKEY_DISABLE_WRITE); -} -void pkey_write_deny(int pkey) -{ - pkey_disable_set(pkey, PKEY_DISABLE_WRITE); -} -void pkey_access_allow(int pkey) -{ - pkey_disable_clear(pkey, PKEY_DISABLE_ACCESS); -} -void pkey_access_deny(int pkey) -{ - pkey_disable_set(pkey, PKEY_DISABLE_ACCESS); -} - -int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, - unsigned long pkey) -{ - int sret; - - dprintf2("%s(0x%p, %zx, prot=%lx, pkey=%lx)\n", __func__, - ptr, size, orig_prot, pkey); - - errno = 0; - sret = syscall(SYS_mprotect_key, ptr, size, orig_prot, pkey); - if (errno) { - dprintf2("SYS_mprotect_key sret: %d\n", sret); - dprintf2("SYS_mprotect_key prot: 0x%lx\n", orig_prot); - dprintf2("SYS_mprotect_key failed, errno: %d\n", errno); - if (DEBUG_LEVEL >= 2) - perror("SYS_mprotect_pkey"); - } - return sret; -} - -int sys_pkey_alloc(unsigned long flags, unsigned long init_val) -{ - int ret = syscall(SYS_pkey_alloc, flags, init_val); - dprintf1("%s(flags=%lx, init_val=%lx) syscall ret: %d errno: %d\n", - __func__, flags, init_val, ret, errno); - return ret; -} - -int alloc_pkey(void) -{ - int ret; - unsigned long init_val = 0x0; - - dprintf1("alloc_pkey()::%d, pkru: 0x%x shadow: %x\n", - __LINE__, __rdpkru(), shadow_pkru); - ret = sys_pkey_alloc(0, init_val); - /* - * pkey_alloc() sets PKRU, so we need to reflect it in - * shadow_pkru: - */ - dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", - __LINE__, ret, __rdpkru(), shadow_pkru); - if (ret) { - /* clear both the bits: */ - shadow_pkru &= ~(0x3 << (ret * 2)); - dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", - __LINE__, ret, __rdpkru(), shadow_pkru); - /* - * move the new state in from init_val - * (remember, we cheated and init_val == pkru format) - */ - shadow_pkru |= (init_val << (ret * 2)); - } - dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", - __LINE__, ret, __rdpkru(), shadow_pkru); - dprintf1("alloc_pkey()::%d errno: %d\n", __LINE__, errno); - /* for shadow checking: */ - rdpkru(); - dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", - __LINE__, ret, __rdpkru(), shadow_pkru); - return ret; -} - -int sys_pkey_free(unsigned long pkey) -{ - int ret = syscall(SYS_pkey_free, pkey); - dprintf1("%s(pkey=%ld) syscall ret: %d\n", __func__, pkey, ret); - return ret; -} - -/* - * I had a bug where pkey bits could be set by mprotect() but - * not cleared. This ensures we get lots of random bit sets - * and clears on the vma and pte pkey bits. - */ -int alloc_random_pkey(void) -{ - int max_nr_pkey_allocs; - int ret; - int i; - int alloced_pkeys[NR_PKEYS]; - int nr_alloced = 0; - int random_index; - memset(alloced_pkeys, 0, sizeof(alloced_pkeys)); - - /* allocate every possible key and make a note of which ones we got */ - max_nr_pkey_allocs = NR_PKEYS; - max_nr_pkey_allocs = 1; - for (i = 0; i < max_nr_pkey_allocs; i++) { - int new_pkey = alloc_pkey(); - if (new_pkey < 0) - break; - alloced_pkeys[nr_alloced++] = new_pkey; - } - - pkey_assert(nr_alloced > 0); - /* select a random one out of the allocated ones */ - random_index = rand() % nr_alloced; - ret = alloced_pkeys[random_index]; - /* now zero it out so we don't free it next */ - alloced_pkeys[random_index] = 0; - - /* go through the allocated ones that we did not want and free them */ - for (i = 0; i < nr_alloced; i++) { - int free_ret; - if (!alloced_pkeys[i]) - continue; - free_ret = sys_pkey_free(alloced_pkeys[i]); - pkey_assert(!free_ret); - } - dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__, - __LINE__, ret, __rdpkru(), shadow_pkru); - return ret; -} - -int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, - unsigned long pkey) -{ - int nr_iterations = random() % 100; - int ret; - - while (0) { - int rpkey = alloc_random_pkey(); - ret = sys_mprotect_pkey(ptr, size, orig_prot, pkey); - dprintf1("sys_mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n", - ptr, size, orig_prot, pkey, ret); - if (nr_iterations-- < 0) - break; - - dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__, - __LINE__, ret, __rdpkru(), shadow_pkru); - sys_pkey_free(rpkey); - dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__, - __LINE__, ret, __rdpkru(), shadow_pkru); - } - pkey_assert(pkey < NR_PKEYS); - - ret = sys_mprotect_pkey(ptr, size, orig_prot, pkey); - dprintf1("mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n", - ptr, size, orig_prot, pkey, ret); - pkey_assert(!ret); - dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__, - __LINE__, ret, __rdpkru(), shadow_pkru); - return ret; -} - -struct pkey_malloc_record { - void *ptr; - long size; -}; -struct pkey_malloc_record *pkey_malloc_records; -long nr_pkey_malloc_records; -void record_pkey_malloc(void *ptr, long size) -{ - long i; - struct pkey_malloc_record *rec = NULL; - - for (i = 0; i < nr_pkey_malloc_records; i++) { - rec = &pkey_malloc_records[i]; - /* find a free record */ - if (rec) - break; - } - if (!rec) { - /* every record is full */ - size_t old_nr_records = nr_pkey_malloc_records; - size_t new_nr_records = (nr_pkey_malloc_records * 2 + 1); - size_t new_size = new_nr_records * sizeof(struct pkey_malloc_record); - dprintf2("new_nr_records: %zd\n", new_nr_records); - dprintf2("new_size: %zd\n", new_size); - pkey_malloc_records = realloc(pkey_malloc_records, new_size); - pkey_assert(pkey_malloc_records != NULL); - rec = &pkey_malloc_records[nr_pkey_malloc_records]; - /* - * realloc() does not initialize memory, so zero it from - * the first new record all the way to the end. - */ - for (i = 0; i < new_nr_records - old_nr_records; i++) - memset(rec + i, 0, sizeof(*rec)); - } - dprintf3("filling malloc record[%d/%p]: {%p, %ld}\n", - (int)(rec - pkey_malloc_records), rec, ptr, size); - rec->ptr = ptr; - rec->size = size; - nr_pkey_malloc_records++; -} - -void free_pkey_malloc(void *ptr) -{ - long i; - int ret; - dprintf3("%s(%p)\n", __func__, ptr); - for (i = 0; i < nr_pkey_malloc_records; i++) { - struct pkey_malloc_record *rec = &pkey_malloc_records[i]; - dprintf4("looking for ptr %p at record[%ld/%p]: {%p, %ld}\n", - ptr, i, rec, rec->ptr, rec->size); - if ((ptr < rec->ptr) || - (ptr >= rec->ptr + rec->size)) - continue; - - dprintf3("found ptr %p at record[%ld/%p]: {%p, %ld}\n", - ptr, i, rec, rec->ptr, rec->size); - nr_pkey_malloc_records--; - ret = munmap(rec->ptr, rec->size); - dprintf3("munmap ret: %d\n", ret); - pkey_assert(!ret); - dprintf3("clearing rec->ptr, rec: %p\n", rec); - rec->ptr = NULL; - dprintf3("done clearing rec->ptr, rec: %p\n", rec); - return; - } - pkey_assert(false); -} - - -void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey) -{ - void *ptr; - int ret; - - rdpkru(); - dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__, - size, prot, pkey); - pkey_assert(pkey < NR_PKEYS); - ptr = mmap(NULL, size, prot, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); - pkey_assert(ptr != (void *)-1); - ret = mprotect_pkey((void *)ptr, PAGE_SIZE, prot, pkey); - pkey_assert(!ret); - record_pkey_malloc(ptr, size); - rdpkru(); - - dprintf1("%s() for pkey %d @ %p\n", __func__, pkey, ptr); - return ptr; -} - -void *malloc_pkey_anon_huge(long size, int prot, u16 pkey) -{ - int ret; - void *ptr; - - dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__, - size, prot, pkey); - /* - * Guarantee we can fit at least one huge page in the resulting - * allocation by allocating space for 2: - */ - size = ALIGN_UP(size, HPAGE_SIZE * 2); - ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); - pkey_assert(ptr != (void *)-1); - record_pkey_malloc(ptr, size); - mprotect_pkey(ptr, size, prot, pkey); - - dprintf1("unaligned ptr: %p\n", ptr); - ptr = ALIGN_PTR_UP(ptr, HPAGE_SIZE); - dprintf1(" aligned ptr: %p\n", ptr); - ret = madvise(ptr, HPAGE_SIZE, MADV_HUGEPAGE); - dprintf1("MADV_HUGEPAGE ret: %d\n", ret); - ret = madvise(ptr, HPAGE_SIZE, MADV_WILLNEED); - dprintf1("MADV_WILLNEED ret: %d\n", ret); - memset(ptr, 0, HPAGE_SIZE); - - dprintf1("mmap()'d thp for pkey %d @ %p\n", pkey, ptr); - return ptr; -} - -int hugetlb_setup_ok; -#define GET_NR_HUGE_PAGES 10 -void setup_hugetlbfs(void) -{ - int err; - int fd; - char buf[] = "123"; - - if (geteuid() != 0) { - fprintf(stderr, "WARNING: not run as root, can not do hugetlb test\n"); - return; - } - - cat_into_file(__stringify(GET_NR_HUGE_PAGES), "/proc/sys/vm/nr_hugepages"); - - /* - * Now go make sure that we got the pages and that they - * are 2M pages. Someone might have made 1G the default. - */ - fd = open("/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages", O_RDONLY); - if (fd < 0) { - perror("opening sysfs 2M hugetlb config"); - return; - } - - /* -1 to guarantee leaving the trailing \0 */ - err = read(fd, buf, sizeof(buf)-1); - close(fd); - if (err <= 0) { - perror("reading sysfs 2M hugetlb config"); - return; - } - - if (atoi(buf) != GET_NR_HUGE_PAGES) { - fprintf(stderr, "could not confirm 2M pages, got: '%s' expected %d\n", - buf, GET_NR_HUGE_PAGES); - return; - } - - hugetlb_setup_ok = 1; -} - -void *malloc_pkey_hugetlb(long size, int prot, u16 pkey) -{ - void *ptr; - int flags = MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB; - - if (!hugetlb_setup_ok) - return PTR_ERR_ENOTSUP; - - dprintf1("doing %s(%ld, %x, %x)\n", __func__, size, prot, pkey); - size = ALIGN_UP(size, HPAGE_SIZE * 2); - pkey_assert(pkey < NR_PKEYS); - ptr = mmap(NULL, size, PROT_NONE, flags, -1, 0); - pkey_assert(ptr != (void *)-1); - mprotect_pkey(ptr, size, prot, pkey); - - record_pkey_malloc(ptr, size); - - dprintf1("mmap()'d hugetlbfs for pkey %d @ %p\n", pkey, ptr); - return ptr; -} - -void *malloc_pkey_mmap_dax(long size, int prot, u16 pkey) -{ - void *ptr; - int fd; - - dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__, - size, prot, pkey); - pkey_assert(pkey < NR_PKEYS); - fd = open("/dax/foo", O_RDWR); - pkey_assert(fd >= 0); - - ptr = mmap(0, size, prot, MAP_SHARED, fd, 0); - pkey_assert(ptr != (void *)-1); - - mprotect_pkey(ptr, size, prot, pkey); - - record_pkey_malloc(ptr, size); - - dprintf1("mmap()'d for pkey %d @ %p\n", pkey, ptr); - close(fd); - return ptr; -} - -void *(*pkey_malloc[])(long size, int prot, u16 pkey) = { - - malloc_pkey_with_mprotect, - malloc_pkey_anon_huge, - malloc_pkey_hugetlb -/* can not do direct with the pkey_mprotect() API: - malloc_pkey_mmap_direct, - malloc_pkey_mmap_dax, -*/ -}; - -void *malloc_pkey(long size, int prot, u16 pkey) -{ - void *ret; - static int malloc_type; - int nr_malloc_types = ARRAY_SIZE(pkey_malloc); - - pkey_assert(pkey < NR_PKEYS); - - while (1) { - pkey_assert(malloc_type < nr_malloc_types); - - ret = pkey_malloc[malloc_type](size, prot, pkey); - pkey_assert(ret != (void *)-1); - - malloc_type++; - if (malloc_type >= nr_malloc_types) - malloc_type = (random()%nr_malloc_types); - - /* try again if the malloc_type we tried is unsupported */ - if (ret == PTR_ERR_ENOTSUP) - continue; - - break; - } - - dprintf3("%s(%ld, prot=%x, pkey=%x) returning: %p\n", __func__, - size, prot, pkey, ret); - return ret; -} - -int last_pkru_faults; -void expected_pk_fault(int pkey) -{ - dprintf2("%s(): last_pkru_faults: %d pkru_faults: %d\n", - __func__, last_pkru_faults, pkru_faults); - dprintf2("%s(%d): last_si_pkey: %d\n", __func__, pkey, last_si_pkey); - pkey_assert(last_pkru_faults + 1 == pkru_faults); - pkey_assert(last_si_pkey == pkey); - /* - * The signal handler shold have cleared out PKRU to let the - * test program continue. We now have to restore it. - */ - if (__rdpkru() != 0) - pkey_assert(0); - - __wrpkru(shadow_pkru); - dprintf1("%s() set PKRU=%x to restore state after signal nuked it\n", - __func__, shadow_pkru); - last_pkru_faults = pkru_faults; - last_si_pkey = -1; -} - -void do_not_expect_pk_fault(void) -{ - pkey_assert(last_pkru_faults == pkru_faults); -} - -int test_fds[10] = { -1 }; -int nr_test_fds; -void __save_test_fd(int fd) -{ - pkey_assert(fd >= 0); - pkey_assert(nr_test_fds < ARRAY_SIZE(test_fds)); - test_fds[nr_test_fds] = fd; - nr_test_fds++; -} - -int get_test_read_fd(void) -{ - int test_fd = open("/etc/passwd", O_RDONLY); - __save_test_fd(test_fd); - return test_fd; -} - -void close_test_fds(void) -{ - int i; - - for (i = 0; i < nr_test_fds; i++) { - if (test_fds[i] < 0) - continue; - close(test_fds[i]); - test_fds[i] = -1; - } - nr_test_fds = 0; -} - -#define barrier() __asm__ __volatile__("": : :"memory") -__attribute__((noinline)) int read_ptr(int *ptr) -{ - /* - * Keep GCC from optimizing this away somehow - */ - barrier(); - return *ptr; -} - -void test_read_of_write_disabled_region(int *ptr, u16 pkey) -{ - int ptr_contents; - - dprintf1("disabling write access to PKEY[1], doing read\n"); - pkey_write_deny(pkey); - ptr_contents = read_ptr(ptr); - dprintf1("*ptr: %d\n", ptr_contents); - dprintf1("\n"); -} -void test_read_of_access_disabled_region(int *ptr, u16 pkey) -{ - int ptr_contents; - - dprintf1("disabling access to PKEY[%02d], doing read @ %p\n", pkey, ptr); - rdpkru(); - pkey_access_deny(pkey); - ptr_contents = read_ptr(ptr); - dprintf1("*ptr: %d\n", ptr_contents); - expected_pk_fault(pkey); -} -void test_write_of_write_disabled_region(int *ptr, u16 pkey) -{ - dprintf1("disabling write access to PKEY[%02d], doing write\n", pkey); - pkey_write_deny(pkey); - *ptr = __LINE__; - expected_pk_fault(pkey); -} -void test_write_of_access_disabled_region(int *ptr, u16 pkey) -{ - dprintf1("disabling access to PKEY[%02d], doing write\n", pkey); - pkey_access_deny(pkey); - *ptr = __LINE__; - expected_pk_fault(pkey); -} -void test_kernel_write_of_access_disabled_region(int *ptr, u16 pkey) -{ - int ret; - int test_fd = get_test_read_fd(); - - dprintf1("disabling access to PKEY[%02d], " - "having kernel read() to buffer\n", pkey); - pkey_access_deny(pkey); - ret = read(test_fd, ptr, 1); - dprintf1("read ret: %d\n", ret); - pkey_assert(ret); -} -void test_kernel_write_of_write_disabled_region(int *ptr, u16 pkey) -{ - int ret; - int test_fd = get_test_read_fd(); - - pkey_write_deny(pkey); - ret = read(test_fd, ptr, 100); - dprintf1("read ret: %d\n", ret); - if (ret < 0 && (DEBUG_LEVEL > 0)) - perror("verbose read result (OK for this to be bad)"); - pkey_assert(ret); -} - -void test_kernel_gup_of_access_disabled_region(int *ptr, u16 pkey) -{ - int pipe_ret, vmsplice_ret; - struct iovec iov; - int pipe_fds[2]; - - pipe_ret = pipe(pipe_fds); - - pkey_assert(pipe_ret == 0); - dprintf1("disabling access to PKEY[%02d], " - "having kernel vmsplice from buffer\n", pkey); - pkey_access_deny(pkey); - iov.iov_base = ptr; - iov.iov_len = PAGE_SIZE; - vmsplice_ret = vmsplice(pipe_fds[1], &iov, 1, SPLICE_F_GIFT); - dprintf1("vmsplice() ret: %d\n", vmsplice_ret); - pkey_assert(vmsplice_ret == -1); - - close(pipe_fds[0]); - close(pipe_fds[1]); -} - -void test_kernel_gup_write_to_write_disabled_region(int *ptr, u16 pkey) -{ - int ignored = 0xdada; - int futex_ret; - int some_int = __LINE__; - - dprintf1("disabling write to PKEY[%02d], " - "doing futex gunk in buffer\n", pkey); - *ptr = some_int; - pkey_write_deny(pkey); - futex_ret = syscall(SYS_futex, ptr, FUTEX_WAIT, some_int-1, NULL, - &ignored, ignored); - if (DEBUG_LEVEL > 0) - perror("futex"); - dprintf1("futex() ret: %d\n", futex_ret); -} - -/* Assumes that all pkeys other than 'pkey' are unallocated */ -void test_pkey_syscalls_on_non_allocated_pkey(int *ptr, u16 pkey) -{ - int err; - int i; - - /* Note: 0 is the default pkey, so don't mess with it */ - for (i = 1; i < NR_PKEYS; i++) { - if (pkey == i) - continue; - - dprintf1("trying get/set/free to non-allocated pkey: %2d\n", i); - err = sys_pkey_free(i); - pkey_assert(err); - - err = sys_pkey_free(i); - pkey_assert(err); - - err = sys_mprotect_pkey(ptr, PAGE_SIZE, PROT_READ, i); - pkey_assert(err); - } -} - -/* Assumes that all pkeys other than 'pkey' are unallocated */ -void test_pkey_syscalls_bad_args(int *ptr, u16 pkey) -{ - int err; - int bad_pkey = NR_PKEYS+99; - - /* pass a known-invalid pkey in: */ - err = sys_mprotect_pkey(ptr, PAGE_SIZE, PROT_READ, bad_pkey); - pkey_assert(err); -} - -/* Assumes that all pkeys other than 'pkey' are unallocated */ -void test_pkey_alloc_exhaust(int *ptr, u16 pkey) -{ - int err; - int allocated_pkeys[NR_PKEYS] = {0}; - int nr_allocated_pkeys = 0; - int i; - - for (i = 0; i < NR_PKEYS*2; i++) { - int new_pkey; - dprintf1("%s() alloc loop: %d\n", __func__, i); - new_pkey = alloc_pkey(); - dprintf4("%s()::%d, err: %d pkru: 0x%x shadow: 0x%x\n", __func__, - __LINE__, err, __rdpkru(), shadow_pkru); - rdpkru(); /* for shadow checking */ - dprintf2("%s() errno: %d ENOSPC: %d\n", __func__, errno, ENOSPC); - if ((new_pkey == -1) && (errno == ENOSPC)) { - dprintf2("%s() failed to allocate pkey after %d tries\n", - __func__, nr_allocated_pkeys); - break; - } - pkey_assert(nr_allocated_pkeys < NR_PKEYS); - allocated_pkeys[nr_allocated_pkeys++] = new_pkey; - } - - dprintf3("%s()::%d\n", __func__, __LINE__); - - /* - * ensure it did not reach the end of the loop without - * failure: - */ - pkey_assert(i < NR_PKEYS*2); - - /* - * There are 16 pkeys supported in hardware. One is taken - * up for the default (0) and another can be taken up by - * an execute-only mapping. Ensure that we can allocate - * at least 14 (16-2). - */ - pkey_assert(i >= NR_PKEYS-2); - - for (i = 0; i < nr_allocated_pkeys; i++) { - err = sys_pkey_free(allocated_pkeys[i]); - pkey_assert(!err); - rdpkru(); /* for shadow checking */ - } -} - -void test_ptrace_of_child(int *ptr, u16 pkey) -{ - __attribute__((__unused__)) int peek_result; - pid_t child_pid; - void *ignored = 0; - long ret; - int status; - /* - * This is the "control" for our little expermient. Make sure - * we can always access it when ptracing. - */ - int *plain_ptr_unaligned = malloc(HPAGE_SIZE); - int *plain_ptr = ALIGN_PTR_UP(plain_ptr_unaligned, PAGE_SIZE); - - /* - * Fork a child which is an exact copy of this process, of course. - * That means we can do all of our tests via ptrace() and then plain - * memory access and ensure they work differently. - */ - child_pid = fork_lazy_child(); - dprintf1("[%d] child pid: %d\n", getpid(), child_pid); - - ret = ptrace(PTRACE_ATTACH, child_pid, ignored, ignored); - if (ret) - perror("attach"); - dprintf1("[%d] attach ret: %ld %d\n", getpid(), ret, __LINE__); - pkey_assert(ret != -1); - ret = waitpid(child_pid, &status, WUNTRACED); - if ((ret != child_pid) || !(WIFSTOPPED(status))) { - fprintf(stderr, "weird waitpid result %ld stat %x\n", - ret, status); - pkey_assert(0); - } - dprintf2("waitpid ret: %ld\n", ret); - dprintf2("waitpid status: %d\n", status); - - pkey_access_deny(pkey); - pkey_write_deny(pkey); - - /* Write access, untested for now: - ret = ptrace(PTRACE_POKEDATA, child_pid, peek_at, data); - pkey_assert(ret != -1); - dprintf1("poke at %p: %ld\n", peek_at, ret); - */ - - /* - * Try to access the pkey-protected "ptr" via ptrace: - */ - ret = ptrace(PTRACE_PEEKDATA, child_pid, ptr, ignored); - /* expect it to work, without an error: */ - pkey_assert(ret != -1); - /* Now access from the current task, and expect an exception: */ - peek_result = read_ptr(ptr); - expected_pk_fault(pkey); - - /* - * Try to access the NON-pkey-protected "plain_ptr" via ptrace: - */ - ret = ptrace(PTRACE_PEEKDATA, child_pid, plain_ptr, ignored); - /* expect it to work, without an error: */ - pkey_assert(ret != -1); - /* Now access from the current task, and expect NO exception: */ - peek_result = read_ptr(plain_ptr); - do_not_expect_pk_fault(); - - ret = ptrace(PTRACE_DETACH, child_pid, ignored, 0); - pkey_assert(ret != -1); - - ret = kill(child_pid, SIGKILL); - pkey_assert(ret != -1); - - wait(&status); - - free(plain_ptr_unaligned); -} - -void test_executing_on_unreadable_memory(int *ptr, u16 pkey) -{ - void *p1; - int scratch; - int ptr_contents; - int ret; - - p1 = ALIGN_PTR_UP(&lots_o_noops_around_write, PAGE_SIZE); - dprintf3("&lots_o_noops: %p\n", &lots_o_noops_around_write); - /* lots_o_noops_around_write should be page-aligned already */ - assert(p1 == &lots_o_noops_around_write); - - /* Point 'p1' at the *second* page of the function: */ - p1 += PAGE_SIZE; - - madvise(p1, PAGE_SIZE, MADV_DONTNEED); - lots_o_noops_around_write(&scratch); - ptr_contents = read_ptr(p1); - dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); - - ret = mprotect_pkey(p1, PAGE_SIZE, PROT_EXEC, (u64)pkey); - pkey_assert(!ret); - pkey_access_deny(pkey); - - dprintf2("pkru: %x\n", rdpkru()); - - /* - * Make sure this is an *instruction* fault - */ - madvise(p1, PAGE_SIZE, MADV_DONTNEED); - lots_o_noops_around_write(&scratch); - do_not_expect_pk_fault(); - ptr_contents = read_ptr(p1); - dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); - expected_pk_fault(pkey); -} - -void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey) -{ - int size = PAGE_SIZE; - int sret; - - if (cpu_has_pku()) { - dprintf1("SKIP: %s: no CPU support\n", __func__); - return; - } - - sret = syscall(SYS_mprotect_key, ptr, size, PROT_READ, pkey); - pkey_assert(sret < 0); -} - -void (*pkey_tests[])(int *ptr, u16 pkey) = { - test_read_of_write_disabled_region, - test_read_of_access_disabled_region, - test_write_of_write_disabled_region, - test_write_of_access_disabled_region, - test_kernel_write_of_access_disabled_region, - test_kernel_write_of_write_disabled_region, - test_kernel_gup_of_access_disabled_region, - test_kernel_gup_write_to_write_disabled_region, - test_executing_on_unreadable_memory, - test_ptrace_of_child, - test_pkey_syscalls_on_non_allocated_pkey, - test_pkey_syscalls_bad_args, - test_pkey_alloc_exhaust, -}; - -void run_tests_once(void) -{ - int *ptr; - int prot = PROT_READ|PROT_WRITE; - - for (test_nr = 0; test_nr < ARRAY_SIZE(pkey_tests); test_nr++) { - int pkey; - int orig_pkru_faults = pkru_faults; - - dprintf1("======================\n"); - dprintf1("test %d preparing...\n", test_nr); - - tracing_on(); - pkey = alloc_random_pkey(); - dprintf1("test %d starting with pkey: %d\n", test_nr, pkey); - ptr = malloc_pkey(PAGE_SIZE, prot, pkey); - dprintf1("test %d starting...\n", test_nr); - pkey_tests[test_nr](ptr, pkey); - dprintf1("freeing test memory: %p\n", ptr); - free_pkey_malloc(ptr); - sys_pkey_free(pkey); - - dprintf1("pkru_faults: %d\n", pkru_faults); - dprintf1("orig_pkru_faults: %d\n", orig_pkru_faults); - - tracing_off(); - close_test_fds(); - - printf("test %2d PASSED (iteration %d)\n", test_nr, iteration_nr); - dprintf1("======================\n\n"); - } - iteration_nr++; -} - -void pkey_setup_shadow(void) -{ - shadow_pkru = __rdpkru(); -} - -int main(void) -{ - int nr_iterations = 22; - - setup_handlers(); - - printf("has pku: %d\n", cpu_has_pku()); - - if (!cpu_has_pku()) { - int size = PAGE_SIZE; - int *ptr; - - printf("running PKEY tests for unsupported CPU/OS\n"); - - ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); - assert(ptr != (void *)-1); - test_mprotect_pkey_on_unsupported_cpu(ptr, 1); - exit(0); - } - - pkey_setup_shadow(); - printf("startup pkru: %x\n", rdpkru()); - setup_hugetlbfs(); - - while (nr_iterations-- > 0) - run_tests_once(); - - printf("done (all tests OK)\n"); - return 0; -}
* Ram Pai linuxram@us.ibm.com wrote:
cc: Dave Hansen dave.hansen@intel.com cc: Florian Weimer fweimer@redhat.com Signed-off-by: Ram Pai linuxram@us.ibm.com
tools/testing/selftests/vm/Makefile | 1 + tools/testing/selftests/vm/pkey-helpers.h | 223 ++++ tools/testing/selftests/vm/protection_keys.c | 1407 +++++++++++++++++++++++++ tools/testing/selftests/x86/Makefile | 2 +- tools/testing/selftests/x86/pkey-helpers.h | 223 ---- tools/testing/selftests/x86/protection_keys.c | 1407 ------------------------- 6 files changed, 1632 insertions(+), 1631 deletions(-) create mode 100644 tools/testing/selftests/vm/pkey-helpers.h create mode 100644 tools/testing/selftests/vm/protection_keys.c delete mode 100644 tools/testing/selftests/x86/pkey-helpers.h delete mode 100644 tools/testing/selftests/x86/protection_keys.c
Acked-by: Ingo Molnar mingo@kernel.org
Thanks,
Ingo -- To unsubscribe from this list: send the line "unsubscribe linux-kselftest" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
some pkru references are named to pkey_reg and some prku references are renamed to pkey
cc: Dave Hansen dave.hansen@intel.com cc: Florian Weimer fweimer@redhat.com Signed-off-by: Ram Pai linuxram@us.ibm.com --- tools/testing/selftests/vm/pkey-helpers.h | 85 +++++----- tools/testing/selftests/vm/protection_keys.c | 227 ++++++++++++++------------ 2 files changed, 164 insertions(+), 148 deletions(-)
diff --git a/tools/testing/selftests/vm/pkey-helpers.h b/tools/testing/selftests/vm/pkey-helpers.h index b3cb767..a568166 100644 --- a/tools/testing/selftests/vm/pkey-helpers.h +++ b/tools/testing/selftests/vm/pkey-helpers.h @@ -14,7 +14,7 @@ #include <sys/mman.h>
#define NR_PKEYS 16 -#define PKRU_BITS_PER_PKEY 2 +#define PKEY_BITS_PER_PKEY 2
#ifndef DEBUG_LEVEL #define DEBUG_LEVEL 0 @@ -57,85 +57,88 @@ static inline void sigsafe_printf(const char *format, ...) #define dprintf3(args...) dprintf_level(3, args) #define dprintf4(args...) dprintf_level(4, args)
-extern unsigned int shadow_pkru; -static inline unsigned int __rdpkru(void) +extern unsigned int shadow_pkey_reg; +static inline unsigned int __rdpkey_reg(void) { unsigned int eax, edx; unsigned int ecx = 0; - unsigned int pkru; + unsigned int pkey_reg;
asm volatile(".byte 0x0f,0x01,0xee\n\t" : "=a" (eax), "=d" (edx) : "c" (ecx)); - pkru = eax; - return pkru; + pkey_reg = eax; + return pkey_reg; }
-static inline unsigned int _rdpkru(int line) +static inline unsigned int _rdpkey_reg(int line) { - unsigned int pkru = __rdpkru(); + unsigned int pkey_reg = __rdpkey_reg();
- dprintf4("rdpkru(line=%d) pkru: %x shadow: %x\n", - line, pkru, shadow_pkru); - assert(pkru == shadow_pkru); + dprintf4("rdpkey_reg(line=%d) pkey_reg: %x shadow: %x\n", + line, pkey_reg, shadow_pkey_reg); + assert(pkey_reg == shadow_pkey_reg);
- return pkru; + return pkey_reg; }
-#define rdpkru() _rdpkru(__LINE__) +#define rdpkey_reg() _rdpkey_reg(__LINE__)
-static inline void __wrpkru(unsigned int pkru) +static inline void __wrpkey_reg(unsigned int pkey_reg) { - unsigned int eax = pkru; + unsigned int eax = pkey_reg; unsigned int ecx = 0; unsigned int edx = 0;
- dprintf4("%s() changing %08x to %08x\n", __func__, __rdpkru(), pkru); + dprintf4("%s() changing %08x to %08x\n", __func__, + __rdpkey_reg(), pkey_reg); asm volatile(".byte 0x0f,0x01,0xef\n\t" : : "a" (eax), "c" (ecx), "d" (edx)); - assert(pkru == __rdpkru()); + assert(pkey_reg == __rdpkey_reg()); }
-static inline void wrpkru(unsigned int pkru) +static inline void wrpkey_reg(unsigned int pkey_reg) { - dprintf4("%s() changing %08x to %08x\n", __func__, __rdpkru(), pkru); + dprintf4("%s() changing %08x to %08x\n", __func__, + __rdpkey_reg(), pkey_reg); /* will do the shadow check for us: */ - rdpkru(); - __wrpkru(pkru); - shadow_pkru = pkru; - dprintf4("%s(%08x) pkru: %08x\n", __func__, pkru, __rdpkru()); + rdpkey_reg(); + __wrpkey_reg(pkey_reg); + shadow_pkey_reg = pkey_reg; + dprintf4("%s(%08x) pkey_reg: %08x\n", __func__, + pkey_reg, __rdpkey_reg()); }
/* * These are technically racy. since something could - * change PKRU between the read and the write. + * change PKEY register between the read and the write. */ static inline void __pkey_access_allow(int pkey, int do_allow) { - unsigned int pkru = rdpkru(); + unsigned int pkey_reg = rdpkey_reg(); int bit = pkey * 2;
if (do_allow) - pkru &= (1<<bit); + pkey_reg &= (1<<bit); else - pkru |= (1<<bit); + pkey_reg |= (1<<bit);
- dprintf4("pkru now: %08x\n", rdpkru()); - wrpkru(pkru); + dprintf4("pkey_reg now: %08x\n", rdpkey_reg()); + wrpkey_reg(pkey_reg); }
static inline void __pkey_write_allow(int pkey, int do_allow_write) { - long pkru = rdpkru(); + long pkey_reg = rdpkey_reg(); int bit = pkey * 2 + 1;
if (do_allow_write) - pkru &= (1<<bit); + pkey_reg &= (1<<bit); else - pkru |= (1<<bit); + pkey_reg |= (1<<bit);
- wrpkru(pkru); - dprintf4("pkru now: %08x\n", rdpkru()); + wrpkey_reg(pkey_reg); + dprintf4("pkey_reg now: %08x\n", rdpkey_reg()); }
#define PROT_PKEY0 0x10 /* protection key value (bit 0) */ @@ -185,10 +188,10 @@ static inline int cpu_has_pku(void) return 1; }
-#define XSTATE_PKRU_BIT (9) -#define XSTATE_PKRU 0x200 +#define XSTATE_PKEY_BIT (9) +#define XSTATE_PKEY 0x200
-int pkru_xstate_offset(void) +int pkey_reg_xstate_offset(void) { unsigned int eax; unsigned int ebx; @@ -199,21 +202,21 @@ int pkru_xstate_offset(void) unsigned long XSTATE_CPUID = 0xd; int leaf;
- /* assume that XSTATE_PKRU is set in XCR0 */ - leaf = XSTATE_PKRU_BIT; + /* assume that XSTATE_PKEY is set in XCR0 */ + leaf = XSTATE_PKEY_BIT; { eax = XSTATE_CPUID; ecx = leaf; __cpuid(&eax, &ebx, &ecx, &edx);
- if (leaf == XSTATE_PKRU_BIT) { + if (leaf == XSTATE_PKEY_BIT) { xstate_offset = ebx; xstate_size = eax; } }
if (xstate_size == 0) { - printf("could not find size/offset of PKRU in xsave state\n"); + printf("could not find size/offset of PKEY in xsave state\n"); return 0; }
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index bc1b073..4aebf12 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -1,11 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Tests x86 Memory Protection Keys (see Documentation/x86/protection-keys.txt) + * Tests Memory Protection Keys (see Documentation/vm/protection-keys.txt) * * There are examples in here of: * * how to set protection keys on memory - * * how to set/clear bits in PKRU (the rights register) - * * how to handle SEGV_PKRU signals and extract pkey-relevant + * * how to set/clear bits in pkey registers (the rights register) + * * how to handle SEGV_PKUERR signals and extract pkey-relevant * information from the siginfo * * Things to add: @@ -48,7 +48,7 @@ int iteration_nr = 1; int test_nr;
-unsigned int shadow_pkru; +unsigned int shadow_pkey_reg;
#define HPAGE_SIZE (1UL<<21) #define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x))) @@ -241,7 +241,7 @@ void dump_mem(void *dumpme, int len_bytes) return "UNKNOWN"; }
-int pkru_faults; +int pkey_faults; int last_si_pkey = -1; void signal_handler(int signum, siginfo_t *si, void *vucontext) { @@ -249,16 +249,16 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) int trapno; unsigned long ip; char *fpregs; - u32 *pkru_ptr; + u32 *pkey_reg_ptr; u64 siginfo_pkey; u32 *si_pkey_ptr; - int pkru_offset; + int pkey_reg_offset; fpregset_t fpregset;
dprint_in_signal = 1; dprintf1(">>>>===============SIGSEGV============================\n"); - dprintf1("%s()::%d, pkru: 0x%x shadow: %x\n", __func__, __LINE__, - __rdpkru(), shadow_pkru); + dprintf1("%s()::%d, pkey_reg: 0x%x shadow: %x\n", __func__, __LINE__, + __rdpkey_reg(), shadow_pkey_reg);
trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO]; ip = uctxt->uc_mcontext.gregs[REG_IP_IDX]; @@ -275,19 +275,19 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) */ fpregs += 0x70; #endif - pkru_offset = pkru_xstate_offset(); - pkru_ptr = (void *)(&fpregs[pkru_offset]); + pkey_reg_offset = pkey_reg_xstate_offset(); + pkey_reg_ptr = (void *)(&fpregs[pkey_reg_offset]);
dprintf1("siginfo: %p\n", si); dprintf1(" fpregs: %p\n", fpregs); /* - * If we got a PKRU fault, we *HAVE* to have at least one bit set in + * If we got a PKEY fault, we *HAVE* to have at least one bit set in * here. */ - dprintf1("pkru_xstate_offset: %d\n", pkru_xstate_offset()); + dprintf1("pkey_reg_xstate_offset: %d\n", pkey_reg_xstate_offset()); if (DEBUG_LEVEL > 4) - dump_mem(pkru_ptr - 128, 256); - pkey_assert(*pkru_ptr); + dump_mem(pkey_reg_ptr - 128, 256); + pkey_assert(*pkey_reg_ptr);
si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset); dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr); @@ -303,13 +303,16 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) exit(4); }
- dprintf1("signal pkru from xsave: %08x\n", *pkru_ptr); - /* need __rdpkru() version so we do not do shadow_pkru checking */ - dprintf1("signal pkru from pkru: %08x\n", __rdpkru()); + dprintf1("signal pkey_reg from xsave: %08x\n", *pkey_reg_ptr); + /* + * need __rdpkey_reg() version so we do not do shadow_pkey_reg + * checking + */ + dprintf1("signal pkey_reg from pkey_reg: %08x\n", __rdpkey_reg()); dprintf1("pkey from siginfo: %jx\n", siginfo_pkey); - *(u64 *)pkru_ptr = 0x00000000; + *(u64 *)pkey_reg_ptr = 0x00000000; dprintf1("WARNING: set PRKU=0 to allow faulting instruction to continue\n"); - pkru_faults++; + pkey_faults++; dprintf1("<<<<==================================================\n"); return; if (trapno == 14) { @@ -427,45 +430,47 @@ void dumpit(char *f) u32 pkey_get(int pkey, unsigned long flags) { u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE); - u32 pkru = __rdpkru(); - u32 shifted_pkru; - u32 masked_pkru; + u32 pkey_reg = __rdpkey_reg(); + u32 shifted_pkey_reg; + u32 masked_pkey_reg;
dprintf1("%s(pkey=%d, flags=%lx) = %x / %d\n", __func__, pkey, flags, 0, 0); - dprintf2("%s() raw pkru: %x\n", __func__, pkru); + dprintf2("%s() raw pkey_reg: %x\n", __func__, pkey_reg);
- shifted_pkru = (pkru >> (pkey * PKRU_BITS_PER_PKEY)); - dprintf2("%s() shifted_pkru: %x\n", __func__, shifted_pkru); - masked_pkru = shifted_pkru & mask; - dprintf2("%s() masked pkru: %x\n", __func__, masked_pkru); + shifted_pkey_reg = (pkey_reg >> (pkey * PKEY_BITS_PER_PKEY)); + dprintf2("%s() shifted_pkey_reg: %x\n", __func__, shifted_pkey_reg); + masked_pkey_reg = shifted_pkey_reg & mask; + dprintf2("%s() masked pkey_reg: %x\n", __func__, masked_pkey_reg); /* * shift down the relevant bits to the lowest two, then * mask off all the other high bits. */ - return masked_pkru; + return masked_pkey_reg; }
int pkey_set(int pkey, unsigned long rights, unsigned long flags) { u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE); - u32 old_pkru = __rdpkru(); - u32 new_pkru; + u32 old_pkey_reg = __rdpkey_reg(); + u32 new_pkey_reg;
/* make sure that 'rights' only contains the bits we expect: */ assert(!(rights & ~mask));
- /* copy old pkru */ - new_pkru = old_pkru; + /* copy old pkey_reg */ + new_pkey_reg = old_pkey_reg; /* mask out bits from pkey in old value: */ - new_pkru &= ~(mask << (pkey * PKRU_BITS_PER_PKEY)); + new_pkey_reg &= ~(mask << (pkey * PKEY_BITS_PER_PKEY)); /* OR in new bits for pkey: */ - new_pkru |= (rights << (pkey * PKRU_BITS_PER_PKEY)); + new_pkey_reg |= (rights << (pkey * PKEY_BITS_PER_PKEY));
- __wrpkru(new_pkru); + __wrpkey_reg(new_pkey_reg);
- dprintf3("%s(pkey=%d, rights=%lx, flags=%lx) = %x pkru now: %x old_pkru: %x\n", - __func__, pkey, rights, flags, 0, __rdpkru(), old_pkru); + dprintf3("%s(pkey=%d, rights=%lx, flags=%lx) = %x" + " pkey_reg now: %x old_pkey_reg: %x\n", + __func__, pkey, rights, flags, 0, __rdpkey_reg(), + old_pkey_reg); return 0; }
@@ -474,7 +479,7 @@ void pkey_disable_set(int pkey, int flags) unsigned long syscall_flags = 0; int ret; int pkey_rights; - u32 orig_pkru = rdpkru(); + u32 orig_pkey_reg = rdpkey_reg();
dprintf1("START->%s(%d, 0x%x)\n", __func__, pkey, flags); @@ -490,9 +495,9 @@ void pkey_disable_set(int pkey, int flags)
ret = pkey_set(pkey, pkey_rights, syscall_flags); assert(!ret); - /*pkru and flags have the same format */ - shadow_pkru |= flags << (pkey * 2); - dprintf1("%s(%d) shadow: 0x%x\n", __func__, pkey, shadow_pkru); + /*pkey_reg and flags have the same format */ + shadow_pkey_reg |= flags << (pkey * 2); + dprintf1("%s(%d) shadow: 0x%x\n", __func__, pkey, shadow_pkey_reg);
pkey_assert(ret >= 0);
@@ -500,9 +505,9 @@ void pkey_disable_set(int pkey, int flags) dprintf1("%s(%d) pkey_get(%d): %x\n", __func__, pkey, pkey, pkey_rights);
- dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru()); + dprintf1("%s(%d) pkey_reg: 0x%x\n", __func__, pkey, rdpkey_reg()); if (flags) - pkey_assert(rdpkru() > orig_pkru); + pkey_assert(rdpkey_reg() > orig_pkey_reg); dprintf1("END<---%s(%d, 0x%x)\n", __func__, pkey, flags); } @@ -512,7 +517,7 @@ void pkey_disable_clear(int pkey, int flags) unsigned long syscall_flags = 0; int ret; int pkey_rights = pkey_get(pkey, syscall_flags); - u32 orig_pkru = rdpkru(); + u32 orig_pkey_reg = rdpkey_reg();
pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
@@ -523,17 +528,17 @@ void pkey_disable_clear(int pkey, int flags) pkey_rights |= flags;
ret = pkey_set(pkey, pkey_rights, 0); - /* pkru and flags have the same format */ - shadow_pkru &= ~(flags << (pkey * 2)); + /* pkey_reg and flags have the same format */ + shadow_pkey_reg &= ~(flags << (pkey * 2)); pkey_assert(ret >= 0);
pkey_rights = pkey_get(pkey, syscall_flags); dprintf1("%s(%d) pkey_get(%d): %x\n", __func__, pkey, pkey, pkey_rights);
- dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru()); + dprintf1("%s(%d) pkey_reg: 0x%x\n", __func__, pkey, rdpkey_reg()); if (flags) - assert(rdpkru() > orig_pkru); + assert(rdpkey_reg() > orig_pkey_reg); }
void pkey_write_allow(int pkey) @@ -586,33 +591,38 @@ int alloc_pkey(void) int ret; unsigned long init_val = 0x0;
- dprintf1("alloc_pkey()::%d, pkru: 0x%x shadow: %x\n", - __LINE__, __rdpkru(), shadow_pkru); + dprintf1("%s()::%d, pkey_reg: 0x%x shadow: %x\n", __func__, + __LINE__, __rdpkey_reg(), shadow_pkey_reg); ret = sys_pkey_alloc(0, init_val); /* - * pkey_alloc() sets PKRU, so we need to reflect it in - * shadow_pkru: + * pkey_alloc() sets PKEY register, so we need to reflect it in + * shadow_pkey_reg: */ - dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", - __LINE__, ret, __rdpkru(), shadow_pkru); + dprintf4("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n", + __func__, __LINE__, ret, __rdpkey_reg(), + shadow_pkey_reg); if (ret) { /* clear both the bits: */ - shadow_pkru &= ~(0x3 << (ret * 2)); - dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", - __LINE__, ret, __rdpkru(), shadow_pkru); + shadow_pkey_reg &= ~(0x3 << (ret * 2)); + dprintf4("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n", + __func__, + __LINE__, ret, __rdpkey_reg(), + shadow_pkey_reg); /* * move the new state in from init_val - * (remember, we cheated and init_val == pkru format) + * (remember, we cheated and init_val == pkey_reg format) */ - shadow_pkru |= (init_val << (ret * 2)); + shadow_pkey_reg |= (init_val << (ret * 2)); } - dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", - __LINE__, ret, __rdpkru(), shadow_pkru); - dprintf1("alloc_pkey()::%d errno: %d\n", __LINE__, errno); + dprintf4("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n", + __func__, __LINE__, ret, __rdpkey_reg(), + shadow_pkey_reg); + dprintf1("%s()::%d errno: %d\n", __func__, __LINE__, errno); /* for shadow checking: */ - rdpkru(); - dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", - __LINE__, ret, __rdpkru(), shadow_pkru); + rdpkey_reg(); + dprintf4("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n", + __func__, __LINE__, ret, __rdpkey_reg(), + shadow_pkey_reg); return ret; }
@@ -663,8 +673,8 @@ int alloc_random_pkey(void) free_ret = sys_pkey_free(alloced_pkeys[i]); pkey_assert(!free_ret); } - dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__, - __LINE__, ret, __rdpkru(), shadow_pkru); + dprintf1("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n", __func__, + __LINE__, ret, __rdpkey_reg(), shadow_pkey_reg); return ret; }
@@ -682,11 +692,13 @@ int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, if (nr_iterations-- < 0) break;
- dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__, - __LINE__, ret, __rdpkru(), shadow_pkru); + dprintf1("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n", + __func__, __LINE__, ret, __rdpkey_reg(), + shadow_pkey_reg); sys_pkey_free(rpkey); - dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__, - __LINE__, ret, __rdpkru(), shadow_pkru); + dprintf1("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n", + __func__, __LINE__, ret, __rdpkey_reg(), + shadow_pkey_reg); } pkey_assert(pkey < NR_PKEYS);
@@ -694,8 +706,8 @@ int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, dprintf1("mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n", ptr, size, orig_prot, pkey, ret); pkey_assert(!ret); - dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__, - __LINE__, ret, __rdpkru(), shadow_pkru); + dprintf1("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n", __func__, + __LINE__, ret, __rdpkey_reg(), shadow_pkey_reg); return ret; }
@@ -773,7 +785,7 @@ void free_pkey_malloc(void *ptr) void *ptr; int ret;
- rdpkru(); + rdpkey_reg(); dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__, size, prot, pkey); pkey_assert(pkey < NR_PKEYS); @@ -782,7 +794,7 @@ void free_pkey_malloc(void *ptr) ret = mprotect_pkey((void *)ptr, PAGE_SIZE, prot, pkey); pkey_assert(!ret); record_pkey_malloc(ptr, size); - rdpkru(); + rdpkey_reg();
dprintf1("%s() for pkey %d @ %p\n", __func__, pkey, ptr); return ptr; @@ -945,31 +957,31 @@ void setup_hugetlbfs(void) return ret; }
-int last_pkru_faults; -void expected_pk_fault(int pkey) +int last_pkey_faults; +void expected_pkey_fault(int pkey) { - dprintf2("%s(): last_pkru_faults: %d pkru_faults: %d\n", - __func__, last_pkru_faults, pkru_faults); + dprintf2("%s(): last_pkey_faults: %d pkey_faults: %d\n", + __func__, last_pkey_faults, pkey_faults); dprintf2("%s(%d): last_si_pkey: %d\n", __func__, pkey, last_si_pkey); - pkey_assert(last_pkru_faults + 1 == pkru_faults); + pkey_assert(last_pkey_faults + 1 == pkey_faults); pkey_assert(last_si_pkey == pkey); /* - * The signal handler shold have cleared out PKRU to let the + * The signal handler shold have cleared out PKEY register to let the * test program continue. We now have to restore it. */ - if (__rdpkru() != 0) + if (__rdpkey_reg() != 0) pkey_assert(0);
- __wrpkru(shadow_pkru); - dprintf1("%s() set PKRU=%x to restore state after signal nuked it\n", - __func__, shadow_pkru); - last_pkru_faults = pkru_faults; + __wrpkey_reg(shadow_pkey_reg); + dprintf1("%s() set pkey_reg=%x to restore state after signal " + "nuked it\n", __func__, shadow_pkey_reg); + last_pkey_faults = pkey_faults; last_si_pkey = -1; }
-void do_not_expect_pk_fault(void) +void do_not_expect_pkey_fault(void) { - pkey_assert(last_pkru_faults == pkru_faults); + pkey_assert(last_pkey_faults == pkey_faults); }
int test_fds[10] = { -1 }; @@ -1027,25 +1039,25 @@ void test_read_of_access_disabled_region(int *ptr, u16 pkey) int ptr_contents;
dprintf1("disabling access to PKEY[%02d], doing read @ %p\n", pkey, ptr); - rdpkru(); + rdpkey_reg(); pkey_access_deny(pkey); ptr_contents = read_ptr(ptr); dprintf1("*ptr: %d\n", ptr_contents); - expected_pk_fault(pkey); + expected_pkey_fault(pkey); } void test_write_of_write_disabled_region(int *ptr, u16 pkey) { dprintf1("disabling write access to PKEY[%02d], doing write\n", pkey); pkey_write_deny(pkey); *ptr = __LINE__; - expected_pk_fault(pkey); + expected_pkey_fault(pkey); } void test_write_of_access_disabled_region(int *ptr, u16 pkey) { dprintf1("disabling access to PKEY[%02d], doing write\n", pkey); pkey_access_deny(pkey); *ptr = __LINE__; - expected_pk_fault(pkey); + expected_pkey_fault(pkey); } void test_kernel_write_of_access_disabled_region(int *ptr, u16 pkey) { @@ -1157,9 +1169,10 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey) int new_pkey; dprintf1("%s() alloc loop: %d\n", __func__, i); new_pkey = alloc_pkey(); - dprintf4("%s()::%d, err: %d pkru: 0x%x shadow: 0x%x\n", __func__, - __LINE__, err, __rdpkru(), shadow_pkru); - rdpkru(); /* for shadow checking */ + dprintf4("%s()::%d, err: %d pkey_reg: 0x%x shadow: 0x%x\n", + __func__, __LINE__, err, __rdpkey_reg(), + shadow_pkey_reg); + rdpkey_reg(); /* for shadow checking */ dprintf2("%s() errno: %d ENOSPC: %d\n", __func__, errno, ENOSPC); if ((new_pkey == -1) && (errno == ENOSPC)) { dprintf2("%s() failed to allocate pkey after %d tries\n", @@ -1189,7 +1202,7 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey) for (i = 0; i < nr_allocated_pkeys; i++) { err = sys_pkey_free(allocated_pkeys[i]); pkey_assert(!err); - rdpkru(); /* for shadow checking */ + rdpkey_reg(); /* for shadow checking */ } }
@@ -1246,7 +1259,7 @@ void test_ptrace_of_child(int *ptr, u16 pkey) pkey_assert(ret != -1); /* Now access from the current task, and expect an exception: */ peek_result = read_ptr(ptr); - expected_pk_fault(pkey); + expected_pkey_fault(pkey);
/* * Try to access the NON-pkey-protected "plain_ptr" via ptrace: @@ -1256,7 +1269,7 @@ void test_ptrace_of_child(int *ptr, u16 pkey) pkey_assert(ret != -1); /* Now access from the current task, and expect NO exception: */ peek_result = read_ptr(plain_ptr); - do_not_expect_pk_fault(); + do_not_expect_pkey_fault();
ret = ptrace(PTRACE_DETACH, child_pid, ignored, 0); pkey_assert(ret != -1); @@ -1293,17 +1306,17 @@ void test_executing_on_unreadable_memory(int *ptr, u16 pkey) pkey_assert(!ret); pkey_access_deny(pkey);
- dprintf2("pkru: %x\n", rdpkru()); + dprintf2("pkey_reg: %x\n", rdpkey_reg());
/* * Make sure this is an *instruction* fault */ madvise(p1, PAGE_SIZE, MADV_DONTNEED); lots_o_noops_around_write(&scratch); - do_not_expect_pk_fault(); + do_not_expect_pkey_fault(); ptr_contents = read_ptr(p1); dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); - expected_pk_fault(pkey); + expected_pkey_fault(pkey); }
void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey) @@ -1343,7 +1356,7 @@ void run_tests_once(void)
for (test_nr = 0; test_nr < ARRAY_SIZE(pkey_tests); test_nr++) { int pkey; - int orig_pkru_faults = pkru_faults; + int orig_pkey_faults = pkey_faults;
dprintf1("======================\n"); dprintf1("test %d preparing...\n", test_nr); @@ -1358,8 +1371,8 @@ void run_tests_once(void) free_pkey_malloc(ptr); sys_pkey_free(pkey);
- dprintf1("pkru_faults: %d\n", pkru_faults); - dprintf1("orig_pkru_faults: %d\n", orig_pkru_faults); + dprintf1("pkey_faults: %d\n", pkey_faults); + dprintf1("orig_pkey_faults: %d\n", orig_pkey_faults);
tracing_off(); close_test_fds(); @@ -1372,7 +1385,7 @@ void run_tests_once(void)
void pkey_setup_shadow(void) { - shadow_pkru = __rdpkru(); + shadow_pkey_reg = __rdpkey_reg(); }
int main(void) @@ -1396,7 +1409,7 @@ int main(void) }
pkey_setup_shadow(); - printf("startup pkru: %x\n", rdpkru()); + printf("startup pkey_reg: %x\n", rdpkey_reg()); setup_hugetlbfs();
while (nr_iterations-- > 0)
On 02/21/2018 05:55 PM, Ram Pai wrote:
int pkey_set(int pkey, unsigned long rights, unsigned long flags) { u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE);
- u32 old_pkru = __rdpkru();
- u32 new_pkru;
- u32 old_pkey_reg = __rdpkey_reg();
- u32 new_pkey_reg;
If we're not using the _actual_ instruction names ("rdpkru"), I think I'd rather this be something more readable, like: __read_pkey_reg().
But, it's OK-ish the way it is.
Reviewed-by: Dave Hansen dave.hansen@intel.com -- To unsubscribe from this list: send the line "unsubscribe linux-kselftest" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Moved all the generic definition and helper functions to the header file
cc: Dave Hansen dave.hansen@intel.com cc: Florian Weimer fweimer@redhat.com Signed-off-by: Ram Pai linuxram@us.ibm.com --- tools/testing/selftests/vm/pkey-helpers.h | 62 ++++++++++++++++++++++-- tools/testing/selftests/vm/protection_keys.c | 66 -------------------------- 2 files changed, 57 insertions(+), 71 deletions(-)
diff --git a/tools/testing/selftests/vm/pkey-helpers.h b/tools/testing/selftests/vm/pkey-helpers.h index a568166..c1bc761 100644 --- a/tools/testing/selftests/vm/pkey-helpers.h +++ b/tools/testing/selftests/vm/pkey-helpers.h @@ -13,8 +13,31 @@ #include <ucontext.h> #include <sys/mman.h>
+/* Define some kernel-like types */ +#define u8 uint8_t +#define u16 uint16_t +#define u32 uint32_t +#define u64 uint64_t + +#ifdef __i386__ +#define SYS_mprotect_key 380 +#define SYS_pkey_alloc 381 +#define SYS_pkey_free 382 +#define REG_IP_IDX REG_EIP +#define si_pkey_offset 0x14 +#else +#define SYS_mprotect_key 329 +#define SYS_pkey_alloc 330 +#define SYS_pkey_free 331 +#define REG_IP_IDX REG_RIP +#define si_pkey_offset 0x20 +#endif + #define NR_PKEYS 16 #define PKEY_BITS_PER_PKEY 2 +#define PKEY_DISABLE_ACCESS 0x1 +#define PKEY_DISABLE_WRITE 0x2 +#define HPAGE_SIZE (1UL<<21)
#ifndef DEBUG_LEVEL #define DEBUG_LEVEL 0 @@ -141,11 +164,6 @@ static inline void __pkey_write_allow(int pkey, int do_allow_write) dprintf4("pkey_reg now: %08x\n", rdpkey_reg()); }
-#define PROT_PKEY0 0x10 /* protection key value (bit 0) */ -#define PROT_PKEY1 0x20 /* protection key value (bit 1) */ -#define PROT_PKEY2 0x40 /* protection key value (bit 2) */ -#define PROT_PKEY3 0x80 /* protection key value (bit 3) */ - #define PAGE_SIZE 4096 #define MB (1<<20)
@@ -223,4 +241,38 @@ int pkey_reg_xstate_offset(void) return xstate_offset; }
+static inline void __page_o_noops(void) +{ + /* 8-bytes of instruction * 512 bytes = 1 page */ + asm(".rept 512 ; nopl 0x7eeeeeee(%eax) ; .endr"); +} + #endif /* _PKEYS_HELPER_H */ + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x))) +#define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1)) +#define ALIGN_DOWN(x, align_to) ((x) & ~((align_to)-1)) +#define ALIGN_PTR_UP(p, ptr_align_to) \ + ((typeof(p))ALIGN_UP((unsigned long)(p), ptr_align_to)) +#define ALIGN_PTR_DOWN(p, ptr_align_to) \ + ((typeof(p))ALIGN_DOWN((unsigned long)(p), ptr_align_to)) +#define __stringify_1(x...) #x +#define __stringify(x...) __stringify_1(x) + +#define PTR_ERR_ENOTSUP ((void *)-ENOTSUP) + +int dprint_in_signal; +char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE]; + +extern void abort_hooks(void); +#define pkey_assert(condition) do { \ + if (!(condition)) { \ + dprintf0("assert() at %s::%d test_nr: %d iteration: %d\n", \ + __FILE__, __LINE__, \ + test_nr, iteration_nr); \ + dprintf0("errno at assert: %d", errno); \ + abort_hooks(); \ + assert(condition); \ + } \ +} while (0) +#define raw_assert(cond) assert(cond) diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index 4aebf12..91bade4 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -49,34 +49,9 @@ int test_nr;
unsigned int shadow_pkey_reg; - -#define HPAGE_SIZE (1UL<<21) -#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x))) -#define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1)) -#define ALIGN_DOWN(x, align_to) ((x) & ~((align_to)-1)) -#define ALIGN_PTR_UP(p, ptr_align_to) ((typeof(p))ALIGN_UP((unsigned long)(p), ptr_align_to)) -#define ALIGN_PTR_DOWN(p, ptr_align_to) ((typeof(p))ALIGN_DOWN((unsigned long)(p), ptr_align_to)) -#define __stringify_1(x...) #x -#define __stringify(x...) __stringify_1(x) - -#define PTR_ERR_ENOTSUP ((void *)-ENOTSUP) - int dprint_in_signal; char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE];
-extern void abort_hooks(void); -#define pkey_assert(condition) do { \ - if (!(condition)) { \ - dprintf0("assert() at %s::%d test_nr: %d iteration: %d\n", \ - __FILE__, __LINE__, \ - test_nr, iteration_nr); \ - dprintf0("errno at assert: %d", errno); \ - abort_hooks(); \ - assert(condition); \ - } \ -} while (0) -#define raw_assert(cond) assert(cond) - void cat_into_file(char *str, char *file) { int fd = open(file, O_RDWR); @@ -154,12 +129,6 @@ void abort_hooks(void) #endif }
-static inline void __page_o_noops(void) -{ - /* 8-bytes of instruction * 512 bytes = 1 page */ - asm(".rept 512 ; nopl 0x7eeeeeee(%eax) ; .endr"); -} - /* * This attempts to have roughly a page of instructions followed by a few * instructions that do a write, and another page of instructions. That @@ -182,38 +151,6 @@ void lots_o_noops_around_write(int *write_to_me) dprintf3("%s() done\n", __func__); }
-/* Define some kernel-like types */ -#define u8 uint8_t -#define u16 uint16_t -#define u32 uint32_t -#define u64 uint64_t - -#ifdef __i386__ - -#ifndef SYS_mprotect_key -# define SYS_mprotect_key 380 -#endif -#ifndef SYS_pkey_alloc -# define SYS_pkey_alloc 381 -# define SYS_pkey_free 382 -#endif -#define REG_IP_IDX REG_EIP -#define si_pkey_offset 0x14 - -#else - -#ifndef SYS_mprotect_key -# define SYS_mprotect_key 329 -#endif -#ifndef SYS_pkey_alloc -# define SYS_pkey_alloc 330 -# define SYS_pkey_free 331 -#endif -#define REG_IP_IDX REG_RIP -#define si_pkey_offset 0x20 - -#endif - void dump_mem(void *dumpme, int len_bytes) { char *c = (void *)dumpme; @@ -424,9 +361,6 @@ void dumpit(char *f) close(fd); }
-#define PKEY_DISABLE_ACCESS 0x1 -#define PKEY_DISABLE_WRITE 0x2 - u32 pkey_get(int pkey, unsigned long flags) { u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE);
This is in preparation to accomadate a differing size register across architectures.
cc: Dave Hansen dave.hansen@intel.com cc: Florian Weimer fweimer@redhat.com Signed-off-by: Ram Pai linuxram@us.ibm.com --- tools/testing/selftests/vm/pkey-helpers.h | 27 +++++----- tools/testing/selftests/vm/protection_keys.c | 69 ++++++++++++++------------ 2 files changed, 51 insertions(+), 45 deletions(-)
diff --git a/tools/testing/selftests/vm/pkey-helpers.h b/tools/testing/selftests/vm/pkey-helpers.h index c1bc761..b6c2133 100644 --- a/tools/testing/selftests/vm/pkey-helpers.h +++ b/tools/testing/selftests/vm/pkey-helpers.h @@ -18,6 +18,7 @@ #define u16 uint16_t #define u32 uint32_t #define u64 uint64_t +#define pkey_reg_t u32
#ifdef __i386__ #define SYS_mprotect_key 380 @@ -80,12 +81,12 @@ static inline void sigsafe_printf(const char *format, ...) #define dprintf3(args...) dprintf_level(3, args) #define dprintf4(args...) dprintf_level(4, args)
-extern unsigned int shadow_pkey_reg; -static inline unsigned int __rdpkey_reg(void) +extern pkey_reg_t shadow_pkey_reg; +static inline pkey_reg_t __rdpkey_reg(void) { unsigned int eax, edx; unsigned int ecx = 0; - unsigned int pkey_reg; + pkey_reg_t pkey_reg;
asm volatile(".byte 0x0f,0x01,0xee\n\t" : "=a" (eax), "=d" (edx) @@ -94,11 +95,11 @@ static inline unsigned int __rdpkey_reg(void) return pkey_reg; }
-static inline unsigned int _rdpkey_reg(int line) +static inline pkey_reg_t _rdpkey_reg(int line) { - unsigned int pkey_reg = __rdpkey_reg(); + pkey_reg_t pkey_reg = __rdpkey_reg();
- dprintf4("rdpkey_reg(line=%d) pkey_reg: %x shadow: %x\n", + dprintf4("rdpkey_reg(line=%d) pkey_reg: %016lx shadow: %016lx\n", line, pkey_reg, shadow_pkey_reg); assert(pkey_reg == shadow_pkey_reg);
@@ -107,11 +108,11 @@ static inline unsigned int _rdpkey_reg(int line)
#define rdpkey_reg() _rdpkey_reg(__LINE__)
-static inline void __wrpkey_reg(unsigned int pkey_reg) +static inline void __wrpkey_reg(pkey_reg_t pkey_reg) { - unsigned int eax = pkey_reg; - unsigned int ecx = 0; - unsigned int edx = 0; + pkey_reg_t eax = pkey_reg; + pkey_reg_t ecx = 0; + pkey_reg_t edx = 0;
dprintf4("%s() changing %08x to %08x\n", __func__, __rdpkey_reg(), pkey_reg); @@ -120,7 +121,7 @@ static inline void __wrpkey_reg(unsigned int pkey_reg) assert(pkey_reg == __rdpkey_reg()); }
-static inline void wrpkey_reg(unsigned int pkey_reg) +static inline void wrpkey_reg(pkey_reg_t pkey_reg) { dprintf4("%s() changing %08x to %08x\n", __func__, __rdpkey_reg(), pkey_reg); @@ -138,7 +139,7 @@ static inline void wrpkey_reg(unsigned int pkey_reg) */ static inline void __pkey_access_allow(int pkey, int do_allow) { - unsigned int pkey_reg = rdpkey_reg(); + pkey_reg_t pkey_reg = rdpkey_reg(); int bit = pkey * 2;
if (do_allow) @@ -152,7 +153,7 @@ static inline void __pkey_access_allow(int pkey, int do_allow)
static inline void __pkey_write_allow(int pkey, int do_allow_write) { - long pkey_reg = rdpkey_reg(); + pkey_reg_t pkey_reg = rdpkey_reg(); int bit = pkey * 2 + 1;
if (do_allow_write) diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index 91bade4..3ef2569 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -48,7 +48,7 @@ int iteration_nr = 1; int test_nr;
-unsigned int shadow_pkey_reg; +pkey_reg_t shadow_pkey_reg; int dprint_in_signal; char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE];
@@ -158,7 +158,7 @@ void dump_mem(void *dumpme, int len_bytes)
for (i = 0; i < len_bytes; i += sizeof(u64)) { u64 *ptr = (u64 *)(c + i); - dprintf1("dump[%03d][@%p]: %016jx\n", i, ptr, *ptr); + dprintf1("dump[%03d][@%p]: %016lx\n", i, ptr, *ptr); } }
@@ -186,7 +186,7 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) int trapno; unsigned long ip; char *fpregs; - u32 *pkey_reg_ptr; + pkey_reg_t *pkey_reg_ptr; u64 siginfo_pkey; u32 *si_pkey_ptr; int pkey_reg_offset; @@ -194,7 +194,8 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
dprint_in_signal = 1; dprintf1(">>>>===============SIGSEGV============================\n"); - dprintf1("%s()::%d, pkey_reg: 0x%x shadow: %x\n", __func__, __LINE__, + dprintf1("%s()::%d, pkey_reg: 0x%016lx shadow: %016lx\n", + __func__, __LINE__, __rdpkey_reg(), shadow_pkey_reg);
trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO]; @@ -202,8 +203,9 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) fpregset = uctxt->uc_mcontext.fpregs; fpregs = (void *)fpregset;
- dprintf2("%s() trapno: %d ip: 0x%lx info->si_code: %s/%d\n", __func__, - trapno, ip, si_code_str(si->si_code), si->si_code); + dprintf2("%s() trapno: %d ip: 0x%016lx info->si_code: %s/%d\n", + __func__, trapno, ip, si_code_str(si->si_code), + si->si_code); #ifdef __i386__ /* * 32-bit has some extra padding so that userspace can tell whether @@ -240,12 +242,12 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) exit(4); }
- dprintf1("signal pkey_reg from xsave: %08x\n", *pkey_reg_ptr); + dprintf1("signal pkey_reg from xsave: %016lx\n", *pkey_reg_ptr); /* * need __rdpkey_reg() version so we do not do shadow_pkey_reg * checking */ - dprintf1("signal pkey_reg from pkey_reg: %08x\n", __rdpkey_reg()); + dprintf1("signal pkey_reg from pkey_reg: %016lx\n", __rdpkey_reg()); dprintf1("pkey from siginfo: %jx\n", siginfo_pkey); *(u64 *)pkey_reg_ptr = 0x00000000; dprintf1("WARNING: set PRKU=0 to allow faulting instruction to continue\n"); @@ -364,8 +366,8 @@ void dumpit(char *f) u32 pkey_get(int pkey, unsigned long flags) { u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE); - u32 pkey_reg = __rdpkey_reg(); - u32 shifted_pkey_reg; + pkey_reg_t pkey_reg = __rdpkey_reg(); + pkey_reg_t shifted_pkey_reg; u32 masked_pkey_reg;
dprintf1("%s(pkey=%d, flags=%lx) = %x / %d\n", @@ -386,8 +388,8 @@ u32 pkey_get(int pkey, unsigned long flags) int pkey_set(int pkey, unsigned long rights, unsigned long flags) { u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE); - u32 old_pkey_reg = __rdpkey_reg(); - u32 new_pkey_reg; + pkey_reg_t old_pkey_reg = __rdpkey_reg(); + pkey_reg_t new_pkey_reg;
/* make sure that 'rights' only contains the bits we expect: */ assert(!(rights & ~mask)); @@ -401,10 +403,10 @@ int pkey_set(int pkey, unsigned long rights, unsigned long flags)
__wrpkey_reg(new_pkey_reg);
- dprintf3("%s(pkey=%d, rights=%lx, flags=%lx) = %x" - " pkey_reg now: %x old_pkey_reg: %x\n", - __func__, pkey, rights, flags, 0, __rdpkey_reg(), - old_pkey_reg); + dprintf3("%s(pkey=%d, rights=%lx, flags=%lx) = %x " + "pkey_reg now: %016lx old_pkey_reg: %016lx\n", + __func__, pkey, rights, flags, + 0, __rdpkey_reg(), old_pkey_reg); return 0; }
@@ -413,7 +415,7 @@ void pkey_disable_set(int pkey, int flags) unsigned long syscall_flags = 0; int ret; int pkey_rights; - u32 orig_pkey_reg = rdpkey_reg(); + pkey_reg_t orig_pkey_reg = rdpkey_reg();
dprintf1("START->%s(%d, 0x%x)\n", __func__, pkey, flags); @@ -421,8 +423,6 @@ void pkey_disable_set(int pkey, int flags)
pkey_rights = pkey_get(pkey, syscall_flags);
- dprintf1("%s(%d) pkey_get(%d): %x\n", __func__, - pkey, pkey, pkey_rights); pkey_assert(pkey_rights >= 0);
pkey_rights |= flags; @@ -431,7 +431,8 @@ void pkey_disable_set(int pkey, int flags) assert(!ret); /*pkey_reg and flags have the same format */ shadow_pkey_reg |= flags << (pkey * 2); - dprintf1("%s(%d) shadow: 0x%x\n", __func__, pkey, shadow_pkey_reg); + dprintf1("%s(%d) shadow: 0x%016lx\n", + __func__, pkey, shadow_pkey_reg);
pkey_assert(ret >= 0);
@@ -439,7 +440,8 @@ void pkey_disable_set(int pkey, int flags) dprintf1("%s(%d) pkey_get(%d): %x\n", __func__, pkey, pkey, pkey_rights);
- dprintf1("%s(%d) pkey_reg: 0x%x\n", __func__, pkey, rdpkey_reg()); + dprintf1("%s(%d) pkey_reg: 0x%lx\n", + __func__, pkey, rdpkey_reg()); if (flags) pkey_assert(rdpkey_reg() > orig_pkey_reg); dprintf1("END<---%s(%d, 0x%x)\n", __func__, @@ -451,7 +453,7 @@ void pkey_disable_clear(int pkey, int flags) unsigned long syscall_flags = 0; int ret; int pkey_rights = pkey_get(pkey, syscall_flags); - u32 orig_pkey_reg = rdpkey_reg(); + pkey_reg_t orig_pkey_reg = rdpkey_reg();
pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
@@ -470,7 +472,8 @@ void pkey_disable_clear(int pkey, int flags) dprintf1("%s(%d) pkey_get(%d): %x\n", __func__, pkey, pkey, pkey_rights);
- dprintf1("%s(%d) pkey_reg: 0x%x\n", __func__, pkey, rdpkey_reg()); + dprintf1("%s(%d) pkey_reg: 0x%016lx\n", __func__, + pkey, rdpkey_reg()); if (flags) assert(rdpkey_reg() > orig_pkey_reg); } @@ -525,20 +528,21 @@ int alloc_pkey(void) int ret; unsigned long init_val = 0x0;
- dprintf1("%s()::%d, pkey_reg: 0x%x shadow: %x\n", __func__, + dprintf1("%s()::%d, pkey_reg: 0x%016lx shadow: %016lx\n", __func__, __LINE__, __rdpkey_reg(), shadow_pkey_reg); ret = sys_pkey_alloc(0, init_val); /* * pkey_alloc() sets PKEY register, so we need to reflect it in * shadow_pkey_reg: */ - dprintf4("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n", + dprintf4("%s()::%d, ret: %d pkey_reg: 0x%016lx shadow: 0x%016lx\n", __func__, __LINE__, ret, __rdpkey_reg(), shadow_pkey_reg); if (ret) { /* clear both the bits: */ shadow_pkey_reg &= ~(0x3 << (ret * 2)); - dprintf4("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n", + dprintf4("%s()::%d, ret: %d pkey_reg: 0x%016lx " + "shadow: 0x%016lx\n", __func__, __LINE__, ret, __rdpkey_reg(), shadow_pkey_reg); @@ -548,13 +552,13 @@ int alloc_pkey(void) */ shadow_pkey_reg |= (init_val << (ret * 2)); } - dprintf4("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n", + dprintf4("%s()::%d, ret: %d pkey_reg: 0x%016lx shadow: 0x%016lx\n", __func__, __LINE__, ret, __rdpkey_reg(), shadow_pkey_reg); dprintf1("%s()::%d errno: %d\n", __func__, __LINE__, errno); /* for shadow checking: */ rdpkey_reg(); - dprintf4("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n", + dprintf4("%s()::%d, ret: %d pkey_reg: 0x%016lx shadow: 0x%016lx\n", __func__, __LINE__, ret, __rdpkey_reg(), shadow_pkey_reg); return ret; @@ -1103,9 +1107,10 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey) int new_pkey; dprintf1("%s() alloc loop: %d\n", __func__, i); new_pkey = alloc_pkey(); - dprintf4("%s()::%d, err: %d pkey_reg: 0x%x shadow: 0x%x\n", - __func__, __LINE__, err, __rdpkey_reg(), - shadow_pkey_reg); + dprintf4("%s()::%d, err: %d pkey_reg: 0x%016lx " + "shadow: 0x%016lx\n", + __func__, __LINE__, err, __rdpkey_reg(), + shadow_pkey_reg); rdpkey_reg(); /* for shadow checking */ dprintf2("%s() errno: %d ENOSPC: %d\n", __func__, errno, ENOSPC); if ((new_pkey == -1) && (errno == ENOSPC)) { @@ -1343,7 +1348,7 @@ int main(void) }
pkey_setup_shadow(); - printf("startup pkey_reg: %x\n", rdpkey_reg()); + printf("startup pkey_reg: 0x%016lx\n", (u64)rdpkey_reg()); setup_hugetlbfs();
while (nr_iterations-- > 0)
On 02/21/2018 05:55 PM, Ram Pai wrote:
-static inline unsigned int _rdpkey_reg(int line) +static inline pkey_reg_t _rdpkey_reg(int line) {
- unsigned int pkey_reg = __rdpkey_reg();
- pkey_reg_t pkey_reg = __rdpkey_reg();
- dprintf4("rdpkey_reg(line=%d) pkey_reg: %x shadow: %x\n",
- dprintf4("rdpkey_reg(line=%d) pkey_reg: %016lx shadow: %016lx\n", line, pkey_reg, shadow_pkey_reg); assert(pkey_reg == shadow_pkey_reg);
Hmm. So we're using %lx for an int? Doesn't the compiler complain about this? -- To unsubscribe from this list: send the line "unsubscribe linux-kselftest" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Dave Hansen dave.hansen@intel.com writes:
On 02/21/2018 05:55 PM, Ram Pai wrote:
-static inline unsigned int _rdpkey_reg(int line) +static inline pkey_reg_t _rdpkey_reg(int line) {
- unsigned int pkey_reg = __rdpkey_reg();
- pkey_reg_t pkey_reg = __rdpkey_reg();
- dprintf4("rdpkey_reg(line=%d) pkey_reg: %x shadow: %x\n",
- dprintf4("rdpkey_reg(line=%d) pkey_reg: %016lx shadow: %016lx\n", line, pkey_reg, shadow_pkey_reg); assert(pkey_reg == shadow_pkey_reg);
Hmm. So we're using %lx for an int? Doesn't the compiler complain about this?
It doesn't because dprintf4() doesn't have the annotation that tells the compiler that it takes printf-like arguments. Once I add it:
--- a/tools/testing/selftests/vm/pkey-helpers.h +++ b/tools/testing/selftests/vm/pkey-helpers.h @@ -54,6 +54,10 @@ #define DPRINT_IN_SIGNAL_BUF_SIZE 4096 extern int dprint_in_signal; extern char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE]; + +#ifdef __GNUC__ +__attribute__((format(printf, 1, 2))) +#endif static inline void sigsafe_printf(const char *format, ...) { va_list ap;
Then it does complain about it. I'm working on a fix where each arch will define a format string to use for its pkey_reg_t and use it like this:
--- a/tools/testing/selftests/vm/pkey-helpers.h +++ b/tools/testing/selftests/vm/pkey-helpers.h @@ -19,6 +19,7 @@ #define u32 uint32_t #define u64 uint64_t #define pkey_reg_t u32 +#define PKEY_REG_FMT "%016x"
#ifdef __i386__ #ifndef SYS_mprotect_key @@ -112,7 +113,8 @@ static inline pkey_reg_t _read_pkey_reg(int line) { pkey_reg_t pkey_reg = __read_pkey_reg();
- dprintf4("read_pkey_reg(line=%d) pkey_reg: %016lx shadow: %016lx\n", + dprintf4("read_pkey_reg(line=%d) pkey_reg: "PKEY_REG_FMT + " shadow: "PKEY_REG_FMT"\n", line, pkey_reg, shadow_pkey_reg); assert(pkey_reg == shadow_pkey_reg);
-- Thiago Jung Bauermann IBM Linux Technology Center
-- To unsubscribe from this list: send the line "unsubscribe linux-kselftest" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
helper functions to handler shadow pkey register
cc: Dave Hansen dave.hansen@intel.com cc: Florian Weimer fweimer@redhat.com Signed-off-by: Ram Pai linuxram@us.ibm.com --- tools/testing/selftests/vm/pkey-helpers.h | 27 ++++++++++++++++++++ tools/testing/selftests/vm/protection_keys.c | 34 ++++++++++++++++--------- 2 files changed, 49 insertions(+), 12 deletions(-)
diff --git a/tools/testing/selftests/vm/pkey-helpers.h b/tools/testing/selftests/vm/pkey-helpers.h index b6c2133..7c979ad 100644 --- a/tools/testing/selftests/vm/pkey-helpers.h +++ b/tools/testing/selftests/vm/pkey-helpers.h @@ -44,6 +44,33 @@ #define DEBUG_LEVEL 0 #endif #define DPRINT_IN_SIGNAL_BUF_SIZE 4096 + +static inline u32 pkey_to_shift(int pkey) +{ + return pkey * PKEY_BITS_PER_PKEY; +} + +static inline pkey_reg_t reset_bits(int pkey, pkey_reg_t bits) +{ + u32 shift = pkey_to_shift(pkey); + + return ~(bits << shift); +} + +static inline pkey_reg_t left_shift_bits(int pkey, pkey_reg_t bits) +{ + u32 shift = pkey_to_shift(pkey); + + return (bits << shift); +} + +static inline pkey_reg_t right_shift_bits(int pkey, pkey_reg_t bits) +{ + u32 shift = pkey_to_shift(pkey); + + return (bits >> shift); +} + extern int dprint_in_signal; extern char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE]; static inline void sigsafe_printf(const char *format, ...) diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index 3ef2569..83216c5 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -374,7 +374,7 @@ u32 pkey_get(int pkey, unsigned long flags) __func__, pkey, flags, 0, 0); dprintf2("%s() raw pkey_reg: %x\n", __func__, pkey_reg);
- shifted_pkey_reg = (pkey_reg >> (pkey * PKEY_BITS_PER_PKEY)); + shifted_pkey_reg = right_shift_bits(pkey, pkey_reg); dprintf2("%s() shifted_pkey_reg: %x\n", __func__, shifted_pkey_reg); masked_pkey_reg = shifted_pkey_reg & mask; dprintf2("%s() masked pkey_reg: %x\n", __func__, masked_pkey_reg); @@ -397,9 +397,9 @@ int pkey_set(int pkey, unsigned long rights, unsigned long flags) /* copy old pkey_reg */ new_pkey_reg = old_pkey_reg; /* mask out bits from pkey in old value: */ - new_pkey_reg &= ~(mask << (pkey * PKEY_BITS_PER_PKEY)); + new_pkey_reg &= reset_bits(pkey, mask); /* OR in new bits for pkey: */ - new_pkey_reg |= (rights << (pkey * PKEY_BITS_PER_PKEY)); + new_pkey_reg |= left_shift_bits(pkey, rights);
__wrpkey_reg(new_pkey_reg);
@@ -430,7 +430,7 @@ void pkey_disable_set(int pkey, int flags) ret = pkey_set(pkey, pkey_rights, syscall_flags); assert(!ret); /*pkey_reg and flags have the same format */ - shadow_pkey_reg |= flags << (pkey * 2); + shadow_pkey_reg |= left_shift_bits(pkey, flags); dprintf1("%s(%d) shadow: 0x%016lx\n", __func__, pkey, shadow_pkey_reg);
@@ -465,7 +465,7 @@ void pkey_disable_clear(int pkey, int flags)
ret = pkey_set(pkey, pkey_rights, 0); /* pkey_reg and flags have the same format */ - shadow_pkey_reg &= ~(flags << (pkey * 2)); + shadow_pkey_reg &= reset_bits(pkey, flags); pkey_assert(ret >= 0);
pkey_rights = pkey_get(pkey, syscall_flags); @@ -523,6 +523,21 @@ int sys_pkey_alloc(unsigned long flags, unsigned long init_val) return ret; }
+void pkey_setup_shadow(void) +{ + shadow_pkey_reg = __rdpkey_reg(); +} + +void pkey_reset_shadow(u32 key) +{ + shadow_pkey_reg &= reset_bits(key, 0x3); +} + +void pkey_set_shadow(u32 key, u64 init_val) +{ + shadow_pkey_reg |= left_shift_bits(key, init_val); +} + int alloc_pkey(void) { int ret; @@ -540,7 +555,7 @@ int alloc_pkey(void) shadow_pkey_reg); if (ret) { /* clear both the bits: */ - shadow_pkey_reg &= ~(0x3 << (ret * 2)); + pkey_reset_shadow(ret); dprintf4("%s()::%d, ret: %d pkey_reg: 0x%016lx " "shadow: 0x%016lx\n", __func__, @@ -550,7 +565,7 @@ int alloc_pkey(void) * move the new state in from init_val * (remember, we cheated and init_val == pkey_reg format) */ - shadow_pkey_reg |= (init_val << (ret * 2)); + pkey_set_shadow(ret, init_val); } dprintf4("%s()::%d, ret: %d pkey_reg: 0x%016lx shadow: 0x%016lx\n", __func__, __LINE__, ret, __rdpkey_reg(), @@ -1322,11 +1337,6 @@ void run_tests_once(void) iteration_nr++; }
-void pkey_setup_shadow(void) -{ - shadow_pkey_reg = __rdpkey_reg(); -} - int main(void) { int nr_iterations = 22;
On 02/21/2018 05:55 PM, Ram Pai wrote:
+static inline u32 pkey_to_shift(int pkey) +{
- return pkey * PKEY_BITS_PER_PKEY;
+}
pkey_bit_position(), perhaps?
+static inline pkey_reg_t reset_bits(int pkey, pkey_reg_t bits) +{
- u32 shift = pkey_to_shift(pkey);
- return ~(bits << shift);
+}
I'd prefer clear_pkey_flags() or maybe clear_pkey_bits(). "reset" can mean "reset to 0" or "reset to 1".
Also, why the u32 here? Isn't an int more appropriate?
+static inline pkey_reg_t left_shift_bits(int pkey, pkey_reg_t bits) +{
- u32 shift = pkey_to_shift(pkey);
- return (bits << shift);
+}
+static inline pkey_reg_t right_shift_bits(int pkey, pkey_reg_t bits) +{
- u32 shift = pkey_to_shift(pkey);
- return (bits >> shift);
+}
Some comments on these would be handy. Basically that this takes a per-key flags value and puts it at the right position so it can be shoved in the register. -- To unsubscribe from this list: send the line "unsubscribe linux-kselftest" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
If the flag is 0, no bits will be set. Hence we cant expect the resulting bitmap to have a higher value than what it was earlier.
cc: Dave Hansen dave.hansen@intel.com cc: Florian Weimer fweimer@redhat.com Signed-off-by: Ram Pai linuxram@us.ibm.com --- tools/testing/selftests/vm/protection_keys.c | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index 83216c5..0109388 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -443,7 +443,7 @@ void pkey_disable_set(int pkey, int flags) dprintf1("%s(%d) pkey_reg: 0x%lx\n", __func__, pkey, rdpkey_reg()); if (flags) - pkey_assert(rdpkey_reg() > orig_pkey_reg); + pkey_assert(rdpkey_reg() >= orig_pkey_reg); dprintf1("END<---%s(%d, 0x%x)\n", __func__, pkey, flags); }
On 02/21/2018 05:55 PM, Ram Pai wrote:
If the flag is 0, no bits will be set. Hence we cant expect the resulting bitmap to have a higher value than what it was earlier.
cc: Dave Hansen dave.hansen@intel.com cc: Florian Weimer fweimer@redhat.com Signed-off-by: Ram Pai linuxram@us.ibm.com
tools/testing/selftests/vm/protection_keys.c | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index 83216c5..0109388 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -443,7 +443,7 @@ void pkey_disable_set(int pkey, int flags) dprintf1("%s(%d) pkey_reg: 0x%lx\n", __func__, pkey, rdpkey_reg()); if (flags)
pkey_assert(rdpkey_reg() > orig_pkey_reg);
dprintf1("END<---%s(%d, 0x%x)\n", __func__, pkey, flags);pkey_assert(rdpkey_reg() >= orig_pkey_reg);
}
I'm not sure about this one. Did this cause a problem for you?
Why would you call this and ask no bits to be set?
-- To unsubscribe from this list: send the line "unsubscribe linux-kselftest" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
instead of clearing the bits, pkey_disable_clear() was setting the bits. Fixed it.
Also fixed a wrong assertion in that function. When bits are cleared, the resulting bit value will be less than the original.
cc: Dave Hansen dave.hansen@intel.com cc: Florian Weimer fweimer@redhat.com Signed-off-by: Ram Pai linuxram@us.ibm.com --- tools/testing/selftests/vm/protection_keys.c | 4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index 0109388..ca54a95 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -461,7 +461,7 @@ void pkey_disable_clear(int pkey, int flags) pkey, pkey, pkey_rights); pkey_assert(pkey_rights >= 0);
- pkey_rights |= flags; + pkey_rights &= ~flags;
ret = pkey_set(pkey, pkey_rights, 0); /* pkey_reg and flags have the same format */ @@ -475,7 +475,7 @@ void pkey_disable_clear(int pkey, int flags) dprintf1("%s(%d) pkey_reg: 0x%016lx\n", __func__, pkey, rdpkey_reg()); if (flags) - assert(rdpkey_reg() > orig_pkey_reg); + assert(rdpkey_reg() < orig_pkey_reg); }
void pkey_write_allow(int pkey)
On 02/21/2018 05:55 PM, Ram Pai wrote:
--- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -461,7 +461,7 @@ void pkey_disable_clear(int pkey, int flags) pkey, pkey, pkey_rights); pkey_assert(pkey_rights >= 0);
- pkey_rights |= flags;
- pkey_rights &= ~flags;
ret = pkey_set(pkey, pkey_rights, 0); /* pkey_reg and flags have the same format */ @@ -475,7 +475,7 @@ void pkey_disable_clear(int pkey, int flags) dprintf1("%s(%d) pkey_reg: 0x%016lx\n", __func__, pkey, rdpkey_reg()); if (flags)
assert(rdpkey_reg() > orig_pkey_reg);
assert(rdpkey_reg() < orig_pkey_reg);
} void pkey_write_allow(int pkey)
This seems so horribly wrong that I wonder how it worked in the first place. Any idea? -- To unsubscribe from this list: send the line "unsubscribe linux-kselftest" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Dave Hansen dave.hansen@intel.com writes:
On 02/21/2018 05:55 PM, Ram Pai wrote:
--- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -461,7 +461,7 @@ void pkey_disable_clear(int pkey, int flags) pkey, pkey, pkey_rights); pkey_assert(pkey_rights >= 0);
- pkey_rights |= flags;
pkey_rights &= ~flags;
ret = pkey_set(pkey, pkey_rights, 0); /* pkey_reg and flags have the same format */
@@ -475,7 +475,7 @@ void pkey_disable_clear(int pkey, int flags) dprintf1("%s(%d) pkey_reg: 0x%016lx\n", __func__, pkey, rdpkey_reg()); if (flags)
assert(rdpkey_reg() > orig_pkey_reg);
assert(rdpkey_reg() < orig_pkey_reg);
}
void pkey_write_allow(int pkey)
This seems so horribly wrong that I wonder how it worked in the first place. Any idea?
The code simply wasn't used. pkey_disable_clear() is called by pkey_write_allow() and pkey_access_allow(), but before this patch series nothing called either of these functions.
-- Thiago Jung Bauermann IBM Linux Technology Center
-- To unsubscribe from this list: send the line "unsubscribe linux-kselftest" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 03/28/2018 01:47 PM, Thiago Jung Bauermann wrote:
if (flags)
assert(rdpkey_reg() > orig_pkey_reg);
assert(rdpkey_reg() < orig_pkey_reg);
}
void pkey_write_allow(int pkey)
This seems so horribly wrong that I wonder how it worked in the first place. Any idea?
The code simply wasn't used. pkey_disable_clear() is called by pkey_write_allow() and pkey_access_allow(), but before this patch series nothing called either of these functions.
Ahh, that explains it. Can that get stuck in the changelog, please? -- To unsubscribe from this list: send the line "unsubscribe linux-kselftest" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Dave Hansen dave.hansen@intel.com writes:
On 03/28/2018 01:47 PM, Thiago Jung Bauermann wrote:
if (flags)
assert(rdpkey_reg() > orig_pkey_reg);
assert(rdpkey_reg() < orig_pkey_reg);
}
void pkey_write_allow(int pkey)
This seems so horribly wrong that I wonder how it worked in the first place. Any idea?
The code simply wasn't used. pkey_disable_clear() is called by pkey_write_allow() and pkey_access_allow(), but before this patch series nothing called either of these functions.
Ahh, that explains it. Can that get stuck in the changelog, please?
Yes, will be in the next version.
When a key is freed, the key is no more effective. Clear the bits corresponding to the pkey in the shadow register. Otherwise it will carry some spurious bits which can trigger false-positive asserts.
cc: Dave Hansen dave.hansen@intel.com cc: Florian Weimer fweimer@redhat.com Signed-off-by: Ram Pai linuxram@us.ibm.com --- tools/testing/selftests/vm/protection_keys.c | 3 +++ 1 files changed, 3 insertions(+), 0 deletions(-)
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index ca54a95..aaf9f09 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -582,6 +582,9 @@ int alloc_pkey(void) int sys_pkey_free(unsigned long pkey) { int ret = syscall(SYS_pkey_free, pkey); + + if (!ret) + shadow_pkey_reg &= reset_bits(pkey, PKEY_DISABLE_ACCESS); dprintf1("%s(pkey=%ld) syscall ret: %d\n", __func__, pkey, ret); return ret; }
On 02/21/2018 05:55 PM, Ram Pai wrote:
When a key is freed, the key is no more effective. Clear the bits corresponding to the pkey in the shadow register. Otherwise it will carry some spurious bits which can trigger false-positive asserts.
...
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index ca54a95..aaf9f09 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -582,6 +582,9 @@ int alloc_pkey(void) int sys_pkey_free(unsigned long pkey) { int ret = syscall(SYS_pkey_free, pkey);
- if (!ret)
dprintf1("%s(pkey=%ld) syscall ret: %d\n", __func__, pkey, ret); return ret;shadow_pkey_reg &= reset_bits(pkey, PKEY_DISABLE_ACCESS);
}
Did this cause problems for you in practice?
On x86, sys_pkey_free() does not affect PKRU, so this isn't quite right. I'd much rather have the actual tests explicitly clear the PKRU bits and also in the process clear the shadow bits. -- To unsubscribe from this list: send the line "unsubscribe linux-kselftest" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
alloc_random_pkey() was allocating the same pkey every time. Not all pkeys were geting tested. fixed it.
cc: Dave Hansen dave.hansen@intel.com cc: Florian Weimer fweimer@redhat.com Signed-off-by: Ram Pai linuxram@us.ibm.com --- tools/testing/selftests/vm/protection_keys.c | 10 +++++++--- 1 files changed, 7 insertions(+), 3 deletions(-)
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index aaf9f09..2e4b636 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -24,6 +24,7 @@ #define _GNU_SOURCE #include <errno.h> #include <linux/futex.h> +#include <time.h> #include <sys/time.h> #include <sys/syscall.h> #include <string.h> @@ -602,13 +603,15 @@ int alloc_random_pkey(void) int alloced_pkeys[NR_PKEYS]; int nr_alloced = 0; int random_index; + memset(alloced_pkeys, 0, sizeof(alloced_pkeys)); + srand((unsigned int)time(NULL));
/* allocate every possible key and make a note of which ones we got */ max_nr_pkey_allocs = NR_PKEYS; - max_nr_pkey_allocs = 1; for (i = 0; i < max_nr_pkey_allocs; i++) { int new_pkey = alloc_pkey(); + if (new_pkey < 0) break; alloced_pkeys[nr_alloced++] = new_pkey; @@ -624,13 +627,14 @@ int alloc_random_pkey(void) /* go through the allocated ones that we did not want and free them */ for (i = 0; i < nr_alloced; i++) { int free_ret; + if (!alloced_pkeys[i]) continue; free_ret = sys_pkey_free(alloced_pkeys[i]); pkey_assert(!free_ret); } - dprintf1("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n", __func__, - __LINE__, ret, __rdpkey_reg(), shadow_pkey_reg); + dprintf1("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%016lx\n", + __func__, __LINE__, ret, __rdpkey_reg(), shadow_pkey_reg); return ret; }
On 02/21/2018 05:55 PM, Ram Pai wrote:
alloc_random_pkey() was allocating the same pkey every time. Not all pkeys were geting tested. fixed it.
...
@@ -602,13 +603,15 @@ int alloc_random_pkey(void) int alloced_pkeys[NR_PKEYS]; int nr_alloced = 0; int random_index;
- memset(alloced_pkeys, 0, sizeof(alloced_pkeys));
- srand((unsigned int)time(NULL));
/* allocate every possible key and make a note of which ones we got */ max_nr_pkey_allocs = NR_PKEYS;
- max_nr_pkey_allocs = 1; for (i = 0; i < max_nr_pkey_allocs; i++) { int new_pkey = alloc_pkey();
The srand() is probably useful, but won't this always just do a single alloc_pkey() now? That seems like it will mean we always use the first one the kernel gives us, which isn't random.
- dprintf1("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n", __func__,
__LINE__, ret, __rdpkey_reg(), shadow_pkey_reg);
- dprintf1("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%016lx\n",
return ret;__func__, __LINE__, ret, __rdpkey_reg(), shadow_pkey_reg);
}
This belonged in the pkey_reg_t patch, I think.
-- To unsubscribe from this list: send the line "unsubscribe linux-kselftest" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
open_hugepage_file() <- opens the huge page file get_start_key() <-- provides the first non-reserved key.
cc: Dave Hansen dave.hansen@intel.com cc: Florian Weimer fweimer@redhat.com Signed-off-by: Ram Pai linuxram@us.ibm.com --- tools/testing/selftests/vm/pkey-helpers.h | 11 +++++++++++ tools/testing/selftests/vm/protection_keys.c | 6 +++--- 2 files changed, 14 insertions(+), 3 deletions(-)
diff --git a/tools/testing/selftests/vm/pkey-helpers.h b/tools/testing/selftests/vm/pkey-helpers.h index 7c979ad..c8f5739 100644 --- a/tools/testing/selftests/vm/pkey-helpers.h +++ b/tools/testing/selftests/vm/pkey-helpers.h @@ -304,3 +304,14 @@ static inline void __page_o_noops(void) } \ } while (0) #define raw_assert(cond) assert(cond) + +static inline int open_hugepage_file(int flag) +{ + return open("/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages", + O_RDONLY); +} + +static inline int get_start_key(void) +{ + return 1; +} diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index 2e4b636..254b66d 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -809,7 +809,7 @@ void setup_hugetlbfs(void) * Now go make sure that we got the pages and that they * are 2M pages. Someone might have made 1G the default. */ - fd = open("/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages", O_RDONLY); + fd = open_hugepage_file(O_RDONLY); if (fd < 0) { perror("opening sysfs 2M hugetlb config"); return; @@ -1087,10 +1087,10 @@ void test_kernel_gup_write_to_write_disabled_region(int *ptr, u16 pkey) void test_pkey_syscalls_on_non_allocated_pkey(int *ptr, u16 pkey) { int err; - int i; + int i = get_start_key();
/* Note: 0 is the default pkey, so don't mess with it */ - for (i = 1; i < NR_PKEYS; i++) { + for (; i < NR_PKEYS; i++) { if (pkey == i) continue;
On 02/21/2018 05:55 PM, Ram Pai wrote:
open_hugepage_file() <- opens the huge page file get_start_key() <-- provides the first non-reserved key.
Looks reasonable.
Reviewed-by: Dave Hansen dave.hansen@intel.com
-- To unsubscribe from this list: send the line "unsubscribe linux-kselftest" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
expected_pkey_fault() is comparing the contents of pkey register with 0. This may not be true all the time. There could be bits set by default by the architecture which can never be changed. Hence compare the value against shadow pkey register, which is supposed to track the bits accurately all throughout
cc: Dave Hansen dave.hansen@intel.com cc: Florian Weimer fweimer@redhat.com Signed-off-by: Ram Pai linuxram@us.ibm.com --- tools/testing/selftests/vm/protection_keys.c | 4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index 254b66d..6054093 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -926,10 +926,10 @@ void expected_pkey_fault(int pkey) pkey_assert(last_pkey_faults + 1 == pkey_faults); pkey_assert(last_si_pkey == pkey); /* - * The signal handler shold have cleared out PKEY register to let the + * The signal handler shold have cleared out pkey-register to let the * test program continue. We now have to restore it. */ - if (__rdpkey_reg() != 0) + if (__rdpkey_reg() != shadow_pkey_reg) pkey_assert(0);
__wrpkey_reg(shadow_pkey_reg);
On 02/21/2018 05:55 PM, Ram Pai wrote:
expected_pkey_fault() is comparing the contents of pkey register with 0. This may not be true all the time. There could be bits set by default by the architecture which can never be changed. Hence compare the value against shadow pkey register, which is supposed to track the bits accurately all throughout
cc: Dave Hansen dave.hansen@intel.com cc: Florian Weimer fweimer@redhat.com Signed-off-by: Ram Pai linuxram@us.ibm.com
tools/testing/selftests/vm/protection_keys.c | 4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index 254b66d..6054093 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -926,10 +926,10 @@ void expected_pkey_fault(int pkey) pkey_assert(last_pkey_faults + 1 == pkey_faults); pkey_assert(last_si_pkey == pkey); /*
* The signal handler shold have cleared out PKEY register to let the
* The signal handler shold have cleared out pkey-register to let the
Heh, you randomly changed the formatting and didn't bother with my awful typo. :)
* test program continue. We now have to restore it. */
- if (__rdpkey_reg() != 0)
- if (__rdpkey_reg() != shadow_pkey_reg) pkey_assert(0);
__wrpkey_reg(shadow_pkey_reg);
I don't think this should be "shadow_pkey_reg". This was just trying to double-check that the signal handler messed around with PKRU the way we expected.
We could also just check that the disable bits for 'pkey' are clear at this point. That would be almost as good. -- To unsubscribe from this list: send the line "unsubscribe linux-kselftest" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
cleanup the code to satisfy coding styles.
cc: Dave Hansen dave.hansen@intel.com cc: Florian Weimer fweimer@redhat.com Signed-off-by: Ram Pai linuxram@us.ibm.com --- tools/testing/selftests/vm/protection_keys.c | 81 ++++++++++++++------------ 1 files changed, 43 insertions(+), 38 deletions(-)
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index 6054093..6fdd8f5 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -4,7 +4,7 @@ * * There are examples in here of: * * how to set protection keys on memory - * * how to set/clear bits in pkey registers (the rights register) + * * how to set/clear bits in Protection Key registers (the rights register) * * how to handle SEGV_PKUERR signals and extract pkey-relevant * information from the siginfo * @@ -13,13 +13,18 @@ * prefault pages in at malloc, or not * protect MPX bounds tables with protection keys? * make sure VMA splitting/merging is working correctly - * OOMs can destroy mm->mmap (see exit_mmap()), so make sure it is immune to pkeys - * look for pkey "leaks" where it is still set on a VMA but "freed" back to the kernel - * do a plain mprotect() to a mprotect_pkey() area and make sure the pkey sticks + * OOMs can destroy mm->mmap (see exit_mmap()), + * so make sure it is immune to pkeys + * look for pkey "leaks" where it is still set on a VMA + * but "freed" back to the kernel + * do a plain mprotect() to a mprotect_pkey() area and make + * sure the pkey sticks * * Compile like this: - * gcc -o protection_keys -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm - * gcc -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm + * gcc -o protection_keys -O2 -g -std=gnu99 + * -pthread -Wall protection_keys.c -lrt -ldl -lm + * gcc -m32 -o protection_keys_32 -O2 -g -std=gnu99 + * -pthread -Wall protection_keys.c -lrt -ldl -lm */ #define _GNU_SOURCE #include <errno.h> @@ -251,26 +256,11 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) dprintf1("signal pkey_reg from pkey_reg: %016lx\n", __rdpkey_reg()); dprintf1("pkey from siginfo: %jx\n", siginfo_pkey); *(u64 *)pkey_reg_ptr = 0x00000000; - dprintf1("WARNING: set PRKU=0 to allow faulting instruction to continue\n"); + dprintf1("WARNING: set PKEY_REG=0 to allow faulting instruction " + "to continue\n"); pkey_faults++; dprintf1("<<<<==================================================\n"); return; - if (trapno == 14) { - fprintf(stderr, - "ERROR: In signal handler, page fault, trapno = %d, ip = %016lx\n", - trapno, ip); - fprintf(stderr, "si_addr %p\n", si->si_addr); - fprintf(stderr, "REG_ERR: %lx\n", - (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]); - exit(1); - } else { - fprintf(stderr, "unexpected trap %d! at 0x%lx\n", trapno, ip); - fprintf(stderr, "si_addr %p\n", si->si_addr); - fprintf(stderr, "REG_ERR: %lx\n", - (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]); - exit(2); - } - dprint_in_signal = 0; }
int wait_all_children(void) @@ -415,7 +405,7 @@ void pkey_disable_set(int pkey, int flags) { unsigned long syscall_flags = 0; int ret; - int pkey_rights; + u32 pkey_rights; pkey_reg_t orig_pkey_reg = rdpkey_reg();
dprintf1("START->%s(%d, 0x%x)\n", __func__, @@ -453,7 +443,7 @@ void pkey_disable_clear(int pkey, int flags) { unsigned long syscall_flags = 0; int ret; - int pkey_rights = pkey_get(pkey, syscall_flags); + u32 pkey_rights = pkey_get(pkey, syscall_flags); pkey_reg_t orig_pkey_reg = rdpkey_reg();
pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)); @@ -516,9 +506,10 @@ int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, return sret; }
-int sys_pkey_alloc(unsigned long flags, unsigned long init_val) +int sys_pkey_alloc(unsigned long flags, u64 init_val) { int ret = syscall(SYS_pkey_alloc, flags, init_val); + dprintf1("%s(flags=%lx, init_val=%lx) syscall ret: %d errno: %d\n", __func__, flags, init_val, ret, errno); return ret; @@ -542,7 +533,7 @@ void pkey_set_shadow(u32 key, u64 init_val) int alloc_pkey(void) { int ret; - unsigned long init_val = 0x0; + u64 init_val = 0x0;
dprintf1("%s()::%d, pkey_reg: 0x%016lx shadow: %016lx\n", __func__, __LINE__, __rdpkey_reg(), shadow_pkey_reg); @@ -692,7 +683,9 @@ void record_pkey_malloc(void *ptr, long size) /* every record is full */ size_t old_nr_records = nr_pkey_malloc_records; size_t new_nr_records = (nr_pkey_malloc_records * 2 + 1); - size_t new_size = new_nr_records * sizeof(struct pkey_malloc_record); + size_t new_size = new_nr_records * + sizeof(struct pkey_malloc_record); + dprintf2("new_nr_records: %zd\n", new_nr_records); dprintf2("new_size: %zd\n", new_size); pkey_malloc_records = realloc(pkey_malloc_records, new_size); @@ -716,9 +709,11 @@ void free_pkey_malloc(void *ptr) { long i; int ret; + dprintf3("%s(%p)\n", __func__, ptr); for (i = 0; i < nr_pkey_malloc_records; i++) { struct pkey_malloc_record *rec = &pkey_malloc_records[i]; + dprintf4("looking for ptr %p at record[%ld/%p]: {%p, %ld}\n", ptr, i, rec, rec->ptr, rec->size); if ((ptr < rec->ptr) || @@ -799,11 +794,13 @@ void setup_hugetlbfs(void) char buf[] = "123";
if (geteuid() != 0) { - fprintf(stderr, "WARNING: not run as root, can not do hugetlb test\n"); + fprintf(stderr, + "WARNING: not run as root, can not do hugetlb test\n"); return; }
- cat_into_file(__stringify(GET_NR_HUGE_PAGES), "/proc/sys/vm/nr_hugepages"); + cat_into_file(__stringify(GET_NR_HUGE_PAGES), + "/proc/sys/vm/nr_hugepages");
/* * Now go make sure that we got the pages and that they @@ -824,7 +821,8 @@ void setup_hugetlbfs(void) }
if (atoi(buf) != GET_NR_HUGE_PAGES) { - fprintf(stderr, "could not confirm 2M pages, got: '%s' expected %d\n", + fprintf(stderr, "could not confirm 2M pages, got:" + " '%s' expected %d\n", buf, GET_NR_HUGE_PAGES); return; } @@ -957,6 +955,7 @@ void __save_test_fd(int fd) int get_test_read_fd(void) { int test_fd = open("/etc/passwd", O_RDONLY); + __save_test_fd(test_fd); return test_fd; } @@ -998,7 +997,8 @@ void test_read_of_access_disabled_region(int *ptr, u16 pkey) { int ptr_contents;
- dprintf1("disabling access to PKEY[%02d], doing read @ %p\n", pkey, ptr); + dprintf1("disabling access to PKEY[%02d], doing read @ %p\n", + pkey, ptr); rdpkey_reg(); pkey_access_deny(pkey); ptr_contents = read_ptr(ptr); @@ -1120,13 +1120,14 @@ void test_pkey_syscalls_bad_args(int *ptr, u16 pkey) /* Assumes that all pkeys other than 'pkey' are unallocated */ void test_pkey_alloc_exhaust(int *ptr, u16 pkey) { - int err; + int err = 0; int allocated_pkeys[NR_PKEYS] = {0}; int nr_allocated_pkeys = 0; int i;
for (i = 0; i < NR_PKEYS*2; i++) { int new_pkey; + dprintf1("%s() alloc loop: %d\n", __func__, i); new_pkey = alloc_pkey(); dprintf4("%s()::%d, err: %d pkey_reg: 0x%016lx " @@ -1134,9 +1135,11 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey) __func__, __LINE__, err, __rdpkey_reg(), shadow_pkey_reg); rdpkey_reg(); /* for shadow checking */ - dprintf2("%s() errno: %d ENOSPC: %d\n", __func__, errno, ENOSPC); + dprintf2("%s() errno: %d ENOSPC: %d\n", + __func__, errno, ENOSPC); if ((new_pkey == -1) && (errno == ENOSPC)) { - dprintf2("%s() failed to allocate pkey after %d tries\n", + dprintf2("%s() failed to allocate pkey " + "after %d tries\n", __func__, nr_allocated_pkeys); break; } @@ -1338,7 +1341,8 @@ void run_tests_once(void) tracing_off(); close_test_fds();
- printf("test %2d PASSED (iteration %d)\n", test_nr, iteration_nr); + printf("test %2d PASSED (iteration %d)\n", + test_nr, iteration_nr); dprintf1("======================\n\n"); } iteration_nr++; @@ -1350,7 +1354,7 @@ int main(void)
setup_handlers();
- printf("has pku: %d\n", cpu_has_pku()); + printf("has pkey: %d\n", cpu_has_pku());
if (!cpu_has_pku()) { int size = PAGE_SIZE; @@ -1358,7 +1362,8 @@ int main(void)
printf("running PKEY tests for unsupported CPU/OS\n");
- ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + ptr = mmap(NULL, size, PROT_NONE, + MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); assert(ptr != (void *)-1); test_mprotect_pkey_on_unsupported_cpu(ptr, 1); exit(0);
On 02/21/2018 05:55 PM, Ram Pai wrote:
cleanup the code to satisfy coding styles.
cc: Dave Hansen dave.hansen@intel.com cc: Florian Weimer fweimer@redhat.com Signed-off-by: Ram Pai linuxram@us.ibm.com
tools/testing/selftests/vm/protection_keys.c | 81 ++++++++++++++------------ 1 files changed, 43 insertions(+), 38 deletions(-)
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index 6054093..6fdd8f5 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -4,7 +4,7 @@
- There are examples in here of:
- how to set protection keys on memory
- how to set/clear bits in pkey registers (the rights register)
- how to set/clear bits in Protection Key registers (the rights register)
I don't think CodingStyle says to do this. :)
- how to handle SEGV_PKUERR signals and extract pkey-relevant
- information from the siginfo
@@ -13,13 +13,18 @@
- prefault pages in at malloc, or not
- protect MPX bounds tables with protection keys?
- make sure VMA splitting/merging is working correctly
- OOMs can destroy mm->mmap (see exit_mmap()), so make sure it is immune to pkeys
- look for pkey "leaks" where it is still set on a VMA but "freed" back to the kernel
- do a plain mprotect() to a mprotect_pkey() area and make sure the pkey sticks
- OOMs can destroy mm->mmap (see exit_mmap()),
so make sure it is immune to pkeys
- look for pkey "leaks" where it is still set on a VMA
but "freed" back to the kernel
- do a plain mprotect() to a mprotect_pkey() area and make
sure the pkey sticks
Ram, I'm not sure where this came from, but this looks horrid. Please don't do this to the file
- Compile like this:
- gcc -o protection_keys -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
- gcc -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
- gcc -o protection_keys -O2 -g -std=gnu99
-pthread -Wall protection_keys.c -lrt -ldl -lm
- gcc -m32 -o protection_keys_32 -O2 -g -std=gnu99
*/
-pthread -Wall protection_keys.c -lrt -ldl -lm
Please just leave this, or remove it from the file. It was a long line so it could be copied and pasted, this ruins that.
#define _GNU_SOURCE #include <errno.h> @@ -251,26 +256,11 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) dprintf1("signal pkey_reg from pkey_reg: %016lx\n", __rdpkey_reg()); dprintf1("pkey from siginfo: %jx\n", siginfo_pkey); *(u64 *)pkey_reg_ptr = 0x00000000;
- dprintf1("WARNING: set PRKU=0 to allow faulting instruction to continue\n");
- dprintf1("WARNING: set PKEY_REG=0 to allow faulting instruction "
pkey_faults++; dprintf1("<<<<==================================================\n"); return;"to continue\n");
- if (trapno == 14) {
fprintf(stderr,
"ERROR: In signal handler, page fault, trapno = %d, ip = %016lx\n",
trapno, ip);
fprintf(stderr, "si_addr %p\n", si->si_addr);
fprintf(stderr, "REG_ERR: %lx\n",
(unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]);
exit(1);
- } else {
fprintf(stderr, "unexpected trap %d! at 0x%lx\n", trapno, ip);
fprintf(stderr, "si_addr %p\n", si->si_addr);
fprintf(stderr, "REG_ERR: %lx\n",
(unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]);
exit(2);
- }
- dprint_in_signal = 0;
}
I think this is just randomly removing code now.
I think you should probably just drop this patch. It's not really brining anything useful. -- To unsubscribe from this list: send the line "unsubscribe linux-kselftest" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Introduce powerpc implementation for the various abstractions.
cc: Dave Hansen dave.hansen@intel.com cc: Florian Weimer fweimer@redhat.com Signed-off-by: Ram Pai linuxram@us.ibm.com --- tools/testing/selftests/vm/pkey-helpers.h | 128 ++++++++++++++++++++++---- tools/testing/selftests/vm/protection_keys.c | 42 +++++--- 2 files changed, 136 insertions(+), 34 deletions(-)
diff --git a/tools/testing/selftests/vm/pkey-helpers.h b/tools/testing/selftests/vm/pkey-helpers.h index c8f5739..c47aead 100644 --- a/tools/testing/selftests/vm/pkey-helpers.h +++ b/tools/testing/selftests/vm/pkey-helpers.h @@ -18,27 +18,51 @@ #define u16 uint16_t #define u32 uint32_t #define u64 uint64_t -#define pkey_reg_t u32
-#ifdef __i386__ +#if defined(__i386__) || defined(__x86_64__) /* arch */ + +#ifdef __i386__ /* arch */ #define SYS_mprotect_key 380 -#define SYS_pkey_alloc 381 -#define SYS_pkey_free 382 +#define SYS_pkey_alloc 381 +#define SYS_pkey_free 382 #define REG_IP_IDX REG_EIP -#define si_pkey_offset 0x14 -#else +#elif __x86_64__ #define SYS_mprotect_key 329 -#define SYS_pkey_alloc 330 -#define SYS_pkey_free 331 +#define SYS_pkey_alloc 330 +#define SYS_pkey_free 331 #define REG_IP_IDX REG_RIP -#define si_pkey_offset 0x20 -#endif +#endif /* __x86_64__ */ + +#define NR_PKEYS 16 +#define NR_RESERVED_PKEYS 1 +#define PKEY_BITS_PER_PKEY 2 +#define PKEY_DISABLE_ACCESS 0x1 +#define PKEY_DISABLE_WRITE 0x2 +#define HPAGE_SIZE (1UL<<21) +#define pkey_reg_t u32
-#define NR_PKEYS 16 -#define PKEY_BITS_PER_PKEY 2 -#define PKEY_DISABLE_ACCESS 0x1 -#define PKEY_DISABLE_WRITE 0x2 -#define HPAGE_SIZE (1UL<<21) +#elif __powerpc64__ /* arch */ + +#define SYS_mprotect_key 386 +#define SYS_pkey_alloc 384 +#define SYS_pkey_free 385 +#define REG_IP_IDX PT_NIP +#define REG_TRAPNO PT_TRAP +#define gregs gp_regs +#define fpregs fp_regs + +#define NR_PKEYS 32 +#define NR_RESERVED_PKEYS_4K 26 +#define NR_RESERVED_PKEYS_64K 3 +#define PKEY_BITS_PER_PKEY 2 +#define PKEY_DISABLE_ACCESS 0x3 /* disable read and write */ +#define PKEY_DISABLE_WRITE 0x2 +#define HPAGE_SIZE (1UL<<24) +#define pkey_reg_t u64 + +#else /* arch */ + NOT SUPPORTED +#endif /* arch */
#ifndef DEBUG_LEVEL #define DEBUG_LEVEL 0 @@ -47,7 +71,11 @@
static inline u32 pkey_to_shift(int pkey) { +#if defined(__i386__) || defined(__x86_64__) /* arch */ return pkey * PKEY_BITS_PER_PKEY; +#elif __powerpc64__ /* arch */ + return (NR_PKEYS - pkey - 1) * PKEY_BITS_PER_PKEY; +#endif /* arch */ }
static inline pkey_reg_t reset_bits(int pkey, pkey_reg_t bits) @@ -111,6 +139,7 @@ static inline void sigsafe_printf(const char *format, ...) extern pkey_reg_t shadow_pkey_reg; static inline pkey_reg_t __rdpkey_reg(void) { +#if defined(__i386__) || defined(__x86_64__) /* arch */ unsigned int eax, edx; unsigned int ecx = 0; pkey_reg_t pkey_reg; @@ -118,7 +147,13 @@ static inline pkey_reg_t __rdpkey_reg(void) asm volatile(".byte 0x0f,0x01,0xee\n\t" : "=a" (eax), "=d" (edx) : "c" (ecx)); - pkey_reg = eax; +#elif __powerpc64__ /* arch */ + pkey_reg_t eax; + pkey_reg_t pkey_reg; + + asm volatile("mfspr %0, 0xd" : "=r" ((pkey_reg_t)(eax))); +#endif /* arch */ + pkey_reg = (pkey_reg_t)eax; return pkey_reg; }
@@ -138,6 +173,7 @@ static inline pkey_reg_t _rdpkey_reg(int line) static inline void __wrpkey_reg(pkey_reg_t pkey_reg) { pkey_reg_t eax = pkey_reg; +#if defined(__i386__) || defined(__x86_64__) /* arch */ pkey_reg_t ecx = 0; pkey_reg_t edx = 0;
@@ -146,6 +182,14 @@ static inline void __wrpkey_reg(pkey_reg_t pkey_reg) asm volatile(".byte 0x0f,0x01,0xef\n\t" : : "a" (eax), "c" (ecx), "d" (edx)); assert(pkey_reg == __rdpkey_reg()); + +#elif __powerpc64__ /* arch */ + dprintf4("%s() changing %llx to %llx\n", + __func__, __rdpkey_reg(), pkey_reg); + asm volatile("mtspr 0xd, %0" : : "r" ((unsigned long)(eax)) : "memory"); +#endif /* arch */ + dprintf4("%s() pkey register after changing %016lx to %016lx\n", + __func__, __rdpkey_reg(), pkey_reg); }
static inline void wrpkey_reg(pkey_reg_t pkey_reg) @@ -192,6 +236,8 @@ static inline void __pkey_write_allow(int pkey, int do_allow_write) dprintf4("pkey_reg now: %08x\n", rdpkey_reg()); }
+#if defined(__i386__) || defined(__x86_64__) /* arch */ + #define PAGE_SIZE 4096 #define MB (1<<20)
@@ -274,8 +320,18 @@ static inline void __page_o_noops(void) /* 8-bytes of instruction * 512 bytes = 1 page */ asm(".rept 512 ; nopl 0x7eeeeeee(%eax) ; .endr"); } +#elif __powerpc64__ /* arch */
-#endif /* _PKEYS_HELPER_H */ +#define PAGE_SIZE (0x1UL << 16) +static inline int cpu_has_pku(void) +{ + return 1; +} + +/* 8-bytes of instruction * 16384bytes = 1 page */ +#define __page_o_noops() asm(".rept 16384 ; nop; .endr") + +#endif /* arch */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x))) #define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1)) @@ -307,11 +363,47 @@ static inline void __page_o_noops(void)
static inline int open_hugepage_file(int flag) { - return open("/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages", + int fd; + +#if defined(__i386__) || defined(__x86_64__) /* arch */ + fd = open("/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages", O_RDONLY); +#elif __powerpc64__ /* arch */ + fd = open("/sys/kernel/mm/hugepages/hugepages-16384kB/nr_hugepages", + O_RDONLY); +#else /* arch */ + NOT SUPPORTED +#endif /* arch */ + return fd; }
static inline int get_start_key(void) { +#if defined(__i386__) || defined(__x86_64__) /* arch */ return 1; +#elif __powerpc64__ /* arch */ + return 0; +#else /* arch */ + NOT SUPPORTED +#endif /* arch */ +} + +static inline u32 *siginfo_get_pkey_ptr(siginfo_t *si) +{ +#ifdef si_pkey + return &si->si_pkey; +#else + +#ifdef __i386__ /* arch */ +#define si_pkey_offset 0x14 +#elif __x86_64__ /*arch */ +#define si_pkey_offset 0x1C +#elif __powerpc64__ /*arch */ +#define si_pkey_offset 0x1C +#endif /*arch */ + + return (u32 *)(((u8 *)si) + si_pkey_offset); +#endif } + +#endif /* _PKEYS_HELPER_H */ diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index 6fdd8f5..c4c73e6 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -186,17 +186,18 @@ void dump_mem(void *dumpme, int len_bytes)
int pkey_faults; int last_si_pkey = -1; +void pkey_access_allow(int pkey); void signal_handler(int signum, siginfo_t *si, void *vucontext) { ucontext_t *uctxt = vucontext; int trapno; unsigned long ip; char *fpregs; +#if defined(__i386__) || defined(__x86_64__) /* arch */ pkey_reg_t *pkey_reg_ptr; - u64 siginfo_pkey; +#endif /* defined(__i386__) || defined(__x86_64__) */ + u32 siginfo_pkey; u32 *si_pkey_ptr; - int pkey_reg_offset; - fpregset_t fpregset;
dprint_in_signal = 1; dprintf1(">>>>===============SIGSEGV============================\n"); @@ -206,12 +207,14 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO]; ip = uctxt->uc_mcontext.gregs[REG_IP_IDX]; - fpregset = uctxt->uc_mcontext.fpregs; - fpregs = (void *)fpregset; + fpregs = (char *) uctxt->uc_mcontext.fpregs;
dprintf2("%s() trapno: %d ip: 0x%016lx info->si_code: %s/%d\n", __func__, trapno, ip, si_code_str(si->si_code), si->si_code); + +#if defined(__i386__) || defined(__x86_64__) /* arch */ + #ifdef __i386__ /* * 32-bit has some extra padding so that userspace can tell whether @@ -219,22 +222,23 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) * state. We just assume that it is here. */ fpregs += 0x70; -#endif - pkey_reg_offset = pkey_reg_xstate_offset(); - pkey_reg_ptr = (void *)(&fpregs[pkey_reg_offset]); +#endif /* __i386__ */
- dprintf1("siginfo: %p\n", si); - dprintf1(" fpregs: %p\n", fpregs); + pkey_reg_ptr = (void *)(&fpregs[pkey_reg_xstate_offset()]); /* - * If we got a PKEY fault, we *HAVE* to have at least one bit set in + * If we got a key fault, we *HAVE* to have at least one bit set in * here. */ dprintf1("pkey_reg_xstate_offset: %d\n", pkey_reg_xstate_offset()); if (DEBUG_LEVEL > 4) dump_mem(pkey_reg_ptr - 128, 256); pkey_assert(*pkey_reg_ptr); +#endif /* defined(__i386__) || defined(__x86_64__) */
- si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset); + dprintf1("siginfo: %p\n", si); + dprintf1(" fpregs: %p\n", fpregs); + + si_pkey_ptr = siginfo_get_pkey_ptr(si); dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr); dump_mem(si_pkey_ptr - 8, 24); siginfo_pkey = *si_pkey_ptr; @@ -248,19 +252,25 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) exit(4); }
- dprintf1("signal pkey_reg from xsave: %016lx\n", *pkey_reg_ptr); /* * need __rdpkey_reg() version so we do not do shadow_pkey_reg * checking */ dprintf1("signal pkey_reg from pkey_reg: %016lx\n", __rdpkey_reg()); - dprintf1("pkey from siginfo: %jx\n", siginfo_pkey); - *(u64 *)pkey_reg_ptr = 0x00000000; + dprintf1("pkey from siginfo: %lx\n", siginfo_pkey); +#if defined(__i386__) || defined(__x86_64__) /* arch */ + dprintf1("signal pkey_reg from xsave: %016lx\n", *pkey_reg_ptr); + *(u64 *)pkey_reg_ptr &= reset_bits(siginfo_pkey, + PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE); +#elif __powerpc64__ + pkey_access_allow(siginfo_pkey); +#endif + shadow_pkey_reg &= reset_bits(siginfo_pkey, + PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE); dprintf1("WARNING: set PKEY_REG=0 to allow faulting instruction " "to continue\n"); pkey_faults++; dprintf1("<<<<==================================================\n"); - return; }
int wait_all_children(void)
On 02/21/2018 05:55 PM, Ram Pai wrote:
static inline u32 pkey_to_shift(int pkey) { +#if defined(__i386__) || defined(__x86_64__) /* arch */ return pkey * PKEY_BITS_PER_PKEY; +#elif __powerpc64__ /* arch */
- return (NR_PKEYS - pkey - 1) * PKEY_BITS_PER_PKEY;
+#endif /* arch */ }
I really detest the #if #else style. Can't we just have a pkey_ppc.h and a pkey_x86.h or something? -- To unsubscribe from this list: send the line "unsubscribe linux-kselftest" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
When a key is freed, the key is no more effective. Clear the bits corresponding to the pkey in the shadow register. Otherwise it will carry some spurious bits which can trigger false-positive asserts.
cc: Dave Hansen dave.hansen@intel.com cc: Florian Weimer fweimer@redhat.com Signed-off-by: Ram Pai linuxram@us.ibm.com --- tools/testing/selftests/vm/protection_keys.c | 3 ++- 1 files changed, 2 insertions(+), 1 deletions(-)
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index c4c73e6..e82bd88 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -586,7 +586,8 @@ int sys_pkey_free(unsigned long pkey) int ret = syscall(SYS_pkey_free, pkey);
if (!ret) - shadow_pkey_reg &= reset_bits(pkey, PKEY_DISABLE_ACCESS); + shadow_pkey_reg &= reset_bits(pkey, + PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE); dprintf1("%s(pkey=%ld) syscall ret: %d\n", __func__, pkey, ret); return ret; }
On 02/21/2018 05:55 PM, Ram Pai wrote:
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index c4c73e6..e82bd88 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -586,7 +586,8 @@ int sys_pkey_free(unsigned long pkey) int ret = syscall(SYS_pkey_free, pkey); if (!ret)
shadow_pkey_reg &= reset_bits(pkey, PKEY_DISABLE_ACCESS);
shadow_pkey_reg &= reset_bits(pkey,
dprintf1("%s(pkey=%ld) syscall ret: %d\n", __func__, pkey, ret); return ret;PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE);
}
What about your EXEC bit? -- To unsubscribe from this list: send the line "unsubscribe linux-kselftest" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
pkey subsystem is supported if the hardware and kernel has support. We determine that by checking if allocation of a key succeeds or not.
cc: Dave Hansen dave.hansen@intel.com cc: Florian Weimer fweimer@redhat.com Signed-off-by: Ram Pai linuxram@us.ibm.com --- tools/testing/selftests/vm/pkey-helpers.h | 22 ++++++++++++++++------ tools/testing/selftests/vm/protection_keys.c | 9 +++++---- 2 files changed, 21 insertions(+), 10 deletions(-)
diff --git a/tools/testing/selftests/vm/pkey-helpers.h b/tools/testing/selftests/vm/pkey-helpers.h index c47aead..88ef58f 100644 --- a/tools/testing/selftests/vm/pkey-helpers.h +++ b/tools/testing/selftests/vm/pkey-helpers.h @@ -258,7 +258,7 @@ static inline void __cpuid(unsigned int *eax, unsigned int *ebx, #define X86_FEATURE_PKU (1<<3) /* Protection Keys for Userspace */ #define X86_FEATURE_OSPKE (1<<4) /* OS Protection Keys Enable */
-static inline int cpu_has_pku(void) +static inline bool is_pkey_supported(void) { unsigned int eax; unsigned int ebx; @@ -271,13 +271,13 @@ static inline int cpu_has_pku(void)
if (!(ecx & X86_FEATURE_PKU)) { dprintf2("cpu does not have PKU\n"); - return 0; + return false; } if (!(ecx & X86_FEATURE_OSPKE)) { dprintf2("cpu does not have OSPKE\n"); - return 0; + return false; } - return 1; + return true; }
#define XSTATE_PKEY_BIT (9) @@ -323,9 +323,19 @@ static inline void __page_o_noops(void) #elif __powerpc64__ /* arch */
#define PAGE_SIZE (0x1UL << 16) -static inline int cpu_has_pku(void) +static inline bool is_pkey_supported(void) { - return 1; + /* + * No simple way to determine this. + * lets try allocating a key and see if it succeeds. + */ + int ret = sys_pkey_alloc(0, 0); + + if (ret > 0) { + sys_pkey_free(ret); + return true; + } + return false; }
/* 8-bytes of instruction * 16384bytes = 1 page */ diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index e82bd88..58da5a0 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -1299,8 +1299,8 @@ void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey) int size = PAGE_SIZE; int sret;
- if (cpu_has_pku()) { - dprintf1("SKIP: %s: no CPU support\n", __func__); + if (is_pkey_supported()) { + dprintf1("SKIP: %s: no CPU/kernel support\n", __func__); return; }
@@ -1362,12 +1362,13 @@ void run_tests_once(void) int main(void) { int nr_iterations = 22; + int pkey_supported = is_pkey_supported();
setup_handlers();
- printf("has pkey: %d\n", cpu_has_pku()); + printf("has pkey: %s\n", pkey_supported ? "Yes" : "No");
- if (!cpu_has_pku()) { + if (!pkey_supported) { int size = PAGE_SIZE; int *ptr;
On 02/21/2018 05:55 PM, Ram Pai wrote:
#define PAGE_SIZE (0x1UL << 16) -static inline int cpu_has_pku(void) +static inline bool is_pkey_supported(void) {
- return 1;
- /*
* No simple way to determine this.
* lets try allocating a key and see if it succeeds.
*/
- int ret = sys_pkey_alloc(0, 0);
- if (ret > 0) {
sys_pkey_free(ret);
return true;
- }
- return false;
}
The point of doing this was to have a test for the CPU that way separate from the syscalls.
Can you leave cpu_has_pkeys() in place?
-- To unsubscribe from this list: send the line "unsubscribe linux-kselftest" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
The maximum number of keys that can be allocated has to take into consideration, that some keys are reserved by the architecture for specific purpose. Hence cannot be allocated.
Fix the assertion in test_pkey_alloc_exhaust()
cc: Dave Hansen dave.hansen@intel.com cc: Florian Weimer fweimer@redhat.com Signed-off-by: Ram Pai linuxram@us.ibm.com --- tools/testing/selftests/vm/pkey-helpers.h | 14 ++++++++++++++ tools/testing/selftests/vm/protection_keys.c | 9 ++++----- 2 files changed, 18 insertions(+), 5 deletions(-)
diff --git a/tools/testing/selftests/vm/pkey-helpers.h b/tools/testing/selftests/vm/pkey-helpers.h index 88ef58f..67f9b0f 100644 --- a/tools/testing/selftests/vm/pkey-helpers.h +++ b/tools/testing/selftests/vm/pkey-helpers.h @@ -416,4 +416,18 @@ static inline int get_start_key(void) #endif }
+static inline int arch_reserved_keys(void) +{ +#if defined(__i386__) || defined(__x86_64__) /* arch */ + return NR_RESERVED_PKEYS; +#elif __powerpc64__ /* arch */ + if (sysconf(_SC_PAGESIZE) == 4096) + return NR_RESERVED_PKEYS_4K; + else + return NR_RESERVED_PKEYS_64K; +#else /* arch */ + NOT SUPPORTED +#endif /* arch */ +} + #endif /* _PKEYS_HELPER_H */ diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index 58da5a0..aae6771 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -1167,12 +1167,11 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey) pkey_assert(i < NR_PKEYS*2);
/* - * There are 16 pkeys supported in hardware. One is taken - * up for the default (0) and another can be taken up by - * an execute-only mapping. Ensure that we can allocate - * at least 14 (16-2). + * There are NR_PKEYS pkeys supported in hardware. arch_reserved_keys() + * are reserved. One can be taken up by an execute-only mapping. + * Ensure that we can allocate at least the remaining. */ - pkey_assert(i >= NR_PKEYS-2); + pkey_assert(i >= (NR_PKEYS-arch_reserved_keys()-1));
for (i = 0; i < nr_allocated_pkeys; i++) { err = sys_pkey_free(allocated_pkeys[i]);
On 02/21/2018 05:55 PM, Ram Pai wrote:
+static inline int arch_reserved_keys(void) +{ +#if defined(__i386__) || defined(__x86_64__) /* arch */
- return NR_RESERVED_PKEYS;
+#elif __powerpc64__ /* arch */
- if (sysconf(_SC_PAGESIZE) == 4096)
return NR_RESERVED_PKEYS_4K;
- else
return NR_RESERVED_PKEYS_64K;
+#else /* arch */
- NOT SUPPORTED
+#endif /* arch */ +}
Yeah, this is hideous.
Please either do it in one header:
#ifdef x86.. static inline int arch_reserved_keys(void) { } ... #elif ppc static inline int arch_reserved_keys(void) { } ... #else #error #endif
Or in multiple:
#ifdef x86.. #include <pkey_x86.h> #elif ppc #include <pkey_ppc.h> #else #error #endif -- To unsubscribe from this list: send the line "unsubscribe linux-kselftest" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
detect access-violation on a page to which access-disabled key is associated much after the page is mapped.
cc: Dave Hansen dave.hansen@intel.com cc: Florian Weimer fweimer@redhat.com Signed-off-by: Ram Pai linuxram@us.ibm.com --- tools/testing/selftests/vm/protection_keys.c | 19 +++++++++++++++++++ 1 files changed, 19 insertions(+), 0 deletions(-)
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index aae6771..dba9162 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -1016,6 +1016,24 @@ void test_read_of_access_disabled_region(int *ptr, u16 pkey) dprintf1("*ptr: %d\n", ptr_contents); expected_pkey_fault(pkey); } + +void test_read_of_access_disabled_region_with_page_already_mapped(int *ptr, + u16 pkey) +{ + int ptr_contents; + + dprintf1("disabling access to PKEY[%02d], doing read @ %p\n", + pkey, ptr); + ptr_contents = read_ptr(ptr); + dprintf1("reading ptr before disabling the read : %d\n", + ptr_contents); + rdpkey_reg(); + pkey_access_deny(pkey); + ptr_contents = read_ptr(ptr); + dprintf1("*ptr: %d\n", ptr_contents); + expected_pkey_fault(pkey); +} + void test_write_of_write_disabled_region(int *ptr, u16 pkey) { dprintf1("disabling write access to PKEY[%02d], doing write\n", pkey); @@ -1310,6 +1328,7 @@ void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey) void (*pkey_tests[])(int *ptr, u16 pkey) = { test_read_of_write_disabled_region, test_read_of_access_disabled_region, + test_read_of_access_disabled_region_with_page_already_mapped, test_write_of_write_disabled_region, test_write_of_access_disabled_region, test_kernel_write_of_access_disabled_region,
On 02/21/2018 05:55 PM, Ram Pai wrote:
detect access-violation on a page to which access-disabled key is associated much after the page is mapped.
Looks fine to me. Did this actually find a bug for you?
Acked-by: Dave Hansen dave.hansen@intel.com
-- To unsubscribe from this list: send the line "unsubscribe linux-kselftest" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
detect write-violation on a page to which write-disabled key is associated much after the page is mapped.
cc: Dave Hansen dave.hansen@intel.com cc: Florian Weimer fweimer@redhat.com Signed-off-by: Ram Pai linuxram@us.ibm.com --- tools/testing/selftests/vm/protection_keys.c | 12 ++++++++++++ 1 files changed, 12 insertions(+), 0 deletions(-)
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index dba9162..b685e26 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -1034,6 +1034,17 @@ void test_read_of_access_disabled_region_with_page_already_mapped(int *ptr, expected_pkey_fault(pkey); }
+void test_write_of_write_disabled_region_with_page_already_mapped(int *ptr, + u16 pkey) +{ + *ptr = __LINE__; + dprintf1("disabling write access; after accessing the page, " + "to PKEY[%02d], doing write\n", pkey); + pkey_write_deny(pkey); + *ptr = __LINE__; + expected_pkey_fault(pkey); +} + void test_write_of_write_disabled_region(int *ptr, u16 pkey) { dprintf1("disabling write access to PKEY[%02d], doing write\n", pkey); @@ -1330,6 +1341,7 @@ void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey) test_read_of_access_disabled_region, test_read_of_access_disabled_region_with_page_already_mapped, test_write_of_write_disabled_region, + test_write_of_write_disabled_region_with_page_already_mapped, test_write_of_access_disabled_region, test_kernel_write_of_access_disabled_region, test_kernel_write_of_write_disabled_region,
On 02/21/2018 05:55 PM, Ram Pai wrote:
detect write-violation on a page to which write-disabled key is associated much after the page is mapped.
The more tests the merrier.
Acked-by: Dave Hansen dave.hansen@intel.com
-- To unsubscribe from this list: send the line "unsubscribe linux-kselftest" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
detect write-violation on a page to which access-disabled key is associated much after the page is mapped.
cc: Dave Hansen dave.hansen@intel.com cc: Florian Weimer fweimer@redhat.com Signed-off-by: Ram Pai linuxram@us.ibm.com --- tools/testing/selftests/vm/protection_keys.c | 13 +++++++++++++ 1 files changed, 13 insertions(+), 0 deletions(-)
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index b685e26..437ee74 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -1059,6 +1059,18 @@ void test_write_of_access_disabled_region(int *ptr, u16 pkey) *ptr = __LINE__; expected_pkey_fault(pkey); } + +void test_write_of_access_disabled_region_with_page_already_mapped(int *ptr, + u16 pkey) +{ + *ptr = __LINE__; + dprintf1("disabling access; after accessing the page, " + " to PKEY[%02d], doing write\n", pkey); + pkey_access_deny(pkey); + *ptr = __LINE__; + expected_pkey_fault(pkey); +} + void test_kernel_write_of_access_disabled_region(int *ptr, u16 pkey) { int ret; @@ -1343,6 +1355,7 @@ void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey) test_write_of_write_disabled_region, test_write_of_write_disabled_region_with_page_already_mapped, test_write_of_access_disabled_region, + test_write_of_access_disabled_region_with_page_already_mapped, test_kernel_write_of_access_disabled_region, test_kernel_write_of_write_disabled_region, test_kernel_gup_of_access_disabled_region,
On 02/21/2018 05:55 PM, Ram Pai wrote:
detect write-violation on a page to which access-disabled key is associated much after the page is mapped.
Acked-by: Dave Hansen dave.hansen@intel.com
-- To unsubscribe from this list: send the line "unsubscribe linux-kselftest" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Generally the signal handler restores the state of the pkey register before returning. However there are times when the read/write operation can legitamely fail without invoking the signal handler. Eg: A sys_read() operaton to a write-protected page should be disallowed. In such a case the state of the pkey register is not restored to its original state. The test case is responsible for restoring the key register state to its original value.
cc: Dave Hansen dave.hansen@intel.com cc: Florian Weimer fweimer@redhat.com Signed-off-by: Ram Pai linuxram@us.ibm.com --- tools/testing/selftests/vm/protection_keys.c | 5 +++++ 1 files changed, 5 insertions(+), 0 deletions(-)
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index 437ee74..42c068a 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -1003,6 +1003,7 @@ void test_read_of_write_disabled_region(int *ptr, u16 pkey) ptr_contents = read_ptr(ptr); dprintf1("*ptr: %d\n", ptr_contents); dprintf1("\n"); + pkey_write_allow(pkey); } void test_read_of_access_disabled_region(int *ptr, u16 pkey) { @@ -1082,6 +1083,7 @@ void test_kernel_write_of_access_disabled_region(int *ptr, u16 pkey) ret = read(test_fd, ptr, 1); dprintf1("read ret: %d\n", ret); pkey_assert(ret); + pkey_access_allow(pkey); } void test_kernel_write_of_write_disabled_region(int *ptr, u16 pkey) { @@ -1094,6 +1096,7 @@ void test_kernel_write_of_write_disabled_region(int *ptr, u16 pkey) if (ret < 0 && (DEBUG_LEVEL > 0)) perror("verbose read result (OK for this to be bad)"); pkey_assert(ret); + pkey_write_allow(pkey); }
void test_kernel_gup_of_access_disabled_region(int *ptr, u16 pkey) @@ -1113,6 +1116,7 @@ void test_kernel_gup_of_access_disabled_region(int *ptr, u16 pkey) vmsplice_ret = vmsplice(pipe_fds[1], &iov, 1, SPLICE_F_GIFT); dprintf1("vmsplice() ret: %d\n", vmsplice_ret); pkey_assert(vmsplice_ret == -1); + pkey_access_allow(pkey);
close(pipe_fds[0]); close(pipe_fds[1]); @@ -1133,6 +1137,7 @@ void test_kernel_gup_write_to_write_disabled_region(int *ptr, u16 pkey) if (DEBUG_LEVEL > 0) perror("futex"); dprintf1("futex() ret: %d\n", futex_ret); + pkey_write_allow(pkey); }
/* Assumes that all pkeys other than 'pkey' are unallocated */
On 02/21/2018 05:55 PM, Ram Pai wrote:
Generally the signal handler restores the state of the pkey register before returning. However there are times when the read/write operation can legitamely fail without invoking the signal handler. Eg: A sys_read() operaton to a write-protected page should be disallowed. In such a case the state of the pkey register is not restored to its original state. The test case is responsible for restoring the key register state to its original value.
Oh, that's a good point.
Could we just do this in a common place, though? Like reset the register after each test? Seems more foolproof. -- To unsubscribe from this list: send the line "unsubscribe linux-kselftest" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
introduce a new allocator that allocates 4k hardware-pages to back 64k linux-page. This allocator is only applicable on powerpc.
cc: Dave Hansen dave.hansen@intel.com cc: Florian Weimer fweimer@redhat.com Signed-off-by: Ram Pai linuxram@us.ibm.com --- tools/testing/selftests/vm/protection_keys.c | 30 ++++++++++++++++++++++++++ 1 files changed, 30 insertions(+), 0 deletions(-)
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index 42c068a..1b06e59 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -766,6 +766,35 @@ void free_pkey_malloc(void *ptr) return ptr; }
+void *malloc_pkey_with_mprotect_subpage(long size, int prot, u16 pkey) +{ +#ifdef __powerpc64__ + void *ptr; + int ret; + + dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__, + size, prot, pkey); + pkey_assert(pkey < NR_PKEYS); + ptr = mmap(NULL, size, prot, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + pkey_assert(ptr != (void *)-1); + + ret = syscall(__NR_subpage_prot, ptr, size, NULL); + if (ret) { + perror("subpage_perm"); + return PTR_ERR_ENOTSUP; + } + + ret = mprotect_pkey((void *)ptr, PAGE_SIZE, prot, pkey); + pkey_assert(!ret); + record_pkey_malloc(ptr, size); + + dprintf1("%s() for pkey %d @ %p\n", __func__, pkey, ptr); + return ptr; +#else /* __powerpc64__ */ + return PTR_ERR_ENOTSUP; +#endif /* __powerpc64__ */ +} + void *malloc_pkey_anon_huge(long size, int prot, u16 pkey) { int ret; @@ -888,6 +917,7 @@ void setup_hugetlbfs(void) void *(*pkey_malloc[])(long size, int prot, u16 pkey) = {
malloc_pkey_with_mprotect, + malloc_pkey_with_mprotect_subpage, malloc_pkey_anon_huge, malloc_pkey_hugetlb /* can not do direct with the pkey_mprotect() API:
On 02/21/2018 05:55 PM, Ram Pai wrote: ...
@@ -888,6 +917,7 @@ void setup_hugetlbfs(void) void *(*pkey_malloc[])(long size, int prot, u16 pkey) = { malloc_pkey_with_mprotect,
- malloc_pkey_with_mprotect_subpage, malloc_pkey_anon_huge, malloc_pkey_hugetlb
/* can not do direct with the pkey_mprotect() API:
I think I'd rather have an #ifdef on the array entries than have the malloc entry do nothing on x86. Maybe have a ppc-specific section at the end? -- To unsubscribe from this list: send the line "unsubscribe linux-kselftest" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
From: Thiago Jung Bauermann bauerman@linux.vnet.ibm.com
The sig_chld() handler calls dprintf2() taking care of setting dprint_in_signal so that sigsafe_printf() won't call printf(). Unfortunately, this precaution is is negated by dprintf_level(), which has a call to fflush().
This function acquires a lock, which means that if the signal interrupts an ongoing fflush() the process will deadlock. At least on powerpc this is easy to trigger, resulting in the following backtrace when attaching to the frozen process:
(gdb) bt #0 0x00007fff9f96c7d8 in __lll_lock_wait_private () from /lib64/power8/libc.so.6 #1 0x00007fff9f8cba4c in _IO_flush_all_lockp () from /lib64/power8/libc.so.6 #2 0x00007fff9f8cbd1c in __GI__IO_flush_all () from /lib64/power8/libc.so.6 #3 0x00007fff9f8b7424 in fflush () from /lib64/power8/libc.so.6 #4 0x00000000100504f8 in sig_chld (x=17) at protection_keys.c:283 #5 <signal handler called> #6 0x00007fff9f8cb8ac in _IO_flush_all_lockp () from /lib64/power8/libc.so.6 #7 0x00007fff9f8cbd1c in __GI__IO_flush_all () from /lib64/power8/libc.so.6 #8 0x00007fff9f8b7424 in fflush () from /lib64/power8/libc.so.6 #9 0x0000000010050b50 in pkey_get (pkey=7, flags=0) at protection_keys.c:379 #10 0x0000000010050dc0 in pkey_disable_set (pkey=7, flags=2) at protection_keys.c:423 #11 0x0000000010051414 in pkey_write_deny (pkey=7) at protection_keys.c:486 #12 0x00000000100556bc in test_ptrace_of_child (ptr=0x7fff9f7f0000, pkey=7) at protection_keys.c:1288 #13 0x0000000010055f60 in run_tests_once () at protection_keys.c:1414 #14 0x00000000100561a4 in main () at protection_keys.c:1459
The fix is to refrain from calling fflush() when inside a signal handler. The output may not be as pretty but at least the testcase will be able to move on.
cc: Dave Hansen dave.hansen@intel.com cc: Florian Weimer fweimer@redhat.com Signed-off-by: Ram Pai linuxram@us.ibm.com Signed-off-by: Thiago Jung Bauermann bauerman@linux.vnet.ibm.com
tools/testing/selftests/vm/pkey-helpers.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) --- tools/testing/selftests/vm/pkey-helpers.h | 3 ++- 1 files changed, 2 insertions(+), 1 deletions(-)
diff --git a/tools/testing/selftests/vm/pkey-helpers.h b/tools/testing/selftests/vm/pkey-helpers.h index 67f9b0f..7240598 100644 --- a/tools/testing/selftests/vm/pkey-helpers.h +++ b/tools/testing/selftests/vm/pkey-helpers.h @@ -128,7 +128,8 @@ static inline void sigsafe_printf(const char *format, ...) #define dprintf_level(level, args...) do { \ if (level <= DEBUG_LEVEL) \ sigsafe_printf(args); \ - fflush(NULL); \ + if (!dprint_in_signal) \ + fflush(NULL); \ } while (0) #define dprintf0(args...) dprintf_level(0, args) #define dprintf1(args...) dprintf_level(1, args)
On Wed, 21 Feb 2018 17:55:41 -0800 Ram Pai linuxram@us.ibm.com wrote:
From: Thiago Jung Bauermann bauerman@linux.vnet.ibm.com
The sig_chld() handler calls dprintf2() taking care of setting dprint_in_signal so that sigsafe_printf() won't call printf(). Unfortunately, this precaution is is negated by dprintf_level(), which has a call to fflush().
fflush() is not the signal-safe function list, so this makes sense. I wonder if fflush() is needed in sigsafe_printf()?
How about?
diff --git a/tools/testing/selftests/x86/pkey-helpers.h b/tools/testing/selftests/x86/pkey-helpers.h index b3cb7670e026..2c3b39851f10 100644 --- a/tools/testing/selftests/x86/pkey-helpers.h +++ b/tools/testing/selftests/x86/pkey-helpers.h @@ -29,6 +29,7 @@ static inline void sigsafe_printf(const char *format, ...) va_start(ap, format); if (!dprint_in_signal) { vprintf(format, ap); + fflush(NULL); \ } else { int ret; int len = vsnprintf(dprint_in_signal_buffer, @@ -49,7 +50,6 @@ static inline void sigsafe_printf(const char *format, ...) #define dprintf_level(level, args...) do { \ if (level <= DEBUG_LEVEL) \ sigsafe_printf(args); \ - fflush(NULL); \ } while (0) #define dprintf0(args...) dprintf_level(0, args) #define dprintf1(args...) dprintf_level(1, args)
But both are equivalent I guess, so Acked-by: Balbir Singh bsingharora@gmail.com -- To unsubscribe from this list: send the line "unsubscribe linux-kselftest" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 02/21/2018 05:55 PM, Ram Pai wrote:
From: Thiago Jung Bauermann bauerman@linux.vnet.ibm.com
The sig_chld() handler calls dprintf2() taking care of setting dprint_in_signal so that sigsafe_printf() won't call printf(). Unfortunately, this precaution is is negated by dprintf_level(), which has a call to fflush().
This function acquires a lock, which means that if the signal interrupts an ongoing fflush() the process will deadlock. At least on powerpc this is easy to trigger, resulting in the following backtrace when attaching to the frozen process:
Ugh, yeah, I've run into this too.
Acked-by: Dave Hansen dave.hansen@intel.com -- To unsubscribe from this list: send the line "unsubscribe linux-kselftest" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
linux-kselftest-mirror@lists.linaro.org