February 2020 - Linux-kselftest-mirror

[PATCH v26 20/22] selftests/x86: Add vDSO selftest for SGX

by Jarkko Sakkinen

Expand the selftest by invoking the enclave by using __vdso_sgx_enter_enclave() in addition to direct ENCLS[EENTER]. Cc: linux-kselftest(a)vger.kernel.org Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen(a)linux.intel.com> --- tools/testing/selftests/x86/sgx/main.c | 132 +++++++++++++++++++++ tools/testing/selftests/x86/sgx/sgx_call.S | 43 +++++++ tools/testing/selftests/x86/sgx/sgx_call.h | 3 + 3 files changed, 178 insertions(+) diff --git a/tools/testing/selftests/x86/sgx/main.c b/tools/testing/selftests/x86/sgx/main.c index 48ed5fdfb3cb..d97cc3cf0093 100644 --- a/tools/testing/selftests/x86/sgx/main.c +++ b/tools/testing/selftests/x86/sgx/main.c @@ -21,6 +21,109 @@ #define PAGE_SIZE 4096 static const uint64_t MAGIC = 0x1122334455667788ULL; +void *eenter; + +struct vdso_symtab { + Elf64_Sym *elf_symtab; + const char *elf_symstrtab; + Elf64_Word *elf_hashtab; +}; + +static void *vdso_get_base_addr(char *envp[]) +{ + Elf64_auxv_t *auxv; + int i; + + for (i = 0; envp[i]; i++) + ; + + auxv = (Elf64_auxv_t *)&envp[i + 1]; + + for (i = 0; auxv[i].a_type != AT_NULL; i++) { + if (auxv[i].a_type == AT_SYSINFO_EHDR) + return (void *)auxv[i].a_un.a_val; + } + + return NULL; +} + +static Elf64_Dyn *vdso_get_dyntab(void *addr) +{ + Elf64_Ehdr *ehdr = addr; + Elf64_Phdr *phdrtab = addr + ehdr->e_phoff; + int i; + + for (i = 0; i < ehdr->e_phnum; i++) + if (phdrtab[i].p_type == PT_DYNAMIC) + return addr + phdrtab[i].p_offset; + + return NULL; +} + +static void *vdso_get_dyn(void *addr, Elf64_Dyn *dyntab, Elf64_Sxword tag) +{ + int i; + + for (i = 0; dyntab[i].d_tag != DT_NULL; i++) + if (dyntab[i].d_tag == tag) + return addr + dyntab[i].d_un.d_ptr; + + return NULL; +} + +static bool vdso_get_symtab(void *addr, struct vdso_symtab *symtab) +{ + Elf64_Dyn *dyntab = vdso_get_dyntab(addr); + + symtab->elf_symtab = vdso_get_dyn(addr, dyntab, DT_SYMTAB); + if (!symtab->elf_symtab) + return false; + + symtab->elf_symstrtab = vdso_get_dyn(addr, dyntab, DT_STRTAB); + if (!symtab->elf_symstrtab) + return false; + + symtab->elf_hashtab = vdso_get_dyn(addr, dyntab, DT_HASH); + if (!symtab->elf_hashtab) + return false; + + return true; +} + +static unsigned long elf_sym_hash(const char *name) +{ + unsigned long h = 0, high; + + while (*name) { + h = (h << 4) + *name++; + high = h & 0xf0000000; + + if (high) + h ^= high >> 24; + + h &= ~high; + } + + return h; +} + +static Elf64_Sym *vdso_symtab_get(struct vdso_symtab *symtab, const char *name) +{ + Elf64_Word bucketnum = symtab->elf_hashtab[0]; + Elf64_Word *buckettab = &symtab->elf_hashtab[2]; + Elf64_Word *chaintab = &symtab->elf_hashtab[2 + bucketnum]; + Elf64_Sym *sym; + Elf64_Word i; + + for (i = buckettab[elf_sym_hash(name) % bucketnum]; i != STN_UNDEF; + i = chaintab[i]) { + sym = &symtab->elf_symtab[i]; + if (!strcmp(name, &symtab->elf_symstrtab[sym->st_name])) + return sym; + } + + return NULL; +} static bool encl_create(int dev_fd, unsigned long bin_size, struct sgx_secs *secs) @@ -218,10 +321,14 @@ bool load_sigstruct(const char *path, void *sigstruct) int main(int argc, char *argv[], char *envp[]) { + struct sgx_enclave_exception exception; struct sgx_sigstruct sigstruct; + struct vdso_symtab symtab; + Elf64_Sym *eenter_sym; struct sgx_secs secs; uint64_t result = 0; off_t bin_size; + void *addr; void *bin; if (!encl_data_map("encl.bin", &bin, &bin_size)) @@ -243,5 +350,30 @@ int main(int argc, char *argv[], char *envp[]) printf("Output: 0x%lx\n", result); + memset(&exception, 0, sizeof(exception)); + + addr = vdso_get_base_addr(envp); + if (!addr) + exit(1); + + if (!vdso_get_symtab(addr, &symtab)) + exit(1); + + eenter_sym = vdso_symtab_get(&symtab, "__vdso_sgx_enter_enclave"); + if (!eenter_sym) + exit(1); + eenter = addr + eenter_sym->st_value; + + printf("Input: 0x%lx\n", MAGIC); + + sgx_call_vdso((void *)&MAGIC, &result, 0, NULL, NULL, NULL, + (void *)secs.base, &exception, NULL); + if (result != MAGIC) { + fprintf(stderr, "0x%lx != 0x%lx\n", result, MAGIC); + exit(1); + } + + printf("Output: 0x%lx\n", result); + exit(0); } diff --git a/tools/testing/selftests/x86/sgx/sgx_call.S b/tools/testing/selftests/x86/sgx/sgx_call.S index ca4c7893f9d9..e71f44f7a995 100644 --- a/tools/testing/selftests/x86/sgx/sgx_call.S +++ b/tools/testing/selftests/x86/sgx/sgx_call.S @@ -21,3 +21,46 @@ sgx_async_exit: ENCLU pop %rbx ret + + .global sgx_call_vdso +sgx_call_vdso: + .cfi_startproc + push %r15 + .cfi_adjust_cfa_offset 8 + .cfi_rel_offset %r15, 0 + push %r14 + .cfi_adjust_cfa_offset 8 + .cfi_rel_offset %r14, 0 + push %r13 + .cfi_adjust_cfa_offset 8 + .cfi_rel_offset %r13, 0 + push %r12 + .cfi_adjust_cfa_offset 8 + .cfi_rel_offset %r12, 0 + push %rbx + .cfi_adjust_cfa_offset 8 + .cfi_rel_offset %rbx, 0 + push $0 + .cfi_adjust_cfa_offset 8 + push 0x48(%rsp) + .cfi_adjust_cfa_offset 8 + push 0x48(%rsp) + .cfi_adjust_cfa_offset 8 + push 0x48(%rsp) + .cfi_adjust_cfa_offset 8 + mov $2, %eax + call *eenter(%rip) + add $0x20, %rsp + .cfi_adjust_cfa_offset -0x20 + pop %rbx + .cfi_adjust_cfa_offset -8 + pop %r12 + .cfi_adjust_cfa_offset -8 + pop %r13 + .cfi_adjust_cfa_offset -8 + pop %r14 + .cfi_adjust_cfa_offset -8 + pop %r15 + .cfi_adjust_cfa_offset -8 + ret + .cfi_endproc diff --git a/tools/testing/selftests/x86/sgx/sgx_call.h b/tools/testing/selftests/x86/sgx/sgx_call.h index bf72068ada23..a4072c5ecce7 100644 --- a/tools/testing/selftests/x86/sgx/sgx_call.h +++ b/tools/testing/selftests/x86/sgx/sgx_call.h @@ -8,4 +8,7 @@ void sgx_call_eenter(void *rdi, void *rsi, void *entry); +int sgx_call_vdso(void *rdi, void *rsi, long rdx, void *rcx, void *r8, void *r9, + void *tcs, struct sgx_enclave_exception *ei, void *cb); + #endif /* SGX_CALL_H */ -- 2.20.1

5 years, 10 months

1
0
0 0

[PATCH v26 12/22] selftests/x86: Add a selftest for SGX

by Jarkko Sakkinen

Add a selftest for SGX. It is a trivial test where a simple enclave copies one 64-bit word of memory between two memory locations given to the enclave as arguments. Use ENCLS[EENTER] to invoke the enclave. Cc: linux-sgx(a)vger.kernel.org Cc: linux-kselftest(a)vger.kernel.org Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen(a)linux.intel.com> --- tools/testing/selftests/x86/sgx/.gitignore | 3 + tools/testing/selftests/x86/sgx/Makefile | 48 ++ tools/testing/selftests/x86/sgx/defines.h | 17 + tools/testing/selftests/x86/sgx/encl.c | 20 + tools/testing/selftests/x86/sgx/encl.lds | 34 ++ .../selftests/x86/sgx/encl_bootstrap.S | 94 ++++ tools/testing/selftests/x86/sgx/main.c | 247 +++++++++ tools/testing/selftests/x86/sgx/sgx_call.S | 23 + tools/testing/selftests/x86/sgx/sgx_call.h | 11 + tools/testing/selftests/x86/sgx/sgxsign.c | 493 ++++++++++++++++++ .../testing/selftests/x86/sgx/signing_key.pem | 39 ++ 11 files changed, 1029 insertions(+) create mode 100644 tools/testing/selftests/x86/sgx/.gitignore create mode 100644 tools/testing/selftests/x86/sgx/Makefile create mode 100644 tools/testing/selftests/x86/sgx/defines.h create mode 100644 tools/testing/selftests/x86/sgx/encl.c create mode 100644 tools/testing/selftests/x86/sgx/encl.lds create mode 100644 tools/testing/selftests/x86/sgx/encl_bootstrap.S create mode 100644 tools/testing/selftests/x86/sgx/main.c create mode 100644 tools/testing/selftests/x86/sgx/sgx_call.S create mode 100644 tools/testing/selftests/x86/sgx/sgx_call.h create mode 100644 tools/testing/selftests/x86/sgx/sgxsign.c create mode 100644 tools/testing/selftests/x86/sgx/signing_key.pem diff --git a/tools/testing/selftests/x86/sgx/.gitignore b/tools/testing/selftests/x86/sgx/.gitignore new file mode 100644 index 000000000000..98eb2d439606 --- /dev/null +++ b/tools/testing/selftests/x86/sgx/.gitignore @@ -0,0 +1,3 @@ +encl.ss +sgxsign +test_sgx diff --git a/tools/testing/selftests/x86/sgx/Makefile b/tools/testing/selftests/x86/sgx/Makefile new file mode 100644 index 000000000000..f838700029e2 --- /dev/null +++ b/tools/testing/selftests/x86/sgx/Makefile @@ -0,0 +1,48 @@ +top_srcdir = ../../../../.. + +include ../../lib.mk + +ifndef OBJCOPY +OBJCOPY := $(CROSS_COMPILE)objcopy +endif + +INCLUDES := -I$(top_srcdir)/tools/include +HOST_CFLAGS := -Wall -Werror -g $(INCLUDES) -fPIC -z noexecstack +ENCL_CFLAGS := -Wall -Werror -static -nostdlib -nostartfiles -fPIC \ + -fno-stack-protector -mrdrnd $(INCLUDES) + +TEST_CUSTOM_PROGS := $(OUTPUT)/test_sgx $(OUTPUT)/encl.bin $(OUTPUT)/encl.ss + +all: $(TEST_CUSTOM_PROGS) + +$(OUTPUT)/test_sgx: $(OUTPUT)/main.o $(OUTPUT)/sgx_call.o + $(CC) $(HOST_CFLAGS) -o $@ $^ + +$(OUTPUT)/main.o: main.c + $(CC) $(HOST_CFLAGS) -c $< -o $@ + +$(OUTPUT)/sgx_call.o: sgx_call.S + $(CC) $(HOST_CFLAGS) -c $< -o $@ + +$(OUTPUT)/encl.bin: $(OUTPUT)/encl.elf $(OUTPUT)/sgxsign + $(OBJCOPY) -O binary $< $@ + +$(OUTPUT)/encl.elf: encl.lds encl.c encl_bootstrap.S + $(CC) $(ENCL_CFLAGS) -T $^ -o $@ + +$(OUTPUT)/encl.ss: $(OUTPUT)/encl.bin + $(OUTPUT)/sgxsign signing_key.pem $(OUTPUT)/encl.bin $(OUTPUT)/encl.ss + +$(OUTPUT)/sgxsign: sgxsign.c + $(CC) $(INCLUDES) -o $@ $< -lcrypto + +EXTRA_CLEAN := \ + $(OUTPUT)/encl.bin \ + $(OUTPUT)/encl.elf \ + $(OUTPUT)/encl.ss \ + $(OUTPUT)/sgx_call.o \ + $(OUTPUT)/sgxsign \ + $(OUTPUT)/test_sgx \ + $(OUTPUT)/test_sgx.o \ + +.PHONY: clean diff --git a/tools/testing/selftests/x86/sgx/defines.h b/tools/testing/selftests/x86/sgx/defines.h new file mode 100644 index 000000000000..87264f85cb9f --- /dev/null +++ b/tools/testing/selftests/x86/sgx/defines.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright(c) 2016-19 Intel Corporation. + */ + +#ifndef DEFINES_H +#define DEFINES_H + +#include <stdint.h> + +#define __aligned(x) __attribute__((__aligned__(x))) +#define __packed __attribute__((packed)) + +#include "../../../../../arch/x86/kernel/cpu/sgx/arch.h" +#include "../../../../../arch/x86/include/uapi/asm/sgx.h" + +#endif /* DEFINES_H */ diff --git a/tools/testing/selftests/x86/sgx/encl.c b/tools/testing/selftests/x86/sgx/encl.c new file mode 100644 index 000000000000..ede915399742 --- /dev/null +++ b/tools/testing/selftests/x86/sgx/encl.c @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) +// Copyright(c) 2016-18 Intel Corporation. + +#include <stddef.h> +#include "defines.h" + +static void *memcpy(void *dest, const void *src, size_t n) +{ + size_t i; + + for (i = 0; i < n; i++) + ((char *)dest)[i] = ((char *)src)[i]; + + return dest; +} + +void encl_body(void *rdi, void *rsi) +{ + memcpy(rsi, rdi, 8); +} diff --git a/tools/testing/selftests/x86/sgx/encl.lds b/tools/testing/selftests/x86/sgx/encl.lds new file mode 100644 index 000000000000..9a56d3064104 --- /dev/null +++ b/tools/testing/selftests/x86/sgx/encl.lds @@ -0,0 +1,34 @@ +OUTPUT_FORMAT(elf64-x86-64) + +SECTIONS +{ + . = 0; + .tcs : { + *(.tcs*) + } + + . = ALIGN(4096); + .text : { + *(.text*) + *(.rodata*) + } + + . = ALIGN(4096); + .data : { + *(.data*) + } + + /DISCARD/ : { + *(.data*) + *(.comment*) + *(.note*) + *(.debug*) + *(.eh_frame*) + } +} + +ASSERT(!DEFINED(.altinstructions), "ALTERNATIVES are not supported in enclaves") +ASSERT(!DEFINED(.altinstr_replacement), "ALTERNATIVES are not supported in enclaves") +ASSERT(!DEFINED(.discard.retpoline_safe), "RETPOLINE ALTERNATIVES are not supported in enclaves") +ASSERT(!DEFINED(.discard.nospec), "RETPOLINE ALTERNATIVES are not supported in enclaves") +ASSERT(!DEFINED(.got.plt), "Libcalls are not supported in enclaves") diff --git a/tools/testing/selftests/x86/sgx/encl_bootstrap.S b/tools/testing/selftests/x86/sgx/encl_bootstrap.S new file mode 100644 index 000000000000..d07f970ccdf9 --- /dev/null +++ b/tools/testing/selftests/x86/sgx/encl_bootstrap.S @@ -0,0 +1,94 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */ +/* + * Copyright(c) 2016-18 Intel Corporation. + */ + + .macro ENCLU + .byte 0x0f, 0x01, 0xd7 + .endm + + .section ".tcs", "a" + .balign 4096 + + .fill 1, 8, 0 # STATE (set by CPU) + .fill 1, 8, 0 # FLAGS + .quad encl_ssa # OSSA + .fill 1, 4, 0 # CSSA (set by CPU) + .fill 1, 4, 1 # NSSA + .quad encl_entry # OENTRY + .fill 1, 8, 0 # AEP (set by EENTER and ERESUME) + .fill 1, 8, 0 # OFSBASE + .fill 1, 8, 0 # OGSBASE + .fill 1, 4, 0xFFFFFFFF # FSLIMIT + .fill 1, 4, 0xFFFFFFFF # GSLIMIT + .fill 4024, 1, 0 # Reserved + + .text + +encl_entry: + # RBX contains the base address for TCS, which is also the first address + # inside the enclave. By adding the value of le_stack_end to it, we get + # the absolute address for the stack. + lea (encl_stack)(%rbx), %rax + xchg %rsp, %rax + push %rax + + push %rcx # push the address after EENTER + push %rbx # push the enclave base address + + call encl_body + + pop %rbx # pop the enclave base address + + # Restore XSAVE registers to a synthetic state. + mov $0xFFFFFFFF, %rax + mov $0xFFFFFFFF, %rdx + lea (xsave_area)(%rbx), %rdi + fxrstor (%rdi) + + # Clear GPRs. + xor %rcx, %rcx + xor %rdx, %rdx + xor %rdi, %rdi + xor %rsi, %rsi + xor %r8, %r8 + xor %r9, %r9 + xor %r10, %r10 + xor %r11, %r11 + xor %r12, %r12 + xor %r13, %r13 + xor %r14, %r14 + xor %r15, %r15 + + # Reset status flags. + add %rdx, %rdx # OF = SF = AF = CF = 0; ZF = PF = 1 + + # Prepare EEXIT target by popping the address of the instruction after + # EENTER to RBX. + pop %rbx + + # Restore the caller stack. + pop %rax + mov %rax, %rsp + + # EEXIT + mov $4, %rax + enclu + + .section ".data", "aw" + +encl_ssa: + .space 4096 + +xsave_area: + .fill 1, 4, 0x037F # FCW + .fill 5, 4, 0 + .fill 1, 4, 0x1F80 # MXCSR + .fill 1, 4, 0xFFFF # MXCSR_MASK + .fill 123, 4, 0 + .fill 1, 4, 0x80000000 # XCOMP_BV[63] = 1, compaction mode + .fill 12, 4, 0 + + .balign 4096 + .space 8192 +encl_stack: diff --git a/tools/testing/selftests/x86/sgx/main.c b/tools/testing/selftests/x86/sgx/main.c new file mode 100644 index 000000000000..48ed5fdfb3cb --- /dev/null +++ b/tools/testing/selftests/x86/sgx/main.c @@ -0,0 +1,247 @@ +// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) +// Copyright(c) 2016-18 Intel Corporation. + +#include <elf.h> +#include <errno.h> +#include <fcntl.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <sys/types.h> +#include "defines.h" +#include "sgx_call.h" + +#define PAGE_SIZE 4096 + +static const uint64_t MAGIC = 0x1122334455667788ULL; + +static bool encl_create(int dev_fd, unsigned long bin_size, + struct sgx_secs *secs) +{ + struct sgx_enclave_create ioc; + void *area; + int rc; + + memset(secs, 0, sizeof(*secs)); + secs->ssa_frame_size = 1; + secs->attributes = SGX_ATTR_MODE64BIT; + secs->xfrm = 3; + + for (secs->size = 4096; secs->size < bin_size; ) + secs->size <<= 1; + + area = mmap(NULL, secs->size * 2, PROT_NONE, MAP_SHARED, dev_fd, 0); + if (area == MAP_FAILED) { + perror("mmap"); + return false; + } + + secs->base = ((uint64_t)area + secs->size - 1) & ~(secs->size - 1); + + munmap(area, secs->base - (uint64_t)area); + munmap((void *)(secs->base + secs->size), + (uint64_t)area + secs->size - secs->base); + + ioc.src = (unsigned long)secs; + rc = ioctl(dev_fd, SGX_IOC_ENCLAVE_CREATE, &ioc); + if (rc) { + fprintf(stderr, "ECREATE failed rc=%d, err=%d.\n", rc, errno); + munmap((void *)secs->base, secs->size); + return false; + } + + return true; +} + +static bool encl_add_pages(int dev_fd, unsigned long offset, void *data, + unsigned long length, uint64_t flags) +{ + struct sgx_enclave_add_pages ioc; + struct sgx_secinfo secinfo; + int rc; + + memset(&secinfo, 0, sizeof(secinfo)); + secinfo.flags = flags; + + ioc.src = (uint64_t)data; + ioc.offset = offset; + ioc.length = length; + ioc.secinfo = (unsigned long)&secinfo; + ioc.flags = SGX_PAGE_MEASURE; + + rc = ioctl(dev_fd, SGX_IOC_ENCLAVE_ADD_PAGES, &ioc); + if (rc) { + fprintf(stderr, "EADD failed rc=%d.\n", rc); + return false; + } + + if (ioc.count != ioc.length) { + fprintf(stderr, "Partially processed, update the test.\n"); + return false; + } + + return true; +} + +#define SGX_REG_PAGE_FLAGS \ + (SGX_SECINFO_REG | SGX_SECINFO_R | SGX_SECINFO_W | SGX_SECINFO_X) + +static bool encl_build(struct sgx_secs *secs, void *bin, + unsigned long bin_size, struct sgx_sigstruct *sigstruct) +{ + struct sgx_enclave_init ioc; + void *addr; + int dev_fd; + int rc; + + dev_fd = open("/dev/sgx/enclave", O_RDWR); + if (dev_fd < 0) { + fprintf(stderr, "Unable to open /dev/sgx\n"); + return false; + } + + if (!encl_create(dev_fd, bin_size, secs)) + goto out_dev_fd; + + if (!encl_add_pages(dev_fd, 0, bin, PAGE_SIZE, SGX_SECINFO_TCS)) + goto out_dev_fd; + + if (!encl_add_pages(dev_fd, PAGE_SIZE, bin + PAGE_SIZE, + bin_size - PAGE_SIZE, SGX_REG_PAGE_FLAGS)) + goto out_dev_fd; + + ioc.sigstruct = (uint64_t)sigstruct; + rc = ioctl(dev_fd, SGX_IOC_ENCLAVE_INIT, &ioc); + if (rc) { + printf("EINIT failed rc=%d\n", rc); + goto out_map; + } + + addr = mmap((void *)secs->base, PAGE_SIZE, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_FIXED, dev_fd, 0); + if (addr == MAP_FAILED) { + fprintf(stderr, "mmap() failed on TCS, errno=%d.\n", errno); + return false; + } + + addr = mmap((void *)(secs->base + PAGE_SIZE), bin_size - PAGE_SIZE, + PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_SHARED | MAP_FIXED, dev_fd, 0); + if (addr == MAP_FAILED) { + fprintf(stderr, "mmap() failed, errno=%d.\n", errno); + return false; + } + + close(dev_fd); + return true; +out_map: + munmap((void *)secs->base, secs->size); +out_dev_fd: + close(dev_fd); + return false; +} + +bool get_file_size(const char *path, off_t *bin_size) +{ + struct stat sb; + int ret; + + ret = stat(path, &sb); + if (ret) { + perror("stat"); + return false; + } + + if (!sb.st_size || sb.st_size & 0xfff) { + fprintf(stderr, "Invalid blob size %lu\n", sb.st_size); + return false; + } + + *bin_size = sb.st_size; + return true; +} + +bool encl_data_map(const char *path, void **bin, off_t *bin_size) +{ + int fd; + + fd = open(path, O_RDONLY); + if (fd == -1) { + fprintf(stderr, "open() %s failed, errno=%d.\n", path, errno); + return false; + } + + if (!get_file_size(path, bin_size)) + goto err_out; + + *bin = mmap(NULL, *bin_size, PROT_READ, MAP_PRIVATE, fd, 0); + if (*bin == MAP_FAILED) { + fprintf(stderr, "mmap() %s failed, errno=%d.\n", path, errno); + goto err_out; + } + + close(fd); + return true; + +err_out: + close(fd); + return false; +} + +bool load_sigstruct(const char *path, void *sigstruct) +{ + int fd; + + fd = open(path, O_RDONLY); + if (fd == -1) { + fprintf(stderr, "open() %s failed, errno=%d.\n", path, errno); + return false; + } + + if (read(fd, sigstruct, sizeof(struct sgx_sigstruct)) != + sizeof(struct sgx_sigstruct)) { + fprintf(stderr, "read() %s failed, errno=%d.\n", path, errno); + close(fd); + return false; + } + + close(fd); + return true; +} + +int main(int argc, char *argv[], char *envp[]) +{ + struct sgx_sigstruct sigstruct; + struct sgx_secs secs; + uint64_t result = 0; + off_t bin_size; + void *bin; + + if (!encl_data_map("encl.bin", &bin, &bin_size)) + exit(1); + + if (!load_sigstruct("encl.ss", &sigstruct)) + exit(1); + + if (!encl_build(&secs, bin, bin_size, &sigstruct)) + exit(1); + + printf("Input: 0x%lx\n", MAGIC); + + sgx_call_eenter((void *)&MAGIC, &result, (void *)secs.base); + if (result != MAGIC) { + fprintf(stderr, "0x%lx != 0x%lx\n", result, MAGIC); + exit(1); + } + + printf("Output: 0x%lx\n", result); + + exit(0); +} diff --git a/tools/testing/selftests/x86/sgx/sgx_call.S b/tools/testing/selftests/x86/sgx/sgx_call.S new file mode 100644 index 000000000000..ca4c7893f9d9 --- /dev/null +++ b/tools/testing/selftests/x86/sgx/sgx_call.S @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */ +/** +* Copyright(c) 2016-18 Intel Corporation. +*/ + + .text + + .macro ENCLU + .byte 0x0f, 0x01, 0xd7 + .endm + + .text + + .global sgx_call_eenter +sgx_call_eenter: + push %rbx + mov $0x02, %rax + mov %rdx, %rbx + lea sgx_async_exit(%rip), %rcx +sgx_async_exit: + ENCLU + pop %rbx + ret diff --git a/tools/testing/selftests/x86/sgx/sgx_call.h b/tools/testing/selftests/x86/sgx/sgx_call.h new file mode 100644 index 000000000000..bf72068ada23 --- /dev/null +++ b/tools/testing/selftests/x86/sgx/sgx_call.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright(c) 2016-19 Intel Corporation. + */ + +#ifndef SGX_CALL_H +#define SGX_CALL_H + +void sgx_call_eenter(void *rdi, void *rsi, void *entry); + +#endif /* SGX_CALL_H */ diff --git a/tools/testing/selftests/x86/sgx/sgxsign.c b/tools/testing/selftests/x86/sgx/sgxsign.c new file mode 100644 index 000000000000..3d9007af40c9 --- /dev/null +++ b/tools/testing/selftests/x86/sgx/sgxsign.c @@ -0,0 +1,493 @@ +// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) +// Copyright(c) 2016-18 Intel Corporation. + +#define _GNU_SOURCE +#include <getopt.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> +#include <openssl/err.h> +#include <openssl/pem.h> +#include "defines.h" + +struct sgx_sigstruct_payload { + struct sgx_sigstruct_header header; + struct sgx_sigstruct_body body; +}; + +static bool check_crypto_errors(void) +{ + int err; + bool had_errors = false; + const char *filename; + int line; + char str[256]; + + for ( ; ; ) { + if (ERR_peek_error() == 0) + break; + + had_errors = true; + err = ERR_get_error_line(&filename, &line); + ERR_error_string_n(err, str, sizeof(str)); + fprintf(stderr, "crypto: %s: %s:%d\n", str, filename, line); + } + + return had_errors; +} + +static void exit_usage(const char *program) +{ + fprintf(stderr, + "Usage: %s/sign-le <key> <enclave> <sigstruct>\n", program); + exit(1); +} + +static inline const BIGNUM *get_modulus(RSA *key) +{ +#if OPENSSL_VERSION_NUMBER < 0x10100000L + return key->n; +#else + const BIGNUM *n; + + RSA_get0_key(key, &n, NULL, NULL); + return n; +#endif +} + +static RSA *load_sign_key(const char *path) +{ + FILE *f; + RSA *key; + + f = fopen(path, "rb"); + if (!f) { + fprintf(stderr, "Unable to open %s\n", path); + return NULL; + } + key = RSA_new(); + if (!PEM_read_RSAPrivateKey(f, &key, NULL, NULL)) + return NULL; + fclose(f); + + if (BN_num_bytes(get_modulus(key)) != SGX_MODULUS_SIZE) { + fprintf(stderr, "Invalid key size %d\n", + BN_num_bytes(get_modulus(key))); + RSA_free(key); + return NULL; + } + + return key; +} + +static void reverse_bytes(void *data, int length) +{ + int i = 0; + int j = length - 1; + uint8_t temp; + uint8_t *ptr = data; + + while (i < j) { + temp = ptr[i]; + ptr[i] = ptr[j]; + ptr[j] = temp; + i++; + j--; + } +} + +enum mrtags { + MRECREATE = 0x0045544145524345, + MREADD = 0x0000000044444145, + MREEXTEND = 0x00444E4554584545, +}; + +static bool mrenclave_update(EVP_MD_CTX *ctx, const void *data) +{ + if (!EVP_DigestUpdate(ctx, data, 64)) { + fprintf(stderr, "digest update failed\n"); + return false; + } + + return true; +} + +static bool mrenclave_commit(EVP_MD_CTX *ctx, uint8_t *mrenclave) +{ + unsigned int size; + + if (!EVP_DigestFinal_ex(ctx, (unsigned char *)mrenclave, &size)) { + fprintf(stderr, "digest commit failed\n"); + return false; + } + + if (size != 32) { + fprintf(stderr, "invalid digest size = %u\n", size); + return false; + } + + return true; +} + +struct mrecreate { + uint64_t tag; + uint32_t ssaframesize; + uint64_t size; + uint8_t reserved[44]; +} __attribute__((__packed__)); + + +static bool mrenclave_ecreate(EVP_MD_CTX *ctx, uint64_t blob_size) +{ + struct mrecreate mrecreate; + uint64_t encl_size; + + for (encl_size = 0x1000; encl_size < blob_size; ) + encl_size <<= 1; + + memset(&mrecreate, 0, sizeof(mrecreate)); + mrecreate.tag = MRECREATE; + mrecreate.ssaframesize = 1; + mrecreate.size = encl_size; + + if (!EVP_DigestInit_ex(ctx, EVP_sha256(), NULL)) + return false; + + return mrenclave_update(ctx, &mrecreate); +} + +struct mreadd { + uint64_t tag; + uint64_t offset; + uint64_t flags; /* SECINFO flags */ + uint8_t reserved[40]; +} __attribute__((__packed__)); + +static bool mrenclave_eadd(EVP_MD_CTX *ctx, uint64_t offset, uint64_t flags) +{ + struct mreadd mreadd; + + memset(&mreadd, 0, sizeof(mreadd)); + mreadd.tag = MREADD; + mreadd.offset = offset; + mreadd.flags = flags; + + return mrenclave_update(ctx, &mreadd); +} + +struct mreextend { + uint64_t tag; + uint64_t offset; + uint8_t reserved[48]; +} __attribute__((__packed__)); + +static bool mrenclave_eextend(EVP_MD_CTX *ctx, uint64_t offset, uint8_t *data) +{ + struct mreextend mreextend; + int i; + + for (i = 0; i < 0x1000; i += 0x100) { + memset(&mreextend, 0, sizeof(mreextend)); + mreextend.tag = MREEXTEND; + mreextend.offset = offset + i; + + if (!mrenclave_update(ctx, &mreextend)) + return false; + + if (!mrenclave_update(ctx, &data[i + 0x00])) + return false; + + if (!mrenclave_update(ctx, &data[i + 0x40])) + return false; + + if (!mrenclave_update(ctx, &data[i + 0x80])) + return false; + + if (!mrenclave_update(ctx, &data[i + 0xC0])) + return false; + } + + return true; +} + +/** + * measure_encl - measure enclave + * @path: path to the enclave + * @mrenclave: measurement + * + * Calculates MRENCLAVE. Assumes that the very first page is a TCS page and + * following pages are regular pages. Does not measure the contents of the + * enclave as the signing tool is used at the moment only for the launch + * enclave, which is pass-through (everything gets a token). + */ +static bool measure_encl(const char *path, uint8_t *mrenclave) +{ + FILE *file; + struct stat sb; + EVP_MD_CTX *ctx; + uint64_t flags; + uint64_t offset; + uint8_t data[0x1000]; + int rc; + + ctx = EVP_MD_CTX_create(); + if (!ctx) + return false; + + file = fopen(path, "rb"); + if (!file) { + perror("fopen"); + EVP_MD_CTX_destroy(ctx); + return false; + } + + rc = stat(path, &sb); + if (rc) { + perror("stat"); + goto out; + } + + if (!sb.st_size || sb.st_size & 0xfff) { + fprintf(stderr, "Invalid blob size %lu\n", sb.st_size); + goto out; + } + + if (!mrenclave_ecreate(ctx, sb.st_size)) + goto out; + + for (offset = 0; offset < sb.st_size; offset += 0x1000) { + if (!offset) + flags = SGX_SECINFO_TCS; + else + flags = SGX_SECINFO_REG | SGX_SECINFO_R | + SGX_SECINFO_W | SGX_SECINFO_X; + + if (!mrenclave_eadd(ctx, offset, flags)) + goto out; + + rc = fread(data, 1, 0x1000, file); + if (!rc) + break; + if (rc < 0x1000) + goto out; + + if (!mrenclave_eextend(ctx, offset, data)) + goto out; + } + + if (!mrenclave_commit(ctx, mrenclave)) + goto out; + + fclose(file); + EVP_MD_CTX_destroy(ctx); + return true; +out: + fclose(file); + EVP_MD_CTX_destroy(ctx); + return false; +} + +/** + * sign_encl - sign enclave + * @sigstruct: pointer to SIGSTRUCT + * @key: 3072-bit RSA key + * @signature: byte array for the signature + * + * Calculates EMSA-PKCSv1.5 signature for the given SIGSTRUCT. The result is + * stored in big-endian format so that it can be further passed to OpenSSL + * libcrypto functions. + */ +static bool sign_encl(const struct sgx_sigstruct *sigstruct, RSA *key, + uint8_t *signature) +{ + struct sgx_sigstruct_payload payload; + unsigned int siglen; + uint8_t digest[SHA256_DIGEST_LENGTH]; + bool ret; + + memcpy(&payload.header, &sigstruct->header, sizeof(sigstruct->header)); + memcpy(&payload.body, &sigstruct->body, sizeof(sigstruct->body)); + + SHA256((unsigned char *)&payload, sizeof(payload), digest); + + ret = RSA_sign(NID_sha256, digest, SHA256_DIGEST_LENGTH, signature, + &siglen, key); + + return ret; +} + +struct q1q2_ctx { + BN_CTX *bn_ctx; + BIGNUM *m; + BIGNUM *s; + BIGNUM *q1; + BIGNUM *qr; + BIGNUM *q2; +}; + +static void free_q1q2_ctx(struct q1q2_ctx *ctx) +{ + BN_CTX_free(ctx->bn_ctx); + BN_free(ctx->m); + BN_free(ctx->s); + BN_free(ctx->q1); + BN_free(ctx->qr); + BN_free(ctx->q2); +} + +static bool alloc_q1q2_ctx(const uint8_t *s, const uint8_t *m, + struct q1q2_ctx *ctx) +{ + ctx->bn_ctx = BN_CTX_new(); + ctx->s = BN_bin2bn(s, SGX_MODULUS_SIZE, NULL); + ctx->m = BN_bin2bn(m, SGX_MODULUS_SIZE, NULL); + ctx->q1 = BN_new(); + ctx->qr = BN_new(); + ctx->q2 = BN_new(); + + if (!ctx->bn_ctx || !ctx->s || !ctx->m || !ctx->q1 || !ctx->qr || + !ctx->q2) { + free_q1q2_ctx(ctx); + return false; + } + + return true; +} + +static bool calc_q1q2(const uint8_t *s, const uint8_t *m, uint8_t *q1, + uint8_t *q2) +{ + struct q1q2_ctx ctx; + + if (!alloc_q1q2_ctx(s, m, &ctx)) { + fprintf(stderr, "Not enough memory for Q1Q2 calculation\n"); + return false; + } + + if (!BN_mul(ctx.q1, ctx.s, ctx.s, ctx.bn_ctx)) + goto out; + + if (!BN_div(ctx.q1, ctx.qr, ctx.q1, ctx.m, ctx.bn_ctx)) + goto out; + + if (BN_num_bytes(ctx.q1) > SGX_MODULUS_SIZE) { + fprintf(stderr, "Too large Q1 %d bytes\n", + BN_num_bytes(ctx.q1)); + goto out; + } + + if (!BN_mul(ctx.q2, ctx.s, ctx.qr, ctx.bn_ctx)) + goto out; + + if (!BN_div(ctx.q2, NULL, ctx.q2, ctx.m, ctx.bn_ctx)) + goto out; + + if (BN_num_bytes(ctx.q2) > SGX_MODULUS_SIZE) { + fprintf(stderr, "Too large Q2 %d bytes\n", + BN_num_bytes(ctx.q2)); + goto out; + } + + BN_bn2bin(ctx.q1, q1); + BN_bn2bin(ctx.q2, q2); + + free_q1q2_ctx(&ctx); + return true; +out: + free_q1q2_ctx(&ctx); + return false; +} + +static bool save_sigstruct(const struct sgx_sigstruct *sigstruct, + const char *path) +{ + FILE *f = fopen(path, "wb"); + + if (!f) { + fprintf(stderr, "Unable to open %s\n", path); + return false; + } + + fwrite(sigstruct, sizeof(*sigstruct), 1, f); + fclose(f); + return true; +} + +int main(int argc, char **argv) +{ + uint64_t header1[2] = {0x000000E100000006, 0x0000000000010000}; + uint64_t header2[2] = {0x0000006000000101, 0x0000000100000060}; + struct sgx_sigstruct ss; + const char *program; + int opt; + RSA *sign_key; + + memset(&ss, 0, sizeof(ss)); + ss.header.header1[0] = header1[0]; + ss.header.header1[1] = header1[1]; + ss.header.header2[0] = header2[0]; + ss.header.header2[1] = header2[1]; + ss.exponent = 3; + +#ifndef CONFIG_EINITTOKENKEY + ss.body.attributes = SGX_ATTR_MODE64BIT; +#else + ss.body.attributes = SGX_ATTR_MODE64BIT | SGX_ATTR_EINITTOKENKEY; +#endif + ss.body.xfrm = 3, + + program = argv[0]; + + do { + opt = getopt(argc, argv, ""); + switch (opt) { + case -1: + break; + default: + exit_usage(program); + } + } while (opt != -1); + + argc -= optind; + argv += optind; + + if (argc < 3) + exit_usage(program); + + /* sanity check only */ + if (check_crypto_errors()) + exit(1); + + sign_key = load_sign_key(argv[0]); + if (!sign_key) + goto out; + + BN_bn2bin(get_modulus(sign_key), ss.modulus); + + if (!measure_encl(argv[1], ss.body.mrenclave)) + goto out; + + if (!sign_encl(&ss, sign_key, ss.signature)) + goto out; + + if (!calc_q1q2(ss.signature, ss.modulus, ss.q1, ss.q2)) + goto out; + + /* convert to little endian */ + reverse_bytes(ss.signature, SGX_MODULUS_SIZE); + reverse_bytes(ss.modulus, SGX_MODULUS_SIZE); + reverse_bytes(ss.q1, SGX_MODULUS_SIZE); + reverse_bytes(ss.q2, SGX_MODULUS_SIZE); + + if (!save_sigstruct(&ss, argv[2])) + goto out; + exit(0); +out: + check_crypto_errors(); + exit(1); +} diff --git a/tools/testing/selftests/x86/sgx/signing_key.pem b/tools/testing/selftests/x86/sgx/signing_key.pem new file mode 100644 index 000000000000..d76f21f19187 --- /dev/null +++ b/tools/testing/selftests/x86/sgx/signing_key.pem @@ -0,0 +1,39 @@ +-----BEGIN RSA PRIVATE KEY----- +MIIG4wIBAAKCAYEApalGbq7Q+usM91CPtksu3D+b0Prc8gAFL6grM3mg85A5Bx8V +cfMXPgtrw8EYFwQxDAvzZWwl+9VfOX0ECrFRBkOHcOiG0SnADN8+FLj1UiNUQwbp +S6OzhNWuRcSbGraSOyUlVlV0yMQSvewyzGklOaXBe30AJqzIBc8QfdSxKuP8rs0Z +ga6k/Bl73osrYKByILJTUUeZqjLERsE6GebsdzbWgKn8qVqng4ZS4yMNg6LeRlH3 ++9CIPgg4jwpSLHcp7dq2qTIB9a0tGe9ayp+5FbucpB6U7ePold0EeRN6RlJGDF9k +L93v8P5ykz5G5gYZ2g0K1X2sHIWV4huxPgv5PXgdyQYbK+6olqj0d5rjYuwX57Ul +k6SroPS1U6UbdCjG5txM+BNGU0VpD0ZhrIRw0leQdnNcCO9sTJuInZrgYacSVJ7u +mtB+uCt+uzUesc+l+xPRYA+9e14lLkZp7AAmo9FvL816XDI09deehJ3i/LmHKCRN +tuqC5TprRjFwUr6dAgEDAoIBgG5w2Z8fNfycs0+LCnmHdJLVEotR6KFVWMpwHMz7 +wKJgJgS/Y6FMuilc8oKAuroCy11dTO5IGVKOP3uorVx2NgQtBPXwWeDGgAiU1A3Q +o4wXjYIEm4fCd63jyYPYZ2ckYXzDbjmOTdstYdPyzIhGGNEZK6eoqsRzMAPfYFPj +IMdCqHSIu6vJw1K7p+myHOsVoWshjODaZnF3LYSA0WaZ8vokjwBxUxuRxQJZjJds +s60XPtmL+qfgWtQFewoG4XL6GuD8FcXccynRRtzrLtFNPIl9BQfWfjBBhTC1/Te1 +0Z6XbZvpdUTD9OfLB7SbR2OUFNpKQgriO0iYVdbW3cr7uu38Zwp4W1TX73DPjoi6 +KNooP6SGWd4mRJW2+dUmSYS4QNG8eVVZswKcploEIXlAKRsOe4kzJJ1iETugIe85 +uX8nd1WYEp65xwoRUg8hqng0MeyveVbXqNKuJG6tzNDt9kgFYo+hmC/oouAW2Dtc +T9jdRAwKJXqA2Eg6OkgXCEv+kwKBwQDYaQiFMlFhsmLlqI+EzCUh7c941/cL7m6U +7j98+8ngl0HgCEcrc10iJVCKakQW3YbPzAx3XkKTaGjWazvvrFarXIGlOud64B8a +iWyQ7VdlnmZnNEdk+C83tI91OQeaTKqRLDGzKh29Ry/jL8Pcbazt+kDgxa0H7qJp +roADUanLQuNkYubpbhFBh3xpa2EExaVq6rF7nIVsD8W9TrbmPKA4LgH7z0iy544D +kVCNYsTjYDdUWP+WiSor8kCnnpjnN9sCgcEAw/eNezUD1UDf6OYFC9+5JZJFn4Tg +mZMyN93JKIb199ffwnjtHUSjcyiWeesXucpzwtGbTcwQnDisSW4oneYKLSEBlBaq +scqiUugyGZZOthFSCbdXYXMViK2vHrKlkse7GxVlROKcEhM/pRBrmjaGO8eWR+D4 +FO2wCXzVs3KgV6j779frw0vC54oHOxc9+Lu1rSHp4i+600koyvL/zF6U/5tZXIvN +YW2yoiQJnjCmVA1pwbwV6KAUTPDTMnBK+YjnAoHBAJBGBa4hi5Z27JkbCliIGMFJ +NPs6pLKe9GNJf6in2+sPgUAFhMeiPhbDiwbxgrnpBIqICE+ULGJFmzmc0p/IOceT +ARjR76dAFLxbnbXzj5kURETNhO36yiUjCk4mBRGIcbYddndxaSjaH+zKgpLzyJ6m +1esuc1qfFvEfAAI2cTIsl5hB70ZJYNZaUvDyQK3ZGPHxy6e9rkgKg9OJz0QoatAe +q/002yHvtAJg4F5B2JeVejg7VQ8GHB1MKxppu0TP5wKBwQCCpQj8zgKOKz/wmViy +lSYZDC5qWJW7t3bP6TDFr06lOpUsUJ4TgxeiGw778g/RMaKB4RIz3WBoJcgw9BsT +7rFza1ZiucchMcGMmswRDt8kC4wGejpA92Owc8oUdxkMhSdnY5jYlxK2t3/DYEe8 +JFl9L7mFQKVjSSAGUzkiTGrlG1Kf5UfXh9dFBq98uilQfSPIwUaWynyM23CHTKqI +Pw3/vOY9sojrnncWwrEUIG7is5vWfWPwargzSzd29YdRBe8CgcEAuRVewK/YeNOX +B7ZG6gKKsfsvrGtY7FPETzLZAHjoVXYNea4LVZ2kn4hBXXlvw/4HD+YqcTt4wmif +5JQlDvjNobUiKJZpzy7hklVhF7wZFl4pCF7Yh43q9iQ7gKTaeUG7MiaK+G8Zz8aY +HW9rsiihbdZkccMvnPfO9334XMxl3HtBRzLstjUlbLB7Sdh+7tZ3JQidCOFNs5pE +XyWwnASPu4tKfDahH1UUTp1uJcq/6716CSWg080avYxFcn75qqsb +-----END RSA PRIVATE KEY----- -- 2.20.1

5 years, 10 months

1
0
0 0

[PATCH v2 0/4] Implement FUTEX_WAIT_MULTIPLE operation

by André Almeida

Hello, This patchset implements a new futex operation, called FUTEX_WAIT_MULTIPLE, which allows a thread to wait on several futexes at the same time, and be awoken by any of them. The use case lies in the Wine implementation of the Windows NT interface WaitMultipleObjects. This Windows API function allows a thread to sleep waiting on the first of a set of event sources (mutexes, timers, signal, console input, etc) to signal. Considering this is a primitive synchronization operation for Windows applications, being able to quickly signal events on the producer side, and quickly go to sleep on the consumer side is essential for good performance of those running over Wine. Since this API exposes a mechanism to wait on multiple objects, and we might have multiple waiters for each of these events, a M->N relationship, the current Linux interfaces fell short on performance evaluation of large M,N scenarios. We experimented, for instance, with eventfd, which has performance problems discussed below, but we also experimented with userspace solutions, like making each consumer wait on a condition variable guarding the entire list of objects, and then waking up multiple variables on the producer side, but this is prohibitively expensive since we either need to signal many condition variables or share that condition variable among multiple waiters, and then verify for the event being signaled in userspace, which means dealing with often false positive wakes ups. The natural interface to implement the behavior we want, also considering that one of the waitable objects is a mutex itself, would be the futex interface. Therefore, this patchset proposes a mechanism for a thread to wait on multiple futexes at once, and wake up on the first futex that was awaken. In particular, using futexes in our Wine use case reduced the CPU utilization by 4% for the game Beat Saber and by 1.5% for the game Shadow of Tomb Raider, both running over Proton (a Wine based solution for Windows emulation), when compared to the eventfd interface. This implementation also doesn't rely of file descriptors, so it doesn't risk overflowing the resource. In time, we are also proposing modifications to glibc and libpthread to make this feature available for Linux native multithreaded applications using libpthread, which can benefit from the behavior of waiting on any of a group of futexes. Technically, the existing FUTEX_WAIT implementation can be easily reworked by using futex_wait_multiple() with a count of one, and I have a patch showing how it works. I'm not proposing it, since futex is such a tricky code, that I'd be more comfortable to have FUTEX_WAIT_MULTIPLE running upstream for a couple development cycles, before considering modifying FUTEX_WAIT. The patch series includes an extensive set of kselftests validating the behavior of the interface. We also implemented support[1] on Syzkaller and survived the fuzzy testing. Finally, if you'd rather pull directly a branch with this set you can find it here: https://gitlab.collabora.com/tonyk/linux/commits/futex-dev === Performance of eventfd === Polling on several eventfd contexts with semaphore semantics would provide us with the exact semantics we are looking for. However, as shown below, in a scenario with sufficient producers and consumers, the eventfd interface itself becomes a bottleneck, in particular because each thread will compete to acquire a sequence of waitqueue locks for each eventfd context in the poll list. In addition, in the uncontended case, where the producer is ready for consumption, eventfd still requires going into the kernel on the consumer side. When a write or a read operation in an eventfd file succeeds, it will try to wake up all threads that are waiting to perform some operation to the file. The lock (ctx->wqh.lock) that hold the access to the file value (ctx->count) is the same lock used to control access the waitqueue. When all those those thread woke, they will compete to get this lock. Along with that, the poll() also manipulates the waitqueue and need to hold this same lock. This lock is specially hard to acquire when poll() calls poll_freewait(), where it tries to free all waitqueues associated with this poll. While doing that, it will compete with a lot of read and write operations that have been waken. In our use case, with a huge number of parallel reads, writes and polls, this lock is a bottleneck and hurts the performance of applications. Our implementation of futex, however, decrease the calls of spin lock by more than 80% in some user applications. Finally, eventfd operates on file descriptors, which is a limited resource that has shown its limitation in our use cases. Despite the Windows interface not waiting on more than 64 objects at once, we still have multiple waiters at the same time, and we were easily able to exhaust the FD limits on applications like games. The RFC for this patch can be found here: https://lkml.org/lkml/2019/7/30/1399 Thanks, André [1] https://github.com/andrealmeid/syzkaller/tree/futex-wait-multiple Gabriel Krisman Bertazi (4): futex: Implement mechanism to wait on any of several futexes selftests: futex: Add FUTEX_WAIT_MULTIPLE timeout test selftests: futex: Add FUTEX_WAIT_MULTIPLE wouldblock test selftests: futex: Add FUTEX_WAIT_MULTIPLE wake up test include/uapi/linux/futex.h | 20 + kernel/futex.c | 356 +++++++++++++++++- .../selftests/futex/functional/.gitignore | 1 + .../selftests/futex/functional/Makefile | 3 +- .../futex/functional/futex_wait_multiple.c | 173 +++++++++ .../futex/functional/futex_wait_timeout.c | 38 +- .../futex/functional/futex_wait_wouldblock.c | 28 +- .../testing/selftests/futex/functional/run.sh | 3 + .../selftests/futex/include/futextest.h | 22 + 9 files changed, 635 insertions(+), 7 deletions(-) create mode 100644 tools/testing/selftests/futex/functional/futex_wait_multiple.c -- 2.25.0

5 years, 10 months

1
4
0 0

[PATCH] selftests: allow detection of build failures

by Jiri Benc

Commit 5f70bde26a48 ("selftests: fix build behaviour on targets' failures") added a logic to track failure of builds of individual targets. However, it does exactly the opposite of what a distro kernel needs: we create a RPM package with a selected set of selftests and we need the build to fail if build of any of the targets fail. Both use cases are valid. A distribution kernel is in control of what is included in the kernel and what is being built; any error needs to be flagged and acted upon. A CI system that tries to build as many tests as possible on the best effort basis is not really interested in a failure here and there. Support both use cases by introducing a FORCE_TARGETS variable. It is switched off by default to make life for CI systems easier, distributions can easily switch it on while building their packages. Reported-by: Yauheni Kaliuta <yauheni.kaliuta(a)redhat.com> Signed-off-by: Jiri Benc <jbenc(a)redhat.com> --- tools/testing/selftests/Makefile | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 5182d6078cbc..97fca70d2cd6 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -74,6 +74,12 @@ ifneq ($(SKIP_TARGETS),) override TARGETS := $(TMP) endif +# User can set FORCE_TARGETS to 1 to require all targets to be successfully +# built; make will fail if any of the targets cannot be built. If +# FORCE_TARGETS is not set (the default), make will succeed if at least one +# of the targets gets built. +FORCE_TARGETS ?= + # Clear LDFLAGS and MAKEFLAGS if called from main # Makefile to avoid test build failures when test # Makefile doesn't have explicit build rules. @@ -148,7 +154,8 @@ all: khdr for TARGET in $(TARGETS); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ mkdir $$BUILD_TARGET -p; \ - $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET; \ + $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET \ + $(if $(FORCE_TARGETS),|| exit); \ ret=$$((ret * $$?)); \ done; exit $$ret; @@ -202,7 +209,8 @@ ifdef INSTALL_PATH @ret=1; \ for TARGET in $(TARGETS); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ - $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET INSTALL_PATH=$(INSTALL_PATH)/$$TARGET install; \ + $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET INSTALL_PATH=$(INSTALL_PATH)/$$TARGET install \ + $(if $(FORCE_TARGETS),|| exit); \ ret=$$((ret * $$?)); \ done; exit $$ret; -- 2.18.1

5 years, 10 months

2
1
0 0

[PATCH] selftests/vm: Add missed tests in run_vmtests

by sj38.park＠gmail.com

From: SeongJae Park <sjpark(a)amazon.de> The commits introducing 'mlock-random-test'[1], 'map_fiex_noreplace'[2], and 'thuge-gen'[3] have not added those in the 'run_vmtests' script and thus the 'run_tests' command of kselftests doesn't run those. This commit adds those in the script. 'gup_benchmark' and 'transhuge-stress' are also not included in the 'run_vmtests', but this commit does not add those because those are for performance measurement rather than pass/fail tests. [1] commit 26b4224d9961 ("selftests: expanding more mlock selftest") [2] commit 91cbacc34512 ("tools/testing/selftests/vm/map_fixed_noreplace.c: add test for MAP_FIXED_NOREPLACE") [3] commit fcc1f2d5dd34 ("selftests: add a test program for variable huge page sizes in mmap/shmget") Signed-off-by: SeongJae Park <sjpark(a)amazon.de> --- tools/testing/selftests/vm/run_vmtests | 33 ++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests index a692ea828317..f33714843198 100755 --- a/tools/testing/selftests/vm/run_vmtests +++ b/tools/testing/selftests/vm/run_vmtests @@ -112,6 +112,17 @@ echo "NOTE: The above hugetlb tests provide minimal coverage. Use" echo " https://github.com/libhugetlbfs/libhugetlbfs.git for" echo " hugetlb regression testing." +echo "---------------------------" +echo "running map_fixed_noreplace" +echo "---------------------------" +./map_fixed_noreplace +if [ $? -ne 0 ]; then + echo "[FAIL]" + exitcode=1 +else + echo "[PASS]" +fi + echo "-------------------" echo "running userfaultfd" echo "-------------------" @@ -186,6 +197,17 @@ else echo "[PASS]" fi +echo "-------------------------" +echo "running mlock-random-test" +echo "-------------------------" +./mlock-random-test +if [ $? -ne 0 ]; then + echo "[FAIL]" + exitcode=1 +else + echo "[PASS]" +fi + echo "--------------------" echo "running mlock2-tests" echo "--------------------" @@ -197,6 +219,17 @@ else echo "[PASS]" fi +echo "-----------------" +echo "running thuge-gen" +echo "-----------------" +./thuge-gen +if [ $? -ne 0 ]; then + echo "[FAIL]" + exitcode=1 +else + echo "[PASS]" +fi + if [ $VADDR64 -ne 0 ]; then echo "-----------------------------" echo "running virtual_address_range" base-commit: d5226fa6dbae0569ee43ecfc08bdcd6770fc4755 -- 2.17.1

5 years, 10 months

1
0
0 0

[PATCH] selftests: fix too long argument

by Jiri Benc

With some shells, the command construed for install of bpf selftests becomes too large due to long list of files: make[1]: execvp: /bin/sh: Argument list too long make[1]: *** [../lib.mk:73: install] Error 127 Currently, each of the file lists is replicated three times in the command: in the shell 'if' condition, in the 'echo' and in the 'rsync'. Reduce that by one instance by using make conditionals and separate the echo and rsync into two shell commands. (One would be inclined to just remove the '@' at the beginning of the rsync command and let 'make' echo it by itself; unfortunately, it appears that the '@' in the front of mkdir silences output also for the following commands.) Also, separate handling of each of the lists to its own shell command. The semantics of the makefile is unchanged before and after the patch. The ability of individual test directories to override INSTALL_RULE is retained. Reported-by: Yauheni Kaliuta <yauheni.kaliuta(a)redhat.com> Tested-by: Yauheni Kaliuta <yauheni.kaliuta(a)redhat.com> Signed-off-by: Jiri Benc <jbenc(a)redhat.com> --- tools/testing/selftests/lib.mk | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index 1c8a1963d03f..3ed0134a764d 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -83,17 +83,20 @@ else $(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_PROGS)) endif +define INSTALL_SINGLE_RULE + $(if $(INSTALL_LIST),@mkdir -p $(INSTALL_PATH)) + $(if $(INSTALL_LIST),@echo rsync -a $(INSTALL_LIST) $(INSTALL_PATH)/) + $(if $(INSTALL_LIST),@rsync -a $(INSTALL_LIST) $(INSTALL_PATH)/) +endef + define INSTALL_RULE - @if [ "X$(TEST_PROGS)$(TEST_PROGS_EXTENDED)$(TEST_FILES)" != "X" ]; then \ - mkdir -p ${INSTALL_PATH}; \ - echo "rsync -a $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(INSTALL_PATH)/"; \ - rsync -a $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(INSTALL_PATH)/; \ - fi - @if [ "X$(TEST_GEN_PROGS)$(TEST_CUSTOM_PROGS)$(TEST_GEN_PROGS_EXTENDED)$(TEST_GEN_FILES)" != "X" ]; then \ - mkdir -p ${INSTALL_PATH}; \ - echo "rsync -a $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(INSTALL_PATH)/"; \ - rsync -a $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(INSTALL_PATH)/; \ - fi + $(eval INSTALL_LIST = $(TEST_PROGS)) $(INSTALL_SINGLE_RULE) + $(eval INSTALL_LIST = $(TEST_PROGS_EXTENDED)) $(INSTALL_SINGLE_RULE) + $(eval INSTALL_LIST = $(TEST_FILES)) $(INSTALL_SINGLE_RULE) + $(eval INSTALL_LIST = $(TEST_GEN_PROGS)) $(INSTALL_SINGLE_RULE) + $(eval INSTALL_LIST = $(TEST_CUSTOM_PROGS)) $(INSTALL_SINGLE_RULE) + $(eval INSTALL_LIST = $(TEST_GEN_PROGS_EXTENDED)) $(INSTALL_SINGLE_RULE) + $(eval INSTALL_LIST = $(TEST_GEN_FILES)) $(INSTALL_SINGLE_RULE) endef install: all -- 2.18.1

5 years, 10 months

1
0
0 0

[PATCH v4 00/12] mm/gup: track FOLL_PIN pages

by John Hubbard

Hi, Here's an update after the latest round of reviews from Kirill and Jan. There is a git repo and branch, for convenience in reviewing: git@github.com:johnhubbard/linux.git track_user_pages_v4 I'm particularly pleased that the /proc/vmstat items are now available in all configurations, seeing as how I was looking (in vain) for that information during a recent investigation of a remotely reported problem. ============================================================ Changes since v3: * Rebased onto latest linux.git * Added ACKs and reviewed-by's from Kirill Shutemov and Jan Kara. * /proc/vmstat: * Renamed items, after realizing that I hate the previous names: nr_foll_pin_requested --> nr_foll_pin_acquired nr_foll_pin_returned --> nr_foll_pin_released * Removed the CONFIG_DEBUG_VM guard, and collapsed away a wrapper routine: now just calls mod_node_page_state() directly. * Tweaked the WARN_ON_ONCE() statements in mm/hugetlb.c to be more informative, and added comments above them as well. * Fixed gup_benchmark: signed int --> unsigned long. * One or two minor formatting changes. ============================================================ Changes since v2: * Rebased onto linux.git, because the akpm tree for 5.6 has been merged. * Split the tracking patch into even more patches, as requested. * Merged Matthew Wilcox's dump_page() changes into mine, as part of the first patch. * Renamed: page_dma_pinned() --> page_maybe_dma_pinned(), in response to Kirill Shutemov's review. * Moved a WARN to the top of a routine, and fixed a typo in the commit description of patch #7, also as suggested by Kirill. ============================================================ Changes since v1: * Split the tracking patch into 6 smaller patches * Rebased onto today's linux-next/akpm (there weren't any conflicts). * Fixed an "unsigned int" vs. "int" problem in gup_benchmark, reported by Nathan Chancellor. (I don't see it in my local builds, probably because they use gcc, but an LLVM test found the mismatch.) * Fixed a huge page pincount problem (add/subtract vs. increment/decrement), spotted by Jan Kara. ============================================================ There is a reasonable case to be made for merging two of the patches (patches 7 and 8), given that patch 7 provides tracking that has upper limits on the number of pins that can be done with huge pages. Let me know if anyone wants those merged, but unless there is some weird chance of someone grabbing patch 7 and not patch 8, I don't really see the need. Meanwhile, it's easier to review in this form. Also, patch 3 has been revived. Earlier reviewers asked for it to be merged into the tracking patch (one cannot please everyone, heh), but now it's back out on it's own. This activates tracking of FOLL_PIN pages. This is in support of fixing the get_user_pages()+DMA problem described in [1]-[4]. FOLL_PIN support is now in the main linux tree. However, the patch to use FOLL_PIN to track pages was *not* submitted, because Leon saw an RDMA test suite failure that involved (I think) page refcount overflows when huge pages were used. This patch definitively solves that kind of overflow problem, by adding an exact pincount, for compound pages (of order > 1), in the 3rd struct page of a compound page. If available, that form of pincounting is used, instead of the GUP_PIN_COUNTING_BIAS approach. Thanks again to Jan Kara for that idea. Other interesting changes: * dump_page(): added one, or two new things to report for compound pages: head refcount (for all compound pages), and map_pincount (for compound pages of order > 1). * Documentation/core-api/pin_user_pages.rst: removed the "TODO" for the huge page refcount upper limit problems, and added notes about how it works now. Also added a note about the dump_page() enhancements. * Added some comments in gup.c and mm.h, to explain that there are two ways to count pinned pages: exact (for compound pages of order > 1) and fuzzy (GUP_PIN_COUNTING_BIAS: for all other pages). ============================================================ General notes about the tracking patch: This is a prerequisite to solving the problem of proper interactions between file-backed pages, and [R]DMA activities, as discussed in [1], [2], [3], [4] and in a remarkable number of email threads since about 2017. :) In contrast to earlier approaches, the page tracking can be incrementally applied to the kernel call sites that, until now, have been simply calling get_user_pages() ("gup"). In other words, opt-in by changing from this: get_user_pages() (sets FOLL_GET) put_page() to this: pin_user_pages() (sets FOLL_PIN) unpin_user_page() ============================================================ Next steps: * Convert more subsystems from get_user_pages() to pin_user_pages(). * Work with Ira and others to connect this all up with file system leases. [1] Some slow progress on get_user_pages() (Apr 2, 2019): https://lwn.net/Articles/784574/ [2] DMA and get_user_pages() (LPC: Dec 12, 2018): https://lwn.net/Articles/774411/ [3] The trouble with get_user_pages() (Apr 30, 2018): https://lwn.net/Articles/753027/ [4] LWN kernel index: get_user_pages() https://lwn.net/Kernel/Index/#Memory_management-get_user_pages John Hubbard (12): mm: dump_page(): better diagnostics for compound pages mm/gup: split get_user_pages_remote() into two routines mm/gup: pass a flags arg to __gup_device_* functions mm: introduce page_ref_sub_return() mm/gup: pass gup flags to two more routines mm/gup: require FOLL_GET for get_user_pages_fast() mm/gup: track FOLL_PIN pages mm/gup: page->hpage_pinned_refcount: exact pin counts for huge pages mm: dump_page(): better diagnostics for huge pinned pages mm/gup: /proc/vmstat: pin_user_pages (FOLL_PIN) reporting mm/gup_benchmark: support pin_user_pages() and related calls selftests/vm: run_vmtests: invoke gup_benchmark with basic FOLL_PIN coverage Documentation/core-api/pin_user_pages.rst | 59 ++- include/linux/mm.h | 108 ++++- include/linux/mm_types.h | 7 +- include/linux/mmzone.h | 2 + include/linux/page_ref.h | 9 + mm/debug.c | 61 ++- mm/gup.c | 448 ++++++++++++++++----- mm/gup_benchmark.c | 71 +++- mm/huge_memory.c | 29 +- mm/hugetlb.c | 60 ++- mm/page_alloc.c | 2 + mm/rmap.c | 6 + mm/vmstat.c | 2 + tools/testing/selftests/vm/gup_benchmark.c | 15 +- tools/testing/selftests/vm/run_vmtests | 22 + 15 files changed, 721 insertions(+), 180 deletions(-) -- 2.25.0

5 years, 10 months

3
20
0 0

[PATCH 1/3] kvm: mmu: Replace unsigned with unsigned int for PTE access

by Ben Gardon

There are several functions which pass an access permission mask for SPTEs as an unsigned. This works, but checkpatch complains about it. Switch the occurrences of unsigned to unsigned int to satisfy checkpatch. No functional change expected. Tested by running kvm-unit-tests on an Intel Haswell machine. This commit introduced no new failures. This commit can be viewed in Gerrit at: https://linux-review.googlesource.com/c/virt/kvm/kvm/+/2358 Signed-off-by: Ben Gardon <bgardon(a)google.com> Reviewed-by: Oliver Upton <oupton(a)google.com> --- arch/x86/kvm/mmu/mmu.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 84eeb61d06aa3..a9c593dec49bf 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -452,7 +452,7 @@ static u64 get_mmio_spte_generation(u64 spte) } static void mark_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 gfn, - unsigned access) + unsigned int access) { u64 gen = kvm_vcpu_memslots(vcpu)->generation & MMIO_SPTE_GEN_MASK; u64 mask = generation_mmio_spte_mask(gen); @@ -484,7 +484,7 @@ static unsigned get_mmio_spte_access(u64 spte) } static bool set_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn, - kvm_pfn_t pfn, unsigned access) + kvm_pfn_t pfn, unsigned int access) { if (unlikely(is_noslot_pfn(pfn))) { mark_mmio_spte(vcpu, sptep, gfn, access); @@ -2475,7 +2475,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, gva_t gaddr, unsigned level, int direct, - unsigned access) + unsigned int access) { union kvm_mmu_page_role role; unsigned quadrant; @@ -2990,7 +2990,7 @@ static bool kvm_is_mmio_pfn(kvm_pfn_t pfn) #define SET_SPTE_NEED_REMOTE_TLB_FLUSH BIT(1) static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, - unsigned pte_access, int level, + unsigned int pte_access, int level, gfn_t gfn, kvm_pfn_t pfn, bool speculative, bool can_unsync, bool host_writable) { @@ -3081,9 +3081,10 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, return ret; } -static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access, - int write_fault, int level, gfn_t gfn, kvm_pfn_t pfn, - bool speculative, bool host_writable) +static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, + unsigned int pte_access, int write_fault, int level, + gfn_t gfn, kvm_pfn_t pfn, bool speculative, + bool host_writable) { int was_rmapped = 0; int rmap_count; @@ -3165,7 +3166,7 @@ static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu, { struct page *pages[PTE_PREFETCH_NUM]; struct kvm_memory_slot *slot; - unsigned access = sp->role.access; + unsigned int access = sp->role.access; int i, ret; gfn_t gfn; @@ -3400,7 +3401,8 @@ static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn) } static bool handle_abnormal_pfn(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn, - kvm_pfn_t pfn, unsigned access, int *ret_val) + kvm_pfn_t pfn, unsigned int access, + int *ret_val) { /* The pfn is invalid, report the error! */ if (unlikely(is_error_pfn(pfn))) { @@ -4005,7 +4007,7 @@ static int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct) if (is_mmio_spte(spte)) { gfn_t gfn = get_mmio_spte_gfn(spte); - unsigned access = get_mmio_spte_access(spte); + unsigned int access = get_mmio_spte_access(spte); if (!check_mmio_spte(vcpu, spte)) return RET_PF_INVALID; @@ -4349,7 +4351,7 @@ static void inject_page_fault(struct kvm_vcpu *vcpu, } static bool sync_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn, - unsigned access, int *nr_present) + unsigned int access, int *nr_present) { if (unlikely(is_mmio_spte(*sptep))) { if (gfn != get_mmio_spte_gfn(*sptep)) { -- 2.25.0.341.g760bfbb309-goog

5 years, 10 months

4
8
0 0

[PATCH v6 6/6] selftests/cgroup: add tests for cloning into cgroups

by Christian Brauner

Expand the cgroup test-suite to include tests for CLONE_INTO_CGROUP. This adds the following tests: - CLONE_INTO_CGROUP manages to clone a process directly into a correctly delegated cgroup - CLONE_INTO_CGROUP fails to clone a process into a cgroup that has been removed after we've opened an fd to it - CLONE_INTO_CGROUP fails to clone a process into an invalid domain cgroup - CLONE_INTO_CGROUP adheres to the no internal process constraint - CLONE_INTO_CGROUP works with the freezer feature Cc: Tejun Heo <tj(a)kernel.org> Cc: Shuah Khan <shuah(a)kernel.org> Cc: cgroups(a)vger.kernel.org Cc: linux-kselftest(a)vger.kernel.org Acked-by: Roman Gushchin <guro(a)fb.com> Signed-off-by: Christian Brauner <christian.brauner(a)ubuntu.com> --- /* v1 */ Link: https://lore.kernel.org/r/20191218173516.7875-4-christian.brauner@ubuntu.com /* v2 */ Link: https://lore.kernel.org/r/20191223061504.28716-4-christian.brauner@ubuntu.c… unchanged /* v3 */ Link: https://lore.kernel.org/r/20200117002143.15559-6-christian.brauner@ubuntu.c… unchanged /* v4 */ Link: https://lore.kernel.org/r/20200117181219.14542-7-christian.brauner@ubuntu.c… unchanged /* v5 */ Link: https://lore.kernel.org/r/20200121154844.411-7-christian.brauner@ubuntu.com unchanged - Christian Brauner <christian.brauner(a)ubuntu.com>: - add Acked-by: Roman Gushchin <guro(a)fb.com> /* v6 */ unchanged --- tools/testing/selftests/cgroup/Makefile | 6 +- tools/testing/selftests/cgroup/cgroup_util.c | 126 ++++++++++++++++++ tools/testing/selftests/cgroup/cgroup_util.h | 4 + tools/testing/selftests/cgroup/test_core.c | 64 +++++++++ .../selftests/clone3/clone3_selftests.h | 19 ++- 5 files changed, 214 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/cgroup/Makefile b/tools/testing/selftests/cgroup/Makefile index 66aafe1f5746..967f268fde74 100644 --- a/tools/testing/selftests/cgroup/Makefile +++ b/tools/testing/selftests/cgroup/Makefile @@ -11,6 +11,6 @@ TEST_GEN_PROGS += test_freezer include ../lib.mk -$(OUTPUT)/test_memcontrol: cgroup_util.c -$(OUTPUT)/test_core: cgroup_util.c -$(OUTPUT)/test_freezer: cgroup_util.c +$(OUTPUT)/test_memcontrol: cgroup_util.c ../clone3/clone3_selftests.h +$(OUTPUT)/test_core: cgroup_util.c ../clone3/clone3_selftests.h +$(OUTPUT)/test_freezer: cgroup_util.c ../clone3/clone3_selftests.h diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c index 8f7131dcf1ff..8a637ca7d73a 100644 --- a/tools/testing/selftests/cgroup/cgroup_util.c +++ b/tools/testing/selftests/cgroup/cgroup_util.c @@ -15,6 +15,7 @@ #include <unistd.h> #include "cgroup_util.h" +#include "../clone3/clone3_selftests.h" static ssize_t read_text(const char *path, char *buf, size_t max_len) { @@ -331,12 +332,112 @@ int cg_run(const char *cgroup, } } +pid_t clone_into_cgroup(int cgroup_fd) +{ +#ifdef CLONE_ARGS_SIZE_VER2 + pid_t pid; + + struct clone_args args = { + .flags = CLONE_INTO_CGROUP, + .exit_signal = SIGCHLD, + .cgroup = cgroup_fd, + }; + + pid = sys_clone3(&args, sizeof(struct clone_args)); + /* + * Verify that this is a genuine test failure: + * ENOSYS -> clone3() not available + * E2BIG -> CLONE_INTO_CGROUP not available + */ + if (pid < 0 && (errno == ENOSYS || errno == E2BIG)) + goto pretend_enosys; + + return pid; + +pretend_enosys: +#endif + errno = ENOSYS; + return -ENOSYS; +} + +int clone_reap(pid_t pid, int options) +{ + int ret; + siginfo_t info = { + .si_signo = 0, + }; + +again: + ret = waitid(P_PID, pid, &info, options | __WALL | __WNOTHREAD); + if (ret < 0) { + if (errno == EINTR) + goto again; + return -1; + } + + if (options & WEXITED) { + if (WIFEXITED(info.si_status)) + return WEXITSTATUS(info.si_status); + } + + if (options & WSTOPPED) { + if (WIFSTOPPED(info.si_status)) + return WSTOPSIG(info.si_status); + } + + if (options & WCONTINUED) { + if (WIFCONTINUED(info.si_status)) + return 0; + } + + return -1; +} + +int dirfd_open_opath(const char *dir) +{ + return open(dir, O_DIRECTORY | O_CLOEXEC | O_NOFOLLOW | O_PATH); +} + +#define close_prot_errno(fd) \ + if (fd >= 0) { \ + int _e_ = errno; \ + close(fd); \ + errno = _e_; \ + } + +static int clone_into_cgroup_run_nowait(const char *cgroup, + int (*fn)(const char *cgroup, void *arg), + void *arg) +{ + int cgroup_fd; + pid_t pid; + + cgroup_fd = dirfd_open_opath(cgroup); + if (cgroup_fd < 0) + return -1; + + pid = clone_into_cgroup(cgroup_fd); + close_prot_errno(cgroup_fd); + if (pid == 0) + exit(fn(cgroup, arg)); + + return pid; +} + int cg_run_nowait(const char *cgroup, int (*fn)(const char *cgroup, void *arg), void *arg) { int pid; + pid = clone_into_cgroup_run_nowait(cgroup, fn, arg); + if (pid > 0) + return pid; + + /* Genuine test failure. */ + if (pid < 0 && errno != ENOSYS) + return -1; + pid = fork(); if (pid == 0) { char buf[64]; @@ -450,3 +551,28 @@ int proc_read_strstr(int pid, bool thread, const char *item, const char *needle) return strstr(buf, needle) ? 0 : -1; } + +int clone_into_cgroup_run_wait(const char *cgroup) +{ + int cgroup_fd; + pid_t pid; + + cgroup_fd = dirfd_open_opath(cgroup); + if (cgroup_fd < 0) + return -1; + + pid = clone_into_cgroup(cgroup_fd); + close_prot_errno(cgroup_fd); + if (pid < 0) + return -1; + + if (pid == 0) + exit(EXIT_SUCCESS); + + /* + * We don't care whether this fails. We only care whether the initial + * clone succeeded. + */ + (void)clone_reap(pid, WEXITED); + return 0; +} diff --git a/tools/testing/selftests/cgroup/cgroup_util.h b/tools/testing/selftests/cgroup/cgroup_util.h index 49c54fbdb229..5a1305dd1f0b 100644 --- a/tools/testing/selftests/cgroup/cgroup_util.h +++ b/tools/testing/selftests/cgroup/cgroup_util.h @@ -50,3 +50,7 @@ extern int cg_wait_for_proc_count(const char *cgroup, int count); extern int cg_killall(const char *cgroup); extern ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t size); extern int proc_read_strstr(int pid, bool thread, const char *item, const char *needle); +extern pid_t clone_into_cgroup(int cgroup_fd); +extern int clone_reap(pid_t pid, int options); +extern int clone_into_cgroup_run_wait(const char *cgroup); +extern int dirfd_open_opath(const char *dir); diff --git a/tools/testing/selftests/cgroup/test_core.c b/tools/testing/selftests/cgroup/test_core.c index c5ca669feb2b..96e016ccafe0 100644 --- a/tools/testing/selftests/cgroup/test_core.c +++ b/tools/testing/selftests/cgroup/test_core.c @@ -25,8 +25,11 @@ static int test_cgcore_populated(const char *root) { int ret = KSFT_FAIL; + int err; char *cg_test_a = NULL, *cg_test_b = NULL; char *cg_test_c = NULL, *cg_test_d = NULL; + int cgroup_fd = -EBADF; + pid_t pid; cg_test_a = cg_name(root, "cg_test_a"); cg_test_b = cg_name(root, "cg_test_a/cg_test_b"); @@ -78,6 +81,52 @@ static int test_cgcore_populated(const char *root) if (cg_read_strcmp(cg_test_d, "cgroup.events", "populated 0\n")) goto cleanup; + /* Test that we can directly clone into a new cgroup. */ + cgroup_fd = dirfd_open_opath(cg_test_d); + if (cgroup_fd < 0) + goto cleanup; + + pid = clone_into_cgroup(cgroup_fd); + if (pid < 0) { + if (errno == ENOSYS) + goto cleanup_pass; + goto cleanup; + } + + if (pid == 0) { + if (raise(SIGSTOP)) + exit(EXIT_FAILURE); + exit(EXIT_SUCCESS); + } + + err = cg_read_strcmp(cg_test_d, "cgroup.events", "populated 1\n"); + + (void)clone_reap(pid, WSTOPPED); + (void)kill(pid, SIGCONT); + (void)clone_reap(pid, WEXITED); + + if (err) + goto cleanup; + + if (cg_read_strcmp(cg_test_d, "cgroup.events", "populated 0\n")) + goto cleanup; + + /* Remove cgroup. */ + if (cg_test_d) { + cg_destroy(cg_test_d); + free(cg_test_d); + cg_test_d = NULL; + } + + pid = clone_into_cgroup(cgroup_fd); + if (pid < 0) + goto cleanup_pass; + if (pid == 0) + exit(EXIT_SUCCESS); + (void)clone_reap(pid, WEXITED); + goto cleanup; + +cleanup_pass: ret = KSFT_PASS; cleanup: @@ -93,6 +142,8 @@ static int test_cgcore_populated(const char *root) free(cg_test_c); free(cg_test_b); free(cg_test_a); + if (cgroup_fd >= 0) + close(cgroup_fd); return ret; } @@ -136,6 +187,16 @@ static int test_cgcore_invalid_domain(const char *root) if (errno != EOPNOTSUPP) goto cleanup; + if (!clone_into_cgroup_run_wait(child)) + goto cleanup; + + if (errno == ENOSYS) + goto cleanup_pass; + + if (errno != EOPNOTSUPP) + goto cleanup; + +cleanup_pass: ret = KSFT_PASS; cleanup: @@ -345,6 +406,9 @@ static int test_cgcore_internal_process_constraint(const char *root) if (!cg_enter_current(parent)) goto cleanup; + if (!clone_into_cgroup_run_wait(parent)) + goto cleanup; + ret = KSFT_PASS; cleanup: diff --git a/tools/testing/selftests/clone3/clone3_selftests.h b/tools/testing/selftests/clone3/clone3_selftests.h index a3f2c8ad8bcc..91c1a78ddb39 100644 --- a/tools/testing/selftests/clone3/clone3_selftests.h +++ b/tools/testing/selftests/clone3/clone3_selftests.h @@ -5,12 +5,24 @@ #define _GNU_SOURCE #include <sched.h> +#include <linux/sched.h> +#include <linux/types.h> #include <stdint.h> #include <syscall.h> -#include <linux/types.h> +#include <sys/wait.h> + +#include "../kselftest.h" #define ptr_to_u64(ptr) ((__u64)((uintptr_t)(ptr))) +#ifndef CLONE_INTO_CGROUP +#define CLONE_INTO_CGROUP 0x200000000ULL /* Clone into a specific cgroup given the right permissions. */ +#endif + +#ifndef CLONE_ARGS_SIZE_VER0 +#define CLONE_ARGS_SIZE_VER0 64 +#endif + #ifndef __NR_clone3 #define __NR_clone3 -1 struct clone_args { @@ -22,10 +34,13 @@ struct clone_args { __aligned_u64 stack; __aligned_u64 stack_size; __aligned_u64 tls; +#define CLONE_ARGS_SIZE_VER1 80 __aligned_u64 set_tid; __aligned_u64 set_tid_size; +#define CLONE_ARGS_SIZE_VER2 88 + __aligned_u64 cgroup; }; -#endif +#endif /* __NR_clone3 */ static pid_t sys_clone3(struct clone_args *args, size_t size) { -- 2.25.0

5 years, 10 months

1
0
0 0

[PATCH v25 20/21] selftests/x86: Add vDSO selftest for SGX

by Jarkko Sakkinen

Expand the selftest by invoking the enclave by using __vdso_sgx_enter_enclave() in addition to direct ENCLS[EENTER]. Cc: linux-kselftest(a)vger.kernel.org Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen(a)linux.intel.com> --- tools/testing/selftests/x86/sgx/main.c | 132 +++++++++++++++++++++ tools/testing/selftests/x86/sgx/sgx_call.S | 43 +++++++ tools/testing/selftests/x86/sgx/sgx_call.h | 3 + 3 files changed, 178 insertions(+) diff --git a/tools/testing/selftests/x86/sgx/main.c b/tools/testing/selftests/x86/sgx/main.c index 48ed5fdfb3cb..d97cc3cf0093 100644 --- a/tools/testing/selftests/x86/sgx/main.c +++ b/tools/testing/selftests/x86/sgx/main.c @@ -21,6 +21,109 @@ #define PAGE_SIZE 4096 static const uint64_t MAGIC = 0x1122334455667788ULL; +void *eenter; + +struct vdso_symtab { + Elf64_Sym *elf_symtab; + const char *elf_symstrtab; + Elf64_Word *elf_hashtab; +}; + +static void *vdso_get_base_addr(char *envp[]) +{ + Elf64_auxv_t *auxv; + int i; + + for (i = 0; envp[i]; i++) + ; + + auxv = (Elf64_auxv_t *)&envp[i + 1]; + + for (i = 0; auxv[i].a_type != AT_NULL; i++) { + if (auxv[i].a_type == AT_SYSINFO_EHDR) + return (void *)auxv[i].a_un.a_val; + } + + return NULL; +} + +static Elf64_Dyn *vdso_get_dyntab(void *addr) +{ + Elf64_Ehdr *ehdr = addr; + Elf64_Phdr *phdrtab = addr + ehdr->e_phoff; + int i; + + for (i = 0; i < ehdr->e_phnum; i++) + if (phdrtab[i].p_type == PT_DYNAMIC) + return addr + phdrtab[i].p_offset; + + return NULL; +} + +static void *vdso_get_dyn(void *addr, Elf64_Dyn *dyntab, Elf64_Sxword tag) +{ + int i; + + for (i = 0; dyntab[i].d_tag != DT_NULL; i++) + if (dyntab[i].d_tag == tag) + return addr + dyntab[i].d_un.d_ptr; + + return NULL; +} + +static bool vdso_get_symtab(void *addr, struct vdso_symtab *symtab) +{ + Elf64_Dyn *dyntab = vdso_get_dyntab(addr); + + symtab->elf_symtab = vdso_get_dyn(addr, dyntab, DT_SYMTAB); + if (!symtab->elf_symtab) + return false; + + symtab->elf_symstrtab = vdso_get_dyn(addr, dyntab, DT_STRTAB); + if (!symtab->elf_symstrtab) + return false; + + symtab->elf_hashtab = vdso_get_dyn(addr, dyntab, DT_HASH); + if (!symtab->elf_hashtab) + return false; + + return true; +} + +static unsigned long elf_sym_hash(const char *name) +{ + unsigned long h = 0, high; + + while (*name) { + h = (h << 4) + *name++; + high = h & 0xf0000000; + + if (high) + h ^= high >> 24; + + h &= ~high; + } + + return h; +} + +static Elf64_Sym *vdso_symtab_get(struct vdso_symtab *symtab, const char *name) +{ + Elf64_Word bucketnum = symtab->elf_hashtab[0]; + Elf64_Word *buckettab = &symtab->elf_hashtab[2]; + Elf64_Word *chaintab = &symtab->elf_hashtab[2 + bucketnum]; + Elf64_Sym *sym; + Elf64_Word i; + + for (i = buckettab[elf_sym_hash(name) % bucketnum]; i != STN_UNDEF; + i = chaintab[i]) { + sym = &symtab->elf_symtab[i]; + if (!strcmp(name, &symtab->elf_symstrtab[sym->st_name])) + return sym; + } + + return NULL; +} static bool encl_create(int dev_fd, unsigned long bin_size, struct sgx_secs *secs) @@ -218,10 +321,14 @@ bool load_sigstruct(const char *path, void *sigstruct) int main(int argc, char *argv[], char *envp[]) { + struct sgx_enclave_exception exception; struct sgx_sigstruct sigstruct; + struct vdso_symtab symtab; + Elf64_Sym *eenter_sym; struct sgx_secs secs; uint64_t result = 0; off_t bin_size; + void *addr; void *bin; if (!encl_data_map("encl.bin", &bin, &bin_size)) @@ -243,5 +350,30 @@ int main(int argc, char *argv[], char *envp[]) printf("Output: 0x%lx\n", result); + memset(&exception, 0, sizeof(exception)); + + addr = vdso_get_base_addr(envp); + if (!addr) + exit(1); + + if (!vdso_get_symtab(addr, &symtab)) + exit(1); + + eenter_sym = vdso_symtab_get(&symtab, "__vdso_sgx_enter_enclave"); + if (!eenter_sym) + exit(1); + eenter = addr + eenter_sym->st_value; + + printf("Input: 0x%lx\n", MAGIC); + + sgx_call_vdso((void *)&MAGIC, &result, 0, NULL, NULL, NULL, + (void *)secs.base, &exception, NULL); + if (result != MAGIC) { + fprintf(stderr, "0x%lx != 0x%lx\n", result, MAGIC); + exit(1); + } + + printf("Output: 0x%lx\n", result); + exit(0); } diff --git a/tools/testing/selftests/x86/sgx/sgx_call.S b/tools/testing/selftests/x86/sgx/sgx_call.S index ca4c7893f9d9..e71f44f7a995 100644 --- a/tools/testing/selftests/x86/sgx/sgx_call.S +++ b/tools/testing/selftests/x86/sgx/sgx_call.S @@ -21,3 +21,46 @@ sgx_async_exit: ENCLU pop %rbx ret + + .global sgx_call_vdso +sgx_call_vdso: + .cfi_startproc + push %r15 + .cfi_adjust_cfa_offset 8 + .cfi_rel_offset %r15, 0 + push %r14 + .cfi_adjust_cfa_offset 8 + .cfi_rel_offset %r14, 0 + push %r13 + .cfi_adjust_cfa_offset 8 + .cfi_rel_offset %r13, 0 + push %r12 + .cfi_adjust_cfa_offset 8 + .cfi_rel_offset %r12, 0 + push %rbx + .cfi_adjust_cfa_offset 8 + .cfi_rel_offset %rbx, 0 + push $0 + .cfi_adjust_cfa_offset 8 + push 0x48(%rsp) + .cfi_adjust_cfa_offset 8 + push 0x48(%rsp) + .cfi_adjust_cfa_offset 8 + push 0x48(%rsp) + .cfi_adjust_cfa_offset 8 + mov $2, %eax + call *eenter(%rip) + add $0x20, %rsp + .cfi_adjust_cfa_offset -0x20 + pop %rbx + .cfi_adjust_cfa_offset -8 + pop %r12 + .cfi_adjust_cfa_offset -8 + pop %r13 + .cfi_adjust_cfa_offset -8 + pop %r14 + .cfi_adjust_cfa_offset -8 + pop %r15 + .cfi_adjust_cfa_offset -8 + ret + .cfi_endproc diff --git a/tools/testing/selftests/x86/sgx/sgx_call.h b/tools/testing/selftests/x86/sgx/sgx_call.h index bf72068ada23..a4072c5ecce7 100644 --- a/tools/testing/selftests/x86/sgx/sgx_call.h +++ b/tools/testing/selftests/x86/sgx/sgx_call.h @@ -8,4 +8,7 @@ void sgx_call_eenter(void *rdi, void *rsi, void *entry); +int sgx_call_vdso(void *rdi, void *rsi, long rdx, void *rcx, void *r8, void *r9, + void *tcs, struct sgx_enclave_exception *ei, void *cb); + #endif /* SGX_CALL_H */ -- 2.20.1

5 years, 10 months

1
0
0 0

2025

2024

2023

2022

2021

2020

2019

2018

2017

Linux-kselftest-mirror February 2020