v4: https://lkml.org/lkml/2020/9/2/356

v4 --> v5
Based on comments from Artem Bityutskiy, evaluating timer-based wakeup latencies may not be a fruitful measurement, especially on x86 platforms, which can pre-arm a CPU when a timer is set.
Hence, only the IPI-based tests are included for latency measurement, to achieve the expected behaviour across platforms.
Kernel module + bash selftest approach, which presents lower deviations and higher accuracy: https://lkml.org/lkml/2020/7/21/567

---
This patch series introduces a mechanism to measure wakeup latency for IPI-based interrupts. The motivation behind the series is to find significant deviations from the advertised latency values.

To achieve this in userspace, IPI latencies are measured by sending information through pipes and thereby inducing a wakeup on the destination CPU.

To account for delays from kernel-userspace interaction, baseline observations are taken on a 100% busy CPU, and subsequent observations must be considered relative to that baseline.
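For illustration, a rough self-contained sketch of the pipe-based measurement idea follows (this is not the selftest itself, which additionally pins the writer thread, records baselines and toggles idle states through sysfs; CPU numbers 0 and 1 and the build line are placeholders). Build with something like "gcc -O2 sketch.c -lpthread":

  #define _GNU_SOURCE
  #include <pthread.h>
  #include <sched.h>
  #include <stdio.h>
  #include <time.h>
  #include <unistd.h>

  static int pipe_fd[2];

  /* Pin the calling thread to a single CPU */
  static void pin_to_cpu(int cpu)
  {
          cpu_set_t set;

          CPU_ZERO(&set);
          CPU_SET(cpu, &set);
          pthread_setaffinity_np(pthread_self(), sizeof(set), &set);
  }

  /* The reader blocks on the pipe on the destination CPU and is woken
   * up by the write issued from the source CPU
   */
  static void *reader(void *unused)
  {
          char c;

          pin_to_cpu(1);                  /* placeholder destination CPU */
          read(pipe_fd[0], &c, 1);
          return NULL;
  }

  int main(void)
  {
          struct timespec start, end;
          pthread_t tid;
          char c = 'P';

          pipe(pipe_fd);
          pthread_create(&tid, NULL, reader, NULL);
          sleep(1);                       /* let the destination CPU go idle */

          pin_to_cpu(0);                  /* placeholder source CPU */
          clock_gettime(CLOCK_MONOTONIC, &start);
          write(pipe_fd[1], &c, 1);       /* wakes the reader up */
          pthread_join(tid, NULL);
          clock_gettime(CLOCK_MONOTONIC, &end);

          printf("wakeup latency: %ld ns\n",
                 (end.tv_sec - start.tv_sec) * 1000000000L +
                 (end.tv_nsec - start.tv_nsec));
          return 0;
  }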
One downside of the userspace approach, in contrast to the kernel implementation, is that the run-to-run variance can be high, on the order of milliseconds, which at times is the same order of magnitude as the quantity being measured.

Another downside is that the userspace approach takes much longer to run; hence "quick" and "full" command-line options are added so that a quick one-CPU test can be carried out when needed, while a comprehensive full-system test remains available otherwise.
Usage
-----
./cpuidle --mode <full / quick / num_cpus> --output <output location>

full:     run on all CPUs
quick:    run on a random CPU
num_cpus: limit the number of CPUs to run on
Sample output snippet
---------------------
--IPI Latency Test---
SRC_CPU   DEST_CPU   IPI_Latency(ns)
...
0         5          256178
0         6          478161
0         7          285445
0         8          273553
Expected IPI latency(ns): 100000
Observed Average IPI latency(ns): 248334
Pratik Rajesh Sampat (1):
  selftests/cpuidle: Add support for cpuidle latency measurement
 tools/testing/selftests/Makefile          |   1 +
 tools/testing/selftests/cpuidle/Makefile  |   7 +
 tools/testing/selftests/cpuidle/cpuidle.c | 479 ++++++++++++++++++++++
 tools/testing/selftests/cpuidle/settings  |   1 +
 4 files changed, 488 insertions(+)
 create mode 100644 tools/testing/selftests/cpuidle/Makefile
 create mode 100644 tools/testing/selftests/cpuidle/cpuidle.c
 create mode 100644 tools/testing/selftests/cpuidle/settings
Measure cpuidle latencies on wakeup and compare them with the advertised wakeup latencies for each idle state.

Cpuidle wakeup latencies are determined for IPIs and can help identify any deviations from what is advertised by the hardware.

A baseline measurement for each IPI case is taken at 100 percent CPU usage to account for the kernel-userspace overhead during execution.
Signed-off-by: Pratik Rajesh Sampat <psampat@linux.ibm.com>
---
 tools/testing/selftests/Makefile          |   1 +
 tools/testing/selftests/cpuidle/Makefile  |   7 +
 tools/testing/selftests/cpuidle/cpuidle.c | 479 ++++++++++++++++++++++
 tools/testing/selftests/cpuidle/settings  |   1 +
 4 files changed, 488 insertions(+)
 create mode 100644 tools/testing/selftests/cpuidle/Makefile
 create mode 100644 tools/testing/selftests/cpuidle/cpuidle.c
 create mode 100644 tools/testing/selftests/cpuidle/settings
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 9018f45d631d..2bb0e87f76fd 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -8,6 +8,7 @@ TARGETS += cgroup
 TARGETS += clone3
 TARGETS += core
 TARGETS += cpufreq
+TARGETS += cpuidle
 TARGETS += cpu-hotplug
 TARGETS += drivers/dma-buf
 TARGETS += efivarfs
diff --git a/tools/testing/selftests/cpuidle/Makefile b/tools/testing/selftests/cpuidle/Makefile
new file mode 100644
index 000000000000..d332485e1bc5
--- /dev/null
+++ b/tools/testing/selftests/cpuidle/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+TEST_GEN_PROGS := cpuidle
+
+CFLAGS += -O2
+LDLIBS += -lpthread
+
+include ../lib.mk
diff --git a/tools/testing/selftests/cpuidle/cpuidle.c b/tools/testing/selftests/cpuidle/cpuidle.c
new file mode 100644
index 000000000000..b2f6a0e655b8
--- /dev/null
+++ b/tools/testing/selftests/cpuidle/cpuidle.c
@@ -0,0 +1,479 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Cpuidle latency measurement microbenchmark
+ *
+ * A mechanism to measure wakeup latency for IPI based interrupts. Results of
+ * this microbenchmark can be used to check and validate against the
+ * advertised latencies for each cpuidle state
+ *
+ * IPIs (using pipes) are used to wake the CPU up and measure the
+ * time difference
+ *
+ * Baseline measurements on 100% busy load are used to account for the
+ * kernel-userspace overhead
+ *
+ * Usage:
+ * ./cpuidle --mode <full / quick / num_cpus> --output <output location>
+ *
+ * Copyright (C) 2020 Pratik Rajesh Sampat psampat@linux.ibm.com, IBM
+ */
+
+#define _GNU_SOURCE
+#include <assert.h>
+#include <dirent.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <pthread.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+#include <unistd.h>
+
+#define READ 0
+#define WRITE 1
+
+static int pipe_fd[2];
+static int *cpu_list;
+static int cpus;
+static int idle_states;
+static uint64_t *latency_list;
+
+static char *log_file = "cpuidle.log";
+
+static int get_online_cpus(int *online_cpu_list, int total_cpus)
+{
+        char filename[80];
+        int i, index = 0;
+        FILE *fptr;
+
+        for (i = 0; i < total_cpus; i++) {
+                char status;
+
+                sprintf(filename, "/sys/devices/system/cpu/cpu");
+                sprintf(filename + strlen(filename), "%d%s", i, "/online");
+                fptr = fopen(filename, "r");
+                if (!fptr)
+                        continue;
+                assert(fscanf(fptr, "%c", &status) != EOF);
+                if (status == '1')
+                        online_cpu_list[index++] = i;
+                fclose(fptr);
+        }
+        return index;
+}
+
+static uint64_t us_to_ns(uint64_t val)
+{
+        return val * 1000;
+}
+
+static void get_latency(int cpu)
+{
+        char filename[80];
+        uint64_t latency;
+        FILE *fptr;
+        int state;
+
+        for (state = 0; state < idle_states; state++) {
+                sprintf(filename, "%s%d%s%d%s", "/sys/devices/system/cpu/cpu",
+                        cpu, "/cpuidle/state",
+                        state, "/latency");
+                fptr = fopen(filename, "r");
+                assert(fptr);
+
+                assert(fscanf(fptr, "%ld", &latency) != EOF);
+                latency_list[state] = latency;
+                fclose(fptr);
+        }
+}
+
+static int get_idle_state_count(int cpu)
+{
+        struct dirent *entry;
+        int dir_count = 0;
+        char filename[80];
+        DIR *dirp;
+
+        sprintf(filename, "%s%d%s", "/sys/devices/system/cpu/cpu",
+                cpu, "/cpuidle");
+
+        dirp = opendir(filename);
+        if (!dirp)
+                return -1;
+        while ((entry = readdir(dirp)) != NULL) {
+                if (entry->d_type == DT_DIR) {
+                        if (strcmp(entry->d_name, ".") == 0 ||
+                            strcmp(entry->d_name, "..") == 0)
+                                continue;
+
+                        dir_count++;
+                }
+        }
+        closedir(dirp);
+        return dir_count;
+}
+
+/* Enable or disable all idle states */
+static int state_all_idle(char *disable)
+{
+        char filename[80];
+        FILE *fptr;
+        int i, j;
+
+        for (i = 0; i < cpus; i++) {
+                for (j = 0; j < idle_states; j++) {
+                        sprintf(filename, "%s%d%s%d%s",
+                                "/sys/devices/system/cpu/cpu", cpu_list[i],
+                                "/cpuidle/state", j, "/disable");
+                        fptr = fopen(filename, "w");
+                        assert(fptr);
+                        fprintf(fptr, "%s", disable);
+                        fclose(fptr);
+                }
+        }
+        return 0;
+}
+
+/* Disable all idle states */
+static int cpuidle_disable_all_states(void)
+{
+        return state_all_idle("1");
+}
+
+static int cpuidle_enable_all_states(void)
+{
+        return state_all_idle("0");
+}
+
+static int state_operation(char *disable, int state)
+{
+        char filename[80];
+        FILE *fptr;
+        int i;
+
+        for (i = 0; i < cpus; i++) {
+                sprintf(filename, "%s%d%s%d%s", "/sys/devices/system/cpu/cpu",
+                        cpu_list[i], "/cpuidle/state", state, "/disable");
+                fptr = fopen(filename, "w");
+                assert(fptr);
+                fprintf(fptr, "%s", disable);
+                fclose(fptr);
+        }
+        return 0;
+}
+
+static int cpuidle_enable_state(int state)
+{
+        return state_operation("0", state);
+}
+
+static int cpuidle_disable_state(int state)
+{
+        return state_operation("1", state);
+}
+
+static uint64_t average(uint64_t *arr, int size)
+{
+        int i, sum = 0;
+
+        assert(size != 0);
+        for (i = 0; i < size; i++)
+                sum += arr[i];
+        return sum / size;
+}
+
+static pthread_t start_thread_on(void *(*fn)(void *), void *arg, uint64_t cpu)
+{
+        pthread_attr_t attr;
+        cpu_set_t cpuset;
+        pthread_t tid;
+        int rc;
+
+        CPU_ZERO(&cpuset);
+        CPU_SET(cpu, &cpuset);
+
+        rc = pthread_attr_init(&attr);
+        if (rc) {
+                perror("pthread_attr_init");
+                exit(1);
+        }
+
+        rc = pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset);
+        if (rc) {
+                perror("pthread_attr_setaffinity_np");
+                exit(1);
+        }
+
+        rc = pthread_create(&tid, &attr, fn, arg);
+        if (rc) {
+                perror("pthread_create");
+                exit(1);
+        }
+        return tid;
+}
+
+void *util_full_cpu(void *unused)
+{
+        FILE *fptr;
+
+        fptr = fopen("/dev/null", "w");
+        assert(fptr);
+        while (1) {
+                fprintf(fptr, "0");
+                nanosleep((const struct timespec[]){{0, 10L}}, NULL);
+        }
+        fclose(fptr);
+}
+
+/* IPI based wakeup latencies */
+struct latency {
+        unsigned int src_cpu;
+        unsigned int dest_cpu;
+        struct timespec time_start;
+        struct timespec time_end;
+        uint64_t latency_ns;
+} ipi_wakeup;
+
+static void *writer(void *unused)
+{
+        signed char c = 'P';
+
+        assert(write(pipe_fd[WRITE], &c, 1) == 1);
+        ipi_wakeup.src_cpu = sched_getcpu();
+
+        return NULL;
+}
+
+static void *reader(void *unused)
+{
+        signed char c;
+
+        assert(read(pipe_fd[READ], &c, 1) == 1);
+        ipi_wakeup.dest_cpu = sched_getcpu();
+
+        return NULL;
+}
+
+static void ipi_test_once(int baseline, int src_cpu, int dest_cpu)
+{
+        pthread_t tid, tid1, baseline_tid;
+
+        if (baseline) {
+                baseline_tid = start_thread_on(util_full_cpu, NULL, dest_cpu);
+                /* Run process for long enough to gain 100% usage */
+                sleep(2);
+        }
+
+        clock_gettime(CLOCK_REALTIME, &ipi_wakeup.time_start);
+
+        tid = start_thread_on(writer, NULL, src_cpu);
+        pthread_join(tid, NULL);
+        tid1 = start_thread_on(reader, NULL, dest_cpu);
+        pthread_join(tid1, NULL);
+
+        clock_gettime(CLOCK_REALTIME, &ipi_wakeup.time_end);
+        ipi_wakeup.latency_ns = (ipi_wakeup.time_end.tv_sec -
+                        ipi_wakeup.time_start.tv_sec) * 1000000000ULL +
+                        ipi_wakeup.time_end.tv_nsec - ipi_wakeup.time_start.tv_nsec;
+
+        if (baseline)
+                pthread_cancel(baseline_tid);
+}
+
+static void ipi_test(int src_cpu)
+{
+        uint64_t avg_arr[cpus], avg_latency;
+        int cpu, state;
+        FILE *fptr;
+
+        assert(cpuidle_disable_all_states() == 0);
+
+        if (pipe(pipe_fd))
+                exit(1);
+
+        fptr = fopen(log_file, "a");
+        fprintf(fptr, "----IPI TEST----\n");
+
+        fprintf(fptr, "----Baseline IPI Latency----\n");
+        fprintf(fptr, "%s %10s %18s\n", "SRC_CPU", "DEST_CPU",
+                "Baseline_latency(ns)");
+        /* Run the test as dummy once to stabilize */
+        ipi_test_once(1, src_cpu, cpu_list[0]);
+        for (cpu = 0; cpu < cpus; cpu++) {
+                ipi_test_once(1, src_cpu, cpu_list[cpu]);
+                fprintf(fptr, "%3d %10d %12ld\n", ipi_wakeup.src_cpu,
+                        ipi_wakeup.dest_cpu,
+                        ipi_wakeup.latency_ns);
+                avg_arr[cpu] = ipi_wakeup.latency_ns;
+        }
+        avg_latency = average(avg_arr, cpus);
+        fprintf(fptr, "Baseline average IPI latency(ns): %ld\n\n", avg_latency);
+
+        for (state = 0; state < idle_states; state++) {
+                fprintf(fptr, "--Enabling state: %d--\n", state);
+                assert(cpuidle_enable_state(state) == 0);
+                fprintf(fptr, "%s %10s %18s\n", "SRC_CPU", "DEST_CPU",
+                        "IPI_Latency(ns)");
+                for (cpu = 0; cpu < cpus; cpu++) {
+                        /* Allow sufficient cycles to go idle */
+                        sleep(1);
+                        ipi_test_once(0, src_cpu, cpu_list[cpu]);
+                        fprintf(fptr, "%3d %10d %18ld\n", ipi_wakeup.src_cpu,
+                                ipi_wakeup.dest_cpu,
+                                ipi_wakeup.latency_ns);
+                        avg_arr[cpu] = ipi_wakeup.latency_ns;
+                }
+                fprintf(fptr, "Expected Latency(ns): %ld\n",
+                        us_to_ns(latency_list[state]));
+                avg_latency = average(avg_arr, cpus);
+                fprintf(fptr, "Observed Average IPI latency(ns): %ld\n\n",
+                        avg_latency);
+                assert(cpuidle_disable_state(state) == 0);
+        }
+
+        assert(cpuidle_enable_all_states() == 0);
+        fclose(fptr);
+}
+
+static struct option options[] = {
+        {"output", required_argument, 0, 'o'},
+        {"mode", required_argument, 0, 'm'},
+};
+
+static void usage(void)
+{
+        fprintf(stderr, "Usage: cpuidle <options>\n\n");
+        fprintf(stderr, "\t\t--mode=X\t(quick / full / <num_cpus>)\n");
+        fprintf(stderr, "\t\t--output=X\tOutput loc (def: cpuidle.log)\n\n");
+}
+
+void get_n_random_cpus(int *shf_list, int shf_size, int *list, int n)
+{
+        /* Shuffle the Online CPU list */
+        int i;
+        int shuffle_index_list[shf_size];
+
+        for (i = 0; i < shf_size; i++)
+                shuffle_index_list[i] = i;
+        for (i = 0; i < shf_size; i++) {
+                int idx = i + rand() % (shf_size - i);
+                int temp = shuffle_index_list[i];
+
+                shuffle_index_list[i] = shuffle_index_list[idx];
+                shuffle_index_list[idx] = temp;
+        }
+
+        /* Pick the first n from the shf_list elements */
+        for (i = 0; i < n; i++)
+                list[i] = shf_list[shuffle_index_list[i]];
+}
+
+int main(int argc, char *argv[])
+{
+        int total_cpus, online_cpus, option_index = 0;
+        int *online_cpu_list;
+        signed char c;
+        FILE *fptr;
+
+        if (getuid()) {
+                fprintf(stderr, "cpuidle latency test must run as root\n");
+                exit(1);
+        }
+
+        total_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+        online_cpu_list = malloc(total_cpus * sizeof(int));
+        if (!online_cpu_list) {
+                perror("Malloc failure");
+                exit(1);
+        }
+
+        online_cpus = get_online_cpus(&online_cpu_list[0], total_cpus);
+        if (!online_cpus) {
+                perror("Unable to get online CPUS");
+                exit(1);
+        }
+
+        /* Get one CPU for a quick test */
+        cpus = 1;
+        cpu_list = malloc(1 * sizeof(int));
+        srand(time(NULL));
+        cpu_list[0] = online_cpu_list[rand() % online_cpus];
+
+        while (1) {
+                c = getopt_long(argc, argv, "", options, &option_index);
+                if (c == -1)
+                        break;
+
+                switch (c) {
+                case 'o':
+                        log_file = optarg;
+                        break;
+                case 'm':
+                        if (!strcmp(optarg, "full")) {
+                                cpu_list = realloc(cpu_list,
+                                                online_cpus * sizeof(int));
+                                memcpy(cpu_list, online_cpu_list,
+                                        online_cpus * sizeof(int));
+                                cpus = online_cpus;
+                        } else if (strcmp(optarg, "quick")) {
+                                int opt_cpus;
+
+                                opt_cpus = atoi(optarg);
+                                if (!opt_cpus) {
+                                        fprintf(stderr, "Error parsing mode\n");
+                                        usage();
+                                        exit(1);
+                                }
+                                if (opt_cpus > online_cpus) {
+                                        fprintf(stderr, "Number of CPUS > Online CPUs\n");
+                                        usage();
+                                        exit(1);
+                                }
+                                cpu_list = realloc(cpu_list,
+                                                opt_cpus * sizeof(int));
+                                get_n_random_cpus(online_cpu_list, online_cpus,
+                                                &cpu_list[0], opt_cpus);
+                                cpus = opt_cpus;
+                        }
+                        break;
+                default:
+                        usage();
+                        exit(1);
+                }
+        }
+
+        idle_states = get_idle_state_count(online_cpu_list[0]);
+        if (idle_states == -1) {
+                perror("Unable to get idle states");
+                exit(1);
+        }
+
+        fptr = fopen(log_file, "w+");
+        fprintf(fptr, "cpuidle latency selftests. IPI\n");
+        fprintf(fptr, "Number of CPUS: %d\n", total_cpus);
+        fprintf(fptr, "Number of idle states: %d\n", idle_states);
+        fclose(fptr);
+
+        latency_list = malloc(idle_states * sizeof(uint64_t));
+        if (!latency_list) {
+                perror("Malloc failure");
+                exit(1);
+        }
+
+        get_latency(online_cpu_list[0]);
+
+        ipi_test(online_cpu_list[0]);
+        printf("IPI test done\n");
+        fflush(stdout);
+
+        printf("Output logged at: %s\n", log_file);
+
+        free(latency_list);
+        free(cpu_list);
+        free(online_cpu_list);
+        return 0;
+}
diff --git a/tools/testing/selftests/cpuidle/settings b/tools/testing/selftests/cpuidle/settings
new file mode 100644
index 000000000000..e7b9417537fb
--- /dev/null
+++ b/tools/testing/selftests/cpuidle/settings
@@ -0,0 +1 @@
+timeout=0