Hi
x86 PTI trampolines were backported to 4.14, refer commit: c631a16e5b84521ae80ab10d47cd6ccd2afc6817 ("x86/entry/64: Create a per-CPU SYSCALL entry trampoline") so the corresponding perf tools workaround also needs to be backported. Here are 5 patches for 4.14 stable backported from 4.18, to address that.
Regards Adrian
commit dbbd34a666ee117d0e39e71a47f38f02c4a5c698 upstream.
Add a function to identify the machine architecture.
Signed-off-by: Adrian Hunter adrian.hunter@intel.com Tested-by: Jiri Olsa jolsa@kernel.org Cc: Alexander Shishkin alexander.shishkin@linux.intel.com Cc: Andi Kleen ak@linux.intel.com Cc: Andy Lutomirski luto@kernel.org Cc: Dave Hansen dave.hansen@linux.intel.com Cc: H. Peter Anvin hpa@zytor.com Cc: Joerg Roedel joro@8bytes.org Cc: Peter Zijlstra peterz@infradead.org Cc: Thomas Gleixner tglx@linutronix.de Cc: x86@kernel.org Link: http://lkml.kernel.org/r/1526548928-20790-6-git-send-email-adrian.hunter@int... Signed-off-by: Arnaldo Carvalho de Melo acme@redhat.com --- tools/perf/util/env.c | 19 +++++++++++++++++++ tools/perf/util/env.h | 3 +++ tools/perf/util/machine.c | 9 +++++++++ tools/perf/util/machine.h | 2 ++ 4 files changed, 33 insertions(+)
diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 6276b340f893..49f58921a968 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -3,6 +3,7 @@ #include "env.h" #include "util.h" #include <errno.h> +#include <sys/utsname.h>
struct perf_env perf_env;
@@ -87,6 +88,24 @@ int perf_env__read_cpu_topology_map(struct perf_env *env) return 0; }
+static int perf_env__read_arch(struct perf_env *env) +{ + struct utsname uts; + + if (env->arch) + return 0; + + if (!uname(&uts)) + env->arch = strdup(uts.machine); + + return env->arch ? 0 : -ENOMEM; +} + +const char *perf_env__raw_arch(struct perf_env *env) +{ + return env && !perf_env__read_arch(env) ? env->arch : "unknown"; +} + void cpu_cache_level__free(struct cpu_cache_level *cache) { free(cache->type); diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index 1eb35b190b34..bd3869913907 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -65,4 +65,7 @@ int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[]); int perf_env__read_cpu_topology_map(struct perf_env *env);
void cpu_cache_level__free(struct cpu_cache_level *cache); + +const char *perf_env__raw_arch(struct perf_env *env); + #endif /* __PERF_ENV_H */ diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index bd5d5b5e2218..2af879693fbe 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -2238,6 +2238,15 @@ int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid, return 0; }
+/* + * Compares the raw arch string. N.B. see instead perf_env__arch() if a + * normalized arch is needed. + */ +bool machine__is(struct machine *machine, const char *arch) +{ + return machine && !strcmp(perf_env__raw_arch(machine->env), arch); +} + int machine__get_kernel_start(struct machine *machine) { struct map *map = machine__kernel_map(machine); diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index d551aa80a59b..fbc5133fb27c 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -169,6 +169,8 @@ static inline bool machine__is_host(struct machine *machine) return machine ? machine->pid == HOST_KERNEL_ID : false; }
+bool machine__is(struct machine *machine, const char *arch); + struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid); struct thread *machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid);
commit 19422a9f2a3be7f3a046285ffae4cbb571aa853a upstream.
On x86_64, PTI entry trampolines are less than the start of kernel text, but still above 2^63. So leave kernel_start = 1ULL << 63 for x86_64.
Signed-off-by: Adrian Hunter adrian.hunter@intel.com Tested-by: Jiri Olsa jolsa@kernel.org Cc: Alexander Shishkin alexander.shishkin@linux.intel.com Cc: Andi Kleen ak@linux.intel.com Cc: Andy Lutomirski luto@kernel.org Cc: Dave Hansen dave.hansen@linux.intel.com Cc: H. Peter Anvin hpa@zytor.com Cc: Joerg Roedel joro@8bytes.org Cc: Peter Zijlstra peterz@infradead.org Cc: Thomas Gleixner tglx@linutronix.de Cc: x86@kernel.org Link: http://lkml.kernel.org/r/1526548928-20790-7-git-send-email-adrian.hunter@int... Signed-off-by: Arnaldo Carvalho de Melo acme@redhat.com --- tools/perf/util/machine.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 2af879693fbe..af18c3d55642 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -2263,7 +2263,12 @@ int machine__get_kernel_start(struct machine *machine) machine->kernel_start = 1ULL << 63; if (map) { err = map__load(map); - if (!err) + /* + * On x86_64, PTI entry trampolines are less than the + * start of kernel text, but still above 2^63. So leave + * kernel_start = 1ULL << 63 for x86_64. + */ + if (!err && !machine__is(machine, "x86_64")) machine->kernel_start = map->start; } return err;
commit 9cecca325ea879c84fcd31a5e609a514c1a1dbd1 upstream.
Add a function to return the number of the machine's available CPUs.
Signed-off-by: Adrian Hunter adrian.hunter@intel.com Cc: Alexander Shishkin alexander.shishkin@linux.intel.com Cc: Andi Kleen ak@linux.intel.com Cc: Andy Lutomirski luto@kernel.org Cc: Dave Hansen dave.hansen@linux.intel.com Cc: H. Peter Anvin hpa@zytor.com Cc: Jiri Olsa jolsa@redhat.com Cc: Joerg Roedel joro@8bytes.org Cc: Peter Zijlstra peterz@infradead.org Cc: Thomas Gleixner tglx@linutronix.de Cc: x86@kernel.org Link: http://lkml.kernel.org/r/1526986485-6562-5-git-send-email-adrian.hunter@inte... Signed-off-by: Arnaldo Carvalho de Melo acme@redhat.com --- tools/perf/util/env.c | 13 +++++++++++++ tools/perf/util/env.h | 1 + tools/perf/util/machine.c | 5 +++++ tools/perf/util/machine.h | 1 + 4 files changed, 20 insertions(+)
diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 49f58921a968..b492cb974aa0 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -101,11 +101,24 @@ static int perf_env__read_arch(struct perf_env *env) return env->arch ? 0 : -ENOMEM; }
+static int perf_env__read_nr_cpus_avail(struct perf_env *env) +{ + if (env->nr_cpus_avail == 0) + env->nr_cpus_avail = cpu__max_present_cpu(); + + return env->nr_cpus_avail ? 0 : -ENOENT; +} + const char *perf_env__raw_arch(struct perf_env *env) { return env && !perf_env__read_arch(env) ? env->arch : "unknown"; }
+int perf_env__nr_cpus_avail(struct perf_env *env) +{ + return env && !perf_env__read_nr_cpus_avail(env) ? env->nr_cpus_avail : 0; +} + void cpu_cache_level__free(struct cpu_cache_level *cache) { free(cache->type); diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index bd3869913907..9aace8452751 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -67,5 +67,6 @@ int perf_env__read_cpu_topology_map(struct perf_env *env); void cpu_cache_level__free(struct cpu_cache_level *cache);
const char *perf_env__raw_arch(struct perf_env *env); +int perf_env__nr_cpus_avail(struct perf_env *env);
#endif /* __PERF_ENV_H */ diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index af18c3d55642..78aa1c5f19ca 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -2247,6 +2247,11 @@ bool machine__is(struct machine *machine, const char *arch) return machine && !strcmp(perf_env__raw_arch(machine->env), arch); }
+int machine__nr_cpus_avail(struct machine *machine) +{ + return machine ? perf_env__nr_cpus_avail(machine->env) : 0; +} + int machine__get_kernel_start(struct machine *machine) { struct map *map = machine__kernel_map(machine); diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index fbc5133fb27c..245743d9ce63 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -170,6 +170,7 @@ static inline bool machine__is_host(struct machine *machine) }
bool machine__is(struct machine *machine, const char *arch); +int machine__nr_cpus_avail(struct machine *machine);
struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid); struct thread *machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid);
commit 4d99e4136580d178e3523281a820be17bf814bf8 upstream.
On x86_64 the PTI entry trampolines are not in the kernel map created by perf tools. That results in the addresses having no symbols and prevents annotation. It also causes Intel PT to have decoding errors at the trampoline addresses.
Workaround that by creating maps for the trampolines.
At present the kernel does not export information revealing where the trampolines are. Until that happens, the addresses are hardcoded.
Signed-off-by: Adrian Hunter adrian.hunter@intel.com Cc: Alexander Shishkin alexander.shishkin@linux.intel.com Cc: Andi Kleen ak@linux.intel.com Cc: Andy Lutomirski luto@kernel.org Cc: Dave Hansen dave.hansen@linux.intel.com Cc: H. Peter Anvin hpa@zytor.com Cc: Jiri Olsa jolsa@redhat.com Cc: Joerg Roedel joro@8bytes.org Cc: Peter Zijlstra peterz@infradead.org Cc: Thomas Gleixner tglx@linutronix.de Cc: x86@kernel.org Link: http://lkml.kernel.org/r/1526986485-6562-6-git-send-email-adrian.hunter@inte... Signed-off-by: Arnaldo Carvalho de Melo acme@redhat.com --- tools/perf/util/machine.c | 96 +++++++++++++++++++++++++++++++++++++++ tools/perf/util/machine.h | 3 ++ tools/perf/util/symbol.c | 12 +++-- 3 files changed, 106 insertions(+), 5 deletions(-)
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 78aa1c5f19ca..968fd0454e6b 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -818,6 +818,102 @@ static int machine__get_running_kernel_start(struct machine *machine, return 0; }
+/* Kernel-space maps for symbols that are outside the main kernel map and module maps */ +struct extra_kernel_map { + u64 start; + u64 end; + u64 pgoff; +}; + +static int machine__create_extra_kernel_map(struct machine *machine, + struct dso *kernel, + struct extra_kernel_map *xm) +{ + struct kmap *kmap; + struct map *map; + + map = map__new2(xm->start, kernel, MAP__FUNCTION); + if (!map) + return -1; + + map->end = xm->end; + map->pgoff = xm->pgoff; + + kmap = map__kmap(map); + + kmap->kmaps = &machine->kmaps; + + map_groups__insert(&machine->kmaps, map); + + pr_debug2("Added extra kernel map %" PRIx64 "-%" PRIx64 "\n", + map->start, map->end); + + map__put(map); + + return 0; +} + +static u64 find_entry_trampoline(struct dso *dso) +{ + /* Duplicates are removed so lookup all aliases */ + const char *syms[] = { + "_entry_trampoline", + "__entry_trampoline_start", + "entry_SYSCALL_64_trampoline", + }; + struct symbol *sym = dso__first_symbol(dso, MAP__FUNCTION); + unsigned int i; + + for (; sym; sym = dso__next_symbol(sym)) { + if (sym->binding != STB_GLOBAL) + continue; + for (i = 0; i < ARRAY_SIZE(syms); i++) { + if (!strcmp(sym->name, syms[i])) + return sym->start; + } + } + + return 0; +} + +/* + * These values can be used for kernels that do not have symbols for the entry + * trampolines in kallsyms. + */ +#define X86_64_CPU_ENTRY_AREA_PER_CPU 0xfffffe0000000000ULL +#define X86_64_CPU_ENTRY_AREA_SIZE 0x2c000 +#define X86_64_ENTRY_TRAMPOLINE 0x6000 + +/* Map x86_64 PTI entry trampolines */ +int machine__map_x86_64_entry_trampolines(struct machine *machine, + struct dso *kernel) +{ + u64 pgoff = find_entry_trampoline(kernel); + int nr_cpus_avail, cpu; + + if (!pgoff) + return 0; + + nr_cpus_avail = machine__nr_cpus_avail(machine); + + /* Add a 1 page map for each CPU's entry trampoline */ + for (cpu = 0; cpu < nr_cpus_avail; cpu++) { + u64 va = X86_64_CPU_ENTRY_AREA_PER_CPU + + cpu * X86_64_CPU_ENTRY_AREA_SIZE + + X86_64_ENTRY_TRAMPOLINE; + struct extra_kernel_map xm = { + .start = va, + .end = va + page_size, + .pgoff = pgoff, + }; + + if (machine__create_extra_kernel_map(machine, kernel, &xm) < 0) + return -1; + } + + return 0; +} + int __machine__create_kernel_maps(struct machine *machine, struct dso *kernel) { int type; diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 245743d9ce63..13041b036a5b 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -266,4 +266,7 @@ int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid, */ char *machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp);
+int machine__map_x86_64_entry_trampolines(struct machine *machine, + struct dso *kernel); + #endif /* __PERF_MACHINE_H */ diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index ec40e47aa198..3936f69f385c 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1513,20 +1513,22 @@ int dso__load(struct dso *dso, struct map *map) goto out; }
+ if (map->groups && map->groups->machine) + machine = map->groups->machine; + else + machine = NULL; + if (dso->kernel) { if (dso->kernel == DSO_TYPE_KERNEL) ret = dso__load_kernel_sym(dso, map); else if (dso->kernel == DSO_TYPE_GUEST_KERNEL) ret = dso__load_guest_kernel_sym(dso, map);
+ if (machine__is(machine, "x86_64")) + machine__map_x86_64_entry_trampolines(machine, dso); goto out; }
- if (map->groups && map->groups->machine) - machine = map->groups->machine; - else - machine = NULL; - dso->adjust_symbols = 0;
if (perfmap) {
commit f6c66d73bb8192d357bb5fb8cd5826920f811d8c upstream.
The "Object code reading" test will not create maps for the PTI entry trampolines unless the machine environment exists to show that the arch is x86_64.
Signed-off-by: Adrian Hunter adrian.hunter@intel.com Reported-by: Arnaldo Carvalho de Melo acme@redhat.com Tested-by: Arnaldo Carvalho de Melo acme@redhat.com Cc: Jiri Olsa jolsa@redhat.com Link: http://lkml.kernel.org/r/1528183800-21577-1-git-send-email-adrian.hunter@int... [ split from a larger patch ] Signed-off-by: Arnaldo Carvalho de Melo acme@redhat.com --- tools/perf/tests/code-reading.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index fcc8984bc329..acad8ba06d77 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -527,6 +527,7 @@ static int do_test_code_reading(bool try_kcore) pid = getpid();
machine = machine__new_host(); + machine->env = &perf_env;
ret = machine__create_kernel_maps(machine); if (ret < 0) {
On Wed, Nov 21, 2018 at 03:52:42PM +0200, Adrian Hunter wrote:
Hi
x86 PTI trampolines were backported to 4.14, refer commit: c631a16e5b84521ae80ab10d47cd6ccd2afc6817 ("x86/entry/64: Create a per-CPU SYSCALL entry trampoline") so the corresponding perf tools workaround also needs to be backported. Here are 5 patches for 4.14 stable backported from 4.18, to address that.
Queued for 4.14, thank you.
-- Thanks, Sasha
linux-stable-mirror@lists.linaro.org