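Neoverse N2 r0p3 doesn't require the stall-slot workaround [1] (subtracting CPU_CYCLES from the stall slot counts), so gating on (#slots - 5) no longer works: all N2s have 5 slots, so that expression cannot tell affected revisions from unaffected ones. Add a new expression builtin, #no_stall_errata, that correctly identifies whether the workaround is needed.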
[1]: https://gitlab.arm.com/telemetry-solution/telemetry-solution/-/blob/main/dat... Signed-off-by: James Clark james.clark@arm.com --- tools/perf/arch/arm64/util/pmu.c | 21 +++++++++++++++++++ .../arm64/arm/neoverse-n2-v2/metrics.json | 8 +++---- tools/perf/util/expr.c | 4 ++++ tools/perf/util/pmu.c | 6 ++++++ tools/perf/util/pmu.h | 1 + 5 files changed, 36 insertions(+), 4 deletions(-)
diff --git a/tools/perf/arch/arm64/util/pmu.c b/tools/perf/arch/arm64/util/pmu.c index 561de0cb6b95..30e2385a83cf 100644 --- a/tools/perf/arch/arm64/util/pmu.c +++ b/tools/perf/arch/arm64/util/pmu.c @@ -2,6 +2,7 @@
#include <internal/cpumap.h> #include "../../../util/cpumap.h" +#include "../../../util/header.h" #include "../../../util/pmu.h" #include "../../../util/pmus.h" #include <api/fs/fs.h> @@ -62,3 +63,23 @@ double perf_pmu__cpu_slots_per_cycle(void)
return slots ? (double)slots : NAN; } + +double perf_pmu__no_stall_errata(void) +{ + struct perf_pmu *pmu = pmu__find_core_pmu(); + char *cpuid = perf_pmu__getcpuid(pmu); + bool n2_r0p3_plus; + bool not_n2; + + if (!cpuid) + return NAN; + + /* N2 r0p3+ doesn't need CPU_CYCLES to be subtracted from slots. */ + n2_r0p3_plus = !strcmp_cpuid_str("0x00000000410fd493", cpuid); + + /* Anything other than N2 doesn't need the workaround either */ + not_n2 = strcmp_cpuid_str("0x00000000410fd490", cpuid); + + free(cpuid); + return n2_r0p3_plus || not_n2; +} diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/metrics.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/metrics.json index 8ad15b726dca..9b912a9427f6 100644 --- a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/metrics.json +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/metrics.json @@ -1,15 +1,15 @@ [ { "ArchStdEvent": "FRONTEND_BOUND", - "MetricExpr": "((stall_slot_frontend) if (#slots - 5) else (stall_slot_frontend - cpu_cycles)) / (#slots * cpu_cycles)" + "MetricExpr": "((stall_slot_frontend) if (#no_stall_errata) else (stall_slot_frontend - cpu_cycles)) / (#slots * cpu_cycles)" }, { "ArchStdEvent": "BAD_SPECULATION", - "MetricExpr": "(1 - op_retired / op_spec) * (1 - (stall_slot if (#slots - 5) else (stall_slot - cpu_cycles)) / (#slots * cpu_cycles))" + "MetricExpr": "(1 - op_retired / op_spec) * (1 - (stall_slot if (#no_stall_errata) else (stall_slot - cpu_cycles)) / (#slots * cpu_cycles))" }, { "ArchStdEvent": "RETIRING", - "MetricExpr": "(op_retired / op_spec) * (1 - (stall_slot if (#slots - 5) else (stall_slot - cpu_cycles)) / (#slots * cpu_cycles))" + "MetricExpr": "(op_retired / op_spec) * (1 - (stall_slot if (#no_stall_errata) else (stall_slot - cpu_cycles)) / (#slots * cpu_cycles))" }, { "ArchStdEvent": "BACKEND_BOUND" @@ -201,7 +201,7 @@ "ScaleUnit": "100%" }, { - "MetricExpr": "OP_RETIRED / OP_SPEC * (1 - (STALL_SLOT if (#slots - 5) else (STALL_SLOT - CPU_CYCLES)) / 
(#slots * CPU_CYCLES))", + "MetricExpr": "OP_RETIRED / OP_SPEC * (1 - (STALL_SLOT if (#no_stall_errata) else (STALL_SLOT - CPU_CYCLES)) / (#slots * CPU_CYCLES))", "BriefDescription": "The truly effective ratio of micro-operations executed by the CPU, which means that misprediction and stall are not included", "MetricGroup": "PEutilization", "MetricName": "cpu_utilization", diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c index 7410a165f68b..3bae19785796 100644 --- a/tools/perf/util/expr.c +++ b/tools/perf/util/expr.c @@ -465,6 +465,10 @@ double expr__get_literal(const char *literal, const struct expr_scanner_ctx *ctx result = perf_pmu__cpu_slots_per_cycle(); goto out; } + if (!strcmp("#no_stall_errata", literal)) { + result = perf_pmu__no_stall_errata(); + goto out; + } if (!strcmp("#has_pmem", literal)) { result = has_pmem(); goto out; diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index b6654b9f55d2..d37dc7202ddb 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -1779,3 +1779,9 @@ void perf_pmu__delete(struct perf_pmu *pmu) zfree(&pmu->alias_name); free(pmu); } + +__weak double perf_pmu__no_stall_errata(void) +{ + /* Only exists on Arm */ + return NAN; +} diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 203b92860e3c..57c002308f9a 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -288,5 +288,6 @@ int perf_pmu__pathname_fd(int dirfd, const char *pmu_name, const char *filename, struct perf_pmu *perf_pmu__lookup(struct list_head *pmus, int dirfd, const char *lookup_name); struct perf_pmu *perf_pmu__create_placeholder_core_pmu(struct list_head *core_pmus); void perf_pmu__delete(struct perf_pmu *pmu); +double perf_pmu__no_stall_errata(void);
#endif /* __PMU_H */