The original method for allocating trace source ID values to sources was to use a fixed algorithm for CPU based sources of (cpu_num * 2 + 0x10). The STM was allocated ID 0x1.
This mechanism is broken for systems with more than 47 cores.
The kernel related patches the provide a fixed Trace ID allocation mechanism are now upstreamed.
This patchset updates the perf code to handle the changes to the trace ID notification mechanism that now uses the PERF_RECORD_AUX_OUTPUT_HW_ID packet to set Trace ID in the perf ETM decoders.
Applies and test oo perf/core
Changes since v8: 1. Fix build issues 2. Fix implicit function problem
Changes since v7: Split from original patchset [1] to be sent separately as kernel related patches are now upstream.
[1] https://lore.kernel.org/linux-arm-kernel/20230116124928.5440-1-mike.leach@li...
Mike Leach (3): perf: cs-etm: Move mapping of Trace ID and cpu into helper function perf: cs-etm: Update record event to use new Trace ID protocol perf: cs-etm: Handle PERF_RECORD_AUX_OUTPUT_HW_ID packet
tools/include/linux/coresight-pmu.h | 47 ++- tools/perf/arch/arm/util/cs-etm.c | 27 +- tools/perf/util/cs-etm-base.c | 3 +- .../perf/util/cs-etm-decoder/cs-etm-decoder.c | 7 + tools/perf/util/cs-etm.c | 326 +++++++++++++++--- tools/perf/util/cs-etm.h | 14 +- 6 files changed, 356 insertions(+), 68 deletions(-)
The information to associate Trace ID and CPU will be changing.
Drivers will start outputting this as a hardware ID packet in the data file which if present will be used in preference to the AUXINFO values.
To prepare for this we provide a helper functions to do the individual ID mapping, and one to extract the IDs from the completed metadata blocks.
Signed-off-by: Mike Leach mike.leach@linaro.org Reviewed-by: James Clark james.clark@arm.com Acked-by: Suzuki K Poulose suzuki.poulose@arm.com --- tools/include/linux/coresight-pmu.h | 5 ++ tools/perf/util/cs-etm-base.c | 3 +- tools/perf/util/cs-etm.c | 92 +++++++++++++++++++---------- tools/perf/util/cs-etm.h | 14 ++++- 4 files changed, 79 insertions(+), 35 deletions(-)
diff --git a/tools/include/linux/coresight-pmu.h b/tools/include/linux/coresight-pmu.h index 6c2fd6cc5a98..db9c7c0abb6a 100644 --- a/tools/include/linux/coresight-pmu.h +++ b/tools/include/linux/coresight-pmu.h @@ -7,9 +7,14 @@ #ifndef _LINUX_CORESIGHT_PMU_H #define _LINUX_CORESIGHT_PMU_H
+#include <linux/bits.h> + #define CORESIGHT_ETM_PMU_NAME "cs_etm" #define CORESIGHT_ETM_PMU_SEED 0x10
+/* CoreSight trace ID is currently the bottom 7 bits of the value */ +#define CORESIGHT_TRACE_ID_VAL_MASK GENMASK(6, 0) + /* * Below are the definition of bit offsets for perf option, and works as * arbitrary values for all ETM versions. diff --git a/tools/perf/util/cs-etm-base.c b/tools/perf/util/cs-etm-base.c index 5f48b756c4cf..4abe416e3feb 100644 --- a/tools/perf/util/cs-etm-base.c +++ b/tools/perf/util/cs-etm-base.c @@ -148,7 +148,8 @@ static void cs_etm__print_auxtrace_info(u64 *val, int num) for (i = CS_HEADER_VERSION_MAX; cpu < num; cpu++) { if (version == 0) err = cs_etm__print_cpu_metadata_v0(val, &i); - else if (version == 1) + /* printing same for both, but value bit flags added on v2 */ + else if ((version == 1) || (version == 2)) err = cs_etm__print_cpu_metadata_v1(val, &i); if (err) return; diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index f65bac5ddbdb..5fc9c288080b 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -196,6 +196,30 @@ int cs_etm__get_pid_fmt(u8 trace_chan_id, u64 *pid_fmt) return 0; }
+static int cs_etm__map_trace_id(u8 trace_chan_id, u64 *cpu_metadata) +{ + struct int_node *inode; + + /* Get an RB node for this CPU */ + inode = intlist__findnew(traceid_list, trace_chan_id); + + /* Something went wrong, no need to continue */ + if (!inode) + return -ENOMEM; + + /* + * The node for that CPU should not be taken. + * Back out if that's the case. + */ + if (inode->priv) + return -EINVAL; + + /* All good, associate the traceID with the metadata pointer */ + inode->priv = cpu_metadata; + + return 0; +} + void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq, u8 trace_chan_id) { @@ -2804,17 +2828,46 @@ static bool cs_etm__has_virtual_ts(u64 **metadata, int num_cpu) return true; }
+/* map trace ids to correct metadata block, from information in metadata */ +static int cs_etm__map_trace_ids_metadata(int num_cpu, u64 **metadata) +{ + u64 cs_etm_magic; + u8 trace_chan_id; + int i, err; + + for (i = 0; i < num_cpu; i++) { + cs_etm_magic = metadata[i][CS_ETM_MAGIC]; + switch (cs_etm_magic) { + case __perf_cs_etmv3_magic: + trace_chan_id = (u8)((metadata[i][CS_ETM_ETMTRACEIDR]) & + CORESIGHT_TRACE_ID_VAL_MASK); + break; + case __perf_cs_etmv4_magic: + case __perf_cs_ete_magic: + trace_chan_id = (u8)((metadata[i][CS_ETMV4_TRCTRACEIDR]) & + CORESIGHT_TRACE_ID_VAL_MASK); + break; + default: + /* unknown magic number */ + return -EINVAL; + } + err = cs_etm__map_trace_id(trace_chan_id, metadata[i]); + if (err) + return err; + } + return 0; +} + int cs_etm__process_auxtrace_info_full(union perf_event *event, struct perf_session *session) { struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info; struct cs_etm_auxtrace *etm = NULL; - struct int_node *inode; struct perf_record_time_conv *tc = &session->time_conv; int event_header_size = sizeof(struct perf_event_header); int total_size = auxtrace_info->header.size; int priv_size = 0; - int num_cpu, trcidr_idx; + int num_cpu; int err = 0; int i, j; u64 *ptr = NULL; @@ -2853,23 +2906,13 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event, cs_etm__create_meta_blk(ptr, &i, CS_ETM_PRIV_MAX, CS_ETM_NR_TRC_PARAMS_V0); - - /* The traceID is our handle */ - trcidr_idx = CS_ETM_ETMTRACEIDR; - } else if (ptr[i] == __perf_cs_etmv4_magic) { metadata[j] = cs_etm__create_meta_blk(ptr, &i, CS_ETMV4_PRIV_MAX, CS_ETMV4_NR_TRC_PARAMS_V0); - - /* The traceID is our handle */ - trcidr_idx = CS_ETMV4_TRCTRACEIDR; } else if (ptr[i] == __perf_cs_ete_magic) { metadata[j] = cs_etm__create_meta_blk(ptr, &i, CS_ETE_PRIV_MAX, -1); - - /* ETE shares first part of metadata with ETMv4 */ - trcidr_idx = CS_ETMV4_TRCTRACEIDR; } else { ui__error("CS ETM Trace: Unrecognised magic number %#"PRIx64". File could be from a newer version of perf.\n", ptr[i]); @@ -2881,26 +2924,6 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event, err = -ENOMEM; goto err_free_metadata; } - - /* Get an RB node for this CPU */ - inode = intlist__findnew(traceid_list, metadata[j][trcidr_idx]); - - /* Something went wrong, no need to continue */ - if (!inode) { - err = -ENOMEM; - goto err_free_metadata; - } - - /* - * The node for that CPU should not be taken. - * Back out if that's the case. - */ - if (inode->priv) { - err = -EINVAL; - goto err_free_metadata; - } - /* All good, associate the traceID with the metadata pointer */ - inode->priv = metadata[j]; }
/* @@ -2994,6 +3017,11 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event, if (err) goto err_delete_thread;
+ /* before aux records are queued, need to map metadata to trace IDs */ + err = cs_etm__map_trace_ids_metadata(num_cpu, metadata); + if (err) + goto err_delete_thread; + err = cs_etm__queue_aux_records(session); if (err) goto err_delete_thread; diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h index 98a4f7113d2f..661f029322e4 100644 --- a/tools/perf/util/cs-etm.h +++ b/tools/perf/util/cs-etm.h @@ -29,13 +29,17 @@ enum { /* * Update the version for new format. * - * New version 1 format adds a param count to the per cpu metadata. + * Version 1: format adds a param count to the per cpu metadata. * This allows easy adding of new metadata parameters. * Requires that new params always added after current ones. * Also allows client reader to handle file versions that are different by * checking the number of params in the file vs the number expected. + * + * Version 2: Drivers will use PERF_RECORD_AUX_OUTPUT_HW_ID to output + * CoreSight Trace ID. ...TRACEIDR metadata will be set to legacy values + * but with addition flags. */ -#define CS_HEADER_CURRENT_VERSION 1 +#define CS_HEADER_CURRENT_VERSION 2
/* Beginning of header common to both ETMv3 and V4 */ enum { @@ -97,6 +101,12 @@ enum { CS_ETE_PRIV_MAX };
+/* + * Check for valid CoreSight trace ID. If an invalid value is present in the metadata, + * then IDs are present in the hardware ID packet in the data file. + */ +#define CS_IS_VALID_TRACE_ID(id) ((id > 0) && (id < 0x70)) + /* * ETMv3 exception encoding number: * See Embedded Trace Macrocell specification (ARM IHI 0014Q)
Trace IDs are now dynamically allocated.
Previously used the static association algorithm that is no longer used. The 'cpu * 2 + seed' was outdated and broken for systems with high core counts (>46). as it did not scale and was broken for larger core counts.
Trace ID will now be sent in PERF_RECORD_AUX_OUTPUT_HW_ID record.
Legacy ID algorithm renamed and retained for limited backward compatibility use.
Signed-off-by: Mike Leach mike.leach@linaro.org Reviewed-by: James Clark james.clark@arm.com Acked-by: Suzuki K Poulose suzuki.poulose@arm.com --- tools/include/linux/coresight-pmu.h | 29 +++++++++++++++++------------ tools/perf/arch/arm/util/cs-etm.c | 27 +++++++++++++++++---------- 2 files changed, 34 insertions(+), 22 deletions(-)
diff --git a/tools/include/linux/coresight-pmu.h b/tools/include/linux/coresight-pmu.h index db9c7c0abb6a..1760f9a574b0 100644 --- a/tools/include/linux/coresight-pmu.h +++ b/tools/include/linux/coresight-pmu.h @@ -10,11 +10,27 @@ #include <linux/bits.h>
#define CORESIGHT_ETM_PMU_NAME "cs_etm" -#define CORESIGHT_ETM_PMU_SEED 0x10 + +/* + * The legacy Trace ID system based on fixed calculation from the cpu + * number. This has been replaced by drivers using a dynamic allocation + * system - but need to retain the legacy algorithm for backward comparibility + * in certain situations:- + * a) new perf running on older systems that generate the legacy mapping + * b) older tools that may not update at the same time as the kernel. + */ +#define CORESIGHT_LEGACY_CPU_TRACE_ID(cpu) (0x10 + (cpu * 2))
/* CoreSight trace ID is currently the bottom 7 bits of the value */ #define CORESIGHT_TRACE_ID_VAL_MASK GENMASK(6, 0)
+/* + * perf record will set the legacy meta data values as unused initially. + * This allows perf report to manage the decoders created when dynamic + * allocation in operation. + */ +#define CORESIGHT_TRACE_ID_UNUSED_FLAG BIT(31) + /* * Below are the definition of bit offsets for perf option, and works as * arbitrary values for all ETM versions. @@ -39,15 +55,4 @@ #define ETM4_CFG_BIT_RETSTK 12 #define ETM4_CFG_BIT_VMID_OPT 15
-static inline int coresight_get_trace_id(int cpu) -{ - /* - * A trace ID of value 0 is invalid, so let's start at some - * random value that fits in 7 bits and go from there. Since - * the common convention is to have data trace IDs be I(N) + 1, - * set instruction trace IDs as a function of the CPU number. - */ - return (CORESIGHT_ETM_PMU_SEED + (cpu * 2)); -} - #endif diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index 86b61ad74f90..e02a9bfc3d42 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -437,13 +437,16 @@ static int cs_etm_recording_options(struct auxtrace_record *itr, evlist__to_front(evlist, cs_etm_evsel);
/* - * In the case of per-cpu mmaps, we need the CPU on the - * AUX event. We also need the contextID in order to be notified + * get the CPU on the sample - need it to associate trace ID in the + * AUX_OUTPUT_HW_ID event, and the AUX event for per-cpu mmaps. + */ + evsel__set_sample_bit(cs_etm_evsel, CPU); + + /* + * Also the case of per-cpu mmaps, need the contextID in order to be notified * when a context switch happened. */ if (!perf_cpu_map__empty(cpus)) { - evsel__set_sample_bit(cs_etm_evsel, CPU); - err = cs_etm_set_option(itr, cs_etm_evsel, BIT(ETM_OPT_CTXTID) | BIT(ETM_OPT_TS)); if (err) @@ -679,8 +682,10 @@ static void cs_etm_save_etmv4_header(__u64 data[], struct auxtrace_record *itr,
/* Get trace configuration register */ data[CS_ETMV4_TRCCONFIGR] = cs_etmv4_get_config(itr); - /* Get traceID from the framework */ - data[CS_ETMV4_TRCTRACEIDR] = coresight_get_trace_id(cpu); + /* traceID set to legacy version, in case new perf running on older system */ + data[CS_ETMV4_TRCTRACEIDR] = + CORESIGHT_LEGACY_CPU_TRACE_ID(cpu) | CORESIGHT_TRACE_ID_UNUSED_FLAG; + /* Get read-only information from sysFS */ data[CS_ETMV4_TRCIDR0] = cs_etm_get_ro(cs_etm_pmu, cpu, metadata_etmv4_ro[CS_ETMV4_TRCIDR0]); @@ -711,8 +716,10 @@ static void cs_etm_save_ete_header(__u64 data[], struct auxtrace_record *itr, in
/* Get trace configuration register */ data[CS_ETE_TRCCONFIGR] = cs_etmv4_get_config(itr); - /* Get traceID from the framework */ - data[CS_ETE_TRCTRACEIDR] = coresight_get_trace_id(cpu); + /* traceID set to legacy version, in case new perf running on older system */ + data[CS_ETE_TRCTRACEIDR] = + CORESIGHT_LEGACY_CPU_TRACE_ID(cpu) | CORESIGHT_TRACE_ID_UNUSED_FLAG; + /* Get read-only information from sysFS */ data[CS_ETE_TRCIDR0] = cs_etm_get_ro(cs_etm_pmu, cpu, metadata_ete_ro[CS_ETE_TRCIDR0]); @@ -768,9 +775,9 @@ static void cs_etm_get_metadata(int cpu, u32 *offset, magic = __perf_cs_etmv3_magic; /* Get configuration register */ info->priv[*offset + CS_ETM_ETMCR] = cs_etm_get_config(itr); - /* Get traceID from the framework */ + /* traceID set to legacy value in case new perf running on old system */ info->priv[*offset + CS_ETM_ETMTRACEIDR] = - coresight_get_trace_id(cpu); + CORESIGHT_LEGACY_CPU_TRACE_ID(cpu) | CORESIGHT_TRACE_ID_UNUSED_FLAG; /* Get read-only information from sysFS */ info->priv[*offset + CS_ETM_ETMCCER] = cs_etm_get_ro(cs_etm_pmu, cpu,
When using dynamically assigned CoreSight trace IDs the drivers can output the ID / CPU association as a PERF_RECORD_AUX_OUTPUT_HW_ID packet.
Update cs-etm decoder to handle this packet by setting the CPU/Trace ID mapping.
Signed-off-by: Mike Leach mike.leach@linaro.org Reviewed-by: James Clark james.clark@arm.com Acked-by: Suzuki K Poulose suzuki.poulose@arm.com --- tools/include/linux/coresight-pmu.h | 15 ++ .../perf/util/cs-etm-decoder/cs-etm-decoder.c | 7 + tools/perf/util/cs-etm.c | 246 ++++++++++++++++-- 3 files changed, 250 insertions(+), 18 deletions(-)
diff --git a/tools/include/linux/coresight-pmu.h b/tools/include/linux/coresight-pmu.h index 1760f9a574b0..cef3b1c25335 100644 --- a/tools/include/linux/coresight-pmu.h +++ b/tools/include/linux/coresight-pmu.h @@ -31,6 +31,9 @@ */ #define CORESIGHT_TRACE_ID_UNUSED_FLAG BIT(31)
+/* Value to set for unused trace ID values */ +#define CORESIGHT_TRACE_ID_UNUSED_VAL 0x7F + /* * Below are the definition of bit offsets for perf option, and works as * arbitrary values for all ETM versions. @@ -55,4 +58,16 @@ #define ETM4_CFG_BIT_RETSTK 12 #define ETM4_CFG_BIT_VMID_OPT 15
+/* + * Interpretation of the PERF_RECORD_AUX_OUTPUT_HW_ID payload. + * Used to associate a CPU with the CoreSight Trace ID. + * [07:00] - Trace ID - uses 8 bits to make value easy to read in file. + * [59:08] - Unused (SBZ) + * [63:60] - Version + */ +#define CS_AUX_HW_ID_TRACE_ID_MASK GENMASK_ULL(7, 0) +#define CS_AUX_HW_ID_VERSION_MASK GENMASK_ULL(63, 60) + +#define CS_AUX_HW_ID_CURR_VERSION 0 + #endif diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index d0e521dfcf35..82a27ab90c8b 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -668,6 +668,7 @@ cs_etm_decoder__create_etm_decoder(struct cs_etm_decoder_params *d_params, switch (t_params->protocol) { case CS_ETM_PROTO_ETMV3: case CS_ETM_PROTO_PTM: + csid = (t_params->etmv3.reg_idr & CORESIGHT_TRACE_ID_VAL_MASK); cs_etm_decoder__gen_etmv3_config(t_params, &config_etmv3); decoder->decoder_name = (t_params->protocol == CS_ETM_PROTO_ETMV3) ? OCSD_BUILTIN_DCD_ETMV3 : @@ -675,11 +676,13 @@ cs_etm_decoder__create_etm_decoder(struct cs_etm_decoder_params *d_params, trace_config = &config_etmv3; break; case CS_ETM_PROTO_ETMV4i: + csid = (t_params->etmv4.reg_traceidr & CORESIGHT_TRACE_ID_VAL_MASK); cs_etm_decoder__gen_etmv4_config(t_params, &trace_config_etmv4); decoder->decoder_name = OCSD_BUILTIN_DCD_ETMV4I; trace_config = &trace_config_etmv4; break; case CS_ETM_PROTO_ETE: + csid = (t_params->ete.reg_traceidr & CORESIGHT_TRACE_ID_VAL_MASK); cs_etm_decoder__gen_ete_config(t_params, &trace_config_ete); decoder->decoder_name = OCSD_BUILTIN_DCD_ETE; trace_config = &trace_config_ete; @@ -688,6 +691,10 @@ cs_etm_decoder__create_etm_decoder(struct cs_etm_decoder_params *d_params, return -1; }
+ /* if the CPU has no trace ID associated, no decoder needed */ + if (csid == CORESIGHT_TRACE_ID_UNUSED_VAL) + return 0; + if (d_params->operation == CS_ETM_OPERATION_DECODE) { if (ocsd_dt_create_decoder(decoder->dcd_tree, decoder->decoder_name, diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 5fc9c288080b..94e2d02009eb 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -220,6 +220,143 @@ static int cs_etm__map_trace_id(u8 trace_chan_id, u64 *cpu_metadata) return 0; }
+static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64 *cpu_metadata) +{ + u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC]; + + switch (cs_etm_magic) { + case __perf_cs_etmv3_magic: + *trace_chan_id = (u8)(cpu_metadata[CS_ETM_ETMTRACEIDR] & + CORESIGHT_TRACE_ID_VAL_MASK); + break; + case __perf_cs_etmv4_magic: + case __perf_cs_ete_magic: + *trace_chan_id = (u8)(cpu_metadata[CS_ETMV4_TRCTRACEIDR] & + CORESIGHT_TRACE_ID_VAL_MASK); + break; + default: + return -EINVAL; + } + return 0; +} + +/* + * update metadata trace ID from the value found in the AUX_HW_INFO packet. + * This will also clear the CORESIGHT_TRACE_ID_UNUSED_FLAG flag if present. + */ +static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata) +{ + u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC]; + + switch (cs_etm_magic) { + case __perf_cs_etmv3_magic: + cpu_metadata[CS_ETM_ETMTRACEIDR] = trace_chan_id; + break; + case __perf_cs_etmv4_magic: + case __perf_cs_ete_magic: + cpu_metadata[CS_ETMV4_TRCTRACEIDR] = trace_chan_id; + break; + + default: + return -EINVAL; + } + return 0; +} + +/* + * FIELD_GET (linux/bitfield.h) not available outside kernel code, + * and the header contains too many dependencies to just copy over, + * so roll our own based on the original + */ +#define __bf_shf(x) (__builtin_ffsll(x) - 1) +#define FIELD_GET(_mask, _reg) \ + ({ \ + (typeof(_mask))(((_reg) & (_mask)) >> __bf_shf(_mask)); \ + }) + +/* + * Handle the PERF_RECORD_AUX_OUTPUT_HW_ID event. + * + * The payload associates the Trace ID and the CPU. + * The routine is tolerant of seeing multiple packets with the same association, + * but a CPU / Trace ID association changing during a session is an error. + */ +static int cs_etm__process_aux_output_hw_id(struct perf_session *session, + union perf_event *event) +{ + struct cs_etm_auxtrace *etm; + struct perf_sample sample; + struct int_node *inode; + struct evsel *evsel; + u64 *cpu_data; + u64 hw_id; + int cpu, version, err; + u8 trace_chan_id, curr_chan_id; + + /* extract and parse the HW ID */ + hw_id = event->aux_output_hw_id.hw_id; + version = FIELD_GET(CS_AUX_HW_ID_VERSION_MASK, hw_id); + trace_chan_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id); + + /* check that we can handle this version */ + if (version > CS_AUX_HW_ID_CURR_VERSION) + return -EINVAL; + + /* get access to the etm metadata */ + etm = container_of(session->auxtrace, struct cs_etm_auxtrace, auxtrace); + if (!etm || !etm->metadata) + return -EINVAL; + + /* parse the sample to get the CPU */ + evsel = evlist__event2evsel(session->evlist, event); + if (!evsel) + return -EINVAL; + err = evsel__parse_sample(evsel, event, &sample); + if (err) + return err; + cpu = sample.cpu; + if (cpu == -1) { + /* no CPU in the sample - possibly recorded with an old version of perf */ + pr_err("CS_ETM: no CPU AUX_OUTPUT_HW_ID sample. Use compatible perf to record."); + return -EINVAL; + } + + /* See if the ID is mapped to a CPU, and it matches the current CPU */ + inode = intlist__find(traceid_list, trace_chan_id); + if (inode) { + cpu_data = inode->priv; + if ((int)cpu_data[CS_ETM_CPU] != cpu) { + pr_err("CS_ETM: map mismatch between HW_ID packet CPU and Trace ID\n"); + return -EINVAL; + } + + /* check that the mapped ID matches */ + err = cs_etm__metadata_get_trace_id(&curr_chan_id, cpu_data); + if (err) + return err; + if (curr_chan_id != trace_chan_id) { + pr_err("CS_ETM: mismatch between CPU trace ID and HW_ID packet ID\n"); + return -EINVAL; + } + + /* mapped and matched - return OK */ + return 0; + } + + /* not one we've seen before - lets map it */ + cpu_data = etm->metadata[cpu]; + err = cs_etm__map_trace_id(trace_chan_id, cpu_data); + if (err) + return err; + + /* + * if we are picking up the association from the packet, need to plug + * the correct trace ID into the metadata for setting up decoders later. + */ + err = cs_etm__metadata_set_trace_id(trace_chan_id, cpu_data); + return err; +} + void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq, u8 trace_chan_id) { @@ -2668,11 +2805,16 @@ static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_o }
/* - * In per-thread mode, CPU is set to -1, but TID will be set instead. See - * auxtrace_mmap_params__set_idx(). Return 'not found' if neither CPU nor TID match. + * In per-thread mode, auxtrace CPU is set to -1, but TID will be set instead. See + * auxtrace_mmap_params__set_idx(). However, the sample AUX event will contain a + * CPU as we set this always for the AUX_OUTPUT_HW_ID event. + * So now compare only TIDs if auxtrace CPU is -1, and CPUs if auxtrace CPU is not -1. + * Return 'not found' if mismatch. */ - if ((auxtrace_event->cpu == (__u32) -1 && auxtrace_event->tid != sample->tid) || - auxtrace_event->cpu != sample->cpu) + if (auxtrace_event->cpu == (__u32) -1) { + if (auxtrace_event->tid != sample->tid) + return 1; + } else if (auxtrace_event->cpu != sample->cpu) return 1;
if (aux_event->flags & PERF_AUX_FLAG_OVERWRITE) { @@ -2721,6 +2863,17 @@ static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_o return 1; }
+static int cs_etm__process_aux_hw_id_cb(struct perf_session *session, union perf_event *event, + u64 offset __maybe_unused, void *data __maybe_unused) +{ + /* look to handle PERF_RECORD_AUX_OUTPUT_HW_ID early to ensure decoders can be set up */ + if (event->header.type == PERF_RECORD_AUX_OUTPUT_HW_ID) { + (*(int *)data)++; /* increment found count */ + return cs_etm__process_aux_output_hw_id(session, event); + } + return 0; +} + static int cs_etm__queue_aux_records_cb(struct perf_session *session, union perf_event *event, u64 offset __maybe_unused, void *data __maybe_unused) { @@ -2839,13 +2992,13 @@ static int cs_etm__map_trace_ids_metadata(int num_cpu, u64 **metadata) cs_etm_magic = metadata[i][CS_ETM_MAGIC]; switch (cs_etm_magic) { case __perf_cs_etmv3_magic: - trace_chan_id = (u8)((metadata[i][CS_ETM_ETMTRACEIDR]) & - CORESIGHT_TRACE_ID_VAL_MASK); + metadata[i][CS_ETM_ETMTRACEIDR] &= CORESIGHT_TRACE_ID_VAL_MASK; + trace_chan_id = (u8)(metadata[i][CS_ETM_ETMTRACEIDR]); break; case __perf_cs_etmv4_magic: case __perf_cs_ete_magic: - trace_chan_id = (u8)((metadata[i][CS_ETMV4_TRCTRACEIDR]) & - CORESIGHT_TRACE_ID_VAL_MASK); + metadata[i][CS_ETMV4_TRCTRACEIDR] &= CORESIGHT_TRACE_ID_VAL_MASK; + trace_chan_id = (u8)(metadata[i][CS_ETMV4_TRCTRACEIDR]); break; default: /* unknown magic number */ @@ -2858,6 +3011,35 @@ static int cs_etm__map_trace_ids_metadata(int num_cpu, u64 **metadata) return 0; }
+/* + * If we found AUX_HW_ID packets, then set any metadata marked as unused to the + * unused value to reduce the number of unneeded decoders created. + */ +static int cs_etm__clear_unused_trace_ids_metadata(int num_cpu, u64 **metadata) +{ + u64 cs_etm_magic; + int i; + + for (i = 0; i < num_cpu; i++) { + cs_etm_magic = metadata[i][CS_ETM_MAGIC]; + switch (cs_etm_magic) { + case __perf_cs_etmv3_magic: + if (metadata[i][CS_ETM_ETMTRACEIDR] & CORESIGHT_TRACE_ID_UNUSED_FLAG) + metadata[i][CS_ETM_ETMTRACEIDR] = CORESIGHT_TRACE_ID_UNUSED_VAL; + break; + case __perf_cs_etmv4_magic: + case __perf_cs_ete_magic: + if (metadata[i][CS_ETMV4_TRCTRACEIDR] & CORESIGHT_TRACE_ID_UNUSED_FLAG) + metadata[i][CS_ETMV4_TRCTRACEIDR] = CORESIGHT_TRACE_ID_UNUSED_VAL; + break; + default: + /* unknown magic number */ + return -EINVAL; + } + } + return 0; +} + int cs_etm__process_auxtrace_info_full(union perf_event *event, struct perf_session *session) { @@ -2869,6 +3051,7 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event, int priv_size = 0; int num_cpu; int err = 0; + int aux_hw_id_found; int i, j; u64 *ptr = NULL; u64 **metadata = NULL; @@ -3017,8 +3200,43 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event, if (err) goto err_delete_thread;
- /* before aux records are queued, need to map metadata to trace IDs */ - err = cs_etm__map_trace_ids_metadata(num_cpu, metadata); + /* + * Map Trace ID values to CPU metadata. + * + * Trace metadata will always contain Trace ID values from the legacy algorithm. If the + * files has been recorded by a "new" perf updated to handle AUX_HW_ID then the metadata + * ID value will also have the CORESIGHT_TRACE_ID_UNUSED_FLAG set. + * + * The updated kernel drivers that use AUX_HW_ID to sent Trace IDs will attempt to use + * the same IDs as the old algorithm as far as is possible, unless there are clashes + * in which case a different value will be used. This means an older perf may still + * be able to record and read files generate on a newer system. + * + * For a perf able to interpret AUX_HW_ID packets we first check for the presence of + * those packets. If they are there then the values will be mapped and plugged into + * the metadata. We then set any remaining metadata values with the used flag to a + * value CORESIGHT_TRACE_ID_UNUSED_VAL - which indicates no decoder is required. + * + * If no AUX_HW_ID packets are present - which means a file recorded on an old kernel + * then we map Trace ID values to CPU directly from the metadata - clearing any unused + * flags if present. + */ + + /* first scan for AUX_OUTPUT_HW_ID records to map trace ID values to CPU metadata */ + aux_hw_id_found = 0; + err = perf_session__peek_events(session, session->header.data_offset, + session->header.data_size, + cs_etm__process_aux_hw_id_cb, &aux_hw_id_found); + if (err) + goto err_delete_thread; + + /* if HW ID found then clear any unused metadata ID values */ + if (aux_hw_id_found) + err = cs_etm__clear_unused_trace_ids_metadata(num_cpu, metadata); + /* otherwise, this is a file with metadata values only, map from metadata */ + else + err = cs_etm__map_trace_ids_metadata(num_cpu, metadata); + if (err) goto err_delete_thread;
@@ -3027,14 +3245,6 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event, goto err_delete_thread;
etm->data_queued = etm->queues.populated; - /* - * Print warning in pipe mode, see cs_etm__process_auxtrace_event() and - * cs_etm__queue_aux_fragment() for details relating to limitations. - */ - if (!etm->data_queued) - pr_warning("CS ETM warning: Coresight decode and TRBE support requires random file access.\n" - "Continuing with best effort decoding in piped mode.\n\n"); - return 0;
err_delete_thread:
Em Fri, Mar 31, 2023 at 06:56:42AM +0100, Mike Leach escreveu:
The original method for allocating trace source ID values to sources was to use a fixed algorithm for CPU based sources of (cpu_num * 2 + 0x10). The STM was allocated ID 0x1.
This mechanism is broken for systems with more than 47 cores.
The kernel related patches the provide a fixed Trace ID allocation mechanism are now upstreamed.
This patchset updates the perf code to handle the changes to the trace ID notification mechanism that now uses the PERF_RECORD_AUX_OUTPUT_HW_ID packet to set Trace ID in the perf ETM decoders.
Applies and test oo perf/core
I'm keeping perf/core with the same contents as perf-tools-next, please use perf-tools-next as in the future I'll stop using the old perf/core name.
Applied, testing.
- Arnaldo
Changes since v8:
- Fix build issues
- Fix implicit function problem
Changes since v7: Split from original patchset [1] to be sent separately as kernel related patches are now upstream.
[1] https://lore.kernel.org/linux-arm-kernel/20230116124928.5440-1-mike.leach@li...
Mike Leach (3): perf: cs-etm: Move mapping of Trace ID and cpu into helper function perf: cs-etm: Update record event to use new Trace ID protocol perf: cs-etm: Handle PERF_RECORD_AUX_OUTPUT_HW_ID packet
tools/include/linux/coresight-pmu.h | 47 ++- tools/perf/arch/arm/util/cs-etm.c | 27 +- tools/perf/util/cs-etm-base.c | 3 +- .../perf/util/cs-etm-decoder/cs-etm-decoder.c | 7 + tools/perf/util/cs-etm.c | 326 +++++++++++++++--- tools/perf/util/cs-etm.h | 14 +- 6 files changed, 356 insertions(+), 68 deletions(-)
-- 2.17.1