On 19/04/2023 18:21, Ganapatrao Kulkarni wrote:
> The current implementation supports coresight trace for a range of
> CPUs, if the first CPU is CPU0.
>
> Adding changes to enable coresight trace for any range of CPUs by
> decoding the first CPU also from the header.
> Later, first CPU id is used instead of CPU0 across the decoder functions.
>
> Signed-off-by: Ganapatrao Kulkarni <gankulkarni(a)os.amperecomputing.com>
> ---
> .../perf/util/cs-etm-decoder/cs-etm-decoder.c | 4 +-
> .../perf/util/cs-etm-decoder/cs-etm-decoder.h | 3 +-
> tools/perf/util/cs-etm.c | 62 ++++++++++++-------
> 3 files changed, 42 insertions(+), 27 deletions(-)
>
> diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
> index 82a27ab90c8b..41ab299b643b 100644
> --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
> +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
> @@ -724,7 +724,7 @@ cs_etm_decoder__create_etm_decoder(struct cs_etm_decoder_params *d_params,
> }
>
> struct cs_etm_decoder *
> -cs_etm_decoder__new(int decoders, struct cs_etm_decoder_params *d_params,
> +cs_etm_decoder__new(int first_decoder, int decoders, struct cs_etm_decoder_params *d_params,
> struct cs_etm_trace_params t_params[])
> {
> struct cs_etm_decoder *decoder;
> @@ -769,7 +769,7 @@ cs_etm_decoder__new(int decoders, struct cs_etm_decoder_params *d_params,
> /* init raw frame logging if required */
> cs_etm_decoder__init_raw_frame_logging(d_params, decoder);
>
> - for (i = 0; i < decoders; i++) {
> + for (i = first_decoder; i < decoders; i++) {
> ret = cs_etm_decoder__create_etm_decoder(d_params,
> &t_params[i],
> decoder);
> diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
> index 92a855fbe5b8..b06193fc75b4 100644
> --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
> +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
> @@ -90,7 +90,8 @@ int cs_etm_decoder__process_data_block(struct cs_etm_decoder *decoder,
> size_t len, size_t *consumed);
>
> struct cs_etm_decoder *
> -cs_etm_decoder__new(int num_cpu,
> +cs_etm_decoder__new(int first_decoder,
> + int decoders,
> struct cs_etm_decoder_params *d_params,
> struct cs_etm_trace_params t_params[]);
>
> diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
> index 94e2d02009eb..2619513ae088 100644
> --- a/tools/perf/util/cs-etm.c
> +++ b/tools/perf/util/cs-etm.c
> @@ -55,6 +55,8 @@ struct cs_etm_auxtrace {
> u8 has_virtual_ts; /* Virtual/Kernel timestamps in the trace. */
>
> int num_cpu;
> + int first_cpu;
> + int last_cpu;
> u64 latest_kernel_timestamp;
> u32 auxtrace_type;
> u64 branches_sample_type;
> @@ -638,14 +640,13 @@ static void cs_etm__set_trace_param_ete(struct cs_etm_trace_params *t_params,
> }
>
> static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params,
> - struct cs_etm_auxtrace *etm,
> - int decoders)
> + struct cs_etm_auxtrace *etm)
> {
> int i;
> u32 etmidr;
> u64 architecture;
>
> - for (i = 0; i < decoders; i++) {
> + for (i = etm->first_cpu; i < etm->last_cpu; i++) {
> architecture = etm->metadata[i][CS_ETM_MAGIC];
>
> switch (architecture) {
> @@ -817,7 +818,7 @@ static void cs_etm__free(struct perf_session *session)
> /* Then the RB tree itself */
> intlist__delete(traceid_list);
>
> - for (i = 0; i < aux->num_cpu; i++)
> + for (i = aux->first_cpu; i < aux->last_cpu; i++)
> zfree(&aux->metadata[i]);
>
> thread__zput(aux->unknown_thread);
> @@ -921,7 +922,8 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,
> * Each queue can only contain data from one CPU when unformatted, so only one decoder is
> * needed.
> */
> - int decoders = formatted ? etm->num_cpu : 1;
> + int first_decoder = formatted ? etm->first_cpu : 0;
> + int decoders = first_decoder + (formatted ? etm->num_cpu : 1);
>
> etmq = zalloc(sizeof(*etmq));
> if (!etmq)
> @@ -937,7 +939,7 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,
> if (!t_params)
> goto out_free;
>
> - if (cs_etm__init_trace_params(t_params, etm, decoders))
> + if (cs_etm__init_trace_params(t_params, etm))
> goto out_free;
>
> /* Set decoder parameters to decode trace packets */
> @@ -947,8 +949,7 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,
> formatted))
> goto out_free;
>
> - etmq->decoder = cs_etm_decoder__new(decoders, &d_params,
> - t_params);
> + etmq->decoder = cs_etm_decoder__new(first_decoder, decoders, &d_params, t_params);
>
> if (!etmq->decoder)
> goto out_free;
> @@ -2959,11 +2960,11 @@ static int cs_etm__queue_aux_records(struct perf_session *session)
> * Loop through the ETMs and complain if we find at least one where ts_source != 1 (virtual
> * timestamps).
> */
> -static bool cs_etm__has_virtual_ts(u64 **metadata, int num_cpu)
> +static bool cs_etm__has_virtual_ts(u64 **metadata, struct cs_etm_auxtrace *etm)
> {
> int j;
>
> - for (j = 0; j < num_cpu; j++) {
> + for (j = etm->first_cpu; j < etm->last_cpu; j++) {
> switch (metadata[j][CS_ETM_MAGIC]) {
> case __perf_cs_etmv4_magic:
> if (HAS_PARAM(j, ETMV4, TS_SOURCE) || metadata[j][CS_ETMV4_TS_SOURCE] != 1)
> @@ -2982,13 +2983,14 @@ static bool cs_etm__has_virtual_ts(u64 **metadata, int num_cpu)
> }
>
> /* map trace ids to correct metadata block, from information in metadata */
> -static int cs_etm__map_trace_ids_metadata(int num_cpu, u64 **metadata)
> +static int cs_etm__map_trace_ids_metadata(struct cs_etm_auxtrace *etm)
> {
> u64 cs_etm_magic;
> + u64 **metadata = etm->metadata;
> u8 trace_chan_id;
> int i, err;
>
> - for (i = 0; i < num_cpu; i++) {
> + for (i = etm->first_cpu; i < etm->last_cpu; i++) {
> cs_etm_magic = metadata[i][CS_ETM_MAGIC];
> switch (cs_etm_magic) {
> case __perf_cs_etmv3_magic:
> @@ -3015,12 +3017,13 @@ static int cs_etm__map_trace_ids_metadata(int num_cpu, u64 **metadata)
> * If we found AUX_HW_ID packets, then set any metadata marked as unused to the
> * unused value to reduce the number of unneeded decoders created.
> */
> -static int cs_etm__clear_unused_trace_ids_metadata(int num_cpu, u64 **metadata)
> +static int cs_etm__clear_unused_trace_ids_metadata(struct cs_etm_auxtrace *etm)
> {
> u64 cs_etm_magic;
> + u64 **metadata = etm->metadata;
> int i;
>
> - for (i = 0; i < num_cpu; i++) {
> + for (i = etm->first_cpu; i < etm->last_cpu; i++) {
> cs_etm_magic = metadata[i][CS_ETM_MAGIC];
> switch (cs_etm_magic) {
> case __perf_cs_etmv3_magic:
> @@ -3049,7 +3052,7 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
> int event_header_size = sizeof(struct perf_event_header);
> int total_size = auxtrace_info->header.size;
> int priv_size = 0;
> - int num_cpu;
> + int num_cpu, first_cpu = 0, last_cpu;
> int err = 0;
> int aux_hw_id_found;
> int i, j;
> @@ -3068,22 +3071,31 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
> /* First the global part */
> ptr = (u64 *) auxtrace_info->priv;
> num_cpu = ptr[CS_PMU_TYPE_CPUS] & 0xffffffff;
> - metadata = zalloc(sizeof(*metadata) * num_cpu);
> +
> + /* Start parsing after the common part of the header */
> + i = CS_HEADER_VERSION_MAX;
> +
> + /*Get CPU id of first event */
> + first_cpu = ptr[i + CS_ETM_CPU];
> + last_cpu = first_cpu + num_cpu;
> +
> + if (first_cpu > cpu__max_cpu().cpu ||
> + last_cpu > cpu__max_cpu().cpu)
> + return -EINVAL;
> +
> + metadata = zalloc(sizeof(*metadata) * last_cpu);
Hi Ganapatrao,
I think I see what the problem is, but I'm wondering if a better fix
would be to further decouple the CPU ID from the index in the array.
With your change it's not clear what happens with sparse recordings, for
example 'perf record -e cs_etm// -C 1,3,5'. And it seems like there is
some wastage in the zalloc here for example if only CPU 256 is traced
then we'd still make 256 decoders but 255 of them would be unused?
I tried to test sparse recordings, but your change doesn't apply to the
latest coresight/next branch. I did notice that 'perf report -D' doesn't
work with them on coresight/next (it just quits), but I couldn't see if
that's fixed with your change.
Would a better fix not be to keep the metadata loops from 0-N and
instead save the CPU ID in cs_etm_decoder_params or the decoder. That
way it would support both sparse and not starting from 0 cases? I think
the code would be better if it's worded like "i < recorded_cpus" rather
than "i < cpu" to make it clear that i isn't actually the CPU ID it's
just an index.
Also a new test for this scenario would probably be a good idea.
Thanks
James
Hi Greg,
Please find the updates for coresight self hosted tracing targeting v6.4
Kindly pull.
Suzuki
The following changes since commit 197b6b60ae7bc51dd0814953c562833143b292aa:
Linux 6.3-rc4 (2023-03-26 14:40:20 -0700)
are available in the Git repository at:
git://git.kernel.org/pub/scm/linux/kernel/git/coresight/linux.git tags/coresight-next-v6.4
for you to fetch changes up to 18996a113f2567aef3057e300e3193ce2df1684c:
coresight: etm_pmu: Set the module field (2023-04-14 12:14:09 +0100)
----------------------------------------------------------------
coresight: Updates for v6.4
This is a relatively smaller update for CoreSight tracing subsystem targeting
v6.4, with the following changes:
- Removing Mathieu Poirier as MAINTAINER for the subsystem, with updates to
CREDITS for his contributions.
- Fix CoreSight ETM PMU to set the module field
Signed-off-by: Suzuki K Poulose <suzuki.poulose(a)arm.com>
----------------------------------------------------------------
Suzuki K Poulose (2):
MAINTAINERS: Remove Mathieu Poirier as coresight maintainer
coresight: etm_pmu: Set the module field
CREDITS | 5 +++++
MAINTAINERS | 1 -
drivers/hwtracing/coresight/coresight-etm-perf.c | 1 +
3 files changed, 6 insertions(+), 1 deletion(-)
struct pmu::module must be set to the module owning the PMU driver.
Set this for the coresight etm_pmu.
Signed-off-by: Suzuki K Poulose <suzuki.poulose(a)arm.com>
---
drivers/hwtracing/coresight/coresight-etm-perf.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c
index a48c97da8165..711f451b6946 100644
--- a/drivers/hwtracing/coresight/coresight-etm-perf.c
+++ b/drivers/hwtracing/coresight/coresight-etm-perf.c
@@ -901,6 +901,7 @@ int __init etm_perf_init(void)
etm_pmu.addr_filters_sync = etm_addr_filters_sync;
etm_pmu.addr_filters_validate = etm_addr_filters_validate;
etm_pmu.nr_addr_filters = ETM_ADDR_CMP_MAX;
+ etm_pmu.module = THIS_MODULE;
ret = perf_pmu_register(&etm_pmu, CORESIGHT_ETM_PMU_NAME, -1);
if (ret == 0)
--
2.34.1
Hi,
Please also post to all the relevant lists when submitting a patch, as
defined in ./scripts/get_maintainer.pl
On Thu, 6 Apr 2023 at 03:23, Sean Wang <seanwang1(a)lenovo.com> wrote:
>
> From: Sean Wang <seanwang1(a)lenovo.com>
>
> Add AMBA UCI id to support Cortex-A510
>
> Signed-off-by: Sean Wang <seanwang1(a)lenovo.com>
> ---
> drivers/hwtracing/coresight/coresight-etm4x-core.c | 10 ++++++++++
> 1 file changed, 10 insertions(+)
>
> diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c
> index 1ea8f173cca0..702704cf4f1f 100644
> --- a/drivers/hwtracing/coresight/coresight-etm4x-core.c
> +++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c
> @@ -2186,6 +2186,15 @@ static struct amba_cs_uci_id uci_id_etm4[] = {
> }
> };
>
> +static struct amba_cs_uci_id uci_id_ete[] = {
> + {
> + /* ETE UCI data */
> + .devarch = ETM_DEVARCH_ETE_ARCH,
> + .devarch_mask = ETM_DEVARCH_ID_MASK,
> + .devtype = 0x00000013,
> + }
> +};
> +
> static void clear_etmdrvdata(void *info)
> {
> int cpu = *(int *)info;
> @@ -2264,6 +2273,7 @@ static const struct amba_id etm4_ids[] = {
> CS_AMBA_UCI_ID(0x000cc0af, uci_id_etm4),/* Marvell ThunderX2 */
> CS_AMBA_UCI_ID(0x000b6d01, uci_id_etm4),/* HiSilicon-Hip08 */
> CS_AMBA_UCI_ID(0x000b6d02, uci_id_etm4),/* HiSilicon-Hip09 */
> + CS_AMBA_UCI_ID(0x000bbd46, uci_id_ete),/* Cortex-A510 */
> {},
> };
>
> --
> 2.34.1
>
I do not believe that this is needed. ETE is architecturally required
to implement the system instructions interface to the ETE trace
registers, and memory mapped access to the ETE from the PE is removed
by the specification
Your board .dts should declare ETE with the compatible
"arm,embedded-trace-extension", and the system instruction platform
driver will be initialised, which does not require the AMBA matching
IDs.
If your solution does have the optional APB interface to the ETE we
would expect this to be used by any external debug system and not by
self hosted trace in the linux kernel.
Regards
Mike
--
Mike Leach
Principal Engineer, ARM Ltd.
Manchester Design Centre. UK
On 05/04/2023 11:31, Greg KH wrote:
> On Wed, Apr 05, 2023 at 11:16:17AM +0100, Suzuki K Poulose wrote:
>> Hi Greg,
>>
>> On 21/03/2023 16:21, Greg KH wrote:
>>> On Tue, Mar 21, 2023 at 01:38:52PM +0000, Suzuki K Poulose wrote:
>>>> Greg,
>>>>
>>>> Please find a couple of fixes for coresight self-hosted tracing for v6.3. Kindly
>>>> consider pulling.
>>>>
>>>> Suzuki
>>>>
>>>>
>>>> The following changes since commit eeac8ede17557680855031c6f305ece2378af326:
>>>>
>>>> Linux 6.3-rc2 (2023-03-12 16:36:44 -0700)
>>>>
>>>> are available in the Git repository at:
>>>>
>>>> git://git.kernel.org/pub/scm/linux/kernel/git/coresight/linux.git tags/coresight-fixes-v6.3
>>
>>>
>>> Pulled and pushed out, thanks.
>>
>> Just wondering, why this hasn't shown up on linus' tree yet ? Do you plan to
>> send it for rc6 ?
>
> Yes.
Thanks! I will wait for that, before I queue the patches for the next cycle.
Suzuki
Hi Greg,
On 21/03/2023 16:21, Greg KH wrote:
> On Tue, Mar 21, 2023 at 01:38:52PM +0000, Suzuki K Poulose wrote:
>> Greg,
>>
>> Please find a couple of fixes for coresight self-hosted tracing for v6.3. Kindly
>> consider pulling.
>>
>> Suzuki
>>
>>
>> The following changes since commit eeac8ede17557680855031c6f305ece2378af326:
>>
>> Linux 6.3-rc2 (2023-03-12 16:36:44 -0700)
>>
>> are available in the Git repository at:
>>
>> git://git.kernel.org/pub/scm/linux/kernel/git/coresight/linux.git tags/coresight-fixes-v6.3
>
> Pulled and pushed out, thanks.
Just wondering, why this hasn't shown up on linus' tree yet ? Do you
plan to send it for rc6 ?
Suzuki