On 11/05/2022 03:02, liuqi (BA) wrote:
Hi James,
On 2022/5/10 18:14, James Clark wrote:
On 07/04/2022 13:58, Yicong Yang wrote:
From: Qi Liu liuqi115@huawei.com
[...]
struct auxtrace_record *auxtrace_record__init(struct evlist *evlist, int *err) { @@ -57,8 +112,12 @@ struct auxtrace_record struct evsel *evsel; bool found_etm = false; struct perf_pmu *found_spe = NULL; + struct perf_pmu *found_ptt = NULL; struct perf_pmu **arm_spe_pmus = NULL; + struct perf_pmu **hisi_ptt_pmus = NULL;
int nr_spes = 0; + int nr_ptts = 0; int i = 0; if (!evlist) @@ -66,13 +125,14 @@ struct auxtrace_record cs_etm_pmu = perf_pmu__find(CORESIGHT_ETM_PMU_NAME); arm_spe_pmus = find_all_arm_spe_pmus(&nr_spes, err); + hisi_ptt_pmus = find_all_hisi_ptt_pmus(&nr_ptts, err); evlist__for_each_entry(evlist, evsel) { if (cs_etm_pmu && evsel->core.attr.type == cs_etm_pmu->type) found_etm = true; - if (!nr_spes || found_spe) + if ((!nr_spes || found_spe) && (!nr_ptts || found_ptt)) continue; for (i = 0; i < nr_spes; i++) { @@ -81,11 +141,18 @@ struct auxtrace_record break; } }
+ for (i = 0; i < nr_ptts; i++) { + if (evsel->core.attr.type == hisi_ptt_pmus[i]->type) { + found_ptt = hisi_ptt_pmus[i]; + break; + } + } } free(arm_spe_pmus); - if (found_etm && found_spe) { - pr_err("Concurrent ARM Coresight ETM and SPE operation not currently supported\n"); + if (found_etm && found_spe && found_ptt) { + pr_err("Concurrent ARM Coresight ETM ,SPE and HiSilicon PCIe Trace operation not currently supported\n");
Hi Yicong,
Is that actually a limitation? I don't see why they couldn't work concurrently.
As Leo said, the logic here should be like this:
int auxtrace_event_cnt = 0; if (found_etm) auxtrace_event_cnt++; if (found_spe) auxtrace_event_cnt++; if (found_ptt) auxtrace_event_cnt++;
if (auxtrace_event_cnt > 1) { pr_err("Concurrent AUX trace operation isn't supported: found etm %d spe %d ptt %d\n", found_etm, found_spe, found_ptt); *err = -EOPNOTSUPP; return NULL; }
which means perf doesn't allow more than one auxtrace event recording at the same time.
Oh I see that the limitation is actually in perf when decoding the data. I thought it meant that it wasn't possible to open multiple aux events at the same time, which I think should work in theory. Makes sense.
*err = -EOPNOTSUPP; return NULL; } @@ -96,6 +163,9 @@ struct auxtrace_record #if defined(__aarch64__) if (found_spe) return arm_spe_recording_init(err, found_spe);
+ if (found_ptt) + return hisi_ptt_recording_init(err, found_ptt); #endif /*
[...]
+static int hisi_ptt_recording_options(struct auxtrace_record *itr, + struct evlist *evlist, + struct record_opts *opts) +{ + struct hisi_ptt_recording *pttr = + container_of(itr, struct hisi_ptt_recording, itr); + struct perf_pmu *hisi_ptt_pmu = pttr->hisi_ptt_pmu; + struct perf_cpu_map *cpus = evlist->core.cpus; + struct evsel *evsel, *hisi_ptt_evsel = NULL; + struct evsel *tracking_evsel; + int err;
+ pttr->evlist = evlist; + evlist__for_each_entry(evlist, evsel) { + if (evsel->core.attr.type == hisi_ptt_pmu->type) { + if (hisi_ptt_evsel) { + pr_err("There may be only one " HISI_PTT_PMU_NAME "x event\n"); + return -EINVAL; + } + evsel->core.attr.freq = 0; + evsel->core.attr.sample_period = 1; + hisi_ptt_evsel = evsel; + opts->full_auxtrace = true; + } + }
+ err = hisi_ptt_set_auxtrace_mmap_page(opts); + if (err) + return err; + /* + * To obtain the auxtrace buffer file descriptor, the auxtrace event + * must come first. + */ + evlist__to_front(evlist, hisi_ptt_evsel);
+ if (!perf_cpu_map__empty(cpus)) { + evsel__set_sample_bit(hisi_ptt_evsel, TIME); + evsel__set_sample_bit(hisi_ptt_evsel, CPU); + }
Similar to Leo's comment: CPU isn't required if it's uncore, and if TIME is useful then add it regardless of whether the event is opened per-cpu or on a task.
got it, will fix this next time.
+ /* Add dummy event to keep tracking */ + err = parse_events(evlist, "dummy:u", NULL); + if (err) + return err;
+ tracking_evsel = evlist__last(evlist); + evlist__set_tracking_event(evlist, tracking_evsel);
+ tracking_evsel->core.attr.freq = 0; + tracking_evsel->core.attr.sample_period = 1;
+ if (!perf_cpu_map__empty(cpus)) + evsel__set_sample_bit(tracking_evsel, TIME);
Same comment as above.
got it, thanks.
+ return 0; +}
+static int hisi_ptt_process_auxtrace_event(struct perf_session *session, + union perf_event *event, + struct perf_tool *tool __maybe_unused) +{ + struct hisi_ptt *ptt = container_of(session->auxtrace, struct hisi_ptt, + auxtrace); + struct auxtrace_buffer *buffer; + off_t data_offset; + int fd = perf_data__fd(session->data); + int err;
+ if (perf_data__is_pipe(session->data)) { + data_offset = 0; + } else { + data_offset = lseek(fd, 0, SEEK_CUR); + if (data_offset == -1) + return -errno; + }
+ err = auxtrace_queues__add_event(&ptt->queues, session, event, + data_offset, &buffer); + if (err) + return err;
+ if (dump_trace) { + if (auxtrace_buffer__get_data(buffer, fd)) { + hisi_ptt_dump_event(ptt, buffer->data, buffer->size); + auxtrace_buffer__put_data(buffer); + }
Technically auxtrace_queues aren't required here because they are more for supporting trace from multiple CPUs and sorting and re-ordering between them.
If this is new device is uncore and always from a single source you could just go straight to hisi_ptt_dump_event() with data_offset and size of the auxtrace event.
But I suppose it also doesn't hurt to use some of the existing framework like you have done.
ok, I'll delete the auxtrace_queues next time, thanks.
It's up to you, it might be more work and it's best to leave it as it is now that you have it working already.
James
Qi
+ }
+ return 0; +}
+static int hisi_ptt_flush(struct perf_session *session __maybe_unused, + struct perf_tool *tool __maybe_unused) +{ + return 0; +}
+static void hisi_ptt_free_events(struct perf_session *session) +{ + struct hisi_ptt *ptt = container_of(session->auxtrace, struct hisi_ptt, + auxtrace); + struct auxtrace_queues *queues = &ptt->queues; + unsigned int i;
+ for (i = 0; i < queues->nr_queues; i++) { + free(queues->queue_array[i].priv); + queues->queue_array[i].priv = NULL; + } + auxtrace_queues__free(queues); +}
+static void hisi_ptt_free(struct perf_session *session) +{ + struct hisi_ptt *ptt = container_of(session->auxtrace, struct hisi_ptt, + auxtrace);
+ hisi_ptt_free_events(session); + session->auxtrace = NULL; + free(ptt); +}
+static bool hisi_ptt_evsel_is_auxtrace(struct perf_session *session, + struct evsel *evsel) +{ + struct hisi_ptt *ptt = container_of(session->auxtrace, struct hisi_ptt, auxtrace);
+ return evsel->core.attr.type == ptt->pmu_type; +}
+static const char * const hisi_ptt_info_fmts[] = { + [HISI_PTT_PMU_TYPE] = " PMU Type %" PRId64 "\n", +};
+static void hisi_ptt_print_info(__u64 *arr) +{ + if (!dump_trace) + return;
+ fprintf(stdout, hisi_ptt_info_fmts[HISI_PTT_PMU_TYPE], arr[HISI_PTT_PMU_TYPE]); +}
+int hisi_ptt_process_auxtrace_info(union perf_event *event, + struct perf_session *session) +{ + struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info; + struct hisi_ptt *ptt; + int err;
+ if (auxtrace_info->header.size < HISI_PTT_AUXTRACE_PRIV_SIZE + + sizeof(struct perf_record_auxtrace_info)) + return -EINVAL;
+ ptt = zalloc(sizeof(struct hisi_ptt)); + if (!ptt) + return -ENOMEM;
+ err = auxtrace_queues__init(&ptt->queues); + if (err) + goto err_free;
+ ptt->session = session; + ptt->machine = &session->machines.host; /* No kvm support */ + ptt->auxtrace_type = auxtrace_info->type; + ptt->pmu_type = auxtrace_info->priv[HISI_PTT_PMU_TYPE];
+ ptt->auxtrace.process_event = hisi_ptt_process_event; + ptt->auxtrace.process_auxtrace_event = hisi_ptt_process_auxtrace_event; + ptt->auxtrace.flush_events = hisi_ptt_flush; + ptt->auxtrace.free_events = hisi_ptt_free_events; + ptt->auxtrace.free = hisi_ptt_free; + ptt->auxtrace.evsel_is_auxtrace = hisi_ptt_evsel_is_auxtrace; + session->auxtrace = &ptt->auxtrace;
+ hisi_ptt_print_info(&auxtrace_info->priv[0]);
+ return 0;
+err_free: + free(ptt); + return err; +} diff --git a/tools/perf/util/hisi_ptt.h b/tools/perf/util/hisi_ptt.h new file mode 100644 index 000000000000..c0b6cbde1221 --- /dev/null +++ b/tools/perf/util/hisi_ptt.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/*
- HiSilicon PCIe Trace and Tuning (PTT) support
- Copyright (c) 2022 HiSilicon Technologies Co., Ltd.
- */
+#ifndef INCLUDE__PERF_HISI_PTT_H__ +#define INCLUDE__PERF_HISI_PTT_H__
+#define HISI_PTT_PMU_NAME "hisi_ptt" +enum { + HISI_PTT_PMU_TYPE, + HISI_PTT_PER_CPU_MMAPS, + HISI_PTT_AUXTRACE_PRIV_MAX, +};
+#define HISI_PTT_AUXTRACE_PRIV_SIZE (HISI_PTT_AUXTRACE_PRIV_MAX * sizeof(u64)) +union perf_event; +struct perf_session; +struct perf_pmu;
+struct auxtrace_record *hisi_ptt_recording_init(int *err, + struct perf_pmu *hisi_ptt_pmu);
+int hisi_ptt_process_auxtrace_info(union perf_event *event, + struct perf_session *session);
+#endif
.