On 20/01/2023 00:51, Steve Clevenger wrote:
Add 32-bit read/write access option for Ampere ETMv4.6 64-bit registers. Ampere Computing erratum AC03_DEBUG_10 describes a design decision where 64-bit read/write access is not supported for the ETMv4.6 implementation. These 64-bit registers must be accessed as 2 ea. 32-bit registers. AC03_DEBUG_10 is described in the AmpereOne Developer Errata: https://solutions.amperecomputing.com/customer-connect/products/AmpereOne-de...
As with the previous comment, please: a) state whether this is because of the system instruction access support, and b) document the erratum.
Fix drvdata->nr_addr_cmp for() loop range bug to drvdata->nr_addr_cmp * 2 in etm_enable_hw.
Good catch! Please separate this out and send it as a fix. I can queue this.
Signed-off-by: Steve Clevenger scclevenger@os.amperecomputing.com
.../coresight/coresight-etm4x-core.c | 81 ++++++++++++++----- drivers/hwtracing/coresight/coresight-etm4x.h | 32 ++++++++ 2 files changed, 93 insertions(+), 20 deletions(-)
diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c index 533be1928a09..bf4daa649cdf 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x-core.c +++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c @@ -452,18 +452,31 @@ static int etm4_enable_hw(struct etmv4_drvdata *drvdata) if (etm4x_sspcicrn_present(drvdata, i)) etm4x_relaxed_write32(csa, config->ss_pe_cmp[i], TRCSSPCICRn(i)); }
- for (i = 0; i < drvdata->nr_addr_cmp; i++) {
etm4x_relaxed_write64(csa, config->addr_val[i], TRCACVRn(i));
etm4x_relaxed_write64(csa, config->addr_acc[i], TRCACATRn(i));
- for (i = 0; i < drvdata->nr_addr_cmp * 2; i++) {
if (drvdata->no_quad_mmio) {
etm4x_split_write64(csa, config->addr_val[i], TRCACVRn(i));
etm4x_split_write64(csa, config->addr_acc[i], TRCACATRn(i));
} else {
etm4x_relaxed_write64(csa, config->addr_val[i], TRCACVRn(i));
etm4x_relaxed_write64(csa, config->addr_acc[i], TRCACATRn(i));
}
- }
Something like this can leave some places out. I think we could hide it under the generic helpers and handle it there. Maybe "struct csdev_access" can cache this "no_quad_mmio" and do the right thing?
- for (i = 0; i < drvdata->numcidc; i++) {
if (drvdata->no_quad_mmio)
etm4x_split_write64(csa, config->ctxid_pid[i], TRCCIDCVRn(i));
else
}etm4x_relaxed_write64(csa, config->ctxid_pid[i], TRCCIDCVRn(i));
- for (i = 0; i < drvdata->numcidc; i++)
etm4x_relaxed_write32(csa, config->ctxid_mask0, TRCCIDCCTLR0); if (drvdata->numcidc > 4) etm4x_relaxed_write32(csa, config->ctxid_mask1, TRCCIDCCTLR1);etm4x_relaxed_write64(csa, config->ctxid_pid[i], TRCCIDCVRn(i));
- for (i = 0; i < drvdata->numvmidc; i++)
etm4x_relaxed_write64(csa, config->vmid_val[i], TRCVMIDCVRn(i));
- for (i = 0; i < drvdata->numvmidc; i++) {
if (drvdata->no_quad_mmio)
etm4x_split_write64(csa, config->vmid_val[i], TRCVMIDCVRn(i));
else
etm4x_relaxed_write64(csa, config->vmid_val[i], TRCVMIDCVRn(i));
- } etm4x_relaxed_write32(csa, config->vmid_mask0, TRCVMIDCCTLR0); if (drvdata->numvmidc > 4) etm4x_relaxed_write32(csa, config->vmid_mask1, TRCVMIDCCTLR1);
@@ -1670,8 +1683,13 @@ static int __etm4_cpu_save(struct etmv4_drvdata *drvdata) } for (i = 0; i < drvdata->nr_addr_cmp * 2; i++) {
state->trcacvr[i] = etm4x_read64(csa, TRCACVRn(i));
state->trcacatr[i] = etm4x_read64(csa, TRCACATRn(i));
if (drvdata->no_quad_mmio) {
state->trcacvr[i] = etm4x_split_read64(csa, TRCACVRn(i));
state->trcacatr[i] = etm4x_split_read64(csa, TRCACATRn(i));
} else {
state->trcacvr[i] = etm4x_read64(csa, TRCACVRn(i));
state->trcacatr[i] = etm4x_read64(csa, TRCACATRn(i));
}}
/* @@ -1681,11 +1699,19 @@ static int __etm4_cpu_save(struct etmv4_drvdata *drvdata) * unit") of ARM IHI 0064D. */
- for (i = 0; i < drvdata->numcidc; i++)
state->trccidcvr[i] = etm4x_read64(csa, TRCCIDCVRn(i));
- for (i = 0; i < drvdata->numcidc; i++) {
if (drvdata->no_quad_mmio)
state->trccidcvr[i] = etm4x_split_read64(csa, TRCCIDCVRn(i));
else
state->trccidcvr[i] = etm4x_read64(csa, TRCCIDCVRn(i));
- }
- for (i = 0; i < drvdata->numvmidc; i++)
state->trcvmidcvr[i] = etm4x_read64(csa, TRCVMIDCVRn(i));
- for (i = 0; i < drvdata->numvmidc; i++) {
if (drvdata->no_quad_mmio)
state->trcvmidcvr[i] = etm4x_split_read64(csa, TRCVMIDCVRn(i));
else
state->trcvmidcvr[i] = etm4x_read64(csa, TRCVMIDCVRn(i));
- }
state->trccidcctlr0 = etm4x_read32(csa, TRCCIDCCTLR0); if (drvdata->numcidc > 4) @@ -1799,15 +1825,28 @@ static void __etm4_cpu_restore(struct etmv4_drvdata *drvdata) } for (i = 0; i < drvdata->nr_addr_cmp * 2; i++) {
etm4x_relaxed_write64(csa, state->trcacvr[i], TRCACVRn(i));
etm4x_relaxed_write64(csa, state->trcacatr[i], TRCACATRn(i));
if (drvdata->no_quad_mmio) {
etm4x_split_write64(csa, state->trcacvr[i], TRCACVRn(i));
etm4x_split_write64(csa, state->trcacatr[i], TRCACATRn(i));
} else {
etm4x_relaxed_write64(csa, state->trcacvr[i], TRCACVRn(i));
etm4x_relaxed_write64(csa, state->trcacatr[i], TRCACATRn(i));
}}
- for (i = 0; i < drvdata->numcidc; i++)
etm4x_relaxed_write64(csa, state->trccidcvr[i], TRCCIDCVRn(i));
- for (i = 0; i < drvdata->numcidc; i++) {
if (drvdata->no_quad_mmio)
etm4x_split_write64(csa, state->trccidcvr[i], TRCCIDCVRn(i));
else
etm4x_relaxed_write64(csa, state->trccidcvr[i], TRCCIDCVRn(i));
- }
- for (i = 0; i < drvdata->numvmidc; i++)
etm4x_relaxed_write64(csa, state->trcvmidcvr[i], TRCVMIDCVRn(i));
- for (i = 0; i < drvdata->numvmidc; i++) {
if (drvdata->no_quad_mmio)
etm4x_split_write64(csa, state->trcvmidcvr[i], TRCVMIDCVRn(i));
else
etm4x_relaxed_write64(csa, state->trcvmidcvr[i], TRCVMIDCVRn(i));
- }
etm4x_relaxed_write32(csa, state->trccidcctlr0, TRCCIDCCTLR0); if (drvdata->numcidc > 4) @@ -2047,8 +2086,10 @@ static int etm4_probe(struct device *dev, void __iomem *base, u32 etm_pid) * isolates the manufacturer JEP106 ID in the PID. * TRCPIDR2 (JEDC|DES_1) << 16 | TRCPIDR1 (DES_0) << 8) */
- if ((init_arg.pid & 0x000FF000) == 0x00096000)
- if ((init_arg.pid & 0x000FF000) == 0x00096000) { drvdata->mmio_external = true;
drvdata->no_quad_mmio = true;
- }
/* * Serialize against CPUHP callbacks to avoid race condition diff --git a/drivers/hwtracing/coresight/coresight-etm4x.h b/drivers/hwtracing/coresight/coresight-etm4x.h index cf4f9f2e1807..0650bcdff410 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x.h +++ b/drivers/hwtracing/coresight/coresight-etm4x.h @@ -1016,6 +1016,7 @@ struct etmv4_save_state {
the trace unit.
- @arch_features: Bitmap of arch features of etmv4 devices.
- @mmio_external: True if ETM considers MMIO an external access.
*/ struct etmv4_drvdata { void __iomem *base;
- @no_quad_mmio: True if ETM does not support 64-bit (quad) access.
@@ -1069,6 +1070,7 @@ struct etmv4_drvdata { bool skip_power_up; DECLARE_BITMAP(arch_features, ETM4_IMPDEF_FEATURE_MAX); bool mmio_external;
- bool no_quad_mmio; };
/* Address comparator access types */ @@ -1093,6 +1095,36 @@ void etm4_config_trace_mode(struct etmv4_config *config); u64 etm4x_sysreg_read(u32 offset, bool _relaxed, bool _64bit); void etm4x_sysreg_write(u64 val, u32 offset, bool _relaxed, bool _64bit); +/* 64-bit aligned to convert 64-bit access to 2 ea. 32-bit access */ +#pragma pack(push, 8)
+struct etm_quad_split {
- u32 lsw;
- u32 msw;
+};
+#pragma pack(pop)
+static inline u64 etm4x_split_read64(struct csdev_access *csa, unsigned int offset) +{
- struct etm_quad_split container;
- container.lsw = etm4x_read32(csa, offset);
- container.msw = etm4x_read32(csa, offset + sizeof(u32));
- return *(u64 *) &container;
Wouldn't this break with an "endianness" flip? (Not that we have BE implementations.) Could we not combine the two values into a 64-bit value and pass that instead?
Similarly below.
Suzuki
+}
+static inline void etm4x_split_write64(struct csdev_access *csa, u64 quad, unsigned int offset) +{
- struct etm_quad_split container;
- *(u64 *) &container = quad;
- etm4x_relaxed_write32(csa, container.lsw, offset);
- etm4x_relaxed_write32(csa, container.msw, offset + sizeof(u32));
+}
- static inline bool etm4x_is_ete(struct etmv4_drvdata *drvdata) { return drvdata->arch >= ETM_ARCH_ETE;
On 20/01/2023 11:19, Suzuki K Poulose wrote:
On 20/01/2023 00:51, Steve Clevenger wrote:
[...]
} - for (i = 0; i < drvdata->nr_addr_cmp; i++) { - etm4x_relaxed_write64(csa, config->addr_val[i], TRCACVRn(i)); - etm4x_relaxed_write64(csa, config->addr_acc[i], TRCACATRn(i)); + for (i = 0; i < drvdata->nr_addr_cmp * 2; i++) { + if (drvdata->no_quad_mmio) { + etm4x_split_write64(csa, config->addr_val[i], TRCACVRn(i)); + etm4x_split_write64(csa, config->addr_acc[i], TRCACATRn(i)); + } else { + etm4x_relaxed_write64(csa, config->addr_val[i], TRCACVRn(i)); + etm4x_relaxed_write64(csa, config->addr_acc[i], TRCACATRn(i)); + } + }
Something like this can leave some places out. I think we could hide it under the generic helpers and handle it there. Maybe "struct csdev_access" can cache this "no_quad_mmio" and do the right thing?
+1 for this, or just pass drvdata to etm4x_relaxed_write64() and then it can decide what to do. I'd prefer that to caching the value in csdev_access because it would just be a copy of some other value and might go stale or not be set at some point.
James