This series adds CPU erratum work arounds related to the self-hosted tracing. The list of affected errata handled in this series are :
* TRBE may overwrite trace in FILL mode - Arm Neoverse-N2 #2139208 - Cortex-A710 #2119858
* A TSB instruction may not flush the trace completely when executed in trace prohibited region.
- Arm Neoverse-N2 #2067961 - Cortex-A710 #2054223
The series applies on the self-hosted/trbe fixes posted here [0]. A tree containing both the series is available here [1].
[0] https://lkml.kernel.org/r/20210723124456.3828769-1-suzuki.poulose@arm.com [1] git@git.gitlab.arm.com:linux-arm/linux-skp.git coresight/errata/trbe-tsb-n2-a710/v1
Suzuki K Poulose (10): coresight: trbe: Add infrastructure for Errata handling coresight: trbe: Add a helper to calculate the trace generated coresight: trbe: Add a helper to pad a given buffer area coresight: trbe: Decouple buffer base from the hardware base coresight: trbe: Allow driver to choose a different alignment arm64: Add Neoverse-N2, Cortex-A710 CPU part definition arm64: Add erratum detection for TRBE overwrite in FILL mode coresight: trbe: Workaround TRBE errat overwrite in FILL mode arm64: Enable workaround for TRBE overwrite in FILL mode arm64: errata: Add workaround for TSB flush failures
Documentation/arm64/silicon-errata.rst | 8 + arch/arm64/Kconfig | 70 ++++++ arch/arm64/include/asm/barrier.h | 17 +- arch/arm64/include/asm/cputype.h | 4 + arch/arm64/kernel/cpu_errata.c | 44 ++++ arch/arm64/tools/cpucaps | 2 + drivers/hwtracing/coresight/coresight-trbe.c | 227 ++++++++++++++++--- 7 files changed, 341 insertions(+), 31 deletions(-)
Add a minimal infrastructure to keep track of the errata affecting the given TRBE instance. Given that we have heterogeneous CPUs, we have to manage the list per-TRBE instance to be able to apply the work around as needed.
We rely on the arm64 errata framework for the actual description and the discovery of a given erratum, to keep the Erratum work around at a central place and benefit from the code and the advertisement from the kernel. We use a local mapping of the erratum to avoid bloating up the individual TRBE structures. i.e, each arm64 TRBE erratum bit is assigned a new number within the driver to track. Each trbe instance updates the list of affected erratum at probe time on the CPU. This makes sure that we can easily access the list of errata on a given TRBE instance without much overhead.
Cc: Mathieu Poirier mathieu.poirier@linaro.org Cc: Mike Leach mike.leach@linaro.org Cc: Leo Yan leo.yan@linaro.org Cc: Anshuman Khandual anshuman.khandual@arm.com Signed-off-by: Suzuki K Poulose suzuki.poulose@arm.com --- drivers/hwtracing/coresight/coresight-trbe.c | 48 ++++++++++++++++++++ 1 file changed, 48 insertions(+)
diff --git a/drivers/hwtracing/coresight/coresight-trbe.c b/drivers/hwtracing/coresight/coresight-trbe.c index b8586c170889..0368bf405e35 100644 --- a/drivers/hwtracing/coresight/coresight-trbe.c +++ b/drivers/hwtracing/coresight/coresight-trbe.c @@ -16,6 +16,8 @@ #define pr_fmt(fmt) DRVNAME ": " fmt
#include <asm/barrier.h> +#include <asm/cputype.h> + #include "coresight-self-hosted-trace.h" #include "coresight-trbe.h"
@@ -65,6 +67,35 @@ struct trbe_buf { struct trbe_cpudata *cpudata; };
+/* + * TRBE erratum list + * + * We rely on the corresponding cpucaps to be defined for a given + * TRBE erratum. We map the given cpucap into a TRBE internal number + * to make the tracking of the errata lean. + * + * This helps in : + * - Not duplicating the detection logic + * - Streamlined detection of erratum across the system + * + * Since the erratum work arounds could be applied individually + * per TRBE instance, we keep track of the list of errata that + * affects the given instance of the TRBE. + */ +#define TRBE_ERRATA_MAX 0 + +static unsigned long trbe_errata_cpucaps[TRBE_ERRATA_MAX] = { +}; + +/* + * struct trbe_cpudata: TRBE instance specific data + * @trbe_flag - TRBE dirty/access flag support + * @tbre_align - Actual TRBE alignment required for TRBPTR_EL1. + * @cpu - CPU this TRBE belongs to. + * @mode - Mode of current operation. (perf/disabled) + * @drvdata - TRBE specific drvdata + * @errata - Bit map for the errata on this TRBE. + */ struct trbe_cpudata { bool trbe_flag; u64 trbe_align; @@ -72,6 +103,7 @@ struct trbe_cpudata { enum cs_mode mode; struct trbe_buf *buf; struct trbe_drvdata *drvdata; + DECLARE_BITMAP(errata, TRBE_ERRATA_MAX); };
struct trbe_drvdata { @@ -84,6 +116,21 @@ struct trbe_drvdata { struct platform_device *pdev; };
+static void trbe_check_errata(struct trbe_cpudata *cpudata) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(trbe_errata_cpucaps); i++) { + if (this_cpu_has_cap(trbe_errata_cpucaps[i])) + set_bit(i, cpudata->errata); + } +} + +static inline bool trbe_has_erratum(int i, struct trbe_cpudata *cpudata) +{ + return (i < TRBE_ERRATA_MAX) && test_bit(i, cpudata->errata); +} + static int trbe_alloc_node(struct perf_event *event) { if (event->cpu == -1) @@ -925,6 +972,7 @@ static void arm_trbe_probe_cpu(void *info) goto cpu_clear; }
+ trbe_check_errata(cpudata); cpudata->trbe_align = 1ULL << get_trbe_address_align(trbidr); if (cpudata->trbe_align > SZ_2K) { pr_err("Unsupported alignment on cpu %d\n", cpu);
We collect the trace from the TRBE on FILL event from IRQ context and when via update_buffer(), when the event is stopped. Let us consolidate how we calculate the trace generated into a helper.
Signed-off-by: Suzuki K Poulose suzuki.poulose@arm.com --- drivers/hwtracing/coresight/coresight-trbe.c | 48 ++++++++++++-------- 1 file changed, 30 insertions(+), 18 deletions(-)
diff --git a/drivers/hwtracing/coresight/coresight-trbe.c b/drivers/hwtracing/coresight/coresight-trbe.c index 0368bf405e35..a0168ad204b3 100644 --- a/drivers/hwtracing/coresight/coresight-trbe.c +++ b/drivers/hwtracing/coresight/coresight-trbe.c @@ -528,6 +528,30 @@ static enum trbe_fault_action trbe_get_fault_act(u64 trbsr) return TRBE_FAULT_ACT_SPURIOUS; }
+static unsigned long trbe_get_trace_size(struct perf_output_handle *handle, + struct trbe_buf *buf, + bool wrap) +{ + u64 write; + u64 start_off, end_off; + + /* + * If the TRBE has wrapped around the write pointer has + * wrapped and should be treated as limit. + */ + if (wrap) + write = get_trbe_limit_pointer(); + else + write = get_trbe_write_pointer(); + + end_off = write - buf->trbe_base; + start_off = PERF_IDX2OFF(handle->head, buf); + + if (WARN_ON_ONCE(end_off < start_off)) + return 0; + return (end_off - start_off); +} + static void *arm_trbe_alloc_buffer(struct coresight_device *csdev, struct perf_event *event, void **pages, int nr_pages, bool snapshot) @@ -589,9 +613,9 @@ static unsigned long arm_trbe_update_buffer(struct coresight_device *csdev, struct trbe_cpudata *cpudata = dev_get_drvdata(&csdev->dev); struct trbe_buf *buf = config; enum trbe_fault_action act; - unsigned long size, offset; - unsigned long write, base, status; + unsigned long size, status; unsigned long flags; + bool wrap = false;
WARN_ON(buf->cpudata != cpudata); WARN_ON(cpudata->cpu != smp_processor_id()); @@ -633,8 +657,6 @@ static unsigned long arm_trbe_update_buffer(struct coresight_device *csdev, * handle gets freed in etm_event_stop(). */ trbe_drain_and_disable_local(); - write = get_trbe_write_pointer(); - base = get_trbe_base_pointer();
/* Check if there is a pending interrupt and handle it here */ status = read_sysreg_s(SYS_TRBSR_EL1); @@ -658,20 +680,11 @@ static unsigned long arm_trbe_update_buffer(struct coresight_device *csdev, goto done; }
- /* - * Otherwise, the buffer is full and the write pointer - * has reached base. Adjust this back to the Limit pointer - * for correct size. Also, mark the buffer truncated. - */ - write = get_trbe_limit_pointer(); perf_aux_output_flag(handle, PERF_AUX_FLAG_COLLISION); + wrap = true; }
- offset = write - base; - if (WARN_ON_ONCE(offset < PERF_IDX2OFF(handle->head, buf))) - size = 0; - else - size = offset - PERF_IDX2OFF(handle->head, buf); + size = trbe_get_trace_size(handle, buf, wrap);
done: local_irq_restore(flags); @@ -752,11 +765,10 @@ static int trbe_handle_overflow(struct perf_output_handle *handle) { struct perf_event *event = handle->event; struct trbe_buf *buf = etm_perf_sink_config(handle); - unsigned long offset, size; + unsigned long size; struct etm_event_data *event_data;
- offset = get_trbe_limit_pointer() - get_trbe_base_pointer(); - size = offset - PERF_IDX2OFF(handle->head, buf); + size = trbe_get_trace_size(handle, buf, true); if (buf->snapshot) handle->head += size;
Refactor the helper to pad a given AUX buffer area to allow "filling" ignore packets, without moving any handle pointers. This will be useful in working around errata, where we may have to fill the buffer after a session.
Signed-off-by: Suzuki K Poulose suzuki.poulose@arm.com --- drivers/hwtracing/coresight/coresight-trbe.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/drivers/hwtracing/coresight/coresight-trbe.c b/drivers/hwtracing/coresight/coresight-trbe.c index a0168ad204b3..0af644331b99 100644 --- a/drivers/hwtracing/coresight/coresight-trbe.c +++ b/drivers/hwtracing/coresight/coresight-trbe.c @@ -228,12 +228,18 @@ static void trbe_stop_and_truncate_event(struct perf_output_handle *handle) * consumed from the user space. The enabled TRBE buffer area is a moving subset of * the allocated perf auxiliary buffer. */ + +static void __trbe_pad_buf(struct trbe_buf *buf, u64 offset, int len) +{ + memset((void *)buf->trbe_base + offset, ETE_IGNORE_PACKET, len); +} + static void trbe_pad_buf(struct perf_output_handle *handle, int len) { struct trbe_buf *buf = etm_perf_sink_config(handle); u64 head = PERF_IDX2OFF(handle->head, buf);
- memset((void *)buf->trbe_base + head, ETE_IGNORE_PACKET, len); + __trbe_pad_buf(buf, head, len); if (!buf->snapshot) perf_aux_output_skip(handle, len); }
We always set the TRBBASER_EL1 to the base of the virtual ring buffer. We are about to change this for working around an erratum. So, in preparation to that, allow the driver to choose a different base for the TRBBASER_EL1 (which is within the buffer range).
Cc: Anshuman Khandual anshuman.khandual@arm.com Cc: Mike Leach mike.leach@linaro.org Cc: Mathieu Poirier mathieu.poirier@linaro.org Cc: Leo Yan leo.yan@linaro.org Signed-off-by: Suzuki K Poulose suzuki.poulose@arm.com --- drivers/hwtracing/coresight/coresight-trbe.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/drivers/hwtracing/coresight/coresight-trbe.c b/drivers/hwtracing/coresight/coresight-trbe.c index 0af644331b99..9735d514c5e1 100644 --- a/drivers/hwtracing/coresight/coresight-trbe.c +++ b/drivers/hwtracing/coresight/coresight-trbe.c @@ -59,6 +59,8 @@ struct trbe_buf { * trbe_limit sibling pointers. */ unsigned long trbe_base; + /* The base programmed into the TRBE */ + unsigned long trbe_hw_base; unsigned long trbe_limit; unsigned long trbe_write; int nr_pages; @@ -504,7 +506,7 @@ static void trbe_enable_hw(struct trbe_buf *buf) set_trbe_disabled(); isb(); clr_trbe_status(); - set_trbe_base_pointer(buf->trbe_base); + set_trbe_base_pointer(buf->trbe_hw_base); set_trbe_write_pointer(buf->trbe_write);
/* @@ -709,6 +711,8 @@ static int __arm_trbe_enable(struct trbe_buf *buf, trbe_stop_and_truncate_event(handle); return -ENOSPC; } + /* Set the base of the TRBE to the buffer base */ + buf->trbe_hw_base = buf->trbe_base; *this_cpu_ptr(buf->cpudata->drvdata->handle) = handle; trbe_enable_hw(buf); return 0; @@ -808,7 +812,7 @@ static bool is_perf_trbe(struct perf_output_handle *handle) struct trbe_drvdata *drvdata = cpudata->drvdata; int cpu = smp_processor_id();
- WARN_ON(buf->trbe_base != get_trbe_base_pointer()); + WARN_ON(buf->trbe_hw_base != get_trbe_base_pointer()); WARN_ON(buf->trbe_limit != get_trbe_limit_pointer());
if (cpudata->mode != CS_MODE_PERF)
The TRBE hardware mandates a minimum alignment for the TRBPTR_EL1, advertised via the TRBIDR_EL1. This is used by the driver to align the buffer write head. This patch allows the driver to choose a different alignment from that of the hardware, by decoupling the alignment tracking. This will be useful for working around errata.
Cc: Mathieu Poirier mathieu.poirier@linaro.org Cc: Anshuman Khandual anshuman.khandual@arm.com Cc: Mike Leach mike.leach@linaro.org Cc: Leo Yan leo.yan@linaro.org Signed-off-by: Suzuki K Poulose suzuki.poulose@arm.com --- drivers/hwtracing/coresight/coresight-trbe.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/drivers/hwtracing/coresight/coresight-trbe.c b/drivers/hwtracing/coresight/coresight-trbe.c index 9735d514c5e1..9ea28813182b 100644 --- a/drivers/hwtracing/coresight/coresight-trbe.c +++ b/drivers/hwtracing/coresight/coresight-trbe.c @@ -92,7 +92,8 @@ static unsigned long trbe_errata_cpucaps[TRBE_ERRATA_MAX] = { /* * struct trbe_cpudata: TRBE instance specific data * @trbe_flag - TRBE dirty/access flag support - * @tbre_align - Actual TRBE alignment required for TRBPTR_EL1. + * @trbe_hw_align - Actual TRBE alignment required for TRBPTR_EL1. + * @trbe_align - Software alignment used for the TRBPTR_EL1, * @cpu - CPU this TRBE belongs to. * @mode - Mode of current operation. (perf/disabled) * @drvdata - TRBE specific drvdata @@ -100,6 +101,7 @@ static unsigned long trbe_errata_cpucaps[TRBE_ERRATA_MAX] = { */ struct trbe_cpudata { bool trbe_flag; + u64 trbe_hw_align; u64 trbe_align; int cpu; enum cs_mode mode; @@ -906,7 +908,7 @@ static ssize_t align_show(struct device *dev, struct device_attribute *attr, cha { struct trbe_cpudata *cpudata = dev_get_drvdata(dev);
- return sprintf(buf, "%llx\n", cpudata->trbe_align); + return sprintf(buf, "%llx\n", cpudata->trbe_hw_align); } static DEVICE_ATTR_RO(align);
@@ -995,11 +997,13 @@ static void arm_trbe_probe_cpu(void *info) }
trbe_check_errata(cpudata); - cpudata->trbe_align = 1ULL << get_trbe_address_align(trbidr); - if (cpudata->trbe_align > SZ_2K) { + + cpudata->trbe_hw_align = 1ULL << get_trbe_address_align(trbidr); + if (cpudata->trbe_hw_align > SZ_2K) { pr_err("Unsupported alignment on cpu %d\n", cpu); goto cpu_clear; } + cpudata->trbe_align = cpudata->trbe_hw_align; cpudata->trbe_flag = get_trbe_flag_update(trbidr); cpudata->cpu = cpu; cpudata->drvdata = drvdata;
Add the CPU Partnumbers for the new Arm designs.
Cc: Catalin Marinas catalin.marinas@arm.com Cc: Mark Rutland mark.rutland@arm.com Cc: Will Deacon will@kernel.org Signed-off-by: Suzuki K Poulose suzuki.poulose@arm.com --- arch/arm64/include/asm/cputype.h | 4 ++++ 1 file changed, 4 insertions(+)
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 6231e1f0abe7..b71bd6c176c2 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -73,6 +73,8 @@ #define ARM_CPU_PART_CORTEX_A76 0xD0B #define ARM_CPU_PART_NEOVERSE_N1 0xD0C #define ARM_CPU_PART_CORTEX_A77 0xD0D +#define ARM_CPU_PART_CORTEX_A710 0xD47 +#define ARM_CPU_PART_NEOVERSE_N2 0xD49
#define APM_CPU_PART_POTENZA 0x000
@@ -112,6 +114,8 @@ #define MIDR_CORTEX_A55 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A55) #define MIDR_CORTEX_A76 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76) #define MIDR_NEOVERSE_N1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N1) +#define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710) +#define MIDR_NEOVERSE_N2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N2) #define MIDR_CORTEX_A77 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A77) #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX)
Arm Neoverse-N2 and the Cortex-A710 cores are affected by a CPU erratum where the TRBE will overwrite the trace buffer in FILL mode. The TRBE doesn't stop (as expected in FILL mode) when it reaches the limit and wraps to the base to continue writing upto 3 cache lines. This will overwrite any trace that was written previously.
Add the Neoverse-N2 erratumi(#2139208) and Cortex-A710 erratum (#2119858) to the detection logic.
This will be used by the TRBE driver in later patches to work around the issue. The detection has been kept with the core arm64 errata framework list to make sure : - We don't duplicate the framework in TRBE driver - The errata detection is advertised like the rest of the CPU errata.
Note that the Kconfig entries will be added after we have added the work around in the TRBE driver, which depends on the cpucap from here.
Cc: Will Deacon will@kernel.org Cc: Mark Rutland mark.rutland@arm.com Cc: Anshuman Khandual anshuman.khandual@arm.com Cc: Catalin Marinas catalin.marinas@arm.com Cc: Mathieu Poirier mathieu.poirier@linaro.org Cc: Mike Leach mike.leach@linaro.org cc: Leo Yan leo.yan@linaro.org Signed-off-by: Suzuki K Poulose suzuki.poulose@arm.com --- arch/arm64/kernel/cpu_errata.c | 25 +++++++++++++++++++++++++ arch/arm64/tools/cpucaps | 1 + 2 files changed, 26 insertions(+)
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index e2c20c036442..ccd757373f36 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -340,6 +340,18 @@ static const struct midr_range erratum_1463225[] = { }; #endif
+#ifdef CONFIG_ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE +static const struct midr_range trbe_overwrite_fill_mode_cpus[] = { +#ifdef CONFIG_ARM64_ERRATUM_2139208 + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), +#endif +#ifdef CONFIG_ARM64_ERRATUM_2119858 + MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), +#endif + {}, +}; +#endif /* CONFIG_ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE */ + const struct arm64_cpu_capabilities arm64_errata[] = { #ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE { @@ -533,6 +545,19 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .capability = ARM64_WORKAROUND_NVIDIA_CARMEL_CNP, ERRATA_MIDR_ALL_VERSIONS(MIDR_NVIDIA_CARMEL), }, +#endif +#ifdef CONFIG_ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE + { + /* + * The erratum work around is handled within the TRBE + * driver and can be applied per-cpu. So, we can allow + * a late CPU to come online with this erratum. + */ + .desc = "ARM erratum 2119858 or 2139208", + .capability = ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE, + .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE, + CAP_MIDR_RANGE_LIST(trbe_overwrite_fill_mode_cpus), + }, #endif { } diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index 49305c2e6dfd..1ccb92165bd8 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -53,6 +53,7 @@ WORKAROUND_1418040 WORKAROUND_1463225 WORKAROUND_1508412 WORKAROUND_1542419 +WORKAROUND_TRBE_OVERWRITE_FILL_MODE WORKAROUND_CAVIUM_23154 WORKAROUND_CAVIUM_27456 WORKAROUND_CAVIUM_30115
Hi Suzuki,
On Wed, Jul 28, 2021 at 7:23 PM Suzuki K Poulose suzuki.poulose@arm.com wrote:
Arm Neoverse-N2 and the Cortex-A710 cores are affected by a CPU erratum where the TRBE will overwrite the trace buffer in FILL mode. The TRBE doesn't stop (as expected in FILL mode) when it reaches the limit and wraps to the base to continue writing upto 3 cache lines. This will overwrite any trace that was written previously.
Add the Neoverse-N2 erratumi(#2139208) and Cortex-A710 erratum (#2119858) to the detection logic.
This will be used by the TRBE driver in later patches to work around the issue. The detection has been kept with the core arm64 errata framework list to make sure :
- We don't duplicate the framework in TRBE driver
- The errata detection is advertised like the rest of the CPU errata.
Note that the Kconfig entries will be added after we have added the work around in the TRBE driver, which depends on the cpucap from here.
Cc: Will Deacon will@kernel.org Cc: Mark Rutland mark.rutland@arm.com Cc: Anshuman Khandual anshuman.khandual@arm.com Cc: Catalin Marinas catalin.marinas@arm.com Cc: Mathieu Poirier mathieu.poirier@linaro.org Cc: Mike Leach mike.leach@linaro.org cc: Leo Yan leo.yan@linaro.org Signed-off-by: Suzuki K Poulose suzuki.poulose@arm.com
arch/arm64/kernel/cpu_errata.c | 25 +++++++++++++++++++++++++ arch/arm64/tools/cpucaps | 1 + 2 files changed, 26 insertions(+)
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index e2c20c036442..ccd757373f36 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -340,6 +340,18 @@ static const struct midr_range erratum_1463225[] = { }; #endif
+#ifdef CONFIG_ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE +static const struct midr_range trbe_overwrite_fill_mode_cpus[] = { +#ifdef CONFIG_ARM64_ERRATUM_2139208
MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
+#endif +#ifdef CONFIG_ARM64_ERRATUM_2119858
MIDR_ALL_VERSIONS(MIDR_CORTEX_A710),
+#endif
{},
+}; +#endif /* CONFIG_ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE */
const struct arm64_cpu_capabilities arm64_errata[] = { #ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE { @@ -533,6 +545,19 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .capability = ARM64_WORKAROUND_NVIDIA_CARMEL_CNP, ERRATA_MIDR_ALL_VERSIONS(MIDR_NVIDIA_CARMEL), }, +#endif +#ifdef CONFIG_ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE
{
/*
* The erratum work around is handled within the TRBE
* driver and can be applied per-cpu. So, we can allow
* a late CPU to come online with this erratum.
*/
.desc = "ARM erratum 2119858 or 2139208",
.capability = ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE,
.type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE,
CAP_MIDR_RANGE_LIST(trbe_overwrite_fill_mode_cpus),
},
#endif { } diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index 49305c2e6dfd..1ccb92165bd8 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -53,6 +53,7 @@ WORKAROUND_1418040 WORKAROUND_1463225 WORKAROUND_1508412 WORKAROUND_1542419 +WORKAROUND_TRBE_OVERWRITE_FILL_MODE WORKAROUND_CAVIUM_23154 WORKAROUND_CAVIUM_27456 WORKAROUND_CAVIUM_30115
We need to keep this list sorted ?
-- 2.24.1
CoreSight mailing list CoreSight@lists.linaro.org https://lists.linaro.org/mailman/listinfo/coresight
On 06/08/2021 13:44, Linu Cherian wrote:
Hi Suzuki,
On Wed, Jul 28, 2021 at 7:23 PM Suzuki K Poulose suzuki.poulose@arm.com wrote:
Arm Neoverse-N2 and the Cortex-A710 cores are affected by a CPU erratum where the TRBE will overwrite the trace buffer in FILL mode. The TRBE doesn't stop (as expected in FILL mode) when it reaches the limit and wraps to the base to continue writing upto 3 cache lines. This will overwrite any trace that was written previously.
Add the Neoverse-N2 erratumi(#2139208) and Cortex-A710 erratum (#2119858) to the detection logic.
This will be used by the TRBE driver in later patches to work around the issue. The detection has been kept with the core arm64 errata framework list to make sure :
- We don't duplicate the framework in TRBE driver
- The errata detection is advertised like the rest of the CPU errata.
Note that the Kconfig entries will be added after we have added the work around in the TRBE driver, which depends on the cpucap from here.
Cc: Will Deacon will@kernel.org Cc: Mark Rutland mark.rutland@arm.com Cc: Anshuman Khandual anshuman.khandual@arm.com Cc: Catalin Marinas catalin.marinas@arm.com Cc: Mathieu Poirier mathieu.poirier@linaro.org Cc: Mike Leach mike.leach@linaro.org cc: Leo Yan leo.yan@linaro.org Signed-off-by: Suzuki K Poulose suzuki.poulose@arm.com
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index 49305c2e6dfd..1ccb92165bd8 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -53,6 +53,7 @@ WORKAROUND_1418040 WORKAROUND_1463225 WORKAROUND_1508412 WORKAROUND_1542419 +WORKAROUND_TRBE_OVERWRITE_FILL_MODE WORKAROUND_CAVIUM_23154 WORKAROUND_CAVIUM_27456 WORKAROUND_CAVIUM_30115
We need to keep this list sorted ?
Not necessary, anymore. For a given kernel the numbers are autogenerated by the script.
See arch/arm64/tools/gen-cpucaps.awk
Suzuki
ARM Neoverse-N2 (#2139208) and Cortex-A710(##2119858) suffers from an erratum, which when triggered, might cause the TRBE to overwrite the trace data already collected in FILL mode, in the event of a WRAP. i.e, the TRBE doesn't stop writing the data, instead wraps to the base and could write upto 3 cache line size worth trace. Thus, this could corrupt the trace at the "BASE" pointer.
The workaround is to program the write pointer 256bytes from the base, such that if the erratum is triggered, it doesn't overwrite the trace data that was captured. This skipped region could be padded with ignore packets at the end of the session, so that the decoder sees a continuous buffer with some padding at the beginning. The trace data written at the base is considered lost as the limit could have been in the middle of the perf ring buffer, and jumping to the "base" is not acceptable. We set the flags already to indicate that some amount of trace was lost during the FILL event IRQ. So this is fine.
One important change with the work around is, we program the TRBBASER_EL1 to current page where we are allowed to write. Otherwise, it could overwrite a region that may be consumed by the perf. Towards this, we always make sure that the "handle->head" and thus the trbe_write is PAGE_SIZE aligned, so that we can set the BASE to the PAGE base and move the TRBPTR to the 256bytes offset.
Cc: Mike Leach mike.leach@linaro.org Cc: Mathieu Poirier mathieu.poirier@linaro.org Cc: Anshuman Khandual anshuman.khandual@arm.com Cc: Leo Yan leo.yan@linaro.org Signed-off-by: Suzuki K Poulose suzuki.poulose@arm.com --- drivers/hwtracing/coresight/coresight-trbe.c | 111 +++++++++++++++++-- 1 file changed, 102 insertions(+), 9 deletions(-)
diff --git a/drivers/hwtracing/coresight/coresight-trbe.c b/drivers/hwtracing/coresight/coresight-trbe.c index 9ea28813182b..cd997ed5d918 100644 --- a/drivers/hwtracing/coresight/coresight-trbe.c +++ b/drivers/hwtracing/coresight/coresight-trbe.c @@ -17,6 +17,7 @@
#include <asm/barrier.h> #include <asm/cputype.h> +#include <asm/cpufeature.h>
#include "coresight-self-hosted-trace.h" #include "coresight-trbe.h" @@ -84,9 +85,17 @@ struct trbe_buf { * per TRBE instance, we keep track of the list of errata that * affects the given instance of the TRBE. */ -#define TRBE_ERRATA_MAX 0 +#define TRBE_WORKAROUND_OVERWRITE_FILL_MODE 0 +#define TRBE_ERRATA_MAX 1 + +/* + * Safe limit for the number of bytes that may be overwritten + * when the erratum is triggered. + */ +#define TRBE_WORKAROUND_OVERWRITE_FILL_MODE_SKIP 256
static unsigned long trbe_errata_cpucaps[TRBE_ERRATA_MAX] = { + [TRBE_WORKAROUND_OVERWRITE_FILL_MODE] = ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE, };
/* @@ -531,10 +540,13 @@ static enum trbe_fault_action trbe_get_fault_act(u64 trbsr) if ((ec == TRBE_EC_STAGE1_ABORT) || (ec == TRBE_EC_STAGE2_ABORT)) return TRBE_FAULT_ACT_FATAL;
- if (is_trbe_wrap(trbsr) && (ec == TRBE_EC_OTHERS) && (bsc == TRBE_BSC_FILLED)) { - if (get_trbe_write_pointer() == get_trbe_base_pointer()) - return TRBE_FAULT_ACT_WRAP; - } + /* + * It is not necessary to verify the TRBPTR == TRBBASER to detect + * a FILL event. Moreover, CPU errata could make this check invalid. + */ + if (is_trbe_wrap(trbsr) && (ec == TRBE_EC_OTHERS) && (bsc == TRBE_BSC_FILLED)) + return TRBE_FAULT_ACT_WRAP; + return TRBE_FAULT_ACT_SPURIOUS; }
@@ -544,6 +556,7 @@ static unsigned long trbe_get_trace_size(struct perf_output_handle *handle, { u64 write; u64 start_off, end_off; + u64 size;
/* * If the TRBE has wrapped around the write pointer has @@ -559,7 +572,18 @@ static unsigned long trbe_get_trace_size(struct perf_output_handle *handle,
if (WARN_ON_ONCE(end_off < start_off)) return 0; - return (end_off - start_off); + + size = end_off - start_off; + /* + * If the TRBE is affected by the following erratum, we must fill + * the space we skipped with IGNORE packets. And we are always + * guaranteed to have at least a PAGE_SIZE space in the buffer. + */ + if (trbe_has_erratum(TRBE_WORKAROUND_OVERWRITE_FILL_MODE, buf->cpudata) && + !WARN_ON(size < TRBE_WORKAROUND_OVERWRITE_FILL_MODE_SKIP)) + __trbe_pad_buf(buf, start_off, TRBE_WORKAROUND_OVERWRITE_FILL_MODE_SKIP); + + return size; }
static void *arm_trbe_alloc_buffer(struct coresight_device *csdev, @@ -704,20 +728,73 @@ static unsigned long arm_trbe_update_buffer(struct coresight_device *csdev, return size; }
+ +static int trbe_apply_work_around_before_enable(struct trbe_buf *buf) +{ + /* + * TRBE_WORKAROUND_OVERWRITE_FILL_MODE causes the TRBE to overwrite a few cache + * line size from the "TRBBASER_EL1" in the event of a "FILL". + * Thus, we could loose some amount of the trace at the base. + * + * To work around this: + * - Software must leave 256bytes of space from the base, so that + * the trace collected now is not overwritten. + * - Fill the first 256bytes with IGNORE packets for the decoder + * to ignore at the end of the session, so that the decoder ignores + * this gap. + * + * This also means that, the TRBE driver must set the TRBBASER_EL1 + * such that, when the erratum is triggered, it doesn't overwrite + * the "area" outside the area marked by (handle->head, +size). + * So, we make sure that the handle->head is always PAGE aligned, + * by tweaking the required alignment for the TRBE (trbe_align). + * And when we enable the TRBE, + * + * - move the TRBPTR_EL1 to 256bytes past the starting point. + * So that any trace collected in this run is not overwritten. + * + * - set the TRBBASER_EL1 to the original trbe_write. This will + * ensure that, if the TRBE hits the erratum, it would only + * write within the region allowed for the TRBE. + * + * At the trace collection time, we always pad the skipped bytes + * with IGNORE packets to make sure the decoder doesn't see any + * overwritten packets. + */ + if (trbe_has_erratum(TRBE_WORKAROUND_OVERWRITE_FILL_MODE, buf->cpudata)) { + if (WARN_ON(!IS_ALIGNED(buf->trbe_write, PAGE_SIZE))) + return -EINVAL; + buf->trbe_hw_base = buf->trbe_write; + buf->trbe_write += TRBE_WORKAROUND_OVERWRITE_FILL_MODE_SKIP; + } + + return 0; +} + static int __arm_trbe_enable(struct trbe_buf *buf, struct perf_output_handle *handle) { + int ret = 0; + buf->trbe_limit = compute_trbe_buffer_limit(handle); buf->trbe_write = buf->trbe_base + PERF_IDX2OFF(handle->head, buf); if (buf->trbe_limit == buf->trbe_base) { - trbe_stop_and_truncate_event(handle); - return -ENOSPC; + ret = -ENOSPC; + goto err; } /* Set the base of the TRBE to the buffer base */ buf->trbe_hw_base = buf->trbe_base; + + ret = trbe_apply_work_around_before_enable(buf); + if (ret) + goto err; + *this_cpu_ptr(buf->cpudata->drvdata->handle) = handle; trbe_enable_hw(buf); return 0; +err: + trbe_stop_and_truncate_event(handle); + return ret; }
static int arm_trbe_enable(struct coresight_device *csdev, u32 mode, void *data) @@ -1003,7 +1080,23 @@ static void arm_trbe_probe_cpu(void *info) pr_err("Unsupported alignment on cpu %d\n", cpu); goto cpu_clear; } - cpudata->trbe_align = cpudata->trbe_hw_align; + + /* + * If the TRBE is affected by erratum TRBE_WORKAROUND_OVERWRITE_FILL_MODE, + * we must always program the TBRPTR_EL1, 256bytes from a page + * boundary, with TRBBASER_EL1 set to the page, to prevent + * TRBE over-writing 256bytes at TRBBASER_EL1 on FILL event. + * + * Thus make sure we always align our write pointer to a PAGE_SIZE, + * which also guarantees that we have at least a PAGE_SIZE space in + * the buffer (TRBLIMITR is PAGE aligned) and thus we can skip + * the required bytes at the base. + */ + if (trbe_has_erratum(TRBE_WORKAROUND_OVERWRITE_FILL_MODE, cpudata)) + cpudata->trbe_align = PAGE_SIZE; + else + cpudata->trbe_align = cpudata->trbe_hw_align; + cpudata->trbe_flag = get_trbe_flag_update(trbidr); cpudata->cpu = cpu; cpudata->drvdata = drvdata;
Hi Suzuki,
On Wed, Jul 28, 2021 at 7:23 PM Suzuki K Poulose suzuki.poulose@arm.com wrote:
ARM Neoverse-N2 (#2139208) and Cortex-A710(##2119858) suffers from an erratum, which when triggered, might cause the TRBE to overwrite the trace data already collected in FILL mode, in the event of a WRAP. i.e, the TRBE doesn't stop writing the data, instead wraps to the base and could write upto 3 cache line size worth trace. Thus, this could corrupt the trace at the "BASE" pointer.
The workaround is to program the write pointer 256bytes from the base, such that if the erratum is triggered, it doesn't overwrite the trace data that was captured. This skipped region could be padded with ignore packets at the end of the session, so that the decoder sees a continuous buffer with some padding at the beginning.
Just trying to understand, Is there a possibility that lost data results in partial trace packets towards the end of the buffer ? Or its always guaranteed that trace packet end is always aligned with buffer end/limit ? Thinking of a case when formatting is disabled.
The trace data written at the base is considered
lost as the limit could have been in the middle of the perf ring buffer, and jumping to the "base" is not acceptable. We set the flags already to indicate that some amount of trace was lost during the FILL event IRQ. So this is fine.
One important change with the work around is, we program the TRBBASER_EL1 to current page where we are allowed to write. Otherwise, it could overwrite a region that may be consumed by the perf. Towards this, we always make sure that the "handle->head" and thus the trbe_write is PAGE_SIZE aligned, so that we can set the BASE to the PAGE base and move the TRBPTR to the 256bytes offset.
Cc: Mike Leach mike.leach@linaro.org Cc: Mathieu Poirier mathieu.poirier@linaro.org Cc: Anshuman Khandual anshuman.khandual@arm.com Cc: Leo Yan leo.yan@linaro.org Signed-off-by: Suzuki K Poulose suzuki.poulose@arm.com
drivers/hwtracing/coresight/coresight-trbe.c | 111 +++++++++++++++++-- 1 file changed, 102 insertions(+), 9 deletions(-)
diff --git a/drivers/hwtracing/coresight/coresight-trbe.c b/drivers/hwtracing/coresight/coresight-trbe.c index 9ea28813182b..cd997ed5d918 100644 --- a/drivers/hwtracing/coresight/coresight-trbe.c +++ b/drivers/hwtracing/coresight/coresight-trbe.c @@ -17,6 +17,7 @@
#include <asm/barrier.h> #include <asm/cputype.h> +#include <asm/cpufeature.h>
#include "coresight-self-hosted-trace.h" #include "coresight-trbe.h" @@ -84,9 +85,17 @@ struct trbe_buf {
- per TRBE instance, we keep track of the list of errata that
- affects the given instance of the TRBE.
*/ -#define TRBE_ERRATA_MAX 0 +#define TRBE_WORKAROUND_OVERWRITE_FILL_MODE 0 +#define TRBE_ERRATA_MAX 1
+/*
- Safe limit for the number of bytes that may be overwritten
- when the erratum is triggered.
- */
+#define TRBE_WORKAROUND_OVERWRITE_FILL_MODE_SKIP 256
static unsigned long trbe_errata_cpucaps[TRBE_ERRATA_MAX] = {
[TRBE_WORKAROUND_OVERWRITE_FILL_MODE] = ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE,
};
/* @@ -531,10 +540,13 @@ static enum trbe_fault_action trbe_get_fault_act(u64 trbsr) if ((ec == TRBE_EC_STAGE1_ABORT) || (ec == TRBE_EC_STAGE2_ABORT)) return TRBE_FAULT_ACT_FATAL;
if (is_trbe_wrap(trbsr) && (ec == TRBE_EC_OTHERS) && (bsc == TRBE_BSC_FILLED)) {
if (get_trbe_write_pointer() == get_trbe_base_pointer())
return TRBE_FAULT_ACT_WRAP;
}
/*
* It is not necessary to verify the TRBPTR == TRBBASER to detect
* a FILL event. Moreover, CPU errata could make this check invalid.
*/
if (is_trbe_wrap(trbsr) && (ec == TRBE_EC_OTHERS) && (bsc == TRBE_BSC_FILLED))
return TRBE_FAULT_ACT_WRAP;
return TRBE_FAULT_ACT_SPURIOUS;
}
@@ -544,6 +556,7 @@ static unsigned long trbe_get_trace_size(struct perf_output_handle *handle, { u64 write; u64 start_off, end_off;
u64 size; /* * If the TRBE has wrapped around the write pointer has
@@ -559,7 +572,18 @@ static unsigned long trbe_get_trace_size(struct perf_output_handle *handle,
if (WARN_ON_ONCE(end_off < start_off)) return 0;
return (end_off - start_off);
size = end_off - start_off;
/*
* If the TRBE is affected by the following erratum, we must fill
* the space we skipped with IGNORE packets. And we are always
* guaranteed to have at least a PAGE_SIZE space in the buffer.
*/
if (trbe_has_erratum(TRBE_WORKAROUND_OVERWRITE_FILL_MODE, buf->cpudata) &&
!WARN_ON(size < TRBE_WORKAROUND_OVERWRITE_FILL_MODE_SKIP))
__trbe_pad_buf(buf, start_off, TRBE_WORKAROUND_OVERWRITE_FILL_MODE_SKIP);
return size;
}
static void *arm_trbe_alloc_buffer(struct coresight_device *csdev, @@ -704,20 +728,73 @@ static unsigned long arm_trbe_update_buffer(struct coresight_device *csdev, return size; }
+static int trbe_apply_work_around_before_enable(struct trbe_buf *buf) +{
/*
* TRBE_WORKAROUND_OVERWRITE_FILL_MODE causes the TRBE to overwrite a few cache
* line size from the "TRBBASER_EL1" in the event of a "FILL".
* Thus, we could loose some amount of the trace at the base.
*
* To work around this:
* - Software must leave 256bytes of space from the base, so that
* the trace collected now is not overwritten.
* - Fill the first 256bytes with IGNORE packets for the decoder
* to ignore at the end of the session, so that the decoder ignores
* this gap.
*
* This also means that, the TRBE driver must set the TRBBASER_EL1
* such that, when the erratum is triggered, it doesn't overwrite
* the "area" outside the area marked by (handle->head, +size).
* So, we make sure that the handle->head is always PAGE aligned,
* by tweaking the required alignment for the TRBE (trbe_align).
* And when we enable the TRBE,
*
* - move the TRBPTR_EL1 to 256bytes past the starting point.
* So that any trace collected in this run is not overwritten.
*
* - set the TRBBASER_EL1 to the original trbe_write. This will
* ensure that, if the TRBE hits the erratum, it would only
* write within the region allowed for the TRBE.
*
* At the trace collection time, we always pad the skipped bytes
* with IGNORE packets to make sure the decoder doesn't see any
* overwritten packets.
*/
if (trbe_has_erratum(TRBE_WORKAROUND_OVERWRITE_FILL_MODE, buf->cpudata)) {
if (WARN_ON(!IS_ALIGNED(buf->trbe_write, PAGE_SIZE)))
return -EINVAL;
buf->trbe_hw_base = buf->trbe_write;
buf->trbe_write += TRBE_WORKAROUND_OVERWRITE_FILL_MODE_SKIP;
}
return 0;
+}
static int __arm_trbe_enable(struct trbe_buf *buf, struct perf_output_handle *handle) {
int ret = 0;
buf->trbe_limit = compute_trbe_buffer_limit(handle); buf->trbe_write = buf->trbe_base + PERF_IDX2OFF(handle->head, buf); if (buf->trbe_limit == buf->trbe_base) {
trbe_stop_and_truncate_event(handle);
return -ENOSPC;
ret = -ENOSPC;
goto err; } /* Set the base of the TRBE to the buffer base */ buf->trbe_hw_base = buf->trbe_base;
ret = trbe_apply_work_around_before_enable(buf);
if (ret)
goto err;
*this_cpu_ptr(buf->cpudata->drvdata->handle) = handle; trbe_enable_hw(buf); return 0;
+err:
trbe_stop_and_truncate_event(handle);
return ret;
}
static int arm_trbe_enable(struct coresight_device *csdev, u32 mode, void *data) @@ -1003,7 +1080,23 @@ static void arm_trbe_probe_cpu(void *info) pr_err("Unsupported alignment on cpu %d\n", cpu); goto cpu_clear; }
cpudata->trbe_align = cpudata->trbe_hw_align;
/*
* If the TRBE is affected by erratum TRBE_WORKAROUND_OVERWRITE_FILL_MODE,
* we must always program the TBRPTR_EL1, 256bytes from a page
* boundary, with TRBBASER_EL1 set to the page, to prevent
* TRBE over-writing 256bytes at TRBBASER_EL1 on FILL event.
*
* Thus make sure we always align our write pointer to a PAGE_SIZE,
* which also guarantees that we have at least a PAGE_SIZE space in
* the buffer (TRBLIMITR is PAGE aligned) and thus we can skip
* the required bytes at the base.
*/
if (trbe_has_erratum(TRBE_WORKAROUND_OVERWRITE_FILL_MODE, cpudata))
cpudata->trbe_align = PAGE_SIZE;
else
cpudata->trbe_align = cpudata->trbe_hw_align;
cpudata->trbe_flag = get_trbe_flag_update(trbidr); cpudata->cpu = cpu; cpudata->drvdata = drvdata;
-- 2.24.1
CoreSight mailing list CoreSight@lists.linaro.org https://lists.linaro.org/mailman/listinfo/coresight
Hi Linu
On 06/08/2021 17:09, Linu Cherian wrote:
Hi Suzuki,
On Wed, Jul 28, 2021 at 7:23 PM Suzuki K Poulose suzuki.poulose@arm.com wrote:
ARM Neoverse-N2 (#2139208) and Cortex-A710(##2119858) suffers from an erratum, which when triggered, might cause the TRBE to overwrite the trace data already collected in FILL mode, in the event of a WRAP. i.e, the TRBE doesn't stop writing the data, instead wraps to the base and could write upto 3 cache line size worth trace. Thus, this could corrupt the trace at the "BASE" pointer.
The workaround is to program the write pointer 256bytes from the base, such that if the erratum is triggered, it doesn't overwrite the trace data that was captured. This skipped region could be padded with ignore packets at the end of the session, so that the decoder sees a continuous buffer with some padding at the beginning.
Just trying to understand, Is there a possibility that lost data results in partial trace packets towards the end of the buffer ? Or its always guaranteed that trace packet end is always aligned with buffer end/limit ? Thinking of a case when formatting is disabled.
Yes, trace could be partial towards the end with TRBE in the FILL mode. The TRBE doesn't add any formatting and is thus raw ETE trace which doesn't have any alignment requirements. This the case even without the work around.
Suzuki
Now that we have the work around implmented in the TRBE driver, add the Kconfig entries and document the errata.
Cc: Mark Rutland mark.rutland@arm.com Cc: Will Deacon will@kernel.org Cc: Catalin Marinas catalin.marinas@arm.com Cc: Anshuman Khandual anshuman.khandual@arm.com Cc: Mathieu Poirier mathieu.poirier@linaro.org Cc: Mike Leach mike.leach@linaro.org Cc: Leo Yan leo.yan@linaro.org Signed-off-by: Suzuki K Poulose suzuki.poulose@arm.com --- Documentation/arm64/silicon-errata.rst | 4 +++ arch/arm64/Kconfig | 39 ++++++++++++++++++++++++++ 2 files changed, 43 insertions(+)
diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst index d410a47ffa57..2f99229d993c 100644 --- a/Documentation/arm64/silicon-errata.rst +++ b/Documentation/arm64/silicon-errata.rst @@ -92,12 +92,16 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A77 | #1508412 | ARM64_ERRATUM_1508412 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A710 | #2119858 | ARM64_ERRATUM_2119858 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-N1 | #1188873,1418040| ARM64_ERRATUM_1418040 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-N1 | #1349291 | N/A | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-N1 | #1542419 | ARM64_ERRATUM_1542419 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Neoverse-N2 | #2139208 | ARM64_ERRATUM_2139208 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | MMU-500 | #841119,826419 | N/A | +----------------+-----------------+-----------------+-----------------------------+ +----------------+-----------------+-----------------+-----------------------------+ diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index b5b13a932561..ad301045dafc 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -669,6 +669,45 @@ config ARM64_ERRATUM_1508412
If unsure, say Y.
+config ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE + bool + +config ARM64_ERRATUM_2119858 + bool "Cortex-A710: 2119858: workaround TRBE overwriting trace data in FILL mode" + default y + depends on CORESIGHT_TRBE + select ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE + help + This option adds the workaround for ARM Cortex-A710 erratum 2119858. + + Affected Cortex-A710 cores could overwrite upto 3 cache lines of trace + data at the base of the buffer (ponited by TRBASER_EL1) in FILL mode in + the event of a WRAP event. + + Work around the issue by always making sure we move the TRBPTR_EL1 by + 256bytes before enabling the buffer and filling the first 256bytes of + the buffer with ETM ignore packets upon disabling. + + If unsure, say Y. + +config ARM64_ERRATUM_2139208 + bool "Neoverse-N2: 2139208: workaround TRBE overwriting trace data in FILL mode" + default y + depends on CORESIGHT_TRBE + select ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE + help + This option adds the workaround for ARM Neoverse-N2 erratum 2139208. + + Affected Neoverse-N2 cores could overwrite upto 3 cache lines of trace + data at the base of the buffer (ponited by TRBASER_EL1) in FILL mode in + the event of a WRAP event. + + Work around the issue by always making sure we move the TRBPTR_EL1 by + 256bytes before enabling the buffer and filling the first 256bytes of + the buffer with ETM ignore packets upon disabling. + + If unsure, say Y. + config CAVIUM_ERRATUM_22375 bool "Cavium erratum 22375, 24313" default y
Arm Neoverse-N2 (#2067961) and Cortex-A710 (#2054223) suffers from errata, where a TSB (trace synchronization barrier) fails to flush the trace data completely, when executed from a trace prohibited region. In Linux we always execute it after we have moved the PE to trace prohibited region. So, we can apply the workaround everytime a TSB is executed.
The work around is to issue two TSB consecutively.
NOTE: This errata is defined as LOCAL_CPU_ERRATUM, implying that a late CPU could be blocked from booting if it is the first CPU that requires the workaround. This is because we do not allow setting a cpu_hwcaps after the SMP boot. The other alternative is to use "this_cpu_has_cap()" instead of the faster system wide check, which may be a bit of an overhead, given we may have to do this in nvhe KVM host before a guest entry.
Cc: Will Deacon will@kernel.org Cc: Catalin Marinas catalin.marinas@arm.com Cc: Mathieu Poirier mathieu.poirier@linaro.org Cc: Mike Leach mike.leach@linaro.org Cc: Mark Rutland mark.rutland@arm.com Cc: Anshuman Khandual anshuman.khandual@arm.com Cc: Marc Zyngier maz@kernel.org Signed-off-by: Suzuki K Poulose suzuki.poulose@arm.com --- Documentation/arm64/silicon-errata.rst | 4 ++++ arch/arm64/Kconfig | 31 ++++++++++++++++++++++++++ arch/arm64/include/asm/barrier.h | 17 +++++++++++++- arch/arm64/kernel/cpu_errata.c | 19 ++++++++++++++++ arch/arm64/tools/cpucaps | 1 + 5 files changed, 71 insertions(+), 1 deletion(-)
diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst index 2f99229d993c..569a92411dcd 100644 --- a/Documentation/arm64/silicon-errata.rst +++ b/Documentation/arm64/silicon-errata.rst @@ -94,6 +94,8 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A710 | #2119858 | ARM64_ERRATUM_2119858 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A710 | #2054223 | ARM64_ERRATUM_2054223 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-N1 | #1188873,1418040| ARM64_ERRATUM_1418040 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-N1 | #1349291 | N/A | @@ -102,6 +104,8 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-N2 | #2139208 | ARM64_ERRATUM_2139208 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Neoverse-N2 | #2067961 | ARM64_ERRATUM_2067961 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | MMU-500 | #841119,826419 | N/A | +----------------+-----------------+-----------------+-----------------------------+ +----------------+-----------------+-----------------+-----------------------------+ diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index ad301045dafc..49416ea655c0 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -708,6 +708,37 @@ config ARM64_ERRATUM_2139208
If unsure, say Y.
+config ARM64_WORKAROUND_TSB_FLUSH_FAILURE + bool + +config ARM64_ERRATUM_2054223 + bool "Cortex-A710: 2054223: workaround TSB instruction failing to flush trace" + default y + help + Enable workaround for ARM Cortex-A710 erratum 2054223 + + Affected cores may fail to flush the trace data on a TSB instruction, when + the PE is in trace prohibited state. This will cause loosing a few bytes + of the trace cached. + + Workaround is to issue two TSB consecutively on affected cores. + + If unsure, say Y. + +config ARM64_ERRATUM_2067961 + bool "Neoverse-N2: 2067961: workaround TSB instruction failing to flush trace" + default y + help + Enable workaround for ARM Neoverse-N2 erratum 2067961 + + Affected cores may fail to flush the trace data on a TSB instruction, when + the PE is in trace prohibited state. This will cause loosing a few bytes + of the trace cached. + + Workaround is to issue two TSB consecutively on affected cores. + + If unsure, say Y. + config CAVIUM_ERRATUM_22375 bool "Cavium erratum 22375, 24313" default y diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index 451e11e5fd23..3bc1ed436e04 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -23,7 +23,7 @@ #define dsb(opt) asm volatile("dsb " #opt : : : "memory")
#define psb_csync() asm volatile("hint #17" : : : "memory") -#define tsb_csync() asm volatile("hint #18" : : : "memory") +#define __tsb_csync() asm volatile("hint #18" : : : "memory") #define csdb() asm volatile("hint #20" : : : "memory")
#ifdef CONFIG_ARM64_PSEUDO_NMI @@ -46,6 +46,21 @@ #define dma_rmb() dmb(oshld) #define dma_wmb() dmb(oshst)
+ +#define tsb_csync() \ + do { \ + /* \ + * CPUs affected by Arm Erratum 2054223 or 2067961 needs \ + * another TSB to ensure the trace is flushed. \ + */ \ + if (cpus_have_const_cap(ARM64_WORKAROUND_TSB_FLUSH_FAILURE)) { \ + __tsb_csync(); \ + __tsb_csync(); \ + } else { \ + __tsb_csync(); \ + } \ + } while (0) + /* * Generate a mask for array_index__nospec() that is ~0UL when 0 <= idx < sz * and 0 otherwise. diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index ccd757373f36..bdbeac75ead6 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -352,6 +352,18 @@ static const struct midr_range trbe_overwrite_fill_mode_cpus[] = { }; #endif /* CONFIG_ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE */
+#ifdef CONFIG_ARM64_WORKAROUND_TSB_FLUSH_FAILURE +static const struct midr_range tsb_flush_fail_cpus[] = { +#ifdef CONFIG_ARM64_ERRATUM_2067961 + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), +#endif +#ifdef CONFIG_ARM64_ERRATUM_2054223 + MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), +#endif + {}, +}; +#endif /* CONFIG_ARM64_WORKAROUND_TSB_FLUSH_FAILURE */ + const struct arm64_cpu_capabilities arm64_errata[] = { #ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE { @@ -558,6 +570,13 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE, CAP_MIDR_RANGE_LIST(trbe_overwrite_fill_mode_cpus), }, +#endif +#ifdef CONFIG_ARM64_WORKAROUND_TSB_FLUSH_FAILRE + { + .desc = "ARM erratum 2067961 or 2054223", + .capability = ARM64_WORKAROUND_TSB_FLUSH_FAILURE, + ERRATA_MIDR_RANGE_LIST(tsb_flush_fail_cpus), + }, #endif { } diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index 1ccb92165bd8..2102e15af43d 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -54,6 +54,7 @@ WORKAROUND_1463225 WORKAROUND_1508412 WORKAROUND_1542419 WORKAROUND_TRBE_OVERWRITE_FILL_MODE +WORKAROUND_TSB_FLUSH_FAILURE WORKAROUND_CAVIUM_23154 WORKAROUND_CAVIUM_27456 WORKAROUND_CAVIUM_30115