On 24/02/2025 3:32 am, Jie Gan wrote:
>
>
> On 2/21/2025 7:39 PM, Suzuki K Poulose wrote:
>> On 17/02/2025 09:30, Jie Gan wrote:
>>> The Coresight TMC Control Unit hosts miscellaneous configuration
>>> registers
>>> which control various features related to TMC ETR sink.
>>>
>>> Based on the trace ID, which is programmed in the related CTCU ATID
>>> register of a specific ETR, trace data with that trace ID gets into
>>> the ETR buffer, while other trace data gets dropped.
>>>
>>> Enabling source device sets one bit of the ATID register based on
>>> source device's trace ID.
>>> Disabling source device resets the bit according to the source
>>> device's trace ID.
>>>
>>> Reviewed-by: James Clark <james.clark(a)linaro.org>
>>> Signed-off-by: Jie Gan <quic_jiegan(a)quicinc.com>
>>> ---
>>> drivers/hwtracing/coresight/Kconfig | 12 +
>>> drivers/hwtracing/coresight/Makefile | 1 +
>>> drivers/hwtracing/coresight/coresight-ctcu.c | 268 +++++++++++++++++++
>>> drivers/hwtracing/coresight/coresight-ctcu.h | 24 ++
>>> include/linux/coresight.h | 3 +-
>>> 5 files changed, 307 insertions(+), 1 deletion(-)
>>> create mode 100644 drivers/hwtracing/coresight/coresight-ctcu.c
>>> create mode 100644 drivers/hwtracing/coresight/coresight-ctcu.h
>>>
>>> diff --git a/drivers/hwtracing/coresight/Kconfig b/drivers/hwtracing/
>>> coresight/Kconfig
>>> index 06f0a7594169..ecd7086a5b83 100644
>>> --- a/drivers/hwtracing/coresight/Kconfig
>>> +++ b/drivers/hwtracing/coresight/Kconfig
>>> @@ -133,6 +133,18 @@ config CORESIGHT_STM
>>> To compile this driver as a module, choose M here: the
>>> module will be called coresight-stm.
>>> +config CORESIGHT_CTCU
>>> + tristate "CoreSight TMC Control Unit driver"
>>> + depends on CORESIGHT_LINK_AND_SINK_TMC
>>> + help
>>> + This driver provides support for CoreSight TMC Control Unit
>>> + that hosts miscellaneous configuration registers. This is
>>> + primarily used for controlling the behaviors of the TMC
>>> + ETR device.
>>> +
>>> + To compile this driver as a module, choose M here: the
>>> + module will be called coresight-ctcu.
>>> +
>>> config CORESIGHT_CPU_DEBUG
>>> tristate "CoreSight CPU Debug driver"
>>> depends on ARM || ARM64
>>> diff --git a/drivers/hwtracing/coresight/Makefile b/drivers/
>>> hwtracing/ coresight/Makefile
>>> index 4ba478211b31..1b7869910a12 100644
>>> --- a/drivers/hwtracing/coresight/Makefile
>>> +++ b/drivers/hwtracing/coresight/Makefile
>>> @@ -51,3 +51,4 @@ coresight-cti-y := coresight-cti-core.o coresight-
>>> cti-platform.o \
>>> coresight-cti-sysfs.o
>>> obj-$(CONFIG_ULTRASOC_SMB) += ultrasoc-smb.o
>>> obj-$(CONFIG_CORESIGHT_DUMMY) += coresight-dummy.o
>>> +obj-$(CONFIG_CORESIGHT_CTCU) += coresight-ctcu.o
>>> diff --git a/drivers/hwtracing/coresight/coresight-ctcu.c b/drivers/
>>> hwtracing/coresight/coresight-ctcu.c
>>> new file mode 100644
>>> index 000000000000..e1460a627c4d
>>> --- /dev/null
>>> +++ b/drivers/hwtracing/coresight/coresight-ctcu.c
>>> @@ -0,0 +1,268 @@
>>> +// SPDX-License-Identifier: GPL-2.0-only
>>> +/*
>>> + * Copyright (c) 2024-2025 Qualcomm Innovation Center, Inc. All
>>> rights reserved.
>>> + */
>>> +
>>> +#include <linux/clk.h>
>>> +#include <linux/coresight.h>
>>> +#include <linux/device.h>
>>> +#include <linux/err.h>
>>> +#include <linux/kernel.h>
>>> +#include <linux/init.h>
>>> +#include <linux/io.h>
>>> +#include <linux/module.h>
>>> +#include <linux/mutex.h>
>>> +#include <linux/of.h>
>>> +#include <linux/platform_device.h>
>>> +#include <linux/slab.h>
>>> +
>>> +#include "coresight-ctcu.h"
>>> +#include "coresight-priv.h"
>>> +
>>> +DEFINE_CORESIGHT_DEVLIST(ctcu_devs, "ctcu");
>>> +
>>> +#define ctcu_writel(drvdata, val, offset) __raw_writel((val),
>>> drvdata->base + offset)
>>> +#define ctcu_readl(drvdata, offset) __raw_readl(drvdata->base
>>> + offset)
>>> +
>>> +/*
>>> + * The TMC Coresight Control Unit uses four ATID registers to
>>> control the data
>>> + * filter function based on the trace ID for each TMC ETR sink. The
>>> length of
>>> + * each ATID register is 32 bits. Therefore, the ETR has a related
>>> field in
>>> + * CTCU that is 128 bits long. Each trace ID is represented by one
>>> bit in that
>>> + * field.
>>> + * e.g. ETR0ATID0 layout, set bit 5 for traceid 5
>>> + * bit5
>>> + * ------------------------------------------------------
>>> + * | |28| |24| |20| |16| |12| |8| 1|4| |0|
>>> + * ------------------------------------------------------
>>> + *
>>> + * e.g. ETR0:
>>> + * 127 0 from ATID_offset for ETR0ATID0
>>> + * -------------------------
>>> + * |ATID3|ATID2|ATID1|ATID0|
>>> + */
>>> +#define CTCU_ATID_REG_OFFSET(traceid, atid_offset) \
>>> + ((traceid / 32) * 4 + atid_offset)
>>> +
>>> +#define CTCU_ATID_REG_BIT(traceid) (traceid % 32)
>>> +#define CTCU_ATID_REG_SIZE 0x10
>>> +
>>> +struct ctcu_atid_config {
>>> + const u32 atid_offset;
>>> + const u32 port_num;
>>> +};
>>> +
>>> +struct ctcu_config {
>>> + const struct ctcu_atid_config *atid_config;
>>> + int num_atid_config;
>>> +};
>>> +
>>> +static const struct ctcu_atid_config sa8775p_atid_cfgs[] = {
>>> + {0xf8, 0},
>>> + {0x108, 1},
>>> +};
>>> +
>>> +static const struct ctcu_config sa8775p_cfgs = {
>>> + .atid_config = sa8775p_atid_cfgs,
>>> + .num_atid_config = ARRAY_SIZE(sa8775p_atid_cfgs),
>>> +};
>>> +
>>> +static void ctcu_program_atid_register(struct ctcu_drvdata *drvdata,
>>> u32 reg_offset,
>>> + u8 bit, bool enable)
>>> +{
>>> + u32 val;
>>> +
>>> + CS_UNLOCK(drvdata->base);
>>> + val = ctcu_readl(drvdata, reg_offset);
>>> + val = enable? (val | BIT(bit)) : (val & ~BIT(bit));
>>
>> minor nit: If possible do not use the ternary operator like this. It
>> is much better readable as:
>>
>> if (enable)
>> val |= BIT(bit);
>> else
>> val &= ~BIT(bit);
>>
>
> Will do this way.
>
>>> + ctcu_writel(drvdata, val, reg_offset);
>>> + CS_LOCK(drvdata->base);
>>> +}
>>> +
>>> +/*
>>> + * __ctcu_set_etr_traceid: Set bit in the ATID register based on
>>> trace ID when enable is true.
>>> + * Reset the bit of the ATID register based on trace ID when enable
>>> is false.
>>> + *
>>> + * @csdev: coresight_device struct related to the device
>>> + * @traceid: trace ID of the source tracer.
>>> + * @port_num: port number from TMC ETR sink.
>>> + * @enable: True for set bit and false for reset bit.
>>> + *
>>> + * Returns 0 indicates success. Non-zero result means failure.
>>> + */
>>> +static int __ctcu_set_etr_traceid(struct coresight_device *csdev, u8
>>> traceid, int port_num,
>>> + bool enable)
>>> +{
>>> + struct ctcu_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
>>> + u32 atid_offset, reg_offset;
>>> + u8 refcnt, bit;
>>> +
>>> + atid_offset = drvdata->atid_offset[port_num];
>>> + if (atid_offset == 0)
>>> + return -EINVAL;
>>> +
>>> + bit = CTCU_ATID_REG_BIT(traceid);
>>> + reg_offset = CTCU_ATID_REG_OFFSET(traceid, atid_offset);
>>> + if (reg_offset - atid_offset > CTCU_ATID_REG_SIZE)
>>> + return -EINVAL;
>>> +
>>> + guard(raw_spinlock_irqsave)(&drvdata->spin_lock);
>>> + refcnt = drvdata->traceid_refcnt[port_num][traceid];
>>> + /* Only program the atid register when the refcnt value is 0 or
>>> 1 */
>>
>> A normal trace source won't be enabled more than once (e.g., ETM). The
>> only odd one out is the STM, which may be driven by multiple agents.
>> So this refcounting looks necessary.
>>
>
> Besides, the TPDMs which share the trace_id of the TPDA also need
> the refcnt. Consider that we have TPDM1 and TPDM2 connected to the same TPDA
> device. If we disable one of the TPDMs without checking the refcnt, the
> filter function will also be disabled for the other TPDM.
>
>>> + if (enable && (++refcnt == 1))
>>> + ctcu_program_atid_register(drvdata, reg_offset, bit, enable);
>>> + else if (!enable && (--refcnt == 0))
>>> + ctcu_program_atid_register(drvdata, reg_offset, bit, enable);
>>
>> minor nit:
>>
>> if ((enable && !refcount++) ||
>> (!enable && --refcount))
>> ctcu_program_atid_register(drvdata, reg_offset, bit, enable);
>>
>>
>
> I did (enable && (++refcnt == 1)) just because I think we only need to
> program the register when refcnt is equal to 1. We don't need to reprogram
> the register with the same value when refcnt is greater than 1. So I think
> it's better for performance?
>
>> Also, see my comment the bottom for "refcount" being u8 .
>
> Sure, will check.
>
>>
>>
>>> +
>>> + drvdata->traceid_refcnt[port_num][traceid] = refcnt;
>>> +
>>> + return 0;
>>> +}
>>> +
>>> +static int ctcu_get_active_port(struct coresight_device *sink,
>>> struct coresight_device *helper)
>>> +{
>>> + int i;
>>> +
>>> + for (i = 0; i < sink->pdata->nr_outconns; ++i) {
>>> + if (sink->pdata->out_conns[i]->dest_dev)
>>> + return sink->pdata->out_conns[i]->dest_port;
>>
>> Don't we need to make sure it matches the helper ? What if there are
>> multiple helpers ? e.g, a CATU connected to the ETR and CTCU ?
>> Or even try the opposite ? i.e. search the helper and find the port
>> matching the sink ?
Good catch, looks like this should be done the opposite way around.
>>
>> struct coresight_platform_data *pdata = helper->pdata;
>>
>> for (i = 0; i < pdata->nr_inconns; ++i)
>> if (pdata->in_conns[i]->dest_dev == sink)
>> return pdata->in_conns[i]->src_port;
>>
>> Not sure if that works with the helper device connection, James ?
Yeah connections are always made in both directions.
> Can we check the subtype of the helper device? We should only have one
> CTCU helper device for each ETR.
>
> enum coresight_dev_subtype_helper subtype;
>
> for (i = 0; i < sink->pdata->nr_outconns; ++i) {
> subtype = sink->pdata->out_conns[i]->dest_dev->subtype.helper_subtype;
> if (subtype == CORESIGHT_DEV_SUBTYPE_HELPER_CTCU)
> return sink->pdata->out_conns[i]->dest_port;
>
I don't think we need to check the type, just search all the CTCU's
in_conns until you find the sink.
As Suzuki says, by looking at the out_conns of the sink you might find a
different helper device. Checking that it really is connected to the
sink is probably more robust than relying on the type anyway.
This patch series is rebased on coresight-next-v6.13.rc2
* Patches 1 & 2 add support for allocation of trace buffer pages from
reserved RAM
* Patches 3 & 4 add support for saving metadata at the time of kernel panic
* Patch 5 adds support for reading trace data captured at the time of panic
* Patches 6 & 7 add support for disabling coresight blocks at the time of
panic
* Patch 8: Gives the full description about this feature as part of
documentation
v13 is posted here,
https://lore.kernel.org/linux-arm-kernel/20241216053014.3427909-1-lcherian@…
Changelog from v13:
* Changed the log levels of crc error check failure prints from dev_dbg to
dev_err as suggested by Suzuki
* Add metadata valid flag checks for successfully opening crashdata files,
as suggested by Suzuki
* Report to the user during probe if valid crash tracedata is found,
as suggested by Suzuki
* Added CRC recalculation upon barrier packet insertion for overflow
cases, this fixes crc check failures upon subsequent boots
* Few other trivial cleanups suggested by Suzuki
Changelog from v12:
* Fixed wrong buffer pointer passed to coresight_insert_barrier_packet
* tmc_read_prepare/unprepare_crashdata need to be called only once and
hence removed from read path and added to tmc_probe
* tmc_read_prepare_crashdata renamed to tmc_prepare_crashdata and
avoids taking locks as it's moved to the probe function.
* Introduced read status flag, "reading" specific to reserved buffer to keep the
reserved buffer reading independent of the regular buffer.
* open/release ops for the reserved buffer only need to take care of
setting/unsetting the "reading" status flag, as the reserved buffer is
prepared during probe time itself.
* Few other trivial changes
Changelog from v11:
Convert all commands to literal code blocks, that was missed out in v11.
No other code changes.
Changelog from v10:
* Converted all csdev_access_* to readl functions in tmc_panic_sync_*
* Added "tmc" prefix for register snapshots in struct tmc_crash_metadata
* Converted dev_info to dev_dbg in panic handlers
* Converted dsb to dmb in panic handlers
* Fixed marking metadata as invalid when a user is trying to use the
reserved buffer. Earlier this was wrongly set at the time of reading
reserved trace buffer.
* Moved common validation checks to is_tmc_crashdata_valid and minor
code rearrangements for efficiency
* Got rid of sink specific prepare/unprepare invocations
* Got rid of full from struct tmc_resrv_buf
* While reading crashdata, size is now calculated from metadata instead
of relying on reserved buffer size populated by dtb
* Minor documentation fixes
Changelog from v9:
* Add common helper function of_tmc_get_reserved_resource_by_name
for better code reuse
* Reserved buffer validity and crashdata validity have been separated to
avoid interdependence
* New fields added to crash metadata: version, ffcr, ffsr, mode
* Version checks added for metadata validation
* Special file /dev/crash_tmc_xxx would be available only when
crash metadata is valid
* Removed READ_CRASHDATA mode meant for special casing crashdata reads.
Instead, dedicated read function added for crashdata reads from reserved
buffer which is common for both ETR and ETF sinks as well.
* Documentation added to Documentation/tracing/coresight/panic.rst
Changelog from v8:
* Added missing exit path on error in __tmc_probe.
* Few whitespace fixes, checkpatch fixes.
* With perf sessions honouring stop_on_flush sysfs attribute,
removed redundant variable stop_on_flush_en.
Changelog from v7:
* Fixed breakage on perf test -vvvv "arm coresight".
No issues seen with and without "resrv" buffer mode
* Moved the crashdev registration into a separate function.
* Removed redundant variable in tmc_etr_setup_crashdata_buf
* Avoided a redundant memcpy in tmc_panic_sync_etf.
* Tested kernel panic with trace session started using perf.
Please see the title "Perf based testing" below for details.
For this, stop_on_flush sysfs attribute is taken into
consideration while starting perf sessions as well.
Changelog from v6:
* Added special device files for reading crashdata, so that
read_prevboot mode flag is removed.
* Added new sysfs TMC device attribute, stop_on_flush.
Stop on flush trigger event is disabled by default.
Users need to explicitly enable this from sysfs for panic stop
to work.
* Address parameter for panicstop ETM configuration is
chosen as kernel "panic" address by default.
* Added missing tmc_wait_for_tmcready during panic handling
* Few other misc code rearrangements.
Changelog from v5:
* Fixed issues reported by CONFIG_DEBUG_ATOMIC_SLEEP
* Fixed a memory leak while reading data from /dev/tmc_etrx in
READ_PREVBOOT mode
* Tested reading trace data from crashdump kernel
Changelog from v4:
* Device tree binding
- Description is made more explicit on the usage of reserved memory
region
- Mismatch in memory region names in dts binding and driver fixed
- Removed "mem" suffix from the memory region names
* Rename "struct tmc_register_snapshot" -> "struct tmc_crash_metadata",
since it contains more than register snapshot.
Related variables are named accordingly.
* Rename struct tmc_drvdata members
resrv_buf -> crash_tbuf
metadata -> crash_mdata
* Size field in metadata refers to RSZ register and hence indicates the
size in 32 bit words. ETR metadata follows this convention, the same
has been extended to ETF metadata as well.
* Added crc32 for more robust metadata and tracedata validation.
* Added/modified dev_dbg messages during metadata validation
* Fixed a typo in patch 5 commit description
Changelog from v3:
* Converted the Coresight ETM driver change to a named configuration.
RFC tag has been removed with this change.
* Fixed yaml issues reported by "make dt_binding_check"
* Added names for reserved memory regions 0 and 1
* Added prevalidation checks for metadata processing
* Fixed a regression introduced in RFC v3
- TMC Status register was getting saved wrongly
* Reverted memremap attribute changes from _WB to _WC to match
with the dma map attributes
* Introduced reserved buffer mode specific .sync op.
This fixes a possible crash when reserved buffer mode was used in
normal trace capture, due to unwanted dma maintenance operations.
Linu Cherian (8):
dt-bindings: arm: coresight-tmc: Add "memory-region" property
coresight: tmc-etr: Add support to use reserved trace memory
coresight: core: Add provision for panic callbacks
coresight: tmc: Enable panic sync handling
coresight: tmc: Add support for reading crash data
coresight: tmc: Stop trace capture on FlIn
coresight: config: Add preloaded configuration
Documentation: coresight: Panic support
.../bindings/arm/arm,coresight-tmc.yaml | 26 ++
Documentation/trace/coresight/panic.rst | 362 ++++++++++++++++++
drivers/hwtracing/coresight/Makefile | 2 +-
.../coresight/coresight-cfg-preload.c | 2 +
.../coresight/coresight-cfg-preload.h | 2 +
.../hwtracing/coresight/coresight-cfg-pstop.c | 83 ++++
drivers/hwtracing/coresight/coresight-core.c | 42 ++
.../hwtracing/coresight/coresight-tmc-core.c | 321 +++++++++++++++-
.../hwtracing/coresight/coresight-tmc-etf.c | 92 ++++-
.../hwtracing/coresight/coresight-tmc-etr.c | 184 ++++++++-
drivers/hwtracing/coresight/coresight-tmc.h | 105 +++++
include/linux/coresight.h | 12 +
12 files changed, 1221 insertions(+), 12 deletions(-)
create mode 100644 Documentation/trace/coresight/panic.rst
create mode 100644 drivers/hwtracing/coresight/coresight-cfg-pstop.c
--
2.34.1
On Thu, 9 Jan 2025 21:53:48 +0000, Ilkka Koskinen wrote:
> Trying to record a trace on kernel with 64k pages resulted in -ENOMEM.
> This happens due to a bug in calculating the number of table pages, which
> returns zero. Fix the issue by rounding up.
>
> $ perf record --kcore -e cs_etm/@tmc_etr55,cycacc,branch_broadcast/k --per-thread taskset --cpu-list 1 dd if=/dev/zero of=/dev/null
> failed to mmap with 12 (Cannot allocate memory)
>
> [...]
Applied, thanks!
[1/1] coresight: catu: Fix number of pages while using 64k pages
https://git.kernel.org/coresight/c/0e14e062f5ff
Best regards,
--
Suzuki K Poulose <suzuki.poulose(a)arm.com>
Hi Suzuki,
thanks for the reply! The CPUs of the boards I am using are all based on Arm-v8(.2), but I found the components' addresses in the manuals of the SoCs.
I managed to modify the Devicetree by writing my own .dtsi file (see attachment) and finally got the CoreSight devices in /sys/devices/.
However, dmesg shows the following:
[ 0.000000] Booting Linux on physical CPU 0x0
[ 0.000000] Linux version 4.9.253-coresight (user@user-desktop) (gcc version 7.5.0 (Ubuntu/Linaro 7.5.0-3ubuntu1~18.04) ) #1 SMP PREEMPT Wed Jan 1 18:45:04 CET 2025
[ 0.000000] Boot CPU: AArch64 Processor [411fd071]
(omitted 87 lines)
[ 0.212039] DTS File Name: /home/user/Downloads/Linux_for_Tegra/source/public/kernel/kernel-4.9/arch/arm64/boot/dts/../../../../../../hardware/nvidia/platform/t210/porg/kernel-dts/tegra210-p3448-0000-p3449-0000-a02.dts
[ 0.212045] DTB Build time: Jan 1 2025 16:04:45
(omitted 35 lines)
[ 0.420616] DTS File Name: /home/user/Downloads/Linux_for_Tegra/source/public/kernel/kernel-4.9/arch/arm64/boot/dts/../../../../../../hardware/nvidia/platform/t210/porg/kernel-dts/tegra210-p3448-0000-p3449-0000-a02.dts
[ 0.420622] DTB Build time: Jan 1 2025 16:04:45
(omitted 75 lines)
[ 0.524166] OF: amba_device_add() failed (-19) for /funnel_bccplex@73001000
(omitted 367 lines)
[ 1.330484] OF: graph: no port node found in /etf@72030000
[ 1.330757] OF: graph: no port node found in /etr@72050000
[ 1.330987] OF: graph: no port node found in /funnel_major@72010000
[ 1.331238] OF: graph: no port node found in /ptm0@73440000
[ 1.331451] coresight-etm4x 73440000.ptm0: CPU0: Cortex-A57 ETM v4.0 initialized
[ 1.331482] OF: graph: no port node found in /ptm1@73540000
[ 1.331689] coresight-etm4x 73540000.ptm1: CPU1: Cortex-A57 ETM v4.0 initialized
[ 1.331719] OF: graph: no port node found in /ptm2@73640000
[ 1.331938] coresight-etm4x 73640000.ptm2: CPU2: Cortex-A57 ETM v4.0 initialized
[ 1.331944] extcon-disp-state extcon:disp-state: cable 47 state 0
[ 1.331946] Extcon AUX1(HDMI) disable
[ 1.331976] OF: graph: no port node found in /ptm3@73740000
[ 1.332192] coresight-etm4x 73740000.ptm3: CPU3: Cortex-A57 ETM v4.0 initialized
[ 1.332250] OF: graph: no port node found in /replicator@72040000
[ 1.332305] coresight-replicator-qcom 72040000.replicator: REPLICATOR 1.0 initialized
[ 1.332350] OF: graph: no port node found in /stm@72070000
[ 1.332386] coresight-stm 72070000.stm: stm_register_device failed, probing deffered
(omitted 64 lines)
[ 1.411751] OF: graph: no port node found in /stm@72070000
[ 1.412025] coresight-stm 72070000.stm: STM32 initialized
(omitted 212 lines)
Do you have an idea what I did wrong? In the end, I want to be able to follow the steps described here:
https://docs.nvidia.com/jetson/archives/l4t-archived/l4t-3275/index.html#pa…
Best regards,
Vincent
(P.S. There was a problem sending this email a first time, but it should work now)