On 21/12/2024 11:54 am, Marc Zyngier wrote:
> On Fri, 20 Dec 2024 17:32:17 +0000,
> James Clark <james.clark(a)linaro.org> wrote:
>>
>>
>>
>> On 20/12/2024 5:05 pm, Marc Zyngier wrote:
>>> On Wed, 27 Nov 2024 10:01:23 +0000,
>>> James Clark <james.clark(a)linaro.org> wrote:
>>>>
>>>> Currently in nVHE, KVM has to check if TRBE is enabled on every guest
>>>> switch even if it was never used. Because it's a debug feature and is
>>>> more likely to not be used than used, give KVM the TRBE buffer status to
>>>> allow a much simpler and faster do-nothing path in the hyp.
>>>>
>>>> This is always called with preemption disabled except for probe/hotplug
>>>> which gets wrapped with preempt_disable().
>>>>
>>>> Protected mode disables trace regardless of TRBE (because
>>>> guest_trfcr_el1 is always 0), which was not previously done. HAS_TRBE
>>>> becomes redundant, but HAS_TRF is now required for this.
>>>>
>>>> Signed-off-by: James Clark <james.clark(a)linaro.org>
>>>> ---
>>>> arch/arm64/include/asm/kvm_host.h | 10 +++-
>>>> arch/arm64/kvm/debug.c | 25 ++++++++--
>>>> arch/arm64/kvm/hyp/nvhe/debug-sr.c | 51 +++++++++++---------
>>>> drivers/hwtracing/coresight/coresight-trbe.c | 5 ++
>>>> 4 files changed, 65 insertions(+), 26 deletions(-)
>>>>
>>>> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
>>>> index 7e3478386351..ba251caa593b 100644
>>>> --- a/arch/arm64/include/asm/kvm_host.h
>>>> +++ b/arch/arm64/include/asm/kvm_host.h
>>>> @@ -611,7 +611,8 @@ struct cpu_sve_state {
>>>> */
>>>> struct kvm_host_data {
>>>> #define KVM_HOST_DATA_FLAG_HAS_SPE 0
>>>> -#define KVM_HOST_DATA_FLAG_HAS_TRBE 1
>>>> +#define KVM_HOST_DATA_FLAG_HAS_TRF 1
>>>> +#define KVM_HOST_DATA_FLAG_TRBE_ENABLED 2
>>>> unsigned long flags;
>>>> struct kvm_cpu_context host_ctxt;
>>>> @@ -657,6 +658,9 @@ struct kvm_host_data {
>>>> u64 mdcr_el2;
>>>> } host_debug_state;
>>>> + /* Guest trace filter value */
>>>> + u64 guest_trfcr_el1;
>>>
>>> Guest value? Or host state while running the guest? If the former,
>>> then this has nothing to do here. If the latter, this should be
>>> spelled out (trfcr_in_guest?), and the comment amended.
>>>
>>>> +
>>>> /* Number of programmable event counters (PMCR_EL0.N) for this CPU */
>>>> unsigned int nr_event_counters;
>>>> };
>>>> @@ -1381,6 +1385,8 @@ static inline bool kvm_pmu_counter_deferred(struct perf_event_attr *attr)
>>>> void kvm_set_pmu_events(u64 set, struct perf_event_attr *attr);
>>>> void kvm_clr_pmu_events(u64 clr);
>>>> bool kvm_set_pmuserenr(u64 val);
>>>> +void kvm_enable_trbe(void);
>>>> +void kvm_disable_trbe(void);
>>>> #else
>>>> static inline void kvm_set_pmu_events(u64 set, struct perf_event_attr *attr) {}
>>>> static inline void kvm_clr_pmu_events(u64 clr) {}
>>>> @@ -1388,6 +1394,8 @@ static inline bool kvm_set_pmuserenr(u64 val)
>>>> {
>>>> return false;
>>>> }
>>>> +static inline void kvm_enable_trbe(void) {}
>>>> +static inline void kvm_disable_trbe(void) {}
>>>> #endif
>>>> void kvm_vcpu_load_vhe(struct kvm_vcpu *vcpu);
>>>> diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c
>>>> index dd9e139dfd13..0c340ae7b5d1 100644
>>>> --- a/arch/arm64/kvm/debug.c
>>>> +++ b/arch/arm64/kvm/debug.c
>>>> @@ -314,7 +314,26 @@ void kvm_init_host_debug_data(void)
>>>> !(read_sysreg_s(SYS_PMBIDR_EL1) & PMBIDR_EL1_P))
>>>> host_data_set_flag(HAS_SPE);
>>>> - if (cpuid_feature_extract_unsigned_field(dfr0,
>>>> ID_AA64DFR0_EL1_TraceBuffer_SHIFT) &&
>>>> - !(read_sysreg_s(SYS_TRBIDR_EL1) & TRBIDR_EL1_P))
>>>> - host_data_set_flag(HAS_TRBE);
>>>> + if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_TraceFilt_SHIFT))
>>>> + host_data_set_flag(HAS_TRF);
>>>> }
>>>> +
>>>> +void kvm_enable_trbe(void)
>>>> +{
>>>> + if (has_vhe() || is_protected_kvm_enabled() ||
>>>> + WARN_ON_ONCE(preemptible()))
>>>> + return;
>>>> +
>>>> + host_data_set_flag(TRBE_ENABLED);
>>>> +}
>>>> +EXPORT_SYMBOL_GPL(kvm_enable_trbe);
>>>> +
>>>> +void kvm_disable_trbe(void)
>>>> +{
>>>> + if (has_vhe() || is_protected_kvm_enabled() ||
>>>> + WARN_ON_ONCE(preemptible()))
>>>> + return;
>>>> +
>>>> + host_data_clear_flag(TRBE_ENABLED);
>>>> +}
>>>> +EXPORT_SYMBOL_GPL(kvm_disable_trbe);
>>>> diff --git a/arch/arm64/kvm/hyp/nvhe/debug-sr.c b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
>>>> index 858bb38e273f..9479bee41801 100644
>>>> --- a/arch/arm64/kvm/hyp/nvhe/debug-sr.c
>>>> +++ b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
>>>> @@ -51,32 +51,39 @@ static void __debug_restore_spe(u64 pmscr_el1)
>>>> write_sysreg_el1(pmscr_el1, SYS_PMSCR);
>>>> }
>>>> -static void __debug_save_trace(u64 *trfcr_el1)
>>>> +static void __trace_do_switch(u64 *saved_trfcr, u64 new_trfcr)
>>>> {
>>>> - *trfcr_el1 = 0;
>>>> + *saved_trfcr = read_sysreg_el1(SYS_TRFCR);
>>>> + write_sysreg_el1(new_trfcr, SYS_TRFCR);
>>>> - /* Check if the TRBE is enabled */
>>>> - if (!(read_sysreg_s(SYS_TRBLIMITR_EL1) & TRBLIMITR_EL1_E))
>>>> + /* No need to drain if going to an enabled state or from disabled state */
>>>> + if (new_trfcr || !*saved_trfcr)
>>>
>>> What if TRFCR_EL1.TS is set to something non-zero? I'd rather you
>>> check for the E*TRE bits instead of assuming things.
>>>
>>
>> Yeah it's probably better that way. TS is actually always set when any
>> tracing session starts and then never cleared, so doing it the simpler
>> way made it always flush even after tracing finished, which probably
>> wasn't great.
>
> Quite. Can you please *test* these things?
>
> [...]
>
Sorry to confuse things I wasn't 100% accurate here, yes it's tested and
working. It works because of the split set/clear_trfcr() API. The
Coresight driver specifically calls clear at the end of the session
rather than a set of 0. That signals this function not to be called so
there's no excessive swapping.
Secondly, the buffer flushing case is triggered by TRBE_ENABLED, which
forces TRFCR to 0, so "if (new_trfcr)" is an OK way to gate the flush.
>>>> @@ -253,8 +256,10 @@ static void trbe_drain_and_disable_local(struct trbe_cpudata *cpudata)
>>>> static void trbe_reset_local(struct trbe_cpudata *cpudata)
>>>> {
>>>> + preempt_disable();
>>>> trbe_drain_and_disable_local(cpudata);
>>>> write_sysreg_s(0, SYS_TRBLIMITR_EL1);
>>>> + preempt_enable();
>>>
>>> This looks terribly wrong. If you need to disable preemption here, why
>>> doesn't the critical section cover all register accesses? Surely you
>>> don't want to nuke another CPU's context?
>>>
>>> But looking at the calling sites, this makes even less sense. The two
>>> callers of this thing mess with *per-CPU* interrupts. Dealing with
>>> per-CPU interrupts in preemptible context is a big no-no (hint: they
>>> start with a call to smp_processor_id()).
>>>
>>> So what is this supposed to ensure?
>>>
>>> M.
>>>
>>
>> These ones are only intended to silence the
>> WARN_ON_ONCE(preemptible()) in kvm_enable_trbe() when this is called
>> from boot/hotplug (arm_trbe_enable_cpu()). Preemption isn't disabled,
>> but a guest can't run at that point either.
>>
>> The "real" calls to kvm_enable_trbe() _are_ called from an atomic
>> context. I think there was a previous review comment about when it was
>> safe to call the KVM parts of this change, which is why I added the
>> warning making sure it was always called with preemption disabled. But
>> actually I could remove the warning and these preempt_disables() and
>> replace them with a comment.
>
> You should keep the WARN_ON(), and either *never* end-up calling this
> stuff during a CPUHP event, or handle the fact that preemption isn't
> initialised yet. For example by checking whether the current CPU is
> online.
>
> But this sort of random spreading of preemption disabling is not an
> acceptable outcome.
>
> M.
>
I'll look into this again. This was my initial attempt but couldn't find
any easily accessible state that allowed to to be done this way. Maybe I
missed something, but the obvious cpu_online() etc were already true at
this point.
Thanks
James
On 21/12/2024 12:34 pm, Marc Zyngier wrote:
> On Wed, 27 Nov 2024 10:01:24 +0000,
> James Clark <james.clark(a)linaro.org> wrote:
>>
>> For nVHE, switch the filter value in and out if the Coresight driver
>> asks for it. This will support filters for guests when sinks other than
>> TRBE are used.
>>
>> For VHE, just write the filter directly to TRFCR_EL1 where trace can be
>> used even with TRBE sinks.
>>
>> Signed-off-by: James Clark <james.clark(a)linaro.org>
>> ---
>> arch/arm64/include/asm/kvm_host.h | 5 +++++
>> arch/arm64/kvm/debug.c | 28 ++++++++++++++++++++++++++++
>> arch/arm64/kvm/hyp/nvhe/debug-sr.c | 1 +
>> 3 files changed, 34 insertions(+)
>>
>> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
>> index ba251caa593b..cce07887551b 100644
>> --- a/arch/arm64/include/asm/kvm_host.h
>> +++ b/arch/arm64/include/asm/kvm_host.h
>> @@ -613,6 +613,7 @@ struct kvm_host_data {
>> #define KVM_HOST_DATA_FLAG_HAS_SPE 0
>> #define KVM_HOST_DATA_FLAG_HAS_TRF 1
>> #define KVM_HOST_DATA_FLAG_TRBE_ENABLED 2
>> +#define KVM_HOST_DATA_FLAG_GUEST_FILTER 3
>
> Guest filter what? This is meaningless.
>
KVM_HOST_DATA_FLAG_SWAP_TRFCR maybe?
>> unsigned long flags;
>>
>> struct kvm_cpu_context host_ctxt;
>> @@ -1387,6 +1388,8 @@ void kvm_clr_pmu_events(u64 clr);
>> bool kvm_set_pmuserenr(u64 val);
>> void kvm_enable_trbe(void);
>> void kvm_disable_trbe(void);
>> +void kvm_set_trfcr(u64 guest_trfcr);
>> +void kvm_clear_trfcr(void);
>> #else
>> static inline void kvm_set_pmu_events(u64 set, struct perf_event_attr *attr) {}
>> static inline void kvm_clr_pmu_events(u64 clr) {}
>> @@ -1396,6 +1399,8 @@ static inline bool kvm_set_pmuserenr(u64 val)
>> }
>> static inline void kvm_enable_trbe(void) {}
>> static inline void kvm_disable_trbe(void) {}
>> +static inline void kvm_set_trfcr(u64 guest_trfcr) {}
>> +static inline void kvm_clear_trfcr(void) {}
>> #endif
>>
>> void kvm_vcpu_load_vhe(struct kvm_vcpu *vcpu);
>> diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c
>> index 0c340ae7b5d1..9266f2776991 100644
>> --- a/arch/arm64/kvm/debug.c
>> +++ b/arch/arm64/kvm/debug.c
>> @@ -337,3 +337,31 @@ void kvm_disable_trbe(void)
>> host_data_clear_flag(TRBE_ENABLED);
>> }
>> EXPORT_SYMBOL_GPL(kvm_disable_trbe);
>> +
>> +void kvm_set_trfcr(u64 guest_trfcr)
>
> Again. Is this the guest's view? or the host view while running the
> guest? I asked the question on the previous patch, and you didn't
> reply.
>
Ah sorry missed that one:
> Guest value? Or host state while running the guest? If the former,
> then this has nothing to do here. If the latter, this should be
> spelled out (trfcr_in_guest?), and the comment amended.
Yes, the latter, guest TRFCR reads are undef anyway. I can rename this
and the host_data variable to be trfcr_in_guest.
>> +{
>> + if (is_protected_kvm_enabled() || WARN_ON_ONCE(preemptible()))
>> + return;
>> +
>> + if (has_vhe())
>> + write_sysreg_s(guest_trfcr, SYS_TRFCR_EL12);
>> + else {
>> + *host_data_ptr(guest_trfcr_el1) = guest_trfcr;
>> + host_data_set_flag(GUEST_FILTER);
>> + }
>
> Oh come on. This is basic coding style, see section 3 in
> Documentation/process/coding-style.rst.
>
Oops, I'd have thought checkpatch could catch something like that. Will fix.
>> +}
>> +EXPORT_SYMBOL_GPL(kvm_set_trfcr);
>> +
>> +void kvm_clear_trfcr(void)
>> +{
>> + if (is_protected_kvm_enabled() || WARN_ON_ONCE(preemptible()))
>> + return;
>> +
>> + if (has_vhe())
>> + write_sysreg_s(0, SYS_TRFCR_EL12);
>> + else {
>> + *host_data_ptr(guest_trfcr_el1) = 0;
>> + host_data_clear_flag(GUEST_FILTER);
>> + }
>> +}
>> +EXPORT_SYMBOL_GPL(kvm_clear_trfcr);
>
> Why do we have two helpers? Clearly, calling kvm_set_trfcr() with
> E{1,0}TRE=={0,0} should result in *disabling* things. Except it
> doesn't, and you should fix it. Once that is fixed, it becomes
obvious> that kvm_clear_trfcr() serves no purpose.
>
With only one kvm_set_trfcr() there's no way to distinguish swapping in
a 0 value or stopping swapping altogether. I thought we wanted a single
flag that gated the register accesses so the hyp mostly does nothing?
With only kvm_set_trfcr() you first need to check FEAT_TRF then you need
to compare the real register with trfcr_in_guest to know whether to swap
or not every time.
Actually I think some of the previous versions had something like this
but it was a bit more complicated.
Maybe set/clear_trfcr() aren't great names. Perhaps
kvm_set_trfcr_in_guest() and kvm_disable_trfcr_in_guest()? With the
second one hinting that it stops the swapping regardless of what the
values are.
I don't think calling kvm_set_trfcr() with E{1,0}TRE=={0,0} is actually
broken in this version, it means that the Coresight driver wants that
value to be installed for guests. So it should actually _enable_
swapping in the value of 0, not disable anything.
> To sum it up, KVM's API should reflect the architecture instead of
> making things up.
>
We had kvm_set_trfcr(u64 host_trfcr, u64 guest_trfcr) on the last
version, which also serves the same purpose I mentioned above because
you can check if they're the same or not and disable swapping. I don't
know if that counts as reflecting the architecture better. But Oliver
mentioned he preferred it more "intent" based which is why I added the
clear_trfcr().
>> diff --git a/arch/arm64/kvm/hyp/nvhe/debug-sr.c b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
>> index 9479bee41801..7edee7ace433 100644
>> --- a/arch/arm64/kvm/hyp/nvhe/debug-sr.c
>> +++ b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
>> @@ -67,6 +67,7 @@ static void __trace_do_switch(u64 *saved_trfcr, u64 new_trfcr)
>> static bool __trace_needs_switch(void)
>> {
>> return host_data_test_flag(TRBE_ENABLED) ||
>> + host_data_test_flag(GUEST_FILTER) ||
>> (is_protected_kvm_enabled() && host_data_test_flag(HAS_TRF));
>
> Wouldn't it make more sense to just force the "GUEST_FILTER" flag in
> the pKVM case, and drop the 3rd term altogether?
>
> M.
>
Yep we can set GUEST_FILTER once at startup and it gets dropped along
with HAS_TRF. That's a lot simpler.
Thanks
James
On 20/12/2024 5:05 pm, Marc Zyngier wrote:
> On Wed, 27 Nov 2024 10:01:23 +0000,
> James Clark <james.clark(a)linaro.org> wrote:
>>
>> Currently in nVHE, KVM has to check if TRBE is enabled on every guest
>> switch even if it was never used. Because it's a debug feature and is
>> more likely to not be used than used, give KVM the TRBE buffer status to
>> allow a much simpler and faster do-nothing path in the hyp.
>>
>> This is always called with preemption disabled except for probe/hotplug
>> which gets wrapped with preempt_disable().
>>
>> Protected mode disables trace regardless of TRBE (because
>> guest_trfcr_el1 is always 0), which was not previously done. HAS_TRBE
>> becomes redundant, but HAS_TRF is now required for this.
>>
>> Signed-off-by: James Clark <james.clark(a)linaro.org>
>> ---
>> arch/arm64/include/asm/kvm_host.h | 10 +++-
>> arch/arm64/kvm/debug.c | 25 ++++++++--
>> arch/arm64/kvm/hyp/nvhe/debug-sr.c | 51 +++++++++++---------
>> drivers/hwtracing/coresight/coresight-trbe.c | 5 ++
>> 4 files changed, 65 insertions(+), 26 deletions(-)
>>
>> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
>> index 7e3478386351..ba251caa593b 100644
>> --- a/arch/arm64/include/asm/kvm_host.h
>> +++ b/arch/arm64/include/asm/kvm_host.h
>> @@ -611,7 +611,8 @@ struct cpu_sve_state {
>> */
>> struct kvm_host_data {
>> #define KVM_HOST_DATA_FLAG_HAS_SPE 0
>> -#define KVM_HOST_DATA_FLAG_HAS_TRBE 1
>> +#define KVM_HOST_DATA_FLAG_HAS_TRF 1
>> +#define KVM_HOST_DATA_FLAG_TRBE_ENABLED 2
>> unsigned long flags;
>>
>> struct kvm_cpu_context host_ctxt;
>> @@ -657,6 +658,9 @@ struct kvm_host_data {
>> u64 mdcr_el2;
>> } host_debug_state;
>>
>> + /* Guest trace filter value */
>> + u64 guest_trfcr_el1;
>
> Guest value? Or host state while running the guest? If the former,
> then this has nothing to do here. If the latter, this should be
> spelled out (trfcr_in_guest?), and the comment amended.
>
>> +
>> /* Number of programmable event counters (PMCR_EL0.N) for this CPU */
>> unsigned int nr_event_counters;
>> };
>> @@ -1381,6 +1385,8 @@ static inline bool kvm_pmu_counter_deferred(struct perf_event_attr *attr)
>> void kvm_set_pmu_events(u64 set, struct perf_event_attr *attr);
>> void kvm_clr_pmu_events(u64 clr);
>> bool kvm_set_pmuserenr(u64 val);
>> +void kvm_enable_trbe(void);
>> +void kvm_disable_trbe(void);
>> #else
>> static inline void kvm_set_pmu_events(u64 set, struct perf_event_attr *attr) {}
>> static inline void kvm_clr_pmu_events(u64 clr) {}
>> @@ -1388,6 +1394,8 @@ static inline bool kvm_set_pmuserenr(u64 val)
>> {
>> return false;
>> }
>> +static inline void kvm_enable_trbe(void) {}
>> +static inline void kvm_disable_trbe(void) {}
>> #endif
>>
>> void kvm_vcpu_load_vhe(struct kvm_vcpu *vcpu);
>> diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c
>> index dd9e139dfd13..0c340ae7b5d1 100644
>> --- a/arch/arm64/kvm/debug.c
>> +++ b/arch/arm64/kvm/debug.c
>> @@ -314,7 +314,26 @@ void kvm_init_host_debug_data(void)
>> !(read_sysreg_s(SYS_PMBIDR_EL1) & PMBIDR_EL1_P))
>> host_data_set_flag(HAS_SPE);
>>
>> - if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_TraceBuffer_SHIFT) &&
>> - !(read_sysreg_s(SYS_TRBIDR_EL1) & TRBIDR_EL1_P))
>> - host_data_set_flag(HAS_TRBE);
>> + if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_TraceFilt_SHIFT))
>> + host_data_set_flag(HAS_TRF);
>> }
>> +
>> +void kvm_enable_trbe(void)
>> +{
>> + if (has_vhe() || is_protected_kvm_enabled() ||
>> + WARN_ON_ONCE(preemptible()))
>> + return;
>> +
>> + host_data_set_flag(TRBE_ENABLED);
>> +}
>> +EXPORT_SYMBOL_GPL(kvm_enable_trbe);
>> +
>> +void kvm_disable_trbe(void)
>> +{
>> + if (has_vhe() || is_protected_kvm_enabled() ||
>> + WARN_ON_ONCE(preemptible()))
>> + return;
>> +
>> + host_data_clear_flag(TRBE_ENABLED);
>> +}
>> +EXPORT_SYMBOL_GPL(kvm_disable_trbe);
>> diff --git a/arch/arm64/kvm/hyp/nvhe/debug-sr.c b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
>> index 858bb38e273f..9479bee41801 100644
>> --- a/arch/arm64/kvm/hyp/nvhe/debug-sr.c
>> +++ b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
>> @@ -51,32 +51,39 @@ static void __debug_restore_spe(u64 pmscr_el1)
>> write_sysreg_el1(pmscr_el1, SYS_PMSCR);
>> }
>>
>> -static void __debug_save_trace(u64 *trfcr_el1)
>> +static void __trace_do_switch(u64 *saved_trfcr, u64 new_trfcr)
>> {
>> - *trfcr_el1 = 0;
>> + *saved_trfcr = read_sysreg_el1(SYS_TRFCR);
>> + write_sysreg_el1(new_trfcr, SYS_TRFCR);
>>
>> - /* Check if the TRBE is enabled */
>> - if (!(read_sysreg_s(SYS_TRBLIMITR_EL1) & TRBLIMITR_EL1_E))
>> + /* No need to drain if going to an enabled state or from disabled state */
>> + if (new_trfcr || !*saved_trfcr)
>
> What if TRFCR_EL1.TS is set to something non-zero? I'd rather you
> check for the E*TRE bits instead of assuming things.
>
Yeah it's probably better that way. TS is actually always set when any
tracing session starts and then never cleared, so doing it the simpler
way made it always flush even after tracing finished, which probably
wasn't great.
>> return;
>> - /*
>> - * Prohibit trace generation while we are in guest.
>> - * Since access to TRFCR_EL1 is trapped, the guest can't
>> - * modify the filtering set by the host.
>> - */
>> - *trfcr_el1 = read_sysreg_el1(SYS_TRFCR);
>> - write_sysreg_el1(0, SYS_TRFCR);
>> +
>> isb();
>> - /* Drain the trace buffer to memory */
>> tsb_csync();
>> }
>>
>> -static void __debug_restore_trace(u64 trfcr_el1)
>> +static bool __trace_needs_switch(void)
>> {
>> - if (!trfcr_el1)
>> - return;
>> + return host_data_test_flag(TRBE_ENABLED) ||
>> + (is_protected_kvm_enabled() && host_data_test_flag(HAS_TRF));
>> +}
>>
>> - /* Restore trace filter controls */
>> - write_sysreg_el1(trfcr_el1, SYS_TRFCR);
>> +static void __trace_switch_to_guest(void)
>> +{
>> + /* Unsupported with TRBE so disable */
>> + if (host_data_test_flag(TRBE_ENABLED))
>> + *host_data_ptr(guest_trfcr_el1) = 0;
>> +
>> + __trace_do_switch(host_data_ptr(host_debug_state.trfcr_el1),
>> + *host_data_ptr(guest_trfcr_el1));
>> +}
>> +
>> +static void __trace_switch_to_host(void)
>> +{
>> + __trace_do_switch(host_data_ptr(guest_trfcr_el1),
>> + *host_data_ptr(host_debug_state.trfcr_el1));
>> }
>>
>> void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu)
>> @@ -84,9 +91,9 @@ void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu)
>> /* Disable and flush SPE data generation */
>> if (host_data_test_flag(HAS_SPE))
>> __debug_save_spe(host_data_ptr(host_debug_state.pmscr_el1));
>> - /* Disable and flush Self-Hosted Trace generation */
>> - if (host_data_test_flag(HAS_TRBE))
>> - __debug_save_trace(host_data_ptr(host_debug_state.trfcr_el1));
>> +
>> + if (__trace_needs_switch())
>> + __trace_switch_to_guest();
>> }
>>
>> void __debug_switch_to_guest(struct kvm_vcpu *vcpu)
>> @@ -98,8 +105,8 @@ void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu)
>> {
>> if (host_data_test_flag(HAS_SPE))
>> __debug_restore_spe(*host_data_ptr(host_debug_state.pmscr_el1));
>> - if (host_data_test_flag(HAS_TRBE))
>> - __debug_restore_trace(*host_data_ptr(host_debug_state.trfcr_el1));
>> + if (__trace_needs_switch())
>> + __trace_switch_to_host();
>> }
>>
>> void __debug_switch_to_host(struct kvm_vcpu *vcpu)
>> diff --git a/drivers/hwtracing/coresight/coresight-trbe.c b/drivers/hwtracing/coresight/coresight-trbe.c
>> index 96a32b213669..9c0f8c43e6fe 100644
>> --- a/drivers/hwtracing/coresight/coresight-trbe.c
>> +++ b/drivers/hwtracing/coresight/coresight-trbe.c
>> @@ -18,6 +18,7 @@
>> #include <asm/barrier.h>
>> #include <asm/cpufeature.h>
>> #include <linux/vmalloc.h>
>> +#include <linux/kvm_host.h>
>
> Ordering of include files.
>
>>
>> #include "coresight-self-hosted-trace.h"
>> #include "coresight-trbe.h"
>> @@ -221,6 +222,7 @@ static inline void set_trbe_enabled(struct trbe_cpudata *cpudata, u64 trblimitr)
>> */
>> trblimitr |= TRBLIMITR_EL1_E;
>> write_sysreg_s(trblimitr, SYS_TRBLIMITR_EL1);
>> + kvm_enable_trbe();
>>
>> /* Synchronize the TRBE enable event */
>> isb();
>> @@ -239,6 +241,7 @@ static inline void set_trbe_disabled(struct trbe_cpudata *cpudata)
>> */
>> trblimitr &= ~TRBLIMITR_EL1_E;
>> write_sysreg_s(trblimitr, SYS_TRBLIMITR_EL1);
>> + kvm_disable_trbe();
>>
>> if (trbe_needs_drain_after_disable(cpudata))
>> trbe_drain_buffer();
>> @@ -253,8 +256,10 @@ static void trbe_drain_and_disable_local(struct trbe_cpudata *cpudata)
>>
>> static void trbe_reset_local(struct trbe_cpudata *cpudata)
>> {
>> + preempt_disable();
>> trbe_drain_and_disable_local(cpudata);
>> write_sysreg_s(0, SYS_TRBLIMITR_EL1);
>> + preempt_enable();
>
> This looks terribly wrong. If you need to disable preemption here, why
> doesn't the critical section cover all register accesses? Surely you
> don't want to nuke another CPU's context?
>
> But looking at the calling sites, this makes even less sense. The two
> callers of this thing mess with *per-CPU* interrupts. Dealing with
> per-CPU interrupts in preemptible context is a big no-no (hint: they
> start with a call to smp_processor_id()).
>
> So what is this supposed to ensure?
>
> M.
>
These ones are only intended to silence the WARN_ON_ONCE(preemptible())
in kvm_enable_trbe() when this is called from boot/hotplug
(arm_trbe_enable_cpu()). Preemption isn't disabled, but a guest can't
run at that point either.
The "real" calls to kvm_enable_trbe() _are_ called from an atomic
context. I think there was a previous review comment about when it was
safe to call the KVM parts of this change, which is why I added the
warning making sure it was always called with preemption disabled. But
actually I could remove the warning and these preempt_disables() and
replace them with a comment.
Thanks
James
On 20/12/2024 11:39, Yeoreum Yun wrote:
> Hi Suzuki,
>> On 20/12/2024 10:38, Yeoreum Yun wrote:
>>> Hi Mike.
>>>
>>>> Notably missing is the same changes for the etm3x driver. The ETMv3.x
>>>> and PTM1.x are supported by this driver, and these trace source
>>>> variants are also supported in perf in the cs_etm.c code.
>>>
>>> But I wonder etmv3 needs to change. Because its spinlock is used only
>>> via sysfs enable/disable path.
>>> So, I think it doesn't need to change the lock type.
>>
>> ETM3 can be used in perf mode, similar to the ETM4x.
>>
>> So, you need to fix it as well.
>
> Yes. But etmv3's etmdata->spinlock doesn't used in perf path
> its usage is only in sysfs interface path.
> That's why I think it could skip too.
Ok, which I think is a problem, since the sysfs mode could overwrite the
"config" while perf is preparing the config from the event parsing.
And we would need it there. So, for the time being, we can accept this
series, pending other review comments and address this issue separately
Suzuki
>
> Thanks.
On 20/12/2024 10:38, Yeoreum Yun wrote:
> Hi Mike.
>
>> Notably missing is the same changes for the etm3x driver. The ETMv3.x
>> and PTM1.x are supported by this driver, and these trace source
>> variants are also supported in perf in the cs_etm.c code.
>
> But I wonder etmv3 needs to change. Because its spinlock is used only
> via sysfs enable/disable path.
> So, I think it doesn't need to change the lock type.
ETM3 can be used in perf mode, similar to the ETM4x.
So, you need to fix it as well.
>
>> STM is also missing, though this is not directly enabled via perf -
>> but could perhaps run concurrently as it can be a target output for
>> ftrace.
>
> Actually, I couldn't find out the path where
> the STM's lock could be grabbed under other raw_spin_lock (including csdev)
> If you don't mind would you let me the code path please?
STM can't be used in perf mode, and as such you may skip it.
Suzuki
>
> Thanks
>> --
>> Mike Leach
>> Principal Engineer, ARM Ltd.
>> Manchester Design Centre. UK
With current design, the name of the non-cpu bounded coresight
component is the device type with the number. And with 'ls' command
we can get the register address of the component. But from these
information, we can't know what the HW or system the component belongs
to. Add label in DT and show the hw information by reading label sysfs
node.
cti_sys0 -> ../../../devices/platform/soc(a)0/138f0000.cti/cti_sys0
cti_sys1 -> ../../../devices/platform/soc(a)0/13900000.cti/cti_sys1
tpdm0 -> ../../../devices/platform/soc(a)0/10b0d000.tpdm/tpdm0
tpdm1 -> ../../../devices/platform/soc(a)0/10c28000.tpdm/tpdm1
tpdm2 -> ../../../devices/platform/soc(a)0/10c29000.tpdm/tpdm2
/sys/bus/coresight/devices # cat cti*/label
cti_dlct_0
cti_dlct_1
cti_apss_0
cti_apss_1
cti_apss_2
Change since V5:
1. Update the kernel version of ABI files.
2. Add link of different patch versions.
V5 link: https://patchwork.kernel.org/project/linux-arm-msm/cover/20241210122253.319…
Change since V4:
1. Add label in DT and add label sysfs node for each coresight device.
V4 link: https://patchwork.kernel.org/project/linux-arm-msm/cover/20240703122340.268…
Change since V3:
1. Change device-name to arm,cs-dev-name.
2. Add arm,cs-dev-name to only CTI and sources' dt-binding.
V3 link: https://patchwork.kernel.org/project/linux-arm-msm/cover/20240131082628.628…
Change since V2:
1. Fix the error in coresight core.
drivers/hwtracing/coresight/coresight-core.c:1775:7: error: assigning to 'char *' from 'const char *' discards qualifiers
2. Fix the warning when run dtbinding check.
Documentation/devicetree/bindings/arm/arm,coresight-cpu-debug.yaml: device-name: missing type definition
V2 link: https://patchwork.kernel.org/project/linux-arm-msm/cover/20240115164252.265…
Change since V1:
1. Change coresight-name to device name.
2. Add the device-name in coresight dt bindings.
V1 link: https://patchwork.kernel.org/project/linux-arm-kernel/patch/20230208110716.…
Mao Jinlong (2):
dt-bindings: arm: Add label in the coresight components
coresight: Add label sysfs node support
.../testing/sysfs-bus-coresight-devices-cti | 6 ++++
.../sysfs-bus-coresight-devices-funnel | 6 ++++
.../testing/sysfs-bus-coresight-devices-tpdm | 6 ++++
.../bindings/arm/arm,coresight-cti.yaml | 6 ++++
.../arm/arm,coresight-dummy-sink.yaml | 6 ++++
.../arm/arm,coresight-dummy-source.yaml | 6 ++++
.../arm/arm,coresight-dynamic-funnel.yaml | 6 ++++
.../arm/arm,coresight-dynamic-replicator.yaml | 6 ++++
.../arm/arm,coresight-static-funnel.yaml | 6 ++++
.../arm/arm,coresight-static-replicator.yaml | 6 ++++
.../bindings/arm/arm,coresight-tmc.yaml | 6 ++++
.../bindings/arm/qcom,coresight-tpda.yaml | 6 ++++
.../bindings/arm/qcom,coresight-tpdm.yaml | 6 ++++
drivers/hwtracing/coresight/coresight-sysfs.c | 32 +++++++++++++++++++
14 files changed, 110 insertions(+)
--
2.17.1
Hi,
On Mon, 16 Dec 2024 at 11:50, Yeoreum Yun <yeoreum.yun(a)arm.com> wrote:
>
> In some coresight drivers, drvdata->spinlock can be held during __schedule()
> by perf_event_task_sched_out()/in().
>
> Since drvdata->spinlock type is spinlock_t and
> perf_event_task_sched_out()/in() is called after acquiring rq_lock,
> which is raw_spinlock_t (an unsleepable lock),
> this poses an issue in PREEMPT_RT kernel where spinlock_t is sleepable.
>
> To address this,change type drvdata->spinlock in some coresight drivers,
> which can be called by perf_event_task_sched_out()/in(),
> from spinlock_t to raw_spinlock_t.
>
> Reviewed-by: James Clark <james.clark(a)linaro.org>
>
> v2 to v3:
> - Fix build error
>
> v1 to v2:
> - seperate patchsets to change locktype and apply gurad API.
>
> Yeoreum Yun (9):
> coresight: change coresight_device lock type to raw_spinlock_t
> coresight-etm4x: change etmv4_drvdata spinlock type to raw_spinlock_t
> coresight: change coresight_trace_id_map's lock type to
> raw_spinlock_t
> coresight-cti: change cti_drvdata spinlock's type to raw_spinlock_t
> coresight-etb10: change etb_drvdata spinlock's type to raw_spinlock_t
> coresight-funnel: change funnel_drvdata spinlock's type to
> raw_spinlock_t
> coresight-replicator: change replicator_drvdata spinlock's type to
> raw_spinlock_t
> coresight-tmc: change tmc_drvdata spinlock's type to raw_spinlock_t
> coresight/ultrasoc: change cti_drvdata spinlock's type to
> raw_spinlock_t
>
> .../hwtracing/coresight/coresight-config.c | 8 +-
> .../hwtracing/coresight/coresight-config.h | 2 +-
> drivers/hwtracing/coresight/coresight-core.c | 2 +-
> .../hwtracing/coresight/coresight-cti-core.c | 44 +--
> .../hwtracing/coresight/coresight-cti-sysfs.c | 76 +++---
> drivers/hwtracing/coresight/coresight-cti.h | 2 +-
> drivers/hwtracing/coresight/coresight-etb10.c | 26 +-
> .../coresight/coresight-etm4x-core.c | 18 +-
> .../coresight/coresight-etm4x-sysfs.c | 250 +++++++++---------
> drivers/hwtracing/coresight/coresight-etm4x.h | 2 +-
> .../hwtracing/coresight/coresight-funnel.c | 12 +-
> .../coresight/coresight-replicator.c | 12 +-
> .../hwtracing/coresight/coresight-syscfg.c | 26 +-
> .../hwtracing/coresight/coresight-tmc-core.c | 6 +-
> .../hwtracing/coresight/coresight-tmc-etf.c | 48 ++--
> .../hwtracing/coresight/coresight-tmc-etr.c | 40 +--
> drivers/hwtracing/coresight/coresight-tmc.h | 2 +-
> .../hwtracing/coresight/coresight-trace-id.c | 22 +-
> drivers/hwtracing/coresight/ultrasoc-smb.c | 12 +-
> drivers/hwtracing/coresight/ultrasoc-smb.h | 2 +-
> include/linux/coresight.h | 4 +-
> 21 files changed, 308 insertions(+), 308 deletions(-)
>
> --
> LEVI:{C3F47F37-75D8-414A-A8BA-3980EC8A46D7}
>
Notably missing is the same changes for the etm3x driver. The ETMv3.x
and PTM1.x are supported by this driver, and these trace source
variants are also supported in perf in the cs_etm.c code.
STM is also missing, though this is not directly enabled via perf -
but could perhaps run concurrently as it can be a target output for
ftrace.
Regards
Mike
--
Mike Leach
Principal Engineer, ARM Ltd.
Manchester Design Centre. UK