Stephane Eranian found a bug: IBS' current Fetch counter was not being reset when the driver wrote the new value to clear it with the enable bit also set. He found that adding an MSR write that first disables IBS Fetch makes IBS Fetch reset its current count.
Indeed, the PPR for AMD Family 17h Model 31h B0 55803 Rev 0.54 - Sep 12, 2019 states "The periodic fetch counter is set to IbsFetchCnt [...] when IbsFetchEn is changed from 0 to 1."
Explicitly set IbsFetchEn to 0 and then to 1 when re-enabling IBS Fetch, so the driver properly resets the internal counter to 0 and IBS Fetch starts counting again.
A Family 15h machine that was tested does not have this problem, and the extra wrmsr is also not needed on Family 19h, so only do the extra wrmsr on Families 16h through 18h.
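The required sequence is simply two writes to the fetch control MSR: one with IbsFetchEn clear, then one with it set. Below is a minimal sketch of that sequence (illustration only, not the patch itself; it uses the kernel's MSR_AMD64_IBSFETCHCTL and IBS_FETCH_ENABLE definitions, and the actual change folds this into perf_ibs_enable_event()):

#include <asm/msr.h>            /* wrmsrl() */
#include <asm/msr-index.h>      /* MSR_AMD64_IBSFETCHCTL */
#include <asm/perf_event.h>     /* IBS_FETCH_ENABLE */

/* Re-arm IBS Fetch with an explicit IbsFetchEn 0 -> 1 transition. */
static void ibs_fetch_rearm(u64 config)
{
        /* Write once with IbsFetchEn clear, so the next write is a 0 -> 1 change... */
        wrmsrl(MSR_AMD64_IBSFETCHCTL, config & ~IBS_FETCH_ENABLE);
        /* ...which makes the hardware reload the periodic fetch counter from IbsFetchCnt. */
        wrmsrl(MSR_AMD64_IBSFETCHCTL, config | IBS_FETCH_ENABLE);
}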
Reported-by: Stephane Eranian <stephane.eranian@google.com>
Signed-off-by: Kim Phillips <kim.phillips@amd.com>
Link: https://bugzilla.kernel.org/show_bug.cgi?id=206537
Cc: Stephane Eranian <eranian@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Michael Petlan <mpetlan@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: LKML <linux-kernel@vger.kernel.org>
Cc: x86 <x86@kernel.org>
Cc: stable@vger.kernel.org
---
v2: constrained the extra wrmsr to Families 16h through 18h, inclusive.
 arch/x86/events/amd/ibs.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
index 26c36357c4c9..3eb9a55e998c 100644
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -363,7 +363,14 @@ perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event,
 static inline void
 perf_ibs_enable_event(struct perf_ibs *perf_ibs, struct hw_perf_event *hwc,
 		      u64 config)
 {
-	wrmsrl(hwc->config_base, hwc->config | config | perf_ibs->enable_mask);
+	u64 _config = (hwc->config | config) & ~perf_ibs->enable_mask;
+
+	/* On Fam17h, the periodic fetch counter is set when IbsFetchEn is changed from 0 to 1 */
+	if (perf_ibs == &perf_ibs_fetch && boot_cpu_data.x86 >= 0x16 && boot_cpu_data.x86 <= 0x18)
+		wrmsrl(hwc->config_base, _config);
+
+	_config |= perf_ibs->enable_mask;
+	wrmsrl(hwc->config_base, _config);
 }
 
 /*
-- 
2.27.0
On Tue, Sep 08, 2020 at 04:47:36PM -0500, Kim Phillips wrote:
> Stephane Eranian found a bug: IBS' current Fetch counter was not being
> reset when the driver wrote the new value to clear it with the enable
> bit also set. He found that adding an MSR write that first disables
> IBS Fetch makes IBS Fetch reset its current count.
>
> Indeed, the PPR for AMD Family 17h Model 31h B0 55803 Rev 0.54 - Sep 12,
> 2019 states "The periodic fetch counter is set to IbsFetchCnt [...] when
> IbsFetchEn is changed from 0 to 1."
>
> Explicitly set IbsFetchEn to 0 and then to 1 when re-enabling IBS Fetch,
> so the driver properly resets the internal counter to 0 and IBS Fetch
> starts counting again.
>
> A Family 15h machine that was tested does not have this problem, and the
> extra wrmsr is also not needed on Family 19h, so only do the extra wrmsr
> on Families 16h through 18h.
*groan*...
>  arch/x86/events/amd/ibs.c | 9 ++++++++-
>  1 file changed, 8 insertions(+), 1 deletion(-)
>
> diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
> index 26c36357c4c9..3eb9a55e998c 100644
> --- a/arch/x86/events/amd/ibs.c
> +++ b/arch/x86/events/amd/ibs.c
> @@ -363,7 +363,14 @@ perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event,
>  static inline void
>  perf_ibs_enable_event(struct perf_ibs *perf_ibs, struct hw_perf_event *hwc,
>  		      u64 config)
>  {
> -	wrmsrl(hwc->config_base, hwc->config | config | perf_ibs->enable_mask);
> +	u64 _config = (hwc->config | config) & ~perf_ibs->enable_mask;
> +
> +	/* On Fam17h, the periodic fetch counter is set when IbsFetchEn is changed from 0 to 1 */
> +	if (perf_ibs == &perf_ibs_fetch && boot_cpu_data.x86 >= 0x16 && boot_cpu_data.x86 <= 0x18)
> +		wrmsrl(hwc->config_base, _config);
That's an anti-pattern (and for some reason this is the second time in a week I've seen it). This is a fairly hot path and you're adding a whole bunch of loads and branches.
Granted, in case you need the extra wrmsr that's probably noise, but supposedly you're going to be fixing this in hardware eventually, and you'll be getting rid of the second wrmsr again. But then you're stuck with the loads and branches.
A better option would be to use hwc->flags, you're loading from that line already, so it's guaranteed hot and then you only have a single branch. Or stick it in perf_ibs near enable_mask, same difference.
> +
> +	_config |= perf_ibs->enable_mask;
> +	wrmsrl(hwc->config_base, _config);
>  }
>
>  /*
> -- 
> 2.27.0
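For reference, the hwc->flags variant suggested above would look roughly like the sketch below. The flag name and bit value are hypothetical (not an upstream PERF_X86_EVENT_* flag); the fix actually posted next stores the bit in struct perf_ibs instead:

/* Hypothetical event flag; the bit value is illustrative and unused upstream. */
#define PERF_X86_EVENT_IBS_FETCH_RESET_BROKEN	0x8000

static inline void
perf_ibs_enable_event(struct perf_ibs *perf_ibs, struct hw_perf_event *hwc,
		      u64 config)
{
	u64 _config = (hwc->config | config) & ~perf_ibs->enable_mask;

	/*
	 * hwc is already being loaded on this path, so the test costs a
	 * single branch and no extra cache line, unlike reading
	 * boot_cpu_data here.
	 */
	if (hwc->flags & PERF_X86_EVENT_IBS_FETCH_RESET_BROKEN)
		wrmsrl(hwc->config_base, _config);

	wrmsrl(hwc->config_base, _config | perf_ibs->enable_mask);
}

The flag would be set once per event at init time on affected families. Putting the bit in struct perf_ibs near enable_mask achieves the same effect, since (per the pahole output below) both land on the same cache line.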
On Thu, Sep 10, 2020 at 10:32:23AM +0200, peterz@infradead.org wrote:
> > @@ -363,7 +363,14 @@ perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event,
> >  static inline void
> >  perf_ibs_enable_event(struct perf_ibs *perf_ibs, struct hw_perf_event *hwc,
> >  		      u64 config)
> >  {
> > -	wrmsrl(hwc->config_base, hwc->config | config | perf_ibs->enable_mask);
> > +	u64 _config = (hwc->config | config) & ~perf_ibs->enable_mask;
> > +
> > +	/* On Fam17h, the periodic fetch counter is set when IbsFetchEn is changed from 0 to 1 */
> > +	if (perf_ibs == &perf_ibs_fetch && boot_cpu_data.x86 >= 0x16 && boot_cpu_data.x86 <= 0x18)
> > +		wrmsrl(hwc->config_base, _config);
>
> A better option would be to use hwc->flags, you're loading from that
> line already, so it's guaranteed hot and then you only have a single
> branch. Or stick it in perf_ibs near enable_mask, same difference.
I fixed it for you.
---
struct perf_ibs {
	struct pmu                 pmu;                       /*     0   296 */
	/* --- cacheline 4 boundary (256 bytes) was 40 bytes ago --- */
	unsigned int               msr;                       /*   296     4 */

	/* XXX 4 bytes hole, try to pack */

	u64                        config_mask;               /*   304     8 */
	u64                        cnt_mask;                  /*   312     8 */
	/* --- cacheline 5 boundary (320 bytes) --- */
	u64                        enable_mask;               /*   320     8 */
	u64                        valid_mask;                /*   328     8 */
	u64                        max_period;                /*   336     8 */
	long unsigned int          offset_mask[1];            /*   344     8 */
	int                        offset_max;                /*   352     4 */
	unsigned int               fetch_count_reset_broken:1; /*  356: 0    4 */

	/* XXX 31 bits hole, try to pack */

	struct cpu_perf_ibs *      pcpu;                      /*   360     8 */
	struct attribute * *       format_attrs;              /*   368     8 */
	struct attribute_group     format_group;              /*   376    40 */
	/* --- cacheline 6 boundary (384 bytes) was 32 bytes ago --- */
	const struct attribute_group  * attr_groups[2];       /*   416    16 */
	u64                        (*get_count)(u64);         /*   432     8 */

	/* size: 440, cachelines: 7, members: 15 */
	/* sum members: 432, holes: 1, sum holes: 4 */
	/* sum bitfield members: 1 bits, bit holes: 1, sum bit holes: 31 bits */
	/* last cacheline: 56 bytes */
};
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -89,6 +89,7 @@ struct perf_ibs {
 	u64				max_period;
 	unsigned long			offset_mask[1];
 	int				offset_max;
+	unsigned int			fetch_count_reset_broken : 1;
 	struct cpu_perf_ibs __percpu	*pcpu;
 
 	struct attribute		**format_attrs;
@@ -370,7 +371,13 @@ perf_ibs_event_update(struct perf_ibs *p
 static inline void
 perf_ibs_enable_event(struct perf_ibs *perf_ibs, struct hw_perf_event *hwc,
 		      u64 config)
 {
-	wrmsrl(hwc->config_base, hwc->config | config | perf_ibs->enable_mask);
+	u64 _config = (hwc->config | config) & ~perf_ibs->enable_mask;
+
+	if (perf_ibs->fetch_count_reset_broken)
+		wrmsrl(hwc->config_base, _config);
+
+	_config |= perf_ibs->enable_mask;
+	wrmsrl(hwc->config_base, _config);
 }
 
 /*
@@ -756,6 +763,13 @@ static __init void perf_event_ibs_init(v
 {
 	struct attribute **attr = ibs_op_format_attrs;
 
+	/*
+	 * Some chips fail to reset the fetch count when it is written; instead
+	 * they need a 0-1 transition of IbsFetchEn.
+	 */
+	if (boot_cpu_data.x86 >= 0x16 && boot_cpu_data.x86 <= 0x18)
+		perf_ibs_fetch.fetch_count_reset_broken = 1;
+
 	perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");
 
 	if (ibs_caps & IBS_CAPS_OPCNT) {
On 9/10/20 3:50 AM, peterz@infradead.org wrote:
> On Thu, Sep 10, 2020 at 10:32:23AM +0200, peterz@infradead.org wrote:
> > > @@ -363,7 +363,14 @@ perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event,
> > >  static inline void
> > >  perf_ibs_enable_event(struct perf_ibs *perf_ibs, struct hw_perf_event *hwc,
> > >  		      u64 config)
> > >  {
> > > -	wrmsrl(hwc->config_base, hwc->config | config | perf_ibs->enable_mask);
> > > +	u64 _config = (hwc->config | config) & ~perf_ibs->enable_mask;
> > > +
> > > +	/* On Fam17h, the periodic fetch counter is set when IbsFetchEn is changed from 0 to 1 */
> > > +	if (perf_ibs == &perf_ibs_fetch && boot_cpu_data.x86 >= 0x16 && boot_cpu_data.x86 <= 0x18)
> > > +		wrmsrl(hwc->config_base, _config);
> >
> > A better option would be to use hwc->flags, you're loading from that
> > line already, so it's guaranteed hot and then you only have a single
> > branch. Or stick it in perf_ibs near enable_mask, same difference.
>
> I fixed it for you.
> @@ -370,7 +371,13 @@ perf_ibs_event_update(struct perf_ibs *p
>  static inline void
>  perf_ibs_enable_event(struct perf_ibs *perf_ibs, struct hw_perf_event *hwc,
>  		      u64 config)
>  {
> -	wrmsrl(hwc->config_base, hwc->config | config | perf_ibs->enable_mask);
> +	u64 _config = (hwc->config | config) & ~perf_ibs->enable_mask;
> +
> +	if (perf_ibs->fetch_count_reset_broken)
> +		wrmsrl(hwc->config_base, _config);
Nice, we don't even need the perf_ibs == &perf_ibs_fetch check here because fetch_count_reset_broken is guaranteed to be 0 in perf_ibs_op.
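(That guarantee is plain C initialization: perf_ibs_op is a file-scope object built with designated initializers, and every member not named in the initializer, bitfields included, is zero-initialized. A contrived sketch, not the driver's actual definition:

struct demo_ibs {
	unsigned int		fetch_count_reset_broken : 1;
	unsigned long long	enable_mask;
};

/* Only enable_mask is named, so fetch_count_reset_broken starts out as 0. */
static struct demo_ibs demo_op = {
	.enable_mask = 1ULL << 17,	/* i.e. IBS_OP_ENABLE */
};

So the op path never takes the extra-wrmsr branch unless someone sets the bit explicitly.)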
Thanks!
Kim