This work is based on the initial latency.patch work by Daniel Wagner [1]. Tracepoints are added at the end of the critical sections for irqs-off, preempt-off and critical timings. With hist triggers attached to these tracepoints, latency histograms can be generated, including a per-CPU breakdown of the events captured. The mechanism builds on the Linux kernel's existing trace event infrastructure.
For example, the following hist triggers may be used (the second one includes a per-CPU filter):
  'hist:key=latency.log2:val=hitcount:sort=latency'
  'hist:key=cpu,latency:val=hitcount:sort=latency if cpu==1'
  'hist:key=common_pid.execname'
This captures only the latencies introduced by disabled irqs and preemption. Additional per-process data has to be captured to calculate the effective latencies introduced for individual processes.
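A minimal usage sketch (assuming debugfs is mounted at /sys/kernel/debug, CONFIG_HIST_TRIGGERS is enabled, and picking the latency_preempt event added by this patch; the other two new events work the same way):

  # cd /sys/kernel/debug/tracing
  # echo 'hist:key=latency.log2:val=hitcount:sort=latency' > \
        events/latency/latency_preempt/trigger
  # cat events/latency/latency_preempt/hist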
Initial work - latency.patch
[1] https://git.kernel.org/cgit/linux/kernel/git/rt/linux-stable-rt.git/commit/?...
Signed-off-by: Binoy Jayan <binoy.jayan@linaro.org>
---
 include/trace/events/latency.h | 43 ++++++++++++++++++++++++++++++++++++++++++
 kernel/trace/trace_irqsoff.c   | 42 ++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 84 insertions(+), 1 deletion(-)
 create mode 100644 include/trace/events/latency.h
diff --git a/include/trace/events/latency.h b/include/trace/events/latency.h
new file mode 100644
index 0000000..77896c7
--- /dev/null
+++ b/include/trace/events/latency.h
@@ -0,0 +1,43 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM latency
+
+#if !defined(_TRACE_HIST_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_HIST_H
+
+#include <linux/tracepoint.h>
+
+DECLARE_EVENT_CLASS(latency_template,
+        TP_PROTO(int cpu, cycles_t latency),
+
+        TP_ARGS(cpu, latency),
+
+        TP_STRUCT__entry(
+                __field(int, cpu)
+                __field(cycles_t, latency)
+        ),
+
+        TP_fast_assign(
+                __entry->cpu = cpu;
+                __entry->latency = latency;
+        ),
+
+        TP_printk("cpu=%d, latency=%lu", __entry->cpu,
+                  (unsigned long) __entry->latency)
+);
+
+DEFINE_EVENT(latency_template, latency_irqs,
+            TP_PROTO(int cpu, cycles_t latency),
+            TP_ARGS(cpu, latency));
+
+DEFINE_EVENT(latency_template, latency_preempt,
+            TP_PROTO(int cpu, cycles_t latency),
+            TP_ARGS(cpu, latency));
+
+DEFINE_EVENT(latency_template, latency_critical_timings,
+            TP_PROTO(int cpu, cycles_t latency),
+            TP_ARGS(cpu, latency));
+
+#endif /* _TRACE_HIST_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 03cdff8..3fcf446 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -13,14 +13,22 @@
 #include <linux/uaccess.h>
 #include <linux/module.h>
 #include <linux/ftrace.h>
+#include <linux/percpu-defs.h>
+
+#include <trace/events/sched.h>
 
#include "trace.h"
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/latency.h>
+
 static struct trace_array *irqsoff_trace __read_mostly;
 static int tracer_enabled __read_mostly;
 
 static DEFINE_PER_CPU(int, tracing_cpu);
-
+static DEFINE_PER_CPU(cycle_t __maybe_unused, ts_irqs);
+static DEFINE_PER_CPU(cycle_t __maybe_unused, ts_preempt);
+static DEFINE_PER_CPU(cycle_t __maybe_unused, ts_critical_timings);
 static DEFINE_RAW_SPINLOCK(max_trace_lock);
 
 enum {
@@ -419,9 +427,17 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip)
 	atomic_dec(&data->disabled);
 }
 
+static inline cycle_t get_delta(cycle_t __percpu *ts)
+{
+	return (cycle_t) trace_clock_local() - this_cpu_read(*ts);
+}
 /* start and stop critical timings used to for stoppage (in idle) */
 void start_critical_timings(void)
 {
+	if (trace_latency_critical_timings_enabled())
+		this_cpu_write(ts_critical_timings,
+			(cycle_t) trace_clock_local());
+
 	if (preempt_trace() || irq_trace())
 		start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
 }
@@ -431,6 +447,13 @@ void stop_critical_timings(void)
 {
 	if (preempt_trace() || irq_trace())
 		stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
+
+	if (trace_latency_critical_timings_enabled()) {
+		trace_latency_critical_timings(
+				raw_smp_processor_id(),
+				get_delta(&ts_critical_timings));
+	}
+
 }
 EXPORT_SYMBOL_GPL(stop_critical_timings);
 
@@ -438,6 +461,10 @@ EXPORT_SYMBOL_GPL(stop_critical_timings);
 #ifdef CONFIG_PROVE_LOCKING
 void time_hardirqs_on(unsigned long a0, unsigned long a1)
 {
+	if (trace_latency_irqs_enabled()) {
+		trace_latency_irqs(raw_smp_processor_id(),
+				get_delta(&ts_irqs));
+	}
 	if (!preempt_trace() && irq_trace())
 		stop_critical_timing(a0, a1);
 }
@@ -446,6 +473,10 @@ void time_hardirqs_off(unsigned long a0, unsigned long a1)
 {
 	if (!preempt_trace() && irq_trace())
 		start_critical_timing(a0, a1);
+
+	if (trace_latency_irqs_enabled()) {
+		this_cpu_write(ts_irqs, (cycle_t) trace_clock_local());
+	}
 }
 
 #else /* !CONFIG_PROVE_LOCKING */
@@ -503,6 +534,11 @@ EXPORT_SYMBOL(trace_hardirqs_off_caller);
 #ifdef CONFIG_PREEMPT_TRACER
 void trace_preempt_on(unsigned long a0, unsigned long a1)
 {
+	if (trace_latency_preempt_enabled()) {
+		trace_latency_preempt(raw_smp_processor_id(),
+				get_delta(&ts_preempt));
+	}
+
 	if (preempt_trace() && !irq_trace())
 		stop_critical_timing(a0, a1);
 }
@@ -511,6 +547,10 @@ void trace_preempt_off(unsigned long a0, unsigned long a1)
 {
 	if (preempt_trace() && !irq_trace())
 		start_critical_timing(a0, a1);
+
+	if (trace_latency_preempt_enabled()) {
+		this_cpu_write(ts_preempt, (cycle_t) trace_clock_local());
+	}
 }
 #endif /* CONFIG_PREEMPT_TRACER */
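Note: as a further usage sketch for the per-CPU breakdown mentioned in the changelog (the choice of the latency_irqs event here is only an example), the filtered trigger can be attached to one of the new events and later removed by prefixing the same string with '!', which is standard hist-trigger behaviour:

  # echo 'hist:key=cpu,latency:val=hitcount:sort=latency if cpu==1' > \
        events/latency/latency_irqs/trigger
  # cat events/latency/latency_irqs/hist
  # echo '!hist:key=cpu,latency:val=hitcount:sort=latency if cpu==1' > \
        events/latency/latency_irqs/trigger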