Hi Thomas, Hi Russell: This RFC is particularly for your attention since it results directly from feedback I've received from both of you, albeit quite a few months ago now.
This patchset demonstrates using FIQ to improve the quality of the PMU trace on ARM systems. To do so it introduces generic changes that allow irqs to be routed to NMI.
This patchset applies on top of my own patchset:
arm: Implement arch_trigger_all_cpu_backtrace http://thread.gmane.org/gmane.linux.kernel/1864829
I think the important points of this set are clear without reference to the previous patchset, but the patches will not run (nor apply cleanly) without it.
Currently these patches strictly honour a request from Russell to avoid indirection (notifiers, etc) in the ARM default FIQ handler. I have therefore separated the request that an irq be routed to NMI from the installation of a handler for it.
Avoiding indirection does raise some problems though, because it means we arrive in the PMU code without a context pointer. At present I have just added global variables to the ARM PMU code in order to hold information about irq allocations in a form I can safely read from NMI.
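For illustration, the state involved is little more than a per-CPU copy of the irq number, written when the interrupt is requested and read lock-free from NMI context (this mirrors the cpu_pmu_irqs variable introduced in the final patch of the series):

/* Written at request time; read (without locks) from NMI context
 * to decide whether an incoming FIQ belongs to the PMU. */
static DEFINE_PER_CPU(int, cpu_pmu_irqs);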
Daniel Thompson (5):
  arm: irq: Add a __nmi_count stat
  irq: Allow interrupts to be routed to NMI (or similar)
  irq: gic: Add support for NMI routing
  arm: perf: Make v7 support FIQ-safe
  arm: perf: Use FIQ to handle PMU events.
 arch/arm/include/asm/hardirq.h   |  1 +
 arch/arm/include/asm/pmu.h       |  4 +++
 arch/arm/kernel/irq.c            |  7 +++-
 arch/arm/kernel/perf_event.c     |  2 +-
 arch/arm/kernel/perf_event_cpu.c | 35 +++++++++++++++++--
 arch/arm/kernel/perf_event_v7.c  | 11 ++----
 arch/arm/kernel/traps.c          | 15 +++++---
 drivers/irqchip/irq-gic.c        | 75 ++++++++++++++++++++++++++++++----------
 include/linux/interrupt.h        | 20 +++++++++++
 include/linux/irq.h              |  2 ++
 include/linux/irqchip/arm-gic.h  |  8 ++++-
 kernel/irq/manage.c              | 29 ++++++++++++++--
 12 files changed, 169 insertions(+), 40 deletions(-)
-- 1.9.3
Extends the irq statistics for ARM, making it possible to quickly observe how many times the default FIQ handler has executed.
In /proc/interrupts we use the NMI terminology (rather than FIQ) because the statistic is only likely to be updated when we run the default FIQ handler (handle_fiq_as_nmi).
Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org>
---
 arch/arm/include/asm/hardirq.h | 1 +
 arch/arm/kernel/irq.c          | 7 ++++++-
 arch/arm/kernel/traps.c        | 2 ++
 3 files changed, 9 insertions(+), 1 deletion(-)
diff --git a/arch/arm/include/asm/hardirq.h b/arch/arm/include/asm/hardirq.h
index 5df33e30ae1b..27ab43b5d7e2 100644
--- a/arch/arm/include/asm/hardirq.h
+++ b/arch/arm/include/asm/hardirq.h
@@ -9,6 +9,7 @@

 typedef struct {
 	unsigned int __softirq_pending;
+	unsigned int __nmi_count;
 #ifdef CONFIG_SMP
 	unsigned int ipi_irqs[NR_IPI];
 #endif
diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c
index ad857bada96c..6dd1619e0700 100644
--- a/arch/arm/kernel/irq.c
+++ b/arch/arm/kernel/irq.c
@@ -48,13 +48,18 @@ unsigned long irq_err_count;

 int arch_show_interrupts(struct seq_file *p, int prec)
 {
+	int i;
+
 #ifdef CONFIG_FIQ
 	show_fiq_list(p, prec);
 #endif
 #ifdef CONFIG_SMP
 	show_ipi_list(p, prec);
 #endif
-	seq_printf(p, "%*s: %10lu\n", prec, "Err", irq_err_count);
+	seq_printf(p, "%*s: ", prec, "NMI");
+	for_each_online_cpu(i)
+		seq_printf(p, "%10u ", __get_irq_stat(i, __nmi_count));
+	seq_printf(p, "\n%*s: %10lu\n", prec, "Err", irq_err_count);
 	return 0;
 }

diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 1836415b8a5c..5645f81ac4cc 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -476,8 +476,10 @@ die_sig:
  */
asmlinkage void __exception_irq_entry handle_fiq_as_nmi(struct pt_regs *regs)
 {
+	unsigned int cpu = smp_processor_id();
 	struct pt_regs *old_regs = set_irq_regs(regs);

+	__inc_irq_stat(cpu, __nmi_count);
 	nmi_enter();
#ifdef CONFIG_ARM_GIC
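For illustration, with this patch applied /proc/interrupts gains an NMI row just ahead of the Err row; a hypothetical two-CPU system (counts invented) might show:

           CPU0       CPU1
NMI:         14         11
Err:          0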
Some combinations of architectures and interrupt controllers make it possible for arbitrary interrupt signals to be selectively made immune to masking by local_irq_disable(). For example, on ARM platforms, many interrupt controllers allow interrupts to be routed to FIQ rather than IRQ.
These features could be exploited to implement debug and tracing features that can be implemented using NMI on x86 platforms (perf, hard lockup, kgdb).
This patch assumes that the management of the NMI handler itself will be architecture specific (maybe notifier list like x86, hard coded like ARM, or something else entirely). The generic layer can still manage the irq as normal (affinity, enable/disable, free) but is not responsible for dispatching.
Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org>
---
 include/linux/interrupt.h | 20 ++++++++++++++++++++
 include/linux/irq.h       |  2 ++
 kernel/irq/manage.c       | 29 +++++++++++++++++++++++++++--
 3 files changed, 49 insertions(+), 2 deletions(-)
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index d9b05b5bf8c7..b95dc28f4cc3 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -57,6 +57,9 @@
  * IRQF_NO_THREAD - Interrupt cannot be threaded
  * IRQF_EARLY_RESUME - Resume IRQ early during syscore instead of at device
  *                resume time.
+ * __IRQF_NMI - Route the interrupt to an NMI or some similar signal that is not
+ *                masked by local_irq_disable(). Used internally by
+ *                request_nmi_irq().
  */
 #define IRQF_DISABLED		0x00000020
 #define IRQF_SHARED		0x00000080
@@ -70,6 +73,7 @@
 #define IRQF_FORCE_RESUME	0x00008000
 #define IRQF_NO_THREAD		0x00010000
 #define IRQF_EARLY_RESUME	0x00020000
+#define __IRQF_NMI		0x00040000

 #define IRQF_TIMER		(__IRQF_TIMER | IRQF_NO_SUSPEND | IRQF_NO_THREAD)

@@ -139,6 +143,22 @@ extern int __must_check
 request_percpu_irq(unsigned int irq, irq_handler_t handler,
 		   const char *devname, void __percpu *percpu_dev_id);

+static inline int __must_check request_nmi_irq(unsigned int irq,
+					       unsigned long flags,
+					       const char *name, void *dev_id)
+{
+	/*
+	 * no_action unconditionally returns IRQ_NONE which is exactly
+	 * what we need. The handler might be expected to be unreachable
+	 * but some controllers may spuriously ack the NMI from the IRQ
+	 * handling code. When this happens it occurs very rarely, thus
+	 * by returning IRQ_NONE we can rely on the spurious interrupt
+	 * logic to do the right thing.
+	 */
+	return request_irq(irq, no_action, flags | IRQF_NO_THREAD | __IRQF_NMI,
+			   name, dev_id);
+}
+
 extern void free_irq(unsigned int, void *);
 extern void free_percpu_irq(unsigned int, void __percpu *);

diff --git a/include/linux/irq.h b/include/linux/irq.h
index d09ec7a1243e..695eb37f04ae 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -307,6 +307,7 @@ static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d)
  * @irq_eoi:		end of interrupt
  * @irq_set_affinity:	set the CPU affinity on SMP machines
  * @irq_retrigger:	resend an IRQ to the CPU
+ * @irq_set_nmi_routing: set whether interrupt can act like NMI
  * @irq_set_type:	set the flow type (IRQ_TYPE_LEVEL/etc.) of an IRQ
  * @irq_set_wake:	enable/disable power-management wake-on of an IRQ
  * @irq_bus_lock:	function to lock access to slow bus (i2c) chips
@@ -341,6 +342,7 @@ struct irq_chip {

 	int	(*irq_set_affinity)(struct irq_data *data, const struct cpumask *dest, bool force);
 	int	(*irq_retrigger)(struct irq_data *data);
+	int	(*irq_set_nmi_routing)(struct irq_data *data, unsigned int nmi);
 	int	(*irq_set_type)(struct irq_data *data, unsigned int flow_type);
 	int	(*irq_set_wake)(struct irq_data *data, unsigned int on);

diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 80692373abd6..8e669051759d 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -571,6 +571,17 @@ int can_request_irq(unsigned int irq, unsigned long irqflags)
 	return canrequest;
 }

+int __irq_set_nmi_routing(struct irq_desc *desc, unsigned int irq,
+			  unsigned int nmi)
+{
+	struct irq_chip *chip = desc->irq_data.chip;
+
+	if (!chip || !chip->irq_set_nmi_routing)
+		return -EINVAL;
+
+	return chip->irq_set_nmi_routing(&desc->irq_data, nmi);
+}
+
 int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
 		      unsigned long flags)
 {
@@ -1058,11 +1069,12 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 		 * the same type (level, edge, polarity). So both flag
 		 * fields must have IRQF_SHARED set and the bits which
 		 * set the trigger type must match. Also all must
-		 * agree on ONESHOT.
+		 * agree on ONESHOT and NMI
 		 */
 		if (!((old->flags & new->flags) & IRQF_SHARED) ||
 		    ((old->flags ^ new->flags) & IRQF_TRIGGER_MASK) ||
-		    ((old->flags ^ new->flags) & IRQF_ONESHOT))
+		    ((old->flags ^ new->flags) & IRQF_ONESHOT) ||
+		    ((old->flags ^ new->flags) & __IRQF_NMI))
 			goto mismatch;

 		/* All handlers must agree on per-cpuness */
@@ -1153,6 +1165,19 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)

 	init_waitqueue_head(&desc->wait_for_threads);

+	if (new->flags & __IRQF_NMI) {
+		ret = __irq_set_nmi_routing(desc, irq, true);
+		if (ret != 1)
+			goto out_mask;
+	} else {
+		ret = __irq_set_nmi_routing(desc, irq, false);
+		if (ret == 1) {
+			pr_err("Failed to disable NMI routing for irq %d\n",
+			       irq);
+			goto out_mask;
+		}
+	}
+
 	/* Setup the type (level, edge polarity) if configured: */
 	if (new->flags & IRQF_TRIGGER_MASK) {
 		ret = __irq_set_trigger(desc, irq,
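For illustration, a caller is expected to try NMI routing first and fall back to a normal interrupt when the chip cannot provide it. A minimal sketch, where the irq number, the "my-dev" name, my_handler and the dev pointer are placeholders (the pattern mirrors the arm-pmu usage in the final patch):

static int my_request(unsigned int irq, irq_handler_t my_handler, void *dev)
{
	int err;

	/* Try to route the interrupt to NMI/FIQ... */
	err = request_nmi_irq(irq, IRQF_NOBALANCING, "my-dev", dev);
	if (err)
		/* ...otherwise settle for an ordinary maskable irq. */
		err = request_irq(irq, my_handler,
				  IRQF_NOBALANCING | IRQF_NO_THREAD,
				  "my-dev", dev);
	return err;
}

Note that when NMI routing succeeds the real work must be dispatched from the architecture's default FIQ/NMI handler; my_handler is only reached on the fallback path.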
Daniel, feel free to ignore my comments. I know less about this than anybody :) I am especially unfamiliar with NMI, but I will risk exposing my ignorance
On Tuesday, January 13, 2015 04:35:28 PM Daniel Thompson wrote:
Some combinations of architectures and interrupt controllers make it possible for arbitrary interrupt signals to be selectively made immune to masking by local_irq_disable(). For example, on ARM platforms, many interrupt controllers allow interrupts to be routed to FIQ rather than IRQ.
These features could be exploited to implement debug and tracing features that can be implemented using NMI on x86 platforms (perf, hard lockup, kgdb).
This patch assumes that the management of the NMI handler itself will be architecture specific (maybe notifier list like x86, hard coded like ARM, or something else entirely). The generic layer can still manage the irq as normal (affinity, enable/disable, free) but is not responsible for dispatching.
Signed-off-by: Daniel Thompson daniel.thompson@linaro.org
 include/linux/interrupt.h | 20 ++++++++++++++++++++
 include/linux/irq.h       |  2 ++
 kernel/irq/manage.c       | 29 +++++++++++++++++++++++++++--
 3 files changed, 49 insertions(+), 2 deletions(-)
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index d9b05b5bf8c7..b95dc28f4cc3 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -57,6 +57,9 @@
  * IRQF_NO_THREAD - Interrupt cannot be threaded
  * IRQF_EARLY_RESUME - Resume IRQ early during syscore instead of at device
  *                resume time.
+ * __IRQF_NMI - Route the interrupt to an NMI or some similar signal that is not
+ *                masked by local_irq_disable(). Used internally by
+ *                request_nmi_irq().
  */
 #define IRQF_DISABLED		0x00000020
 #define IRQF_SHARED		0x00000080
@@ -70,6 +73,7 @@
 #define IRQF_FORCE_RESUME	0x00008000
 #define IRQF_NO_THREAD	0x00010000
 #define IRQF_EARLY_RESUME	0x00020000
+#define __IRQF_NMI		0x00040000

 #define IRQF_TIMER		(__IRQF_TIMER | IRQF_NO_SUSPEND | IRQF_NO_THREAD)

@@ -139,6 +143,22 @@ extern int __must_check
 request_percpu_irq(unsigned int irq, irq_handler_t handler,
 		   const char *devname, void __percpu *percpu_dev_id);

+static inline int __must_check request_nmi_irq(unsigned int irq,
+					       unsigned long flags,
+					       const char *name, void *dev_id)
Why not pass your handler in here, instead of explicitly adding it to the default FIQ handler?
request_nmi_irq need not exist at all. Your __IRQF_NMI flag is sufficient with request_irq.
+{
+	/*
+	 * no_action unconditionally returns IRQ_NONE which is exactly
+	 * what we need. The handler might be expected to be unreachable
+	 * but some controllers may spuriously ack the NMI from the IRQ
+	 * handling code. When this happens it occurs very rarely, thus
+	 * by returning IRQ_NONE we can rely on the spurious interrupt
+	 * logic to do the right thing.
+	 */
+	return request_irq(irq, no_action, flags | IRQF_NO_THREAD | __IRQF_NMI,
+			   name, dev_id);
+}
no_action here looks kind of evil to me. I'd prefer to see the nonsecure irq handler check for __IRQF_NMI and call no_action.
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 6f1c7a5..f810cff 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -711,7 +711,10 @@ void handle_percpu_devid_irq(unsigned int irq, struct irq_desc *desc)
 		chip->irq_ack(&desc->irq_data);

 	trace_irq_handler_entry(irq, action);
-	res = action->handler(irq, dev_id);
+	if (action->flags & __IRQF_NMI)
+		res = no_action(irq, dev_id);
+	else
+		res = action->handler(irq, dev_id);
 	trace_irq_handler_exit(irq, action, res);

 	if (chip->irq_eoi)
On 19/01/15 16:21, Joshua Clayton wrote:
Daniel, feel free to ignore my comments. I know less about this than anybody :) I am especially unfamiliar with NMI, but I will risk exposing my ignorance
Not at all! Thanks for the review.
On Tuesday, January 13, 2015 04:35:28 PM Daniel Thompson wrote:
Some combinations of architectures and interrupt controllers make it possible for arbitrary interrupt signals to be selectively made immune to masking by local_irq_disable(). For example, on ARM platforms, many interrupt controllers allow interrupts to be routed to FIQ rather than IRQ.
These features could be exploited to implement debug and tracing features that can be implemented using NMI on x86 platforms (perf, hard lockup, kgdb).
This patch assumes that the management of the NMI handler itself will be architecture specific (maybe notifier list like x86, hard coded like ARM, or something else entirely). The generic layer can still manage the irq as normal (affinity, enable/disable, free) but is not responsible for dispatching.
Signed-off-by: Daniel Thompson daniel.thompson@linaro.org
include/linux/interrupt.h | 20 ++++++++++++++++++++ include/linux/irq.h | 2 ++ kernel/irq/manage.c | 29 +++++++++++++++++++++++++++-- 3 files changed, 49 insertions(+), 2 deletions(-)
+static inline int __must_check request_nmi_irq(unsigned int irq,
+					       unsigned long flags,
+					       const char *name, void *dev_id)
Why not pass your handler in here, instead of explicitly adding it to the default FIQ handler?
request_nmi_irq need not exist at all. Your __IRQF_NMI flag is sufficient with request_irq.
The approach currently taken is designed to avoid indirection within the default FIQ handler and ultimately resulted from this thread: http://thread.gmane.org/gmane.linux.ports.arm.kernel/331027/focus=1778426
IIRC Russell King wanted to ensure that any code that tried to execute from the default FIQ handler received sufficient code review. Avoiding indirection was a simple way to do that (since traps.c tends to be well reviewed).
However, other than Russell's concerns about code review, I am not aware of any reason *not* to install the handler using the normal IRQ machinery. Specifically:
* The first level data structure (irq to irq_desc) uses RCU and should be NMI-safe.
* If we prohibit IRQF_SHARED and can trust users of __IRQF_NMI to take care not to free the IRQ whilst the handler could be running[1], then I think it is safe to look up an irqaction without taking the irqdesc lock.
[1] Most uses of __IRQF_NMI that I've looked at will trivially ensure this by never uninstalling the handler...
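To make [1] concrete, the lock-free dispatch this permits looks something like the sketch below (the v2 series at the end of this thread adopts essentially this shape as handle_nmi_irq_desc()):

int handle_nmi_irq_desc(unsigned int irq, struct irq_desc *desc)
{
	/* NMIs cannot be shared, so there is exactly one irqaction... */
	struct irqaction *action = desc->action;

	BUG_ON(action->next);

	/* ...and no irqdesc lock is taken: we rely on free_irq() never
	 * racing against a running handler. */
	return action->handler(irq, action->dev_id);
}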
+{
+	/*
+	 * no_action unconditionally returns IRQ_NONE which is exactly
+	 * what we need. The handler might be expected to be unreachable
+	 * but some controllers may spuriously ack the NMI from the IRQ
+	 * handling code. When this happens it occurs very rarely, thus
+	 * by returning IRQ_NONE we can rely on the spurious interrupt
+	 * logic to do the right thing.
+	 */
+	return request_irq(irq, no_action, flags | IRQF_NO_THREAD | __IRQF_NMI,
+			   name, dev_id);
+}
no_action here looks kind of evil to me. I'd prefer to see the nonsecure irq handler check for __IRQF_NMI and call no_action.
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 6f1c7a5..f810cff 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -711,7 +711,10 @@ void handle_percpu_devid_irq(unsigned int irq, struct irq_desc *desc)
 		chip->irq_ack(&desc->irq_data);

 	trace_irq_handler_entry(irq, action);
-	res = action->handler(irq, dev_id);
+	if (action->flags & __IRQF_NMI)
+		res = no_action(irq, dev_id);
+	else
+		res = action->handler(irq, dev_id);
 	trace_irq_handler_exit(irq, action, res);

 	if (chip->irq_eoi)
Hmnnn... there should be no need to check for __IRQF_NMI in this bit of code.
*If* action->handler pointed to a real NMI handler then we should just call it even though we are "only" running from IRQ.
When the problem occurs the GIC priority filtering will prevent further FIQs from being delivered so we might as well just make the call. Note that the issue that makes us occasionally spuriously ack is *extremely* ARM specific (an obscure GIC/TrustZone interaction) so there is little need to consider other architectures here.
This patch provides an implementation of irq_set_nmi_routing by allowing SPIs to be switched between group 1 (IRQ) and group 0 (FIQ).
It also replaces the interface used between the default FIQ handler and the GIC. These extensions are required in order to allow SPIs to be acknowledged and completed.
Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org>
---
 arch/arm/kernel/traps.c         | 12 ++++---
 drivers/irqchip/irq-gic.c       | 75 ++++++++++++++++++++++++++++++-----------
 include/linux/irqchip/arm-gic.h |  8 ++++-
 3 files changed, 71 insertions(+), 24 deletions(-)
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 5645f81ac4cc..74c752b9db68 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -478,17 +478,21 @@ asmlinkage void __exception_irq_entry handle_fiq_as_nmi(struct pt_regs *regs)
 {
 	unsigned int cpu = smp_processor_id();
 	struct pt_regs *old_regs = set_irq_regs(regs);
+	int irq = 0;

 	__inc_irq_stat(cpu, __nmi_count);
 	nmi_enter();
+	irq = gic_ack_fiq();

-#ifdef CONFIG_ARM_GIC
-	gic_handle_fiq_ipi();
-#endif
+	if (irq) {
+		/* empty - no SPI handlers (yet) */
+	} else {
 #ifdef CONFIG_SMP
-	ipi_cpu_backtrace(regs);
+		ipi_cpu_backtrace(regs);
 #endif
+	}

+	gic_eoi_fiq();
 	nmi_exit();
 	set_irq_regs(old_regs);
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index c4f4a8827ed8..7c212b3126b8 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -129,6 +129,9 @@ struct irq_chip gic_arch_extn = {

 static struct gic_chip_data gic_data[MAX_GIC_NR] __read_mostly;

+static int gic_set_group_irq(struct gic_chip_data *gic, unsigned int hwirq,
+			     int group);
+
 #ifdef CONFIG_GIC_NON_BANKED
 static void __iomem *gic_get_percpu_base(union gic_base *base)
 {
@@ -214,6 +217,18 @@ static void gic_eoi_irq(struct irq_data *d)
 	writel_relaxed(gic_irq(d), gic_cpu_base(d) + GIC_CPU_EOI);
 }

+static int gic_set_nmi_routing(struct irq_data *d, unsigned int nmi)
+{
+	struct gic_chip_data *gic = irq_data_get_irq_chip_data(d);
+	int ret;
+
+	ret = gic_set_group_irq(gic, gic_irq(d), !nmi);
+	if (ret >= 0)
+		ret = !ret;
+
+	return ret;
+}
+
 static int gic_set_type(struct irq_data *d, unsigned int type)
 {
 	void __iomem *base = gic_dist_base(d);
@@ -346,6 +361,7 @@ static struct irq_chip gic_chip = {
 	.irq_mask		= gic_mask_irq,
 	.irq_unmask		= gic_unmask_irq,
 	.irq_eoi		= gic_eoi_irq,
+	.irq_set_nmi_routing	= gic_set_nmi_routing,
 	.irq_set_type		= gic_set_type,
 	.irq_retrigger		= gic_retrigger,
 #ifdef CONFIG_SMP
@@ -364,8 +380,8 @@ static struct irq_chip gic_chip = {
  * If is safe to call this function on systems which do not support
  * grouping (it will have no effect).
  */
-static void gic_set_group_irq(struct gic_chip_data *gic, unsigned int hwirq,
-			      int group)
+static int gic_set_group_irq(struct gic_chip_data *gic, unsigned int hwirq,
+			     int group)
 {
 	void __iomem *base = gic_data_dist_base(gic);
 	unsigned int grp_reg = hwirq / 32 * 4;
@@ -381,7 +397,7 @@ static void gic_set_group_irq(struct gic_chip_data *gic, unsigned int hwirq,
 	 * the EnableGrp1 bit set.
 	 */
 	if (!(GICD_ENABLE_GRP1 & readl_relaxed(base + GIC_DIST_CTRL)))
-		return;
+		return -EINVAL;

 	raw_spin_lock(&irq_controller_lock);

@@ -403,32 +419,53 @@ static void gic_set_group_irq(struct gic_chip_data *gic, unsigned int hwirq,
 	writel_relaxed(pri_val, base + GIC_DIST_PRI + pri_reg);

 	raw_spin_unlock(&irq_controller_lock);
+
+	return group;
 }

-/*
- * Fully acknowledge (both ack and eoi) any outstanding FIQ-based IPI,
- * otherwise do nothing.
- */
-void gic_handle_fiq_ipi(void)
+static DEFINE_PER_CPU(unsigned long, gic_irqstat_fiq);
+
+int gic_ack_fiq(void)
 {
 	struct gic_chip_data *gic = &gic_data[0];
 	void __iomem *cpu_base = gic_data_cpu_base(gic);
-	unsigned long irqstat, irqnr;
+	unsigned long irqstat, hwirq;
+	unsigned int irq = 0;
+
+	/*
+	 * This function is called unconditionally by the default FIQ
+	 * handler so first we must check that the driver is
+	 * initialized.
+	 */
+	if (!gic->gic_irqs)
+		return 0;

 	if (WARN_ON(!in_nmi()))
-		return;
+		return 0;

-	while ((1u << readl_relaxed(cpu_base + GIC_CPU_HIGHPRI)) &
-	       SMP_IPI_FIQ_MASK) {
-		irqstat = readl_relaxed(cpu_base + GIC_CPU_INTACK);
-		writel_relaxed(irqstat, cpu_base + GIC_CPU_EOI);
+	/* read intack with the priority mask set so we only acknowledge FIQs */
+	writel_relaxed(GICC_INT_PRI_THRESHOLD >> 1, cpu_base + GIC_CPU_PRIMASK);
+	irqstat = readl_relaxed(cpu_base + GIC_CPU_INTACK);
+	raw_cpu_write(gic_irqstat_fiq, irqstat);
+	writel_relaxed(GICC_INT_PRI_THRESHOLD, cpu_base + GIC_CPU_PRIMASK);

-		irqnr = irqstat & GICC_IAR_INT_ID_MASK;
-		WARN_RATELIMIT(irqnr > 16,
-			       "Unexpected irqnr %lu (bad prioritization?)\n",
-			       irqnr);
-	}
+	hwirq = irqstat & GICC_IAR_INT_ID_MASK;
+	if (likely(hwirq > 15 && hwirq < 1021))
+		irq = irq_find_mapping(gic->domain, hwirq);
+
+	return irq;
+}
+
+void gic_eoi_fiq(void)
+{
+	struct gic_chip_data *gic = &gic_data[0];
+
+	if (!gic->gic_irqs)
+		return;
+
+	writel_relaxed(raw_cpu_read(gic_irqstat_fiq),
+		       gic_data_cpu_base(gic) + GIC_CPU_EOI);
 }

 void __init gic_cascade_irq(unsigned int gic_nr, unsigned int irq)
diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
index 7690f70049a3..7c03e12f9e52 100644
--- a/include/linux/irqchip/arm-gic.h
+++ b/include/linux/irqchip/arm-gic.h
@@ -127,7 +127,13 @@ static inline void __init register_routable_domain_ops
 	gic_routable_irq_domain_ops = ops;
 }

-void gic_handle_fiq_ipi(void);
+#ifdef CONFIG_ARM_GIC
+int gic_ack_fiq(void);
+void gic_eoi_fiq(void);
+#else
+static inline int gic_ack_fiq(void) { return 0; }
+static inline void gic_eoi_fiq(void) {}
+#endif

 #endif /* __ASSEMBLY */
 #endif
armv7pmu_disable_event() is called from the irq handler. If irq handling switches over to FIQ then the spin locks in this function risk deadlock. Both armv7_pmnc_disable_counter() and armv7_pmnc_disable_intens() are unconditional co-processor writes. I haven't yet come up with a schedule where other users of pmu_lock would break if interleaved with these calls, so I have simply removed them.
The other change required is to avoid calling irq_work_run() when run from a FIQ handler. The pended work will either be dispatched by the irq work IPI or by a timer handler.
Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org>
---
 arch/arm/kernel/perf_event_v7.c | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)
diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c
index 8993770c47de..08f426486d3e 100644
--- a/arch/arm/kernel/perf_event_v7.c
+++ b/arch/arm/kernel/perf_event_v7.c
@@ -744,7 +744,6 @@ static void armv7pmu_enable_event(struct perf_event *event)

 static void armv7pmu_disable_event(struct perf_event *event)
 {
-	unsigned long flags;
 	struct hw_perf_event *hwc = &event->hw;
 	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
 	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
@@ -757,11 +756,6 @@ static void armv7pmu_disable_event(struct perf_event *event)
 	}

 	/*
-	 * Disable counter and interrupt
-	 */
-	raw_spin_lock_irqsave(&events->pmu_lock, flags);
-
-	/*
 	 * Disable counter
 	 */
 	armv7_pmnc_disable_counter(idx);
@@ -770,8 +764,6 @@ static void armv7pmu_disable_event(struct perf_event *event)
 	 * Disable interrupt for this counter
 	 */
 	armv7_pmnc_disable_intens(idx);
-
-	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }

 static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
@@ -831,7 +823,8 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
 	 * platforms that can have the PMU interrupts raised as an NMI, this
 	 * will not work.
 	 */
-	irq_work_run();
+	if (!in_nmi())
+		irq_work_run();

 	return IRQ_HANDLED;
 }
Using FIQ (if it is available) gives perf a better insight into the system by allowing code run with interrupts disabled to be profiled.
Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org>
---
 arch/arm/include/asm/pmu.h       |  4 ++++
 arch/arm/kernel/perf_event.c     |  2 +-
 arch/arm/kernel/perf_event_cpu.c | 35 ++++++++++++++++++++++++++++++---
 arch/arm/kernel/traps.c          |  3 ++-
 4 files changed, 39 insertions(+), 5 deletions(-)
diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h
index b1596bd59129..2a7ea97a4a14 100644
--- a/arch/arm/include/asm/pmu.h
+++ b/arch/arm/include/asm/pmu.h
@@ -123,6 +123,8 @@ struct arm_pmu {

 extern const struct dev_pm_ops armpmu_dev_pm_ops;

+irqreturn_t armpmu_dispatch_irq(int irq, void *dev);
+
 int armpmu_register(struct arm_pmu *armpmu, int type);

 u64 armpmu_event_update(struct perf_event *event);
@@ -136,6 +138,8 @@ int armpmu_map_event(struct perf_event *event,
 		     [PERF_COUNT_HW_CACHE_RESULT_MAX],
 		     u32 raw_event_mask);

+void cpu_pmu_handle_fiq(int irq);
+
 struct pmu_probe_info {
 	unsigned int cpuid;
 	unsigned int mask;
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index f7c65adaa428..5ae9adf7f18e 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -296,7 +296,7 @@ validate_group(struct perf_event *event)
 	return 0;
 }

-static irqreturn_t armpmu_dispatch_irq(int irq, void *dev)
+irqreturn_t armpmu_dispatch_irq(int irq, void *dev)
 {
 	struct arm_pmu *armpmu;
 	struct platform_device *plat_device;
diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c
index a80309087a7b..5c4e9ce23389 100644
--- a/arch/arm/kernel/perf_event_cpu.c
+++ b/arch/arm/kernel/perf_event_cpu.c
@@ -36,6 +36,9 @@
 /* Set at runtime when we know what CPU type we are. */
 static struct arm_pmu *cpu_pmu;

+/* Allows us to find out if an IRQ is for us (mostly used from NMI context) */
+static DEFINE_PER_CPU(int, cpu_pmu_irqs);
+
 /*
  * Despite the names, these two functions are CPU-specific and are used
  * by the OProfile/perf code.
@@ -127,6 +130,24 @@ static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu)
 	}
 }

+/*
+ * This handler is called *unconditionally* from the default NMI/FIQ
+ * handler. The irq may not be anything to do with us so the main
+ * job of this function is to figure out if the irq passed in is ours
+ * or not.
+ */
+void cpu_pmu_handle_fiq(int irq)
+{
+	int cpu = smp_processor_id();
+
+	if (irq != get_cpu_var(cpu_pmu_irqs))
+		return;
+
+	(void)armpmu_dispatch_irq(irq,
+			get_cpu_ptr(&cpu_pmu->hw_events->percpu_pmu));
+}
+
+
 static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
 {
 	int i, err, irq, irqs;
@@ -170,9 +191,16 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
 			continue;
 		}

-		err = request_irq(irq, handler,
-				  IRQF_NOBALANCING | IRQF_NO_THREAD, "arm-pmu",
-				  per_cpu_ptr(&hw_events->percpu_pmu, i));
+		err = request_nmi_irq(
+			irq, IRQF_NOBALANCING, "arm-pmu",
+			per_cpu_ptr(&hw_events->percpu_pmu, i));
+		if (err) {
+			err = request_irq(
+				irq, handler,
+				IRQF_NOBALANCING | IRQF_NO_THREAD,
+				"arm-pmu",
+				per_cpu_ptr(&hw_events->percpu_pmu, i));
+		}
 		if (err) {
 			pr_err("unable to request IRQ%d for ARM PMU counters\n",
 			       irq);
@@ -180,6 +208,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
 		}

 		cpumask_set_cpu(i, &cpu_pmu->active_irqs);
+		per_cpu(cpu_pmu_irqs, i) = irq;
 	}
 }

diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 74c752b9db68..c581e07517ff 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -38,6 +38,7 @@
 #include <asm/tls.h>
 #include <asm/system_misc.h>
 #include <asm/opcodes.h>
+#include <asm/pmu.h>

 static const char *handler[]= {
@@ -485,7 +486,7 @@ asmlinkage void __exception_irq_entry handle_fiq_as_nmi(struct pt_regs *regs)
 	irq = gic_ack_fiq();

 	if (irq) {
-		/* empty - no SPI handlers (yet) */
+		cpu_pmu_handle_fiq(irq);
 	} else {
 #ifdef CONFIG_SMP
 		ipi_cpu_backtrace(regs);
On Tuesday, January 13, 2015 04:35:31 PM Daniel Thompson wrote:
Using FIQ (if it is available) gives perf a better insight into the system by allowing code run with interrupts disabled to be profiled.
+/*
+ * This handler is called *unconditionally* from the default NMI/FIQ
+ * handler. The irq may not be anything to do with us so the main
+ * job of this function is to figure out if the irq passed in is ours
+ * or not.
+ */
This comment is an indicator that all the code in cpu_pmu_handle_fiq is in the wrong place. It (or something like it) belongs at the level of the default FIQ handler, and not in perf_event_cpu.c
On 19/01/15 16:35, Joshua Clayton wrote:
On Tuesday, January 13, 2015 04:35:31 PM Daniel Thompson wrote:
Using FIQ (if it is available) gives perf a better insight into the system by allowing code run with interrupts disabled to be profiled.
+/*
+ * This handler is called *unconditionally* from the default NMI/FIQ
+ * handler. The irq may not be anything to do with us so the main
+ * job of this function is to figure out if the irq passed in is ours
+ * or not.
+ */
This comment is an indicator that all the code in cpu_pmu_handle_fiq is in the wrong place. It (or something like it) belongs at the level of the default FIQ handler, and not in perf_event_cpu.c
I'm not sure about that.
If we moved this code into the default FIQ handler that means the PMU driver would have to explicitly share its irq value with the default FIQ handler (which doesn't really care about that).
I'm inclined to view this code as the effect of avoiding indirection in the default FIQ handler.
Regular irq code has nothing like this because &armpmu_dispatch_irq, irq and get_cpu_ptr(&cpu_pmu->hw_events->percpu_pmu) would all be looked up from the irqaction and any unwanted events are naturally filtered by the irq dispatch.
After your review I'm very tempted to put together a patch that dispatches NMIs indirectly from the default FIQ handler. However I still don't have much of an answer to Russell's concerns about code review.
On Tuesday, January 20, 2015 10:18:10 AM Daniel Thompson wrote:
On 19/01/15 16:35, Joshua Clayton wrote:
On Tuesday, January 13, 2015 04:35:31 PM Daniel Thompson wrote:
Using FIQ (if it is available) gives perf a better insight into the system by allowing code run with interrupts disabled to be profiled.
+/*
+ * This handler is called *unconditionally* from the default NMI/FIQ
+ * handler. The irq may not be anything to do with us so the main
+ * job of this function is to figure out if the irq passed in is ours
+ * or not.
+ */
This comment is an indicator that all the code in cpu_pmu_handle_fiq is in the wrong place. It (or something like it) belongs at the level of the default FIQ handler, and not in perf_event_cpu.c
I'm not sure about that.
If we moved this code into the default FIQ handler that means the PMU driver would have to explicitly share its irq value with the default FIQ handler (which doesn't really care about that).
I'm inclined to view this code as the effect of avoiding indirection in the default FIQ handler.
Regular irq code has nothing like this because &armpmu_dispatch_irq, irq and get_cpu_ptr(&cpu_pmu->hw_events->percpu_pmu) would all be looked up from the irqaction and any unwanted events are naturally filtered by the irq dispatch.
After your review I'm very tempted to put together a patch that dispatches NMIs indirectly from the default FIQ handler. However I still don't have much of an answer to Russell's concerns about code review.
Ironically I'm exactly the person RMK wants to protect against.
Perhaps request_nmi_irq (or code called from there) might be the best place to put a "bad code filter", rather than in the handler/dispatcher as the FIQ is running.
On Tue, Jan 13, 2015 at 04:35:31PM +0000, Daniel Thompson wrote:
+/*
+ * This handler is called *unconditionally* from the default NMI/FIQ
+ * handler. The irq may not be anything to do with us so the main
+ * job of this function is to figure out if the irq passed in is ours
+ * or not.
+ */
+void cpu_pmu_handle_fiq(int irq)
+{
+	int cpu = smp_processor_id();
This can be either debug_smp_processor_id() or raw_smp_processor_id(). raw_smp_processor_id() is fine from FIQ contexts, as seems to be debug_smp_processor_id(), but only because we guarantee that irqs_disabled() in there will be true.
+	if (irq != get_cpu_var(cpu_pmu_irqs))
+		return;
get_cpu_var() needs put_cpu_var() to undo its effects. get_cpu_var() calls preempt_disable(), which calls into lockdep... I think we determined that was fine last time we went digging? put_cpu_var() would call preempt_enable() which I'd hope would be safe in FIQ/NMI contexts?
+	(void)armpmu_dispatch_irq(irq,
+			get_cpu_ptr(&cpu_pmu->hw_events->percpu_pmu));
Again, get_cpu_xxx() needs to be balanced with a put_cpu_xxx().
On 19/01/15 17:48, Russell King - ARM Linux wrote:
On Tue, Jan 13, 2015 at 04:35:31PM +0000, Daniel Thompson wrote:
+/*
+ * This handler is called *unconditionally* from the default NMI/FIQ
+ * handler. The irq may not be anything to do with us so the main
+ * job of this function is to figure out if the irq passed in is ours
+ * or not.
+ */
+void cpu_pmu_handle_fiq(int irq)
+{
+	int cpu = smp_processor_id();
This can be either debug_smp_processor_id() or raw_smp_processor_id(). raw_smp_processor_id() is fine from FIQ contexts, as seems to be debug_smp_processor_id(), but only because we guarantee that irqs_disabled() in there will be true.
Curiously I was looking at exactly this yesterday (because I was intrigued why the NMI-safe bits of kgdb use raw_smp_processor_id() but the x86 arch_trigger_all_cpu_backtrace() implementation uses smp_processor_id()).
Given that the comments make clear smp_processor_id() is the preferred variant except for false positives, I concluded I would continue with smp_processor_id() for any code I write hanging off the default FIQ handler. No objections?
+	if (irq != get_cpu_var(cpu_pmu_irqs))
+		return;
get_cpu_var() needs put_cpu_var() to undo its effects. get_cpu_var() calls preempt_disable(), which calls into lockdep... I think we determined that was fine last time we went digging?
Yes. We reviewed lockdep from the point-of-view of RCU and found that lockdep disables most of itself when in_nmi() is true.
put_cpu_var() would call preempt_enable() which I'd hope would be safe in FIQ/NMI contexts?
Yes.
preempt_count_add/sub form part of the work done by nmi_enter() and nmi_exit().
However this code gets no benefit from calling get_cpu_var(). I think it would be better to switch it to this_cpu_ptr.
+	(void)armpmu_dispatch_irq(irq,
+			get_cpu_ptr(&cpu_pmu->hw_events->percpu_pmu));
Again, get_cpu_xxx() needs to be balanced with a put_cpu_xxx().
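Putting both points together, a sketch of how the handler might look using the non-preempting per-CPU accessors (this_cpu_read() and this_cpu_ptr() neither touch the preempt count nor need a put_cpu_var() pairing):

void cpu_pmu_handle_fiq(int irq)
{
	/* this_cpu_read() is NMI-safe and needs no balancing call */
	if (irq != this_cpu_read(cpu_pmu_irqs))
		return;

	(void)armpmu_dispatch_irq(irq,
			this_cpu_ptr(&cpu_pmu->hw_events->percpu_pmu));
}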
Hi Thomas, Hi Russell: This RFC is particularly for your attention since it results directly from feedback I've received from both of you, albeit quite a few months ago now.
This patchset demonstrates using FIQ to improve the quality of the PMU trace on ARM systems. To do so it introduces generic changes that allow irqs to be routed to NMI.
This patchset applies on top of my own patchset:
arm: Implement arch_trigger_all_cpu_backtrace http://thread.gmane.org/gmane.linux.kernel/1864829
I think the most important aspects of the code are clear without reference to the previous patchset; however, the patches will not run (nor apply cleanly) without it.
v2:
* Removed the direct calls in handle_fiq_as_nmi (Joshua Clayton). There is now indirection in the default FIQ handler so, to address RMK's concerns regarding code review, there is also a means for arch code to filter the available handlers.
The new approach has got rid of a *lot* of nasty hairballs in the code (including those that came up in the reviews from Joshua Clayton and Russell King).
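The arch filter is only declared in the excerpt below (arch_filter_nmi_handler()); one plausible shape for an implementation, purely as a hypothetical sketch, is a whitelist of handlers that have been reviewed for NMI/FIQ-safety:

int arch_filter_nmi_handler(irq_handler_t handler)
{
	/* Hypothetical whitelist: only handlers audited for FIQ-safety
	 * may be attached to an NMI-routed interrupt. */
	if (handler == armpmu_dispatch_irq)
		return 0;

	return -EPERM;
}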
Daniel Thompson (5):
  arm: irq: Add a __nmi_count stat
  irq: Allow interrupts to be routed to NMI (or similar)
  irq: gic: Add support for NMI routing
  arm: perf: Make v7 support FIQ-safe
  arm: perf: Use FIQ to handle PMU events.
 arch/arm/include/asm/hardirq.h   |  1 +
 arch/arm/include/asm/pmu.h       |  2 ++
 arch/arm/kernel/irq.c            |  7 ++++-
 arch/arm/kernel/perf_event.c     |  2 +-
 arch/arm/kernel/perf_event_cpu.c | 14 +++++++--
 arch/arm/kernel/perf_event_v7.c  | 11 ++-----
 arch/arm/kernel/traps.c          | 35 ++++++++++++++++++----
 drivers/irqchip/irq-gic.c        | 64 ++++++++++++++++++++++++++++------------
 include/linux/interrupt.h        |  5 ++++
 include/linux/irq.h              |  2 ++
 include/linux/irqchip/arm-gic.h  |  6 +++-
 include/linux/irqdesc.h          |  3 ++
 kernel/irq/irqdesc.c             | 48 ++++++++++++++++++++++++++++++
 kernel/irq/manage.c              | 46 +++++++++++++++++++++++++++++
 14 files changed, 207 insertions(+), 39 deletions(-)
-- 1.9.3
Extends the irq statistics for ARM, making it possible to quickly observe how many times the default FIQ handler has executed.
In /proc/interrupts we use the NMI terminology (rather than FIQ) because the statistic is only likely to be updated when we run the default FIQ handler (handle_fiq_as_nmi).
Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org>
---
 arch/arm/include/asm/hardirq.h | 1 +
 arch/arm/kernel/irq.c          | 7 ++++++-
 arch/arm/kernel/traps.c        | 2 ++
 3 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/arch/arm/include/asm/hardirq.h b/arch/arm/include/asm/hardirq.h
index 5df33e30ae1b..27ab43b5d7e2 100644
--- a/arch/arm/include/asm/hardirq.h
+++ b/arch/arm/include/asm/hardirq.h
@@ -9,6 +9,7 @@

 typedef struct {
 	unsigned int __softirq_pending;
+	unsigned int __nmi_count;
 #ifdef CONFIG_SMP
 	unsigned int ipi_irqs[NR_IPI];
 #endif
diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c
index ad857bada96c..6dd1619e0700 100644
--- a/arch/arm/kernel/irq.c
+++ b/arch/arm/kernel/irq.c
@@ -48,13 +48,18 @@ unsigned long irq_err_count;

 int arch_show_interrupts(struct seq_file *p, int prec)
 {
+	int i;
+
 #ifdef CONFIG_FIQ
 	show_fiq_list(p, prec);
 #endif
 #ifdef CONFIG_SMP
 	show_ipi_list(p, prec);
 #endif
-	seq_printf(p, "%*s: %10lu\n", prec, "Err", irq_err_count);
+	seq_printf(p, "%*s: ", prec, "NMI");
+	for_each_online_cpu(i)
+		seq_printf(p, "%10u ", __get_irq_stat(i, __nmi_count));
+	seq_printf(p, "\n%*s: %10lu\n", prec, "Err", irq_err_count);
 	return 0;
 }

diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 1836415b8a5c..5645f81ac4cc 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -476,8 +476,10 @@ die_sig:
  */
asmlinkage void __exception_irq_entry handle_fiq_as_nmi(struct pt_regs *regs)
 {
+	unsigned int cpu = smp_processor_id();
 	struct pt_regs *old_regs = set_irq_regs(regs);

+	__inc_irq_stat(cpu, __nmi_count);
 	nmi_enter();
#ifdef CONFIG_ARM_GIC
Some combinations of architectures and interrupt controllers make it possible for arbitrary interrupt signals to be selectively made immune to masking by local_irq_disable(). For example, on ARM platforms, many interrupt controllers allow interrupts to be routed to FIQ rather than IRQ.
Such routing could be exploited to implement the debug and tracing features that rely on NMI on x86 platforms (perf, hard lockup detection, kgdb).
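For illustration, requesting an NMI-routed interrupt would look something like the sketch below (my_nmi_handler, "my-dev" and dev are hypothetical names; the fallback mirrors what the final patch in this series does for the PMU):

  /* Ask for NMI/FIQ routing; this fails if the line is shared, the
   * irqchip cannot reroute it, or the arch filter rejects the handler. */
  err = request_irq(irq, my_nmi_handler, IRQF_NOBALANCING | IRQF_NMI,
                    "my-dev", dev);
  if (err)
          /* Fall back to an ordinary maskable interrupt. */
          err = request_irq(irq, my_nmi_handler,
                            IRQF_NOBALANCING | IRQF_NO_THREAD,
                            "my-dev", dev);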
Signed-off-by: Daniel Thompson daniel.thompson@linaro.org --- include/linux/interrupt.h | 5 +++++ include/linux/irq.h | 2 ++ include/linux/irqdesc.h | 3 +++ kernel/irq/irqdesc.c | 48 +++++++++++++++++++++++++++++++++++++++++++++++ kernel/irq/manage.c | 46 +++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 104 insertions(+)
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index d9b05b5bf8c7..839ad225bc97 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -57,6 +57,8 @@ * IRQF_NO_THREAD - Interrupt cannot be threaded * IRQF_EARLY_RESUME - Resume IRQ early during syscore instead of at device * resume time. + * IRQF_NMI - Route the interrupt to an NMI or some similar signal that is not + * masked by local_irq_disable(). */ #define IRQF_DISABLED 0x00000020 #define IRQF_SHARED 0x00000080 @@ -70,8 +72,10 @@ #define IRQF_FORCE_RESUME 0x00008000 #define IRQF_NO_THREAD 0x00010000 #define IRQF_EARLY_RESUME 0x00020000 +#define __IRQF_NMI 0x00040000
#define IRQF_TIMER (__IRQF_TIMER | IRQF_NO_SUSPEND | IRQF_NO_THREAD) +#define IRQF_NMI (__IRQF_NMI | IRQF_NO_THREAD)
/* * These values can be returned by request_any_context_irq() and @@ -649,5 +653,6 @@ int arch_show_interrupts(struct seq_file *p, int prec); extern int early_irq_init(void); extern int arch_probe_nr_irqs(void); extern int arch_early_irq_init(void); +extern int arch_filter_nmi_handler(irq_handler_t);
#endif diff --git a/include/linux/irq.h b/include/linux/irq.h index d09ec7a1243e..695eb37f04ae 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -307,6 +307,7 @@ static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d) * @irq_eoi: end of interrupt * @irq_set_affinity: set the CPU affinity on SMP machines * @irq_retrigger: resend an IRQ to the CPU + * @irq_set_nmi_routing:set whether interrupt can act like NMI * @irq_set_type: set the flow type (IRQ_TYPE_LEVEL/etc.) of an IRQ * @irq_set_wake: enable/disable power-management wake-on of an IRQ * @irq_bus_lock: function to lock access to slow bus (i2c) chips @@ -341,6 +342,7 @@ struct irq_chip {
int (*irq_set_affinity)(struct irq_data *data, const struct cpumask *dest, bool force); int (*irq_retrigger)(struct irq_data *data); + int (*irq_set_nmi_routing)(struct irq_data *data, unsigned int nmi); int (*irq_set_type)(struct irq_data *data, unsigned int flow_type); int (*irq_set_wake)(struct irq_data *data, unsigned int on);
diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index faf433af425e..408d2e4ed40f 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -213,4 +213,7 @@ __irq_set_preflow_handler(unsigned int irq, irq_preflow_handler_t handler) } #endif
+int handle_nmi_irq_desc(unsigned int irq, struct irq_desc *desc); +int handle_nmi_irq(unsigned int irq); + #endif diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 99793b9b6d23..876d01a6ad74 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -646,3 +646,51 @@ unsigned int kstat_irqs_usr(unsigned int irq) irq_unlock_sparse(); return sum; } + +/** + * handle_nmi_irq_desc - Call an NMI handler + * @irq: the interrupt number + * @desc: the interrupt description structure for this irq + * + * To the caller this function is similar in scope to generic_handle_irq_desc() + * but without any attempt to manage the handler flow. We assume that if the + * flow is complex then NMI routing is a bad idea; the caller is expected to + * handle the ack, clear, mask and unmask issues if necessary. + * + * Note that this function does not take any of the usual locks. Instead + * is relies on NMIs being prohibited from sharing interrupts (i.e. + * there will be exactly one irqaction) and that no call to free_irq() + * will be made whilst the handler is running. + */ +int handle_nmi_irq_desc(unsigned int irq, struct irq_desc *desc) +{ + struct irqaction *action = desc->action; + + BUG_ON(action->next); + + return action->handler(irq, action->dev_id); +} +EXPORT_SYMBOL_GPL(handle_nmi_irq_desc); + +/** + * handle_nmi - Call an NMI handler + * @irq: the interrupt number + * @desc: the interrupt description structure for this irq + * + * To the caller this function is similar in scope to generic_handle_irq(), + * see handle_nmi_irq_desc for more detail. + */ +int handle_nmi_irq(unsigned int irq) +{ + /* + * irq_to_desc is either simple arithmetic (no locking) or a radix + * tree lookup (RCU). Both are safe from NMI. + */ + struct irq_desc *desc = irq_to_desc(irq); + + if (!desc) + return -EINVAL; + handle_nmi_irq_desc(irq, desc); + return 0; +} +EXPORT_SYMBOL_GPL(handle_nmi_irq); diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 80692373abd6..96212a0493c0 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -571,6 +571,17 @@ int can_request_irq(unsigned int irq, unsigned long irqflags) return canrequest; }
+int __irq_set_nmi_routing(struct irq_desc *desc, unsigned int irq, + unsigned int nmi) +{ + struct irq_chip *chip = desc->irq_data.chip; + + if (!chip || !chip->irq_set_nmi_routing) + return -EINVAL; + + return chip->irq_set_nmi_routing(&desc->irq_data, nmi); +} + int __irq_set_trigger(struct irq_desc *desc, unsigned int irq, unsigned long flags) { @@ -966,6 +977,16 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
if (desc->irq_data.chip == &no_irq_chip) return -ENOSYS; + + if (new->flags & __IRQF_NMI) { + if (new->flags & IRQF_SHARED) + return -EINVAL; + + ret = arch_filter_nmi_handler(new->handler); + if (ret < 0) + return ret; + } + if (!try_module_get(desc->owner)) return -ENODEV;
@@ -1153,6 +1174,19 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
init_waitqueue_head(&desc->wait_for_threads);
+ if (new->flags & __IRQF_NMI) { + ret = __irq_set_nmi_routing(desc, irq, true); + if (ret != 1) + goto out_mask; + } else { + ret = __irq_set_nmi_routing(desc, irq, false); + if (ret == 1) { + pr_err("Failed to disable NMI routing for irq %d\n", + irq); + goto out_mask; + } + } + /* Setup the type (level, edge polarity) if configured: */ if (new->flags & IRQF_TRIGGER_MASK) { ret = __irq_set_trigger(desc, irq, @@ -1758,3 +1792,15 @@ int request_percpu_irq(unsigned int irq, irq_handler_t handler,
return retval; } + +/* + * Allows architectures to deny requests to set __IRQF_NMI. + * + * Typically this is used to restrict the use of NMI handlers that do not + * originate from arch code. However the default implementation is + * extremely permissive. + */ +int __weak arch_filter_nmi_handler(irq_handler_t handler) +{ + return 0; +}
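For context, a minimal sketch of the expected call site for handle_nmi_irq() in an architecture's FIQ/NMI entry path (my_fiq_entry and the prior hwirq-to-irq translation are assumptions; the GIC patch later in this series contains the real version):

  static void my_fiq_entry(struct pt_regs *regs, unsigned int irq)
  {
          struct pt_regs *old_regs = set_irq_regs(regs);

          nmi_enter();
          /* No locking here: NMI actions are unshared and cannot be
           * freed mid-handler; any ack/eoi is this caller's business. */
          if (handle_nmi_irq(irq) < 0)
                  pr_warn("NMI taken for unmapped irq %u\n", irq);
          nmi_exit();
          set_irq_regs(old_regs);
  }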
On Wed, 21 Jan 2015, Daniel Thompson wrote:
> @@ -307,6 +307,7 @@ static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d)
>  * @irq_eoi: end of interrupt
>  * @irq_set_affinity: set the CPU affinity on SMP machines
>  * @irq_retrigger: resend an IRQ to the CPU
> + * @irq_set_nmi_routing:set whether interrupt can act like NMI

-ENOPARSE

> +int handle_nmi_irq_desc(unsigned int irq, struct irq_desc *desc);

And that's global for what?

> +int handle_nmi_irq(unsigned int irq);
> +
>  #endif
> diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
> index 99793b9b6d23..876d01a6ad74 100644
> --- a/kernel/irq/irqdesc.c
> +++ b/kernel/irq/irqdesc.c
> @@ -646,3 +646,51 @@ unsigned int kstat_irqs_usr(unsigned int irq)
>  	irq_unlock_sparse();
>  	return sum;
>  }
> +/**
> + * handle_nmi_irq_desc - Call an NMI handler
> + * @irq: the interrupt number
> + * @desc: the interrupt description structure for this irq
> + *
> + * To the caller this function is similar in scope to generic_handle_irq_desc()
> + * but without any attempt to manage the handler flow. We assume that if the

We assume nothing. We set clear rules and if possible we enforce them.

> + * flow is complex then NMI routing is a bad idea; the caller is expected to
> + * handle the ack, clear, mask and unmask issues if necessary.

And the caller is supposed to do that in which way?

> + * Note that this function does not take any of the usual locks. Instead
> + * is relies on NMIs being prohibited from sharing interrupts (i.e.
> + * there will be exactly one irqaction) and that no call to free_irq()
> + * will be made whilst the handler is running.

And how do you guarantee that? Not at all AFAICT.

> + */
> +int handle_nmi_irq_desc(unsigned int irq, struct irq_desc *desc)
> +{
> +	struct irqaction *action = desc->action;
> +
> +	BUG_ON(action->next);
> +
> +	return action->handler(irq, action->dev_id);
> +}
> +EXPORT_SYMBOL_GPL(handle_nmi_irq_desc);
You seem to have a strong determination to add EXPORT_SYMBOL_GPL to everything and some more. How is a module supposed to call this?
> +/**
> + * handle_nmi - Call an NMI handler
> + * @irq: the interrupt number
> + * @desc: the interrupt description structure for this irq
> + *
> + * To the caller this function is similar in scope to generic_handle_irq(),
> + * see handle_nmi_irq_desc for more detail.

I don't see a detail here which connects that in any way to generic_handle_irq().

> + */
> +int handle_nmi_irq(unsigned int irq)
> +{
> +	/*
> +	 * irq_to_desc is either simple arithmetic (no locking) or a radix
> +	 * tree lookup (RCU). Both are safe from NMI.
> +	 */
> +	struct irq_desc *desc = irq_to_desc(irq);
> +
> +	if (!desc)
> +		return -EINVAL;
> +	handle_nmi_irq_desc(irq, desc);
> +	return 0;
> +}
> +EXPORT_SYMBOL_GPL(handle_nmi_irq);
Sigh. Why would any low level entry handler live in a module?
> diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
> index 80692373abd6..96212a0493c0 100644
> --- a/kernel/irq/manage.c
> +++ b/kernel/irq/manage.c
> @@ -571,6 +571,17 @@ int can_request_irq(unsigned int irq, unsigned long irqflags)
>  	return canrequest;
>  }
Of course, because the function you copied has no documentation, you are supposed to omit it as well, right?
> +int __irq_set_nmi_routing(struct irq_desc *desc, unsigned int irq,

And irq is used for what? Just because the function you copied does not use it either?

And why is it global? Just because the function you copied is global as well?

> +			  unsigned int nmi)
> +{
> +	struct irq_chip *chip = desc->irq_data.chip;
> +
> +	if (!chip || !chip->irq_set_nmi_routing)
> +		return -EINVAL;
> +
> +	return chip->irq_set_nmi_routing(&desc->irq_data, nmi);
> +}
> +
>  int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
>  		      unsigned long flags)
>  {
> @@ -966,6 +977,16 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
>  	if (desc->irq_data.chip == &no_irq_chip)
>  		return -ENOSYS;
> +
> +	if (new->flags & __IRQF_NMI) {
> +		if (new->flags & IRQF_SHARED)
> +			return -EINVAL;
> +
> +		ret = arch_filter_nmi_handler(new->handler);
See rant below.
> +		if (ret < 0)
> +			return ret;
> +	}
> +
>  	if (!try_module_get(desc->owner))
>  		return -ENODEV;
>
> @@ -1153,6 +1174,19 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
>
>  	init_waitqueue_head(&desc->wait_for_threads);
>
> +	if (new->flags & __IRQF_NMI) {
> +		ret = __irq_set_nmi_routing(desc, irq, true);
> +		if (ret != 1)
> +			goto out_mask;
Another set of magic return values which are completely undocumented and follow the windows programming style. What's wrong with 0 on success?
> +	} else {
> +		ret = __irq_set_nmi_routing(desc, irq, false);
> +		if (ret == 1) {
> +			pr_err("Failed to disable NMI routing for irq %d\n",
> +			       irq);
Can we add a bit more unreadable conditions here?
What's wrong with
	ret = irq_setup_nmi(desc, new->flags & __IRQF_NMI);
	if (ret) {
		pr_err("some useful info for both cases");
		goto out;
	}
????
> +/*
> + * Allows architectures to deny requests to set __IRQF_NMI.
> + *
> + * Typically this is used to restrict the use of NMI handlers that do not
> + * originate from arch code. However the default implementation is
> + * extremely permissive.
> + */
> +int __weak arch_filter_nmi_handler(irq_handler_t handler)
Your explanation above is completely useless and the default is wrong as well.
What is this function going to solve? Nothing, AFAICT.
Why is handler a proper decision criteria? How are the decisions going to look like?
Looking at your proposed ARM implementation just makes me ROTFL. You whitelist the perf handler. So any request for a random irq number with the perf handler as a target will succeed as long as that irq line can be switched to NMI mode.
Before you send another iteration of this, can you please sit down and do a proper write up of the goals and the design to reach those goals?
I'm certainly not going to waste my time and look at another cobbled together 'works for me' hackery.
Thanks,
tglx
This patch provides an implementation of irq_set_nmi_routing by allowing SPIs to be switched between group 1 (IRQ) and group 0 (FIQ).
It also replaces the interface used between the default FIQ handler and the GIC. These extensions are required in order to allow SPIs to be acknowledged and completed.
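For reference, a sketch of the group-register arithmetic the patch relies on (GICv2: one GICD_IGROUPR bit per interrupt, 32 interrupts per 4-byte register; a set bit selects group 1/IRQ, a clear bit selects group 0/FIQ):

  unsigned int grp_reg = hwirq / 32 * 4;  /* byte offset of the IGROUPR word */
  u32 grp_bit = BIT(hwirq % 32);          /* this interrupt's bit within it */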
Signed-off-by: Daniel Thompson daniel.thompson@linaro.org --- arch/arm/kernel/traps.c | 29 +++++++++++++++---- drivers/irqchip/irq-gic.c | 64 +++++++++++++++++++++++++++++------------ include/linux/irqchip/arm-gic.h | 6 +++- 3 files changed, 74 insertions(+), 25 deletions(-)
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 5645f81ac4cc..445fdf26b1af 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -26,6 +26,7 @@ #include <linux/init.h> #include <linux/sched.h> #include <linux/irq.h> +#include <linux/interrupt.h> #include <linux/irqchip/arm-gic.h>
#include <linux/atomic.h> @@ -462,6 +463,23 @@ die_sig: arm_notify_die("Oops - undefined instruction", regs, &info, 0, 6); }
+int arch_filter_nmi_handler(irq_handler_t handler) +{ + irq_handler_t whitelist[] = { + }; + int i; + + for (i = 0; i < ARRAY_SIZE(whitelist); i++) + if (handler == whitelist[i]) { + pr_debug("%pS accepted for use as NMI handler\n", + handler); + return 0; + } + + pr_err("%pS cannot be used as an NMI handler\n", handler); + return -EPERM; +} + /* * Handle FIQ similarly to NMI on x86 systems. * @@ -478,19 +496,20 @@ asmlinkage void __exception_irq_entry handle_fiq_as_nmi(struct pt_regs *regs) { unsigned int cpu = smp_processor_id(); struct pt_regs *old_regs = set_irq_regs(regs); + enum irqreturn irqret = 0;
__inc_irq_stat(cpu, __nmi_count); nmi_enter();
-#ifdef CONFIG_ARM_GIC - gic_handle_fiq_ipi(); -#endif + irqret = gic_handle_fiq(); + + if (irqret == IRQ_NONE) { #ifdef CONFIG_SMP - ipi_cpu_backtrace(regs); + ipi_cpu_backtrace(regs); #endif + }
nmi_exit(); - set_irq_regs(old_regs); }
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index c4f4a8827ed8..658c6dd5cf08 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -129,6 +129,9 @@ struct irq_chip gic_arch_extn = {
static struct gic_chip_data gic_data[MAX_GIC_NR] __read_mostly;
+static int gic_set_group_irq(struct gic_chip_data *gic, unsigned int hwirq, + int group); + #ifdef CONFIG_GIC_NON_BANKED static void __iomem *gic_get_percpu_base(union gic_base *base) { @@ -214,6 +217,18 @@ static void gic_eoi_irq(struct irq_data *d) writel_relaxed(gic_irq(d), gic_cpu_base(d) + GIC_CPU_EOI); }
+static int gic_set_nmi_routing(struct irq_data *d, unsigned int nmi) +{ + struct gic_chip_data *gic = irq_data_get_irq_chip_data(d); + int ret; + + ret = gic_set_group_irq(gic, gic_irq(d), !nmi); + if (ret >= 0) + ret = !ret; + + return ret; +} + static int gic_set_type(struct irq_data *d, unsigned int type) { void __iomem *base = gic_dist_base(d); @@ -346,6 +361,7 @@ static struct irq_chip gic_chip = { .irq_mask = gic_mask_irq, .irq_unmask = gic_unmask_irq, .irq_eoi = gic_eoi_irq, + .irq_set_nmi_routing = gic_set_nmi_routing, .irq_set_type = gic_set_type, .irq_retrigger = gic_retrigger, #ifdef CONFIG_SMP @@ -364,8 +380,8 @@ static struct irq_chip gic_chip = { * If is safe to call this function on systems which do not support * grouping (it will have no effect). */ -static void gic_set_group_irq(struct gic_chip_data *gic, unsigned int hwirq, - int group) +static int gic_set_group_irq(struct gic_chip_data *gic, unsigned int hwirq, + int group) { void __iomem *base = gic_data_dist_base(gic); unsigned int grp_reg = hwirq / 32 * 4; @@ -381,7 +397,7 @@ static void gic_set_group_irq(struct gic_chip_data *gic, unsigned int hwirq, * the EnableGrp1 bit set. */ if (!(GICD_ENABLE_GRP1 & readl_relaxed(base + GIC_DIST_CTRL))) - return; + return -EINVAL;
raw_spin_lock(&irq_controller_lock);
@@ -403,32 +419,42 @@ static void gic_set_group_irq(struct gic_chip_data *gic, unsigned int hwirq, writel_relaxed(pri_val, base + GIC_DIST_PRI + pri_reg);
raw_spin_unlock(&irq_controller_lock); -}
+ return group; +}
-/* - * Fully acknowledge (both ack and eoi) any outstanding FIQ-based IPI, - * otherwise do nothing. - */ -void gic_handle_fiq_ipi(void) +enum irqreturn gic_handle_fiq(void) { struct gic_chip_data *gic = &gic_data[0]; void __iomem *cpu_base = gic_data_cpu_base(gic); - unsigned long irqstat, irqnr; + unsigned long irqstat, hwirq; + unsigned int irq = 0; + + /* + * This function is called unconditionally by the default FIQ + * handler so first we must check that the driver is + * initialized. + */ + if (!gic->gic_irqs) + return IRQ_NONE;
if (WARN_ON(!in_nmi())) - return; + return IRQ_NONE;
- while ((1u << readl_relaxed(cpu_base + GIC_CPU_HIGHPRI)) & - SMP_IPI_FIQ_MASK) { - irqstat = readl_relaxed(cpu_base + GIC_CPU_INTACK); - writel_relaxed(irqstat, cpu_base + GIC_CPU_EOI); + /* read intack with the priority mask set so we only acknowledge FIQs */ + writel_relaxed(GICC_INT_PRI_THRESHOLD >> 1, cpu_base + GIC_CPU_PRIMASK); + irqstat = readl_relaxed(cpu_base + GIC_CPU_INTACK); + writel_relaxed(GICC_INT_PRI_THRESHOLD, cpu_base + GIC_CPU_PRIMASK);
- irqnr = irqstat & GICC_IAR_INT_ID_MASK; - WARN_RATELIMIT(irqnr > 16, - "Unexpected irqnr %lu (bad prioritization?)\n", - irqnr); + hwirq = irqstat & GICC_IAR_INT_ID_MASK; + if (likely(hwirq > 15 && hwirq < 1021)) { + irq = irq_find_mapping(gic->domain, hwirq); + handle_nmi_irq(irq); } + + writel_relaxed(irqstat, gic_data_cpu_base(gic) + GIC_CPU_EOI); + + return IRQ_RETVAL(irq); }
void __init gic_cascade_irq(unsigned int gic_nr, unsigned int irq) diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h index 7690f70049a3..265ea31a5711 100644 --- a/include/linux/irqchip/arm-gic.h +++ b/include/linux/irqchip/arm-gic.h @@ -127,7 +127,11 @@ static inline void __init register_routable_domain_ops gic_routable_irq_domain_ops = ops; }
-void gic_handle_fiq_ipi(void); +#ifdef CONFIG_ARM_GIC +enum irqreturn gic_handle_fiq(void); +#else +static inline enum irqreturn gic_handle_fiq(void) { return IRQ_NONE; } +#endif
#endif /* __ASSEMBLY */ #endif
armv7pmu_disable_event() is called during irq handling. If irq handling switches over to FIQ then the spin locks in this function risk deadlock. Both armv7_pmnc_disable_counter() and armv7_pmnc_disable_intens() are unconditional co-processor writes. I haven't yet come up with a schedule where other users of pmu_lock would break if interleaved with these calls, so I have simply removed the locking.
The other change required is to avoid calling irq_work_run() when running from a FIQ handler. The pended work will instead be dispatched by the irq_work IPI or by a timer handler.
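To illustrate the deadlock being avoided, consider this hypothetical interleaving (armv7pmu_enable_event() stands in for any remaining pmu_lock user):

  armv7pmu_enable_event()
    raw_spin_lock_irqsave(&events->pmu_lock, flags)  /* masks IRQ, not FIQ */
      <PMU overflow raised as FIQ>
      armv7pmu_handle_irq()
        armv7pmu_disable_event()
          raw_spin_lock_irqsave(&events->pmu_lock, flags)  /* deadlock */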
Signed-off-by: Daniel Thompson daniel.thompson@linaro.org --- arch/arm/kernel/perf_event_v7.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-)
diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c index 8993770c47de..08f426486d3e 100644 --- a/arch/arm/kernel/perf_event_v7.c +++ b/arch/arm/kernel/perf_event_v7.c @@ -744,7 +744,6 @@ static void armv7pmu_enable_event(struct perf_event *event)
static void armv7pmu_disable_event(struct perf_event *event) { - unsigned long flags; struct hw_perf_event *hwc = &event->hw; struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); @@ -757,11 +756,6 @@ static void armv7pmu_disable_event(struct perf_event *event) }
/* - * Disable counter and interrupt - */ - raw_spin_lock_irqsave(&events->pmu_lock, flags); - - /* * Disable counter */ armv7_pmnc_disable_counter(idx); @@ -770,8 +764,6 @@ static void armv7pmu_disable_event(struct perf_event *event) * Disable interrupt for this counter */ armv7_pmnc_disable_intens(idx); - - raw_spin_unlock_irqrestore(&events->pmu_lock, flags); }
static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev) @@ -831,7 +823,8 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev) * platforms that can have the PMU interrupts raised as an NMI, this * will not work. */ - irq_work_run(); + if (!in_nmi()) + irq_work_run();
return IRQ_HANDLED; }
Using FIQ (when it is available) gives perf better insight into the system by allowing code that runs with interrupts disabled to be profiled.
Signed-off-by: Daniel Thompson daniel.thompson@linaro.org --- arch/arm/include/asm/pmu.h | 2 ++ arch/arm/kernel/perf_event.c | 2 +- arch/arm/kernel/perf_event_cpu.c | 14 +++++++++++--- arch/arm/kernel/traps.c | 4 ++++ 4 files changed, 18 insertions(+), 4 deletions(-)
diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h index b1596bd59129..e6c87e393547 100644 --- a/arch/arm/include/asm/pmu.h +++ b/arch/arm/include/asm/pmu.h @@ -123,6 +123,8 @@ struct arm_pmu {
extern const struct dev_pm_ops armpmu_dev_pm_ops;
+irqreturn_t armpmu_dispatch_irq(int irq, void *dev); + int armpmu_register(struct arm_pmu *armpmu, int type);
u64 armpmu_event_update(struct perf_event *event); diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index f7c65adaa428..5ae9adf7f18e 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -296,7 +296,7 @@ validate_group(struct perf_event *event) return 0; }
-static irqreturn_t armpmu_dispatch_irq(int irq, void *dev) +irqreturn_t armpmu_dispatch_irq(int irq, void *dev) { struct arm_pmu *armpmu; struct platform_device *plat_device; diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c index b30a2645c2f1..e5c9c5d607ab 100644 --- a/arch/arm/kernel/perf_event_cpu.c +++ b/arch/arm/kernel/perf_event_cpu.c @@ -177,9 +177,17 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler) continue; }
- err = request_irq(irq, handler, - IRQF_NOBALANCING | IRQF_NO_THREAD, "arm-pmu", - per_cpu_ptr(&hw_events->percpu_pmu, i)); + err = request_irq( + irq, handler, + IRQF_NOBALANCING | IRQF_NMI, + "arm-pmu", per_cpu_ptr(&hw_events->percpu_pmu, i)); + if (err) { + err = request_irq( + irq, handler, + IRQF_NOBALANCING | IRQF_NO_THREAD, + "arm-pmu", + per_cpu_ptr(&hw_events->percpu_pmu, i)); + } if (err) { pr_err("unable to request IRQ%d for ARM PMU counters\n", irq); diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 445fdf26b1af..a5ffddeb224e 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -39,6 +39,7 @@ #include <asm/tls.h> #include <asm/system_misc.h> #include <asm/opcodes.h> +#include <asm/pmu.h>
static const char *handler[]= { @@ -466,6 +467,9 @@ die_sig: int arch_filter_nmi_handler(irq_handler_t handler) { irq_handler_t whitelist[] = { +#ifdef CONFIG_HW_PERF_EVENTS + armpmu_dispatch_irq, +#endif }; int i;