Users of the recently proposed extensions to the FIQ infrastructure have reported a problem which causes the interrupt dispatcher in irq-gic.c (running as a normal IRQ handler) to try to acknowledge group 0 interrupts (group 0 should raise FIQ).
The problem occurs because the GICv1 found in Cortex A9 implementations explicitly uses the security state (normal or trusted world) to determine whether the interrupt controller should acknowledge group 0 interrupts. For FIQ to be useful as an NMI on Cortex A9 the kernel must run in trusted world; hence, by default, the GIC allows the CPU to acknowledge group 0 interrupts.
Two workarounds have been proposed, one which retrospectively corrects the problem if it is observed and one which uses the ARM MMU section flags to ensure the GIC driver can read INTACK from the current security context. The latter workaround, which is both more invasive and higher performance[1], is presented in this patchset.
This patchset depends upon my own patchset for ARM providing additional FIQ infrastructure but does contain the changes to the GIC driver required to support FIQ. The effect of this is that workaround code is found primarily in patches 5, 6 and 7.
[1] http://thread.gmane.org/gmane.linux.ports.arm.kernel/331027/focus=1748892
Daniel Thompson (6): irqchip: gic: Provide support for interrupt grouping irqchip: gic: Add support for FIQ management irqchip: gic: Remove spin locks from eoi_irq arm: mm: Avoid ioremap_page_range() for non-secure mappings irqchip: gic: Use non-secure aliased register set when FIQ is enabled arm: imx: non-secure aliased mapping of GIC registers
Marek Vasut (3): ARM: dump the status of NS bit in L1 PTE ARM: Add L1 PTE non-secure mapping ARM: socfpga: Map the GIC CPU registers as MT_DEVICE_NS
arch/arm/include/asm/io.h | 5 +- arch/arm/include/asm/mach/map.h | 4 +- arch/arm/include/asm/pgtable-2level-hwdef.h | 1 + arch/arm/mach-imx/mach-imx6q.c | 11 ++ arch/arm/mach-socfpga/socfpga.c | 8 + arch/arm/mm/dump.c | 5 + arch/arm/mm/ioremap.c | 4 + arch/arm/mm/mmu.c | 13 +- drivers/irqchip/irq-gic.c | 240 ++++++++++++++++++++++++++-- include/linux/irqchip/arm-gic.h | 4 +- 10 files changed, 273 insertions(+), 22 deletions(-)
-- 1.9.3
All GIC hardware except GICv1-without-TrustZone provides a means to group exceptions into group 0 (which can optionally be signalled using FIQ) and group 1. The kernel currently provides no means to exploit this. This patch alters the initialization of the GIC to place all interrupts into group 1, which is the foundational requirement to meaningfully use FIQ.
Note that the hardware functionality is unavailable to the kernel when a secure monitor is present because access to the grouping registers is prohibited outside "secure world" (this feature allows grouping to be used to allow hardware peripherals to send interrupts into the secure world). The GIC driver will automatically detect this and disable its attempts to group interrupts.
On systems without TrustZone support the kernel has the power to route interrupt sources to FIQ, potentially allowing a driver to exploit the NMI-like properties of FIQ.
Tested on Freescale i.MX6 (quad A9), STiH416 (dual A9) and a self-written qemu GICv2 model.
Signed-off-by: Daniel Thompson daniel.thompson@linaro.org Cc: Thomas Gleixner tglx@linutronix.de Cc: Jason Cooper jason@lakedaemon.net Cc: Nicolas Pitre nicolas.pitre@linaro.org Cc: Christoffer Dall christoffer.dall@linaro.org Cc: Sricharan R r.sricharan@ti.com Acked-by: Dirk Behme dirk.behme@de.bosch.com --- drivers/irqchip/irq-gic.c | 99 ++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 94 insertions(+), 5 deletions(-)
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index 7e11c9d..bbffca3 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -42,6 +42,9 @@ #include <linux/irqchip/chained_irq.h> #include <linux/irqchip/arm-gic.h>
+#ifdef CONFIG_FIQ +#include <asm/fiq.h> +#endif #include <asm/irq.h> #include <asm/exception.h> #include <asm/smp_plat.h> @@ -68,6 +71,9 @@ struct gic_chip_data { #ifdef CONFIG_GIC_NON_BANKED void __iomem *(*get_base)(union gic_base *); #endif +#ifdef CONFIG_FIQ + bool fiq_enable; +#endif };
static DEFINE_RAW_SPINLOCK(irq_controller_lock); @@ -131,6 +137,16 @@ static inline void gic_set_base_accessor(struct gic_chip_data *data, #define gic_set_base_accessor(d, f) #endif
+#ifdef CONFIG_FIQ +static inline bool gic_data_fiq_enable(struct gic_chip_data *data) +{ + return data->fiq_enable; +} +#else +static inline bool gic_data_fiq_enable( + struct gic_chip_data *data) { return false; } +#endif + static inline void __iomem *gic_dist_base(struct irq_data *d) { struct gic_chip_data *gic_data = irq_data_get_irq_chip_data(d); @@ -349,6 +365,42 @@ static struct irq_chip gic_chip = { .irq_set_wake = gic_set_wake, };
+#ifdef CONFIG_FIQ +static void __init gic_init_fiq(struct gic_chip_data *gic, + irq_hw_number_t first_irq, + unsigned int num_irqs) +{ + void __iomem *dist_base = gic_data_dist_base(gic_data); + unsigned int i; + + /* + * FIQ can only be supported on platforms without an extended irq_eoi + * method (otherwise we take a lock during eoi handling). + */ + if (gic_arch_extn.irq_eoi) + return; + + /* + * If grouping is not available (not implemented or prohibited by + * security mode) these registers a read-as-zero/write-ignored. + * However as a precaution we restore the reset default regardless of + * the result of the test. + */ + writel_relaxed(1, dist_base + GIC_DIST_IGROUP + 0); + gic->fiq_enable = readl_relaxed(dist_base + GIC_DIST_IGROUP + 0); + writel_relaxed(0, dist_base + GIC_DIST_IGROUP + 0); + pr_debug("gic: FIQ support %s\n", + gic->fiq_enable ? "enabled" : "disabled"); + + if (!gic->fiq_enable) + return; +} +#else /* CONFIG_FIQ */ +static inline void gic_init_fiq(struct gic_chip_data *gic, + irq_hw_number_t first_irq, + unsigned int num_irqs) {} +#endif /* CONFIG_FIQ */ + void __init gic_cascade_irq(unsigned int gic_nr, unsigned int irq) { if (gic_nr >= MAX_GIC_NR) @@ -408,13 +460,28 @@ static void __init gic_dist_init(struct gic_chip_data *gic) writel_relaxed(0xa0a0a0a0, base + GIC_DIST_PRI + i * 4 / 4);
/* + * Optionally set all global interrupts to be group 1. + */ + if (gic_data_fiq_enable(gic)) + for (i = 32; i < gic_irqs; i += 32) + writel_relaxed(0xffffffff, + base + GIC_DIST_IGROUP + i * 4 / 32); + + /* * Disable all interrupts. Leave the PPI and SGIs alone * as these enables are banked registers. */ for (i = 32; i < gic_irqs; i += 32) writel_relaxed(0xffffffff, base + GIC_DIST_ENABLE_CLEAR + i * 4 / 32);
- writel_relaxed(1, base + GIC_DIST_CTRL); + /* + * Set EnableGrp1/EnableGrp0 (bit 1 and 0) or EnableGrp (bit 0 only, + * bit 1 ignored) + */ + if (gic_data_fiq_enable(gic)) + writel_relaxed(3, base + GIC_DIST_CTRL); + else + writel_relaxed(1, base + GIC_DIST_CTRL); }
static void gic_cpu_init(struct gic_chip_data *gic) @@ -452,8 +519,20 @@ static void gic_cpu_init(struct gic_chip_data *gic) for (i = 0; i < 32; i += 4) writel_relaxed(0xa0a0a0a0, dist_base + GIC_DIST_PRI + i * 4 / 4);
+ /* + * Set all PPI and SGI interrupts to be group 1. + * + * If grouping is not available (not implemented or prohibited by + * security mode) these registers are read-as-zero/write-ignored. + */ + if (gic_data_fiq_enable(gic)) + writel_relaxed(0xffffffff, dist_base + GIC_DIST_IGROUP + 0); + writel_relaxed(0xf0, base + GIC_CPU_PRIMASK); - writel_relaxed(1, base + GIC_CPU_CTRL); + if (gic_data_fiq_enable(gic)) + writel_relaxed(0x1f, base + GIC_CPU_CTRL); + else + writel_relaxed(1, base + GIC_CPU_CTRL); }
void gic_cpu_if_down(void) @@ -537,7 +616,10 @@ static void gic_dist_restore(unsigned int gic_nr) writel_relaxed(gic_data[gic_nr].saved_spi_enable[i], dist_base + GIC_DIST_ENABLE_SET + i * 4);
- writel_relaxed(1, dist_base + GIC_DIST_CTRL); + if (gic_data_fiq_enable(&gic_data[gic_nr])) + writel_relaxed(3, dist_base + GIC_DIST_CTRL); + else + writel_relaxed(1, dist_base + GIC_DIST_CTRL); }
static void gic_cpu_save(unsigned int gic_nr) @@ -594,7 +676,7 @@ static void gic_cpu_restore(unsigned int gic_nr) writel_relaxed(0xa0a0a0a0, dist_base + GIC_DIST_PRI + i * 4);
writel_relaxed(0xf0, cpu_base + GIC_CPU_PRIMASK); - writel_relaxed(1, cpu_base + GIC_CPU_CTRL); + writel_relaxed(0x1f, cpu_base + GIC_CPU_CTRL); }
static int gic_notifier(struct notifier_block *self, unsigned long cmd, void *v) @@ -656,6 +738,7 @@ static void gic_raise_softirq(const struct cpumask *mask, unsigned int irq) { int cpu; unsigned long flags, map = 0; + unsigned long softint;
raw_spin_lock_irqsave(&irq_controller_lock, flags);
@@ -670,7 +753,11 @@ static void gic_raise_softirq(const struct cpumask *mask, unsigned int irq) dmb(ishst);
/* this always happens on GIC0 */ - writel_relaxed(map << 16 | irq, gic_data_dist_base(&gic_data[0]) + GIC_DIST_SOFTINT); + softint = map << 16 | irq; + if (gic_data_fiq_enable(&gic_data[0])) + softint |= 0x8000; + writel_relaxed(softint, + gic_data_dist_base(&gic_data[0]) + GIC_DIST_SOFTINT);
raw_spin_unlock_irqrestore(&irq_controller_lock, flags); } @@ -1014,6 +1101,8 @@ void __init gic_init_bases(unsigned int gic_nr, int irq_start,
gic->domain = irq_domain_add_legacy(node, gic_irqs, irq_base, hwirq_base, &gic_irq_domain_ops, gic); + + gic_init_fiq(gic, irq_base, gic_irqs); } else { gic->domain = irq_domain_add_linear(node, nr_routable_irqs, &gic_irq_domain_ops,
On Monday, July 21, 2014 at 04:47:12 PM, Daniel Thompson wrote:
All GIC hardware except GICv1-without-TrustZone provides a means to group exceptions into group 0 (which can optionally be signalled using FIQ) and group 1. The kernel currently provides no means to exploit this. This patch alters the initialization of the GIC to place all interrupts into group 1, which is the foundational requirement to meaningfully use FIQ.
[...]
@@ -670,7 +753,11 @@ static void gic_raise_softirq(const struct cpumask *mask, unsigned int irq) dmb(ishst);
/* this always happens on GIC0 */
- writel_relaxed(map << 16 | irq, gic_data_dist_base(&gic_data[0]) +
GIC_DIST_SOFTINT); + softint = map << 16 | irq;
- if (gic_data_fiq_enable(&gic_data[0]))
softint |= 0x8000;
These magic bits here could use some clarification, possibly a comment.
writel_relaxed(softint,
gic_data_dist_base(&gic_data[0]) + GIC_DIST_SOFTINT);
raw_spin_unlock_irqrestore(&irq_controller_lock, flags);
}
[...]
Best regards, Marek Vasut
This patch introduces callbacks to route interrupts to or away from the FIQ signal and registers these callbacks with the FIQ infrastructure (if the device can support it).
Both these aspects combine and allow a driver to deploy a FIQ handler without any machine specific knowledge; it can be used effectively on multi-platform kernels.
Signed-off-by: Daniel Thompson daniel.thompson@linaro.org Cc: Thomas Gleixner tglx@linutronix.de Cc: Jason Cooper jason@lakedaemon.net Cc: Nicolas Pitre nicolas.pitre@linaro.org Cc: Christoffer Dall christoffer.dall@linaro.org Cc: Sricharan R r.sricharan@ti.com --- drivers/irqchip/irq-gic.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+)
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index bbffca3..d3c7559 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -366,6 +366,69 @@ static struct irq_chip gic_chip = { };
#ifdef CONFIG_FIQ +/* + * Shift an interrupt between Group 0 and Group 1. + * + * In addition to changing the group we also modify the priority to + * match what "ARM strongly recommends" for a system where no Group 1 + * interrupt must ever preempt a Group 0 interrupt. + */ +static void gic_set_group_irq(struct irq_data *d, int group) +{ + unsigned int grp_reg = gic_irq(d) / 32 * 4; + u32 grp_mask = 1 << (gic_irq(d) % 32); + u32 grp_val; + + unsigned int pri_reg = (gic_irq(d) / 4) * 4; + u32 pri_mask = 1 << (7 + ((gic_irq(d) % 4) * 8)); + u32 pri_val; + + raw_spin_lock(&irq_controller_lock); + + grp_val = readl_relaxed(gic_dist_base(d) + GIC_DIST_IGROUP + grp_reg); + pri_val = readl_relaxed(gic_dist_base(d) + GIC_DIST_PRI + pri_reg); + + if (group) { + grp_val |= grp_mask; + pri_val |= pri_mask; + } else { + grp_val &= ~grp_mask; + pri_val &= ~pri_mask; + } + + writel_relaxed(grp_val, gic_dist_base(d) + GIC_DIST_IGROUP + grp_reg); + writel_relaxed(pri_val, gic_dist_base(d) + GIC_DIST_PRI + pri_reg); + + raw_spin_unlock(&irq_controller_lock); +} + +static void gic_enable_fiq(struct irq_data *d) +{ + gic_set_group_irq(d, 0); +} + +static void gic_disable_fiq(struct irq_data *d) +{ + gic_set_group_irq(d, 1); +} + +static int gic_ack_fiq(struct irq_data *d) +{ + struct gic_chip_data *gic = irq_data_get_irq_chip_data(d); + u32 irqstat, irqnr; + + irqstat = readl_relaxed(gic_data_cpu_base(gic) + GIC_CPU_INTACK); + irqnr = irqstat & GICC_IAR_INT_ID_MASK; + return irq_find_mapping(gic->domain, irqnr); +} + +static struct fiq_chip gic_fiq = { + .fiq_enable = gic_enable_fiq, + .fiq_disable = gic_disable_fiq, + .fiq_ack = gic_ack_fiq, + .fiq_eoi = gic_eoi_irq, +}; + static void __init gic_init_fiq(struct gic_chip_data *gic, irq_hw_number_t first_irq, unsigned int num_irqs) @@ -394,6 +457,12 @@ static void __init gic_init_fiq(struct gic_chip_data *gic,
if (!gic->fiq_enable) return; + + /* + * FIQ is supported on this device! Register our chip data. + */ + for (i = 0; i < num_irqs; i++) + fiq_register_mapping(first_irq + i, &gic_fiq); } #else /* CONFIG_FIQ */ static inline void gic_init_fiq(struct gic_chip_data *gic,
This patch is motivated by the comment it removes from gic_init_fiq, namely that the spin locks in eoi_irq preclude certain platforms from supporting FIQ.
Currently there is only one upstream platform (tegra) that actually hooks gic_arch_extn.irq_eoi and it does not require these spin locks.
Signed-off-by: Daniel Thompson daniel.thompson@linaro.org Cc: Thomas Gleixner tglx@linutronix.de Cc: Jason Cooper jason@lakedaemon.net Cc: Peter De Schrijver pdeschrijver@nvidia.com --- drivers/irqchip/irq-gic.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-)
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index d3c7559..5c934a4 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -191,11 +191,8 @@ static void gic_unmask_irq(struct irq_data *d)
static void gic_eoi_irq(struct irq_data *d) { - if (gic_arch_extn.irq_eoi) { - raw_spin_lock(&irq_controller_lock); + if (gic_arch_extn.irq_eoi) gic_arch_extn.irq_eoi(d); - raw_spin_unlock(&irq_controller_lock); - }
writel_relaxed(gic_irq(d), gic_cpu_base(d) + GIC_CPU_EOI); } @@ -437,13 +434,6 @@ static void __init gic_init_fiq(struct gic_chip_data *gic, unsigned int i;
/* - * FIQ can only be supported on platforms without an extended irq_eoi - * method (otherwise we take a lock during eoi handling). - */ - if (gic_arch_extn.irq_eoi) - return; - - /* * If grouping is not available (not implemented or prohibited by * security mode) these registers a read-as-zero/write-ignored. * However as a precaution we restore the reset default regardless of
From: Marek Vasut marex@denx.de
Add minor adjustment to the page table dumper to also print the status of the L1 PTE NS bit.
Signed-off-by: Marek Vasut marex@denx.de --- arch/arm/mm/dump.c | 5 +++++ 1 file changed, 5 insertions(+)
diff --git a/arch/arm/mm/dump.c b/arch/arm/mm/dump.c index c508f41..0100edb 100644 --- a/arch/arm/mm/dump.c +++ b/arch/arm/mm/dump.c @@ -176,6 +176,11 @@ static const struct prot_bits section_bits[] = { .val = PMD_SECT_S, .set = "SHD", .clear = " ", + }, { + .mask = PMD_SECT_NS, + .val = PMD_SECT_NS, + .set = "NS ", + .clear = "ns ", }, };
From: Marek Vasut marex@denx.de
Add new device type, MT_DEVICE_NS. This type sets the NS bit in L1 PTE [1]. Accesses to a memory region which is mapped this way generate non-secure access to that memory area. One must be careful here, since the NS bit is only available in L1 PTE, therefore when creating the mapping, the mapping must be at least 1 MiB big and must be aligned to 1 MiB. If that condition was false, the kernel would use regular L2 page mapping for this area instead and the NS bit setting would be ineffective.
[1] See DDI0406B , Section B3 "Virtual Memory System Architecture (VMSA)", Subsection B3.3.1 "Translation table entry formats", paragraph "First-level descriptors", Table B3-1 and associated description of the NS bit in the "Section" table entry.
Signed-off-by: Marek Vasut marex@denx.de Signed-off-by: Daniel Thompson daniel.thompson@linaro.org --- arch/arm/include/asm/io.h | 5 ++++- arch/arm/include/asm/mach/map.h | 4 ++-- arch/arm/include/asm/pgtable-2level-hwdef.h | 1 + arch/arm/mm/mmu.c | 13 ++++++++++++- 4 files changed, 19 insertions(+), 4 deletions(-)
diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h index 3d23418..22765e0 100644 --- a/arch/arm/include/asm/io.h +++ b/arch/arm/include/asm/io.h @@ -125,8 +125,10 @@ static inline u32 __raw_readl(const volatile void __iomem *addr) #define MT_DEVICE_NONSHARED 1 #define MT_DEVICE_CACHED 2 #define MT_DEVICE_WC 3 +#define MT_DEVICE_NS 4 + /* - * types 4 onwards can be found in asm/mach/map.h and are undefined + * types 5 onwards can be found in asm/mach/map.h and are undefined * for ioremap */
@@ -343,6 +345,7 @@ extern void _memset_io(volatile void __iomem *, int, size_t); #define ioremap_nocache(cookie,size) __arm_ioremap((cookie), (size), MT_DEVICE) #define ioremap_cache(cookie,size) __arm_ioremap((cookie), (size), MT_DEVICE_CACHED) #define ioremap_wc(cookie,size) __arm_ioremap((cookie), (size), MT_DEVICE_WC) +#define ioremap_ns(cookie,size) __arm_ioremap((cookie), (size), MT_DEVICE_NS) #define iounmap __arm_iounmap
/* diff --git a/arch/arm/include/asm/mach/map.h b/arch/arm/include/asm/mach/map.h index f98c7f3..42be265 100644 --- a/arch/arm/include/asm/mach/map.h +++ b/arch/arm/include/asm/mach/map.h @@ -21,9 +21,9 @@ struct map_desc { unsigned int type; };
-/* types 0-3 are defined in asm/io.h */ +/* types 0-4 are defined in asm/io.h */ enum { - MT_UNCACHED = 4, + MT_UNCACHED = 5, MT_CACHECLEAN, MT_MINICLEAN, MT_LOW_VECTORS, diff --git a/arch/arm/include/asm/pgtable-2level-hwdef.h b/arch/arm/include/asm/pgtable-2level-hwdef.h index 5cfba15..d24e7ea 100644 --- a/arch/arm/include/asm/pgtable-2level-hwdef.h +++ b/arch/arm/include/asm/pgtable-2level-hwdef.h @@ -36,6 +36,7 @@ #define PMD_SECT_S (_AT(pmdval_t, 1) << 16) /* v6 */ #define PMD_SECT_nG (_AT(pmdval_t, 1) << 17) /* v6 */ #define PMD_SECT_SUPER (_AT(pmdval_t, 1) << 18) /* v6 */ +#define PMD_SECT_NS (_AT(pmdval_t, 1) << 19) /* v6 */ #define PMD_SECT_AF (_AT(pmdval_t, 0))
#define PMD_SECT_UNCACHED (_AT(pmdval_t, 0)) diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index ab14b79..9baf1cb 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -268,6 +268,13 @@ static struct mem_type mem_types[] = { .prot_sect = PROT_SECT_DEVICE, .domain = DOMAIN_IO, }, + [MT_DEVICE_NS] = { /* Non-secure accesses from secure mode */ + .prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_SHARED | + L_PTE_SHARED, + .prot_l1 = PMD_TYPE_TABLE, + .prot_sect = PROT_SECT_DEVICE | PMD_SECT_S | PMD_SECT_NS, + .domain = DOMAIN_IO, + }, [MT_UNCACHED] = { .prot_pte = PROT_PTE_DEVICE, .prot_l1 = PMD_TYPE_TABLE, @@ -474,6 +481,7 @@ static void __init build_mem_type_table(void) mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_XN; mem_types[MT_DEVICE_CACHED].prot_sect |= PMD_SECT_XN; mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_XN; + mem_types[MT_DEVICE_NS].prot_sect |= PMD_SECT_XN;
/* Also setup NX memory mapping */ mem_types[MT_MEMORY_RW].prot_sect |= PMD_SECT_XN; @@ -489,6 +497,7 @@ static void __init build_mem_type_table(void) mem_types[MT_DEVICE].prot_sect |= PMD_SECT_TEX(1); mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(1); mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_BUFFERABLE; + mem_types[MT_DEVICE_NS].prot_sect |= PMD_SECT_TEX(1); } else if (cpu_is_xsc3()) { /* * For Xscale3, @@ -500,6 +509,7 @@ static void __init build_mem_type_table(void) mem_types[MT_DEVICE].prot_sect |= PMD_SECT_TEX(1) | PMD_SECT_BUFFERED; mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(2); mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_TEX(1); + mem_types[MT_DEVICE_NS].prot_sect |= PMD_SECT_TEX(1) | PMD_SECT_BUFFERED; } else { /* * For ARMv6 and ARMv7 without TEX remapping, @@ -511,6 +521,7 @@ static void __init build_mem_type_table(void) mem_types[MT_DEVICE].prot_sect |= PMD_SECT_BUFFERED; mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(2); mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_TEX(1); + mem_types[MT_DEVICE_NS].prot_sect |= PMD_SECT_BUFFERED; } } else { /* @@ -856,7 +867,7 @@ static void __init create_mapping(struct map_desc *md) return; }
- if ((md->type == MT_DEVICE || md->type == MT_ROM) && + if ((md->type == MT_DEVICE || md->type == MT_DEVICE_NS || md->type == MT_ROM) && md->virtual >= PAGE_OFFSET && (md->virtual < VMALLOC_START || md->virtual >= VMALLOC_END)) { printk(KERN_WARNING "BUG: mapping for 0x%08llx"
On Mon, Jul 21, 2014 at 03:47:16PM +0100, Daniel Thompson wrote:
From: Marek Vasut marex@denx.de
Add new device type, MT_DEVICE_NS. This type sets the NS bit in L1 PTE [1]. Accesses to a memory region which is mapped this way generate non-secure access to that memory area. One must be careful here, since the NS bit is only available in L1 PTE, therefore when creating the mapping, the mapping must be at least 1 MiB big and must be aligned to 1 MiB. If that condition was false, the kernel would use regular L2 page mapping for this area instead and the NS bit setting would be ineffective.
Right, so this says that PTE mappings are not permissible.
- [MT_DEVICE_NS] = { /* Non-secure accesses from secure mode */
.prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_SHARED |
L_PTE_SHARED,
.prot_l1 = PMD_TYPE_TABLE,
However, by filling in prot_pte and prot_l1, you're telling the code that it /can/ setup such a mapping. This is screwed.
If you want to deny anything but section mappings (because they don't work) then you omit prot_pte and prot_l1. With those omitted, if someone tries to abuse this mapping type, then this check in create_mapping() will trigger:
if (type->prot_l1 == 0 && ((addr | phys | length) & ~SECTION_MASK)) { printk(KERN_WARNING "BUG: map for 0x%08llx at 0x%08lx can not " "be mapped using pages, ignoring.\n", (long long)__pfn_to_phys(md->pfn), addr); return; }
ioremap doesn't have that check; it assumes that it will always be setting up PTE mappings via ioremap_page_range(). In fact, on many platforms that's the only option.
So making this interface available via ioremap() seems pointless - but more importantly it's extremely error-prone. So, MT_DEVICE_NS shouldn't be using 4 at all, shouldn't be in asm/io.h, but should be with the private MT_* definitions in map.h.
On 21/07/14 17:46, Russell King - ARM Linux wrote:
On Mon, Jul 21, 2014 at 03:47:16PM +0100, Daniel Thompson wrote:
From: Marek Vasut marex@denx.de
Add new device type, MT_DEVICE_NS. This type sets the NS bit in L1 PTE [1]. Accesses to a memory region which is mapped this way generate non-secure access to that memory area. One must be careful here, since the NS bit is only available in L1 PTE, therefore when creating the mapping, the mapping must be at least 1 MiB big and must be aligned to 1 MiB. If that condition was false, the kernel would use regular L2 page mapping for this area instead and the NS bit setting would be ineffective.
Right, so this says that PTE mappings are not permissible.
- [MT_DEVICE_NS] = { /* Non-secure accesses from secure mode */
.prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_SHARED |
L_PTE_SHARED,
.prot_l1 = PMD_TYPE_TABLE,
However, by filling in prot_pte and prot_l1, you're telling the code that it /can/ setup such a mapping. This is screwed.
I'll fix this.
If you want to deny anything but section mappings (because they don't work) then you omit prot_pte and prot_l1. With those omitted, if someone tries to abuse this mapping type, then this check in create_mapping() will trigger:
if (type->prot_l1 == 0 && ((addr | phys | length) & ~SECTION_MASK)) { printk(KERN_WARNING "BUG: map for 0x%08llx at 0x%08lx can not " "be mapped using pages, ignoring.\n", (long long)__pfn_to_phys(md->pfn), addr); return; }
ioremap doesn't have that check; it assumes that it will always be setting up PTE mappings via ioremap_page_range(). In fact, on many platforms that's the only option.
I have proposed a patch (which I just noticed is currently *really* broken but ignore that for now) to prevent the fallback to ioremap_page_range(). As you say this leaves nothing but the lookup in the static mappings for many platforms.
That patch looks at PMD_SECT_NS directly but could be changed to check ->prot_l1 for zero instead.
That removes the danger of spuriously getting bad mappings but is certainly not elegant.
So making this interface available via ioremap() seems pointless - but more importantly it's extremely error-prone. So, MT_DEVICE_NS shouldn't be using 4 at all, shouldn't be in asm/io.h, but should be with the private MT_* definitions in map.h.
I wanted to use ioremap() because it allows platform neutral code in the GIC driver to look up a statically configured non-secure aliased mapping for the GIC (if it exists). Also, given the mapping is used for register I/O, ioremap() also felt "right".
Is new API better? A very thin wrapper around find_static_vm_paddr()?
I guess the best thing would be to allocate the mapping dynamically. It might be possible for __arm_ioremap_pfn_caller() to change the NS flag in the first-level table after allocating a naturally aligned 1MB vm_area and before updating the second-level? We are not required to use sections, however all pages that share a L1 entry get the same security flags.
Daniel.
ioremap_page_range() cannot provide us memory with the non-secure section flag set. Unfortunately, at present, ioremap+MT_DEVICE_NS will fall back to this function and spuriously return a virtual pointer to what is effectively MT_DEVICE memory.
This patch makes the fallback to ioremap_page_range() ineffective; to allocate MT_DEVICE_NS memory we must rely instead on the platform startup code providing a suitable static mapping.
Signed-off-by: Daniel Thompson daniel.thompson@linaro.org --- arch/arm/mm/ioremap.c | 4 ++++ 1 file changed, 4 insertions(+)
diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c index d1e5ad7..1cb12f8 100644 --- a/arch/arm/mm/ioremap.c +++ b/arch/arm/mm/ioremap.c @@ -319,6 +319,10 @@ void __iomem * __arm_ioremap_pfn_caller(unsigned long pfn, err = remap_area_sections(addr, pfn, size, type); } else #endif + if (type->prot_sect & PMD_SECT_NS) + /* required section flag is not achievable; give up */ + return NULL; + else err = ioremap_page_range(addr, addr + size, paddr, __pgprot(type->prot_pte));
Signed-off-by: Daniel Thompson daniel.thompson@linaro.org --- drivers/irqchip/irq-gic.c | 76 ++++++++++++++++++++++++++++++++++++----- include/linux/irqchip/arm-gic.h | 4 +-- 2 files changed, 70 insertions(+), 10 deletions(-)
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index 5c934a4..8faa271 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -59,6 +59,7 @@ union gic_base { struct gic_chip_data { union gic_base dist_base; union gic_base cpu_base; + union gic_base aliased_cpu_base; #ifdef CONFIG_CPU_PM u32 saved_spi_enable[DIV_ROUND_UP(1020, 32)]; u32 saved_spi_conf[DIV_ROUND_UP(1020, 16)]; @@ -126,6 +127,12 @@ static inline void __iomem *gic_data_cpu_base(struct gic_chip_data *data) return data->get_base(&data->cpu_base); }
+static inline void __iomem *gic_data_aliased_cpu_base( + struct gic_chip_data *data) +{ + return data->get_base(&data->aliased_cpu_base); +} + static inline void gic_set_base_accessor(struct gic_chip_data *data, void __iomem *(*f)(union gic_base *)) { @@ -134,6 +141,7 @@ static inline void gic_set_base_accessor(struct gic_chip_data *data, #else #define gic_data_dist_base(d) ((d)->dist_base.common_base) #define gic_data_cpu_base(d) ((d)->cpu_base.common_base) +#define gic_data_aliased_cpu_base(d) ((d)->aliased_cpu_base.common_base) #define gic_set_base_accessor(d, f) #endif
@@ -159,6 +167,13 @@ static inline void __iomem *gic_cpu_base(struct irq_data *d) return gic_data_cpu_base(gic_data); }
+static inline void __iomem *gic_aliased_cpu_base(struct irq_data *d) +{ + struct gic_chip_data *gic_data = irq_data_get_irq_chip_data(d); + + return gic_data_aliased_cpu_base(gic_data); +} + static inline unsigned int gic_irq(struct irq_data *d) { return d->hwirq; @@ -194,7 +209,7 @@ static void gic_eoi_irq(struct irq_data *d) if (gic_arch_extn.irq_eoi) gic_arch_extn.irq_eoi(d);
- writel_relaxed(gic_irq(d), gic_cpu_base(d) + GIC_CPU_EOI); + writel_relaxed(gic_irq(d), gic_aliased_cpu_base(d) + GIC_CPU_EOI); }
static int gic_set_type(struct irq_data *d, unsigned int type) @@ -300,7 +315,7 @@ static void __exception_irq_entry gic_handle_irq(struct pt_regs *regs) { u32 irqstat, irqnr; struct gic_chip_data *gic = &gic_data[0]; - void __iomem *cpu_base = gic_data_cpu_base(gic); + void __iomem *cpu_base = gic_data_aliased_cpu_base(gic);
do { irqstat = readl_relaxed(cpu_base + GIC_CPU_INTACK); @@ -332,7 +347,8 @@ static void gic_handle_cascade_irq(unsigned int irq, struct irq_desc *desc) chained_irq_enter(chip, desc);
raw_spin_lock(&irq_controller_lock); - status = readl_relaxed(gic_data_cpu_base(chip_data) + GIC_CPU_INTACK); + status = readl_relaxed(gic_data_aliased_cpu_base(chip_data) + + GIC_CPU_INTACK); raw_spin_unlock(&irq_controller_lock);
gic_irq = (status & 0x3ff); @@ -419,11 +435,19 @@ static int gic_ack_fiq(struct irq_data *d) return irq_find_mapping(gic->domain, irqnr); }
+static void gic_eoi_fiq(struct irq_data *d) +{ + if (gic_arch_extn.irq_eoi) + gic_arch_extn.irq_eoi(d); + + writel_relaxed(gic_irq(d), gic_cpu_base(d) + GIC_CPU_EOI); +} + static struct fiq_chip gic_fiq = { .fiq_enable = gic_enable_fiq, .fiq_disable = gic_disable_fiq, .fiq_ack = gic_ack_fiq, - .fiq_eoi = gic_eoi_irq, + .fiq_eoi = gic_eoi_fiq, };
static void __init gic_init_fiq(struct gic_chip_data *gic, @@ -453,6 +477,10 @@ static void __init gic_init_fiq(struct gic_chip_data *gic, */ for (i = 0; i < num_irqs; i++) fiq_register_mapping(first_irq + i, &gic_fiq); + + /* This is not a fatal problem for some use-cases so WARN() is enough */ + WARN(gic_data_cpu_base(gic_data) == gic_data_aliased_cpu_base(gic_data), + "No non-secure alias; IRQ handler may spuriously ack FIQs\n"); } #else /* CONFIG_FIQ */ static inline void gic_init_fiq(struct gic_chip_data *gic, @@ -1076,7 +1104,9 @@ const struct irq_domain_ops *gic_routable_irq_domain_ops =
void __init gic_init_bases(unsigned int gic_nr, int irq_start, void __iomem *dist_base, void __iomem *cpu_base, - u32 percpu_offset, struct device_node *node) + void __iomem *aliased_cpu_base, + u32 percpu_offset, + struct device_node *node) { irq_hw_number_t hwirq_base; struct gic_chip_data *gic; @@ -1085,6 +1115,9 @@ void __init gic_init_bases(unsigned int gic_nr, int irq_start,
BUG_ON(gic_nr >= MAX_GIC_NR);
+ if (!aliased_cpu_base) + aliased_cpu_base = cpu_base; + gic = &gic_data[gic_nr]; #ifdef CONFIG_GIC_NON_BANKED if (percpu_offset) { /* Frankein-GIC without banked registers... */ @@ -1092,10 +1125,14 @@ void __init gic_init_bases(unsigned int gic_nr, int irq_start,
gic->dist_base.percpu_base = alloc_percpu(void __iomem *); gic->cpu_base.percpu_base = alloc_percpu(void __iomem *); + gic->aliased_cpu_base.percpu_base = + alloc_percpu(void __iomem *); if (WARN_ON(!gic->dist_base.percpu_base || - !gic->cpu_base.percpu_base)) { + !gic->cpu_base.percpu_base || + !gic->aliased_cpu_base.percpu_base)) { free_percpu(gic->dist_base.percpu_base); free_percpu(gic->cpu_base.percpu_base); + free_percpu(gic->aliased_cpu_base.percpu_base); return; }
@@ -1103,6 +1140,8 @@ void __init gic_init_bases(unsigned int gic_nr, int irq_start, unsigned long offset = percpu_offset * cpu_logical_map(cpu); *per_cpu_ptr(gic->dist_base.percpu_base, cpu) = dist_base + offset; *per_cpu_ptr(gic->cpu_base.percpu_base, cpu) = cpu_base + offset; + *per_cpu_ptr(gic->aliased_cpu_base.percpu_base, cpu) = + aliased_cpu_base + offset; }
gic_set_base_accessor(gic, gic_get_percpu_base); @@ -1114,6 +1153,7 @@ void __init gic_init_bases(unsigned int gic_nr, int irq_start, percpu_offset); gic->dist_base.common_base = dist_base; gic->cpu_base.common_base = cpu_base; + gic->aliased_cpu_base.common_base = aliased_cpu_base; gic_set_base_accessor(gic, gic_get_common_base); }
@@ -1188,11 +1228,27 @@ void __init gic_init_bases(unsigned int gic_nr, int irq_start, #ifdef CONFIG_OF static int gic_cnt __initdata;
+static void __init __iomem *gic_arm_iomap_nonsecure(struct device_node *np, + int index) +{ +#if defined(CONFIG_ARM) && defined(CONFIG_FIQ) + struct resource res; + + if (of_address_to_resource(np, index, &res)) + return NULL; + + return __arm_ioremap(res.start, resource_size(&res), MT_DEVICE_NS); +#else + return NULL; +#endif +} + static int __init gic_of_init(struct device_node *node, struct device_node *parent) { - void __iomem *cpu_base; void __iomem *dist_base; + void __iomem *cpu_base; + void __iomem *aliased_cpu_base; u32 percpu_offset; int irq;
@@ -1205,10 +1261,14 @@ gic_of_init(struct device_node *node, struct device_node *parent) cpu_base = of_iomap(node, 1); WARN(!cpu_base, "unable to map gic cpu registers\n");
+ aliased_cpu_base = gic_arm_iomap_nonsecure(node, 1); + /* no NULL check because NULL is a legitimate value */ + if (of_property_read_u32(node, "cpu-offset", &percpu_offset)) percpu_offset = 0;
- gic_init_bases(gic_cnt, -1, dist_base, cpu_base, percpu_offset, node); + gic_init_bases(gic_cnt, -1, dist_base, cpu_base, aliased_cpu_base, + percpu_offset, node); if (!gic_cnt) gic_init_physaddr(node);
diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h index 45e2d8c..15cf913 100644 --- a/include/linux/irqchip/arm-gic.h +++ b/include/linux/irqchip/arm-gic.h @@ -80,14 +80,14 @@ struct device_node; extern struct irq_chip gic_arch_extn;
void gic_init_bases(unsigned int, int, void __iomem *, void __iomem *, - u32 offset, struct device_node *); + void __iomem *, u32 offset, struct device_node *); void gic_cascade_irq(unsigned int gic_nr, unsigned int irq); void gic_cpu_if_down(void);
static inline void gic_init(unsigned int nr, int start, void __iomem *dist , void __iomem *cpu) { - gic_init_bases(nr, start, dist, cpu, 0, NULL); + gic_init_bases(nr, start, dist, cpu, NULL, 0, NULL); }
void gic_send_sgi(unsigned int cpu_id, unsigned int irq);
From: Marek Vasut marex@denx.de
Statically map the SoCFPGA's memory space at PA 0xfff00000 +1MiB to 0xff000000 and set the type of this VA to MT_DEVICE_NS. This PA contains the SoCFPGA's GIC CPU registers at offset +0xec100. This area is right past VMALLOC_END and well below the optional start of per-CPU highmem entries. We can thus place our mapping here and be sure it's not in anyone's way.
All accesses to VA 0xff000000 generate non-secure accesses on the bus and we can leverage this property to generate non-secure read of the GIC INTACK (IAR) register. This non-secure read will never return an FIQ interrupt number.
Signed-off-by: Marek Vasut marex@denx.de Signed-off-by: Daniel Thompson daniel.thompson@linaro.org --- arch/arm/mach-socfpga/socfpga.c | 8 ++++++++ 1 file changed, 8 insertions(+)
diff --git a/arch/arm/mach-socfpga/socfpga.c b/arch/arm/mach-socfpga/socfpga.c index adbf383..5b60f82 100644 --- a/arch/arm/mach-socfpga/socfpga.c +++ b/arch/arm/mach-socfpga/socfpga.c @@ -45,6 +45,13 @@ static struct map_desc uart_io_desc __initdata = { .type = MT_DEVICE, };
+static struct map_desc gic_cpu_io_desc __initdata = { + .virtual = 0xff000000, + .pfn = __phys_to_pfn(0xfff00000), + .length = SZ_1M, + .type = MT_DEVICE_NS, +}; + static void __init socfpga_scu_map_io(void) { unsigned long base; @@ -60,6 +67,7 @@ static void __init socfpga_map_io(void) { socfpga_scu_map_io(); iotable_init(&uart_io_desc, 1); + iotable_init(&gic_cpu_io_desc, 1); early_printk("Early printk initialized\n"); }
Signed-off-by: Daniel Thompson daniel.thompson@linaro.org --- arch/arm/mach-imx/mach-imx6q.c | 11 +++++++++++ 1 file changed, 11 insertions(+)
diff --git a/arch/arm/mach-imx/mach-imx6q.c b/arch/arm/mach-imx/mach-imx6q.c index e60456d..192d268 100644 --- a/arch/arm/mach-imx/mach-imx6q.c +++ b/arch/arm/mach-imx/mach-imx6q.c @@ -381,10 +381,21 @@ static void __init imx6q_init_late(void) } }
+static struct map_desc gic_cpu_io_desc __initdata = { + .virtual = 0xff000000, + .pfn = __phys_to_pfn(0x00a00000), + .length = SZ_1M, + .type = MT_DEVICE_NS, +}; + static void __init imx6q_map_io(void) { debug_ll_io_init(); imx_scu_map_io(); + /* TODO: Need to check we are running without a secure monitor before + * setting up this mapping. + */ + iotable_init(&gic_cpu_io_desc, 1); }
static void __init imx6q_init_irq(void)
On Monday, July 21, 2014 at 04:47:20 PM, Daniel Thompson wrote:
Signed-off-by: Daniel Thompson daniel.thompson@linaro.org
arch/arm/mach-imx/mach-imx6q.c | 11 +++++++++++ 1 file changed, 11 insertions(+)
diff --git a/arch/arm/mach-imx/mach-imx6q.c b/arch/arm/mach-imx/mach-imx6q.c index e60456d..192d268 100644 --- a/arch/arm/mach-imx/mach-imx6q.c +++ b/arch/arm/mach-imx/mach-imx6q.c @@ -381,10 +381,21 @@ static void __init imx6q_init_late(void) } }
+static struct map_desc gic_cpu_io_desc __initdata = {
.virtual = 0xff000000,
.pfn = __phys_to_pfn(0x00a00000),
.length = SZ_1M,
.type = MT_DEVICE_NS,
+};
static void __init imx6q_map_io(void) { debug_ll_io_init(); imx_scu_map_io();
- /* TODO: Need to check we are running without a secure monitor before
* setting up this mapping.
*/
- iotable_init(&gic_cpu_io_desc, 1);
}
Is there no way to add ioremap_nonsecure() so the GIC can allocate the mapping itself instead of adding a static one? Also, can you add a flag to the MT_DEVICE_NS that says the mapping can only ever be in L1 and never in "lower" levels of the page table?
Best regards, Marek Vasut
linaro-kernel@lists.linaro.org