From: Semen Protsenko <semen.protsenko@linaro.org>
"efi" global data structure contains "runtime_version" field which must be assigned in order to use it later in Runtime Services virtual calls (virt_efi_* functions).
Before this patch "runtime_version" was unassigned (0), so each Runtime Service virtual call that checks revision would fail.
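For example, a revision gate of this kind (a sketch modeled on the virt_efi_* wrappers in drivers/firmware/efi/runtime-wrappers.c; the exact body is illustrative, not quoted from this series) can never pass while efi.runtime_version is still 0:

    static efi_status_t virt_efi_query_variable_info(u32 attr,
                                                     u64 *storage_space,
                                                     u64 *remaining_space,
                                                     u64 *max_variable_size)
    {
            /* 0 is below every EFI_*_SYSTEM_TABLE_REVISION value */
            if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION)
                    return EFI_UNSUPPORTED;

            return efi_call_virt(query_variable_info, attr, storage_space,
                                 remaining_space, max_variable_size);
    }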
Signed-off-by: Semen Protsenko <semen.protsenko@linaro.org>
Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: stable@vger.kernel.org
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
---
 arch/arm64/kernel/efi.c | 2 ++
 1 file changed, 2 insertions(+)
diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
index 865fdf5c7344..219a59f2ae97 100644
--- a/arch/arm64/kernel/efi.c
+++ b/arch/arm64/kernel/efi.c
@@ -455,6 +455,8 @@ static int __init arm64_enter_virtual_mode(void)
         efi_native_runtime_setup();
         set_bit(EFI_RUNTIME_SERVICES, &efi.flags);
 
+        efi.runtime_version = efi.systab->hdr.revision;
+
         return 0;
 
 err_unmap:
From: Leif Lindholm <leif.lindholm@linaro.org>
UEFI provides its own method for marking regions to reserve, via the memory map, which is also used to initialise memblock. So, when using the UEFI memory map, ignore any memreserve entries present in the DT.
Reported-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Leif Lindholm <leif.lindholm@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/kernel/efi.c | 2 ++
 arch/arm64/mm/init.c    | 4 +++-
 2 files changed, 5 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
index 219a59f2ae97..95c49ebc660d 100644
--- a/arch/arm64/kernel/efi.c
+++ b/arch/arm64/kernel/efi.c
@@ -175,6 +175,8 @@ static __init void reserve_regions(void)
                 if (uefi_debug)
                         pr_cont("\n");
         }
+
+        set_bit(EFI_MEMMAP, &efi.flags);
 }
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 5b4526ee3a01..5472c2401876 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -32,6 +32,7 @@
 #include <linux/of_fdt.h>
 #include <linux/dma-mapping.h>
 #include <linux/dma-contiguous.h>
+#include <linux/efi.h>
 
 #include <asm/fixmap.h>
 #include <asm/sections.h>
@@ -148,7 +149,8 @@ void __init arm64_memblock_init(void)
         memblock_reserve(__virt_to_phys(initrd_start), initrd_end - initrd_start);
 #endif
 
-        early_init_fdt_scan_reserved_mem();
+        if (!efi_enabled(EFI_MEMMAP))
+                early_init_fdt_scan_reserved_mem();
 
         /* 4GB maximum for 32-bit only capable devices */
         if (IS_ENABLED(CONFIG_ZONE_DMA))
Memory regions of type ACPI_MEMORY_NVS should be preserved by the OS, so make sure we reserve them at boot.
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 arch/arm64/kernel/efi.c | 1 +
 1 file changed, 1 insertion(+)
diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
index 95c49ebc660d..71ea4fc0aa8a 100644
--- a/arch/arm64/kernel/efi.c
+++ b/arch/arm64/kernel/efi.c
@@ -132,6 +132,7 @@ static __init int is_reserve_region(efi_memory_desc_t *md)
                 return 1;
 
         if (md->type == EFI_ACPI_RECLAIM_MEMORY ||
+            md->type == EFI_ACPI_MEMORY_NVS ||
             md->type == EFI_RESERVED_TYPE)
                 return 1;
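For context, is_reserve_region() with this change applied reads roughly as follows (a sketch; the lines around the hunk are reconstructed from its context, not quoted from the tree):

    static __init int is_reserve_region(efi_memory_desc_t *md)
    {
            if (!is_normal_ram(md))
                    return 0;

            if (md->attribute & EFI_MEMORY_RUNTIME)
                    return 1;

            if (md->type == EFI_ACPI_RECLAIM_MEMORY ||
                md->type == EFI_ACPI_MEMORY_NVS ||
                md->type == EFI_RESERVED_TYPE)
                    return 1;

            return 0;
    }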
The EFI_CONFIG_TABLES bit already gets set by efi_config_init(), so there is no reason to set it again after this function returns successfully.
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 arch/arm64/kernel/efi.c | 2 --
 1 file changed, 2 deletions(-)
diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
index 71ea4fc0aa8a..51522ab0c6da 100644
--- a/arch/arm64/kernel/efi.c
+++ b/arch/arm64/kernel/efi.c
@@ -112,8 +112,6 @@ static int __init uefi_init(void)
                 efi.systab->hdr.revision & 0xffff, vendor);
 
         retval = efi_config_init(NULL);
-        if (retval == 0)
-                set_bit(EFI_CONFIG_TABLES, &efi.flags);
 
 out:
         early_memunmap(efi.systab, sizeof(efi_system_table_t));
On systems that boot via UEFI, all memory nodes are deleted from the device tree, and instead, the size and location of system RAM is derived from the UEFI memory map. This is handled by reserve_regions(), which not only reserves the parts of memory that UEFI declares as reserved, but also installs the memblocks that cover the remaining usable memory.
Currently, reserve_regions() is only called if uefi_init() succeeds. However, it does not actually depend on anything that uefi_init() does, and not calling reserve_regions() results in a broken boot, so it is better to just call it unconditionally.
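For reference, reserve_regions() at this point in the series does roughly the following (an abridged sketch using helpers from the same file; details elided):

    static __init void reserve_regions(void)
    {
            efi_memory_desc_t *md;

            for_each_efi_memory_desc(&memmap, md) {
                    u64 paddr = md->phys_addr;
                    u64 npages = md->num_pages;

                    memrange_efi_to_native(&paddr, &npages);

                    /* install usable RAM as memblocks */
                    if (is_normal_ram(md))
                            early_init_dt_add_memory_arch(paddr,
                                            npages << PAGE_SHIFT);

                    /* keep firmware-reserved ranges out of the allocator */
                    if (is_reserve_region(md))
                            memblock_reserve(paddr, npages << PAGE_SHIFT);
            }

            set_bit(EFI_MEMMAP, &efi.flags);
    }

None of this requires uefi_init() to have run first.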
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 arch/arm64/kernel/efi.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)
diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
index 51522ab0c6da..00b693212b23 100644
--- a/arch/arm64/kernel/efi.c
+++ b/arch/arm64/kernel/efi.c
@@ -313,10 +313,8 @@ void __init efi_init(void)
         memmap.desc_size = params.desc_size;
         memmap.desc_version = params.desc_ver;
 
-        if (uefi_init() < 0)
-                return;
-
         reserve_regions();
+        uefi_init();
 }
 
 void __init efi_idmap_init(void)
@@ -374,15 +372,13 @@ static int __init arm64_enter_virtual_mode(void)
         int count = 0;
         unsigned long flags;
 
-        if (!efi_enabled(EFI_BOOT)) {
-                pr_info("EFI services will not be available.\n");
-                return -1;
-        }
+        if (!efi_enabled(EFI_MEMMAP))
+                return 0;
 
         mapsize = memmap.map_end - memmap.map;
         early_memunmap(memmap.map, mapsize);
 
-        if (efi_runtime_disabled()) {
+        if (!efi_enabled(EFI_BOOT) || efi_runtime_disabled()) {
                 pr_info("EFI runtime services will be disabled.\n");
                 return -1;
         }
Currently, swapper_pg_dir and idmap_pg_dir share the init_mm mm_struct instance. To allow the introduction of other pg_dir instances, for instance for UEFI's mapping of Runtime Services, make the mm_struct instance an explicit argument that gets passed down to the pmd and pte instantiation functions. Note that the consumers of the mm_struct argument (pud_populate()/pgd_populate()) don't actually inspect it, but let's pass the correct instance anyway for correctness' sake.
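(For illustration, arm64's pgd_populate() at this time boils down to the following; a sketch from arch/arm64/include/asm/pgalloc.h, with the comment added here:

    static inline void
    pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
    {
            /* mm is accepted for API symmetry but never evaluated */
            set_pgd(pgd, __pgd(__pa(pud) | PUD_TYPE_TABLE));
    }

so passing a different mm_struct changes nothing in behaviour, only in correctness of intent.)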
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 arch/arm64/mm/mmu.c | 31 ++++++++++++++++---------------
 1 file changed, 16 insertions(+), 15 deletions(-)
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index c55567283cde..f7d01c9816f0 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -155,9 +155,9 @@ static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr,
         } while (pte++, addr += PAGE_SIZE, addr != end);
 }
 
-static void __init alloc_init_pmd(pud_t *pud, unsigned long addr,
-                                  unsigned long end, phys_addr_t phys,
-                                  int map_io)
+static void __init alloc_init_pmd(struct mm_struct *mm, pud_t *pud,
+                                  unsigned long addr, unsigned long end,
+                                  phys_addr_t phys, int map_io)
 {
         pmd_t *pmd;
         unsigned long next;
@@ -177,7 +177,7 @@ static void __init alloc_init_pmd(pud_t *pud, unsigned long addr,
          */
         if (pud_none(*pud) || pud_bad(*pud)) {
                 pmd = early_alloc(PTRS_PER_PMD * sizeof(pmd_t));
-                pud_populate(&init_mm, pud, pmd);
+                pud_populate(mm, pud, pmd);
         }
 
         pmd = pmd_offset(pud, addr);
@@ -201,16 +201,16 @@ static void __init alloc_init_pmd(pud_t *pud, unsigned long addr,
         } while (pmd++, addr = next, addr != end);
 }
 
-static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr,
-                                  unsigned long end, unsigned long phys,
-                                  int map_io)
+static void __init alloc_init_pud(struct mm_struct *mm, pgd_t *pgd,
+                                  unsigned long addr, unsigned long end,
+                                  unsigned long phys, int map_io)
 {
         pud_t *pud;
         unsigned long next;
 
         if (pgd_none(*pgd)) {
                 pud = early_alloc(PTRS_PER_PUD * sizeof(pud_t));
-                pgd_populate(&init_mm, pgd, pud);
+                pgd_populate(mm, pgd, pud);
         }
         BUG_ON(pgd_bad(*pgd));
 
@@ -239,7 +239,7 @@ static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr,
                                 flush_tlb_all();
                         }
                 } else {
-                        alloc_init_pmd(pud, addr, next, phys, map_io);
+                        alloc_init_pmd(mm, pud, addr, next, phys, map_io);
                 }
                 phys += next - addr;
         } while (pud++, addr = next, addr != end);
@@ -249,9 +249,9 @@ static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr,
  * Create the page directory entries and any necessary page tables for the
  * mapping specified by 'md'.
  */
-static void __init __create_mapping(pgd_t *pgd, phys_addr_t phys,
-                                    unsigned long virt, phys_addr_t size,
-                                    int map_io)
+static void __init __create_mapping(struct mm_struct *mm, pgd_t *pgd,
+                                    phys_addr_t phys, unsigned long virt,
+                                    phys_addr_t size, int map_io)
 {
         unsigned long addr, length, end, next;
 
@@ -261,7 +261,7 @@ static void __init __create_mapping(pgd_t *pgd, phys_addr_t phys,
         end = addr + length;
         do {
                 next = pgd_addr_end(addr, end);
-                alloc_init_pud(pgd, addr, next, phys, map_io);
+                alloc_init_pud(mm, pgd, addr, next, phys, map_io);
                 phys += next - addr;
         } while (pgd++, addr = next, addr != end);
 }
@@ -274,7 +274,8 @@ static void __init create_mapping(phys_addr_t phys, unsigned long virt,
                         &phys, virt);
                 return;
         }
-        __create_mapping(pgd_offset_k(virt & PAGE_MASK), phys, virt, size, 0);
+        __create_mapping(&init_mm, pgd_offset_k(virt & PAGE_MASK), phys, virt,
+                         size, 0);
 }
 
 void __init create_id_mapping(phys_addr_t addr, phys_addr_t size, int map_io)
@@ -283,7 +284,7 @@ void __init create_id_mapping(phys_addr_t addr, phys_addr_t size, int map_io)
                 pr_warn("BUG: not creating id mapping for %pa\n", &addr);
                 return;
         }
-        __create_mapping(&idmap_pg_dir[pgd_index(addr)],
+        __create_mapping(&init_mm, &idmap_pg_dir[pgd_index(addr)],
                          addr, addr, size, map_io);
 }
For UEFI, we need to install the memory mappings used for Runtime Services in a dedicated set of page tables. Add create_pgd_mapping(), which allows us to allocate and install the page table entries early.
---
 arch/arm64/include/asm/mmu.h | 14 ++++++++++++--
 arch/arm64/kernel/efi.c      |  4 ++--
 arch/arm64/mm/mmu.c          | 41 ++++++++++++++++++++++++++++++-----------
 3 files changed, 44 insertions(+), 15 deletions(-)
diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
index c2f006c48bdb..c23aa4d87be7 100644
--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -31,7 +31,17 @@ extern void paging_init(void);
 extern void setup_mm_for_reboot(void);
 extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt);
 extern void init_mem_pgprot(void);
-/* create an identity mapping for memory (or io if map_io is true) */
-extern void create_id_mapping(phys_addr_t addr, phys_addr_t size, int map_io);
+
+enum mmu_map_type {
+        MMU_MAP_TYPE_DEFAULT,
+        MMU_MAP_TYPE_WRITE_PROTECT,
+        MMU_MAP_TYPE_EXECUTE_PROTECT,
+        MMU_MAP_TYPE_MMIO
+};
+extern void create_id_mapping(phys_addr_t addr, phys_addr_t size,
+                              enum mmu_map_type map_type);
+extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
+                               unsigned long virt, phys_addr_t size,
+                               enum mmu_map_type map_type);
 
 #endif
diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
index 00b693212b23..4c6c9f0319dc 100644
--- a/arch/arm64/kernel/efi.c
+++ b/arch/arm64/kernel/efi.c
@@ -54,7 +54,7 @@ static void __init efi_setup_idmap(void)
         u64 paddr, npages, size;
 
         for_each_memblock(memory, r)
-                create_id_mapping(r->base, r->size, 0);
+                create_id_mapping(r->base, r->size, MMU_MAP_TYPE_DEFAULT);
 
         /* map runtime io spaces */
         for_each_efi_memory_desc(&memmap, md) {
@@ -64,7 +64,7 @@ static void __init efi_setup_idmap(void)
                 npages = md->num_pages;
                 memrange_efi_to_native(&paddr, &npages);
                 size = npages << PAGE_SHIFT;
-                create_id_mapping(paddr, size, 1);
+                create_id_mapping(paddr, size, MMU_MAP_TYPE_MMIO);
         }
 }
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index f7d01c9816f0..f661fc44fab2 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -157,19 +157,30 @@ static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr,
 
 static void __init alloc_init_pmd(struct mm_struct *mm, pud_t *pud,
                                   unsigned long addr, unsigned long end,
-                                  phys_addr_t phys, int map_io)
+                                  phys_addr_t phys, enum mmu_map_type type)
 {
         pmd_t *pmd;
         unsigned long next;
         pmdval_t prot_sect;
         pgprot_t prot_pte;
 
-        if (map_io) {
+        switch (type) {
+        case MMU_MAP_TYPE_WRITE_PROTECT:
+                prot_sect = PROT_SECT_NORMAL_EXEC | PMD_SECT_RDONLY;
+                prot_pte = __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY);
+                break;
+        case MMU_MAP_TYPE_EXECUTE_PROTECT:
+                prot_sect = PROT_SECT_NORMAL;
+                prot_pte = PAGE_KERNEL;
+                break;
+        case MMU_MAP_TYPE_MMIO:
                 prot_sect = PROT_SECT_DEVICE_nGnRE;
                 prot_pte = __pgprot(PROT_DEVICE_nGnRE);
-        } else {
+                break;
+        default:
                 prot_sect = PROT_SECT_NORMAL_EXEC;
                 prot_pte = PAGE_KERNEL_EXEC;
+                break;
         }
 
         /*
@@ -203,7 +214,7 @@ static void __init alloc_init_pmd(struct mm_struct *mm, pud_t *pud,
 
 static void __init alloc_init_pud(struct mm_struct *mm, pgd_t *pgd,
                                   unsigned long addr, unsigned long end,
-                                  unsigned long phys, int map_io)
+                                  unsigned long phys, enum mmu_map_type type)
 {
         pud_t *pud;
         unsigned long next;
@@ -221,7 +232,7 @@ static void __init alloc_init_pud(struct mm_struct *mm, pgd_t *pgd,
                 /*
                  * For 4K granule only, attempt to put down a 1GB block
                  */
-                if (!map_io && (PAGE_SHIFT == 12) &&
+                if (type == MMU_MAP_TYPE_DEFAULT && (PAGE_SHIFT == 12) &&
                     ((addr | next | phys) & ~PUD_MASK) == 0) {
                         pud_t old_pud = *pud;
                         set_pud(pud, __pud(phys | PROT_SECT_NORMAL_EXEC));
@@ -239,7 +250,7 @@ static void __init alloc_init_pud(struct mm_struct *mm, pgd_t *pgd,
                                 flush_tlb_all();
                         }
                 } else {
-                        alloc_init_pmd(mm, pud, addr, next, phys, map_io);
+                        alloc_init_pmd(mm, pud, addr, next, phys, type);
                 }
                 phys += next - addr;
         } while (pud++, addr = next, addr != end);
@@ -251,7 +262,7 @@ static void __init alloc_init_pud(struct mm_struct *mm, pgd_t *pgd,
  */
 static void __init __create_mapping(struct mm_struct *mm, pgd_t *pgd,
                                     phys_addr_t phys, unsigned long virt,
-                                    phys_addr_t size, int map_io)
+                                    phys_addr_t size, enum mmu_map_type type)
 {
         unsigned long addr, length, end, next;
 
@@ -261,7 +272,7 @@ static void __init __create_mapping(struct mm_struct *mm, pgd_t *pgd,
         end = addr + length;
         do {
                 next = pgd_addr_end(addr, end);
-                alloc_init_pud(mm, pgd, addr, next, phys, map_io);
+                alloc_init_pud(mm, pgd, addr, next, phys, type);
                 phys += next - addr;
         } while (pgd++, addr = next, addr != end);
 }
@@ -275,17 +286,25 @@ static void __init create_mapping(phys_addr_t phys, unsigned long virt,
                 return;
         }
         __create_mapping(&init_mm, pgd_offset_k(virt & PAGE_MASK), phys, virt,
-                         size, 0);
+                         size, MMU_MAP_TYPE_DEFAULT);
 }
 
-void __init create_id_mapping(phys_addr_t addr, phys_addr_t size, int map_io)
+void __init create_id_mapping(phys_addr_t addr, phys_addr_t size,
+                              enum mmu_map_type map_type)
 {
         if ((addr >> PGDIR_SHIFT) >= ARRAY_SIZE(idmap_pg_dir)) {
                 pr_warn("BUG: not creating id mapping for %pa\n", &addr);
                 return;
         }
         __create_mapping(&init_mm, &idmap_pg_dir[pgd_index(addr)],
-                         addr, addr, size, map_io);
+                         addr, addr, size, map_type);
+}
+
+void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
+                               unsigned long virt, phys_addr_t size,
+                               enum mmu_map_type map_type)
+{
+        __create_mapping(mm, pgd_offset(mm, virt), phys, virt, size, map_type);
 }
 
 static void __init map_mem(void)
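A minimal usage sketch (hypothetical caller, not part of this patch): all a private set of page tables needs is a pgd array and an mm_struct wrapping it, after which create_pgd_mapping() can populate it:

    static pgd_t private_pgd[PTRS_PER_PGD] __page_aligned_bss;

    static struct mm_struct private_mm = {
            .pgd = private_pgd,
    };

    static void __init map_firmware_region(phys_addr_t phys,
                                           unsigned long virt,
                                           phys_addr_t size)
    {
            /* read-only mapping installed outside init_mm */
            create_pgd_mapping(&private_mm, phys, virt, size,
                               MMU_MAP_TYPE_WRITE_PROTECT);
    }

The final patch in this series uses exactly this pattern for the UEFI Runtime Services regions.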
Split off the remapping code from efi_config_init() so that the caller can perform its own remapping. This is necessary to correctly handle virtually remapped UEFI memory regions under kexec, as efi.systab will have been updated to a virtual address.
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 drivers/firmware/efi/efi.c | 49 +++++++++++++++++++++++++++++-----------------
 include/linux/efi.h        |  2 ++
 2 files changed, 33 insertions(+), 18 deletions(-)
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index 8590099ac148..3e72e3bfc8ab 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -289,9 +289,10 @@ static __init int match_config_table(efi_guid_t *guid,
         return 0;
 }
 
-int __init efi_config_init(efi_config_table_type_t *arch_tables)
+int __init efi_config_parse_tables(void *config_tables, int count,
+                                   efi_config_table_type_t *arch_tables)
 {
-        void *config_tables, *tablep;
+        void *tablep;
         int i, sz;
 
         if (efi_enabled(EFI_64BIT))
@@ -299,19 +300,9 @@ int __init efi_config_init(efi_config_table_type_t *arch_tables)
         else
                 sz = sizeof(efi_config_table_32_t);
 
-        /*
-         * Let's see what config tables the firmware passed to us.
-         */
-        config_tables = early_memremap(efi.systab->tables,
-                                       efi.systab->nr_tables * sz);
-        if (config_tables == NULL) {
-                pr_err("Could not map Configuration table!\n");
-                return -ENOMEM;
-        }
-
         tablep = config_tables;
         pr_info("");
-        for (i = 0; i < efi.systab->nr_tables; i++) {
+        for (i = 0; i < count; i++) {
                 efi_guid_t guid;
                 unsigned long table;
 
@@ -324,8 +315,6 @@ int __init efi_config_init(efi_config_table_type_t *arch_tables)
                         if (table64 >> 32) {
                                 pr_cont("\n");
                                 pr_err("Table located above 4GB, disabling EFI.\n");
-                                early_memunmap(config_tables,
-                                               efi.systab->nr_tables * sz);
                                 return -EINVAL;
                         }
 #endif
@@ -340,13 +329,37 @@ int __init efi_config_init(efi_config_table_type_t *arch_tables)
                 tablep += sz;
         }
         pr_cont("\n");
-        early_memunmap(config_tables, efi.systab->nr_tables * sz);
-
         set_bit(EFI_CONFIG_TABLES, &efi.flags);
-
         return 0;
 }
 
+int __init efi_config_init(efi_config_table_type_t *arch_tables)
+{
+        void *config_tables;
+        int sz, ret;
+
+        if (efi_enabled(EFI_64BIT))
+                sz = sizeof(efi_config_table_64_t);
+        else
+                sz = sizeof(efi_config_table_32_t);
+
+        /*
+         * Let's see what config tables the firmware passed to us.
+         */
+        config_tables = early_memremap(efi.systab->tables,
+                                       efi.systab->nr_tables * sz);
+        if (config_tables == NULL) {
+                pr_err("Could not map Configuration table!\n");
+                return -ENOMEM;
+        }
+
+        ret = efi_config_parse_tables(config_tables, efi.systab->nr_tables,
+                                      arch_tables);
+
+        early_memunmap(config_tables, efi.systab->nr_tables * sz);
+        return ret;
+}
+
 #ifdef CONFIG_EFI_VARS_MODULE
 static int __init efi_load_efivars(void)
 {
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 0949f9c7e872..97027fafebd4 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -871,6 +871,8 @@ static inline efi_status_t efi_query_variable_store(u32 attributes, unsigned lon
 #endif
 extern void __iomem *efi_lookup_mapped_addr(u64 phys_addr);
 extern int efi_config_init(efi_config_table_type_t *arch_tables);
+extern int efi_config_parse_tables(void *config_tables, int count,
+                                   efi_config_table_type_t *arch_tables);
 extern u64 efi_get_iobase (void);
 extern u32 efi_mem_type (unsigned long phys_addr);
 extern u64 efi_mem_attributes (unsigned long phys_addr);
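A usage sketch of the resulting split (hypothetical caller doing its own remapping; efi_to_phys() stands in for whatever virtual-to-physical translation the caller needs):

    u64 size = sizeof(efi_config_table_64_t) * efi.systab->nr_tables;
    void *tables;

    tables = early_memremap(efi_to_phys(efi.systab->tables), size);
    if (tables) {
            /* parse without efi_config_init()'s own early_memremap() */
            efi_config_parse_tables(tables, efi.systab->nr_tables, NULL);
            early_memunmap(tables, size);
    }

The arm64 patch later in this series does precisely this in uefi_init().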
In order to support kexec, the kernel needs to be able to deal with the state of the UEFI firmware after SetVirtualAddressMap() has been called. To avoid having separate code paths for non-kexec and kexec, let's move the call to SetVirtualAddressMap() to the stub: this guarantees that it will only be called once (since the stub is not executed during kexec), and ensures that the UEFI state is identical between kexec and normal boot.
This implies that the layout of the virtual mapping needs to be created by the stub as well. All regions are rounded up to a naturally aligned multiple of 64 KB (for compatibility with kernels using 64 KB pages) and recorded in the UEFI memory map. The kernel proper reads those values and installs the mappings in a dedicated set of page tables that are swapped in during UEFI Runtime Services calls.
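As a worked example of the rounding (hypothetical region; memrange_efi_to_64k() is the helper added to the stub below):

    /*
     * A runtime region at 0x4007A000 covering 3 EFI pages (4 KB each):
     *
     *   addr   = 0x4007A000 & PAGE_MASK_64K                  = 0x40070000
     *   npages = PFN_64K_UP(0x4007A000 + (3 << EFI_PAGE_SHIFT))
     *            - PFN_64K_DOWN(0x4007A000)
     *          = 0x4008 - 0x4007                             = 1
     *
     * i.e. the region is described as one naturally aligned 64 KB frame,
     * which a kernel running with 64 KB pages can map without overlapping
     * its neighbours.
     */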
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 arch/arm64/include/asm/efi.h       |  19 +++-
 arch/arm64/kernel/efi.c            | 220 ++++++++++++++++++++-----------------
 drivers/firmware/efi/libstub/fdt.c | 110 ++++++++++++++++++-
 3 files changed, 245 insertions(+), 104 deletions(-)
diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h
index a34fd3b12e2b..d752e5480096 100644
--- a/arch/arm64/include/asm/efi.h
+++ b/arch/arm64/include/asm/efi.h
@@ -12,23 +12,32 @@ extern void efi_idmap_init(void);
 #define efi_idmap_init()
 #endif
 
+void efi_load_rt_mapping(void);
+void efi_unload_rt_mapping(void);
+
 #define efi_call_virt(f, ...)                                   \
 ({                                                              \
-        efi_##f##_t *__f = efi.systab->runtime->f;              \
+        efi_##f##_t *__f;                                       \
         efi_status_t __s;                                       \
                                                                 \
-        kernel_neon_begin();                                    \
+        kernel_neon_begin();    /* disables preemption */       \
+        efi_load_rt_mapping();                                  \
+        __f = efi.systab->runtime->f;                           \
         __s = __f(__VA_ARGS__);                                 \
+        efi_unload_rt_mapping();                                \
         kernel_neon_end();                                      \
         __s;                                                    \
 })
 
 #define __efi_call_virt(f, ...)                                 \
 ({                                                              \
-        efi_##f##_t *__f = efi.systab->runtime->f;              \
+        efi_##f##_t *__f;                                       \
                                                                 \
-        kernel_neon_begin();                                    \
+        kernel_neon_begin();    /* disables preemption */       \
+        efi_load_rt_mapping();                                  \
+        __f = efi.systab->runtime->f;                           \
         __f(__VA_ARGS__);                                       \
+        efi_unload_rt_mapping();                                \
         kernel_neon_end();                                      \
 })
 
@@ -44,4 +53,6 @@ extern void efi_idmap_init(void);
 
 #define efi_call_early(f, ...) sys_table_arg->boottime->f(__VA_ARGS__)
 
+#define EFI_VIRTMAP             EFI_ARCH_1
+
 #endif /* _ASM_EFI_H */
diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
index 4c6c9f0319dc..98664b924058 100644
--- a/arch/arm64/kernel/efi.c
+++ b/arch/arm64/kernel/efi.c
@@ -19,16 +19,21 @@
 #include <linux/of_fdt.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
+#include <linux/mm_types.h>
+#include <linux/rbtree.h>
+#include <linux/rwsem.h>
+#include <linux/spinlock.h>
+#include <linux/atomic.h>
 
 #include <asm/cacheflush.h>
 #include <asm/efi.h>
 #include <asm/tlbflush.h>
+#include <asm/pgtable.h>
 #include <asm/mmu_context.h>
+#include <asm/mmu.h>
 
 struct efi_memory_map memmap;
 
-static efi_runtime_services_t *runtime;
-
 static u64 efi_system_table;
 
 static int uefi_debug __initdata;
@@ -68,9 +73,36 @@ static void __init efi_setup_idmap(void)
         }
 }
 
+/*
+ * Translate an EFI virtual address into a physical address: this is
+ * necessary, as some data members of the EFI system table are virtually
+ * remapped after SetVirtualAddressMap() has been called.
+ */
+static phys_addr_t __init efi_to_phys(unsigned long addr)
+{
+        efi_memory_desc_t *md;
+
+        for_each_efi_memory_desc(&memmap, md) {
+                if (!(md->attribute & EFI_MEMORY_RUNTIME))
+                        continue;
+                if (md->virt_addr == 0)
+                        /* no virtual mapping has been installed by the stub */
+                        break;
+                if (md->virt_addr <= addr &&
+                    (addr - md->virt_addr) < (md->num_pages << EFI_PAGE_SHIFT))
+                        return md->phys_addr + addr - md->virt_addr;
+        }
+
+        WARN_ONCE(1, "UEFI virtual mapping incomplete or missing -- no entry found for 0x%lx\n",
+                  addr);
+        return addr;
+}
+
 static int __init uefi_init(void)
 {
         efi_char16_t *c16;
+        void *config_tables;
+        u64 table_size;
         char vendor[100] = "unknown";
         int i, retval;
@@ -98,7 +130,7 @@ static int __init uefi_init(void)
                 efi.systab->hdr.revision & 0xffff);
 
         /* Show what we know for posterity */
-        c16 = early_memremap(efi.systab->fw_vendor,
+        c16 = early_memremap(efi_to_phys(efi.systab->fw_vendor),
                              sizeof(vendor));
         if (c16) {
                 for (i = 0; i < (int) sizeof(vendor) - 1 && *c16; ++i)
@@ -111,8 +143,14 @@ static int __init uefi_init(void)
                 efi.systab->hdr.revision >> 16,
                 efi.systab->hdr.revision & 0xffff, vendor);
 
-        retval = efi_config_init(NULL);
+        table_size = sizeof(efi_config_table_64_t) * efi.systab->nr_tables;
+        config_tables = early_memremap(efi_to_phys(efi.systab->tables),
+                                       table_size);
+
+        retval = efi_config_parse_tables(config_tables,
+                                         efi.systab->nr_tables, NULL);
 
+        early_memunmap(config_tables, table_size);
 out:
         early_memunmap(efi.systab, sizeof(efi_system_table_t));
         return retval;
@@ -317,60 +355,79 @@ void __init efi_init(void)
         uefi_init();
 }
 
+static pgd_t efi_pgd[PTRS_PER_PGD] __page_aligned_bss;
+
+static struct mm_struct efi_mm = {
+        .mm_rb                  = RB_ROOT,
+        .pgd                    = efi_pgd,
+        .mm_users               = ATOMIC_INIT(2),
+        .mm_count               = ATOMIC_INIT(1),
+        .mmap_sem               = __RWSEM_INITIALIZER(efi_mm.mmap_sem),
+        .page_table_lock        = __SPIN_LOCK_UNLOCKED(efi_mm.page_table_lock),
+        .mmlist                 = LIST_HEAD_INIT(efi_mm.mmlist),
+        INIT_MM_CONTEXT(efi_mm)
+};
+
 void __init efi_idmap_init(void)
 {
+        efi_memory_desc_t *md;
+
         if (!efi_enabled(EFI_BOOT))
                 return;
 
         /* boot time idmap_pg_dir is incomplete, so fill in missing parts */
         efi_setup_idmap();
-}
-
-static int __init remap_region(efi_memory_desc_t *md, void **new)
-{
-        u64 paddr, vaddr, npages, size;
-
-        paddr = md->phys_addr;
-        npages = md->num_pages;
-        memrange_efi_to_native(&paddr, &npages);
-        size = npages << PAGE_SHIFT;
-
-        if (is_normal_ram(md))
-                vaddr = (__force u64)ioremap_cache(paddr, size);
-        else
-                vaddr = (__force u64)ioremap(paddr, size);
 
-        if (!vaddr) {
-                pr_err("Unable to remap 0x%llx pages @ %p\n",
-                       npages, (void *)paddr);
-                return 0;
-        }
+        for_each_efi_memory_desc(&memmap, md) {
+                u64 paddr, npages, size;
+                enum mmu_map_type map_type;
 
-        /* adjust for any rounding when EFI and system pagesize differs */
-        md->virt_addr = vaddr + (md->phys_addr - paddr);
+                if (!(md->attribute & EFI_MEMORY_RUNTIME))
+                        continue;
+                if (md->virt_addr == 0)
+                        /* no virtual mapping has been installed by the stub */
+                        return;
 
-        if (uefi_debug)
-                pr_info("  EFI remap 0x%012llx => %p\n",
-                        md->phys_addr, (void *)md->virt_addr);
+                paddr = md->phys_addr;
+                npages = md->num_pages;
+                memrange_efi_to_native(&paddr, &npages);
+                size = npages << PAGE_SHIFT;
 
-        memcpy(*new, md, memmap.desc_size);
-        *new += memmap.desc_size;
+                if (uefi_debug)
+                        pr_info("  EFI remap 0x%012llx => %p\n",
+                                md->phys_addr, (void *)md->virt_addr);
 
-        return 1;
+                /*
+                 * Take care not to create regions that are
+                 * both writable and executable.
+                 * TODO: check with the UEFI police whether this is an
+                 * acceptable way, i.e., without checking if the memory
+                 * attribute has the respective WP/XP bit set.
+                 */
+                switch (md->type) {
+                default:
+                        pr_warn("UEFI Virtual Mapping requested for region of type 0x%x @ 0x%llx, mapping as non-executable\n",
+                                md->type, md->phys_addr);
+                        /* fall through */
+                case EFI_RUNTIME_SERVICES_DATA:
+                        map_type = MMU_MAP_TYPE_EXECUTE_PROTECT;
+                        break;
+                case EFI_RUNTIME_SERVICES_CODE:
+                        map_type = MMU_MAP_TYPE_WRITE_PROTECT;
+                        break;
+                case EFI_MEMORY_MAPPED_IO:
+                        map_type = MMU_MAP_TYPE_MMIO;
+                        break;
+                }
+                create_pgd_mapping(&efi_mm, paddr, md->virt_addr, size,
+                                   map_type);
+        }
+        set_bit(EFI_VIRTMAP, &efi.flags);
 }
 
-/*
- * Switch UEFI from an identity map to a kernel virtual map
- */
 static int __init arm64_enter_virtual_mode(void)
 {
-        efi_memory_desc_t *md;
-        phys_addr_t virtmap_phys;
-        void *virtmap, *virt_md;
-        efi_status_t status;
         u64 mapsize;
-        int count = 0;
-        unsigned long flags;
 
         if (!efi_enabled(EFI_MEMMAP))
                 return 0;
@@ -391,78 +448,45 @@ static int __init arm64_enter_virtual_mode(void)
 
         efi.memmap = &memmap;
 
-        /* Map the runtime regions */
-        virtmap = kmalloc(mapsize, GFP_KERNEL);
-        if (!virtmap) {
-                pr_err("Failed to allocate EFI virtual memmap\n");
-                return -1;
-        }
-        virtmap_phys = virt_to_phys(virtmap);
-        virt_md = virtmap;
-
-        for_each_efi_memory_desc(&memmap, md) {
-                if (!(md->attribute & EFI_MEMORY_RUNTIME))
-                        continue;
-                if (!remap_region(md, &virt_md))
-                        goto err_unmap;
-                ++count;
-        }
-
-        efi.systab = (__force void *)efi_lookup_mapped_addr(efi_system_table);
+        efi.systab = (__force void *)ioremap_cache(efi_system_table,
+                                                   sizeof(efi_system_table_t));
         if (!efi.systab) {
-                /*
-                 * If we have no virtual mapping for the System Table at this
-                 * point, the memory map doesn't cover the physical offset where
-                 * it resides. This means the System Table will be inaccessible
-                 * to Runtime Services themselves once the virtual mapping is
-                 * installed.
-                 */
-                pr_err("Failed to remap EFI System Table -- buggy firmware?\n");
-                goto err_unmap;
+                pr_err("Failed to remap EFI System Table\n");
+                return -1;
         }
         set_bit(EFI_SYSTEM_TABLES, &efi.flags);
 
-        local_irq_save(flags);
-        cpu_switch_mm(idmap_pg_dir, &init_mm);
-
-        /* Call SetVirtualAddressMap with the physical address of the map */
-        runtime = efi.systab->runtime;
-        efi.set_virtual_address_map = runtime->set_virtual_address_map;
-
-        status = efi.set_virtual_address_map(count * memmap.desc_size,
-                                             memmap.desc_size,
-                                             memmap.desc_version,
-                                             (efi_memory_desc_t *)virtmap_phys);
-        cpu_set_reserved_ttbr0();
-        flush_tlb_all();
-        local_irq_restore(flags);
-
-        kfree(virtmap);
-        free_boot_services();
-
-        if (status != EFI_SUCCESS) {
-                pr_err("Failed to set EFI virtual address map! [%lx]\n",
-                       status);
+        if (!efi_enabled(EFI_VIRTMAP)) {
+                pr_err("No UEFI virtual mapping was installed -- runtime services will not be available\n");
                 return -1;
         }
 
         /* Set up runtime services function pointers */
-        runtime = efi.systab->runtime;
         efi_native_runtime_setup();
         set_bit(EFI_RUNTIME_SERVICES, &efi.flags);
 
         efi.runtime_version = efi.systab->hdr.revision;
 
         return 0;
-
-err_unmap:
-        /* unmap all mappings that succeeded: there are 'count' of those */
-        for (virt_md = virtmap; count--; virt_md += memmap.desc_size) {
-                md = virt_md;
-                iounmap((__force void __iomem *)md->virt_addr);
-        }
-        kfree(virtmap);
-        return -1;
 }
 early_initcall(arm64_enter_virtual_mode);
+
+static void efi_set_pgd(struct mm_struct *mm)
+{
+        cpu_switch_mm(mm->pgd, mm);
+        flush_tlb_all();
+        if (icache_is_aivivt())
+                __flush_icache_all();
+}
+
+void efi_load_rt_mapping(void)
+{
+        efi_set_pgd(&efi_mm);
+}
+
+void efi_unload_rt_mapping(void)
+{
+        efi_set_pgd(current->active_mm);
+}
diff --git a/drivers/firmware/efi/libstub/fdt.c b/drivers/firmware/efi/libstub/fdt.c
index a56bb3528755..248ea8463be8 100644
--- a/drivers/firmware/efi/libstub/fdt.c
+++ b/drivers/firmware/efi/libstub/fdt.c
@@ -160,6 +160,74 @@ fdt_set_fail:
 #endif
 
 /*
+ * This is the base address at which to start allocating virtual memory ranges
+ * for UEFI Runtime Services. This is a userland range so that we can use any
+ * allocation we choose, and eliminate the risk of a conflict after kexec.
+ */
+#define EFI_RT_VIRTUAL_BASE     0x40000000
+
+#define PAGE_SHIFT_64K          16
+#define PAGE_SIZE_64K           (1UL << PAGE_SHIFT_64K)
+#define PAGE_MASK_64K           (~(PAGE_SIZE_64K - 1))
+#define PFN_64K_UP(x)           (((x) + PAGE_SIZE_64K - 1) >> PAGE_SHIFT_64K)
+#define PFN_64K_DOWN(x)         ((x) >> PAGE_SHIFT_64K)
+
+static inline void memrange_efi_to_64k(u64 *addr, u64 *npages)
+{
+        *npages = PFN_64K_UP(*addr + (*npages << EFI_PAGE_SHIFT)) -
+                  PFN_64K_DOWN(*addr);
+        *addr &= PAGE_MASK_64K;
+}
+
+static void update_memory_map(efi_memory_desc_t *memory_map,
+                              unsigned long map_size, unsigned long desc_size,
+                              int *count)
+{
+        u64 efi_virtual_base = EFI_RT_VIRTUAL_BASE;
+        union {
+                efi_memory_desc_t entry;
+                u8 pad[desc_size];
+        } *p, *q, tmp;
+        int i = map_size / desc_size;
+
+        p = (void *)memory_map;
+        for (q = p; i > 0; i--, q++) {
+                u64 paddr, npages, size;
+
+                if (!(q->entry.attribute & EFI_MEMORY_RUNTIME))
+                        continue;
+
+                /*
+                 * Swap the entries around so that all EFI_MEMORY_RUNTIME
+                 * entries bubble to the top. This will allow us to reuse the
+                 * table as input to SetVirtualAddressMap().
+                 */
+                if (q != p) {
+                        tmp = *p;
+                        *p = *q;
+                        *q = tmp;
+                }
+
+                /*
+                 * Make the low mapping compatible with 64k pages: this allows
+                 * a 4k page size kernel to kexec a 64k page size kernel and
+                 * vice versa.
+                 */
+                paddr = p->entry.phys_addr;
+                npages = p->entry.num_pages;
+                memrange_efi_to_64k(&paddr, &npages);
+                size = npages << PAGE_SHIFT_64K;
+
+                p->entry.virt_addr = efi_virtual_base + p->entry.phys_addr -
+                                     paddr;
+                efi_virtual_base += size;
+
+                ++p;
+                ++*count;
+        }
+}
+
+/*
  * Allocate memory for a new FDT, then add EFI, commandline, and
  * initrd related fields to the FDT.  This routine increases the
  * FDT allocation size until the allocated memory is large
@@ -188,6 +256,7 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table,
         efi_memory_desc_t *memory_map;
         unsigned long new_fdt_size;
         efi_status_t status;
+        int runtime_entry_count = 0;
 
         /*
          * Estimate size of new FDT, and allocate memory for it. We
@@ -240,12 +309,49 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table,
                 }
         }
 
+        /*
+         * Update the memory map with virtual addresses, and reorder the
+         * entries so that we can pass it straight into
+         * SetVirtualAddressMap().
+         */
+        update_memory_map(memory_map, map_size, desc_size,
+                          &runtime_entry_count);
+
         /* Now we are ready to exit_boot_services.*/
         status = sys_table->boottime->exit_boot_services(handle, mmap_key);
 
+        if (status == EFI_SUCCESS) {
+                efi_set_virtual_address_map_t *svam;
+
+                /* Install the new virtual address map */
+                svam = sys_table->runtime->set_virtual_address_map;
+                status = svam(runtime_entry_count * desc_size, desc_size,
+                              desc_ver, memory_map);
 
-        if (status == EFI_SUCCESS)
-                return status;
+                /*
+                 * We are beyond the point of no return here, so if the call
+                 * to SetVirtualAddressMap() failed, we need to signal that to
+                 * the incoming kernel but proceed normally otherwise.
+                 */
+                if (status != EFI_SUCCESS) {
+                        int i;
+
+                        /*
+                         * Set the virtual address field of all
+                         * EFI_MEMORY_RUNTIME entries to 0. This will signal
+                         * the incoming kernel that no virtual translation has
+                         * been installed.
+                         */
+                        for (i = 0; i < map_size; i += desc_size) {
+                                efi_memory_desc_t *p;
+
+                                p = (efi_memory_desc_t *)((u8 *)memory_map + i);
+                                if (!(p->attribute & EFI_MEMORY_RUNTIME))
+                                        break;
+                                p->virt_addr = 0;
+                        }
+                }
+                return EFI_SUCCESS;
+        }
 
         pr_efi_err(sys_table, "Exit boot services failed.\n");
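To summarize the runtime flow the series ends up with (sequence inferred from the efi_call_virt() macro above; get_time is shown as an arbitrary example):

    /*
     *   efi_call_virt(get_time, tm, tc)
     *     kernel_neon_begin();       // also disables preemption
     *     efi_load_rt_mapping();     // switch TTBR0 to efi_mm.pgd
     *     __f = efi.systab->runtime->get_time;
     *     __s = __f(tm, tc);         // firmware runs under the EFI_VIRTMAP
     *     efi_unload_rt_mapping();   // back to current->active_mm
     *     kernel_neon_end();
     */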