Hi Will,
Thanks for having a look.
On Fri, Nov 29, 2013 at 04:10:16PM +0000, Will Deacon wrote:
This patch implements basic support for UEFI runtime services in the ARM architecture - a requirement for using efibootmgr to read and update the system boot configuration.
It uses the generic configuration table scanning to populate ACPI and SMBIOS pointers.
I've got a bunch of implementation questions, so hopefully you can help me to understand what this is doing!
I can try :)
diff --git a/arch/arm/kernel/uefi.c b/arch/arm/kernel/uefi.c new file mode 100644 index 0000000..f771026 --- /dev/null +++ b/arch/arm/kernel/uefi.c
+/*
- Returns 1 if 'facility' is enabled, 0 otherwise.
- */
+int efi_enabled(int facility) +{
return test_bit(facility, &arm_uefi_facility) != 0;!test_bit(...) ?
Could do. Cloned from the x86 implementation. Matt Fleming has indicated some rework coming in this area.
+static int __init uefi_init(void) +{
efi_char16_t *c16;char vendor[100] = "unknown";int i, retval;efi.systab = early_memremap(uefi_system_table,sizeof(efi_system_table_t));/** Verify the UEFI System Table*/if (efi.systab == NULL)panic("Whoa! Can't find UEFI system table.\n");if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)panic("Whoa! UEFI system table signature incorrect\n");if ((efi.systab->hdr.revision >> 16) == 0)pr_warn("Warning: UEFI system table version %d.%02d, expected 1.00 or greater\n",efi.systab->hdr.revision >> 16,efi.systab->hdr.revision & 0xffff);/* Show what we know for posterity */c16 = (efi_char16_t *)early_memremap(efi.systab->fw_vendor,sizeof(vendor));if (c16) {for (i = 0; i < (int) sizeof(vendor) - 1 && *c16; ++i)vendor[i] = c16[i];vendor[i] = '\0';}pr_info("UEFI v%u.%.02u by %s\n",efi.systab->hdr.revision >> 16,efi.systab->hdr.revision & 0xffff, vendor);retval = efi_config_init(NULL);if (retval == 0)set_bit(EFI_CONFIG_TABLES, &arm_uefi_facility);Does this actually need to be atomic?
Not necessarily, but it's neater than masking, and only called once.
early_iounmap(c16, sizeof(vendor));early_iounmap(efi.systab, sizeof(efi_system_table_t));return retval;+}
+static __init int is_discardable_region(efi_memory_desc_t *md) +{ +#ifdef KEEP_ALL_REGIONS
return 0;+#endif
Maybe just have a dummy is_discardable_region in this case, like we usually do instead of inlining the #ifdef?
OK.
if (md->attribute & EFI_MEMORY_RUNTIME)return 0;switch (md->type) {+#ifdef KEEP_BOOT_SERVICES_REGIONS
case EFI_BOOT_SERVICES_CODE:case EFI_BOOT_SERVICES_DATA:+#endif
/* Keep tables around for any future kexec operations */case EFI_ACPI_RECLAIM_MEMORY:return 0;/* Preserve */case EFI_RESERVED_TYPE:return 0;}return 1;+}
[...]
+int __init uefi_memblock_arm_reserve_range(void) +{
if (!of_scan_flat_dt(fdt_find_uefi_params, NULL))return 0;set_bit(EFI_BOOT, &arm_uefi_facility);Similar comment wrt atomicity here (and in the rest of this patch).
Similar response.
uefi_init();remove_regions();return 0;+}
+/*
- Disable instrrupts, enable idmap and disable caches.
interrupts
Yeah.
- */
+static void __init phys_call_prologue(void) +{
local_irq_disable();/* Take out a flat memory mapping. */setup_mm_for_reboot();/* Clean and invalidate caches */flush_cache_all();/* Turn off caching */cpu_proc_fin();/* Push out any further dirty data, and ensure cache is empty */flush_cache_all();Do we care about outer caches here?
I think we do. We jump into UEFI and make it relocate itself to the new virtual addresses, with MMU disabled (so all accesses NC).
This all looks suspiciously like copy-paste from __soft_restart;
Yeah, 'cause you told me to :)
perhaps some refactoring/reuse is in order?
Could do. Turn this into a process.c:idcall_prepare(), or something less daftly named?
+}
+/*
- Restore original memory map and re-enable interrupts.
- */
+static void __init phys_call_epilogue(void) +{
static struct mm_struct *mm = &init_mm;/* Restore original memory mapping */cpu_switch_mm(mm->pgd, mm);/* Flush branch predictor and TLBs */local_flush_bp_all();+#ifdef CONFIG_CPU_HAS_ASID
local_flush_tlb_all();+#endif
... and this looks like copy-paste from setup_mm_for_reboot.
You told me that too! Only this goes the other way.
Is the refactoring worth the extra code?
local_irq_enable();+}
+static int __init remap_region(efi_memory_desc_t *md, efi_memory_desc_t *entry) +{
u64 va;u64 paddr;u64 size;*entry = *md;paddr = entry->phys_addr;size = entry->num_pages << EFI_PAGE_SHIFT;/** Map everything writeback-capable as coherent memory,* anything else as device.*/if (md->attribute & EFI_MEMORY_WB)va = (u64)((u32)uefi_remap(paddr, size) & 0xffffffffUL);elseva = (u64)((u32)uefi_ioremap(paddr, size) & 0xffffffffUL);Do you really need all those casts/masking?
I didn't find a better way to avoid warnings when building this code for both LPAE/non-LPAE.
We are guaranteed by the UEFI spec that all addresses will be <4GB, but the descriptor format is still 64-bit physical/virtual addresses, and we need to pass them back as such.
if (!va)return 0;entry->virt_addr = va;if (uefi_debug)pr_info(" %016llx-%016llx => 0x%08x : (%s)\n",paddr, paddr + size - 1, (u32)va,md->attribute & EFI_MEMORY_WB ? "WB" : "I/O");return 1;+}
+static int __init remap_regions(void) +{
void *p, *next;efi_memory_desc_t *md;memmap.phys_map = uefi_remap(uefi_boot_mmap, uefi_boot_mmap_size);if (!memmap.phys_map)return 0;memmap.map_end = (void *)memmap.phys_map + uefi_boot_mmap_size;memmap.desc_size = uefi_mmap_desc_size;memmap.desc_version = uefi_mmap_desc_ver;/* Allocate space for the physical region map */memmap.map = kzalloc(memmap.nr_map * memmap.desc_size, GFP_KERNEL);if (!memmap.map)return 0;next = memmap.map;for (p = memmap.phys_map; p < memmap.map_end; p += memmap.desc_size) {md = p;if (is_discardable_region(md) || md->type == EFI_RESERVED_TYPE)continue;if (!remap_region(p, next))return 0;What about the kzalloc above, does that need freeing?
In case of failure? Yes, good point.
+/*
- This function switches the UEFI runtime services to virtual mode.
- This operation must be performed only once in the system's lifetime,
- including any kexec calls.
- This must be done with a 1:1 mapping. The current implementation
- resolves this by disabling the MMU.
- */
+efi_status_t __init phys_set_virtual_address_map(u32 memory_map_size,
u32 descriptor_size,u32 descriptor_version,efi_memory_desc_t *dsc)+{
uefi_phys_call_t *phys_set_map;efi_status_t status;phys_call_prologue();phys_set_map = (void *)(unsigned long)virt_to_phys(uefi_phys_call);/* Called with caches disabled, returns with caches enabled */status = phys_set_map(memory_map_size, descriptor_size,descriptor_version, dsc,efi.set_virtual_address_map);
Guessing this relies on a physically-tagged cache? Wouldn't hurt to put something in the epilogue code to deal with vivt caches if they're not prohibited by construction (e.g. due to some build dependency magic).
Sorry, I don't quite follow? How would it depend on a physically-tagged cache?
phys_call_epilogue();return status;+}
diff --git a/arch/arm/kernel/uefi_phys.S b/arch/arm/kernel/uefi_phys.S new file mode 100644 index 0000000..e9a1542 --- /dev/null +++ b/arch/arm/kernel/uefi_phys.S @@ -0,0 +1,59 @@ +/*
- arch/arm/kernel/uefi_phys.S
- Copyright (C) 2013 Linaro Ltd.
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License version 2 as
- published by the Free Software Foundation.
- */
+#include <linux/linkage.h> +#define PAR_MASK 0xfff
.text+@ uefi_phys_call(a, b, c, d, *f)
.align 5.pushsection .idmap.text, "ax"+ENTRY(uefi_phys_call)
@ Save physical contextmov r12, sppush {r4-r5, r12, lr}@ Extract function pointer (don't write r12 beyond this)ldr r12, [sp, #16]@ Convert sp to 32-bit physicalmov lr, spldr r4, =PAR_MASKand r5, lr, r4 @ Extract lower 12 bits of spmcr p15, 0, lr, c7, c8, 1 @ Write VA -> ATS1CPWThis is broken without an isb but, more to the point, why can't we just do the standard lowmem virt_to_phys conversion here instead?
I can't use that from within asm (right?). Are you suggesting that I should duplicate the mechanism of virt_to_phys here?
mrc p15, 0, lr, c7, c4, 0 @ Physical Address Registermvn r4, r4and lr, lr, r4 @ Clear lower 12 bits of PAadd lr, lr, r5 @ Calculate phys spmov sp, lr @ Update@ Disable MMUmrc p15, 0, lr, c1, c0, 0 @ ctrl registerbic lr, lr, #0x1 @ ...............mmcr p15, 0, lr, c1, c0, 0 @ disable MMUisb@ Make callblx r12This is basically a copy-paste of cpu_v7_reset. Perhaps using some assembler macros would help here?
Well, I explicitly don't want to touch SCTLR.TE. We could macro-ize it, but I think that would increase the amount of code.
pop {r4-r5, r12, lr}@ Enable MMU + Cachesmrc p15, 0, r1, c1, c0, 0 @ ctrl registerorr r1, r1, #0x1000 @ ...i............orr r1, r1, #0x0005 @ .............c.mmcr p15, 0, r1, c1, c0, 0 @ enable MMUisbWhy do we have to enable the caches so early?
You'd prefer it being done back in phys_call_epilogue()?
/ Leif