The following changes since commit e769ece3b129698d2b09811a6f6d304e4eaa8c29:
KVM: s390: fix pfmf non-quiescing control handling (2013-07-29 09:02:30 +0200)
are available in the git repository at:
git://git.linaro.org/people/cdall/linux-kvm-arm.git tags/kvm-arm-fixes-3.11
for you to fetch changes up to 2184a60de26b94bc5a88de3e5a960ef9ff54ba5a:
KVM: ARM: Squash len warning (2013-08-11 21:03:39 -0700)
----------------------------------------------------------------
Christoffer Dall (3):
      ARM: KVM: Fix 64-bit coprocessor handling
      ARM: KVM: Fix unaligned unmap_range leak
      KVM: ARM: Squash len warning

Marc Zyngier (1):
      arm64: KVM: fix 2-level page tables unmapping

 arch/arm/kvm/coproc.c     | 26 +++++++++++++++++++-------
 arch/arm/kvm/coproc.h     |  3 +++
 arch/arm/kvm/coproc_a15.c |  6 +++++-
 arch/arm/kvm/mmio.c       |  3 ++-
 arch/arm/kvm/mmu.c        | 36 +++++++++++++++---------------------
 5 files changed, 44 insertions(+), 30 deletions(-)
The PAR was exported as CRn == 7 and CRm == 0, but in fact, for 64-bit coprocessor registers, the primary coprocessor register number is determined by CRm, because the user space API was modeled after the coprocessor access instructions (see the ARM ARM rev. C, page B3-1445).
However, simply changing CRn to CRm breaks the sorting check when booting the kernel, because the internal kernel logic always treats CRn as the primary register number, and it also makes the table sorting impossible for humans to follow.
Alternatively, we could change the logic to always have CRn == CRm, but that becomes unclear given the number of ways we look up a coprocessor register. We could also keep a separate 64-bit table, but that feels over-engineered. Instead, keep CRn as the in-kernel representation of the primary coprocessor register number and always export that number as CRm, as per the existing user space ABI.
Note: The TTBR registers just magically worked because they happened to follow the CRn(0) regs and were considered CRn(0) in the in-kernel representation.
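For reference, a stand-alone user-space sketch of how the index for a 64-bit cp15 register such as the PAR is composed under this scheme, with the primary register number exported in the CRm field. The constant values below are copied here purely for illustration; verify them against include/uapi/linux/kvm.h and arch/arm/include/uapi/asm/kvm.h rather than trusting this sketch:

#include <stdint.h>
#include <stdio.h>

/*
 * Illustration only: mirrors how the user space index for a 64-bit cp15
 * register is built after this patch, with the in-kernel primary register
 * number exported in the CRm field.  The values below are assumed to match
 * the uapi headers of this kernel version -- double-check before reuse.
 */
#define KVM_REG_ARM		 0x4000000000000000ULL
#define KVM_REG_SIZE_U64	 0x0030000000000000ULL
#define KVM_REG_ARM_COPROC_SHIFT 16
#define KVM_REG_ARM_OPC1_SHIFT	 3
#define KVM_REG_ARM_CRM_SHIFT	 7

static uint64_t cp15_64bit_index(unsigned int primary_reg_nr, unsigned int op1)
{
	uint64_t id = KVM_REG_ARM | KVM_REG_SIZE_U64;

	id |= 15ULL << KVM_REG_ARM_COPROC_SHIFT;	/* coprocessor 15 */
	id |= (uint64_t)op1 << KVM_REG_ARM_OPC1_SHIFT;
	/* the in-kernel CRn becomes CRm in the ABI, as with MRRC/MCRR */
	id |= (uint64_t)primary_reg_nr << KVM_REG_ARM_CRM_SHIFT;
	return id;
}

int main(void)
{
	/* PAR: primary register number 7, Op1 0 */
	printf("PAR index: 0x%016llx\n",
	       (unsigned long long)cp15_64bit_index(7, 0));
	return 0;
}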
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/coproc.c     | 26 +++++++++++++++++++-------
 arch/arm/kvm/coproc.h     |  3 +++
 arch/arm/kvm/coproc_a15.c |  6 +++++-
 3 files changed, 27 insertions(+), 8 deletions(-)
diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
index 4a51990..db9cf69 100644
--- a/arch/arm/kvm/coproc.c
+++ b/arch/arm/kvm/coproc.c
@@ -146,7 +146,11 @@ static bool pm_fake(struct kvm_vcpu *vcpu,
 #define access_pmintenclr pm_fake
 
 /* Architected CP15 registers.
- * Important: Must be sorted ascending by CRn, CRM, Op1, Op2
+ * CRn denotes the primary register number, but is copied to the CRm in the
+ * user space API for 64-bit register access in line with the terminology used
+ * in the ARM ARM.
+ * Important: Must be sorted ascending by CRn, CRM, Op1, Op2 and with 64-bit
+ * registers preceding 32-bit ones.
  */
 static const struct coproc_reg cp15_regs[] = {
 	/* CSSELR: swapped by interrupt.S. */
@@ -154,8 +158,8 @@ static const struct coproc_reg cp15_regs[] = {
 			NULL, reset_unknown, c0_CSSELR },
 
 	/* TTBR0/TTBR1: swapped by interrupt.S. */
-	{ CRm( 2), Op1( 0), is64, NULL, reset_unknown64, c2_TTBR0 },
-	{ CRm( 2), Op1( 1), is64, NULL, reset_unknown64, c2_TTBR1 },
+	{ CRm64( 2), Op1( 0), is64, NULL, reset_unknown64, c2_TTBR0 },
+	{ CRm64( 2), Op1( 1), is64, NULL, reset_unknown64, c2_TTBR1 },
 
 	/* TTBCR: swapped by interrupt.S. */
 	{ CRn( 2), CRm( 0), Op1( 0), Op2( 2), is32,
@@ -182,7 +186,7 @@ static const struct coproc_reg cp15_regs[] = {
 			NULL, reset_unknown, c6_IFAR },
 
 	/* PAR swapped by interrupt.S */
-	{ CRn( 7), Op1( 0), is64, NULL, reset_unknown64, c7_PAR },
+	{ CRm64( 7), Op1( 0), is64, NULL, reset_unknown64, c7_PAR },
 
 	/*
 	 * DC{C,I,CI}SW operations:
@@ -399,12 +403,13 @@ static bool index_to_params(u64 id, struct coproc_params *params)
 			      | KVM_REG_ARM_OPC1_MASK))
 			return false;
 		params->is_64bit = true;
-		params->CRm = ((id & KVM_REG_ARM_CRM_MASK)
+		/* CRm to CRn: see cp15_to_index for details */
+		params->CRn = ((id & KVM_REG_ARM_CRM_MASK)
 			       >> KVM_REG_ARM_CRM_SHIFT);
 		params->Op1 = ((id & KVM_REG_ARM_OPC1_MASK)
 			       >> KVM_REG_ARM_OPC1_SHIFT);
 		params->Op2 = 0;
-		params->CRn = 0;
+		params->CRm = 0;
 		return true;
 	default:
 		return false;
@@ -898,7 +903,14 @@ static u64 cp15_to_index(const struct coproc_reg *reg)
 	if (reg->is_64) {
 		val |= KVM_REG_SIZE_U64;
 		val |= (reg->Op1 << KVM_REG_ARM_OPC1_SHIFT);
-		val |= (reg->CRm << KVM_REG_ARM_CRM_SHIFT);
+		/*
+		 * CRn always denotes the primary coproc. reg. nr. for the
+		 * in-kernel representation, but the user space API uses the
+		 * CRm for the encoding, because it is modelled after the
+		 * MRRC/MCRR instructions: see the ARM ARM rev. c page
+		 * B3-1445
+		 */
+		val |= (reg->CRn << KVM_REG_ARM_CRM_SHIFT);
 	} else {
 		val |= KVM_REG_SIZE_U32;
 		val |= (reg->Op1 << KVM_REG_ARM_OPC1_SHIFT);
diff --git a/arch/arm/kvm/coproc.h b/arch/arm/kvm/coproc.h
index b7301d3..0461d5c 100644
--- a/arch/arm/kvm/coproc.h
+++ b/arch/arm/kvm/coproc.h
@@ -135,6 +135,8 @@ static inline int cmp_reg(const struct coproc_reg *i1,
 		return -1;
 	if (i1->CRn != i2->CRn)
 		return i1->CRn - i2->CRn;
+	if (i1->is_64 != i2->is_64)
+		return i2->is_64 - i1->is_64;
 	if (i1->CRm != i2->CRm)
 		return i1->CRm - i2->CRm;
 	if (i1->Op1 != i2->Op1)
@@ -145,6 +147,7 @@ static inline int cmp_reg(const struct coproc_reg *i1,
 
 #define CRn(_x)		.CRn = _x
 #define CRm(_x)		.CRm = _x
+#define CRm64(_x)	.CRn = _x, .CRm = 0
 #define Op1(_x)		.Op1 = _x
 #define Op2(_x)		.Op2 = _x
 #define is64		.is_64 = true
diff --git a/arch/arm/kvm/coproc_a15.c b/arch/arm/kvm/coproc_a15.c
index 685063a..cf93472 100644
--- a/arch/arm/kvm/coproc_a15.c
+++ b/arch/arm/kvm/coproc_a15.c
@@ -114,7 +114,11 @@ static bool access_l2ectlr(struct kvm_vcpu *vcpu,
 
 /*
  * A15-specific CP15 registers.
- * Important: Must be sorted ascending by CRn, CRM, Op1, Op2
+ * CRn denotes the primary register number, but is copied to the CRm in the
+ * user space API for 64-bit register access in line with the terminology used
+ * in the ARM ARM.
+ * Important: Must be sorted ascending by CRn, CRM, Op1, Op2 and with 64-bit
+ * registers preceding 32-bit ones.
  */
 static const struct coproc_reg a15_regs[] = {
 	/* MPIDR: we use VMPIDR for guest access. */
The unmap_range function did not properly cover the case where the start address was not aligned to PMD_SIZE or PUD_SIZE and an entire pte table or pmd table was cleared: incrementing addr by the full table size then skips past mappings that were never unmapped, causing us to leak memory.
The fix is to always move on to the next page table entry boundary instead of adding the full size of the VA range covered by the corresponding table level entry.
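For illustration, this is roughly what the *_addr_end helpers compute: the next entry boundary, clamped to the end of the range. A stand-alone user-space sketch (addr_end, the 2MB entry size and the addresses below are made up for the example; the real kernel macros differ in detail):

#include <stdint.h>
#include <stdio.h>

/*
 * Simplified stand-in for the kernel's pmd_addr_end()/pud_addr_end():
 * return the next table-entry boundary above addr, clamped to end.
 * Using this instead of "addr += SIZE" means an unaligned start address
 * cannot jump past entries that were never visited.
 */
static uint64_t addr_end(uint64_t addr, uint64_t end, uint64_t entry_size)
{
	uint64_t boundary = (addr + entry_size) & ~(entry_size - 1);

	return boundary < end ? boundary : end;
}

int main(void)
{
	uint64_t pmd_size = 1ULL << 21;	/* 2MB entries, just for the example */
	/* start is NOT 2MB aligned: the old "addr += PMD_SIZE" overshoots the
	 * next entry boundary and leaves that entry's mappings in place. */
	uint64_t addr = 0x40100000, end = 0x40400000;

	printf("old: 0x%llx  new: 0x%llx\n",
	       (unsigned long long)(addr + pmd_size),
	       (unsigned long long)addr_end(addr, end, pmd_size));
	return 0;
}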
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/mmu.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index ca6bea4..80a83ec 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -132,37 +132,37 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
 	pmd_t *pmd;
 	pte_t *pte;
 	unsigned long long addr = start, end = start + size;
-	u64 range;
+	u64 next;
 
 	while (addr < end) {
 		pgd = pgdp + pgd_index(addr);
 		pud = pud_offset(pgd, addr);
 		if (pud_none(*pud)) {
-			addr += PUD_SIZE;
+			addr = pud_addr_end(addr, end);
 			continue;
 		}
 
 		pmd = pmd_offset(pud, addr);
 		if (pmd_none(*pmd)) {
-			addr += PMD_SIZE;
+			addr = pmd_addr_end(addr, end);
 			continue;
 		}
 
 		pte = pte_offset_kernel(pmd, addr);
 		clear_pte_entry(kvm, pte, addr);
-		range = PAGE_SIZE;
+		next = addr + PAGE_SIZE;
 
 		/* If we emptied the pte, walk back up the ladder */
 		if (pte_empty(pte)) {
 			clear_pmd_entry(kvm, pmd, addr);
-			range = PMD_SIZE;
+			next = pmd_addr_end(addr, end);
 			if (pmd_empty(pmd)) {
 				clear_pud_entry(kvm, pud, addr);
-				range = PUD_SIZE;
+				next = pud_addr_end(addr, end);
 			}
 		}
 
-		addr += range;
+		addr = next;
 	}
 }
From: Marc Zyngier <marc.zyngier@arm.com>
When using 64kB pages, we only have two levels of page tables, meaning that the PGD, PUD and PMD are fused. In this case, trying to refcount PUDs and PMDs independently is a complete disaster, as they are the same.
We manage to get it right for the allocation (stage2_set_pte uses {pmd,pud}_none), but the unmapping path clears both pud and pmd refcounts, which fails spectacularly with 2-level page tables.
The fix is to avoid calling clear_pud_entry when both the pmd and pud pages are empty. To do this, instead of introducing yet another pud_empty function, consolidate both pte_empty and pmd_empty into page_empty (the code is actually identical) and use that to also test the validity of the pud.
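As a deliberately simplified toy model of the failure (the struct and helpers below are invented for illustration and are not kernel code): with folded tables the pmd and the pud are the same page, so a single refcount drop already empties the pud, and the added !page_empty(pud) check prevents dropping it a second time.

#include <stdbool.h>
#include <stdio.h>

/*
 * Toy model only: "refcount" stands in for the table page's page_count(),
 * where a count of 1 means only the allocator's own reference is left.
 */
struct table { int refcount; };

static bool page_empty(struct table *t)
{
	return t->refcount == 1;
}

static void clear_entry(struct table *t)
{
	t->refcount--;		/* put_page() in the real code */
}

int main(void)
{
	struct table fused = { .refcount = 2 };		/* one mapping left */
	struct table *pmd = &fused, *pud = &fused;	/* folded: same table */

	clear_entry(pmd);			/* the last mapping goes away */
	if (page_empty(pmd) && !page_empty(pud))
		clear_entry(pud);		/* skipped: pud is already empty */

	printf("refcount = %d (would underflow to 0 without the pud check)\n",
	       fused.refcount);
	return 0;
}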
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/mmu.c | 22 ++++++++--------------
 1 file changed, 8 insertions(+), 14 deletions(-)
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 80a83ec..0988d9e 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -85,6 +85,12 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
 	return p;
 }
 
+static bool page_empty(void *ptr)
+{
+	struct page *ptr_page = virt_to_page(ptr);
+	return page_count(ptr_page) == 1;
+}
+
 static void clear_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr)
 {
 	pmd_t *pmd_table = pmd_offset(pud, 0);
@@ -103,12 +109,6 @@ static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr)
 	put_page(virt_to_page(pmd));
 }
 
-static bool pmd_empty(pmd_t *pmd)
-{
-	struct page *pmd_page = virt_to_page(pmd);
-	return page_count(pmd_page) == 1;
-}
-
 static void clear_pte_entry(struct kvm *kvm, pte_t *pte, phys_addr_t addr)
 {
 	if (pte_present(*pte)) {
@@ -118,12 +118,6 @@ static void clear_pte_entry(struct kvm *kvm, pte_t *pte, phys_addr_t addr)
 	}
 }
 
-static bool pte_empty(pte_t *pte)
-{
-	struct page *pte_page = virt_to_page(pte);
-	return page_count(pte_page) == 1;
-}
-
 static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
 			unsigned long long start, u64 size)
 {
@@ -153,10 +147,10 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
 		next = addr + PAGE_SIZE;
 
 		/* If we emptied the pte, walk back up the ladder */
-		if (pte_empty(pte)) {
+		if (page_empty(pte)) {
 			clear_pmd_entry(kvm, pmd, addr);
 			next = pmd_addr_end(addr, end);
-			if (pmd_empty(pmd)) {
+			if (page_empty(pmd) && !page_empty(pud)) {
 				clear_pud_entry(kvm, pud, addr);
 				next = pud_addr_end(addr, end);
 			}
The 'len' variable was declared as an unsigned long and then checked for being less than zero, which results in a warning on some compilers. Since len is only ever assigned an int value, make it an int.
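A trivial stand-alone reproduction of the class of warning being squashed (not the actual mmio.c code); compiling this with gcc -Wextra -c flags the comparison as always false:

/* Minimal reproduction: because len is unsigned, the comparison below is
 * reported as always false (-Wtype-limits, enabled by -Wextra). */
int bad_check(unsigned long len)
{
	if (len < 0)
		return -1;
	return (int)len;
}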
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/mmio.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c
index b8e06b7..0c25d94 100644
--- a/arch/arm/kvm/mmio.c
+++ b/arch/arm/kvm/mmio.c
@@ -63,7 +63,8 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
 static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 		      struct kvm_exit_mmio *mmio)
 {
-	unsigned long rt, len;
+	unsigned long rt;
+	int len;
 	bool is_write, sign_extend;
 
 	if (kvm_vcpu_dabt_isextabt(vcpu)) {
On 12/08/2013 06:12, Christoffer Dall wrote:
Thanks, pulled and sent to Linus.
Paolo