 
            commit 2a8859f373b0a86f0ece8ec8312607eacf12485d upstream.
FNAME(cmpxchg_gpte) is an inefficient mess. It is at least decent if it can go through get_user_pages_fast(), but if it cannot then it tries to use memremap(); that is not just terribly slow, it is also wrong because it assumes that the VM_PFNMAP VMA is contiguous.
The right way to do it would be to do the same thing as hva_to_pfn_remapped() does since commit add6a0cd1c5b ("KVM: MMU: try to fix up page faults before giving up", 2016-07-05), using follow_pte() and fixup_user_fault() to determine the correct address to use for memremap(). To do this, one could for example extract hva_to_pfn() for use outside virt/kvm/kvm_main.c. But really there is no reason to do that either, because there is already a perfectly valid address to do the cmpxchg() on, only it is a userspace address. That means doing user_access_begin()/user_access_end() and writing the code in assembly to handle any exception correctly. Worse, the guest PTE can be 8-byte even on i686 so there is the extra complication of using cmpxchg8b to account for. But at least it is an efficient mess.
Reported-by: Qiuhao Li qiuhao@sysec.org Reported-by: Gaoning Pan pgn@zju.edu.cn Reported-by: Yongkang Jia kangel@zju.edu.cn Reported-by: syzbot+6cde2282daa792c49ab8@syzkaller.appspotmail.com Debugged-by: Tadeusz Struk tadeusz.struk@linaro.org Tested-by: Maxim Levitsky mlevitsk@redhat.com Cc: stable@vger.kernel.org Fixes: bd53cb35a3e9 ("X86/KVM: Handle PFNs outside of kernel reach when touching GPTEs") Signed-off-by: Paolo Bonzini pbonzini@redhat.com --- arch/x86/kvm/paging_tmpl.h | 77 ++++++++++++++++++-------------------- 1 file changed, 37 insertions(+), 40 deletions(-)
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 97b21e7fd013..13b5c424adb2 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -34,9 +34,8 @@ #define PT_HAVE_ACCESSED_DIRTY(mmu) true #ifdef CONFIG_X86_64 #define PT_MAX_FULL_LEVELS PT64_ROOT_MAX_LEVEL - #define CMPXCHG cmpxchg + #define CMPXCHG "cmpxchgq" #else - #define CMPXCHG cmpxchg64 #define PT_MAX_FULL_LEVELS 2 #endif #elif PTTYPE == 32 @@ -52,7 +51,7 @@ #define PT_GUEST_DIRTY_SHIFT PT_DIRTY_SHIFT #define PT_GUEST_ACCESSED_SHIFT PT_ACCESSED_SHIFT #define PT_HAVE_ACCESSED_DIRTY(mmu) true - #define CMPXCHG cmpxchg + #define CMPXCHG "cmpxchgl" #elif PTTYPE == PTTYPE_EPT #define pt_element_t u64 #define guest_walker guest_walkerEPT @@ -65,8 +64,10 @@ #define PT_GUEST_DIRTY_SHIFT 9 #define PT_GUEST_ACCESSED_SHIFT 8 #define PT_HAVE_ACCESSED_DIRTY(mmu) ((mmu)->ept_ad) - #define CMPXCHG cmpxchg64 #define PT_MAX_FULL_LEVELS 4 + #ifdef CONFIG_X86_64 + #define CMPXCHG "cmpxchgq" + #endif #else #error Invalid PTTYPE value #endif @@ -132,43 +133,39 @@ static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, pt_element_t __user *ptep_user, unsigned index, pt_element_t orig_pte, pt_element_t new_pte) { - int npages; - pt_element_t ret; - pt_element_t *table; - struct page *page; - - npages = get_user_pages_fast((unsigned long)ptep_user, 1, FOLL_WRITE, &page); - if (likely(npages == 1)) { - table = kmap_atomic(page); - ret = CMPXCHG(&table[index], orig_pte, new_pte); - kunmap_atomic(table); - - kvm_release_page_dirty(page); - } else { - struct vm_area_struct *vma; - unsigned long vaddr = (unsigned long)ptep_user & PAGE_MASK; - unsigned long pfn; - unsigned long paddr; - - down_read(¤t->mm->mmap_sem); - vma = find_vma_intersection(current->mm, vaddr, vaddr + PAGE_SIZE); - if (!vma || !(vma->vm_flags & VM_PFNMAP)) { - up_read(¤t->mm->mmap_sem); - return -EFAULT; - } - pfn = ((vaddr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; - paddr = pfn << PAGE_SHIFT; - table = memremap(paddr, PAGE_SIZE, MEMREMAP_WB); - if (!table) { - up_read(¤t->mm->mmap_sem); - return -EFAULT; - } - ret = CMPXCHG(&table[index], orig_pte, new_pte); - memunmap(table); - up_read(¤t->mm->mmap_sem); - } + int r = -EFAULT; + + if (!user_access_begin(ptep_user, sizeof(pt_element_t))) + return -EFAULT; + +#ifdef CMPXCHG + asm volatile("1:" LOCK_PREFIX CMPXCHG " %[new], %[ptr]\n" + "mov $0, %[r]\n" + "setnz %b[r]\n" + "2:" + _ASM_EXTABLE_UA(1b, 2b) + : [ptr] "+m" (*ptep_user), + [old] "+a" (orig_pte), + [r] "+q" (r) + : [new] "r" (new_pte) + : "memory"); +#else + asm volatile("1:" LOCK_PREFIX "cmpxchg8b %[ptr]\n" + "movl $0, %[r]\n" + "jz 2f\n" + "incl %[r]\n" + "2:" + _ASM_EXTABLE_UA(1b, 2b) + : [ptr] "+m" (*ptep_user), + [old] "+A" (orig_pte), + [r] "+rm" (r) + : [new_lo] "b" ((u32)new_pte), + [new_hi] "c" ((u32)(new_pte >> 32)) + : "memory"); +#endif
- return (ret != orig_pte); + user_access_end(); + return r; }
static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu,
 
            On Mon, Apr 04, 2022 at 11:49:13AM -0400, Paolo Bonzini wrote:
commit 2a8859f373b0a86f0ece8ec8312607eacf12485d upstream.
FNAME(cmpxchg_gpte) is an inefficient mess. It is at least decent if it can go through get_user_pages_fast(), but if it cannot then it tries to use memremap(); that is not just terribly slow, it is also wrong because it assumes that the VM_PFNMAP VMA is contiguous.
The right way to do it would be to do the same thing as hva_to_pfn_remapped() does since commit add6a0cd1c5b ("KVM: MMU: try to fix up page faults before giving up", 2016-07-05), using follow_pte() and fixup_user_fault() to determine the correct address to use for memremap(). To do this, one could for example extract hva_to_pfn() for use outside virt/kvm/kvm_main.c. But really there is no reason to do that either, because there is already a perfectly valid address to do the cmpxchg() on, only it is a userspace address. That means doing user_access_begin()/user_access_end() and writing the code in assembly to handle any exception correctly. Worse, the guest PTE can be 8-byte even on i686 so there is the extra complication of using cmpxchg8b to account for. But at least it is an efficient mess.
Reported-by: Qiuhao Li qiuhao@sysec.org Reported-by: Gaoning Pan pgn@zju.edu.cn Reported-by: Yongkang Jia kangel@zju.edu.cn Reported-by: syzbot+6cde2282daa792c49ab8@syzkaller.appspotmail.com Debugged-by: Tadeusz Struk tadeusz.struk@linaro.org Tested-by: Maxim Levitsky mlevitsk@redhat.com Cc: stable@vger.kernel.org Fixes: bd53cb35a3e9 ("X86/KVM: Handle PFNs outside of kernel reach when touching GPTEs") Signed-off-by: Paolo Bonzini pbonzini@redhat.com
arch/x86/kvm/paging_tmpl.h | 77 ++++++++++++++++++-------------------- 1 file changed, 37 insertions(+), 40 deletions(-)
Thanks for the fix, now queued up.
greg k-h
linux-stable-mirror@lists.linaro.org

