From: "Aneesh Kumar K.V" aneesh.kumar@linux.ibm.com
commit 12e4d53f3f04e81f9e83d6fc10edc7314ab9f6b9 upstream.
Patch series "Fixup page directory freeing", v4.
This is a repost of patch series from Peter with the arch specific changes except ppc64 dropped. ppc64 changes are added here because we are redoing the patch series on top of ppc64 changes. This makes it easy to backport these changes. Only the first 2 patches need to be backported to stable.
The thing is, on anything SMP, freeing page directories should observe the exact same order as normal page freeing:
1) unhook page/directory 2) TLB invalidate 3) free page/directory
Without this, any concurrent page-table walk could end up with a Use-after-Free. This is esp. trivial for anything that has software page-table walkers (HAVE_FAST_GUP / software TLB fill) or the hardware caches partial page-walks (ie. caches page directories).
Even on UP this might give issues since mmu_gather is preemptible these days. An interrupt or preempted task accessing user pages might stumble into the free page if the hardware caches page directories.
This patch series fixes ppc64 and add generic MMU_GATHER changes to support the conversion of other architectures. I haven't added patches w.r.t other architecture because they are yet to be acked.
This patch (of 9):
A followup patch is going to make sure we correctly invalidate page walk cache before we free page table pages. In order to keep things simple enable RCU_TABLE_FREE even for !SMP so that we don't have to fixup the !SMP case differently in the followup patch
!SMP case is right now broken for radix translation w.r.t page walk cache flush. We can get interrupted in between page table free and that would imply we have page walk cache entries pointing to tables which got freed already. Michael said "both our platforms that run on Power9 force SMP on in Kconfig, so the !SMP case is unlikely to be a problem for anyone in practice, unless they've hacked their kernel to build it !SMP."
Link: http://lkml.kernel.org/r/20200116064531.483522-2-aneesh.kumar@linux.ibm.com Signed-off-by: Aneesh Kumar K.V aneesh.kumar@linux.ibm.com Cc: stable@vger.kernel.org # 4.19 Signed-off-by: Santosh Sivaraj santosh@fossix.org [santosh: backported for 4.19 stable] Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org --- arch/powerpc/Kconfig | 2 +- arch/powerpc/include/asm/book3s/32/pgalloc.h | 8 -------- arch/powerpc/include/asm/book3s/64/pgalloc.h | 2 -- arch/powerpc/include/asm/nohash/32/pgalloc.h | 8 -------- arch/powerpc/include/asm/nohash/64/pgalloc.h | 9 +-------- arch/powerpc/mm/pgtable-book3s64.c | 7 ------- 6 files changed, 2 insertions(+), 34 deletions(-)
--- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -216,7 +216,7 @@ config PPC select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP - select HAVE_RCU_TABLE_FREE if SMP + select HAVE_RCU_TABLE_FREE select HAVE_RCU_TABLE_NO_INVALIDATE if HAVE_RCU_TABLE_FREE select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RELIABLE_STACKTRACE if PPC64 && CPU_LITTLE_ENDIAN --- a/arch/powerpc/include/asm/book3s/32/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h @@ -110,7 +110,6 @@ static inline void pgtable_free(void *ta #define check_pgt_cache() do { } while (0) #define get_hugepd_cache_index(x) (x)
-#ifdef CONFIG_SMP static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift) { @@ -127,13 +126,6 @@ static inline void __tlb_remove_table(vo
pgtable_free(table, shift); } -#else -static inline void pgtable_free_tlb(struct mmu_gather *tlb, - void *table, int shift) -{ - pgtable_free(table, shift); -} -#endif
static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table, unsigned long address) --- a/arch/powerpc/include/asm/book3s/64/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h @@ -47,9 +47,7 @@ extern pmd_t *pmd_fragment_alloc(struct extern void pte_fragment_free(unsigned long *, int); extern void pmd_fragment_free(unsigned long *); extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift); -#ifdef CONFIG_SMP extern void __tlb_remove_table(void *_table); -#endif
static inline pgd_t *radix__pgd_alloc(struct mm_struct *mm) { --- a/arch/powerpc/include/asm/nohash/32/pgalloc.h +++ b/arch/powerpc/include/asm/nohash/32/pgalloc.h @@ -111,7 +111,6 @@ static inline void pgtable_free(void *ta #define check_pgt_cache() do { } while (0) #define get_hugepd_cache_index(x) (x)
-#ifdef CONFIG_SMP static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift) { @@ -128,13 +127,6 @@ static inline void __tlb_remove_table(vo
pgtable_free(table, shift); } -#else -static inline void pgtable_free_tlb(struct mmu_gather *tlb, - void *table, int shift) -{ - pgtable_free(table, shift); -} -#endif
static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table, unsigned long address) --- a/arch/powerpc/include/asm/nohash/64/pgalloc.h +++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h @@ -142,7 +142,7 @@ static inline void pgtable_free(void *ta }
#define get_hugepd_cache_index(x) (x) -#ifdef CONFIG_SMP + static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift) { unsigned long pgf = (unsigned long)table; @@ -160,13 +160,6 @@ static inline void __tlb_remove_table(vo pgtable_free(table, shift); }
-#else -static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift) -{ - pgtable_free(table, shift); -} -#endif - static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table, unsigned long address) { --- a/arch/powerpc/mm/pgtable-book3s64.c +++ b/arch/powerpc/mm/pgtable-book3s64.c @@ -432,7 +432,6 @@ static inline void pgtable_free(void *ta } }
-#ifdef CONFIG_SMP void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int index) { unsigned long pgf = (unsigned long)table; @@ -449,12 +448,6 @@ void __tlb_remove_table(void *_table)
return pgtable_free(table, index); } -#else -void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int index) -{ - return pgtable_free(table, index); -} -#endif
#ifdef CONFIG_PROC_FS atomic_long_t direct_pages_count[MMU_PAGE_COUNT];