Commit 4936084c2ee227524c242d79 ("MIPS: Cleanup R10000_LLSC_WAR logic in atomic.h") introduced a mistake in atomic64_fetch_##op##_relaxed(): it forgot to remove R10000_LLSC_WAR from the if-condition. Fix it.
Fixes: 4936084c2ee227524c24 ("MIPS: Cleanup R10000_LLSC_WAR logic in atomic.h")
Cc: stable@vger.kernel.org
Cc: Joshua Kinard <kumba@gentoo.org>
Signed-off-by: Huacai Chen <chenhc@lemote.com>
---
 arch/mips/include/asm/atomic.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h
index e8fbfd4..43fcd35 100644
--- a/arch/mips/include/asm/atomic.h
+++ b/arch/mips/include/asm/atomic.h
@@ -313,7 +313,7 @@ static __inline__ long atomic64_fetch_##op##_relaxed(long i, atomic64_t * v)	\
 {									\
 	long result;							\
 									\
-	if (kernel_uses_llsc && R10000_LLSC_WAR) {			\
+	if (kernel_uses_llsc) {						\
 		long temp;						\
 									\
 		__asm__ __volatile__(					\
On Loongson-2G/2H/3A/3B there is a hardware flaw: ll/sc and lld/scd are very weakly ordered. We should add sync instructions before each ll/lld and after the last sc/scd to work around it. Otherwise, this flaw will occasionally cause deadlock (e.g. when running a heavy load test with LTP).
This patch is a less invasive but more comfortable solution; it is not even a sufficient solution in theory (though it is sufficient in practice). The solution that is sufficient in theory (and is more than a minimal change) looks too ugly; see https://patchwork.linux-mips.org/patch/21134/.
Why disable fix-loongson3-llsc in the compiler? Because the compiler fix would cause problems in the kernel's .fixup section.
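To make the pattern concrete, here is a hand-written sketch of the workaround around a single ll/sc sequence. This is an illustration only, with a made-up example_atomic_add(); the real code is generated by the ATOMIC_OP() macros in arch/mips/include/asm/atomic.h and uses the kernel's constraint helpers:

/*
 * Simplified illustration only: loongson_llsc_mb() is placed before
 * the ll and after the last sc, as described above. Not the actual
 * macro-generated kernel code.
 */
static inline void example_atomic_add(int i, atomic_t *v)
{
	int temp;

	loongson_llsc_mb();		/* sync before the ll */
	__asm__ __volatile__(
	"1:	ll	%0, %1		# load-linked		\n"
	"	addu	%0, %2					\n"
	"	sc	%0, %1		# store-conditional	\n"
	"	beqz	%0, 1b		# retry if sc failed	\n"
	: "=&r" (temp), "+m" (v->counter)
	: "Ir" (i));
	loongson_llsc_mb();		/* sync after the last sc */
}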
Cc: stable@vger.kernel.org
Signed-off-by: Huacai Chen <chenhc@lemote.com>
---
 arch/mips/include/asm/atomic.h  |  4 ++++
 arch/mips/include/asm/barrier.h |  6 ++++++
 arch/mips/include/asm/bitops.h  | 10 ++++++++++
 arch/mips/include/asm/edac.h    |  2 ++
 arch/mips/include/asm/futex.h   |  4 ++++
 arch/mips/include/asm/pgtable.h |  2 ++
 arch/mips/loongson64/Platform   |  3 +++
 arch/mips/mm/tlbex.c            | 11 +++++++++++
 8 files changed, 42 insertions(+)
diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h
index 43fcd35..dbbc6c1 100644
--- a/arch/mips/include/asm/atomic.h
+++ b/arch/mips/include/asm/atomic.h
@@ -58,6 +58,7 @@ static __inline__ void atomic_##op(int i, atomic_t * v)	\
 	if (kernel_uses_llsc) {						\
 		int temp;						\
 									\
+		loongson_llsc_mb();					\
 		__asm__ __volatile__(					\
 		"	.set	push				\n"	\
 		"	.set	"MIPS_ISA_LEVEL"		\n"	\
@@ -68,6 +69,7 @@ static __inline__ void atomic_##op(int i, atomic_t * v)	\
 		"	.set	pop				\n"	\
 		: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter)	\
 		: "Ir" (i));						\
+		loongson_llsc_mb();					\
 	} else {							\
 		unsigned long flags;					\
 									\
@@ -256,6 +258,7 @@ static __inline__ void atomic64_##op(long i, atomic64_t * v)	\
 	if (kernel_uses_llsc) {						\
 		long temp;						\
 									\
+		loongson_llsc_mb();					\
 		__asm__ __volatile__(					\
 		"	.set	push				\n"	\
 		"	.set	"MIPS_ISA_LEVEL"		\n"	\
@@ -266,6 +269,7 @@ static __inline__ void atomic64_##op(long i, atomic64_t * v)	\
 		"	.set	pop				\n"	\
 		: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter)	\
 		: "Ir" (i));						\
+		loongson_llsc_mb();					\
 	} else {							\
 		unsigned long flags;					\
 									\
diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h
index a5eb1bb..7ce9a15 100644
--- a/arch/mips/include/asm/barrier.h
+++ b/arch/mips/include/asm/barrier.h
@@ -222,6 +222,12 @@
 #define __smp_mb__before_atomic()	__smp_mb__before_llsc()
 #define __smp_mb__after_atomic()	smp_llsc_mb()
 
+#if defined(CONFIG_LOONGSON3) && defined(CONFIG_SMP) /* Loongson-3's LLSC workaround */
+#define loongson_llsc_mb()	__asm__ __volatile__(__WEAK_LLSC_MB : : :"memory")
+#else
+#define loongson_llsc_mb()	__asm__ __volatile__(" " : : :"memory")
+#endif
+
 #include <asm-generic/barrier.h>
 
 #endif /* __ASM_BARRIER_H */
diff --git a/arch/mips/include/asm/bitops.h b/arch/mips/include/asm/bitops.h
index f2a840f..d70c7e5 100644
--- a/arch/mips/include/asm/bitops.h
+++ b/arch/mips/include/asm/bitops.h
@@ -69,6 +69,7 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *addr)
 		: "ir" (1UL << bit), GCC_OFF_SMALL_ASM() (*m));
 #if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
 	} else if (kernel_uses_llsc && __builtin_constant_p(bit)) {
+		loongson_llsc_mb();
 		do {
 			__asm__ __volatile__(
 			"	" __LL "%0, %1		# set_bit	\n"
@@ -77,8 +78,10 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *addr)
 			: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m)
 			: "ir" (bit), "r" (~0));
 		} while (unlikely(!temp));
+		loongson_llsc_mb();
 #endif /* CONFIG_CPU_MIPSR2 || CONFIG_CPU_MIPSR6 */
 	} else if (kernel_uses_llsc) {
+		loongson_llsc_mb();
 		do {
 			__asm__ __volatile__(
 			"	.set	push				\n"
@@ -90,6 +93,7 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *addr)
 			: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m)
 			: "ir" (1UL << bit));
 		} while (unlikely(!temp));
+		loongson_llsc_mb();
 	} else
 		__mips_set_bit(nr, addr);
 }
@@ -123,6 +127,7 @@ static inline void clear_bit(unsigned long nr, volatile unsigned long *addr)
 		: "ir" (~(1UL << bit)));
 #if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
 	} else if (kernel_uses_llsc && __builtin_constant_p(bit)) {
+		loongson_llsc_mb();
 		do {
 			__asm__ __volatile__(
 			"	" __LL "%0, %1		# clear_bit	\n"
@@ -131,8 +136,10 @@ static inline void clear_bit(unsigned long nr, volatile unsigned long *addr)
 			: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m)
 			: "ir" (bit));
 		} while (unlikely(!temp));
+		loongson_llsc_mb();
 #endif /* CONFIG_CPU_MIPSR2 || CONFIG_CPU_MIPSR6 */
 	} else if (kernel_uses_llsc) {
+		loongson_llsc_mb();
 		do {
 			__asm__ __volatile__(
 			"	.set	push				\n"
@@ -144,6 +151,7 @@ static inline void clear_bit(unsigned long nr, volatile unsigned long *addr)
 			: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m)
 			: "ir" (~(1UL << bit)));
 		} while (unlikely(!temp));
+		loongson_llsc_mb();
 	} else
 		__mips_clear_bit(nr, addr);
 }
@@ -193,6 +201,7 @@ static inline void change_bit(unsigned long nr, volatile unsigned long *addr)
 		unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
 		unsigned long temp;
 
+		loongson_llsc_mb();
 		do {
 			__asm__ __volatile__(
 			"	.set	push				\n"
@@ -204,6 +213,7 @@ static inline void change_bit(unsigned long nr, volatile unsigned long *addr)
 			: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m)
 			: "ir" (1UL << bit));
 		} while (unlikely(!temp));
+		loongson_llsc_mb();
 	} else
 		__mips_change_bit(nr, addr);
 }
diff --git a/arch/mips/include/asm/edac.h b/arch/mips/include/asm/edac.h
index c5d1477..1abf69c 100644
--- a/arch/mips/include/asm/edac.h
+++ b/arch/mips/include/asm/edac.h
@@ -20,6 +20,7 @@ static inline void edac_atomic_scrub(void *va, u32 size)
 		 *	Intel: asm("lock; addl $0, %0"::"m"(*virt_addr));
 		 */
 
+		loongson_llsc_mb();
 		__asm__ __volatile__ (
 		"	.set	push				\n"
 		"	.set	mips2				\n"
@@ -30,6 +31,7 @@ static inline void edac_atomic_scrub(void *va, u32 size)
 		"	.set	pop				\n"
 		: "=&r" (temp), "=" GCC_OFF_SMALL_ASM() (*virt_addr)
 		: GCC_OFF_SMALL_ASM() (*virt_addr));
+		loongson_llsc_mb();
 
 		virt_addr++;
 	}
diff --git a/arch/mips/include/asm/futex.h b/arch/mips/include/asm/futex.h
index 8eff134..07b0be8 100644
--- a/arch/mips/include/asm/futex.h
+++ b/arch/mips/include/asm/futex.h
@@ -50,6 +50,7 @@
 		  "i" (-EFAULT)						\
 		: "memory");						\
 	} else if (cpu_has_llsc) {					\
+		loongson_llsc_mb();					\
 		__asm__ __volatile__(					\
 		"	.set	push				\n"	\
 		"	.set	noat				\n"	\
@@ -78,6 +79,7 @@
 		: "0" (0), GCC_OFF_SMALL_ASM() (*uaddr), "Jr" (oparg),	\
 		  "i" (-EFAULT)						\
 		: "memory");						\
+		loongson_llsc_mb();					\
 	} else								\
 		ret = -ENOSYS;						\
 }
@@ -163,6 +165,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
 		  "i" (-EFAULT)
 		: "memory");
 	} else if (cpu_has_llsc) {
+		loongson_llsc_mb();
 		__asm__ __volatile__(
 		"# futex_atomic_cmpxchg_inatomic		\n"
 		"	.set	push				\n"
@@ -192,6 +195,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
 		: GCC_OFF_SMALL_ASM() (*uaddr), "Jr" (oldval), "Jr" (newval),
 		  "i" (-EFAULT)
 		: "memory");
+		loongson_llsc_mb();
 	} else
 		return -ENOSYS;
 
diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index 57933fc..910851c 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -228,6 +228,7 @@ static inline void set_pte(pte_t *ptep, pte_t pteval)
 			: [buddy] "+m" (buddy->pte), [tmp] "=&r" (tmp)
 			: [global] "r" (page_global));
 		} else if (kernel_uses_llsc) {
+			loongson_llsc_mb();
 			__asm__ __volatile__ (
 			"	.set	push				\n"
 			"	.set	"MIPS_ISA_ARCH_LEVEL"		\n"
@@ -242,6 +243,7 @@ static inline void set_pte(pte_t *ptep, pte_t pteval)
 			"	.set	pop				\n"
 			: [buddy] "+m" (buddy->pte), [tmp] "=&r" (tmp)
 			: [global] "r" (page_global));
+			loongson_llsc_mb();
 		}
 #else /* !CONFIG_SMP */
 	if (pte_none(*buddy))
diff --git a/arch/mips/loongson64/Platform b/arch/mips/loongson64/Platform
index 0fce460..3700dcf 100644
--- a/arch/mips/loongson64/Platform
+++ b/arch/mips/loongson64/Platform
@@ -23,6 +23,9 @@ ifdef CONFIG_CPU_LOONGSON2F_WORKAROUNDS
 endif
 
 cflags-$(CONFIG_CPU_LOONGSON3)	+= -Wa,--trap
+ifneq ($(call as-option,-Wa$(comma)-mfix-loongson3-llsc,),)
+  cflags-$(CONFIG_CPU_LOONGSON3) += -Wa$(comma)-mno-fix-loongson3-llsc
+endif
 #
 # binutils from v2.25 on and gcc starting from v4.9.0 treat -march=loongson3a
 # as MIPS64 R2; older versions as just R1. This leaves the possibility open
diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
index 37b1cb2..e1a4637 100644
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c
@@ -932,6 +932,8 @@ build_get_pgd_vmalloc64(u32 **p, struct uasm_label **l, struct uasm_reloc **r,
 		 * to mimic that here by taking a load/istream page
 		 * fault.
 		 */
+		if (current_cpu_type() == CPU_LOONGSON3)
+			uasm_i_sync(p, 0);
 		UASM_i_LA(p, ptr, (unsigned long)tlb_do_page_fault_0);
 		uasm_i_jr(p, ptr);
 
@@ -1556,6 +1558,7 @@ static void build_loongson3_tlb_refill_handler(void)
 
 	if (check_for_high_segbits) {
 		uasm_l_large_segbits_fault(&l, p);
+		uasm_i_sync(&p, 0);
 		UASM_i_LA(&p, K1, (unsigned long)tlb_do_page_fault_0);
 		uasm_i_jr(&p, K1);
 		uasm_i_nop(&p);
@@ -1646,6 +1649,8 @@
 static void iPTE_LW(u32 **p, unsigned int pte, unsigned int ptr)
 {
 #ifdef CONFIG_SMP
+	if (current_cpu_type() == CPU_LOONGSON3)
+		uasm_i_sync(p, 0);
 # ifdef CONFIG_PHYS_ADDR_T_64BIT
 	if (cpu_has_64bits)
 		uasm_i_lld(p, pte, 0, ptr);
@@ -2259,6 +2264,8 @@ static void build_r4000_tlb_load_handler(void)
 #endif
 
 	uasm_l_nopage_tlbl(&l, p);
+	if (current_cpu_type() == CPU_LOONGSON3)
+		uasm_i_sync(&p, 0);
 	build_restore_work_registers(&p);
 #ifdef CONFIG_CPU_MICROMIPS
 	if ((unsigned long)tlb_do_page_fault_0 & 1) {
@@ -2313,6 +2320,8 @@ static void build_r4000_tlb_store_handler(void)
 #endif
 
 	uasm_l_nopage_tlbs(&l, p);
+	if (current_cpu_type() == CPU_LOONGSON3)
+		uasm_i_sync(&p, 0);
 	build_restore_work_registers(&p);
 #ifdef CONFIG_CPU_MICROMIPS
 	if ((unsigned long)tlb_do_page_fault_1 & 1) {
@@ -2368,6 +2377,8 @@ static void build_r4000_tlb_modify_handler(void)
 #endif
 
 	uasm_l_nopage_tlbm(&l, p);
+	if (current_cpu_type() == CPU_LOONGSON3)
+		uasm_i_sync(&p, 0);
 	build_restore_work_registers(&p);
 #ifdef CONFIG_CPU_MICROMIPS
 	if ((unsigned long)tlb_do_page_fault_1 & 1) {
On Dec 25, 2018, at 8:51 AM, Huacai Chen <chenhc@lemote.com> wrote:
> Why disable fix-loongson3-llsc in the compiler? Because the compiler fix would cause problems in the kernel's .fixup section.
It is the __ex_table section, not .fixup.
> diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h
> index 43fcd35..dbbc6c1 100644
> --- a/arch/mips/include/asm/atomic.h
> +++ b/arch/mips/include/asm/atomic.h
> @@ -58,6 +58,7 @@ static __inline__ void atomic_##op(int i, atomic_t * v)	\
>  	if (kernel_uses_llsc) {						\
>  		int temp;						\
>  									\
> +		loongson_llsc_mb();					\
I noticed that we have smp_mb__before_llsc and smp_llsc_mb.
There are smp_mb__before_llsc/smp_llsc_mb in some situations but not in others. Why is it like this? I guess we should add smp_mb__before_llsc/smp_llsc_mb pairs for all ll/sc.
[...]

> diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
> @@ -932,6 +932,8 @@ build_get_pgd_vmalloc64(u32 **p, struct uasm_label **l, struct uasm_reloc **r,
>  		 * to mimic that here by taking a load/istream page
>  		 * fault.
>  		 */
> +		if (current_cpu_type() == CPU_LOONGSON3)
> +			uasm_i_sync(p, 0);
>  		UASM_i_LA(p, ptr, (unsigned long)tlb_do_page_fault_0);
>  		uasm_i_jr(p, ptr);
This place does not seem to be about the ll/sc problem, as no ll/sc is used here at all. Yet with this patch applied, it seems much more stable.
I tried to add sync before ll and at the target of the branch in the ll/sc loop; the stability improves, but the machine still hangs.
[...]
Hi, Yunqiang,
1. Only Loongson needs this barrier, so I don't use smp_mb__before_llsc()/smp_llsc_mb() but introduce a new barrier instead.
2. Atomic operations with the _relaxed suffix don't need a barrier; operations with the _acquire/_release suffix, and those without a suffix, add the needed barriers themselves.
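As a rough illustration of point 2 (a hand-written simplification, not the actual wrapper code; example_add_return() is a made-up name): the fully ordered variant is built from the relaxed body plus the ordinary SMP barriers, so the relaxed body itself only needs loongson_llsc_mb() around its ll/sc loop.

/*
 * Simplified illustration only: the real fully-ordered atomics are
 * generated from the relaxed ones by generic wrapper macros, which on
 * MIPS map the before/after-atomic barriers to
 * __smp_mb__before_llsc()/smp_llsc_mb() (see the barrier.h hunk above).
 */
static inline int example_add_return(int i, atomic_t *v)
{
	int ret;

	smp_mb__before_llsc();			/* ordering required by the non-relaxed API */
	ret = atomic_add_return_relaxed(i, v);	/* relaxed body: only loongson_llsc_mb() inside */
	smp_llsc_mb();				/* ordering required by the non-relaxed API */

	return ret;
}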
Huacai
------------------ Original ------------------
From: "suyunqiang" <wzssyqa@icloud.com>
Date: Tue, Dec 25, 2018 08:24 PM
To: "Huacai Chen" <chenhc@lemote.com>
Cc: "Ralf Baechle" <ralf@linux-mips.org>; "Paul Burton" <paul.burton@mips.com>; "Steven J . Hill" <Steven.Hill@cavium.com>; "linux-mips" <linux-mips@linux-mips.org>; "Fuxin Zhang" <zhangfx@lemote.com>; "wuzhangjin" <wuzhangjin@gmail.com>; "stable" <stable@vger.kernel.org>
Subject: Re: [PATCH 2/2] MIPS: Loongson: Introduce and use loongson_llsc_mb()
[...]
Hello,
Huacai Chen wrote:
> Commit 4936084c2ee227524c242d79 ("MIPS: Cleanup R10000_LLSC_WAR logic in atomic.h") introduced a mistake in atomic64_fetch_##op##_relaxed(): it forgot to remove R10000_LLSC_WAR from the if-condition. Fix it.
> Fixes: 4936084c2ee227524c24 ("MIPS: Cleanup R10000_LLSC_WAR logic in atomic.h")
> Cc: stable@vger.kernel.org
> Cc: Joshua Kinard <kumba@gentoo.org>
> Signed-off-by: Huacai Chen <chenhc@lemote.com>
Applied to mips-fixes.
Thanks, Paul
[ This message was auto-generated; if you believe anything is incorrect then please email paul.burton@mips.com to report it. ]