From: Kees Cook keescook@chromium.org
[ Upstream commit 32b143637e8180f5d5cea54320c769210dea4f19 ]
In commit 76624175dcae ("arm64: uaccess: consistently check object sizes"), the object size checks are moved outside the access_ok() so that bad destinations are detected before hitting the "memset(dest, 0, size)" in the copy_from_user() failure path.
This makes the same change for arm, with attention given to possibly extracting the uaccess routines into a common header file for all architectures in the future.
Suggested-by: Mark Rutland mark.rutland@arm.com Signed-off-by: Kees Cook keescook@chromium.org Signed-off-by: Russell King rmk+kernel@armlinux.org.uk Signed-off-by: Sasha Levin alexander.levin@verizon.com --- arch/arm/include/asm/uaccess.h | 44 ++++++++++++++++++++++++++++++------------ 1 file changed, 32 insertions(+), 12 deletions(-)
diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index 1f59ea051bab..b7e0125c0bbf 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -478,11 +478,10 @@ extern unsigned long __must_check arm_copy_from_user(void *to, const void __user *from, unsigned long n);
static inline unsigned long __must_check -__copy_from_user(void *to, const void __user *from, unsigned long n) +__arch_copy_from_user(void *to, const void __user *from, unsigned long n) { unsigned int __ua_flags;
- check_object_size(to, n, false); __ua_flags = uaccess_save_and_enable(); n = arm_copy_from_user(to, from, n); uaccess_restore(__ua_flags); @@ -495,18 +494,15 @@ extern unsigned long __must_check __copy_to_user_std(void __user *to, const void *from, unsigned long n);
static inline unsigned long __must_check -__copy_to_user(void __user *to, const void *from, unsigned long n) +__arch_copy_to_user(void __user *to, const void *from, unsigned long n) { #ifndef CONFIG_UACCESS_WITH_MEMCPY unsigned int __ua_flags; - - check_object_size(from, n, true); __ua_flags = uaccess_save_and_enable(); n = arm_copy_to_user(to, from, n); uaccess_restore(__ua_flags); return n; #else - check_object_size(from, n, true); return arm_copy_to_user(to, from, n); #endif } @@ -526,25 +522,49 @@ __clear_user(void __user *addr, unsigned long n) }
#else -#define __copy_from_user(to, from, n) (memcpy(to, (void __force *)from, n), 0) -#define __copy_to_user(to, from, n) (memcpy((void __force *)to, from, n), 0) +#define __arch_copy_from_user(to, from, n) \ + (memcpy(to, (void __force *)from, n), 0) +#define __arch_copy_to_user(to, from, n) \ + (memcpy((void __force *)to, from, n), 0) #define __clear_user(addr, n) (memset((void __force *)addr, 0, n), 0) #endif
-static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n) +static inline unsigned long __must_check +__copy_from_user(void *to, const void __user *from, unsigned long n) +{ + check_object_size(to, n, false); + return __arch_copy_from_user(to, from, n); +} + +static inline unsigned long __must_check +copy_from_user(void *to, const void __user *from, unsigned long n) { unsigned long res = n; + + check_object_size(to, n, false); + if (likely(access_ok(VERIFY_READ, from, n))) - res = __copy_from_user(to, from, n); + res = __arch_copy_from_user(to, from, n); if (unlikely(res)) memset(to + (n - res), 0, res); return res; }
-static inline unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long n) +static inline unsigned long __must_check +__copy_to_user(void __user *to, const void *from, unsigned long n) { + check_object_size(from, n, true); + + return __arch_copy_to_user(to, from, n); +} + +static inline unsigned long __must_check +copy_to_user(void __user *to, const void *from, unsigned long n) +{ + check_object_size(from, n, true); + if (access_ok(VERIFY_WRITE, to, n)) - n = __copy_to_user(to, from, n); + n = __arch_copy_to_user(to, from, n); return n; }
From: Dmitry Safonov dsafonov@virtuozzo.com
[ Upstream commit 2a4d0c627f5374f365a873dea4e10ae0bb437680 ]
Kernel erases R8..R11 registers prior returning to userspace from int80:
https://lkml.org/lkml/2009/10/1/164
GCC can reuse these registers and doesn't expect them to change during syscall invocation. I met this kind of bug in CRIU once GCC 6.1 and CLANG stored local variables in those registers and the kernel zerofied them during syscall:
https://github.com/xemul/criu/commit/990d33f1a1cdd17bca6c2eb059ab3be2564f7fa...
By that reason I suggest to add those registers to clobbers in selftests. Also, as noted by Andy - removed unneeded clobber for flags in INT $0x80 inline asm.
Signed-off-by: Dmitry Safonov dsafonov@virtuozzo.com Acked-by: Andy Lutomirski luto@kernel.org Cc: 0x7f454c46@gmail.com Cc: Borislav Petkov bp@alien8.de Cc: Borislav Petkov bp@suse.de Cc: Brian Gerst brgerst@gmail.com Cc: Denys Vlasenko dvlasenk@redhat.com Cc: H. Peter Anvin hpa@zytor.com Cc: Josh Poimboeuf jpoimboe@redhat.com Cc: Linus Torvalds torvalds@linux-foundation.org Cc: Peter Zijlstra peterz@infradead.org Cc: Shuah Khan shuah@kernel.org Cc: Thomas Gleixner tglx@linutronix.de Cc: linux-kselftest@vger.kernel.org Link: http://lkml.kernel.org/r/20170213101336.20486-1-dsafonov@virtuozzo.com Signed-off-by: Ingo Molnar mingo@kernel.org Signed-off-by: Sasha Levin alexander.levin@verizon.com --- tools/testing/selftests/x86/fsgsbase.c | 2 +- tools/testing/selftests/x86/ldt_gdt.c | 16 +++++++++++----- tools/testing/selftests/x86/ptrace_syscall.c | 3 ++- tools/testing/selftests/x86/single_step_syscall.c | 5 ++++- 4 files changed, 18 insertions(+), 8 deletions(-)
diff --git a/tools/testing/selftests/x86/fsgsbase.c b/tools/testing/selftests/x86/fsgsbase.c index 9b4610c6d3fb..f249e042b3b5 100644 --- a/tools/testing/selftests/x86/fsgsbase.c +++ b/tools/testing/selftests/x86/fsgsbase.c @@ -245,7 +245,7 @@ void do_unexpected_base(void) long ret; asm volatile ("int $0x80" : "=a" (ret) : "a" (243), "b" (low_desc) - : "flags"); + : "r8", "r9", "r10", "r11"); memcpy(&desc, low_desc, sizeof(desc)); munmap(low_desc, sizeof(desc));
diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c index e717fed80219..b9a22f18566a 100644 --- a/tools/testing/selftests/x86/ldt_gdt.c +++ b/tools/testing/selftests/x86/ldt_gdt.c @@ -45,6 +45,12 @@ #define AR_DB (1 << 22) #define AR_G (1 << 23)
+#ifdef __x86_64__ +# define INT80_CLOBBERS "r8", "r9", "r10", "r11" +#else +# define INT80_CLOBBERS +#endif + static int nerrs;
/* Points to an array of 1024 ints, each holding its own index. */ @@ -634,7 +640,7 @@ static int invoke_set_thread_area(void) asm volatile ("int $0x80" : "=a" (ret), "+m" (low_user_desc) : "a" (243), "b" (low_user_desc) - : "flags"); + : INT80_CLOBBERS); return ret; }
@@ -703,7 +709,7 @@ static void test_gdt_invalidation(void) "+a" (eax) : "m" (low_user_desc_clear), [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear) - : "flags"); + : INT80_CLOBBERS);
if (sel != 0) { result = "FAIL"; @@ -734,7 +740,7 @@ static void test_gdt_invalidation(void) "+a" (eax) : "m" (low_user_desc_clear), [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear) - : "flags"); + : INT80_CLOBBERS);
if (sel != 0) { result = "FAIL"; @@ -767,7 +773,7 @@ static void test_gdt_invalidation(void) "+a" (eax) : "m" (low_user_desc_clear), [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear) - : "flags"); + : INT80_CLOBBERS);
#ifdef __x86_64__ syscall(SYS_arch_prctl, ARCH_GET_FS, &new_base); @@ -820,7 +826,7 @@ static void test_gdt_invalidation(void) "+a" (eax) : "m" (low_user_desc_clear), [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear) - : "flags"); + : INT80_CLOBBERS);
#ifdef __x86_64__ syscall(SYS_arch_prctl, ARCH_GET_GS, &new_base); diff --git a/tools/testing/selftests/x86/ptrace_syscall.c b/tools/testing/selftests/x86/ptrace_syscall.c index b037ce9cf116..eaea92439708 100644 --- a/tools/testing/selftests/x86/ptrace_syscall.c +++ b/tools/testing/selftests/x86/ptrace_syscall.c @@ -58,7 +58,8 @@ static void do_full_int80(struct syscall_args32 *args) asm volatile ("int $0x80" : "+a" (args->nr), "+b" (args->arg0), "+c" (args->arg1), "+d" (args->arg2), - "+S" (args->arg3), "+D" (args->arg4), "+r" (bp)); + "+S" (args->arg3), "+D" (args->arg4), "+r" (bp) + : : "r8", "r9", "r10", "r11"); args->arg5 = bp; #else sys32_helper(args, int80_and_ret); diff --git a/tools/testing/selftests/x86/single_step_syscall.c b/tools/testing/selftests/x86/single_step_syscall.c index 50c26358e8b7..a48da95c18fd 100644 --- a/tools/testing/selftests/x86/single_step_syscall.c +++ b/tools/testing/selftests/x86/single_step_syscall.c @@ -56,9 +56,11 @@ static volatile sig_atomic_t sig_traps; #ifdef __x86_64__ # define REG_IP REG_RIP # define WIDTH "q" +# define INT80_CLOBBERS "r8", "r9", "r10", "r11" #else # define REG_IP REG_EIP # define WIDTH "l" +# define INT80_CLOBBERS #endif
static unsigned long get_eflags(void) @@ -140,7 +142,8 @@ int main()
printf("[RUN]\tSet TF and check int80\n"); set_eflags(get_eflags() | X86_EFLAGS_TF); - asm volatile ("int $0x80" : "=a" (tmp) : "a" (SYS_getpid)); + asm volatile ("int $0x80" : "=a" (tmp) : "a" (SYS_getpid) + : INT80_CLOBBERS); check_result();
/*
Hi Sasha,
I would object including this to stable tree: 1. It's selftest fixup 2. I have never saw selftests failing because of it (but it's possible) I saw this in CRIU (Checkpoint Restore In Userspace) project triggering, so I've fixed the selftests, mostly for documentation reasons - as people in userspace can reuse this code and think it's good and will just work after copy-paste (which is not the case).
So, this patch doesn't look urgent to include it in -stable kernel.
Thanks, Dmitry
2017-11-29 17:21 GMT+00:00 alexander.levin@verizon.com:
From: Dmitry Safonov dsafonov@virtuozzo.com
[ Upstream commit 2a4d0c627f5374f365a873dea4e10ae0bb437680 ]
Kernel erases R8..R11 registers prior returning to userspace from int80:
https://lkml.org/lkml/2009/10/1/164
GCC can reuse these registers and doesn't expect them to change during syscall invocation. I met this kind of bug in CRIU once GCC 6.1 and CLANG stored local variables in those registers and the kernel zerofied them during syscall:
https://github.com/xemul/criu/commit/990d33f1a1cdd17bca6c2eb059ab3be2564f7fa...
By that reason I suggest to add those registers to clobbers in selftests. Also, as noted by Andy - removed unneeded clobber for flags in INT $0x80 inline asm.
Signed-off-by: Dmitry Safonov dsafonov@virtuozzo.com Acked-by: Andy Lutomirski luto@kernel.org Cc: 0x7f454c46@gmail.com Cc: Borislav Petkov bp@alien8.de Cc: Borislav Petkov bp@suse.de Cc: Brian Gerst brgerst@gmail.com Cc: Denys Vlasenko dvlasenk@redhat.com Cc: H. Peter Anvin hpa@zytor.com Cc: Josh Poimboeuf jpoimboe@redhat.com Cc: Linus Torvalds torvalds@linux-foundation.org Cc: Peter Zijlstra peterz@infradead.org Cc: Shuah Khan shuah@kernel.org Cc: Thomas Gleixner tglx@linutronix.de Cc: linux-kselftest@vger.kernel.org Link: http://lkml.kernel.org/r/20170213101336.20486-1-dsafonov@virtuozzo.com Signed-off-by: Ingo Molnar mingo@kernel.org Signed-off-by: Sasha Levin alexander.levin@verizon.com
tools/testing/selftests/x86/fsgsbase.c | 2 +- tools/testing/selftests/x86/ldt_gdt.c | 16 +++++++++++----- tools/testing/selftests/x86/ptrace_syscall.c | 3 ++- tools/testing/selftests/x86/single_step_syscall.c | 5 ++++- 4 files changed, 18 insertions(+), 8 deletions(-)
diff --git a/tools/testing/selftests/x86/fsgsbase.c b/tools/testing/selftests/x86/fsgsbase.c index 9b4610c6d3fb..f249e042b3b5 100644 --- a/tools/testing/selftests/x86/fsgsbase.c +++ b/tools/testing/selftests/x86/fsgsbase.c @@ -245,7 +245,7 @@ void do_unexpected_base(void) long ret; asm volatile ("int $0x80" : "=a" (ret) : "a" (243), "b" (low_desc)
: "flags");
: "r8", "r9", "r10", "r11"); memcpy(&desc, low_desc, sizeof(desc)); munmap(low_desc, sizeof(desc));
diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c index e717fed80219..b9a22f18566a 100644 --- a/tools/testing/selftests/x86/ldt_gdt.c +++ b/tools/testing/selftests/x86/ldt_gdt.c @@ -45,6 +45,12 @@ #define AR_DB (1 << 22) #define AR_G (1 << 23)
+#ifdef __x86_64__ +# define INT80_CLOBBERS "r8", "r9", "r10", "r11" +#else +# define INT80_CLOBBERS +#endif
static int nerrs;
/* Points to an array of 1024 ints, each holding its own index. */ @@ -634,7 +640,7 @@ static int invoke_set_thread_area(void) asm volatile ("int $0x80" : "=a" (ret), "+m" (low_user_desc) : "a" (243), "b" (low_user_desc)
: "flags");
: INT80_CLOBBERS); return ret;
}
@@ -703,7 +709,7 @@ static void test_gdt_invalidation(void) "+a" (eax) : "m" (low_user_desc_clear), [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear)
: "flags");
: INT80_CLOBBERS); if (sel != 0) { result = "FAIL";
@@ -734,7 +740,7 @@ static void test_gdt_invalidation(void) "+a" (eax) : "m" (low_user_desc_clear), [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear)
: "flags");
: INT80_CLOBBERS); if (sel != 0) { result = "FAIL";
@@ -767,7 +773,7 @@ static void test_gdt_invalidation(void) "+a" (eax) : "m" (low_user_desc_clear), [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear)
: "flags");
: INT80_CLOBBERS);
#ifdef __x86_64__ syscall(SYS_arch_prctl, ARCH_GET_FS, &new_base); @@ -820,7 +826,7 @@ static void test_gdt_invalidation(void) "+a" (eax) : "m" (low_user_desc_clear), [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear)
: "flags");
: INT80_CLOBBERS);
#ifdef __x86_64__ syscall(SYS_arch_prctl, ARCH_GET_GS, &new_base); diff --git a/tools/testing/selftests/x86/ptrace_syscall.c b/tools/testing/selftests/x86/ptrace_syscall.c index b037ce9cf116..eaea92439708 100644 --- a/tools/testing/selftests/x86/ptrace_syscall.c +++ b/tools/testing/selftests/x86/ptrace_syscall.c @@ -58,7 +58,8 @@ static void do_full_int80(struct syscall_args32 *args) asm volatile ("int $0x80" : "+a" (args->nr), "+b" (args->arg0), "+c" (args->arg1), "+d" (args->arg2),
"+S" (args->arg3), "+D" (args->arg4), "+r" (bp));
"+S" (args->arg3), "+D" (args->arg4), "+r" (bp)
: : "r8", "r9", "r10", "r11"); args->arg5 = bp;
#else sys32_helper(args, int80_and_ret); diff --git a/tools/testing/selftests/x86/single_step_syscall.c b/tools/testing/selftests/x86/single_step_syscall.c index 50c26358e8b7..a48da95c18fd 100644 --- a/tools/testing/selftests/x86/single_step_syscall.c +++ b/tools/testing/selftests/x86/single_step_syscall.c @@ -56,9 +56,11 @@ static volatile sig_atomic_t sig_traps; #ifdef __x86_64__ # define REG_IP REG_RIP # define WIDTH "q" +# define INT80_CLOBBERS "r8", "r9", "r10", "r11" #else # define REG_IP REG_EIP # define WIDTH "l" +# define INT80_CLOBBERS #endif
static unsigned long get_eflags(void) @@ -140,7 +142,8 @@ int main()
printf("[RUN]\tSet TF and check int80\n"); set_eflags(get_eflags() | X86_EFLAGS_TF);
asm volatile ("int $0x80" : "=a" (tmp) : "a" (SYS_getpid));
asm volatile ("int $0x80" : "=a" (tmp) : "a" (SYS_getpid)
: INT80_CLOBBERS); check_result(); /*
-- 2.11.0
On Wed, Nov 29, 2017 at 05:42:25PM +0000, Dmitry Safonov wrote:
Hi Sasha,
I would object including this to stable tree:
- It's selftest fixup
- I have never saw selftests failing because of it (but it's possible) I saw this in CRIU (Checkpoint Restore In Userspace) project triggering, so I've fixed the selftests, mostly for documentation reasons - as people in userspace can reuse this code and think it's good and will just work after copy-paste (which is not the case).
So, this patch doesn't look urgent to include it in -stable kernel.
Hey Dmitry,
We try to backport selftest patches as stable material to allow a "complete" selftest to run on stable kernels.
From: Steffen Klassert steffen.klassert@secunet.com
[ Upstream commit e3dc847a5f85b43ee2bfc8eae407a7e383483228 ]
In vti6_xmit(), the check for IPV6_MIN_MTU before we send a ICMPV6_PKT_TOOBIG message is missing. So we might report a PMTU below 1280. Fix this by adding the required check.
Fixes: ccd740cbc6e ("vti6: Add pmtu handling to vti6_xmit.") Signed-off-by: Steffen Klassert steffen.klassert@secunet.com Signed-off-by: Sasha Levin alexander.levin@verizon.com --- net/ipv6/ip6_vti.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 816f79d1a8a3..6dfb7247c1ce 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -485,11 +485,15 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) if (!skb->ignore_df && skb->len > mtu) { skb_dst(skb)->ops->update_pmtu(dst, NULL, skb, mtu);
- if (skb->protocol == htons(ETH_P_IPV6)) + if (skb->protocol == htons(ETH_P_IPV6)) { + if (mtu < IPV6_MIN_MTU) + mtu = IPV6_MIN_MTU; + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); - else + } else { icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); + }
return -EMSGSIZE; }
From: Ladislav Michl ladis@linux-mips.org
[ Upstream commit 7807e086a2d1f69cc1a57958cac04fea79fc2112 ]
gpmc_probe_onenand_child returns success even on gpmc_onenand_init failure. Fix that.
Signed-off-by: Ladislav Michl ladis@linux-mips.org Acked-by: Roger Quadros rogerq@ti.com Signed-off-by: Tony Lindgren tony@atomide.com Signed-off-by: Sasha Levin alexander.levin@verizon.com --- arch/arm/mach-omap2/gpmc-onenand.c | 10 ++++++---- drivers/memory/omap-gpmc.c | 4 +--- include/linux/omap-gpmc.h | 5 +++-- 3 files changed, 10 insertions(+), 9 deletions(-)
diff --git a/arch/arm/mach-omap2/gpmc-onenand.c b/arch/arm/mach-omap2/gpmc-onenand.c index 8633c703546a..2944af820558 100644 --- a/arch/arm/mach-omap2/gpmc-onenand.c +++ b/arch/arm/mach-omap2/gpmc-onenand.c @@ -367,7 +367,7 @@ static int gpmc_onenand_setup(void __iomem *onenand_base, int *freq_ptr) return ret; }
-void gpmc_onenand_init(struct omap_onenand_platform_data *_onenand_data) +int gpmc_onenand_init(struct omap_onenand_platform_data *_onenand_data) { int err; struct device *dev = &gpmc_onenand_device.dev; @@ -393,15 +393,17 @@ void gpmc_onenand_init(struct omap_onenand_platform_data *_onenand_data) if (err < 0) { dev_err(dev, "Cannot request GPMC CS %d, error %d\n", gpmc_onenand_data->cs, err); - return; + return err; }
gpmc_onenand_resource.end = gpmc_onenand_resource.start + ONENAND_IO_SIZE - 1;
- if (platform_device_register(&gpmc_onenand_device) < 0) { + err = platform_device_register(&gpmc_onenand_device); + if (err) { dev_err(dev, "Unable to register OneNAND device\n"); gpmc_cs_free(gpmc_onenand_data->cs); - return; } + + return err; } diff --git a/drivers/memory/omap-gpmc.c b/drivers/memory/omap-gpmc.c index 5457c361ad58..bf0fe0137dfe 100644 --- a/drivers/memory/omap-gpmc.c +++ b/drivers/memory/omap-gpmc.c @@ -1947,9 +1947,7 @@ static int gpmc_probe_onenand_child(struct platform_device *pdev, if (!of_property_read_u32(child, "dma-channel", &val)) gpmc_onenand_data->dma_channel = val;
- gpmc_onenand_init(gpmc_onenand_data); - - return 0; + return gpmc_onenand_init(gpmc_onenand_data); } #else static int gpmc_probe_onenand_child(struct platform_device *pdev, diff --git a/include/linux/omap-gpmc.h b/include/linux/omap-gpmc.h index 35d0fd7a4948..e821a3132a3e 100644 --- a/include/linux/omap-gpmc.h +++ b/include/linux/omap-gpmc.h @@ -88,10 +88,11 @@ static inline int gpmc_nand_init(struct omap_nand_platform_data *d, #endif
#if IS_ENABLED(CONFIG_MTD_ONENAND_OMAP2) -extern void gpmc_onenand_init(struct omap_onenand_platform_data *d); +extern int gpmc_onenand_init(struct omap_onenand_platform_data *d); #else #define board_onenand_data NULL -static inline void gpmc_onenand_init(struct omap_onenand_platform_data *d) +static inline int gpmc_onenand_init(struct omap_onenand_platform_data *d) { + return 0; } #endif
From: Andrew Banman abanman@hpe.com
[ Upstream commit 1b17c6df852851b40c3c27c66b8fa2fd99cf25d8 ]
Writing to the software acknowledge clear register when there are no pending messages causes a HUB error to assert. The original intent of this write was to clear the pending bits before start of operation, but this is an incorrect method and has been determined to be unnecessary.
Signed-off-by: Andrew Banman abanman@hpe.com Acked-by: Mike Travis mike.travis@hpe.com Cc: Andy Lutomirski luto@kernel.org Cc: Borislav Petkov bp@alien8.de Cc: Brian Gerst brgerst@gmail.com Cc: Denys Vlasenko dvlasenk@redhat.com Cc: H. Peter Anvin hpa@zytor.com Cc: Josh Poimboeuf jpoimboe@redhat.com Cc: Linus Torvalds torvalds@linux-foundation.org Cc: Peter Zijlstra peterz@infradead.org Cc: Thomas Gleixner tglx@linutronix.de Cc: akpm@linux-foundation.org Cc: rja@hpe.com Cc: sivanich@hpe.com Link: http://lkml.kernel.org/r/1487351269-181133-1-git-send-email-abanman@hpe.com Signed-off-by: Ingo Molnar mingo@kernel.org Signed-off-by: Sasha Levin alexander.levin@verizon.com --- arch/x86/platform/uv/tlb_uv.c | 1 - 1 file changed, 1 deletion(-)
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index 9e42842e924a..0f0175186f1b 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -1848,7 +1848,6 @@ static void pq_init(int node, int pnode)
ops.write_payload_first(pnode, first); ops.write_payload_last(pnode, last); - ops.write_g_sw_ack(pnode, 0xffffUL);
/* in effect, all msg_type's are set to MSG_NOOP */ memset(pqp, 0, sizeof(struct bau_pq_entry) * DEST_Q_SIZE);
From: Paul Mackerras paulus@ozlabs.org
[ Upstream commit 7a70d7288c926ae88e0c773fbb506aa374e99c2d ]
The POWER9 MMU reads and caches entries from the process table. When we kexec from one kernel to another, the second kernel sets its process table pointer but doesn't currently do anything to make the CPU invalidate any cached entries from the old process table. This adds a tlbie (TLB invalidate entry) instruction with parameters to invalidate caching of the process table after the new process table is installed.
Signed-off-by: Paul Mackerras paulus@ozlabs.org Signed-off-by: Michael Ellerman mpe@ellerman.id.au Signed-off-by: Sasha Levin alexander.levin@verizon.com --- arch/powerpc/mm/pgtable-radix.c | 4 ++++ 1 file changed, 4 insertions(+)
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index 9a25dce87875..44c33ee397a0 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -173,6 +173,10 @@ redo: */ register_process_table(__pa(process_tb), 0, PRTB_SIZE_SHIFT - 12); pr_info("Process table %p and radix root for kernel: %p\n", process_tb, init_mm.pgd); + asm volatile("ptesync" : : : "memory"); + asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : : + "r" (TLBIEL_INVAL_SET_LPID), "r" (0)); + asm volatile("eieio; tlbsync; ptesync" : : : "memory"); }
static void __init radix_init_partition_table(void)
From: Thomas Gleixner tglx@linutronix.de
[ Upstream commit bb1a2c26165640ba2cbcfe06c81e9f9d6db4e643 ]
Sergey reported a might sleep warning triggered from the hpet resume path. It's caused by the call to disable_irq() from interrupt disabled context.
The problem with the low level resume code is that it is not accounted as a special system_state like we do during the boot process. Calling the same code during system boot would not trigger the warning. That's inconsistent at best.
In this particular case it's trivial to replace the disable_irq() with disable_hardirq() because this particular code path is solely used from system resume and the involved hpet interrupts can never be force threaded.
Reported-and-tested-by: Sergey Senozhatsky sergey.senozhatsky.work@gmail.com Signed-off-by: Thomas Gleixner tglx@linutronix.de Cc: Peter Zijlstra peterz@infradead.org Cc: "Rafael J. Wysocki" rjw@sisk.pl Cc: Sergey Senozhatsky sergey.senozhatsky@gmail.com Cc: Borislav Petkov bp@alien8.de Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1703012108460.3684@nanos Signed-off-by: Thomas Gleixner tglx@linutronix.de Signed-off-by: Sasha Levin alexander.levin@verizon.com --- arch/x86/kernel/hpet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 932348fbb6ea..9512529e8eab 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -354,7 +354,7 @@ static int hpet_resume(struct clock_event_device *evt, int timer)
irq_domain_deactivate_irq(irq_get_irq_data(hdev->irq)); irq_domain_activate_irq(irq_get_irq_data(hdev->irq)); - disable_irq(hdev->irq); + disable_hardirq(hdev->irq); irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu)); enable_irq(hdev->irq); }
From: Peter Zijlstra peterz@infradead.org
[ Upstream commit 4c77b18cf8b7ab37c7d5737b4609010d2ceec5f0 ]
Kitsunyan reported desktop latency issues on his Celeron 887 because of commit:
1b568f0aabf2 ("sched/core: Optimize SCHED_SMT")
... even though his CPU doesn't do SMT.
The effect of running the SMT code on a !SMT part is basically a more aggressive select_idle_cpu(). Removing the avg condition fixed things for him.
I also know FB likes this test gone, even though other workloads like having it.
For now, take it out by default, until we get a better idea.
Reported-by: kitsunyan kitsunyan@inbox.ru Signed-off-by: Peter Zijlstra (Intel) peterz@infradead.org Cc: Chris Mason clm@fb.com Cc: Linus Torvalds torvalds@linux-foundation.org Cc: Mike Galbraith efault@gmx.de Cc: Mike Galbraith umgwanakikbuti@gmail.com Cc: Peter Zijlstra peterz@infradead.org Cc: Thomas Gleixner tglx@linutronix.de Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar mingo@kernel.org Signed-off-by: Sasha Levin alexander.levin@verizon.com --- kernel/sched/fair.c | 2 +- kernel/sched/features.h | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 7a68c631d5b5..3d862f5b0331 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5451,7 +5451,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t * Due to large variance we need a large fuzz factor; hackbench in * particularly is sensitive here. */ - if ((avg_idle / 512) < avg_cost) + if (sched_feat(SIS_AVG_CPU) && (avg_idle / 512) < avg_cost) return -1;
time = local_clock(); diff --git a/kernel/sched/features.h b/kernel/sched/features.h index 69631fa46c2f..1b3c8189b286 100644 --- a/kernel/sched/features.h +++ b/kernel/sched/features.h @@ -51,6 +51,11 @@ SCHED_FEAT(NONTASK_CAPACITY, true) */ SCHED_FEAT(TTWU_QUEUE, true)
+/* + * When doing wakeups, attempt to limit superfluous scans of the LLC domain. + */ +SCHED_FEAT(SIS_AVG_CPU, false) + #ifdef HAVE_RT_PUSH_IPI /* * In order to avoid a thundering herd attack of CPUs that are
From: Sachin Sant sachinp@linux.vnet.ibm.com
[ Upstream commit a6d8a21596df041f36f4c2ccc260c459e3e851f1 ]
Tests under alignment subdirectory are skipped when executed on previous generation hardware, but harness still marks them as failed.
test: test_copy_unaligned tags: git_version:unknown [SKIP] Test skipped on line 26 skip: test_copy_unaligned selftests: copy_unaligned [FAIL]
The MAGIC_SKIP_RETURN_VALUE value assigned to rc variable is retained till the program exit which causes the test to be marked as failed.
This patch resets the value before returning to the main() routine. With this patch the test o/p is as follows:
test: test_copy_unaligned tags: git_version:unknown [SKIP] Test skipped on line 26 skip: test_copy_unaligned selftests: copy_unaligned [PASS]
Signed-off-by: Sachin Sant sachinp@linux.vnet.ibm.com Signed-off-by: Michael Ellerman mpe@ellerman.id.au Signed-off-by: Sasha Levin alexander.levin@verizon.com --- tools/testing/selftests/powerpc/harness.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/tools/testing/selftests/powerpc/harness.c b/tools/testing/selftests/powerpc/harness.c index 248a820048df..66d31de60b9a 100644 --- a/tools/testing/selftests/powerpc/harness.c +++ b/tools/testing/selftests/powerpc/harness.c @@ -114,9 +114,11 @@ int test_harness(int (test_function)(void), char *name)
rc = run_test(test_function, name);
- if (rc == MAGIC_SKIP_RETURN_VALUE) + if (rc == MAGIC_SKIP_RETURN_VALUE) { test_skip(name); - else + /* so that skipped test is not marked as failed */ + rc = 0; + } else test_finish(name, rc);
return rc;
From: David Daney david.daney@cavium.com
[ Upstream commit ab42632156becd35d3884ee5c14da2bedbf3149a ]
For powerpc the __jump_table section in modules is not aligned, this causes a WARN_ON() splat when loading a module containing a __jump_table.
Strict alignment became necessary with commit 3821fd35b58d ("jump_label: Reduce the size of struct static_key"), currently in linux-next, which uses the two least significant bits of pointers to __jump_table elements.
Fix by forcing __jump_table to 8, which is the same alignment used for this section in the kernel proper.
Link: http://lkml.kernel.org/r/20170301220453.4756-1-david.daney@cavium.com
Reviewed-by: Jason Baron jbaron@akamai.com Acked-by: Jessica Yu jeyu@redhat.com Acked-by: Michael Ellerman mpe@ellerman.id.au (powerpc) Tested-by: Sachin Sant sachinp@linux.vnet.ibm.com Signed-off-by: David Daney david.daney@cavium.com Signed-off-by: Steven Rostedt (VMware) rostedt@goodmis.org Signed-off-by: Sasha Levin alexander.levin@verizon.com --- scripts/module-common.lds | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/scripts/module-common.lds b/scripts/module-common.lds index 73a2c7da0e55..53234e85192a 100644 --- a/scripts/module-common.lds +++ b/scripts/module-common.lds @@ -19,4 +19,6 @@ SECTIONS {
. = ALIGN(8); .init_array 0 : { *(SORT(.init_array.*)) *(.init_array) } + + __jump_table 0 : ALIGN(8) { KEEP(*(__jump_table)) } }
From: Nicholas Piggin npiggin@gmail.com
[ Upstream commit 4dc831aa88132f835cefe876aa0206977c4d7710 ]
GCC can compile with either endian, but the default ABI version is set based on the default endianness of the toolchain. Alan Modra says:
you need both -mbig and -mabi=elfv1 to make a powerpc64le gcc generate powerpc64 code
The opposite is true for powerpc64 when generating -mlittle it requires -mabi=elfv2 to generate v2 ABI, which we were already doing.
This change adds ABI annotations together with endianness for all cases, LE and BE. This fixes the case of building a BE kernel with a toolchain that is LE by default.
Signed-off-by: Nicholas Piggin npiggin@gmail.com Tested-by: Naveen N. Rao naveen.n.rao@linux.vnet.ibm.com Signed-off-by: Michael Ellerman mpe@ellerman.id.au Signed-off-by: Sasha Levin alexander.levin@verizon.com --- arch/powerpc/Makefile | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-)
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 617dece67924..a60c9c6e5cc1 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -72,8 +72,15 @@ GNUTARGET := powerpc MULTIPLEWORD := -mmultiple endif
-cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mbig-endian) +ifdef CONFIG_PPC64 +cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mabi=elfv1) +cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mcall-aixdesc) +aflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mabi=elfv1) +aflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mabi=elfv2 +endif + cflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mlittle-endian +cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mbig-endian) ifneq ($(cc-name),clang) cflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mno-strict-align endif @@ -113,7 +120,9 @@ ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y) CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv2,$(call cc-option,-mcall-aixdesc)) AFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv2) else +CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv1) CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcall-aixdesc) +AFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv1) endif CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcmodel=medium,$(call cc-option,-mminimal-toc)) CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mno-pointers-to-nested-functions)
From: Shile Zhang shile.zhang@nokia.com
[ Upstream commit 6ad966d7303b70165228dba1ee8da1a05c10eefe ]
Paul's patch to fix checksum folding, commit b492f7e4e07a ("powerpc/64: Fix checksum folding in csum_tcpudp_nofold and ip_fast_csum_nofold") missed a case in csum_add(). Fix it.
Signed-off-by: Shile Zhang shile.zhang@nokia.com Acked-by: Paul Mackerras paulus@ozlabs.org Signed-off-by: Michael Ellerman mpe@ellerman.id.au Signed-off-by: Sasha Levin alexander.levin@verizon.com --- arch/powerpc/include/asm/checksum.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h index 1e8fceb308a5..a67bb09585f4 100644 --- a/arch/powerpc/include/asm/checksum.h +++ b/arch/powerpc/include/asm/checksum.h @@ -100,7 +100,7 @@ static inline __wsum csum_add(__wsum csum, __wsum addend)
#ifdef __powerpc64__ res += (__force u64)addend; - return (__force __wsum)((u32)res + (res >> 32)); + return (__force __wsum) from64to32(res); #else asm("addc %0,%0,%1;" "addze %0,%0;"
From: Sean Young sean@mess.org
[ Upstream commit db5b15b74ed9a5c04bb808d18ffa2c773f5c18c0 ]
The locking in lirc needs improvement, but for now just fix this potential deadlock.
====================================================== [ INFO: possible circular locking dependency detected ] 4.10.0-rc1+ #1 Not tainted ------------------------------------------------------- bash/2502 is trying to acquire lock: (ir_raw_handler_lock){+.+.+.}, at: [<ffffffffc06f6a5e>] ir_raw_encode_scancode+0x3e/0xb0 [rc_core]
but task is already holding lock: (&dev->lock){+.+.+.}, at: [<ffffffffc06f511f>] store_filter+0x9f/0x240 [rc_core]
which lock already depends on the new lock.
the existing dependency chain (in reverse order) is:
-> #2 (&dev->lock){+.+.+.}:
[<ffffffffa110adad>] lock_acquire+0xfd/0x200 [<ffffffffa1921327>] mutex_lock_nested+0x77/0x6d0 [<ffffffffc06f436a>] rc_open+0x2a/0x80 [rc_core] [<ffffffffc07114ca>] lirc_dev_fop_open+0xda/0x1e0 [lirc_dev] [<ffffffffa12975e0>] chrdev_open+0xb0/0x210 [<ffffffffa128eb5a>] do_dentry_open+0x20a/0x2f0 [<ffffffffa128ffcc>] vfs_open+0x4c/0x80 [<ffffffffa12a35ec>] path_openat+0x5bc/0xc00 [<ffffffffa12a5271>] do_filp_open+0x91/0x100 [<ffffffffa12903f0>] do_sys_open+0x130/0x220 [<ffffffffa12904fe>] SyS_open+0x1e/0x20 [<ffffffffa19278c1>] entry_SYSCALL_64_fastpath+0x1f/0xc2 -> #1 (lirc_dev_lock){+.+.+.}: [<ffffffffa110adad>] lock_acquire+0xfd/0x200 [<ffffffffa1921327>] mutex_lock_nested+0x77/0x6d0 [<ffffffffc0711f47>] lirc_register_driver+0x67/0x59b [lirc_dev] [<ffffffffc06db7f4>] ir_lirc_register+0x1f4/0x260 [ir_lirc_codec] [<ffffffffc06f6cac>] ir_raw_handler_register+0x7c/0xb0 [rc_core] [<ffffffffc0398010>] 0xffffffffc0398010 [<ffffffffa1002192>] do_one_initcall+0x52/0x1b0 [<ffffffffa11ef5c8>] do_init_module+0x5f/0x1fa [<ffffffffa11566b5>] load_module+0x2675/0x2b00 [<ffffffffa1156dcf>] SYSC_finit_module+0xdf/0x110 [<ffffffffa1156e1e>] SyS_finit_module+0xe/0x10 [<ffffffffa1003f5c>] do_syscall_64+0x6c/0x1f0 [<ffffffffa1927989>] return_from_SYSCALL_64+0x0/0x7a -> #0 (ir_raw_handler_lock){+.+.+.}: [<ffffffffa110a7b7>] __lock_acquire+0x10f7/0x1290 [<ffffffffa110adad>] lock_acquire+0xfd/0x200 [<ffffffffa1921327>] mutex_lock_nested+0x77/0x6d0 [<ffffffffc06f6a5e>] ir_raw_encode_scancode+0x3e/0xb0 [rc_core] [<ffffffffc0b0f492>] loop_set_wakeup_filter+0x62/0xbd [rc_loopback] [<ffffffffc06f522a>] store_filter+0x1aa/0x240 [rc_core] [<ffffffffa15e46f8>] dev_attr_store+0x18/0x30 [<ffffffffa13318e5>] sysfs_kf_write+0x45/0x60 [<ffffffffa1330b55>] kernfs_fop_write+0x155/0x1e0 [<ffffffffa1290797>] __vfs_write+0x37/0x160 [<ffffffffa12921f8>] vfs_write+0xc8/0x1e0 [<ffffffffa12936e8>] SyS_write+0x58/0xc0 [<ffffffffa19278c1>] entry_SYSCALL_64_fastpath+0x1f/0xc2
other info that might help us debug this:
Chain exists of: ir_raw_handler_lock --> lirc_dev_lock --> &dev->lock
Possible unsafe locking scenario:
CPU0 CPU1 ---- ---- lock(&dev->lock); lock(lirc_dev_lock); lock(&dev->lock); lock(ir_raw_handler_lock);
*** DEADLOCK ***
4 locks held by bash/2502: #0: (sb_writers#4){.+.+.+}, at: [<ffffffffa12922c5>] vfs_write+0x195/0x1e0 #1: (&of->mutex){+.+.+.}, at: [<ffffffffa1330b1f>] kernfs_fop_write+0x11f/0x1e0 #2: (s_active#215){.+.+.+}, at: [<ffffffffa1330b28>] kernfs_fop_write+0x128/0x1e0 #3: (&dev->lock){+.+.+.}, at: [<ffffffffc06f511f>] store_filter+0x9f/0x240 [rc_core]
stack backtrace: CPU: 3 PID: 2502 Comm: bash Not tainted 4.10.0-rc1+ #1 Hardware name: /DG45ID, BIOS IDG4510H.86A.0135.2011.0225.1100 02/25/2011 Call Trace: dump_stack+0x86/0xc3 print_circular_bug+0x1be/0x210 __lock_acquire+0x10f7/0x1290 lock_acquire+0xfd/0x200 ? ir_raw_encode_scancode+0x3e/0xb0 [rc_core] ? ir_raw_encode_scancode+0x3e/0xb0 [rc_core] mutex_lock_nested+0x77/0x6d0 ? ir_raw_encode_scancode+0x3e/0xb0 [rc_core] ? loop_set_wakeup_filter+0x44/0xbd [rc_loopback] ir_raw_encode_scancode+0x3e/0xb0 [rc_core] loop_set_wakeup_filter+0x62/0xbd [rc_loopback] ? loop_set_tx_duty_cycle+0x70/0x70 [rc_loopback] store_filter+0x1aa/0x240 [rc_core] dev_attr_store+0x18/0x30 sysfs_kf_write+0x45/0x60 kernfs_fop_write+0x155/0x1e0 __vfs_write+0x37/0x160 ? rcu_read_lock_sched_held+0x4a/0x80 ? rcu_sync_lockdep_assert+0x2f/0x60 ? __sb_start_write+0x10c/0x220 ? vfs_write+0x195/0x1e0 ? security_file_permission+0x3b/0xc0 vfs_write+0xc8/0x1e0 SyS_write+0x58/0xc0 entry_SYSCALL_64_fastpath+0x1f/0xc2
Signed-off-by: Sean Young sean@mess.org Signed-off-by: Mauro Carvalho Chehab mchehab@s-opensource.com Signed-off-by: Sasha Levin alexander.levin@verizon.com --- drivers/media/rc/lirc_dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/media/rc/lirc_dev.c b/drivers/media/rc/lirc_dev.c index 6ebe89551961..f4509ef9922b 100644 --- a/drivers/media/rc/lirc_dev.c +++ b/drivers/media/rc/lirc_dev.c @@ -446,6 +446,8 @@ int lirc_dev_fop_open(struct inode *inode, struct file *file) return -ERESTARTSYS;
ir = irctls[iminor(inode)]; + mutex_unlock(&lirc_dev_lock); + if (!ir) { retval = -ENODEV; goto error; @@ -486,8 +488,6 @@ int lirc_dev_fop_open(struct inode *inode, struct file *file) }
error: - mutex_unlock(&lirc_dev_lock); - nonseekable_open(inode, file);
return retval;
From: Guenter Roeck linux@roeck-us.net
[ Upstream commit 10e5778f54765c96fe0c8f104b7a030e5b35bc72 ]
After commit 0549bde0fcb1 ("of: fix of_node leak caused in of_find_node_opts_by_path"), the following error may be reported when running omap images.
OF: ERROR: Bad of_node_put() on /ocp@68000000 CPU: 0 PID: 0 Comm: swapper Not tainted 4.10.0-rc7-next-20170210 #1 Hardware name: Generic OMAP3-GP (Flattened Device Tree) [<c0310604>] (unwind_backtrace) from [<c030bbf4>] (show_stack+0x10/0x14) [<c030bbf4>] (show_stack) from [<c05add8c>] (dump_stack+0x98/0xac) [<c05add8c>] (dump_stack) from [<c05af1b0>] (kobject_release+0x48/0x7c) [<c05af1b0>] (kobject_release) from [<c0ad1aa4>] (of_find_node_by_name+0x74/0x94) [<c0ad1aa4>] (of_find_node_by_name) from [<c1215bd4>] (omap3xxx_hwmod_is_hs_ip_block_usable+0x24/0x2c) [<c1215bd4>] (omap3xxx_hwmod_is_hs_ip_block_usable) from [<c1215d5c>] (omap3xxx_hwmod_init+0x180/0x274) [<c1215d5c>] (omap3xxx_hwmod_init) from [<c120faa8>] (omap3_init_early+0xa0/0x11c) [<c120faa8>] (omap3_init_early) from [<c120fb2c>] (omap3430_init_early+0x8/0x30) [<c120fb2c>] (omap3430_init_early) from [<c1204710>] (setup_arch+0xc04/0xc34) [<c1204710>] (setup_arch) from [<c1200948>] (start_kernel+0x68/0x38c) [<c1200948>] (start_kernel) from [<8020807c>] (0x8020807c)
of_find_node_by_name() drops the reference to the passed device node. The commit referenced above exposes this problem.
To fix the problem, use of_get_child_by_name() instead of of_find_node_by_name(); of_get_child_by_name() does not drop the reference count of passed device nodes. While semantically different, we only look for immediate children of the passed device node, so of_get_child_by_name() is a more appropriate function to use anyway.
Release the reference to the device node obtained with of_get_child_by_name() after it is no longer needed to avoid another device node leak.
While at it, clean up the code and change the return type of omap3xxx_hwmod_is_hs_ip_block_usable() to bool to match its use and the return type of of_device_is_available().
Cc: Qi Hou qi.hou@windriver.com Cc: Peter Rosin peda@axentia.se Cc: Rob Herring robh@kernel.org Signed-off-by: Guenter Roeck linux@roeck-us.net Signed-off-by: Tony Lindgren tony@atomide.com Signed-off-by: Sasha Levin alexander.levin@verizon.com --- arch/arm/mach-omap2/omap_hwmod_3xxx_data.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-)
diff --git a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c index 1cc4a6f3954e..483658d86f80 100644 --- a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c +++ b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c @@ -3828,16 +3828,20 @@ static struct omap_hwmod_ocp_if *omap3xxx_dss_hwmod_ocp_ifs[] __initdata = { * Return: 0 if device named @dev_name is not likely to be accessible, * or 1 if it is likely to be accessible. */ -static int __init omap3xxx_hwmod_is_hs_ip_block_usable(struct device_node *bus, - const char *dev_name) +static bool __init omap3xxx_hwmod_is_hs_ip_block_usable(struct device_node *bus, + const char *dev_name) { + struct device_node *node; + bool available; + if (!bus) - return (omap_type() == OMAP2_DEVICE_TYPE_GP) ? 1 : 0; + return omap_type() == OMAP2_DEVICE_TYPE_GP;
- if (of_device_is_available(of_find_node_by_name(bus, dev_name))) - return 1; + node = of_get_child_by_name(bus, dev_name); + available = of_device_is_available(node); + of_node_put(node);
- return 0; + return available; }
int __init omap3xxx_hwmod_init(void)
From: Phil Reid preid@electromag.com.au
[ Upstream commit f759921cfbf4847319d197a6ed7c9534d593f8bc ]
When a threaded irq handler is chained attached to one of the gpio pins when configure for level irq the altera_gpio_irq_leveL_high_handler does not mask the interrupt while being handled by the chained irq. This resulting in the threaded irq not getting enough cycles to complete quickly enough before the irq was disabled as faulty. handle_level_irq should be used in this situation instead of handle_simple_irq.
In gpiochip_irqchip_add set default handler to handle_bad_irq as per Documentation/gpio/driver.txt. Then set the correct handler in the set_type callback.
Signed-off-by: Phil Reid preid@electromag.com.au Signed-off-by: Linus Walleij linus.walleij@linaro.org Signed-off-by: Sasha Levin alexander.levin@verizon.com --- drivers/gpio/gpio-altera.c | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-)
diff --git a/drivers/gpio/gpio-altera.c b/drivers/gpio/gpio-altera.c index 5bddbd507ca9..3fe6a21e05a5 100644 --- a/drivers/gpio/gpio-altera.c +++ b/drivers/gpio/gpio-altera.c @@ -90,21 +90,18 @@ static int altera_gpio_irq_set_type(struct irq_data *d,
altera_gc = gpiochip_get_data(irq_data_get_irq_chip_data(d));
- if (type == IRQ_TYPE_NONE) + if (type == IRQ_TYPE_NONE) { + irq_set_handler_locked(d, handle_bad_irq); return 0; - if (type == IRQ_TYPE_LEVEL_HIGH && - altera_gc->interrupt_trigger == IRQ_TYPE_LEVEL_HIGH) - return 0; - if (type == IRQ_TYPE_EDGE_RISING && - altera_gc->interrupt_trigger == IRQ_TYPE_EDGE_RISING) - return 0; - if (type == IRQ_TYPE_EDGE_FALLING && - altera_gc->interrupt_trigger == IRQ_TYPE_EDGE_FALLING) - return 0; - if (type == IRQ_TYPE_EDGE_BOTH && - altera_gc->interrupt_trigger == IRQ_TYPE_EDGE_BOTH) + } + if (type == altera_gc->interrupt_trigger) { + if (type == IRQ_TYPE_LEVEL_HIGH) + irq_set_handler_locked(d, handle_level_irq); + else + irq_set_handler_locked(d, handle_simple_irq); return 0; - + } + irq_set_handler_locked(d, handle_bad_irq); return -EINVAL; }
@@ -230,7 +227,6 @@ static void altera_gpio_irq_edge_handler(struct irq_desc *desc) chained_irq_exit(chip, desc); }
- static void altera_gpio_irq_leveL_high_handler(struct irq_desc *desc) { struct altera_gpio_chip *altera_gc; @@ -310,7 +306,7 @@ static int altera_gpio_probe(struct platform_device *pdev) altera_gc->interrupt_trigger = reg;
ret = gpiochip_irqchip_add(&altera_gc->mmchip.gc, &altera_irq_chip, 0, - handle_simple_irq, IRQ_TYPE_NONE); + handle_bad_irq, IRQ_TYPE_NONE);
if (ret) { dev_err(&pdev->dev, "could not add irqchip\n");
From: Kuninori Morimoto kuninori.morimoto.gx@renesas.com
[ Upstream commit 4b30eebfc35c67771b5f58d9274d3e321b72d7a8 ]
SSI8 is is sharing pin with SSI7, and nothing to do for SSI_MODEx. It is special pin and it needs special settings whole system, but we can't confirm it, because we never have SSI8 available board.
This patch fixup SSI_MODEx settings error for SSI8 on connection test, but should be confirmed behavior on real board in the future.
Signed-off-by: Kuninori Morimoto kuninori.morimoto.gx@renesas.com Tested-by: Hiroyuki Yokoyama hiroyuki.yokoyama.vx@renesas.com Signed-off-by: Mark Brown broonie@kernel.org Signed-off-by: Sasha Levin alexander.levin@verizon.com --- sound/soc/sh/rcar/ssiu.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/sound/soc/sh/rcar/ssiu.c b/sound/soc/sh/rcar/ssiu.c index 6f9b388ec5a8..3f95d6b88f8c 100644 --- a/sound/soc/sh/rcar/ssiu.c +++ b/sound/soc/sh/rcar/ssiu.c @@ -44,7 +44,11 @@ static int rsnd_ssiu_init(struct rsnd_mod *mod, mask1 = (1 << 4) | (1 << 20); /* mask sync bit */ mask2 = (1 << 4); /* mask sync bit */ val1 = val2 = 0; - if (rsnd_ssi_is_pin_sharing(io)) { + if (id == 8) { + /* + * SSI8 pin is sharing with SSI7, nothing to do. + */ + } else if (rsnd_ssi_is_pin_sharing(io)) { int shift = -1;
switch (id) {
From: Guenter Roeck linux@roeck-us.net
[ Upstream commit b92675d998a9fa37fe9e0e35053a95b4a23c158b ]
The device node returned by of_find_node_by_name() needs to be released after it is no longer needed to avoid a device node leak.
Signed-off-by: Guenter Roeck linux@roeck-us.net Signed-off-by: Tony Lindgren tony@atomide.com Signed-off-by: Sasha Levin alexander.levin@verizon.com --- arch/arm/mach-omap2/omap_hwmod_3xxx_data.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c index 483658d86f80..bca54154e14f 100644 --- a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c +++ b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c @@ -3910,15 +3910,20 @@ int __init omap3xxx_hwmod_init(void)
if (h_sham && omap3xxx_hwmod_is_hs_ip_block_usable(bus, "sham")) { r = omap_hwmod_register_links(h_sham); - if (r < 0) + if (r < 0) { + of_node_put(bus); return r; + } }
if (h_aes && omap3xxx_hwmod_is_hs_ip_block_usable(bus, "aes")) { r = omap_hwmod_register_links(h_aes); - if (r < 0) + if (r < 0) { + of_node_put(bus); return r; + } } + of_node_put(bus);
/* * Register hwmod links specific to certain ES levels of a
From: Roger Quadros rogerq@ti.com
[ Upstream commit 1551e35ea4189c1f7199fe278395fc94196715f2 ]
On TI platforms (dra7, am437x), the DWC3_DSTS_DEVCTRLHLT bit is not set after the device controller is stopped via DWC3_DCTL_RUN_STOP.
If we don't disconnect and stop the gadget, it stops working after a system resume with the trace below.
There is no point in preventing gadget disconnect and gadget stop during system suspend/resume as we're going to suspend in any case, whether DEVCTRLHLT timed out or not.
[ 141.727480] ------------[ cut here ]------------ [ 141.732349] WARNING: CPU: 1 PID: 2135 at drivers/usb/dwc3/gadget.c:2384 dwc3_stop_active_transfer.constprop.4+0xc4/0xe4 [dwc3] [ 141.744299] Modules linked in: usb_f_ss_lb g_zero libcomposite xhci_plat_hcd xhci_hcd usbcore dwc3 evdev udc_core m25p80 usb_common spi_nor snd_soc_davinci_mcasp snd_soc_simple_card snd_soc_edma snd_soc_tlv3e [ 141.792163] CPU: 1 PID: 2135 Comm: irq/456-dwc3 Not tainted 4.10.0-rc8 #1138 [ 141.799547] Hardware name: Generic DRA74X (Flattened Device Tree) [ 141.805940] [<c01101b4>] (unwind_backtrace) from [<c010c31c>] (show_stack+0x10/0x14) [ 141.814066] [<c010c31c>] (show_stack) from [<c04a0918>] (dump_stack+0xac/0xe0) [ 141.821648] [<c04a0918>] (dump_stack) from [<c013708c>] (__warn+0xd8/0x104) [ 141.828955] [<c013708c>] (__warn) from [<c0137164>] (warn_slowpath_null+0x20/0x28) [ 141.836902] [<c0137164>] (warn_slowpath_null) from [<bf27784c>] (dwc3_stop_active_transfer.constprop.4+0xc4/0xe4 [dwc3]) [ 141.848329] [<bf27784c>] (dwc3_stop_active_transfer.constprop.4 [dwc3]) from [<bf27ab14>] (__dwc3_gadget_ep_disable+0x64/0x528 [dwc3]) [ 141.861034] [<bf27ab14>] (__dwc3_gadget_ep_disable [dwc3]) from [<bf27c27c>] (dwc3_gadget_ep_disable+0x3c/0xc8 [dwc3]) [ 141.872280] [<bf27c27c>] (dwc3_gadget_ep_disable [dwc3]) from [<bf23b428>] (usb_ep_disable+0x11c/0x18c [udc_core]) [ 141.883160] [<bf23b428>] (usb_ep_disable [udc_core]) from [<bf342774>] (disable_ep+0x18/0x54 [usb_f_ss_lb]) [ 141.893408] [<bf342774>] (disable_ep [usb_f_ss_lb]) from [<bf3437b0>] (disable_endpoints+0x18/0x50 [usb_f_ss_lb]) [ 141.904168] [<bf3437b0>] (disable_endpoints [usb_f_ss_lb]) from [<bf343814>] (disable_source_sink+0x2c/0x34 [usb_f_ss_lb]) [ 141.915771] [<bf343814>] (disable_source_sink [usb_f_ss_lb]) from [<bf329a9c>] (reset_config+0x48/0x7c [libcomposite]) [ 141.927012] [<bf329a9c>] (reset_config [libcomposite]) from [<bf329afc>] (composite_disconnect+0x2c/0x54 [libcomposite]) [ 141.938444] [<bf329afc>] (composite_disconnect [libcomposite]) from [<bf23d7dc>] (usb_gadget_udc_reset+0x10/0x34 [udc_core]) [ 141.950237] [<bf23d7dc>] (usb_gadget_udc_reset [udc_core]) from [<bf276d70>] (dwc3_gadget_reset_interrupt+0x64/0x698 [dwc3]) [ 141.962022] [<bf276d70>] (dwc3_gadget_reset_interrupt [dwc3]) from [<bf27952c>] (dwc3_thread_interrupt+0x618/0x1a3c [dwc3]) [ 141.973723] [<bf27952c>] (dwc3_thread_interrupt [dwc3]) from [<c01a7ce8>] (irq_thread_fn+0x1c/0x54) [ 141.983215] [<c01a7ce8>] (irq_thread_fn) from [<c01a7fbc>] (irq_thread+0x120/0x1f0) [ 141.991247] [<c01a7fbc>] (irq_thread) from [<c015ba14>] (kthread+0xf8/0x138) [ 141.998641] [<c015ba14>] (kthread) from [<c01078f0>] (ret_from_fork+0x14/0x24) [ 142.006213] ---[ end trace b4ecfe9f175b9a9c ]---
Signed-off-by: Roger Quadros rogerq@ti.com Signed-off-by: Felipe Balbi felipe.balbi@linux.intel.com Signed-off-by: Sasha Levin alexander.levin@verizon.com --- drivers/usb/dwc3/gadget.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-)
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index c61ddbf94bc7..16c67120d72b 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -3092,15 +3092,10 @@ void dwc3_gadget_exit(struct dwc3 *dwc)
int dwc3_gadget_suspend(struct dwc3 *dwc) { - int ret; - if (!dwc->gadget_driver) return 0;
- ret = dwc3_gadget_run_stop(dwc, false, false); - if (ret < 0) - return ret; - + dwc3_gadget_run_stop(dwc, false, false); dwc3_disconnect_gadget(dwc); __dwc3_gadget_stop(dwc);
From: John Keeping john@metanate.com
[ Upstream commit 38355b2a44776c25b0f2ad466e8c51bb805b3032 ]
When binding a gadget to a device, "name" is stored in gi->udc_name, but this does not happen when unregistering and the string is leaked.
Signed-off-by: John Keeping john@metanate.com Signed-off-by: Felipe Balbi felipe.balbi@linux.intel.com Signed-off-by: Sasha Levin alexander.levin@verizon.com --- drivers/usb/gadget/configfs.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c index 502a096fc380..a5ca409dc97e 100644 --- a/drivers/usb/gadget/configfs.c +++ b/drivers/usb/gadget/configfs.c @@ -269,6 +269,7 @@ static ssize_t gadget_dev_desc_UDC_store(struct config_item *item, ret = unregister_gadget(gi); if (ret) goto err; + kfree(name); } else { if (gi->composite.gadget_driver.udc_name) { ret = -EBUSY;
From: Christophe JAILLET christophe.jaillet@wanadoo.fr
[ Upstream commit b6e7aeeaf235901c42ec35de4633c7c69501d303 ]
'kbuf' is allocated just a few lines above using 'memdup_user()'. If the 'if (dev->buf)' test fails, this memory is never released.
Signed-off-by: Christophe JAILLET christophe.jaillet@wanadoo.fr Signed-off-by: Felipe Balbi felipe.balbi@linux.intel.com Signed-off-by: Sasha Levin alexander.levin@verizon.com --- drivers/usb/gadget/legacy/inode.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c index f69dbd4bcd18..b8534d3f8bb0 100644 --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c @@ -1819,8 +1819,10 @@ dev_config (struct file *fd, const char __user *buf, size_t len, loff_t *ptr)
spin_lock_irq (&dev->lock); value = -EINVAL; - if (dev->buf) + if (dev->buf) { + kfree(kbuf); goto fail; + } dev->buf = kbuf;
/* full or low speed config */
From: Daniel Drake drake@endlessm.com
[ Upstream commit f2f10b7e722a75c6d75a7f7cd06b0eee3ae20f7c ]
Add support for media keys on the keyboard that comes with the Asus V221ID and ZN241IC All In One computers.
The keys to support here are WLAN, BRIGHTNESSDOWN and BRIGHTNESSUP.
This device is not visibly branded as Chicony, and the USB Vendor ID suggests that it is a JESS device. However this seems like the right place to put it: the usage codes are identical to the currently supported devices, and this driver already supports the ASUS AIO keyboard AK1D.
Signed-off-by: Daniel Drake drake@endlessm.com Signed-off-by: Jiri Kosina jkosina@suse.cz Signed-off-by: Sasha Levin alexander.levin@verizon.com --- drivers/hid/Kconfig | 4 ++-- drivers/hid/hid-chicony.c | 1 + drivers/hid/hid-core.c | 1 + drivers/hid/hid-ids.h | 1 + 4 files changed, 5 insertions(+), 2 deletions(-)
diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index cd4599c0523b..db607d51ee2b 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -175,11 +175,11 @@ config HID_CHERRY Support for Cherry Cymotion keyboard.
config HID_CHICONY - tristate "Chicony Tactical pad" + tristate "Chicony devices" depends on HID default !EXPERT ---help--- - Support for Chicony Tactical pad. + Support for Chicony Tactical pad and special keys on Chicony keyboards.
config HID_CORSAIR tristate "Corsair devices" diff --git a/drivers/hid/hid-chicony.c b/drivers/hid/hid-chicony.c index bc3cec199fee..f04ed9aabc3f 100644 --- a/drivers/hid/hid-chicony.c +++ b/drivers/hid/hid-chicony.c @@ -86,6 +86,7 @@ static const struct hid_device_id ch_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_WIRELESS2) }, { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_AK1D) }, { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_ACER_SWITCH12) }, + { HID_USB_DEVICE(USB_VENDOR_ID_JESS, USB_DEVICE_ID_JESS_ZEN_AIO_KBD) }, { } }; MODULE_DEVICE_TABLE(hid, ch_devices); diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 4f3f5749b0c1..bdde8859e191 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1906,6 +1906,7 @@ static const struct hid_device_id hid_have_special_driver[] = { { HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A081) }, { HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A0C2) }, { HID_USB_DEVICE(USB_VENDOR_ID_HUION, USB_DEVICE_ID_HUION_TABLET) }, + { HID_USB_DEVICE(USB_VENDOR_ID_JESS, USB_DEVICE_ID_JESS_ZEN_AIO_KBD) }, { HID_USB_DEVICE(USB_VENDOR_ID_JESS2, USB_DEVICE_ID_JESS2_COLOR_RUMBLE_PAD) }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_ION, USB_DEVICE_ID_ICADE) }, { HID_USB_DEVICE(USB_VENDOR_ID_KENSINGTON, USB_DEVICE_ID_KS_SLIMBLADE) }, diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 08fd3f831d62..433d5f675c03 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -558,6 +558,7 @@
#define USB_VENDOR_ID_JESS 0x0c45 #define USB_DEVICE_ID_JESS_YUREX 0x1010 +#define USB_DEVICE_ID_JESS_ZEN_AIO_KBD 0x5112
#define USB_VENDOR_ID_JESS2 0x0f30 #define USB_DEVICE_ID_JESS2_COLOR_RUMBLE_PAD 0x0111
From: Tejun Heo tj@kernel.org
[ Upstream commit 0580b762a4d6b70817476b90042813f8573283fa ]
ata_sff_qc_issue() expects upper layers to never issue commands on a command protocol that it doesn't implement. While the assumption holds fine with the usual IO path, nothing filters based on the command protocol in the passthrough path (which was added later), allowing the warning to be tripped with a passthrough command with the right (well, wrong) protocol.
Failing with AC_ERR_SYSTEM is the right thing to do anyway. Remove the unnecessary WARN.
Reported-by: Dmitry Vyukov dvyukov@google.com Link: http://lkml.kernel.org/r/CACT4Y+bXkvevNZU8uP6X0QVqsj6wNoUA_1exfTSOzc+SmUtMOA... Signed-off-by: Tejun Heo tj@kernel.org Signed-off-by: Sasha Levin alexander.levin@verizon.com --- drivers/ata/libata-sff.c | 1 - 1 file changed, 1 deletion(-)
diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c index 051b6158d1b7..8d22acdf90f0 100644 --- a/drivers/ata/libata-sff.c +++ b/drivers/ata/libata-sff.c @@ -1481,7 +1481,6 @@ unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc) break;
default: - WARN_ON_ONCE(1); return AC_ERR_SYSTEM; }
From: Jim Mattson jmattson@google.com
[ Upstream commit 587d7e72aedca91cee80c0a56811649c3efab765 ]
VMCLEAR should silently ignore a failure to clear the launch state of the VMCS referenced by the operand.
Signed-off-by: Jim Mattson jmattson@google.com [Changed "kvm_write_guest(vcpu->kvm" to "kvm_vcpu_write_guest(vcpu".] Signed-off-by: Radim Krčmář rkrcmar@redhat.com
Signed-off-by: Sasha Levin alexander.levin@verizon.com --- arch/x86/kvm/vmx.c | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index a8ae57acb6f6..22476d2ce002 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -7206,9 +7206,8 @@ static int handle_vmoff(struct kvm_vcpu *vcpu) static int handle_vmclear(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); + u32 zero = 0; gpa_t vmptr; - struct vmcs12 *vmcs12; - struct page *page;
if (!nested_vmx_check_permission(vcpu)) return 1; @@ -7219,22 +7218,9 @@ static int handle_vmclear(struct kvm_vcpu *vcpu) if (vmptr == vmx->nested.current_vmptr) nested_release_vmcs12(vmx);
- page = nested_get_page(vcpu, vmptr); - if (page == NULL) { - /* - * For accurate processor emulation, VMCLEAR beyond available - * physical memory should do nothing at all. However, it is - * possible that a nested vmx bug, not a guest hypervisor bug, - * resulted in this case, so let's shut down before doing any - * more damage: - */ - kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); - return 1; - } - vmcs12 = kmap(page); - vmcs12->launch_state = 0; - kunmap(page); - nested_release_page(page); + kvm_vcpu_write_guest(vcpu, + vmptr + offsetof(struct vmcs12, launch_state), + &zero, sizeof(zero));
nested_free_vmcs02(vmx, vmptr);
From: Raz Manor Raz.Manor@valens.com
[ Upstream commit ef5e2fa9f65befa12f1113c734602d2c1964d2a5 ]
In the function scan_dma_completions() there is a reusage of tmp variable. That coused a wrong value being used in some case when reading a short packet terminated transaction from an endpoint, in 2 concecutive reads.
This was my logic for the patch:
The req->td->dmadesc equals to 0 iff: -- There was a transaction ending with a short packet, and -- The read() to read it was shorter than the transaction length, and -- The read() to complete it is longer than the residue. I believe this is true from the printouts of various cases, but I can't be positive it is correct.
Entering this if, there should be no more data in the endpoint (a short packet terminated the transaction). If there is, the transaction wasn't really done and we should exit and wait for it to finish entirely. That is the inner if. That inner if should never happen, but it is there to be on the safe side. That is why it is marked with the comment /* paranoia */. The size of the data available in the endpoint is ep->dma->dmacount and it is read to tmp. This entire clause is based on my own educated guesses.
If we passed that inner if without breaking in the original code, than tmp & DMA_BYTE_MASK_COUNT== 0. That means we will always pass dma bytes count of 0 to dma_done(), meaning all the requested bytes were read.
dma_done() reports back to the upper layer that the request (read()) was done and how many bytes were read. In the original code that would always be the request size, regardless of the actual size of the data. That did not make sense to me at all.
However, the original value of tmp is req->td->dmacount, which is the dmacount value when the request's dma transaction was finished. And that is a much more reasonable value to report back to the caller.
To recreate the problem: Read from a bulk out endpoint in a loop, 1024 * n bytes in each iteration. Connect the PLX to a host you can control. Send to that endpoint 1024 * n + x bytes, such that 0 < x < 1024 * n and (x % 1024) != 0 You would expect the first read() to return 1024 * n and the second read() to return x. But you will get the first read to return 1024 * n and the second one to return 1024 * n. That is true for every positive integer n.
Cc: Felipe Balbi balbi@kernel.org Cc: Greg Kroah-Hartman gregkh@linuxfoundation.org Cc: linux-usb@vger.kernel.org Signed-off-by: Raz Manor Raz.Manor@valens.com Signed-off-by: Felipe Balbi felipe.balbi@linux.intel.com Signed-off-by: Sasha Levin alexander.levin@verizon.com --- drivers/usb/gadget/udc/net2280.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-)
diff --git a/drivers/usb/gadget/udc/net2280.c b/drivers/usb/gadget/udc/net2280.c index 33f3987218f7..d133252ef2c3 100644 --- a/drivers/usb/gadget/udc/net2280.c +++ b/drivers/usb/gadget/udc/net2280.c @@ -1146,15 +1146,15 @@ static int scan_dma_completions(struct net2280_ep *ep) */ while (!list_empty(&ep->queue)) { struct net2280_request *req; - u32 tmp; + u32 req_dma_count;
req = list_entry(ep->queue.next, struct net2280_request, queue); if (!req->valid) break; rmb(); - tmp = le32_to_cpup(&req->td->dmacount); - if ((tmp & BIT(VALID_BIT)) != 0) + req_dma_count = le32_to_cpup(&req->td->dmacount); + if ((req_dma_count & BIT(VALID_BIT)) != 0) break;
/* SHORT_PACKET_TRANSFERRED_INTERRUPT handles "usb-short" @@ -1163,40 +1163,41 @@ static int scan_dma_completions(struct net2280_ep *ep) */ if (unlikely(req->td->dmadesc == 0)) { /* paranoia */ - tmp = readl(&ep->dma->dmacount); - if (tmp & DMA_BYTE_COUNT_MASK) + u32 const ep_dmacount = readl(&ep->dma->dmacount); + + if (ep_dmacount & DMA_BYTE_COUNT_MASK) break; /* single transfer mode */ - dma_done(ep, req, tmp, 0); + dma_done(ep, req, req_dma_count, 0); num_completed++; break; } else if (!ep->is_in && (req->req.length % ep->ep.maxpacket) && !(ep->dev->quirks & PLX_PCIE)) {
- tmp = readl(&ep->regs->ep_stat); + u32 const ep_stat = readl(&ep->regs->ep_stat); /* AVOID TROUBLE HERE by not issuing short reads from * your gadget driver. That helps avoids errata 0121, * 0122, and 0124; not all cases trigger the warning. */ - if ((tmp & BIT(NAK_OUT_PACKETS)) == 0) { + if ((ep_stat & BIT(NAK_OUT_PACKETS)) == 0) { ep_warn(ep->dev, "%s lost packet sync!\n", ep->ep.name); req->req.status = -EOVERFLOW; } else { - tmp = readl(&ep->regs->ep_avail); - if (tmp) { + u32 const ep_avail = readl(&ep->regs->ep_avail); + if (ep_avail) { /* fifo gets flushed later */ ep->out_overflow = 1; ep_dbg(ep->dev, "%s dma, discard %d len %d\n", - ep->ep.name, tmp, + ep->ep.name, ep_avail, req->req.length); req->req.status = -EOVERFLOW; } } } - dma_done(ep, req, tmp, 0); + dma_done(ep, req, req_dma_count, 0); num_completed++; }
From: Petr Cvek petr.cvek@tul.cz
[ Upstream commit df7545719a14fa7b481896fb8689e23d0a00f682 ]
A call usb_put_phy(udc->transceiver) must be tested for a valid pointer. Use an already existing test for usb_unregister_notifier call.
Acked-by: Robert Jarzmik robert.jarzmik@free.fr Reported-by: Robert Jarzmik robert.jarzmik@free.fr Signed-off-by: Petr Cvek petr.cvek@tul.cz Signed-off-by: Felipe Balbi felipe.balbi@linux.intel.com Signed-off-by: Sasha Levin alexander.levin@verizon.com --- drivers/usb/gadget/udc/pxa27x_udc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/drivers/usb/gadget/udc/pxa27x_udc.c b/drivers/usb/gadget/udc/pxa27x_udc.c index 7fa60f5b7ae4..afd6b86458c5 100644 --- a/drivers/usb/gadget/udc/pxa27x_udc.c +++ b/drivers/usb/gadget/udc/pxa27x_udc.c @@ -2534,9 +2534,10 @@ static int pxa_udc_remove(struct platform_device *_dev) usb_del_gadget_udc(&udc->gadget); pxa_cleanup_debugfs(udc);
- if (!IS_ERR_OR_NULL(udc->transceiver)) + if (!IS_ERR_OR_NULL(udc->transceiver)) { usb_unregister_notifier(udc->transceiver, &pxa27x_udc_phy); - usb_put_phy(udc->transceiver); + usb_put_phy(udc->transceiver); + }
udc->transceiver = NULL; the_controller = NULL;
From: Joe Perches joe@perches.com
[ Upstream commit 23456565acf6d452e0368f7380aecd584c019c67 ]
Recent printk changes for KERN_CONT cause this logging to be defectively emitted on multiple lines. Fix it.
Also reduces object size a trivial amount.
$ size drivers/scsi/qla2xxx/qla_dbg.o* text data bss dec hex filename 39125 0 0 39125 98d5 drivers/scsi/qla2xxx/qla_dbg.o.new 39164 0 0 39164 98fc drivers/scsi/qla2xxx/qla_dbg.o.old
Signed-off-by: Joe Perches joe@perches.com Acked-by: Himanshu Madhani himanshu.madhani@cavium.com Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Sasha Levin alexander.levin@verizon.com --- drivers/scsi/qla2xxx/qla_dbg.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-)
diff --git a/drivers/scsi/qla2xxx/qla_dbg.c b/drivers/scsi/qla2xxx/qla_dbg.c index 658e4d15cb71..ce4ac769a9a2 100644 --- a/drivers/scsi/qla2xxx/qla_dbg.c +++ b/drivers/scsi/qla2xxx/qla_dbg.c @@ -2707,13 +2707,9 @@ ql_dump_buffer(uint32_t level, scsi_qla_host_t *vha, int32_t id, "%-+5d 0 1 2 3 4 5 6 7 8 9 A B C D E F\n", size); ql_dbg(level, vha, id, "----- -----------------------------------------------\n"); - for (cnt = 0; cnt < size; cnt++, buf++) { - if (cnt % 16 == 0) - ql_dbg(level, vha, id, "%04x:", cnt & ~0xFU); - printk(" %02x", *buf); - if (cnt % 16 == 15) - printk("\n"); + for (cnt = 0; cnt < size; cnt += 16) { + ql_dbg(level, vha, id, "%04x: ", cnt); + print_hex_dump(KERN_CONT, "", DUMP_PREFIX_NONE, 16, 1, + buf + cnt, min(16U, size - cnt), false); } - if (cnt % 16 != 0) - printk("\n"); }
From: James Smart jsmart2021@gmail.com
[ Upstream commit 5d181531bc6169e19a02a27d202cf0e982db9d0e ]
if REG_VPI fails, the driver was incorrectly issuing INIT_VFI (a SLI4 command) on a SLI3 adapter.
Signed-off-by: Dick Kennedy dick.kennedy@broadcom.com Signed-off-by: James Smart james.smart@broadcom.com Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Sasha Levin alexander.levin@verizon.com --- drivers/scsi/lpfc/lpfc_els.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index 4df3cdcf88ce..9c9563312a3d 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -8185,11 +8185,17 @@ lpfc_cmpl_reg_new_vport(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) spin_lock_irq(shost->host_lock); vport->fc_flag |= FC_VPORT_NEEDS_REG_VPI; spin_unlock_irq(shost->host_lock); - if (vport->port_type == LPFC_PHYSICAL_PORT - && !(vport->fc_flag & FC_LOGO_RCVD_DID_CHNG)) - lpfc_issue_init_vfi(vport); - else + if (mb->mbxStatus == MBX_NOT_FINISHED) + break; + if ((vport->port_type == LPFC_PHYSICAL_PORT) && + !(vport->fc_flag & FC_LOGO_RCVD_DID_CHNG)) { + if (phba->sli_rev == LPFC_SLI_REV4) + lpfc_issue_init_vfi(vport); + else + lpfc_initial_flogi(vport); + } else { lpfc_initial_fdisc(vport); + } break; } } else {
From: Franck Demathieu fdemathieu@gmail.com
[ Upstream commit 4b9de5da7e120c7f02395da729f0ec77ce7a6044 ]
The 'size' variable is unsigned according to the dt-bindings. As this variable is used as integer in other places, create a new variable that allows to fix the following sparse issue (-Wtypesign):
drivers/irqchip/irq-crossbar.c:279:52: warning: incorrect type in argument 3 (different signedness) drivers/irqchip/irq-crossbar.c:279:52: expected unsigned int [usertype] *out_value drivers/irqchip/irq-crossbar.c:279:52: got int *<noident>
Signed-off-by: Franck Demathieu fdemathieu@gmail.com Signed-off-by: Marc Zyngier marc.zyngier@arm.com Signed-off-by: Sasha Levin alexander.levin@verizon.com --- drivers/irqchip/irq-crossbar.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/irqchip/irq-crossbar.c b/drivers/irqchip/irq-crossbar.c index 05bbf171df37..1070b7b959f2 100644 --- a/drivers/irqchip/irq-crossbar.c +++ b/drivers/irqchip/irq-crossbar.c @@ -199,7 +199,7 @@ static const struct irq_domain_ops crossbar_domain_ops = { static int __init crossbar_of_init(struct device_node *node) { int i, size, reserved = 0; - u32 max = 0, entry; + u32 max = 0, entry, reg_size; const __be32 *irqsr; int ret = -ENOMEM;
@@ -276,9 +276,9 @@ static int __init crossbar_of_init(struct device_node *node) if (!cb->register_offsets) goto err_irq_map;
- of_property_read_u32(node, "ti,reg-size", &size); + of_property_read_u32(node, "ti,reg-size", ®_size);
- switch (size) { + switch (reg_size) { case 1: cb->write = crossbar_writeb; break; @@ -304,7 +304,7 @@ static int __init crossbar_of_init(struct device_node *node) continue;
cb->register_offsets[i] = reserved; - reserved += size; + reserved += reg_size; }
of_property_read_u32(node, "ti,irqs-safe-map", &cb->safe_map);
From: Tejun Heo tj@kernel.org
[ Upstream commit 637fdbae60d6cb9f6e963c1079d7e0445c86ff7d ]
If queue_delayed_work() gets called with NULL @wq, the kernel will oops asynchronuosly on timer expiration which isn't too helpful in tracking down the offender. This actually happened with smc.
__queue_delayed_work() already does several input sanity checks synchronously. Add NULL @wq check.
Reported-by: Dave Jones davej@codemonkey.org.uk Link: http://lkml.kernel.org/r/20170227171439.jshx3qplflyrgcv7@codemonkey.org.uk Signed-off-by: Tejun Heo tj@kernel.org Signed-off-by: Sasha Levin alexander.levin@verizon.com --- kernel/workqueue.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 296dcca77f33..181c2ad0cb54 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1506,6 +1506,7 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq, struct timer_list *timer = &dwork->timer; struct work_struct *work = &dwork->work;
+ WARN_ON_ONCE(!wq); WARN_ON_ONCE(timer->function != delayed_work_timer_fn || timer->data != (unsigned long)dwork); WARN_ON_ONCE(timer_pending(timer));
From: Andre Przywara andre.przywara@arm.com
[ Upstream commit a5e1e6ca94a8cec51571fd62e3eaec269717969c ]
The ITS spec says that ITS commands are only processed when the ITS is enabled (section 8.19.4, Enabled, bit[0]). Our emulation was not taking this into account. Fix this by checking the enabled state before handling CWRITER writes.
On the other hand that means that CWRITER could advance while the ITS is disabled, and enabling it would need those commands to be processed. Fix this case as well by refactoring actual command processing and calling this from both the GITS_CWRITER and GITS_CTLR handlers.
Reviewed-by: Eric Auger eric.auger@redhat.com Reviewed-by: Christoffer Dall cdall@linaro.org Signed-off-by: Andre Przywara andre.przywara@arm.com Signed-off-by: Marc Zyngier marc.zyngier@arm.com Signed-off-by: Sasha Levin alexander.levin@verizon.com --- virt/kvm/arm/vgic/vgic-its.c | 109 ++++++++++++++++++++++++++----------------- 1 file changed, 65 insertions(+), 44 deletions(-)
diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index 4660a7d04eea..77b4c481534b 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -360,29 +360,6 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu) return ret; }
-static unsigned long vgic_mmio_read_its_ctlr(struct kvm *vcpu, - struct vgic_its *its, - gpa_t addr, unsigned int len) -{ - u32 reg = 0; - - mutex_lock(&its->cmd_lock); - if (its->creadr == its->cwriter) - reg |= GITS_CTLR_QUIESCENT; - if (its->enabled) - reg |= GITS_CTLR_ENABLE; - mutex_unlock(&its->cmd_lock); - - return reg; -} - -static void vgic_mmio_write_its_ctlr(struct kvm *kvm, struct vgic_its *its, - gpa_t addr, unsigned int len, - unsigned long val) -{ - its->enabled = !!(val & GITS_CTLR_ENABLE); -} - static unsigned long vgic_mmio_read_its_typer(struct kvm *kvm, struct vgic_its *its, gpa_t addr, unsigned int len) @@ -1160,33 +1137,16 @@ static void vgic_mmio_write_its_cbaser(struct kvm *kvm, struct vgic_its *its, #define ITS_CMD_SIZE 32 #define ITS_CMD_OFFSET(reg) ((reg) & GENMASK(19, 5))
-/* - * By writing to CWRITER the guest announces new commands to be processed. - * To avoid any races in the first place, we take the its_cmd lock, which - * protects our ring buffer variables, so that there is only one user - * per ITS handling commands at a given time. - */ -static void vgic_mmio_write_its_cwriter(struct kvm *kvm, struct vgic_its *its, - gpa_t addr, unsigned int len, - unsigned long val) +/* Must be called with the cmd_lock held. */ +static void vgic_its_process_commands(struct kvm *kvm, struct vgic_its *its) { gpa_t cbaser; u64 cmd_buf[4]; - u32 reg;
- if (!its) - return; - - mutex_lock(&its->cmd_lock); - - reg = update_64bit_reg(its->cwriter, addr & 7, len, val); - reg = ITS_CMD_OFFSET(reg); - if (reg >= ITS_CMD_BUFFER_SIZE(its->cbaser)) { - mutex_unlock(&its->cmd_lock); + /* Commands are only processed when the ITS is enabled. */ + if (!its->enabled) return; - }
- its->cwriter = reg; cbaser = CBASER_ADDRESS(its->cbaser);
while (its->cwriter != its->creadr) { @@ -1206,6 +1166,34 @@ static void vgic_mmio_write_its_cwriter(struct kvm *kvm, struct vgic_its *its, if (its->creadr == ITS_CMD_BUFFER_SIZE(its->cbaser)) its->creadr = 0; } +} + +/* + * By writing to CWRITER the guest announces new commands to be processed. + * To avoid any races in the first place, we take the its_cmd lock, which + * protects our ring buffer variables, so that there is only one user + * per ITS handling commands at a given time. + */ +static void vgic_mmio_write_its_cwriter(struct kvm *kvm, struct vgic_its *its, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u64 reg; + + if (!its) + return; + + mutex_lock(&its->cmd_lock); + + reg = update_64bit_reg(its->cwriter, addr & 7, len, val); + reg = ITS_CMD_OFFSET(reg); + if (reg >= ITS_CMD_BUFFER_SIZE(its->cbaser)) { + mutex_unlock(&its->cmd_lock); + return; + } + its->cwriter = reg; + + vgic_its_process_commands(kvm, its);
mutex_unlock(&its->cmd_lock); } @@ -1286,6 +1274,39 @@ static void vgic_mmio_write_its_baser(struct kvm *kvm, *regptr = reg; }
+static unsigned long vgic_mmio_read_its_ctlr(struct kvm *vcpu, + struct vgic_its *its, + gpa_t addr, unsigned int len) +{ + u32 reg = 0; + + mutex_lock(&its->cmd_lock); + if (its->creadr == its->cwriter) + reg |= GITS_CTLR_QUIESCENT; + if (its->enabled) + reg |= GITS_CTLR_ENABLE; + mutex_unlock(&its->cmd_lock); + + return reg; +} + +static void vgic_mmio_write_its_ctlr(struct kvm *kvm, struct vgic_its *its, + gpa_t addr, unsigned int len, + unsigned long val) +{ + mutex_lock(&its->cmd_lock); + + its->enabled = !!(val & GITS_CTLR_ENABLE); + + /* + * Try to process any pending commands. This function bails out early + * if the ITS is disabled or no commands have been queued. + */ + vgic_its_process_commands(kvm, its); + + mutex_unlock(&its->cmd_lock); +} + #define REGISTER_ITS_DESC(off, rd, wr, length, acc) \ { \ .reg_offset = off, \
From: Mark Rutland mark.rutland@arm.com
[ Upstream commit ba4dd156eabdca93501d92a980ba27fa5f4bbd27 ]
Currently we BUG() if we see an ESR_EL2.EC value we don't recognise. As configurable disables/enables are added to the architecture (controlled by RES1/RES0 bits respectively), with associated synchronous exceptions, it may be possible for a guest to trigger exceptions with classes that we don't recognise.
While we can't service these exceptions in a manner useful to the guest, we can avoid bringing down the host. Per ARM DDI 0487A.k_iss10775, page D7-1937, EC values within the range 0x00 - 0x2c are reserved for future use with synchronous exceptions, and EC values within the range 0x2d - 0x3f may be used for either synchronous or asynchronous exceptions.
The patch makes KVM handle any unknown EC by injecting an UNDEFINED exception into the guest, with a corresponding (ratelimited) warning in the host dmesg. We could later improve on this with with a new (opt-in) exit to the host userspace.
Cc: Dave Martin dave.martin@arm.com Cc: Suzuki K Poulose suzuki.poulose@arm.com Reviewed-by: Christoffer Dall christoffer.dall@linaro.org Signed-off-by: Mark Rutland mark.rutland@arm.com Signed-off-by: Marc Zyngier marc.zyngier@arm.com Signed-off-by: Sasha Levin alexander.levin@verizon.com --- arch/arm64/kvm/handle_exit.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-)
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index a204adf29f0a..85baadab02d3 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -125,7 +125,19 @@ static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu, struct kvm_run *run) return ret; }
+static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + u32 hsr = kvm_vcpu_get_hsr(vcpu); + + kvm_pr_unimpl("Unknown exception class: hsr: %#08x -- %s\n", + hsr, esr_get_class_string(hsr)); + + kvm_inject_undefined(vcpu); + return 1; +} + static exit_handle_fn arm_exit_handlers[] = { + [0 ... ESR_ELx_EC_MAX] = kvm_handle_unknown_ec, [ESR_ELx_EC_WFx] = kvm_handle_wfx, [ESR_ELx_EC_CP15_32] = kvm_handle_cp15_32, [ESR_ELx_EC_CP15_64] = kvm_handle_cp15_64, @@ -151,13 +163,6 @@ static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu) u32 hsr = kvm_vcpu_get_hsr(vcpu); u8 hsr_ec = ESR_ELx_EC(hsr);
- if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) || - !arm_exit_handlers[hsr_ec]) { - kvm_err("Unknown exception class: hsr: %#08x -- %s\n", - hsr, esr_get_class_string(hsr)); - BUG(); - } - return arm_exit_handlers[hsr_ec]; }
From: Mark Rutland mark.rutland@arm.com
[ Upstream commit f050fe7a9164945dd1c28be05bf00e8cfb082ccf ]
Currently we BUG() if we see a HSR.EC value we don't recognise. As configurable disables/enables are added to the architecture (controlled by RES1/RES0 bits respectively), with associated synchronous exceptions, it may be possible for a guest to trigger exceptions with classes that we don't recognise.
While we can't service these exceptions in a manner useful to the guest, we can avoid bringing down the host. Per ARM DDI 0406C.c, all currently unallocated HSR EC encodings are reserved, and per ARM DDI 0487A.k_iss10775, page G6-4395, EC values within the range 0x00 - 0x2c are reserved for future use with synchronous exceptions, and EC values within the range 0x2d - 0x3f may be used for either synchronous or asynchronous exceptions.
The patch makes KVM handle any unknown EC by injecting an UNDEFINED exception into the guest, with a corresponding (ratelimited) warning in the host dmesg. We could later improve on this with with a new (opt-in) exit to the host userspace.
Cc: Dave Martin dave.martin@arm.com Cc: Suzuki K Poulose suzuki.poulose@arm.com Reviewed-by: Christoffer Dall christoffer.dall@linaro.org Signed-off-by: Mark Rutland mark.rutland@arm.com Signed-off-by: Marc Zyngier marc.zyngier@arm.com Signed-off-by: Sasha Levin alexander.levin@verizon.com --- arch/arm/include/asm/kvm_arm.h | 1 + arch/arm/kvm/handle_exit.c | 19 ++++++++++++------- 2 files changed, 13 insertions(+), 7 deletions(-)
diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h index e22089fb44dc..a3f0b3d50089 100644 --- a/arch/arm/include/asm/kvm_arm.h +++ b/arch/arm/include/asm/kvm_arm.h @@ -209,6 +209,7 @@ #define HSR_EC_IABT_HYP (0x21) #define HSR_EC_DABT (0x24) #define HSR_EC_DABT_HYP (0x25) +#define HSR_EC_MAX (0x3f)
#define HSR_WFI_IS_WFE (_AC(1, UL) << 0)
diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c index 066b6d4508ce..42f5daf715d0 100644 --- a/arch/arm/kvm/handle_exit.c +++ b/arch/arm/kvm/handle_exit.c @@ -79,7 +79,19 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run) return 1; }
+static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + u32 hsr = kvm_vcpu_get_hsr(vcpu); + + kvm_pr_unimpl("Unknown exception class: hsr: %#08x\n", + hsr); + + kvm_inject_undefined(vcpu); + return 1; +} + static exit_handle_fn arm_exit_handlers[] = { + [0 ... HSR_EC_MAX] = kvm_handle_unknown_ec, [HSR_EC_WFI] = kvm_handle_wfx, [HSR_EC_CP15_32] = kvm_handle_cp15_32, [HSR_EC_CP15_64] = kvm_handle_cp15_64, @@ -98,13 +110,6 @@ static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu) { u8 hsr_ec = kvm_vcpu_trap_get_class(vcpu);
- if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) || - !arm_exit_handlers[hsr_ec]) { - kvm_err("Unknown exception class: hsr: %#08x\n", - (unsigned int)kvm_vcpu_get_hsr(vcpu)); - BUG(); - } - return arm_exit_handlers[hsr_ec]; }
From: Wanpeng Li wanpeng.li@hotmail.com
[ Upstream commit 2f707d97982286b307ef2a9b034e19aabc1abb56 ]
Reported by syzkaller:
WARNING: CPU: 1 PID: 27742 at arch/x86/kvm/vmx.c:11029 nested_vmx_vmexit+0x5c35/0x74d0 arch/x86/kvm/vmx.c:11029 CPU: 1 PID: 27742 Comm: a.out Not tainted 4.10.0+ #229 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:15 [inline] dump_stack+0x2ee/0x3ef lib/dump_stack.c:51 panic+0x1fb/0x412 kernel/panic.c:179 __warn+0x1c4/0x1e0 kernel/panic.c:540 warn_slowpath_null+0x2c/0x40 kernel/panic.c:583 nested_vmx_vmexit+0x5c35/0x74d0 arch/x86/kvm/vmx.c:11029 vmx_leave_nested arch/x86/kvm/vmx.c:11136 [inline] vmx_set_msr+0x1565/0x1910 arch/x86/kvm/vmx.c:3324 kvm_set_msr+0xd4/0x170 arch/x86/kvm/x86.c:1099 do_set_msr+0x11e/0x190 arch/x86/kvm/x86.c:1128 __msr_io arch/x86/kvm/x86.c:2577 [inline] msr_io+0x24b/0x450 arch/x86/kvm/x86.c:2614 kvm_arch_vcpu_ioctl+0x35b/0x46a0 arch/x86/kvm/x86.c:3497 kvm_vcpu_ioctl+0x232/0x1120 arch/x86/kvm/../../../virt/kvm/kvm_main.c:2721 vfs_ioctl fs/ioctl.c:43 [inline] do_vfs_ioctl+0x1bf/0x1790 fs/ioctl.c:683 SYSC_ioctl fs/ioctl.c:698 [inline] SyS_ioctl+0x8f/0xc0 fs/ioctl.c:689 entry_SYSCALL_64_fastpath+0x1f/0xc2
The syzkaller folks reported a nested_run_pending warning during userspace clear VMX capability which is exposed to L1 before.
The warning gets thrown while doing
(*(uint32_t*)0x20aecfe8 = (uint32_t)0x1); (*(uint32_t*)0x20aecfec = (uint32_t)0x0); (*(uint32_t*)0x20aecff0 = (uint32_t)0x3a); (*(uint32_t*)0x20aecff4 = (uint32_t)0x0); (*(uint64_t*)0x20aecff8 = (uint64_t)0x0); r[29] = syscall(__NR_ioctl, r[4], 0x4008ae89ul, 0x20aecfe8ul, 0, 0, 0, 0, 0, 0);
i.e. KVM_SET_MSR ioctl with
struct kvm_msrs { .nmsrs = 1, .pad = 0, .entries = { {.index = MSR_IA32_FEATURE_CONTROL, .reserved = 0, .data = 0} } }
The VMLANCH/VMRESUME emulation should be stopped since the CPU is going to reset here. This patch resets the nested_run_pending since the CPU is going to be reset hence there should be nothing pending.
Reported-by: Dmitry Vyukov dvyukov@google.com Suggested-by: Radim Krčmář rkrcmar@redhat.com Cc: Paolo Bonzini pbonzini@redhat.com Cc: Radim Krčmář rkrcmar@redhat.com Cc: Dmitry Vyukov dvyukov@google.com Cc: David Hildenbrand david@redhat.com Signed-off-by: Wanpeng Li wanpeng.li@hotmail.com Reviewed-by: David Hildenbrand david@redhat.com Reviewed-by: Jim Mattson jmattson@google.com Signed-off-by: Radim Krčmář rkrcmar@redhat.com Signed-off-by: Sasha Levin alexander.levin@verizon.com --- arch/x86/kvm/vmx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 22476d2ce002..5c58bd60b45b 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -10885,8 +10885,10 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, */ static void vmx_leave_nested(struct kvm_vcpu *vcpu) { - if (is_guest_mode(vcpu)) + if (is_guest_mode(vcpu)) { + to_vmx(vcpu)->nested.nested_run_pending = 0; nested_vmx_vmexit(vcpu, -1, 0, 0); + } free_nested(to_vmx(vcpu)); }
From: "Blomme, Maarten" Maarten.Blomme@flir.com
[ Upstream commit 4342696df764ec65dcdfbd0c10d90ea52505f8ba ]
Signed-off-by: Maarten Blomme Maarten.Blomme@flir.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin alexander.levin@verizon.com --- drivers/net/phy/spi_ks8995.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/net/phy/spi_ks8995.c b/drivers/net/phy/spi_ks8995.c index 93ffedfa2994..aa7209d794f3 100644 --- a/drivers/net/phy/spi_ks8995.c +++ b/drivers/net/phy/spi_ks8995.c @@ -498,6 +498,7 @@ static int ks8995_probe(struct spi_device *spi) if (err) return err;
+ sysfs_attr_init(&ks->regs_attr.attr); err = sysfs_create_bin_file(&spi->dev.kobj, &ks->regs_attr); if (err) { dev_err(&spi->dev, "unable to create sysfs file, err=%d\n",
From: "Blomme, Maarten" Maarten.Blomme@flir.com
[ Upstream commit 239870f2a0ebf75cc8f6d987dc528c5243f93d69 ]
Signed-off-by: Maarten Blomme Maarten.Blomme@flir.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin alexander.levin@verizon.com --- drivers/net/phy/spi_ks8995.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/phy/spi_ks8995.c b/drivers/net/phy/spi_ks8995.c index aa7209d794f3..1e2d4f1179da 100644 --- a/drivers/net/phy/spi_ks8995.c +++ b/drivers/net/phy/spi_ks8995.c @@ -491,8 +491,8 @@ static int ks8995_probe(struct spi_device *spi) if (err) return err;
- ks->regs_attr.size = ks->chip->regs_size; memcpy(&ks->regs_attr, &ks8995_registers_attr, sizeof(ks->regs_attr)); + ks->regs_attr.size = ks->chip->regs_size;
err = ks8995_reset(ks); if (err)
From: Michal Schmidt mschmidt@redhat.com
[ Upstream commit 83bd9eb8fc69cdd5135ed6e1f066adc8841800fd ]
VFs are currently missing the VLAN filtering feature, because we were checking the PF's acquire response before actually performing the acquire.
Fix it by setting the feature flag later when we have the PF response.
Signed-off-by: Michal Schmidt mschmidt@redhat.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin alexander.levin@verizon.com --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-)
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index dcc620549f78..5d958b5bb8b1 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -13293,17 +13293,15 @@ static int bnx2x_init_dev(struct bnx2x *bp, struct pci_dev *pdev, dev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_TSO | NETIF_F_TSO_ECN | NETIF_F_TSO6 | NETIF_F_HIGHDMA;
- /* VF with OLD Hypervisor or old PF do not support filtering */ if (IS_PF(bp)) { if (chip_is_e1x) bp->accept_any_vlan = true; else dev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER; -#ifdef CONFIG_BNX2X_SRIOV - } else if (bp->acquire_resp.pfdev_info.pf_cap & PFVF_CAP_VLAN_FILTER) { - dev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER; -#endif } + /* For VF we'll know whether to enable VLAN filtering after + * getting a response to CHANNEL_TLV_ACQUIRE from PF. + */
dev->features |= dev->hw_features | NETIF_F_HW_VLAN_CTAG_RX; dev->features |= NETIF_F_HIGHDMA; @@ -14006,6 +14004,14 @@ static int bnx2x_init_one(struct pci_dev *pdev, rc = bnx2x_vfpf_acquire(bp, tx_count, rx_count); if (rc) goto init_one_freemem; + +#ifdef CONFIG_BNX2X_SRIOV + /* VF with OLD Hypervisor or old PF do not support filtering */ + if (bp->acquire_resp.pfdev_info.pf_cap & PFVF_CAP_VLAN_FILTER) { + dev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER; + dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; + } +#endif }
/* Enable SRIOV if capability found in configuration space */
From: Michal Schmidt mschmidt@redhat.com
[ Upstream commit 466e8bf10ac104d96e1ea813e8126e11cb72ea20 ]
It is possible to crash the kernel by accessing a PTP device while its associated bnx2x interface is down. Before the interface is brought up, the timecounter is not initialized, so accessing it results in NULL dereference.
Fix it by checking if the interface is up.
Use -ENETDOWN as the error code when the interface is down. -EFAULT in bnx2x_ptp_adjfreq() did not seem right.
Tested using phc_ctl get/set/adj/freq commands.
Signed-off-by: Michal Schmidt mschmidt@redhat.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin alexander.levin@verizon.com --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 4febe60eadc2..dcc620549f78 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -13735,7 +13735,7 @@ static int bnx2x_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb) if (!netif_running(bp->dev)) { DP(BNX2X_MSG_PTP, "PTP adjfreq called while the interface is down\n"); - return -EFAULT; + return -ENETDOWN; }
if (ppb < 0) { @@ -13794,6 +13794,12 @@ static int bnx2x_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) { struct bnx2x *bp = container_of(ptp, struct bnx2x, ptp_clock_info);
+ if (!netif_running(bp->dev)) { + DP(BNX2X_MSG_PTP, + "PTP adjtime called while the interface is down\n"); + return -ENETDOWN; + } + DP(BNX2X_MSG_PTP, "PTP adjtime called, delta = %llx\n", delta);
timecounter_adjtime(&bp->timecounter, delta); @@ -13806,6 +13812,12 @@ static int bnx2x_ptp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts) struct bnx2x *bp = container_of(ptp, struct bnx2x, ptp_clock_info); u64 ns;
+ if (!netif_running(bp->dev)) { + DP(BNX2X_MSG_PTP, + "PTP gettime called while the interface is down\n"); + return -ENETDOWN; + } + ns = timecounter_read(&bp->timecounter);
DP(BNX2X_MSG_PTP, "PTP gettime called, ns = %llu\n", ns); @@ -13821,6 +13833,12 @@ static int bnx2x_ptp_settime(struct ptp_clock_info *ptp, struct bnx2x *bp = container_of(ptp, struct bnx2x, ptp_clock_info); u64 ns;
+ if (!netif_running(bp->dev)) { + DP(BNX2X_MSG_PTP, + "PTP settime called while the interface is down\n"); + return -ENETDOWN; + } + ns = timespec64_to_ns(ts);
DP(BNX2X_MSG_PTP, "PTP settime called, ns = %llu\n", ns);
From: Michal Schmidt mschmidt@redhat.com
[ Upstream commit 22118d861cec5da6ed525aaf12a3de9bfeffc58f ]
It is too late to check for the limit of the number of VF multicast addresses after they have already been copied to the req->multicast[] array, possibly overflowing it.
Do the check before copying.
Also fix the error path to not skip unlocking vf2pf_mutex.
Signed-off-by: Michal Schmidt mschmidt@redhat.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin alexander.levin@verizon.com --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-)
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c index bfae300cf25f..c2d327d9dff0 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c @@ -868,7 +868,7 @@ int bnx2x_vfpf_set_mcast(struct net_device *dev) struct bnx2x *bp = netdev_priv(dev); struct vfpf_set_q_filters_tlv *req = &bp->vf2pf_mbox->req.set_q_filters; struct pfvf_general_resp_tlv *resp = &bp->vf2pf_mbox->resp.general_resp; - int rc, i = 0; + int rc = 0, i = 0; struct netdev_hw_addr *ha;
if (bp->state != BNX2X_STATE_OPEN) { @@ -883,6 +883,15 @@ int bnx2x_vfpf_set_mcast(struct net_device *dev) /* Get Rx mode requested */ DP(NETIF_MSG_IFUP, "dev->flags = %x\n", dev->flags);
+ /* We support PFVF_MAX_MULTICAST_PER_VF mcast addresses tops */ + if (netdev_mc_count(dev) > PFVF_MAX_MULTICAST_PER_VF) { + DP(NETIF_MSG_IFUP, + "VF supports not more than %d multicast MAC addresses\n", + PFVF_MAX_MULTICAST_PER_VF); + rc = -EINVAL; + goto out; + } + netdev_for_each_mc_addr(ha, dev) { DP(NETIF_MSG_IFUP, "Adding mcast MAC: %pM\n", bnx2x_mc_addr(ha)); @@ -890,16 +899,6 @@ int bnx2x_vfpf_set_mcast(struct net_device *dev) i++; }
- /* We support four PFVF_MAX_MULTICAST_PER_VF mcast - * addresses tops - */ - if (i >= PFVF_MAX_MULTICAST_PER_VF) { - DP(NETIF_MSG_IFUP, - "VF supports not more than %d multicast MAC addresses\n", - PFVF_MAX_MULTICAST_PER_VF); - return -EINVAL; - } - req->n_multicast = i; req->flags |= VFPF_SET_Q_FILTERS_MULTICAST_CHANGED; req->vf_qid = 0; @@ -924,7 +923,7 @@ int bnx2x_vfpf_set_mcast(struct net_device *dev) out: bnx2x_vfpf_finalize(bp, &req->first_tlv);
- return 0; + return rc; }
/* request pf to add a vlan for the vf */
From: Thomas Falcon tlfalcon@linux.vnet.ibm.com
[ Upstream commit 068d9f90a6978c3e3a662d9e85204a7d6be240d2 ]
The amount of TX/RX buffers that the vNIC driver currently allocates is different from the amount agreed upon in negotiation with firmware. Correct that by allocating the requested number of buffers confirmed by firmware.
Signed-off-by: Thomas Falcon tlfalcon@linux.vnet.ibm.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin alexander.levin@verizon.com --- drivers/net/ethernet/ibm/ibmvnic.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 2eec76e88ead..7c6c1468628b 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -404,7 +404,7 @@ static int ibmvnic_open(struct net_device *netdev) send_map_query(adapter); for (i = 0; i < rxadd_subcrqs; i++) { init_rx_pool(adapter, &adapter->rx_pool[i], - IBMVNIC_BUFFS_PER_POOL, i, + adapter->req_rx_add_entries_per_subcrq, i, be64_to_cpu(size_array[i]), 1); if (alloc_rx_pool(adapter, &adapter->rx_pool[i])) { dev_err(dev, "Couldn't alloc rx pool\n"); @@ -419,23 +419,23 @@ static int ibmvnic_open(struct net_device *netdev) for (i = 0; i < tx_subcrqs; i++) { tx_pool = &adapter->tx_pool[i]; tx_pool->tx_buff = - kcalloc(adapter->max_tx_entries_per_subcrq, + kcalloc(adapter->req_tx_entries_per_subcrq, sizeof(struct ibmvnic_tx_buff), GFP_KERNEL); if (!tx_pool->tx_buff) goto tx_pool_alloc_failed;
if (alloc_long_term_buff(adapter, &tx_pool->long_term_buff, - adapter->max_tx_entries_per_subcrq * + adapter->req_tx_entries_per_subcrq * adapter->req_mtu)) goto tx_ltb_alloc_failed;
tx_pool->free_map = - kcalloc(adapter->max_tx_entries_per_subcrq, + kcalloc(adapter->req_tx_entries_per_subcrq, sizeof(int), GFP_KERNEL); if (!tx_pool->free_map) goto tx_fm_alloc_failed;
- for (j = 0; j < adapter->max_tx_entries_per_subcrq; j++) + for (j = 0; j < adapter->req_tx_entries_per_subcrq; j++) tx_pool->free_map[j] = j;
tx_pool->consumer_index = 0; @@ -746,7 +746,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
tx_pool->consumer_index = (tx_pool->consumer_index + 1) % - adapter->max_tx_entries_per_subcrq; + adapter->req_tx_entries_per_subcrq;
tx_buff = &tx_pool->tx_buff[index]; tx_buff->skb = skb; @@ -819,7 +819,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
if (tx_pool->consumer_index == 0) tx_pool->consumer_index = - adapter->max_tx_entries_per_subcrq - 1; + adapter->req_tx_entries_per_subcrq - 1; else tx_pool->consumer_index--;
@@ -1400,7 +1400,7 @@ restart_loop: producer_index] = index; adapter->tx_pool[pool].producer_index = (adapter->tx_pool[pool].producer_index + 1) % - adapter->max_tx_entries_per_subcrq; + adapter->req_tx_entries_per_subcrq; } /* remove tx_comp scrq*/ next->tx_comp.first = 0;
From: Thomas Falcon tlfalcon@linux.vnet.ibm.com
[ Upstream commit 142c0ac445792c492579cb01f1cfd4e32e6dfcce ]
Use a counter to track the number of outstanding transmissions sent that have not received completions. If the counter reaches the maximum number of queue entries, stop transmissions on that queue. As we receive more completions from firmware, wake the queue once the counter reaches an acceptable level.
This patch prevents hardware/firmware TX queue from filling up and and generating errors. Since incorporating this fix, internal testing has reported that these firmware errors have stopped.
Signed-off-by: Thomas Falcon tlfalcon@linux.vnet.ibm.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin alexander.levin@verizon.com --- drivers/net/ethernet/ibm/ibmvnic.c | 27 ++++++++++++++++++++++++++- drivers/net/ethernet/ibm/ibmvnic.h | 1 + 2 files changed, 27 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index b8778e7b1f79..2eec76e88ead 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -705,6 +705,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) u8 *hdrs = (u8 *)&adapter->tx_rx_desc_req; struct device *dev = &adapter->vdev->dev; struct ibmvnic_tx_buff *tx_buff = NULL; + struct ibmvnic_sub_crq_queue *tx_scrq; struct ibmvnic_tx_pool *tx_pool; unsigned int tx_send_failed = 0; unsigned int tx_map_failed = 0; @@ -724,6 +725,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) int ret = 0;
tx_pool = &adapter->tx_pool[queue_num]; + tx_scrq = adapter->tx_scrq[queue_num]; txq = netdev_get_tx_queue(netdev, skb_get_queue_mapping(skb)); handle_array = (u64 *)((u8 *)(adapter->login_rsp_buf) + be32_to_cpu(adapter->login_rsp_buf-> @@ -826,6 +828,14 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) ret = NETDEV_TX_BUSY; goto out; } + + atomic_inc(&tx_scrq->used); + + if (atomic_read(&tx_scrq->used) >= adapter->req_tx_entries_per_subcrq) { + netdev_info(netdev, "Stopping queue %d\n", queue_num); + netif_stop_subqueue(netdev, queue_num); + } + tx_packets++; tx_bytes += skb->len; txq->trans_start = jiffies; @@ -1220,6 +1230,7 @@ static struct ibmvnic_sub_crq_queue *init_sub_crq_queue(struct ibmvnic_adapter scrq->adapter = adapter; scrq->size = 4 * PAGE_SIZE / sizeof(*scrq->msgs); scrq->cur = 0; + atomic_set(&scrq->used, 0); scrq->rx_skb_top = NULL; spin_lock_init(&scrq->lock);
@@ -1368,8 +1379,22 @@ restart_loop: DMA_TO_DEVICE); }
- if (txbuff->last_frag) + if (txbuff->last_frag) { + atomic_dec(&scrq->used); + + if (atomic_read(&scrq->used) <= + (adapter->req_tx_entries_per_subcrq / 2) && + netif_subqueue_stopped(adapter->netdev, + txbuff->skb)) { + netif_wake_subqueue(adapter->netdev, + scrq->pool_index); + netdev_dbg(adapter->netdev, + "Started queue %d\n", + scrq->pool_index); + } + dev_kfree_skb_any(txbuff->skb); + }
adapter->tx_pool[pool].free_map[adapter->tx_pool[pool]. producer_index] = index; diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h index dd775d951b73..892eda346e54 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.h +++ b/drivers/net/ethernet/ibm/ibmvnic.h @@ -863,6 +863,7 @@ struct ibmvnic_sub_crq_queue { spinlock_t lock; struct sk_buff *rx_skb_top; struct ibmvnic_adapter *adapter; + atomic_t used; };
struct ibmvnic_long_term_buff {
From: Michal Schmidt mschmidt@redhat.com
[ Upstream commit 78d5505432436516456c12abbe705ec8dee7ee2b ]
On failure to configure a VF MAC/VLAN filter we should not attempt to rollback filters that we failed to configure with -EEXIST.
Signed-off-by: Michal Schmidt mschmidt@redhat.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin alexander.levin@verizon.com --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c | 8 +++++++- drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h | 1 + 2 files changed, 8 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c index 3f77d0863543..c6e059119b22 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c @@ -434,7 +434,9 @@ static int bnx2x_vf_mac_vlan_config(struct bnx2x *bp,
/* Add/Remove the filter */ rc = bnx2x_config_vlan_mac(bp, &ramrod); - if (rc && rc != -EEXIST) { + if (rc == -EEXIST) + return 0; + if (rc) { BNX2X_ERR("Failed to %s %s\n", filter->add ? "add" : "delete", (filter->type == BNX2X_VF_FILTER_VLAN_MAC) ? @@ -444,6 +446,8 @@ static int bnx2x_vf_mac_vlan_config(struct bnx2x *bp, return rc; }
+ filter->applied = true; + return 0; }
@@ -471,6 +475,8 @@ int bnx2x_vf_mac_vlan_config_list(struct bnx2x *bp, struct bnx2x_virtf *vf, BNX2X_ERR("Managed only %d/%d filters - rolling back\n", i, filters->count + 1); while (--i >= 0) { + if (!filters->filters[i].applied) + continue; filters->filters[i].add = !filters->filters[i].add; bnx2x_vf_mac_vlan_config(bp, vf, qid, &filters->filters[i], diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h index 7a6d406f4c11..888d0b6632e8 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h @@ -114,6 +114,7 @@ struct bnx2x_vf_mac_vlan_filter { (BNX2X_VF_FILTER_MAC | BNX2X_VF_FILTER_VLAN) /*shortcut*/
bool add; + bool applied; u8 *mac; u16 vid; };
From: Sowmini Varadhan sowmini.varadhan@oracle.com
[ Upstream commit b21dd4506b71bdb9c5a20e759255cd2513ea7ebe ]
Commit a93d01f5777e ("RDS: TCP: avoid bad page reference in rds_tcp_listen_data_ready") added the function rds_tcp_listen_sock_def_readable() to handle the case when a partially set-up acceptor socket drops into rds_tcp_listen_data_ready(). However, if the listen socket (rtn->rds_tcp_listen_sock) is itself going through a tear-down via rds_tcp_listen_stop(), the (*ready)() will be null and we would hit a panic of the form BUG: unable to handle kernel NULL pointer dereference at (null) IP: (null) : ? rds_tcp_listen_data_ready+0x59/0xb0 [rds_tcp] tcp_data_queue+0x39d/0x5b0 tcp_rcv_established+0x2e5/0x660 tcp_v4_do_rcv+0x122/0x220 tcp_v4_rcv+0x8b7/0x980 : In the above case, it is not fatal to encounter a NULL value for ready- we should just drop the packet and let the flush of the acceptor thread finish gracefully.
In general, the tear-down sequence for listen() and accept() socket that is ensured by this commit is: rtn->rds_tcp_listen_sock = NULL; /* prevent any new accepts */ In rds_tcp_listen_stop(): serialize with, and prevent, further callbacks using lock_sock() flush rds_wq flush acceptor workq sock_release(listen socket)
Signed-off-by: Sowmini Varadhan sowmini.varadhan@oracle.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin alexander.levin@verizon.com --- net/rds/tcp.c | 15 ++++++++++----- net/rds/tcp.h | 2 +- net/rds/tcp_listen.c | 9 +++++++-- 3 files changed, 18 insertions(+), 8 deletions(-)
diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 20e2923dc827..78f976d32018 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -478,9 +478,10 @@ static void __net_exit rds_tcp_exit_net(struct net *net) * we do need to clean up the listen socket here. */ if (rtn->rds_tcp_listen_sock) { - rds_tcp_listen_stop(rtn->rds_tcp_listen_sock); + struct socket *lsock = rtn->rds_tcp_listen_sock; + rtn->rds_tcp_listen_sock = NULL; - flush_work(&rtn->rds_tcp_accept_w); + rds_tcp_listen_stop(lsock, &rtn->rds_tcp_accept_w); } }
@@ -517,10 +518,10 @@ static void rds_tcp_kill_sock(struct net *net) struct rds_tcp_connection *tc, *_tc; LIST_HEAD(tmp_list); struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid); + struct socket *lsock = rtn->rds_tcp_listen_sock;
- rds_tcp_listen_stop(rtn->rds_tcp_listen_sock); rtn->rds_tcp_listen_sock = NULL; - flush_work(&rtn->rds_tcp_accept_w); + rds_tcp_listen_stop(lsock, &rtn->rds_tcp_accept_w); spin_lock_irq(&rds_tcp_conn_lock); list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) { struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net); @@ -540,8 +541,12 @@ static void rds_tcp_kill_sock(struct net *net) void *rds_tcp_listen_sock_def_readable(struct net *net) { struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid); + struct socket *lsock = rtn->rds_tcp_listen_sock; + + if (!lsock) + return NULL;
- return rtn->rds_tcp_listen_sock->sk->sk_user_data; + return lsock->sk->sk_user_data; }
static int rds_tcp_dev_event(struct notifier_block *this, diff --git a/net/rds/tcp.h b/net/rds/tcp.h index 9a1cc8906576..56ea6620fcf9 100644 --- a/net/rds/tcp.h +++ b/net/rds/tcp.h @@ -66,7 +66,7 @@ void rds_tcp_state_change(struct sock *sk);
/* tcp_listen.c */ struct socket *rds_tcp_listen_init(struct net *); -void rds_tcp_listen_stop(struct socket *); +void rds_tcp_listen_stop(struct socket *sock, struct work_struct *acceptor); void rds_tcp_listen_data_ready(struct sock *sk); int rds_tcp_accept_one(struct socket *sock); int rds_tcp_keepalive(struct socket *sock); diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index 525b624fec8b..185a56b1e29c 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -227,6 +227,9 @@ void rds_tcp_listen_data_ready(struct sock *sk) * before it has been accepted and the accepter has set up their * data_ready.. we only want to queue listen work for our listening * socket + * + * (*ready)() may be null if we are racing with netns delete, and + * the listen socket is being torn down. */ if (sk->sk_state == TCP_LISTEN) rds_tcp_accept_work(sk); @@ -235,7 +238,8 @@ void rds_tcp_listen_data_ready(struct sock *sk)
out: read_unlock_bh(&sk->sk_callback_lock); - ready(sk); + if (ready) + ready(sk); }
struct socket *rds_tcp_listen_init(struct net *net) @@ -275,7 +279,7 @@ out: return NULL; }
-void rds_tcp_listen_stop(struct socket *sock) +void rds_tcp_listen_stop(struct socket *sock, struct work_struct *acceptor) { struct sock *sk;
@@ -296,5 +300,6 @@ void rds_tcp_listen_stop(struct socket *sock)
/* wait for accepts to stop and close the socket */ flush_workqueue(rds_wq); + flush_work(acceptor); sock_release(sock); }
From: Chris Brandt chris.brandt@renesas.com
[ Upstream commit 2501c1bb054290679baad0ff7f4f07c714251f4c ]
While modifying the driver to use the STOP interrupt, the completion of the intermediate transfers need to wake the driver back up in order to initiate the next transfer (restart condition). Otherwise you get never ending interrupts and only the first transfer sent.
Fixes: 71ccea095ea1 ("i2c: riic: correctly finish transfers") Reported-by: Simon Horman horms@verge.net.au Signed-off-by: Chris Brandt chris.brandt@renesas.com Tested-by: Simon Horman horms+renesas@verge.net.au Signed-off-by: Wolfram Sang wsa@the-dreams.de Signed-off-by: Sasha Levin alexander.levin@verizon.com --- drivers/i2c/busses/i2c-riic.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/drivers/i2c/busses/i2c-riic.c b/drivers/i2c/busses/i2c-riic.c index 8f11d347b3ec..c811af4c8d81 100644 --- a/drivers/i2c/busses/i2c-riic.c +++ b/drivers/i2c/busses/i2c-riic.c @@ -218,8 +218,12 @@ static irqreturn_t riic_tend_isr(int irq, void *data) }
if (riic->is_last || riic->err) { - riic_clear_set_bit(riic, 0, ICIER_SPIE, RIIC_ICIER); + riic_clear_set_bit(riic, ICIER_TEIE, ICIER_SPIE, RIIC_ICIER); writeb(ICCR2_SP, riic->base + RIIC_ICCR2); + } else { + /* Transfer is complete, but do not send STOP */ + riic_clear_set_bit(riic, ICIER_TEIE, 0, RIIC_ICIER); + complete(&riic->msg_done); }
return IRQ_HANDLED;
From: WANG Cong xiyou.wangcong@gmail.com
[ Upstream commit 15e668070a64bb97f102ad9cf3bccbca0545cda8 ]
Andrey reported the following kernel crash:
kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 0 PID: 14446 Comm: syz-executor6 Not tainted 4.10.0+ #82 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 task: ffff88001f311700 task.stack: ffff88001f6e8000 RIP: 0010:ip6mr_sk_done+0x15a/0x3d0 net/ipv6/ip6mr.c:1618 RSP: 0018:ffff88001f6ef418 EFLAGS: 00010202 RAX: dffffc0000000000 RBX: 1ffff10003edde8c RCX: ffffc900043ee000 RDX: 0000000000000004 RSI: ffffffff83e3b3f8 RDI: 0000000000000020 RBP: ffff88001f6ef508 R08: fffffbfff0dcc5d8 R09: 0000000000000000 R10: ffffffff86e62ec0 R11: 0000000000000000 R12: 0000000000000000 R13: 0000000000000000 R14: ffff88001f6ef4e0 R15: ffff8800380a0040 FS: 00007f7a52cec700(0000) GS:ffff88003ec00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000000061c500 CR3: 000000001f1ae000 CR4: 00000000000006f0 DR0: 0000000020000000 DR1: 0000000020000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000600 Call Trace: rawv6_close+0x4c/0x80 net/ipv6/raw.c:1217 inet_release+0xed/0x1c0 net/ipv4/af_inet.c:425 inet6_release+0x50/0x70 net/ipv6/af_inet6.c:432 sock_release+0x8d/0x1e0 net/socket.c:597 __sock_create+0x39d/0x880 net/socket.c:1226 sock_create_kern+0x3f/0x50 net/socket.c:1243 inet_ctl_sock_create+0xbb/0x280 net/ipv4/af_inet.c:1526 icmpv6_sk_init+0x163/0x500 net/ipv6/icmp.c:954 ops_init+0x10a/0x550 net/core/net_namespace.c:115 setup_net+0x261/0x660 net/core/net_namespace.c:291 copy_net_ns+0x27e/0x540 net/core/net_namespace.c:396 9pnet_virtio: no channels available for device ./file1 create_new_namespaces+0x437/0x9b0 kernel/nsproxy.c:106 unshare_nsproxy_namespaces+0xae/0x1e0 kernel/nsproxy.c:205 SYSC_unshare kernel/fork.c:2281 [inline] SyS_unshare+0x64e/0x1000 kernel/fork.c:2231 entry_SYSCALL_64_fastpath+0x1f/0xc2
This is because net->ipv6.mr6_tables is not initialized at that point, ip6mr_rules_init() is not called yet, therefore on the error path when we iterator the list, we trigger this oops. Fix this by reordering ip6mr_rules_init() before icmpv6_sk_init().
Reported-by: Andrey Konovalov andreyknvl@google.com Signed-off-by: Cong Wang xiyou.wangcong@gmail.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin alexander.levin@verizon.com --- net/ipv6/af_inet6.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 46ad699937fd..8285a1c108c9 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -909,12 +909,12 @@ static int __init inet6_init(void) err = register_pernet_subsys(&inet6_net_ops); if (err) goto register_pernet_fail; - err = icmpv6_init(); - if (err) - goto icmp_fail; err = ip6_mr_init(); if (err) goto ipmr_fail; + err = icmpv6_init(); + if (err) + goto icmp_fail; err = ndisc_init(); if (err) goto ndisc_fail; @@ -1044,10 +1044,10 @@ igmp_fail: ndisc_cleanup(); ndisc_fail: ip6_mr_cleanup(); -ipmr_fail: - icmpv6_cleanup(); icmp_fail: unregister_pernet_subsys(&inet6_net_ops); +ipmr_fail: + icmpv6_cleanup(); register_pernet_fail: sock_unregister(PF_INET6); rtnl_unregister_all(PF_INET6);
From: Krzysztof Kozlowski krzk@kernel.org
[ Upstream commit 07de4bc88ce6a4d898cad9aa4c99c1df7e87702d ]
In a regular interrupt handler driver was finishing the crypt/decrypt request by calling complete on crypto request. This is disallowed since converting to skcipher in commit b286d8b1a690 ("crypto: skcipher - Add skcipher walk interface") and causes a warning: WARNING: CPU: 0 PID: 0 at crypto/skcipher.c:430 skcipher_walk_first+0x13c/0x14c
The interrupt is marked shared but in fact there are no other users sharing it. Thus the simplest solution seems to be to just use a threaded interrupt handler, after converting it to oneshot.
Signed-off-by: Krzysztof Kozlowski krzk@kernel.org Signed-off-by: Herbert Xu herbert@gondor.apana.org.au Signed-off-by: Sasha Levin alexander.levin@verizon.com --- drivers/crypto/s5p-sss.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/drivers/crypto/s5p-sss.c b/drivers/crypto/s5p-sss.c index dce1af0ce85c..a668286d62cb 100644 --- a/drivers/crypto/s5p-sss.c +++ b/drivers/crypto/s5p-sss.c @@ -805,8 +805,9 @@ static int s5p_aes_probe(struct platform_device *pdev) dev_warn(dev, "feed control interrupt is not available.\n"); goto err_irq; } - err = devm_request_irq(dev, pdata->irq_fc, s5p_aes_interrupt, - IRQF_SHARED, pdev->name, pdev); + err = devm_request_threaded_irq(dev, pdata->irq_fc, NULL, + s5p_aes_interrupt, IRQF_ONESHOT, + pdev->name, pdev); if (err < 0) { dev_warn(dev, "feed control interrupt is not available.\n"); goto err_irq;
From: Ming Lei tom.leiming@gmail.com
[ Upstream commit 737f98cfe7de8df7433a4d846850aa8efa44bd48 ]
Both q->mq_kobj and sw queues' kobjects should have been initialized once, instead of doing that each add_disk context.
Also this patch removes clearing of ctx in blk_mq_init_cpu_queues() because percpu allocator fills zero to allocated variable.
This patch fixes one issue[1] reported from Omar.
[1] kernel wearning when doing unbind/bind on one scsi-mq device
[ 19.347924] kobject (ffff8800791ea0b8): tried to init an initialized object, something is seriously wrong. [ 19.349781] CPU: 1 PID: 84 Comm: kworker/u8:1 Not tainted 4.10.0-rc7-00210-g53f39eeaa263 #34 [ 19.350686] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.1-20161122_114906-anatol 04/01/2014 [ 19.350920] Workqueue: events_unbound async_run_entry_fn [ 19.350920] Call Trace: [ 19.350920] dump_stack+0x63/0x83 [ 19.350920] kobject_init+0x77/0x90 [ 19.350920] blk_mq_register_dev+0x40/0x130 [ 19.350920] blk_register_queue+0xb6/0x190 [ 19.350920] device_add_disk+0x1ec/0x4b0 [ 19.350920] sd_probe_async+0x10d/0x1c0 [sd_mod] [ 19.350920] async_run_entry_fn+0x48/0x150 [ 19.350920] process_one_work+0x1d0/0x480 [ 19.350920] worker_thread+0x48/0x4e0 [ 19.350920] kthread+0x101/0x140 [ 19.350920] ? process_one_work+0x480/0x480 [ 19.350920] ? kthread_create_on_node+0x60/0x60 [ 19.350920] ret_from_fork+0x2c/0x40
Cc: Omar Sandoval osandov@osandov.com Signed-off-by: Ming Lei tom.leiming@gmail.com Tested-by: Peter Zijlstra (Intel) peterz@infradead.org Signed-off-by: Jens Axboe axboe@fb.com Signed-off-by: Sasha Levin alexander.levin@verizon.com --- block/blk-mq-sysfs.c | 4 +--- block/blk-mq.c | 4 +++- block/blk-mq.h | 1 + 3 files changed, 5 insertions(+), 4 deletions(-)
diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index 01fb455d3377..8c0894e0713b 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c @@ -429,7 +429,7 @@ void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx) kobject_init(&hctx->kobj, &blk_mq_hw_ktype); }
-static void blk_mq_sysfs_init(struct request_queue *q) +void blk_mq_sysfs_init(struct request_queue *q) { struct blk_mq_ctx *ctx; int cpu; @@ -449,8 +449,6 @@ int blk_mq_register_dev(struct device *dev, struct request_queue *q)
blk_mq_disable_hotplug();
- blk_mq_sysfs_init(q); - ret = kobject_add(&q->mq_kobj, kobject_get(&dev->kobj), "%s", "mq"); if (ret < 0) goto out; diff --git a/block/blk-mq.c b/block/blk-mq.c index 7b597ec4e9c5..10f8f94b7f20 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1707,7 +1707,6 @@ static void blk_mq_init_cpu_queues(struct request_queue *q, struct blk_mq_ctx *__ctx = per_cpu_ptr(q->queue_ctx, i); struct blk_mq_hw_ctx *hctx;
- memset(__ctx, 0, sizeof(*__ctx)); __ctx->cpu = i; spin_lock_init(&__ctx->lock); INIT_LIST_HEAD(&__ctx->rq_list); @@ -1970,6 +1969,9 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, if (!q->queue_ctx) goto err_exit;
+ /* init q->mq_kobj and sw queues' kobjects */ + blk_mq_sysfs_init(q); + q->queue_hw_ctx = kzalloc_node(nr_cpu_ids * sizeof(*(q->queue_hw_ctx)), GFP_KERNEL, set->numa_node); if (!q->queue_hw_ctx) diff --git a/block/blk-mq.h b/block/blk-mq.h index e5d25249028c..c55bcf67b956 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -50,6 +50,7 @@ static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, /* * sysfs helpers */ +extern void blk_mq_sysfs_init(struct request_queue *q); extern int blk_mq_sysfs_register(struct request_queue *q); extern void blk_mq_sysfs_unregister(struct request_queue *q); extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx);
From: Jim Qu Jim.Qu@amd.com
[ Upstream commit c085bd5119d5d0bdf3ef591a5563566be7dedced ]
Signed-off-by: Jim Qu Jim.Qu@amd.com Reviewed-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin alexander.levin@verizon.com --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index e41d4baebf86..ce9797b6f9c7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2020,8 +2020,11 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon) }
r = amdgpu_late_init(adev); - if (r) + if (r) { + if (fbcon) + console_unlock(); return r; + }
/* pin cursors */ list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
From: Jan Kara jack@suse.cz
[ Upstream commit 672a2c87c83649fb0167202342ce85af9a3b4f1c ]
It is invalid to call del_gendisk() when disk->queue is NULL. Fix error handling in axon_ram_probe() to avoid doing that.
Also del_gendisk() does not drop a reference to gendisk allocated by alloc_disk(). That has to be done by put_disk(). Add that call where needed.
Reported-by: Dan Carpenter dan.carpenter@oracle.com Signed-off-by: Jan Kara jack@suse.cz Signed-off-by: Jens Axboe axboe@fb.com Signed-off-by: Sasha Levin alexander.levin@verizon.com --- arch/powerpc/sysdev/axonram.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c index ada29eaed6e2..f523ac883150 100644 --- a/arch/powerpc/sysdev/axonram.c +++ b/arch/powerpc/sysdev/axonram.c @@ -274,7 +274,9 @@ failed: if (bank->disk->major > 0) unregister_blkdev(bank->disk->major, bank->disk->disk_name); - del_gendisk(bank->disk); + if (bank->disk->flags & GENHD_FL_UP) + del_gendisk(bank->disk); + put_disk(bank->disk); } device->dev.platform_data = NULL; if (bank->io_addr != 0) @@ -299,6 +301,7 @@ axon_ram_remove(struct platform_device *device) device_remove_file(&device->dev, &dev_attr_ecc); free_irq(bank->irq_id, device); del_gendisk(bank->disk); + put_disk(bank->disk); iounmap((void __iomem *) bank->io_addr); kfree(bank);
From: Florian Westphal fw@strlen.de
[ Upstream commit 7b4fdf77a450ec0fdcb2f677b080ddbf2c186544 ]
Andrey reports syzkaller splat caused by
NF_CT_ASSERT(!ip_is_fragment(ip_hdr(skb)));
in ipv4 nat. But this assertion (and the comment) are wrong, this function does see fragments when IP_NODEFRAG setsockopt is used.
As conntrack doesn't track packets without complete l4 header, only the first fragment is tracked.
Because applying nat to first packet but not the rest makes no sense this also turns off tracking of all fragments.
Reported-by: Andrey Konovalov andreyknvl@google.com Signed-off-by: Florian Westphal fw@strlen.de Signed-off-by: Pablo Neira Ayuso pablo@netfilter.org Signed-off-by: Sasha Levin alexander.levin@verizon.com --- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 4 ++++ net/ipv4/netfilter/nf_nat_l3proto_ipv4.c | 5 ----- 2 files changed, 4 insertions(+), 5 deletions(-)
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 713c09a74b90..0c9ded247ebb 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -158,6 +158,10 @@ static unsigned int ipv4_conntrack_local(void *priv, if (skb->len < sizeof(struct iphdr) || ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; + + if (ip_is_fragment(ip_hdr(skb))) /* IP_NODEFRAG setsockopt set */ + return NF_ACCEPT; + return nf_conntrack_in(state->net, PF_INET, state->hook, skb); }
diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c index f8aad03d674b..6f5e8d01b876 100644 --- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c @@ -255,11 +255,6 @@ nf_nat_ipv4_fn(void *priv, struct sk_buff *skb, /* maniptype == SRC for postrouting. */ enum nf_nat_manip_type maniptype = HOOK2MANIP(state->hook);
- /* We never see fragments: conntrack defrags on pre-routing - * and local-out, and nf_nat_out protects post-routing. - */ - NF_CT_ASSERT(!ip_is_fragment(ip_hdr(skb))); - ct = nf_ct_get(skb, &ctinfo); /* Can't track? It's not due to stress, or conntrack would * have dropped it. Hence it's the user's responsibilty to
From: Johannes Thumshirn jthumshirn@suse.de
[ Upstream commit 0bc315381fe9ed9fb91db8b0e82171b645ac008f ]
zram can handle at most SECTORS_PER_PAGE sectors in a bio's bvec. When using the NVMe over Fabrics loopback target which potentially sends a huge bulk of pages attached to the bio's bvec this results in a kernel panic because of array out of bounds accesses in zram_decompress_page().
Signed-off-by: Johannes Thumshirn jthumshirn@suse.de Reviewed-by: Hannes Reinecke hare@suse.com Reviewed-by: Sergey Senozhatsky sergey.senozhatsky@gmail.com Signed-off-by: Jens Axboe axboe@fb.com Signed-off-by: Sasha Levin alexander.levin@verizon.com --- drivers/block/zram/zram_drv.c | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index c9914d653968..b7c0b69a02f5 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1286,6 +1286,8 @@ static int zram_add(void) blk_queue_io_min(zram->disk->queue, PAGE_SIZE); blk_queue_io_opt(zram->disk->queue, PAGE_SIZE); zram->disk->queue->limits.discard_granularity = PAGE_SIZE; + zram->disk->queue->limits.max_sectors = SECTORS_PER_PAGE; + zram->disk->queue->limits.chunk_sectors = 0; blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX); /* * zram_bio_discard() will clear all logical blocks if logical block
From: Jérémy Lefaure jeremy.lefaure@lse.epita.fr
[ Upstream commit e61555c29c28a4a3b6ba6207f4a0883ee236004d ]
The MTR_DRAM_WIDTH macro returns the data width. It is sometimes used as if it returned a boolean true if the width if 8. Fix the tests where MTR_DRAM_WIDTH is misused.
Signed-off-by: Jérémy Lefaure jeremy.lefaure@lse.epita.fr Cc: linux-edac linux-edac@vger.kernel.org Link: http://lkml.kernel.org/r/20170309011809.8340-1-jeremy.lefaure@lse.epita.fr Signed-off-by: Borislav Petkov bp@suse.de Signed-off-by: Sasha Levin alexander.levin@verizon.com --- drivers/edac/i5000_edac.c | 2 +- drivers/edac/i5400_edac.c | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/drivers/edac/i5000_edac.c b/drivers/edac/i5000_edac.c index 72e07e3cf718..2a09be5f4f86 100644 --- a/drivers/edac/i5000_edac.c +++ b/drivers/edac/i5000_edac.c @@ -1293,7 +1293,7 @@ static int i5000_init_csrows(struct mem_ctl_info *mci) dimm->mtype = MEM_FB_DDR2;
/* ask what device type on this row */ - if (MTR_DRAM_WIDTH(mtr)) + if (MTR_DRAM_WIDTH(mtr) == 8) dimm->dtype = DEV_X8; else dimm->dtype = DEV_X4; diff --git a/drivers/edac/i5400_edac.c b/drivers/edac/i5400_edac.c index 6ef6ad1ba16e..029dfe07b734 100644 --- a/drivers/edac/i5400_edac.c +++ b/drivers/edac/i5400_edac.c @@ -1207,13 +1207,14 @@ static int i5400_init_dimms(struct mem_ctl_info *mci)
dimm->nr_pages = size_mb << 8; dimm->grain = 8; - dimm->dtype = MTR_DRAM_WIDTH(mtr) ? DEV_X8 : DEV_X4; + dimm->dtype = MTR_DRAM_WIDTH(mtr) == 8 ? + DEV_X8 : DEV_X4; dimm->mtype = MEM_FB_DDR2; /* * The eccc mechanism is SDDC (aka SECC), with * is similar to Chipkill. */ - dimm->edac_mode = MTR_DRAM_WIDTH(mtr) ? + dimm->edac_mode = MTR_DRAM_WIDTH(mtr) == 8 ? EDAC_S8ECD8ED : EDAC_S4ECD4ED; ndimms++; }
From: Alexey Kardashevskiy aik@ozlabs.ru
[ Upstream commit 7aafac11e308d37ed3c509829bb43d80c1811ac3 ]
The IODA2 specification says that a 64 DMA address cannot use top 4 bits (3 are reserved and one is a "TVE select"); bottom page_shift bits cannot be used for multilevel table addressing either.
The existing IODA2 table allocation code aligns the minimum TCE table size to PAGE_SIZE so in the case of 64K system pages and 4K IOMMU pages, we have 64-4-12=48 bits. Since 64K page stores 8192 TCEs, i.e. needs 13 bits, the maximum number of levels is 48/13 = 3 so we physically cannot address more and EEH happens on DMA accesses.
This adds a check that too many levels were requested.
It is still possible to have 5 levels in the case of 4K system page size.
Signed-off-by: Alexey Kardashevskiy aik@ozlabs.ru Acked-by: Gavin Shan gwshan@linux.vnet.ibm.com Signed-off-by: Michael Ellerman mpe@ellerman.id.au Signed-off-by: Sasha Levin alexander.levin@verizon.com --- arch/powerpc/platforms/powernv/pci-ioda.c | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index dcdfee0cd4f2..f602307a4386 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2623,6 +2623,9 @@ static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset, level_shift = entries_shift + 3; level_shift = max_t(unsigned, level_shift, PAGE_SHIFT);
+ if ((level_shift - 3) * levels + page_shift >= 60) + return -EINVAL; + /* Allocate TCE table */ addr = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift, levels, tce_table_size, &offset, &total_allocated);
linux-stable-mirror@lists.linaro.org