This is a note to let you know that I've just added the patch titled
powerpc/xive: Use hw CPU ids when configuring the CPU queues
to the 4.14-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
powerpc-xive-use-hw-cpu-ids-when-configuring-the-cpu-queues.patch
and it can be found in the queue-4.14 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From 8e036c8d30a2cd9d8fc7442fbf6824e0a3e986e7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg(a)kaod.org>
Date: Tue, 13 Feb 2018 09:47:12 +0100
Subject: powerpc/xive: Use hw CPU ids when configuring the CPU queues
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
From: Cédric Le Goater <clg(a)kaod.org>
commit 8e036c8d30a2cd9d8fc7442fbf6824e0a3e986e7 upstream.
The CPU event notification queues on sPAPR should be configured using
a hardware CPU identifier.
The problem did not show up on the Power Hypervisor because pHyp
supports 8 threads per core, which keeps CPU numbers contiguous. This is
not the case on all sPAPR virtual machines; some use SMT=1.
Also improve error logging by adding the CPU number.
Fixes: eac1e731b59e ("powerpc/xive: guest exploitation of the XIVE interrupt controller")
Cc: stable(a)vger.kernel.org # v4.14+
Signed-off-by: Cédric Le Goater <clg(a)kaod.org>
Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
arch/powerpc/sysdev/xive/spapr.c | 16 ++++++++++------
1 file changed, 10 insertions(+), 6 deletions(-)
--- a/arch/powerpc/sysdev/xive/spapr.c
+++ b/arch/powerpc/sysdev/xive/spapr.c
@@ -356,7 +356,8 @@ static int xive_spapr_configure_queue(u3
rc = plpar_int_get_queue_info(0, target, prio, &esn_page, &esn_size);
if (rc) {
- pr_err("Error %lld getting queue info prio %d\n", rc, prio);
+ pr_err("Error %lld getting queue info CPU %d prio %d\n", rc,
+ target, prio);
rc = -EIO;
goto fail;
}
@@ -370,7 +371,8 @@ static int xive_spapr_configure_queue(u3
/* Configure and enable the queue in HW */
rc = plpar_int_set_queue_config(flags, target, prio, qpage_phys, order);
if (rc) {
- pr_err("Error %lld setting queue for prio %d\n", rc, prio);
+ pr_err("Error %lld setting queue for CPU %d prio %d\n", rc,
+ target, prio);
rc = -EIO;
} else {
q->qpage = qpage;
@@ -389,8 +391,8 @@ static int xive_spapr_setup_queue(unsign
if (IS_ERR(qpage))
return PTR_ERR(qpage);
- return xive_spapr_configure_queue(cpu, q, prio, qpage,
- xive_queue_shift);
+ return xive_spapr_configure_queue(get_hard_smp_processor_id(cpu),
+ q, prio, qpage, xive_queue_shift);
}
static void xive_spapr_cleanup_queue(unsigned int cpu, struct xive_cpu *xc,
@@ -399,10 +401,12 @@ static void xive_spapr_cleanup_queue(uns
struct xive_q *q = &xc->queue[prio];
unsigned int alloc_order;
long rc;
+ int hw_cpu = get_hard_smp_processor_id(cpu);
- rc = plpar_int_set_queue_config(0, cpu, prio, 0, 0);
+ rc = plpar_int_set_queue_config(0, hw_cpu, prio, 0, 0);
if (rc)
- pr_err("Error %ld setting queue for prio %d\n", rc, prio);
+ pr_err("Error %ld setting queue for CPU %d prio %d\n", rc,
+ hw_cpu, prio);
alloc_order = xive_alloc_order(xive_queue_shift);
free_pages((unsigned long)q->qpage, alloc_order);
Patches currently in stable-queue which might be from clg(a)kaod.org are
queue-4.14/powerpc-xive-use-hw-cpu-ids-when-configuring-the-cpu-queues.patch
This is a note to let you know that I've just added the patch titled
s390: fix handling of -1 in set{,fs}[gu]id16 syscalls
to the 4.14-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
s390-fix-handling-of-1-in-set-fs-id16-syscalls.patch
and it can be found in the queue-4.14 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From 6dd0d2d22aa363fec075cb2577ba273ac8462e94 Mon Sep 17 00:00:00 2001
From: Eugene Syromiatnikov <esyr(a)redhat.com>
Date: Mon, 15 Jan 2018 20:38:17 +0100
Subject: s390: fix handling of -1 in set{,fs}[gu]id16 syscalls
From: Eugene Syromiatnikov <esyr(a)redhat.com>
commit 6dd0d2d22aa363fec075cb2577ba273ac8462e94 upstream.
For some reason, the implementation of some 16-bit ID system calls
(namely, setuid16/setgid16 and setfsuid16/setfsgid16) used type cast
instead of low2highgid/low2highuid macros for converting [GU]IDs, which
led to incorrect handling of value of -1 (which ought to be considered
invalid).
Discovered by strace test suite.
Cc: stable(a)vger.kernel.org
Signed-off-by: Eugene Syromiatnikov <esyr(a)redhat.com>
Signed-off-by: Heiko Carstens <heiko.carstens(a)de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky(a)de.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
arch/s390/kernel/compat_linux.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
--- a/arch/s390/kernel/compat_linux.c
+++ b/arch/s390/kernel/compat_linux.c
@@ -110,7 +110,7 @@ COMPAT_SYSCALL_DEFINE2(s390_setregid16,
COMPAT_SYSCALL_DEFINE1(s390_setgid16, u16, gid)
{
- return sys_setgid((gid_t)gid);
+ return sys_setgid(low2highgid(gid));
}
COMPAT_SYSCALL_DEFINE2(s390_setreuid16, u16, ruid, u16, euid)
@@ -120,7 +120,7 @@ COMPAT_SYSCALL_DEFINE2(s390_setreuid16,
COMPAT_SYSCALL_DEFINE1(s390_setuid16, u16, uid)
{
- return sys_setuid((uid_t)uid);
+ return sys_setuid(low2highuid(uid));
}
COMPAT_SYSCALL_DEFINE3(s390_setresuid16, u16, ruid, u16, euid, u16, suid)
@@ -173,12 +173,12 @@ COMPAT_SYSCALL_DEFINE3(s390_getresgid16,
COMPAT_SYSCALL_DEFINE1(s390_setfsuid16, u16, uid)
{
- return sys_setfsuid((uid_t)uid);
+ return sys_setfsuid(low2highuid(uid));
}
COMPAT_SYSCALL_DEFINE1(s390_setfsgid16, u16, gid)
{
- return sys_setfsgid((gid_t)gid);
+ return sys_setfsgid(low2highgid(gid));
}
static int groups16_to_user(u16 __user *grouplist, struct group_info *group_info)
Patches currently in stable-queue which might be from esyr(a)redhat.com are
queue-4.14/s390-fix-handling-of-1-in-set-fs-id16-syscalls.patch
This is a note to let you know that I've just added the patch titled
powerpc/numa: Invalidate numa_cpu_lookup_table on cpu remove
to the 4.14-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
powerpc-numa-invalidate-numa_cpu_lookup_table-on-cpu-remove.patch
and it can be found in the queue-4.14 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From 1d9a090783bef19fe8cdec878620d22f05191316 Mon Sep 17 00:00:00 2001
From: Nathan Fontenot <nfont(a)linux.vnet.ibm.com>
Date: Fri, 26 Jan 2018 13:41:59 -0600
Subject: powerpc/numa: Invalidate numa_cpu_lookup_table on cpu remove
From: Nathan Fontenot <nfont(a)linux.vnet.ibm.com>
commit 1d9a090783bef19fe8cdec878620d22f05191316 upstream.
When DLPAR removing a CPU, the unmapping of the cpu from a node in
unmap_cpu_from_node() should also invalidate the CPUs entry in the
numa_cpu_lookup_table. There is not a guarantee that on a subsequent
DLPAR add of the CPU the associativity will be the same and thus
could be in a different node. Invalidating the entry in the
numa_cpu_lookup_table causes the associativity to be read from the
device tree at the time of the add.
The current behavior of not invalidating the CPUs entry in the
numa_cpu_lookup_table can result in scenarios where the topology
layout of CPUs in the partition does not match the device tree
or the topology reported by the HMC.
This bug looks like it was introduced in 2004 in the commit titled
"ppc64: cpu hotplug notifier for numa", which is 6b15e4e87e32 in the
linux-fullhist tree. Hence tag it for all stable releases.
Cc: stable(a)vger.kernel.org
Signed-off-by: Nathan Fontenot <nfont(a)linux.vnet.ibm.com>
Reviewed-by: Tyrel Datwyler <tyreld(a)linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
arch/powerpc/include/asm/topology.h | 5 +++++
arch/powerpc/mm/numa.c | 5 -----
arch/powerpc/platforms/pseries/hotplug-cpu.c | 2 ++
3 files changed, 7 insertions(+), 5 deletions(-)
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -44,6 +44,11 @@ extern int sysfs_add_device_to_node(stru
extern void sysfs_remove_device_from_node(struct device *dev, int nid);
extern int numa_update_cpu_topology(bool cpus_locked);
+static inline void update_numa_cpu_lookup_table(unsigned int cpu, int node)
+{
+ numa_cpu_lookup_table[cpu] = node;
+}
+
static inline int early_cpu_to_node(int cpu)
{
int nid;
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -142,11 +142,6 @@ static void reset_numa_cpu_lookup_table(
numa_cpu_lookup_table[cpu] = -1;
}
-static void update_numa_cpu_lookup_table(unsigned int cpu, int node)
-{
- numa_cpu_lookup_table[cpu] = node;
-}
-
static void map_cpu_to_node(int cpu, int node)
{
update_numa_cpu_lookup_table(cpu, node);
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -36,6 +36,7 @@
#include <asm/xics.h>
#include <asm/xive.h>
#include <asm/plpar_wrappers.h>
+#include <asm/topology.h>
#include "pseries.h"
#include "offline_states.h"
@@ -331,6 +332,7 @@ static void pseries_remove_processor(str
BUG_ON(cpu_online(cpu));
set_cpu_present(cpu, false);
set_hard_smp_processor_id(cpu, -1);
+ update_numa_cpu_lookup_table(cpu, -1);
break;
}
if (cpu >= nr_cpu_ids)
Patches currently in stable-queue which might be from nfont(a)linux.vnet.ibm.com are
queue-4.14/powerpc-numa-invalidate-numa_cpu_lookup_table-on-cpu-remove.patch
This is a note to let you know that I've just added the patch titled
powerpc/radix: Remove trace_tlbie call from radix__flush_tlb_all
to the 4.14-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
powerpc-radix-remove-trace_tlbie-call-from-radix__flush_tlb_all.patch
and it can be found in the queue-4.14 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From 8d81296cfcce89013a714feb8d25004a156f8181 Mon Sep 17 00:00:00 2001
From: Mahesh Salgaonkar <mahesh(a)linux.vnet.ibm.com>
Date: Thu, 30 Nov 2017 14:35:54 +0530
Subject: powerpc/radix: Remove trace_tlbie call from radix__flush_tlb_all
From: Mahesh Salgaonkar <mahesh(a)linux.vnet.ibm.com>
commit 8d81296cfcce89013a714feb8d25004a156f8181 upstream.
radix__flush_tlb_all() is called only in the kexec path in real mode, and any
tracepoints at this stage will make kexec fail if enabled.
To verify, enable the tlbie trace before kexec.
$ echo 1 > /sys/kernel/debug/tracing/events/powerpc/tlbie/enable
== kexec into new kernel and kexec fails.
Fix this by not calling trace_tlbie from radix__flush_tlb_all().
Fixes: 0428491cba92 ("powerpc/mm: Trace tlbie(l) instructions")
Cc: stable(a)vger.kernel.org # v4.13+
Signed-off-by: Mahesh Salgaonkar <mahesh(a)linux.vnet.ibm.com>
Acked-by: Balbir Singh <bsingharora(a)gmail.com>
Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
arch/powerpc/mm/tlb-radix.c | 2 --
1 file changed, 2 deletions(-)
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -453,14 +453,12 @@ void radix__flush_tlb_all(void)
*/
asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(1), "i"(ric), "r"(rs) : "memory");
- trace_tlbie(0, 0, rb, rs, ric, prs, r);
/*
* now flush host entires by passing PRS = 0 and LPID == 0
*/
asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory");
asm volatile("eieio; tlbsync; ptesync": : :"memory");
- trace_tlbie(0, 0, rb, 0, ric, prs, r);
}
void radix__flush_tlb_pte_p9_dd1(unsigned long old_pte, struct mm_struct *mm,
Patches currently in stable-queue which might be from mahesh(a)linux.vnet.ibm.com are
queue-4.14/powerpc-radix-remove-trace_tlbie-call-from-radix__flush_tlb_all.patch
This is a note to let you know that I've just added the patch titled
powerpc/mm: Flush radix process translations when setting MMU type
to the 4.14-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
powerpc-mm-flush-radix-process-translations-when-setting-mmu-type.patch
and it can be found in the queue-4.14 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From 62e984ddfd6b056d399e24113f5e6a7145e579d8 Mon Sep 17 00:00:00 2001
From: Alexey Kardashevskiy <aik(a)ozlabs.ru>
Date: Thu, 1 Feb 2018 16:09:44 +1100
Subject: powerpc/mm: Flush radix process translations when setting MMU type
From: Alexey Kardashevskiy <aik(a)ozlabs.ru>
commit 62e984ddfd6b056d399e24113f5e6a7145e579d8 upstream.
Radix guests normally invalidate process-scoped translations when a
new pid is allocated, but migrated guests do not invalidate these, so
migrated guests sometimes crash. This is especially easy to reproduce with
a migration happening within the first 10 seconds after the guest starts
booting on the same machine.
This adds the "Invalidate process-scoped translations" flush to fix
radix guests migration.
Fixes: 2ee13be34b13 ("KVM: PPC: Book3S HV: Update kvmppc_set_arch_compat() for ISA v3.00")
Cc: stable(a)vger.kernel.org # v4.10+
Signed-off-by: Alexey Kardashevskiy <aik(a)ozlabs.ru>
Tested-by: Laurent Vivier <lvivier(a)redhat.com>
Tested-by: Daniel Henrique Barboza <danielhb(a)linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
arch/powerpc/mm/pgtable_64.c | 2 ++
1 file changed, 2 insertions(+)
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -483,6 +483,8 @@ void mmu_partition_table_set_entry(unsig
if (old & PATB_HR) {
asm volatile(PPC_TLBIE_5(%0,%1,2,0,1) : :
"r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
+ asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : :
+ "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 1);
} else {
asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : :
Patches currently in stable-queue which might be from aik(a)ozlabs.ru are
queue-4.14/powerpc-mm-flush-radix-process-translations-when-setting-mmu-type.patch
This is a note to let you know that I've just added the patch titled
PM / devfreq: Propagate error from devfreq_add_device()
to the 4.14-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
pm-devfreq-propagate-error-from-devfreq_add_device.patch
and it can be found in the queue-4.14 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From d1bf2d30728f310f72296b54f0651ecdb09cbb12 Mon Sep 17 00:00:00 2001
From: Bjorn Andersson <bjorn.andersson(a)linaro.org>
Date: Sun, 5 Nov 2017 21:27:41 -0800
Subject: PM / devfreq: Propagate error from devfreq_add_device()
From: Bjorn Andersson <bjorn.andersson(a)linaro.org>
commit d1bf2d30728f310f72296b54f0651ecdb09cbb12 upstream.
Propagate the error of devfreq_add_device() in devm_devfreq_add_device()
rather than statically returning ENOMEM. This makes it slightly faster
to pinpoint the cause of a returned error.
Fixes: 8cd84092d35e ("PM / devfreq: Add resource-managed function for devfreq device")
Cc: stable(a)vger.kernel.org
Acked-by: Chanwoo Choi <cw00.choi(a)samsung.com>
Signed-off-by: Bjorn Andersson <bjorn.andersson(a)linaro.org>
Signed-off-by: MyungJoo Ham <myungjoo.ham(a)samsung.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/devfreq/devfreq.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -676,7 +676,7 @@ struct devfreq *devm_devfreq_add_device(
devfreq = devfreq_add_device(dev, profile, governor_name, data);
if (IS_ERR(devfreq)) {
devres_free(ptr);
- return ERR_PTR(-ENOMEM);
+ return devfreq;
}
*ptr = devfreq;
Patches currently in stable-queue which might be from bjorn.andersson(a)linaro.org are
queue-4.14/pm-devfreq-propagate-error-from-devfreq_add_device.patch
queue-4.14/arm64-dts-msm8916-correct-ipc-references-for-smsm.patch
This is a note to let you know that I've just added the patch titled
powerpc: Fix DABR match on hash based systems
to the 4.14-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
powerpc-fix-dabr-match-on-hash-based-systems.patch
and it can be found in the queue-4.14 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From f23ab3efb1b30cc5c5ef5ae4ef294ed467f30675 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh(a)kernel.crashing.org>
Date: Fri, 10 Nov 2017 12:15:00 +1100
Subject: powerpc: Fix DABR match on hash based systems
From: Benjamin Herrenschmidt <benh(a)kernel.crashing.org>
commit f23ab3efb1b30cc5c5ef5ae4ef294ed467f30675 upstream.
Commit 398a719d34a1 ("powerpc/mm: Update bits used to skip hash_page")
mistakenly dropped the DSISR_DABRMATCH bit from the mask of bits tested
to skip trying to hash a page.
As a result, the DABR matches would no longer be detected.
This adds it back. We open code it in the 2 places where it matters
rather than fold it into DSISR_BAD_FAULT_32S/64S because this isn't
technically a bad fault and while we would never hit it with the
current code, I prefer if page_fault_is_bad() didn't trigger on these.
Fixes: 398a719d34a1 ("powerpc/mm: Update bits used to skip hash_page")
Cc: stable(a)vger.kernel.org # v4.14
Tested-by: Pedro Miraglia Franco de Carvalho <pedromfc(a)br.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh(a)kernel.crashing.org>
Signed-off-by: Naveen N. Rao <naveen.n.rao(a)linux.vnet.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
arch/powerpc/kernel/exceptions-64s.S | 2 +-
arch/powerpc/kernel/head_32.S | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1617,7 +1617,7 @@ USE_TEXT_SECTION()
.balign IFETCH_ALIGN_BYTES
do_hash_page:
#ifdef CONFIG_PPC_STD_MMU_64
- lis r0,DSISR_BAD_FAULT_64S@h
+ lis r0,(DSISR_BAD_FAULT_64S|DSISR_DABRMATCH)@h
ori r0,r0,DSISR_BAD_FAULT_64S@l
and. r0,r4,r0 /* weird error? */
bne- handle_page_fault /* if not, try to insert a HPTE */
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -388,7 +388,7 @@ DataAccess:
EXCEPTION_PROLOG
mfspr r10,SPRN_DSISR
stw r10,_DSISR(r11)
- andis. r0,r10,DSISR_BAD_FAULT_32S@h
+ andis. r0,r10,(DSISR_BAD_FAULT_32S|DSISR_DABRMATCH)@h
bne 1f /* if not, try to put a PTE */
mfspr r4,SPRN_DAR /* into the hash table */
rlwinm r3,r10,32-15,21,21 /* DSISR_STORE -> _PAGE_RW */
Patches currently in stable-queue which might be from benh(a)kernel.crashing.org are
queue-4.14/powerpc-fix-dabr-match-on-hash-based-systems.patch
This is a note to let you know that I've just added the patch titled
ocfs2: try a blocking lock before return AOP_TRUNCATED_PAGE
to the 4.14-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
ocfs2-try-a-blocking-lock-before-return-aop_truncated_page.patch
and it can be found in the queue-4.14 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From ff26cc10aec128c3f86b5611fd5f59c71d49c0e3 Mon Sep 17 00:00:00 2001
From: Gang He <ghe(a)suse.com>
Date: Wed, 31 Jan 2018 16:14:48 -0800
Subject: ocfs2: try a blocking lock before return AOP_TRUNCATED_PAGE
From: Gang He <ghe(a)suse.com>
commit ff26cc10aec128c3f86b5611fd5f59c71d49c0e3 upstream.
If we can't get inode lock immediately in the function
ocfs2_inode_lock_with_page() when reading a page, we should not return
directly here, since this will lead to a softlockup problem when the
kernel is configured with CONFIG_PREEMPT not set. The method is to
get a blocking lock and immediately unlock before returning, this can
avoid CPU resource waste due to lots of retries, and benefits fairness
in getting lock among multiple nodes, increase efficiency in case
modifying the same file frequently from multiple nodes.
The softlockup crash (when set /proc/sys/kernel/softlockup_panic to 1)
looks like:
Kernel panic - not syncing: softlockup: hung tasks
CPU: 0 PID: 885 Comm: multi_mmap Tainted: G L 4.12.14-6.1-default #1
Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
Call Trace:
<IRQ>
dump_stack+0x5c/0x82
panic+0xd5/0x21e
watchdog_timer_fn+0x208/0x210
__hrtimer_run_queues+0xcc/0x200
hrtimer_interrupt+0xa6/0x1f0
smp_apic_timer_interrupt+0x34/0x50
apic_timer_interrupt+0x96/0xa0
</IRQ>
RIP: 0010:unlock_page+0x17/0x30
RSP: 0000:ffffaf154080bc88 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff10
RAX: dead000000000100 RBX: fffff21e009f5300 RCX: 0000000000000004
RDX: dead0000000000ff RSI: 0000000000000202 RDI: fffff21e009f5300
RBP: 0000000000000000 R08: 0000000000000000 R09: ffffaf154080bb00
R10: ffffaf154080bc30 R11: 0000000000000040 R12: ffff993749a39518
R13: 0000000000000000 R14: fffff21e009f5300 R15: fffff21e009f5300
ocfs2_inode_lock_with_page+0x25/0x30 [ocfs2]
ocfs2_readpage+0x41/0x2d0 [ocfs2]
filemap_fault+0x12b/0x5c0
ocfs2_fault+0x29/0xb0 [ocfs2]
__do_fault+0x1a/0xa0
__handle_mm_fault+0xbe8/0x1090
handle_mm_fault+0xaa/0x1f0
__do_page_fault+0x235/0x4b0
trace_do_page_fault+0x3c/0x110
async_page_fault+0x28/0x30
RIP: 0033:0x7fa75ded638e
RSP: 002b:00007ffd6657db18 EFLAGS: 00010287
RAX: 000055c7662fb700 RBX: 0000000000000001 RCX: 000055c7662fb700
RDX: 0000000000001770 RSI: 00007fa75e909000 RDI: 000055c7662fb700
RBP: 0000000000000003 R08: 000000000000000e R09: 0000000000000000
R10: 0000000000000483 R11: 00007fa75ded61b0 R12: 00007fa75e90a770
R13: 000000000000000e R14: 0000000000001770 R15: 0000000000000000
About performance improvement, we can see the testing time is reduced,
and CPU utilization decreases, the detailed data is as follows. I ran
multi_mmap test case in ocfs2-test package in a three nodes cluster.
Before applying this patch:
PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
2754 ocfs2te+ 20 0 170248 6980 4856 D 80.73 0.341 0:18.71 multi_mmap
1505 root rt 0 222236 123060 97224 S 2.658 6.015 0:01.44 corosync
5 root 20 0 0 0 0 S 1.329 0.000 0:00.19 kworker/u8:0
95 root 20 0 0 0 0 S 1.329 0.000 0:00.25 kworker/u8:1
2728 root 20 0 0 0 0 S 0.997 0.000 0:00.24 jbd2/sda1-33
2721 root 20 0 0 0 0 S 0.664 0.000 0:00.07 ocfs2dc-3C8CFD4
2750 ocfs2te+ 20 0 142976 4652 3532 S 0.664 0.227 0:00.28 mpirun
ocfs2test@tb-node2:~>multiple_run.sh -i ens3 -k ~/linux-4.4.21-69.tar.gz -o ~/ocfs2mullog -C hacluster -s pcmk -n tb-node2,tb-node1,tb-node3 -d /dev/sda1 -b 4096 -c 32768 -t multi_mmap /mnt/shared
Tests with "-b 4096 -C 32768"
Thu Dec 28 14:44:52 CST 2017
multi_mmap..................................................Passed.
Runtime 783 seconds.
After applying this patch:
PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
2508 ocfs2te+ 20 0 170248 6804 4680 R 54.00 0.333 0:55.37 multi_mmap
155 root 20 0 0 0 0 S 2.667 0.000 0:01.20 kworker/u8:3
95 root 20 0 0 0 0 S 2.000 0.000 0:01.58 kworker/u8:1
2504 ocfs2te+ 20 0 142976 4604 3480 R 1.667 0.225 0:01.65 mpirun
5 root 20 0 0 0 0 S 1.000 0.000 0:01.36 kworker/u8:0
2482 root 20 0 0 0 0 S 1.000 0.000 0:00.86 jbd2/sda1-33
299 root 0 -20 0 0 0 S 0.333 0.000 0:00.13 kworker/2:1H
335 root 0 -20 0 0 0 S 0.333 0.000 0:00.17 kworker/1:1H
535 root 20 0 12140 7268 1456 S 0.333 0.355 0:00.34 haveged
1282 root rt 0 222284 123108 97224 S 0.333 6.017 0:01.33 corosync
ocfs2test@tb-node2:~>multiple_run.sh -i ens3 -k ~/linux-4.4.21-69.tar.gz -o ~/ocfs2mullog -C hacluster -s pcmk -n tb-node2,tb-node1,tb-node3 -d /dev/sda1 -b 4096 -c 32768 -t multi_mmap /mnt/shared
Tests with "-b 4096 -C 32768"
Thu Dec 28 15:04:12 CST 2017
multi_mmap..................................................Passed.
Runtime 487 seconds.
Link: http://lkml.kernel.org/r/1514447305-30814-1-git-send-email-ghe@suse.com
Fixes: 1cce4df04f37 ("ocfs2: do not lock/unlock() inode DLM lock")
Signed-off-by: Gang He <ghe(a)suse.com>
Reviewed-by: Eric Ren <zren(a)suse.com>
Acked-by: alex chen <alex.chen(a)huawei.com>
Acked-by: piaojun <piaojun(a)huawei.com>
Cc: Mark Fasheh <mfasheh(a)versity.com>
Cc: Joel Becker <jlbec(a)evilplan.org>
Cc: Junxiao Bi <junxiao.bi(a)oracle.com>
Cc: Joseph Qi <jiangqi903(a)gmail.com>
Cc: Changwei Ge <ge.changwei(a)h3c.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
fs/ocfs2/dlmglue.c | 9 +++++++++
1 file changed, 9 insertions(+)
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -2486,6 +2486,15 @@ int ocfs2_inode_lock_with_page(struct in
ret = ocfs2_inode_lock_full(inode, ret_bh, ex, OCFS2_LOCK_NONBLOCK);
if (ret == -EAGAIN) {
unlock_page(page);
+ /*
+ * If we can't get inode lock immediately, we should not return
+ * directly here, since this will lead to a softlockup problem.
+ * The method is to get a blocking lock and immediately unlock
+ * before returning, this can avoid CPU resource waste due to
+ * lots of retries, and benefits fairness in getting lock.
+ */
+ if (ocfs2_inode_lock(inode, ret_bh, ex) == 0)
+ ocfs2_inode_unlock(inode, ex);
ret = AOP_TRUNCATED_PAGE;
}
Patches currently in stable-queue which might be from ghe(a)suse.com are
queue-4.14/ocfs2-try-a-blocking-lock-before-return-aop_truncated_page.patch
This is a note to let you know that I've just added the patch titled
mwifiex: resolve reset vs. remove()/shutdown() deadlocks
to the 4.14-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
mwifiex-resolve-reset-vs.-remove-shutdown-deadlocks.patch
and it can be found in the queue-4.14 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From a64e7a79dd6030479caad603c8d78e6c9c14904f Mon Sep 17 00:00:00 2001
From: Brian Norris <briannorris(a)chromium.org>
Date: Fri, 12 Jan 2018 13:08:37 -0800
Subject: mwifiex: resolve reset vs. remove()/shutdown() deadlocks
From: Brian Norris <briannorris(a)chromium.org>
commit a64e7a79dd6030479caad603c8d78e6c9c14904f upstream.
Commit b014e96d1abb ("PCI: Protect pci_error_handlers->reset_notify()
usage with device_lock()") resolves races between driver reset and
removal, but it introduces some new deadlock problems. If we see a
timeout while we've already started suspending, removing, or shutting
down the driver, we might see:
(a) a worker thread, running mwifiex_pcie_work() ->
mwifiex_pcie_card_reset_work() -> pci_reset_function()
(b) a removal thread, running mwifiex_pcie_remove() ->
mwifiex_free_adapter() -> mwifiex_unregister() ->
mwifiex_cleanup_pcie() -> cancel_work_sync(&card->work)
Unfortunately, mwifiex_pcie_remove() already holds the device lock that
pci_reset_function() is now requesting, and so we see a deadlock.
It's necessary to cancel and synchronize our outstanding work before
tearing down the driver, so we can't have this work wait indefinitely
for the lock.
It's reasonable to only "try" to reset here, since this will mostly
happen for cases where it's already difficult to reset the firmware
anyway (e.g., while we're suspending or powering off the system). And if
reset *really* needs to happen, we can always try again later.
Fixes: b014e96d1abb ("PCI: Protect pci_error_handlers->reset_notify() usage with device_lock()")
Cc: <stable(a)vger.kernel.org>
Cc: Xinming Hu <huxm(a)marvell.com>
Signed-off-by: Brian Norris <briannorris(a)chromium.org>
Signed-off-by: Kalle Valo <kvalo(a)codeaurora.org>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/net/wireless/marvell/mwifiex/pcie.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
--- a/drivers/net/wireless/marvell/mwifiex/pcie.c
+++ b/drivers/net/wireless/marvell/mwifiex/pcie.c
@@ -2781,7 +2781,10 @@ static void mwifiex_pcie_card_reset_work
{
struct pcie_service_card *card = adapter->card;
- pci_reset_function(card->dev);
+ /* We can't afford to wait here; remove() might be waiting on us. If we
+ * can't grab the device lock, maybe we'll get another chance later.
+ */
+ pci_try_reset_function(card->dev);
}
static void mwifiex_pcie_work(struct work_struct *work)
Patches currently in stable-queue which might be from briannorris(a)chromium.org are
queue-4.14/mwifiex-resolve-reset-vs.-remove-shutdown-deadlocks.patch
This is a note to let you know that I've just added the patch titled
cpufreq: powernv: Dont assume distinct pstate values for nominal and pmin
to the 4.14-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
cpufreq-powernv-dont-assume-distinct-pstate-values-for-nominal-and-pmin.patch
and it can be found in the queue-4.14 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From 3fa4680b860bf48b437d6a2c039789c4abe202ae Mon Sep 17 00:00:00 2001
From: Shilpasri G Bhat <shilpa.bhat(a)linux.vnet.ibm.com>
Date: Fri, 12 Jan 2018 12:43:53 +0530
Subject: cpufreq: powernv: Dont assume distinct pstate values for nominal and pmin
From: Shilpasri G Bhat <shilpa.bhat(a)linux.vnet.ibm.com>
commit 3fa4680b860bf48b437d6a2c039789c4abe202ae upstream.
Some OpenPOWER boxes can have the same pstate values for the nominal and
pmin pstates. On these boxes the current code will not initialize the
'powernv_pstate_info.min' variable, resulting in erroneous CPU
frequency reporting. This patch fixes this problem.
Fixes: 09ca4c9b5958 (cpufreq: powernv: Replacing pstate_id with frequency table index)
Reported-by: Alvin Wang <wangat(a)tw.ibm.com>
Signed-off-by: Shilpasri G Bhat <shilpa.bhat(a)linux.vnet.ibm.com>
Acked-by: Viresh Kumar <viresh.kumar(a)linaro.org>
Cc: 4.8+ <stable(a)vger.kernel.org> # 4.8+
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/cpufreq/powernv-cpufreq.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
--- a/drivers/cpufreq/powernv-cpufreq.c
+++ b/drivers/cpufreq/powernv-cpufreq.c
@@ -287,9 +287,9 @@ next:
if (id == pstate_max)
powernv_pstate_info.max = i;
- else if (id == pstate_nominal)
+ if (id == pstate_nominal)
powernv_pstate_info.nominal = i;
- else if (id == pstate_min)
+ if (id == pstate_min)
powernv_pstate_info.min = i;
if (powernv_pstate_info.wof_enabled && id == pstate_turbo) {
Patches currently in stable-queue which might be from shilpa.bhat(a)linux.vnet.ibm.com are
queue-4.14/cpufreq-powernv-dont-assume-distinct-pstate-values-for-nominal-and-pmin.patch