As it stands, memory_failure() gets thoroughly confused by dev_pagemap
backed mappings. The recovery code has specific enabling for several
possible page states and needs new enabling to handle poison in dax
mappings.
In order to support reliable reverse mapping of user space addresses add
new locking in the fsdax implementation to prevent races between
page-address_space disassociation events and the rmap performed in the
memory_failure() path. Additionally, since dev_pagemap pages are hidden
from the page allocator, add a mechanism to determine the size of the
mapping that encompasses a given poisoned pfn. Lastly, since pmem errors
can be repaired, change the speculatively accessed poison protection,
mce_unmap_kpfn(), to be reversible and otherwise allow ongoing access
from the kernel.
---
Dan Williams (11):
device-dax: convert to vmf_insert_mixed and vm_fault_t
device-dax: cleanup vm_fault de-reference chains
device-dax: enable page_mapping()
device-dax: set page->index
filesystem-dax: set page->index
filesystem-dax: perform __dax_invalidate_mapping_entry() under the page lock
mm, madvise_inject_error: fix page count leak
x86, memory_failure: introduce {set,clear}_mce_nospec()
mm, memory_failure: pass page size to kill_proc()
mm, memory_failure: teach memory_failure() about dev_pagemap pages
libnvdimm, pmem: restore page attributes when clearing errors
arch/x86/include/asm/set_memory.h | 29 ++++++
arch/x86/kernel/cpu/mcheck/mce-internal.h | 15 ---
arch/x86/kernel/cpu/mcheck/mce.c | 38 +-------
drivers/dax/device.c | 91 ++++++++++++--------
drivers/nvdimm/pmem.c | 26 ++++++
drivers/nvdimm/pmem.h | 13 +++
fs/dax.c | 102 ++++++++++++++++++++--
include/linux/huge_mm.h | 5 +
include/linux/set_memory.h | 14 +++
mm/huge_memory.c | 4 -
mm/madvise.c | 11 ++
mm/memory-failure.c | 133 +++++++++++++++++++++++++++--
12 files changed, 370 insertions(+), 111 deletions(-)
Add a kernel parameter that allows setting UV memory block size. This
is to provide an adjustment for new forms of PMEM and other DIMM memory
that might require alignment restrictions other than scanning the global
address table for the required minimum alignment. The value set will be
further adjusted by both the GAM range table scan as well as restrictions
imposed by set_memory_block_size_order().
Signed-off-by: Mike Travis <mike.travis(a)hpe.com>
Reviewed-by: Andrew Banman <andrew.banman(a)hpe.com>
Cc: stable(a)vger.kernel.org
---
arch/x86/kernel/apic/x2apic_uv_x.c | 11 +++++++++++
1 file changed, 11 insertions(+)
--- linux.orig/arch/x86/kernel/apic/x2apic_uv_x.c
+++ linux/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -396,6 +396,17 @@ EXPORT_SYMBOL(uv_hub_info_version);
/* Default UV memory block size is 2GB */
static unsigned long mem_block_size = (2UL << 30);
+/* Kernel parameter to specify UV mem block size */
+static int parse_mem_block_size(char *ptr)
+{
+ unsigned long size = memparse(ptr, NULL);
+
+ /* Size will be rounded down by set_block_size() below */
+ mem_block_size = size;
+ return 0;
+}
+early_param("uv_memblksize", parse_mem_block_size);
+
static __init int adj_blksize(u32 lgre)
{
unsigned long base = (unsigned long)lgre << UV_GAM_RANGE_SHFT;
--
Add a call to the new function to "adjust" the current fixed UV memory
block size of 2GB so it can be changed to a different physical boundary.
This accommodates changes in the Intel BIOS, and therefore UV BIOS,
which now can align boundaries different than the previous UV standard
of 2GB. It also flags any UV Global Address boundaries from BIOS that
cause a change in the mem block size (boundary).
The current boundary of 2GB has been used on UV since the first system
release in 2009 with Linux 2.6 and has worked fine. But the new NVDIMM
persistent memory modules (PMEM), along with the Intel BIOS changes to
support these modules caused the memory block size boundary to be set
to a lower limit. Intel only guarantees that this minimum boundary at
64MB though the current Linux limit is 128MB.
Note that the default remains 2GB if no changes occur.
Signed-off-by: Mike Travis <mike.travis(a)hpe.com>
Reviewed-by: Andrew Banman <andrew.banman(a)hpe.com>
Cc: stable(a)vger.kernel.org
---
v2: Update description
---
arch/x86/kernel/apic/x2apic_uv_x.c | 49 ++++++++++++++++++++++++++++++++++---
1 file changed, 46 insertions(+), 3 deletions(-)
--- linux.orig/arch/x86/kernel/apic/x2apic_uv_x.c
+++ linux/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -26,6 +26,7 @@
#include <linux/delay.h>
#include <linux/crash_dump.h>
#include <linux/reboot.h>
+#include <linux/memory.h>
#include <asm/uv/uv_mmrs.h>
#include <asm/uv/uv_hub.h>
@@ -392,6 +393,40 @@ extern int uv_hub_info_version(void)
}
EXPORT_SYMBOL(uv_hub_info_version);
+/* Default UV memory block size is 2GB */
+static unsigned long mem_block_size = (2UL << 30);
+
+static __init int adj_blksize(u32 lgre)
+{
+ unsigned long base = (unsigned long)lgre << UV_GAM_RANGE_SHFT;
+ unsigned long size;
+
+ for (size = mem_block_size; size > MIN_MEMORY_BLOCK_SIZE; size >>= 1)
+ if (IS_ALIGNED(base, size))
+ break;
+
+ if (size >= mem_block_size)
+ return 0;
+
+ mem_block_size = size;
+ return 1;
+}
+
+static __init void set_block_size(void)
+{
+ unsigned int order = ffs(mem_block_size);
+
+ if (order) {
+ /* adjust for ffs return of 1..64 */
+ set_memory_block_size_order(order - 1);
+ pr_info("UV: mem_block_size set to 0x%lx\n", mem_block_size);
+ } else {
+ /* bad or zero value, default to 1UL << 31 (2GB) */
+ pr_err("UV: mem_block_size error with 0x%lx\n", mem_block_size);
+ set_memory_block_size_order(31);
+ }
+}
+
/* Build GAM range lookup table: */
static __init void build_uv_gr_table(void)
{
@@ -1180,23 +1215,30 @@ static void __init decode_gam_rng_tbl(un
<< UV_GAM_RANGE_SHFT);
int order = 0;
char suffix[] = " KMGTPE";
+ int flag = ' ';
while (size > 9999 && order < sizeof(suffix)) {
size /= 1024;
order++;
}
+ /* adjust max block size to current range start */
+ if (gre->type == 1 || gre->type == 2)
+ if (adj_blksize(lgre))
+ flag = '*';
+
if (!index) {
pr_info("UV: GAM Range Table...\n");
- pr_info("UV: # %20s %14s %5s %4s %5s %3s %2s\n", "Range", "", "Size", "Type", "NASID", "SID", "PN");
+ pr_info("UV: # %20s %14s %6s %4s %5s %3s %2s\n", "Range", "", "Size", "Type", "NASID", "SID", "PN");
}
- pr_info("UV: %2d: 0x%014lx-0x%014lx %5lu%c %3d %04x %02x %02x\n",
+ pr_info("UV: %2d: 0x%014lx-0x%014lx%c %5lu%c %3d %04x %02x %02x\n",
index++,
(unsigned long)lgre << UV_GAM_RANGE_SHFT,
(unsigned long)gre->limit << UV_GAM_RANGE_SHFT,
- size, suffix[order],
+ flag, size, suffix[order],
gre->type, gre->nasid, gre->sockid, gre->pnode);
+ /* update to next range start */
lgre = gre->limit;
if (sock_min > gre->sockid)
sock_min = gre->sockid;
@@ -1427,6 +1469,7 @@ static void __init uv_system_init_hub(vo
build_socket_tables();
build_uv_gr_table();
+ set_block_size();
uv_init_hub_info(&hub_info);
uv_possible_blades = num_possible_nodes();
if (!_node_to_pnode)
--
Add a new function to "adjust" the current fixed UV memory block size
of 2GB so it can be changed to a different physical boundary. This is
out of necessity so arch dependent code can accommodate specific BIOS
requirements which can align these new PMEM modules at less than the
default boundaries.
A "set order" type of function was used to insure that the memory block
size will be a power of two value without requiring a validity check.
64GB was chosen as the upper limit for memory block size values to
accommodate upcoming 4PB systems which have 6 more bits of physical
address space (46 becoming 52).
Signed-off-by: Mike Travis <mike.travis(a)hpe.com>
Reviewed-by: Andrew Banman <andrew.banman(a)hpe.com>
Cc: stable(a)vger.kernel.org
---
v2: Update description
---
arch/x86/mm/init_64.c | 20 ++++++++++++++++----
include/linux/memory.h | 1 +
2 files changed, 17 insertions(+), 4 deletions(-)
--- linux.orig/arch/x86/mm/init_64.c
+++ linux/arch/x86/mm/init_64.c
@@ -1350,16 +1350,28 @@ int kern_addr_valid(unsigned long addr)
/* Amount of ram needed to start using large blocks */
#define MEM_SIZE_FOR_LARGE_BLOCK (64UL << 30)
+/* Adjustable memory block size */
+static unsigned long set_memory_block_size;
+int __init set_memory_block_size_order(unsigned int order)
+{
+ unsigned long size = 1UL << order;
+
+ if (size > MEM_SIZE_FOR_LARGE_BLOCK || size < MIN_MEMORY_BLOCK_SIZE)
+ return -EINVAL;
+
+ set_memory_block_size = size;
+ return 0;
+}
+
static unsigned long probe_memory_block_size(void)
{
unsigned long boot_mem_end = max_pfn << PAGE_SHIFT;
unsigned long bz;
- /* If this is UV system, always set 2G block size */
- if (is_uv_system()) {
- bz = MAX_BLOCK_SIZE;
+ /* If memory block size has been set, then use it */
+ bz = set_memory_block_size;
+ if (bz)
goto done;
- }
/* Use regular block if RAM is smaller than MEM_SIZE_FOR_LARGE_BLOCK */
if (boot_mem_end < MEM_SIZE_FOR_LARGE_BLOCK) {
--- linux.orig/include/linux/memory.h
+++ linux/include/linux/memory.h
@@ -38,6 +38,7 @@ struct memory_block {
int arch_get_memory_phys_device(unsigned long start_pfn);
unsigned long memory_block_size_bytes(void);
+int set_memory_block_size_order(unsigned int order);
/* These states are exposed to userspace as text strings in sysfs */
#define MEM_ONLINE (1<<0) /* exposed to userspace */
--
According to the reference manual the shp_2_mcu / mcu_2_shp
scripts must be used for devices connected through the SPBA.
This fixes an issue we saw with DMA transfers.
Sometimes the SPI controller RX FIFO was not empty after a DMA
transfer and the driver got stuck in the next PIO transfer when
it read one word more than expected.
commit dd4b487b32a35 ("ARM: dts: imx6: Use correct SDMA script
for SPI cores") is fixing the same issue but only for SPI1 - 4.
Fixes: 677940258dd8e ("ARM: dts: imx6q: enable dma for ecspi5")
Signed-off-by: Sean Nyekjaer <sean.nyekjaer(a)prevas.dk>
Reviewed-by: Fabio Estevam <fabio.estevam(a)nxp.com>
---
arch/arm/boot/dts/imx6q.dtsi | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/arm/boot/dts/imx6q.dtsi b/arch/arm/boot/dts/imx6q.dtsi
index ae7b3f107893..5185300cc11f 100644
--- a/arch/arm/boot/dts/imx6q.dtsi
+++ b/arch/arm/boot/dts/imx6q.dtsi
@@ -96,7 +96,7 @@
clocks = <&clks IMX6Q_CLK_ECSPI5>,
<&clks IMX6Q_CLK_ECSPI5>;
clock-names = "ipg", "per";
- dmas = <&sdma 11 7 1>, <&sdma 12 7 2>;
+ dmas = <&sdma 11 8 1>, <&sdma 12 8 2>;
dma-names = "rx", "tx";
status = "disabled";
};
--
2.17.0
This is the start of the stable review cycle for the 3.18.110 release.
There are 45 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Sat May 26 09:30:59 UTC 2018.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v3.x/stable-review/patch-3.18.110-r…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-3.18.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 3.18.110-rc1
Tetsuo Handa <penguin-kernel(a)I-love.SAKURA.ne.jp>
x86/kexec: Avoid double free_page() upon do_kexec_load() failure
Tetsuo Handa <penguin-kernel(a)I-love.SAKURA.ne.jp>
hfsplus: stop workqueue when fill_super() failed
Johannes Berg <johannes.berg(a)intel.com>
cfg80211: limit wiphy names to 128 bytes
Alexander Potapenko <glider(a)google.com>
scsi: sg: allocate with __GFP_ZERO in sg_build_indirect()
Jason Yan <yanaijie(a)huawei.com>
scsi: libsas: defer ata device eh commands to libata
Al Viro <viro(a)zeniv.linux.org.uk>
ext2: fix a block leak
Eric Dumazet <edumazet(a)google.com>
tcp: purge write queue in tcp_connect_init()
Willem de Bruijn <willemb(a)google.com>
net: test tailroom before appending to linear skb
Masami Hiramatsu <mhiramat(a)kernel.org>
ARM: 8772/1: kprobes: Prohibit kprobes on get_user functions
Dexuan Cui <decui(a)microsoft.com>
tick/broadcast: Use for_each_cpu() specially on UP kernels
Masami Hiramatsu <mhiramat(a)kernel.org>
ARM: 8771/1: kprobes: Prohibit kprobes on do_undefinstr
Ard Biesheuvel <ard.biesheuvel(a)linaro.org>
efi: Avoid potential crashes, fix the 'struct efi_pci_io_protocol_32' definition for mixed mode
Nicholas Piggin <npiggin(a)gmail.com>
powerpc/powernv: Fix NVRAM sleep in invalid context when crashing
Steven Rostedt (VMware) <rostedt(a)goodmis.org>
tracing/x86/xen: Remove zero data size trace events trace_xen_mmu_flush_tlb{_all}
Benjamin Herrenschmidt <benh(a)kernel.crashing.org>
powerpc: Don't preempt_disable() in show_cpuinfo()
Wenwen Wang <wang6495(a)umn.edu>
ALSA: control: fix a redundant-copy issue
Federico Cuello <fedux(a)fedux.com.ar>
ALSA: usb: mixer: volume quirk for CM102-A+/102S+
Shuah Khan (Samsung OSG) <shuah(a)kernel.org>
usbip: usbip_host: fix bad unlock balance during stub_probe()
Shuah Khan (Samsung OSG) <shuah(a)kernel.org>
usbip: usbip_host: fix NULL-ptr deref and use-after-free errors
Alexey Khoroshilov <khoroshilov(a)ispras.ru>
usbip: fix error handling in stub_probe()
Shuah Khan (Samsung OSG) <shuah(a)kernel.org>
usbip: usbip_host: run rebind from exit when module is removed
Shuah Khan (Samsung OSG) <shuah(a)kernel.org>
usbip: usbip_host: delete device from busid_table after rebind
Shuah Khan <shuahkh(a)osg.samsung.com>
usbip: usbip_host: refine probe and disconnect debug msgs to be useful
zhongjiang <zhongjiang(a)huawei.com>
kernel/exit.c: avoid undefined behaviour when calling wait4()
Michael Kerrisk (man-pages) <mtk.manpages(a)gmail.com>
pipe: cap initial pipe capacity according to pipe-max-size limit
James Chapman <jchapman(a)katalix.com>
l2tp: revert "l2tp: fix missing print session offset info"
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Revert "ARM: dts: imx6qdl-wandboard: Fix audio channel swap"
Vasily Averin <vvs(a)virtuozzo.com>
lockd: lost rollback of set_grace_period() in lockd_down_net()
Bjørn Mork <bjorn(a)mork.no>
qmi_wwan: do not steal interfaces from class drivers
Xin Long <lucien.xin(a)gmail.com>
sctp: delay the authentication for the duplicated cookie-echo chunk
Xin Long <lucien.xin(a)gmail.com>
sctp: fix the issue that the cookie-ack with auth can't get processed
Yuchung Cheng <ycheng(a)google.com>
tcp: ignore Fast Open on repair mode
Debabrata Banerjee <dbanerje(a)akamai.com>
bonding: do not allow rlb updates to invalid mac
Michael Chan <michael.chan(a)broadcom.com>
tg3: Fix vunmap() BUG_ON() triggered from tg3_free_consistent().
Xin Long <lucien.xin(a)gmail.com>
sctp: use the old asoc when making the cookie-ack chunk in dupcook_d
Heiner Kallweit <hkallweit1(a)gmail.com>
r8169: fix powering up RTL8168h
Lance Richardson <lance.richardson.net(a)gmail.com>
net: support compat 64-bit time in {s,g}etsockopt
Eric Dumazet <edumazet(a)google.com>
net_sched: fq: take care of throttled flows before reuse
Moshe Shemesh <moshe(a)mellanox.com>
net/mlx4_en: Verify coalescing parameters are in range
Rob Taglang <rob(a)taglang.io>
net: ethernet: sun: niu set correct packet size in skb
Eric Dumazet <edumazet(a)google.com>
llc: better deal with too small mtu
Andrey Ignatov <rdna(a)fb.com>
ipv4: fix memory leaks in udp_sendmsg, ping_v4_sendmsg
Eric Dumazet <edumazet(a)google.com>
dccp: fix tasklet usage
Hangbin Liu <liuhangbin(a)gmail.com>
bridge: check iface upper dev when setting master via ioctl
Ingo Molnar <mingo(a)elte.hu>
8139too: Use disable_irq_nosync() in rtl8139_poll_controller()
-------------
Diffstat:
Makefile | 4 +-
arch/arm/boot/dts/imx6qdl-wandboard.dtsi | 1 -
arch/arm/include/asm/assembler.h | 10 +++
arch/arm/kernel/traps.c | 5 +-
arch/arm/lib/getuser.S | 10 +++
arch/powerpc/kernel/setup-common.c | 11 ---
arch/powerpc/platforms/powernv/opal-nvram.c | 14 +++-
arch/x86/boot/compressed/eboot.c | 6 +-
arch/x86/kernel/machine_kexec_32.c | 6 +-
arch/x86/kernel/machine_kexec_64.c | 4 +-
arch/x86/xen/mmu.c | 4 -
drivers/net/bonding/bond_alb.c | 2 +-
drivers/net/ethernet/broadcom/tg3.c | 9 +-
drivers/net/ethernet/mellanox/mlx4/en_ethtool.c | 16 ++++
drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 7 +-
drivers/net/ethernet/realtek/8139too.c | 2 +-
drivers/net/ethernet/realtek/r8169.c | 3 +
drivers/net/ethernet/sun/niu.c | 5 +-
drivers/net/usb/qmi_wwan.c | 13 +++
drivers/scsi/libsas/sas_scsi_host.c | 33 +++-----
drivers/scsi/sg.c | 2 +-
drivers/usb/usbip/stub.h | 2 +
drivers/usb/usbip/stub_dev.c | 69 +++++++++-------
drivers/usb/usbip/stub_main.c | 105 +++++++++++++++++++++---
fs/ext2/inode.c | 10 ---
fs/hfsplus/super.c | 1 +
fs/lockd/svc.c | 2 +
fs/pipe.c | 3 +
include/linux/efi.h | 8 +-
include/trace/events/xen.h | 16 ----
include/uapi/linux/nl80211.h | 2 +
kernel/exit.c | 4 +
kernel/time/tick-broadcast.c | 8 ++
net/bridge/br_if.c | 4 +-
net/compat.c | 6 +-
net/dccp/ccids/ccid2.c | 14 +++-
net/dccp/timer.c | 2 +-
net/ipv4/ip_output.c | 3 +-
net/ipv4/ping.c | 7 +-
net/ipv4/tcp.c | 2 +-
net/ipv4/tcp_output.c | 7 +-
net/ipv4/udp.c | 7 +-
net/ipv6/ip6_output.c | 3 +-
net/l2tp/l2tp_netlink.c | 2 -
net/llc/af_llc.c | 3 +
net/sched/sch_fq.c | 37 ++++++---
net/sctp/associola.c | 30 ++++++-
net/sctp/inqueue.c | 2 +-
net/sctp/sm_statefuns.c | 89 +++++++++++---------
net/wireless/core.c | 3 +
sound/core/control_compat.c | 3 +-
sound/usb/mixer.c | 8 ++
52 files changed, 427 insertions(+), 202 deletions(-)