The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From cc51e5428ea54f575d49cfcede1d4cb3a72b4ec4 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak(a)linux.intel.com>
Date: Fri, 24 Aug 2018 10:03:50 -0700
Subject: [PATCH] x86/speculation/l1tf: Increase l1tf memory limit for Nehalem+
On Nehalem and newer core CPUs the CPU cache internally uses 44 bits
physical address space. The L1TF workaround is limited by this internal
cache address width, and needs to have one bit free there for the
mitigation to work.
Older client systems report only 36bit physical address space so the range
check decides that L1TF is not mitigated for a 36bit phys/32GB system with
some memory holes.
But since these actually have the larger internal cache width this warning
is bogus because it would only really be needed if the system had more than
43bits of memory.
Add a new internal x86_cache_bits field. Normally it is the same as the
physical bits field reported by CPUID, but for Nehalem and newerforce it to
be at least 44bits.
Change the L1TF memory size warning to use the new cache_bits field to
avoid bogus warnings and remove the bogus comment about memory size.
Fixes: 17dbca119312 ("x86/speculation/l1tf: Add sysfs reporting for l1tf")
Reported-by: George Anchev <studio(a)anchev.net>
Reported-by: Christopher Snowhill <kode54(a)gmail.com>
Signed-off-by: Andi Kleen <ak(a)linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: x86(a)kernel.org
Cc: linux-kernel(a)vger.kernel.org
Cc: Michael Hocko <mhocko(a)suse.com>
Cc: vbabka(a)suse.cz
Cc: stable(a)vger.kernel.org
Link: https://lkml.kernel.org/r/20180824170351.34874-1-andi@firstfloor.org
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index c24297268ebc..d53c54b842da 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -132,6 +132,8 @@ struct cpuinfo_x86 {
/* Index into per_cpu list: */
u16 cpu_index;
u32 microcode;
+ /* Address space bits used by the cache internally */
+ u8 x86_cache_bits;
unsigned initialized : 1;
} __randomize_layout;
@@ -183,7 +185,7 @@ extern void cpu_detect(struct cpuinfo_x86 *c);
static inline unsigned long long l1tf_pfn_limit(void)
{
- return BIT_ULL(boot_cpu_data.x86_phys_bits - 1 - PAGE_SHIFT);
+ return BIT_ULL(boot_cpu_data.x86_cache_bits - 1 - PAGE_SHIFT);
}
extern void early_cpu_init(void);
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 4c2313d0b9ca..40bdaea97fe7 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -668,6 +668,45 @@ EXPORT_SYMBOL_GPL(l1tf_mitigation);
enum vmx_l1d_flush_state l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_AUTO;
EXPORT_SYMBOL_GPL(l1tf_vmx_mitigation);
+/*
+ * These CPUs all support 44bits physical address space internally in the
+ * cache but CPUID can report a smaller number of physical address bits.
+ *
+ * The L1TF mitigation uses the top most address bit for the inversion of
+ * non present PTEs. When the installed memory reaches into the top most
+ * address bit due to memory holes, which has been observed on machines
+ * which report 36bits physical address bits and have 32G RAM installed,
+ * then the mitigation range check in l1tf_select_mitigation() triggers.
+ * This is a false positive because the mitigation is still possible due to
+ * the fact that the cache uses 44bit internally. Use the cache bits
+ * instead of the reported physical bits and adjust them on the affected
+ * machines to 44bit if the reported bits are less than 44.
+ */
+static void override_cache_bits(struct cpuinfo_x86 *c)
+{
+ if (c->x86 != 6)
+ return;
+
+ switch (c->x86_model) {
+ case INTEL_FAM6_NEHALEM:
+ case INTEL_FAM6_WESTMERE:
+ case INTEL_FAM6_SANDYBRIDGE:
+ case INTEL_FAM6_IVYBRIDGE:
+ case INTEL_FAM6_HASWELL_CORE:
+ case INTEL_FAM6_HASWELL_ULT:
+ case INTEL_FAM6_HASWELL_GT3E:
+ case INTEL_FAM6_BROADWELL_CORE:
+ case INTEL_FAM6_BROADWELL_GT3E:
+ case INTEL_FAM6_SKYLAKE_MOBILE:
+ case INTEL_FAM6_SKYLAKE_DESKTOP:
+ case INTEL_FAM6_KABYLAKE_MOBILE:
+ case INTEL_FAM6_KABYLAKE_DESKTOP:
+ if (c->x86_cache_bits < 44)
+ c->x86_cache_bits = 44;
+ break;
+ }
+}
+
static void __init l1tf_select_mitigation(void)
{
u64 half_pa;
@@ -675,6 +714,8 @@ static void __init l1tf_select_mitigation(void)
if (!boot_cpu_has_bug(X86_BUG_L1TF))
return;
+ override_cache_bits(&boot_cpu_data);
+
switch (l1tf_mitigation) {
case L1TF_MITIGATION_OFF:
case L1TF_MITIGATION_FLUSH_NOWARN:
@@ -694,11 +735,6 @@ static void __init l1tf_select_mitigation(void)
return;
#endif
- /*
- * This is extremely unlikely to happen because almost all
- * systems have far more MAX_PA/2 than RAM can be fit into
- * DIMM slots.
- */
half_pa = (u64)l1tf_pfn_limit() << PAGE_SHIFT;
if (e820__mapped_any(half_pa, ULLONG_MAX - half_pa, E820_TYPE_RAM)) {
pr_warn("System has more than MAX_PA/2 memory. L1TF mitigation not effective.\n");
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 84dee5ab745a..44c4ef3d989b 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -919,6 +919,7 @@ void get_cpu_address_sizes(struct cpuinfo_x86 *c)
else if (cpu_has(c, X86_FEATURE_PAE) || cpu_has(c, X86_FEATURE_PSE36))
c->x86_phys_bits = 36;
#endif
+ c->x86_cache_bits = c->x86_phys_bits;
}
static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
From: Ulf Hansson <ulf.hansson(a)linaro.org>
In case the PM domain fails to be powered on in genpd_dev_pm_attach(), it
returns -EPROBE_DEFER, but keeping the device attached to its PM domain.
This leads to problems when the next attempt to attach is re-tried. More
precisely, in that situation an -EEXIST error code is returned, because the
device already has its PM domain pointer assigned, from the first attempt.
Now, because of the sloppy error handling by the existing callers of
dev_pm_domain_attach(), probing is allowed to continue when -EEXIST is
returned. However, in such case there are no guarantees that the PM domain
is powered on by genpd, which may lead to hangs when buses/drivers tried to
access their devices.
Let's fix this behaviour, simply by detaching the device when powering on
fails in genpd_dev_pm_attach().
Cc: v4.11+ <stable(a)vger.kernel.org> # v4.11+
Signed-off-by: Ulf Hansson <ulf.hansson(a)linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
(cherry picked from commit 72038df3c580c4c326b83c86149d7ac34007532a)
Cherry-picked to imx_4.9.y with minimal conflicts so that we can
properly handle errors from SCFW pm calls
Signed-off-by: Leonard Crestez <leonard.crestez(a)nxp.com>
---
drivers/base/power/domain.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 9c3e535795a0..d9681d372997 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -1936,10 +1936,13 @@ int genpd_dev_pm_attach(struct device *dev)
dev->pm_domain->sync = genpd_dev_pm_sync;
mutex_lock(&pd->lock);
ret = genpd_poweron(pd, 0);
mutex_unlock(&pd->lock);
+
+ if (ret)
+ genpd_remove_device(pd, dev);
out:
return ret ? -EPROBE_DEFER : 0;
}
EXPORT_SYMBOL_GPL(genpd_dev_pm_attach);
#endif /* CONFIG_PM_GENERIC_DOMAINS_OF */
--
2.17.1
The steps taken by usb core to set a new interface is very different from
what is done on the xHC host side.
xHC hardware will do everything in one go. One command is used to set up
new endpoints, free old endpoints, check bandwidth, and run the new
endpoints.
All this is done by xHC when usb core asks the hcd to check for
available bandwidth. At this point usb core has not yet flushed the old
endpoints, which will cause use-after-free issues in xhci driver as
queued URBs are cancelled on a re-allocated endpoint.
To resolve this add a call to usb_disable_interface() which will flush
the endpoints before calling usb_hcd_alloc_bandwidth()
Additional checks in xhci driver will also be implemented to gracefully
handle stale URB cancel on freed and re-allocated endpoints
Cc: <stable(a)vger.kernel.org>
Reported-by: Sudip Mukherjee <sudipm.mukherjee(a)gmail.com>
Signed-off-by: Mathias Nyman <mathias.nyman(a)linux.intel.com>
---
drivers/usb/core/message.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c
index 228672f..304bef2 100644
--- a/drivers/usb/core/message.c
+++ b/drivers/usb/core/message.c
@@ -1377,6 +1377,13 @@ int usb_set_interface(struct usb_device *dev, int interface, int alternate)
return -EINVAL;
}
+ /*
+ * usb3 hosts configure the interface in usb_hcd_alloc_bandwidth,
+ * including freeing dropped endpoint ring buffers.
+ * Make sure the interface endpoints are flushed before that
+ */
+ usb_disable_interface(dev, iface, false);
+
/* Make sure we have enough bandwidth for this alternate interface.
* Remove the current alt setting and add the new alt setting.
*/
--
2.7.4
Hi,
I would like to take the chance to speak with the person who handle your
images in your company?
Our ability of imaging editing.
Cutting out for your images
Clipping path for your images
Masking for your images
Retouching for your images
Retouching for the Portraits images
We have 17 photo editors in house and daily basis 700 images can be done.
We can give testing for your photos if you send us one or two to start
working.
Thanks,
Jason Jones
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From d814a49198eafa6163698bdd93961302f3a877a4 Mon Sep 17 00:00:00 2001
From: Ethan Lien <ethanlien(a)synology.com>
Date: Mon, 2 Jul 2018 15:44:58 +0800
Subject: [PATCH] btrfs: use correct compare function of dirty_metadata_bytes
We use customized, nodesize batch value to update dirty_metadata_bytes.
We should also use batch version of compare function or we will easily
goto fast path and get false result from percpu_counter_compare().
Fixes: e2d845211eda ("Btrfs: use percpu counter for dirty metadata count")
CC: stable(a)vger.kernel.org # 4.4+
Signed-off-by: Ethan Lien <ethanlien(a)synology.com>
Reviewed-by: Nikolay Borisov <nborisov(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 6023eed3e805..e3858b2fe014 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -959,8 +959,9 @@ static int btree_writepages(struct address_space *mapping,
fs_info = BTRFS_I(mapping->host)->root->fs_info;
/* this is a bit racy, but that's ok */
- ret = percpu_counter_compare(&fs_info->dirty_metadata_bytes,
- BTRFS_DIRTY_METADATA_THRESH);
+ ret = __percpu_counter_compare(&fs_info->dirty_metadata_bytes,
+ BTRFS_DIRTY_METADATA_THRESH,
+ fs_info->dirty_metadata_batch);
if (ret < 0)
return 0;
}
@@ -4134,8 +4135,9 @@ static void __btrfs_btree_balance_dirty(struct btrfs_fs_info *fs_info,
if (flush_delayed)
btrfs_balance_delayed_items(fs_info);
- ret = percpu_counter_compare(&fs_info->dirty_metadata_bytes,
- BTRFS_DIRTY_METADATA_THRESH);
+ ret = __percpu_counter_compare(&fs_info->dirty_metadata_bytes,
+ BTRFS_DIRTY_METADATA_THRESH,
+ fs_info->dirty_metadata_batch);
if (ret > 0) {
balance_dirty_pages_ratelimited(fs_info->btree_inode->i_mapping);
}
commit 4c4a39dd5fe2d13e2d2fa5fceb8ef95d19fc389a upstream
If there is a mismatch in the I/D min line size, we must
always use the system wide safe value both in applications
and in the kernel, while performing cache operations. However,
we have been checking more bits than just the min line sizes,
which triggers false negatives. We may need to trap the user
accesses in such cases, but not necessarily patch the kernel.
This patch fixes the check to do the right thing as advertised.
A new capability will be added to check mismatches in other
fields and ensure we trap the CTR accesses.
Fixes: be68a8aaf925 ("arm64: cpufeature: Fix CTR_EL0 field definitions")
Cc: <stable(a)vger.kernel.org> # v4.14
Cc: Mark Rutland <mark.rutland(a)arm.com>
Cc: Catalin Marinas <catalin.marinas(a)arm.com>
Reported-by: Will Deacon <will.deacon(a)arm.com>
Signed-off-by: Suzuki K Poulose <suzuki.poulose(a)arm.com>
Signed-off-by: Will Deacon <will.deacon(a)arm.com>
---
arch/arm64/include/asm/cache.h | 5 +++++
arch/arm64/kernel/cpu_errata.c | 6 ++++--
arch/arm64/kernel/cpufeature.c | 4 ++--
3 files changed, 11 insertions(+), 4 deletions(-)
diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h
index ea9bb4e..e40f8a2 100644
--- a/arch/arm64/include/asm/cache.h
+++ b/arch/arm64/include/asm/cache.h
@@ -20,9 +20,14 @@
#define CTR_L1IP_SHIFT 14
#define CTR_L1IP_MASK 3
+#define CTR_DMINLINE_SHIFT 16
+#define CTR_IMINLINE_SHIFT 0
#define CTR_CWG_SHIFT 24
#define CTR_CWG_MASK 15
+#define CTR_CACHE_MINLINE_MASK \
+ (0xf << CTR_DMINLINE_SHIFT | 0xf << CTR_IMINLINE_SHIFT)
+
#define CTR_L1IP(ctr) (((ctr) >> CTR_L1IP_SHIFT) & CTR_L1IP_MASK)
#define ICACHE_POLICY_VPIPT 0
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index eccdb28..3d1569c 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -48,9 +48,11 @@ static bool
has_mismatched_cache_line_size(const struct arm64_cpu_capabilities *entry,
int scope)
{
+ u64 mask = CTR_CACHE_MINLINE_MASK;
+
WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
- return (read_cpuid_cachetype() & arm64_ftr_reg_ctrel0.strict_mask) !=
- (arm64_ftr_reg_ctrel0.sys_val & arm64_ftr_reg_ctrel0.strict_mask);
+ return (read_cpuid_cachetype() & mask) !=
+ (arm64_ftr_reg_ctrel0.sys_val & mask);
}
static int cpu_enable_trap_ctr_access(void *__unused)
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 376cf12..003dd39 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -180,14 +180,14 @@ static const struct arm64_ftr_bits ftr_ctr[] = {
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 28, 1, 1), /* IDC */
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_SAFE, 24, 4, 0), /* CWG */
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_SAFE, 20, 4, 0), /* ERG */
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 1), /* DminLine */
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_DMINLINE_SHIFT, 4, 1),
/*
* Linux can handle differing I-cache policies. Userspace JITs will
* make use of *minLine.
* If we have differing I-cache policies, report it as the weakest - VIPT.
*/
ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_EXACT, 14, 2, ICACHE_POLICY_VIPT), /* L1Ip */
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* IminLine */
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_IMINLINE_SHIFT, 4, 0),
ARM64_FTR_END,
};
--
2.7.4
When a system suffers from dcache aliasing a user program may observe
stale VDSO data from an aliased cache line. Notably this can break the
expectation that clock_gettime(CLOCK_MONOTONIC, ...) is, as its name
suggests, monotonic.
In order to ensure that users observe updates to the VDSO data page as
intended, align the user mappings of the VDSO data page such that their
cache colouring matches that of the virtual address range which the
kernel will use to update the data page - typically its unmapped address
within kseg0.
This ensures that we don't introduce aliasing cache lines for the VDSO
data page, and therefore that userland will observe updates without
requiring cache invalidation.
Signed-off-by: Paul Burton <paul.burton(a)mips.com>
Reported-by: Hauke Mehrtens <hauke(a)hauke-m.de>
Reported-by: Rene Nielsen <rene.nielsen(a)microsemi.com>
Reported-by: Alexandre Belloni <alexandre.belloni(a)bootlin.com>
Fixes: ebb5e78cc634 ("MIPS: Initial implementation of a VDSO")
Cc: James Hogan <jhogan(a)kernel.org>
Cc: linux-mips(a)linux-mips.org
Cc: stable(a)vger.kernel.org # v4.4+
---
Hi Alexandre,
Could you try this out on your Ocelot system? Hopefully it'll solve the
problem just as well as James' patch but doesn't need the questionable
change to arch_get_unmapped_area_common().
Thanks,
Paul
---
arch/mips/kernel/vdso.c | 20 ++++++++++++++++++++
1 file changed, 20 insertions(+)
diff --git a/arch/mips/kernel/vdso.c b/arch/mips/kernel/vdso.c
index 019035d7225c..5fb617a42335 100644
--- a/arch/mips/kernel/vdso.c
+++ b/arch/mips/kernel/vdso.c
@@ -13,6 +13,7 @@
#include <linux/err.h>
#include <linux/init.h>
#include <linux/ioport.h>
+#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/slab.h>
@@ -20,6 +21,7 @@
#include <asm/abi.h>
#include <asm/mips-cps.h>
+#include <asm/page.h>
#include <asm/vdso.h>
/* Kernel-provided data used by the VDSO. */
@@ -128,12 +130,30 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
vvar_size = gic_size + PAGE_SIZE;
size = vvar_size + image->size;
+ /*
+ * Find a region that's large enough for us to perform the
+ * colour-matching alignment below.
+ */
+ if (cpu_has_dc_aliases)
+ size += shm_align_mask + 1;
+
base = get_unmapped_area(NULL, 0, size, 0, 0);
if (IS_ERR_VALUE(base)) {
ret = base;
goto out;
}
+ /*
+ * If we suffer from dcache aliasing, ensure that the VDSO data page is
+ * coloured the same as the kernel's mapping of that memory. This
+ * ensures that when the kernel updates the VDSO data userland will see
+ * it without requiring cache invalidations.
+ */
+ if (cpu_has_dc_aliases) {
+ base = __ALIGN_MASK(base, shm_align_mask);
+ base += ((unsigned long)&vdso_data - gic_size) & shm_align_mask;
+ }
+
data_addr = base + gic_size;
vdso_addr = data_addr + PAGE_SIZE;
--
2.18.0
Dear Supplier,
Please kindly confirm back in reply if am onto the right contact
personin your company responsible for inquiries & orders.
Please can you as well send your updated 2018 product
catalog/price listing to our Import Department desk.
Regards
Walter Benson
Purchase Manager
Aharkco Trading Co., LTD.
The patch below does not apply to the 4.18-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From ad0eaee6195db1db1749dd46b9e6f4466793d178 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavo(a)embeddedor.com>
Date: Mon, 6 Aug 2018 07:14:51 -0500
Subject: [PATCH] ASoC: wm8994: Fix missing break in switch
Add missing break statement in order to prevent the code from falling
through to the default case.
Addresses-Coverity-ID: 115050 ("Missing break in switch")
Reported-by: Valdis Kletnieks <valdis.kletnieks(a)vt.edu>
Signed-off-by: Gustavo A. R. Silva <gustavo(a)embeddedor.com>
Acked-by: Charles Keepax <ckeepax(a)opensource.cirrus.com>
Signed-off-by: Mark Brown <broonie(a)kernel.org>
Cc: stable(a)vger.kernel.org
diff --git a/sound/soc/codecs/wm8994.c b/sound/soc/codecs/wm8994.c
index 62f8c5b9ba92..14f1b0c0d286 100644
--- a/sound/soc/codecs/wm8994.c
+++ b/sound/soc/codecs/wm8994.c
@@ -2432,7 +2432,7 @@ static int wm8994_set_dai_sysclk(struct snd_soc_dai *dai,
snd_soc_component_update_bits(component, WM8994_POWER_MANAGEMENT_2,
WM8994_OPCLK_ENA, 0);
}
- /* fall through */
+ break;
default:
return -EINVAL;
From: Dan Carpenter <dan.carpenter(a)oracle.com>
[ Upstream commit f019f07ecf6a6b8bd6d7853bce70925d90af02d1 ]
The uio_unregister_device() function assumes that if "info->uio_dev" is
non-NULL that means "info" is fully allocated. Setting info->uio_de
has to be the last thing in the function.
In the current code, if request_threaded_irq() fails then we return with
info->uio_dev set to non-NULL but info is not fully allocated and it can
lead to double frees.
Fixes: beafc54c4e2f ("UIO: Add the User IO core code")
Signed-off-by: Dan Carpenter <dan.carpenter(a)oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Signed-off-by: Sasha Levin <alexander.levin(a)microsoft.com>
---
drivers/uio/uio.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c
index 208bc52fc84d..f0a9ea2740df 100644
--- a/drivers/uio/uio.c
+++ b/drivers/uio/uio.c
@@ -841,8 +841,6 @@ int __uio_register_device(struct module *owner,
if (ret)
goto err_uio_dev_add_attributes;
- info->uio_dev = idev;
-
if (info->irq && (info->irq != UIO_IRQ_CUSTOM)) {
/*
* Note that we deliberately don't use devm_request_irq
@@ -858,6 +856,7 @@ int __uio_register_device(struct module *owner,
goto err_request_irq;
}
+ info->uio_dev = idev;
return 0;
err_request_irq:
--
2.17.1
From: Ronnie Sahlberg <lsahlber(a)redhat.com>
[ Upstream commit e6c47dd0da1e3a484e778046fc10da0b20606a86 ]
Some SMB2/3 servers, Win2016 but possibly others too, adds padding
not only between PDUs in a compound but also to the final PDU.
This padding extends the PDU to a multiple of 8 bytes.
Check if the unexpected length looks like this might be the case
and avoid triggering the log messages for :
"SMB2 server sent bad RFC1001 len %d not %d\n"
Signed-off-by: Ronnie Sahlberg <lsahlber(a)redhat.com>
Signed-off-by: Steve French <stfrench(a)microsoft.com>
Signed-off-by: Sasha Levin <alexander.levin(a)microsoft.com>
---
fs/cifs/smb2misc.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index 3ff7cec2da81..239215dcc00b 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -240,6 +240,13 @@ smb2_check_message(char *buf, unsigned int len, struct TCP_Server_Info *srvr)
if (clc_len == len + 1)
return 0;
+ /*
+ * Some windows servers (win2016) will pad also the final
+ * PDU in a compound to 8 bytes.
+ */
+ if (((clc_len + 7) & ~7) == len)
+ return 0;
+
/*
* MacOS server pads after SMB2.1 write response with 3 bytes
* of junk. Other servers match RFC1001 len to actual
--
2.17.1
From: Ralph Campbell <rcampbell(a)nvidia.com>
Private ZONE_DEVICE pages use a special pte entry and thus are not
present. Properly handle this case in map_pte(), it is already handled
in check_pte(), the map_pte() part was lost in some rebase most probably.
Without this patch the slow migration path can not migrate back private
ZONE_DEVICE memory to regular memory. This was found after stress
testing migration back to system memory. This ultimatly can lead the
CPU to an infinite page fault loop on the special swap entry.
Signed-off-by: Ralph Campbell <rcampbell(a)nvidia.com>
Signed-off-by: Jérôme Glisse <jglisse(a)redhat.com>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Cc: stable(a)vger.kernel.org
---
mm/page_vma_mapped.c | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
index ae3c2a35d61b..1cf5b9bfb559 100644
--- a/mm/page_vma_mapped.c
+++ b/mm/page_vma_mapped.c
@@ -21,6 +21,15 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw)
if (!is_swap_pte(*pvmw->pte))
return false;
} else {
+ if (is_swap_pte(*pvmw->pte)) {
+ swp_entry_t entry;
+
+ /* Handle un-addressable ZONE_DEVICE memory */
+ entry = pte_to_swp_entry(*pvmw->pte);
+ if (is_device_private_entry(entry))
+ return true;
+ }
+
if (!pte_present(*pvmw->pte))
return false;
}
--
2.17.1
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 024d83cadc6b2af027e473720f3c3da97496c318 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Date: Sun, 12 Aug 2018 20:41:45 +0200
Subject: [PATCH] KVM: x86: SVM: Call x86_spec_ctrl_set_guest/host() with
interrupts disabled
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Mikhail reported the following lockdep splat:
WARNING: possible irq lock inversion dependency detected
CPU 0/KVM/10284 just changed the state of lock:
000000000d538a88 (&st->lock){+...}, at:
speculative_store_bypass_update+0x10b/0x170
but this lock was taken by another, HARDIRQ-safe lock
in the past:
(&(&sighand->siglock)->rlock){-.-.}
and interrupts could create inverse lock ordering between them.
Possible interrupt unsafe locking scenario:
CPU0 CPU1
---- ----
lock(&st->lock);
local_irq_disable();
lock(&(&sighand->siglock)->rlock);
lock(&st->lock);
<Interrupt>
lock(&(&sighand->siglock)->rlock);
*** DEADLOCK ***
The code path which connects those locks is:
speculative_store_bypass_update()
ssb_prctl_set()
do_seccomp()
do_syscall_64()
In svm_vcpu_run() speculative_store_bypass_update() is called with
interupts enabled via x86_virt_spec_ctrl_set_guest/host().
This is actually a false positive, because GIF=0 so interrupts are
disabled even if IF=1; however, we can easily move the invocations of
x86_virt_spec_ctrl_set_guest/host() into the interrupt disabled region to
cure it, and it's a good idea to keep the GIF=0/IF=1 area as small
and self-contained as possible.
Fixes: 1f50ddb4f418 ("x86/speculation: Handle HT correctly on AMD")
Reported-by: Mikhail Gavrilov <mikhail.v.gavrilov(a)gmail.com>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Tested-by: Mikhail Gavrilov <mikhail.v.gavrilov(a)gmail.com>
Cc: Joerg Roedel <joro(a)8bytes.org>
Cc: Paolo Bonzini <pbonzini(a)redhat.com>
Cc: Radim Krčmář <rkrcmar(a)redhat.com>
Cc: Matthew Wilcox <willy(a)infradead.org>
Cc: Borislav Petkov <bp(a)suse.de>
Cc: Konrad Rzeszutek Wilk <konrad.wilk(a)oracle.com>
Cc: Tom Lendacky <thomas.lendacky(a)amd.com>
Cc: kvm(a)vger.kernel.org
Cc: x86(a)kernel.org
Cc: stable(a)vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 73e27a98456f..6276140044d0 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -5586,8 +5586,6 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
clgi();
- local_irq_enable();
-
/*
* If this vCPU has touched SPEC_CTRL, restore the guest's value if
* it's non-zero. Since vmentry is serialising on affected CPUs, there
@@ -5596,6 +5594,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
*/
x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl);
+ local_irq_enable();
+
asm volatile (
"push %%" _ASM_BP "; \n\t"
"mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t"
@@ -5718,12 +5718,12 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
- x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl);
-
reload_tss(vcpu);
local_irq_disable();
+ x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl);
+
vcpu->arch.cr2 = svm->vmcb->save.cr2;
vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax;
vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
In the __getcpu function, lsl was using the wrong target
and destination registers. Luckily, the compiler tends to
choose %eax for both variables, so it has been working
so far.
Cc: x86(a)kernel.org
Cc: stable(a)vger.kernel.org
Signed-off-by: Samuel Neves <sneves(a)dei.uc.pt>
---
arch/x86/include/asm/vgtod.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index fb856c9f0449..53748541c487 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -93,7 +93,7 @@ static inline unsigned int __getcpu(void)
*
* If RDPID is available, use it.
*/
- alternative_io ("lsl %[p],%[seg]",
+ alternative_io ("lsl %[seg],%[p]",
".byte 0xf3,0x0f,0xc7,0xf8", /* RDPID %eax/rax */
X86_FEATURE_RDPID,
[p] "=a" (p), [seg] "r" (__PER_CPU_SEG));
--
2.17.1
In the __getcpu function, lsl was using the wrong target
and destination registers. Luckily, the compiler tends to
choose %eax for both variables, so it has been working
so far.
Signed-off-by: Samuel Neves <sneves(a)dei.uc.pt>
---
arch/x86/include/asm/vgtod.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index fb856c9f0449..53748541c487 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -93,7 +93,7 @@ static inline unsigned int __getcpu(void)
*
* If RDPID is available, use it.
*/
- alternative_io ("lsl %[p],%[seg]",
+ alternative_io ("lsl %[seg],%[p]",
".byte 0xf3,0x0f,0xc7,0xf8", /* RDPID %eax/rax */
X86_FEATURE_RDPID,
[p] "=a" (p), [seg] "r" (__PER_CPU_SEG));
--
2.17.1
From: Jason Wang <jasowang(a)redhat.com>
commit b196d88aba8ac72b775137854121097f4c4c6862 upstream.
We used to initialize ptr_ring during TUNSETIFF, this is because its
size depends on the tx_queue_len of netdevice. And we try to clean it
up when socket were detached from netdevice. A race were spotted when
trying to do uninit during a read which will lead a use after free for
pointer ring. Solving this by always initialize a zero size ptr_ring
in open() and do resizing during TUNSETIFF, and then we can safely do
cleanup during close(). With this, there's no need for the workaround
that was introduced by commit 4df0bfc79904 ("tun: fix a memory leak
for tfile->tx_array").
Backport Note :-
This is a backport of following 2 upstream patches(the second fixes the
first).
b196d88aba ("tun: fix use after free for ptr_ring")
7063efd33b ("tuntap: fix use after free during release")
Comparison with the upstream patch:
[1] A "semantic revert" of the changes made in
4df0bfc799("tun: fix a memory leak for tfile->tx_array").
4df0bfc799 was applied upstream, and then skb array was changed
to use ptr_ring. The upstream fix then removes the changes introduced
by 4df0bfc799. This backport does the same; "revert" the changes
made by 4df0bfc799.
[2] xdp_rxq_info_unreg() being called in relevant locations
As xdp_rxq_info related patches are not present in 4.14, these
changes are not needed in the backport.
[3] An instance of ptr_ring_init needs to be replaced by skb_array_init.
[4] ptr_ring_cleanup needs to be replaced by skb_array_cleanup.
b196d88ab places the cleanup function in tun_chr_close() only to
later move it into __tun_detach in upstream commit
7063efd33bb("tuntap: fix use after free during release"). So
place skb_array_cleanup in __tun_detach.
Reported-by: syzbot+e8b902c3c3fadf0a9dba(a)syzkaller.appspotmail.com
Cc: Eric Dumazet <eric.dumazet(a)gmail.com>
Cc: Cong Wang <xiyou.wangcong(a)gmail.com>
Cc: Michael S. Tsirkin <mst(a)redhat.com>
Fixes: 1576d9860599 ("tun: switch to use skb array for tx")
Signed-off-by: Jason Wang <jasowang(a)redhat.com>
Acked-by: Michael S. Tsirkin <mst(a)redhat.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Zubin Mithra <zsm(a)chromium.org>
---
drivers/net/tun.c | 21 +++++++--------------
1 file changed, 7 insertions(+), 14 deletions(-)
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index cb17ffadfc30..e0baea2dfd3c 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -534,14 +534,6 @@ static void tun_queue_purge(struct tun_file *tfile)
skb_queue_purge(&tfile->sk.sk_error_queue);
}
-static void tun_cleanup_tx_array(struct tun_file *tfile)
-{
- if (tfile->tx_array.ring.queue) {
- skb_array_cleanup(&tfile->tx_array);
- memset(&tfile->tx_array, 0, sizeof(tfile->tx_array));
- }
-}
-
static void __tun_detach(struct tun_file *tfile, bool clean)
{
struct tun_file *ntfile;
@@ -583,7 +575,7 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
tun->dev->reg_state == NETREG_REGISTERED)
unregister_netdevice(tun->dev);
}
- tun_cleanup_tx_array(tfile);
+ skb_array_cleanup(&tfile->tx_array);
sock_put(&tfile->sk);
}
}
@@ -623,13 +615,11 @@ static void tun_detach_all(struct net_device *dev)
/* Drop read queue */
tun_queue_purge(tfile);
sock_put(&tfile->sk);
- tun_cleanup_tx_array(tfile);
}
list_for_each_entry_safe(tfile, tmp, &tun->disabled, next) {
tun_enable_queue(tfile);
tun_queue_purge(tfile);
sock_put(&tfile->sk);
- tun_cleanup_tx_array(tfile);
}
BUG_ON(tun->numdisabled != 0);
@@ -675,7 +665,7 @@ static int tun_attach(struct tun_struct *tun, struct file *file, bool skip_filte
}
if (!tfile->detached &&
- skb_array_init(&tfile->tx_array, dev->tx_queue_len, GFP_KERNEL)) {
+ skb_array_resize(&tfile->tx_array, dev->tx_queue_len, GFP_KERNEL)) {
err = -ENOMEM;
goto out;
}
@@ -2624,6 +2614,11 @@ static int tun_chr_open(struct inode *inode, struct file * file)
&tun_proto, 0);
if (!tfile)
return -ENOMEM;
+ if (skb_array_init(&tfile->tx_array, 0, GFP_KERNEL)) {
+ sk_free(&tfile->sk);
+ return -ENOMEM;
+ }
+
RCU_INIT_POINTER(tfile->tun, NULL);
tfile->flags = 0;
tfile->ifindex = 0;
@@ -2644,8 +2639,6 @@ static int tun_chr_open(struct inode *inode, struct file * file)
sock_set_flag(&tfile->sk, SOCK_ZEROCOPY);
- memset(&tfile->tx_array, 0, sizeof(tfile->tx_array));
-
return 0;
}
--
2.19.0.rc0.228.g281dcd1b4d0-goog
clang-7 has a new warning (-Wreturn-stack-address) for warning when a
function returns the address of a local variable. This is in general a
good warning, but the kernel has a few places where GNU statement
expressions return the address of a label in order to get the current
instruction pointer (see _THIS_IP_ and current_text_addr).
In order to disable a warning at a single call site, the kernel already
has __diag macros for inserting compiler and compiler-version specific
_Pragma's.
This series adds CLANG_VERSION macros necessary for proper __diag
support, and whitelists the case in _THIS_IP_. current_text_addr will be
consolidated in a follow up series.
Nick Desaulniers (2):
compiler-clang.h: Add CLANG_VERSION and __diag macros
kernel.h: Disable -Wreturn-stack-address for _THIS_IP_
include/linux/compiler-clang.h | 19 +++++++++++++++++++
include/linux/compiler_types.h | 4 ++++
include/linux/kernel.h | 10 +++++++++-
3 files changed, 32 insertions(+), 1 deletion(-)
--
2.18.0.345.g5c9ce644c3-goog
4.4.y, 4.9.y:
fs/cifs/cifsfs.c: In function 'cifs_statfs':
fs/cifs/cifsfs.c:198:27: error: 'struct cifs_tcon' has no member named 'vol_serial_number'
fs/cifs/cifsfs.c:200:45: error: 'struct cifs_tcon' has no member named 'vol_create_time'
4.14.y, 4.18.y:
kernel/printk/printk_safe.o: In function `vprintk_func':
kernel/printk/printk_safe.c:386: undefined reference to `vprintk_store'
kernel/printk/printk_safe.c:388: undefined reference to `defer_console_output'
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 86658b819cd0a9aa584cd84453ed268a6f013770 Mon Sep 17 00:00:00 2001
From: Punit Agrawal <punit.agrawal(a)arm.com>
Date: Mon, 13 Aug 2018 11:43:50 +0100
Subject: [PATCH] KVM: arm/arm64: Skip updating PMD entry if no change
Contention on updating a PMD entry by a large number of vcpus can lead
to duplicate work when handling stage 2 page faults. As the page table
update follows the break-before-make requirement of the architecture,
it can lead to repeated refaults due to clearing the entry and
flushing the tlbs.
This problem is more likely when -
* there are large number of vcpus
* the mapping is large block mapping
such as when using PMD hugepages (512MB) with 64k pages.
Fix this by skipping the page table update if there is no change in
the entry being updated.
Cc: stable(a)vger.kernel.org
Fixes: ad361f093c1e ("KVM: ARM: Support hugetlbfs backed huge pages")
Reviewed-by: Suzuki Poulose <suzuki.poulose(a)arm.com>
Acked-by: Christoffer Dall <christoffer.dall(a)arm.com>
Signed-off-by: Punit Agrawal <punit.agrawal(a)arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier(a)arm.com>
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index 97d27cd9c654..13dfe36501aa 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -1044,19 +1044,35 @@ static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
pmd = stage2_get_pmd(kvm, cache, addr);
VM_BUG_ON(!pmd);
- /*
- * Mapping in huge pages should only happen through a fault. If a
- * page is merged into a transparent huge page, the individual
- * subpages of that huge page should be unmapped through MMU
- * notifiers before we get here.
- *
- * Merging of CompoundPages is not supported; they should become
- * splitting first, unmapped, merged, and mapped back in on-demand.
- */
- VM_BUG_ON(pmd_present(*pmd) && pmd_pfn(*pmd) != pmd_pfn(*new_pmd));
-
old_pmd = *pmd;
if (pmd_present(old_pmd)) {
+ /*
+ * Multiple vcpus faulting on the same PMD entry, can
+ * lead to them sequentially updating the PMD with the
+ * same value. Following the break-before-make
+ * (pmd_clear() followed by tlb_flush()) process can
+ * hinder forward progress due to refaults generated
+ * on missing translations.
+ *
+ * Skip updating the page table if the entry is
+ * unchanged.
+ */
+ if (pmd_val(old_pmd) == pmd_val(*new_pmd))
+ return 0;
+
+ /*
+ * Mapping in huge pages should only happen through a
+ * fault. If a page is merged into a transparent huge
+ * page, the individual subpages of that huge page
+ * should be unmapped through MMU notifiers before we
+ * get here.
+ *
+ * Merging of CompoundPages is not supported; they
+ * should become splitting first, unmapped, merged,
+ * and mapped back in on-demand.
+ */
+ VM_BUG_ON(pmd_pfn(old_pmd) != pmd_pfn(*new_pmd));
+
pmd_clear(pmd);
kvm_tlb_flush_vmid_ipa(kvm, addr);
} else {
commit ID: f1ed3df20d2d223e0852cc4ac1f19bba869a7e3c
Please merge this patch into stable tree (already exist in Linus’ tree).
The initial patch submission lacked "Cc: stable(a)vger.kernel.org" by
mistake. The kernel versions that should get patch:
4.19
4.18
4.14
>From f1ed3df20d2d223e0852cc4ac1f19bba869a7e3c Mon Sep 17 00:00:00 2001
From: Michal Wnukowski <wnukowski(a)google.com>
Date: Wed, 15 Aug 2018 15:51:57 -0700
Subject: nvme-pci: add a memory barrier to nvme_dbbuf_update_and_check_event
In many architectures loads may be reordered with older stores to
different locations. In the nvme driver the following two operations
could be reordered:
- Write shadow doorbell (dbbuf_db) into memory.
- Read EventIdx (dbbuf_ei) from memory.
This can result in a potential race condition between driver and VM host
processing requests (if given virtual NVMe controller has a support for
shadow doorbell). If that occurs, then the NVMe controller may decide to
wait for MMIO doorbell from guest operating system, and guest driver may
decide not to issue MMIO doorbell on any of subsequent commands.
This issue is purely timing-dependent one, so there is no easy way to
reproduce it. Currently the easiest known approach is to run "Oracle IO
Numbers" (orion) that is shipped with Oracle DB:
orion -run advanced -num_large 0 -size_small 8 -type rand -simulate \
concat -write 40 -duration 120 -matrix row -testname nvme_test
Where nvme_test is a .lun file that contains a list of NVMe block
devices to run test against. Limiting number of vCPUs assigned to given
VM instance seems to increase chances for this bug to occur. On test
environment with VM that got 4 NVMe drives and 1 vCPU assigned the
virtual NVMe controller hang could be observed within 10-20 minutes.
That correspond to about 400-500k IO operations processed (or about
100GB of IO read/writes).
Orion tool was used as a validation and set to run in a loop for 36
hours (equivalent of pushing 550M IO operations). No issues were
observed. That suggest that the patch fixes the issue.
Fixes: f9f38e33389c ("nvme: improve performance for virtual NVMe devices")
Signed-off-by: Michal Wnukowski <wnukowski(a)google.com>
Reviewed-by: Keith Busch <keith.busch(a)intel.com>
Reviewed-by: Sagi Grimberg <sagi(a)grimberg.me>
[hch: updated changelog and comment a bit]
Signed-off-by: Christoph Hellwig <hch(a)lst.de>
---
drivers/nvme/host/pci.c | 8 ++++++++
1 file changed, 8 insertions(+)
(limited to 'drivers/nvme/host/pci.c')
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 1b9951d2067e..d668682f91df 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -316,6 +316,14 @@ static bool nvme_dbbuf_update_and_check_event(u16 value, u32 *dbbuf_db,
old_value = *dbbuf_db;
*dbbuf_db = value;
+ /*
+ * Ensure that the doorbell is updated before reading the event
+ * index from memory. The controller needs to provide similar
+ * ordering to ensure the envent index is updated before reading
+ * the doorbell.
+ */
+ mb();
+
if (!nvme_dbbuf_need_event(*dbbuf_ei, value, old_value))
return false;
}
--
cgit 1.2-0.3.lf.el7
From: Randy Dunlap <rdunlap(a)infradead.org>
When $DEPMOD is not found, only print a warning instead of exiting
with an error message and error status:
Warning: 'make modules_install' requires /sbin/depmod. Please install it.
This is probably in the kmod package.
Change the Error to a Warning because "not all build hosts for cross
compiling Linux are Linux systems and are able to provide a working
port of depmod, especially at the file patch /sbin/depmod."
I.e., "make modules_install" may be used to copy/install the
loadable modules files to a target directory on a build system and
then transferred to an embedded device where /sbin/depmod is run
instead of it being run on the build system.
Fixes: 934193a654c1 ("kbuild: verify that $DEPMOD is installed")
Signed-off-by: Randy Dunlap <rdunlap(a)infradead.org>
Reported-by: H. Nikolaus Schaller <hns(a)goldelico.com>
Cc: stable(a)vger.kernel.org
Cc: Lucas De Marchi <lucas.demarchi(a)profusion.mobi>
Cc: Lucas De Marchi <lucas.de.marchi(a)gmail.com>
Cc: Michal Marek <michal.lkml(a)markovi.net>
Cc: Jessica Yu <jeyu(a)kernel.org>
Cc: Chih-Wei Huang <cwhuang(a)linux.org.tw>
Cc: H. Nikolaus Schaller <hns(a)goldelico.com>
---
v2: add missing "exit 0" and update the commit message (no Error).
v3: add Fixes: and Cc: stable
v4: add Reported-by: and more explanation for the patch.
scripts/depmod.sh | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
--- lnx-418.orig/scripts/depmod.sh
+++ lnx-418/scripts/depmod.sh
@@ -15,9 +15,9 @@ if ! test -r System.map ; then
fi
if [ -z $(command -v $DEPMOD) ]; then
- echo "'make modules_install' requires $DEPMOD. Please install it." >&2
+ echo "Warning: 'make modules_install' requires $DEPMOD. Please install it." >&2
echo "This is probably in the kmod package." >&2
- exit 1
+ exit 0
fi
# older versions of depmod require the version string to start with three
On 08/31/2018 10:38 AM, Kalle Valo wrote:
> Larry Finger <Larry.Finger(a)lwfinger.net> wrote:
>
>> In commit 66cffd6daab7 ("b43: fix transmit failure when VT is switched"),
>> a condition is noted where the network controller needs to be reset. Note
>> that this situation happens when running the open-source firmware
>> (http://netweb.ing.unibs.it/~openfwwf/), plus a number of other special
>> conditions.
>>
>> for a different card model, it is reported that this change breaks
>> operation running the proprietary firmware
>> (https://marc.info/?l=linux-wireless&m=153504546924558&w=2). Rather
>> than reverting the previous patch, the code is tweaked to avoid the
>> reset unless the open-source firmware is being used.
>>
>> Fixes: 66cffd6daab7 ("b43: fix transmit failure when VT is switched")
>> Cc: Stable <stable(a)vger.kernel.org> # 4.18+
>> Cc: Taketo Kabe <kabe(a)sra-tohoku.co.jp>
>> Reported-and-tested-by: D. Prabhu <d.praabhu(a)gmail.com>
>> Signed-off-by: Larry Finger <Larry.Finger(a)lwfinger.net>
>
> I'll change the title to something more descriptive:
>
> b43: fix DMA error related regression with proprietary firmware
>
> Does that make sense?
Yes, that is fine.
Larry
This is the start of the stable review cycle for the 4.4.106 release.
There are 105 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Sun Dec 17 09:22:39 UTC 2017.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.4.106-rc1.gz
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-4.4.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 4.4.106-rc1
Vincent Pelletier <plr.vincent(a)gmail.com>
usb: gadget: ffs: Forbid usb_ep_alloc_request from sleeping
Marc Zyngier <marc.zyngier(a)arm.com>
arm: KVM: Fix VTTBR_BADDR_MASK BUG_ON off-by-one
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Revert "x86/mm/pat: Ensure cpa->pfn only contains page frame numbers"
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Revert "x86/efi: Hoist page table switching code into efi_call_virt()"
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Revert "x86/efi: Build our own page table structures"
Eric Dumazet <edumazet(a)google.com>
net/packet: fix a race in packet_bind() and packet_notifier()
Mike Maloney <maloney(a)google.com>
packet: fix crash in fanout_demux_rollover()
Hangbin Liu <liuhangbin(a)gmail.com>
sit: update frag_off info
Håkon Bugge <Haakon.Bugge(a)oracle.com>
rds: Fix NULL pointer dereference in __rds_rdma_map
Jon Maloy <jon.maloy(a)ericsson.com>
tipc: fix memory leak in tipc_accept_from_sock()
Al Viro <viro(a)zeniv.linux.org.uk>
more bio_map_user_iov() leak fixes
Heiko Carstens <heiko.carstens(a)de.ibm.com>
s390: always save and restore all registers on context switch
Masamitsu Yamazaki <m-yamazaki(a)ah.jp.nec.com>
ipmi: Stop timers before cleaning up the module
Paul Moore <paul(a)paul-moore.com>
audit: ensure that 'audit=1' actually enables audit for PID 1
Keefe Liu <liuqifa(a)huawei.com>
ipvlan: fix ipv6 outbound device
David Howells <dhowells(a)redhat.com>
afs: Connect up the CB.ProbeUuid
Majd Dibbiny <majd(a)mellanox.com>
IB/mlx5: Assign send CQ and recv CQ of UMR QP
Mark Bloch <markb(a)mellanox.com>
IB/mlx4: Increase maximal message size under UD QP
Herbert Xu <herbert(a)gondor.apana.org.au>
xfrm: Copy policy family in clone_policy
Jason Baron <jbaron(a)akamai.com>
jump_label: Invoke jump_label_test() via early_initcall()
Arvind Yadav <arvind.yadav.cs(a)gmail.com>
atm: horizon: Fix irq release error
Xin Long <lucien.xin(a)gmail.com>
sctp: use the right sk after waking up from wait_buf sleep
Xin Long <lucien.xin(a)gmail.com>
sctp: do not free asoc when it is already dead in sctp_sendmsg
Pavel Tatashin <pasha.tatashin(a)oracle.com>
sparc64/mm: set fields in deferred pages
Ming Lei <ming.lei(a)redhat.com>
block: wake up all tasks blocked in get_request()
Chuck Lever <chuck.lever(a)oracle.com>
sunrpc: Fix rpc_task_begin trace point
Trond Myklebust <trond.myklebust(a)primarydata.com>
NFS: Fix a typo in nfs_rename()
Randy Dunlap <rdunlap(a)infradead.org>
dynamic-debug-howto: fix optional/omitted ending line number to be LARGE instead of 0
Stephen Bates <sbates(a)raithlin.com>
lib/genalloc.c: make the avail variable an atomic_long_t
Xin Long <lucien.xin(a)gmail.com>
route: update fnhe_expires for redirect when the fnhe exists
Xin Long <lucien.xin(a)gmail.com>
route: also update fnhe_genid when updating a route cache
Ben Hutchings <ben.hutchings(a)codethink.co.uk>
mac80211_hwsim: Fix memory leak in hwsim_new_radio_nl()
Masahiro Yamada <yamada.masahiro(a)socionext.com>
kbuild: pkg: use --transform option to prefix paths in tar
Jérémy Lefaure <jeremy.lefaure(a)lse.epita.fr>
EDAC, i5000, i5400: Fix definition of NRECMEMB register
Jérémy Lefaure <jeremy.lefaure(a)lse.epita.fr>
EDAC, i5000, i5400: Fix use of MTR_DRAM_WIDTH macro
Alexey Kardashevskiy <aik(a)ozlabs.ru>
powerpc/powernv/ioda2: Gracefully fail if too many TCE levels requested
Jim Qu <Jim.Qu(a)amd.com>
drm/amd/amdgpu: fix console deadlock if late init failed
Jan Kara <jack(a)suse.cz>
axonram: Fix gendisk handling
Florian Westphal <fw(a)strlen.de>
netfilter: don't track fragmented packets
Johannes Thumshirn <jthumshirn(a)suse.de>
zram: set physical queue limits to avoid array out of bounds accesses
Chris Brandt <chris.brandt(a)renesas.com>
i2c: riic: fix restart condition
Krzysztof Kozlowski <krzk(a)kernel.org>
crypto: s5p-sss - Fix completing crypto request in IRQ handler
WANG Cong <xiyou.wangcong(a)gmail.com>
ipv6: reorder icmpv6_init() and ip6_mr_init()
Michal Schmidt <mschmidt(a)redhat.com>
bnx2x: do not rollback VF MAC/VLAN filters we did not configure
Michal Schmidt <mschmidt(a)redhat.com>
bnx2x: fix possible overrun of VFPF multicast addresses array
Michal Schmidt <mschmidt(a)redhat.com>
bnx2x: prevent crash when accessing PTP with interface down
Blomme, Maarten <Maarten.Blomme(a)flir.com>
spi_ks8995: fix "BUG: key accdaa28 not in .data!"
Mark Rutland <mark.rutland(a)arm.com>
arm64: KVM: Survive unknown traps from guests
Mark Rutland <mark.rutland(a)arm.com>
arm: KVM: Survive unknown traps from guests
Wanpeng Li <wanpeng.li(a)hotmail.com>
KVM: nVMX: reset nested_run_pending if the vCPU is going to be reset
Franck Demathieu <fdemathieu(a)gmail.com>
irqchip/crossbar: Fix incorrect type of register size
James Smart <jsmart2021(a)gmail.com>
scsi: lpfc: Fix crash during Hardware error recovery on SLI3 adapters
Tejun Heo <tj(a)kernel.org>
workqueue: trigger WARN if queue_delayed_work() is called with NULL @wq
Tejun Heo <tj(a)kernel.org>
libata: drop WARN from protocol error in ata_sff_qc_issue()
Jim Mattson <jmattson(a)google.com>
kvm: nVMX: VMCLEAR should not cause the vCPU to shut down
Christophe JAILLET <christophe.jaillet(a)wanadoo.fr>
USB: gadgetfs: Fix a potential memory leak in 'dev_config()'
John Keeping <john(a)metanate.com>
usb: gadget: configs: plug memory leak
Daniel Drake <drake(a)endlessm.com>
HID: chicony: Add support for another ASUS Zen AiO keyboard
Phil Reid <preid(a)electromag.com.au>
gpio: altera: Use handle_level_irq when configured as a level_high
Guenter Roeck <linux(a)roeck-us.net>
ARM: OMAP2+: Release device node after it is no longer needed.
Guenter Roeck <linux(a)roeck-us.net>
ARM: OMAP2+: Fix device node reference counts
David Daney <david.daney(a)cavium.com>
module: set __jump_table alignment to 8
Sachin Sant <sachinp(a)linux.vnet.ibm.com>
selftest/powerpc: Fix false failures for skipped tests
Thomas Gleixner <tglx(a)linutronix.de>
x86/hpet: Prevent might sleep splat on resume
Ladislav Michl <ladis(a)linux-mips.org>
ARM: OMAP2+: gpmc-onenand: propagate error on initialization failure
Steffen Klassert <steffen.klassert(a)secunet.com>
vti6: Don't report path MTU below IPV6_MIN_MTU.
Sasha Levin <alexander.levin(a)verizon.com>
Revert "s390/kbuild: enable modversions for symbols exported from asm"
Sasha Levin <alexander.levin(a)verizon.com>
Revert "spi: SPI_FSL_DSPI should depend on HAS_DMA"
Sasha Levin <alexander.levin(a)verizon.com>
Revert "drm/armada: Fix compile fail"
Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
mm: drop unused pmdp_huge_get_and_clear_notify()
Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
thp: fix MADV_DONTNEED vs. numa balancing race
Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
thp: reduce indentation level in change_huge_pmd()
Stephen Hemminger <stephen(a)networkplumber.org>
scsi: storvsc: Workaround for virtual DVD SCSI version
Russell King <rmk+kernel(a)armlinux.org.uk>
ARM: avoid faulting on qemu
Russell King <rmk+kernel(a)armlinux.org.uk>
ARM: BUG if jumping to usermode address in kernel mode
Dave Martin <Dave.Martin(a)arm.com>
arm64: fpsimd: Prevent registers leaking from dead tasks
Andrew Honig <ahonig(a)google.com>
KVM: VMX: remove I/O port 0x80 bypass on Intel hosts
Kristina Martsenko <kristina.martsenko(a)arm.com>
arm64: KVM: fix VTTBR_BADDR_MASK BUG_ON off-by-one
Laurent Caumont <lcaumont2(a)gmail.com>
media: dvb: i2c transfers over usb cannot be done from stack
Marek Szyprowski <m.szyprowski(a)samsung.com>
drm/exynos: gem: Drop NONCONTIG flag for buffers allocated without IOMMU
Dave Gordon <david.s.gordon(a)intel.com>
drm: extra printk() wrapper macros
Daniel Thompson <daniel.thompson(a)linaro.org>
kdb: Fix handling of kallsyms_symbol_next() return value
Heiko Carstens <heiko.carstens(a)de.ibm.com>
s390: fix compat system call table
Robin Murphy <robin.murphy(a)arm.com>
iommu/vt-d: Fix scatterlist offset handling
Jaejoong Kim <climbbb.kim(a)gmail.com>
ALSA: usb-audio: Add check return value for usb_string()
Jaejoong Kim <climbbb.kim(a)gmail.com>
ALSA: usb-audio: Fix out-of-bound error
Takashi Iwai <tiwai(a)suse.de>
ALSA: seq: Remove spurious WARN_ON() at timer check
Robb Glasser <rglasser(a)google.com>
ALSA: pcm: prevent UAF in snd_pcm_info
Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
x86/PCI: Make broadcom_postcore_init() check acpi_disabled
Eric Biggers <ebiggers(a)google.com>
X.509: reject invalid BIT STRING for subjectPublicKey
Eric Biggers <ebiggers(a)google.com>
ASN.1: check for error from ASN1_OP_END__ACT actions
Eric Biggers <ebiggers(a)google.com>
ASN.1: fix out-of-bounds read when parsing indefinite length item
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
efi: Move some sysfs files to be read-only by root
Huacai Chen <chenhc(a)lemote.com>
scsi: libsas: align sata_device's rps_resp on a cacheline
William Breathitt Gray <vilhelm.gray(a)gmail.com>
isa: Prevent NULL dereference in isa_bus driver callbacks
Paul Meyer <Paul.Meyer(a)microsoft.com>
hv: kvp: Avoid reading past allocated blocks from KVP file
weiping zhang <zwp10758(a)gmail.com>
virtio: release virtio index when fail to device_register
Martin Kelly <mkelly(a)xevo.com>
can: usb_8dev: cancel urb on -EPIPE and -EPROTO
Martin Kelly <mkelly(a)xevo.com>
can: esd_usb2: cancel urb on -EPIPE and -EPROTO
Martin Kelly <mkelly(a)xevo.com>
can: ems_usb: cancel urb on -EPIPE and -EPROTO
Martin Kelly <mkelly(a)xevo.com>
can: kvaser_usb: cancel urb on -EPIPE and -EPROTO
Jimmy Assarsson <jimmyassarsson(a)gmail.com>
can: kvaser_usb: ratelimit errors if incomplete messages are received
Jimmy Assarsson <jimmyassarsson(a)gmail.com>
can: kvaser_usb: Fix comparison bug in kvaser_usb_read_bulk_callback()
Jimmy Assarsson <jimmyassarsson(a)gmail.com>
can: kvaser_usb: free buf in error paths
Oliver Stäbler <oliver.staebler(a)bytesatwork.ch>
can: ti_hecc: Fix napi poll return value for repoll
-------------
Diffstat:
Makefile | 4 +-
arch/arm/include/asm/assembler.h | 18 +++
arch/arm/include/asm/kvm_arm.h | 4 +-
arch/arm/kernel/entry-header.S | 6 +
arch/arm/kvm/handle_exit.c | 19 +--
arch/arm/mach-omap2/gpmc-onenand.c | 10 +-
arch/arm/mach-omap2/omap_hwmod_3xxx_data.c | 25 ++--
arch/arm64/include/asm/kvm_arm.h | 3 +-
arch/arm64/kernel/process.c | 9 ++
arch/arm64/kvm/handle_exit.c | 19 +--
arch/powerpc/platforms/powernv/pci-ioda.c | 3 +
arch/powerpc/sysdev/axonram.c | 5 +-
arch/s390/include/asm/asm-prototypes.h | 8 --
arch/s390/include/asm/switch_to.h | 19 ++-
arch/s390/kernel/syscalls.S | 6 +-
arch/sparc/mm/init_64.c | 9 +-
arch/x86/include/asm/efi.h | 26 ----
arch/x86/kernel/hpet.c | 2 +-
arch/x86/kvm/vmx.c | 31 ++---
arch/x86/mm/pageattr.c | 17 ++-
arch/x86/pci/broadcom_bus.c | 2 +-
arch/x86/platform/efi/efi.c | 39 +++---
arch/x86/platform/efi/efi_32.c | 5 -
arch/x86/platform/efi/efi_64.c | 137 ++++++----------------
arch/x86/platform/efi/efi_stub_64.S | 43 +++++++
block/bio.c | 14 ++-
block/blk-core.c | 4 +-
crypto/asymmetric_keys/x509_cert_parser.c | 2 +
drivers/ata/libata-sff.c | 1 -
drivers/atm/horizon.c | 2 +-
drivers/base/isa.c | 10 +-
drivers/block/zram/zram_drv.c | 2 +
drivers/char/ipmi/ipmi_si_intf.c | 44 +++----
drivers/crypto/s5p-sss.c | 5 +-
drivers/edac/i5000_edac.c | 8 +-
drivers/edac/i5400_edac.c | 9 +-
drivers/firmware/efi/efi.c | 3 +-
drivers/firmware/efi/esrt.c | 15 +--
drivers/firmware/efi/runtime-map.c | 10 +-
drivers/gpio/gpio-altera.c | 26 ++--
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 5 +-
drivers/gpu/drm/armada/Makefile | 2 -
drivers/gpu/drm/exynos/exynos_drm_gem.c | 9 ++
drivers/hid/Kconfig | 4 +-
drivers/hid/hid-chicony.c | 1 +
drivers/hid/hid-core.c | 1 +
drivers/hid/hid-ids.h | 1 +
drivers/i2c/busses/i2c-riic.c | 6 +-
drivers/infiniband/hw/mlx4/qp.c | 2 +-
drivers/infiniband/hw/mlx5/main.c | 2 +
drivers/iommu/intel-iommu.c | 8 +-
drivers/irqchip/irq-crossbar.c | 8 +-
drivers/media/usb/dvb-usb/dibusb-common.c | 16 ++-
drivers/memory/omap-gpmc.c | 4 +-
drivers/net/can/ti_hecc.c | 3 +
drivers/net/can/usb/ems_usb.c | 2 +
drivers/net/can/usb/esd_usb2.c | 2 +
drivers/net/can/usb/kvaser_usb.c | 13 +-
drivers/net/can/usb/usb_8dev.c | 2 +
drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 20 +++-
drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c | 8 +-
drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h | 1 +
drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c | 23 ++--
drivers/net/ipvlan/ipvlan_core.c | 2 +-
drivers/net/phy/spi_ks8995.c | 1 +
drivers/net/wireless/mac80211_hwsim.c | 5 +-
drivers/scsi/lpfc/lpfc_els.c | 14 ++-
drivers/scsi/storvsc_drv.c | 27 +++--
drivers/spi/Kconfig | 1 -
drivers/usb/gadget/configfs.c | 1 +
drivers/usb/gadget/function/f_fs.c | 2 +-
drivers/usb/gadget/legacy/inode.c | 4 +-
drivers/virtio/virtio.c | 2 +
fs/afs/cmservice.c | 3 +
fs/nfs/dir.c | 2 +-
include/drm/drmP.h | 26 +++-
include/linux/genalloc.h | 3 +-
include/linux/mmu_notifier.h | 13 --
include/linux/omap-gpmc.h | 5 +-
include/linux/sysfs.h | 6 +
include/scsi/libsas.h | 2 +-
kernel/audit.c | 10 +-
kernel/debug/kdb/kdb_io.c | 2 +-
kernel/jump_label.c | 2 +-
kernel/workqueue.c | 1 +
lib/asn1_decoder.c | 49 ++++----
lib/dynamic_debug.c | 4 +
lib/genalloc.c | 10 +-
mm/huge_memory.c | 82 +++++++++----
net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 4 +
net/ipv4/netfilter/nf_nat_l3proto_ipv4.c | 5 -
net/ipv4/route.c | 14 ++-
net/ipv6/af_inet6.c | 10 +-
net/ipv6/ip6_vti.c | 8 +-
net/ipv6/sit.c | 1 +
net/packet/af_packet.c | 37 +++---
net/packet/internal.h | 1 -
net/rds/rdma.c | 2 +-
net/sctp/socket.c | 38 ++++--
net/sunrpc/sched.c | 3 +-
net/tipc/server.c | 1 +
net/xfrm/xfrm_policy.c | 1 +
scripts/module-common.lds | 2 +
scripts/package/Makefile | 5 +-
sound/core/pcm.c | 2 +
sound/core/seq/seq_timer.c | 2 +-
sound/usb/mixer.c | 13 +-
tools/hv/hv_kvp_daemon.c | 70 +++--------
tools/testing/selftests/powerpc/harness.c | 6 +-
109 files changed, 701 insertions(+), 570 deletions(-)
The following commit in 4.18.y causes the build to fail:
79764192e1c4 ("printk/nmi: Prevent deadlock when accessing the main log buffer in NMI")
with:
| kernel/printk/printk_safe.o: In function `vprintk_func':
| printk_safe.c:(.text+0x51b): undefined reference to `vprintk_store'
| printk_safe.c:(.text+0x52f): undefined reference to `defer_console_output'
Picking up these from mainline seems to do the trick:
a338f84dc196 ("printk: Create helper function to queue deferred console handling")
ba552399954d ("printk: Split the code for storing a message into the log buffer")
Thanks,
Dan
From: Rishabh Bhatnagar <rishabhb(a)codeaurora.org>
When calling request_firmware_into_buf() with the FW_OPT_NOCACHE flag
it is expected that firmware is loaded into buffer from memory.
But inside alloc_lookup_fw_priv every new firmware that is loaded is
added to the firmware cache (fwc) list head. So if any driver requests
a firmware that is already loaded the code iterates over the above
mentioned list and it can end up giving a pointer to other device driver's
firmware buffer.
Also the existing copy may either be modified by drivers, remote processors
or even freed. This causes a potential security issue with batched requests
when using request_firmware_into_buf.
Fix alloc_lookup_fw_priv to not add to the fwc head list if FW_OPT_NOCACHE
is set, and also don't do the lookup in the list.
Fixes: 0e742e9275 ("firmware: provide infrastructure to make fw caching optional")
[mcgrof: broken since feature introduction on v4.8]
Cc: stable(a)vger.kernel.org # v4.8+
Signed-off-by: Vikram Mulukutla <markivx(a)codeaurora.org>
Signed-off-by: Rishabh Bhatnagar <rishabhb(a)codeaurora.org>
Signed-off-by: Luis Chamberlain <mcgrof(a)kernel.org>
---
This has been tested with the self test firmware scrip and found no
regressions.
drivers/base/firmware_loader/main.c | 30 +++++++++++++++++------------
1 file changed, 18 insertions(+), 12 deletions(-)
diff --git a/drivers/base/firmware_loader/main.c b/drivers/base/firmware_loader/main.c
index 0943e7065e0e..b3c0498ee433 100644
--- a/drivers/base/firmware_loader/main.c
+++ b/drivers/base/firmware_loader/main.c
@@ -209,21 +209,24 @@ static struct fw_priv *__lookup_fw_priv(const char *fw_name)
static int alloc_lookup_fw_priv(const char *fw_name,
struct firmware_cache *fwc,
struct fw_priv **fw_priv, void *dbuf,
- size_t size)
+ size_t size, enum fw_opt opt_flags)
{
struct fw_priv *tmp;
spin_lock(&fwc->lock);
- tmp = __lookup_fw_priv(fw_name);
- if (tmp) {
- kref_get(&tmp->ref);
- spin_unlock(&fwc->lock);
- *fw_priv = tmp;
- pr_debug("batched request - sharing the same struct fw_priv and lookup for multiple requests\n");
- return 1;
+ if (!(opt_flags & FW_OPT_NOCACHE)) {
+ tmp = __lookup_fw_priv(fw_name);
+ if (tmp) {
+ kref_get(&tmp->ref);
+ spin_unlock(&fwc->lock);
+ *fw_priv = tmp;
+ pr_debug("batched request - sharing the same struct fw_priv and lookup for multiple requests\n");
+ return 1;
+ }
}
+
tmp = __allocate_fw_priv(fw_name, fwc, dbuf, size);
- if (tmp)
+ if (tmp && !(opt_flags & FW_OPT_NOCACHE))
list_add(&tmp->list, &fwc->head);
spin_unlock(&fwc->lock);
@@ -493,7 +496,8 @@ int assign_fw(struct firmware *fw, struct device *device,
*/
static int
_request_firmware_prepare(struct firmware **firmware_p, const char *name,
- struct device *device, void *dbuf, size_t size)
+ struct device *device, void *dbuf, size_t size,
+ enum fw_opt opt_flags)
{
struct firmware *firmware;
struct fw_priv *fw_priv;
@@ -511,7 +515,8 @@ _request_firmware_prepare(struct firmware **firmware_p, const char *name,
return 0; /* assigned */
}
- ret = alloc_lookup_fw_priv(name, &fw_cache, &fw_priv, dbuf, size);
+ ret = alloc_lookup_fw_priv(name, &fw_cache, &fw_priv, dbuf, size,
+ opt_flags);
/*
* bind with 'priv' now to avoid warning in failure path
@@ -571,7 +576,8 @@ _request_firmware(const struct firmware **firmware_p, const char *name,
goto out;
}
- ret = _request_firmware_prepare(&fw, name, device, buf, size);
+ ret = _request_firmware_prepare(&fw, name, device, buf, size,
+ opt_flags);
if (ret <= 0) /* error or already assigned */
goto out;
--
2.18.0
Make sure the cancelled URB is on the current endpoint ring.
If the endpoint ring has been reallocated since the URB was enqueued
then the URB may contain TD and TRB pointers to a already freed ring.
In this the case return the URB without touching any of the freed ring
structure data.
Don't try to stop the ring. It would be useless.
This can occur if endpoint is not flushed before it is dropped and
re-added, which is the case in usb_set_interface() as xhci does
things in an odd order.
Cc: <stable(a)vger.kernel.org>
Tested-by: Sudip Mukherjee <sudipm.mukherjee(a)gmail.com>
Signed-off-by: Mathias Nyman <mathias.nyman(a)linux.intel.com>
---
drivers/usb/host/xhci.c | 30 ++++++++++++++++++++++++++++++
1 file changed, 30 insertions(+)
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 61f48b1..0420eef 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -37,6 +37,21 @@ static unsigned long long quirks;
module_param(quirks, ullong, S_IRUGO);
MODULE_PARM_DESC(quirks, "Bit flags for quirks to be enabled as default");
+static bool td_on_ring(struct xhci_td *td, struct xhci_ring *ring)
+{
+ struct xhci_segment *seg = ring->first_seg;
+
+ if (!td || !td->start_seg)
+ return false;
+ do {
+ if (seg == td->start_seg)
+ return true;
+ seg = seg->next;
+ } while (seg && seg != ring->first_seg);
+
+ return false;
+}
+
/* TODO: copied from ehci-hcd.c - can this be refactored? */
/*
* xhci_handshake - spin reading hc until handshake completes or fails
@@ -1571,6 +1586,21 @@ static int xhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
goto done;
}
+ /*
+ * check ring is not re-allocated since URB was enqueued. If it is, then
+ * make sure none of the ring related pointers in this URB private data
+ * are touched, such as td_list, otherwise we overwrite freed data
+ */
+ if (!td_on_ring(&urb_priv->td[0], ep_ring)) {
+ xhci_err(xhci, "Canceled URB td not found on endpoint ring");
+ for (i = urb_priv->num_tds_done; i < urb_priv->num_tds; i++) {
+ td = &urb_priv->td[i];
+ if (!list_empty(&td->cancelled_td_list))
+ list_del_init(&td->cancelled_td_list);
+ }
+ goto err_giveback;
+ }
+
if (xhci->xhc_state & XHCI_STATE_HALTED) {
xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb,
"HC halted, freeing TD manually.");
--
2.7.4
Hi,
Have you received my email from last week?
I would like to speak with the person who manage your photos for your
company?
We are here to provide you all kinds of imaging editing.
What we can provide you:
Cutting out for photos
Clipping path for photos
Masking for photos
Retouching for your photos
Retouching for the Beauty Model and Portraits.
We have 20 staffs in house and daily basis 1000 images can be processed.
We give testing for your photos.
Thanks,
Jimmy Wilson
Hi,
Have you received my email from last week?
I would like to speak with the person who manage your photos for your
company?
We are here to provide you all kinds of imaging editing.
What we can provide you:
Cutting out for photos
Clipping path for photos
Masking for photos
Retouching for your photos
Retouching for the Beauty Model and Portraits.
We have 20 staffs in house and daily basis 1000 images can be processed.
We give testing for your photos.
Thanks,
Jimmy Wilson
Hi,
Have you received my email from last week?
I would like to speak with the person who manage your photos for your
company?
We are here to provide you all kinds of imaging editing.
What we can provide you:
Cutting out for photos
Clipping path for photos
Masking for photos
Retouching for your photos
Retouching for the Beauty Model and Portraits.
We have 20 staffs in house and daily basis 1000 images can be processed.
We give testing for your photos.
Thanks,
Jimmy Wilson
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 976d34e2dab10ece5ea8fe7090b7692913f89084 Mon Sep 17 00:00:00 2001
From: Punit Agrawal <punit.agrawal(a)arm.com>
Date: Mon, 13 Aug 2018 11:43:51 +0100
Subject: [PATCH] KVM: arm/arm64: Skip updating PTE entry if no change
When there is contention on faulting in a particular page table entry
at stage 2, the break-before-make requirement of the architecture can
lead to additional refaulting due to TLB invalidation.
Avoid this by skipping a page table update if the new value of the PTE
matches the previous value.
Cc: stable(a)vger.kernel.org
Fixes: d5d8184d35c9 ("KVM: ARM: Memory virtualization setup")
Reviewed-by: Suzuki Poulose <suzuki.poulose(a)arm.com>
Acked-by: Christoffer Dall <christoffer.dall(a)arm.com>
Signed-off-by: Punit Agrawal <punit.agrawal(a)arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier(a)arm.com>
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index 13dfe36501aa..91aaf73b00df 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -1147,6 +1147,10 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
/* Create 2nd stage page table mapping - Level 3 */
old_pte = *pte;
if (pte_present(old_pte)) {
+ /* Skip page table update if there is no change */
+ if (pte_val(old_pte) == pte_val(*new_pte))
+ return 0;
+
kvm_set_pte(pte, __pte(0));
kvm_tlb_flush_vmid_ipa(kvm, addr);
} else {
On Bay Trail and Cherry Trail devices we set the pm_disabled flag for I2C
busses which the OS shares with the PUNIT as these need special handling.
Until now we called dev_pm_syscore_device(dev, true) for I2C controllers
with this flag set to keep these I2C controllers always on.
After commit 12864ff8545f ("ACPI / LPSS: Avoid PM quirks on suspend and
resume from hibernation"), this no longer works. This commit modifies
lpss_iosf_exit_d3_state() to only run if lpss_iosf_enter_d3_state() has ran
before it, so that it does not run on a resume from hibernate (or from S3).
On these systems the conditions for lpss_iosf_enter_d3_state() to run
never become true, so lpss_iosf_exit_d3_state() never gets called and
the 2 LPSS DMA controllers never get forced into D0 mode, instead they
are left in their default automatic power-on when needed mode.
The not forcing of D0 mode for the DMA controllers enables these systems
to properly enter S0ix modes, which is a good thing.
But after entering S0ix modes the I2C controller connected to the PMIC
no longer works, leading to e.g. broken battery monitoring.
The _PS3 method for this I2C controller looks like this:
Method (_PS3, 0, NotSerialized) // _PS3: Power State 3
{
If ((((PMID == 0x04) || (PMID == 0x05)) || (PMID == 0x06)))
{
Return (Zero)
}
PSAT |= 0x03
Local0 = PSAT /* \_SB_.I2C5.PSAT */
}
Where PMID = 0x05, so we enter the Return (Zero) path on these systems.
So even if we were to not call dev_pm_syscore_device(dev, true) the
I2C controller will be left in D0 rather then be switched to D3.
Yet on other Bay and Cherry Trail devices S0ix is not entered unless *all*
I2C controllers are in D3 mode. This combined with the I2C controller no
longer working now that we reach S0ix states on these systems leads to me
believing that the PUNIT itself puts the I2C controller in D3 when all
other conditions for entering S0ix states are true.
Since now the I2C controller is put in D3 over a suspend/resume we must
re-initialize it afterwards and that does indeed fix it no longer working.
This commit implements this fix by:
1) Making the suspend_late callback a no-op if pm_disabled is set and
making the resume_early callback skip the clock re-enable (since it now was
not disabled) while still doing the necessary I2C controller re-init.
2) Removing the dev_pm_syscore_device(dev, true) call, so that the suspend
and resume callbacks are actually called. Normally this would cause the
ACPI pm code to call _PS3 putting the I2C controller in D3, wreaking havoc
since it is shared with the PUNIT, but in this special case the _PS3 method
is a no-op so we can safely allow a "fake" suspend / resume.
Fixes: 12864ff8545f ("ACPI / LPSS: Avoid PM quirks on suspend and resume ...")
Link: https://bugzilla.kernel.org/show_bug.cgi?id=200861
Cc: 4.15+ <stable(a)vger.kernel.org> # 4.15+
Signed-off-by: Hans de Goede <hdegoede(a)redhat.com>
---
drivers/i2c/busses/i2c-designware-master.c | 1 -
drivers/i2c/busses/i2c-designware-platdrv.c | 7 ++++++-
2 files changed, 6 insertions(+), 2 deletions(-)
diff --git a/drivers/i2c/busses/i2c-designware-master.c b/drivers/i2c/busses/i2c-designware-master.c
index 27436a937492..54b2a3a86677 100644
--- a/drivers/i2c/busses/i2c-designware-master.c
+++ b/drivers/i2c/busses/i2c-designware-master.c
@@ -693,7 +693,6 @@ int i2c_dw_probe(struct dw_i2c_dev *dev)
i2c_set_adapdata(adap, dev);
if (dev->pm_disabled) {
- dev_pm_syscore_device(dev->dev, true);
irq_flags = IRQF_NO_SUSPEND;
} else {
irq_flags = IRQF_SHARED | IRQF_COND_SUSPEND;
diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c
index 5660daf6c92e..d281d21cdd8e 100644
--- a/drivers/i2c/busses/i2c-designware-platdrv.c
+++ b/drivers/i2c/busses/i2c-designware-platdrv.c
@@ -448,6 +448,9 @@ static int dw_i2c_plat_suspend(struct device *dev)
{
struct dw_i2c_dev *i_dev = dev_get_drvdata(dev);
+ if (i_dev->pm_disabled)
+ return 0;
+
i_dev->disable(i_dev);
i2c_dw_prepare_clk(i_dev, false);
@@ -458,7 +461,9 @@ static int dw_i2c_plat_resume(struct device *dev)
{
struct dw_i2c_dev *i_dev = dev_get_drvdata(dev);
- i2c_dw_prepare_clk(i_dev, true);
+ if (!i_dev->pm_disabled)
+ i2c_dw_prepare_clk(i_dev, true);
+
i_dev->init(i_dev);
return 0;
--
2.19.0.rc0
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 5ad356eabc47d26a92140a0c4b20eba471c10de3 Mon Sep 17 00:00:00 2001
From: Greg Hackmann <ghackmann(a)android.com>
Date: Wed, 15 Aug 2018 12:51:21 -0700
Subject: [PATCH] arm64: mm: check for upper PAGE_SHIFT bits in pfn_valid()
ARM64's pfn_valid() shifts away the upper PAGE_SHIFT bits of the input
before seeing if the PFN is valid. This leads to false positives when
some of the upper bits are set, but the lower bits match a valid PFN.
For example, the following userspace code looks up a bogus entry in
/proc/kpageflags:
int pagemap = open("/proc/self/pagemap", O_RDONLY);
int pageflags = open("/proc/kpageflags", O_RDONLY);
uint64_t pfn, val;
lseek64(pagemap, [...], SEEK_SET);
read(pagemap, &pfn, sizeof(pfn));
if (pfn & (1UL << 63)) { /* valid PFN */
pfn &= ((1UL << 55) - 1); /* clear flag bits */
pfn |= (1UL << 55);
lseek64(pageflags, pfn * sizeof(uint64_t), SEEK_SET);
read(pageflags, &val, sizeof(val));
}
On ARM64 this causes the userspace process to crash with SIGSEGV rather
than reading (1 << KPF_NOPAGE). kpageflags_read() treats the offset as
valid, and stable_page_flags() will try to access an address between the
user and kernel address ranges.
Fixes: c1cc1552616d ("arm64: MMU initialisation")
Cc: stable(a)vger.kernel.org
Signed-off-by: Greg Hackmann <ghackmann(a)google.com>
Signed-off-by: Will Deacon <will.deacon(a)arm.com>
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 325cfb3b858a..811f9f8b3bb0 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -287,7 +287,11 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
#ifdef CONFIG_HAVE_ARCH_PFN_VALID
int pfn_valid(unsigned long pfn)
{
- return memblock_is_map_memory(pfn << PAGE_SHIFT);
+ phys_addr_t addr = pfn << PAGE_SHIFT;
+
+ if ((addr >> PAGE_SHIFT) != pfn)
+ return 0;
+ return memblock_is_map_memory(addr);
}
EXPORT_SYMBOL(pfn_valid);
#endif
From: Ronnie Sahlberg <lsahlber(a)redhat.com>
[ Upstream commit e6c47dd0da1e3a484e778046fc10da0b20606a86 ]
Some SMB2/3 servers, Win2016 but possibly others too, adds padding
not only between PDUs in a compound but also to the final PDU.
This padding extends the PDU to a multiple of 8 bytes.
Check if the unexpected length looks like this might be the case
and avoid triggering the log messages for :
"SMB2 server sent bad RFC1001 len %d not %d\n"
Signed-off-by: Ronnie Sahlberg <lsahlber(a)redhat.com>
Signed-off-by: Steve French <stfrench(a)microsoft.com>
Signed-off-by: Sasha Levin <alexander.levin(a)microsoft.com>
---
fs/cifs/smb2misc.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index b35c1398d459..494438195a1d 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -182,6 +182,13 @@ smb2_check_message(char *buf, unsigned int length)
if (clc_len == 4 + len + 1)
return 0;
+ /*
+ * Some windows servers (win2016) will pad also the final
+ * PDU in a compound to 8 bytes.
+ */
+ if (((clc_len + 7) & ~7) == len)
+ return 0;
+
/*
* MacOS server pads after SMB2.1 write response with 3 bytes
* of junk. Other servers match RFC1001 len to actual
--
2.17.1
From: Ronnie Sahlberg <lsahlber(a)redhat.com>
[ Upstream commit e6c47dd0da1e3a484e778046fc10da0b20606a86 ]
Some SMB2/3 servers, Win2016 but possibly others too, adds padding
not only between PDUs in a compound but also to the final PDU.
This padding extends the PDU to a multiple of 8 bytes.
Check if the unexpected length looks like this might be the case
and avoid triggering the log messages for :
"SMB2 server sent bad RFC1001 len %d not %d\n"
Signed-off-by: Ronnie Sahlberg <lsahlber(a)redhat.com>
Signed-off-by: Steve French <stfrench(a)microsoft.com>
Signed-off-by: Sasha Levin <alexander.levin(a)microsoft.com>
---
fs/cifs/smb2misc.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index 76ccf20fbfb7..0e62bf1ebbd7 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -184,6 +184,13 @@ smb2_check_message(char *buf, unsigned int length)
if (clc_len == 4 + len + 1)
return 0;
+ /*
+ * Some windows servers (win2016) will pad also the final
+ * PDU in a compound to 8 bytes.
+ */
+ if (((clc_len + 7) & ~7) == len)
+ return 0;
+
/*
* MacOS server pads after SMB2.1 write response with 3 bytes
* of junk. Other servers match RFC1001 len to actual
--
2.17.1
From: Ronnie Sahlberg <lsahlber(a)redhat.com>
[ Upstream commit e6c47dd0da1e3a484e778046fc10da0b20606a86 ]
Some SMB2/3 servers, Win2016 but possibly others too, adds padding
not only between PDUs in a compound but also to the final PDU.
This padding extends the PDU to a multiple of 8 bytes.
Check if the unexpected length looks like this might be the case
and avoid triggering the log messages for :
"SMB2 server sent bad RFC1001 len %d not %d\n"
Signed-off-by: Ronnie Sahlberg <lsahlber(a)redhat.com>
Signed-off-by: Steve French <stfrench(a)microsoft.com>
Signed-off-by: Sasha Levin <alexander.levin(a)microsoft.com>
---
fs/cifs/smb2misc.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index 967dfe656ced..e96a74da756f 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -208,6 +208,13 @@ smb2_check_message(char *buf, unsigned int length, struct TCP_Server_Info *srvr)
if (clc_len == 4 + len + 1)
return 0;
+ /*
+ * Some windows servers (win2016) will pad also the final
+ * PDU in a compound to 8 bytes.
+ */
+ if (((clc_len + 7) & ~7) == len)
+ return 0;
+
/*
* MacOS server pads after SMB2.1 write response with 3 bytes
* of junk. Other servers match RFC1001 len to actual
--
2.17.1
From: Ronnie Sahlberg <lsahlber(a)redhat.com>
[ Upstream commit e6c47dd0da1e3a484e778046fc10da0b20606a86 ]
Some SMB2/3 servers, Win2016 but possibly others too, adds padding
not only between PDUs in a compound but also to the final PDU.
This padding extends the PDU to a multiple of 8 bytes.
Check if the unexpected length looks like this might be the case
and avoid triggering the log messages for :
"SMB2 server sent bad RFC1001 len %d not %d\n"
Signed-off-by: Ronnie Sahlberg <lsahlber(a)redhat.com>
Signed-off-by: Steve French <stfrench(a)microsoft.com>
Signed-off-by: Sasha Levin <alexander.levin(a)microsoft.com>
---
fs/cifs/smb2misc.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index 7b08a1446a7f..efdfdb47a7dd 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -211,6 +211,13 @@ smb2_check_message(char *buf, unsigned int length, struct TCP_Server_Info *srvr)
if (clc_len == 4 + len + 1)
return 0;
+ /*
+ * Some windows servers (win2016) will pad also the final
+ * PDU in a compound to 8 bytes.
+ */
+ if (((clc_len + 7) & ~7) == len)
+ return 0;
+
/*
* MacOS server pads after SMB2.1 write response with 3 bytes
* of junk. Other servers match RFC1001 len to actual
--
2.17.1
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 9af0b3d1257756394ebbd06b14937b557e3a756b Mon Sep 17 00:00:00 2001
From: Wang Shilong <wshilong(a)ddn.com>
Date: Sun, 29 Jul 2018 17:27:45 -0400
Subject: [PATCH] ext4: fix race when setting the bitmap corrupted flag
Whenever we hit block or inode bitmap corruptions we set
bit and then reduce this block group free inode/clusters
counter to expose right available space.
However some of ext4_mark_group_bitmap_corrupted() is called
inside group spinlock, some are not, this could make it happen
that we double reduce one block group free counters from system.
Always hold group spinlock for it could fix it, but it looks
a little heavy, we could use test_and_set_bit() to fix race
problems here.
Signed-off-by: Wang Shilong <wshilong(a)ddn.com>
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
Cc: stable(a)vger.kernel.org
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index d4a218ba626c..f7750bc5b85a 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -795,26 +795,26 @@ void ext4_mark_group_bitmap_corrupted(struct super_block *sb,
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_group_info *grp = ext4_get_group_info(sb, group);
struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
+ int ret;
- if ((flags & EXT4_GROUP_INFO_BBITMAP_CORRUPT) &&
- !EXT4_MB_GRP_BBITMAP_CORRUPT(grp)) {
- percpu_counter_sub(&sbi->s_freeclusters_counter,
- grp->bb_free);
- set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT,
- &grp->bb_state);
+ if (flags & EXT4_GROUP_INFO_BBITMAP_CORRUPT) {
+ ret = ext4_test_and_set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT,
+ &grp->bb_state);
+ if (!ret)
+ percpu_counter_sub(&sbi->s_freeclusters_counter,
+ grp->bb_free);
}
- if ((flags & EXT4_GROUP_INFO_IBITMAP_CORRUPT) &&
- !EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) {
- if (gdp) {
+ if (flags & EXT4_GROUP_INFO_IBITMAP_CORRUPT) {
+ ret = ext4_test_and_set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT,
+ &grp->bb_state);
+ if (!ret && gdp) {
int count;
count = ext4_free_inodes_count(sb, gdp);
percpu_counter_sub(&sbi->s_freeinodes_counter,
count);
}
- set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT,
- &grp->bb_state);
}
}
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 9af0b3d1257756394ebbd06b14937b557e3a756b Mon Sep 17 00:00:00 2001
From: Wang Shilong <wshilong(a)ddn.com>
Date: Sun, 29 Jul 2018 17:27:45 -0400
Subject: [PATCH] ext4: fix race when setting the bitmap corrupted flag
Whenever we hit block or inode bitmap corruptions we set
bit and then reduce this block group free inode/clusters
counter to expose right available space.
However some of ext4_mark_group_bitmap_corrupted() is called
inside group spinlock, some are not, this could make it happen
that we double reduce one block group free counters from system.
Always hold group spinlock for it could fix it, but it looks
a little heavy, we could use test_and_set_bit() to fix race
problems here.
Signed-off-by: Wang Shilong <wshilong(a)ddn.com>
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
Cc: stable(a)vger.kernel.org
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index d4a218ba626c..f7750bc5b85a 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -795,26 +795,26 @@ void ext4_mark_group_bitmap_corrupted(struct super_block *sb,
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_group_info *grp = ext4_get_group_info(sb, group);
struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
+ int ret;
- if ((flags & EXT4_GROUP_INFO_BBITMAP_CORRUPT) &&
- !EXT4_MB_GRP_BBITMAP_CORRUPT(grp)) {
- percpu_counter_sub(&sbi->s_freeclusters_counter,
- grp->bb_free);
- set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT,
- &grp->bb_state);
+ if (flags & EXT4_GROUP_INFO_BBITMAP_CORRUPT) {
+ ret = ext4_test_and_set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT,
+ &grp->bb_state);
+ if (!ret)
+ percpu_counter_sub(&sbi->s_freeclusters_counter,
+ grp->bb_free);
}
- if ((flags & EXT4_GROUP_INFO_IBITMAP_CORRUPT) &&
- !EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) {
- if (gdp) {
+ if (flags & EXT4_GROUP_INFO_IBITMAP_CORRUPT) {
+ ret = ext4_test_and_set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT,
+ &grp->bb_state);
+ if (!ret && gdp) {
int count;
count = ext4_free_inodes_count(sb, gdp);
percpu_counter_sub(&sbi->s_freeinodes_counter,
count);
}
- set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT,
- &grp->bb_state);
}
}
The patch below does not apply to the 3.18-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From a4d2aadca184ece182418950d45ba4ffc7b652d2 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd(a)arndb.de>
Date: Sun, 29 Jul 2018 15:48:00 -0400
Subject: [PATCH] ext4: sysfs: print ext4_super_block fields as little-endian
While working on extended rand for last_error/first_error timestamps,
I noticed that the endianess is wrong; we access the little-endian
fields in struct ext4_super_block as native-endian when we print them.
This adds a special case in ext4_attr_show() and ext4_attr_store()
to byteswap the superblock fields if needed.
In older kernels, this code was part of super.c, it got moved to
sysfs.c in linux-4.4.
Cc: stable(a)vger.kernel.org
Fixes: 52c198c6820f ("ext4: add sysfs entry showing whether the fs contains errors")
Reviewed-by: Andreas Dilger <adilger(a)dilger.ca>
Signed-off-by: Arnd Bergmann <arnd(a)arndb.de>
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index f34da0bb8f17..b970a200f20c 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -274,8 +274,12 @@ static ssize_t ext4_attr_show(struct kobject *kobj,
case attr_pointer_ui:
if (!ptr)
return 0;
- return snprintf(buf, PAGE_SIZE, "%u\n",
- *((unsigned int *) ptr));
+ if (a->attr_ptr == ptr_ext4_super_block_offset)
+ return snprintf(buf, PAGE_SIZE, "%u\n",
+ le32_to_cpup(ptr));
+ else
+ return snprintf(buf, PAGE_SIZE, "%u\n",
+ *((unsigned int *) ptr));
case attr_pointer_atomic:
if (!ptr)
return 0;
@@ -308,7 +312,10 @@ static ssize_t ext4_attr_store(struct kobject *kobj,
ret = kstrtoul(skip_spaces(buf), 0, &t);
if (ret)
return ret;
- *((unsigned int *) ptr) = t;
+ if (a->attr_ptr == ptr_ext4_super_block_offset)
+ *((__le32 *) ptr) = cpu_to_le32(t);
+ else
+ *((unsigned int *) ptr) = t;
return len;
case attr_inode_readahead:
return inode_readahead_blks_store(sbi, buf, len);
The patch below does not apply to the 3.18-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 976d34e2dab10ece5ea8fe7090b7692913f89084 Mon Sep 17 00:00:00 2001
From: Punit Agrawal <punit.agrawal(a)arm.com>
Date: Mon, 13 Aug 2018 11:43:51 +0100
Subject: [PATCH] KVM: arm/arm64: Skip updating PTE entry if no change
When there is contention on faulting in a particular page table entry
at stage 2, the break-before-make requirement of the architecture can
lead to additional refaulting due to TLB invalidation.
Avoid this by skipping a page table update if the new value of the PTE
matches the previous value.
Cc: stable(a)vger.kernel.org
Fixes: d5d8184d35c9 ("KVM: ARM: Memory virtualization setup")
Reviewed-by: Suzuki Poulose <suzuki.poulose(a)arm.com>
Acked-by: Christoffer Dall <christoffer.dall(a)arm.com>
Signed-off-by: Punit Agrawal <punit.agrawal(a)arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier(a)arm.com>
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index 13dfe36501aa..91aaf73b00df 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -1147,6 +1147,10 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
/* Create 2nd stage page table mapping - Level 3 */
old_pte = *pte;
if (pte_present(old_pte)) {
+ /* Skip page table update if there is no change */
+ if (pte_val(old_pte) == pte_val(*new_pte))
+ return 0;
+
kvm_set_pte(pte, __pte(0));
kvm_tlb_flush_vmid_ipa(kvm, addr);
} else {
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 976d34e2dab10ece5ea8fe7090b7692913f89084 Mon Sep 17 00:00:00 2001
From: Punit Agrawal <punit.agrawal(a)arm.com>
Date: Mon, 13 Aug 2018 11:43:51 +0100
Subject: [PATCH] KVM: arm/arm64: Skip updating PTE entry if no change
When there is contention on faulting in a particular page table entry
at stage 2, the break-before-make requirement of the architecture can
lead to additional refaulting due to TLB invalidation.
Avoid this by skipping a page table update if the new value of the PTE
matches the previous value.
Cc: stable(a)vger.kernel.org
Fixes: d5d8184d35c9 ("KVM: ARM: Memory virtualization setup")
Reviewed-by: Suzuki Poulose <suzuki.poulose(a)arm.com>
Acked-by: Christoffer Dall <christoffer.dall(a)arm.com>
Signed-off-by: Punit Agrawal <punit.agrawal(a)arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier(a)arm.com>
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index 13dfe36501aa..91aaf73b00df 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -1147,6 +1147,10 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
/* Create 2nd stage page table mapping - Level 3 */
old_pte = *pte;
if (pte_present(old_pte)) {
+ /* Skip page table update if there is no change */
+ if (pte_val(old_pte) == pte_val(*new_pte))
+ return 0;
+
kvm_set_pte(pte, __pte(0));
kvm_tlb_flush_vmid_ipa(kvm, addr);
} else {
The patch below does not apply to the 3.18-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 86658b819cd0a9aa584cd84453ed268a6f013770 Mon Sep 17 00:00:00 2001
From: Punit Agrawal <punit.agrawal(a)arm.com>
Date: Mon, 13 Aug 2018 11:43:50 +0100
Subject: [PATCH] KVM: arm/arm64: Skip updating PMD entry if no change
Contention on updating a PMD entry by a large number of vcpus can lead
to duplicate work when handling stage 2 page faults. As the page table
update follows the break-before-make requirement of the architecture,
it can lead to repeated refaults due to clearing the entry and
flushing the tlbs.
This problem is more likely when -
* there are large number of vcpus
* the mapping is large block mapping
such as when using PMD hugepages (512MB) with 64k pages.
Fix this by skipping the page table update if there is no change in
the entry being updated.
Cc: stable(a)vger.kernel.org
Fixes: ad361f093c1e ("KVM: ARM: Support hugetlbfs backed huge pages")
Reviewed-by: Suzuki Poulose <suzuki.poulose(a)arm.com>
Acked-by: Christoffer Dall <christoffer.dall(a)arm.com>
Signed-off-by: Punit Agrawal <punit.agrawal(a)arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier(a)arm.com>
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index 97d27cd9c654..13dfe36501aa 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -1044,19 +1044,35 @@ static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
pmd = stage2_get_pmd(kvm, cache, addr);
VM_BUG_ON(!pmd);
- /*
- * Mapping in huge pages should only happen through a fault. If a
- * page is merged into a transparent huge page, the individual
- * subpages of that huge page should be unmapped through MMU
- * notifiers before we get here.
- *
- * Merging of CompoundPages is not supported; they should become
- * splitting first, unmapped, merged, and mapped back in on-demand.
- */
- VM_BUG_ON(pmd_present(*pmd) && pmd_pfn(*pmd) != pmd_pfn(*new_pmd));
-
old_pmd = *pmd;
if (pmd_present(old_pmd)) {
+ /*
+ * Multiple vcpus faulting on the same PMD entry, can
+ * lead to them sequentially updating the PMD with the
+ * same value. Following the break-before-make
+ * (pmd_clear() followed by tlb_flush()) process can
+ * hinder forward progress due to refaults generated
+ * on missing translations.
+ *
+ * Skip updating the page table if the entry is
+ * unchanged.
+ */
+ if (pmd_val(old_pmd) == pmd_val(*new_pmd))
+ return 0;
+
+ /*
+ * Mapping in huge pages should only happen through a
+ * fault. If a page is merged into a transparent huge
+ * page, the individual subpages of that huge page
+ * should be unmapped through MMU notifiers before we
+ * get here.
+ *
+ * Merging of CompoundPages is not supported; they
+ * should become splitting first, unmapped, merged,
+ * and mapped back in on-demand.
+ */
+ VM_BUG_ON(pmd_pfn(old_pmd) != pmd_pfn(*new_pmd));
+
pmd_clear(pmd);
kvm_tlb_flush_vmid_ipa(kvm, addr);
} else {
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 86658b819cd0a9aa584cd84453ed268a6f013770 Mon Sep 17 00:00:00 2001
From: Punit Agrawal <punit.agrawal(a)arm.com>
Date: Mon, 13 Aug 2018 11:43:50 +0100
Subject: [PATCH] KVM: arm/arm64: Skip updating PMD entry if no change
Contention on updating a PMD entry by a large number of vcpus can lead
to duplicate work when handling stage 2 page faults. As the page table
update follows the break-before-make requirement of the architecture,
it can lead to repeated refaults due to clearing the entry and
flushing the tlbs.
This problem is more likely when -
* there are large number of vcpus
* the mapping is large block mapping
such as when using PMD hugepages (512MB) with 64k pages.
Fix this by skipping the page table update if there is no change in
the entry being updated.
Cc: stable(a)vger.kernel.org
Fixes: ad361f093c1e ("KVM: ARM: Support hugetlbfs backed huge pages")
Reviewed-by: Suzuki Poulose <suzuki.poulose(a)arm.com>
Acked-by: Christoffer Dall <christoffer.dall(a)arm.com>
Signed-off-by: Punit Agrawal <punit.agrawal(a)arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier(a)arm.com>
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index 97d27cd9c654..13dfe36501aa 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -1044,19 +1044,35 @@ static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
pmd = stage2_get_pmd(kvm, cache, addr);
VM_BUG_ON(!pmd);
- /*
- * Mapping in huge pages should only happen through a fault. If a
- * page is merged into a transparent huge page, the individual
- * subpages of that huge page should be unmapped through MMU
- * notifiers before we get here.
- *
- * Merging of CompoundPages is not supported; they should become
- * splitting first, unmapped, merged, and mapped back in on-demand.
- */
- VM_BUG_ON(pmd_present(*pmd) && pmd_pfn(*pmd) != pmd_pfn(*new_pmd));
-
old_pmd = *pmd;
if (pmd_present(old_pmd)) {
+ /*
+ * Multiple vcpus faulting on the same PMD entry, can
+ * lead to them sequentially updating the PMD with the
+ * same value. Following the break-before-make
+ * (pmd_clear() followed by tlb_flush()) process can
+ * hinder forward progress due to refaults generated
+ * on missing translations.
+ *
+ * Skip updating the page table if the entry is
+ * unchanged.
+ */
+ if (pmd_val(old_pmd) == pmd_val(*new_pmd))
+ return 0;
+
+ /*
+ * Mapping in huge pages should only happen through a
+ * fault. If a page is merged into a transparent huge
+ * page, the individual subpages of that huge page
+ * should be unmapped through MMU notifiers before we
+ * get here.
+ *
+ * Merging of CompoundPages is not supported; they
+ * should become splitting first, unmapped, merged,
+ * and mapped back in on-demand.
+ */
+ VM_BUG_ON(pmd_pfn(old_pmd) != pmd_pfn(*new_pmd));
+
pmd_clear(pmd);
kvm_tlb_flush_vmid_ipa(kvm, addr);
} else {
The patch below does not apply to the 3.18-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 5ad356eabc47d26a92140a0c4b20eba471c10de3 Mon Sep 17 00:00:00 2001
From: Greg Hackmann <ghackmann(a)android.com>
Date: Wed, 15 Aug 2018 12:51:21 -0700
Subject: [PATCH] arm64: mm: check for upper PAGE_SHIFT bits in pfn_valid()
ARM64's pfn_valid() shifts away the upper PAGE_SHIFT bits of the input
before seeing if the PFN is valid. This leads to false positives when
some of the upper bits are set, but the lower bits match a valid PFN.
For example, the following userspace code looks up a bogus entry in
/proc/kpageflags:
int pagemap = open("/proc/self/pagemap", O_RDONLY);
int pageflags = open("/proc/kpageflags", O_RDONLY);
uint64_t pfn, val;
lseek64(pagemap, [...], SEEK_SET);
read(pagemap, &pfn, sizeof(pfn));
if (pfn & (1UL << 63)) { /* valid PFN */
pfn &= ((1UL << 55) - 1); /* clear flag bits */
pfn |= (1UL << 55);
lseek64(pageflags, pfn * sizeof(uint64_t), SEEK_SET);
read(pageflags, &val, sizeof(val));
}
On ARM64 this causes the userspace process to crash with SIGSEGV rather
than reading (1 << KPF_NOPAGE). kpageflags_read() treats the offset as
valid, and stable_page_flags() will try to access an address between the
user and kernel address ranges.
Fixes: c1cc1552616d ("arm64: MMU initialisation")
Cc: stable(a)vger.kernel.org
Signed-off-by: Greg Hackmann <ghackmann(a)google.com>
Signed-off-by: Will Deacon <will.deacon(a)arm.com>
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 325cfb3b858a..811f9f8b3bb0 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -287,7 +287,11 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
#ifdef CONFIG_HAVE_ARCH_PFN_VALID
int pfn_valid(unsigned long pfn)
{
- return memblock_is_map_memory(pfn << PAGE_SHIFT);
+ phys_addr_t addr = pfn << PAGE_SHIFT;
+
+ if ((addr >> PAGE_SHIFT) != pfn)
+ return 0;
+ return memblock_is_map_memory(addr);
}
EXPORT_SYMBOL(pfn_valid);
#endif
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 4c4a39dd5fe2d13e2d2fa5fceb8ef95d19fc389a Mon Sep 17 00:00:00 2001
From: Suzuki K Poulose <suzuki.poulose(a)arm.com>
Date: Wed, 4 Jul 2018 23:07:45 +0100
Subject: [PATCH] arm64: Fix mismatched cache line size detection
If there is a mismatch in the I/D min line size, we must
always use the system wide safe value both in applications
and in the kernel, while performing cache operations. However,
we have been checking more bits than just the min line sizes,
which triggers false negatives. We may need to trap the user
accesses in such cases, but not necessarily patch the kernel.
This patch fixes the check to do the right thing as advertised.
A new capability will be added to check mismatches in other
fields and ensure we trap the CTR accesses.
Fixes: be68a8aaf925 ("arm64: cpufeature: Fix CTR_EL0 field definitions")
Cc: <stable(a)vger.kernel.org>
Cc: Mark Rutland <mark.rutland(a)arm.com>
Cc: Catalin Marinas <catalin.marinas(a)arm.com>
Reported-by: Will Deacon <will.deacon(a)arm.com>
Signed-off-by: Suzuki K Poulose <suzuki.poulose(a)arm.com>
Signed-off-by: Will Deacon <will.deacon(a)arm.com>
diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h
index 5df5cfe1c143..5ee5bca8c24b 100644
--- a/arch/arm64/include/asm/cache.h
+++ b/arch/arm64/include/asm/cache.h
@@ -21,12 +21,16 @@
#define CTR_L1IP_SHIFT 14
#define CTR_L1IP_MASK 3
#define CTR_DMINLINE_SHIFT 16
+#define CTR_IMINLINE_SHIFT 0
#define CTR_ERG_SHIFT 20
#define CTR_CWG_SHIFT 24
#define CTR_CWG_MASK 15
#define CTR_IDC_SHIFT 28
#define CTR_DIC_SHIFT 29
+#define CTR_CACHE_MINLINE_MASK \
+ (0xf << CTR_DMINLINE_SHIFT | 0xf << CTR_IMINLINE_SHIFT)
+
#define CTR_L1IP(ctr) (((ctr) >> CTR_L1IP_SHIFT) & CTR_L1IP_MASK)
#define ICACHE_POLICY_VPIPT 0
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 1d2b6d768efe..5d1fa928ea4b 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -68,9 +68,11 @@ static bool
has_mismatched_cache_line_size(const struct arm64_cpu_capabilities *entry,
int scope)
{
+ u64 mask = CTR_CACHE_MINLINE_MASK;
+
WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
- return (read_cpuid_cachetype() & arm64_ftr_reg_ctrel0.strict_mask) !=
- (arm64_ftr_reg_ctrel0.sys_val & arm64_ftr_reg_ctrel0.strict_mask);
+ return (read_cpuid_cachetype() & mask) !=
+ (arm64_ftr_reg_ctrel0.sys_val & mask);
}
static void
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index f24892a40d2c..25d5cef00333 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -214,7 +214,7 @@ static const struct arm64_ftr_bits ftr_ctr[] = {
* If we have differing I-cache policies, report it as the weakest - VIPT.
*/
ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_EXACT, 14, 2, ICACHE_POLICY_VIPT), /* L1Ip */
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* IminLine */
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_IMINLINE_SHIFT, 4, 0),
ARM64_FTR_END,
};
Subject of the patch: iommu/arm-smmu: Error out only if not enough
context interrupts
Commit ID: 42b88ec6530fd76f1ae06de7f09830bcbca5bbd6
Why: ARM SMMU does not initialize for Broadcom Stingray SoC if
bootloader reserves few contexts. Kernel should not error out if there
are enough context interrupts.
What kernel versions: 4.14.y and 4.18.y (applies cleanly).
Before patching:
Kernel does not boot using eMMC on Broadcom Stingray SoC with latest
bootloader emitting this error
"arm-smmu 64000000.mmu: found only 64 context interrupt(s) but 63 required"
After patching: Kernel cleanly boots on the SoC.
Thanks,
JB
Hi
On 07/31/2018 05:52 PM, Frederic Weisbecker wrote:
> Before updating the full nohz tick or the idle time on IRQ exit, we
> check first if we are not in a nesting interrupt, whether the inner
> interrupt is a hard or a soft IRQ.
>
> There is a historical reason for that: the dyntick idle mode used to
> reprogram the tick on IRQ exit, after softirq processing, and there was
> no point in doing that job in the outer nesting interrupt because the
> tick update will be performed through the end of the inner interrupt
> eventually, with even potential new timer updates.
>
> One corner case could show up though: if an idle tick interrupts a softirq
> executing inline in the idle loop (through a call to local_bh_enable())
> after we entered in dynticks mode, the IRQ won't reprogram the tick
> because it assumes the softirq executes on an inner IRQ-tail. As a
> result we might put the CPU in sleep mode with the tick completely
> stopped whereas a timer can still be enqueued. Indeed there is no tick
> reprogramming in local_bh_enable(). We probably asssumed there was no bh
> disabled section in idle, although there didn't seem to be debug code
> ensuring that.
>
> Nowadays the nesting interrupt optimization still stands but only concern
> full dynticks. The tick is stopped on IRQ exit in full dynticks mode
> and we want to wait for the end of the inner IRQ to reprogramm the tick.
> But in_interrupt() doesn't make a difference between softirqs executing
> on IRQ tail and those executing inline. What was to be considered a
> corner case in dynticks-idle mode now becomes a serious opportunity for
> a bug in full dynticks mode: if a tick interrupts a task executing
> softirq inline, the tick reprogramming will be ignored and we may exit
> to userspace after local_bh_enable() with an enqueued timer that will
> never fire.
>
> To fix this, simply keep reprogramming the tick if we are in a hardirq
> interrupting softirq. We can still figure out a way later to restore
> this optimization while excluding inline softirq processing.
>
> Reported-by: Anna-Maria Gleixner <anna-maria(a)linutronix.de>
> Signed-off-by: Frederic Weisbecker <frederic(a)kernel.org>
> Cc: Thomas Gleixner <tglx(a)linutronix.de>
> Cc: Ingo Molnar <mingo(a)kernel.org>
> Tested-by: Anna-Maria Gleixner <anna-maria(a)linutronix.de>
> ---
> kernel/softirq.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/kernel/softirq.c b/kernel/softirq.c
> index 900dcfe..0980a81 100644
> --- a/kernel/softirq.c
> +++ b/kernel/softirq.c
> @@ -386,7 +386,7 @@ static inline void tick_irq_exit(void)
>
> /* Make sure that timer wheel updates are propagated */
> if ((idle_cpu(cpu) && !need_resched()) || tick_nohz_full_cpu(cpu)) {
> - if (!in_interrupt())
> + if (!in_irq())
> tick_nohz_irq_exit();
> }
> #endif
>
This patch was back ported to the Stable linux-4.14.y and It causes regression -
flood of "NOHZ: local_softirq_pending" messages on all TI boards during boot (NFS boot):
[ 4.179796] NOHZ: local_softirq_pending 2c2 in sirq 256
[ 4.185051] NOHZ: local_softirq_pending 2c2 in sirq 256
the same is not reproducible with LKML - seems due to changes in tick-sched.c
__tick_nohz_idle_enter()/tick_nohz_irq_exit().
I've generated backtrace from can_stop_idle_tick() (see below) and seems this
patch makes tick_nohz_irq_exit() call unconditional in case of nested interrupt:
gic_handle_irq
|- irq_exit
|- preempt_count_sub(HARDIRQ_OFFSET); <-- [1]
|-__do_softirq
<irqs enabled>
|- gic_handle_irq()
|- irq_exit()
|- tick_irq_exit()
if (!in_irq()) <-- My understanding is that this condition will be always true due to [1]
tick_nohz_irq_exit();
|-__tick_nohz_idle_enter()
|- can_stop_idle_tick()
Sry, not sure if my conclusion is right and how can it be fixed.
[ 3.842320] NOHZ: local_softirq_pending 40 in sirq 256
[ 3.847485] ------------[ cut here ]------------
[ 3.852133] WARNING: CPU: 0 PID: 0 at kernel/time/tick-sched.c:915 __tick_nohz_idle_enter+0x4b8/0x568
[ 3.861393] Modules linked in:
[ 3.864469] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.14.66-01768-gc26f664-dirty #311
[ 3.872506] Hardware name: Generic DRA74X (Flattened Device Tree)
[ 3.878623] Backtrace:
[ 3.881091] [<c010c050>] (dump_backtrace) from [<c010c320>] (show_stack+0x18/0x1c)
[ 3.888696] r7:00000009 r6:600f0193 r5:00000000 r4:c0c5fca4
[ 3.894386] [<c010c308>] (show_stack) from [<c07ad028>] (dump_stack+0x8c/0xa0)
[ 3.901645] [<c07acf9c>] (dump_stack) from [<c012e558>] (__warn+0xec/0x104)
[ 3.908638] r7:00000009 r6:c0996d08 r5:00000000 r4:00000000
[ 3.914329] [<c012e46c>] (__warn) from [<c012e628>] (warn_slowpath_null+0x28/0x30)
[ 3.921933] r9:00000000 r8:e4e1f7de r7:c0c8c1d8 r6:c0c65180 r5:00000000 r4:eed408e8
[ 3.929715] [<c012e600>] (warn_slowpath_null) from [<c01a9780>] (__tick_nohz_idle_enter+0x4b8/0x568)
[ 3.938890] [<c01a92c8>] (__tick_nohz_idle_enter) from [<c01a9c74>] (tick_nohz_irq_exit+0x2c/0x30)
[ 3.947890] r10:c0c01f50 r9:c0c00000 r8:ee008000 r7:00000000 r6:c0c01ee0 r5:00000048
[ 3.955752] r4:c0b62afc
[ 3.958301] [<c01a9c48>] (tick_nohz_irq_exit) from [<c0133748>] (irq_exit+0xf4/0x144)
[ 3.966173] [<c0133654>] (irq_exit) from [<c0181e6c>] (__handle_domain_irq+0x68/0xbc)
[ 3.974043] [<c0181e04>] (__handle_domain_irq) from [<c0101508>] (gic_handle_irq+0x44/0x80)
[ 3.982432] r9:c0c00000 r8:fa213000 r7:fa212000 r6:c0c01dd0 r5:fa21200c r4:c0c03ff0
[ 3.990211] [<c01014c4>] (gic_handle_irq) from [<c010cf4c>] (__irq_svc+0x6c/0xa8)
[ 3.997726] Exception stack(0xc0c01dd0 to 0xc0c01e18)
[ 4.002800] 1dc0: 00000000 c0c65180 00000000 00000000
[ 4.011017] 1de0: 00000280 00000013 c0c00000 00000000 ee008000 c0c00000 c0c01f50 c0c01e7c
[ 4.019232] 1e00: c0c01e80 c0c01e20 c0133730 c01015dc 600f0113 ffffffff
[ 4.025877] r9:c0c00000 r8:ee008000 r7:c0c01e04 r6:ffffffff r5:600f0113 r4:c01015dc
[ 4.033659] [<c0101548>] (__do_softirq) from [<c0133730>] (irq_exit+0xdc/0x144)
[ 4.041002] r10:c0c01f50 r9:c0c00000 r8:ee008000 r7:00000000 r6:00000000 r5:00000013
[ 4.048863] r4:c0b62afc
[ 4.051414] [<c0133654>] (irq_exit) from [<c0181e6c>] (__handle_domain_irq+0x68/0xbc)
[ 4.059280] [<c0181e04>] (__handle_domain_irq) from [<c0101508>] (gic_handle_irq+0x44/0x80)
[ 4.067670] r9:c0c00000 r8:fa213000 r7:fa212000 r6:c0c01ee0 r5:fa21200c r4:c0c03ff0
[ 4.075448] [<c01014c4>] (gic_handle_irq) from [<c010cf4c>] (__irq_svc+0x6c/0xa8)
[ 4.082963] Exception stack(0xc0c01ee0 to 0xc0c01f28)
[ 4.088041] 1ee0: 00000001 00000000 fe600000 00000000 c0c00000 c0c03cc8 c0c03c68 c0b623b8
[ 4.096258] 1f00: 00000000 00000000 c0c01f50 c0c01f3c c0c01f1c c0c01f30 c0120f14 c0108ae4
[ 4.104469] 1f20: 600f0013 ffffffff
[ 4.107975] r9:c0c00000 r8:00000000 r7:c0c01f14 r6:ffffffff r5:600f0013 r4:c0108ae4
[ 4.115760] [<c0108abc>] (arch_cpu_idle) from [<c07c6a14>] (default_idle_call+0x28/0x34)
[ 4.123894] [<c07c69ec>] (default_idle_call) from [<c016e4a4>] (do_idle+0x180/0x214)
[ 4.131676] [<c016e324>] (do_idle) from [<c016e7fc>] (cpu_startup_entry+0x20/0x24)
[ 4.139283] r10:effff7c0 r9:c0b48a30 r8:c0c64000 r7:c0c03c40 r6:ffffffff r5:00000002
[ 4.147146] r4:000000be
[ 4.149698] [<c016e7dc>] (cpu_startup_entry) from [<c07c12e4>] (rest_init+0xd8/0xdc)
[ 4.157483] [<c07c120c>] (rest_init) from [<c0b00d8c>] (start_kernel+0x3cc/0x3d8)
[ 4.164997] r5:c0c64000 r4:c0c6404c
[ 4.168592] [<c0b009c0>] (start_kernel) from [<8000807c>] (0x8000807c)
[ 4.175148] ---[ end trace 9c10a64bf81ad3fe ]---
--
regards,
-grygorii
In NMI context, we might be in the middle of context switching or in
the middle of switch_mm_irqs_off(). In either case, CR3 might not
match current->mm, which could cause copy_from_user_nmi() and
friends to read the wrong memory.
Fix it by adding a new nmi_uaccess_okay() helper and checking it in
copy_from_user_nmi() and in __copy_from_user_nmi()'s callers.
Cc: stable(a)vger.kernel.org
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Nadav Amit <nadav.amit(a)gmail.com>
Signed-off-by: Andy Lutomirski <luto(a)kernel.org>
---
Nadav, this is intended for your series. Want to add it right
before the use_temporary_mm() stuff?
Changes from v1:
- Improved comments
- Add debugging
- Fix bugs
arch/x86/events/core.c | 2 +-
arch/x86/include/asm/tlbflush.h | 44 +++++++++++++++++++++++++++++++++
arch/x86/lib/usercopy.c | 5 ++++
arch/x86/mm/tlb.c | 7 ++++++
4 files changed, 57 insertions(+), 1 deletion(-)
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 5f4829f10129..dfb2f7c0d019 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2465,7 +2465,7 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs
perf_callchain_store(entry, regs->ip);
- if (!current->mm)
+ if (!nmi_uaccess_okay())
return;
if (perf_callchain_user32(regs, entry))
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 29c9da6c62fc..183d36a98b04 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -175,8 +175,16 @@ struct tlb_state {
* are on. This means that it may not match current->active_mm,
* which will contain the previous user mm when we're in lazy TLB
* mode even if we've already switched back to swapper_pg_dir.
+ *
+ * During switch_mm_irqs_off(), loaded_mm will be set to
+ * LOADED_MM_SWITCHING during the brief interrupts-off window
+ * when CR3 and loaded_mm would otherwise be inconsistent. This
+ * is for nmi_uaccess_okay()'s benefit.
*/
struct mm_struct *loaded_mm;
+
+#define LOADED_MM_SWITCHING ((struct mm_struct *)1)
+
u16 loaded_mm_asid;
u16 next_asid;
/* last user mm's ctx id */
@@ -246,6 +254,42 @@ struct tlb_state {
};
DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate);
+/*
+ * Blindly accessing user memory from NMI context can be dangerous
+ * if we're in the middle of switching the current user task or
+ * switching the loaded mm. It can also be dangerous if we
+ * interrupted some kernel code that was temporarily using a
+ * different mm.
+ */
+static inline bool nmi_uaccess_okay(void)
+{
+ struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
+ struct mm_struct *current_mm = current->mm;
+
+#ifdef CONFIG_DEBUG_VM
+ WARN_ON_ONCE(!loaded_mm);
+#endif
+
+ /*
+ * The condition we want to check is
+ * current_mm->pgd == __va(read_cr3_pa()). This may be slow, though,
+ * if we're running in a VM with shadow paging, and nmi_uaccess_okay()
+ * is supposed to be reasonably fast.
+ *
+ * Instead, we check the almost equivalent but somewhat conservative
+ * condition below, and we rely on the fact that switch_mm_irqs_off()
+ * sets loaded_mm to LOADED_MM_SWITCHING before writing to CR3.
+ */
+ if (loaded_mm != current_mm)
+ return false;
+
+#ifdef CONFIG_DEBUG_VM
+ WARN_ON_ONCE(current_mm->pgd != __va(read_cr3_pa()));
+#endif
+
+ return true;
+}
+
/* Initialize cr4 shadow for this CPU. */
static inline void cr4_init_shadow(void)
{
diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c
index c8c6ad0d58b8..3f435d7fca5e 100644
--- a/arch/x86/lib/usercopy.c
+++ b/arch/x86/lib/usercopy.c
@@ -7,6 +7,8 @@
#include <linux/uaccess.h>
#include <linux/export.h>
+#include <asm/tlbflush.h>
+
/*
* We rely on the nested NMI work to allow atomic faults from the NMI path; the
* nested NMI paths are careful to preserve CR2.
@@ -19,6 +21,9 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
if (__range_not_ok(from, n, TASK_SIZE))
return n;
+ if (!nmi_uaccess_okay())
+ return n;
+
/*
* Even though this function is typically called from NMI/IRQ context
* disable pagefaults so that its behaviour is consistent even when
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index e721cf2d4290..707d2b2a333f 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -304,6 +304,10 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush);
+ /* Let nmi_uaccess_okay() know that we're changing CR3. */
+ this_cpu_write(cpu_tlbstate.loaded_mm, LOADED_MM_SWITCHING);
+ barrier();
+
if (need_flush) {
this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
@@ -334,6 +338,9 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
if (next != &init_mm)
this_cpu_write(cpu_tlbstate.last_ctx_id, next->context.ctx_id);
+ /* Make sure we write CR3 before loaded_mm. */
+ barrier();
+
this_cpu_write(cpu_tlbstate.loaded_mm, next);
this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid);
}
--
2.17.1
From: Ralph Campbell <rcampbell(a)nvidia.com>
In hmm_mirror_unregister(), mm->hmm is set to NULL and then
mmu_notifier_unregister_no_release() is called. That creates a small
window where mmu_notifier can call mmu_notifier_ops with mm->hmm equal
to NULL. Fix this by first unregistering mmu notifier callbacks and
then setting mm->hmm to NULL.
Similarly in hmm_register(), set mm->hmm before registering mmu_notifier
callbacks so callback functions always see mm->hmm set.
Signed-off-by: Ralph Campbell <rcampbell(a)nvidia.com>
Reviewed-by: John Hubbard <jhubbard(a)nvidia.com>
Reviewed-by: Jérôme Glisse <jglisse(a)redhat.com>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: stable(a)vger.kernel.org
---
mm/hmm.c | 36 +++++++++++++++++++++---------------
1 file changed, 21 insertions(+), 15 deletions(-)
diff --git a/mm/hmm.c b/mm/hmm.c
index 9a068a1da487..a16678d08127 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -91,16 +91,6 @@ static struct hmm *hmm_register(struct mm_struct *mm)
spin_lock_init(&hmm->lock);
hmm->mm = mm;
- /*
- * We should only get here if hold the mmap_sem in write mode ie on
- * registration of first mirror through hmm_mirror_register()
- */
- hmm->mmu_notifier.ops = &hmm_mmu_notifier_ops;
- if (__mmu_notifier_register(&hmm->mmu_notifier, mm)) {
- kfree(hmm);
- return NULL;
- }
-
spin_lock(&mm->page_table_lock);
if (!mm->hmm)
mm->hmm = hmm;
@@ -108,12 +98,27 @@ static struct hmm *hmm_register(struct mm_struct *mm)
cleanup = true;
spin_unlock(&mm->page_table_lock);
- if (cleanup) {
- mmu_notifier_unregister(&hmm->mmu_notifier, mm);
- kfree(hmm);
- }
+ if (cleanup)
+ goto error;
+
+ /*
+ * We should only get here if hold the mmap_sem in write mode ie on
+ * registration of first mirror through hmm_mirror_register()
+ */
+ hmm->mmu_notifier.ops = &hmm_mmu_notifier_ops;
+ if (__mmu_notifier_register(&hmm->mmu_notifier, mm))
+ goto error_mm;
return mm->hmm;
+
+error_mm:
+ spin_lock(&mm->page_table_lock);
+ if (mm->hmm == hmm)
+ mm->hmm = NULL;
+ spin_unlock(&mm->page_table_lock);
+error:
+ kfree(hmm);
+ return NULL;
}
void hmm_mm_destroy(struct mm_struct *mm)
@@ -278,12 +283,13 @@ void hmm_mirror_unregister(struct hmm_mirror *mirror)
if (!should_unregister || mm == NULL)
return;
+ mmu_notifier_unregister_no_release(&hmm->mmu_notifier, mm);
+
spin_lock(&mm->page_table_lock);
if (mm->hmm == hmm)
mm->hmm = NULL;
spin_unlock(&mm->page_table_lock);
- mmu_notifier_unregister_no_release(&hmm->mmu_notifier, mm);
kfree(hmm);
}
EXPORT_SYMBOL(hmm_mirror_unregister);
--
2.17.1
From: Jan-Marek Glogowski <glogow(a)fbihome.de>
This re-applies the workaround for "some DP sinks, [which] are a
little nuts" from commit 1a36147bb939 ("drm/i915: Perform link
quality check unconditionally during long pulse").
It makes the secondary AOC E2460P monitor connected via DP to an
acer Veriton N4640G usable again.
This hunk was dropped in commit c85d200e8321 ("drm/i915: Move SST
DP link retraining into the ->post_hotplug() hook")
Fixes: c85d200e8321 ("drm/i915: Move SST DP link retraining into the ->post_hotplug() hook")
[Cleaned up commit message, added stable cc]
Signed-off-by: Lyude Paul <lyude(a)redhat.com>
Signed-off-by: Jan-Marek Glogowski <glogow(a)fbihome.de>
Cc: stable(a)vger.kernel.org
---
Resending this to update patchwork; will push in a little bit
drivers/gpu/drm/i915/intel_dp.c | 33 +++++++++++++++++++--------------
1 file changed, 19 insertions(+), 14 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index b3f6f04c3c7d..db8515171270 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -4333,18 +4333,6 @@ intel_dp_needs_link_retrain(struct intel_dp *intel_dp)
return !drm_dp_channel_eq_ok(link_status, intel_dp->lane_count);
}
-/*
- * If display is now connected check links status,
- * there has been known issues of link loss triggering
- * long pulse.
- *
- * Some sinks (eg. ASUS PB287Q) seem to perform some
- * weird HPD ping pong during modesets. So we can apparently
- * end up with HPD going low during a modeset, and then
- * going back up soon after. And once that happens we must
- * retrain the link to get a picture. That's in case no
- * userspace component reacted to intermittent HPD dip.
- */
int intel_dp_retrain_link(struct intel_encoder *encoder,
struct drm_modeset_acquire_ctx *ctx)
{
@@ -5031,7 +5019,8 @@ intel_dp_unset_edid(struct intel_dp *intel_dp)
}
static int
-intel_dp_long_pulse(struct intel_connector *connector)
+intel_dp_long_pulse(struct intel_connector *connector,
+ struct drm_modeset_acquire_ctx *ctx)
{
struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
struct intel_dp *intel_dp = intel_attached_dp(&connector->base);
@@ -5090,6 +5079,22 @@ intel_dp_long_pulse(struct intel_connector *connector)
*/
status = connector_status_disconnected;
goto out;
+ } else {
+ /*
+ * If display is now connected check links status,
+ * there has been known issues of link loss triggering
+ * long pulse.
+ *
+ * Some sinks (eg. ASUS PB287Q) seem to perform some
+ * weird HPD ping pong during modesets. So we can apparently
+ * end up with HPD going low during a modeset, and then
+ * going back up soon after. And once that happens we must
+ * retrain the link to get a picture. That's in case no
+ * userspace component reacted to intermittent HPD dip.
+ */
+ struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base;
+
+ intel_dp_retrain_link(encoder, ctx);
}
/*
@@ -5151,7 +5156,7 @@ intel_dp_detect(struct drm_connector *connector,
return ret;
}
- status = intel_dp_long_pulse(intel_dp->attached_connector);
+ status = intel_dp_long_pulse(intel_dp->attached_connector, ctx);
}
intel_dp->detect_done = false;
--
2.17.1
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 0d836392cadd5535f4184d46d901a82eb276ed62 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Fri, 20 Jul 2018 10:59:06 +0100
Subject: [PATCH] Btrfs: fix mount failure after fsync due to hard link
recreation
If we end up with logging an inode reference item which has the same name
but different index from the one we have persisted, we end up failing when
replaying the log with an errno value of -EEXIST. The error comes from
btrfs_add_link(), which is called from add_inode_ref(), when we are
replaying an inode reference item.
Example scenario where this happens:
$ mkfs.btrfs -f /dev/sdb
$ mount /dev/sdb /mnt
$ touch /mnt/foo
$ ln /mnt/foo /mnt/bar
$ sync
# Rename the first hard link (foo) to a new name and rename the second
# hard link (bar) to the old name of the first hard link (foo).
$ mv /mnt/foo /mnt/qwerty
$ mv /mnt/bar /mnt/foo
# Create a new file, in the same parent directory, with the old name of
# the second hard link (bar) and fsync this new file.
# We do this instead of calling fsync on foo/qwerty because if we did
# that the fsync resulted in a full transaction commit, not triggering
# the problem.
$ touch /mnt/bar
$ xfs_io -c "fsync" /mnt/bar
<power fail>
$ mount /dev/sdb /mnt
mount: mount /dev/sdb on /mnt failed: File exists
So fix this by checking if a conflicting inode reference exists (same
name, same parent but different index), removing it (and the associated
dir index entries from the parent inode) if it exists, before attempting
to add the new reference.
A test case for fstests follows soon.
CC: stable(a)vger.kernel.org # 4.4+
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 10f6a4223897..033aeebbe9de 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -1290,6 +1290,46 @@ static int unlink_old_inode_refs(struct btrfs_trans_handle *trans,
return ret;
}
+static int btrfs_inode_ref_exists(struct inode *inode, struct inode *dir,
+ const u8 ref_type, const char *name,
+ const int namelen)
+{
+ struct btrfs_key key;
+ struct btrfs_path *path;
+ const u64 parent_id = btrfs_ino(BTRFS_I(dir));
+ int ret;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ key.objectid = btrfs_ino(BTRFS_I(inode));
+ key.type = ref_type;
+ if (key.type == BTRFS_INODE_REF_KEY)
+ key.offset = parent_id;
+ else
+ key.offset = btrfs_extref_hash(parent_id, name, namelen);
+
+ ret = btrfs_search_slot(NULL, BTRFS_I(inode)->root, &key, path, 0, 0);
+ if (ret < 0)
+ goto out;
+ if (ret > 0) {
+ ret = 0;
+ goto out;
+ }
+ if (key.type == BTRFS_INODE_EXTREF_KEY)
+ ret = btrfs_find_name_in_ext_backref(path->nodes[0],
+ path->slots[0], parent_id,
+ name, namelen, NULL);
+ else
+ ret = btrfs_find_name_in_backref(path->nodes[0], path->slots[0],
+ name, namelen, NULL);
+
+out:
+ btrfs_free_path(path);
+ return ret;
+}
+
/*
* replay one inode back reference item found in the log tree.
* eb, slot and key refer to the buffer and key found in the log tree.
@@ -1399,6 +1439,32 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
}
}
+ /*
+ * If a reference item already exists for this inode
+ * with the same parent and name, but different index,
+ * drop it and the corresponding directory index entries
+ * from the parent before adding the new reference item
+ * and dir index entries, otherwise we would fail with
+ * -EEXIST returned from btrfs_add_link() below.
+ */
+ ret = btrfs_inode_ref_exists(inode, dir, key->type,
+ name, namelen);
+ if (ret > 0) {
+ ret = btrfs_unlink_inode(trans, root,
+ BTRFS_I(dir),
+ BTRFS_I(inode),
+ name, namelen);
+ /*
+ * If we dropped the link count to 0, bump it so
+ * that later the iput() on the inode will not
+ * free it. We will fixup the link count later.
+ */
+ if (!ret && inode->i_nlink == 0)
+ inc_nlink(inode);
+ }
+ if (ret < 0)
+ goto out;
+
/* insert our name */
ret = btrfs_add_link(trans, BTRFS_I(dir),
BTRFS_I(inode),
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 0d836392cadd5535f4184d46d901a82eb276ed62 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Fri, 20 Jul 2018 10:59:06 +0100
Subject: [PATCH] Btrfs: fix mount failure after fsync due to hard link
recreation
If we end up with logging an inode reference item which has the same name
but different index from the one we have persisted, we end up failing when
replaying the log with an errno value of -EEXIST. The error comes from
btrfs_add_link(), which is called from add_inode_ref(), when we are
replaying an inode reference item.
Example scenario where this happens:
$ mkfs.btrfs -f /dev/sdb
$ mount /dev/sdb /mnt
$ touch /mnt/foo
$ ln /mnt/foo /mnt/bar
$ sync
# Rename the first hard link (foo) to a new name and rename the second
# hard link (bar) to the old name of the first hard link (foo).
$ mv /mnt/foo /mnt/qwerty
$ mv /mnt/bar /mnt/foo
# Create a new file, in the same parent directory, with the old name of
# the second hard link (bar) and fsync this new file.
# We do this instead of calling fsync on foo/qwerty because if we did
# that the fsync resulted in a full transaction commit, not triggering
# the problem.
$ touch /mnt/bar
$ xfs_io -c "fsync" /mnt/bar
<power fail>
$ mount /dev/sdb /mnt
mount: mount /dev/sdb on /mnt failed: File exists
So fix this by checking if a conflicting inode reference exists (same
name, same parent but different index), removing it (and the associated
dir index entries from the parent inode) if it exists, before attempting
to add the new reference.
A test case for fstests follows soon.
CC: stable(a)vger.kernel.org # 4.4+
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 10f6a4223897..033aeebbe9de 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -1290,6 +1290,46 @@ static int unlink_old_inode_refs(struct btrfs_trans_handle *trans,
return ret;
}
+static int btrfs_inode_ref_exists(struct inode *inode, struct inode *dir,
+ const u8 ref_type, const char *name,
+ const int namelen)
+{
+ struct btrfs_key key;
+ struct btrfs_path *path;
+ const u64 parent_id = btrfs_ino(BTRFS_I(dir));
+ int ret;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ key.objectid = btrfs_ino(BTRFS_I(inode));
+ key.type = ref_type;
+ if (key.type == BTRFS_INODE_REF_KEY)
+ key.offset = parent_id;
+ else
+ key.offset = btrfs_extref_hash(parent_id, name, namelen);
+
+ ret = btrfs_search_slot(NULL, BTRFS_I(inode)->root, &key, path, 0, 0);
+ if (ret < 0)
+ goto out;
+ if (ret > 0) {
+ ret = 0;
+ goto out;
+ }
+ if (key.type == BTRFS_INODE_EXTREF_KEY)
+ ret = btrfs_find_name_in_ext_backref(path->nodes[0],
+ path->slots[0], parent_id,
+ name, namelen, NULL);
+ else
+ ret = btrfs_find_name_in_backref(path->nodes[0], path->slots[0],
+ name, namelen, NULL);
+
+out:
+ btrfs_free_path(path);
+ return ret;
+}
+
/*
* replay one inode back reference item found in the log tree.
* eb, slot and key refer to the buffer and key found in the log tree.
@@ -1399,6 +1439,32 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
}
}
+ /*
+ * If a reference item already exists for this inode
+ * with the same parent and name, but different index,
+ * drop it and the corresponding directory index entries
+ * from the parent before adding the new reference item
+ * and dir index entries, otherwise we would fail with
+ * -EEXIST returned from btrfs_add_link() below.
+ */
+ ret = btrfs_inode_ref_exists(inode, dir, key->type,
+ name, namelen);
+ if (ret > 0) {
+ ret = btrfs_unlink_inode(trans, root,
+ BTRFS_I(dir),
+ BTRFS_I(inode),
+ name, namelen);
+ /*
+ * If we dropped the link count to 0, bump it so
+ * that later the iput() on the inode will not
+ * free it. We will fixup the link count later.
+ */
+ if (!ret && inode->i_nlink == 0)
+ inc_nlink(inode);
+ }
+ if (ret < 0)
+ goto out;
+
/* insert our name */
ret = btrfs_add_link(trans, BTRFS_I(dir),
BTRFS_I(inode),
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 0d836392cadd5535f4184d46d901a82eb276ed62 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Fri, 20 Jul 2018 10:59:06 +0100
Subject: [PATCH] Btrfs: fix mount failure after fsync due to hard link
recreation
If we end up with logging an inode reference item which has the same name
but different index from the one we have persisted, we end up failing when
replaying the log with an errno value of -EEXIST. The error comes from
btrfs_add_link(), which is called from add_inode_ref(), when we are
replaying an inode reference item.
Example scenario where this happens:
$ mkfs.btrfs -f /dev/sdb
$ mount /dev/sdb /mnt
$ touch /mnt/foo
$ ln /mnt/foo /mnt/bar
$ sync
# Rename the first hard link (foo) to a new name and rename the second
# hard link (bar) to the old name of the first hard link (foo).
$ mv /mnt/foo /mnt/qwerty
$ mv /mnt/bar /mnt/foo
# Create a new file, in the same parent directory, with the old name of
# the second hard link (bar) and fsync this new file.
# We do this instead of calling fsync on foo/qwerty because if we did
# that the fsync resulted in a full transaction commit, not triggering
# the problem.
$ touch /mnt/bar
$ xfs_io -c "fsync" /mnt/bar
<power fail>
$ mount /dev/sdb /mnt
mount: mount /dev/sdb on /mnt failed: File exists
So fix this by checking if a conflicting inode reference exists (same
name, same parent but different index), removing it (and the associated
dir index entries from the parent inode) if it exists, before attempting
to add the new reference.
A test case for fstests follows soon.
CC: stable(a)vger.kernel.org # 4.4+
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 10f6a4223897..033aeebbe9de 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -1290,6 +1290,46 @@ again:
return ret;
}
+static int btrfs_inode_ref_exists(struct inode *inode, struct inode *dir,
+ const u8 ref_type, const char *name,
+ const int namelen)
+{
+ struct btrfs_key key;
+ struct btrfs_path *path;
+ const u64 parent_id = btrfs_ino(BTRFS_I(dir));
+ int ret;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ key.objectid = btrfs_ino(BTRFS_I(inode));
+ key.type = ref_type;
+ if (key.type == BTRFS_INODE_REF_KEY)
+ key.offset = parent_id;
+ else
+ key.offset = btrfs_extref_hash(parent_id, name, namelen);
+
+ ret = btrfs_search_slot(NULL, BTRFS_I(inode)->root, &key, path, 0, 0);
+ if (ret < 0)
+ goto out;
+ if (ret > 0) {
+ ret = 0;
+ goto out;
+ }
+ if (key.type == BTRFS_INODE_EXTREF_KEY)
+ ret = btrfs_find_name_in_ext_backref(path->nodes[0],
+ path->slots[0], parent_id,
+ name, namelen, NULL);
+ else
+ ret = btrfs_find_name_in_backref(path->nodes[0], path->slots[0],
+ name, namelen, NULL);
+
+out:
+ btrfs_free_path(path);
+ return ret;
+}
+
/*
* replay one inode back reference item found in the log tree.
* eb, slot and key refer to the buffer and key found in the log tree.
@@ -1399,6 +1439,32 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
}
}
+ /*
+ * If a reference item already exists for this inode
+ * with the same parent and name, but different index,
+ * drop it and the corresponding directory index entries
+ * from the parent before adding the new reference item
+ * and dir index entries, otherwise we would fail with
+ * -EEXIST returned from btrfs_add_link() below.
+ */
+ ret = btrfs_inode_ref_exists(inode, dir, key->type,
+ name, namelen);
+ if (ret > 0) {
+ ret = btrfs_unlink_inode(trans, root,
+ BTRFS_I(dir),
+ BTRFS_I(inode),
+ name, namelen);
+ /*
+ * If we dropped the link count to 0, bump it so
+ * that later the iput() on the inode will not
+ * free it. We will fixup the link count later.
+ */
+ if (!ret && inode->i_nlink == 0)
+ inc_nlink(inode);
+ }
+ if (ret < 0)
+ goto out;
+
/* insert our name */
ret = btrfs_add_link(trans, BTRFS_I(dir),
BTRFS_I(inode),
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 46b2f4590aab71d31088a265c86026b1e96c9de4 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Tue, 24 Jul 2018 11:54:04 +0100
Subject: [PATCH] Btrfs: fix send failure when root has deleted files still
open
The more common use case of send involves creating a RO snapshot and then
use it for a send operation. In this case it's not possible to have inodes
in the snapshot that have a link count of zero (inode with an orphan item)
since during snapshot creation we do the orphan cleanup. However, other
less common use cases for send can end up seeing inodes with a link count
of zero and in this case the send operation fails with a ENOENT error
because any attempt to generate a path for the inode, with the purpose
of creating it or updating it at the receiver, fails since there are no
inode reference items. One use case it to use a regular subvolume for
a send operation after turning it to RO mode or turning a RW snapshot
into RO mode and then using it for a send operation. In both cases, if a
file gets all its hard links deleted while there is an open file
descriptor before turning the subvolume/snapshot into RO mode, the send
operation will encounter an inode with a link count of zero and then
fail with errno ENOENT.
Example using a full send with a subvolume:
$ mkfs.btrfs -f /dev/sdb
$ mount /dev/sdb /mnt
$ btrfs subvolume create /mnt/sv1
$ touch /mnt/sv1/foo
$ touch /mnt/sv1/bar
# keep an open file descriptor on file bar
$ exec 73</mnt/sv1/bar
$ unlink /mnt/sv1/bar
# Turn the subvolume to RO mode and use it for a full send, while
# holding the open file descriptor.
$ btrfs property set /mnt/sv1 ro true
$ btrfs send -f /tmp/full.send /mnt/sv1
At subvol /mnt/sv1
ERROR: send ioctl failed with -2: No such file or directory
Example using an incremental send with snapshots:
$ mkfs.btrfs -f /dev/sdb
$ mount /dev/sdb /mnt
$ btrfs subvolume create /mnt/sv1
$ touch /mnt/sv1/foo
$ touch /mnt/sv1/bar
$ btrfs subvolume snapshot -r /mnt/sv1 /mnt/snap1
$ echo "hello world" >> /mnt/sv1/bar
$ btrfs subvolume snapshot -r /mnt/sv1 /mnt/snap2
# Turn the second snapshot to RW mode and delete file foo while
# holding an open file descriptor on it.
$ btrfs property set /mnt/snap2 ro false
$ exec 73</mnt/snap2/foo
$ unlink /mnt/snap2/foo
# Set the second snapshot back to RO mode and do an incremental send.
$ btrfs property set /mnt/snap2 ro true
$ btrfs send -f /tmp/inc.send -p /mnt/snap1 /mnt/snap2
At subvol /mnt/snap2
ERROR: send ioctl failed with -2: No such file or directory
So fix this by ignoring inodes with a link count of zero if we are either
doing a full send or if they do not exist in the parent snapshot (they
are new in the send snapshot), and unlink all paths found in the parent
snapshot when doing an incremental send (and ignoring all other inode
items, such as xattrs and extents).
A test case for fstests follows soon.
CC: stable(a)vger.kernel.org # 4.4+
Reported-by: Martin Wilck <martin.wilck(a)suse.com>
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 42e04cd3cd95..551294a6c9e2 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -100,6 +100,7 @@ struct send_ctx {
u64 cur_inode_rdev;
u64 cur_inode_last_extent;
u64 cur_inode_next_write_offset;
+ bool ignore_cur_inode;
u64 send_progress;
@@ -5796,6 +5797,9 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
int pending_move = 0;
int refs_processed = 0;
+ if (sctx->ignore_cur_inode)
+ return 0;
+
ret = process_recorded_refs_if_needed(sctx, at_end, &pending_move,
&refs_processed);
if (ret < 0)
@@ -5914,6 +5918,93 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
return ret;
}
+struct parent_paths_ctx {
+ struct list_head *refs;
+ struct send_ctx *sctx;
+};
+
+static int record_parent_ref(int num, u64 dir, int index, struct fs_path *name,
+ void *ctx)
+{
+ struct parent_paths_ctx *ppctx = ctx;
+
+ return record_ref(ppctx->sctx->parent_root, dir, name, ppctx->sctx,
+ ppctx->refs);
+}
+
+/*
+ * Issue unlink operations for all paths of the current inode found in the
+ * parent snapshot.
+ */
+static int btrfs_unlink_all_paths(struct send_ctx *sctx)
+{
+ LIST_HEAD(deleted_refs);
+ struct btrfs_path *path;
+ struct btrfs_key key;
+ struct parent_paths_ctx ctx;
+ int ret;
+
+ path = alloc_path_for_send();
+ if (!path)
+ return -ENOMEM;
+
+ key.objectid = sctx->cur_ino;
+ key.type = BTRFS_INODE_REF_KEY;
+ key.offset = 0;
+ ret = btrfs_search_slot(NULL, sctx->parent_root, &key, path, 0, 0);
+ if (ret < 0)
+ goto out;
+
+ ctx.refs = &deleted_refs;
+ ctx.sctx = sctx;
+
+ while (true) {
+ struct extent_buffer *eb = path->nodes[0];
+ int slot = path->slots[0];
+
+ if (slot >= btrfs_header_nritems(eb)) {
+ ret = btrfs_next_leaf(sctx->parent_root, path);
+ if (ret < 0)
+ goto out;
+ else if (ret > 0)
+ break;
+ continue;
+ }
+
+ btrfs_item_key_to_cpu(eb, &key, slot);
+ if (key.objectid != sctx->cur_ino)
+ break;
+ if (key.type != BTRFS_INODE_REF_KEY &&
+ key.type != BTRFS_INODE_EXTREF_KEY)
+ break;
+
+ ret = iterate_inode_ref(sctx->parent_root, path, &key, 1,
+ record_parent_ref, &ctx);
+ if (ret < 0)
+ goto out;
+
+ path->slots[0]++;
+ }
+
+ while (!list_empty(&deleted_refs)) {
+ struct recorded_ref *ref;
+
+ ref = list_first_entry(&deleted_refs, struct recorded_ref, list);
+ ret = send_unlink(sctx, ref->full_path);
+ if (ret < 0)
+ goto out;
+ fs_path_free(ref->full_path);
+ list_del(&ref->list);
+ kfree(ref);
+ }
+ ret = 0;
+out:
+ btrfs_free_path(path);
+ if (ret)
+ __free_recorded_refs(&deleted_refs);
+ return ret;
+}
+
static int changed_inode(struct send_ctx *sctx,
enum btrfs_compare_tree_result result)
{
@@ -5928,6 +6019,7 @@ static int changed_inode(struct send_ctx *sctx,
sctx->cur_inode_new_gen = 0;
sctx->cur_inode_last_extent = (u64)-1;
sctx->cur_inode_next_write_offset = 0;
+ sctx->ignore_cur_inode = false;
/*
* Set send_progress to current inode. This will tell all get_cur_xxx
@@ -5968,6 +6060,33 @@ static int changed_inode(struct send_ctx *sctx,
sctx->cur_inode_new_gen = 1;
}
+ /*
+ * Normally we do not find inodes with a link count of zero (orphans)
+ * because the most common case is to create a snapshot and use it
+ * for a send operation. However other less common use cases involve
+ * using a subvolume and send it after turning it to RO mode just
+ * after deleting all hard links of a file while holding an open
+ * file descriptor against it or turning a RO snapshot into RW mode,
+ * keep an open file descriptor against a file, delete it and then
+ * turn the snapshot back to RO mode before using it for a send
+ * operation. So if we find such cases, ignore the inode and all its
+ * items completely if it's a new inode, or if it's a changed inode
+ * make sure all its previous paths (from the parent snapshot) are all
+ * unlinked and all other the inode items are ignored.
+ */
+ if (result == BTRFS_COMPARE_TREE_NEW ||
+ result == BTRFS_COMPARE_TREE_CHANGED) {
+ u32 nlinks;
+
+ nlinks = btrfs_inode_nlink(sctx->left_path->nodes[0], left_ii);
+ if (nlinks == 0) {
+ sctx->ignore_cur_inode = true;
+ if (result == BTRFS_COMPARE_TREE_CHANGED)
+ ret = btrfs_unlink_all_paths(sctx);
+ goto out;
+ }
+ }
+
if (result == BTRFS_COMPARE_TREE_NEW) {
sctx->cur_inode_gen = left_gen;
sctx->cur_inode_new = 1;
@@ -6306,15 +6425,17 @@ static int changed_cb(struct btrfs_path *left_path,
key->objectid == BTRFS_FREE_SPACE_OBJECTID)
goto out;
- if (key->type == BTRFS_INODE_ITEM_KEY)
+ if (key->type == BTRFS_INODE_ITEM_KEY) {
ret = changed_inode(sctx, result);
- else if (key->type == BTRFS_INODE_REF_KEY ||
- key->type == BTRFS_INODE_EXTREF_KEY)
- ret = changed_ref(sctx, result);
- else if (key->type == BTRFS_XATTR_ITEM_KEY)
- ret = changed_xattr(sctx, result);
- else if (key->type == BTRFS_EXTENT_DATA_KEY)
- ret = changed_extent(sctx, result);
+ } else if (!sctx->ignore_cur_inode) {
+ if (key->type == BTRFS_INODE_REF_KEY ||
+ key->type == BTRFS_INODE_EXTREF_KEY)
+ ret = changed_ref(sctx, result);
+ else if (key->type == BTRFS_XATTR_ITEM_KEY)
+ ret = changed_xattr(sctx, result);
+ else if (key->type == BTRFS_EXTENT_DATA_KEY)
+ ret = changed_extent(sctx, result);
+ }
out:
return ret;
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 46b2f4590aab71d31088a265c86026b1e96c9de4 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Tue, 24 Jul 2018 11:54:04 +0100
Subject: [PATCH] Btrfs: fix send failure when root has deleted files still
open
The more common use case of send involves creating a RO snapshot and then
use it for a send operation. In this case it's not possible to have inodes
in the snapshot that have a link count of zero (inode with an orphan item)
since during snapshot creation we do the orphan cleanup. However, other
less common use cases for send can end up seeing inodes with a link count
of zero and in this case the send operation fails with a ENOENT error
because any attempt to generate a path for the inode, with the purpose
of creating it or updating it at the receiver, fails since there are no
inode reference items. One use case it to use a regular subvolume for
a send operation after turning it to RO mode or turning a RW snapshot
into RO mode and then using it for a send operation. In both cases, if a
file gets all its hard links deleted while there is an open file
descriptor before turning the subvolume/snapshot into RO mode, the send
operation will encounter an inode with a link count of zero and then
fail with errno ENOENT.
Example using a full send with a subvolume:
$ mkfs.btrfs -f /dev/sdb
$ mount /dev/sdb /mnt
$ btrfs subvolume create /mnt/sv1
$ touch /mnt/sv1/foo
$ touch /mnt/sv1/bar
# keep an open file descriptor on file bar
$ exec 73</mnt/sv1/bar
$ unlink /mnt/sv1/bar
# Turn the subvolume to RO mode and use it for a full send, while
# holding the open file descriptor.
$ btrfs property set /mnt/sv1 ro true
$ btrfs send -f /tmp/full.send /mnt/sv1
At subvol /mnt/sv1
ERROR: send ioctl failed with -2: No such file or directory
Example using an incremental send with snapshots:
$ mkfs.btrfs -f /dev/sdb
$ mount /dev/sdb /mnt
$ btrfs subvolume create /mnt/sv1
$ touch /mnt/sv1/foo
$ touch /mnt/sv1/bar
$ btrfs subvolume snapshot -r /mnt/sv1 /mnt/snap1
$ echo "hello world" >> /mnt/sv1/bar
$ btrfs subvolume snapshot -r /mnt/sv1 /mnt/snap2
# Turn the second snapshot to RW mode and delete file foo while
# holding an open file descriptor on it.
$ btrfs property set /mnt/snap2 ro false
$ exec 73</mnt/snap2/foo
$ unlink /mnt/snap2/foo
# Set the second snapshot back to RO mode and do an incremental send.
$ btrfs property set /mnt/snap2 ro true
$ btrfs send -f /tmp/inc.send -p /mnt/snap1 /mnt/snap2
At subvol /mnt/snap2
ERROR: send ioctl failed with -2: No such file or directory
So fix this by ignoring inodes with a link count of zero if we are either
doing a full send or if they do not exist in the parent snapshot (they
are new in the send snapshot), and unlink all paths found in the parent
snapshot when doing an incremental send (and ignoring all other inode
items, such as xattrs and extents).
A test case for fstests follows soon.
CC: stable(a)vger.kernel.org # 4.4+
Reported-by: Martin Wilck <martin.wilck(a)suse.com>
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 42e04cd3cd95..551294a6c9e2 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -100,6 +100,7 @@ struct send_ctx {
u64 cur_inode_rdev;
u64 cur_inode_last_extent;
u64 cur_inode_next_write_offset;
+ bool ignore_cur_inode;
u64 send_progress;
@@ -5796,6 +5797,9 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
int pending_move = 0;
int refs_processed = 0;
+ if (sctx->ignore_cur_inode)
+ return 0;
+
ret = process_recorded_refs_if_needed(sctx, at_end, &pending_move,
&refs_processed);
if (ret < 0)
@@ -5914,6 +5918,93 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
return ret;
}
+struct parent_paths_ctx {
+ struct list_head *refs;
+ struct send_ctx *sctx;
+};
+
+static int record_parent_ref(int num, u64 dir, int index, struct fs_path *name,
+ void *ctx)
+{
+ struct parent_paths_ctx *ppctx = ctx;
+
+ return record_ref(ppctx->sctx->parent_root, dir, name, ppctx->sctx,
+ ppctx->refs);
+}
+
+/*
+ * Issue unlink operations for all paths of the current inode found in the
+ * parent snapshot.
+ */
+static int btrfs_unlink_all_paths(struct send_ctx *sctx)
+{
+ LIST_HEAD(deleted_refs);
+ struct btrfs_path *path;
+ struct btrfs_key key;
+ struct parent_paths_ctx ctx;
+ int ret;
+
+ path = alloc_path_for_send();
+ if (!path)
+ return -ENOMEM;
+
+ key.objectid = sctx->cur_ino;
+ key.type = BTRFS_INODE_REF_KEY;
+ key.offset = 0;
+ ret = btrfs_search_slot(NULL, sctx->parent_root, &key, path, 0, 0);
+ if (ret < 0)
+ goto out;
+
+ ctx.refs = &deleted_refs;
+ ctx.sctx = sctx;
+
+ while (true) {
+ struct extent_buffer *eb = path->nodes[0];
+ int slot = path->slots[0];
+
+ if (slot >= btrfs_header_nritems(eb)) {
+ ret = btrfs_next_leaf(sctx->parent_root, path);
+ if (ret < 0)
+ goto out;
+ else if (ret > 0)
+ break;
+ continue;
+ }
+
+ btrfs_item_key_to_cpu(eb, &key, slot);
+ if (key.objectid != sctx->cur_ino)
+ break;
+ if (key.type != BTRFS_INODE_REF_KEY &&
+ key.type != BTRFS_INODE_EXTREF_KEY)
+ break;
+
+ ret = iterate_inode_ref(sctx->parent_root, path, &key, 1,
+ record_parent_ref, &ctx);
+ if (ret < 0)
+ goto out;
+
+ path->slots[0]++;
+ }
+
+ while (!list_empty(&deleted_refs)) {
+ struct recorded_ref *ref;
+
+ ref = list_first_entry(&deleted_refs, struct recorded_ref, list);
+ ret = send_unlink(sctx, ref->full_path);
+ if (ret < 0)
+ goto out;
+ fs_path_free(ref->full_path);
+ list_del(&ref->list);
+ kfree(ref);
+ }
+ ret = 0;
+out:
+ btrfs_free_path(path);
+ if (ret)
+ __free_recorded_refs(&deleted_refs);
+ return ret;
+}
+
static int changed_inode(struct send_ctx *sctx,
enum btrfs_compare_tree_result result)
{
@@ -5928,6 +6019,7 @@ static int changed_inode(struct send_ctx *sctx,
sctx->cur_inode_new_gen = 0;
sctx->cur_inode_last_extent = (u64)-1;
sctx->cur_inode_next_write_offset = 0;
+ sctx->ignore_cur_inode = false;
/*
* Set send_progress to current inode. This will tell all get_cur_xxx
@@ -5968,6 +6060,33 @@ static int changed_inode(struct send_ctx *sctx,
sctx->cur_inode_new_gen = 1;
}
+ /*
+ * Normally we do not find inodes with a link count of zero (orphans)
+ * because the most common case is to create a snapshot and use it
+ * for a send operation. However other less common use cases involve
+ * using a subvolume and send it after turning it to RO mode just
+ * after deleting all hard links of a file while holding an open
+ * file descriptor against it or turning a RO snapshot into RW mode,
+ * keep an open file descriptor against a file, delete it and then
+ * turn the snapshot back to RO mode before using it for a send
+ * operation. So if we find such cases, ignore the inode and all its
+ * items completely if it's a new inode, or if it's a changed inode
+ * make sure all its previous paths (from the parent snapshot) are all
+ * unlinked and all other the inode items are ignored.
+ */
+ if (result == BTRFS_COMPARE_TREE_NEW ||
+ result == BTRFS_COMPARE_TREE_CHANGED) {
+ u32 nlinks;
+
+ nlinks = btrfs_inode_nlink(sctx->left_path->nodes[0], left_ii);
+ if (nlinks == 0) {
+ sctx->ignore_cur_inode = true;
+ if (result == BTRFS_COMPARE_TREE_CHANGED)
+ ret = btrfs_unlink_all_paths(sctx);
+ goto out;
+ }
+ }
+
if (result == BTRFS_COMPARE_TREE_NEW) {
sctx->cur_inode_gen = left_gen;
sctx->cur_inode_new = 1;
@@ -6306,15 +6425,17 @@ static int changed_cb(struct btrfs_path *left_path,
key->objectid == BTRFS_FREE_SPACE_OBJECTID)
goto out;
- if (key->type == BTRFS_INODE_ITEM_KEY)
+ if (key->type == BTRFS_INODE_ITEM_KEY) {
ret = changed_inode(sctx, result);
- else if (key->type == BTRFS_INODE_REF_KEY ||
- key->type == BTRFS_INODE_EXTREF_KEY)
- ret = changed_ref(sctx, result);
- else if (key->type == BTRFS_XATTR_ITEM_KEY)
- ret = changed_xattr(sctx, result);
- else if (key->type == BTRFS_EXTENT_DATA_KEY)
- ret = changed_extent(sctx, result);
+ } else if (!sctx->ignore_cur_inode) {
+ if (key->type == BTRFS_INODE_REF_KEY ||
+ key->type == BTRFS_INODE_EXTREF_KEY)
+ ret = changed_ref(sctx, result);
+ else if (key->type == BTRFS_XATTR_ITEM_KEY)
+ ret = changed_xattr(sctx, result);
+ else if (key->type == BTRFS_EXTENT_DATA_KEY)
+ ret = changed_extent(sctx, result);
+ }
out:
return ret;
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 46b2f4590aab71d31088a265c86026b1e96c9de4 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Tue, 24 Jul 2018 11:54:04 +0100
Subject: [PATCH] Btrfs: fix send failure when root has deleted files still
open
The more common use case of send involves creating a RO snapshot and then
use it for a send operation. In this case it's not possible to have inodes
in the snapshot that have a link count of zero (inode with an orphan item)
since during snapshot creation we do the orphan cleanup. However, other
less common use cases for send can end up seeing inodes with a link count
of zero and in this case the send operation fails with a ENOENT error
because any attempt to generate a path for the inode, with the purpose
of creating it or updating it at the receiver, fails since there are no
inode reference items. One use case it to use a regular subvolume for
a send operation after turning it to RO mode or turning a RW snapshot
into RO mode and then using it for a send operation. In both cases, if a
file gets all its hard links deleted while there is an open file
descriptor before turning the subvolume/snapshot into RO mode, the send
operation will encounter an inode with a link count of zero and then
fail with errno ENOENT.
Example using a full send with a subvolume:
$ mkfs.btrfs -f /dev/sdb
$ mount /dev/sdb /mnt
$ btrfs subvolume create /mnt/sv1
$ touch /mnt/sv1/foo
$ touch /mnt/sv1/bar
# keep an open file descriptor on file bar
$ exec 73</mnt/sv1/bar
$ unlink /mnt/sv1/bar
# Turn the subvolume to RO mode and use it for a full send, while
# holding the open file descriptor.
$ btrfs property set /mnt/sv1 ro true
$ btrfs send -f /tmp/full.send /mnt/sv1
At subvol /mnt/sv1
ERROR: send ioctl failed with -2: No such file or directory
Example using an incremental send with snapshots:
$ mkfs.btrfs -f /dev/sdb
$ mount /dev/sdb /mnt
$ btrfs subvolume create /mnt/sv1
$ touch /mnt/sv1/foo
$ touch /mnt/sv1/bar
$ btrfs subvolume snapshot -r /mnt/sv1 /mnt/snap1
$ echo "hello world" >> /mnt/sv1/bar
$ btrfs subvolume snapshot -r /mnt/sv1 /mnt/snap2
# Turn the second snapshot to RW mode and delete file foo while
# holding an open file descriptor on it.
$ btrfs property set /mnt/snap2 ro false
$ exec 73</mnt/snap2/foo
$ unlink /mnt/snap2/foo
# Set the second snapshot back to RO mode and do an incremental send.
$ btrfs property set /mnt/snap2 ro true
$ btrfs send -f /tmp/inc.send -p /mnt/snap1 /mnt/snap2
At subvol /mnt/snap2
ERROR: send ioctl failed with -2: No such file or directory
So fix this by ignoring inodes with a link count of zero if we are either
doing a full send or if they do not exist in the parent snapshot (they
are new in the send snapshot), and unlink all paths found in the parent
snapshot when doing an incremental send (and ignoring all other inode
items, such as xattrs and extents).
A test case for fstests follows soon.
CC: stable(a)vger.kernel.org # 4.4+
Reported-by: Martin Wilck <martin.wilck(a)suse.com>
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 42e04cd3cd95..551294a6c9e2 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -100,6 +100,7 @@ struct send_ctx {
u64 cur_inode_rdev;
u64 cur_inode_last_extent;
u64 cur_inode_next_write_offset;
+ bool ignore_cur_inode;
u64 send_progress;
@@ -5796,6 +5797,9 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
int pending_move = 0;
int refs_processed = 0;
+ if (sctx->ignore_cur_inode)
+ return 0;
+
ret = process_recorded_refs_if_needed(sctx, at_end, &pending_move,
&refs_processed);
if (ret < 0)
@@ -5914,6 +5918,93 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
return ret;
}
+struct parent_paths_ctx {
+ struct list_head *refs;
+ struct send_ctx *sctx;
+};
+
+static int record_parent_ref(int num, u64 dir, int index, struct fs_path *name,
+ void *ctx)
+{
+ struct parent_paths_ctx *ppctx = ctx;
+
+ return record_ref(ppctx->sctx->parent_root, dir, name, ppctx->sctx,
+ ppctx->refs);
+}
+
+/*
+ * Issue unlink operations for all paths of the current inode found in the
+ * parent snapshot.
+ */
+static int btrfs_unlink_all_paths(struct send_ctx *sctx)
+{
+ LIST_HEAD(deleted_refs);
+ struct btrfs_path *path;
+ struct btrfs_key key;
+ struct parent_paths_ctx ctx;
+ int ret;
+
+ path = alloc_path_for_send();
+ if (!path)
+ return -ENOMEM;
+
+ key.objectid = sctx->cur_ino;
+ key.type = BTRFS_INODE_REF_KEY;
+ key.offset = 0;
+ ret = btrfs_search_slot(NULL, sctx->parent_root, &key, path, 0, 0);
+ if (ret < 0)
+ goto out;
+
+ ctx.refs = &deleted_refs;
+ ctx.sctx = sctx;
+
+ while (true) {
+ struct extent_buffer *eb = path->nodes[0];
+ int slot = path->slots[0];
+
+ if (slot >= btrfs_header_nritems(eb)) {
+ ret = btrfs_next_leaf(sctx->parent_root, path);
+ if (ret < 0)
+ goto out;
+ else if (ret > 0)
+ break;
+ continue;
+ }
+
+ btrfs_item_key_to_cpu(eb, &key, slot);
+ if (key.objectid != sctx->cur_ino)
+ break;
+ if (key.type != BTRFS_INODE_REF_KEY &&
+ key.type != BTRFS_INODE_EXTREF_KEY)
+ break;
+
+ ret = iterate_inode_ref(sctx->parent_root, path, &key, 1,
+ record_parent_ref, &ctx);
+ if (ret < 0)
+ goto out;
+
+ path->slots[0]++;
+ }
+
+ while (!list_empty(&deleted_refs)) {
+ struct recorded_ref *ref;
+
+ ref = list_first_entry(&deleted_refs, struct recorded_ref, list);
+ ret = send_unlink(sctx, ref->full_path);
+ if (ret < 0)
+ goto out;
+ fs_path_free(ref->full_path);
+ list_del(&ref->list);
+ kfree(ref);
+ }
+ ret = 0;
+out:
+ btrfs_free_path(path);
+ if (ret)
+ __free_recorded_refs(&deleted_refs);
+ return ret;
+}
+
static int changed_inode(struct send_ctx *sctx,
enum btrfs_compare_tree_result result)
{
@@ -5928,6 +6019,7 @@ static int changed_inode(struct send_ctx *sctx,
sctx->cur_inode_new_gen = 0;
sctx->cur_inode_last_extent = (u64)-1;
sctx->cur_inode_next_write_offset = 0;
+ sctx->ignore_cur_inode = false;
/*
* Set send_progress to current inode. This will tell all get_cur_xxx
@@ -5968,6 +6060,33 @@ static int changed_inode(struct send_ctx *sctx,
sctx->cur_inode_new_gen = 1;
}
+ /*
+ * Normally we do not find inodes with a link count of zero (orphans)
+ * because the most common case is to create a snapshot and use it
+ * for a send operation. However other less common use cases involve
+ * using a subvolume and send it after turning it to RO mode just
+ * after deleting all hard links of a file while holding an open
+ * file descriptor against it or turning a RO snapshot into RW mode,
+ * keep an open file descriptor against a file, delete it and then
+ * turn the snapshot back to RO mode before using it for a send
+ * operation. So if we find such cases, ignore the inode and all its
+ * items completely if it's a new inode, or if it's a changed inode
+ * make sure all its previous paths (from the parent snapshot) are all
+ * unlinked and all other the inode items are ignored.
+ */
+ if (result == BTRFS_COMPARE_TREE_NEW ||
+ result == BTRFS_COMPARE_TREE_CHANGED) {
+ u32 nlinks;
+
+ nlinks = btrfs_inode_nlink(sctx->left_path->nodes[0], left_ii);
+ if (nlinks == 0) {
+ sctx->ignore_cur_inode = true;
+ if (result == BTRFS_COMPARE_TREE_CHANGED)
+ ret = btrfs_unlink_all_paths(sctx);
+ goto out;
+ }
+ }
+
if (result == BTRFS_COMPARE_TREE_NEW) {
sctx->cur_inode_gen = left_gen;
sctx->cur_inode_new = 1;
@@ -6306,15 +6425,17 @@ static int changed_cb(struct btrfs_path *left_path,
key->objectid == BTRFS_FREE_SPACE_OBJECTID)
goto out;
- if (key->type == BTRFS_INODE_ITEM_KEY)
+ if (key->type == BTRFS_INODE_ITEM_KEY) {
ret = changed_inode(sctx, result);
- else if (key->type == BTRFS_INODE_REF_KEY ||
- key->type == BTRFS_INODE_EXTREF_KEY)
- ret = changed_ref(sctx, result);
- else if (key->type == BTRFS_XATTR_ITEM_KEY)
- ret = changed_xattr(sctx, result);
- else if (key->type == BTRFS_EXTENT_DATA_KEY)
- ret = changed_extent(sctx, result);
+ } else if (!sctx->ignore_cur_inode) {
+ if (key->type == BTRFS_INODE_REF_KEY ||
+ key->type == BTRFS_INODE_EXTREF_KEY)
+ ret = changed_ref(sctx, result);
+ else if (key->type == BTRFS_XATTR_ITEM_KEY)
+ ret = changed_xattr(sctx, result);
+ else if (key->type == BTRFS_EXTENT_DATA_KEY)
+ ret = changed_extent(sctx, result);
+ }
out:
return ret;
The patch below does not apply to the 3.18-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 3c4276936f6fbe52884b4ea4e6cc120b890a0f9f Mon Sep 17 00:00:00 2001
From: Josef Bacik <jbacik(a)fb.com>
Date: Fri, 20 Jul 2018 11:46:10 -0700
Subject: [PATCH] Btrfs: fix btrfs_write_inode vs delayed iput deadlock
We recently ran into the following deadlock involving
btrfs_write_inode():
[ +0.005066] __schedule+0x38e/0x8c0
[ +0.007144] schedule+0x36/0x80
[ +0.006447] bit_wait+0x11/0x60
[ +0.006446] __wait_on_bit+0xbe/0x110
[ +0.007487] ? bit_wait_io+0x60/0x60
[ +0.007319] __inode_wait_for_writeback+0x96/0xc0
[ +0.009568] ? autoremove_wake_function+0x40/0x40
[ +0.009565] inode_wait_for_writeback+0x21/0x30
[ +0.009224] evict+0xb0/0x190
[ +0.006099] iput+0x1a8/0x210
[ +0.006103] btrfs_run_delayed_iputs+0x73/0xc0
[ +0.009047] btrfs_commit_transaction+0x799/0x8c0
[ +0.009567] btrfs_write_inode+0x81/0xb0
[ +0.008008] __writeback_single_inode+0x267/0x320
[ +0.009569] writeback_sb_inodes+0x25b/0x4e0
[ +0.008702] wb_writeback+0x102/0x2d0
[ +0.007487] wb_workfn+0xa4/0x310
[ +0.006794] ? wb_workfn+0xa4/0x310
[ +0.007143] process_one_work+0x150/0x410
[ +0.008179] worker_thread+0x6d/0x520
[ +0.007490] kthread+0x12c/0x160
[ +0.006620] ? put_pwq_unlocked+0x80/0x80
[ +0.008185] ? kthread_park+0xa0/0xa0
[ +0.007484] ? do_syscall_64+0x53/0x150
[ +0.007837] ret_from_fork+0x29/0x40
Writeback calls:
btrfs_write_inode
btrfs_commit_transaction
btrfs_run_delayed_iputs
If iput() is called on that same inode, evict() will wait for writeback
forever.
btrfs_write_inode() was originally added way back in 4730a4bc5bf3
("btrfs_dirty_inode") to support O_SYNC writes. However, ->write_inode()
hasn't been used for O_SYNC since 148f948ba877 ("vfs: Introduce new
helpers for syncing after writing to O_SYNC file or IS_SYNC inode"), so
btrfs_write_inode() is actually unnecessary (and leads to a bunch of
unnecessary commits). Get rid of it, which also gets rid of the
deadlock.
CC: stable(a)vger.kernel.org # 3.2+
Signed-off-by: Josef Bacik <jbacik(a)fb.com>
[Omar: new commit message]
Signed-off-by: Omar Sandoval <osandov(a)fb.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 4955e04da4c8..472457795486 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -6021,32 +6021,6 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
return ret;
}
-int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
-{
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_trans_handle *trans;
- int ret = 0;
- bool nolock = false;
-
- if (test_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags))
- return 0;
-
- if (btrfs_fs_closing(root->fs_info) &&
- btrfs_is_free_space_inode(BTRFS_I(inode)))
- nolock = true;
-
- if (wbc->sync_mode == WB_SYNC_ALL) {
- if (nolock)
- trans = btrfs_join_transaction_nolock(root);
- else
- trans = btrfs_join_transaction(root);
- if (IS_ERR(trans))
- return PTR_ERR(trans);
- ret = btrfs_commit_transaction(trans);
- }
- return ret;
-}
-
/*
* This is somewhat expensive, updating the tree every time the
* inode changes. But, it is most likely to find the inode in cache.
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index efe8b03ce380..67de3c0fc85b 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -2344,7 +2344,6 @@ static const struct super_operations btrfs_super_ops = {
.sync_fs = btrfs_sync_fs,
.show_options = btrfs_show_options,
.show_devname = btrfs_show_devname,
- .write_inode = btrfs_write_inode,
.alloc_inode = btrfs_alloc_inode,
.destroy_inode = btrfs_destroy_inode,
.statfs = btrfs_statfs,
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 3c4276936f6fbe52884b4ea4e6cc120b890a0f9f Mon Sep 17 00:00:00 2001
From: Josef Bacik <jbacik(a)fb.com>
Date: Fri, 20 Jul 2018 11:46:10 -0700
Subject: [PATCH] Btrfs: fix btrfs_write_inode vs delayed iput deadlock
We recently ran into the following deadlock involving
btrfs_write_inode():
[ +0.005066] __schedule+0x38e/0x8c0
[ +0.007144] schedule+0x36/0x80
[ +0.006447] bit_wait+0x11/0x60
[ +0.006446] __wait_on_bit+0xbe/0x110
[ +0.007487] ? bit_wait_io+0x60/0x60
[ +0.007319] __inode_wait_for_writeback+0x96/0xc0
[ +0.009568] ? autoremove_wake_function+0x40/0x40
[ +0.009565] inode_wait_for_writeback+0x21/0x30
[ +0.009224] evict+0xb0/0x190
[ +0.006099] iput+0x1a8/0x210
[ +0.006103] btrfs_run_delayed_iputs+0x73/0xc0
[ +0.009047] btrfs_commit_transaction+0x799/0x8c0
[ +0.009567] btrfs_write_inode+0x81/0xb0
[ +0.008008] __writeback_single_inode+0x267/0x320
[ +0.009569] writeback_sb_inodes+0x25b/0x4e0
[ +0.008702] wb_writeback+0x102/0x2d0
[ +0.007487] wb_workfn+0xa4/0x310
[ +0.006794] ? wb_workfn+0xa4/0x310
[ +0.007143] process_one_work+0x150/0x410
[ +0.008179] worker_thread+0x6d/0x520
[ +0.007490] kthread+0x12c/0x160
[ +0.006620] ? put_pwq_unlocked+0x80/0x80
[ +0.008185] ? kthread_park+0xa0/0xa0
[ +0.007484] ? do_syscall_64+0x53/0x150
[ +0.007837] ret_from_fork+0x29/0x40
Writeback calls:
btrfs_write_inode
btrfs_commit_transaction
btrfs_run_delayed_iputs
If iput() is called on that same inode, evict() will wait for writeback
forever.
btrfs_write_inode() was originally added way back in 4730a4bc5bf3
("btrfs_dirty_inode") to support O_SYNC writes. However, ->write_inode()
hasn't been used for O_SYNC since 148f948ba877 ("vfs: Introduce new
helpers for syncing after writing to O_SYNC file or IS_SYNC inode"), so
btrfs_write_inode() is actually unnecessary (and leads to a bunch of
unnecessary commits). Get rid of it, which also gets rid of the
deadlock.
CC: stable(a)vger.kernel.org # 3.2+
Signed-off-by: Josef Bacik <jbacik(a)fb.com>
[Omar: new commit message]
Signed-off-by: Omar Sandoval <osandov(a)fb.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 4955e04da4c8..472457795486 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -6021,32 +6021,6 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
return ret;
}
-int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
-{
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_trans_handle *trans;
- int ret = 0;
- bool nolock = false;
-
- if (test_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags))
- return 0;
-
- if (btrfs_fs_closing(root->fs_info) &&
- btrfs_is_free_space_inode(BTRFS_I(inode)))
- nolock = true;
-
- if (wbc->sync_mode == WB_SYNC_ALL) {
- if (nolock)
- trans = btrfs_join_transaction_nolock(root);
- else
- trans = btrfs_join_transaction(root);
- if (IS_ERR(trans))
- return PTR_ERR(trans);
- ret = btrfs_commit_transaction(trans);
- }
- return ret;
-}
-
/*
* This is somewhat expensive, updating the tree every time the
* inode changes. But, it is most likely to find the inode in cache.
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index efe8b03ce380..67de3c0fc85b 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -2344,7 +2344,6 @@ static const struct super_operations btrfs_super_ops = {
.sync_fs = btrfs_sync_fs,
.show_options = btrfs_show_options,
.show_devname = btrfs_show_devname,
- .write_inode = btrfs_write_inode,
.alloc_inode = btrfs_alloc_inode,
.destroy_inode = btrfs_destroy_inode,
.statfs = btrfs_statfs,
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 3c4276936f6fbe52884b4ea4e6cc120b890a0f9f Mon Sep 17 00:00:00 2001
From: Josef Bacik <jbacik(a)fb.com>
Date: Fri, 20 Jul 2018 11:46:10 -0700
Subject: [PATCH] Btrfs: fix btrfs_write_inode vs delayed iput deadlock
We recently ran into the following deadlock involving
btrfs_write_inode():
[ +0.005066] __schedule+0x38e/0x8c0
[ +0.007144] schedule+0x36/0x80
[ +0.006447] bit_wait+0x11/0x60
[ +0.006446] __wait_on_bit+0xbe/0x110
[ +0.007487] ? bit_wait_io+0x60/0x60
[ +0.007319] __inode_wait_for_writeback+0x96/0xc0
[ +0.009568] ? autoremove_wake_function+0x40/0x40
[ +0.009565] inode_wait_for_writeback+0x21/0x30
[ +0.009224] evict+0xb0/0x190
[ +0.006099] iput+0x1a8/0x210
[ +0.006103] btrfs_run_delayed_iputs+0x73/0xc0
[ +0.009047] btrfs_commit_transaction+0x799/0x8c0
[ +0.009567] btrfs_write_inode+0x81/0xb0
[ +0.008008] __writeback_single_inode+0x267/0x320
[ +0.009569] writeback_sb_inodes+0x25b/0x4e0
[ +0.008702] wb_writeback+0x102/0x2d0
[ +0.007487] wb_workfn+0xa4/0x310
[ +0.006794] ? wb_workfn+0xa4/0x310
[ +0.007143] process_one_work+0x150/0x410
[ +0.008179] worker_thread+0x6d/0x520
[ +0.007490] kthread+0x12c/0x160
[ +0.006620] ? put_pwq_unlocked+0x80/0x80
[ +0.008185] ? kthread_park+0xa0/0xa0
[ +0.007484] ? do_syscall_64+0x53/0x150
[ +0.007837] ret_from_fork+0x29/0x40
Writeback calls:
btrfs_write_inode
btrfs_commit_transaction
btrfs_run_delayed_iputs
If iput() is called on that same inode, evict() will wait for writeback
forever.
btrfs_write_inode() was originally added way back in 4730a4bc5bf3
("btrfs_dirty_inode") to support O_SYNC writes. However, ->write_inode()
hasn't been used for O_SYNC since 148f948ba877 ("vfs: Introduce new
helpers for syncing after writing to O_SYNC file or IS_SYNC inode"), so
btrfs_write_inode() is actually unnecessary (and leads to a bunch of
unnecessary commits). Get rid of it, which also gets rid of the
deadlock.
CC: stable(a)vger.kernel.org # 3.2+
Signed-off-by: Josef Bacik <jbacik(a)fb.com>
[Omar: new commit message]
Signed-off-by: Omar Sandoval <osandov(a)fb.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 4955e04da4c8..472457795486 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -6021,32 +6021,6 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
return ret;
}
-int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
-{
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_trans_handle *trans;
- int ret = 0;
- bool nolock = false;
-
- if (test_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags))
- return 0;
-
- if (btrfs_fs_closing(root->fs_info) &&
- btrfs_is_free_space_inode(BTRFS_I(inode)))
- nolock = true;
-
- if (wbc->sync_mode == WB_SYNC_ALL) {
- if (nolock)
- trans = btrfs_join_transaction_nolock(root);
- else
- trans = btrfs_join_transaction(root);
- if (IS_ERR(trans))
- return PTR_ERR(trans);
- ret = btrfs_commit_transaction(trans);
- }
- return ret;
-}
-
/*
* This is somewhat expensive, updating the tree every time the
* inode changes. But, it is most likely to find the inode in cache.
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index efe8b03ce380..67de3c0fc85b 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -2344,7 +2344,6 @@ static const struct super_operations btrfs_super_ops = {
.sync_fs = btrfs_sync_fs,
.show_options = btrfs_show_options,
.show_devname = btrfs_show_devname,
- .write_inode = btrfs_write_inode,
.alloc_inode = btrfs_alloc_inode,
.destroy_inode = btrfs_destroy_inode,
.statfs = btrfs_statfs,
Hi !
this is also the wrong version of the patch - the proper version is
below. This has been posted to lkml https://lkml.org/lkml/2018/7/18/191
"[PATCH V3] drm: handle error values properly" but there was no review yet
The version you have here though is for sure broken. So maybe this should
be simply dropped until the above, presumably correct fix, is confirmed.
thx!
hofrat
----- Forwarded message from gregkh(a)linuxfoundation.org -----
Date: Tue, 28 Aug 2018 16:09:59 +0200
From: gregkh(a)linuxfoundation.org
To: 1531571532-22733-1-git-send-email-hofrat(a)osadl.org, alexander.levin(a)microsoft.com, gregkh(a)linuxfoundation.org, hofrat(a)osadl.org,
seanpaul(a)chromium.org
Cc: stable-commits(a)vger.kernel.org
Subject: Patch "drm: re-enable error handling" has been added to the 3.18-stable tree
This is a note to let you know that I've just added the patch titled
drm: re-enable error handling
to the 3.18-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
drm-re-enable-error-handling.patch
and it can be found in the queue-3.18 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Tue Aug 28 16:08:28 CEST 2018
From: Nicholas Mc Guire <hofrat(a)osadl.org>
Date: Sat, 14 Jul 2018 14:32:12 +0200
Subject: drm: re-enable error handling
From: Nicholas Mc Guire <hofrat(a)osadl.org>
[ Upstream commit d530b5f1ca0bb66958a2b714bebe40a1248b9c15 ]
drm_legacy_ctxbitmap_next() returns idr_alloc() which can return
-ENOMEM, -EINVAL or -ENOSPC none of which are -1 . but the call sites
of drm_legacy_ctxbitmap_next() seem to be assuming that the error case
would be -1 (original return of drm_ctxbitmap_next() prior to 2.6.23
was actually -1). Thus reenable error handling by checking for < 0.
Signed-off-by: Nicholas Mc Guire <hofrat(a)osadl.org>
Fixes: 62968144e673 ("drm: convert drm context code to use Linux idr")
Signed-off-by: Sean Paul <seanpaul(a)chromium.org>
Link: https://patchwork.freedesktop.org/patch/msgid/1531571532-22733-1-git-send-e…
Signed-off-by: Sasha Levin <alexander.levin(a)microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/gpu/drm/drm_context.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/drivers/gpu/drm/drm_context.c
+++ b/drivers/gpu/drm/drm_context.c
@@ -341,7 +341,7 @@ int drm_legacy_addctx(struct drm_device
ctx->handle = drm_legacy_ctxbitmap_next(dev);
}
DRM_DEBUG("%d\n", ctx->handle);
- if (ctx->handle == -1) {
+ if (ctx->handle < 0) {
DRM_DEBUG("Not enough free contexts.\n");
/* Should this return -EBUSY instead? */
return -ENOMEM;
Patches currently in stable-queue which might be from hofrat(a)osadl.org are
queue-3.18/drm-re-enable-error-handling.patch
queue-3.18/can-mpc5xxx_can-check-of_iomap-return-before-use.patch
----- End forwarded message -----
In NMI context, we might be in the middle of context switching or in
the middle of switch_mm_irqs_off(). In either case, CR3 might not
match current->mm, which could cause copy_from_user_nmi() and
friends to read the wrong memory.
Fix it by adding a new nmi_uaccess_okay() helper and checking it in
copy_from_user_nmi() and in __copy_from_user_nmi()'s callers.
Cc: stable(a)vger.kernel.org
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Nadav Amit <nadav.amit(a)gmail.com>
Signed-off-by: Andy Lutomirski <luto(a)kernel.org>
---
The 0day bot is still chewing on this, but I've tested it a bit locally
and it seems to do the right thing.
I've never observed the bug it fixes, but it does appear to fix a bug
unless I've missed something. It's also a prerequisite for Nadav's
fixmap bugfix.
arch/x86/events/core.c | 2 +-
arch/x86/include/asm/tlbflush.h | 16 ++++++++++++++++
arch/x86/lib/usercopy.c | 5 +++++
arch/x86/mm/tlb.c | 3 +++
4 files changed, 25 insertions(+), 1 deletion(-)
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 5f4829f10129..dfb2f7c0d019 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2465,7 +2465,7 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs
perf_callchain_store(entry, regs->ip);
- if (!current->mm)
+ if (!nmi_uaccess_okay())
return;
if (perf_callchain_user32(regs, entry))
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 89a73bc31622..b23b2625793b 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -230,6 +230,22 @@ struct tlb_state {
};
DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate);
+/*
+ * Blindly accessing user memory from NMI context can be dangerous
+ * if we're in the middle of switching the current user task or
+ * switching the loaded mm. It can also be dangerous if we
+ * interrupted some kernel code that was temporarily using a
+ * different mm.
+ */
+static inline bool nmi_uaccess_okay(void)
+{
+ struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
+ struct mm_struct *current_mm = current->mm;
+
+ return current_mm && loaded_mm == current_mm &&
+ loaded_mm->pgd == __va(read_cr3_pa());
+}
+
/* Initialize cr4 shadow for this CPU. */
static inline void cr4_init_shadow(void)
{
diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c
index c8c6ad0d58b8..3f435d7fca5e 100644
--- a/arch/x86/lib/usercopy.c
+++ b/arch/x86/lib/usercopy.c
@@ -7,6 +7,8 @@
#include <linux/uaccess.h>
#include <linux/export.h>
+#include <asm/tlbflush.h>
+
/*
* We rely on the nested NMI work to allow atomic faults from the NMI path; the
* nested NMI paths are careful to preserve CR2.
@@ -19,6 +21,9 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
if (__range_not_ok(from, n, TASK_SIZE))
return n;
+ if (!nmi_uaccess_okay())
+ return n;
+
/*
* Even though this function is typically called from NMI/IRQ context
* disable pagefaults so that its behaviour is consistent even when
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 457b281b9339..f4b41d5a93dd 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -345,6 +345,9 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
*/
trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
} else {
+ /* Let NMI code know that CR3 may not match expectations. */
+ this_cpu_write(cpu_tlbstate.loaded_mm, NULL);
+
/* The new ASID is already up to date. */
load_new_mm_cr3(next->pgd, new_asid, false);
--
2.17.1
The patch
regulator: bd71837: Disable voltage monitoring for LDO3/4
has been applied to the regulator tree at
https://git.kernel.org/pub/scm/linux/kernel/git/broonie/regulator.git
All being well this means that it will be integrated into the linux-next
tree (usually sometime in the next 24 hours) and sent to Linus during
the next merge window (or sooner if it is a bug fix), however if
problems are discovered then the patch may be dropped or reverted.
You may get further e-mails resulting from automated or manual testing
and review of the tree, please engage with people reporting problems and
send followup patches addressing any issues that are reported if needed.
If any updates are required or you are submitting further changes they
should be sent as incremental updates against current git, existing
patches will not be replaced.
Please add any relevant lists and maintainers to the CCs when replying
to this mail.
Thanks,
Mark
>From 823f18f8b860526fc099c222619a126d57d2ad8c Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <matti.vaittinen(a)fi.rohmeurope.com>
Date: Wed, 29 Aug 2018 15:36:10 +0300
Subject: [PATCH] regulator: bd71837: Disable voltage monitoring for LDO3/4
There is a HW quirk in BD71837. The shutdown sequence timings for
bucks/LDOs which are enabled via register interface are changed.
At PMIC poweroff the voltage for BUCK6/7 is cut immediately at the
beginning of shut-down sequence. This causes LDO5/6 voltage
monitoring to detect under voltage and force PMIC to emergency
state instead of poweroff. Disable voltage monitoring for LDO5 and
LDO6 at probe to avoid this.
Signed-off-by: Matti Vaittinen <matti.vaittinen(a)fi.rohmeurope.com>
Signed-off-by: Mark Brown <broonie(a)kernel.org>
Cc: stable(a)vger.kernel.org
---
drivers/regulator/bd71837-regulator.c | 19 +++++++++++++++
include/linux/mfd/rohm-bd718x7.h | 33 ++++++++++++++++++++++++---
2 files changed, 49 insertions(+), 3 deletions(-)
diff --git a/drivers/regulator/bd71837-regulator.c b/drivers/regulator/bd71837-regulator.c
index 0f8ac8dec3e1..a1bd8aaf4d98 100644
--- a/drivers/regulator/bd71837-regulator.c
+++ b/drivers/regulator/bd71837-regulator.c
@@ -569,6 +569,25 @@ static int bd71837_probe(struct platform_device *pdev)
BD71837_REG_REGLOCK);
}
+ /*
+ * There is a HW quirk in BD71837. The shutdown sequence timings for
+ * bucks/LDOs which are controlled via register interface are changed.
+ * At PMIC poweroff the voltage for BUCK6/7 is cut immediately at the
+ * beginning of shut-down sequence. As bucks 6 and 7 are parent
+ * supplies for LDO5 and LDO6 - this causes LDO5/6 voltage
+ * monitoring to errorneously detect under voltage and force PMIC to
+ * emergency state instead of poweroff. In order to avoid this we
+ * disable voltage monitoring for LDO5 and LDO6
+ */
+ err = regmap_update_bits(pmic->mfd->regmap, BD718XX_REG_MVRFLTMASK2,
+ BD718XX_LDO5_VRMON80 | BD718XX_LDO6_VRMON80,
+ BD718XX_LDO5_VRMON80 | BD718XX_LDO6_VRMON80);
+ if (err) {
+ dev_err(&pmic->pdev->dev,
+ "Failed to disable voltage monitoring\n");
+ goto err;
+ }
+
for (i = 0; i < ARRAY_SIZE(pmic_regulator_inits); i++) {
struct regulator_desc *desc;
diff --git a/include/linux/mfd/rohm-bd718x7.h b/include/linux/mfd/rohm-bd718x7.h
index a528747f8aed..e8338e5dc10b 100644
--- a/include/linux/mfd/rohm-bd718x7.h
+++ b/include/linux/mfd/rohm-bd718x7.h
@@ -78,9 +78,9 @@ enum {
BD71837_REG_TRANS_COND0 = 0x1F,
BD71837_REG_TRANS_COND1 = 0x20,
BD71837_REG_VRFAULTEN = 0x21,
- BD71837_REG_MVRFLTMASK0 = 0x22,
- BD71837_REG_MVRFLTMASK1 = 0x23,
- BD71837_REG_MVRFLTMASK2 = 0x24,
+ BD718XX_REG_MVRFLTMASK0 = 0x22,
+ BD718XX_REG_MVRFLTMASK1 = 0x23,
+ BD718XX_REG_MVRFLTMASK2 = 0x24,
BD71837_REG_RCVCFG = 0x25,
BD71837_REG_RCVNUM = 0x26,
BD71837_REG_PWRONCONFIG0 = 0x27,
@@ -159,6 +159,33 @@ enum {
#define BUCK8_MASK 0x3F
#define BUCK8_DEFAULT 0x1E
+/* BD718XX Voltage monitoring masks */
+#define BD718XX_BUCK1_VRMON80 0x1
+#define BD718XX_BUCK1_VRMON130 0x2
+#define BD718XX_BUCK2_VRMON80 0x4
+#define BD718XX_BUCK2_VRMON130 0x8
+#define BD718XX_1ST_NODVS_BUCK_VRMON80 0x1
+#define BD718XX_1ST_NODVS_BUCK_VRMON130 0x2
+#define BD718XX_2ND_NODVS_BUCK_VRMON80 0x4
+#define BD718XX_2ND_NODVS_BUCK_VRMON130 0x8
+#define BD718XX_3RD_NODVS_BUCK_VRMON80 0x10
+#define BD718XX_3RD_NODVS_BUCK_VRMON130 0x20
+#define BD718XX_4TH_NODVS_BUCK_VRMON80 0x40
+#define BD718XX_4TH_NODVS_BUCK_VRMON130 0x80
+#define BD718XX_LDO1_VRMON80 0x1
+#define BD718XX_LDO2_VRMON80 0x2
+#define BD718XX_LDO3_VRMON80 0x4
+#define BD718XX_LDO4_VRMON80 0x8
+#define BD718XX_LDO5_VRMON80 0x10
+#define BD718XX_LDO6_VRMON80 0x20
+
+/* BD71837 specific voltage monitoring masks */
+#define BD71837_BUCK3_VRMON80 0x10
+#define BD71837_BUCK3_VRMON130 0x20
+#define BD71837_BUCK4_VRMON80 0x40
+#define BD71837_BUCK4_VRMON130 0x80
+#define BD71837_LDO7_VRMON80 0x40
+
/* BD71837_REG_IRQ bits */
#define IRQ_SWRST 0x40
#define IRQ_PWRON_S 0x20
--
2.18.0
[BUG]
fstrim on some btrfs only trims the unallocated space, not trimming any
space in existing block groups.
[CAUSE]
Before fstrim_range passed to btrfs_trim_fs(), it get truncated to
range [0, super->total_bytes).
So later btrfs_trim_fs() will only be able to trim block groups in range
[0, super->total_bytes).
While for btrfs, any bytenr aligned to sector size is valid, since btrfs use
its logical address space, there is nothing limiting the location where
we put block groups.
For btrfs with routine balance, it's quite easy to relocate all
block groups and bytenr of block groups will start beyond super->total_bytes.
In that case, btrfs will not trim existing block groups.
[FIX]
Just remove the truncation in btrfs_ioctl_fitrim(), so btrfs_trim_fs()
can get the unmodified range, which is normally set to [0, U64_MAX].
Reported-by: Chris Murphy <lists(a)colorremedies.com>
Fixes: f4c697e6406d ("btrfs: return EINVAL if start > total_bytes in fitrim ioctl")
Cc: <stable(a)vger.kernel.org> # v4.0+
Signed-off-by: Qu Wenruo <wqu(a)suse.com>
---
fs/btrfs/extent-tree.c | 10 +---------
fs/btrfs/ioctl.c | 11 +++++++----
2 files changed, 8 insertions(+), 13 deletions(-)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 7768f206196a..d1478d66c7a5 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -10851,21 +10851,13 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
u64 start;
u64 end;
u64 trimmed = 0;
- u64 total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
u64 bg_failed = 0;
u64 dev_failed = 0;
int bg_ret = 0;
int dev_ret = 0;
int ret = 0;
- /*
- * try to trim all FS space, our block group may start from non-zero.
- */
- if (range->len == total_bytes)
- cache = btrfs_lookup_first_block_group(fs_info, range->start);
- else
- cache = btrfs_lookup_block_group(fs_info, range->start);
-
+ cache = btrfs_lookup_first_block_group(fs_info, range->start);
for (; cache; cache = next_block_group(fs_info, cache)) {
if (cache->key.objectid >= (range->start + range->len)) {
btrfs_put_block_group(cache);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 63600dc2ac4c..8165a4bfa579 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -491,7 +491,6 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg)
struct fstrim_range range;
u64 minlen = ULLONG_MAX;
u64 num_devices = 0;
- u64 total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
int ret;
if (!capable(CAP_SYS_ADMIN))
@@ -515,11 +514,15 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg)
return -EOPNOTSUPP;
if (copy_from_user(&range, arg, sizeof(range)))
return -EFAULT;
- if (range.start > total_bytes ||
- range.len < fs_info->sb->s_blocksize)
+
+ /*
+ * NOTE: Don't truncate the range using super->total_bytes.
+ * Bytenr of btrfs block group is in btrfs logical address space,
+ * which can be any sector size aligned bytenr in [0, U64_MAX].
+ */
+ if (range.len < fs_info->sb->s_blocksize)
return -EINVAL;
- range.len = min(range.len, total_bytes - range.start);
range.minlen = max(range.minlen, minlen);
ret = btrfs_trim_fs(fs_info, &range);
if (ret < 0)
--
2.18.0
Like d88b6d04: "cdrom: information leak in cdrom_ioctl_media_changed()"
There is another cast from unsigned long to int which causes
a bounds check to fail with specially crafted input. The value is
then used as an index in the slot array in cdrom_slot_status().
Signed-off-by: Scott Bauer <scott.bauer(a)intel.com>
Signed-off-by: Scott Bauer <sbauer(a)plzdonthack.me>
Cc: stable(a)vger.kernel.org
---
drivers/cdrom/cdrom.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index bfc566d3f31a..8cfa10ab7abc 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -2542,7 +2542,7 @@ static int cdrom_ioctl_drive_status(struct cdrom_device_info *cdi,
if (!CDROM_CAN(CDC_SELECT_DISC) ||
(arg == CDSL_CURRENT || arg == CDSL_NONE))
return cdi->ops->drive_status(cdi, CDSL_CURRENT);
- if (((int)arg >= cdi->capacity))
+ if (arg >= cdi->capacity)
return -EINVAL;
return cdrom_slot_status(cdi, arg);
}
--
2.14.1
We're using imx_v6_v7_defconfig on our Wandboards. After upgrading to
v4.14.67, reboot no longer works (or, well, takes a very long time when
the watchdog is configured).
v4.14.66 works fine, the breakage bisects to
2059e527a659cf16d6bb709f1c8509f7a7623fc4 (ARM: imx_v6_v7_defconfig:
Select ULPI support), and reverting that on top of v4.14.67 again works.
Rasmus
The patch
spi: spi-fsl-dspi: fix broken DSPI_EOQ_MODE
has been applied to the spi tree at
https://git.kernel.org/pub/scm/linux/kernel/git/broonie/spi.git
All being well this means that it will be integrated into the linux-next
tree (usually sometime in the next 24 hours) and sent to Linus during
the next merge window (or sooner if it is a bug fix), however if
problems are discovered then the patch may be dropped or reverted.
You may get further e-mails resulting from automated or manual testing
and review of the tree, please engage with people reporting problems and
send followup patches addressing any issues that are reported if needed.
If any updates are required or you are submitting further changes they
should be sent as incremental updates against current git, existing
patches will not be replaced.
Please add any relevant lists and maintainers to the CCs when replying
to this mail.
Thanks,
Mark
>From 5223c9c1cbfc0cd4d0a1b50758e0949af3290fa1 Mon Sep 17 00:00:00 2001
From: Angelo Dureghello <angelo(a)sysam.it>
Date: Sat, 18 Aug 2018 01:51:58 +0200
Subject: [PATCH] spi: spi-fsl-dspi: fix broken DSPI_EOQ_MODE
This patch fixes the dspi_eoq_write function used by the
ColdFire mcf5441x family. The 16 bit cmd part must be re-set at
each data transfer.
Also, now that fifo_size variables are used for eoq_read/write,
a proper fifo size must be set (16 slots for the ColdFire dspi
module version).
Signed-off-by: Angelo Dureghello <angelo(a)sysam.it>
Acked-by: Esben Haabendal <esben(a)haabendal.dk>
Signed-off-by: Mark Brown <broonie(a)kernel.org>
Cc: stable(a)vger.kernel.org
---
drivers/spi/spi-fsl-dspi.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c
index 7cb3ab0a35a0..3082e72e4f6c 100644
--- a/drivers/spi/spi-fsl-dspi.c
+++ b/drivers/spi/spi-fsl-dspi.c
@@ -30,7 +30,11 @@
#define DRIVER_NAME "fsl-dspi"
+#ifdef CONFIG_M5441x
+#define DSPI_FIFO_SIZE 16
+#else
#define DSPI_FIFO_SIZE 4
+#endif
#define DSPI_DMA_BUFSIZE (DSPI_FIFO_SIZE * 1024)
#define SPI_MCR 0x00
@@ -623,9 +627,11 @@ static void dspi_tcfq_read(struct fsl_dspi *dspi)
static void dspi_eoq_write(struct fsl_dspi *dspi)
{
int fifo_size = DSPI_FIFO_SIZE;
+ u16 xfer_cmd = dspi->tx_cmd;
/* Fill TX FIFO with as many transfers as possible */
while (dspi->len && fifo_size--) {
+ dspi->tx_cmd = xfer_cmd;
/* Request EOQF for last transfer in FIFO */
if (dspi->len == dspi->bytes_per_word || fifo_size == 0)
dspi->tx_cmd |= SPI_PUSHR_CMD_EOQ;
--
2.18.0
Subject of the patch: iommu/arm-smmu: Error out only if not enough
context interrupts
Commit ID: 42b88ec6530fd76f1ae06de7f09830bcbca5bbd6
Why: ARM SMMU does not initialize for Broadcom Stingray SoC if
bootloader reserves few contexts. Kernel should not error out if there
are enough context interrupts.
Patch
Sehr geehrte Damen und Herren.
Nachdem wir Ihre Webseite besucht und das Profil Ihrer Geschäftstätigkeit analysiert haben, wissen wir schon, wie Sie neue Kunden ab sofort gewinnen können.
Wir verfügen über mehr als 100 000 Adressenangaben der potentiellen Kunden. Diese Daten wurden nach Branchen gegliedert.
http://www.gbc-at.net/?page=catalog
***
1. Österreich 2018 ( 104 000 ) - 149 EUR ( bis zum 29.08.2018 )
***
Bitte informieren Sie sich über die weiteren Details einmal unverbindlich auf unseren Webseite:
http://www.gbc-at.net/?page=catalog
Die Adressensortierung je nach der Branche findet KOSTENLOS im beigelegten DataManager-Programm statt.
Zusätzlich bieten wir KOSTENLOS das Tool zum automatischen Verschicken der Angebote an.
MfG
Thomas Weber
GC-Team
Some modems now use the Android Debug Bridge to provide a debugging
interface, and some phones can also export serial ports managed by the
"option" driver.
The ADB daemon running in userspace tries to use USB interfaces with
bDeviceClass=0xFF, bDeviceSubClass=0x42, bDeviceProtocol=1
Prevent the option driver from binding to those interfaces, as they
will not be serial ports.
This can fix issues like:
https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=781256
Signed-off-by: Romain Izard <romain.izard.pro(a)gmail.com>
Cc: stable <stable(a)vger.kernel.org>
---
drivers/usb/serial/option.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index 664e61f16b6a..f98943a57ff0 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -1987,6 +1987,12 @@ static int option_probe(struct usb_serial *serial,
if (iface_desc->bInterfaceClass == USB_CLASS_MASS_STORAGE)
return -ENODEV;
+ /* Do not bind Android Debug Bridge interfaces */
+ if (iface_desc->bInterfaceClass == USB_CLASS_VENDOR_SPEC &&
+ iface_desc->bInterfaceSubClass == 0x42 &&
+ iface_desc->bInterfaceProtocol == 1)
+ return -ENODEV;
+
/*
* Don't bind reserved interfaces (like network ones) which often have
* the same class/subclass/protocol as the serial interfaces. Look at
--
2.17.1
Hi !
this is also the wrong version of the patch - the proper version is
below. This has been posted to lkml https://lkml.org/lkml/2018/7/18/191
but there was no review yet - the version you have though is for sure
broken. So maybe this should be simply dropped until the fix is confirmed
thx!
hofrat
----- Forwarded message from gregkh(a)linuxfoundation.org -----
Date: Tue, 28 Aug 2018 16:11:46 +0200
From: gregkh(a)linuxfoundation.org
To: 1531571532-22733-1-git-send-email-hofrat(a)osadl.org, alexander.levin(a)microsoft.com, gregkh(a)linuxfoundation.org, hofrat(a)osadl.org,
seanpaul(a)chromium.org
Cc: stable-commits(a)vger.kernel.org
Subject: Patch "drm: re-enable error handling" has been added to the 4.4-stable tree
This is a note to let you know that I've just added the patch titled
drm: re-enable error handling
to the 4.4-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
drm-re-enable-error-handling.patch
and it can be found in the queue-4.4 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Tue Aug 28 16:10:37 CEST 2018
From: Nicholas Mc Guire <hofrat(a)osadl.org>
Date: Sat, 14 Jul 2018 14:32:12 +0200
Subject: drm: re-enable error handling
From: Nicholas Mc Guire <hofrat(a)osadl.org>
[ Upstream commit d530b5f1ca0bb66958a2b714bebe40a1248b9c15 ]
drm_legacy_ctxbitmap_next() returns idr_alloc() which can return
-ENOMEM, -EINVAL or -ENOSPC none of which are -1 . but the call sites
of drm_legacy_ctxbitmap_next() seem to be assuming that the error case
would be -1 (original return of drm_ctxbitmap_next() prior to 2.6.23
was actually -1). Thus reenable error handling by checking for < 0.
Signed-off-by: Nicholas Mc Guire <hofrat(a)osadl.org>
Fixes: 62968144e673 ("drm: convert drm context code to use Linux idr")
Signed-off-by: Sean Paul <seanpaul(a)chromium.org>
Link: https://patchwork.freedesktop.org/patch/msgid/1531571532-22733-1-git-send-e…
Signed-off-by: Sasha Levin <alexander.levin(a)microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/gpu/drm/drm_context.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/drivers/gpu/drm/drm_context.c
+++ b/drivers/gpu/drm/drm_context.c
@@ -372,7 +372,7 @@ int drm_legacy_addctx(struct drm_device
ctx->handle = drm_legacy_ctxbitmap_next(dev);
}
DRM_DEBUG("%d\n", ctx->handle);
- if (ctx->handle == -1) {
+ if (ctx->handle < 0) {
DRM_DEBUG("Not enough free contexts.\n");
/* Should this return -EBUSY instead? */
return -ENOMEM;
Patches currently in stable-queue which might be from hofrat(a)osadl.org are
queue-4.4/drm-re-enable-error-handling.patch
queue-4.4/can-mpc5xxx_can-check-of_iomap-return-before-use.patch
----- End forwarded message -----
Two users have reported [1] that they have an "extremely unlikely" system
with more than MAX_PA/2 memory and L1TF mitigation is not effective. In fact
it's a CPU with 36bits phys limit (64GB) and 32GB memory, but due to holes
in the e820 map, the main region is almost 500MB over the 32GB limit:
[ 0.000000] BIOS-e820: [mem 0x0000000100000000-0x000000081effffff] usable
Suggestions to use 'mem=32G' to prefer L1TF mitigation while losing the 500MB
revealed, that there's an off-by-one error in the check in
l1tf_select_mitigation(). l1tf_pfn_limit() returns the last usable pfn
(inclusive), but it's more common and hopefully less error-prone to return the
first pfn that's over limit, so this patch changes that and updates the other
callers.
[1] https://bugzilla.suse.com/show_bug.cgi?id=1105536
Reported-by: George Anchev <studio(a)anchev.net>
Reported-by: Christopher Snowhill <kode54(a)gmail.com>
Fixes: 17dbca119312 ("x86/speculation/l1tf: Add sysfs reporting for l1tf")
Cc: stable(a)vger.kernel.org
Signed-off-by: Vlastimil Babka <vbabka(a)suse.cz>
---
arch/x86/include/asm/processor.h | 2 +-
arch/x86/mm/init.c | 2 +-
arch/x86/mm/mmap.c | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index a0a52274cb4a..c24297268ebc 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -183,7 +183,7 @@ extern void cpu_detect(struct cpuinfo_x86 *c);
static inline unsigned long long l1tf_pfn_limit(void)
{
- return BIT_ULL(boot_cpu_data.x86_phys_bits - 1 - PAGE_SHIFT) - 1;
+ return BIT_ULL(boot_cpu_data.x86_phys_bits - 1 - PAGE_SHIFT);
}
extern void early_cpu_init(void);
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 02de3d6065c4..63a6f9fcaf20 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -923,7 +923,7 @@ unsigned long max_swapfile_size(void)
if (boot_cpu_has_bug(X86_BUG_L1TF)) {
/* Limit the swap file size to MAX_PA/2 for L1TF workaround */
- unsigned long long l1tf_limit = l1tf_pfn_limit() + 1;
+ unsigned long long l1tf_limit = l1tf_pfn_limit();
/*
* We encode swap offsets also with 3 bits below those for pfn
* which makes the usable limit higher.
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index f40ab8185d94..1e95d57760cf 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -257,7 +257,7 @@ bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot)
/* If it's real memory always allow */
if (pfn_valid(pfn))
return true;
- if (pfn > l1tf_pfn_limit() && !capable(CAP_SYS_ADMIN))
+ if (pfn >= l1tf_pfn_limit() && !capable(CAP_SYS_ADMIN))
return false;
return true;
}
--
2.18.0
Hi,
The upstream kernel has supported IIO free-running counters on Skylake
server. As of Skylake Server, there are a number of free running
counters in each IIO Box that collect counts of per-box IO clocks and
per-port Input/Output x BW/Utilization.
There are three types of IIO free-running counters on Skylake server:
1. IO CLOCKS counter: a clock of IIO box.
2. BANDWIDTH counters: count inbound(PCIe->CPU)/outbound(CPU->PCIe)
bandwidth.
3. UTILIZATION counters: count input/output utilization.
With these IIO free-running counters, we can get good observation for
IIO traffic on Skylake server. For example, we can see the IIO inbound
bandwidth (PCIe->CPU).
root@skx /sys/devices# perf stat -a -e
uncore_iio_free_running_2/bw_in_port0/
^C
Performance counter stats for 'system wide':
153.19 MiB uncore_iio_free_running_2/bw_in_port0/
8.037701069 seconds time elapsed
I propose to backport the patches which support IIO free-running
counters to 4.14 stable kernel.
perf/x86/intel/uncore: Introduce customized event_read() for client IMC
uncore
2da331465f44f9618abe8837d1a68405d550b66e
perf/x86/intel/uncore: Add new data structures for free running counters
927b2deb067b8b4753fc09c7a42092f43fc0c1f6
perf/x86/intel/uncore: Add infrastructure for free running counters
0e0162dfcd1fbe4c711ee86f24f966c318999603
perf/x86/intel/uncore: Support IIO free-running counters on SKX
0f519f0352e37e7d71bdce5559517c74a35f6e33
perf/x86/intel/uncore: Expose uncore_pmu_event*() functions
5a6c9d94e9ed7410142bc6fcb638a4db1895aa0c
perf/x86/intel/uncore: Clean up client IMC uncore
9aae1780e7e81e54edfb70ba33ead5b0b48be009
Thanks
Jin Yao
From: Randy Dunlap <rdunlap(a)infradead.org>
When $DEPMOD is not found, only print a warning instead of exiting
with an error message and error status.
Warning: 'make modules_install' requires /sbin/depmod. Please install it.
This is probably in the kmod package.
Signed-off-by: Randy Dunlap <rdunlap(a)infradead.org>
Fixes: 934193a654c1 ("kbuild: verify that $DEPMOD is installed")
Cc: stable(a)vger.kernel.org
Cc: Lucas De Marchi <lucas.demarchi(a)profusion.mobi>
Cc: Lucas De Marchi <lucas.de.marchi(a)gmail.com>
Cc: Michal Marek <michal.lkml(a)markovi.net>
Cc: Jessica Yu <jeyu(a)kernel.org>
Cc: Chih-Wei Huang <cwhuang(a)linux.org.tw>
Cc: H. Nikolaus Schaller <hns(a)goldelico.com>
---
v2: add missing "exit 0" and update the commit message (no Error).
v3: add Fixes: and Cc: stable
scripts/depmod.sh | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
--- lnx-418.orig/scripts/depmod.sh
+++ lnx-418/scripts/depmod.sh
@@ -15,9 +15,9 @@ if ! test -r System.map ; then
fi
if [ -z $(command -v $DEPMOD) ]; then
- echo "'make modules_install' requires $DEPMOD. Please install it." >&2
+ echo "Warning: 'make modules_install' requires $DEPMOD. Please install it." >&2
echo "This is probably in the kmod package." >&2
- exit 1
+ exit 0
fi
# older versions of depmod require the version string to start with three
The patch titled
Subject: uapi/linux/keyctl.h: don't use C++ reserved keyword as a struct member name
has been added to the -mm tree. Its filename is
uapi-linux-keyctlh-dont-use-c-reserved-keyword-as-a-struct-member-name.patch
This patch should soon appear at
http://ozlabs.org/~akpm/mmots/broken-out/uapi-linux-keyctlh-dont-use-c-rese…
and later at
http://ozlabs.org/~akpm/mmotm/broken-out/uapi-linux-keyctlh-dont-use-c-rese…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Randy Dunlap <rdunlap(a)infradead.org>
Subject: uapi/linux/keyctl.h: don't use C++ reserved keyword as a struct member name
Since this header is in "include/uapi/linux/", apparently people want to
use it in userspace programs -- even in C++ ones. However, the header
uses a C++ reserved keyword ("private"), so change that to "dh_private"
instead to allow the header file to be used in C++ userspace.
Fixes https://bugzilla.kernel.org/show_bug.cgi?id=191051
Link: http://lkml.kernel.org/r/0db6c314-1ef4-9bfa-1baa-7214dd2ee061@infradead.org
Fixes: ddbb41148724 ("KEYS: Add KEYCTL_DH_COMPUTE command")
Signed-off-by: Randy Dunlap <rdunlap(a)infradead.org>
Reviewed-by: Andrew Morton <akpm(a)linux-foundation.org>
Cc: David Howells <dhowells(a)redhat.com>
Cc: James Morris <jmorris(a)namei.org>
Cc: "Serge E. Hallyn" <serge(a)hallyn.com>
Cc: Mat Martineau <mathew.j.martineau(a)linux.intel.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
include/uapi/linux/keyctl.h | 2 +-
security/keys/dh.c | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
--- a/include/uapi/linux/keyctl.h~uapi-linux-keyctlh-dont-use-c-reserved-keyword-as-a-struct-member-name
+++ a/include/uapi/linux/keyctl.h
@@ -65,7 +65,7 @@
/* keyctl structures */
struct keyctl_dh_params {
- __s32 private;
+ __s32 dh_private;
__s32 prime;
__s32 base;
};
--- a/security/keys/dh.c~uapi-linux-keyctlh-dont-use-c-reserved-keyword-as-a-struct-member-name
+++ a/security/keys/dh.c
@@ -300,7 +300,7 @@ long __keyctl_dh_compute(struct keyctl_d
}
dh_inputs.g_size = dlen;
- dlen = dh_data_from_key(pcopy.private, &dh_inputs.key);
+ dlen = dh_data_from_key(pcopy.dh_private, &dh_inputs.key);
if (dlen < 0) {
ret = dlen;
goto out2;
_
Patches currently in -mm which might be from rdunlap(a)infradead.org are
uapi-linux-keyctlh-dont-use-c-reserved-keyword-as-a-struct-member-name.patch
The patch titled
Subject: memory_hotplug: fix kernel_panic on offline page processing
has been added to the -mm tree. Its filename is
memory_hotplug-fix-kernel_panic-on-offline-page-processing.patch
This patch should soon appear at
http://ozlabs.org/~akpm/mmots/broken-out/memory_hotplug-fix-kernel_panic-on…
and later at
http://ozlabs.org/~akpm/mmotm/broken-out/memory_hotplug-fix-kernel_panic-on…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Mikhail Zaslonko <zaslonko(a)linux.ibm.com>
Subject: memory_hotplug: fix kernel_panic on offline page processing
Within show_valid_zones() the function test_pages_in_a_zone() should be
called for online memory blocks only. Otherwise it might lead to the
VM_BUG_ON due to uninitialized struct pages (when CONFIG_DEBUG_VM_PGFLAGS
kernel option is set):
page dumped because: VM_BUG_ON_PAGE(PagePoisoned(p))
------------[ cut here ]------------
Call Trace:
([<000000000038f91e>] test_pages_in_a_zone+0xe6/0x168)
[<0000000000923472>] show_valid_zones+0x5a/0x1a8
[<0000000000900284>] dev_attr_show+0x3c/0x78
[<000000000046f6f0>] sysfs_kf_seq_show+0xd0/0x150
[<00000000003ef662>] seq_read+0x212/0x4b8
[<00000000003bf202>] __vfs_read+0x3a/0x178
[<00000000003bf3ca>] vfs_read+0x8a/0x148
[<00000000003bfa3a>] ksys_read+0x62/0xb8
[<0000000000bc2220>] system_call+0xdc/0x2d8
That VM_BUG_ON was triggered by the page poisoning introduced in
mm/sparse.c with the git commit d0dc12e86b31 ("mm/memory_hotplug: optimize
memory hotplug") With the same commit the new 'nid' field has been added
to the struct memory_block in order to store and later on derive the node
id for offline pages (instead of accessing struct page which might be
uninitialized). But one reference to nid in show_valid_zones() function
has been overlooked. Fixed with current commit. Also, nr_pages will not
be used any more after test_pages_in_a_zone() call, do not update it.
Link: http://lkml.kernel.org/r/20180828090539.41491-1-zaslonko@linux.ibm.com
Fixes: d0dc12e86b31 ("mm/memory_hotplug: optimize memory hotplug")
Signed-off-by: Mikhail Zaslonko <zaslonko(a)linux.ibm.com>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Reviewed-by: Pavel Tatashin <pavel.tatashin(a)microsoft.com>
Cc: <stable(a)vger.kernel.org> [4.17+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
drivers/base/memory.c | 20 +++++++++-----------
1 file changed, 9 insertions(+), 11 deletions(-)
--- a/drivers/base/memory.c~memory_hotplug-fix-kernel_panic-on-offline-page-processing
+++ a/drivers/base/memory.c
@@ -417,25 +417,23 @@ static ssize_t show_valid_zones(struct d
int nid;
/*
- * The block contains more than one zone can not be offlined.
- * This can happen e.g. for ZONE_DMA and ZONE_DMA32
- */
- if (!test_pages_in_a_zone(start_pfn, start_pfn + nr_pages, &valid_start_pfn, &valid_end_pfn))
- return sprintf(buf, "none\n");
-
- start_pfn = valid_start_pfn;
- nr_pages = valid_end_pfn - start_pfn;
-
- /*
* Check the existing zone. Make sure that we do that only on the
* online nodes otherwise the page_zone is not reliable
*/
if (mem->state == MEM_ONLINE) {
+ /*
+ * The block contains more than one zone can not be offlined.
+ * This can happen e.g. for ZONE_DMA and ZONE_DMA32
+ */
+ if (!test_pages_in_a_zone(start_pfn, start_pfn + nr_pages,
+ &valid_start_pfn, &valid_end_pfn))
+ return sprintf(buf, "none\n");
+ start_pfn = valid_start_pfn;
strcat(buf, page_zone(pfn_to_page(start_pfn))->name);
goto out;
}
- nid = pfn_to_nid(start_pfn);
+ nid = mem->nid;
default_zone = zone_for_pfn_range(MMOP_ONLINE_KEEP, nid, start_pfn, nr_pages);
strcat(buf, default_zone->name);
_
Patches currently in -mm which might be from zaslonko(a)linux.ibm.com are
memory_hotplug-fix-kernel_panic-on-offline-page-processing.patch
Fixes commit 208cbb325589 ("x86/irqflags: Provide a declaration for
native_save_fl")
This should have been marked extern inline in order to pick up the out
of line definition in arch/x86/kernel/irqflags.S.
Cc: stable(a)vger.kernel.org # 4.18, 4.14, 4.9, 4.4
Reported-by: Ben Hutchings <ben.hutchings(a)codethink.co.uk>
Signed-off-by: Nick Desaulniers <ndesaulniers(a)google.com>
---
arch/x86/include/asm/irqflags.h | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index c14f2a74b2be..15450a675031 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -33,7 +33,8 @@ extern inline unsigned long native_save_fl(void)
return flags;
}
-static inline void native_restore_fl(unsigned long flags)
+extern inline void native_restore_fl(unsigned long flags);
+extern inline void native_restore_fl(unsigned long flags)
{
asm volatile("push %0 ; popf"
: /* no output */
--
2.19.0.rc0.228.g281dcd1b4d0-goog
The following commit:
368a540e0232 (x86/kvmclock: Remove memblock dependency)
caused SEV guest regression. When SEV is active, we map the shared
variables (wall_clock and hv_clock_boot) with C=0 to ensure that both
the guest and the hypervisor is able to access the data. To map the
variables we use kernel_physical_mapping_init() to split the large pages,
but this routine fails to allocate a new page. Before the above commit,
kvmclock initialization was called after memory allocator was available
but now its called very early in the boot process.
Recently we added a special .data..decrypted section to hold the shared
variables. This section is mapped with C=0 very early. Use __decrypted
attribute to put the wall_clock and hv_clock_boot in .data..decrypted
section so that they are mapped with C=0.
Signed-off-by: Brijesh Singh <brijesh.singh(a)amd.com>
Fixes: 368a540e0232 ("x86/kvmclock: Remove memblock dependency")
Cc: stable(a)vger.kernel.org
Cc: Tom Lendacky <thomas.lendacky(a)amd.com>
Cc: kvm(a)vger.kernel.org
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Borislav Petkov <bp(a)suse.de>
Cc: "H. Peter Anvin" <hpa(a)zytor.com>
Cc: linux-kernel(a)vger.kernel.org
Cc: Paolo Bonzini <pbonzini(a)redhat.com>
Cc: Sean Christopherson <sean.j.christopherson(a)intel.com>
Cc: kvm(a)vger.kernel.org
Cc: "Radim Krčmář" <rkrcmar(a)redhat.com>
---
arch/x86/kernel/kvmclock.c | 30 +++++++++++++++++++++++++-----
1 file changed, 25 insertions(+), 5 deletions(-)
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 1e67646..08f5f8a 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -28,6 +28,7 @@
#include <linux/sched/clock.h>
#include <linux/mm.h>
#include <linux/slab.h>
+#include <linux/set_memory.h>
#include <asm/hypervisor.h>
#include <asm/mem_encrypt.h>
@@ -61,8 +62,8 @@ early_param("no-kvmclock-vsyscall", parse_no_kvmclock_vsyscall);
(PAGE_SIZE / sizeof(struct pvclock_vsyscall_time_info))
static struct pvclock_vsyscall_time_info
- hv_clock_boot[HVC_BOOT_ARRAY_SIZE] __aligned(PAGE_SIZE);
-static struct pvclock_wall_clock wall_clock;
+ hv_clock_boot[HVC_BOOT_ARRAY_SIZE] __decrypted __aligned(PAGE_SIZE);
+static struct pvclock_wall_clock wall_clock __decrypted;
static DEFINE_PER_CPU(struct pvclock_vsyscall_time_info *, hv_clock_per_cpu);
static inline struct pvclock_vcpu_time_info *this_cpu_pvti(void)
@@ -267,10 +268,29 @@ static int kvmclock_setup_percpu(unsigned int cpu)
return 0;
/* Use the static page for the first CPUs, allocate otherwise */
- if (cpu < HVC_BOOT_ARRAY_SIZE)
+ if (cpu < HVC_BOOT_ARRAY_SIZE) {
p = &hv_clock_boot[cpu];
- else
- p = kzalloc(sizeof(*p), GFP_KERNEL);
+ } else {
+ int rc;
+ unsigned int sz = sizeof(*p);
+
+ if (sev_active())
+ sz = PAGE_ALIGN(sz);
+
+ p = kzalloc(sz, GFP_KERNEL);
+
+ /*
+ * The physical address of per-cpu variable will be shared with
+ * the hypervisor. Let's clear the C-bit before we assign the
+ * memory to per_cpu variable.
+ */
+ if (p && sev_active()) {
+ rc = set_memory_decrypted((unsigned long)p, sz >> PAGE_SHIFT);
+ if (rc)
+ return rc;
+ memset(p, 0, sz);
+ }
+ }
per_cpu(hv_clock_per_cpu, cpu) = p;
return p ? 0 : -ENOMEM;
--
2.7.4
If the SPI bus number is provided by a DT alias, idr_alloc() is called
twice, leading to:
WARNING: CPU: 1 PID: 1 at drivers/spi/spi.c:2179 spi_register_controller+0x11c/0x5d8
couldn't get idr
Fix this by moving the handling of fixed SPI bus numbers up, before the
DT handling code fills in ctlr->bus_num.
Fixes: 1a4327fbf4554d5b ("spi: fix IDR collision on systems with both fixed and dynamic SPI bus numbers")
Signed-off-by: Geert Uytterhoeven <geert+renesas(a)glider.be>
---
Seen on e.g. r8a7791/koelsch, breaking both RSPI and MSIOF.
---
drivers/spi/spi.c | 22 +++++++++++-----------
1 file changed, 11 insertions(+), 11 deletions(-)
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index a00d006d4c3a1c5a..9da0bc5a036cfff6 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -2143,8 +2143,17 @@ int spi_register_controller(struct spi_controller *ctlr)
*/
if (ctlr->num_chipselect == 0)
return -EINVAL;
- /* allocate dynamic bus number using Linux idr */
- if ((ctlr->bus_num < 0) && ctlr->dev.of_node) {
+ if (ctlr->bus_num >= 0) {
+ /* devices with a fixed bus num must check-in with the num */
+ mutex_lock(&board_lock);
+ id = idr_alloc(&spi_master_idr, ctlr, ctlr->bus_num,
+ ctlr->bus_num + 1, GFP_KERNEL);
+ mutex_unlock(&board_lock);
+ if (WARN(id < 0, "couldn't get idr"))
+ return id == -ENOSPC ? -EBUSY : id;
+ ctlr->bus_num = id;
+ } else if (ctlr->dev.of_node) {
+ /* allocate dynamic bus number using Linux idr */
id = of_alias_get_id(ctlr->dev.of_node, "spi");
if (id >= 0) {
ctlr->bus_num = id;
@@ -2170,15 +2179,6 @@ int spi_register_controller(struct spi_controller *ctlr)
if (WARN(id < 0, "couldn't get idr"))
return id;
ctlr->bus_num = id;
- } else {
- /* devices with a fixed bus num must check-in with the num */
- mutex_lock(&board_lock);
- id = idr_alloc(&spi_master_idr, ctlr, ctlr->bus_num,
- ctlr->bus_num + 1, GFP_KERNEL);
- mutex_unlock(&board_lock);
- if (WARN(id < 0, "couldn't get idr"))
- return id == -ENOSPC ? -EBUSY : id;
- ctlr->bus_num = id;
}
INIT_LIST_HEAD(&ctlr->queue);
spin_lock_init(&ctlr->queue_lock);
--
2.17.1
The patch
spi: spi-fsl-dspi: fix broken DSPI_EOQ_MODE
has been applied to the spi tree at
https://git.kernel.org/pub/scm/linux/kernel/git/broonie/spi.git
All being well this means that it will be integrated into the linux-next
tree (usually sometime in the next 24 hours) and sent to Linus during
the next merge window (or sooner if it is a bug fix), however if
problems are discovered then the patch may be dropped or reverted.
You may get further e-mails resulting from automated or manual testing
and review of the tree, please engage with people reporting problems and
send followup patches addressing any issues that are reported if needed.
If any updates are required or you are submitting further changes they
should be sent as incremental updates against current git, existing
patches will not be replaced.
Please add any relevant lists and maintainers to the CCs when replying
to this mail.
Thanks,
Mark
>From 5223c9c1cbfc0cd4d0a1b50758e0949af3290fa1 Mon Sep 17 00:00:00 2001
From: Angelo Dureghello <angelo(a)sysam.it>
Date: Sat, 18 Aug 2018 01:51:58 +0200
Subject: [PATCH] spi: spi-fsl-dspi: fix broken DSPI_EOQ_MODE
This patch fixes the dspi_eoq_write function used by the
ColdFire mcf5441x family. The 16 bit cmd part must be re-set at
each data transfer.
Also, now that fifo_size variables are used for eoq_read/write,
a proper fifo size must be set (16 slots for the ColdFire dspi
module version).
Signed-off-by: Angelo Dureghello <angelo(a)sysam.it>
Acked-by: Esben Haabendal <esben(a)haabendal.dk>
Signed-off-by: Mark Brown <broonie(a)kernel.org>
Cc: stable(a)vger.kernel.org
---
drivers/spi/spi-fsl-dspi.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c
index 7cb3ab0a35a0..3082e72e4f6c 100644
--- a/drivers/spi/spi-fsl-dspi.c
+++ b/drivers/spi/spi-fsl-dspi.c
@@ -30,7 +30,11 @@
#define DRIVER_NAME "fsl-dspi"
+#ifdef CONFIG_M5441x
+#define DSPI_FIFO_SIZE 16
+#else
#define DSPI_FIFO_SIZE 4
+#endif
#define DSPI_DMA_BUFSIZE (DSPI_FIFO_SIZE * 1024)
#define SPI_MCR 0x00
@@ -623,9 +627,11 @@ static void dspi_tcfq_read(struct fsl_dspi *dspi)
static void dspi_eoq_write(struct fsl_dspi *dspi)
{
int fifo_size = DSPI_FIFO_SIZE;
+ u16 xfer_cmd = dspi->tx_cmd;
/* Fill TX FIFO with as many transfers as possible */
while (dspi->len && fifo_size--) {
+ dspi->tx_cmd = xfer_cmd;
/* Request EOQF for last transfer in FIFO */
if (dspi->len == dspi->bytes_per_word || fifo_size == 0)
dspi->tx_cmd |= SPI_PUSHR_CMD_EOQ;
--
2.18.0
The patch
ASoC: rt5682: Change DAC/ADC volume scale
has been applied to the asoc tree at
https://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound.git
All being well this means that it will be integrated into the linux-next
tree (usually sometime in the next 24 hours) and sent to Linus during
the next merge window (or sooner if it is a bug fix), however if
problems are discovered then the patch may be dropped or reverted.
You may get further e-mails resulting from automated or manual testing
and review of the tree, please engage with people reporting problems and
send followup patches addressing any issues that are reported if needed.
If any updates are required or you are submitting further changes they
should be sent as incremental updates against current git, existing
patches will not be replaced.
Please add any relevant lists and maintainers to the CCs when replying
to this mail.
Thanks,
Mark
>From 7509487785d7a2bf3606cf26710f0ca29e9ca94d Mon Sep 17 00:00:00 2001
From: Shuming Fan <shumingf(a)realtek.com>
Date: Fri, 24 Aug 2018 10:52:19 +0800
Subject: [PATCH] ASoC: rt5682: Change DAC/ADC volume scale
The step of DAC/ADC volume scale changes from 0.375dB to 0.75dB
Signed-off-by: Shuming Fan <shumingf(a)realtek.com>
Signed-off-by: Mark Brown <broonie(a)kernel.org>
Cc: stable(a)vger.kernel.org
---
sound/soc/codecs/rt5682.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/sound/soc/codecs/rt5682.c b/sound/soc/codecs/rt5682.c
index 640d400ca013..afe7d5b19313 100644
--- a/sound/soc/codecs/rt5682.c
+++ b/sound/soc/codecs/rt5682.c
@@ -750,8 +750,8 @@ static bool rt5682_readable_register(struct device *dev, unsigned int reg)
}
static const DECLARE_TLV_DB_SCALE(hp_vol_tlv, -2250, 150, 0);
-static const DECLARE_TLV_DB_SCALE(dac_vol_tlv, -65625, 375, 0);
-static const DECLARE_TLV_DB_SCALE(adc_vol_tlv, -17625, 375, 0);
+static const DECLARE_TLV_DB_SCALE(dac_vol_tlv, -6525, 75, 0);
+static const DECLARE_TLV_DB_SCALE(adc_vol_tlv, -1725, 75, 0);
static const DECLARE_TLV_DB_SCALE(adc_bst_tlv, 0, 1200, 0);
/* {0, +20, +24, +30, +35, +40, +44, +50, +52} dB */
@@ -1114,7 +1114,7 @@ static const struct snd_kcontrol_new rt5682_snd_controls[] = {
/* DAC Digital Volume */
SOC_DOUBLE_TLV("DAC1 Playback Volume", RT5682_DAC1_DIG_VOL,
- RT5682_L_VOL_SFT, RT5682_R_VOL_SFT, 175, 0, dac_vol_tlv),
+ RT5682_L_VOL_SFT + 1, RT5682_R_VOL_SFT + 1, 86, 0, dac_vol_tlv),
/* IN Boost Volume */
SOC_SINGLE_TLV("CBJ Boost Volume", RT5682_CBJ_BST_CTRL,
@@ -1124,7 +1124,7 @@ static const struct snd_kcontrol_new rt5682_snd_controls[] = {
SOC_DOUBLE("STO1 ADC Capture Switch", RT5682_STO1_ADC_DIG_VOL,
RT5682_L_MUTE_SFT, RT5682_R_MUTE_SFT, 1, 1),
SOC_DOUBLE_TLV("STO1 ADC Capture Volume", RT5682_STO1_ADC_DIG_VOL,
- RT5682_L_VOL_SFT, RT5682_R_VOL_SFT, 127, 0, adc_vol_tlv),
+ RT5682_L_VOL_SFT + 1, RT5682_R_VOL_SFT + 1, 63, 0, adc_vol_tlv),
/* ADC Boost Volume Control */
SOC_DOUBLE_TLV("STO1 ADC Boost Gain Volume", RT5682_STO1_ADC_BOOST,
--
2.18.0
This caused a confusing error message, but there is functionally
no problem since the default method is DIRECT.
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
Cc: stable(a)vger.kernel.org
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index b419d6e33b3a..a942fd28dae8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -277,6 +277,7 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type)
case CHIP_PITCAIRN:
case CHIP_VERDE:
case CHIP_OLAND:
+ case CHIP_HAINAN:
return AMDGPU_FW_LOAD_DIRECT;
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
--
2.13.6
Hi,
I just wanted to check if you would be interested in a list of Managed
Service Providers (MSPs) and Managed Security Service Providers (MSSPs)?
We also have the data intelligence of:
• Managed Service Providers (MSP’s)
• Managed Security Service Providers (MSSP’s)
• IT Decision Makers – 6million across all industry
• Business Decision Makers – 10 million across all industry
• Value Added Resellers- VARs
• Independent Software Vendors- ISVs
• System Integrators- SIs
• VoIP Service Providers.
• Telecommunications Service Providers (TSPs)
• Application Service Providers (ASPs)
• IT Managed Services Providers (ITMSP)
• Storage Service Providers (SSPs)
Kindly review and let me know if I can share more information on this.
I look forward to hearing from you.
Regards,
Stella
Marketing Specialist
If you don't want to include yourself in our mailing list, please reply
back “Leave Out" in a subject line
If the size of spi-nor flash is larger than 16MB, the read_opcode
is set to SPINOR_OP_READ_1_1_4_4B, and fsl_qspi_get_seqid() will
return -EINVAL when cmd is SPINOR_OP_READ_1_1_4_4B. This can
cause read operation fail.
---
v2:
add Fixes tag and CC stable suggested by Boris.
---
Fixes: e46ecda764dc ("mtd: spi-nor: Add Freescale QuadSPI driver")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Liu Xiang <liu.xiang6(a)zte.com.cn>
---
drivers/mtd/spi-nor/fsl-quadspi.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/mtd/spi-nor/fsl-quadspi.c b/drivers/mtd/spi-nor/fsl-quadspi.c
index 7d9620c..64304a3 100644
--- a/drivers/mtd/spi-nor/fsl-quadspi.c
+++ b/drivers/mtd/spi-nor/fsl-quadspi.c
@@ -478,6 +478,7 @@ static int fsl_qspi_get_seqid(struct fsl_qspi *q, u8 cmd)
{
switch (cmd) {
case SPINOR_OP_READ_1_1_4:
+ case SPINOR_OP_READ_1_1_4_4B:
return SEQID_READ;
case SPINOR_OP_WREN:
return SEQID_WREN;
--
1.9.1
-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA512
Hi Greg,
Pleae pull commits for Linux 4.4 .
I've sent a review request for all commits over a week ago and all
comments were addressed.
Thanks,
Sasha
=====
The following changes since commit 7dc18ebc3101229d5238a2dc740804cd4836b383:
Linux 4.4.150 (2018-08-18 10:45:38 +0200)
are available in the Git repository at:
git://git.kernel.org/pub/scm/linux/kernel/git/sashal/linux-stable.git tags/for-greg-4.4-14082018
for you to fetch changes up to 99f86ec21034d82ebdcfff3ed4010945e082ca04:
mm/memory.c: check return value of ioremap_prot (2018-08-18 10:17:21 -0400)
- ----------------------------------------------------------------
for-greg-4.4-14082018
- ----------------------------------------------------------------
Alexander Sverdlin (1):
i2c: davinci: Avoid zero value of CLKH
Alexey Kodanev (1):
dccp: fix undefined behavior with 'cwnd' shift in ccid2_cwnd_restart()
Bernd Edlinger (1):
nl80211: Add a missing break in parse_station_flags
Calvin Walton (1):
tools/power turbostat: Read extended processor family from CPUID
Colin Ian King (1):
drivers: net: lmc: fix case value for target abort error
Eric Dumazet (1):
xfrm_user: prevent leaking 2 bytes of kernel memory
Eugeniu Rosca (1):
usb: gadget: f_uac2: fix endianness of 'struct cntrl_*_lay3'
Eyal Birger (1):
vti6: fix PMTU caching and reporting on xmit
Florian Westphal (3):
xfrm: free skb if nlsk pointer is NULL
netfilter: conntrack: dccp: treat SYNC/SYNCACK as invalid if no prior state
atl1c: reserve min skb headroom
Govindarajulu Varadarajan (1):
enic: handle mtu change for vf properly
Guenter Roeck (1):
media: staging: omap4iss: Include asm/cacheflush.h after generic includes
Jia-Ju Bai (2):
usb: gadget: r8a66597: Fix two possible sleep-in-atomic-context bugs in init_controller()
usb: gadget: r8a66597: Fix a possible sleep-in-atomic-context bugs in r8a66597_queue()
Jim Gill (1):
scsi: vmw_pvscsi: Return DID_RESET for status SAM_STAT_COMMAND_TERMINATED
Johannes Thumshirn (1):
scsi: fcoe: drop frames in ELS LOGO error path
Kiran Kumar Modukuri (3):
fscache: Allow cancelled operations to be enqueued
cachefiles: Fix refcounting bug in backing-file read monitoring
cachefiles: Wait rather than BUG'ing on "Unexpected object collision"
Len Brown (1):
tools/power turbostat: fix -S on UP systems
Li Wang (1):
zswap: re-check zswap_is_full() after do zswap_shrink()
Lucas Stach (2):
drm/imx: imx-ldb: disable LDB on driver bind
drm/imx: imx-ldb: check if channel is enabled before printing warning
Masami Hiramatsu (1):
selftests/ftrace: Add snapshot and tracing_on test case
Nicholas Mc Guire (2):
drm: re-enable error handling
can: mpc5xxx_can: check of_iomap return before use
Peter Senna Tschudin (1):
tools: usb: ffs-test: Fix build on big endian systems
Rafał Miłecki (1):
Revert "MIPS: BCM47XX: Enable 74K Core ExternalSync for PCIe erratum"
Randy Dunlap (4):
usb/phy: fix PPC64 build errors in phy-fsl-usb.c
net: prevent ISA drivers from building on PPC32
arc: fix build errors in arc/include/asm/delay.h
arc: fix type warnings in arc/mm/cache.c
Sean Paul (1):
drm/bridge: adv7511: Reset registers on hotplug
Shubhrajyoti Datta (1):
net: axienet: Fix double deregister of mdio
Sudarsana Reddy Kalluru (2):
qed: Fix possible race for the link state value.
bnx2x: Fix invalid memory access in rss hash config path.
Tommi Rantala (1):
xfrm: fix missing dst_release() after policy blocking lbcast and multicast
Varun Prakash (1):
scsi: libiscsi: fix possible NULL pointer dereference in case of TMF
Willem de Bruijn (1):
packet: refine ring v3 block size test to hold one frame
YueHaibing (1):
net: caif: Add a missing rcu_read_unlock() in caif_flow_cb
jie@chenjie6@huwei.com (1):
mm/memory.c: check return value of ioremap_prot
mpubbise(a)codeaurora.org (1):
mac80211: add stations tied to AP_VLANs during hw reconfig
arch/arc/include/asm/delay.h | 3 +
arch/arc/mm/cache.c | 7 +-
arch/mips/bcm47xx/setup.c | 6 --
arch/mips/include/asm/mipsregs.h | 3 -
drivers/gpu/drm/drm_context.c | 2 +-
drivers/gpu/drm/i2c/adv7511.c | 12 ++++
drivers/gpu/drm/imx/imx-ldb.c | 9 ++-
drivers/i2c/busses/i2c-davinci.c | 8 ++-
drivers/net/can/mscan/mpc5xxx_can.c | 5 ++
drivers/net/ethernet/3com/Kconfig | 2 +-
drivers/net/ethernet/amd/Kconfig | 4 +-
drivers/net/ethernet/atheros/atl1c/atl1c_main.c | 1 +
.../net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c | 13 +++-
drivers/net/ethernet/cirrus/Kconfig | 1 +
drivers/net/ethernet/cisco/enic/enic_main.c | 78 ++++++++--------------
drivers/net/ethernet/qlogic/qed/qed_mcp.c | 1 +
drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c | 1 +
drivers/net/wan/lmc/lmc_main.c | 2 +-
drivers/scsi/fcoe/fcoe_ctlr.c | 4 +-
drivers/scsi/libiscsi.c | 12 ++--
drivers/scsi/vmw_pvscsi.c | 11 ++-
drivers/staging/media/omap4iss/iss_video.c | 3 +-
drivers/usb/gadget/function/f_uac2.c | 20 +++---
drivers/usb/gadget/udc/r8a66597-udc.c | 6 +-
drivers/usb/phy/phy-fsl-usb.c | 4 +-
fs/cachefiles/namei.c | 1 -
fs/cachefiles/rdwr.c | 17 +++--
fs/fscache/operation.c | 6 +-
mm/memory.c | 3 +
mm/zswap.c | 9 +++
net/caif/caif_dev.c | 4 +-
net/dccp/ccids/ccid2.c | 6 +-
net/ipv6/ip6_vti.c | 11 +--
net/mac80211/util.c | 3 +-
net/netfilter/nf_conntrack_proto_dccp.c | 8 +--
net/packet/af_packet.c | 10 +--
net/wireless/nl80211.c | 1 +
net/xfrm/xfrm_policy.c | 3 +
net/xfrm/xfrm_user.c | 18 +++--
tools/power/x86/turbostat/turbostat.c | 8 +--
.../selftests/ftrace/test.d/00basic/snapshot.tc | 28 ++++++++
tools/usb/ffs-test.c | 19 +++++-
42 files changed, 232 insertions(+), 141 deletions(-)
create mode 100644 tools/testing/selftests/ftrace/test.d/00basic/snapshot.tc
-----BEGIN PGP SIGNATURE-----
iQIzBAEBCgAdFiEE4n5dijQDou9mhzu83qZv95d3LNwFAluBcWAACgkQ3qZv95d3
LNzUvw/+JXEap2/Xn5LA446o2ugIgmxLj3cHKHrKGBjqntMdAnarbllkHN8cPmhB
itp92E8NC6tkWrtuG78pXPI/KqyyfQQ9bdx/pX6ASEqlJVIToDBUv/qDAv9NKB2C
Tvs0HjeU058qNZxxArIPDFQu0l8QuWlbSQ19wk5j9nyZYKLNjoK83IxktNA5eEOM
UowNygI6SkhcWexcO+QzBheqgsGdk2lbiACuZQP2UfzNoj3B+jmkrjluKJeZHsp9
RC9Tt5iAaHWze6bqetDapqNiiVKamOh4RwFFb2auZSbi0njzHKLvYU/zIhbpBmB/
TWkIyzM7X384wyzDsCCIHi/OVmHPoWCN1cTPL5624qloNIG6VEFUNlB79q1R4htu
fH680orPjCiRmavyCml9CA1CR9Qja8TvD9+Hj6LjLP4xxCCV3EGDzE85lYpOYxze
EH/JldbaRGjhnxRtok+yKKcy/MoWFTB4BpsRxVrqALDZlV325uh2Eui6ku5vyQug
I6CG+SWrjQ8E+BQgLGLEHF8KP6Si38LJMJyavuCTQquxH2uXNR8Z8gHqpX47YXOC
gkuXOELTLw6dBxzLUWbv6FmOT7DZxC+swuKmOoXrFa877mIrC8RgFbSDL7MMjq9/
wuC1m/n0QmIIMOoMmZ0RIMwusNkAGOxqNW+DhPB4z7M298r+eHU=
=mPDt
-----END PGP SIGNATURE-----
-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA512
Hi Greg,
Pleae pull commits for Linux 3.18 .
I've sent a review request for all commits over a week ago and all
comments were addressed.
Thanks,
Sasha
=====
The following changes since commit 18e6ee0440a7ab853e4ca0f1403eeef1803ed970:
Linux 3.18.119 (2018-08-17 20:54:56 +0200)
are available in the Git repository at:
git://git.kernel.org/pub/scm/linux/kernel/git/sashal/linux-stable.git tags/for-greg-3.18-14082018
for you to fetch changes up to a4756ac848473e2142769e029251fa02abac9ef1:
mm/memory.c: check return value of ioremap_prot (2018-08-18 10:18:24 -0400)
- ----------------------------------------------------------------
for-greg-3.18-14082018
- ----------------------------------------------------------------
Alexey Kodanev (1):
dccp: fix undefined behavior with 'cwnd' shift in ccid2_cwnd_restart()
Bernd Edlinger (1):
nl80211: Add a missing break in parse_station_flags
Calvin Walton (1):
tools/power turbostat: Read extended processor family from CPUID
Colin Ian King (1):
drivers: net: lmc: fix case value for target abort error
Eric Dumazet (1):
xfrm_user: prevent leaking 2 bytes of kernel memory
Eugeniu Rosca (1):
usb: gadget: f_uac2: fix endianness of 'struct cntrl_*_lay3'
Florian Westphal (3):
xfrm: free skb if nlsk pointer is NULL
netfilter: conntrack: dccp: treat SYNC/SYNCACK as invalid if no prior state
atl1c: reserve min skb headroom
Govindarajulu Varadarajan (1):
enic: handle mtu change for vf properly
Guenter Roeck (1):
media: staging: omap4iss: Include asm/cacheflush.h after generic includes
Jia-Ju Bai (2):
usb: gadget: r8a66597: Fix two possible sleep-in-atomic-context bugs in init_controller()
usb: gadget: r8a66597: Fix a possible sleep-in-atomic-context bugs in r8a66597_queue()
Jim Gill (1):
scsi: vmw_pvscsi: Return DID_RESET for status SAM_STAT_COMMAND_TERMINATED
Johannes Thumshirn (1):
scsi: fcoe: drop frames in ELS LOGO error path
Kiran Kumar Modukuri (3):
fscache: Allow cancelled operations to be enqueued
cachefiles: Fix refcounting bug in backing-file read monitoring
cachefiles: Wait rather than BUG'ing on "Unexpected object collision"
Len Brown (1):
tools/power turbostat: fix -S on UP systems
Li Wang (1):
zswap: re-check zswap_is_full() after do zswap_shrink()
Lucas Stach (2):
drm/imx: imx-ldb: disable LDB on driver bind
drm/imx: imx-ldb: check if channel is enabled before printing warning
Masami Hiramatsu (1):
selftests/ftrace: Add snapshot and tracing_on test case
Nicholas Mc Guire (2):
drm: re-enable error handling
can: mpc5xxx_can: check of_iomap return before use
Peter Senna Tschudin (1):
tools: usb: ffs-test: Fix build on big endian systems
Rafał Miłecki (1):
Revert "MIPS: BCM47XX: Enable 74K Core ExternalSync for PCIe erratum"
Randy Dunlap (3):
usb/phy: fix PPC64 build errors in phy-fsl-usb.c
arc: fix build errors in arc/include/asm/delay.h
arc: fix type warnings in arc/mm/cache.c
Shubhrajyoti Datta (1):
net: axienet: Fix double deregister of mdio
Sudarsana Reddy Kalluru (1):
bnx2x: Fix invalid memory access in rss hash config path.
Tommi Rantala (1):
xfrm: fix missing dst_release() after policy blocking lbcast and multicast
Varun Prakash (1):
scsi: libiscsi: fix possible NULL pointer dereference in case of TMF
Willem de Bruijn (1):
packet: refine ring v3 block size test to hold one frame
YueHaibing (1):
net: caif: Add a missing rcu_read_unlock() in caif_flow_cb
jie@chenjie6@huwei.com (1):
mm/memory.c: check return value of ioremap_prot
mpubbise(a)codeaurora.org (1):
mac80211: add stations tied to AP_VLANs during hw reconfig
arch/arc/include/asm/delay.h | 3 +
arch/arc/mm/cache_arc700.c | 7 +-
arch/mips/bcm47xx/setup.c | 6 --
arch/mips/include/asm/mipsregs.h | 3 -
drivers/gpu/drm/drm_context.c | 2 +-
drivers/net/can/mscan/mpc5xxx_can.c | 5 ++
drivers/net/ethernet/atheros/atl1c/atl1c_main.c | 1 +
.../net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c | 13 +++-
drivers/net/ethernet/cisco/enic/enic_main.c | 78 ++++++++--------------
drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c | 1 +
drivers/net/wan/lmc/lmc_main.c | 2 +-
drivers/scsi/fcoe/fcoe_ctlr.c | 4 +-
drivers/scsi/libiscsi.c | 12 ++--
drivers/scsi/vmw_pvscsi.c | 11 ++-
drivers/staging/imx-drm/imx-ldb.c | 9 ++-
drivers/staging/media/omap4iss/iss_video.c | 3 +-
drivers/usb/gadget/function/f_uac2.c | 20 +++---
drivers/usb/gadget/udc/r8a66597-udc.c | 6 +-
drivers/usb/phy/phy-fsl-usb.c | 4 +-
fs/cachefiles/namei.c | 1 -
fs/cachefiles/rdwr.c | 17 +++--
fs/fscache/operation.c | 6 +-
mm/memory.c | 3 +
mm/zswap.c | 9 +++
net/caif/caif_dev.c | 4 +-
net/dccp/ccids/ccid2.c | 6 +-
net/mac80211/util.c | 3 +-
net/netfilter/nf_conntrack_proto_dccp.c | 8 +--
net/packet/af_packet.c | 10 +--
net/wireless/nl80211.c | 1 +
net/xfrm/xfrm_policy.c | 3 +
net/xfrm/xfrm_user.c | 18 +++--
tools/power/x86/turbostat/turbostat.c | 8 +--
.../selftests/ftrace/test.d/00basic/snapshot.tc | 28 ++++++++
tools/usb/ffs-test.c | 19 +++++-
35 files changed, 203 insertions(+), 131 deletions(-)
create mode 100644 tools/testing/selftests/ftrace/test.d/00basic/snapshot.tc
-----BEGIN PGP SIGNATURE-----
iQIzBAEBCgAdFiEE4n5dijQDou9mhzu83qZv95d3LNwFAluBcWcACgkQ3qZv95d3
LNyyQBAAuR/0/kaIUMqYPIXnnUXjUxhxiJmI5oCxGepVsD0XwATTk+gyu1niajDW
dP+V4MjSsdZMtM4h5L52GGWFVNOppZFBd/xCci2nUAS4wmE9jLsTmnKyKWnPoGxo
XiEz4OMXfhRCoir69kY3x+CN/F5XOcQJq4+F9RwMMGHFgJJ6fXng6m+UTtkjyFKQ
uJaBk4iwtJUCa8yEC+L/5Uuqx7tM9J64J5zG5YxsWvd3LslUuXkE17xiFhoompNM
dn/mT8mxY7P+SYb9FcLRoUu0Lz6aL82HW4zd55hlFIREBNaBfXIJOA7wPVo5UwBI
HsyqCDBEaJ04gnfqaRo041wbl0CrX599r0sMQri3UUvKVZYFeBQvFYKim7UKdvzB
bO1aKR+lOCnwsNIQGqFcgmYZzF4aDNe8xoeROTCC+MW6V++in0W/HALeqyoSW0Ko
74eQWDIXGBuWjM83H94rMeyxuL7L9qnBSA2vGT4YbK10F9X/lxq/MbxtGtKp/APL
aLRXfl/NzRJW8AI73ej+eH8Nq3d3W749RYPHihvvv3izfWFyLxjj/0TncaXlPOO3
qVXf9LdHXrF+L9jJk5qpfFQ92xk57ssQXYocCPG/455oizsD8zpWk0WqQ2LMxzlY
WJhSRaJjRBucU3mtCUq8fLZRaln91HIGISEPaRAlK5dAN0Tx7FU=
=O1lO
-----END PGP SIGNATURE-----
We provide photoshop services to some of the companies from around the
world.
We have worked on tons of images ever since our team establishment in 2009.
Many online retail companies use our services for retouching electronics,
jewelry, apparels, furniture
etc. by getting the images of their products enhanced.
Here are the details of what we provide:
Clipping path;
Deep etch process
Image masking
Remove background
Portrait retouching
Jewelry retouching
Fashion retouching
Please reply back for further info.
We can provide testing for your photos if needed.
Thanks,
Ruby
I'm announcing the release of the 4.4.153 kernel.
All users of the 4.4 kernel series must upgrade.
The updated 4.4.y git tree can be found at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git linux-4.4.y
and can be browsed at the normal kernel.org git web browser:
http://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git;a=summary
thanks,
greg k-h
------------
Makefile | 2 +-
arch/x86/include/asm/mmu_context.h | 3 +--
arch/x86/mm/pageattr.c | 2 +-
fs/overlayfs/overlayfs.h | 1 +
fs/overlayfs/readdir.c | 37 +++++++++++++++++++++++++++++++++++++
fs/overlayfs/super.c | 20 ++++++++++++++++++++
6 files changed, 61 insertions(+), 4 deletions(-)
Andi Kleen (1):
x86/mm/pat: Fix L1TF stable backport for CPA
Eric Biggers (1):
x86/mm: Fix use-after-free of ldt_struct
Greg Kroah-Hartman (1):
Linux 4.4.153
Vivek Goyal (3):
ovl: Ensure upper filesystem supports d_type
ovl: Do d_type check only if work dir creation was successful
ovl: warn instead of error if d_type is not supported
Do you have photos for cutting out,or adding clipping path?
We are here to help you for that also including retouching.
Both for product photos and portrait photos.
Yours,
Jason
The function tty_port_tty_get() gets a reference to the tty. Since
the code is not using tty_port_tty_set(), the reference is kept
even after closing the tty.
Avoid using tty_port_tty_get() by directly access the tty instance.
Since lpuart_start_rx_dma() is called from the .startup() and
.set_termios() callback, it is safe to assume the tty instance is
valid.
Cc: stable(a)vger.kernel.org # v4.9+
Fixes: 5887ad43ee02 ("tty: serial: fsl_lpuart: Use cyclic DMA for Rx")
Signed-off-by: Stefan Agner <stefan(a)agner.ch>
---
This fixes a memory leak observable when opening/closing the tty in a
loop. This is also reported by kmemleak:
unreferenced object 0xc9d17000 (size 1024):
comm "(agetty)", pid 389, jiffies 4294943045 (age 100.670s)
hex dump (first 32 bytes):
01 54 00 00 01 00 00 00 00 8c 9b c8 80 58 9b c8 .T...........X..
48 58 c4 c0 00 00 00 00 00 00 00 00 00 00 00 00 HX..............
backtrace:
[<(ptrval)>] kmem_cache_alloc_trace+0x160/0x2b8
[<(ptrval)>] alloc_tty_struct+0x44/0x254
[<(ptrval)>] tty_init_dev+0x44/0x1c8
[<(ptrval)>] tty_open+0x268/0x414
[<(ptrval)>] chrdev_open+0xb4/0x1bc
[<(ptrval)>] do_dentry_open+0x1c0/0x388
[<(ptrval)>] vfs_open+0x34/0x38
[<(ptrval)>] path_openat+0x5b0/0x11bc
[<(ptrval)>] do_filp_open+0x7c/0xe8
[<(ptrval)>] do_sys_open+0x188/0x20c
[<(ptrval)>] sys_openat+0x14/0x18
[<(ptrval)>] ret_fast_syscall+0x0/0x28
[<(ptrval)>] 0xbee0a460
[<(ptrval)>] 0xffffffff
I *think* the statement that accessing tty_struct without using
tty_port_tty_get is safe in this case is true. It would be good if
somebody with more TTY knowledge could review the change.
--
Stefan
drivers/tty/serial/fsl_lpuart.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c
index 51e47a63d61a..3f8d1274fc85 100644
--- a/drivers/tty/serial/fsl_lpuart.c
+++ b/drivers/tty/serial/fsl_lpuart.c
@@ -979,7 +979,8 @@ static inline int lpuart_start_rx_dma(struct lpuart_port *sport)
struct circ_buf *ring = &sport->rx_ring;
int ret, nent;
int bits, baud;
- struct tty_struct *tty = tty_port_tty_get(&sport->port.state->port);
+ struct tty_port *port = &sport->port.state->port;
+ struct tty_struct *tty = port->tty;
struct ktermios *termios = &tty->termios;
baud = tty_get_baud_rate(tty);
--
2.18.0
I found that injecting disconnects with v4.18-rc resulted in
random failures of the multi-threaded git regression test.
The root cause appears to be that, after a reconnect, the
RPC/RDMA transport is waking pending RPCs before the transport has
posted enough Receive buffers to receive the Replies. If a Reply
arrives before enough Receive buffers are posted, the connection
is dropped. A few connection drops happen in quick succession as
the client and server struggle to regain credit synchronization.
This regression was introduced with commit 7c8d9e7c8863 ("xprtrdma:
Move Receive posting to Receive handler"). The client is supposed to
post a single Receive when a connection is established because
it's not supposed to send more than one RPC Call before it gets
a fresh credit grant in the first RPC Reply [RFC 8166, Section
3.3.3].
Unfortunately there appears to be a longstanding bug in the Linux
client's credit accounting mechanism. On connect, it simply dumps
all pending RPC Calls onto the new connection. It's possible it has
done this ever since the RPC/RDMA transport was added to the kernel
ten years ago.
Servers have so far been tolerant of this bad behavior. Currently no
server implementation ever changes its credit grant over reconnects,
and servers always repost enough Receives before connections are
fully established.
The Linux client implementation used to post a Receive before each
of these Calls. This has covered up the flooding send behavior.
I could try to correct this old bug so that the client sends exactly
one RPC Call and waits for a Reply. Since we are so close to the
next merge window, I'm going to instead provide a simple patch to
post enough Receives before a reconnect completes (based on the
number of credits granted to the previous connection).
The spurious disconnects will be gone, but the client will still
send multiple RPC Calls immediately after a reconnect.
Addressing the latter problem will wait for a merge window because
a) I expect it to be a large change requiring lots of testing, and
b) obviously the Linux client has interoperated successfully since
day zero while still being broken.
Fixes: 7c8d9e7c8863 ("xprtrdma: Move Receive posting to ... ")
Signed-off-by: Chuck Lever <chuck.lever(a)oracle.com>
---
net/sunrpc/xprtrdma/verbs.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
Hi stable@ -
This fix has been merged into v4.19 as upstream commit 8d4fb8ff427a
("xprtrdma: Fix disconnect regression"). It addresses a regression
in v4.18. I expected it to go into late v4.18-rc, which is why there
is no "cc: stable" on the original submission.
Could you please apply it to 4.18.y ? Thank you!
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 042bb24..1386c6e 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -279,7 +279,6 @@
++xprt->rx_xprt.connect_cookie;
connstate = -ECONNABORTED;
connected:
- xprt->rx_buf.rb_credits = 1;
ep->rep_connected = connstate;
rpcrdma_conn_func(ep);
wake_up_all(&ep->rep_connect_wait);
@@ -754,6 +753,7 @@
}
ep->rep_connected = 0;
+ rpcrdma_post_recvs(r_xprt, true);
rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma);
if (rc) {
@@ -772,8 +772,6 @@
dprintk("RPC: %s: connected\n", __func__);
- rpcrdma_post_recvs(r_xprt, true);
-
out:
if (rc)
ep->rep_connected = rc;
@@ -1170,6 +1168,7 @@ struct rpcrdma_req *
list_add(&req->rl_list, &buf->rb_send_bufs);
}
+ buf->rb_credits = 1;
buf->rb_posted_receives = 0;
INIT_LIST_HEAD(&buf->rb_recv_bufs);
This is the start of the stable review cycle for the 4.4.153 release.
There are 5 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Tue Aug 28 06:40:50 UTC 2018.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.4.153-rc…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-4.4.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 4.4.153-rc1
Vivek Goyal <vgoyal(a)redhat.com>
ovl: warn instead of error if d_type is not supported
Vivek Goyal <vgoyal(a)redhat.com>
ovl: Do d_type check only if work dir creation was successful
Vivek Goyal <vgoyal(a)redhat.com>
ovl: Ensure upper filesystem supports d_type
Eric Biggers <ebiggers(a)google.com>
x86/mm: Fix use-after-free of ldt_struct
Andi Kleen <ak(a)linux.intel.com>
x86/mm/pat: Fix L1TF stable backport for CPA
-------------
Diffstat:
Makefile | 4 ++--
arch/x86/include/asm/mmu_context.h | 3 +--
arch/x86/mm/pageattr.c | 2 +-
fs/overlayfs/overlayfs.h | 1 +
fs/overlayfs/readdir.c | 37 +++++++++++++++++++++++++++++++++++++
fs/overlayfs/super.c | 20 ++++++++++++++++++++
6 files changed, 62 insertions(+), 5 deletions(-)
The scm device must be present in order for the rmtfs driver to
configure memory permissions for the rmtfs memory region, so check that
it is probed before continuing.
Cc: stable(a)vger.kernel.org
Fixes: fa65f8045137 ("soc: qcom: rmtfs-mem: Add support for assigning memory to remote")
Signed-off-by: Bjorn Andersson <bjorn.andersson(a)linaro.org>
---
drivers/soc/qcom/rmtfs_mem.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/drivers/soc/qcom/rmtfs_mem.c b/drivers/soc/qcom/rmtfs_mem.c
index 8a3678c2e83c..97bb5989aa21 100644
--- a/drivers/soc/qcom/rmtfs_mem.c
+++ b/drivers/soc/qcom/rmtfs_mem.c
@@ -212,6 +212,11 @@ static int qcom_rmtfs_mem_probe(struct platform_device *pdev)
dev_err(&pdev->dev, "failed to parse qcom,vmid\n");
goto remove_cdev;
} else if (!ret) {
+ if (!qcom_scm_is_available()) {
+ ret = -EPROBE_DEFER;
+ goto remove_cdev;
+ }
+
perms[0].vmid = QCOM_SCM_VMID_HLOS;
perms[0].perm = QCOM_SCM_PERM_RW;
perms[1].vmid = vmid;
--
2.18.0
In case a client fails to connect in mei_cldev_enable(), the
caller won't call the mei_cldev_disable leaving the client
in a linked stated. Upon driver unload the client structure
will be freed in mei_cl_bus_dev_release(), leaving a stale pointer
on a fail_list. This will eventually end up in crash
during power down flow in mei_cl_set_disonnected().
RIP: mei_cl_set_disconnected+0x5/0x260[mei]
Call trace:
mei_cl_all_disconnect+0x22/0x30
mei_reset+0x194/0x250
__synchronize_hardirq+0x43/0x50
_cond_resched+0x15/0x30
mei_me_intr_clear+0x20/0x100
mei_stop+0x76/0xb0
mei_me_shutdown+0x3f/0x80
pci_device_shutdown+0x34/0x60
kernel_restart+0x0e/0x30
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=200455
Fixes: 'c110cdb17148 ("mei: bus: make a client pointer always available")'
Cc: <stable(a)vger.kernel.org> 4.10+
Tested-by: Georg Müller <georgmueller(a)gmx.net>
Signed-off-by: Tomas Winkler <tomas.winkler(a)intel.com>
---
drivers/misc/mei/bus.c | 9 ++++-----
1 file changed, 4 insertions(+), 5 deletions(-)
diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c
index 13c6c9a2248a..fc3872fe7b25 100644
--- a/drivers/misc/mei/bus.c
+++ b/drivers/misc/mei/bus.c
@@ -521,17 +521,15 @@ int mei_cldev_enable(struct mei_cl_device *cldev)
cl = cldev->cl;
+ mutex_lock(&bus->device_lock);
if (cl->state == MEI_FILE_UNINITIALIZED) {
- mutex_lock(&bus->device_lock);
ret = mei_cl_link(cl);
- mutex_unlock(&bus->device_lock);
if (ret)
- return ret;
+ goto out;
/* update pointers */
cl->cldev = cldev;
}
- mutex_lock(&bus->device_lock);
if (mei_cl_is_connected(cl)) {
ret = 0;
goto out;
@@ -875,12 +873,13 @@ static void mei_cl_bus_dev_release(struct device *dev)
mei_me_cl_put(cldev->me_cl);
mei_dev_bus_put(cldev->bus);
+ mei_cl_unlink(cldev->cl);
kfree(cldev->cl);
kfree(cldev);
}
static const struct device_type mei_cl_device_type = {
- .release = mei_cl_bus_dev_release,
+ .release = mei_cl_bus_dev_release,
};
/**
--
2.14.4
In case the device is not connected it doesn't 'get'
hw module and hence should not 'put' it on disable.
Cc: <stable(a)vger.kernel.org> 4.16+
Fixes:'commit 257355a44b99 ("mei: make module referencing local to the bus.c")'
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=200455
Tested-by: Georg Müller <georgmueller(a)gmx.net>
Signed-off-by: Tomas Winkler <tomas.winkler(a)intel.com>
---
drivers/misc/mei/bus.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c
index 7bba62a72921..13c6c9a2248a 100644
--- a/drivers/misc/mei/bus.c
+++ b/drivers/misc/mei/bus.c
@@ -616,9 +616,8 @@ int mei_cldev_disable(struct mei_cl_device *cldev)
if (err < 0)
dev_err(bus->dev, "Could not disconnect from the ME client\n");
-out:
mei_cl_bus_module_put(cldev);
-
+out:
/* Flush queues and remove any pending read */
mei_cl_flush_queues(cl, NULL);
mei_cl_unlink(cl);
--
2.14.4
On 08/21/2018 11:37 AM, Juergen Gross wrote:
> While the hypervisor emulates plain writes to PTEs happily, this is
> much slower than issuing a hypercall for PTE modifcations. And writing
> a PTE via two 32-bit write instructions (especially when clearing the
> PTE) will result in an intermediate L1TF vulnerable PTE.
>
> Writes to PAE PTEs should always be done with 64-bit writes or via
> hypercalls.
>
> Juergen Gross (2):
> x86/xen: don't write ptes directly in 32-bit PV guests
> x86/pae: use 64 bit atomic xchg function in native_ptep_get_and_clear
>
> arch/x86/include/asm/pgtable-3level.h | 7 +++----
> arch/x86/xen/mmu_pv.c | 7 +++----
> 2 files changed, 6 insertions(+), 8 deletions(-)
>
Applied to for-linus-19b.
(+stable.)
-boris
Hi,
I just wanted to check if you would be interested in a list of Managed
Service Providers (MSPs) and Managed Security Service Providers (MSSPs)?
We also have the data intelligence of:
• Managed Service Providers (MSP’s)
• Managed Security Service Providers (MSSP’s
• IT Decision Makers – 6million across all industry
• Business Decision Makers – 10 million across all industry
• Value Added Resellers- VARs
• Independent Software Vendors- ISVs
• System Integrators- SIs
• VoIP Service Providers.
• Telecommunications Service Providers (TSPs)
• Application Service Providers (ASPs)
• IT Managed Services Providers (ITMSP)
• Storage Service Providers (SSPs)
Kindly review and let me know if I can share more information on this.
I look forward to hearing from you.
Regards,
Page Brooks
Marketing Specialist
If you don't want to include yourself in our mailing list, please reply
back “Leave Out" in a subject line
The following commit:
368a540e0232 (x86/kvmclock: Remove memblock dependency)
caused SEV guest regression. When SEV is active, we map the shared
variables (wall_clock and hv_clock_boot) with C=0 to ensure that both
the guest and the hypervisor is able to access the data. To map the
variables we use kernel_physical_mapping_init() to split the large pages,
but this routine fails to allocate a new page. Before the above commit,
kvmclock initialization was called after memory allocator was available
but now its called very early in the boot process.
Recently we added a special .data..decrypted section to hold the shared
variables. This section is mapped with C=0 very early. Use __decrypted
attribute to put the wall_clock and hv_clock_boot in .data..decrypted
section so that they are mapped with C=0.
Signed-off-by: Brijesh Singh <brijesh.singh(a)amd.com>
Fixes: 368a540e0232 ("x86/kvmclock: Remove memblock dependency")
Cc: stable(a)vger.kernel.org
Cc: Tom Lendacky <thomas.lendacky(a)amd.com>
Cc: kvm(a)vger.kernel.org
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Borislav Petkov <bp(a)suse.de>
Cc: "H. Peter Anvin" <hpa(a)zytor.com>
Cc: linux-kernel(a)vger.kernel.org
Cc: Paolo Bonzini <pbonzini(a)redhat.com>
Cc: Sean Christopherson <sean.j.christopherson(a)intel.com>
Cc: "Radim Krčmář" <rkrcmar(a)redhat.com>
---
arch/x86/kernel/kvmclock.c | 26 +++++++++++++++++++++-----
1 file changed, 21 insertions(+), 5 deletions(-)
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 1e67646..ae9188a 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -28,6 +28,7 @@
#include <linux/sched/clock.h>
#include <linux/mm.h>
#include <linux/slab.h>
+#include <linux/set_memory.h>
#include <asm/hypervisor.h>
#include <asm/mem_encrypt.h>
@@ -61,8 +62,8 @@ early_param("no-kvmclock-vsyscall", parse_no_kvmclock_vsyscall);
(PAGE_SIZE / sizeof(struct pvclock_vsyscall_time_info))
static struct pvclock_vsyscall_time_info
- hv_clock_boot[HVC_BOOT_ARRAY_SIZE] __aligned(PAGE_SIZE);
-static struct pvclock_wall_clock wall_clock;
+ hv_clock_boot[HVC_BOOT_ARRAY_SIZE] __decrypted __aligned(PAGE_SIZE);
+static struct pvclock_wall_clock wall_clock __decrypted;
static DEFINE_PER_CPU(struct pvclock_vsyscall_time_info *, hv_clock_per_cpu);
static inline struct pvclock_vcpu_time_info *this_cpu_pvti(void)
@@ -267,10 +268,25 @@ static int kvmclock_setup_percpu(unsigned int cpu)
return 0;
/* Use the static page for the first CPUs, allocate otherwise */
- if (cpu < HVC_BOOT_ARRAY_SIZE)
+ if (cpu < HVC_BOOT_ARRAY_SIZE) {
p = &hv_clock_boot[cpu];
- else
- p = kzalloc(sizeof(*p), GFP_KERNEL);
+ } else {
+ int rc;
+ unsigned int sz = sizeof(*p);
+
+ if (sev_active())
+ sz = PAGE_ALIGN(sz);
+
+ p = kzalloc(sz, GFP_KERNEL);
+
+ /* When SEV is active the hv_clock need to be mapped as decrypted. */
+ if (p && sev_active()) {
+ rc = set_memory_decrypted((unsigned long)p, sz >> PAGE_SHIFT);
+ if (rc)
+ return rc;
+ memset(p, 0, sz);
+ }
+ }
per_cpu(hv_clock_per_cpu, cpu) = p;
return p ? 0 : -ENOMEM;
--
2.7.4
Use the new of_get_compatible_child() helper to lookup the usb sibling
node instead of using of_find_compatible_node(), which searches the
entire tree from a given start node and thus can return an unrelated
(non-sibling) node.
This also addresses a potential use-after-free (e.g. after probe
deferral) as the tree-wide helper drops a reference to its first
argument (i.e. the parent device node).
While at it, also fix the related phy-node reference leak.
Fixes: f5e4edb8c888 ("power: twl4030_charger: find associated phy by more reliable means.")
Cc: stable <stable(a)vger.kernel.org> # 4.2
Cc: NeilBrown <neilb(a)suse.de>
Cc: Felipe Balbi <felipe.balbi(a)linux.intel.com>
Cc: Sebastian Reichel <sre(a)kernel.org>
Reviewed-by: Sebastian Reichel <sre(a)kernel.org>
Signed-off-by: Johan Hovold <johan(a)kernel.org>
---
drivers/power/supply/twl4030_charger.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/drivers/power/supply/twl4030_charger.c b/drivers/power/supply/twl4030_charger.c
index bbcaee56db9d..b6a7d9f74cf3 100644
--- a/drivers/power/supply/twl4030_charger.c
+++ b/drivers/power/supply/twl4030_charger.c
@@ -996,12 +996,13 @@ static int twl4030_bci_probe(struct platform_device *pdev)
if (bci->dev->of_node) {
struct device_node *phynode;
- phynode = of_find_compatible_node(bci->dev->of_node->parent,
- NULL, "ti,twl4030-usb");
+ phynode = of_get_compatible_child(bci->dev->of_node->parent,
+ "ti,twl4030-usb");
if (phynode) {
bci->usb_nb.notifier_call = twl4030_bci_usb_ncb;
bci->transceiver = devm_usb_get_phy_by_node(
bci->dev, phynode, &bci->usb_nb);
+ of_node_put(phynode);
if (IS_ERR(bci->transceiver)) {
ret = PTR_ERR(bci->transceiver);
if (ret == -EPROBE_DEFER)
--
2.18.0
Use the new of_get_compatible_child() helper to lookup the nfc child
node instead of using of_find_compatible_node(), which searches the
entire tree from a given start node and thus can return an unrelated
(i.e. non-child) node.
This also addresses a potential use-after-free (e.g. after probe
deferral) as the tree-wide helper drops a reference to its first
argument (i.e. the parent node).
Fixes: e097dc624f78 ("NFC: nfcmrvl: add UART driver")
Fixes: d8e018c0b321 ("NFC: nfcmrvl: update device tree bindings for Marvell NFC")
Cc: stable <stable(a)vger.kernel.org> # 4.2
Cc: Vincent Cuissard <cuissard(a)marvell.com>
Cc: Samuel Ortiz <sameo(a)linux.intel.com>
Signed-off-by: Johan Hovold <johan(a)kernel.org>
---
drivers/nfc/nfcmrvl/uart.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/drivers/nfc/nfcmrvl/uart.c b/drivers/nfc/nfcmrvl/uart.c
index 91162f8e0366..9a22056e8d9e 100644
--- a/drivers/nfc/nfcmrvl/uart.c
+++ b/drivers/nfc/nfcmrvl/uart.c
@@ -73,10 +73,9 @@ static int nfcmrvl_uart_parse_dt(struct device_node *node,
struct device_node *matched_node;
int ret;
- matched_node = of_find_compatible_node(node, NULL, "marvell,nfc-uart");
+ matched_node = of_get_compatible_child(node, "marvell,nfc-uart");
if (!matched_node) {
- matched_node = of_find_compatible_node(node, NULL,
- "mrvl,nfc-uart");
+ matched_node = of_get_compatible_child(node, "mrvl,nfc-uart");
if (!matched_node)
return -ENODEV;
}
--
2.18.0
Use the new of_get_compatible_child() helper to lookup the legacy
pwrlevels child node instead of using of_find_compatible_node(), which
searches the entire tree from a given start node and thus can return an
unrelated (i.e. non-child) node.
This also addresses a potential use-after-free (e.g. after probe
deferral) as the tree-wide helper drops a reference to its first
argument (i.e. the probed device's node).
While at it, also fix the related child-node reference leak.
Fixes: e2af8b6b0ca1 ("drm/msm: gpu: Use OPP tables if we can")
Cc: stable <stable(a)vger.kernel.org> # 4.12
Cc: Jordan Crouse <jcrouse(a)codeaurora.org>
Cc: Rob Clark <robdclark(a)gmail.com>
Cc: David Airlie <airlied(a)linux.ie>
Signed-off-by: Johan Hovold <johan(a)kernel.org>
---
drivers/gpu/drm/msm/adreno/adreno_gpu.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index da1363a0c54d..93d70f4a2154 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -633,8 +633,7 @@ static int adreno_get_legacy_pwrlevels(struct device *dev)
struct device_node *child, *node;
int ret;
- node = of_find_compatible_node(dev->of_node, NULL,
- "qcom,gpu-pwrlevels");
+ node = of_get_compatible_child(dev->of_node, "qcom,gpu-pwrlevels");
if (!node) {
dev_err(dev, "Could not find the GPU powerlevels\n");
return -ENXIO;
@@ -655,6 +654,8 @@ static int adreno_get_legacy_pwrlevels(struct device *dev)
dev_pm_opp_add(dev, val, 0);
}
+ of_node_put(node);
+
return 0;
}
--
2.18.0
Use the new of_get_compatible_child() helper to lookup the sibling
instead of using of_find_compatible_node(), which searches the entire
tree from a given start node and thus can return an unrelated (i.e.
non-sibling) node.
This also addresses a potential use-after-free (e.g. after probe
deferral) as the tree-wide helper drops a reference to its first
argument (i.e. the parent device node).
While at it, also fix the related cec-node reference leak.
Fixes: 8f83f26891e1 ("drm/mediatek: Add HDMI support")
Cc: stable <stable(a)vger.kernel.org> # 4.8
Cc: Junzhi Zhao <junzhi.zhao(a)mediatek.com>
Cc: Philipp Zabel <p.zabel(a)pengutronix.de>
Cc: CK Hu <ck.hu(a)mediatek.com>
Cc: David Airlie <airlied(a)linux.ie>
Signed-off-by: Johan Hovold <johan(a)kernel.org>
---
drivers/gpu/drm/mediatek/mtk_hdmi.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi.c b/drivers/gpu/drm/mediatek/mtk_hdmi.c
index 2d45d1dd9554..643f5edd68fe 100644
--- a/drivers/gpu/drm/mediatek/mtk_hdmi.c
+++ b/drivers/gpu/drm/mediatek/mtk_hdmi.c
@@ -1446,8 +1446,7 @@ static int mtk_hdmi_dt_parse_pdata(struct mtk_hdmi *hdmi,
}
/* The CEC module handles HDMI hotplug detection */
- cec_np = of_find_compatible_node(np->parent, NULL,
- "mediatek,mt8173-cec");
+ cec_np = of_get_compatible_child(np->parent, "mediatek,mt8173-cec");
if (!cec_np) {
dev_err(dev, "Failed to find CEC node\n");
return -EINVAL;
@@ -1457,8 +1456,10 @@ static int mtk_hdmi_dt_parse_pdata(struct mtk_hdmi *hdmi,
if (!cec_pdev) {
dev_err(hdmi->dev, "Waiting for CEC device %pOF\n",
cec_np);
+ of_node_put(cec_np);
return -EPROBE_DEFER;
}
+ of_node_put(cec_np);
hdmi->cec_dev = &cec_pdev->dev;
/*
--
2.18.0
Hi,
I wonder, it it makes sense to backport commit
e666d4e9ceec crypto: vmx - Use skcipher for ctr fallback
to v 4.14 stable kernel.
I'm using it in 4.12+.
These commits (somehow similar) has been backported to 4.10.2:
5839f555fa57 crypto: vmx - Use skcipher for xts fallback
c96d0a1c47ab crypto: vmx - Use skcipher for cbc fallback
Kind regards,
Petr
Hi Greg,
This patch is not marked for 4.4-stable, but it's already in 4.9 and 4.14 stable.
This is a trivial rename patch, but the naming would be more logical and makes
backports easier.
Please consider to apply this patch to 4.4-stable.
--
SZ Lin (林上智)
The patch titled
Subject: mm/hugetlb: filter out hugetlb pages if HUGEPAGE migration is not supported.
has been added to the -mm tree. Its filename is
mm-hugetlb-filter-out-hugetlb-pages-if-hugepage-migration-is-not-supported.patch
This patch should soon appear at
http://ozlabs.org/~akpm/mmots/broken-out/mm-hugetlb-filter-out-hugetlb-page…
and later at
http://ozlabs.org/~akpm/mmotm/broken-out/mm-hugetlb-filter-out-hugetlb-page…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: "Aneesh Kumar K.V" <aneesh.kumar(a)linux.ibm.com>
Subject: mm/hugetlb: filter out hugetlb pages if HUGEPAGE migration is not supported.
When scanning for movable pages, filter out Hugetlb pages if hugepage
migration is not supported. Without this we hit infinte loop in
__offline_pages() where we do
pfn = scan_movable_pages(start_pfn, end_pfn);
if (pfn) { /* We have movable pages */
ret = do_migrate_range(pfn, end_pfn);
goto repeat;
}
Fix this by checking hugepage_migration_supported both in
has_unmovable_pages which is the primary backoff mechanism for page
offlining and for consistency reasons also into scan_movable_pages because
it doesn't make any sense to return a pfn to non-migrateable huge page.
This issue was revealed by, but not caused by 72b39cfc4d75 ("mm,
memory_hotplug: do not fail offlining too early").
Link: http://lkml.kernel.org/r/20180824063314.21981-1-aneesh.kumar@linux.ibm.com
Fixes: 72b39cfc4d75 ("mm, memory_hotplug: do not fail offlining too early")
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar(a)linux.ibm.com>
Reported-by: Haren Myneni <haren(a)linux.vnet.ibm.com>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Reviewed-by: Naoya Horiguchi <n-horiguchi(a)ah.jp.nec.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/memory_hotplug.c | 3 ++-
mm/page_alloc.c | 4 ++++
2 files changed, 6 insertions(+), 1 deletion(-)
--- a/mm/memory_hotplug.c~mm-hugetlb-filter-out-hugetlb-pages-if-hugepage-migration-is-not-supported
+++ a/mm/memory_hotplug.c
@@ -1333,7 +1333,8 @@ static unsigned long scan_movable_pages(
if (__PageMovable(page))
return pfn;
if (PageHuge(page)) {
- if (page_huge_active(page))
+ if (hugepage_migration_supported(page_hstate(page)) &&
+ page_huge_active(page))
return pfn;
else
pfn = round_up(pfn + 1,
--- a/mm/page_alloc.c~mm-hugetlb-filter-out-hugetlb-pages-if-hugepage-migration-is-not-supported
+++ a/mm/page_alloc.c
@@ -7709,6 +7709,10 @@ bool has_unmovable_pages(struct zone *zo
* handle each tail page individually in migration.
*/
if (PageHuge(page)) {
+
+ if (!hugepage_migration_supported(page_hstate(page)))
+ goto unmovable;
+
iter = round_up(iter + 1, 1<<compound_order(page)) - 1;
continue;
}
_
Patches currently in -mm which might be from aneesh.kumar(a)linux.ibm.com are
mm-hugetlb-filter-out-hugetlb-pages-if-hugepage-migration-is-not-supported.patch
Hallo,
4.4.147 is the last kernel of the 4.4.x series that boots for me. All
subsequent versions panic on boot. How do I report this bug?
If I'm supposed to use https://bugzilla.kernel.org/ I don't know what
to fill into the fields. I don't even know if the longterm kernel falls
under "Mainline" or some other tree.
MSB
--
Fun chemistry experiments:
Sodium chloride doubles its volume
when combined with an equal amount of table salt.
Hi Greg,
Arch Linux recently enabled building the kernel documentation in their
PKGBUILD, which turned my normally peaceful build into a spamfest of
unescaped braces warnings from Perl.
These are fixed upstream with the following two patches:
701b3a3c0ac4 ("PATCH scripts/kernel-doc")
673bb2dfc364 ("scripts/kernel-doc: Escape all literal braces in regexes")
Could they please be applied to 4.18? They should be clean cherry-picks.
Thanks!
Nathan
Hi Greg, Ben, and all
Is https://www.kernel.org/category/releases.html updated in terms of EOL?
Some news out of Linaro conference [2] generated a lot of doubts and questions
around.
Specially because on the way it was stated by the news 3.16 wouldn't be active
anymore. So I'm not sure about the news, but I'd like confirmation from you about
expected EOL.
[2] https://itsfoss.com/linux-lts-kernel-six-years/
Thanks in advance,
Rodrigo.
Inside of start_xmit() the call to check if the connection is up and the
queueing of the packets for later transmission is not atomic which
leaves a window where cm_rep_handler can run, set the connection up,
dequeue pending packets and leave the subsequently queued packets by
start_xmit() sitting on neigh->queue until they're dropped when the
connection is torn down. This only applies to connected mode. These
dropped packets can really upset TCP, for example, and cause
multi-minute delays in transmission for open connections.
Here's the code in start_xmit where we check to see if the connection
is up:
if (ipoib_cm_get(neigh)) {
if (ipoib_cm_up(neigh)) {
ipoib_cm_send(dev, skb, ipoib_cm_get(neigh));
goto unref;
}
}
The race occurs if cm_rep_handler execution occurs after the above
connection check (specifically if it gets to the point where it acquires
priv->lock to dequeue pending skb's) but before the below code snippet
in start_xmit where packets are queued.
if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
push_pseudo_header(skb, phdr->hwaddr);
spin_lock_irqsave(&priv->lock, flags);
__skb_queue_tail(&neigh->queue, skb);
spin_unlock_irqrestore(&priv->lock, flags);
} else {
++dev->stats.tx_dropped;
dev_kfree_skb_any(skb);
}
The patch re-checks ipoib_cm_up with priv->lock held to avoid this
race condition. Since odds are the conn should be up most of the time
(and thus the connection *not* down most of the time) we don't hold the
lock for the first check attempt to avoid a slowdown from unecessary
locking for the majority of the packets transmitted during the
connection's life.
Signed-off-by: Aaron Knister <aaron.s.knister(a)nasa.gov>
---
drivers/infiniband/ulp/ipoib/ipoib_main.c | 46 ++++++++++++++++++++++++++-----
1 file changed, 39 insertions(+), 7 deletions(-)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 26cde95b..a950c916 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -1093,6 +1093,21 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
spin_unlock_irqrestore(&priv->lock, flags);
}
+static bool defer_neigh_skb(struct sk_buff *skb,
+ struct net_device *dev,
+ struct ipoib_neigh *neigh,
+ struct ipoib_pseudo_header *phdr)
+{
+ if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
+ push_pseudo_header(skb, phdr->hwaddr);
+ __skb_queue_tail(&neigh->queue, skb);
+ return true;
+ }
+
+ return false;
+}
+
+
static netdev_tx_t ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ipoib_dev_priv *priv = ipoib_priv(dev);
@@ -1101,6 +1116,7 @@ static netdev_tx_t ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
struct ipoib_pseudo_header *phdr;
struct ipoib_header *header;
unsigned long flags;
+ bool deferred_pkt = true;
phdr = (struct ipoib_pseudo_header *) skb->data;
skb_pull(skb, sizeof(*phdr));
@@ -1160,6 +1176,23 @@ static netdev_tx_t ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
ipoib_cm_send(dev, skb, ipoib_cm_get(neigh));
goto unref;
}
+ /*
+ * Re-check ipoib_cm_up with priv->lock held to avoid
+ * race condition between start_xmit and skb_dequeue in
+ * cm_rep_handler. Since odds are the conn should be up
+ * most of the time, we don't hold the lock for the
+ * first check above
+ */
+ spin_lock_irqsave(&priv->lock, flags);
+ if (ipoib_cm_up(neigh)) {
+ spin_unlock_irqrestore(&priv->lock, flags);
+ ipoib_cm_send(dev, skb, ipoib_cm_get(neigh));
+ } else {
+ deferred_pkt = defer_neigh_skb(skb, dev, neigh, phdr);
+ spin_unlock_irqrestore(&priv->lock, flags);
+ }
+
+ goto unref;
} else if (neigh->ah && neigh->ah->valid) {
neigh->ah->last_send = rn->send(dev, skb, neigh->ah->ah,
IPOIB_QPN(phdr->hwaddr));
@@ -1168,17 +1201,16 @@ static netdev_tx_t ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
neigh_refresh_path(neigh, phdr->hwaddr, dev);
}
- if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
- push_pseudo_header(skb, phdr->hwaddr);
- spin_lock_irqsave(&priv->lock, flags);
- __skb_queue_tail(&neigh->queue, skb);
- spin_unlock_irqrestore(&priv->lock, flags);
- } else {
+ spin_lock_irqsave(&priv->lock, flags);
+ deferred_pkt = defer_neigh_skb(skb, dev, neigh, phdr);
+ spin_unlock_irqrestore(&priv->lock, flags);
+
+unref:
+ if (!deferred_pkt) {
++dev->stats.tx_dropped;
dev_kfree_skb_any(skb);
}
-unref:
ipoib_neigh_put(neigh);
return NETDEV_TX_OK;
--
2.12.3
Use the new of_get_compatible_child() helper to lookup the slot child
node instead of using of_find_compatible_node(), which searches the
entire tree and thus can return an unrelated (i.e. non-child) node.
This also addresses a potential use-after-free (e.g. after probe
deferral) as the tree-wide helper drops a reference to its first
argument (i.e. the node of the device being probed).
While at it, also fix up the related slot-node reference leak.
Fixes: ed80a13bb4c4 ("mmc: meson-mx-sdio: Add a driver for the Amlogic Meson8 and Meson8b SoCs")
Cc: stable <stable(a)vger.kernel.org> # 4.15
Cc: Carlo Caione <carlo(a)endlessm.com>
Cc: Martin Blumenstingl <martin.blumenstingl(a)googlemail.com>
Cc: Ulf Hansson <ulf.hansson(a)linaro.org>
Signed-off-by: Johan Hovold <johan(a)kernel.org>
---
drivers/mmc/host/meson-mx-sdio.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/drivers/mmc/host/meson-mx-sdio.c b/drivers/mmc/host/meson-mx-sdio.c
index 09cb89645d06..2cfec33178c1 100644
--- a/drivers/mmc/host/meson-mx-sdio.c
+++ b/drivers/mmc/host/meson-mx-sdio.c
@@ -517,19 +517,23 @@ static struct mmc_host_ops meson_mx_mmc_ops = {
static struct platform_device *meson_mx_mmc_slot_pdev(struct device *parent)
{
struct device_node *slot_node;
+ struct platform_device *pdev;
/*
* TODO: the MMC core framework currently does not support
* controllers with multiple slots properly. So we only register
* the first slot for now
*/
- slot_node = of_find_compatible_node(parent->of_node, NULL, "mmc-slot");
+ slot_node = of_get_compatible_child(parent->of_node, "mmc-slot");
if (!slot_node) {
dev_warn(parent, "no 'mmc-slot' sub-node found\n");
return ERR_PTR(-ENOENT);
}
- return of_platform_device_create(slot_node, NULL, parent);
+ pdev = of_platform_device_create(slot_node, NULL, parent);
+ of_node_put(slot_node);
+
+ return pdev;
}
static int meson_mx_mmc_add_host(struct meson_mx_mmc_host *host)
--
2.18.0
This is the start of the stable review cycle for the 4.18.5 release.
There are 22 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Sat Aug 25 07:47:43 UTC 2018.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.18.5-rc1…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-4.18.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 4.18.5-rc1
Jann Horn <jannh(a)google.com>
reiserfs: fix broken xattr handling (heap corruption, bad retval)
Esben Haabendal <eha(a)deif.com>
i2c: imx: Fix race condition in dma read
Hans de Goede <hdegoede(a)redhat.com>
i2c: core: ACPI: Properly set status byte to 0 for multi-byte writes
Lukas Wunner <lukas(a)wunner.de>
PCI: pciehp: Fix unprotected list iteration in IRQ handler
Lukas Wunner <lukas(a)wunner.de>
PCI: pciehp: Fix use-after-free on unplug
Myron Stowe <myron.stowe(a)redhat.com>
PCI: Skip MPS logic for Virtual Functions (VFs)
Zachary Zhang <zhangzg(a)marvell.com>
PCI: aardvark: Size bridges before resources allocation
Lukas Wunner <lukas(a)wunner.de>
PCI: hotplug: Don't leak pci_slot on registration failure
Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
PCI / ACPI / PM: Resume all bridges on suspend-to-RAM
Christian König <ckoenig.leichtzumerken(a)gmail.com>
PCI: Restore resized BAR state on resume
John David Anglin <dave.anglin(a)bell.net>
parisc: Remove ordered stores from syscall.S
John David Anglin <dave.anglin(a)bell.net>
parisc: Remove unnecessary barriers from spinlock.h
Gustavo A. R. Silva <gustavo(a)embeddedor.com>
drm/amdgpu/pm: Fix potential Spectre v1
Gustavo A. R. Silva <gustavo(a)embeddedor.com>
drm/i915/kvmgt: Fix potential Spectre v1
Jeremy Cline <jcline(a)redhat.com>
ext4: fix spectre gadget in ext4_mb_regular_allocator()
Michael Ellerman <mpe(a)ellerman.id.au>
powerpc64s: Show ori31 availability in spectre_v1 sysfs file not v2
Dave Hansen <dave.hansen(a)linux.intel.com>
x86/mm/init: Remove freed kernel image areas from alias mapping
Dave Hansen <dave.hansen(a)linux.intel.com>
x86/mm/init: Add helper for freeing kernel image pages
Dave Hansen <dave.hansen(a)linux.intel.com>
x86/mm/init: Pass unconverted symbol addresses to free_init_pages()
Dave Hansen <dave.hansen(a)linux.intel.com>
mm: Allow non-direct-map arguments to free_reserved_area()
Matthijs van Duin <matthijsvanduin(a)gmail.com>
pty: fix O_CLOEXEC for TIOCGPTPEER
Takashi Iwai <tiwai(a)suse.de>
EDAC: Add missing MEM_LRDDR4 entry in edac_mem_types[]
-------------
Diffstat:
Makefile | 4 ++--
arch/parisc/include/asm/spinlock.h | 8 ++------
arch/parisc/kernel/syscall.S | 24 +++++++++++-----------
arch/powerpc/kernel/security.c | 27 ++++++++++++++++---------
arch/x86/include/asm/processor.h | 1 +
arch/x86/include/asm/set_memory.h | 1 +
arch/x86/mm/init.c | 37 +++++++++++++++++++++++++++++++---
arch/x86/mm/init_64.c | 8 ++------
arch/x86/mm/pageattr.c | 13 ++++++++++++
drivers/edac/edac_mc.c | 1 +
drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 3 ++-
drivers/gpu/drm/i915/gvt/kvmgt.c | 9 ++++++++-
drivers/i2c/busses/i2c-imx.c | 8 ++++----
drivers/i2c/i2c-core-acpi.c | 11 +++++++---
drivers/pci/controller/pci-aardvark.c | 1 +
drivers/pci/hotplug/pci_hotplug_core.c | 9 +++++++++
drivers/pci/hotplug/pciehp.h | 1 +
drivers/pci/hotplug/pciehp_core.c | 7 +++++++
drivers/pci/hotplug/pciehp_hpc.c | 18 +++++------------
drivers/pci/pci-acpi.c | 6 ++----
drivers/pci/pci.c | 28 +++++++++++++++++++++++++
drivers/pci/probe.c | 4 ++++
drivers/tty/pty.c | 2 +-
fs/ext4/mballoc.c | 4 +++-
fs/reiserfs/xattr.c | 4 +++-
mm/page_alloc.c | 16 +++++++++++++--
26 files changed, 185 insertions(+), 70 deletions(-)
A recent change added some MDS processing in the lpfc_drain_txq
routine that relies on the fcp_wq being allocated. For nvmet operation
the fcp_wq is not allocated because it can only be an nvme-target.
When the original MDS support was added LS_MDS_LOOPBACK was
defined wrong, (0x16) it should have been 0x10 (decimal value used for
hex setting). This incorrect value allowed MDS_LOOPBACK to be set
simultaneously with LS_NPIV_FAB_SUPPORTED, causing the driver to
crash when it accesses the non-existent fcp_wq.
Correct the bad value setting for LS_MDS_LOOPBACK.
Fixes: ae9e28f36a6c ("lpfc: Add MDS Diagnostic support.")
Cc: <stable(a)vger.kernel.org> # 4.12
Signed-off-by: Dick Kennedy <dick.kennedy(a)broadcom.com>
Signed-off-by: James Smart <james.smart(a)broadcom.com>
---
drivers/scsi/lpfc/lpfc.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h
index e0d0da5f43d6..43732e8d1347 100644
--- a/drivers/scsi/lpfc/lpfc.h
+++ b/drivers/scsi/lpfc/lpfc.h
@@ -672,7 +672,7 @@ struct lpfc_hba {
#define LS_NPIV_FAB_SUPPORTED 0x2 /* Fabric supports NPIV */
#define LS_IGNORE_ERATT 0x4 /* intr handler should ignore ERATT */
#define LS_MDS_LINK_DOWN 0x8 /* MDS Diagnostics Link Down */
-#define LS_MDS_LOOPBACK 0x16 /* MDS Diagnostics Link Up (Loopback) */
+#define LS_MDS_LOOPBACK 0x10 /* MDS Diagnostics Link Up (Loopback) */
uint32_t hba_flag; /* hba generic flags */
#define HBA_ERATT_HANDLED 0x1 /* This flag is set when eratt handled */
--
2.13.1
The patch titled
Subject: mm: respect arch_dup_mmap() return value
has been added to the -mm tree. Its filename is
mm-respect-arch_dup_mmap-return-value.patch
This patch should soon appear at
http://ozlabs.org/~akpm/mmots/broken-out/mm-respect-arch_dup_mmap-return-va…
and later at
http://ozlabs.org/~akpm/mmotm/broken-out/mm-respect-arch_dup_mmap-return-va…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Nadav Amit <namit(a)vmware.com>
Subject: mm: respect arch_dup_mmap() return value
d70f2a14b72a4 ("include/linux/sched/mm.h: uninline mmdrop_async(), etc")
ignored the return value of arch_dup_mmap(). As a result, on x86, a
failure to duplicate the LDT (e.g., due to memory allocation error), would
leave the duplicated memory mapping in an inconsistent state.
Fix by regarding the return value, as it was before the change.
Link: http://lkml.kernel.org/r/20180823051229.211856-1-namit@vmware.com
Fixes: d70f2a14b72a4 ("include/linux/sched/mm.h: uninline mmdrop_async(), etc")
Signed-off-by: Nadav Amit <namit(a)vmware.com>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
kernel/fork.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
--- a/kernel/fork.c~mm-respect-arch_dup_mmap-return-value
+++ a/kernel/fork.c
@@ -550,8 +550,7 @@ static __latent_entropy int dup_mmap(str
goto out;
}
/* a new mm has just been created */
- arch_dup_mmap(oldmm, mm);
- retval = 0;
+ retval = arch_dup_mmap(oldmm, mm);
out:
up_write(&mm->mmap_sem);
flush_tlb_mm(oldmm);
_
Patches currently in -mm which might be from namit(a)vmware.com are
mm-respect-arch_dup_mmap-return-value.patch
The patch titled
Subject: mm: migration: fix migration of huge PMD shared pages
has been added to the -mm tree. Its filename is
mm-migration-fix-migration-of-huge-pmd-shared-pages.patch
This patch should soon appear at
http://ozlabs.org/~akpm/mmots/broken-out/mm-migration-fix-migration-of-huge…
and later at
http://ozlabs.org/~akpm/mmotm/broken-out/mm-migration-fix-migration-of-huge…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Mike Kravetz <mike.kravetz(a)oracle.com>
Subject: mm: migration: fix migration of huge PMD shared pages
The page migration code employs try_to_unmap() to try and unmap the source
page. This is accomplished by using rmap_walk to find all vmas where the
page is mapped. This search stops when page mapcount is zero. For shared
PMD huge pages, the page map count is always 1 no matter the number of
mappings. Shared mappings are tracked via the reference count of the PMD
page. Therefore, try_to_unmap stops prematurely and does not completely
unmap all mappings of the source page.
This problem can result is data corruption as writes to the original
source page can happen after contents of the page are copied to the target
page. Hence, data is lost.
This problem was originally seen as DB corruption of shared global areas
after a huge page was soft offlined due to ECC memory errors. DB
developers noticed they could reproduce the issue by (hotplug) offlining
memory used to back huge pages. A simple testcase can reproduce the
problem by creating a shared PMD mapping (note that this must be at least
PUD_SIZE in size and PUD_SIZE aligned (1GB on x86)), and using
migrate_pages() to migrate process pages between nodes while continually
writing to the huge pages being migrated.
To fix, have the try_to_unmap_one routine check for huge PMD sharing by
calling huge_pmd_unshare for hugetlbfs huge pages. If it is a shared
mapping it will be 'unshared' which removes the page table entry and drops
the reference on the PMD page. After this, flush caches and TLB.
mmu notifiers are called before locking page tables, but we can not be
sure of PMD sharing until page tables are locked. Therefore, check for
the possibility of PMD sharing before locking so that notifiers can
prepare for the worst possible case.
Link: http://lkml.kernel.org/r/20180823205917.16297-2-mike.kravetz@oracle.com
Fixes: 39dde65c9940 ("shared page table for hugetlb page")
Signed-off-by: Mike Kravetz <mike.kravetz(a)oracle.com>
Cc: "Kirill A . Shutemov" <kirill.shutemov(a)linux.intel.com>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: Naoya Horiguchi <n-horiguchi(a)ah.jp.nec.com>
Cc: Davidlohr Bueso <dave(a)stgolabs.net>
Cc: Michal Hocko <mhocko(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
include/linux/hugetlb.h | 14 ++++++++++++
mm/hugetlb.c | 40 ++++++++++++++++++++++++++++++++++--
mm/rmap.c | 42 +++++++++++++++++++++++++++++++++++---
3 files changed, 91 insertions(+), 5 deletions(-)
--- a/include/linux/hugetlb.h~mm-migration-fix-migration-of-huge-pmd-shared-pages
+++ a/include/linux/hugetlb.h
@@ -140,6 +140,8 @@ pte_t *huge_pte_alloc(struct mm_struct *
pte_t *huge_pte_offset(struct mm_struct *mm,
unsigned long addr, unsigned long sz);
int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep);
+void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
+ unsigned long *start, unsigned long *end);
struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
int write);
struct page *follow_huge_pd(struct vm_area_struct *vma,
@@ -170,6 +172,18 @@ static inline unsigned long hugetlb_tota
return 0;
}
+static inline int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr,
+ pte_t *ptep)
+{
+ return 0;
+}
+
+static inline void adjust_range_if_pmd_sharing_possible(
+ struct vm_area_struct *vma,
+ unsigned long *start, unsigned long *end)
+{
+}
+
#define follow_hugetlb_page(m,v,p,vs,a,b,i,w,n) ({ BUG(); 0; })
#define follow_huge_addr(mm, addr, write) ERR_PTR(-EINVAL)
#define copy_hugetlb_page_range(src, dst, vma) ({ BUG(); 0; })
--- a/mm/hugetlb.c~mm-migration-fix-migration-of-huge-pmd-shared-pages
+++ a/mm/hugetlb.c
@@ -4541,6 +4541,9 @@ static unsigned long page_table_shareabl
return saddr;
}
+#define _range_in_vma(vma, start, end) \
+ ((vma)->vm_start <= (start) && (end) <= (vma)->vm_end)
+
static bool vma_shareable(struct vm_area_struct *vma, unsigned long addr)
{
unsigned long base = addr & PUD_MASK;
@@ -4549,13 +4552,41 @@ static bool vma_shareable(struct vm_area
/*
* check on proper vm_flags and page table alignment
*/
- if (vma->vm_flags & VM_MAYSHARE &&
- vma->vm_start <= base && end <= vma->vm_end)
+ if (vma->vm_flags & VM_MAYSHARE && _range_in_vma(vma, base, end))
return true;
return false;
}
/*
+ * Determine if start,end range within vma could be mapped by shared pmd.
+ * If yes, adjust start and end to cover range associated with possible
+ * shared pmd mappings.
+ */
+void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
+ unsigned long *start, unsigned long *end)
+{
+ unsigned long check_addr = *start;
+
+ if (!(vma->vm_flags & VM_MAYSHARE))
+ return;
+
+ for (check_addr = *start; check_addr < *end; check_addr += PUD_SIZE) {
+ unsigned long a_start = check_addr & PUD_MASK;
+ unsigned long a_end = a_start + PUD_SIZE;
+
+ /*
+ * If sharing is possible, adjust start/end if necessary.
+ */
+ if (_range_in_vma(vma, a_start, a_end)) {
+ if (a_start < *start)
+ *start = a_start;
+ if (a_end > *end)
+ *end = a_end;
+ }
+ }
+}
+
+/*
* Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc()
* and returns the corresponding pte. While this is not necessary for the
* !shared pmd case because we can allocate the pmd later as well, it makes the
@@ -4652,6 +4683,11 @@ int huge_pmd_unshare(struct mm_struct *m
{
return 0;
}
+
+void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
+ unsigned long *start, unsigned long *end)
+{
+}
#define want_pmd_share() (0)
#endif /* CONFIG_ARCH_WANT_HUGE_PMD_SHARE */
--- a/mm/rmap.c~mm-migration-fix-migration-of-huge-pmd-shared-pages
+++ a/mm/rmap.c
@@ -1362,11 +1362,21 @@ static bool try_to_unmap_one(struct page
}
/*
- * We have to assume the worse case ie pmd for invalidation. Note that
- * the page can not be free in this function as call of try_to_unmap()
- * must hold a reference on the page.
+ * For THP, we have to assume the worse case ie pmd for invalidation.
+ * For hugetlb, it could be much worse if we need to do pud
+ * invalidation in the case of pmd sharing.
+ *
+ * Note that the page can not be free in this function as call of
+ * try_to_unmap() must hold a reference on the page.
*/
end = min(vma->vm_end, start + (PAGE_SIZE << compound_order(page)));
+ if (PageHuge(page)) {
+ /*
+ * If sharing is possible, start and end will be adjusted
+ * accordingly.
+ */
+ adjust_range_if_pmd_sharing_possible(vma, &start, &end);
+ }
mmu_notifier_invalidate_range_start(vma->vm_mm, start, end);
while (page_vma_mapped_walk(&pvmw)) {
@@ -1409,6 +1419,32 @@ static bool try_to_unmap_one(struct page
subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
address = pvmw.address;
+ if (PageHuge(page)) {
+ if (huge_pmd_unshare(mm, &address, pvmw.pte)) {
+ /*
+ * huge_pmd_unshare unmapped an entire PMD
+ * page. There is no way of knowing exactly
+ * which PMDs may be cached for this mm, so
+ * we must flush them all. start/end were
+ * already adjusted above to cover this range.
+ */
+ flush_cache_range(vma, start, end);
+ flush_tlb_range(vma, start, end);
+ mmu_notifier_invalidate_range(mm, start, end);
+
+ /*
+ * The ref count of the PMD page was dropped
+ * which is part of the way map counting
+ * is done for shared PMDs. Return 'true'
+ * here. When there is no other sharing,
+ * huge_pmd_unshare returns false and we will
+ * unmap the actual page and drop map count
+ * to zero.
+ */
+ page_vma_mapped_walk_done(&pvmw);
+ break;
+ }
+ }
if (IS_ENABLED(CONFIG_MIGRATION) &&
(flags & TTU_MIGRATION) &&
_
Patches currently in -mm which might be from mike.kravetz(a)oracle.com are
mm-migration-fix-migration-of-huge-pmd-shared-pages.patch
hugetlb-take-pmd-sharing-into-account-when-flushing-tlb-caches.patch
We use kzalloc to allocate the write_buf that we use for
i2c transfer on hdcp write. But it seems that we are forgetting
to free the memory that is not needed after i2c transfer is
completed.
Reported-by: Brian J Wood <brian.j.wood(a)intel.com>
Fixes: 2320175feb74 ("drm/i915: Implement HDCP for HDMI")
Cc: Ramalingam C <ramalingam.c(a)intel.com>
Cc: Sean Paul <seanpaul(a)chromium.org>
Cc: Jani Nikula <jani.nikula(a)linux.intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi(a)intel.com>
Cc: <stable(a)vger.kernel.org> # v4.17+
Signed-off-by: Rodrigo Vivi <rodrigo.vivi(a)intel.com>
---
drivers/gpu/drm/i915/intel_hdmi.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c
index 8363fbd18ee8..a1799b5c12bb 100644
--- a/drivers/gpu/drm/i915/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/intel_hdmi.c
@@ -943,8 +943,12 @@ static int intel_hdmi_hdcp_write(struct intel_digital_port *intel_dig_port,
ret = i2c_transfer(adapter, &msg, 1);
if (ret == 1)
- return 0;
- return ret >= 0 ? -EIO : ret;
+ ret = 0;
+ else if (ret >= 0)
+ ret = -EIO;
+
+ kfree(write_buf);
+ return ret;
}
static
--
2.17.1
The patch titled
Subject: reiserfs: fix broken xattr handling (heap corruption, bad retval)
has been removed from the -mm tree. Its filename was
reiserfs-fix-broken-xattr-handling-heap-corruption-bad-retval.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Jann Horn <jannh(a)google.com>
Subject: reiserfs: fix broken xattr handling (heap corruption, bad retval)
This fixes the following issues:
- When a buffer size is supplied to reiserfs_listxattr() such that each
individual name fits, but the concatenation of all names doesn't fit,
reiserfs_listxattr() overflows the supplied buffer. This leads to a
kernel heap overflow (verified using KASAN) followed by an out-of-bounds
usercopy and is therefore a security bug.
- When a buffer size is supplied to reiserfs_listxattr() such that a
name doesn't fit, -ERANGE should be returned. But reiserfs instead just
truncates the list of names; I have verified that if the only xattr on a
file has a longer name than the supplied buffer length, listxattr()
incorrectly returns zero.
With my patch applied, -ERANGE is returned in both cases and the memory
corruption doesn't happen anymore.
Credit for making me clean this code up a bit goes to Al Viro, who pointed
out that the ->actor calling convention is suboptimal and should be
changed.
Link: http://lkml.kernel.org/r/20180802151539.5373-1-jannh@google.com
Fixes: 48b32a3553a5 ("reiserfs: use generic xattr handlers")
Signed-off-by: Jann Horn <jannh(a)google.com>
Acked-by: Jeff Mahoney <jeffm(a)suse.com>
Cc: Eric Biggers <ebiggers(a)google.com>
Cc: Al Viro <viro(a)zeniv.linux.org.uk>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/reiserfs/xattr.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
--- a/fs/reiserfs/xattr.c~reiserfs-fix-broken-xattr-handling-heap-corruption-bad-retval
+++ a/fs/reiserfs/xattr.c
@@ -792,8 +792,10 @@ static int listxattr_filler(struct dir_c
return 0;
size = namelen + 1;
if (b->buf) {
- if (size > b->size)
+ if (b->pos + size > b->size) {
+ b->pos = -ERANGE;
return -ERANGE;
+ }
memcpy(b->buf + b->pos, name, namelen);
b->buf[b->pos + namelen] = 0;
}
_
Patches currently in -mm which might be from jannh(a)google.com are
The patch titled
Subject: drivers/block/zram/zram_drv.c: fix bug storing backing_dev
has been removed from the -mm tree. Its filename was
zram-fix-bug-storing-backing_dev.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Peter Kalauskas <peskal(a)google.com>
Subject: drivers/block/zram/zram_drv.c: fix bug storing backing_dev
The call to strlcpy in backing_dev_store is incorrect. It should take
the size of the destination buffer instead of the size of the source
buffer. Additionally, ignore the newline character (\n) when reading
the new file_name buffer. This makes it possible to set the backing_dev
as follows:
echo /dev/sdX > /sys/block/zram0/backing_dev
The reason it worked before was the fact that strlcpy() copies 'len - 1'
bytes, which is strlen(buf) - 1 in our case, so it accidentally didn't
copy the trailing new line symbol. Which also means that "echo -n
/dev/sdX" most likely was broken.
Signed-off-by: Peter Kalauskas <peskal(a)google.com>
Link: http://lkml.kernel.org/r/20180813061623.GC64836@rodete-desktop-imager.corp.…
Acked-by: Minchan Kim <minchan(a)kernel.org>
Reviewed-by: Sergey Senozhatsky <sergey.senozhatsky(a)gmail.com>
Cc: <stable(a)vger.kernel.org> [4.14+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
drivers/block/zram/zram_drv.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
--- a/drivers/block/zram/zram_drv.c~zram-fix-bug-storing-backing_dev
+++ a/drivers/block/zram/zram_drv.c
@@ -337,6 +337,7 @@ static ssize_t backing_dev_store(struct
struct device_attribute *attr, const char *buf, size_t len)
{
char *file_name;
+ size_t sz;
struct file *backing_dev = NULL;
struct inode *inode;
struct address_space *mapping;
@@ -357,7 +358,11 @@ static ssize_t backing_dev_store(struct
goto out;
}
- strlcpy(file_name, buf, len);
+ strlcpy(file_name, buf, PATH_MAX);
+ /* ignore trailing newline */
+ sz = strlen(file_name);
+ if (sz > 0 && file_name[sz - 1] == '\n')
+ file_name[sz - 1] = 0x00;
backing_dev = filp_open(file_name, O_RDWR|O_LARGEFILE, 0);
if (IS_ERR(backing_dev)) {
_
Patches currently in -mm which might be from peskal(a)google.com are
The page migration code employs try_to_unmap() to try and unmap the
source page. This is accomplished by using rmap_walk to find all
vmas where the page is mapped. This search stops when page mapcount
is zero. For shared PMD huge pages, the page map count is always 1
no matter the number of mappings. Shared mappings are tracked via
the reference count of the PMD page. Therefore, try_to_unmap stops
prematurely and does not completely unmap all mappings of the source
page.
This problem can result is data corruption as writes to the original
source page can happen after contents of the page are copied to the
target page. Hence, data is lost.
This problem was originally seen as DB corruption of shared global
areas after a huge page was soft offlined due to ECC memory errors.
DB developers noticed they could reproduce the issue by (hotplug)
offlining memory used to back huge pages. A simple testcase can
reproduce the problem by creating a shared PMD mapping (note that
this must be at least PUD_SIZE in size and PUD_SIZE aligned (1GB on
x86)), and using migrate_pages() to migrate process pages between
nodes while continually writing to the huge pages being migrated.
To fix, have the try_to_unmap_one routine check for huge PMD sharing
by calling huge_pmd_unshare for hugetlbfs huge pages. If it is a
shared mapping it will be 'unshared' which removes the page table
entry and drops the reference on the PMD page. After this, flush
caches and TLB.
mmu notifiers are called before locking page tables, but we can not
be sure of PMD sharing until page tables are locked. Therefore,
check for the possibility of PMD sharing before locking so that
notifiers can prepare for the worst possible case.
Fixes: 39dde65c9940 ("shared page table for hugetlb page")
Cc: stable(a)vger.kernel.org
Signed-off-by: Mike Kravetz <mike.kravetz(a)oracle.com>
---
include/linux/hugetlb.h | 14 ++++++++++++++
mm/hugetlb.c | 40 +++++++++++++++++++++++++++++++++++++++
mm/rmap.c | 42 ++++++++++++++++++++++++++++++++++++++---
3 files changed, 93 insertions(+), 3 deletions(-)
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 36fa6a2a82e3..1c6cde68487f 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -140,6 +140,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
pte_t *huge_pte_offset(struct mm_struct *mm,
unsigned long addr, unsigned long sz);
int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep);
+bool huge_pmd_sharing_possible(struct vm_area_struct *vma,
+ unsigned long *start, unsigned long *end);
struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
int write);
struct page *follow_huge_pd(struct vm_area_struct *vma,
@@ -170,6 +172,18 @@ static inline unsigned long hugetlb_total_pages(void)
return 0;
}
+static inline int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr,
+ pte_t *ptep)
+{
+ return 0;
+}
+
+bool huge_pmd_sharing_possible(struct vm_area_struct *vma,
+ unsigned long *start, unsigned long *end)
+{
+ return false;
+}
+
#define follow_hugetlb_page(m,v,p,vs,a,b,i,w,n) ({ BUG(); 0; })
#define follow_huge_addr(mm, addr, write) ERR_PTR(-EINVAL)
#define copy_hugetlb_page_range(src, dst, vma) ({ BUG(); 0; })
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 3103099f64fd..fd155dc52117 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4555,6 +4555,9 @@ static bool vma_shareable(struct vm_area_struct *vma, unsigned long addr)
/*
* check on proper vm_flags and page table alignment
+ *
+ * Note that this is the same check used in huge_pmd_sharing_possible.
+ * If you change one, consider changing both.
*/
if (vma->vm_flags & VM_MAYSHARE &&
vma->vm_start <= base && end <= vma->vm_end)
@@ -4562,6 +4565,43 @@ static bool vma_shareable(struct vm_area_struct *vma, unsigned long addr)
return false;
}
+/*
+ * Determine if start,end range within vma could be mapped by shared pmd.
+ * If yes, adjust start and end to cover range associated with possible
+ * shared pmd mappings.
+ */
+bool huge_pmd_sharing_possible(struct vm_area_struct *vma,
+ unsigned long *start, unsigned long *end)
+{
+ unsigned long check_addr = *start;
+ bool ret = false;
+
+ if (!(vma->vm_flags & VM_MAYSHARE))
+ return ret;
+
+ for (check_addr = *start; check_addr < *end; check_addr += PUD_SIZE) {
+ unsigned long a_start = check_addr & PUD_MASK;
+ unsigned long a_end = a_start + PUD_SIZE;
+
+ /*
+ * If sharing is possible, adjust start/end if necessary.
+ *
+ * Note that this is the same check used in vma_shareable. If
+ * you change one, consider changing both.
+ */
+ if (vma->vm_start <= a_start && a_end <= vma->vm_end) {
+ if (a_start < *start)
+ *start = a_start;
+ if (a_end > *end)
+ *end = a_end;
+
+ ret = true;
+ }
+ }
+
+ return ret;
+}
+
/*
* Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc()
* and returns the corresponding pte. While this is not necessary for the
diff --git a/mm/rmap.c b/mm/rmap.c
index eb477809a5c0..8cf853a4b093 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1362,11 +1362,21 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
}
/*
- * We have to assume the worse case ie pmd for invalidation. Note that
- * the page can not be free in this function as call of try_to_unmap()
- * must hold a reference on the page.
+ * For THP, we have to assume the worse case ie pmd for invalidation.
+ * For hugetlb, it could be much worse if we need to do pud
+ * invalidation in the case of pmd sharing.
+ *
+ * Note that the page can not be free in this function as call of
+ * try_to_unmap() must hold a reference on the page.
*/
end = min(vma->vm_end, start + (PAGE_SIZE << compound_order(page)));
+ if (PageHuge(page)) {
+ /*
+ * If sharing is possible, start and end will be adjusted
+ * accordingly.
+ */
+ (void)huge_pmd_sharing_possible(vma, &start, &end);
+ }
mmu_notifier_invalidate_range_start(vma->vm_mm, start, end);
while (page_vma_mapped_walk(&pvmw)) {
@@ -1409,6 +1419,32 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
address = pvmw.address;
+ if (PageHuge(page)) {
+ if (huge_pmd_unshare(mm, &address, pvmw.pte)) {
+ /*
+ * huge_pmd_unshare unmapped an entire PMD
+ * page. There is no way of knowing exactly
+ * which PMDs may be cached for this mm, so
+ * we must flush them all. start/end were
+ * already adjusted above to cover this range.
+ */
+ flush_cache_range(vma, start, end);
+ flush_tlb_range(vma, start, end);
+ mmu_notifier_invalidate_range(mm, start, end);
+
+ /*
+ * The ref count of the PMD page was dropped
+ * which is part of the way map counting
+ * is done for shared PMDs. Return 'true'
+ * here. When there is no other sharing,
+ * huge_pmd_unshare returns false and we will
+ * unmap the actual page and drop map count
+ * to zero.
+ */
+ page_vma_mapped_walk_done(&pvmw);
+ break;
+ }
+ }
if (IS_ENABLED(CONFIG_MIGRATION) &&
(flags & TTU_MIGRATION) &&
--
2.17.1
This patch requires that /sbin/depmod is installed and installable on
the build host.
But not all build hosts for cross compiling Linux are Linux systems
and are able to provide a working port of depmod, especially at the
file patch /sbin/depmod.
I use, for example, a Darwin system to cross compile Linux and I run
depmod -a on the embedded system once, after installing a new Linux
kernel there.
I have no problem with seeing a warning, but aborting the build process
is IMHO a bad idea since the previous behaviour didn't harm many people
as far as I see. Probably 99% of people compiling Linux kernels do that
on Linux and 99% of those have depmod installed for optimal operation of
their build host. So IMHO printing the warning is good enough.
BR and thanks,
Nikolaus Schaller
On Thu, Aug 23, 2018 at 09:38:31AM -0400, Scott French wrote:
> Please remove the stfrench @ gmail address. I am not Steve
Now removed, sorry for the noise.
greg k-h
Commit 5769beaf180a8 ("powerpc/mm: Add proper pte access check helper
for other platforms") replaced generic pte_access_permitted() by an
arch specific one.
The generic one is defined as
(pte_present(pte) && (!(write) || pte_write(pte)))
The arch specific one is open coded checking that _PAGE_USER and
_PAGE_WRITE (_PAGE_RW) flags are set, but lacking to check that
_PAGE_RO and _PAGE_PRIVILEGED are unset, leading to a useless test
on targets like the 8xx which defines _PAGE_RW and _PAGE_USER as 0.
Commit 5fa5b16be5b31 ("powerpc/mm/hugetlb: Use pte_access_permitted
for hugetlb access check") replaced some tests performed with
pte helpers by a call to pte_access_permitted(), leading to the same
issue.
This patch rewrites powerpc/nohash pte_access_permitted()
using pte helpers.
Fixes: 5769beaf180a8 ("powerpc/mm: Add proper pte access check helper for other platforms")
Fixes: 5fa5b16be5b31 ("powerpc/mm/hugetlb: Use pte_access_permitted for hugetlb access check")
Cc: stable(a)vger.kernel.org # v4.15+
Signed-off-by: Christophe Leroy <christophe.leroy(a)c-s.fr>
---
arch/powerpc/include/asm/nohash/pgtable.h | 9 +++------
1 file changed, 3 insertions(+), 6 deletions(-)
diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h
index 2160be2e4339..b321c82b3624 100644
--- a/arch/powerpc/include/asm/nohash/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/pgtable.h
@@ -51,17 +51,14 @@ static inline int pte_present(pte_t pte)
#define pte_access_permitted pte_access_permitted
static inline bool pte_access_permitted(pte_t pte, bool write)
{
- unsigned long pteval = pte_val(pte);
/*
* A read-only access is controlled by _PAGE_USER bit.
* We have _PAGE_READ set for WRITE and EXECUTE
*/
- unsigned long need_pte_bits = _PAGE_PRESENT | _PAGE_USER;
-
- if (write)
- need_pte_bits |= _PAGE_WRITE;
+ if (!pte_present(pte) || !pte_user(pte) || !pte_read(pte))
+ return false;
- if ((pteval & need_pte_bits) != need_pte_bits)
+ if (write && !pte_write(pte))
return false;
return true;
--
2.13.3
Hi Greg,
This patch is not marked for 4.4-stable, but it's already in 4.9 and 4.14 stable.
Please apply to 4.4-stable.
This patch turned the status from error to warning if d_type is not supported,
and thus operation won't be interrupted.
--
SZ Lin (林上智)
Hi Greg,
This patch is not marked for 4.4-stable, but it's already in 4.9 and 4.14 stable.
Please apply to 4.4-stable.
This patch fixed check machanism for d_type to avoid returning d_type not
supported even if underlying filesystem might be supporting it.
--
SZ Lin (林上智)
Hi Greg,
This patch is not marked for 4.4-stable, but it's already in 4.9 and 4.14 stable.
Please apply to 4.4-stable.
This patch added a check mechanism for d_type in upper layer of overlayfs to
avoid whiteouts issue.
--
SZ Lin (林上智)
Commit d70f2a14b72a4 ("include/linux/sched/mm.h: uninline
mmdrop_async(), etc") ignored the return value of arch_dup_mmap(). As a
result, on x86, a failure to duplicate the LDT (e.g., due to memory
allocation error), would leave the duplicated memory mapping in an
inconsistent state.
Fix by regarding the return value, as it was before the change.
Fixes: d70f2a14b72a4 ("include/linux/sched/mm.h: uninline mmdrop_async(), etc")
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: stable(a)vger.kernel.org
Signed-off-by: Nadav Amit <namit(a)vmware.com>
---
kernel/fork.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/kernel/fork.c b/kernel/fork.c
index 1b27babc4c78..4527d1d331de 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -549,8 +549,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
goto out;
}
/* a new mm has just been created */
- arch_dup_mmap(oldmm, mm);
- retval = 0;
+ retval = arch_dup_mmap(oldmm, mm);
out:
up_write(&mm->mmap_sem);
flush_tlb_mm(oldmm);
--
2.17.1
Use the new of_get_compatible_child() helper to lookup the usb sibling
node instead of using of_find_compatible_node(), which searches the
entire tree and thus can return an unrelated (non-sibling) node.
This also addresses a potential use-after-free (e.g. after probe
deferral) as the tree-wide helper drops a reference to its first
argument (i.e. the parent device node).
While at it, also fix the related phy-node reference leak.
Fixes: f5e4edb8c888 ("power: twl4030_charger: find associated phy by more reliable means.")
Cc: stable <stable(a)vger.kernel.org> # 4.2
Cc: NeilBrown <neilb(a)suse.de>
Cc: Felipe Balbi <balbi(a)ti.com>
Cc: Sebastian Reichel <sre(a)kernel.org>
Signed-off-by: Johan Hovold <johan(a)kernel.org>
---
drivers/power/supply/twl4030_charger.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/drivers/power/supply/twl4030_charger.c b/drivers/power/supply/twl4030_charger.c
index bbcaee56db9d..b6a7d9f74cf3 100644
--- a/drivers/power/supply/twl4030_charger.c
+++ b/drivers/power/supply/twl4030_charger.c
@@ -996,12 +996,13 @@ static int twl4030_bci_probe(struct platform_device *pdev)
if (bci->dev->of_node) {
struct device_node *phynode;
- phynode = of_find_compatible_node(bci->dev->of_node->parent,
- NULL, "ti,twl4030-usb");
+ phynode = of_get_compatible_child(bci->dev->of_node->parent,
+ "ti,twl4030-usb");
if (phynode) {
bci->usb_nb.notifier_call = twl4030_bci_usb_ncb;
bci->transceiver = devm_usb_get_phy_by_node(
bci->dev, phynode, &bci->usb_nb);
+ of_node_put(phynode);
if (IS_ERR(bci->transceiver)) {
ret = PTR_ERR(bci->transceiver);
if (ret == -EPROBE_DEFER)
--
2.18.0
Various mips64 and ppc64 qemu tests crash as follows
in v4.14.y and v4.17.y (the log is from ppc64).
------------[ cut here ]------------
kernel BUG at kernel/time/hrtimer.c:1673!
Oops: Exception in kernel mode, sig: 5 [#1]
BE NUMA CoreNet Generic
Modules linked in:
CPU: 0 PID: 1 Comm: init Not tainted 4.17.19-rc1-00309-g8fe1830 #1
NIP: c000000000085d6c LR: c00000000089d840 CTR: c00000000000cd00
REGS: c00000003e1e7990 TRAP: 0700 Not tainted (4.17.19-rc1-00309-g8fe1830)
MSR: 000000008002b000 <CE,EE,FP,ME> CR: 48000284 XER: 00000000
SOFTE: 0
GPR00: c00000000089d7ec c00000003e1e7c10 c000000000cb9c00 c00000003e1e8238
GPR04: c00000003e1e7c80 ffffffffffffffff 000000003b9aca00 0000000000000000
GPR08: 0000000031012c01 0000000031012c01 0000000000000002 0000000031012c01
GPR12: 0000000028000482 c000000000d35000 0000000000000000 0000000000000000
GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
GPR20: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
GPR24: 0000000000000000 0000000000000000 0000000000000016 00000000ffff9008
GPR28: c00000003e1e7e10 c00000003e1e8000 0000000000000000 000000009336eabb
NIP [c000000000085d6c] .nanosleep_copyout+0x4c/0x50
LR [c00000000089d840] .do_nanosleep+0x160/0x190
Call Trace:
[c00000003e1e7c10] [c00000000089d7ec] .do_nanosleep+0x10c/0x190 (unreliable)
[c00000003e1e7cc0] [c000000000085e78] .hrtimer_nanosleep+0x108/0x1d0
[c00000003e1e7da0] [c000000000086068] .__se_compat_sys_nanosleep+0x78/0xb0
[c00000003e1e7e30] [c000000000000618] system_call+0x58/0x64
Instruction dump:
7c832378 e8890010 4bffbadd 60000000 38210070 e8010010 2fa30000 3940fff2
3860fdfc 7c63579e 7c0803a6 4e800020 <0fe00000> 7c0802a6 fb81ffe0 fbc1fff0
---[ end trace 15c7fbc119007c42 ]---
I started to bisect, but abandoned it after finding commit 62d7ce7f40a9
("posix-timers: Fix nanosleep_copyout() for CONFIG_COMPAT_32BIT_TIME")
in both branches. Since there is no "config COMPAT_32BIT_TIME" in v4.14.y
or v4.17.y, some relevant code is commented out by the commit, which in
turn results in the crash.
Guenter
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 1204e35bedf4e5015cda559ed8c84789a6dae24e Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas(a)wunner.de>
Date: Thu, 19 Jul 2018 17:27:34 -0500
Subject: [PATCH] PCI: pciehp: Fix unprotected list iteration in IRQ handler
Commit b440bde74f04 ("PCI: Add pci_ignore_hotplug() to ignore hotplug
events for a device") iterates over the devices on a hotplug port's
subordinate bus in pciehp's IRQ handler without acquiring pci_bus_sem.
It is thus possible for a user to cause a crash by concurrently
manipulating the device list, e.g. by disabling slot power via sysfs
on a different CPU or by initiating a remove/rescan via sysfs.
This can't be fixed by acquiring pci_bus_sem because it may sleep.
The simplest fix is to avoid the list iteration altogether and just
check the ignore_hotplug flag on the port itself. This works because
pci_ignore_hotplug() sets the flag both on the device as well as on its
parent bridge.
We do lose the ability to print the name of the device blocking hotplug
in the debug message, but that's probably bearable.
Fixes: b440bde74f04 ("PCI: Add pci_ignore_hotplug() to ignore hotplug events for a device")
Signed-off-by: Lukas Wunner <lukas(a)wunner.de>
Signed-off-by: Bjorn Helgaas <bhelgaas(a)google.com>
Cc: stable(a)vger.kernel.org
diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c
index 84b3d421c083..aff191b4552c 100644
--- a/drivers/pci/hotplug/pciehp_hpc.c
+++ b/drivers/pci/hotplug/pciehp_hpc.c
@@ -539,8 +539,6 @@ static irqreturn_t pciehp_isr(int irq, void *dev_id)
{
struct controller *ctrl = (struct controller *)dev_id;
struct pci_dev *pdev = ctrl_dev(ctrl);
- struct pci_bus *subordinate = pdev->subordinate;
- struct pci_dev *dev;
struct slot *slot = ctrl->slot;
u16 status, events;
u8 present;
@@ -588,14 +586,9 @@ static irqreturn_t pciehp_isr(int irq, void *dev_id)
wake_up(&ctrl->queue);
}
- if (subordinate) {
- list_for_each_entry(dev, &subordinate->devices, bus_list) {
- if (dev->ignore_hotplug) {
- ctrl_dbg(ctrl, "ignoring hotplug event %#06x (%s requested no hotplug)\n",
- events, pci_name(dev));
- return IRQ_HANDLED;
- }
- }
+ if (pdev->ignore_hotplug) {
+ ctrl_dbg(ctrl, "ignoring hotplug event %#06x\n", events);
+ return IRQ_HANDLED;
}
/* Check Attention Button Pressed */
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 91a2968e245d6ba616db37001fa1a043078b1a65 Mon Sep 17 00:00:00 2001
From: Zachary Zhang <zhangzg(a)marvell.com>
Date: Fri, 29 Jun 2018 11:16:19 +0200
Subject: [PATCH] PCI: aardvark: Size bridges before resources allocation
The PCIE I/O and MEM resource allocation mechanism is that root bus
goes through the following steps:
1. Check PCI bridges' range and computes I/O and Mem base/limits.
2. Sort all subordinate devices I/O and MEM resource requirements and
allocate the resources and writes/updates subordinate devices'
requirements to PCI bridges I/O and Mem MEM/limits registers.
Currently, PCI Aardvark driver only handles the second step and lacks
the first step, so there is an I/O and MEM resource allocation failure
when using a PCI switch. This commit fixes that by sizing bridges
before doing the resource allocation.
Fixes: 8c39d710363c1 ("PCI: aardvark: Add Aardvark PCI host controller
driver")
Signed-off-by: Zachary Zhang <zhangzg(a)marvell.com>
[Thomas: edit commit log.]
Signed-off-by: Thomas Petazzoni <thomas.petazzoni(a)bootlin.com>
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi(a)arm.com>
Cc: <stable(a)vger.kernel.org>
diff --git a/drivers/pci/controller/pci-aardvark.c b/drivers/pci/controller/pci-aardvark.c
index c9c72595bd20..10543ed7b500 100644
--- a/drivers/pci/controller/pci-aardvark.c
+++ b/drivers/pci/controller/pci-aardvark.c
@@ -906,6 +906,7 @@ static int advk_pcie_probe(struct platform_device *pdev)
bus = bridge->bus;
+ pci_bus_size_bridges(bus);
pci_bus_assign_resources(bus);
list_for_each_entry(child, &bus->children, node)
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 91a2968e245d6ba616db37001fa1a043078b1a65 Mon Sep 17 00:00:00 2001
From: Zachary Zhang <zhangzg(a)marvell.com>
Date: Fri, 29 Jun 2018 11:16:19 +0200
Subject: [PATCH] PCI: aardvark: Size bridges before resources allocation
The PCIE I/O and MEM resource allocation mechanism is that root bus
goes through the following steps:
1. Check PCI bridges' range and computes I/O and Mem base/limits.
2. Sort all subordinate devices I/O and MEM resource requirements and
allocate the resources and writes/updates subordinate devices'
requirements to PCI bridges I/O and Mem MEM/limits registers.
Currently, PCI Aardvark driver only handles the second step and lacks
the first step, so there is an I/O and MEM resource allocation failure
when using a PCI switch. This commit fixes that by sizing bridges
before doing the resource allocation.
Fixes: 8c39d710363c1 ("PCI: aardvark: Add Aardvark PCI host controller
driver")
Signed-off-by: Zachary Zhang <zhangzg(a)marvell.com>
[Thomas: edit commit log.]
Signed-off-by: Thomas Petazzoni <thomas.petazzoni(a)bootlin.com>
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi(a)arm.com>
Cc: <stable(a)vger.kernel.org>
diff --git a/drivers/pci/controller/pci-aardvark.c b/drivers/pci/controller/pci-aardvark.c
index c9c72595bd20..10543ed7b500 100644
--- a/drivers/pci/controller/pci-aardvark.c
+++ b/drivers/pci/controller/pci-aardvark.c
@@ -906,6 +906,7 @@ static int advk_pcie_probe(struct platform_device *pdev)
bus = bridge->bus;
+ pci_bus_size_bridges(bus);
pci_bus_assign_resources(bus);
list_for_each_entry(child, &bus->children, node)
The patch below does not apply to the 4.17-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 91a2968e245d6ba616db37001fa1a043078b1a65 Mon Sep 17 00:00:00 2001
From: Zachary Zhang <zhangzg(a)marvell.com>
Date: Fri, 29 Jun 2018 11:16:19 +0200
Subject: [PATCH] PCI: aardvark: Size bridges before resources allocation
The PCIE I/O and MEM resource allocation mechanism is that root bus
goes through the following steps:
1. Check PCI bridges' range and computes I/O and Mem base/limits.
2. Sort all subordinate devices I/O and MEM resource requirements and
allocate the resources and writes/updates subordinate devices'
requirements to PCI bridges I/O and Mem MEM/limits registers.
Currently, PCI Aardvark driver only handles the second step and lacks
the first step, so there is an I/O and MEM resource allocation failure
when using a PCI switch. This commit fixes that by sizing bridges
before doing the resource allocation.
Fixes: 8c39d710363c1 ("PCI: aardvark: Add Aardvark PCI host controller
driver")
Signed-off-by: Zachary Zhang <zhangzg(a)marvell.com>
[Thomas: edit commit log.]
Signed-off-by: Thomas Petazzoni <thomas.petazzoni(a)bootlin.com>
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi(a)arm.com>
Cc: <stable(a)vger.kernel.org>
diff --git a/drivers/pci/controller/pci-aardvark.c b/drivers/pci/controller/pci-aardvark.c
index c9c72595bd20..10543ed7b500 100644
--- a/drivers/pci/controller/pci-aardvark.c
+++ b/drivers/pci/controller/pci-aardvark.c
@@ -906,6 +906,7 @@ static int advk_pcie_probe(struct platform_device *pdev)
bus = bridge->bus;
+ pci_bus_size_bridges(bus);
pci_bus_assign_resources(bus);
list_for_each_entry(child, &bus->children, node)
Hi Greg,
Kindly consider/review following net/sched fixes for stable 4.4.y.
This patchset is a follow-up of upstream fix
87b60cfacf9f ("net_sched: fix error recovery at qdisc creation")
cherry-picked on stable 4.4.y.
It fix null pointer dereferences due to uninitialized timer
(qdisc watchdog) or double frees due to ->destroy cleaning up a
second time. Here is the original submission
https://www.mail-archive.com/netdev@vger.kernel.org/msg186003.html
Cherry-picked and build tested on Linux 4.4.151 for ARCH=x86_64.
Regards,
Amit Pundir
Change since v1:
Rebased "sch_multiq: fix double free on init failure" patch
and fixed "unused variable" build warning.
Nikolay Aleksandrov (5):
sch_htb: fix crash on init failure
sch_multiq: fix double free on init failure
sch_hhf: fix null pointer dereference on init failure
sch_netem: avoid null pointer deref on init failure
sch_tbf: fix two null pointer dereferences on init failure
net/sched/sch_hhf.c | 3 +++
net/sched/sch_htb.c | 5 +++--
net/sched/sch_multiq.c | 9 ++-------
net/sched/sch_netem.c | 4 ++--
net/sched/sch_tbf.c | 5 +++--
5 files changed, 13 insertions(+), 13 deletions(-)
--
2.7.4
We need that to adjust the len of the 2nd transfer (called data in
spi-mem) if it's too long to fit in a SPI message or SPI transfer.
Fixes: c36ff266dc82 ("spi: Extend the core to ease integration of SPI memory controllers")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Chuanhua Han <chuanhua.han(a)nxp.com>
Suggested-by: Boris Brezillon <boris.brezillon(a)bootlin.com>
---
Changes in v5:
-Add the validation check after the op->data.nbytes assignment
-Assign the "len" variable after defining it
-Remove the brackets on both sides of "opt-> data.nbytes"
Changes in v4:
-Rename variable name "opcode_addr_dummy_sum" to "len".
-The comparison of "spi_max_message_size(mem->spi)" and "len" was removed.
-Adjust their order when comparing the sizes of "spi_max_message_size(mem->spi)" and "len"
-Changing the "unsigned long" type in the code to "size_t"
Changes in v3:
-Rename variable name "val" to "opcode_addr_dummy_sum".
-Place the legitimacy of the transfer size(i.e., "spi_max_message_size(mem->spi)" and
-"opcode_addr_dummy_sum") into "if (! ctlr - > mem_ops | |! ctlr-> mem_ops->exec_op) {"
structure and add "spi_max_transfer_size(mem->spi) and opcode_addr_dummy_sum".
-Adjust the formatting alignment of the code.
-"(unsigned long)op->data.nbytes" was modified to "(unsigned long)(op->data.nbytes)".
Changes in v2:
-Place the adjusted transfer bytes code in spi_mem_adjust_op_size() and check
spi_max_message_size(mem->spi) value before subtracting opcode, addr and dummy bytes.
-Change the code from fsl-espi controller to generic code(The adjustment of spi transmission
length was originally modified in the "drivers/spi/spi-fsl-espi.c" file, and now the adjustment
of transfer length is made in the "drivers/spi/spi-mem.c" file)
drivers/spi/spi-mem.c | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
diff --git a/drivers/spi/spi-mem.c b/drivers/spi/spi-mem.c
index 990770d..6184fa1 100644
--- a/drivers/spi/spi-mem.c
+++ b/drivers/spi/spi-mem.c
@@ -328,10 +328,26 @@ EXPORT_SYMBOL_GPL(spi_mem_exec_op);
int spi_mem_adjust_op_size(struct spi_mem *mem, struct spi_mem_op *op)
{
struct spi_controller *ctlr = mem->spi->controller;
+ size_t len;
+
+ len = sizeof(op->cmd.opcode) + op->addr.nbytes + op->dummy.nbytes;
if (ctlr->mem_ops && ctlr->mem_ops->adjust_op_size)
return ctlr->mem_ops->adjust_op_size(mem, op);
+ if (!ctlr->mem_ops || !ctlr->mem_ops->exec_op) {
+ if (len > spi_max_transfer_size(mem->spi))
+ return -EINVAL;
+
+ op->data.nbytes = min3((size_t)op->data.nbytes,
+ spi_max_transfer_size(mem->spi),
+ spi_max_message_size(mem->spi) -
+ len);
+
+ if (!op->data.nbytes)
+ return -EINVAL;
+ }
+
return 0;
}
EXPORT_SYMBOL_GPL(spi_mem_adjust_op_size);
--
2.7.4
From: Shan Hai <shan.hai(a)oracle.com>
The writeback thread would exit with a lock held when the cache device is
detached via sysfs interface, fix it by releasing the held lock before exiting
the while-loop.
Fixes: fadd94e05c02 (bcache: quit dc->writeback_thread when BCACHE_DEV_DETACHING is set)
Signed-off-by: Shan Hai <shan.hai(a)oracle.com>
Signed-off-by: Coly Li <colyli(a)suse.de>
Tested-by: Shenghui Wang <shhuiw(a)foxmail.com>
Cc: stable(a)vger.kernel.org #4.17+
---
Changelog:
v2: add Fixes tag by Coly Li.
v1: initial patch from Shan Hai.
drivers/md/bcache/writeback.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 6be05bd7ca67..08c3a9f9676c 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -685,8 +685,10 @@ static int bch_writeback_thread(void *arg)
* data on cache. BCACHE_DEV_DETACHING flag is set in
* bch_cached_dev_detach().
*/
- if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags))
+ if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags)) {
+ up_write(&dc->writeback_lock);
break;
+ }
}
up_write(&dc->writeback_lock);
--
2.18.0
Two bug fixes:
1) missing entries in the l1d_param array; this can cause a host crash
if an access attempts to reach the missing entry. Future-proof the get
function against any overflows as well. However, the two entries
VMENTER_L1D_FLUSH_EPT_DISABLED and VMENTER_L1D_FLUSH_NOT_REQUIRED must
not be accepted by the parse function, so disable them there.
2) invalid values must be rejected even if the CPU does not have the
bug, so test for them before checking boot_cpu_has(X86_BUG_L1TF)
... and a small refactoring, since the .cmd field is redundant with
the index in the array.
Reported-by: Bandan Das <bsd(a)redhat.com>
Cc: stable(a)vger.kernel.org
Fixes: a7b9020b06ec6d7c3f3b0d4ef1a9eba12654f4f7
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
---
arch/x86/kvm/vmx.c | 26 ++++++++++++++++----------
1 file changed, 16 insertions(+), 10 deletions(-)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c76ca8c4befa..8dae47e7267a 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -198,12 +198,14 @@
static const struct {
const char *option;
- enum vmx_l1d_flush_state cmd;
+ bool for_parse;
} vmentry_l1d_param[] = {
- {"auto", VMENTER_L1D_FLUSH_AUTO},
- {"never", VMENTER_L1D_FLUSH_NEVER},
- {"cond", VMENTER_L1D_FLUSH_COND},
- {"always", VMENTER_L1D_FLUSH_ALWAYS},
+ [VMENTER_L1D_FLUSH_AUTO] = {"auto", true},
+ [VMENTER_L1D_FLUSH_NEVER] = {"never", true},
+ [VMENTER_L1D_FLUSH_COND] = {"cond", true},
+ [VMENTER_L1D_FLUSH_ALWAYS] = {"always", true},
+ [VMENTER_L1D_FLUSH_EPT_DISABLED] = {"EPT disabled", false},
+ [VMENTER_L1D_FLUSH_NOT_REQUIRED] = {"not required", false},
};
#define L1D_CACHE_ORDER 4
@@ -287,8 +289,9 @@ static int vmentry_l1d_flush_parse(const char *s)
if (s) {
for (i = 0; i < ARRAY_SIZE(vmentry_l1d_param); i++) {
- if (sysfs_streq(s, vmentry_l1d_param[i].option))
- return vmentry_l1d_param[i].cmd;
+ if (vmentry_l1d_param[i].for_parse &&
+ sysfs_streq(s, vmentry_l1d_param[i].option))
+ return i;
}
}
return -EINVAL;
@@ -298,13 +301,13 @@ static int vmentry_l1d_flush_set(const char *s, const struct kernel_param *kp)
{
int l1tf, ret;
- if (!boot_cpu_has(X86_BUG_L1TF))
- return 0;
-
l1tf = vmentry_l1d_flush_parse(s);
if (l1tf < 0)
return l1tf;
+ if (!boot_cpu_has(X86_BUG_L1TF))
+ return 0;
+
/*
* Has vmx_init() run already? If not then this is the pre init
* parameter parsing. In that case just store the value and let
@@ -324,6 +327,9 @@ static int vmentry_l1d_flush_set(const char *s, const struct kernel_param *kp)
static int vmentry_l1d_flush_get(char *s, const struct kernel_param *kp)
{
+ if (WARN_ON_ONCE(l1tf_vmx_mitigation >= ARRAY_SIZE(vmentry_l1d_param)))
+ return sprintf(s, "???\n");
+
return sprintf(s, "%s\n", vmentry_l1d_param[l1tf_vmx_mitigation].option);
}
--
1.8.3.1
v4.4.y, v4.9.y:
Building mips:cavium_octeon_defconfig ... failed
--------------
Error log:
/opt/buildbot/slave/stable-queue-4.4/build/drivers/net/ethernet/octeon/octeon_mgmt.c: In function 'octeon_mgmt_change_mtu':
/opt/buildbot/slave/stable-queue-4.4/build/drivers/net/ethernet/octeon/octeon_mgmt.c:652:6: error: 'size_without_fcs' undeclared
v4.9.y, v4.14.y:
Building i386:tools/perf ... failed
Building x86_64:tools/perf ... failed
--------------
Error log:
PERF_VERSION = 4.9.123.g5175d5
tests/parse-events.c: In function ‘test_event’:
tests/parse-events.c:1681:3: error: implicit declaration of function ‘parse_events_print_error’ [-Werror=implicit-function-declaration]
parse_events_print_error(&err, e->name);
^
tests/parse-events.c:1681:3: error: nested extern declaration of ‘parse_events_print_error’
This is just a snapshot; builds are still ongoing. I'll send another e-mail later
if more errors are reported after the build is complete.
Guenter
From: Shan Hai <shan.hai(a)oracle.com>
The writeback thread would exit with a lock held when the cache device is
detached via sysfs interface, fix it by releasing the held lock before exiting
the while-loop.
Signed-off-by: Shan Hai <shan.hai(a)oracle.com>
Signed-off-by: Coly Li <colyli(a)suse.de>
Tested-by: Shenghui Wang <shhuiw(a)foxmail.com>
Cc: stable(a)vger.kernel.org #4.17+
---
drivers/md/bcache/writeback.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 6be05bd7ca67..08c3a9f9676c 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -685,8 +685,10 @@ static int bch_writeback_thread(void *arg)
* data on cache. BCACHE_DEV_DETACHING flag is set in
* bch_cached_dev_detach().
*/
- if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags))
+ if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags)) {
+ up_write(&dc->writeback_lock);
break;
+ }
}
up_write(&dc->writeback_lock);
--
2.18.0
'type' is user-controlled, so sanitize it after the bounds check to
avoid using it in speculative execution. This covers the following
potential gadgets detected with the help of smatch:
* fs/ext4/super.c:5741 ext4_quota_read() warn: potential spectre issue
'sb_dqopt(sb)->files' [r]
* fs/ext4/super.c:5778 ext4_quota_write() warn: potential spectre issue
'sb_dqopt(sb)->files' [r]
* fs/f2fs/super.c:1552 f2fs_quota_read() warn: potential spectre issue
'sb_dqopt(sb)->files' [r]
* fs/f2fs/super.c:1608 f2fs_quota_write() warn: potential spectre issue
'sb_dqopt(sb)->files' [r]
* fs/quota/dquot.c:412 mark_info_dirty() warn: potential spectre issue
'sb_dqopt(sb)->info' [w]
* fs/quota/dquot.c:933 dqinit_needed() warn: potential spectre issue
'dquots' [r]
* fs/quota/dquot.c:2112 dquot_commit_info() warn: potential spectre
issue 'dqopt->ops' [r]
* fs/quota/dquot.c:2362 vfs_load_quota_inode() warn: potential spectre
issue 'dqopt->files' [w] (local cap)
* fs/quota/dquot.c:2369 vfs_load_quota_inode() warn: potential spectre
issue 'dqopt->ops' [w] (local cap)
* fs/quota/dquot.c:2370 vfs_load_quota_inode() warn: potential spectre
issue 'dqopt->info' [w] (local cap)
* fs/quota/quota.c:110 quota_getfmt() warn: potential spectre issue
'sb_dqopt(sb)->info' [r]
* fs/quota/quota_v2.c:84 v2_check_quota_file() warn: potential spectre
issue 'quota_magics' [w]
* fs/quota/quota_v2.c:85 v2_check_quota_file() warn: potential spectre
issue 'quota_versions' [w]
* fs/quota/quota_v2.c:96 v2_read_file_info() warn: potential spectre
issue 'dqopt->info' [r]
* fs/quota/quota_v2.c:172 v2_write_file_info() warn: potential spectre
issue 'dqopt->info' [r]
Additionally, a quick inspection indicates there are array accesses with
'type' in quota_on() and quota_off() functions which are also addressed
by this.
Cc: Josh Poimboeuf <jpoimboe(a)redhat.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Jeremy Cline <jcline(a)redhat.com>
---
This patch isn't going to cleanly apply to stable without the "fs/quota:
Replace XQM_MAXQUOTAS usage with MAXQUOTAS" patch, but I'm not sure that
patch is really stable material and XQM_MAXQUOTAS has been 3 since
pre-v4.4 so the end result will be the same even if that patch isn't
backported.
fs/quota/quota.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index d403392d8a0f..f0cbf58ad4da 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -18,6 +18,7 @@
#include <linux/quotaops.h>
#include <linux/types.h>
#include <linux/writeback.h>
+#include <linux/nospec.h>
static int check_quotactl_permission(struct super_block *sb, int type, int cmd,
qid_t id)
@@ -701,6 +702,7 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id,
if (type >= MAXQUOTAS)
return -EINVAL;
+ type = array_index_nospec(type, MAXQUOTAS);
/*
* Quota not supported on this fs? Check this before s_quota_types
* since they needn't be set if quota is not supported at all.
--
2.17.1
Use the new of_get_compatible_child() helper to lookup the legacy
pwrlevels child node instead of using of_find_compatible_node(), which
searches the entire tree and thus can return an unrelated (i.e.
non-child) node.
This also addresses a potential use-after-free (e.g. after probe
deferral) as the tree-wide helper drops a reference to its first
argument (i.e. the probed device's node).
While at it, also fix the related child-node reference leak.
Fixes: e2af8b6b0ca1 ("drm/msm: gpu: Use OPP tables if we can")
Cc: stable <stable(a)vger.kernel.org> # 4.12
Cc: Jordan Crouse <jcrouse(a)codeaurora.org>
Cc: Rob Clark <robdclark(a)gmail.com>
Cc: David Airlie <airlied(a)linux.ie>
Signed-off-by: Johan Hovold <johan(a)kernel.org>
---
drivers/gpu/drm/msm/adreno/adreno_gpu.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index da1363a0c54d..93d70f4a2154 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -633,8 +633,7 @@ static int adreno_get_legacy_pwrlevels(struct device *dev)
struct device_node *child, *node;
int ret;
- node = of_find_compatible_node(dev->of_node, NULL,
- "qcom,gpu-pwrlevels");
+ node = of_get_compatible_child(dev->of_node, "qcom,gpu-pwrlevels");
if (!node) {
dev_err(dev, "Could not find the GPU powerlevels\n");
return -ENXIO;
@@ -655,6 +654,8 @@ static int adreno_get_legacy_pwrlevels(struct device *dev)
dev_pm_opp_add(dev, val, 0);
}
+ of_node_put(node);
+
return 0;
}
--
2.18.0
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 3b885ac1dc35b87a39ee176a6c7e2af9c789d8b8 Mon Sep 17 00:00:00 2001
From: John David Anglin <dave.anglin(a)bell.net>
Date: Sun, 12 Aug 2018 16:31:17 -0400
Subject: [PATCH] parisc: Remove unnecessary barriers from spinlock.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Now that mb() is an instruction barrier, it will slow performance if we issue
unnecessary barriers.
The spinlock defines have a number of unnecessary barriers. The __ldcw()
define is both a hardware and compiler barrier. The mb() barriers in the
routines using __ldcw() serve no purpose.
The only barrier needed is the one in arch_spin_unlock(). We need to ensure
all accesses are complete prior to releasing the lock.
Signed-off-by: John David Anglin <dave.anglin(a)bell.net>
Cc: stable(a)vger.kernel.org # 4.0+
Signed-off-by: Helge Deller <deller(a)gmx.de>
diff --git a/arch/parisc/include/asm/spinlock.h b/arch/parisc/include/asm/spinlock.h
index 6f84b6acc86e..8a63515f03bf 100644
--- a/arch/parisc/include/asm/spinlock.h
+++ b/arch/parisc/include/asm/spinlock.h
@@ -20,7 +20,6 @@ static inline void arch_spin_lock_flags(arch_spinlock_t *x,
{
volatile unsigned int *a;
- mb();
a = __ldcw_align(x);
while (__ldcw(a) == 0)
while (*a == 0)
@@ -30,17 +29,16 @@ static inline void arch_spin_lock_flags(arch_spinlock_t *x,
local_irq_disable();
} else
cpu_relax();
- mb();
}
#define arch_spin_lock_flags arch_spin_lock_flags
static inline void arch_spin_unlock(arch_spinlock_t *x)
{
volatile unsigned int *a;
- mb();
+
a = __ldcw_align(x);
- *a = 1;
mb();
+ *a = 1;
}
static inline int arch_spin_trylock(arch_spinlock_t *x)
@@ -48,10 +46,8 @@ static inline int arch_spin_trylock(arch_spinlock_t *x)
volatile unsigned int *a;
int ret;
- mb();
a = __ldcw_align(x);
ret = __ldcw(a) != 0;
- mb();
return ret;
}
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 3b885ac1dc35b87a39ee176a6c7e2af9c789d8b8 Mon Sep 17 00:00:00 2001
From: John David Anglin <dave.anglin(a)bell.net>
Date: Sun, 12 Aug 2018 16:31:17 -0400
Subject: [PATCH] parisc: Remove unnecessary barriers from spinlock.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Now that mb() is an instruction barrier, it will slow performance if we issue
unnecessary barriers.
The spinlock defines have a number of unnecessary barriers. The __ldcw()
define is both a hardware and compiler barrier. The mb() barriers in the
routines using __ldcw() serve no purpose.
The only barrier needed is the one in arch_spin_unlock(). We need to ensure
all accesses are complete prior to releasing the lock.
Signed-off-by: John David Anglin <dave.anglin(a)bell.net>
Cc: stable(a)vger.kernel.org # 4.0+
Signed-off-by: Helge Deller <deller(a)gmx.de>
diff --git a/arch/parisc/include/asm/spinlock.h b/arch/parisc/include/asm/spinlock.h
index 6f84b6acc86e..8a63515f03bf 100644
--- a/arch/parisc/include/asm/spinlock.h
+++ b/arch/parisc/include/asm/spinlock.h
@@ -20,7 +20,6 @@ static inline void arch_spin_lock_flags(arch_spinlock_t *x,
{
volatile unsigned int *a;
- mb();
a = __ldcw_align(x);
while (__ldcw(a) == 0)
while (*a == 0)
@@ -30,17 +29,16 @@ static inline void arch_spin_lock_flags(arch_spinlock_t *x,
local_irq_disable();
} else
cpu_relax();
- mb();
}
#define arch_spin_lock_flags arch_spin_lock_flags
static inline void arch_spin_unlock(arch_spinlock_t *x)
{
volatile unsigned int *a;
- mb();
+
a = __ldcw_align(x);
- *a = 1;
mb();
+ *a = 1;
}
static inline int arch_spin_trylock(arch_spinlock_t *x)
@@ -48,10 +46,8 @@ static inline int arch_spin_trylock(arch_spinlock_t *x)
volatile unsigned int *a;
int ret;
- mb();
a = __ldcw_align(x);
ret = __ldcw(a) != 0;
- mb();
return ret;
}
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 3b885ac1dc35b87a39ee176a6c7e2af9c789d8b8 Mon Sep 17 00:00:00 2001
From: John David Anglin <dave.anglin(a)bell.net>
Date: Sun, 12 Aug 2018 16:31:17 -0400
Subject: [PATCH] parisc: Remove unnecessary barriers from spinlock.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Now that mb() is an instruction barrier, it will slow performance if we issue
unnecessary barriers.
The spinlock defines have a number of unnecessary barriers. The __ldcw()
define is both a hardware and compiler barrier. The mb() barriers in the
routines using __ldcw() serve no purpose.
The only barrier needed is the one in arch_spin_unlock(). We need to ensure
all accesses are complete prior to releasing the lock.
Signed-off-by: John David Anglin <dave.anglin(a)bell.net>
Cc: stable(a)vger.kernel.org # 4.0+
Signed-off-by: Helge Deller <deller(a)gmx.de>
diff --git a/arch/parisc/include/asm/spinlock.h b/arch/parisc/include/asm/spinlock.h
index 6f84b6acc86e..8a63515f03bf 100644
--- a/arch/parisc/include/asm/spinlock.h
+++ b/arch/parisc/include/asm/spinlock.h
@@ -20,7 +20,6 @@ static inline void arch_spin_lock_flags(arch_spinlock_t *x,
{
volatile unsigned int *a;
- mb();
a = __ldcw_align(x);
while (__ldcw(a) == 0)
while (*a == 0)
@@ -30,17 +29,16 @@ static inline void arch_spin_lock_flags(arch_spinlock_t *x,
local_irq_disable();
} else
cpu_relax();
- mb();
}
#define arch_spin_lock_flags arch_spin_lock_flags
static inline void arch_spin_unlock(arch_spinlock_t *x)
{
volatile unsigned int *a;
- mb();
+
a = __ldcw_align(x);
- *a = 1;
mb();
+ *a = 1;
}
static inline int arch_spin_trylock(arch_spinlock_t *x)
@@ -48,10 +46,8 @@ static inline int arch_spin_trylock(arch_spinlock_t *x)
volatile unsigned int *a;
int ret;
- mb();
a = __ldcw_align(x);
ret = __ldcw(a) != 0;
- mb();
return ret;
}
Use the new of_get_compatible_child() helper to lookup the nfc child
node instead of using of_find_compatible_node(), which searches the
entire tree and thus can return an unrelated (i.e. non-child) node.
This also addresses a potential use-after-free (e.g. after probe
deferral) as the tree-wide helper drops a reference to its first
argument (i.e. the parent node).
Fixes: e097dc624f78 ("NFC: nfcmrvl: add UART driver")
Fixes: d8e018c0b321 ("NFC: nfcmrvl: update device tree bindings for Marvell NFC")
Cc: stable <stable(a)vger.kernel.org> # 4.2
Cc: Vincent Cuissard <cuissard(a)marvell.com>
Cc: Samuel Ortiz <sameo(a)linux.intel.com>
Signed-off-by: Johan Hovold <johan(a)kernel.org>
---
drivers/nfc/nfcmrvl/uart.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/drivers/nfc/nfcmrvl/uart.c b/drivers/nfc/nfcmrvl/uart.c
index 91162f8e0366..9a22056e8d9e 100644
--- a/drivers/nfc/nfcmrvl/uart.c
+++ b/drivers/nfc/nfcmrvl/uart.c
@@ -73,10 +73,9 @@ static int nfcmrvl_uart_parse_dt(struct device_node *node,
struct device_node *matched_node;
int ret;
- matched_node = of_find_compatible_node(node, NULL, "marvell,nfc-uart");
+ matched_node = of_get_compatible_child(node, "marvell,nfc-uart");
if (!matched_node) {
- matched_node = of_find_compatible_node(node, NULL,
- "mrvl,nfc-uart");
+ matched_node = of_get_compatible_child(node, "mrvl,nfc-uart");
if (!matched_node)
return -ENODEV;
}
--
2.18.0
Use the new of_get_compatible_child() helper to lookup the mdio child
node instead of using of_find_compatible_node(), which searches the
entire tree and thus can return an unrelated (i.e. non-child) node.
This also addresses a potential use-after-free (e.g. after probe
deferral) as the tree-wide helper drops a reference to its first
argument (i.e. the node of the device being probed).
Fixes: aa09677cba42 ("net: bcmgenet: add MDIO routines")
Cc: stable <stable(a)vger.kernel.org> # 3.15
Cc: Florian Fainelli <f.fainelli(a)gmail.com>
Cc: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Johan Hovold <johan(a)kernel.org>
---
drivers/net/ethernet/broadcom/genet/bcmmii.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c
index 5333274a283c..87fc65560ceb 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmmii.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c
@@ -333,7 +333,7 @@ static struct device_node *bcmgenet_mii_of_find_mdio(struct bcmgenet_priv *priv)
if (!compat)
return NULL;
- priv->mdio_dn = of_find_compatible_node(dn, NULL, compat);
+ priv->mdio_dn = of_get_compatible_child(dn, compat);
kfree(compat);
if (!priv->mdio_dn) {
dev_err(kdev, "unable to find MDIO bus node\n");
--
2.18.0