Extend kvm_s390_shadow_fault to return the pointer to the valid leaf
DAT table entry, or to the invalid entry.
Also return some flags in the lower bits of the address:
PEI_DAT_PROT: indicates that DAT protection applies because of the
protection bit in the segment (or, if EDAT, region) tables.
PEI_NOT_PTE: indicates that the address of the DAT table entry returned
does not refer to a PTE, but to a segment or region table.
Signed-off-by: Claudio Imbrenda <imbrenda(a)linux.ibm.com>
Cc: stable(a)vger.kernel.org
Reviewed-by: Janosch Frank <frankja(a)de.ibm.com>
Reviewed-by: David Hildenbrand <david(a)redhat.com>
---
arch/s390/kvm/gaccess.c | 30 +++++++++++++++++++++++++-----
arch/s390/kvm/gaccess.h | 6 +++++-
arch/s390/kvm/vsie.c | 8 ++++----
3 files changed, 34 insertions(+), 10 deletions(-)
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 6d6b57059493..33a03e518640 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -976,7 +976,9 @@ int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra)
* kvm_s390_shadow_tables - walk the guest page table and create shadow tables
* @sg: pointer to the shadow guest address space structure
* @saddr: faulting address in the shadow gmap
- * @pgt: pointer to the page table address result
+ * @pgt: pointer to the beginning of the page table for the given address if
+ * successful (return value 0), or to the first invalid DAT entry in
+ * case of exceptions (return value > 0)
* @fake: pgt references contiguous guest memory block, not a pgtable
*/
static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
@@ -1034,6 +1036,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
rfte.val = ptr;
goto shadow_r2t;
}
+ *pgt = ptr + vaddr.rfx * 8;
rc = gmap_read_table(parent, ptr + vaddr.rfx * 8, &rfte.val);
if (rc)
return rc;
@@ -1060,6 +1063,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
rste.val = ptr;
goto shadow_r3t;
}
+ *pgt = ptr + vaddr.rsx * 8;
rc = gmap_read_table(parent, ptr + vaddr.rsx * 8, &rste.val);
if (rc)
return rc;
@@ -1087,6 +1091,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
rtte.val = ptr;
goto shadow_sgt;
}
+ *pgt = ptr + vaddr.rtx * 8;
rc = gmap_read_table(parent, ptr + vaddr.rtx * 8, &rtte.val);
if (rc)
return rc;
@@ -1123,6 +1128,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
ste.val = ptr;
goto shadow_pgt;
}
+ *pgt = ptr + vaddr.sx * 8;
rc = gmap_read_table(parent, ptr + vaddr.sx * 8, &ste.val);
if (rc)
return rc;
@@ -1157,6 +1163,8 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
* @vcpu: virtual cpu
* @sg: pointer to the shadow guest address space structure
* @saddr: faulting address in the shadow gmap
+ * @datptr: will contain the address of the faulting DAT table entry, or of
+ * the valid leaf, plus some flags
*
* Returns: - 0 if the shadow fault was successfully resolved
* - > 0 (pgm exception code) on exceptions while faulting
@@ -1165,11 +1173,11 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
* - -ENOMEM if out of memory
*/
int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
- unsigned long saddr)
+ unsigned long saddr, unsigned long *datptr)
{
union vaddress vaddr;
union page_table_entry pte;
- unsigned long pgt;
+ unsigned long pgt = 0;
int dat_protection, fake;
int rc;
@@ -1191,8 +1199,20 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
pte.val = pgt + vaddr.px * PAGE_SIZE;
goto shadow_page;
}
- if (!rc)
- rc = gmap_read_table(sg->parent, pgt + vaddr.px * 8, &pte.val);
+
+ switch (rc) {
+ case PGM_SEGMENT_TRANSLATION:
+ case PGM_REGION_THIRD_TRANS:
+ case PGM_REGION_SECOND_TRANS:
+ case PGM_REGION_FIRST_TRANS:
+ pgt |= PEI_NOT_PTE;
+ break;
+ case 0:
+ pgt += vaddr.px * 8;
+ rc = gmap_read_table(sg->parent, pgt, &pte.val);
+ }
+ if (*datptr)
+ *datptr = pgt | dat_protection * PEI_DAT_PROT;
if (!rc && pte.i)
rc = PGM_PAGE_TRANSLATION;
if (!rc && pte.z)
diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
index 107fdfd2eadd..df4014f09e26 100644
--- a/arch/s390/kvm/gaccess.h
+++ b/arch/s390/kvm/gaccess.h
@@ -378,7 +378,11 @@ void ipte_unlock(struct kvm_vcpu *vcpu);
int ipte_lock_held(struct kvm_vcpu *vcpu);
int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra);
+/* MVPG PEI indication bits */
+#define PEI_DAT_PROT 2
+#define PEI_NOT_PTE 4
+
int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *shadow,
- unsigned long saddr);
+ unsigned long saddr, unsigned long *datptr);
#endif /* __KVM_S390_GACCESS_H */
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index bd803e091918..78b604326016 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -620,10 +620,10 @@ static int map_prefix(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
/* with mso/msl, the prefix lies at offset *mso* */
prefix += scb_s->mso;
- rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix);
+ rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix, NULL);
if (!rc && (scb_s->ecb & ECB_TE))
rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
- prefix + PAGE_SIZE);
+ prefix + PAGE_SIZE, NULL);
/*
* We don't have to mprotect, we will be called for all unshadows.
* SIE will detect if protection applies and trigger a validity.
@@ -914,7 +914,7 @@ static int handle_fault(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
current->thread.gmap_addr, 1);
rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
- current->thread.gmap_addr);
+ current->thread.gmap_addr, NULL);
if (rc > 0) {
rc = inject_fault(vcpu, rc,
current->thread.gmap_addr,
@@ -936,7 +936,7 @@ static void handle_last_fault(struct kvm_vcpu *vcpu,
{
if (vsie_page->fault_addr)
kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
- vsie_page->fault_addr);
+ vsie_page->fault_addr, NULL);
vsie_page->fault_addr = 0;
}
--
2.26.2
Creating a series of detached mounts, attaching them to the filesystem,
and unmounting them can be used to trigger an integer overflow in
ns->mounts causing the kernel to block any new mounts in count_mounts()
and returning ENOSPC because it falsely assumes that the maximum number
of mounts in the mount namespace has been reached, i.e. it thinks it
can't fit the new mounts into the mount namespace anymore.
Depending on the number of mounts in your system, this can be reproduced
on any kernel that supportes open_tree() and move_mount() with the
following instructions:
1. Compile the following program "repro.c" via "make repro"
> cat repro.c
#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <getopt.h>
#include <limits.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <linux/mount.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>
/* open_tree() */
#ifndef OPEN_TREE_CLONE
#define OPEN_TREE_CLONE 1
#endif
#ifndef OPEN_TREE_CLOEXEC
#define OPEN_TREE_CLOEXEC O_CLOEXEC
#endif
#ifndef __NR_open_tree
#if defined __alpha__
#define __NR_open_tree 538
#elif defined _MIPS_SIM
#if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */
#define __NR_open_tree 4428
#endif
#if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */
#define __NR_open_tree 6428
#endif
#if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */
#define __NR_open_tree 5428
#endif
#elif defined __ia64__
#define __NR_open_tree (428 + 1024)
#else
#define __NR_open_tree 428
#endif
#endif
/* move_mount() */
#ifndef MOVE_MOUNT_F_EMPTY_PATH
#define MOVE_MOUNT_F_EMPTY_PATH 0x00000004 /* Empty from path permitted */
#endif
#ifndef __NR_move_mount
#if defined __alpha__
#define __NR_move_mount 539
#elif defined _MIPS_SIM
#if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */
#define __NR_move_mount 4429
#endif
#if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */
#define __NR_move_mount 6429
#endif
#if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */
#define __NR_move_mount 5429
#endif
#elif defined __ia64__
#define __NR_move_mount (428 + 1024)
#else
#define __NR_move_mount 429
#endif
#endif
static inline int sys_open_tree(int dfd, const char *filename, unsigned int flags)
{
return syscall(__NR_open_tree, dfd, filename, flags);
}
static inline int sys_move_mount(int from_dfd, const char *from_pathname, int to_dfd,
const char *to_pathname, unsigned int flags)
{
return syscall(__NR_move_mount, from_dfd, from_pathname, to_dfd, to_pathname, flags);
}
static void usage(void)
{
const char *text = "mount-new [--recursive] <source> <target>\n";
fprintf(stderr, "%s", text);
_exit(EXIT_SUCCESS);
}
#define exit_usage(format, ...) \
({ \
fprintf(stderr, format, ##__VA_ARGS__); \
usage(); \
})
#define exit_log(format, ...) \
({ \
fprintf(stderr, format, ##__VA_ARGS__); \
exit(EXIT_FAILURE); \
})
static const struct option longopts[] = {
{"recursive", no_argument, 0, 'a'},
{"help", no_argument, 0, 'b'},
{ NULL, no_argument, 0, 0 },
};
int main(int argc, char *argv[])
{
int index = 0;
bool recursive = false;
int fd_tree, new_argc, ret;
char *source, *target;
char *const *new_argv;
while ((ret = getopt_long_only(argc, argv, "", longopts, &index)) != -1) {
switch (ret) {
case 'a':
recursive = true;
break;
case 'b':
/* fallthrough */
default:
usage();
}
}
new_argv = &argv[optind];
new_argc = argc - optind;
if (new_argc < 2)
exit_usage("Missing source or target mountpoint\n\n");
source = new_argv[0];
target = new_argv[1];
fd_tree = sys_open_tree(-EBADF, source,
OPEN_TREE_CLONE |
OPEN_TREE_CLOEXEC |
AT_EMPTY_PATH |
(recursive ? AT_RECURSIVE : 0));
if (fd_tree < 0) {
exit_log("%m - Failed to open %s\n", source);
exit(EXIT_FAILURE);
}
ret = sys_move_mount(fd_tree, "", -EBADF, target, MOVE_MOUNT_F_EMPTY_PATH);
if (ret < 0)
exit_log("%m - Failed to attach mount to %s\n", target);
close(fd_tree);
exit(EXIT_SUCCESS);
}
2. Run a loop like:
while true; do sudo ./repro; done
and wait for the kernel to refuse any new mounts by returning ENOSPC.
Depending on the number of mounts you have on the system this might take
shorter or longer.
The root cause of this is that detached mounts aren't handled correctly
when the destination mount point is MS_SHARED causing a borked mount
tree and ultimately to a miscalculation of the number of mounts in the
mount namespace.
Detached mounts created via
open_tree(fd, path, OPEN_TREE_CLONE)
are essentially like an unattached new mount, or an unattached
bind-mount. They can then later on be attached to the filesystem via
move_mount() which calles into attach_recursive_mount(). Part of
attaching it to the filesystem is making sure that mounts get correctly
propagated in case the destination mountpoint is MS_SHARED, i.e. is a
shared mountpoint. This is done by calling into propagate_mnt() which
walks the list of peers calling propagate_one() on each mount in this
list making sure it receives the propagation event.
For new mounts and bind-mounts propagate_one() knows to skip them by
realizing that there's no mount namespace attached to them.
However, detached mounts have an anonymous mount namespace attached to
them and so they aren't skipped causing the mount table to get wonky and
ultimately preventing any new mounts via the ENOSPC issue.
So teach propagation to handle detached mounts by making it aware of
them. I've been tracking this issue down for the last couple of days by
unmounting everything in my current mount table leaving only /proc and
/sys mounted and running the reproducer above verifying the counting of
the mounts. With this fix the counts are correct and the ENOSPC issue
can't be reproduced.
Fixes: 2db154b3ea8e ("vfs: syscall: Add move_mount(2) to move mounts around")
Cc: Christoph Hellwig <hch(a)lst.de>
Cc: David Howells <dhowells(a)redhat.com>
Cc: Al Viro <viro(a)zeniv.linux.org.uk>
Cc: linux-fsdevel(a)vger.kernel.org
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Christian Brauner <christian.brauner(a)ubuntu.com>
---
fs/pnode.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/pnode.h b/fs/pnode.h
index 26f74e092bd9..988f1aa9b02a 100644
--- a/fs/pnode.h
+++ b/fs/pnode.h
@@ -12,7 +12,7 @@
#define IS_MNT_SHARED(m) ((m)->mnt.mnt_flags & MNT_SHARED)
#define IS_MNT_SLAVE(m) ((m)->mnt_master)
-#define IS_MNT_NEW(m) (!(m)->mnt_ns)
+#define IS_MNT_NEW(m) (!(m)->mnt_ns || is_anon_ns((m)->mnt_ns))
#define CLEAR_MNT_SHARED(m) ((m)->mnt.mnt_flags &= ~MNT_SHARED)
#define IS_MNT_UNBINDABLE(m) ((m)->mnt.mnt_flags & MNT_UNBINDABLE)
#define IS_MNT_MARKED(m) ((m)->mnt.mnt_flags & MNT_MARKED)
base-commit: f69d02e37a85645aa90d18cacfff36dba370f797
--
2.27.0
On Thu, Mar 4, 2021 at 12:23 AM Andrew Morton <akpm(a)linux-foundation.org> wrote:
>
> On Fri, 26 Feb 2021 02:25:37 +0100 Andrey Konovalov <andreyknvl(a)google.com> wrote:
>
> > Currently, kasan_free_nondeferred_pages()->kasan_free_pages() is called
> > after debug_pagealloc_unmap_pages(). This causes a crash when
> > debug_pagealloc is enabled, as HW_TAGS KASAN can't set tags on an
> > unmapped page.
> >
> > This patch puts kasan_free_nondeferred_pages() before
> > debug_pagealloc_unmap_pages().
> >
> > Besides fixing the crash, this also makes the annotation order consistent
> > with debug_pagealloc_map_pages() preceding kasan_alloc_pages().
> >
>
> This bug exists in 5.12, does it not?
>
> If so, is cc:stable appropriate and if so, do we have a suitable Fixes:
> commit?
Sure:
Fixes: 94ab5b61ee16 ("kasan, arm64: enable CONFIG_KASAN_HW_TAGS")
Cc: <stable(a)vger.kernel.org>
EVM_ALLOW_METADATA_WRITES is an EVM initialization flag that can be set to
temporarily disable metadata verification until all xattrs/attrs necessary
to verify an EVM portable signature are copied to the file. This flag is
cleared when EVM is initialized with an HMAC key, to avoid that the HMAC is
calculated on unverified xattrs/attrs.
Currently EVM unnecessarily denies setting this flag if EVM is initialized
with a public key, which is not a concern as it cannot be used to trust
xattrs/attrs updates. This patch removes this limitation.
Cc: stable(a)vger.kernel.org # 4.16.x
Fixes: ae1ba1676b88e ("EVM: Allow userland to permit modification of EVM-protected metadata")
Signed-off-by: Roberto Sassu <roberto.sassu(a)huawei.com>
---
Documentation/ABI/testing/evm | 5 +++--
security/integrity/evm/evm_secfs.c | 4 ++--
2 files changed, 5 insertions(+), 4 deletions(-)
diff --git a/Documentation/ABI/testing/evm b/Documentation/ABI/testing/evm
index 3c477ba48a31..eb6d70fd6fa2 100644
--- a/Documentation/ABI/testing/evm
+++ b/Documentation/ABI/testing/evm
@@ -49,8 +49,9 @@ Description:
modification of EVM-protected metadata and
disable all further modification of policy
- Note that once a key has been loaded, it will no longer be
- possible to enable metadata modification.
+ Note that once an HMAC key has been loaded, it will no longer
+ be possible to enable metadata modification and, if it is
+ already enabled, it will be disabled.
Until key loading has been signaled EVM can not create
or validate the 'security.evm' xattr, but returns
diff --git a/security/integrity/evm/evm_secfs.c b/security/integrity/evm/evm_secfs.c
index bbc85637e18b..197a4b83e534 100644
--- a/security/integrity/evm/evm_secfs.c
+++ b/security/integrity/evm/evm_secfs.c
@@ -81,10 +81,10 @@ static ssize_t evm_write_key(struct file *file, const char __user *buf,
return -EINVAL;
/* Don't allow a request to freshly enable metadata writes if
- * keys are loaded.
+ * an HMAC key is loaded.
*/
if ((i & EVM_ALLOW_METADATA_WRITES) &&
- ((evm_initialized & EVM_KEY_MASK) != 0) &&
+ ((evm_initialized & EVM_INIT_HMAC) != 0) &&
!(evm_initialized & EVM_ALLOW_METADATA_WRITES))
return -EPERM;
--
2.26.2
evm_inode_init_security() requires an HMAC key to calculate the HMAC on
initial xattrs provided by LSMs. However, it checks generically whether a
key has been loaded, including also public keys, which is not correct as
public keys are not suitable to calculate the HMAC.
Originally, support for signature verification was introduced to verify a
possibly immutable initial ram disk, when no new files are created, and to
switch to HMAC for the root filesystem. By that time, an HMAC key should
have been loaded and usable to calculate HMACs for new files.
More recently support for requiring an HMAC key was removed from the
kernel, so that signature verification can be used alone. Since this is a
legitimate use case, evm_inode_init_security() should not return an error
when no HMAC key has been loaded.
This patch fixes this problem by replacing the evm_key_loaded() check with
a check of the EVM_INIT_HMAC flag in evm_initialized.
Cc: stable(a)vger.kernel.org # 4.5.x
Fixes: 26ddabfe96b ("evm: enable EVM when X509 certificate is loaded")
Signed-off-by: Roberto Sassu <roberto.sassu(a)huawei.com>
Reviewed-by: Mimi Zohar <zohar(a)linux.ibm.com>
---
security/integrity/evm/evm_main.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c
index 0de367aaa2d3..7ac5204c8d1f 100644
--- a/security/integrity/evm/evm_main.c
+++ b/security/integrity/evm/evm_main.c
@@ -521,7 +521,7 @@ void evm_inode_post_setattr(struct dentry *dentry, int ia_valid)
}
/*
- * evm_inode_init_security - initializes security.evm
+ * evm_inode_init_security - initializes security.evm HMAC value
*/
int evm_inode_init_security(struct inode *inode,
const struct xattr *lsm_xattr,
@@ -530,7 +530,8 @@ int evm_inode_init_security(struct inode *inode,
struct evm_xattr *xattr_data;
int rc;
- if (!evm_key_loaded() || !evm_protected_xattr(lsm_xattr->name))
+ if (!(evm_initialized & EVM_INIT_HMAC) ||
+ !evm_protected_xattr(lsm_xattr->name))
return 0;
xattr_data = kzalloc(sizeof(*xattr_data), GFP_NOFS);
--
2.26.2
From: Borislav Petkov <bp(a)suse.de>
[ Upstream commit 6c13d7ff81e6d2f01f62ccbfa49d1b8d87f274d0 ]
Those were only laptops and are very very unlikely to have ECC memory.
Currently, when the driver attempts to load, it issues:
EDAC amd64: Error: F1 not found: device 0x1601 (broken BIOS?)
because the PCI device is the wrong one (it uses the F15h default one).
So do not load the driver on them as that is pointless.
Reported-by: Don Curtis <bugrprt21882(a)online.de>
Signed-off-by: Borislav Petkov <bp(a)suse.de>
Tested-by: Don Curtis <bugrprt21882(a)online.de>
Link: http://bugzilla.opensuse.org/show_bug.cgi?id=1179763
Link: https://lkml.kernel.org/r/20201218160622.20146-1-bp@alien8.de
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/edac/amd64_edac.c | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 620f7041db6b5..b36d5879b91e0 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -3350,10 +3350,13 @@ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt)
fam_type = &family_types[F15_M60H_CPUS];
pvt->ops = &family_types[F15_M60H_CPUS].ops;
break;
+ /* Richland is only client */
+ } else if (pvt->model == 0x13) {
+ return NULL;
+ } else {
+ fam_type = &family_types[F15_CPUS];
+ pvt->ops = &family_types[F15_CPUS].ops;
}
-
- fam_type = &family_types[F15_CPUS];
- pvt->ops = &family_types[F15_CPUS].ops;
break;
case 0x16:
@@ -3547,6 +3550,7 @@ static int probe_one_instance(unsigned int nid)
pvt->mc_node_id = nid;
pvt->F3 = F3;
+ ret = -ENODEV;
fam_type = per_family_init(pvt);
if (!fam_type)
goto err_enable;
--
2.27.0
This is an automatic generated email to let you know that the following patch were queued:
Subject: media: dvb-usb: Fix use-after-free access
Author: Takashi Iwai <tiwai(a)suse.de>
Date: Mon Feb 1 09:32:47 2021 +0100
dvb_usb_device_init() copies the properties to the own data, so that
the callers can release the original properties later (as done in the
commit 299c7007e936 ("media: dw2102: Fix memleak on sequence of
probes")). However, it also stores dev->desc pointer that is a
reference to the original properties data. Since dev->desc is
referred later, it may result in use-after-free, in the worst case,
leading to a kernel Oops as reported.
This patch addresses the problem by allocating and copying the
properties at first, then get the desc from the copied properties.
Reported-and-tested-by: Stefan Seyfried <seife+kernel(a)b1-systems.com>
BugLink: http://bugzilla.opensuse.org/show_bug.cgi?id=1181104
Reviewed-by: Robert Foss <robert.foss(a)linaro.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai(a)suse.de>
Signed-off-by: Sean Young <sean(a)mess.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei(a)kernel.org>
drivers/media/usb/dvb-usb/dvb-usb-init.c | 23 +++++++++++++----------
1 file changed, 13 insertions(+), 10 deletions(-)
---
diff --git a/drivers/media/usb/dvb-usb/dvb-usb-init.c b/drivers/media/usb/dvb-usb/dvb-usb-init.c
index c78158d12540..6c9e3290af56 100644
--- a/drivers/media/usb/dvb-usb/dvb-usb-init.c
+++ b/drivers/media/usb/dvb-usb/dvb-usb-init.c
@@ -264,27 +264,30 @@ int dvb_usb_device_init(struct usb_interface *intf,
if (du != NULL)
*du = NULL;
- if ((desc = dvb_usb_find_device(udev, props, &cold)) == NULL) {
+ d = kzalloc(sizeof(*d), GFP_KERNEL);
+ if (!d) {
+ err("no memory for 'struct dvb_usb_device'");
+ return -ENOMEM;
+ }
+
+ memcpy(&d->props, props, sizeof(struct dvb_usb_device_properties));
+
+ desc = dvb_usb_find_device(udev, &d->props, &cold);
+ if (!desc) {
deb_err("something went very wrong, device was not found in current device list - let's see what comes next.\n");
- return -ENODEV;
+ ret = -ENODEV;
+ goto error;
}
if (cold) {
info("found a '%s' in cold state, will try to load a firmware", desc->name);
ret = dvb_usb_download_firmware(udev, props);
if (!props->no_reconnect || ret != 0)
- return ret;
+ goto error;
}
info("found a '%s' in warm state.", desc->name);
- d = kzalloc(sizeof(struct dvb_usb_device), GFP_KERNEL);
- if (d == NULL) {
- err("no memory for 'struct dvb_usb_device'");
- return -ENOMEM;
- }
-
d->udev = udev;
- memcpy(&d->props, props, sizeof(struct dvb_usb_device_properties));
d->desc = desc;
d->owner = owner;
This is an automatic generated email to let you know that the following patch were queued:
Subject: media: dvb-usb: Fix memory leak at error in dvb_usb_device_init()
Author: Takashi Iwai <tiwai(a)suse.de>
Date: Mon Feb 1 09:32:46 2021 +0100
dvb_usb_device_init() allocates a dvb_usb_device object, but it
doesn't release the object by itself even at errors. The object is
released in the callee side (dvb_usb_init()) in some error cases via
dvb_usb_exit() call, but it also missed the object free in other error
paths. And, the caller (it's only dvb_usb_device_init()) doesn't seem
caring the resource management as well, hence those memories are
leaked.
This patch assures releasing the memory at the error path in
dvb_usb_device_init(). Now dvb_usb_init() frees the resources it
allocated but leaves the passed dvb_usb_device object intact. In
turn, the dvb_usb_device object is released in dvb_usb_device_init()
instead.
We could use dvb_usb_exit() function for releasing everything in the
callee (as it was used for some error cases in the original code), but
releasing the passed object in the callee is non-intuitive and
error-prone. So I took this approach (which is more standard in Linus
kernel code) although it ended with a bit more open codes.
Along with the change, the patch makes sure that USB intfdata is reset
and don't return the bogus pointer to the caller of
dvb_usb_device_init() at the error path, too.
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai(a)suse.de>
Signed-off-by: Sean Young <sean(a)mess.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei(a)kernel.org>
drivers/media/usb/dvb-usb/dvb-usb-init.c | 47 +++++++++++++++++++++-----------
1 file changed, 31 insertions(+), 16 deletions(-)
---
diff --git a/drivers/media/usb/dvb-usb/dvb-usb-init.c b/drivers/media/usb/dvb-usb/dvb-usb-init.c
index c1a7634e27b4..c78158d12540 100644
--- a/drivers/media/usb/dvb-usb/dvb-usb-init.c
+++ b/drivers/media/usb/dvb-usb/dvb-usb-init.c
@@ -158,22 +158,20 @@ static int dvb_usb_init(struct dvb_usb_device *d, short *adapter_nums)
if (d->props.priv_init != NULL) {
ret = d->props.priv_init(d);
- if (ret != 0) {
- kfree(d->priv);
- d->priv = NULL;
- return ret;
- }
+ if (ret != 0)
+ goto err_priv_init;
}
}
/* check the capabilities and set appropriate variables */
dvb_usb_device_power_ctrl(d, 1);
- if ((ret = dvb_usb_i2c_init(d)) ||
- (ret = dvb_usb_adapter_init(d, adapter_nums))) {
- dvb_usb_exit(d);
- return ret;
- }
+ ret = dvb_usb_i2c_init(d);
+ if (ret)
+ goto err_i2c_init;
+ ret = dvb_usb_adapter_init(d, adapter_nums);
+ if (ret)
+ goto err_adapter_init;
if ((ret = dvb_usb_remote_init(d)))
err("could not initialize remote control.");
@@ -181,6 +179,17 @@ static int dvb_usb_init(struct dvb_usb_device *d, short *adapter_nums)
dvb_usb_device_power_ctrl(d, 0);
return 0;
+
+err_adapter_init:
+ dvb_usb_adapter_exit(d);
+err_i2c_init:
+ dvb_usb_i2c_exit(d);
+ if (d->priv && d->props.priv_destroy)
+ d->props.priv_destroy(d);
+err_priv_init:
+ kfree(d->priv);
+ d->priv = NULL;
+ return ret;
}
/* determine the name and the state of the just found USB device */
@@ -281,15 +290,21 @@ int dvb_usb_device_init(struct usb_interface *intf,
usb_set_intfdata(intf, d);
- if (du != NULL)
+ ret = dvb_usb_init(d, adapter_nums);
+ if (ret) {
+ info("%s error while loading driver (%d)", desc->name, ret);
+ goto error;
+ }
+
+ if (du)
*du = d;
- ret = dvb_usb_init(d, adapter_nums);
+ info("%s successfully initialized and connected.", desc->name);
+ return 0;
- if (ret == 0)
- info("%s successfully initialized and connected.", desc->name);
- else
- info("%s error while loading driver (%d)", desc->name, ret);
+ error:
+ usb_set_intfdata(intf, NULL);
+ kfree(d);
return ret;
}
EXPORT_SYMBOL(dvb_usb_device_init);
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From fb18802a338b36f675a388fc03d2aa504a0d0899 Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus(a)linux.intel.com>
Date: Sat, 19 Dec 2020 23:29:58 +0100
Subject: [PATCH] media: v4l: ioctl: Fix memory leak in video_usercopy
When an IOCTL with argument size larger than 128 that also used array
arguments were handled, two memory allocations were made but alas, only
the latter one of them was released. This happened because there was only
a single local variable to hold such a temporary allocation.
Fix this by adding separate variables to hold the pointers to the
temporary allocations.
Reported-by: Arnd Bergmann <arnd(a)kernel.org>
Reported-by: syzbot+1115e79c8df6472c612b(a)syzkaller.appspotmail.com
Fixes: d14e6d76ebf7 ("[media] v4l: Add multi-planar ioctl handling code")
Cc: stable(a)vger.kernel.org
Signed-off-by: Sakari Ailus <sakari.ailus(a)linux.intel.com>
Acked-by: Arnd Bergmann <arnd(a)arndb.de>
Acked-by: Hans Verkuil <hverkuil-cisco(a)xs4all.nl>
Reviewed-by: Laurent Pinchart <laurent.pinchart(a)ideasonboard.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei(a)kernel.org>
diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c
index 3198abdd538c..9906b41004e9 100644
--- a/drivers/media/v4l2-core/v4l2-ioctl.c
+++ b/drivers/media/v4l2-core/v4l2-ioctl.c
@@ -3283,7 +3283,7 @@ video_usercopy(struct file *file, unsigned int orig_cmd, unsigned long arg,
v4l2_kioctl func)
{
char sbuf[128];
- void *mbuf = NULL;
+ void *mbuf = NULL, *array_buf = NULL;
void *parg = (void *)arg;
long err = -EINVAL;
bool has_array_args;
@@ -3318,27 +3318,21 @@ video_usercopy(struct file *file, unsigned int orig_cmd, unsigned long arg,
has_array_args = err;
if (has_array_args) {
- /*
- * When adding new types of array args, make sure that the
- * parent argument to ioctl (which contains the pointer to the
- * array) fits into sbuf (so that mbuf will still remain
- * unused up to here).
- */
- mbuf = kvmalloc(array_size, GFP_KERNEL);
+ array_buf = kvmalloc(array_size, GFP_KERNEL);
err = -ENOMEM;
- if (NULL == mbuf)
+ if (array_buf == NULL)
goto out_array_args;
err = -EFAULT;
if (in_compat_syscall())
- err = v4l2_compat_get_array_args(file, mbuf, user_ptr,
- array_size, orig_cmd,
- parg);
+ err = v4l2_compat_get_array_args(file, array_buf,
+ user_ptr, array_size,
+ orig_cmd, parg);
else
- err = copy_from_user(mbuf, user_ptr, array_size) ?
+ err = copy_from_user(array_buf, user_ptr, array_size) ?
-EFAULT : 0;
if (err)
goto out_array_args;
- *kernel_ptr = mbuf;
+ *kernel_ptr = array_buf;
}
/* Handles IOCTL */
@@ -3360,12 +3354,13 @@ video_usercopy(struct file *file, unsigned int orig_cmd, unsigned long arg,
if (in_compat_syscall()) {
int put_err;
- put_err = v4l2_compat_put_array_args(file, user_ptr, mbuf,
- array_size, orig_cmd,
- parg);
+ put_err = v4l2_compat_put_array_args(file, user_ptr,
+ array_buf,
+ array_size,
+ orig_cmd, parg);
if (put_err)
err = put_err;
- } else if (copy_to_user(user_ptr, mbuf, array_size)) {
+ } else if (copy_to_user(user_ptr, array_buf, array_size)) {
err = -EFAULT;
}
goto out_array_args;
@@ -3381,6 +3376,7 @@ video_usercopy(struct file *file, unsigned int orig_cmd, unsigned long arg,
if (video_put_user((void __user *)arg, parg, cmd, orig_cmd))
err = -EFAULT;
out:
+ kvfree(array_buf);
kvfree(mbuf);
return err;
}
Hi
2347961b11d4 ("binfmt_misc: pass binfmt_misc flags to the
interpreter") was applied in mainline and included in 5.12-rc1.
Probably you could argue here on both a bugfix or feature addition.
My intention is the following: In the Debian bugreport
https://bugs.debian.org/970460 an issue was raised with qemu-user
which needs to know if it has to preserve the argv[0]. As shown there
it is an issue with multi-call binaries.
So again, not sure if you want to consider it, but defintively
Yunqiang Su and others would appreicate. If it gets backported we will
pick it up automatically.
Regards,
Salvatore
From: Eric Biggers <ebiggers(a)google.com>
f2fs didn't properly clean up if verity failed to be enabled on a file:
- It left verity metadata (pages past EOF) in the page cache, which
would be exposed to userspace if the file was later extended.
- It didn't truncate the verity metadata at all (either from cache or
from disk) if an error occurred while setting the verity bit.
Fix these bugs by adding a call to truncate_inode_pages() and ensuring
that we truncate the verity metadata (both from cache and from disk) in
all error paths. Also rework the code to cleanly separate the success
path from the error paths, which makes it much easier to understand.
Finally, log a message if f2fs_truncate() fails, since it might
otherwise fail silently.
Reported-by: Yunlei He <heyunlei(a)hihonor.com>
Fixes: 95ae251fe828 ("f2fs: add fs-verity support")
Cc: <stable(a)vger.kernel.org> # v5.4+
Signed-off-by: Eric Biggers <ebiggers(a)google.com>
---
v2: take i_gc_rwsem, and log a message if f2fs_truncate() fails.
fs/f2fs/verity.c | 75 ++++++++++++++++++++++++++++++++++--------------
1 file changed, 54 insertions(+), 21 deletions(-)
diff --git a/fs/f2fs/verity.c b/fs/f2fs/verity.c
index 054ec852b5ea4..15ba36926fad7 100644
--- a/fs/f2fs/verity.c
+++ b/fs/f2fs/verity.c
@@ -152,40 +152,73 @@ static int f2fs_end_enable_verity(struct file *filp, const void *desc,
size_t desc_size, u64 merkle_tree_size)
{
struct inode *inode = file_inode(filp);
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
u64 desc_pos = f2fs_verity_metadata_pos(inode) + merkle_tree_size;
struct fsverity_descriptor_location dloc = {
.version = cpu_to_le32(F2FS_VERIFY_VER),
.size = cpu_to_le32(desc_size),
.pos = cpu_to_le64(desc_pos),
};
- int err = 0;
+ int err = 0, err2 = 0;
- if (desc != NULL) {
- /* Succeeded; write the verity descriptor. */
- err = pagecache_write(inode, desc, desc_size, desc_pos);
+ /*
+ * If an error already occurred (which fs/verity/ signals by passing
+ * desc == NULL), then only clean-up is needed.
+ */
+ if (desc == NULL)
+ goto cleanup;
- /* Write all pages before clearing FI_VERITY_IN_PROGRESS. */
- if (!err)
- err = filemap_write_and_wait(inode->i_mapping);
- }
+ /* Append the verity descriptor. */
+ err = pagecache_write(inode, desc, desc_size, desc_pos);
+ if (err)
+ goto cleanup;
+
+ /*
+ * Write all pages (both data and verity metadata). Note that this must
+ * happen before clearing FI_VERITY_IN_PROGRESS; otherwise pages beyond
+ * i_size won't be written properly. For crash consistency, this also
+ * must happen before the verity inode flag gets persisted.
+ */
+ err = filemap_write_and_wait(inode->i_mapping);
+ if (err)
+ goto cleanup;
+
+ /* Set the verity xattr. */
+ err = f2fs_setxattr(inode, F2FS_XATTR_INDEX_VERITY,
+ F2FS_XATTR_NAME_VERITY, &dloc, sizeof(dloc),
+ NULL, XATTR_CREATE);
+ if (err)
+ goto cleanup;
- /* If we failed, truncate anything we wrote past i_size. */
- if (desc == NULL || err)
- f2fs_truncate(inode);
+ /* Finally, set the verity inode flag. */
+ file_set_verity(inode);
+ f2fs_set_inode_flags(inode);
+ f2fs_mark_inode_dirty_sync(inode, true);
clear_inode_flag(inode, FI_VERITY_IN_PROGRESS);
+ return 0;
- if (desc != NULL && !err) {
- err = f2fs_setxattr(inode, F2FS_XATTR_INDEX_VERITY,
- F2FS_XATTR_NAME_VERITY, &dloc, sizeof(dloc),
- NULL, XATTR_CREATE);
- if (!err) {
- file_set_verity(inode);
- f2fs_set_inode_flags(inode);
- f2fs_mark_inode_dirty_sync(inode, true);
- }
+cleanup:
+ /*
+ * Verity failed to be enabled, so clean up by truncating any verity
+ * metadata that was written beyond i_size (both from cache and from
+ * disk) and clearing FI_VERITY_IN_PROGRESS.
+ *
+ * Taking i_gc_rwsem[WRITE] is needed to stop f2fs garbage collection
+ * from re-instantiating cached pages we are truncating (since unlike
+ * normal file accesses, garbage collection isn't limited by i_size).
+ */
+ down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ truncate_inode_pages(inode->i_mapping, inode->i_size);
+ err2 = f2fs_truncate(inode);
+ if (err2) {
+ f2fs_err(sbi, "Truncating verity metadata failed (errno=%d)",
+ err2);
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
}
- return err;
+ up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ clear_inode_flag(inode, FI_VERITY_IN_PROGRESS);
+ return err ?: err2;
}
static int f2fs_get_verity_descriptor(struct inode *inode, void *buf,
--
2.30.1
The following patches fix the swapfile page-to-sector mapping for block
devices that implement rw_page for all the stable kernels.
This is related to the upstream fix of commit caf6912f3f4a ("swap: fix
swapfile read/write offset"), but for kernels prior to v5.12-rc1 the bug
only affects swapfiles that sit on top of block devices which provide a
rw_page operation.
Hi Greg,
On 04-03-21, 16:17, gregkh(a)linuxfoundation.org wrote:
>
> This is a note to let you know that I've just added the patch titled
>
> soundwire: debugfs: use controller id instead of link_id
>
> to the 5.11-stable tree which can be found at:
> http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
>
> The filename of the patch is:
> soundwire-debugfs-use-controller-id-instead-of-link_id.patch
> and it can be found in the queue-5.11 subdirectory.
>
> If you, or anyone else, feels it should not be added to the stable tree,
> please let <stable(a)vger.kernel.org> know about it.
Please drop this patch from stable, it was not tagged to stable and it
is reverted
Thanks
>
>
> >From 6d5e7af1f6f5924de5dd1ebe97675c2363100878 Mon Sep 17 00:00:00 2001
> From: Srinivas Kandagatla <srinivas.kandagatla(a)linaro.org>
> Date: Fri, 15 Jan 2021 16:25:59 +0000
> Subject: soundwire: debugfs: use controller id instead of link_id
>
> From: Srinivas Kandagatla <srinivas.kandagatla(a)linaro.org>
>
> commit 6d5e7af1f6f5924de5dd1ebe97675c2363100878 upstream.
>
> link_id can be zero and if we have multiple controller instances
> in a system like Qualcomm debugfs will end-up with duplicate namespace
> resulting in incorrect debugfs entries.
>
> Using id should give a unique debugfs directory entry and should fix below
> warning too.
> "debugfs: Directory 'master-0' with parent 'soundwire' already present!"
>
> Fixes: bf03473d5bcc ("soundwire: add debugfs support")
> Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla(a)linaro.org>
> Link: https://lore.kernel.org/r/20210115162559.20869-1-srinivas.kandagatla@linaro…
> Signed-off-by: Vinod Koul <vkoul(a)kernel.org>
> Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
> ---
> drivers/soundwire/debugfs.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> --- a/drivers/soundwire/debugfs.c
> +++ b/drivers/soundwire/debugfs.c
> @@ -19,7 +19,7 @@ void sdw_bus_debugfs_init(struct sdw_bus
> return;
>
> /* create the debugfs master-N */
> - snprintf(name, sizeof(name), "master-%d", bus->link_id);
> + snprintf(name, sizeof(name), "master-%d", bus->id);
> bus->debugfs = debugfs_create_dir(name, sdw_debugfs_root);
> }
>
>
>
> Patches currently in stable-queue which might be from srinivas.kandagatla(a)linaro.org are
>
> queue-5.11/asoc-qcom-remove-useless-debug-print.patch
> queue-5.11/soundwire-debugfs-use-controller-id-instead-of-link_id.patch
--
~Vinod
From: Eric Biggers <ebiggers(a)google.com>
f2fs didn't properly clean up if verity failed to be enabled on a file:
- It left verity metadata (pages past EOF) in the page cache, which
would be exposed to userspace if the file was later extended.
- It didn't truncate the verity metadata at all (either from cache or
from disk) if an error occurred while setting the verity bit.
Fix these bugs by adding a call to truncate_inode_pages() and ensuring
that we truncate the verity metadata (both from cache and from disk) in
all error paths. Also rework the code to cleanly separate the success
path from the error paths, which makes it much easier to understand.
Reported-by: Yunlei He <heyunlei(a)hihonor.com>
Fixes: 95ae251fe828 ("f2fs: add fs-verity support")
Cc: <stable(a)vger.kernel.org> # v5.4+
Signed-off-by: Eric Biggers <ebiggers(a)google.com>
---
fs/f2fs/verity.c | 61 ++++++++++++++++++++++++++++++++----------------
1 file changed, 41 insertions(+), 20 deletions(-)
diff --git a/fs/f2fs/verity.c b/fs/f2fs/verity.c
index 054ec852b5ea4..2db89967fde37 100644
--- a/fs/f2fs/verity.c
+++ b/fs/f2fs/verity.c
@@ -160,31 +160,52 @@ static int f2fs_end_enable_verity(struct file *filp, const void *desc,
};
int err = 0;
- if (desc != NULL) {
- /* Succeeded; write the verity descriptor. */
- err = pagecache_write(inode, desc, desc_size, desc_pos);
+ /*
+ * If an error already occurred (which fs/verity/ signals by passing
+ * desc == NULL), then only clean-up is needed.
+ */
+ if (desc == NULL)
+ goto cleanup;
- /* Write all pages before clearing FI_VERITY_IN_PROGRESS. */
- if (!err)
- err = filemap_write_and_wait(inode->i_mapping);
- }
+ /* Append the verity descriptor. */
+ err = pagecache_write(inode, desc, desc_size, desc_pos);
+ if (err)
+ goto cleanup;
- /* If we failed, truncate anything we wrote past i_size. */
- if (desc == NULL || err)
- f2fs_truncate(inode);
+ /*
+ * Write all pages (both data and verity metadata). Note that this must
+ * happen before clearing FI_VERITY_IN_PROGRESS; otherwise pages beyond
+ * i_size won't be written properly. For crash consistency, this also
+ * must happen before the verity inode flag gets persisted.
+ */
+ err = filemap_write_and_wait(inode->i_mapping);
+ if (err)
+ goto cleanup;
+
+ /* Set the verity xattr. */
+ err = f2fs_setxattr(inode, F2FS_XATTR_INDEX_VERITY,
+ F2FS_XATTR_NAME_VERITY, &dloc, sizeof(dloc),
+ NULL, XATTR_CREATE);
+ if (err)
+ goto cleanup;
+
+ /* Finally, set the verity inode flag. */
+ file_set_verity(inode);
+ f2fs_set_inode_flags(inode);
+ f2fs_mark_inode_dirty_sync(inode, true);
clear_inode_flag(inode, FI_VERITY_IN_PROGRESS);
+ return 0;
- if (desc != NULL && !err) {
- err = f2fs_setxattr(inode, F2FS_XATTR_INDEX_VERITY,
- F2FS_XATTR_NAME_VERITY, &dloc, sizeof(dloc),
- NULL, XATTR_CREATE);
- if (!err) {
- file_set_verity(inode);
- f2fs_set_inode_flags(inode);
- f2fs_mark_inode_dirty_sync(inode, true);
- }
- }
+cleanup:
+ /*
+ * Verity failed to be enabled, so clean up by truncating any verity
+ * metadata that was written beyond i_size (both from cache and from
+ * disk) and clearing FI_VERITY_IN_PROGRESS.
+ */
+ truncate_inode_pages(inode->i_mapping, inode->i_size);
+ f2fs_truncate(inode);
+ clear_inode_flag(inode, FI_VERITY_IN_PROGRESS);
return err;
}
--
2.30.1
The patch titled
Subject: mm, hwpoison: do not lock page again when me_huge_page() successfully recovers
has been added to the -mm tree. Its filename is
mm-hwpoison-do-not-lock-page-again-when-me_huge_page-successfully-recovers.patch
This patch should soon appear at
https://ozlabs.org/~akpm/mmots/broken-out/mm-hwpoison-do-not-lock-page-agai…
and later at
https://ozlabs.org/~akpm/mmotm/broken-out/mm-hwpoison-do-not-lock-page-agai…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Naoya Horiguchi <naoya.horiguchi(a)nec.com>
Subject: mm, hwpoison: do not lock page again when me_huge_page() successfully recovers
Currently me_huge_page() temporary unlocks page to perform some actions
then locks it again later. My testcase (which calls hard-offline on some
tail page in a hugetlb, then accesses the address of the hugetlb range)
showed that page allocation code detects the page lock on buddy page and
printed out "BUG: Bad page state" message. PG_hwpoison does not prevent
it because PG_hwpoison flag is set on any subpage of the hugetlb page but
the 2nd page lock is on the head page.
This patch suggests to drop the 2nd page lock to fix the issue.
Link: https://lkml.kernel.org/r/20210304064437.962442-1-nao.horiguchi@gmail.com
Fixes: commit 78bb920344b8 ("mm: hwpoison: dissolve in-use hugepage in unrecoverable memory error")
Signed-off-by: Naoya Horiguchi <naoya.horiguchi(a)nec.com>
Cc: Michal Hocko <mhocko(a)kernel.org>
Cc: Oscar Salvador <osalvador(a)suse.de>
Cc: Tony Luck <tony.luck(a)intel.com>
Cc: "Aneesh Kumar K.V" <aneesh.kumar(a)linux.vnet.ibm.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/memory-failure.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
--- a/mm/memory-failure.c~mm-hwpoison-do-not-lock-page-again-when-me_huge_page-successfully-recovers
+++ a/mm/memory-failure.c
@@ -834,7 +834,6 @@ static int me_huge_page(struct page *p,
page_ref_inc(p);
res = MF_RECOVERED;
}
- lock_page(hpage);
}
return res;
@@ -1290,7 +1289,8 @@ static int memory_failure_hugetlb(unsign
res = identify_page_state(pfn, p, page_flags);
out:
- unlock_page(head);
+ if (PageLocked(head))
+ unlock_page(head);
return res;
}
_
Patches currently in -mm which might be from naoya.horiguchi(a)nec.com are
mm-hwpoison-do-not-lock-page-again-when-me_huge_page-successfully-recovers.patch
The patch titled
Subject: mm/userfaultfd: fix memory corruption due to writeprotect
has been added to the -mm tree. Its filename is
mm-userfaultfd-fix-memory-corruption-due-to-writeprotect.patch
This patch should soon appear at
https://ozlabs.org/~akpm/mmots/broken-out/mm-userfaultfd-fix-memory-corrupt…
and later at
https://ozlabs.org/~akpm/mmotm/broken-out/mm-userfaultfd-fix-memory-corrupt…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Nadav Amit <namit(a)vmware.com>
Subject: mm/userfaultfd: fix memory corruption due to writeprotect
Userfaultfd self-test fails occasionally, indicating a memory corruption.
Analyzing this problem indicates that there is a real bug since mmap_lock
is only taken for read in mwriteprotect_range() and defers flushes, and
since there is insufficient consideration of concurrent deferred TLB
flushes in wp_page_copy(). Although the PTE is flushed from the TLBs in
wp_page_copy(), this flush takes place after the copy has already been
performed, and therefore changes of the page are possible between the time
of the copy and the time in which the PTE is flushed.
To make matters worse, memory-unprotection using userfaultfd also poses a
problem. Although memory unprotection is logically a promotion of PTE
permissions, and therefore should not require a TLB flush, the current
userrfaultfd code might actually cause a demotion of the architectural PTE
permission: when userfaultfd_writeprotect() unprotects memory region, it
unintentionally *clears* the RW-bit if it was already set. Note that this
unprotecting a PTE that is not write-protected is a valid use-case: the
userfaultfd monitor might ask to unprotect a region that holds both
write-protected and write-unprotected PTEs.
The scenario that happens in selftests/vm/userfaultfd is as follows:
cpu0 cpu1 cpu2
---- ---- ----
[ Writable PTE
cached in TLB ]
userfaultfd_writeprotect()
[ write-*unprotect* ]
mwriteprotect_range()
mmap_read_lock()
change_protection()
change_protection_range()
...
change_pte_range()
[ *clear* “write”-bit ]
[ defer TLB flushes ]
[ page-fault ]
...
wp_page_copy()
cow_user_page()
[ copy page ]
[ write to old
page ]
...
set_pte_at_notify()
A similar scenario can happen:
cpu0 cpu1 cpu2 cpu3
---- ---- ---- ----
[ Writable PTE
cached in TLB ]
userfaultfd_writeprotect()
[ write-protect ]
[ deferred TLB flush ]
userfaultfd_writeprotect()
[ write-unprotect ]
[ deferred TLB flush]
[ page-fault ]
wp_page_copy()
cow_user_page()
[ copy page ]
... [ write to page ]
set_pte_at_notify()
This race exists since commit 292924b26024 ("userfaultfd: wp: apply
_PAGE_UFFD_WP bit"). Yet, as Yu Zhao pointed, these races became apparent
since commit 09854ba94c6a ("mm: do_wp_page() simplification") which made
wp_page_copy() more likely to take place, specifically if page_count(page)
> 1.
To resolve the aforementioned races, check whether there are pending
flushes on uffd-write-protected VMAs, and if there are, perform a flush
before doing the COW.
Further optimizations will follow to avoid during uffd-write-unprotect
unnecassary PTE write-protection and TLB flushes.
Link: https://lkml.kernel.org/r/20210304095423.3825684-1-namit@vmware.com
Fixes: 09854ba94c6a ("mm: do_wp_page() simplification")
Signed-off-by: Nadav Amit <namit(a)vmware.com>
Suggested-by: Yu Zhao <yuzhao(a)google.com>
Reviewed-by: Peter Xu <peterx(a)redhat.com>
Tested-by: Peter Xu <peterx(a)redhat.com>
Cc: Andrea Arcangeli <aarcange(a)redhat.com>
Cc: Andy Lutomirski <luto(a)kernel.org>
Cc: Pavel Emelyanov <xemul(a)openvz.org>
Cc: Mike Kravetz <mike.kravetz(a)oracle.com>
Cc: Mike Rapoport <rppt(a)linux.vnet.ibm.com>
Cc: Minchan Kim <minchan(a)kernel.org>
Cc: Will Deacon <will(a)kernel.org>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: <stable(a)vger.kernel.org> [5.9+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/memory.c | 8 ++++++++
1 file changed, 8 insertions(+)
--- a/mm/memory.c~mm-userfaultfd-fix-memory-corruption-due-to-writeprotect
+++ a/mm/memory.c
@@ -3103,6 +3103,14 @@ static vm_fault_t do_wp_page(struct vm_f
return handle_userfault(vmf, VM_UFFD_WP);
}
+ /*
+ * Userfaultfd write-protect can defer flushes. Ensure the TLB
+ * is flushed in this case before copying.
+ */
+ if (unlikely(userfaultfd_wp(vmf->vma) &&
+ mm_tlb_flush_pending(vmf->vma->vm_mm)))
+ flush_tlb_page(vmf->vma, vmf->address);
+
vmf->page = vm_normal_page(vma, vmf->address, vmf->orig_pte);
if (!vmf->page) {
/*
_
Patches currently in -mm which might be from namit(a)vmware.com are
mm-userfaultfd-fix-memory-corruption-due-to-writeprotect.patch
From: YunQiang Su <yunqiang.su(a)cipunited.com>
The MIPS FPU may have 3 mode:
FR=0: MIPS I style, all of the FPR are single.
FR=1: all 32 FPR can be double.
FRE: redirecting the rw of odd-FPR to the upper 32bit of even-double FPR.
The binary may have 3 mode:
FP32: can only work with FR=0 and FRE mode
FPXX: can work with all of FR=0/FR=1/FRE mode.
FP64: can only work with FR=1 mode
Some binary, for example the output of golang, may be mark as FPXX,
while in fact they are FP32. It is caused by the bug of design and linker:
Object produced by pure Go has no FP annotation while in fact they are FP32;
if we link them with the C module which marked as FPXX,
the result will be marked as FPXX. If these fake-FPXX binaries is executed
in FR=1 mode, some problem will happen.
In Golang, now we add the FP32 annotation, so the future golang programs
won't have this problem. While for the existing binaries, we need a
kernel workaround.
Currently, FR=1 mode is used for all FPXX binary, it makes some wrong
behivour of the binaries. Since FPXX binary can work with both FR=1 and FR=0,
we force it to use FR=0 or FRE (for R6 CPU).
Reference:
https://web.archive.org/web/20180828210612/https://dmz-portal.mips.com/wiki…https://go-review.googlesource.com/c/go/+/239217https://go-review.googlesource.com/c/go/+/237058
Signed-off-by: YunQiang Su <yunqiang.su(a)cipunited.com>
Cc: stable(a)vger.kernel.org # 4.19+
v6->v7:
Use FRE mode for pre-R6 binaries on R6 CPU.
v5->v6:
Rollback to V3, aka remove config option.
v4->v5:
Fix CONFIG_MIPS_O32_FPXX_USE_FR0 usage: if -> ifdef
v3->v4:
introduce a config option: CONFIG_MIPS_O32_FPXX_USE_FR0
v2->v3:
commit message: add Signed-off-by and Cc to stable.
v1->v2:
Fix bad commit message: in fact, we are switching to FR=0
---
arch/mips/kernel/elf.c | 20 +++++++++++++-------
1 file changed, 13 insertions(+), 7 deletions(-)
diff --git a/arch/mips/kernel/elf.c b/arch/mips/kernel/elf.c
index 7b045d2a0b51..4d4db619544b 100644
--- a/arch/mips/kernel/elf.c
+++ b/arch/mips/kernel/elf.c
@@ -232,11 +232,16 @@ int arch_check_elf(void *_ehdr, bool has_interpreter, void *_interp_ehdr,
* that inherently require the hybrid FP mode.
* - If FR1 and FRDEFAULT is true, that means we hit the any-abi or
* fpxx case. This is because, in any-ABI (or no-ABI) we have no FPU
- * instructions so we don't care about the mode. We will simply use
- * the one preferred by the hardware. In fpxx case, that ABI can
- * handle both FR=1 and FR=0, so, again, we simply choose the one
- * preferred by the hardware. Next, if we only use single-precision
- * FPU instructions, and the default ABI FPU mode is not good
+ * instructions so we don't care about the mode.
+ * In fpxx case, that ABI can handle all of FR=1/FR=0/FRE mode.
+ * Here, we need to use FR=0/FRE mode instead of FR=1, because some binaries
+ * may be mark as FPXX by mistake due to bugs of design and linker:
+ * The object produced by pure Go has no FP annotation,
+ * then is treated as any-ABI by linker, although in fact they are FP32;
+ * if any-ABI object is linked with FPXX object, the result will be mark as FPXX.
+ * Then the problem happens: run FP32 binaries in FR=1 mode.
+ * - If we only use single-precision FPU instructions,
+ * and the default ABI FPU mode is not good
* (ie single + any ABI combination), we set again the FPU mode to the
* one is preferred by the hardware. Next, if we know that the code
* will only use single-precision instructions, shown by single being
@@ -248,8 +253,9 @@ int arch_check_elf(void *_ehdr, bool has_interpreter, void *_interp_ehdr,
*/
if (prog_req.fre && !prog_req.frdefault && !prog_req.fr1)
state->overall_fp_mode = FP_FRE;
- else if ((prog_req.fr1 && prog_req.frdefault) ||
- (prog_req.single && !prog_req.frdefault))
+ else if (prog_req.fr1 && prog_req.frdefault)
+ state->overall_fp_mode = cpu_has_mips_r6 ? FP_FRE : FP_FR0;
+ else if (prog_req.single && !prog_req.frdefault)
/* Make sure 64-bit MIPS III/IV/64R1 will not pick FR1 */
state->overall_fp_mode = ((raw_current_cpu_data.fpu_id & MIPS_FPIR_F64) &&
cpu_has_mips_r2_r6) ?
--
2.20.1
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 6d5e7af1f6f5924de5dd1ebe97675c2363100878 Mon Sep 17 00:00:00 2001
From: Srinivas Kandagatla <srinivas.kandagatla(a)linaro.org>
Date: Fri, 15 Jan 2021 16:25:59 +0000
Subject: [PATCH] soundwire: debugfs: use controller id instead of link_id
link_id can be zero and if we have multiple controller instances
in a system like Qualcomm debugfs will end-up with duplicate namespace
resulting in incorrect debugfs entries.
Using id should give a unique debugfs directory entry and should fix below
warning too.
"debugfs: Directory 'master-0' with parent 'soundwire' already present!"
Fixes: bf03473d5bcc ("soundwire: add debugfs support")
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla(a)linaro.org>
Link: https://lore.kernel.org/r/20210115162559.20869-1-srinivas.kandagatla@linaro…
Signed-off-by: Vinod Koul <vkoul(a)kernel.org>
diff --git a/drivers/soundwire/debugfs.c b/drivers/soundwire/debugfs.c
index b6cad0d59b7b..5f9efa42bb25 100644
--- a/drivers/soundwire/debugfs.c
+++ b/drivers/soundwire/debugfs.c
@@ -19,7 +19,7 @@ void sdw_bus_debugfs_init(struct sdw_bus *bus)
return;
/* create the debugfs master-N */
- snprintf(name, sizeof(name), "master-%d", bus->link_id);
+ snprintf(name, sizeof(name), "master-%d", bus->id);
bus->debugfs = debugfs_create_dir(name, sdw_debugfs_root);
}
From: Sergey Senozhatsky <senozhatsky(a)chromium.org>
commit ea86f3defd55f141a44146e66cbf8ffb683d60da upstream.
We observed that some of virtio_gpu_object_shmem_init() allocations
can be rather costly - order 6 - which can be difficult to fulfill
under memory pressure conditions. Switch to kvmalloc_array() in
virtio_gpu_object_shmem_init() and let the kernel vmalloc the entries
array.
the original patch applied to a different file, this backport moved this
change to the proper place for 4.14/4.19/5.4.
Signed-off-by: Sergey Senozhatsky <senozhatsky(a)chromium.org>
Link: http://patchwork.freedesktop.org/patch/msgid/20201105014744.1662226-1-senoz…
Cc: stable(a)vger.kernel.org [4.14, 4.19, 5.4]
Signed-off-by: Gerd Hoffmann <kraxel(a)redhat.com>
Signed-off-by: Doug Horn <doughorn(a)google.com>
---
drivers/gpu/drm/virtio/virtgpu_vq.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c b/drivers/gpu/drm/virtio/virtgpu_vq.c
index a956c73ea85e..374279ba1444 100644
--- a/drivers/gpu/drm/virtio/virtgpu_vq.c
+++ b/drivers/gpu/drm/virtio/virtgpu_vq.c
@@ -865,9 +865,9 @@ int virtio_gpu_object_attach(struct virtio_gpu_device *vgdev,
}
/* gets freed when the ring has consumed it */
- ents = kmalloc_array(obj->pages->nents,
- sizeof(struct virtio_gpu_mem_entry),
- GFP_KERNEL);
+ ents = kvmalloc_array(obj->pages->nents,
+ sizeof(struct virtio_gpu_mem_entry),
+ GFP_KERNEL);
if (!ents) {
DRM_ERROR("failed to allocate ent list\n");
return -ENOMEM;
--
2.30.1.766.gb4fecdf3b7-goog
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From e0371298ddc51761be257698554ea507ac8bf831 Mon Sep 17 00:00:00 2001
From: Roja Rani Yarubandi <rojay(a)codeaurora.org>
Date: Fri, 8 Jan 2021 20:35:45 +0530
Subject: [PATCH] i2c: i2c-qcom-geni: Add shutdown callback for i2c
If the hardware is still accessing memory after SMMU translation
is disabled (as part of smmu shutdown callback), then the
IOVAs (I/O virtual address) which it was using will go on the bus
as the physical addresses which will result in unknown crashes
like NoC/interconnect errors.
So, implement shutdown callback to i2c driver to stop on-going transfer
and unmap DMA mappings during system "reboot" or "shutdown".
Fixes: 37692de5d523 ("i2c: i2c-qcom-geni: Add bus driver for the Qualcomm GENI I2C controller")
Signed-off-by: Roja Rani Yarubandi <rojay(a)codeaurora.org>
Reviewed-by: Akash Asthana <akashast(a)codeaurora.org>
Signed-off-by: Wolfram Sang <wsa(a)kernel.org>
diff --git a/drivers/i2c/busses/i2c-qcom-geni.c b/drivers/i2c/busses/i2c-qcom-geni.c
index 214b4c913a13..c3f584795911 100644
--- a/drivers/i2c/busses/i2c-qcom-geni.c
+++ b/drivers/i2c/busses/i2c-qcom-geni.c
@@ -375,6 +375,32 @@ static void geni_i2c_tx_msg_cleanup(struct geni_i2c_dev *gi2c,
}
}
+static void geni_i2c_stop_xfer(struct geni_i2c_dev *gi2c)
+{
+ int ret;
+ u32 geni_status;
+ struct i2c_msg *cur;
+
+ /* Resume device, as runtime suspend can happen anytime during transfer */
+ ret = pm_runtime_get_sync(gi2c->se.dev);
+ if (ret < 0) {
+ dev_err(gi2c->se.dev, "Failed to resume device: %d\n", ret);
+ return;
+ }
+
+ geni_status = readl_relaxed(gi2c->se.base + SE_GENI_STATUS);
+ if (geni_status & M_GENI_CMD_ACTIVE) {
+ cur = gi2c->cur;
+ geni_i2c_abort_xfer(gi2c);
+ if (cur->flags & I2C_M_RD)
+ geni_i2c_rx_msg_cleanup(gi2c, cur);
+ else
+ geni_i2c_tx_msg_cleanup(gi2c, cur);
+ }
+
+ pm_runtime_put_sync_suspend(gi2c->se.dev);
+}
+
static int geni_i2c_rx_one_msg(struct geni_i2c_dev *gi2c, struct i2c_msg *msg,
u32 m_param)
{
@@ -650,6 +676,13 @@ static int geni_i2c_remove(struct platform_device *pdev)
return 0;
}
+static void geni_i2c_shutdown(struct platform_device *pdev)
+{
+ struct geni_i2c_dev *gi2c = platform_get_drvdata(pdev);
+
+ geni_i2c_stop_xfer(gi2c);
+}
+
static int __maybe_unused geni_i2c_runtime_suspend(struct device *dev)
{
int ret;
@@ -714,6 +747,7 @@ MODULE_DEVICE_TABLE(of, geni_i2c_dt_match);
static struct platform_driver geni_i2c_driver = {
.probe = geni_i2c_probe,
.remove = geni_i2c_remove,
+ .shutdown = geni_i2c_shutdown,
.driver = {
.name = "geni_i2c",
.pm = &geni_i2c_pm_ops,
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From d87903b63e3ce1eafaa701aec5cc1d0ecd0d84dc Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni(a)redhat.com>
Date: Fri, 19 Feb 2021 18:35:38 +0100
Subject: [PATCH] mptcp: fix DATA_FIN generation on early shutdown
If the msk is closed before sending or receiving any data,
no DATA_FIN is generated, instead an MPC ack packet is
crafted out.
In the above scenario, the MPTCP protocol creates and sends a
pure ack and such packets matches also the criteria for an
MPC ack and the protocol tries first to insert MPC options,
leading to the described error.
This change addresses the issue by avoiding the insertion of an
MPC option for DATA_FIN packets or if the sub-flow is not
established.
To avoid doing multiple times the same test, fetch the data_fin
flag in a bool variable and pass it to both the interested
helpers.
Fixes: 6d0060f600ad ("mptcp: Write MPTCP DSS headers to outgoing data packets")
Reviewed-by: Mat Martineau <mathew.j.martineau(a)linux.intel.com>
Signed-off-by: Paolo Abeni <pabeni(a)redhat.com>
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index b63574d6b812..444a38681e93 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -411,6 +411,7 @@ static void clear_3rdack_retransmission(struct sock *sk)
}
static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
+ bool snd_data_fin_enable,
unsigned int *size,
unsigned int remaining,
struct mptcp_out_options *opts)
@@ -428,9 +429,10 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
if (!skb)
return false;
- /* MPC/MPJ needed only on 3rd ack packet */
- if (subflow->fully_established ||
- subflow->snd_isn != TCP_SKB_CB(skb)->seq)
+ /* MPC/MPJ needed only on 3rd ack packet, DATA_FIN and TCP shutdown take precedence */
+ if (subflow->fully_established || snd_data_fin_enable ||
+ subflow->snd_isn != TCP_SKB_CB(skb)->seq ||
+ sk->sk_state != TCP_ESTABLISHED)
return false;
if (subflow->mp_capable) {
@@ -502,20 +504,20 @@ static void mptcp_write_data_fin(struct mptcp_subflow_context *subflow,
}
static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
+ bool snd_data_fin_enable,
unsigned int *size,
unsigned int remaining,
struct mptcp_out_options *opts)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
struct mptcp_sock *msk = mptcp_sk(subflow->conn);
- u64 snd_data_fin_enable, ack_seq;
unsigned int dss_size = 0;
struct mptcp_ext *mpext;
unsigned int ack_size;
bool ret = false;
+ u64 ack_seq;
mpext = skb ? mptcp_get_ext(skb) : NULL;
- snd_data_fin_enable = mptcp_data_fin_enabled(msk);
if (!skb || (mpext && mpext->use_map) || snd_data_fin_enable) {
unsigned int map_size;
@@ -717,12 +719,15 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
unsigned int *size, unsigned int remaining,
struct mptcp_out_options *opts)
{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+ struct mptcp_sock *msk = mptcp_sk(subflow->conn);
unsigned int opt_size = 0;
+ bool snd_data_fin;
bool ret = false;
opts->suboptions = 0;
- if (unlikely(mptcp_check_fallback(sk)))
+ if (unlikely(__mptcp_check_fallback(msk)))
return false;
/* prevent adding of any MPTCP related options on reset packet
@@ -731,10 +736,10 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
if (unlikely(skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST))
return false;
- if (mptcp_established_options_mp(sk, skb, &opt_size, remaining, opts))
+ snd_data_fin = mptcp_data_fin_enabled(msk);
+ if (mptcp_established_options_mp(sk, skb, snd_data_fin, &opt_size, remaining, opts))
ret = true;
- else if (mptcp_established_options_dss(sk, skb, &opt_size, remaining,
- opts))
+ else if (mptcp_established_options_dss(sk, skb, snd_data_fin, &opt_size, remaining, opts))
ret = true;
/* we reserved enough space for the above options, and exceeding the
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 07956b6269d3ed05d854233d5bb776dca91751dd Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson(a)redhat.com>
Date: Tue, 16 Feb 2021 15:49:34 -0700
Subject: [PATCH] vfio/type1: Use follow_pte()
follow_pfn() doesn't make sure that we're using the correct page
protections, get the pte with follow_pte() so that we can test
protections and get the pfn from the pte.
Fixes: 5cbf3264bc71 ("vfio/type1: Fix VA->PA translation for PFNMAP VMAs in vaddr_get_pfn()")
Reviewed-by: Jason Gunthorpe <jgg(a)nvidia.com>
Reviewed-by: Cornelia Huck <cohuck(a)redhat.com>
Reviewed-by: Peter Xu <peterx(a)redhat.com>
Signed-off-by: Alex Williamson <alex.williamson(a)redhat.com>
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index b3df383d7028..ed03f3fcb07e 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -24,6 +24,7 @@
#include <linux/compat.h>
#include <linux/device.h>
#include <linux/fs.h>
+#include <linux/highmem.h>
#include <linux/iommu.h>
#include <linux/module.h>
#include <linux/mm.h>
@@ -462,9 +463,11 @@ static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm,
unsigned long vaddr, unsigned long *pfn,
bool write_fault)
{
+ pte_t *ptep;
+ spinlock_t *ptl;
int ret;
- ret = follow_pfn(vma, vaddr, pfn);
+ ret = follow_pte(vma->vm_mm, vaddr, &ptep, &ptl);
if (ret) {
bool unlocked = false;
@@ -478,9 +481,17 @@ static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm,
if (ret)
return ret;
- ret = follow_pfn(vma, vaddr, pfn);
+ ret = follow_pte(vma->vm_mm, vaddr, &ptep, &ptl);
+ if (ret)
+ return ret;
}
+ if (write_fault && !pte_write(*ptep))
+ ret = -EFAULT;
+ else
+ *pfn = pte_pfn(*ptep);
+
+ pte_unmap_unlock(ptep, ptl);
return ret;
}
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 07956b6269d3ed05d854233d5bb776dca91751dd Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson(a)redhat.com>
Date: Tue, 16 Feb 2021 15:49:34 -0700
Subject: [PATCH] vfio/type1: Use follow_pte()
follow_pfn() doesn't make sure that we're using the correct page
protections, get the pte with follow_pte() so that we can test
protections and get the pfn from the pte.
Fixes: 5cbf3264bc71 ("vfio/type1: Fix VA->PA translation for PFNMAP VMAs in vaddr_get_pfn()")
Reviewed-by: Jason Gunthorpe <jgg(a)nvidia.com>
Reviewed-by: Cornelia Huck <cohuck(a)redhat.com>
Reviewed-by: Peter Xu <peterx(a)redhat.com>
Signed-off-by: Alex Williamson <alex.williamson(a)redhat.com>
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index b3df383d7028..ed03f3fcb07e 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -24,6 +24,7 @@
#include <linux/compat.h>
#include <linux/device.h>
#include <linux/fs.h>
+#include <linux/highmem.h>
#include <linux/iommu.h>
#include <linux/module.h>
#include <linux/mm.h>
@@ -462,9 +463,11 @@ static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm,
unsigned long vaddr, unsigned long *pfn,
bool write_fault)
{
+ pte_t *ptep;
+ spinlock_t *ptl;
int ret;
- ret = follow_pfn(vma, vaddr, pfn);
+ ret = follow_pte(vma->vm_mm, vaddr, &ptep, &ptl);
if (ret) {
bool unlocked = false;
@@ -478,9 +481,17 @@ static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm,
if (ret)
return ret;
- ret = follow_pfn(vma, vaddr, pfn);
+ ret = follow_pte(vma->vm_mm, vaddr, &ptep, &ptl);
+ if (ret)
+ return ret;
}
+ if (write_fault && !pte_write(*ptep))
+ ret = -EFAULT;
+ else
+ *pfn = pte_pfn(*ptep);
+
+ pte_unmap_unlock(ptep, ptl);
return ret;
}
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 07956b6269d3ed05d854233d5bb776dca91751dd Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson(a)redhat.com>
Date: Tue, 16 Feb 2021 15:49:34 -0700
Subject: [PATCH] vfio/type1: Use follow_pte()
follow_pfn() doesn't make sure that we're using the correct page
protections, get the pte with follow_pte() so that we can test
protections and get the pfn from the pte.
Fixes: 5cbf3264bc71 ("vfio/type1: Fix VA->PA translation for PFNMAP VMAs in vaddr_get_pfn()")
Reviewed-by: Jason Gunthorpe <jgg(a)nvidia.com>
Reviewed-by: Cornelia Huck <cohuck(a)redhat.com>
Reviewed-by: Peter Xu <peterx(a)redhat.com>
Signed-off-by: Alex Williamson <alex.williamson(a)redhat.com>
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index b3df383d7028..ed03f3fcb07e 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -24,6 +24,7 @@
#include <linux/compat.h>
#include <linux/device.h>
#include <linux/fs.h>
+#include <linux/highmem.h>
#include <linux/iommu.h>
#include <linux/module.h>
#include <linux/mm.h>
@@ -462,9 +463,11 @@ static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm,
unsigned long vaddr, unsigned long *pfn,
bool write_fault)
{
+ pte_t *ptep;
+ spinlock_t *ptl;
int ret;
- ret = follow_pfn(vma, vaddr, pfn);
+ ret = follow_pte(vma->vm_mm, vaddr, &ptep, &ptl);
if (ret) {
bool unlocked = false;
@@ -478,9 +481,17 @@ static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm,
if (ret)
return ret;
- ret = follow_pfn(vma, vaddr, pfn);
+ ret = follow_pte(vma->vm_mm, vaddr, &ptep, &ptl);
+ if (ret)
+ return ret;
}
+ if (write_fault && !pte_write(*ptep))
+ ret = -EFAULT;
+ else
+ *pfn = pte_pfn(*ptep);
+
+ pte_unmap_unlock(ptep, ptl);
return ret;
}
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 07956b6269d3ed05d854233d5bb776dca91751dd Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson(a)redhat.com>
Date: Tue, 16 Feb 2021 15:49:34 -0700
Subject: [PATCH] vfio/type1: Use follow_pte()
follow_pfn() doesn't make sure that we're using the correct page
protections, get the pte with follow_pte() so that we can test
protections and get the pfn from the pte.
Fixes: 5cbf3264bc71 ("vfio/type1: Fix VA->PA translation for PFNMAP VMAs in vaddr_get_pfn()")
Reviewed-by: Jason Gunthorpe <jgg(a)nvidia.com>
Reviewed-by: Cornelia Huck <cohuck(a)redhat.com>
Reviewed-by: Peter Xu <peterx(a)redhat.com>
Signed-off-by: Alex Williamson <alex.williamson(a)redhat.com>
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index b3df383d7028..ed03f3fcb07e 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -24,6 +24,7 @@
#include <linux/compat.h>
#include <linux/device.h>
#include <linux/fs.h>
+#include <linux/highmem.h>
#include <linux/iommu.h>
#include <linux/module.h>
#include <linux/mm.h>
@@ -462,9 +463,11 @@ static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm,
unsigned long vaddr, unsigned long *pfn,
bool write_fault)
{
+ pte_t *ptep;
+ spinlock_t *ptl;
int ret;
- ret = follow_pfn(vma, vaddr, pfn);
+ ret = follow_pte(vma->vm_mm, vaddr, &ptep, &ptl);
if (ret) {
bool unlocked = false;
@@ -478,9 +481,17 @@ static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm,
if (ret)
return ret;
- ret = follow_pfn(vma, vaddr, pfn);
+ ret = follow_pte(vma->vm_mm, vaddr, &ptep, &ptl);
+ if (ret)
+ return ret;
}
+ if (write_fault && !pte_write(*ptep))
+ ret = -EFAULT;
+ else
+ *pfn = pte_pfn(*ptep);
+
+ pte_unmap_unlock(ptep, ptl);
return ret;
}
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 07956b6269d3ed05d854233d5bb776dca91751dd Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson(a)redhat.com>
Date: Tue, 16 Feb 2021 15:49:34 -0700
Subject: [PATCH] vfio/type1: Use follow_pte()
follow_pfn() doesn't make sure that we're using the correct page
protections, get the pte with follow_pte() so that we can test
protections and get the pfn from the pte.
Fixes: 5cbf3264bc71 ("vfio/type1: Fix VA->PA translation for PFNMAP VMAs in vaddr_get_pfn()")
Reviewed-by: Jason Gunthorpe <jgg(a)nvidia.com>
Reviewed-by: Cornelia Huck <cohuck(a)redhat.com>
Reviewed-by: Peter Xu <peterx(a)redhat.com>
Signed-off-by: Alex Williamson <alex.williamson(a)redhat.com>
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index b3df383d7028..ed03f3fcb07e 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -24,6 +24,7 @@
#include <linux/compat.h>
#include <linux/device.h>
#include <linux/fs.h>
+#include <linux/highmem.h>
#include <linux/iommu.h>
#include <linux/module.h>
#include <linux/mm.h>
@@ -462,9 +463,11 @@ static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm,
unsigned long vaddr, unsigned long *pfn,
bool write_fault)
{
+ pte_t *ptep;
+ spinlock_t *ptl;
int ret;
- ret = follow_pfn(vma, vaddr, pfn);
+ ret = follow_pte(vma->vm_mm, vaddr, &ptep, &ptl);
if (ret) {
bool unlocked = false;
@@ -478,9 +481,17 @@ static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm,
if (ret)
return ret;
- ret = follow_pfn(vma, vaddr, pfn);
+ ret = follow_pte(vma->vm_mm, vaddr, &ptep, &ptl);
+ if (ret)
+ return ret;
}
+ if (write_fault && !pte_write(*ptep))
+ ret = -EFAULT;
+ else
+ *pfn = pte_pfn(*ptep);
+
+ pte_unmap_unlock(ptep, ptl);
return ret;
}
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 07956b6269d3ed05d854233d5bb776dca91751dd Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson(a)redhat.com>
Date: Tue, 16 Feb 2021 15:49:34 -0700
Subject: [PATCH] vfio/type1: Use follow_pte()
follow_pfn() doesn't make sure that we're using the correct page
protections, get the pte with follow_pte() so that we can test
protections and get the pfn from the pte.
Fixes: 5cbf3264bc71 ("vfio/type1: Fix VA->PA translation for PFNMAP VMAs in vaddr_get_pfn()")
Reviewed-by: Jason Gunthorpe <jgg(a)nvidia.com>
Reviewed-by: Cornelia Huck <cohuck(a)redhat.com>
Reviewed-by: Peter Xu <peterx(a)redhat.com>
Signed-off-by: Alex Williamson <alex.williamson(a)redhat.com>
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index b3df383d7028..ed03f3fcb07e 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -24,6 +24,7 @@
#include <linux/compat.h>
#include <linux/device.h>
#include <linux/fs.h>
+#include <linux/highmem.h>
#include <linux/iommu.h>
#include <linux/module.h>
#include <linux/mm.h>
@@ -462,9 +463,11 @@ static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm,
unsigned long vaddr, unsigned long *pfn,
bool write_fault)
{
+ pte_t *ptep;
+ spinlock_t *ptl;
int ret;
- ret = follow_pfn(vma, vaddr, pfn);
+ ret = follow_pte(vma->vm_mm, vaddr, &ptep, &ptl);
if (ret) {
bool unlocked = false;
@@ -478,9 +481,17 @@ static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm,
if (ret)
return ret;
- ret = follow_pfn(vma, vaddr, pfn);
+ ret = follow_pte(vma->vm_mm, vaddr, &ptep, &ptl);
+ if (ret)
+ return ret;
}
+ if (write_fault && !pte_write(*ptep))
+ ret = -EFAULT;
+ else
+ *pfn = pte_pfn(*ptep);
+
+ pte_unmap_unlock(ptep, ptl);
return ret;
}
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From ea354b6ddd6f09be29424f41fa75a3e637fea234 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter(a)oracle.com>
Date: Thu, 21 Jan 2021 07:44:00 +0100
Subject: [PATCH] media: zr364xx: fix memory leaks in probe()
Syzbot discovered that the probe error handling doesn't clean up the
resources allocated in zr364xx_board_init(). There are several
related bugs in this code so I have re-written the error handling.
1) Introduce a new function zr364xx_board_uninit() which cleans up
the resources in zr364xx_board_init().
2) In zr364xx_board_init() if the call to zr364xx_start_readpipe()
fails then release the "cam->buffer.frame[i].lpvbits" memory
before returning. This way every function either allocates
everything successfully or it cleans up after itself.
3) Re-write the probe function so that each failure path goto frees
the most recent allocation. That way we don't free anything
before it has been allocated and we can also verify that
everything is freed.
4) Originally, in the probe function the "cam->v4l2_dev.release"
pointer was set to "zr364xx_release" near the start but I moved
that assignment to the end, after everything had succeeded. The
release function was never actually called during the probe cleanup
process, but with this change I wanted to make it clear that we
don't want to call zr364xx_release() until everything is
allocated successfully.
Next I re-wrote the zr364xx_release() function. Ideally this would
have been a simple matter of copy and pasting the cleanup code from
probe and adding an additional call to video_unregister_device(). But
there are a couple quirks to note.
1) The probe function does not call videobuf_mmap_free() and I don't
know where the videobuf_mmap is allocated. I left the code as-is to
avoid introducing a bug in code I don't understand.
2) The zr364xx_board_uninit() has a call to zr364xx_stop_readpipe()
which is a change from the original behavior with regards to
unloading the driver. Calling zr364xx_stop_readpipe() on a stopped
pipe is not a problem so this is safe and is potentially a bugfix.
Reported-by: syzbot+b4d54814b339b5c6bbd4(a)syzkaller.appspotmail.com
Signed-off-by: Dan Carpenter <dan.carpenter(a)oracle.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco(a)xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei(a)kernel.org>
diff --git a/drivers/media/usb/zr364xx/zr364xx.c b/drivers/media/usb/zr364xx/zr364xx.c
index 1e1c6b4d1874..d29b861367ea 100644
--- a/drivers/media/usb/zr364xx/zr364xx.c
+++ b/drivers/media/usb/zr364xx/zr364xx.c
@@ -1181,15 +1181,11 @@ static int zr364xx_open(struct file *file)
return err;
}
-static void zr364xx_release(struct v4l2_device *v4l2_dev)
+static void zr364xx_board_uninit(struct zr364xx_camera *cam)
{
- struct zr364xx_camera *cam =
- container_of(v4l2_dev, struct zr364xx_camera, v4l2_dev);
unsigned long i;
- v4l2_device_unregister(&cam->v4l2_dev);
-
- videobuf_mmap_free(&cam->vb_vidq);
+ zr364xx_stop_readpipe(cam);
/* release sys buffers */
for (i = 0; i < FRAMES; i++) {
@@ -1200,9 +1196,19 @@ static void zr364xx_release(struct v4l2_device *v4l2_dev)
cam->buffer.frame[i].lpvbits = NULL;
}
- v4l2_ctrl_handler_free(&cam->ctrl_handler);
/* release transfer buffer */
kfree(cam->pipe->transfer_buffer);
+}
+
+static void zr364xx_release(struct v4l2_device *v4l2_dev)
+{
+ struct zr364xx_camera *cam =
+ container_of(v4l2_dev, struct zr364xx_camera, v4l2_dev);
+
+ videobuf_mmap_free(&cam->vb_vidq);
+ v4l2_ctrl_handler_free(&cam->ctrl_handler);
+ zr364xx_board_uninit(cam);
+ v4l2_device_unregister(&cam->v4l2_dev);
kfree(cam);
}
@@ -1376,11 +1382,14 @@ static int zr364xx_board_init(struct zr364xx_camera *cam)
/* start read pipe */
err = zr364xx_start_readpipe(cam);
if (err)
- goto err_free;
+ goto err_free_frames;
DBG(": board initialized\n");
return 0;
+err_free_frames:
+ for (i = 0; i < FRAMES; i++)
+ vfree(cam->buffer.frame[i].lpvbits);
err_free:
kfree(cam->pipe->transfer_buffer);
cam->pipe->transfer_buffer = NULL;
@@ -1409,12 +1418,10 @@ static int zr364xx_probe(struct usb_interface *intf,
if (!cam)
return -ENOMEM;
- cam->v4l2_dev.release = zr364xx_release;
err = v4l2_device_register(&intf->dev, &cam->v4l2_dev);
if (err < 0) {
dev_err(&udev->dev, "couldn't register v4l2_device\n");
- kfree(cam);
- return err;
+ goto free_cam;
}
hdl = &cam->ctrl_handler;
v4l2_ctrl_handler_init(hdl, 1);
@@ -1423,7 +1430,7 @@ static int zr364xx_probe(struct usb_interface *intf,
if (hdl->error) {
err = hdl->error;
dev_err(&udev->dev, "couldn't register control\n");
- goto fail;
+ goto unregister;
}
/* save the init method used by this camera */
cam->method = id->driver_info;
@@ -1496,7 +1503,7 @@ static int zr364xx_probe(struct usb_interface *intf,
if (!cam->read_endpoint) {
err = -ENOMEM;
dev_err(&intf->dev, "Could not find bulk-in endpoint\n");
- goto fail;
+ goto unregister;
}
/* v4l */
@@ -1507,10 +1514,11 @@ static int zr364xx_probe(struct usb_interface *intf,
/* load zr364xx board specific */
err = zr364xx_board_init(cam);
- if (!err)
- err = v4l2_ctrl_handler_setup(hdl);
if (err)
- goto fail;
+ goto unregister;
+ err = v4l2_ctrl_handler_setup(hdl);
+ if (err)
+ goto board_uninit;
spin_lock_init(&cam->slock);
@@ -1525,16 +1533,21 @@ static int zr364xx_probe(struct usb_interface *intf,
err = video_register_device(&cam->vdev, VFL_TYPE_VIDEO, -1);
if (err) {
dev_err(&udev->dev, "video_register_device failed\n");
- goto fail;
+ goto free_handler;
}
+ cam->v4l2_dev.release = zr364xx_release;
dev_info(&udev->dev, DRIVER_DESC " controlling device %s\n",
video_device_node_name(&cam->vdev));
return 0;
-fail:
+free_handler:
v4l2_ctrl_handler_free(hdl);
+board_uninit:
+ zr364xx_board_uninit(cam);
+unregister:
v4l2_device_unregister(&cam->v4l2_dev);
+free_cam:
kfree(cam);
return err;
}
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From ea354b6ddd6f09be29424f41fa75a3e637fea234 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter(a)oracle.com>
Date: Thu, 21 Jan 2021 07:44:00 +0100
Subject: [PATCH] media: zr364xx: fix memory leaks in probe()
Syzbot discovered that the probe error handling doesn't clean up the
resources allocated in zr364xx_board_init(). There are several
related bugs in this code so I have re-written the error handling.
1) Introduce a new function zr364xx_board_uninit() which cleans up
the resources in zr364xx_board_init().
2) In zr364xx_board_init() if the call to zr364xx_start_readpipe()
fails then release the "cam->buffer.frame[i].lpvbits" memory
before returning. This way every function either allocates
everything successfully or it cleans up after itself.
3) Re-write the probe function so that each failure path goto frees
the most recent allocation. That way we don't free anything
before it has been allocated and we can also verify that
everything is freed.
4) Originally, in the probe function the "cam->v4l2_dev.release"
pointer was set to "zr364xx_release" near the start but I moved
that assignment to the end, after everything had succeeded. The
release function was never actually called during the probe cleanup
process, but with this change I wanted to make it clear that we
don't want to call zr364xx_release() until everything is
allocated successfully.
Next I re-wrote the zr364xx_release() function. Ideally this would
have been a simple matter of copy and pasting the cleanup code from
probe and adding an additional call to video_unregister_device(). But
there are a couple quirks to note.
1) The probe function does not call videobuf_mmap_free() and I don't
know where the videobuf_mmap is allocated. I left the code as-is to
avoid introducing a bug in code I don't understand.
2) The zr364xx_board_uninit() has a call to zr364xx_stop_readpipe()
which is a change from the original behavior with regards to
unloading the driver. Calling zr364xx_stop_readpipe() on a stopped
pipe is not a problem so this is safe and is potentially a bugfix.
Reported-by: syzbot+b4d54814b339b5c6bbd4(a)syzkaller.appspotmail.com
Signed-off-by: Dan Carpenter <dan.carpenter(a)oracle.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco(a)xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei(a)kernel.org>
diff --git a/drivers/media/usb/zr364xx/zr364xx.c b/drivers/media/usb/zr364xx/zr364xx.c
index 1e1c6b4d1874..d29b861367ea 100644
--- a/drivers/media/usb/zr364xx/zr364xx.c
+++ b/drivers/media/usb/zr364xx/zr364xx.c
@@ -1181,15 +1181,11 @@ static int zr364xx_open(struct file *file)
return err;
}
-static void zr364xx_release(struct v4l2_device *v4l2_dev)
+static void zr364xx_board_uninit(struct zr364xx_camera *cam)
{
- struct zr364xx_camera *cam =
- container_of(v4l2_dev, struct zr364xx_camera, v4l2_dev);
unsigned long i;
- v4l2_device_unregister(&cam->v4l2_dev);
-
- videobuf_mmap_free(&cam->vb_vidq);
+ zr364xx_stop_readpipe(cam);
/* release sys buffers */
for (i = 0; i < FRAMES; i++) {
@@ -1200,9 +1196,19 @@ static void zr364xx_release(struct v4l2_device *v4l2_dev)
cam->buffer.frame[i].lpvbits = NULL;
}
- v4l2_ctrl_handler_free(&cam->ctrl_handler);
/* release transfer buffer */
kfree(cam->pipe->transfer_buffer);
+}
+
+static void zr364xx_release(struct v4l2_device *v4l2_dev)
+{
+ struct zr364xx_camera *cam =
+ container_of(v4l2_dev, struct zr364xx_camera, v4l2_dev);
+
+ videobuf_mmap_free(&cam->vb_vidq);
+ v4l2_ctrl_handler_free(&cam->ctrl_handler);
+ zr364xx_board_uninit(cam);
+ v4l2_device_unregister(&cam->v4l2_dev);
kfree(cam);
}
@@ -1376,11 +1382,14 @@ static int zr364xx_board_init(struct zr364xx_camera *cam)
/* start read pipe */
err = zr364xx_start_readpipe(cam);
if (err)
- goto err_free;
+ goto err_free_frames;
DBG(": board initialized\n");
return 0;
+err_free_frames:
+ for (i = 0; i < FRAMES; i++)
+ vfree(cam->buffer.frame[i].lpvbits);
err_free:
kfree(cam->pipe->transfer_buffer);
cam->pipe->transfer_buffer = NULL;
@@ -1409,12 +1418,10 @@ static int zr364xx_probe(struct usb_interface *intf,
if (!cam)
return -ENOMEM;
- cam->v4l2_dev.release = zr364xx_release;
err = v4l2_device_register(&intf->dev, &cam->v4l2_dev);
if (err < 0) {
dev_err(&udev->dev, "couldn't register v4l2_device\n");
- kfree(cam);
- return err;
+ goto free_cam;
}
hdl = &cam->ctrl_handler;
v4l2_ctrl_handler_init(hdl, 1);
@@ -1423,7 +1430,7 @@ static int zr364xx_probe(struct usb_interface *intf,
if (hdl->error) {
err = hdl->error;
dev_err(&udev->dev, "couldn't register control\n");
- goto fail;
+ goto unregister;
}
/* save the init method used by this camera */
cam->method = id->driver_info;
@@ -1496,7 +1503,7 @@ static int zr364xx_probe(struct usb_interface *intf,
if (!cam->read_endpoint) {
err = -ENOMEM;
dev_err(&intf->dev, "Could not find bulk-in endpoint\n");
- goto fail;
+ goto unregister;
}
/* v4l */
@@ -1507,10 +1514,11 @@ static int zr364xx_probe(struct usb_interface *intf,
/* load zr364xx board specific */
err = zr364xx_board_init(cam);
- if (!err)
- err = v4l2_ctrl_handler_setup(hdl);
if (err)
- goto fail;
+ goto unregister;
+ err = v4l2_ctrl_handler_setup(hdl);
+ if (err)
+ goto board_uninit;
spin_lock_init(&cam->slock);
@@ -1525,16 +1533,21 @@ static int zr364xx_probe(struct usb_interface *intf,
err = video_register_device(&cam->vdev, VFL_TYPE_VIDEO, -1);
if (err) {
dev_err(&udev->dev, "video_register_device failed\n");
- goto fail;
+ goto free_handler;
}
+ cam->v4l2_dev.release = zr364xx_release;
dev_info(&udev->dev, DRIVER_DESC " controlling device %s\n",
video_device_node_name(&cam->vdev));
return 0;
-fail:
+free_handler:
v4l2_ctrl_handler_free(hdl);
+board_uninit:
+ zr364xx_board_uninit(cam);
+unregister:
v4l2_device_unregister(&cam->v4l2_dev);
+free_cam:
kfree(cam);
return err;
}
From: "Steven Rostedt (VMware)" <rostedt(a)goodmis.org>
Part of the logic of the new time stamp code depends on the before_stamp and
the write_stamp to be different if the write_stamp does not match the last
event on the buffer, as it will be used to calculate the delta of the next
event written on the buffer.
The discard logic depends on this, as the next event to come in needs to
inject a full timestamp as it can not rely on the last event timestamp in
the buffer because it is unknown due to events after it being discarded. But
by changing the write_stamp back to the time before it, it forces the next
event to use a full time stamp, instead of relying on it.
The issue came when a full time stamp was used for the event, and
rb_time_delta() returns zero in that case. The update to the write_stamp
(which subtracts delta) made it not change. Then when the event is removed
from the buffer, because the before_stamp and write_stamp still match, the
next event written would calculate its delta from the write_stamp, but that
would be wrong as the write_stamp is of the time of the event that was
discarded.
In the case that the delta change being made to write_stamp is zero, set the
before_stamp to zero as well, and this will force the next event to inject a
full timestamp and not use the current write_stamp.
Cc: stable(a)vger.kernel.org
Fixes: a389d86f7fd09 ("ring-buffer: Have nested events still record running time stamp")
Signed-off-by: Steven Rostedt (VMware) <rostedt(a)goodmis.org>
---
kernel/trace/ring_buffer.c | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index b9dad3500041..342f49c3ccc5 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -2814,6 +2814,17 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
write_stamp, write_stamp - delta))
return 0;
+ /*
+ * It's possible that the event time delta is zero
+ * (has the same time stamp as the previous event)
+ * in which case write_stamp and before_stamp could
+ * be the same. In such a case, force before_stamp
+ * to be different than write_stamp. It doesn't
+ * matter what it is, as long as its different.
+ */
+ if (!delta)
+ rb_time_set(&cpu_buffer->before_stamp, 0);
+
/*
* If an event were to come in now, it would see that the
* write_stamp and the before_stamp are different, and assume
--
2.30.0
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 76d7fff22be3e4185ee5f9da2eecbd8188e76b2c Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <nathan(a)kernel.org>
Date: Fri, 15 Jan 2021 12:26:22 -0700
Subject: [PATCH] MIPS: VDSO: Use CLANG_FLAGS instead of filtering out
'--target='
Commit ee67855ecd9d ("MIPS: vdso: Allow clang's --target flag in VDSO
cflags") allowed the '--target=' flag from the main Makefile to filter
through to the vDSO. However, it did not bring any of the other clang
specific flags for controlling the integrated assembler and the GNU
tools locations (--prefix=, --gcc-toolchain=, and -no-integrated-as).
Without these, we will get a warning (visible with tinyconfig):
arch/mips/vdso/elf.S:14:1: warning: DWARF2 only supports one section per
compilation unit
.pushsection .note.Linux, "a",@note ; .balign 4 ; .long 2f - 1f ; .long
4484f - 3f ; .long 0 ; 1:.asciz "Linux" ; 2:.balign 4 ; 3:
^
arch/mips/vdso/elf.S:34:2: warning: DWARF2 only supports one section per
compilation unit
.section .mips_abiflags, "a"
^
All of these flags are bundled up under CLANG_FLAGS in the main Makefile
and exported so that they can be added to Makefiles that set their own
CFLAGS. Use this value instead of filtering out '--target=' so there is
no warning and all of the tools are properly used.
Cc: stable(a)vger.kernel.org
Fixes: ee67855ecd9d ("MIPS: vdso: Allow clang's --target flag in VDSO cflags")
Link: https://github.com/ClangBuiltLinux/linux/issues/1256
Reported-by: Anders Roxell <anders.roxell(a)linaro.org>
Signed-off-by: Nathan Chancellor <natechancellor(a)gmail.com>
Tested-by: Anders Roxell <anders.roxell(a)linaro.org>
Signed-off-by: Thomas Bogendoerfer <tsbogend(a)alpha.franken.de>
diff --git a/arch/mips/vdso/Makefile b/arch/mips/vdso/Makefile
index 5810cc12bc1d..2131d3fd7333 100644
--- a/arch/mips/vdso/Makefile
+++ b/arch/mips/vdso/Makefile
@@ -16,16 +16,13 @@ ccflags-vdso := \
$(filter -march=%,$(KBUILD_CFLAGS)) \
$(filter -m%-float,$(KBUILD_CFLAGS)) \
$(filter -mno-loongson-%,$(KBUILD_CFLAGS)) \
+ $(CLANG_FLAGS) \
-D__VDSO__
ifndef CONFIG_64BIT
ccflags-vdso += -DBUILD_VDSO32
endif
-ifdef CONFIG_CC_IS_CLANG
-ccflags-vdso += $(filter --target=%,$(KBUILD_CFLAGS))
-endif
-
#
# The -fno-jump-tables flag only prevents the compiler from generating
# jump tables but does not prevent the compiler from emitting absolute
The arm64 implementations of some atomic operations had incorrect
assembly constraints. Depending on the compiler version and
options, this can result in a build failure for some parameter
values:
/tmp/ccDOb5nB.s: Assembler messages:
/tmp/ccDOb5nB.s:2214: Error: immediate out of range at operand 3 -- `bic w1,w0,5'
This has specifically been seen when building a 4.9 stable kernel with
gcc 6.3.0, since commit 23025393dbeb "xen/netback: use lateeoi irq
binding" was applied. I can also reproduce it with 4.14.
I cannot reproduce it with 4.19, but the same fixes are applicable and
the issue presumably could occur when using different compiler
options.
I haven't done anything about the 4.4 branch since it does not have
that xen-netback fix and it has significantly different definitions
for arm64 atomic ops.
I've attached a mailbox of patches for each of the 4.9, 4.14, and 4.19
branches.
Ben.
--
Ben Hutchings
I'm always amazed by the number of people who take up solipsism because
they heard someone else explain it. - E*Borg on alt.fan.pratchett
commit f5c6d0fcf90ce07ee0d686d465b19b247ebd5ed7 upstream.
These plt* and .text.ftrace_trampoline sections specified for arm64 have
non-zero addressses. Non-zero section addresses in a relocatable ELF would
confuse GDB when it tries to compute the section offsets and it ends up
printing wrong symbol addresses. Therefore, set them to zero, which mirrors
the change in commit 5d8591bc0fba ("module: set ksymtab/kcrctab* section
addresses to 0x0").
Reported-by: Frank van der Linden <fllinden(a)amazon.com>
Signed-off-by: Shaoying Xu <shaoyi(a)amazon.com>
Cc: <stable(a)vger.kernel.org>
Link: https://lore.kernel.org/r/20210216183234.GA23876@amazon.com
Signed-off-by: Will Deacon <will(a)kernel.org>
[shaoyi(a)amazon.com: made same changes in arch/arm64/kernel/module.lds for 5.4]
Signed-off-by: Shaoying Xu <shaoyi(a)amazon.com>
---
arch/arm64/include/asm/module.lds.h was renamed from arch/arm64/kernel/module.lds
by commit 596b0474d3d9 ("kbuild: preprocess module linker script") since v5.10.
Therefore, made same changes in arch/arm64/kernel/module.lds for 5.4.
arch/arm64/kernel/module.lds | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/arch/arm64/kernel/module.lds b/arch/arm64/kernel/module.lds
index 22e36a21c113..09a0eef71d12 100644
--- a/arch/arm64/kernel/module.lds
+++ b/arch/arm64/kernel/module.lds
@@ -1,5 +1,5 @@
SECTIONS {
- .plt (NOLOAD) : { BYTE(0) }
- .init.plt (NOLOAD) : { BYTE(0) }
- .text.ftrace_trampoline (NOLOAD) : { BYTE(0) }
+ .plt 0 (NOLOAD) : { BYTE(0) }
+ .init.plt 0 (NOLOAD) : { BYTE(0) }
+ .text.ftrace_trampoline 0 (NOLOAD) : { BYTE(0) }
}
--
2.16.6
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 8a8109f303e25a27f92c1d8edd67d7cbbc60a4eb Mon Sep 17 00:00:00 2001
From: Muchun Song <songmuchun(a)bytedance.com>
Date: Wed, 10 Feb 2021 11:48:23 +0800
Subject: [PATCH] printk: fix deadlock when kernel panic
printk_safe_flush_on_panic() caused the following deadlock on our
server:
CPU0: CPU1:
panic rcu_dump_cpu_stacks
kdump_nmi_shootdown_cpus nmi_trigger_cpumask_backtrace
register_nmi_handler(crash_nmi_callback) printk_safe_flush
__printk_safe_flush
raw_spin_lock_irqsave(&read_lock)
// send NMI to other processors
apic_send_IPI_allbutself(NMI_VECTOR)
// NMI interrupt, dead loop
crash_nmi_callback
printk_safe_flush_on_panic
printk_safe_flush
__printk_safe_flush
// deadlock
raw_spin_lock_irqsave(&read_lock)
DEADLOCK: read_lock is taken on CPU1 and will never get released.
It happens when panic() stops a CPU by NMI while it has been in
the middle of printk_safe_flush().
Handle the lock the same way as logbuf_lock. The printk_safe buffers
are flushed only when both locks can be safely taken. It can avoid
the deadlock _in this particular case_ at expense of losing contents
of printk_safe buffers.
Note: It would actually be safe to re-init the locks when all CPUs were
stopped by NMI. But it would require passing this information
from arch-specific code. It is not worth the complexity.
Especially because logbuf_lock and printk_safe buffers have been
obsoleted by the lockless ring buffer.
Fixes: cf9b1106c81c ("printk/nmi: flush NMI messages on the system panic")
Signed-off-by: Muchun Song <songmuchun(a)bytedance.com>
Reviewed-by: Petr Mladek <pmladek(a)suse.com>
Cc: <stable(a)vger.kernel.org>
Acked-by: Sergey Senozhatsky <sergey.senozhatsky(a)gmail.com>
Signed-off-by: Petr Mladek <pmladek(a)suse.com>
Link: https://lore.kernel.org/r/20210210034823.64867-1-songmuchun@bytedance.com
diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c
index a0e6f746de6c..2e9e3ed7d63e 100644
--- a/kernel/printk/printk_safe.c
+++ b/kernel/printk/printk_safe.c
@@ -45,6 +45,8 @@ struct printk_safe_seq_buf {
static DEFINE_PER_CPU(struct printk_safe_seq_buf, safe_print_seq);
static DEFINE_PER_CPU(int, printk_context);
+static DEFINE_RAW_SPINLOCK(safe_read_lock);
+
#ifdef CONFIG_PRINTK_NMI
static DEFINE_PER_CPU(struct printk_safe_seq_buf, nmi_print_seq);
#endif
@@ -180,8 +182,6 @@ static void report_message_lost(struct printk_safe_seq_buf *s)
*/
static void __printk_safe_flush(struct irq_work *work)
{
- static raw_spinlock_t read_lock =
- __RAW_SPIN_LOCK_INITIALIZER(read_lock);
struct printk_safe_seq_buf *s =
container_of(work, struct printk_safe_seq_buf, work);
unsigned long flags;
@@ -195,7 +195,7 @@ static void __printk_safe_flush(struct irq_work *work)
* different CPUs. This is especially important when printing
* a backtrace.
*/
- raw_spin_lock_irqsave(&read_lock, flags);
+ raw_spin_lock_irqsave(&safe_read_lock, flags);
i = 0;
more:
@@ -232,7 +232,7 @@ static void __printk_safe_flush(struct irq_work *work)
out:
report_message_lost(s);
- raw_spin_unlock_irqrestore(&read_lock, flags);
+ raw_spin_unlock_irqrestore(&safe_read_lock, flags);
}
/**
@@ -278,6 +278,14 @@ void printk_safe_flush_on_panic(void)
raw_spin_lock_init(&logbuf_lock);
}
+ if (raw_spin_is_locked(&safe_read_lock)) {
+ if (num_online_cpus() > 1)
+ return;
+
+ debug_locks_off();
+ raw_spin_lock_init(&safe_read_lock);
+ }
+
printk_safe_flush();
}
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?…
This patch can be apply to 5.10 and 5.11 directly.
binfmt_misc: pass binfmt_misc flags to the interpreter
It can be useful to the interpreter to know which flags are in use.
For instance, knowing if the preserve-argv[0] is in use would allow to
skip the pathname argument.
This patch uses an unused auxiliary vector, AT_FLAGS, to add a flag to
inform interpreter if the preserve-argv[0] is enabled.
Note by Helge Deller:
The real-world user of this patch is qemu-user, which needs to know if
it has to preserve the argv[0]. See Debian bug #970460.
Signed-off-by: Laurent Vivier <laurent(a)vivier.eu>
Reviewed-by: YunQiang Su <ysu(a)wavecomp.com>
URL: http://bugs.debian.org/970460
Signed-off-by: Helge Deller <deller(a)gmx.de>
Hello,
Please consider queueing
commit 3c0be5849259b729580c23549330973a2dd513a2
Author: Paul Burton <paulburton(a)kernel.org>
MIPS: Drop 32-bit asm string functions
The commit is part of 5.5-rc1.
We got a bug report about following nftables rule not matching
even if it should on Linux 5.4.y mips32:
meta iifname "br-vlan"
'iifname' uses strncpy(..., IFNAMSIZ) to copy dev->name to the register.
The MIPS asm function doesn't zero-pad the remaining bytes, but that is
needed for the compare op to work reliably.
The reporter confirmed that this removal fixes the issue, the generic C
version behaves as expected.
The patch doesn't apply cleanly to 5.4, there is a minor conflict
related to FORTIFY macro, but its easily resolved as all code
is removed.
I did not try earlier 4.4.y releases but I susepct they should also get
this patch applied.
I can send a 5.4.y backport if thats preferred.
Thanks.
[Backport of commit 182f709c5cff683e6732d04c78e328de0532284f to
4.19-stable; context diff in second hunk]
CCW_CMD_READ_STATUS was introduced with revision 2 of virtio-ccw,
and drivers should only rely on it being implemented when they
negotiated at least that revision with the device.
However, virtio_ccw_get_status() issued READ_STATUS for any
device operating at least at revision 1. If the device accepts
READ_STATUS regardless of the negotiated revision (which some
implementations like QEMU do, even though the spec currently does
not allow it), everything works as intended. While a device
rejecting the command should also be handled gracefully, we will
not be able to see any changes the device makes to the status,
such as setting NEEDS_RESET or setting the status to zero after
a completed reset.
We negotiated the revision to at most 1, as we never bumped the
maximum revision; let's do that now and properly send READ_STATUS
only if we are operating at least at revision 2.
Cc: stable(a)vger.kernel.org
Fixes: 7d3ce5ab9430 ("virtio/s390: support READ_STATUS command for virtio-ccw")
Reviewed-by: Halil Pasic <pasic(a)linux.ibm.com>
Signed-off-by: Cornelia Huck <cohuck(a)redhat.com>
Signed-off-by: Vasily Gorbik <gor(a)linux.ibm.com>
Link: https://lore.kernel.org/r/20210216110645.1087321-1-cohuck@redhat.com
Signed-off-by: Vasily Gorbik <gor(a)linux.ibm.com>
Signed-off-by: Cornelia Huck <cohuck(a)redhat.com>
---
drivers/s390/virtio/virtio_ccw.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c
index 67efdf25657f..0447ae2781ba 100644
--- a/drivers/s390/virtio/virtio_ccw.c
+++ b/drivers/s390/virtio/virtio_ccw.c
@@ -103,7 +103,7 @@ struct virtio_rev_info {
};
/* the highest virtio-ccw revision we support */
-#define VIRTIO_CCW_REV_MAX 1
+#define VIRTIO_CCW_REV_MAX 2
struct virtio_ccw_vq_info {
struct virtqueue *vq;
@@ -911,7 +911,7 @@ static u8 virtio_ccw_get_status(struct virtio_device *vdev)
u8 old_status = *vcdev->status;
struct ccw1 *ccw;
- if (vcdev->revision < 1)
+ if (vcdev->revision < 2)
return *vcdev->status;
ccw = kzalloc(sizeof(*ccw), GFP_DMA | GFP_KERNEL);
--
2.26.2
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From dbfee5aee7e54f83d96ceb8e3e80717fac62ad63 Mon Sep 17 00:00:00 2001
From: Mike Kravetz <mike.kravetz(a)oracle.com>
Date: Wed, 24 Feb 2021 12:07:50 -0800
Subject: [PATCH] hugetlb: fix update_and_free_page contig page struct
assumption
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
page structs are not guaranteed to be contiguous for gigantic pages. The
routine update_and_free_page can encounter a gigantic page, yet it assumes
page structs are contiguous when setting page flags in subpages.
If update_and_free_page encounters non-contiguous page structs, we can see
“BUG: Bad page state in process …” errors.
Non-contiguous page structs are generally not an issue. However, they can
exist with a specific kernel configuration and hotplug operations. For
example: Configure the kernel with CONFIG_SPARSEMEM and
!CONFIG_SPARSEMEM_VMEMMAP. Then, hotplug add memory for the area where
the gigantic page will be allocated. Zi Yan outlined steps to reproduce
here [1].
[1] https://lore.kernel.org/linux-mm/16F7C58B-4D79-41C5-9B64-A1A1628F4AF2@nvidi…
Link: https://lkml.kernel.org/r/20210217184926.33567-1-mike.kravetz@oracle.com
Fixes: 944d9fec8d7a ("hugetlb: add support for gigantic page allocation at runtime")
Signed-off-by: Zi Yan <ziy(a)nvidia.com>
Signed-off-by: Mike Kravetz <mike.kravetz(a)oracle.com>
Cc: Zi Yan <ziy(a)nvidia.com>
Cc: Davidlohr Bueso <dbueso(a)suse.de>
Cc: "Kirill A . Shutemov" <kirill.shutemov(a)linux.intel.com>
Cc: Andrea Arcangeli <aarcange(a)redhat.com>
Cc: Matthew Wilcox <willy(a)infradead.org>
Cc: Oscar Salvador <osalvador(a)suse.de>
Cc: Joao Martins <joao.m.martins(a)oracle.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 6786b313e6ac..3f6a50373d4f 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1321,14 +1321,16 @@ static inline void destroy_compound_gigantic_page(struct page *page,
static void update_and_free_page(struct hstate *h, struct page *page)
{
int i;
+ struct page *subpage = page;
if (hstate_is_gigantic(h) && !gigantic_page_runtime_supported())
return;
h->nr_huge_pages--;
h->nr_huge_pages_node[page_to_nid(page)]--;
- for (i = 0; i < pages_per_huge_page(h); i++) {
- page[i].flags &= ~(1 << PG_locked | 1 << PG_error |
+ for (i = 0; i < pages_per_huge_page(h);
+ i++, subpage = mem_map_next(subpage, page, i)) {
+ subpage->flags &= ~(1 << PG_locked | 1 << PG_error |
1 << PG_referenced | 1 << PG_dirty |
1 << PG_active | 1 << PG_private |
1 << PG_writeback);
Hi,
the attached patches are the backport of these 2 patches for tools/Makefile
that allows building when OpenSSL is not at the default location. They apply
cleanly to both to 4.4, 4.9, and 4.14. Backports to 4.19 and 5.4 were already
sent and are already queued up.
Greetings,
Eike
--
Rolf Eike Beer, emlix GmbH, http://www.emlix.com
Fon +49 551 30664-0, Fax +49 551 30664-11
Gothaer Platz 3, 37083 Göttingen, Germany
Sitz der Gesellschaft: Göttingen, Amtsgericht Göttingen HR B 3160
Geschäftsführung: Heike Jordan, Dr. Uwe Kracke – Ust-IdNr.: DE 205 198 055
emlix - smart embedded open source
The fixes made in commit: 4ae5798004d8 ("iwlwifi: pcie: add a NULL check in
iwl_pcie_txq_unmap") is not enough in 4.4.y tree.. This still have problems
with null references. This provides the correct fix.
Also, this is a problem only in 4.4.y. This patch has been applied to other
LTS trees, but with the correct fixes.
Fixes: 4ae5798004d8 ("iwlwifi: pcie: add a NULL check in iwl_pcie_txq_unmap")
Cc: stable(a)vger.kernel.org
Cc: Emmanuel Grumbach <emmanuel.grumbach(a)intel.com>
Cc: Luca Coelho <luciano.coelho(a)intel.com>
Cc: Kalle Valo <kvalo(a)codeaurora.org>
Cc: Sasha Levin <sashal(a)kernel.org>
Signed-off-by: Nobuhiro Iwamatsu (CIP) <nobuhiro1.iwamatsu(a)toshiba.co.jp>
---
drivers/net/wireless/iwlwifi/pcie/tx.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/net/wireless/iwlwifi/pcie/tx.c b/drivers/net/wireless/iwlwifi/pcie/tx.c
index cb03c2855019..7584796131fa 100644
--- a/drivers/net/wireless/iwlwifi/pcie/tx.c
+++ b/drivers/net/wireless/iwlwifi/pcie/tx.c
@@ -583,13 +583,15 @@ static void iwl_pcie_txq_unmap(struct iwl_trans *trans, int txq_id)
{
struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
struct iwl_txq *txq = &trans_pcie->txq[txq_id];
- struct iwl_queue *q = &txq->q;
+ struct iwl_queue *q;
if (!txq) {
IWL_ERR(trans, "Trying to free a queue that wasn't allocated?\n");
return;
}
+ q = &txq->q;
+
spin_lock_bh(&txq->lock);
while (q->write_ptr != q->read_ptr) {
IWL_DEBUG_TX_REPLY(trans, "Q %d Free %d\n",
--
2.30.0.rc2
From: Peter Zijlstra <peterz(a)infradead.org>
commit 499f5aca2cdd5e958b27e2655e7e7f82524f46b1 upstream.
futex_top_waiter() returns the top-waiter on the pi_mutex. Assinging
this to a variable 'match' totally obscures the code.
Signed-off-by: Peter Zijlstra (Intel) <peterz(a)infradead.org>
Cc: juri.lelli(a)arm.com
Cc: bigeasy(a)linutronix.de
Cc: xlpang(a)redhat.com
Cc: rostedt(a)goodmis.org
Cc: mathieu.desnoyers(a)efficios.com
Cc: jdesfossez(a)efficios.com
Cc: dvhart(a)infradead.org
Cc: bristot(a)redhat.com
Link: http://lkml.kernel.org/r/20170322104151.554710645@infradead.org
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
[bwh: Backported to 4.9: adjust context]
Signed-off-by: Ben Hutchings <ben(a)decadent.org.uk>
---
kernel/futex.c | 28 ++++++++++++++--------------
1 file changed, 14 insertions(+), 14 deletions(-)
diff --git a/kernel/futex.c b/kernel/futex.c
index 0b49a8e1e1be..1cb5064548d6 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1352,14 +1352,14 @@ static int lookup_pi_state(u32 __user *uaddr, u32 uval,
union futex_key *key, struct futex_pi_state **ps,
struct task_struct **exiting)
{
- struct futex_q *match = futex_top_waiter(hb, key);
+ struct futex_q *top_waiter = futex_top_waiter(hb, key);
/*
* If there is a waiter on that futex, validate it and
* attach to the pi_state when the validation succeeds.
*/
- if (match)
- return attach_to_pi_state(uaddr, uval, match->pi_state, ps);
+ if (top_waiter)
+ return attach_to_pi_state(uaddr, uval, top_waiter->pi_state, ps);
/*
* We are the first waiter - try to look up the owner based on
@@ -1414,7 +1414,7 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
int set_waiters)
{
u32 uval, newval, vpid = task_pid_vnr(task);
- struct futex_q *match;
+ struct futex_q *top_waiter;
int ret;
/*
@@ -1440,9 +1440,9 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
* Lookup existing state first. If it exists, try to attach to
* its pi_state.
*/
- match = futex_top_waiter(hb, key);
- if (match)
- return attach_to_pi_state(uaddr, uval, match->pi_state, ps);
+ top_waiter = futex_top_waiter(hb, key);
+ if (top_waiter)
+ return attach_to_pi_state(uaddr, uval, top_waiter->pi_state, ps);
/*
* No waiter and user TID is 0. We are here because the
@@ -1532,11 +1532,11 @@ static void mark_wake_futex(struct wake_q_head *wake_q, struct futex_q *q)
q->lock_ptr = NULL;
}
-static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this,
+static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *top_waiter,
struct futex_hash_bucket *hb)
{
struct task_struct *new_owner;
- struct futex_pi_state *pi_state = this->pi_state;
+ struct futex_pi_state *pi_state = top_waiter->pi_state;
u32 uninitialized_var(curval), newval;
WAKE_Q(wake_q);
bool deboost;
@@ -1557,7 +1557,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this,
/*
* When we interleave with futex_lock_pi() where it does
- * rt_mutex_timed_futex_lock(), we might observe @this futex_q waiter,
+ * rt_mutex_timed_futex_lock(), we might observe @top_waiter futex_q waiter,
* but the rt_mutex's wait_list can be empty (either still, or again,
* depending on which side we land).
*
@@ -2975,7 +2975,7 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
u32 uninitialized_var(curval), uval, vpid = task_pid_vnr(current);
union futex_key key = FUTEX_KEY_INIT;
struct futex_hash_bucket *hb;
- struct futex_q *match;
+ struct futex_q *top_waiter;
int ret;
retry:
@@ -2999,9 +2999,9 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
* all and we at least want to know if user space fiddled
* with the futex value instead of blindly unlocking.
*/
- match = futex_top_waiter(hb, &key);
- if (match) {
- ret = wake_futex_pi(uaddr, uval, match, hb);
+ top_waiter = futex_top_waiter(hb, &key);
+ if (top_waiter) {
+ ret = wake_futex_pi(uaddr, uval, top_waiter, hb);
/*
* In case of success wake_futex_pi dropped the hash
* bucket lock.
From: Thomas Gleixner <tglx(a)linutronix.de>
commit 12bb3f7f1b03d5913b3f9d4236a488aa7774dfe9 upstream
In case that futex_lock_pi() was aborted by a signal or a timeout and the
task returned without acquiring the rtmutex, but is the designated owner of
the futex due to a concurrent futex_unlock_pi() fixup_owner() is invoked to
establish consistent state. In that case it invokes fixup_pi_state_owner()
which in turn tries to acquire the rtmutex again. If that succeeds then it
does not propagate this success to fixup_owner() and futex_lock_pi()
returns -EINTR or -ETIMEOUT despite having the futex locked.
Return success from fixup_pi_state_owner() in all cases where the current
task owns the rtmutex and therefore the futex and propagate it correctly
through fixup_owner(). Fixup the other callsite which does not expect a
positive return value.
Fixes: c1e2f0eaf015 ("futex: Avoid violating the 10th rule of futex")
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz(a)infradead.org>
Cc: stable(a)vger.kernel.org
[Sharan: Backported patch for kernel 4.4.y. Also folded in is a part
of the cleanup patch d7c5ed73b19c("futex: Remove needless goto's")]
Signed-off-by: Sharan Turlapati <sturlapati(a)vmware.com>
---
kernel/futex.c | 24 ++++++++++++------------
1 file changed, 12 insertions(+), 12 deletions(-)
diff --git a/kernel/futex.c b/kernel/futex.c
index 199e63c5b612..49cd9f6316e5 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -2287,7 +2287,7 @@ retry:
}
if (__rt_mutex_futex_trylock(&pi_state->pi_mutex)) {
- /* We got the lock after all, nothing to fix. */
+ /* We got the lock. pi_state is correct. Tell caller */
return 1;
}
@@ -2329,7 +2329,7 @@ retry:
*/
pi_state_update_owner(pi_state, newowner);
- return 0;
+ return argowner == current;
/*
* To handle the page fault we need to drop the hash bucket
@@ -2412,8 +2412,6 @@ static long futex_wait_restart(struct restart_block *restart);
*/
static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
{
- int ret = 0;
-
if (locked) {
/*
* Got the lock. We might not be the anticipated owner if we
@@ -2424,8 +2422,8 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
* stable state, anything else needs more attention.
*/
if (q->pi_state->owner != current)
- ret = fixup_pi_state_owner(uaddr, q, current);
- goto out;
+ return fixup_pi_state_owner(uaddr, q, current);
+ return 1;
}
/*
@@ -2436,10 +2434,8 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
* Another speculative read; pi_state->owner == current is unstable
* but needs our attention.
*/
- if (q->pi_state->owner == current) {
- ret = fixup_pi_state_owner(uaddr, q, NULL);
- goto out;
- }
+ if (q->pi_state->owner == current)
+ return fixup_pi_state_owner(uaddr, q, NULL);
/*
* Paranoia check. If we did not take the lock, then we should not be
@@ -2448,8 +2444,7 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
if (WARN_ON_ONCE(rt_mutex_owner(&q->pi_state->pi_mutex) == current))
return fixup_pi_state_owner(uaddr, q, current);
-out:
- return ret ? ret : locked;
+ return 0;
}
/**
@@ -3071,6 +3066,11 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
*/
free_pi_state(q.pi_state);
spin_unlock(q.lock_ptr);
+ /*
+ * Adjust the return value. It's either -EFAULT or
+ * success (1) but the caller expects 0 for success.
+ */
+ ret = ret < 0 ? ret : 0;
}
} else {
struct rt_mutex *pi_mutex;
--
2.28.0
Commit 5d069dbe8aaf ("fuse: fix bad inode") replaced make_bad_inode()
in fuse_iget() with a private implementation fuse_make_bad().
The private implementation fails to remove the bad inode from inode
cache, so the retry loop with iget5_locked() finds the same bad inode
and marks it bad forever.
kmsg snip:
[ ] rcu: INFO: rcu_sched self-detected stall on CPU
...
[ ] ? bit_wait_io+0x50/0x50
[ ] ? fuse_init_file_inode+0x70/0x70
[ ] ? find_inode.isra.32+0x60/0xb0
[ ] ? fuse_init_file_inode+0x70/0x70
[ ] ilookup5_nowait+0x65/0x90
[ ] ? fuse_init_file_inode+0x70/0x70
[ ] ilookup5.part.36+0x2e/0x80
[ ] ? fuse_init_file_inode+0x70/0x70
[ ] ? fuse_inode_eq+0x20/0x20
[ ] iget5_locked+0x21/0x80
[ ] ? fuse_inode_eq+0x20/0x20
[ ] fuse_iget+0x96/0x1b0
Fixes: 5d069dbe8aaf ("fuse: fix bad inode")
Cc: stable(a)vger.kernel.org # 5.10+
Signed-off-by: Amir Goldstein <amir73il(a)gmail.com>
---
Miklos,
This live lock showed up on a stress test in my system after upgarde to
kernel 5.10. It's 100% reproducible when trying to rsync from a davfs2
filesystem with severel 1000 files.
Thanks,
Amir.
fs/fuse/fuse_i.h | 1 +
1 file changed, 1 insertion(+)
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 68cca8d4db6e..63d97a15ffde 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -863,6 +863,7 @@ static inline u64 fuse_get_attr_version(struct fuse_conn *fc)
static inline void fuse_make_bad(struct inode *inode)
{
+ remove_inode_hash(inode);
set_bit(FUSE_I_BAD, &get_fuse_inode(inode)->state);
}
--
2.30.0
Commit 44d30d622821d ("phy: cadence: Add driver for Sierra PHY")
de-asserts PHY_RESET even before the configurations are loaded in
phy_init(). However PHY_RESET should be de-asserted only after
all the configurations has been initialized, instead of de-asserting
in probe. Fix it here.
Fixes: 44d30d622821d ("phy: cadence: Add driver for Sierra PHY")
Signed-off-by: Kishon Vijay Abraham I <kishon(a)ti.com>
Cc: <stable(a)vger.kernel.org> # v5.4+
---
drivers/phy/cadence/phy-cadence-sierra.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/drivers/phy/cadence/phy-cadence-sierra.c b/drivers/phy/cadence/phy-cadence-sierra.c
index 26a0badabe38..19f32ae877b9 100644
--- a/drivers/phy/cadence/phy-cadence-sierra.c
+++ b/drivers/phy/cadence/phy-cadence-sierra.c
@@ -319,6 +319,12 @@ static int cdns_sierra_phy_on(struct phy *gphy)
u32 val;
int ret;
+ ret = reset_control_deassert(sp->phy_rst);
+ if (ret) {
+ dev_err(dev, "Failed to take the PHY out of reset\n");
+ return ret;
+ }
+
/* Take the PHY lane group out of reset */
ret = reset_control_deassert(ins->lnk_rst);
if (ret) {
@@ -616,7 +622,6 @@ static int cdns_sierra_phy_probe(struct platform_device *pdev)
pm_runtime_enable(dev);
phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate);
- reset_control_deassert(sp->phy_rst);
return PTR_ERR_OR_ZERO(phy_provider);
put_child:
--
2.17.1
From: Nadav Amit <namit(a)vmware.com>
Userfaultfd self-test fails occasionally, indicating a memory
corruption.
Analyzing this problem indicates that there is a real bug since
mmap_lock is only taken for read in mwriteprotect_range() and defers
flushes, and since there is insufficient consideration of concurrent
deferred TLB flushes in wp_page_copy(). Although the PTE is flushed from
the TLBs in wp_page_copy(), this flush takes place after the copy has
already been performed, and therefore changes of the page are possible
between the time of the copy and the time in which the PTE is flushed.
To make matters worse, memory-unprotection using userfaultfd also poses
a problem. Although memory unprotection is logically a promotion of PTE
permissions, and therefore should not require a TLB flush, the current
userrfaultfd code might actually cause a demotion of the architectural
PTE permission: when userfaultfd_writeprotect() unprotects memory
region, it unintentionally *clears* the RW-bit if it was already set.
Note that this unprotecting a PTE that is not write-protected is a valid
use-case: the userfaultfd monitor might ask to unprotect a region that
holds both write-protected and write-unprotected PTEs.
The scenario that happens in selftests/vm/userfaultfd is as follows:
cpu0 cpu1 cpu2
---- ---- ----
[ Writable PTE
cached in TLB ]
userfaultfd_writeprotect()
[ write-*unprotect* ]
mwriteprotect_range()
mmap_read_lock()
change_protection()
change_protection_range()
...
change_pte_range()
[ *clear* “write”-bit ]
[ defer TLB flushes ]
[ page-fault ]
...
wp_page_copy()
cow_user_page()
[ copy page ]
[ write to old
page ]
...
set_pte_at_notify()
A similar scenario can happen:
cpu0 cpu1 cpu2 cpu3
---- ---- ---- ----
[ Writable PTE
cached in TLB ]
userfaultfd_writeprotect()
[ write-protect ]
[ deferred TLB flush ]
userfaultfd_writeprotect()
[ write-unprotect ]
[ deferred TLB flush]
[ page-fault ]
wp_page_copy()
cow_user_page()
[ copy page ]
... [ write to page ]
set_pte_at_notify()
This race exists since commit 292924b26024 ("userfaultfd: wp: apply
_PAGE_UFFD_WP bit"). Yet, as Yu Zhao pointed, these races became
apparent since commit 09854ba94c6a ("mm: do_wp_page() simplification")
which made wp_page_copy() more likely to take place, specifically if
page_count(page) > 1.
To resolve the aforementioned races, check whether there are pending
flushes on uffd-write-protected VMAs, and if there are, perform a flush
before doing the COW.
Further optimizations will follow to avoid during uffd-write-unprotect
unnecassary PTE write-protection and TLB flushes.
Cc: Andrea Arcangeli <aarcange(a)redhat.com>
Cc: Andy Lutomirski <luto(a)kernel.org>
Cc: Pavel Emelyanov <xemul(a)openvz.org>
Cc: Mike Kravetz <mike.kravetz(a)oracle.com>
Cc: Mike Rapoport <rppt(a)linux.vnet.ibm.com>
Cc: Minchan Kim <minchan(a)kernel.org>
Cc: Will Deacon <will(a)kernel.org>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: stable(a)vger.kernel.org # 5.9+
Suggested-by: Yu Zhao <yuzhao(a)google.com>
Reviewed-by: Peter Xu <peterx(a)redhat.com>
Tested-by: Peter Xu <peterx(a)redhat.com>
Fixes: 09854ba94c6a ("mm: do_wp_page() simplification")
Signed-off-by: Nadav Amit <namit(a)vmware.com>
---
v3->v4:
* Fix the "Fixes" tag for real [Peter Xu]
* Reviewed-by, suggested-by tags [Peter Xu]
* Adding unlikely() [Peter Xu]
v2->v3:
* Do not acquire mmap_lock for write, flush conditionally instead [Yu]
* Change the fixes tag to the patch that made the race apparent [Yu]
* Removing patch to avoid write-protect on uffd unprotect. More
comprehensive solution to follow (and avoid the TLB flush as well).
---
mm/memory.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/mm/memory.c b/mm/memory.c
index 9e8576a83147..79253cb3bcd5 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3092,6 +3092,14 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf)
return handle_userfault(vmf, VM_UFFD_WP);
}
+ /*
+ * Userfaultfd write-protect can defer flushes. Ensure the TLB
+ * is flushed in this case before copying.
+ */
+ if (unlikely(userfaultfd_wp(vmf->vma) &&
+ mm_tlb_flush_pending(vmf->vma->vm_mm)))
+ flush_tlb_page(vmf->vma, vmf->address);
+
vmf->page = vm_normal_page(vma, vmf->address, vmf->orig_pte);
if (!vmf->page) {
/*
--
2.25.1