The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From f37c563bab4297024c300b05c8f48430e323809d Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba(a)suse.com>
Date: Fri, 10 Jul 2020 09:49:56 +0200
Subject: [PATCH] btrfs: add missing check for nocow and compression inode
flags
User Forza reported on IRC that some invalid combinations of file
attributes are accepted by chattr.
The NODATACOW and compression file flags/attributes are mutually
exclusive, but they could be set by 'chattr +c +C' on an empty file. The
nodatacow will be in effect because it's checked first in
btrfs_run_delalloc_range.
Extend the flag validation to catch the following cases:
- input flags are conflicting
- old and new flags are conflicting
- initialize the local variable with inode flags after inode ls locked
Inode attributes take precedence over mount options and are an
independent setting.
Nocompress would be a no-op with nodatacow, but we don't want to mix
any compression-related options with nodatacow.
CC: stable(a)vger.kernel.org # 4.4+
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index b4ddf51ae377..bd3511c5ca81 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -164,8 +164,11 @@ static int btrfs_ioctl_getflags(struct file *file, void __user *arg)
return 0;
}
-/* Check if @flags are a supported and valid set of FS_*_FL flags */
-static int check_fsflags(unsigned int flags)
+/*
+ * Check if @flags are a supported and valid set of FS_*_FL flags and that
+ * the old and new flags are not conflicting
+ */
+static int check_fsflags(unsigned int old_flags, unsigned int flags)
{
if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \
FS_NOATIME_FL | FS_NODUMP_FL | \
@@ -174,9 +177,19 @@ static int check_fsflags(unsigned int flags)
FS_NOCOW_FL))
return -EOPNOTSUPP;
+ /* COMPR and NOCOMP on new/old are valid */
if ((flags & FS_NOCOMP_FL) && (flags & FS_COMPR_FL))
return -EINVAL;
+ if ((flags & FS_COMPR_FL) && (flags & FS_NOCOW_FL))
+ return -EINVAL;
+
+ /* NOCOW and compression options are mutually exclusive */
+ if ((old_flags & FS_NOCOW_FL) && (flags & (FS_COMPR_FL | FS_NOCOMP_FL)))
+ return -EINVAL;
+ if ((flags & FS_NOCOW_FL) && (old_flags & (FS_COMPR_FL | FS_NOCOMP_FL)))
+ return -EINVAL;
+
return 0;
}
@@ -190,7 +203,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
unsigned int fsflags, old_fsflags;
int ret;
const char *comp = NULL;
- u32 binode_flags = binode->flags;
+ u32 binode_flags;
if (!inode_owner_or_capable(inode))
return -EPERM;
@@ -201,22 +214,23 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
if (copy_from_user(&fsflags, arg, sizeof(fsflags)))
return -EFAULT;
- ret = check_fsflags(fsflags);
- if (ret)
- return ret;
-
ret = mnt_want_write_file(file);
if (ret)
return ret;
inode_lock(inode);
-
fsflags = btrfs_mask_fsflags_for_type(inode, fsflags);
old_fsflags = btrfs_inode_flags_to_fsflags(binode->flags);
+
ret = vfs_ioc_setflags_prepare(inode, old_fsflags, fsflags);
if (ret)
goto out_unlock;
+ ret = check_fsflags(old_fsflags, fsflags);
+ if (ret)
+ goto out_unlock;
+
+ binode_flags = binode->flags;
if (fsflags & FS_SYNC_FL)
binode_flags |= BTRFS_INODE_SYNC;
else
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From f37c563bab4297024c300b05c8f48430e323809d Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba(a)suse.com>
Date: Fri, 10 Jul 2020 09:49:56 +0200
Subject: [PATCH] btrfs: add missing check for nocow and compression inode
flags
User Forza reported on IRC that some invalid combinations of file
attributes are accepted by chattr.
The NODATACOW and compression file flags/attributes are mutually
exclusive, but they could be set by 'chattr +c +C' on an empty file. The
nodatacow will be in effect because it's checked first in
btrfs_run_delalloc_range.
Extend the flag validation to catch the following cases:
- input flags are conflicting
- old and new flags are conflicting
- initialize the local variable with inode flags after inode ls locked
Inode attributes take precedence over mount options and are an
independent setting.
Nocompress would be a no-op with nodatacow, but we don't want to mix
any compression-related options with nodatacow.
CC: stable(a)vger.kernel.org # 4.4+
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index b4ddf51ae377..bd3511c5ca81 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -164,8 +164,11 @@ static int btrfs_ioctl_getflags(struct file *file, void __user *arg)
return 0;
}
-/* Check if @flags are a supported and valid set of FS_*_FL flags */
-static int check_fsflags(unsigned int flags)
+/*
+ * Check if @flags are a supported and valid set of FS_*_FL flags and that
+ * the old and new flags are not conflicting
+ */
+static int check_fsflags(unsigned int old_flags, unsigned int flags)
{
if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \
FS_NOATIME_FL | FS_NODUMP_FL | \
@@ -174,9 +177,19 @@ static int check_fsflags(unsigned int flags)
FS_NOCOW_FL))
return -EOPNOTSUPP;
+ /* COMPR and NOCOMP on new/old are valid */
if ((flags & FS_NOCOMP_FL) && (flags & FS_COMPR_FL))
return -EINVAL;
+ if ((flags & FS_COMPR_FL) && (flags & FS_NOCOW_FL))
+ return -EINVAL;
+
+ /* NOCOW and compression options are mutually exclusive */
+ if ((old_flags & FS_NOCOW_FL) && (flags & (FS_COMPR_FL | FS_NOCOMP_FL)))
+ return -EINVAL;
+ if ((flags & FS_NOCOW_FL) && (old_flags & (FS_COMPR_FL | FS_NOCOMP_FL)))
+ return -EINVAL;
+
return 0;
}
@@ -190,7 +203,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
unsigned int fsflags, old_fsflags;
int ret;
const char *comp = NULL;
- u32 binode_flags = binode->flags;
+ u32 binode_flags;
if (!inode_owner_or_capable(inode))
return -EPERM;
@@ -201,22 +214,23 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
if (copy_from_user(&fsflags, arg, sizeof(fsflags)))
return -EFAULT;
- ret = check_fsflags(fsflags);
- if (ret)
- return ret;
-
ret = mnt_want_write_file(file);
if (ret)
return ret;
inode_lock(inode);
-
fsflags = btrfs_mask_fsflags_for_type(inode, fsflags);
old_fsflags = btrfs_inode_flags_to_fsflags(binode->flags);
+
ret = vfs_ioc_setflags_prepare(inode, old_fsflags, fsflags);
if (ret)
goto out_unlock;
+ ret = check_fsflags(old_fsflags, fsflags);
+ if (ret)
+ goto out_unlock;
+
+ binode_flags = binode->flags;
if (fsflags & FS_SYNC_FL)
binode_flags |= BTRFS_INODE_SYNC;
else
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From f37c563bab4297024c300b05c8f48430e323809d Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba(a)suse.com>
Date: Fri, 10 Jul 2020 09:49:56 +0200
Subject: [PATCH] btrfs: add missing check for nocow and compression inode
flags
User Forza reported on IRC that some invalid combinations of file
attributes are accepted by chattr.
The NODATACOW and compression file flags/attributes are mutually
exclusive, but they could be set by 'chattr +c +C' on an empty file. The
nodatacow will be in effect because it's checked first in
btrfs_run_delalloc_range.
Extend the flag validation to catch the following cases:
- input flags are conflicting
- old and new flags are conflicting
- initialize the local variable with inode flags after inode ls locked
Inode attributes take precedence over mount options and are an
independent setting.
Nocompress would be a no-op with nodatacow, but we don't want to mix
any compression-related options with nodatacow.
CC: stable(a)vger.kernel.org # 4.4+
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index b4ddf51ae377..bd3511c5ca81 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -164,8 +164,11 @@ static int btrfs_ioctl_getflags(struct file *file, void __user *arg)
return 0;
}
-/* Check if @flags are a supported and valid set of FS_*_FL flags */
-static int check_fsflags(unsigned int flags)
+/*
+ * Check if @flags are a supported and valid set of FS_*_FL flags and that
+ * the old and new flags are not conflicting
+ */
+static int check_fsflags(unsigned int old_flags, unsigned int flags)
{
if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \
FS_NOATIME_FL | FS_NODUMP_FL | \
@@ -174,9 +177,19 @@ static int check_fsflags(unsigned int flags)
FS_NOCOW_FL))
return -EOPNOTSUPP;
+ /* COMPR and NOCOMP on new/old are valid */
if ((flags & FS_NOCOMP_FL) && (flags & FS_COMPR_FL))
return -EINVAL;
+ if ((flags & FS_COMPR_FL) && (flags & FS_NOCOW_FL))
+ return -EINVAL;
+
+ /* NOCOW and compression options are mutually exclusive */
+ if ((old_flags & FS_NOCOW_FL) && (flags & (FS_COMPR_FL | FS_NOCOMP_FL)))
+ return -EINVAL;
+ if ((flags & FS_NOCOW_FL) && (old_flags & (FS_COMPR_FL | FS_NOCOMP_FL)))
+ return -EINVAL;
+
return 0;
}
@@ -190,7 +203,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
unsigned int fsflags, old_fsflags;
int ret;
const char *comp = NULL;
- u32 binode_flags = binode->flags;
+ u32 binode_flags;
if (!inode_owner_or_capable(inode))
return -EPERM;
@@ -201,22 +214,23 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
if (copy_from_user(&fsflags, arg, sizeof(fsflags)))
return -EFAULT;
- ret = check_fsflags(fsflags);
- if (ret)
- return ret;
-
ret = mnt_want_write_file(file);
if (ret)
return ret;
inode_lock(inode);
-
fsflags = btrfs_mask_fsflags_for_type(inode, fsflags);
old_fsflags = btrfs_inode_flags_to_fsflags(binode->flags);
+
ret = vfs_ioc_setflags_prepare(inode, old_fsflags, fsflags);
if (ret)
goto out_unlock;
+ ret = check_fsflags(old_fsflags, fsflags);
+ if (ret)
+ goto out_unlock;
+
+ binode_flags = binode->flags;
if (fsflags & FS_SYNC_FL)
binode_flags |= BTRFS_INODE_SYNC;
else
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From f37c563bab4297024c300b05c8f48430e323809d Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba(a)suse.com>
Date: Fri, 10 Jul 2020 09:49:56 +0200
Subject: [PATCH] btrfs: add missing check for nocow and compression inode
flags
User Forza reported on IRC that some invalid combinations of file
attributes are accepted by chattr.
The NODATACOW and compression file flags/attributes are mutually
exclusive, but they could be set by 'chattr +c +C' on an empty file. The
nodatacow will be in effect because it's checked first in
btrfs_run_delalloc_range.
Extend the flag validation to catch the following cases:
- input flags are conflicting
- old and new flags are conflicting
- initialize the local variable with inode flags after inode ls locked
Inode attributes take precedence over mount options and are an
independent setting.
Nocompress would be a no-op with nodatacow, but we don't want to mix
any compression-related options with nodatacow.
CC: stable(a)vger.kernel.org # 4.4+
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index b4ddf51ae377..bd3511c5ca81 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -164,8 +164,11 @@ static int btrfs_ioctl_getflags(struct file *file, void __user *arg)
return 0;
}
-/* Check if @flags are a supported and valid set of FS_*_FL flags */
-static int check_fsflags(unsigned int flags)
+/*
+ * Check if @flags are a supported and valid set of FS_*_FL flags and that
+ * the old and new flags are not conflicting
+ */
+static int check_fsflags(unsigned int old_flags, unsigned int flags)
{
if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \
FS_NOATIME_FL | FS_NODUMP_FL | \
@@ -174,9 +177,19 @@ static int check_fsflags(unsigned int flags)
FS_NOCOW_FL))
return -EOPNOTSUPP;
+ /* COMPR and NOCOMP on new/old are valid */
if ((flags & FS_NOCOMP_FL) && (flags & FS_COMPR_FL))
return -EINVAL;
+ if ((flags & FS_COMPR_FL) && (flags & FS_NOCOW_FL))
+ return -EINVAL;
+
+ /* NOCOW and compression options are mutually exclusive */
+ if ((old_flags & FS_NOCOW_FL) && (flags & (FS_COMPR_FL | FS_NOCOMP_FL)))
+ return -EINVAL;
+ if ((flags & FS_NOCOW_FL) && (old_flags & (FS_COMPR_FL | FS_NOCOMP_FL)))
+ return -EINVAL;
+
return 0;
}
@@ -190,7 +203,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
unsigned int fsflags, old_fsflags;
int ret;
const char *comp = NULL;
- u32 binode_flags = binode->flags;
+ u32 binode_flags;
if (!inode_owner_or_capable(inode))
return -EPERM;
@@ -201,22 +214,23 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
if (copy_from_user(&fsflags, arg, sizeof(fsflags)))
return -EFAULT;
- ret = check_fsflags(fsflags);
- if (ret)
- return ret;
-
ret = mnt_want_write_file(file);
if (ret)
return ret;
inode_lock(inode);
-
fsflags = btrfs_mask_fsflags_for_type(inode, fsflags);
old_fsflags = btrfs_inode_flags_to_fsflags(binode->flags);
+
ret = vfs_ioc_setflags_prepare(inode, old_fsflags, fsflags);
if (ret)
goto out_unlock;
+ ret = check_fsflags(old_fsflags, fsflags);
+ if (ret)
+ goto out_unlock;
+
+ binode_flags = binode->flags;
if (fsflags & FS_SYNC_FL)
binode_flags |= BTRFS_INODE_SYNC;
else
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 01d01caf19ff7c537527d352d169c4368375c0a1 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef(a)toxicpanda.com>
Date: Fri, 17 Jul 2020 15:12:28 -0400
Subject: [PATCH] btrfs: move the chunk_mutex in btrfs_read_chunk_tree
We are currently getting this lockdep splat in btrfs/161:
======================================================
WARNING: possible circular locking dependency detected
5.8.0-rc5+ #20 Tainted: G E
------------------------------------------------------
mount/678048 is trying to acquire lock:
ffff9b769f15b6e0 (&fs_devs->device_list_mutex){+.+.}-{3:3}, at: clone_fs_devices+0x4d/0x170 [btrfs]
but task is already holding lock:
ffff9b76abdb08d0 (&fs_info->chunk_mutex){+.+.}-{3:3}, at: btrfs_read_chunk_tree+0x6a/0x800 [btrfs]
which lock already depends on the new lock.
the existing dependency chain (in reverse order) is:
-> #1 (&fs_info->chunk_mutex){+.+.}-{3:3}:
__mutex_lock+0x8b/0x8f0
btrfs_init_new_device+0x2d2/0x1240 [btrfs]
btrfs_ioctl+0x1de/0x2d20 [btrfs]
ksys_ioctl+0x87/0xc0
__x64_sys_ioctl+0x16/0x20
do_syscall_64+0x52/0xb0
entry_SYSCALL_64_after_hwframe+0x44/0xa9
-> #0 (&fs_devs->device_list_mutex){+.+.}-{3:3}:
__lock_acquire+0x1240/0x2460
lock_acquire+0xab/0x360
__mutex_lock+0x8b/0x8f0
clone_fs_devices+0x4d/0x170 [btrfs]
btrfs_read_chunk_tree+0x330/0x800 [btrfs]
open_ctree+0xb7c/0x18ce [btrfs]
btrfs_mount_root.cold+0x13/0xfa [btrfs]
legacy_get_tree+0x30/0x50
vfs_get_tree+0x28/0xc0
fc_mount+0xe/0x40
vfs_kern_mount.part.0+0x71/0x90
btrfs_mount+0x13b/0x3e0 [btrfs]
legacy_get_tree+0x30/0x50
vfs_get_tree+0x28/0xc0
do_mount+0x7de/0xb30
__x64_sys_mount+0x8e/0xd0
do_syscall_64+0x52/0xb0
entry_SYSCALL_64_after_hwframe+0x44/0xa9
other info that might help us debug this:
Possible unsafe locking scenario:
CPU0 CPU1
---- ----
lock(&fs_info->chunk_mutex);
lock(&fs_devs->device_list_mutex);
lock(&fs_info->chunk_mutex);
lock(&fs_devs->device_list_mutex);
*** DEADLOCK ***
3 locks held by mount/678048:
#0: ffff9b75ff5fb0e0 (&type->s_umount_key#63/1){+.+.}-{3:3}, at: alloc_super+0xb5/0x380
#1: ffffffffc0c2fbc8 (uuid_mutex){+.+.}-{3:3}, at: btrfs_read_chunk_tree+0x54/0x800 [btrfs]
#2: ffff9b76abdb08d0 (&fs_info->chunk_mutex){+.+.}-{3:3}, at: btrfs_read_chunk_tree+0x6a/0x800 [btrfs]
stack backtrace:
CPU: 2 PID: 678048 Comm: mount Tainted: G E 5.8.0-rc5+ #20
Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./890FX Deluxe5, BIOS P1.40 05/03/2011
Call Trace:
dump_stack+0x96/0xd0
check_noncircular+0x162/0x180
__lock_acquire+0x1240/0x2460
? asm_sysvec_apic_timer_interrupt+0x12/0x20
lock_acquire+0xab/0x360
? clone_fs_devices+0x4d/0x170 [btrfs]
__mutex_lock+0x8b/0x8f0
? clone_fs_devices+0x4d/0x170 [btrfs]
? rcu_read_lock_sched_held+0x52/0x60
? cpumask_next+0x16/0x20
? module_assert_mutex_or_preempt+0x14/0x40
? __module_address+0x28/0xf0
? clone_fs_devices+0x4d/0x170 [btrfs]
? static_obj+0x4f/0x60
? lockdep_init_map_waits+0x43/0x200
? clone_fs_devices+0x4d/0x170 [btrfs]
clone_fs_devices+0x4d/0x170 [btrfs]
btrfs_read_chunk_tree+0x330/0x800 [btrfs]
open_ctree+0xb7c/0x18ce [btrfs]
? super_setup_bdi_name+0x79/0xd0
btrfs_mount_root.cold+0x13/0xfa [btrfs]
? vfs_parse_fs_string+0x84/0xb0
? rcu_read_lock_sched_held+0x52/0x60
? kfree+0x2b5/0x310
legacy_get_tree+0x30/0x50
vfs_get_tree+0x28/0xc0
fc_mount+0xe/0x40
vfs_kern_mount.part.0+0x71/0x90
btrfs_mount+0x13b/0x3e0 [btrfs]
? cred_has_capability+0x7c/0x120
? rcu_read_lock_sched_held+0x52/0x60
? legacy_get_tree+0x30/0x50
legacy_get_tree+0x30/0x50
vfs_get_tree+0x28/0xc0
do_mount+0x7de/0xb30
? memdup_user+0x4e/0x90
__x64_sys_mount+0x8e/0xd0
do_syscall_64+0x52/0xb0
entry_SYSCALL_64_after_hwframe+0x44/0xa9
This is because btrfs_read_chunk_tree() can come upon DEV_EXTENT's and
then read the device, which takes the device_list_mutex. The
device_list_mutex needs to be taken before the chunk_mutex, so this is a
problem. We only really need the chunk mutex around adding the chunk,
so move the mutex around read_one_chunk.
An argument could be made that we don't even need the chunk_mutex here
as it's during mount, and we are protected by various other locks.
However we already have special rules for ->device_list_mutex, and I'd
rather not have another special case for ->chunk_mutex.
CC: stable(a)vger.kernel.org # 4.19+
Reviewed-by: Anand Jain <anand.jain(a)oracle.com>
Signed-off-by: Josef Bacik <josef(a)toxicpanda.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 084b8227ea2c..d7670e2a9f39 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -7077,7 +7077,6 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
* otherwise we don't need it.
*/
mutex_lock(&uuid_mutex);
- mutex_lock(&fs_info->chunk_mutex);
/*
* It is possible for mount and umount to race in such a way that
@@ -7135,7 +7134,9 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
} else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) {
struct btrfs_chunk *chunk;
chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
+ mutex_lock(&fs_info->chunk_mutex);
ret = read_one_chunk(&found_key, leaf, chunk);
+ mutex_unlock(&fs_info->chunk_mutex);
if (ret)
goto error;
}
@@ -7165,7 +7166,6 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
}
ret = 0;
error:
- mutex_unlock(&fs_info->chunk_mutex);
mutex_unlock(&uuid_mutex);
btrfs_free_path(path);
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 5aa7d1a7f4a2f8ca6be1f32415e9365d026e8fa7 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Thu, 2 Jul 2020 12:32:20 +0100
Subject: [PATCH] btrfs: only commit delayed items at fsync if we are logging a
directory
When logging an inode we are committing its delayed items if either the
inode is a directory or if it is a new inode, created in the current
transaction.
We need to do it for directories, since new directory indexes are stored
as delayed items of the inode and when logging a directory we need to be
able to access all indexes from the fs/subvolume tree in order to figure
out which index ranges need to be logged.
However for new inodes that are not directories, we do not need to do it
because the only type of delayed item they can have is the inode item, and
we are guaranteed to always log an up to date version of the inode item:
*) for a full fsync we do it by committing the delayed inode and then
copying the item from the fs/subvolume tree with
copy_inode_items_to_log();
*) for a fast fsync we always log the inode item based on the contents of
the in-memory struct btrfs_inode. We guarantee this is always done since
commit e4545de5b035c7 ("Btrfs: fix fsync data loss after append write").
So stop running delayed items for a new inodes that are not directories,
since that forces committing the delayed inode into the fs/subvolume tree,
wasting time and adding contention to the tree when a full fsync is not
required. We will only do it in case a fast fsync is needed.
This patch is part of a series that has the following patches:
1/4 btrfs: only commit the delayed inode when doing a full fsync
2/4 btrfs: only commit delayed items at fsync if we are logging a directory
3/4 btrfs: stop incremening log_batch for the log root tree when syncing log
4/4 btrfs: remove no longer needed use of log_writers for the log root tree
After the entire patchset applied I saw about 12% decrease on max latency
reported by dbench. The test was done on a qemu vm, with 8 cores, 16Gb of
ram, using kvm and using a raw NVMe device directly (no intermediary fs on
the host). The test was invoked like the following:
mkfs.btrfs -f /dev/sdk
mount -o ssd -o nospace_cache /dev/sdk /mnt/sdk
dbench -D /mnt/sdk -t 300 8
umount /mnt/dsk
CC: stable(a)vger.kernel.org # 5.4+
Reviewed-by: Josef Bacik <josef(a)toxicpanda.com>
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 44bbf8919883..7c325451d47f 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -5123,7 +5123,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
const loff_t end,
struct btrfs_log_ctx *ctx)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_path *path;
struct btrfs_path *dst_path;
struct btrfs_key min_key;
@@ -5166,15 +5165,17 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
max_key.offset = (u64)-1;
/*
- * Only run delayed items if we are a dir or a new file.
+ * Only run delayed items if we are a directory. We want to make sure
+ * all directory indexes hit the fs/subvolume tree so we can find them
+ * and figure out which index ranges have to be logged.
+ *
* Otherwise commit the delayed inode only if the full sync flag is set,
* as we want to make sure an up to date version is in the subvolume
* tree so copy_inode_items_to_log() / copy_items() can find it and copy
* it to the log tree. For a non full sync, we always log the inode item
* based on the in-memory struct btrfs_inode which is always up to date.
*/
- if (S_ISDIR(inode->vfs_inode.i_mode) ||
- inode->generation > fs_info->last_trans_committed)
+ if (S_ISDIR(inode->vfs_inode.i_mode))
ret = btrfs_commit_inode_delayed_items(trans, inode);
else if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags))
ret = btrfs_commit_inode_delayed_inode(inode);
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 8c8648dd1f6d62aeb912deeb788b6ac33cb782e7 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Thu, 2 Jul 2020 12:31:59 +0100
Subject: [PATCH] btrfs: only commit the delayed inode when doing a full fsync
Commit 2c2c452b0cafdc ("Btrfs: fix fsync when extend references are added
to an inode") forced a commit of the delayed inode when logging an inode
in order to ensure we would end up logging the inode item during a full
fsync. By committing the delayed inode, we updated the inode item in the
fs/subvolume tree and then later when copying items from leafs modified in
the current transaction into the log tree (with copy_inode_items_to_log())
we ended up copying the inode item from the fs/subvolume tree into the log
tree. Logging an up to date version of the inode item is required to make
sure at log replay time we get the link count fixup triggered among other
things (replay xattr deletes, etc). The test case generic/040 from fstests
exercises the bug which that commit fixed.
However for a fast fsync we don't need to commit the delayed inode because
we always log an up to date version of the inode item based on the struct
btrfs_inode we have in-memory. We started doing this for fast fsyncs since
commit e4545de5b035c7 ("Btrfs: fix fsync data loss after append write").
So just stop committing the delayed inode if we are doing a fast fsync,
we are only wasting time and adding contention on fs/subvolume tree.
This patch is part of a series that has the following patches:
1/4 btrfs: only commit the delayed inode when doing a full fsync
2/4 btrfs: only commit delayed items at fsync if we are logging a directory
3/4 btrfs: stop incremening log_batch for the log root tree when syncing log
4/4 btrfs: remove no longer needed use of log_writers for the log root tree
After the entire patchset applied I saw about 12% decrease on max latency
reported by dbench. The test was done on a qemu vm, with 8 cores, 16Gb of
ram, using kvm and using a raw NVMe device directly (no intermediary fs on
the host). The test was invoked like the following:
mkfs.btrfs -f /dev/sdk
mount -o ssd -o nospace_cache /dev/sdk /mnt/sdk
dbench -D /mnt/sdk -t 300 8
umount /mnt/dsk
CC: stable(a)vger.kernel.org # 5.4+
Reviewed-by: Josef Bacik <josef(a)toxicpanda.com>
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index df6d4e3e40b1..44bbf8919883 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -5130,7 +5130,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
struct btrfs_key max_key;
struct btrfs_root *log = root->log_root;
int err = 0;
- int ret;
+ int ret = 0;
bool fast_search = false;
u64 ino = btrfs_ino(inode);
struct extent_map_tree *em_tree = &inode->extent_tree;
@@ -5167,14 +5167,16 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
/*
* Only run delayed items if we are a dir or a new file.
- * Otherwise commit the delayed inode only, which is needed in
- * order for the log replay code to mark inodes for link count
- * fixup (create temporary BTRFS_TREE_LOG_FIXUP_OBJECTID items).
+ * Otherwise commit the delayed inode only if the full sync flag is set,
+ * as we want to make sure an up to date version is in the subvolume
+ * tree so copy_inode_items_to_log() / copy_items() can find it and copy
+ * it to the log tree. For a non full sync, we always log the inode item
+ * based on the in-memory struct btrfs_inode which is always up to date.
*/
if (S_ISDIR(inode->vfs_inode.i_mode) ||
inode->generation > fs_info->last_trans_committed)
ret = btrfs_commit_inode_delayed_items(trans, inode);
- else
+ else if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags))
ret = btrfs_commit_inode_delayed_inode(inode);
if (ret) {
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 851fd730a743e072badaf67caf39883e32439431 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu(a)suse.com>
Date: Tue, 16 Jun 2020 10:17:34 +0800
Subject: [PATCH] btrfs: don't allocate anonymous block device for user
invisible roots
[BUG]
When a lot of subvolumes are created, there is a user report about
transaction aborted:
BTRFS: Transaction aborted (error -24)
WARNING: CPU: 17 PID: 17041 at fs/btrfs/transaction.c:1576 create_pending_snapshot+0xbc4/0xd10 [btrfs]
RIP: 0010:create_pending_snapshot+0xbc4/0xd10 [btrfs]
Call Trace:
create_pending_snapshots+0x82/0xa0 [btrfs]
btrfs_commit_transaction+0x275/0x8c0 [btrfs]
btrfs_mksubvol+0x4b9/0x500 [btrfs]
btrfs_ioctl_snap_create_transid+0x174/0x180 [btrfs]
btrfs_ioctl_snap_create_v2+0x11c/0x180 [btrfs]
btrfs_ioctl+0x11a4/0x2da0 [btrfs]
do_vfs_ioctl+0xa9/0x640
ksys_ioctl+0x67/0x90
__x64_sys_ioctl+0x1a/0x20
do_syscall_64+0x5a/0x110
entry_SYSCALL_64_after_hwframe+0x44/0xa9
---[ end trace 33f2f83f3d5250e9 ]---
BTRFS: error (device sda1) in create_pending_snapshot:1576: errno=-24 unknown
BTRFS info (device sda1): forced readonly
BTRFS warning (device sda1): Skipping commit of aborted transaction.
BTRFS: error (device sda1) in cleanup_transaction:1831: errno=-24 unknown
[CAUSE]
The error is EMFILE (Too many files open) and comes from the anonymous
block device allocation. The ids are in a shared pool of size 1<<20.
The ids are assigned to live subvolumes, ie. the root structure exists
in memory (eg. after creation or after the root appears in some path).
The pool could be exhausted if the numbers are not reclaimed fast
enough, after subvolume deletion or if other system component uses the
anon block devices.
[WORKAROUND]
Since it's not possible to completely solve the problem, we can only
minimize the time the id is allocated to a subvolume root.
Firstly, we can reduce the use of anon_dev by trees that are not
subvolume roots, like data reloc tree.
This patch will do extra check on root objectid, to skip roots that
don't need anon_dev. Currently it's only data reloc tree and orphan
roots.
Reported-by: Greed Rong <greedrong(a)gmail.com>
Link: https://lore.kernel.org/linux-btrfs/CA+UqX+NTrZ6boGnWHhSeZmEY5J76CTqmYjO2S+…
CC: stable(a)vger.kernel.org # 4.4+
Reviewed-by: Josef Bacik <josef(a)toxicpanda.com>
Signed-off-by: Qu Wenruo <wqu(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index dbf90cd49513..c90edf04a9db 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1424,9 +1424,16 @@ static int btrfs_init_fs_root(struct btrfs_root *root)
spin_lock_init(&root->ino_cache_lock);
init_waitqueue_head(&root->ino_cache_wait);
- ret = get_anon_bdev(&root->anon_dev);
- if (ret)
- goto fail;
+ /*
+ * Don't assign anonymous block device to roots that are not exposed to
+ * userspace, the id pool is limited to 1M
+ */
+ if (is_fstree(root->root_key.objectid) &&
+ btrfs_root_refs(&root->root_item) > 0) {
+ ret = get_anon_bdev(&root->anon_dev);
+ if (ret)
+ goto fail;
+ }
mutex_lock(&root->objectid_mutex);
ret = btrfs_find_highest_objectid(root,
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 082b6c970f02fefd278c7833880cda29691a5f34 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu(a)suse.com>
Date: Tue, 16 Jun 2020 10:17:37 +0800
Subject: [PATCH] btrfs: free anon block device right after subvolume deletion
[BUG]
When a lot of subvolumes are created, there is a user report about
transaction aborted caused by slow anonymous block device reclaim:
BTRFS: Transaction aborted (error -24)
WARNING: CPU: 17 PID: 17041 at fs/btrfs/transaction.c:1576 create_pending_snapshot+0xbc4/0xd10 [btrfs]
RIP: 0010:create_pending_snapshot+0xbc4/0xd10 [btrfs]
Call Trace:
create_pending_snapshots+0x82/0xa0 [btrfs]
btrfs_commit_transaction+0x275/0x8c0 [btrfs]
btrfs_mksubvol+0x4b9/0x500 [btrfs]
btrfs_ioctl_snap_create_transid+0x174/0x180 [btrfs]
btrfs_ioctl_snap_create_v2+0x11c/0x180 [btrfs]
btrfs_ioctl+0x11a4/0x2da0 [btrfs]
do_vfs_ioctl+0xa9/0x640
ksys_ioctl+0x67/0x90
__x64_sys_ioctl+0x1a/0x20
do_syscall_64+0x5a/0x110
entry_SYSCALL_64_after_hwframe+0x44/0xa9
---[ end trace 33f2f83f3d5250e9 ]---
BTRFS: error (device sda1) in create_pending_snapshot:1576: errno=-24 unknown
BTRFS info (device sda1): forced readonly
BTRFS warning (device sda1): Skipping commit of aborted transaction.
BTRFS: error (device sda1) in cleanup_transaction:1831: errno=-24 unknown
[CAUSE]
The anonymous device pool is shared and its size is 1M. It's possible to
hit that limit if the subvolume deletion is not fast enough and the
subvolumes to be cleaned keep the ids allocated.
[WORKAROUND]
We can't avoid the anon device pool exhaustion but we can shorten the
time the id is attached to the subvolume root once the subvolume becomes
invisible to the user.
Reported-by: Greed Rong <greedrong(a)gmail.com>
Link: https://lore.kernel.org/linux-btrfs/CA+UqX+NTrZ6boGnWHhSeZmEY5J76CTqmYjO2S+…
CC: stable(a)vger.kernel.org # 4.4+
Reviewed-by: Josef Bacik <josef(a)toxicpanda.com>
Signed-off-by: Qu Wenruo <wqu(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 5d2ce8092531..f066cad2d039 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4026,6 +4026,8 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry)
}
}
+ free_anon_bdev(dest->anon_dev);
+ dest->anon_dev = 0;
out_end_trans:
trans->block_rsv = NULL;
trans->bytes_reserved = 0;
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 082b6c970f02fefd278c7833880cda29691a5f34 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu(a)suse.com>
Date: Tue, 16 Jun 2020 10:17:37 +0800
Subject: [PATCH] btrfs: free anon block device right after subvolume deletion
[BUG]
When a lot of subvolumes are created, there is a user report about
transaction aborted caused by slow anonymous block device reclaim:
BTRFS: Transaction aborted (error -24)
WARNING: CPU: 17 PID: 17041 at fs/btrfs/transaction.c:1576 create_pending_snapshot+0xbc4/0xd10 [btrfs]
RIP: 0010:create_pending_snapshot+0xbc4/0xd10 [btrfs]
Call Trace:
create_pending_snapshots+0x82/0xa0 [btrfs]
btrfs_commit_transaction+0x275/0x8c0 [btrfs]
btrfs_mksubvol+0x4b9/0x500 [btrfs]
btrfs_ioctl_snap_create_transid+0x174/0x180 [btrfs]
btrfs_ioctl_snap_create_v2+0x11c/0x180 [btrfs]
btrfs_ioctl+0x11a4/0x2da0 [btrfs]
do_vfs_ioctl+0xa9/0x640
ksys_ioctl+0x67/0x90
__x64_sys_ioctl+0x1a/0x20
do_syscall_64+0x5a/0x110
entry_SYSCALL_64_after_hwframe+0x44/0xa9
---[ end trace 33f2f83f3d5250e9 ]---
BTRFS: error (device sda1) in create_pending_snapshot:1576: errno=-24 unknown
BTRFS info (device sda1): forced readonly
BTRFS warning (device sda1): Skipping commit of aborted transaction.
BTRFS: error (device sda1) in cleanup_transaction:1831: errno=-24 unknown
[CAUSE]
The anonymous device pool is shared and its size is 1M. It's possible to
hit that limit if the subvolume deletion is not fast enough and the
subvolumes to be cleaned keep the ids allocated.
[WORKAROUND]
We can't avoid the anon device pool exhaustion but we can shorten the
time the id is attached to the subvolume root once the subvolume becomes
invisible to the user.
Reported-by: Greed Rong <greedrong(a)gmail.com>
Link: https://lore.kernel.org/linux-btrfs/CA+UqX+NTrZ6boGnWHhSeZmEY5J76CTqmYjO2S+…
CC: stable(a)vger.kernel.org # 4.4+
Reviewed-by: Josef Bacik <josef(a)toxicpanda.com>
Signed-off-by: Qu Wenruo <wqu(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 5d2ce8092531..f066cad2d039 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4026,6 +4026,8 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry)
}
}
+ free_anon_bdev(dest->anon_dev);
+ dest->anon_dev = 0;
out_end_trans:
trans->block_rsv = NULL;
trans->bytes_reserved = 0;
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 082b6c970f02fefd278c7833880cda29691a5f34 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu(a)suse.com>
Date: Tue, 16 Jun 2020 10:17:37 +0800
Subject: [PATCH] btrfs: free anon block device right after subvolume deletion
[BUG]
When a lot of subvolumes are created, there is a user report about
transaction aborted caused by slow anonymous block device reclaim:
BTRFS: Transaction aborted (error -24)
WARNING: CPU: 17 PID: 17041 at fs/btrfs/transaction.c:1576 create_pending_snapshot+0xbc4/0xd10 [btrfs]
RIP: 0010:create_pending_snapshot+0xbc4/0xd10 [btrfs]
Call Trace:
create_pending_snapshots+0x82/0xa0 [btrfs]
btrfs_commit_transaction+0x275/0x8c0 [btrfs]
btrfs_mksubvol+0x4b9/0x500 [btrfs]
btrfs_ioctl_snap_create_transid+0x174/0x180 [btrfs]
btrfs_ioctl_snap_create_v2+0x11c/0x180 [btrfs]
btrfs_ioctl+0x11a4/0x2da0 [btrfs]
do_vfs_ioctl+0xa9/0x640
ksys_ioctl+0x67/0x90
__x64_sys_ioctl+0x1a/0x20
do_syscall_64+0x5a/0x110
entry_SYSCALL_64_after_hwframe+0x44/0xa9
---[ end trace 33f2f83f3d5250e9 ]---
BTRFS: error (device sda1) in create_pending_snapshot:1576: errno=-24 unknown
BTRFS info (device sda1): forced readonly
BTRFS warning (device sda1): Skipping commit of aborted transaction.
BTRFS: error (device sda1) in cleanup_transaction:1831: errno=-24 unknown
[CAUSE]
The anonymous device pool is shared and its size is 1M. It's possible to
hit that limit if the subvolume deletion is not fast enough and the
subvolumes to be cleaned keep the ids allocated.
[WORKAROUND]
We can't avoid the anon device pool exhaustion but we can shorten the
time the id is attached to the subvolume root once the subvolume becomes
invisible to the user.
Reported-by: Greed Rong <greedrong(a)gmail.com>
Link: https://lore.kernel.org/linux-btrfs/CA+UqX+NTrZ6boGnWHhSeZmEY5J76CTqmYjO2S+…
CC: stable(a)vger.kernel.org # 4.4+
Reviewed-by: Josef Bacik <josef(a)toxicpanda.com>
Signed-off-by: Qu Wenruo <wqu(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 5d2ce8092531..f066cad2d039 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4026,6 +4026,8 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry)
}
}
+ free_anon_bdev(dest->anon_dev);
+ dest->anon_dev = 0;
out_end_trans:
trans->block_rsv = NULL;
trans->bytes_reserved = 0;
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From de3c4bf648975ea0b1d344d811e9b0748907b47c Mon Sep 17 00:00:00 2001
From: Ansuel Smith <ansuelsmth(a)gmail.com>
Date: Mon, 15 Jun 2020 23:06:04 +0200
Subject: [PATCH] PCI: qcom: Add support for tx term offset for rev 2.1.0
Add tx term offset support to pcie qcom driver need in some revision of
the ipq806x SoC. Ipq8064 needs tx term offset set to 7.
Link: https://lore.kernel.org/r/20200615210608.21469-9-ansuelsmth@gmail.com
Fixes: 82a823833f4e ("PCI: qcom: Add Qualcomm PCIe controller driver")
Signed-off-by: Sham Muthayyan <smuthayy(a)codeaurora.org>
Signed-off-by: Ansuel Smith <ansuelsmth(a)gmail.com>
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi(a)arm.com>
Acked-by: Stanimir Varbanov <svarbanov(a)mm-sol.com>
Cc: stable(a)vger.kernel.org # v4.5+
diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c
index 69c7b119e81a..34d961e492fd 100644
--- a/drivers/pci/controller/dwc/pcie-qcom.c
+++ b/drivers/pci/controller/dwc/pcie-qcom.c
@@ -45,7 +45,13 @@
#define PCIE_CAP_CPL_TIMEOUT_DISABLE 0x10
#define PCIE20_PARF_PHY_CTRL 0x40
+#define PHY_CTRL_PHY_TX0_TERM_OFFSET_MASK GENMASK(20, 16)
+#define PHY_CTRL_PHY_TX0_TERM_OFFSET(x) ((x) << 16)
+
#define PCIE20_PARF_PHY_REFCLK 0x4C
+#define PHY_REFCLK_SSP_EN BIT(16)
+#define PHY_REFCLK_USE_PAD BIT(12)
+
#define PCIE20_PARF_DBI_BASE_ADDR 0x168
#define PCIE20_PARF_SLV_ADDR_SPACE_SIZE 0x16C
#define PCIE20_PARF_MHI_CLOCK_RESET_CTRL 0x174
@@ -371,9 +377,18 @@ static int qcom_pcie_init_2_1_0(struct qcom_pcie *pcie)
writel(PHY_RX0_EQ(4), pcie->parf + PCIE20_PARF_CONFIG_BITS);
}
+ if (of_device_is_compatible(node, "qcom,pcie-ipq8064")) {
+ /* set TX termination offset */
+ val = readl(pcie->parf + PCIE20_PARF_PHY_CTRL);
+ val &= ~PHY_CTRL_PHY_TX0_TERM_OFFSET_MASK;
+ val |= PHY_CTRL_PHY_TX0_TERM_OFFSET(7);
+ writel(val, pcie->parf + PCIE20_PARF_PHY_CTRL);
+ }
+
/* enable external reference clock */
val = readl(pcie->parf + PCIE20_PARF_PHY_REFCLK);
- val |= BIT(16);
+ val &= ~PHY_REFCLK_USE_PAD;
+ val |= PHY_REFCLK_SSP_EN;
writel(val, pcie->parf + PCIE20_PARF_PHY_REFCLK);
/* wait for clock acquisition */
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From de3c4bf648975ea0b1d344d811e9b0748907b47c Mon Sep 17 00:00:00 2001
From: Ansuel Smith <ansuelsmth(a)gmail.com>
Date: Mon, 15 Jun 2020 23:06:04 +0200
Subject: [PATCH] PCI: qcom: Add support for tx term offset for rev 2.1.0
Add tx term offset support to pcie qcom driver need in some revision of
the ipq806x SoC. Ipq8064 needs tx term offset set to 7.
Link: https://lore.kernel.org/r/20200615210608.21469-9-ansuelsmth@gmail.com
Fixes: 82a823833f4e ("PCI: qcom: Add Qualcomm PCIe controller driver")
Signed-off-by: Sham Muthayyan <smuthayy(a)codeaurora.org>
Signed-off-by: Ansuel Smith <ansuelsmth(a)gmail.com>
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi(a)arm.com>
Acked-by: Stanimir Varbanov <svarbanov(a)mm-sol.com>
Cc: stable(a)vger.kernel.org # v4.5+
diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c
index 69c7b119e81a..34d961e492fd 100644
--- a/drivers/pci/controller/dwc/pcie-qcom.c
+++ b/drivers/pci/controller/dwc/pcie-qcom.c
@@ -45,7 +45,13 @@
#define PCIE_CAP_CPL_TIMEOUT_DISABLE 0x10
#define PCIE20_PARF_PHY_CTRL 0x40
+#define PHY_CTRL_PHY_TX0_TERM_OFFSET_MASK GENMASK(20, 16)
+#define PHY_CTRL_PHY_TX0_TERM_OFFSET(x) ((x) << 16)
+
#define PCIE20_PARF_PHY_REFCLK 0x4C
+#define PHY_REFCLK_SSP_EN BIT(16)
+#define PHY_REFCLK_USE_PAD BIT(12)
+
#define PCIE20_PARF_DBI_BASE_ADDR 0x168
#define PCIE20_PARF_SLV_ADDR_SPACE_SIZE 0x16C
#define PCIE20_PARF_MHI_CLOCK_RESET_CTRL 0x174
@@ -371,9 +377,18 @@ static int qcom_pcie_init_2_1_0(struct qcom_pcie *pcie)
writel(PHY_RX0_EQ(4), pcie->parf + PCIE20_PARF_CONFIG_BITS);
}
+ if (of_device_is_compatible(node, "qcom,pcie-ipq8064")) {
+ /* set TX termination offset */
+ val = readl(pcie->parf + PCIE20_PARF_PHY_CTRL);
+ val &= ~PHY_CTRL_PHY_TX0_TERM_OFFSET_MASK;
+ val |= PHY_CTRL_PHY_TX0_TERM_OFFSET(7);
+ writel(val, pcie->parf + PCIE20_PARF_PHY_CTRL);
+ }
+
/* enable external reference clock */
val = readl(pcie->parf + PCIE20_PARF_PHY_REFCLK);
- val |= BIT(16);
+ val &= ~PHY_REFCLK_USE_PAD;
+ val |= PHY_REFCLK_SSP_EN;
writel(val, pcie->parf + PCIE20_PARF_PHY_REFCLK);
/* wait for clock acquisition */
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 2194bc7c39610be7cabe7456c5f63a570604f015 Mon Sep 17 00:00:00 2001
From: Rajat Jain <rajatja(a)google.com>
Date: Mon, 6 Jul 2020 16:32:40 -0700
Subject: [PATCH] PCI: Add device even if driver attach failed
device_attach() returning failure indicates a driver error while trying to
probe the device. In such a scenario, the PCI device should still be added
in the system and be visible to the user.
When device_attach() fails, merely warn about it and keep the PCI device in
the system.
This partially reverts ab1a187bba5c ("PCI: Check device_attach() return
value always").
Link: https://lore.kernel.org/r/20200706233240.3245512-1-rajatja@google.com
Signed-off-by: Rajat Jain <rajatja(a)google.com>
Signed-off-by: Bjorn Helgaas <bhelgaas(a)google.com>
Reviewed-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Cc: stable(a)vger.kernel.org # v4.6+
diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c
index 8e40b3e6da77..3cef835b375f 100644
--- a/drivers/pci/bus.c
+++ b/drivers/pci/bus.c
@@ -322,12 +322,8 @@ void pci_bus_add_device(struct pci_dev *dev)
dev->match_driver = true;
retval = device_attach(&dev->dev);
- if (retval < 0 && retval != -EPROBE_DEFER) {
+ if (retval < 0 && retval != -EPROBE_DEFER)
pci_warn(dev, "device attach failed (%d)\n", retval);
- pci_proc_detach_device(dev);
- pci_remove_sysfs_dev_files(dev);
- return;
- }
pci_dev_assign_added(dev, true);
}
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 3f9a7a13fe4cb6e119e4e4745fbf975d30bfac9b Mon Sep 17 00:00:00 2001
From: Ashok Raj <ashok.raj(a)intel.com>
Date: Thu, 23 Jul 2020 15:37:29 -0700
Subject: [PATCH] PCI/ATS: Add pci_pri_supported() to check device or
associated PF
For SR-IOV, the PF PRI is shared between the PF and any associated VFs, and
the PRI Capability is allowed for PFs but not for VFs. Searching for the
PRI Capability on a VF always fails, even if its associated PF supports
PRI.
Add pci_pri_supported() to check whether device or its associated PF
supports PRI.
[bhelgaas: commit log, avoid "!!"]
Fixes: b16d0cb9e2fc ("iommu/vt-d: Always enable PASID/PRI PCI capabilities before ATS")
Link: https://lore.kernel.org/r/1595543849-19692-1-git-send-email-ashok.raj@intel…
Signed-off-by: Ashok Raj <ashok.raj(a)intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas(a)google.com>
Reviewed-by: Lu Baolu <baolu.lu(a)linux.intel.com>
Acked-by: Joerg Roedel <jroedel(a)suse.de>
Cc: stable(a)vger.kernel.org # v4.4+
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 9129663a7406..5552e7d5d2b1 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -2554,7 +2554,7 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
}
if (info->ats_supported && ecap_prs(iommu->ecap) &&
- pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI))
+ pci_pri_supported(pdev))
info->pri_supported = 1;
}
}
diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c
index b761c1f72f67..647e097530a8 100644
--- a/drivers/pci/ats.c
+++ b/drivers/pci/ats.c
@@ -325,6 +325,21 @@ int pci_prg_resp_pasid_required(struct pci_dev *pdev)
return pdev->pasid_required;
}
+
+/**
+ * pci_pri_supported - Check if PRI is supported.
+ * @pdev: PCI device structure
+ *
+ * Returns true if PRI capability is present, false otherwise.
+ */
+bool pci_pri_supported(struct pci_dev *pdev)
+{
+ /* VFs share the PF PRI */
+ if (pci_physfn(pdev)->pri_cap)
+ return true;
+ return false;
+}
+EXPORT_SYMBOL_GPL(pci_pri_supported);
#endif /* CONFIG_PCI_PRI */
#ifdef CONFIG_PCI_PASID
diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h
index f75c307f346d..df54cd5b15db 100644
--- a/include/linux/pci-ats.h
+++ b/include/linux/pci-ats.h
@@ -28,6 +28,10 @@ int pci_enable_pri(struct pci_dev *pdev, u32 reqs);
void pci_disable_pri(struct pci_dev *pdev);
int pci_reset_pri(struct pci_dev *pdev);
int pci_prg_resp_pasid_required(struct pci_dev *pdev);
+bool pci_pri_supported(struct pci_dev *pdev);
+#else
+static inline bool pci_pri_supported(struct pci_dev *pdev)
+{ return false; }
#endif /* CONFIG_PCI_PRI */
#ifdef CONFIG_PCI_PASID
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 3f9a7a13fe4cb6e119e4e4745fbf975d30bfac9b Mon Sep 17 00:00:00 2001
From: Ashok Raj <ashok.raj(a)intel.com>
Date: Thu, 23 Jul 2020 15:37:29 -0700
Subject: [PATCH] PCI/ATS: Add pci_pri_supported() to check device or
associated PF
For SR-IOV, the PF PRI is shared between the PF and any associated VFs, and
the PRI Capability is allowed for PFs but not for VFs. Searching for the
PRI Capability on a VF always fails, even if its associated PF supports
PRI.
Add pci_pri_supported() to check whether device or its associated PF
supports PRI.
[bhelgaas: commit log, avoid "!!"]
Fixes: b16d0cb9e2fc ("iommu/vt-d: Always enable PASID/PRI PCI capabilities before ATS")
Link: https://lore.kernel.org/r/1595543849-19692-1-git-send-email-ashok.raj@intel…
Signed-off-by: Ashok Raj <ashok.raj(a)intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas(a)google.com>
Reviewed-by: Lu Baolu <baolu.lu(a)linux.intel.com>
Acked-by: Joerg Roedel <jroedel(a)suse.de>
Cc: stable(a)vger.kernel.org # v4.4+
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 9129663a7406..5552e7d5d2b1 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -2554,7 +2554,7 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
}
if (info->ats_supported && ecap_prs(iommu->ecap) &&
- pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI))
+ pci_pri_supported(pdev))
info->pri_supported = 1;
}
}
diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c
index b761c1f72f67..647e097530a8 100644
--- a/drivers/pci/ats.c
+++ b/drivers/pci/ats.c
@@ -325,6 +325,21 @@ int pci_prg_resp_pasid_required(struct pci_dev *pdev)
return pdev->pasid_required;
}
+
+/**
+ * pci_pri_supported - Check if PRI is supported.
+ * @pdev: PCI device structure
+ *
+ * Returns true if PRI capability is present, false otherwise.
+ */
+bool pci_pri_supported(struct pci_dev *pdev)
+{
+ /* VFs share the PF PRI */
+ if (pci_physfn(pdev)->pri_cap)
+ return true;
+ return false;
+}
+EXPORT_SYMBOL_GPL(pci_pri_supported);
#endif /* CONFIG_PCI_PRI */
#ifdef CONFIG_PCI_PASID
diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h
index f75c307f346d..df54cd5b15db 100644
--- a/include/linux/pci-ats.h
+++ b/include/linux/pci-ats.h
@@ -28,6 +28,10 @@ int pci_enable_pri(struct pci_dev *pdev, u32 reqs);
void pci_disable_pri(struct pci_dev *pdev);
int pci_reset_pri(struct pci_dev *pdev);
int pci_prg_resp_pasid_required(struct pci_dev *pdev);
+bool pci_pri_supported(struct pci_dev *pdev);
+#else
+static inline bool pci_pri_supported(struct pci_dev *pdev)
+{ return false; }
#endif /* CONFIG_PCI_PRI */
#ifdef CONFIG_PCI_PASID
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 3f9a7a13fe4cb6e119e4e4745fbf975d30bfac9b Mon Sep 17 00:00:00 2001
From: Ashok Raj <ashok.raj(a)intel.com>
Date: Thu, 23 Jul 2020 15:37:29 -0700
Subject: [PATCH] PCI/ATS: Add pci_pri_supported() to check device or
associated PF
For SR-IOV, the PF PRI is shared between the PF and any associated VFs, and
the PRI Capability is allowed for PFs but not for VFs. Searching for the
PRI Capability on a VF always fails, even if its associated PF supports
PRI.
Add pci_pri_supported() to check whether device or its associated PF
supports PRI.
[bhelgaas: commit log, avoid "!!"]
Fixes: b16d0cb9e2fc ("iommu/vt-d: Always enable PASID/PRI PCI capabilities before ATS")
Link: https://lore.kernel.org/r/1595543849-19692-1-git-send-email-ashok.raj@intel…
Signed-off-by: Ashok Raj <ashok.raj(a)intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas(a)google.com>
Reviewed-by: Lu Baolu <baolu.lu(a)linux.intel.com>
Acked-by: Joerg Roedel <jroedel(a)suse.de>
Cc: stable(a)vger.kernel.org # v4.4+
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 9129663a7406..5552e7d5d2b1 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -2554,7 +2554,7 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
}
if (info->ats_supported && ecap_prs(iommu->ecap) &&
- pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI))
+ pci_pri_supported(pdev))
info->pri_supported = 1;
}
}
diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c
index b761c1f72f67..647e097530a8 100644
--- a/drivers/pci/ats.c
+++ b/drivers/pci/ats.c
@@ -325,6 +325,21 @@ int pci_prg_resp_pasid_required(struct pci_dev *pdev)
return pdev->pasid_required;
}
+
+/**
+ * pci_pri_supported - Check if PRI is supported.
+ * @pdev: PCI device structure
+ *
+ * Returns true if PRI capability is present, false otherwise.
+ */
+bool pci_pri_supported(struct pci_dev *pdev)
+{
+ /* VFs share the PF PRI */
+ if (pci_physfn(pdev)->pri_cap)
+ return true;
+ return false;
+}
+EXPORT_SYMBOL_GPL(pci_pri_supported);
#endif /* CONFIG_PCI_PRI */
#ifdef CONFIG_PCI_PASID
diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h
index f75c307f346d..df54cd5b15db 100644
--- a/include/linux/pci-ats.h
+++ b/include/linux/pci-ats.h
@@ -28,6 +28,10 @@ int pci_enable_pri(struct pci_dev *pdev, u32 reqs);
void pci_disable_pri(struct pci_dev *pdev);
int pci_reset_pri(struct pci_dev *pdev);
int pci_prg_resp_pasid_required(struct pci_dev *pdev);
+bool pci_pri_supported(struct pci_dev *pdev);
+#else
+static inline bool pci_pri_supported(struct pci_dev *pdev)
+{ return false; }
#endif /* CONFIG_PCI_PRI */
#ifdef CONFIG_PCI_PASID
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 3f9a7a13fe4cb6e119e4e4745fbf975d30bfac9b Mon Sep 17 00:00:00 2001
From: Ashok Raj <ashok.raj(a)intel.com>
Date: Thu, 23 Jul 2020 15:37:29 -0700
Subject: [PATCH] PCI/ATS: Add pci_pri_supported() to check device or
associated PF
For SR-IOV, the PF PRI is shared between the PF and any associated VFs, and
the PRI Capability is allowed for PFs but not for VFs. Searching for the
PRI Capability on a VF always fails, even if its associated PF supports
PRI.
Add pci_pri_supported() to check whether device or its associated PF
supports PRI.
[bhelgaas: commit log, avoid "!!"]
Fixes: b16d0cb9e2fc ("iommu/vt-d: Always enable PASID/PRI PCI capabilities before ATS")
Link: https://lore.kernel.org/r/1595543849-19692-1-git-send-email-ashok.raj@intel…
Signed-off-by: Ashok Raj <ashok.raj(a)intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas(a)google.com>
Reviewed-by: Lu Baolu <baolu.lu(a)linux.intel.com>
Acked-by: Joerg Roedel <jroedel(a)suse.de>
Cc: stable(a)vger.kernel.org # v4.4+
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 9129663a7406..5552e7d5d2b1 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -2554,7 +2554,7 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
}
if (info->ats_supported && ecap_prs(iommu->ecap) &&
- pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI))
+ pci_pri_supported(pdev))
info->pri_supported = 1;
}
}
diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c
index b761c1f72f67..647e097530a8 100644
--- a/drivers/pci/ats.c
+++ b/drivers/pci/ats.c
@@ -325,6 +325,21 @@ int pci_prg_resp_pasid_required(struct pci_dev *pdev)
return pdev->pasid_required;
}
+
+/**
+ * pci_pri_supported - Check if PRI is supported.
+ * @pdev: PCI device structure
+ *
+ * Returns true if PRI capability is present, false otherwise.
+ */
+bool pci_pri_supported(struct pci_dev *pdev)
+{
+ /* VFs share the PF PRI */
+ if (pci_physfn(pdev)->pri_cap)
+ return true;
+ return false;
+}
+EXPORT_SYMBOL_GPL(pci_pri_supported);
#endif /* CONFIG_PCI_PRI */
#ifdef CONFIG_PCI_PASID
diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h
index f75c307f346d..df54cd5b15db 100644
--- a/include/linux/pci-ats.h
+++ b/include/linux/pci-ats.h
@@ -28,6 +28,10 @@ int pci_enable_pri(struct pci_dev *pdev, u32 reqs);
void pci_disable_pri(struct pci_dev *pdev);
int pci_reset_pri(struct pci_dev *pdev);
int pci_prg_resp_pasid_required(struct pci_dev *pdev);
+bool pci_pri_supported(struct pci_dev *pdev);
+#else
+static inline bool pci_pri_supported(struct pci_dev *pdev)
+{ return false; }
#endif /* CONFIG_PCI_PRI */
#ifdef CONFIG_PCI_PASID
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 3f9a7a13fe4cb6e119e4e4745fbf975d30bfac9b Mon Sep 17 00:00:00 2001
From: Ashok Raj <ashok.raj(a)intel.com>
Date: Thu, 23 Jul 2020 15:37:29 -0700
Subject: [PATCH] PCI/ATS: Add pci_pri_supported() to check device or
associated PF
For SR-IOV, the PF PRI is shared between the PF and any associated VFs, and
the PRI Capability is allowed for PFs but not for VFs. Searching for the
PRI Capability on a VF always fails, even if its associated PF supports
PRI.
Add pci_pri_supported() to check whether device or its associated PF
supports PRI.
[bhelgaas: commit log, avoid "!!"]
Fixes: b16d0cb9e2fc ("iommu/vt-d: Always enable PASID/PRI PCI capabilities before ATS")
Link: https://lore.kernel.org/r/1595543849-19692-1-git-send-email-ashok.raj@intel…
Signed-off-by: Ashok Raj <ashok.raj(a)intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas(a)google.com>
Reviewed-by: Lu Baolu <baolu.lu(a)linux.intel.com>
Acked-by: Joerg Roedel <jroedel(a)suse.de>
Cc: stable(a)vger.kernel.org # v4.4+
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 9129663a7406..5552e7d5d2b1 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -2554,7 +2554,7 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
}
if (info->ats_supported && ecap_prs(iommu->ecap) &&
- pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI))
+ pci_pri_supported(pdev))
info->pri_supported = 1;
}
}
diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c
index b761c1f72f67..647e097530a8 100644
--- a/drivers/pci/ats.c
+++ b/drivers/pci/ats.c
@@ -325,6 +325,21 @@ int pci_prg_resp_pasid_required(struct pci_dev *pdev)
return pdev->pasid_required;
}
+
+/**
+ * pci_pri_supported - Check if PRI is supported.
+ * @pdev: PCI device structure
+ *
+ * Returns true if PRI capability is present, false otherwise.
+ */
+bool pci_pri_supported(struct pci_dev *pdev)
+{
+ /* VFs share the PF PRI */
+ if (pci_physfn(pdev)->pri_cap)
+ return true;
+ return false;
+}
+EXPORT_SYMBOL_GPL(pci_pri_supported);
#endif /* CONFIG_PCI_PRI */
#ifdef CONFIG_PCI_PASID
diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h
index f75c307f346d..df54cd5b15db 100644
--- a/include/linux/pci-ats.h
+++ b/include/linux/pci-ats.h
@@ -28,6 +28,10 @@ int pci_enable_pri(struct pci_dev *pdev, u32 reqs);
void pci_disable_pri(struct pci_dev *pdev);
int pci_reset_pri(struct pci_dev *pdev);
int pci_prg_resp_pasid_required(struct pci_dev *pdev);
+bool pci_pri_supported(struct pci_dev *pdev);
+#else
+static inline bool pci_pri_supported(struct pci_dev *pdev)
+{ return false; }
#endif /* CONFIG_PCI_PRI */
#ifdef CONFIG_PCI_PASID
From: Thomas Gleixner <tglx(a)linutronix.de>
commit f0c7baca180046824e07fc5f1326e83a8fd150c7 upstream.
John reported that on a RK3288 system the perf per CPU interrupts are all
affine to CPU0 and provided the analysis:
"It looks like what happens is that because the interrupts are not per-CPU
in the hardware, armpmu_request_irq() calls irq_force_affinity() while
the interrupt is deactivated and then request_irq() with IRQF_PERCPU |
IRQF_NOBALANCING.
Now when irq_startup() runs with IRQ_STARTUP_NORMAL, it calls
irq_setup_affinity() which returns early because IRQF_PERCPU and
IRQF_NOBALANCING are set, leaving the interrupt on its original CPU."
This was broken by the recent commit which blocked interrupt affinity
setting in hardware before activation of the interrupt. While this works in
general, it does not work for this particular case. As contrary to the
initial analysis not all interrupt chip drivers implement an activate
callback, the safe cure is to make the deferred interrupt affinity setting
at activation time opt-in.
Implement the necessary core logic and make the two irqchip implementations
for which this is required opt-in. In hindsight this would have been the
right thing to do, but ...
Fixes: baedb87d1b53 ("genirq/affinity: Handle affinity setting on inactive interrupts correctly")
Reported-by: John Keeping <john(a)metanate.com>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Tested-by: Marc Zyngier <maz(a)kernel.org>
Acked-by: Marc Zyngier <maz(a)kernel.org>
Cc: stable(a)vger.kernel.org
Link: https://lkml.kernel.org/r/87blk4tzgm.fsf@nanos.tec.linutronix.de
---
arch/x86/kernel/apic/vector.c | 4 ++++
drivers/irqchip/irq-gic-v3-its.c | 5 ++++-
include/linux/irq.h | 13 +++++++++++++
kernel/irq/manage.c | 6 +++++-
4 files changed, 26 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 18c0dca08163..557b153c9ee6 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -554,6 +554,10 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq,
irqd->chip_data = apicd;
irqd->hwirq = virq + i;
irqd_set_single_target(irqd);
+
+ /* Don't invoke affinity setter on deactivated interrupts */
+ irqd_set_affinity_on_activate(irqd);
+
/*
* Legacy vectors are already assigned when the IOAPIC
* takes them over. They stay on the same vector. This is
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 263cf9240b16..7966b19ceba7 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -2581,6 +2581,7 @@ static int its_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
msi_alloc_info_t *info = args;
struct its_device *its_dev = info->scratchpad[0].ptr;
struct its_node *its = its_dev->its;
+ struct irq_data *irqd;
irq_hw_number_t hwirq;
int err;
int i;
@@ -2600,7 +2601,9 @@ static int its_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
irq_domain_set_hwirq_and_chip(domain, virq + i,
hwirq + i, &its_irq_chip, its_dev);
- irqd_set_single_target(irq_desc_get_irq_data(irq_to_desc(virq + i)));
+ irqd = irq_get_irq_data(virq + i);
+ irqd_set_single_target(irqd);
+ irqd_set_affinity_on_activate(irqd);
pr_debug("ID:%d pID:%d vID:%d\n",
(int)(hwirq + i - its_dev->event_map.lpi_base),
(int)(hwirq + i), virq + i);
diff --git a/include/linux/irq.h b/include/linux/irq.h
index ab9d956e21b3..be00e0ae5e31 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -211,6 +211,8 @@ struct irq_data {
* IRQD_CAN_RESERVE - Can use reservation mode
* IRQD_MSI_NOMASK_QUIRK - Non-maskable MSI quirk for affinity change
* required
+ * IRQD_AFFINITY_ON_ACTIVATE - Affinity is set on activation. Don't call
+ * irq_chip::irq_set_affinity() when deactivated.
*/
enum {
IRQD_TRIGGER_MASK = 0xf,
@@ -234,6 +236,7 @@ enum {
IRQD_DEFAULT_TRIGGER_SET = (1 << 25),
IRQD_CAN_RESERVE = (1 << 26),
IRQD_MSI_NOMASK_QUIRK = (1 << 27),
+ IRQD_AFFINITY_ON_ACTIVATE = (1 << 29),
};
#define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors)
@@ -408,6 +411,16 @@ static inline bool irqd_msi_nomask_quirk(struct irq_data *d)
return __irqd_to_state(d) & IRQD_MSI_NOMASK_QUIRK;
}
+static inline void irqd_set_affinity_on_activate(struct irq_data *d)
+{
+ __irqd_to_state(d) |= IRQD_AFFINITY_ON_ACTIVATE;
+}
+
+static inline bool irqd_affinity_on_activate(struct irq_data *d)
+{
+ return __irqd_to_state(d) & IRQD_AFFINITY_ON_ACTIVATE;
+}
+
#undef __irqd_to_state
static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d)
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index df73685de114..3b1d0a4725a4 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -281,12 +281,16 @@ static bool irq_set_affinity_deactivated(struct irq_data *data,
struct irq_desc *desc = irq_data_to_desc(data);
/*
+ * Handle irq chips which can handle affinity only in activated
+ * state correctly
+ *
* If the interrupt is not yet activated, just store the affinity
* mask and do not call the chip driver at all. On activation the
* driver has to make sure anyway that the interrupt is in a
* useable state so startup works.
*/
- if (!IS_ENABLED(CONFIG_IRQ_DOMAIN_HIERARCHY) || irqd_is_activated(data))
+ if (!IS_ENABLED(CONFIG_IRQ_DOMAIN_HIERARCHY) ||
+ irqd_is_activated(data) || !irqd_affinity_on_activate(data))
return false;
cpumask_copy(desc->irq_common_data.affinity, mask);
--
2.17.2
Recently we found regression when running will_it_scale/page_fault3 test
on ARM64. Over 70% down for the multi processes cases and over 20% down
for the multi threads cases. It turns out the regression is caused by
commit 89b15332af7c0312a41e50846819ca6613b58b4c ("mm: drop mmap_sem before
calling balance_dirty_pages() in write fault").
The test mmaps a memory size file then write to the mapping, this would
make all memory dirty and trigger dirty pages throttle, that upstream
commit would release mmap_sem then retry the page fault. The retried page
fault would see correct PTEs installed then just fall through to spurious TLB
flush. The regression is caused by the excessive spurious TLB flush. It is
fine on x86 since x86's spurious TLB flush is no-op.
We could just skip the spurious TLB flush to mitigate the regression.
Suggested-by: Linus Torvalds <torvalds(a)linux-foundation.org>
Reported-by: Xu Yu <xuyu(a)linux.alibaba.com>
Debugged-by: Xu Yu <xuyu(a)linux.alibaba.com>
Tested-by: Xu Yu <xuyu(a)linux.alibaba.com>
Cc: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: Catalin Marinas <catalin.marinas(a)arm.com>
Cc: Will Deacon <will.deacon(a)arm.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Yang Shi <shy828301(a)gmail.com>
---
v3: Incorporated Linus's suggestion
v2: Incorporated Will Deacon's suggestion
mm/memory.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/mm/memory.c b/mm/memory.c
index 3a7779d9891d..602f4283122f 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4247,6 +4247,9 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
vmf->flags & FAULT_FLAG_WRITE)) {
update_mmu_cache(vmf->vma, vmf->address, vmf->pte);
} else {
+ /* Skip spurious TLB flush for retried page fault */
+ if (vmf->flags & FAULT_FLAG_TRIED)
+ goto unlock;
/*
* This is needed only for protection faults but the arch code
* is not yet telling us if this is a protection fault or not.
--
2.26.2
Fix linkage error when CONFIG_BINFMT_ELF is selected but CONFIG_COREDUMP
is not:
ia64-linux-ld: arch/ia64/kernel/elfcore.o: in function `elf_core_write_extra_phdrs':
elfcore.c:(.text+0x172): undefined reference to `dump_emit'
ia64-linux-ld: arch/ia64/kernel/elfcore.o: in function `elf_core_write_extra_data':
elfcore.c:(.text+0x2b2): undefined reference to `dump_emit'
Cc: <stable(a)vger.kernel.org>
Fixes: 1fcccbac89f5 ("elf coredump: replace ELF_CORE_EXTRA_* macros by functions")
Reported-by: kernel test robot <lkp(a)intel.com>
Signed-off-by: Krzysztof Kozlowski <krzk(a)kernel.org>
---
This is similar fix to commit 42d91f612c87 ("um: Fix build error and
kconfig for i386") although I put different fixes tag - the commit which
introduced this part of code.
---
arch/ia64/kernel/Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile
index 1a8df6669eee..18d6008b151f 100644
--- a/arch/ia64/kernel/Makefile
+++ b/arch/ia64/kernel/Makefile
@@ -41,7 +41,7 @@ obj-y += esi_stub.o # must be in kernel proper
endif
obj-$(CONFIG_INTEL_IOMMU) += pci-dma.o
-obj-$(CONFIG_BINFMT_ELF) += elfcore.o
+obj-$(CONFIG_ELF_CORE) += elfcore.o
# fp_emulate() expects f2-f5,f16-f31 to contain the user-level state.
CFLAGS_traps.o += -mfixed-range=f2-f5,f16-f31
--
2.17.1
This is a final attempt to reach you as regards the estate of our deceased client who made you one of the beneficiaries of his estate. Do get back to me at your earliest convenience. The Trustees
The patch titled
Subject: squashfs: avoid bio_alloc() failure with 1Mbyte blocks
has been added to the -mm tree. Its filename is
squashfs-avoid-bio_alloc-failure-with-1mbyte-blocks.patch
This patch should soon appear at
http://ozlabs.org/~akpm/mmots/broken-out/squashfs-avoid-bio_alloc-failure-w…
and later at
http://ozlabs.org/~akpm/mmotm/broken-out/squashfs-avoid-bio_alloc-failure-w…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Phillip Lougher <phillip(a)squashfs.org.uk>
Subject: squashfs: avoid bio_alloc() failure with 1Mbyte blocks
This is a regression introduced by the patch "migrate from ll_rw_block
usage to BIO".
Bio_alloc() is limited to 256 pages (1 Mbyte). This can cause a failure
when reading 1 Mbyte block filesystems. The problem is a datablock can be
fully (or almost uncompressed), requiring 256 pages, but, because blocks
are not aligned to page boundaries, it may require 257 pages to read.
Bio_kmalloc() can handle 1024 pages, and so use this for the edge
condition.
Link: http://lkml.kernel.org/r/20200815035637.15319-1-phillip@squashfs.org.uk
Fixes: 93e72b3c612a ("squashfs: migrate from ll_rw_block usage to BIO")
Signed-off-by: Phillip Lougher <phillip(a)squashfs.org.uk>
Reported-by: Nicolas Prochazka <nicolas.prochazka(a)gmail.com>
Reported-by: Tomoatsu Shimada <shimada(a)walbrix.com>
Reviewed-by: Guenter Roeck <groeck(a)chromium.org>
Cc: Philippe Liard <pliard(a)google.com>
Cc: Christoph Hellwig <hch(a)lst.de>
Cc: Adrien Schildknecht <adrien+dev(a)schischi.me>
Cc: Daniel Rosenberg <drosen(a)google.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/squashfs/block.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
--- a/fs/squashfs/block.c~squashfs-avoid-bio_alloc-failure-with-1mbyte-blocks
+++ a/fs/squashfs/block.c
@@ -87,7 +87,11 @@ static int squashfs_bio_read(struct supe
int error, i;
struct bio *bio;
- bio = bio_alloc(GFP_NOIO, page_count);
+ if (page_count <= BIO_MAX_PAGES)
+ bio = bio_alloc(GFP_NOIO, page_count);
+ else
+ bio = bio_kmalloc(GFP_NOIO, page_count);
+
if (!bio)
return -ENOMEM;
_
Patches currently in -mm which might be from phillip(a)squashfs.org.uk are
squashfs-avoid-bio_alloc-failure-with-1mbyte-blocks.patch
The patch titled
Subject: uprobes: __replace_page() avoid BUG in munlock_vma_page()
has been added to the -mm tree. Its filename is
uprobes-__replace_page-avoid-bug-in-munlock_vma_page.patch
This patch should soon appear at
http://ozlabs.org/~akpm/mmots/broken-out/uprobes-__replace_page-avoid-bug-i…
and later at
http://ozlabs.org/~akpm/mmotm/broken-out/uprobes-__replace_page-avoid-bug-i…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Hugh Dickins <hughd(a)google.com>
Subject: uprobes: __replace_page() avoid BUG in munlock_vma_page()
syzbot crashed on the VM_BUG_ON_PAGE(PageTail) in munlock_vma_page(), when
called from uprobes __replace_page(). Which of many ways to fix it?
Settled on not calling when PageCompound (since Head and Tail are equals
in this context, PageCompound the usual check in uprobes.c, and the prior
use of FOLL_SPLIT_PMD will have cleared PageMlocked already).
Link: http://lkml.kernel.org/r/alpine.LSU.2.11.2008161338360.20413@eggly.anvils
Fixes: 5a52c9df62b4 ("uprobe: use FOLL_SPLIT_PMD instead of FOLL_SPLIT")
Signed-off-by: Hugh Dickins <hughd(a)google.com>
Reported-by: syzbot <syzkaller(a)googlegroups.com>
Acked-by: Song Liu <songliubraving(a)fb.com>
Acked-by: Oleg Nesterov <oleg(a)redhat.com>
Reviewed-by: Srikar Dronamraju <srikar(a)linux.vnet.ibm.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov(a)linux.intel.com>
Cc: <stable(a)vger.kernel.org> [5.4+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
kernel/events/uprobes.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/kernel/events/uprobes.c~uprobes-__replace_page-avoid-bug-in-munlock_vma_page
+++ a/kernel/events/uprobes.c
@@ -205,7 +205,7 @@ static int __replace_page(struct vm_area
try_to_free_swap(old_page);
page_vma_mapped_walk_done(&pvmw);
- if (vma->vm_flags & VM_LOCKED)
+ if ((vma->vm_flags & VM_LOCKED) && !PageCompound(old_page))
munlock_vma_page(old_page);
put_page(old_page);
_
Patches currently in -mm which might be from hughd(a)google.com are
uprobes-__replace_page-avoid-bug-in-munlock_vma_page.patch
khugepaged-adjust-vm_bug_on_mm-in-__khugepaged_enter.patch
The patch titled
Subject: kernel/relay.c: fix memleak on destroy relay channel
has been added to the -mm tree. Its filename is
kernel-relayc-fix-memleak-on-destroy-relay-channel.patch
This patch should soon appear at
http://ozlabs.org/~akpm/mmots/broken-out/kernel-relayc-fix-memleak-on-destr…
and later at
http://ozlabs.org/~akpm/mmotm/broken-out/kernel-relayc-fix-memleak-on-destr…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Wei Yongjun <weiyongjun1(a)huawei.com>
Subject: kernel/relay.c: fix memleak on destroy relay channel
kmemleak report memory leak as follows:
unreferenced object 0x607ee4e5f948 (size 8):
comm "syz-executor.1", pid 2098, jiffies 4295031601 (age 288.468s)
hex dump (first 8 bytes):
00 00 00 00 00 00 00 00 ........
backtrace:
[<00000000ca1de2fa>] relay_open kernel/relay.c:583 [inline]
[<00000000ca1de2fa>] relay_open+0xb6/0x970 kernel/relay.c:563
[<0000000038ae5a4b>] do_blk_trace_setup+0x4a8/0xb20 kernel/trace/blktrace.c:557
[<00000000d5e778e9>] __blk_trace_setup+0xb6/0x150 kernel/trace/blktrace.c:597
[<0000000038fdf803>] blk_trace_ioctl+0x146/0x280 kernel/trace/blktrace.c:738
[<00000000ce25a0ca>] blkdev_ioctl+0xb2/0x6a0 block/ioctl.c:613
[<00000000579e47e0>] block_ioctl+0xe5/0x120 fs/block_dev.c:1871
[<00000000b1588c11>] vfs_ioctl fs/ioctl.c:48 [inline]
[<00000000b1588c11>] __do_sys_ioctl fs/ioctl.c:753 [inline]
[<00000000b1588c11>] __se_sys_ioctl fs/ioctl.c:739 [inline]
[<00000000b1588c11>] __x64_sys_ioctl+0x170/0x1ce fs/ioctl.c:739
[<0000000088fc9942>] do_syscall_64+0x33/0x40 arch/x86/entry/common.c:46
[<000000004f6dd57a>] entry_SYSCALL_64_after_hwframe+0x44/0xa9
'chan->buf' is malloced in relay_open() by alloc_percpu() but not free
while destroy the relay channel. Fix it by adding free_percpu() before
return from relay_destroy_channel().
Link: http://lkml.kernel.org/r/20200817122826.48518-1-weiyongjun1@huawei.com
Fixes: 017c59c042d0 ("relay: Use per CPU constructs for the relay channel buffer pointers")
Signed-off-by: Wei Yongjun <weiyongjun1(a)huawei.com>
Reported-by: Hulk Robot <hulkci(a)huawei.com>
Reviewed-by: Chris Wilson <chris(a)chris-wilson.co.uk>
Cc: Al Viro <viro(a)zeniv.linux.org.uk>
Cc: Michael Ellerman <mpe(a)ellerman.id.au>
Cc: David Rientjes <rientjes(a)google.com>
Cc: Michel Lespinasse <walken(a)google.com>
Cc: Daniel Axtens <dja(a)axtens.net>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Akash Goel <akash.goel(a)intel.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
kernel/relay.c | 1 +
1 file changed, 1 insertion(+)
--- a/kernel/relay.c~kernel-relayc-fix-memleak-on-destroy-relay-channel
+++ a/kernel/relay.c
@@ -197,6 +197,7 @@ free_buf:
static void relay_destroy_channel(struct kref *kref)
{
struct rchan *chan = container_of(kref, struct rchan, kref);
+ free_percpu(chan->buf);
kfree(chan);
}
_
Patches currently in -mm which might be from weiyongjun1(a)huawei.com are
kernel-relayc-fix-memleak-on-destroy-relay-channel.patch
The patch titled
Subject: romfs: fix uninitialized memory leak in romfs_dev_read()
has been added to the -mm tree. Its filename is
romfs-fix-uninitialized-memory-leak-in-romfs_dev_read.patch
This patch should soon appear at
http://ozlabs.org/~akpm/mmots/broken-out/romfs-fix-uninitialized-memory-lea…
and later at
http://ozlabs.org/~akpm/mmotm/broken-out/romfs-fix-uninitialized-memory-lea…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Jann Horn <jannh(a)google.com>
Subject: romfs: fix uninitialized memory leak in romfs_dev_read()
romfs has a superblock field that limits the size of the filesystem; data
beyond that limit is never accessed.
romfs_dev_read() fetches a caller-supplied number of bytes from the
backing device. It returns 0 on success or an error code on failure;
therefore, its API can't represent short reads, it's all-or-nothing.
However, when romfs_dev_read() detects that the requested operation would
cross the filesystem size limit, it currently silently truncates the
requested number of bytes. This e.g. means that when the content of a
file with size 0x1000 starts one byte before the filesystem size limit,
->readpage() will only fill a single byte of the supplied page while
leaving the rest uninitialized, leaking that uninitialized memory to
userspace.
Fix it by returning an error code instead of truncating the read when the
requested read operation would go beyond the end of the filesystem.
Link: http://lkml.kernel.org/r/20200818013202.2246365-1-jannh@google.com
Fixes: da4458bda237 ("NOMMU: Make it possible for RomFS to use MTD devices directly")
Signed-off-by: Jann Horn <jannh(a)google.com>
Reviewed-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Cc: David Howells <dhowells(a)redhat.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/romfs/storage.c | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
--- a/fs/romfs/storage.c~romfs-fix-uninitialized-memory-leak-in-romfs_dev_read
+++ a/fs/romfs/storage.c
@@ -217,10 +217,8 @@ int romfs_dev_read(struct super_block *s
size_t limit;
limit = romfs_maxsize(sb);
- if (pos >= limit)
+ if (pos >= limit || buflen > limit - pos)
return -EIO;
- if (buflen > limit - pos)
- buflen = limit - pos;
#ifdef CONFIG_ROMFS_ON_MTD
if (sb->s_mtd)
_
Patches currently in -mm which might be from jannh(a)google.com are
romfs-fix-uninitialized-memory-leak-in-romfs_dev_read.patch
The patch titled
Subject: include/asm-generic/vmlinux.lds.h: align ro_after_init
has been removed from the -mm tree. Its filename was
include-asm-generic-vmlinuxldsh-align-ro_after_init.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Romain Naour <romain.naour(a)gmail.com>
Subject: include/asm-generic/vmlinux.lds.h: align ro_after_init
Since the patch [1], building the kernel using a toolchain built with
binutils 2.33.1 prevents booting a sh4 system under Qemu. Apply the patch
provided by Alan Modra [2] that fix alignment of rodata.
[1] https://sourceware.org/git/gitweb.cgi?p=binutils-gdb.git;h=ebd2263ba9a9124d…
[2] https://www.sourceware.org/ml/binutils/2019-12/msg00112.html
Link: https://marc.info/?l=linux-sh&m=158429470221261
Signed-off-by: Romain Naour <romain.naour(a)gmail.com>
Cc: Alan Modra <amodra(a)gmail.com>
Cc: Bin Meng <bin.meng(a)windriver.com>
Cc: Chen Zhou <chenzhou10(a)huawei.com>
Cc: Geert Uytterhoeven <geert+renesas(a)glider.be>
Cc: John Paul Adrian Glaubitz <glaubitz(a)physik.fu-berlin.de>
Cc: Krzysztof Kozlowski <krzk(a)kernel.org>
Cc: Kuninori Morimoto <kuninori.morimoto.gx(a)renesas.com>
Cc: Rich Felker <dalias(a)libc.org>
Cc: Sam Ravnborg <sam(a)ravnborg.org>
Cc: Yoshinori Sato <ysato(a)users.sourceforge.jp>
Cc: Arnd Bergmann <arnd(a)arndb.de>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
include/asm-generic/vmlinux.lds.h | 1 +
1 file changed, 1 insertion(+)
--- a/include/asm-generic/vmlinux.lds.h~include-asm-generic-vmlinuxldsh-align-ro_after_init
+++ a/include/asm-generic/vmlinux.lds.h
@@ -394,6 +394,7 @@
*/
#ifndef RO_AFTER_INIT_DATA
#define RO_AFTER_INIT_DATA \
+ . = ALIGN(8); \
__start_ro_after_init = .; \
*(.data..ro_after_init) \
JUMP_TABLE_DATA \
_
Patches currently in -mm which might be from romain.naour(a)gmail.com are
The patch titled
Subject: Revert "mm/vmstat.c: do not show lowmem reserve protection information of empty zone"
has been removed from the -mm tree. Its filename was
revert-mm-vmstatc-do-not-show-lowmem-reserve-protection-information-of-empty-zone.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Baoquan He <bhe(a)redhat.com>
Subject: Revert "mm/vmstat.c: do not show lowmem reserve protection information of empty zone"
This reverts commit 26e7deadaae175.
Sonny reported that one of their tests started failing on the latest
kernel on their Chrome OS platform. The root cause is that the above
commit removed the protection line of empty zone, while the parser used in
the test relies on the protection line to mark the end of each zone.
Let's revert it to avoid breaking userspace testing or applications.
Link: http://lkml.kernel.org/r/20200811075412.12872-1-bhe@redhat.com
Fixes: 26e7deadaae175 ("mm/vmstat.c: do not show lowmem reserve protection information of empty zone)"
Signed-off-by: Baoquan He <bhe(a)redhat.com>
Reported-by: Sonny Rao <sonnyrao(a)chromium.org>
Reviewed-by: David Hildenbrand <david(a)redhat.com>
Acked-by: David Rientjes <rientjes(a)google.com>
Cc: <stable(a)vger.kernel.org> [5.8.x]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/vmstat.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
--- a/mm/vmstat.c~revert-mm-vmstatc-do-not-show-lowmem-reserve-protection-information-of-empty-zone
+++ a/mm/vmstat.c
@@ -1642,12 +1642,6 @@ static void zoneinfo_show_print(struct s
zone->present_pages,
zone_managed_pages(zone));
- /* If unpopulated, no other information is useful */
- if (!populated_zone(zone)) {
- seq_putc(m, '\n');
- return;
- }
-
seq_printf(m,
"\n protection: (%ld",
zone->lowmem_reserve[0]);
@@ -1655,6 +1649,12 @@ static void zoneinfo_show_print(struct s
seq_printf(m, ", %ld", zone->lowmem_reserve[i]);
seq_putc(m, ')');
+ /* If unpopulated, no other information is useful */
+ if (!populated_zone(zone)) {
+ seq_putc(m, '\n');
+ return;
+ }
+
for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
seq_printf(m, "\n %-12s %lu", zone_stat_name(i),
zone_page_state(zone, i));
_
Patches currently in -mm which might be from bhe(a)redhat.com are
Commit 0f348db01fdf ("thermal: ti-soc-thermal: Fix reversed condition
in ti_thermal_expose_sensor()") Fixes an issue that was introduced by
accidentally inverting the check which disabled the functionality.
The issue was then backported to 4.14+ kernels, so they are also
affected and this patch fixes the regressions.
Please backport this fix to kernels 4.14+ to restore functionality.
Thank you,
adam
LLVM implemented a recent "libcall optimization" that lowers calls to
`sprintf(dest, "%s", str)` where the return value is used to
`stpcpy(dest, str) - dest`. This generally avoids the machinery involved
in parsing format strings. This optimization was introduced into
clang-12. Because the kernel does not provide an implementation of
stpcpy, we observe linkage failures for almost all targets when building
with ToT clang.
The interface is unsafe as it does not perform any bounds checking.
Disable this "libcall optimization" via `-fno-builtin-stpcpy`.
Unlike
commit 5f074f3e192f ("lib/string.c: implement a basic bcmp")
which cited failures with `-fno-builtin-*` flags being retained in LLVM
LTO, that bug seems to have been fixed by
https://reviews.llvm.org/D71193, so the above sha can now be reverted in
favor of `-fno-builtin-bcmp`.
Cc: stable(a)vger.kernel.org # 4.4
Link: https://bugs.llvm.org/show_bug.cgi?id=47162
Link: https://github.com/ClangBuiltLinux/linux/issues/1126
Link: https://reviews.llvm.org/D85963
Reported-by: Sami Tolvanen <samitolvanen(a)google.com>
Suggested-by: Dávid Bolvanský <david.bolvansky(a)gmail.com>
Suggested-by: Kees Cook <keescook(a)chromium.org>
Signed-off-by: Nick Desaulniers <ndesaulniers(a)google.com>
---
Makefile | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/Makefile b/Makefile
index 9cac6fde3479..211a1b6f6478 100644
--- a/Makefile
+++ b/Makefile
@@ -959,6 +959,12 @@ ifdef CONFIG_RETPOLINE
KBUILD_CFLAGS += $(call cc-option,-fcf-protection=none)
endif
+# The compiler may "libcall optimize" certain function calls into the below
+# functions, for architectures that don't use -ffreestanding. If we don't plan
+# to provide implementations of these routines, then prevent the compiler from
+# emitting calls to what will be undefined symbols.
+KBUILD_CFLAGS += -fno-builtin-stpcpy
+
# include additional Makefiles when needed
include-y := scripts/Makefile.extrawarn
include-$(CONFIG_KASAN) += scripts/Makefile.kasan
--
2.28.0.220.ged08abb693-goog
Seen with v4.4.232-120-g11806ba5e43a:
Building powerpc:defconfig ... failed
drivers/misc/cxl/sysfs.c: In function ‘cxl_sysfs_afu_new_cr’:
drivers/misc/cxl/sysfs.c:558:1: error: label ‘err’ defined but not used [-Werror=unused-label]
Guenter
This is a note to let you know that I've just added the patch titled
staging: greybus: audio: fix uninitialized value issue
to my staging git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging.git
in the staging-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
>From 1dffeb8b8b4c261c45416d53c75ea51e6ece1770 Mon Sep 17 00:00:00 2001
From: Vaibhav Agarwal <vaibhav.sr(a)gmail.com>
Date: Fri, 14 Aug 2020 18:03:15 +0530
Subject: staging: greybus: audio: fix uninitialized value issue
The current implementation for gbcodec_mixer_dapm_ctl_put() uses
uninitialized gbvalue for comparison with updated value. This was found
using static analysis with coverity.
Uninitialized scalar variable (UNINIT)
11. uninit_use: Using uninitialized value
gbvalue.value.integer_value[0].
460 if (gbvalue.value.integer_value[0] != val) {
This patch fixes the issue with fetching the gbvalue before using it for
comparision.
Fixes: 6339d2322c47 ("greybus: audio: Add topology parser for GB codec")
Reported-by: Colin Ian King <colin.king(a)canonical.com>
Signed-off-by: Vaibhav Agarwal <vaibhav.sr(a)gmail.com>
Cc: stable <stable(a)vger.kernel.org>
Link: https://lore.kernel.org/r/bc4f29eb502ccf93cd2ffd98db0e319fa7d0f247.15974081…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/staging/greybus/audio_topology.c | 29 ++++++++++++------------
1 file changed, 15 insertions(+), 14 deletions(-)
diff --git a/drivers/staging/greybus/audio_topology.c b/drivers/staging/greybus/audio_topology.c
index 2f9fdbdcd547..83b38ae8908c 100644
--- a/drivers/staging/greybus/audio_topology.c
+++ b/drivers/staging/greybus/audio_topology.c
@@ -456,6 +456,15 @@ static int gbcodec_mixer_dapm_ctl_put(struct snd_kcontrol *kcontrol,
val = ucontrol->value.integer.value[0] & mask;
connect = !!val;
+ ret = gb_pm_runtime_get_sync(bundle);
+ if (ret)
+ return ret;
+
+ ret = gb_audio_gb_get_control(module->mgmt_connection, data->ctl_id,
+ GB_AUDIO_INVALID_INDEX, &gbvalue);
+ if (ret)
+ goto exit;
+
/* update ucontrol */
if (gbvalue.value.integer_value[0] != val) {
for (wi = 0; wi < wlist->num_widgets; wi++) {
@@ -466,25 +475,17 @@ static int gbcodec_mixer_dapm_ctl_put(struct snd_kcontrol *kcontrol,
gbvalue.value.integer_value[0] =
cpu_to_le32(ucontrol->value.integer.value[0]);
- ret = gb_pm_runtime_get_sync(bundle);
- if (ret)
- return ret;
-
ret = gb_audio_gb_set_control(module->mgmt_connection,
data->ctl_id,
GB_AUDIO_INVALID_INDEX, &gbvalue);
-
- gb_pm_runtime_put_autosuspend(bundle);
-
- if (ret) {
- dev_err_ratelimited(codec_dev,
- "%d:Error in %s for %s\n", ret,
- __func__, kcontrol->id.name);
- return ret;
- }
}
- return 0;
+exit:
+ gb_pm_runtime_put_autosuspend(bundle);
+ if (ret)
+ dev_err_ratelimited(codec_dev, "%d:Error in %s for %s\n", ret,
+ __func__, kcontrol->id.name);
+ return ret;
}
#define SOC_DAPM_MIXER_GB(xname, kcount, data) \
--
2.28.0
This is a note to let you know that I've just added the patch titled
staging: wlan-ng: fix out of bounds read in prism2sta_probe_usb()
to my staging git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging.git
in the staging-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
>From fea22e159d51c766ba70473f473a0ec914cc7e92 Mon Sep 17 00:00:00 2001
From: Rustam Kovhaev <rkovhaev(a)gmail.com>
Date: Tue, 4 Aug 2020 07:56:14 -0700
Subject: staging: wlan-ng: fix out of bounds read in prism2sta_probe_usb()
let's use usb_find_common_endpoints() to discover endpoints, it does all
necessary checks for type and xfer direction
remove memset() in hfa384x_create(), because we now assign endpoints in
prism2sta_probe_usb() and because create_wlan() uses kzalloc() to
allocate hfa384x struct before calling hfa384x_create()
Fixes: faaff9765664 ("staging: wlan-ng: properly check endpoint types")
Reported-and-tested-by: syzbot+22794221ab96b0bab53a(a)syzkaller.appspotmail.com
Link: https://syzkaller.appspot.com/bug?extid=22794221ab96b0bab53a
Signed-off-by: Rustam Kovhaev <rkovhaev(a)gmail.com>
Cc: stable <stable(a)vger.kernel.org>
Link: https://lore.kernel.org/r/20200804145614.104320-1-rkovhaev@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/staging/wlan-ng/hfa384x_usb.c | 5 -----
drivers/staging/wlan-ng/prism2usb.c | 19 ++++++-------------
2 files changed, 6 insertions(+), 18 deletions(-)
diff --git a/drivers/staging/wlan-ng/hfa384x_usb.c b/drivers/staging/wlan-ng/hfa384x_usb.c
index fa1bf8b069fd..2720f7319a3d 100644
--- a/drivers/staging/wlan-ng/hfa384x_usb.c
+++ b/drivers/staging/wlan-ng/hfa384x_usb.c
@@ -524,13 +524,8 @@ static void hfa384x_usb_defer(struct work_struct *data)
*/
void hfa384x_create(struct hfa384x *hw, struct usb_device *usb)
{
- memset(hw, 0, sizeof(*hw));
hw->usb = usb;
- /* set up the endpoints */
- hw->endp_in = usb_rcvbulkpipe(usb, 1);
- hw->endp_out = usb_sndbulkpipe(usb, 2);
-
/* Set up the waitq */
init_waitqueue_head(&hw->cmdq);
diff --git a/drivers/staging/wlan-ng/prism2usb.c b/drivers/staging/wlan-ng/prism2usb.c
index 456603fd26c0..4b08dc1da4f9 100644
--- a/drivers/staging/wlan-ng/prism2usb.c
+++ b/drivers/staging/wlan-ng/prism2usb.c
@@ -61,23 +61,14 @@ static int prism2sta_probe_usb(struct usb_interface *interface,
const struct usb_device_id *id)
{
struct usb_device *dev;
- const struct usb_endpoint_descriptor *epd;
- const struct usb_host_interface *iface_desc = interface->cur_altsetting;
+ struct usb_endpoint_descriptor *bulk_in, *bulk_out;
+ struct usb_host_interface *iface_desc = interface->cur_altsetting;
struct wlandevice *wlandev = NULL;
struct hfa384x *hw = NULL;
int result = 0;
- if (iface_desc->desc.bNumEndpoints != 2) {
- result = -ENODEV;
- goto failed;
- }
-
- result = -EINVAL;
- epd = &iface_desc->endpoint[1].desc;
- if (!usb_endpoint_is_bulk_in(epd))
- goto failed;
- epd = &iface_desc->endpoint[2].desc;
- if (!usb_endpoint_is_bulk_out(epd))
+ result = usb_find_common_endpoints(iface_desc, &bulk_in, &bulk_out, NULL, NULL);
+ if (result)
goto failed;
dev = interface_to_usbdev(interface);
@@ -96,6 +87,8 @@ static int prism2sta_probe_usb(struct usb_interface *interface,
}
/* Initialize the hw data */
+ hw->endp_in = usb_rcvbulkpipe(dev, bulk_in->bEndpointAddress);
+ hw->endp_out = usb_sndbulkpipe(dev, bulk_out->bEndpointAddress);
hfa384x_create(hw, dev);
hw->wlandev = wlandev;
--
2.28.0
This is a note to let you know that I've just added the patch titled
mei: hdcp: fix mei_hdcp_verify_mprime() input parameter
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
>From 51072c0f5b5e98a035c6f63b83ba2afedbb7accd Mon Sep 17 00:00:00 2001
From: Tomas Winkler <tomas.winkler(a)intel.com>
Date: Tue, 18 Aug 2020 10:54:06 +0300
Subject: mei: hdcp: fix mei_hdcp_verify_mprime() input parameter
wired_cmd_repeater_auth_stream_req_in has a variable
length array at the end. we use struct_size() overflow
macro to determine the size for the allocation and sending
size.
This also fixes bug in case number of streams is > 0 in the original
submission. This bug was not triggered as the number of streams is
always one.
Fixes: c56967d674e3 (mei: hdcp: Replace one-element array with flexible-array member)
Fixes: 0a1af1b5c18d (misc/mei/hdcp: Verify M_prime)
Cc: <stable(a)vger.kernel.org> # v5.1+: c56967d674e3 (mei: hdcp: Replace one-element array with flexible-array member)
Signed-off-by: Tomas Winkler <tomas.winkler(a)intel.com>
Reviewed-by: Gustavo A. R. Silva <gustavoars(a)kernel.org>
Link: https://lore.kernel.org/r/20200818075406.2532605-1-tomas.winkler@intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/misc/mei/hdcp/mei_hdcp.c | 40 +++++++++++++++++++-------------
1 file changed, 24 insertions(+), 16 deletions(-)
diff --git a/drivers/misc/mei/hdcp/mei_hdcp.c b/drivers/misc/mei/hdcp/mei_hdcp.c
index d1d3e025ca0e..9ae9669e46ea 100644
--- a/drivers/misc/mei/hdcp/mei_hdcp.c
+++ b/drivers/misc/mei/hdcp/mei_hdcp.c
@@ -546,38 +546,46 @@ static int mei_hdcp_verify_mprime(struct device *dev,
struct hdcp_port_data *data,
struct hdcp2_rep_stream_ready *stream_ready)
{
- struct wired_cmd_repeater_auth_stream_req_in
- verify_mprime_in = { { 0 } };
+ struct wired_cmd_repeater_auth_stream_req_in *verify_mprime_in;
struct wired_cmd_repeater_auth_stream_req_out
verify_mprime_out = { { 0 } };
struct mei_cl_device *cldev;
ssize_t byte;
+ size_t cmd_size;
if (!dev || !stream_ready || !data)
return -EINVAL;
cldev = to_mei_cl_device(dev);
- verify_mprime_in.header.api_version = HDCP_API_VERSION;
- verify_mprime_in.header.command_id = WIRED_REPEATER_AUTH_STREAM_REQ;
- verify_mprime_in.header.status = ME_HDCP_STATUS_SUCCESS;
- verify_mprime_in.header.buffer_len =
+ cmd_size = struct_size(verify_mprime_in, streams, data->k);
+ if (cmd_size == SIZE_MAX)
+ return -EINVAL;
+
+ verify_mprime_in = kzalloc(cmd_size, GFP_KERNEL);
+ if (!verify_mprime_in)
+ return -ENOMEM;
+
+ verify_mprime_in->header.api_version = HDCP_API_VERSION;
+ verify_mprime_in->header.command_id = WIRED_REPEATER_AUTH_STREAM_REQ;
+ verify_mprime_in->header.status = ME_HDCP_STATUS_SUCCESS;
+ verify_mprime_in->header.buffer_len =
WIRED_CMD_BUF_LEN_REPEATER_AUTH_STREAM_REQ_MIN_IN;
- verify_mprime_in.port.integrated_port_type = data->port_type;
- verify_mprime_in.port.physical_port = (u8)data->fw_ddi;
- verify_mprime_in.port.attached_transcoder = (u8)data->fw_tc;
+ verify_mprime_in->port.integrated_port_type = data->port_type;
+ verify_mprime_in->port.physical_port = (u8)data->fw_ddi;
+ verify_mprime_in->port.attached_transcoder = (u8)data->fw_tc;
+
+ memcpy(verify_mprime_in->m_prime, stream_ready->m_prime, HDCP_2_2_MPRIME_LEN);
+ drm_hdcp_cpu_to_be24(verify_mprime_in->seq_num_m, data->seq_num_m);
- memcpy(verify_mprime_in.m_prime, stream_ready->m_prime,
- HDCP_2_2_MPRIME_LEN);
- drm_hdcp_cpu_to_be24(verify_mprime_in.seq_num_m, data->seq_num_m);
- memcpy(verify_mprime_in.streams, data->streams,
+ memcpy(verify_mprime_in->streams, data->streams,
array_size(data->k, sizeof(*data->streams)));
- verify_mprime_in.k = cpu_to_be16(data->k);
+ verify_mprime_in->k = cpu_to_be16(data->k);
- byte = mei_cldev_send(cldev, (u8 *)&verify_mprime_in,
- sizeof(verify_mprime_in));
+ byte = mei_cldev_send(cldev, (u8 *)verify_mprime_in, cmd_size);
+ kfree(verify_mprime_in);
if (byte < 0) {
dev_dbg(dev, "mei_cldev_send failed. %zd\n", byte);
return byte;
--
2.28.0
commit a10674bf2406 ("tcp: detecting the misuse of .sendpage for Slab
objects") adds the checks for Slab pages, but the pages don't have
page_count are still missing from the check.
Network layer's sendpage method is not designed to send page_count 0
pages neither, therefore both PageSlab() and page_count() should be
both checked for the sending page. This is exactly what sendpage_ok()
does.
This patch uses sendpage_ok() in do_tcp_sendpages() to detect misused
.sendpage, to make the code more robust.
Fixes: a10674bf2406 ("tcp: detecting the misuse of .sendpage for Slab objects")
Suggested-by: Eric Dumazet <eric.dumazet(a)gmail.com>
Signed-off-by: Coly Li <colyli(a)suse.de>
Cc: Vasily Averin <vvs(a)virtuozzo.com>
Cc: David S. Miller <davem(a)davemloft.net>
Cc: stable(a)vger.kernel.org
---
net/ipv4/tcp.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 31f3b858db81..2135ee7c806d 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -970,7 +970,8 @@ ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
if (IS_ENABLED(CONFIG_DEBUG_VM) &&
- WARN_ONCE(PageSlab(page), "page must not be a Slab one"))
+ WARN_ONCE(!sendpage_ok(page),
+ "page must not be a Slab one and have page_count > 0"))
return -EINVAL;
/* Wait for a connection to finish. One exception is TCP Fast Open
--
2.26.2
Currently nvme_tcp_try_send_data() doesn't use kernel_sendpage() to
send slab pages. But for pages allocated by __get_free_pages() without
__GFP_COMP, which also have refcount as 0, they are still sent by
kernel_sendpage() to remote end, this is problematic.
The new introduced helper sendpage_ok() checks both PageSlab tag and
page_count counter, and returns true if the checking page is OK to be
sent by kernel_sendpage().
This patch fixes the page checking issue of nvme_tcp_try_send_data()
with sendpage_ok(). If sendpage_ok() returns true, send this page by
kernel_sendpage(), otherwise use sock_no_sendpage to handle this page.
Signed-off-by: Coly Li <colyli(a)suse.de>
Cc: Chaitanya Kulkarni <chaitanya.kulkarni(a)wdc.com>
Cc: Christoph Hellwig <hch(a)lst.de>
Cc: Hannes Reinecke <hare(a)suse.de>
Cc: Jan Kara <jack(a)suse.com>
Cc: Jens Axboe <axboe(a)kernel.dk>
Cc: Mikhail Skorzhinskii <mskorzhinskiy(a)solarflare.com>
Cc: Philipp Reisner <philipp.reisner(a)linbit.com>
Cc: Sagi Grimberg <sagi(a)grimberg.me>
Cc: Vlastimil Babka <vbabka(a)suse.com>
Cc: stable(a)vger.kernel.org
---
drivers/nvme/host/tcp.c | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 62fbaecdc960..902fe742762b 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -912,12 +912,11 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
else
flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;
- /* can't zcopy slab pages */
- if (unlikely(PageSlab(page))) {
- ret = sock_no_sendpage(queue->sock, page, offset, len,
+ if (sendpage_ok(page)) {
+ ret = kernel_sendpage(queue->sock, page, offset, len,
flags);
} else {
- ret = kernel_sendpage(queue->sock, page, offset, len,
+ ret = sock_no_sendpage(queue->sock, page, offset, len,
flags);
}
if (ret <= 0)
--
2.26.2
This series was original by a bug fix in nvme-over-tcp driver which only
checked whether a page was allocated from slab allcoator, but forgot to
check its page_count: The page handled by sendpage should be neither a
Slab page nor 0 page_count page.
As Sagi Grimberg suggested, the original fix is refind to a more common
inline routine:
static inline bool sendpage_ok(struct page *page)
{
return (!PageSlab(page) && page_count(page) >= 1);
}
If sendpage_ok() returns true, the checking page can be handled by the
zero copy sendpage method in network layer.
The first patch in this series introduces sendpage_ok() in header file
include/linux/net.h, the second patch fixes the page checking issue in
nvme-over-tcp driver, the third patch adds page_count check by using
sendpage_ok() in do_tcp_sendpages() as Eric Dumazet suggested, and all
rested patches just replace existing open coded checks with the inline
sendpage_ok() routine.
Coly Li
Cc: Chaitanya Kulkarni <chaitanya.kulkarni(a)wdc.com>
Cc: Chris Leech <cleech(a)redhat.com>
Cc: Christoph Hellwig <hch(a)lst.de>
Cc: Cong Wang <amwang(a)redhat.com>
Cc: David S. Miller <davem(a)davemloft.net>
Cc: Hannes Reinecke <hare(a)suse.de>
Cc: Ilya Dryomov <idryomov(a)gmail.com>
Cc: Jan Kara <jack(a)suse.com>
Cc: Jeff Layton <jlayton(a)kernel.org>
Cc: Jens Axboe <axboe(a)kernel.dk>
Cc: Lee Duncan <lduncan(a)suse.com>
Cc: Mike Christie <michaelc(a)cs.wisc.edu>
Cc: Mikhail Skorzhinskii <mskorzhinskiy(a)solarflare.com>
Cc: Philipp Reisner <philipp.reisner(a)linbit.com>
Cc: Sagi Grimberg <sagi(a)grimberg.me>
Cc: stable(a)vger.kernel.org
Cc: Vasily Averin <vvs(a)virtuozzo.com>
Cc: Vlastimil Babka <vbabka(a)suse.com>
---
Changelog:
v6: fix page check in do_tcp_sendpages(), and replace other open coded
checks with sendpage_ok() in libceph, iscsi drivers.
v5, include linux/mm.h in include/linux/net.h
v4, change sendpage_ok() as an inline helper, and post it as
separate patch.
v3, introduce a more common sendpage_ok()
v2, fix typo in patch subject
v1, the initial version.
Coly Li (6):
net: introduce helper sendpage_ok() in include/linux/net.h
nvme-tcp: check page by sendpage_ok() before calling kernel_sendpage()
tcp: use sendpage_ok() to detect misused .sendpage
drbd: code cleanup by using sendpage_ok() to check page for
kernel_sendpage()
scsi: libiscsi: use sendpage_ok() in iscsi_tcp_segment_map()
libceph: use sendpage_ok() in ceph_tcp_sendpage()
drivers/block/drbd/drbd_main.c | 2 +-
drivers/nvme/host/tcp.c | 7 +++----
drivers/scsi/libiscsi_tcp.c | 2 +-
include/linux/net.h | 16 ++++++++++++++++
net/ceph/messenger.c | 2 +-
net/ipv4/tcp.c | 3 ++-
6 files changed, 24 insertions(+), 8 deletions(-)
--
2.26.2
This is a note to let you know that I've just added the patch titled
serial: samsung: Removes the IRQ not found warning
to my tty git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/tty.git
in the tty-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
>From 8c6c378b0cbe0c9f1390986b5f8ffb5f6ff7593b Mon Sep 17 00:00:00 2001
From: Tamseel Shams <m.shams(a)samsung.com>
Date: Mon, 10 Aug 2020 08:30:21 +0530
Subject: serial: samsung: Removes the IRQ not found warning
In few older Samsung SoCs like s3c2410, s3c2412
and s3c2440, UART IP is having 2 interrupt lines.
However, in other SoCs like s3c6400, s5pv210,
exynos5433, and exynos4210 UART is having only 1
interrupt line. Due to this, "platform_get_irq(platdev, 1)"
call in the driver gives the following false-positive error:
"IRQ index 1 not found" on newer SoC's.
This patch adds the condition to check for Tx interrupt
only for the those SoC's which have 2 interrupt lines.
Tested-by: Alim Akhtar <alim.akhtar(a)samsung.com>
Tested-by: Marek Szyprowski <m.szyprowski(a)samsung.com>
Reviewed-by: Krzysztof Kozlowski <krzk(a)kernel.org>
Reviewed-by: Alim Akhtar <alim.akhtar(a)samsung.com>
Signed-off-by: Tamseel Shams <m.shams(a)samsung.com>
Cc: stable <stable(a)vger.kernel.org>
Link: https://lore.kernel.org/r/20200810030021.45348-1-m.shams@samsung.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/tty/serial/samsung_tty.c | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/drivers/tty/serial/samsung_tty.c b/drivers/tty/serial/samsung_tty.c
index 8ed3482d2e1e..8ae3e03fbd8c 100644
--- a/drivers/tty/serial/samsung_tty.c
+++ b/drivers/tty/serial/samsung_tty.c
@@ -1905,9 +1905,11 @@ static int s3c24xx_serial_init_port(struct s3c24xx_uart_port *ourport,
ourport->tx_irq = ret + 1;
}
- ret = platform_get_irq(platdev, 1);
- if (ret > 0)
- ourport->tx_irq = ret;
+ if (!s3c24xx_serial_has_interrupt_mask(port)) {
+ ret = platform_get_irq(platdev, 1);
+ if (ret > 0)
+ ourport->tx_irq = ret;
+ }
/*
* DMA is currently supported only on DT platforms, if DMA properties
* are specified.
--
2.28.0
This is a note to let you know that I've just added the patch titled
serial: stm32: avoid kernel warning on absence of optional IRQ
to my tty git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/tty.git
in the tty-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
>From fdf16d78941b4f380753053d229955baddd00712 Mon Sep 17 00:00:00 2001
From: Holger Assmann <h.assmann(a)pengutronix.de>
Date: Thu, 13 Aug 2020 17:27:57 +0200
Subject: serial: stm32: avoid kernel warning on absence of optional IRQ
stm32_init_port() of the stm32-usart may trigger a warning in
platform_get_irq() when the device tree specifies no wakeup interrupt.
The wakeup interrupt is usually a board-specific GPIO and the driver
functions correctly in its absence. The mainline stm32mp151.dtsi does
not specify it, so all mainline device trees trigger an unnecessary
kernel warning. Use of platform_get_irq_optional() avoids this.
Fixes: 2c58e56096dd ("serial: stm32: fix the get_irq error case")
Signed-off-by: Holger Assmann <h.assmann(a)pengutronix.de>
Cc: stable <stable(a)vger.kernel.org>
Link: https://lore.kernel.org/r/20200813152757.32751-1-h.assmann@pengutronix.de
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/tty/serial/stm32-usart.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/tty/serial/stm32-usart.c b/drivers/tty/serial/stm32-usart.c
index 143300a80090..ba503dd04ce2 100644
--- a/drivers/tty/serial/stm32-usart.c
+++ b/drivers/tty/serial/stm32-usart.c
@@ -970,7 +970,7 @@ static int stm32_init_port(struct stm32_port *stm32port,
return ret;
if (stm32port->info->cfg.has_wakeup) {
- stm32port->wakeirq = platform_get_irq(pdev, 1);
+ stm32port->wakeirq = platform_get_irq_optional(pdev, 1);
if (stm32port->wakeirq <= 0 && stm32port->wakeirq != -ENXIO)
return stm32port->wakeirq ? : -ENODEV;
}
--
2.28.0
This is a note to let you know that I've just added the patch titled
serial: stm32: avoid kernel warning on absence of optional IRQ
to my tty git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/tty.git
in the tty-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
>From fdf16d78941b4f380753053d229955baddd00712 Mon Sep 17 00:00:00 2001
From: Holger Assmann <h.assmann(a)pengutronix.de>
Date: Thu, 13 Aug 2020 17:27:57 +0200
Subject: serial: stm32: avoid kernel warning on absence of optional IRQ
stm32_init_port() of the stm32-usart may trigger a warning in
platform_get_irq() when the device tree specifies no wakeup interrupt.
The wakeup interrupt is usually a board-specific GPIO and the driver
functions correctly in its absence. The mainline stm32mp151.dtsi does
not specify it, so all mainline device trees trigger an unnecessary
kernel warning. Use of platform_get_irq_optional() avoids this.
Fixes: 2c58e56096dd ("serial: stm32: fix the get_irq error case")
Signed-off-by: Holger Assmann <h.assmann(a)pengutronix.de>
Cc: stable <stable(a)vger.kernel.org>
Link: https://lore.kernel.org/r/20200813152757.32751-1-h.assmann@pengutronix.de
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/tty/serial/stm32-usart.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/tty/serial/stm32-usart.c b/drivers/tty/serial/stm32-usart.c
index 143300a80090..ba503dd04ce2 100644
--- a/drivers/tty/serial/stm32-usart.c
+++ b/drivers/tty/serial/stm32-usart.c
@@ -970,7 +970,7 @@ static int stm32_init_port(struct stm32_port *stm32port,
return ret;
if (stm32port->info->cfg.has_wakeup) {
- stm32port->wakeirq = platform_get_irq(pdev, 1);
+ stm32port->wakeirq = platform_get_irq_optional(pdev, 1);
if (stm32port->wakeirq <= 0 && stm32port->wakeirq != -ENXIO)
return stm32port->wakeirq ? : -ENODEV;
}
--
2.28.0
This is a note to let you know that I've just added the patch titled
serial: 8250_exar: Fix number of ports for Commtech PCIe cards
to my tty git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/tty.git
in the tty-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
>From c6b9e95dde7b54e6a53c47241201ab5a4035c320 Mon Sep 17 00:00:00 2001
From: Valmer Huhn <valmer.huhn(a)concurrent-rt.com>
Date: Thu, 13 Aug 2020 12:52:55 -0400
Subject: serial: 8250_exar: Fix number of ports for Commtech PCIe cards
The following in 8250_exar.c line 589 is used to determine the number
of ports for each Exar board:
nr_ports = board->num_ports ? board->num_ports : pcidev->device & 0x0f;
If the number of ports a card has is not explicitly specified, it defaults
to the rightmost 4 bits of the PCI device ID. This is prone to error since
not all PCI device IDs contain a number which corresponds to the number of
ports that card provides.
This particular case involves COMMTECH_4222PCIE, COMMTECH_4224PCIE and
COMMTECH_4228PCIE cards with device IDs 0x0022, 0x0020 and 0x0021.
Currently the multiport cards receive 2, 0 and 1 port instead of 2, 4 and
8 ports respectively.
To fix this, each Commtech Fastcom PCIe card is given a struct where the
number of ports is explicitly specified. This ensures 'board->num_ports'
is used instead of the default 'pcidev->device & 0x0f'.
Fixes: d0aeaa83f0b0 ("serial: exar: split out the exar code from 8250_pci")
Signed-off-by: Valmer Huhn <valmer.huhn(a)concurrent-rt.com>
Tested-by: Valmer Huhn <valmer.huhn(a)concurrent-rt.com>
Cc: stable <stable(a)vger.kernel.org>
Link: https://lore.kernel.org/r/20200813165255.GC345440@icarus.concurrent-rt.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/tty/serial/8250/8250_exar.c | 24 +++++++++++++++++++++---
1 file changed, 21 insertions(+), 3 deletions(-)
diff --git a/drivers/tty/serial/8250/8250_exar.c b/drivers/tty/serial/8250/8250_exar.c
index 04b9af7ed941..2d0e7c7e408d 100644
--- a/drivers/tty/serial/8250/8250_exar.c
+++ b/drivers/tty/serial/8250/8250_exar.c
@@ -744,6 +744,24 @@ static const struct exar8250_board pbn_exar_XR17V35x = {
.exit = pci_xr17v35x_exit,
};
+static const struct exar8250_board pbn_fastcom35x_2 = {
+ .num_ports = 2,
+ .setup = pci_xr17v35x_setup,
+ .exit = pci_xr17v35x_exit,
+};
+
+static const struct exar8250_board pbn_fastcom35x_4 = {
+ .num_ports = 4,
+ .setup = pci_xr17v35x_setup,
+ .exit = pci_xr17v35x_exit,
+};
+
+static const struct exar8250_board pbn_fastcom35x_8 = {
+ .num_ports = 8,
+ .setup = pci_xr17v35x_setup,
+ .exit = pci_xr17v35x_exit,
+};
+
static const struct exar8250_board pbn_exar_XR17V4358 = {
.num_ports = 12,
.setup = pci_xr17v35x_setup,
@@ -811,9 +829,9 @@ static const struct pci_device_id exar_pci_tbl[] = {
EXAR_DEVICE(EXAR, XR17V358, pbn_exar_XR17V35x),
EXAR_DEVICE(EXAR, XR17V4358, pbn_exar_XR17V4358),
EXAR_DEVICE(EXAR, XR17V8358, pbn_exar_XR17V8358),
- EXAR_DEVICE(COMMTECH, 4222PCIE, pbn_exar_XR17V35x),
- EXAR_DEVICE(COMMTECH, 4224PCIE, pbn_exar_XR17V35x),
- EXAR_DEVICE(COMMTECH, 4228PCIE, pbn_exar_XR17V35x),
+ EXAR_DEVICE(COMMTECH, 4222PCIE, pbn_fastcom35x_2),
+ EXAR_DEVICE(COMMTECH, 4224PCIE, pbn_fastcom35x_4),
+ EXAR_DEVICE(COMMTECH, 4228PCIE, pbn_fastcom35x_8),
EXAR_DEVICE(COMMTECH, 4222PCI335, pbn_fastcom335_2),
EXAR_DEVICE(COMMTECH, 4224PCI335, pbn_fastcom335_4),
--
2.28.0
This is a note to let you know that I've just added the patch titled
vt_ioctl: change VT_RESIZEX ioctl to check for error return from
to my tty git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/tty.git
in the tty-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
>From bc5269ca765057a1b762e79a1cfd267cd7bf1c46 Mon Sep 17 00:00:00 2001
From: George Kennedy <george.kennedy(a)oracle.com>
Date: Fri, 31 Jul 2020 12:33:12 -0400
Subject: vt_ioctl: change VT_RESIZEX ioctl to check for error return from
vc_resize()
vc_resize() can return with an error after failure. Change VT_RESIZEX ioctl
to save struct vc_data values that are modified and restore the original
values in case of error.
Signed-off-by: George Kennedy <george.kennedy(a)oracle.com>
Cc: stable <stable(a)vger.kernel.org>
Reported-by: syzbot+38a3699c7eaf165b97a6(a)syzkaller.appspotmail.com
Link: https://lore.kernel.org/r/1596213192-6635-2-git-send-email-george.kennedy@o…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/tty/vt/vt_ioctl.c | 12 +++++++++++-
1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/drivers/tty/vt/vt_ioctl.c b/drivers/tty/vt/vt_ioctl.c
index 91c301775047..a4e520bdd521 100644
--- a/drivers/tty/vt/vt_ioctl.c
+++ b/drivers/tty/vt/vt_ioctl.c
@@ -806,12 +806,22 @@ static int vt_resizex(struct vc_data *vc, struct vt_consize __user *cs)
console_lock();
vcp = vc_cons[i].d;
if (vcp) {
+ int ret;
+ int save_scan_lines = vcp->vc_scan_lines;
+ int save_font_height = vcp->vc_font.height;
+
if (v.v_vlin)
vcp->vc_scan_lines = v.v_vlin;
if (v.v_clin)
vcp->vc_font.height = v.v_clin;
vcp->vc_resize_user = 1;
- vc_resize(vcp, v.v_cols, v.v_rows);
+ ret = vc_resize(vcp, v.v_cols, v.v_rows);
+ if (ret) {
+ vcp->vc_scan_lines = save_scan_lines;
+ vcp->vc_font.height = save_font_height;
+ console_unlock();
+ return ret;
+ }
}
console_unlock();
}
--
2.28.0
This is a note to let you know that I've just added the patch titled
fbcon: prevent user font height or width change from causing
to my tty git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/tty.git
in the tty-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
>From 39b3cffb8cf3111738ea993e2757ab382253d86a Mon Sep 17 00:00:00 2001
From: George Kennedy <george.kennedy(a)oracle.com>
Date: Fri, 31 Jul 2020 12:33:11 -0400
Subject: fbcon: prevent user font height or width change from causing
potential out-of-bounds access
Add a check to fbcon_resize() to ensure that a possible change to user font
height or user font width will not allow a font data out-of-bounds access.
NOTE: must use original charcount in calculation as font charcount can
change and cannot be used to determine the font data allocated size.
Signed-off-by: George Kennedy <george.kennedy(a)oracle.com>
Cc: stable <stable(a)vger.kernel.org>
Reported-by: syzbot+38a3699c7eaf165b97a6(a)syzkaller.appspotmail.com
Link: https://lore.kernel.org/r/1596213192-6635-1-git-send-email-george.kennedy@o…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/video/fbdev/core/fbcon.c | 25 +++++++++++++++++++++++--
1 file changed, 23 insertions(+), 2 deletions(-)
diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c
index 8a31fc2b2258..66167830fefd 100644
--- a/drivers/video/fbdev/core/fbcon.c
+++ b/drivers/video/fbdev/core/fbcon.c
@@ -2191,6 +2191,9 @@ static void updatescrollmode(struct fbcon_display *p,
}
}
+#define PITCH(w) (((w) + 7) >> 3)
+#define CALC_FONTSZ(h, p, c) ((h) * (p) * (c)) /* size = height * pitch * charcount */
+
static int fbcon_resize(struct vc_data *vc, unsigned int width,
unsigned int height, unsigned int user)
{
@@ -2200,6 +2203,24 @@ static int fbcon_resize(struct vc_data *vc, unsigned int width,
struct fb_var_screeninfo var = info->var;
int x_diff, y_diff, virt_w, virt_h, virt_fw, virt_fh;
+ if (ops->p && ops->p->userfont && FNTSIZE(vc->vc_font.data)) {
+ int size;
+ int pitch = PITCH(vc->vc_font.width);
+
+ /*
+ * If user font, ensure that a possible change to user font
+ * height or width will not allow a font data out-of-bounds access.
+ * NOTE: must use original charcount in calculation as font
+ * charcount can change and cannot be used to determine the
+ * font data allocated size.
+ */
+ if (pitch <= 0)
+ return -EINVAL;
+ size = CALC_FONTSZ(vc->vc_font.height, pitch, FNTCHARCNT(vc->vc_font.data));
+ if (size > FNTSIZE(vc->vc_font.data))
+ return -EINVAL;
+ }
+
virt_w = FBCON_SWAP(ops->rotate, width, height);
virt_h = FBCON_SWAP(ops->rotate, height, width);
virt_fw = FBCON_SWAP(ops->rotate, vc->vc_font.width,
@@ -2652,7 +2673,7 @@ static int fbcon_set_font(struct vc_data *vc, struct console_font *font,
int size;
int i, csum;
u8 *new_data, *data = font->data;
- int pitch = (font->width+7) >> 3;
+ int pitch = PITCH(font->width);
/* Is there a reason why fbconsole couldn't handle any charcount >256?
* If not this check should be changed to charcount < 256 */
@@ -2668,7 +2689,7 @@ static int fbcon_set_font(struct vc_data *vc, struct console_font *font,
if (fbcon_invalid_charcount(info, charcount))
return -EINVAL;
- size = h * pitch * charcount;
+ size = CALC_FONTSZ(h, pitch, charcount);
new_data = kmalloc(FONT_EXTRA_WORDS * sizeof(int) + size, GFP_USER);
--
2.28.0
This is a note to let you know that I've just added the patch titled
vt: defer kfree() of vc_screenbuf in vc_do_resize()
to my tty git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/tty.git
in the tty-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
>From f8d1653daec02315e06d30246cff4af72e76e54e Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel(a)I-love.SAKURA.ne.jp>
Date: Wed, 29 Jul 2020 23:57:01 +0900
Subject: vt: defer kfree() of vc_screenbuf in vc_do_resize()
syzbot is reporting UAF bug in set_origin() from vc_do_resize() [1], for
vc_do_resize() calls kfree(vc->vc_screenbuf) before calling set_origin().
Unfortunately, in set_origin(), vc->vc_sw->con_set_origin() might access
vc->vc_pos when scroll is involved in order to manipulate cursor, but
vc->vc_pos refers already released vc->vc_screenbuf until vc->vc_pos gets
updated based on the result of vc->vc_sw->con_set_origin().
Preserving old buffer and tolerating outdated vc members until set_origin()
completes would be easier than preventing vc->vc_sw->con_set_origin() from
accessing outdated vc members.
[1] https://syzkaller.appspot.com/bug?id=6649da2081e2ebdc65c0642c214b27fe91099d…
Reported-by: syzbot <syzbot+9116ecc1978ca3a12f43(a)syzkaller.appspotmail.com>
Signed-off-by: Tetsuo Handa <penguin-kernel(a)I-love.SAKURA.ne.jp>
Cc: stable <stable(a)vger.kernel.org>
Link: https://lore.kernel.org/r/1596034621-4714-1-git-send-email-penguin-kernel@I…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/tty/vt/vt.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c
index ccb533fd00a2..a4f08c0da84b 100644
--- a/drivers/tty/vt/vt.c
+++ b/drivers/tty/vt/vt.c
@@ -1201,7 +1201,7 @@ static int vc_do_resize(struct tty_struct *tty, struct vc_data *vc,
unsigned int old_rows, old_row_size, first_copied_row;
unsigned int new_cols, new_rows, new_row_size, new_screen_size;
unsigned int user;
- unsigned short *newscreen;
+ unsigned short *oldscreen, *newscreen;
struct uni_screen *new_uniscr = NULL;
WARN_CONSOLE_UNLOCKED();
@@ -1299,10 +1299,11 @@ static int vc_do_resize(struct tty_struct *tty, struct vc_data *vc,
if (new_scr_end > new_origin)
scr_memsetw((void *)new_origin, vc->vc_video_erase_char,
new_scr_end - new_origin);
- kfree(vc->vc_screenbuf);
+ oldscreen = vc->vc_screenbuf;
vc->vc_screenbuf = newscreen;
vc->vc_screenbuf_size = new_screen_size;
set_origin(vc);
+ kfree(oldscreen);
/* do part of a reset_terminal() */
vc->vc_top = 0;
--
2.28.0
This is a note to let you know that I've just added the patch titled
USB: Fix device driver race
to my usb git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git
in the usb-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
>From d5643d2249b279077427b2c2b2ffae9b70c95b0b Mon Sep 17 00:00:00 2001
From: Bastien Nocera <hadess(a)hadess.net>
Date: Tue, 18 Aug 2020 13:04:45 +0200
Subject: USB: Fix device driver race
When a new device with a specialised device driver is plugged in, the
new driver will be modprobe()'d but the driver core will attach the
"generic" driver to the device.
After that, nothing will trigger a reprobe when the modprobe()'d device
driver has finished initialising, as the device has the "generic"
driver attached to it.
Trigger a reprobe ourselves when new specialised drivers get registered.
Fixes: 88b7381a939d ("USB: Select better matching USB drivers when available")
Signed-off-by: Bastien Nocera <hadess(a)hadess.net>
Cc: stable <stable(a)vger.kernel.org>
Acked-by: Alan Stern <stern(a)rowland.harvard.edu>
Link: https://lore.kernel.org/r/20200818110445.509668-3-hadess@hadess.net
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/usb/core/driver.c | 40 +++++++++++++++++++++++++++++++++++++--
1 file changed, 38 insertions(+), 2 deletions(-)
diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c
index f81606c6a35b..7e73e989645b 100644
--- a/drivers/usb/core/driver.c
+++ b/drivers/usb/core/driver.c
@@ -905,6 +905,35 @@ static int usb_uevent(struct device *dev, struct kobj_uevent_env *env)
return 0;
}
+static bool is_dev_usb_generic_driver(struct device *dev)
+{
+ struct usb_device_driver *udd = dev->driver ?
+ to_usb_device_driver(dev->driver) : NULL;
+
+ return udd == &usb_generic_driver;
+}
+
+static int __usb_bus_reprobe_drivers(struct device *dev, void *data)
+{
+ struct usb_device_driver *new_udriver = data;
+ struct usb_device *udev;
+ int ret;
+
+ if (!is_dev_usb_generic_driver(dev))
+ return 0;
+
+ udev = to_usb_device(dev);
+ if (usb_device_match_id(udev, new_udriver->id_table) == NULL &&
+ (!new_udriver->match || new_udriver->match(udev) != 0))
+ return 0;
+
+ ret = device_reprobe(dev);
+ if (ret && ret != -EPROBE_DEFER)
+ dev_err(dev, "Failed to reprobe device (error %d)\n", ret);
+
+ return 0;
+}
+
/**
* usb_register_device_driver - register a USB device (not interface) driver
* @new_udriver: USB operations for the device driver
@@ -934,13 +963,20 @@ int usb_register_device_driver(struct usb_device_driver *new_udriver,
retval = driver_register(&new_udriver->drvwrap.driver);
- if (!retval)
+ if (!retval) {
pr_info("%s: registered new device driver %s\n",
usbcore_name, new_udriver->name);
- else
+ /*
+ * Check whether any device could be better served with
+ * this new driver
+ */
+ bus_for_each_dev(&usb_bus_type, NULL, new_udriver,
+ __usb_bus_reprobe_drivers);
+ } else {
printk(KERN_ERR "%s: error %d registering device "
" driver %s\n",
usbcore_name, retval, new_udriver->name);
+ }
return retval;
}
--
2.28.0
This is a note to let you know that I've just added the patch titled
USB: Also match device drivers using the ->match vfunc
to my usb git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git
in the usb-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
>From adb6e6ac20eedcf1dce19dc75b224e63c0828ea1 Mon Sep 17 00:00:00 2001
From: Bastien Nocera <hadess(a)hadess.net>
Date: Tue, 18 Aug 2020 13:04:43 +0200
Subject: USB: Also match device drivers using the ->match vfunc
We only ever used the ID table matching before, but we should also support
open-coded match functions.
Fixes: 88b7381a939de ("USB: Select better matching USB drivers when available")
Signed-off-by: Bastien Nocera <hadess(a)hadess.net>
Cc: stable <stable(a)vger.kernel.org>
Acked-by: Alan Stern <stern(a)rowland.harvard.edu>
Link: https://lore.kernel.org/r/20200818110445.509668-1-hadess@hadess.net
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/usb/core/generic.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/drivers/usb/core/generic.c b/drivers/usb/core/generic.c
index b6f2d4b44754..2b2f1ab6e36a 100644
--- a/drivers/usb/core/generic.c
+++ b/drivers/usb/core/generic.c
@@ -205,8 +205,9 @@ static int __check_usb_generic(struct device_driver *drv, void *data)
udrv = to_usb_device_driver(drv);
if (udrv == &usb_generic_driver)
return 0;
-
- return usb_device_match_id(udev, udrv->id_table) != NULL;
+ if (usb_device_match_id(udev, udrv->id_table) != NULL)
+ return 1;
+ return (udrv->match && udrv->match(udev));
}
static bool usb_generic_driver_match(struct usb_device *udev)
--
2.28.0
This is a note to let you know that I've just added the patch titled
usb: host: xhci-tegra: fix tegra_xusb_get_phy()
to my usb git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git
in the usb-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
>From d54343a87732726b04ac5af873916b5ed4f52932 Mon Sep 17 00:00:00 2001
From: JC Kuo <jckuo(a)nvidia.com>
Date: Tue, 11 Aug 2020 17:25:53 +0800
Subject: usb: host: xhci-tegra: fix tegra_xusb_get_phy()
tegra_xusb_get_phy() should take input argument "name".
Signed-off-by: JC Kuo <jckuo(a)nvidia.com>
Cc: stable <stable(a)vger.kernel.org>
Link: https://lore.kernel.org/r/20200811092553.657762-1-jckuo@nvidia.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/usb/host/xhci-tegra.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/usb/host/xhci-tegra.c b/drivers/usb/host/xhci-tegra.c
index 0489a316099a..190923d8b246 100644
--- a/drivers/usb/host/xhci-tegra.c
+++ b/drivers/usb/host/xhci-tegra.c
@@ -1136,7 +1136,7 @@ static struct phy *tegra_xusb_get_phy(struct tegra_xusb *tegra, char *name,
unsigned int i, phy_count = 0;
for (i = 0; i < tegra->soc->num_types; i++) {
- if (!strncmp(tegra->soc->phy_types[i].name, "usb2",
+ if (!strncmp(tegra->soc->phy_types[i].name, name,
strlen(name)))
return tegra->phys[phy_count+port];
--
2.28.0
This is a note to let you know that I've just added the patch titled
usb: host: xhci-tegra: otg usb2/usb3 port init
to my usb git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git
in the usb-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
>From 316a2868bc269be8c6e69ccc3a1f902a3f518eb9 Mon Sep 17 00:00:00 2001
From: JC Kuo <jckuo(a)nvidia.com>
Date: Tue, 11 Aug 2020 17:31:43 +0800
Subject: usb: host: xhci-tegra: otg usb2/usb3 port init
tegra_xusb_init_usb_phy() should initialize "otg_usb2_port" and
"otg_usb3_port" with -EINVAL because "0" is a valid value
represents usb2 port 0 or usb3 port 0.
Signed-off-by: JC Kuo <jckuo(a)nvidia.com>
Cc: stable <stable(a)vger.kernel.org>
Link: https://lore.kernel.org/r/20200811093143.699541-1-jckuo@nvidia.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/usb/host/xhci-tegra.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/usb/host/xhci-tegra.c b/drivers/usb/host/xhci-tegra.c
index 014d79334f50..0489a316099a 100644
--- a/drivers/usb/host/xhci-tegra.c
+++ b/drivers/usb/host/xhci-tegra.c
@@ -1258,6 +1258,8 @@ static int tegra_xusb_init_usb_phy(struct tegra_xusb *tegra)
INIT_WORK(&tegra->id_work, tegra_xhci_id_work);
tegra->id_nb.notifier_call = tegra_xhci_id_notify;
+ tegra->otg_usb2_port = -EINVAL;
+ tegra->otg_usb3_port = -EINVAL;
for (i = 0; i < tegra->num_usb_phys; i++) {
struct phy *phy = tegra_xusb_get_phy(tegra, "usb2", i);
--
2.28.0
This is a note to let you know that I've just added the patch titled
usbip: Implement a match function to fix usbip
to my usb git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git
in the usb-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
>From 7a2f2974f26542b4e7b9b4321edb3cbbf3eeb91a Mon Sep 17 00:00:00 2001
From: "M. Vefa Bicakci" <m.v.b(a)runbox.com>
Date: Mon, 10 Aug 2020 19:00:17 +0300
Subject: usbip: Implement a match function to fix usbip
Commit 88b7381a939d ("USB: Select better matching USB drivers when
available") introduced the use of a "match" function to select a
non-generic/better driver for a particular USB device. This
unfortunately breaks the operation of usbip in general, as reported in
the kernel bugzilla with bug 208267 (linked below).
Upon inspecting the aforementioned commit, one can observe that the
original code in the usb_device_match function used to return 1
unconditionally, but the aforementioned commit makes the usb_device_match
function use identifier tables and "match" virtual functions, if either of
them are available.
Hence, this commit implements a match function for usbip that
unconditionally returns true to ensure that usbip is functional again.
This change has been verified to restore usbip functionality, with a
v5.7.y kernel on an up-to-date version of Qubes OS 4.0, which uses
usbip to redirect USB devices between VMs.
Thanks to Jonathan Dieter for the effort in bisecting this issue down
to the aforementioned commit.
Fixes: 88b7381a939d ("USB: Select better matching USB drivers when available")
Link: https://bugzilla.kernel.org/show_bug.cgi?id=208267
Link: https://bugzilla.redhat.com/show_bug.cgi?id=1856443
Link: https://github.com/QubesOS/qubes-issues/issues/5905
Signed-off-by: M. Vefa Bicakci <m.v.b(a)runbox.com>
Cc: <stable(a)vger.kernel.org> # 5.7
Cc: Valentina Manea <valentina.manea.m(a)gmail.com>
Cc: Alan Stern <stern(a)rowland.harvard.edu>
Reviewed-by: Bastien Nocera <hadess(a)hadess.net>
Reviewed-by: Shuah Khan <skhan(a)linuxfoundation.org>
Link: https://lore.kernel.org/r/20200810160017.46002-1-m.v.b@runbox.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/usb/usbip/stub_dev.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/drivers/usb/usbip/stub_dev.c b/drivers/usb/usbip/stub_dev.c
index 2305d425e6c9..9d7d642022d1 100644
--- a/drivers/usb/usbip/stub_dev.c
+++ b/drivers/usb/usbip/stub_dev.c
@@ -461,6 +461,11 @@ static void stub_disconnect(struct usb_device *udev)
return;
}
+static bool usbip_match(struct usb_device *udev)
+{
+ return true;
+}
+
#ifdef CONFIG_PM
/* These functions need usb_port_suspend and usb_port_resume,
@@ -486,6 +491,7 @@ struct usb_device_driver stub_driver = {
.name = "usbip-host",
.probe = stub_probe,
.disconnect = stub_disconnect,
+ .match = usbip_match,
#ifdef CONFIG_PM
.suspend = stub_suspend,
.resume = stub_resume,
--
2.28.0
This is a note to let you know that I've just added the patch titled
USB: lvtest: return proper error code in probe
to my usb git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git
in the usb-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
>From 531412492ce93ea29b9ca3b4eb5e3ed771f851dd Mon Sep 17 00:00:00 2001
From: Evgeny Novikov <novikov(a)ispras.ru>
Date: Wed, 5 Aug 2020 12:06:43 +0300
Subject: USB: lvtest: return proper error code in probe
lvs_rh_probe() can return some nonnegative value from usb_control_msg()
when it is less than "USB_DT_HUB_NONVAR_SIZE + 2" that is considered as
a failure. Make lvs_rh_probe() return -EINVAL in this case.
Found by Linux Driver Verification project (linuxtesting.org).
Signed-off-by: Evgeny Novikov <novikov(a)ispras.ru>
Cc: stable <stable(a)vger.kernel.org>
Link: https://lore.kernel.org/r/20200805090643.3432-1-novikov@ispras.ru
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/usb/misc/lvstest.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/usb/misc/lvstest.c b/drivers/usb/misc/lvstest.c
index 407fe7570f3b..f8686139d6f3 100644
--- a/drivers/usb/misc/lvstest.c
+++ b/drivers/usb/misc/lvstest.c
@@ -426,7 +426,7 @@ static int lvs_rh_probe(struct usb_interface *intf,
USB_DT_SS_HUB_SIZE, USB_CTRL_GET_TIMEOUT);
if (ret < (USB_DT_HUB_NONVAR_SIZE + 2)) {
dev_err(&hdev->dev, "wrong root hub descriptor read %d\n", ret);
- return ret;
+ return ret < 0 ? ret : -EINVAL;
}
/* submit urb to poll interrupt endpoint */
--
2.28.0
This is a note to let you know that I've just added the patch titled
usb: renesas-xhci: remove version check
to my usb git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git
in the usb-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
>From d66a57be2f9a315fc10d0f524f670fec903e0fb4 Mon Sep 17 00:00:00 2001
From: Vinod Koul <vkoul(a)kernel.org>
Date: Tue, 18 Aug 2020 12:47:39 +0530
Subject: usb: renesas-xhci: remove version check
Some devices in wild are reporting bunch of firmware versions, so remove
the check for versions in driver
Reported by: Anastasios Vacharakis <vacharakis(a)gmail.com>
Reported by: Glen Journeay <journeay(a)gmail.com>
Fixes: 2478be82de44 ("usb: renesas-xhci: Add ROM loader for uPD720201")
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=208911
Signed-off-by: Vinod Koul <vkoul(a)kernel.org>
Cc: stable <stable(a)vger.kernel.org>
Link: https://lore.kernel.org/r/20200818071739.789720-1-vkoul@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/usb/host/xhci-pci-renesas.c | 19 +------------------
1 file changed, 1 insertion(+), 18 deletions(-)
diff --git a/drivers/usb/host/xhci-pci-renesas.c b/drivers/usb/host/xhci-pci-renesas.c
index 59b1965ad0a3..f97ac9f52bf4 100644
--- a/drivers/usb/host/xhci-pci-renesas.c
+++ b/drivers/usb/host/xhci-pci-renesas.c
@@ -50,20 +50,6 @@
#define RENESAS_RETRY 10000
#define RENESAS_DELAY 10
-#define ROM_VALID_01 0x2013
-#define ROM_VALID_02 0x2026
-
-static int renesas_verify_fw_version(struct pci_dev *pdev, u32 version)
-{
- switch (version) {
- case ROM_VALID_01:
- case ROM_VALID_02:
- return 0;
- }
- dev_err(&pdev->dev, "FW has invalid version :%d\n", version);
- return -EINVAL;
-}
-
static int renesas_fw_download_image(struct pci_dev *dev,
const u32 *fw, size_t step, bool rom)
{
@@ -202,10 +188,7 @@ static int renesas_check_rom_state(struct pci_dev *pdev)
version &= RENESAS_FW_VERSION_FIELD;
version = version >> RENESAS_FW_VERSION_OFFSET;
-
- err = renesas_verify_fw_version(pdev, version);
- if (err)
- return err;
+ dev_dbg(&pdev->dev, "Found ROM version: %x\n", version);
/*
* Test if ROM is present and loaded, if so we can skip everything
--
2.28.0
This is a note to let you know that I've just added the patch titled
USB: cdc-acm: rework notification_buffer resizing
to my usb git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git
in the usb-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
>From f4b9d8a582f738c24ebeabce5cc15f4b8159d74e Mon Sep 17 00:00:00 2001
From: Tom Rix <trix(a)redhat.com>
Date: Sat, 1 Aug 2020 08:21:54 -0700
Subject: USB: cdc-acm: rework notification_buffer resizing
Clang static analysis reports this error
cdc-acm.c:409:3: warning: Use of memory after it is freed
acm_process_notification(acm, (unsigned char *)dr);
There are three problems, the first one is that dr is not reset
The variable dr is set with
if (acm->nb_index)
dr = (struct usb_cdc_notification *)acm->notification_buffer;
But if the notification_buffer is too small it is resized with
if (acm->nb_size) {
kfree(acm->notification_buffer);
acm->nb_size = 0;
}
alloc_size = roundup_pow_of_two(expected_size);
/*
* kmalloc ensures a valid notification_buffer after a
* use of kfree in case the previous allocation was too
* small. Final freeing is done on disconnect.
*/
acm->notification_buffer =
kmalloc(alloc_size, GFP_ATOMIC);
dr should point to the new acm->notification_buffer.
The second problem is any data in the notification_buffer is lost
when the pointer is freed. In the normal case, the current data
is accumulated in the notification_buffer here.
memcpy(&acm->notification_buffer[acm->nb_index],
urb->transfer_buffer, copy_size);
When a resize happens, anything before
notification_buffer[acm->nb_index] is garbage.
The third problem is the acm->nb_index is not reset on a
resizing buffer error.
So switch resizing to using krealloc and reassign dr and
reset nb_index.
Fixes: ea2583529cd1 ("cdc-acm: reassemble fragmented notifications")
Signed-off-by: Tom Rix <trix(a)redhat.com>
Cc: stable <stable(a)vger.kernel.org>
Acked-by: Oliver Neukum <oneukum(a)suse.com>
Link: https://lore.kernel.org/r/20200801152154.20683-1-trix@redhat.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/usb/class/cdc-acm.c | 22 ++++++++++------------
1 file changed, 10 insertions(+), 12 deletions(-)
diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index 991786876dbb..7f6f3ab5b8a6 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -378,21 +378,19 @@ static void acm_ctrl_irq(struct urb *urb)
if (current_size < expected_size) {
/* notification is transmitted fragmented, reassemble */
if (acm->nb_size < expected_size) {
- if (acm->nb_size) {
- kfree(acm->notification_buffer);
- acm->nb_size = 0;
- }
+ u8 *new_buffer;
alloc_size = roundup_pow_of_two(expected_size);
- /*
- * kmalloc ensures a valid notification_buffer after a
- * use of kfree in case the previous allocation was too
- * small. Final freeing is done on disconnect.
- */
- acm->notification_buffer =
- kmalloc(alloc_size, GFP_ATOMIC);
- if (!acm->notification_buffer)
+ /* Final freeing is done on disconnect. */
+ new_buffer = krealloc(acm->notification_buffer,
+ alloc_size, GFP_ATOMIC);
+ if (!new_buffer) {
+ acm->nb_index = 0;
goto exit;
+ }
+
+ acm->notification_buffer = new_buffer;
acm->nb_size = alloc_size;
+ dr = (struct usb_cdc_notification *)acm->notification_buffer;
}
copy_size = min(current_size,
--
2.28.0
This is a note to let you know that I've just added the patch titled
USB: quirks: Add no-lpm quirk for another Raydium touchscreen
to my usb git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git
in the usb-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
>From 5967116e8358899ebaa22702d09b0af57fef23e1 Mon Sep 17 00:00:00 2001
From: Kai-Heng Feng <kai.heng.feng(a)canonical.com>
Date: Fri, 31 Jul 2020 13:16:20 +0800
Subject: USB: quirks: Add no-lpm quirk for another Raydium touchscreen
There's another Raydium touchscreen needs the no-lpm quirk:
[ 1.339149] usb 1-9: New USB device found, idVendor=2386, idProduct=350e, bcdDevice= 0.00
[ 1.339150] usb 1-9: New USB device strings: Mfr=1, Product=2, SerialNumber=0
[ 1.339151] usb 1-9: Product: Raydium Touch System
[ 1.339152] usb 1-9: Manufacturer: Raydium Corporation
...
[ 6.450497] usb 1-9: can't set config #1, error -110
BugLink: https://bugs.launchpad.net/bugs/1889446
Signed-off-by: Kai-Heng Feng <kai.heng.feng(a)canonical.com>
Cc: stable <stable(a)vger.kernel.org>
Link: https://lore.kernel.org/r/20200731051622.28643-1-kai.heng.feng@canonical.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/usb/core/quirks.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c
index 7c1198f80c23..d1f38956b210 100644
--- a/drivers/usb/core/quirks.c
+++ b/drivers/usb/core/quirks.c
@@ -465,6 +465,8 @@ static const struct usb_device_id usb_quirk_list[] = {
{ USB_DEVICE(0x2386, 0x3119), .driver_info = USB_QUIRK_NO_LPM },
+ { USB_DEVICE(0x2386, 0x350e), .driver_info = USB_QUIRK_NO_LPM },
+
/* DJI CineSSD */
{ USB_DEVICE(0x2ca3, 0x0031), .driver_info = USB_QUIRK_NO_LPM },
--
2.28.0
This is a note to let you know that I've just added the patch titled
USB: yurex: Fix bad gfp argument
to my usb git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git
in the usb-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
>From f176ede3a3bde5b398a6777a7f9ff091baa2d3ff Mon Sep 17 00:00:00 2001
From: Alan Stern <stern(a)rowland.harvard.edu>
Date: Mon, 10 Aug 2020 14:29:54 -0400
Subject: USB: yurex: Fix bad gfp argument
The syzbot fuzzer identified a bug in the yurex driver: It passes
GFP_KERNEL as a memory-allocation flag to usb_submit_urb() at a time
when its state is TASK_INTERRUPTIBLE, not TASK_RUNNING:
do not call blocking ops when !TASK_RUNNING; state=1 set at [<00000000370c7c68>] prepare_to_wait+0xb1/0x2a0 kernel/sched/wait.c:247
WARNING: CPU: 1 PID: 340 at kernel/sched/core.c:7253 __might_sleep+0x135/0x190
kernel/sched/core.c:7253
Kernel panic - not syncing: panic_on_warn set ...
CPU: 1 PID: 340 Comm: syz-executor677 Not tainted 5.8.0-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google
01/01/2011
Call Trace:
__dump_stack lib/dump_stack.c:77 [inline]
dump_stack+0xf6/0x16e lib/dump_stack.c:118
panic+0x2aa/0x6e1 kernel/panic.c:231
__warn.cold+0x20/0x50 kernel/panic.c:600
report_bug+0x1bd/0x210 lib/bug.c:198
handle_bug+0x41/0x80 arch/x86/kernel/traps.c:234
exc_invalid_op+0x14/0x40 arch/x86/kernel/traps.c:254
asm_exc_invalid_op+0x12/0x20 arch/x86/include/asm/idtentry.h:536
RIP: 0010:__might_sleep+0x135/0x190 kernel/sched/core.c:7253
Code: 65 48 8b 1c 25 40 ef 01 00 48 8d 7b 10 48 89 fe 48 c1 ee 03 80 3c 06 00 75
2b 48 8b 73 10 48 c7 c7 e0 9e 06 86 e8 ed 12 f6 ff <0f> 0b e9 46 ff ff ff e8 1f
b2 4b 00 e9 29 ff ff ff e8 15 b2 4b 00
RSP: 0018:ffff8881cdb77a28 EFLAGS: 00010282
RAX: 0000000000000000 RBX: ffff8881c6458000 RCX: 0000000000000000
RDX: ffff8881c6458000 RSI: ffffffff8129ec93 RDI: ffffed1039b6ef37
RBP: ffffffff86fdade2 R08: 0000000000000001 R09: ffff8881db32f54f
R10: 0000000000000000 R11: 0000000030343354 R12: 00000000000001f2
R13: 0000000000000000 R14: 0000000000000068 R15: ffffffff83c1b1aa
slab_pre_alloc_hook.constprop.0+0xea/0x200 mm/slab.h:498
slab_alloc_node mm/slub.c:2816 [inline]
slab_alloc mm/slub.c:2900 [inline]
kmem_cache_alloc_trace+0x46/0x220 mm/slub.c:2917
kmalloc include/linux/slab.h:554 [inline]
dummy_urb_enqueue+0x7a/0x880 drivers/usb/gadget/udc/dummy_hcd.c:1251
usb_hcd_submit_urb+0x2b2/0x22d0 drivers/usb/core/hcd.c:1547
usb_submit_urb+0xb4e/0x13e0 drivers/usb/core/urb.c:570
yurex_write+0x3ea/0x820 drivers/usb/misc/yurex.c:495
This patch changes the call to use GFP_ATOMIC instead of GFP_KERNEL.
Reported-and-tested-by: syzbot+c2c3302f9c601a4b1be2(a)syzkaller.appspotmail.com
Signed-off-by: Alan Stern <stern(a)rowland.harvard.edu>
CC: <stable(a)vger.kernel.org>
Link: https://lore.kernel.org/r/20200810182954.GB307778@rowland.harvard.edu
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/usb/misc/yurex.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/usb/misc/yurex.c b/drivers/usb/misc/yurex.c
index 6e7d34e7fec4..b2e09883c7e2 100644
--- a/drivers/usb/misc/yurex.c
+++ b/drivers/usb/misc/yurex.c
@@ -492,7 +492,7 @@ static ssize_t yurex_write(struct file *file, const char __user *user_buffer,
prepare_to_wait(&dev->waitq, &wait, TASK_INTERRUPTIBLE);
dev_dbg(&dev->interface->dev, "%s - submit %c\n", __func__,
dev->cntl_buffer[0]);
- retval = usb_submit_urb(dev->cntl_urb, GFP_KERNEL);
+ retval = usb_submit_urb(dev->cntl_urb, GFP_ATOMIC);
if (retval >= 0)
timeout = schedule_timeout(YUREX_WRITE_TIMEOUT);
finish_wait(&dev->waitq, &wait);
--
2.28.0
Hi Greg,
On 17/08/2020 11:59, gregkh(a)linuxfoundation.org wrote:
>
> This is a note to let you know that I've just added the patch titled
>
> net: refactor bind_bucket fastreuse into helper
>
> to the 4.14-stable tree which can be found at:
> http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
>
> The filename of the patch is:
> net-refactor-bind_bucket-fastreuse-into-helper.patch
> and it can be found in the queue-4.14 subdirectory.
>
> If you, or anyone else, feels it should not be added to the stable tree,
> please let <stable(a)vger.kernel.org> know about it.
(...)
> Patches currently in stable-queue which might be from tim.froidcoeur(a)tessares.net are
>
> queue-4.14/net-refactor-bind_bucket-fastreuse-into-helper.patch
Thank you for backporting this patch!
It seems the backport of the companion patch -- d76f3351cea2 ("net:
initialize fastreuse on inet_inherit_port") -- was lost somewhere for
4.14 version. It was backported in all other newer stable versions: 5.8,
5.7, 5.4, 4.19 but not in 4.14.
The patch backported here is a preparation for the real fix which is the
missing patch.
I guess the intention is to backport d76f3351cea2 to v4.14 as well. If
not, this refactoring is maybe not needed :)
Cheers,
Matt
--
Tessares | Belgium | Hybrid Access Solutions
www.tessares.net
On Tue, 2020-08-18 at 08:24 +0000, David Laight wrote:
> From: Nick Desaulniers
> > Sent: 17 August 2020 19:37
> ..
> > That said, this libcall optimization/transformation (sprintf->stpcpy)
> > does look useful to me.
>
> I'd rather get a cow if I ask for a cow...
>
> Maybe checkpatch (etc) could report places where snprintf()
> could be replaced by a simpler function.
You mean sprintf no?
Reminds me of seq_printf vs seq_puts...
https://lkml.org/lkml/2013/3/16/79
The original problem was from nvme-over-tcp code, who mistakenly uses
kernel_sendpage() to send pages allocated by __get_free_pages() without
__GFP_COMP flag. Such pages don't have refcount (page_count is 0) on
tail pages, sending them by kernel_sendpage() may trigger a kernel panic
from a corrupted kernel heap, because these pages are incorrectly freed
in network stack as page_count 0 pages.
This patch introduces a helper sendpage_ok(), it returns true if the
checking page,
- is not slab page: PageSlab(page) is false.
- has page refcount: page_count(page) is not zero
All drivers who want to send page to remote end by kernel_sendpage()
may use this helper to check whether the page is OK. If the helper does
not return true, the driver should try other non sendpage method (e.g.
sock_no_sendpage()) to handle the page.
Signed-off-by: Coly Li <colyli(a)suse.de>
Cc: Chaitanya Kulkarni <chaitanya.kulkarni(a)wdc.com>
Cc: Christoph Hellwig <hch(a)lst.de>
Cc: Hannes Reinecke <hare(a)suse.de>
Cc: Jan Kara <jack(a)suse.com>
Cc: Jens Axboe <axboe(a)kernel.dk>
Cc: Mikhail Skorzhinskii <mskorzhinskiy(a)solarflare.com>
Cc: Philipp Reisner <philipp.reisner(a)linbit.com>
Cc: Sagi Grimberg <sagi(a)grimberg.me>
Cc: Vlastimil Babka <vbabka(a)suse.com>
Cc: stable(a)vger.kernel.org
---
v5, include linux/mm.h in include/linux/net.h
v4, change sendpage_ok() as an inline helper, and post it as
separate patch.
v3, introduce a more common sendpage_ok()
v2, fix typo in patch subject
v1, the initial version.
include/linux/net.h | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
diff --git a/include/linux/net.h b/include/linux/net.h
index d48ff1180879..a807fad31958 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -21,6 +21,7 @@
#include <linux/rcupdate.h>
#include <linux/once.h>
#include <linux/fs.h>
+#include <linux/mm.h>
#include <linux/sockptr.h>
#include <uapi/linux/net.h>
@@ -286,6 +287,21 @@ do { \
#define net_get_random_once_wait(buf, nbytes) \
get_random_once_wait((buf), (nbytes))
+/*
+ * E.g. XFS meta- & log-data is in slab pages, or bcache meta
+ * data pages, or other high order pages allocated by
+ * __get_free_pages() without __GFP_COMP, which have a page_count
+ * of 0 and/or have PageSlab() set. We cannot use send_page for
+ * those, as that does get_page(); put_page(); and would cause
+ * either a VM_BUG directly, or __page_cache_release a page that
+ * would actually still be referenced by someone, leading to some
+ * obscure delayed Oops somewhere else.
+ */
+static inline bool sendpage_ok(struct page *page)
+{
+ return (!PageSlab(page) && page_count(page) >= 1);
+}
+
int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec,
size_t num, size_t len);
int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
--
2.26.2
The original problem was from nvme-over-tcp code, who mistakenly uses
kernel_sendpage() to send pages allocated by __get_free_pages() without
__GFP_COMP flag. Such pages don't have refcount (page_count is 0) on
tail pages, sending them by kernel_sendpage() may trigger a kernel panic
from a corrupted kernel heap, because these pages are incorrectly freed
in network stack as page_count 0 pages.
This patch introduces a helper sendpage_ok(), it returns true if the
checking page,
- is not slab page: PageSlab(page) is false.
- has page refcount: page_count(page) is not zero
All drivers who want to send page to remote end by kernel_sendpage()
may use this helper to check whether the page is OK. If the helper does
not return true, the driver should try other non sendpage method (e.g.
sock_no_sendpage()) to handle the page.
Signed-off-by: Coly Li <colyli(a)suse.de>
Cc: Chaitanya Kulkarni <chaitanya.kulkarni(a)wdc.com>
Cc: Christoph Hellwig <hch(a)lst.de>
Cc: Hannes Reinecke <hare(a)suse.de>
Cc: Jan Kara <jack(a)suse.com>
Cc: Jens Axboe <axboe(a)kernel.dk>
Cc: Mikhail Skorzhinskii <mskorzhinskiy(a)solarflare.com>
Cc: Philipp Reisner <philipp.reisner(a)linbit.com>
Cc: Sagi Grimberg <sagi(a)grimberg.me>
Cc: Vlastimil Babka <vbabka(a)suse.com>
Cc: stable(a)vger.kernel.org
---
Changelog:
v5, include linux/mm.h in include/linux/net.h
v4, change sendpage_ok() as an inline helper, and post it as
separate patch.
v3, introduce a more common sendpage_ok()
v2, fix typo in patch subject
v1, the initial version.
include/linux/net.h | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
diff --git a/include/linux/net.h b/include/linux/net.h
index d48ff1180879..a807fad31958 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -21,6 +21,7 @@
#include <linux/rcupdate.h>
#include <linux/once.h>
#include <linux/fs.h>
+#include <linux/mm.h>
#include <linux/sockptr.h>
#include <uapi/linux/net.h>
@@ -286,6 +287,21 @@ do { \
#define net_get_random_once_wait(buf, nbytes) \
get_random_once_wait((buf), (nbytes))
+/*
+ * E.g. XFS meta- & log-data is in slab pages, or bcache meta
+ * data pages, or other high order pages allocated by
+ * __get_free_pages() without __GFP_COMP, which have a page_count
+ * of 0 and/or have PageSlab() set. We cannot use send_page for
+ * those, as that does get_page(); put_page(); and would cause
+ * either a VM_BUG directly, or __page_cache_release a page that
+ * would actually still be referenced by someone, leading to some
+ * obscure delayed Oops somewhere else.
+ */
+static inline bool sendpage_ok(struct page *page)
+{
+ return (!PageSlab(page) && page_count(page) >= 1);
+}
+
int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec,
size_t num, size_t len);
int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
--
2.26.2
Hello,
We ran automated tests on a recent commit from this kernel tree:
Kernel repo: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
Commit: d883e9af56ca - task_work: only grab task signal lock when needed
The results of these automated tests are provided below.
Overall result: PASSED
Merge: OK
Compile: OK
Tests: OK
All kernel binaries, config files, and logs are available for download here:
https://cki-artifacts.s3.us-east-2.amazonaws.com/index.html?prefix=dataware…
Please reply to this email if you have any questions about the tests that we
ran or if you have any suggestions on how to make future tests more effective.
,-. ,-.
( C ) ( K ) Continuous
`-',-.`-' Kernel
( I ) Integration
`-'
______________________________________________________________________________
Compile testing
---------------
We compiled the kernel for 4 architectures:
aarch64:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
ppc64le:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
s390x:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
x86_64:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
Hardware testing
----------------
We booted each kernel and ran the following tests:
aarch64:
Host 1:
✅ Boot test
✅ ACPI table test
✅ ACPI enabled test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking: igmp conformance test
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: update pci ids test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ storage: SCSI VPD
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
🚧 ✅ kdump - kexec_boot
Host 2:
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
✅ stress: stress-ng
🚧 ✅ xfstests - btrfs
🚧 ✅ IPMI driver test
🚧 ✅ IPMItool loop stress test
🚧 ✅ Storage blktests
ppc64le:
Host 1:
✅ Boot test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: update pci ids test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
Host 2:
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
🚧 ✅ xfstests - btrfs
🚧 ✅ IPMI driver test
🚧 ✅ IPMItool loop stress test
🚧 ✅ Storage blktests
Host 3:
✅ Boot test
🚧 ✅ kdump - sysrq-c
s390x:
Host 1:
✅ Boot test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
Host 2:
✅ Boot test
✅ selinux-policy: serge-testsuite
✅ stress: stress-ng
🚧 ✅ Storage blktests
x86_64:
Host 1:
✅ Boot test
✅ ACPI table test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking: igmp conformance test
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: sanity smoke test
✅ pciutils: update pci ids test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ storage: SCSI VPD
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
🚧 ✅ kdump - kexec_boot
Host 2:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
✅ stress: stress-ng
🚧 ❌ CPU: Frequency Driver Test
🚧 ✅ CPU: Idle Test
🚧 ✅ xfstests - btrfs
🚧 ⚡⚡⚡ IOMMU boot test
🚧 ⚡⚡⚡ IPMI driver test
🚧 ⚡⚡⚡ IPMItool loop stress test
🚧 ⚡⚡⚡ power-management: cpupower/sanity test
🚧 ⚡⚡⚡ Storage blktests
Host 3:
✅ Boot test
🚧 ✅ kdump - sysrq-c
🚧 ✅ kdump - file-load
Test sources: https://gitlab.com/cki-project/kernel-tests
💚 Pull requests are welcome for new tests or improvements to existing tests!
Aborted tests
-------------
Tests that didn't complete running successfully are marked with ⚡⚡⚡.
If this was caused by an infrastructure issue, we try to mark that
explicitly in the report.
Waived tests
------------
If the test run included waived tests, they are marked with 🚧. Such tests are
executed but their results are not taken into account. Tests are waived when
their results are not reliable enough, e.g. when they're just introduced or are
being fixed.
Testing timeout
---------------
We aim to provide a report within reasonable timeframe. Tests that haven't
finished running yet are marked with ⏱.
Hello,
We ran automated tests on a recent commit from this kernel tree:
Kernel repo: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
Commit: 2c3b1798b8a3 - io_uring: enable lookup of links holding inflight files
The results of these automated tests are provided below.
Overall result: PASSED
Merge: OK
Compile: OK
Tests: OK
All kernel binaries, config files, and logs are available for download here:
https://cki-artifacts.s3.us-east-2.amazonaws.com/index.html?prefix=dataware…
Please reply to this email if you have any questions about the tests that we
ran or if you have any suggestions on how to make future tests more effective.
,-. ,-.
( C ) ( K ) Continuous
`-',-.`-' Kernel
( I ) Integration
`-'
______________________________________________________________________________
Compile testing
---------------
We compiled the kernel for 4 architectures:
aarch64:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
ppc64le:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
s390x:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
x86_64:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
Hardware testing
----------------
We booted each kernel and ran the following tests:
aarch64:
Host 1:
✅ Boot test
✅ ACPI table test
✅ ACPI enabled test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking: igmp conformance test
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: update pci ids test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ storage: SCSI VPD
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
🚧 ✅ kdump - kexec_boot
Host 2:
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
✅ stress: stress-ng
🚧 ✅ xfstests - btrfs
🚧 ✅ IPMI driver test
🚧 ✅ IPMItool loop stress test
🚧 ✅ Storage blktests
ppc64le:
Host 1:
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
🚧 ✅ xfstests - btrfs
🚧 ✅ IPMI driver test
🚧 ✅ IPMItool loop stress test
🚧 ✅ Storage blktests
Host 2:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
⚡⚡⚡ Boot test
🚧 ⚡⚡⚡ kdump - sysrq-c
Host 3:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
✅ Boot test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
⚡⚡⚡ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: update pci ids test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
Host 4:
✅ Boot test
🚧 ✅ kdump - sysrq-c
Host 5:
✅ Boot test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: update pci ids test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
s390x:
Host 1:
✅ Boot test
✅ selinux-policy: serge-testsuite
✅ stress: stress-ng
🚧 ✅ Storage blktests
Host 2:
✅ Boot test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
x86_64:
Host 1:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
✅ stress: stress-ng
🚧 ❌ CPU: Frequency Driver Test
🚧 ✅ CPU: Idle Test
🚧 ✅ xfstests - btrfs
🚧 ⚡⚡⚡ IOMMU boot test
🚧 ⚡⚡⚡ IPMI driver test
🚧 ⚡⚡⚡ IPMItool loop stress test
🚧 ⚡⚡⚡ power-management: cpupower/sanity test
🚧 ⚡⚡⚡ Storage blktests
Host 2:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
✅ Boot test
✅ ACPI table test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
⚡⚡⚡ LTP
⚡⚡⚡ Loopdev Sanity
⚡⚡⚡ Memory function: memfd_create
⚡⚡⚡ AMTU (Abstract Machine Test Utility)
⚡⚡⚡ Networking bridge: sanity
⚡⚡⚡ Ethernet drivers sanity
⚡⚡⚡ Networking socket: fuzz
⚡⚡⚡ Networking: igmp conformance test
⚡⚡⚡ Networking route: pmtu
⚡⚡⚡ Networking route_func - local
⚡⚡⚡ Networking route_func - forward
⚡⚡⚡ Networking TCP: keepalive test
⚡⚡⚡ Networking UDP: socket
⚡⚡⚡ Networking tunnel: geneve basic test
⚡⚡⚡ Networking tunnel: gre basic
⚡⚡⚡ L2TP basic test
⚡⚡⚡ Networking tunnel: vxlan basic
⚡⚡⚡ Networking ipsec: basic netns - transport
⚡⚡⚡ Networking ipsec: basic netns - tunnel
⚡⚡⚡ Libkcapi AF_ALG test
⚡⚡⚡ pciutils: sanity smoke test
⚡⚡⚡ pciutils: update pci ids test
⚡⚡⚡ ALSA PCM loopback test
⚡⚡⚡ ALSA Control (mixer) Userspace Element test
⚡⚡⚡ storage: SCSI VPD
🚧 ⚡⚡⚡ CIFS Connectathon
🚧 ⚡⚡⚡ POSIX pjd-fstest suites
🚧 ⚡⚡⚡ jvm - jcstress tests
🚧 ⚡⚡⚡ Memory function: kaslr
🚧 ⚡⚡⚡ Networking firewall: basic netfilter test
🚧 ⚡⚡⚡ audit: audit testsuite test
🚧 ⚡⚡⚡ trace: ftrace/tracer
🚧 ⚡⚡⚡ kdump - kexec_boot
Host 3:
✅ Boot test
🚧 ✅ kdump - sysrq-c
🚧 ✅ kdump - file-load
Host 4:
✅ Boot test
✅ ACPI table test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking: igmp conformance test
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: sanity smoke test
✅ pciutils: update pci ids test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ storage: SCSI VPD
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
🚧 ✅ kdump - kexec_boot
Test sources: https://gitlab.com/cki-project/kernel-tests
💚 Pull requests are welcome for new tests or improvements to existing tests!
Aborted tests
-------------
Tests that didn't complete running successfully are marked with ⚡⚡⚡.
If this was caused by an infrastructure issue, we try to mark that
explicitly in the report.
Waived tests
------------
If the test run included waived tests, they are marked with 🚧. Such tests are
executed but their results are not taken into account. Tests are waived when
their results are not reliable enough, e.g. when they're just introduced or are
being fixed.
Testing timeout
---------------
We aim to provide a report within reasonable timeframe. Tests that haven't
finished running yet are marked with ⏱.
Before v4.15 commit 75492a51568b ("s390/scsi: Convert timers to use
timer_setup()"), we intentionally only passed zfcp_adapter as context
argument to zfcp_fsf_request_timeout_handler(). Since we only trigger
adapter recovery, it was unnecessary to sync against races between timeout
and (late) completion.
Likewise, we only passed zfcp_erp_action as context argument to
zfcp_erp_timeout_handler(). Since we only wakeup an ERP action, it was
unnecessary to sync against races between timeout and (late) completion.
Meanwhile the timeout handlers get timer_list as context argument
and do a timer-specific container-of to zfcp_fsf_req which can have
been freed.
Fix it by making sure that any request timeout handlers, that might
just have started before del_timer(), are completed by using
del_timer_sync() instead. This ensures the request free happens
afterwards.
Space time diagram of potential use-after-free:
Basic idea is to have 2 or more pending requests whose timeouts run
out at almost the same time.
req 1 timeout ERP thread req 2 timeout
---------------- ---------------- ---------------------------------------
zfcp_fsf_request_timeout_handler
fsf_req = from_timer(fsf_req, t, timer)
adapter = fsf_req->adapter
zfcp_qdio_siosl(adapter)
zfcp_erp_adapter_reopen(adapter,...)
zfcp_erp_strategy
...
zfcp_fsf_req_dismiss_all
list_for_each_entry_safe
zfcp_fsf_req_complete 1
del_timer 1
zfcp_fsf_req_free 1
zfcp_fsf_req_complete 2
zfcp_fsf_request_timeout_handler
del_timer 2
fsf_req = from_timer(fsf_req, t, timer)
zfcp_fsf_req_free 2
adapter = fsf_req->adapter
^^^^^^^ already freed
Suggested-by: Julian Wiedmann <jwi(a)linux.ibm.com>
Reviewed-by: Julian Wiedmann <jwi(a)linux.ibm.com>
Fixes: 75492a51568b ("s390/scsi: Convert timers to use timer_setup()")
Cc: <stable(a)vger.kernel.org> #4.15+
Signed-off-by: Steffen Maier <maier(a)linux.ibm.com>
---
drivers/s390/scsi/zfcp_fsf.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c
index c795f22249d8..140186fe1d1e 100644
--- a/drivers/s390/scsi/zfcp_fsf.c
+++ b/drivers/s390/scsi/zfcp_fsf.c
@@ -434,7 +434,7 @@ static void zfcp_fsf_req_complete(struct zfcp_fsf_req *req)
return;
}
- del_timer(&req->timer);
+ del_timer_sync(&req->timer);
zfcp_fsf_protstatus_eval(req);
zfcp_fsf_fsfstatus_eval(req);
req->handler(req);
@@ -867,7 +867,7 @@ static int zfcp_fsf_req_send(struct zfcp_fsf_req *req)
req->qdio_req.qdio_outb_usage = atomic_read(&qdio->req_q_free);
req->issued = get_tod_clock();
if (zfcp_qdio_send(qdio, &req->qdio_req)) {
- del_timer(&req->timer);
+ del_timer_sync(&req->timer);
/* lookup request again, list might have changed */
zfcp_reqlist_find_rm(adapter->req_list, req_id);
zfcp_erp_adapter_reopen(adapter, 0, "fsrs__1");
--
2.17.1
LLVM implemented a recent "libcall optimization" that lowers calls to
`sprintf(dest, "%s", str)` where the return value is used to
`stpcpy(dest, str) - dest`. This generally avoids the machinery involved
in parsing format strings.
`stpcpy` is just like `strcpy` except:
1. it returns the pointer to the new tail of `dest`. This allows you to
chain multiple calls to `stpcpy` in one statement.
2. it requires the parameters not to overlap. Calling `sprintf` with
overlapping arguments was clarified in ISO C99 and POSIX.1-2001 to be
undefined behavior.
`stpcpy` was first standardized in POSIX.1-2008.
Implement this so that we don't observe linkage failures due to missing
symbol definitions for `stpcpy`.
Similar to last year's fire drill with:
commit 5f074f3e192f ("lib/string.c: implement a basic bcmp")
This optimization was introduced into clang-12.
Cc: stable(a)vger.kernel.org
Link: https://bugs.llvm.org/show_bug.cgi?id=47162
Link: https://github.com/ClangBuiltLinux/linux/issues/1126
Link: https://man7.org/linux/man-pages/man3/stpcpy.3.html
Link: https://pubs.opengroup.org/onlinepubs/9699919799/functions/stpcpy.html
Link: https://reviews.llvm.org/D85963
Reported-by: Sami Tolvanen <samitolvanen(a)google.com>
Signed-off-by: Nick Desaulniers <ndesaulniers(a)google.com>
---
include/linux/string.h | 3 +++
lib/string.c | 23 +++++++++++++++++++++++
2 files changed, 26 insertions(+)
diff --git a/include/linux/string.h b/include/linux/string.h
index b1f3894a0a3e..e570b9b10f50 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -31,6 +31,9 @@ size_t strlcpy(char *, const char *, size_t);
#ifndef __HAVE_ARCH_STRSCPY
ssize_t strscpy(char *, const char *, size_t);
#endif
+#ifndef __HAVE_ARCH_STPCPY
+extern char *stpcpy(char *__restrict, const char *__restrict__);
+#endif
/* Wraps calls to strscpy()/memset(), no arch specific code required */
ssize_t strscpy_pad(char *dest, const char *src, size_t count);
diff --git a/lib/string.c b/lib/string.c
index 6012c385fb31..81bc4d62c256 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -272,6 +272,29 @@ ssize_t strscpy_pad(char *dest, const char *src, size_t count)
}
EXPORT_SYMBOL(strscpy_pad);
+#ifndef __HAVE_ARCH_STPCPY
+/**
+ * stpcpy - copy a string from src to dest returning a pointer to the new end
+ * of dest, including src's NULL terminator. May overrun dest.
+ * @dest: pointer to end of string being copied into. Must be large enough
+ * to receive copy.
+ * @src: pointer to the beginning of string being copied from. Must not overlap
+ * dest.
+ *
+ * stpcpy differs from strcpy in two key ways:
+ * 1. inputs must not overlap.
+ * 2. return value is the new NULL terminated character. (for strcpy, the
+ * return value is a pointer to src.
+ */
+#undef stpcpy
+char *stpcpy(char *__restrict__ dest, const char *__restrict__ src)
+{
+ while ((*dest++ = *src++) != '\0')
+ /* nothing */;
+ return dest;
+}
+#endif
+
#ifndef __HAVE_ARCH_STRCAT
/**
* strcat - Append one %NUL-terminated string to another
--
2.28.0.220.ged08abb693-goog
Hello,
We ran automated tests on a recent commit from this kernel tree:
Kernel repo: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
Commit: f3ab983c61ba - NFS: Don't return layout segments that are in use
The results of these automated tests are provided below.
Overall result: PASSED
Merge: OK
Compile: OK
Tests: OK
All kernel binaries, config files, and logs are available for download here:
https://cki-artifacts.s3.us-east-2.amazonaws.com/index.html?prefix=dataware…
Please reply to this email if you have any questions about the tests that we
ran or if you have any suggestions on how to make future tests more effective.
,-. ,-.
( C ) ( K ) Continuous
`-',-.`-' Kernel
( I ) Integration
`-'
______________________________________________________________________________
Compile testing
---------------
We compiled the kernel for 4 architectures:
aarch64:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
ppc64le:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
s390x:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
x86_64:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
Hardware testing
----------------
We booted each kernel and ran the following tests:
aarch64:
Host 1:
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
✅ stress: stress-ng
🚧 ✅ xfstests - btrfs
🚧 ✅ IPMI driver test
🚧 ✅ IPMItool loop stress test
🚧 ✅ Storage blktests
Host 2:
✅ Boot test
✅ ACPI table test
✅ ACPI enabled test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking: igmp conformance test
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: update pci ids test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ storage: SCSI VPD
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
🚧 ✅ kdump - kexec_boot
ppc64le:
Host 1:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
✅ Boot test
🚧 ⚡⚡⚡ kdump - sysrq-c
Host 2:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
⚡⚡⚡ Boot test
⚡⚡⚡ xfstests - ext4
⚡⚡⚡ xfstests - xfs
⚡⚡⚡ selinux-policy: serge-testsuite
⚡⚡⚡ storage: software RAID testing
🚧 ⚡⚡⚡ xfstests - btrfs
🚧 ⚡⚡⚡ IPMI driver test
🚧 ⚡⚡⚡ IPMItool loop stress test
🚧 ⚡⚡⚡ Storage blktests
s390x:
Host 1:
✅ Boot test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
Host 2:
✅ Boot test
✅ selinux-policy: serge-testsuite
✅ stress: stress-ng
🚧 ✅ Storage blktests
x86_64:
Host 1:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
✅ stress: stress-ng
🚧 ✅ CPU: Frequency Driver Test
🚧 ✅ CPU: Idle Test
🚧 ✅ xfstests - btrfs
🚧 ⚡⚡⚡ IOMMU boot test
🚧 ⚡⚡⚡ IPMI driver test
🚧 ⚡⚡⚡ IPMItool loop stress test
🚧 ⚡⚡⚡ power-management: cpupower/sanity test
🚧 ⚡⚡⚡ Storage blktests
Host 2:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
⚡⚡⚡ Boot test
🚧 ⚡⚡⚡ kdump - sysrq-c
🚧 ⚡⚡⚡ kdump - file-load
Host 3:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
✅ Boot test
✅ ACPI table test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking: igmp conformance test
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: sanity smoke test
✅ pciutils: update pci ids test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ storage: SCSI VPD
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ⚡⚡⚡ trace: ftrace/tracer
🚧 ⚡⚡⚡ kdump - kexec_boot
Host 4:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
⚡⚡⚡ Boot test
🚧 ⚡⚡⚡ kdump - sysrq-c
🚧 ⚡⚡⚡ kdump - file-load
Host 5:
✅ Boot test
🚧 ✅ kdump - sysrq-c
🚧 ✅ kdump - file-load
Test sources: https://gitlab.com/cki-project/kernel-tests
💚 Pull requests are welcome for new tests or improvements to existing tests!
Aborted tests
-------------
Tests that didn't complete running successfully are marked with ⚡⚡⚡.
If this was caused by an infrastructure issue, we try to mark that
explicitly in the report.
Waived tests
------------
If the test run included waived tests, they are marked with 🚧. Such tests are
executed but their results are not taken into account. Tests are waived when
their results are not reliable enough, e.g. when they're just introduced or are
being fixed.
Testing timeout
---------------
We aim to provide a report within reasonable timeframe. Tests that haven't
finished running yet are marked with ⏱.
The patch below was submitted to be applied to the 5.8-stable tree.
I fail to see how this patch meets the stable kernel rules as found at
Documentation/process/stable-kernel-rules.rst.
I could be totally wrong, and if so, please respond to
<stable(a)vger.kernel.org> and let me know why this patch should be
applied. Otherwise, it is now dropped from my patch queues, never to be
seen again.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 3bc6e3dc5a54d5842938c6f1ed78dd1add379af7 Mon Sep 17 00:00:00 2001
From: Helge Deller <deller(a)gmx.de>
Date: Sun, 14 Jun 2020 10:50:42 +0200
Subject: [PATCH] parisc: Whitespace cleanups in atomic.h
Fix whitespace indenting and drop trailing backslashes.
Cc: <stable(a)vger.kernel.org> # 4.19+
Signed-off-by: Helge Deller <deller(a)gmx.de>
diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h
index 6dd4171c9530..90e8267fc509 100644
--- a/arch/parisc/include/asm/atomic.h
+++ b/arch/parisc/include/asm/atomic.h
@@ -34,13 +34,13 @@ extern arch_spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] __lock_aligned;
/* Can't use raw_spin_lock_irq because of #include problems, so
* this is the substitute */
#define _atomic_spin_lock_irqsave(l,f) do { \
- arch_spinlock_t *s = ATOMIC_HASH(l); \
+ arch_spinlock_t *s = ATOMIC_HASH(l); \
local_irq_save(f); \
arch_spin_lock(s); \
} while(0)
#define _atomic_spin_unlock_irqrestore(l,f) do { \
- arch_spinlock_t *s = ATOMIC_HASH(l); \
+ arch_spinlock_t *s = ATOMIC_HASH(l); \
arch_spin_unlock(s); \
local_irq_restore(f); \
} while(0)
@@ -85,7 +85,7 @@ static __inline__ void atomic_##op(int i, atomic_t *v) \
_atomic_spin_lock_irqsave(v, flags); \
v->counter c_op i; \
_atomic_spin_unlock_irqrestore(v, flags); \
-} \
+}
#define ATOMIC_OP_RETURN(op, c_op) \
static __inline__ int atomic_##op##_return(int i, atomic_t *v) \
@@ -150,7 +150,7 @@ static __inline__ void atomic64_##op(s64 i, atomic64_t *v) \
_atomic_spin_lock_irqsave(v, flags); \
v->counter c_op i; \
_atomic_spin_unlock_irqrestore(v, flags); \
-} \
+}
#define ATOMIC64_OP_RETURN(op, c_op) \
static __inline__ s64 atomic64_##op##_return(s64 i, atomic64_t *v) \
Hello,
We ran automated tests on a recent commit from this kernel tree:
Kernel repo: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
Commit: c6d0e10a3bb0 - crypto: aesni - add compatibility with IAS
The results of these automated tests are provided below.
Overall result: PASSED
Merge: OK
Compile: OK
Tests: OK
All kernel binaries, config files, and logs are available for download here:
https://cki-artifacts.s3.us-east-2.amazonaws.com/index.html?prefix=dataware…
Please reply to this email if you have any questions about the tests that we
ran or if you have any suggestions on how to make future tests more effective.
,-. ,-.
( C ) ( K ) Continuous
`-',-.`-' Kernel
( I ) Integration
`-'
______________________________________________________________________________
Compile testing
---------------
We compiled the kernel for 4 architectures:
aarch64:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
ppc64le:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
s390x:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
x86_64:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
Hardware testing
----------------
We booted each kernel and ran the following tests:
aarch64:
Host 1:
✅ Boot test
✅ ACPI table test
✅ ACPI enabled test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking: igmp conformance test
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: update pci ids test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ storage: SCSI VPD
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
🚧 ✅ kdump - kexec_boot
Host 2:
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
✅ stress: stress-ng
🚧 ✅ xfstests - btrfs
🚧 ✅ IPMI driver test
🚧 ✅ IPMItool loop stress test
🚧 ✅ Storage blktests
ppc64le:
Host 1:
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
🚧 ✅ xfstests - btrfs
🚧 ✅ IPMI driver test
🚧 ✅ IPMItool loop stress test
🚧 ✅ Storage blktests
Host 2:
✅ Boot test
🚧 ❌ kdump - sysrq-c
Host 3:
✅ Boot test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: update pci ids test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
s390x:
Host 1:
✅ Boot test
✅ selinux-policy: serge-testsuite
✅ stress: stress-ng
🚧 ✅ Storage blktests
Host 2:
✅ Boot test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
x86_64:
Host 1:
✅ Boot test
🚧 ✅ kdump - sysrq-c
🚧 ✅ kdump - file-load
Host 2:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
✅ stress: stress-ng
🚧 ❌ CPU: Frequency Driver Test
🚧 ✅ CPU: Idle Test
🚧 ✅ xfstests - btrfs
🚧 ⚡⚡⚡ IOMMU boot test
🚧 ⚡⚡⚡ IPMI driver test
🚧 ⚡⚡⚡ IPMItool loop stress test
🚧 ⚡⚡⚡ power-management: cpupower/sanity test
🚧 ⚡⚡⚡ Storage blktests
Host 3:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
✅ Boot test
✅ ACPI table test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
⚡⚡⚡ LTP
⚡⚡⚡ Loopdev Sanity
⚡⚡⚡ Memory function: memfd_create
⚡⚡⚡ AMTU (Abstract Machine Test Utility)
⚡⚡⚡ Networking bridge: sanity
⚡⚡⚡ Ethernet drivers sanity
⚡⚡⚡ Networking socket: fuzz
⚡⚡⚡ Networking: igmp conformance test
⚡⚡⚡ Networking route: pmtu
⚡⚡⚡ Networking route_func - local
⚡⚡⚡ Networking route_func - forward
⚡⚡⚡ Networking TCP: keepalive test
⚡⚡⚡ Networking UDP: socket
⚡⚡⚡ Networking tunnel: geneve basic test
⚡⚡⚡ Networking tunnel: gre basic
⚡⚡⚡ L2TP basic test
⚡⚡⚡ Networking tunnel: vxlan basic
⚡⚡⚡ Networking ipsec: basic netns - transport
⚡⚡⚡ Networking ipsec: basic netns - tunnel
⚡⚡⚡ Libkcapi AF_ALG test
⚡⚡⚡ pciutils: sanity smoke test
⚡⚡⚡ pciutils: update pci ids test
⚡⚡⚡ ALSA PCM loopback test
⚡⚡⚡ ALSA Control (mixer) Userspace Element test
⚡⚡⚡ storage: SCSI VPD
🚧 ⚡⚡⚡ CIFS Connectathon
🚧 ⚡⚡⚡ POSIX pjd-fstest suites
🚧 ⚡⚡⚡ jvm - jcstress tests
🚧 ⚡⚡⚡ Memory function: kaslr
🚧 ⚡⚡⚡ Networking firewall: basic netfilter test
🚧 ⚡⚡⚡ audit: audit testsuite test
🚧 ⚡⚡⚡ trace: ftrace/tracer
🚧 ⚡⚡⚡ kdump - kexec_boot
Host 4:
✅ Boot test
✅ ACPI table test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking: igmp conformance test
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: sanity smoke test
✅ pciutils: update pci ids test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ storage: SCSI VPD
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
🚧 ✅ kdump - kexec_boot
Test sources: https://gitlab.com/cki-project/kernel-tests
💚 Pull requests are welcome for new tests or improvements to existing tests!
Aborted tests
-------------
Tests that didn't complete running successfully are marked with ⚡⚡⚡.
If this was caused by an infrastructure issue, we try to mark that
explicitly in the report.
Waived tests
------------
If the test run included waived tests, they are marked with 🚧. Such tests are
executed but their results are not taken into account. Tests are waived when
their results are not reliable enough, e.g. when they're just introduced or are
being fixed.
Testing timeout
---------------
We aim to provide a report within reasonable timeframe. Tests that haven't
finished running yet are marked with ⏱.
Hi Greg / Thomas and all involved here. First, apologies for
necro-bumping this thread, but I'm working a backport of this patch to
kernel 4.15 (Ubuntu) and then I noticed we have it on stable, but only
in 4.19+.
Since the fixes tag presents an old commit (since ~3.19), I'm curious if
we have a special reason to not have it on long-term stables, like 4.9
or 4.14. It's a subtle portion of arch code, so I'm afraid I didn't see
something that prevents its backport for previous versions.
Thanks in advance,
Guilherme
Hello,
We ran automated tests on a recent commit from this kernel tree:
Kernel repo: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
Commit: db31dd90da1f - r8152: Use MAC address from correct device tree node
The results of these automated tests are provided below.
Overall result: PASSED
Merge: OK
Compile: OK
Tests: OK
All kernel binaries, config files, and logs are available for download here:
https://cki-artifacts.s3.us-east-2.amazonaws.com/index.html?prefix=dataware…
Please reply to this email if you have any questions about the tests that we
ran or if you have any suggestions on how to make future tests more effective.
,-. ,-.
( C ) ( K ) Continuous
`-',-.`-' Kernel
( I ) Integration
`-'
______________________________________________________________________________
Compile testing
---------------
We compiled the kernel for 4 architectures:
aarch64:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
ppc64le:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
s390x:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
x86_64:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
Hardware testing
----------------
We booted each kernel and ran the following tests:
aarch64:
Host 1:
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
✅ stress: stress-ng
🚧 ✅ xfstests - btrfs
🚧 ❌ IPMI driver test
🚧 ✅ IPMItool loop stress test
🚧 ✅ Storage blktests
Host 2:
✅ Boot test
✅ ACPI table test
✅ ACPI enabled test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking: igmp conformance test
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: update pci ids test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ storage: SCSI VPD
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
🚧 ✅ kdump - kexec_boot
ppc64le:
Host 1:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
✅ Boot test
⚡⚡⚡ xfstests - ext4
⚡⚡⚡ xfstests - xfs
⚡⚡⚡ selinux-policy: serge-testsuite
⚡⚡⚡ storage: software RAID testing
🚧 ⚡⚡⚡ xfstests - btrfs
🚧 ⚡⚡⚡ IPMI driver test
🚧 ⚡⚡⚡ IPMItool loop stress test
🚧 ⚡⚡⚡ Storage blktests
Host 2:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
⚡⚡⚡ Boot test
🚧 ⚡⚡⚡ kdump - sysrq-c
Host 3:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
⚡⚡⚡ Boot test
⚡⚡⚡ Podman system integration test - as root
⚡⚡⚡ Podman system integration test - as user
⚡⚡⚡ LTP
⚡⚡⚡ Loopdev Sanity
⚡⚡⚡ Memory function: memfd_create
⚡⚡⚡ AMTU (Abstract Machine Test Utility)
⚡⚡⚡ Networking bridge: sanity
⚡⚡⚡ Ethernet drivers sanity
⚡⚡⚡ Networking socket: fuzz
⚡⚡⚡ Networking route: pmtu
⚡⚡⚡ Networking route_func - local
⚡⚡⚡ Networking route_func - forward
⚡⚡⚡ Networking TCP: keepalive test
⚡⚡⚡ Networking UDP: socket
⚡⚡⚡ Networking tunnel: geneve basic test
⚡⚡⚡ Networking tunnel: gre basic
⚡⚡⚡ L2TP basic test
⚡⚡⚡ Networking tunnel: vxlan basic
⚡⚡⚡ Networking ipsec: basic netns - tunnel
⚡⚡⚡ Libkcapi AF_ALG test
⚡⚡⚡ pciutils: update pci ids test
⚡⚡⚡ ALSA PCM loopback test
⚡⚡⚡ ALSA Control (mixer) Userspace Element test
🚧 ⚡⚡⚡ CIFS Connectathon
🚧 ⚡⚡⚡ POSIX pjd-fstest suites
🚧 ⚡⚡⚡ jvm - jcstress tests
🚧 ⚡⚡⚡ Memory function: kaslr
🚧 ⚡⚡⚡ Networking firewall: basic netfilter test
🚧 ⚡⚡⚡ audit: audit testsuite test
🚧 ⚡⚡⚡ trace: ftrace/tracer
Host 4:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
⚡⚡⚡ Boot test
🚧 ⚡⚡⚡ kdump - sysrq-c
s390x:
Host 1:
✅ Boot test
✅ selinux-policy: serge-testsuite
✅ stress: stress-ng
🚧 ✅ Storage blktests
Host 2:
✅ Boot test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
x86_64:
Host 1:
✅ Boot test
🚧 ✅ kdump - sysrq-c
🚧 ✅ kdump - file-load
Host 2:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
✅ stress: stress-ng
🚧 ❌ CPU: Frequency Driver Test
🚧 ✅ CPU: Idle Test
🚧 ✅ xfstests - btrfs
🚧 ⚡⚡⚡ IOMMU boot test
🚧 ⚡⚡⚡ IPMI driver test
🚧 ⚡⚡⚡ IPMItool loop stress test
🚧 ⚡⚡⚡ power-management: cpupower/sanity test
🚧 ⚡⚡⚡ Storage blktests
Host 3:
✅ Boot test
✅ ACPI table test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking: igmp conformance test
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: sanity smoke test
✅ pciutils: update pci ids test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ storage: SCSI VPD
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
🚧 ✅ kdump - kexec_boot
Test sources: https://gitlab.com/cki-project/kernel-tests
💚 Pull requests are welcome for new tests or improvements to existing tests!
Aborted tests
-------------
Tests that didn't complete running successfully are marked with ⚡⚡⚡.
If this was caused by an infrastructure issue, we try to mark that
explicitly in the report.
Waived tests
------------
If the test run included waived tests, they are marked with 🚧. Such tests are
executed but their results are not taken into account. Tests are waived when
their results are not reliable enough, e.g. when they're just introduced or are
being fixed.
Testing timeout
---------------
We aim to provide a report within reasonable timeframe. Tests that haven't
finished running yet are marked with ⏱.
Hello,
We ran automated tests on a recent commit from this kernel tree:
Kernel repo: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
Commit: 3be328d27df1 - r8152: Use MAC address from correct device tree node
The results of these automated tests are provided below.
Overall result: PASSED
Merge: OK
Compile: OK
Tests: OK
All kernel binaries, config files, and logs are available for download here:
https://cki-artifacts.s3.us-east-2.amazonaws.com/index.html?prefix=dataware…
Please reply to this email if you have any questions about the tests that we
ran or if you have any suggestions on how to make future tests more effective.
,-. ,-.
( C ) ( K ) Continuous
`-',-.`-' Kernel
( I ) Integration
`-'
______________________________________________________________________________
Compile testing
---------------
We compiled the kernel for 4 architectures:
aarch64:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
ppc64le:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
s390x:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
x86_64:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
Hardware testing
----------------
We booted each kernel and ran the following tests:
aarch64:
Host 1:
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
✅ stress: stress-ng
🚧 ✅ xfstests - btrfs
🚧 ✅ IPMI driver test
🚧 ✅ IPMItool loop stress test
🚧 ✅ Storage blktests
Host 2:
✅ Boot test
✅ ACPI table test
✅ ACPI enabled test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking: igmp conformance test
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: update pci ids test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ storage: SCSI VPD
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
🚧 ✅ kdump - kexec_boot
ppc64le:
Host 1:
✅ Boot test
🚧 ✅ kdump - sysrq-c
Host 2:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
✅ Boot test
⚡⚡⚡ Podman system integration test - as root
⚡⚡⚡ Podman system integration test - as user
⚡⚡⚡ LTP
⚡⚡⚡ Loopdev Sanity
⚡⚡⚡ Memory function: memfd_create
⚡⚡⚡ AMTU (Abstract Machine Test Utility)
⚡⚡⚡ Networking bridge: sanity
⚡⚡⚡ Ethernet drivers sanity
⚡⚡⚡ Networking socket: fuzz
⚡⚡⚡ Networking route: pmtu
⚡⚡⚡ Networking route_func - local
⚡⚡⚡ Networking route_func - forward
⚡⚡⚡ Networking TCP: keepalive test
⚡⚡⚡ Networking UDP: socket
⚡⚡⚡ Networking tunnel: geneve basic test
⚡⚡⚡ Networking tunnel: gre basic
⚡⚡⚡ L2TP basic test
⚡⚡⚡ Networking tunnel: vxlan basic
⚡⚡⚡ Networking ipsec: basic netns - tunnel
⚡⚡⚡ Libkcapi AF_ALG test
⚡⚡⚡ pciutils: update pci ids test
⚡⚡⚡ ALSA PCM loopback test
⚡⚡⚡ ALSA Control (mixer) Userspace Element test
🚧 ⚡⚡⚡ CIFS Connectathon
🚧 ⚡⚡⚡ POSIX pjd-fstest suites
🚧 ⚡⚡⚡ jvm - jcstress tests
🚧 ⚡⚡⚡ Memory function: kaslr
🚧 ⚡⚡⚡ Networking firewall: basic netfilter test
🚧 ⚡⚡⚡ audit: audit testsuite test
🚧 ⚡⚡⚡ trace: ftrace/tracer
Host 3:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
⚡⚡⚡ Boot test
⚡⚡⚡ xfstests - ext4
⚡⚡⚡ xfstests - xfs
⚡⚡⚡ selinux-policy: serge-testsuite
⚡⚡⚡ storage: software RAID testing
🚧 ⚡⚡⚡ xfstests - btrfs
🚧 ⚡⚡⚡ IPMI driver test
🚧 ⚡⚡⚡ IPMItool loop stress test
🚧 ⚡⚡⚡ Storage blktests
Host 4:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
⚡⚡⚡ Boot test
⚡⚡⚡ xfstests - ext4
⚡⚡⚡ xfstests - xfs
⚡⚡⚡ selinux-policy: serge-testsuite
⚡⚡⚡ storage: software RAID testing
🚧 ⚡⚡⚡ xfstests - btrfs
🚧 ⚡⚡⚡ IPMI driver test
🚧 ⚡⚡⚡ IPMItool loop stress test
🚧 ⚡⚡⚡ Storage blktests
Host 5:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
⚡⚡⚡ Boot test
⚡⚡⚡ xfstests - ext4
⚡⚡⚡ xfstests - xfs
⚡⚡⚡ selinux-policy: serge-testsuite
⚡⚡⚡ storage: software RAID testing
🚧 ⚡⚡⚡ xfstests - btrfs
🚧 ⚡⚡⚡ IPMI driver test
🚧 ⚡⚡⚡ IPMItool loop stress test
🚧 ⚡⚡⚡ Storage blktests
s390x:
Host 1:
✅ Boot test
✅ selinux-policy: serge-testsuite
✅ stress: stress-ng
🚧 ✅ Storage blktests
Host 2:
✅ Boot test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
x86_64:
Host 1:
✅ Boot test
🚧 ✅ kdump - sysrq-c
🚧 ✅ kdump - file-load
Host 2:
✅ Boot test
✅ ACPI table test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking: igmp conformance test
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: sanity smoke test
✅ pciutils: update pci ids test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ storage: SCSI VPD
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
🚧 ✅ kdump - kexec_boot
Host 3:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
✅ stress: stress-ng
🚧 ✅ CPU: Frequency Driver Test
🚧 ✅ CPU: Idle Test
🚧 ✅ xfstests - btrfs
🚧 ⚡⚡⚡ IOMMU boot test
🚧 ⚡⚡⚡ IPMI driver test
🚧 ⚡⚡⚡ IPMItool loop stress test
🚧 ⚡⚡⚡ power-management: cpupower/sanity test
🚧 ⚡⚡⚡ Storage blktests
Test sources: https://gitlab.com/cki-project/kernel-tests
💚 Pull requests are welcome for new tests or improvements to existing tests!
Aborted tests
-------------
Tests that didn't complete running successfully are marked with ⚡⚡⚡.
If this was caused by an infrastructure issue, we try to mark that
explicitly in the report.
Waived tests
------------
If the test run included waived tests, they are marked with 🚧. Such tests are
executed but their results are not taken into account. Tests are waived when
their results are not reliable enough, e.g. when they're just introduced or are
being fixed.
Testing timeout
---------------
We aim to provide a report within reasonable timeframe. Tests that haven't
finished running yet are marked with ⏱.
The patch below does not apply to the 5.8-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 6d816e088c359866f9867057e04f244c608c42fe Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe(a)kernel.dk>
Date: Tue, 11 Aug 2020 08:04:14 -0600
Subject: [PATCH] io_uring: hold 'ctx' reference around task_work queue +
execute
We're holding the request reference, but we need to go one higher
to ensure that the ctx remains valid after the request has finished.
If the ring is closed with pending task_work inflight, and the
given io_kiocb finishes sync during issue, then we need a reference
to the ring itself around the task_work execution cycle.
Cc: stable(a)vger.kernel.org # v5.7+
Reported-by: syzbot+9b260fc33297966f5a8e(a)syzkaller.appspotmail.com
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 5488698189da..99582cf5106b 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1821,8 +1821,10 @@ static void __io_req_task_submit(struct io_kiocb *req)
static void io_req_task_submit(struct callback_head *cb)
{
struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work);
+ struct io_ring_ctx *ctx = req->ctx;
__io_req_task_submit(req);
+ percpu_ref_put(&ctx->refs);
}
static void io_req_task_queue(struct io_kiocb *req)
@@ -1830,6 +1832,7 @@ static void io_req_task_queue(struct io_kiocb *req)
int ret;
init_task_work(&req->task_work, io_req_task_submit);
+ percpu_ref_get(&req->ctx->refs);
ret = io_req_task_work_add(req, &req->task_work);
if (unlikely(ret)) {
@@ -2318,6 +2321,8 @@ static void io_rw_resubmit(struct callback_head *cb)
refcount_inc(&req->refs);
io_queue_async_work(req);
}
+
+ percpu_ref_put(&ctx->refs);
}
#endif
@@ -2330,6 +2335,8 @@ static bool io_rw_reissue(struct io_kiocb *req, long res)
return false;
init_task_work(&req->task_work, io_rw_resubmit);
+ percpu_ref_get(&req->ctx->refs);
+
ret = io_req_task_work_add(req, &req->task_work);
if (!ret)
return true;
@@ -3033,6 +3040,8 @@ static int io_async_buf_func(struct wait_queue_entry *wait, unsigned mode,
list_del_init(&wait->entry);
init_task_work(&req->task_work, io_req_task_submit);
+ percpu_ref_get(&req->ctx->refs);
+
/* submit ref gets dropped, acquire a new one */
refcount_inc(&req->refs);
ret = io_req_task_work_add(req, &req->task_work);
@@ -4565,6 +4574,8 @@ static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll,
req->result = mask;
init_task_work(&req->task_work, func);
+ percpu_ref_get(&req->ctx->refs);
+
/*
* If this fails, then the task is exiting. When a task exits, the
* work gets canceled, so just cancel this request as well instead
@@ -4652,11 +4663,13 @@ static void io_poll_task_handler(struct io_kiocb *req, struct io_kiocb **nxt)
static void io_poll_task_func(struct callback_head *cb)
{
struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work);
+ struct io_ring_ctx *ctx = req->ctx;
struct io_kiocb *nxt = NULL;
io_poll_task_handler(req, &nxt);
if (nxt)
__io_req_task_submit(nxt);
+ percpu_ref_put(&ctx->refs);
}
static int io_poll_double_wake(struct wait_queue_entry *wait, unsigned mode,
@@ -4752,6 +4765,7 @@ static void io_async_task_func(struct callback_head *cb)
if (io_poll_rewait(req, &apoll->poll)) {
spin_unlock_irq(&ctx->completion_lock);
+ percpu_ref_put(&ctx->refs);
return;
}
@@ -4767,6 +4781,7 @@ static void io_async_task_func(struct callback_head *cb)
else
__io_req_task_cancel(req, -ECANCELED);
+ percpu_ref_put(&ctx->refs);
kfree(apoll->double_poll);
kfree(apoll);
}
The patch below does not apply to the 5.7-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 6d816e088c359866f9867057e04f244c608c42fe Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe(a)kernel.dk>
Date: Tue, 11 Aug 2020 08:04:14 -0600
Subject: [PATCH] io_uring: hold 'ctx' reference around task_work queue +
execute
We're holding the request reference, but we need to go one higher
to ensure that the ctx remains valid after the request has finished.
If the ring is closed with pending task_work inflight, and the
given io_kiocb finishes sync during issue, then we need a reference
to the ring itself around the task_work execution cycle.
Cc: stable(a)vger.kernel.org # v5.7+
Reported-by: syzbot+9b260fc33297966f5a8e(a)syzkaller.appspotmail.com
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 5488698189da..99582cf5106b 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1821,8 +1821,10 @@ static void __io_req_task_submit(struct io_kiocb *req)
static void io_req_task_submit(struct callback_head *cb)
{
struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work);
+ struct io_ring_ctx *ctx = req->ctx;
__io_req_task_submit(req);
+ percpu_ref_put(&ctx->refs);
}
static void io_req_task_queue(struct io_kiocb *req)
@@ -1830,6 +1832,7 @@ static void io_req_task_queue(struct io_kiocb *req)
int ret;
init_task_work(&req->task_work, io_req_task_submit);
+ percpu_ref_get(&req->ctx->refs);
ret = io_req_task_work_add(req, &req->task_work);
if (unlikely(ret)) {
@@ -2318,6 +2321,8 @@ static void io_rw_resubmit(struct callback_head *cb)
refcount_inc(&req->refs);
io_queue_async_work(req);
}
+
+ percpu_ref_put(&ctx->refs);
}
#endif
@@ -2330,6 +2335,8 @@ static bool io_rw_reissue(struct io_kiocb *req, long res)
return false;
init_task_work(&req->task_work, io_rw_resubmit);
+ percpu_ref_get(&req->ctx->refs);
+
ret = io_req_task_work_add(req, &req->task_work);
if (!ret)
return true;
@@ -3033,6 +3040,8 @@ static int io_async_buf_func(struct wait_queue_entry *wait, unsigned mode,
list_del_init(&wait->entry);
init_task_work(&req->task_work, io_req_task_submit);
+ percpu_ref_get(&req->ctx->refs);
+
/* submit ref gets dropped, acquire a new one */
refcount_inc(&req->refs);
ret = io_req_task_work_add(req, &req->task_work);
@@ -4565,6 +4574,8 @@ static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll,
req->result = mask;
init_task_work(&req->task_work, func);
+ percpu_ref_get(&req->ctx->refs);
+
/*
* If this fails, then the task is exiting. When a task exits, the
* work gets canceled, so just cancel this request as well instead
@@ -4652,11 +4663,13 @@ static void io_poll_task_handler(struct io_kiocb *req, struct io_kiocb **nxt)
static void io_poll_task_func(struct callback_head *cb)
{
struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work);
+ struct io_ring_ctx *ctx = req->ctx;
struct io_kiocb *nxt = NULL;
io_poll_task_handler(req, &nxt);
if (nxt)
__io_req_task_submit(nxt);
+ percpu_ref_put(&ctx->refs);
}
static int io_poll_double_wake(struct wait_queue_entry *wait, unsigned mode,
@@ -4752,6 +4765,7 @@ static void io_async_task_func(struct callback_head *cb)
if (io_poll_rewait(req, &apoll->poll)) {
spin_unlock_irq(&ctx->completion_lock);
+ percpu_ref_put(&ctx->refs);
return;
}
@@ -4767,6 +4781,7 @@ static void io_async_task_func(struct callback_head *cb)
else
__io_req_task_cancel(req, -ECANCELED);
+ percpu_ref_put(&ctx->refs);
kfree(apoll->double_poll);
kfree(apoll);
}
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 535e4fc623fab2e09a0653fc3a3e17f382ad0251 Mon Sep 17 00:00:00 2001
From: Alexander Gordeev <agordeev(a)linux.ibm.com>
Date: Tue, 4 Aug 2020 20:35:49 +0200
Subject: [PATCH] s390/numa: set node distance to LOCAL_DISTANCE
The node distance is hardcoded to 0, which causes a trouble
for some user-level applications. In particular, "libnuma"
expects the distance of a node to itself as LOCAL_DISTANCE.
This update removes the offending node distance override.
Cc: <stable(a)vger.kernel.org> # 4.4
Fixes: 3a368f742da1 ("s390/numa: add core infrastructure")
Signed-off-by: Alexander Gordeev <agordeev(a)linux.ibm.com>
Signed-off-by: Heiko Carstens <hca(a)linux.ibm.com>
diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h
index fbb507504a3b..3a0ac0c7a9a3 100644
--- a/arch/s390/include/asm/topology.h
+++ b/arch/s390/include/asm/topology.h
@@ -86,12 +86,6 @@ static inline const struct cpumask *cpumask_of_node(int node)
#define pcibus_to_node(bus) __pcibus_to_node(bus)
-#define node_distance(a, b) __node_distance(a, b)
-static inline int __node_distance(int a, int b)
-{
- return 0;
-}
-
#else /* !CONFIG_NUMA */
#define numa_node_id numa_node_id
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 535e4fc623fab2e09a0653fc3a3e17f382ad0251 Mon Sep 17 00:00:00 2001
From: Alexander Gordeev <agordeev(a)linux.ibm.com>
Date: Tue, 4 Aug 2020 20:35:49 +0200
Subject: [PATCH] s390/numa: set node distance to LOCAL_DISTANCE
The node distance is hardcoded to 0, which causes a trouble
for some user-level applications. In particular, "libnuma"
expects the distance of a node to itself as LOCAL_DISTANCE.
This update removes the offending node distance override.
Cc: <stable(a)vger.kernel.org> # 4.4
Fixes: 3a368f742da1 ("s390/numa: add core infrastructure")
Signed-off-by: Alexander Gordeev <agordeev(a)linux.ibm.com>
Signed-off-by: Heiko Carstens <hca(a)linux.ibm.com>
diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h
index fbb507504a3b..3a0ac0c7a9a3 100644
--- a/arch/s390/include/asm/topology.h
+++ b/arch/s390/include/asm/topology.h
@@ -86,12 +86,6 @@ static inline const struct cpumask *cpumask_of_node(int node)
#define pcibus_to_node(bus) __pcibus_to_node(bus)
-#define node_distance(a, b) __node_distance(a, b)
-static inline int __node_distance(int a, int b)
-{
- return 0;
-}
-
#else /* !CONFIG_NUMA */
#define numa_node_id numa_node_id
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 535e4fc623fab2e09a0653fc3a3e17f382ad0251 Mon Sep 17 00:00:00 2001
From: Alexander Gordeev <agordeev(a)linux.ibm.com>
Date: Tue, 4 Aug 2020 20:35:49 +0200
Subject: [PATCH] s390/numa: set node distance to LOCAL_DISTANCE
The node distance is hardcoded to 0, which causes a trouble
for some user-level applications. In particular, "libnuma"
expects the distance of a node to itself as LOCAL_DISTANCE.
This update removes the offending node distance override.
Cc: <stable(a)vger.kernel.org> # 4.4
Fixes: 3a368f742da1 ("s390/numa: add core infrastructure")
Signed-off-by: Alexander Gordeev <agordeev(a)linux.ibm.com>
Signed-off-by: Heiko Carstens <hca(a)linux.ibm.com>
diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h
index fbb507504a3b..3a0ac0c7a9a3 100644
--- a/arch/s390/include/asm/topology.h
+++ b/arch/s390/include/asm/topology.h
@@ -86,12 +86,6 @@ static inline const struct cpumask *cpumask_of_node(int node)
#define pcibus_to_node(bus) __pcibus_to_node(bus)
-#define node_distance(a, b) __node_distance(a, b)
-static inline int __node_distance(int a, int b)
-{
- return 0;
-}
-
#else /* !CONFIG_NUMA */
#define numa_node_id numa_node_id
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 535e4fc623fab2e09a0653fc3a3e17f382ad0251 Mon Sep 17 00:00:00 2001
From: Alexander Gordeev <agordeev(a)linux.ibm.com>
Date: Tue, 4 Aug 2020 20:35:49 +0200
Subject: [PATCH] s390/numa: set node distance to LOCAL_DISTANCE
The node distance is hardcoded to 0, which causes a trouble
for some user-level applications. In particular, "libnuma"
expects the distance of a node to itself as LOCAL_DISTANCE.
This update removes the offending node distance override.
Cc: <stable(a)vger.kernel.org> # 4.4
Fixes: 3a368f742da1 ("s390/numa: add core infrastructure")
Signed-off-by: Alexander Gordeev <agordeev(a)linux.ibm.com>
Signed-off-by: Heiko Carstens <hca(a)linux.ibm.com>
diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h
index fbb507504a3b..3a0ac0c7a9a3 100644
--- a/arch/s390/include/asm/topology.h
+++ b/arch/s390/include/asm/topology.h
@@ -86,12 +86,6 @@ static inline const struct cpumask *cpumask_of_node(int node)
#define pcibus_to_node(bus) __pcibus_to_node(bus)
-#define node_distance(a, b) __node_distance(a, b)
-static inline int __node_distance(int a, int b)
-{
- return 0;
-}
-
#else /* !CONFIG_NUMA */
#define numa_node_id numa_node_id
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 63ef91f24f9bfc70b6446319f6cabfd094481372 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz(a)kernel.org>
Date: Tue, 30 Jun 2020 11:05:46 +0100
Subject: [PATCH] PM / devfreq: rk3399_dmc: Fix kernel oops when rockchip,pmu
is absent
Booting a recent kernel on a rk3399-based system (nanopc-t4),
equipped with a recent u-boot and ATF results in an Oops due
to a NULL pointer dereference.
This turns out to be due to the rk3399-dmc driver looking for
an *undocumented* property (rockchip,pmu), and happily using
a NULL pointer when the property isn't there.
Instead, make most of what was brought in with 9173c5ceb035
("PM / devfreq: rk3399_dmc: Pass ODT and auto power down parameters
to TF-A.") conditioned on finding this property in the device-tree,
preventing the driver from exploding.
Cc: stable(a)vger.kernel.org
Fixes: 9173c5ceb035 ("PM / devfreq: rk3399_dmc: Pass ODT and auto power down parameters to TF-A.")
Signed-off-by: Marc Zyngier <maz(a)kernel.org>
Signed-off-by: Chanwoo Choi <cw00.choi(a)samsung.com>
diff --git a/drivers/devfreq/rk3399_dmc.c b/drivers/devfreq/rk3399_dmc.c
index 24f04f78285b..027769e39f9b 100644
--- a/drivers/devfreq/rk3399_dmc.c
+++ b/drivers/devfreq/rk3399_dmc.c
@@ -95,18 +95,20 @@ static int rk3399_dmcfreq_target(struct device *dev, unsigned long *freq,
mutex_lock(&dmcfreq->lock);
- if (target_rate >= dmcfreq->odt_dis_freq)
- odt_enable = true;
-
- /*
- * This makes a SMC call to the TF-A to set the DDR PD (power-down)
- * timings and to enable or disable the ODT (on-die termination)
- * resistors.
- */
- arm_smccc_smc(ROCKCHIP_SIP_DRAM_FREQ, dmcfreq->odt_pd_arg0,
- dmcfreq->odt_pd_arg1,
- ROCKCHIP_SIP_CONFIG_DRAM_SET_ODT_PD,
- odt_enable, 0, 0, 0, &res);
+ if (dmcfreq->regmap_pmu) {
+ if (target_rate >= dmcfreq->odt_dis_freq)
+ odt_enable = true;
+
+ /*
+ * This makes a SMC call to the TF-A to set the DDR PD
+ * (power-down) timings and to enable or disable the
+ * ODT (on-die termination) resistors.
+ */
+ arm_smccc_smc(ROCKCHIP_SIP_DRAM_FREQ, dmcfreq->odt_pd_arg0,
+ dmcfreq->odt_pd_arg1,
+ ROCKCHIP_SIP_CONFIG_DRAM_SET_ODT_PD,
+ odt_enable, 0, 0, 0, &res);
+ }
/*
* If frequency scaling from low to high, adjust voltage first.
@@ -371,13 +373,14 @@ static int rk3399_dmcfreq_probe(struct platform_device *pdev)
}
node = of_parse_phandle(np, "rockchip,pmu", 0);
- if (node) {
- data->regmap_pmu = syscon_node_to_regmap(node);
- of_node_put(node);
- if (IS_ERR(data->regmap_pmu)) {
- ret = PTR_ERR(data->regmap_pmu);
- goto err_edev;
- }
+ if (!node)
+ goto no_pmu;
+
+ data->regmap_pmu = syscon_node_to_regmap(node);
+ of_node_put(node);
+ if (IS_ERR(data->regmap_pmu)) {
+ ret = PTR_ERR(data->regmap_pmu);
+ goto err_edev;
}
regmap_read(data->regmap_pmu, RK3399_PMUGRF_OS_REG2, &val);
@@ -399,6 +402,7 @@ static int rk3399_dmcfreq_probe(struct platform_device *pdev)
goto err_edev;
};
+no_pmu:
arm_smccc_smc(ROCKCHIP_SIP_DRAM_FREQ, 0, 0,
ROCKCHIP_SIP_CONFIG_DRAM_INIT,
0, 0, 0, 0, &res);
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From de002c55cadfc2f6cdf0ed427526f6085d240238 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki(a)intel.com>
Date: Tue, 28 Jul 2020 19:09:32 +0200
Subject: [PATCH] cpufreq: intel_pstate: Fix EPP setting via sysfs in active
mode
Because intel_pstate_set_energy_pref_index() reads and writes the
MSR_HWP_REQUEST register without using the cached value of it used by
intel_pstate_hwp_boost_up() and intel_pstate_hwp_boost_down(), those
functions may overwrite the value written by it and so the EPP value
set via sysfs may be lost.
To avoid that, make intel_pstate_set_energy_pref_index() take the
cached value of MSR_HWP_REQUEST just like the other two routines
mentioned above and update it with the new EPP value coming from
user space in addition to updating the MSR.
Note that the MSR itself still needs to be updated too in case
hwp_boost is unset or the boosting mechanism is not active at the
EPP change time.
Fixes: e0efd5be63e8 ("cpufreq: intel_pstate: Add HWP boost utility and sched util hooks")
Reported-by: Francisco Jerez <currojerez(a)riseup.net>
Cc: 4.18+ <stable(a)vger.kernel.org> # 4.18+: 3da97d4db8ee cpufreq: intel_pstate: Rearrange ...
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
Reviewed-by: Francisco Jerez <currojerez(a)riseup.net>
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index c55f6e35a1e3..7f5d81931483 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -650,11 +650,12 @@ static int intel_pstate_set_energy_pref_index(struct cpudata *cpu_data,
epp = cpu_data->epp_default;
if (boot_cpu_has(X86_FEATURE_HWP_EPP)) {
- u64 value;
-
- ret = rdmsrl_on_cpu(cpu_data->cpu, MSR_HWP_REQUEST, &value);
- if (ret)
- return ret;
+ /*
+ * Use the cached HWP Request MSR value, because the register
+ * itself may be updated by intel_pstate_hwp_boost_up() or
+ * intel_pstate_hwp_boost_down() at any time.
+ */
+ u64 value = READ_ONCE(cpu_data->hwp_req_cached);
value &= ~GENMASK_ULL(31, 24);
@@ -664,6 +665,12 @@ static int intel_pstate_set_energy_pref_index(struct cpudata *cpu_data,
epp = epp_values[pref_index - 1];
value |= (u64)epp << 24;
+ /*
+ * The only other updater of hwp_req_cached in the active mode,
+ * intel_pstate_hwp_set(), is called under the same lock as this
+ * function, so it cannot run in parallel with the update below.
+ */
+ WRITE_ONCE(cpu_data->hwp_req_cached, value);
ret = wrmsrl_on_cpu(cpu_data->cpu, MSR_HWP_REQUEST, value);
} else {
if (epp == -EINVAL)
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From de002c55cadfc2f6cdf0ed427526f6085d240238 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki(a)intel.com>
Date: Tue, 28 Jul 2020 19:09:32 +0200
Subject: [PATCH] cpufreq: intel_pstate: Fix EPP setting via sysfs in active
mode
Because intel_pstate_set_energy_pref_index() reads and writes the
MSR_HWP_REQUEST register without using the cached value of it used by
intel_pstate_hwp_boost_up() and intel_pstate_hwp_boost_down(), those
functions may overwrite the value written by it and so the EPP value
set via sysfs may be lost.
To avoid that, make intel_pstate_set_energy_pref_index() take the
cached value of MSR_HWP_REQUEST just like the other two routines
mentioned above and update it with the new EPP value coming from
user space in addition to updating the MSR.
Note that the MSR itself still needs to be updated too in case
hwp_boost is unset or the boosting mechanism is not active at the
EPP change time.
Fixes: e0efd5be63e8 ("cpufreq: intel_pstate: Add HWP boost utility and sched util hooks")
Reported-by: Francisco Jerez <currojerez(a)riseup.net>
Cc: 4.18+ <stable(a)vger.kernel.org> # 4.18+: 3da97d4db8ee cpufreq: intel_pstate: Rearrange ...
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
Reviewed-by: Francisco Jerez <currojerez(a)riseup.net>
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index c55f6e35a1e3..7f5d81931483 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -650,11 +650,12 @@ static int intel_pstate_set_energy_pref_index(struct cpudata *cpu_data,
epp = cpu_data->epp_default;
if (boot_cpu_has(X86_FEATURE_HWP_EPP)) {
- u64 value;
-
- ret = rdmsrl_on_cpu(cpu_data->cpu, MSR_HWP_REQUEST, &value);
- if (ret)
- return ret;
+ /*
+ * Use the cached HWP Request MSR value, because the register
+ * itself may be updated by intel_pstate_hwp_boost_up() or
+ * intel_pstate_hwp_boost_down() at any time.
+ */
+ u64 value = READ_ONCE(cpu_data->hwp_req_cached);
value &= ~GENMASK_ULL(31, 24);
@@ -664,6 +665,12 @@ static int intel_pstate_set_energy_pref_index(struct cpudata *cpu_data,
epp = epp_values[pref_index - 1];
value |= (u64)epp << 24;
+ /*
+ * The only other updater of hwp_req_cached in the active mode,
+ * intel_pstate_hwp_set(), is called under the same lock as this
+ * function, so it cannot run in parallel with the update below.
+ */
+ WRITE_ONCE(cpu_data->hwp_req_cached, value);
ret = wrmsrl_on_cpu(cpu_data->cpu, MSR_HWP_REQUEST, value);
} else {
if (epp == -EINVAL)
The patch below does not apply to the 5.7-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From de002c55cadfc2f6cdf0ed427526f6085d240238 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki(a)intel.com>
Date: Tue, 28 Jul 2020 19:09:32 +0200
Subject: [PATCH] cpufreq: intel_pstate: Fix EPP setting via sysfs in active
mode
Because intel_pstate_set_energy_pref_index() reads and writes the
MSR_HWP_REQUEST register without using the cached value of it used by
intel_pstate_hwp_boost_up() and intel_pstate_hwp_boost_down(), those
functions may overwrite the value written by it and so the EPP value
set via sysfs may be lost.
To avoid that, make intel_pstate_set_energy_pref_index() take the
cached value of MSR_HWP_REQUEST just like the other two routines
mentioned above and update it with the new EPP value coming from
user space in addition to updating the MSR.
Note that the MSR itself still needs to be updated too in case
hwp_boost is unset or the boosting mechanism is not active at the
EPP change time.
Fixes: e0efd5be63e8 ("cpufreq: intel_pstate: Add HWP boost utility and sched util hooks")
Reported-by: Francisco Jerez <currojerez(a)riseup.net>
Cc: 4.18+ <stable(a)vger.kernel.org> # 4.18+: 3da97d4db8ee cpufreq: intel_pstate: Rearrange ...
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
Reviewed-by: Francisco Jerez <currojerez(a)riseup.net>
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index c55f6e35a1e3..7f5d81931483 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -650,11 +650,12 @@ static int intel_pstate_set_energy_pref_index(struct cpudata *cpu_data,
epp = cpu_data->epp_default;
if (boot_cpu_has(X86_FEATURE_HWP_EPP)) {
- u64 value;
-
- ret = rdmsrl_on_cpu(cpu_data->cpu, MSR_HWP_REQUEST, &value);
- if (ret)
- return ret;
+ /*
+ * Use the cached HWP Request MSR value, because the register
+ * itself may be updated by intel_pstate_hwp_boost_up() or
+ * intel_pstate_hwp_boost_down() at any time.
+ */
+ u64 value = READ_ONCE(cpu_data->hwp_req_cached);
value &= ~GENMASK_ULL(31, 24);
@@ -664,6 +665,12 @@ static int intel_pstate_set_energy_pref_index(struct cpudata *cpu_data,
epp = epp_values[pref_index - 1];
value |= (u64)epp << 24;
+ /*
+ * The only other updater of hwp_req_cached in the active mode,
+ * intel_pstate_hwp_set(), is called under the same lock as this
+ * function, so it cannot run in parallel with the update below.
+ */
+ WRITE_ONCE(cpu_data->hwp_req_cached, value);
ret = wrmsrl_on_cpu(cpu_data->cpu, MSR_HWP_REQUEST, value);
} else {
if (epp == -EINVAL)
The patch below does not apply to the 5.8-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From de002c55cadfc2f6cdf0ed427526f6085d240238 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki(a)intel.com>
Date: Tue, 28 Jul 2020 19:09:32 +0200
Subject: [PATCH] cpufreq: intel_pstate: Fix EPP setting via sysfs in active
mode
Because intel_pstate_set_energy_pref_index() reads and writes the
MSR_HWP_REQUEST register without using the cached value of it used by
intel_pstate_hwp_boost_up() and intel_pstate_hwp_boost_down(), those
functions may overwrite the value written by it and so the EPP value
set via sysfs may be lost.
To avoid that, make intel_pstate_set_energy_pref_index() take the
cached value of MSR_HWP_REQUEST just like the other two routines
mentioned above and update it with the new EPP value coming from
user space in addition to updating the MSR.
Note that the MSR itself still needs to be updated too in case
hwp_boost is unset or the boosting mechanism is not active at the
EPP change time.
Fixes: e0efd5be63e8 ("cpufreq: intel_pstate: Add HWP boost utility and sched util hooks")
Reported-by: Francisco Jerez <currojerez(a)riseup.net>
Cc: 4.18+ <stable(a)vger.kernel.org> # 4.18+: 3da97d4db8ee cpufreq: intel_pstate: Rearrange ...
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
Reviewed-by: Francisco Jerez <currojerez(a)riseup.net>
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index c55f6e35a1e3..7f5d81931483 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -650,11 +650,12 @@ static int intel_pstate_set_energy_pref_index(struct cpudata *cpu_data,
epp = cpu_data->epp_default;
if (boot_cpu_has(X86_FEATURE_HWP_EPP)) {
- u64 value;
-
- ret = rdmsrl_on_cpu(cpu_data->cpu, MSR_HWP_REQUEST, &value);
- if (ret)
- return ret;
+ /*
+ * Use the cached HWP Request MSR value, because the register
+ * itself may be updated by intel_pstate_hwp_boost_up() or
+ * intel_pstate_hwp_boost_down() at any time.
+ */
+ u64 value = READ_ONCE(cpu_data->hwp_req_cached);
value &= ~GENMASK_ULL(31, 24);
@@ -664,6 +665,12 @@ static int intel_pstate_set_energy_pref_index(struct cpudata *cpu_data,
epp = epp_values[pref_index - 1];
value |= (u64)epp << 24;
+ /*
+ * The only other updater of hwp_req_cached in the active mode,
+ * intel_pstate_hwp_set(), is called under the same lock as this
+ * function, so it cannot run in parallel with the update below.
+ */
+ WRITE_ONCE(cpu_data->hwp_req_cached, value);
ret = wrmsrl_on_cpu(cpu_data->cpu, MSR_HWP_REQUEST, value);
} else {
if (epp == -EINVAL)
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From b292b50b0efcc7095d8bf15505fba6909bb35dce Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel(a)I-love.SAKURA.ne.jp>
Date: Mon, 13 Jul 2020 11:12:54 +0900
Subject: [PATCH] driver core: Fix probe_count imbalance in really_probe()
syzbot is reporting hung task in wait_for_device_probe() [1]. At least,
we always need to decrement probe_count if we incremented probe_count in
really_probe().
However, since I can't find "Resources present before probing" message in
the console log, both "this message simply flowed off" and "syzbot is not
hitting this path" will be possible. Therefore, while we are at it, let's
also prepare for concurrent wait_for_device_probe() calls by replacing
wake_up() with wake_up_all().
[1] https://syzkaller.appspot.com/bug?id=25c833f1983c9c1d512f4ff860dd0d7f5a2e2c…
Reported-by: syzbot <syzbot+805f5f6ae37411f15b64(a)syzkaller.appspotmail.com>
Fixes: 7c35e699c88bd607 ("driver core: Print device when resources present in really_probe()")
Cc: Geert Uytterhoeven <geert+renesas(a)glider.be>
Signed-off-by: Tetsuo Handa <penguin-kernel(a)I-love.SAKURA.ne.jp>
Cc: stable <stable(a)kernel.org>
Link: https://lore.kernel.org/r/20200713021254.3444-1-penguin-kernel@I-love.SAKUR…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index 2306b481109a..fce8e35b6367 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -276,7 +276,7 @@ static void deferred_probe_timeout_work_func(struct work_struct *work)
list_for_each_entry_safe(private, p, &deferred_probe_pending_list, deferred_probe)
dev_info(private->device, "deferred probe pending\n");
- wake_up(&probe_timeout_waitqueue);
+ wake_up_all(&probe_timeout_waitqueue);
}
static DECLARE_DELAYED_WORK(deferred_probe_timeout_work, deferred_probe_timeout_work_func);
@@ -498,7 +498,8 @@ static int really_probe(struct device *dev, struct device_driver *drv)
drv->bus->name, __func__, drv->name, dev_name(dev));
if (!list_empty(&dev->devres_head)) {
dev_crit(dev, "Resources present before probing\n");
- return -EBUSY;
+ ret = -EBUSY;
+ goto done;
}
re_probe:
@@ -627,7 +628,7 @@ static int really_probe(struct device *dev, struct device_driver *drv)
ret = 0;
done:
atomic_dec(&probe_count);
- wake_up(&probe_waitqueue);
+ wake_up_all(&probe_waitqueue);
return ret;
}
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From b292b50b0efcc7095d8bf15505fba6909bb35dce Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel(a)I-love.SAKURA.ne.jp>
Date: Mon, 13 Jul 2020 11:12:54 +0900
Subject: [PATCH] driver core: Fix probe_count imbalance in really_probe()
syzbot is reporting hung task in wait_for_device_probe() [1]. At least,
we always need to decrement probe_count if we incremented probe_count in
really_probe().
However, since I can't find "Resources present before probing" message in
the console log, both "this message simply flowed off" and "syzbot is not
hitting this path" will be possible. Therefore, while we are at it, let's
also prepare for concurrent wait_for_device_probe() calls by replacing
wake_up() with wake_up_all().
[1] https://syzkaller.appspot.com/bug?id=25c833f1983c9c1d512f4ff860dd0d7f5a2e2c…
Reported-by: syzbot <syzbot+805f5f6ae37411f15b64(a)syzkaller.appspotmail.com>
Fixes: 7c35e699c88bd607 ("driver core: Print device when resources present in really_probe()")
Cc: Geert Uytterhoeven <geert+renesas(a)glider.be>
Signed-off-by: Tetsuo Handa <penguin-kernel(a)I-love.SAKURA.ne.jp>
Cc: stable <stable(a)kernel.org>
Link: https://lore.kernel.org/r/20200713021254.3444-1-penguin-kernel@I-love.SAKUR…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index 2306b481109a..fce8e35b6367 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -276,7 +276,7 @@ static void deferred_probe_timeout_work_func(struct work_struct *work)
list_for_each_entry_safe(private, p, &deferred_probe_pending_list, deferred_probe)
dev_info(private->device, "deferred probe pending\n");
- wake_up(&probe_timeout_waitqueue);
+ wake_up_all(&probe_timeout_waitqueue);
}
static DECLARE_DELAYED_WORK(deferred_probe_timeout_work, deferred_probe_timeout_work_func);
@@ -498,7 +498,8 @@ static int really_probe(struct device *dev, struct device_driver *drv)
drv->bus->name, __func__, drv->name, dev_name(dev));
if (!list_empty(&dev->devres_head)) {
dev_crit(dev, "Resources present before probing\n");
- return -EBUSY;
+ ret = -EBUSY;
+ goto done;
}
re_probe:
@@ -627,7 +628,7 @@ static int really_probe(struct device *dev, struct device_driver *drv)
ret = 0;
done:
atomic_dec(&probe_count);
- wake_up(&probe_waitqueue);
+ wake_up_all(&probe_waitqueue);
return ret;
}
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From b292b50b0efcc7095d8bf15505fba6909bb35dce Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel(a)I-love.SAKURA.ne.jp>
Date: Mon, 13 Jul 2020 11:12:54 +0900
Subject: [PATCH] driver core: Fix probe_count imbalance in really_probe()
syzbot is reporting hung task in wait_for_device_probe() [1]. At least,
we always need to decrement probe_count if we incremented probe_count in
really_probe().
However, since I can't find "Resources present before probing" message in
the console log, both "this message simply flowed off" and "syzbot is not
hitting this path" will be possible. Therefore, while we are at it, let's
also prepare for concurrent wait_for_device_probe() calls by replacing
wake_up() with wake_up_all().
[1] https://syzkaller.appspot.com/bug?id=25c833f1983c9c1d512f4ff860dd0d7f5a2e2c…
Reported-by: syzbot <syzbot+805f5f6ae37411f15b64(a)syzkaller.appspotmail.com>
Fixes: 7c35e699c88bd607 ("driver core: Print device when resources present in really_probe()")
Cc: Geert Uytterhoeven <geert+renesas(a)glider.be>
Signed-off-by: Tetsuo Handa <penguin-kernel(a)I-love.SAKURA.ne.jp>
Cc: stable <stable(a)kernel.org>
Link: https://lore.kernel.org/r/20200713021254.3444-1-penguin-kernel@I-love.SAKUR…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index 2306b481109a..fce8e35b6367 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -276,7 +276,7 @@ static void deferred_probe_timeout_work_func(struct work_struct *work)
list_for_each_entry_safe(private, p, &deferred_probe_pending_list, deferred_probe)
dev_info(private->device, "deferred probe pending\n");
- wake_up(&probe_timeout_waitqueue);
+ wake_up_all(&probe_timeout_waitqueue);
}
static DECLARE_DELAYED_WORK(deferred_probe_timeout_work, deferred_probe_timeout_work_func);
@@ -498,7 +498,8 @@ static int really_probe(struct device *dev, struct device_driver *drv)
drv->bus->name, __func__, drv->name, dev_name(dev));
if (!list_empty(&dev->devres_head)) {
dev_crit(dev, "Resources present before probing\n");
- return -EBUSY;
+ ret = -EBUSY;
+ goto done;
}
re_probe:
@@ -627,7 +628,7 @@ static int really_probe(struct device *dev, struct device_driver *drv)
ret = 0;
done:
atomic_dec(&probe_count);
- wake_up(&probe_waitqueue);
+ wake_up_all(&probe_waitqueue);
return ret;
}
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From b292b50b0efcc7095d8bf15505fba6909bb35dce Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel(a)I-love.SAKURA.ne.jp>
Date: Mon, 13 Jul 2020 11:12:54 +0900
Subject: [PATCH] driver core: Fix probe_count imbalance in really_probe()
syzbot is reporting hung task in wait_for_device_probe() [1]. At least,
we always need to decrement probe_count if we incremented probe_count in
really_probe().
However, since I can't find "Resources present before probing" message in
the console log, both "this message simply flowed off" and "syzbot is not
hitting this path" will be possible. Therefore, while we are at it, let's
also prepare for concurrent wait_for_device_probe() calls by replacing
wake_up() with wake_up_all().
[1] https://syzkaller.appspot.com/bug?id=25c833f1983c9c1d512f4ff860dd0d7f5a2e2c…
Reported-by: syzbot <syzbot+805f5f6ae37411f15b64(a)syzkaller.appspotmail.com>
Fixes: 7c35e699c88bd607 ("driver core: Print device when resources present in really_probe()")
Cc: Geert Uytterhoeven <geert+renesas(a)glider.be>
Signed-off-by: Tetsuo Handa <penguin-kernel(a)I-love.SAKURA.ne.jp>
Cc: stable <stable(a)kernel.org>
Link: https://lore.kernel.org/r/20200713021254.3444-1-penguin-kernel@I-love.SAKUR…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index 2306b481109a..fce8e35b6367 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -276,7 +276,7 @@ static void deferred_probe_timeout_work_func(struct work_struct *work)
list_for_each_entry_safe(private, p, &deferred_probe_pending_list, deferred_probe)
dev_info(private->device, "deferred probe pending\n");
- wake_up(&probe_timeout_waitqueue);
+ wake_up_all(&probe_timeout_waitqueue);
}
static DECLARE_DELAYED_WORK(deferred_probe_timeout_work, deferred_probe_timeout_work_func);
@@ -498,7 +498,8 @@ static int really_probe(struct device *dev, struct device_driver *drv)
drv->bus->name, __func__, drv->name, dev_name(dev));
if (!list_empty(&dev->devres_head)) {
dev_crit(dev, "Resources present before probing\n");
- return -EBUSY;
+ ret = -EBUSY;
+ goto done;
}
re_probe:
@@ -627,7 +628,7 @@ static int really_probe(struct device *dev, struct device_driver *drv)
ret = 0;
done:
atomic_dec(&probe_count);
- wake_up(&probe_waitqueue);
+ wake_up_all(&probe_waitqueue);
return ret;
}
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From b292b50b0efcc7095d8bf15505fba6909bb35dce Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel(a)I-love.SAKURA.ne.jp>
Date: Mon, 13 Jul 2020 11:12:54 +0900
Subject: [PATCH] driver core: Fix probe_count imbalance in really_probe()
syzbot is reporting hung task in wait_for_device_probe() [1]. At least,
we always need to decrement probe_count if we incremented probe_count in
really_probe().
However, since I can't find "Resources present before probing" message in
the console log, both "this message simply flowed off" and "syzbot is not
hitting this path" will be possible. Therefore, while we are at it, let's
also prepare for concurrent wait_for_device_probe() calls by replacing
wake_up() with wake_up_all().
[1] https://syzkaller.appspot.com/bug?id=25c833f1983c9c1d512f4ff860dd0d7f5a2e2c…
Reported-by: syzbot <syzbot+805f5f6ae37411f15b64(a)syzkaller.appspotmail.com>
Fixes: 7c35e699c88bd607 ("driver core: Print device when resources present in really_probe()")
Cc: Geert Uytterhoeven <geert+renesas(a)glider.be>
Signed-off-by: Tetsuo Handa <penguin-kernel(a)I-love.SAKURA.ne.jp>
Cc: stable <stable(a)kernel.org>
Link: https://lore.kernel.org/r/20200713021254.3444-1-penguin-kernel@I-love.SAKUR…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index 2306b481109a..fce8e35b6367 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -276,7 +276,7 @@ static void deferred_probe_timeout_work_func(struct work_struct *work)
list_for_each_entry_safe(private, p, &deferred_probe_pending_list, deferred_probe)
dev_info(private->device, "deferred probe pending\n");
- wake_up(&probe_timeout_waitqueue);
+ wake_up_all(&probe_timeout_waitqueue);
}
static DECLARE_DELAYED_WORK(deferred_probe_timeout_work, deferred_probe_timeout_work_func);
@@ -498,7 +498,8 @@ static int really_probe(struct device *dev, struct device_driver *drv)
drv->bus->name, __func__, drv->name, dev_name(dev));
if (!list_empty(&dev->devres_head)) {
dev_crit(dev, "Resources present before probing\n");
- return -EBUSY;
+ ret = -EBUSY;
+ goto done;
}
re_probe:
@@ -627,7 +628,7 @@ static int really_probe(struct device *dev, struct device_driver *drv)
ret = 0;
done:
atomic_dec(&probe_count);
- wake_up(&probe_waitqueue);
+ wake_up_all(&probe_waitqueue);
return ret;
}
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 8a302808c60d441d9884cb00ea7f2b534f2e3ca5 Mon Sep 17 00:00:00 2001
From: John Allen <john.allen(a)amd.com>
Date: Mon, 22 Jun 2020 15:24:02 -0500
Subject: [PATCH] crypto: ccp - Fix use of merged scatterlists
Running the crypto manager self tests with
CONFIG_CRYPTO_MANAGER_EXTRA_TESTS may result in several types of errors
when using the ccp-crypto driver:
alg: skcipher: cbc-des3-ccp encryption failed on test vector 0; expected_error=0, actual_error=-5 ...
alg: skcipher: ctr-aes-ccp decryption overran dst buffer on test vector 0 ...
alg: ahash: sha224-ccp test failed (wrong result) on test vector ...
These errors are the result of improper processing of scatterlists mapped
for DMA.
Given a scatterlist in which entries are merged as part of mapping the
scatterlist for DMA, the DMA length of a merged entry will reflect the
combined length of the entries that were merged. The subsequent
scatterlist entry will contain DMA information for the scatterlist entry
after the last merged entry, but the non-DMA information will be that of
the first merged entry.
The ccp driver does not take this scatterlist merging into account. To
address this, add a second scatterlist pointer to track the current
position in the DMA mapped representation of the scatterlist. Both the DMA
representation and the original representation of the scatterlist must be
tracked as while most of the driver can use just the DMA representation,
scatterlist_map_and_copy() must use the original representation and
expects the scatterlist pointer to be accurate to the original
representation.
In order to properly walk the original scatterlist, the scatterlist must
be walked until the combined lengths of the entries seen is equal to the
DMA length of the current entry being processed in the DMA mapped
representation.
Fixes: 63b945091a070 ("crypto: ccp - CCP device driver and interface support")
Signed-off-by: John Allen <john.allen(a)amd.com>
Cc: stable(a)vger.kernel.org
Acked-by: Tom Lendacky <thomas.lendacky(a)amd.com>
Signed-off-by: Herbert Xu <herbert(a)gondor.apana.org.au>
diff --git a/drivers/crypto/ccp/ccp-dev.h b/drivers/crypto/ccp/ccp-dev.h
index 3f68262d9ab4..87a34d91fdf7 100644
--- a/drivers/crypto/ccp/ccp-dev.h
+++ b/drivers/crypto/ccp/ccp-dev.h
@@ -469,6 +469,7 @@ struct ccp_sg_workarea {
unsigned int sg_used;
struct scatterlist *dma_sg;
+ struct scatterlist *dma_sg_head;
struct device *dma_dev;
unsigned int dma_count;
enum dma_data_direction dma_dir;
diff --git a/drivers/crypto/ccp/ccp-ops.c b/drivers/crypto/ccp/ccp-ops.c
index d270aa792888..a06d20263efa 100644
--- a/drivers/crypto/ccp/ccp-ops.c
+++ b/drivers/crypto/ccp/ccp-ops.c
@@ -63,7 +63,7 @@ static u32 ccp_gen_jobid(struct ccp_device *ccp)
static void ccp_sg_free(struct ccp_sg_workarea *wa)
{
if (wa->dma_count)
- dma_unmap_sg(wa->dma_dev, wa->dma_sg, wa->nents, wa->dma_dir);
+ dma_unmap_sg(wa->dma_dev, wa->dma_sg_head, wa->nents, wa->dma_dir);
wa->dma_count = 0;
}
@@ -92,6 +92,7 @@ static int ccp_init_sg_workarea(struct ccp_sg_workarea *wa, struct device *dev,
return 0;
wa->dma_sg = sg;
+ wa->dma_sg_head = sg;
wa->dma_dev = dev;
wa->dma_dir = dma_dir;
wa->dma_count = dma_map_sg(dev, sg, wa->nents, dma_dir);
@@ -104,14 +105,28 @@ static int ccp_init_sg_workarea(struct ccp_sg_workarea *wa, struct device *dev,
static void ccp_update_sg_workarea(struct ccp_sg_workarea *wa, unsigned int len)
{
unsigned int nbytes = min_t(u64, len, wa->bytes_left);
+ unsigned int sg_combined_len = 0;
if (!wa->sg)
return;
wa->sg_used += nbytes;
wa->bytes_left -= nbytes;
- if (wa->sg_used == wa->sg->length) {
- wa->sg = sg_next(wa->sg);
+ if (wa->sg_used == sg_dma_len(wa->dma_sg)) {
+ /* Advance to the next DMA scatterlist entry */
+ wa->dma_sg = sg_next(wa->dma_sg);
+
+ /* In the case that the DMA mapped scatterlist has entries
+ * that have been merged, the non-DMA mapped scatterlist
+ * must be advanced multiple times for each merged entry.
+ * This ensures that the current non-DMA mapped entry
+ * corresponds to the current DMA mapped entry.
+ */
+ do {
+ sg_combined_len += wa->sg->length;
+ wa->sg = sg_next(wa->sg);
+ } while (wa->sg_used > sg_combined_len);
+
wa->sg_used = 0;
}
}
@@ -299,7 +314,7 @@ static unsigned int ccp_queue_buf(struct ccp_data *data, unsigned int from)
/* Update the structures and generate the count */
buf_count = 0;
while (sg_wa->bytes_left && (buf_count < dm_wa->length)) {
- nbytes = min(sg_wa->sg->length - sg_wa->sg_used,
+ nbytes = min(sg_dma_len(sg_wa->dma_sg) - sg_wa->sg_used,
dm_wa->length - buf_count);
nbytes = min_t(u64, sg_wa->bytes_left, nbytes);
@@ -331,11 +346,11 @@ static void ccp_prepare_data(struct ccp_data *src, struct ccp_data *dst,
* and destination. The resulting len values will always be <= UINT_MAX
* because the dma length is an unsigned int.
*/
- sg_src_len = sg_dma_len(src->sg_wa.sg) - src->sg_wa.sg_used;
+ sg_src_len = sg_dma_len(src->sg_wa.dma_sg) - src->sg_wa.sg_used;
sg_src_len = min_t(u64, src->sg_wa.bytes_left, sg_src_len);
if (dst) {
- sg_dst_len = sg_dma_len(dst->sg_wa.sg) - dst->sg_wa.sg_used;
+ sg_dst_len = sg_dma_len(dst->sg_wa.dma_sg) - dst->sg_wa.sg_used;
sg_dst_len = min_t(u64, src->sg_wa.bytes_left, sg_dst_len);
op_len = min(sg_src_len, sg_dst_len);
} else {
@@ -365,7 +380,7 @@ static void ccp_prepare_data(struct ccp_data *src, struct ccp_data *dst,
/* Enough data in the sg element, but we need to
* adjust for any previously copied data
*/
- op->src.u.dma.address = sg_dma_address(src->sg_wa.sg);
+ op->src.u.dma.address = sg_dma_address(src->sg_wa.dma_sg);
op->src.u.dma.offset = src->sg_wa.sg_used;
op->src.u.dma.length = op_len & ~(block_size - 1);
@@ -386,7 +401,7 @@ static void ccp_prepare_data(struct ccp_data *src, struct ccp_data *dst,
/* Enough room in the sg element, but we need to
* adjust for any previously used area
*/
- op->dst.u.dma.address = sg_dma_address(dst->sg_wa.sg);
+ op->dst.u.dma.address = sg_dma_address(dst->sg_wa.dma_sg);
op->dst.u.dma.offset = dst->sg_wa.sg_used;
op->dst.u.dma.length = op->src.u.dma.length;
}
@@ -2027,7 +2042,7 @@ ccp_run_passthru_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
dst.sg_wa.sg_used = 0;
for (i = 1; i <= src.sg_wa.dma_count; i++) {
if (!dst.sg_wa.sg ||
- (dst.sg_wa.sg->length < src.sg_wa.sg->length)) {
+ (sg_dma_len(dst.sg_wa.sg) < sg_dma_len(src.sg_wa.sg))) {
ret = -EINVAL;
goto e_dst;
}
@@ -2053,8 +2068,8 @@ ccp_run_passthru_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
goto e_dst;
}
- dst.sg_wa.sg_used += src.sg_wa.sg->length;
- if (dst.sg_wa.sg_used == dst.sg_wa.sg->length) {
+ dst.sg_wa.sg_used += sg_dma_len(src.sg_wa.sg);
+ if (dst.sg_wa.sg_used == sg_dma_len(dst.sg_wa.sg)) {
dst.sg_wa.sg = sg_next(dst.sg_wa.sg);
dst.sg_wa.sg_used = 0;
}
The patch below does not apply to the 5.7-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 783735f84fea6aad9b1e5931d6ea632796feaae3 Mon Sep 17 00:00:00 2001
From: Mika Westerberg <mika.westerberg(a)linux.intel.com>
Date: Thu, 2 Apr 2020 12:45:34 +0300
Subject: [PATCH] thunderbolt: Fix path indices used in USB3 tunnel discovery
The USB3 discovery used wrong indices when tunnel is discovered. It
should use TB_USB3_PATH_DOWN for path that flows downstream and
TB_USB3_PATH_UP when it flows upstream. This should not affect the
functionality but better to fix it.
Fixes: e6f818585713 ("thunderbolt: Add support for USB 3.x tunnels")
Signed-off-by: Mika Westerberg <mika.westerberg(a)linux.intel.com>
Cc: stable(a)vger.kernel.org # v5.6+
diff --git a/drivers/thunderbolt/tunnel.c b/drivers/thunderbolt/tunnel.c
index dbe90bcf4ad4..c144ca9b032c 100644
--- a/drivers/thunderbolt/tunnel.c
+++ b/drivers/thunderbolt/tunnel.c
@@ -913,21 +913,21 @@ struct tb_tunnel *tb_tunnel_discover_usb3(struct tb *tb, struct tb_port *down)
* case.
*/
path = tb_path_discover(down, TB_USB3_HOPID, NULL, -1,
- &tunnel->dst_port, "USB3 Up");
+ &tunnel->dst_port, "USB3 Down");
if (!path) {
/* Just disable the downstream port */
tb_usb3_port_enable(down, false);
goto err_free;
}
- tunnel->paths[TB_USB3_PATH_UP] = path;
- tb_usb3_init_path(tunnel->paths[TB_USB3_PATH_UP]);
+ tunnel->paths[TB_USB3_PATH_DOWN] = path;
+ tb_usb3_init_path(tunnel->paths[TB_USB3_PATH_DOWN]);
path = tb_path_discover(tunnel->dst_port, -1, down, TB_USB3_HOPID, NULL,
- "USB3 Down");
+ "USB3 Up");
if (!path)
goto err_deactivate;
- tunnel->paths[TB_USB3_PATH_DOWN] = path;
- tb_usb3_init_path(tunnel->paths[TB_USB3_PATH_DOWN]);
+ tunnel->paths[TB_USB3_PATH_UP] = path;
+ tb_usb3_init_path(tunnel->paths[TB_USB3_PATH_UP]);
/* Validate that the tunnel is complete */
if (!tb_port_is_usb3_up(tunnel->dst_port)) {
The patch below does not apply to the 5.8-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 783735f84fea6aad9b1e5931d6ea632796feaae3 Mon Sep 17 00:00:00 2001
From: Mika Westerberg <mika.westerberg(a)linux.intel.com>
Date: Thu, 2 Apr 2020 12:45:34 +0300
Subject: [PATCH] thunderbolt: Fix path indices used in USB3 tunnel discovery
The USB3 discovery used wrong indices when tunnel is discovered. It
should use TB_USB3_PATH_DOWN for path that flows downstream and
TB_USB3_PATH_UP when it flows upstream. This should not affect the
functionality but better to fix it.
Fixes: e6f818585713 ("thunderbolt: Add support for USB 3.x tunnels")
Signed-off-by: Mika Westerberg <mika.westerberg(a)linux.intel.com>
Cc: stable(a)vger.kernel.org # v5.6+
diff --git a/drivers/thunderbolt/tunnel.c b/drivers/thunderbolt/tunnel.c
index dbe90bcf4ad4..c144ca9b032c 100644
--- a/drivers/thunderbolt/tunnel.c
+++ b/drivers/thunderbolt/tunnel.c
@@ -913,21 +913,21 @@ struct tb_tunnel *tb_tunnel_discover_usb3(struct tb *tb, struct tb_port *down)
* case.
*/
path = tb_path_discover(down, TB_USB3_HOPID, NULL, -1,
- &tunnel->dst_port, "USB3 Up");
+ &tunnel->dst_port, "USB3 Down");
if (!path) {
/* Just disable the downstream port */
tb_usb3_port_enable(down, false);
goto err_free;
}
- tunnel->paths[TB_USB3_PATH_UP] = path;
- tb_usb3_init_path(tunnel->paths[TB_USB3_PATH_UP]);
+ tunnel->paths[TB_USB3_PATH_DOWN] = path;
+ tb_usb3_init_path(tunnel->paths[TB_USB3_PATH_DOWN]);
path = tb_path_discover(tunnel->dst_port, -1, down, TB_USB3_HOPID, NULL,
- "USB3 Down");
+ "USB3 Up");
if (!path)
goto err_deactivate;
- tunnel->paths[TB_USB3_PATH_DOWN] = path;
- tb_usb3_init_path(tunnel->paths[TB_USB3_PATH_DOWN]);
+ tunnel->paths[TB_USB3_PATH_UP] = path;
+ tb_usb3_init_path(tunnel->paths[TB_USB3_PATH_UP]);
/* Validate that the tunnel is complete */
if (!tb_port_is_usb3_up(tunnel->dst_port)) {
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From c7614ff9b73a1e6fb2b1b51396da132ed22fecdb Mon Sep 17 00:00:00 2001
From: Brant Merryman <brant.merryman(a)silabs.com>
Date: Fri, 26 Jun 2020 04:24:20 +0000
Subject: [PATCH] USB: serial: cp210x: re-enable auto-RTS on open
CP210x hardware disables auto-RTS but leaves auto-CTS when in hardware
flow control mode and UART on cp210x hardware is disabled. When
re-opening the port, if auto-CTS is enabled on the cp210x, then auto-RTS
must be re-enabled in the driver.
Signed-off-by: Brant Merryman <brant.merryman(a)silabs.com>
Co-developed-by: Phu Luu <phu.luu(a)silabs.com>
Signed-off-by: Phu Luu <phu.luu(a)silabs.com>
Link: https://lore.kernel.org/r/ECCF8E73-91F3-4080-BE17-1714BC8818FB@silabs.com
[ johan: fix up tags and problem description ]
Fixes: 39a66b8d22a3 ("[PATCH] USB: CP2101 Add support for flow control")
Cc: stable <stable(a)vger.kernel.org> # 2.6.12
Signed-off-by: Johan Hovold <johan(a)kernel.org>
diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
index bcceb4ad8be0..a90801ef0055 100644
--- a/drivers/usb/serial/cp210x.c
+++ b/drivers/usb/serial/cp210x.c
@@ -917,6 +917,7 @@ static void cp210x_get_termios_port(struct usb_serial_port *port,
u32 baud;
u16 bits;
u32 ctl_hs;
+ u32 flow_repl;
cp210x_read_u32_reg(port, CP210X_GET_BAUDRATE, &baud);
@@ -1017,6 +1018,22 @@ static void cp210x_get_termios_port(struct usb_serial_port *port,
ctl_hs = le32_to_cpu(flow_ctl.ulControlHandshake);
if (ctl_hs & CP210X_SERIAL_CTS_HANDSHAKE) {
dev_dbg(dev, "%s - flow control = CRTSCTS\n", __func__);
+ /*
+ * When the port is closed, the CP210x hardware disables
+ * auto-RTS and RTS is deasserted but it leaves auto-CTS when
+ * in hardware flow control mode. When re-opening the port, if
+ * auto-CTS is enabled on the cp210x, then auto-RTS must be
+ * re-enabled in the driver.
+ */
+ flow_repl = le32_to_cpu(flow_ctl.ulFlowReplace);
+ flow_repl &= ~CP210X_SERIAL_RTS_MASK;
+ flow_repl |= CP210X_SERIAL_RTS_SHIFT(CP210X_SERIAL_RTS_FLOW_CTL);
+ flow_ctl.ulFlowReplace = cpu_to_le32(flow_repl);
+ cp210x_write_reg_block(port,
+ CP210X_SET_FLOW,
+ &flow_ctl,
+ sizeof(flow_ctl));
+
cflag |= CRTSCTS;
} else {
dev_dbg(dev, "%s - flow control = NONE\n", __func__);
hi Greg,
Please apply upstream 8ab49526b53d to all stable kernels containing
07e1d88adaae, which should be v4.20 and higher stable kernels.
Thanks,
Ingo
----- Forwarded message from Eric Dumazet <edumazet(a)google.com> -----
Date: Sat, 15 Aug 2020 10:38:58 -0700
From: Eric Dumazet <edumazet(a)google.com>
To: Ingo Molnar <mingo(a)kernel.org>
Cc: linux-kernel <linux-kernel(a)vger.kernel.org>, Eric Dumazet <eric.dumazet(a)gmail.com>, Jann Horn <jannh(a)google.com>, syzbot <syzkaller(a)googlegroups.com>, Andy Lutomirski <luto(a)kernel.org>, "Chang S . Bae" <chang.seok.bae(a)intel.com>, Andy Lutomirski <luto(a)amacapital.net>,
Borislav Petkov <bp(a)alien8.de>, Brian Gerst <brgerst(a)gmail.com>, Dave Hansen <dave.hansen(a)linux.intel.com>, Denys Vlasenko <dvlasenk(a)redhat.com>, "H . Peter Anvin" <hpa(a)zytor.com>, Linus Torvalds <torvalds(a)linux-foundation.org>, Markus T Metzger
<markus.t.metzger(a)intel.com>, Peter Zijlstra <peterz(a)infradead.org>, Ravi Shankar <ravi.v.shankar(a)intel.com>, Rik van Riel <riel(a)surriel.com>, Thomas Gleixner <tglx(a)linutronix.de>
Subject: Re: [PATCH] x86/fsgsbase/64: Fix NULL deref in 86_fsgsbase_read_task
On Sat, Aug 15, 2020 at 4:48 AM Ingo Molnar <mingo(a)kernel.org> wrote:
>
>
> * Eric Dumazet <edumazet(a)google.com> wrote:
>
> > syzbot found its way in 86_fsgsbase_read_task() [1]
> >
> > Fix is to make sure ldt pointer is not NULL.
>
> Thanks for this fix. Linus has picked it up (inclusive the typos to
> the x86_fsgsbase_read_task() function name ;-), it's now upstream
> under:
>
> 8ab49526b53d: ("x86/fsgsbase/64: Fix NULL deref in 86_fsgsbase_read_task")
>
> By the fixes tag it looks like this should probably be backported all
> the way back to ~v4.20 or so?
This is absolutely right, sorry about the lack of a stable tag.
Most of my patches usually land into David Miller trees, where the
stable tag is not welcomed.
We use Fixes: tags to convey the exact information needed for stable backports.
Thanks.
----- End forwarded message -----
The original problem was from nvme-over-tcp code, who mistakenly uses
kernel_sendpage() to send pages allocated by __get_free_pages() without
__GFP_COMP flag. Such pages don't have refcount (page_count is 0) on
tail pages, sending them by kernel_sendpage() may trigger a kernel panic
from a corrupted kernel heap, because these pages are incorrectly freed
in network stack as page_count 0 pages.
This patch introduces a helper sendpage_ok(), it returns true if the
checking page,
- is not slab page: PageSlab(page) is false.
- has page refcount: page_count(page) is not zero
All drivers who want to send page to remote end by kernel_sendpage()
may use this helper to check whether the page is OK. If the helper does
not return true, the driver should try other non sendpage method (e.g.
sock_no_sendpage()) to handle the page.
Signed-off-by: Coly Li <colyli(a)suse.de>
Cc: Chaitanya Kulkarni <chaitanya.kulkarni(a)wdc.com>
Cc: Christoph Hellwig <hch(a)lst.de>
Cc: Hannes Reinecke <hare(a)suse.de>
Cc: Jan Kara <jack(a)suse.com>
Cc: Jens Axboe <axboe(a)kernel.dk>
Cc: Mikhail Skorzhinskii <mskorzhinskiy(a)solarflare.com>
Cc: Philipp Reisner <philipp.reisner(a)linbit.com>
Cc: Sagi Grimberg <sagi(a)grimberg.me>
Cc: Vlastimil Babka <vbabka(a)suse.com>
Cc: stable(a)vger.kernel.org
---
include/linux/net.h | 15 +++++++++++++++
1 file changed, 15 insertions(+)
diff --git a/include/linux/net.h b/include/linux/net.h
index d48ff1180879..97e8f1a8a427 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -286,6 +286,21 @@ do { \
#define net_get_random_once_wait(buf, nbytes) \
get_random_once_wait((buf), (nbytes))
+/*
+ * E.g. XFS meta- & log-data is in slab pages, or bcache meta
+ * data pages, or other high order pages allocated by
+ * __get_free_pages() without __GFP_COMP, which have a page_count
+ * of 0 and/or have PageSlab() set. We cannot use send_page for
+ * those, as that does get_page(); put_page(); and would cause
+ * either a VM_BUG directly, or __page_cache_release a page that
+ * would actually still be referenced by someone, leading to some
+ * obscure delayed Oops somewhere else.
+ */
+static inline bool sendpage_ok(struct page *page)
+{
+ return (!PageSlab(page) && page_count(page) >= 1);
+}
+
int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec,
size_t num, size_t len);
int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
--
2.26.2
From: Coly Li <colyli(a)suse.de>
The original problem was from nvme-over-tcp code, who mistakenly uses
kernel_sendpage() to send pages allocated by __get_free_pages() without
__GFP_COMP flag. Such pages don't have refcount (page_count is 0) on
tail pages, sending them by kernel_sendpage() may trigger a kernel panic
from a corrupted kernel heap, because these pages are incorrectly freed
in network stack as page_count 0 pages.
This patch introduces a helper sendpage_ok(), it returns true if the
checking page,
- is not slab page: PageSlab(page) is false.
- has page refcount: page_count(page) is not zero
All drivers who want to send page to remote end by kernel_sendpage()
may use this helper to check whether the page is OK. If the helper does
not return true, the driver should try other non sendpage method (e.g.
sock_no_sendpage()) to handle the page.
Signed-off-by: Coly Li <colyli(a)suse.de>
Cc: Chaitanya Kulkarni <chaitanya.kulkarni(a)wdc.com>
Cc: Christoph Hellwig <hch(a)lst.de>
Cc: Hannes Reinecke <hare(a)suse.de>
Cc: Jan Kara <jack(a)suse.com>
Cc: Jens Axboe <axboe(a)kernel.dk>
Cc: Mikhail Skorzhinskii <mskorzhinskiy(a)solarflare.com>
Cc: Philipp Reisner <philipp.reisner(a)linbit.com>
Cc: Sagi Grimberg <sagi(a)grimberg.me>
Cc: Vlastimil Babka <vbabka(a)suse.com>
Cc: stable(a)vger.kernel.org
---
Changelog:
v4, change sendpage_ok() as an inline helper, and post it as
separate patch.
v3, introduce a more common sendpage_ok()
v2, fix typo in patch subject
v1, the initial version.
include/linux/net.h | 15 +++++++++++++++
1 file changed, 15 insertions(+)
diff --git a/include/linux/net.h b/include/linux/net.h
index d48ff1180879..97e8f1a8a427 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -286,6 +286,21 @@ do { \
#define net_get_random_once_wait(buf, nbytes) \
get_random_once_wait((buf), (nbytes))
+/*
+ * E.g. XFS meta- & log-data is in slab pages, or bcache meta
+ * data pages, or other high order pages allocated by
+ * __get_free_pages() without __GFP_COMP, which have a page_count
+ * of 0 and/or have PageSlab() set. We cannot use send_page for
+ * those, as that does get_page(); put_page(); and would cause
+ * either a VM_BUG directly, or __page_cache_release a page that
+ * would actually still be referenced by someone, leading to some
+ * obscure delayed Oops somewhere else.
+ */
+static inline bool sendpage_ok(struct page *page)
+{
+ return (!PageSlab(page) && page_count(page) >= 1);
+}
+
int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec,
size_t num, size_t len);
int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
--
2.26.2
On Tigerlake, we are seeing a repeat of commit d8f505311717 ("drm/i915/icl:
Forcibly evict stale csb entries") where, presumably, due to a missing
Global Observation Point synchronisation, the write pointer of the CSB
ringbuffer is updated _prior_ to the contents of the ringbuffer. That is
we see the GPU report more context-switch entries for us to parse, but
those entries have not been written, leading us to process stale events,
and eventually report a hung GPU.
However, this effect appears to be much more severe than we previously
saw on Icelake (though it might be best if we try the same approach
there as well and measure), and Bruce suggested the good idea of resetting
the CSB entry after use so that we can detect when it has been updated by
the GPU. By instrumenting how long that may be, we can set a reliable
upper bound for how long we should wait for:
513 late, avg of 61 retries (590 ns), max of 1061 retries (10099 ns)
Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2045
References: d8f505311717 ("drm/i915/icl: Forcibly evict stale csb entries")
Suggested-by: Bruce Chang <yu.bruce.chang(a)intel.com>
Signed-off-by: Chris Wilson <chris(a)chris-wilson.co.uk>
Cc: Bruce Chang <yu.bruce.chang(a)intel.com>
Cc: Mika Kuoppala <mika.kuoppala(a)linux.intel.com>
Cc: stable(a)vger.kernel.org # v5.4
---
drivers/gpu/drm/i915/gt/intel_lrc.c | 21 ++++++++++++++++++---
1 file changed, 18 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index db982fc0f0bc..3b8161c6b601 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -2498,9 +2498,22 @@ invalidate_csb_entries(const u64 *first, const u64 *last)
*/
static inline bool gen12_csb_parse(const u64 *csb)
{
- u64 entry = READ_ONCE(*csb);
- bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(entry));
- bool new_queue =
+ bool ctx_away_valid;
+ bool new_queue;
+ u64 entry;
+
+ /* XXX HSD */
+ entry = READ_ONCE(*csb);
+ if (unlikely(entry == -1)) {
+ preempt_disable();
+ if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 50))
+ GEM_WARN_ON("50us CSB timeout");
+ preempt_enable();
+ }
+ WRITE_ONCE(*(u64 *)csb, -1);
+
+ ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(entry));
+ new_queue =
lower_32_bits(entry) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
/*
@@ -3995,6 +4008,8 @@ static void reset_csb_pointers(struct intel_engine_cs *engine)
WRITE_ONCE(*execlists->csb_write, reset_value);
wmb(); /* Make sure this is visible to HW (paranoia?) */
+ /* Check that the GPU does indeed update the CSB entries! */
+ memset(execlists->csb_status, -1, (reset_value + 1) * sizeof(u64));
invalidate_csb_entries(&execlists->csb_status[0],
&execlists->csb_status[reset_value]);
--
2.20.1
From: Baoquan He <bhe(a)redhat.com>
Subject: Revert "mm/vmstat.c: do not show lowmem reserve protection information of empty zone"
This reverts commit 26e7deadaae175.
Sonny reported that one of their tests started failing on the latest
kernel on their Chrome OS platform. The root cause is that the above
commit removed the protection line of empty zone, while the parser used in
the test relies on the protection line to mark the end of each zone.
Let's revert it to avoid breaking userspace testing or applications.
Link: http://lkml.kernel.org/r/20200811075412.12872-1-bhe@redhat.com
Fixes: 26e7deadaae175 ("mm/vmstat.c: do not show lowmem reserve protection information of empty zone)"
Signed-off-by: Baoquan He <bhe(a)redhat.com>
Reported-by: Sonny Rao <sonnyrao(a)chromium.org>
Reviewed-by: David Hildenbrand <david(a)redhat.com>
Acked-by: David Rientjes <rientjes(a)google.com>
Cc: <stable(a)vger.kernel.org> [5.8.x]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/vmstat.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
--- a/mm/vmstat.c~revert-mm-vmstatc-do-not-show-lowmem-reserve-protection-information-of-empty-zone
+++ a/mm/vmstat.c
@@ -1642,12 +1642,6 @@ static void zoneinfo_show_print(struct s
zone->present_pages,
zone_managed_pages(zone));
- /* If unpopulated, no other information is useful */
- if (!populated_zone(zone)) {
- seq_putc(m, '\n');
- return;
- }
-
seq_printf(m,
"\n protection: (%ld",
zone->lowmem_reserve[0]);
@@ -1655,6 +1649,12 @@ static void zoneinfo_show_print(struct s
seq_printf(m, ", %ld", zone->lowmem_reserve[i]);
seq_putc(m, ')');
+ /* If unpopulated, no other information is useful */
+ if (!populated_zone(zone)) {
+ seq_putc(m, '\n');
+ return;
+ }
+
for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
seq_printf(m, "\n %-12s %lu", zone_stat_name(i),
zone_page_state(zone, i));
_
The patch titled
Subject: khugepaged: adjust VM_BUG_ON_MM() in __khugepaged_enter()
has been added to the -mm tree. Its filename is
khugepaged-adjust-vm_bug_on_mm-in-__khugepaged_enter.patch
This patch should soon appear at
http://ozlabs.org/~akpm/mmots/broken-out/khugepaged-adjust-vm_bug_on_mm-in-…
and later at
http://ozlabs.org/~akpm/mmotm/broken-out/khugepaged-adjust-vm_bug_on_mm-in-…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Hugh Dickins <hughd(a)google.com>
Subject: khugepaged: adjust VM_BUG_ON_MM() in __khugepaged_enter()
syzbot crashes on the VM_BUG_ON_MM(khugepaged_test_exit(mm), mm) in
__khugepaged_enter(): yes, when one thread is about to dump core, has set
core_state, and is waiting for others, another might do something calling
__khugepaged_enter(), which now crashes because I lumped the core_state
test (known as "mmget_still_valid") into khugepaged_test_exit(). I still
think it's best to lump them together, so just in this exceptional case,
check mm->mm_users directly instead of khugepaged_test_exit().
Link: http://lkml.kernel.org/r/alpine.LSU.2.11.2008141503370.18085@eggly.anvils
Fixes: bbe98f9cadff ("khugepaged: khugepaged_test_exit() check mmget_still_valid()")
Signed-off-by: Hugh Dickins <hughd(a)google.com>
Reported-by: syzbot <syzkaller(a)googlegroups.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov(a)linux.intel.com>
Cc: Andrea Arcangeli <aarcange(a)redhat.com>
Cc: Song Liu <songliubraving(a)fb.com>
Cc: Mike Kravetz <mike.kravetz(a)oracle.com>
Cc: Eric Dumazet <edumazet(a)google.com>
Cc: <stable(a)vger.kernel.org> [4.8+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/khugepaged.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/mm/khugepaged.c~khugepaged-adjust-vm_bug_on_mm-in-__khugepaged_enter
+++ a/mm/khugepaged.c
@@ -466,7 +466,7 @@ int __khugepaged_enter(struct mm_struct
return -ENOMEM;
/* __khugepaged_exit() must not run from under us */
- VM_BUG_ON_MM(khugepaged_test_exit(mm), mm);
+ VM_BUG_ON_MM(atomic_read(&mm->mm_users) == 0, mm);
if (unlikely(test_and_set_bit(MMF_VM_HUGEPAGE, &mm->flags))) {
free_mm_slot(mm_slot);
return 0;
_
Patches currently in -mm which might be from hughd(a)google.com are
dma-debug-fix-debug_dma_assert_idle-use-rcu_read_lock.patch
khugepaged-adjust-vm_bug_on_mm-in-__khugepaged_enter.patch
Hi,
Can you queue this up for 5.4 as well? Thanks!
-------- Forwarded Message --------
Subject: [PATCH] fs/io_uring.c: Fix uninitialized variable is referenced in io_submit_sqe
Date: Wed, 12 Aug 2020 23:56:44 -0700
From: Liu Yong <pkfxxxing(a)gmail.com>
To: Jens Axboe <axboe(a)kernel.dk>
CC: linux-block(a)vger.kernel.org, linux-kernel(a)vger.kernel.org, linux-fsdevel(a)vger.kernel.org
the commit <a4d61e66ee4a> ("<io_uring: prevent re-read of sqe->opcode>")
caused another vulnerability. After io_get_req(), the sqe_submit struct
in req is not initialized, but the following code defaults that
req->submit.opcode is available.
Signed-off-by: Liu Yong <pkfxxxing(a)gmail.com>
---
fs/io_uring.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index be3d595a607f..c1aaee061dae 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2559,6 +2559,7 @@ static void io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s,
goto err;
}
+ memcpy(&req->submit, s, sizeof(*s));
ret = io_req_set_file(ctx, s, state, req);
if (unlikely(ret)) {
err_req:
--
2.17.1
Add skcd->no_refcnt check which is missed when backporting
ad0f75e5f57c ("cgroup: fix cgroup_sk_alloc() for sk_clone_lock()").
This patch is needed in stable-4.9, stable-4.14 and stable-4.19.
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
kernel/cgroup/cgroup.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 6ae98c714edd..2a879d34bbe5 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -5957,6 +5957,8 @@ void cgroup_sk_clone(struct sock_cgroup_data *skcd)
{
/* Socket clone path */
if (skcd->val) {
+ if (skcd->no_refcnt)
+ return;
/*
* We might be cloning a socket which is left in an empty
* cgroup and the cgroup might have already been rmdir'd.
--
2.25.1
The v4.4 stable kernel lacks this bugfix:
commit 327868212381 ("make skb_copy_datagram_msg() et.al. preserve ->msg_iter on error").
As a result, the v4.4 kernel can deliver corrupt data to the application
when a corrupt UDP packet is closely followed by a valid UDP packet: the
same invocation of the recvmsg() syscall can deliver the corrupt packet's
UDP payload to the application with the UDP payload length and the
"from IP/Port" of the valid packet.
Details:
For a UDP packet longer than 76 bytes (see the v5.8-rc6 kernel's
include/linux/skbuff.h:3951), Linux delays the UDP checksum verification
until the application invokes the syscall recvmsg().
In the recvmsg() syscall handler, while Linux is copying the UDP payload
to the application's memory, it calculates the UDP checksum. If the
calculated checksum doesn't match the received checksum, Linux drops the
corrupt UDP packet, and then starts to process the next packet (if any),
and if the next packet is valid (i.e. the checksum is correct), Linux
will copy the valid UDP packet's payload to the application's receiver
buffer.
The bug is: before Linux starts to copy the valid UDP packet, the data
structure used to track how many more bytes should be copied to the
application memory is not reset to what it was when the application just
entered the kernel by the syscall! Consequently, only a small portion or
none of the valid packet's payload is copied to the application's
receive buffer, and later when the application exits from the kernel,
actually most of the application's receive buffer contains the payload
of the corrupt packet while recvmsg() returns the length of the UDP
payload of the valid packet.
For the mainline kernel, the bug was fixed in commit 327868212381,
but unluckily the bugfix is only backported to v4.9+. It turns out
backporting 327868212381 to v4.4 means that some supporting patches
must be backported first, so the overall changes seem too big, so the
alternative is performs the csum validation earlier and drops the
corrupt packets earlier.
Signed-off-by: Eric Dumazet <edumazet(a)google.com>
Signed-off-by: Dexuan Cui <decui(a)microsoft.com>
---
net/ipv4/udp.c | 3 +--
net/ipv6/udp.c | 6 ++----
2 files changed, 3 insertions(+), 6 deletions(-)
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index bb30699..49ab587 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1589,8 +1589,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
}
}
- if (rcu_access_pointer(sk->sk_filter) &&
- udp_lib_checksum_complete(skb))
+ if (udp_lib_checksum_complete(skb))
goto csum_error;
if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) {
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 73f1112..2d6703d 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -686,10 +686,8 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
}
}
- if (rcu_access_pointer(sk->sk_filter)) {
- if (udp_lib_checksum_complete(skb))
- goto csum_error;
- }
+ if (udp_lib_checksum_complete(skb))
+ goto csum_error;
if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) {
UDP6_INC_STATS_BH(sock_net(sk),
--
1.8.3.1
From: Oleksij Rempel <o.rempel(a)pengutronix.de>
This patch adds check to ensure that the struct net_device::ml_priv is
allocated, as it is used later by the j1939 stack.
The allocation is done by all mainline CAN network drivers, but when using
bond or team devices this is not the case.
Bail out if no ml_priv is allocated.
Reported-by: syzbot+f03d384f3455d28833eb(a)syzkaller.appspotmail.com
Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol")
Cc: linux-stable <stable(a)vger.kernel.org> # >= v5.4
Signed-off-by: Oleksij Rempel <o.rempel(a)pengutronix.de>
Link: https://lore.kernel.org/r/20200807105200.26441-4-o.rempel@pengutronix.de
Signed-off-by: Marc Kleine-Budde <mkl(a)pengutronix.de>
---
net/can/j1939/socket.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c
index ad973370de12..b93876c57fc4 100644
--- a/net/can/j1939/socket.c
+++ b/net/can/j1939/socket.c
@@ -467,6 +467,14 @@ static int j1939_sk_bind(struct socket *sock, struct sockaddr *uaddr, int len)
goto out_release_sock;
}
+ if (!ndev->ml_priv) {
+ netdev_warn_once(ndev,
+ "No CAN mid layer private allocated, please fix your driver and use alloc_candev()!\n");
+ dev_put(ndev);
+ ret = -ENODEV;
+ goto out_release_sock;
+ }
+
priv = j1939_netdev_start(ndev);
dev_put(ndev);
if (IS_ERR(priv)) {
--
2.28.0
From: Oleksij Rempel <o.rempel(a)pengutronix.de>
The current stack implementation do not support ECTS requests of not
aligned TP sized blocks.
If ECTS will request a block with size and offset spanning two TP
blocks, this will cause memcpy() to read beyond the queued skb (which
does only contain one TP sized block).
Sometimes KASAN will detect this read if the memory region beyond the
skb was previously allocated and freed. In other situations it will stay
undetected. The ETP transfer in any case will be corrupted.
This patch adds a sanity check to avoid this kind of read and abort the
session with error J1939_XTP_ABORT_ECTS_TOO_BIG.
Reported-by: syzbot+5322482fe520b02aea30(a)syzkaller.appspotmail.com
Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol")
Cc: linux-stable <stable(a)vger.kernel.org> # >= v5.4
Signed-off-by: Oleksij Rempel <o.rempel(a)pengutronix.de>
Link: https://lore.kernel.org/r/20200807105200.26441-3-o.rempel@pengutronix.de
Signed-off-by: Marc Kleine-Budde <mkl(a)pengutronix.de>
---
net/can/j1939/transport.c | 15 +++++++++++++++
1 file changed, 15 insertions(+)
diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c
index b135c5e2a86e..30957c9a8eb7 100644
--- a/net/can/j1939/transport.c
+++ b/net/can/j1939/transport.c
@@ -787,6 +787,18 @@ static int j1939_session_tx_dat(struct j1939_session *session)
if (len > 7)
len = 7;
+ if (offset + len > se_skb->len) {
+ netdev_err_once(priv->ndev,
+ "%s: 0x%p: requested data outside of queued buffer: offset %i, len %i, pkt.tx: %i\n",
+ __func__, session, skcb->offset, se_skb->len , session->pkt.tx);
+ return -EOVERFLOW;
+ }
+
+ if (!len) {
+ ret = -ENOBUFS;
+ break;
+ }
+
memcpy(&dat[1], &tpdat[offset], len);
ret = j1939_tp_tx_dat(session, dat, len + 1);
if (ret < 0) {
@@ -1120,6 +1132,9 @@ static enum hrtimer_restart j1939_tp_txtimer(struct hrtimer *hrtimer)
* cleanup including propagation of the error to user space.
*/
break;
+ case -EOVERFLOW:
+ j1939_session_cancel(session, J1939_XTP_ABORT_ECTS_TOO_BIG);
+ break;
case 0:
session->tx_retry = 0;
break;
--
2.28.0
This patch is used to fix ext4 direct I/O read error when
the read size is not aligned with block size.
Then, I will use a test to explain the error.
(1) Make a file that is not aligned with block size:
$dd if=/dev/zero of=./test.jar bs=1000 count=3
(2) I wrote a source file named "direct_io_read_file.c" as following:
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/file.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <string.h>
#define BUF_SIZE 1024
int main()
{
int fd;
int ret;
unsigned char *buf;
ret = posix_memalign((void **)&buf, 512, BUF_SIZE);
if (ret) {
perror("posix_memalign failed");
exit(1);
}
fd = open("./test.jar", O_RDONLY | O_DIRECT, 0755);
if (fd < 0){
perror("open ./test.jar failed");
exit(1);
}
do {
ret = read(fd, buf, BUF_SIZE);
printf("ret=%d\n",ret);
if (ret < 0) {
perror("write test.jar failed");
}
} while (ret > 0);
free(buf);
close(fd);
}
(3) Compile the source file:
$gcc direct_io_read_file.c -D_GNU_SOURCE
(4) Run the test program:
$./a.out
The result is as following:
ret=1024
ret=1024
ret=952
ret=-1
write test.jar failed: Invalid argument.
I have tested this program on XFS filesystem, XFS does not have
this problem, because XFS use iomap_dio_rw() to do direct I/O
read. And the comparing between read offset and file size is done
in iomap_dio_rw(), the code is as following:
if (pos < size) {
retval = filemap_write_and_wait_range(mapping, pos,
pos + iov_length(iov, nr_segs) - 1);
if (!retval) {
retval = mapping->a_ops->direct_IO(READ, iocb,
iov, pos, nr_segs);
}
...
}
...only when "pos < size", direct I/O can be done, or 0 will be return.
I have tested the fix patch on Ext4, it is up to the mustard of
EINVAL in man2(read) as following:
#include <unistd.h>
ssize_t read(int fd, void *buf, size_t count);
EINVAL
fd is attached to an object which is unsuitable for reading;
or the file was opened with the O_DIRECT flag, and either the
address specified in buf, the value specified in count, or the
current file offset is not suitably aligned.
So I think this patch can be applied to fix ext4 direct I/O error.
However Ext4 introduces direct I/O read using iomap infrastructure
on kernel 5.5, the patch is commit <b1b4705d54ab>
("ext4: introduce direct I/O read using iomap infrastructure"),
then Ext4 will be the same as XFS, they all use iomap_dio_rw() to do direct
I/O read. So this problem does not exist on kernel 5.5 for Ext4.
>From above description, we can see this problem exists on all the kernel
versions between kernel 3.14 and kernel 5.4. It will cause the Applications
to fail to read. For example, when the search service downloads a new full
index file, the search engine is loading the previous index file and is
processing the search request, it can not use buffer io that may squeeze
the previous index file in use from pagecache, so the serch service must
use direct I/O read.
Please apply this patch on these kernel versions, or please use the method
on kernel 5.5 to fix this problem.
Fixes: 9fe55eea7e4b ("Fix race when checking i_size on direct i/o read")
Reviewed-by: Jan Kara <jack(a)suse.cz>
Co-developed-by: Wang Long <wanglong19(a)meituan.com>
Signed-off-by: Wang Long <wanglong19(a)meituan.com>
Signed-off-by: Jiang Ying <jiangying8582(a)126.com>
---
fs/ext4/inode.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 516faa2..a66b0ac 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3821,6 +3821,11 @@ static ssize_t ext4_direct_IO_read(struct kiocb *iocb, struct iov_iter *iter)
struct inode *inode = mapping->host;
size_t count = iov_iter_count(iter);
ssize_t ret;
+ loff_t offset = iocb->ki_pos;
+ loff_t size = i_size_read(inode);
+
+ if (offset >= size)
+ return 0;
/*
* Shared inode_lock is enough for us - it protects against concurrent
--
1.8.3.1
The patch titled
Subject: bootconfig: fix off-by-one in xbc_node_compose_key_after()
has been removed from the -mm tree. Its filename was
bootconfig-fix-off-by-one-in-xbc_node_compose_key_after.patch
This patch was dropped because it was withdrawn
------------------------------------------------------
From: Steven Rostedt (VMware) <rostedt(a)goodmis.org>
Subject: bootconfig: fix off-by-one in xbc_node_compose_key_after()
While reviewing some patches for bootconfig, I noticed the following
code in xbc_node_compose_key_after():
ret = snprintf(buf, size, "%s%s", xbc_node_get_data(node),
depth ? "." : "");
if (ret < 0)
return ret;
if (ret > size) {
size = 0;
} else {
size -= ret;
buf += ret;
}
But snprintf() returns the number of bytes that would be written, not
the number of bytes that are written (ignoring the nul terminator).
This means that if the number of non null bytes written were to equal
size, then the nul byte, which snprintf() always adds, will overwrite
that last byte.
ret = snprintf(buf, 5, "hello");
printf("buf = '%s'
", buf);
printf("ret = %d
", ret);
produces:
buf = 'hell'
ret = 5
The string was truncated without ret being greater than 5.
Test (ret >= size) for overwrite.
Link: http://lkml.kernel.org/r/20200813183050.029a6003@oasis.local.home
Fixes: 76db5a27a827c ("bootconfig: Add Extra Boot Config support")
Signed-off-by: Steven Rostedt (VMware) <rostedt(a)goodmis.org>
Cc: Masami Hiramatsu <mhiramat(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
lib/bootconfig.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/lib/bootconfig.c~bootconfig-fix-off-by-one-in-xbc_node_compose_key_after
+++ a/lib/bootconfig.c
@@ -248,7 +248,7 @@ int __init xbc_node_compose_key_after(st
depth ? "." : "");
if (ret < 0)
return ret;
- if (ret > size) {
+ if (ret >= size) {
size = 0;
} else {
size -= ret;
_
Patches currently in -mm which might be from rostedt(a)goodmis.org are
The patch titled
Subject: mm, page_alloc: fix core hung in free_pcppages_bulk()
has been added to the -mm tree. Its filename is
mm-page_alloc-fix-core-hung-in-free_pcppages_bulk.patch
This patch should soon appear at
http://ozlabs.org/~akpm/mmots/broken-out/mm-page_alloc-fix-core-hung-in-fre…
and later at
http://ozlabs.org/~akpm/mmotm/broken-out/mm-page_alloc-fix-core-hung-in-fre…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Charan Teja Reddy <charante(a)codeaurora.org>
Subject: mm, page_alloc: fix core hung in free_pcppages_bulk()
The following race is observed with the repeated online, offline and a
delay between two successive online of memory blocks of movable zone.
P1 P2
Online the first memory block in
the movable zone. The pcp struct
values are initialized to default
values,i.e., pcp->high = 0 &
pcp->batch = 1.
Allocate the pages from the
movable zone.
Try to Online the second memory
block in the movable zone thus it
entered the online_pages() but yet
to call zone_pcp_update().
This process is entered into
the exit path thus it tries
to release the order-0 pages
to pcp lists through
free_unref_page_commit().
As pcp->high = 0, pcp->count = 1
proceed to call the function
free_pcppages_bulk().
Update the pcp values thus the
new pcp values are like, say,
pcp->high = 378, pcp->batch = 63.
Read the pcp's batch value using
READ_ONCE() and pass the same to
free_pcppages_bulk(), pcp values
passed here are, batch = 63,
count = 1.
Since num of pages in the pcp
lists are less than ->batch,
then it will stuck in
while(list_empty(list)) loop
with interrupts disabled thus
a core hung.
Avoid this by ensuring free_pcppages_bulk() is called with proper count of
pcp list pages.
The mentioned race is some what easily reproducible without [1] because
pcp's are not updated for the first memory block online and thus there is
a enough race window for P2 between alloc+free and pcp struct values
update through onlining of second memory block.
With [1], the race still exists but it is very narrow as we update the pcp
struct values for the first memory block online itself.
This is not limited to the movable zone, it could also happen in cases
with the normal zone (e.g., hotplug to a node that only has DMA memory, or
no other memory yet).
[1]: https://patchwork.kernel.org/patch/11696389/
Link: http://lkml.kernel.org/r/1597150703-19003-1-git-send-email-charante@codeaur…
Fixes: 5f8dcc21211a ("page-allocator: split per-cpu list into one-list-per-migrate-type")
Signed-off-by: Charan Teja Reddy <charante(a)codeaurora.org>
Acked-by: David Hildenbrand <david(a)redhat.com>
Acked-by: David Rientjes <rientjes(a)google.com>
Cc: Michal Hocko <mhocko(a)suse.com>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: Vinayak Menon <vinmenon(a)codeaurora.org>
Cc: <stable(a)vger.kernel.org> [2.6+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/page_alloc.c | 5 +++++
1 file changed, 5 insertions(+)
--- a/mm/page_alloc.c~mm-page_alloc-fix-core-hung-in-free_pcppages_bulk
+++ a/mm/page_alloc.c
@@ -1301,6 +1301,11 @@ static void free_pcppages_bulk(struct zo
struct page *page, *tmp;
LIST_HEAD(head);
+ /*
+ * Ensure proper count is passed which otherwise would stuck in the
+ * below while (list_empty(list)) loop.
+ */
+ count = min(pcp->count, count);
while (count) {
struct list_head *list;
_
Patches currently in -mm which might be from charante(a)codeaurora.org are
mm-page_alloc-fix-core-hung-in-free_pcppages_bulk.patch
The patch titled
Subject: bootconfig: fix off-by-one in xbc_node_compose_key_after()
has been added to the -mm tree. Its filename is
bootconfig-fix-off-by-one-in-xbc_node_compose_key_after.patch
This patch should soon appear at
http://ozlabs.org/~akpm/mmots/broken-out/bootconfig-fix-off-by-one-in-xbc_n…
and later at
http://ozlabs.org/~akpm/mmotm/broken-out/bootconfig-fix-off-by-one-in-xbc_n…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Steven Rostedt (VMware) <rostedt(a)goodmis.org>
Subject: bootconfig: fix off-by-one in xbc_node_compose_key_after()
While reviewing some patches for bootconfig, I noticed the following
code in xbc_node_compose_key_after():
ret = snprintf(buf, size, "%s%s", xbc_node_get_data(node),
depth ? "." : "");
if (ret < 0)
return ret;
if (ret > size) {
size = 0;
} else {
size -= ret;
buf += ret;
}
But snprintf() returns the number of bytes that would be written, not
the number of bytes that are written (ignoring the nul terminator).
This means that if the number of non null bytes written were to equal
size, then the nul byte, which snprintf() always adds, will overwrite
that last byte.
ret = snprintf(buf, 5, "hello");
printf("buf = '%s'
", buf);
printf("ret = %d
", ret);
produces:
buf = 'hell'
ret = 5
The string was truncated without ret being greater than 5.
Test (ret >= size) for overwrite.
Link: http://lkml.kernel.org/r/20200813183050.029a6003@oasis.local.home
Fixes: 76db5a27a827c ("bootconfig: Add Extra Boot Config support")
Signed-off-by: Steven Rostedt (VMware) <rostedt(a)goodmis.org>
Cc: Masami Hiramatsu <mhiramat(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
lib/bootconfig.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/lib/bootconfig.c~bootconfig-fix-off-by-one-in-xbc_node_compose_key_after
+++ a/lib/bootconfig.c
@@ -248,7 +248,7 @@ int __init xbc_node_compose_key_after(st
depth ? "." : "");
if (ret < 0)
return ret;
- if (ret > size) {
+ if (ret >= size) {
size = 0;
} else {
size -= ret;
_
Patches currently in -mm which might be from rostedt(a)goodmis.org are
bootconfig-fix-off-by-one-in-xbc_node_compose_key_after.patch
From: Paul Hsieh <paul.hsieh(a)amd.com>
[Why]
Place the cursor in the center of screen between two pipes then
adjusting the viewport but cursour doesn't update cause DFPstate hang.
[How]
If viewport changed, update cursor as well.
Cc: stable(a)vger.kernel.org
Signed-off-by: Paul Hsieh <paul.hsieh(a)amd.com>
Reviewed-by: Aric Cyr <Aric.Cyr(a)amd.com>
Acked-by: Rodrigo Siqueira <Rodrigo.Siqueira(a)amd.com>
---
drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
index 66180b4332f1..c8cfd3ba1c15 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
@@ -1457,8 +1457,8 @@ static void dcn20_update_dchubp_dpp(
/* Any updates are handled in dc interface, just need to apply existing for plane enable */
if ((pipe_ctx->update_flags.bits.enable || pipe_ctx->update_flags.bits.opp_changed ||
- pipe_ctx->update_flags.bits.scaler || pipe_ctx->update_flags.bits.viewport)
- && pipe_ctx->stream->cursor_attributes.address.quad_part != 0) {
+ pipe_ctx->update_flags.bits.scaler || viewport_changed == true) &&
+ pipe_ctx->stream->cursor_attributes.address.quad_part != 0) {
dc->hwss.set_cursor_position(pipe_ctx);
dc->hwss.set_cursor_attribute(pipe_ctx);
--
2.28.0
Add skcd->no_refcnt check which is missed when backporting
ad0f75e5f57c ("cgroup: fix cgroup_sk_alloc() for sk_clone_lock()").
This patch is needed in stable-4.9, stable-4.14 and stable-4.19.
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
kernel/cgroup/cgroup.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index a2fed8fbd2bd..ada060e628ce 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -5827,6 +5827,8 @@ void cgroup_sk_clone(struct sock_cgroup_data *skcd)
{
/* Socket clone path */
if (skcd->val) {
+ if (skcd->no_refcnt)
+ return;
/*
* We might be cloning a socket which is left in an empty
* cgroup and the cgroup might have already been rmdir'd.
--
2.25.1
Add skcd->no_refcnt check which is missed when backporting
ad0f75e5f57c ("cgroup: fix cgroup_sk_alloc() for sk_clone_lock()").
This patch is needed in stable-4.9, stable-4.14 and stable-4.19.
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
kernel/cgroup.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index f047c73189f3..684d02f343b4 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -6355,6 +6355,8 @@ void cgroup_sk_clone(struct sock_cgroup_data *skcd)
{
/* Socket clone path */
if (skcd->val) {
+ if (skcd->no_refcnt)
+ return;
/*
* We might be cloning a socket which is left in an empty
* cgroup and the cgroup might have already been rmdir'd.
--
2.25.1
Add skcd->no_refcnt check which is missed when backporting
ad0f75e5f57c ("cgroup: fix cgroup_sk_alloc() for sk_clone_lock()").
This patch is needed in stable-4.9, stable-4.14 and stable-4.19.
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
kernel/cgroup.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index f047c73189f3..684d02f343b4 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -6355,6 +6355,8 @@ void cgroup_sk_clone(struct sock_cgroup_data *skcd)
{
/* Socket clone path */
if (skcd->val) {
+ if (skcd->no_refcnt)
+ return;
/*
* We might be cloning a socket which is left in an empty
* cgroup and the cgroup might have already been rmdir'd.
--
2.25.1
The following commit has been merged into the irq/urgent branch of tip:
Commit-ID: f107cee94ba4d2c7357fde59a1d84346c73d4958
Gitweb: https://git.kernel.org/tip/f107cee94ba4d2c7357fde59a1d84346c73d4958
Author: Guenter Roeck <linux(a)roeck-us.net>
AuthorDate: Tue, 11 Aug 2020 11:00:12 -07:00
Committer: Thomas Gleixner <tglx(a)linutronix.de>
CommitterDate: Thu, 13 Aug 2020 09:35:59 +02:00
genirq: Unlock irq descriptor after errors
In irq_set_irqchip_state(), the irq descriptor is not unlocked after an
error is encountered. While that should never happen in practice, a buggy
driver may trigger it. This would result in a lockup, so fix it.
Fixes: 1d0326f352bb ("genirq: Check irq_data_get_irq_chip() return value before use")
Signed-off-by: Guenter Roeck <linux(a)roeck-us.net>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/20200811180012.80269-1-linux@roeck-us.net
---
kernel/irq/manage.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index d55ba62..52ac539 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -2731,8 +2731,10 @@ int irq_set_irqchip_state(unsigned int irq, enum irqchip_irq_state which,
do {
chip = irq_data_get_irq_chip(data);
- if (WARN_ON_ONCE(!chip))
- return -ENODEV;
+ if (WARN_ON_ONCE(!chip)) {
+ err = -ENODEV;
+ goto out_unlock;
+ }
if (chip->irq_set_irqchip_state)
break;
#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
@@ -2745,6 +2747,7 @@ int irq_set_irqchip_state(unsigned int irq, enum irqchip_irq_state which,
if (data)
err = chip->irq_set_irqchip_state(data, which, val);
+out_unlock:
irq_put_desc_busunlock(desc, flags);
return err;
}
binfmt_flat loader uses the gap between text and data to store data
segment pointers for the libraries. Even in the absence of shared
libraries it stores at least one pointer to the executable's own data
segment. Text and data can go back to back in the flat binary image and
without offsetting data segment last few instructions in the text
segment may get corrupted by the data segment pointer.
Fix it by reverting commit a2357223c50a ("binfmt_flat: don't offset the
data start").
Cc: stable(a)vger.kernel.org
Fixes: a2357223c50a ("binfmt_flat: don't offset the data start")
Signed-off-by: Max Filippov <jcmvbkbc(a)gmail.com>
---
fs/binfmt_flat.c | 20 ++++++++++++--------
1 file changed, 12 insertions(+), 8 deletions(-)
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index f2f9086ebe98..b9c658e0548e 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -576,7 +576,7 @@ static int load_flat_file(struct linux_binprm *bprm,
goto err;
}
- len = data_len + extra;
+ len = data_len + extra + MAX_SHARED_LIBS * sizeof(unsigned long);
len = PAGE_ALIGN(len);
realdatastart = vm_mmap(NULL, 0, len,
PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE, 0);
@@ -590,7 +590,9 @@ static int load_flat_file(struct linux_binprm *bprm,
vm_munmap(textpos, text_len);
goto err;
}
- datapos = ALIGN(realdatastart, FLAT_DATA_ALIGN);
+ datapos = ALIGN(realdatastart +
+ MAX_SHARED_LIBS * sizeof(unsigned long),
+ FLAT_DATA_ALIGN);
pr_debug("Allocated data+bss+stack (%u bytes): %lx\n",
data_len + bss_len + stack_len, datapos);
@@ -620,7 +622,7 @@ static int load_flat_file(struct linux_binprm *bprm,
memp_size = len;
} else {
- len = text_len + data_len + extra;
+ len = text_len + data_len + extra + MAX_SHARED_LIBS * sizeof(u32);
len = PAGE_ALIGN(len);
textpos = vm_mmap(NULL, 0, len,
PROT_READ | PROT_EXEC | PROT_WRITE, MAP_PRIVATE, 0);
@@ -635,7 +637,9 @@ static int load_flat_file(struct linux_binprm *bprm,
}
realdatastart = textpos + ntohl(hdr->data_start);
- datapos = ALIGN(realdatastart, FLAT_DATA_ALIGN);
+ datapos = ALIGN(realdatastart +
+ MAX_SHARED_LIBS * sizeof(u32),
+ FLAT_DATA_ALIGN);
reloc = (__be32 __user *)
(datapos + (ntohl(hdr->reloc_start) - text_len));
@@ -652,9 +656,8 @@ static int load_flat_file(struct linux_binprm *bprm,
(text_len + full_data
- sizeof(struct flat_hdr)),
0);
- if (datapos != realdatastart)
- memmove((void *)datapos, (void *)realdatastart,
- full_data);
+ memmove((void *) datapos, (void *) realdatastart,
+ full_data);
#else
/*
* This is used on MMU systems mainly for testing.
@@ -710,7 +713,8 @@ static int load_flat_file(struct linux_binprm *bprm,
if (IS_ERR_VALUE(result)) {
ret = result;
pr_err("Unable to read code+data+bss, errno %d\n", ret);
- vm_munmap(textpos, text_len + data_len + extra);
+ vm_munmap(textpos, text_len + data_len + extra +
+ MAX_SHARED_LIBS * sizeof(u32));
goto err;
}
}
--
2.20.1
The patch titled
Subject: fs/minix: reject too-large maximum file size
has been removed from the -mm tree. Its filename was
fs-minix-reject-too-large-maximum-file-size.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Eric Biggers <ebiggers(a)google.com>
Subject: fs/minix: reject too-large maximum file size
If the minix filesystem tries to map a very large logical block number to
its on-disk location, block_to_path() can return offsets that are too
large, causing out-of-bounds memory accesses when accessing indirect index
blocks. This should be prevented by the check against the maximum file
size, but this doesn't work because the maximum file size is read directly
from the on-disk superblock and isn't validated itself.
Fix this by validating the maximum file size at mount time.
Link: http://lkml.kernel.org/r/20200628060846.682158-4-ebiggers@kernel.org
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Reported-by: syzbot+c7d9ec7a1a7272dd71b3(a)syzkaller.appspotmail.com
Reported-by: syzbot+3b7b03a0c28948054fb5(a)syzkaller.appspotmail.com
Reported-by: syzbot+6e056ee473568865f3e6(a)syzkaller.appspotmail.com
Signed-off-by: Eric Biggers <ebiggers(a)google.com>
Cc: Alexander Viro <viro(a)zeniv.linux.org.uk>
Cc: Qiujun Huang <anenbupt(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/minix/inode.c | 22 ++++++++++++++++++++--
1 file changed, 20 insertions(+), 2 deletions(-)
--- a/fs/minix/inode.c~fs-minix-reject-too-large-maximum-file-size
+++ a/fs/minix/inode.c
@@ -150,6 +150,23 @@ static int minix_remount (struct super_b
return 0;
}
+static bool minix_check_superblock(struct minix_sb_info *sbi)
+{
+ if (sbi->s_imap_blocks == 0 || sbi->s_zmap_blocks == 0)
+ return false;
+
+ /*
+ * s_max_size must not exceed the block mapping limitation. This check
+ * is only needed for V1 filesystems, since V2/V3 support an extra level
+ * of indirect blocks which places the limit well above U32_MAX.
+ */
+ if (sbi->s_version == MINIX_V1 &&
+ sbi->s_max_size > (7 + 512 + 512*512) * BLOCK_SIZE)
+ return false;
+
+ return true;
+}
+
static int minix_fill_super(struct super_block *s, void *data, int silent)
{
struct buffer_head *bh;
@@ -228,11 +245,12 @@ static int minix_fill_super(struct super
} else
goto out_no_fs;
+ if (!minix_check_superblock(sbi))
+ goto out_illegal_sb;
+
/*
* Allocate the buffer map to keep the superblock small.
*/
- if (sbi->s_imap_blocks == 0 || sbi->s_zmap_blocks == 0)
- goto out_illegal_sb;
i = (sbi->s_imap_blocks + sbi->s_zmap_blocks) * sizeof(bh);
map = kzalloc(i, GFP_KERNEL);
if (!map)
_
Patches currently in -mm which might be from ebiggers(a)google.com are
The patch titled
Subject: fs/minix: don't allow getting deleted inodes
has been removed from the -mm tree. Its filename was
fs-minix-dont-allow-getting-deleted-inodes.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Eric Biggers <ebiggers(a)google.com>
Subject: fs/minix: don't allow getting deleted inodes
If an inode has no links, we need to mark it bad rather than allowing it
to be accessed. This avoids WARNINGs in inc_nlink() and drop_nlink() when
doing directory operations on a fuzzed filesystem.
Link: http://lkml.kernel.org/r/20200628060846.682158-3-ebiggers@kernel.org
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Reported-by: syzbot+a9ac3de1b5de5fb10efc(a)syzkaller.appspotmail.com
Reported-by: syzbot+df958cf5688a96ad3287(a)syzkaller.appspotmail.com
Signed-off-by: Eric Biggers <ebiggers(a)google.com>
Cc: Alexander Viro <viro(a)zeniv.linux.org.uk>
Cc: Qiujun Huang <anenbupt(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/minix/inode.c | 14 ++++++++++++++
1 file changed, 14 insertions(+)
--- a/fs/minix/inode.c~fs-minix-dont-allow-getting-deleted-inodes
+++ a/fs/minix/inode.c
@@ -468,6 +468,13 @@ static struct inode *V1_minix_iget(struc
iget_failed(inode);
return ERR_PTR(-EIO);
}
+ if (raw_inode->i_nlinks == 0) {
+ printk("MINIX-fs: deleted inode referenced: %lu\n",
+ inode->i_ino);
+ brelse(bh);
+ iget_failed(inode);
+ return ERR_PTR(-ESTALE);
+ }
inode->i_mode = raw_inode->i_mode;
i_uid_write(inode, raw_inode->i_uid);
i_gid_write(inode, raw_inode->i_gid);
@@ -501,6 +508,13 @@ static struct inode *V2_minix_iget(struc
iget_failed(inode);
return ERR_PTR(-EIO);
}
+ if (raw_inode->i_nlinks == 0) {
+ printk("MINIX-fs: deleted inode referenced: %lu\n",
+ inode->i_ino);
+ brelse(bh);
+ iget_failed(inode);
+ return ERR_PTR(-ESTALE);
+ }
inode->i_mode = raw_inode->i_mode;
i_uid_write(inode, raw_inode->i_uid);
i_gid_write(inode, raw_inode->i_gid);
_
Patches currently in -mm which might be from ebiggers(a)google.com are
The patch titled
Subject: fs/minix: check return value of sb_getblk()
has been removed from the -mm tree. Its filename was
fs-minix-check-return-value-of-sb_getblk.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Eric Biggers <ebiggers(a)google.com>
Subject: fs/minix: check return value of sb_getblk()
Patch series "fs/minix: fix syzbot bugs and set s_maxbytes".
This series fixes all syzbot bugs in the minix filesystem:
KASAN: null-ptr-deref Write in get_block
KASAN: use-after-free Write in get_block
KASAN: use-after-free Read in get_block
WARNING in inc_nlink
KMSAN: uninit-value in get_block
WARNING in drop_nlink
It also fixes the minix filesystem to set s_maxbytes correctly, so that
userspace sees the correct behavior when exceeding the max file size.
This patch (of 6):
sb_getblk() can fail, so check its return value.
This fixes a NULL pointer dereference.
Originally from Qiujun Huang.
Link: http://lkml.kernel.org/r/20200628060846.682158-1-ebiggers@kernel.org
Link: http://lkml.kernel.org/r/20200628060846.682158-2-ebiggers@kernel.org
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Eric Biggers <ebiggers(a)google.com>
Reported-by: syzbot+4a88b2b9dc280f47baf4(a)syzkaller.appspotmail.com
Cc: Qiujun Huang <anenbupt(a)gmail.com>
Cc: Alexander Viro <viro(a)zeniv.linux.org.uk>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/minix/itree_common.c | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
--- a/fs/minix/itree_common.c~fs-minix-check-return-value-of-sb_getblk
+++ a/fs/minix/itree_common.c
@@ -75,6 +75,7 @@ static int alloc_branch(struct inode *in
int n = 0;
int i;
int parent = minix_new_block(inode);
+ int err = -ENOSPC;
branch[0].key = cpu_to_block(parent);
if (parent) for (n = 1; n < num; n++) {
@@ -85,6 +86,11 @@ static int alloc_branch(struct inode *in
break;
branch[n].key = cpu_to_block(nr);
bh = sb_getblk(inode->i_sb, parent);
+ if (!bh) {
+ minix_free_block(inode, nr);
+ err = -ENOMEM;
+ break;
+ }
lock_buffer(bh);
memset(bh->b_data, 0, bh->b_size);
branch[n].bh = bh;
@@ -103,7 +109,7 @@ static int alloc_branch(struct inode *in
bforget(branch[i].bh);
for (i = 0; i < n; i++)
minix_free_block(inode, block_to_cpu(branch[i].key));
- return -ENOSPC;
+ return err;
}
static inline int splice_branch(struct inode *inode,
_
Patches currently in -mm which might be from ebiggers(a)google.com are
The patch titled
Subject: cma: don't quit at first error when activating reserved areas
has been removed from the -mm tree. Its filename was
cma-dont-quit-at-first-error-when-activating-reserved-areas.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Mike Kravetz <mike.kravetz(a)oracle.com>
Subject: cma: don't quit at first error when activating reserved areas
The routine cma_init_reserved_areas is designed to activate all
reserved cma areas. It quits when it first encounters an error.
This can leave some areas in a state where they are reserved but
not activated. There is no feedback to code which performed the
reservation. Attempting to allocate memory from areas in such a
state will result in a BUG.
Modify cma_init_reserved_areas to always attempt to activate all
areas. The called routine, cma_activate_area is responsible for
leaving the area in a valid state. No one is making active use
of returned error codes, so change the routine to void.
How to reproduce: This example uses kernelcore, hugetlb and cma
as an easy way to reproduce. However, this is a more general cma
issue.
Two node x86 VM 16GB total, 8GB per node
Kernel command line parameters, kernelcore=4G hugetlb_cma=8G
Related boot time messages,
hugetlb_cma: reserve 8192 MiB, up to 4096 MiB per node
cma: Reserved 4096 MiB at 0x0000000100000000
hugetlb_cma: reserved 4096 MiB on node 0
cma: Reserved 4096 MiB at 0x0000000300000000
hugetlb_cma: reserved 4096 MiB on node 1
cma: CMA area hugetlb could not be activated
# echo 8 > /sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages
BUG: kernel NULL pointer dereference, address: 0000000000000000
#PF: supervisor read access in kernel mode
#PF: error_code(0x0000) - not-present page
PGD 0 P4D 0
Oops: 0000 [#1] SMP PTI
...
Call Trace:
bitmap_find_next_zero_area_off+0x51/0x90
cma_alloc+0x1a5/0x310
alloc_fresh_huge_page+0x78/0x1a0
alloc_pool_huge_page+0x6f/0xf0
set_max_huge_pages+0x10c/0x250
nr_hugepages_store_common+0x92/0x120
? __kmalloc+0x171/0x270
kernfs_fop_write+0xc1/0x1a0
vfs_write+0xc7/0x1f0
ksys_write+0x5f/0xe0
do_syscall_64+0x4d/0x90
entry_SYSCALL_64_after_hwframe+0x44/0xa9
Link: http://lkml.kernel.org/r/20200730163123.6451-1-mike.kravetz@oracle.com
Fixes: c64be2bb1c6e ("drivers: add Contiguous Memory Allocator")
Signed-off-by: Mike Kravetz <mike.kravetz(a)oracle.com>
Reviewed-by: Roman Gushchin <guro(a)fb.com>
Acked-by: Barry Song <song.bao.hua(a)hisilicon.com>
Cc: Marek Szyprowski <m.szyprowski(a)samsung.com>
Cc: Michal Nazarewicz <mina86(a)mina86.com>
Cc: Kyungmin Park <kyungmin.park(a)samsung.com>
Cc: Joonsoo Kim <iamjoonsoo.kim(a)lge.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/cma.c | 23 +++++++++--------------
1 file changed, 9 insertions(+), 14 deletions(-)
--- a/mm/cma.c~cma-dont-quit-at-first-error-when-activating-reserved-areas
+++ a/mm/cma.c
@@ -93,17 +93,15 @@ static void cma_clear_bitmap(struct cma
mutex_unlock(&cma->lock);
}
-static int __init cma_activate_area(struct cma *cma)
+static void __init cma_activate_area(struct cma *cma)
{
unsigned long base_pfn = cma->base_pfn, pfn = base_pfn;
unsigned i = cma->count >> pageblock_order;
struct zone *zone;
cma->bitmap = bitmap_zalloc(cma_bitmap_maxno(cma), GFP_KERNEL);
- if (!cma->bitmap) {
- cma->count = 0;
- return -ENOMEM;
- }
+ if (!cma->bitmap)
+ goto out_error;
WARN_ON_ONCE(!pfn_valid(pfn));
zone = page_zone(pfn_to_page(pfn));
@@ -133,25 +131,22 @@ static int __init cma_activate_area(stru
spin_lock_init(&cma->mem_head_lock);
#endif
- return 0;
+ return;
not_in_zone:
- pr_err("CMA area %s could not be activated\n", cma->name);
bitmap_free(cma->bitmap);
+out_error:
cma->count = 0;
- return -EINVAL;
+ pr_err("CMA area %s could not be activated\n", cma->name);
+ return;
}
static int __init cma_init_reserved_areas(void)
{
int i;
- for (i = 0; i < cma_area_count; i++) {
- int ret = cma_activate_area(&cma_areas[i]);
-
- if (ret)
- return ret;
- }
+ for (i = 0; i < cma_area_count; i++)
+ cma_activate_area(&cma_areas[i]);
return 0;
}
_
Patches currently in -mm which might be from mike.kravetz(a)oracle.com are
The patch titled
Subject: hugetlbfs: remove call to huge_pte_alloc without i_mmap_rwsem
has been removed from the -mm tree. Its filename was
hugetlbfs-remove-call-to-huge_pte_alloc-without-i_mmap_rwsem.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Mike Kravetz <mike.kravetz(a)oracle.com>
Subject: hugetlbfs: remove call to huge_pte_alloc without i_mmap_rwsem
Commit c0d0381ade79 ("hugetlbfs: use i_mmap_rwsem for more pmd sharing
synchronization") requires callers of huge_pte_alloc to hold i_mmap_rwsem
in at least read mode. This is because the explicit locking in
huge_pmd_share (called by huge_pte_alloc) was removed. When restructuring
the code, the call to huge_pte_alloc in the else block at the beginning of
hugetlb_fault was missed.
Unfortunately, that else clause is exercised when there is no page table
entry. This will likely lead to a call to huge_pmd_share. If
huge_pmd_share thinks pmd sharing is possible, it will traverse the
mapping tree (i_mmap) without holding i_mmap_rwsem. If someone else is
modifying the tree, bad things such as addressing exceptions or worse
could happen.
Simply remove the else clause. It should have been removed previously.
The code following the else will call huge_pte_alloc with the appropriate
locking.
To prevent this type of issue in the future, add routines to assert that
i_mmap_rwsem is held, and call these routines in huge pmd sharing
routines.
Link: http://lkml.kernel.org/r/e670f327-5cf9-1959-96e4-6dc7cc30d3d5@oracle.com
Fixes: c0d0381ade79 ("hugetlbfs: use i_mmap_rwsem for more pmd sharing synchronization")
Signed-off-by: Mike Kravetz <mike.kravetz(a)oracle.com>
Suggested-by: Matthew Wilcox <willy(a)infradead.org>
Cc: Michal Hocko <mhocko(a)kernel.org>
Cc: Hugh Dickins <hughd(a)google.com>
Cc: Naoya Horiguchi <n-horiguchi(a)ah.jp.nec.com>
Cc: "Aneesh Kumar K.V" <aneesh.kumar(a)linux.vnet.ibm.com>
Cc: Andrea Arcangeli <aarcange(a)redhat.com>
Cc: "Kirill A.Shutemov" <kirill.shutemov(a)linux.intel.com>
Cc: Davidlohr Bueso <dave(a)stgolabs.net>
Cc: Prakash Sangappa <prakash.sangappa(a)oracle.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
include/linux/fs.h | 10 ++++++++++
include/linux/hugetlb.h | 8 +++++---
mm/hugetlb.c | 15 +++++++--------
mm/rmap.c | 2 +-
4 files changed, 23 insertions(+), 12 deletions(-)
--- a/include/linux/fs.h~hugetlbfs-remove-call-to-huge_pte_alloc-without-i_mmap_rwsem
+++ a/include/linux/fs.h
@@ -518,6 +518,16 @@ static inline void i_mmap_unlock_read(st
up_read(&mapping->i_mmap_rwsem);
}
+static inline void i_mmap_assert_locked(struct address_space *mapping)
+{
+ lockdep_assert_held(&mapping->i_mmap_rwsem);
+}
+
+static inline void i_mmap_assert_write_locked(struct address_space *mapping)
+{
+ lockdep_assert_held_write(&mapping->i_mmap_rwsem);
+}
+
/*
* Might pages of this file be mapped into userspace?
*/
--- a/include/linux/hugetlb.h~hugetlbfs-remove-call-to-huge_pte_alloc-without-i_mmap_rwsem
+++ a/include/linux/hugetlb.h
@@ -164,7 +164,8 @@ pte_t *huge_pte_alloc(struct mm_struct *
unsigned long addr, unsigned long sz);
pte_t *huge_pte_offset(struct mm_struct *mm,
unsigned long addr, unsigned long sz);
-int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep);
+int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
+ unsigned long *addr, pte_t *ptep);
void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
unsigned long *start, unsigned long *end);
struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
@@ -203,8 +204,9 @@ static inline struct address_space *huge
return NULL;
}
-static inline int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr,
- pte_t *ptep)
+static inline int huge_pmd_unshare(struct mm_struct *mm,
+ struct vm_area_struct *vma,
+ unsigned long *addr, pte_t *ptep)
{
return 0;
}
--- a/mm/hugetlb.c~hugetlbfs-remove-call-to-huge_pte_alloc-without-i_mmap_rwsem
+++ a/mm/hugetlb.c
@@ -3967,7 +3967,7 @@ void __unmap_hugepage_range(struct mmu_g
continue;
ptl = huge_pte_lock(h, mm, ptep);
- if (huge_pmd_unshare(mm, &address, ptep)) {
+ if (huge_pmd_unshare(mm, vma, &address, ptep)) {
spin_unlock(ptl);
/*
* We just unmapped a page of PMDs by clearing a PUD.
@@ -4554,10 +4554,6 @@ vm_fault_t hugetlb_fault(struct mm_struc
} else if (unlikely(is_hugetlb_entry_hwpoisoned(entry)))
return VM_FAULT_HWPOISON_LARGE |
VM_FAULT_SET_HINDEX(hstate_index(h));
- } else {
- ptep = huge_pte_alloc(mm, haddr, huge_page_size(h));
- if (!ptep)
- return VM_FAULT_OOM;
}
/*
@@ -5034,7 +5030,7 @@ unsigned long hugetlb_change_protection(
if (!ptep)
continue;
ptl = huge_pte_lock(h, mm, ptep);
- if (huge_pmd_unshare(mm, &address, ptep)) {
+ if (huge_pmd_unshare(mm, vma, &address, ptep)) {
pages++;
spin_unlock(ptl);
shared_pmd = true;
@@ -5415,12 +5411,14 @@ out:
* returns: 1 successfully unmapped a shared pte page
* 0 the underlying pte page is not shared, or it is the last user
*/
-int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
+int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
+ unsigned long *addr, pte_t *ptep)
{
pgd_t *pgd = pgd_offset(mm, *addr);
p4d_t *p4d = p4d_offset(pgd, *addr);
pud_t *pud = pud_offset(p4d, *addr);
+ i_mmap_assert_write_locked(vma->vm_file->f_mapping);
BUG_ON(page_count(virt_to_page(ptep)) == 0);
if (page_count(virt_to_page(ptep)) == 1)
return 0;
@@ -5438,7 +5436,8 @@ pte_t *huge_pmd_share(struct mm_struct *
return NULL;
}
-int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
+int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
+ unsigned long *addr, pte_t *ptep)
{
return 0;
}
--- a/mm/rmap.c~hugetlbfs-remove-call-to-huge_pte_alloc-without-i_mmap_rwsem
+++ a/mm/rmap.c
@@ -1469,7 +1469,7 @@ static bool try_to_unmap_one(struct page
* do this outside rmap routines.
*/
VM_BUG_ON(!(flags & TTU_RMAP_LOCKED));
- if (huge_pmd_unshare(mm, &address, pvmw.pte)) {
+ if (huge_pmd_unshare(mm, vma, &address, pvmw.pte)) {
/*
* huge_pmd_unshare unmapped an entire PMD
* page. There is no way of knowing exactly
_
Patches currently in -mm which might be from mike.kravetz(a)oracle.com are
From: "Steven Rostedt (VMware)" <rostedt(a)goodmis.org>
commit 7ef282e05132d56b6f6b71e3873f317664bea78b upstream
If a process has the trace_pipe open on a trace_array, the current tracer
for that trace array should not be changed. This was original enforced by a
global lock, but when instances were introduced, it was moved to the
current_trace. But this structure is shared by all instances, and a
trace_pipe is for a single instance. There's no reason that a process that
has trace_pipe open on one instance should prevent another instance from
changing its current tracer. Move the reference counter to the trace_array
instead.
This is marked as "Fixes" but is more of a clean up than a true fix.
Backport if you want, but its not critical.
Fixes: cf6ab6d9143b1 ("tracing: Add ref count to tracer for when they are being read by pipe")
Signed-off-by: Steven Rostedt (VMware) <rostedt(a)goodmis.org>
---
This addresses an issue we've seen with users trying to change
current_tracer when they happen to have rasdaemon installed.
rasdaemon uses the trace_pipe interface at runtime, which therefore
blocks changing the current tracer. But of course, unless
you know about rasdaemon internals, it isn't exactly an obvious
failure mode.
kernel/trace/trace.c | 12 ++++++------
kernel/trace/trace.h | 2 +-
2 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index bb62269724d5..6fc6da55b94e 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -5887,7 +5887,7 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf)
}
/* If trace pipe files are being read, we can't change the tracer */
- if (tr->current_trace->ref) {
+ if (tr->trace_ref) {
ret = -EBUSY;
goto out;
}
@@ -6103,7 +6103,7 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
nonseekable_open(inode, filp);
- tr->current_trace->ref++;
+ tr->trace_ref++;
out:
mutex_unlock(&trace_types_lock);
return ret;
@@ -6122,7 +6122,7 @@ static int tracing_release_pipe(struct inode *inode, struct file *file)
mutex_lock(&trace_types_lock);
- tr->current_trace->ref--;
+ tr->trace_ref--;
if (iter->trace->pipe_close)
iter->trace->pipe_close(iter);
@@ -7424,7 +7424,7 @@ static int tracing_buffers_open(struct inode *inode, struct file *filp)
filp->private_data = info;
- tr->current_trace->ref++;
+ tr->trace_ref++;
mutex_unlock(&trace_types_lock);
@@ -7525,7 +7525,7 @@ static int tracing_buffers_release(struct inode *inode, struct file *file)
mutex_lock(&trace_types_lock);
- iter->tr->current_trace->ref--;
+ iter->tr->trace_ref--;
__trace_array_put(iter->tr);
@@ -8733,7 +8733,7 @@ static int __remove_instance(struct trace_array *tr)
int i;
/* Reference counter for a newly created trace array = 1. */
- if (tr->ref > 1 || (tr->current_trace && tr->current_trace->ref))
+ if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
return -EBUSY;
list_del(&tr->list);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 13db4000af3f..f21607f87189 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -356,6 +356,7 @@ struct trace_array {
struct trace_event_file *trace_marker_file;
cpumask_var_t tracing_cpumask; /* only trace on set CPUs */
int ref;
+ int trace_ref;
#ifdef CONFIG_FUNCTION_TRACER
struct ftrace_ops *ops;
struct trace_pid_list __rcu *function_pids;
@@ -547,7 +548,6 @@ struct tracer {
struct tracer *next;
struct tracer_flags *flags;
int enabled;
- int ref;
bool print_max;
bool allow_instances;
#ifdef CONFIG_TRACER_MAX_TRACE
--
2.28.0