copy_process currently copies task_struct.posix_cputimers_work as-is. If a
timer interrupt arrives while handling clone and before dup_task_struct
completes then the child task will have:
1. posix_cputimers_work.scheduled = true
2. posix_cputimers_work.work queued.
copy_process clears task_struct.task_works, so (2) will have no effect and
posix_cpu_timers_work will never run (not to mention it doesn't make sense
for two tasks to share a common linked list).
Since posix_cpu_timers_work never runs, posix_cputimers_work.scheduled is
never cleared. Since scheduled is set, future timer interrupts will skip
scheduling work, with the ultimate result that the task will never receive
timer expirations.
Together, the complete flow is:
1. Task 1 calls clone(), enters kernel.
2. Timer interrupt fires, schedules task work on Task 1.
2a. task_struct.posix_cputimers_work.scheduled = true
2b. task_struct.posix_cputimers_work.work added to
task_struct.task_works.
3. dup_task_struct copies Task 1 to Task 2.
4. copy_process clears task_struct.task_works for Task 2.
5. Future timer interrupts on Task 2 see
task_struct.posix_cputimers_work.scheduled = true and skip scheduling
work.
Fix this by explicitly clearing contents of
task_struct.posix_cputimers_work in copy_process. This was never meant to
be shared or inherited across tasks in the first place.
Signed-off-by: Michael Pratt <mpratt(a)google.com>
Reported-by: Rhys Hiltner <rhys(a)justin.tv>
Fixes: 1fb497dd0030 ("posix-cpu-timers: Provide mechanisms to defer timer handling to task_work")
Cc: <stable(a)vger.kernel.org>
---
This issue was discovered while investigating a flaky test in the Go
language standard library, https://golang.org/issue/49065. After our testing
VMs upgraded from 5.4 to 5.10 kernels, several profiling tests started
failing ~1% of the time with threads not receiving their expected profiling
signals.
Bisection of the problem by Rhys blamed b6b178e38f40 ("Merge tag
'timers-core-2020-08-14' of
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip"). This merge commit
introduced the broken commit 1fb497dd0030 ("posix-cpu-timers: Provide
mechanisms to defer timer handling to task_work") and its child
0099808553ad ("x86: Select POSIX_CPU_TIMERS_TASK_WORK"), which enables the
new codepath.
The C program below also reproduces the problem. Build with `gcc repro.c
-lrt -pthread -O2`.
The program starts a CPU timer on the main thread, which then spawns child
threads that create their own CPU timers and verify that they receive timer
signals. At HEAD and 0099808553ad this program fails with ~3-15 / 20000
threads not receiving signals.
Prior to 0099808553ad and with this patch, the program reports no failures.
// SPDX-License-Identifier: GPL-2.0
#include <pthread.h>
#include <signal.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <time.h>
#include <unistd.h>
/* Per-thread count of SIGPROF deliveries; incremented by signal_handler(). */
__thread uint64_t signaled;
/* Number of threads that finished without receiving any profiling signal. */
_Atomic int threads_bad;
/*
 * SIGPROF handler: record the delivery in the receiving thread's own
 * (thread-local) counter. The signal number, siginfo and ucontext arguments
 * are not inspected.
 */
void signal_handler(int signo, siginfo_t *siginfo, void *uctx)
{
	(void)signo;
	(void)siginfo;
	(void)uctx;

	signaled += 1;
}
/*
 * Return the kernel thread ID of the calling thread by invoking the gettid
 * system call directly.
 */
int gettid(void)
{
	long tid = syscall(SYS_gettid);

	return (int)tid;
}
/*
 * Create and arm a CPU-time timer for the calling thread.
 *
 * The timer runs on CLOCK_THREAD_CPUTIME_ID, first expires after 10ms and
 * then every 10ms, and delivers SIGPROF to the calling thread
 * (SIGEV_THREAD_ID with the caller's tid). If timer_create() or
 * timer_settime() fails, the error is reported with perror() and the process
 * exits with status 1.
 *
 * Returns the ID of the newly created timer.
 */
timer_t setup_timer(void)
{
	enum { PERIOD_NS = 10*1000*1000 };	/* 10ms */
	struct sigevent ev = {
		.sigev_signo = SIGPROF,
		.sigev_notify = SIGEV_THREAD_ID,
		._sigev_un = { ._tid = gettid() },
	};
	struct itimerspec its = {
		.it_interval = { .tv_nsec = PERIOD_NS },
		.it_value = { .tv_nsec = PERIOD_NS },
	};
	timer_t id;

	if (timer_create(CLOCK_THREAD_CPUTIME_ID, &ev, &id) != 0) {
		perror("timer_create");
		_exit(1);
	}

	if (timer_settime(id, 0, &its, NULL) != 0) {
		perror("timer_settime");
		_exit(1);
	}

	return id;
}
/*
 * Return the CPU time consumed so far by the calling thread, in nanoseconds.
 *
 * Reads CLOCK_THREAD_CPUTIME_ID. If clock_gettime() fails, the error is
 * reported with perror() and the process exits with status 1.
 */
uint64_t thread_cpu_ns(void)
{
	struct timespec ts;
	int ret;

	ret = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts);
	if (ret != 0) {
		perror("clock_gettime");
		_exit(1);
	}

	/*
	 * Widen to 64 bits before scaling: multiplying in time_t width would
	 * overflow (signed, hence undefined) where time_t is only 32 bits.
	 */
	return (uint64_t)ts.tv_sec * UINT64_C(1000000000) + (uint64_t)ts.tv_nsec;
}
/*
 * Thread body: arm a per-thread CPU timer, consume more than 50ms of thread
 * CPU time, then check that at least one profiling signal was counted.
 *
 * A thread that saw no signal is reported on stdout and counted in
 * threads_bad. The timer is deleted before returning; a timer_delete()
 * failure is reported with perror() and exits the process with status 1.
 * The argument is unused and the return value is always NULL.
 */
void *thread(void *arg)
{
	timer_t id = setup_timer();
	const uint64_t begin = thread_cpu_ns();

	/* Keep spinning until more than 50ms of CPU time has passed. */
	while (thread_cpu_ns() - begin <= 50*1000*1000) {
		/* Busy loop */
		for (volatile int spin = 0; spin < 100000; spin++)
			;
	}

	/*
	 * 50ms passed; we should certainly have received some profiling
	 * signals.
	 */
	if (signaled == 0) {
		printf("Thread %d received no profiling signals!\n", gettid());
		threads_bad++;
	}

	if (timer_delete(id) != 0) {
		perror("timer_delete");
		_exit(1);
	}

	return NULL;
}
/*
 * Reproducer driver.
 *
 * Installs the SIGPROF handler, unblocks SIGPROF, arms a CPU timer on the
 * main thread and then creates 20000 threads in batches of 10. Each thread
 * arms its own CPU timer and checks that it receives profiling signals.
 *
 * Prints progress every 100 threads and a final summary. Returns 1 if any
 * setup call fails or if at least one thread received no signal, 0 otherwise.
 */
int main(void)
{
	struct sigaction sa = {
		.sa_sigaction = &signal_handler,
		.sa_flags = SA_SIGINFO | SA_RESTART,
	};
	int ret;
	sigset_t set;
	timer_t timerid;
	int bad;
	int thread_count = 0;

	ret = sigaction(SIGPROF, &sa, NULL);
	if (ret != 0) {
		perror("sigaction");
		return 1;
	}

	sigemptyset(&set);
	sigaddset(&set, SIGPROF);
	ret = sigprocmask(SIG_UNBLOCK, &set, NULL);
	if (ret != 0) {
		perror("sigprocmask");
		return 1;
	}

	/*
	 * The main thread's timer is deliberately never deleted: it has to
	 * stay armed while the threads below are being created.
	 */
	timerid = setup_timer();
	(void)timerid;

	while (thread_count < 20000) {
		pthread_t threads[10];

		for (int i = 0; i < 10; i++) {
			ret = pthread_create(&threads[i], NULL, &thread, NULL);
			if (ret != 0) {
				/*
				 * pthread functions return the error number
				 * and leave errno alone, so perror() would
				 * print an unrelated message here.
				 */
				fprintf(stderr, "pthread_create: %s\n",
					strerror(ret));
				return 1;
			}
			thread_count++;
		}

		/* Busy loop */
		for (volatile int i = 0; i < 100000; i++)
			;

		for (int i = 0; i < 10; i++) {
			ret = pthread_join(threads[i], NULL);
			if (ret != 0) {
				fprintf(stderr, "pthread_join: %s\n",
					strerror(ret));
				return 1;
			}
		}

		if (thread_count % 100 == 0)
			printf("%d threads\n", thread_count);
	}

	/* Read the atomic counter once so report and exit status agree. */
	bad = threads_bad;
	printf("Bad threads %d / %d = %f%%\n", bad, thread_count,
	       100*((double)bad) / ((double)thread_count));

	if (bad > 0)
		return 1;

	return 0;
}
include/linux/posix-timers.h | 2 ++
kernel/fork.c | 1 +
kernel/time/posix-cpu-timers.c | 19 +++++++++++++++++--
3 files changed, 20 insertions(+), 2 deletions(-)
diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 00fef0064355..5bbcd280bfd2 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -184,8 +184,10 @@ static inline void posix_cputimers_group_init(struct posix_cputimers *pct,
#endif
#ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK
+void clear_posix_cputimers_work(struct task_struct *p);
void posix_cputimers_init_work(void);
#else
+static inline void clear_posix_cputimers_work(struct task_struct *p) { }
static inline void posix_cputimers_init_work(void) { }
#endif
diff --git a/kernel/fork.c b/kernel/fork.c
index 38681ad44c76..b1551c074b74 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2280,6 +2280,7 @@ static __latent_entropy struct task_struct *copy_process(
p->pdeath_signal = 0;
INIT_LIST_HEAD(&p->thread_group);
p->task_works = NULL;
+ clear_posix_cputimers_work(p);
#ifdef CONFIG_KRETPROBES
p->kretprobe_instances.first = NULL;
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 643d412ac623..96b4e7810426 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -1158,14 +1158,29 @@ static void posix_cpu_timers_work(struct callback_head *work)
handle_posix_cpu_timers(current);
}
+/*
+ * Clear existing posix CPU timers task work.
+ */
+void clear_posix_cputimers_work(struct task_struct *p)
+{
+ /*
+ * A copied work entry from the old task is not meaningful, clear it.
+ * N.B. init_task_work will not do this.
+ */
+ memset(&p->posix_cputimers_work.work, 0,
+ sizeof(p->posix_cputimers_work.work));
+ init_task_work(&p->posix_cputimers_work.work,
+ posix_cpu_timers_work);
+ p->posix_cputimers_work.scheduled = false;
+}
+
/*
* Initialize posix CPU timers task work in init task. Out of line to
* keep the callback static and to avoid header recursion hell.
*/
void __init posix_cputimers_init_work(void)
{
- init_task_work(¤t->posix_cputimers_work.work,
- posix_cpu_timers_work);
+ clear_posix_cputimers_work(current);
}
/*
--
2.33.1.1089.g2158813163f-goog
If two processes mount the same superblock concurrently, a memory leak occurs:
CPU0 | CPU1
do_new_mount | do_new_mount
fs_set_subtype | fs_set_subtype
kstrdup |
| kstrdup
memory leak |
The following reproducer triggers the problem:
1. shell command: mount -t ntfs /dev/sda1 /mnt &
2. c program: mount("/dev/sda1", "/mnt", "fuseblk", 0, "...")
with kmemleak report being along the lines of
unreferenced object 0xffff888235f1a5c0 (size 8):
comm "mount.ntfs", pid 2860, jiffies 4295757824 (age 43.423s)
hex dump (first 8 bytes):
00 a5 f1 35 82 88 ff ff ...5....
backtrace:
[<00000000656e30cc>] __kmalloc_track_caller+0x16e/0x430
[<000000008e591727>] kstrdup+0x3e/0x90
[<000000008430d12b>] do_mount.cold+0x7b/0xd9
[<0000000078d639cd>] ksys_mount+0xb2/0x150
[<000000006015988d>] __x64_sys_mount+0x29/0x40
[<00000000e0a7c118>] do_syscall_64+0xc1/0x1d0
[<00000000bcea7df5>] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[<00000000803a4067>] 0xffffffffffffffff
Linus's tree already has a refactoring patchset [1], one patch of which fixes this bug:
c30da2e981a7 ("fuse: convert to use the new mount API")
After the refactoring, super_block->s_subtype is initialized in fuse_fill_super.
Since the refactoring patchset has not been merged into this branch, I created this patch.
This patch fixes the leak by taking a write lock around the call to fs_set_subtype.
[1] https://patchwork.kernel.org/project/linux-fsdevel/patch/20190903113640.798…
Fixes: 79c0b2df79eb ("add filesystem subtype support")
Cc: David Howells <dhowells(a)redhat.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: ChenXiaoSong <chenxiaosong2(a)huawei.com>
---
v1: Can not mount sshfs ([PATCH linux-4.19.y] VFS: Fix fuseblk memory leak caused by mount concurrency)
v2: Use write lock while writing superblock ([PATCH 4.19,v2] VFS: Fix fuseblk memory leak caused by mount concurrency)
v3: Update commit message
fs/namespace.c | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/fs/namespace.c b/fs/namespace.c
index 2f3c6a0350a8..396ff1bcfdad 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2490,9 +2490,12 @@ static int do_new_mount(struct path *path, const char *fstype, int sb_flags,
return -ENODEV;
mnt = vfs_kern_mount(type, sb_flags, name, data);
- if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) &&
- !mnt->mnt_sb->s_subtype)
- mnt = fs_set_subtype(mnt, fstype);
+ if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE)) {
+ down_write(&mnt->mnt_sb->s_umount);
+ if (!mnt->mnt_sb->s_subtype)
+ mnt = fs_set_subtype(mnt, fstype);
+ up_write(&mnt->mnt_sb->s_umount);
+ }
put_filesystem(type);
if (IS_ERR(mnt))
--
2.31.1
This is the start of the stable review cycle for the 4.19.215 release.
There are 35 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Wed, 03 Nov 2021 11:41:55 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.19.215-r…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-4.19.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 4.19.215-rc2
Xin Long <lucien.xin(a)gmail.com>
sctp: add vtag check in sctp_sf_ootb
Xin Long <lucien.xin(a)gmail.com>
sctp: add vtag check in sctp_sf_do_8_5_1_E_sa
Xin Long <lucien.xin(a)gmail.com>
sctp: add vtag check in sctp_sf_violation
Xin Long <lucien.xin(a)gmail.com>
sctp: fix the processing for COOKIE_ECHO chunk
Xin Long <lucien.xin(a)gmail.com>
sctp: use init_tag from inithdr for ABORT chunk
Trevor Woerner <twoerner(a)gmail.com>
net: nxp: lpc_eth.c: avoid hang when bringing interface down
Yuiko Oshino <yuiko.oshino(a)microchip.com>
net: ethernet: microchip: lan743x: Fix dma allocation failure by using dma_set_mask_and_coherent
Yuiko Oshino <yuiko.oshino(a)microchip.com>
net: ethernet: microchip: lan743x: Fix driver crash when lan743x_pm_resume fails
Guenter Roeck <linux(a)roeck-us.net>
nios2: Make NIOS2_DTB_SOURCE_BOOL depend on !COMPILE_TEST
Michael Chan <michael.chan(a)broadcom.com>
net: Prevent infinite while loop in skb_tx_hash()
Pavel Skripkin <paskripkin(a)gmail.com>
net: batman-adv: fix error handling
Yang Yingliang <yangyingliang(a)huawei.com>
regmap: Fix possible double-free in regcache_rbtree_exit()
Clément Bœsch <u(a)pkh.me>
arm64: dts: allwinner: h5: NanoPI Neo 2: Fix ethernet node
Patrisious Haddad <phaddad(a)nvidia.com>
RDMA/mlx5: Set user priority for DCT
Johan Hovold <johan(a)kernel.org>
net: lan78xx: fix division by zero in send path
Haibo Chen <haibo.chen(a)nxp.com>
mmc: sdhci-esdhc-imx: clear the buffer_read_ready to reset standard tuning circuit
Shawn Guo <shawn.guo(a)linaro.org>
mmc: sdhci: Map more voltage level to SDHCI_POWER_330
Jaehoon Chung <jh80.chung(a)samsung.com>
mmc: dw_mmc: exynos: fix the finding clock sample value
Wenbin Mei <wenbin.mei(a)mediatek.com>
mmc: cqhci: clear HALT state after CQE enable
Johan Hovold <johan(a)kernel.org>
mmc: vub300: fix control-message timeouts
Eric Dumazet <edumazet(a)google.com>
ipv6: make exception cache less predictible
Eric Dumazet <edumazet(a)google.com>
ipv6: use siphash in rt6_exception_hash()
Eric Dumazet <edumazet(a)google.com>
ipv4: use siphash instead of Jenkins in fnhe_hashfun()
Pavel Skripkin <paskripkin(a)gmail.com>
Revert "net: mdiobus: Fix memory leak in __mdiobus_register"
Krzysztof Kozlowski <krzysztof.kozlowski(a)canonical.com>
nfc: port100: fix using -ERRNO as command type mask
Zheyu Ma <zheyuma97(a)gmail.com>
ata: sata_mv: Fix the error handling of mv_chip_id()
Wang Hai <wanghai38(a)huawei.com>
usbnet: fix error return code in usbnet_probe()
Oliver Neukum <oneukum(a)suse.com>
usbnet: sanity check for maxpacket
Nathan Chancellor <natechancellor(a)gmail.com>
ARM: 8819/1: Remove '-p' from LDFLAGS
Robin Murphy <robin.murphy(a)arm.com>
arm64: Avoid premature usercopy failure
Naveen N. Rao <naveen.n.rao(a)linux.vnet.ibm.com>
powerpc/bpf: Fix BPF_MOD when imm == 1
Arnd Bergmann <arnd(a)arndb.de>
ARM: 9141/1: only warn about XIP address when not compile testing
Arnd Bergmann <arnd(a)arndb.de>
ARM: 9139/1: kprobes: fix arch_init_kprobes() prototype
Arnd Bergmann <arnd(a)arndb.de>
ARM: 9134/1: remove duplicate memcpy() definition
Nick Desaulniers <ndesaulniers(a)google.com>
ARM: 9133/1: mm: proc-macros: ensure *_tlb_fns are 4B aligned
-------------
Diffstat:
Makefile | 4 +-
arch/arm/Makefile | 2 +-
arch/arm/boot/bootp/Makefile | 2 +-
arch/arm/boot/compressed/Makefile | 2 -
arch/arm/boot/compressed/decompress.c | 3 ++
arch/arm/kernel/vmlinux-xip.lds.S | 2 +-
arch/arm/mm/proc-macros.S | 1 +
arch/arm/probes/kprobes/core.c | 2 +-
.../boot/dts/allwinner/sun50i-h5-nanopi-neo2.dts | 2 +-
arch/arm64/lib/copy_from_user.S | 13 +++--
arch/arm64/lib/copy_in_user.S | 20 +++++---
arch/arm64/lib/copy_to_user.S | 14 ++++--
arch/nios2/platform/Kconfig.platform | 1 +
arch/powerpc/net/bpf_jit_comp64.c | 10 +++-
drivers/ata/sata_mv.c | 4 +-
drivers/base/regmap/regcache-rbtree.c | 7 ++-
drivers/infiniband/hw/mlx5/qp.c | 2 +
drivers/mmc/host/cqhci.c | 3 ++
drivers/mmc/host/dw_mmc-exynos.c | 14 ++++++
drivers/mmc/host/sdhci-esdhc-imx.c | 17 +++++++
drivers/mmc/host/sdhci.c | 6 +++
drivers/mmc/host/vub300.c | 18 +++----
drivers/net/ethernet/microchip/lan743x_main.c | 22 +++++++++
drivers/net/ethernet/nxp/lpc_eth.c | 5 +-
drivers/net/phy/mdio_bus.c | 1 -
drivers/net/usb/lan78xx.c | 6 +++
drivers/net/usb/usbnet.c | 5 ++
drivers/nfc/port100.c | 4 +-
net/batman-adv/bridge_loop_avoidance.c | 8 +++-
net/batman-adv/main.c | 56 +++++++++++++++-------
net/batman-adv/network-coding.c | 4 +-
net/batman-adv/translation-table.c | 4 +-
net/core/dev.c | 6 +++
net/ipv4/route.c | 12 ++---
net/ipv6/route.c | 25 +++++++---
net/sctp/sm_statefuns.c | 30 ++++++++----
36 files changed, 250 insertions(+), 87 deletions(-)
This is the start of the stable review cycle for the 4.4.291 release.
There are 17 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Wed, 03 Nov 2021 08:24:20 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.4.291-rc…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-4.4.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 4.4.291-rc1
Xin Long <lucien.xin(a)gmail.com>
sctp: add vtag check in sctp_sf_violation
Xin Long <lucien.xin(a)gmail.com>
sctp: use init_tag from inithdr for ABORT chunk
Guenter Roeck <linux(a)roeck-us.net>
nios2: Make NIOS2_DTB_SOURCE_BOOL depend on !COMPILE_TEST
Yang Yingliang <yangyingliang(a)huawei.com>
regmap: Fix possible double-free in regcache_rbtree_exit()
Johan Hovold <johan(a)kernel.org>
net: lan78xx: fix division by zero in send path
Shawn Guo <shawn.guo(a)linaro.org>
mmc: sdhci: Map more voltage level to SDHCI_POWER_330
Jaehoon Chung <jh80.chung(a)samsung.com>
mmc: dw_mmc: exynos: fix the finding clock sample value
Johan Hovold <johan(a)kernel.org>
mmc: vub300: fix control-message timeouts
Pavel Skripkin <paskripkin(a)gmail.com>
Revert "net: mdiobus: Fix memory leak in __mdiobus_register"
Krzysztof Kozlowski <krzysztof.kozlowski(a)canonical.com>
nfc: port100: fix using -ERRNO as command type mask
Zheyu Ma <zheyuma97(a)gmail.com>
ata: sata_mv: Fix the error handling of mv_chip_id()
Wang Hai <wanghai38(a)huawei.com>
usbnet: fix error return code in usbnet_probe()
Oliver Neukum <oneukum(a)suse.com>
usbnet: sanity check for maxpacket
Nathan Chancellor <natechancellor(a)gmail.com>
ARM: 8819/1: Remove '-p' from LDFLAGS
Arnd Bergmann <arnd(a)arndb.de>
ARM: 9139/1: kprobes: fix arch_init_kprobes() prototype
Arnd Bergmann <arnd(a)arndb.de>
ARM: 9134/1: remove duplicate memcpy() definition
Nick Desaulniers <ndesaulniers(a)google.com>
ARM: 9133/1: mm: proc-macros: ensure *_tlb_fns are 4B aligned
-------------
Diffstat:
Makefile | 4 ++--
arch/arm/Makefile | 2 +-
arch/arm/boot/bootp/Makefile | 2 +-
arch/arm/boot/compressed/Makefile | 2 --
arch/arm/boot/compressed/decompress.c | 3 +++
arch/arm/mm/proc-macros.S | 1 +
arch/arm/probes/kprobes/core.c | 2 +-
arch/nios2/platform/Kconfig.platform | 1 +
drivers/ata/sata_mv.c | 4 ++--
drivers/base/regmap/regcache-rbtree.c | 7 +++----
drivers/mmc/host/dw_mmc-exynos.c | 14 ++++++++++++++
drivers/mmc/host/sdhci.c | 6 ++++++
drivers/mmc/host/vub300.c | 18 +++++++++---------
drivers/net/phy/mdio_bus.c | 1 -
drivers/net/usb/lan78xx.c | 6 ++++++
drivers/net/usb/usbnet.c | 5 +++++
drivers/nfc/port100.c | 4 ++--
net/sctp/sm_statefuns.c | 4 ++++
18 files changed, 61 insertions(+), 25 deletions(-)
This is the start of the stable review cycle for the 5.4.157 release.
There are 51 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Wed, 03 Nov 2021 11:42:01 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v5.x/stable-review/patch-5.4.157-rc…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-5.4.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 5.4.157-rc2
Song Liu <songliubraving(a)fb.com>
perf script: Check session->header.env.arch before using it
Halil Pasic <pasic(a)linux.ibm.com>
KVM: s390: preserve deliverable_mask in __airqs_kick_single_vcpu
Halil Pasic <pasic(a)linux.ibm.com>
KVM: s390: clear kicked_mask before sleeping again
Janusz Dziedzic <janusz.dziedzic(a)gmail.com>
cfg80211: correct bridge/4addr mode check
Julian Wiedmann <jwi(a)linux.ibm.com>
net: use netif_is_bridge_port() to check for IFF_BRIDGE_PORT
Xin Long <lucien.xin(a)gmail.com>
sctp: add vtag check in sctp_sf_ootb
Xin Long <lucien.xin(a)gmail.com>
sctp: add vtag check in sctp_sf_do_8_5_1_E_sa
Xin Long <lucien.xin(a)gmail.com>
sctp: add vtag check in sctp_sf_violation
Xin Long <lucien.xin(a)gmail.com>
sctp: fix the processing for COOKIE_ECHO chunk
Xin Long <lucien.xin(a)gmail.com>
sctp: fix the processing for INIT_ACK chunk
Xin Long <lucien.xin(a)gmail.com>
sctp: use init_tag from inithdr for ABORT chunk
Andrew Lunn <andrew(a)lunn.ch>
phy: phy_start_aneg: Add an unlocked version
Andrew Lunn <andrew(a)lunn.ch>
phy: phy_ethtool_ksettings_get: Lock the phy for consistency
Daniel Jordan <daniel.m.jordan(a)oracle.com>
net/tls: Fix flipped sign in async_wait.err assignment
Trevor Woerner <twoerner(a)gmail.com>
net: nxp: lpc_eth.c: avoid hang when bringing interface down
Yuiko Oshino <yuiko.oshino(a)microchip.com>
net: ethernet: microchip: lan743x: Fix dma allocation failure by using dma_set_mask_and_coherent
Yuiko Oshino <yuiko.oshino(a)microchip.com>
net: ethernet: microchip: lan743x: Fix driver crash when lan743x_pm_resume fails
Guenter Roeck <linux(a)roeck-us.net>
nios2: Make NIOS2_DTB_SOURCE_BOOL depend on !COMPILE_TEST
Mark Zhang <markzhang(a)nvidia.com>
RDMA/sa_query: Use strscpy_pad instead of memcpy to copy a string
Michael Chan <michael.chan(a)broadcom.com>
net: Prevent infinite while loop in skb_tx_hash()
Pavel Skripkin <paskripkin(a)gmail.com>
net: batman-adv: fix error handling
Yang Yingliang <yangyingliang(a)huawei.com>
regmap: Fix possible double-free in regcache_rbtree_exit()
Clément Bœsch <u(a)pkh.me>
arm64: dts: allwinner: h5: NanoPI Neo 2: Fix ethernet node
Patrisious Haddad <phaddad(a)nvidia.com>
RDMA/mlx5: Set user priority for DCT
Varun Prakash <varun(a)chelsio.com>
nvme-tcp: fix data digest pointer calculation
Varun Prakash <varun(a)chelsio.com>
nvmet-tcp: fix data digest pointer calculation
Mike Marciniszyn <mike.marciniszyn(a)cornelisnetworks.com>
IB/hfi1: Fix abba locking issue with sc_disable()
Mike Marciniszyn <mike.marciniszyn(a)cornelisnetworks.com>
IB/qib: Protect from buffer overflow in struct qib_user_sdma_pkt fields
Liu Jian <liujian56(a)huawei.com>
tcp_bpf: Fix one concurrency problem in the tcp_bpf_send_verdict function
Christian König <christian.koenig(a)amd.com>
drm/ttm: fix memleak in ttm_transfered_destroy
Johan Hovold <johan(a)kernel.org>
net: lan78xx: fix division by zero in send path
Johannes Berg <johannes.berg(a)intel.com>
cfg80211: scan: fix RCU in cfg80211_add_nontrans_list()
Haibo Chen <haibo.chen(a)nxp.com>
mmc: sdhci-esdhc-imx: clear the buffer_read_ready to reset standard tuning circuit
Shawn Guo <shawn.guo(a)linaro.org>
mmc: sdhci: Map more voltage level to SDHCI_POWER_330
Jaehoon Chung <jh80.chung(a)samsung.com>
mmc: dw_mmc: exynos: fix the finding clock sample value
Wenbin Mei <wenbin.mei(a)mediatek.com>
mmc: cqhci: clear HALT state after CQE enable
Johan Hovold <johan(a)kernel.org>
mmc: vub300: fix control-message timeouts
Daniel Jordan <daniel.m.jordan(a)oracle.com>
net/tls: Fix flipped sign in tls_err_abort() calls
Pavel Skripkin <paskripkin(a)gmail.com>
Revert "net: mdiobus: Fix memory leak in __mdiobus_register"
Krzysztof Kozlowski <krzysztof.kozlowski(a)canonical.com>
nfc: port100: fix using -ERRNO as command type mask
Zheyu Ma <zheyuma97(a)gmail.com>
ata: sata_mv: Fix the error handling of mv_chip_id()
Rafał Miłecki <rafal(a)milecki.pl>
Revert "pinctrl: bcm: ns: support updated DT binding as syscon subnode"
Wang Hai <wanghai38(a)huawei.com>
usbnet: fix error return code in usbnet_probe()
Oliver Neukum <oneukum(a)suse.com>
usbnet: sanity check for maxpacket
Eric Dumazet <edumazet(a)google.com>
ipv4: use siphash instead of Jenkins in fnhe_hashfun()
Eric Dumazet <edumazet(a)google.com>
ipv6: use siphash in rt6_exception_hash()
Naveen N. Rao <naveen.n.rao(a)linux.vnet.ibm.com>
powerpc/bpf: Fix BPF_MOD when imm == 1
Arnd Bergmann <arnd(a)arndb.de>
ARM: 9141/1: only warn about XIP address when not compile testing
Arnd Bergmann <arnd(a)arndb.de>
ARM: 9139/1: kprobes: fix arch_init_kprobes() prototype
Arnd Bergmann <arnd(a)arndb.de>
ARM: 9134/1: remove duplicate memcpy() definition
Nick Desaulniers <ndesaulniers(a)google.com>
ARM: 9133/1: mm: proc-macros: ensure *_tlb_fns are 4B aligned
-------------
Diffstat:
Makefile | 4 +-
arch/arm/boot/compressed/decompress.c | 3 +
arch/arm/kernel/vmlinux-xip.lds.S | 2 +-
arch/arm/mm/proc-macros.S | 1 +
arch/arm/probes/kprobes/core.c | 2 +-
.../boot/dts/allwinner/sun50i-h5-nanopi-neo2.dts | 2 +-
arch/nios2/platform/Kconfig.platform | 1 +
arch/powerpc/net/bpf_jit_comp64.c | 10 +++-
arch/s390/kvm/interrupt.c | 5 +-
arch/s390/kvm/kvm-s390.c | 1 +
drivers/ata/sata_mv.c | 4 +-
drivers/base/regmap/regcache-rbtree.c | 7 +--
drivers/gpu/drm/ttm/ttm_bo_util.c | 1 +
drivers/infiniband/core/sa_query.c | 5 +-
drivers/infiniband/hw/hfi1/pio.c | 9 ++-
drivers/infiniband/hw/mlx5/qp.c | 2 +
drivers/infiniband/hw/qib/qib_user_sdma.c | 33 +++++++----
drivers/mmc/host/cqhci.c | 3 +
drivers/mmc/host/dw_mmc-exynos.c | 14 +++++
drivers/mmc/host/sdhci-esdhc-imx.c | 17 ++++++
drivers/mmc/host/sdhci.c | 6 ++
drivers/mmc/host/vub300.c | 18 +++---
drivers/net/bonding/bond_main.c | 2 +-
drivers/net/ethernet/micrel/ksz884x.c | 2 +-
drivers/net/ethernet/microchip/lan743x_main.c | 22 +++++++
drivers/net/ethernet/nxp/lpc_eth.c | 5 +-
drivers/net/phy/mdio_bus.c | 1 -
drivers/net/phy/phy.c | 32 +++++++++--
drivers/net/usb/lan78xx.c | 6 ++
drivers/net/usb/usbnet.c | 5 ++
drivers/nfc/port100.c | 4 +-
drivers/nvme/host/tcp.c | 2 +-
drivers/nvme/target/tcp.c | 2 +-
drivers/pinctrl/bcm/pinctrl-ns.c | 29 ++++------
include/net/tls.h | 9 +--
net/batman-adv/bridge_loop_avoidance.c | 8 ++-
net/batman-adv/main.c | 56 ++++++++++++------
net/batman-adv/network-coding.c | 4 +-
net/batman-adv/translation-table.c | 4 +-
net/core/dev.c | 6 ++
net/core/rtnetlink.c | 12 ++--
net/ipv4/route.c | 12 ++--
net/ipv4/tcp_bpf.c | 12 ++++
net/ipv6/route.c | 20 +++++--
net/sctp/sm_statefuns.c | 67 +++++++++++++---------
net/tls/tls_sw.c | 19 ++++--
net/wireless/nl80211.c | 2 +-
net/wireless/scan.c | 7 ++-
net/wireless/util.c | 14 ++---
tools/perf/builtin-script.c | 12 ++--
50 files changed, 360 insertions(+), 166 deletions(-)
A particular RX 5600 device requires a hack in the rebar logic, but the
current branch is too general and catches other devices too, breaking
them. This patch changes the branch to be more selective on the
particular revision.
This patch fixes intermittent freezes on other RX 5600 devices where the
hack is unnecessary. Credit to all contributors in the linked issue on
the AMD bug tracker.
See also: https://gitlab.freedesktop.org/drm/amd/-/issues/1707
Fixes: 907830b0fc9e ("PCI: Add a REBAR size quirk for Sapphire RX 5600 XT Pulse")
Cc: stable(a)vger.kernel.org # v5.12+
Signed-off-by: Robin McCorkell <robin(a)mccorkell.me.uk>
Reported-by: Simon May <@Socob on gitlab.freedesktop.com>
Tested-by: Kain Centeno <@kaincenteno on gitlab.freedesktop.com>
Tested-by: Tobias Jakobi <@tobiasjakobi on gitlab.freedesktop.com>
Suggested-by: lijo lazar <@lijo on gitlab.freedesktop.com>
---
drivers/pci/pci.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index ce2ab62b64cf..1fe75243019e 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -3647,7 +3647,7 @@ u32 pci_rebar_get_possible_sizes(struct pci_dev *pdev, int bar)
/* Sapphire RX 5600 XT Pulse has an invalid cap dword for BAR 0 */
if (pdev->vendor == PCI_VENDOR_ID_ATI && pdev->device == 0x731f &&
- bar == 0 && cap == 0x7000)
+ pdev->revision == 0xC1 && bar == 0 && cap == 0x7000)
cap = 0x3f000;
return cap >> 4;
--
2.31.1
When the reply for a non-blocking transmit arrives, the sequence
field for that reply was never filled in, so userspace would have no
way of associating the reply to the original transmit.
Copy the sequence field to ensure that this is now possible.
Signed-off-by: Hans Verkuil <hverkuil-cisco(a)xs4all.nl>
Fixes: 0dbacebede1e ([media] cec: move the CEC framework out of staging and to media)
Cc: <stable(a)vger.kernel.org>
---
drivers/media/cec/core/cec-adap.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/media/cec/core/cec-adap.c b/drivers/media/cec/core/cec-adap.c
index 79fa36de8a04..cd9cb354dc2c 100644
--- a/drivers/media/cec/core/cec-adap.c
+++ b/drivers/media/cec/core/cec-adap.c
@@ -1199,6 +1199,7 @@ void cec_received_msg_ts(struct cec_adapter *adap,
if (abort)
dst->rx_status |= CEC_RX_STATUS_FEATURE_ABORT;
msg->flags = dst->flags;
+ msg->sequence = dst->sequence;
/* Remove it from the wait_queue */
list_del_init(&data->list);
--
2.33.0
From: Laurent Vivier <lvivier(a)redhat.com>
Commit 112665286d08 ("KVM: PPC: Book3S HV: Context tracking exit guest
context before enabling irqs") moved guest_exit() into the interrupt
protected area to avoid wrong context warning (or worse). The problem is
that tick-based time accounting has not yet been updated at this point
(because it depends on the timer interrupt firing), so the guest time
gets incorrectly accounted to system time.
To fix the problem, follow the x86 fix in commit 160457140187 ("Defer
vtime accounting 'til after IRQ handling"), and allow host IRQs to run
before accounting the guest exit time.
In the case vtime accounting is enabled, this is not required because TB
is used directly for accounting.
Before this patch, with CONFIG_TICK_CPU_ACCOUNTING=y in the host and a
guest running a kernel compile, the 'guest' fields of /proc/stat are
stuck at zero. With the patch they can be observed increasing roughly as
expected.
Fixes: e233d54d4d97 ("KVM: booke: use __kvm_guest_exit")
Fixes: 112665286d08 ("KVM: PPC: Book3S HV: Context tracking exit guest context before enabling irqs")
Cc: <stable(a)vger.kernel.org> # 5.12
Signed-off-by: Laurent Vivier <lvivier(a)redhat.com>
[np: only required for tick accounting, add Book3E fix, tweak changelog]
Signed-off-by: Nicholas Piggin <npiggin(a)gmail.com>
---
Since v2:
- I took over the patch with Laurent's blessing.
- Changed to avoid processing IRQs if we do have vtime accounting
enabled.
- Changed so in either case the accounting is called with irqs disabled.
- Added similar Book3E fix.
- Rebased on upstream, tested, observed bug and confirmed fix.
arch/powerpc/kvm/book3s_hv.c | 30 ++++++++++++++++++++++++++++--
arch/powerpc/kvm/booke.c | 16 +++++++++++++++-
2 files changed, 43 insertions(+), 3 deletions(-)
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 2acb1c96cfaf..7b74fc0a986b 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3726,7 +3726,20 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
kvmppc_set_host_core(pcpu);
- guest_exit_irqoff();
+ context_tracking_guest_exit();
+ if (!vtime_accounting_enabled_this_cpu()) {
+ local_irq_enable();
+ /*
+ * Service IRQs here before vtime_account_guest_exit() so any
+ * ticks that occurred while running the guest are accounted to
+ * the guest. If vtime accounting is enabled, accounting uses
+ * TB rather than ticks, so it can be done without enabling
+ * interrupts here, which has the problem that it accounts
+ * interrupt processing overhead to the host.
+ */
+ local_irq_disable();
+ }
+ vtime_account_guest_exit();
local_irq_enable();
@@ -4510,7 +4523,20 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
kvmppc_set_host_core(pcpu);
- guest_exit_irqoff();
+ context_tracking_guest_exit();
+ if (!vtime_accounting_enabled_this_cpu()) {
+ local_irq_enable();
+ /*
+ * Service IRQs here before vtime_account_guest_exit() so any
+ * ticks that occurred while running the guest are accounted to
+ * the guest. If vtime accounting is enabled, accounting uses
+ * TB rather than ticks, so it can be done without enabling
+ * interrupts here, which has the problem that it accounts
+ * interrupt processing overhead to the host.
+ */
+ local_irq_disable();
+ }
+ vtime_account_guest_exit();
local_irq_enable();
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 977801c83aff..8c15c90dd3a9 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -1042,7 +1042,21 @@ int kvmppc_handle_exit(struct kvm_vcpu *vcpu, unsigned int exit_nr)
}
trace_kvm_exit(exit_nr, vcpu);
- guest_exit_irqoff();
+
+ context_tracking_guest_exit();
+ if (!vtime_accounting_enabled_this_cpu()) {
+ local_irq_enable();
+ /*
+ * Service IRQs here before vtime_account_guest_exit() so any
+ * ticks that occurred while running the guest are accounted to
+ * the guest. If vtime accounting is enabled, accounting uses
+ * TB rather than ticks, so it can be done without enabling
+ * interrupts here, which has the problem that it accounts
+ * interrupt processing overhead to the host.
+ */
+ local_irq_disable();
+ }
+ vtime_account_guest_exit();
local_irq_enable();
--
2.23.0