SMCCCv1.3 added a hint bit which callers can set in an SMCCC function ID
(AKA "FID") to indicate that it is acceptable for the SMCCC
implementation to discard SVE and/or SME state over a specific SMCCC
call. The kernel support for using this hint is broken and SMCCC calls
may clobber the SVE and/or SME state of arbitrary tasks, though FPSIMD
state is unaffected.
The kernel support is intended to use the hint when there is no SVE or
SME state to save, and to do this it checks whether TIF_FOREIGN_FPSTATE
is set or TIF_SVE is clear in assembly code:
| ldr <flags>, [<current_task>, #TSK_TI_FLAGS]
| tbnz <flags>, #TIF_FOREIGN_FPSTATE, 1f // Any live FP state?
| tbnz <flags>, #TIF_SVE, 2f // Does that state include SVE?
|
| 1: orr <fid>, <fid>, ARM_SMCCC_1_3_SVE_HINT
| 2:
| << SMCCC call using FID >>
This is not safe as-is:
(1) SMCCC calls can be made in a preemptible context and preemption can
result in TIF_FOREIGN_FPSTATE being set or cleared at arbitrary
points in time. Thus checking for TIF_FOREIGN_FPSTATE provides no
guarantee.
(2) TIF_FOREIGN_FPSTATE only indicates that the live FP/SVE/SME state in
the CPU does not belong to the current task, and does not indicate
that clobbering this state is acceptable.
When the live CPU state is clobbered it is necessary to update
fpsimd_last_state.st to ensure that a subsequent context switch will
reload FP/SVE/SME state from memory rather than consuming the
clobbered state. This and the SMCCC call itself must happen in a
critical section with preemption disabled to avoid races.
(3) Live SVE/SME state can exist with TIF_SVE clear (e.g. with only
TIF_SME set), and checking TIF_SVE alone is insufficient.
Remove the broken support for the SMCCCv1.3 SVE saving hint. This is
effectively a revert of commits:
* cfa7ff959a789a95 ("arm64: smccc: Support SMCCC v1.3 SVE register saving hint")
* a7c3acca53801e10 ("arm64: smccc: Save lr before calling __arm_smccc_sve_check()")
... leaving behind the ARM_SMCCC_VERSION_1_3 and ARM_SMCCC_1_3_SVE_HINT
definitions, since these are simply definitions from the SMCCC
specification, and the latter is used in KVM via ARM_SMCCC_CALL_HINTS.
If we want to bring this back in future, we'll probably want to handle
this logic in C where we can use all the usual FPSIMD/SVE/SME helper
functions, and that'll likely require some rework of the SMCCC code
and/or its callers.
Fixes: cfa7ff959a789a95 ("arm64: smccc: Support SMCCC v1.3 SVE register saving hint")
Signed-off-by: Mark Rutland <mark.rutland(a)arm.com>
Cc: Ard Biesheuvel <ardb(a)kernel.org>
Cc: Catalin Marinas <catalin.marinas(a)arm.com>
Cc: Marc Zyngier <maz(a)kernel.org>
Cc: Mark Brown <broonie(a)kernel.org>
Cc: Will Deacon <will(a)kernel.org>
Cc: stable(a)vger.kernel.org
---
arch/arm64/kernel/smccc-call.S | 35 +++-------------------------------
drivers/firmware/smccc/smccc.c | 4 ----
include/linux/arm-smccc.h | 32 +++----------------------------
3 files changed, 6 insertions(+), 65 deletions(-)
diff --git a/arch/arm64/kernel/smccc-call.S b/arch/arm64/kernel/smccc-call.S
index 487381164ff6b..2def9d0dd3ddb 100644
--- a/arch/arm64/kernel/smccc-call.S
+++ b/arch/arm64/kernel/smccc-call.S
@@ -7,48 +7,19 @@
#include <asm/asm-offsets.h>
#include <asm/assembler.h>
-#include <asm/thread_info.h>
-
-/*
- * If we have SMCCC v1.3 and (as is likely) no SVE state in
- * the registers then set the SMCCC hint bit to say there's no
- * need to preserve it. Do this by directly adjusting the SMCCC
- * function value which is already stored in x0 ready to be called.
- */
-SYM_FUNC_START(__arm_smccc_sve_check)
-
- ldr_l x16, smccc_has_sve_hint
- cbz x16, 2f
-
- get_current_task x16
- ldr x16, [x16, #TSK_TI_FLAGS]
- tbnz x16, #TIF_FOREIGN_FPSTATE, 1f // Any live FP state?
- tbnz x16, #TIF_SVE, 2f // Does that state include SVE?
-
-1: orr x0, x0, ARM_SMCCC_1_3_SVE_HINT
-
-2: ret
-SYM_FUNC_END(__arm_smccc_sve_check)
-EXPORT_SYMBOL(__arm_smccc_sve_check)
.macro SMCCC instr
- stp x29, x30, [sp, #-16]!
- mov x29, sp
-alternative_if ARM64_SVE
- bl __arm_smccc_sve_check
-alternative_else_nop_endif
\instr #0
- ldr x4, [sp, #16]
+ ldr x4, [sp]
stp x0, x1, [x4, #ARM_SMCCC_RES_X0_OFFS]
stp x2, x3, [x4, #ARM_SMCCC_RES_X2_OFFS]
- ldr x4, [sp, #24]
+ ldr x4, [sp, #8]
cbz x4, 1f /* no quirk structure */
ldr x9, [x4, #ARM_SMCCC_QUIRK_ID_OFFS]
cmp x9, #ARM_SMCCC_QUIRK_QCOM_A6
b.ne 1f
str x6, [x4, ARM_SMCCC_QUIRK_STATE_OFFS]
-1: ldp x29, x30, [sp], #16
- ret
+1: ret
.endm
/*
diff --git a/drivers/firmware/smccc/smccc.c b/drivers/firmware/smccc/smccc.c
index d670635914ecb..a74600d9f2d72 100644
--- a/drivers/firmware/smccc/smccc.c
+++ b/drivers/firmware/smccc/smccc.c
@@ -16,7 +16,6 @@ static u32 smccc_version = ARM_SMCCC_VERSION_1_0;
static enum arm_smccc_conduit smccc_conduit = SMCCC_CONDUIT_NONE;
bool __ro_after_init smccc_trng_available = false;
-u64 __ro_after_init smccc_has_sve_hint = false;
s32 __ro_after_init smccc_soc_id_version = SMCCC_RET_NOT_SUPPORTED;
s32 __ro_after_init smccc_soc_id_revision = SMCCC_RET_NOT_SUPPORTED;
@@ -28,9 +27,6 @@ void __init arm_smccc_version_init(u32 version, enum arm_smccc_conduit conduit)
smccc_conduit = conduit;
smccc_trng_available = smccc_probe_trng();
- if (IS_ENABLED(CONFIG_ARM64_SVE) &&
- smccc_version >= ARM_SMCCC_VERSION_1_3)
- smccc_has_sve_hint = true;
if ((smccc_version >= ARM_SMCCC_VERSION_1_2) &&
(smccc_conduit != SMCCC_CONDUIT_NONE)) {
diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h
index f59099a213d0d..67f6fdf2e7cd8 100644
--- a/include/linux/arm-smccc.h
+++ b/include/linux/arm-smccc.h
@@ -315,8 +315,6 @@ u32 arm_smccc_get_version(void);
void __init arm_smccc_version_init(u32 version, enum arm_smccc_conduit conduit);
-extern u64 smccc_has_sve_hint;
-
/**
* arm_smccc_get_soc_id_version()
*
@@ -414,15 +412,6 @@ struct arm_smccc_quirk {
} state;
};
-/**
- * __arm_smccc_sve_check() - Set the SVE hint bit when doing SMC calls
- *
- * Sets the SMCCC hint bit to indicate if there is live state in the SVE
- * registers, this modifies x0 in place and should never be called from C
- * code.
- */
-asmlinkage unsigned long __arm_smccc_sve_check(unsigned long x0);
-
/**
* __arm_smccc_smc() - make SMC calls
* @a0-a7: arguments passed in registers 0 to 7
@@ -490,20 +479,6 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1,
#endif
-/* nVHE hypervisor doesn't have a current thread so needs separate checks */
-#if defined(CONFIG_ARM64_SVE) && !defined(__KVM_NVHE_HYPERVISOR__)
-
-#define SMCCC_SVE_CHECK ALTERNATIVE("nop \n", "bl __arm_smccc_sve_check \n", \
- ARM64_SVE)
-#define smccc_sve_clobbers "x16", "x30", "cc",
-
-#else
-
-#define SMCCC_SVE_CHECK
-#define smccc_sve_clobbers
-
-#endif
-
#define __constraint_read_2 "r" (arg0)
#define __constraint_read_3 __constraint_read_2, "r" (arg1)
#define __constraint_read_4 __constraint_read_3, "r" (arg2)
@@ -574,12 +549,11 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1,
register unsigned long r3 asm("r3"); \
CONCATENATE(__declare_arg_, \
COUNT_ARGS(__VA_ARGS__))(__VA_ARGS__); \
- asm volatile(SMCCC_SVE_CHECK \
- inst "\n" : \
+ asm volatile(inst "\n" : \
"=r" (r0), "=r" (r1), "=r" (r2), "=r" (r3) \
: CONCATENATE(__constraint_read_, \
COUNT_ARGS(__VA_ARGS__)) \
- : smccc_sve_clobbers "memory"); \
+ : "memory"); \
if (___res) \
*___res = (typeof(*___res)){r0, r1, r2, r3}; \
} while (0)
@@ -628,7 +602,7 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1,
asm ("" : \
: CONCATENATE(__constraint_read_, \
COUNT_ARGS(__VA_ARGS__)) \
- : smccc_sve_clobbers "memory"); \
+ : "memory"); \
if (___res) \
___res->a0 = SMCCC_RET_NOT_SUPPORTED; \
} while (0)
--
2.30.2
Although support for SME was merged in v5.19, we've since uncovered a
number of issues with the implementation, including issues which might
corrupt the FPSIMD/SVE/SME state of arbitrary tasks. While there are
patches to address some of these issues, ongoing review has highlighted
additional functional problems, and more time is necessary to analyse
and fix these.
For now, mark SME as BROKEN in the hope that we can fix things properly
in the near future. As SME is an OPTIONAL part of ARMv9.2+, and there is
very little extant hardware, this should not adversely affect the vast
majority of users.
Signed-off-by: Mark Rutland <mark.rutland(a)arm.com>
Cc: Ard Biesheuvel <ardb(a)kernel.org>
Cc: Catalin Marinas <catalin.marinas(a)arm.com>
Cc: Marc Zyngier <maz(a)kernel.org>
Cc: Mark Brown <broonie(a)kernel.org>
Cc: Will Deacon <will(a)kernel.org>
Cc: <stable(a)vger.kernel.org> # 5.19
---
arch/arm64/Kconfig | 1 +
1 file changed, 1 insertion(+)
Catalin, Will, if we take this, the minimal set of other fixes necessary
for now is:
* "arm64/sve: Discard stale CPU state when handling SVE traps"
https://lore.kernel.org/linux-arm-kernel/20241030-arm64-fpsimd-foreign-flus…https://lore.kernel.org/linux-arm-kernel/ZypuQNhWHKut8mLl@J2N7QTR9R3.cambri…
(already queued by Will in for-next/fixes)
* "arm64: smccc: Remove broken support for SMCCCv1.3 SVE discard hint"
https://lore.kernel.org/linux-arm-kernel/20241106160448.2712997-1-mark.rutl…
Mark.
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 3e29b44d2d7bd..14cc81e154ee2 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -2213,6 +2213,7 @@ config ARM64_SME
bool "ARM Scalable Matrix Extension support"
default y
depends on ARM64_SVE
+ depends on BROKEN
help
The Scalable Matrix Extension (SME) is an extension to the AArch64
execution state which utilises a substantial subset of the SVE
--
2.30.2
The function call alloc_percpu() returns a pointer to the memory address,
but it hasn't been checked. Our static analysis tool indicates that null
pointer dereference may exist in pointer zone->per_cpu_pageset. It is
always safe to judge the null pointer before use.
Signed-off-by: Qiu-ji Chen <chenqiuji666(a)gmail.com>
Cc: stable(a)vger.kernel.org
Fixes: 9420f89db2dd ("mm: move most of core MM initialization to mm/mm_init.c")
---
mm/page_alloc.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 8afab64814dc..5deae1193dc3 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5703,8 +5703,14 @@ void __meminit setup_zone_pageset(struct zone *zone)
/* Size may be 0 on !SMP && !NUMA */
if (sizeof(struct per_cpu_zonestat) > 0)
zone->per_cpu_zonestats = alloc_percpu(struct per_cpu_zonestat);
+ if (!zone->per_cpu_pageset)
+ return;
zone->per_cpu_pageset = alloc_percpu(struct per_cpu_pages);
+ if (!zone->per_cpu_pageset) {
+ free_percpu(zone->per_cpu_pageset);
+ return;
+ }
for_each_possible_cpu(cpu) {
struct per_cpu_pages *pcp;
struct per_cpu_zonestat *pzstats;
--
2.34.1
During the High-Speed Isochronous Audio transfers, xHCI
controller on certain AMD platforms experiences momentary data
loss. This results in Missed Service Errors (MSE) being
generated by the xHCI.
The root cause of the MSE is attributed to the ISOC OUT endpoint
being omitted from scheduling. This can happen either when an IN
endpoint with a 64ms service interval is pre-scheduled prior to
the ISOC OUT endpoint or when the interval of the ISOC OUT
endpoint is shorter than that of the IN endpoint. Consequently,
the OUT service is neglected when an IN endpoint with a service
interval exceeding 32ms is scheduled concurrently (every 64ms in
this scenario).
This issue is particularly seen on certain older AMD platforms.
To mitigate this problem, it is recommended to adjust the service
interval of the IN endpoint to exceed 32ms (interval 8). This
adjustment ensures that the OUT endpoint will not be bypassed,
even if a smaller interval value is utilized.
Cc: stable(a)vger.kernel.org
Signed-off-by: Raju Rangoju <Raju.Rangoju(a)amd.com>
---
drivers/usb/host/xhci-mem.c | 5 +++++
drivers/usb/host/xhci-pci.c | 14 ++++++++++++++
drivers/usb/host/xhci.h | 1 +
3 files changed, 20 insertions(+)
diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
index d2900197a49e..4892bb9afa6e 100644
--- a/drivers/usb/host/xhci-mem.c
+++ b/drivers/usb/host/xhci-mem.c
@@ -1426,6 +1426,11 @@ int xhci_endpoint_init(struct xhci_hcd *xhci,
/* Periodic endpoint bInterval limit quirk */
if (usb_endpoint_xfer_int(&ep->desc) ||
usb_endpoint_xfer_isoc(&ep->desc)) {
+ if ((xhci->quirks & XHCI_LIMIT_ENDPOINT_INTERVAL_9) &&
+ usb_endpoint_xfer_int(&ep->desc) &&
+ interval >= 9) {
+ interval = 8;
+ }
if ((xhci->quirks & XHCI_LIMIT_ENDPOINT_INTERVAL_7) &&
udev->speed >= USB_SPEED_HIGH &&
interval >= 7) {
diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index cb07cee9ed0c..a078e2e5517d 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -284,6 +284,20 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
if (pdev->vendor == PCI_VENDOR_ID_NEC)
xhci->quirks |= XHCI_NEC_HOST;
+ if (pdev->vendor == PCI_VENDOR_ID_AMD &&
+ (pdev->device == 0x13ed ||
+ pdev->device == 0x13ee ||
+ pdev->device == 0x148c ||
+ pdev->device == 0x15d4 ||
+ pdev->device == 0x15d5 ||
+ pdev->device == 0x15e0 ||
+ pdev->device == 0x15e1 ||
+ pdev->device == 0x15e5))
+ xhci->quirks |= XHCI_LIMIT_ENDPOINT_INTERVAL_9;
+
+ if (pdev->vendor == PCI_VENDOR_ID_ATI && pdev->device == 0x7316)
+ xhci->quirks |= XHCI_LIMIT_ENDPOINT_INTERVAL_9;
+
if (pdev->vendor == PCI_VENDOR_ID_AMD && xhci->hci_version == 0x96)
xhci->quirks |= XHCI_AMD_0x96_HOST;
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index f0fb696d5619..fa69f7ac09b5 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -1624,6 +1624,7 @@ struct xhci_hcd {
#define XHCI_ZHAOXIN_HOST BIT_ULL(46)
#define XHCI_WRITE_64_HI_LO BIT_ULL(47)
#define XHCI_CDNS_SCTX_QUIRK BIT_ULL(48)
+#define XHCI_LIMIT_ENDPOINT_INTERVAL_9 BIT_ULL(49)
unsigned int num_active_eps;
unsigned int limit_active_eps;
--
2.34.1
The patch below does not apply to the v6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Thanks,
Sasha
------------------ original commit in Linus's tree ------------------
From b5413156bad91dc2995a5c4eab1b05e56914638a Mon Sep 17 00:00:00 2001
From: Benjamin Segall <bsegall(a)google.com>
Date: Fri, 25 Oct 2024 18:35:35 -0700
Subject: [PATCH] posix-cpu-timers: Clear TICK_DEP_BIT_POSIX_TIMER on clone
When cloning a new thread, its posix_cputimers are not inherited, and
are cleared by posix_cputimers_init(). However, this does not clear the
tick dependency it creates in tsk->tick_dep_mask, and the handler does
not reach the code to clear the dependency if there were no timers to
begin with.
Thus if a thread has a cputimer running before clone/fork, all
descendants will prevent nohz_full unless they create a cputimer of
their own.
Fix this by entirely clearing the tick_dep_mask in copy_process().
(There is currently no inherited state that needs a tick dependency)
Process-wide timers do not have this problem because fork does not copy
signal_struct as a baseline, it creates one from scratch.
Fixes: b78783000d5c ("posix-cpu-timers: Migrate to use new tick dependency mask model")
Signed-off-by: Ben Segall <bsegall(a)google.com>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Reviewed-by: Frederic Weisbecker <frederic(a)kernel.org>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/all/xm26o737bq8o.fsf@google.com
---
include/linux/tick.h | 8 ++++++++
kernel/fork.c | 2 ++
2 files changed, 10 insertions(+)
diff --git a/include/linux/tick.h b/include/linux/tick.h
index 72744638c5b0f..99c9c5a7252aa 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -251,12 +251,19 @@ static inline void tick_dep_set_task(struct task_struct *tsk,
if (tick_nohz_full_enabled())
tick_nohz_dep_set_task(tsk, bit);
}
+
static inline void tick_dep_clear_task(struct task_struct *tsk,
enum tick_dep_bits bit)
{
if (tick_nohz_full_enabled())
tick_nohz_dep_clear_task(tsk, bit);
}
+
+static inline void tick_dep_init_task(struct task_struct *tsk)
+{
+ atomic_set(&tsk->tick_dep_mask, 0);
+}
+
static inline void tick_dep_set_signal(struct task_struct *tsk,
enum tick_dep_bits bit)
{
@@ -290,6 +297,7 @@ static inline void tick_dep_set_task(struct task_struct *tsk,
enum tick_dep_bits bit) { }
static inline void tick_dep_clear_task(struct task_struct *tsk,
enum tick_dep_bits bit) { }
+static inline void tick_dep_init_task(struct task_struct *tsk) { }
static inline void tick_dep_set_signal(struct task_struct *tsk,
enum tick_dep_bits bit) { }
static inline void tick_dep_clear_signal(struct signal_struct *signal,
diff --git a/kernel/fork.c b/kernel/fork.c
index 89ceb4a68af25..6fa9fe62e01e3 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -105,6 +105,7 @@
#include <linux/rseq.h>
#include <uapi/linux/pidfd.h>
#include <linux/pidfs.h>
+#include <linux/tick.h>
#include <asm/pgalloc.h>
#include <linux/uaccess.h>
@@ -2292,6 +2293,7 @@ __latent_entropy struct task_struct *copy_process(
acct_clear_integrals(p);
posix_cputimers_init(&p->posix_cputimers);
+ tick_dep_init_task(p);
p->io_context = NULL;
audit_set_context(p, NULL);
--
2.43.0
The patch below does not apply to the v5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Thanks,
Sasha
------------------ original commit in Linus's tree ------------------
From b5413156bad91dc2995a5c4eab1b05e56914638a Mon Sep 17 00:00:00 2001
From: Benjamin Segall <bsegall(a)google.com>
Date: Fri, 25 Oct 2024 18:35:35 -0700
Subject: [PATCH] posix-cpu-timers: Clear TICK_DEP_BIT_POSIX_TIMER on clone
When cloning a new thread, its posix_cputimers are not inherited, and
are cleared by posix_cputimers_init(). However, this does not clear the
tick dependency it creates in tsk->tick_dep_mask, and the handler does
not reach the code to clear the dependency if there were no timers to
begin with.
Thus if a thread has a cputimer running before clone/fork, all
descendants will prevent nohz_full unless they create a cputimer of
their own.
Fix this by entirely clearing the tick_dep_mask in copy_process().
(There is currently no inherited state that needs a tick dependency)
Process-wide timers do not have this problem because fork does not copy
signal_struct as a baseline, it creates one from scratch.
Fixes: b78783000d5c ("posix-cpu-timers: Migrate to use new tick dependency mask model")
Signed-off-by: Ben Segall <bsegall(a)google.com>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Reviewed-by: Frederic Weisbecker <frederic(a)kernel.org>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/all/xm26o737bq8o.fsf@google.com
---
include/linux/tick.h | 8 ++++++++
kernel/fork.c | 2 ++
2 files changed, 10 insertions(+)
diff --git a/include/linux/tick.h b/include/linux/tick.h
index 72744638c5b0f..99c9c5a7252aa 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -251,12 +251,19 @@ static inline void tick_dep_set_task(struct task_struct *tsk,
if (tick_nohz_full_enabled())
tick_nohz_dep_set_task(tsk, bit);
}
+
static inline void tick_dep_clear_task(struct task_struct *tsk,
enum tick_dep_bits bit)
{
if (tick_nohz_full_enabled())
tick_nohz_dep_clear_task(tsk, bit);
}
+
+static inline void tick_dep_init_task(struct task_struct *tsk)
+{
+ atomic_set(&tsk->tick_dep_mask, 0);
+}
+
static inline void tick_dep_set_signal(struct task_struct *tsk,
enum tick_dep_bits bit)
{
@@ -290,6 +297,7 @@ static inline void tick_dep_set_task(struct task_struct *tsk,
enum tick_dep_bits bit) { }
static inline void tick_dep_clear_task(struct task_struct *tsk,
enum tick_dep_bits bit) { }
+static inline void tick_dep_init_task(struct task_struct *tsk) { }
static inline void tick_dep_set_signal(struct task_struct *tsk,
enum tick_dep_bits bit) { }
static inline void tick_dep_clear_signal(struct signal_struct *signal,
diff --git a/kernel/fork.c b/kernel/fork.c
index 89ceb4a68af25..6fa9fe62e01e3 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -105,6 +105,7 @@
#include <linux/rseq.h>
#include <uapi/linux/pidfd.h>
#include <linux/pidfs.h>
+#include <linux/tick.h>
#include <asm/pgalloc.h>
#include <linux/uaccess.h>
@@ -2292,6 +2293,7 @@ __latent_entropy struct task_struct *copy_process(
acct_clear_integrals(p);
posix_cputimers_init(&p->posix_cputimers);
+ tick_dep_init_task(p);
p->io_context = NULL;
audit_set_context(p, NULL);
--
2.43.0
The patch below does not apply to the v5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Thanks,
Sasha
------------------ original commit in Linus's tree ------------------
From b5413156bad91dc2995a5c4eab1b05e56914638a Mon Sep 17 00:00:00 2001
From: Benjamin Segall <bsegall(a)google.com>
Date: Fri, 25 Oct 2024 18:35:35 -0700
Subject: [PATCH] posix-cpu-timers: Clear TICK_DEP_BIT_POSIX_TIMER on clone
When cloning a new thread, its posix_cputimers are not inherited, and
are cleared by posix_cputimers_init(). However, this does not clear the
tick dependency it creates in tsk->tick_dep_mask, and the handler does
not reach the code to clear the dependency if there were no timers to
begin with.
Thus if a thread has a cputimer running before clone/fork, all
descendants will prevent nohz_full unless they create a cputimer of
their own.
Fix this by entirely clearing the tick_dep_mask in copy_process().
(There is currently no inherited state that needs a tick dependency)
Process-wide timers do not have this problem because fork does not copy
signal_struct as a baseline, it creates one from scratch.
Fixes: b78783000d5c ("posix-cpu-timers: Migrate to use new tick dependency mask model")
Signed-off-by: Ben Segall <bsegall(a)google.com>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Reviewed-by: Frederic Weisbecker <frederic(a)kernel.org>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/all/xm26o737bq8o.fsf@google.com
---
include/linux/tick.h | 8 ++++++++
kernel/fork.c | 2 ++
2 files changed, 10 insertions(+)
diff --git a/include/linux/tick.h b/include/linux/tick.h
index 72744638c5b0f..99c9c5a7252aa 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -251,12 +251,19 @@ static inline void tick_dep_set_task(struct task_struct *tsk,
if (tick_nohz_full_enabled())
tick_nohz_dep_set_task(tsk, bit);
}
+
static inline void tick_dep_clear_task(struct task_struct *tsk,
enum tick_dep_bits bit)
{
if (tick_nohz_full_enabled())
tick_nohz_dep_clear_task(tsk, bit);
}
+
+static inline void tick_dep_init_task(struct task_struct *tsk)
+{
+ atomic_set(&tsk->tick_dep_mask, 0);
+}
+
static inline void tick_dep_set_signal(struct task_struct *tsk,
enum tick_dep_bits bit)
{
@@ -290,6 +297,7 @@ static inline void tick_dep_set_task(struct task_struct *tsk,
enum tick_dep_bits bit) { }
static inline void tick_dep_clear_task(struct task_struct *tsk,
enum tick_dep_bits bit) { }
+static inline void tick_dep_init_task(struct task_struct *tsk) { }
static inline void tick_dep_set_signal(struct task_struct *tsk,
enum tick_dep_bits bit) { }
static inline void tick_dep_clear_signal(struct signal_struct *signal,
diff --git a/kernel/fork.c b/kernel/fork.c
index 89ceb4a68af25..6fa9fe62e01e3 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -105,6 +105,7 @@
#include <linux/rseq.h>
#include <uapi/linux/pidfd.h>
#include <linux/pidfs.h>
+#include <linux/tick.h>
#include <asm/pgalloc.h>
#include <linux/uaccess.h>
@@ -2292,6 +2293,7 @@ __latent_entropy struct task_struct *copy_process(
acct_clear_integrals(p);
posix_cputimers_init(&p->posix_cputimers);
+ tick_dep_init_task(p);
p->io_context = NULL;
audit_set_context(p, NULL);
--
2.43.0