The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 8c8ecc98f5c65947b0070a24bac11e12e47cc65d
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021841-atrium-settle-8b58@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 8c8ecc98f5c65947b0070a24bac11e12e47cc65d Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven(a)narfation.org>
Date: Mon, 20 Jan 2025 00:06:11 +0100
Subject: [PATCH] batman-adv: Drop unmanaged ELP metric worker
The ELP worker needs to calculate new metric values for all neighbors
"reachable" over an interface. Some of the used metric sources require
locks which might need to sleep. This sleep is incompatible with the RCU
list iterator used for the recorded neighbors. The initial approach to work
around this problem was to queue another work item per neighbor and then
run this in a new context.
Even though this solved the RCU vs might_sleep() conflict, it has a major
problem: Nothing was stopping the work item in case it is not needed
anymore - for example because one of the related interfaces was removed or
the batman-adv module was unloaded - resulting in potential invalid memory
accesses.
Directly canceling the metric worker also has various problems:
* cancel_work_sync for a to-be-deactivated interface is called with
rtnl_lock held. But the code in the ELP metric worker also tries to use
rtnl_lock() - which will never return in this case. This also means that
cancel_work_sync would never return because it is waiting for the worker
to finish.
* iterating over the neighbor list for the to-be-deactivated interface is
currently done using the RCU specific methods. Which means that it is
possible to miss items when iterating over it without the associated
spinlock - a behaviour which is acceptable for a periodic metric check
but not for a cleanup routine (which must "stop" all still running
workers)
The better approach is to get rid of the per interface neighbor metric
worker and handle everything in the interface worker. The original problems
are solved by:
* creating a list of neighbors which require new metric information inside
the RCU protected context, gathering the metric according to the new list
outside the RCU protected context
* only use rtnl_trylock inside metric gathering code to avoid a deadlock
when the cancel_delayed_work_sync is called in the interface removal code
(which is called with the rtnl_lock held)
Cc: stable(a)vger.kernel.org
Fixes: c833484e5f38 ("batman-adv: ELP - compute the metric based on the estimated throughput")
Signed-off-by: Sven Eckelmann <sven(a)narfation.org>
Signed-off-by: Simon Wunderlich <sw(a)simonwunderlich.de>
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
index ac11f1f08db0..d35479c465e2 100644
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -113,8 +113,6 @@ static void
batadv_v_hardif_neigh_init(struct batadv_hardif_neigh_node *hardif_neigh)
{
ewma_throughput_init(&hardif_neigh->bat_v.throughput);
- INIT_WORK(&hardif_neigh->bat_v.metric_work,
- batadv_v_elp_throughput_metric_update);
}
/**
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index 65e52de52bcd..b065578b4436 100644
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -18,6 +18,7 @@
#include <linux/if_ether.h>
#include <linux/jiffies.h>
#include <linux/kref.h>
+#include <linux/list.h>
#include <linux/minmax.h>
#include <linux/netdevice.h>
#include <linux/nl80211.h>
@@ -26,6 +27,7 @@
#include <linux/rcupdate.h>
#include <linux/rtnetlink.h>
#include <linux/skbuff.h>
+#include <linux/slab.h>
#include <linux/stddef.h>
#include <linux/string.h>
#include <linux/types.h>
@@ -41,6 +43,18 @@
#include "routing.h"
#include "send.h"
+/**
+ * struct batadv_v_metric_queue_entry - list of hardif neighbors which require
+ * a metric update
+ */
+struct batadv_v_metric_queue_entry {
+ /** @hardif_neigh: hardif neighbor scheduled for metric update */
+ struct batadv_hardif_neigh_node *hardif_neigh;
+
+ /** @list: list node for metric_queue */
+ struct list_head list;
+};
+
/**
* batadv_v_elp_start_timer() - restart timer for ELP periodic work
* @hard_iface: the interface for which the timer has to be reset
@@ -137,10 +151,17 @@ static bool batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh,
goto default_throughput;
}
+ /* only use rtnl_trylock because the elp worker will be cancelled while
+ * the rtnl_lock is held. the cancel_delayed_work_sync() would otherwise
+ * wait forever when the elp work_item was started and it is then also
+ * trying to rtnl_lock
+ */
+ if (!rtnl_trylock())
+ return false;
+
/* if not a wifi interface, check if this device provides data via
* ethtool (e.g. an Ethernet adapter)
*/
- rtnl_lock();
ret = __ethtool_get_link_ksettings(hard_iface->net_dev, &link_settings);
rtnl_unlock();
if (ret == 0) {
@@ -175,31 +196,19 @@ static bool batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh,
/**
* batadv_v_elp_throughput_metric_update() - worker updating the throughput
* metric of a single hop neighbour
- * @work: the work queue item
+ * @neigh: the neighbour to probe
*/
-void batadv_v_elp_throughput_metric_update(struct work_struct *work)
+static void
+batadv_v_elp_throughput_metric_update(struct batadv_hardif_neigh_node *neigh)
{
- struct batadv_hardif_neigh_node_bat_v *neigh_bat_v;
- struct batadv_hardif_neigh_node *neigh;
u32 throughput;
bool valid;
- neigh_bat_v = container_of(work, struct batadv_hardif_neigh_node_bat_v,
- metric_work);
- neigh = container_of(neigh_bat_v, struct batadv_hardif_neigh_node,
- bat_v);
-
valid = batadv_v_elp_get_throughput(neigh, &throughput);
if (!valid)
- goto put_neigh;
+ return;
ewma_throughput_add(&neigh->bat_v.throughput, throughput);
-
-put_neigh:
- /* decrement refcounter to balance increment performed before scheduling
- * this task
- */
- batadv_hardif_neigh_put(neigh);
}
/**
@@ -273,14 +282,16 @@ batadv_v_elp_wifi_neigh_probe(struct batadv_hardif_neigh_node *neigh)
*/
static void batadv_v_elp_periodic_work(struct work_struct *work)
{
+ struct batadv_v_metric_queue_entry *metric_entry;
+ struct batadv_v_metric_queue_entry *metric_safe;
struct batadv_hardif_neigh_node *hardif_neigh;
struct batadv_hard_iface *hard_iface;
struct batadv_hard_iface_bat_v *bat_v;
struct batadv_elp_packet *elp_packet;
+ struct list_head metric_queue;
struct batadv_priv *bat_priv;
struct sk_buff *skb;
u32 elp_interval;
- bool ret;
bat_v = container_of(work, struct batadv_hard_iface_bat_v, elp_wq.work);
hard_iface = container_of(bat_v, struct batadv_hard_iface, bat_v);
@@ -316,6 +327,8 @@ static void batadv_v_elp_periodic_work(struct work_struct *work)
atomic_inc(&hard_iface->bat_v.elp_seqno);
+ INIT_LIST_HEAD(&metric_queue);
+
/* The throughput metric is updated on each sent packet. This way, if a
* node is dead and no longer sends packets, batman-adv is still able to
* react timely to its death.
@@ -340,16 +353,28 @@ static void batadv_v_elp_periodic_work(struct work_struct *work)
/* Reading the estimated throughput from cfg80211 is a task that
* may sleep and that is not allowed in an rcu protected
- * context. Therefore schedule a task for that.
+ * context. Therefore add it to metric_queue and process it
+ * outside rcu protected context.
*/
- ret = queue_work(batadv_event_workqueue,
- &hardif_neigh->bat_v.metric_work);
-
- if (!ret)
+ metric_entry = kzalloc(sizeof(*metric_entry), GFP_ATOMIC);
+ if (!metric_entry) {
batadv_hardif_neigh_put(hardif_neigh);
+ continue;
+ }
+
+ metric_entry->hardif_neigh = hardif_neigh;
+ list_add(&metric_entry->list, &metric_queue);
}
rcu_read_unlock();
+ list_for_each_entry_safe(metric_entry, metric_safe, &metric_queue, list) {
+ batadv_v_elp_throughput_metric_update(metric_entry->hardif_neigh);
+
+ batadv_hardif_neigh_put(metric_entry->hardif_neigh);
+ list_del(&metric_entry->list);
+ kfree(metric_entry);
+ }
+
restart_timer:
batadv_v_elp_start_timer(hard_iface);
out:
diff --git a/net/batman-adv/bat_v_elp.h b/net/batman-adv/bat_v_elp.h
index 9e2740195fa2..c9cb0a307100 100644
--- a/net/batman-adv/bat_v_elp.h
+++ b/net/batman-adv/bat_v_elp.h
@@ -10,7 +10,6 @@
#include "main.h"
#include <linux/skbuff.h>
-#include <linux/workqueue.h>
int batadv_v_elp_iface_enable(struct batadv_hard_iface *hard_iface);
void batadv_v_elp_iface_disable(struct batadv_hard_iface *hard_iface);
@@ -19,6 +18,5 @@ void batadv_v_elp_iface_activate(struct batadv_hard_iface *primary_iface,
void batadv_v_elp_primary_iface_set(struct batadv_hard_iface *primary_iface);
int batadv_v_elp_packet_recv(struct sk_buff *skb,
struct batadv_hard_iface *if_incoming);
-void batadv_v_elp_throughput_metric_update(struct work_struct *work);
#endif /* _NET_BATMAN_ADV_BAT_V_ELP_H_ */
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 04f6398b3a40..85a50096f5b2 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -596,9 +596,6 @@ struct batadv_hardif_neigh_node_bat_v {
* neighbor
*/
unsigned long last_unicast_tx;
-
- /** @metric_work: work queue callback item for metric update */
- struct work_struct metric_work;
};
/**
The quilt patch titled
Subject: panic: call panic handlers before panic_other_cpus_shutdown()
has been removed from the -mm tree. Its filename was
panic-call-panic-handlers-before-panic_other_cpus_shutdown.patch
This patch was dropped because an updated version will be issued
------------------------------------------------------
From: Hamza Mahfooz <hamzamahfooz(a)linux.microsoft.com>
Subject: panic: call panic handlers before panic_other_cpus_shutdown()
Date: Fri, 21 Feb 2025 16:30:52 -0500
Since the panic handlers may require certain cpus to be online to panic
gracefully, we should call them before turning off SMP. Without this
re-ordering, on Hyper-V hv_panic_vmbus_unload() times out, because the
vmbus channel is bound to VMBUS_CONNECT_CPU and unless the crashing cpu is
the same as VMBUS_CONNECT_CPU, VMBUS_CONNECT_CPU will be offlined by
crash_smp_send_stop() before the vmbus channel can be deconstructed.
Link: https://lkml.kernel.org/r/20250221213055.133849-1-hamzamahfooz@linux.micros…
Signed-off-by: Hamza Mahfooz <hamzamahfooz(a)linux.microsoft.com>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Baoquan he <bhe(a)redhat.com>
Cc: Dexuan Cui <decui(a)microsoft.com>
Cc: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Cc: Haiyang Zhang <haiyangz(a)microsoft.com>
Cc: Hamza Mahfooz <hamzamahfooz(a)linux.microsoft.com>
Cc: Jani Nikula <jani.nikula(a)intel.com>
Cc: John Ogness <john.ogness(a)linutronix.de>
Cc: Petr Mladek <pmladek(a)suse.com>
Cc: Ryo Takakura <takakura(a)valinux.co.jp>
Cc: Wei Liu <wei.liu(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
kernel/panic.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
--- a/kernel/panic.c~panic-call-panic-handlers-before-panic_other_cpus_shutdown
+++ a/kernel/panic.c
@@ -372,16 +372,16 @@ void panic(const char *fmt, ...)
if (!_crash_kexec_post_notifiers)
__crash_kexec(NULL);
- panic_other_cpus_shutdown(_crash_kexec_post_notifiers);
-
- printk_legacy_allow_panic_sync();
-
/*
* Run any panic handlers, including those that might need to
* add information to the kmsg dump output.
*/
atomic_notifier_call_chain(&panic_notifier_list, 0, buf);
+ panic_other_cpus_shutdown(_crash_kexec_post_notifiers);
+
+ printk_legacy_allow_panic_sync();
+
panic_print_sys_info(false);
kmsg_dump_desc(KMSG_DUMP_PANIC, buf);
_
Patches currently in -mm which might be from hamzamahfooz(a)linux.microsoft.com are
The patch titled
Subject: panic: call panic handlers before panic_other_cpus_shutdown()
has been added to the -mm mm-nonmm-unstable branch. Its filename is
panic-call-panic-handlers-before-panic_other_cpus_shutdown.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-nonmm-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Hamza Mahfooz <hamzamahfooz(a)linux.microsoft.com>
Subject: panic: call panic handlers before panic_other_cpus_shutdown()
Date: Fri, 21 Feb 2025 16:30:52 -0500
Since the panic handlers may require certain cpus to be online to panic
gracefully, we should call them before turning off SMP. Without this
re-ordering, on Hyper-V hv_panic_vmbus_unload() times out, because the
vmbus channel is bound to VMBUS_CONNECT_CPU and unless the crashing cpu is
the same as VMBUS_CONNECT_CPU, VMBUS_CONNECT_CPU will be offlined by
crash_smp_send_stop() before the vmbus channel can be deconstructed.
Link: https://lkml.kernel.org/r/20250221213055.133849-1-hamzamahfooz@linux.micros…
Signed-off-by: Hamza Mahfooz <hamzamahfooz(a)linux.microsoft.com>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Baoquan he <bhe(a)redhat.com>
Cc: Dexuan Cui <decui(a)microsoft.com>
Cc: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Cc: Haiyang Zhang <haiyangz(a)microsoft.com>
Cc: Hamza Mahfooz <hamzamahfooz(a)linux.microsoft.com>
Cc: Jani Nikula <jani.nikula(a)intel.com>
Cc: John Ogness <john.ogness(a)linutronix.de>
Cc: Petr Mladek <pmladek(a)suse.com>
Cc: Ryo Takakura <takakura(a)valinux.co.jp>
Cc: Wei Liu <wei.liu(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
kernel/panic.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
--- a/kernel/panic.c~panic-call-panic-handlers-before-panic_other_cpus_shutdown
+++ a/kernel/panic.c
@@ -372,16 +372,16 @@ void panic(const char *fmt, ...)
if (!_crash_kexec_post_notifiers)
__crash_kexec(NULL);
- panic_other_cpus_shutdown(_crash_kexec_post_notifiers);
-
- printk_legacy_allow_panic_sync();
-
/*
* Run any panic handlers, including those that might need to
* add information to the kmsg dump output.
*/
atomic_notifier_call_chain(&panic_notifier_list, 0, buf);
+ panic_other_cpus_shutdown(_crash_kexec_post_notifiers);
+
+ printk_legacy_allow_panic_sync();
+
panic_print_sys_info(false);
kmsg_dump_desc(KMSG_DUMP_PANIC, buf);
_
Patches currently in -mm which might be from hamzamahfooz(a)linux.microsoft.com are
panic-call-panic-handlers-before-panic_other_cpus_shutdown.patch
From: Chen-Yu Tsai <wens(a)csie.org>
The DWMAC 1000 DMA capabilities register does not provide actual
FIFO sizes, nor does the driver really care. If they are not
provided via some other means, the driver will work fine, only
disallowing changing the MTU setting.
The recent commit 8865d22656b4 ("net: stmmac: Specify hardware
capability value when FIFO size isn't specified") changed this by
requiring the FIFO sizes to be provided, breaking devices that were
working just fine.
Provide the FIFO sizes through the driver's platform data, to not
only fix the breakage, but also enable MTU changes. The FIFO sizes
are confirmed to be the same across RK3288, RK3328, RK3399 and PX30,
based on their respective manuals. It is likely that Rockchip
synthesized their DWMAC 1000 with the same parameters on all their
chips that have it.
Fixes: eaf4fac47807 ("net: stmmac: Do not accept invalid MTU values")
Fixes: 8865d22656b4 ("net: stmmac: Specify hardware capability value when FIFO size isn't specified")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Chen-Yu Tsai <wens(a)csie.org>
---
(Resending to net-next instead of netdev.)
The commit that broke things has already been reverted in netdev.
The reason for stable inclusion is not to fix the device breakage
(which only broke in v6.14-rc1), but to provide the values so that MTU
changes can work in older kernels.
drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
index a4dc89e23a68..71a4c4967467 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
@@ -1966,8 +1966,11 @@ static int rk_gmac_probe(struct platform_device *pdev)
/* If the stmmac is not already selected as gmac4,
* then make sure we fallback to gmac.
*/
- if (!plat_dat->has_gmac4)
+ if (!plat_dat->has_gmac4) {
plat_dat->has_gmac = true;
+ plat_dat->rx_fifo_size = 4096;
+ plat_dat->tx_fifo_size = 2048;
+ }
plat_dat->fix_mac_speed = rk_fix_speed;
plat_dat->bsp_priv = rk_gmac_setup(pdev, plat_dat, data);
--
2.39.5
Add a check for the return value of get_zeroed_page() in
sclp_console_init() to prevent a NULL pointer dereference.
Furthermore, add a helper that frees the pages already allocated
by the loop, so that they are not leaked when an allocation fails.
Fixes: 4c8f4794b61e ("[S390] sclp console: convert from bootmem to slab")
Cc: stable(a)vger.kernel.org
Signed-off-by: Haoxiang Li <haoxiang_li2024(a)163.com>
---
Changes in v2:
- Add a free helper to solve the memory leak caused by loop allocation.
- Thanks Heiko! I realized that v1 patch overlooked a potential memory leak.
After consideration, I choose to do the full exercise. I noticed a similar
handling in [1], following that handling I submit this v2 patch. Thanks again!
Reference link:
[1]https://github.com/torvalds/linux/blob/master/drivers/s390/char/sclp_vt22…
---
drivers/s390/char/sclp_con.c | 17 +++++++++++++++++
1 file changed, 17 insertions(+)
diff --git a/drivers/s390/char/sclp_con.c b/drivers/s390/char/sclp_con.c
index e5d947c763ea..c87b0c204718 100644
--- a/drivers/s390/char/sclp_con.c
+++ b/drivers/s390/char/sclp_con.c
@@ -263,6 +263,19 @@ static struct console sclp_console =
.index = 0 /* ttyS0 */
};
+/*
+ * Release allocated pages.
+ */
+static void __init __sclp_console_free_pages(void)
+{
+ struct list_head *page, *p;
+
+ list_for_each_safe(page, p, &sclp_con_pages) {
+ list_del(page);
+ free_page((unsigned long) page);
+ }
+}
+
/*
* called by console_init() in drivers/char/tty_io.c at boot-time.
*/
@@ -282,6 +295,10 @@ sclp_console_init(void)
/* Allocate pages for output buffering */
for (i = 0; i < sclp_console_pages; i++) {
page = (void *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
+ if (!page) {
+ __sclp_console_free_pages();
+ return -ENOMEM;
+ }
list_add_tail(page, &sclp_con_pages);
}
sclp_conbuf = NULL;
--
2.25.1
Dear Maintainers,
I would like to bring to your attention the need for a backport of several patches to the 5.10.X kernel to address issues with self-tests for rsa-caam. In kernel version 5.10.231-rt123, the introduction of commit dead96e1c748ff84ecac83ea3c5a4d7a2e57e051 (crypto: caam - add error check to caam_rsa_set_priv_key_form), which added checks for memory allocation errors, has caused the self-test for rsa-caam to fail in FIPS mode, resulting in the following error message:
alg: akcipher: test 1 failed for rsa-caam, err=-12
Kernel panic - not syncing:
alg: self-tests for rsa-caam (rsa) failed in fips mode!
The following patches should be backported to resolve this issue:
8aaa4044999863199124991dfa489fd248d73789 (crypto: testmgr - some more fixes to RSA test vectors)
d824c61df41758f8c045e9522e850b615ee0ca1c (crypto: testmgr - populate RSA CRT parameters in RSA test vectors)
ceb31f1c4c6894c4f9e65f4381781917a7a4c898 (crypto: testmgr - fix version number of RSA tests)
88c2d62e7920edb50661656c85932b5cd100069b (crypto: testmgr - Fix wrong test case of RSA)
1040cf9c9e7518600e7fcc24764d1c4b8a1b62f5 (crypto: testmgr - fix wrong key length for pkcs1pad)
Thank you for your attention to this matter.
Regards,
Kamila Sionek
General Business
TSC could be reset in deep ACPI sleep states, even with invariant TSC.
That's the reason we have sched_clock() save/restore functions, to deal
with this situation. But it happens that such functions are guarded by a
check for the stability of sched_clock - if it is not considered stable, the
save/restore routines aren't executed.
On top of that, we have a clear comment on native_sched_clock() saying
that *even* with TSC unstable, we continue using TSC for sched_clock due
to its speed. In other words, if we have a situation of TSC getting
detected as unstable, it marks the sched_clock as unstable as well,
so subsequent S3 sleep cycles could bring bogus sched_clock values due
to the lack of the save/restore mechanism, causing warnings like this:
[22.954918] ------------[ cut here ]------------
[22.954923] Delta way too big! 18446743750843854390 ts=18446744072977390405 before=322133536015 after=322133536015 write stamp=18446744072977390405
[22.954923] If you just came from a suspend/resume,
[22.954923] please switch to the trace global clock:
[22.954923] echo global > /sys/kernel/tracing/trace_clock
[22.954923] or add trace_clock=global to the kernel command line
[22.954937] WARNING: CPU: 2 PID: 5728 at kernel/trace/ring_buffer.c:2890 rb_add_timestamp+0x193/0x1c0
Notice that the above was reproduced even with "trace_clock=global".
The fix for that is to _always_ save/restore the sched_clock on suspend
cycle _if TSC is used_ as sched_clock - the sched_clock_stable() check
only becomes relevant to save/restore the sched_clock if we fall back to
jiffies.
Cc: stable(a)vger.kernel.org
Signed-off-by: Guilherme G. Piccoli <gpiccoli(a)igalia.com>
---
arch/x86/kernel/tsc.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 34dec0b72ea8..88e5a4ed9db3 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -959,7 +959,7 @@ static unsigned long long cyc2ns_suspend;
void tsc_save_sched_clock_state(void)
{
- if (!sched_clock_stable())
+ if (!static_branch_likely(&__use_tsc) && !sched_clock_stable())
return;
cyc2ns_suspend = sched_clock();
@@ -979,7 +979,7 @@ void tsc_restore_sched_clock_state(void)
unsigned long flags;
int cpu;
- if (!sched_clock_stable())
+ if (!static_branch_likely(&__use_tsc) && !sched_clock_stable())
return;
local_irq_save(flags);
--
2.47.1
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 8c8ecc98f5c65947b0070a24bac11e12e47cc65d
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021839-primal-stiffen-ba9e@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 8c8ecc98f5c65947b0070a24bac11e12e47cc65d Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven(a)narfation.org>
Date: Mon, 20 Jan 2025 00:06:11 +0100
Subject: [PATCH] batman-adv: Drop unmanaged ELP metric worker
The ELP worker needs to calculate new metric values for all neighbors
"reachable" over an interface. Some of the used metric sources require
locks which might need to sleep. This sleep is incompatible with the RCU
list iterator used for the recorded neighbors. The initial approach to work
around this problem was to queue another work item per neighbor and then
run this in a new context.
Even though this solved the RCU vs might_sleep() conflict, it has a major
problem: Nothing was stopping the work item in case it is not needed
anymore - for example because one of the related interfaces was removed or
the batman-adv module was unloaded - resulting in potential invalid memory
accesses.
Directly canceling the metric worker also has various problems:
* cancel_work_sync for a to-be-deactivated interface is called with
rtnl_lock held. But the code in the ELP metric worker also tries to use
rtnl_lock() - which will never return in this case. This also means that
cancel_work_sync would never return because it is waiting for the worker
to finish.
* iterating over the neighbor list for the to-be-deactivated interface is
currently done using the RCU specific methods. Which means that it is
possible to miss items when iterating over it without the associated
spinlock - a behaviour which is acceptable for a periodic metric check
but not for a cleanup routine (which must "stop" all still running
workers)
The better approach is to get rid of the per interface neighbor metric
worker and handle everything in the interface worker. The original problems
are solved by:
* creating a list of neighbors which require new metric information inside
the RCU protected context, gathering the metric according to the new list
outside the RCU protected context
* only use rtnl_trylock inside metric gathering code to avoid a deadlock
when the cancel_delayed_work_sync is called in the interface removal code
(which is called with the rtnl_lock held)
Cc: stable(a)vger.kernel.org
Fixes: c833484e5f38 ("batman-adv: ELP - compute the metric based on the estimated throughput")
Signed-off-by: Sven Eckelmann <sven(a)narfation.org>
Signed-off-by: Simon Wunderlich <sw(a)simonwunderlich.de>
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
index ac11f1f08db0..d35479c465e2 100644
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -113,8 +113,6 @@ static void
batadv_v_hardif_neigh_init(struct batadv_hardif_neigh_node *hardif_neigh)
{
ewma_throughput_init(&hardif_neigh->bat_v.throughput);
- INIT_WORK(&hardif_neigh->bat_v.metric_work,
- batadv_v_elp_throughput_metric_update);
}
/**
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index 65e52de52bcd..b065578b4436 100644
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -18,6 +18,7 @@
#include <linux/if_ether.h>
#include <linux/jiffies.h>
#include <linux/kref.h>
+#include <linux/list.h>
#include <linux/minmax.h>
#include <linux/netdevice.h>
#include <linux/nl80211.h>
@@ -26,6 +27,7 @@
#include <linux/rcupdate.h>
#include <linux/rtnetlink.h>
#include <linux/skbuff.h>
+#include <linux/slab.h>
#include <linux/stddef.h>
#include <linux/string.h>
#include <linux/types.h>
@@ -41,6 +43,18 @@
#include "routing.h"
#include "send.h"
+/**
+ * struct batadv_v_metric_queue_entry - list of hardif neighbors which require
+ * a metric update
+ */
+struct batadv_v_metric_queue_entry {
+ /** @hardif_neigh: hardif neighbor scheduled for metric update */
+ struct batadv_hardif_neigh_node *hardif_neigh;
+
+ /** @list: list node for metric_queue */
+ struct list_head list;
+};
+
/**
* batadv_v_elp_start_timer() - restart timer for ELP periodic work
* @hard_iface: the interface for which the timer has to be reset
@@ -137,10 +151,17 @@ static bool batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh,
goto default_throughput;
}
+ /* only use rtnl_trylock because the elp worker will be cancelled while
+ * the rtnl_lock is held. the cancel_delayed_work_sync() would otherwise
+ * wait forever when the elp work_item was started and it is then also
+ * trying to rtnl_lock
+ */
+ if (!rtnl_trylock())
+ return false;
+
/* if not a wifi interface, check if this device provides data via
* ethtool (e.g. an Ethernet adapter)
*/
- rtnl_lock();
ret = __ethtool_get_link_ksettings(hard_iface->net_dev, &link_settings);
rtnl_unlock();
if (ret == 0) {
@@ -175,31 +196,19 @@ static bool batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh,
/**
* batadv_v_elp_throughput_metric_update() - worker updating the throughput
* metric of a single hop neighbour
- * @work: the work queue item
+ * @neigh: the neighbour to probe
*/
-void batadv_v_elp_throughput_metric_update(struct work_struct *work)
+static void
+batadv_v_elp_throughput_metric_update(struct batadv_hardif_neigh_node *neigh)
{
- struct batadv_hardif_neigh_node_bat_v *neigh_bat_v;
- struct batadv_hardif_neigh_node *neigh;
u32 throughput;
bool valid;
- neigh_bat_v = container_of(work, struct batadv_hardif_neigh_node_bat_v,
- metric_work);
- neigh = container_of(neigh_bat_v, struct batadv_hardif_neigh_node,
- bat_v);
-
valid = batadv_v_elp_get_throughput(neigh, &throughput);
if (!valid)
- goto put_neigh;
+ return;
ewma_throughput_add(&neigh->bat_v.throughput, throughput);
-
-put_neigh:
- /* decrement refcounter to balance increment performed before scheduling
- * this task
- */
- batadv_hardif_neigh_put(neigh);
}
/**
@@ -273,14 +282,16 @@ batadv_v_elp_wifi_neigh_probe(struct batadv_hardif_neigh_node *neigh)
*/
static void batadv_v_elp_periodic_work(struct work_struct *work)
{
+ struct batadv_v_metric_queue_entry *metric_entry;
+ struct batadv_v_metric_queue_entry *metric_safe;
struct batadv_hardif_neigh_node *hardif_neigh;
struct batadv_hard_iface *hard_iface;
struct batadv_hard_iface_bat_v *bat_v;
struct batadv_elp_packet *elp_packet;
+ struct list_head metric_queue;
struct batadv_priv *bat_priv;
struct sk_buff *skb;
u32 elp_interval;
- bool ret;
bat_v = container_of(work, struct batadv_hard_iface_bat_v, elp_wq.work);
hard_iface = container_of(bat_v, struct batadv_hard_iface, bat_v);
@@ -316,6 +327,8 @@ static void batadv_v_elp_periodic_work(struct work_struct *work)
atomic_inc(&hard_iface->bat_v.elp_seqno);
+ INIT_LIST_HEAD(&metric_queue);
+
/* The throughput metric is updated on each sent packet. This way, if a
* node is dead and no longer sends packets, batman-adv is still able to
* react timely to its death.
@@ -340,16 +353,28 @@ static void batadv_v_elp_periodic_work(struct work_struct *work)
/* Reading the estimated throughput from cfg80211 is a task that
* may sleep and that is not allowed in an rcu protected
- * context. Therefore schedule a task for that.
+ * context. Therefore add it to metric_queue and process it
+ * outside rcu protected context.
*/
- ret = queue_work(batadv_event_workqueue,
- &hardif_neigh->bat_v.metric_work);
-
- if (!ret)
+ metric_entry = kzalloc(sizeof(*metric_entry), GFP_ATOMIC);
+ if (!metric_entry) {
batadv_hardif_neigh_put(hardif_neigh);
+ continue;
+ }
+
+ metric_entry->hardif_neigh = hardif_neigh;
+ list_add(&metric_entry->list, &metric_queue);
}
rcu_read_unlock();
+ list_for_each_entry_safe(metric_entry, metric_safe, &metric_queue, list) {
+ batadv_v_elp_throughput_metric_update(metric_entry->hardif_neigh);
+
+ batadv_hardif_neigh_put(metric_entry->hardif_neigh);
+ list_del(&metric_entry->list);
+ kfree(metric_entry);
+ }
+
restart_timer:
batadv_v_elp_start_timer(hard_iface);
out:
diff --git a/net/batman-adv/bat_v_elp.h b/net/batman-adv/bat_v_elp.h
index 9e2740195fa2..c9cb0a307100 100644
--- a/net/batman-adv/bat_v_elp.h
+++ b/net/batman-adv/bat_v_elp.h
@@ -10,7 +10,6 @@
#include "main.h"
#include <linux/skbuff.h>
-#include <linux/workqueue.h>
int batadv_v_elp_iface_enable(struct batadv_hard_iface *hard_iface);
void batadv_v_elp_iface_disable(struct batadv_hard_iface *hard_iface);
@@ -19,6 +18,5 @@ void batadv_v_elp_iface_activate(struct batadv_hard_iface *primary_iface,
void batadv_v_elp_primary_iface_set(struct batadv_hard_iface *primary_iface);
int batadv_v_elp_packet_recv(struct sk_buff *skb,
struct batadv_hard_iface *if_incoming);
-void batadv_v_elp_throughput_metric_update(struct work_struct *work);
#endif /* _NET_BATMAN_ADV_BAT_V_ELP_H_ */
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 04f6398b3a40..85a50096f5b2 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -596,9 +596,6 @@ struct batadv_hardif_neigh_node_bat_v {
* neighbor
*/
unsigned long last_unicast_tx;
-
- /** @metric_work: work queue callback item for metric update */
- struct work_struct metric_work;
};
/**