The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 8c8ecc98f5c65947b0070a24bac11e12e47cc65d
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021841-atrium-settle-8b58@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 8c8ecc98f5c65947b0070a24bac11e12e47cc65d Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven(a)narfation.org>
Date: Mon, 20 Jan 2025 00:06:11 +0100
Subject: [PATCH] batman-adv: Drop unmanaged ELP metric worker
The ELP worker needs to calculate new metric values for all neighbors
"reachable" over an interface. Some of the used metric sources require
locks which might need to sleep. This sleep is incompatible with the RCU
list iterator used for the recorded neighbors. The initial approach to work
around this problem was to queue another work item per neighbor and then
run this in a new context.
Even when this solved the RCU vs might_sleep() conflict, it had a major
problem: nothing was stopping the work item in case it was no longer needed
- for example because one of the related interfaces was removed or
the batman-adv module was unloaded - resulting in potential invalid memory
accesses.
Directly canceling the metric worker also has various problems:
* cancel_work_sync for a to-be-deactivated interface is called with
rtnl_lock held. But the code in the ELP metric worker also tries to use
rtnl_lock() - which will never return in this case. This also means that
cancel_work_sync would never return because it is waiting for the worker
to finish.
* iterating over the neighbor list for the to-be-deactivated interface is
currently done using the RCU specific methods. Which means that it is
possible to miss items when iterating over it without the associated
spinlock - a behaviour which is acceptable for a periodic metric check
but not for a cleanup routine (which must "stop" all still running
workers)
The better approach is to get rid of the per interface neighbor metric
worker and handle everything in the interface worker. The original problems
are solved by:
* creating a list of neighbors which require new metric information inside
the RCU protected context, then gathering the metric according to this list
outside the RCU protected context
* only using rtnl_trylock inside the metric gathering code to avoid a deadlock
when cancel_delayed_work_sync() is called in the interface removal code
(which is called with the rtnl_lock held)
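Distilled, the fix follows a common kernel pattern: collect references
under RCU into a private list, then do any sleeping work outside the
read-side critical section. A minimal sketch with hypothetical names
(struct neigh, neigh_hold(), neigh_put(), update_metric()), not the
actual driver code:

  struct queue_entry {
          struct neigh *neigh;
          struct list_head list;
  };

  static void periodic_work(struct iface *iface)
  {
          struct queue_entry *e, *tmp;
          struct neigh *neigh;
          LIST_HEAD(queue);

          /* Step 1: collect candidates under RCU; no sleeping here. */
          rcu_read_lock();
          list_for_each_entry_rcu(neigh, &iface->neigh_list, list) {
                  e = kzalloc(sizeof(*e), GFP_ATOMIC);
                  if (!e)
                          continue;       /* skip this neighbor this round */
                  neigh_hold(neigh);      /* keep it alive past rcu_read_unlock() */
                  e->neigh = neigh;
                  list_add(&e->list, &queue);
          }
          rcu_read_unlock();

          /* Step 2: work that may sleep runs outside the RCU section. */
          list_for_each_entry_safe(e, tmp, &queue, list) {
                  update_metric(e->neigh);        /* may use rtnl_trylock() */
                  neigh_put(e->neigh);
                  list_del(&e->list);
                  kfree(e);
          }
  }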
Cc: stable(a)vger.kernel.org
Fixes: c833484e5f38 ("batman-adv: ELP - compute the metric based on the estimated throughput")
Signed-off-by: Sven Eckelmann <sven(a)narfation.org>
Signed-off-by: Simon Wunderlich <sw(a)simonwunderlich.de>
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
index ac11f1f08db0..d35479c465e2 100644
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -113,8 +113,6 @@ static void
batadv_v_hardif_neigh_init(struct batadv_hardif_neigh_node *hardif_neigh)
{
ewma_throughput_init(&hardif_neigh->bat_v.throughput);
- INIT_WORK(&hardif_neigh->bat_v.metric_work,
- batadv_v_elp_throughput_metric_update);
}
/**
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index 65e52de52bcd..b065578b4436 100644
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -18,6 +18,7 @@
#include <linux/if_ether.h>
#include <linux/jiffies.h>
#include <linux/kref.h>
+#include <linux/list.h>
#include <linux/minmax.h>
#include <linux/netdevice.h>
#include <linux/nl80211.h>
@@ -26,6 +27,7 @@
#include <linux/rcupdate.h>
#include <linux/rtnetlink.h>
#include <linux/skbuff.h>
+#include <linux/slab.h>
#include <linux/stddef.h>
#include <linux/string.h>
#include <linux/types.h>
@@ -41,6 +43,18 @@
#include "routing.h"
#include "send.h"
+/**
+ * struct batadv_v_metric_queue_entry - list of hardif neighbors which require
+ * a metric update
+ */
+struct batadv_v_metric_queue_entry {
+ /** @hardif_neigh: hardif neighbor scheduled for metric update */
+ struct batadv_hardif_neigh_node *hardif_neigh;
+
+ /** @list: list node for metric_queue */
+ struct list_head list;
+};
+
/**
* batadv_v_elp_start_timer() - restart timer for ELP periodic work
* @hard_iface: the interface for which the timer has to be reset
@@ -137,10 +151,17 @@ static bool batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh,
goto default_throughput;
}
+ /* only use rtnl_trylock because the elp worker will be cancelled while
+ * the rtnl_lock is held. the cancel_delayed_work_sync() would otherwise
+ * wait forever when the elp work_item was started and it is then also
+ * trying to rtnl_lock
+ */
+ if (!rtnl_trylock())
+ return false;
+
/* if not a wifi interface, check if this device provides data via
* ethtool (e.g. an Ethernet adapter)
*/
- rtnl_lock();
ret = __ethtool_get_link_ksettings(hard_iface->net_dev, &link_settings);
rtnl_unlock();
if (ret == 0) {
@@ -175,31 +196,19 @@ static bool batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh,
/**
* batadv_v_elp_throughput_metric_update() - worker updating the throughput
* metric of a single hop neighbour
- * @work: the work queue item
+ * @neigh: the neighbour to probe
*/
-void batadv_v_elp_throughput_metric_update(struct work_struct *work)
+static void
+batadv_v_elp_throughput_metric_update(struct batadv_hardif_neigh_node *neigh)
{
- struct batadv_hardif_neigh_node_bat_v *neigh_bat_v;
- struct batadv_hardif_neigh_node *neigh;
u32 throughput;
bool valid;
- neigh_bat_v = container_of(work, struct batadv_hardif_neigh_node_bat_v,
- metric_work);
- neigh = container_of(neigh_bat_v, struct batadv_hardif_neigh_node,
- bat_v);
-
valid = batadv_v_elp_get_throughput(neigh, &throughput);
if (!valid)
- goto put_neigh;
+ return;
ewma_throughput_add(&neigh->bat_v.throughput, throughput);
-
-put_neigh:
- /* decrement refcounter to balance increment performed before scheduling
- * this task
- */
- batadv_hardif_neigh_put(neigh);
}
/**
@@ -273,14 +282,16 @@ batadv_v_elp_wifi_neigh_probe(struct batadv_hardif_neigh_node *neigh)
*/
static void batadv_v_elp_periodic_work(struct work_struct *work)
{
+ struct batadv_v_metric_queue_entry *metric_entry;
+ struct batadv_v_metric_queue_entry *metric_safe;
struct batadv_hardif_neigh_node *hardif_neigh;
struct batadv_hard_iface *hard_iface;
struct batadv_hard_iface_bat_v *bat_v;
struct batadv_elp_packet *elp_packet;
+ struct list_head metric_queue;
struct batadv_priv *bat_priv;
struct sk_buff *skb;
u32 elp_interval;
- bool ret;
bat_v = container_of(work, struct batadv_hard_iface_bat_v, elp_wq.work);
hard_iface = container_of(bat_v, struct batadv_hard_iface, bat_v);
@@ -316,6 +327,8 @@ static void batadv_v_elp_periodic_work(struct work_struct *work)
atomic_inc(&hard_iface->bat_v.elp_seqno);
+ INIT_LIST_HEAD(&metric_queue);
+
/* The throughput metric is updated on each sent packet. This way, if a
* node is dead and no longer sends packets, batman-adv is still able to
* react timely to its death.
@@ -340,16 +353,28 @@ static void batadv_v_elp_periodic_work(struct work_struct *work)
/* Reading the estimated throughput from cfg80211 is a task that
* may sleep and that is not allowed in an rcu protected
- * context. Therefore schedule a task for that.
+ * context. Therefore add it to metric_queue and process it
+ * outside rcu protected context.
*/
- ret = queue_work(batadv_event_workqueue,
- &hardif_neigh->bat_v.metric_work);
-
- if (!ret)
+ metric_entry = kzalloc(sizeof(*metric_entry), GFP_ATOMIC);
+ if (!metric_entry) {
batadv_hardif_neigh_put(hardif_neigh);
+ continue;
+ }
+
+ metric_entry->hardif_neigh = hardif_neigh;
+ list_add(&metric_entry->list, &metric_queue);
}
rcu_read_unlock();
+ list_for_each_entry_safe(metric_entry, metric_safe, &metric_queue, list) {
+ batadv_v_elp_throughput_metric_update(metric_entry->hardif_neigh);
+
+ batadv_hardif_neigh_put(metric_entry->hardif_neigh);
+ list_del(&metric_entry->list);
+ kfree(metric_entry);
+ }
+
restart_timer:
batadv_v_elp_start_timer(hard_iface);
out:
diff --git a/net/batman-adv/bat_v_elp.h b/net/batman-adv/bat_v_elp.h
index 9e2740195fa2..c9cb0a307100 100644
--- a/net/batman-adv/bat_v_elp.h
+++ b/net/batman-adv/bat_v_elp.h
@@ -10,7 +10,6 @@
#include "main.h"
#include <linux/skbuff.h>
-#include <linux/workqueue.h>
int batadv_v_elp_iface_enable(struct batadv_hard_iface *hard_iface);
void batadv_v_elp_iface_disable(struct batadv_hard_iface *hard_iface);
@@ -19,6 +18,5 @@ void batadv_v_elp_iface_activate(struct batadv_hard_iface *primary_iface,
void batadv_v_elp_primary_iface_set(struct batadv_hard_iface *primary_iface);
int batadv_v_elp_packet_recv(struct sk_buff *skb,
struct batadv_hard_iface *if_incoming);
-void batadv_v_elp_throughput_metric_update(struct work_struct *work);
#endif /* _NET_BATMAN_ADV_BAT_V_ELP_H_ */
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 04f6398b3a40..85a50096f5b2 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -596,9 +596,6 @@ struct batadv_hardif_neigh_node_bat_v {
* neighbor
*/
unsigned long last_unicast_tx;
-
- /** @metric_work: work queue callback item for metric update */
- struct work_struct metric_work;
};
/**
The quilt patch titled
Subject: panic: call panic handlers before panic_other_cpus_shutdown()
has been removed from the -mm tree. Its filename was
panic-call-panic-handlers-before-panic_other_cpus_shutdown.patch
This patch was dropped because an updated version will be issued
------------------------------------------------------
From: Hamza Mahfooz <hamzamahfooz(a)linux.microsoft.com>
Subject: panic: call panic handlers before panic_other_cpus_shutdown()
Date: Fri, 21 Feb 2025 16:30:52 -0500
Since the panic handlers may require certain CPUs to be online to panic
gracefully, we should call them before turning off SMP. Without this
re-ordering, on Hyper-V hv_panic_vmbus_unload() times out, because the
vmbus channel is bound to VMBUS_CONNECT_CPU and, unless the crashing CPU is
the same as VMBUS_CONNECT_CPU, VMBUS_CONNECT_CPU will be offlined by
crash_smp_send_stop() before the vmbus channel can be deconstructed.
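With the re-ordering, the tail of panic() ends up in the following
sequence (condensed from the hunk below):

  if (!_crash_kexec_post_notifiers)
          __crash_kexec(NULL);

  /* run the panic notifiers while the other CPUs are still online */
  atomic_notifier_call_chain(&panic_notifier_list, 0, buf);

  panic_other_cpus_shutdown(_crash_kexec_post_notifiers);
  printk_legacy_allow_panic_sync();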
Link: https://lkml.kernel.org/r/20250221213055.133849-1-hamzamahfooz@linux.micros…
Signed-off-by: Hamza Mahfooz <hamzamahfooz(a)linux.microsoft.com>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Baoquan he <bhe(a)redhat.com>
Cc: Dexuan Cui <decui(a)microsoft.com>
Cc: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Cc: Haiyang Zhang <haiyangz(a)microsoft.com>
Cc: Hamza Mahfooz <hamzamahfooz(a)linux.microsoft.com>
Cc: Jani Nikula <jani.nikula(a)intel.com>
Cc: John Ogness <john.ogness(a)linutronix.de>
Cc: Petr Mladek <pmladek(a)suse.com>
Cc: Ryo Takakura <takakura(a)valinux.co.jp>
Cc: Wei Liu <wei.liu(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
kernel/panic.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
--- a/kernel/panic.c~panic-call-panic-handlers-before-panic_other_cpus_shutdown
+++ a/kernel/panic.c
@@ -372,16 +372,16 @@ void panic(const char *fmt, ...)
if (!_crash_kexec_post_notifiers)
__crash_kexec(NULL);
- panic_other_cpus_shutdown(_crash_kexec_post_notifiers);
-
- printk_legacy_allow_panic_sync();
-
/*
* Run any panic handlers, including those that might need to
* add information to the kmsg dump output.
*/
atomic_notifier_call_chain(&panic_notifier_list, 0, buf);
+ panic_other_cpus_shutdown(_crash_kexec_post_notifiers);
+
+ printk_legacy_allow_panic_sync();
+
panic_print_sys_info(false);
kmsg_dump_desc(KMSG_DUMP_PANIC, buf);
_
Patches currently in -mm which might be from hamzamahfooz(a)linux.microsoft.com are
The patch titled
Subject: panic: call panic handlers before panic_other_cpus_shutdown()
has been added to the -mm mm-nonmm-unstable branch. Its filename is
panic-call-panic-handlers-before-panic_other_cpus_shutdown.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-nonmm-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Hamza Mahfooz <hamzamahfooz(a)linux.microsoft.com>
Subject: panic: call panic handlers before panic_other_cpus_shutdown()
Date: Fri, 21 Feb 2025 16:30:52 -0500
Since the panic handlers may require certain CPUs to be online to panic
gracefully, we should call them before turning off SMP. Without this
re-ordering, on Hyper-V hv_panic_vmbus_unload() times out, because the
vmbus channel is bound to VMBUS_CONNECT_CPU and, unless the crashing CPU is
the same as VMBUS_CONNECT_CPU, VMBUS_CONNECT_CPU will be offlined by
crash_smp_send_stop() before the vmbus channel can be deconstructed.
Link: https://lkml.kernel.org/r/20250221213055.133849-1-hamzamahfooz@linux.micros…
Signed-off-by: Hamza Mahfooz <hamzamahfooz(a)linux.microsoft.com>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Baoquan he <bhe(a)redhat.com>
Cc: Dexuan Cui <decui(a)microsoft.com>
Cc: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Cc: Haiyang Zhang <haiyangz(a)microsoft.com>
Cc: Hamza Mahfooz <hamzamahfooz(a)linux.microsoft.com>
Cc: Jani Nikula <jani.nikula(a)intel.com>
Cc: John Ogness <john.ogness(a)linutronix.de>
Cc: Petr Mladek <pmladek(a)suse.com>
Cc: Ryo Takakura <takakura(a)valinux.co.jp>
Cc: Wei Liu <wei.liu(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
kernel/panic.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
--- a/kernel/panic.c~panic-call-panic-handlers-before-panic_other_cpus_shutdown
+++ a/kernel/panic.c
@@ -372,16 +372,16 @@ void panic(const char *fmt, ...)
if (!_crash_kexec_post_notifiers)
__crash_kexec(NULL);
- panic_other_cpus_shutdown(_crash_kexec_post_notifiers);
-
- printk_legacy_allow_panic_sync();
-
/*
* Run any panic handlers, including those that might need to
* add information to the kmsg dump output.
*/
atomic_notifier_call_chain(&panic_notifier_list, 0, buf);
+ panic_other_cpus_shutdown(_crash_kexec_post_notifiers);
+
+ printk_legacy_allow_panic_sync();
+
panic_print_sys_info(false);
kmsg_dump_desc(KMSG_DUMP_PANIC, buf);
_
Patches currently in -mm which might be from hamzamahfooz(a)linux.microsoft.com are
panic-call-panic-handlers-before-panic_other_cpus_shutdown.patch
From: Chen-Yu Tsai <wens(a)csie.org>
The DWMAC 1000 DMA capabilities register does not provide actual
FIFO sizes, nor does the driver really care. If they are not
provided via some other means, the driver will work fine, only
disallowing changing the MTU setting.
The recent commit 8865d22656b4 ("net: stmmac: Specify hardware
capability value when FIFO size isn't specified") changed this by
requiring the FIFO sizes to be provided, breaking devices that were
working just fine.
Provide the FIFO sizes through the driver's platform data, to not
only fix the breakage, but also enable MTU changes. The FIFO sizes
are confirmed to be the same across RK3288, RK3328, RK3399 and PX30,
based on their respective manuals. It is likely that Rockchip
synthesized their DWMAC 1000 with the same parameters on all their
chips that have it.
Fixes: eaf4fac47807 ("net: stmmac: Do not accept invalid MTU values")
Fixes: 8865d22656b4 ("net: stmmac: Specify hardware capability value when FIFO size isn't specified")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Chen-Yu Tsai <wens(a)csie.org>
---
(Resending to net-next instead of netdev.)
The commit that broke things has already been reverted in netdev.
The reason for stable inclusion is not to fix the device breakage
(which only broke in v6.14-rc1), but to provide the values so that MTU
changes can work in older kernels.
drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
index a4dc89e23a68..71a4c4967467 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
@@ -1966,8 +1966,11 @@ static int rk_gmac_probe(struct platform_device *pdev)
/* If the stmmac is not already selected as gmac4,
* then make sure we fallback to gmac.
*/
- if (!plat_dat->has_gmac4)
+ if (!plat_dat->has_gmac4) {
plat_dat->has_gmac = true;
+ plat_dat->rx_fifo_size = 4096;
+ plat_dat->tx_fifo_size = 2048;
+ }
plat_dat->fix_mac_speed = rk_fix_speed;
plat_dat->bsp_priv = rk_gmac_setup(pdev, plat_dat, data);
--
2.39.5
Add a check for the return value of get_zeroed_page() in
sclp_console_init() to prevent a null pointer dereference.
Furthermore, to fix the memory leak caused by the loop
allocation, add a free helper that releases the already
allocated pages.
Fixes: 4c8f4794b61e ("[S390] sclp console: convert from bootmem to slab")
Cc: stable(a)vger.kernel.org
Signed-off-by: Haoxiang Li <haoxiang_li2024(a)163.com>
---
Changes in v2:
- Add a free helper to fix the memory leak caused by the loop allocation.
- Thanks Heiko! I realized that the v1 patch overlooked a potential memory leak.
After consideration, I chose to do the full exercise. I noticed similar
handling in [1]; following that handling, I submit this v2 patch. Thanks again!
Reference link:
[1]https://github.com/torvalds/linux/blob/master/drivers/s390/char/sclp_vt22…
---
drivers/s390/char/sclp_con.c | 17 +++++++++++++++++
1 file changed, 17 insertions(+)
diff --git a/drivers/s390/char/sclp_con.c b/drivers/s390/char/sclp_con.c
index e5d947c763ea..c87b0c204718 100644
--- a/drivers/s390/char/sclp_con.c
+++ b/drivers/s390/char/sclp_con.c
@@ -263,6 +263,19 @@ static struct console sclp_console =
.index = 0 /* ttyS0 */
};
+/*
+ * Release allocated pages.
+ */
+static void __init __sclp_console_free_pages(void)
+{
+ struct list_head *page, *p;
+
+ list_for_each_safe(page, p, &sclp_con_pages) {
+ list_del(page);
+ free_page((unsigned long) page);
+ }
+}
+
/*
* called by console_init() in drivers/char/tty_io.c at boot-time.
*/
@@ -282,6 +295,10 @@ sclp_console_init(void)
/* Allocate pages for output buffering */
for (i = 0; i < sclp_console_pages; i++) {
page = (void *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
+ if (!page) {
+ __sclp_console_free_pages();
+ return -ENOMEM;
+ }
list_add_tail(page, &sclp_con_pages);
}
sclp_conbuf = NULL;
--
2.25.1
Dear Maintainers,
I would like to bring to your attention the need to backport several patches to the 5.10.x kernel to address issues with the self-tests for rsa-caam. In kernel version 5.10.231-rt123, the introduction of commit dead96e1c748ff84ecac83ea3c5a4d7a2e57e051 (crypto: caam - add error check to caam_rsa_set_priv_key_form), which added checks for memory allocation errors, caused the self-test for rsa-caam to fail in FIPS mode, resulting in the following error message:
alg: akcipher: test 1 failed for rsa-caam, err=-12
Kernel panic - not syncing:
alg: self-tests for rsa-caam (rsa) failed in fips mode!
The following patches should be backported to resolve this issue:
8aaa4044999863199124991dfa489fd248d73789 (crypto: testmgr - some more fixes to RSA test vectors)
d824c61df41758f8c045e9522e850b615ee0ca1c (crypto: testmgr - populate RSA CRT parameters in RSA test vectors)
ceb31f1c4c6894c4f9e65f4381781917a7a4c898 (crypto: testmgr - fix version number of RSA tests)
88c2d62e7920edb50661656c85932b5cd100069b (crypto: testmgr - Fix wrong test case of RSA)
1040cf9c9e7518600e7fcc24764d1c4b8a1b62f5 (crypto: testmgr - fix wrong key length for pkcs1pad)
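Following the usual stable backport workflow, this would look roughly
like the commands below; the cherry-pick order is assumed to match the
list above:

  git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
  git checkout FETCH_HEAD
  git cherry-pick -x 8aaa4044999863199124991dfa489fd248d73789
  git cherry-pick -x d824c61df41758f8c045e9522e850b615ee0ca1c
  git cherry-pick -x ceb31f1c4c6894c4f9e65f4381781917a7a4c898
  git cherry-pick -x 88c2d62e7920edb50661656c85932b5cd100069b
  git cherry-pick -x 1040cf9c9e7518600e7fcc24764d1c4b8a1b62f5
  # <resolve conflicts, build, test in FIPS mode>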
Thank you for your attention to this matter.
Regards,
Kamila Sionek
TSC could be reset in deep ACPI sleep states, even with invariant TSC.
That's the reason we have sched_clock() save/restore functions, to deal
with this situation. But it happens that such functions are guarded with a
check for the stability of sched_clock - if it is not considered stable, the
save/restore routines aren't executed.
On top of that, we have a clear comment on native_sched_clock() saying
that *even* with TSC unstable, we continue using TSC for sched_clock due
to its speed. In other words, if TSC gets detected as unstable, it marks
sched_clock as unstable as well, so subsequent S3 sleep cycles could bring
bogus sched_clock values due to the lack of the save/restore mechanism,
causing warnings like this:
[22.954918] ------------[ cut here ]------------
[22.954923] Delta way too big! 18446743750843854390 ts=18446744072977390405 before=322133536015 after=322133536015 write stamp=18446744072977390405
[22.954923] If you just came from a suspend/resume,
[22.954923] please switch to the trace global clock:
[22.954923] echo global > /sys/kernel/tracing/trace_clock
[22.954923] or add trace_clock=global to the kernel command line
[22.954937] WARNING: CPU: 2 PID: 5728 at kernel/trace/ring_buffer.c:2890 rb_add_timestamp+0x193/0x1c0
Notice that the above was reproduced even with "trace_clock=global".
The fix for that is to _always_ save/restore the sched_clock on the suspend
cycle _if TSC is used_ as sched_clock - only if we fall back to jiffies
does the sched_clock_stable() check become relevant for deciding whether
to save/restore the sched_clock.
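For context, the save/restore mechanism works roughly like this (a
simplified sketch of the idea; adjust_cyc2ns_offset() is a hypothetical
helper, not the exact tsc.c code):

  /* suspend: remember the last sched_clock() reading */
  cyc2ns_suspend = sched_clock();

  /* resume: the TSC may have been reset, so compute the offset that
   * lets sched_clock() continue from the saved value instead of
   * jumping, and fold it into the per-CPU cyc2ns conversion data.
   */
  offset = cyc2ns_suspend - sched_clock();
  adjust_cyc2ns_offset(offset);   /* hypothetical helper */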
Cc: stable(a)vger.kernel.org
Signed-off-by: Guilherme G. Piccoli <gpiccoli(a)igalia.com>
---
arch/x86/kernel/tsc.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 34dec0b72ea8..88e5a4ed9db3 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -959,7 +959,7 @@ static unsigned long long cyc2ns_suspend;
void tsc_save_sched_clock_state(void)
{
- if (!sched_clock_stable())
+ if (!static_branch_likely(&__use_tsc) && !sched_clock_stable())
return;
cyc2ns_suspend = sched_clock();
@@ -979,7 +979,7 @@ void tsc_restore_sched_clock_state(void)
unsigned long flags;
int cpu;
- if (!sched_clock_stable())
+ if (!static_branch_likely(&__use_tsc) && !sched_clock_stable())
return;
local_irq_save(flags);
--
2.47.1
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 8c8ecc98f5c65947b0070a24bac11e12e47cc65d
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021839-primal-stiffen-ba9e@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 8c8ecc98f5c65947b0070a24bac11e12e47cc65d Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven(a)narfation.org>
Date: Mon, 20 Jan 2025 00:06:11 +0100
Subject: [PATCH] batman-adv: Drop unmanaged ELP metric worker
The ELP worker needs to calculate new metric values for all neighbors
"reachable" over an interface. Some of the used metric sources require
locks which might need to sleep. This sleep is incompatible with the RCU
list iterator used for the recorded neighbors. The initial approach to work
around this problem was to queue another work item per neighbor and then
run this in a new context.
Even when this solved the RCU vs might_sleep() conflict, it had a major
problem: nothing was stopping the work item in case it was no longer needed
- for example because one of the related interfaces was removed or
the batman-adv module was unloaded - resulting in potential invalid memory
accesses.
Directly canceling the metric worker also has various problems:
* cancel_work_sync for a to-be-deactivated interface is called with
rtnl_lock held. But the code in the ELP metric worker also tries to use
rtnl_lock() - which will never return in this case. This also means that
cancel_work_sync would never return because it is waiting for the worker
to finish.
* iterating over the neighbor list for the to-be-deactivated interface is
currently done using the RCU specific methods. Which means that it is
possible to miss items when iterating over it without the associated
spinlock - a behaviour which is acceptable for a periodic metric check
but not for a cleanup routine (which must "stop" all still running
workers)
The better approach is to get rid of the per interface neighbor metric
worker and handle everything in the interface worker. The original problems
are solved by:
* creating a list of neighbors which require new metric information inside
the RCU protected context, then gathering the metric according to this list
outside the RCU protected context
* only using rtnl_trylock inside the metric gathering code to avoid a deadlock
when cancel_delayed_work_sync() is called in the interface removal code
(which is called with the rtnl_lock held)
Cc: stable(a)vger.kernel.org
Fixes: c833484e5f38 ("batman-adv: ELP - compute the metric based on the estimated throughput")
Signed-off-by: Sven Eckelmann <sven(a)narfation.org>
Signed-off-by: Simon Wunderlich <sw(a)simonwunderlich.de>
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
index ac11f1f08db0..d35479c465e2 100644
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -113,8 +113,6 @@ static void
batadv_v_hardif_neigh_init(struct batadv_hardif_neigh_node *hardif_neigh)
{
ewma_throughput_init(&hardif_neigh->bat_v.throughput);
- INIT_WORK(&hardif_neigh->bat_v.metric_work,
- batadv_v_elp_throughput_metric_update);
}
/**
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index 65e52de52bcd..b065578b4436 100644
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -18,6 +18,7 @@
#include <linux/if_ether.h>
#include <linux/jiffies.h>
#include <linux/kref.h>
+#include <linux/list.h>
#include <linux/minmax.h>
#include <linux/netdevice.h>
#include <linux/nl80211.h>
@@ -26,6 +27,7 @@
#include <linux/rcupdate.h>
#include <linux/rtnetlink.h>
#include <linux/skbuff.h>
+#include <linux/slab.h>
#include <linux/stddef.h>
#include <linux/string.h>
#include <linux/types.h>
@@ -41,6 +43,18 @@
#include "routing.h"
#include "send.h"
+/**
+ * struct batadv_v_metric_queue_entry - list of hardif neighbors which require
+ * a metric update
+ */
+struct batadv_v_metric_queue_entry {
+ /** @hardif_neigh: hardif neighbor scheduled for metric update */
+ struct batadv_hardif_neigh_node *hardif_neigh;
+
+ /** @list: list node for metric_queue */
+ struct list_head list;
+};
+
/**
* batadv_v_elp_start_timer() - restart timer for ELP periodic work
* @hard_iface: the interface for which the timer has to be reset
@@ -137,10 +151,17 @@ static bool batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh,
goto default_throughput;
}
+ /* only use rtnl_trylock because the elp worker will be cancelled while
+ * the rtnl_lock is held. the cancel_delayed_work_sync() would otherwise
+ * wait forever when the elp work_item was started and it is then also
+ * trying to rtnl_lock
+ */
+ if (!rtnl_trylock())
+ return false;
+
/* if not a wifi interface, check if this device provides data via
* ethtool (e.g. an Ethernet adapter)
*/
- rtnl_lock();
ret = __ethtool_get_link_ksettings(hard_iface->net_dev, &link_settings);
rtnl_unlock();
if (ret == 0) {
@@ -175,31 +196,19 @@ static bool batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh,
/**
* batadv_v_elp_throughput_metric_update() - worker updating the throughput
* metric of a single hop neighbour
- * @work: the work queue item
+ * @neigh: the neighbour to probe
*/
-void batadv_v_elp_throughput_metric_update(struct work_struct *work)
+static void
+batadv_v_elp_throughput_metric_update(struct batadv_hardif_neigh_node *neigh)
{
- struct batadv_hardif_neigh_node_bat_v *neigh_bat_v;
- struct batadv_hardif_neigh_node *neigh;
u32 throughput;
bool valid;
- neigh_bat_v = container_of(work, struct batadv_hardif_neigh_node_bat_v,
- metric_work);
- neigh = container_of(neigh_bat_v, struct batadv_hardif_neigh_node,
- bat_v);
-
valid = batadv_v_elp_get_throughput(neigh, &throughput);
if (!valid)
- goto put_neigh;
+ return;
ewma_throughput_add(&neigh->bat_v.throughput, throughput);
-
-put_neigh:
- /* decrement refcounter to balance increment performed before scheduling
- * this task
- */
- batadv_hardif_neigh_put(neigh);
}
/**
@@ -273,14 +282,16 @@ batadv_v_elp_wifi_neigh_probe(struct batadv_hardif_neigh_node *neigh)
*/
static void batadv_v_elp_periodic_work(struct work_struct *work)
{
+ struct batadv_v_metric_queue_entry *metric_entry;
+ struct batadv_v_metric_queue_entry *metric_safe;
struct batadv_hardif_neigh_node *hardif_neigh;
struct batadv_hard_iface *hard_iface;
struct batadv_hard_iface_bat_v *bat_v;
struct batadv_elp_packet *elp_packet;
+ struct list_head metric_queue;
struct batadv_priv *bat_priv;
struct sk_buff *skb;
u32 elp_interval;
- bool ret;
bat_v = container_of(work, struct batadv_hard_iface_bat_v, elp_wq.work);
hard_iface = container_of(bat_v, struct batadv_hard_iface, bat_v);
@@ -316,6 +327,8 @@ static void batadv_v_elp_periodic_work(struct work_struct *work)
atomic_inc(&hard_iface->bat_v.elp_seqno);
+ INIT_LIST_HEAD(&metric_queue);
+
/* The throughput metric is updated on each sent packet. This way, if a
* node is dead and no longer sends packets, batman-adv is still able to
* react timely to its death.
@@ -340,16 +353,28 @@ static void batadv_v_elp_periodic_work(struct work_struct *work)
/* Reading the estimated throughput from cfg80211 is a task that
* may sleep and that is not allowed in an rcu protected
- * context. Therefore schedule a task for that.
+ * context. Therefore add it to metric_queue and process it
+ * outside rcu protected context.
*/
- ret = queue_work(batadv_event_workqueue,
- &hardif_neigh->bat_v.metric_work);
-
- if (!ret)
+ metric_entry = kzalloc(sizeof(*metric_entry), GFP_ATOMIC);
+ if (!metric_entry) {
batadv_hardif_neigh_put(hardif_neigh);
+ continue;
+ }
+
+ metric_entry->hardif_neigh = hardif_neigh;
+ list_add(&metric_entry->list, &metric_queue);
}
rcu_read_unlock();
+ list_for_each_entry_safe(metric_entry, metric_safe, &metric_queue, list) {
+ batadv_v_elp_throughput_metric_update(metric_entry->hardif_neigh);
+
+ batadv_hardif_neigh_put(metric_entry->hardif_neigh);
+ list_del(&metric_entry->list);
+ kfree(metric_entry);
+ }
+
restart_timer:
batadv_v_elp_start_timer(hard_iface);
out:
diff --git a/net/batman-adv/bat_v_elp.h b/net/batman-adv/bat_v_elp.h
index 9e2740195fa2..c9cb0a307100 100644
--- a/net/batman-adv/bat_v_elp.h
+++ b/net/batman-adv/bat_v_elp.h
@@ -10,7 +10,6 @@
#include "main.h"
#include <linux/skbuff.h>
-#include <linux/workqueue.h>
int batadv_v_elp_iface_enable(struct batadv_hard_iface *hard_iface);
void batadv_v_elp_iface_disable(struct batadv_hard_iface *hard_iface);
@@ -19,6 +18,5 @@ void batadv_v_elp_iface_activate(struct batadv_hard_iface *primary_iface,
void batadv_v_elp_primary_iface_set(struct batadv_hard_iface *primary_iface);
int batadv_v_elp_packet_recv(struct sk_buff *skb,
struct batadv_hard_iface *if_incoming);
-void batadv_v_elp_throughput_metric_update(struct work_struct *work);
#endif /* _NET_BATMAN_ADV_BAT_V_ELP_H_ */
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 04f6398b3a40..85a50096f5b2 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -596,9 +596,6 @@ struct batadv_hardif_neigh_node_bat_v {
* neighbor
*/
unsigned long last_unicast_tx;
-
- /** @metric_work: work queue callback item for metric update */
- struct work_struct metric_work;
};
/**
On error, restore anything still on the pin_list back to the invalidation
list. As for the actual pin, so long as the vma is tracked on
either list it should get picked up on the next pin; however, it looks
possible for the vma to get nuked while still being present on this per-vm
pin_list, leading to corruption. An alternative might then be to instead
just remove the link when destroying the vma.
v2:
- Also add some asserts.
- Keep the overzealous locking so that we are consistent with the docs;
updating the docs and related bits will be done as a follow up.
Fixes: ed2bdf3b264d ("drm/xe/vm: Subclass userptr vmas")
Suggested-by: Matthew Brost <matthew.brost(a)intel.com>
Signed-off-by: Matthew Auld <matthew.auld(a)intel.com>
Cc: Thomas Hellström <thomas.hellstrom(a)linux.intel.com>
Cc: <stable(a)vger.kernel.org> # v6.8+
---
drivers/gpu/drm/xe/xe_vm.c | 28 +++++++++++++++++++++-------
1 file changed, 21 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index d664f2e418b2..3dbfb20a7c60 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -667,15 +667,16 @@ int xe_vm_userptr_pin(struct xe_vm *vm)
/* Collect invalidated userptrs */
spin_lock(&vm->userptr.invalidated_lock);
+ xe_assert(vm->xe, list_empty(&vm->userptr.repin_list));
list_for_each_entry_safe(uvma, next, &vm->userptr.invalidated,
userptr.invalidate_link) {
list_del_init(&uvma->userptr.invalidate_link);
- list_move_tail(&uvma->userptr.repin_link,
- &vm->userptr.repin_list);
+ list_add_tail(&uvma->userptr.repin_link,
+ &vm->userptr.repin_list);
}
spin_unlock(&vm->userptr.invalidated_lock);
- /* Pin and move to temporary list */
+ /* Pin and move to bind list */
list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list,
userptr.repin_link) {
err = xe_vma_userptr_pin_pages(uvma);
@@ -691,10 +692,10 @@ int xe_vm_userptr_pin(struct xe_vm *vm)
err = xe_vm_invalidate_vma(&uvma->vma);
xe_vm_unlock(vm);
if (err)
- return err;
+ break;
} else {
- if (err < 0)
- return err;
+ if (err)
+ break;
list_del_init(&uvma->userptr.repin_link);
list_move_tail(&uvma->vma.combined_links.rebind,
@@ -702,7 +703,19 @@ int xe_vm_userptr_pin(struct xe_vm *vm)
}
}
- return 0;
+ if (err) {
+ down_write(&vm->userptr.notifier_lock);
+ spin_lock(&vm->userptr.invalidated_lock);
+ list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list,
+ userptr.repin_link) {
+ list_del_init(&uvma->userptr.repin_link);
+ list_move_tail(&uvma->userptr.invalidate_link,
+ &vm->userptr.invalidated);
+ }
+ spin_unlock(&vm->userptr.invalidated_lock);
+ up_write(&vm->userptr.notifier_lock);
+ }
+ return err;
}
/**
@@ -1067,6 +1080,7 @@ static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED);
spin_lock(&vm->userptr.invalidated_lock);
+ xe_assert(vm->xe, list_empty(&to_userptr_vma(vma)->userptr.repin_link));
list_del(&to_userptr_vma(vma)->userptr.invalidate_link);
spin_unlock(&vm->userptr.invalidated_lock);
} else if (!xe_vma_is_null(vma)) {
--
2.48.1
Hi all,
On 2025/2/22 00:48, Mingcong Bai wrote:
> Based on the dmesg messages from the original reporter:
>
> [ 4.964073] ACPI: \_SB_.PCI0.LPCB.EC__.HKEY: BCTG evaluated but flagged as error
> [ 4.964083] thinkpad_acpi: Error probing battery 2
>
> Lenovo ThinkPad X131e also needs this battery quirk.
>
> Reported-by: Fan Yang <804284660(a)qq.com>
> Tested-by: Fan Yang <804284660(a)qq.com>
> Co-developed-by: Xi Ruoyao <xry111(a)xry111.site>
> Signed-off-by: Xi Ruoyao <xry111(a)xry111.site>
> Signed-off-by: Mingcong Bai <jeffbai(a)aosc.io>
> ---
> drivers/platform/x86/thinkpad_acpi.c | 1 +
> 1 file changed, 1 insertion(+)
>
> diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
> index 1fcb0f99695a7..64765c6939a50 100644
> --- a/drivers/platform/x86/thinkpad_acpi.c
> +++ b/drivers/platform/x86/thinkpad_acpi.c
> @@ -9960,6 +9960,7 @@ static const struct tpacpi_quirk battery_quirk_table[] __initconst = {
> * Individual addressing is broken on models that expose the
> * primary battery as BAT1.
> */
> + TPACPI_Q_LNV('G', '8', true), /* ThinkPad X131e */
> TPACPI_Q_LNV('8', 'F', true), /* Thinkpad X120e */
> TPACPI_Q_LNV('J', '7', true), /* B5400 */
> TPACPI_Q_LNV('J', 'I', true), /* Thinkpad 11e */
I forgot to include a Cc to stable. This issue dates back at least to
5.19 (the oldest version available in our distro repository).
Cc: stable(a)vger.kernel.org
Best Regards,
Mingcong Bai
This series primarily adds checks at relevant places in the venus driver
where there are possible OOB accesses due to unexpected payloads from the
venus firmware. Each patch describes the specific OOB possibility.
Please review and share your feedback.
Validated on sc7180 (v4), rb5 (v6) and db410c (v1).
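The general shape of such a check, as a minimal sketch (the packet
layout and names are assumptions, not the actual hfi_parser code):

  struct pkt_hdr {
          u32 size;       /* total packet size claimed by firmware */
          u32 type;
  };

  static int parse_packets(const u8 *buf, size_t avail)
  {
          while (avail >= sizeof(struct pkt_hdr)) {
                  const struct pkt_hdr *hdr = (const void *)buf;

                  /* OOB guard: never trust a size smaller than a header
                   * or larger than what is left in the buffer.
                   */
                  if (hdr->size < sizeof(*hdr) || hdr->size > avail)
                          return -EINVAL;

                  /* ... dispatch on hdr->type, reading at most
                   * hdr->size bytes starting at buf ...
                   */

                  buf += hdr->size;
                  avail -= hdr->size;
          }
          return 0;
  }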
Changes in v4:
- fix an uninitialized variable (media CI)
- Link to v3: https://lore.kernel.org/r/20250128-venus_oob_2-v3-0-0144ecee68d8@quicinc.com
Changes in v3:
- update the packet parsing logic in hfi_parser. The utility parsing api
now returns the size of data parsed, accordingly the parser adjust the
remaining bytes, taking care of OOB scenario as well (Bryan)
- Link to v2:
https://lore.kernel.org/r/20241128-venus_oob_2-v2-0-483ae0a464b8@quicinc.com
Changes in v2:
- init_codec to always update with latest payload from firmware
(Dmitry/Bryan)
- Rewrite the logic of packet parsing to consider payload size for
different packet type (Bryan)
- Consider reading sfr data till available space (Dmitry)
- Add reviewed-by tags
- Link to v1:
https://lore.kernel.org/all/20241105-venus_oob-v1-0-8d4feedfe2bb@quicinc.co…
Signed-off-by: Vikash Garodia <quic_vgarodia(a)quicinc.com>
---
Vikash Garodia (4):
media: venus: hfi_parser: add check to avoid out of bound access
media: venus: hfi_parser: refactor hfi packet parsing logic
media: venus: hfi: add check to handle incorrect queue size
media: venus: hfi: add a check to handle OOB in sfr region
drivers/media/platform/qcom/venus/hfi_parser.c | 96 +++++++++++++++++++-------
drivers/media/platform/qcom/venus/hfi_venus.c | 15 +++-
2 files changed, 83 insertions(+), 28 deletions(-)
---
base-commit: c7ccf3683ac9746b263b0502255f5ce47f64fe0a
change-id: 20241115-venus_oob_2-21708239176a
Best regards,
--
Vikash Garodia <quic_vgarodia(a)quicinc.com>
From: Sebastian Andrzej Siewior <bigeasy(a)linutronix.de>
The function tracer should record the preemption level at the point when
the function is invoked. If the tracing subsystem decrements the
preemption counter, it needs to correct for this before feeding the data into
the trace buffer. This was broken in the commit cited below while
shifting the preempt-disabled section.
Use tracing_gen_ctx_dec() which properly subtracts one from the
preemption counter on a preemptible kernel.
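Conceptually (a simplified sketch, not the actual tracer code;
__record_event() is hypothetical):

  /* The tracing core disables preemption around the callback, so
   * preempt_count() here is one higher than at the call site of the
   * traced function.
   */
  preempt_disable_notrace();
  /* tracing_gen_ctx_dec() compensates by subtracting the tracer's own
   * preempt-disable before the context is recorded.
   */
  trace_ctx = tracing_gen_ctx_dec();
  __record_event(trace_ctx);      /* hypothetical recording step */
  preempt_enable_notrace();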
Cc: stable(a)vger.kernel.org
Cc: Wander Lairson Costa <wander(a)redhat.com>
Cc: Masami Hiramatsu <mhiramat(a)kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers(a)efficios.com>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Link: https://lore.kernel.org/20250220140749.pfw8qoNZ@linutronix.de
Fixes: ce5e48036c9e7 ("ftrace: disable preemption when recursion locked")
Signed-off-by: Sebastian Andrzej Siewior <bigeasy(a)linutronix.de>
Tested-by: Wander Lairson Costa <wander(a)redhat.com>
Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
---
kernel/trace/trace_functions.c | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index d358c9935164..df56f9b76010 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -216,7 +216,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip,
parent_ip = function_get_true_parent_ip(parent_ip, fregs);
- trace_ctx = tracing_gen_ctx();
+ trace_ctx = tracing_gen_ctx_dec();
data = this_cpu_ptr(tr->array_buffer.data);
if (!atomic_read(&data->disabled))
@@ -321,7 +321,6 @@ function_no_repeats_trace_call(unsigned long ip, unsigned long parent_ip,
struct trace_array *tr = op->private;
struct trace_array_cpu *data;
unsigned int trace_ctx;
- unsigned long flags;
int bit;
if (unlikely(!tr->function_enabled))
@@ -347,8 +346,7 @@ function_no_repeats_trace_call(unsigned long ip, unsigned long parent_ip,
if (is_repeat_check(tr, last_info, ip, parent_ip))
goto out;
- local_save_flags(flags);
- trace_ctx = tracing_gen_ctx_flags(flags);
+ trace_ctx = tracing_gen_ctx_dec();
process_repeats(tr, ip, parent_ip, last_info, trace_ctx);
trace_function(tr, ip, parent_ip, trace_ctx);
--
2.47.2
From: Steven Rostedt <rostedt(a)goodmis.org>
When adding a new fprobe, it will update the function hash to the
functions the fprobe is attached to and register with function graph to
have it call the registered functions. The fprobe_graph_active variable
keeps track of the number of fprobes that are using function graph.
If two fprobes attach to the same function, it increments the
fprobe_graph_active for each of them. But when they are removed, the first
fprobe to be removed will see that the function it is attached to is also
used by another fprobe and it will not remove that function from
function_graph. The logic will skip decrementing the fprobe_graph_active
variable.
This causes the fprobe_graph_active variable to not go to zero when all
fprobes are removed, and in doing so it does not unregister from
function graph. As the fgraph ops hash will now be empty, and an empty
filter hash means all functions are enabled, this triggers function graph
to add a callback to the fprobe infrastructure for every function!
# echo "f:myevent1 kernel_clone" >> /sys/kernel/tracing/dynamic_events
# echo "f:myevent2 kernel_clone%return" >> /sys/kernel/tracing/dynamic_events
# cat /sys/kernel/tracing/enabled_functions
kernel_clone (1) tramp: 0xffffffffc0024000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
# > /sys/kernel/tracing/dynamic_events
# cat /sys/kernel/tracing/enabled_functions
trace_initcall_start_cb (1) tramp: 0xffffffffc0026000 (function_trace_call+0x0/0x170) ->function_trace_call+0x0/0x170
run_init_process (1) tramp: 0xffffffffc0026000 (function_trace_call+0x0/0x170) ->function_trace_call+0x0/0x170
try_to_run_init_process (1) tramp: 0xffffffffc0026000 (function_trace_call+0x0/0x170) ->function_trace_call+0x0/0x170
x86_pmu_show_pmu_cap (1) tramp: 0xffffffffc0026000 (function_trace_call+0x0/0x170) ->function_trace_call+0x0/0x170
cleanup_rapl_pmus (1) tramp: 0xffffffffc0026000 (function_trace_call+0x0/0x170) ->function_trace_call+0x0/0x170
uncore_free_pcibus_map (1) tramp: 0xffffffffc0026000 (function_trace_call+0x0/0x170) ->function_trace_call+0x0/0x170
uncore_types_exit (1) tramp: 0xffffffffc0026000 (function_trace_call+0x0/0x170) ->function_trace_call+0x0/0x170
uncore_pci_exit.part.0 (1) tramp: 0xffffffffc0026000 (function_trace_call+0x0/0x170) ->function_trace_call+0x0/0x170
kvm_shutdown (1) tramp: 0xffffffffc0026000 (function_trace_call+0x0/0x170) ->function_trace_call+0x0/0x170
vmx_dump_msrs (1) tramp: 0xffffffffc0026000 (function_trace_call+0x0/0x170) ->function_trace_call+0x0/0x170
[..]
# cat /sys/kernel/tracing/enabled_functions | wc -l
54702
If a fprobe is being removed and all its functions are also traced by
other fprobes, still decrement the fprobe_graph_active counter.
Cc: stable(a)vger.kernel.org
Cc: Mark Rutland <mark.rutland(a)arm.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers(a)efficios.com>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: Sven Schnelle <svens(a)linux.ibm.com>
Cc: Vasily Gorbik <gor(a)linux.ibm.com>
Cc: Alexander Gordeev <agordeev(a)linux.ibm.com>
Link: https://lore.kernel.org/20250220202055.565129766@goodmis.org
Fixes: 4346ba1604093 ("fprobe: Rewrite fprobe on function-graph tracer")
Closes: https://lore.kernel.org/all/20250217114918.10397-A-hca@linux.ibm.com/
Reported-by: Heiko Carstens <hca(a)linux.ibm.com>
Tested-by: Heiko Carstens <hca(a)linux.ibm.com>
Acked-by: Masami Hiramatsu (Google) <mhiramat(a)kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
---
kernel/trace/fprobe.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
index 62e8f7d56602..33082c4e8154 100644
--- a/kernel/trace/fprobe.c
+++ b/kernel/trace/fprobe.c
@@ -407,7 +407,8 @@ static void fprobe_graph_remove_ips(unsigned long *addrs, int num)
if (!fprobe_graph_active)
unregister_ftrace_graph(&fprobe_graph_ops);
- ftrace_set_filter_ips(&fprobe_graph_ops.ops, addrs, num, 1, 0);
+ if (num)
+ ftrace_set_filter_ips(&fprobe_graph_ops.ops, addrs, num, 1, 0);
}
static int symbols_cmp(const void *a, const void *b)
@@ -677,8 +678,7 @@ int unregister_fprobe(struct fprobe *fp)
}
del_fprobe_hash(fp);
- if (count)
- fprobe_graph_remove_ips(addrs, count);
+ fprobe_graph_remove_ips(addrs, count);
kfree_rcu(hlist_array, rcu);
fp->hlist_array = NULL;
--
2.47.2
From: Steven Rostedt <rostedt(a)goodmis.org>
When the last fprobe is removed, it calls unregister_ftrace_graph() to
remove the graph_ops from function graph. The issue is that when it does so, it
returns before removing the function from its graph ops via
ftrace_set_filter_ips(). This leaves the last function lingering in the
fprobe's fgraph ops, and if a new probe is added it also enables that last
function (even though the callback will just drop it, making that call adds
unneeded overhead).
# echo "f:myevent1 kernel_clone" >> /sys/kernel/tracing/dynamic_events
# cat /sys/kernel/tracing/enabled_functions
kernel_clone (1) tramp: 0xffffffffc02f3000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
# echo "f:myevent2 schedule_timeout" >> /sys/kernel/tracing/dynamic_events
# cat /sys/kernel/tracing/enabled_functions
kernel_clone (1) tramp: 0xffffffffc02f3000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
schedule_timeout (1) tramp: 0xffffffffc02f3000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
# > /sys/kernel/tracing/dynamic_events
# cat /sys/kernel/tracing/enabled_functions
# echo "f:myevent3 kmem_cache_free" >> /sys/kernel/tracing/dynamic_events
# cat /sys/kernel/tracing/enabled_functions
kmem_cache_free (1) tramp: 0xffffffffc0219000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
schedule_timeout (1) tramp: 0xffffffffc0219000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
The above enabled a fprobe on kernel_clone, and then on schedule_timeout.
The content of the enabled_functions shows the functions that have a
callback attached to them. The fprobe attached to those functions
properly. Then the fprobes were cleared, and enabled_functions was empty
after that. But after adding a fprobe on kmem_cache_free, the
enabled_functions shows that schedule_timeout was attached again. This
is because it was still left in the fprobe ops that is used to tell
function graph which functions it wants callbacks from.
Cc: stable(a)vger.kernel.org
Cc: Mark Rutland <mark.rutland(a)arm.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers(a)efficios.com>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: Sven Schnelle <svens(a)linux.ibm.com>
Cc: Vasily Gorbik <gor(a)linux.ibm.com>
Cc: Alexander Gordeev <agordeev(a)linux.ibm.com>
Link: https://lore.kernel.org/20250220202055.393254452@goodmis.org
Fixes: 4346ba1604093 ("fprobe: Rewrite fprobe on function-graph tracer")
Tested-by: Heiko Carstens <hca(a)linux.ibm.com>
Acked-by: Masami Hiramatsu (Google) <mhiramat(a)kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
---
kernel/trace/fprobe.c | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
index 2560b312ad57..62e8f7d56602 100644
--- a/kernel/trace/fprobe.c
+++ b/kernel/trace/fprobe.c
@@ -403,11 +403,9 @@ static void fprobe_graph_remove_ips(unsigned long *addrs, int num)
lockdep_assert_held(&fprobe_mutex);
fprobe_graph_active--;
- if (!fprobe_graph_active) {
- /* Q: should we unregister it ? */
+ /* Q: should we unregister it ? */
+ if (!fprobe_graph_active)
unregister_ftrace_graph(&fprobe_graph_ops);
- return;
- }
ftrace_set_filter_ips(&fprobe_graph_ops.ops, addrs, num, 1, 0);
}
--
2.47.2
From: Steven Rostedt <rostedt(a)goodmis.org>
Check if a function is already in the manager ops of a subops. A manager
ops contains multiple subops, and if two or more subops are tracing the
same function, the manager ops only needs a single entry in its hash.
Cc: stable(a)vger.kernel.org
Cc: Mark Rutland <mark.rutland(a)arm.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers(a)efficios.com>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: Sven Schnelle <svens(a)linux.ibm.com>
Cc: Vasily Gorbik <gor(a)linux.ibm.com>
Cc: Alexander Gordeev <agordeev(a)linux.ibm.com>
Link: https://lore.kernel.org/20250220202055.226762894@goodmis.org
Fixes: 4f554e955614f ("ftrace: Add ftrace_set_filter_ips function")
Tested-by: Heiko Carstens <hca(a)linux.ibm.com>
Reviewed-by: Masami Hiramatsu (Google) <mhiramat(a)kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
---
kernel/trace/ftrace.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index bec54dc27204..6b0c25761ccb 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -5718,6 +5718,9 @@ __ftrace_match_addr(struct ftrace_hash *hash, unsigned long ip, int remove)
return -ENOENT;
free_hash_entry(hash, entry);
return 0;
+ } else if (__ftrace_lookup_ip(hash, ip) != NULL) {
+ /* Already exists */
+ return 0;
}
entry = add_hash_entry(hash, ip);
--
2.47.2
From: Steven Rostedt <rostedt(a)goodmis.org>
Function graph uses a subops and manager ops mechanism to attach to
ftrace. The manager ops connects to ftrace and the functions it connects
to is defined by a list of subops that it manages.
The function hash that defines what the above ops attaches to limits the
functions to attach if the hash has any content. If the hash is empty, it
means to trace all functions.
The creation of the manager ops hash is done by iterating over all the
subops hashes. If any of the subops hashes is empty, it means that the
manager ops hash must trace all functions as well.
The issue is in the creation of the manager ops. When a second subops is
attached, a new hash is created by starting it as NULL and adding the
subops one at a time. But the NULL hash is mistaken for an empty hash, and
once an empty hash is found, the loop over the subops stops and just enables
all functions.
# echo "f:myevent1 kernel_clone" >> /sys/kernel/tracing/dynamic_events
# cat /sys/kernel/tracing/enabled_functions
kernel_clone (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
# echo "f:myevent2 schedule_timeout" >> /sys/kernel/tracing/dynamic_events
# cat /sys/kernel/tracing/enabled_functions
trace_initcall_start_cb (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
run_init_process (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
try_to_run_init_process (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
x86_pmu_show_pmu_cap (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
cleanup_rapl_pmus (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
uncore_free_pcibus_map (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
uncore_types_exit (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
uncore_pci_exit.part.0 (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
kvm_shutdown (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
vmx_dump_msrs (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
vmx_cleanup_l1d_flush (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
[..]
Fix this by initializing the new hash to NULL; if the hash is NULL, do
not treat it as an empty hash, but instead allocate it by copying the
content of the first subops. Then, on subsequent iterations, the new hash
will not be NULL but will hold the content of the previous subops. If that
first subops attached to all functions, then the new hash may assume that
the manager ops also needs to attach to all functions.
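The distinction the fix relies on, condensed from the hunk below: NULL
means "no hash built yet", while an allocated-but-empty hash means
"match all functions":

  if (*hash) {
          /* An allocated but empty hash already matches everything. */
          if (ftrace_hash_empty(*hash))
                  return 0;
  } else {
          /* First subops: start from a freshly allocated hash. */
          *hash = alloc_ftrace_hash(size_bits);
          if (!*hash)
                  return -ENOMEM;
  }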
Cc: stable(a)vger.kernel.org
Cc: Masami Hiramatsu <mhiramat(a)kernel.org>
Cc: Mark Rutland <mark.rutland(a)arm.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers(a)efficios.com>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: Heiko Carstens <hca(a)linux.ibm.com>
Cc: Sven Schnelle <svens(a)linux.ibm.com>
Cc: Vasily Gorbik <gor(a)linux.ibm.com>
Cc: Alexander Gordeev <agordeev(a)linux.ibm.com>
Link: https://lore.kernel.org/20250220202055.060300046@goodmis.org
Fixes: 5fccc7552ccbc ("ftrace: Add subops logic to allow one ops to manage many")
Reviewed-by: Masami Hiramatsu (Google) <mhiramat(a)kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
---
kernel/trace/ftrace.c | 33 ++++++++++++++++++++++-----------
1 file changed, 22 insertions(+), 11 deletions(-)
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 728ecda6e8d4..bec54dc27204 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3220,15 +3220,22 @@ static struct ftrace_hash *copy_hash(struct ftrace_hash *src)
* The filter_hash updates uses just the append_hash() function
* and the notrace_hash does not.
*/
-static int append_hash(struct ftrace_hash **hash, struct ftrace_hash *new_hash)
+static int append_hash(struct ftrace_hash **hash, struct ftrace_hash *new_hash,
+ int size_bits)
{
struct ftrace_func_entry *entry;
int size;
int i;
- /* An empty hash does everything */
- if (ftrace_hash_empty(*hash))
- return 0;
+ if (*hash) {
+ /* An empty hash does everything */
+ if (ftrace_hash_empty(*hash))
+ return 0;
+ } else {
+ *hash = alloc_ftrace_hash(size_bits);
+ if (!*hash)
+ return -ENOMEM;
+ }
/* If new_hash has everything make hash have everything */
if (ftrace_hash_empty(new_hash)) {
@@ -3292,16 +3299,18 @@ static int intersect_hash(struct ftrace_hash **hash, struct ftrace_hash *new_has
/* Return a new hash that has a union of all @ops->filter_hash entries */
static struct ftrace_hash *append_hashes(struct ftrace_ops *ops)
{
- struct ftrace_hash *new_hash;
+ struct ftrace_hash *new_hash = NULL;
struct ftrace_ops *subops;
+ int size_bits;
int ret;
- new_hash = alloc_ftrace_hash(ops->func_hash->filter_hash->size_bits);
- if (!new_hash)
- return NULL;
+ if (ops->func_hash->filter_hash)
+ size_bits = ops->func_hash->filter_hash->size_bits;
+ else
+ size_bits = FTRACE_HASH_DEFAULT_BITS;
list_for_each_entry(subops, &ops->subop_list, list) {
- ret = append_hash(&new_hash, subops->func_hash->filter_hash);
+ ret = append_hash(&new_hash, subops->func_hash->filter_hash, size_bits);
if (ret < 0) {
free_ftrace_hash(new_hash);
return NULL;
@@ -3310,7 +3319,8 @@ static struct ftrace_hash *append_hashes(struct ftrace_ops *ops)
if (ftrace_hash_empty(new_hash))
break;
}
- return new_hash;
+ /* Can't return NULL as that means this failed */
+ return new_hash ? : EMPTY_HASH;
}
/* Make @ops trace everything except what all its subops do not trace */
@@ -3505,7 +3515,8 @@ int ftrace_startup_subops(struct ftrace_ops *ops, struct ftrace_ops *subops, int
filter_hash = alloc_and_copy_ftrace_hash(size_bits, ops->func_hash->filter_hash);
if (!filter_hash)
return -ENOMEM;
- ret = append_hash(&filter_hash, subops->func_hash->filter_hash);
+ ret = append_hash(&filter_hash, subops->func_hash->filter_hash,
+ size_bits);
if (ret < 0) {
free_ftrace_hash(filter_hash);
return ret;
--
2.47.2
PROT_MTE (memory tagging extensions) is not supported on all user mmap()
types for various reasons (memory attributes, backing storage, CoW
handling). The arm64 arch_validate_flags() function checks whether the
VM_MTE_ALLOWED flag has been set for a vma during mmap(), usually by
arch_calc_vm_flag_bits().
Linux prior to 6.13 does not support PROT_MTE hugetlb mappings. This was
added by commit 25c17c4b55de ("hugetlb: arm64: add mte support").
However, earlier kernels inadvertently set VM_MTE_ALLOWED on
(MAP_ANONYMOUS | MAP_HUGETLB) mappings by only checking for
MAP_ANONYMOUS.
Explicitly check MAP_HUGETLB in arch_calc_vm_flag_bits() and avoid
setting VM_MTE_ALLOWED for such mappings.
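As a hedged illustration (a userspace sketch, not part of the patch; the
PROT_MTE value below is the arm64 one and is assumed here), this is the
kind of mapping affected:

  #define _GNU_SOURCE
  #include <stdio.h>
  #include <sys/mman.h>

  #ifndef PROT_MTE
  #define PROT_MTE 0x20  /* arm64 PROT_MTE; assumed for this sketch */
  #endif

  int main(void)
  {
          /* 2MB anonymous hugetlb mapping requesting tagged memory */
          void *p = mmap(NULL, 2UL << 20, PROT_READ | PROT_WRITE | PROT_MTE,
                         MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);

          /* Pre-fix kernels accept this even though MTE on hugetlb is
           * unsupported; with the fix, mmap() fails with EINVAL. */
          printf("mmap: %s\n", p == MAP_FAILED ? "rejected" : "accepted");
          return 0;
  }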
Fixes: 9f3419315f3c ("arm64: mte: Add PROT_MTE support to mmap() and mprotect()")
Cc: <stable(a)vger.kernel.org> # 5.10.x-6.12.x
Reported-by: Naresh Kamboju <naresh.kamboju(a)linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas(a)arm.com>
---
Hi Greg,
This patch applies cleanly on top of the stable-rc trees, from
linux-6.12.y down to 5.10.y LTS, so I'm only sending it once. It's not
for 6.13 onwards, since those kernels support hugetlbfs with MTE.
Thanks,
Catalin
arch/arm64/include/asm/mman.h | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/arch/arm64/include/asm/mman.h b/arch/arm64/include/asm/mman.h
index 798d965760d4..5a280ac7570c 100644
--- a/arch/arm64/include/asm/mman.h
+++ b/arch/arm64/include/asm/mman.h
@@ -41,9 +41,12 @@ static inline unsigned long arch_calc_vm_flag_bits(struct file *file,
* backed by tags-capable memory. The vm_flags may be overridden by a
* filesystem supporting MTE (RAM-based).
*/
- if (system_supports_mte() &&
- ((flags & MAP_ANONYMOUS) || shmem_file(file)))
- return VM_MTE_ALLOWED;
+ if (system_supports_mte()) {
+ if ((flags & MAP_ANONYMOUS) && !(flags & MAP_HUGETLB))
+ return VM_MTE_ALLOWED;
+ if (shmem_file(file))
+ return VM_MTE_ALLOWED;
+ }
return 0;
}
In r420_cp_errata_init(), the RESYNC information is stored even
when the scratch register has not been successfully allocated.
Change the return type of r420_cp_errata_init() from void to int
to propagate errors to the caller. Add error checking after
radeon_scratch_get() to ensure RESYNC information is stored
to an allocated address. Log an error message and return the
error code immediately when radeon_scratch_get() fails.
Additionally, handle the return value of r420_cp_errata_init() in
r420_startup() to log an appropriate error message and propagate
the error if initialization fails.
Fixes: 62cdc0c20663 ("drm/radeon/kms: Workaround RV410/R420 CP errata (V3)")
Cc: stable(a)vger.kernel.org # 2.6.33+
Signed-off-by: Wentao Liang <vulab(a)iscas.ac.cn>
---
drivers/gpu/drm/radeon/r420.c | 15 +++++++++++----
1 file changed, 11 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/radeon/r420.c b/drivers/gpu/drm/radeon/r420.c
index 9a31cdec6415..67c55153cba8 100644
--- a/drivers/gpu/drm/radeon/r420.c
+++ b/drivers/gpu/drm/radeon/r420.c
@@ -204,7 +204,7 @@ static void r420_clock_resume(struct radeon_device *rdev)
WREG32_PLL(R_00000D_SCLK_CNTL, sclk_cntl);
}
-static void r420_cp_errata_init(struct radeon_device *rdev)
+static int r420_cp_errata_init(struct radeon_device *rdev)
{
int r;
struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
@@ -215,7 +215,11 @@ static void r420_cp_errata_init(struct radeon_device *rdev)
* The proper workaround is to queue a RESYNC at the beginning
* of the CP init, apparently.
*/
- radeon_scratch_get(rdev, &rdev->config.r300.resync_scratch);
+ r = radeon_scratch_get(rdev, &rdev->config.r300.resync_scratch);
+ if (r) {
+ DRM_ERROR("failed to get scratch reg (%d).\n", r);
+ return r;
+ }
r = radeon_ring_lock(rdev, ring, 8);
WARN_ON(r);
radeon_ring_write(ring, PACKET0(R300_CP_RESYNC_ADDR, 1));
@@ -290,8 +294,11 @@ static int r420_startup(struct radeon_device *rdev)
dev_err(rdev->dev, "failed initializing CP (%d).\n", r);
return r;
}
- r420_cp_errata_init(rdev);
-
+ r = r420_cp_errata_init(rdev);
+ if (r) {
+ dev_err(rdev->dev, "failed initializing CP errata workaround (%d).\n", r);
+ return r;
+ }
r = radeon_ib_pool_init(rdev);
if (r) {
dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
--
2.42.0.windows.2
On error, restore anything still on the pin_list back to the
invalidation list. For the actual pin, as long as the vma is tracked on
either list it should get picked up on the next pin. However, it looks
possible for the vma to get nuked while still being present on this
per-VM pin_list, leading to corruption. An alternative would be to
instead just remove the link when destroying the vma.
Fixes: ed2bdf3b264d ("drm/xe/vm: Subclass userptr vmas")
Suggested-by: Matthew Brost <matthew.brost(a)intel.com>
Signed-off-by: Matthew Auld <matthew.auld(a)intel.com>
Cc: Thomas Hellström <thomas.hellstrom(a)linux.intel.com>
Cc: <stable(a)vger.kernel.org> # v6.8+
---
drivers/gpu/drm/xe/xe_vm.c | 26 +++++++++++++++++++-------
1 file changed, 19 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index d664f2e418b2..668b0bde7822 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -670,12 +670,12 @@ int xe_vm_userptr_pin(struct xe_vm *vm)
list_for_each_entry_safe(uvma, next, &vm->userptr.invalidated,
userptr.invalidate_link) {
list_del_init(&uvma->userptr.invalidate_link);
- list_move_tail(&uvma->userptr.repin_link,
- &vm->userptr.repin_list);
+ list_add_tail(&uvma->userptr.repin_link,
+ &vm->userptr.repin_list);
}
spin_unlock(&vm->userptr.invalidated_lock);
- /* Pin and move to temporary list */
+ /* Pin and move to bind list */
list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list,
userptr.repin_link) {
err = xe_vma_userptr_pin_pages(uvma);
@@ -691,10 +691,10 @@ int xe_vm_userptr_pin(struct xe_vm *vm)
err = xe_vm_invalidate_vma(&uvma->vma);
xe_vm_unlock(vm);
if (err)
- return err;
+ break;
} else {
- if (err < 0)
- return err;
+ if (err)
+ break;
list_del_init(&uvma->userptr.repin_link);
list_move_tail(&uvma->vma.combined_links.rebind,
@@ -702,7 +702,19 @@ int xe_vm_userptr_pin(struct xe_vm *vm)
}
}
- return 0;
+ if (err) {
+ down_write(&vm->userptr.notifier_lock);
+ spin_lock(&vm->userptr.invalidated_lock);
+ list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list,
+ userptr.repin_link) {
+ list_del_init(&uvma->userptr.repin_link);
+ list_move_tail(&uvma->userptr.invalidate_link,
+ &vm->userptr.invalidated);
+ }
+ spin_unlock(&vm->userptr.invalidated_lock);
+ up_write(&vm->userptr.notifier_lock);
+ }
+ return err;
}
/**
--
2.48.1
This is based on top of
https://git.kernel.org/pub/scm/linux/kernel/git/mmind/linux-rockchip.git/lo…
6ee0b9ad3995 ("arm64: dts: rockchip: Add rng node to RK3588") as it
depends on the (merged) series from
https://lore.kernel.org/all/20250211-pre-ict-jaguar-v6-0-4484b0f88cfc@cherr…
Patches for Haikou Video Demo adapter for PX30 Ringneck and RK3399 Puma
(patches 4 and 5) also depend on the following patch series:
https://lore.kernel.org/linux-devicetree/20250220-pca976x-reset-driver-v1-0…
This fixes incorrect pinmux on UART0 and UART5 for PX30 Ringneck on
Haikou.
This adds support for the HAIKOU-LVDS-9904379 adapter for PX30 Ringneck
fitted on a Haikou carrierboard.
Additionally, this adds support for Haikou Video Demo adapter on PX30
Ringneck and RK3399 Puma fitted on a Haikou carrierboard. Notably
missing from the overlay is the OV5675 camera module, which expects a
19.2MHz clock that we cannot exactly feed right now. The OV5675 driver
will be made more flexible, and support for the camera module will be
added afterwards. This adapter has a 720x1280 DSI display
with a GT911 touchscreen, a GPIO-controllable LED and an I2C GPIO
expander. Support for this adapter on RK3588 Tiger is being added in a
separate patch series[1].
Note that the DSI panel currently is glitchy on both PX30 Ringneck and
RK3399 Puma but this is being tackled in another series[2]. Since this
will not be fixed through DT properties for the panel, adding the DT
nodes for the DSI panel even if not perfect right now seems acceptable
to me.
[1] https://lore.kernel.org/linux-rockchip/20241127143719.660658-1-heiko@sntech…
[2] https://lore.kernel.org/r/20240626084722.832763-1-heiko@sntech.de
Signed-off-by: Quentin Schulz <quentin.schulz(a)cherry.de>
---
Quentin Schulz (5):
arm64: dts: rockchip: fix pinmux of UART0 for PX30 Ringneck on Haikou
arm64: dts: rockchip: fix pinmux of UART5 for PX30 Ringneck on Haikou
arm64: dts: rockchip: add support for HAIKOU-LVDS-9904379 adapter for PX30 Ringneck
arm64: dts: rockchip: add overlay for PX30 Ringneck Haikou Video Demo adapter
arm64: dts: rockchip: add overlay for RK3399 Puma Haikou Video Demo adapter
arch/arm64/boot/dts/rockchip/Makefile | 15 ++
.../px30-ringneck-haikou-lvds-9904379.dtso | 130 ++++++++++++++
.../rockchip/px30-ringneck-haikou-video-demo.dtso | 190 +++++++++++++++++++++
.../boot/dts/rockchip/px30-ringneck-haikou.dts | 10 +-
.../rockchip/rk3399-puma-haikou-video-demo.dtso | 166 ++++++++++++++++++
5 files changed, 510 insertions(+), 1 deletion(-)
---
base-commit: 6ee0b9ad3995ee5fa229035c69013b7dd0d3634b
change-id: 20250128-ringneck-dtbos-98064839355e
prerequisite-change-id: 20250219-pca976x-reset-driver-c9aa95869426:v1
prerequisite-patch-id: 24af74693654b4a456aca0a1399ec8509e141c01
prerequisite-patch-id: df17910ec117317f2f456f679a77ed60e9168fa3
Best regards,
--
Quentin Schulz <quentin.schulz(a)cherry.de>
The use-after-free bug appears when:
- A platform device is created from OF, by of_device_add();
- The same device's name is changed afterwards using dev_set_name(),
by its probe for example.
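A minimal sketch of the aliasing that goes wrong (simplified,
illustration only, not the exact kernel code):

  /* of_device_add(), pre-fix (simplified): the platform_device name
   * aliases the kobject's name instead of owning a copy. */
  pdev->name = dev_name(&pdev->dev);

  /* Later, e.g. in a driver probe: */
  dev_set_name(&pdev->dev, "renamed");   /* kfree()s the old kobject name */

  /* pdev->name now points at freed memory; platform_match() and other
   * readers of pdev->name hit a slab-use-after-free. */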
Out of the 37 drivers that deal with platform devices and do a
dev_set_name() call, only one might be affected. That driver is
loongson-i2s-plat [0]. All other dev_set_name() calls are on child
devices created on the spot. The issue was found on downstream kernels
and we don't have what it takes to test loongson-i2s-plat.
Note: loongson-i2s-plat maintainers are CCed.
  # Finding potential trouble-makers:
  $ git grep -l 'struct platform_device' | xargs grep -l dev_set_name
The solution proposed is to add a flag to platform_device that tells if
it is responsible for freeing its name. We can then duplicate the
device name inside of_device_add() instead of copying the pointer.
What is done elsewhere?
- Platform bus code does a copy of the argument name that is stored
alongside the struct platform_device; see platform_device_alloc()[1].
- Other busses duplicate the device name; either through a dynamic
allocation [2] or through an array embedded inside devices [3].
- Some busses don't have a separate name; when they want a name they
take it from the device [4].
[0]: https://elixir.bootlin.com/linux/v6.13.2/source/sound/soc/loongson/loongson…
[1]: https://elixir.bootlin.com/linux/v6.13.2/source/drivers/base/platform.c#L581
[2]: https://elixir.bootlin.com/linux/v6.13.2/source/drivers/gpu/drm/drm_drv.c#L…
[3]: https://elixir.bootlin.com/linux/v6.13.2/source/include/linux/i2c.h#L343
[4]: https://elixir.bootlin.com/linux/v6.13.2/source/include/linux/pci.h#L2150
This can be reproduced using Buildroot's qemu_aarch64_virt_defconfig
with CONFIG_KASAN=y and a dev_set_name() inside the probe of:
drivers/pci/controller/pci-host-common.c
The below splat appears at boot. It happens whenever something tries to
access pdev->name; one big consumer of this field is platform_match(),
which falls back to name matching.
==================================================================
BUG: KASAN: slab-use-after-free in strcmp+0x2c/0x78
Read of size 1 at addr ffffff80c0300160 by task swapper/0/1
CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.6.32 #1
Hardware name: linux,dummy-virt (DT)
Call trace:
dump_backtrace+0x90/0xe8
show_stack+0x18/0x24
dump_stack_lvl+0x48/0x60
print_report+0xf8/0x5d8
kasan_report+0x90/0xcc
__asan_load1+0x60/0x6c
strcmp+0x2c/0x78
platform_match+0xd0/0x140
__driver_attach+0x44/0x240
bus_for_each_dev+0xe4/0x160
driver_attach+0x34/0x44
bus_add_driver+0x134/0x270
driver_register+0xa4/0x1e4
__platform_driver_register+0x44/0x54
ged_driver_init+0x1c/0x28
do_one_initcall+0xdc/0x260
kernel_init_freeable+0x314/0x448
kernel_init+0x2c/0x1e0
ret_from_fork+0x10/0x20
Allocated by task 1:
kasan_save_stack+0x3c/0x64
kasan_set_track+0x2c/0x40
kasan_save_alloc_info+0x24/0x34
__kasan_kmalloc+0xb8/0xbc
__kmalloc_node_track_caller+0x64/0xa4
kvasprintf+0xcc/0x16c
kvasprintf_const+0xe8/0x180
kobject_set_name_vargs+0x54/0xd4
dev_set_name+0xa8/0xe4
of_device_make_bus_id+0x298/0x2b0
of_device_alloc+0x1ec/0x204
of_platform_device_create_pdata+0x60/0x168
of_platform_bus_create+0x20c/0x4a0
of_platform_populate+0x50/0x10c
of_platform_default_populate_init+0xe0/0x100
do_one_initcall+0xdc/0x260
kernel_init_freeable+0x314/0x448
kernel_init+0x2c/0x1e0
ret_from_fork+0x10/0x20
Freed by task 1:
kasan_save_stack+0x3c/0x64
kasan_set_track+0x2c/0x40
kasan_save_free_info+0x38/0x60
__kasan_slab_free+0xe4/0x150
__kmem_cache_free+0x134/0x26c
kfree+0x54/0x6c
kfree_const+0x34/0x40
kobject_set_name_vargs+0xa8/0xd4
dev_set_name+0xa8/0xe4
pci_host_common_probe+0x9c/0x294
platform_probe+0x90/0x100
really_probe+0x100/0x3cc
__driver_probe_device+0xb8/0x18c
driver_probe_device+0x108/0x1d8
__driver_attach+0xc8/0x240
bus_for_each_dev+0xe4/0x160
driver_attach+0x34/0x44
bus_add_driver+0x134/0x270
driver_register+0xa4/0x1e4
__platform_driver_register+0x44/0x54
gen_pci_driver_init+0x1c/0x28
do_one_initcall+0xdc/0x260
kernel_init_freeable+0x314/0x448
kernel_init+0x2c/0x1e0
ret_from_fork+0x10/0x20
The buggy address belongs to the object at ffffff80c0300160
which belongs to the cache kmalloc-16 of size 16
The buggy address is located 0 bytes inside of
freed 16-byte region [ffffff80c0300160, ffffff80c0300170)
The buggy address belongs to the physical page:
page:0000000099fe29a0 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x100300
flags: 0x8000000000000800(slab|zone=2)
page_type: 0xffffffff()
raw: 8000000000000800 ffffff80c00013c0 dead000000000122 0000000000000000
raw: 0000000000000000 0000000080800080 00000001ffffffff 0000000000000000
page dumped because: kasan: bad access detected
Memory state around the buggy address:
ffffff80c0300000: fa fb fc fc fa fb fc fc 00 07 fc fc 00 07 fc fc
ffffff80c0300080: 00 07 fc fc 00 02 fc fc 00 02 fc fc 00 02 fc fc
>ffffff80c0300100: 00 06 fc fc 00 06 fc fc 00 06 fc fc fa fb fc fc
^
ffffff80c0300180: 00 00 fc fc 00 00 fc fc 00 06 fc fc 00 06 fc fc
ffffff80c0300200: 00 06 fc fc 00 06 fc fc 00 06 fc fc 00 06 fc fc
==================================================================
Signed-off-by: Théo Lebrun <theo.lebrun(a)bootlin.com>
---
Théo Lebrun (2):
driver core: platform: turn pdev->id_auto into pdev->flags
driver core: platform: avoid use-after-free on pdev->name
drivers/base/platform.c | 8 +++++---
drivers/of/platform.c | 12 +++++++++++-
include/linux/platform_device.h | 4 +++-
3 files changed, 19 insertions(+), 5 deletions(-)
---
base-commit: 0ad2507d5d93f39619fc42372c347d6006b64319
change-id: 20250217-pdev-uaf-1a779a98d81b
Best regards,
--
Théo Lebrun <theo.lebrun(a)bootlin.com>
Commit d74169ceb0d2 ("iommu/vt-d: Allocate DMAR fault interrupts
locally") moved the call to enable_drhd_fault_handling() to a code
path that does not hold any lock while traversing the drhd list. Fix
it by ensuring that dmar_global_lock is held when traversing the
drhd list.
Without this fix, the following warning is triggered:
=============================
WARNING: suspicious RCU usage
6.14.0-rc3 #55 Not tainted
-----------------------------
drivers/iommu/intel/dmar.c:2046 RCU-list traversed in non-reader section!!
other info that might help us debug this:
rcu_scheduler_active = 1, debug_locks = 1
2 locks held by cpuhp/1/23:
#0: ffffffff84a67c50 (cpu_hotplug_lock){++++}-{0:0}, at: cpuhp_thread_fun+0x87/0x2c0
#1: ffffffff84a6a380 (cpuhp_state-up){+.+.}-{0:0}, at: cpuhp_thread_fun+0x87/0x2c0
stack backtrace:
CPU: 1 UID: 0 PID: 23 Comm: cpuhp/1 Not tainted 6.14.0-rc3 #55
Call Trace:
<TASK>
dump_stack_lvl+0xb7/0xd0
lockdep_rcu_suspicious+0x159/0x1f0
? __pfx_enable_drhd_fault_handling+0x10/0x10
enable_drhd_fault_handling+0x151/0x180
cpuhp_invoke_callback+0x1df/0x990
cpuhp_thread_fun+0x1ea/0x2c0
smpboot_thread_fn+0x1f5/0x2e0
? __pfx_smpboot_thread_fn+0x10/0x10
kthread+0x12a/0x2d0
? __pfx_kthread+0x10/0x10
ret_from_fork+0x4a/0x60
? __pfx_kthread+0x10/0x10
ret_from_fork_asm+0x1a/0x30
</TASK>
Simply holding the lock in enable_drhd_fault_handling() will trigger a
lock order splat. Avoid holding the dmar_global_lock when calling
iommu_device_register(), which starts the device probe process.
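For reference, the guard(rwsem_read) added in the dmar.c hunk below is
equivalent to this open-coded pattern (sketch):

  down_read(&dmar_global_lock);
  for_each_iommu(iommu, drhd) {
          /* ... enable the fault control interrupt ... */
  }
  up_read(&dmar_global_lock);
  /* guard(rwsem_read)(&dmar_global_lock) takes the read side and
   * drops it automatically when the enclosing scope is left. */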
Fixes: d74169ceb0d2 ("iommu/vt-d: Allocate DMAR fault interrupts locally")
Reported-by: Ido Schimmel <idosch(a)idosch.org>
Closes: https://lore.kernel.org/linux-iommu/Zx9OwdLIc_VoQ0-a@shredder.mtl.com/
Cc: stable(a)vger.kernel.org
Signed-off-by: Lu Baolu <baolu.lu(a)linux.intel.com>
---
drivers/iommu/intel/dmar.c | 1 +
drivers/iommu/intel/iommu.c | 7 +++++++
2 files changed, 8 insertions(+)
diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c
index 9f424acf474e..e540092d664d 100644
--- a/drivers/iommu/intel/dmar.c
+++ b/drivers/iommu/intel/dmar.c
@@ -2043,6 +2043,7 @@ int enable_drhd_fault_handling(unsigned int cpu)
/*
* Enable fault control interrupt.
*/
+ guard(rwsem_read)(&dmar_global_lock);
for_each_iommu(iommu, drhd) {
u32 fault_status;
int ret;
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index cc46098f875b..9a1e61b429ca 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -3146,7 +3146,14 @@ int __init intel_iommu_init(void)
iommu_device_sysfs_add(&iommu->iommu, NULL,
intel_iommu_groups,
"%s", iommu->name);
+ /*
+ * The iommu device probe is protected by the iommu_probe_device_lock.
+ * Release the dmar_global_lock before entering the device probe path
+ * to avoid unnecessary lock order splat.
+ */
+ up_read(&dmar_global_lock);
iommu_device_register(&iommu->iommu, &intel_iommu_ops, NULL);
+ down_read(&dmar_global_lock);
iommu_pmu_register(iommu);
}
--
2.43.0
From: Steven Rostedt <rostedt(a)goodmis.org>
Function graph uses a subops and manager ops mechanism to attach to
ftrace. The manager ops connects to ftrace and the functions it connects
to is defined by a list of subops that it manages.
The function hash that defines what the above ops attaches to limits the
functions to attach if the hash has any content. If the hash is empty, it
means to trace all functions.
The creation of the manager ops hash is done by iterating over all the
subops hashes. If any of the subops hashes is empty, it means that the
manager ops hash must trace all functions as well.
The issue is in the creation of the manager ops. When a second subops is
attached, a new hash is created by starting it as NULL and adding the
subops one at a time. But the NULL hash is mistaken for an empty hash,
and once an empty hash is found, the loop over the subops stops and all
functions are simply enabled.
# echo "f:myevent1 kernel_clone" >> /sys/kernel/tracing/dynamic_events
# cat /sys/kernel/tracing/enabled_functions
kernel_clone (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
# echo "f:myevent2 schedule_timeout" >> /sys/kernel/tracing/dynamic_events
# cat /sys/kernel/tracing/enabled_functions
trace_initcall_start_cb (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
run_init_process (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
try_to_run_init_process (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
x86_pmu_show_pmu_cap (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
cleanup_rapl_pmus (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
uncore_free_pcibus_map (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
uncore_types_exit (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
uncore_pci_exit.part.0 (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
kvm_shutdown (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
vmx_dump_msrs (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
vmx_cleanup_l1d_flush (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
[..]
Fix this by initializing the new hash to NULL, and if the hash is NULL,
do not treat it as an empty hash but instead allocate it by copying the
content of the first subops. Then, on subsequent iterations, the new
hash will not be NULL but will hold the content of the previous subops.
If that first subops attached to all functions, the new hash may then
assume that the manager ops also needs to attach to all functions.
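A toy model of the NULL-vs-empty distinction this introduces (a plain C
sketch, illustration only; toy_hash and toy_append are made up for the
example and are not kernel code):

  #include <errno.h>
  #include <stdlib.h>

  /* NULL accumulator = nothing folded in yet; empty = match everything */
  struct toy_hash { unsigned int n; unsigned long ips[8]; };

  static int toy_append(struct toy_hash **acc, const struct toy_hash *sub)
  {
          if (!*acc) {
                  /* First subops: start from scratch, not from "match all" */
                  *acc = calloc(1, sizeof(**acc));
                  if (!*acc)
                          return -ENOMEM;
          } else if ((*acc)->n == 0) {
                  return 0;               /* already matches every function */
          }
          if (sub->n == 0) {              /* this subops matches everything */
                  (*acc)->n = 0;
                  return 0;
          }
          for (unsigned int i = 0; i < sub->n && (*acc)->n < 8; i++)
                  (*acc)->ips[(*acc)->n++] = sub->ips[i]; /* union, no dedup */
          return 0;
  }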
Cc: stable(a)vger.kernel.org
Fixes: 5fccc7552ccbc ("ftrace: Add subops logic to allow one ops to manage many")
Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
---
Changes since v2: https://lore.kernel.org/20250219220510.888959028@goodmis.org
- Have append_hashes() return EMPTY_HASH and not NULL if the resulting
new hash is empty.
kernel/trace/ftrace.c | 33 ++++++++++++++++++++++-----------
1 file changed, 22 insertions(+), 11 deletions(-)
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 728ecda6e8d4..bec54dc27204 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3220,15 +3220,22 @@ static struct ftrace_hash *copy_hash(struct ftrace_hash *src)
* The filter_hash updates uses just the append_hash() function
* and the notrace_hash does not.
*/
-static int append_hash(struct ftrace_hash **hash, struct ftrace_hash *new_hash)
+static int append_hash(struct ftrace_hash **hash, struct ftrace_hash *new_hash,
+ int size_bits)
{
struct ftrace_func_entry *entry;
int size;
int i;
- /* An empty hash does everything */
- if (ftrace_hash_empty(*hash))
- return 0;
+ if (*hash) {
+ /* An empty hash does everything */
+ if (ftrace_hash_empty(*hash))
+ return 0;
+ } else {
+ *hash = alloc_ftrace_hash(size_bits);
+ if (!*hash)
+ return -ENOMEM;
+ }
/* If new_hash has everything make hash have everything */
if (ftrace_hash_empty(new_hash)) {
@@ -3292,16 +3299,18 @@ static int intersect_hash(struct ftrace_hash **hash, struct ftrace_hash *new_has
/* Return a new hash that has a union of all @ops->filter_hash entries */
static struct ftrace_hash *append_hashes(struct ftrace_ops *ops)
{
- struct ftrace_hash *new_hash;
+ struct ftrace_hash *new_hash = NULL;
struct ftrace_ops *subops;
+ int size_bits;
int ret;
- new_hash = alloc_ftrace_hash(ops->func_hash->filter_hash->size_bits);
- if (!new_hash)
- return NULL;
+ if (ops->func_hash->filter_hash)
+ size_bits = ops->func_hash->filter_hash->size_bits;
+ else
+ size_bits = FTRACE_HASH_DEFAULT_BITS;
list_for_each_entry(subops, &ops->subop_list, list) {
- ret = append_hash(&new_hash, subops->func_hash->filter_hash);
+ ret = append_hash(&new_hash, subops->func_hash->filter_hash, size_bits);
if (ret < 0) {
free_ftrace_hash(new_hash);
return NULL;
@@ -3310,7 +3319,8 @@ static struct ftrace_hash *append_hashes(struct ftrace_ops *ops)
if (ftrace_hash_empty(new_hash))
break;
}
- return new_hash;
+ /* Can't return NULL as that means this failed */
+ return new_hash ? : EMPTY_HASH;
}
/* Make @ops trace evenything except what all its subops do not trace */
@@ -3505,7 +3515,8 @@ int ftrace_startup_subops(struct ftrace_ops *ops, struct ftrace_ops *subops, int
filter_hash = alloc_and_copy_ftrace_hash(size_bits, ops->func_hash->filter_hash);
if (!filter_hash)
return -ENOMEM;
- ret = append_hash(&filter_hash, subops->func_hash->filter_hash);
+ ret = append_hash(&filter_hash, subops->func_hash->filter_hash,
+ size_bits);
if (ret < 0) {
free_ftrace_hash(filter_hash);
return ret;
--
2.47.2
From: Ard Biesheuvel <ardb(a)kernel.org>
When using -fPIE codegen, the compiler will emit const global objects
(which are useless unless statically initialized) into .data.rel.ro
rather than .rodata if the object contains fields that carry absolute
addresses of other code or data objects. This permits the linker to
annotate such regions as requiring read-write access only at load time,
but not at execution time (in user space).
This distinction does not matter for the kernel, but it does imply that
const data will end up in writable memory if the .data.rel.ro sections
are not treated in a special way.
So emit .data.rel.ro into the .rodata segment.
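A hedged example of the kind of object this affects (other_symbol and
ptr_table are made up for the illustration):

  extern int other_symbol;

  /* Needs a load-time relocation for &other_symbol, so under -fPIE the
   * compiler emits it into .data.rel.ro rather than .rodata; without
   * this patch it would therefore land in writable memory. */
  static const int *const ptr_table[] = { &other_symbol };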
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Ard Biesheuvel <ardb(a)kernel.org>
---
include/asm-generic/vmlinux.lds.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 02a4adb4a999..0d5b186abee8 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -457,7 +457,7 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG)
. = ALIGN((align)); \
.rodata : AT(ADDR(.rodata) - LOAD_OFFSET) { \
__start_rodata = .; \
- *(.rodata) *(.rodata.*) \
+ *(.rodata) *(.rodata.*) *(.data.rel.ro*) \
SCHED_DATA \
RO_AFTER_INIT_DATA /* Read only after init */ \
. = ALIGN(8); \
--
2.48.1.601.g30ceb7b040-goog
From: George Moussalem <george.moussalem(a)outlook.com>
While setting the DAC value, the wrong boolean value is evaluated to set
the DSP bias current. So let's correct the conditional statement and use
the right boolean value, read from the DT and stored in priv.
Cc: stable(a)vger.kernel.org
Fixes: d1cb613efbd3 ("net: phy: qcom: add support for QCA807x PHY Family")
Signed-off-by: George Moussalem <george.moussalem(a)outlook.com>
Signed-off-by: Christian Marangi <ansuelsmth(a)gmail.com>
---
drivers/net/phy/qcom/qca807x.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/phy/qcom/qca807x.c b/drivers/net/phy/qcom/qca807x.c
index 3279de857b47..2ad8c2586d64 100644
--- a/drivers/net/phy/qcom/qca807x.c
+++ b/drivers/net/phy/qcom/qca807x.c
@@ -774,7 +774,7 @@ static int qca807x_config_init(struct phy_device *phydev)
control_dac &= ~QCA807X_CONTROL_DAC_MASK;
if (!priv->dac_full_amplitude)
control_dac |= QCA807X_CONTROL_DAC_DSP_AMPLITUDE;
- if (!priv->dac_full_amplitude)
+ if (!priv->dac_full_bias_current)
control_dac |= QCA807X_CONTROL_DAC_DSP_BIAS_CURRENT;
if (!priv->dac_disable_bias_current_tweak)
control_dac |= QCA807X_CONTROL_DAC_BIAS_CURRENT_TWEAK;
--
2.47.1
From: Steven Rostedt <rostedt(a)goodmis.org>
When adding a new fprobe, it will update the function hash to the
functions the fprobe is attached to and register with function graph to
have it call the registered functions. The fprobe_graph_active variable
keeps track of the number of fprobes that are using function graph.
If two fprobes attach to the same function, it increments the
fprobe_graph_active for each of them. But when they are removed, the first
fprobe to be removed will see that the function it is attached to is also
used by another fprobe, and it will not remove that function from
function_graph. The logic will then also skip decrementing the
fprobe_graph_active variable.
This causes the fprobe_graph_active variable to not go to zero when all
fprobes are removed, and in doing so it does not unregister from
function graph. As the fgraph ops hash will now be empty, and an empty
filter hash means all functions are enabled, this triggers function graph
to add a callback to the fprobe infrastructure for every function!
# echo "f:myevent1 kernel_clone" >> /sys/kernel/tracing/dynamic_events
# echo "f:myevent2 kernel_clone%return" >> /sys/kernel/tracing/dynamic_events
# cat /sys/kernel/tracing/enabled_functions
kernel_clone (1) tramp: 0xffffffffc0024000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
# > /sys/kernel/tracing/dynamic_events
# cat /sys/kernel/tracing/enabled_functions
trace_initcall_start_cb (1) tramp: 0xffffffffc0026000 (function_trace_call+0x0/0x170) ->function_trace_call+0x0/0x170
run_init_process (1) tramp: 0xffffffffc0026000 (function_trace_call+0x0/0x170) ->function_trace_call+0x0/0x170
try_to_run_init_process (1) tramp: 0xffffffffc0026000 (function_trace_call+0x0/0x170) ->function_trace_call+0x0/0x170
x86_pmu_show_pmu_cap (1) tramp: 0xffffffffc0026000 (function_trace_call+0x0/0x170) ->function_trace_call+0x0/0x170
cleanup_rapl_pmus (1) tramp: 0xffffffffc0026000 (function_trace_call+0x0/0x170) ->function_trace_call+0x0/0x170
uncore_free_pcibus_map (1) tramp: 0xffffffffc0026000 (function_trace_call+0x0/0x170) ->function_trace_call+0x0/0x170
uncore_types_exit (1) tramp: 0xffffffffc0026000 (function_trace_call+0x0/0x170) ->function_trace_call+0x0/0x170
uncore_pci_exit.part.0 (1) tramp: 0xffffffffc0026000 (function_trace_call+0x0/0x170) ->function_trace_call+0x0/0x170
kvm_shutdown (1) tramp: 0xffffffffc0026000 (function_trace_call+0x0/0x170) ->function_trace_call+0x0/0x170
vmx_dump_msrs (1) tramp: 0xffffffffc0026000 (function_trace_call+0x0/0x170) ->function_trace_call+0x0/0x170
[..]
# cat /sys/kernel/tracing/enabled_functions | wc -l
54702
If a fprobe is being removed and all its functions are also traced by
other fprobes, still decrement the fprobe_graph_active counter.
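The resulting fprobe_graph_remove_ips() flow, once this and the related
fix below are applied (a sketch for clarity, mirroring the diffs):

  fprobe_graph_active--;                  /* always pair the increment */
  if (!fprobe_graph_active)
          unregister_ftrace_graph(&fprobe_graph_ops);
  if (num)   /* num == 0 when every function is shared with other fprobes */
          ftrace_set_filter_ips(&fprobe_graph_ops.ops, addrs, num, 1, 0);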
Cc: stable(a)vger.kernel.org
Fixes: 4346ba1604093 ("fprobe: Rewrite fprobe on function-graph tracer")
Closes: https://lore.kernel.org/all/20250217114918.10397-A-hca@linux.ibm.com/
Reported-by: Heiko Carstens <hca(a)linux.ibm.com>
Tested-by: Heiko Carstens <hca(a)linux.ibm.com>
Acked-by: Masami Hiramatsu (Google) <mhiramat(a)kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
---
kernel/trace/fprobe.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
index 62e8f7d56602..33082c4e8154 100644
--- a/kernel/trace/fprobe.c
+++ b/kernel/trace/fprobe.c
@@ -407,7 +407,8 @@ static void fprobe_graph_remove_ips(unsigned long *addrs, int num)
if (!fprobe_graph_active)
unregister_ftrace_graph(&fprobe_graph_ops);
- ftrace_set_filter_ips(&fprobe_graph_ops.ops, addrs, num, 1, 0);
+ if (num)
+ ftrace_set_filter_ips(&fprobe_graph_ops.ops, addrs, num, 1, 0);
}
static int symbols_cmp(const void *a, const void *b)
@@ -677,8 +678,7 @@ int unregister_fprobe(struct fprobe *fp)
}
del_fprobe_hash(fp);
- if (count)
- fprobe_graph_remove_ips(addrs, count);
+ fprobe_graph_remove_ips(addrs, count);
kfree_rcu(hlist_array, rcu);
fp->hlist_array = NULL;
--
2.47.2
From: Steven Rostedt <rostedt(a)goodmis.org>
When the last fprobe is removed, it calls unregister_ftrace_graph() to
remove the graph_ops from function graph. The issue is that when it does
so, it returns before removing the function from its graph ops via
ftrace_set_filter_ips(). This leaves the last function lingering in the
fprobe's fgraph ops, and if a new probe is added it also enables that
last function (even though the callback will just drop it, making the
call adds unneeded overhead).
# echo "f:myevent1 kernel_clone" >> /sys/kernel/tracing/dynamic_events
# cat /sys/kernel/tracing/enabled_functions
kernel_clone (1) tramp: 0xffffffffc02f3000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
# echo "f:myevent2 schedule_timeout" >> /sys/kernel/tracing/dynamic_events
# cat /sys/kernel/tracing/enabled_functions
kernel_clone (1) tramp: 0xffffffffc02f3000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
schedule_timeout (1) tramp: 0xffffffffc02f3000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
# > /sys/kernel/tracing/dynamic_events
# cat /sys/kernel/tracing/enabled_functions
# echo "f:myevent3 kmem_cache_free" >> /sys/kernel/tracing/dynamic_events
# cat /sys/kernel/tracing/enabled_functions
kmem_cache_free (1) tramp: 0xffffffffc0219000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
schedule_timeout (1) tramp: 0xffffffffc0219000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
The above enabled a fprobe on kernel_clone, and then on schedule_timeout.
The content of the enabled_functions shows the functions that have a
callback attached to them. The fprobe attached to those functions
properly. Then the fprobes were cleared, and enabled_functions was empty
after that. But after adding a fprobe on kmem_cache_free, the
enabled_functions shows that the schedule_timeout was attached again. This
is because it was still left in the fprobe ops that is used to tell
function graph what functions it wants callbacks from.
Cc: stable(a)vger.kernel.org
Fixes: 4346ba1604093 ("fprobe: Rewrite fprobe on function-graph tracer")
Tested-by: Heiko Carstens <hca(a)linux.ibm.com>
Acked-by: Masami Hiramatsu (Google) <mhiramat(a)kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
---
kernel/trace/fprobe.c | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
index 2560b312ad57..62e8f7d56602 100644
--- a/kernel/trace/fprobe.c
+++ b/kernel/trace/fprobe.c
@@ -403,11 +403,9 @@ static void fprobe_graph_remove_ips(unsigned long *addrs, int num)
lockdep_assert_held(&fprobe_mutex);
fprobe_graph_active--;
- if (!fprobe_graph_active) {
- /* Q: should we unregister it ? */
+ /* Q: should we unregister it ? */
+ if (!fprobe_graph_active)
unregister_ftrace_graph(&fprobe_graph_ops);
- return;
- }
ftrace_set_filter_ips(&fprobe_graph_ops.ops, addrs, num, 1, 0);
}
--
2.47.2
From: Steven Rostedt <rostedt(a)goodmis.org>
Check if a function is already in the manager ops of a subops. A manager
ops contains multiple subops, and if two or more subops are tracing the
same function, the manager ops only needs a single entry in its hash.
Cc: stable(a)vger.kernel.org
Fixes: 4f554e955614f ("ftrace: Add ftrace_set_filter_ips function")
Tested-by: Heiko Carstens <hca(a)linux.ibm.com>
Reviewed-by: Masami Hiramatsu (Google) <mhiramat(a)kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
---
kernel/trace/ftrace.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index bec54dc27204..6b0c25761ccb 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -5718,6 +5718,9 @@ __ftrace_match_addr(struct ftrace_hash *hash, unsigned long ip, int remove)
return -ENOENT;
free_hash_entry(hash, entry);
return 0;
+ } else if (__ftrace_lookup_ip(hash, ip) != NULL) {
+ /* Already exists */
+ return 0;
}
entry = add_hash_entry(hash, ip);
--
2.47.2
From: Steven Rostedt <rostedt(a)goodmis.org>
Function graph uses a subops and manager ops mechanism to attach to
ftrace. The manager ops connects to ftrace and the functions it connects
to is defined by a list of subops that it manages.
The function hash that defines what the above ops attaches to limits the
functions to attach if the hash has any content. If the hash is empty, it
means to trace all functions.
The creation of the manager ops hash is done by iterating over all the
subops hashes. If any of the subops hashes is empty, it means that the
manager ops hash must trace all functions as well.
The issue is in the creation of the manager ops. When a second subops is
attached, a new hash is created by starting it as NULL and adding the
subops one at a time. But the NULL hash is mistaken for an empty hash,
and once an empty hash is found, the loop over the subops stops and all
functions are simply enabled.
# echo "f:myevent1 kernel_clone" >> /sys/kernel/tracing/dynamic_events
# cat /sys/kernel/tracing/enabled_functions
kernel_clone (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
# echo "f:myevent2 schedule_timeout" >> /sys/kernel/tracing/dynamic_events
# cat /sys/kernel/tracing/enabled_functions
trace_initcall_start_cb (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
run_init_process (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
try_to_run_init_process (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
x86_pmu_show_pmu_cap (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
cleanup_rapl_pmus (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
uncore_free_pcibus_map (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
uncore_types_exit (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
uncore_pci_exit.part.0 (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
kvm_shutdown (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
vmx_dump_msrs (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
vmx_cleanup_l1d_flush (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
[..]
Fix this by initializing the new hash to NULL, and if the hash is NULL,
do not treat it as an empty hash but instead allocate it by copying the
content of the first subops. Then, on subsequent iterations, the new
hash will not be NULL but will hold the content of the previous subops.
If that first subops attached to all functions, the new hash may then
assume that the manager ops also needs to attach to all functions.
Cc: stable(a)vger.kernel.org
Fixes: 5fccc7552ccbc ("ftrace: Add subops logic to allow one ops to manage many")
Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
---
kernel/trace/ftrace.c | 30 ++++++++++++++++++++----------
1 file changed, 20 insertions(+), 10 deletions(-)
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 728ecda6e8d4..03b35a05808c 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3220,15 +3220,22 @@ static struct ftrace_hash *copy_hash(struct ftrace_hash *src)
* The filter_hash updates uses just the append_hash() function
* and the notrace_hash does not.
*/
-static int append_hash(struct ftrace_hash **hash, struct ftrace_hash *new_hash)
+static int append_hash(struct ftrace_hash **hash, struct ftrace_hash *new_hash,
+ int size_bits)
{
struct ftrace_func_entry *entry;
int size;
int i;
- /* An empty hash does everything */
- if (ftrace_hash_empty(*hash))
- return 0;
+ if (*hash) {
+ /* An empty hash does everything */
+ if (ftrace_hash_empty(*hash))
+ return 0;
+ } else {
+ *hash = alloc_ftrace_hash(size_bits);
+ if (!*hash)
+ return -ENOMEM;
+ }
/* If new_hash has everything make hash have everything */
if (ftrace_hash_empty(new_hash)) {
@@ -3292,16 +3299,18 @@ static int intersect_hash(struct ftrace_hash **hash, struct ftrace_hash *new_has
/* Return a new hash that has a union of all @ops->filter_hash entries */
static struct ftrace_hash *append_hashes(struct ftrace_ops *ops)
{
- struct ftrace_hash *new_hash;
+ struct ftrace_hash *new_hash = NULL;
struct ftrace_ops *subops;
+ int size_bits;
int ret;
- new_hash = alloc_ftrace_hash(ops->func_hash->filter_hash->size_bits);
- if (!new_hash)
- return NULL;
+ if (ops->func_hash->filter_hash)
+ size_bits = ops->func_hash->filter_hash->size_bits;
+ else
+ size_bits = FTRACE_HASH_DEFAULT_BITS;
list_for_each_entry(subops, &ops->subop_list, list) {
- ret = append_hash(&new_hash, subops->func_hash->filter_hash);
+ ret = append_hash(&new_hash, subops->func_hash->filter_hash, size_bits);
if (ret < 0) {
free_ftrace_hash(new_hash);
return NULL;
@@ -3505,7 +3514,8 @@ int ftrace_startup_subops(struct ftrace_ops *ops, struct ftrace_ops *subops, int
filter_hash = alloc_and_copy_ftrace_hash(size_bits, ops->func_hash->filter_hash);
if (!filter_hash)
return -ENOMEM;
- ret = append_hash(&filter_hash, subops->func_hash->filter_hash);
+ ret = append_hash(&filter_hash, subops->func_hash->filter_hash,
+ size_bits);
if (ret < 0) {
free_ftrace_hash(filter_hash);
return ret;
--
2.47.2
This series primarily adds checks at relevant places in the venus driver
where out-of-bounds accesses are possible due to unexpected payloads from
the venus firmware. Each patch describes the specific OOB possibility it
addresses.
Please review and share your feedback.
Validated on sc7180(v4), rb5(v6) and db410c(v1).
Changes in v5
- Add few checks as per comments from Hans
- Link to v4: https://lore.kernel.org/r/20250207-venus_oob_2-v4-0-522da0b68b22@quicinc.com
Changes in v4:
- fix an uninitialized variable (media ci)
- Link to v3: https://lore.kernel.org/r/20250128-venus_oob_2-v3-0-0144ecee68d8@quicinc.com
Changes in v3:
- update the packet parsing logic in hfi_parser. The utility parsing API
  now returns the size of the data parsed; accordingly, the parser adjusts
  the remaining bytes, taking care of the OOB scenario as well (Bryan)
- Link to v2:
https://lore.kernel.org/r/20241128-venus_oob_2-v2-0-483ae0a464b8@quicinc.com
Changes in v2:
- init_codec to always update with latest payload from firmware
(Dmitry/Bryan)
- Rewrite the logic of packet parsing to consider payload size for
different packet type (Bryan)
- Consider reading sfr data till available space (Dmitry)
- Add reviewed-by tags
- Link to v1:
https://lore.kernel.org/all/20241105-venus_oob-v1-0-8d4feedfe2bb@quicinc.co…
Signed-off-by: Vikash Garodia <quic_vgarodia(a)quicinc.com>
---
Vikash Garodia (4):
media: venus: hfi_parser: add check to avoid out of bound access
media: venus: hfi_parser: refactor hfi packet parsing logic
media: venus: hfi: add check to handle incorrect queue size
media: venus: hfi: add a check to handle OOB in sfr region
drivers/media/platform/qcom/venus/hfi_parser.c | 100 ++++++++++++++++++-------
drivers/media/platform/qcom/venus/hfi_venus.c | 18 ++++-
2 files changed, 90 insertions(+), 28 deletions(-)
---
base-commit: c7ccf3683ac9746b263b0502255f5ce47f64fe0a
change-id: 20241115-venus_oob_2-21708239176a
Best regards,
--
Vikash Garodia <quic_vgarodia(a)quicinc.com>
Vladimir reports that a race condition to attach a VMID to a stage-2 MMU
sometimes results in a vCPU entering the guest with a VMID of 0:
| CPU1 | CPU2
| |
| | kvm_arch_vcpu_ioctl_run
| | vcpu_load <= load VTTBR_EL2
| | kvm_vmid->id = 0
| |
| kvm_arch_vcpu_ioctl_run |
| vcpu_load <= load VTTBR_EL2 |
| with kvm_vmid->id = 0|
| kvm_arm_vmid_update <= allocates fresh |
| kvm_vmid->id and |
| reload VTTBR_EL2 |
| |
| | kvm_arm_vmid_update <= observes that kvm_vmid->id
| | already allocated,
| | skips reload VTTBR_EL2
Oh yeah, it's as bad as it looks. Remember that VHE loads the stage-2
MMU eagerly but a VMID only gets attached to the MMU later on in the
KVM_RUN loop.
Even in the "best case" where VTTBR_EL2 correctly gets reprogrammed
before entering the EL1&0 regime, there is a period of time where
hardware is configured with VMID 0. That's completely insane. So, rather
than decorating the 'late' binding with another hack, just allocate the
damn thing up front.
Attaching a VMID from vcpu_load() is still rollover safe since
(surprise!) it'll always get called after a vCPU was preempted.
Excuse me while I go find a brown paper bag.
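The resulting ordering (sketch, condensed from the diff below):

  /* kvm_arch_vcpu_load(), after the fix: */
  kvm_arm_vmid_update(&mmu->vmid);  /* VMID allocated/refreshed up front, */
                                    /* before VHE eagerly programs        */
                                    /* VTTBR_EL2 later in vcpu_load(), so */
                                    /* the guest never runs with VMID 0.  */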
Cc: stable(a)vger.kernel.org
Fixes: 934bf871f011 ("KVM: arm64: Load the stage-2 MMU context in kvm_vcpu_load_vhe()")
Reported-by: Vladimir Murzin <vladimir.murzin(a)arm.com>
Signed-off-by: Oliver Upton <oliver.upton(a)linux.dev>
---
arch/arm64/include/asm/kvm_host.h | 2 +-
arch/arm64/kvm/arm.c | 22 ++++++++++------------
arch/arm64/kvm/vmid.c | 11 +++--------
3 files changed, 14 insertions(+), 21 deletions(-)
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 7cfa024de4e3..e6e8b8970caa 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -1271,7 +1271,7 @@ int kvm_arm_pvtime_has_attr(struct kvm_vcpu *vcpu,
extern unsigned int __ro_after_init kvm_arm_vmid_bits;
int __init kvm_arm_vmid_alloc_init(void);
void __init kvm_arm_vmid_alloc_free(void);
-bool kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid);
+void kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid);
void kvm_arm_vmid_clear_active(void);
static inline void kvm_arm_pvtime_vcpu_init(struct kvm_vcpu_arch *vcpu_arch)
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 646e806c6ca6..fafe42755031 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -559,6 +559,16 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
mmu = vcpu->arch.hw_mmu;
last_ran = this_cpu_ptr(mmu->last_vcpu_ran);
+ /*
+ * Ensure a VMID is allocated for the MMU before programming VTTBR_EL2,
+ * which happens eagerly in VHE.
+ *
+ * Also, the VMID allocator only preserves VMIDs that are active at the
+ * time of rollover, so KVM might need to grab a new VMID for the MMU if
+ * this is called from kvm_sched_in().
+ */
+ kvm_arm_vmid_update(&mmu->vmid);
+
/*
* We guarantee that both TLBs and I-cache are private to each
* vcpu. If detecting that a vcpu from the same VM has
@@ -1138,18 +1148,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
*/
preempt_disable();
- /*
- * The VMID allocator only tracks active VMIDs per
- * physical CPU, and therefore the VMID allocated may not be
- * preserved on VMID roll-over if the task was preempted,
- * making a thread's VMID inactive. So we need to call
- * kvm_arm_vmid_update() in non-premptible context.
- */
- if (kvm_arm_vmid_update(&vcpu->arch.hw_mmu->vmid) &&
- has_vhe())
- __load_stage2(vcpu->arch.hw_mmu,
- vcpu->arch.hw_mmu->arch);
-
kvm_pmu_flush_hwstate(vcpu);
local_irq_disable();
diff --git a/arch/arm64/kvm/vmid.c b/arch/arm64/kvm/vmid.c
index 806223b7022a..7fe8ba1a2851 100644
--- a/arch/arm64/kvm/vmid.c
+++ b/arch/arm64/kvm/vmid.c
@@ -135,11 +135,10 @@ void kvm_arm_vmid_clear_active(void)
atomic64_set(this_cpu_ptr(&active_vmids), VMID_ACTIVE_INVALID);
}
-bool kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid)
+void kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid)
{
unsigned long flags;
u64 vmid, old_active_vmid;
- bool updated = false;
vmid = atomic64_read(&kvm_vmid->id);
@@ -157,21 +156,17 @@ bool kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid)
if (old_active_vmid != 0 && vmid_gen_match(vmid) &&
0 != atomic64_cmpxchg_relaxed(this_cpu_ptr(&active_vmids),
old_active_vmid, vmid))
- return false;
+ return;
raw_spin_lock_irqsave(&cpu_vmid_lock, flags);
/* Check that our VMID belongs to the current generation. */
vmid = atomic64_read(&kvm_vmid->id);
- if (!vmid_gen_match(vmid)) {
+ if (!vmid_gen_match(vmid))
vmid = new_vmid(kvm_vmid);
- updated = true;
- }
atomic64_set(this_cpu_ptr(&active_vmids), vmid);
raw_spin_unlock_irqrestore(&cpu_vmid_lock, flags);
-
- return updated;
}
/*
base-commit: 2014c95afecee3e76ca4a56956a936e23283f05b
--
2.39.5
The put_device(&epc->dev) call will trigger pci_epc_release(), which
frees "epc", so the kfree(epc) on the next line is a double free.
Found by code review.
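For context, a sketch of the release path (mirroring the driver's
release callback): once device_initialize() has run, freeing "epc" is
the release callback's job alone.

  static void pci_epc_release(struct device *dev)
  {
          kfree(to_pci_epc(dev));  /* runs when the last reference drops */
  }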
Cc: stable(a)vger.kernel.org
Fixes: 5e8cb4033807 ("PCI: endpoint: Add EP core layer to enable EP controller and EP functions")
Signed-off-by: Ma Ke <make24(a)iscas.ac.cn>
---
drivers/pci/endpoint/pci-epc-core.c | 1 -
1 file changed, 1 deletion(-)
diff --git a/drivers/pci/endpoint/pci-epc-core.c b/drivers/pci/endpoint/pci-epc-core.c
index 46c9a5c3ca14..652350f054cf 100644
--- a/drivers/pci/endpoint/pci-epc-core.c
+++ b/drivers/pci/endpoint/pci-epc-core.c
@@ -818,7 +818,6 @@ __pci_epc_create(struct device *dev, const struct pci_epc_ops *ops,
put_dev:
put_device(&epc->dev);
- kfree(epc);
err_ret:
return ERR_PTR(ret);
--
2.25.1
From: Kevin Brodsky <kevin.brodsky(a)arm.com>
[ Upstream commit 46036188ea1f5266df23a6149dea0df1c77cd1c7 ]
The mm kselftests are currently built with no optimisation (-O0). It's
unclear why, and besides being obviously suboptimal, this also prevents
the pkeys tests from working as intended. Let's build all the tests with
-O2.
[kevin.brodsky(a)arm.com: silence unused-result warnings]
Link: https://lkml.kernel.org/r/20250107170110.2819685-1-kevin.brodsky@arm.com
Link: https://lkml.kernel.org/r/20241209095019.1732120-6-kevin.brodsky@arm.com
Signed-off-by: Kevin Brodsky <kevin.brodsky(a)arm.com>
Cc: Aruna Ramakrishna <aruna.ramakrishna(a)oracle.com>
Cc: Catalin Marinas <catalin.marinas(a)arm.com>
Cc: Dave Hansen <dave.hansen(a)linux.intel.com>
Cc: Joey Gouly <joey.gouly(a)arm.com>
Cc: Keith Lucas <keith.lucas(a)oracle.com>
Cc: Ryan Roberts <ryan.roberts(a)arm.com>
Cc: Shuah Khan <shuah(a)kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
(cherry picked from commit 46036188ea1f5266df23a6149dea0df1c77cd1c7)
[Yifei: This commit also fixes the failure of pkey_sighandler_tests_64,
which is also present in linux-6.12.y, hence this backport. It was
already backported to linux-6.13.y as commit d9eb5a1e76f56]
Signed-off-by: Yifei Liu <yifei.l.liu(a)oracle.com>
---
tools/testing/selftests/mm/Makefile | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile
index 02e1204971b0..c0138cb19705 100644
--- a/tools/testing/selftests/mm/Makefile
+++ b/tools/testing/selftests/mm/Makefile
@@ -33,9 +33,16 @@ endif
# LDLIBS.
MAKEFLAGS += --no-builtin-rules
-CFLAGS = -Wall -I $(top_srcdir) $(EXTRA_CFLAGS) $(KHDR_INCLUDES) $(TOOLS_INCLUDES)
+CFLAGS = -Wall -O2 -I $(top_srcdir) $(EXTRA_CFLAGS) $(KHDR_INCLUDES) $(TOOLS_INCLUDES)
LDLIBS = -lrt -lpthread -lm
+# Some distributions (such as Ubuntu) configure GCC so that _FORTIFY_SOURCE is
+# automatically enabled at -O1 or above. This triggers various unused-result
+# warnings where functions such as read() or write() are called and their
+# return value is not checked. Disable _FORTIFY_SOURCE to silence those
+# warnings.
+CFLAGS += -U_FORTIFY_SOURCE
+
TEST_GEN_FILES = cow
TEST_GEN_FILES += compaction_test
TEST_GEN_FILES += gup_longterm
--
2.46.0
Hello,
The following patches prevent Linux 6.6.78+ rtla from building:
- "rtla/timerlat_top: Set OSNOISE_WORKLOAD for kernel threads" (stable
commit 41955b6c268154f81e34f9b61cf8156eec0730c0)
- "rtla/timerlat_hist: Set OSNOISE_WORKLOAD for kernel threads" (stable
commit 83b74901bdc9b58739193b8ee6989254391b6ba7)
Both were added to Linux 6.6.78 based on the Fixes tag in the upstream
commits.
These patches prevent the 6.6.78 rtla from building, with a similar
error for each (missing kernel_workload in the params struct):
src/timerlat_top.c:687:52: error: ‘struct timerlat_top_params’ has no member named ‘kernel_workload’
These patches appear to depend on "rtla/timerlat: Make user-space
threads the default" commit fb9e90a67ee9a42779a8ea296a4cf7734258b27d
which is not present in 6.6.
I am not sure if it's better to revert them or pick up
fb9e90a67ee9a42779a8ea296a4cf7734258b27d in 6.6. Tomas, what do you
think?
HTH,
Guillaume.
--
Guillaume Morin <guillaume(a)morinfr.org>
When creating the sysfs files fails, the allocated minor must be freed
so that it can be reused later. This is especially harmful for static
minor numbers, since those would otherwise always fail to register later
on.
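A hedged sketch of the failure mode for a static minor (mydev and its
fops are made up for the example):

  #include <linux/fs.h>
  #include <linux/miscdevice.h>

  static const struct file_operations mydev_fops; /* empty ops, sketch only */

  static struct miscdevice mydev = {
          .minor = 130,           /* a static minor, e.g. WATCHDOG_MINOR */
          .name  = "mydev",
          .fops  = &mydev_fops,
  };

  /* Pre-fix: if device creation inside misc_register(&mydev) fails, minor
   * 130 stays allocated in the ida, so every later retry of
   * misc_register(&mydev) fails even though no such device exists. */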
Fixes: 6d04d2b554b1 ("misc: misc_minor_alloc to use ida for all dynamic/misc dynamic minors")
Cc: stable(a)vger.kernel.org
Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo(a)igalia.com>
---
drivers/char/misc.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/char/misc.c b/drivers/char/misc.c
index 7a768775e558..7843a1a34d64 100644
--- a/drivers/char/misc.c
+++ b/drivers/char/misc.c
@@ -266,8 +266,8 @@ int misc_register(struct miscdevice *misc)
device_create_with_groups(&misc_class, misc->parent, dev,
misc, misc->groups, "%s", misc->name);
if (IS_ERR(misc->this_device)) {
+ misc_minor_free(misc->minor);
if (is_dynamic) {
- misc_minor_free(misc->minor);
misc->minor = MISC_DYNAMIC_MINOR;
}
err = PTR_ERR(misc->this_device);
--
2.34.1
From: Tanmay Kathpalia <tanmay.kathpalia(a)altera.com>
A Partial Region Controller can be connected to one or more
Freeze Bridges. Each Freeze Bridge has an illegal_request
bit represented in the freeze_illegal_request register.
Thus, instead of only setting to clear the illegal_request bit
of the first Freeze Bridge, we need to ensure the set-to-clear
action is applied to whichever Freeze Bridge has a pending
illegal request.
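The set-to-clear semantics, as a sketch (using the driver's register
pointer; illustration only):

  u32 illegal = readl(csr_illegal_req_addr); /* one bit per Freeze Bridge */

  /* Writing back the value read acks every bridge with a pending illegal
   * request; writing a literal 1 only ever cleared bit 0, i.e. the
   * first bridge. */
  writel(illegal, csr_illegal_req_addr);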
Fixes: ca24a648f535 ("fpga: add altera freeze bridge support")
Cc: stable(a)vger.kernel.org
Signed-off-by: Chiau Ee Chew <chiau.ee.chew(a)intel.com>
Signed-off-by: Tanmay Kathpalia <tanmay.kathpalia(a)altera.com>
---
drivers/fpga/altera-freeze-bridge.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/fpga/altera-freeze-bridge.c b/drivers/fpga/altera-freeze-bridge.c
index 594693ff786e..23e8b2b54355 100644
--- a/drivers/fpga/altera-freeze-bridge.c
+++ b/drivers/fpga/altera-freeze-bridge.c
@@ -52,7 +52,7 @@ static int altera_freeze_br_req_ack(struct altera_freeze_br_data *priv,
if (illegal) {
dev_err(dev, "illegal request detected 0x%x", illegal);
- writel(1, csr_illegal_req_addr);
+ writel(illegal, csr_illegal_req_addr);
illegal = readl(csr_illegal_req_addr);
if (illegal)
--
2.35.3
In mptsas_setup_wide_ports() the calculation of the phy bitmask is
subject to undefined behavior when the phy index reaches the width of
type 'int' but is still less than 64.
Utilize the BIT_ULL() macro to fix this.
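As an illustration (the phy_id value here is made up):

  #include <linux/bits.h>

  unsigned int phy_id = 40;    /* e.g. a wide-port phy index >= 32 */
  u64 bad  = 1 << phy_id;      /* int arithmetic: undefined behavior once
                                * phy_id reaches the width of int        */
  u64 good = BIT_ULL(phy_id);  /* expands to 1ULL << phy_id, well-defined
                                * for phy_id 0..63                       */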
Found by Linux Verification Center (linuxtesting.org) with SVACE static
analysis tool.
Fixes: 547f9a218436 ("[SCSI] mptsas: wide port support")
Cc: stable(a)vger.kernel.org
Co-developed-by: Aleksandr Mishin <amishin(a)t-argos.ru>
Signed-off-by: Aleksandr Mishin <amishin(a)t-argos.ru>
Signed-off-by: Fedor Pchelkin <pchelkin(a)ispras.ru>
---
struct mptsas_portinfo_details::phy_bitmask is used only in various
logging printks throughout the driver. Another option would be to drop
this field completely, if that is considered a more appropriate solution.
drivers/message/fusion/mptsas.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c
index 7e79da9684ed..cd95655f1592 100644
--- a/drivers/message/fusion/mptsas.c
+++ b/drivers/message/fusion/mptsas.c
@@ -43,6 +43,7 @@
*/
/*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
+#include <linux/bits.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/slab.h>
@@ -880,7 +881,7 @@ mptsas_setup_wide_ports(MPT_ADAPTER *ioc, struct mptsas_portinfo *port_info)
"%s: [%p]: deleting phy = %d\n",
ioc->name, __func__, port_details, i));
port_details->num_phys--;
- port_details->phy_bitmask &= ~ (1 << phy_info->phy_id);
+ port_details->phy_bitmask &= ~BIT_ULL(phy_info->phy_id);
memset(&phy_info->attached, 0, sizeof(struct mptsas_devinfo));
if (phy_info->phy) {
devtprintk(ioc, dev_printk(KERN_DEBUG,
@@ -915,7 +916,7 @@ mptsas_setup_wide_ports(MPT_ADAPTER *ioc, struct mptsas_portinfo *port_info)
port_details->port_info = port_info;
if (phy_info->phy_id < 64 )
port_details->phy_bitmask |=
- (1 << phy_info->phy_id);
+ BIT_ULL(phy_info->phy_id);
phy_info->sas_port_add_phy=1;
dsaswideprintk(ioc, printk(MYIOC_s_DEBUG_FMT "\t\tForming port\n\t\t"
"phy_id=%d sas_address=0x%018llX\n",
@@ -957,7 +958,7 @@ mptsas_setup_wide_ports(MPT_ADAPTER *ioc, struct mptsas_portinfo *port_info)
phy_info_cmp->port_details = port_details;
if (phy_info_cmp->phy_id < 64 )
port_details->phy_bitmask |=
- (1 << phy_info_cmp->phy_id);
+ BIT_ULL(phy_info_cmp->phy_id);
port_details->num_phys++;
}
}
--
2.48.1
On Tue, Feb 18, 2025 at 05:01:31PM +0100, Stefano Garzarella wrote:
>>
>>No, nothing I can think of.
>>
>>Note however that the comment above vsock_close() ("Dummy callback
>>required
>>by sockmap. See unconditional call of saved_close() in
>>sock_map_close().")
>>becomes somewhat misleading :)
>>
>
>Yeah, we can mention in the commit description of the backport that we
>backport it just to reduce conflicts but sockmap features are not
>backported. I'd touch as little as possible in the patch; otherwise IMHO
>it is better to just fix the conflicts in the 2 patches.
>
>Thanks,
>Stefano
>
Totally agree with you, will send the backport in a couple of days if
nobody has any objections.
Luigi
No upstream commit exists for this commit.
Using an arbitrary value that does not fall into the required range as
the shift amount when printing the error message is wrong in itself.
Call Trace:
__dump_stack lib/dump_stack.c:88 [inline]
dump_stack_lvl+0x1e3/0x2cb lib/dump_stack.c:106
ubsan_epilogue lib/ubsan.c:151 [inline]
__ubsan_handle_shift_out_of_bounds+0x3bf/0x420 lib/ubsan.c:321
parse_options+0x4ad6/0x4ae0 fs/f2fs/super.c:919
f2fs_fill_super+0x321b/0x7c40 fs/f2fs/super.c:4214
mount_bdev+0x2c9/0x3f0 fs/super.c:1443
legacy_get_tree+0xeb/0x180 fs/fs_context.c:632
vfs_get_tree+0x88/0x270 fs/super.c:1573
do_new_mount+0x2ba/0xb40 fs/namespace.c:3051
do_mount fs/namespace.c:3394 [inline]
__do_sys_mount fs/namespace.c:3602 [inline]
__se_sys_mount+0x2d5/0x3c0 fs/namespace.c:3579
do_syscall_x64 arch/x86/entry/common.c:51 [inline]
do_syscall_64+0x3b/0xb0 arch/x86/entry/common.c:81
entry_SYSCALL_64_after_hwframe+0x68/0xd2
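A minimal userspace sketch (not kernel code) of why evaluating the
shift before validating its operand trips UBSAN, and why printing the
raw argument is safe:

    #include <stdio.h>

    int main(void)
    {
            int arg = 64;   /* hypothetical out-of-range mount option */

            /* 1 << arg is undefined for arg >= 32 (or arg < 0), even
             * when the result is only wanted for an error message.
             * Printing arg itself is always well-defined: */
            printf("Not support 2^%d\n", arg);
            return 0;
    }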
There is a commit 87161a2b0aed ("f2fs: deprecate io_bits") that completely
removes these strings, but it's not practical to backport it.
Found by Linux Verification Center (linuxtesting.org) with Syzkaller.
Link: syzbot+410500002694f3ff65b1(a)syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=410500002694f3ff65b1
Fixes: ec91538dccd4 ("f2fs: get io size bit from mount option")
Signed-off-by: Denis Arefev <arefev(a)swemel.ru>
---
fs/f2fs/super.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 9afbb51bd678..5fd64bc35f31 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -722,8 +722,8 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
if (args->from && match_int(args, &arg))
return -EINVAL;
if (arg <= 0 || arg > __ilog2_u32(BIO_MAX_PAGES)) {
- f2fs_warn(sbi, "Not support %d, larger than %d",
- 1 << arg, BIO_MAX_PAGES);
+ f2fs_warn(sbi, "Not support 2^%d, invalid argument %d",
+ arg, BIO_MAX_PAGES);
return -EINVAL;
}
F2FS_OPTION(sbi).write_io_size_bits = arg;
--
2.43.0
No upstream commit exists for this commit.
Using an arbitrary value that does not fall into the required range as
the shift amount when printing the error message is wrong in itself.
Call Trace:
__dump_stack lib/dump_stack.c:88 [inline]
dump_stack_lvl+0x1e3/0x2cb lib/dump_stack.c:106
ubsan_epilogue lib/ubsan.c:151 [inline]
__ubsan_handle_shift_out_of_bounds+0x3bf/0x420 lib/ubsan.c:321
parse_options+0x4ad6/0x4ae0 fs/f2fs/super.c:919
f2fs_fill_super+0x321b/0x7c40 fs/f2fs/super.c:4214
mount_bdev+0x2c9/0x3f0 fs/super.c:1443
legacy_get_tree+0xeb/0x180 fs/fs_context.c:632
vfs_get_tree+0x88/0x270 fs/super.c:1573
do_new_mount+0x2ba/0xb40 fs/namespace.c:3051
do_mount fs/namespace.c:3394 [inline]
__do_sys_mount fs/namespace.c:3602 [inline]
__se_sys_mount+0x2d5/0x3c0 fs/namespace.c:3579
do_syscall_x64 arch/x86/entry/common.c:51 [inline]
do_syscall_64+0x3b/0xb0 arch/x86/entry/common.c:81
entry_SYSCALL_64_after_hwframe+0x68/0xd2
There is a commit 87161a2b0aed ("f2fs: deprecate io_bits") that completely
removes these strings, but it's not practical to backport it.
Found by Linux Verification Center (linuxtesting.org) with Syzkaller.
Link: syzbot+410500002694f3ff65b1(a)syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=410500002694f3ff65b1
Fixes: ec91538dccd4 ("f2fs: get io size bit from mount option")
Signed-off-by: Denis Arefev <arefev(a)swemel.ru>
---
fs/f2fs/super.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index f8aaff9b1784..c0fa7d785f3c 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -891,8 +891,8 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
if (args->from && match_int(args, &arg))
return -EINVAL;
if (arg <= 0 || arg > __ilog2_u32(BIO_MAX_VECS)) {
- f2fs_warn(sbi, "Not support %d, larger than %d",
- 1 << arg, BIO_MAX_VECS);
+ f2fs_warn(sbi, "Not support 2^%d, invalid argument %d",
+ arg, BIO_MAX_VECS);
return -EINVAL;
}
F2FS_OPTION(sbi).write_io_size_bits = arg;
--
2.43.0
No upstream commit exists for this commit.
Using an arbitrary value that does not fall into the required range as
the shift amount when printing the error message is wrong in itself.
Call Trace:
__dump_stack lib/dump_stack.c:88 [inline]
dump_stack_lvl+0x1e3/0x2cb lib/dump_stack.c:106
ubsan_epilogue lib/ubsan.c:151 [inline]
__ubsan_handle_shift_out_of_bounds+0x3bf/0x420 lib/ubsan.c:321
parse_options+0x4ad6/0x4ae0 fs/f2fs/super.c:919
f2fs_fill_super+0x321b/0x7c40 fs/f2fs/super.c:4214
mount_bdev+0x2c9/0x3f0 fs/super.c:1443
legacy_get_tree+0xeb/0x180 fs/fs_context.c:632
vfs_get_tree+0x88/0x270 fs/super.c:1573
do_new_mount+0x2ba/0xb40 fs/namespace.c:3051
do_mount fs/namespace.c:3394 [inline]
__do_sys_mount fs/namespace.c:3602 [inline]
__se_sys_mount+0x2d5/0x3c0 fs/namespace.c:3579
do_syscall_x64 arch/x86/entry/common.c:51 [inline]
do_syscall_64+0x3b/0xb0 arch/x86/entry/common.c:81
entry_SYSCALL_64_after_hwframe+0x68/0xd2
There is a commit 87161a2b0aed ("f2fs: deprecate io_bits") that completely
removes these strings, but it's not practical to backport it.
Found by Linux Verification Center (linuxtesting.org) with Syzkaller.
Link: syzbot+410500002694f3ff65b1(a)syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=410500002694f3ff65b1
Fixes: ec91538dccd4 ("f2fs: get io size bit from mount option")
Signed-off-by: Denis Arefev <arefev(a)swemel.ru>
---
fs/f2fs/super.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 72160b906f4b..7d7766761fe4 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -916,8 +916,8 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
if (args->from && match_int(args, &arg))
return -EINVAL;
if (arg <= 0 || arg > __ilog2_u32(BIO_MAX_VECS)) {
- f2fs_warn(sbi, "Not support %ld, larger than %d",
- BIT(arg), BIO_MAX_VECS);
+ f2fs_warn(sbi, "Not support 2^%d, invalid argument %d",
+ arg, BIO_MAX_VECS);
return -EINVAL;
}
F2FS_OPTION(sbi).write_io_size_bits = arg;
--
2.43.0
There are two variables that indicate the interrupt type to be used
in the next test execution: the global "irq_type" and test->irq_type.
The global is referenced from pci_endpoint_test_get_irq() to preserve
the current type for ioctl(PCITEST_GET_IRQTYPE).
The type set in pci_endpoint_test_set_irq() isn't reflected in the
global "irq_type", so ioctl(PCITEST_GET_IRQTYPE) returns the previous
type.
As a result, the wrong type will be displayed in "pcitest" as follows:
# pcitest -i 0
SET IRQ TYPE TO LEGACY: OKAY
# pcitest -I
GET IRQ TYPE: MSI
Fix this issue by propagating the current type to the global "irq_type".
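A hypothetical userspace reduction of the stale-global pattern (the
names are illustrative, not the driver's actual code):

    #include <stdio.h>

    enum irq_type { IRQ_TYPE_LEGACY, IRQ_TYPE_MSI };

    static enum irq_type irq_type = IRQ_TYPE_MSI; /* read by the GET path */

    struct test_dev { enum irq_type irq_type; };

    static void set_irq(struct test_dev *dev, enum irq_type new_type)
    {
            dev->irq_type = new_type;
            irq_type = dev->irq_type; /* the one-line fix: sync the global */
    }

    int main(void)
    {
            struct test_dev dev = { .irq_type = IRQ_TYPE_MSI };

            set_irq(&dev, IRQ_TYPE_LEGACY);
            /* without the sync line, this would still report MSI */
            printf("%s\n", irq_type == IRQ_TYPE_LEGACY ? "LEGACY" : "MSI");
            return 0;
    }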
Cc: stable(a)vger.kernel.org
Fixes: b2ba9225e031 ("misc: pci_endpoint_test: Avoid using module parameter to determine irqtype")
Signed-off-by: Kunihiko Hayashi <hayashi.kunihiko(a)socionext.com>
---
drivers/misc/pci_endpoint_test.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/misc/pci_endpoint_test.c b/drivers/misc/pci_endpoint_test.c
index f13fa32ef91a..6a0972e7674f 100644
--- a/drivers/misc/pci_endpoint_test.c
+++ b/drivers/misc/pci_endpoint_test.c
@@ -829,6 +829,7 @@ static int pci_endpoint_test_set_irq(struct pci_endpoint_test *test,
return ret;
}
+ irq_type = test->irq_type;
return 0;
}
--
2.25.1
From: Steven Rostedt <rostedt(a)goodmis.org>
Check if a function is already in the manager ops of a subops. A manager
ops contains multiple subops, and if two or more subops are tracing the
same function, the manager ops only needs a single entry in its hash.
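Conceptually (a toy model, not the ftrace hash implementation), adding
an ip that is already present must be a no-op rather than a duplicate
entry or an error:

    #include <stdio.h>

    static unsigned long hash[8];   /* toy "hash": 0 means empty slot */

    static int count_ip(unsigned long ip)
    {
            int i, n = 0;

            for (i = 0; i < 8; i++)
                    if (hash[i] == ip)
                            n++;
            return n;
    }

    static void add_ip(unsigned long ip)
    {
            int i;

            if (count_ip(ip))
                    return;         /* already traced by another subops */
            for (i = 0; i < 8; i++)
                    if (!hash[i]) {
                            hash[i] = ip;
                            return;
                    }
    }

    int main(void)
    {
            add_ip(0xffff1000);     /* subops A traces this function */
            add_ip(0xffff1000);     /* subops B traces the same one */
            printf("entries: %d\n", count_ip(0xffff1000)); /* 1 */
            return 0;
    }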
Cc: stable(a)vger.kernel.org
Fixes: 4f554e955614f ("ftrace: Add ftrace_set_filter_ips function")
Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
---
kernel/trace/ftrace.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 03b35a05808c..189eb0a12f4b 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -5717,6 +5717,9 @@ __ftrace_match_addr(struct ftrace_hash *hash, unsigned long ip, int remove)
return -ENOENT;
free_hash_entry(hash, entry);
return 0;
+ } else if (__ftrace_lookup_ip(hash, ip) != NULL) {
+ /* Already exists */
+ return 0;
}
entry = add_hash_entry(hash, ip);
--
2.47.2
From: Steven Rostedt <rostedt(a)goodmis.org>
When adding a new fprobe, it will update the function hash with the
functions the fprobe is attached to, and register with function graph to
have it call the registered functions. The fprobe_graph_active variable
keeps track of the number of fprobes that are using function graph.
If two fprobes attach to the same function, it increments the
fprobe_graph_active for each of them. But when they are removed, the first
fprobe to be removed will see that the function it is attached to is also
used by another fprobe and it will not remove that function from
function_graph. The logic will skip decrementing the fprobe_graph_active
variable.
This causes the fprobe_graph_active variable to not go to zero when all
fprobes are removed, and in doing so it does not unregister from
function graph. As the fgraph ops hash will now be empty, and an empty
filter hash means all functions are enabled, this triggers function graph
to add a callback to the fprobe infrastructure for every function!
# echo "f:myevent1 kernel_clone" >> /sys/kernel/tracing/dynamic_events
# echo "f:myevent2 kernel_clone%return" >> /sys/kernel/tracing/dynamic_events
# cat /sys/kernel/tracing/enabled_functions
kernel_clone (1) tramp: 0xffffffffc0024000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
# > /sys/kernel/tracing/dynamic_events
# cat /sys/kernel/tracing/enabled_functions
trace_initcall_start_cb (1) tramp: 0xffffffffc0026000 (function_trace_call+0x0/0x170) ->function_trace_call+0x0/0x170
run_init_process (1) tramp: 0xffffffffc0026000 (function_trace_call+0x0/0x170) ->function_trace_call+0x0/0x170
try_to_run_init_process (1) tramp: 0xffffffffc0026000 (function_trace_call+0x0/0x170) ->function_trace_call+0x0/0x170
x86_pmu_show_pmu_cap (1) tramp: 0xffffffffc0026000 (function_trace_call+0x0/0x170) ->function_trace_call+0x0/0x170
cleanup_rapl_pmus (1) tramp: 0xffffffffc0026000 (function_trace_call+0x0/0x170) ->function_trace_call+0x0/0x170
uncore_free_pcibus_map (1) tramp: 0xffffffffc0026000 (function_trace_call+0x0/0x170) ->function_trace_call+0x0/0x170
uncore_types_exit (1) tramp: 0xffffffffc0026000 (function_trace_call+0x0/0x170) ->function_trace_call+0x0/0x170
uncore_pci_exit.part.0 (1) tramp: 0xffffffffc0026000 (function_trace_call+0x0/0x170) ->function_trace_call+0x0/0x170
kvm_shutdown (1) tramp: 0xffffffffc0026000 (function_trace_call+0x0/0x170) ->function_trace_call+0x0/0x170
vmx_dump_msrs (1) tramp: 0xffffffffc0026000 (function_trace_call+0x0/0x170) ->function_trace_call+0x0/0x170
[..]
# cat /sys/kernel/tracing/enabled_functions | wc -l
54702
If a fprobe is being removed and all its functions are also traced by
other fprobes, still decrement the fprobe_graph_active counter.
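A toy model of the invariant (hypothetical, not the kernel code): the
add path increments unconditionally, so the remove path must decrement
unconditionally too; only the ip-list update may be skipped:

    static int fprobe_graph_active;

    static void fprobe_graph_add_model(void)
    {
            fprobe_graph_active++;  /* one per registered fprobe, always */
    }

    static void fprobe_graph_remove_model(int num)
    {
            fprobe_graph_active--;  /* must run even when num == 0 */
            if (!fprobe_graph_active)
                    ;               /* unregister_ftrace_graph(...) */
            if (num)
                    ;               /* ftrace_set_filter_ips(...) */
    }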
Cc: stable(a)vger.kernel.org
Fixes: 4346ba1604093 ("fprobe: Rewrite fprobe on function-graph tracer")
Closes: https://lore.kernel.org/all/20250217114918.10397-A-hca@linux.ibm.com/
Reported-by: Heiko Carstens <hca(a)linux.ibm.com>
Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
---
Changes since v1: https://lore.kernel.org/20250218193126.619197190@goodmis.org
- Move the check into fprobe_graph_remove_ips() to keep it matching
with fprobe_graph_add_ips() (Masami Hiramatsu)
kernel/trace/fprobe.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
index 62e8f7d56602..33082c4e8154 100644
--- a/kernel/trace/fprobe.c
+++ b/kernel/trace/fprobe.c
@@ -407,7 +407,8 @@ static void fprobe_graph_remove_ips(unsigned long *addrs, int num)
if (!fprobe_graph_active)
unregister_ftrace_graph(&fprobe_graph_ops);
- ftrace_set_filter_ips(&fprobe_graph_ops.ops, addrs, num, 1, 0);
+ if (num)
+ ftrace_set_filter_ips(&fprobe_graph_ops.ops, addrs, num, 1, 0);
}
static int symbols_cmp(const void *a, const void *b)
@@ -677,8 +678,7 @@ int unregister_fprobe(struct fprobe *fp)
}
del_fprobe_hash(fp);
- if (count)
- fprobe_graph_remove_ips(addrs, count);
+ fprobe_graph_remove_ips(addrs, count);
kfree_rcu(hlist_array, rcu);
fp->hlist_array = NULL;
--
2.47.2
From: Steven Rostedt <rostedt(a)goodmis.org>
When the last fprobe is removed, it calls unregister_ftrace_graph() to
remove the graph_ops from function graph. The issue is that when it does
so, it returns before removing the function from its graph ops via
ftrace_set_filter_ips(). This leaves the last function lingering in the
fprobe's fgraph ops, and if a probe is added later it also enables that
last function (even though the callback will just drop it, this adds
unneeded overhead to make that call).
# echo "f:myevent1 kernel_clone" >> /sys/kernel/tracing/dynamic_events
# cat /sys/kernel/tracing/enabled_functions
kernel_clone (1) tramp: 0xffffffffc02f3000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
# echo "f:myevent2 schedule_timeout" >> /sys/kernel/tracing/dynamic_events
# cat /sys/kernel/tracing/enabled_functions
kernel_clone (1) tramp: 0xffffffffc02f3000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
schedule_timeout (1) tramp: 0xffffffffc02f3000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
# > /sys/kernel/tracing/dynamic_events
# cat /sys/kernel/tracing/enabled_functions
# echo "f:myevent3 kmem_cache_free" >> /sys/kernel/tracing/dynamic_events
# cat /sys/kernel/tracing/enabled_functions
kmem_cache_free (1) tramp: 0xffffffffc0219000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
schedule_timeout (1) tramp: 0xffffffffc0219000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
The above enabled a fprobe on kernel_clone, and then on schedule_timeout.
The content of the enabled_functions shows the functions that have a
callback attached to them. The fprobe attached to those functions
properly. Then the fprobes were cleared, and enabled_functions was empty
after that. But after adding a fprobe on kmem_cache_free, the
enabled_functions shows that the schedule_timeout was attached again. This
is because it was still left in the fprobe ops that is used to tell
function graph what functions it wants callbacks from.
Cc: stable(a)vger.kernel.org
Fixes: 4346ba1604093 ("fprobe: Rewrite fprobe on function-graph tracer")
Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
---
kernel/trace/fprobe.c | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
index 2560b312ad57..62e8f7d56602 100644
--- a/kernel/trace/fprobe.c
+++ b/kernel/trace/fprobe.c
@@ -403,11 +403,9 @@ static void fprobe_graph_remove_ips(unsigned long *addrs, int num)
lockdep_assert_held(&fprobe_mutex);
fprobe_graph_active--;
- if (!fprobe_graph_active) {
- /* Q: should we unregister it ? */
+ /* Q: should we unregister it ? */
+ if (!fprobe_graph_active)
unregister_ftrace_graph(&fprobe_graph_ops);
- return;
- }
ftrace_set_filter_ips(&fprobe_graph_ops.ops, addrs, num, 1, 0);
}
--
2.47.2
When update_mmu_cache_range() is called by update_mmu_cache(), the vmf
parameter is NULL, which will cause a NULL pointer dereference issue in
adjust_pte():
Unable to handle kernel NULL pointer dereference at virtual address 00000030 when read
Hardware name: Atmel AT91SAM9
PC is at update_mmu_cache_range+0x1e0/0x278
LR is at pte_offset_map_rw_nolock+0x18/0x2c
Call trace:
update_mmu_cache_range from remove_migration_pte+0x29c/0x2ec
remove_migration_pte from rmap_walk_file+0xcc/0x130
rmap_walk_file from remove_migration_ptes+0x90/0xa4
remove_migration_ptes from migrate_pages_batch+0x6d4/0x858
migrate_pages_batch from migrate_pages+0x188/0x488
migrate_pages from compact_zone+0x56c/0x954
compact_zone from compact_node+0x90/0xf0
compact_node from kcompactd+0x1d4/0x204
kcompactd from kthread+0x120/0x12c
kthread from ret_from_fork+0x14/0x38
Exception stack(0xc0d8bfb0 to 0xc0d8bff8)
To fix it, do not rely on whether 'ptl' is equal to vmf->ptl to decide
whether to take the pte lock; decide it by whether
CONFIG_SPLIT_PTE_PTLOCKS is enabled. In addition, if two vmas map to the
same PTE page, there is no need to take the pte lock again, otherwise a
deadlock will occur. Just add the need_lock parameter to let adjust_pte()
know this information.
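A condensed statement of the rule (assuming the kernel's PMD_MASK
semantics): with CONFIG_SPLIT_PTE_PTLOCKS the lock lives in the
page-table page holding the PTEs, and one PTE table covers a PMD-sized
range of virtual addresses, so two addresses share a PTE lock exactly
when they fall in the same PMD-sized region:

    same_pte_lock = ((addr1 & PMD_MASK) == (addr2 & PMD_MASK));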
Reported-by: Ezra Buehler <ezra.buehler(a)husqvarnagroup.com>
Closes: https://lore.kernel.org/lkml/CAM1KZSmZ2T_riHvay+7cKEFxoPgeVpHkVFTzVVEQ1BO0c…
Fixes: fc9c45b71f43 ("arm: adjust_pte() use pte_offset_map_rw_nolock()")
Cc: stable(a)vger.kernel.org
Signed-off-by: Qi Zheng <zhengqi.arch(a)bytedance.com>
Acked-by: David Hildenbrand <david(a)redhat.com>
---
Changes in v3:
- move pmd_start_addr and pmd_end_addr to the top and initialize directly
(David Hildenbrand)
- collect an Acked-by
Changes in v2:
- change Ezra's email address (Ezra Buehler)
- some cleanups (David Hildenbrand)
arch/arm/mm/fault-armv.c | 37 +++++++++++++++++++++++++------------
1 file changed, 25 insertions(+), 12 deletions(-)
diff --git a/arch/arm/mm/fault-armv.c b/arch/arm/mm/fault-armv.c
index 2bec87c3327d2..39fd5df733178 100644
--- a/arch/arm/mm/fault-armv.c
+++ b/arch/arm/mm/fault-armv.c
@@ -62,7 +62,7 @@ static int do_adjust_pte(struct vm_area_struct *vma, unsigned long address,
}
static int adjust_pte(struct vm_area_struct *vma, unsigned long address,
- unsigned long pfn, struct vm_fault *vmf)
+ unsigned long pfn, bool need_lock)
{
spinlock_t *ptl;
pgd_t *pgd;
@@ -99,12 +99,11 @@ static int adjust_pte(struct vm_area_struct *vma, unsigned long address,
if (!pte)
return 0;
- /*
- * If we are using split PTE locks, then we need to take the page
- * lock here. Otherwise we are using shared mm->page_table_lock
- * which is already locked, thus cannot take it.
- */
- if (ptl != vmf->ptl) {
+ if (need_lock) {
+ /*
+ * Use nested version here to indicate that we are already
+ * holding one similar spinlock.
+ */
spin_lock_nested(ptl, SINGLE_DEPTH_NESTING);
if (unlikely(!pmd_same(pmdval, pmdp_get_lockless(pmd)))) {
pte_unmap_unlock(pte, ptl);
@@ -114,7 +113,7 @@ static int adjust_pte(struct vm_area_struct *vma, unsigned long address,
ret = do_adjust_pte(vma, address, pfn, pte);
- if (ptl != vmf->ptl)
+ if (need_lock)
spin_unlock(ptl);
pte_unmap(pte);
@@ -123,9 +122,10 @@ static int adjust_pte(struct vm_area_struct *vma, unsigned long address,
static void
make_coherent(struct address_space *mapping, struct vm_area_struct *vma,
- unsigned long addr, pte_t *ptep, unsigned long pfn,
- struct vm_fault *vmf)
+ unsigned long addr, pte_t *ptep, unsigned long pfn)
{
+ const unsigned long pmd_start_addr = ALIGN_DOWN(addr, PMD_SIZE);
+ const unsigned long pmd_end_addr = pmd_start_addr + PMD_SIZE;
struct mm_struct *mm = vma->vm_mm;
struct vm_area_struct *mpnt;
unsigned long offset;
@@ -141,6 +141,14 @@ make_coherent(struct address_space *mapping, struct vm_area_struct *vma,
*/
flush_dcache_mmap_lock(mapping);
vma_interval_tree_foreach(mpnt, &mapping->i_mmap, pgoff, pgoff) {
+ /*
+ * If we are using split PTE locks, then we need to take the pte
+ * lock. Otherwise we are using shared mm->page_table_lock which
+ * is already locked, thus cannot take it.
+ */
+ bool need_lock = IS_ENABLED(CONFIG_SPLIT_PTE_PTLOCKS);
+ unsigned long mpnt_addr;
+
/*
* If this VMA is not in our MM, we can ignore it.
* Note that we intentionally mask out the VMA
@@ -151,7 +159,12 @@ make_coherent(struct address_space *mapping, struct vm_area_struct *vma,
if (!(mpnt->vm_flags & VM_MAYSHARE))
continue;
offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT;
- aliases += adjust_pte(mpnt, mpnt->vm_start + offset, pfn, vmf);
+ mpnt_addr = mpnt->vm_start + offset;
+
+ /* Avoid deadlocks by not grabbing the same PTE lock again. */
+ if (mpnt_addr >= pmd_start_addr && mpnt_addr < pmd_end_addr)
+ need_lock = false;
+ aliases += adjust_pte(mpnt, mpnt_addr, pfn, need_lock);
}
flush_dcache_mmap_unlock(mapping);
if (aliases)
@@ -194,7 +207,7 @@ void update_mmu_cache_range(struct vm_fault *vmf, struct vm_area_struct *vma,
__flush_dcache_folio(mapping, folio);
if (mapping) {
if (cache_is_vivt())
- make_coherent(mapping, vma, addr, ptep, pfn, vmf);
+ make_coherent(mapping, vma, addr, ptep, pfn);
else if (vma->vm_flags & VM_EXEC)
__flush_icache_all();
}
--
2.20.1
The patch titled
Subject: mm/hugetlb: wait for hugetlb folios to be freed
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-hugetlb-wait-for-hugetlb-folios-to-be-freed.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Ge Yang <yangge1116(a)126.com>
Subject: mm/hugetlb: wait for hugetlb folios to be freed
Date: Wed, 19 Feb 2025 11:46:44 +0800
Since the introduction of commit c77c0a8ac4c52 ("mm/hugetlb: defer freeing
of huge pages if in non-task context"), which supports deferring the
freeing of hugetlb pages, the allocation of contiguous memory through
cma_alloc() may fail probabilistically.
In the CMA allocation process, if it is found that the CMA area is
occupied by in-use hugetlb folios, these in-use hugetlb folios need to be
migrated to another location. When there are no available hugetlb folios
in the free hugetlb pool during the migration of in-use hugetlb folios,
new folios are allocated from the buddy system. A temporary state is set
on the newly allocated folio. Upon completion of the hugetlb folio
migration, the temporary state is transferred from the new folios to the
old folios. Normally, when the old folios with the temporary state are
freed, they are directly released back to the buddy system. However, due to
the deferred freeing of hugetlb pages, the PageBuddy() check fails,
ultimately leading to the failure of cma_alloc().
Here is a simplified call trace illustrating the process:
cma_alloc()
->__alloc_contig_migrate_range() // Migrate in-use hugetlb folios
->unmap_and_move_huge_page()
->folio_putback_hugetlb() // Free old folios
->test_pages_isolated()
->__test_page_isolated_in_pageblock()
->PageBuddy(page) // Check if the page is in buddy
To resolve this issue, we have implemented a function named
wait_for_freed_hugetlb_folios(). This function ensures that the hugetlb
folios are properly released back to the buddy system after their
migration is completed. By invoking wait_for_freed_hugetlb_folios()
before calling PageBuddy(), we ensure that PageBuddy() will succeed.
Link: https://lkml.kernel.org/r/1739936804-18199-1-git-send-email-yangge1116@126.…
Fixes: c77c0a8ac4c52 ("mm/hugetlb: defer freeing of huge pages if in non-task context")
Signed-off-by: Ge Yang <yangge1116(a)126.com>
Reviewed-by: Muchun Song <muchun.song(a)linux.dev>
Acked-by: David Hildenbrand <david(a)redhat.com>
Cc: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Cc: Barry Song <21cnbao(a)gmail.com>
Cc: Oscar Salvador <osalvador(a)suse.de>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
include/linux/hugetlb.h | 5 +++++
mm/hugetlb.c | 8 ++++++++
mm/page_isolation.c | 10 ++++++++++
3 files changed, 23 insertions(+)
--- a/include/linux/hugetlb.h~mm-hugetlb-wait-for-hugetlb-folios-to-be-freed
+++ a/include/linux/hugetlb.h
@@ -682,6 +682,7 @@ struct huge_bootmem_page {
int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list);
int replace_free_hugepage_folios(unsigned long start_pfn, unsigned long end_pfn);
+void wait_for_freed_hugetlb_folios(void);
struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
unsigned long addr, bool cow_from_owner);
struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
@@ -1066,6 +1067,10 @@ static inline int replace_free_hugepage_
return 0;
}
+static inline void wait_for_freed_hugetlb_folios(void)
+{
+}
+
static inline struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
unsigned long addr,
bool cow_from_owner)
--- a/mm/hugetlb.c~mm-hugetlb-wait-for-hugetlb-folios-to-be-freed
+++ a/mm/hugetlb.c
@@ -2943,6 +2943,14 @@ int replace_free_hugepage_folios(unsigne
return ret;
}
+void wait_for_freed_hugetlb_folios(void)
+{
+ if (llist_empty(&hpage_freelist))
+ return;
+
+ flush_work(&free_hpage_work);
+}
+
typedef enum {
/*
* For either 0/1: we checked the per-vma resv map, and one resv
--- a/mm/page_isolation.c~mm-hugetlb-wait-for-hugetlb-folios-to-be-freed
+++ a/mm/page_isolation.c
@@ -608,6 +608,16 @@ int test_pages_isolated(unsigned long st
int ret;
/*
+ * Due to the deferred freeing of hugetlb folios, the hugepage folios may
+ * not immediately release to the buddy system. This can cause PageBuddy()
+ * to fail in __test_page_isolated_in_pageblock(). To ensure that the
+ * hugetlb folios are properly released back to the buddy system, we
+ * invoke the wait_for_freed_hugetlb_folios() function to wait for the
+ * release to complete.
+ */
+ wait_for_freed_hugetlb_folios();
+
+ /*
* Note: pageblock_nr_pages != MAX_PAGE_ORDER. Then, chunks of free
* pages are not aligned to pageblock_nr_pages.
* Then we just check migratetype first.
_
Patches currently in -mm which might be from yangge1116(a)126.com are
mm-hugetlb-wait-for-hugetlb-folios-to-be-freed.patch
At the "size_check" label in ea_get(), the code checks whether the extended
attribute list (xattr) size matches ea_size. If not, it logs
"ea_get: invalid extended attribute" and calls print_hex_dump().
Here, EALIST_SIZE(ea_buf->xattr) returns 4110417968, which exceeds
INT_MAX (2,147,483,647). Then ea_size is clamped:
int size = clamp_t(int, ea_size, 0, EALIST_SIZE(ea_buf->xattr));
Although clamp_t aims to bound ea_size between 0 and 4110417968, the upper
limit is treated as an int, causing an overflow above 2^31 - 1. This
causes "size" to wrap around and become negative (-184549328).
The "size" is then passed to print_hex_dump() (called "len" in
print_hex_dump()), it is passed as type size_t (an unsigned
type), this is then stored inside a variable called
"int remaining", which is then assigned to "int linelen" which
is then passed to hex_dump_to_buffer(). In print_hex_dump()
the for loop, iterates through 0 to len-1, where len is
18446744073525002176, calling hex_dump_to_buffer()
on each iteration:
for (i = 0; i < len; i += rowsize) {
linelen = min(remaining, rowsize);
remaining -= rowsize;
hex_dump_to_buffer(ptr + i, linelen, rowsize, groupsize,
linebuf, sizeof(linebuf), ascii);
...
}
The expected stopping condition (i < len) is effectively broken since
len is corrupted and very large. "ptr + i" therefore eventually walks
past the actual bounds of "ptr", and an out-of-bounds access is done in
hex_dump_to_buffer() in the following for loop:
for (j = 0; j < len; j++) {
if (linebuflen < lx + 2)
goto overflow2;
ch = ptr[j];
...
}
To fix this, we should validate "EALIST_SIZE(ea_buf->xattr)"
before it is utilised.
Reported-by: syzbot <syzbot+4e6e7e4279d046613bc5(a)syzkaller.appspotmail.com>
Tested-by: syzbot <syzbot+4e6e7e4279d046613bc5(a)syzkaller.appspotmail.com>
Closes: https://syzkaller.appspot.com/bug?extid=4e6e7e4279d046613bc5
Fixes: d9f9d96136cb ("jfs: xattr: check invalid xattr size more strictly")
Cc: stable(a)vger.kernel.org
Signed-off-by: Qasim Ijaz <qasdev00(a)gmail.com>
---
v2:
- Added Cc stable tag
fs/jfs/xattr.c | 15 ++++++++++-----
1 file changed, 10 insertions(+), 5 deletions(-)
diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c
index 24afbae87225..7575c51cce9b 100644
--- a/fs/jfs/xattr.c
+++ b/fs/jfs/xattr.c
@@ -559,11 +555,16 @@ static int ea_get(struct inode *inode, struct ea_buffer *ea_buf, int min_size)
size_check:
if (EALIST_SIZE(ea_buf->xattr) != ea_size) {
- int size = clamp_t(int, ea_size, 0, EALIST_SIZE(ea_buf->xattr));
-
- printk(KERN_ERR "ea_get: invalid extended attribute\n");
- print_hex_dump(KERN_ERR, "", DUMP_PREFIX_ADDRESS, 16, 1,
- ea_buf->xattr, size, 1);
+ if (unlikely(EALIST_SIZE(ea_buf->xattr) > INT_MAX)) {
+ printk(KERN_ERR "ea_get: extended attribute size too large: %u > INT_MAX\n",
+ EALIST_SIZE(ea_buf->xattr));
+ } else {
+ int size = clamp_t(int, ea_size, 0, EALIST_SIZE(ea_buf->xattr));
+
+ printk(KERN_ERR "ea_get: invalid extended attribute\n");
+ print_hex_dump(KERN_ERR, "", DUMP_PREFIX_ADDRESS, 16, 1,
+ ea_buf->xattr, size, 1);
+ }
ea_release(inode, ea_buf);
rc = -EIO;
goto clean_up;
--
2.39.5
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x ccc45cb4e7271c74dbb27776ae8f73d84557f5c6
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023061111-tracing-shakiness-9054@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ccc45cb4e7271c74dbb27776ae8f73d84557f5c6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jan=20H=C3=B6ppner?= <hoeppner(a)linux.ibm.com>
Date: Fri, 9 Jun 2023 17:37:50 +0200
Subject: [PATCH] s390/dasd: Use correct lock while counting channel queue
length
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The lock around counting the channel queue length in the BIODASDINFO
ioctl was incorrectly changed to the dasd_block->queue_lock with commit
583d6535cb9d ("dasd: remove dead code"). This can lead to endless list
iterations and a subsequent crash.
The queue_lock is supposed to be used only for queue lists belonging to
dasd_block. For dasd_device related queue lists the ccwdev lock must be
used.
Fix the mentioned issues by correctly using the ccwdev lock instead of
the queue lock.
Fixes: 583d6535cb9d ("dasd: remove dead code")
Cc: stable(a)vger.kernel.org # v5.0+
Signed-off-by: Jan Höppner <hoeppner(a)linux.ibm.com>
Reviewed-by: Stefan Haberland <sth(a)linux.ibm.com>
Signed-off-by: Stefan Haberland <sth(a)linux.ibm.com>
Link: https://lore.kernel.org/r/20230609153750.1258763-2-sth@linux.ibm.com
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/drivers/s390/block/dasd_ioctl.c b/drivers/s390/block/dasd_ioctl.c
index 9327dcdd6e5e..8fca725b3dae 100644
--- a/drivers/s390/block/dasd_ioctl.c
+++ b/drivers/s390/block/dasd_ioctl.c
@@ -552,10 +552,10 @@ static int __dasd_ioctl_information(struct dasd_block *block,
memcpy(dasd_info->type, base->discipline->name, 4);
- spin_lock_irqsave(&block->queue_lock, flags);
+ spin_lock_irqsave(get_ccwdev_lock(base->cdev), flags);
list_for_each(l, &base->ccw_queue)
dasd_info->chanq_len++;
- spin_unlock_irqrestore(&block->queue_lock, flags);
+ spin_unlock_irqrestore(get_ccwdev_lock(base->cdev), flags);
return 0;
}
Hi,
I noticed there appears to be a regression in DLM (fs/dlm/) when
moving from Linux 5.4.229 to 5.4.288; I get a kernel panic when using
dlm_ls_lockx() (DLM user) with a timeout >0, and the panic occurs when
the timeout is reached (e.g., attempting to take a lock on a resource
that is already locked); the host where the timeout occurs is the one
that panics:
...
[ 187.976007]
DLM: Assertion failed on line 1239 of file fs/dlm/lock.c
DLM: assertion: "!lkb->lkb_status"
DLM: time = 4294853632
[ 187.976009] lkb: nodeid 2 id 1 remid 2 exflags 40000 flags 800001
sts 1 rq 5 gr -1 wait_type 4 wait_nodeid 2 seq 0
[ 187.976009]
[ 187.976010] Kernel panic - not syncing: DLM: Record message above
and reboot.
[ 187.976099] CPU: 9 PID: 7409 Comm: dlm_scand Kdump: loaded Tainted:
P OE 5.4.288-esos.prod #1
[ 187.976195] Hardware name: Quantum H2012/H12SSW-NT, BIOS
T20201009143356 10/09/2020
[ 187.976282] Call Trace:
[ 187.976356] dump_stack+0x50/0x63
[ 187.976429] panic+0x10c/0x2e3
[ 187.976501] kill_lkb+0x51/0x52
[ 187.976570] kref_put+0x16/0x2f
[ 187.976638] __put_lkb+0x2f/0x95
[ 187.976707] dlm_scan_timeout+0x18b/0x19c
[ 187.976779] ? dlm_uevent+0x19/0x19
[ 187.976848] dlm_scand+0x94/0xd1
[ 187.976920] kthread+0xe4/0xe9
[ 187.976988] ? kthread_flush_worker+0x70/0x70
[ 187.977062] ret_from_fork+0x35/0x40
...
I examined the commits for fs/dlm/ between 5.4.229 and 5.4.288 and
this is the offender:
dlm: replace usage of found with dedicated list iterator variable
[ Upstream commit dc1acd5c94699389a9ed023e94dd860c846ea1f6 ]
Specifically, the change highlighted below in this hunk for
dlm_scan_timeout() in fs/dlm/lock.c:
@@ -1867,27 +1867,28 @@ void dlm_scan_timeout(struct dlm_ls *ls)
do_cancel = 0;
do_warn = 0;
mutex_lock(&ls->ls_timeout_mutex);
- list_for_each_entry(lkb, &ls->ls_timeout, lkb_time_list) {
+ list_for_each_entry(iter, &ls->ls_timeout, lkb_time_list) {
wait_us = ktime_to_us(ktime_sub(ktime_get(),
- lkb->lkb_timestamp));
+ iter->lkb_timestamp));
- if ((lkb->lkb_exflags & DLM_LKF_TIMEOUT) &&
- wait_us >= (lkb->lkb_timeout_cs * 10000))
+ if ((iter->lkb_exflags & DLM_LKF_TIMEOUT) &&
+ wait_us >= (iter->lkb_timeout_cs * 10000))
do_cancel = 1;
- if ((lkb->lkb_flags & DLM_IFL_WATCH_TIMEWARN) &&
+ if ((iter->lkb_flags & DLM_IFL_WATCH_TIMEWARN) &&
wait_us >= dlm_config.ci_timewarn_cs * 10000)
do_warn = 1;
if (!do_cancel && !do_warn)
continue;
- hold_lkb(lkb);
+ hold_lkb(iter);
+ lkb = iter;
break;
}
mutex_unlock(&ls->ls_timeout_mutex);
- if (!do_cancel && !do_warn)
+ if (!lkb)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
break;
r = lkb->lkb_resource;
Reverting this single line change resolves the kernel panic:
$ diff -Naur fs/dlm/lock.c{.orig,}
--- fs/dlm/lock.c.orig 2024-12-19 12:05:05.000000000 -0500
+++ fs/dlm/lock.c 2025-02-16 21:21:42.544181390 -0500
@@ -1888,7 +1888,7 @@
}
mutex_unlock(&ls->ls_timeout_mutex);
- if (!lkb)
+ if (!do_cancel && !do_warn)
break;
r = lkb->lkb_resource;
It appears this same "dlm: replace usage of found with dedicated list
iterator variable" commit was pulled into other stable branches as
well, and I don't see any fix in the latest 5.4.x patch release
(5.4.290).
--Marc
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 8c8ecc98f5c65947b0070a24bac11e12e47cc65d
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021842-crusher-corrode-54d0@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 8c8ecc98f5c65947b0070a24bac11e12e47cc65d Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven(a)narfation.org>
Date: Mon, 20 Jan 2025 00:06:11 +0100
Subject: [PATCH] batman-adv: Drop unmanaged ELP metric worker
The ELP worker needs to calculate new metric values for all neighbors
"reachable" over an interface. Some of the used metric sources require
locks which might need to sleep. This sleep is incompatible with the RCU
list iterator used for the recorded neighbors. The initial approach to work
around this problem was to queue another work item per neighbor and then
run it in a new context.
Even though this solved the RCU vs might_sleep() conflict, it has a major
problem: nothing was stopping the work item in case it is not needed
anymore - for example because one of the related interfaces was removed or
the batman-adv module was unloaded - resulting in potential invalid memory
accesses.
Directly canceling the metric worker also has various problems:
* cancel_work_sync for a to-be-deactivated interface is called with
rtnl_lock held. But the code in the ELP metric worker also tries to use
rtnl_lock() - which will never return in this case. This also means that
cancel_work_sync would never return because it is waiting for the worker
to finish.
* iterating over the neighbor list for the to-be-deactivated interface is
currently done using the RCU-specific methods, which means that it is
possible to miss items when iterating over it without the associated
spinlock - a behaviour which is acceptable for a periodic metric check
but not for a cleanup routine (which must "stop" all still running
workers)
The better approach is to get rid of the per-interface neighbor metric
worker and handle everything in the interface worker. The original problems
are solved by:
* creating a list of neighbors which require new metric information inside
the RCU protected context, gathering the metric according to the new list
outside the RCU protected context
* only use rcu_trylock inside metric gathering code to avoid a deadlock
when the cancel_delayed_work_sync is called in the interface removal code
(which is called with the rtnl_lock held)
Cc: stable(a)vger.kernel.org
Fixes: c833484e5f38 ("batman-adv: ELP - compute the metric based on the estimated throughput")
Signed-off-by: Sven Eckelmann <sven(a)narfation.org>
Signed-off-by: Simon Wunderlich <sw(a)simonwunderlich.de>
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
index ac11f1f08db0..d35479c465e2 100644
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -113,8 +113,6 @@ static void
batadv_v_hardif_neigh_init(struct batadv_hardif_neigh_node *hardif_neigh)
{
ewma_throughput_init(&hardif_neigh->bat_v.throughput);
- INIT_WORK(&hardif_neigh->bat_v.metric_work,
- batadv_v_elp_throughput_metric_update);
}
/**
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index 65e52de52bcd..b065578b4436 100644
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -18,6 +18,7 @@
#include <linux/if_ether.h>
#include <linux/jiffies.h>
#include <linux/kref.h>
+#include <linux/list.h>
#include <linux/minmax.h>
#include <linux/netdevice.h>
#include <linux/nl80211.h>
@@ -26,6 +27,7 @@
#include <linux/rcupdate.h>
#include <linux/rtnetlink.h>
#include <linux/skbuff.h>
+#include <linux/slab.h>
#include <linux/stddef.h>
#include <linux/string.h>
#include <linux/types.h>
@@ -41,6 +43,18 @@
#include "routing.h"
#include "send.h"
+/**
+ * struct batadv_v_metric_queue_entry - list of hardif neighbors which require
+ * and metric update
+ */
+struct batadv_v_metric_queue_entry {
+ /** @hardif_neigh: hardif neighbor scheduled for metric update */
+ struct batadv_hardif_neigh_node *hardif_neigh;
+
+ /** @list: list node for metric_queue */
+ struct list_head list;
+};
+
/**
* batadv_v_elp_start_timer() - restart timer for ELP periodic work
* @hard_iface: the interface for which the timer has to be reset
@@ -137,10 +151,17 @@ static bool batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh,
goto default_throughput;
}
+ /* only use rtnl_trylock because the elp worker will be cancelled while
+ * the rntl_lock is held. the cancel_delayed_work_sync() would otherwise
+ * wait forever when the elp work_item was started and it is then also
+ * trying to rtnl_lock
+ */
+ if (!rtnl_trylock())
+ return false;
+
/* if not a wifi interface, check if this device provides data via
* ethtool (e.g. an Ethernet adapter)
*/
- rtnl_lock();
ret = __ethtool_get_link_ksettings(hard_iface->net_dev, &link_settings);
rtnl_unlock();
if (ret == 0) {
@@ -175,31 +196,19 @@ static bool batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh,
/**
* batadv_v_elp_throughput_metric_update() - worker updating the throughput
* metric of a single hop neighbour
- * @work: the work queue item
+ * @neigh: the neighbour to probe
*/
-void batadv_v_elp_throughput_metric_update(struct work_struct *work)
+static void
+batadv_v_elp_throughput_metric_update(struct batadv_hardif_neigh_node *neigh)
{
- struct batadv_hardif_neigh_node_bat_v *neigh_bat_v;
- struct batadv_hardif_neigh_node *neigh;
u32 throughput;
bool valid;
- neigh_bat_v = container_of(work, struct batadv_hardif_neigh_node_bat_v,
- metric_work);
- neigh = container_of(neigh_bat_v, struct batadv_hardif_neigh_node,
- bat_v);
-
valid = batadv_v_elp_get_throughput(neigh, &throughput);
if (!valid)
- goto put_neigh;
+ return;
ewma_throughput_add(&neigh->bat_v.throughput, throughput);
-
-put_neigh:
- /* decrement refcounter to balance increment performed before scheduling
- * this task
- */
- batadv_hardif_neigh_put(neigh);
}
/**
@@ -273,14 +282,16 @@ batadv_v_elp_wifi_neigh_probe(struct batadv_hardif_neigh_node *neigh)
*/
static void batadv_v_elp_periodic_work(struct work_struct *work)
{
+ struct batadv_v_metric_queue_entry *metric_entry;
+ struct batadv_v_metric_queue_entry *metric_safe;
struct batadv_hardif_neigh_node *hardif_neigh;
struct batadv_hard_iface *hard_iface;
struct batadv_hard_iface_bat_v *bat_v;
struct batadv_elp_packet *elp_packet;
+ struct list_head metric_queue;
struct batadv_priv *bat_priv;
struct sk_buff *skb;
u32 elp_interval;
- bool ret;
bat_v = container_of(work, struct batadv_hard_iface_bat_v, elp_wq.work);
hard_iface = container_of(bat_v, struct batadv_hard_iface, bat_v);
@@ -316,6 +327,8 @@ static void batadv_v_elp_periodic_work(struct work_struct *work)
atomic_inc(&hard_iface->bat_v.elp_seqno);
+ INIT_LIST_HEAD(&metric_queue);
+
/* The throughput metric is updated on each sent packet. This way, if a
* node is dead and no longer sends packets, batman-adv is still able to
* react timely to its death.
@@ -340,16 +353,28 @@ static void batadv_v_elp_periodic_work(struct work_struct *work)
/* Reading the estimated throughput from cfg80211 is a task that
* may sleep and that is not allowed in an rcu protected
- * context. Therefore schedule a task for that.
+ * context. Therefore add it to metric_queue and process it
+ * outside rcu protected context.
*/
- ret = queue_work(batadv_event_workqueue,
- &hardif_neigh->bat_v.metric_work);
-
- if (!ret)
+ metric_entry = kzalloc(sizeof(*metric_entry), GFP_ATOMIC);
+ if (!metric_entry) {
batadv_hardif_neigh_put(hardif_neigh);
+ continue;
+ }
+
+ metric_entry->hardif_neigh = hardif_neigh;
+ list_add(&metric_entry->list, &metric_queue);
}
rcu_read_unlock();
+ list_for_each_entry_safe(metric_entry, metric_safe, &metric_queue, list) {
+ batadv_v_elp_throughput_metric_update(metric_entry->hardif_neigh);
+
+ batadv_hardif_neigh_put(metric_entry->hardif_neigh);
+ list_del(&metric_entry->list);
+ kfree(metric_entry);
+ }
+
restart_timer:
batadv_v_elp_start_timer(hard_iface);
out:
diff --git a/net/batman-adv/bat_v_elp.h b/net/batman-adv/bat_v_elp.h
index 9e2740195fa2..c9cb0a307100 100644
--- a/net/batman-adv/bat_v_elp.h
+++ b/net/batman-adv/bat_v_elp.h
@@ -10,7 +10,6 @@
#include "main.h"
#include <linux/skbuff.h>
-#include <linux/workqueue.h>
int batadv_v_elp_iface_enable(struct batadv_hard_iface *hard_iface);
void batadv_v_elp_iface_disable(struct batadv_hard_iface *hard_iface);
@@ -19,6 +18,5 @@ void batadv_v_elp_iface_activate(struct batadv_hard_iface *primary_iface,
void batadv_v_elp_primary_iface_set(struct batadv_hard_iface *primary_iface);
int batadv_v_elp_packet_recv(struct sk_buff *skb,
struct batadv_hard_iface *if_incoming);
-void batadv_v_elp_throughput_metric_update(struct work_struct *work);
#endif /* _NET_BATMAN_ADV_BAT_V_ELP_H_ */
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 04f6398b3a40..85a50096f5b2 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -596,9 +596,6 @@ struct batadv_hardif_neigh_node_bat_v {
* neighbor
*/
unsigned long last_unicast_tx;
-
- /** @metric_work: work queue callback item for metric update */
- struct work_struct metric_work;
};
/**
commit c910f2b65518 ("arm64/mm: Update tlb invalidation routines for
FEAT_LPA2") changed the "invalidation level unknown" hint from 0 to
TLBI_TTL_UNKNOWN (INT_MAX). But the fallback "unknown level" path in
flush_hugetlb_tlb_range() was not updated. So as it stands, when trying
to invalidate CONT_PMD_SIZE or CONT_PTE_SIZE hugetlb mappings, we will
spuriously try to invalidate at level 0 on LPA2-enabled systems.
Fix this so that the fallback passes TLBI_TTL_UNKNOWN, and while we are
at it, explicitly use the correct stride and level for CONT_PMD_SIZE and
CONT_PTE_SIZE, which should provide a minor optimization.
Cc: stable(a)vger.kernel.org
Fixes: c910f2b65518 ("arm64/mm: Update tlb invalidation routines for FEAT_LPA2")
Signed-off-by: Ryan Roberts <ryan.roberts(a)arm.com>
---
arch/arm64/include/asm/hugetlb.h | 22 ++++++++++++++++------
1 file changed, 16 insertions(+), 6 deletions(-)
diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h
index 03db9cb21ace..07fbf5bf85a7 100644
--- a/arch/arm64/include/asm/hugetlb.h
+++ b/arch/arm64/include/asm/hugetlb.h
@@ -76,12 +76,22 @@ static inline void flush_hugetlb_tlb_range(struct vm_area_struct *vma,
{
unsigned long stride = huge_page_size(hstate_vma(vma));
- if (stride == PMD_SIZE)
- __flush_tlb_range(vma, start, end, stride, false, 2);
- else if (stride == PUD_SIZE)
- __flush_tlb_range(vma, start, end, stride, false, 1);
- else
- __flush_tlb_range(vma, start, end, PAGE_SIZE, false, 0);
+ switch (stride) {
+#ifndef __PAGETABLE_PMD_FOLDED
+ case PUD_SIZE:
+ __flush_tlb_range(vma, start, end, PUD_SIZE, false, 1);
+ break;
+#endif
+ case CONT_PMD_SIZE:
+ case PMD_SIZE:
+ __flush_tlb_range(vma, start, end, PMD_SIZE, false, 2);
+ break;
+ case CONT_PTE_SIZE:
+ __flush_tlb_range(vma, start, end, PAGE_SIZE, false, 3);
+ break;
+ default:
+ __flush_tlb_range(vma, start, end, PAGE_SIZE, false, TLBI_TTL_UNKNOWN);
+ }
}
#endif /* __ASM_HUGETLB_H */
--
2.43.0
From: Claudiu Beznea <claudiu.beznea.uj(a)bp.renesas.com>
When performing continuous unbind/bind operations on the USB drivers
available on the Renesas RZ/G2L SoC, a kernel crash with the message
"Unable to handle kernel NULL pointer dereference at virtual address"
may occur. This issue points to the usbhsc_notify_hotplug() function.
Flush the delayed work to avoid its execution when driver resources are
unavailable.
Fixes: bc57381e6347 ("usb: renesas_usbhs: use delayed_work instead of work_struct")
Cc: stable(a)vger.kernel.org
Signed-off-by: Claudiu Beznea <claudiu.beznea.uj(a)bp.renesas.com>
---
drivers/usb/renesas_usbhs/common.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/usb/renesas_usbhs/common.c b/drivers/usb/renesas_usbhs/common.c
index 6c7857b66a21..4b35ef216125 100644
--- a/drivers/usb/renesas_usbhs/common.c
+++ b/drivers/usb/renesas_usbhs/common.c
@@ -781,6 +781,8 @@ static void usbhs_remove(struct platform_device *pdev)
dev_dbg(&pdev->dev, "usb remove\n");
+ flush_delayed_work(&priv->notify_hotplug_work);
+
/* power off */
if (!usbhs_get_dparam(priv, runtime_pwctrl))
usbhsc_power_ctrl(priv, 0);
--
2.43.0
From: Claudiu Beznea <claudiu.beznea.uj(a)bp.renesas.com>
The gpriv->transceiver is retrieved in probe() through usb_get_phy() but
never released. Use devm_usb_get_phy() to handle this scenario.
This issue was identified through code investigation; no runtime failure
was observed without this change.
Fixes: b5a2875605ca ("usb: renesas_usbhs: Allow an OTG PHY driver to provide VBUS")
Cc: stable(a)vger.kernel.org
Signed-off-by: Claudiu Beznea <claudiu.beznea.uj(a)bp.renesas.com>
---
drivers/usb/renesas_usbhs/mod_gadget.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/usb/renesas_usbhs/mod_gadget.c b/drivers/usb/renesas_usbhs/mod_gadget.c
index 105132ae87ac..e8e5723f5412 100644
--- a/drivers/usb/renesas_usbhs/mod_gadget.c
+++ b/drivers/usb/renesas_usbhs/mod_gadget.c
@@ -1094,7 +1094,7 @@ int usbhs_mod_gadget_probe(struct usbhs_priv *priv)
goto usbhs_mod_gadget_probe_err_gpriv;
}
- gpriv->transceiver = usb_get_phy(USB_PHY_TYPE_UNDEFINED);
+ gpriv->transceiver = devm_usb_get_phy(dev, USB_PHY_TYPE_UNDEFINED);
dev_info(dev, "%stransceiver found\n",
!IS_ERR(gpriv->transceiver) ? "" : "no ");
--
2.43.0
From: Claudiu Beznea <claudiu.beznea.uj(a)bp.renesas.com>
The phy-rcar-gen3-usb2 driver exports 4 PHYs. The timing registers are
common to all PHYs, so there is no need to set them every time a PHY is
initialized. Set the timing registers only when the first PHY is
initialized.
Fixes: f3b5a8d9b50d ("phy: rcar-gen3-usb2: Add R-Car Gen3 USB2 PHY driver")
Cc: stable(a)vger.kernel.org
Signed-off-by: Claudiu Beznea <claudiu.beznea.uj(a)bp.renesas.com>
---
drivers/phy/renesas/phy-rcar-gen3-usb2.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/drivers/phy/renesas/phy-rcar-gen3-usb2.c b/drivers/phy/renesas/phy-rcar-gen3-usb2.c
index 087937407b0b..8e57fa8c1cff 100644
--- a/drivers/phy/renesas/phy-rcar-gen3-usb2.c
+++ b/drivers/phy/renesas/phy-rcar-gen3-usb2.c
@@ -467,8 +467,11 @@ static int rcar_gen3_phy_usb2_init(struct phy *p)
val = readl(usb2_base + USB2_INT_ENABLE);
val |= USB2_INT_ENABLE_UCOM_INTEN | rphy->int_enable_bits;
writel(val, usb2_base + USB2_INT_ENABLE);
- writel(USB2_SPD_RSM_TIMSET_INIT, usb2_base + USB2_SPD_RSM_TIMSET);
- writel(USB2_OC_TIMSET_INIT, usb2_base + USB2_OC_TIMSET);
+
+ if (!rcar_gen3_is_any_rphy_initialized(channel)) {
+ writel(USB2_SPD_RSM_TIMSET_INIT, usb2_base + USB2_SPD_RSM_TIMSET);
+ writel(USB2_OC_TIMSET_INIT, usb2_base + USB2_OC_TIMSET);
+ }
/* Initialize otg part (only if we initialize a PHY with IRQs). */
if (rphy->int_enable_bits)
--
2.43.0
From: Claudiu Beznea <claudiu.beznea.uj(a)bp.renesas.com>
The phy-rcar-gen3-usb2 driver exposes four individual PHYs that are
requested and configured by PHY users. The struct phy_ops APIs access the
same set of registers to configure all PHYs. Additionally, PHY settings can
be modified through sysfs or an IRQ handler. While some struct phy_ops APIs
are protected by a driver-wide mutex, others rely on individual
PHY-specific mutexes.
This approach can lead to various issues, including:
1/ the IRQ handler may interrupt PHY settings in progress, racing with
hardware configuration protected by a mutex lock
2/ due to the msleep(20) in rcar_gen3_init_otg(), while a configuration
thread sleeps to wait for the delay, another thread may try to configure
another PHY (with phy_init() + phy_power_on()); re-running phy_init()
goes through the exact same configuration code, re-running the same
hardware configuration on the same set of registers (and bits), which
might impact the result of the msleep for the first configuring thread
3/ sysfs can configure the hardware (through role_store()) and it can
still race with the phy_init()/phy_power_on() APIs calling into the
driver's struct phy_ops
To address these issues, add a spinlock to protect hardware register access
and driver private data structures (e.g., calls to
rcar_gen3_is_any_rphy_initialized()). Checking driver-specific data remains
necessary as all PHY instances share common settings. With this change,
the existing mutex protection is removed and the cleanup.h helpers are
used.
While at it, to keep the code simpler, do not skip
regulator_enable()/regulator_disable() APIs in
rcar_gen3_phy_usb2_power_on()/rcar_gen3_phy_usb2_power_off() as the
regulators enable/disable operations are reference counted anyway.
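For reference, a minimal sketch of the cleanup.h guard pattern the
patch adopts (scope-based lock management; the lock is dropped
automatically when the guard goes out of scope):

    #include <linux/cleanup.h>
    #include <linux/spinlock.h>

    static DEFINE_SPINLOCK(example_lock);

    static void example(void)
    {
            guard(spinlock_irqsave)(&example_lock); /* locked here... */
            /* ... critical section ... */
    }                                               /* ...unlocked here */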
Fixes: f3b5a8d9b50d ("phy: rcar-gen3-usb2: Add R-Car Gen3 USB2 PHY driver")
Cc: stable(a)vger.kernel.org
Signed-off-by: Claudiu Beznea <claudiu.beznea.uj(a)bp.renesas.com>
---
drivers/phy/renesas/phy-rcar-gen3-usb2.c | 49 +++++++++++++-----------
1 file changed, 26 insertions(+), 23 deletions(-)
diff --git a/drivers/phy/renesas/phy-rcar-gen3-usb2.c b/drivers/phy/renesas/phy-rcar-gen3-usb2.c
index 826c9c4dd4c0..5c0ceba09b67 100644
--- a/drivers/phy/renesas/phy-rcar-gen3-usb2.c
+++ b/drivers/phy/renesas/phy-rcar-gen3-usb2.c
@@ -9,6 +9,7 @@
* Copyright (C) 2014 Cogent Embedded, Inc.
*/
+#include <linux/cleanup.h>
#include <linux/extcon-provider.h>
#include <linux/interrupt.h>
#include <linux/io.h>
@@ -118,7 +119,7 @@ struct rcar_gen3_chan {
struct regulator *vbus;
struct reset_control *rstc;
struct work_struct work;
- struct mutex lock; /* protects rphys[...].powered */
+ spinlock_t lock; /* protects access to hardware and driver data structure. */
enum usb_dr_mode dr_mode;
u32 obint_enable_bits;
bool extcon_host;
@@ -348,6 +349,8 @@ static ssize_t role_store(struct device *dev, struct device_attribute *attr,
bool is_b_device;
enum phy_mode cur_mode, new_mode;
+ guard(spinlock_irqsave)(&ch->lock);
+
if (!ch->is_otg_channel || !rcar_gen3_is_any_otg_rphy_initialized(ch))
return -EIO;
@@ -415,7 +418,7 @@ static void rcar_gen3_init_otg(struct rcar_gen3_chan *ch)
val = readl(usb2_base + USB2_ADPCTRL);
writel(val | USB2_ADPCTRL_IDPULLUP, usb2_base + USB2_ADPCTRL);
}
- msleep(20);
+ mdelay(20);
writel(0xffffffff, usb2_base + USB2_OBINTSTA);
writel(ch->obint_enable_bits, usb2_base + USB2_OBINTEN);
@@ -436,12 +439,14 @@ static irqreturn_t rcar_gen3_phy_usb2_irq(int irq, void *_ch)
if (pm_runtime_suspended(dev))
goto rpm_put;
- status = readl(usb2_base + USB2_OBINTSTA);
- if (status & ch->obint_enable_bits) {
- dev_vdbg(dev, "%s: %08x\n", __func__, status);
- writel(ch->obint_enable_bits, usb2_base + USB2_OBINTSTA);
- rcar_gen3_device_recognition(ch);
- ret = IRQ_HANDLED;
+ scoped_guard(spinlock, &ch->lock) {
+ status = readl(usb2_base + USB2_OBINTSTA);
+ if (status & ch->obint_enable_bits) {
+ dev_vdbg(dev, "%s: %08x\n", __func__, status);
+ writel(ch->obint_enable_bits, usb2_base + USB2_OBINTSTA);
+ rcar_gen3_device_recognition(ch);
+ ret = IRQ_HANDLED;
+ }
}
rpm_put:
@@ -456,6 +461,8 @@ static int rcar_gen3_phy_usb2_init(struct phy *p)
void __iomem *usb2_base = channel->base;
u32 val;
+ guard(spinlock_irqsave)(&channel->lock);
+
/* Initialize USB2 part */
val = readl(usb2_base + USB2_INT_ENABLE);
val |= USB2_INT_ENABLE_UCOM_INTEN | rphy->int_enable_bits;
@@ -479,6 +486,8 @@ static int rcar_gen3_phy_usb2_exit(struct phy *p)
void __iomem *usb2_base = channel->base;
u32 val;
+ guard(spinlock_irqsave)(&channel->lock);
+
rphy->initialized = false;
val = readl(usb2_base + USB2_INT_ENABLE);
@@ -498,16 +507,17 @@ static int rcar_gen3_phy_usb2_power_on(struct phy *p)
u32 val;
int ret = 0;
- mutex_lock(&channel->lock);
- if (!rcar_gen3_are_all_rphys_power_off(channel))
- goto out;
-
if (channel->vbus) {
ret = regulator_enable(channel->vbus);
if (ret)
- goto out;
+ return ret;
}
+ guard(spinlock_irqsave)(&channel->lock);
+
+ if (!rcar_gen3_are_all_rphys_power_off(channel))
+ goto out;
+
val = readl(usb2_base + USB2_USBCTR);
val |= USB2_USBCTR_PLL_RST;
writel(val, usb2_base + USB2_USBCTR);
@@ -517,7 +527,6 @@ static int rcar_gen3_phy_usb2_power_on(struct phy *p)
out:
/* The powered flag should be set for any other phys anyway */
rphy->powered = true;
- mutex_unlock(&channel->lock);
return 0;
}
@@ -528,18 +537,12 @@ static int rcar_gen3_phy_usb2_power_off(struct phy *p)
struct rcar_gen3_chan *channel = rphy->ch;
int ret = 0;
- mutex_lock(&channel->lock);
- rphy->powered = false;
-
- if (!rcar_gen3_are_all_rphys_power_off(channel))
- goto out;
+ scoped_guard(spinlock_irqsave, &channel->lock)
+ rphy->powered = false;
if (channel->vbus)
ret = regulator_disable(channel->vbus);
-out:
- mutex_unlock(&channel->lock);
-
return ret;
}
@@ -750,7 +753,7 @@ static int rcar_gen3_phy_usb2_probe(struct platform_device *pdev)
if (phy_data->no_adp_ctrl)
channel->obint_enable_bits = USB2_OBINT_IDCHG_EN;
- mutex_init(&channel->lock);
+ spin_lock_init(&channel->lock);
for (i = 0; i < NUM_OF_PHYS; i++) {
channel->rphys[i].phy = devm_phy_create(dev, NULL,
phy_data->phy_usb2_ops);
--
2.43.0
The symlink body (->target) should be freed at the same time as the inode
itself per commit 4fdcfab5b553 ("jffs2: fix use-after-free on symlink
traversal"). It is a filesystem-specific field but there exist several
error paths during generic inode allocation when ->free_inode(), namely
jffs2_free_inode(), is called with still uninitialized private info.
The calltrace looks like:
alloc_inode
inode_init_always // fails
i_callback
free_inode
jffs2_free_inode // touches uninit ->target field
Commit af9a8730ddb6 ("jffs2: Fix potential illegal address access in
jffs2_free_inode") approached the observed problem but fixed it only
partially. Our local Syzkaller instance is still hitting these kinds of
failures.
The problem is that jffs2_i_init_once(), to which the initialization of
f->target was moved, is called only once when a slab object is first
constructed, so it is not called again for an object that is later
retrieved from the slab cache for reuse.
The practice followed by many other filesystems is to initialize
filesystem-private inode contents in the corresponding ->alloc_inode()
callbacks. This also allows dropping the initialization from jffs2_iget() and
jffs2_new_inode(), as ->alloc_inode() is called on those paths.
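Condensed from the patch below, the difference between the two hooks is:

/* Slab constructor: runs ONCE, when the slab object is first created.
 * Objects later reused from the cache do not pass through here. */
static void jffs2_i_init_once(void *foo)
{
	struct jffs2_inode_info *f = foo;

	mutex_init(&f->sem);
	inode_init_once(&f->vfs_inode);
}

/* ->alloc_inode(): runs on EVERY inode allocation, including slab
 * reuse, so per-inode state such as ->target is (re)initialized here. */
static struct inode *jffs2_alloc_inode(struct super_block *sb)
{
	struct jffs2_inode_info *f;

	f = alloc_inode_sb(sb, jffs2_inode_cachep, GFP_KERNEL);
	if (!f)
		return NULL;

	jffs2_init_inode_info(f);	/* resets ->target among other fields */
	return &f->vfs_inode;
}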
Found by Linux Verification Center (linuxtesting.org) with Syzkaller.
Fixes: 4fdcfab5b553 ("jffs2: fix use-after-free on symlink traversal")
Cc: stable(a)vger.kernel.org
Signed-off-by: Fedor Pchelkin <pchelkin(a)ispras.ru>
---
fs/jffs2/fs.c | 2 --
fs/jffs2/super.c | 3 ++-
2 files changed, 2 insertions(+), 3 deletions(-)
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index d175cccb7c55..85c4b273918f 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -271,7 +271,6 @@ struct inode *jffs2_iget(struct super_block *sb, unsigned long ino)
f = JFFS2_INODE_INFO(inode);
c = JFFS2_SB_INFO(inode->i_sb);
- jffs2_init_inode_info(f);
mutex_lock(&f->sem);
ret = jffs2_do_read_inode(c, f, inode->i_ino, &latest_node);
@@ -439,7 +438,6 @@ struct inode *jffs2_new_inode (struct inode *dir_i, umode_t mode, struct jffs2_r
return ERR_PTR(-ENOMEM);
f = JFFS2_INODE_INFO(inode);
- jffs2_init_inode_info(f);
mutex_lock(&f->sem);
memset(ri, 0, sizeof(*ri));
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index 4545f885c41e..b56ff63357f3 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -42,6 +42,8 @@ static struct inode *jffs2_alloc_inode(struct super_block *sb)
f = alloc_inode_sb(sb, jffs2_inode_cachep, GFP_KERNEL);
if (!f)
return NULL;
+
+ jffs2_init_inode_info(f);
return &f->vfs_inode;
}
@@ -58,7 +60,6 @@ static void jffs2_i_init_once(void *foo)
struct jffs2_inode_info *f = foo;
mutex_init(&f->sem);
- f->target = NULL;
inode_init_once(&f->vfs_inode);
}
--
2.39.5
From: Steven Rostedt <rostedt(a)goodmis.org>
When the last fprobe is removed, it calls unregister_ftrace_graph() to
remove the graph_ops from function graph. The issue is when it does so, it
calls return before removing the function from its graph ops via
ftrace_set_filter_ips(). This leaves the last function lingering in the
fprobe's fgraph ops and if a probe is added it also enables that last
function (even though the callback will just drop it, it does add unneeded
overhead to make that call).
# echo "f:myevent1 kernel_clone" >> /sys/kernel/tracing/dynamic_events
# cat /sys/kernel/tracing/enabled_functions
kernel_clone (1) tramp: 0xffffffffc02f3000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
# echo "f:myevent2 schedule_timeout" >> /sys/kernel/tracing/dynamic_events
# cat /sys/kernel/tracing/enabled_functions
kernel_clone (1) tramp: 0xffffffffc02f3000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
schedule_timeout (1) tramp: 0xffffffffc02f3000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
# > /sys/kernel/tracing/dynamic_events
# cat /sys/kernel/tracing/enabled_functions
# echo "f:myevent3 kmem_cache_free" >> /sys/kernel/tracing/dynamic_events
# cat /sys/kernel/tracing/enabled_functions
kmem_cache_free (1) tramp: 0xffffffffc0219000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
schedule_timeout (1) tramp: 0xffffffffc0219000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
The above enabled a fprobe on kernel_clone, and then on schedule_timeout.
The content of the enabled_functions shows the functions that have a
callback attached to them. The fprobe attached to those functions
properly. Then the fprobes were cleared, and enabled_functions was empty
after that. But after adding a fprobe on kmem_cache_free,
enabled_functions shows that schedule_timeout was attached again. This
is because it was still left in the fprobe ops hash that is used to tell
function graph which functions it wants callbacks from.
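Condensed from the diff below, the bug and the fix are:

/* Before: the early return on the last-user path skips the filter
 * removal that must happen on every removal. */
fprobe_graph_active--;
if (!fprobe_graph_active) {
	unregister_ftrace_graph(&fprobe_graph_ops);
	return;		/* BUG: leaves addrs in the fgraph ops filter */
}
ftrace_set_filter_ips(&fprobe_graph_ops.ops, addrs, num, 1, 0);

/* After: unregister when the count reaches zero, but always remove
 * the addresses from the filter hash as well. */
fprobe_graph_active--;
if (!fprobe_graph_active)
	unregister_ftrace_graph(&fprobe_graph_ops);

ftrace_set_filter_ips(&fprobe_graph_ops.ops, addrs, num, 1, 0);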
Cc: stable(a)vger.kernel.org
Fixes: 4346ba1604093 ("fprobe: Rewrite fprobe on function-graph tracer")
Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
---
kernel/trace/fprobe.c | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
index 90241091ca61..886090845b1a 100644
--- a/kernel/trace/fprobe.c
+++ b/kernel/trace/fprobe.c
@@ -403,11 +403,9 @@ static void fprobe_graph_remove_ips(unsigned long *addrs, int num)
lockdep_assert_held(&fprobe_mutex);
fprobe_graph_active--;
- if (!fprobe_graph_active) {
- /* Q: should we unregister it ? */
+ /* Q: should we unregister it ? */
+ if (!fprobe_graph_active)
unregister_ftrace_graph(&fprobe_graph_ops);
- return;
- }
ftrace_set_filter_ips(&fprobe_graph_ops.ops, addrs, num, 1, 0);
}
--
2.47.2
From: Steven Rostedt <rostedt(a)goodmis.org>
When adding a new fprobe, it will update the function hash to the
functions the fprobe is attached to and register with function graph to
have it call the registered functions. The fprobe_graph_active variable
keeps track of the number of fprobes that are using function graph.
If two fprobes attach to the same function, fprobe_graph_active is
incremented for each of them. But when they are removed, the first
fprobe to be removed will see that the function it is attached to is also
used by another fprobe, and it will not remove that function from
function_graph. That logic, however, also skips decrementing the
fprobe_graph_active variable.
This causes the fprobe_graph_active variable to not go to zero when all
fprobes are removed, and in doing so it does not unregister from
function graph. As the fgraph ops hash will now be empty, and an empty
filter hash means all functions are enabled, this triggers function graph
to add a callback to the fprobe infrastructure for every function!
# echo "f:myevent1 kernel_clone" >> /sys/kernel/tracing/dynamic_events
# echo "f:myevent2 kernel_clone%return" >> /sys/kernel/tracing/dynamic_events
# cat /sys/kernel/tracing/enabled_functions
kernel_clone (1) tramp: 0xffffffffc0024000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
# > /sys/kernel/tracing/dynamic_events
# cat /sys/kernel/tracing/enabled_functions
trace_initcall_start_cb (1) tramp: 0xffffffffc0026000 (function_trace_call+0x0/0x170) ->function_trace_call+0x0/0x170
run_init_process (1) tramp: 0xffffffffc0026000 (function_trace_call+0x0/0x170) ->function_trace_call+0x0/0x170
try_to_run_init_process (1) tramp: 0xffffffffc0026000 (function_trace_call+0x0/0x170) ->function_trace_call+0x0/0x170
x86_pmu_show_pmu_cap (1) tramp: 0xffffffffc0026000 (function_trace_call+0x0/0x170) ->function_trace_call+0x0/0x170
cleanup_rapl_pmus (1) tramp: 0xffffffffc0026000 (function_trace_call+0x0/0x170) ->function_trace_call+0x0/0x170
uncore_free_pcibus_map (1) tramp: 0xffffffffc0026000 (function_trace_call+0x0/0x170) ->function_trace_call+0x0/0x170
uncore_types_exit (1) tramp: 0xffffffffc0026000 (function_trace_call+0x0/0x170) ->function_trace_call+0x0/0x170
uncore_pci_exit.part.0 (1) tramp: 0xffffffffc0026000 (function_trace_call+0x0/0x170) ->function_trace_call+0x0/0x170
kvm_shutdown (1) tramp: 0xffffffffc0026000 (function_trace_call+0x0/0x170) ->function_trace_call+0x0/0x170
vmx_dump_msrs (1) tramp: 0xffffffffc0026000 (function_trace_call+0x0/0x170) ->function_trace_call+0x0/0x170
[..]
# cat /sys/kernel/tracing/enabled_functions | wc -l
54702
If a fprobe is being removed and all its functions are also traced by
other fprobes, still decrement the fprobe_graph_active counter.
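Condensed from the diff below, the unregister path now upholds the usual
refcount invariant: one decrement per registered fprobe, unconditionally.

/* count is the number of addresses this fprobe still has to remove.
 * Even when it is zero (all its addresses are shared with other
 * fprobes), this fprobe's reference on the graph_ops must be dropped. */
if (count)
	fprobe_graph_remove_ips(addrs, count);	/* decrements internally */
else
	fprobe_graph_active--;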
Cc: stable(a)vger.kernel.org
Fixes: 4346ba1604093 ("fprobe: Rewrite fprobe on function-graph tracer")
Closes: https://lore.kernel.org/all/20250217114918.10397-A-hca@linux.ibm.com/
Reported-by: Heiko Carstens <hca(a)linux.ibm.com>
Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
---
kernel/trace/fprobe.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
index 2560b312ad57..90241091ca61 100644
--- a/kernel/trace/fprobe.c
+++ b/kernel/trace/fprobe.c
@@ -681,6 +681,8 @@ int unregister_fprobe(struct fprobe *fp)
if (count)
fprobe_graph_remove_ips(addrs, count);
+ else
+ fprobe_graph_active--;
kfree_rcu(hlist_array, rcu);
fp->hlist_array = NULL;
--
2.47.2
Hi,
This series fixes a couple of issues reported by Johan in [1]. The first one
fixes a deadlock that happens during shutdown and suspend. The second one
fixes the driver's PM behavior.
[1] https://lore.kernel.org/mhi/Z1me8iaK7cwgjL92@hovoldconsulting.com
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam(a)linaro.org>
---
Manivannan Sadhasivam (2):
bus: mhi: host: pci_generic: Use pci_try_reset_function() to avoid deadlock
bus: mhi: host: pci_generic: Recover the device synchronously from mhi_pci_runtime_resume()
drivers/bus/mhi/host/pci_generic.c | 29 +++++++++++++++++------------
1 file changed, 17 insertions(+), 12 deletions(-)
---
base-commit: fc033cf25e612e840e545f8d5ad2edd6ba613ed5
change-id: 20250108-mhi_recovery_fix-a8f37168f91c
Best regards,
--
Manivannan Sadhasivam <manivannan.sadhasivam(a)linaro.org>
From: Paolo Valente <paolo.valente(a)linaro.org>
commit 9778369a2d6c5ed2b81a04164c4aa9da1bdb193d upstream.
Single-LUN multi-actuator SCSI drives, as well as all multi-actuator
SATA drives, appear as a single device to the I/O subsystem [1]. Yet
they internally address commands to different actuators, as a function
of the Logical Block Address (LBA). A given sector is reachable by
only one of the actuators. For example, Seagate’s Serial Advanced
Technology Attachment (SATA) version contains two actuators and maps
the lower half of the SATA LBA space to the lower actuator and the
upper half to the upper actuator.
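As an illustration of that layout (a hypothetical helper for a drive that
splits its LBA space evenly in two, not code from this patch):

/* Map a sector to the actuator that can reach it. */
static unsigned int actuator_of_sector(sector_t sector, sector_t capacity)
{
	return sector < capacity / 2 ? 0 : 1;
}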
Evidently, to fully utilize the actuators, no actuator should be left idle
or underutilized while there is pending I/O for it. The block layer
must somehow control the load of each actuator individually. This
commit lays the ground for allowing BFQ to provide such a per-actuator
control.
BFQ associates an I/O-request sync bfq_queue with each process doing
synchronous I/O, or with a group of processes, in case of queue
merging. Then BFQ serves one bfq_queue at a time. While in service, a
bfq_queue is emptied in request-position order. Yet the same process,
or group of processes, may generate I/O for different actuators. In
this case, different streams of I/O (each for a different actuator)
get all inserted into the same sync bfq_queue. So there is basically
no individual control on when each stream is served, i.e., on when the
I/O requests of the stream are picked from the bfq_queue and
dispatched to the drive.
This commit enables BFQ to control the service of each actuator
individually for synchronous I/O, by simply splitting each sync
bfq_queue into N queues, one for each actuator. In other words, a sync
bfq_queue is now associated to a pair (process, actuator). As a
consequence of this split, the per-queue proportional-share policy
implemented by BFQ will guarantee that the sync I/O generated for each
actuator, by each process, receives its fair share of service.
This is just a preparatory patch. If the I/O of the same process
happens to be sent to different queues, then each of these queues may
undergo queue merging. To handle this event, the bfq_io_cq data
structure must be properly extended. In addition, stable merging must
be disabled to avoid loss of control on individual actuators. Finally,
async queues must be split as well. These issues are described in detail
and addressed in the next commits. As for this commit, although multiple
per-process bfq_queues are provided, the I/O of each process or group
of processes is still sent to only one queue, regardless of the
actuator the I/O is for. The forwarding to distinct bfq_queues will be
enabled after addressing the above issues.
[1] https://www.linaro.org/blog/budget-fair-queueing-bfq-linux-io-scheduler-opt…
Reviewed-by: Damien Le Moal <damien.lemoal(a)opensource.wdc.com>
Signed-off-by: Gabriele Felici <felicigb(a)gmail.com>
Signed-off-by: Carmine Zaccagnino <carmine(a)carminezacc.com>
Signed-off-by: Paolo Valente <paolo.valente(a)linaro.org>
Link: https://lore.kernel.org/r/20230103145503.71712-2-paolo.valente@linaro.org
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
Stable-dep-of: e8b8344de398 ("block, bfq: fix bfqq uaf in bfq_limit_depth()")
[Hagar: needed contextual fixes]
Signed-off-by: Hagar Hemdan <hagarhem(a)amazon.com>
---
block/bfq-cgroup.c | 95 +++++++++++++-------------
block/bfq-iosched.c | 160 +++++++++++++++++++++++++++++---------------
block/bfq-iosched.h | 51 +++++++++++---
3 files changed, 196 insertions(+), 110 deletions(-)
diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index 60b4299bec8ec..5d202b775beb4 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -704,6 +704,46 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
bfq_put_queue(bfqq);
}
+static void bfq_sync_bfqq_move(struct bfq_data *bfqd,
+ struct bfq_queue *sync_bfqq,
+ struct bfq_io_cq *bic,
+ struct bfq_group *bfqg,
+ unsigned int act_idx)
+{
+ struct bfq_queue *bfqq;
+
+ if (!sync_bfqq->new_bfqq && !bfq_bfqq_coop(sync_bfqq)) {
+ /* We are the only user of this bfqq, just move it */
+ if (sync_bfqq->entity.sched_data != &bfqg->sched_data)
+ bfq_bfqq_move(bfqd, sync_bfqq, bfqg);
+ return;
+ }
+
+ /*
+ * The queue was merged to a different queue. Check
+ * that the merge chain still belongs to the same
+ * cgroup.
+ */
+ for (bfqq = sync_bfqq; bfqq; bfqq = bfqq->new_bfqq)
+ if (bfqq->entity.sched_data != &bfqg->sched_data)
+ break;
+ if (bfqq) {
+ /*
+ * Some queue changed cgroup so the merge is not valid
+ * anymore. We cannot easily just cancel the merge (by
+ * clearing new_bfqq) as there may be other processes
+ * using this queue and holding refs to all queues
+ * below sync_bfqq->new_bfqq. Similarly if the merge
+ * already happened, we need to detach from bfqq now
+ * so that we cannot merge bio to a request from the
+ * old cgroup.
+ */
+ bfq_put_cooperator(sync_bfqq);
+ bfq_release_process_ref(bfqd, sync_bfqq);
+ bic_set_bfqq(bic, NULL, true, act_idx);
+ }
+}
+
/**
* __bfq_bic_change_cgroup - move @bic to @bfqg.
* @bfqd: the queue descriptor.
@@ -714,60 +754,25 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
* sure that the reference to cgroup is valid across the call (see
* comments in bfq_bic_update_cgroup on this issue)
*/
-static void *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
+static void __bfq_bic_change_cgroup(struct bfq_data *bfqd,
struct bfq_io_cq *bic,
struct bfq_group *bfqg)
{
- struct bfq_queue *async_bfqq = bic_to_bfqq(bic, false);
- struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, true);
- struct bfq_entity *entity;
+ unsigned int act_idx;
- if (async_bfqq) {
- entity = &async_bfqq->entity;
+ for (act_idx = 0; act_idx < bfqd->num_actuators; act_idx++) {
+ struct bfq_queue *async_bfqq = bic_to_bfqq(bic, false, act_idx);
+ struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, true, act_idx);
- if (entity->sched_data != &bfqg->sched_data) {
- bic_set_bfqq(bic, NULL, false);
+ if (async_bfqq &&
+ async_bfqq->entity.sched_data != &bfqg->sched_data) {
+ bic_set_bfqq(bic, NULL, false, act_idx);
bfq_release_process_ref(bfqd, async_bfqq);
}
- }
- if (sync_bfqq) {
- if (!sync_bfqq->new_bfqq && !bfq_bfqq_coop(sync_bfqq)) {
- /* We are the only user of this bfqq, just move it */
- if (sync_bfqq->entity.sched_data != &bfqg->sched_data)
- bfq_bfqq_move(bfqd, sync_bfqq, bfqg);
- } else {
- struct bfq_queue *bfqq;
-
- /*
- * The queue was merged to a different queue. Check
- * that the merge chain still belongs to the same
- * cgroup.
- */
- for (bfqq = sync_bfqq; bfqq; bfqq = bfqq->new_bfqq)
- if (bfqq->entity.sched_data !=
- &bfqg->sched_data)
- break;
- if (bfqq) {
- /*
- * Some queue changed cgroup so the merge is
- * not valid anymore. We cannot easily just
- * cancel the merge (by clearing new_bfqq) as
- * there may be other processes using this
- * queue and holding refs to all queues below
- * sync_bfqq->new_bfqq. Similarly if the merge
- * already happened, we need to detach from
- * bfqq now so that we cannot merge bio to a
- * request from the old cgroup.
- */
- bfq_put_cooperator(sync_bfqq);
- bic_set_bfqq(bic, NULL, true);
- bfq_release_process_ref(bfqd, sync_bfqq);
- }
- }
+ if (sync_bfqq)
+ bfq_sync_bfqq_move(bfqd, sync_bfqq, bic, bfqg, act_idx);
}
-
- return bfqg;
}
void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index f759457646530..76b2e66d09070 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -377,16 +377,23 @@ static const unsigned long bfq_late_stable_merging = 600;
#define RQ_BIC(rq) ((struct bfq_io_cq *)((rq)->elv.priv[0]))
#define RQ_BFQQ(rq) ((rq)->elv.priv[1])
-struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic, bool is_sync)
+struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic, bool is_sync,
+ unsigned int actuator_idx)
{
- return bic->bfqq[is_sync];
+ if (is_sync)
+ return bic->bfqq[1][actuator_idx];
+
+ return bic->bfqq[0][actuator_idx];
}
static void bfq_put_stable_ref(struct bfq_queue *bfqq);
-void bic_set_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq, bool is_sync)
+void bic_set_bfqq(struct bfq_io_cq *bic,
+ struct bfq_queue *bfqq,
+ bool is_sync,
+ unsigned int actuator_idx)
{
- struct bfq_queue *old_bfqq = bic->bfqq[is_sync];
+ struct bfq_queue *old_bfqq = bic->bfqq[is_sync][actuator_idx];
/* Clear bic pointer if bfqq is detached from this bic */
if (old_bfqq && old_bfqq->bic == bic)
@@ -405,7 +412,10 @@ void bic_set_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq, bool is_sync)
* we cancel the stable merge if
* bic->stable_merge_bfqq == bfqq.
*/
- bic->bfqq[is_sync] = bfqq;
+ if (is_sync)
+ bic->bfqq[1][actuator_idx] = bfqq;
+ else
+ bic->bfqq[0][actuator_idx] = bfqq;
if (bfqq && bic->stable_merge_bfqq == bfqq) {
/*
@@ -680,9 +690,9 @@ static void bfq_limit_depth(blk_opf_t opf, struct blk_mq_alloc_data *data)
{
struct bfq_data *bfqd = data->q->elevator->elevator_data;
struct bfq_io_cq *bic = bfq_bic_lookup(data->q);
- struct bfq_queue *bfqq = bic ? bic_to_bfqq(bic, op_is_sync(opf)) : NULL;
int depth;
unsigned limit = data->q->nr_requests;
+ unsigned int act_idx;
/* Sync reads have full depth available */
if (op_is_sync(opf) && !op_is_write(opf)) {
@@ -692,14 +702,21 @@ static void bfq_limit_depth(blk_opf_t opf, struct blk_mq_alloc_data *data)
limit = (limit * depth) >> bfqd->full_depth_shift;
}
- /*
- * Does queue (or any parent entity) exceed number of requests that
- * should be available to it? Heavily limit depth so that it cannot
- * consume more available requests and thus starve other entities.
- */
- if (bfqq && bfqq_request_over_limit(bfqq, limit))
- depth = 1;
+ for (act_idx = 0; bic && act_idx < bfqd->num_actuators; act_idx++) {
+ struct bfq_queue *bfqq =
+ bic_to_bfqq(bic, op_is_sync(opf), act_idx);
+ /*
+ * Does queue (or any parent entity) exceed number of
+ * requests that should be available to it? Heavily
+ * limit depth so that it cannot consume more
+ * available requests and thus starve other entities.
+ */
+ if (bfqq && bfqq_request_over_limit(bfqq, limit)) {
+ depth = 1;
+ break;
+ }
+ }
bfq_log(bfqd, "[%s] wr_busy %d sync %d depth %u",
__func__, bfqd->wr_busy_queues, op_is_sync(opf), depth);
if (depth)
@@ -1820,6 +1837,18 @@ static bool bfq_bfqq_higher_class_or_weight(struct bfq_queue *bfqq,
return bfqq_weight > in_serv_weight;
}
+/*
+ * Get the index of the actuator that will serve bio.
+ */
+static unsigned int bfq_actuator_index(struct bfq_data *bfqd, struct bio *bio)
+{
+ /*
+ * Multi-actuator support not complete yet, so always return 0
+ * for the moment (to keep incomplete mechanisms off).
+ */
+ return 0;
+}
+
static bool bfq_better_to_idle(struct bfq_queue *bfqq);
static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd,
@@ -2150,7 +2179,7 @@ static void bfq_check_waker(struct bfq_data *bfqd, struct bfq_queue *bfqq,
* We reset waker detection logic also if too much time has passed
* since the first detection. If wakeups are rare, pointless idling
* doesn't hurt throughput that much. The condition below makes sure
- * we do not uselessly idle blocking waker in more than 1/64 cases.
+ * we do not uselessly idle blocking waker in more than 1/64 cases.
*/
if (bfqd->last_completed_rq_bfqq !=
bfqq->tentative_waker_bfqq ||
@@ -2486,7 +2515,8 @@ static bool bfq_bio_merge(struct request_queue *q, struct bio *bio,
*/
bfq_bic_update_cgroup(bic, bio);
- bfqd->bio_bfqq = bic_to_bfqq(bic, op_is_sync(bio->bi_opf));
+ bfqd->bio_bfqq = bic_to_bfqq(bic, op_is_sync(bio->bi_opf),
+ bfq_actuator_index(bfqd, bio));
} else {
bfqd->bio_bfqq = NULL;
}
@@ -3188,7 +3218,7 @@ static struct bfq_queue *bfq_merge_bfqqs(struct bfq_data *bfqd,
/*
* Merge queues (that is, let bic redirect its requests to new_bfqq)
*/
- bic_set_bfqq(bic, new_bfqq, true);
+ bic_set_bfqq(bic, new_bfqq, true, bfqq->actuator_idx);
bfq_mark_bfqq_coop(new_bfqq);
/*
* new_bfqq now belongs to at least two bics (it is a shared queue):
@@ -4818,11 +4848,8 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd)
*/
if (bfq_bfqq_wait_request(bfqq) ||
(bfqq->dispatched != 0 && bfq_better_to_idle(bfqq))) {
- struct bfq_queue *async_bfqq =
- bfqq->bic && bfqq->bic->bfqq[0] &&
- bfq_bfqq_busy(bfqq->bic->bfqq[0]) &&
- bfqq->bic->bfqq[0]->next_rq ?
- bfqq->bic->bfqq[0] : NULL;
+ unsigned int act_idx = bfqq->actuator_idx;
+ struct bfq_queue *async_bfqq = NULL;
struct bfq_queue *blocked_bfqq =
!hlist_empty(&bfqq->woken_list) ?
container_of(bfqq->woken_list.first,
@@ -4830,6 +4857,10 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd)
woken_list_node)
: NULL;
+ if (bfqq->bic && bfqq->bic->bfqq[0][act_idx] &&
+ bfq_bfqq_busy(bfqq->bic->bfqq[0][act_idx]) &&
+ bfqq->bic->bfqq[0][act_idx]->next_rq)
+ async_bfqq = bfqq->bic->bfqq[0][act_idx];
/*
* The next four mutually-exclusive ifs decide
* whether to try injection, and choose the queue to
@@ -4914,7 +4945,7 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd)
icq_to_bic(async_bfqq->next_rq->elv.icq) == bfqq->bic &&
bfq_serv_to_charge(async_bfqq->next_rq, async_bfqq) <=
bfq_bfqq_budget_left(async_bfqq))
- bfqq = bfqq->bic->bfqq[0];
+ bfqq = bfqq->bic->bfqq[0][act_idx];
else if (bfqq->waker_bfqq &&
bfq_bfqq_busy(bfqq->waker_bfqq) &&
bfqq->waker_bfqq->next_rq &&
@@ -5375,48 +5406,54 @@ static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq)
bfq_release_process_ref(bfqd, bfqq);
}
-static void bfq_exit_icq_bfqq(struct bfq_io_cq *bic, bool is_sync)
+static void bfq_exit_icq_bfqq(struct bfq_io_cq *bic, bool is_sync,
+ unsigned int actuator_idx)
{
- struct bfq_queue *bfqq = bic_to_bfqq(bic, is_sync);
+ struct bfq_queue *bfqq = bic_to_bfqq(bic, is_sync, actuator_idx);
struct bfq_data *bfqd;
if (bfqq)
bfqd = bfqq->bfqd; /* NULL if scheduler already exited */
if (bfqq && bfqd) {
- unsigned long flags;
-
- spin_lock_irqsave(&bfqd->lock, flags);
- bic_set_bfqq(bic, NULL, is_sync);
+ bic_set_bfqq(bic, NULL, is_sync, actuator_idx);
bfq_exit_bfqq(bfqd, bfqq);
- spin_unlock_irqrestore(&bfqd->lock, flags);
}
}
static void bfq_exit_icq(struct io_cq *icq)
{
struct bfq_io_cq *bic = icq_to_bic(icq);
+ struct bfq_data *bfqd = bic_to_bfqd(bic);
+ unsigned long flags;
+ unsigned int act_idx;
+ /*
+ * If bfqd and thus bfqd->num_actuators is not available any
+ * longer, then cycle over all possible per-actuator bfqqs in
+ * next loop. We rely on bic being zeroed on creation, and
+ * therefore on its unused per-actuator fields being NULL.
+ */
+ unsigned int num_actuators = BFQ_MAX_ACTUATORS;
- if (bic->stable_merge_bfqq) {
- struct bfq_data *bfqd = bic->stable_merge_bfqq->bfqd;
+ /*
+ * bfqd is NULL if scheduler already exited, and in that case
+ * this is the last time these queues are accessed.
+ */
+ if (bfqd) {
+ spin_lock_irqsave(&bfqd->lock, flags);
+ num_actuators = bfqd->num_actuators;
+ }
- /*
- * bfqd is NULL if scheduler already exited, and in
- * that case this is the last time bfqq is accessed.
- */
- if (bfqd) {
- unsigned long flags;
+ if (bic->stable_merge_bfqq)
+ bfq_put_stable_ref(bic->stable_merge_bfqq);
- spin_lock_irqsave(&bfqd->lock, flags);
- bfq_put_stable_ref(bic->stable_merge_bfqq);
- spin_unlock_irqrestore(&bfqd->lock, flags);
- } else {
- bfq_put_stable_ref(bic->stable_merge_bfqq);
- }
+ for (act_idx = 0; act_idx < num_actuators; act_idx++) {
+ bfq_exit_icq_bfqq(bic, true, act_idx);
+ bfq_exit_icq_bfqq(bic, false, act_idx);
}
- bfq_exit_icq_bfqq(bic, true);
- bfq_exit_icq_bfqq(bic, false);
+ if (bfqd)
+ spin_unlock_irqrestore(&bfqd->lock, flags);
}
/*
@@ -5493,25 +5530,27 @@ static void bfq_check_ioprio_change(struct bfq_io_cq *bic, struct bio *bio)
bic->ioprio = ioprio;
- bfqq = bic_to_bfqq(bic, false);
+ bfqq = bic_to_bfqq(bic, false, bfq_actuator_index(bfqd, bio));
if (bfqq) {
struct bfq_queue *old_bfqq = bfqq;
bfqq = bfq_get_queue(bfqd, bio, false, bic, true);
- bic_set_bfqq(bic, bfqq, false);
+ bic_set_bfqq(bic, bfqq, false, bfq_actuator_index(bfqd, bio));
bfq_release_process_ref(bfqd, old_bfqq);
}
- bfqq = bic_to_bfqq(bic, true);
+ bfqq = bic_to_bfqq(bic, true, bfq_actuator_index(bfqd, bio));
if (bfqq)
bfq_set_next_ioprio_data(bfqq, bic);
}
static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
- struct bfq_io_cq *bic, pid_t pid, int is_sync)
+ struct bfq_io_cq *bic, pid_t pid, int is_sync,
+ unsigned int act_idx)
{
u64 now_ns = ktime_get_ns();
+ bfqq->actuator_idx = act_idx;
RB_CLEAR_NODE(&bfqq->entity.rb_node);
INIT_LIST_HEAD(&bfqq->fifo);
INIT_HLIST_NODE(&bfqq->burst_list_node);
@@ -5762,7 +5801,7 @@ static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd,
if (bfqq) {
bfq_init_bfqq(bfqd, bfqq, bic, current->pid,
- is_sync);
+ is_sync, bfq_actuator_index(bfqd, bio));
bfq_init_entity(&bfqq->entity, bfqg);
bfq_log_bfqq(bfqd, bfqq, "allocated");
} else {
@@ -6078,7 +6117,8 @@ static bool __bfq_insert_request(struct bfq_data *bfqd, struct request *rq)
* then complete the merge and redirect it to
* new_bfqq.
*/
- if (bic_to_bfqq(RQ_BIC(rq), 1) == bfqq) {
+ if (bic_to_bfqq(RQ_BIC(rq), true,
+ bfq_actuator_index(bfqd, rq->bio)) == bfqq) {
while (bfqq != new_bfqq)
bfqq = bfq_merge_bfqqs(bfqd, RQ_BIC(rq), bfqq);
}
@@ -6632,7 +6672,7 @@ bfq_split_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq)
return bfqq;
}
- bic_set_bfqq(bic, NULL, true);
+ bic_set_bfqq(bic, NULL, true, bfqq->actuator_idx);
bfq_put_cooperator(bfqq);
@@ -6646,7 +6686,8 @@ static struct bfq_queue *bfq_get_bfqq_handle_split(struct bfq_data *bfqd,
bool split, bool is_sync,
bool *new_queue)
{
- struct bfq_queue *bfqq = bic_to_bfqq(bic, is_sync);
+ unsigned int act_idx = bfq_actuator_index(bfqd, bio);
+ struct bfq_queue *bfqq = bic_to_bfqq(bic, is_sync, act_idx);
if (likely(bfqq && bfqq != &bfqd->oom_bfqq))
return bfqq;
@@ -6658,7 +6699,7 @@ static struct bfq_queue *bfq_get_bfqq_handle_split(struct bfq_data *bfqd,
bfq_put_queue(bfqq);
bfqq = bfq_get_queue(bfqd, bio, is_sync, bic, split);
- bic_set_bfqq(bic, bfqq, is_sync);
+ bic_set_bfqq(bic, bfqq, is_sync, act_idx);
if (split && is_sync) {
if ((bic->was_in_burst_list && bfqd->large_burst) ||
bic->saved_in_large_burst)
@@ -7139,8 +7180,10 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
* Our fallback bfqq if bfq_find_alloc_queue() runs into OOM issues.
* Grab a permanent reference to it, so that the normal code flow
* will not attempt to free it.
+ * Set zero as actuator index: we will pretend that
+ * all I/O requests are for the same actuator.
*/
- bfq_init_bfqq(bfqd, &bfqd->oom_bfqq, NULL, 1, 0);
+ bfq_init_bfqq(bfqd, &bfqd->oom_bfqq, NULL, 1, 0, 0);
bfqd->oom_bfqq.ref++;
bfqd->oom_bfqq.new_ioprio = BFQ_DEFAULT_QUEUE_IOPRIO;
bfqd->oom_bfqq.new_ioprio_class = IOPRIO_CLASS_BE;
@@ -7159,6 +7202,13 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
bfqd->queue = q;
+ /*
+ * Multi-actuator support not complete yet, unconditionally
+ * set to only one actuator for the moment (to keep incomplete
+ * mechanisms off).
+ */
+ bfqd->num_actuators = 1;
+
INIT_LIST_HEAD(&bfqd->dispatch);
hrtimer_init(&bfqd->idle_slice_timer, CLOCK_MONOTONIC,
diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
index 71f721670ab62..2b413ddffbb95 100644
--- a/block/bfq-iosched.h
+++ b/block/bfq-iosched.h
@@ -33,6 +33,14 @@
*/
#define BFQ_SOFTRT_WEIGHT_FACTOR 100
+/*
+ * Maximum number of actuators supported. This constant is used simply
+ * to define the size of the static array that will contain
+ * per-actuator data. The current value is hopefully a good upper
+ * bound to the possible number of actuators of any actual drive.
+ */
+#define BFQ_MAX_ACTUATORS 8
+
struct bfq_entity;
/**
@@ -225,12 +233,14 @@ struct bfq_ttime {
* struct bfq_queue - leaf schedulable entity.
*
* A bfq_queue is a leaf request queue; it can be associated with an
- * io_context or more, if it is async or shared between cooperating
- * processes. @cgroup holds a reference to the cgroup, to be sure that it
- * does not disappear while a bfqq still references it (mostly to avoid
- * races between request issuing and task migration followed by cgroup
- * destruction).
- * All the fields are protected by the queue lock of the containing bfqd.
+ * io_context or more, if it is async or shared between cooperating
+ * processes. Besides, it contains I/O requests for only one actuator
+ * (an io_context is associated with a different bfq_queue for each
+ * actuator it generates I/O for). @cgroup holds a reference to the
+ * cgroup, to be sure that it does not disappear while a bfqq still
+ * references it (mostly to avoid races between request issuing and
+ * task migration followed by cgroup destruction). All the fields are
+ * protected by the queue lock of the containing bfqd.
*/
struct bfq_queue {
/* reference counter */
@@ -395,6 +405,9 @@ struct bfq_queue {
* the woken queues when this queue exits.
*/
struct hlist_head woken_list;
+
+ /* index of the actuator this queue is associated with */
+ unsigned int actuator_idx;
};
/**
@@ -403,8 +416,17 @@ struct bfq_queue {
struct bfq_io_cq {
/* associated io_cq structure */
struct io_cq icq; /* must be the first member */
- /* array of two process queues, the sync and the async */
- struct bfq_queue *bfqq[2];
+ /*
+ * Matrix of associated process queues: first row for async
+ * queues, second row sync queues. Each row contains one
+ * column for each actuator. An I/O request generated by the
+ * process is inserted into the queue pointed by bfqq[i][j] if
+ * the request is to be served by the j-th actuator of the
+ * drive, where i==0 or i==1, depending on whether the request
+ * is async or sync. So there is a distinct queue for each
+ * actuator.
+ */
+ struct bfq_queue *bfqq[2][BFQ_MAX_ACTUATORS];
/* per (request_queue, blkcg) ioprio */
int ioprio;
#ifdef CONFIG_BFQ_GROUP_IOSCHED
@@ -768,6 +790,13 @@ struct bfq_data {
*/
unsigned int word_depths[2][2];
unsigned int full_depth_shift;
+
+ /*
+ * Number of independent actuators. This is equal to 1 in
+ * case of single-actuator drives.
+ */
+ unsigned int num_actuators;
+
};
enum bfqq_state_flags {
@@ -964,8 +993,10 @@ struct bfq_group {
extern const int bfq_timeout;
-struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic, bool is_sync);
-void bic_set_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq, bool is_sync);
+struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic, bool is_sync,
+ unsigned int actuator_idx);
+void bic_set_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq, bool is_sync,
+ unsigned int actuator_idx);
struct bfq_data *bic_to_bfqd(struct bfq_io_cq *bic);
void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq);
void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_queue *bfqq,
--
2.47.1
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 8802766324e1f5d414a81ac43365c20142e85603
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021801-splinter-sappy-56b3@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 8802766324e1f5d414a81ac43365c20142e85603 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence(a)gmail.com>
Date: Wed, 12 Feb 2025 13:46:46 +0000
Subject: [PATCH] io_uring/kbuf: reallocate buf lists on upgrade
IORING_REGISTER_PBUF_RING can reuse an old struct io_buffer_list if it
was created for a legacy selected buffer and has been emptied. It violates
the requirement that most of the fields should stay stable after publish.
Always reallocate it instead.
Cc: stable(a)vger.kernel.org
Reported-by: Pumpkin Chang <pumpkin(a)devco.re>
Fixes: 2fcabce2d7d34 ("io_uring: disallow mixed provided buffer group registrations")
Signed-off-by: Pavel Begunkov <asml.silence(a)gmail.com>
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c
index 04bf493eecae..8e72de7712ac 100644
--- a/io_uring/kbuf.c
+++ b/io_uring/kbuf.c
@@ -415,6 +415,13 @@ void io_destroy_buffers(struct io_ring_ctx *ctx)
}
}
+static void io_destroy_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl)
+{
+ scoped_guard(mutex, &ctx->mmap_lock)
+ WARN_ON_ONCE(xa_erase(&ctx->io_bl_xa, bl->bgid) != bl);
+ io_put_bl(ctx, bl);
+}
+
int io_remove_buffers_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_provide_buf *p = io_kiocb_to_cmd(req, struct io_provide_buf);
@@ -636,12 +643,13 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
/* if mapped buffer ring OR classic exists, don't allow */
if (bl->flags & IOBL_BUF_RING || !list_empty(&bl->buf_list))
return -EEXIST;
- } else {
- free_bl = bl = kzalloc(sizeof(*bl), GFP_KERNEL);
- if (!bl)
- return -ENOMEM;
+ io_destroy_bl(ctx, bl);
}
+ free_bl = bl = kzalloc(sizeof(*bl), GFP_KERNEL);
+ if (!bl)
+ return -ENOMEM;
+
mmap_offset = (unsigned long)reg.bgid << IORING_OFF_PBUF_SHIFT;
ring_size = flex_array_size(br, bufs, reg.ring_entries);
The patch below does not apply to the 6.13-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.13.y
git checkout FETCH_HEAD
git cherry-pick -x 8802766324e1f5d414a81ac43365c20142e85603
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021855-snugly-hacked-a8fa@gregkh' --subject-prefix 'PATCH 6.13.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 8802766324e1f5d414a81ac43365c20142e85603 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence(a)gmail.com>
Date: Wed, 12 Feb 2025 13:46:46 +0000
Subject: [PATCH] io_uring/kbuf: reallocate buf lists on upgrade
IORING_REGISTER_PBUF_RING can reuse an old struct io_buffer_list if it
was created for a legacy selected buffer and has been emptied. It violates
the requirement that most of the fields should stay stable after publish.
Always reallocate it instead.
Cc: stable(a)vger.kernel.org
Reported-by: Pumpkin Chang <pumpkin(a)devco.re>
Fixes: 2fcabce2d7d34 ("io_uring: disallow mixed provided buffer group registrations")
Signed-off-by: Pavel Begunkov <asml.silence(a)gmail.com>
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c
index 04bf493eecae..8e72de7712ac 100644
--- a/io_uring/kbuf.c
+++ b/io_uring/kbuf.c
@@ -415,6 +415,13 @@ void io_destroy_buffers(struct io_ring_ctx *ctx)
}
}
+static void io_destroy_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl)
+{
+ scoped_guard(mutex, &ctx->mmap_lock)
+ WARN_ON_ONCE(xa_erase(&ctx->io_bl_xa, bl->bgid) != bl);
+ io_put_bl(ctx, bl);
+}
+
int io_remove_buffers_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_provide_buf *p = io_kiocb_to_cmd(req, struct io_provide_buf);
@@ -636,12 +643,13 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
/* if mapped buffer ring OR classic exists, don't allow */
if (bl->flags & IOBL_BUF_RING || !list_empty(&bl->buf_list))
return -EEXIST;
- } else {
- free_bl = bl = kzalloc(sizeof(*bl), GFP_KERNEL);
- if (!bl)
- return -ENOMEM;
+ io_destroy_bl(ctx, bl);
}
+ free_bl = bl = kzalloc(sizeof(*bl), GFP_KERNEL);
+ if (!bl)
+ return -ENOMEM;
+
mmap_offset = (unsigned long)reg.bgid << IORING_OFF_PBUF_SHIFT;
ring_size = flex_array_size(br, bufs, reg.ring_entries);
From: Shu Han <ebpqwerty472123(a)gmail.com>
commit ea7e2d5e49c05e5db1922387b09ca74aa40f46e2 upstream.
The remap_file_pages syscall handler calls do_mmap() directly, which
doesn't contain the LSM security check. And if the process has called
personality(READ_IMPLIES_EXEC) before and remap_file_pages() is called for
RW pages, this will actually result in remapping the pages to RWX,
bypassing a W^X policy enforced by SELinux.
So we should check prot by security_mmap_file LSM hook in the
remap_file_pages syscall handler before do_mmap() is called. Otherwise, it
potentially permits an attacker to bypass a W^X policy enforced by
SELinux.
The bypass is similar to CVE-2016-10044, which bypassed the same policy via
AIO; see [1].
The PoC:
$ cat > test.c
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/personality.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void) {
size_t pagesz = sysconf(_SC_PAGE_SIZE);
int mfd = syscall(SYS_memfd_create, "test", 0);
const char *buf = mmap(NULL, 4 * pagesz, PROT_READ | PROT_WRITE,
MAP_SHARED, mfd, 0);
unsigned int old = syscall(SYS_personality, 0xffffffff);
syscall(SYS_personality, READ_IMPLIES_EXEC | old);
syscall(SYS_remap_file_pages, buf, pagesz, 0, 2, 0);
syscall(SYS_personality, old);
// show the RWX page exists even if W^X policy is enforced
int fd = open("/proc/self/maps", O_RDONLY);
unsigned char buf2[1024];
while (1) {
int ret = read(fd, buf2, 1024);
if (ret <= 0) break;
write(1, buf2, ret);
}
close(fd);
}
$ gcc test.c -o test
$ ./test | grep rwx
7f1836c34000-7f1836c35000 rwxs 00002000 00:01 2050 /memfd:test (deleted)
Link: https://project-zero.issues.chromium.org/issues/42452389 [1]
Cc: stable(a)vger.kernel.org
Signed-off-by: Shu Han <ebpqwerty472123(a)gmail.com>
Acked-by: Stephen Smalley <stephen.smalley.work(a)gmail.com>
[PM: subject line tweaks]
Signed-off-by: Paul Moore <paul(a)paul-moore.com>
Signed-off-by: Pratyush Yadav <ptyadav(a)amazon.de>
---
mm/mmap.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/mm/mmap.c b/mm/mmap.c
index 9f76625a1743..2c17eb840e44 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -3078,8 +3078,12 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
}
file = get_file(vma->vm_file);
+ ret = security_mmap_file(vma->vm_file, prot, flags);
+ if (ret)
+ goto out_fput;
ret = do_mmap(vma->vm_file, start, size,
prot, flags, pgoff, &populate, NULL);
+out_fput:
fput(file);
out:
mmap_write_unlock(mm);
--
2.47.1
[BUG]
When running generic/417 with a btrfs whose block size < page size
(subpage cases), it always fails.
And the following minimal reproducer is more than enough to trigger it
reliably:
workload()
{
mkfs.btrfs -s 4k -f $dev > /dev/null
dmesg -C
mount $dev $mnt
$fsstree_dir/src/dio-invalidate-cache -r -b 4096 -n 3 -i 1 -f $mnt/diotest
ret=$?
umount $mnt
stop_trace
if [ $ret -ne 0 ]; then
fail
fi
}
for (( i = 0; i < 1024; i++)); do
echo "=== $i/$runtime ==="
workload
done
[CAUSE]
With extra trace printk added to the following functions:
- btrfs_buffered_write()
* Which folio is touched
* The file offset (start) where the buffered write is at
* How many bytes are copied
* The content of the write (the first 2 bytes)
- submit_one_sector()
* Which folio is touched
* The position inside the folio
- pagecache_isize_extended()
* The parameters of the function itself
* The parameters of the folio_zero_range()
Which are enough to show the problem:
22.158114: btrfs_buffered_write: folio pos=0 start=0 copied=4096 content=0x0101
22.158161: submit_one_sector: r/i=5/257 folio=0 pos=0 content=0x0101
22.158609: btrfs_buffered_write: folio pos=0 start=4096 copied=4096 content=0x0101
22.158634: btrfs_buffered_write: folio pos=0 start=8192 copied=4096 content=0x0101
22.158650: pagecache_isize_extended: folio=0 from=4096 to=8192 bsize=4096 zero off=4096 len=8192
22.158682: submit_one_sector: r/i=5/257 folio=0 pos=4096 content=0x0000
22.158686: submit_one_sector: r/i=5/257 folio=0 pos=8192 content=0x0101
The tool dio-invalidate-cache will start 3 threads, each doing a buffered
write of 0x01 at offset 4096 * i (i = 0, 1, 2), do an fsync, then do a direct read,
and compare the read buffer with the write buffer.
Note that all 3 btrfs_buffered_write() are writing the correct 0x01 into
the page cache.
But at submit_one_sector(), at file offset 4096, the content is zeroed
out, mostly by pagecache_isize_extended().
The race happens like this:
Thread A is writing into range [4K, 8K).
Thread B is writing into range [8K, 12K).
Thread A | Thread B
-------------------------------------+------------------------------------
btrfs_buffered_write() | btrfs_buffered_write()
|- old_isize = 4K; | |- old_isize = 4K;
|- btrfs_inode_lock() | |
|- write into folio range [4K, 8K) | |
|- pagecache_isize_extended() | |
| extend isize from 4096 to 8192 | |
| no folio_zero_range() called | |
|- btrfs_inode_unlock() | |
| |- btrfs_inode_lock()
| |- write into folio range [8K, 12K)
| |- pagecache_isize_extended()
| | calling folio_zero_range(4K, 8K)
| | This is caused by the old_isize is
| | grabbed too early, without any
| | inode lock.
| |- btrfs_inode_unlock()
The @old_isize is grabbed without inode lock, causing race between two
buffered write threads and making pagecache_isize_extended() to zero
range which is still containing cached data.
And this is only affecting subpage btrfs, because for regular blocksize
== page size case, the function pagecache_isize_extended() will do
nothing if the block size >= page size.
[FIX]
Grab the old isize with the inode lock held.
This means each buffered write thread will have a stable view of the
old inode size, thus avoiding the above race.
Cc: stable(a)vger.kernel.org
Signed-off-by: Qu Wenruo <wqu(a)suse.com>
---
fs/btrfs/file.c | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index fd90855fe717..896dc03689d6 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1090,7 +1090,7 @@ ssize_t btrfs_buffered_write(struct kiocb *iocb, struct iov_iter *i)
u64 lockend;
size_t num_written = 0;
ssize_t ret;
- loff_t old_isize = i_size_read(inode);
+ loff_t old_isize;
unsigned int ilock_flags = 0;
const bool nowait = (iocb->ki_flags & IOCB_NOWAIT);
unsigned int bdp_flags = (nowait ? BDP_ASYNC : 0);
@@ -1103,6 +1103,13 @@ ssize_t btrfs_buffered_write(struct kiocb *iocb, struct iov_iter *i)
if (ret < 0)
return ret;
+ /*
+ * We can only trust the isize with inode lock hold, or it can race with
+ * other buffered writes and cause incorrect call of
+ * pagecache_isize_extended() to overwrite existing data.
+ */
+ old_isize = i_size_read(inode);
+
ret = generic_write_checks(iocb, i);
if (ret <= 0)
goto out;
--
2.48.1
According to the chip manual, the I2C register access type of
Loongson-2K2000/LS7A is "B", so we can only access registers in byte
form (readb/writeb).
Although Loongson-2K0500/Loongson-2K1000 do not have similar
constraints, register accesses in byte form also behave correctly.
Also, in hardware, the frequency division registers are defined as two
separate registers (high 8-bit and low 8-bit), so we just access them
directly as bytes.
Cc: stable(a)vger.kernel.org
Fixes: 015e61f0bffd ("i2c: ls2x: Add driver for Loongson-2K/LS7A I2C controller")
Co-developed-by: Hongliang Wang <wanghongliang(a)loongson.cn>
Signed-off-by: Hongliang Wang <wanghongliang(a)loongson.cn>
Signed-off-by: Binbin Zhou <zhoubinbin(a)loongson.cn>
---
V2:
- Add a comment to prevent from changing that back to 16-bit write.
Link to V1:
https://lore.kernel.org/all/20250218111133.3058590-1-zhoubinbin@loongson.cn/
drivers/i2c/busses/i2c-ls2x.c | 16 ++++++++++++----
1 file changed, 12 insertions(+), 4 deletions(-)
diff --git a/drivers/i2c/busses/i2c-ls2x.c b/drivers/i2c/busses/i2c-ls2x.c
index 8821cac3897b..61377693a4d6 100644
--- a/drivers/i2c/busses/i2c-ls2x.c
+++ b/drivers/i2c/busses/i2c-ls2x.c
@@ -10,6 +10,7 @@
* Rewritten for mainline by Binbin Zhou <zhoubinbin(a)loongson.cn>
*/
+#include <linux/bitfield.h>
#include <linux/bits.h>
#include <linux/completion.h>
#include <linux/device.h>
@@ -26,7 +27,8 @@
#include <linux/units.h>
/* I2C Registers */
-#define I2C_LS2X_PRER 0x0 /* Freq Division Register(16 bits) */
+#define I2C_LS2X_PRER_LO 0x0 /* Freq Division Low Byte Register */
+#define I2C_LS2X_PRER_HI 0x1 /* Freq Division High Byte Register */
#define I2C_LS2X_CTR 0x2 /* Control Register */
#define I2C_LS2X_TXR 0x3 /* Transport Data Register */
#define I2C_LS2X_RXR 0x3 /* Receive Data Register */
@@ -93,6 +95,7 @@ static irqreturn_t ls2x_i2c_isr(int this_irq, void *dev_id)
*/
static void ls2x_i2c_adjust_bus_speed(struct ls2x_i2c_priv *priv)
{
+ u16 val;
struct i2c_timings *t = &priv->i2c_t;
struct device *dev = priv->adapter.dev.parent;
u32 acpi_speed = i2c_acpi_find_bus_speed(dev);
@@ -104,9 +107,14 @@ static void ls2x_i2c_adjust_bus_speed(struct ls2x_i2c_priv *priv)
else
t->bus_freq_hz = LS2X_I2C_FREQ_STD;
- /* Calculate and set i2c frequency. */
- writew(LS2X_I2C_PCLK_FREQ / (5 * t->bus_freq_hz) - 1,
- priv->base + I2C_LS2X_PRER);
+ /*
+ * According to the chip manual, we can only access the registers as bytes,
+ * otherwise the high bits will be truncated.
+ * So set the I2C frequency with a sequential writeb instead of writew.
+ */
+ val = LS2X_I2C_PCLK_FREQ / (5 * t->bus_freq_hz) - 1;
+ writeb(FIELD_GET(GENMASK(7, 0), val), priv->base + I2C_LS2X_PRER_LO);
+ writeb(FIELD_GET(GENMASK(15, 8), val), priv->base + I2C_LS2X_PRER_HI);
}
static void ls2x_i2c_init(struct ls2x_i2c_priv *priv)
base-commit: 7e45b505e699f4c80aa8bf79b4ea2a5f5a66bb51
--
2.47.1
From: Ge Yang <yangge1116(a)126.com>
Since the introduction of commit c77c0a8ac4c52 ("mm/hugetlb: defer freeing
of huge pages if in non-task context"), which supports deferring the
freeing of hugetlb pages, the allocation of contiguous memory through
cma_alloc() may fail probabilistically.
In the CMA allocation process, if it is found that the CMA area is occupied
by in-use hugetlb folios, these in-use hugetlb folios need to be migrated
to another location. When there are no available hugetlb folios in the
free hugetlb pool during the migration of in-use hugetlb folios, new folios
are allocated from the buddy system. A temporary state is set on the newly
allocated folio. Upon completion of the hugetlb folio migration, the
temporary state is transferred from the new folios to the old folios.
Normally, when the old folios with the temporary state are freed, they are
directly released back to the buddy system. However, due to the deferred
freeing of hugetlb pages, the PageBuddy() check fails, ultimately leading
to the failure of cma_alloc().
Here is a simplified call trace illustrating the process:
cma_alloc()
->__alloc_contig_migrate_range() // Migrate in-use hugetlb folios
->unmap_and_move_huge_page()
->folio_putback_hugetlb() // Free old folios
->test_pages_isolated()
->__test_page_isolated_in_pageblock()
->PageBuddy(page) // Check if the page is in buddy
To resolve this issue, we have implemented a function named
wait_for_freed_hugetlb_folios(). This function ensures that the hugetlb
folios are properly released back to the buddy system after their migration
is completed. By invoking wait_for_freed_hugetlb_folios() before calling
PageBuddy(), we ensure that PageBuddy() will succeed.
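A minimal sketch of the mechanism being waited on (simplified from the
deferred-freeing scheme of commit c77c0a8ac4c52 plus the fix below):

/* Free paths that may run in non-task context only queue the page:
 * a work item returns it to the buddy allocator later. */
static void free_hpage_deferred(struct page *page)
{
	if (llist_add((struct llist_node *)&page->mapping, &hpage_freelist))
		schedule_work(&free_hpage_work);
}

/* The fix: callers that need the folios back in the buddy allocator
 * first drain any pending deferred frees. */
void wait_for_freed_hugetlb_folios(void)
{
	if (llist_empty(&hpage_freelist))
		return;

	flush_work(&free_hpage_work);
}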
Fixes: c77c0a8ac4c52 ("mm/hugetlb: defer freeing of huge pages if in non-task context")
Signed-off-by: Ge Yang <yangge1116(a)126.com>
Cc: <stable(a)vger.kernel.org>
---
V4:
- add a check to determine if hpage_freelist is empty suggested by David
V3:
- adjust code and message suggested by Muchun and David
V2:
- flush all folios at once suggested by David
include/linux/hugetlb.h | 5 +++++
mm/hugetlb.c | 8 ++++++++
mm/page_isolation.c | 10 ++++++++++
3 files changed, 23 insertions(+)
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 6c6546b..0c54b3a 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -697,6 +697,7 @@ bool hugetlb_bootmem_page_zones_valid(int nid, struct huge_bootmem_page *m);
int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list);
int replace_free_hugepage_folios(unsigned long start_pfn, unsigned long end_pfn);
+void wait_for_freed_hugetlb_folios(void);
struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
unsigned long addr, bool cow_from_owner);
struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
@@ -1092,6 +1093,10 @@ static inline int replace_free_hugepage_folios(unsigned long start_pfn,
return 0;
}
+static inline void wait_for_freed_hugetlb_folios(void)
+{
+}
+
static inline struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
unsigned long addr,
bool cow_from_owner)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 30bc34d..8801dbc 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2955,6 +2955,14 @@ int replace_free_hugepage_folios(unsigned long start_pfn, unsigned long end_pfn)
return ret;
}
+void wait_for_freed_hugetlb_folios(void)
+{
+ if (llist_empty(&hpage_freelist))
+ return;
+
+ flush_work(&free_hpage_work);
+}
+
typedef enum {
/*
* For either 0/1: we checked the per-vma resv map, and one resv
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index 8ed53ee0..b2fc526 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -615,6 +615,16 @@ int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn,
int ret;
/*
+ * Due to the deferred freeing of hugetlb folios, the hugepage folios may
+ * not immediately release to the buddy system. This can cause PageBuddy()
+ * to fail in __test_page_isolated_in_pageblock(). To ensure that the
+ * hugetlb folios are properly released back to the buddy system, we
+ * invoke the wait_for_freed_hugetlb_folios() function to wait for the
+ * release to complete.
+ */
+ wait_for_freed_hugetlb_folios();
+
+ /*
* Note: pageblock_nr_pages != MAX_PAGE_ORDER. Then, chunks of free
* pages are not aligned to pageblock_nr_pages.
* Then we just check migratetype first.
--
2.7.4
This reverts commit 484fd6c1de13b336806a967908a927cc0356e312. The
commit caused a regression because the umask was now also applied to
symlinks, and the fix is unnecessary because the umask/O_TMPFILE bug
has already been fixed elsewhere.
Fixes: https://lore.kernel.org/lkml/28DSITL9912E1.2LSZUVTGTO52Q@mforney.org/
Signed-off-by: Max Kellermann <max.kellermann(a)ionos.com>
---
fs/ext4/acl.h | 5 -----
1 file changed, 5 deletions(-)
diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h
index ef4c19e5f570..0c5a79c3b5d4 100644
--- a/fs/ext4/acl.h
+++ b/fs/ext4/acl.h
@@ -68,11 +68,6 @@ extern int ext4_init_acl(handle_t *, struct inode *, struct inode *);
static inline int
ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir)
{
- /* usually, the umask is applied by posix_acl_create(), but if
- ext4 ACL support is disabled at compile time, we need to do
- it here, because posix_acl_create() will never be called */
- inode->i_mode &= ~current_umask();
-
return 0;
}
#endif /* CONFIG_EXT4_FS_POSIX_ACL */
--
2.39.2
On Thu, Feb 13, 2025 at 11:07:07AM +0100, Greg KH wrote:
> On Thu, Feb 13, 2025 at 12:20:25AM +0000, Qasim Ijaz wrote:
> > During the "size_check" label in ea_get(), the code checks if the extended
> > attribute list (xattr) size matches ea_size. If not, it logs
> > "ea_get: invalid extended attribute" and calls print_hex_dump().
> >
> > Here, EALIST_SIZE(ea_buf->xattr) returns 4110417968, which exceeds
> > INT_MAX (2,147,483,647). Then ea_size is clamped:
> >
> > int size = clamp_t(int, ea_size, 0, EALIST_SIZE(ea_buf->xattr));
> >
> > Although clamp_t aims to bound ea_size between 0 and 4110417968, the upper
> > limit is treated as an int, causing an overflow above 2^31 - 1. This leads
> > "size" to wrap around and become negative (-184549328).
> >
> > The "size" is then passed to print_hex_dump() (called "len" in
> > print_hex_dump()), it is passed as type size_t (an unsigned
> > type), this is then stored inside a variable called
> > "int remaining", which is then assigned to "int linelen" which
> > is then passed to hex_dump_to_buffer(). In print_hex_dump()
> > the for loop, iterates through 0 to len-1, where len is
> > 18446744073525002176, calling hex_dump_to_buffer()
> > on each iteration:
> >
> > for (i = 0; i < len; i += rowsize) {
> > linelen = min(remaining, rowsize);
> > remaining -= rowsize;
> >
> > hex_dump_to_buffer(ptr + i, linelen, rowsize, groupsize,
> > linebuf, sizeof(linebuf), ascii);
> >
> > ...
> > }
> >
> > The expected stopping condition (i < len) is effectively broken
> > since len is corrupted and very large. This eventually leads to
> > the "ptr+i" being passed to hex_dump_to_buffer() to get closer
> > to the end of the actual bounds of "ptr", eventually an out of
> > bounds access is done in hex_dump_to_buffer() in the following
> > for loop:
> >
> > for (j = 0; j < len; j++) {
> > if (linebuflen < lx + 2)
> > goto overflow2;
> > ch = ptr[j];
> > ...
> > }
> >
> > To fix this we should validate "EALIST_SIZE(ea_buf->xattr)"
> > before it is utilised.
> >
> > Reported-by: syzbot <syzbot+4e6e7e4279d046613bc5(a)syzkaller.appspotmail.com>
> > Tested-by: syzbot <syzbot+4e6e7e4279d046613bc5(a)syzkaller.appspotmail.com>
> > Closes: https://syzkaller.appspot.com/bug?extid=4e6e7e4279d046613bc5
> > Fixes: d9f9d96136cb ("jfs: xattr: check invalid xattr size more strictly")
> > Signed-off-by: Qasim Ijaz <qasdev00(a)gmail.com>
> > ---
> > fs/jfs/xattr.c | 15 ++++++++++-----
> > 1 file changed, 10 insertions(+), 5 deletions(-)
> >
> > diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c
> > index 24afbae87225..7575c51cce9b 100644
> > --- a/fs/jfs/xattr.c
> > +++ b/fs/jfs/xattr.c
> > @@ -559,11 +555,16 @@ static int ea_get(struct inode *inode, struct ea_buffer *ea_buf, int min_size)
> >
> > size_check:
> > if (EALIST_SIZE(ea_buf->xattr) != ea_size) {
> > - int size = clamp_t(int, ea_size, 0, EALIST_SIZE(ea_buf->xattr));
> > -
> > - printk(KERN_ERR "ea_get: invalid extended attribute\n");
> > - print_hex_dump(KERN_ERR, "", DUMP_PREFIX_ADDRESS, 16, 1,
> > - ea_buf->xattr, size, 1);
> > + if (unlikely(EALIST_SIZE(ea_buf->xattr) > INT_MAX)) {
> > + printk(KERN_ERR "ea_get: extended attribute size too large: %u > INT_MAX\n",
> > + EALIST_SIZE(ea_buf->xattr));
> > + } else {
> > + int size = clamp_t(int, ea_size, 0, EALIST_SIZE(ea_buf->xattr));
> > +
> > + printk(KERN_ERR "ea_get: invalid extended attribute\n");
> > + print_hex_dump(KERN_ERR, "", DUMP_PREFIX_ADDRESS, 16, 1,
> > + ea_buf->xattr, size, 1);
> > + }
> > ea_release(inode, ea_buf);
> > rc = -EIO;
> > goto clean_up;
> > --
> > 2.39.5
> >
>
> Hi,
>
> This is the friendly patch-bot of Greg Kroah-Hartman. You have sent him
> a patch that has triggered this response. He used to manually respond
> to these common problems, but in order to save his sanity (he kept
> writing the same thing over and over, yet to different people), I was
> created. Hopefully you will not take offence and will fix the problem
> in your patch and resubmit it so that it can be accepted into the Linux
> kernel tree.
>
> You are receiving this message because of the following common error(s)
> as indicated below:
>
> - You have marked a patch with a "Fixes:" tag for a commit that is in an
> older released kernel, yet you do not have a cc: stable line in the
> signed-off-by area at all, which means that the patch will not be
> applied to any older kernel releases. To properly fix this, please
> follow the documented rules in the
> Documentation/process/stable-kernel-rules.rst file for how to resolve
> this.
>
> If you wish to discuss this problem further, or you have questions about
> how to resolve this issue, please feel free to respond to this email and
> Greg will reply once he has dug out from the pending patches received
> from other developers.
>
Hi Greg,
Just following up on this patch. I’ve sent v2 with the added CC stable tag. Here’s the link:
https://lore.kernel.org/all/20250213210553.28613-1-qasdev00@gmail.com/
Let me know if any further changes are needed.
Thanks,
Qasim
> thanks,
>
> greg k-h's patch email bot
From: Kevin Brodsky <kevin.brodsky(a)arm.com>
[ Upstream commit 46036188ea1f5266df23a6149dea0df1c77cd1c7 ]
The mm kselftests are currently built with no optimisation (-O0). It's
unclear why, and besides being obviously suboptimal, this also prevents
the pkeys tests from working as intended. Let's build all the tests with
-O2.
[kevin.brodsky(a)arm.com: silence unused-result warnings]
Link: https://lkml.kernel.org/r/20250107170110.2819685-1-kevin.brodsky@arm.com
Link: https://lkml.kernel.org/r/20241209095019.1732120-6-kevin.brodsky@arm.com
Signed-off-by: Kevin Brodsky <kevin.brodsky(a)arm.com>
Cc: Aruna Ramakrishna <aruna.ramakrishna(a)oracle.com>
Cc: Catalin Marinas <catalin.marinas(a)arm.com>
Cc: Dave Hansen <dave.hansen(a)linux.intel.com>
Cc: Joey Gouly <joey.gouly(a)arm.com>
Cc: Keith Lucas <keith.lucas(a)oracle.com>
Cc: Ryan Roberts <ryan.roberts(a)arm.com>
Cc: Shuah Khan <shuah(a)kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
(cherry picked from commit 46036188ea1f5266df23a6149dea0df1c77cd1c7)
[Yifei: This commit also fixes the failure of pkey_sighandler_tests_64,
which also exists in linux-6.12.y and linux-6.13.y, hence backport this commit]
Signed-off-by: Yifei Liu <yifei.l.liu(a)oracle.com>
---
tools/testing/selftests/mm/Makefile | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile
index 02e1204971b0..c0138cb19705 100644
--- a/tools/testing/selftests/mm/Makefile
+++ b/tools/testing/selftests/mm/Makefile
@@ -33,9 +33,16 @@ endif
# LDLIBS.
MAKEFLAGS += --no-builtin-rules
-CFLAGS = -Wall -I $(top_srcdir) $(EXTRA_CFLAGS) $(KHDR_INCLUDES) $(TOOLS_INCLUDES)
+CFLAGS = -Wall -O2 -I $(top_srcdir) $(EXTRA_CFLAGS) $(KHDR_INCLUDES) $(TOOLS_INCLUDES)
LDLIBS = -lrt -lpthread -lm
+# Some distributions (such as Ubuntu) configure GCC so that _FORTIFY_SOURCE is
+# automatically enabled at -O1 or above. This triggers various unused-result
+# warnings where functions such as read() or write() are called and their
+# return value is not checked. Disable _FORTIFY_SOURCE to silence those
+# warnings.
+CFLAGS += -U_FORTIFY_SOURCE
+
TEST_GEN_FILES = cow
TEST_GEN_FILES += compaction_test
TEST_GEN_FILES += gup_longterm
--
2.46.0
From: Tomasz Rusinowicz <tomasz.rusinowicz(a)intel.com>
The `struct ttm_resource->placement` contains TTM_PL_FLAG_* flags, but
it was incorrectly tested for XE_PL_* flags.
This caused xe_dma_buf_pin() to always fail when invoked for
the second time. Fix this by checking the `mem_type` field instead.
Fixes: 7764222d54b7 ("drm/xe: Disallow pinning dma-bufs in VRAM")
Cc: Thomas Hellström <thomas.hellstrom(a)linux.intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi(a)intel.com>
Cc: Lucas De Marchi <lucas.demarchi(a)intel.com>
Cc: "Thomas Hellström" <thomas.hellstrom(a)linux.intel.com>
Cc: Michal Wajdeczko <michal.wajdeczko(a)intel.com>
Cc: Matthew Brost <matthew.brost(a)intel.com>
Cc: Matthew Auld <matthew.auld(a)intel.com>
Cc: Nirmoy Das <nirmoy.das(a)intel.com>
Cc: Jani Nikula <jani.nikula(a)intel.com>
Cc: intel-xe(a)lists.freedesktop.org
Cc: <stable(a)vger.kernel.org> # v6.8+
Signed-off-by: Tomasz Rusinowicz <tomasz.rusinowicz(a)intel.com>
Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz(a)linux.intel.com>
---
drivers/gpu/drm/xe/xe_bo.h | 2 --
drivers/gpu/drm/xe/xe_dma_buf.c | 2 +-
2 files changed, 1 insertion(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index d9386ab031404..43bf6f140d40d 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -341,7 +341,6 @@ static inline unsigned int xe_sg_segment_size(struct device *dev)
return round_down(max / 2, PAGE_SIZE);
}
-#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
/**
* xe_bo_is_mem_type - Whether the bo currently resides in the given
* TTM memory type
@@ -356,4 +355,3 @@ static inline bool xe_bo_is_mem_type(struct xe_bo *bo, u32 mem_type)
return bo->ttm.resource->mem_type == mem_type;
}
#endif
-#endif
diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c
index c5b95470fa324..f67803e15a0e6 100644
--- a/drivers/gpu/drm/xe/xe_dma_buf.c
+++ b/drivers/gpu/drm/xe/xe_dma_buf.c
@@ -58,7 +58,7 @@ static int xe_dma_buf_pin(struct dma_buf_attachment *attach)
* 1) Avoid pinning in a placement not accessible to some importers.
* 2) Pinning in VRAM requires PIN accounting which is a to-do.
*/
- if (xe_bo_is_pinned(bo) && bo->ttm.resource->placement != XE_PL_TT) {
+ if (xe_bo_is_pinned(bo) && !xe_bo_is_mem_type(bo, XE_PL_TT)) {
drm_dbg(&xe->drm, "Can't migrate pinned bo for dma-buf pin.\n");
return -EINVAL;
}
--
2.45.1
From: Ge Yang <yangge1116(a)126.com>
Since the introduction of commit c77c0a8ac4c52 ("mm/hugetlb: defer freeing
of huge pages if in non-task context"), which supports deferring the
freeing of hugetlb pages, the allocation of contiguous memory through
cma_alloc() may fail probabilistically.
In the CMA allocation process, if it is found that the CMA area is occupied
by in-use hugetlb folios, these in-use hugetlb folios need to be migrated
to another location. When there are no available hugetlb folios in the
free hugetlb pool during the migration of in-use hugetlb folios, new folios
are allocated from the buddy system. A temporary state is set on the newly
allocated folio. Upon completion of the hugetlb folio migration, the
temporary state is transferred from the new folios to the old folios.
Normally, when the old folios with the temporary state are freed, they are
directly released back to the buddy system. However, due to the deferred
freeing of hugetlb pages, the PageBuddy() check fails, ultimately leading
to the failure of cma_alloc().
Here is a simplified call trace illustrating the process:
cma_alloc()
->__alloc_contig_migrate_range() // Migrate in-use hugetlb folios
->unmap_and_move_huge_page()
->folio_putback_hugetlb() // Free old folios
->test_pages_isolated()
->__test_page_isolated_in_pageblock()
->PageBuddy(page) // Check if the page is in buddy
To resolve this issue, we have implemented a function named
wait_for_freed_hugetlb_folios(). This function ensures that the hugetlb
folios are properly released back to the buddy system after their migration
is completed. By invoking wait_for_freed_hugetlb_folios() before calling
PageBuddy(), we ensure that PageBuddy() will succeed.
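A simplified sketch of the deferred-free machinery being waited on
(condensed from mm/hugetlb.c; the workfn body is elided):

    /* free_huge_folio() may queue folios for freeing in non-task
     * context instead of freeing them in place, so callers that need
     * the pages back in the buddy allocator must flush that work.
     */
    static void free_hpage_workfn(struct work_struct *work)
    {
            /* drain the deferred list, releasing folios to buddy */
    }
    static DECLARE_WORK(free_hpage_work, free_hpage_workfn);

    void wait_for_freed_hugetlb_folios(void)
    {
            flush_work(&free_hpage_work); /* no-op if nothing is queued */
    }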
Fixes: c77c0a8ac4c52 ("mm/hugetlb: defer freeing of huge pages if in non-task context")
Signed-off-by: Ge Yang <yangge1116(a)126.com>
Cc: <stable(a)vger.kernel.org>
---
V3:
- adjust code and message suggested by Muchun and David
V2:
- flush all folios at once suggested by David
include/linux/hugetlb.h | 5 +++++
mm/hugetlb.c | 5 +++++
mm/page_isolation.c | 10 ++++++++++
3 files changed, 20 insertions(+)
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 6c6546b..0c54b3a 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -697,6 +697,7 @@ bool hugetlb_bootmem_page_zones_valid(int nid, struct huge_bootmem_page *m);
int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list);
int replace_free_hugepage_folios(unsigned long start_pfn, unsigned long end_pfn);
+void wait_for_freed_hugetlb_folios(void);
struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
unsigned long addr, bool cow_from_owner);
struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
@@ -1092,6 +1093,10 @@ static inline int replace_free_hugepage_folios(unsigned long start_pfn,
return 0;
}
+static inline void wait_for_freed_hugetlb_folios(void)
+{
+}
+
static inline struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
unsigned long addr,
bool cow_from_owner)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 30bc34d..b4630b3 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2955,6 +2955,11 @@ int replace_free_hugepage_folios(unsigned long start_pfn, unsigned long end_pfn)
return ret;
}
+void wait_for_freed_hugetlb_folios(void)
+{
+ flush_work(&free_hpage_work);
+}
+
typedef enum {
/*
* For either 0/1: we checked the per-vma resv map, and one resv
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index 8ed53ee0..b2fc526 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -615,6 +615,16 @@ int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn,
int ret;
/*
+ * Due to the deferred freeing of hugetlb folios, the hugepage folios may
+ * not immediately release to the buddy system. This can cause PageBuddy()
+ * to fail in __test_page_isolated_in_pageblock(). To ensure that the
+ * hugetlb folios are properly released back to the buddy system, we
+ * invoke the wait_for_freed_hugetlb_folios() function to wait for the
+ * release to complete.
+ */
+ wait_for_freed_hugetlb_folios();
+
+ /*
* Note: pageblock_nr_pages != MAX_PAGE_ORDER. Then, chunks of free
* pages are not aligned to pageblock_nr_pages.
* Then we just check migratetype first.
--
2.7.4
From: Joshua Washington <joshwash(a)google.com>
Before this patch, the NETDEV_XDP_ACT_NDO_XMIT XDP feature flag is set by
default as part of driver initialization, and is never cleared. However,
this flag differs from others in that it is used as an indicator for
whether the driver is ready to perform the ndo_xdp_xmit operation as
part of an XDP_REDIRECT. Kernel helpers
xdp_features_(set|clear)_redirect_target exist to convey this meaning.
This patch ensures that the netdev is only reported as a redirect target
when XDP queues exist to forward traffic.
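As a sketch, the intended pattern with the existing helpers (the queue
check is the driver-specific part; the second argument of the set helper
declares fragment/SG support, which is not claimed here):

    if (priv->num_xdp_queues && gve_supports_xdp_xmit(priv))
            xdp_features_set_redirect_target(priv->dev, false);
    else
            xdp_features_clear_redirect_target(priv->dev);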
Fixes: 39a7f4aa3e4a ("gve: Add XDP REDIRECT support for GQI-QPL format")
Cc: stable(a)vger.kernel.org
Reviewed-by: Praveen Kaligineedi <pkaligineedi(a)google.com>
Reviewed-by: Jeroen de Borst <jeroendb(a)google.com>
Signed-off-by: Joshua Washington <joshwash(a)google.com>
---
drivers/net/ethernet/google/gve/gve.h | 10 ++++++++++
drivers/net/ethernet/google/gve/gve_main.c | 6 +++++-
2 files changed, 15 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h
index 8167cc5fb0df..78d2a19593d1 100644
--- a/drivers/net/ethernet/google/gve/gve.h
+++ b/drivers/net/ethernet/google/gve/gve.h
@@ -1116,6 +1116,16 @@ static inline u32 gve_xdp_tx_start_queue_id(struct gve_priv *priv)
return gve_xdp_tx_queue_id(priv, 0);
}
+static inline bool gve_supports_xdp_xmit(struct gve_priv *priv)
+{
+ switch (priv->queue_format) {
+ case GVE_GQI_QPL_FORMAT:
+ return true;
+ default:
+ return false;
+ }
+}
+
/* gqi napi handler defined in gve_main.c */
int gve_napi_poll(struct napi_struct *napi, int budget);
diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
index 533e659b15b3..92237fb0b60c 100644
--- a/drivers/net/ethernet/google/gve/gve_main.c
+++ b/drivers/net/ethernet/google/gve/gve_main.c
@@ -1903,6 +1903,8 @@ static void gve_turndown(struct gve_priv *priv)
/* Stop tx queues */
netif_tx_disable(priv->dev);
+ xdp_features_clear_redirect_target(priv->dev);
+
gve_clear_napi_enabled(priv);
gve_clear_report_stats(priv);
@@ -1972,6 +1974,9 @@ static void gve_turnup(struct gve_priv *priv)
napi_schedule(&block->napi);
}
+ if (priv->num_xdp_queues && gve_supports_xdp_xmit(priv))
+ xdp_features_set_redirect_target(priv->dev, false);
+
gve_set_napi_enabled(priv);
}
@@ -2246,7 +2251,6 @@ static void gve_set_netdev_xdp_features(struct gve_priv *priv)
if (priv->queue_format == GVE_GQI_QPL_FORMAT) {
xdp_features = NETDEV_XDP_ACT_BASIC;
xdp_features |= NETDEV_XDP_ACT_REDIRECT;
- xdp_features |= NETDEV_XDP_ACT_NDO_XMIT;
xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY;
} else {
xdp_features = 0;
--
2.48.1.601.g30ceb7b040-goog
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x 8802766324e1f5d414a81ac43365c20142e85603
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021855-fancy-trough-87ae@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 8802766324e1f5d414a81ac43365c20142e85603 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence(a)gmail.com>
Date: Wed, 12 Feb 2025 13:46:46 +0000
Subject: [PATCH] io_uring/kbuf: reallocate buf lists on upgrade
IORING_REGISTER_PBUF_RING can reuse an old struct io_buffer_list if it
was created for legacy selected buffers and has been emptied. It violates
the requirement that most of the fields should stay stable after publish.
Always reallocate it instead.
Cc: stable(a)vger.kernel.org
Reported-by: Pumpkin Chang <pumpkin(a)devco.re>
Fixes: 2fcabce2d7d34 ("io_uring: disallow mixed provided buffer group registrations")
Signed-off-by: Pavel Begunkov <asml.silence(a)gmail.com>
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c
index 04bf493eecae..8e72de7712ac 100644
--- a/io_uring/kbuf.c
+++ b/io_uring/kbuf.c
@@ -415,6 +415,13 @@ void io_destroy_buffers(struct io_ring_ctx *ctx)
}
}
+static void io_destroy_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl)
+{
+ scoped_guard(mutex, &ctx->mmap_lock)
+ WARN_ON_ONCE(xa_erase(&ctx->io_bl_xa, bl->bgid) != bl);
+ io_put_bl(ctx, bl);
+}
+
int io_remove_buffers_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_provide_buf *p = io_kiocb_to_cmd(req, struct io_provide_buf);
@@ -636,12 +643,13 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
/* if mapped buffer ring OR classic exists, don't allow */
if (bl->flags & IOBL_BUF_RING || !list_empty(&bl->buf_list))
return -EEXIST;
- } else {
- free_bl = bl = kzalloc(sizeof(*bl), GFP_KERNEL);
- if (!bl)
- return -ENOMEM;
+ io_destroy_bl(ctx, bl);
}
+ free_bl = bl = kzalloc(sizeof(*bl), GFP_KERNEL);
+ if (!bl)
+ return -ENOMEM;
+
mmap_offset = (unsigned long)reg.bgid << IORING_OFF_PBUF_SHIFT;
ring_size = flex_array_size(br, bufs, reg.ring_entries);
The patch titled
Subject: dma: kmsan: export kmsan_handle_dma() for modules
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
dma-kmsan-export-kmsan_handle_dma-for-modules.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Sebastian Andrzej Siewior <bigeasy(a)linutronix.de>
Subject: dma: kmsan: export kmsan_handle_dma() for modules
Date: Tue, 18 Feb 2025 10:14:11 +0100
kmsan_handle_dma() is used by virtio_ring, which can be built as a
module. kmsan_handle_dma() needs to be exported, otherwise building
virtio_ring fails.
Export kmsan_handle_dma for modules.
Link: https://lkml.kernel.org/r/20250218091411.MMS3wBN9@linutronix.de
Reported-by: kernel test robot <lkp(a)intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202502150634.qjxwSeJR-lkp@intel.com/
Fixes: 7ade4f10779cb ("dma: kmsan: unpoison DMA mappings")
Signed-off-by: Sebastian Andrzej Siewior <bigeasy(a)linutronix.de>
Cc: Alexander Potapenko <glider(a)google.com>
Cc: Dmitriy Vyukov <dvyukov(a)google.com>
Cc: Marco Elver <elver(a)google.com>
Cc: Peter Zijlstra (Intel) <peterz(a)infradead.org>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/kmsan/hooks.c | 1 +
1 file changed, 1 insertion(+)
--- a/mm/kmsan/hooks.c~dma-kmsan-export-kmsan_handle_dma-for-modules
+++ a/mm/kmsan/hooks.c
@@ -357,6 +357,7 @@ void kmsan_handle_dma(struct page *page,
size -= to_go;
}
}
+EXPORT_SYMBOL_GPL(kmsan_handle_dma);
void kmsan_handle_dma_sg(struct scatterlist *sg, int nents,
enum dma_data_direction dir)
_
Patches currently in -mm which might be from bigeasy(a)linutronix.de are
dma-kmsan-export-kmsan_handle_dma-for-modules.patch
rcu-provide-a-static-initializer-for-hlist_nulls_head.patch
ucount-replace-get_ucounts_or_wrap-with-atomic_inc_not_zero.patch
ucount-use-rcu-for-ucounts-lookups.patch
ucount-use-rcuref_t-for-reference-counting.patch
The patch titled
Subject: mm/hugetlb: wait for hugetlb folios to be freed
has been added to the -mm mm-unstable branch. Its filename is
mm-hugetlb-wait-for-hugetlb-folios-to-be-freed.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Ge Yang <yangge1116(a)126.com>
Subject: mm/hugetlb: wait for hugetlb folios to be freed
Date: Tue, 18 Feb 2025 19:40:28 +0800
Since the introduction of commit c77c0a8ac4c52 ("mm/hugetlb: defer freeing
of huge pages if in non-task context"), which supports deferring the
freeing of hugetlb pages, the allocation of contiguous memory through
cma_alloc() may fail probabilistically.
In the CMA allocation process, if it is found that the CMA area is
occupied by in-use hugetlb folios, these in-use hugetlb folios need to be
migrated to another location. When there are no available hugetlb folios
in the free hugetlb pool during the migration of in-use hugetlb folios,
new folios are allocated from the buddy system. A temporary state is set
on the newly allocated folio. Upon completion of the hugetlb folio
migration, the temporary state is transferred from the new folios to the
old folios. Normally, when the old folios with the temporary state are
freed, it is directly released back to the buddy system. However, due to
the deferred freeing of hugetlb pages, the PageBuddy() check fails,
ultimately leading to the failure of cma_alloc().
Here is a simplified call trace illustrating the process:
cma_alloc()
->__alloc_contig_migrate_range() // Migrate in-use hugetlb folios
->unmap_and_move_huge_page()
->folio_putback_hugetlb() // Free old folios
->test_pages_isolated()
->__test_page_isolated_in_pageblock()
->PageBuddy(page) // Check if the page is in buddy
To resolve this issue, we have implemented a function named
wait_for_freed_hugetlb_folios(). This function ensures that the hugetlb
folios are properly released back to the buddy system after their
migration is completed. By invoking wait_for_freed_hugetlb_folios()
before calling PageBuddy(), we ensure that PageBuddy() will succeed.
Link: https://lkml.kernel.org/r/1739878828-9960-1-git-send-email-yangge1116@126.c…
Fixes: c77c0a8ac4c52 ("mm/hugetlb: defer freeing of huge pages if in non-task context")
Signed-off-by: Ge Yang <yangge1116(a)126.com>
Acked-by: David Hildenbrand <david(a)redhat.com>
Cc: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Cc: Barry Song <21cnbao(a)gmail.com>
Cc: Muchun Song <muchun.song(a)linux.dev>
Cc: Oscar Salvador <osalvador(a)suse.de>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
include/linux/hugetlb.h | 5 +++++
mm/hugetlb.c | 5 +++++
mm/page_isolation.c | 10 ++++++++++
3 files changed, 20 insertions(+)
--- a/include/linux/hugetlb.h~mm-hugetlb-wait-for-hugetlb-folios-to-be-freed
+++ a/include/linux/hugetlb.h
@@ -697,6 +697,7 @@ bool hugetlb_bootmem_page_zones_valid(in
int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list);
int replace_free_hugepage_folios(unsigned long start_pfn, unsigned long end_pfn);
+void wait_for_freed_hugetlb_folios(void);
struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
unsigned long addr, bool cow_from_owner);
struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
@@ -1092,6 +1093,10 @@ static inline int replace_free_hugepage_
return 0;
}
+static inline void wait_for_freed_hugetlb_folios(void)
+{
+}
+
static inline struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
unsigned long addr,
bool cow_from_owner)
--- a/mm/hugetlb.c~mm-hugetlb-wait-for-hugetlb-folios-to-be-freed
+++ a/mm/hugetlb.c
@@ -2955,6 +2955,11 @@ int replace_free_hugepage_folios(unsigne
return ret;
}
+void wait_for_freed_hugetlb_folios(void)
+{
+ flush_work(&free_hpage_work);
+}
+
typedef enum {
/*
* For either 0/1: we checked the per-vma resv map, and one resv
--- a/mm/page_isolation.c~mm-hugetlb-wait-for-hugetlb-folios-to-be-freed
+++ a/mm/page_isolation.c
@@ -615,6 +615,16 @@ int test_pages_isolated(unsigned long st
int ret;
/*
+ * Due to the deferred freeing of hugetlb folios, the hugepage folios may
+ * not immediately release to the buddy system. This can cause PageBuddy()
+ * to fail in __test_page_isolated_in_pageblock(). To ensure that the
+ * hugetlb folios are properly released back to the buddy system, we
+ * invoke the wait_for_freed_hugetlb_folios() function to wait for the
+ * release to complete.
+ */
+ wait_for_freed_hugetlb_folios();
+
+ /*
* Note: pageblock_nr_pages != MAX_PAGE_ORDER. Then, chunks of free
* pages are not aligned to pageblock_nr_pages.
* Then we just check migratetype first.
_
Patches currently in -mm which might be from yangge1116(a)126.com are
mm-hugetlb-wait-for-hugepage-folios-to-be-freed.patch
mm-hugetlb-wait-for-hugetlb-folios-to-be-freed.patch
From: Zhang Lixu <lixu.zhang(a)intel.com>
[ Upstream commit 4b54ae69197b9f416baa0fceadff7e89075f8454 ]
The timestamps in the Firmware log and HID sensor samples are incorrect.
They show 1970-01-01 because the current IPC driver only uses the first
8 bytes of bootup time when synchronizing time with the firmware. The
firmware converts the bootup time to UTC time, which results in the
display of 1970-01-01.
In write_ipc_from_queue(), when sending the MNG_SYNC_FW_CLOCK message,
the clock is updated according to the definition of ipc_time_update_msg.
However, in _ish_sync_fw_clock(), the message length is specified as the
size of uint64_t when building the doorbell. As a result, the firmware
only receives the first 8 bytes of struct ipc_time_update_msg.
This patch corrects the length in the doorbell to ensure the entire
ipc_time_update_msg is sent, fixing the timestamp issue.
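A minimal sketch of the length bug (the field names below are
hypothetical stand-ins; the real layout is struct ipc_time_update_msg in
the ISH IPC headers):

    struct time_update {
            uint64_t primary_host_time; /* all the firmware received */
            uint64_t sync_info;         /* silently truncated away */
    };

    /* buggy: ipc_send_mng_msg(dev, MNG_SYNC_FW_CLOCK, &msg, sizeof(uint64_t)); */
    /* fixed: ipc_send_mng_msg(dev, MNG_SYNC_FW_CLOCK, &msg, sizeof(msg));      */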
Signed-off-by: Zhang Lixu <lixu.zhang(a)intel.com>
Acked-by: Srinivas Pandruvada <srinivas.pandruvada(a)linux.intel.com>
Signed-off-by: Jiri Kosina <jkosina(a)suse.com>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/hid/intel-ish-hid/ipc/ipc.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/hid/intel-ish-hid/ipc/ipc.c b/drivers/hid/intel-ish-hid/ipc/ipc.c
index 8f8dfdf64833e..871f043eeba1e 100644
--- a/drivers/hid/intel-ish-hid/ipc/ipc.c
+++ b/drivers/hid/intel-ish-hid/ipc/ipc.c
@@ -549,14 +549,14 @@ static void fw_reset_work_fn(struct work_struct *unused)
static void _ish_sync_fw_clock(struct ishtp_device *dev)
{
static unsigned long prev_sync;
- uint64_t usec;
+ struct ipc_time_update_msg time = {};
if (prev_sync && jiffies - prev_sync < 20 * HZ)
return;
prev_sync = jiffies;
- usec = ktime_to_us(ktime_get_boottime());
- ipc_send_mng_msg(dev, MNG_SYNC_FW_CLOCK, &usec, sizeof(uint64_t));
+ /* The fields of time would be updated while sending message */
+ ipc_send_mng_msg(dev, MNG_SYNC_FW_CLOCK, &time, sizeof(time));
}
/**
--
2.39.5
From: Zhang Lixu <lixu.zhang(a)intel.com>
[ Upstream commit 4b54ae69197b9f416baa0fceadff7e89075f8454 ]
The timestamps in the Firmware log and HID sensor samples are incorrect.
They show 1970-01-01 because the current IPC driver only uses the first
8 bytes of bootup time when synchronizing time with the firmware. The
firmware converts the bootup time to UTC time, which results in the
display of 1970-01-01.
In write_ipc_from_queue(), when sending the MNG_SYNC_FW_CLOCK message,
the clock is updated according to the definition of ipc_time_update_msg.
However, in _ish_sync_fw_clock(), the message length is specified as the
size of uint64_t when building the doorbell. As a result, the firmware
only receives the first 8 bytes of struct ipc_time_update_msg.
This patch corrects the length in the doorbell to ensure the entire
ipc_time_update_msg is sent, fixing the timestamp issue.
Signed-off-by: Zhang Lixu <lixu.zhang(a)intel.com>
Acked-by: Srinivas Pandruvada <srinivas.pandruvada(a)linux.intel.com>
Signed-off-by: Jiri Kosina <jkosina(a)suse.com>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/hid/intel-ish-hid/ipc/ipc.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/hid/intel-ish-hid/ipc/ipc.c b/drivers/hid/intel-ish-hid/ipc/ipc.c
index a45ac7fa417b9..da896f5c74424 100644
--- a/drivers/hid/intel-ish-hid/ipc/ipc.c
+++ b/drivers/hid/intel-ish-hid/ipc/ipc.c
@@ -549,14 +549,14 @@ static void fw_reset_work_fn(struct work_struct *unused)
static void _ish_sync_fw_clock(struct ishtp_device *dev)
{
static unsigned long prev_sync;
- uint64_t usec;
+ struct ipc_time_update_msg time = {};
if (prev_sync && jiffies - prev_sync < 20 * HZ)
return;
prev_sync = jiffies;
- usec = ktime_to_us(ktime_get_boottime());
- ipc_send_mng_msg(dev, MNG_SYNC_FW_CLOCK, &usec, sizeof(uint64_t));
+ /* The fields of time would be updated while sending message */
+ ipc_send_mng_msg(dev, MNG_SYNC_FW_CLOCK, &time, sizeof(time));
}
/**
--
2.39.5
From: Zhang Lixu <lixu.zhang(a)intel.com>
[ Upstream commit 4b54ae69197b9f416baa0fceadff7e89075f8454 ]
The timestamps in the Firmware log and HID sensor samples are incorrect.
They show 1970-01-01 because the current IPC driver only uses the first
8 bytes of bootup time when synchronizing time with the firmware. The
firmware converts the bootup time to UTC time, which results in the
display of 1970-01-01.
In write_ipc_from_queue(), when sending the MNG_SYNC_FW_CLOCK message,
the clock is updated according to the definition of ipc_time_update_msg.
However, in _ish_sync_fw_clock(), the message length is specified as the
size of uint64_t when building the doorbell. As a result, the firmware
only receives the first 8 bytes of struct ipc_time_update_msg.
This patch corrects the length in the doorbell to ensure the entire
ipc_time_update_msg is sent, fixing the timestamp issue.
Signed-off-by: Zhang Lixu <lixu.zhang(a)intel.com>
Acked-by: Srinivas Pandruvada <srinivas.pandruvada(a)linux.intel.com>
Signed-off-by: Jiri Kosina <jkosina(a)suse.com>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/hid/intel-ish-hid/ipc/ipc.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/hid/intel-ish-hid/ipc/ipc.c b/drivers/hid/intel-ish-hid/ipc/ipc.c
index ba45605fc6b52..a48f7cd514b0f 100644
--- a/drivers/hid/intel-ish-hid/ipc/ipc.c
+++ b/drivers/hid/intel-ish-hid/ipc/ipc.c
@@ -577,14 +577,14 @@ static void fw_reset_work_fn(struct work_struct *unused)
static void _ish_sync_fw_clock(struct ishtp_device *dev)
{
static unsigned long prev_sync;
- uint64_t usec;
+ struct ipc_time_update_msg time = {};
if (prev_sync && jiffies - prev_sync < 20 * HZ)
return;
prev_sync = jiffies;
- usec = ktime_to_us(ktime_get_boottime());
- ipc_send_mng_msg(dev, MNG_SYNC_FW_CLOCK, &usec, sizeof(uint64_t));
+ /* The fields of time would be updated while sending message */
+ ipc_send_mng_msg(dev, MNG_SYNC_FW_CLOCK, &time, sizeof(time));
}
/**
--
2.39.5
From: Zhang Lixu <lixu.zhang(a)intel.com>
[ Upstream commit 4b54ae69197b9f416baa0fceadff7e89075f8454 ]
The timestamps in the Firmware log and HID sensor samples are incorrect.
They show 1970-01-01 because the current IPC driver only uses the first
8 bytes of bootup time when synchronizing time with the firmware. The
firmware converts the bootup time to UTC time, which results in the
display of 1970-01-01.
In write_ipc_from_queue(), when sending the MNG_SYNC_FW_CLOCK message,
the clock is updated according to the definition of ipc_time_update_msg.
However, in _ish_sync_fw_clock(), the message length is specified as the
size of uint64_t when building the doorbell. As a result, the firmware
only receives the first 8 bytes of struct ipc_time_update_msg.
This patch corrects the length in the doorbell to ensure the entire
ipc_time_update_msg is sent, fixing the timestamp issue.
Signed-off-by: Zhang Lixu <lixu.zhang(a)intel.com>
Acked-by: Srinivas Pandruvada <srinivas.pandruvada(a)linux.intel.com>
Signed-off-by: Jiri Kosina <jkosina(a)suse.com>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/hid/intel-ish-hid/ipc/ipc.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/hid/intel-ish-hid/ipc/ipc.c b/drivers/hid/intel-ish-hid/ipc/ipc.c
index dd5fc60874ba1..b1a41c90c5741 100644
--- a/drivers/hid/intel-ish-hid/ipc/ipc.c
+++ b/drivers/hid/intel-ish-hid/ipc/ipc.c
@@ -577,14 +577,14 @@ static void fw_reset_work_fn(struct work_struct *unused)
static void _ish_sync_fw_clock(struct ishtp_device *dev)
{
static unsigned long prev_sync;
- uint64_t usec;
+ struct ipc_time_update_msg time = {};
if (prev_sync && time_before(jiffies, prev_sync + 20 * HZ))
return;
prev_sync = jiffies;
- usec = ktime_to_us(ktime_get_boottime());
- ipc_send_mng_msg(dev, MNG_SYNC_FW_CLOCK, &usec, sizeof(uint64_t));
+ /* The fields of time would be updated while sending message */
+ ipc_send_mng_msg(dev, MNG_SYNC_FW_CLOCK, &time, sizeof(time));
}
/**
--
2.39.5
From: Tejun Heo <tj(a)kernel.org>
[ Upstream commit e9fe182772dcb2630964724fd93e9c90b68ea0fd ]
dsp_local_on has several incorrect assumptions, one of which is that
p->nr_cpus_allowed always tracks p->cpus_ptr. This is not true when a task
is scheduled out while migration is disabled - p->cpus_ptr is temporarily
overridden to the previous CPU while p->nr_cpus_allowed remains unchanged.
This led to sporadic test failures when dsp_local_on_dispatch() tries to put
a migration-disabled task on a different CPU. Fix it by keeping the previous
CPU when migration is disabled.
There are SCX schedulers that make use of p->nr_cpus_allowed. They should
also implement explicit handling for p->migration_disabled.
Signed-off-by: Tejun Heo <tj(a)kernel.org>
Reported-by: Ihor Solodrai <ihor.solodrai(a)pm.me>
Cc: Andrea Righi <arighi(a)nvidia.com>
Cc: Changwoo Min <changwoo(a)igalia.com>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
tools/testing/selftests/sched_ext/dsp_local_on.bpf.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c b/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c
index c9a2da0575a0f..eea06decb6f59 100644
--- a/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c
+++ b/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c
@@ -43,7 +43,7 @@ void BPF_STRUCT_OPS(dsp_local_on_dispatch, s32 cpu, struct task_struct *prev)
if (!p)
return;
- if (p->nr_cpus_allowed == nr_cpus)
+ if (p->nr_cpus_allowed == nr_cpus && !p->migration_disabled)
target = bpf_get_prandom_u32() % nr_cpus;
else
target = scx_bpf_task_cpu(p);
--
2.39.5
From: Tejun Heo <tj(a)kernel.org>
[ Upstream commit e9fe182772dcb2630964724fd93e9c90b68ea0fd ]
dsp_local_on has several incorrect assumptions, one of which is that
p->nr_cpus_allowed always tracks p->cpus_ptr. This is not true when a task
is scheduled out while migration is disabled - p->cpus_ptr is temporarily
overridden to the previous CPU while p->nr_cpus_allowed remains unchanged.
This led to sporadic test failures when dsp_local_on_dispatch() tries to put
a migration-disabled task on a different CPU. Fix it by keeping the previous
CPU when migration is disabled.
There are SCX schedulers that make use of p->nr_cpus_allowed. They should
also implement explicit handling for p->migration_disabled.
Signed-off-by: Tejun Heo <tj(a)kernel.org>
Reported-by: Ihor Solodrai <ihor.solodrai(a)pm.me>
Cc: Andrea Righi <arighi(a)nvidia.com>
Cc: Changwoo Min <changwoo(a)igalia.com>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
tools/testing/selftests/sched_ext/dsp_local_on.bpf.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c b/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c
index fbda6bf546712..758b479bd1ee1 100644
--- a/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c
+++ b/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c
@@ -43,7 +43,7 @@ void BPF_STRUCT_OPS(dsp_local_on_dispatch, s32 cpu, struct task_struct *prev)
if (!p)
return;
- if (p->nr_cpus_allowed == nr_cpus)
+ if (p->nr_cpus_allowed == nr_cpus && !p->migration_disabled)
target = bpf_get_prandom_u32() % nr_cpus;
else
target = scx_bpf_task_cpu(p);
--
2.39.5
It is observed that on some systems an initial PPM reset during the boot
phase can trigger a timeout:
[ 6.482546] ucsi_acpi USBC000:00: failed to reset PPM!
[ 6.482551] ucsi_acpi USBC000:00: error -ETIMEDOUT: PPM init failed
Still, increasing the timeout value, albeit the most straightforward
solution, eliminates the problem: the initial PPM reset may take up to
~8000-10000ms on some Lenovo laptops. Once the reset completes after that
period (or even if ucsi_reset_ppm() is not called at all), UCSI works as
expected.
Moreover, if the ucsi_acpi module is loaded/unloaded manually after the
system has booted, reading the CCI values and resetting the PPM works
perfectly, without any timeout. Thus it's only a boot-time issue.
The reason for this behavior is not clear, but it may be the consequence
of some tricks that the firmware performs, or an actual firmware bug.
As a workaround, increase the timeout to avoid failing the UCSI
initialization prematurely.
Fixes: b1b59e16075f ("usb: typec: ucsi: Increase command completion timeout value")
Cc: stable(a)vger.kernel.org
Signed-off-by: Fedor Pchelkin <boddah8794(a)gmail.com>
---
drivers/usb/typec/ucsi/ucsi.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c
index 0fe1476f4c29..7a56d3f840d7 100644
--- a/drivers/usb/typec/ucsi/ucsi.c
+++ b/drivers/usb/typec/ucsi/ucsi.c
@@ -25,7 +25,7 @@
* difficult to estimate the time it takes for the system to process the command
* before it is actually passed to the PPM.
*/
-#define UCSI_TIMEOUT_MS 5000
+#define UCSI_TIMEOUT_MS 10000
/*
* UCSI_SWAP_TIMEOUT_MS - Timeout for role swap requests
--
2.48.1
From: Si-Wei Liu <si-wei.liu(a)oracle.com>
create_user_mr() correctly counts the number of null keys needed to
fill a hole in the memory map. However, fill_indir() does not follow
the same logic and fails to cap each range at the 1GB limit
correspondingly. Fill in more null keys for the gaps in between,
so that null keys are correctly populated.
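For intuition, a runnable userspace sketch of the capping arithmetic
(MAX_KLM_SIZE is 1GB in the mlx5 vdpa code; the 5GB gap is a
hypothetical example):

    #include <stdio.h>
    #include <stdint.h>

    #define MAX_KLM_SIZE (1ULL << 30)  /* 1GB cap per KLM entry */

    int main(void)
    {
            uint64_t gap = 5ULL << 30; /* hypothetical 5GB hole in the map */
            int null_keys = 0;

            while (gap) {              /* one capped null-key KLM per pass */
                    uint64_t bcount = gap < MAX_KLM_SIZE ? gap : MAX_KLM_SIZE;
                    gap -= bcount;
                    null_keys++;
            }
            printf("null keys needed: %d\n", null_keys); /* prints 5 */
            return 0;
    }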
Fixes: 94abbccdf291 ("vdpa/mlx5: Add shared memory registration code")
Cc: stable(a)vger.kernel.org
Signed-off-by: Si-Wei Liu <si-wei.liu(a)oracle.com>
Signed-off-by: Dragos Tatulea <dtatulea(a)nvidia.com>
---
drivers/vdpa/mlx5/core/mr.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c
index 8455f08f5d40..61424342c096 100644
--- a/drivers/vdpa/mlx5/core/mr.c
+++ b/drivers/vdpa/mlx5/core/mr.c
@@ -190,9 +190,12 @@ static void fill_indir(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mkey, v
klm->bcount = cpu_to_be32(klm_bcount(dmr->end - dmr->start));
preve = dmr->end;
} else {
+ u64 bcount = min_t(u64, dmr->start - preve, MAX_KLM_SIZE);
+
klm->key = cpu_to_be32(mvdev->res.null_mkey);
- klm->bcount = cpu_to_be32(klm_bcount(dmr->start - preve));
- preve = dmr->start;
+ klm->bcount = cpu_to_be32(klm_bcount(bcount));
+ preve += bcount;
+
goto again;
}
}
--
2.43.0
From: Steven Rostedt <rostedt(a)goodmis.org>
Check if a function is already in the manager ops of a subops. A manager
ops contains multiple subops, and if two or more subops are tracing the
same function, the manager ops only needs a single entry in its hash.
Cc: stable(a)vger.kernel.org
Fixes: 4f554e955614f ("ftrace: Add ftrace_set_filter_ips function")
Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
---
kernel/trace/ftrace.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 03b35a05808c..189eb0a12f4b 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -5717,6 +5717,9 @@ __ftrace_match_addr(struct ftrace_hash *hash, unsigned long ip, int remove)
return -ENOENT;
free_hash_entry(hash, entry);
return 0;
+ } else if (__ftrace_lookup_ip(hash, ip) != NULL) {
+ /* Already exists */
+ return 0;
}
entry = add_hash_entry(hash, ip);
--
2.47.2
From: Steven Rostedt <rostedt(a)goodmis.org>
Function graph uses a subops and manager ops mechanism to attach to
ftrace. The manager ops connects to ftrace and the functions it connects
to is defined by a list of subops that it manages.
The function hash that defines what the above ops attaches to limits the
functions to attach if the hash has any content. If the hash is empty, it
means to trace all functions.
The creation of the manager ops hash is done by iterating over all the
subops hashes. If any of the subops hashes is empty, it means that the
manager ops hash must trace all functions as well.
The issue is in the creation of the manager ops. When a second subops is
attached, a new hash is created by starting it as NULL and adding the
subops one at a time. But the NULL hash is mistaken for an empty hash, and
once an empty hash is found, it stops the loop of subops and just enables
all functions.
# echo "f:myevent1 kernel_clone" >> /sys/kernel/tracing/dynamic_events
# cat /sys/kernel/tracing/enabled_functions
kernel_clone (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
# echo "f:myevent2 schedule_timeout" >> /sys/kernel/tracing/dynamic_events
# cat /sys/kernel/tracing/enabled_functions
trace_initcall_start_cb (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
run_init_process (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
try_to_run_init_process (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
x86_pmu_show_pmu_cap (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
cleanup_rapl_pmus (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
uncore_free_pcibus_map (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
uncore_types_exit (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
uncore_pci_exit.part.0 (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
kvm_shutdown (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
vmx_dump_msrs (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
vmx_cleanup_l1d_flush (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60
[..]
Fix this by initializing the new hash to NULL; if the hash is NULL, do
not treat it as an empty hash but instead allocate it by copying the
content of the first subops. Then, on subsequent iterations, the new
hash will not be NULL but will hold the content of the previous subops.
If that first subops attached to all functions, the new hash may then
conclude that the manager ops also needs to attach to all functions.
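A stand-in sketch of the confused states (illustrative types, not the
ftrace code):

    struct hash_sketch { int nentries; };

    static int hash_empty(struct hash_sketch *h)
    {
            return !h || !h->nentries; /* NULL looked "empty" here... */
    }

    /* ...so the union loop bailed out on the first subops and the
     * manager ops traced every function. The fix distinguishes the two:
     * NULL means "not built yet" (allocate and copy), while an
     * allocated empty hash keeps meaning "match all functions".
     */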
Cc: stable(a)vger.kernel.org
Fixes: 5fccc7552ccbc ("ftrace: Add subops logic to allow one ops to manage many")
Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
---
kernel/trace/ftrace.c | 30 ++++++++++++++++++++----------
1 file changed, 20 insertions(+), 10 deletions(-)
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 728ecda6e8d4..03b35a05808c 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3220,15 +3220,22 @@ static struct ftrace_hash *copy_hash(struct ftrace_hash *src)
* The filter_hash updates uses just the append_hash() function
* and the notrace_hash does not.
*/
-static int append_hash(struct ftrace_hash **hash, struct ftrace_hash *new_hash)
+static int append_hash(struct ftrace_hash **hash, struct ftrace_hash *new_hash,
+ int size_bits)
{
struct ftrace_func_entry *entry;
int size;
int i;
- /* An empty hash does everything */
- if (ftrace_hash_empty(*hash))
- return 0;
+ if (*hash) {
+ /* An empty hash does everything */
+ if (ftrace_hash_empty(*hash))
+ return 0;
+ } else {
+ *hash = alloc_ftrace_hash(size_bits);
+ if (!*hash)
+ return -ENOMEM;
+ }
/* If new_hash has everything make hash have everything */
if (ftrace_hash_empty(new_hash)) {
@@ -3292,16 +3299,18 @@ static int intersect_hash(struct ftrace_hash **hash, struct ftrace_hash *new_has
/* Return a new hash that has a union of all @ops->filter_hash entries */
static struct ftrace_hash *append_hashes(struct ftrace_ops *ops)
{
- struct ftrace_hash *new_hash;
+ struct ftrace_hash *new_hash = NULL;
struct ftrace_ops *subops;
+ int size_bits;
int ret;
- new_hash = alloc_ftrace_hash(ops->func_hash->filter_hash->size_bits);
- if (!new_hash)
- return NULL;
+ if (ops->func_hash->filter_hash)
+ size_bits = ops->func_hash->filter_hash->size_bits;
+ else
+ size_bits = FTRACE_HASH_DEFAULT_BITS;
list_for_each_entry(subops, &ops->subop_list, list) {
- ret = append_hash(&new_hash, subops->func_hash->filter_hash);
+ ret = append_hash(&new_hash, subops->func_hash->filter_hash, size_bits);
if (ret < 0) {
free_ftrace_hash(new_hash);
return NULL;
@@ -3505,7 +3514,8 @@ int ftrace_startup_subops(struct ftrace_ops *ops, struct ftrace_ops *subops, int
filter_hash = alloc_and_copy_ftrace_hash(size_bits, ops->func_hash->filter_hash);
if (!filter_hash)
return -ENOMEM;
- ret = append_hash(&filter_hash, subops->func_hash->filter_hash);
+ ret = append_hash(&filter_hash, subops->func_hash->filter_hash,
+ size_bits);
if (ret < 0) {
free_ftrace_hash(filter_hash);
return ret;
--
2.47.2
Hello,
New build issue found on stable-rc/linux-5.4.y:
---
./arch/mips/include/asm/syscall.h:66:28: error: ‘struct pt_regs’ has
no member named ‘args’ in arch/mips/kernel/ptrace.o
(arch/mips/kernel/ptrace.c) [logspec:kbuild,kbuild.compiler.error]
---
- dashboard: https://d.kernelci.org/issue/maestro:609e973861db59e6d6e75d96a9f0f0a24ba09b…
- giturl: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
- commit HEAD: 46b505f46fed8d28d9f0cf8e2aace766b99e48ce
Log excerpt:
=====================================================
In file included from arch/mips/kernel/ptrace.c:45:
./arch/mips/include/asm/syscall.h: In function ‘mips_get_syscall_arg’:
./arch/mips/include/asm/syscall.h:66:28: error: ‘struct pt_regs’ has
no member named ‘args’
66 | *arg = regs->args[n];
| ^~
=====================================================
# Builds where the incident occurred:
## 32r2el_defconfig on (mips):
- compiler: gcc-12
- dashboard: https://d.kernelci.org/build/maestro:67b4a28b50b59caecce1c871
#kernelci issue maestro:609e973861db59e6d6e75d96a9f0f0a24ba09ba0
Reported-by: kernelci.org bot <bot(a)kernelci.org>
--
This is an experimental report format. Please send feedback in!
Talk to us at kernelci(a)lists.linux.dev
Made with love by the KernelCI team - https://kernelci.org
When an attribute group is created with sysfs_create_group(), the
->sysfs_ops() callback is set to kobj_sysfs_ops, which sets the ->show()
and ->store() callbacks to kobj_attr_show() and kobj_attr_store()
respectively. These functions use container_of() to get the respective
callback from the passed attribute, meaning that these callbacks need to
be the same type as the callbacks in 'struct kobj_attribute'.
However, the platform_profile sysfs functions have the type of the
->show() and ->store() callbacks in 'struct device_attribute', which
results a CFI violation when accessing platform_profile or
platform_profile_choices under /sys/firmware/acpi because the types do
not match:
CFI failure at kobj_attr_show+0x19/0x30 (target: platform_profile_choices_show+0x0/0x140; expected type: 0x7a69590c)
There is no functional issue from the type mismatch because the layout
of 'struct kobj_attribute' and 'struct device_attribute' are the same,
so the container_of() cast does not break anything aside from CFI.
Change the type of platform_profile_choices_show() and
platform_profile_{show,store}() to match the callbacks in
'struct kobj_attribute' and update the attribute variables to match,
which resolves the CFI violation.
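The dispatch that makes the callback type significant is essentially the
following (condensed from kobj_attr_show() in lib/kobject.c):

    static ssize_t kobj_attr_show(struct kobject *kobj, struct attribute *attr,
                                  char *buf)
    {
            struct kobj_attribute *kattr;
            ssize_t ret = -EIO;

            /* container_of() recovers a struct kobj_attribute, so with
             * CFI the stored ->show pointer must really have the
             * kobj_attribute prototype; a function declared with
             * device_attribute parameters trips the check at call time.
             */
            kattr = container_of(attr, struct kobj_attribute, attr);
            if (kattr->show)
                    ret = kattr->show(kobj, kattr, buf);
            return ret;
    }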
Cc: stable(a)vger.kernel.org
Fixes: a2ff95e018f1 ("ACPI: platform: Add platform profile support")
Reported-by: John Rowley <lkml(a)johnrowley.me>
Closes: https://github.com/ClangBuiltLinux/linux/issues/2047
Tested-by: John Rowley <lkml(a)johnrowley.me>
Reviewed-by: Sami Tolvanen <samitolvanen(a)google.com>
Signed-off-by: Nathan Chancellor <nathan(a)kernel.org>
---
Changes in v3:
- Rebase on 6.14-rc1, which includes updates to the driver to address
Greg's previous concerns but this change is still needed for the
legacy sysfs interface. v2 can be used for the stable backport.
- Link to v2: https://lore.kernel.org/r/20241118-acpi-platform_profile-fix-cfi-violation-…
Changes in v2:
- Rebase on linux-pm/acpi
- Pick up Sami's reviewed-by tag
- Adjust wording around why there is no functional issue from the
mismatched types
- Link to v1: https://lore.kernel.org/r/20240819-acpi-platform_profile-fix-cfi-violation-…
---
drivers/acpi/platform_profile.c | 26 +++++++++++++-------------
1 file changed, 13 insertions(+), 13 deletions(-)
diff --git a/drivers/acpi/platform_profile.c b/drivers/acpi/platform_profile.c
index fc92e43d0fe9..1b6317f759f9 100644
--- a/drivers/acpi/platform_profile.c
+++ b/drivers/acpi/platform_profile.c
@@ -260,14 +260,14 @@ static int _aggregate_choices(struct device *dev, void *data)
/**
* platform_profile_choices_show - Show the available profile choices for legacy sysfs interface
- * @dev: The device
+ * @kobj: The kobject
* @attr: The attribute
* @buf: The buffer to write to
*
* Return: The number of bytes written
*/
-static ssize_t platform_profile_choices_show(struct device *dev,
- struct device_attribute *attr,
+static ssize_t platform_profile_choices_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
char *buf)
{
unsigned long aggregate[BITS_TO_LONGS(PLATFORM_PROFILE_LAST)];
@@ -333,14 +333,14 @@ static int _store_and_notify(struct device *dev, void *data)
/**
* platform_profile_show - Show the current profile for legacy sysfs interface
- * @dev: The device
+ * @kobj: The kobject
* @attr: The attribute
* @buf: The buffer to write to
*
* Return: The number of bytes written
*/
-static ssize_t platform_profile_show(struct device *dev,
- struct device_attribute *attr,
+static ssize_t platform_profile_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
char *buf)
{
enum platform_profile_option profile = PLATFORM_PROFILE_LAST;
@@ -362,15 +362,15 @@ static ssize_t platform_profile_show(struct device *dev,
/**
* platform_profile_store - Set the profile for legacy sysfs interface
- * @dev: The device
+ * @kobj: The kobject
* @attr: The attribute
* @buf: The buffer to read from
* @count: The number of bytes to read
*
* Return: The number of bytes read
*/
-static ssize_t platform_profile_store(struct device *dev,
- struct device_attribute *attr,
+static ssize_t platform_profile_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
const char *buf, size_t count)
{
unsigned long choices[BITS_TO_LONGS(PLATFORM_PROFILE_LAST)];
@@ -401,12 +401,12 @@ static ssize_t platform_profile_store(struct device *dev,
return count;
}
-static DEVICE_ATTR_RO(platform_profile_choices);
-static DEVICE_ATTR_RW(platform_profile);
+static struct kobj_attribute attr_platform_profile_choices = __ATTR_RO(platform_profile_choices);
+static struct kobj_attribute attr_platform_profile = __ATTR_RW(platform_profile);
static struct attribute *platform_profile_attrs[] = {
- &dev_attr_platform_profile_choices.attr,
- &dev_attr_platform_profile.attr,
+ &attr_platform_profile_choices.attr,
+ &attr_platform_profile.attr,
NULL
};
---
base-commit: 2014c95afecee3e76ca4a56956a936e23283f05b
change-id: 20240819-acpi-platform_profile-fix-cfi-violation-de278753bd5f
Best regards,
--
Nathan Chancellor <nathan(a)kernel.org>
On 18.02.25 13:36, Sasha Levin wrote:
> This is a note to let you know that I've just added the patch titled
>
> s390/qeth: move netif_napi_add_tx() and napi_enable() from under BH
>
> to the 6.13-stable tree which can be found at:
> http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
>
> The filename of the patch is:
> s390-qeth-move-netif_napi_add_tx-and-napi_enable-fro.patch
> and it can be found in the queue-6.13 subdirectory.
>
> If you, or anyone else, feels it should not be added to the stable tree,
> please let <stable(a)vger.kernel.org> know about it.
>
Hello Sasha,
this is a fix for a regression that was introduced with v6.14-rc1.
So I do not think it needs to go into the 6.13 stable tree.
But it does not hurt either.
>
>
> commit 48eda8093b86b426078bd245a9b4fbc5d057c436
> Author: Alexandra Winter <wintera(a)linux.ibm.com>
> Date: Wed Feb 12 17:36:59 2025 +0100
>
> s390/qeth: move netif_napi_add_tx() and napi_enable() from under BH
>
> [ Upstream commit 0d0b752f2497471ddd2b32143d167d42e18a8f3c ]
>
> Like other drivers qeth is calling local_bh_enable() after napi_schedule()
> to kick-start softirqs [0].
> Since netif_napi_add_tx() and napi_enable() now take the netdev_lock()
> mutex [1], move them out from under the BH protection. Same solution as in
> commit a60558644e20 ("wifi: mt76: move napi_enable() from under BH")
>
> Fixes: 1b23cdbd2bbc ("net: protect netdev->napi_list with netdev_lock()")
> Link: https://lore.kernel.org/netdev/20240612181900.4d9d18d0@kernel.org/ [0]
> Link: https://lore.kernel.org/netdev/20250115035319.559603-1-kuba@kernel.org/ [1]
> Signed-off-by: Alexandra Winter <wintera(a)linux.ibm.com>
> Acked-by: Joe Damato <jdamato(a)fastly.com>
> Link: https://patch.msgid.link/20250212163659.2287292-1-wintera@linux.ibm.com
> Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
> Signed-off-by: Sasha Levin <sashal(a)kernel.org>
>
> diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
> index a3adaec5504e4..20328d695ef92 100644
> --- a/drivers/s390/net/qeth_core_main.c
> +++ b/drivers/s390/net/qeth_core_main.c
> @@ -7050,14 +7050,16 @@ int qeth_open(struct net_device *dev)
> card->data.state = CH_STATE_UP;
> netif_tx_start_all_queues(dev);
>
> - local_bh_disable();
> qeth_for_each_output_queue(card, queue, i) {
> netif_napi_add_tx(dev, &queue->napi, qeth_tx_poll);
> napi_enable(&queue->napi);
> - napi_schedule(&queue->napi);
> }
> -
> napi_enable(&card->napi);
> +
> + local_bh_disable();
> + qeth_for_each_output_queue(card, queue, i) {
> + napi_schedule(&queue->napi);
> + }
> napi_schedule(&card->napi);
> /* kick-start the NAPI softirq: */
> local_bh_enable();
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x 011b0335903832facca86cd8ed05d7d8d94c9c76
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021859-renewal-onto-1877@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 011b0335903832facca86cd8ed05d7d8d94c9c76 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni(a)redhat.com>
Date: Thu, 6 Feb 2025 22:28:48 +0100
Subject: [PATCH] Revert "net: skb: introduce and use a single page frag cache"
This reverts commit dbae2b062824 ("net: skb: introduce and use a single
page frag cache"). The intended goal of such change was to counter a
performance regression introduced by commit 3226b158e67c ("net: avoid
32 x truesize under-estimation for tiny skbs").
Unfortunately, the blamed commit introduces another regression for the
virtio_net driver. Such a driver calls napi_alloc_skb() with a tiny
size, so that the whole head frag could fit a 512-byte block.
The single page frag cache uses a 1K fragment for such allocation, and
the additional overhead, under small UDP packets flood, makes the page
allocator a bottleneck.
Thanks to commit bf9f1baa279f ("net: add dedicated kmem_cache for
typical/small skb->head"), this revert does not re-introduce the
original regression. Actually, in the relevant test on top of this
revert, I measure a small but noticeable positive delta, just above
noise level.
The revert itself required some additional mangling due to the
introduction of the SKB_HEAD_ALIGN() helper and local lock infra in the
affected code.
Suggested-by: Eric Dumazet <edumazet(a)google.com>
Fixes: dbae2b062824 ("net: skb: introduce and use a single page frag cache")
Signed-off-by: Paolo Abeni <pabeni(a)redhat.com>
Link: https://patch.msgid.link/e649212fde9f0fdee23909ca0d14158d32bb7425.173887729…
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c0a86afb85da..365f0e2098d1 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4115,7 +4115,6 @@ void netif_receive_skb_list(struct list_head *head);
gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb);
void napi_gro_flush(struct napi_struct *napi, bool flush_old);
struct sk_buff *napi_get_frags(struct napi_struct *napi);
-void napi_get_frags_check(struct napi_struct *napi);
gro_result_t napi_gro_frags(struct napi_struct *napi);
static inline void napi_free_frags(struct napi_struct *napi)
diff --git a/net/core/dev.c b/net/core/dev.c
index b91658e8aedb..55e356a68db6 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6920,6 +6920,23 @@ netif_napi_dev_list_add(struct net_device *dev, struct napi_struct *napi)
list_add_rcu(&napi->dev_list, higher); /* adds after higher */
}
+/* Double check that napi_get_frags() allocates skbs with
+ * skb->head being backed by slab, not a page fragment.
+ * This is to make sure bug fixed in 3226b158e67c
+ * ("net: avoid 32 x truesize under-estimation for tiny skbs")
+ * does not accidentally come back.
+ */
+static void napi_get_frags_check(struct napi_struct *napi)
+{
+ struct sk_buff *skb;
+
+ local_bh_disable();
+ skb = napi_get_frags(napi);
+ WARN_ON_ONCE(skb && skb->head_frag);
+ napi_free_frags(napi);
+ local_bh_enable();
+}
+
void netif_napi_add_weight_locked(struct net_device *dev,
struct napi_struct *napi,
int (*poll)(struct napi_struct *, int),
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index a441613a1e6c..6a99c453397f 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -220,67 +220,9 @@ static void skb_under_panic(struct sk_buff *skb, unsigned int sz, void *addr)
#define NAPI_SKB_CACHE_BULK 16
#define NAPI_SKB_CACHE_HALF (NAPI_SKB_CACHE_SIZE / 2)
-#if PAGE_SIZE == SZ_4K
-
-#define NAPI_HAS_SMALL_PAGE_FRAG 1
-#define NAPI_SMALL_PAGE_PFMEMALLOC(nc) ((nc).pfmemalloc)
-
-/* specialized page frag allocator using a single order 0 page
- * and slicing it into 1K sized fragment. Constrained to systems
- * with a very limited amount of 1K fragments fitting a single
- * page - to avoid excessive truesize underestimation
- */
-
-struct page_frag_1k {
- void *va;
- u16 offset;
- bool pfmemalloc;
-};
-
-static void *page_frag_alloc_1k(struct page_frag_1k *nc, gfp_t gfp)
-{
- struct page *page;
- int offset;
-
- offset = nc->offset - SZ_1K;
- if (likely(offset >= 0))
- goto use_frag;
-
- page = alloc_pages_node(NUMA_NO_NODE, gfp, 0);
- if (!page)
- return NULL;
-
- nc->va = page_address(page);
- nc->pfmemalloc = page_is_pfmemalloc(page);
- offset = PAGE_SIZE - SZ_1K;
- page_ref_add(page, offset / SZ_1K);
-
-use_frag:
- nc->offset = offset;
- return nc->va + offset;
-}
-#else
-
-/* the small page is actually unused in this build; add dummy helpers
- * to please the compiler and avoid later preprocessor's conditionals
- */
-#define NAPI_HAS_SMALL_PAGE_FRAG 0
-#define NAPI_SMALL_PAGE_PFMEMALLOC(nc) false
-
-struct page_frag_1k {
-};
-
-static void *page_frag_alloc_1k(struct page_frag_1k *nc, gfp_t gfp_mask)
-{
- return NULL;
-}
-
-#endif
-
struct napi_alloc_cache {
local_lock_t bh_lock;
struct page_frag_cache page;
- struct page_frag_1k page_small;
unsigned int skb_count;
void *skb_cache[NAPI_SKB_CACHE_SIZE];
};
@@ -290,23 +232,6 @@ static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache) = {
.bh_lock = INIT_LOCAL_LOCK(bh_lock),
};
-/* Double check that napi_get_frags() allocates skbs with
- * skb->head being backed by slab, not a page fragment.
- * This is to make sure bug fixed in 3226b158e67c
- * ("net: avoid 32 x truesize under-estimation for tiny skbs")
- * does not accidentally come back.
- */
-void napi_get_frags_check(struct napi_struct *napi)
-{
- struct sk_buff *skb;
-
- local_bh_disable();
- skb = napi_get_frags(napi);
- WARN_ON_ONCE(!NAPI_HAS_SMALL_PAGE_FRAG && skb && skb->head_frag);
- napi_free_frags(napi);
- local_bh_enable();
-}
-
void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask)
{
struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
@@ -813,10 +738,8 @@ struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int len)
/* If requested length is either too small or too big,
* we use kmalloc() for skb->head allocation.
- * When the small frag allocator is available, prefer it over kmalloc
- * for small fragments
*/
- if ((!NAPI_HAS_SMALL_PAGE_FRAG && len <= SKB_WITH_OVERHEAD(1024)) ||
+ if (len <= SKB_WITH_OVERHEAD(1024) ||
len > SKB_WITH_OVERHEAD(PAGE_SIZE) ||
(gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) {
skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX | SKB_ALLOC_NAPI,
@@ -826,32 +749,16 @@ struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int len)
goto skb_success;
}
+ len = SKB_HEAD_ALIGN(len);
+
if (sk_memalloc_socks())
gfp_mask |= __GFP_MEMALLOC;
local_lock_nested_bh(&napi_alloc_cache.bh_lock);
nc = this_cpu_ptr(&napi_alloc_cache);
- if (NAPI_HAS_SMALL_PAGE_FRAG && len <= SKB_WITH_OVERHEAD(1024)) {
- /* we are artificially inflating the allocation size, but
- * that is not as bad as it may look like, as:
- * - 'len' less than GRO_MAX_HEAD makes little sense
- * - On most systems, larger 'len' values lead to fragment
- * size above 512 bytes
- * - kmalloc would use the kmalloc-1k slab for such values
- * - Builds with smaller GRO_MAX_HEAD will very likely do
- * little networking, as that implies no WiFi and no
- * tunnels support, and 32 bits arches.
- */
- len = SZ_1K;
- data = page_frag_alloc_1k(&nc->page_small, gfp_mask);
- pfmemalloc = NAPI_SMALL_PAGE_PFMEMALLOC(nc->page_small);
- } else {
- len = SKB_HEAD_ALIGN(len);
-
- data = page_frag_alloc(&nc->page, len, gfp_mask);
- pfmemalloc = page_frag_cache_is_pfmemalloc(&nc->page);
- }
+ data = page_frag_alloc(&nc->page, len, gfp_mask);
+ pfmemalloc = page_frag_cache_is_pfmemalloc(&nc->page);
local_unlock_nested_bh(&napi_alloc_cache.bh_lock);
if (unlikely(!data))
From: Srinivasan Shanmugam <srinivasan.shanmugam(a)amd.com>
[ Upstream commit f22f4754aaa47d8c59f166ba3042182859e5dff7 ]
This commit addresses a potential null pointer dereference issue in the
`dcn201_acquire_free_pipe_for_layer` function. The issue could occur
when `head_pipe` is null.
The fix adds a check for a null `head_pipe`: if it is null, the function
triggers an assert and returns NULL to prevent a potential null pointer
dereference.
Reported by smatch:
drivers/gpu/drm/amd/amdgpu/../display/dc/resource/dcn201/dcn201_resource.c:1016 dcn201_acquire_free_pipe_for_layer() error: we previously assumed 'head_pipe' could be null (see line 1010)
Cc: Tom Chung <chiahsuan.chung(a)amd.com>
Cc: Rodrigo Siqueira <Rodrigo.Siqueira(a)amd.com>
Cc: Roman Li <roman.li(a)amd.com>
Cc: Alex Hung <alex.hung(a)amd.com>
Cc: Aurabindo Pillai <aurabindo.pillai(a)amd.com>
Cc: Harry Wentland <harry.wentland(a)amd.com>
Cc: Hamza Mahfooz <hamza.mahfooz(a)amd.com>
Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam(a)amd.com>
Reviewed-by: Tom Chung <chiahsuan.chung(a)amd.com>
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
[dcn201 was moved from drivers/gpu/drm/amd/display/dc to
drivers/gpu/drm/amd/display/dc/resource since
8b8eed05a1c6 ("drm/amd/display: Refactor resource into component directory").
The path is changed accordingly to apply the patch on 6.6.y.]
Signed-off-by: Xiangyu Chen <xiangyu.chen(a)windriver.com>
Signed-off-by: He Zhe <zhe.he(a)windriver.com>
---
Verified with a build test only, as we don't have a DCN201 device.
---
drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c
index 2dc4d2c1410b..8efe3f32a0e7 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c
@@ -1002,8 +1002,10 @@ static struct pipe_ctx *dcn201_acquire_free_pipe_for_layer(
struct pipe_ctx *head_pipe = resource_get_otg_master_for_stream(res_ctx, opp_head_pipe->stream);
struct pipe_ctx *idle_pipe = resource_find_free_secondary_pipe_legacy(res_ctx, pool, head_pipe);
- if (!head_pipe)
+ if (!head_pipe) {
ASSERT(0);
+ return NULL;
+ }
if (!idle_pipe)
return NULL;
--
2.25.1
On Tue, Feb 18, 2025 at 01:55:45PM +0100, gregkh(a)linuxfoundation.org wrote:
>
> This is a note to let you know that I've just added the patch titled
>
> alpha: make stack 16-byte aligned (most cases)
Hi Greg, thanks for applying this!
> Patches currently in stable-queue which might be from ink(a)unseen.parts are
>
> queue-6.6/alpha-make-stack-16-byte-aligned-most-cases.patch
> queue-6.6/alpha-align-stack-for-page-fault-and-user-unaligned-trap-handlers.patch
The third one, commit 77b823fa619f97d ("alpha: replace hardcoded stack
offsets with autogenerated ones"), is also needed, but it won't apply
as-is to 6.6 and older kernels.
Do you want me to provide the patches?
Ivan.
Please backport
42602e3a06f8e5b9a059344e305c9bee2dcc87c8
bpf: handle implicit declaration of function gettid in bpf_iter.c
and
4b7c05598a644782b8451e415bb56f31e5c9d3ee
selftests/bpf: Fix uprobe consumer test
to 6.12.y to fix BPF selftest-related issues (compilation failure and
test failure respectively). Both apply to linux-6.12.y cleanly.
Thank you!
Alan
Greg,
please add the following upstream commits to the stable branches:
5cc2db37124bb33914996d6fdbb2ddb3811f2945
98a5cfd2320966f40fe049a9855f8787f0126825
0bd797b801bd8ee06c822844e20d73aaea0878dd
They are fixing boot failures when running as a Xen PVH guest for
some kernel configurations.
Thanks,
Juergen
Hi,
May I request this be ported to stable kernels?
6d3e0d8cc632 ("kdb: Do not assume write() callback available")
It landed in v6.6, and I've tested the trivial cherry-pick to v6.1.y.
AFAICT, it makes sense and applies cleanly to at least v5.15.y and
v5.10.y (and possibly more), although I didn't test those.
It's a bit of an older (but trivial) fix, and I assume not many folks
actually test out KDB/KGDB that often, but I wasted my time
re-discovering it.
Thanks,
Brian
The patch below does not apply to the 6.13-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.13.y
git checkout FETCH_HEAD
git cherry-pick -x 011b0335903832facca86cd8ed05d7d8d94c9c76
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021858-cobalt-scanner-666f@gregkh' --subject-prefix 'PATCH 6.13.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 011b0335903832facca86cd8ed05d7d8d94c9c76 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni(a)redhat.com>
Date: Thu, 6 Feb 2025 22:28:48 +0100
Subject: [PATCH] Revert "net: skb: introduce and use a single page frag cache"
This reverts commit dbae2b062824 ("net: skb: introduce and use a single
page frag cache"). The intended goal of such change was to counter a
performance regression introduced by commit 3226b158e67c ("net: avoid
32 x truesize under-estimation for tiny skbs").
Unfortunately, the blamed commit introduces another regression for the
virtio_net driver. Such a driver calls napi_alloc_skb() with a tiny
size, so that the whole head frag could fit a 512-byte block.
The single page frag cache uses a 1K fragment for such allocation, and
the additional overhead, under small UDP packets flood, makes the page
allocator a bottleneck.
Thanks to commit bf9f1baa279f ("net: add dedicated kmem_cache for
typical/small skb->head"), this revert does not re-introduce the
original regression. Actually, in the relevant test on top of this
revert, I measure a small but noticeable positive delta, just above
noise level.
The revert itself required some additional mangling due to the
introduction of the SKB_HEAD_ALIGN() helper and local lock infra in the
affected code.
Suggested-by: Eric Dumazet <edumazet(a)google.com>
Fixes: dbae2b062824 ("net: skb: introduce and use a single page frag cache")
Signed-off-by: Paolo Abeni <pabeni(a)redhat.com>
Link: https://patch.msgid.link/e649212fde9f0fdee23909ca0d14158d32bb7425.173887729…
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c0a86afb85da..365f0e2098d1 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4115,7 +4115,6 @@ void netif_receive_skb_list(struct list_head *head);
gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb);
void napi_gro_flush(struct napi_struct *napi, bool flush_old);
struct sk_buff *napi_get_frags(struct napi_struct *napi);
-void napi_get_frags_check(struct napi_struct *napi);
gro_result_t napi_gro_frags(struct napi_struct *napi);
static inline void napi_free_frags(struct napi_struct *napi)
diff --git a/net/core/dev.c b/net/core/dev.c
index b91658e8aedb..55e356a68db6 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6920,6 +6920,23 @@ netif_napi_dev_list_add(struct net_device *dev, struct napi_struct *napi)
list_add_rcu(&napi->dev_list, higher); /* adds after higher */
}
+/* Double check that napi_get_frags() allocates skbs with
+ * skb->head being backed by slab, not a page fragment.
+ * This is to make sure bug fixed in 3226b158e67c
+ * ("net: avoid 32 x truesize under-estimation for tiny skbs")
+ * does not accidentally come back.
+ */
+static void napi_get_frags_check(struct napi_struct *napi)
+{
+ struct sk_buff *skb;
+
+ local_bh_disable();
+ skb = napi_get_frags(napi);
+ WARN_ON_ONCE(skb && skb->head_frag);
+ napi_free_frags(napi);
+ local_bh_enable();
+}
+
void netif_napi_add_weight_locked(struct net_device *dev,
struct napi_struct *napi,
int (*poll)(struct napi_struct *, int),
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index a441613a1e6c..6a99c453397f 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -220,67 +220,9 @@ static void skb_under_panic(struct sk_buff *skb, unsigned int sz, void *addr)
#define NAPI_SKB_CACHE_BULK 16
#define NAPI_SKB_CACHE_HALF (NAPI_SKB_CACHE_SIZE / 2)
-#if PAGE_SIZE == SZ_4K
-
-#define NAPI_HAS_SMALL_PAGE_FRAG 1
-#define NAPI_SMALL_PAGE_PFMEMALLOC(nc) ((nc).pfmemalloc)
-
-/* specialized page frag allocator using a single order 0 page
- * and slicing it into 1K sized fragment. Constrained to systems
- * with a very limited amount of 1K fragments fitting a single
- * page - to avoid excessive truesize underestimation
- */
-
-struct page_frag_1k {
- void *va;
- u16 offset;
- bool pfmemalloc;
-};
-
-static void *page_frag_alloc_1k(struct page_frag_1k *nc, gfp_t gfp)
-{
- struct page *page;
- int offset;
-
- offset = nc->offset - SZ_1K;
- if (likely(offset >= 0))
- goto use_frag;
-
- page = alloc_pages_node(NUMA_NO_NODE, gfp, 0);
- if (!page)
- return NULL;
-
- nc->va = page_address(page);
- nc->pfmemalloc = page_is_pfmemalloc(page);
- offset = PAGE_SIZE - SZ_1K;
- page_ref_add(page, offset / SZ_1K);
-
-use_frag:
- nc->offset = offset;
- return nc->va + offset;
-}
-#else
-
-/* the small page is actually unused in this build; add dummy helpers
- * to please the compiler and avoid later preprocessor's conditionals
- */
-#define NAPI_HAS_SMALL_PAGE_FRAG 0
-#define NAPI_SMALL_PAGE_PFMEMALLOC(nc) false
-
-struct page_frag_1k {
-};
-
-static void *page_frag_alloc_1k(struct page_frag_1k *nc, gfp_t gfp_mask)
-{
- return NULL;
-}
-
-#endif
-
struct napi_alloc_cache {
local_lock_t bh_lock;
struct page_frag_cache page;
- struct page_frag_1k page_small;
unsigned int skb_count;
void *skb_cache[NAPI_SKB_CACHE_SIZE];
};
@@ -290,23 +232,6 @@ static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache) = {
.bh_lock = INIT_LOCAL_LOCK(bh_lock),
};
-/* Double check that napi_get_frags() allocates skbs with
- * skb->head being backed by slab, not a page fragment.
- * This is to make sure bug fixed in 3226b158e67c
- * ("net: avoid 32 x truesize under-estimation for tiny skbs")
- * does not accidentally come back.
- */
-void napi_get_frags_check(struct napi_struct *napi)
-{
- struct sk_buff *skb;
-
- local_bh_disable();
- skb = napi_get_frags(napi);
- WARN_ON_ONCE(!NAPI_HAS_SMALL_PAGE_FRAG && skb && skb->head_frag);
- napi_free_frags(napi);
- local_bh_enable();
-}
-
void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask)
{
struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
@@ -813,10 +738,8 @@ struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int len)
/* If requested length is either too small or too big,
* we use kmalloc() for skb->head allocation.
- * When the small frag allocator is available, prefer it over kmalloc
- * for small fragments
*/
- if ((!NAPI_HAS_SMALL_PAGE_FRAG && len <= SKB_WITH_OVERHEAD(1024)) ||
+ if (len <= SKB_WITH_OVERHEAD(1024) ||
len > SKB_WITH_OVERHEAD(PAGE_SIZE) ||
(gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) {
skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX | SKB_ALLOC_NAPI,
@@ -826,32 +749,16 @@ struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int len)
goto skb_success;
}
+ len = SKB_HEAD_ALIGN(len);
+
if (sk_memalloc_socks())
gfp_mask |= __GFP_MEMALLOC;
local_lock_nested_bh(&napi_alloc_cache.bh_lock);
nc = this_cpu_ptr(&napi_alloc_cache);
- if (NAPI_HAS_SMALL_PAGE_FRAG && len <= SKB_WITH_OVERHEAD(1024)) {
- /* we are artificially inflating the allocation size, but
- * that is not as bad as it may look like, as:
- * - 'len' less than GRO_MAX_HEAD makes little sense
- * - On most systems, larger 'len' values lead to fragment
- * size above 512 bytes
- * - kmalloc would use the kmalloc-1k slab for such values
- * - Builds with smaller GRO_MAX_HEAD will very likely do
- * little networking, as that implies no WiFi and no
- * tunnels support, and 32 bits arches.
- */
- len = SZ_1K;
- data = page_frag_alloc_1k(&nc->page_small, gfp_mask);
- pfmemalloc = NAPI_SMALL_PAGE_PFMEMALLOC(nc->page_small);
- } else {
- len = SKB_HEAD_ALIGN(len);
-
- data = page_frag_alloc(&nc->page, len, gfp_mask);
- pfmemalloc = page_frag_cache_is_pfmemalloc(&nc->page);
- }
+ data = page_frag_alloc(&nc->page, len, gfp_mask);
+ pfmemalloc = page_frag_cache_is_pfmemalloc(&nc->page);
local_unlock_nested_bh(&napi_alloc_cache.bh_lock);
if (unlikely(!data))
Currently on book3s-hv, the capability KVM_CAP_SPAPR_TCE_VFIO is only
available for KVM guests running on PowerNV and not for KVM guests
running on pSeries hypervisors. This prevents a pSeries L2 guest from
leveraging the in-kernel acceleration for the H_PUT_TCE_INDIRECT and
H_STUFF_TCE hcalls, which results in slow startup times for large-memory
Fix this by enabling the CAP_SPAPR_TCE_VFIO on the pSeries hosts as well
for the nested PAPR guests. With the patch, booting an L2 guest with
128G memory results in an average improvement of 11% in the startup
times.
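As a reference point, userspace can probe the capability this patch
enables with KVM_CHECK_EXTENSION; a minimal sketch (error handling
elided, helper name hypothetical; on some setups the VM fd gives a more
precise answer than /dev/kvm):

	#include <fcntl.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* Nonzero if KVM advertises in-kernel TCE acceleration for VFIO. */
	static int spapr_tce_vfio_supported(void)
	{
		int kvm = open("/dev/kvm", O_RDWR);
		int r = ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_SPAPR_TCE_VFIO);

		close(kvm);
		return r > 0;
	}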
Fixes: f431a8cde7f1 ("powerpc/iommu: Reimplement the iommu_table_group_ops for pSeries")
Cc: stable(a)vger.kernel.org
Signed-off-by: Amit Machhiwal <amachhiw(a)linux.ibm.com>
---
Changes since v1:
* Addressed review comments from Ritesh
* v1: https://lore.kernel.org/all/20250109132053.158436-1-amachhiw@linux.ibm.com/
arch/powerpc/kvm/powerpc.c | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index ce1d91eed231..a7138eb18d59 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -543,26 +543,23 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = !hv_enabled;
break;
#ifdef CONFIG_KVM_MPIC
case KVM_CAP_IRQ_MPIC:
r = 1;
break;
#endif
#ifdef CONFIG_PPC_BOOK3S_64
case KVM_CAP_SPAPR_TCE:
+ fallthrough;
case KVM_CAP_SPAPR_TCE_64:
- r = 1;
- break;
case KVM_CAP_SPAPR_TCE_VFIO:
- r = !!cpu_has_feature(CPU_FTR_HVMODE);
- break;
case KVM_CAP_PPC_RTAS:
case KVM_CAP_PPC_FIXUP_HCALL:
case KVM_CAP_PPC_ENABLE_HCALL:
#ifdef CONFIG_KVM_XICS
case KVM_CAP_IRQ_XICS:
#endif
case KVM_CAP_PPC_GET_CPU_CHAR:
r = 1;
break;
#ifdef CONFIG_KVM_XIVE
base-commit: 6d61a53dd6f55405ebcaea6ee38d1ab5a8856c2c
--
2.48.1
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 6389e616fae8a101ce00068f7690461ab57b29d8
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021837-entree-estrogen-6751@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 6389e616fae8a101ce00068f7690461ab57b29d8 Mon Sep 17 00:00:00 2001
From: Tomi Valkeinen <tomi.valkeinen+renesas(a)ideasonboard.com>
Date: Tue, 17 Dec 2024 07:31:35 +0200
Subject: [PATCH] drm/rcar-du: dsi: Fix PHY lock bit check
The driver checks for bit 16 (using CLOCKSET1_LOCK define) in CLOCKSET1
register when waiting for the PPI clock. However, the right bit to check
is bit 17 (CLOCKSET1_LOCK_PHY define). Not only that, but there's
nothing in the documents for bit 16 for V3U nor V4H.
So, fix the check to use bit 17, and drop the define for bit 16.
Fixes: 155358310f01 ("drm: rcar-du: Add R-Car DSI driver")
Fixes: 11696c5e8924 ("drm: Place Renesas drivers in a separate dir")
Cc: stable(a)vger.kernel.org
Signed-off-by: Tomi Valkeinen <tomi.valkeinen+renesas(a)ideasonboard.com>
Reviewed-by: Laurent Pinchart <laurent.pinchart+renesas(a)ideasonboard.com>
Tested-by: Geert Uytterhoeven <geert+renesas(a)glider.be>
Signed-off-by: Tomi Valkeinen <tomi.valkeinen(a)ideasonboard.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241217-rcar-gh-dsi-v5-1-e77…
diff --git a/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi.c b/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi.c
index 8180625d5866..be4ffc0ab14f 100644
--- a/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi.c
+++ b/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi.c
@@ -587,7 +587,7 @@ static int rcar_mipi_dsi_startup(struct rcar_mipi_dsi *dsi,
for (timeout = 10; timeout > 0; --timeout) {
if ((rcar_mipi_dsi_read(dsi, PPICLSR) & PPICLSR_STPST) &&
(rcar_mipi_dsi_read(dsi, PPIDLSR) & PPIDLSR_STPST) &&
- (rcar_mipi_dsi_read(dsi, CLOCKSET1) & CLOCKSET1_LOCK))
+ (rcar_mipi_dsi_read(dsi, CLOCKSET1) & CLOCKSET1_LOCK_PHY))
break;
usleep_range(1000, 2000);
diff --git a/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi_regs.h b/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi_regs.h
index f8114d11f2d1..a6b276f1d6ee 100644
--- a/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi_regs.h
+++ b/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi_regs.h
@@ -142,7 +142,6 @@
#define CLOCKSET1 0x101c
#define CLOCKSET1_LOCK_PHY (1 << 17)
-#define CLOCKSET1_LOCK (1 << 16)
#define CLOCKSET1_CLKSEL (1 << 8)
#define CLOCKSET1_CLKINSEL_EXTAL (0 << 2)
#define CLOCKSET1_CLKINSEL_DIG (1 << 2)
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 3a47f4b439beb98e955d501c609dfd12b7836d61
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021816-flail-manger-7c9a@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 3a47f4b439beb98e955d501c609dfd12b7836d61 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter(a)linaro.org>
Date: Fri, 15 Nov 2024 17:50:08 +0300
Subject: [PATCH] drm/msm/gem: prevent integer overflow in
msm_ioctl_gem_submit()
The "submit->cmd[i].size" and "submit->cmd[i].offset" variables are u32
values that come from the user via the submit_lookup_cmds() function.
This addition could lead to an integer wrapping bug, so use size_add()
to prevent that.
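For reference, size_add() comes from include/linux/overflow.h and
saturates to SIZE_MAX instead of wrapping, so an overflowing sum safely
fails the '> obj->size / 4' bounds check. A minimal standalone sketch of
that behaviour (name suffixed to mark it as an illustration):

	#include <stddef.h>
	#include <stdint.h>

	static inline size_t size_add_sketch(size_t a, size_t b)
	{
		size_t sum;

		/* Saturate on overflow; SIZE_MAX is larger than any
		 * legitimate object size, so bounds checks reject it. */
		if (__builtin_add_overflow(a, b, &sum))
			return SIZE_MAX;
		return sum;
	}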
Fixes: 198725337ef1 ("drm/msm: fix cmdstream size check")
Cc: stable(a)vger.kernel.org
Signed-off-by: Dan Carpenter <dan.carpenter(a)linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/624696/
Signed-off-by: Rob Clark <robdclark(a)chromium.org>
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index fba78193127d..f775638d239a 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -787,8 +787,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
goto out;
if (!submit->cmd[i].size ||
- ((submit->cmd[i].size + submit->cmd[i].offset) >
- obj->size / 4)) {
+ (size_add(submit->cmd[i].size, submit->cmd[i].offset) > obj->size / 4)) {
SUBMIT_ERROR(submit, "invalid cmdstream size: %u\n", submit->cmd[i].size * 4);
ret = -EINVAL;
goto out;
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 3a47f4b439beb98e955d501c609dfd12b7836d61
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021814-rhyme-unimpeded-1604@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 3a47f4b439beb98e955d501c609dfd12b7836d61 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter(a)linaro.org>
Date: Fri, 15 Nov 2024 17:50:08 +0300
Subject: [PATCH] drm/msm/gem: prevent integer overflow in
msm_ioctl_gem_submit()
The "submit->cmd[i].size" and "submit->cmd[i].offset" variables are u32
values that come from the user via the submit_lookup_cmds() function.
This addition could lead to an integer wrapping bug, so use size_add()
to prevent that.
Fixes: 198725337ef1 ("drm/msm: fix cmdstream size check")
Cc: stable(a)vger.kernel.org
Signed-off-by: Dan Carpenter <dan.carpenter(a)linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/624696/
Signed-off-by: Rob Clark <robdclark(a)chromium.org>
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index fba78193127d..f775638d239a 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -787,8 +787,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
goto out;
if (!submit->cmd[i].size ||
- ((submit->cmd[i].size + submit->cmd[i].offset) >
- obj->size / 4)) {
+ (size_add(submit->cmd[i].size, submit->cmd[i].offset) > obj->size / 4)) {
SUBMIT_ERROR(submit, "invalid cmdstream size: %u\n", submit->cmd[i].size * 4);
ret = -EINVAL;
goto out;
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 3a47f4b439beb98e955d501c609dfd12b7836d61
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021813-scorebook-fountain-acda@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 3a47f4b439beb98e955d501c609dfd12b7836d61 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter(a)linaro.org>
Date: Fri, 15 Nov 2024 17:50:08 +0300
Subject: [PATCH] drm/msm/gem: prevent integer overflow in
msm_ioctl_gem_submit()
The "submit->cmd[i].size" and "submit->cmd[i].offset" variables are u32
values that come from the user via the submit_lookup_cmds() function.
This addition could lead to an integer wrapping bug, so use size_add()
to prevent that.
Fixes: 198725337ef1 ("drm/msm: fix cmdstream size check")
Cc: stable(a)vger.kernel.org
Signed-off-by: Dan Carpenter <dan.carpenter(a)linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/624696/
Signed-off-by: Rob Clark <robdclark(a)chromium.org>
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index fba78193127d..f775638d239a 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -787,8 +787,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
goto out;
if (!submit->cmd[i].size ||
- ((submit->cmd[i].size + submit->cmd[i].offset) >
- obj->size / 4)) {
+ (size_add(submit->cmd[i].size, submit->cmd[i].offset) > obj->size / 4)) {
SUBMIT_ERROR(submit, "invalid cmdstream size: %u\n", submit->cmd[i].size * 4);
ret = -EINVAL;
goto out;
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 3a47f4b439beb98e955d501c609dfd12b7836d61
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021811-coauthor-gosling-ff49@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 3a47f4b439beb98e955d501c609dfd12b7836d61 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter(a)linaro.org>
Date: Fri, 15 Nov 2024 17:50:08 +0300
Subject: [PATCH] drm/msm/gem: prevent integer overflow in
msm_ioctl_gem_submit()
The "submit->cmd[i].size" and "submit->cmd[i].offset" variables are u32
values that come from the user via the submit_lookup_cmds() function.
This addition could lead to an integer wrapping bug, so use size_add()
to prevent that.
Fixes: 198725337ef1 ("drm/msm: fix cmdstream size check")
Cc: stable(a)vger.kernel.org
Signed-off-by: Dan Carpenter <dan.carpenter(a)linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/624696/
Signed-off-by: Rob Clark <robdclark(a)chromium.org>
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index fba78193127d..f775638d239a 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -787,8 +787,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
goto out;
if (!submit->cmd[i].size ||
- ((submit->cmd[i].size + submit->cmd[i].offset) >
- obj->size / 4)) {
+ (size_add(submit->cmd[i].size, submit->cmd[i].offset) > obj->size / 4)) {
SUBMIT_ERROR(submit, "invalid cmdstream size: %u\n", submit->cmd[i].size * 4);
ret = -EINVAL;
goto out;
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 3a47f4b439beb98e955d501c609dfd12b7836d61
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021809-sixtyfold-showroom-5f4e@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 3a47f4b439beb98e955d501c609dfd12b7836d61 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter(a)linaro.org>
Date: Fri, 15 Nov 2024 17:50:08 +0300
Subject: [PATCH] drm/msm/gem: prevent integer overflow in
msm_ioctl_gem_submit()
The "submit->cmd[i].size" and "submit->cmd[i].offset" variables are u32
values that come from the user via the submit_lookup_cmds() function.
This addition could lead to an integer wrapping bug, so use size_add()
to prevent that.
Fixes: 198725337ef1 ("drm/msm: fix cmdstream size check")
Cc: stable(a)vger.kernel.org
Signed-off-by: Dan Carpenter <dan.carpenter(a)linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/624696/
Signed-off-by: Rob Clark <robdclark(a)chromium.org>
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index fba78193127d..f775638d239a 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -787,8 +787,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
goto out;
if (!submit->cmd[i].size ||
- ((submit->cmd[i].size + submit->cmd[i].offset) >
- obj->size / 4)) {
+ (size_add(submit->cmd[i].size, submit->cmd[i].offset) > obj->size / 4)) {
SUBMIT_ERROR(submit, "invalid cmdstream size: %u\n", submit->cmd[i].size * 4);
ret = -EINVAL;
goto out;
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x a9a73f2661e6f625d306c9b0ef082e4593f45a21
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021816-rotten-viral-2615@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From a9a73f2661e6f625d306c9b0ef082e4593f45a21 Mon Sep 17 00:00:00 2001
From: Devarsh Thakkar <devarsht(a)ti.com>
Date: Mon, 21 Oct 2024 17:07:50 +0300
Subject: [PATCH] drm/tidss: Fix race condition while handling interrupt
registers
The driver has a spinlock for protecting the irq_masks field and irq
enable registers. However, the driver misses protecting the irq status
registers, which can lead to races.
Take the spinlock when accessing irqstatus too.
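To illustrate the window being closed, the status read is a
read-then-write-1-to-clear sequence; a standalone sketch (a volatile
variable stands in for the hardware register, names hypothetical):

	#include <stdint.h>

	static volatile uint32_t dispc_irqstatus; /* register stand-in */

	static uint32_t read_and_clear_irqstatus_sketch(void)
	{
		uint32_t stat = dispc_irqstatus; /* snapshot */

		/* Another context reading between these two statements sees
		 * the same bits; whichever clear lands second also acks
		 * events latched in between, losing them. Holding wait_lock
		 * around the pair makes it atomic. */
		dispc_irqstatus &= ~stat; /* emulate write-1-to-clear */
		return stat;
	}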
Fixes: 32a1795f57ee ("drm/tidss: New driver for TI Keystone platform Display SubSystem")
Cc: stable(a)vger.kernel.org
Signed-off-by: Devarsh Thakkar <devarsht(a)ti.com>
[Tomi: updated the desc]
Reviewed-by: Jonathan Cormier <jcormier(a)criticallink.com>
Tested-by: Jonathan Cormier <jcormier(a)criticallink.com>
Reviewed-by: Aradhya Bhatia <aradhya.bhatia(a)linux.dev>
Signed-off-by: Tomi Valkeinen <tomi.valkeinen(a)ideasonboard.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241021-tidss-irq-fix-v1-6-8…
diff --git a/drivers/gpu/drm/tidss/tidss_dispc.c b/drivers/gpu/drm/tidss/tidss_dispc.c
index 515f82e8a0a5..07f5c26cfa26 100644
--- a/drivers/gpu/drm/tidss/tidss_dispc.c
+++ b/drivers/gpu/drm/tidss/tidss_dispc.c
@@ -2767,8 +2767,12 @@ static void dispc_init_errata(struct dispc_device *dispc)
*/
static void dispc_softreset_k2g(struct dispc_device *dispc)
{
+ unsigned long flags;
+
+ spin_lock_irqsave(&dispc->tidss->wait_lock, flags);
dispc_set_irqenable(dispc, 0);
dispc_read_and_clear_irqstatus(dispc);
+ spin_unlock_irqrestore(&dispc->tidss->wait_lock, flags);
for (unsigned int vp_idx = 0; vp_idx < dispc->feat->num_vps; ++vp_idx)
VP_REG_FLD_MOD(dispc, vp_idx, DISPC_VP_CONTROL, 0, 0, 0);
diff --git a/drivers/gpu/drm/tidss/tidss_irq.c b/drivers/gpu/drm/tidss/tidss_irq.c
index 3cc4024ec7ff..8af4682ba56b 100644
--- a/drivers/gpu/drm/tidss/tidss_irq.c
+++ b/drivers/gpu/drm/tidss/tidss_irq.c
@@ -60,7 +60,9 @@ static irqreturn_t tidss_irq_handler(int irq, void *arg)
unsigned int id;
dispc_irq_t irqstatus;
+ spin_lock(&tidss->wait_lock);
irqstatus = dispc_read_and_clear_irqstatus(tidss->dispc);
+ spin_unlock(&tidss->wait_lock);
for (id = 0; id < tidss->num_crtcs; id++) {
struct drm_crtc *crtc = tidss->crtcs[id];
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x a9a73f2661e6f625d306c9b0ef082e4593f45a21
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021816-stonework-task-247b@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From a9a73f2661e6f625d306c9b0ef082e4593f45a21 Mon Sep 17 00:00:00 2001
From: Devarsh Thakkar <devarsht(a)ti.com>
Date: Mon, 21 Oct 2024 17:07:50 +0300
Subject: [PATCH] drm/tidss: Fix race condition while handling interrupt
registers
The driver has a spinlock for protecting the irq_masks field and irq
enable registers. However, the driver misses protecting the irq status
registers, which can lead to races.
Take the spinlock when accessing irqstatus too.
Fixes: 32a1795f57ee ("drm/tidss: New driver for TI Keystone platform Display SubSystem")
Cc: stable(a)vger.kernel.org
Signed-off-by: Devarsh Thakkar <devarsht(a)ti.com>
[Tomi: updated the desc]
Reviewed-by: Jonathan Cormier <jcormier(a)criticallink.com>
Tested-by: Jonathan Cormier <jcormier(a)criticallink.com>
Reviewed-by: Aradhya Bhatia <aradhya.bhatia(a)linux.dev>
Signed-off-by: Tomi Valkeinen <tomi.valkeinen(a)ideasonboard.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241021-tidss-irq-fix-v1-6-8…
diff --git a/drivers/gpu/drm/tidss/tidss_dispc.c b/drivers/gpu/drm/tidss/tidss_dispc.c
index 515f82e8a0a5..07f5c26cfa26 100644
--- a/drivers/gpu/drm/tidss/tidss_dispc.c
+++ b/drivers/gpu/drm/tidss/tidss_dispc.c
@@ -2767,8 +2767,12 @@ static void dispc_init_errata(struct dispc_device *dispc)
*/
static void dispc_softreset_k2g(struct dispc_device *dispc)
{
+ unsigned long flags;
+
+ spin_lock_irqsave(&dispc->tidss->wait_lock, flags);
dispc_set_irqenable(dispc, 0);
dispc_read_and_clear_irqstatus(dispc);
+ spin_unlock_irqrestore(&dispc->tidss->wait_lock, flags);
for (unsigned int vp_idx = 0; vp_idx < dispc->feat->num_vps; ++vp_idx)
VP_REG_FLD_MOD(dispc, vp_idx, DISPC_VP_CONTROL, 0, 0, 0);
diff --git a/drivers/gpu/drm/tidss/tidss_irq.c b/drivers/gpu/drm/tidss/tidss_irq.c
index 3cc4024ec7ff..8af4682ba56b 100644
--- a/drivers/gpu/drm/tidss/tidss_irq.c
+++ b/drivers/gpu/drm/tidss/tidss_irq.c
@@ -60,7 +60,9 @@ static irqreturn_t tidss_irq_handler(int irq, void *arg)
unsigned int id;
dispc_irq_t irqstatus;
+ spin_lock(&tidss->wait_lock);
irqstatus = dispc_read_and_clear_irqstatus(tidss->dispc);
+ spin_unlock(&tidss->wait_lock);
for (id = 0; id < tidss->num_crtcs; id++) {
struct drm_crtc *crtc = tidss->crtcs[id];
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x a9a73f2661e6f625d306c9b0ef082e4593f45a21
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021815-procreate-false-2b40@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From a9a73f2661e6f625d306c9b0ef082e4593f45a21 Mon Sep 17 00:00:00 2001
From: Devarsh Thakkar <devarsht(a)ti.com>
Date: Mon, 21 Oct 2024 17:07:50 +0300
Subject: [PATCH] drm/tidss: Fix race condition while handling interrupt
registers
The driver has a spinlock for protecting the irq_masks field and irq
enable registers. However, the driver misses protecting the irq status
registers, which can lead to races.
Take the spinlock when accessing irqstatus too.
Fixes: 32a1795f57ee ("drm/tidss: New driver for TI Keystone platform Display SubSystem")
Cc: stable(a)vger.kernel.org
Signed-off-by: Devarsh Thakkar <devarsht(a)ti.com>
[Tomi: updated the desc]
Reviewed-by: Jonathan Cormier <jcormier(a)criticallink.com>
Tested-by: Jonathan Cormier <jcormier(a)criticallink.com>
Reviewed-by: Aradhya Bhatia <aradhya.bhatia(a)linux.dev>
Signed-off-by: Tomi Valkeinen <tomi.valkeinen(a)ideasonboard.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241021-tidss-irq-fix-v1-6-8…
diff --git a/drivers/gpu/drm/tidss/tidss_dispc.c b/drivers/gpu/drm/tidss/tidss_dispc.c
index 515f82e8a0a5..07f5c26cfa26 100644
--- a/drivers/gpu/drm/tidss/tidss_dispc.c
+++ b/drivers/gpu/drm/tidss/tidss_dispc.c
@@ -2767,8 +2767,12 @@ static void dispc_init_errata(struct dispc_device *dispc)
*/
static void dispc_softreset_k2g(struct dispc_device *dispc)
{
+ unsigned long flags;
+
+ spin_lock_irqsave(&dispc->tidss->wait_lock, flags);
dispc_set_irqenable(dispc, 0);
dispc_read_and_clear_irqstatus(dispc);
+ spin_unlock_irqrestore(&dispc->tidss->wait_lock, flags);
for (unsigned int vp_idx = 0; vp_idx < dispc->feat->num_vps; ++vp_idx)
VP_REG_FLD_MOD(dispc, vp_idx, DISPC_VP_CONTROL, 0, 0, 0);
diff --git a/drivers/gpu/drm/tidss/tidss_irq.c b/drivers/gpu/drm/tidss/tidss_irq.c
index 3cc4024ec7ff..8af4682ba56b 100644
--- a/drivers/gpu/drm/tidss/tidss_irq.c
+++ b/drivers/gpu/drm/tidss/tidss_irq.c
@@ -60,7 +60,9 @@ static irqreturn_t tidss_irq_handler(int irq, void *arg)
unsigned int id;
dispc_irq_t irqstatus;
+ spin_lock(&tidss->wait_lock);
irqstatus = dispc_read_and_clear_irqstatus(tidss->dispc);
+ spin_unlock(&tidss->wait_lock);
for (id = 0; id < tidss->num_crtcs; id++) {
struct drm_crtc *crtc = tidss->crtcs[id];
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x a9a73f2661e6f625d306c9b0ef082e4593f45a21
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021815-chrome-sycamore-9640@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From a9a73f2661e6f625d306c9b0ef082e4593f45a21 Mon Sep 17 00:00:00 2001
From: Devarsh Thakkar <devarsht(a)ti.com>
Date: Mon, 21 Oct 2024 17:07:50 +0300
Subject: [PATCH] drm/tidss: Fix race condition while handling interrupt
registers
The driver has a spinlock for protecting the irq_masks field and irq
enable registers. However, the driver misses protecting the irq status
registers, which can lead to races.
Take the spinlock when accessing irqstatus too.
Fixes: 32a1795f57ee ("drm/tidss: New driver for TI Keystone platform Display SubSystem")
Cc: stable(a)vger.kernel.org
Signed-off-by: Devarsh Thakkar <devarsht(a)ti.com>
[Tomi: updated the desc]
Reviewed-by: Jonathan Cormier <jcormier(a)criticallink.com>
Tested-by: Jonathan Cormier <jcormier(a)criticallink.com>
Reviewed-by: Aradhya Bhatia <aradhya.bhatia(a)linux.dev>
Signed-off-by: Tomi Valkeinen <tomi.valkeinen(a)ideasonboard.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241021-tidss-irq-fix-v1-6-8…
diff --git a/drivers/gpu/drm/tidss/tidss_dispc.c b/drivers/gpu/drm/tidss/tidss_dispc.c
index 515f82e8a0a5..07f5c26cfa26 100644
--- a/drivers/gpu/drm/tidss/tidss_dispc.c
+++ b/drivers/gpu/drm/tidss/tidss_dispc.c
@@ -2767,8 +2767,12 @@ static void dispc_init_errata(struct dispc_device *dispc)
*/
static void dispc_softreset_k2g(struct dispc_device *dispc)
{
+ unsigned long flags;
+
+ spin_lock_irqsave(&dispc->tidss->wait_lock, flags);
dispc_set_irqenable(dispc, 0);
dispc_read_and_clear_irqstatus(dispc);
+ spin_unlock_irqrestore(&dispc->tidss->wait_lock, flags);
for (unsigned int vp_idx = 0; vp_idx < dispc->feat->num_vps; ++vp_idx)
VP_REG_FLD_MOD(dispc, vp_idx, DISPC_VP_CONTROL, 0, 0, 0);
diff --git a/drivers/gpu/drm/tidss/tidss_irq.c b/drivers/gpu/drm/tidss/tidss_irq.c
index 3cc4024ec7ff..8af4682ba56b 100644
--- a/drivers/gpu/drm/tidss/tidss_irq.c
+++ b/drivers/gpu/drm/tidss/tidss_irq.c
@@ -60,7 +60,9 @@ static irqreturn_t tidss_irq_handler(int irq, void *arg)
unsigned int id;
dispc_irq_t irqstatus;
+ spin_lock(&tidss->wait_lock);
irqstatus = dispc_read_and_clear_irqstatus(tidss->dispc);
+ spin_unlock(&tidss->wait_lock);
for (id = 0; id < tidss->num_crtcs; id++) {
struct drm_crtc *crtc = tidss->crtcs[id];
At the end of a 128b/132b link training sequence, the HW expects the
transcoder training pattern to be set to TPS2 and from that to normal
mode (disabling the training pattern). Transitioning from TPS1 directly
to normal mode leaves the transcoder in a stuck state, resulting in
page-flip timeouts later in the modeset sequence.
Atm, in case of a failure during link training, the transcoder may still
be set to output the TPS1 pattern. The transcoder is then set from TPS1
directly to normal mode in intel_dp_stop_link_train(), leading to
modeset failures later as described above. Fix this by setting the
training pattern to TPS2 if the link training failed at any point.
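In other words, the teardown must reach normal mode only from TPS2; a
condensed sketch of the invariant added by the diff below (names as in
the patch):

	/* On any 128b/132b failure, park the link in TPS2 so the later
	 * switch in intel_dp_stop_link_train() is TPS2 -> normal mode,
	 * never TPS1 -> normal mode. */
	if (!passed)
		intel_dp_program_link_training_pattern(intel_dp, crtc_state,
						       DP_PHY_DPRX,
						       DP_TRAINING_PATTERN_2);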
Cc: stable(a)vger.kernel.org # v5.18+
Cc: Jani Nikula <jani.nikula(a)intel.com>
Signed-off-by: Imre Deak <imre.deak(a)intel.com>
---
.../gpu/drm/i915/display/intel_dp_link_training.c | 15 ++++++++++++++-
1 file changed, 14 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.c b/drivers/gpu/drm/i915/display/intel_dp_link_training.c
index 3cc06c916017d..11953b03bb6aa 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c
@@ -1563,7 +1563,7 @@ intel_dp_128b132b_link_train(struct intel_dp *intel_dp,
if (wait_for(intel_dp_128b132b_intra_hop(intel_dp, crtc_state) == 0, 500)) {
lt_err(intel_dp, DP_PHY_DPRX, "128b/132b intra-hop not clear\n");
- return false;
+ goto out;
}
if (intel_dp_128b132b_lane_eq(intel_dp, crtc_state) &&
@@ -1575,6 +1575,19 @@ intel_dp_128b132b_link_train(struct intel_dp *intel_dp,
passed ? "passed" : "failed",
crtc_state->port_clock, crtc_state->lane_count);
+out:
+ /*
+ * Ensure that the training pattern does get set to TPS2 even in case
+ * of a failure, as is the case at the end of a passing link training
+ * and what is expected by the transcoder. Leaving TPS1 set (and
+ * disabling the link train mode in DP_TP_CTL later from TPS1 directly)
+ * would result in a stuck transcoder HW state and flip-done timeouts
+ * later in the modeset sequence.
+ */
+ if (!passed)
+ intel_dp_program_link_training_pattern(intel_dp, crtc_state,
+ DP_PHY_DPRX, DP_TRAINING_PATTERN_2);
+
return passed;
}
--
2.44.2
On Tue, Feb 18, 2025 at 07:48:33AM -0500, Sasha Levin wrote:
> This is a note to let you know that I've just added the patch titled
>
> MIPS: fix mips_get_syscall_arg() for o32
>
> to the 5.4-stable tree which can be found at:
> http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
>
> The filename of the patch is:
> mips-fix-mips_get_syscall_arg-for-o32.patch
> and it can be found in the queue-5.4 subdirectory.
>
> If you, or anyone else, feels it should not be added to the stable tree,
> please let <stable(a)vger.kernel.org> know about it.
>
>
>
> commit de89111213a3fb4751245214dc4dd590ed153d29
> Author: Dmitry V. Levin <ldv(a)strace.io>
> Date: Wed Feb 12 01:02:09 2025 +0200
>
> MIPS: fix mips_get_syscall_arg() for o32
>
> [ Upstream commit 733a90561ad0a4a74035d2d627098da85d43b592 ]
Just in case, the previous commit,
ed975485a13d1f6080218aa71c29425ba2dfb332, has to be applied
along with this one, otherwise the change will not compile.
--
ldv
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x da2dccd7451de62b175fb8f0808d644959e964c7
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021859-obligate-simile-7eaf@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From da2dccd7451de62b175fb8f0808d644959e964c7 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Wed, 5 Feb 2025 17:36:48 +0000
Subject: [PATCH] btrfs: fix hole expansion when writing at an offset beyond
EOF
In btrfs_write_check(), if our file's i_size is not sector size aligned
and we have a write that starts at an offset larger than the i_size and
falls within the same page as the i_size, then we end up not zeroing the
file range [i_size, write_offset).
The code is this:
start_pos = round_down(pos, fs_info->sectorsize);
oldsize = i_size_read(inode);
if (start_pos > oldsize) {
/* Expand hole size to cover write data, preventing empty gap */
loff_t end_pos = round_up(pos + count, fs_info->sectorsize);
ret = btrfs_cont_expand(BTRFS_I(inode), oldsize, end_pos);
if (ret)
return ret;
}
So if our file's i_size is 90269 bytes and a write at offset 90365 bytes
comes in, we get 'start_pos' set to 90112 bytes, which is less than the
i_size and therefore we don't zero out the range [90269, 90365) by
calling btrfs_cont_expand().
This is an old bug introduced in commit 9036c10208e1 ("Btrfs: update hole
handling v2"), from 2008, and the buggy code got moved around over the
years.
Fix this by discarding 'start_pos' and comparing against the write offset
('pos') without any alignment.
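To make the numbers above concrete, a standalone sketch of both checks
(plain C; sectorsize assumed to be 4096 as in the example):

	#include <assert.h>

	int main(void)
	{
		unsigned long oldsize = 90269; /* i_size */
		unsigned long pos = 90365; /* write offset */
		unsigned long sectorsize = 4096;
		unsigned long start_pos = pos & ~(sectorsize - 1); /* round_down */

		assert(start_pos == 90112);
		assert(!(start_pos > oldsize)); /* old check: hole missed */
		assert(pos > oldsize); /* fixed check: hole expanded */
		return 0;
	}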
This bug was recently exposed by test case generic/363, which tests this
scenario by polluting ranges beyond EOF with an mmap write and then
verifying that, after the file size increases, we get zeroes for the
range that is supposed to be a hole and not what we wrote with the
previous mmapped write.
We're only seeing this exposed now because generic/363 used to run only
on xfs until last Sunday's fstests update.
The test was failing like this:
$ ./check generic/363
FSTYP -- btrfs
PLATFORM -- Linux/x86_64 debian0 6.13.0-rc7-btrfs-next-185+ #17 SMP PREEMPT_DYNAMIC Mon Feb 3 12:28:46 WET 2025
MKFS_OPTIONS -- /dev/sdc
MOUNT_OPTIONS -- /dev/sdc /home/fdmanana/btrfs-tests/scratch_1
generic/363 0s ... [failed, exit status 1]- output mismatch (see /home/fdmanana/git/hub/xfstests/results//generic/363.out.bad)
--- tests/generic/363.out 2025-02-05 15:31:14.013646509 +0000
+++ /home/fdmanana/git/hub/xfstests/results//generic/363.out.bad 2025-02-05 17:25:33.112630781 +0000
@@ -1 +1,46 @@
QA output created by 363
+READ BAD DATA: offset = 0xdcad, size = 0xd921, fname = /home/fdmanana/btrfs-tests/dev/junk
+OFFSET GOOD BAD RANGE
+0x1609d 0x0000 0x3104 0x0
+operation# (mod 256) for the bad data may be 4
+0x1609e 0x0000 0x0472 0x1
+operation# (mod 256) for the bad data may be 4
...
(Run 'diff -u /home/fdmanana/git/hub/xfstests/tests/generic/363.out /home/fdmanana/git/hub/xfstests/results//generic/363.out.bad' to see the entire diff)
Ran: generic/363
Failures: generic/363
Failed 1 of 1 tests
Fixes: 9036c10208e1 ("Btrfs: update hole handling v2")
CC: stable(a)vger.kernel.org
Reviewed-by: Qu Wenruo <wqu(a)suse.com>
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 36f51c311bb1..ed3c0d6546c5 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1039,7 +1039,6 @@ int btrfs_write_check(struct kiocb *iocb, size_t count)
loff_t pos = iocb->ki_pos;
int ret;
loff_t oldsize;
- loff_t start_pos;
/*
* Quickly bail out on NOWAIT writes if we don't have the nodatacow or
@@ -1066,9 +1065,8 @@ int btrfs_write_check(struct kiocb *iocb, size_t count)
inode_inc_iversion(inode);
}
- start_pos = round_down(pos, fs_info->sectorsize);
oldsize = i_size_read(inode);
- if (start_pos > oldsize) {
+ if (pos > oldsize) {
/* Expand hole size to cover write data, preventing empty gap */
loff_t end_pos = round_up(pos + count, fs_info->sectorsize);
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x da2dccd7451de62b175fb8f0808d644959e964c7
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021853-wiring-vest-fedd@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From da2dccd7451de62b175fb8f0808d644959e964c7 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Wed, 5 Feb 2025 17:36:48 +0000
Subject: [PATCH] btrfs: fix hole expansion when writing at an offset beyond
EOF
In btrfs_write_check(), if our file's i_size is not sector size aligned
and we have a write that starts at an offset larger than the i_size and
falls within the same page as the i_size, then we end up not zeroing the
file range [i_size, write_offset).
The code is this:
start_pos = round_down(pos, fs_info->sectorsize);
oldsize = i_size_read(inode);
if (start_pos > oldsize) {
/* Expand hole size to cover write data, preventing empty gap */
loff_t end_pos = round_up(pos + count, fs_info->sectorsize);
ret = btrfs_cont_expand(BTRFS_I(inode), oldsize, end_pos);
if (ret)
return ret;
}
So if our file's i_size is 90269 bytes and a write at offset 90365 bytes
comes in, we get 'start_pos' set to 90112 bytes, which is less than the
i_size and therefore we don't zero out the range [90269, 90365) by
calling btrfs_cont_expand().
This is an old bug introduced in commit 9036c10208e1 ("Btrfs: update hole
handling v2"), from 2008, and the buggy code got moved around over the
years.
Fix this by discarding 'start_pos' and comparing against the write offset
('pos') without any alignment.
This bug was recently exposed by test case generic/363, which tests this
scenario by polluting ranges beyond EOF with an mmap write and then
verifying that, after the file size increases, we get zeroes for the
range that is supposed to be a hole and not what we wrote with the
previous mmapped write.
We're only seeing this exposed now because generic/363 used to run only
on xfs until last Sunday's fstests update.
The test was failing like this:
$ ./check generic/363
FSTYP -- btrfs
PLATFORM -- Linux/x86_64 debian0 6.13.0-rc7-btrfs-next-185+ #17 SMP PREEMPT_DYNAMIC Mon Feb 3 12:28:46 WET 2025
MKFS_OPTIONS -- /dev/sdc
MOUNT_OPTIONS -- /dev/sdc /home/fdmanana/btrfs-tests/scratch_1
generic/363 0s ... [failed, exit status 1]- output mismatch (see /home/fdmanana/git/hub/xfstests/results//generic/363.out.bad)
--- tests/generic/363.out 2025-02-05 15:31:14.013646509 +0000
+++ /home/fdmanana/git/hub/xfstests/results//generic/363.out.bad 2025-02-05 17:25:33.112630781 +0000
@@ -1 +1,46 @@
QA output created by 363
+READ BAD DATA: offset = 0xdcad, size = 0xd921, fname = /home/fdmanana/btrfs-tests/dev/junk
+OFFSET GOOD BAD RANGE
+0x1609d 0x0000 0x3104 0x0
+operation# (mod 256) for the bad data may be 4
+0x1609e 0x0000 0x0472 0x1
+operation# (mod 256) for the bad data may be 4
...
(Run 'diff -u /home/fdmanana/git/hub/xfstests/tests/generic/363.out /home/fdmanana/git/hub/xfstests/results//generic/363.out.bad' to see the entire diff)
Ran: generic/363
Failures: generic/363
Failed 1 of 1 tests
Fixes: 9036c10208e1 ("Btrfs: update hole handling v2")
CC: stable(a)vger.kernel.org
Reviewed-by: Qu Wenruo <wqu(a)suse.com>
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 36f51c311bb1..ed3c0d6546c5 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1039,7 +1039,6 @@ int btrfs_write_check(struct kiocb *iocb, size_t count)
loff_t pos = iocb->ki_pos;
int ret;
loff_t oldsize;
- loff_t start_pos;
/*
* Quickly bail out on NOWAIT writes if we don't have the nodatacow or
@@ -1066,9 +1065,8 @@ int btrfs_write_check(struct kiocb *iocb, size_t count)
inode_inc_iversion(inode);
}
- start_pos = round_down(pos, fs_info->sectorsize);
oldsize = i_size_read(inode);
- if (start_pos > oldsize) {
+ if (pos > oldsize) {
/* Expand hole size to cover write data, preventing empty gap */
loff_t end_pos = round_up(pos + count, fs_info->sectorsize);
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x 32966821574cd2917bd60f2554f435fe527f4702
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021801-myself-cane-67bf@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 32966821574cd2917bd60f2554f435fe527f4702 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj(a)kernel.org>
Date: Fri, 7 Feb 2025 10:59:06 -1000
Subject: [PATCH] sched_ext: Fix migration disabled handling in targeted
dispatches
A dispatch operation that can target a specific local DSQ -
scx_bpf_dsq_move_to_local() or scx_bpf_dsq_move() - checks whether the task
can be migrated to the target CPU using task_can_run_on_remote_rq(). If the
task can't be migrated to the targeted CPU, it is bounced through a global
DSQ.
task_can_run_on_remote_rq() assumes that the task is on a CPU that's
different from the targeted CPU, but the callers don't uphold that
assumption and may call the function when the task is already on the target
CPU. When such a task has migration disabled, task_can_run_on_remote_rq()
ends up incorrectly returning %false, unnecessarily bouncing the task to a
global DSQ.
Fix it by updating the callers to only call task_can_run_on_remote_rq() when
the task is on a different CPU than the target CPU. As this is a bit subtle,
for clarity and documentation:
- Make task_can_run_on_remote_rq() trigger SCHED_WARN_ON() if the task is on
the same CPU as the target CPU.
- is_migration_disabled() test in task_can_run_on_remote_rq() cannot trigger
if the task is on a different CPU than the target CPU as the preceding
task_allowed_on_cpu() test should fail beforehand. Convert the test into
SCHED_WARN_ON().
Signed-off-by: Tejun Heo <tj(a)kernel.org>
Fixes: 4c30f5ce4f7a ("sched_ext: Implement scx_bpf_dispatch[_vtime]_from_dsq()")
Fixes: 0366017e0973 ("sched_ext: Use task_can_run_on_remote_rq() test in dispatch_to_local_dsq()")
Cc: stable(a)vger.kernel.org # v6.12+
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index efdbf4d85a21..e01144340d67 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -2333,12 +2333,16 @@ static void move_remote_task_to_local_dsq(struct task_struct *p, u64 enq_flags,
*
* - The BPF scheduler is bypassed while the rq is offline and we can always say
* no to the BPF scheduler initiated migrations while offline.
+ *
+ * The caller must ensure that @p and @rq are on different CPUs.
*/
static bool task_can_run_on_remote_rq(struct task_struct *p, struct rq *rq,
bool trigger_error)
{
int cpu = cpu_of(rq);
+ SCHED_WARN_ON(task_cpu(p) == cpu);
+
/*
* We don't require the BPF scheduler to avoid dispatching to offline
* CPUs mostly for convenience but also because CPUs can go offline
@@ -2352,8 +2356,11 @@ static bool task_can_run_on_remote_rq(struct task_struct *p, struct rq *rq,
return false;
}
- if (unlikely(is_migration_disabled(p)))
- return false;
+ /*
+ * If @p has migration disabled, @p->cpus_ptr only contains its current
+ * CPU and the above task_allowed_on_cpu() test should have failed.
+ */
+ SCHED_WARN_ON(is_migration_disabled(p));
if (!scx_rq_online(rq))
return false;
@@ -2457,7 +2464,8 @@ static struct rq *move_task_between_dsqs(struct task_struct *p, u64 enq_flags,
if (dst_dsq->id == SCX_DSQ_LOCAL) {
dst_rq = container_of(dst_dsq, struct rq, scx.local_dsq);
- if (!task_can_run_on_remote_rq(p, dst_rq, true)) {
+ if (src_rq != dst_rq &&
+ unlikely(!task_can_run_on_remote_rq(p, dst_rq, true))) {
dst_dsq = find_global_dsq(p);
dst_rq = src_rq;
}
@@ -2611,7 +2619,8 @@ static void dispatch_to_local_dsq(struct rq *rq, struct scx_dispatch_q *dst_dsq,
}
#ifdef CONFIG_SMP
- if (unlikely(!task_can_run_on_remote_rq(p, dst_rq, true))) {
+ if (src_rq != dst_rq &&
+ unlikely(!task_can_run_on_remote_rq(p, dst_rq, true))) {
dispatch_enqueue(find_global_dsq(p), p,
enq_flags | SCX_ENQ_CLEAR_OPSS);
return;
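As a hedged illustration of the calling convention the fix establishes,
here is a toy userspace model (the names and the bounce logic are
simplified stand-ins, not the scheduler code; the assert() loosely models
SCHED_WARN_ON()):

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

/* Toy task: pinned to its current CPU when migration is disabled. */
struct task {
        int cpu;
        bool migration_disabled;
};

/* Stand-in for task_can_run_on_remote_rq(): only valid for remote CPUs. */
static bool can_run_on_remote_cpu(const struct task *t, int dst_cpu)
{
        assert(t->cpu != dst_cpu);      /* loosely models SCHED_WARN_ON() */
        return !t->migration_disabled;  /* grossly simplified */
}

static const char *dispatch(const struct task *t, int dst_cpu)
{
        /* The fix: only consult the remote-rq check when the task really
         * is on a different CPU than the dispatch target. */
        if (t->cpu != dst_cpu && !can_run_on_remote_cpu(t, dst_cpu))
                return "bounced to a global DSQ";
        return "dispatched to the local DSQ";
}

int main(void)
{
        struct task t = { .cpu = 2, .migration_disabled = true };

        /* Already on CPU 2: the unguarded code would have bounced this
         * task because migration is disabled; the guarded code does not. */
        printf("%s\n", dispatch(&t, 2));
        return 0;
}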
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 4ab37fcb42832cdd3e9d5e50653285ca84d6686f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021832-vowel-unsheathe-d6d6@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 4ab37fcb42832cdd3e9d5e50653285ca84d6686f Mon Sep 17 00:00:00 2001
From: Jill Donahue <jilliandonahue58(a)gmail.com>
Date: Tue, 11 Feb 2025 10:48:05 -0700
Subject: [PATCH] USB: gadget: f_midi: f_midi_complete to call queue_work
When using USB MIDI, an attempt is made to acquire the same lock twice
through a re-entrant call to f_midi_transmit, causing a deadlock.
Fix it by using queue_work() to schedule the inner f_midi_transmit() via
a high-priority work queue from the completion handler.
Link: https://lore.kernel.org/all/CAArt=LjxU0fUZOj06X+5tkeGT+6RbXzpWg1h4t4Fwa_KGV…
Fixes: d5daf49b58661 ("USB: gadget: midi: add midi function driver")
Cc: stable <stable(a)kernel.org>
Signed-off-by: Jill Donahue <jilliandonahue58(a)gmail.com>
Link: https://lore.kernel.org/r/20250211174805.1369265-1-jdonahue@fender.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/gadget/function/f_midi.c b/drivers/usb/gadget/function/f_midi.c
index 47260d65066a..da82598fcef8 100644
--- a/drivers/usb/gadget/function/f_midi.c
+++ b/drivers/usb/gadget/function/f_midi.c
@@ -283,7 +283,7 @@ f_midi_complete(struct usb_ep *ep, struct usb_request *req)
/* Our transmit completed. See if there's more to go.
* f_midi_transmit eats req, don't queue it again. */
req->length = 0;
- f_midi_transmit(midi);
+ queue_work(system_highpri_wq, &midi->work);
return;
}
break;
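For context, a hedged kernel-style sketch of the deferral pattern (the
struct layout and names are illustrative, not the actual f_midi code, and
the locking is simplified):

#include <linux/container_of.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>

struct midi_dev {
        spinlock_t lock;          /* held across request submission */
        struct work_struct work;  /* defers transmission to process context */
};

static void transmit_work(struct work_struct *w)
{
        struct midi_dev *midi = container_of(w, struct midi_dev, work);

        spin_lock(&midi->lock);
        /* ... pick the next request and submit it to the endpoint ... */
        spin_unlock(&midi->lock);
}

/* Completion handler: the endpoint queueing call may complete
 * synchronously, so this can run while the submit path still holds
 * midi->lock. Calling the transmit function directly from here would
 * retake the lock and deadlock; deferring to the high-priority system
 * workqueue does not.
 */
static void complete_cb(struct midi_dev *midi)
{
        queue_work(system_highpri_wq, &midi->work);
}

static void midi_init(struct midi_dev *midi)
{
        spin_lock_init(&midi->lock);
        INIT_WORK(&midi->work, transmit_work);
}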
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 4ab37fcb42832cdd3e9d5e50653285ca84d6686f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021832-floss-petticoat-45ad@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 4ab37fcb42832cdd3e9d5e50653285ca84d6686f Mon Sep 17 00:00:00 2001
From: Jill Donahue <jilliandonahue58(a)gmail.com>
Date: Tue, 11 Feb 2025 10:48:05 -0700
Subject: [PATCH] USB: gadget: f_midi: f_midi_complete to call queue_work
When using USB MIDI, an attempt is made to acquire the same lock twice
through a re-entrant call to f_midi_transmit, causing a deadlock.
Fix it by using queue_work() to schedule the inner f_midi_transmit() via
a high-priority work queue from the completion handler.
Link: https://lore.kernel.org/all/CAArt=LjxU0fUZOj06X+5tkeGT+6RbXzpWg1h4t4Fwa_KGV…
Fixes: d5daf49b58661 ("USB: gadget: midi: add midi function driver")
Cc: stable <stable(a)kernel.org>
Signed-off-by: Jill Donahue <jilliandonahue58(a)gmail.com>
Link: https://lore.kernel.org/r/20250211174805.1369265-1-jdonahue@fender.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/gadget/function/f_midi.c b/drivers/usb/gadget/function/f_midi.c
index 47260d65066a..da82598fcef8 100644
--- a/drivers/usb/gadget/function/f_midi.c
+++ b/drivers/usb/gadget/function/f_midi.c
@@ -283,7 +283,7 @@ f_midi_complete(struct usb_ep *ep, struct usb_request *req)
/* Our transmit completed. See if there's more to go.
* f_midi_transmit eats req, don't queue it again. */
req->length = 0;
- f_midi_transmit(midi);
+ queue_work(system_highpri_wq, &midi->work);
return;
}
break;
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 4ab37fcb42832cdd3e9d5e50653285ca84d6686f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021831-swab-hassle-6824@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 4ab37fcb42832cdd3e9d5e50653285ca84d6686f Mon Sep 17 00:00:00 2001
From: Jill Donahue <jilliandonahue58(a)gmail.com>
Date: Tue, 11 Feb 2025 10:48:05 -0700
Subject: [PATCH] USB: gadget: f_midi: f_midi_complete to call queue_work
When using USB MIDI, an attempt is made to acquire the same lock twice
through a re-entrant call to f_midi_transmit, causing a deadlock.
Fix it by using queue_work() to schedule the inner f_midi_transmit() via
a high-priority work queue from the completion handler.
Link: https://lore.kernel.org/all/CAArt=LjxU0fUZOj06X+5tkeGT+6RbXzpWg1h4t4Fwa_KGV…
Fixes: d5daf49b58661 ("USB: gadget: midi: add midi function driver")
Cc: stable <stable(a)kernel.org>
Signed-off-by: Jill Donahue <jilliandonahue58(a)gmail.com>
Link: https://lore.kernel.org/r/20250211174805.1369265-1-jdonahue@fender.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/gadget/function/f_midi.c b/drivers/usb/gadget/function/f_midi.c
index 47260d65066a..da82598fcef8 100644
--- a/drivers/usb/gadget/function/f_midi.c
+++ b/drivers/usb/gadget/function/f_midi.c
@@ -283,7 +283,7 @@ f_midi_complete(struct usb_ep *ep, struct usb_request *req)
/* Our transmit completed. See if there's more to go.
* f_midi_transmit eats req, don't queue it again. */
req->length = 0;
- f_midi_transmit(midi);
+ queue_work(system_highpri_wq, &midi->work);
return;
}
break;
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 4ab37fcb42832cdd3e9d5e50653285ca84d6686f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021831-fedora-vanquish-5096@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 4ab37fcb42832cdd3e9d5e50653285ca84d6686f Mon Sep 17 00:00:00 2001
From: Jill Donahue <jilliandonahue58(a)gmail.com>
Date: Tue, 11 Feb 2025 10:48:05 -0700
Subject: [PATCH] USB: gadget: f_midi: f_midi_complete to call queue_work
When using USB MIDI, an attempt is made to acquire the same lock twice
through a re-entrant call to f_midi_transmit, causing a deadlock.
Fix it by using queue_work() to schedule the inner f_midi_transmit() via
a high-priority work queue from the completion handler.
Link: https://lore.kernel.org/all/CAArt=LjxU0fUZOj06X+5tkeGT+6RbXzpWg1h4t4Fwa_KGV…
Fixes: d5daf49b58661 ("USB: gadget: midi: add midi function driver")
Cc: stable <stable(a)kernel.org>
Signed-off-by: Jill Donahue <jilliandonahue58(a)gmail.com>
Link: https://lore.kernel.org/r/20250211174805.1369265-1-jdonahue@fender.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/gadget/function/f_midi.c b/drivers/usb/gadget/function/f_midi.c
index 47260d65066a..da82598fcef8 100644
--- a/drivers/usb/gadget/function/f_midi.c
+++ b/drivers/usb/gadget/function/f_midi.c
@@ -283,7 +283,7 @@ f_midi_complete(struct usb_ep *ep, struct usb_request *req)
/* Our transmit completed. See if there's more to go.
* f_midi_transmit eats req, don't queue it again. */
req->length = 0;
- f_midi_transmit(midi);
+ queue_work(system_highpri_wq, &midi->work);
return;
}
break;
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x 4ab37fcb42832cdd3e9d5e50653285ca84d6686f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021830-agenda-disgrace-4850@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 4ab37fcb42832cdd3e9d5e50653285ca84d6686f Mon Sep 17 00:00:00 2001
From: Jill Donahue <jilliandonahue58(a)gmail.com>
Date: Tue, 11 Feb 2025 10:48:05 -0700
Subject: [PATCH] USB: gadget: f_midi: f_midi_complete to call queue_work
When using USB MIDI, an attempt is made to acquire the same lock twice
through a re-entrant call to f_midi_transmit, causing a deadlock.
Fix it by using queue_work() to schedule the inner f_midi_transmit() via
a high-priority work queue from the completion handler.
Link: https://lore.kernel.org/all/CAArt=LjxU0fUZOj06X+5tkeGT+6RbXzpWg1h4t4Fwa_KGV…
Fixes: d5daf49b58661 ("USB: gadget: midi: add midi function driver")
Cc: stable <stable(a)kernel.org>
Signed-off-by: Jill Donahue <jilliandonahue58(a)gmail.com>
Link: https://lore.kernel.org/r/20250211174805.1369265-1-jdonahue@fender.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/gadget/function/f_midi.c b/drivers/usb/gadget/function/f_midi.c
index 47260d65066a..da82598fcef8 100644
--- a/drivers/usb/gadget/function/f_midi.c
+++ b/drivers/usb/gadget/function/f_midi.c
@@ -283,7 +283,7 @@ f_midi_complete(struct usb_ep *ep, struct usb_request *req)
/* Our transmit completed. See if there's more to go.
* f_midi_transmit eats req, don't queue it again. */
req->length = 0;
- f_midi_transmit(midi);
+ queue_work(system_highpri_wq, &midi->work);
return;
}
break;
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 4ab37fcb42832cdd3e9d5e50653285ca84d6686f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021830-phantom-negligent-416a@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 4ab37fcb42832cdd3e9d5e50653285ca84d6686f Mon Sep 17 00:00:00 2001
From: Jill Donahue <jilliandonahue58(a)gmail.com>
Date: Tue, 11 Feb 2025 10:48:05 -0700
Subject: [PATCH] USB: gadget: f_midi: f_midi_complete to call queue_work
When using USB MIDI, an attempt is made to acquire the same lock twice
through a re-entrant call to f_midi_transmit, causing a deadlock.
Fix it by using queue_work() to schedule the inner f_midi_transmit() via
a high-priority work queue from the completion handler.
Link: https://lore.kernel.org/all/CAArt=LjxU0fUZOj06X+5tkeGT+6RbXzpWg1h4t4Fwa_KGV…
Fixes: d5daf49b58661 ("USB: gadget: midi: add midi function driver")
Cc: stable <stable(a)kernel.org>
Signed-off-by: Jill Donahue <jilliandonahue58(a)gmail.com>
Link: https://lore.kernel.org/r/20250211174805.1369265-1-jdonahue@fender.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/gadget/function/f_midi.c b/drivers/usb/gadget/function/f_midi.c
index 47260d65066a..da82598fcef8 100644
--- a/drivers/usb/gadget/function/f_midi.c
+++ b/drivers/usb/gadget/function/f_midi.c
@@ -283,7 +283,7 @@ f_midi_complete(struct usb_ep *ep, struct usb_request *req)
/* Our transmit completed. See if there's more to go.
* f_midi_transmit eats req, don't queue it again. */
req->length = 0;
- f_midi_transmit(midi);
+ queue_work(system_highpri_wq, &midi->work);
return;
}
break;
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 011b0335903832facca86cd8ed05d7d8d94c9c76
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021800-freebase-womanlike-28b0@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 011b0335903832facca86cd8ed05d7d8d94c9c76 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni(a)redhat.com>
Date: Thu, 6 Feb 2025 22:28:48 +0100
Subject: [PATCH] Revert "net: skb: introduce and use a single page frag cache"
This reverts commit dbae2b062824 ("net: skb: introduce and use a single
page frag cache"). The intended goal of such change was to counter a
performance regression introduced by commit 3226b158e67c ("net: avoid
32 x truesize under-estimation for tiny skbs").
Unfortunately, the blamed commit introduced another regression for the
virtio_net driver. That driver calls napi_alloc_skb() with a tiny
size, so that the whole head frag could fit in a 512-byte block.
The single page frag cache uses a 1K fragment for such an allocation, and
the additional overhead, under a flood of small UDP packets, makes the page
allocator a bottleneck.
Thanks to commit bf9f1baa279f ("net: add dedicated kmem_cache for
typical/small skb->head"), this revert does not re-introduce the
original regression. Actually, in the relevant test on top of this
revert, I measure a small but noticeable positive delta, just above
noise level.
The revert itself required some additional mangling due to the
introduction of the SKB_HEAD_ALIGN() helper and local lock infra in the
affected code.
Suggested-by: Eric Dumazet <edumazet(a)google.com>
Fixes: dbae2b062824 ("net: skb: introduce and use a single page frag cache")
Signed-off-by: Paolo Abeni <pabeni(a)redhat.com>
Link: https://patch.msgid.link/e649212fde9f0fdee23909ca0d14158d32bb7425.173887729…
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c0a86afb85da..365f0e2098d1 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4115,7 +4115,6 @@ void netif_receive_skb_list(struct list_head *head);
gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb);
void napi_gro_flush(struct napi_struct *napi, bool flush_old);
struct sk_buff *napi_get_frags(struct napi_struct *napi);
-void napi_get_frags_check(struct napi_struct *napi);
gro_result_t napi_gro_frags(struct napi_struct *napi);
static inline void napi_free_frags(struct napi_struct *napi)
diff --git a/net/core/dev.c b/net/core/dev.c
index b91658e8aedb..55e356a68db6 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6920,6 +6920,23 @@ netif_napi_dev_list_add(struct net_device *dev, struct napi_struct *napi)
list_add_rcu(&napi->dev_list, higher); /* adds after higher */
}
+/* Double check that napi_get_frags() allocates skbs with
+ * skb->head being backed by slab, not a page fragment.
+ * This is to make sure bug fixed in 3226b158e67c
+ * ("net: avoid 32 x truesize under-estimation for tiny skbs")
+ * does not accidentally come back.
+ */
+static void napi_get_frags_check(struct napi_struct *napi)
+{
+ struct sk_buff *skb;
+
+ local_bh_disable();
+ skb = napi_get_frags(napi);
+ WARN_ON_ONCE(skb && skb->head_frag);
+ napi_free_frags(napi);
+ local_bh_enable();
+}
+
void netif_napi_add_weight_locked(struct net_device *dev,
struct napi_struct *napi,
int (*poll)(struct napi_struct *, int),
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index a441613a1e6c..6a99c453397f 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -220,67 +220,9 @@ static void skb_under_panic(struct sk_buff *skb, unsigned int sz, void *addr)
#define NAPI_SKB_CACHE_BULK 16
#define NAPI_SKB_CACHE_HALF (NAPI_SKB_CACHE_SIZE / 2)
-#if PAGE_SIZE == SZ_4K
-
-#define NAPI_HAS_SMALL_PAGE_FRAG 1
-#define NAPI_SMALL_PAGE_PFMEMALLOC(nc) ((nc).pfmemalloc)
-
-/* specialized page frag allocator using a single order 0 page
- * and slicing it into 1K sized fragment. Constrained to systems
- * with a very limited amount of 1K fragments fitting a single
- * page - to avoid excessive truesize underestimation
- */
-
-struct page_frag_1k {
- void *va;
- u16 offset;
- bool pfmemalloc;
-};
-
-static void *page_frag_alloc_1k(struct page_frag_1k *nc, gfp_t gfp)
-{
- struct page *page;
- int offset;
-
- offset = nc->offset - SZ_1K;
- if (likely(offset >= 0))
- goto use_frag;
-
- page = alloc_pages_node(NUMA_NO_NODE, gfp, 0);
- if (!page)
- return NULL;
-
- nc->va = page_address(page);
- nc->pfmemalloc = page_is_pfmemalloc(page);
- offset = PAGE_SIZE - SZ_1K;
- page_ref_add(page, offset / SZ_1K);
-
-use_frag:
- nc->offset = offset;
- return nc->va + offset;
-}
-#else
-
-/* the small page is actually unused in this build; add dummy helpers
- * to please the compiler and avoid later preprocessor's conditionals
- */
-#define NAPI_HAS_SMALL_PAGE_FRAG 0
-#define NAPI_SMALL_PAGE_PFMEMALLOC(nc) false
-
-struct page_frag_1k {
-};
-
-static void *page_frag_alloc_1k(struct page_frag_1k *nc, gfp_t gfp_mask)
-{
- return NULL;
-}
-
-#endif
-
struct napi_alloc_cache {
local_lock_t bh_lock;
struct page_frag_cache page;
- struct page_frag_1k page_small;
unsigned int skb_count;
void *skb_cache[NAPI_SKB_CACHE_SIZE];
};
@@ -290,23 +232,6 @@ static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache) = {
.bh_lock = INIT_LOCAL_LOCK(bh_lock),
};
-/* Double check that napi_get_frags() allocates skbs with
- * skb->head being backed by slab, not a page fragment.
- * This is to make sure bug fixed in 3226b158e67c
- * ("net: avoid 32 x truesize under-estimation for tiny skbs")
- * does not accidentally come back.
- */
-void napi_get_frags_check(struct napi_struct *napi)
-{
- struct sk_buff *skb;
-
- local_bh_disable();
- skb = napi_get_frags(napi);
- WARN_ON_ONCE(!NAPI_HAS_SMALL_PAGE_FRAG && skb && skb->head_frag);
- napi_free_frags(napi);
- local_bh_enable();
-}
-
void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask)
{
struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
@@ -813,10 +738,8 @@ struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int len)
/* If requested length is either too small or too big,
* we use kmalloc() for skb->head allocation.
- * When the small frag allocator is available, prefer it over kmalloc
- * for small fragments
*/
- if ((!NAPI_HAS_SMALL_PAGE_FRAG && len <= SKB_WITH_OVERHEAD(1024)) ||
+ if (len <= SKB_WITH_OVERHEAD(1024) ||
len > SKB_WITH_OVERHEAD(PAGE_SIZE) ||
(gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) {
skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX | SKB_ALLOC_NAPI,
@@ -826,32 +749,16 @@ struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int len)
goto skb_success;
}
+ len = SKB_HEAD_ALIGN(len);
+
if (sk_memalloc_socks())
gfp_mask |= __GFP_MEMALLOC;
local_lock_nested_bh(&napi_alloc_cache.bh_lock);
nc = this_cpu_ptr(&napi_alloc_cache);
- if (NAPI_HAS_SMALL_PAGE_FRAG && len <= SKB_WITH_OVERHEAD(1024)) {
- /* we are artificially inflating the allocation size, but
- * that is not as bad as it may look like, as:
- * - 'len' less than GRO_MAX_HEAD makes little sense
- * - On most systems, larger 'len' values lead to fragment
- * size above 512 bytes
- * - kmalloc would use the kmalloc-1k slab for such values
- * - Builds with smaller GRO_MAX_HEAD will very likely do
- * little networking, as that implies no WiFi and no
- * tunnels support, and 32 bits arches.
- */
- len = SZ_1K;
- data = page_frag_alloc_1k(&nc->page_small, gfp_mask);
- pfmemalloc = NAPI_SMALL_PAGE_PFMEMALLOC(nc->page_small);
- } else {
- len = SKB_HEAD_ALIGN(len);
-
- data = page_frag_alloc(&nc->page, len, gfp_mask);
- pfmemalloc = page_frag_cache_is_pfmemalloc(&nc->page);
- }
+ data = page_frag_alloc(&nc->page, len, gfp_mask);
+ pfmemalloc = page_frag_cache_is_pfmemalloc(&nc->page);
local_unlock_nested_bh(&napi_alloc_cache.bh_lock);
if (unlikely(!data))
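Back-of-envelope arithmetic for the regression described above, as a
runnable sketch (4K pages assumed; the head and object sizes are the
illustrative ones from the commit message):

#include <stdio.h>

int main(void)
{
        unsigned long page_size = 4096;

        /* With the single page frag cache, every tiny napi_alloc_skb()
         * head was inflated to a 1K slice, so one order-0 page served
         * only four allocations before hitting the page allocator again.
         */
        printf("1K frags per 4K page: %lu\n", page_size / 1024);

        /* After the revert, such a tiny head fits a 512-byte slab object
         * instead (the dedicated small-head kmem_cache from bf9f1baa279f),
         * so roughly twice as many heads share a page and slab recycles
         * objects without returning to the page allocator each time.
         */
        printf("512B slab objects per 4K page: %lu\n", page_size / 512);
        return 0;
}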
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 011b0335903832facca86cd8ed05d7d8d94c9c76
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021859-upturned-trailside-5b21@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 011b0335903832facca86cd8ed05d7d8d94c9c76 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni(a)redhat.com>
Date: Thu, 6 Feb 2025 22:28:48 +0100
Subject: [PATCH] Revert "net: skb: introduce and use a single page frag cache"
This reverts commit dbae2b062824 ("net: skb: introduce and use a single
page frag cache"). The intended goal of such change was to counter a
performance regression introduced by commit 3226b158e67c ("net: avoid
32 x truesize under-estimation for tiny skbs").
Unfortunately, the blamed commit introduced another regression for the
virtio_net driver. That driver calls napi_alloc_skb() with a tiny
size, so that the whole head frag could fit in a 512-byte block.
The single page frag cache uses a 1K fragment for such an allocation, and
the additional overhead, under a flood of small UDP packets, makes the page
allocator a bottleneck.
Thanks to commit bf9f1baa279f ("net: add dedicated kmem_cache for
typical/small skb->head"), this revert does not re-introduce the
original regression. Actually, in the relevant test on top of this
revert, I measure a small but noticeable positive delta, just above
noise level.
The revert itself required some additional mangling due to the
introduction of the SKB_HEAD_ALIGN() helper and local lock infra in the
affected code.
Suggested-by: Eric Dumazet <edumazet(a)google.com>
Fixes: dbae2b062824 ("net: skb: introduce and use a single page frag cache")
Signed-off-by: Paolo Abeni <pabeni(a)redhat.com>
Link: https://patch.msgid.link/e649212fde9f0fdee23909ca0d14158d32bb7425.173887729…
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c0a86afb85da..365f0e2098d1 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4115,7 +4115,6 @@ void netif_receive_skb_list(struct list_head *head);
gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb);
void napi_gro_flush(struct napi_struct *napi, bool flush_old);
struct sk_buff *napi_get_frags(struct napi_struct *napi);
-void napi_get_frags_check(struct napi_struct *napi);
gro_result_t napi_gro_frags(struct napi_struct *napi);
static inline void napi_free_frags(struct napi_struct *napi)
diff --git a/net/core/dev.c b/net/core/dev.c
index b91658e8aedb..55e356a68db6 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6920,6 +6920,23 @@ netif_napi_dev_list_add(struct net_device *dev, struct napi_struct *napi)
list_add_rcu(&napi->dev_list, higher); /* adds after higher */
}
+/* Double check that napi_get_frags() allocates skbs with
+ * skb->head being backed by slab, not a page fragment.
+ * This is to make sure bug fixed in 3226b158e67c
+ * ("net: avoid 32 x truesize under-estimation for tiny skbs")
+ * does not accidentally come back.
+ */
+static void napi_get_frags_check(struct napi_struct *napi)
+{
+ struct sk_buff *skb;
+
+ local_bh_disable();
+ skb = napi_get_frags(napi);
+ WARN_ON_ONCE(skb && skb->head_frag);
+ napi_free_frags(napi);
+ local_bh_enable();
+}
+
void netif_napi_add_weight_locked(struct net_device *dev,
struct napi_struct *napi,
int (*poll)(struct napi_struct *, int),
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index a441613a1e6c..6a99c453397f 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -220,67 +220,9 @@ static void skb_under_panic(struct sk_buff *skb, unsigned int sz, void *addr)
#define NAPI_SKB_CACHE_BULK 16
#define NAPI_SKB_CACHE_HALF (NAPI_SKB_CACHE_SIZE / 2)
-#if PAGE_SIZE == SZ_4K
-
-#define NAPI_HAS_SMALL_PAGE_FRAG 1
-#define NAPI_SMALL_PAGE_PFMEMALLOC(nc) ((nc).pfmemalloc)
-
-/* specialized page frag allocator using a single order 0 page
- * and slicing it into 1K sized fragment. Constrained to systems
- * with a very limited amount of 1K fragments fitting a single
- * page - to avoid excessive truesize underestimation
- */
-
-struct page_frag_1k {
- void *va;
- u16 offset;
- bool pfmemalloc;
-};
-
-static void *page_frag_alloc_1k(struct page_frag_1k *nc, gfp_t gfp)
-{
- struct page *page;
- int offset;
-
- offset = nc->offset - SZ_1K;
- if (likely(offset >= 0))
- goto use_frag;
-
- page = alloc_pages_node(NUMA_NO_NODE, gfp, 0);
- if (!page)
- return NULL;
-
- nc->va = page_address(page);
- nc->pfmemalloc = page_is_pfmemalloc(page);
- offset = PAGE_SIZE - SZ_1K;
- page_ref_add(page, offset / SZ_1K);
-
-use_frag:
- nc->offset = offset;
- return nc->va + offset;
-}
-#else
-
-/* the small page is actually unused in this build; add dummy helpers
- * to please the compiler and avoid later preprocessor's conditionals
- */
-#define NAPI_HAS_SMALL_PAGE_FRAG 0
-#define NAPI_SMALL_PAGE_PFMEMALLOC(nc) false
-
-struct page_frag_1k {
-};
-
-static void *page_frag_alloc_1k(struct page_frag_1k *nc, gfp_t gfp_mask)
-{
- return NULL;
-}
-
-#endif
-
struct napi_alloc_cache {
local_lock_t bh_lock;
struct page_frag_cache page;
- struct page_frag_1k page_small;
unsigned int skb_count;
void *skb_cache[NAPI_SKB_CACHE_SIZE];
};
@@ -290,23 +232,6 @@ static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache) = {
.bh_lock = INIT_LOCAL_LOCK(bh_lock),
};
-/* Double check that napi_get_frags() allocates skbs with
- * skb->head being backed by slab, not a page fragment.
- * This is to make sure bug fixed in 3226b158e67c
- * ("net: avoid 32 x truesize under-estimation for tiny skbs")
- * does not accidentally come back.
- */
-void napi_get_frags_check(struct napi_struct *napi)
-{
- struct sk_buff *skb;
-
- local_bh_disable();
- skb = napi_get_frags(napi);
- WARN_ON_ONCE(!NAPI_HAS_SMALL_PAGE_FRAG && skb && skb->head_frag);
- napi_free_frags(napi);
- local_bh_enable();
-}
-
void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask)
{
struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
@@ -813,10 +738,8 @@ struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int len)
/* If requested length is either too small or too big,
* we use kmalloc() for skb->head allocation.
- * When the small frag allocator is available, prefer it over kmalloc
- * for small fragments
*/
- if ((!NAPI_HAS_SMALL_PAGE_FRAG && len <= SKB_WITH_OVERHEAD(1024)) ||
+ if (len <= SKB_WITH_OVERHEAD(1024) ||
len > SKB_WITH_OVERHEAD(PAGE_SIZE) ||
(gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) {
skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX | SKB_ALLOC_NAPI,
@@ -826,32 +749,16 @@ struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int len)
goto skb_success;
}
+ len = SKB_HEAD_ALIGN(len);
+
if (sk_memalloc_socks())
gfp_mask |= __GFP_MEMALLOC;
local_lock_nested_bh(&napi_alloc_cache.bh_lock);
nc = this_cpu_ptr(&napi_alloc_cache);
- if (NAPI_HAS_SMALL_PAGE_FRAG && len <= SKB_WITH_OVERHEAD(1024)) {
- /* we are artificially inflating the allocation size, but
- * that is not as bad as it may look like, as:
- * - 'len' less than GRO_MAX_HEAD makes little sense
- * - On most systems, larger 'len' values lead to fragment
- * size above 512 bytes
- * - kmalloc would use the kmalloc-1k slab for such values
- * - Builds with smaller GRO_MAX_HEAD will very likely do
- * little networking, as that implies no WiFi and no
- * tunnels support, and 32 bits arches.
- */
- len = SZ_1K;
- data = page_frag_alloc_1k(&nc->page_small, gfp_mask);
- pfmemalloc = NAPI_SMALL_PAGE_PFMEMALLOC(nc->page_small);
- } else {
- len = SKB_HEAD_ALIGN(len);
-
- data = page_frag_alloc(&nc->page, len, gfp_mask);
- pfmemalloc = page_frag_cache_is_pfmemalloc(&nc->page);
- }
+ data = page_frag_alloc(&nc->page, len, gfp_mask);
+ pfmemalloc = page_frag_cache_is_pfmemalloc(&nc->page);
local_unlock_nested_bh(&napi_alloc_cache.bh_lock);
if (unlikely(!data))
The patch below does not apply to the 6.13-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.13.y
git checkout FETCH_HEAD
git cherry-pick -x e5644be4079750a0a0a5a7068fd90b97bf6fac55
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021831-mushiness-herbs-d06c@gregkh' --subject-prefix 'PATCH 6.13.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e5644be4079750a0a0a5a7068fd90b97bf6fac55 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <frederic(a)kernel.org>
Date: Wed, 12 Feb 2025 14:55:14 +0100
Subject: [PATCH] usb: gadget: uvc: Fix unstarted kthread worker
The behaviour of kthread_create_worker() was recently changed to align
with that of kthread_create(): the kthread worker is created but not
woken by default. This is to allow the use of kthread_affine_preferred()
and kthread_bind[_mask]() with kthread workers. To keep the old
behaviour and wake the kthread up, kthread_run_worker() must be
used. All pre-existing users have been converted, except for UVC,
which was introduced in the same merge window as the API change.
This results in hangs:
INFO: task UVCG:82 blocked for more than 491 seconds.
Tainted: G T 6.13.0-rc2-00014-gb04e317b5226 #1
task:UVCG state:D stack:0 pid:82
Call Trace:
__schedule
schedule
schedule_preempt_disabled
kthread
? kthread_flush_work
ret_from_fork
ret_from_fork_asm
entry_INT80_32
Fix this by converting the UVCG kworker to the new API.
Reported-by: kernel test robot <oliver.sang(a)intel.com>
Closes: https://lore.kernel.org/oe-lkp/202502121025.55bfa801-lkp@intel.com
Fixes: f0bbfbd16b3b ("usb: gadget: uvc: rework to enqueue in pump worker from encoded queue")
Cc: stable <stable(a)kernel.org>
Cc: Michael Grzeschik <m.grzeschik(a)pengutronix.de>
Signed-off-by: Frederic Weisbecker <frederic(a)kernel.org>
Link: https://lore.kernel.org/r/20250212135514.30539-1-frederic@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c
index 79e223713d8b..fb77b0b21790 100644
--- a/drivers/usb/gadget/function/uvc_video.c
+++ b/drivers/usb/gadget/function/uvc_video.c
@@ -818,7 +818,7 @@ int uvcg_video_init(struct uvc_video *video, struct uvc_device *uvc)
return -EINVAL;
/* Allocate a kthread for asynchronous hw submit handler. */
- video->kworker = kthread_create_worker(0, "UVCG");
+ video->kworker = kthread_run_worker(0, "UVCG");
if (IS_ERR(video->kworker)) {
uvcg_err(&video->uvc->func, "failed to create UVCG kworker\n");
return PTR_ERR(video->kworker);
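A hedged sketch of the new API pairing (the driver-side names here are
illustrative; only the worker calls are the real kthread API):

#include <linux/err.h>
#include <linux/kthread.h>

static struct kthread_worker *kworker;
static struct kthread_work submit_work;

static void submit_work_fn(struct kthread_work *work)
{
        /* ... asynchronous hardware submission ... */
}

static int worker_init(void)
{
        /* kthread_create_worker() now returns a worker that is not yet
         * running; kthread_run_worker() both creates and wakes it, which
         * is what a driver expecting immediate service must use. */
        kworker = kthread_run_worker(0, "UVCG");
        if (IS_ERR(kworker))
                return PTR_ERR(kworker);

        kthread_init_work(&submit_work, submit_work_fn);
        kthread_queue_work(kworker, &submit_work); /* runs promptly */
        return 0;
}

static void worker_exit(void)
{
        kthread_destroy_worker(kworker); /* flushes and stops the worker */
}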
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 4ab37fcb42832cdd3e9d5e50653285ca84d6686f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021846-penholder-punisher-66e6@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 4ab37fcb42832cdd3e9d5e50653285ca84d6686f Mon Sep 17 00:00:00 2001
From: Jill Donahue <jilliandonahue58(a)gmail.com>
Date: Tue, 11 Feb 2025 10:48:05 -0700
Subject: [PATCH] USB: gadget: f_midi: f_midi_complete to call queue_work
When using USB MIDI, a lock is attempted to be acquired twice through a
re-entrant call to f_midi_transmit, causing a deadlock.
Fix it by using queue_work() to schedule the inner f_midi_transmit() via
a high priority work queue from the completion handler.
Link: https://lore.kernel.org/all/CAArt=LjxU0fUZOj06X+5tkeGT+6RbXzpWg1h4t4Fwa_KGV…
Fixes: d5daf49b58661 ("USB: gadget: midi: add midi function driver")
Cc: stable <stable(a)kernel.org>
Signed-off-by: Jill Donahue <jilliandonahue58(a)gmail.com>
Link: https://lore.kernel.org/r/20250211174805.1369265-1-jdonahue@fender.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/gadget/function/f_midi.c b/drivers/usb/gadget/function/f_midi.c
index 47260d65066a..da82598fcef8 100644
--- a/drivers/usb/gadget/function/f_midi.c
+++ b/drivers/usb/gadget/function/f_midi.c
@@ -283,7 +283,7 @@ f_midi_complete(struct usb_ep *ep, struct usb_request *req)
/* Our transmit completed. See if there's more to go.
* f_midi_transmit eats req, don't queue it again. */
req->length = 0;
- f_midi_transmit(midi);
+ queue_work(system_highpri_wq, &midi->work);
return;
}
break;
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 399a45e5237ca14037120b1b895bd38a3b4492ea
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021848-sixtieth-living-e034@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 399a45e5237ca14037120b1b895bd38a3b4492ea Mon Sep 17 00:00:00 2001
From: Roy Luo <royluo(a)google.com>
Date: Tue, 4 Feb 2025 23:36:42 +0000
Subject: [PATCH] usb: gadget: core: flush gadget workqueue after device
removal
device_del() can lead to new work being scheduled in the gadget->work
workqueue. This is observed, for example, with the dwc3 driver, with the
following call stack:
device_del()
gadget_unbind_driver()
usb_gadget_disconnect_locked()
dwc3_gadget_pullup()
dwc3_gadget_soft_disconnect()
usb_gadget_set_state()
schedule_work(&gadget->work)
Move flush_work() after device_del() to ensure the workqueue is cleaned
up.
Fixes: 5702f75375aa9 ("usb: gadget: udc-core: move sysfs_notify() to a workqueue")
Cc: stable <stable(a)kernel.org>
Signed-off-by: Roy Luo <royluo(a)google.com>
Reviewed-by: Alan Stern <stern(a)rowland.harvard.edu>
Reviewed-by: Thinh Nguyen <Thinh.Nguyen(a)synopsys.com>
Link: https://lore.kernel.org/r/20250204233642.666991-1-royluo@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c
index a6f46364be65..4b3d5075621a 100644
--- a/drivers/usb/gadget/udc/core.c
+++ b/drivers/usb/gadget/udc/core.c
@@ -1543,8 +1543,8 @@ void usb_del_gadget(struct usb_gadget *gadget)
kobject_uevent(&udc->dev.kobj, KOBJ_REMOVE);
sysfs_remove_link(&udc->dev.kobj, "gadget");
- flush_work(&gadget->work);
device_del(&gadget->dev);
+ flush_work(&gadget->work);
ida_free(&gadget_id_numbers, gadget->id_number);
cancel_work_sync(&udc->vbus_work);
device_unregister(&udc->dev);
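A hedged sketch of the ordering argument (error paths and the surrounding
sysfs/ida cleanup from usb_del_gadget() are omitted):

/* Tearing down in the old order leaves a window where the flush is
 * useless:
 *
 *      flush_work(&gadget->work);   // drains the queue...
 *      device_del(&gadget->dev);    // ...then re-queues the work via
 *                                   // gadget_unbind_driver() ->
 *                                   // usb_gadget_set_state() ->
 *                                   // schedule_work(&gadget->work)
 *
 * With the fix, nothing can re-queue the work once device_del() returns,
 * so the flush really drains the workqueue:
 */
device_del(&gadget->dev);
flush_work(&gadget->work);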
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 399a45e5237ca14037120b1b895bd38a3b4492ea
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021846-antihero-overall-1bfb@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 399a45e5237ca14037120b1b895bd38a3b4492ea Mon Sep 17 00:00:00 2001
From: Roy Luo <royluo(a)google.com>
Date: Tue, 4 Feb 2025 23:36:42 +0000
Subject: [PATCH] usb: gadget: core: flush gadget workqueue after device
removal
device_del() can lead to new work being scheduled in the gadget->work
workqueue. This is observed, for example, with the dwc3 driver, with the
following call stack:
device_del()
gadget_unbind_driver()
usb_gadget_disconnect_locked()
dwc3_gadget_pullup()
dwc3_gadget_soft_disconnect()
usb_gadget_set_state()
schedule_work(&gadget->work)
Move flush_work() after device_del() to ensure the workqueue is cleaned
up.
Fixes: 5702f75375aa9 ("usb: gadget: udc-core: move sysfs_notify() to a workqueue")
Cc: stable <stable(a)kernel.org>
Signed-off-by: Roy Luo <royluo(a)google.com>
Reviewed-by: Alan Stern <stern(a)rowland.harvard.edu>
Reviewed-by: Thinh Nguyen <Thinh.Nguyen(a)synopsys.com>
Link: https://lore.kernel.org/r/20250204233642.666991-1-royluo@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c
index a6f46364be65..4b3d5075621a 100644
--- a/drivers/usb/gadget/udc/core.c
+++ b/drivers/usb/gadget/udc/core.c
@@ -1543,8 +1543,8 @@ void usb_del_gadget(struct usb_gadget *gadget)
kobject_uevent(&udc->dev.kobj, KOBJ_REMOVE);
sysfs_remove_link(&udc->dev.kobj, "gadget");
- flush_work(&gadget->work);
device_del(&gadget->dev);
+ flush_work(&gadget->work);
ida_free(&gadget_id_numbers, gadget->id_number);
cancel_work_sync(&udc->vbus_work);
device_unregister(&udc->dev);
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 399a45e5237ca14037120b1b895bd38a3b4492ea
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021844-stimuli-handmade-9628@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 399a45e5237ca14037120b1b895bd38a3b4492ea Mon Sep 17 00:00:00 2001
From: Roy Luo <royluo(a)google.com>
Date: Tue, 4 Feb 2025 23:36:42 +0000
Subject: [PATCH] usb: gadget: core: flush gadget workqueue after device
removal
device_del() can lead to new work being scheduled in the gadget->work
workqueue. This is observed, for example, with the dwc3 driver, with the
following call stack:
device_del()
gadget_unbind_driver()
usb_gadget_disconnect_locked()
dwc3_gadget_pullup()
dwc3_gadget_soft_disconnect()
usb_gadget_set_state()
schedule_work(&gadget->work)
Move flush_work() after device_del() to ensure the workqueue is cleaned
up.
Fixes: 5702f75375aa9 ("usb: gadget: udc-core: move sysfs_notify() to a workqueue")
Cc: stable <stable(a)kernel.org>
Signed-off-by: Roy Luo <royluo(a)google.com>
Reviewed-by: Alan Stern <stern(a)rowland.harvard.edu>
Reviewed-by: Thinh Nguyen <Thinh.Nguyen(a)synopsys.com>
Link: https://lore.kernel.org/r/20250204233642.666991-1-royluo@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c
index a6f46364be65..4b3d5075621a 100644
--- a/drivers/usb/gadget/udc/core.c
+++ b/drivers/usb/gadget/udc/core.c
@@ -1543,8 +1543,8 @@ void usb_del_gadget(struct usb_gadget *gadget)
kobject_uevent(&udc->dev.kobj, KOBJ_REMOVE);
sysfs_remove_link(&udc->dev.kobj, "gadget");
- flush_work(&gadget->work);
device_del(&gadget->dev);
+ flush_work(&gadget->work);
ida_free(&gadget_id_numbers, gadget->id_number);
cancel_work_sync(&udc->vbus_work);
device_unregister(&udc->dev);
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 399a45e5237ca14037120b1b895bd38a3b4492ea
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021842-eject-splendor-7c5d@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 399a45e5237ca14037120b1b895bd38a3b4492ea Mon Sep 17 00:00:00 2001
From: Roy Luo <royluo(a)google.com>
Date: Tue, 4 Feb 2025 23:36:42 +0000
Subject: [PATCH] usb: gadget: core: flush gadget workqueue after device
removal
device_del() can lead to new work being scheduled in the gadget->work
workqueue. This is observed, for example, with the dwc3 driver, with the
following call stack:
device_del()
gadget_unbind_driver()
usb_gadget_disconnect_locked()
dwc3_gadget_pullup()
dwc3_gadget_soft_disconnect()
usb_gadget_set_state()
schedule_work(&gadget->work)
Move flush_work() after device_del() to ensure the workqueue is cleaned
up.
Fixes: 5702f75375aa9 ("usb: gadget: udc-core: move sysfs_notify() to a workqueue")
Cc: stable <stable(a)kernel.org>
Signed-off-by: Roy Luo <royluo(a)google.com>
Reviewed-by: Alan Stern <stern(a)rowland.harvard.edu>
Reviewed-by: Thinh Nguyen <Thinh.Nguyen(a)synopsys.com>
Link: https://lore.kernel.org/r/20250204233642.666991-1-royluo@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c
index a6f46364be65..4b3d5075621a 100644
--- a/drivers/usb/gadget/udc/core.c
+++ b/drivers/usb/gadget/udc/core.c
@@ -1543,8 +1543,8 @@ void usb_del_gadget(struct usb_gadget *gadget)
kobject_uevent(&udc->dev.kobj, KOBJ_REMOVE);
sysfs_remove_link(&udc->dev.kobj, "gadget");
- flush_work(&gadget->work);
device_del(&gadget->dev);
+ flush_work(&gadget->work);
ida_free(&gadget_id_numbers, gadget->id_number);
cancel_work_sync(&udc->vbus_work);
device_unregister(&udc->dev);
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 399a45e5237ca14037120b1b895bd38a3b4492ea
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021839-vacancy-doormat-6a57@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 399a45e5237ca14037120b1b895bd38a3b4492ea Mon Sep 17 00:00:00 2001
From: Roy Luo <royluo(a)google.com>
Date: Tue, 4 Feb 2025 23:36:42 +0000
Subject: [PATCH] usb: gadget: core: flush gadget workqueue after device
removal
device_del() can lead to new work being scheduled in the gadget->work
workqueue. This is observed, for example, with the dwc3 driver, with the
following call stack:
device_del()
gadget_unbind_driver()
usb_gadget_disconnect_locked()
dwc3_gadget_pullup()
dwc3_gadget_soft_disconnect()
usb_gadget_set_state()
schedule_work(&gadget->work)
Move flush_work() after device_del() to ensure the workqueue is cleaned
up.
Fixes: 5702f75375aa9 ("usb: gadget: udc-core: move sysfs_notify() to a workqueue")
Cc: stable <stable(a)kernel.org>
Signed-off-by: Roy Luo <royluo(a)google.com>
Reviewed-by: Alan Stern <stern(a)rowland.harvard.edu>
Reviewed-by: Thinh Nguyen <Thinh.Nguyen(a)synopsys.com>
Link: https://lore.kernel.org/r/20250204233642.666991-1-royluo@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c
index a6f46364be65..4b3d5075621a 100644
--- a/drivers/usb/gadget/udc/core.c
+++ b/drivers/usb/gadget/udc/core.c
@@ -1543,8 +1543,8 @@ void usb_del_gadget(struct usb_gadget *gadget)
kobject_uevent(&udc->dev.kobj, KOBJ_REMOVE);
sysfs_remove_link(&udc->dev.kobj, "gadget");
- flush_work(&gadget->work);
device_del(&gadget->dev);
+ flush_work(&gadget->work);
ida_free(&gadget_id_numbers, gadget->id_number);
cancel_work_sync(&udc->vbus_work);
device_unregister(&udc->dev);
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 659f5d55feb75782bd46cf130da3c1f240afe9ba
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021852-blemish-humility-00ca@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 659f5d55feb75782bd46cf130da3c1f240afe9ba Mon Sep 17 00:00:00 2001
From: Jos Wang <joswang(a)lenovo.com>
Date: Thu, 13 Feb 2025 21:49:21 +0800
Subject: [PATCH] usb: typec: tcpm: PSSourceOffTimer timeout in PR_Swap enters
ERROR_RECOVERY
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
As per the PD2.0 spec ("6.5.6.2 PSSourceOffTimer"), the PSSourceOffTimer is
used by the Policy Engine in a Dual-Role Power device that is currently
acting as a Sink to time out on a PS_RDY Message during a Power Role
Swap sequence. This condition leads to a Hard Reset for USB Type-A and
Type-B plugs and to Error Recovery for Type-C plugs, returning to USB
Default Operation.
Therefore, after the PSSourceOffTimer times out, the tcpm state machine should
switch from PR_SWAP_SNK_SRC_SINK_OFF to ERROR_RECOVERY. This also resolves
the following item in the USB power delivery compliance test:
TEST.PD.PROT.SNK.12 PR_Swap – PSSourceOffTimer Timeout
[1] https://usb.org/document-library/usb-power-delivery-compliance-test-specifi…
Fixes: f0690a25a140 ("staging: typec: USB Type-C Port Manager (tcpm)")
Cc: stable <stable(a)kernel.org>
Signed-off-by: Jos Wang <joswang(a)lenovo.com>
Reviewed-by: Heikki Krogerus <heikki.krogerus(a)linux.intel.com>
Tested-by: Amit Sunil Dhamne <amitsd(a)google.com>
Link: https://lore.kernel.org/r/20250213134921.3798-1-joswang1221@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
index 47be450d2be3..6bf1a22c785a 100644
--- a/drivers/usb/typec/tcpm/tcpm.c
+++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -5591,8 +5591,7 @@ static void run_state_machine(struct tcpm_port *port)
tcpm_set_auto_vbus_discharge_threshold(port, TYPEC_PWR_MODE_USB,
port->pps_data.active, 0);
tcpm_set_charge(port, false);
- tcpm_set_state(port, hard_reset_state(port),
- port->timings.ps_src_off_time);
+ tcpm_set_state(port, ERROR_RECOVERY, port->timings.ps_src_off_time);
break;
case PR_SWAP_SNK_SRC_SOURCE_ON:
tcpm_enable_auto_vbus_discharge(port, true);
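For orientation, the affected state-machine arm with explanatory comments
added (a hedged excerpt; the surrounding cases and setup calls are
omitted):

case PR_SWAP_SNK_SRC_SINK_OFF:
        /* ... vbus threshold setup, tcpm_set_charge(port, false) ... */

        /* tcpm_set_state() with a non-zero delay arms a timed transition:
         * it fires after ps_src_off_time unless the new Source's PS_RDY
         * moves the machine to another state first. This models the
         * PSSourceOffTimer, and on expiry a Type-C port must run Error
         * Recovery (PD2.0 6.5.6.2) rather than the Hard Reset that
         * hard_reset_state() used to select here.
         */
        tcpm_set_state(port, ERROR_RECOVERY, port->timings.ps_src_off_time);
        break;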
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 659f5d55feb75782bd46cf130da3c1f240afe9ba
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021851-irritable-untracked-3604@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 659f5d55feb75782bd46cf130da3c1f240afe9ba Mon Sep 17 00:00:00 2001
From: Jos Wang <joswang(a)lenovo.com>
Date: Thu, 13 Feb 2025 21:49:21 +0800
Subject: [PATCH] usb: typec: tcpm: PSSourceOffTimer timeout in PR_Swap enters
ERROR_RECOVERY
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
As per the PD2.0 spec ("6.5.6.2 PSSourceOffTimer"), the PSSourceOffTimer is
used by the Policy Engine in a Dual-Role Power device that is currently
acting as a Sink to time out on a PS_RDY Message during a Power Role
Swap sequence. This condition leads to a Hard Reset for USB Type-A and
Type-B plugs and to Error Recovery for Type-C plugs, returning to USB
Default Operation.
Therefore, after the PSSourceOffTimer times out, the tcpm state machine should
switch from PR_SWAP_SNK_SRC_SINK_OFF to ERROR_RECOVERY. This also resolves
the following item in the USB power delivery compliance test:
TEST.PD.PROT.SNK.12 PR_Swap – PSSourceOffTimer Timeout
[1] https://usb.org/document-library/usb-power-delivery-compliance-test-specifi…
Fixes: f0690a25a140 ("staging: typec: USB Type-C Port Manager (tcpm)")
Cc: stable <stable(a)kernel.org>
Signed-off-by: Jos Wang <joswang(a)lenovo.com>
Reviewed-by: Heikki Krogerus <heikki.krogerus(a)linux.intel.com>
Tested-by: Amit Sunil Dhamne <amitsd(a)google.com>
Link: https://lore.kernel.org/r/20250213134921.3798-1-joswang1221@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
index 47be450d2be3..6bf1a22c785a 100644
--- a/drivers/usb/typec/tcpm/tcpm.c
+++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -5591,8 +5591,7 @@ static void run_state_machine(struct tcpm_port *port)
tcpm_set_auto_vbus_discharge_threshold(port, TYPEC_PWR_MODE_USB,
port->pps_data.active, 0);
tcpm_set_charge(port, false);
- tcpm_set_state(port, hard_reset_state(port),
- port->timings.ps_src_off_time);
+ tcpm_set_state(port, ERROR_RECOVERY, port->timings.ps_src_off_time);
break;
case PR_SWAP_SNK_SRC_SOURCE_ON:
tcpm_enable_auto_vbus_discharge(port, true);
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 659f5d55feb75782bd46cf130da3c1f240afe9ba
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021850-dividers-finalist-b7b7@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 659f5d55feb75782bd46cf130da3c1f240afe9ba Mon Sep 17 00:00:00 2001
From: Jos Wang <joswang(a)lenovo.com>
Date: Thu, 13 Feb 2025 21:49:21 +0800
Subject: [PATCH] usb: typec: tcpm: PSSourceOffTimer timeout in PR_Swap enters
ERROR_RECOVERY
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
As per the PD2.0 spec ("6.5.6.2 PSSourceOffTimer"), the PSSourceOffTimer is
used by the Policy Engine in a Dual-Role Power device that is currently
acting as a Sink to time out on a PS_RDY Message during a Power Role
Swap sequence. This condition leads to a Hard Reset for USB Type-A and
Type-B plugs and to Error Recovery for Type-C plugs, returning to USB
Default Operation.
Therefore, after the PSSourceOffTimer times out, the tcpm state machine should
switch from PR_SWAP_SNK_SRC_SINK_OFF to ERROR_RECOVERY. This also resolves
the following item in the USB power delivery compliance test:
TEST.PD.PROT.SNK.12 PR_Swap – PSSourceOffTimer Timeout
[1] https://usb.org/document-library/usb-power-delivery-compliance-test-specifi…
Fixes: f0690a25a140 ("staging: typec: USB Type-C Port Manager (tcpm)")
Cc: stable <stable(a)kernel.org>
Signed-off-by: Jos Wang <joswang(a)lenovo.com>
Reviewed-by: Heikki Krogerus <heikki.krogerus(a)linux.intel.com>
Tested-by: Amit Sunil Dhamne <amitsd(a)google.com>
Link: https://lore.kernel.org/r/20250213134921.3798-1-joswang1221@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
index 47be450d2be3..6bf1a22c785a 100644
--- a/drivers/usb/typec/tcpm/tcpm.c
+++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -5591,8 +5591,7 @@ static void run_state_machine(struct tcpm_port *port)
tcpm_set_auto_vbus_discharge_threshold(port, TYPEC_PWR_MODE_USB,
port->pps_data.active, 0);
tcpm_set_charge(port, false);
- tcpm_set_state(port, hard_reset_state(port),
- port->timings.ps_src_off_time);
+ tcpm_set_state(port, ERROR_RECOVERY, port->timings.ps_src_off_time);
break;
case PR_SWAP_SNK_SRC_SOURCE_ON:
tcpm_enable_auto_vbus_discharge(port, true);
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 659f5d55feb75782bd46cf130da3c1f240afe9ba
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021850-starry-ideally-6050@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 659f5d55feb75782bd46cf130da3c1f240afe9ba Mon Sep 17 00:00:00 2001
From: Jos Wang <joswang(a)lenovo.com>
Date: Thu, 13 Feb 2025 21:49:21 +0800
Subject: [PATCH] usb: typec: tcpm: PSSourceOffTimer timeout in PR_Swap enters
ERROR_RECOVERY
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
As per the PD2.0 spec ("6.5.6.2 PSSourceOffTimer"), the PSSourceOffTimer
is used by the Policy Engine in a Dual-Role Power device that is
currently acting as a Sink to time out on a PS_RDY Message during a
Power Role Swap sequence. This condition leads to a Hard Reset for USB
Type-A and Type-B plugs, Error Recovery for Type-C plugs, and a return
to USB Default Operation.
Therefore, after the PSSourceOffTimer times out, the tcpm state machine
should switch from PR_SWAP_SNK_SRC_SINK_OFF to ERROR_RECOVERY. This
also resolves the following item in the USB Power Delivery compliance
test:
TEST.PD.PROT.SNK.12 PR_Swap – PSSourceOffTimer Timeout
[1] https://usb.org/document-library/usb-power-delivery-compliance-test-specifi…
Fixes: f0690a25a140 ("staging: typec: USB Type-C Port Manager (tcpm)")
Cc: stable <stable(a)kernel.org>
Signed-off-by: Jos Wang <joswang(a)lenovo.com>
Reviewed-by: Heikki Krogerus <heikki.krogerus(a)linux.intel.com>
Tested-by: Amit Sunil Dhamne <amitsd(a)google.com>
Link: https://lore.kernel.org/r/20250213134921.3798-1-joswang1221@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
index 47be450d2be3..6bf1a22c785a 100644
--- a/drivers/usb/typec/tcpm/tcpm.c
+++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -5591,8 +5591,7 @@ static void run_state_machine(struct tcpm_port *port)
tcpm_set_auto_vbus_discharge_threshold(port, TYPEC_PWR_MODE_USB,
port->pps_data.active, 0);
tcpm_set_charge(port, false);
- tcpm_set_state(port, hard_reset_state(port),
- port->timings.ps_src_off_time);
+ tcpm_set_state(port, ERROR_RECOVERY, port->timings.ps_src_off_time);
break;
case PR_SWAP_SNK_SRC_SOURCE_ON:
tcpm_enable_auto_vbus_discharge(port, true);
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 659f5d55feb75782bd46cf130da3c1f240afe9ba
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021849-tree-erased-b30b@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 659f5d55feb75782bd46cf130da3c1f240afe9ba Mon Sep 17 00:00:00 2001
From: Jos Wang <joswang(a)lenovo.com>
Date: Thu, 13 Feb 2025 21:49:21 +0800
Subject: [PATCH] usb: typec: tcpm: PSSourceOffTimer timeout in PR_Swap enters
ERROR_RECOVERY
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
As per the PD2.0 spec ("6.5.6.2 PSSourceOffTimer"), the PSSourceOffTimer
is used by the Policy Engine in a Dual-Role Power device that is
currently acting as a Sink to time out on a PS_RDY Message during a
Power Role Swap sequence. This condition leads to a Hard Reset for USB
Type-A and Type-B plugs, Error Recovery for Type-C plugs, and a return
to USB Default Operation.
Therefore, after the PSSourceOffTimer times out, the tcpm state machine
should switch from PR_SWAP_SNK_SRC_SINK_OFF to ERROR_RECOVERY. This
also resolves the following item in the USB Power Delivery compliance
test:
TEST.PD.PROT.SNK.12 PR_Swap – PSSourceOffTimer Timeout
[1] https://usb.org/document-library/usb-power-delivery-compliance-test-specifi…
Fixes: f0690a25a140 ("staging: typec: USB Type-C Port Manager (tcpm)")
Cc: stable <stable(a)kernel.org>
Signed-off-by: Jos Wang <joswang(a)lenovo.com>
Reviewed-by: Heikki Krogerus <heikki.krogerus(a)linux.intel.com>
Tested-by: Amit Sunil Dhamne <amitsd(a)google.com>
Link: https://lore.kernel.org/r/20250213134921.3798-1-joswang1221@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
index 47be450d2be3..6bf1a22c785a 100644
--- a/drivers/usb/typec/tcpm/tcpm.c
+++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -5591,8 +5591,7 @@ static void run_state_machine(struct tcpm_port *port)
tcpm_set_auto_vbus_discharge_threshold(port, TYPEC_PWR_MODE_USB,
port->pps_data.active, 0);
tcpm_set_charge(port, false);
- tcpm_set_state(port, hard_reset_state(port),
- port->timings.ps_src_off_time);
+ tcpm_set_state(port, ERROR_RECOVERY, port->timings.ps_src_off_time);
break;
case PR_SWAP_SNK_SRC_SOURCE_ON:
tcpm_enable_auto_vbus_discharge(port, true);
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x 659f5d55feb75782bd46cf130da3c1f240afe9ba
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021848-lend-city-f5bb@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 659f5d55feb75782bd46cf130da3c1f240afe9ba Mon Sep 17 00:00:00 2001
From: Jos Wang <joswang(a)lenovo.com>
Date: Thu, 13 Feb 2025 21:49:21 +0800
Subject: [PATCH] usb: typec: tcpm: PSSourceOffTimer timeout in PR_Swap enters
ERROR_RECOVERY
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
As per the PD2.0 spec ("6.5.6.2 PSSourceOffTimer"), the PSSourceOffTimer
is used by the Policy Engine in a Dual-Role Power device that is
currently acting as a Sink to time out on a PS_RDY Message during a
Power Role Swap sequence. This condition leads to a Hard Reset for USB
Type-A and Type-B plugs, Error Recovery for Type-C plugs, and a return
to USB Default Operation.
Therefore, after the PSSourceOffTimer times out, the tcpm state machine
should switch from PR_SWAP_SNK_SRC_SINK_OFF to ERROR_RECOVERY. This
also resolves the following item in the USB Power Delivery compliance
test:
TEST.PD.PROT.SNK.12 PR_Swap – PSSourceOffTimer Timeout
[1] https://usb.org/document-library/usb-power-delivery-compliance-test-specifi…
Fixes: f0690a25a140 ("staging: typec: USB Type-C Port Manager (tcpm)")
Cc: stable <stable(a)kernel.org>
Signed-off-by: Jos Wang <joswang(a)lenovo.com>
Reviewed-by: Heikki Krogerus <heikki.krogerus(a)linux.intel.com>
Tested-by: Amit Sunil Dhamne <amitsd(a)google.com>
Link: https://lore.kernel.org/r/20250213134921.3798-1-joswang1221@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
index 47be450d2be3..6bf1a22c785a 100644
--- a/drivers/usb/typec/tcpm/tcpm.c
+++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -5591,8 +5591,7 @@ static void run_state_machine(struct tcpm_port *port)
tcpm_set_auto_vbus_discharge_threshold(port, TYPEC_PWR_MODE_USB,
port->pps_data.active, 0);
tcpm_set_charge(port, false);
- tcpm_set_state(port, hard_reset_state(port),
- port->timings.ps_src_off_time);
+ tcpm_set_state(port, ERROR_RECOVERY, port->timings.ps_src_off_time);
break;
case PR_SWAP_SNK_SRC_SOURCE_ON:
tcpm_enable_auto_vbus_discharge(port, true);
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x d3a8c28426fc1fb3252753a9f1db0d691ffc21b0
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021803-magnesium-breeze-0a67@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d3a8c28426fc1fb3252753a9f1db0d691ffc21b0 Mon Sep 17 00:00:00 2001
From: Selvarasu Ganesan <selvarasu.g(a)samsung.com>
Date: Sat, 1 Feb 2025 22:09:02 +0530
Subject: [PATCH] usb: dwc3: Fix timeout issue during controller enter/exit
from halt state
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The controller frequently times out when entering or exiting the halt
state after software toggles the run_stop bit. The timeout occurs when
performing frequent role switches between host and device, causing
device enumeration failures. The issue was not present when the USB2
suspend PHY was disabled by passing the SNPS quirks
(snps,dis_u2_susphy_quirk and snps,dis_enblslpm_quirk) from the DTS.
However, the USB2 suspend PHY is required to be enabled by setting the
GUSB2PHYCFG.ENBLSLPM and GUSB2PHYCFG.SUSPHY bits, and doing so while
the controller starts in gadget or host mode triggers the timeout.
This commit addresses the timeout by clearing the GUSB2PHYCFG.ENBLSLPM
and GUSB2PHYCFG.SUSPHY bits before starting the dwc3_gadget_run_stop
sequence and restoring them after the sequence completes.
Fixes: 72246da40f37 ("usb: Introduce DesignWare USB3 DRD Driver")
Cc: stable <stable(a)kernel.org>
Signed-off-by: Selvarasu Ganesan <selvarasu.g(a)samsung.com>
Acked-by: Thinh Nguyen <Thinh.Nguyen(a)synopsys.com>
Link: https://lore.kernel.org/r/20250201163903.459-1-selvarasu.g@samsung.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index d27af65eb08a..ddd6b2ce5710 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -2629,10 +2629,38 @@ static int dwc3_gadget_run_stop(struct dwc3 *dwc, int is_on)
{
u32 reg;
u32 timeout = 2000;
+ u32 saved_config = 0;
if (pm_runtime_suspended(dwc->dev))
return 0;
+ /*
+ * When operating in USB 2.0 speeds (HS/FS), ensure that
+ * GUSB2PHYCFG.ENBLSLPM and GUSB2PHYCFG.SUSPHY are cleared before starting
+ * or stopping the controller. This resolves timeout issues that occur
+ * during frequent role switches between host and device modes.
+ *
+ * Save and clear these settings, then restore them after completing the
+ * controller start or stop sequence.
+ *
+ * This solution was discovered through experimentation as it is not
+ * mentioned in the dwc3 programming guide. It has been tested on
+ * Exynos platforms.
+ */
+ reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0));
+ if (reg & DWC3_GUSB2PHYCFG_SUSPHY) {
+ saved_config |= DWC3_GUSB2PHYCFG_SUSPHY;
+ reg &= ~DWC3_GUSB2PHYCFG_SUSPHY;
+ }
+
+ if (reg & DWC3_GUSB2PHYCFG_ENBLSLPM) {
+ saved_config |= DWC3_GUSB2PHYCFG_ENBLSLPM;
+ reg &= ~DWC3_GUSB2PHYCFG_ENBLSLPM;
+ }
+
+ if (saved_config)
+ dwc3_writel(dwc->regs, DWC3_GUSB2PHYCFG(0), reg);
+
reg = dwc3_readl(dwc->regs, DWC3_DCTL);
if (is_on) {
if (DWC3_VER_IS_WITHIN(DWC3, ANY, 187A)) {
@@ -2660,6 +2688,12 @@ static int dwc3_gadget_run_stop(struct dwc3 *dwc, int is_on)
reg &= DWC3_DSTS_DEVCTRLHLT;
} while (--timeout && !(!is_on ^ !reg));
+ if (saved_config) {
+ reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0));
+ reg |= saved_config;
+ dwc3_writel(dwc->regs, DWC3_GUSB2PHYCFG(0), reg);
+ }
+
if (!timeout)
return -ETIMEDOUT;
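Two details of the hunk above are worth spelling out. First, the poll
condition --timeout && !(!is_on ^ !reg) keeps looping until DEVCTRLHLT
reaches its expected value: cleared when the controller is being
started (is_on) and set when it is being stopped. Second, the fix
wraps that poll in a save/clear/restore of the two suspend bits. The
following stand-alone sketch shows the idiom in plain C, with the
register modeled as a variable and the bit positions chosen only for
illustration, not taken from the programming guide.

/*
 * Sketch of the save/clear/restore idiom applied to GUSB2PHYCFG:
 * remember which suspend bits were set, clear them for the duration
 * of the run/stop sequence, then put back exactly what was there.
 * The register is a plain variable here; bit names match the diff,
 * bit positions are assumed for illustration.
 */
#include <stdint.h>
#include <stdio.h>

#define DWC3_GUSB2PHYCFG_SUSPHY   (1u << 6)   /* illustrative position */
#define DWC3_GUSB2PHYCFG_ENBLSLPM (1u << 8)   /* illustrative position */

static uint32_t gusb2phycfg = DWC3_GUSB2PHYCFG_SUSPHY |
			      DWC3_GUSB2PHYCFG_ENBLSLPM;

int main(void)
{
	uint32_t reg, saved_config = 0;

	/* Save and clear only the bits that were actually set. */
	reg = gusb2phycfg;
	if (reg & DWC3_GUSB2PHYCFG_SUSPHY) {
		saved_config |= DWC3_GUSB2PHYCFG_SUSPHY;
		reg &= ~DWC3_GUSB2PHYCFG_SUSPHY;
	}
	if (reg & DWC3_GUSB2PHYCFG_ENBLSLPM) {
		saved_config |= DWC3_GUSB2PHYCFG_ENBLSLPM;
		reg &= ~DWC3_GUSB2PHYCFG_ENBLSLPM;
	}
	if (saved_config)
		gusb2phycfg = reg;	/* single write, only if needed */

	/* ... run/stop sequence and DEVCTRLHLT polling happen here ... */

	/* Restore only what was set before, leaving other bits alone. */
	if (saved_config)
		gusb2phycfg |= saved_config;

	printf("restored: %#x\n", gusb2phycfg);
	return 0;
}

Restoring with a read-modify-OR, rather than writing back the whole
saved register value, preserves any other GUSB2PHYCFG bits the
run/stop sequence may have changed in the meantime.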