The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 4f102d747cadd8f595f2b25882eed9bec1675fb1
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025112459-askew-underhand-3094@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 4f102d747cadd8f595f2b25882eed9bec1675fb1 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni(a)redhat.com>
Date: Tue, 18 Nov 2025 08:20:20 +0100
Subject: [PATCH] mptcp: avoid unneeded subflow-level drops
The rcv window is shared among all the subflows. Currently, MPTCP sync
the TCP-level rcv window with the MPTCP one at tcp_transmit_skb() time.
The above means that incoming data may sporadically observe outdated
TCP-level rcv window and being wrongly dropped by TCP.
Address the issue checking for the edge condition before queuing the
data at TCP level, and eventually syncing the rcv window as needed.
Note that the issue is actually present from the very first MPTCP
implementation, but backports older than the blamed commit below will
range from impossible to useless.
Before:
$ nstat -n; sleep 1; nstat -z TcpExtBeyondWindow
TcpExtBeyondWindow 14 0.0
After:
$ nstat -n; sleep 1; nstat -z TcpExtBeyondWindow
TcpExtBeyondWindow 0 0.0
Fixes: fa3fe2b15031 ("mptcp: track window announced to peer")
Cc: stable(a)vger.kernel.org
Signed-off-by: Paolo Abeni <pabeni(a)redhat.com>
Reviewed-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
Link: https://patch.msgid.link/20251118-net-mptcp-misc-fixes-6-18-rc6-v1-2-806d37…
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 8a63bd00807d..f24ae7d40e88 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -1044,6 +1044,31 @@ static void __mptcp_snd_una_update(struct mptcp_sock *msk, u64 new_snd_una)
WRITE_ONCE(msk->snd_una, new_snd_una);
}
+static void rwin_update(struct mptcp_sock *msk, struct sock *ssk,
+ struct sk_buff *skb)
+{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+ struct tcp_sock *tp = tcp_sk(ssk);
+ u64 mptcp_rcv_wnd;
+
+ /* Avoid touching extra cachelines if TCP is going to accept this
+ * skb without filling the TCP-level window even with a possibly
+ * outdated mptcp-level rwin.
+ */
+ if (!skb->len || skb->len < tcp_receive_window(tp))
+ return;
+
+ mptcp_rcv_wnd = atomic64_read(&msk->rcv_wnd_sent);
+ if (!after64(mptcp_rcv_wnd, subflow->rcv_wnd_sent))
+ return;
+
+ /* Some other subflow grew the mptcp-level rwin since rcv_wup,
+ * resync.
+ */
+ tp->rcv_wnd += mptcp_rcv_wnd - subflow->rcv_wnd_sent;
+ subflow->rcv_wnd_sent = mptcp_rcv_wnd;
+}
+
static void ack_update_msk(struct mptcp_sock *msk,
struct sock *ssk,
struct mptcp_options_received *mp_opt)
@@ -1211,6 +1236,7 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
*/
if (mp_opt.use_ack)
ack_update_msk(msk, sk, &mp_opt);
+ rwin_update(msk, sk, skb);
/* Zero-data-length packets are dropped by the caller and not
* propagated to the MPTCP layer, so the skb extension does not
@@ -1297,6 +1323,10 @@ static void mptcp_set_rwin(struct tcp_sock *tp, struct tcphdr *th)
if (rcv_wnd_new != rcv_wnd_old) {
raise_win:
+ /* The msk-level rcv wnd is after the tcp level one,
+ * sync the latter.
+ */
+ rcv_wnd_new = rcv_wnd_old;
win = rcv_wnd_old - ack_seq;
tp->rcv_wnd = min_t(u64, win, U32_MAX);
new_win = tp->rcv_wnd;
@@ -1320,6 +1350,7 @@ static void mptcp_set_rwin(struct tcp_sock *tp, struct tcphdr *th)
update_wspace:
WRITE_ONCE(msk->old_wspace, tp->rcv_wnd);
+ subflow->rcv_wnd_sent = rcv_wnd_new;
}
static void mptcp_track_rwin(struct tcp_sock *tp)
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 379a88e14e8d..5575ef64ea31 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -509,6 +509,7 @@ struct mptcp_subflow_context {
u64 remote_key;
u64 idsn;
u64 map_seq;
+ u64 rcv_wnd_sent;
u32 snd_isn;
u32 token;
u32 rel_write_seq;
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x ae155060247be8dcae3802a95bd1bdf93ab3215d
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025112411-cytoplasm-virus-b6dd@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ae155060247be8dcae3802a95bd1bdf93ab3215d Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni(a)redhat.com>
Date: Tue, 18 Nov 2025 08:20:24 +0100
Subject: [PATCH] mptcp: fix duplicate reset on fastclose
The CI reports sporadic failures of the fastclose self-tests. The root
cause is a duplicate reset, not carrying the relevant MPTCP option.
In the failing scenario the bad reset is received by the peer before
the fastclose one, preventing the reception of the latter.
Indeed there is window of opportunity at fastclose time for the
following race:
mptcp_do_fastclose
__mptcp_close_ssk
__tcp_close()
tcp_set_state() [1]
tcp_send_active_reset() [2]
After [1] the stack will send reset to in-flight data reaching the now
closed port. Such reset may race with [2].
Address the issue explicitly sending a single reset on fastclose before
explicitly moving the subflow to close status.
Fixes: d21f83485518 ("mptcp: use fastclose on more edge scenarios")
Cc: stable(a)vger.kernel.org
Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/596
Signed-off-by: Paolo Abeni <pabeni(a)redhat.com>
Reviewed-by: Geliang Tang <geliang(a)kernel.org>
Reviewed-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
Link: https://patch.msgid.link/20251118-net-mptcp-misc-fixes-6-18-rc6-v1-6-806d37…
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index c59246c1fde6..a70267a74e3c 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -2409,7 +2409,6 @@ bool __mptcp_retransmit_pending_data(struct sock *sk)
/* flags for __mptcp_close_ssk() */
#define MPTCP_CF_PUSH BIT(1)
-#define MPTCP_CF_FASTCLOSE BIT(2)
/* be sure to send a reset only if the caller asked for it, also
* clean completely the subflow status when the subflow reaches
@@ -2420,7 +2419,7 @@ static void __mptcp_subflow_disconnect(struct sock *ssk,
unsigned int flags)
{
if (((1 << ssk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) ||
- (flags & MPTCP_CF_FASTCLOSE)) {
+ subflow->send_fastclose) {
/* The MPTCP code never wait on the subflow sockets, TCP-level
* disconnect should never fail
*/
@@ -2467,14 +2466,8 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
- if ((flags & MPTCP_CF_FASTCLOSE) && !__mptcp_check_fallback(msk)) {
- /* be sure to force the tcp_close path
- * to generate the egress reset
- */
- ssk->sk_lingertime = 0;
- sock_set_flag(ssk, SOCK_LINGER);
- subflow->send_fastclose = 1;
- }
+ if (subflow->send_fastclose && ssk->sk_state != TCP_CLOSE)
+ tcp_set_state(ssk, TCP_CLOSE);
need_push = (flags & MPTCP_CF_PUSH) && __mptcp_retransmit_pending_data(sk);
if (!dispose_it) {
@@ -2779,9 +2772,26 @@ static void mptcp_do_fastclose(struct sock *sk)
struct mptcp_sock *msk = mptcp_sk(sk);
mptcp_set_state(sk, TCP_CLOSE);
- mptcp_for_each_subflow_safe(msk, subflow, tmp)
- __mptcp_close_ssk(sk, mptcp_subflow_tcp_sock(subflow),
- subflow, MPTCP_CF_FASTCLOSE);
+
+ /* Explicitly send the fastclose reset as need */
+ if (__mptcp_check_fallback(msk))
+ return;
+
+ mptcp_for_each_subflow_safe(msk, subflow, tmp) {
+ struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+
+ lock_sock(ssk);
+
+ /* Some subflow socket states don't allow/need a reset.*/
+ if ((1 << ssk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
+ goto unlock;
+
+ subflow->send_fastclose = 1;
+ tcp_send_active_reset(ssk, ssk->sk_allocation,
+ SK_RST_REASON_TCP_ABORT_ON_CLOSE);
+unlock:
+ release_sock(ssk);
+ }
}
static void mptcp_worker(struct work_struct *work)
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 6457595db9870298ee30b6d75287b8548e33fe19
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025112028-sly-exclude-8f08@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 6457595db9870298ee30b6d75287b8548e33fe19 Mon Sep 17 00:00:00 2001
From: "Matthieu Baerts (NGI0)" <matttbe(a)kernel.org>
Date: Mon, 10 Nov 2025 19:23:42 +0100
Subject: [PATCH] selftests: mptcp: join: endpoints: longer transfer
In rare cases, when the test environment is very slow, some userspace
tests can fail because some expected events have not been seen.
Because the tests are expecting a long on-going connection, and they are
not waiting for the end of the transfer, it is fine to make the
connection longer. This connection will be killed at the end, after the
verifications, so making it longer doesn't change anything, apart from
avoid it to end before the end of the verifications
To play it safe, all endpoints tests not waiting for the end of the
transfer are now sharing a longer file (128KB) at slow speed.
Fixes: 69c6ce7b6eca ("selftests: mptcp: add implicit endpoint test case")
Cc: stable(a)vger.kernel.org
Fixes: e274f7154008 ("selftests: mptcp: add subflow limits test-cases")
Fixes: b5e2fb832f48 ("selftests: mptcp: add explicit test case for remove/readd")
Fixes: e06959e9eebd ("selftests: mptcp: join: test for flush/re-add endpoints")
Reviewed-by: Geliang Tang <geliang(a)kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
Link: https://patch.msgid.link/20251110-net-mptcp-sft-join-unstable-v1-3-a4332c71…
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 9ed9ec7202d6..97af8d89ac5c 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -3943,7 +3943,7 @@ endpoint_tests()
pm_nl_set_limits $ns1 2 2
pm_nl_set_limits $ns2 2 2
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
- { speed=slow \
+ { test_linkfail=128 speed=slow \
run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
@@ -3970,7 +3970,7 @@ endpoint_tests()
pm_nl_set_limits $ns2 0 3
pm_nl_add_endpoint $ns2 10.0.1.2 id 1 dev ns2eth1 flags subflow
pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow
- { test_linkfail=4 speed=5 \
+ { test_linkfail=128 speed=5 \
run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
@@ -4048,7 +4048,7 @@ endpoint_tests()
# broadcast IP: no packet for this address will be received on ns1
pm_nl_add_endpoint $ns1 224.0.0.1 id 2 flags signal
pm_nl_add_endpoint $ns1 10.0.1.1 id 42 flags signal
- { test_linkfail=4 speed=5 \
+ { test_linkfail=128 speed=5 \
run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
@@ -4121,7 +4121,7 @@ endpoint_tests()
# broadcast IP: no packet for this address will be received on ns1
pm_nl_add_endpoint $ns1 224.0.0.1 id 2 flags signal
pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow
- { test_linkfail=4 speed=20 \
+ { test_linkfail=128 speed=20 \
run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
The function max6620_read checks shared data (tach and target) for zero
before passing it to max6620_fan_tach_to_rpm, which uses it as a divisor.
These accesses are currently lockless. If the data changes to zero
between the check and the division, it causes a divide-by-zero error.
Explicitly acquire the update lock around these checks and calculations
to ensure the data remains stable, preventing Time-of-Check to
Time-of-Use (TOCTOU) race conditions.
This change also aligns the locking behavior with the hwmon_fan_alarm
case, which already uses the update lock.
Link: https://lore.kernel.org/all/CALbr=LYJ_ehtp53HXEVkSpYoub+XYSTU8Rg=o1xxMJ8=5z…
Fixes: e8ac01e5db32 ("hwmon: Add Maxim MAX6620 hardware monitoring driver")
Cc: stable(a)vger.kernel.org
Signed-off-by: Gui-Dong Han <hanguidong02(a)gmail.com>
---
Based on the discussion in the link, I will submit a series of patches to
address TOCTOU issues in the hwmon subsystem by converting macros to
functions or adjusting locking where appropriate.
---
drivers/hwmon/max6620.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/drivers/hwmon/max6620.c b/drivers/hwmon/max6620.c
index 13201fb755c9..0dce2f5cb61b 100644
--- a/drivers/hwmon/max6620.c
+++ b/drivers/hwmon/max6620.c
@@ -290,20 +290,24 @@ max6620_read(struct device *dev, enum hwmon_sensor_types type, u32 attr,
*val = max6620_fan_div_from_reg(data->fandyn[channel]);
break;
case hwmon_fan_input:
+ mutex_lock(&data->update_lock);
if (data->tach[channel] == 0) {
*val = 0;
} else {
div = max6620_fan_div_from_reg(data->fandyn[channel]);
*val = max6620_fan_tach_to_rpm(div, data->tach[channel]);
}
+ mutex_unlock(&data->update_lock);
break;
case hwmon_fan_target:
+ mutex_lock(&data->update_lock);
if (data->target[channel] == 0) {
*val = 0;
} else {
div = max6620_fan_div_from_reg(data->fandyn[channel]);
*val = max6620_fan_tach_to_rpm(div, data->target[channel]);
}
+ mutex_unlock(&data->update_lock);
break;
default:
return -EOPNOTSUPP;
--
2.43.0
The patch titled
Subject: idr: fix idr_alloc() returning an ID out of range
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
idr-fix-idr_alloc-returning-an-id-out-of-range.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: "Matthew Wilcox (Oracle)" <willy(a)infradead.org>
Subject: idr: fix idr_alloc() returning an ID out of range
Date: Fri, 28 Nov 2025 16:18:32 +0000
If you use an IDR with a non-zero base, and specify a range that lies
entirely below the base, 'max - base' becomes very large and
idr_get_free() can return an ID that lies outside of the requested range.
Link: https://lkml.kernel.org/r/20251128161853.3200058-1-willy@infradead.org
Fixes: 6ce711f27500 (idr: Make 1-based IDRs more efficient)
Signed-off-by: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Reported-by: Jan Sokolowski <jan.sokolowski(a)intel.com>
Reported-by: Koen Koning koen.koning(a)intel.com
Reported-by: Peter Senna Tschudin peter.senna(a)linux.intel.com
Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/6449
Reviewed-by: Christian K��nig <christian.koenig(a)amd.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
lib/idr.c | 2 ++
tools/testing/radix-tree/idr-test.c | 21 +++++++++++++++++++++
2 files changed, 23 insertions(+)
--- a/lib/idr.c~idr-fix-idr_alloc-returning-an-id-out-of-range
+++ a/lib/idr.c
@@ -40,6 +40,8 @@ int idr_alloc_u32(struct idr *idr, void
if (WARN_ON_ONCE(!(idr->idr_rt.xa_flags & ROOT_IS_IDR)))
idr->idr_rt.xa_flags |= IDR_RT_MARKER;
+ if (max < base)
+ return -ENOSPC;
id = (id < base) ? 0 : id - base;
radix_tree_iter_init(&iter, id);
--- a/tools/testing/radix-tree/idr-test.c~idr-fix-idr_alloc-returning-an-id-out-of-range
+++ a/tools/testing/radix-tree/idr-test.c
@@ -57,6 +57,26 @@ void idr_alloc_test(void)
idr_destroy(&idr);
}
+void idr_alloc2_test(void)
+{
+ int id;
+ struct idr idr = IDR_INIT_BASE(idr, 1);
+
+ id = idr_alloc(&idr, idr_alloc2_test, 0, 1, GFP_KERNEL);
+ assert(id == -ENOSPC);
+
+ id = idr_alloc(&idr, idr_alloc2_test, 1, 2, GFP_KERNEL);
+ assert(id == 1);
+
+ id = idr_alloc(&idr, idr_alloc2_test, 0, 1, GFP_KERNEL);
+ assert(id == -ENOSPC);
+
+ id = idr_alloc(&idr, idr_alloc2_test, 0, 2, GFP_KERNEL);
+ assert(id == -ENOSPC);
+
+ idr_destroy(&idr);
+}
+
void idr_replace_test(void)
{
DEFINE_IDR(idr);
@@ -409,6 +429,7 @@ void idr_checks(void)
idr_replace_test();
idr_alloc_test();
+ idr_alloc2_test();
idr_null_test();
idr_nowait_test();
idr_get_next_test(0);
_
Patches currently in -mm which might be from willy(a)infradead.org are
idr-fix-idr_alloc-returning-an-id-out-of-range.patch
mm-fix-vma_start_write_killable-signal-handling.patch
In max16065_current_show, data->curr_sense is read twice: once for the
error check and again for the calculation. Since
i2c_smbus_read_byte_data returns negative error codes on failure, if the
data changes to an error code between the check and the use, ADC_TO_CURR
results in an incorrect calculation.
Read data->curr_sense into a local variable to ensure consistency. Note
that data->curr_gain is constant and safe to access directly.
This aligns max16065_current_show with max16065_input_show, which
already uses a local variable for the same reason.
Link: https://lore.kernel.org/all/CALbr=LYJ_ehtp53HXEVkSpYoub+XYSTU8Rg=o1xxMJ8=5z…
Fixes: f5bae2642e3d ("hwmon: Driver for MAX16065 System Manager and compatibles")
Cc: stable(a)vger.kernel.org
Signed-off-by: Gui-Dong Han <hanguidong02(a)gmail.com>
---
Based on the discussion in the link, I will submit a series of patches to
address TOCTOU issues in the hwmon subsystem by converting macros to
functions or adjusting locking where appropriate.
---
drivers/hwmon/max16065.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/drivers/hwmon/max16065.c b/drivers/hwmon/max16065.c
index 0ccb5eb596fc..4c9e7892a73c 100644
--- a/drivers/hwmon/max16065.c
+++ b/drivers/hwmon/max16065.c
@@ -216,12 +216,13 @@ static ssize_t max16065_current_show(struct device *dev,
struct device_attribute *da, char *buf)
{
struct max16065_data *data = max16065_update_device(dev);
+ int curr_sense = data->curr_sense;
- if (unlikely(data->curr_sense < 0))
- return data->curr_sense;
+ if (unlikely(curr_sense < 0))
+ return curr_sense;
return sysfs_emit(buf, "%d\n",
- ADC_TO_CURR(data->curr_sense, data->curr_gain));
+ ADC_TO_CURR(curr_sense, data->curr_gain));
}
static ssize_t max16065_limit_store(struct device *dev,
--
2.43.0
The "timers: Provide timer_shutdown[_sync]()" patch series implemented a
useful feature that addresses various bugs caused by attempts to rearm
shutdown timers.
https://lore.kernel.org/all/20221123201306.823305113@linutronix.de/
However, this patch series was not fully backported to versions prior to
6.2, requiring separate patches for older kernels if these bugs were
encountered.
The biggest problem with this is that even if these bugs were discovered
and patched in the upstream kernel, if the maintainer or author didn't
create a separate backport patch for versions prior to 6.2, the bugs would
remain untouched in older kernels.
Therefore, to reduce the hassle of having to write a separate patch, we
should backport the remaining unbackported commits from the
"timers: Provide timer_shutdown[_sync]()" patch series to versions prior
to 6.2.
---
Documentation/RCU/Design/Requirements/Requirements.rst | 2 +-
Documentation/core-api/local_ops.rst | 2 +-
Documentation/kernel-hacking/locking.rst | 17 +++++++-----
Documentation/timers/hrtimers.rst | 2 +-
Documentation/translations/it_IT/kernel-hacking/locking.rst | 14 +++++-----
Documentation/translations/zh_CN/core-api/local_ops.rst | 2 +-
arch/arm/mach-spear/time.c | 8 +++---
drivers/bluetooth/hci_qca.c | 10 +++++--
drivers/char/tpm/tpm-dev-common.c | 4 +--
drivers/clocksource/arm_arch_timer.c | 12 ++++-----
drivers/clocksource/timer-sp804.c | 6 ++---
drivers/staging/wlan-ng/hfa384x_usb.c | 4 +--
drivers/staging/wlan-ng/prism2usb.c | 6 ++---
include/linux/timer.h | 17 ++++++++++--
kernel/time/timer.c | 315 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------------------------
net/sunrpc/xprt.c | 2 +-
16 files changed, 322 insertions(+), 101 deletions(-)
From: Florian Westphal <fw(a)strlen.de>
[ Upstream commit 791a615b7ad2258c560f91852be54b0480837c93 ]
The initial buffer has to be inited to all-ones, but it must restrict
it to the size of the first field, not the total field size.
After each round in the map search step, the result and the fill map
are swapped, so if we have a set where f->bsize of the first element
is smaller than m->bsize_max, those one-bits are leaked into future
rounds result map.
This makes pipapo find an incorrect matching results for sets where
first field size is not the largest.
Followup patch adds a test case to nft_concat_range.sh selftest script.
Thanks to Stefano Brivio for pointing out that we need to zero out
the remainder explicitly, only correcting memset() argument isn't enough.
Fixes: 3c4287f62044 ("nf_tables: Add set type for arbitrary concatenation of ranges")
Reported-by: Yi Chen <yiche(a)redhat.com>
Cc: Stefano Brivio <sbrivio(a)redhat.com>
Signed-off-by: Florian Westphal <fw(a)strlen.de>
Reviewed-by: Stefano Brivio <sbrivio(a)redhat.com>
Signed-off-by: Pablo Neira Ayuso <pablo(a)netfilter.org>
Signed-off-by: Nazar Kalashnikov <sivartiwe(a)gmail.com>
---
Backport fix for CVE-2024-57947
net/netfilter/nft_set_pipapo.c | 4 ++--
net/netfilter/nft_set_pipapo.h | 21 +++++++++++++++++++++
net/netfilter/nft_set_pipapo_avx2.c | 10 ++++++----
3 files changed, 29 insertions(+), 6 deletions(-)
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index ce617f6a215f..6813ff660b72 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -432,7 +432,7 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
res_map = scratch->map + (map_index ? m->bsize_max : 0);
fill_map = scratch->map + (map_index ? 0 : m->bsize_max);
- memset(res_map, 0xff, m->bsize_max * sizeof(*res_map));
+ pipapo_resmap_init(m, res_map);
nft_pipapo_for_each_field(f, i, m) {
bool last = i == m->field_count - 1;
@@ -536,7 +536,7 @@ static struct nft_pipapo_elem *pipapo_get(const struct net *net,
goto out;
}
- memset(res_map, 0xff, m->bsize_max * sizeof(*res_map));
+ pipapo_resmap_init(m, res_map);
nft_pipapo_for_each_field(f, i, m) {
bool last = i == m->field_count - 1;
diff --git a/net/netfilter/nft_set_pipapo.h b/net/netfilter/nft_set_pipapo.h
index 2e709ae01924..8f8f58af4e34 100644
--- a/net/netfilter/nft_set_pipapo.h
+++ b/net/netfilter/nft_set_pipapo.h
@@ -287,4 +287,25 @@ static u64 pipapo_estimate_size(const struct nft_set_desc *desc)
return size;
}
+/**
+ * pipapo_resmap_init() - Initialise result map before first use
+ * @m: Matching data, including mapping table
+ * @res_map: Result map
+ *
+ * Initialize all bits covered by the first field to one, so that after
+ * the first step, only the matching bits of the first bit group remain.
+ *
+ * If other fields have a large bitmap, set remainder of res_map to 0.
+ */
+static inline void pipapo_resmap_init(const struct nft_pipapo_match *m, unsigned long *res_map)
+{
+ const struct nft_pipapo_field *f = m->f;
+ int i;
+
+ for (i = 0; i < f->bsize; i++)
+ res_map[i] = ULONG_MAX;
+
+ for (i = f->bsize; i < m->bsize_max; i++)
+ res_map[i] = 0ul;
+}
#endif /* _NFT_SET_PIPAPO_H */
diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c
index 0a23d297084d..81e6d12ab4cd 100644
--- a/net/netfilter/nft_set_pipapo_avx2.c
+++ b/net/netfilter/nft_set_pipapo_avx2.c
@@ -1028,6 +1028,7 @@ static int nft_pipapo_avx2_lookup_8b_16(unsigned long *map, unsigned long *fill,
/**
* nft_pipapo_avx2_lookup_slow() - Fallback function for uncommon field sizes
+ * @mdata: Matching data, including mapping table
* @map: Previous match result, used as initial bitmap
* @fill: Destination bitmap to be filled with current match result
* @f: Field, containing lookup and mapping tables
@@ -1043,7 +1044,8 @@ static int nft_pipapo_avx2_lookup_8b_16(unsigned long *map, unsigned long *fill,
* Return: -1 on no match, rule index of match if @last, otherwise first long
* word index to be checked next (i.e. first filled word).
*/
-static int nft_pipapo_avx2_lookup_slow(unsigned long *map, unsigned long *fill,
+static int nft_pipapo_avx2_lookup_slow(const struct nft_pipapo_match *mdata,
+ unsigned long *map, unsigned long *fill,
struct nft_pipapo_field *f, int offset,
const u8 *pkt, bool first, bool last)
{
@@ -1053,7 +1055,7 @@ static int nft_pipapo_avx2_lookup_slow(unsigned long *map, unsigned long *fill,
lt += offset * NFT_PIPAPO_LONGS_PER_M256;
if (first)
- memset(map, 0xff, bsize * sizeof(*map));
+ pipapo_resmap_init(mdata, map);
for (i = offset; i < bsize; i++) {
if (f->bb == 8)
@@ -1181,7 +1183,7 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
} else if (f->groups == 16) {
NFT_SET_PIPAPO_AVX2_LOOKUP(8, 16);
} else {
- ret = nft_pipapo_avx2_lookup_slow(res, fill, f,
+ ret = nft_pipapo_avx2_lookup_slow(m, res, fill, f,
ret, rp,
first, last);
}
@@ -1197,7 +1199,7 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
} else if (f->groups == 32) {
NFT_SET_PIPAPO_AVX2_LOOKUP(4, 32);
} else {
- ret = nft_pipapo_avx2_lookup_slow(res, fill, f,
+ ret = nft_pipapo_avx2_lookup_slow(m, res, fill, f,
ret, rp,
first, last);
}
--
2.43.0
From: Nikolay Aleksandrov <razor(a)blackwall.org>
[ Upstream commit 9ec7eb60dcbcb6c41076defbc5df7bbd95ceaba5 ]
Add bond_ether_setup helper which is used to fix ether_setup() calls in the
bonding driver. It takes care of both IFF_MASTER and IFF_SLAVE flags, the
former is always restored and the latter only if it was set.
If the bond enslaves non-ARPHRD_ETHER device (changes its type), then
releases it and enslaves ARPHRD_ETHER device (changes back) then we
use ether_setup() to restore the bond device type but it also resets its
flags and removes IFF_MASTER and IFF_SLAVE[1]. Use the bond_ether_setup
helper to restore both after such transition.
[1] reproduce (nlmon is non-ARPHRD_ETHER):
$ ip l add nlmon0 type nlmon
$ ip l add bond2 type bond mode active-backup
$ ip l set nlmon0 master bond2
$ ip l set nlmon0 nomaster
$ ip l add bond1 type bond
(we use bond1 as ARPHRD_ETHER device to restore bond2's mode)
$ ip l set bond1 master bond2
$ ip l sh dev bond2
37: bond2: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000
link/ether be:d7:c5:40:5b:cc brd ff:ff:ff:ff:ff:ff promiscuity 0 minmtu 68 maxmtu 1500
(notice bond2's IFF_MASTER is missing)
Fixes: e36b9d16c6a6 ("bonding: clean muticast addresses when device changes type")
Signed-off-by: Nikolay Aleksandrov <razor(a)blackwall.org>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Alexey Panov <apanov(a)astralinux.ru>
---
Backport fix for CVE-2023-53103
Tested with the syzkaller reproducer for
https://syzkaller.appspot.com/bug?extid=9dfc3f3348729cc82277:
the issue triggers on vanilla v5.10.y and no longer reproduces with these
patches applied.
drivers/net/bonding/bond_main.c | 19 +++++++++++++++----
1 file changed, 15 insertions(+), 4 deletions(-)
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 08bc930afc4c..127242101c8e 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1691,6 +1691,19 @@ void bond_lower_state_changed(struct slave *slave)
netdev_lower_state_changed(slave->dev, &info);
}
+/* The bonding driver uses ether_setup() to convert a master bond device
+ * to ARPHRD_ETHER, that resets the target netdevice's flags so we always
+ * have to restore the IFF_MASTER flag, and only restore IFF_SLAVE if it was set
+ */
+static void bond_ether_setup(struct net_device *bond_dev)
+{
+ unsigned int slave_flag = bond_dev->flags & IFF_SLAVE;
+
+ ether_setup(bond_dev);
+ bond_dev->flags |= IFF_MASTER | slave_flag;
+ bond_dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+}
+
/* enslave device <slave> to bond device <master> */
int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
struct netlink_ext_ack *extack)
@@ -1777,10 +1790,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
if (slave_dev->type != ARPHRD_ETHER)
bond_setup_by_slave(bond_dev, slave_dev);
- else {
- ether_setup(bond_dev);
- bond_dev->priv_flags &= ~IFF_TX_SKB_SHARING;
- }
+ else
+ bond_ether_setup(bond_dev);
call_netdevice_notifiers(NETDEV_POST_TYPE_CHANGE,
bond_dev);
--
2.30.2
Changes in v3:
- Fixed patch numbering (previously sent as 2/3 instead of 3/3)
Changes in v2:
- Added a new patch fixing bonding regression, based on the Fixes tag in
c484fcc058ba ("bonding: Fix memory leak when changing bond type to Ethernet")
- Added a cover letter
- No changes in patches 1 and 3
- Retested the reproducer [1]
Tested with the syzkaller reproducer [1].
The issue triggers on vanilla v5.10.y and no longer reproduces with these
patches applied.
Additionally, c484fcc058ba ("bonding: Fix memory leak when changing bond type
to Ethernet") has a Fixes tag pointing to
9ec7eb60dcbc ("bonding: restore IFF_MASTER/SLAVE flags on bond enslave ether
type change"), so it should be ported as well.
[1]: https://syzkaller.appspot.com/bug?extid=9dfc3f3348729cc82277
Ido Schimmel (1):
bonding: Fix memory leak when changing bond type to Ethernet
Nikolay Aleksandrov (2):
bonding: restore IFF_MASTER/SLAVE flags on bond enslave ether type
change
bonding: restore bond's IFF_SLAVE flag if a non-eth dev enslave fails
drivers/net/bonding/bond_main.c | 24 +++++++++++++++++-------
1 file changed, 17 insertions(+), 7 deletions(-)
--
2.39.5
Changes in v2:
- Added a new patch fixing bonding regression, based on the Fixes tag in
c484fcc058ba ("bonding: Fix memory leak when changing bond type to Ethernet")
- Added a cover letter
- No changes in patches 1 and 3
- Retested the reproducer [1]
Tested with the syzkaller reproducer [1].
The issue triggers on vanilla v5.10.y and no longer reproduces with these
patches applied.
Additionally, c484fcc058ba ("bonding: Fix memory leak when changing bond type
to Ethernet") has a Fixes tag pointing to
9ec7eb60dcbc ("bonding: restore IFF_MASTER/SLAVE flags on bond enslave ether
type change"), so it should be ported as well.
[1]: https://syzkaller.appspot.com/bug?extid=9dfc3f3348729cc82277
Ido Schimmel (1):
bonding: Fix memory leak when changing bond type to Ethernet
Nikolay Aleksandrov (2):
bonding: restore IFF_MASTER/SLAVE flags on bond enslave ether type
change
bonding: restore bond's IFF_SLAVE flag if a non-eth dev enslave fails
drivers/net/bonding/bond_main.c | 24 +++++++++++++++++-------
1 file changed, 17 insertions(+), 7 deletions(-)
--
2.39.5
The calculation of bridge window head alignment is done by
calculate_mem_align() [*]. With the default bridge window alignment, it
is used for both head and tail alignment.
The selected head alignment does not always result in tight-fitting
resources (gap at d4f00000-d4ffffff):
d4800000-dbffffff : PCI Bus 0000:06
d4800000-d48fffff : PCI Bus 0000:07
d4800000-d4803fff : 0000:07:00.0
d4800000-d4803fff : nvme
d4900000-d49fffff : PCI Bus 0000:0a
d4900000-d490ffff : 0000:0a:00.0
d4900000-d490ffff : r8169
d4910000-d4913fff : 0000:0a:00.0
d4a00000-d4cfffff : PCI Bus 0000:0b
d4a00000-d4bfffff : 0000:0b:00.0
d4a00000-d4bfffff : 0000:0b:00.0
d4c00000-d4c07fff : 0000:0b:00.0
d4d00000-d4dfffff : PCI Bus 0000:15
d4d00000-d4d07fff : 0000:15:00.0
d4d00000-d4d07fff : xhci-hcd
d4e00000-d4efffff : PCI Bus 0000:16
d4e00000-d4e7ffff : 0000:16:00.0
d4e80000-d4e803ff : 0000:16:00.0
d4e80000-d4e803ff : ahci
d5000000-dbffffff : PCI Bus 0000:0c
This has not been caused problems (for years) with the default bridge
window tail alignment that grossly over-estimates the required tail
alignment leaving more tail room than necessary. With the introduction
of relaxed tail alignment that leaves no extra tail room whatsoever,
any gaps will immediately turn into assignment failures.
Introduce head alignment calculation that ensures no gaps are left and
apply the new approach when using relaxed alignment.
([*] I don't understand the algorithm in calculate_mem_align().)
Fixes: 5d0a8965aea9 ("[PATCH] 2.5.14: New PCI allocation code (alpha, arm, parisc) [2/2]")
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220775
Reported-by: Malte Schröder <malte+lkml(a)tnxip.de>
Tested-by: Malte Schröder <malte+lkml(a)tnxip.de>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen(a)linux.intel.com>
Cc: stable(a)vger.kernel.org
---
Little annoyingly, there's difference in what aligns array contains
between the legacy alignment approach (which I dare not to touch as I
really don't understand what the algorithm tries to do) and this new
head aligment algorithm, both consuming stack space. After making the
new approach the only available approach in the follow-up patch, only
one array remains (however, that follow-up change is also somewhat
riskier when it comes to regressions).
That being said, the new head alignment could work with the same aligns
array as the legacy approach, it just won't necessarily produce an
optimal (the smallest possible) head alignment when if (r_size <=
align) condition is used. Just let me know if that approach is
preferred (to save some stack space).
---
drivers/pci/setup-bus.c | 53 ++++++++++++++++++++++++++++++++++-------
1 file changed, 44 insertions(+), 9 deletions(-)
diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index 70d021ffb486..93f6b0750174 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -1224,6 +1224,45 @@ static inline resource_size_t calculate_mem_align(resource_size_t *aligns,
return min_align;
}
+/*
+ * Calculate bridge window head alignment that leaves no gaps in between
+ * resources.
+ */
+static resource_size_t calculate_head_align(resource_size_t *aligns,
+ int max_order)
+{
+ resource_size_t head_align = 1;
+ resource_size_t remainder = 0;
+ int order;
+
+ /* Take the largest alignment as the starting point. */
+ head_align <<= max_order + __ffs(SZ_1M);
+
+ for (order = max_order - 1; order >= 0; order--) {
+ resource_size_t align1 = 1;
+
+ align1 <<= order + __ffs(SZ_1M);
+
+ /*
+ * Account smaller resources with alignment < max_order that
+ * could be used to fill head room if alignment less than
+ * max_order is used.
+ */
+ remainder += aligns[order];
+
+ /*
+ * Test if head fill is enough to satisfy the alignment of
+ * the larger resources after reducing the alignment.
+ */
+ while ((head_align > align1) && (remainder >= head_align / 2)) {
+ head_align /= 2;
+ remainder -= head_align;
+ }
+ }
+
+ return head_align;
+}
+
/**
* pbus_upstream_space_available - Check no upstream resource limits allocation
* @bus: The bus
@@ -1311,13 +1350,13 @@ static void pbus_size_mem(struct pci_bus *bus, unsigned long type,
{
struct pci_dev *dev;
resource_size_t min_align, win_align, align, size, size0, size1 = 0;
- resource_size_t aligns[28]; /* Alignments from 1MB to 128TB */
+ resource_size_t aligns[28] = {}; /* Alignments from 1MB to 128TB */
+ resource_size_t aligns2[28] = {};/* Alignments from 1MB to 128TB */
int order, max_order;
struct resource *b_res = pbus_select_window_for_type(bus, type);
resource_size_t children_add_size = 0;
resource_size_t children_add_align = 0;
resource_size_t add_align = 0;
- resource_size_t relaxed_align;
resource_size_t old_size;
if (!b_res)
@@ -1327,7 +1366,6 @@ static void pbus_size_mem(struct pci_bus *bus, unsigned long type,
if (b_res->parent)
return;
- memset(aligns, 0, sizeof(aligns));
max_order = 0;
size = 0;
@@ -1378,6 +1416,7 @@ static void pbus_size_mem(struct pci_bus *bus, unsigned long type,
*/
if (r_size <= align)
aligns[order] += align;
+ aligns2[order] += align;
if (order > max_order)
max_order = order;
@@ -1402,9 +1441,7 @@ static void pbus_size_mem(struct pci_bus *bus, unsigned long type,
if (bus->self && size0 &&
!pbus_upstream_space_available(bus, b_res, size0, min_align)) {
- relaxed_align = 1ULL << (max_order + __ffs(SZ_1M));
- relaxed_align = max(relaxed_align, win_align);
- min_align = min(min_align, relaxed_align);
+ min_align = calculate_head_align(aligns2, max_order);
size0 = calculate_memsize(size, min_size, 0, 0, old_size, win_align);
resource_set_range(b_res, min_align, size0);
pci_info(bus->self, "bridge window %pR to %pR requires relaxed alignment rules\n",
@@ -1418,9 +1455,7 @@ static void pbus_size_mem(struct pci_bus *bus, unsigned long type,
if (bus->self && size1 &&
!pbus_upstream_space_available(bus, b_res, size1, add_align)) {
- relaxed_align = 1ULL << (max_order + __ffs(SZ_1M));
- relaxed_align = max(relaxed_align, win_align);
- min_align = min(min_align, relaxed_align);
+ min_align = calculate_head_align(aligns2, max_order);
size1 = calculate_memsize(size, min_size, add_size, children_add_size,
old_size, win_align);
pci_info(bus->self,
--
2.39.5
pbus_size_mem() has two alignments, one for required resources in
min_align and another in add_align that takes account optional
resources.
The add_align is applied to the bridge window through the realloc_head
list. It can happen, however, that add_align is larger than min_align
but calculated size1 and size0 are equal due to extra tailroom (e.g.,
hotplug reservation, tail alignment), and therefore no entry is created
to the realloc_head list. Without the bridge appearing in the realloc
head, add_align is lost when pbus_size_mem() returns.
The problem is visible in this log for 0000:05:00.0 which lacks
add_size ... add_align ... line that would indicate it was added into
the realloc_head list:
pci 0000:05:00.0: PCI bridge to [bus 06-16]
...
pci 0000:06:00.0: bridge window [mem 0x00100000-0x001fffff] to [bus 07] requires relaxed alignment rules
pci 0000:06:06.0: bridge window [mem 0x00100000-0x001fffff] to [bus 0a] requires relaxed alignment rules
pci 0000:06:07.0: bridge window [mem 0x00100000-0x003fffff] to [bus 0b] requires relaxed alignment rules
pci 0000:06:08.0: bridge window [mem 0x00800000-0x00ffffff 64bit pref] to [bus 0c-14] requires relaxed alignment rules
pci 0000:06:08.0: bridge window [mem 0x01000000-0x057fffff] to [bus 0c-14] requires relaxed alignment rules
pci 0000:06:08.0: bridge window [mem 0x01000000-0x057fffff] to [bus 0c-14] requires relaxed alignment rules
pci 0000:06:08.0: bridge window [mem 0x01000000-0x057fffff] to [bus 0c-14] add_size 100000 add_align 1000000
pci 0000:06:0c.0: bridge window [mem 0x00100000-0x001fffff] to [bus 15] requires relaxed alignment rules
pci 0000:06:0d.0: bridge window [mem 0x00100000-0x001fffff] to [bus 16] requires relaxed alignment rules
pci 0000:06:0d.0: bridge window [mem 0x00100000-0x001fffff] to [bus 16] requires relaxed alignment rules
pci 0000:05:00.0: bridge window [mem 0xd4800000-0xd97fffff]: assigned
pci 0000:05:00.0: bridge window [mem 0x1060000000-0x10607fffff 64bit pref]: assigned
pci 0000:06:08.0: bridge window [mem size 0x04900000]: can't assign; no space
pci 0000:06:08.0: bridge window [mem size 0x04900000]: failed to assign
While this bug itself seems old, it has likely become more visible
after the relaxed tail alignment that does not grossly overestimate the
size needed for the bridge window.
Make sure add_align > min_align too results in adding an entry into the
realloc head list. In addition, add handling to the cases where
add_size is zero while only alignment differs.
Fixes: d74b9027a4da ("PCI: Consider additional PF's IOV BAR alignment in sizing and assigning")
Reported-by: Malte Schröder <malte+lkml(a)tnxip.de>
Tested-by: Malte Schröder <malte+lkml(a)tnxip.de>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen(a)linux.intel.com>
Cc: stable(a)vger.kernel.org
---
drivers/pci/setup-bus.c | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index 4a8735b275e4..70d021ffb486 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -14,6 +14,7 @@
* tighter packing. Prefetchable range support.
*/
+#include <linux/align.h>
#include <linux/bitops.h>
#include <linux/init.h>
#include <linux/kernel.h>
@@ -452,7 +453,7 @@ static void reassign_resources_sorted(struct list_head *realloc_head,
"%s %pR: ignoring failure in optional allocation\n",
res_name, res);
}
- } else if (add_size > 0) {
+ } else if (add_size > 0 || !IS_ALIGNED(res->start, align)) {
res->flags |= add_res->flags &
(IORESOURCE_STARTALIGN|IORESOURCE_SIZEALIGN);
if (pci_reassign_resource(dev, idx, add_size, align))
@@ -1438,12 +1439,13 @@ static void pbus_size_mem(struct pci_bus *bus, unsigned long type,
resource_set_range(b_res, min_align, size0);
b_res->flags |= IORESOURCE_STARTALIGN;
- if (bus->self && size1 > size0 && realloc_head) {
+ if (bus->self && realloc_head && (size1 > size0 || add_align > min_align)) {
b_res->flags &= ~IORESOURCE_DISABLED;
- add_to_list(realloc_head, bus->self, b_res, size1-size0, add_align);
+ add_size = size1 > size0 ? size1 - size0 : 0;
+ add_to_list(realloc_head, bus->self, b_res, add_size, add_align);
pci_info(bus->self, "bridge window %pR to %pR add_size %llx add_align %llx\n",
b_res, &bus->busn_res,
- (unsigned long long) (size1 - size0),
+ (unsigned long long) add_size,
(unsigned long long) add_align);
}
}
--
2.39.5
Let's properly adjust BALLOON_MIGRATE like the other drivers.
Note that the INFLATE/DEFLATE events are triggered from the core when
enqueueing/dequeueing pages.
Not completely sure whether really is stable material, but the fix is
trivial so let's just CC stable.
This was found by code inspection.
Fixes: fe030c9b85e6 ("powerpc/pseries/cmm: Implement balloon compaction")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: David Hildenbrand <david(a)redhat.com>
---
arch/powerpc/platforms/pseries/cmm.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c
index 688f5fa1c7245..310dab4bc8679 100644
--- a/arch/powerpc/platforms/pseries/cmm.c
+++ b/arch/powerpc/platforms/pseries/cmm.c
@@ -532,6 +532,7 @@ static int cmm_migratepage(struct balloon_dev_info *b_dev_info,
spin_lock_irqsave(&b_dev_info->pages_lock, flags);
balloon_page_insert(b_dev_info, newpage);
+ __count_vm_event(BALLOON_MIGRATE);
b_dev_info->isolated_pages--;
spin_unlock_irqrestore(&b_dev_info->pages_lock, flags);
--
2.51.0
Backport commit: 094ee6017ea0 ("bonding: check xdp prog when set bond
mode") to 6.6.y to fix a bond issue.
It depends on commit: 22ccb684c1ca ("bonding: return detailed
error when loading native XDP fails)
In order to make a clean backport on stable kernel, backport 2 commits.
Hangbin Liu (1):
bonding: return detailed error when loading native XDP fails
Wang Liang (1):
bonding: check xdp prog when set bond mode
drivers/net/bonding/bond_main.c | 11 +++++++----
drivers/net/bonding/bond_options.c | 3 +++
include/net/bonding.h | 1 +
3 files changed, 11 insertions(+), 4 deletions(-)
--
2.17.1
We always have to initialize the balloon_dev_info, even when compaction
is not configured in: otherwise the containing list and the lock are
left uninitialized.
Likely not many such configs exist in practice, but let's CC stable to
be sure.
This was found by code inspection.
Fixes: fe030c9b85e6 ("powerpc/pseries/cmm: Implement balloon compaction")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: David Hildenbrand <david(a)redhat.com>
---
arch/powerpc/platforms/pseries/cmm.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c
index 0823fa2da1516..688f5fa1c7245 100644
--- a/arch/powerpc/platforms/pseries/cmm.c
+++ b/arch/powerpc/platforms/pseries/cmm.c
@@ -550,7 +550,6 @@ static int cmm_migratepage(struct balloon_dev_info *b_dev_info,
static void cmm_balloon_compaction_init(void)
{
- balloon_devinfo_init(&b_dev_info);
b_dev_info.migratepage = cmm_migratepage;
}
#else /* CONFIG_BALLOON_COMPACTION */
@@ -572,6 +571,7 @@ static int cmm_init(void)
if (!firmware_has_feature(FW_FEATURE_CMO) && !simulate)
return -EOPNOTSUPP;
+ balloon_devinfo_init(&b_dev_info);
cmm_balloon_compaction_init();
rc = register_oom_notifier(&cmm_oom_nb);
--
2.51.0
From: luoguangfei <15388634752(a)163.com>
[ Upstream commit 01b9128c5db1b470575d07b05b67ffa3cb02ebf1 ]
When removing a macb device, the driver calls phy_exit() before
unregister_netdev(). This leads to a WARN from kernfs:
------------[ cut here ]------------
kernfs: can not remove 'attached_dev', no directory
WARNING: CPU: 1 PID: 27146 at fs/kernfs/dir.c:1683
Call trace:
kernfs_remove_by_name_ns+0xd8/0xf0
sysfs_remove_link+0x24/0x58
phy_detach+0x5c/0x168
phy_disconnect+0x4c/0x70
phylink_disconnect_phy+0x6c/0xc0 [phylink]
macb_close+0x6c/0x170 [macb]
...
macb_remove+0x60/0x168 [macb]
platform_remove+0x5c/0x80
...
The warning happens because the PHY is being exited while the netdev
is still registered. The correct order is to unregister the netdev
before shutting down the PHY and cleaning up the MDIO bus.
Fix this by moving unregister_netdev() ahead of phy_exit() in
macb_remove().
Fixes: 8b73fa3ae02b ("net: macb: Added ZynqMP-specific initialization")
Signed-off-by: luoguangfei <15388634752(a)163.com>
Link: https://patch.msgid.link/20250818232527.1316-1-15388634752@163.com
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
[ Minor context change fixed. ]
Signed-off-by: Alva Lan <alvalan9(a)foxmail.com>
---
drivers/net/ethernet/cadence/macb_main.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index 7593255e6e53..bf7b5d1d5616 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -5182,11 +5182,11 @@ static int macb_remove(struct platform_device *pdev)
if (dev) {
bp = netdev_priv(dev);
+ unregister_netdev(dev);
phy_exit(bp->sgmii_phy);
mdiobus_unregister(bp->mii_bus);
mdiobus_free(bp->mii_bus);
- unregister_netdev(dev);
tasklet_kill(&bp->hresp_err_tasklet);
pm_runtime_disable(&pdev->dev);
pm_runtime_dont_use_autosuspend(&pdev->dev);
--
2.43.0
Good day,
I am reaching out to invite your company to provide a quotation for the products detailed in the attached request. We recognise that some of these items may not align with your usual supplies, but we expect your expertise in sourcing and supplying these products.
Please note that this is a one-time tender, and we require the product and its components delivered on or before the date specified in the attached document. We anticipate your prompt response to enable us to proceed to the next step.
Thank you and looking forward to reviewing your proposal soonest.
Thomas Pierre
Procurement Manager
Phone: +1-713-564-2377
fax: +1-713-969-7350
Email: totalenergiesbids(a)contractor.net
Note: This message, including any attachments, is intended solely for the use of the individual or entity to whom it is addressed and may contain confidential, proprietary, or legally privileged information. Any unauthorized review, use, disclosure, distribution, reproduction, or any form of dissemination of this communication is strictly prohibited.If you are not the intended recipient, please notify the sender immediately, delete this message from your system, and do not retain, copy, or distribute it.Please note that any views or opinions expressed in this communication are those of the sender and do not necessarily reflect the official views or policies of the company.
"Please consider the environment before printing this email."
'tpm2_load_cmd' allocates a tempoary blob indirectly via 'tpm2_key_decode'
but it is not freed in the failure paths. Address this by wrapping the blob
into with a cleanup helper.
Cc: stable(a)vger.kernel.org # v5.13+
Fixes: f2219745250f ("security: keys: trusted: use ASN.1 TPM2 key format for the blobs")
Signed-off-by: Jarkko Sakkinen <jarkko(a)kernel.org>
---
v9:
- Fixed up the commit message. It was not up to date and referred to wrong
function.
- Simplified the patch considereably. It was not optimally small.
v8:
- No changes.
v7:
- Fix compiler warning.
v6:
- A new patch in this version.
---
security/keys/trusted-keys/trusted_tpm2.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/security/keys/trusted-keys/trusted_tpm2.c b/security/keys/trusted-keys/trusted_tpm2.c
index 4467e880ebd5..225b8c9932bf 100644
--- a/security/keys/trusted-keys/trusted_tpm2.c
+++ b/security/keys/trusted-keys/trusted_tpm2.c
@@ -366,6 +366,7 @@ static int tpm2_load_cmd(struct tpm_chip *chip,
struct trusted_key_options *options,
u32 *blob_handle)
{
+ u8 *blob_ref __free(kfree) = NULL;
struct tpm_buf buf;
unsigned int private_len;
unsigned int public_len;
@@ -379,6 +380,9 @@ static int tpm2_load_cmd(struct tpm_chip *chip,
/* old form */
blob = payload->blob;
payload->old_format = 1;
+ } else {
+ /* Bind for cleanup: */
+ blob_ref = blob;
}
/* new format carries keyhandle but old format doesn't */
@@ -443,8 +447,6 @@ static int tpm2_load_cmd(struct tpm_chip *chip,
(__be32 *) &buf.data[TPM_HEADER_SIZE]);
out:
- if (blob != payload->blob)
- kfree(blob);
tpm_buf_destroy(&buf);
return tpm_ret_to_err(rc);
--
2.52.0
From: "Kirill A. Shutemov" <kirill.shutemov(a)linux.intel.com>
commit 58a039e679fe72bd0efa8b2abe669a7914bb4429 upstream.
Commit ea7e2d5e49c0 ("mm: call the security_mmap_file() LSM hook in
remap_file_pages()") fixed a security issue, it added an LSM check when
trying to remap file pages, so that LSMs have the opportunity to evaluate
such action like for other memory operations such as mmap() and
mprotect().
However, that commit called security_mmap_file() inside the mmap_lock
lock, while the other calls do it before taking the lock, after commit
8b3ec6814c83 ("take security_mmap_file() outside of ->mmap_sem").
This caused lock inversion issue with IMA which was taking the mmap_lock
and i_mutex lock in the opposite way when the remap_file_pages() system
call was called.
Solve the issue by splitting the critical region in remap_file_pages() in
two regions: the first takes a read lock of mmap_lock, retrieves the VMA
and the file descriptor associated, and calculates the 'prot' and 'flags'
variables; the second takes a write lock on mmap_lock, checks that the VMA
flags and the VMA file descriptor are the same as the ones obtained in the
first critical region (otherwise the system call fails), and calls
do_mmap().
In between, after releasing the read lock and before taking the write
lock, call security_mmap_file(), and solve the lock inversion issue.
Link: https://lkml.kernel.org/r/20241018161415.3845146-1-roberto.sassu@huaweiclou…
Fixes: ea7e2d5e49c0 ("mm: call the security_mmap_file() LSM hook in remap_file_pages()")
Signed-off-by: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Signed-off-by: Roberto Sassu <roberto.sassu(a)huawei.com>
Reported-by: syzbot+1cd571a672400ef3a930(a)syzkaller.appspotmail.com
Closes: https://lore.kernel.org/linux-security-module/66f7b10e.050a0220.46d20.0036.…
Tested-by: Roberto Sassu <roberto.sassu(a)huawei.com>
Reviewed-by: Roberto Sassu <roberto.sassu(a)huawei.com>
Reviewed-by: Jann Horn <jannh(a)google.com>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Reviewed-by: Liam R. Howlett <Liam.Howlett(a)Oracle.com>
Reviewed-by: Paul Moore <paul(a)paul-moore.com>
Tested-by: syzbot+1cd571a672400ef3a930(a)syzkaller.appspotmail.com
Cc: Jarkko Sakkinen <jarkko(a)kernel.org>
Cc: Dmitry Kasatkin <dmitry.kasatkin(a)gmail.com>
Cc: Eric Snowberg <eric.snowberg(a)oracle.com>
Cc: James Morris <jmorris(a)namei.org>
Cc: Mimi Zohar <zohar(a)linux.ibm.com>
Cc: "Serge E. Hallyn" <serge(a)hallyn.com>
Cc: Shu Han <ebpqwerty472123(a)gmail.com>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Signed-off-by: Alexey Panov <apanov(a)astralinux.ru>
---
Tested with the syzkaller reproducers for
https://syzkaller.appspot.com/bug?extid=1cd571a672400ef3a930https://syzkaller.appspot.com/bug?extid=b02bbe0ff80a09a08c1b:
the issue triggers on vanilla v6.1.y and no longer reproduces with this
patch applied.
mm/mmap.c | 69 +++++++++++++++++++++++++++++++++++++++++--------------
1 file changed, 52 insertions(+), 17 deletions(-)
diff --git a/mm/mmap.c b/mm/mmap.c
index f93526b3df0c..5672d22be7e7 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2974,6 +2974,7 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
unsigned long populate = 0;
unsigned long ret = -EINVAL;
struct file *file;
+ vm_flags_t vm_flags;
pr_warn_once("%s (%d) uses deprecated remap_file_pages() syscall. See Documentation/mm/remap_file_pages.rst.\n",
current->comm, current->pid);
@@ -2990,12 +2991,60 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
if (pgoff + (size >> PAGE_SHIFT) < pgoff)
return ret;
- if (mmap_write_lock_killable(mm))
+ if (mmap_read_lock_killable(mm))
return -EINTR;
+ /*
+ * Look up VMA under read lock first so we can perform the security
+ * without holding locks (which can be problematic). We reacquire a
+ * write lock later and check nothing changed underneath us.
+ */
vma = vma_lookup(mm, start);
- if (!vma || !(vma->vm_flags & VM_SHARED))
+ if (!vma || !(vma->vm_flags & VM_SHARED)) {
+ mmap_read_unlock(mm);
+ return -EINVAL;
+ }
+
+ prot |= vma->vm_flags & VM_READ ? PROT_READ : 0;
+ prot |= vma->vm_flags & VM_WRITE ? PROT_WRITE : 0;
+ prot |= vma->vm_flags & VM_EXEC ? PROT_EXEC : 0;
+
+ flags &= MAP_NONBLOCK;
+ flags |= MAP_SHARED | MAP_FIXED | MAP_POPULATE;
+ if (vma->vm_flags & VM_LOCKED)
+ flags |= MAP_LOCKED;
+
+ /* Save vm_flags used to calculate prot and flags, and recheck later. */
+ vm_flags = vma->vm_flags;
+ file = get_file(vma->vm_file);
+
+ mmap_read_unlock(mm);
+
+ /* Call outside mmap_lock to be consistent with other callers. */
+ ret = security_mmap_file(file, prot, flags);
+ if (ret) {
+ fput(file);
+ return ret;
+ }
+
+ ret = -EINVAL;
+
+ /* OK security check passed, take write lock + let it rip. */
+ if (mmap_write_lock_killable(mm)) {
+ fput(file);
+ return -EINTR;
+ }
+
+ vma = vma_lookup(mm, start);
+
+ if (!vma)
+ goto out;
+
+ /* Make sure things didn't change under us. */
+ if (vma->vm_flags != vm_flags)
+ goto out;
+ if (vma->vm_file != file)
goto out;
if (start + size > vma->vm_end) {
@@ -3023,25 +3072,11 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
goto out;
}
- prot |= vma->vm_flags & VM_READ ? PROT_READ : 0;
- prot |= vma->vm_flags & VM_WRITE ? PROT_WRITE : 0;
- prot |= vma->vm_flags & VM_EXEC ? PROT_EXEC : 0;
-
- flags &= MAP_NONBLOCK;
- flags |= MAP_SHARED | MAP_FIXED | MAP_POPULATE;
- if (vma->vm_flags & VM_LOCKED)
- flags |= MAP_LOCKED;
-
- file = get_file(vma->vm_file);
- ret = security_mmap_file(vma->vm_file, prot, flags);
- if (ret)
- goto out_fput;
ret = do_mmap(vma->vm_file, start, size,
prot, flags, pgoff, &populate, NULL);
-out_fput:
- fput(file);
out:
mmap_write_unlock(mm);
+ fput(file);
if (populate)
mm_populate(ret, populate);
if (!IS_ERR_VALUE(ret))
--
2.30.2
The patch titled
Subject: arm64: kernel: initialize missing kexec_buf->random field
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
arm64-kernel-initialize-missing-kexec_buf-random-field.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Yeoreum Yun <yeoreum.yun(a)arm.com>
Subject: arm64: kernel: initialize missing kexec_buf->random field
Date: Thu, 27 Nov 2025 18:26:44 +0000
Commit bf454ec31add ("kexec_file: allow to place kexec_buf randomly")
introduced the kexec_buf->random field to enable random placement of
kexec_buf.
However, this field was never properly initialized for kexec images that
do not need to be placed randomly, leading to the following UBSAN warning:
[ +0.364528] ------------[ cut here ]------------
[ +0.000019] UBSAN: invalid-load in ./include/linux/kexec.h:210:12
[ +0.000131] load of value 2 is not a valid value for type 'bool' (aka '_Bool')
[ +0.000003] CPU: 4 UID: 0 PID: 927 Comm: kexec Not tainted 6.18.0-rc7+ #3 PREEMPT(full)
[ +0.000002] Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 02/06/2015
[ +0.000000] Call trace:
[ +0.000001] show_stack+0x24/0x40 (C)
[ +0.000006] __dump_stack+0x28/0x48
[ +0.000002] dump_stack_lvl+0x7c/0xb0
[ +0.000002] dump_stack+0x18/0x34
[ +0.000001] ubsan_epilogue+0x10/0x50
[ +0.000002] __ubsan_handle_load_invalid_value+0xc8/0xd0
[ +0.000003] locate_mem_hole_callback+0x28c/0x2a0
[ +0.000003] kexec_locate_mem_hole+0xf4/0x2f0
[ +0.000001] kexec_add_buffer+0xa8/0x178
[ +0.000002] image_load+0xf0/0x258
[ +0.000001] __arm64_sys_kexec_file_load+0x510/0x718
[ +0.000002] invoke_syscall+0x68/0xe8
[ +0.000001] el0_svc_common+0xb0/0xf8
[ +0.000002] do_el0_svc+0x28/0x48
[ +0.000001] el0_svc+0x40/0xe8
[ +0.000002] el0t_64_sync_handler+0x84/0x140
[ +0.000002] el0t_64_sync+0x1bc/0x1c0
To address this, initialise kexec_buf->random field properly.
Link: https://lkml.kernel.org/r/20251127182644.1577592-1-yeoreum.yun@arm.com
Fixes: bf454ec31add ("kexec_file: allow to place kexec_buf randomly")
Signed-off-by: Yeoreum Yun <yeoreum.yun(a)arm.com>
Cc: Baoquan He <bhe(a)redhat.com>
Cc: Breno Leitao <leitao(a)debian.org>
Cc: Catalin Marinas <catalin.marinas(a)arm.com>
Cc: Coiby Xu <coxu(a)redhat.com>
Cc: levi.yun <yeoreum.yun(a)arm.com>
Cc: Will Deacon <will(a)kernel.org>
Cc: Jan Pazdziora <jpazdziora(a)redhat.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
arch/arm64/kernel/kexec_image.c | 3 +++
arch/arm64/kernel/machine_kexec_file.c | 6 +++++-
2 files changed, 8 insertions(+), 1 deletion(-)
--- a/arch/arm64/kernel/kexec_image.c~arm64-kernel-initialize-missing-kexec_buf-random-field
+++ a/arch/arm64/kernel/kexec_image.c
@@ -76,6 +76,9 @@ static void *image_load(struct kimage *i
kbuf.buf_min = 0;
kbuf.buf_max = ULONG_MAX;
kbuf.top_down = false;
+#ifdef CONFIG_CRASH_DUMP
+ kbuf.random = false;
+#endif
kbuf.buffer = kernel;
kbuf.bufsz = kernel_len;
--- a/arch/arm64/kernel/machine_kexec_file.c~arm64-kernel-initialize-missing-kexec_buf-random-field
+++ a/arch/arm64/kernel/machine_kexec_file.c
@@ -94,7 +94,11 @@ int load_other_segments(struct kimage *i
char *initrd, unsigned long initrd_len,
char *cmdline)
{
- struct kexec_buf kbuf = {};
+ struct kexec_buf kbuf = {
+#ifdef CONFIG_CRASH_DUMP
+ .random = false,
+#endif
+ };
void *dtb = NULL;
unsigned long initrd_load_addr = 0, dtb_len,
orig_segments = image->nr_segments;
_
Patches currently in -mm which might be from yeoreum.yun(a)arm.com are
arm64-kernel-initialize-missing-kexec_buf-random-field.patch
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x ae155060247be8dcae3802a95bd1bdf93ab3215d
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025112433-making-debatable-abfe@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ae155060247be8dcae3802a95bd1bdf93ab3215d Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni(a)redhat.com>
Date: Tue, 18 Nov 2025 08:20:24 +0100
Subject: [PATCH] mptcp: fix duplicate reset on fastclose
The CI reports sporadic failures of the fastclose self-tests. The root
cause is a duplicate reset, not carrying the relevant MPTCP option.
In the failing scenario the bad reset is received by the peer before
the fastclose one, preventing the reception of the latter.
Indeed there is window of opportunity at fastclose time for the
following race:
mptcp_do_fastclose
__mptcp_close_ssk
__tcp_close()
tcp_set_state() [1]
tcp_send_active_reset() [2]
After [1] the stack will send reset to in-flight data reaching the now
closed port. Such reset may race with [2].
Address the issue explicitly sending a single reset on fastclose before
explicitly moving the subflow to close status.
Fixes: d21f83485518 ("mptcp: use fastclose on more edge scenarios")
Cc: stable(a)vger.kernel.org
Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/596
Signed-off-by: Paolo Abeni <pabeni(a)redhat.com>
Reviewed-by: Geliang Tang <geliang(a)kernel.org>
Reviewed-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
Link: https://patch.msgid.link/20251118-net-mptcp-misc-fixes-6-18-rc6-v1-6-806d37…
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index c59246c1fde6..a70267a74e3c 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -2409,7 +2409,6 @@ bool __mptcp_retransmit_pending_data(struct sock *sk)
/* flags for __mptcp_close_ssk() */
#define MPTCP_CF_PUSH BIT(1)
-#define MPTCP_CF_FASTCLOSE BIT(2)
/* be sure to send a reset only if the caller asked for it, also
* clean completely the subflow status when the subflow reaches
@@ -2420,7 +2419,7 @@ static void __mptcp_subflow_disconnect(struct sock *ssk,
unsigned int flags)
{
if (((1 << ssk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) ||
- (flags & MPTCP_CF_FASTCLOSE)) {
+ subflow->send_fastclose) {
/* The MPTCP code never wait on the subflow sockets, TCP-level
* disconnect should never fail
*/
@@ -2467,14 +2466,8 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
- if ((flags & MPTCP_CF_FASTCLOSE) && !__mptcp_check_fallback(msk)) {
- /* be sure to force the tcp_close path
- * to generate the egress reset
- */
- ssk->sk_lingertime = 0;
- sock_set_flag(ssk, SOCK_LINGER);
- subflow->send_fastclose = 1;
- }
+ if (subflow->send_fastclose && ssk->sk_state != TCP_CLOSE)
+ tcp_set_state(ssk, TCP_CLOSE);
need_push = (flags & MPTCP_CF_PUSH) && __mptcp_retransmit_pending_data(sk);
if (!dispose_it) {
@@ -2779,9 +2772,26 @@ static void mptcp_do_fastclose(struct sock *sk)
struct mptcp_sock *msk = mptcp_sk(sk);
mptcp_set_state(sk, TCP_CLOSE);
- mptcp_for_each_subflow_safe(msk, subflow, tmp)
- __mptcp_close_ssk(sk, mptcp_subflow_tcp_sock(subflow),
- subflow, MPTCP_CF_FASTCLOSE);
+
+ /* Explicitly send the fastclose reset as need */
+ if (__mptcp_check_fallback(msk))
+ return;
+
+ mptcp_for_each_subflow_safe(msk, subflow, tmp) {
+ struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+
+ lock_sock(ssk);
+
+ /* Some subflow socket states don't allow/need a reset.*/
+ if ((1 << ssk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
+ goto unlock;
+
+ subflow->send_fastclose = 1;
+ tcp_send_active_reset(ssk, ssk->sk_allocation,
+ SK_RST_REASON_TCP_ABORT_ON_CLOSE);
+unlock:
+ release_sock(ssk);
+ }
}
static void mptcp_worker(struct work_struct *work)
From: "Darrick J. Wong" <djwong(a)kernel.org>
[ Upstream commit f0c2d7d2abca24d19831c99edea458704fac8087 ]
Every now and then, I see the following hang during mount time
quotacheck when running fstests. Turning on KASAN seems to make it
happen somewhat more frequently. I've edited the backtrace for brevity.
XFS (sdd): Quotacheck needed: Please wait.
XFS: Assertion failed: bp->b_flags & _XBF_DELWRI_Q, file: fs/xfs/xfs_buf.c, line: 2411
------------[ cut here ]------------
WARNING: CPU: 0 PID: 1831409 at fs/xfs/xfs_message.c:104 assfail+0x46/0x4a [xfs]
CPU: 0 PID: 1831409 Comm: mount Tainted: G W 5.19.0-rc6-xfsx #rc6 09911566947b9f737b036b4af85e399e4b9aef64
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.15.0-1 04/01/2014
RIP: 0010:assfail+0x46/0x4a [xfs]
Code: a0 8f 41 a0 e8 45 fe ff ff 8a 1d 2c 36 10 00 80 fb 01 76 0f 0f b6 f3 48 c7 c7 c0 f0 4f a0 e8 10 f0 02 e1 80 e3 01 74 02 0f 0b <0f> 0b 5b c3 48 8d 45 10 48 89 e2 4c 89 e6 48 89 1c 24 48 89 44 24
RSP: 0018:ffffc900078c7b30 EFLAGS: 00010246
RAX: 0000000000000000 RBX: ffff8880099ac000 RCX: 000000007fffffff
RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffffffa0418fa0
RBP: ffff8880197bc1c0 R08: 0000000000000000 R09: 000000000000000a
R10: 000000000000000a R11: f000000000000000 R12: ffffc900078c7d20
R13: 00000000fffffff5 R14: ffffc900078c7d20 R15: 0000000000000000
FS: 00007f0449903800(0000) GS:ffff88803ec00000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00005610ada631f0 CR3: 0000000014dd8002 CR4: 00000000001706f0
Call Trace:
<TASK>
xfs_buf_delwri_pushbuf+0x150/0x160 [xfs 4561f5b32c9bfb874ec98d58d0719464e1f87368]
xfs_qm_flush_one+0xd6/0x130 [xfs 4561f5b32c9bfb874ec98d58d0719464e1f87368]
xfs_qm_dquot_walk.isra.0+0x109/0x1e0 [xfs 4561f5b32c9bfb874ec98d58d0719464e1f87368]
xfs_qm_quotacheck+0x319/0x490 [xfs 4561f5b32c9bfb874ec98d58d0719464e1f87368]
xfs_qm_mount_quotas+0x65/0x2c0 [xfs 4561f5b32c9bfb874ec98d58d0719464e1f87368]
xfs_mountfs+0x6b5/0xab0 [xfs 4561f5b32c9bfb874ec98d58d0719464e1f87368]
xfs_fs_fill_super+0x781/0x990 [xfs 4561f5b32c9bfb874ec98d58d0719464e1f87368]
get_tree_bdev+0x175/0x280
vfs_get_tree+0x1a/0x80
path_mount+0x6f5/0xaa0
__x64_sys_mount+0x103/0x140
do_syscall_64+0x2b/0x80
entry_SYSCALL_64_after_hwframe+0x46/0xb0
I /think/ this can happen if xfs_qm_flush_one is racing with
xfs_qm_dquot_isolate (i.e. dquot reclaim) when the second function has
taken the dquot flush lock but xfs_qm_dqflush hasn't yet locked the
dquot buffer, let alone queued it to the delwri list. In this case,
flush_one will fail to get the dquot flush lock, but it can lock the
incore buffer, but xfs_buf_delwri_pushbuf will then trip over this
ASSERT, which checks that the buffer isn't on a delwri list. The hang
results because the _delwri_submit_buffers ignores non DELWRI_Q buffers,
which means that xfs_buf_iowait waits forever for an IO that has not yet
been scheduled.
AFAICT, a reasonable solution here is to detect a dquot buffer that is
not on a DELWRI list, drop it, and return -EAGAIN to try the flush
again. It's not /that/ big of a deal if quotacheck writes the dquot
buffer repeatedly before we even set QUOTA_CHKD.
Signed-off-by: Darrick J. Wong <djwong(a)kernel.org>
Reviewed-by: Dave Chinner <dchinner(a)redhat.com>
Signed-off-by: Alexey Panov <apanov(a)astralinux.ru>
---
Tested with the syzkaller reproducer for
https://syzkaller.appspot.com/bug?extid=3ae67535b07c92aa02fd:
the issue triggers on vanilla v5.10.y and no longer reproduces with this
patch applied.
fs/xfs/xfs_qm.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 3c17e0c0f816..907819e7873e 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -1245,6 +1245,13 @@ xfs_qm_flush_one(
error = -EINVAL;
goto out_unlock;
}
+
+ if (!(bp->b_flags & _XBF_DELWRI_Q)) {
+ error = -EAGAIN;
+ xfs_buf_relse(bp);
+ goto out_unlock;
+ }
+
xfs_buf_unlock(bp);
xfs_buf_delwri_pushbuf(bp, buffer_list);
--
2.39.5
Initialize eb->vma[].vma pointers to NULL when the eb structure is first
set up.
During the execution of eb_lookup_vmas(), the eb->vma array is
successively filled up with struct eb_vma objects. This process includes
calling eb_add_vma(), which might fail; however, even in the event of
failure, eb->vma[i].vma is set for the currently processed buffer.
If eb_add_vma() fails, eb_lookup_vmas() returns with an error, which
prompts a call to eb_release_vmas() to clean up the mess. Since
eb_lookup_vmas() might fail during processing any (possibly not first)
buffer, eb_release_vmas() checks whether a buffer's vma is NULL to know
at what point did the lookup function fail.
In eb_lookup_vmas(), eb->vma[i].vma is set to NULL if either the helper
function eb_lookup_vma() or eb_validate_vma() fails. eb->vma[i+1].vma is
set to NULL in case i915_gem_object_userptr_submit_init() fails; the
current one needs to be cleaned up by eb_release_vmas() at this point,
so the next one is set. If eb_add_vma() fails, neither the current nor
the next vma is nullified, which is a source of a NULL deref bug
described in [1].
When entering eb_lookup_vmas(), the vma pointers are set to the slab
poison value, instead of NULL. This doesn't matter for the actual
lookup, since it gets overwritten anyway, however the eb_release_vmas()
function only recognizes NULL as the stopping value, hence the pointers
are being nullified as they go in case of intermediate failure. This
patch changes the approach to filling them all with NULL at the start
instead, rather than handling that manually during failure.
Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/15062
Fixes: 544460c33821 ("drm/i915: Multi-BB execbuf")
Reported-by: Gangmin Kim <km.kim1503(a)gmail.com>
Cc: <stable(a)vger.kernel.org> # 5.16.x
Signed-off-by: Krzysztof Niemiec <krzysztof.niemiec(a)intel.com>
---
.../gpu/drm/i915/gem/i915_gem_execbuffer.c | 27 ++++++-------------
1 file changed, 8 insertions(+), 19 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index b057c2fa03a4..02120203af55 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -951,13 +951,13 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
vma = eb_lookup_vma(eb, eb->exec[i].handle);
if (IS_ERR(vma)) {
err = PTR_ERR(vma);
- goto err;
+ return err;
}
err = eb_validate_vma(eb, &eb->exec[i], vma);
if (unlikely(err)) {
i915_vma_put(vma);
- goto err;
+ return err;
}
err = eb_add_vma(eb, ¤t_batch, i, vma);
@@ -966,19 +966,8 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
if (i915_gem_object_is_userptr(vma->obj)) {
err = i915_gem_object_userptr_submit_init(vma->obj);
- if (err) {
- if (i + 1 < eb->buffer_count) {
- /*
- * Execbuffer code expects last vma entry to be NULL,
- * since we already initialized this entry,
- * set the next value to NULL or we mess up
- * cleanup handling.
- */
- eb->vma[i + 1].vma = NULL;
- }
-
+ if (err)
return err;
- }
eb->vma[i].flags |= __EXEC_OBJECT_USERPTR_INIT;
eb->args->flags |= __EXEC_USERPTR_USED;
@@ -986,10 +975,6 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
}
return 0;
-
-err:
- eb->vma[i].vma = NULL;
- return err;
}
static int eb_lock_vmas(struct i915_execbuffer *eb)
@@ -3362,6 +3347,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
struct sync_file *out_fence = NULL;
int out_fence_fd = -1;
int err;
+ int i;
BUILD_BUG_ON(__EXEC_INTERNAL_FLAGS & ~__I915_EXEC_ILLEGAL_FLAGS);
BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS &
@@ -3375,7 +3361,10 @@ i915_gem_do_execbuffer(struct drm_device *dev,
eb.exec = exec;
eb.vma = (struct eb_vma *)(exec + args->buffer_count + 1);
- eb.vma[0].vma = NULL;
+
+ for (i = 0; i < args->buffer_count; i++)
+ eb.vma[i].vma = NULL;
+
eb.batch_pool = NULL;
eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
--
2.45.2
Hello,
New build issue found on stable-rc/linux-6.1.y:
---
variable 'clidr' is uninitialized when passed as a const pointer argument here [-Werror,-Wuninitialized-const-pointer] in arch/arm64/kvm/sys_regs.o (arch/arm64/kvm/sys_regs.c) [logspec:kbuild,kbuild.compiler.error]
---
- dashboard: https://d.kernelci.org/i/maestro:20aa3d9f2fb39bcb7ec7ca6a54be07b6b2668acb
- giturl: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
- commit HEAD: 87757e18cdafe3f3dd4fc9b9e920e6416e6d00eb
Please include the KernelCI tag when submitting a fix:
Reported-by: kernelci.org bot <bot(a)kernelci.org>
Log excerpt:
=====================================================
arch/arm64/kvm/sys_regs.c:3002:23: error: variable 'clidr' is uninitialized when passed as a const pointer argument here [-Werror,-Wuninitialized-const-pointer]
3002 | get_clidr_el1(NULL, &clidr); /* Ugly... */
| ^~~~~
CC block/partitions/aix.o
1 error generated.
=====================================================
# Builds where the incident occurred:
## defconfig+allmodconfig on (arm64):
- compiler: clang-21
- config: https://files.kernelci.org/kbuild-clang-21-arm64-allmodconfig-69286db7f5b87…
- dashboard: https://d.kernelci.org/build/maestro:69286db7f5b8743b1f65f467
#kernelci issue maestro:20aa3d9f2fb39bcb7ec7ca6a54be07b6b2668acb
--
This is an experimental report format. Please send feedback in!
Talk to us at kernelci(a)lists.linux.dev
Made with love by the KernelCI team - https://kernelci.org
Function dualshock4_get_calibration_data allocates memory to pointer
buf .However, the function may exit prematurely due to transfer_failure
in this case it does not handle freeing memory.
This patch handles memory deallocation at exit.
Reported-by: syzbot+4f5f81e1456a1f645bf8(a)syzkaller.appspotmail.com
Closes: https://lore.kernel.org/all/691560c4.a70a0220.3124cb.0019.GAE@google.com/T/
Tested-by: syzbot+4f5f81e1456a1f645bf8(a)syzkaller.appspotmail.com
Fixes: 947992c7fa9e0 ("HID: playstation: DS4: Fix calibration workaround for clone devices")
Cc: stable(a)vger.kernel.org
Signed-off-by: Eslam Khafagy <eslam.medhat1993(a)gmail.com>
---
v3:
* Address issues reported by checkpatch and re-format commit message
for better readability
* kfree() is safe so no need to check for NULL pointer
v2: https://lore.kernel.org/all/20251116022723.29857-1-eslam.medhat1993@gmail.c…
* Adding tag "Cc: stable(a)vger.kernel.org"
v1: https://lore.kernel.org/all/20251115022323.1395726-1-eslam.medhat1993@gmail…
drivers/hid/hid-playstation.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/hid/hid-playstation.c b/drivers/hid/hid-playstation.c
index 128aa6abd10b..05a8522ace4f 100644
--- a/drivers/hid/hid-playstation.c
+++ b/drivers/hid/hid-playstation.c
@@ -1994,9 +1994,6 @@ static int dualshock4_get_calibration_data(struct dualshock4 *ds4)
acc_z_plus = get_unaligned_le16(&buf[31]);
acc_z_minus = get_unaligned_le16(&buf[33]);
- /* Done parsing the buffer, so let's free it. */
- kfree(buf);
-
/*
* Set gyroscope calibration and normalization parameters.
* Data values will be normalized to 1/DS4_GYRO_RES_PER_DEG_S degree/s.
@@ -2043,6 +2040,9 @@ static int dualshock4_get_calibration_data(struct dualshock4 *ds4)
ds4->accel_calib_data[2].sens_denom = range_2g;
transfer_failed:
+ /* First free buf if still allocated */
+ kfree(buf);
+
/*
* Sanity check gyro calibration data. This is needed to prevent crashes
* during report handling of virtual, clone or broken devices not implementing
--
2.43.0
From: Sean Heelan <seanheelan(a)gmail.com>
commit 2fc9feff45d92a92cd5f96487655d5be23fb7e2b upstream.
The sess->user object can currently be in use by another thread, for
example if another connection has sent a session setup request to
bind to the session being free'd. The handler for that connection could
be in the smb2_sess_setup function which makes use of sess->user.
Signed-off-by: Sean Heelan <seanheelan(a)gmail.com>
Acked-by: Namjae Jeon <linkinjeon(a)kernel.org>
Signed-off-by: Steve French <stfrench(a)microsoft.com>
Signed-off-by: Nazar Kalashnikov <sivartiwe(a)gmail.com>
---
v2: Fix duplicate From: header
Backport fix for CVE-2025-37899
fs/smb/server/smb2pdu.c | 4 ----
1 file changed, 4 deletions(-)
diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c
index 9f64808c7917..a819f198c333 100644
--- a/fs/smb/server/smb2pdu.c
+++ b/fs/smb/server/smb2pdu.c
@@ -2255,10 +2255,6 @@ int smb2_session_logoff(struct ksmbd_work *work)
sess->state = SMB2_SESSION_EXPIRED;
up_write(&conn->session_lock);
- if (sess->user) {
- ksmbd_free_user(sess->user);
- sess->user = NULL;
- }
ksmbd_all_conn_set_status(sess_id, KSMBD_SESS_NEED_SETUP);
rsp->StructureSize = cpu_to_le16(4);
--
2.43.0
From: Sean Heelan <seanheelan(a)gmail.com>
commit 2fc9feff45d92a92cd5f96487655d5be23fb7e2b upstream.
The sess->user object can currently be in use by another thread, for
example if another connection has sent a session setup request to
bind to the session being free'd. The handler for that connection could
be in the smb2_sess_setup function which makes use of sess->user.
Signed-off-by: Sean Heelan <seanheelan(a)gmail.com>
Acked-by: Namjae Jeon <linkinjeon(a)kernel.org>
Signed-off-by: Steve French <stfrench(a)microsoft.com>
Signed-off-by: Nazar Kalashnikov <sivartiwe(a)gmail.com>
---
v2: Fix duplicate From: header
Backport fix for CVE-2025-37899
fs/smb/server/smb2pdu.c | 4 ----
1 file changed, 4 deletions(-)
diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c
index d2dca5d2f17c..f72ef3fe4968 100644
--- a/fs/smb/server/smb2pdu.c
+++ b/fs/smb/server/smb2pdu.c
@@ -2252,10 +2252,6 @@ int smb2_session_logoff(struct ksmbd_work *work)
sess->state = SMB2_SESSION_EXPIRED;
up_write(&conn->session_lock);
- if (sess->user) {
- ksmbd_free_user(sess->user);
- sess->user = NULL;
- }
ksmbd_all_conn_set_status(sess_id, KSMBD_SESS_NEED_NEGOTIATE);
rsp->StructureSize = cpu_to_le16(4);
--
2.39.2
The i2c regmap allocated during probe is never freed.
Switch to using the device managed allocator so that the regmap is
released on probe failures (e.g. probe deferral) and on driver unbind.
Fixes: 3a2a8cc3fe24 ("hwmon: (max6697) Convert to use regmap")
Cc: stable(a)vger.kernel.org # 6.12
Cc: Guenter Roeck <linux(a)roeck-us.net>
Signed-off-by: Johan Hovold <johan(a)kernel.org>
---
drivers/hwmon/max6697.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/hwmon/max6697.c b/drivers/hwmon/max6697.c
index 0735a1d2c20f..6926d787b5ad 100644
--- a/drivers/hwmon/max6697.c
+++ b/drivers/hwmon/max6697.c
@@ -548,7 +548,7 @@ static int max6697_probe(struct i2c_client *client)
struct regmap *regmap;
int err;
- regmap = regmap_init_i2c(client, &max6697_regmap_config);
+ regmap = devm_regmap_init_i2c(client, &max6697_regmap_config);
if (IS_ERR(regmap))
return PTR_ERR(regmap);
--
2.51.2
Hello,
New build issue found on stable-rc/linux-5.4.y:
---
GCC does not allow variable declarations in for loop initializers before C99 [-Wgcc-compat] in mm/mempool.o (mm/mempool.c) [logspec:kbuild,kbuild.compiler.warning]
---
- dashboard: https://d.kernelci.org/i/maestro:d414057925e4dea9704ce677eef188319c8610a4
- giturl: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
- commit HEAD: b11ac6e8f913ac67f9424a5cdd4158b283c0e3cb
Please include the KernelCI tag when submitting a fix:
Reported-by: kernelci.org bot <bot(a)kernelci.org>
Log excerpt:
=====================================================
mm/mempool.c:69:8: warning: GCC does not allow variable declarations in for loop initializers before C99 [-Wgcc-compat]
69 | for (int i = 0; i < (1 << order); i++) {
| ^
CC arch/arm/mach-imx/mach-vpr200.o
mm/mempool.c:71:17: error: implicit declaration of function 'kmap_local_page' [-Werror,-Wimplicit-function-declaration]
71 | void *addr = kmap_local_page(page + i);
| ^
mm/mempool.c:71:17: note: did you mean 'kmap_to_page'?
./include/linux/highmem.h:67:14: note: 'kmap_to_page' declared here
67 | struct page *kmap_to_page(void *addr);
| ^
mm/mempool.c:71:10: error: incompatible integer to pointer conversion initializing 'void *' with an expression of type 'int' [-Wint-conversion]
71 | void *addr = kmap_local_page(page + i);
| ^ ~~~~~~~~~~~~~~~~~~~~~~~~~
mm/mempool.c:74:4: error: implicit declaration of function 'kunmap_local' [-Werror,-Wimplicit-function-declaration]
74 | kunmap_local(addr);
| ^
mm/mempool.c:103:8: warning: GCC does not allow variable declarations in for loop initializers before C99 [-Wgcc-compat]
103 | for (int i = 0; i < (1 << order); i++) {
| ^
mm/mempool.c:105:17: error: implicit declaration of function 'kmap_local_page' [-Werror,-Wimplicit-function-declaration]
105 | void *addr = kmap_local_page(page + i);
| ^
mm/mempool.c:105:10: error: incompatible integer to pointer conversion initializing 'void *' with an expression of type 'int' [-Wint-conversion]
105 | void *addr = kmap_local_page(page + i);
| ^ ~~~~~~~~~~~~~~~~~~~~~~~~~
mm/mempool.c:108:4: error: implicit declaration of function 'kunmap_local' [-Werror,-Wimplicit-function-declaration]
108 | kunmap_local(addr);
| ^
2 warnings and 6 errors generated.
=====================================================
# Builds where the incident occurred:
## defconfig+allmodconfig+CONFIG_FRAME_WARN=2048 on (arm):
- compiler: clang-21
- config: https://files.kernelci.org/kbuild-clang-21-arm-allmodconfig-69286c4df5b8743…
- dashboard: https://d.kernelci.org/build/maestro:69286c4df5b8743b1f65f308
#kernelci issue maestro:d414057925e4dea9704ce677eef188319c8610a4
--
This is an experimental report format. Please send feedback in!
Talk to us at kernelci(a)lists.linux.dev
Made with love by the KernelCI team - https://kernelci.org
Hello,
New build issue found on stable-rc/linux-5.10.y:
---
GCC does not allow variable declarations in for loop initializers before C99 [-Wgcc-compat] in mm/mempool.o (mm/mempool.c) [logspec:kbuild,kbuild.compiler.warning]
---
- dashboard: https://d.kernelci.org/i/maestro:edfc0791be4ae7547a2a17054e9cd0317c106f20
- giturl: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
- commit HEAD: 92a27d160c829ca1d8dd3be92e8e8669620d66d5
Please include the KernelCI tag when submitting a fix:
Reported-by: kernelci.org bot <bot(a)kernelci.org>
Log excerpt:
=====================================================
mm/mempool.c:68:8: warning: GCC does not allow variable declarations in for loop initializers before C99 [-Wgcc-compat]
68 | for (int i = 0; i < (1 << order); i++) {
| ^
mm/mempool.c:70:17: error: implicit declaration of function 'kmap_local_page' [-Werror,-Wimplicit-function-declaration]
70 | void *addr = kmap_local_page(page + i);
| ^
mm/mempool.c:70:17: note: did you mean 'kmap_to_page'?
./include/linux/highmem.h:124:14: note: 'kmap_to_page' declared here
124 | struct page *kmap_to_page(void *addr);
| ^
mm/mempool.c:70:10: error: incompatible integer to pointer conversion initializing 'void *' with an expression of type 'int' [-Wint-conversion]
70 | void *addr = kmap_local_page(page + i);
| ^ ~~~~~~~~~~~~~~~~~~~~~~~~~
mm/mempool.c:73:4: error: implicit declaration of function 'kunmap_local' [-Werror,-Wimplicit-function-declaration]
73 | kunmap_local(addr);
| ^
mm/mempool.c:101:8: warning: GCC does not allow variable declarations in for loop initializers before C99 [-Wgcc-compat]
101 | for (int i = 0; i < (1 << order); i++) {
| ^
mm/mempool.c:103:17: error: implicit declaration of function 'kmap_local_page' [-Werror,-Wimplicit-function-declaration]
103 | void *addr = kmap_local_page(page + i);
| ^
mm/mempool.c:103:10: error: incompatible integer to pointer conversion initializing 'void *' with an expression of type 'int' [-Wint-conversion]
103 | void *addr = kmap_local_page(page + i);
| ^ ~~~~~~~~~~~~~~~~~~~~~~~~~
mm/mempool.c:106:4: error: implicit declaration of function 'kunmap_local' [-Werror,-Wimplicit-function-declaration]
106 | kunmap_local(addr);
| ^
CC fs/notify/fanotify/fanotify.o
2 warnings and 6 errors generated.
=====================================================
# Builds where the incident occurred:
## defconfig+allmodconfig+CONFIG_FRAME_WARN=2048 on (arm):
- compiler: clang-21
- config: https://files.kernelci.org/kbuild-clang-21-arm-allmodconfig-69286cb5f5b8743…
- dashboard: https://d.kernelci.org/build/maestro:69286cb5f5b8743b1f65f372
## i386_defconfig+allmodconfig+CONFIG_FRAME_WARN=2048 on (i386):
- compiler: clang-21
- config: https://files.kernelci.org/kbuild-clang-21-i386-allmodconfig-69286ceff5b874…
- dashboard: https://d.kernelci.org/build/maestro:69286ceff5b8743b1f65f3b6
#kernelci issue maestro:edfc0791be4ae7547a2a17054e9cd0317c106f20
--
This is an experimental report format. Please send feedback in!
Talk to us at kernelci(a)lists.linux.dev
Made with love by the KernelCI team - https://kernelci.org
Hello,
New build issue found on stable-rc/linux-5.15.y:
---
GCC does not allow variable declarations in for loop initializers before C99 [-Werror,-Wgcc-compat] in mm/mempool.o (mm/mempool.c) [logspec:kbuild,kbuild.compiler.error]
---
- dashboard: https://d.kernelci.org/i/maestro:5387ce1530c340d198d1f318d34f9904675295a1
- giturl: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
- commit HEAD: 0001989708674740432a288983eddc1fdad1073b
Please include the KernelCI tag when submitting a fix:
Reported-by: kernelci.org bot <bot(a)kernelci.org>
Log excerpt:
=====================================================
mm/mempool.c:68:8: error: GCC does not allow variable declarations in for loop initializers before C99 [-Werror,-Wgcc-compat]
68 | for (int i = 0; i < (1 << order); i++) {
| ^
mm/mempool.c:101:8: error: GCC does not allow variable declarations in for loop initializers before C99 [-Werror,-Wgcc-compat]
101 | for (int i = 0; i < (1 << order); i++) {
| ^
CC kernel/irq/spurious.o
2 errors generated.
=====================================================
# Builds where the incident occurred:
## defconfig+allmodconfig+CONFIG_FRAME_WARN=2048 on (arm):
- compiler: clang-21
- config: https://files.kernelci.org/kbuild-clang-21-arm-allmodconfig-69286d2ef5b8743…
- dashboard: https://d.kernelci.org/build/maestro:69286d2ef5b8743b1f65f3ee
## i386_defconfig+allmodconfig+CONFIG_FRAME_WARN=2048 on (i386):
- compiler: clang-21
- config: https://files.kernelci.org/kbuild-clang-21-i386-allmodconfig-69286d6ff5b874…
- dashboard: https://d.kernelci.org/build/maestro:69286d6ff5b8743b1f65f41f
#kernelci issue maestro:5387ce1530c340d198d1f318d34f9904675295a1
--
This is an experimental report format. Please send feedback in!
Talk to us at kernelci(a)lists.linux.dev
Made with love by the KernelCI team - https://kernelci.org
Mejora tus contrataciones con PsicoSmart
body {
margin: 0;
padding: 0;
font-family: Arial, Helvetica, sans-serif;
font-size: 14px;
color: #333;
background-color: #ffffff;
}
table {
border-spacing: 0;
width: 100%;
max-width: 600px;
margin: auto;
}
td {
padding: 12px 20px;
}
a {
color: #1a73e8;
text-decoration: none;
}
.footer {
font-size: 12px;
color: #888888;
text-align: center;
}
Evalúa talento de forma objetiva y mejora tus contrataciones con PsicoSmart.
Hola, ,
¿Te ha pasado que un candidato luce perfecto en entrevista, pero en el trabajo no encaja como esperabas?
En selección, confiar solo en la percepción puede llevar a decisiones costosas. Por eso quiero presentarte PsicoSmart, una herramienta creada para que los equipos de Recursos Humanos tomen decisiones más objetivas y acertadas.
Con PsicoSmart puedes:
Aplicar 31 pruebas psicométricas que evalúan liderazgo, honestidad, comunicación e inteligencia.
Validar conocimientos técnicos con más de 2,500 exámenes especializados.
Supervisar la identidad de quien responde mediante captura fotográfica automática durante la evaluación.
Gestionar todo desde una sola plataforma, accesible desde cualquier dispositivo.
Si estás buscando mejorar tus contrataciones, podría ser una muy buena opción. Si quieres conocer más puedes responder este correo o simplemente contactarme, mis datos están abajo.
Saludos,
--------------
Atte.: Valeria Pérez
Ciudad de México: (55) 5018 0565
WhatsApp: +52 33 1607 2089
Si no deseas recibir más correos, haz clic aquí para darte de baja.
Para remover su dirección de esta lista haga <a href="https://s1.arrobamail.com/unsuscribe.php?id=yiwtsrewiswqrwseup">click aquí</a>
Make sure to drop the reference taken when looking up the PMU device and
its regmap.
Note that holding a reference to a device does not prevent its regmap
from going away so there is no point in keeping the reference.
Fixes: 0b7c6075022c ("soc: samsung: exynos-pmu: Add regmap support for SoCs that protect PMU regs")
Cc: stable(a)vger.kernel.org # 6.9
Cc: Peter Griffin <peter.griffin(a)linaro.org>
Signed-off-by: Johan Hovold <johan(a)kernel.org>
---
drivers/soc/samsung/exynos-pmu.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/soc/samsung/exynos-pmu.c b/drivers/soc/samsung/exynos-pmu.c
index 22c50ca2aa79..ba4de8194a0e 100644
--- a/drivers/soc/samsung/exynos-pmu.c
+++ b/drivers/soc/samsung/exynos-pmu.c
@@ -346,6 +346,8 @@ struct regmap *exynos_get_pmu_regmap_by_phandle(struct device_node *np,
if (!dev)
return ERR_PTR(-EPROBE_DEFER);
+ put_device(dev);
+
return syscon_node_to_regmap(pmu_np);
}
EXPORT_SYMBOL_GPL(exynos_get_pmu_regmap_by_phandle);
--
2.51.2
From: Nazar Kalashnikov <sivartiwe(a)gmail.com>
From: Sean Heelan <seanheelan(a)gmail.com>
commit 2fc9feff45d92a92cd5f96487655d5be23fb7e2b upstream.
The sess->user object can currently be in use by another thread, for
example if another connection has sent a session setup request to
bind to the session being free'd. The handler for that connection could
be in the smb2_sess_setup function which makes use of sess->user.
Signed-off-by: Sean Heelan <seanheelan(a)gmail.com>
Acked-by: Namjae Jeon <linkinjeon(a)kernel.org>
Signed-off-by: Steve French <stfrench(a)microsoft.com>
Signed-off-by: Nazar Kalashnikov <sivartiwe(a)gmail.com>
---
Backport fix for CVE-2025-37899
fs/smb/server/smb2pdu.c | 4 ----
1 file changed, 4 deletions(-)
diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c
index d2dca5d2f17c..f72ef3fe4968 100644
--- a/fs/smb/server/smb2pdu.c
+++ b/fs/smb/server/smb2pdu.c
@@ -2252,10 +2252,6 @@ int smb2_session_logoff(struct ksmbd_work *work)
sess->state = SMB2_SESSION_EXPIRED;
up_write(&conn->session_lock);
- if (sess->user) {
- ksmbd_free_user(sess->user);
- sess->user = NULL;
- }
ksmbd_all_conn_set_status(sess_id, KSMBD_SESS_NEED_NEGOTIATE);
rsp->StructureSize = cpu_to_le16(4);
--
2.39.2
From: Nazar Kalashnikov <sivartiwe(a)gmail.com>
From: Sean Heelan <seanheelan(a)gmail.com>
commit 2fc9feff45d92a92cd5f96487655d5be23fb7e2b upstream.
The sess->user object can currently be in use by another thread, for
example if another connection has sent a session setup request to
bind to the session being free'd. The handler for that connection could
be in the smb2_sess_setup function which makes use of sess->user.
Signed-off-by: Sean Heelan <seanheelan(a)gmail.com>
Acked-by: Namjae Jeon <linkinjeon(a)kernel.org>
Signed-off-by: Steve French <stfrench(a)microsoft.com>
Signed-off-by: Nazar Kalashnikov <sivartiwe(a)gmail.com>
---
Backport fix for CVE-2025-37899
fs/smb/server/smb2pdu.c | 4 ----
1 file changed, 4 deletions(-)
diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c
index 9f64808c7917..a819f198c333 100644
--- a/fs/smb/server/smb2pdu.c
+++ b/fs/smb/server/smb2pdu.c
@@ -2255,10 +2255,6 @@ int smb2_session_logoff(struct ksmbd_work *work)
sess->state = SMB2_SESSION_EXPIRED;
up_write(&conn->session_lock);
- if (sess->user) {
- ksmbd_free_user(sess->user);
- sess->user = NULL;
- }
ksmbd_all_conn_set_status(sess_id, KSMBD_SESS_NEED_SETUP);
rsp->StructureSize = cpu_to_le16(4);
--
2.43.0
From: Quentin Schulz <quentin.schulz(a)cherry.de>
This reverts commit 8240e87f16d17a9592c9d67857a3dcdbcb98f10d.
The original commit claimed that APIO5 IO domain (supplied with
audio-supply) is supplied by RK808-D Buck 4 as stated in the schematics.
The linked PDF has two non-schematics pages where APIO5 indeed is said
to be 1.8V. Reading the actual schematics[1][2][3][4][5][6][7][8], this
is actually wrong as APIO5 is supplied VCC_3V0 which is LDO8 from
RK808-D and is 3.0V instead of 1.8V from vcca1v8_codec.
This fixes the console disappearing in U-Boot, where the Device Tree is
imported from the Linux kernel repo, when the IO domain driver is built,
as reported by Heinrich[9]. As to why this breaks the console while the
serial is not exposed on any of the pins on the bank in the APIO5
domain, that is a well-kept secret by the SoC for now.
The issue "fixed" by the original commit will need to be fixed another
way.
[1] https://dl.radxa.com/rockpi4/docs/hw/rockpi4/4ap/radxa_rock_4ap_v1600_schem…
[2] https://dl.radxa.com/rockpi4/docs/hw/rockpi4/4ap/radxa_rock_4ap_v1730_schem…
[3] https://dl.radxa.com/rockpi4/docs/hw/rockpi4/4bp/radxa_rock_4bp_v1600_schem…
[4] https://dl.radxa.com/rockpi4/docs/hw/rockpi4/4bp/radxa_rock_4bp_v1730_schem…
[5] https://dl.radxa.com/rockpi4/docs/hw/rockpi4/ROCK-4-SE-V1.53-SCH.pdf
[6] https://dl.radxa.com/rockpi4/docs/hw/rockpi4/4b/ROCK_4B_v1.52_SCH.pdf
[7] https://dl.radxa.com/rockpi4/docs/hw/rockpi4/4a/ROCK_4A_V1.52_SCH.pdf
[8] https://dl.radxa.com/rockpi4/docs/hw/rockpi4/rockpi4_v13_sch_20181112.pdf
[9] https://lore.kernel.org/u-boot/e7b7b905-4a6c-4342-b1a5-0ad32a5837cf@gmx.de/
Cc: stable(a)vger.kernel.org
Reported-by: Heinrich Schuchardt <xypron.glpk(a)gmx.de>
Signed-off-by: Quentin Schulz <quentin.schulz(a)cherry.de>
---
Note: I do not own any of the Rock Pi 4 variants so I cannot work on
fixing the original issue report by Alex.
---
arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi
index 046dbe3290178..fda7ea87e4efc 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi
@@ -516,7 +516,7 @@ &i2s2 {
};
&io_domains {
- audio-supply = <&vcca1v8_codec>;
+ audio-supply = <&vcc_3v0>;
bt656-supply = <&vcc_3v0>;
gpio1830-supply = <&vcc_3v0>;
sdmmc-supply = <&vcc_sdio>;
---
base-commit: 765e56e41a5af2d456ddda6cbd617b9d3295ab4e
change-id: 20251127-rock-pi-4-io-domain-apio5-26bc2afa8224
Best regards,
--
Quentin Schulz <quentin.schulz(a)cherry.de>
This reverts commit 25decf0469d4c91d90aa2e28d996aed276bfc622.
This software node change doesn't actually fix any current issues
with the kernel, it is an improvement to the lookup process rather
than fixing a live bug. It also causes a couple of regressions with
shipping laptops, which relied on the label based lookup.
There is a fix for the regressions in mainline, the first 5 patches
of [1]. However, those patches are fairly substantial changes and
given the patch causing the regression doesn't actually fix a bug
it seems better to just revert it in stable.
CC: stable(a)vger.kernel.org # 6.12, 6.17
Link: https://lore.kernel.org/linux-sound/20251120-reset-gpios-swnodes-v7-0-a1004… [1]
Closes: https://github.com/thesofproject/linux/issues/5599
Closes: https://github.com/thesofproject/linux/issues/5603
Acked-by: Bartosz Golaszewski <bartosz.golaszewski(a)linaro.org>
Signed-off-by: Charles Keepax <ckeepax(a)opensource.cirrus.com>
---
I wasn't exactly sure of the proceedure for reverting a patch that was
cherry-picked to stable, so apologies if I have made any mistakes here
but happy to update if necessary.
Thanks,
Charles
drivers/gpio/gpiolib-swnode.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpio/gpiolib-swnode.c b/drivers/gpio/gpiolib-swnode.c
index e3806db1c0e07..f21dbc28cf2c8 100644
--- a/drivers/gpio/gpiolib-swnode.c
+++ b/drivers/gpio/gpiolib-swnode.c
@@ -41,7 +41,7 @@ static struct gpio_device *swnode_get_gpio_device(struct fwnode_handle *fwnode)
!strcmp(gdev_node->name, GPIOLIB_SWNODE_UNDEFINED_NAME))
return ERR_PTR(-ENOENT);
- gdev = gpio_device_find_by_fwnode(fwnode);
+ gdev = gpio_device_find_by_label(gdev_node->name);
return gdev ?: ERR_PTR(-EPROBE_DEFER);
}
--
2.47.3
During system shutdown, KFENCE can cause IPI synchronization issues if
it remains active through the reboot process. To prevent this, register
a reboot notifier that disables KFENCE and cancels any pending timer
work early in the shutdown sequence.
This is only necessary when CONFIG_KFENCE_STATIC_KEYS is enabled, as
this configuration sends IPIs that can interfere with shutdown. Without
static keys, no IPIs are generated and KFENCE can safely remain active.
The notifier uses maximum priority (INT_MAX) to ensure KFENCE shuts
down before other subsystems that might still depend on stable memory
allocation behavior.
This fixes a late kexec CSD lockup[1] when kfence is trying to IPI a CPU
that is busy in a IRQ-disabled context printing characters to the
console.
Link: https://lore.kernel.org/all/sqwajvt7utnt463tzxgwu2yctyn5m6bjwrslsnupfexeml6… [1]
Cc: stable(a)vger.kernel.org
Signed-off-by: Breno Leitao <leitao(a)debian.org>
Reviewed-by: Marco Elver <elver(a)google.com>
Fixes: 0ce20dd84089 ("mm: add Kernel Electric-Fence infrastructure")
---
Changes in v2:
- Adding Fixes: tag and CCing stable (akpm)
- Link to v1: https://patch.msgid.link/20251126-kfence-v1-1-5a6e1d7c681c@debian.org
---
mm/kfence/core.c | 24 ++++++++++++++++++++++++
1 file changed, 24 insertions(+)
diff --git a/mm/kfence/core.c b/mm/kfence/core.c
index 727c20c94ac5..162a026871ab 100644
--- a/mm/kfence/core.c
+++ b/mm/kfence/core.c
@@ -26,6 +26,7 @@
#include <linux/panic_notifier.h>
#include <linux/random.h>
#include <linux/rcupdate.h>
+#include <linux/reboot.h>
#include <linux/sched/clock.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
@@ -820,6 +821,25 @@ static struct notifier_block kfence_check_canary_notifier = {
static struct delayed_work kfence_timer;
#ifdef CONFIG_KFENCE_STATIC_KEYS
+static int kfence_reboot_callback(struct notifier_block *nb,
+ unsigned long action, void *data)
+{
+ /*
+ * Disable kfence to avoid static keys IPI synchronization during
+ * late shutdown/kexec
+ */
+ WRITE_ONCE(kfence_enabled, false);
+ /* Cancel any pending timer work */
+ cancel_delayed_work_sync(&kfence_timer);
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block kfence_reboot_notifier = {
+ .notifier_call = kfence_reboot_callback,
+ .priority = INT_MAX, /* Run early to stop timers ASAP */
+};
+
/* Wait queue to wake up allocation-gate timer task. */
static DECLARE_WAIT_QUEUE_HEAD(allocation_wait);
@@ -901,6 +921,10 @@ static void kfence_init_enable(void)
if (kfence_check_on_panic)
atomic_notifier_chain_register(&panic_notifier_list, &kfence_check_canary_notifier);
+#ifdef CONFIG_KFENCE_STATIC_KEYS
+ register_reboot_notifier(&kfence_reboot_notifier);
+#endif
+
WRITE_ONCE(kfence_enabled, true);
queue_delayed_work(system_unbound_wq, &kfence_timer, 0);
---
base-commit: ab084f0b8d6d2ee4b1c6a28f39a2a7430bdfa7f0
change-id: 20251126-kfence-42c93f9b3979
Best regards,
--
Breno Leitao <leitao(a)debian.org>
Stable team, please backport
8c9006283e4b ("Revert "drm/i915/dp: Reject HBR3 when sink doesn't support TPS4"")
from v6.18-rc1 to v6.15+. It's missing the obvious
Fixes: 584cf613c24a ("drm/i915/dp: Reject HBR3 when sink doesn't support TPS4")
Thanks,
Jani.
--
Jani Nikula, Intel
The patch below does not apply to the 6.17-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.17.y
git checkout FETCH_HEAD
git cherry-pick -x fa759cd75bce5489eed34596daa53f721849a86f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025112149-ahoy-manliness-1554@gregkh' --subject-prefix 'PATCH 6.17.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From fa759cd75bce5489eed34596daa53f721849a86f Mon Sep 17 00:00:00 2001
From: Pasha Tatashin <pasha.tatashin(a)soleen.com>
Date: Mon, 20 Oct 2025 20:08:52 -0400
Subject: [PATCH] kho: allocate metadata directly from the buddy allocator
KHO allocates metadata for its preserved memory map using the slab
allocator via kzalloc(). This metadata is temporary and is used by the
next kernel during early boot to find preserved memory.
A problem arises when KFENCE is enabled. kzalloc() calls can be randomly
intercepted by kfence_alloc(), which services the allocation from a
dedicated KFENCE memory pool. This pool is allocated early in boot via
memblock.
When booting via KHO, the memblock allocator is restricted to a "scratch
area", forcing the KFENCE pool to be allocated within it. This creates a
conflict, as the scratch area is expected to be ephemeral and
overwriteable by a subsequent kexec. If KHO metadata is placed in this
KFENCE pool, it leads to memory corruption when the next kernel is loaded.
To fix this, modify KHO to allocate its metadata directly from the buddy
allocator instead of slab.
Link: https://lkml.kernel.org/r/20251021000852.2924827-4-pasha.tatashin@soleen.com
Fixes: fc33e4b44b27 ("kexec: enable KHO support for memory preservation")
Signed-off-by: Pasha Tatashin <pasha.tatashin(a)soleen.com>
Reviewed-by: Pratyush Yadav <pratyush(a)kernel.org>
Reviewed-by: Mike Rapoport (Microsoft) <rppt(a)kernel.org>
Reviewed-by: David Matlack <dmatlack(a)google.com>
Cc: Alexander Graf <graf(a)amazon.com>
Cc: Christian Brauner <brauner(a)kernel.org>
Cc: Jason Gunthorpe <jgg(a)ziepe.ca>
Cc: Jonathan Corbet <corbet(a)lwn.net>
Cc: Masahiro Yamada <masahiroy(a)kernel.org>
Cc: Miguel Ojeda <ojeda(a)kernel.org>
Cc: Randy Dunlap <rdunlap(a)infradead.org>
Cc: Samiullah Khawaja <skhawaja(a)google.com>
Cc: Tejun Heo <tj(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 0ceb4e09306c..623bee335383 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -7,6 +7,7 @@
#include <linux/mmzone.h>
#include <linux/topology.h>
#include <linux/alloc_tag.h>
+#include <linux/cleanup.h>
#include <linux/sched.h>
struct vm_area_struct;
@@ -463,4 +464,6 @@ static inline struct folio *folio_alloc_gigantic_noprof(int order, gfp_t gfp,
/* This should be paired with folio_put() rather than free_contig_range(). */
#define folio_alloc_gigantic(...) alloc_hooks(folio_alloc_gigantic_noprof(__VA_ARGS__))
+DEFINE_FREE(free_page, void *, free_page((unsigned long)_T))
+
#endif /* __LINUX_GFP_H */
diff --git a/kernel/kexec_handover.c b/kernel/kexec_handover.c
index 9217d2fdd2d3..2a8c20c238a8 100644
--- a/kernel/kexec_handover.c
+++ b/kernel/kexec_handover.c
@@ -142,7 +142,7 @@ static void *xa_load_or_alloc(struct xarray *xa, unsigned long index)
if (res)
return res;
- void *elm __free(kfree) = kzalloc(PAGE_SIZE, GFP_KERNEL);
+ void *elm __free(free_page) = (void *)get_zeroed_page(GFP_KERNEL);
if (!elm)
return ERR_PTR(-ENOMEM);
@@ -348,9 +348,9 @@ static_assert(sizeof(struct khoser_mem_chunk) == PAGE_SIZE);
static struct khoser_mem_chunk *new_chunk(struct khoser_mem_chunk *cur_chunk,
unsigned long order)
{
- struct khoser_mem_chunk *chunk __free(kfree) = NULL;
+ struct khoser_mem_chunk *chunk __free(free_page) = NULL;
- chunk = kzalloc(PAGE_SIZE, GFP_KERNEL);
+ chunk = (void *)get_zeroed_page(GFP_KERNEL);
if (!chunk)
return ERR_PTR(-ENOMEM);
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 74207de2ba10c2973334906822dc94d2e859ffc5
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025112026-substance-senator-8409@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 74207de2ba10c2973334906822dc94d2e859ffc5 Mon Sep 17 00:00:00 2001
From: Kiryl Shutsemau <kas(a)kernel.org>
Date: Mon, 27 Oct 2025 11:56:35 +0000
Subject: [PATCH] mm/memory: do not populate page table entries beyond i_size
Patch series "Fix SIGBUS semantics with large folios", v3.
Accessing memory within a VMA, but beyond i_size rounded up to the next
page size, is supposed to generate SIGBUS.
Darrick reported[1] an xfstests regression in v6.18-rc1. generic/749
failed due to missing SIGBUS. This was caused by my recent changes that
try to fault in the whole folio where possible:
19773df031bc ("mm/fault: try to map the entire file folio in finish_fault()")
357b92761d94 ("mm/filemap: map entire large folio faultaround")
These changes did not consider i_size when setting up PTEs, leading to
xfstest breakage.
However, the problem has been present in the kernel for a long time -
since huge tmpfs was introduced in 2016. The kernel happily maps
PMD-sized folios as PMD without checking i_size. And huge=always tmpfs
allocates PMD-size folios on any writes.
I considered this corner case when I implemented a large tmpfs, and my
conclusion was that no one in their right mind should rely on receiving a
SIGBUS signal when accessing beyond i_size. I cannot imagine how it could
be useful for the workload.
But apparently filesystem folks care a lot about preserving strict SIGBUS
semantics.
Generic/749 was introduced last year with reference to POSIX, but no real
workloads were mentioned. It also acknowledged the tmpfs deviation from
the test case.
POSIX indeed says[3]:
References within the address range starting at pa and
continuing for len bytes to whole pages following the end of an
object shall result in delivery of a SIGBUS signal.
The patchset fixes the regression introduced by recent changes as well as
more subtle SIGBUS breakage due to split failure on truncation.
This patch (of 2):
Accesses within VMA, but beyond i_size rounded up to PAGE_SIZE are
supposed to generate SIGBUS.
Recent changes attempted to fault in full folio where possible. They did
not respect i_size, which led to populating PTEs beyond i_size and
breaking SIGBUS semantics.
Darrick reported generic/749 breakage because of this.
However, the problem existed before the recent changes. With huge=always
tmpfs, any write to a file leads to PMD-size allocation. Following the
fault-in of the folio will install PMD mapping regardless of i_size.
Fix filemap_map_pages() and finish_fault() to not install:
- PTEs beyond i_size;
- PMD mappings across i_size;
Make an exception for shmem/tmpfs that for long time intentionally
mapped with PMDs across i_size.
Link: https://lkml.kernel.org/r/20251027115636.82382-1-kirill@shutemov.name
Link: https://lkml.kernel.org/r/20251027115636.82382-2-kirill@shutemov.name
Signed-off-by: Kiryl Shutsemau <kas(a)kernel.org>
Fixes: 6795801366da ("xfs: Support large folios")
Reported-by: "Darrick J. Wong" <djwong(a)kernel.org>
Cc: Al Viro <viro(a)zeniv.linux.org.uk>
Cc: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Cc: Christian Brauner <brauner(a)kernel.org>
Cc: Dave Chinner <david(a)fromorbit.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Hugh Dickins <hughd(a)google.com>
Cc: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: Liam Howlett <liam.howlett(a)oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Cc: Michal Hocko <mhocko(a)suse.com>
Cc: Mike Rapoport <rppt(a)kernel.org>
Cc: Rik van Riel <riel(a)surriel.com>
Cc: Shakeel Butt <shakeel.butt(a)linux.dev>
Cc: Suren Baghdasaryan <surenb(a)google.com>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/filemap.c b/mm/filemap.c
index 13f0259d993c..2f1e7e283a51 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -3681,7 +3681,8 @@ static struct folio *next_uptodate_folio(struct xa_state *xas,
static vm_fault_t filemap_map_folio_range(struct vm_fault *vmf,
struct folio *folio, unsigned long start,
unsigned long addr, unsigned int nr_pages,
- unsigned long *rss, unsigned short *mmap_miss)
+ unsigned long *rss, unsigned short *mmap_miss,
+ bool can_map_large)
{
unsigned int ref_from_caller = 1;
vm_fault_t ret = 0;
@@ -3696,7 +3697,7 @@ static vm_fault_t filemap_map_folio_range(struct vm_fault *vmf,
* The folio must not cross VMA or page table boundary.
*/
addr0 = addr - start * PAGE_SIZE;
- if (folio_within_vma(folio, vmf->vma) &&
+ if (can_map_large && folio_within_vma(folio, vmf->vma) &&
(addr0 & PMD_MASK) == ((addr0 + folio_size(folio) - 1) & PMD_MASK)) {
vmf->pte -= start;
page -= start;
@@ -3811,13 +3812,27 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf,
unsigned long rss = 0;
unsigned int nr_pages = 0, folio_type;
unsigned short mmap_miss = 0, mmap_miss_saved;
+ bool can_map_large;
rcu_read_lock();
folio = next_uptodate_folio(&xas, mapping, end_pgoff);
if (!folio)
goto out;
- if (filemap_map_pmd(vmf, folio, start_pgoff)) {
+ file_end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE) - 1;
+ end_pgoff = min(end_pgoff, file_end);
+
+ /*
+ * Do not allow to map with PTEs beyond i_size and with PMD
+ * across i_size to preserve SIGBUS semantics.
+ *
+ * Make an exception for shmem/tmpfs that for long time
+ * intentionally mapped with PMDs across i_size.
+ */
+ can_map_large = shmem_mapping(mapping) ||
+ file_end >= folio_next_index(folio);
+
+ if (can_map_large && filemap_map_pmd(vmf, folio, start_pgoff)) {
ret = VM_FAULT_NOPAGE;
goto out;
}
@@ -3830,10 +3845,6 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf,
goto out;
}
- file_end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE) - 1;
- if (end_pgoff > file_end)
- end_pgoff = file_end;
-
folio_type = mm_counter_file(folio);
do {
unsigned long end;
@@ -3850,7 +3861,8 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf,
else
ret |= filemap_map_folio_range(vmf, folio,
xas.xa_index - folio->index, addr,
- nr_pages, &rss, &mmap_miss);
+ nr_pages, &rss, &mmap_miss,
+ can_map_large);
folio_unlock(folio);
} while ((folio = next_uptodate_folio(&xas, mapping, end_pgoff)) != NULL);
diff --git a/mm/memory.c b/mm/memory.c
index 74b45e258323..b59ae7ce42eb 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -65,6 +65,7 @@
#include <linux/gfp.h>
#include <linux/migrate.h>
#include <linux/string.h>
+#include <linux/shmem_fs.h>
#include <linux/memory-tiers.h>
#include <linux/debugfs.h>
#include <linux/userfaultfd_k.h>
@@ -5501,8 +5502,25 @@ vm_fault_t finish_fault(struct vm_fault *vmf)
return ret;
}
+ if (!needs_fallback && vma->vm_file) {
+ struct address_space *mapping = vma->vm_file->f_mapping;
+ pgoff_t file_end;
+
+ file_end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE);
+
+ /*
+ * Do not allow to map with PTEs beyond i_size and with PMD
+ * across i_size to preserve SIGBUS semantics.
+ *
+ * Make an exception for shmem/tmpfs that for long time
+ * intentionally mapped with PMDs across i_size.
+ */
+ needs_fallback = !shmem_mapping(mapping) &&
+ file_end < folio_next_index(folio);
+ }
+
if (pmd_none(*vmf->pmd)) {
- if (folio_test_pmd_mappable(folio)) {
+ if (!needs_fallback && folio_test_pmd_mappable(folio)) {
ret = do_set_pmd(vmf, folio, page);
if (ret != VM_FAULT_FALLBACK)
return ret;
The patch below does not apply to the 6.17-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.17.y
git checkout FETCH_HEAD
git cherry-pick -x fa04f5b60fda62c98a53a60de3a1e763f11feb41
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025112037-resurface-backlight-da75@gregkh' --subject-prefix 'PATCH 6.17.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From fa04f5b60fda62c98a53a60de3a1e763f11feb41 Mon Sep 17 00:00:00 2001
From: Kiryl Shutsemau <kas(a)kernel.org>
Date: Mon, 27 Oct 2025 11:56:36 +0000
Subject: [PATCH] mm/truncate: unmap large folio on split failure
Accesses within VMA, but beyond i_size rounded up to PAGE_SIZE are
supposed to generate SIGBUS.
This behavior might not be respected on truncation.
During truncation, the kernel splits a large folio in order to reclaim
memory. As a side effect, it unmaps the folio and destroys PMD mappings
of the folio. The folio will be refaulted as PTEs and SIGBUS semantics
are preserved.
However, if the split fails, PMD mappings are preserved and the user will
not receive SIGBUS on any accesses within the PMD.
Unmap the folio on split failure. It will lead to refault as PTEs and
preserve SIGBUS semantics.
Make an exception for shmem/tmpfs that for long time intentionally mapped
with PMDs across i_size.
Link: https://lkml.kernel.org/r/20251027115636.82382-3-kirill@shutemov.name
Fixes: b9a8a4195c7d ("truncate,shmem: Handle truncates that split large folios")
Signed-off-by: Kiryl Shutsemau <kas(a)kernel.org>
Cc: Al Viro <viro(a)zeniv.linux.org.uk>
Cc: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Cc: Christian Brauner <brauner(a)kernel.org>
Cc: "Darrick J. Wong" <djwong(a)kernel.org>
Cc: Dave Chinner <david(a)fromorbit.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Hugh Dickins <hughd(a)google.com>
Cc: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: Liam Howlett <liam.howlett(a)oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Cc: Michal Hocko <mhocko(a)suse.com>
Cc: Mike Rapoport <rppt(a)kernel.org>
Cc: Rik van Riel <riel(a)surriel.com>
Cc: Shakeel Butt <shakeel.butt(a)linux.dev>
Cc: Suren Baghdasaryan <surenb(a)google.com>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/truncate.c b/mm/truncate.c
index 9210cf808f5c..3c5a50ae3274 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -177,6 +177,32 @@ int truncate_inode_folio(struct address_space *mapping, struct folio *folio)
return 0;
}
+static int try_folio_split_or_unmap(struct folio *folio, struct page *split_at,
+ unsigned long min_order)
+{
+ enum ttu_flags ttu_flags =
+ TTU_SYNC |
+ TTU_SPLIT_HUGE_PMD |
+ TTU_IGNORE_MLOCK;
+ int ret;
+
+ ret = try_folio_split_to_order(folio, split_at, min_order);
+
+ /*
+ * If the split fails, unmap the folio, so it will be refaulted
+ * with PTEs to respect SIGBUS semantics.
+ *
+ * Make an exception for shmem/tmpfs that for long time
+ * intentionally mapped with PMDs across i_size.
+ */
+ if (ret && !shmem_mapping(folio->mapping)) {
+ try_to_unmap(folio, ttu_flags);
+ WARN_ON(folio_mapped(folio));
+ }
+
+ return ret;
+}
+
/*
* Handle partial folios. The folio may be entirely within the
* range if a split has raced with us. If not, we zero the part of the
@@ -226,7 +252,7 @@ bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end)
min_order = mapping_min_folio_order(folio->mapping);
split_at = folio_page(folio, PAGE_ALIGN_DOWN(offset) / PAGE_SIZE);
- if (!try_folio_split_to_order(folio, split_at, min_order)) {
+ if (!try_folio_split_or_unmap(folio, split_at, min_order)) {
/*
* try to split at offset + length to make sure folios within
* the range can be dropped, especially to avoid memory waste
@@ -250,13 +276,10 @@ bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end)
if (!folio_trylock(folio2))
goto out;
- /*
- * make sure folio2 is large and does not change its mapping.
- * Its split result does not matter here.
- */
+ /* make sure folio2 is large and does not change its mapping */
if (folio_test_large(folio2) &&
folio2->mapping == folio->mapping)
- try_folio_split_to_order(folio2, split_at2, min_order);
+ try_folio_split_or_unmap(folio2, split_at2, min_order);
folio_unlock(folio2);
out:
From: Franz Schnyder <franz.schnyder(a)toradex.com>
Currently, the PHY only registers the typec orientation switch when it
is built in. If the typec driver is built as a module, the switch
registration is skipped due to the preprocessor condition, causing
orientation detection to fail.
With commit
45fe729be9a6 ("usb: typec: Stub out typec_switch APIs when CONFIG_TYPEC=n")
the preprocessor condition is not needed anymore and the orientation
switch is correctly registered for both built-in and module builds.
Fixes: b58f0f86fd61 ("phy: fsl-imx8mq-usb: add tca function driver for imx95")
Cc: stable(a)vger.kernel.org
Suggested-by: Xu Yang <xu.yang_2(a)nxp.com>
Signed-off-by: Franz Schnyder <franz.schnyder(a)toradex.com>
---
v2: Drop the preprocessor condition after a better suggestion.
Reviewed-by Neil tag not added as patch is different
---
drivers/phy/freescale/phy-fsl-imx8mq-usb.c | 14 --------------
1 file changed, 14 deletions(-)
diff --git a/drivers/phy/freescale/phy-fsl-imx8mq-usb.c b/drivers/phy/freescale/phy-fsl-imx8mq-usb.c
index b94f242420fc..72e8aff38b92 100644
--- a/drivers/phy/freescale/phy-fsl-imx8mq-usb.c
+++ b/drivers/phy/freescale/phy-fsl-imx8mq-usb.c
@@ -124,8 +124,6 @@ struct imx8mq_usb_phy {
static void tca_blk_orientation_set(struct tca_blk *tca,
enum typec_orientation orientation);
-#ifdef CONFIG_TYPEC
-
static int tca_blk_typec_switch_set(struct typec_switch_dev *sw,
enum typec_orientation orientation)
{
@@ -173,18 +171,6 @@ static void tca_blk_put_typec_switch(struct typec_switch_dev *sw)
typec_switch_unregister(sw);
}
-#else
-
-static struct typec_switch_dev *tca_blk_get_typec_switch(struct platform_device *pdev,
- struct imx8mq_usb_phy *imx_phy)
-{
- return NULL;
-}
-
-static void tca_blk_put_typec_switch(struct typec_switch_dev *sw) {}
-
-#endif /* CONFIG_TYPEC */
-
static void tca_blk_orientation_set(struct tca_blk *tca,
enum typec_orientation orientation)
{
--
2.43.0
kstack_offset was previously maintained per-cpu, but this caused a
couple of issues. So let's instead make it per-task.
Issue 1: add_random_kstack_offset() and choose_random_kstack_offset()
expected and required to be called with interrupts and preemption
disabled so that it could manipulate per-cpu state. But arm64, loongarch
and risc-v are calling them with interrupts and preemption enabled. I
don't _think_ this causes any functional issues, but it's certainly
unexpected and could lead to manipulating the wrong cpu's state, which
could cause a minor performance degradation due to bouncing the cache
lines. By maintaining the state per-task those functions can safely be
called in preemptible context.
Issue 2: add_random_kstack_offset() is called before executing the
syscall and expands the stack using a previously chosen rnadom offset.
choose_random_kstack_offset() is called after executing the syscall and
chooses and stores a new random offset for the next syscall. With
per-cpu storage for this offset, an attacker could force cpu migration
during the execution of the syscall and prevent the offset from being
updated for the original cpu such that it is predictable for the next
syscall on that cpu. By maintaining the state per-task, this problem
goes away because the per-task random offset is updated after the
syscall regardless of which cpu it is executing on.
Fixes: 39218ff4c625 ("stack: Optionally randomize kernel stack offset each syscall")
Closes: https://lore.kernel.org/all/dd8c37bc-795f-4c7a-9086-69e584d8ab24@arm.com/
Cc: stable(a)vger.kernel.org
Signed-off-by: Ryan Roberts <ryan.roberts(a)arm.com>
---
include/linux/randomize_kstack.h | 26 +++++++++++++++-----------
include/linux/sched.h | 4 ++++
init/main.c | 1 -
kernel/fork.c | 2 ++
4 files changed, 21 insertions(+), 12 deletions(-)
diff --git a/include/linux/randomize_kstack.h b/include/linux/randomize_kstack.h
index 1d982dbdd0d0..089b1432f7e6 100644
--- a/include/linux/randomize_kstack.h
+++ b/include/linux/randomize_kstack.h
@@ -9,7 +9,6 @@
DECLARE_STATIC_KEY_MAYBE(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT,
randomize_kstack_offset);
-DECLARE_PER_CPU(u32, kstack_offset);
/*
* Do not use this anywhere else in the kernel. This is used here because
@@ -50,15 +49,14 @@ DECLARE_PER_CPU(u32, kstack_offset);
* add_random_kstack_offset - Increase stack utilization by previously
* chosen random offset
*
- * This should be used in the syscall entry path when interrupts and
- * preempt are disabled, and after user registers have been stored to
- * the stack. For testing the resulting entropy, please see:
- * tools/testing/selftests/lkdtm/stack-entropy.sh
+ * This should be used in the syscall entry path after user registers have been
+ * stored to the stack. Preemption may be enabled. For testing the resulting
+ * entropy, please see: tools/testing/selftests/lkdtm/stack-entropy.sh
*/
#define add_random_kstack_offset() do { \
if (static_branch_maybe(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT, \
&randomize_kstack_offset)) { \
- u32 offset = raw_cpu_read(kstack_offset); \
+ u32 offset = current->kstack_offset; \
u8 *ptr = __kstack_alloca(KSTACK_OFFSET_MAX(offset)); \
/* Keep allocation even after "ptr" loses scope. */ \
asm volatile("" :: "r"(ptr) : "memory"); \
@@ -69,9 +67,9 @@ DECLARE_PER_CPU(u32, kstack_offset);
* choose_random_kstack_offset - Choose the random offset for the next
* add_random_kstack_offset()
*
- * This should only be used during syscall exit when interrupts and
- * preempt are disabled. This position in the syscall flow is done to
- * frustrate attacks from userspace attempting to learn the next offset:
+ * This should only be used during syscall exit. Preemption may be enabled. This
+ * position in the syscall flow is done to frustrate attacks from userspace
+ * attempting to learn the next offset:
* - Maximize the timing uncertainty visible from userspace: if the
* offset is chosen at syscall entry, userspace has much more control
* over the timing between choosing offsets. "How long will we be in
@@ -85,14 +83,20 @@ DECLARE_PER_CPU(u32, kstack_offset);
#define choose_random_kstack_offset(rand) do { \
if (static_branch_maybe(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT, \
&randomize_kstack_offset)) { \
- u32 offset = raw_cpu_read(kstack_offset); \
+ u32 offset = current->kstack_offset; \
offset = ror32(offset, 5) ^ (rand); \
- raw_cpu_write(kstack_offset, offset); \
+ current->kstack_offset = offset; \
} \
} while (0)
+
+static inline void random_kstack_task_init(struct task_struct *tsk)
+{
+ current->kstack_offset = 0;
+}
#else /* CONFIG_RANDOMIZE_KSTACK_OFFSET */
#define add_random_kstack_offset() do { } while (0)
#define choose_random_kstack_offset(rand) do { } while (0)
+#define random_kstack_task_init(tsk) do { } while (0)
#endif /* CONFIG_RANDOMIZE_KSTACK_OFFSET */
#endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index b469878de25c..dae227d217ef 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1613,6 +1613,10 @@ struct task_struct {
unsigned long prev_lowest_stack;
#endif
+#ifdef CONFIG_RANDOMIZE_KSTACK_OFFSET
+ u32 kstack_offset;
+#endif
+
#ifdef CONFIG_X86_MCE
void __user *mce_vaddr;
__u64 mce_kflags;
diff --git a/init/main.c b/init/main.c
index 07a3116811c5..048a62538242 100644
--- a/init/main.c
+++ b/init/main.c
@@ -830,7 +830,6 @@ static inline void initcall_debug_enable(void)
#ifdef CONFIG_RANDOMIZE_KSTACK_OFFSET
DEFINE_STATIC_KEY_MAYBE_RO(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT,
randomize_kstack_offset);
-DEFINE_PER_CPU(u32, kstack_offset);
static int __init early_randomize_kstack_offset(char *buf)
{
diff --git a/kernel/fork.c b/kernel/fork.c
index 3da0f08615a9..c0dced542b8a 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -95,6 +95,7 @@
#include <linux/thread_info.h>
#include <linux/kstack_erase.h>
#include <linux/kasan.h>
+#include <linux/randomize_kstack.h>
#include <linux/scs.h>
#include <linux/io_uring.h>
#include <linux/bpf.h>
@@ -2191,6 +2192,7 @@ __latent_entropy struct task_struct *copy_process(
if (retval)
goto bad_fork_cleanup_io;
+ random_kstack_task_init(p);
stackleak_task_init(p);
if (pid != &init_struct_pid) {
--
2.43.0
The USB OTG port of the RK3308 exibits a bug when:
- configured as peripheral, and
- used in gadget mode, and
- the USB cable is connected since before booting
The symptom is: about 6 seconds after configuring gadget mode the device is
disconnected and then re-enumerated. This happens only once per boot.
Investigation showed that in this configuration the charger detection code
turns off the PHY after 6 seconds. Patch 1 avoids this when a cable is
connected (VBUS present).
After patch 1 the connection is stable but communication stops after 6
seconds. this is addressed by patch 2.
The topic had been discussed in [0]. Thanks Alan and Minas for the
discussion and Louis for having found the 1st issue, leading to patch 1.
[0] https://lore.kernel.org/lkml/20250414185458.7767aabc@booty/
Luca
Signed-off-by: Luca Ceresoli <luca.ceresoli(a)bootlin.com>
---
Changes in v2:
- Patch 1: Fixed Co-developed-by: and SoB line order
- Added missing Cc: stable(a)vger.kernel.org
- Added Théo's Reviewed-by
- Improved commit message
- Patch 2: trimmed discussion about "there is no disconnection" from
commit message to not distract from the actual issue (writes to
chg_det.opmode)
- Link to v1: https://lore.kernel.org/r/20250722-rk3308-fix-usb-gadget-phy-disconnect-v1-…
---
Louis Chauvet (1):
phy: rockchip: inno-usb2: fix disconnection in gadget mode
Luca Ceresoli (1):
phy: rockchip: inno-usb2: fix communication disruption in gadget mode
drivers/phy/rockchip/phy-rockchip-inno-usb2.c | 12 ++++++++----
1 file changed, 8 insertions(+), 4 deletions(-)
---
base-commit: cabb748c4b98ef67bbb088be61a2e0c850ebf70d
change-id: 20250718-rk3308-fix-usb-gadget-phy-disconnect-d7de71fb28b4
Best regards,
--
Luca Ceresoli <luca.ceresoli(a)bootlin.com>
Set CLK_IGNORE_UNUSED flag to clock adsp audio26m to prevent disabling
this clock during early boot, as turning it off causes other modules
to fail probing and leads to boot failures in ARM SystemReady test cases
and the EFI boot process (on Linux Distributions, for example, debian,
openSuse, etc.). Without this flag, disabling unused clocks cannot
complete properly after adsp_audio26m is turned off.
Fixes: 0d2f2cefba64 ("clk: mediatek: Add MT8188 adsp clock support")
Cc: Stable(a)vger.kernel.org
Signed-off-by: Macpaul Lin <macpaul.lin(a)mediatek.com>
---
drivers/clk/mediatek/clk-mt8188-adsp_audio26m.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/clk/mediatek/clk-mt8188-adsp_audio26m.c b/drivers/clk/mediatek/clk-mt8188-adsp_audio26m.c
index dcde2187d24a..b9fe66ef4f2e 100644
--- a/drivers/clk/mediatek/clk-mt8188-adsp_audio26m.c
+++ b/drivers/clk/mediatek/clk-mt8188-adsp_audio26m.c
@@ -20,8 +20,8 @@ static const struct mtk_gate_regs adsp_audio26m_cg_regs = {
};
#define GATE_ADSP_FLAGS(_id, _name, _parent, _shift) \
- GATE_MTK(_id, _name, _parent, &adsp_audio26m_cg_regs, _shift, \
- &mtk_clk_gate_ops_no_setclr)
+ GATE_MTK_FLAGS(_id, _name, _parent, &adsp_audio26m_cg_regs, _shift, \
+ &mtk_clk_gate_ops_no_setclr, CLK_IGNORE_UNUSED)
static const struct mtk_gate adsp_audio26m_clks[] = {
GATE_ADSP_FLAGS(CLK_AUDIODSP_AUDIO26M, "audiodsp_audio26m", "clk26m", 3),
--
2.45.2
Dzień dobry,
pomagamy przedsiębiorcom wprowadzić model wymiany walut, który minimalizuje wahania kosztów przy rozliczeniach międzynarodowych.
Kiedyv możemy umówić się na 15-minutową rozmowę, aby zaprezentować, jak taki model mógłby działać w Państwa firmie - z gwarancją indywidualnych kursów i pełnym uproszczeniem płatności? Proszę o propozycję dogodnego terminu.
Pozdrawiam
Marek Poradecki
Building htmldocs generates a warning:
WARNING: include/uapi/linux/media/amlogic/c3-isp-config.h:199
error: Cannot parse struct or union!
Which correctly highlights that the c3_isp_params_block_header symbol
is wrongly documented as a struct while it's a plain #define instead.
Fix this by removing the 'struct' identifier from the documentation of
the c3_isp_params_block_header symbol.
Reported-by: Stephen Rothwell <sfr(a)canb.auug.org.au>
Fixes: 45662082855c ("media: uapi: Convert Amlogic C3 to V4L2 extensible params")
Cc: stable(a)vger.kernel.org
Signed-off-by: Jacopo Mondi <jacopo.mondi(a)ideasonboard.com>
---
Stephen reported this error on linux-next. Please collect this patch for v6.19.
I'm not sure this qualifies for stable, as it will ideally land in the same
release as the patch that introduces the warning. If that's not the case, please
strip:
Fixes: 45662082855c ("media: uapi: Convert Amlogic C3 to V4L2 extensible params")
Cc: stable(a)vger.kernel.org
from the commit message when applying, thanks!
---
include/uapi/linux/media/amlogic/c3-isp-config.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/include/uapi/linux/media/amlogic/c3-isp-config.h b/include/uapi/linux/media/amlogic/c3-isp-config.h
index 0a3c1cc55ccb..92db5dcdda18 100644
--- a/include/uapi/linux/media/amlogic/c3-isp-config.h
+++ b/include/uapi/linux/media/amlogic/c3-isp-config.h
@@ -186,7 +186,7 @@ enum c3_isp_params_block_type {
#define C3_ISP_PARAMS_BLOCK_FL_ENABLE V4L2_ISP_PARAMS_FL_BLOCK_ENABLE
/**
- * struct c3_isp_params_block_header - C3 ISP parameter block header
+ * c3_isp_params_block_header - C3 ISP parameter block header
*
* This structure represents the common part of all the ISP configuration
* blocks and is identical to :c:type:`v4l2_isp_params_block_header`.
--
2.51.1
Commit a2fb4bc4e2a6 ("net: implement virtio helpers to handle UDP
GSO tunneling.") inadvertently altered checksum offload behavior
for guests not using UDP GSO tunneling.
Before, tun_put_user called tun_vnet_hdr_from_skb, which passed
has_data_valid = true to virtio_net_hdr_from_skb.
After, tun_put_user began calling tun_vnet_hdr_tnl_from_skb instead,
which passes has_data_valid = false into both call sites.
This caused virtio hdr flags to not include VIRTIO_NET_HDR_F_DATA_VALID
for SKBs where skb->ip_summed == CHECKSUM_UNNECESSARY. As a result,
guests are forced to recalculate checksums unnecessarily.
Restore the previous behavior by ensuring has_data_valid = true is
passed in the !tnl_gso_type case, but only from tun side, as
virtio_net_hdr_tnl_from_skb() is used also by the virtio_net driver,
which in turn must not use VIRTIO_NET_HDR_F_DATA_VALID on tx.
Cc: Paolo Abeni <pabeni(a)redhat.com>
Cc: stable(a)vger.kernel.org
Acked-by: Michael S. Tsirkin <mst(a)redhat.com>
Acked-by: Jason Wang <jasowang(a)redhat.com>
Fixes: a2fb4bc4e2a6 ("net: implement virtio helpers to handle UDP GSO tunneling.")
Signed-off-by: Jon Kohler <jon(a)nutanix.com>
---
v3: https://patchwork.kernel.org/project/netdevbpf/patch/20251125222754.1737443…
v2: https://patchwork.kernel.org/project/netdevbpf/patch/20251125211418.1707482…
v3-v4: Collect acked-by (MST, Jason) and cc stable (Jason)
v2-v3: Add net tag (whoops, sorry!)
v1-v2: Add arg to avoid conflict from driver (Paolo) and send to net
instead of net-next.
drivers/net/tun_vnet.h | 2 +-
drivers/net/virtio_net.c | 3 ++-
include/linux/virtio_net.h | 7 ++++---
3 files changed, 7 insertions(+), 5 deletions(-)
diff --git a/drivers/net/tun_vnet.h b/drivers/net/tun_vnet.h
index 81662328b2c7..a5f93b6c4482 100644
--- a/drivers/net/tun_vnet.h
+++ b/drivers/net/tun_vnet.h
@@ -244,7 +244,7 @@ tun_vnet_hdr_tnl_from_skb(unsigned int flags,
if (virtio_net_hdr_tnl_from_skb(skb, tnl_hdr, has_tnl_offload,
tun_vnet_is_little_endian(flags),
- vlan_hlen)) {
+ vlan_hlen, true)) {
struct virtio_net_hdr_v1 *hdr = &tnl_hdr->hash_hdr.hdr;
struct skb_shared_info *sinfo = skb_shinfo(skb);
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index b0947e15895f..1bb3aeca66c6 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -3344,7 +3344,8 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb, bool orphan)
hdr = &skb_vnet_common_hdr(skb)->tnl_hdr;
if (virtio_net_hdr_tnl_from_skb(skb, hdr, vi->tx_tnl,
- virtio_is_little_endian(vi->vdev), 0))
+ virtio_is_little_endian(vi->vdev), 0,
+ false))
return -EPROTO;
if (vi->mergeable_rx_bufs)
diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index b673c31569f3..75dabb763c65 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -384,7 +384,8 @@ virtio_net_hdr_tnl_from_skb(const struct sk_buff *skb,
struct virtio_net_hdr_v1_hash_tunnel *vhdr,
bool tnl_hdr_negotiated,
bool little_endian,
- int vlan_hlen)
+ int vlan_hlen,
+ bool has_data_valid)
{
struct virtio_net_hdr *hdr = (struct virtio_net_hdr *)vhdr;
unsigned int inner_nh, outer_th;
@@ -394,8 +395,8 @@ virtio_net_hdr_tnl_from_skb(const struct sk_buff *skb,
tnl_gso_type = skb_shinfo(skb)->gso_type & (SKB_GSO_UDP_TUNNEL |
SKB_GSO_UDP_TUNNEL_CSUM);
if (!tnl_gso_type)
- return virtio_net_hdr_from_skb(skb, hdr, little_endian, false,
- vlan_hlen);
+ return virtio_net_hdr_from_skb(skb, hdr, little_endian,
+ has_data_valid, vlan_hlen);
/* Tunnel support not negotiated but skb ask for it. */
if (!tnl_hdr_negotiated)
--
2.43.0
From: Haotien Hsu <haotienh(a)nvidia.com>
The driver previously skipped handling ClearFeature(ENDPOINT_HALT)
when the endpoint was already not halted. This prevented the
controller from resetting the data sequence number and reinitializing
the endpoint state.
According to USB 3.2 specification Rev. 1.1, section 9.4.5,
ClearFeature(ENDPOINT_HALT) must always reset the data sequence and
set the stream state machine to Disabled, regardless of whether the
endpoint was halted.
Remove the early return so that ClearFeature(ENDPOINT_HALT) always
resets the endpoint sequence state as required by the specification.
Fixes: 49db427232fe ("usb: gadget: Add UDC driver for tegra XUSB device mode controller")
Cc: stable(a)vger.kernel.org
Signed-off-by: Haotien Hsu <haotienh(a)nvidia.com>
Signed-off-by: Wayne Chang <waynec(a)nvidia.com>
---
drivers/usb/gadget/udc/tegra-xudc.c | 6 ------
1 file changed, 6 deletions(-)
diff --git a/drivers/usb/gadget/udc/tegra-xudc.c b/drivers/usb/gadget/udc/tegra-xudc.c
index 0c38fc37b6e6..9d2007f448c0 100644
--- a/drivers/usb/gadget/udc/tegra-xudc.c
+++ b/drivers/usb/gadget/udc/tegra-xudc.c
@@ -1558,12 +1558,6 @@ static int __tegra_xudc_ep_set_halt(struct tegra_xudc_ep *ep, bool halt)
return -ENOTSUPP;
}
- if (!!(xudc_readl(xudc, EP_HALT) & BIT(ep->index)) == halt) {
- dev_dbg(xudc->dev, "EP %u already %s\n", ep->index,
- halt ? "halted" : "not halted");
- return 0;
- }
-
if (halt) {
ep_halt(xudc, ep->index);
} else {
--
2.25.1
From: Paolo Abeni <pabeni(a)redhat.com>
When __mptcp_retrans() kicks-in, it schedules one or more subflows for
retransmission, but such subflows could be actually left alone if there
is no more data to retransmit and/or in case of concurrent fallback.
Scheduled subflows could be processed much later in time, i.e. when new
data will be transmitted, leading to bad subflow selection.
Explicitly clear all scheduled subflows before leaving the
retransmission function.
Fixes: ee2708aedad0 ("mptcp: use get_retrans wrapper")
Cc: stable(a)vger.kernel.org
Reported-by: Filip Pokryvka <fpokryvk(a)redhat.com>
Signed-off-by: Paolo Abeni <pabeni(a)redhat.com>
Reviewed-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
---
net/mptcp/protocol.c | 13 +++++++++++--
1 file changed, 11 insertions(+), 2 deletions(-)
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 33b5dce431c2..8abb425d8b5f 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -2665,7 +2665,7 @@ static void __mptcp_retrans(struct sock *sk)
}
if (!mptcp_send_head(sk))
- return;
+ goto clear_scheduled;
goto reset_timer;
}
@@ -2696,7 +2696,7 @@ static void __mptcp_retrans(struct sock *sk)
if (__mptcp_check_fallback(msk)) {
spin_unlock_bh(&msk->fallback_lock);
release_sock(ssk);
- return;
+ goto clear_scheduled;
}
while (info.sent < info.limit) {
@@ -2728,6 +2728,15 @@ static void __mptcp_retrans(struct sock *sk)
if (!mptcp_rtx_timer_pending(sk))
mptcp_reset_rtx_timer(sk);
+
+clear_scheduled:
+ /* If no rtx data was available or in case of fallback, there
+ * could be left-over scheduled subflows; clear them all
+ * or later xmit could use bad ones
+ */
+ mptcp_for_each_subflow(msk, subflow)
+ if (READ_ONCE(subflow->scheduled))
+ mptcp_subflow_set_scheduled(subflow, false);
}
/* schedule the timeout timer for the relevant event: either close timeout
---
base-commit: 4fe5a00ec70717a7f1002d8913ec6143582b3c8e
change-id: 20251125-net-mptcp-clear-sched-rtx-a4eba7e4d5e1
Best regards,
--
Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
When discarding descriptors with IN_ORDER, we should rewind
next_avail_head otherwise it would run out of sync with
last_avail_idx. This would cause driver to report
"id X is not a head".
Fixing this by returning the number of descriptors that is used for
each buffer via vhost_get_vq_desc_n() so caller can use the value
while discarding descriptors.
Fixes: 67a873df0c41 ("vhost: basic in order support")
Cc: stable(a)vger.kernel.org
Signed-off-by: Jason Wang <jasowang(a)redhat.com>
---
Changes since V1:
- Add the function document
- Tweak the variable name
---
drivers/vhost/net.c | 53 ++++++++++++++++++------------
drivers/vhost/vhost.c | 76 +++++++++++++++++++++++++++++++++++--------
drivers/vhost/vhost.h | 10 +++++-
3 files changed, 103 insertions(+), 36 deletions(-)
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 35ded4330431..8f7f50acb6d6 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -592,14 +592,15 @@ static void vhost_net_busy_poll(struct vhost_net *net,
static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
struct vhost_net_virtqueue *tnvq,
unsigned int *out_num, unsigned int *in_num,
- struct msghdr *msghdr, bool *busyloop_intr)
+ struct msghdr *msghdr, bool *busyloop_intr,
+ unsigned int *ndesc)
{
struct vhost_net_virtqueue *rnvq = &net->vqs[VHOST_NET_VQ_RX];
struct vhost_virtqueue *rvq = &rnvq->vq;
struct vhost_virtqueue *tvq = &tnvq->vq;
- int r = vhost_get_vq_desc(tvq, tvq->iov, ARRAY_SIZE(tvq->iov),
- out_num, in_num, NULL, NULL);
+ int r = vhost_get_vq_desc_n(tvq, tvq->iov, ARRAY_SIZE(tvq->iov),
+ out_num, in_num, NULL, NULL, ndesc);
if (r == tvq->num && tvq->busyloop_timeout) {
/* Flush batched packets first */
@@ -610,8 +611,8 @@ static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
vhost_net_busy_poll(net, rvq, tvq, busyloop_intr, false);
- r = vhost_get_vq_desc(tvq, tvq->iov, ARRAY_SIZE(tvq->iov),
- out_num, in_num, NULL, NULL);
+ r = vhost_get_vq_desc_n(tvq, tvq->iov, ARRAY_SIZE(tvq->iov),
+ out_num, in_num, NULL, NULL, ndesc);
}
return r;
@@ -642,12 +643,14 @@ static int get_tx_bufs(struct vhost_net *net,
struct vhost_net_virtqueue *nvq,
struct msghdr *msg,
unsigned int *out, unsigned int *in,
- size_t *len, bool *busyloop_intr)
+ size_t *len, bool *busyloop_intr,
+ unsigned int *ndesc)
{
struct vhost_virtqueue *vq = &nvq->vq;
int ret;
- ret = vhost_net_tx_get_vq_desc(net, nvq, out, in, msg, busyloop_intr);
+ ret = vhost_net_tx_get_vq_desc(net, nvq, out, in, msg,
+ busyloop_intr, ndesc);
if (ret < 0 || ret == vq->num)
return ret;
@@ -766,6 +769,7 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
int sent_pkts = 0;
bool sock_can_batch = (sock->sk->sk_sndbuf == INT_MAX);
bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER);
+ unsigned int ndesc = 0;
do {
bool busyloop_intr = false;
@@ -774,7 +778,7 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
vhost_tx_batch(net, nvq, sock, &msg);
head = get_tx_bufs(net, nvq, &msg, &out, &in, &len,
- &busyloop_intr);
+ &busyloop_intr, &ndesc);
/* On error, stop handling until the next kick. */
if (unlikely(head < 0))
break;
@@ -806,7 +810,7 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
goto done;
} else if (unlikely(err != -ENOSPC)) {
vhost_tx_batch(net, nvq, sock, &msg);
- vhost_discard_vq_desc(vq, 1);
+ vhost_discard_vq_desc(vq, 1, ndesc);
vhost_net_enable_vq(net, vq);
break;
}
@@ -829,7 +833,7 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
err = sock->ops->sendmsg(sock, &msg, len);
if (unlikely(err < 0)) {
if (err == -EAGAIN || err == -ENOMEM || err == -ENOBUFS) {
- vhost_discard_vq_desc(vq, 1);
+ vhost_discard_vq_desc(vq, 1, ndesc);
vhost_net_enable_vq(net, vq);
break;
}
@@ -868,6 +872,7 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock)
int err;
struct vhost_net_ubuf_ref *ubufs;
struct ubuf_info_msgzc *ubuf;
+ unsigned int ndesc = 0;
bool zcopy_used;
int sent_pkts = 0;
@@ -879,7 +884,7 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock)
busyloop_intr = false;
head = get_tx_bufs(net, nvq, &msg, &out, &in, &len,
- &busyloop_intr);
+ &busyloop_intr, &ndesc);
/* On error, stop handling until the next kick. */
if (unlikely(head < 0))
break;
@@ -941,7 +946,7 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock)
vq->heads[ubuf->desc].len = VHOST_DMA_DONE_LEN;
}
if (retry) {
- vhost_discard_vq_desc(vq, 1);
+ vhost_discard_vq_desc(vq, 1, ndesc);
vhost_net_enable_vq(net, vq);
break;
}
@@ -1045,11 +1050,12 @@ static int get_rx_bufs(struct vhost_net_virtqueue *nvq,
unsigned *iovcount,
struct vhost_log *log,
unsigned *log_num,
- unsigned int quota)
+ unsigned int quota,
+ unsigned int *ndesc)
{
struct vhost_virtqueue *vq = &nvq->vq;
bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER);
- unsigned int out, in;
+ unsigned int out, in, desc_num, n = 0;
int seg = 0;
int headcount = 0;
unsigned d;
@@ -1064,9 +1070,9 @@ static int get_rx_bufs(struct vhost_net_virtqueue *nvq,
r = -ENOBUFS;
goto err;
}
- r = vhost_get_vq_desc(vq, vq->iov + seg,
- ARRAY_SIZE(vq->iov) - seg, &out,
- &in, log, log_num);
+ r = vhost_get_vq_desc_n(vq, vq->iov + seg,
+ ARRAY_SIZE(vq->iov) - seg, &out,
+ &in, log, log_num, &desc_num);
if (unlikely(r < 0))
goto err;
@@ -1093,6 +1099,7 @@ static int get_rx_bufs(struct vhost_net_virtqueue *nvq,
++headcount;
datalen -= len;
seg += in;
+ n += desc_num;
}
*iovcount = seg;
@@ -1113,9 +1120,11 @@ static int get_rx_bufs(struct vhost_net_virtqueue *nvq,
nheads[0] = headcount;
}
+ *ndesc = n;
+
return headcount;
err:
- vhost_discard_vq_desc(vq, headcount);
+ vhost_discard_vq_desc(vq, headcount, n);
return r;
}
@@ -1151,6 +1160,7 @@ static void handle_rx(struct vhost_net *net)
struct iov_iter fixup;
__virtio16 num_buffers;
int recv_pkts = 0;
+ unsigned int ndesc;
mutex_lock_nested(&vq->mutex, VHOST_NET_VQ_RX);
sock = vhost_vq_get_backend(vq);
@@ -1182,7 +1192,8 @@ static void handle_rx(struct vhost_net *net)
headcount = get_rx_bufs(nvq, vq->heads + count,
vq->nheads + count,
vhost_len, &in, vq_log, &log,
- likely(mergeable) ? UIO_MAXIOV : 1);
+ likely(mergeable) ? UIO_MAXIOV : 1,
+ &ndesc);
/* On error, stop handling until the next kick. */
if (unlikely(headcount < 0))
goto out;
@@ -1228,7 +1239,7 @@ static void handle_rx(struct vhost_net *net)
if (unlikely(err != sock_len)) {
pr_debug("Discarded rx packet: "
" len %d, expected %zd\n", err, sock_len);
- vhost_discard_vq_desc(vq, headcount);
+ vhost_discard_vq_desc(vq, headcount, ndesc);
continue;
}
/* Supply virtio_net_hdr if VHOST_NET_F_VIRTIO_NET_HDR */
@@ -1252,7 +1263,7 @@ static void handle_rx(struct vhost_net *net)
copy_to_iter(&num_buffers, sizeof num_buffers,
&fixup) != sizeof num_buffers) {
vq_err(vq, "Failed num_buffers write");
- vhost_discard_vq_desc(vq, headcount);
+ vhost_discard_vq_desc(vq, headcount, ndesc);
goto out;
}
nvq->done_idx += headcount;
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 8570fdf2e14a..a78226b37739 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -2792,18 +2792,34 @@ static int get_indirect(struct vhost_virtqueue *vq,
return 0;
}
-/* This looks in the virtqueue and for the first available buffer, and converts
- * it to an iovec for convenient access. Since descriptors consist of some
- * number of output then some number of input descriptors, it's actually two
- * iovecs, but we pack them into one and note how many of each there were.
+/**
+ * vhost_get_vq_desc_n - Fetch the next available descriptor chain and build iovecs
+ * @vq: target virtqueue
+ * @iov: array that receives the scatter/gather segments
+ * @iov_size: capacity of @iov in elements
+ * @out_num: the number of output segments
+ * @in_num: the number of input segments
+ * @log: optional array to record addr/len for each writable segment; NULL if unused
+ * @log_num: optional output; number of entries written to @log when provided
+ * @ndesc: optional output; number of descriptors consumed from the available ring
+ * (useful for rollback via vhost_discard_vq_desc)
*
- * This function returns the descriptor number found, or vq->num (which is
- * never a valid descriptor number) if none was found. A negative code is
- * returned on error. */
-int vhost_get_vq_desc(struct vhost_virtqueue *vq,
- struct iovec iov[], unsigned int iov_size,
- unsigned int *out_num, unsigned int *in_num,
- struct vhost_log *log, unsigned int *log_num)
+ * Extracts one available descriptor chain from @vq and translates guest addresses
+ * into host iovecs.
+ *
+ * On success, advances @vq->last_avail_idx by 1 and @vq->next_avail_head by the
+ * number of descriptors consumed (also stored via @ndesc when non-NULL).
+ *
+ * Return:
+ * - head index in [0, @vq->num) on success;
+ * - @vq->num if no descriptor is currently available;
+ * - negative errno on failure
+ */
+int vhost_get_vq_desc_n(struct vhost_virtqueue *vq,
+ struct iovec iov[], unsigned int iov_size,
+ unsigned int *out_num, unsigned int *in_num,
+ struct vhost_log *log, unsigned int *log_num,
+ unsigned int *ndesc)
{
bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER);
struct vring_desc desc;
@@ -2921,17 +2937,49 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
vq->last_avail_idx++;
vq->next_avail_head += c;
+ if (ndesc)
+ *ndesc = c;
+
/* Assume notifications from guest are disabled at this point,
* if they aren't we would need to update avail_event index. */
BUG_ON(!(vq->used_flags & VRING_USED_F_NO_NOTIFY));
return head;
}
+EXPORT_SYMBOL_GPL(vhost_get_vq_desc_n);
+
+/* This looks in the virtqueue and for the first available buffer, and converts
+ * it to an iovec for convenient access. Since descriptors consist of some
+ * number of output then some number of input descriptors, it's actually two
+ * iovecs, but we pack them into one and note how many of each there were.
+ *
+ * This function returns the descriptor number found, or vq->num (which is
+ * never a valid descriptor number) if none was found. A negative code is
+ * returned on error.
+ */
+int vhost_get_vq_desc(struct vhost_virtqueue *vq,
+ struct iovec iov[], unsigned int iov_size,
+ unsigned int *out_num, unsigned int *in_num,
+ struct vhost_log *log, unsigned int *log_num)
+{
+ return vhost_get_vq_desc_n(vq, iov, iov_size, out_num, in_num,
+ log, log_num, NULL);
+}
EXPORT_SYMBOL_GPL(vhost_get_vq_desc);
-/* Reverse the effect of vhost_get_vq_desc. Useful for error handling. */
-void vhost_discard_vq_desc(struct vhost_virtqueue *vq, int n)
+/**
+ * vhost_discard_vq_desc - Reverse the effect of vhost_get_vq_desc_n()
+ * @vq: target virtqueue
+ * @nbufs: number of buffers to roll back
+ * @ndesc: number of descriptors to roll back
+ *
+ * Rewinds the internal consumer cursors after a failed attempt to use buffers
+ * returned by vhost_get_vq_desc_n().
+ */
+void vhost_discard_vq_desc(struct vhost_virtqueue *vq, int nbufs,
+ unsigned int ndesc)
{
- vq->last_avail_idx -= n;
+ vq->next_avail_head -= ndesc;
+ vq->last_avail_idx -= nbufs;
}
EXPORT_SYMBOL_GPL(vhost_discard_vq_desc);
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 621a6d9a8791..b49f08e4a1b4 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -230,7 +230,15 @@ int vhost_get_vq_desc(struct vhost_virtqueue *,
struct iovec iov[], unsigned int iov_size,
unsigned int *out_num, unsigned int *in_num,
struct vhost_log *log, unsigned int *log_num);
-void vhost_discard_vq_desc(struct vhost_virtqueue *, int n);
+
+int vhost_get_vq_desc_n(struct vhost_virtqueue *vq,
+ struct iovec iov[], unsigned int iov_size,
+ unsigned int *out_num, unsigned int *in_num,
+ struct vhost_log *log, unsigned int *log_num,
+ unsigned int *ndesc);
+
+void vhost_discard_vq_desc(struct vhost_virtqueue *, int nbuf,
+ unsigned int ndesc);
bool vhost_vq_work_queue(struct vhost_virtqueue *vq, struct vhost_work *work);
bool vhost_vq_has_work(struct vhost_virtqueue *vq);
--
2.31.1
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x 678e1cc2f482e0985a0613ab4a5bf89c497e5acc
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025112403-evaluate-bogged-d093@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 678e1cc2f482e0985a0613ab4a5bf89c497e5acc Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong(a)kernel.org>
Date: Wed, 12 Nov 2025 08:35:18 -0800
Subject: [PATCH] xfs: fix out of bounds memory read error in symlink repair
xfs/286 produced this report on my test fleet:
==================================================================
BUG: KFENCE: out-of-bounds read in memcpy_orig+0x54/0x110
Out-of-bounds read at 0xffff88843fe9e038 (184B right of kfence-#184):
memcpy_orig+0x54/0x110
xrep_symlink_salvage_inline+0xb3/0xf0 [xfs]
xrep_symlink_salvage+0x100/0x110 [xfs]
xrep_symlink+0x2e/0x80 [xfs]
xrep_attempt+0x61/0x1f0 [xfs]
xfs_scrub_metadata+0x34f/0x5c0 [xfs]
xfs_ioc_scrubv_metadata+0x387/0x560 [xfs]
xfs_file_ioctl+0xe23/0x10e0 [xfs]
__x64_sys_ioctl+0x76/0xc0
do_syscall_64+0x4e/0x1e0
entry_SYSCALL_64_after_hwframe+0x4b/0x53
kfence-#184: 0xffff88843fe9df80-0xffff88843fe9dfea, size=107, cache=kmalloc-128
allocated by task 3470 on cpu 1 at 263329.131592s (192823.508886s ago):
xfs_init_local_fork+0x79/0xe0 [xfs]
xfs_iformat_local+0xa4/0x170 [xfs]
xfs_iformat_data_fork+0x148/0x180 [xfs]
xfs_inode_from_disk+0x2cd/0x480 [xfs]
xfs_iget+0x450/0xd60 [xfs]
xfs_bulkstat_one_int+0x6b/0x510 [xfs]
xfs_bulkstat_iwalk+0x1e/0x30 [xfs]
xfs_iwalk_ag_recs+0xdf/0x150 [xfs]
xfs_iwalk_run_callbacks+0xb9/0x190 [xfs]
xfs_iwalk_ag+0x1dc/0x2f0 [xfs]
xfs_iwalk_args.constprop.0+0x6a/0x120 [xfs]
xfs_iwalk+0xa4/0xd0 [xfs]
xfs_bulkstat+0xfa/0x170 [xfs]
xfs_ioc_fsbulkstat.isra.0+0x13a/0x230 [xfs]
xfs_file_ioctl+0xbf2/0x10e0 [xfs]
__x64_sys_ioctl+0x76/0xc0
do_syscall_64+0x4e/0x1e0
entry_SYSCALL_64_after_hwframe+0x4b/0x53
CPU: 1 UID: 0 PID: 1300113 Comm: xfs_scrub Not tainted 6.18.0-rc4-djwx #rc4 PREEMPT(lazy) 3d744dd94e92690f00a04398d2bd8631dcef1954
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.0-4.module+el8.8.0+21164+ed375313 04/01/2014
==================================================================
On further analysis, I realized that the second parameter to min() is
not correct. xfs_ifork::if_bytes is the size of the xfs_ifork::if_data
buffer. if_bytes can be smaller than the data fork size because:
(a) the forkoff code tries to keep the data area as large as possible
(b) for symbolic links, if_bytes is the ondisk file size + 1
(c) forkoff is always a multiple of 8.
Case in point: for a single-byte symlink target, forkoff will be
8 but the buffer will only be 2 bytes long.
In other words, the logic here is wrong and we walk off the end of the
incore buffer. Fix that.
Cc: stable(a)vger.kernel.org # v6.10
Fixes: 2651923d8d8db0 ("xfs: online repair of symbolic links")
Signed-off-by: Darrick J. Wong <djwong(a)kernel.org>
Reviewed-by: Christoph Hellwig <hch(a)lst.de>
Signed-off-by: Carlos Maiolino <cem(a)kernel.org>
diff --git a/fs/xfs/scrub/symlink_repair.c b/fs/xfs/scrub/symlink_repair.c
index 5902398185a8..df629892462f 100644
--- a/fs/xfs/scrub/symlink_repair.c
+++ b/fs/xfs/scrub/symlink_repair.c
@@ -184,7 +184,7 @@ xrep_symlink_salvage_inline(
sc->ip->i_disk_size == 1 && old_target[0] == '?')
return 0;
- nr = min(XFS_SYMLINK_MAXLEN, xfs_inode_data_fork_size(ip));
+ nr = min(XFS_SYMLINK_MAXLEN, ifp->if_bytes);
memcpy(target_buf, ifp->if_data, nr);
return nr;
}
The patch below does not apply to the 6.17-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.17.y
git checkout FETCH_HEAD
git cherry-pick -x 678e1cc2f482e0985a0613ab4a5bf89c497e5acc
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025112457-shining-trough-db05@gregkh' --subject-prefix 'PATCH 6.17.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 678e1cc2f482e0985a0613ab4a5bf89c497e5acc Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong(a)kernel.org>
Date: Wed, 12 Nov 2025 08:35:18 -0800
Subject: [PATCH] xfs: fix out of bounds memory read error in symlink repair
xfs/286 produced this report on my test fleet:
==================================================================
BUG: KFENCE: out-of-bounds read in memcpy_orig+0x54/0x110
Out-of-bounds read at 0xffff88843fe9e038 (184B right of kfence-#184):
memcpy_orig+0x54/0x110
xrep_symlink_salvage_inline+0xb3/0xf0 [xfs]
xrep_symlink_salvage+0x100/0x110 [xfs]
xrep_symlink+0x2e/0x80 [xfs]
xrep_attempt+0x61/0x1f0 [xfs]
xfs_scrub_metadata+0x34f/0x5c0 [xfs]
xfs_ioc_scrubv_metadata+0x387/0x560 [xfs]
xfs_file_ioctl+0xe23/0x10e0 [xfs]
__x64_sys_ioctl+0x76/0xc0
do_syscall_64+0x4e/0x1e0
entry_SYSCALL_64_after_hwframe+0x4b/0x53
kfence-#184: 0xffff88843fe9df80-0xffff88843fe9dfea, size=107, cache=kmalloc-128
allocated by task 3470 on cpu 1 at 263329.131592s (192823.508886s ago):
xfs_init_local_fork+0x79/0xe0 [xfs]
xfs_iformat_local+0xa4/0x170 [xfs]
xfs_iformat_data_fork+0x148/0x180 [xfs]
xfs_inode_from_disk+0x2cd/0x480 [xfs]
xfs_iget+0x450/0xd60 [xfs]
xfs_bulkstat_one_int+0x6b/0x510 [xfs]
xfs_bulkstat_iwalk+0x1e/0x30 [xfs]
xfs_iwalk_ag_recs+0xdf/0x150 [xfs]
xfs_iwalk_run_callbacks+0xb9/0x190 [xfs]
xfs_iwalk_ag+0x1dc/0x2f0 [xfs]
xfs_iwalk_args.constprop.0+0x6a/0x120 [xfs]
xfs_iwalk+0xa4/0xd0 [xfs]
xfs_bulkstat+0xfa/0x170 [xfs]
xfs_ioc_fsbulkstat.isra.0+0x13a/0x230 [xfs]
xfs_file_ioctl+0xbf2/0x10e0 [xfs]
__x64_sys_ioctl+0x76/0xc0
do_syscall_64+0x4e/0x1e0
entry_SYSCALL_64_after_hwframe+0x4b/0x53
CPU: 1 UID: 0 PID: 1300113 Comm: xfs_scrub Not tainted 6.18.0-rc4-djwx #rc4 PREEMPT(lazy) 3d744dd94e92690f00a04398d2bd8631dcef1954
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.0-4.module+el8.8.0+21164+ed375313 04/01/2014
==================================================================
On further analysis, I realized that the second parameter to min() is
not correct. xfs_ifork::if_bytes is the size of the xfs_ifork::if_data
buffer. if_bytes can be smaller than the data fork size because:
(a) the forkoff code tries to keep the data area as large as possible
(b) for symbolic links, if_bytes is the ondisk file size + 1
(c) forkoff is always a multiple of 8.
Case in point: for a single-byte symlink target, forkoff will be
8 but the buffer will only be 2 bytes long.
In other words, the logic here is wrong and we walk off the end of the
incore buffer. Fix that.
Cc: stable(a)vger.kernel.org # v6.10
Fixes: 2651923d8d8db0 ("xfs: online repair of symbolic links")
Signed-off-by: Darrick J. Wong <djwong(a)kernel.org>
Reviewed-by: Christoph Hellwig <hch(a)lst.de>
Signed-off-by: Carlos Maiolino <cem(a)kernel.org>
diff --git a/fs/xfs/scrub/symlink_repair.c b/fs/xfs/scrub/symlink_repair.c
index 5902398185a8..df629892462f 100644
--- a/fs/xfs/scrub/symlink_repair.c
+++ b/fs/xfs/scrub/symlink_repair.c
@@ -184,7 +184,7 @@ xrep_symlink_salvage_inline(
sc->ip->i_disk_size == 1 && old_target[0] == '?')
return 0;
- nr = min(XFS_SYMLINK_MAXLEN, xfs_inode_data_fork_size(ip));
+ nr = min(XFS_SYMLINK_MAXLEN, ifp->if_bytes);
memcpy(target_buf, ifp->if_data, nr);
return nr;
}
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 0d6c356dd6547adac2b06b461528e3573f52d953
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025112034-decrease-sardine-8989@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 0d6c356dd6547adac2b06b461528e3573f52d953 Mon Sep 17 00:00:00 2001
From: "Isaac J. Manjarres" <isaacmanjarres(a)google.com>
Date: Tue, 28 Oct 2025 12:10:12 -0700
Subject: [PATCH] mm/mm_init: fix hash table order logging in
alloc_large_system_hash()
When emitting the order of the allocation for a hash table,
alloc_large_system_hash() unconditionally subtracts PAGE_SHIFT from log
base 2 of the allocation size. This is not correct if the allocation size
is smaller than a page, and yields a negative value for the order as seen
below:
TCP established hash table entries: 32 (order: -4, 256 bytes, linear) TCP
bind hash table entries: 32 (order: -2, 1024 bytes, linear)
Use get_order() to compute the order when emitting the hash table
information to correctly handle cases where the allocation size is smaller
than a page:
TCP established hash table entries: 32 (order: 0, 256 bytes, linear) TCP
bind hash table entries: 32 (order: 0, 1024 bytes, linear)
Link: https://lkml.kernel.org/r/20251028191020.413002-1-isaacmanjarres@google.com
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Isaac J. Manjarres <isaacmanjarres(a)google.com>
Reviewed-by: Mike Rapoport (Microsoft) <rppt(a)kernel.org>
Reviewed-by: David Hildenbrand <david(a)redhat.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 3db2dea7db4c..7712d887b696 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -2469,7 +2469,7 @@ void *__init alloc_large_system_hash(const char *tablename,
panic("Failed to allocate %s hash table\n", tablename);
pr_info("%s hash table entries: %ld (order: %d, %lu bytes, %s)\n",
- tablename, 1UL << log2qty, ilog2(size) - PAGE_SHIFT, size,
+ tablename, 1UL << log2qty, get_order(size), size,
virt ? (huge ? "vmalloc hugepage" : "vmalloc") : "linear");
if (_hash_shift)
Evaluaciones de Desempeño Objetivas con Vorecol 360 Feedback
body {
margin: 0;
padding: 0;
font-family: Arial, Helvetica, sans-serif;
font-size: 14px;
color: #333333;
background-color: #ffffff;
}
table {
border-spacing: 0;
width: 100%;
max-width: 600px;
margin: auto;
}
td {
padding: 12px 20px;
}
a {
color: #1a73e8;
text-decoration: none;
}
.footer {
font-size: 12px;
color: #888888;
text-align: center;
padding-top: 10px;
}
Mejora tus evaluaciones de desempeño con feedback 360 real y automatizado.
Hola,
¿Te has preguntado qué tan completas son tus evaluaciones de desempeño?
En Vorecol 360 Feedback te ayudamos a implementar evaluaciones verdaderamente objetivas, recogiendo percepciones desde todas las direcciones: líderes, pares, colaboradores y autoevaluación.
Lo que más valoran nuestros clientes de RRHH es que:
Obtienen una visión completa y real del desempeño.
Fomentan una cultura de feedback constructivo.
Identifican oportunidades de desarrollo con mayor precisión.
Automatizan todo el proceso con reportes claros y personalizables.
Si estás buscando mejorar tus evaluaciones y fortalecer el desarrollo interno, te lo recomiendo muchísimo. Para más información puedes responder este correo o llamarme al número de abajo.
Saludos,
------------------------
Atte.: Luis Rodríguez
Ciudad de México: (55) 5018 0565
WhatsApp: +52 33 1607 2089
Si no deseas recibir más correos, haz clic aquí para darte de baja.
Para remover su dirección de esta lista haga <a href="https://s1.arrobamail.com/unsuscribe.php?id=yiwtsrewiswqrqseup">click aquí</a>
This reverts commit 7777f47f2ea64efd1016262e7b59fab34adfb869.
The commit 1a721de8489f ("block: don't add or resize partition on the disk
with GENHD_FL_NO_PART") and the commit 7777f47f2ea6 ("block: Move checking
GENHD_FL_NO_PART to bdev_add_partition()") used the flag GENHD_FL_NO_PART
to prevent the add or resize of partitions in 5.15 stable kernels.But in
these 5.15 kernels, this is giving an issue with the following error
where the loop driver wants to create a partition when the partscan is
disabled on the loop device:
dd if=/dev/zero of=loopDisk.dsk bs=1M count=1 seek=10240;
losetup -f loopDisk.dsk;parted -s /dev/loop0 -- mklabel gpt mkpart primary
2048s 4096s
1+0 records in
1+0 records out
1048576 bytes (1.0 MB, 1.0 MiB) copied, 0.0016293 s, 644 MB/s
""
Error: Partition(s) 1 on /dev/loop0 have been written, but we have been
unable to inform the kernel of the change, probably because it/they are
in use. As a result, the old partition(s) will remain in use. You should
reboot now before making further changes.
""
If the partition scan is not enabled on the loop device, this flag
GENHD_FL_NO_PART is getting set and when partition creation is tried,
it returns an error EINVAL thereby preventing the creation of partitions.
So, there is no such distinction between disabling of partition scan and
partition creation.
Later in 6.xxx kernels, the commit b9684a71fca7 ("block, loop: support
partitions without scanning") a new flag GD_SUPPRESS_PART_SCAN was
introduced that just disables the partition scan and uses GENHD_FL_NO_PART
only to prevent creating partition scan. So, the partition creationg can
proceed with even if partition scan is disabled.
As the commit b9684a71fca7 ("block, loop: support partitions without
scanning") is not available in 5.15 stable kernel, and since there is no
distinction between disabling of "partition scan" and "partition
creation", we need to revert the commits 1a721de8489f and 7777f47f2ea6
from 5.15 stable kernel to allow partition creation when partscan is
disabled.
Cc: stable(a)vger.kernel.org
Signed-off-by: Gulam Mohamed <gulam.mohamed(a)oracle.com>
---
block/ioctl.c | 2 ++
block/partitions/core.c | 5 -----
2 files changed, 2 insertions(+), 5 deletions(-)
diff --git a/block/ioctl.c b/block/ioctl.c
index a260e39e56a4..d25b84441237 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -20,6 +20,8 @@ static int blkpg_do_ioctl(struct block_device *bdev,
struct blkpg_partition p;
sector_t start, length;
+ if (disk->flags & GENHD_FL_NO_PART)
+ return -EINVAL;
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
if (copy_from_user(&p, upart, sizeof(struct blkpg_partition)))
diff --git a/block/partitions/core.c b/block/partitions/core.c
index 0d1fe2b42b85..7b5750db7eaf 100644
--- a/block/partitions/core.c
+++ b/block/partitions/core.c
@@ -463,11 +463,6 @@ int bdev_add_partition(struct gendisk *disk, int partno, sector_t start,
goto out;
}
- if (disk->flags & GENHD_FL_NO_PART) {
- ret = -EINVAL;
- goto out;
- }
-
if (partition_overlaps(disk, start, length, -1)) {
ret = -EBUSY;
goto out;
--
2.47.3
This reverts commit 7777f47f2ea64efd1016262e7b59fab34adfb869.
The commit 1a721de8489f ("block: don't add or resize partition on the disk
with GENHD_FL_NO_PART") and the commit 7777f47f2ea6 ("block: Move checking
GENHD_FL_NO_PART to bdev_add_partition()") used the flag GENHD_FL_NO_PART
to prevent the add or resize of partitions in 5.15 stable kernels.But in
these 5.15 kernels, this is giving an issue with the following error
where the loop driver wants to create a partition when the partscan is
disabled on the loop device:
dd if=/dev/zero of=loopDisk.dsk bs=1M count=1 seek=10240;
losetup -f loopDisk.dsk;parted -s /dev/loop0 -- mklabel gpt mkpart primary
2048s 4096s
1+0 records in
1+0 records out
1048576 bytes (1.0 MB, 1.0 MiB) copied, 0.0016293 s, 644 MB/s
""
Error: Partition(s) 1 on /dev/loop0 have been written, but we have been
unable to inform the kernel of the change, probably because it/they are
in use. As a result, the old partition(s) will remain in use. You should
reboot now before making further changes.
""
If the partition scan is not enabled on the loop device, this flag
GENHD_FL_NO_PART is getting set and when partition creation is tried,
it returns an error EINVAL thereby preventing the creation of partitions.
So, there is no such distinction between disabling of partition scan and
partition creation.
Later in 6.xxx kernels, the commit b9684a71fca7 ("block, loop: support
partitions without scanning") a new flag GD_SUPPRESS_PART_SCAN was
introduced that just disables the partition scan and uses GENHD_FL_NO_PART
only to prevent creating partition scan. So, the partition creationg can
proceed with even if partition scan is disabled.
As the commit b9684a71fca7 ("block, loop: support partitions without
scanning") is not available in 5.15 stable kernel, and since there is no
distinction between disabling of "partition scan" and "partition
creation", we need to revert the commits 1a721de8489f and 7777f47f2ea6
from 5.15 stable kernel to allow partition creation when partscan is
disabled.
Cc: stable(a)vger.kernel.org
Signed-off-by: Gulam Mohamed <gulam.mohamed(a)oracle.com>
---
Changes in V2:
- Added,in the second patch, the reason for reverting the patch
- Added the linux stable version 5.15.y to the subject line
- Added the version V2 to the subject line
Links to V1:
https://lore.kernel.org/all/20251117174315.367072-1-gulam.mohamed@oracle.co…https://lore.kernel.org/all/20251117174315.367072-2-gulam.mohamed@oracle.co…
---
block/ioctl.c | 2 ++
block/partitions/core.c | 5 -----
2 files changed, 2 insertions(+), 5 deletions(-)
diff --git a/block/ioctl.c b/block/ioctl.c
index a260e39e56a4..d25b84441237 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -20,6 +20,8 @@ static int blkpg_do_ioctl(struct block_device *bdev,
struct blkpg_partition p;
sector_t start, length;
+ if (disk->flags & GENHD_FL_NO_PART)
+ return -EINVAL;
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
if (copy_from_user(&p, upart, sizeof(struct blkpg_partition)))
diff --git a/block/partitions/core.c b/block/partitions/core.c
index 0d1fe2b42b85..7b5750db7eaf 100644
--- a/block/partitions/core.c
+++ b/block/partitions/core.c
@@ -463,11 +463,6 @@ int bdev_add_partition(struct gendisk *disk, int partno, sector_t start,
goto out;
}
- if (disk->flags & GENHD_FL_NO_PART) {
- ret = -EINVAL;
- goto out;
- }
-
if (partition_overlaps(disk, start, length, -1)) {
ret = -EBUSY;
goto out;
--
2.47.3
The patch titled
Subject: powerpc/pseries/cmm: adjust BALLOON_MIGRATE when migrating pages
has been added to the -mm mm-new branch. Its filename is
powerpc-pseries-cmm-adjust-balloon_migrate-when-migrating-pages.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-new branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Note, mm-new is a provisional staging ground for work-in-progress
patches, and acceptance into mm-new is a notification for others take
notice and to finish up reviews. Please do not hesitate to respond to
review feedback and post updated versions to replace or incrementally
fixup patches in mm-new.
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: David Hildenbrand <david(a)redhat.com>
Subject: powerpc/pseries/cmm: adjust BALLOON_MIGRATE when migrating pages
Date: Tue, 21 Oct 2025 12:06:06 +0200
Let's properly adjust BALLOON_MIGRATE like the other drivers.
Note that the INFLATE/DEFLATE events are triggered from the core when
enqueueing/dequeueing pages.
This was found by code inspection.
Link: https://lkml.kernel.org/r/20251021100606.148294-3-david@redhat.com
Fixes: fe030c9b85e6 ("powerpc/pseries/cmm: Implement balloon compaction")
Signed-off-by: David Hildenbrand <david(a)redhat.com>
Cc: Christophe Leroy <christophe.leroy(a)csgroup.eu>
Cc: Madhavan Srinivasan <maddy(a)linux.ibm.com>
Cc: Michael Ellerman <mpe(a)ellerman.id.au>
Cc: Nicholas Piggin <npiggin(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
arch/powerpc/platforms/pseries/cmm.c | 1 +
1 file changed, 1 insertion(+)
--- a/arch/powerpc/platforms/pseries/cmm.c~powerpc-pseries-cmm-adjust-balloon_migrate-when-migrating-pages
+++ a/arch/powerpc/platforms/pseries/cmm.c
@@ -532,6 +532,7 @@ static int cmm_migratepage(struct balloo
spin_lock_irqsave(&b_dev_info->pages_lock, flags);
balloon_page_insert(b_dev_info, newpage);
+ __count_vm_event(BALLOON_MIGRATE);
b_dev_info->isolated_pages--;
spin_unlock_irqrestore(&b_dev_info->pages_lock, flags);
_
Patches currently in -mm which might be from david(a)redhat.com are
powerpc-pseries-cmm-call-balloon_devinfo_init-also-without-config_balloon_compaction.patch
powerpc-pseries-cmm-adjust-balloon_migrate-when-migrating-pages.patch
The patch titled
Subject: powerpc/pseries/cmm: call balloon_devinfo_init() also without CONFIG_BALLOON_COMPACTION
has been added to the -mm mm-new branch. Its filename is
powerpc-pseries-cmm-call-balloon_devinfo_init-also-without-config_balloon_compaction.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-new branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Note, mm-new is a provisional staging ground for work-in-progress
patches, and acceptance into mm-new is a notification for others take
notice and to finish up reviews. Please do not hesitate to respond to
review feedback and post updated versions to replace or incrementally
fixup patches in mm-new.
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: David Hildenbrand <david(a)redhat.com>
Subject: powerpc/pseries/cmm: call balloon_devinfo_init() also without CONFIG_BALLOON_COMPACTION
Date: Tue, 21 Oct 2025 12:06:05 +0200
Patch series "powerpc/pseries/cmm: two smaller fixes".
Two smaller fixes identified while doing a bigger rework.
This patch (of 2):
We always have to initialize the balloon_dev_info, even when compaction is
not configured in: otherwise the containing list and the lock are left
uninitialized.
Likely not many such configs exist in practice, but let's CC stable to
be sure.
This was found by code inspection.
Link: https://lkml.kernel.org/r/20251021100606.148294-1-david@redhat.com
Link: https://lkml.kernel.org/r/20251021100606.148294-2-david@redhat.com
Fixes: fe030c9b85e6 ("powerpc/pseries/cmm: Implement balloon compaction")
Signed-off-by: David Hildenbrand <david(a)redhat.com>
Cc: Christophe Leroy <christophe.leroy(a)csgroup.eu>
Cc: Madhavan Srinivasan <maddy(a)linux.ibm.com>
Cc: Michael Ellerman <mpe(a)ellerman.id.au>
Cc: Nicholas Piggin <npiggin(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
arch/powerpc/platforms/pseries/cmm.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/arch/powerpc/platforms/pseries/cmm.c~powerpc-pseries-cmm-call-balloon_devinfo_init-also-without-config_balloon_compaction
+++ a/arch/powerpc/platforms/pseries/cmm.c
@@ -550,7 +550,6 @@ static int cmm_migratepage(struct balloo
static void cmm_balloon_compaction_init(void)
{
- balloon_devinfo_init(&b_dev_info);
b_dev_info.migratepage = cmm_migratepage;
}
#else /* CONFIG_BALLOON_COMPACTION */
@@ -572,6 +571,7 @@ static int cmm_init(void)
if (!firmware_has_feature(FW_FEATURE_CMO) && !simulate)
return -EOPNOTSUPP;
+ balloon_devinfo_init(&b_dev_info);
cmm_balloon_compaction_init();
rc = register_oom_notifier(&cmm_oom_nb);
_
Patches currently in -mm which might be from david(a)redhat.com are
powerpc-pseries-cmm-call-balloon_devinfo_init-also-without-config_balloon_compaction.patch
powerpc-pseries-cmm-adjust-balloon_migrate-when-migrating-pages.patch
Previously, the AMPDU state bit for a given TID was set before attempting
to start a BA session, which could result in the AMPDU state being marked
active even if ieee80211_start_tx_ba_session() failed. This patch changes
the logic to only set the AMPDU state bit after successfully starting a BA
session, ensuring proper synchronization between AMPDU state and BA session
status.
This fixes potential issues with aggregation state tracking and improves
compatibility with mac80211 BA session management.
Fixes: 44eb173bdd4f ("wifi: mt76: mt7925: add link handling in mt7925_txwi_free")
Cc: stable(a)vger.kernel.org
Signed-off-by: Quan Zhou <quan.zhou(a)mediatek.com>
---
drivers/net/wireless/mediatek/mt76/mt7925/mac.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/mac.c b/drivers/net/wireless/mediatek/mt76/mt7925/mac.c
index 871b67101976..80f1d738ec22 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7925/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7925/mac.c
@@ -881,8 +881,9 @@ static void mt7925_tx_check_aggr(struct ieee80211_sta *sta, struct sk_buff *skb,
else
mlink = &msta->deflink;
- if (!test_and_set_bit(tid, &mlink->wcid.ampdu_state))
- ieee80211_start_tx_ba_session(sta, tid, 0);
+ if (!test_bit(tid, &mlink->wcid.ampdu_state) &&
+ !ieee80211_start_tx_ba_session(sta, tid, 0))
+ set_bit(tid, &mlink->wcid.ampdu_state);
}
static bool
--
2.45.2
Hi,
Are you interested in the 15,805 verified business contacts available for
the Live Design International 2025 event in Las Vegas this December. We're
also including a Thanksgiving Day 20% reduction on our compliant contact
data.
If you'd like more details, just let me know or reply "Send me pricing."
Best regards,
Melissa Underwood
Sr. Marketing Manager
If you prefer not to receive updates, reply "Not Interested."
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 0d6c356dd6547adac2b06b461528e3573f52d953
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025112033-barista-manicure-43e9@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 0d6c356dd6547adac2b06b461528e3573f52d953 Mon Sep 17 00:00:00 2001
From: "Isaac J. Manjarres" <isaacmanjarres(a)google.com>
Date: Tue, 28 Oct 2025 12:10:12 -0700
Subject: [PATCH] mm/mm_init: fix hash table order logging in
alloc_large_system_hash()
When emitting the order of the allocation for a hash table,
alloc_large_system_hash() unconditionally subtracts PAGE_SHIFT from log
base 2 of the allocation size. This is not correct if the allocation size
is smaller than a page, and yields a negative value for the order as seen
below:
TCP established hash table entries: 32 (order: -4, 256 bytes, linear) TCP
bind hash table entries: 32 (order: -2, 1024 bytes, linear)
Use get_order() to compute the order when emitting the hash table
information to correctly handle cases where the allocation size is smaller
than a page:
TCP established hash table entries: 32 (order: 0, 256 bytes, linear) TCP
bind hash table entries: 32 (order: 0, 1024 bytes, linear)
Link: https://lkml.kernel.org/r/20251028191020.413002-1-isaacmanjarres@google.com
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Isaac J. Manjarres <isaacmanjarres(a)google.com>
Reviewed-by: Mike Rapoport (Microsoft) <rppt(a)kernel.org>
Reviewed-by: David Hildenbrand <david(a)redhat.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 3db2dea7db4c..7712d887b696 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -2469,7 +2469,7 @@ void *__init alloc_large_system_hash(const char *tablename,
panic("Failed to allocate %s hash table\n", tablename);
pr_info("%s hash table entries: %ld (order: %d, %lu bytes, %s)\n",
- tablename, 1UL << log2qty, ilog2(size) - PAGE_SHIFT, size,
+ tablename, 1UL << log2qty, get_order(size), size,
virt ? (huge ? "vmalloc hugepage" : "vmalloc") : "linear");
if (_hash_shift)
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 6f86d0534fddfbd08687fa0f01479d4226bc3c3d
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025112023-overact-rehydrate-ae42@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 6f86d0534fddfbd08687fa0f01479d4226bc3c3d Mon Sep 17 00:00:00 2001
From: Lance Yang <lance.yang(a)linux.dev>
Date: Fri, 31 Oct 2025 20:09:55 +0800
Subject: [PATCH] mm/secretmem: fix use-after-free race in fault handler
When a page fault occurs in a secret memory file created with
`memfd_secret(2)`, the kernel will allocate a new folio for it, mark the
underlying page as not-present in the direct map, and add it to the file
mapping.
If two tasks cause a fault in the same page concurrently, both could end
up allocating a folio and removing the page from the direct map, but only
one would succeed in adding the folio to the file mapping. The task that
failed undoes the effects of its attempt by (a) freeing the folio again
and (b) putting the page back into the direct map. However, by doing
these two operations in this order, the page becomes available to the
allocator again before it is placed back in the direct mapping.
If another task attempts to allocate the page between (a) and (b), and the
kernel tries to access it via the direct map, it would result in a
supervisor not-present page fault.
Fix the ordering to restore the direct map before the folio is freed.
Link: https://lkml.kernel.org/r/20251031120955.92116-1-lance.yang@linux.dev
Fixes: 1507f51255c9 ("mm: introduce memfd_secret system call to create "secret" memory areas")
Signed-off-by: Lance Yang <lance.yang(a)linux.dev>
Reported-by: Google Big Sleep <big-sleep-vuln-reports(a)google.com>
Closes: https://lore.kernel.org/linux-mm/CAEXGt5QeDpiHTu3K9tvjUTPqo+d-=wuCNYPa+6sWK…
Acked-by: David Hildenbrand <david(a)redhat.com>
Reviewed-by: Mike Rapoport (Microsoft) <rppt(a)kernel.org>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/secretmem.c b/mm/secretmem.c
index 60137305bc20..b59350daffe3 100644
--- a/mm/secretmem.c
+++ b/mm/secretmem.c
@@ -82,13 +82,13 @@ static vm_fault_t secretmem_fault(struct vm_fault *vmf)
__folio_mark_uptodate(folio);
err = filemap_add_folio(mapping, folio, offset, gfp);
if (unlikely(err)) {
- folio_put(folio);
/*
* If a split of large page was required, it
* already happened when we marked the page invalid
* which guarantees that this call won't fail
*/
set_direct_map_default_noflush(folio_page(folio, 0));
+ folio_put(folio);
if (err == -EEXIST)
goto retry;
Reading the interrupt register `SUN4I_REG_INT_ADDR` causes all of its bits
to be reset. If we ever reach the condition of handling more than
`SUN4I_CAN_MAX_IRQ` IRQs, we will have read the register and reset all its
bits but without actually handling the interrupt inside of the loop body.
This may, among other issues, cause us to never `netif_wake_queue()` again
after a transmission interrupt.
Fixes: 0738eff14d81 ("can: Allwinner A10/A20 CAN Controller support - Kernel module")
Cc: stable(a)vger.kernel.org
Co-developed-by: Thomas Mühlbacher <tmuehlbacher(a)posteo.net>
Signed-off-by: Thomas Mühlbacher <tmuehlbacher(a)posteo.net>
Acked-by: Jernej Skrabec <jernej.skrabec(a)gmail.com>
Link: https://patch.msgid.link/20251116-sun4i-fix-loop-v1-1-3d76d3f81950@pengutro…
Signed-off-by: Marc Kleine-Budde <mkl(a)pengutronix.de>
---
drivers/net/can/sun4i_can.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/net/can/sun4i_can.c b/drivers/net/can/sun4i_can.c
index 53bfd873de9b..0a7ba0942839 100644
--- a/drivers/net/can/sun4i_can.c
+++ b/drivers/net/can/sun4i_can.c
@@ -657,8 +657,8 @@ static irqreturn_t sun4i_can_interrupt(int irq, void *dev_id)
u8 isrc, status;
int n = 0;
- while ((isrc = readl(priv->base + SUN4I_REG_INT_ADDR)) &&
- (n < SUN4I_CAN_MAX_IRQ)) {
+ while ((n < SUN4I_CAN_MAX_IRQ) &&
+ (isrc = readl(priv->base + SUN4I_REG_INT_ADDR))) {
n++;
status = readl(priv->base + SUN4I_REG_STA_ADDR);
--
2.51.0
From: Thomas Mühlbacher <tmuehlbacher(a)posteo.net>
Reading the interrupt register `SJA1000_IR` causes all of its bits to be
reset. If we ever reach the condition of handling more than
`SJA1000_MAX_IRQ` IRQs, we will have read the register and reset all its
bits but without actually handling the interrupt inside of the loop
body.
This may, among other issues, cause us to never `netif_wake_queue()`
again after a transmission interrupt.
Fixes: 429da1cc841b ("can: Driver for the SJA1000 CAN controller")
Cc: stable(a)vger.kernel.org
Signed-off-by: Thomas Mühlbacher <tmuehlbacher(a)posteo.net>
Acked-by: Oliver Hartkopp <socketcan(a)hartkopp.net>
Link: https://patch.msgid.link/20251115153437.11419-1-tmuehlbacher@posteo.net
Signed-off-by: Marc Kleine-Budde <mkl(a)pengutronix.de>
---
drivers/net/can/sja1000/sja1000.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/net/can/sja1000/sja1000.c b/drivers/net/can/sja1000/sja1000.c
index 4d245857ef1c..83476af8adb5 100644
--- a/drivers/net/can/sja1000/sja1000.c
+++ b/drivers/net/can/sja1000/sja1000.c
@@ -548,8 +548,8 @@ irqreturn_t sja1000_interrupt(int irq, void *dev_id)
if (priv->read_reg(priv, SJA1000_IER) == IRQ_OFF)
goto out;
- while ((isrc = priv->read_reg(priv, SJA1000_IR)) &&
- (n < SJA1000_MAX_IRQ)) {
+ while ((n < SJA1000_MAX_IRQ) &&
+ (isrc = priv->read_reg(priv, SJA1000_IR))) {
status = priv->read_reg(priv, SJA1000_SR);
/* check for absent controller due to hw unplug */
--
2.51.0
logi_dj_recv_query_paired_devices() and logi_dj_recv_switch_to_dj_mode()
both have 2 callers which all log an error if the function fails. Move
the error logging to inside these 2 functions to remove the duplicated
error logging in the callers.
While at it also move the logi_dj_recv_send_report() call error handling
in logi_dj_recv_switch_to_dj_mode() to directly after the call. That call
only fails if the report cannot be found and in that case it does nothing,
so the msleep() is not necessary on failures.
Fixes: 6f20d3261265 ("HID: logitech-dj: Fix error handling in logi_dj_recv_switch_to_dj_mode()")
Cc: stable(a)vger.kernel.org
Signed-off-by: Hans de Goede <johannes.goede(a)oss.qualcomm.com>
---
drivers/hid/hid-logitech-dj.c | 56 ++++++++++++++---------------------
1 file changed, 23 insertions(+), 33 deletions(-)
diff --git a/drivers/hid/hid-logitech-dj.c b/drivers/hid/hid-logitech-dj.c
index d66f4807311a..58a848ed248d 100644
--- a/drivers/hid/hid-logitech-dj.c
+++ b/drivers/hid/hid-logitech-dj.c
@@ -889,7 +889,6 @@ static void delayedwork_callback(struct work_struct *work)
struct dj_workitem workitem;
unsigned long flags;
int count;
- int retval;
dbg_hid("%s\n", __func__);
@@ -926,11 +925,7 @@ static void delayedwork_callback(struct work_struct *work)
logi_dj_recv_destroy_djhid_device(djrcv_dev, &workitem);
break;
case WORKITEM_TYPE_UNKNOWN:
- retval = logi_dj_recv_query_paired_devices(djrcv_dev);
- if (retval) {
- hid_err(djrcv_dev->hidpp, "%s: logi_dj_recv_query_paired_devices error: %d\n",
- __func__, retval);
- }
+ logi_dj_recv_query_paired_devices(djrcv_dev);
break;
case WORKITEM_TYPE_EMPTY:
dbg_hid("%s: device list is empty\n", __func__);
@@ -1323,8 +1318,10 @@ static int logi_dj_recv_query_paired_devices(struct dj_receiver_dev *djrcv_dev)
djrcv_dev->last_query = jiffies;
- if (djrcv_dev->type != recvr_type_dj)
- return logi_dj_recv_query_hidpp_devices(djrcv_dev);
+ if (djrcv_dev->type != recvr_type_dj) {
+ retval = logi_dj_recv_query_hidpp_devices(djrcv_dev);
+ goto out;
+ }
dj_report = kzalloc(sizeof(struct dj_report), GFP_KERNEL);
if (!dj_report)
@@ -1334,6 +1331,10 @@ static int logi_dj_recv_query_paired_devices(struct dj_receiver_dev *djrcv_dev)
dj_report->report_type = REPORT_TYPE_CMD_GET_PAIRED_DEVICES;
retval = logi_dj_recv_send_report(djrcv_dev, dj_report);
kfree(dj_report);
+out:
+ if (retval < 0)
+ hid_err(djrcv_dev->hidpp, "%s error:%d\n", __func__, retval);
+
return retval;
}
@@ -1359,6 +1360,8 @@ static int logi_dj_recv_switch_to_dj_mode(struct dj_receiver_dev *djrcv_dev,
(u8)timeout;
retval = logi_dj_recv_send_report(djrcv_dev, dj_report);
+ if (retval)
+ goto out;
/*
* Ugly sleep to work around a USB 3.0 bug when the receiver is
@@ -1367,11 +1370,6 @@ static int logi_dj_recv_switch_to_dj_mode(struct dj_receiver_dev *djrcv_dev,
* 50 msec should gives enough time to the receiver to be ready.
*/
msleep(50);
-
- if (retval) {
- kfree(dj_report);
- return retval;
- }
}
/*
@@ -1397,7 +1395,12 @@ static int logi_dj_recv_switch_to_dj_mode(struct dj_receiver_dev *djrcv_dev,
HIDPP_REPORT_SHORT_LENGTH, HID_OUTPUT_REPORT,
HID_REQ_SET_REPORT);
+out:
kfree(dj_report);
+
+ if (retval < 0)
+ hid_err(hdev, "%s error:%d\n", __func__, retval);
+
return retval;
}
@@ -1935,11 +1938,8 @@ static int logi_dj_probe(struct hid_device *hdev,
if (has_hidpp) {
retval = logi_dj_recv_switch_to_dj_mode(djrcv_dev, 0);
- if (retval < 0) {
- hid_err(hdev, "%s: logi_dj_recv_switch_to_dj_mode returned error:%d\n",
- __func__, retval);
+ if (retval < 0)
goto switch_to_dj_mode_fail;
- }
}
/* This is enabling the polling urb on the IN endpoint */
@@ -1957,15 +1957,11 @@ static int logi_dj_probe(struct hid_device *hdev,
spin_lock_irqsave(&djrcv_dev->lock, flags);
djrcv_dev->ready = true;
spin_unlock_irqrestore(&djrcv_dev->lock, flags);
- retval = logi_dj_recv_query_paired_devices(djrcv_dev);
- if (retval < 0) {
- hid_err(hdev, "%s: logi_dj_recv_query_paired_devices error:%d\n",
- __func__, retval);
- /*
- * This can happen with a KVM, let the probe succeed,
- * logi_dj_recv_queue_unknown_work will retry later.
- */
- }
+ /*
+ * This can fail with a KVM. Ignore errors to let the probe
+ * succeed, logi_dj_recv_queue_unknown_work will retry later.
+ */
+ logi_dj_recv_query_paired_devices(djrcv_dev);
}
return 0;
@@ -1982,18 +1978,12 @@ static int logi_dj_probe(struct hid_device *hdev,
#ifdef CONFIG_PM
static int logi_dj_reset_resume(struct hid_device *hdev)
{
- int retval;
struct dj_receiver_dev *djrcv_dev = hid_get_drvdata(hdev);
if (!djrcv_dev || djrcv_dev->hidpp != hdev)
return 0;
- retval = logi_dj_recv_switch_to_dj_mode(djrcv_dev, 0);
- if (retval < 0) {
- hid_err(hdev, "%s: logi_dj_recv_switch_to_dj_mode returned error:%d\n",
- __func__, retval);
- }
-
+ logi_dj_recv_switch_to_dj_mode(djrcv_dev, 0);
return 0;
}
#endif
--
2.51.1
Slimbus devices can be allocated dynamically upon reception of
report-present messages.
Make sure to drop the reference taken when looking up already registered
devices.
Note that this requires taking an extra reference in case the device has
not yet been registered and has to be allocated.
Fixes: 46a2bb5a7f7e ("slimbus: core: Add slim controllers support")
Cc: stable(a)vger.kernel.org # 4.16
Signed-off-by: Johan Hovold <johan(a)kernel.org>
---
drivers/slimbus/core.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/drivers/slimbus/core.c b/drivers/slimbus/core.c
index 9f85c4280171..b4ab9a5d44b3 100644
--- a/drivers/slimbus/core.c
+++ b/drivers/slimbus/core.c
@@ -379,6 +379,8 @@ struct slim_device *slim_get_device(struct slim_controller *ctrl,
sbdev = slim_alloc_device(ctrl, e_addr, NULL);
if (!sbdev)
return ERR_PTR(-ENOMEM);
+
+ get_device(&sbdev->dev);
}
return sbdev;
@@ -505,6 +507,7 @@ int slim_device_report_present(struct slim_controller *ctrl,
ret = slim_device_alloc_laddr(sbdev, true);
}
+ put_device(&sbdev->dev);
out_put_rpm:
pm_runtime_mark_last_busy(ctrl->dev);
pm_runtime_put_autosuspend(ctrl->dev);
--
2.51.2
The current UFS clocks does not align with their respective names,
causing the ref_clk to be set to an incorrect frequency as below,
which results in command timeouts.
ufshcd-qcom 1d84000.ufshc: invalid ref_clk setting = 300000000
This commit fixes the issue by properly reordering the UFS clocks to
match their names.
Fixes: ea172f61f4fd ("arm64: dts: qcom: qcs615: Fix up UFS clocks")
Cc: stable(a)vger.kernel.org
Signed-off-by: Pradeep P V K <pradeep.pragallapati(a)oss.qualcomm.com>
---
arch/arm64/boot/dts/qcom/talos.dtsi | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/arm64/boot/dts/qcom/talos.dtsi b/arch/arm64/boot/dts/qcom/talos.dtsi
index d1dbfa3bd81c..95d26e313622 100644
--- a/arch/arm64/boot/dts/qcom/talos.dtsi
+++ b/arch/arm64/boot/dts/qcom/talos.dtsi
@@ -1399,10 +1399,10 @@
<&gcc GCC_AGGRE_UFS_PHY_AXI_CLK>,
<&gcc GCC_UFS_PHY_AHB_CLK>,
<&gcc GCC_UFS_PHY_UNIPRO_CORE_CLK>,
- <&gcc GCC_UFS_PHY_ICE_CORE_CLK>,
<&rpmhcc RPMH_CXO_CLK>,
<&gcc GCC_UFS_PHY_TX_SYMBOL_0_CLK>,
- <&gcc GCC_UFS_PHY_RX_SYMBOL_0_CLK>;
+ <&gcc GCC_UFS_PHY_RX_SYMBOL_0_CLK>,
+ <&gcc GCC_UFS_PHY_ICE_CORE_CLK>;
clock-names = "core_clk",
"bus_aggr_clk",
"iface_clk",
--
2.17.1
# TL;DR
previous discussion: https://lore.kernel.org/linux-mm/20250921232709.1608699-1-harry.yoo@oracle.…
A "bad pmd" error occurs due to race condition between
change_prot_numa() and THP migration. The mainline kernel does not have
this bug as commit 670ddd8cdc fixes the race condition. 6.1.y, 5.15.y,
5.10.y, 5.4.y are affected by this bug.
Fixing this in -stable kernels is tricky because pte_map_offset_lock()
has different semantics in pre-6.5 and post-6.5 kernels. I am trying to
backport the same mechanism we have in the mainline kernel.
# Testing
I verified that the bug described below is not reproduced anymore
(on a downstream kernel) after applying this patch series. It used to
trigger in few days of intensive numa balancing testing, but it survived
2 weeks with this applied.
# Bug Description
It was reported that a bad pmd is seen when automatic NUMA
balancing is marking page table entries as prot_numa:
[2437548.196018] mm/pgtable-generic.c:50: bad pmd 00000000af22fc02(dffffffe71fbfe02)
[2437548.235022] Call Trace:
[2437548.238234] <TASK>
[2437548.241060] dump_stack_lvl+0x46/0x61
[2437548.245689] panic+0x106/0x2e5
[2437548.249497] pmd_clear_bad+0x3c/0x3c
[2437548.253967] change_pmd_range.isra.0+0x34d/0x3a7
[2437548.259537] change_p4d_range+0x156/0x20e
[2437548.264392] change_protection_range+0x116/0x1a9
[2437548.269976] change_prot_numa+0x15/0x37
[2437548.274774] task_numa_work+0x1b8/0x302
[2437548.279512] task_work_run+0x62/0x95
[2437548.283882] exit_to_user_mode_loop+0x1a4/0x1a9
[2437548.289277] exit_to_user_mode_prepare+0xf4/0xfc
[2437548.294751] ? sysvec_apic_timer_interrupt+0x34/0x81
[2437548.300677] irqentry_exit_to_user_mode+0x5/0x25
[2437548.306153] asm_sysvec_apic_timer_interrupt+0x16/0x1b
This is due to a race condition between change_prot_numa() and
THP migration because the kernel doesn't check is_swap_pmd() and
pmd_trans_huge() atomically:
change_prot_numa() THP migration
======================================================================
- change_pmd_range()
-> is_swap_pmd() returns false,
meaning it's not a PMD migration
entry.
- do_huge_pmd_numa_page()
-> migrate_misplaced_page() sets
migration entries for the THP.
- change_pmd_range()
-> pmd_none_or_clear_bad_unless_trans_huge()
-> pmd_none() and pmd_trans_huge() returns false
- pmd_none_or_clear_bad_unless_trans_huge()
-> pmd_bad() returns true for the migration entry!
The upstream commit 670ddd8cdcbd ("mm/mprotect: delete
pmd_none_or_clear_bad_unless_trans_huge()") closes this race condition
by checking is_swap_pmd() and pmd_trans_huge() atomically.
# Backporting note
commit a79390f5d6a7 ("mm/mprotect: use long for page accountings and retval")
is backported to return an error code (negative value) in
change_pte_range().
Unlike the mainline, pte_offset_map_lock() does not check if the pmd
entry is a migration entry or a hugepage; acquires PTL unconditionally
instead of returning failure. Therefore, it is necessary to keep the
!is_swap_pmd() && !pmd_trans_huge() && !pmd_devmap() checks in
change_pmd_range() before acquiring the PTL.
After acquiring the lock, open-code the semantics of
pte_offset_map_lock() in the mainline kernel; change_pte_range() fails
if the pmd value has changed. This requires adding pmd_old parameter
(pmd_t value that is read before calling the function) to
change_pte_range().
Hugh Dickins (1):
mm/mprotect: delete pmd_none_or_clear_bad_unless_trans_huge()
Peter Xu (1):
mm/mprotect: use long for page accountings and retval
include/linux/hugetlb.h | 4 +-
include/linux/mm.h | 2 +-
mm/hugetlb.c | 4 +-
mm/mempolicy.c | 2 +-
mm/mprotect.c | 121 ++++++++++++++++++----------------------
5 files changed, 59 insertions(+), 74 deletions(-)
--
2.43.0
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 0d6c356dd6547adac2b06b461528e3573f52d953
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025112033-overstep-denim-0e6a@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 0d6c356dd6547adac2b06b461528e3573f52d953 Mon Sep 17 00:00:00 2001
From: "Isaac J. Manjarres" <isaacmanjarres(a)google.com>
Date: Tue, 28 Oct 2025 12:10:12 -0700
Subject: [PATCH] mm/mm_init: fix hash table order logging in
alloc_large_system_hash()
When emitting the order of the allocation for a hash table,
alloc_large_system_hash() unconditionally subtracts PAGE_SHIFT from log
base 2 of the allocation size. This is not correct if the allocation size
is smaller than a page, and yields a negative value for the order as seen
below:
TCP established hash table entries: 32 (order: -4, 256 bytes, linear) TCP
bind hash table entries: 32 (order: -2, 1024 bytes, linear)
Use get_order() to compute the order when emitting the hash table
information to correctly handle cases where the allocation size is smaller
than a page:
TCP established hash table entries: 32 (order: 0, 256 bytes, linear) TCP
bind hash table entries: 32 (order: 0, 1024 bytes, linear)
Link: https://lkml.kernel.org/r/20251028191020.413002-1-isaacmanjarres@google.com
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Isaac J. Manjarres <isaacmanjarres(a)google.com>
Reviewed-by: Mike Rapoport (Microsoft) <rppt(a)kernel.org>
Reviewed-by: David Hildenbrand <david(a)redhat.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 3db2dea7db4c..7712d887b696 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -2469,7 +2469,7 @@ void *__init alloc_large_system_hash(const char *tablename,
panic("Failed to allocate %s hash table\n", tablename);
pr_info("%s hash table entries: %ld (order: %d, %lu bytes, %s)\n",
- tablename, 1UL << log2qty, ilog2(size) - PAGE_SHIFT, size,
+ tablename, 1UL << log2qty, get_order(size), size,
virt ? (huge ? "vmalloc hugepage" : "vmalloc") : "linear");
if (_hash_shift)
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 0d6c356dd6547adac2b06b461528e3573f52d953
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025112032-parted-progeny-cd9e@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 0d6c356dd6547adac2b06b461528e3573f52d953 Mon Sep 17 00:00:00 2001
From: "Isaac J. Manjarres" <isaacmanjarres(a)google.com>
Date: Tue, 28 Oct 2025 12:10:12 -0700
Subject: [PATCH] mm/mm_init: fix hash table order logging in
alloc_large_system_hash()
When emitting the order of the allocation for a hash table,
alloc_large_system_hash() unconditionally subtracts PAGE_SHIFT from log
base 2 of the allocation size. This is not correct if the allocation size
is smaller than a page, and yields a negative value for the order as seen
below:
TCP established hash table entries: 32 (order: -4, 256 bytes, linear) TCP
bind hash table entries: 32 (order: -2, 1024 bytes, linear)
Use get_order() to compute the order when emitting the hash table
information to correctly handle cases where the allocation size is smaller
than a page:
TCP established hash table entries: 32 (order: 0, 256 bytes, linear) TCP
bind hash table entries: 32 (order: 0, 1024 bytes, linear)
Link: https://lkml.kernel.org/r/20251028191020.413002-1-isaacmanjarres@google.com
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Isaac J. Manjarres <isaacmanjarres(a)google.com>
Reviewed-by: Mike Rapoport (Microsoft) <rppt(a)kernel.org>
Reviewed-by: David Hildenbrand <david(a)redhat.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 3db2dea7db4c..7712d887b696 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -2469,7 +2469,7 @@ void *__init alloc_large_system_hash(const char *tablename,
panic("Failed to allocate %s hash table\n", tablename);
pr_info("%s hash table entries: %ld (order: %d, %lu bytes, %s)\n",
- tablename, 1UL << log2qty, ilog2(size) - PAGE_SHIFT, size,
+ tablename, 1UL << log2qty, get_order(size), size,
virt ? (huge ? "vmalloc hugepage" : "vmalloc") : "linear");
if (_hash_shift)
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 31475b88110c4725b4f9a79c3a0d9bbf97e69e1c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025112421-strudel-attractor-63fc@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 31475b88110c4725b4f9a79c3a0d9bbf97e69e1c Mon Sep 17 00:00:00 2001
From: Heiko Carstens <hca(a)linux.ibm.com>
Date: Thu, 13 Nov 2025 13:21:47 +0100
Subject: [PATCH] s390/mm: Fix __ptep_rdp() inline assembly
When a zero ASCE is passed to the __ptep_rdp() inline assembly, the
generated instruction should have the R3 field of the instruction set to
zero. However the inline assembly is written incorrectly: for such cases a
zero is loaded into a register allocated by the compiler and this register
is then used by the instruction.
This means that selected TLB entries may not be flushed since the specified
ASCE does not match the one which was used when the selected TLB entries
were created.
Fix this by removing the asce and opt parameters of __ptep_rdp(), since
all callers always pass zero, and use a hard-coded register zero for
the R3 field.
Fixes: 0807b856521f ("s390/mm: add support for RDP (Reset DAT-Protection)")
Cc: stable(a)vger.kernel.org
Reviewed-by: Gerald Schaefer <gerald.schaefer(a)linux.ibm.com>
Signed-off-by: Heiko Carstens <hca(a)linux.ibm.com>
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index b7100c6a4054..6663f1619abb 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -1154,17 +1154,15 @@ static inline pte_t pte_mkhuge(pte_t pte)
#define IPTE_NODAT 0x400
#define IPTE_GUEST_ASCE 0x800
-static __always_inline void __ptep_rdp(unsigned long addr, pte_t *ptep,
- unsigned long opt, unsigned long asce,
- int local)
+static __always_inline void __ptep_rdp(unsigned long addr, pte_t *ptep, int local)
{
unsigned long pto;
pto = __pa(ptep) & ~(PTRS_PER_PTE * sizeof(pte_t) - 1);
- asm volatile(".insn rrf,0xb98b0000,%[r1],%[r2],%[asce],%[m4]"
+ asm volatile(".insn rrf,0xb98b0000,%[r1],%[r2],%%r0,%[m4]"
: "+m" (*ptep)
- : [r1] "a" (pto), [r2] "a" ((addr & PAGE_MASK) | opt),
- [asce] "a" (asce), [m4] "i" (local));
+ : [r1] "a" (pto), [r2] "a" (addr & PAGE_MASK),
+ [m4] "i" (local));
}
static __always_inline void __ptep_ipte(unsigned long address, pte_t *ptep,
@@ -1348,7 +1346,7 @@ static inline void flush_tlb_fix_spurious_fault(struct vm_area_struct *vma,
* A local RDP can be used to do the flush.
*/
if (cpu_has_rdp() && !(pte_val(*ptep) & _PAGE_PROTECT))
- __ptep_rdp(address, ptep, 0, 0, 1);
+ __ptep_rdp(address, ptep, 1);
}
#define flush_tlb_fix_spurious_fault flush_tlb_fix_spurious_fault
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 0fde20bbc50b..05974304d622 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -274,9 +274,9 @@ void ptep_reset_dat_prot(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
preempt_disable();
atomic_inc(&mm->context.flush_count);
if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
- __ptep_rdp(addr, ptep, 0, 0, 1);
+ __ptep_rdp(addr, ptep, 1);
else
- __ptep_rdp(addr, ptep, 0, 0, 0);
+ __ptep_rdp(addr, ptep, 0);
/*
* PTE is not invalidated by RDP, only _PAGE_PROTECT is cleared. That
* means it is still valid and active, and must not be changed according
Pavel Machek <pavel(a)denx.de> wrote:
> > Zizhi Wo <wozizhi(a)huaweicloud.com>
> > tty/vt: Add missing return value for VT_RESIZE in vt_ioctl()
>
> This one was backported wrongly to 6.12:
>
> +++ b/drivers/tty/vt/vt_ioctl.c
> @@ -923,7 +923,9 @@ int vt_ioctl(struct tty_struct *tty,
>
> if (vc) {
> /* FIXME: review v tty lock */
> - __vc_resize(vc_cons[i].d, cc, ll, true);
> + ret = __vc_resize(vc_cons[i].d, cc, ll, true);
> + if (ret)
> + return ret;
> }
> }
> console_unlock();
>
> It needs to do console_unlock() before returning.
I have already sent 2 emails about this, but Greg's to-do
list seems to be very long, or something like that.
https://lore.kernel.org/lkml/zuLBWV-yhJXc0iM4l5T-O63M-kKmI2FlUSVgZl6B3WubvF…https://lore.kernel.org/lkml/8mT8aJsAQPfUnmI5mmsgbUweQAptUFDu5XqhrxPPI1DgJr…
That second email has a small test program to demo the
problem and a patch to fix it for 6.12.58+ and 6.17.8+
--
Jari Ruusu 4096R/8132F189 12D6 4C3A DCDA 0AA4 27BD ACDF F073 3C80 8132 F189
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x 31475b88110c4725b4f9a79c3a0d9bbf97e69e1c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025112418-impish-remix-d936@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 31475b88110c4725b4f9a79c3a0d9bbf97e69e1c Mon Sep 17 00:00:00 2001
From: Heiko Carstens <hca(a)linux.ibm.com>
Date: Thu, 13 Nov 2025 13:21:47 +0100
Subject: [PATCH] s390/mm: Fix __ptep_rdp() inline assembly
When a zero ASCE is passed to the __ptep_rdp() inline assembly, the
generated instruction should have the R3 field of the instruction set to
zero. However the inline assembly is written incorrectly: for such cases a
zero is loaded into a register allocated by the compiler and this register
is then used by the instruction.
This means that selected TLB entries may not be flushed since the specified
ASCE does not match the one which was used when the selected TLB entries
were created.
Fix this by removing the asce and opt parameters of __ptep_rdp(), since
all callers always pass zero, and use a hard-coded register zero for
the R3 field.
Fixes: 0807b856521f ("s390/mm: add support for RDP (Reset DAT-Protection)")
Cc: stable(a)vger.kernel.org
Reviewed-by: Gerald Schaefer <gerald.schaefer(a)linux.ibm.com>
Signed-off-by: Heiko Carstens <hca(a)linux.ibm.com>
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index b7100c6a4054..6663f1619abb 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -1154,17 +1154,15 @@ static inline pte_t pte_mkhuge(pte_t pte)
#define IPTE_NODAT 0x400
#define IPTE_GUEST_ASCE 0x800
-static __always_inline void __ptep_rdp(unsigned long addr, pte_t *ptep,
- unsigned long opt, unsigned long asce,
- int local)
+static __always_inline void __ptep_rdp(unsigned long addr, pte_t *ptep, int local)
{
unsigned long pto;
pto = __pa(ptep) & ~(PTRS_PER_PTE * sizeof(pte_t) - 1);
- asm volatile(".insn rrf,0xb98b0000,%[r1],%[r2],%[asce],%[m4]"
+ asm volatile(".insn rrf,0xb98b0000,%[r1],%[r2],%%r0,%[m4]"
: "+m" (*ptep)
- : [r1] "a" (pto), [r2] "a" ((addr & PAGE_MASK) | opt),
- [asce] "a" (asce), [m4] "i" (local));
+ : [r1] "a" (pto), [r2] "a" (addr & PAGE_MASK),
+ [m4] "i" (local));
}
static __always_inline void __ptep_ipte(unsigned long address, pte_t *ptep,
@@ -1348,7 +1346,7 @@ static inline void flush_tlb_fix_spurious_fault(struct vm_area_struct *vma,
* A local RDP can be used to do the flush.
*/
if (cpu_has_rdp() && !(pte_val(*ptep) & _PAGE_PROTECT))
- __ptep_rdp(address, ptep, 0, 0, 1);
+ __ptep_rdp(address, ptep, 1);
}
#define flush_tlb_fix_spurious_fault flush_tlb_fix_spurious_fault
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 0fde20bbc50b..05974304d622 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -274,9 +274,9 @@ void ptep_reset_dat_prot(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
preempt_disable();
atomic_inc(&mm->context.flush_count);
if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
- __ptep_rdp(addr, ptep, 0, 0, 1);
+ __ptep_rdp(addr, ptep, 1);
else
- __ptep_rdp(addr, ptep, 0, 0, 0);
+ __ptep_rdp(addr, ptep, 0);
/*
* PTE is not invalidated by RDP, only _PAGE_PROTECT is cleared. That
* means it is still valid and active, and must not be changed according
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x d52dea485cd3c98cfeeb474cf66cf95df2ab142f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025112408-crunching-july-9814@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d52dea485cd3c98cfeeb474cf66cf95df2ab142f Mon Sep 17 00:00:00 2001
From: Shuicheng Lin <shuicheng.lin(a)intel.com>
Date: Wed, 12 Nov 2025 18:10:06 +0000
Subject: [PATCH] drm/xe: Prevent BIT() overflow when handling invalid prefetch
region
If user provides a large value (such as 0x80) for parameter
prefetch_mem_region_instance in vm_bind ioctl, it will cause
BIT(prefetch_region) overflow as below:
"
------------[ cut here ]------------
UBSAN: shift-out-of-bounds in drivers/gpu/drm/xe/xe_vm.c:3414:7
shift exponent 128 is too large for 64-bit type 'long unsigned int'
CPU: 8 UID: 0 PID: 53120 Comm: xe_exec_system_ Tainted: G W 6.18.0-rc1-lgci-xe-kernel+ #200 PREEMPT(voluntary)
Tainted: [W]=WARN
Hardware name: ASUS System Product Name/PRIME Z790-P WIFI, BIOS 0812 02/24/2023
Call Trace:
<TASK>
dump_stack_lvl+0xa0/0xc0
dump_stack+0x10/0x20
ubsan_epilogue+0x9/0x40
__ubsan_handle_shift_out_of_bounds+0x10e/0x170
? mutex_unlock+0x12/0x20
xe_vm_bind_ioctl.cold+0x20/0x3c [xe]
...
"
Fix it by validating prefetch_region before the BIT() usage.
v2: Add Closes and Cc stable kernels. (Matt)
Reported-by: Koen Koning <koen.koning(a)intel.com>
Reported-by: Peter Senna Tschudin <peter.senna(a)linux.intel.com>
Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs")
Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/6478
Cc: <stable(a)vger.kernel.org> # v6.8+
Reviewed-by: Matthew Auld <matthew.auld(a)intel.com>
Signed-off-by: Shuicheng Lin <shuicheng.lin(a)intel.com>
Signed-off-by: Matthew Auld <matthew.auld(a)intel.com>
Link: https://patch.msgid.link/20251112181005.2120521-2-shuicheng.lin@intel.com
(cherry picked from commit 8f565bdd14eec5611cc041dba4650e42ccdf71d9)
Signed-off-by: Lucas De Marchi <lucas.demarchi(a)intel.com>
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index ccb09ef4ec9e..cdd1dc540a59 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -3369,8 +3369,10 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm,
op == DRM_XE_VM_BIND_OP_PREFETCH) ||
XE_IOCTL_DBG(xe, prefetch_region &&
op != DRM_XE_VM_BIND_OP_PREFETCH) ||
- XE_IOCTL_DBG(xe, (prefetch_region != DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC &&
- !(BIT(prefetch_region) & xe->info.mem_region_mask))) ||
+ XE_IOCTL_DBG(xe, (prefetch_region != DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC &&
+ /* Guard against undefined shift in BIT(prefetch_region) */
+ (prefetch_region >= (sizeof(xe->info.mem_region_mask) * 8) ||
+ !(BIT(prefetch_region) & xe->info.mem_region_mask)))) ||
XE_IOCTL_DBG(xe, obj &&
op == DRM_XE_VM_BIND_OP_UNMAP) ||
XE_IOCTL_DBG(xe, (flags & DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET) &&
Revert commit 8ced3cb73ccd20e744deab7b49f2b7468c984eb2 which is upstream
commit 060842fed53f77a73824c9147f51dc6746c1267a
It causes regression in 6.12.58 stable, no issues in upstream.
The Kconfig dependency change 060842fed53f ("RDMA/irdma: Update Kconfig")
went in linux kernel 6.18 where RDMA IDPF support was merged.
Even though IDPF driver exists in older kernels, it doesn't provide RDMA
support so there is no need for IRDMA to depend on IDPF in kernels <= 6.17.
Link: https://lore.kernel.org/all/IA1PR11MB7727692DE0ECFE84E9B52F02CBD5A@IA1PR11M…
Link: https://lore.kernel.org/all/IA1PR11MB772718B36A3B27D2F07B0109CBD5A@IA1PR11M…
Cc: stable(a)vger.kernel.org # v6.12.58
Signed-off-by: Tatyana Nikolova <tatyana.e.nikolova(a)intel.com>
Signed-off-by: Wentao Guan <guanwentao(a)uniontech.com>
---
drivers/infiniband/hw/irdma/Kconfig | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/drivers/infiniband/hw/irdma/Kconfig b/drivers/infiniband/hw/irdma/Kconfig
index 41660203e0049..b6f9c41bca51d 100644
--- a/drivers/infiniband/hw/irdma/Kconfig
+++ b/drivers/infiniband/hw/irdma/Kconfig
@@ -4,10 +4,9 @@ config INFINIBAND_IRDMA
depends on INET
depends on IPV6 || !IPV6
depends on PCI
- depends on IDPF && ICE && I40E
+ depends on ICE && I40E
select GENERIC_ALLOCATOR
select AUXILIARY_BUS
help
- This is an Intel(R) Ethernet Protocol Driver for RDMA that
- supports IPU E2000 (RoCEv2), E810 (iWARP/RoCEv2) and X722 (iWARP)
- network devices.
+ This is an Intel(R) Ethernet Protocol Driver for RDMA driver
+ that support E810 (iWARP/RoCE) and X722 (iWARP) network devices.
--
2.20.1
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 7458f72cc28f9eb0de811effcb5376d0ec19094a
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025112053-shallot-unsold-98f0@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7458f72cc28f9eb0de811effcb5376d0ec19094a Mon Sep 17 00:00:00 2001
From: Sudeep Holla <sudeep.holla(a)arm.com>
Date: Fri, 17 Oct 2025 12:03:20 +0100
Subject: [PATCH] pmdomain: arm: scmi: Fix genpd leak on provider registration
failure
If of_genpd_add_provider_onecell() fails during probe, the previously
created generic power domains are not removed, leading to a memory leak
and potential kernel crash later in genpd_debug_add().
Add proper error handling to unwind the initialized domains before
returning from probe to ensure all resources are correctly released on
failure.
Example crash trace observed without this fix:
| Unable to handle kernel paging request at virtual address fffffffffffffc70
| CPU: 1 UID: 0 PID: 1 Comm: swapper/0 Not tainted 6.18.0-rc1 #405 PREEMPT
| Hardware name: ARM LTD ARM Juno Development Platform/ARM Juno Development Platform
| pstate: 00000005 (nzcv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
| pc : genpd_debug_add+0x2c/0x160
| lr : genpd_debug_init+0x74/0x98
| Call trace:
| genpd_debug_add+0x2c/0x160 (P)
| genpd_debug_init+0x74/0x98
| do_one_initcall+0xd0/0x2d8
| do_initcall_level+0xa0/0x140
| do_initcalls+0x60/0xa8
| do_basic_setup+0x28/0x40
| kernel_init_freeable+0xe8/0x170
| kernel_init+0x2c/0x140
| ret_from_fork+0x10/0x20
Fixes: 898216c97ed2 ("firmware: arm_scmi: add device power domain support using genpd")
Signed-off-by: Sudeep Holla <sudeep.holla(a)arm.com>
Reviewed-by: Peng Fan <peng.fan(a)nxp.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Ulf Hansson <ulf.hansson(a)linaro.org>
diff --git a/drivers/pmdomain/arm/scmi_pm_domain.c b/drivers/pmdomain/arm/scmi_pm_domain.c
index 8fe1c0a501c9..b5e2ffd5ea64 100644
--- a/drivers/pmdomain/arm/scmi_pm_domain.c
+++ b/drivers/pmdomain/arm/scmi_pm_domain.c
@@ -41,7 +41,7 @@ static int scmi_pd_power_off(struct generic_pm_domain *domain)
static int scmi_pm_domain_probe(struct scmi_device *sdev)
{
- int num_domains, i;
+ int num_domains, i, ret;
struct device *dev = &sdev->dev;
struct device_node *np = dev->of_node;
struct scmi_pm_domain *scmi_pd;
@@ -108,9 +108,18 @@ static int scmi_pm_domain_probe(struct scmi_device *sdev)
scmi_pd_data->domains = domains;
scmi_pd_data->num_domains = num_domains;
+ ret = of_genpd_add_provider_onecell(np, scmi_pd_data);
+ if (ret)
+ goto err_rm_genpds;
+
dev_set_drvdata(dev, scmi_pd_data);
- return of_genpd_add_provider_onecell(np, scmi_pd_data);
+ return 0;
+err_rm_genpds:
+ for (i = num_domains - 1; i >= 0; i--)
+ pm_genpd_remove(domains[i]);
+
+ return ret;
}
static void scmi_pm_domain_remove(struct scmi_device *sdev)
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x 3f9eacf4f0705876a5d6526d7d320ca91d7d7a16
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025112021-arrest-chip-7336@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 3f9eacf4f0705876a5d6526d7d320ca91d7d7a16 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz(a)kernel.org>
Date: Thu, 30 Oct 2025 12:27:05 +0000
Subject: [PATCH] KVM: arm64: Make all 32bit ID registers fully writable
32bit ID registers aren't getting much love these days, and are
often missed in updates. One of these updates broke restoring
a GICv2 guest on a GICv3 machine.
Instead of performing a piecemeal fix, just bite the bullet
and make all 32bit ID regs fully writable. KVM itself never
relies on them for anything, and if the VMM wants to mess up
the guest, so be it.
Fixes: 5cb57a1aff755 ("KVM: arm64: Zero ID_AA64PFR0_EL1.GIC when no GICv3 is presented to the guest")
Reported-by: Peter Maydell <peter.maydell(a)linaro.org>
Cc: stable(a)vger.kernel.org
Reviewed-by: Oliver Upton <oupton(a)kernel.org>
Link: https://patch.msgid.link/20251030122707.2033690-2-maz@kernel.org
Signed-off-by: Marc Zyngier <maz(a)kernel.org>
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index e67eb39ddc11..ad82264c6cbe 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -2595,19 +2595,23 @@ static bool bad_redir_trap(struct kvm_vcpu *vcpu,
.val = 0, \
}
-/* sys_reg_desc initialiser for known cpufeature ID registers */
-#define AA32_ID_SANITISED(name) { \
- ID_DESC(name), \
- .visibility = aa32_id_visibility, \
- .val = 0, \
-}
-
/* sys_reg_desc initialiser for writable ID registers */
#define ID_WRITABLE(name, mask) { \
ID_DESC(name), \
.val = mask, \
}
+/*
+ * 32bit ID regs are fully writable when the guest is 32bit
+ * capable. Nothing in the KVM code should rely on 32bit features
+ * anyway, only 64bit, so let the VMM do its worse.
+ */
+#define AA32_ID_WRITABLE(name) { \
+ ID_DESC(name), \
+ .visibility = aa32_id_visibility, \
+ .val = GENMASK(31, 0), \
+}
+
/* sys_reg_desc initialiser for cpufeature ID registers that need filtering */
#define ID_FILTERED(sysreg, name, mask) { \
ID_DESC(sysreg), \
@@ -3128,40 +3132,39 @@ static const struct sys_reg_desc sys_reg_descs[] = {
/* AArch64 mappings of the AArch32 ID registers */
/* CRm=1 */
- AA32_ID_SANITISED(ID_PFR0_EL1),
- AA32_ID_SANITISED(ID_PFR1_EL1),
+ AA32_ID_WRITABLE(ID_PFR0_EL1),
+ AA32_ID_WRITABLE(ID_PFR1_EL1),
{ SYS_DESC(SYS_ID_DFR0_EL1),
.access = access_id_reg,
.get_user = get_id_reg,
.set_user = set_id_dfr0_el1,
.visibility = aa32_id_visibility,
.reset = read_sanitised_id_dfr0_el1,
- .val = ID_DFR0_EL1_PerfMon_MASK |
- ID_DFR0_EL1_CopDbg_MASK, },
+ .val = GENMASK(31, 0) },
ID_HIDDEN(ID_AFR0_EL1),
- AA32_ID_SANITISED(ID_MMFR0_EL1),
- AA32_ID_SANITISED(ID_MMFR1_EL1),
- AA32_ID_SANITISED(ID_MMFR2_EL1),
- AA32_ID_SANITISED(ID_MMFR3_EL1),
+ AA32_ID_WRITABLE(ID_MMFR0_EL1),
+ AA32_ID_WRITABLE(ID_MMFR1_EL1),
+ AA32_ID_WRITABLE(ID_MMFR2_EL1),
+ AA32_ID_WRITABLE(ID_MMFR3_EL1),
/* CRm=2 */
- AA32_ID_SANITISED(ID_ISAR0_EL1),
- AA32_ID_SANITISED(ID_ISAR1_EL1),
- AA32_ID_SANITISED(ID_ISAR2_EL1),
- AA32_ID_SANITISED(ID_ISAR3_EL1),
- AA32_ID_SANITISED(ID_ISAR4_EL1),
- AA32_ID_SANITISED(ID_ISAR5_EL1),
- AA32_ID_SANITISED(ID_MMFR4_EL1),
- AA32_ID_SANITISED(ID_ISAR6_EL1),
+ AA32_ID_WRITABLE(ID_ISAR0_EL1),
+ AA32_ID_WRITABLE(ID_ISAR1_EL1),
+ AA32_ID_WRITABLE(ID_ISAR2_EL1),
+ AA32_ID_WRITABLE(ID_ISAR3_EL1),
+ AA32_ID_WRITABLE(ID_ISAR4_EL1),
+ AA32_ID_WRITABLE(ID_ISAR5_EL1),
+ AA32_ID_WRITABLE(ID_MMFR4_EL1),
+ AA32_ID_WRITABLE(ID_ISAR6_EL1),
/* CRm=3 */
- AA32_ID_SANITISED(MVFR0_EL1),
- AA32_ID_SANITISED(MVFR1_EL1),
- AA32_ID_SANITISED(MVFR2_EL1),
+ AA32_ID_WRITABLE(MVFR0_EL1),
+ AA32_ID_WRITABLE(MVFR1_EL1),
+ AA32_ID_WRITABLE(MVFR2_EL1),
ID_UNALLOCATED(3,3),
- AA32_ID_SANITISED(ID_PFR2_EL1),
+ AA32_ID_WRITABLE(ID_PFR2_EL1),
ID_HIDDEN(ID_DFR1_EL1),
- AA32_ID_SANITISED(ID_MMFR5_EL1),
+ AA32_ID_WRITABLE(ID_MMFR5_EL1),
ID_UNALLOCATED(3,7),
/* AArch64 ID registers */
The recent backport of the upstream commit 05a1fc5efdd8 ("ALSA:
usb-audio: Fix potential overflow of PCM transfer buffer") on the
older stable kernels like 6.12.y was broken since it doesn't consider
the mutex unlock, where the upstream code manages with guard().
In the older code, we still need an explicit unlock.
This is a fix that corrects the error path, applied only on old stable
trees.
Reported-by: Pavel Machek <pavel(a)denx.de>
Closes: https://lore.kernel.org/aSWtH0AZH5+aeb+a@duo.ucw.cz
Fixes: 98e9d5e33bda ("ALSA: usb-audio: Fix potential overflow of PCM transfer buffer")
Reviewed-by: Pavel Machek <pavel(a)denx.de>
Signed-off-by: Takashi Iwai <tiwai(a)suse.de>
---
sound/usb/endpoint.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/sound/usb/endpoint.c b/sound/usb/endpoint.c
index 7238f65cbcff..aa201e4744bf 100644
--- a/sound/usb/endpoint.c
+++ b/sound/usb/endpoint.c
@@ -1389,7 +1389,8 @@ int snd_usb_endpoint_set_params(struct snd_usb_audio *chip,
if (ep->packsize[1] > ep->maxpacksize) {
usb_audio_dbg(chip, "Too small maxpacksize %u for rate %u / pps %u\n",
ep->maxpacksize, ep->cur_rate, ep->pps);
- return -EINVAL;
+ err = -EINVAL;
+ goto unlock;
}
/* calculate the frequency in 16.16 format */
--
2.52.0
The patch below does not apply to the 6.17-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.17.y
git checkout FETCH_HEAD
git cherry-pick -x d52dea485cd3c98cfeeb474cf66cf95df2ab142f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025112407-envious-erupt-fc37@gregkh' --subject-prefix 'PATCH 6.17.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d52dea485cd3c98cfeeb474cf66cf95df2ab142f Mon Sep 17 00:00:00 2001
From: Shuicheng Lin <shuicheng.lin(a)intel.com>
Date: Wed, 12 Nov 2025 18:10:06 +0000
Subject: [PATCH] drm/xe: Prevent BIT() overflow when handling invalid prefetch
region
If user provides a large value (such as 0x80) for parameter
prefetch_mem_region_instance in vm_bind ioctl, it will cause
BIT(prefetch_region) overflow as below:
"
------------[ cut here ]------------
UBSAN: shift-out-of-bounds in drivers/gpu/drm/xe/xe_vm.c:3414:7
shift exponent 128 is too large for 64-bit type 'long unsigned int'
CPU: 8 UID: 0 PID: 53120 Comm: xe_exec_system_ Tainted: G W 6.18.0-rc1-lgci-xe-kernel+ #200 PREEMPT(voluntary)
Tainted: [W]=WARN
Hardware name: ASUS System Product Name/PRIME Z790-P WIFI, BIOS 0812 02/24/2023
Call Trace:
<TASK>
dump_stack_lvl+0xa0/0xc0
dump_stack+0x10/0x20
ubsan_epilogue+0x9/0x40
__ubsan_handle_shift_out_of_bounds+0x10e/0x170
? mutex_unlock+0x12/0x20
xe_vm_bind_ioctl.cold+0x20/0x3c [xe]
...
"
Fix it by validating prefetch_region before the BIT() usage.
v2: Add Closes and Cc stable kernels. (Matt)
Reported-by: Koen Koning <koen.koning(a)intel.com>
Reported-by: Peter Senna Tschudin <peter.senna(a)linux.intel.com>
Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs")
Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/6478
Cc: <stable(a)vger.kernel.org> # v6.8+
Reviewed-by: Matthew Auld <matthew.auld(a)intel.com>
Signed-off-by: Shuicheng Lin <shuicheng.lin(a)intel.com>
Signed-off-by: Matthew Auld <matthew.auld(a)intel.com>
Link: https://patch.msgid.link/20251112181005.2120521-2-shuicheng.lin@intel.com
(cherry picked from commit 8f565bdd14eec5611cc041dba4650e42ccdf71d9)
Signed-off-by: Lucas De Marchi <lucas.demarchi(a)intel.com>
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index ccb09ef4ec9e..cdd1dc540a59 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -3369,8 +3369,10 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm,
op == DRM_XE_VM_BIND_OP_PREFETCH) ||
XE_IOCTL_DBG(xe, prefetch_region &&
op != DRM_XE_VM_BIND_OP_PREFETCH) ||
- XE_IOCTL_DBG(xe, (prefetch_region != DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC &&
- !(BIT(prefetch_region) & xe->info.mem_region_mask))) ||
+ XE_IOCTL_DBG(xe, (prefetch_region != DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC &&
+ /* Guard against undefined shift in BIT(prefetch_region) */
+ (prefetch_region >= (sizeof(xe->info.mem_region_mask) * 8) ||
+ !(BIT(prefetch_region) & xe->info.mem_region_mask)))) ||
XE_IOCTL_DBG(xe, obj &&
op == DRM_XE_VM_BIND_OP_UNMAP) ||
XE_IOCTL_DBG(xe, (flags & DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET) &&
The fix of CVE-2024-37354 1ff2bd566fbc doesn't apply cleanly to 5.15 as
it requires an extra prequisite patch `8a2b3da191e5a btrfs: add helper
to truncate inode items when logging inode` for the context to be the
same. Therefore two patches were brought back as part of this backport.
The prerequisite patch is part of a 10 patch patch series however
bringing all of them back felt more of a feature backport rather
than CVE fix. Please let me know if bringing the entire series back is
more preferrable then I shall re-send v2.
Some basic smoke testing and btrfs testing on inhouse test suite was
done and no regressions were observerd.
Filipe Manana (1):
btrfs: add helper to truncate inode items when logging inode
Omar Sandoval (1):
btrfs: fix crash on racing fsync and size-extending write into
prealloc
fs/btrfs/tree-log.c | 47 ++++++++++++++++++++++++++++-----------------
1 file changed, 29 insertions(+), 18 deletions(-)
--
2.50.1
On SM8250 (IRIS2) with firmware older than 1.0.087, the firmware could
not handle a dummy device address for EOS buffers, so a NULL device
address is sent instead. The existing check used IS_V6() alongside a
firmware version gate:
if (IS_V6(core) && is_fw_rev_or_older(core, 1, 0, 87))
fdata.device_addr = 0;
else
fdata.device_addr = 0xdeadb000;
However, SC7280 which is also V6, uses a firmware string of the form
"1.0.<commit-hash>", which the version parser translates to 1.0.0. This
unintentionally satisfies the `is_fw_rev_or_older(..., 1, 0, 87)`
condition on SC7280. Combined with IS_V6() matching there as well, the
quirk is incorrectly applied to SC7280, causing VP9 decode failures.
Constrain the check to IRIS2 (SM8250) only, which is the only platform
that needed this quirk, by replacing IS_V6() with IS_IRIS2(). This
restores correct behavior on SC7280 (no forced NULL EOS buffer address).
Fixes: 47f867cb1b63 ("media: venus: fix EOS handling in decoder stop command")
Cc: stable(a)vger.kernel.org
Reported-by: Mecid <mecid(a)mecomediagroup.de>
Closes: https://github.com/qualcomm-linux/kernel-topics/issues/222
Co-developed-by: Renjiang Han <renjiang.han(a)oss.qualcomm.com>
Signed-off-by: Renjiang Han <renjiang.han(a)oss.qualcomm.com>
Signed-off-by: Dikshita Agarwal <dikshita.agarwal(a)oss.qualcomm.com>
---
Changes in v2:
- Fixed email address for Mecid (Konrad)
- Added inline comment for the quirk (Konrad)
- Link to v1: https://lore.kernel.org/r/20251124-venus-vp9-fix-v1-1-2ff36d9f2374@oss.qual…
---
drivers/media/platform/qcom/venus/vdec.c | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/drivers/media/platform/qcom/venus/vdec.c b/drivers/media/platform/qcom/venus/vdec.c
index 4a6641fdffcf79705893be58c7ec5cf485e2fab9..6b3d5e59133e6902353d15c24c8bbaed4fcb6808 100644
--- a/drivers/media/platform/qcom/venus/vdec.c
+++ b/drivers/media/platform/qcom/venus/vdec.c
@@ -565,7 +565,13 @@ vdec_decoder_cmd(struct file *file, void *fh, struct v4l2_decoder_cmd *cmd)
fdata.buffer_type = HFI_BUFFER_INPUT;
fdata.flags |= HFI_BUFFERFLAG_EOS;
- if (IS_V6(inst->core) && is_fw_rev_or_older(inst->core, 1, 0, 87))
+
+ /* Send NULL EOS addr for only IRIS2 (SM8250),for firmware <= 1.0.87.
+ * SC7280 also reports "1.0.<hash>" parsed as 1.0.0; restricting to IRIS2
+ * avoids misapplying this quirk and breaking VP9 decode on SC7280.
+ */
+
+ if (IS_IRIS2(inst->core) && is_fw_rev_or_older(inst->core, 1, 0, 87))
fdata.device_addr = 0;
else
fdata.device_addr = 0xdeadb000;
---
base-commit: 1f2353f5a1af995efbf7bea44341aa0d03460b28
change-id: 20251121-venus-vp9-fix-1ff602724c02
Best regards,
--
Dikshita Agarwal <dikshita.agarwal(a)oss.qualcomm.com>
From: Łukasz Bartosik <ukaszb(a)chromium.org>
When DbC is disconnected then xhci_dbc_tty_unregister_device()
is called. However if there is any user space process blocked
on write to DbC terminal device then it will never be signalled
and thus stay blocked indifinitely.
This fix adds a tty_vhangup() call in xhci_dbc_tty_unregister_device().
The tty_vhangup() wakes up any blocked writers and causes subsequent
write attempts to DbC terminal device to fail.
Cc: stable(a)vger.kernel.org
Fixes: dfba2174dc42 ("usb: xhci: Add DbC support in xHCI driver")
Signed-off-by: Łukasz Bartosik <ukaszb(a)chromium.org>
---
Changes in v2:
- Replaced tty_hangup() with tty_vhangup()
---
drivers/usb/host/xhci-dbgtty.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/drivers/usb/host/xhci-dbgtty.c b/drivers/usb/host/xhci-dbgtty.c
index d894081d8d15..ad86f315c26d 100644
--- a/drivers/usb/host/xhci-dbgtty.c
+++ b/drivers/usb/host/xhci-dbgtty.c
@@ -535,6 +535,12 @@ static void xhci_dbc_tty_unregister_device(struct xhci_dbc *dbc)
if (!port->registered)
return;
+ /*
+ * Hang up the TTY. This wakes up any blocked
+ * writers and causes subsequent writes to fail.
+ */
+ tty_vhangup(port->port.tty);
+
tty_unregister_device(dbc_tty_driver, port->minor);
xhci_dbc_tty_exit_port(port);
port->registered = false;
--
2.52.0.rc1.455.g30608eb744-goog
The delayed work uec->work is scheduled in gaokun_ucsi_probe()
but never properly canceled in gaokun_ucsi_remove(). This creates
use-after-free scenarios where the ucsi and gaokun_ucsi structure
are freed after ucsi_destroy() completes execution, while the
gaokun_ucsi_register_worker() might be either currently executing
or still pending in the work queue. The already-freed gaokun_ucsi
or ucsi structure may then be accessed.
Furthermore, the race window is 3 seconds, which is sufficiently
long to make this bug easily reproducible. The following is the
trace captured by KASAN:
==================================================================
BUG: KASAN: slab-use-after-free in __run_timers+0x5ec/0x630
Write of size 8 at addr ffff00000ec28cc8 by task swapper/0/0
...
Call trace:
show_stack+0x18/0x24 (C)
dump_stack_lvl+0x78/0x90
print_report+0x114/0x580
kasan_report+0xa4/0xf0
__asan_report_store8_noabort+0x20/0x2c
__run_timers+0x5ec/0x630
run_timer_softirq+0xe8/0x1cc
handle_softirqs+0x294/0x720
__do_softirq+0x14/0x20
____do_softirq+0x10/0x1c
call_on_irq_stack+0x30/0x48
do_softirq_own_stack+0x1c/0x28
__irq_exit_rcu+0x27c/0x364
irq_exit_rcu+0x10/0x1c
el1_interrupt+0x40/0x60
el1h_64_irq_handler+0x18/0x24
el1h_64_irq+0x6c/0x70
arch_local_irq_enable+0x4/0x8 (P)
do_idle+0x334/0x458
cpu_startup_entry+0x60/0x70
rest_init+0x158/0x174
start_kernel+0x2f8/0x394
__primary_switched+0x8c/0x94
Allocated by task 72 on cpu 0 at 27.510341s:
kasan_save_stack+0x2c/0x54
kasan_save_track+0x24/0x5c
kasan_save_alloc_info+0x40/0x54
__kasan_kmalloc+0xa0/0xb8
__kmalloc_node_track_caller_noprof+0x1c0/0x588
devm_kmalloc+0x7c/0x1c8
gaokun_ucsi_probe+0xa0/0x840 auxiliary_bus_probe+0x94/0xf8
really_probe+0x17c/0x5b8
__driver_probe_device+0x158/0x2c4
driver_probe_device+0x10c/0x264
__device_attach_driver+0x168/0x2d0
bus_for_each_drv+0x100/0x188
__device_attach+0x174/0x368
device_initial_probe+0x14/0x20
bus_probe_device+0x120/0x150
device_add+0xb3c/0x10fc
__auxiliary_device_add+0x88/0x130
...
Freed by task 73 on cpu 1 at 28.910627s:
kasan_save_stack+0x2c/0x54
kasan_save_track+0x24/0x5c
__kasan_save_free_info+0x4c/0x74
__kasan_slab_free+0x60/0x8c
kfree+0xd4/0x410
devres_release_all+0x140/0x1f0
device_unbind_cleanup+0x20/0x190
device_release_driver_internal+0x344/0x460
device_release_driver+0x18/0x24
bus_remove_device+0x198/0x274
device_del+0x310/0xa84
...
The buggy address belongs to the object at ffff00000ec28c00
which belongs to the cache kmalloc-512 of size 512
The buggy address is located 200 bytes inside of
freed 512-byte region
The buggy address belongs to the physical page:
page: refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x4ec28
head: order:2 mapcount:0 entire_mapcount:0 nr_pages_mapped:0 pincount:0
flags: 0x3fffe0000000040(head|node=0|zone=0|lastcpupid=0x1ffff)
page_type: f5(slab)
raw: 03fffe0000000040 ffff000008801c80 dead000000000122 0000000000000000
raw: 0000000000000000 0000000080100010 00000000f5000000 0000000000000000
head: 03fffe0000000040 ffff000008801c80 dead000000000122 0000000000000000
head: 0000000000000000 0000000080100010 00000000f5000000 0000000000000000
head: 03fffe0000000002 fffffdffc03b0a01 00000000ffffffff 00000000ffffffff
head: ffffffffffffffff 0000000000000000 00000000ffffffff 0000000000000004
page dumped because: kasan: bad access detected
Memory state around the buggy address:
ffff00000ec28b80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
ffff00000ec28c00: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
>ffff00000ec28c80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
^
ffff00000ec28d00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
ffff00000ec28d80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
==================================================================
Add disable_delayed_work_sync() in gaokun_ucsi_remove() to ensure
that uec->work is properly canceled and prevented from executing
after the ucsi and gaokun_ucsi structure have been deallocated.
Fixes: 00327d7f2c8c ("usb: typec: ucsi: add Huawei Matebook E Go ucsi driver")
Cc: stable(a)vger.kernel.org
Signed-off-by: Duoming Zhou <duoming(a)zju.edu.cn>
---
drivers/usb/typec/ucsi/ucsi_huawei_gaokun.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/usb/typec/ucsi/ucsi_huawei_gaokun.c b/drivers/usb/typec/ucsi/ucsi_huawei_gaokun.c
index 8401ab414bd..c5965656bab 100644
--- a/drivers/usb/typec/ucsi/ucsi_huawei_gaokun.c
+++ b/drivers/usb/typec/ucsi/ucsi_huawei_gaokun.c
@@ -503,6 +503,7 @@ static void gaokun_ucsi_remove(struct auxiliary_device *adev)
{
struct gaokun_ucsi *uec = auxiliary_get_drvdata(adev);
+ disable_delayed_work_sync(&uec->work);
gaokun_ec_unregister_notify(uec->ec, &uec->nb);
ucsi_unregister(uec->ucsi);
ucsi_destroy(uec->ucsi);
--
2.34.1
The gaokun_ucsi_probe() uses ucsi_create() to allocate a UCSI instance.
The ucsi_create() validates whether ops->poll_cci is defined, and if not,
it directly returns -EINVAL. However, the gaokun_ucsi_ops structure does
not define the poll_cci, causing ucsi_create() always fail with -EINVAL.
This issue can be observed in the kernel log with the following error:
ucsi_huawei_gaokun.ucsi huawei_gaokun_ec.ucsi.0: probe with driver
ucsi_huawei_gaokun.ucsi failed with error -22
Fix the issue by adding the missing poll_cci callback to gaokun_ucsi_ops.
Fixes: 00327d7f2c8c ("usb: typec: ucsi: add Huawei Matebook E Go ucsi driver")
Cc: stable(a)vger.kernel.org
Signed-off-by: Duoming Zhou <duoming(a)zju.edu.cn>
---
Changes in v2:
- Add cc: stable.
- Correct spelling mistake.
drivers/usb/typec/ucsi/ucsi_huawei_gaokun.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/usb/typec/ucsi/ucsi_huawei_gaokun.c b/drivers/usb/typec/ucsi/ucsi_huawei_gaokun.c
index 7b5222081bb..8401ab414bd 100644
--- a/drivers/usb/typec/ucsi/ucsi_huawei_gaokun.c
+++ b/drivers/usb/typec/ucsi/ucsi_huawei_gaokun.c
@@ -196,6 +196,7 @@ static void gaokun_ucsi_connector_status(struct ucsi_connector *con)
const struct ucsi_operations gaokun_ucsi_ops = {
.read_version = gaokun_ucsi_read_version,
.read_cci = gaokun_ucsi_read_cci,
+ .poll_cci = gaokun_ucsi_read_cci,
.read_message_in = gaokun_ucsi_read_message_in,
.sync_control = ucsi_sync_control_common,
.async_control = gaokun_ucsi_async_control,
--
2.34.1
OTX_CPT_UCODE_NAME_LENGTH limits the microcode name to 64 bytes. If a
user writes a string of exactly 64 characters, the original code used
'strlen(buf) > 64' to check the length, but then strscpy() copies only
63 characters before adding a NUL terminator, silently truncating the
copied string.
Fix this off-by-one error by using 'count' directly for the length check
to ensure long names are rejected early and copied without truncation.
Cc: stable(a)vger.kernel.org
Fixes: d9110b0b01ff ("crypto: marvell - add support for OCTEON TX CPT engine")
Signed-off-by: Thorsten Blum <thorsten.blum(a)linux.dev>
---
drivers/crypto/marvell/octeontx/otx_cptpf_ucode.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/crypto/marvell/octeontx/otx_cptpf_ucode.c b/drivers/crypto/marvell/octeontx/otx_cptpf_ucode.c
index 9f5601c0280b..417a48f41350 100644
--- a/drivers/crypto/marvell/octeontx/otx_cptpf_ucode.c
+++ b/drivers/crypto/marvell/octeontx/otx_cptpf_ucode.c
@@ -1326,7 +1326,7 @@ static ssize_t ucode_load_store(struct device *dev,
int del_grp_idx = -1;
int ucode_idx = 0;
- if (strlen(buf) > OTX_CPT_UCODE_NAME_LENGTH)
+ if (count >= OTX_CPT_UCODE_NAME_LENGTH)
return -EINVAL;
eng_grps = container_of(attr, struct otx_cpt_eng_grps, ucode_load_attr);
--
Thorsten Blum <thorsten.blum(a)linux.dev>
GPG: 1D60 735E 8AEF 3BE4 73B6 9D84 7336 78FD 8DFE EAD4
Hi Sergey,
On 11/21/25 03:55, Sergey Senozhatsky wrote:
> Hi Christian,
>
> On (25/11/20 10:15), Christian Loehle wrote:
>> On 11/20/25 04:45, Sergey Senozhatsky wrote:
>>> Hi,
>>>
>>> We are observing a performance regression on one of our arm64 boards.
>>> We tracked it down to the linux-6.6.y commit ada8d7fa0ad4 ("sched/cpufreq:
>>> Rework schedutil governor performance estimation").
>>>
>>> UI speedometer benchmark:
>>> w/commit: 395 +/-38
>>> w/o commit: 439 +/-14
>>>
>>
>> Hi Sergey,
>> Would be nice to get some details. What board?
>
> It's an MT8196 chromebook.
>
>> What do the OPPs look like?
>
> How do I find that out?
>
>> Does this system use uclamp during the benchmark? How?
>
> How do I find that out?
>
>> Given how large the stddev given by speedometer (version 3?) itself is, can we get the
>> stats of a few runs?
>
> v2.1
>
> w/o patch w/ patch
> 440 +/-30 406 +/-11
> 440 +/-14 413 +/-16
> 444 +/-12 403 +/-14
> 442 +/-12 412 +/-15
>
>> Maybe traces of cpu_frequency for both w/ and w/o?
>
> trace-cmd record -e power:cpu_frequency attached.
>
> "base" is with ada8d7fa0ad4
> "revert" is ada8d7fa0ad4 reverted.
I did some analysis based on your trace files.
I have been playing some time ago with speedometer performance
issues so that's why I'm curious about your report here.
I've filtered your trace purely based on cpu7 (the single biggest cpu).
Then I have cut the data from the 'warm-up' phase in both traces, to
have similar start point (I think).
It looks like the 2 traces can show similar 'pattern' of that benchmark
which is good for analysis. If you align the timestamp:
176.051s and 972.465s then both plots (frequency changes in time) look
similar.
There are some differences, though:
1. there are more deeps in the freq in time, so more often you would
pay extra penalty for the ramp-up again
2. some of the ramp-up phases are a bit longer ~100ms instead of ~80ms
going from 2GHz to 3.6GHz
3.
There are idle phases missing in the trace, so we have to be careful
when e.g. comparing avg frequency, because that might not be the real
indication of the delivered computation and not indicate the gap in the
score.
Here are the stats:
1. revert:
frequency
count 1.318000e+03
mean 2.932240e+06
std 5.434045e+05
min 2.000000e+06
50% 3.000000e+06
85% 3.600000e+06
90% 3.626000e+06
95% 3.626000e+06
99% 3.626000e+06
max 3.626000e+06
2. base:
frequency
count 1.551000e+03
mean 2.809391e+06
std 5.369750e+05
min 2.000000e+06
50% 2.800000e+06
85% 3.500000e+06
90% 3.600000e+06
95% 3.626000e+06
99% 3.626000e+06
max 3.626000e+06
A better indication in this case would be comparison of the frequency
residency in time, especially for the max freq:
1. revert: 11.92s
2. base: 9.11s
So there is 2.8s longer residency for that fmax (while we even have
longer period for finishing that Speedometer 2 test on 'base').
Here is some detail about that run*:
+---------------+---------------------+---------------+----------------+
| Trace | Total Trace | Time at Max | % of Total |
| | Duration (s) | Freq (s) | Time |
+---------------+---------------------+---------------+----------------+
| Base Trace | 24.72 | 9.11 | 36.9% |
| Revert Trace | 22.88 | 11.92 | 52.1% |
+---------------+---------------------+---------------+----------------+
*We don't know the idle periods which might happen for those frequencies
I wonder if you had a fix patch for the util_est in your kernel...
That fix has been recently backported to 6.6 stable [1].
You might want to try that patch as well, w/ or w/o this revert.
IMHO it might be worth to have it on top. It might help
the main Chrome task ('CrRendererMain') to stay longer on the biggest
cpu, since the util_est would be higher. You can read the discussion
that I had back then with PeterZ and VincentG [2].
Regards,
Lukasz
[1]
https://lore.kernel.org/stable/20251121130232.828187990@linuxfoundation.org/
[2]
https://lore.kernel.org/lkml/20230912142821.GA22166@noisy.programming.kicks…
This is v2 of a series to address multiple reports [0][1]
(+ 2 offlist) of suspend failing when NBCON console drivers are
in use. With the help of NXP and NVIDIA we were able to isolate
the problem and verify the fix.
v1 is here [2].
The first NBCON drivers appeared in 6.13, so currently there is
no LTS kernel that requires this series. But it should go into
6.17.x and 6.18.
The changes since v1:
- For printk_trigger_flush() add support for all flush types
that are available. This will prevent printk_trigger_flush()
from trying to inappropriately queue irq_work after this
series is applied.
- Add WARN_ON_ONCE() to the printk irq_work queueing functions
in case they are called when irq_work is blocked. There
should never be (and currently are no) such callers, but
these functions are externally available.
John Ogness
[0] https://lore.kernel.org/lkml/80b020fc-c18a-4da4-b222-16da1cab2f4c@nvidia.com
[1] https://lore.kernel.org/lkml/DB9PR04MB8429E7DDF2D93C2695DE401D92C4A@DB9PR04…
[2] https://lore.kernel.org/lkml/20251111144328.887159-1-john.ogness@linutronix…
John Ogness (2):
printk: Allow printk_trigger_flush() to flush all types
printk: Avoid scheduling irq_work on suspend
kernel/printk/internal.h | 8 ++--
kernel/printk/nbcon.c | 9 ++++-
kernel/printk/printk.c | 81 ++++++++++++++++++++++++++++++++--------
3 files changed, 78 insertions(+), 20 deletions(-)
base-commit: e9a6fb0bcdd7609be6969112f3fbfcce3b1d4a7c
--
2.47.3
media_pad_remote_pad_first() may return NULL when the media link is absent
or disabled. The code dereferenced remote_pad->entity unconditionally in
ipu6_isys_csi2_enable_streams() and ipu6_isys_csi2_disable_streams(),
leading to a possible NULL dereference.
Guard the remote pad/subdev: in enable path return -ENOLINK/-ENODEV, in
disable path always shut down the local stream first and return 0 if the
remote side is missing. This keeps local shutdown semantics intact and
prevents a crash when the graph is partially torn down.
Also, ipu6_isys_csi2_calc_timing() passes link_freq into calc_timing()
where it is used as a divisor. v4l2_get_link_freq() may yield 0; add an
explicit check and return -EINVAL to avoid division by zero.
Found by Linux Verification Center (linuxtesting.org) with SVACE.
Fixes: 3a5c59ad926b ("media: ipu6: Rework CSI-2 sub-device streaming control")
Cc: stable(a)vger.kernel.org
Signed-off-by: Alexei Safin <a.safin(a)rosa.ru>
---
drivers/media/pci/intel/ipu6/ipu6-isys-csi2.c | 18 ++++++++++++++++++
1 file changed, 18 insertions(+)
diff --git a/drivers/media/pci/intel/ipu6/ipu6-isys-csi2.c b/drivers/media/pci/intel/ipu6/ipu6-isys-csi2.c
index d1fece6210ab..58944918c344 100644
--- a/drivers/media/pci/intel/ipu6/ipu6-isys-csi2.c
+++ b/drivers/media/pci/intel/ipu6/ipu6-isys-csi2.c
@@ -171,6 +171,10 @@ ipu6_isys_csi2_calc_timing(struct ipu6_isys_csi2 *csi2,
if (link_freq < 0)
return link_freq;
+ /* Avoid division by zero in calc_timing() if link frequency is zero */
+ if (!link_freq)
+ return -EINVAL;
+
timing->ctermen = calc_timing(CSI2_CSI_RX_DLY_CNT_TERMEN_CLANE_A,
CSI2_CSI_RX_DLY_CNT_TERMEN_CLANE_B,
link_freq, accinv);
@@ -352,7 +356,12 @@ static int ipu6_isys_csi2_enable_streams(struct v4l2_subdev *sd,
int ret;
remote_pad = media_pad_remote_pad_first(&sd->entity.pads[CSI2_PAD_SINK]);
+ if (!remote_pad)
+ return -ENOLINK;
+
remote_sd = media_entity_to_v4l2_subdev(remote_pad->entity);
+ if (!remote_sd)
+ return -ENODEV;
sink_streams =
v4l2_subdev_state_xlate_streams(state, pad, CSI2_PAD_SINK,
@@ -389,7 +398,16 @@ static int ipu6_isys_csi2_disable_streams(struct v4l2_subdev *sd,
&streams_mask);
remote_pad = media_pad_remote_pad_first(&sd->entity.pads[CSI2_PAD_SINK]);
+ if (!remote_pad) {
+ ipu6_isys_csi2_set_stream(sd, NULL, 0, false);
+ return 0;
+ }
+
remote_sd = media_entity_to_v4l2_subdev(remote_pad->entity);
+ if (!remote_sd) {
+ ipu6_isys_csi2_set_stream(sd, NULL, 0, false);
+ return 0;
+ }
ipu6_isys_csi2_set_stream(sd, NULL, 0, false);
--
2.50.1 (Apple Git-155)
This reverts commit 7777f47f2ea64efd1016262e7b59fab34adfb869.
The commit 1a721de8489f ("block: don't add or resize partition on the disk
with GENHD_FL_NO_PART") and the commit 7777f47f2ea6 ("block: Move checking
GENHD_FL_NO_PART to bdev_add_partition()") used the flag GENHD_FL_NO_PART
to prevent the add or resize of partitions in 5.15 stable kernels.But in
these 5.15 kernels, this is giving an issue with the following error
where the loop driver wants to create a partition when the partscan is
disabled on the loop device:
dd if=/dev/zero of=loopDisk.dsk bs=1M count=1 seek=10240;
losetup -f loopDisk.dsk;parted -s /dev/loop0 -- mklabel gpt mkpart primary
2048s 4096s
1+0 records in
1+0 records out
1048576 bytes (1.0 MB, 1.0 MiB) copied, 0.0016293 s, 644 MB/s
""
Error: Partition(s) 1 on /dev/loop0 have been written, but we have been
unable to inform the kernel of the change, probably because it/they are
in use. As a result, the old partition(s) will remain in use. You should
reboot now before making further changes.
""
If the partition scan is not enabled on the loop device, this flag
GENHD_FL_NO_PART is getting set and when partition creation is tried,
it returns an error EINVAL thereby preventing the creation of partitions.
So, there is no such distinction between disabling of partition scan and
partition creation.
Later in 6.xxx kernels, the commit b9684a71fca7 ("block, loop: support
partitions without scanning") a new flag GD_SUPPRESS_PART_SCAN was
introduced that just disables the partition scan and uses GENHD_FL_NO_PART
only to prevent creating partition scan. So, the partition creationg can
proceed with even if partition scan is disabled.
As the commit b9684a71fca7 ("block, loop: support partitions without
scanning") is not available in 5.15 stable kernel, and since there is no
distinction between disabling of "partition scan" and "partition
creation", we need to revert the commits 1a721de8489f and 7777f47f2ea6
from 5.15 stable kernel to allow partition creation when partscan is
disabled.
Cc: stable(a)vger.kernel.org
Signed-off-by: Gulam Mohamed <gulam.mohamed(a)oracle.com>
---
block/ioctl.c | 2 ++
block/partitions/core.c | 5 -----
2 files changed, 2 insertions(+), 5 deletions(-)
diff --git a/block/ioctl.c b/block/ioctl.c
index a260e39e56a4..d25b84441237 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -20,6 +20,8 @@ static int blkpg_do_ioctl(struct block_device *bdev,
struct blkpg_partition p;
sector_t start, length;
+ if (disk->flags & GENHD_FL_NO_PART)
+ return -EINVAL;
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
if (copy_from_user(&p, upart, sizeof(struct blkpg_partition)))
diff --git a/block/partitions/core.c b/block/partitions/core.c
index 0d1fe2b42b85..7b5750db7eaf 100644
--- a/block/partitions/core.c
+++ b/block/partitions/core.c
@@ -463,11 +463,6 @@ int bdev_add_partition(struct gendisk *disk, int partno, sector_t start,
goto out;
}
- if (disk->flags & GENHD_FL_NO_PART) {
- ret = -EINVAL;
- goto out;
- }
-
if (partition_overlaps(disk, start, length, -1)) {
ret = -EBUSY;
goto out;
--
2.47.3
This reverts commit 7777f47f2ea64efd1016262e7b59fab34adfb869.
The commit 1a721de8489f ("block: don't add or resize partition on the disk
with GENHD_FL_NO_PART") and the commit 7777f47f2ea6 ("block: Move checking
GENHD_FL_NO_PART to bdev_add_partition()") used the flag GENHD_FL_NO_PART
to prevent the add or resize of partitions in 5.15 stable kernels.But in
these 5.15 kernels, this is giving an issue with the following error
where the loop driver wants to create a partition when the partscan is
disabled on the loop device:
dd if=/dev/zero of=loopDisk.dsk bs=1M count=1 seek=10240;
losetup -f loopDisk.dsk;parted -s /dev/loop0 -- mklabel gpt mkpart primary
2048s 4096s
1+0 records in
1+0 records out
1048576 bytes (1.0 MB, 1.0 MiB) copied, 0.0016293 s, 644 MB/s
""
Error: Partition(s) 1 on /dev/loop0 have been written, but we have been
unable to inform the kernel of the change, probably because it/they are
in use. As a result, the old partition(s) will remain in use. You should
reboot now before making further changes.
""
If the partition scan is not enabled on the loop device, this flag
GENHD_FL_NO_PART is getting set and when partition creation is tried,
it returns an error EINVAL thereby preventing the creation of partitions.
So, there is no such distinction between disabling of partition scan and
partition creation.
Later in 6.xxx kernels, the commit b9684a71fca7 ("block, loop: support
partitions without scanning") a new flag GD_SUPPRESS_PART_SCAN was
introduced that just disables the partition scan and uses GENHD_FL_NO_PART
only to prevent creating partition scan. So, the partition creationg can
proceed with even if partition scan is disabled.
As the commit b9684a71fca7 ("block, loop: support partitions without
scanning") is not available in 5.15 stable kernel, and since there is no
distinction between disabling of "partition scan" and "partition
creation", we need to revert the commits 1a721de8489f and 7777f47f2ea6
from 5.15 stable kernel to allow partition creation when partscan is
disabled.
Cc: stable(a)vger.kernel.org
Signed-off-by: Gulam Mohamed <gulam.mohamed(a)oracle.com>
---
block/ioctl.c | 2 ++
block/partitions/core.c | 5 -----
2 files changed, 2 insertions(+), 5 deletions(-)
diff --git a/block/ioctl.c b/block/ioctl.c
index a260e39e56a4..d25b84441237 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -20,6 +20,8 @@ static int blkpg_do_ioctl(struct block_device *bdev,
struct blkpg_partition p;
sector_t start, length;
+ if (disk->flags & GENHD_FL_NO_PART)
+ return -EINVAL;
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
if (copy_from_user(&p, upart, sizeof(struct blkpg_partition)))
diff --git a/block/partitions/core.c b/block/partitions/core.c
index 0d1fe2b42b85..7b5750db7eaf 100644
--- a/block/partitions/core.c
+++ b/block/partitions/core.c
@@ -463,11 +463,6 @@ int bdev_add_partition(struct gendisk *disk, int partno, sector_t start,
goto out;
}
- if (disk->flags & GENHD_FL_NO_PART) {
- ret = -EINVAL;
- goto out;
- }
-
if (partition_overlaps(disk, start, length, -1)) {
ret = -EBUSY;
goto out;
--
2.47.3
Although it is guided that `#mbox-cells` must be at least 1, there are
many instances of `#mbox-cells = <0>;` in the device tree. If that is
the case and the corresponding mailbox controller does not provide
`fw_xlate` and of_xlate` function pointers, `fw_mbox_index_xlate()` will
be used by default and out-of-bounds accesses could occur due to lack of
bounds check in that function.
Cc: stable(a)vger.kernel.org
Signed-off-by: Joonwon Kang <joonwonkang(a)google.com>
---
V3 -> V4: Prevented access to sp->args[0] if sp->nargs < 1 and rebased
on the linux-next tree.
For CVE review, below is a problematic control flow when
`#mbox-cells = <0>;`:
```
static struct mbox_chan *
fw_mbox_index_xlate(struct mbox_controller *mbox,
const struct fwnode_reference_args *sp)
{
int ind = sp->args[0]; // (4)
if (ind >= mbox->num_chans) // (5)
return ERR_PTR(-EINVAL);
return &mbox->chans[ind]; // (6)
}
struct mbox_chan *mbox_request_channel(struct mbox_client *cl, int index)
{
...
struct fwnode_reference_args fwspec; // (1)
...
ret = fwnode_property_get_reference_args(fwnode, "mboxes", // (2)
"#mbox-cells", 0, index, &fwspec);
...
scoped_guard(mutex, &con_mutex) {
...
list_for_each_entry(mbox, &mbox_cons, node) {
if (device_match_fwnode(mbox->dev, fwspec.fwnode)) {
if (mbox->fw_xlate) {
chan = mbox->fw_xlate(mbox, &fwspec); // (3)
if (!IS_ERR(chan))
break;
}
...
}
}
...
ret = __mbox_bind_client(chan, cl); // (7)
...
}
...
}
static int __mbox_bind_client(struct mbox_chan *chan,
struct mbox_client *cl)
{
if (chan->cl || ...) { // (8)
}
```
(1) `fwspec.args[]` is filled with arbitrary leftover values in the stack.
Let's say that `fwspec.args[0] == 0xffffffff`.
(2) Since `#mbox-cells = <0>;`, `fwspec.nargs` is assigned 0 and
`fwspec.args[]` are untouched.
(3) Since the controller has not provided `fw_xlate` and `of_xlate`,
`fw_mbox_index_xlate()` is used instead.
(4) `idx` is assigned -1 due to the value of `fwspec.args[0]`.
(5) Since `mbox->num_chans >= 0` and `idx == -1`, this condition does
not filter out this case.
(6) Out-of-bounds address is returned. Depending on what was left in
`fwspec.args[0]`, it could be an arbitrary(but confined to a specific
range) address.
(7) A function is called with the out-of-bounds address in `chan`.
(8) The out-of-bounds address is accessed.
drivers/mailbox/mailbox.c | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/drivers/mailbox/mailbox.c b/drivers/mailbox/mailbox.c
index 2acc6ec229a4..617ba505691d 100644
--- a/drivers/mailbox/mailbox.c
+++ b/drivers/mailbox/mailbox.c
@@ -489,12 +489,10 @@ EXPORT_SYMBOL_GPL(mbox_free_channel);
static struct mbox_chan *fw_mbox_index_xlate(struct mbox_controller *mbox,
const struct fwnode_reference_args *sp)
{
- int ind = sp->args[0];
-
- if (ind >= mbox->num_chans)
+ if (sp->nargs < 1 || sp->args[0] >= mbox->num_chans)
return ERR_PTR(-EINVAL);
- return &mbox->chans[ind];
+ return &mbox->chans[sp->args[0]];
}
/**
--
2.52.0.487.g5c8c507ade-goog
From: Long Li <longli(a)microsoft.com>
Enable the user space to manage interrupt_mask for subchannels through
irqcontrol interface for uio device. Also remove the memory barrier
when monitor bit is enabled as it is not necessary.
This is a backport of the upstream
commit d062463edf17 ("uio_hv_generic: Set event for all channels on the device")
with some modifications to resolve merge conflicts and take care of
missing support for slow devices on older kernels.
Original change was not a fix, but it needs to be backported to fix a
NULL pointer crash resulting from missing interrupt mask setting.
Commit b15b7d2a1b09 ("uio_hv_generic: Let userspace take care of interrupt mask")
removed the default setting of interrupt_mask for channels (including
subchannels) in the uio_hv_generic driver, as it relies on the user space
to take care of managing it. This approach works fine when user space
can control this setting using the irqcontrol interface provided for uio
devices. Support for setting the interrupt mask through this interface for
subchannels came only after commit d062463edf17 ("uio_hv_generic: Set event
for all channels on the device"). On older kernels, this change is not
present. With uio_hv_generic no longer setting the interrupt_mask, and
userspace not having the capability to set it, it remains unset,
and interrupts can come for the subchannels, which can result in a crash
in hv_uio_channel_cb. Backport the change to older kernels, where this
change was not present, to allow userspace to set the interrupt mask
properly for subchannels. Additionally, this patch also adds certain
checks for primary vs subchannels in the hv_uio_channel_cb, which can
gracefully handle these two cases and prevent the NULL pointer crashes.
Signed-off-by: Long Li <longli(a)microsoft.com>
Fixes: b15b7d2a1b09 ("uio_hv_generic: Let userspace take care of interrupt mask")
Closes: https://bugs.debian.org/1120602
Cc: stable(a)vger.kernel.org # all LTS kernels from 5.4 till 6.6
Signed-off-by: Naman Jain <namjain(a)linux.microsoft.com>
---
Changes since v1:
https://lore.kernel.org/all/20251115085937.2237-1-namjain@linux.microsoft.c…
* Changed commit being fixed, to reflect upstream commit
* Added stable kernel versions info in Stable tag and email subject.
This needs to be ported to all the stable kernels, where the Fixes patch
went in - 5.4.x, 5.10.x, 5.15.x, 6.1.x, 6.6.x.
Previous notes:
Remove reviewed-by tags since the original code has changed quite a bit
while backporting.
Backported change for 6.12 kernel is sent separately.
---
drivers/uio/uio_hv_generic.c | 21 +++++++++++++++++----
1 file changed, 17 insertions(+), 4 deletions(-)
diff --git a/drivers/uio/uio_hv_generic.c b/drivers/uio/uio_hv_generic.c
index 137109f5f69b..95c1f08028a3 100644
--- a/drivers/uio/uio_hv_generic.c
+++ b/drivers/uio/uio_hv_generic.c
@@ -80,9 +80,15 @@ hv_uio_irqcontrol(struct uio_info *info, s32 irq_state)
{
struct hv_uio_private_data *pdata = info->priv;
struct hv_device *dev = pdata->device;
+ struct vmbus_channel *primary, *sc;
- dev->channel->inbound.ring_buffer->interrupt_mask = !irq_state;
- virt_mb();
+ primary = dev->channel;
+ primary->inbound.ring_buffer->interrupt_mask = !irq_state;
+
+ mutex_lock(&vmbus_connection.channel_mutex);
+ list_for_each_entry(sc, &primary->sc_list, sc_list)
+ sc->inbound.ring_buffer->interrupt_mask = !irq_state;
+ mutex_unlock(&vmbus_connection.channel_mutex);
return 0;
}
@@ -93,11 +99,18 @@ hv_uio_irqcontrol(struct uio_info *info, s32 irq_state)
static void hv_uio_channel_cb(void *context)
{
struct vmbus_channel *chan = context;
- struct hv_device *hv_dev = chan->device_obj;
- struct hv_uio_private_data *pdata = hv_get_drvdata(hv_dev);
+ struct hv_device *hv_dev;
+ struct hv_uio_private_data *pdata;
virt_mb();
+ /*
+ * The callback may come from a subchannel, in which case look
+ * for the hv device in the primary channel
+ */
+ hv_dev = chan->primary_channel ?
+ chan->primary_channel->device_obj : chan->device_obj;
+ pdata = hv_get_drvdata(hv_dev);
uio_event_notify(&pdata->info);
}
--
2.43.0
From: Long Li <longli(a)microsoft.com>
Enable the user space to manage interrupt_mask for subchannels through
irqcontrol interface for uio device. Also remove the memory barrier
when monitor bit is enabled as it is not necessary.
This is a backport of the upstream commit
d062463edf17 ("uio_hv_generic: Set event for all channels on the device")
with some modifications to resolve merge conflicts and take care of
missing support for slow devices on older kernels.
Original change was not a fix, but it needs to be backported to fix a
NULL pointer crash resulting from missing interrupt mask setting.
Commit 37bd91f22794 ("uio_hv_generic: Let userspace take care of interrupt mask")
removed the default setting of interrupt_mask for channels (including
subchannels) in the uio_hv_generic driver, as it relies on the user space
to take care of managing it. This approach works fine when user space
can control this setting using the irqcontrol interface provided for uio
devices. Support for setting the interrupt mask through this interface for
subchannels came only after commit d062463edf17 ("uio_hv_generic: Set event
for all channels on the device"). On older kernels, this change is not
present. With uio_hv_generic no longer setting the interrupt_mask, and
userspace not having the capability to set it, it remains unset,
and interrupts can come for the subchannels, which can result in a crash
in hv_uio_channel_cb. Backport the change to older kernels, where this
change was not present, to allow userspace to set the interrupt mask
properly for subchannels. Additionally, this patch also adds certain
checks for primary vs subchannels in the hv_uio_channel_cb, which can
gracefully handle these two cases and prevent the NULL pointer crashes.
Signed-off-by: Long Li <longli(a)microsoft.com>
Fixes: 37bd91f22794 ("uio_hv_generic: Let userspace take care of interrupt mask")
Closes: https://bugs.debian.org/1120602
Cc: <stable(a)vger.kernel.org> # 6.6.x and older
Signed-off-by: Naman Jain <namjain(a)linux.microsoft.com>
---
Remove reviewed-by tags since the original code has changed quite a bit
while backporting.
Backported change for 6.12 kernel is sent separately.
---
drivers/uio/uio_hv_generic.c | 21 +++++++++++++++++----
1 file changed, 17 insertions(+), 4 deletions(-)
diff --git a/drivers/uio/uio_hv_generic.c b/drivers/uio/uio_hv_generic.c
index 2724656bf634..69e5016ebd46 100644
--- a/drivers/uio/uio_hv_generic.c
+++ b/drivers/uio/uio_hv_generic.c
@@ -80,9 +80,15 @@ hv_uio_irqcontrol(struct uio_info *info, s32 irq_state)
{
struct hv_uio_private_data *pdata = info->priv;
struct hv_device *dev = pdata->device;
+ struct vmbus_channel *primary, *sc;
- dev->channel->inbound.ring_buffer->interrupt_mask = !irq_state;
- virt_mb();
+ primary = dev->channel;
+ primary->inbound.ring_buffer->interrupt_mask = !irq_state;
+
+ mutex_lock(&vmbus_connection.channel_mutex);
+ list_for_each_entry(sc, &primary->sc_list, sc_list)
+ sc->inbound.ring_buffer->interrupt_mask = !irq_state;
+ mutex_unlock(&vmbus_connection.channel_mutex);
return 0;
}
@@ -93,11 +99,18 @@ hv_uio_irqcontrol(struct uio_info *info, s32 irq_state)
static void hv_uio_channel_cb(void *context)
{
struct vmbus_channel *chan = context;
- struct hv_device *hv_dev = chan->device_obj;
- struct hv_uio_private_data *pdata = hv_get_drvdata(hv_dev);
+ struct hv_device *hv_dev;
+ struct hv_uio_private_data *pdata;
virt_mb();
+ /*
+ * The callback may come from a subchannel, in which case look
+ * for the hv device in the primary channel
+ */
+ hv_dev = chan->primary_channel ?
+ chan->primary_channel->device_obj : chan->device_obj;
+ pdata = hv_get_drvdata(hv_dev);
uio_event_notify(&pdata->info);
}
--
2.34.1
In btusb_mtk_setup(), we set `btmtk_data->isopkt_intf` to:
usb_ifnum_to_if(data->udev, MTK_ISO_IFNUM)
That function can return NULL in some cases. Even when it returns
NULL, though, we still go on to call btusb_mtk_claim_iso_intf().
As of commit e9087e828827 ("Bluetooth: btusb: mediatek: Add locks for
usb_driver_claim_interface()"), calling btusb_mtk_claim_iso_intf()
when `btmtk_data->isopkt_intf` is NULL will cause a crash because
we'll end up passing a bad pointer to device_lock(). Prior to that
commit we'd pass the NULL pointer directly to
usb_driver_claim_interface() which would detect it and return an
error, which was handled.
Resolve the crash in btusb_mtk_claim_iso_intf() by adding a NULL check
at the start of the function. This makes the code handle a NULL
`btmtk_data->isopkt_intf` the same way it did before the problematic
commit (just with a slight change to the error message printed).
Reported-by: IncogCyberpunk <incogcyberpunk(a)proton.me>
Closes: http://lore.kernel.org/r/a380d061-479e-4713-bddd-1d6571ca7e86@leemhuis.info
Fixes: e9087e828827 ("Bluetooth: btusb: mediatek: Add locks for usb_driver_claim_interface()")
Cc: stable(a)vger.kernel.org
Tested-by: IncogCyberpunk <incogcyberpunk(a)proton.me>
Signed-off-by: Douglas Anderson <dianders(a)chromium.org>
---
Changes in v3:
- Added Cc to stable.
- Added IncogCyberpunk Tested-by tag.
- v2: https://patch.msgid.link/20251119092641.v2.1.I1ae7aebc967e52c7c4be7aa65fbd8…
Changes in v2:
- Rebase atop commit 529ac8e706c3 ("Bluetooth: ... mtk iso interface")
- v1: https://patch.msgid.link/20251119085354.1.I1ae7aebc967e52c7c4be7aa65fbd8173…
drivers/bluetooth/btusb.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index fcc62e2fb641..683ac02e964b 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -2751,6 +2751,11 @@ static void btusb_mtk_claim_iso_intf(struct btusb_data *data)
if (!btmtk_data)
return;
+ if (!btmtk_data->isopkt_intf) {
+ bt_dev_err(data->hdev, "Can't claim NULL iso interface");
+ return;
+ }
+
/*
* The function usb_driver_claim_interface() is documented to need
* locks held if it's not called from a probe routine. The code here
--
2.52.0.rc1.455.g30608eb744-goog
Now the optimized version of arch_dup_task_struct() for LoongArch
assumes 'thread' is the last member of 'task_struct'. But this is
not true if CONFIG_RANDSTRUCT is enabled after Linux-6.16.
So fix the arch_dup_task_struct() function for CONFIG_RANDSTRUCT by
copying the whole 'task_struct'.
Cc: stable(a)vger.kernel.org
Signed-off-by: Huacai Chen <chenhuacai(a)loongson.cn>
---
arch/loongarch/kernel/process.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c
index efd9edf65603..d1e04f9e0f79 100644
--- a/arch/loongarch/kernel/process.c
+++ b/arch/loongarch/kernel/process.c
@@ -130,6 +130,11 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
preempt_enable();
+ if (IS_ENABLED(CONFIG_RANDSTRUCT)) {
+ memcpy(dst, src, sizeof(struct task_struct));
+ return 0;
+ }
+
if (!used_math())
memcpy(dst, src, offsetof(struct task_struct, thread.fpu.fpr));
else
--
2.47.3
ipc_msg_send_request() waits for a generic netlink reply using an
ipc_msg_table_entry on the stack. The generic netlink handler
(handle_generic_event()/handle_response()) fills entry->response under
ipc_msg_table_lock, but ipc_msg_send_request() used to validate and free
entry->response without holding the same lock.
Under high concurrency this allows a race where handle_response() is
copying data into entry->response while ipc_msg_send_request() has just
freed it, leading to a slab-use-after-free reported by KASAN in
handle_generic_event():
BUG: KASAN: slab-use-after-free in handle_generic_event+0x3c4/0x5f0 [ksmbd]
Write of size 12 at addr ffff888198ee6e20 by task pool/109349
...
Freed by task:
kvfree
ipc_msg_send_request [ksmbd]
ksmbd_rpc_open -> ksmbd_session_rpc_open [ksmbd]
Fix by:
- Taking ipc_msg_table_lock in ipc_msg_send_request() while validating
entry->response, freeing it when invalid, and removing the entry from
ipc_msg_table.
- Returning the final entry->response pointer to the caller only after
the hash entry is removed under the lock.
- Returning NULL in the error path, preserving the original API
semantics.
This makes all accesses to entry->response consistent with
handle_response(), which already updates and fills the response buffer
under ipc_msg_table_lock, and closes the race that allowed the UAF.
Reported-by: Qianchang Zhao <pioooooooooip(a)gmail.com>
Reported-by: Zhitong Liu <liuzhitong1993(a)gmail.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Qianchang Zhao <pioooooooooip(a)gmail.com>
---
fs/smb/server/transport_ipc.c | 13 ++++++++++++-
1 file changed, 12 insertions(+), 1 deletion(-)
diff --git a/fs/smb/server/transport_ipc.c b/fs/smb/server/transport_ipc.c
index 46f87fd1c..7b1a060da 100644
--- a/fs/smb/server/transport_ipc.c
+++ b/fs/smb/server/transport_ipc.c
@@ -532,6 +532,7 @@ static int ipc_validate_msg(struct ipc_msg_table_entry *entry)
static void *ipc_msg_send_request(struct ksmbd_ipc_msg *msg, unsigned int handle)
{
struct ipc_msg_table_entry entry;
+ void *response = NULL;
int ret;
if ((int)handle < 0)
@@ -553,6 +554,8 @@ static void *ipc_msg_send_request(struct ksmbd_ipc_msg *msg, unsigned int handle
ret = wait_event_interruptible_timeout(entry.wait,
entry.response != NULL,
IPC_WAIT_TIMEOUT);
+
+ down_write(&ipc_msg_table_lock);
if (entry.response) {
ret = ipc_validate_msg(&entry);
if (ret) {
@@ -560,11 +563,19 @@ static void *ipc_msg_send_request(struct ksmbd_ipc_msg *msg, unsigned int handle
entry.response = NULL;
}
}
+
+ response = entry.response;
+ hash_del(&entry.ipc_table_hlist);
+ up_write(&ipc_msg_table_lock);
+
+ return response;
+
out:
down_write(&ipc_msg_table_lock);
hash_del(&entry.ipc_table_hlist);
up_write(&ipc_msg_table_lock);
- return entry.response;
+
+ return NULL;
}
static int ksmbd_ipc_heartbeat_request(void)
--
2.34.1
On 11/23/2025 7:45 PM, Guangshuo Li wrote:
> Hi Tony, all,
>
> Thanks for the review. As suggested by Tony, I’ll keep the declarations
> at the top and place the bounds checks before assigning last_byte. I’ll
> send a v2 with the following change:
>
> static bool e1000_tbi_should_accept(struct e1000_adapter *adapter,
> u8 status, u8 errors,
> u32 length, const u8 *data)
> {
> struct e1000_hw *hw = &adapter->hw;
> u8 last_byte;
>
> /* Guard against OOB on data[length - 1] */
> if (unlikely(!length))
> return false;
>
> /* Upper bound: length must not exceed rx_buffer_len */
> if (unlikely(length > adapter->rx_buffer_len))
> return false;
>
> last_byte = data[length - 1];
>
> /* existing logic follows ... */
> }
> Please let me know if further adjustments are preferred.
This looks along the lines of what I was expecting.
Thanks,
Tony
> Best regards,
> Guangshuo Li
From: Thomas Gleixner <tglx(a)linutronix.de>
Luigi reported that retriggering a posted MSI interrupt does not work
correctly.
The reason is that the retrigger happens at the vector domain by sending an
IPI to the actual vector on the target CPU. That works correctly exactly
once because the posted MSI interrupt chip does not issue an EOI as that's
only required for the posted MSI notification vector itself.
As a consequence the vector becomes stale in the ISR, which not only
affects this vector but also any lower priority vector in the affected
APIC because the ISR bit is not cleared.
Luigi proposed to set the vector in the remap PIR bitmap and raise the
posted MSI notification vector. That works, but that still does not cure a
related problem:
If there is ever a stray interrupt on such a vector, then the related
APIC ISR bit becomes stale due to the lack of EOI as described above.
Unlikely to happen, but if it happens it's not debuggable at all.
So instead of playing games with the PIR, this can be actually solved
for both cases by:
1) Keeping track of the posted interrupt vector handler state
2) Implementing a posted MSI specific irq_ack() callback which checks that
state. If the posted vector handler is inactive it issues an EOI,
otherwise it delegates that to the posted handler.
This is correct versus affinity changes and concurrent events on the posted
vector as the actual handler invocation is serialized through the interrupt
descriptor lock.
Fixes: ed1e48ea4370 ("iommu/vt-d: Enable posted mode for device MSIs")
Reported-by: Luigi Rizzo <lrizzo(a)google.com>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Tested-by: Luigi Rizzo <lrizzo(a)google.com>
Cc: stable(a)vger.kernel.org
Closes: https://lore.kernel.org/lkml/20251124104836.3685533-1-lrizzo@google.com
---
V2: Use __this_cpu...() - Luigi
---
arch/x86/include/asm/irq_remapping.h | 7 +++++++
arch/x86/kernel/irq.c | 23 +++++++++++++++++++++++
drivers/iommu/intel/irq_remapping.c | 8 ++++----
3 files changed, 34 insertions(+), 4 deletions(-)
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -87,4 +87,11 @@ static inline void panic_if_irq_remap(co
}
#endif /* CONFIG_IRQ_REMAP */
+
+#ifdef CONFIG_X86_POSTED_MSI
+void intel_ack_posted_msi_irq(struct irq_data *irqd);
+#else
+#define intel_ack_posted_msi_irq NULL
+#endif
+
#endif /* __X86_IRQ_REMAPPING_H */
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -396,6 +396,7 @@ DEFINE_IDTENTRY_SYSVEC_SIMPLE(sysvec_kvm
/* Posted Interrupt Descriptors for coalesced MSIs to be posted */
DEFINE_PER_CPU_ALIGNED(struct pi_desc, posted_msi_pi_desc);
+static DEFINE_PER_CPU_CACHE_HOT(bool, posted_msi_handler_active);
void intel_posted_msi_init(void)
{
@@ -413,6 +414,25 @@ void intel_posted_msi_init(void)
this_cpu_write(posted_msi_pi_desc.ndst, destination);
}
+void intel_ack_posted_msi_irq(struct irq_data *irqd)
+{
+ irq_move_irq(irqd);
+
+ /*
+ * Handle the rare case that irq_retrigger() raised the actual
+ * assigned vector on the target CPU, which means that it was not
+ * invoked via the posted MSI handler below. In that case APIC EOI
+ * is required as otherwise the ISR entry becomes stale and lower
+ * priority interrupts are never going to be delivered after that.
+ *
+ * If the posted handler invoked the device interrupt handler then
+ * the EOI would be premature because it would acknowledge the
+ * posted vector.
+ */
+ if (unlikely(!__this_cpu_read(posted_msi_handler_active)))
+ apic_eoi();
+}
+
static __always_inline bool handle_pending_pir(unsigned long *pir, struct pt_regs *regs)
{
unsigned long pir_copy[NR_PIR_WORDS];
@@ -445,6 +465,8 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_posted_msi
pid = this_cpu_ptr(&posted_msi_pi_desc);
+ /* Mark the handler active for intel_ack_posted_msi_irq() */
+ __this_cpu_write(posted_msi_handler_active, true);
inc_irq_stat(posted_msi_notification_count);
irq_enter();
@@ -473,6 +495,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_posted_msi
apic_eoi();
irq_exit();
+ __this_cpu_write(posted_msi_handler_active, false);
set_irq_regs(old_regs);
}
#endif /* X86_POSTED_MSI */
--- a/drivers/iommu/intel/irq_remapping.c
+++ b/drivers/iommu/intel/irq_remapping.c
@@ -1303,17 +1303,17 @@ static struct irq_chip intel_ir_chip = {
* irq_enter();
* handle_edge_irq()
* irq_chip_ack_parent()
- * irq_move_irq(); // No EOI
+ * intel_ack_posted_msi_irq(); // No EOI
* handle_irq_event()
* driver_handler()
* handle_edge_irq()
* irq_chip_ack_parent()
- * irq_move_irq(); // No EOI
+ * intel_ack_posted_msi_irq(); // No EOI
* handle_irq_event()
* driver_handler()
* handle_edge_irq()
* irq_chip_ack_parent()
- * irq_move_irq(); // No EOI
+ * intel_ack_posted_msi_irq(); // No EOI
* handle_irq_event()
* driver_handler()
* apic_eoi()
@@ -1322,7 +1322,7 @@ static struct irq_chip intel_ir_chip = {
*/
static struct irq_chip intel_ir_chip_post_msi = {
.name = "INTEL-IR-POST",
- .irq_ack = irq_move_irq,
+ .irq_ack = intel_ack_posted_msi_irq,
.irq_set_affinity = intel_ir_set_affinity,
.irq_compose_msi_msg = intel_ir_compose_msi_msg,
.irq_set_vcpu_affinity = intel_ir_set_vcpu_affinity,
On Tue, Nov 25, 2025 at 10:15 AM Joanne Koong <joannelkoong(a)gmail.com> wrote:
>
> When a request is terminated before it has been committed, the request
> is not removed from the queue's list. This leaves a dangling list entry
> that leads to list corruption and use-after-free issues.
>
> Remove the request from the queue's list for terminated non-committed
> requests.
>
> Signed-off-by: Joanne Koong <joannelkoong(a)gmail.com>
> Fixes: c090c8abae4b ("fuse: Add io-uring sqe commit and fetch support")
Sorry, forgot to add the stable tag. There should be this line:
Cc: stable(a)vger.kernel.org
> ---
> fs/fuse/dev_uring.c | 1 +
> 1 file changed, 1 insertion(+)
>
> diff --git a/fs/fuse/dev_uring.c b/fs/fuse/dev_uring.c
> index 0066c9c0a5d5..7760fe4e1f9e 100644
> --- a/fs/fuse/dev_uring.c
> +++ b/fs/fuse/dev_uring.c
> @@ -86,6 +86,7 @@ static void fuse_uring_req_end(struct fuse_ring_ent *ent, struct fuse_req *req,
> lockdep_assert_not_held(&queue->lock);
> spin_lock(&queue->lock);
> ent->fuse_req = NULL;
> + list_del_init(&req->list);
> if (test_bit(FR_BACKGROUND, &req->flags)) {
> queue->active_background--;
> spin_lock(&fc->bg_lock);
> --
> 2.47.3
>
Luigi reported that retriggering a posted MSI interrupt does not work
correctly.
The reason is that the retrigger happens at the vector domain by sending an
IPI to the actual vector on the target CPU. That works correctly exactly
once because the posted MSI interrupt chip does not issue an EOI as that's
only required for the posted MSI notification vector itself.
As a consequence the vector becomes stale in the ISR, which not only
affects this vector but also any lower priority vector in the affected
APIC because the ISR bit is not cleared.
Luigi proposed to set the vector in the remap PIR bitmap and raise the
posted MSI notification vector. That works, but that still does not cure a
related problem:
If there is ever a stray interrupt on such a vector, then the related
APIC ISR bit becomes stale due to the lack of EOI as described above.
Unlikely to happen, but if it happens it's not debuggable at all.
So instead of playing games with the PIR, this can be actually solved
for both cases by:
1) Keeping track of the posted interrupt vector handler state
2) Implementing a posted MSI specific irq_ack() callback which checks that
state. If the posted vector handler is inactive it issues an EOI,
otherwise it delegates that to the posted handler.
This is correct versus affinity changes and concurrent events on the posted
vector as the actual handler invocation is serialized through the interrupt
descriptor lock.
Fixes: ed1e48ea4370 ("iommu/vt-d: Enable posted mode for device MSIs")
Reported-by: Luigi Rizzo <lrizzo(a)google.com>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Tested-by: Luigi Rizzo <lrizzo(a)google.com>
Cc: stable(a)vger.kernel.org
Closes: https://lore.kernel.org/lkml/20251124104836.3685533-1-lrizzo@google.com
---
arch/x86/include/asm/irq_remapping.h | 7 +++++++
arch/x86/kernel/irq.c | 23 +++++++++++++++++++++++
drivers/iommu/intel/irq_remapping.c | 8 ++++----
3 files changed, 34 insertions(+), 4 deletions(-)
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -87,4 +87,11 @@ static inline void panic_if_irq_remap(co
}
#endif /* CONFIG_IRQ_REMAP */
+
+#ifdef CONFIG_X86_POSTED_MSI
+void intel_ack_posted_msi_irq(struct irq_data *irqd);
+#else
+#define intel_ack_posted_msi_irq NULL
+#endif
+
#endif /* __X86_IRQ_REMAPPING_H */
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -397,6 +397,7 @@ DEFINE_IDTENTRY_SYSVEC_SIMPLE(sysvec_kvm
/* Posted Interrupt Descriptors for coalesced MSIs to be posted */
DEFINE_PER_CPU_ALIGNED(struct pi_desc, posted_msi_pi_desc);
+static DEFINE_PER_CPU_CACHE_HOT(bool, posted_msi_handler_active);
void intel_posted_msi_init(void)
{
@@ -414,6 +415,25 @@ void intel_posted_msi_init(void)
this_cpu_write(posted_msi_pi_desc.ndst, destination);
}
+void intel_ack_posted_msi_irq(struct irq_data *irqd)
+{
+ irq_move_irq(irqd);
+
+ /*
+ * Handle the rare case that irq_retrigger() raised the actual
+ * assigned vector on the target CPU, which means that it was not
+ * invoked via the posted MSI handler below. In that case APIC EOI
+ * is required as otherwise the ISR entry becomes stale and lower
+ * priority interrupts are never going to be delivered after that.
+ *
+ * If the posted handler invoked the device interrupt handler then
+ * the EOI would be premature because it would acknowledge the
+ * posted vector.
+ */
+ if (unlikely(!this_cpu_read(posted_msi_handler_active)))
+ apic_eoi();
+}
+
static __always_inline bool handle_pending_pir(unsigned long *pir, struct pt_regs *regs)
{
unsigned long pir_copy[NR_PIR_WORDS];
@@ -446,6 +466,8 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_posted_msi
pid = this_cpu_ptr(&posted_msi_pi_desc);
+ /* Mark the handler active for intel_ack_posted_msi_irq() */
+ this_cpu_write(posted_msi_handler_active, true);
inc_irq_stat(posted_msi_notification_count);
irq_enter();
@@ -474,6 +496,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_posted_msi
apic_eoi();
irq_exit();
+ this_cpu_write(posted_msi_handler_active, false);
set_irq_regs(old_regs);
}
#endif /* X86_POSTED_MSI */
--- a/drivers/iommu/intel/irq_remapping.c
+++ b/drivers/iommu/intel/irq_remapping.c
@@ -1303,17 +1303,17 @@ static struct irq_chip intel_ir_chip = {
* irq_enter();
* handle_edge_irq()
* irq_chip_ack_parent()
- * irq_move_irq(); // No EOI
+ * intel_ack_posted_msi_irq(); // No EOI
* handle_irq_event()
* driver_handler()
* handle_edge_irq()
* irq_chip_ack_parent()
- * irq_move_irq(); // No EOI
+ * intel_ack_posted_msi_irq(); // No EOI
* handle_irq_event()
* driver_handler()
* handle_edge_irq()
* irq_chip_ack_parent()
- * irq_move_irq(); // No EOI
+ * intel_ack_posted_msi_irq(); // No EOI
* handle_irq_event()
* driver_handler()
* apic_eoi()
@@ -1322,7 +1322,7 @@ static struct irq_chip intel_ir_chip = {
*/
static struct irq_chip intel_ir_chip_post_msi = {
.name = "INTEL-IR-POST",
- .irq_ack = irq_move_irq,
+ .irq_ack = intel_ack_posted_msi_irq,
.irq_set_affinity = intel_ir_set_affinity,
.irq_compose_msi_msg = intel_ir_compose_msi_msg,
.irq_set_vcpu_affinity = intel_ir_set_vcpu_affinity,
Mejora tus contrataciones con PsicoSmart
body {
margin: 0;
padding: 0;
font-family: Arial, Helvetica, sans-serif;
font-size: 14px;
color: #333;
background-color: #ffffff;
}
table {
border-spacing: 0;
width: 100%;
max-width: 600px;
margin: auto;
}
td {
padding: 12px 20px;
}
a {
color: #1a73e8;
text-decoration: none;
}
.footer {
font-size: 12px;
color: #888888;
text-align: center;
}
Evalúa talento de forma objetiva y mejora tus contrataciones con PsicoSmart.
Hola, ,
¿Te ha pasado que un candidato luce perfecto en entrevista, pero en el trabajo no encaja como esperabas?
En selección, confiar solo en la percepción puede llevar a decisiones costosas. Por eso quiero presentarte PsicoSmart, una herramienta creada para que los equipos de Recursos Humanos tomen decisiones más objetivas y acertadas.
Con PsicoSmart puedes:
Aplicar 31 pruebas psicométricas que evalúan liderazgo, honestidad, comunicación e inteligencia.
Validar conocimientos técnicos con más de 2,500 exámenes especializados.
Supervisar la identidad de quien responde mediante captura fotográfica automática durante la evaluación.
Gestionar todo desde una sola plataforma, accesible desde cualquier dispositivo.
Si estás buscando mejorar tus contrataciones, podría ser una muy buena opción. Si quieres conocer más puedes responder este correo o simplemente contactarme, mis datos están abajo.
Saludos,
--------------
Atte.: Valeria Pérez
Ciudad de México: (55) 5018 0565
WhatsApp: +52 33 1607 2089
Si no deseas recibir más correos, haz clic aquí para darte de baja.
Para remover su dirección de esta lista haga <a href="https://s1.arrobamail.com/unsuscribe.php?id=yiwtsrewiswqeiseup">click aquí</a>
media_pad_remote_pad_first() may return NULL when the media link is absent
or disabled. The code dereferenced remote_pad->entity unconditionally,
leading to a possible NULL dereference.
On the disable path, always shut down the local stream when the remote pad
or subdev is missing and then return 0, preserving local shutdown semantics
and avoiding a crash.
Found by Linux Verification Center (linuxtesting.org) with SVACE.
Fixes: 3a5c59ad926b ("media: ipu6: Rework CSI-2 sub-device streaming control")
Signed-off-by: Alexei Safin <a.safin(a)rosa.ru>
---
drivers/media/pci/intel/ipu6/ipu6-isys-csi2.c | 14 ++++++++++++++
1 file changed, 14 insertions(+)
diff --git a/drivers/media/pci/intel/ipu6/ipu6-isys-csi2.c b/drivers/media/pci/intel/ipu6/ipu6-isys-csi2.c
index 08148bfc2b4b..4a75b0b6c525 100644
--- a/drivers/media/pci/intel/ipu6/ipu6-isys-csi2.c
+++ b/drivers/media/pci/intel/ipu6/ipu6-isys-csi2.c
@@ -358,7 +358,12 @@ static int ipu6_isys_csi2_enable_streams(struct v4l2_subdev *sd,
int ret;
remote_pad = media_pad_remote_pad_first(&sd->entity.pads[CSI2_PAD_SINK]);
+ if (!remote_pad)
+ return -ENOLINK;
+
remote_sd = media_entity_to_v4l2_subdev(remote_pad->entity);
+ if (!remote_sd)
+ return -ENODEV;
sink_streams =
v4l2_subdev_state_xlate_streams(state, pad, CSI2_PAD_SINK,
@@ -395,7 +400,16 @@ static int ipu6_isys_csi2_disable_streams(struct v4l2_subdev *sd,
&streams_mask);
remote_pad = media_pad_remote_pad_first(&sd->entity.pads[CSI2_PAD_SINK]);
+ if (!remote_pad) {
+ ipu6_isys_csi2_set_stream(sd, NULL, 0, false);
+ return 0;
+ }
+
remote_sd = media_entity_to_v4l2_subdev(remote_pad->entity);
+ if (!remote_sd) {
+ ipu6_isys_csi2_set_stream(sd, NULL, 0, false);
+ return 0;
+ }
ipu6_isys_csi2_set_stream(sd, NULL, 0, false);
--
2.50.1 (Apple Git-155)
From: Brian Norris <briannorris(a)google.com>
When transitioning to D3cold, __pci_set_power_state() will first
transition a device to D3hot. If the device was already in D3hot, this
will add excess work:
(a) read/modify/write PMCSR; and
(b) excess delay (pci_dev_d3_sleep()).
For (b), we already performed the necessary delay on the previous D3hot
entry; this was extra noticeable when evaluating runtime PM transition
latency.
Check whether we're already in the target state before continuing.
Note that __pci_set_power_state() already does this same check for other
state transitions, but D3cold is special because __pci_set_power_state()
converts it to D3hot for the purposes of PMCSR.
This seems to be an oversight in commit 0aacdc957401 ("PCI/PM: Clean up
pci_set_low_power_state()").
Fixes: 0aacdc957401 ("PCI/PM: Clean up pci_set_low_power_state()")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Brian Norris <briannorris(a)google.com>
Signed-off-by: Brian Norris <briannorris(a)chromium.org>
---
drivers/pci/pci.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index b0f4d98036cd..7517f1380201 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1539,6 +1539,9 @@ static int pci_set_low_power_state(struct pci_dev *dev, pci_power_t state, bool
|| (state == PCI_D2 && !dev->d2_support))
return -EIO;
+ if (state == dev->current_state)
+ return 0;
+
pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr);
if (PCI_POSSIBLE_ERROR(pmcsr)) {
pci_err(dev, "Unable to change power state from %s to %s, device inaccessible\n",
--
2.51.0.618.g983fd99d29-goog
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x fff0c87996672816a84c3386797a5e69751c5888
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025112412-trough-caloric-1503@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From fff0c87996672816a84c3386797a5e69751c5888 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni(a)redhat.com>
Date: Tue, 18 Nov 2025 08:20:23 +0100
Subject: [PATCH] mptcp: decouple mptcp fastclose from tcp close
With the current fastclose implementation, the mptcp_do_fastclose()
helper is in charge of two distinct actions: send the fastclose reset
and cleanup the subflows.
Formally decouple the two steps, ensuring that mptcp explicitly closes
all the subflows after the mentioned helper.
This will make the upcoming fix simpler, and allows dropping the 2nd
argument from mptcp_destroy_common(). The Fixes tag is then the same as
in the next commit to help with the backports.
Fixes: d21f83485518 ("mptcp: use fastclose on more edge scenarios")
Cc: stable(a)vger.kernel.org
Signed-off-by: Paolo Abeni <pabeni(a)redhat.com>
Reviewed-by: Geliang Tang <geliang(a)kernel.org>
Reviewed-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
Link: https://patch.msgid.link/20251118-net-mptcp-misc-fixes-6-18-rc6-v1-5-806d37…
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 6f0e8f670d83..c59246c1fde6 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -2808,7 +2808,11 @@ static void mptcp_worker(struct work_struct *work)
__mptcp_close_subflow(sk);
if (mptcp_close_tout_expired(sk)) {
+ struct mptcp_subflow_context *subflow, *tmp;
+
mptcp_do_fastclose(sk);
+ mptcp_for_each_subflow_safe(msk, subflow, tmp)
+ __mptcp_close_ssk(sk, subflow->tcp_sock, subflow, 0);
mptcp_close_wake_up(sk);
}
@@ -3233,7 +3237,8 @@ static int mptcp_disconnect(struct sock *sk, int flags)
/* msk->subflow is still intact, the following will not free the first
* subflow
*/
- mptcp_destroy_common(msk, MPTCP_CF_FASTCLOSE);
+ mptcp_do_fastclose(sk);
+ mptcp_destroy_common(msk);
/* The first subflow is already in TCP_CLOSE status, the following
* can't overlap with a fallback anymore
@@ -3412,7 +3417,7 @@ void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk)
msk->rcvq_space.space = TCP_INIT_CWND * TCP_MSS_DEFAULT;
}
-void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags)
+void mptcp_destroy_common(struct mptcp_sock *msk)
{
struct mptcp_subflow_context *subflow, *tmp;
struct sock *sk = (struct sock *)msk;
@@ -3421,7 +3426,7 @@ void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags)
/* join list will be eventually flushed (with rst) at sock lock release time */
mptcp_for_each_subflow_safe(msk, subflow, tmp)
- __mptcp_close_ssk(sk, mptcp_subflow_tcp_sock(subflow), subflow, flags);
+ __mptcp_close_ssk(sk, mptcp_subflow_tcp_sock(subflow), subflow, 0);
__skb_queue_purge(&sk->sk_receive_queue);
skb_rbtree_purge(&msk->out_of_order_queue);
@@ -3439,7 +3444,7 @@ static void mptcp_destroy(struct sock *sk)
/* allow the following to close even the initial subflow */
msk->free_first = 1;
- mptcp_destroy_common(msk, 0);
+ mptcp_destroy_common(msk);
sk_sockets_allocated_dec(sk);
}
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 5575ef64ea31..6ca97096607c 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -977,7 +977,7 @@ static inline void mptcp_propagate_sndbuf(struct sock *sk, struct sock *ssk)
local_bh_enable();
}
-void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags);
+void mptcp_destroy_common(struct mptcp_sock *msk);
#define MPTCP_TOKEN_MAX_RETRIES 4
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x cfa0904a35fd0231f4d05da0190f0a22ed881cce
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025112425-aspirin-conduit-e44b@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From cfa0904a35fd0231f4d05da0190f0a22ed881cce Mon Sep 17 00:00:00 2001
From: Fangzhi Zuo <Jerry.Zuo(a)amd.com>
Date: Thu, 18 Sep 2025 16:25:45 -0400
Subject: [PATCH] drm/amd/display: Prevent Gating DTBCLK before It Is Properly
Latched
[why]
1. With allow_0_dtb_clk enabled, the time required to latch DTBCLK to 600 MHz
depends on the SMU. If DTBCLK is not latched to 600 MHz before set_mode completes,
gating DTBCLK causes the DP2 sink to lose its clock source.
2. The existing DTBCLK gating sequence ungates DTBCLK based on both pix_clk and ref_dtbclk,
but gates DTBCLK when either pix_clk or ref_dtbclk is zero.
pix_clk can be zero outside the set_mode sequence before DTBCLK is properly latched,
which can lead to DTBCLK being gated by mistake.
[how]
Consider both pixel_clk and ref_dtbclk when determining when it is safe to gate DTBCLK;
this is more accurate.
Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4701
Fixes: 5949e7c4890c ("drm/amd/display: Enable Dynamic DTBCLK Switch")
Reviewed-by: Charlene Liu <charlene.liu(a)amd.com>
Reviewed-by: Aurabindo Pillai <aurabindo.pillai(a)amd.com>
Signed-off-by: Fangzhi Zuo <Jerry.Zuo(a)amd.com>
Signed-off-by: Roman Li <roman.li(a)amd.com>
Tested-by: Dan Wheeler <daniel.wheeler(a)amd.com>
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
(cherry picked from commit d04eb0c402780ca037b62a6aecf23b863545ebca)
Cc: stable(a)vger.kernel.org
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
index b11383fba35f..1eb04772f5da 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
@@ -394,6 +394,8 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base,
display_count = dcn35_get_active_display_cnt_wa(dc, context, &all_active_disps);
if (new_clocks->dtbclk_en && !new_clocks->ref_dtbclk_khz)
new_clocks->ref_dtbclk_khz = 600000;
+ else if (!new_clocks->dtbclk_en && new_clocks->ref_dtbclk_khz > 590000)
+ new_clocks->ref_dtbclk_khz = 0;
/*
* if it is safe to lower, but we are already in the lower state, we don't have to do anything
@@ -435,7 +437,7 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base,
actual_dtbclk = REG_READ(CLK1_CLK4_CURRENT_CNT);
- if (actual_dtbclk) {
+ if (actual_dtbclk > 590000) {
clk_mgr_base->clks.ref_dtbclk_khz = new_clocks->ref_dtbclk_khz;
clk_mgr_base->clks.dtbclk_en = new_clocks->dtbclk_en;
}
diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c
index de6d62401362..c899c09ea31b 100644
--- a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c
+++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c
@@ -1411,7 +1411,7 @@ static void dccg35_set_dtbclk_dto(
__func__, params->otg_inst, params->pixclk_khz,
params->ref_dtbclk_khz, req_dtbclk_khz, phase, modulo);
- } else {
+ } else if (!params->ref_dtbclk_khz && !req_dtbclk_khz) {
switch (params->otg_inst) {
case 0:
REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P0_GATE_DISABLE, 0);
Add two flags for KVM_CAP_X2APIC_API to allow userspace to control support
for Suppress EOI Broadcasts, which KVM completely mishandles. When x2APIC
support was first added, KVM incorrectly advertised and "enabled" Suppress
EOI Broadcast, without fully supporting the I/O APIC side of the equation,
i.e. without adding directed EOI to KVM's in-kernel I/O APIC.
That flaw was carried over to split IRQCHIP support, i.e. KVM advertised
support for Suppress EOI Broadcasts irrespective of whether or not the
userspace I/O APIC implementation supported directed EOIs. Even worse,
KVM didn't actually suppress EOI broadcasts, i.e. userspace VMMs without
support for directed EOI came to rely on the "spurious" broadcasts.
KVM "fixed" the in-kernel I/O APIC implementation by completely disabling
support for Suppress EOI Broadcasts in commit 0bcc3fb95b97 ("KVM: lapic:
stop advertising DIRECTED_EOI when in-kernel IOAPIC is in use"), but
didn't do anything to remedy userspace I/O APIC implementations.
KVM's bogus handling of Suppress EOI Broadcast is problematic when the guest
relies on interrupts being masked in the I/O APIC until well after the
initial local APIC EOI. E.g. Windows with Credential Guard enabled
handles interrupts in the following order:
1. Interrupt for L2 arrives.
2. L1 APIC EOIs the interrupt.
3. L1 resumes L2 and injects the interrupt.
4. L2 EOIs after servicing.
5. L1 performs the I/O APIC EOI.
Because KVM EOIs the I/O APIC at step #2, the guest can get an interrupt
storm, e.g. if the IRQ line is still asserted and userspace reacts to the
EOI by re-injecting the IRQ, because the guest doesn't de-assert the line
until step #4, and doesn't expect the interrupt to be re-enabled until
step #5.
Unfortunately, simply "fixing" the bug isn't an option, as KVM has no way
of knowing if the userspace I/O APIC supports directed EOIs, i.e.
suppressing EOI broadcasts would result in interrupts being stuck masked
in the userspace I/O APIC due to step #5 being ignored by userspace. And
fully disabling support for Suppress EOI Broadcast is also undesirable, as
picking up the fix would require a guest reboot, *and* more importantly
would change the virtual CPU model exposed to the guest without any buy-in
from userspace.
Add two flags to allow userspace to choose exactly how to solve the
immediate issue, and in the long term to allow userspace to control the
virtual CPU model that is exposed to the guest (KVM should never have
enabled support for Suppress EOI Broadcast without a userspace opt-in).
Note, Suppress EOI Broadcasts is defined only in Intel's SDM, not in AMD's
APM. But the bit is writable on some AMD CPUs, e.g. Turin, and KVM's ABI
is to support Directed EOI (KVM's name) irrespective of guest CPU vendor.
Fixes: 7543a635aa09 ("KVM: x86: Add KVM exit for IOAPIC EOIs")
Closes: https://lore.kernel.org/kvm/7D497EF1-607D-4D37-98E7-DAF95F099342@nutanix.com
Cc: stable(a)vger.kernel.org
Co-developed-by: Sean Christopherson <seanjc(a)google.com>
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Signed-off-by: Khushit Shah <khushit.shah(a)nutanix.com>
---
All discussions on v1, v2 only apply the naming feedback and grammar
fixes.
Testing:
I ran the tests with QEMU 9.1 and a 6.12 kernel with the patch applied.
- With an unmodified QEMU build, KVM’s LAPIC SEOIB behavior remains unchanged.
- Invoking the x2APIC API with KVM_X2APIC_API_DISABLE_IGNORE_SUPPRESS_EOI_BROADCAST_QUIRK
correctly suppresses LAPIC -> IOAPIC EOI broadcasts (verified via KVM tracepoints).
- Invoking the x2APIC API with KVM_X2APIC_API_DISABLE_SUPPRESS_EOI_BROADCAST
results in SEOIB not being advertised to the guest, as expected (confirmed by
checking the LAPIC LVR value inside the guest).
I'll send the corresponding QEMU-side patch shortly.
---
Documentation/virt/kvm/api.rst | 14 ++++++++++++--
arch/x86/include/asm/kvm_host.h | 2 ++
arch/x86/include/uapi/asm/kvm.h | 6 ++++--
arch/x86/kvm/lapic.c | 13 +++++++++++++
arch/x86/kvm/x86.c | 12 +++++++++---
5 files changed, 40 insertions(+), 7 deletions(-)
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 57061fa29e6a..4141d2bd8156 100644
results in SEOIB not being advertised to the guest, as expected (confirmed by
checking the LAPIC LVR value inside the guest).
I'll send the corresponding QEMU-side patch shortly.
---
Documentation/virt/kvm/api.rst | 14 ++++++++++++--
arch/x86/include/asm/kvm_host.h | 2 ++
arch/x86/include/uapi/asm/kvm.h | 6 ++++--
arch/x86/kvm/lapic.c | 13 +++++++++++++
arch/x86/kvm/x86.c | 12 +++++++++---
5 files changed, 40 insertions(+), 7 deletions(-)
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 57061fa29e6a..4141d2bd8156 100644
results in SEOIB not being advertised to the guest, as expected (confirmed by
checking the LAPIC LVR value inside the guest).
I'll send the corresponding QEMU-side patch shortly.
---
Documentation/virt/kvm/api.rst | 14 ++++++++++++--
arch/x86/include/asm/kvm_host.h | 2 ++
arch/x86/include/uapi/asm/kvm.h | 6 ++++--
arch/x86/kvm/lapic.c | 13 +++++++++++++
arch/x86/kvm/x86.c | 12 +++++++++---
5 files changed, 40 insertions(+), 7 deletions(-)
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 57061fa29e6a..4141d2bd8156 100644
results in SEOIB not being advertised to the guest, as expected (confirmed by
checking the LAPIC LVR value inside the guest).
I'll send the corresponding QEMU-side patch shortly.
---
Documentation/virt/kvm/api.rst | 14 ++++++++++++--
arch/x86/include/asm/kvm_host.h | 2 ++
arch/x86/include/uapi/asm/kvm.h | 6 ++++--
arch/x86/kvm/lapic.c | 13 +++++++++++++
arch/x86/kvm/x86.c | 12 +++++++++---
5 files changed, 40 insertions(+), 7 deletions(-)
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 57061fa29e6a..4141d2bd8156 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -7800,8 +7800,10 @@ Will return -EBUSY if a VCPU has already been created.
Valid feature flags in args[0] are::
- #define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0)
- #define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1)
+ #define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0)
+ #define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1)
+ #define KVM_X2APIC_API_DISABLE_IGNORE_SUPPRESS_EOI_BROADCAST_QUIRK (1ULL << 2)
+ #define KVM_X2APIC_API_DISABLE_SUPPRESS_EOI_BROADCAST (1ULL << 3)
Enabling KVM_X2APIC_API_USE_32BIT_IDS changes the behavior of
KVM_SET_GSI_ROUTING, KVM_SIGNAL_MSI, KVM_SET_LAPIC, and KVM_GET_LAPIC,
@@ -7814,6 +7816,14 @@ as a broadcast even in x2APIC mode in order to support physical x2APIC
without interrupt remapping. This is undesirable in logical mode,
where 0xff represents CPUs 0-7 in cluster 0.
+Setting KVM_X2APIC_API_DISABLE_IGNORE_SUPPRESS_EOI_BROADCAST_QUIRK overrides
+KVM's quirky behavior of not actually suppressing EOI broadcasts for split IRQ
+chips when support for Suppress EOI Broadcasts is advertised to the guest.
+
+Setting KVM_X2APIC_API_DISABLE_SUPPRESS_EOI_BROADCAST disables support for
+Suppress EOI Broadcasts entirely, i.e. instructs KVM to NOT advertise support
+to the guest and thus disallow enabling EOI broadcast suppression in SPIV.
+
7.8 KVM_CAP_S390_USER_INSTR0
----------------------------
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 48598d017d6f..f6fdc0842c05 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1480,6 +1480,8 @@ struct kvm_arch {
bool x2apic_format;
bool x2apic_broadcast_quirk_disabled;
+ bool disable_ignore_suppress_eoi_broadcast_quirk;
+ bool x2apic_disable_suppress_eoi_broadcast;
+ * Suppress EOI Broadcasts without actually suppressing EOIs).
+ */
+ if ((kvm_lapic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) &&
+ apic->vcpu->kvm->arch.disable_ignore_suppress_eoi_broadcast_quirk)
+ return;
+
apic->vcpu->arch.pending_ioapic_eoi = vector;
kvm_make_request(KVM_REQ_IOAPIC_EOI_EXIT, apic->vcpu);
return;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c9c2aa6f4705..e1b6fe783615 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -121,8 +121,11 @@ static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE);
#define KVM_CAP_PMU_VALID_MASK KVM_PMU_CAP_DISABLE
-#define KVM_X2APIC_API_VALID_FLAGS (KVM_X2APIC_API_USE_32BIT_IDS | \
- KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK)
+#define KVM_X2APIC_API_VALID_FLAGS \
+ (KVM_X2APIC_API_USE_32BIT_IDS | \
+ KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK | \
+ KVM_X2APIC_API_DISABLE_IGNORE_SUPPRESS_EOI_BROADCAST_QUIRK | \
+ KVM_X2APIC_API_DISABLE_SUPPRESS_EOI_BROADCAST)
static void update_cr8_intercept(struct kvm_vcpu *vcpu);
static void process_nmi(struct kvm_vcpu *vcpu);
@@ -6782,7 +6785,10 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
kvm->arch.x2apic_format = true;
if (cap->args[0] & KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK)
kvm->arch.x2apic_broadcast_quirk_disabled = true;
-
+ if (cap->args[0] & KVM_X2APIC_API_DISABLE_IGNORE_SUPPRESS_EOI_BROADCAST_QUIRK)
+ kvm->arch.disable_ignore_suppress_eoi_broadcast_quirk = true;
+ if (cap->args[0] & KVM_X2APIC_API_DISABLE_SUPPRESS_EOI_BROADCAST)
+ kvm->arch.x2apic_disable_suppress_eoi_broadcast = true;
r = 0;
break;
case KVM_CAP_X86_DISABLE_EXITS:
--
2.39.3
The return type of __modify_irte_ga() is int, but modify_irte_ga()
treats it as a bool. Casting the int to bool discards the error code.
To fix the issue, change the type of ret to int in modify_irte_ga().
Fixes: 57cdb720eaa5 ("iommu/amd: Do not flush IRTE when only updating isRun and destination fields")
Cc: stable(a)vger.kernel.org
Signed-off-by: Jinhui Guo <guojinhui.liam(a)bytedance.com>
Reviewed-by: Ankit Soni <Ankit.Soni(a)amd.com>
---
v1: https://lore.kernel.org/all/20251120154725.435-1-guojinhui.liam@bytedance.c…
Changelog in v1 -> v2 (suggested by Ankit Soni)
- Trim subject line to the recommanded length
drivers/iommu/amd/iommu.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 2e1865daa1ce..a38304f1a8df 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -3354,7 +3354,7 @@ static int __modify_irte_ga(struct amd_iommu *iommu, u16 devid, int index,
static int modify_irte_ga(struct amd_iommu *iommu, u16 devid, int index,
struct irte_ga *irte)
{
- bool ret;
+ int ret;
ret = __modify_irte_ga(iommu, devid, index, irte);
if (ret)
--
2.20.1
The return type of __modify_irte_ga() is int, but modify_irte_ga()
treats it as a bool. Casting the int to bool discards the error code.
To fix the issue, change the type of ret to int in modify_irte_ga().
Fixes: 57cdb720eaa5 ("iommu/amd: Do not flush IRTE when only updating isRun and destination fields")
Cc: stable(a)vger.kernel.org
Signed-off-by: Jinhui Guo <guojinhui.liam(a)bytedance.com>
---
drivers/iommu/amd/iommu.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 2e1865daa1ce..a38304f1a8df 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -3354,7 +3354,7 @@ static int __modify_irte_ga(struct amd_iommu *iommu, u16 devid, int index,
static int modify_irte_ga(struct amd_iommu *iommu, u16 devid, int index,
struct irte_ga *irte)
{
- bool ret;
+ int ret;
ret = __modify_irte_ga(iommu, devid, index, irte);
if (ret)
--
2.20.1
The patch below does not apply to the 6.17-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.17.y
git checkout FETCH_HEAD
git cherry-pick -x cfa0904a35fd0231f4d05da0190f0a22ed881cce
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025112424-handball-smolder-0f15@gregkh' --subject-prefix 'PATCH 6.17.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From cfa0904a35fd0231f4d05da0190f0a22ed881cce Mon Sep 17 00:00:00 2001
From: Fangzhi Zuo <Jerry.Zuo(a)amd.com>
Date: Thu, 18 Sep 2025 16:25:45 -0400
Subject: [PATCH] drm/amd/display: Prevent Gating DTBCLK before It Is Properly
Latched
[why]
1. With allow_0_dtb_clk enabled, the time required to latch DTBCLK to 600 MHz
depends on the SMU. If DTBCLK is not latched to 600 MHz before set_mode completes,
gating DTBCLK causes the DP2 sink to lose its clock source.
2. The existing DTBCLK gating sequence ungates DTBCLK based on both pix_clk and ref_dtbclk,
but gates DTBCLK when either pix_clk or ref_dtbclk is zero.
pix_clk can be zero outside the set_mode sequence before DTBCLK is properly latched,
which can lead to DTBCLK being gated by mistake.
[how]
Consider both pixel_clk and ref_dtbclk when determining when it is safe to gate DTBCLK;
this is more accurate.
Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4701
Fixes: 5949e7c4890c ("drm/amd/display: Enable Dynamic DTBCLK Switch")
Reviewed-by: Charlene Liu <charlene.liu(a)amd.com>
Reviewed-by: Aurabindo Pillai <aurabindo.pillai(a)amd.com>
Signed-off-by: Fangzhi Zuo <Jerry.Zuo(a)amd.com>
Signed-off-by: Roman Li <roman.li(a)amd.com>
Tested-by: Dan Wheeler <daniel.wheeler(a)amd.com>
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
(cherry picked from commit d04eb0c402780ca037b62a6aecf23b863545ebca)
Cc: stable(a)vger.kernel.org
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
index b11383fba35f..1eb04772f5da 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
@@ -394,6 +394,8 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base,
display_count = dcn35_get_active_display_cnt_wa(dc, context, &all_active_disps);
if (new_clocks->dtbclk_en && !new_clocks->ref_dtbclk_khz)
new_clocks->ref_dtbclk_khz = 600000;
+ else if (!new_clocks->dtbclk_en && new_clocks->ref_dtbclk_khz > 590000)
+ new_clocks->ref_dtbclk_khz = 0;
/*
* if it is safe to lower, but we are already in the lower state, we don't have to do anything
@@ -435,7 +437,7 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base,
actual_dtbclk = REG_READ(CLK1_CLK4_CURRENT_CNT);
- if (actual_dtbclk) {
+ if (actual_dtbclk > 590000) {
clk_mgr_base->clks.ref_dtbclk_khz = new_clocks->ref_dtbclk_khz;
clk_mgr_base->clks.dtbclk_en = new_clocks->dtbclk_en;
}
diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c
index de6d62401362..c899c09ea31b 100644
--- a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c
+++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c
@@ -1411,7 +1411,7 @@ static void dccg35_set_dtbclk_dto(
__func__, params->otg_inst, params->pixclk_khz,
params->ref_dtbclk_khz, req_dtbclk_khz, phase, modulo);
- } else {
+ } else if (!params->ref_dtbclk_khz && !req_dtbclk_khz) {
switch (params->otg_inst) {
case 0:
REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P0_GATE_DISABLE, 0);
Make sure to drop the reference taken when looking up the sync provider
device and its driver data during DAI probe on probe failures and on
unbind.
Note that holding a reference to a device does not prevent its driver
data from going away so there is no point in keeping the reference.
Fixes: 7dd0d835582f ("ASoC: stm32: sai: simplify sync modes management")
Fixes: 1c3816a19487 ("ASoC: stm32: sai: add missing put_device()")
Cc: stable(a)vger.kernel.org # 4.16: 1c3816a19487
Cc: olivier moysan <olivier.moysan(a)st.com>
Cc: Wen Yang <yellowriver2010(a)hotmail.com>
Signed-off-by: Johan Hovold <johan(a)kernel.org>
---
sound/soc/stm/stm32_sai.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/sound/soc/stm/stm32_sai.c b/sound/soc/stm/stm32_sai.c
index fa821e3fb427..7065aeb0e524 100644
--- a/sound/soc/stm/stm32_sai.c
+++ b/sound/soc/stm/stm32_sai.c
@@ -143,6 +143,7 @@ static int stm32_sai_set_sync(struct stm32_sai_data *sai_client,
}
sai_provider = platform_get_drvdata(pdev);
+ put_device(&pdev->dev);
if (!sai_provider) {
dev_err(&sai_client->pdev->dev,
"SAI sync provider data not found\n");
@@ -159,7 +160,6 @@ static int stm32_sai_set_sync(struct stm32_sai_data *sai_client,
ret = stm32_sai_sync_conf_provider(sai_provider, synco);
error:
- put_device(&pdev->dev);
of_node_put(np_provider);
return ret;
}
--
2.51.2
Hello,
Are you ready to send your productivity soaring, fuelled by none other
than Spaceboard 2.0? Buckle up, because this ride is taking us
straight to the planet of Achievement (it's right next to Planet
Procrastination, which we’re leaving far behind).
Brace Yourself for Awesomeness:
🤖 AI-Powered Assistant – Our Robot Friends Are Here!
We brought more than 90 AI models to the party. Yes, 90! Use them
wisely or use them wildly; either way, they’re here to serve (and
maybe dance a little jig).
📚 Smart Collections – Organize or Chaos-‘nize!
Think of it as your personal organization ninja, ready to karate-chop
your digital mess into a zen-like order.
📅 Advanced Task Management – Because To-Do Lists Deserve Better
It's more than just a list – it's a life coach that fits in your
browser. Automatic deadline tracking and Google Calendar integration
might mean you’ve finally found your planner soulmate.
🎨 Chrome Sidepanel – Cool Features On-the-Go
Who needs to jump tabs when Spaceboard’s right there? Consider this
your personal assistant, minus the coffee runs (AI is still working on
that).
🌤 Weather & Location Services – Because Knowing Is Half the
Battle
Avoid surprising rain showers with real-time forecasts. We promise it
will not predict catnados – yet.
🔍 Universal Search – Find All the Things!
It’s like having a GPS for your brain! From bookmarks to hidden
chocolates at the back of your mind – find it all instantly.
🎯 Subscription Plans – More Choices than a Dessert Menu
Explorer Plan for beginners, Visionary Plan for Jedis. Pick your path
to ultimate productivity enlightenment.
So, What Are You Waiting For?
Zoom over to spaceboard.ai
[https://emailmarketingsrb.inboxingproserver.com/index.php/campaigns/qw394ew…]
and update your extension today. Go ahead, transform your browser into
a bustling hub of energy and excitement!
Questions, comments, or philosophical musings? We're here to chat,
brainstorm, or just swap dad jokes.
Catch you at the productivity summit!
Cheers,
The Spaceboard Crew 🙌
Unsubscribe here
[https://emailmarketingsrb.inboxingproserver.com/index.php/campaigns/qw394ew…]
Protect access to fore200e->available_cell_rate with rate_mtx lock in the
error handling path of fore200e_open() to prevent a data race.
The field fore200e->available_cell_rate is a shared resource used to track
available bandwidth. It is concurrently accessed by fore200e_open(),
fore200e_close(), and fore200e_change_qos().
In fore200e_open(), the lock rate_mtx is correctly held when subtracting
vcc->qos.txtp.max_pcr from available_cell_rate to reserve bandwidth.
However, if the subsequent call to fore200e_activate_vcin() fails, the
function restores the reserved bandwidth by adding back to
available_cell_rate without holding the lock.
This introduces a race condition because available_cell_rate is a global
device resource shared across all VCCs. If the error path in
fore200e_open() executes concurrently with operations like
fore200e_close() or fore200e_change_qos() on other VCCs, a
read-modify-write race occurs.
Specifically, the error path reads the rate without the lock. If another
CPU acquires the lock and modifies the rate (e.g., releasing bandwidth in
fore200e_close()) between this read and the subsequent write, the error
path will overwrite the concurrent update with a stale value. This results
in incorrect bandwidth accounting.
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Cc: stable(a)vger.kernel.org
Signed-off-by: Gui-Dong Han <hanguidong02(a)gmail.com>
Reviewed-by: Simon Horman <horms(a)kernel.org>
---
v3:
* Expanded the commit message to describe the specific call paths causing
the race, as suggested by Jakub Kicinski and Paolo Abeni.
v2:
* Added a description of the data race hazard in fore200e_open(), as
suggested by Jakub Kicinski and Simon Horman.
---
drivers/atm/fore200e.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c
index 4fea1149e003..f62e38571440 100644
--- a/drivers/atm/fore200e.c
+++ b/drivers/atm/fore200e.c
@@ -1374,7 +1374,9 @@ fore200e_open(struct atm_vcc *vcc)
vcc->dev_data = NULL;
+ mutex_lock(&fore200e->rate_mtx);
fore200e->available_cell_rate += vcc->qos.txtp.max_pcr;
+ mutex_unlock(&fore200e->rate_mtx);
kfree(fore200e_vcc);
return -EINVAL;
--
2.34.1
Hi all,
I worked on adding PTP support for the KSZ8463. While doing so, I ran
into a few bugs in the resource release process that occur when things go
wrong arount IRQ initialization.
This small series fixes those bugs.
The next series, which will add the PTP support, depend on this one.
Signed-off-by: Bastien Curutchet (Schneider Electric) <bastien.curutchet(a)bootlin.com>
---
Changes in v6:
- PATCH 4: Jump in the middle of the release loop instead of partially
freeing resource before jumping at the beginning of the release loop.
- PATCH 5: Add Andrew's Reviewed-By.
- Link to v5: https://lore.kernel.org/r/20251118-ksz-fix-v5-0-8e9c7f56618d@bootlin.com
Changes in v5:
- All: Add Cc Tag.
- PATCH 3: Use dsa_switch_for_each_user_port_continue_reverse() to only
iterate over initialized ports.
- PATCH 4: Also clean PTP IRQs on port initialization failures
- Link to v4: https://lore.kernel.org/r/20251117-ksz-fix-v4-0-13e1da58a492@bootlin.com
Changes in v4:
- PATCH 1 & 2: Add Andrew's Reviewed-By.
- PATCH 3: Ensure ksz_irq is initialized outside of ksz_irq_free()
- Add PATCH 4
- PATCH 5: Fix symetry issues in ksz_ptp_msg_irq_{setup/free}()
- Link to v3: https://lore.kernel.org/r/20251114-ksz-fix-v3-0-acbb3b9cc32f@bootlin.com
Changes in v3:
- PATCH 1 and 3: Fix Fixes tags
- PATCH 3: Move the irq_dispose_mapping() behind the check that verifies that
the domain is initialized
- Link to v2: https://lore.kernel.org/r/20251106-ksz-fix-v2-0-07188f608873@bootlin.com
Changes in v2:
- Add Fixes tag.
- Split PATCH 1 in two patches as it needed two different Fixes tags
- Add details in commit logs
- Link to v1: https://lore.kernel.org/r/20251031-ksz-fix-v1-0-7e46de999ed1@bootlin.com
---
Bastien Curutchet (Schneider Electric) (5):
net: dsa: microchip: common: Fix checks on irq_find_mapping()
net: dsa: microchip: ptp: Fix checks on irq_find_mapping()
net: dsa: microchip: Don't free uninitialized ksz_irq
net: dsa: microchip: Free previously initialized ports on init failures
net: dsa: microchip: Fix symetry in ksz_ptp_msg_irq_{setup/free}()
drivers/net/dsa/microchip/ksz_common.c | 31 +++++++++++++++----------------
drivers/net/dsa/microchip/ksz_ptp.c | 22 +++++++++-------------
2 files changed, 24 insertions(+), 29 deletions(-)
---
base-commit: 09652e543e809c2369dca142fee5d9b05be9bdc7
change-id: 20251031-ksz-fix-db345df7635f
Best regards,
--
Bastien Curutchet (Schneider Electric) <bastien.curutchet(a)bootlin.com>
Add two flags for KVM_CAP_X2APIC_API to allow userspace to control support
for Suppress EOI Broadcasts, which KVM completely mishandles. When x2APIC
support was first added, KVM incorrectly advertised and "enabled" Suppress
EOI Broadcast, without fully supporting the I/O APIC side of the equation,
i.e. without adding directed EOI to KVM's in-kernel I/O APIC.
That flaw was carried over to split IRQCHIP support, i.e. KVM advertised
support for Suppress EOI Broadcasts irrespective of whether or not the
userspace I/O APIC implementation supported directed EOIs. Even worse,
KVM didn't actually suppress EOI broadcasts, i.e. userspace VMMs without
support for directed EOI came to rely on the "spurious" broadcasts.
KVM "fixed" the in-kernel I/O APIC implementation by completely disabling
support for Supress EOI Broadcasts in commit 0bcc3fb95b97 ("KVM: lapic:
stop advertising DIRECTED_EOI when in-kernel IOAPIC is in use"), but
didn't do anything to remedy userspace I/O APIC implementations.
KVM's bogus handling of Supress EOI Broad is problematic when the guest
relies on interrupts being masked in the I/O APIC until well after the
initial local APIC EOI. E.g. Windows with Credential Guard enabled
handles interrupts in the following order:`
1. Interrupt for L2 arrives.
2. L1 APIC EOIs the interrupt.
3. L1 resumes L2 and injects the interrupt.
4. L2 EOIs after servicing.
5. L1 performs the I/O APIC EOI.
Because KVM EOIs the I/O APIC at step #2, the guest can get an interrupt
storm, e.g. if the IRQ line is still asserted and userspace reacts to the
EOI by re-injecting the IRQ, because the guest doesn't de-assert the line
until step #4, and doesn't expect the interrupt to be re-enabled until
step #5.
Unfortunately, simply "fixing" the bug isn't an option, as KVM has no way
of knowing if the userspace I/O APIC supports directed EOIs, i.e.
suppressing EOI broadcasts would result in interrupts being stuck masked
in the userspace I/O APIC due to step #5 being ignored by userspace. And
fully disabling support for Suppress EOI Broadcast is also undesirable, as
picking up the fix would require a guest reboot, *and* more importantly
would change the virtual CPU model exposed to the guest without any buy-in
from userspace.
Add two flags to allow userspace to choose exactly how to solve the
immediate issue, and in the long term to allow userspace to control the
virtual CPU model that is exposed to the guest (KVM should never have
enabled supported for Supress EOI Broadcast without a userspace opt-in).
Note, Suppress EOI Broadcasts is defined only in Intel's SDM, not in AMD's
APM. But the bit is writable on some AMD CPUs, e.g. Turin, and KVM's ABI
is to support Directed EOI (KVM's name) irrespective of guest CPU vendor.
Fixes: 7543a635aa09 ("KVM: x86: Add KVM exit for IOAPIC EOIs")
Closes: https://lore.kernel.org/kvm/7D497EF1-607D-4D37-98E7-DAF95F099342@nutanix.com
Cc: stable(a)vger.kernel.org
Co-developed-by: Sean Christopherson <seanjc(a)google.com>
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Signed-off-by: Khushit Shah <khushit.shah(a)nutanix.com>
---
Documentation/virt/kvm/api.rst | 14 ++++++++++++--
arch/x86/include/asm/kvm_host.h | 2 ++
arch/x86/include/uapi/asm/kvm.h | 6 ++++--
arch/x86/kvm/lapic.c | 13 +++++++++++++
arch/x86/kvm/x86.c | 12 +++++++++---
5 files changed, 40 insertions(+), 7 deletions(-)
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 57061fa29e6a..4141d2bd8156 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -7800,8 +7800,10 @@ Will return -EBUSY if a VCPU has already been created.
Valid feature flags in args[0] are::
- #define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0)
- #define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1)
+ #define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0)
+ #define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1)
+ #define KVM_X2APIC_API_DISABLE_IGNORE_SUPPRESS_EOI_BROADCAST_QUIRK (1ULL << 2)
+ #define KVM_X2APIC_API_DISABLE_SUPPRESS_EOI_BROADCAST (1ULL << 3)
Enabling KVM_X2APIC_API_USE_32BIT_IDS changes the behavior of
KVM_SET_GSI_ROUTING, KVM_SIGNAL_MSI, KVM_SET_LAPIC, and KVM_GET_LAPIC,
@@ -7814,6 +7816,14 @@ as a broadcast even in x2APIC mode in order to support physical x2APIC
without interrupt remapping. This is undesirable in logical mode,
where 0xff represents CPUs 0-7 in cluster 0.
+Setting KVM_X2APIC_API_DISABLE_IGNORE_SUPPRESS_EOI_BROADCAST_QUIRK overrides
+KVM's quirky behavior of not actually suppressing EOI broadcasts for split IRQ
+chips when support for Suppress EOI Broadcasts is advertised to the guest.
+
+Setting KVM_X2APIC_API_DISABLE_SUPPRESS_EOI_BROADCAST disables support for
+Suppress EOI Broadcasts entirely, i.e. instructs KVM to NOT advertise support
+to the guest and thus disallow enabling EOI broadcast suppression in SPIV.
+
7.8 KVM_CAP_S390_USER_INSTR0
----------------------------
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 48598d017d6f..f6fdc0842c05 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1480,6 +1480,8 @@ struct kvm_arch {
bool x2apic_format;
bool x2apic_broadcast_quirk_disabled;
+ bool disable_ignore_suppress_eoi_broadcast_quirk;
+ bool x2apic_disable_suppress_eoi_broadcast;
bool has_mapped_host_mmio;
bool guest_can_read_msr_platform_info;
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index d420c9c066d4..82d49696118f 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -913,8 +913,10 @@ struct kvm_sev_snp_launch_finish {
__u64 pad1[4];
};
-#define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0)
-#define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1)
+#define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0)
+#define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1)
+#define KVM_X2APIC_API_DISABLE_IGNORE_SUPPRESS_EOI_BROADCAST_QUIRK (1ULL << 2)
+#define KVM_X2APIC_API_DISABLE_SUPPRESS_EOI_BROADCAST (1ULL << 3)
struct kvm_hyperv_eventfd {
__u32 conn_id;
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 0ae7f913d782..cf8a2162872b 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -562,6 +562,7 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu)
* IOAPIC.
*/
if (guest_cpu_cap_has(vcpu, X86_FEATURE_X2APIC) &&
+ !vcpu->kvm->arch.x2apic_disable_suppress_eoi_broadcast &&
!ioapic_in_kernel(vcpu->kvm))
v |= APIC_LVR_DIRECTED_EOI;
kvm_lapic_set_reg(apic, APIC_LVR, v);
@@ -1517,6 +1518,18 @@ static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
/* Request a KVM exit to inform the userspace IOAPIC. */
if (irqchip_split(apic->vcpu->kvm)) {
+ /*
+ * Don't exit to userspace if the guest has enabled Directed
+ * EOI, a.k.a. Suppress EOI Broadcasts, in which case the local
+ * APIC doesn't broadcast EOIs (the guest must EOI the target
+ * I/O APIC(s) directly). Ignore the suppression if userspace
+ * has NOT disabled KVM's quirk (KVM advertised support for
+ * Suppress EOI Broadcasts without actually suppressing EOIs).
+ */
+ if ((kvm_lapic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) &&
+ apic->vcpu->kvm->arch.disable_ignore_suppress_eoi_broadcast_quirk)
+ return;
+
apic->vcpu->arch.pending_ioapic_eoi = vector;
kvm_make_request(KVM_REQ_IOAPIC_EOI_EXIT, apic->vcpu);
return;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c9c2aa6f4705..e1b6fe783615 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -121,8 +121,11 @@ static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE);
#define KVM_CAP_PMU_VALID_MASK KVM_PMU_CAP_DISABLE
-#define KVM_X2APIC_API_VALID_FLAGS (KVM_X2APIC_API_USE_32BIT_IDS | \
- KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK)
+#define KVM_X2APIC_API_VALID_FLAGS \
+ (KVM_X2APIC_API_USE_32BIT_IDS | \
+ KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK | \
+ KVM_X2APIC_API_DISABLE_IGNORE_SUPPRESS_EOI_BROADCAST_QUIRK | \
+ KVM_X2APIC_API_DISABLE_SUPPRESS_EOI_BROADCAST)
static void update_cr8_intercept(struct kvm_vcpu *vcpu);
static void process_nmi(struct kvm_vcpu *vcpu);
@@ -6782,7 +6785,10 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
kvm->arch.x2apic_format = true;
if (cap->args[0] & KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK)
kvm->arch.x2apic_broadcast_quirk_disabled = true;
-
+ if (cap->args[0] & KVM_X2APIC_API_DISABLE_IGNORE_SUPPRESS_EOI_BROADCAST_QUIRK)
+ kvm->arch.disable_ignore_suppress_eoi_broadcast_quirk = true;
+ if (cap->args[0] & KVM_X2APIC_API_DISABLE_SUPPRESS_EOI_BROADCAST)
+ kvm->arch.x2apic_disable_suppress_eoi_broadcast = true;
r = 0;
break;
case KVM_CAP_X86_DISABLE_EXITS:
--
2.39.3
From: Franz Schnyder <franz.schnyder(a)toradex.com>
Currently, the PHY only registers the typec orientation switch when it
is built in. If the typec driver is built as a module, the switch
registration is skipped due to the preprocessor condition, causing
orientation detection to fail.
This patch replaces the preprocessor condition so that the orientation
switch is correctly registered for both built-in and module builds.
Fixes: b58f0f86fd61 ("phy: fsl-imx8mq-usb: add tca function driver for imx95")
Cc: stable(a)vger.kernel.org
Signed-off-by: Franz Schnyder <franz.schnyder(a)toradex.com>
---
drivers/phy/freescale/phy-fsl-imx8mq-usb.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/phy/freescale/phy-fsl-imx8mq-usb.c b/drivers/phy/freescale/phy-fsl-imx8mq-usb.c
index b94f242420fc..d498a6b7234b 100644
--- a/drivers/phy/freescale/phy-fsl-imx8mq-usb.c
+++ b/drivers/phy/freescale/phy-fsl-imx8mq-usb.c
@@ -124,7 +124,7 @@ struct imx8mq_usb_phy {
static void tca_blk_orientation_set(struct tca_blk *tca,
enum typec_orientation orientation);
-#ifdef CONFIG_TYPEC
+#if IS_ENABLED(CONFIG_TYPEC)
static int tca_blk_typec_switch_set(struct typec_switch_dev *sw,
enum typec_orientation orientation)
--
2.43.0
In the driver code for the MV‑based queue variant (struct hpt_iopmu_mv of the
hptiop driver), the field "inbound_head" is read from the hardware register
and used as an index into the array "inbound_q[MVIOP_QUEUE_LEN]". For example:
u32 inbound_head = readl(&hba->u.mv.mu->inbound_head);
/* ... */
memcpy_toio(&hba->u.mv.mu->inbound_q[inbound_head], &p, 8);
The code then increments head and wraps it to zero when it equals MVIOP_QUEUE_LEN.
However, the driver does *not* check that the initial value of "inbound_head"
is strictly less than "MVIOP_QUEUE_LEN". If the hardware (or attacker‑controlled
firmware/hardware device) writes a malicious value into the inbound_head register
(which could be ≥ MVIOP_QUEUE_LEN), then subsequent "memcpy_toio" will write
past the end of "inbound_q", leading to an out‑of‑bounds write condition.
Since inbound_q is allocated with exactly MVIOP_QUEUE_LEN entries (see:
__le64 inbound_q[MVIOP_QUEUE_LEN]; /* MVIOP_QUEUE_LEN == 512 */
), indexing at e.g. "inbound_head == 512" or greater results in undefined memory access
and potential corruption of adjacent fields or memory regions.
This issue is particularly concerning in scenarios where an attacker has control
or influence over the hardware/firmware on the adapter card (for example a malicious
or compromised controller), because they could deliberately set "inbound_head" to
a value outside the expected [0, MVIOP_QUEUE_LEN‑1] range, thus forcing the driver
to write arbitrary data beyond the queue bounds.
To mitigate this issue, we add a check to validate the value of "inbound_head"
before it is used as an index. If "inbound_head" is found to be out of bounds (≥ MVIOP_QUEUE_LEN),
the head will be reset to 0, and "head" will be set to 1 to ensure that a valid entry is written to
the queue. The resetting of "inbound_head" to 0 ensures that the queue processing can continue
safely and predictably, while the adjustment of "head = 1" ensures that the next valid index is used
for subsequent writes.
This prevents any out-of-bounds writes and ensures that the queue continues to operate safely
even if the hardware is compromised.
Fixes: 00f5970193e22 ("[SCSI] hptiop: add more adapter models and other fixes")
Cc: stable(a)vger.kernel.org
Signed-off-by: Guangshuo Li <lgs201920130244(a)gmail.com>
---
drivers/scsi/hptiop.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/drivers/scsi/hptiop.c b/drivers/scsi/hptiop.c
index c01370893a81..a1a3840e6ea8 100644
--- a/drivers/scsi/hptiop.c
+++ b/drivers/scsi/hptiop.c
@@ -166,6 +166,14 @@ static void mv_inbound_write(u64 p, struct hptiop_hba *hba)
if (head == MVIOP_QUEUE_LEN)
head = 0;
+ if (inbound_head >= MVIOP_QUEUE_LEN) {
+ dev_err(&hba->pdev->dev,
+ "hptiop: inbound_head out of range (%u)\n",
+ inbound_head);
+ inbound_head = 0;
+ head = 1;
+ }
+
memcpy_toio(&hba->u.mv.mu->inbound_q[inbound_head], &p, 8);
writel(head, &hba->u.mv.mu->inbound_head);
writel(MVIOP_MU_INBOUND_INT_POSTQUEUE,
--
2.43.0
# TL;DR
previous discussion: https://lore.kernel.org/linux-mm/20250921232709.1608699-1-harry.yoo@oracle.…
A "bad pmd" error occurs due to race condition between
change_prot_numa() and THP migration. The mainline kernel does not have
this bug as commit 670ddd8cdc fixes the race condition. 6.1.y, 5.15.y,
5.10.y, 5.4.y are affected by this bug.
Fixing this in -stable kernels is tricky because pte_map_offset_lock()
has different semantics in pre-6.5 and post-6.5 kernels. I am trying to
backport the same mechanism we have in the mainline kernel.
# Testing
I verified that the bug described below is not reproduced anymore
(on a downstream kernel) after applying this patch series. It used to
trigger in few days of intensive numa balancing testing, but it survived
2 weeks with this applied.
# Bug Description
It was reported that a bad pmd is seen when automatic NUMA
balancing is marking page table entries as prot_numa:
[2437548.196018] mm/pgtable-generic.c:50: bad pmd 00000000af22fc02(dffffffe71fbfe02)
[2437548.235022] Call Trace:
[2437548.238234] <TASK>
[2437548.241060] dump_stack_lvl+0x46/0x61
[2437548.245689] panic+0x106/0x2e5
[2437548.249497] pmd_clear_bad+0x3c/0x3c
[2437548.253967] change_pmd_range.isra.0+0x34d/0x3a7
[2437548.259537] change_p4d_range+0x156/0x20e
[2437548.264392] change_protection_range+0x116/0x1a9
[2437548.269976] change_prot_numa+0x15/0x37
[2437548.274774] task_numa_work+0x1b8/0x302
[2437548.279512] task_work_run+0x62/0x95
[2437548.283882] exit_to_user_mode_loop+0x1a4/0x1a9
[2437548.289277] exit_to_user_mode_prepare+0xf4/0xfc
[2437548.294751] ? sysvec_apic_timer_interrupt+0x34/0x81
[2437548.300677] irqentry_exit_to_user_mode+0x5/0x25
[2437548.306153] asm_sysvec_apic_timer_interrupt+0x16/0x1b
This is due to a race condition between change_prot_numa() and
THP migration because the kernel doesn't check is_swap_pmd() and
pmd_trans_huge() atomically:
change_prot_numa() THP migration
======================================================================
- change_pmd_range()
-> is_swap_pmd() returns false,
meaning it's not a PMD migration
entry.
- do_huge_pmd_numa_page()
-> migrate_misplaced_page() sets
migration entries for the THP.
- change_pmd_range()
-> pmd_none_or_clear_bad_unless_trans_huge()
-> pmd_none() and pmd_trans_huge() returns false
- pmd_none_or_clear_bad_unless_trans_huge()
-> pmd_bad() returns true for the migration entry!
The upstream commit 670ddd8cdcbd ("mm/mprotect: delete
pmd_none_or_clear_bad_unless_trans_huge()") closes this race condition
by checking is_swap_pmd() and pmd_trans_huge() atomically.
# Backporting note
commit a79390f5d6a7 ("mm/mprotect: use long for page accountings and retval")
is backported to return an error code (negative value) in
change_pte_range().
Unlike the mainline, pte_offset_map_lock() does not check if the pmd
entry is a migration entry or a hugepage; acquires PTL unconditionally
instead of returning failure. Therefore, it is necessary to keep the
!is_swap_pmd() && !pmd_trans_huge() && !pmd_devmap() checks in
change_pmd_range() before acquiring the PTL.
After acquiring the lock, open-code the semantics of
pte_offset_map_lock() in the mainline kernel; change_pte_range() fails
if the pmd value has changed. This requires adding pmd_old parameter
(pmd_t value that is read before calling the function) to
change_pte_range().
Hugh Dickins (1):
mm/mprotect: delete pmd_none_or_clear_bad_unless_trans_huge()
Peter Xu (1):
mm/mprotect: use long for page accountings and retval
include/linux/hugetlb.h | 4 +-
include/linux/mm.h | 2 +-
mm/hugetlb.c | 4 +-
mm/mempolicy.c | 2 +-
mm/mprotect.c | 124 +++++++++++++++++-----------------------
5 files changed, 60 insertions(+), 76 deletions(-)
--
2.43.0
# TL;DR
previous discussion: https://lore.kernel.org/linux-mm/20250921232709.1608699-1-harry.yoo@oracle.…
A "bad pmd" error occurs due to race condition between
change_prot_numa() and THP migration. The mainline kernel does not have
this bug as commit 670ddd8cdc fixes the race condition. 6.1.y, 5.15.y,
5.10.y, 5.4.y are affected by this bug.
Fixing this in -stable kernels is tricky because pte_map_offset_lock()
has different semantics in pre-6.5 and post-6.5 kernels. I am trying to
backport the same mechanism we have in the mainline kernel.
# Testing
I verified that the bug described below is not reproduced anymore
(on a downstream kernel) after applying this patch series. It used to
trigger in few days of intensive numa balancing testing, but it survived
2 weeks with this applied.
# Bug Description
It was reported that a bad pmd is seen when automatic NUMA
balancing is marking page table entries as prot_numa:
[2437548.196018] mm/pgtable-generic.c:50: bad pmd 00000000af22fc02(dffffffe71fbfe02)
[2437548.235022] Call Trace:
[2437548.238234] <TASK>
[2437548.241060] dump_stack_lvl+0x46/0x61
[2437548.245689] panic+0x106/0x2e5
[2437548.249497] pmd_clear_bad+0x3c/0x3c
[2437548.253967] change_pmd_range.isra.0+0x34d/0x3a7
[2437548.259537] change_p4d_range+0x156/0x20e
[2437548.264392] change_protection_range+0x116/0x1a9
[2437548.269976] change_prot_numa+0x15/0x37
[2437548.274774] task_numa_work+0x1b8/0x302
[2437548.279512] task_work_run+0x62/0x95
[2437548.283882] exit_to_user_mode_loop+0x1a4/0x1a9
[2437548.289277] exit_to_user_mode_prepare+0xf4/0xfc
[2437548.294751] ? sysvec_apic_timer_interrupt+0x34/0x81
[2437548.300677] irqentry_exit_to_user_mode+0x5/0x25
[2437548.306153] asm_sysvec_apic_timer_interrupt+0x16/0x1b
This is due to a race condition between change_prot_numa() and
THP migration because the kernel doesn't check is_swap_pmd() and
pmd_trans_huge() atomically:
change_prot_numa() THP migration
======================================================================
- change_pmd_range()
-> is_swap_pmd() returns false,
meaning it's not a PMD migration
entry.
- do_huge_pmd_numa_page()
-> migrate_misplaced_page() sets
migration entries for the THP.
- change_pmd_range()
-> pmd_none_or_clear_bad_unless_trans_huge()
-> pmd_none() and pmd_trans_huge() returns false
- pmd_none_or_clear_bad_unless_trans_huge()
-> pmd_bad() returns true for the migration entry!
The upstream commit 670ddd8cdcbd ("mm/mprotect: delete
pmd_none_or_clear_bad_unless_trans_huge()") closes this race condition
by checking is_swap_pmd() and pmd_trans_huge() atomically.
# Backporting note
commit a79390f5d6a7 ("mm/mprotect: use long for page accountings and retval")
is backported to return an error code (negative value) in
change_pte_range().
Unlike the mainline, pte_offset_map_lock() does not check if the pmd
entry is a migration entry or a hugepage; acquires PTL unconditionally
instead of returning failure. Therefore, it is necessary to keep the
!is_swap_pmd() && !pmd_trans_huge() && !pmd_devmap() checks in
change_pmd_range() before acquiring the PTL.
After acquiring the lock, open-code the semantics of
pte_offset_map_lock() in the mainline kernel; change_pte_range() fails
if the pmd value has changed. This requires adding pmd_old parameter
(pmd_t value that is read before calling the function) to
change_pte_range().
Hugh Dickins (1):
mm/mprotect: delete pmd_none_or_clear_bad_unless_trans_huge()
Peter Xu (1):
mm/mprotect: use long for page accountings and retval
include/linux/hugetlb.h | 4 +-
include/linux/mm.h | 2 +-
mm/hugetlb.c | 4 +-
mm/mempolicy.c | 2 +-
mm/mprotect.c | 107 ++++++++++++++++++++++------------------
5 files changed, 64 insertions(+), 55 deletions(-)
--
2.43.0
On SM8250 (IRIS2) with firmware older than 1.0.087, the firmware could
not handle a dummy device address for EOS buffers, so a NULL device
address is sent instead. The existing check used IS_V6() alongside a
firmware version gate:
if (IS_V6(core) && is_fw_rev_or_older(core, 1, 0, 87))
fdata.device_addr = 0;
else
fdata.device_addr = 0xdeadb000;
However, SC7280 which is also V6, uses a firmware string of the form
"1.0.<commit-hash>", which the version parser translates to 1.0.0. This
unintentionally satisfies the `is_fw_rev_or_older(..., 1, 0, 87)`
condition on SC7280. Combined with IS_V6() matching there as well, the
quirk is incorrectly applied to SC7280, causing VP9 decode failures.
Constrain the check to IRIS2 (SM8250) only, which is the only platform
that needed this quirk, by replacing IS_V6() with IS_IRIS2(). This
restores correct behavior on SC7280 (no forced NULL EOS buffer address).
Fixes: 47f867cb1b63 ("media: venus: fix EOS handling in decoder stop command")
Cc: stable(a)vger.kernel.org
Reported-by: Mecid <notifications(a)github.com>
Closes: https://github.com/qualcomm-linux/kernel-topics/issues/222
Co-developed-by: Renjiang Han <renjiang.han(a)oss.qualcomm.com>
Signed-off-by: Renjiang Han <renjiang.han(a)oss.qualcomm.com>
Signed-off-by: Dikshita Agarwal <dikshita.agarwal(a)oss.qualcomm.com>
---
drivers/media/platform/qcom/venus/vdec.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/media/platform/qcom/venus/vdec.c b/drivers/media/platform/qcom/venus/vdec.c
index 4a6641fdffcf79705893be58c7ec5cf485e2fab9..dc85a5b8c989eb8339e5de9fea7ab49532e7f15a 100644
--- a/drivers/media/platform/qcom/venus/vdec.c
+++ b/drivers/media/platform/qcom/venus/vdec.c
@@ -565,7 +565,7 @@ vdec_decoder_cmd(struct file *file, void *fh, struct v4l2_decoder_cmd *cmd)
fdata.buffer_type = HFI_BUFFER_INPUT;
fdata.flags |= HFI_BUFFERFLAG_EOS;
- if (IS_V6(inst->core) && is_fw_rev_or_older(inst->core, 1, 0, 87))
+ if (IS_IRIS2(inst->core) && is_fw_rev_or_older(inst->core, 1, 0, 87))
fdata.device_addr = 0;
else
fdata.device_addr = 0xdeadb000;
---
base-commit: 1f2353f5a1af995efbf7bea44341aa0d03460b28
change-id: 20251121-venus-vp9-fix-1ff602724c02
Best regards,
--
Dikshita Agarwal <dikshita.agarwal(a)oss.qualcomm.com>
The patch below does not apply to the 6.17-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.17.y
git checkout FETCH_HEAD
git cherry-pick -x 92e239e36d600002559074994a545fcfac9afd2d
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025112435-stray-aflutter-0f77@gregkh' --subject-prefix 'PATCH 6.17.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 92e239e36d600002559074994a545fcfac9afd2d Mon Sep 17 00:00:00 2001
From: Gang Yan <yangang(a)kylinos.cn>
Date: Tue, 18 Nov 2025 08:20:28 +0100
Subject: [PATCH] mptcp: fix address removal logic in mptcp_pm_nl_rm_addr
Fix inverted WARN_ON_ONCE condition that prevented normal address
removal counter updates. The current code only executes decrement
logic when the counter is already 0 (abnormal state), while
normal removals (counter > 0) are ignored.
Signed-off-by: Gang Yan <yangang(a)kylinos.cn>
Fixes: 636113918508 ("mptcp: pm: remove '_nl' from mptcp_pm_nl_rm_addr_received")
Cc: stable(a)vger.kernel.org
Reviewed-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
Link: https://patch.msgid.link/20251118-net-mptcp-misc-fixes-6-18-rc6-v1-10-806d3…
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/net/mptcp/pm_kernel.c b/net/mptcp/pm_kernel.c
index 2ae95476dba3..0a50fd5edc06 100644
--- a/net/mptcp/pm_kernel.c
+++ b/net/mptcp/pm_kernel.c
@@ -672,7 +672,7 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
void mptcp_pm_nl_rm_addr(struct mptcp_sock *msk, u8 rm_id)
{
- if (rm_id && WARN_ON_ONCE(msk->pm.add_addr_accepted == 0)) {
+ if (rm_id && !WARN_ON_ONCE(msk->pm.add_addr_accepted == 0)) {
u8 limit_add_addr_accepted =
mptcp_pm_get_limit_add_addr_accepted(msk);
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 1bba3f219c5e8c29e63afa3c1fc24f875ebec119
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025112426-backroom-negate-d125@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1bba3f219c5e8c29e63afa3c1fc24f875ebec119 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni(a)redhat.com>
Date: Tue, 18 Nov 2025 08:20:22 +0100
Subject: [PATCH] mptcp: do not fallback when OoO is present
In case of DSS corruption, the MPTCP protocol tries to avoid the subflow
reset if fallback is possible. Such corruptions happen in the receive
path; to ensure fallback is possible the stack additionally needs to
check for OoO data, otherwise the fallback will break the data stream.
Fixes: e32d262c89e2 ("mptcp: handle consistently DSS corruption")
Cc: stable(a)vger.kernel.org
Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/598
Signed-off-by: Paolo Abeni <pabeni(a)redhat.com>
Reviewed-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
Link: https://patch.msgid.link/20251118-net-mptcp-misc-fixes-6-18-rc6-v1-4-806d37…
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index e30e9043a694..6f0e8f670d83 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -76,6 +76,13 @@ bool __mptcp_try_fallback(struct mptcp_sock *msk, int fb_mib)
if (__mptcp_check_fallback(msk))
return true;
+ /* The caller possibly is not holding the msk socket lock, but
+ * in the fallback case only the current subflow is touching
+ * the OoO queue.
+ */
+ if (!RB_EMPTY_ROOT(&msk->out_of_order_queue))
+ return false;
+
spin_lock_bh(&msk->fallback_lock);
if (!msk->allow_infinite_fallback) {
spin_unlock_bh(&msk->fallback_lock);
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 1bba3f219c5e8c29e63afa3c1fc24f875ebec119
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025112425-math-lasso-c3b8@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1bba3f219c5e8c29e63afa3c1fc24f875ebec119 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni(a)redhat.com>
Date: Tue, 18 Nov 2025 08:20:22 +0100
Subject: [PATCH] mptcp: do not fallback when OoO is present
In case of DSS corruption, the MPTCP protocol tries to avoid the subflow
reset if fallback is possible. Such corruptions happen in the receive
path; to ensure fallback is possible the stack additionally needs to
check for OoO data, otherwise the fallback will break the data stream.
Fixes: e32d262c89e2 ("mptcp: handle consistently DSS corruption")
Cc: stable(a)vger.kernel.org
Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/598
Signed-off-by: Paolo Abeni <pabeni(a)redhat.com>
Reviewed-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
Link: https://patch.msgid.link/20251118-net-mptcp-misc-fixes-6-18-rc6-v1-4-806d37…
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index e30e9043a694..6f0e8f670d83 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -76,6 +76,13 @@ bool __mptcp_try_fallback(struct mptcp_sock *msk, int fb_mib)
if (__mptcp_check_fallback(msk))
return true;
+ /* The caller possibly is not holding the msk socket lock, but
+ * in the fallback case only the current subflow is touching
+ * the OoO queue.
+ */
+ if (!RB_EMPTY_ROOT(&msk->out_of_order_queue))
+ return false;
+
spin_lock_bh(&msk->fallback_lock);
if (!msk->allow_infinite_fallback) {
spin_unlock_bh(&msk->fallback_lock);
Hello,
New build issue found on stable-rc/linux-6.11.y:
---
./include/net/ip.h:472:14: error: default initialization of an object of type 'typeof (rt->dst.expires)' (aka 'const unsigned long') leaves the object uninitialized [-Werror,-Wdefault-const-init-var-unsafe] in security/selinux/avc.o (security/selinux/avc.c) [logspec:kbuild,kbuild.compiler.error]
---
- dashboard: https://d.kernelci.org/i/maestro:16f2d4d7ea51d0d2cee2101fea4a5d762aaeca6d
- giturl: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
- commit HEAD: f6d41443f54856ceece0d5b584f47f681513bde4
- tags: v6.11.11
Log excerpt:
=====================================================
In file included from security/selinux/avc.c:30:
In file included from ./security/selinux/include/avc.h:18:
In file included from ./include/linux/lsm_audit.h:25:
In file included from ./include/rdma/ib_verbs.h:26:
./include/net/ip.h:472:14: error: default initialization of an object of type 'typeof (rt->dst.expires)' (aka 'const unsigned long') leaves the object uninitialized [-Werror,-Wdefault-const-init-var-unsafe]
472 | if (mtu && time_before(jiffies, rt->dst.expires))
| ^
./include/linux/jiffies.h:138:26: note: expanded from macro 'time_before'
138 | #define time_before(a,b) time_after(b,a)
| ^
./include/linux/jiffies.h:128:3: note: expanded from macro 'time_after'
128 | (typecheck(unsigned long, a) && \
| ^
./include/linux/typecheck.h:11:12: note: expanded from macro 'typecheck'
11 | typeof(x) __dummy2; \
| ^
1 error generated.
CC security/keys/request_key_auth.o
=====================================================
# Builds where the incident occurred:
## i386_defconfig+allmodconfig+CONFIG_FRAME_WARN=2048 on (i386):
- compiler: clang-21
- config: https://files.kernelci.org/kbuild-clang-21-i386-allmodconfig-69250a4bf5b874…
- dashboard: https://d.kernelci.org/build/maestro:69250a4bf5b8743b1f5fbe05
## x86_64_defconfig on (x86_64):
- compiler: clang-21
- config: https://files.kernelci.org/kbuild-clang-21-x86-69250a16f5b8743b1f5fbddc/.co…
- dashboard: https://d.kernelci.org/build/maestro:69250a16f5b8743b1f5fbddc
#kernelci issue maestro:16f2d4d7ea51d0d2cee2101fea4a5d762aaeca6d
Reported-by: kernelci.org bot <bot(a)kernelci.org>
--
This is an experimental report format. Please send feedback in!
Talk to us at kernelci(a)lists.linux.dev
Made with love by the KernelCI team - https://kernelci.org
Hello,
New build issue found on stable-rc/linux-6.10.y:
---
the frame size of 1192 bytes is larger than 1024 bytes [-Werror=frame-larger-than=] in drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_state.o (drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_state.c) [logspec:kbuild,kbuild.compiler.error]
---
- dashboard: https://d.kernelci.org/i/maestro:2fa2a6ca830cbfb7c388f6da90bf40ae58e3185b
- giturl: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
- commit HEAD: 47c2f92131c47a37ea0e3d8e1a4e4c82a9b473d4
- tags: v6.10.14
Log excerpt:
=====================================================
drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_state.c:219:1: error: the frame size of 1192 bytes is larger than 1024 bytes [-Werror=frame-larger-than=]
219 | }
| ^
CC drivers/gpu/drm/amd/amdgpu/../display/dmub/src/dmub_reg.o
CC drivers/gpu/drm/amd/amdgpu/../display/dmub/src/dmub_dcn20.o
CC drivers/gpu/drm/amd/amdgpu/../display/dmub/src/dmub_dcn21.o
CC drivers/gpu/drm/amd/amdgpu/../display/dmub/src/dmub_dcn30.o
CC drivers/gpu/drm/amd/amdgpu/../display/dmub/src/dmub_dcn301.o
CC drivers/gpu/drm/amd/amdgpu/../display/dmub/src/dmub_dcn302.o
cc1: all warnings being treated as errors
=====================================================
# Builds where the incident occurred:
## defconfig+kcidebug+x86-board on (i386):
- compiler: gcc-14
- config: https://files.kernelci.org/kbuild-gcc-14-x86-kcidebug-6924fef4f5b8743b1f5fa…
- dashboard: https://d.kernelci.org/build/maestro:6924fef4f5b8743b1f5fac6f
#kernelci issue maestro:2fa2a6ca830cbfb7c388f6da90bf40ae58e3185b
Reported-by: kernelci.org bot <bot(a)kernelci.org>
--
This is an experimental report format. Please send feedback in!
Talk to us at kernelci(a)lists.linux.dev
Made with love by the KernelCI team - https://kernelci.org
Hello,
New build issue found on stable-rc/linux-6.7.y:
---
./include/net/ip.h:466:14: error: default initialization of an object of type 'typeof (rt->dst.expires)' (aka 'const unsigned long') leaves the object uninitialized [-Werror,-Wdefault-const-init-var-unsafe] in fs/select.o (fs/select.c) [logspec:kbuild,kbuild.compiler.error]
---
- dashboard: https://d.kernelci.org/i/maestro:ef7b256354a7cedfb60e6ae7cbe595e4f34cf4ed
- giturl: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
- commit HEAD: dacf7e83da42bd9d3978560e41869a784c24d912
- tags: v6.7.12
Log excerpt:
=====================================================
In file included from fs/select.c:33:
In file included from ./include/net/busy_poll.h:18:
./include/net/ip.h:466:14: error: default initialization of an object of type 'typeof (rt->dst.expires)' (aka 'const unsigned long') leaves the object uninitialized [-Werror,-Wdefault-const-init-var-unsafe]
466 | if (mtu && time_before(jiffies, rt->dst.expires))
| ^
./include/linux/jiffies.h:135:26: note: expanded from macro 'time_before'
135 | #define time_before(a,b) time_after(b,a)
| ^
./include/linux/jiffies.h:125:3: note: expanded from macro 'time_after'
125 | (typecheck(unsigned long, a) && \
| ^
./include/linux/typecheck.h:11:12: note: expanded from macro 'typecheck'
11 | typeof(x) __dummy2; \
| ^
1 error generated.
=====================================================
# Builds where the incident occurred:
## x86_64_defconfig on (x86_64):
- compiler: clang-21
- config: https://files.kernelci.org/kbuild-clang-21-x86-6924fd86f5b8743b1f5fa865/.co…
- dashboard: https://d.kernelci.org/build/maestro:6924fd86f5b8743b1f5fa865
#kernelci issue maestro:ef7b256354a7cedfb60e6ae7cbe595e4f34cf4ed
Reported-by: kernelci.org bot <bot(a)kernelci.org>
--
This is an experimental report format. Please send feedback in!
Talk to us at kernelci(a)lists.linux.dev
Made with love by the KernelCI team - https://kernelci.org
Hello,
New build issue found on stable-rc/linux-6.7.y:
---
default initialization of an object of type 'struct kernel_param' with const member leaves the object uninitialized [-Werror,-Wdefault-const-init-field-unsafe] in kernel/params.o (kernel/params.c) [logspec:kbuild,kbuild.compiler.error]
---
- dashboard: https://d.kernelci.org/i/maestro:b49cc34e36d28dfb446fecde078c599d5e502b0f
- giturl: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
- commit HEAD: dacf7e83da42bd9d3978560e41869a784c24d912
- tags: v6.7.12
Log excerpt:
=====================================================
kernel/params.c:368:22: error: default initialization of an object of type 'struct kernel_param' with const member leaves the object uninitialized [-Werror,-Wdefault-const-init-field-unsafe]
368 | struct kernel_param dummy;
| ^
./include/linux/moduleparam.h:73:12: note: member 'perm' declared 'const' here
73 | const u16 perm;
| ^
kernel/params.c:424:22: error: default initialization of an object of type 'struct kernel_param' with const member leaves the object uninitialized [-Werror,-Wdefault-const-init-field-unsafe]
424 | struct kernel_param kp;
| ^
./include/linux/moduleparam.h:73:12: note: member 'perm' declared 'const' here
73 | co CC arch/x86/xen/setup.o
nst u16 perm;
| ^
HDRTEST usr/include/linux/net_namespace.h
2 errors generated.
=====================================================
# Builds where the incident occurred:
## x86_64_defconfig+allmodconfig on (x86_64):
- compiler: clang-21
- config: https://files.kernelci.org/kbuild-clang-21-x86-allmodconfig-6924fd8bf5b8743…
- dashboard: https://d.kernelci.org/build/maestro:6924fd8bf5b8743b1f5fa868
#kernelci issue maestro:b49cc34e36d28dfb446fecde078c599d5e502b0f
Reported-by: kernelci.org bot <bot(a)kernelci.org>
--
This is an experimental report format. Please send feedback in!
Talk to us at kernelci(a)lists.linux.dev
Made with love by the KernelCI team - https://kernelci.org
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 17393fa7b7086664be519e7230cb6ed7ec7d9462
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025112424-drift-duffel-a7f9@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 17393fa7b7086664be519e7230cb6ed7ec7d9462 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni(a)redhat.com>
Date: Tue, 18 Nov 2025 08:20:21 +0100
Subject: [PATCH] mptcp: fix premature close in case of fallback
I'm observing very frequent self-tests failures in case of fallback when
running on a CONFIG_PREEMPT kernel.
The root cause is that subflow_sched_work_if_closed() closes any subflow
as soon as it is half-closed and has no incoming data pending.
That works well for regular subflows - MPTCP needs bi-directional
connectivity to operate on a given subflow - but for fallback socket is
race prone.
When TCP peer closes the connection before the MPTCP one,
subflow_sched_work_if_closed() will schedule the MPTCP worker to
gracefully close the subflow, and shortly after will do another schedule
to inject and process a dummy incoming DATA_FIN.
On CONFIG_PREEMPT kernel, the MPTCP worker can kick-in and close the
fallback subflow before subflow_sched_work_if_closed() is able to create
the dummy DATA_FIN, unexpectedly interrupting the transfer.
Address the issue explicitly avoiding closing fallback subflows on when
the peer is only half-closed.
Note that, when the subflow is able to create the DATA_FIN before the
worker invocation, the worker will change the msk state before trying to
close the subflow and will skip the latter operation as the msk will not
match anymore the precondition in __mptcp_close_subflow().
Fixes: f09b0ad55a11 ("mptcp: close subflow when receiving TCP+FIN")
Cc: stable(a)vger.kernel.org
Signed-off-by: Paolo Abeni <pabeni(a)redhat.com>
Reviewed-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
Link: https://patch.msgid.link/20251118-net-mptcp-misc-fixes-6-18-rc6-v1-3-806d37…
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index e27e0fe2460f..e30e9043a694 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -2563,7 +2563,8 @@ static void __mptcp_close_subflow(struct sock *sk)
if (ssk_state != TCP_CLOSE &&
(ssk_state != TCP_CLOSE_WAIT ||
- inet_sk_state_load(sk) != TCP_ESTABLISHED))
+ inet_sk_state_load(sk) != TCP_ESTABLISHED ||
+ __mptcp_check_fallback(msk)))
continue;
/* 'subflow_data_ready' will re-sched once rx queue is empty */
Optimiza la gestión de compensaciones
Garantiza remuneraciones justas y competitivas con Vorecol Compensation Management.
Hola ,
Garantizar remuneraciones justas y competitivas es clave para atraer y retener talento, pero muchas veces determinar el salario adecuado puede ser complejo.
Con Vorecol Compensation Management podrás establecer con precisión el salario para cada puesto, respaldado por criterios objetivos basados en los factores más relevantes de tu organización.
Con Vorecol Compensation Management puedes:
• Determinar salarios adecuados y equitativos para todos los roles.
• Respaldar las decisiones de compensación con justificaciones objetivas.
• Facilitar la transparencia y confianza en la gestión de remuneraciones.
Esto permite crear un entorno laboral más justo, motivador y competitivo, beneficiando tanto a tu equipo como a la empresa.
Si quieres conocer más sobre cómo optimizar la gestión de compensaciones, puedes responder a este correo o contactarme directamente, mis datos están abajo.
Saludos,
--------------
Atte.: Daniel Rodríguez
Ciudad de México: (55) 5018 0565
WhatsApp: +52 33 1607 2089
Si no deseas recibir más correos, haz clic aquí para darte de baja.
Para remover su dirección de esta lista haga <a href="https://s1.arrobamail.com/unsuscribe.php?id=yiwtsrewiswqeyseup">click aquí</a>