commit c929d08df8bee855528b9d15b853c892c54e1eee upstream.
If the warning mode with disabled mitigation mode is used, then on each
CPU where the split lock occurred detection will be disabled in order to
make progress and delayed work will be scheduled, which then will enable
detection back.
Now it turns out that all CPUs use one global delayed work structure.
This leads to the fact that if a split lock occurs on several CPUs
at the same time (within 2 jiffies), only one CPU will schedule delayed
work, but the rest will not.
The return value of schedule_delayed_work_on() would have shown this,
but it is not checked in the code.
A diagram that can help to understand the bug reproduction:
- sld_update_msr() enables/disables SLD on both CPUs on the same core
- schedule_delayed_work_on() internally checks WORK_STRUCT_PENDING_BIT.
If a work has the 'pending' status, then schedule_delayed_work_on()
will return an error code and, most importantly, the work will not
be placed in the workqueue.
Let's say we have a multicore system on which split_lock_mitigate=0 and
a multithreaded application is running that calls splitlock in multiple
threads. Due to the fact that sld_update_msr() affects the entire core
(both CPUs), we will consider 2 CPUs from different cores. Let the 2
threads of this application schedule to CPU0 (core 0) and to CPU 2
(core 1), then:
| || |
| CPU 0 (core 0) || CPU 2 (core 1) |
|_________________________________||___________________________________|
| || |
| 1) SPLIT LOCK occured || |
| || |
| 2) split_lock_warn() || |
| || |
| 3) sysctl_sld_mitigate == 0 || |
| (work = &sl_reenable) || |
| || |
| 4) schedule_delayed_work_on() || |
| (reenable will be called || |
| after 2 jiffies on CPU 0) || |
| || |
| 5) disable SLD for core 0 || |
| || |
| ------------------------- || |
| || |
| || 6) SPLIT LOCK occured |
| || |
| || 7) split_lock_warn() |
| || |
| || 8) sysctl_sld_mitigate == 0 |
| || (work = &sl_reenable, |
| || the same address as in 3) ) |
| || |
| 2 jiffies || 9) schedule_delayed_work_on() |
| || fials because the work is in |
| || the pending state since 4). |
| || The work wasn't placed to the |
| || workqueue. reenable won't be |
| || called on CPU 2 |
| || |
| || 10) disable SLD for core 0 |
| || |
| || From now on SLD will |
| || never be reenabled on core 1 |
| || |
| ------------------------- || |
| || |
| 11) enable SLD for core 0 by || |
| __split_lock_reenable || |
| || |
If the application threads can be scheduled to all processor cores,
then over time there will be only one core left, on which SLD will be
enabled and split lock will be able to be detected; and on all other
cores SLD will be disabled all the time.
Most likely, this bug has not been noticed for so long because
sysctl_sld_mitigate default value is 1, and in this case a semaphore
is used that does not allow 2 different cores to have SLD disabled at
the same time, that is, strictly only one work is placed in the
workqueue.
In order to fix the warning mode with disabled mitigation mode,
delayed work has to be per-CPU. Implement it.
Fixes: 727209376f49 ("x86/split_lock: Add sysctl to control the misery mode")
Signed-off-by: Maksim Davydov <davydov-max(a)yandex-team.ru>
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Tested-by: Guilherme G. Piccoli <gpiccoli(a)igalia.com>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Ravi Bangoria <ravi.bangoria(a)amd.com>
Cc: Tom Lendacky <thomas.lendacky(a)amd.com>
Link: https://lore.kernel.org/r/20250115131704.132609-1-davydov-max@yandex-team.ru
---
arch/x86/kernel/cpu/intel.c | 20 ++++++++++++++++----
1 file changed, 16 insertions(+), 4 deletions(-)
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index b91f3d72bcdd..2c43a1423b09 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -1204,7 +1204,13 @@ static void __split_lock_reenable(struct work_struct *work)
{
sld_update_msr(true);
}
-static DECLARE_DELAYED_WORK(sl_reenable, __split_lock_reenable);
+/*
+ * In order for each CPU to schedule its delayed work independently of the
+ * others, delayed work struct must be per-CPU. This is not required when
+ * sysctl_sld_mitigate is enabled because of the semaphore that limits
+ * the number of simultaneously scheduled delayed works to 1.
+ */
+static DEFINE_PER_CPU(struct delayed_work, sl_reenable);
/*
* If a CPU goes offline with pending delayed work to re-enable split lock
@@ -1225,7 +1231,7 @@ static int splitlock_cpu_offline(unsigned int cpu)
static void split_lock_warn(unsigned long ip)
{
- struct delayed_work *work;
+ struct delayed_work *work = NULL;
int cpu;
if (!current->reported_split_lock)
@@ -1247,11 +1253,17 @@ static void split_lock_warn(unsigned long ip)
if (down_interruptible(&buslock_sem) == -EINTR)
return;
work = &sl_reenable_unlock;
- } else {
- work = &sl_reenable;
}
cpu = get_cpu();
+
+ if (!work) {
+ work = this_cpu_ptr(&sl_reenable);
+ /* Deferred initialization of per-CPU struct */
+ if (!work->work.func)
+ INIT_DELAYED_WORK(work, __split_lock_reenable);
+ }
+
schedule_delayed_work_on(cpu, work, 2);
/* Disable split lock detection on this CPU to make progress */
--
2.34.1
commit c929d08df8bee855528b9d15b853c892c54e1eee upstream.
If the warning mode with disabled mitigation mode is used, then on each
CPU where the split lock occurred detection will be disabled in order to
make progress and delayed work will be scheduled, which then will enable
detection back.
Now it turns out that all CPUs use one global delayed work structure.
This leads to the fact that if a split lock occurs on several CPUs
at the same time (within 2 jiffies), only one CPU will schedule delayed
work, but the rest will not.
The return value of schedule_delayed_work_on() would have shown this,
but it is not checked in the code.
A diagram that can help to understand the bug reproduction:
- sld_update_msr() enables/disables SLD on both CPUs on the same core
- schedule_delayed_work_on() internally checks WORK_STRUCT_PENDING_BIT.
If a work has the 'pending' status, then schedule_delayed_work_on()
will return an error code and, most importantly, the work will not
be placed in the workqueue.
Let's say we have a multicore system on which split_lock_mitigate=0 and
a multithreaded application is running that calls splitlock in multiple
threads. Due to the fact that sld_update_msr() affects the entire core
(both CPUs), we will consider 2 CPUs from different cores. Let the 2
threads of this application schedule to CPU0 (core 0) and to CPU 2
(core 1), then:
| || |
| CPU 0 (core 0) || CPU 2 (core 1) |
|_________________________________||___________________________________|
| || |
| 1) SPLIT LOCK occured || |
| || |
| 2) split_lock_warn() || |
| || |
| 3) sysctl_sld_mitigate == 0 || |
| (work = &sl_reenable) || |
| || |
| 4) schedule_delayed_work_on() || |
| (reenable will be called || |
| after 2 jiffies on CPU 0) || |
| || |
| 5) disable SLD for core 0 || |
| || |
| ------------------------- || |
| || |
| || 6) SPLIT LOCK occured |
| || |
| || 7) split_lock_warn() |
| || |
| || 8) sysctl_sld_mitigate == 0 |
| || (work = &sl_reenable, |
| || the same address as in 3) ) |
| || |
| 2 jiffies || 9) schedule_delayed_work_on() |
| || fials because the work is in |
| || the pending state since 4). |
| || The work wasn't placed to the |
| || workqueue. reenable won't be |
| || called on CPU 2 |
| || |
| || 10) disable SLD for core 0 |
| || |
| || From now on SLD will |
| || never be reenabled on core 1 |
| || |
| ------------------------- || |
| || |
| 11) enable SLD for core 0 by || |
| __split_lock_reenable || |
| || |
If the application threads can be scheduled to all processor cores,
then over time there will be only one core left, on which SLD will be
enabled and split lock will be able to be detected; and on all other
cores SLD will be disabled all the time.
Most likely, this bug has not been noticed for so long because
sysctl_sld_mitigate default value is 1, and in this case a semaphore
is used that does not allow 2 different cores to have SLD disabled at
the same time, that is, strictly only one work is placed in the
workqueue.
In order to fix the warning mode with disabled mitigation mode,
delayed work has to be per-CPU. Implement it.
Fixes: 727209376f49 ("x86/split_lock: Add sysctl to control the misery mode")
Signed-off-by: Maksim Davydov <davydov-max(a)yandex-team.ru>
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Tested-by: Guilherme G. Piccoli <gpiccoli(a)igalia.com>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Ravi Bangoria <ravi.bangoria(a)amd.com>
Cc: Tom Lendacky <thomas.lendacky(a)amd.com>
Link: https://lore.kernel.org/r/20250115131704.132609-1-davydov-max@yandex-team.ru
---
arch/x86/kernel/cpu/intel.c | 20 ++++++++++++++++----
1 file changed, 16 insertions(+), 4 deletions(-)
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 38eeff91109f..6c0e0619e6d3 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -1168,7 +1168,13 @@ static void __split_lock_reenable(struct work_struct *work)
{
sld_update_msr(true);
}
-static DECLARE_DELAYED_WORK(sl_reenable, __split_lock_reenable);
+/*
+ * In order for each CPU to schedule its delayed work independently of the
+ * others, delayed work struct must be per-CPU. This is not required when
+ * sysctl_sld_mitigate is enabled because of the semaphore that limits
+ * the number of simultaneously scheduled delayed works to 1.
+ */
+static DEFINE_PER_CPU(struct delayed_work, sl_reenable);
/*
* If a CPU goes offline with pending delayed work to re-enable split lock
@@ -1189,7 +1195,7 @@ static int splitlock_cpu_offline(unsigned int cpu)
static void split_lock_warn(unsigned long ip)
{
- struct delayed_work *work;
+ struct delayed_work *work = NULL;
int cpu;
if (!current->reported_split_lock)
@@ -1211,11 +1217,17 @@ static void split_lock_warn(unsigned long ip)
if (down_interruptible(&buslock_sem) == -EINTR)
return;
work = &sl_reenable_unlock;
- } else {
- work = &sl_reenable;
}
cpu = get_cpu();
+
+ if (!work) {
+ work = this_cpu_ptr(&sl_reenable);
+ /* Deferred initialization of per-CPU struct */
+ if (!work->work.func)
+ INIT_DELAYED_WORK(work, __split_lock_reenable);
+ }
+
schedule_delayed_work_on(cpu, work, 2);
/* Disable split lock detection on this CPU to make progress */
--
2.34.1
startup()/shutdown() callbacks access SIFIVE_SERIAL_IE_OFFS.
The register is also accessed from write() callback.
If console were printing and startup()/shutdown() callback
gets called, its access to the register could be overwritten.
Add port->lock to startup()/shutdown() callbacks to make sure
their access to SIFIVE_SERIAL_IE_OFFS is synchronized against
write() callback.
Fixes: 45c054d0815b ("tty: serial: add driver for the SiFive UART")
Signed-off-by: Ryo Takakura <ryotkkr98(a)gmail.com>
Cc: stable(a)vger.kernel.org
---
This patch used be part of a series for converting sifive driver to
nbcon[0]. It's now sent seperatly as the rest of the series does not
need be applied to the stable branch.
Sincerely,
Ryo Takakura
[0] https://lore.kernel.org/all/20250405043833.397020-1-ryotkkr98@gmail.com/
---
drivers/tty/serial/sifive.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/drivers/tty/serial/sifive.c b/drivers/tty/serial/sifive.c
index 5904a2d4c..054a8e630 100644
--- a/drivers/tty/serial/sifive.c
+++ b/drivers/tty/serial/sifive.c
@@ -563,8 +563,11 @@ static void sifive_serial_break_ctl(struct uart_port *port, int break_state)
static int sifive_serial_startup(struct uart_port *port)
{
struct sifive_serial_port *ssp = port_to_sifive_serial_port(port);
+ unsigned long flags;
+ uart_port_lock_irqsave(&ssp->port, &flags);
__ssp_enable_rxwm(ssp);
+ uart_port_unlock_irqrestore(&ssp->port, flags);
return 0;
}
@@ -572,9 +575,12 @@ static int sifive_serial_startup(struct uart_port *port)
static void sifive_serial_shutdown(struct uart_port *port)
{
struct sifive_serial_port *ssp = port_to_sifive_serial_port(port);
+ unsigned long flags;
+ uart_port_lock_irqsave(&ssp->port, &flags);
__ssp_disable_rxwm(ssp);
__ssp_disable_txwm(ssp);
+ uart_port_unlock_irqrestore(&ssp->port, flags);
}
/**
--
2.34.1
In commit 7e119cff9d0a, "ocfs2: convert w_pages to w_folios" the
chunk page allocations became order 0 folio allocations. If an
allocation failed, the folio array entry should be NULL so the
error path can skip the entry. In the port it is -ENOMEM and
the error path panics trying to free this bad value.
Signed-off-by: Mark Tinguely <mark.tinguely(a)oracle.com>
Cc: stable(a)vger.kernel.org
Cc: Changwei Ge <gechangwei(a)live.cn>
Cc: Joel Becker <jlbec(a)evilplan.org>
Cc: Jun Piao <piaojun(a)huawei.com>
Cc: Junxiao Bi <junxiao.bi(a)oracle.com>
Cc: Mark Fasheh <mark(a)fasheh.com>
---
fs/ocfs2/aops.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 40b6bce12951..89aadc6cdd87 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1071,6 +1071,7 @@ static int ocfs2_grab_folios_for_write(struct
address_space *mapping,
if (IS_ERR(wc->w_folios[i])) {
ret = PTR_ERR(wc->w_folios[i]);
mlog_errno(ret);
+ wc->w_folios[i] = NULL;
goto out;
}
}
--
2.39.5 (Apple Git-154)