From: Steven Rostedt <rostedt(a)goodmis.org>
A bug was discovered where the idle shadow stacks were not initialized
for offline CPUs when starting function graph tracer, and when they came
online they were not traced due to the missing shadow stack. To fix
this, the idle task shadow stack initialization was moved to using the
CPU hotplug callbacks. But it removed the initialization when the
function graph was enabled. The problem here is that the hotplug
callbacks are called when the CPUs come online, but the idle shadow
stack initialization only happens if function graph is currently
active. This caused the online CPUs to not get their shadow stack
initialized.
The idle shadow stack initialization still needs to be done when the
function graph is registered, as they will not be allocated if function
graph is not registered.
Cc: stable(a)vger.kernel.org
Cc: Masami Hiramatsu <mhiramat(a)kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers(a)efficios.com>
Link: https://lore.kernel.org/20241211135335.094ba282@batman.local.home
Fixes: 2c02f7375e65 ("fgraph: Use CPU hotplug mechanism to initialize idle shadow stacks")
Reported-by: Linus Walleij <linus.walleij(a)linaro.org>
Tested-by: Linus Walleij <linus.walleij(a)linaro.org>
Closes: https://lore.kernel.org/all/CACRpkdaTBrHwRbbrphVy-=SeDz6MSsXhTKypOtLrTQ+DgG…
Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
---
kernel/trace/fgraph.c | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
index 0bf78517b5d4..ddedcb50917f 100644
--- a/kernel/trace/fgraph.c
+++ b/kernel/trace/fgraph.c
@@ -1215,7 +1215,7 @@ void fgraph_update_pid_func(void)
static int start_graph_tracing(void)
{
unsigned long **ret_stack_list;
- int ret;
+ int ret, cpu;
ret_stack_list = kcalloc(FTRACE_RETSTACK_ALLOC_SIZE,
sizeof(*ret_stack_list), GFP_KERNEL);
@@ -1223,6 +1223,12 @@ static int start_graph_tracing(void)
if (!ret_stack_list)
return -ENOMEM;
+ /* The cpu_boot init_task->ret_stack will never be freed */
+ for_each_online_cpu(cpu) {
+ if (!idle_task(cpu)->ret_stack)
+ ftrace_graph_init_idle_task(idle_task(cpu), cpu);
+ }
+
do {
ret = alloc_retstack_tasklist(ret_stack_list);
} while (ret == -EAGAIN);
--
2.45.2
From: Steven Rostedt <rostedt(a)goodmis.org>
The "%p" in the trace output is by default hashes the pointer. An option
was added to disable the hashing as reading trace output is a privileged
operation (just like reading kallsyms). When hashing is disabled, the
iter->fmt temp buffer is used to add "x" to "%p" into "%px" before sending
to the svnprintf() functions.
The problem with using iter->fmt, is that the trace_check_vprintf() that
makes sure that trace events "%pX" pointers are not dereferencing freed
addresses (and prints a warning if it does) also uses the iter->fmt to
save to and use to print out for the trace file. When the hash_ptr option
is disabled, the "%px" version is added to the iter->fmt buffer, and that
then is passed to the trace_check_vprintf() function that then uses the
iter->fmt as a temp buffer. Obviously this caused bad results.
This was noticed when backporting the persistent ring buffer to 5.10 and
added this code without the option being disabled by default, so it failed
one of the selftests because the sched_wakeup was missing the "comm"
field:
cat-907 [006] dN.4. 249.722403: sched_wakeup: comm= pid=74 prio=120 target_cpu=006
Instead of showing:
<idle>-0 [004] dNs6. 49.076464: sched_wakeup: comm=sshd-session pid=896 prio=120 target_cpu=0040
To fix this, change trace_check_vprintf() to modify the iter->fmt instead
of copying to it. If the fmt passed in is not the iter->fmt, first copy
the entire fmt string to iter->fmt and then iterate the iter->fmt. When
the format needs to be processed, perform the following like actions:
save_ch = p[i];
p[i] = '\0';
trace_seq_printf(&iter->seq, p, str);
p[i] = save_ch;
Cc: stable(a)vger.kernel.org
Cc: Masami Hiramatsu <mhiramat(a)kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers(a)efficios.com>
Link: https://lore.kernel.org/20241212105426.113f2be3@batman.local.home
Fixes: efbbdaa22bb78 ("tracing: Show real address for trace event arguments")
Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
---
kernel/trace/trace.c | 90 +++++++++++++++++++++++++++-----------------
1 file changed, 55 insertions(+), 35 deletions(-)
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index be62f0ea1814..b44b1cdaa20e 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3711,8 +3711,10 @@ void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
{
long text_delta = 0;
long data_delta = 0;
- const char *p = fmt;
const char *str;
+ char save_ch;
+ char *buf = NULL;
+ char *p;
bool good;
int i, j;
@@ -3720,7 +3722,7 @@ void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
return;
if (static_branch_unlikely(&trace_no_verify))
- goto print;
+ goto print_fmt;
/*
* When the kernel is booted with the tp_printk command line
@@ -3735,8 +3737,21 @@ void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
/* Don't bother checking when doing a ftrace_dump() */
if (iter->fmt == static_fmt_buf)
- goto print;
+ goto print_fmt;
+ if (fmt != iter->fmt) {
+ int len = strlen(fmt);
+ while (iter->fmt_size < len + 1) {
+ /*
+ * If we can't expand the copy buffer,
+ * just print it.
+ */
+ if (!trace_iter_expand_format(iter))
+ goto print_fmt;
+ }
+ strscpy(iter->fmt, fmt, iter->fmt_size);
+ }
+ p = iter->fmt;
while (*p) {
bool star = false;
int len = 0;
@@ -3748,14 +3763,6 @@ void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
* as well as %p[sS] if delta is non-zero
*/
for (i = 0; p[i]; i++) {
- if (i + 1 >= iter->fmt_size) {
- /*
- * If we can't expand the copy buffer,
- * just print it.
- */
- if (!trace_iter_expand_format(iter))
- goto print;
- }
if (p[i] == '\\' && p[i+1]) {
i++;
@@ -3788,10 +3795,11 @@ void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
if (!p[i])
break;
- /* Copy up to the %s, and print that */
- strncpy(iter->fmt, p, i);
- iter->fmt[i] = '\0';
- trace_seq_vprintf(&iter->seq, iter->fmt, ap);
+ /* Print up to the %s */
+ save_ch = p[i];
+ p[i] = '\0';
+ trace_seq_vprintf(&iter->seq, p, ap);
+ p[i] = save_ch;
/* Add delta to %pS pointers */
if (p[i+1] == 'p') {
@@ -3837,6 +3845,8 @@ void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
good = trace_safe_str(iter, str, star, len);
}
+ p += i;
+
/*
* If you hit this warning, it is likely that the
* trace event in question used %s on a string that
@@ -3849,41 +3859,51 @@ void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
if (WARN_ONCE(!good, "fmt: '%s' current_buffer: '%s'",
fmt, seq_buf_str(&iter->seq.seq))) {
int ret;
+#define TEMP_BUFSIZ 1024
+
+ if (!buf) {
+ char *buf = kmalloc(TEMP_BUFSIZ, GFP_KERNEL);
+ if (!buf) {
+ /* Need buffer to read address */
+ trace_seq_printf(&iter->seq, "(0x%px)[UNSAFE-MEMORY]", str);
+ p += j + 1;
+ goto print;
+ }
+ }
+ if (len >= TEMP_BUFSIZ)
+ len = TEMP_BUFSIZ - 1;
/* Try to safely read the string */
if (star) {
- if (len + 1 > iter->fmt_size)
- len = iter->fmt_size - 1;
- if (len < 0)
- len = 0;
- ret = copy_from_kernel_nofault(iter->fmt, str, len);
- iter->fmt[len] = 0;
- star = false;
+ ret = copy_from_kernel_nofault(buf, str, len);
+ buf[len] = 0;
} else {
- ret = strncpy_from_kernel_nofault(iter->fmt, str,
- iter->fmt_size);
+ ret = strncpy_from_kernel_nofault(buf, str, TEMP_BUFSIZ);
}
if (ret < 0)
trace_seq_printf(&iter->seq, "(0x%px)", str);
else
- trace_seq_printf(&iter->seq, "(0x%px:%s)",
- str, iter->fmt);
- str = "[UNSAFE-MEMORY]";
- strcpy(iter->fmt, "%s");
+ trace_seq_printf(&iter->seq, "(0x%px:%s)", str, buf);
+ trace_seq_puts(&iter->seq, "[UNSAFE-MEMORY]");
} else {
- strncpy(iter->fmt, p + i, j + 1);
- iter->fmt[j+1] = '\0';
+ save_ch = p[j + 1];
+ p[j + 1] = '\0';
+ if (star)
+ trace_seq_printf(&iter->seq, p, len, str);
+ else
+ trace_seq_printf(&iter->seq, p, str);
+ p[j + 1] = save_ch;
}
- if (star)
- trace_seq_printf(&iter->seq, iter->fmt, len, str);
- else
- trace_seq_printf(&iter->seq, iter->fmt, str);
- p += i + j + 1;
+ p += j + 1;
}
print:
if (*p)
trace_seq_vprintf(&iter->seq, p, ap);
+ kfree(buf);
+ return;
+ print_fmt:
+ trace_seq_vprintf(&iter->seq, fmt, ap);
}
const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
--
2.45.2
From: Ian Ray <ian.ray(a)gehealthcare.com>
[ Upstream commit bfc6444b57dc7186b6acc964705d7516cbaf3904 ]
Ensure that `i2c_lock' is held when setting interrupt latch and mask in
pca953x_irq_bus_sync_unlock() in order to avoid races.
The other (non-probe) call site pca953x_gpio_set_multiple() ensures the
lock is held before calling pca953x_write_regs().
The problem occurred when a request raced against irq_bus_sync_unlock()
approximately once per thousand reboots on an i.MX8MP based system.
* Normal case
0-0022: write register AI|3a {03,02,00,00,01} Input latch P0
0-0022: write register AI|49 {fc,fd,ff,ff,fe} Interrupt mask P0
0-0022: write register AI|08 {ff,00,00,00,00} Output P3
0-0022: write register AI|12 {fc,00,00,00,00} Config P3
* Race case
0-0022: write register AI|08 {ff,00,00,00,00} Output P3
0-0022: write register AI|08 {03,02,00,00,01} *** Wrong register ***
0-0022: write register AI|12 {fc,00,00,00,00} Config P3
0-0022: write register AI|49 {fc,fd,ff,ff,fe} Interrupt mask P0
Signed-off-by: Ian Ray <ian.ray(a)gehealthcare.com>
Link: https://lore.kernel.org/r/20240620042915.2173-1-ian.ray@gehealthcare.com
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski(a)linaro.org>
Signed-off-by: Guocai He <guocai.he.cn(a)windriver.com>
---
This commit is to solve the CVE-2024-42253. Please merge this commit to linux-5.15.y.
drivers/gpio/gpio-pca953x.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c
index 4860bf3b7e00..4e97b6ae4f72 100644
--- a/drivers/gpio/gpio-pca953x.c
+++ b/drivers/gpio/gpio-pca953x.c
@@ -672,6 +672,8 @@ static void pca953x_irq_bus_sync_unlock(struct irq_data *d)
int level;
if (chip->driver_data & PCA_PCAL) {
+ guard(mutex)(&chip->i2c_lock);
+
/* Enable latch on interrupt-enabled inputs */
pca953x_write_regs(chip, PCAL953X_IN_LATCH, chip->irq_mask);
--
2.34.1
From: Baokun Li <libaokun1(a)huawei.com>
[ Upstream commit b4b4fda34e535756f9e774fb2d09c4537b7dfd1c ]
In the following concurrency we will access the uninitialized rs->lock:
ext4_fill_super
ext4_register_sysfs
// sysfs registered msg_ratelimit_interval_ms
// Other processes modify rs->interval to
// non-zero via msg_ratelimit_interval_ms
ext4_orphan_cleanup
ext4_msg(sb, KERN_INFO, "Errors on filesystem, "
__ext4_msg
___ratelimit(&(EXT4_SB(sb)->s_msg_ratelimit_state)
if (!rs->interval) // do nothing if interval is 0
return 1;
raw_spin_trylock_irqsave(&rs->lock, flags)
raw_spin_trylock(lock)
_raw_spin_trylock
__raw_spin_trylock
spin_acquire(&lock->dep_map, 0, 1, _RET_IP_)
lock_acquire
__lock_acquire
register_lock_class
assign_lock_key
dump_stack();
ratelimit_state_init(&sbi->s_msg_ratelimit_state, 5 * HZ, 10);
raw_spin_lock_init(&rs->lock);
// init rs->lock here
and get the following dump_stack:
=========================================================
INFO: trying to register non-static key.
The code is fine but needs lockdep annotation, or maybe
you didn't initialize this object before use?
turning off the locking correctness validator.
CPU: 12 PID: 753 Comm: mount Tainted: G E 6.7.0-rc6-next-20231222 #504
[...]
Call Trace:
dump_stack_lvl+0xc5/0x170
dump_stack+0x18/0x30
register_lock_class+0x740/0x7c0
__lock_acquire+0x69/0x13a0
lock_acquire+0x120/0x450
_raw_spin_trylock+0x98/0xd0
___ratelimit+0xf6/0x220
__ext4_msg+0x7f/0x160 [ext4]
ext4_orphan_cleanup+0x665/0x740 [ext4]
__ext4_fill_super+0x21ea/0x2b10 [ext4]
ext4_fill_super+0x14d/0x360 [ext4]
[...]
=========================================================
Normally interval is 0 until s_msg_ratelimit_state is initialized, so
___ratelimit() does nothing. But registering sysfs precedes initializing
rs->lock, so it is possible to change rs->interval to a non-zero value
via the msg_ratelimit_interval_ms interface of sysfs while rs->lock is
uninitialized, and then a call to ext4_msg triggers the problem by
accessing an uninitialized rs->lock. Therefore register sysfs after all
initializations are complete to avoid such problems.
Signed-off-by: Baokun Li <libaokun1(a)huawei.com>
Reviewed-by: Jan Kara <jack(a)suse.cz>
Link: https://lore.kernel.org/r/20240102133730.1098120-1-libaokun1@huawei.com
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
[ Resolve merge conflicts ]
Signed-off-by: Bin Lan <bin.lan.cn(a)windriver.com>
---
fs/ext4/super.c | 22 ++++++++++------------
1 file changed, 10 insertions(+), 12 deletions(-)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 987d49e18dbe..e6f08ee9895f 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -5506,19 +5506,15 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
if (err)
goto failed_mount6;
- err = ext4_register_sysfs(sb);
- if (err)
- goto failed_mount7;
-
err = ext4_init_orphan_info(sb);
if (err)
- goto failed_mount8;
+ goto failed_mount7;
#ifdef CONFIG_QUOTA
/* Enable quota usage during mount. */
if (ext4_has_feature_quota(sb) && !sb_rdonly(sb)) {
err = ext4_enable_quotas(sb);
if (err)
- goto failed_mount9;
+ goto failed_mount8;
}
#endif /* CONFIG_QUOTA */
@@ -5545,7 +5541,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
ext4_msg(sb, KERN_INFO, "recovery complete");
err = ext4_mark_recovery_complete(sb, es);
if (err)
- goto failed_mount10;
+ goto failed_mount9;
}
if (test_opt(sb, DISCARD) && !bdev_max_discard_sectors(sb->s_bdev))
@@ -5562,15 +5558,17 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
atomic_set(&sbi->s_warning_count, 0);
atomic_set(&sbi->s_msg_count, 0);
+ /* Register sysfs after all initializations are complete. */
+ err = ext4_register_sysfs(sb);
+ if (err)
+ goto failed_mount9;
+
return 0;
-failed_mount10:
+failed_mount9:
ext4_quota_off_umount(sb);
-failed_mount9: __maybe_unused
+failed_mount8: __maybe_unused
ext4_release_orphan_info(sb);
-failed_mount8:
- ext4_unregister_sysfs(sb);
- kobject_put(&sbi->s_kobj);
failed_mount7:
ext4_unregister_li_request(sb);
failed_mount6:
--
2.43.0