If the starting position of our insert range happens to be in the hole
between two ext4_extent_idx entries, the lblk of every ext4_extent in
the previous ext4_extent_idx is less than start, so the "extent"
variable is advanced across the boundary and the following UAF is
triggered:
==================================================================
BUG: KASAN: use-after-free in ext4_ext_shift_extents+0x257/0x790
Read of size 4 at addr ffff88819807a008 by task fallocate/8010
CPU: 3 PID: 8010 Comm: fallocate Tainted: G E 5.10.0+ #492
Call Trace:
dump_stack+0x7d/0xa3
print_address_description.constprop.0+0x1e/0x220
kasan_report.cold+0x67/0x7f
ext4_ext_shift_extents+0x257/0x790
ext4_insert_range+0x5b6/0x700
ext4_fallocate+0x39e/0x3d0
vfs_fallocate+0x26f/0x470
ksys_fallocate+0x3a/0x70
__x64_sys_fallocate+0x4f/0x60
do_syscall_64+0x33/0x40
entry_SYSCALL_64_after_hwframe+0x44/0xa9
==================================================================
For right shifts, we can divide them into the following situations:
1. When the first ee_block of ext4_extent_idx is greater than or equal to
start, make right shifts directly from the first ee_block.
1) If it is greater than start, we need to continue searching in the
previous ext4_extent_idx.
2) If it is equal to start, we can exit the loop (iterator=NULL).
2. When the first ee_block of ext4_extent_idx is less than start, then
traverse from the last extent to find the first extent whose ee_block
is less than start.
1) If extent is still the last extent after traversal, it means that
the last ee_block of ext4_extent_idx is less than start, that is,
start is located in the hole between idx and (idx+1), so we can
exit the loop directly (break) without right shifts.
2) Otherwise, make right shifts at the corresponding position of the
found extent, and then exit the loop (iterator=NULL).
Fixes: 331573febb6a ("ext4: Add support FALLOC_FL_INSERT_RANGE for fallocate")
Cc: stable(a)vger.kernel.org # v4.2+
Signed-off-by: Zhihao Cheng <chengzhihao1(a)huawei.com>
Signed-off-by: Baokun Li <libaokun1(a)huawei.com>
---
V1->V2:
Initialize "ret" after the "again:" label to avoid return value mismatch.
Refactoring reduces cycles and makes code more readable.
fs/ext4/extents.c | 18 +++++++++++++-----
1 file changed, 13 insertions(+), 5 deletions(-)
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index c148bb97b527..39c9f87de0be 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -5179,6 +5179,7 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
* and it is decreased till we reach start.
*/
again:
+ ret = 0;
if (SHIFT == SHIFT_LEFT)
iterator = &start;
else
@@ -5222,14 +5223,21 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
ext4_ext_get_actual_len(extent);
} else {
extent = EXT_FIRST_EXTENT(path[depth].p_hdr);
- if (le32_to_cpu(extent->ee_block) > 0)
+ if (le32_to_cpu(extent->ee_block) > start)
*iterator = le32_to_cpu(extent->ee_block) - 1;
- else
- /* Beginning is reached, end of the loop */
+ else if (le32_to_cpu(extent->ee_block) == start)
iterator = NULL;
- /* Update path extent in case we need to stop */
- while (le32_to_cpu(extent->ee_block) < start)
+ else {
+ extent = EXT_LAST_EXTENT(path[depth].p_hdr);
+ while (le32_to_cpu(extent->ee_block) >= start)
+ extent--;
+
+ if (extent == EXT_LAST_EXTENT(path[depth].p_hdr))
+ break;
+
extent++;
+ iterator = NULL;
+ }
path[depth].p_ext = extent;
}
ret = ext4_ext_shift_path_extents(path, shift, inode,
--
2.31.1
From: Eric Biggers <ebiggers(a)google.com>
Due to several different off-by-one errors, or perhaps due to a late
change in design that wasn't fully reflected in the code that was
actually merged, there are several very strange constraints on how
fast-commit blocks are filled with tlv entries:
- tlvs must start at least 10 bytes before the end of the block, even
though the minimum tlv length is 8. Otherwise, the replay code will
ignore them. (BUG: ext4_fc_reserve_space() could violate this
requirement if called with a len of blocksize - 9 or blocksize - 8.
Fortunately, this doesn't seem to happen currently.)
- tlvs must end at least 1 byte before the end of the block. Otherwise
the replay code will consider them to be invalid. This quirk
contributed to a bug (fixed by an earlier commit) where uninitialized
memory was being leaked to disk in the last byte of blocks.
Also, strangely these constraints don't apply to the replay code in
e2fsprogs, which will accept any tlvs in the blocks (with no bounds
checks at all, but that is a separate issue...).
Given that this all seems to be a bug, let's fix it by just filling
blocks with tlv entries in the natural way.
Note that old kernels will be unable to replay fast-commit journals
created by kernels that have this commit.
Fixes: aa75f4d3daae ("ext4: main fast-commit commit path")
Cc: <stable(a)vger.kernel.org> # v5.10+
Signed-off-by: Eric Biggers <ebiggers(a)google.com>
---
fs/ext4/fast_commit.c | 66 +++++++++++++++++++++----------------------
1 file changed, 33 insertions(+), 33 deletions(-)
diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
index 892fa7c7a768b..7ed71c652f67f 100644
--- a/fs/ext4/fast_commit.c
+++ b/fs/ext4/fast_commit.c
@@ -714,43 +714,43 @@ static u8 *ext4_fc_reserve_space(struct super_block *sb, int len, u32 *crc)
struct buffer_head *bh;
int bsize = sbi->s_journal->j_blocksize;
int ret, off = sbi->s_fc_bytes % bsize;
- int pad_len;
+ int remaining;
u8 *dst;
/*
- * After allocating len, we should have space at least for a 0 byte
- * padding.
+ * If 'len' is too long to fit in any block alongside a PAD tlv, then we
+ * cannot fulfill the request.
*/
- if (len + EXT4_FC_TAG_BASE_LEN > bsize)
+ if (len > bsize - EXT4_FC_TAG_BASE_LEN)
return NULL;
- if (bsize - off - 1 > len + EXT4_FC_TAG_BASE_LEN) {
- /*
- * Only allocate from current buffer if we have enough space for
- * this request AND we have space to add a zero byte padding.
- */
- if (!sbi->s_fc_bh) {
- ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh);
- if (ret)
- return NULL;
- sbi->s_fc_bh = bh;
- }
- sbi->s_fc_bytes += len;
- return sbi->s_fc_bh->b_data + off;
+ if (!sbi->s_fc_bh) {
+ ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh);
+ if (ret)
+ return NULL;
+ sbi->s_fc_bh = bh;
}
- /* Need to add PAD tag */
dst = sbi->s_fc_bh->b_data + off;
+
+ /*
+ * Allocate the bytes in the current block if we can do so while still
+ * leaving enough space for a PAD tlv.
+ */
+ remaining = bsize - EXT4_FC_TAG_BASE_LEN - off;
+ if (len <= remaining) {
+ sbi->s_fc_bytes += len;
+ return dst;
+ }
+
+ /*
+ * Else, terminate the current block with a PAD tlv, then allocate a new
+ * block and allocate the bytes at the start of that new block.
+ */
+
tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_PAD);
- pad_len = bsize - off - 1 - EXT4_FC_TAG_BASE_LEN;
- tl.fc_len = cpu_to_le16(pad_len);
+ tl.fc_len = cpu_to_le16(remaining);
ext4_fc_memcpy(sb, dst, &tl, EXT4_FC_TAG_BASE_LEN, crc);
- dst += EXT4_FC_TAG_BASE_LEN;
- if (pad_len > 0) {
- ext4_fc_memzero(sb, dst, pad_len, crc);
- dst += pad_len;
- }
- /* Don't leak uninitialized memory in the unused last byte. */
- *dst = 0;
+ ext4_fc_memzero(sb, dst + EXT4_FC_TAG_BASE_LEN, remaining, crc);
ext4_fc_submit_bh(sb, false);
@@ -758,7 +758,7 @@ static u8 *ext4_fc_reserve_space(struct super_block *sb, int len, u32 *crc)
if (ret)
return NULL;
sbi->s_fc_bh = bh;
- sbi->s_fc_bytes = (sbi->s_fc_bytes / bsize + 1) * bsize + len;
+ sbi->s_fc_bytes += bsize - off + len;
return sbi->s_fc_bh->b_data;
}
@@ -789,7 +789,7 @@ static int ext4_fc_write_tail(struct super_block *sb, u32 crc)
off = sbi->s_fc_bytes % bsize;
tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_TAIL);
- tl.fc_len = cpu_to_le16(bsize - off - 1 + sizeof(struct ext4_fc_tail));
+ tl.fc_len = cpu_to_le16(bsize - off + sizeof(struct ext4_fc_tail));
sbi->s_fc_bytes = round_up(sbi->s_fc_bytes, bsize);
ext4_fc_memcpy(sb, dst, &tl, EXT4_FC_TAG_BASE_LEN, &crc);
@@ -2056,7 +2056,7 @@ static int ext4_fc_replay_scan(journal_t *journal,
state = &sbi->s_fc_replay_state;
start = (u8 *)bh->b_data;
- end = (__u8 *)bh->b_data + journal->j_blocksize - 1;
+ end = start + journal->j_blocksize;
if (state->fc_replay_expected_off == 0) {
state->fc_cur_tag = 0;
@@ -2077,7 +2077,7 @@ static int ext4_fc_replay_scan(journal_t *journal,
}
state->fc_replay_expected_off++;
- for (cur = start; cur < end - EXT4_FC_TAG_BASE_LEN;
+ for (cur = start; cur <= end - EXT4_FC_TAG_BASE_LEN;
cur = cur + EXT4_FC_TAG_BASE_LEN + tl.fc_len) {
ext4_fc_get_tl(&tl, cur);
val = cur + EXT4_FC_TAG_BASE_LEN;
@@ -2195,9 +2195,9 @@ static int ext4_fc_replay(journal_t *journal, struct buffer_head *bh,
#endif
start = (u8 *)bh->b_data;
- end = (__u8 *)bh->b_data + journal->j_blocksize - 1;
+ end = start + journal->j_blocksize;
- for (cur = start; cur < end - EXT4_FC_TAG_BASE_LEN;
+ for (cur = start; cur <= end - EXT4_FC_TAG_BASE_LEN;
cur = cur + EXT4_FC_TAG_BASE_LEN + tl.fc_len) {
ext4_fc_get_tl(&tl, cur);
val = cur + EXT4_FC_TAG_BASE_LEN;
--
2.38.1
From: "Masami Hiramatsu (Google)" <mhiramat(a)kernel.org>
Since commit ab51e15d535e ("fprobe: Introduce FPROBE_FL_KPROBE_SHARED flag
for fprobe") introduced fprobe_kprobe_handler() for fprobe::ops::func,
unregister_fprobe() fails to unregister the registered fprobe if the user
specifies the FPROBE_FL_KPROBE_SHARED flag.
Moreover, __register_ftrace_function() may change ftrace_ops::func, so we
have to check fprobe::ops::saved_func instead.
To check it correctly, it should confirm the fprobe::ops::saved_func is
either fprobe_handler() or fprobe_kprobe_handler().
Link: https://lore.kernel.org/all/166677683946.1459107.15997653945538644683.stgit…
Fixes: cad9931f64dc ("fprobe: Add ftrace based probe APIs")
Cc: stable(a)vger.kernel.org
Signed-off-by: Masami Hiramatsu (Google) <mhiramat(a)kernel.org>
---
kernel/trace/fprobe.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
index 71614b2a67ff..e8143e368074 100644
--- a/kernel/trace/fprobe.c
+++ b/kernel/trace/fprobe.c
@@ -303,7 +303,8 @@ int unregister_fprobe(struct fprobe *fp)
{
int ret;
- if (!fp || fp->ops.func != fprobe_handler)
+ if (!fp || (fp->ops.saved_func != fprobe_handler &&
+ fp->ops.saved_func != fprobe_kprobe_handler))
return -EINVAL;
/*
--
2.35.1
From: Li Qiang <liq3ea(a)163.com>
In the aggregate kprobe case, when arm_kprobe() fails, we need to set
KPROBE_FLAG_DISABLED in kp->flags again. Otherwise, the 'kp' kprobe will
be considered enabled even though it is not actually enabled.
Link: https://lore.kernel.org/all/20220902155820.34755-1-liq3ea@163.com/
Fixes: 12310e343755 ("kprobes: Propagate error from arm_kprobe_ftrace()")
Cc: stable(a)vger.kernel.org
Signed-off-by: Li Qiang <liq3ea(a)163.com>
Acked-by: Masami Hiramatsu (Google) <mhiramat(a)kernel.org>
Signed-off-by: Masami Hiramatsu (Google) <mhiramat(a)kernel.org>
---
kernel/kprobes.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 3220b0a2fb4a..cd9f5a66a690 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -2429,8 +2429,11 @@ int enable_kprobe(struct kprobe *kp)
if (!kprobes_all_disarmed && kprobe_disabled(p)) {
p->flags &= ~KPROBE_FLAG_DISABLED;
ret = arm_kprobe(p);
- if (ret)
+ if (ret) {
p->flags |= KPROBE_FLAG_DISABLED;
+ if (p != kp)
+ kp->flags |= KPROBE_FLAG_DISABLED;
+ }
}
out:
mutex_unlock(&kprobe_mutex);
--
2.35.1
From: Li Huafei <lihuafei1(a)huawei.com>
KASAN reported a use-after-free with ftrace ops [1]. It was found from
vmcore that perf had registered two ops with the same content
successively, both dynamic. After unregistering the second ops, a
use-after-free occurred.
In ftrace_shutdown(), when the second ops is unregistered, the
FTRACE_UPDATE_CALLS command is not set because there is another enabled
ops with the same content. Also, both ops are dynamic and the ftrace
callback function is ftrace_ops_list_func, so the
FTRACE_UPDATE_TRACE_FUNC command will not be set. Eventually the value
of 'command' will be 0 and ftrace_shutdown() will skip the rcu
synchronization.
However, ftrace may be activated. When the ops is released, another CPU
may be accessing the ops. Add the missing synchronization to fix this
problem.
[1]
BUG: KASAN: use-after-free in __ftrace_ops_list_func kernel/trace/ftrace.c:7020 [inline]
BUG: KASAN: use-after-free in ftrace_ops_list_func+0x2b0/0x31c kernel/trace/ftrace.c:7049
Read of size 8 at addr ffff56551965bbc8 by task syz-executor.2/14468
CPU: 1 PID: 14468 Comm: syz-executor.2 Not tainted 5.10.0 #7
Hardware name: linux,dummy-virt (DT)
Call trace:
dump_backtrace+0x0/0x40c arch/arm64/kernel/stacktrace.c:132
show_stack+0x30/0x40 arch/arm64/kernel/stacktrace.c:196
__dump_stack lib/dump_stack.c:77 [inline]
dump_stack+0x1b4/0x248 lib/dump_stack.c:118
print_address_description.constprop.0+0x28/0x48c mm/kasan/report.c:387
__kasan_report mm/kasan/report.c:547 [inline]
kasan_report+0x118/0x210 mm/kasan/report.c:564
check_memory_region_inline mm/kasan/generic.c:187 [inline]
__asan_load8+0x98/0xc0 mm/kasan/generic.c:253
__ftrace_ops_list_func kernel/trace/ftrace.c:7020 [inline]
ftrace_ops_list_func+0x2b0/0x31c kernel/trace/ftrace.c:7049
ftrace_graph_call+0x0/0x4
__might_sleep+0x8/0x100 include/linux/perf_event.h:1170
__might_fault mm/memory.c:5183 [inline]
__might_fault+0x58/0x70 mm/memory.c:5171
do_strncpy_from_user lib/strncpy_from_user.c:41 [inline]
strncpy_from_user+0x1f4/0x4b0 lib/strncpy_from_user.c:139
getname_flags+0xb0/0x31c fs/namei.c:149
getname+0x2c/0x40 fs/namei.c:209
[...]
Allocated by task 14445:
kasan_save_stack+0x24/0x50 mm/kasan/common.c:48
kasan_set_track mm/kasan/common.c:56 [inline]
__kasan_kmalloc mm/kasan/common.c:479 [inline]
__kasan_kmalloc.constprop.0+0x110/0x13c mm/kasan/common.c:449
kasan_kmalloc+0xc/0x14 mm/kasan/common.c:493
kmem_cache_alloc_trace+0x440/0x924 mm/slub.c:2950
kmalloc include/linux/slab.h:563 [inline]
kzalloc include/linux/slab.h:675 [inline]
perf_event_alloc.part.0+0xb4/0x1350 kernel/events/core.c:11230
perf_event_alloc kernel/events/core.c:11733 [inline]
__do_sys_perf_event_open kernel/events/core.c:11831 [inline]
__se_sys_perf_event_open+0x550/0x15f4 kernel/events/core.c:11723
__arm64_sys_perf_event_open+0x6c/0x80 kernel/events/core.c:11723
[...]
Freed by task 14445:
kasan_save_stack+0x24/0x50 mm/kasan/common.c:48
kasan_set_track+0x24/0x34 mm/kasan/common.c:56
kasan_set_free_info+0x20/0x40 mm/kasan/generic.c:358
__kasan_slab_free.part.0+0x11c/0x1b0 mm/kasan/common.c:437
__kasan_slab_free mm/kasan/common.c:445 [inline]
kasan_slab_free+0x2c/0x40 mm/kasan/common.c:446
slab_free_hook mm/slub.c:1569 [inline]
slab_free_freelist_hook mm/slub.c:1608 [inline]
slab_free mm/slub.c:3179 [inline]
kfree+0x12c/0xc10 mm/slub.c:4176
perf_event_alloc.part.0+0xa0c/0x1350 kernel/events/core.c:11434
perf_event_alloc kernel/events/core.c:11733 [inline]
__do_sys_perf_event_open kernel/events/core.c:11831 [inline]
__se_sys_perf_event_open+0x550/0x15f4 kernel/events/core.c:11723
[...]
Link: https://lore.kernel.org/linux-trace-kernel/20221103031010.166498-1-lihuafei…
Fixes: edb096e00724f ("ftrace: Fix memleak when unregistering dynamic ops when tracing disabled")
Cc: stable(a)vger.kernel.org
Suggested-by: Steven Rostedt <rostedt(a)goodmis.org>
Signed-off-by: Li Huafei <lihuafei1(a)huawei.com>
Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
---
kernel/trace/ftrace.c | 16 +++-------------
1 file changed, 3 insertions(+), 13 deletions(-)
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index fbf2543111c0..7dc023641bf1 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3028,18 +3028,8 @@ int ftrace_shutdown(struct ftrace_ops *ops, int command)
command |= FTRACE_UPDATE_TRACE_FUNC;
}
- if (!command || !ftrace_enabled) {
- /*
- * If these are dynamic or per_cpu ops, they still
- * need their data freed. Since, function tracing is
- * not currently active, we can just free them
- * without synchronizing all CPUs.
- */
- if (ops->flags & FTRACE_OPS_FL_DYNAMIC)
- goto free_ops;
-
- return 0;
- }
+ if (!command || !ftrace_enabled)
+ goto out;
/*
* If the ops uses a trampoline, then it needs to be
@@ -3076,6 +3066,7 @@ int ftrace_shutdown(struct ftrace_ops *ops, int command)
removed_ops = NULL;
ops->flags &= ~FTRACE_OPS_FL_REMOVING;
+out:
/*
* Dynamic ops may be freed, we must make sure that all
* callers are done before leaving this function.
@@ -3103,7 +3094,6 @@ int ftrace_shutdown(struct ftrace_ops *ops, int command)
if (IS_ENABLED(CONFIG_PREEMPTION))
synchronize_rcu_tasks();
- free_ops:
ftrace_trampoline_free(ops);
}
--
2.35.1
From: "Steven Rostedt (Google)" <rostedt(a)goodmis.org>
On some machines the number of listed CPUs may be bigger than the actual
CPUs that exist. The tracing subsystem allocates a per_cpu directory with
access to the per CPU ring buffer via a cpuX file. But to save space, the
ring buffer will only allocate buffers for online CPUs, even though the
CPU array will be as big as the nr_cpu_ids.
With the addition of waking waiters on the ring buffer when closing the
file, the ring_buffer_wake_waiters() now needs to make sure that the
buffer is allocated (with the irq_work allocated with it) before trying to
wake waiters, as it will cause a NULL pointer dereference.
While debugging this, I added a NULL check for the buffer itself (which is
OK to do), and also NULL pointer checks against buffer->buffers (which is
not fine, and will WARN) as well as making sure the CPU number passed in
is within the nr_cpu_ids (which is also not fine if it isn't).
Link: https://lore.kernel.org/all/87h6zklb6n.wl-tiwai@suse.de/
Link: https://lore.kernel.org/all/CAM6Wdxc0KRJMXVAA0Y=u6Jh2V=uWB-_Fn6M4xRuNppfXzL…
Link: https://lkml.kernel.org/linux-trace-kernel/20221101191009.1e7378c8@rorschac…
Cc: stable(a)vger.kernel.org
Cc: Steven Noonan <steven.noonan(a)gmail.com>
Bugzilla: https://bugzilla.opensuse.org/show_bug.cgi?id=1204705
Reported-by: Takashi Iwai <tiwai(a)suse.de>
Reported-by: Roland Ruckerbauer <roland.rucky(a)gmail.com>
Fixes: f3ddb74ad079 ("tracing: Wake up ring buffer waiters on closing of the file")
Reviewed-by: Masami Hiramatsu (Google) <mhiramat(a)kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
---
kernel/trace/ring_buffer.c | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 199759c73519..9712083832f4 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -937,6 +937,9 @@ void ring_buffer_wake_waiters(struct trace_buffer *buffer, int cpu)
struct ring_buffer_per_cpu *cpu_buffer;
struct rb_irq_work *rbwork;
+ if (!buffer)
+ return;
+
if (cpu == RING_BUFFER_ALL_CPUS) {
/* Wake up individual ones too. One level recursion */
@@ -945,7 +948,15 @@ void ring_buffer_wake_waiters(struct trace_buffer *buffer, int cpu)
rbwork = &buffer->irq_work;
} else {
+ if (WARN_ON_ONCE(!buffer->buffers))
+ return;
+ if (WARN_ON_ONCE(cpu >= nr_cpu_ids))
+ return;
+
cpu_buffer = buffer->buffers[cpu];
+ /* The CPU buffer may not have been initialized yet */
+ if (!cpu_buffer)
+ return;
rbwork = &cpu_buffer->irq_work;
}
--
2.35.1
From: xiongxin <xiongxin(a)kylinos.cn>
The actual calculation formula in the code below is:
max_size = (count - (size + PAGES_FOR_IO)) / 2
- 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE);
But the function comment states it differently; is the comment wrong?
By the way, what exactly do the "/ 2" and "2 *" mean?
Cc: stable(a)vger.kernel.org
Signed-off-by: xiongxin <xiongxin(a)kylinos.cn>
---
kernel/power/snapshot.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 2a406753af90..c20ca5fb9adc 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -1723,8 +1723,8 @@ static unsigned long minimum_image_size(unsigned long saveable)
* /sys/power/reserved_size, respectively). To make this happen, we compute the
* total number of available page frames and allocate at least
*
- * ([page frames total] + PAGES_FOR_IO + [metadata pages]) / 2
- * + 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE)
+ * ([page frames total] - PAGES_FOR_IO - [metadata pages]) / 2
+ * - 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE)
*
* of them, which corresponds to the maximum size of a hibernation image.
*
--
2.25.1
--
Die Summe von 500.000,00 € wurde Ihnen von STEFANO PESSINA gespendet.
Bitte kontaktieren Sie uns für weitere Informationen über
stefanopessia755(a)hotmail.com
--
Die Summe von 500.000,00 € wurde Ihnen von STEFANO PESSINA gespendet.
Bitte kontaktieren Sie uns für weitere Informationen über
stefanopessia755(a)hotmail.com
--
Die Summe von 500.000,00 € wurde Ihnen von STEFANO PESSINA gespendet.
Bitte kontaktieren Sie uns für weitere Informationen über
stefanopessia755(a)hotmail.com
--
Dear Friend,
I warmly greet you
Please forgive me if my plea sounds a bit strange or embarrassing to
you I am 63 years old am suffering from protracted cancer of the
lungs which has also affected part of my brain cells due to
complications,from all indication my condition is really deteriorating
and it is quite obvious according to my doctors that i may not live
for the next few months,because my condition has gotten to a critical
and life threatening stage
Regards to my situation as well as the doctors report i have decided
to entrust my wealth and treasures to a trust worthy person Get back
to me if you can be trusted for more details
Sincerely Your's
Mrs. Helen Robert
This is a note to let you know that I've just added the patch titled
iio: adc: at91_adc: fix possible memory leak in
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From 65f20301607d07ee279b0804d11a05a62a6c1a1c Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang(a)huawei.com>
Date: Mon, 24 Oct 2022 16:45:11 +0800
Subject: iio: adc: at91_adc: fix possible memory leak in
at91_adc_allocate_trigger()
If iio_trigger_register() returns an error, the caller should call
iio_trigger_free() to drop the reference taken in iio_trigger_alloc(), so
that iio_trig_release() is called to free the memory when the refcount
reaches 0.
Fixes: 0e589d5fb317 ("ARM: AT91: IIO: Add AT91 ADC driver.")
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
Link: https://lore.kernel.org/r/20221024084511.815096-1-yangyingliang@huawei.com
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/adc/at91_adc.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/iio/adc/at91_adc.c b/drivers/iio/adc/at91_adc.c
index 532daaa6f943..366e252ebeb0 100644
--- a/drivers/iio/adc/at91_adc.c
+++ b/drivers/iio/adc/at91_adc.c
@@ -634,8 +634,10 @@ static struct iio_trigger *at91_adc_allocate_trigger(struct iio_dev *idev,
trig->ops = &at91_adc_trigger_ops;
ret = iio_trigger_register(trig);
- if (ret)
+ if (ret) {
+ iio_trigger_free(trig);
return NULL;
+ }
return trig;
}
--
2.38.1
This is a note to let you know that I've just added the patch titled
iio: adc: mp2629: fix wrong comparison of channel
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From 1eb20332a082fa801fb89c347c5e62de916a4001 Mon Sep 17 00:00:00 2001
From: Saravanan Sekar <sravanhome(a)gmail.com>
Date: Sat, 29 Oct 2022 11:29:53 +0200
Subject: iio: adc: mp2629: fix wrong comparison of channel
Input voltage channel enum is compared against iio address instead
of the channel.
Fixes: 7abd9fb64682 ("iio: adc: mp2629: Add support for mp2629 ADC driver")
Signed-off-by: Saravanan Sekar <sravanhome(a)gmail.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko(a)gmail.com>
Link: https://lore.kernel.org/r/20221029093000.45451-2-sravanhome@gmail.com
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/adc/mp2629_adc.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/iio/adc/mp2629_adc.c b/drivers/iio/adc/mp2629_adc.c
index 30a31f185d08..f7af9af1665d 100644
--- a/drivers/iio/adc/mp2629_adc.c
+++ b/drivers/iio/adc/mp2629_adc.c
@@ -74,7 +74,7 @@ static int mp2629_read_raw(struct iio_dev *indio_dev,
if (ret)
return ret;
- if (chan->address == MP2629_INPUT_VOLT)
+ if (chan->channel == MP2629_INPUT_VOLT)
rval &= GENMASK(6, 0);
*val = rval;
return IIO_VAL_INT;
--
2.38.1
This is a note to let you know that I've just added the patch titled
iio: adc: mp2629: fix potential array out of bound access
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From ca1547ab15f48dc81624183ae17a2fd1bad06dfc Mon Sep 17 00:00:00 2001
From: Saravanan Sekar <sravanhome(a)gmail.com>
Date: Sat, 29 Oct 2022 11:29:55 +0200
Subject: iio: adc: mp2629: fix potential array out of bound access
Add sentinel at end of maps to avoid potential array out of
bound access in iio core.
Fixes: 7abd9fb64682 ("iio: adc: mp2629: Add support for mp2629 ADC driver")
Signed-off-by: Saravanan Sekar <sravanhome(a)gmail.com>
Link: https://lore.kernel.org/r/20221029093000.45451-4-sravanhome@gmail.com
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/adc/mp2629_adc.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/iio/adc/mp2629_adc.c b/drivers/iio/adc/mp2629_adc.c
index f7af9af1665d..88e947f300cf 100644
--- a/drivers/iio/adc/mp2629_adc.c
+++ b/drivers/iio/adc/mp2629_adc.c
@@ -57,7 +57,8 @@ static struct iio_map mp2629_adc_maps[] = {
MP2629_MAP(SYSTEM_VOLT, "system-volt"),
MP2629_MAP(INPUT_VOLT, "input-volt"),
MP2629_MAP(BATT_CURRENT, "batt-current"),
- MP2629_MAP(INPUT_CURRENT, "input-current")
+ MP2629_MAP(INPUT_CURRENT, "input-current"),
+ { }
};
static int mp2629_read_raw(struct iio_dev *indio_dev,
--
2.38.1
This is a note to let you know that I've just added the patch titled
iio: pressure: ms5611: changed hardcoded SPI speed to value limited
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From 741cec30cc52058d1c10d415f3b98319887e4f73 Mon Sep 17 00:00:00 2001
From: Mitja Spes <mitja(a)lxnav.com>
Date: Fri, 21 Oct 2022 15:58:21 +0200
Subject: iio: pressure: ms5611: changed hardcoded SPI speed to value limited
Don't hardcode the ms5611 SPI speed, limit it instead.
Signed-off-by: Mitja Spes <mitja(a)lxnav.com>
Fixes: c0644160a8b5 ("iio: pressure: add support for MS5611 pressure and temperature sensor")
Link: https://lore.kernel.org/r/20221021135827.1444793-3-mitja@lxnav.com
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/pressure/ms5611_spi.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/iio/pressure/ms5611_spi.c b/drivers/iio/pressure/ms5611_spi.c
index 432e912096f4..a0a7205c9c3a 100644
--- a/drivers/iio/pressure/ms5611_spi.c
+++ b/drivers/iio/pressure/ms5611_spi.c
@@ -91,7 +91,7 @@ static int ms5611_spi_probe(struct spi_device *spi)
spi_set_drvdata(spi, indio_dev);
spi->mode = SPI_MODE_0;
- spi->max_speed_hz = 20000000;
+ spi->max_speed_hz = min(spi->max_speed_hz, 20000000U);
spi->bits_per_word = 8;
ret = spi_setup(spi);
if (ret < 0)
--
2.38.1
This is a note to let you know that I've just added the patch titled
iio: accel: bma400: Ensure VDDIO is enable defore reading the chip
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From 57572cacd36e6d4be7722d7770d23f4430219827 Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
Date: Sun, 2 Oct 2022 15:41:33 +0100
Subject: iio: accel: bma400: Ensure VDDIO is enable defore reading the chip
ID.
The regulator enables were after the check on the chip variant, which was
very unlikely to return a correct value when not powered.
Presumably all the devices anyone is testing on have a regulator that
is already powered up when this code runs for reasons beyond the scope
of this driver. Move the read call down a few lines.
Fixes: 3cf7ded15e40 ("iio: accel: bma400: basic regulator support")
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
Reviewed-by: Dan Robertson <dan(a)dlrobertson.com>
Cc: <Stable(a)vger.kernel.org>
Link: https://lore.kernel.org/r/20221002144133.3771029-1-jic23@kernel.org
---
drivers/iio/accel/bma400_core.c | 24 ++++++++++++------------
1 file changed, 12 insertions(+), 12 deletions(-)
diff --git a/drivers/iio/accel/bma400_core.c b/drivers/iio/accel/bma400_core.c
index ad8fce3e08cd..490c342ef72a 100644
--- a/drivers/iio/accel/bma400_core.c
+++ b/drivers/iio/accel/bma400_core.c
@@ -869,18 +869,6 @@ static int bma400_init(struct bma400_data *data)
unsigned int val;
int ret;
- /* Try to read chip_id register. It must return 0x90. */
- ret = regmap_read(data->regmap, BMA400_CHIP_ID_REG, &val);
- if (ret) {
- dev_err(data->dev, "Failed to read chip id register\n");
- return ret;
- }
-
- if (val != BMA400_ID_REG_VAL) {
- dev_err(data->dev, "Chip ID mismatch\n");
- return -ENODEV;
- }
-
data->regulators[BMA400_VDD_REGULATOR].supply = "vdd";
data->regulators[BMA400_VDDIO_REGULATOR].supply = "vddio";
ret = devm_regulator_bulk_get(data->dev,
@@ -906,6 +894,18 @@ static int bma400_init(struct bma400_data *data)
if (ret)
return ret;
+ /* Try to read chip_id register. It must return 0x90. */
+ ret = regmap_read(data->regmap, BMA400_CHIP_ID_REG, &val);
+ if (ret) {
+ dev_err(data->dev, "Failed to read chip id register\n");
+ return ret;
+ }
+
+ if (val != BMA400_ID_REG_VAL) {
+ dev_err(data->dev, "Chip ID mismatch\n");
+ return -ENODEV;
+ }
+
ret = bma400_get_power_mode(data);
if (ret) {
dev_err(data->dev, "Failed to get the initial power-mode\n");
--
2.38.1
This is a note to let you know that I've just added the patch titled
iio: trigger: sysfs: fix possible memory leak in
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From efa17e90e1711bdb084e3954fa44afb6647331c0 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang(a)huawei.com>
Date: Sat, 22 Oct 2022 15:42:12 +0800
Subject: iio: trigger: sysfs: fix possible memory leak in
iio_sysfs_trig_init()
dev_set_name() allocates memory for the name, which needs to be freed
when device_add() fails. Call put_device() to drop the reference taken
in device_initialize(), so that the name can be freed in
kobject_cleanup() when the refcount reaches 0.
Fault injection test can trigger this:
unreferenced object 0xffff8e8340a7b4c0 (size 32):
comm "modprobe", pid 243, jiffies 4294678145 (age 48.845s)
hex dump (first 32 bytes):
69 69 6f 5f 73 79 73 66 73 5f 74 72 69 67 67 65 iio_sysfs_trigge
72 00 a7 40 83 8e ff ff 00 86 13 c4 f6 ee ff ff r..@............
backtrace:
[<0000000074999de8>] __kmem_cache_alloc_node+0x1e9/0x360
[<00000000497fd30b>] __kmalloc_node_track_caller+0x44/0x1a0
[<000000003636c520>] kstrdup+0x2d/0x60
[<0000000032f84da2>] kobject_set_name_vargs+0x1e/0x90
[<0000000092efe493>] dev_set_name+0x4e/0x70
Fixes: 1f785681a870 ("staging:iio:trigger sysfs userspace trigger rework.")
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
Cc: <Stable(a)vger.kernel.org>
Link: https://lore.kernel.org/r/20221022074212.1386424-1-yangyingliang@huawei.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/trigger/iio-trig-sysfs.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/drivers/iio/trigger/iio-trig-sysfs.c b/drivers/iio/trigger/iio-trig-sysfs.c
index d6c5e9644738..6b05eed41612 100644
--- a/drivers/iio/trigger/iio-trig-sysfs.c
+++ b/drivers/iio/trigger/iio-trig-sysfs.c
@@ -203,9 +203,13 @@ static int iio_sysfs_trigger_remove(int id)
static int __init iio_sysfs_trig_init(void)
{
+ int ret;
device_initialize(&iio_sysfs_trig_dev);
dev_set_name(&iio_sysfs_trig_dev, "iio_sysfs_trigger");
- return device_add(&iio_sysfs_trig_dev);
+ ret = device_add(&iio_sysfs_trig_dev);
+ if (ret)
+ put_device(&iio_sysfs_trig_dev);
+ return ret;
}
module_init(iio_sysfs_trig_init);
--
2.38.1
--
Die Summe von 500.000,00 € wurde Ihnen von STEFANO PESSINA gespendet.
Bitte kontaktieren Sie uns für weitere Informationen über
stefanopessia755(a)hotmail.com
Since commit 1da52815d5f1 ("binder: fix alloc->vma_vm_mm null-ptr
dereference") binder caches a pointer to the current->mm during open().
This fixes a null-ptr dereference reported by syzkaller. Unfortunately,
it also opens the door for a process to update its mm after the open()
(e.g. via execve), making the cached alloc->mm pointer invalid.
Things get worse when the process continues to mmap() a vma. From this
point forward, binder will attempt to find this vma using an obsolete
alloc->mm reference. Such as in binder_update_page_range(), where the
wrong vma is obtained via vma_lookup(), yet binder proceeds to happily
insert new pages into it.
To avoid this issue, fail the ->mmap() callback if we detect a mismatch
between the vma->vm_mm and the original alloc->mm pointer. This prevents
alloc->vm_addr from getting set, so that any subsequent vma_lookup()
calls fail as expected.
Fixes: 1da52815d5f1 ("binder: fix alloc->vma_vm_mm null-ptr dereference")
Reported-by: Jann Horn <jannh(a)google.com>
Cc: <stable(a)vger.kernel.org> # 5.15+
Signed-off-by: Carlos Llamas <cmllamas(a)google.com>
---
drivers/android/binder_alloc.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c
index 1c39cfce32fa..4ad42b0f75cd 100644
--- a/drivers/android/binder_alloc.c
+++ b/drivers/android/binder_alloc.c
@@ -739,6 +739,12 @@ int binder_alloc_mmap_handler(struct binder_alloc *alloc,
const char *failure_string;
struct binder_buffer *buffer;
+ if (unlikely(vma->vm_mm != alloc->mm)) {
+ ret = -EINVAL;
+ failure_string = "invalid vma->vm_mm";
+ goto err_invalid_mm;
+ }
+
mutex_lock(&binder_alloc_mmap_lock);
if (alloc->buffer_size) {
ret = -EBUSY;
@@ -785,6 +791,7 @@ int binder_alloc_mmap_handler(struct binder_alloc *alloc,
alloc->buffer_size = 0;
err_already_mapped:
mutex_unlock(&binder_alloc_mmap_lock);
+err_invalid_mm:
binder_alloc_debug(BINDER_DEBUG_USER_ERROR,
"%s: %d %lx-%lx %s failed %d\n", __func__,
alloc->pid, vma->vm_start, vma->vm_end,
--
2.38.1.431.g37b22c650d-goog
On the way to fixing and regression testing Jonathan's report of CXL
region creation failure on a single-port host bridge configuration [1],
several other fixes fell out. Details in the individual commits, but the
fixes mostly revolve around leaked references and other bugs in the
region creation failure case. All but the last fix are tagged for
-stable. The final fix is cosmetic, but leaving it unfixed gives the
appearance of another memory leak condition.
Lastly, the problematic configuration is added to cxl_test to allow for
regression testing it going forward.
[1]: http://lore.kernel.org/r/20221010172057.00001559@huawei.com
---
Dan Williams (7):
cxl/region: Fix region HPA ordering validation
cxl/region: Fix cxl_region leak, cleanup targets at region delete
cxl/pmem: Fix cxl_pmem_region and cxl_memdev leak
tools/testing/cxl: Fix some error exits
tools/testing/cxl: Add a single-port host-bridge regression config
cxl/region: Fix 'distance' calculation with passthrough ports
cxl/region: Recycle region ids
drivers/cxl/core/pmem.c | 2
drivers/cxl/core/port.c | 11 +-
drivers/cxl/core/region.c | 43 ++++++
drivers/cxl/cxl.h | 4 -
drivers/cxl/pmem.c | 100 +++++++++-----
tools/testing/cxl/test/cxl.c | 301 +++++++++++++++++++++++++++++++++++++++---
6 files changed, 400 insertions(+), 61 deletions(-)
base-commit: 4f1aa35f1fb7d51b125487c835982af792697ecb
The patch titled
Subject: nilfs2: fix use-after-free bug of ns_writer on remount
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
nilfs2-fix-use-after-free-bug-of-ns_writer-on-remount.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Subject: nilfs2: fix use-after-free bug of ns_writer on remount
Date: Fri, 4 Nov 2022 23:29:59 +0900
If a nilfs2 filesystem is downgraded to read-only due to metadata
corruption on disk and is remounted read/write, or if emergency read-only
remount is performed, detaching a log writer and synchronizing the
filesystem can be done at the same time.
In these cases, use-after-free of the log writer (hereinafter
nilfs->ns_writer) can happen as shown in the scenario below:
Task1 Task2
-------------------------------- ------------------------------
nilfs_construct_segment
nilfs_segctor_sync
init_wait
init_waitqueue_entry
add_wait_queue
schedule
nilfs_remount (R/W remount case)
nilfs_attach_log_writer
nilfs_detach_log_writer
nilfs_segctor_destroy
kfree
finish_wait
_raw_spin_lock_irqsave
__raw_spin_lock_irqsave
do_raw_spin_lock
debug_spin_lock_before <-- use-after-free
While Task1 is sleeping, nilfs->ns_writer is freed by Task2. After Task1
wakes up, it accesses nilfs->ns_writer, which is already freed. This
scenario diagram is based on Shigeru Yoshida's post [1].
This patch fixes the issue by not detaching nilfs->ns_writer on remount so
that this UAF race doesn't happen. Along with this change, this patch
also inserts a few necessary read-only checks with superblock instance
where only the ns_writer pointer was used to check if the filesystem is
read-only.
Link: https://syzkaller.appspot.com/bug?id=79a4c002e960419ca173d55e863bd09e8112df…
Link: https://lkml.kernel.org/r/20221103141759.1836312-1-syoshida@redhat.com [1]
Link: https://lkml.kernel.org/r/20221104142959.28296-1-konishi.ryusuke@gmail.com
Signed-off-by: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Reported-by: syzbot+f816fa82f8783f7a02bb(a)syzkaller.appspotmail.com
Reported-by: Shigeru Yoshida <syoshida(a)redhat.com>
Tested-by: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/nilfs2/segment.c | 15 ++++++++-------
fs/nilfs2/super.c | 2 --
2 files changed, 8 insertions(+), 9 deletions(-)
--- a/fs/nilfs2/segment.c~nilfs2-fix-use-after-free-bug-of-ns_writer-on-remount
+++ a/fs/nilfs2/segment.c
@@ -317,7 +317,7 @@ void nilfs_relax_pressure_in_lock(struct
struct the_nilfs *nilfs = sb->s_fs_info;
struct nilfs_sc_info *sci = nilfs->ns_writer;
- if (!sci || !sci->sc_flush_request)
+ if (sb_rdonly(sb) || unlikely(!sci) || !sci->sc_flush_request)
return;
set_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags);
@@ -2242,7 +2242,7 @@ int nilfs_construct_segment(struct super
struct nilfs_sc_info *sci = nilfs->ns_writer;
struct nilfs_transaction_info *ti;
- if (!sci)
+ if (sb_rdonly(sb) || unlikely(!sci))
return -EROFS;
/* A call inside transactions causes a deadlock. */
@@ -2280,7 +2280,7 @@ int nilfs_construct_dsync_segment(struct
struct nilfs_transaction_info ti;
int err = 0;
- if (!sci)
+ if (sb_rdonly(sb) || unlikely(!sci))
return -EROFS;
nilfs_transaction_lock(sb, &ti, 0);
@@ -2776,11 +2776,12 @@ int nilfs_attach_log_writer(struct super
if (nilfs->ns_writer) {
/*
- * This happens if the filesystem was remounted
- * read/write after nilfs_error degenerated it into a
- * read-only mount.
+ * This happens if the filesystem is made read-only by
+ * __nilfs_error or nilfs_remount and then remounted
+ * read/write. In these cases, reuse the existing
+ * writer.
*/
- nilfs_detach_log_writer(sb);
+ return 0;
}
nilfs->ns_writer = nilfs_segctor_new(sb, root);
--- a/fs/nilfs2/super.c~nilfs2-fix-use-after-free-bug-of-ns_writer-on-remount
+++ a/fs/nilfs2/super.c
@@ -1133,8 +1133,6 @@ static int nilfs_remount(struct super_bl
if ((bool)(*flags & SB_RDONLY) == sb_rdonly(sb))
goto out;
if (*flags & SB_RDONLY) {
- /* Shutting down log writer */
- nilfs_detach_log_writer(sb);
sb->s_flags |= SB_RDONLY;
/*
_
Patches currently in -mm which might be from konishi.ryusuke(a)gmail.com are
nilfs2-fix-deadlock-in-nilfs_count_free_blocks.patch
nilfs2-fix-use-after-free-bug-of-ns_writer-on-remount.patch
nilfs2-fix-shift-out-of-bounds-overflow-in-nilfs_sb2_bad_offset.patch
nilfs2-fix-shift-out-of-bounds-due-to-too-large-exponent-of-block-size.patch
In commit 720c24192404 ("ANDROID: binder: change down_write to
down_read") binder assumed the mmap read lock is sufficient to protect
alloc->vma inside binder_update_page_range(). This used to be accurate
until commit dd2283f2605e ("mm: mmap: zap pages with read mmap_sem in
munmap"), which now downgrades the mmap_lock after detaching the vma
from the rbtree in munmap(). Then it proceeds to teardown and free the
vma with only the read lock held.
This means that accesses to alloc->vma in binder_update_page_range() now
will race with vm_area_free() in munmap() and can cause a UAF as shown
in the following KASAN trace:
==================================================================
BUG: KASAN: use-after-free in vm_insert_page+0x7c/0x1f0
Read of size 8 at addr ffff16204ad00600 by task server/558
CPU: 3 PID: 558 Comm: server Not tainted 5.10.150-00001-gdc8dcf942daa #1
Hardware name: linux,dummy-virt (DT)
Call trace:
dump_backtrace+0x0/0x2a0
show_stack+0x18/0x2c
dump_stack+0xf8/0x164
print_address_description.constprop.0+0x9c/0x538
kasan_report+0x120/0x200
__asan_load8+0xa0/0xc4
vm_insert_page+0x7c/0x1f0
binder_update_page_range+0x278/0x50c
binder_alloc_new_buf+0x3f0/0xba0
binder_transaction+0x64c/0x3040
binder_thread_write+0x924/0x2020
binder_ioctl+0x1610/0x2e5c
__arm64_sys_ioctl+0xd4/0x120
el0_svc_common.constprop.0+0xac/0x270
do_el0_svc+0x38/0xa0
el0_svc+0x1c/0x2c
el0_sync_handler+0xe8/0x114
el0_sync+0x180/0x1c0
Allocated by task 559:
kasan_save_stack+0x38/0x6c
__kasan_kmalloc.constprop.0+0xe4/0xf0
kasan_slab_alloc+0x18/0x2c
kmem_cache_alloc+0x1b0/0x2d0
vm_area_alloc+0x28/0x94
mmap_region+0x378/0x920
do_mmap+0x3f0/0x600
vm_mmap_pgoff+0x150/0x17c
ksys_mmap_pgoff+0x284/0x2dc
__arm64_sys_mmap+0x84/0xa4
el0_svc_common.constprop.0+0xac/0x270
do_el0_svc+0x38/0xa0
el0_svc+0x1c/0x2c
el0_sync_handler+0xe8/0x114
el0_sync+0x180/0x1c0
Freed by task 560:
kasan_save_stack+0x38/0x6c
kasan_set_track+0x28/0x40
kasan_set_free_info+0x24/0x4c
__kasan_slab_free+0x100/0x164
kasan_slab_free+0x14/0x20
kmem_cache_free+0xc4/0x34c
vm_area_free+0x1c/0x2c
remove_vma+0x7c/0x94
__do_munmap+0x358/0x710
__vm_munmap+0xbc/0x130
__arm64_sys_munmap+0x4c/0x64
el0_svc_common.constprop.0+0xac/0x270
do_el0_svc+0x38/0xa0
el0_svc+0x1c/0x2c
el0_sync_handler+0xe8/0x114
el0_sync+0x180/0x1c0
[...]
==================================================================
To prevent the race above, revert back to taking the mmap write lock
inside binder_update_page_range(). One might expect an increase of mmap
lock contention. However, binder already serializes these calls via top
level alloc->mutex. Also, there was no performance impact shown when
running the binder benchmark tests.
Note this patch is specific to stable branches 5.4 and 5.10, since in
newer kernel releases binder no longer caches a pointer to the vma.
Instead, it has been refactored to use vma_lookup() which avoids the
issue described here. This switch was introduced in commit a43cfc87caaf
("android: binder: stop saving a pointer to the VMA").
Fixes: dd2283f2605e ("mm: mmap: zap pages with read mmap_sem in munmap")
Reported-by: Jann Horn <jannh(a)google.com>
Cc: <stable(a)vger.kernel.org> # 5.4.x
Cc: Minchan Kim <minchan(a)kernel.org>
Cc: Yang Shi <yang.shi(a)linux.alibaba.com>
Cc: Liam Howlett <liam.howlett(a)oracle.com>
Signed-off-by: Carlos Llamas <cmllamas(a)google.com>
---
drivers/android/binder_alloc.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c
index b5022a7f6bae..7e48ed7c9c8e 100644
--- a/drivers/android/binder_alloc.c
+++ b/drivers/android/binder_alloc.c
@@ -212,7 +212,7 @@ static int binder_update_page_range(struct binder_alloc *alloc, int allocate,
mm = alloc->vma_vm_mm;
if (mm) {
- down_read(&mm->mmap_sem);
+ down_write(&mm->mmap_sem);
vma = alloc->vma;
}
@@ -271,7 +271,7 @@ static int binder_update_page_range(struct binder_alloc *alloc, int allocate,
/* vm_insert_page does not seem to increment the refcount */
}
if (mm) {
- up_read(&mm->mmap_sem);
+ up_write(&mm->mmap_sem);
mmput(mm);
}
return 0;
@@ -304,7 +304,7 @@ static int binder_update_page_range(struct binder_alloc *alloc, int allocate,
}
err_no_vma:
if (mm) {
- up_read(&mm->mmap_sem);
+ up_write(&mm->mmap_sem);
mmput(mm);
}
return vma ? -ENOMEM : -ESRCH;
--
2.38.1.431.g37b22c650d-goog
This is the start of the stable review cycle for the 4.19.264 release.
There are 78 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Fri, 04 Nov 2022 02:20:38 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.19.264-r…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-4.19.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 4.19.264-rc1
Biju Das <biju.das.jz(a)bp.renesas.com>
can: rcar_canfd: rcar_canfd_handle_global_receive(): fix IRQ storm on global FIFO receive
Hyong Youb Kim <hyonkim(a)cisco.com>
net/mlx5e: Do not increment ESN when updating IPsec ESN state
Yang Yingliang <yangyingliang(a)huawei.com>
net: ehea: fix possible memory leak in ehea_register_port()
Aaron Conole <aconole(a)redhat.com>
openvswitch: switch from WARN to pr_warn
Takashi Iwai <tiwai(a)suse.de>
ALSA: aoa: Fix I2S device accounting
Yang Yingliang <yangyingliang(a)huawei.com>
ALSA: aoa: i2sbus: fix possible memory leak in i2sbus_add_dev()
Sudeep Holla <sudeep.holla(a)arm.com>
PM: domains: Fix handling of unavailable/disabled idle states
Yang Yingliang <yangyingliang(a)huawei.com>
net: ksz884x: fix missing pci_disable_device() on error in pcidev_init()
Slawomir Laba <slawomirx.laba(a)intel.com>
i40e: Fix flow-type by setting GL_HASH_INSET registers
Sylwester Dziedziuch <sylwesterx.dziedziuch(a)intel.com>
i40e: Fix VF hang when reset is triggered on another VF
Slawomir Laba <slawomirx.laba(a)intel.com>
i40e: Fix ethtool rx-flow-hash setting for X722
Hans Verkuil <hverkuil-cisco(a)xs4all.nl>
media: videodev2.h: V4L2_DV_BT_BLANKING_HEIGHT should check 'interlaced'
Hans Verkuil <hverkuil-cisco(a)xs4all.nl>
media: v4l2-dv-timings: add sanity checks for blanking values
Hans Verkuil <hverkuil-cisco(a)xs4all.nl>
media: vivid: dev->bitmap_cap wasn't freed in all cases
Hans Verkuil <hverkuil-cisco(a)xs4all.nl>
media: vivid: s_fbuf: add more sanity checks
Mario Limonciello <mario.limonciello(a)amd.com>
PM: hibernate: Allow hybrid sleep to work with s2idle
Dongliang Mu <dzm91(a)hust.edu.cn>
can: mscan: mpc5xxx: mpc5xxx_can_probe(): add missing put_clock() in error path
Neal Cardwell <ncardwell(a)google.com>
tcp: fix indefinite deferral of RTO with SACK reneging
Zhang Changzhong <zhangchangzhong(a)huawei.com>
net: lantiq_etop: don't free skb when returning NETDEV_TX_BUSY
Zhengchao Shao <shaozhengchao(a)huawei.com>
net: fix UAF issue in nfqnl_nf_hook_drop() when ops_init() failed
Eric Dumazet <edumazet(a)google.com>
kcm: annotate data-races around kcm->rx_wait
Eric Dumazet <edumazet(a)google.com>
kcm: annotate data-races around kcm->rx_psock
Raju Rangoju <Raju.Rangoju(a)amd.com>
amd-xgbe: add the bit rate quirk for Molex cables
Raju Rangoju <Raju.Rangoju(a)amd.com>
amd-xgbe: fix the SFP compliance codes check for DAC cables
Chen Zhongjin <chenzhongjin(a)huawei.com>
x86/unwind/orc: Fix unreliable stack dump with gcov
Yang Yingliang <yangyingliang(a)huawei.com>
net: netsec: fix error handling in netsec_register_mdio()
Xin Long <lucien.xin(a)gmail.com>
tipc: fix a null-ptr-deref in tipc_topsrv_accept
Yang Yingliang <yangyingliang(a)huawei.com>
ALSA: ac97: fix possible memory leak in snd_ac97_dev_register()
Randy Dunlap <rdunlap(a)infradead.org>
arc: iounmap() arg is volatile
Nathan Huckleberry <nhuck(a)google.com>
drm/msm: Fix return type of mdp4_lvds_connector_mode_valid
Wei Yongjun <weiyongjun1(a)huawei.com>
net: ieee802154: fix error return code in dgram_bind()
Rik van Riel <riel(a)surriel.com>
mm,hugetlb: take hugetlb_lock before decrementing h->resv_huge_pages
M. Vefa Bicakci <m.v.b(a)runbox.com>
xen/gntdev: Prevent leaking grants
Jan Beulich <jbeulich(a)suse.com>
Xen/gntdev: don't ignore kernel unmapping error
Heiko Carstens <hca(a)linux.ibm.com>
s390/futex: add missing EX_TABLE entry to __futex_atomic_op()
Adrian Hunter <adrian.hunter(a)intel.com>
perf auxtrace: Fix address filter symbol name match for modules
Christian A. Ehrhardt <lk(a)c--e.de>
kernfs: fix use-after-free in __kernfs_remove
Matthew Ma <mahongwei(a)zeku.com>
mmc: core: Fix kernel panic when remove non-standard SDIO card
Johan Hovold <johan+linaro(a)kernel.org>
drm/msm/hdmi: fix memory corruption with too many bridges
Johan Hovold <johan+linaro(a)kernel.org>
drm/msm/dsi: fix memory corruption with too many bridges
Miquel Raynal <miquel.raynal(a)bootlin.com>
mac802154: Fix LQI recording
Hyunwoo Kim <imv4bel(a)gmail.com>
fbdev: smscufx: Fix several use-after-free bugs
Shreeya Patel <shreeya.patel(a)collabora.com>
iio: light: tsl2583: Fix module unloading
Matti Vaittinen <mazziesaccount(a)gmail.com>
tools: iio: iio_utils: fix digit calculation
Mathias Nyman <mathias.nyman(a)linux.intel.com>
xhci: Remove device endpoints from bandwidth list when freeing the device
Jens Glathe <jens.glathe(a)oldschoolsolutions.biz>
usb: xhci: add XHCI_SPURIOUS_SUCCESS to ASM1042 despite being a V0.96 controller
Justin Chen <justinpopo6(a)gmail.com>
usb: bdc: change state when port disconnected
Thinh Nguyen <Thinh.Nguyen(a)synopsys.com>
usb: dwc3: gadget: Don't set IMI for no_interrupt
Thinh Nguyen <Thinh.Nguyen(a)synopsys.com>
usb: dwc3: gadget: Stop processing more requests on IMI
Hannu Hartikainen <hannu(a)hrtk.in>
USB: add RESET_RESUME quirk for NVIDIA Jetson devices in RCM
Jason A. Donenfeld <Jason(a)zx2c4.com>
ALSA: au88x0: use explicitly signed char
Steven Rostedt (Google) <rostedt(a)goodmis.org>
ALSA: Use del_timer_sync() before freeing timer
Anssi Hannula <anssi.hannula(a)bitwise.fi>
can: kvaser_usb: Fix possible completions during init_completion
Seth Jenkins <sethjenkins(a)google.com>
mm: /proc/pid/smaps_rollup: fix no vma's null-deref
Gaurav Kohli <gauravkohli(a)linux.microsoft.com>
hv_netvsc: Fix race between VF offering and VF association message from host
Nick Desaulniers <ndesaulniers(a)google.com>
Makefile.debug: re-enable debug info for .S files
Werner Sembach <wse(a)tuxedocomputers.com>
ACPI: video: Force backlight native for more TongFang devices
Chen-Yu Tsai <wenst(a)chromium.org>
media: v4l2-mem2mem: Apply DST_QUEUE_OFF_BASE on MMAP buffers across ioctls
Jerry Snitselaar <jsnitsel(a)redhat.com>
iommu/vt-d: Clean up si_domain in the init_dmars() error path
Yang Yingliang <yangyingliang(a)huawei.com>
net: hns: fix possible memory leak in hnae_ae_register()
Zhengchao Shao <shaozhengchao(a)huawei.com>
net: sched: cake: fix null pointer access issue when cake_init() fails
Xiaobo Liu <cppcoffee(a)gmail.com>
net/atm: fix proc_mpc_write incorrect return value
José Expósito <jose.exposito89(a)gmail.com>
HID: magicmouse: Do not set BTN_MOUSE on double report
Alexander Potapenko <glider(a)google.com>
tipc: fix an information leak in tipc_topsrv_kern_subscr
Mark Tomlinson <mark.tomlinson(a)alliedtelesis.co.nz>
tipc: Fix recognition of trial period
Tony Luck <tony.luck(a)intel.com>
ACPI: extlog: Handle multiple records
Filipe Manana <fdmanana(a)suse.com>
btrfs: fix processing of delayed tree block refs during backref walking
Filipe Manana <fdmanana(a)suse.com>
btrfs: fix processing of delayed data refs during backref walking
Jean-Francois Le Fillatre <jflf_kernel(a)gmx.com>
r8152: add PID for the Lenovo OneLink+ Dock
James Morse <james.morse(a)arm.com>
arm64: errata: Remove AES hwcap for COMPAT tasks
Bryan O'Donoghue <bryan.odonoghue(a)linaro.org>
media: venus: dec: Handle the case where find_format fails
Eric Ren <renzhengeek(a)gmail.com>
KVM: arm64: vgic: Fix exit condition in scan_its_table()
Kai-Heng Feng <kai.heng.feng(a)canonical.com>
ata: ahci: Match EM_MAX_SLOTS with SATA_PMP_MAX_PORTS
Alexander Stein <alexander.stein(a)ew.tq-group.com>
ata: ahci-imx: Fix MODULE_ALIAS
Zhang Rui <rui.zhang(a)intel.com>
hwmon/coretemp: Handle large core ID value
Borislav Petkov <bp(a)suse.de>
x86/microcode/AMD: Apply the patch early on every logical thread
Joseph Qi <joseph.qi(a)linux.alibaba.com>
ocfs2: fix BUG when iput after ocfs2_mknod fails
Joseph Qi <joseph.qi(a)linux.alibaba.com>
ocfs2: clear dinode links count in case of error
-------------
Diffstat:
Documentation/arm64/silicon-errata.txt | 2 +
Makefile | 8 +-
arch/arc/include/asm/io.h | 2 +-
arch/arc/mm/ioremap.c | 2 +-
arch/arm64/Kconfig | 16 ++++
arch/arm64/include/asm/cpucaps.h | 3 +-
arch/arm64/kernel/cpu_errata.c | 17 ++++
arch/arm64/kernel/cpufeature.c | 13 ++-
arch/s390/include/asm/futex.h | 3 +-
arch/x86/kernel/cpu/microcode/amd.c | 16 +++-
arch/x86/kernel/unwind_orc.c | 2 +-
drivers/acpi/acpi_extlog.c | 33 ++++---
drivers/acpi/video_detect.c | 64 +++++++++++++
drivers/ata/ahci.h | 2 +-
drivers/ata/ahci_imx.c | 2 +-
drivers/base/power/domain.c | 4 +
.../gpu/drm/msm/disp/mdp4/mdp4_lvds_connector.c | 5 +-
drivers/gpu/drm/msm/dsi/dsi.c | 6 ++
drivers/gpu/drm/msm/hdmi/hdmi.c | 5 ++
drivers/hid/hid-magicmouse.c | 2 +-
drivers/hwmon/coretemp.c | 56 ++++++++----
drivers/iio/light/tsl2583.c | 2 +-
drivers/iommu/intel-iommu.c | 5 ++
drivers/media/platform/qcom/venus/vdec.c | 2 +
drivers/media/platform/vivid/vivid-core.c | 22 +++++
drivers/media/platform/vivid/vivid-core.h | 2 +
drivers/media/platform/vivid/vivid-vid-cap.c | 27 ++++--
drivers/media/v4l2-core/v4l2-dv-timings.c | 14 +++
drivers/media/v4l2-core/v4l2-mem2mem.c | 62 +++++++++----
drivers/mmc/core/sdio_bus.c | 3 +-
drivers/net/can/mscan/mpc5xxx_can.c | 8 +-
drivers/net/can/rcar/rcar_canfd.c | 6 +-
drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c | 4 +-
drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c | 4 +-
drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c | 17 ++--
drivers/net/ethernet/hisilicon/hns/hnae.c | 4 +-
drivers/net/ethernet/ibm/ehea/ehea_main.c | 1 +
drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 100 ++++++++++++---------
drivers/net/ethernet/intel/i40e/i40e_type.h | 4 +
drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 43 ++++++---
drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h | 1 +
drivers/net/ethernet/lantiq_etop.c | 1 -
.../ethernet/mellanox/mlx5/core/en_accel/ipsec.c | 3 -
drivers/net/ethernet/micrel/ksz884x.c | 2 +-
drivers/net/ethernet/socionext/netsec.c | 2 +
drivers/net/hyperv/hyperv_net.h | 3 +
drivers/net/hyperv/netvsc.c | 4 +
drivers/net/hyperv/netvsc_drv.c | 20 +++++
drivers/net/usb/cdc_ether.c | 7 ++
drivers/net/usb/r8152.c | 1 +
drivers/usb/core/quirks.c | 9 ++
drivers/usb/dwc3/gadget.c | 8 +-
drivers/usb/gadget/udc/bdc/bdc_udc.c | 1 +
drivers/usb/host/xhci-mem.c | 20 +++--
drivers/usb/host/xhci-pci.c | 8 +-
drivers/video/fbdev/smscufx.c | 55 ++++++------
drivers/xen/gntdev.c | 30 +++++--
fs/btrfs/backref.c | 46 ++++++----
fs/kernfs/dir.c | 5 +-
fs/ocfs2/namei.c | 23 +++--
fs/proc/task_mmu.c | 2 +-
include/uapi/linux/videodev2.h | 3 +-
kernel/power/hibernate.c | 2 +-
mm/hugetlb.c | 2 +-
net/atm/mpoa_proc.c | 3 +-
net/core/net_namespace.c | 7 ++
net/ieee802154/socket.c | 4 +-
net/ipv4/tcp_input.c | 3 +-
net/kcm/kcmsock.c | 23 +++--
net/mac802154/rx.c | 5 +-
net/openvswitch/datapath.c | 3 +-
net/sched/sch_cake.c | 4 +
net/tipc/discover.c | 2 +-
net/tipc/topsrv.c | 18 ++--
sound/aoa/soundbus/i2sbus/core.c | 7 +-
sound/pci/ac97/ac97_codec.c | 1 +
sound/pci/au88x0/au88x0.h | 6 +-
sound/pci/au88x0/au88x0_core.c | 2 +-
sound/synth/emux/emux.c | 7 +-
tools/iio/iio_utils.c | 4 +
tools/perf/util/auxtrace.c | 10 ++-
virt/kvm/arm/vgic/vgic-its.c | 5 +-
82 files changed, 718 insertions(+), 247 deletions(-)
Dear ,
Please can I have your attention and possibly help me for humanity's
sake please. I am writing this message with a heavy heart filled with
sorrows and sadness.
Please if you can respond, i have an issue that i will be most
grateful if you could help me deal with it please.
Susan
Dearly Beloved In Christ,
Please forgive me for stressing you with my predicaments as I directly
believe that you will be honest to fulfill my final wish before i die.
I am Mrs.Sophia Erick, and i was Diagnosed with Cancer about 2 years
ago, before i go for a surgery i have to do this by helping the
Orphanages home, Motherless babies home, less privileged and disable
citizens and widows around the world,
So If you are interested to fulfill my final wish by using the sum of
Eleven Million Dollars, to help them as I mentioned, kindly get back
to me for more information on how the fund will be transferred to your
account.
Warm Regards,
Sincerely Mrs. Sophia Erick.
From: Oliver Hartkopp <socketcan(a)hartkopp.net>
The read access to struct canxl_frame::len inside of a j1939 created
skbuff revealed a missing initialization of reserved and later filled
elements in struct can_frame.
This patch initializes the 8 byte CAN header with zero.
Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol")
Cc: Oleksij Rempel <o.rempel(a)pengutronix.de>
Link: https://lore.kernel.org/linux-can/20221104052235.GA6474@pengutronix.de
Reported-by: syzbot+d168ec0caca4697e03b1(a)syzkaller.appspotmail.com
Signed-off-by: Oliver Hartkopp <socketcan(a)hartkopp.net>
Link: https://lore.kernel.org/all/20221104075000.105414-1-socketcan@hartkopp.net
Cc: stable(a)vger.kernel.org
Signed-off-by: Marc Kleine-Budde <mkl(a)pengutronix.de>
---
net/can/j1939/main.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c
index 144c86b0e3ff..821d4ff303b3 100644
--- a/net/can/j1939/main.c
+++ b/net/can/j1939/main.c
@@ -336,6 +336,9 @@ int j1939_send_one(struct j1939_priv *priv, struct sk_buff *skb)
/* re-claim the CAN_HDR from the SKB */
cf = skb_push(skb, J1939_CAN_HDR);
+ /* initialize header structure */
+ memset(cf, 0, J1939_CAN_HDR);
+
/* make it a full can frame again */
skb_put(skb, J1939_CAN_FTR + (8 - dlc));
--
2.35.1
On 10/16/22 19:21, Bagas Sanjaya wrote:
> On 10/16/22 03:59, Phillip Lougher wrote:
>>
>> Which identified the "squashfs: support reading fragments in readahead call"
>> patch.
>>
>> There is a race-condition introduced in that patch, which involves cache
>> releasing and reuse.
>>
>> The following diff will fix that race-condition. It would be great if
>> someone could test and verify before sending it out as a patch.
>>
>> Thanks
>>
>> Phillip
>>
>> diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c
>> index e56510964b22..6cc23178e9ad 100644
>> --- a/fs/squashfs/file.c
>> +++ b/fs/squashfs/file.c
>> @@ -506,8 +506,9 @@ static int squashfs_readahead_fragment(struct page **page,
>> squashfs_i(inode)->fragment_size);
>> struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
>> unsigned int n, mask = (1 << (msblk->block_log - PAGE_SHIFT)) - 1;
>> + int error = buffer->error;
>>
>> - if (buffer->error)
>> + if (error)
>> goto out;
>>
>> expected += squashfs_i(inode)->fragment_offset;
>> @@ -529,7 +530,7 @@ static int squashfs_readahead_fragment(struct page **page,
>>
>> out:
>> squashfs_cache_put(buffer);
>> - return buffer->error;
>> + return error;
>> }
>>
>> static void squashfs_readahead(struct readahead_control *ractl)
>>
>
> No Verneed warnings so far. However, I need to test for a longer time
> (a day) to check if any warnings are reported.
>
> Thanks.
>
Also, since this regression is also found on linux-6.0.y stable branch,
don't forget to Cc stable list.
Thanks.
--
An old man doll... just what I always wanted! - Clara
#regzbot ^introduced 2139619bcad7ac44cc8f6f749089120594056613
Over at https://lore.kernel.org/linux-riscv/Yz80ewHKTPI5Rvuz@spud/T/#ebde47064434d4… it is reported that 2139619bcad7ac44cc8f6f749089120594056613 regresses userspace (openjdk) on riscv64.
This commit has already been released in v6.0 kernel upstream, but has also been included in the stable patch series all the way back to v4.19.y
There is a proposed fix for this at https://lore.kernel.org/linux-riscv/20220915193702.2201018-1-abrestic@rivos… which has not yet been merged upstream or in stable series.
Please review and merge above proposed fix, or please revert 2139619bcad7ac44cc8f6f749089120594056613 to stop the regression spreading to all the distributions.
In Ubuntu this regression will be tracked as https://bugs.launchpad.net/bugs/+bug/1992484
-------- Forwarded Message --------
Subject: Re: [PATCH] riscv: mmap with PROT_WRITE but no PROT_READ is invalid
Date: Thu, 6 Oct 2022 22:20:02 +0300
From: Eva Kotova <nyandarknessgirl(a)gmail.com>
Reply-To: PH7PR14MB559464DBDD310E755F5B21E8CEDC9(a)PH7PR14MB5594.namprd14.prod.outlook.com
To: coelacanthus(a)outlook.com
CC: c141028(a)gmail.com, dramforever(a)live.com, linux-riscv(a)lists.infradead.org, palmer(a)dabbelt.com, xc-tan(a)outlook.com
On Tue, 31 May 2022 00:56:52 PDT (-0700), coelacanthus(a)outlook.com wrote:
> As mentioned in Table 4.5 in RISC-V spec Volume 2 Section 4.3, write
> but not read is "Reserved for future use.". For now, they are not valid.
> In the current code, -wx is marked as invalid, but -w- is not marked
> as invalid.
This patch breaks OpenJDK/Java on RISC-V, as it tries to create a w-only
protective page:
#
# There is insufficient memory for the Java Runtime Environment to continue.
# Native memory allocation (mmap) failed to map 4096 bytes for failed to
allocate memory for PaX check.
# An error report file with more information is saved as:
# /root/hs_err_pid107.log
I bisected to this commit since on Linux 5.19+ java no longer works.
Perhaps some fallback should be implemented, to prevent userspace
breakage. It is currently documented, that at least on i386 PROT_WRITE
mappings imply PROT_READ (See man mmap(2) NOTES), this would be a good
place to start.
Best regards,
Eva
_______________________________________________
linux-riscv mailing list
linux-riscv(a)lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv
On Wed, 26 Oct 2022 20:24:38 +0900 NARIBAYASHI Akira <a.naribayashi(a)fujitsu.com> wrote:
> Depending on the memory configuration, isolate_freepages_block() may
> scan pages out of the target range and causes panic.
>
> The problem is that pfn as argument of fast_isolate_around() could
> be out of the target range. Therefore we should consider the case
> where pfn < start_pfn, and also the case where end_pfn < pfn.
>
> This problem should have been addressed by the commit 6e2b7044c199
> ("mm, compaction: make fast_isolate_freepages() stay within zone")
> but there was an oversight.
>
> Case1: pfn < start_pfn
>
> <at memory compaction for node Y>
> | node X's zone | node Y's zone
> +-----------------+------------------------------...
> pageblock ^ ^ ^
> +-----------+-----------+-----------+-----------+...
> ^ ^ ^
> ^ ^ end_pfn
> ^ start_pfn = cc->zone->zone_start_pfn
> pfn
> <---------> scanned range by "Scan After"
>
> Case2: end_pfn < pfn
>
> <at memory compaction for node X>
> | node X's zone | node Y's zone
> +-----------------+------------------------------...
> pageblock ^ ^ ^
> +-----------+-----------+-----------+-----------+...
> ^ ^ ^
> ^ ^ pfn
> ^ end_pfn
> start_pfn
> <---------> scanned range by "Scan Before"
>
> It seems that there is no good reason to skip nr_isolated pages
> just after the given pfn. So let's perform a simple scan from start to end
> instead of dividing the scan into "Before" and "After".
Under what circumstances will this panic occur? I assume those
circumstances are pretty rare, given that 6e2b7044c1992 was nearly two
years ago.
Did you consider the desirability of backporting this fix into earlier
kernels?
Loading the ingenic-drm module with modprobe fails with the following
log:
[ 303.561088] Error: Driver 'ingenic-ipu' is already registered, aborting...
modprobe: ERROR: could not insert 'ingenic_drm': Device or resource busy
The reason is that ingenic_drm_init() returns the result of
platform_driver_register() directly without checking it. If
platform_driver_register() fails, ingenic_drm_init() returns without
unregistering ingenic_ipu_driver_ptr, so ingenic-drm can never be loaded later.
A simple call graph is shown as below:
ingenic_drm_init()
platform_driver_register() # ingenic_ipu_driver_ptr are registered
platform_driver_register()
driver_register()
bus_add_driver()
priv = kzalloc(...) # OOM happened
# return without unregister ingenic_ipu_driver_ptr
Fix this problem by checking the return value of
platform_driver_register() and calling platform_driver_unregister() on
ingenic_ipu_driver_ptr if an error happened.
Fixes: fc1acf317b01 ("drm/ingenic: Add support for the IPU")
Signed-off-by: Yuan Can <yuancan(a)huawei.com>
Cc: stable(a)vger.kernel.org
---
Changes in v2:
- Add missing Cc: stable(a)vger.kernel.org
drivers/gpu/drm/ingenic/ingenic-drm-drv.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/ingenic/ingenic-drm-drv.c b/drivers/gpu/drm/ingenic/ingenic-drm-drv.c
index ab0515d2c420..4499a04f7c13 100644
--- a/drivers/gpu/drm/ingenic/ingenic-drm-drv.c
+++ b/drivers/gpu/drm/ingenic/ingenic-drm-drv.c
@@ -1629,7 +1629,11 @@ static int ingenic_drm_init(void)
return err;
}
- return platform_driver_register(&ingenic_drm_driver);
+ err = platform_driver_register(&ingenic_drm_driver);
+ if (IS_ENABLED(CONFIG_DRM_INGENIC_IPU) && err)
+ platform_driver_unregister(ingenic_ipu_driver_ptr);
+
+ return err;
}
module_init(ingenic_drm_init);
--
2.17.1
From: xiongxin <xiongxin(a)kylinos.cn>
The actual calculation formula in the code below is:
max_size = (count - (size + PAGES_FOR_IO)) / 2
- 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE);
But the function comment is written differently; is the comment wrong?
By the way, what exactly do the "/ 2" and "2 *" mean?
Cc: stable(a)vger.kernel.org
Signed-off-by: xiongxin <xiongxin(a)kylinos.cn>
---
kernel/power/snapshot.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 2a406753af90..c20ca5fb9adc 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -1723,8 +1723,8 @@ static unsigned long minimum_image_size(unsigned long saveable)
* /sys/power/reserved_size, respectively). To make this happen, we compute the
* total number of available page frames and allocate at least
*
- * ([page frames total] + PAGES_FOR_IO + [metadata pages]) / 2
- * + 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE)
+ * ([page frames total] - PAGES_FOR_IO - [metadata pages]) / 2
+ * - 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE)
*
* of them, which corresponds to the maximum size of a hibernation image.
*
--
2.25.1
From: Jason Gerecke <killertofu(a)gmail.com>
When support was added for devices using an explicit 3rd barrel switch,
the logic used by devices emulating this feature was broken. The 'if'
statement / block that was introduced only handles the case where the
button is pressed (i.e. 'barrelswitch' and 'barrelswitch2' are both set)
but not the case where it is released (i.e. one or both being cleared).
This results in a BTN_STYLUS3 "down" event being sent when the button
is pressed, but no "up" event ever being sent afterwards.
This patch restores the previously-used logic for determining button
states in the emulated case so that switches are reported correctly
again.
Link: https://github.com/linuxwacom/xf86-input-wacom/issues/292
Fixes: 6d09085b38e5 ("HID: wacom: Adding Support for new usages")
CC: stable(a)vger.kernel.org #v5.19+
Signed-off-by: Jason Gerecke <jason.gerecke(a)wacom.com>
Tested-by: Joshua Dickens <joshua.dickens(a)wacom.com>
Reviewed-by: Ping Cheng <ping.cheng(a)wacom.com>
---
drivers/hid/wacom_wac.c | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c
index 77486962a773f..0f3d57b426846 100644
--- a/drivers/hid/wacom_wac.c
+++ b/drivers/hid/wacom_wac.c
@@ -2520,11 +2520,12 @@ static void wacom_wac_pen_report(struct hid_device *hdev,
if (!delay_pen_events(wacom_wac) && wacom_wac->tool[0]) {
int id = wacom_wac->id[0];
- if (wacom_wac->features.quirks & WACOM_QUIRK_PEN_BUTTON3 &&
- wacom_wac->hid_data.barrelswitch & wacom_wac->hid_data.barrelswitch2) {
- wacom_wac->hid_data.barrelswitch = 0;
- wacom_wac->hid_data.barrelswitch2 = 0;
- wacom_wac->hid_data.barrelswitch3 = 1;
+ if (wacom_wac->features.quirks & WACOM_QUIRK_PEN_BUTTON3) {
+ int sw_state = wacom_wac->hid_data.barrelswitch |
+ (wacom_wac->hid_data.barrelswitch2 << 1);
+ wacom_wac->hid_data.barrelswitch = sw_state == 1;
+ wacom_wac->hid_data.barrelswitch2 = sw_state == 2;
+ wacom_wac->hid_data.barrelswitch3 = sw_state == 3;
}
input_report_key(input, BTN_STYLUS, wacom_wac->hid_data.barrelswitch);
input_report_key(input, BTN_STYLUS2, wacom_wac->hid_data.barrelswitch2);
--
2.38.1
From: Luca Ceresoli <luca.ceresoli(a)bootlin.com>
On Tegra20 and Tegra30 the HOST1X clock is a fractional clock divider with
7 integer bits + 1 decimal bit. This has been verified on both
documentation and real hardware for Tegra20 and on the documentation I was
able to find for Tegra30.
However in the kernel code this clock is declared as an integer divider. A
consequence of this is that requesting 144 MHz for HOST1X which is fed by
pll_p running at 216 MHz would result in 108 MHz (216 / 2) instead of 144
MHz (216 / 1.5).
Fix by replacing the INT() macro with the MUX() macro which, despite the
name, defines a fractional divider. The only difference between the two
macros is the former does not have the TEGRA_DIVIDER_INT flag.
Also move the line together with the other MUX*() ones to keep the existing
file organization.
Fixes: 76ebc134d45d ("clk: tegra: move periph clocks to common file")
Cc: stable(a)vger.kernel.org
Cc: Peter De Schrijver <pdeschrijver(a)nvidia.com>
Signed-off-by: Luca Ceresoli <luca.ceresoli(a)bootlin.com>
---
drivers/clk/tegra/clk-tegra-periph.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/clk/tegra/clk-tegra-periph.c b/drivers/clk/tegra/clk-tegra-periph.c
index 4dcf7f7cb8a0..806d835ca0d2 100644
--- a/drivers/clk/tegra/clk-tegra-periph.c
+++ b/drivers/clk/tegra/clk-tegra-periph.c
@@ -615,7 +615,6 @@ static struct tegra_periph_init_data periph_clks[] = {
INT("vde", mux_pllp_pllc_pllm_clkm, CLK_SOURCE_VDE, 61, 0, tegra_clk_vde),
INT("vi", mux_pllm_pllc_pllp_plla, CLK_SOURCE_VI, 20, 0, tegra_clk_vi),
INT("epp", mux_pllm_pllc_pllp_plla, CLK_SOURCE_EPP, 19, 0, tegra_clk_epp),
- INT("host1x", mux_pllm_pllc_pllp_plla, CLK_SOURCE_HOST1X, 28, 0, tegra_clk_host1x),
INT("mpe", mux_pllm_pllc_pllp_plla, CLK_SOURCE_MPE, 60, 0, tegra_clk_mpe),
INT("2d", mux_pllm_pllc_pllp_plla, CLK_SOURCE_2D, 21, 0, tegra_clk_gr2d),
INT("3d", mux_pllm_pllc_pllp_plla, CLK_SOURCE_3D, 24, 0, tegra_clk_gr3d),
@@ -664,6 +663,7 @@ static struct tegra_periph_init_data periph_clks[] = {
MUX("owr", mux_pllp_pllc_clkm, CLK_SOURCE_OWR, 71, TEGRA_PERIPH_ON_APB, tegra_clk_owr_8),
MUX("nor", mux_pllp_pllc_pllm_clkm, CLK_SOURCE_NOR, 42, 0, tegra_clk_nor),
MUX("mipi", mux_pllp_pllc_pllm_clkm, CLK_SOURCE_MIPI, 50, TEGRA_PERIPH_ON_APB, tegra_clk_mipi),
+ MUX("host1x", mux_pllm_pllc_pllp_plla, CLK_SOURCE_HOST1X, 28, 0, tegra_clk_host1x),
MUX("vi_sensor", mux_pllm_pllc_pllp_plla, CLK_SOURCE_VI_SENSOR, 20, TEGRA_PERIPH_NO_RESET, tegra_clk_vi_sensor),
MUX("vi_sensor", mux_pllc_pllp_plla, CLK_SOURCE_VI_SENSOR, 20, TEGRA_PERIPH_NO_RESET, tegra_clk_vi_sensor_9),
MUX("cilab", mux_pllp_pllc_clkm, CLK_SOURCE_CILAB, 144, 0, tegra_clk_cilab),
--
2.34.1
From: xiongxin <xiongxin(a)kylinos.cn>
Added a check on the return value of preallocate_image_highmem(). If
memory preallocate is insufficient, S4 cannot be done;
I am playing 4K video on a machine with AMD or other graphics card and
only 8GiB memory, and the kernel is not configured with CONFIG_HIGHMEM.
When doing the S4 test, the analysis found that when the pages get from
minimum_image_size() is large enough, The preallocate_image_memory() and
preallocate_image_highmem() calls failed to obtain enough memory. Add
the judgment that memory preallocate is insufficient;
The detailed debugging data is as follows:
image_size: 3225923584, totalram_pages: 1968948 in
hibernate_reserved_size_init();
in hibernate_preallocate_memory():
code pages = minimum_image_size(saveable) = 717992, at this time(line):
count: 2030858
avail_normal: 2053753
highmem: 0
totalreserve_pages: 22895
max_size: 1013336
size: 787579
saveable: 1819905
When the code executes to:
pages = preallocate_image_memory(alloc, avail_normal), at that
time(line):
pages_highmem: 0
avail_normal: 1335761
alloc: 1017522
pages: 1017522
So enter the else branch judged by (pages < alloc), When executed to
size = preallocate_image_memory(alloc, avail_normal):
alloc = max_size - size = 225757;
size = preallocate_image_memory(alloc, avail_normal) = 168671, That is,
preallocate_image_memory() does not apply for all alloc memory pages,
because highmem is not enabled, and size_highmem will return 0 here, so
there is a memory page that has not been preallocated, so I think a
judgment needs to be added here.
But what I can't understand is that although pages are not preallocated
enough, "pages -= free_unnecessary_pages()" in the code below can also
discard some pages that have been preallocated, so I am not sure whether
it is appropriate to add a judgment here.
Cc: stable(a)vger.kernel.org
Signed-off-by: xiongxin <xiongxin(a)kylinos.cn>
Signed-off-by: huanglei <huanglei(a)kylinos.cn>
---
kernel/power/snapshot.c | 11 +++++++++--
1 file changed, 9 insertions(+), 2 deletions(-)
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index c20ca5fb9adc..546d544cf7de 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -1854,6 +1854,8 @@ int hibernate_preallocate_memory(void)
alloc = (count - pages) - size;
pages += preallocate_image_highmem(alloc);
} else {
+ unsigned long size_highmem = 0;
+
/*
* There are approximately max_size saveable pages at this point
* and we want to reduce this number down to size.
@@ -1863,8 +1865,13 @@ int hibernate_preallocate_memory(void)
pages_highmem += size;
alloc -= size;
size = preallocate_image_memory(alloc, avail_normal);
- pages_highmem += preallocate_image_highmem(alloc - size);
- pages += pages_highmem + size;
+ size_highmem = preallocate_image_highmem(alloc - size);
+ if (size_highmem < (alloc - size)) {
+ pr_err("Image allocation is %lu pages short, exit\n",
+ alloc - size - pages_highmem);
+ goto err_out;
+ }
+ pages += pages_highmem + size_highmem + size;
}
/*
--
2.25.1
This reverts commit 6000b8d900cd5f52fbcd0776d0cc396e88c8c2ea.
The offending commit disabled the USB core PHY management as the dwc3
already manages the PHYs in question.
Unfortunately some platforms have started relying on having USB core
also controlling the PHY and this is specifically currently needed on
some Exynos platforms for PHY calibration or connected device may fail
to enumerate.
The PHY calibration was previously handled in the dwc3 driver, but to
work around some issues related to how the dwc3 driver interacts with
xhci (e.g. using multiple drivers) this was moved to USB core by commits
34c7ed72f4f0 ("usb: core: phy: add support for PHY calibration") and
a0a465569b45 ("usb: dwc3: remove generic PHY calibrate() calls").
The same PHY obviously should not be controlled from two different
places, which for example do not agree on the PHY mode or power state
during suspend, but as the offending patch was backported to stable,
let's revert it for now.
Reported-by: Stefan Agner <stefan(a)agner.ch>
Link: https://lore.kernel.org/lkml/808bdba846bb60456adf10a3016911ee@agner.ch/
Fixes: 6000b8d900cd ("usb: dwc3: disable USB core PHY management")
Cc: stable(a)vger.kernel.org
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
---
drivers/usb/dwc3/host.c | 10 ----------
1 file changed, 10 deletions(-)
diff --git a/drivers/usb/dwc3/host.c b/drivers/usb/dwc3/host.c
index a7154fe8206d..f6f13e7f1ba1 100644
--- a/drivers/usb/dwc3/host.c
+++ b/drivers/usb/dwc3/host.c
@@ -11,13 +11,8 @@
#include <linux/of.h>
#include <linux/platform_device.h>
-#include "../host/xhci-plat.h"
#include "core.h"
-static const struct xhci_plat_priv dwc3_xhci_plat_priv = {
- .quirks = XHCI_SKIP_PHY_INIT,
-};
-
static void dwc3_host_fill_xhci_irq_res(struct dwc3 *dwc,
int irq, char *name)
{
@@ -97,11 +92,6 @@ int dwc3_host_init(struct dwc3 *dwc)
goto err;
}
- ret = platform_device_add_data(xhci, &dwc3_xhci_plat_priv,
- sizeof(dwc3_xhci_plat_priv));
- if (ret)
- goto err;
-
memset(props, 0, sizeof(struct property_entry) * ARRAY_SIZE(props));
if (dwc->usb3_lpm_capable)
--
2.37.3
A non-first waiter can potentially spin in the for loop of
rwsem_down_write_slowpath() without sleeping but fail to acquire the
lock even if the rwsem is free if the following sequence happens:
Non-first RT waiter First waiter Lock holder
------------------- ------------ -----------
Acquire wait_lock
rwsem_try_write_lock():
Set handoff bit if RT or
wait too long
Set waiter->handoff_set
Release wait_lock
Acquire wait_lock
Inherit waiter->handoff_set
Release wait_lock
Clear owner
Release lock
if (waiter.handoff_set) {
rwsem_spin_on_owner();
if (OWNER_NULL)
goto trylock_again;
}
trylock_again:
Acquire wait_lock
rwsem_try_write_lock():
if (first->handoff_set && (waiter != first))
return false;
Release wait_lock
A non-first waiter cannot really acquire the rwsem even if it mistakenly
believes that it can spin on OWNER_NULL value. If that waiter happens
to be an RT task running on the same CPU as the first waiter, it can
block the first waiter from acquiring the rwsem leading to live lock.
Fix this problem by making sure that a non-first waiter cannot spin in
the slowpath loop without sleeping.
Fixes: d257cc8cb8d5 ("locking/rwsem: Make handoff bit handling more consistent")
Reviewed-and-tested-by: Mukesh Ojha <quic_mojha(a)quicinc.com>
Signed-off-by: Waiman Long <longman(a)redhat.com>
Cc: stable(a)vger.kernel.org
---
kernel/locking/rwsem.c | 19 +++++++++----------
1 file changed, 9 insertions(+), 10 deletions(-)
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index 44873594de03..be2df9ea7c30 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -624,18 +624,16 @@ static inline bool rwsem_try_write_lock(struct rw_semaphore *sem,
*/
if (first->handoff_set && (waiter != first))
return false;
-
- /*
- * First waiter can inherit a previously set handoff
- * bit and spin on rwsem if lock acquisition fails.
- */
- if (waiter == first)
- waiter->handoff_set = true;
}
new = count;
if (count & RWSEM_LOCK_MASK) {
+ /*
+ * A waiter (first or not) can set the handoff bit
+ * if it is an RT task or wait in the wait queue
+ * for too long.
+ */
if (has_handoff || (!rt_task(waiter->task) &&
!time_after(jiffies, waiter->timeout)))
return false;
@@ -651,11 +649,12 @@ static inline bool rwsem_try_write_lock(struct rw_semaphore *sem,
} while (!atomic_long_try_cmpxchg_acquire(&sem->count, &count, new));
/*
- * We have either acquired the lock with handoff bit cleared or
- * set the handoff bit.
+ * We have either acquired the lock with handoff bit cleared or set
+ * the handoff bit. Only the first waiter can have its handoff_set
+ * set here to enable optimistic spinning in slowpath loop.
*/
if (new & RWSEM_FLAG_HANDOFF) {
- waiter->handoff_set = true;
+ first->handoff_set = true;
lockevent_inc(rwsem_wlock_handoff);
return false;
}
--
2.31.1
From: xiongxin <xiongxin(a)kylinos.cn>
Added a check on the return value of preallocate_image_highmem(). If
memory preallocate is insufficient, S4 cannot be done;
I am playing 4K video on a machine with AMD or other graphics card and
only 8GiB memory, and the kernel is not configured with CONFIG_HIGHMEM.
When doing the S4 test, the analysis found that when the pages get from
minimum_image_size() is large enough, The preallocate_image_memory() and
preallocate_image_highmem() calls failed to obtain enough memory. Add
the judgment that memory preallocate is insufficient;
"pages -= free_unnecessary_pages()" below will let pages to drop a lot,
so I wonder if it makes sense to add a judgment here.
Cc: stable(a)vger.kernel.org
Signed-off-by: xiongxin <xiongxin(a)kylinos.cn>
Signed-off-by: huanglei <huanglei(a)kylinos.cn>
---
kernel/power/snapshot.c | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index c20ca5fb9adc..670abf89cf31 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -1738,6 +1738,7 @@ int hibernate_preallocate_memory(void)
struct zone *zone;
unsigned long saveable, size, max_size, count, highmem, pages = 0;
unsigned long alloc, save_highmem, pages_highmem, avail_normal;
+ unsigned long size_highmem;
ktime_t start, stop;
int error;
@@ -1863,7 +1864,13 @@ int hibernate_preallocate_memory(void)
pages_highmem += size;
alloc -= size;
size = preallocate_image_memory(alloc, avail_normal);
- pages_highmem += preallocate_image_highmem(alloc - size);
+ size_highmem += preallocate_image_highmem(alloc - size);
+ if (size_highmem < (alloc - size)) {
+ pr_err("Image allocation is %lu pages short, exit\n",
+ alloc - size - pages_highmem);
+ goto err_out;
+ }
+ pages_highmem += size_highmem;
pages += pages_highmem + size;
}
--
2.25.1
This bug is marked as fixed by commit:
ext4: block range must be validated before use in ext4_mb_clear_bb()
But I can't find it in any tested tree for more than 90 days.
Is it a correct commit? Please update it by replying:
#syz fix: exact-commit-title
Until then the bug is still considered open and
new crashes with the same signature are ignored.
Here are backports of the three patches that failed to apply to 5.15 due
to trivial context conflicts.
Hopefully they apply to the older stable trees as well as-is.
Note that the last patch depends on features that were not added until
5.9 as mentioned in the commit message. Note that the author of that
patch did not add a stable tag for this one, but backporting shouldn't
hurt.
Johan
Johan Hovold (3):
usb: dwc3: fix PHY disable sequence
usb: dwc3: qcom: fix use-after-free on runtime-PM wakeup
usb: dwc3: disable USB core PHY management
drivers/usb/dwc3/core.c | 19 ++++++++++---------
drivers/usb/dwc3/dwc3-qcom.c | 14 +++++++++++++-
drivers/usb/dwc3/host.c | 11 +++++++++++
3 files changed, 34 insertions(+), 10 deletions(-)
--
2.35.1
It is valid to receive an external interrupt and have a broken IDT entry,
which will lead to #GP with exit_int_info that will contain the index of
the IDT entry (e.g. any value).
Other exceptions can happen as well, like #NP or #SS
(if stack switch fails).
Thus this warning can be user triggered and has very little value.
Cc: stable(a)vger.kernel.org
Signed-off-by: Maxim Levitsky <mlevitsk(a)redhat.com>
---
arch/x86/kvm/svm/svm.c | 9 ---------
1 file changed, 9 deletions(-)
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index e9cec1b692051c..36f651ce842174 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3428,15 +3428,6 @@ static int svm_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
return 0;
}
- if (is_external_interrupt(svm->vmcb->control.exit_int_info) &&
- exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR &&
- exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH &&
- exit_code != SVM_EXIT_INTR && exit_code != SVM_EXIT_NMI)
- printk(KERN_ERR "%s: unexpected exit_int_info 0x%x "
- "exit_code 0x%x\n",
- __func__, svm->vmcb->control.exit_int_info,
- exit_code);
-
if (exit_fastpath != EXIT_FASTPATH_NONE)
return 1;
--
2.34.3
While not obvious, kvm_vcpu_reset() leaves the nested mode by clearing
'vcpu->arch.hflags' but it does so without all the required housekeeping.
On SVM, it is possible to have a vCPU reset while in guest mode because
unlike VMX, on SVM, INIT's are not latched in SVM non root mode and in
addition to that L1 doesn't have to intercept triple fault, which should
also trigger L1's reset if happens in L2 while L1 didn't intercept it.
If one of the above conditions happens, KVM will continue to use vmcb02 while
not being in guest mode.
Later the IA32_EFER will be cleared which will lead to freeing of the nested
guest state which will (correctly) free the vmcb02, but since KVM still
uses it (incorrectly) this will lead to a use after free and kernel crash.
This issue is assigned CVE-2022-3344
Cc: stable(a)vger.kernel.org
Signed-off-by: Maxim Levitsky <mlevitsk(a)redhat.com>
---
arch/x86/kvm/x86.c | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 316ab1d5317f92..3fd900504e683b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -11694,8 +11694,18 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
WARN_ON_ONCE(!init_event &&
(old_cr0 || kvm_read_cr3(vcpu) || kvm_read_cr4(vcpu)));
+ /*
+ * SVM doesn't unconditionally VM-Exit on INIT and SHUTDOWN, thus it's
+ * possible to INIT the vCPU while L2 is active. Force the vCPU back
+ * into L1 as EFER.SVME is cleared on INIT (along with all other EFER
+ * bits), i.e. virtualization is disabled.
+ */
+ if (is_guest_mode(vcpu))
+ kvm_leave_nested(vcpu);
+
kvm_lapic_reset(vcpu, init_event);
+ WARN_ON_ONCE(is_guest_mode(vcpu) || is_smm(vcpu));
vcpu->arch.hflags = 0;
vcpu->arch.smi_pending = 0;
--
2.34.3
Make sure that KVM uses vmcb01 before freeing nested state, and warn if
that is not the case.
This is a minimal fix for CVE-2022-3344 making the kernel print a warning
instead of a kernel panic.
Cc: stable(a)vger.kernel.org
Signed-off-by: Maxim Levitsky <mlevitsk(a)redhat.com>
---
arch/x86/kvm/svm/nested.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index b258d6988f5dde..b74da40c1fc40c 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -1126,6 +1126,9 @@ void svm_free_nested(struct vcpu_svm *svm)
if (!svm->nested.initialized)
return;
+ if (WARN_ON_ONCE(svm->vmcb != svm->vmcb01.ptr))
+ svm_switch_vmcb(svm, &svm->vmcb01);
+
svm_vcpu_free_msrpm(svm->nested.msrpm);
svm->nested.msrpm = NULL;
--
2.34.3
If the VM was terminated while nested, we free the nested state
while the vCPU still is in nested mode.
Soon a warning will be added for this condition.
Cc: stable(a)vger.kernel.org
Signed-off-by: Maxim Levitsky <mlevitsk(a)redhat.com>
---
arch/x86/kvm/svm/svm.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index d22a809d923339..e9cec1b692051c 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1440,6 +1440,7 @@ static void svm_vcpu_free(struct kvm_vcpu *vcpu)
*/
svm_clear_current_vmcb(svm->vmcb);
+ svm_leave_nested(vcpu);
svm_free_nested(svm);
sev_free_vcpu(vcpu);
--
2.34.3
This is the start of the stable review cycle for the 5.4.223 release.
There are 64 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Fri, 04 Nov 2022 02:20:38 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v5.x/stable-review/patch-5.4.223-rc…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-5.4.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 5.4.223-rc1
Biju Das <biju.das.jz(a)bp.renesas.com>
can: rcar_canfd: rcar_canfd_handle_global_receive(): fix IRQ storm on global FIFO receive
Vladimir Oltean <vladimir.oltean(a)nxp.com>
net: enetc: survive memory pressure without crashing
Tariq Toukan <tariqt(a)nvidia.com>
net/mlx5: Fix possible use-after-free in async command interface
Hyong Youb Kim <hyonkim(a)cisco.com>
net/mlx5e: Do not increment ESN when updating IPsec ESN state
Nicolas Dichtel <nicolas.dichtel(a)6wind.com>
nh: fix scope used to find saddr when adding non gw nh
Yang Yingliang <yangyingliang(a)huawei.com>
net: ehea: fix possible memory leak in ehea_register_port()
Aaron Conole <aconole(a)redhat.com>
openvswitch: switch from WARN to pr_warn
Takashi Iwai <tiwai(a)suse.de>
ALSA: aoa: Fix I2S device accounting
Yang Yingliang <yangyingliang(a)huawei.com>
ALSA: aoa: i2sbus: fix possible memory leak in i2sbus_add_dev()
Sudeep Holla <sudeep.holla(a)arm.com>
PM: domains: Fix handling of unavailable/disabled idle states
Yang Yingliang <yangyingliang(a)huawei.com>
net: ksz884x: fix missing pci_disable_device() on error in pcidev_init()
Slawomir Laba <slawomirx.laba(a)intel.com>
i40e: Fix flow-type by setting GL_HASH_INSET registers
Sylwester Dziedziuch <sylwesterx.dziedziuch(a)intel.com>
i40e: Fix VF hang when reset is triggered on another VF
Slawomir Laba <slawomirx.laba(a)intel.com>
i40e: Fix ethtool rx-flow-hash setting for X722
Hans Verkuil <hverkuil-cisco(a)xs4all.nl>
media: videodev2.h: V4L2_DV_BT_BLANKING_HEIGHT should check 'interlaced'
Hans Verkuil <hverkuil-cisco(a)xs4all.nl>
media: v4l2-dv-timings: add sanity checks for blanking values
Hans Verkuil <hverkuil-cisco(a)xs4all.nl>
media: vivid: dev->bitmap_cap wasn't freed in all cases
Hans Verkuil <hverkuil-cisco(a)xs4all.nl>
media: vivid: s_fbuf: add more sanity checks
Mario Limonciello <mario.limonciello(a)amd.com>
PM: hibernate: Allow hybrid sleep to work with s2idle
Dongliang Mu <dzm91(a)hust.edu.cn>
can: mscan: mpc5xxx: mpc5xxx_can_probe(): add missing put_clock() in error path
Neal Cardwell <ncardwell(a)google.com>
tcp: fix indefinite deferral of RTO with SACK reneging
Zhang Changzhong <zhangchangzhong(a)huawei.com>
net: lantiq_etop: don't free skb when returning NETDEV_TX_BUSY
Zhengchao Shao <shaozhengchao(a)huawei.com>
net: fix UAF issue in nfqnl_nf_hook_drop() when ops_init() failed
Eric Dumazet <edumazet(a)google.com>
kcm: annotate data-races around kcm->rx_wait
Eric Dumazet <edumazet(a)google.com>
kcm: annotate data-races around kcm->rx_psock
Raju Rangoju <Raju.Rangoju(a)amd.com>
amd-xgbe: add the bit rate quirk for Molex cables
Raju Rangoju <Raju.Rangoju(a)amd.com>
amd-xgbe: fix the SFP compliance codes check for DAC cables
Chen Zhongjin <chenzhongjin(a)huawei.com>
x86/unwind/orc: Fix unreliable stack dump with gcov
Yang Yingliang <yangyingliang(a)huawei.com>
net: netsec: fix error handling in netsec_register_mdio()
Xin Long <lucien.xin(a)gmail.com>
tipc: fix a null-ptr-deref in tipc_topsrv_accept
Yang Yingliang <yangyingliang(a)huawei.com>
ALSA: ac97: fix possible memory leak in snd_ac97_dev_register()
Randy Dunlap <rdunlap(a)infradead.org>
arc: iounmap() arg is volatile
Nathan Huckleberry <nhuck(a)google.com>
drm/msm: Fix return type of mdp4_lvds_connector_mode_valid
Alexander Stein <alexander.stein(a)ew.tq-group.com>
media: v4l2: Fix v4l2_i2c_subdev_set_name function documentation
Wei Yongjun <weiyongjun1(a)huawei.com>
net: ieee802154: fix error return code in dgram_bind()
Rik van Riel <riel(a)surriel.com>
mm,hugetlb: take hugetlb_lock before decrementing h->resv_huge_pages
Chen Zhou <chenzhou10(a)huawei.com>
cgroup-v1: add disabled controller check in cgroup1_parse_param()
M. Vefa Bicakci <m.v.b(a)runbox.com>
xen/gntdev: Prevent leaking grants
Jan Beulich <jbeulich(a)suse.com>
Xen/gntdev: don't ignore kernel unmapping error
Chandan Babu R <chandan.babu(a)oracle.com>
xfs: force the log after remapping a synchronous-writes file
Chandan Babu R <chandan.babu(a)oracle.com>
xfs: clear XFS_DQ_FREEING if we can't lock the dquot buffer to flush
Chandan Babu R <chandan.babu(a)oracle.com>
xfs: finish dfops on every insert range shift iteration
Heiko Carstens <hca(a)linux.ibm.com>
s390/pci: add missing EX_TABLE entries to __pcistg_mio_inuser()/__pcilg_mio_inuser()
Heiko Carstens <hca(a)linux.ibm.com>
s390/futex: add missing EX_TABLE entry to __futex_atomic_op()
Adrian Hunter <adrian.hunter(a)intel.com>
perf auxtrace: Fix address filter symbol name match for modules
Christian A. Ehrhardt <lk(a)c--e.de>
kernfs: fix use-after-free in __kernfs_remove
Matthew Ma <mahongwei(a)zeku.com>
mmc: core: Fix kernel panic when remove non-standard SDIO card
Johan Hovold <johan+linaro(a)kernel.org>
drm/msm/hdmi: fix memory corruption with too many bridges
Johan Hovold <johan+linaro(a)kernel.org>
drm/msm/dsi: fix memory corruption with too many bridges
Miquel Raynal <miquel.raynal(a)bootlin.com>
mac802154: Fix LQI recording
Hyunwoo Kim <imv4bel(a)gmail.com>
fbdev: smscufx: Fix several use-after-free bugs
Shreeya Patel <shreeya.patel(a)collabora.com>
iio: light: tsl2583: Fix module unloading
Matti Vaittinen <mazziesaccount(a)gmail.com>
tools: iio: iio_utils: fix digit calculation
Mathias Nyman <mathias.nyman(a)linux.intel.com>
xhci: Remove device endpoints from bandwidth list when freeing the device
Tony O'Brien <tony.obrien(a)alliedtelesis.co.nz>
mtd: rawnand: marvell: Use correct logic for nand-keep-config
Jens Glathe <jens.glathe(a)oldschoolsolutions.biz>
usb: xhci: add XHCI_SPURIOUS_SUCCESS to ASM1042 despite being a V0.96 controller
Justin Chen <justinpopo6(a)gmail.com>
usb: bdc: change state when port disconnected
Thinh Nguyen <Thinh.Nguyen(a)synopsys.com>
usb: dwc3: gadget: Don't set IMI for no_interrupt
Thinh Nguyen <Thinh.Nguyen(a)synopsys.com>
usb: dwc3: gadget: Stop processing more requests on IMI
Hannu Hartikainen <hannu(a)hrtk.in>
USB: add RESET_RESUME quirk for NVIDIA Jetson devices in RCM
Jason A. Donenfeld <Jason(a)zx2c4.com>
ALSA: au88x0: use explicitly signed char
Steven Rostedt (Google) <rostedt(a)goodmis.org>
ALSA: Use del_timer_sync() before freeing timer
Anssi Hannula <anssi.hannula(a)bitwise.fi>
can: kvaser_usb: Fix possible completions during init_completion
Yang Yingliang <yangyingliang(a)huawei.com>
can: j1939: transport: j1939_session_skb_drop_old(): spin_unlock_irqrestore() before kfree_skb()
-------------
Diffstat:
Makefile | 4 +-
arch/arc/include/asm/io.h | 2 +-
arch/arc/mm/ioremap.c | 2 +-
arch/s390/include/asm/futex.h | 3 +-
arch/s390/pci/pci_mmio.c | 8 +-
arch/x86/kernel/unwind_orc.c | 2 +-
drivers/base/power/domain.c | 4 +
.../gpu/drm/msm/disp/mdp4/mdp4_lvds_connector.c | 5 +-
drivers/gpu/drm/msm/dsi/dsi.c | 6 ++
drivers/gpu/drm/msm/hdmi/hdmi.c | 5 ++
drivers/iio/light/tsl2583.c | 2 +-
drivers/media/platform/vivid/vivid-core.c | 22 +++++
drivers/media/platform/vivid/vivid-core.h | 2 +
drivers/media/platform/vivid/vivid-vid-cap.c | 27 ++++--
drivers/media/v4l2-core/v4l2-dv-timings.c | 14 +++
drivers/mmc/core/sdio_bus.c | 3 +-
drivers/mtd/nand/raw/marvell_nand.c | 2 +-
drivers/net/can/mscan/mpc5xxx_can.c | 8 +-
drivers/net/can/rcar/rcar_canfd.c | 6 +-
drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c | 4 +-
drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c | 4 +-
drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c | 17 ++--
drivers/net/ethernet/freescale/enetc/enetc.c | 5 ++
drivers/net/ethernet/ibm/ehea/ehea_main.c | 1 +
drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 100 ++++++++++++---------
drivers/net/ethernet/intel/i40e/i40e_type.h | 4 +
drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 43 ++++++---
drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h | 1 +
drivers/net/ethernet/lantiq_etop.c | 1 -
drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 10 +--
.../ethernet/mellanox/mlx5/core/en_accel/ipsec.c | 3 -
drivers/net/ethernet/micrel/ksz884x.c | 2 +-
drivers/net/ethernet/socionext/netsec.c | 2 +
drivers/usb/core/quirks.c | 9 ++
drivers/usb/dwc3/gadget.c | 8 +-
drivers/usb/gadget/udc/bdc/bdc_udc.c | 1 +
drivers/usb/host/xhci-mem.c | 20 +++--
drivers/usb/host/xhci-pci.c | 8 +-
drivers/video/fbdev/smscufx.c | 55 ++++++------
drivers/xen/gntdev.c | 30 +++++--
fs/kernfs/dir.c | 5 +-
fs/xfs/xfs_bmap_util.c | 2 +-
fs/xfs/xfs_file.c | 17 +++-
fs/xfs/xfs_qm.c | 1 +
include/linux/mlx5/driver.h | 2 +-
include/media/v4l2-common.h | 3 +-
include/uapi/linux/videodev2.h | 3 +-
kernel/cgroup/cgroup-v1.c | 3 +
kernel/power/hibernate.c | 2 +-
mm/hugetlb.c | 2 +-
net/can/j1939/transport.c | 4 +-
net/core/net_namespace.c | 7 ++
net/ieee802154/socket.c | 4 +-
net/ipv4/nexthop.c | 2 +-
net/ipv4/tcp_input.c | 3 +-
net/kcm/kcmsock.c | 23 +++--
net/mac802154/rx.c | 5 +-
net/openvswitch/datapath.c | 3 +-
net/tipc/topsrv.c | 16 +++-
sound/aoa/soundbus/i2sbus/core.c | 7 +-
sound/pci/ac97/ac97_codec.c | 1 +
sound/pci/au88x0/au88x0.h | 6 +-
sound/pci/au88x0/au88x0_core.c | 2 +-
sound/synth/emux/emux.c | 7 +-
tools/iio/iio_utils.c | 4 +
tools/perf/util/auxtrace.c | 10 ++-
66 files changed, 423 insertions(+), 176 deletions(-)
This is the start of the stable review cycle for the 4.9.332 release.
There are 44 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Fri, 04 Nov 2022 02:20:38 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.9.332-rc…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-4.9.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 4.9.332-rc1
Biju Das <biju.das.jz(a)bp.renesas.com>
can: rcar_canfd: rcar_canfd_handle_global_receive(): fix IRQ storm on global FIFO receive
Yang Yingliang <yangyingliang(a)huawei.com>
net: ehea: fix possible memory leak in ehea_register_port()
Aaron Conole <aconole(a)redhat.com>
openvswitch: switch from WARN to pr_warn
Takashi Iwai <tiwai(a)suse.de>
ALSA: aoa: Fix I2S device accounting
Yang Yingliang <yangyingliang(a)huawei.com>
ALSA: aoa: i2sbus: fix possible memory leak in i2sbus_add_dev()
Yang Yingliang <yangyingliang(a)huawei.com>
net: ksz884x: fix missing pci_disable_device() on error in pcidev_init()
Slawomir Laba <slawomirx.laba(a)intel.com>
i40e: Fix ethtool rx-flow-hash setting for X722
Hans Verkuil <hverkuil-cisco(a)xs4all.nl>
media: videodev2.h: V4L2_DV_BT_BLANKING_HEIGHT should check 'interlaced'
Hans Verkuil <hverkuil-cisco(a)xs4all.nl>
media: v4l2-dv-timings: add sanity checks for blanking values
Hans Verkuil <hverkuil-cisco(a)xs4all.nl>
media: vivid: dev->bitmap_cap wasn't freed in all cases
Hans Verkuil <hverkuil-cisco(a)xs4all.nl>
media: vivid: s_fbuf: add more sanity checks
Dongliang Mu <dzm91(a)hust.edu.cn>
can: mscan: mpc5xxx: mpc5xxx_can_probe(): add missing put_clock() in error path
Neal Cardwell <ncardwell(a)google.com>
tcp: fix indefinite deferral of RTO with SACK reneging
Zhang Changzhong <zhangchangzhong(a)huawei.com>
net: lantiq_etop: don't free skb when returning NETDEV_TX_BUSY
Eric Dumazet <edumazet(a)google.com>
kcm: annotate data-races around kcm->rx_wait
Eric Dumazet <edumazet(a)google.com>
kcm: annotate data-races around kcm->rx_psock
Yang Yingliang <yangyingliang(a)huawei.com>
ALSA: ac97: fix possible memory leak in snd_ac97_dev_register()
Randy Dunlap <rdunlap(a)infradead.org>
arc: iounmap() arg is volatile
Nathan Huckleberry <nhuck(a)google.com>
drm/msm: Fix return type of mdp4_lvds_connector_mode_valid
Wei Yongjun <weiyongjun1(a)huawei.com>
net: ieee802154: fix error return code in dgram_bind()
Rik van Riel <riel(a)surriel.com>
mm,hugetlb: take hugetlb_lock before decrementing h->resv_huge_pages
M. Vefa Bicakci <m.v.b(a)runbox.com>
xen/gntdev: Prevent leaking grants
Jan Beulich <jbeulich(a)suse.com>
Xen/gntdev: don't ignore kernel unmapping error
Heiko Carstens <hca(a)linux.ibm.com>
s390/futex: add missing EX_TABLE entry to __futex_atomic_op()
Christian A. Ehrhardt <lk(a)c--e.de>
kernfs: fix use-after-free in __kernfs_remove
Matthew Ma <mahongwei(a)zeku.com>
mmc: core: Fix kernel panic when remove non-standard SDIO card
Johan Hovold <johan+linaro(a)kernel.org>
drm/msm/hdmi: fix memory corruption with too many bridges
Miquel Raynal <miquel.raynal(a)bootlin.com>
mac802154: Fix LQI recording
Hyunwoo Kim <imv4bel(a)gmail.com>
fbdev: smscufx: Fix several use-after-free bugs
Matti Vaittinen <mazziesaccount(a)gmail.com>
tools: iio: iio_utils: fix digit calculation
Mathias Nyman <mathias.nyman(a)linux.intel.com>
xhci: Remove device endpoints from bandwidth list when freeing the device
Justin Chen <justinpopo6(a)gmail.com>
usb: bdc: change state when port disconnected
Hannu Hartikainen <hannu(a)hrtk.in>
USB: add RESET_RESUME quirk for NVIDIA Jetson devices in RCM
Jason A. Donenfeld <Jason(a)zx2c4.com>
ALSA: au88x0: use explicitly signed char
Steven Rostedt (Google) <rostedt(a)goodmis.org>
ALSA: Use del_timer_sync() before freeing timer
Werner Sembach <wse(a)tuxedocomputers.com>
ACPI: video: Force backlight native for more TongFang devices
Yang Yingliang <yangyingliang(a)huawei.com>
net: hns: fix possible memory leak in hnae_ae_register()
Xiaobo Liu <cppcoffee(a)gmail.com>
net/atm: fix proc_mpc_write incorrect return value
José Expósito <jose.exposito89(a)gmail.com>
HID: magicmouse: Do not set BTN_MOUSE on double report
James Morse <james.morse(a)arm.com>
arm64: errata: Remove AES hwcap for COMPAT tasks
Kai-Heng Feng <kai.heng.feng(a)canonical.com>
ata: ahci: Match EM_MAX_SLOTS with SATA_PMP_MAX_PORTS
Alexander Stein <alexander.stein(a)ew.tq-group.com>
ata: ahci-imx: Fix MODULE_ALIAS
Joseph Qi <joseph.qi(a)linux.alibaba.com>
ocfs2: fix BUG when iput after ocfs2_mknod fails
Joseph Qi <joseph.qi(a)linux.alibaba.com>
ocfs2: clear dinode links count in case of error
-------------
Diffstat:
Documentation/arm64/silicon-errata.txt | 2 +
Makefile | 4 +-
arch/arc/include/asm/io.h | 2 +-
arch/arc/mm/ioremap.c | 2 +-
arch/arm64/Kconfig | 16 ++++++
arch/arm64/include/asm/cpucaps.h | 3 +-
arch/arm64/kernel/cpu_errata.c | 16 ++++++
arch/arm64/kernel/cpufeature.c | 13 ++++-
arch/s390/include/asm/futex.h | 3 +-
drivers/acpi/video_detect.c | 64 ++++++++++++++++++++++
drivers/ata/ahci.h | 2 +-
drivers/ata/ahci_imx.c | 2 +-
drivers/gpu/drm/msm/hdmi/hdmi.c | 5 ++
drivers/gpu/drm/msm/mdp/mdp4/mdp4_lvds_connector.c | 5 +-
drivers/hid/hid-magicmouse.c | 2 +-
drivers/media/platform/vivid/vivid-core.c | 22 ++++++++
drivers/media/platform/vivid/vivid-core.h | 2 +
drivers/media/platform/vivid/vivid-vid-cap.c | 27 +++++++--
drivers/media/v4l2-core/v4l2-dv-timings.c | 14 +++++
drivers/mmc/core/sdio_bus.c | 3 +-
drivers/net/can/mscan/mpc5xxx_can.c | 8 ++-
drivers/net/can/rcar/rcar_canfd.c | 6 +-
drivers/net/ethernet/hisilicon/hns/hnae.c | 4 +-
drivers/net/ethernet/ibm/ehea/ehea_main.c | 1 +
drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 31 ++++++++---
drivers/net/ethernet/intel/i40e/i40e_type.h | 4 ++
drivers/net/ethernet/lantiq_etop.c | 1 -
drivers/net/ethernet/micrel/ksz884x.c | 2 +-
drivers/usb/core/quirks.c | 9 +++
drivers/usb/gadget/udc/bdc/bdc_udc.c | 1 +
drivers/usb/host/xhci-mem.c | 20 ++++---
drivers/video/fbdev/smscufx.c | 55 ++++++++++---------
drivers/xen/gntdev.c | 30 ++++++++--
fs/kernfs/dir.c | 5 +-
fs/ocfs2/namei.c | 23 ++++----
include/uapi/linux/videodev2.h | 3 +-
mm/hugetlb.c | 2 +-
net/atm/mpoa_proc.c | 3 +-
net/ieee802154/socket.c | 4 +-
net/ipv4/tcp_input.c | 3 +-
net/kcm/kcmsock.c | 23 +++++---
net/mac802154/rx.c | 5 +-
net/openvswitch/datapath.c | 3 +-
sound/aoa/soundbus/i2sbus/core.c | 7 ++-
sound/pci/ac97/ac97_codec.c | 1 +
sound/pci/au88x0/au88x0.h | 6 +-
sound/pci/au88x0/au88x0_core.c | 2 +-
sound/synth/emux/emux.c | 7 +--
tools/iio/iio_utils.c | 4 ++
49 files changed, 369 insertions(+), 113 deletions(-)
This is the start of the stable review cycle for the 4.14.298 release.
There are 60 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Fri, 04 Nov 2022 02:20:38 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.14.298-r…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-4.14.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 4.14.298-rc1
Biju Das <biju.das.jz(a)bp.renesas.com>
can: rcar_canfd: rcar_canfd_handle_global_receive(): fix IRQ storm on global FIFO receive
Yang Yingliang <yangyingliang(a)huawei.com>
net: ehea: fix possible memory leak in ehea_register_port()
Aaron Conole <aconole(a)redhat.com>
openvswitch: switch from WARN to pr_warn
Takashi Iwai <tiwai(a)suse.de>
ALSA: aoa: Fix I2S device accounting
Yang Yingliang <yangyingliang(a)huawei.com>
ALSA: aoa: i2sbus: fix possible memory leak in i2sbus_add_dev()
Sudeep Holla <sudeep.holla(a)arm.com>
PM: domains: Fix handling of unavailable/disabled idle states
Yang Yingliang <yangyingliang(a)huawei.com>
net: ksz884x: fix missing pci_disable_device() on error in pcidev_init()
Slawomir Laba <slawomirx.laba(a)intel.com>
i40e: Fix flow-type by setting GL_HASH_INSET registers
Slawomir Laba <slawomirx.laba(a)intel.com>
i40e: Fix ethtool rx-flow-hash setting for X722
Hans Verkuil <hverkuil-cisco(a)xs4all.nl>
media: videodev2.h: V4L2_DV_BT_BLANKING_HEIGHT should check 'interlaced'
Hans Verkuil <hverkuil-cisco(a)xs4all.nl>
media: v4l2-dv-timings: add sanity checks for blanking values
Hans Verkuil <hverkuil-cisco(a)xs4all.nl>
media: vivid: dev->bitmap_cap wasn't freed in all cases
Hans Verkuil <hverkuil-cisco(a)xs4all.nl>
media: vivid: s_fbuf: add more sanity checks
Mario Limonciello <mario.limonciello(a)amd.com>
PM: hibernate: Allow hybrid sleep to work with s2idle
Dongliang Mu <dzm91(a)hust.edu.cn>
can: mscan: mpc5xxx: mpc5xxx_can_probe(): add missing put_clock() in error path
Neal Cardwell <ncardwell(a)google.com>
tcp: fix indefinite deferral of RTO with SACK reneging
Zhang Changzhong <zhangchangzhong(a)huawei.com>
net: lantiq_etop: don't free skb when returning NETDEV_TX_BUSY
Eric Dumazet <edumazet(a)google.com>
kcm: annotate data-races around kcm->rx_wait
Eric Dumazet <edumazet(a)google.com>
kcm: annotate data-races around kcm->rx_psock
Raju Rangoju <Raju.Rangoju(a)amd.com>
amd-xgbe: add the bit rate quirk for Molex cables
Raju Rangoju <Raju.Rangoju(a)amd.com>
amd-xgbe: fix the SFP compliance codes check for DAC cables
Chen Zhongjin <chenzhongjin(a)huawei.com>
x86/unwind/orc: Fix unreliable stack dump with gcov
Yang Yingliang <yangyingliang(a)huawei.com>
ALSA: ac97: fix possible memory leak in snd_ac97_dev_register()
Randy Dunlap <rdunlap(a)infradead.org>
arc: iounmap() arg is volatile
Nathan Huckleberry <nhuck(a)google.com>
drm/msm: Fix return type of mdp4_lvds_connector_mode_valid
Wei Yongjun <weiyongjun1(a)huawei.com>
net: ieee802154: fix error return code in dgram_bind()
Rik van Riel <riel(a)surriel.com>
mm,hugetlb: take hugetlb_lock before decrementing h->resv_huge_pages
M. Vefa Bicakci <m.v.b(a)runbox.com>
xen/gntdev: Prevent leaking grants
Jan Beulich <jbeulich(a)suse.com>
Xen/gntdev: don't ignore kernel unmapping error
Heiko Carstens <hca(a)linux.ibm.com>
s390/futex: add missing EX_TABLE entry to __futex_atomic_op()
Christian A. Ehrhardt <lk(a)c--e.de>
kernfs: fix use-after-free in __kernfs_remove
Matthew Ma <mahongwei(a)zeku.com>
mmc: core: Fix kernel panic when remove non-standard SDIO card
Johan Hovold <johan+linaro(a)kernel.org>
drm/msm/hdmi: fix memory corruption with too many bridges
Miquel Raynal <miquel.raynal(a)bootlin.com>
mac802154: Fix LQI recording
Hyunwoo Kim <imv4bel(a)gmail.com>
fbdev: smscufx: Fix several use-after-free bugs
Shreeya Patel <shreeya.patel(a)collabora.com>
iio: light: tsl2583: Fix module unloading
Matti Vaittinen <mazziesaccount(a)gmail.com>
tools: iio: iio_utils: fix digit calculation
Mathias Nyman <mathias.nyman(a)linux.intel.com>
xhci: Remove device endpoints from bandwidth list when freeing the device
Jens Glathe <jens.glathe(a)oldschoolsolutions.biz>
usb: xhci: add XHCI_SPURIOUS_SUCCESS to ASM1042 despite being a V0.96 controller
Justin Chen <justinpopo6(a)gmail.com>
usb: bdc: change state when port disconnected
Thinh Nguyen <Thinh.Nguyen(a)synopsys.com>
usb: dwc3: gadget: Don't set IMI for no_interrupt
Hannu Hartikainen <hannu(a)hrtk.in>
USB: add RESET_RESUME quirk for NVIDIA Jetson devices in RCM
Jason A. Donenfeld <Jason(a)zx2c4.com>
ALSA: au88x0: use explicitly signed char
Steven Rostedt (Google) <rostedt(a)goodmis.org>
ALSA: Use del_timer_sync() before freeing timer
Werner Sembach <wse(a)tuxedocomputers.com>
ACPI: video: Force backlight native for more TongFang devices
Chen-Yu Tsai <wenst(a)chromium.org>
media: v4l2-mem2mem: Apply DST_QUEUE_OFF_BASE on MMAP buffers across ioctls
Jerry Snitselaar <jsnitsel(a)redhat.com>
iommu/vt-d: Clean up si_domain in the init_dmars() error path
Yang Yingliang <yangyingliang(a)huawei.com>
net: hns: fix possible memory leak in hnae_ae_register()
Xiaobo Liu <cppcoffee(a)gmail.com>
net/atm: fix proc_mpc_write incorrect return value
José Expósito <jose.exposito89(a)gmail.com>
HID: magicmouse: Do not set BTN_MOUSE on double report
Tony Luck <tony.luck(a)intel.com>
ACPI: extlog: Handle multiple records
Filipe Manana <fdmanana(a)suse.com>
btrfs: fix processing of delayed data refs during backref walking
Jean-Francois Le Fillatre <jflf_kernel(a)gmx.com>
r8152: add PID for the Lenovo OneLink+ Dock
James Morse <james.morse(a)arm.com>
arm64: errata: Remove AES hwcap for COMPAT tasks
Eric Ren <renzhengeek(a)gmail.com>
KVM: arm64: vgic: Fix exit condition in scan_its_table()
Kai-Heng Feng <kai.heng.feng(a)canonical.com>
ata: ahci: Match EM_MAX_SLOTS with SATA_PMP_MAX_PORTS
Alexander Stein <alexander.stein(a)ew.tq-group.com>
ata: ahci-imx: Fix MODULE_ALIAS
Borislav Petkov <bp(a)suse.de>
x86/microcode/AMD: Apply the patch early on every logical thread
Joseph Qi <joseph.qi(a)linux.alibaba.com>
ocfs2: fix BUG when iput after ocfs2_mknod fails
Joseph Qi <joseph.qi(a)linux.alibaba.com>
ocfs2: clear dinode links count in case of error
-------------
Diffstat:
Documentation/arm64/silicon-errata.txt | 2 +
Makefile | 4 +-
arch/arc/include/asm/io.h | 2 +-
arch/arc/mm/ioremap.c | 2 +-
arch/arm64/Kconfig | 16 ++++
arch/arm64/include/asm/cpucaps.h | 3 +-
arch/arm64/kernel/cpu_errata.c | 16 ++++
arch/arm64/kernel/cpufeature.c | 13 ++-
arch/s390/include/asm/futex.h | 3 +-
arch/x86/kernel/cpu/microcode/amd.c | 16 +++-
arch/x86/kernel/unwind_orc.c | 2 +-
drivers/acpi/acpi_extlog.c | 33 ++++---
drivers/acpi/video_detect.c | 64 +++++++++++++
drivers/ata/ahci.h | 2 +-
drivers/ata/ahci_imx.c | 2 +-
drivers/base/power/domain.c | 4 +
drivers/gpu/drm/msm/hdmi/hdmi.c | 5 ++
drivers/gpu/drm/msm/mdp/mdp4/mdp4_lvds_connector.c | 5 +-
drivers/hid/hid-magicmouse.c | 2 +-
drivers/iio/light/tsl2583.c | 2 +-
drivers/iommu/intel-iommu.c | 5 ++
drivers/media/platform/vivid/vivid-core.c | 22 +++++
drivers/media/platform/vivid/vivid-core.h | 2 +
drivers/media/platform/vivid/vivid-vid-cap.c | 27 ++++--
drivers/media/v4l2-core/v4l2-dv-timings.c | 14 +++
drivers/media/v4l2-core/v4l2-mem2mem.c | 62 +++++++++----
drivers/mmc/core/sdio_bus.c | 3 +-
drivers/net/can/mscan/mpc5xxx_can.c | 8 +-
drivers/net/can/rcar/rcar_canfd.c | 6 +-
drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c | 17 ++--
drivers/net/ethernet/hisilicon/hns/hnae.c | 4 +-
drivers/net/ethernet/ibm/ehea/ehea_main.c | 1 +
drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 100 ++++++++++++---------
drivers/net/ethernet/intel/i40e/i40e_type.h | 4 +
drivers/net/ethernet/lantiq_etop.c | 1 -
drivers/net/ethernet/micrel/ksz884x.c | 2 +-
drivers/net/usb/cdc_ether.c | 7 ++
drivers/net/usb/r8152.c | 1 +
drivers/usb/core/quirks.c | 9 ++
drivers/usb/dwc3/gadget.c | 4 +-
drivers/usb/gadget/udc/bdc/bdc_udc.c | 1 +
drivers/usb/host/xhci-mem.c | 20 +++--
drivers/usb/host/xhci-pci.c | 8 +-
drivers/video/fbdev/smscufx.c | 55 ++++++------
drivers/xen/gntdev.c | 30 +++++--
fs/btrfs/backref.c | 33 +++++--
fs/kernfs/dir.c | 5 +-
fs/ocfs2/namei.c | 23 +++--
include/uapi/linux/videodev2.h | 3 +-
kernel/power/hibernate.c | 2 +-
mm/hugetlb.c | 2 +-
net/atm/mpoa_proc.c | 3 +-
net/ieee802154/socket.c | 4 +-
net/ipv4/tcp_input.c | 3 +-
net/kcm/kcmsock.c | 23 +++--
net/mac802154/rx.c | 5 +-
net/openvswitch/datapath.c | 3 +-
sound/aoa/soundbus/i2sbus/core.c | 7 +-
sound/pci/ac97/ac97_codec.c | 1 +
sound/pci/au88x0/au88x0.h | 6 +-
sound/pci/au88x0/au88x0_core.c | 2 +-
sound/synth/emux/emux.c | 7 +-
tools/iio/iio_utils.c | 4 +
virt/kvm/arm/vgic/vgic-its.c | 5 +-
64 files changed, 553 insertions(+), 199 deletions(-)
From: Thomas Gleixner <tglx(a)linutronix.de>
v4.19.255-rt114-rc2 stable review patch.
If anyone has any objections, please let me know.
-----------
Upstream commit bb7262b295472eb6858b5c49893954794027cd84
syzbot reported KCSAN data races vs. timer_base::running_timer being set to
NULL without holding base::lock in expire_timers().
This looks innocent and most reads are clearly not problematic, but
Frederic identified an issue which is:
int data = 0;
void timer_func(struct timer_list *t)
{
data = 1;
}
CPU 0                                            CPU 1
------------------------------                   --------------------------
base = lock_timer_base(timer, &flags);           raw_spin_unlock(&base->lock);
if (base->running_timer != timer)                call_timer_fn(timer, fn, baseclk);
  ret = detach_if_pending(timer, base, true);    base->running_timer = NULL;
raw_spin_unlock_irqrestore(&base->lock, flags);  raw_spin_lock(&base->lock);

x = data;
If the timer has previously executed on CPU 1, then CPU 0 can observe
base->running_timer == NULL and return, assuming the timer has completed,
but this is not guaranteed on all architectures, so x = data may not
observe the callback's store. The comment for del_timer_sync() makes that
guarantee. Moving the assignment under base->lock prevents this.
For a non-RT kernel it is performance-wise completely irrelevant whether the
store happens before or after taking the lock. For an RT kernel, moving the
store under the lock requires an extra unlock/lock pair in the case that
there is a waiter for the timer, but that's not the end of the world.
Reported-by: syzbot+aa7c2385d46c5eba0b89(a)syzkaller.appspotmail.com
Reported-by: syzbot+abea4558531bae1ba9fe(a)syzkaller.appspotmail.com
Fixes: 030dcdd197d7 ("timers: Prepare support for PREEMPT_RT")
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Tested-by: Sebastian Andrzej Siewior <bigeasy(a)linutronix.de>
Link: https://lore.kernel.org/r/87lfea7gw8.fsf@nanos.tec.linutronix.de
Cc: stable(a)vger.kernel.org
Signed-off-by: Sebastian Andrzej Siewior <bigeasy(a)linutronix.de>
Signed-off-by: Daniel Wagner <wagi(a)monom.org>
---
kernel/time/timer.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index b859ecf6424b..603985720f54 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -1282,8 +1282,10 @@ static inline void timer_base_unlock_expiry(struct timer_base *base)
static void timer_sync_wait_running(struct timer_base *base)
{
if (atomic_read(&base->timer_waiters)) {
+ raw_spin_unlock_irq(&base->lock);
spin_unlock(&base->expiry_lock);
spin_lock(&base->expiry_lock);
+ raw_spin_lock_irq(&base->lock);
}
}
@@ -1458,14 +1460,14 @@ static void expire_timers(struct timer_base *base, struct hlist_head *head)
if (timer->flags & TIMER_IRQSAFE) {
raw_spin_unlock(&base->lock);
call_timer_fn(timer, fn);
- base->running_timer = NULL;
raw_spin_lock(&base->lock);
+ base->running_timer = NULL;
} else {
raw_spin_unlock_irq(&base->lock);
call_timer_fn(timer, fn);
+ raw_spin_lock_irq(&base->lock);
base->running_timer = NULL;
timer_sync_wait_running(base);
- raw_spin_lock_irq(&base->lock);
}
}
}
--
2.38.0
From: Roberto Sassu <roberto.sassu(a)huawei.com>
Commit f3cc6b25dcc5 ("ima: always measure and audit files in policy") lets
measurement or audit happen even if the file digest cannot be calculated.
As a result, iint->ima_hash could have been allocated despite
ima_collect_measurement() returning an error.
Since ima_hash belongs to a temporary inode metadata structure, declared
at the beginning of __ima_inode_hash(), just add a kfree() call if
ima_collect_measurement() returns an error different from -ENOMEM (in that
case, ima_hash should not have been allocated).
Cc: stable(a)vger.kernel.org
Fixes: 280fe8367b0d ("ima: Always return a file measurement in ima_file_hash()")
Signed-off-by: Roberto Sassu <roberto.sassu(a)huawei.com>
---
security/integrity/ima/ima_main.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index 040b03ddc1c7..4a207a3ef7ef 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -542,8 +542,13 @@ static int __ima_inode_hash(struct inode *inode, struct file *file, char *buf,
rc = ima_collect_measurement(&tmp_iint, file, NULL, 0,
ima_hash_algo, NULL);
- if (rc < 0)
+ if (rc < 0) {
+ /* ima_hash could be allocated in case of failure. */
+ if (rc != -ENOMEM)
+ kfree(tmp_iint.ima_hash);
+
return -EOPNOTSUPP;
+ }
iint = &tmp_iint;
mutex_lock(&iint->mutex);
--
2.25.1
From: Kan Liang <kan.liang(a)linux.intel.com>
The intel_pebs_isolation quirk checks both model number and stepping.
Cooper Lake has a different stepping (11) than the other Skylake Xeon.
It cannot benefit from the optimization in commit 9b545c04abd4f
("perf/x86/kvm: Avoid unnecessary work in guest filtering").
Add the stepping of Cooper Lake into the isolation_ucodes[] table.
Signed-off-by: Kan Liang <kan.liang(a)linux.intel.com>
Cc: stable(a)vger.kernel.org
---
arch/x86/events/intel/core.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index d8af75466ee9..dfd2c124cdf8 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -4908,6 +4908,7 @@ static const struct x86_cpu_desc isolation_ucodes[] = {
INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 5, 0x00000000),
INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 6, 0x00000000),
INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 7, 0x00000000),
+ INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 11, 0x00000000),
INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_L, 3, 0x0000007c),
INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE, 3, 0x0000007c),
INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE, 9, 0x0000004e),
--
2.35.1
The patch titled
Subject: mm: hugetlb_vmemmap: include missing linux/moduleparam.h
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-hugetlb_vmemmap-include-missing-linux-moduleparamh.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Vasily Gorbik <gor(a)linux.ibm.com>
Subject: mm: hugetlb_vmemmap: include missing linux/moduleparam.h
Date: Wed, 2 Nov 2022 19:09:17 +0100
The kernel test robot reported build failures with a 'randconfig' on s390:
>> mm/hugetlb_vmemmap.c:421:11: error: a function declaration without a
prototype is deprecated in all versions of C [-Werror,-Wstrict-prototypes]
core_param(hugetlb_free_vmemmap, vmemmap_optimize_enabled, bool, 0);
^
Link: https://lore.kernel.org/linux-mm/202210300751.rG3UDsuc-lkp@intel.com/
Link: https://lkml.kernel.org/r/patch.git-296b83ca939b.your-ad-here.call-01667411…
Fixes: 30152245c63b ("mm: hugetlb_vmemmap: replace early_param() with core_param()")
Signed-off-by: Vasily Gorbik <gor(a)linux.ibm.com>
Reported-by: kernel test robot <lkp(a)intel.com>
Cc: Gerald Schaefer <gerald.schaefer(a)linux.ibm.com>
Cc: Mike Kravetz <mike.kravetz(a)oracle.com>
Cc: Muchun Song <songmuchun(a)bytedance.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/hugetlb_vmemmap.c | 1 +
1 file changed, 1 insertion(+)
--- a/mm/hugetlb_vmemmap.c~mm-hugetlb_vmemmap-include-missing-linux-moduleparamh
+++ a/mm/hugetlb_vmemmap.c
@@ -11,6 +11,7 @@
#define pr_fmt(fmt) "HugeTLB: " fmt
#include <linux/pgtable.h>
+#include <linux/moduleparam.h>
#include <linux/bootmem_info.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
_
Patches currently in -mm which might be from gor(a)linux.ibm.com are
mm-hugetlb_vmemmap-include-missing-linux-moduleparamh.patch
The patch titled
Subject: mm/shmem: use page_mapping() to detect page cache for uffd continue
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-shmem-use-page_mapping-to-detect-page-cache-for-uffd-continue.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Peter Xu <peterx(a)redhat.com>
Subject: mm/shmem: use page_mapping() to detect page cache for uffd continue
Date: Wed, 2 Nov 2022 14:41:52 -0400
mfill_atomic_install_pte() checks page->mapping to detect whether one page
is used in the page cache. However as pointed out by Matthew, the page
can logically be a tail page rather than always the head in the case of
uffd minor mode with UFFDIO_CONTINUE. It means we could wrongly install
one pte with shmem thp tail page assuming it's an anonymous page.
It's not that clear even for anonymous page, since normally anonymous
pages also have page->mapping being setup with the anon vma. It's safe
here only because the only such caller to mfill_atomic_install_pte() is
always passing in a newly allocated page (mcopy_atomic_pte()), whose
page->mapping is not yet setup. However that's not extremely obvious
either.
For either of above, use page_mapping() instead.
Link: https://lkml.kernel.org/r/Y2K+y7wnhC4vbnP2@x1n
Fixes: 153132571f02 ("userfaultfd/shmem: support UFFDIO_CONTINUE for shmem")
Signed-off-by: Peter Xu <peterx(a)redhat.com>
Reported-by: Matthew Wilcox <willy(a)infradead.org>
Cc: Andrea Arcangeli <aarcange(a)redhat.com>
Cc: Hugh Dickins <hughd(a)google.com>
Cc: Axel Rasmussen <axelrasmussen(a)google.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/userfaultfd.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/mm/userfaultfd.c~mm-shmem-use-page_mapping-to-detect-page-cache-for-uffd-continue
+++ a/mm/userfaultfd.c
@@ -64,7 +64,7 @@ int mfill_atomic_install_pte(struct mm_s
pte_t _dst_pte, *dst_pte;
bool writable = dst_vma->vm_flags & VM_WRITE;
bool vm_shared = dst_vma->vm_flags & VM_SHARED;
- bool page_in_cache = page->mapping;
+ bool page_in_cache = page_mapping(page);
spinlock_t *ptl;
struct inode *inode;
pgoff_t offset, max_off;
_
Patches currently in -mm which might be from peterx(a)redhat.com are
partly-revert-mm-thp-carry-over-dirty-bit-when-thp-splits-on-pmd.patch
mm-shmem-use-page_mapping-to-detect-page-cache-for-uffd-continue.patch
selftests-vm-use-memfd-for-uffd-hugetlb-tests.patch
selftests-vm-use-memfd-for-hugetlb-madvise-test.patch
selftests-vm-use-memfd-for-hugepage-mremap-test.patch
selftests-vm-drop-mnt-point-for-hugetlb-in-run_vmtestssh.patch
mm-hugetlb-unify-clearing-of-restorereserve-for-private-pages.patch
revert-mm-uffd-fix-warning-without-pte_marker_uffd_wp-compiled-in.patch
The commit 3c52c6bb831f (tcp/udp: Fix memory leak in
ipv6_renew_options()) fixes a memory leak reported by syzbot. This seems
to be a good candidate for the stable trees. This patch didn't apply cleanly
in 5.4 kernel, since release_sock() calls are changed to
sockopt_release_sock() in the latest kernel versions.
Kuniyuki Iwashima (1):
tcp/udp: Fix memory leak in ipv6_renew_options().
net/ipv6/ipv6_sockglue.c | 7 +++++++
1 file changed, 7 insertions(+)
--
2.38.1.273.g43a17bfeac-goog
From: "Jason A. Donenfeld" <Jason(a)zx2c4.com>
[ Upstream commit a7c01fa93aeb03ab76cd3cb2107990dd160498e6 ]
I was recently surprised to learn that msleep_interruptible(),
wait_for_completion_interruptible_timeout(), and related functions
simply hung when I called kthread_stop() on kthreads using them. The
solution to fixing the case with msleep_interruptible() was more simply
to move to schedule_timeout_interruptible(). Why?
The reason is that msleep_interruptible(), and many functions just like
it, has a loop like this:
while (timeout && !signal_pending(current))
timeout = schedule_timeout_interruptible(timeout);
The call to kthread_stop() woke up the thread, so
schedule_timeout_interruptible() returned early, but because
signal_pending() returned false, it went back into another timeout, which
was never woken up.
This wait loop pattern is common to various pieces of code, and I
suspect that the subtle misuse in a kthread that caused a deadlock in
the code I looked at last week is also found elsewhere.
So this commit causes signal_pending() to return true when
kthread_stop() is called, by setting TIF_NOTIFY_SIGNAL.
The same also probably applies to the similar kthread_park()
functionality, but that can be addressed later, as its semantics are
slightly different.
Cc: Eric W. Biederman <ebiederm(a)xmission.com>
Signed-off-by: Jason A. Donenfeld <Jason(a)zx2c4.com>
v1: https://lkml.kernel.org/r/20220627120020.608117-1-Jason@zx2c4.com
v2: https://lkml.kernel.org/r/20220627145716.641185-1-Jason@zx2c4.com
v3: https://lkml.kernel.org/r/20220628161441.892925-1-Jason@zx2c4.com
v4: https://lkml.kernel.org/r/20220711202136.64458-1-Jason@zx2c4.com
v5: https://lkml.kernel.org/r/20220711232123.136330-1-Jason@zx2c4.com
Signed-off-by: Eric W. Biederman <ebiederm(a)xmission.com>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
kernel/kthread.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 3c677918d8f2..7243a010f433 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -704,6 +704,7 @@ int kthread_stop(struct task_struct *k)
kthread = to_kthread(k);
set_bit(KTHREAD_SHOULD_STOP, &kthread->flags);
kthread_unpark(k);
+ set_tsk_thread_flag(k, TIF_NOTIFY_SIGNAL);
wake_up_process(k);
wait_for_completion(&kthread->exited);
ret = kthread->result;
--
2.35.1
From: "Jason A. Donenfeld" <Jason(a)zx2c4.com>
[ Upstream commit a7c01fa93aeb03ab76cd3cb2107990dd160498e6 ]
I was recently surprised to learn that msleep_interruptible(),
wait_for_completion_interruptible_timeout(), and related functions
simply hung when I called kthread_stop() on kthreads using them. The
solution to fixing the case with msleep_interruptible() was more simply
to move to schedule_timeout_interruptible(). Why?
The reason is that msleep_interruptible(), and many functions just like
it, has a loop like this:
while (timeout && !signal_pending(current))
timeout = schedule_timeout_interruptible(timeout);
The call to kthread_stop() woke up the thread, so
schedule_timeout_interruptible() returned early, but because
signal_pending() returned false, it went back into another timeout, which
was never woken up.
This wait loop pattern is common to various pieces of code, and I
suspect that the subtle misuse in a kthread that caused a deadlock in
the code I looked at last week is also found elsewhere.
So this commit causes signal_pending() to return true when
kthread_stop() is called, by setting TIF_NOTIFY_SIGNAL.
The same also probably applies to the similar kthread_park()
functionality, but that can be addressed later, as its semantics are
slightly different.
Cc: Eric W. Biederman <ebiederm(a)xmission.com>
Signed-off-by: Jason A. Donenfeld <Jason(a)zx2c4.com>
v1: https://lkml.kernel.org/r/20220627120020.608117-1-Jason@zx2c4.com
v2: https://lkml.kernel.org/r/20220627145716.641185-1-Jason@zx2c4.com
v3: https://lkml.kernel.org/r/20220628161441.892925-1-Jason@zx2c4.com
v4: https://lkml.kernel.org/r/20220711202136.64458-1-Jason@zx2c4.com
v5: https://lkml.kernel.org/r/20220711232123.136330-1-Jason@zx2c4.com
Signed-off-by: Eric W. Biederman <ebiederm(a)xmission.com>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
kernel/kthread.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 3c677918d8f2..7243a010f433 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -704,6 +704,7 @@ int kthread_stop(struct task_struct *k)
kthread = to_kthread(k);
set_bit(KTHREAD_SHOULD_STOP, &kthread->flags);
kthread_unpark(k);
+ set_tsk_thread_flag(k, TIF_NOTIFY_SIGNAL);
wake_up_process(k);
wait_for_completion(&kthread->exited);
ret = kthread->result;
--
2.35.1
Dear ,
Please can I have your attention and possibly help me for humanity's
sake please. I am writing this message with a heavy heart filled with
sorrows and sadness.
Please if you can respond, i have an issue that i will be most
grateful if you could help me deal with it please.
Susan
Change 3222717 by automation@vsergiienko-flipday-internal-rtd1395-nemo on 2022/11/02 14:06:37
commit 1a55116834aad9a6fe7d3ea03e4a6998150eb2fb
Author: Manjur Khan <mokhan(a)roku.com>
Date: Mon Oct 31 13:20:42 2022 -0700
REALTEK-9066 credit-rng to Nemo to fix FW-133531 by improving entropy at boot for WPA3 TLS OpenSSL+, patch-2 applied.
From git@gitlab-partner.tools.roku.com:rtd1315x-stark/stark.git
From bf8a4000ed271b6772ec4562fa3309ed07bdaf0b
From: Dominik Brodowski <linux(a)dominikbrodowski.net>
Date: Wed, 29 Dec 2021 22:10:03 +0100
Subject: [PATCH] random: fix crash on multiple early calls to
add_bootloader_randomness()
Currently, if CONFIG_RANDOM_TRUST_BOOTLOADER is enabled, multiple calls
to add_bootloader_randomness() are broken and can cause a NULL pointer
dereference, as noted by Ivan T. Ivanov. This is not only a hypothetical
problem, as qemu on arm64 may provide bootloader entropy via EFI and via
devicetree.
On the first call to add_hwgenerator_randomness(), crng_fast_load() is
executed, and if the seed is long enough, crng_init will be set to 1.
On subsequent calls to add_bootloader_randomness() and then to
add_hwgenerator_randomness(), crng_fast_load() will be skipped. Instead,
wait_event_interruptible() and then credit_entropy_bits() will be
called.
If the entropy count for that second seed is large enough, that proceeds
to crng_reseed().
However, both wait_event_interruptible() and crng_reseed() depend
(at least in numa_crng_init()) on workqueues. Therefore, test whether
system_wq is already initialized, which is a sufficient indicator that
workqueue_init_early() has progressed far enough.
If we wind up hitting the !system_wq case, we later want to do what
would have been done there when wqs are up, so set a flag, and do that
work later from the rand_initialize() call.
Reported-by: Ivan T. Ivanov <iivanov(a)suse.de>
Fixes: 18b915ac6b0a ("efi/random: Treat EFI_RNG_PROTOCOL output as
bootloader randomness")
Cc: stable(a)vger.kernel.org
Signed-off-by: Dominik Brodowski <linux(a)dominikbrodowski.net>
[Jason: added crng_need_done state and related logic.]
Signed-off-by: Jason A. Donenfeld <Jason(a)zx2c4.com>
Affected files ...
.. //depot/firmware/release/main/port/realtek/rtd1395/platform/software_phoenix/linux-kernel/drivers/char/random.c#3 edit
Differences ...
==== //depot/firmware/release/main/port/realtek/rtd1395/platform/software_phoenix/linux-kernel/drivers/char/random.c#3 (text) ====
@@ -432,6 +432,7 @@
* its value (from 0->1->2).
*/
static int crng_init = 0;
+static bool crng_need_final_init = false;
#define crng_ready() (likely(crng_init > 1))
static int crng_init_cnt = 0;
static unsigned long crng_global_init_time = 0;
@@ -860,6 +861,36 @@
static void numa_crng_init(void) {}
#endif
+static void crng_finalize_init(struct crng_state *crng)
+{
+ if (crng != &primary_crng || crng_init >= 2)
+ return;
+ if (!system_wq) {
+ /* We can't call numa_crng_init until we have workqueues,
+ * so mark this for processing later. */
+ crng_need_final_init = true;
+ return;
+ }
+
+ numa_crng_init();
+ crng_init = 2;
+ process_random_ready_list();
+ wake_up_interruptible(&crng_init_wait);
+ pr_notice("random: crng init done\n");
+ if (unseeded_warning.missed) {
+ pr_notice("random: %d get_random_xx warning(s) missed "
+ "due to ratelimiting\n",
+ unseeded_warning.missed);
+ unseeded_warning.missed = 0;
+ }
+ if (urandom_warning.missed) {
+ pr_notice("random: %d urandom warning(s) missed "
+ "due to ratelimiting\n",
+ urandom_warning.missed);
+ urandom_warning.missed = 0;
+ }
+}
+
static void crng_reseed(struct crng_state *crng, struct entropy_store *r)
{
unsigned long flags;
@@ -888,25 +919,7 @@
}
memzero_explicit(&buf, sizeof(buf));
crng->init_time = jiffies;
- if (crng == &primary_crng && crng_init < 2) {
- numa_crng_init();
- crng_init = 2;
- process_random_ready_list();
- wake_up_interruptible(&crng_init_wait);
- pr_notice("random: crng init done\n");
- if (unseeded_warning.missed) {
- pr_notice("random: %d get_random_xx warning(s) missed "
- "due to ratelimiting\n",
- unseeded_warning.missed);
- unseeded_warning.missed = 0;
- }
- if (urandom_warning.missed) {
- pr_notice("random: %d urandom warning(s) missed "
- "due to ratelimiting\n",
- urandom_warning.missed);
- urandom_warning.missed = 0;
- }
- }
+ crng_finalize_init(crng);
spin_unlock_irqrestore(&crng->lock, flags);
}
@@ -1717,6 +1730,8 @@
{
init_std_data(&input_pool);
init_std_data(&blocking_pool);
+ if (crng_need_final_init)
+ crng_finalize_init(&primary_crng);
crng_initialize(&primary_crng);
crng_global_init_time = jiffies;
if (ratelimit_disable) {
@@ -2216,7 +2231,8 @@
* We'll be woken up again once below random_write_wakeup_thresh,
* or when the calling thread is about to terminate.
*/
- wait_event_interruptible(random_write_wait, kthread_should_stop() ||
+ wait_event_interruptible(random_write_wait,
+ !system_wq || kthread_should_stop() ||
ENTROPY_BITS(&input_pool) <= random_write_wakeup_bits);
mix_pool_bytes(poolp, buffer, count);
credit_entropy_bits(poolp, entropy);
From: Luca Ceresoli <luca.ceresoli(a)bootlin.com>
chan->mipi takes the return value of tegra_mipi_request() which can be a
valid pointer or an error. However chan->mipi is checked in several places,
including error-cleanup code in tegra_csi_channels_cleanup(), as 'if
(chan->mipi)', which suggests the initial intent was that chan->mipi should
be either NULL or a valid pointer, never an error. As a consequence,
cleanup code in case of tegra_mipi_request() errors would dereference an
invalid pointer.
Fix by ensuring chan->mipi always contains either NULL or a valid pointer.
Also add that to the documentation.
Fixes: 523c857e34ce ("media: tegra-video: Add CSI MIPI pads calibration")
Cc: stable(a)vger.kernel.org
Reported-by: Dan Carpenter <dan.carpenter(a)oracle.com>
Signed-off-by: Luca Ceresoli <luca.ceresoli(a)bootlin.com>
---
This patch was added in v2.
---
drivers/staging/media/tegra-video/csi.c | 1 +
drivers/staging/media/tegra-video/csi.h | 2 +-
2 files changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/staging/media/tegra-video/csi.c b/drivers/staging/media/tegra-video/csi.c
index b26e44adb2be..6b59ef55c525 100644
--- a/drivers/staging/media/tegra-video/csi.c
+++ b/drivers/staging/media/tegra-video/csi.c
@@ -448,6 +448,7 @@ static int tegra_csi_channel_alloc(struct tegra_csi *csi,
chan->mipi = tegra_mipi_request(csi->dev, node);
if (IS_ERR(chan->mipi)) {
ret = PTR_ERR(chan->mipi);
+ chan->mipi = NULL;
dev_err(csi->dev, "failed to get mipi device: %d\n", ret);
}
diff --git a/drivers/staging/media/tegra-video/csi.h b/drivers/staging/media/tegra-video/csi.h
index 4ee05a1785cf..6960ea2e3d36 100644
--- a/drivers/staging/media/tegra-video/csi.h
+++ b/drivers/staging/media/tegra-video/csi.h
@@ -56,7 +56,7 @@ struct tegra_csi;
* @framerate: active framerate for TPG
* @h_blank: horizontal blanking for TPG active format
* @v_blank: vertical blanking for TPG active format
- * @mipi: mipi device for corresponding csi channel pads
+ * @mipi: mipi device for corresponding csi channel pads, or NULL if not applicable (TPG, error)
* @pixel_rate: active pixel rate from the sensor on this channel
*/
struct tegra_csi_channel {
--
2.34.1
The `char` type with no explicit sign is sometimes signed and sometimes
unsigned. This code will break on platforms such as arm, where char is
unsigned. So mark it here as explicitly signed, so that the
todrop_counter decrement and subsequent comparison are correct.
Cc: Pablo Neira Ayuso <pablo(a)netfilter.org>
Cc: Julian Anastasov <ja(a)ssi.bg>
Cc: Simon Horman <horms(a)verge.net.au>
Cc: stable(a)vger.kernel.org
Signed-off-by: Jason A. Donenfeld <Jason(a)zx2c4.com>
---
net/netfilter/ipvs/ip_vs_conn.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 8c04bb57dd6f..7c4866c04343 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -1249,40 +1249,40 @@ static const struct seq_operations ip_vs_conn_sync_seq_ops = {
.next = ip_vs_conn_seq_next,
.stop = ip_vs_conn_seq_stop,
.show = ip_vs_conn_sync_seq_show,
};
#endif
/* Randomly drop connection entries before running out of memory
* Can be used for DATA and CTL conns. For TPL conns there are exceptions:
* - traffic for services in OPS mode increases ct->in_pkts, so it is supported
* - traffic for services not in OPS mode does not increase ct->in_pkts in
* all cases, so it is not supported
*/
static inline int todrop_entry(struct ip_vs_conn *cp)
{
/*
* The drop rate array needs tuning for real environments.
* Called from timer bh only => no locking
*/
- static const char todrop_rate[9] = {0, 1, 2, 3, 4, 5, 6, 7, 8};
- static char todrop_counter[9] = {0};
+ static const signed char todrop_rate[9] = {0, 1, 2, 3, 4, 5, 6, 7, 8};
+ static signed char todrop_counter[9] = {0};
int i;
/* if the conn entry hasn't lasted for 60 seconds, don't drop it.
This will leave enough time for normal connection to get
through. */
if (time_before(cp->timeout + jiffies, cp->timer.expires + 60*HZ))
return 0;
/* Don't drop the entry if its number of incoming packets is not
located in [0, 8] */
i = atomic_read(&cp->in_pkts);
if (i > 8 || i < 0) return 0;
if (!todrop_rate[i]) return 0;
if (--todrop_counter[i] > 0) return 0;
todrop_counter[i] = todrop_rate[i];
return 1;
}
--
2.38.1
The current wording of the third option for stable kernel submission
doesn't mention how to specify the desired kernel version. Submitters
reading the documentation could simply send multiple backported patches of
the same upstream commit without any kernel version information, leaving
stable maintainers and reviewers with a hard time figuring out which kernel
version each patch should be applied to.
Describe the subject prefix for specifying kernel version for the case
above.
Cc: stable(a)vger.kernel.org
Signed-off-by: Bagas Sanjaya <bagasdotme(a)gmail.com>
---
This patch is sent as response to [1].
[1]: https://lore.kernel.org/stable/20221101074351.GA8310@amd/
Documentation/process/stable-kernel-rules.rst | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/Documentation/process/stable-kernel-rules.rst b/Documentation/process/stable-kernel-rules.rst
index 2fd8aa593a2851..409ae73c1ffcd1 100644
--- a/Documentation/process/stable-kernel-rules.rst
+++ b/Documentation/process/stable-kernel-rules.rst
@@ -77,7 +77,9 @@ Option 3
Send the patch, after verifying that it follows the above rules, to
stable(a)vger.kernel.org. You must note the upstream commit ID in the
changelog of your submission, as well as the kernel version you wish
-it to be applied to.
+it to be applied to by adding desired kernel version number to the
+patch subject prefix. For example, patches targeting 5.15 kernel should
+have ``[PATCH 5.15]`` prefix.
:ref:`option_1` is **strongly** preferred, is the easiest and most common.
:ref:`option_2` and :ref:`option_3` are more useful if the patch isn't deemed
base-commit: 30a0b95b1335e12efef89dd78518ed3e4a71a763
--
An old man doll... just what I always wanted! - Clara
The patch titled
Subject: hugetlb: don't delete vma_lock in hugetlb MADV_DONTNEED processing
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
hugetlb-dont-delete-vma_lock-in-hugetlb-madv_dontneed-processing.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Mike Kravetz <mike.kravetz(a)oracle.com>
Subject: hugetlb: don't delete vma_lock in hugetlb MADV_DONTNEED processing
Date: Tue, 1 Nov 2022 18:31:00 -0700
madvise(MADV_DONTNEED) ends up calling zap_page_range() to clear page
tables associated with the address range. For hugetlb vmas,
zap_page_range will call __unmap_hugepage_range_final. However,
__unmap_hugepage_range_final assumes the passed vma is about to be removed
and deletes the vma_lock to prevent pmd sharing as the vma is on the way
out. In the case of madvise(MADV_DONTNEED) the vma remains, but the
missing vma_lock prevents pmd sharing and could potentially lead to issues
with truncation/fault races.
This issue was originally reported here [1] as a BUG triggered in
page_try_dup_anon_rmap. Prior to the introduction of the hugetlb
vma_lock, __unmap_hugepage_range_final cleared the VM_MAYSHARE flag to
prevent pmd sharing. Subsequent faults on this vma were confused as
VM_MAYSHARE indicates a sharable vma, but was not set so page_mapping was
not set in new pages added to the page table. This resulted in pages that
appeared anonymous in a VM_SHARED vma and triggered the BUG.
Address issue by:
- Add a new zap flag ZAP_FLAG_UNMAP to indicate an unmap call from
unmap_vmas(). This is used to indicate the 'final' unmapping of a vma.
When called via MADV_DONTNEED, this flag is not set and the vma_lock is
not deleted.
- mmu notification is removed from __unmap_hugepage_range to avoid
duplication, and notification is added to the other calling routine
(unmap_hugepage_range).
- notification calls are updated in zap_page_range to take into account
the possibility of multiple vmas.
[1] https://lore.kernel.org/lkml/CAO4mrfdLMXsao9RF4fUE8-Wfde8xmjsKrTNMNC9wjUb6J…
Link: https://lkml.kernel.org/r/20221102013100.455139-1-mike.kravetz@oracle.com
Fixes: 90e7e7f5ef3f ("mm: enable MADV_DONTNEED for hugetlb mappings")
Signed-off-by: Mike Kravetz <mike.kravetz(a)oracle.com>
Reported-by: Wei Chen <harperchen1110(a)gmail.com>
Cc: Axel Rasmussen <axelrasmussen(a)google.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Cc: Mike Kravetz <mike.kravetz(a)oracle.com>
Cc: Mina Almasry <almasrymina(a)google.com>
Cc: Nadav Amit <nadav.amit(a)gmail.com>
Cc: Naoya Horiguchi <naoya.horiguchi(a)linux.dev>
Cc: Peter Xu <peterx(a)redhat.com>
Cc: Rik van Riel <riel(a)surriel.com>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
include/linux/mm.h | 3 ++
mm/hugetlb.c | 45 +++++++++++++++++++++++++------------------
mm/memory.c | 21 ++++++++++++++------
3 files changed, 45 insertions(+), 24 deletions(-)
--- a/include/linux/mm.h~hugetlb-dont-delete-vma_lock-in-hugetlb-madv_dontneed-processing
+++ a/include/linux/mm.h
@@ -3475,4 +3475,7 @@ madvise_set_anon_name(struct mm_struct *
*/
#define ZAP_FLAG_DROP_MARKER ((__force zap_flags_t) BIT(0))
+/* Set in unmap_vmas() to indicate an unmap call. Only used by hugetlb */
+#define ZAP_FLAG_UNMAP ((__force zap_flags_t) BIT(1))
+
#endif /* _LINUX_MM_H */
--- a/mm/hugetlb.c~hugetlb-dont-delete-vma_lock-in-hugetlb-madv_dontneed-processing
+++ a/mm/hugetlb.c
@@ -5064,7 +5064,6 @@ static void __unmap_hugepage_range(struc
struct page *page;
struct hstate *h = hstate_vma(vma);
unsigned long sz = huge_page_size(h);
- struct mmu_notifier_range range;
unsigned long last_addr_mask;
bool force_flush = false;
@@ -5079,13 +5078,6 @@ static void __unmap_hugepage_range(struc
tlb_change_page_size(tlb, sz);
tlb_start_vma(tlb, vma);
- /*
- * If sharing possible, alert mmu notifiers of worst case.
- */
- mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, mm, start,
- end);
- adjust_range_if_pmd_sharing_possible(vma, &range.start, &range.end);
- mmu_notifier_invalidate_range_start(&range);
last_addr_mask = hugetlb_mask_last_page(h);
address = start;
for (; address < end; address += sz) {
@@ -5174,7 +5166,6 @@ static void __unmap_hugepage_range(struc
if (ref_page)
break;
}
- mmu_notifier_invalidate_range_end(&range);
tlb_end_vma(tlb, vma);
/*
@@ -5199,32 +5190,50 @@ void __unmap_hugepage_range_final(struct
unsigned long end, struct page *ref_page,
zap_flags_t zap_flags)
{
+ bool final = zap_flags & ZAP_FLAG_UNMAP;
+
hugetlb_vma_lock_write(vma);
i_mmap_lock_write(vma->vm_file->f_mapping);
__unmap_hugepage_range(tlb, vma, start, end, ref_page, zap_flags);
/*
- * Unlock and free the vma lock before releasing i_mmap_rwsem. When
- * the vma_lock is freed, this makes the vma ineligible for pmd
- * sharing. And, i_mmap_rwsem is required to set up pmd sharing.
- * This is important as page tables for this unmapped range will
- * be asynchrously deleted. If the page tables are shared, there
- * will be issues when accessed by someone else.
+ * When called via zap_page_range (MADV_DONTNEED), this is not the
+ * final unmap of the vma, and we do not want to delete the vma_lock.
*/
- __hugetlb_vma_unlock_write_free(vma);
-
- i_mmap_unlock_write(vma->vm_file->f_mapping);
+ if (final) {
+ /*
+ * Unlock and free the vma lock before releasing i_mmap_rwsem.
+ * When the vma_lock is freed, this makes the vma ineligible
+ * for pmd sharing. And, i_mmap_rwsem is required to set up
+ * pmd sharing. This is important as page tables for this
+ * unmapped range will be asynchrously deleted. If the page
+ * tables are shared, there will be issues when accessed by
+ * someone else.
+ */
+ __hugetlb_vma_unlock_write_free(vma);
+ i_mmap_unlock_write(vma->vm_file->f_mapping);
+ } else {
+ i_mmap_unlock_write(vma->vm_file->f_mapping);
+ hugetlb_vma_unlock_write(vma);
+ }
}
void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
unsigned long end, struct page *ref_page,
zap_flags_t zap_flags)
{
+ struct mmu_notifier_range range;
struct mmu_gather tlb;
+ mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, vma->vm_mm,
+ start, end);
+ adjust_range_if_pmd_sharing_possible(vma, &range.start, &range.end);
tlb_gather_mmu(&tlb, vma->vm_mm);
+
__unmap_hugepage_range(&tlb, vma, start, end, ref_page, zap_flags);
+
+ mmu_notifier_invalidate_range_end(&range);
tlb_finish_mmu(&tlb);
}
--- a/mm/memory.c~hugetlb-dont-delete-vma_lock-in-hugetlb-madv_dontneed-processing
+++ a/mm/memory.c
@@ -1720,7 +1720,7 @@ void unmap_vmas(struct mmu_gather *tlb,
{
struct mmu_notifier_range range;
struct zap_details details = {
- .zap_flags = ZAP_FLAG_DROP_MARKER,
+ .zap_flags = ZAP_FLAG_DROP_MARKER | ZAP_FLAG_UNMAP,
/* Careful - we need to zap private pages too! */
.even_cows = true,
};
@@ -1753,15 +1753,24 @@ void zap_page_range(struct vm_area_struc
MA_STATE(mas, mt, vma->vm_end, vma->vm_end);
lru_add_drain();
- mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
- start, start + size);
tlb_gather_mmu(&tlb, vma->vm_mm);
update_hiwater_rss(vma->vm_mm);
- mmu_notifier_invalidate_range_start(&range);
do {
- unmap_single_vma(&tlb, vma, start, range.end, NULL);
+ mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma,
+ vma->vm_mm,
+ max(start, vma->vm_start),
+ min(end, vma->vm_end));
+ if (is_vm_hugetlb_page(vma))
+ adjust_range_if_pmd_sharing_possible(vma,
+ &range.start, &range.end);
+ mmu_notifier_invalidate_range_start(&range);
+ /*
+ * unmap 'start-end' not 'range.start-range.end' as range
+ * could have been expanded for pmd sharing.
+ */
+ unmap_single_vma(&tlb, vma, start, end, NULL);
+ mmu_notifier_invalidate_range_end(&range);
} while ((vma = mas_find(&mas, end - 1)) != NULL);
- mmu_notifier_invalidate_range_end(&range);
tlb_finish_mmu(&tlb);
}
_
Patches currently in -mm which might be from mike.kravetz(a)oracle.com are
hugetlb-dont-delete-vma_lock-in-hugetlb-madv_dontneed-processing.patch
hugetlb-simplify-hugetlb-handling-in-follow_page_mask.patch
hugetlb-simplify-hugetlb-handling-in-follow_page_mask-v4.patch
hugetlb-simplify-hugetlb-handling-in-follow_page_mask-v5.patch
The commit 3c52c6bb831f (tcp/udp: Fix memory leak in
ipv6_renew_options()) fixes a memory leak reported by syzbot. This seems
to be a good candidate for the stable trees. This patch didn't apply cleanly
in 5.15 kernel, since release_sock() calls are changed to
sockopt_release_sock() in the latest kernel versions.
Kuniyuki Iwashima (1):
tcp/udp: Fix memory leak in ipv6_renew_options().
net/ipv6/ipv6_sockglue.c | 7 +++++++
1 file changed, 7 insertions(+)
--
2.38.1.273.g43a17bfeac-goog
From: Lino Sanfilippo <LinoSanfilippo(a)gmx.de>
Several drivers that support setting the RS485 configuration via userspace
implement one or more of the following tasks:
- in case of an invalid RTS configuration (both RTS after send and RTS on
send set or both unset) fall back to enable RTS on send and disable RTS
after send
- nullify the padding field of the returned serial_rs485 struct
- copy the configuration into the uart port struct
- limit RTS delays to 100 ms
Move these tasks into the serial core to make them generic and to provide
a consistent behaviour among all drivers.
[ Upstream commit 0ed12afa5655512ee418047fb3546d229df20aa1 ]
Link: https://lkml.kernel.org/r/20221017051737.51727-1-dominique.martinet@atmark-…
Signed-off-by: Lino Sanfilippo <LinoSanfilippo(a)gmx.de>
Link: https://lore.kernel.org/r/20220410104642.32195-2-LinoSanfilippo@gmx.de
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Signed-off-by: Daisuke Mizobuchi <mizo(a)atmark-techno.com>
Signed-off-by: Dominique Martinet <dominique.martinet(a)atmark-techno.com>
---
5.15 version of the 5.10 backport:
https://lkml.kernel.org/r/20221017051737.51727-1-dominique.martinet@atmark-…
(only build tested)
drivers/tty/serial/serial_core.c | 33 ++++++++++++++++++++++++++++++++
1 file changed, 33 insertions(+)
diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
index 82ddbb92d07d..48dafd1e084b 100644
--- a/drivers/tty/serial/serial_core.c
+++ b/drivers/tty/serial/serial_core.c
@@ -42,6 +42,11 @@ static struct lock_class_key port_lock_key;
#define HIGH_BITS_OFFSET ((sizeof(long)-sizeof(int))*8)
+/*
+ * Max time with active RTS before/after data is sent.
+ */
+#define RS485_MAX_RTS_DELAY 100 /* msecs */
+
static void uart_change_speed(struct tty_struct *tty, struct uart_state *state,
struct ktermios *old_termios);
static void uart_wait_until_sent(struct tty_struct *tty, int timeout);
@@ -1299,8 +1304,36 @@ static int uart_set_rs485_config(struct uart_port *port,
if (copy_from_user(&rs485, rs485_user, sizeof(*rs485_user)))
return -EFAULT;
+ /* pick sane settings if the user hasn't */
+ if (!(rs485.flags & SER_RS485_RTS_ON_SEND) ==
+ !(rs485.flags & SER_RS485_RTS_AFTER_SEND)) {
+ dev_warn_ratelimited(port->dev,
+ "%s (%d): invalid RTS setting, using RTS_ON_SEND instead\n",
+ port->name, port->line);
+ rs485.flags |= SER_RS485_RTS_ON_SEND;
+ rs485.flags &= ~SER_RS485_RTS_AFTER_SEND;
+ }
+
+ if (rs485.delay_rts_before_send > RS485_MAX_RTS_DELAY) {
+ rs485.delay_rts_before_send = RS485_MAX_RTS_DELAY;
+ dev_warn_ratelimited(port->dev,
+ "%s (%d): RTS delay before sending clamped to %u ms\n",
+ port->name, port->line, rs485.delay_rts_before_send);
+ }
+
+ if (rs485.delay_rts_after_send > RS485_MAX_RTS_DELAY) {
+ rs485.delay_rts_after_send = RS485_MAX_RTS_DELAY;
+ dev_warn_ratelimited(port->dev,
+ "%s (%d): RTS delay after sending clamped to %u ms\n",
+ port->name, port->line, rs485.delay_rts_after_send);
+ }
+ /* Return clean padding area to userspace */
+ memset(rs485.padding, 0, sizeof(rs485.padding));
+
spin_lock_irqsave(&port->lock, flags);
ret = port->rs485_config(port, &rs485);
+ if (!ret)
+ port->rs485 = rs485;
spin_unlock_irqrestore(&port->lock, flags);
if (ret)
return ret;
--
2.35.1
commit 702de2c21eed04c67cefaaedc248ef16e5f6b293 upstream.
We are seeing an IRQ storm on the global receive IRQ line under heavy
CAN bus load conditions with both CAN channels enabled.
Conditions:
The global receive IRQ line is shared between can0 and can1, either of
the channels can trigger interrupt while the other channel's IRQ line
is disabled (RFIE).
When a global receive IRQ interrupt occurs, we mask the interrupt in
the IRQ handler. Clearing and unmasking of the interrupt is happening
in rx_poll(). There is a race condition where rx_poll() unmasks the
interrupt, but the next IRQ handler does not mask the IRQ due to
NAPIF_STATE_MISSED flag (e.g.: can0 RX FIFO interrupt is disabled and
can1 is triggering RX interrupt, the delay in rx_poll() processing
results in setting NAPIF_STATE_MISSED flag) leading to an IRQ storm.
This patch fixes the issue by checking IRQ active and enabled before
handling the IRQ on a particular channel.
Fixes: dd3bd23eb438 ("can: rcar_canfd: Add Renesas R-Car CAN FD driver")
Suggested-by: Marc Kleine-Budde <mkl(a)pengutronix.de>
Signed-off-by: Biju Das <biju.das.jz(a)bp.renesas.com>
Link: https://lore.kernel.org/all/20221025155657.1426948-2-biju.das.jz@bp.renesas…
Cc: stable(a)vger.kernel.org # 4.9.x
[mkl: adjust commit message]
Signed-off-by: Marc Kleine-Budde <mkl(a)pengutronix.de>
[biju: removed gpriv from RCANFD_RFCC_RFIE macro]
Signed-off-by: Biju Das <biju.das.jz(a)bp.renesas.com>
---
Resending to 4.9 with conflicts[1] fixed
[1] https://lore.kernel.org/stable/OS0PR01MB59226F2443DFCE7C5D73778786379@OS0PR…
---
drivers/net/can/rcar/rcar_canfd.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/drivers/net/can/rcar/rcar_canfd.c b/drivers/net/can/rcar/rcar_canfd.c
index a127c853a4e9..694a3354554f 100644
--- a/drivers/net/can/rcar/rcar_canfd.c
+++ b/drivers/net/can/rcar/rcar_canfd.c
@@ -1079,7 +1079,7 @@ static irqreturn_t rcar_canfd_global_interrupt(int irq, void *dev_id)
struct rcar_canfd_global *gpriv = dev_id;
struct net_device *ndev;
struct rcar_canfd_channel *priv;
- u32 sts, gerfl;
+ u32 sts, cc, gerfl;
u32 ch, ridx;
/* Global error interrupts still indicate a condition specific
@@ -1097,7 +1097,9 @@ static irqreturn_t rcar_canfd_global_interrupt(int irq, void *dev_id)
/* Handle Rx interrupts */
sts = rcar_canfd_read(priv->base, RCANFD_RFSTS(ridx));
- if (likely(sts & RCANFD_RFSTS_RFIF)) {
+ cc = rcar_canfd_read(priv->base, RCANFD_RFCC(ridx));
+ if (likely(sts & RCANFD_RFSTS_RFIF &&
+ cc & RCANFD_RFCC_RFIE)) {
if (napi_schedule_prep(&priv->napi)) {
/* Disable Rx FIFO interrupts */
rcar_canfd_clear_bit(priv->base,
--
2.25.1
madvise(MADV_DONTNEED) ends up calling zap_page_range() to clear page
tables associated with the address range. For hugetlb vmas,
zap_page_range will call __unmap_hugepage_range_final. However,
__unmap_hugepage_range_final assumes the passed vma is about to be removed
and deletes the vma_lock to prevent pmd sharing as the vma is on the way
out. In the case of madvise(MADV_DONTNEED) the vma remains, but the
missing vma_lock prevents pmd sharing and could potentially lead to issues
with truncation/fault races.
This issue was originally reported here [1] as a BUG triggered in
page_try_dup_anon_rmap. Prior to the introduction of the hugetlb
vma_lock, __unmap_hugepage_range_final cleared the VM_MAYSHARE flag to
prevent pmd sharing. Subsequent faults on this vma were confused as
VM_MAYSHARE indicates a sharable vma, but was not set so page_mapping was
not set in new pages added to the page table. This resulted in pages that
appeared anonymous in a VM_SHARED vma and triggered the BUG.
Address issue by:
- Add a new zap flag ZAP_FLAG_UNMAP to indicate an unmap call from
unmap_vmas(). This is used to indicate the 'final' unmapping of a vma.
When called via MADV_DONTNEED, this flag is not set and the vma_lock is
not deleted.
- mmu notification is removed from __unmap_hugepage_range to avoid
duplication, and notification is added to the other calling routine
(unmap_hugepage_range).
- notification calls are updated in zap_page_range to take into account
the possibility of multiple vmas.
[1] https://lore.kernel.org/lkml/CAO4mrfdLMXsao9RF4fUE8-Wfde8xmjsKrTNMNC9wjUb6J…
Fixes: 90e7e7f5ef3f ("mm: enable MADV_DONTNEED for hugetlb mappings")
Signed-off-by: Mike Kravetz <mike.kravetz(a)oracle.com>
Reported-by: Wei Chen <harperchen1110(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
---
include/linux/mm.h | 3 +++
mm/hugetlb.c | 45 +++++++++++++++++++++++++++------------------
mm/memory.c | 21 +++++++++++++++------
3 files changed, 45 insertions(+), 24 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8bbcccbc5565..b19d65c36d14 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3475,4 +3475,7 @@ madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
*/
#define ZAP_FLAG_DROP_MARKER ((__force zap_flags_t) BIT(0))
+/* Set in unmap_vmas() to indicate an unmap call. Only used by hugetlb */
+#define ZAP_FLAG_UNMAP ((__force zap_flags_t) BIT(1))
+
#endif /* _LINUX_MM_H */
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 546df97c31e4..4699889f11e9 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5064,7 +5064,6 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct
struct page *page;
struct hstate *h = hstate_vma(vma);
unsigned long sz = huge_page_size(h);
- struct mmu_notifier_range range;
unsigned long last_addr_mask;
bool force_flush = false;
@@ -5079,13 +5078,6 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct
tlb_change_page_size(tlb, sz);
tlb_start_vma(tlb, vma);
- /*
- * If sharing possible, alert mmu notifiers of worst case.
- */
- mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, mm, start,
- end);
- adjust_range_if_pmd_sharing_possible(vma, &range.start, &range.end);
- mmu_notifier_invalidate_range_start(&range);
last_addr_mask = hugetlb_mask_last_page(h);
address = start;
for (; address < end; address += sz) {
@@ -5174,7 +5166,6 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct
if (ref_page)
break;
}
- mmu_notifier_invalidate_range_end(&range);
tlb_end_vma(tlb, vma);
/*
@@ -5199,32 +5190,50 @@ void __unmap_hugepage_range_final(struct mmu_gather *tlb,
unsigned long end, struct page *ref_page,
zap_flags_t zap_flags)
{
+ bool final = zap_flags & ZAP_FLAG_UNMAP;
+
hugetlb_vma_lock_write(vma);
i_mmap_lock_write(vma->vm_file->f_mapping);
__unmap_hugepage_range(tlb, vma, start, end, ref_page, zap_flags);
/*
- * Unlock and free the vma lock before releasing i_mmap_rwsem. When
- * the vma_lock is freed, this makes the vma ineligible for pmd
- * sharing. And, i_mmap_rwsem is required to set up pmd sharing.
- * This is important as page tables for this unmapped range will
- * be asynchrously deleted. If the page tables are shared, there
- * will be issues when accessed by someone else.
+ * When called via zap_page_range (MADV_DONTNEED), this is not the
+ * final unmap of the vma, and we do not want to delete the vma_lock.
*/
- __hugetlb_vma_unlock_write_free(vma);
-
- i_mmap_unlock_write(vma->vm_file->f_mapping);
+ if (final) {
+ /*
+ * Unlock and free the vma lock before releasing i_mmap_rwsem.
+ * When the vma_lock is freed, this makes the vma ineligible
+ * for pmd sharing. And, i_mmap_rwsem is required to set up
+ * pmd sharing. This is important as page tables for this
+ * unmapped range will be asynchrously deleted. If the page
+ * tables are shared, there will be issues when accessed by
+ * someone else.
+ */
+ __hugetlb_vma_unlock_write_free(vma);
+ i_mmap_unlock_write(vma->vm_file->f_mapping);
+ } else {
+ i_mmap_unlock_write(vma->vm_file->f_mapping);
+ hugetlb_vma_unlock_write(vma);
+ }
}
void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
unsigned long end, struct page *ref_page,
zap_flags_t zap_flags)
{
+ struct mmu_notifier_range range;
struct mmu_gather tlb;
+ mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, vma->vm_mm,
+ start, end);
+ adjust_range_if_pmd_sharing_possible(vma, &range.start, &range.end);
tlb_gather_mmu(&tlb, vma->vm_mm);
+
__unmap_hugepage_range(&tlb, vma, start, end, ref_page, zap_flags);
+
+ mmu_notifier_invalidate_range_end(&range);
tlb_finish_mmu(&tlb);
}
diff --git a/mm/memory.c b/mm/memory.c
index f88c351aecd4..474c43156ecf 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1720,7 +1720,7 @@ void unmap_vmas(struct mmu_gather *tlb, struct maple_tree *mt,
{
struct mmu_notifier_range range;
struct zap_details details = {
- .zap_flags = ZAP_FLAG_DROP_MARKER,
+ .zap_flags = ZAP_FLAG_DROP_MARKER | ZAP_FLAG_UNMAP,
/* Careful - we need to zap private pages too! */
.even_cows = true,
};
@@ -1753,15 +1753,24 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long start,
MA_STATE(mas, mt, vma->vm_end, vma->vm_end);
lru_add_drain();
- mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
- start, start + size);
tlb_gather_mmu(&tlb, vma->vm_mm);
update_hiwater_rss(vma->vm_mm);
- mmu_notifier_invalidate_range_start(&range);
do {
- unmap_single_vma(&tlb, vma, start, range.end, NULL);
+ mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma,
+ vma->vm_mm,
+ max(start, vma->vm_start),
+ min(end, vma->vm_end));
+ if (is_vm_hugetlb_page(vma))
+ adjust_range_if_pmd_sharing_possible(vma,
+ &range.start, &range.end);
+ mmu_notifier_invalidate_range_start(&range);
+ /*
+ * unmap 'start-end' not 'range.start-range.end' as range
+ * could have been expanded for pmd sharing.
+ */
+ unmap_single_vma(&tlb, vma, start, end, NULL);
+ mmu_notifier_invalidate_range_end(&range);
} while ((vma = mas_find(&mas, end - 1)) != NULL);
- mmu_notifier_invalidate_range_end(&range);
tlb_finish_mmu(&tlb);
}
--
2.37.3
Patch #1 (merged in 5.12-rc3) is required to address the issue
Anders Roxell reported on the list [1]. Patch #2 (in 5.15-rc1) is
a follow up.
[1] https://lore.kernel.org/lkml/20220826120020.GB520@mutt
Anshuman Khandual (1):
arm64/kexec: Test page size support with new TGRAN range values
James Morse (1):
arm64/mm: Fix __enable_mmu() for new TGRAN range values
arch/arm64/include/asm/cpufeature.h | 9 ++++--
arch/arm64/include/asm/sysreg.h | 36 +++++++++++++++--------
arch/arm64/kernel/head.S | 6 ++--
arch/arm64/kvm/reset.c | 10 ++++---
drivers/firmware/efi/libstub/arm64-stub.c | 2 +-
5 files changed, 41 insertions(+), 22 deletions(-)
--
2.33.0
The following commit has been merged into the x86/urgent branch of tip:
Commit-ID: a6dd6f39008bb3ef7c73ef0a2acc2a4209555bd8
Gitweb: https://git.kernel.org/tip/a6dd6f39008bb3ef7c73ef0a2acc2a4209555bd8
Author: Dave Hansen <dave.hansen(a)linux.intel.com>
AuthorDate: Fri, 28 Oct 2022 17:12:19 +03:00
Committer: Dave Hansen <dave.hansen(a)linux.intel.com>
CommitterDate: Tue, 01 Nov 2022 10:07:15 -07:00
x86/tdx: Prepare for using "INFO" call for a second purpose
The TDG.VP.INFO TDCALL provides the guest with various details about
the TDX system that the guest needs to run. Only one field is currently
used: 'gpa_width' which tells the guest which PTE bits mark pages shared
or private.
A second field is now needed: the guest "TD attributes" to tell if
virtualization exceptions are configured in a way that can harm the guest.
Make the naming and calling convention more generic and discrete from the
mask-centric one.
Thanks to Sathya for the inspiration here, but there's no code, comments
or changelogs left from where he started.
Signed-off-by: Dave Hansen <dave.hansen(a)linux.intel.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Tested-by: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Cc: stable(a)vger.kernel.org
---
arch/x86/coco/tdx/tdx.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
index 928dcf7..3fee969 100644
--- a/arch/x86/coco/tdx/tdx.c
+++ b/arch/x86/coco/tdx/tdx.c
@@ -98,7 +98,7 @@ static inline void tdx_module_call(u64 fn, u64 rcx, u64 rdx, u64 r8, u64 r9,
panic("TDCALL %lld failed (Buggy TDX module!)\n", fn);
}
-static u64 get_cc_mask(void)
+static void tdx_parse_tdinfo(u64 *cc_mask)
{
struct tdx_module_output out;
unsigned int gpa_width;
@@ -121,7 +121,7 @@ static u64 get_cc_mask(void)
* The highest bit of a guest physical address is the "sharing" bit.
* Set it for shared pages and clear it for private pages.
*/
- return BIT_ULL(gpa_width - 1);
+ *cc_mask = BIT_ULL(gpa_width - 1);
}
/*
@@ -758,7 +758,7 @@ void __init tdx_early_init(void)
setup_force_cpu_cap(X86_FEATURE_TDX_GUEST);
cc_set_vendor(CC_VENDOR_INTEL);
- cc_mask = get_cc_mask();
+ tdx_parse_tdinfo(&cc_mask);
cc_set_mask(cc_mask);
/*
The following commit has been merged into the x86/urgent branch of tip:
Commit-ID: 373e715e31bf4e0f129befe87613a278fac228d3
Gitweb: https://git.kernel.org/tip/373e715e31bf4e0f129befe87613a278fac228d3
Author: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
AuthorDate: Fri, 28 Oct 2022 17:12:20 +03:00
Committer: Dave Hansen <dave.hansen(a)linux.intel.com>
CommitterDate: Tue, 01 Nov 2022 16:02:40 -07:00
x86/tdx: Panic on bad configs that #VE on "private" memory access
All normal kernel memory is "TDX private memory". This includes
everything from kernel stacks to kernel text. Handling
exceptions on arbitrary accesses to kernel memory is essentially
impossible because they can happen in horribly nasty places like
kernel entry/exit. But, TDX hardware can theoretically _deliver_
a virtualization exception (#VE) on any access to private memory.
But, it's not as bad as it sounds. TDX can be configured to never
deliver these exceptions on private memory with a "TD attribute"
called ATTR_SEPT_VE_DISABLE. The guest has no way to *set* this
attribute, but it can check it.
Ensure ATTR_SEPT_VE_DISABLE is set in early boot. panic() if it
is unset. There is no sane way for Linux to run with this
attribute clear so a panic() is appropriate.
There's a small window during boot before the check where the kernel
has an early #VE handler. But the handler is only for port I/O
and will also panic() as soon as it sees any other #VE, such as
one generated by a private memory access.
[ dhansen: Rewrite changelog and rebase on new tdx_parse_tdinfo().
Add Kirill's tested-by because I made changes since
he wrote this. ]
Fixes: 9a22bf6debbf ("x86/traps: Add #VE support for TDX guest")
Reported-by: ruogui.ygr(a)alibaba-inc.com
Signed-off-by: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Signed-off-by: Dave Hansen <dave.hansen(a)linux.intel.com>
Tested-by: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/all/20221028141220.29217-3-kirill.shutemov%40linux.…
---
arch/x86/coco/tdx/tdx.c | 21 ++++++++++++++++-----
1 file changed, 16 insertions(+), 5 deletions(-)
diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
index 3fee969..b8998cf 100644
--- a/arch/x86/coco/tdx/tdx.c
+++ b/arch/x86/coco/tdx/tdx.c
@@ -34,6 +34,8 @@
#define VE_GET_PORT_NUM(e) ((e) >> 16)
#define VE_IS_IO_STRING(e) ((e) & BIT(4))
+#define ATTR_SEPT_VE_DISABLE BIT(28)
+
/*
* Wrapper for standard use of __tdx_hypercall with no output aside from
* return code.
@@ -102,6 +104,7 @@ static void tdx_parse_tdinfo(u64 *cc_mask)
{
struct tdx_module_output out;
unsigned int gpa_width;
+ u64 td_attr;
/*
* TDINFO TDX module call is used to get the TD execution environment
@@ -109,19 +112,27 @@ static void tdx_parse_tdinfo(u64 *cc_mask)
* information, etc. More details about the ABI can be found in TDX
* Guest-Host-Communication Interface (GHCI), section 2.4.2 TDCALL
* [TDG.VP.INFO].
- *
- * The GPA width that comes out of this call is critical. TDX guests
- * can not meaningfully run without it.
*/
tdx_module_call(TDX_GET_INFO, 0, 0, 0, 0, &out);
- gpa_width = out.rcx & GENMASK(5, 0);
-
/*
* The highest bit of a guest physical address is the "sharing" bit.
* Set it for shared pages and clear it for private pages.
+ *
+ * The GPA width that comes out of this call is critical. TDX guests
+ * can not meaningfully run without it.
*/
+ gpa_width = out.rcx & GENMASK(5, 0);
*cc_mask = BIT_ULL(gpa_width - 1);
+
+ /*
+ * The kernel can not handle #VE's when accessing normal kernel
+ * memory. Ensure that no #VE will be delivered for accesses to
+ * TD-private memory. Only VMM-shared memory (MMIO) will #VE.
+ */
+ td_attr = out.rdx;
+ if (!(td_attr & ATTR_SEPT_VE_DISABLE))
+ panic("TD misconfiguration: SEPT_VE_DISABLE attibute must be set.\n");
}
/*
The commit 3c52c6bb831f (tcp/udp: Fix memory leak in
ipv6_renew_options()) fixes a memory leak reported by syzbot. This seems
to be a good candidate for the stable trees. This patch didn't apply cleanly
in 6.0 kernel, since release_sock() calls are changed to
sockopt_release_sock() in the latest kernel versions.
Kuniyuki Iwashima (1):
tcp/udp: Fix memory leak in ipv6_renew_options().
net/ipv6/ipv6_sockglue.c | 7 +++++++
1 file changed, 7 insertions(+)
--
2.38.1.273.g43a17bfeac-goog
From: Yu Kuai <yukuai3(a)huawei.com>
This reverts commit 84f7a9de0602704bbec774a6c7f7c8c4994bee9c.
Because it introduces a problem that rq->__data_len is set to the wrong
value.
before the patch:
1) nr_bytes = rq->__data_len
2) rq->__data_len = sdp->sector_size
3) scsi_init_io()
4) rq->__data_len = nr_bytes
after the patch:
1) rq->__data_len = sdp->sector_size
2) scsi_init_io()
3) rq->__data_len = rq->__data_len -> __data_len is wrong
As a result, the io can only complete one segment at a time and is
requeued in scsi_io_completion_action(), which causes severe
performance degradation.
Scsi write same is removed in commit e383e16e84e9 ("scsi: sd: Remove
WRITE_SAME support") from mainline, hence this patch is only needed for
stable kernels.
Fixes: 84f7a9de0602 ("scsi: sd: Remove a local variable")
Signed-off-by: Yu Kuai <yukuai3(a)huawei.com>
Reviewed-by: Bart Van Assche <bvanassche(a)acm.org>
---
Changes in v2:
- add description that this patch is only needed for stable kernels.
drivers/scsi/sd.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index de6640ad1943..1e887c11e83d 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1072,6 +1072,7 @@ static blk_status_t sd_setup_write_same_cmnd(struct scsi_cmnd *cmd)
struct bio *bio = rq->bio;
u64 lba = sectors_to_logical(sdp, blk_rq_pos(rq));
u32 nr_blocks = sectors_to_logical(sdp, blk_rq_sectors(rq));
+ unsigned int nr_bytes = blk_rq_bytes(rq);
blk_status_t ret;
if (sdkp->device->no_write_same)
@@ -1108,7 +1109,7 @@ static blk_status_t sd_setup_write_same_cmnd(struct scsi_cmnd *cmd)
*/
rq->__data_len = sdp->sector_size;
ret = scsi_alloc_sgtables(cmd);
- rq->__data_len = blk_rq_bytes(rq);
+ rq->__data_len = nr_bytes;
return ret;
}
--
2.31.1
On systems with SME, access to the SMPRI_EL1 priority management register is
controlled by the nSMPRI_EL1 fine grained trap and TPIDR2_EL0 is controlled
by nTPIDR2_EL0. We manage these traps in nVHE mode but do not do so when in
VHE mode; add the required management.
Without this these registers could be used as side channels where implemented.
Fixes: 861262ab8627 ("KVM: arm64: Handle SME host state when running guests")
Signed-off-by: Mark Brown <broonie(a)kernel.org>
Cc: stable(a)vger.kernel.org
---
arch/arm64/kvm/hyp/vhe/switch.c | 26 ++++++++++++++++++++++++--
1 file changed, 24 insertions(+), 2 deletions(-)
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index 7acb87eaa092..9dac3a1a85f7 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -63,10 +63,20 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
__activate_traps_fpsimd32(vcpu);
}
- if (cpus_have_final_cap(ARM64_SME))
+ if (cpus_have_final_cap(ARM64_SME)) {
write_sysreg(read_sysreg(sctlr_el2) & ~SCTLR_ELx_ENTP2,
sctlr_el2);
+ sysreg_clear_set_s(SYS_HFGRTR_EL2,
+ HFGxTR_EL2_nSMPRI_EL1_MASK |
+ HFGxTR_EL2_nTPIDR2_EL0_MASK,
+ 0);
+ sysreg_clear_set_s(SYS_HFGWTR_EL2,
+ HFGxTR_EL2_nSMPRI_EL1_MASK |
+ HFGxTR_EL2_nTPIDR2_EL0_MASK,
+ 0);
+ }
+
write_sysreg(val, cpacr_el1);
write_sysreg(__this_cpu_read(kvm_hyp_vector), vbar_el1);
@@ -88,9 +98,21 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
*/
asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT));
- if (cpus_have_final_cap(ARM64_SME))
+ if (cpus_have_final_cap(ARM64_SME)) {
+ /*
+ * Enable access to SMPRI_EL1 - we don't need to
+ * control nTPIDR2_EL0 in VHE mode.
+ */
+ sysreg_clear_set_s(SYS_HFGRTR_EL2, 0,
+ HFGxTR_EL2_nSMPRI_EL1_MASK |
+ HFGxTR_EL2_nTPIDR2_EL0_MASK);
+ sysreg_clear_set_s(SYS_HFGWTR_EL2, 0,
+ HFGxTR_EL2_nSMPRI_EL1_MASK |
+ HFGxTR_EL2_nTPIDR2_EL0_MASK);
+
write_sysreg(read_sysreg(sctlr_el2) | SCTLR_ELx_ENTP2,
sctlr_el2);
+ }
write_sysreg(CPACR_EL1_DEFAULT, cpacr_el1);
--
2.30.2
Hello,
Please can you consider my "ALSA: usb-audio: Add quirks for MacroSilicon
MS2100/MS2106 devices" patch, with upstream commit ID
6e2c9105e0b743c92a157389d40f00b81bdd09fe for inclusion in all -stable
kernels. Apart from the device IDs, it is a copy of the similar existing
patch for MS2109 devices, which is already present in -stable kernels.
John Veness
The quilt patch titled
Subject: hugetlb: don't delete vma_lock in hugetlb MADV_DONTNEED processing
has been removed from the -mm tree. Its filename was
hugetlb-dont-delete-vma_lock-in-hugetlb-madv_dontneed-processing.patch
This patch was dropped because an updated version will be merged
------------------------------------------------------
From: Mike Kravetz <mike.kravetz(a)oracle.com>
Subject: hugetlb: don't delete vma_lock in hugetlb MADV_DONTNEED processing
Date: Mon, 31 Oct 2022 15:34:40 -0700
madvise(MADV_DONTNEED) ends up calling zap_page_range() to clear the page
tables associated with the address range. For hugetlb vmas,
zap_page_range will call __unmap_hugepage_range_final. However,
__unmap_hugepage_range_final assumes the passed vma is about to be removed
and deletes the vma_lock to prevent pmd sharing as the vma is on the way
out. In the case of madvise(MADV_DONTNEED) the vma remains, but the
missing vma_lock prevents pmd sharing and could potentially lead to issues
with truncation/fault races.
This issue was originally reported here [1] as a BUG triggered in
page_try_dup_anon_rmap. Prior to the introduction of the hugetlb
vma_lock, __unmap_hugepage_range_final cleared the VM_MAYSHARE flag to
prevent pmd sharing. Subsequent faults on this vma were confused as
VM_MAYSHARE indicates a sharable vma, but was not set so page_mapping was
not set in new pages added to the page table. This resulted in pages that
appeared anonymous in a VM_SHARED vma and triggered the BUG.
Create a new routine clear_hugetlb_page_range() that can be called from
madvise(MADV_DONTNEED) for hugetlb vmas. It has the same setup as
zap_page_range, but does not delete the vma_lock. Also, add a new zap
flag ZAP_FLAG_UNMAP to indicate an unmap call from unmap_vmas(). This is
used to indicate the 'final' unmapping of a vma. The routine
__unmap_hugepage_range to take a notification_needed argument. This is
used to prevent duplicate notifications.
[1] https://lore.kernel.org/lkml/CAO4mrfdLMXsao9RF4fUE8-Wfde8xmjsKrTNMNC9wjUb6J…
Link: https://lkml.kernel.org/r/20221031223440.285187-1-mike.kravetz@oracle.com
Fixes: 90e7e7f5ef3f ("mm: enable MADV_DONTNEED for hugetlb mappings")
Signed-off-by: Mike Kravetz <mike.kravetz(a)oracle.com>
Reported-by: Wei Chen <harperchen1110(a)gmail.com>
Cc: Axel Rasmussen <axelrasmussen(a)google.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Cc: Mina Almasry <almasrymina(a)google.com>
Cc: Nadav Amit <nadav.amit(a)gmail.com>
Cc: Naoya Horiguchi <naoya.horiguchi(a)linux.dev>
Cc: Peter Xu <peterx(a)redhat.com>
Cc: Rik van Riel <riel(a)surriel.com>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
include/linux/hugetlb.h | 7 +++
include/linux/mm.h | 3 +
mm/hugetlb.c | 80 ++++++++++++++++++++++++++++----------
mm/memory.c | 18 +++++---
4 files changed, 82 insertions(+), 26 deletions(-)
--- a/include/linux/hugetlb.h~hugetlb-dont-delete-vma_lock-in-hugetlb-madv_dontneed-processing
+++ a/include/linux/hugetlb.h
@@ -156,6 +156,8 @@ long follow_hugetlb_page(struct mm_struc
void unmap_hugepage_range(struct vm_area_struct *,
unsigned long, unsigned long, struct page *,
zap_flags_t);
+void clear_hugetlb_page_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end);
void __unmap_hugepage_range_final(struct mmu_gather *tlb,
struct vm_area_struct *vma,
unsigned long start, unsigned long end,
@@ -460,6 +462,11 @@ static inline void __unmap_hugepage_rang
BUG();
}
+static void __maybe_unused clear_hugetlb_page_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end)
+{
+}
+
static inline vm_fault_t hugetlb_fault(struct mm_struct *mm,
struct vm_area_struct *vma, unsigned long address,
unsigned int flags)
--- a/include/linux/mm.h~hugetlb-dont-delete-vma_lock-in-hugetlb-madv_dontneed-processing
+++ a/include/linux/mm.h
@@ -3475,4 +3475,7 @@ madvise_set_anon_name(struct mm_struct *
*/
#define ZAP_FLAG_DROP_MARKER ((__force zap_flags_t) BIT(0))
+/* Set in unmap_vmas() to indicate an unmap call. Only used by hugetlb */
+#define ZAP_FLAG_UNMAP ((__force zap_flags_t) BIT(1))
+
#endif /* _LINUX_MM_H */
--- a/mm/hugetlb.c~hugetlb-dont-delete-vma_lock-in-hugetlb-madv_dontneed-processing
+++ a/mm/hugetlb.c
@@ -5064,7 +5064,6 @@ static void __unmap_hugepage_range(struc
struct page *page;
struct hstate *h = hstate_vma(vma);
unsigned long sz = huge_page_size(h);
- struct mmu_notifier_range range;
unsigned long last_addr_mask;
bool force_flush = false;
@@ -5079,13 +5078,6 @@ static void __unmap_hugepage_range(struc
tlb_change_page_size(tlb, sz);
tlb_start_vma(tlb, vma);
- /*
- * If sharing possible, alert mmu notifiers of worst case.
- */
- mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, mm, start,
- end);
- adjust_range_if_pmd_sharing_possible(vma, &range.start, &range.end);
- mmu_notifier_invalidate_range_start(&range);
last_addr_mask = hugetlb_mask_last_page(h);
address = start;
for (; address < end; address += sz) {
@@ -5174,7 +5166,6 @@ static void __unmap_hugepage_range(struc
if (ref_page)
break;
}
- mmu_notifier_invalidate_range_end(&range);
tlb_end_vma(tlb, vma);
/*
@@ -5194,37 +5185,86 @@ static void __unmap_hugepage_range(struc
tlb_flush_mmu_tlbonly(tlb);
}
-void __unmap_hugepage_range_final(struct mmu_gather *tlb,
+static void __unmap_hugepage_range_locking(struct mmu_gather *tlb,
struct vm_area_struct *vma, unsigned long start,
unsigned long end, struct page *ref_page,
zap_flags_t zap_flags)
{
+ bool final = zap_flags & ZAP_FLAG_UNMAP;
+
hugetlb_vma_lock_write(vma);
i_mmap_lock_write(vma->vm_file->f_mapping);
__unmap_hugepage_range(tlb, vma, start, end, ref_page, zap_flags);
- /*
- * Unlock and free the vma lock before releasing i_mmap_rwsem. When
- * the vma_lock is freed, this makes the vma ineligible for pmd
- * sharing. And, i_mmap_rwsem is required to set up pmd sharing.
- * This is important as page tables for this unmapped range will
- * be asynchrously deleted. If the page tables are shared, there
- * will be issues when accessed by someone else.
- */
- __hugetlb_vma_unlock_write_free(vma);
+ if (final) {
+ /*
+ * Unlock and free the vma lock before releasing i_mmap_rwsem.
+ * When the vma_lock is freed, this makes the vma ineligible
+ * for pmd sharing. And, i_mmap_rwsem is required to set up
+ * pmd sharing. This is important as page tables for this
+ * unmapped range will be asynchrously deleted. If the page
+ * tables are shared, there will be issues when accessed by
+ * someone else.
+ */
+ __hugetlb_vma_unlock_write_free(vma);
+ i_mmap_unlock_write(vma->vm_file->f_mapping);
+ } else {
+ i_mmap_unlock_write(vma->vm_file->f_mapping);
+ hugetlb_vma_unlock_write(vma);
+ }
+}
+
+void __unmap_hugepage_range_final(struct mmu_gather *tlb,
+ struct vm_area_struct *vma, unsigned long start,
+ unsigned long end, struct page *ref_page,
+ zap_flags_t zap_flags)
+{
+ __unmap_hugepage_range_locking(tlb, vma, start, end, ref_page,
+ zap_flags);
+}
+
+#ifdef CONFIG_ADVISE_SYSCALLS
+/*
+ * Similar setup as in zap_page_range(). madvise(MADV_DONTNEED) can not call
+ * zap_page_range for hugetlb vmas as __unmap_hugepage_range_final will delete
+ * the associated vma_lock.
+ */
+void clear_hugetlb_page_range(struct vm_area_struct *vma, unsigned long start,
+ unsigned long end)
+{
+ struct mmu_notifier_range range;
+ struct mmu_gather tlb;
+
+ mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
+ start, end);
+ adjust_range_if_pmd_sharing_possible(vma, &range.start, &range.end);
+ tlb_gather_mmu(&tlb, vma->vm_mm);
+ update_hiwater_rss(vma->vm_mm);
+ mmu_notifier_invalidate_range_start(&range);
- i_mmap_unlock_write(vma->vm_file->f_mapping);
+ __unmap_hugepage_range_locking(&tlb, vma, start, end, NULL, 0);
+
+ mmu_notifier_invalidate_range_end(&range);
+ tlb_finish_mmu(&tlb);
}
+#endif
void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
unsigned long end, struct page *ref_page,
zap_flags_t zap_flags)
{
+ struct mmu_notifier_range range;
struct mmu_gather tlb;
+ mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, vma->vm_mm,
+ start, end);
+ adjust_range_if_pmd_sharing_possible(vma, &range.start, &range.end);
tlb_gather_mmu(&tlb, vma->vm_mm);
+
__unmap_hugepage_range(&tlb, vma, start, end, ref_page, zap_flags);
+
+ mmu_notifier_invalidate_range_end(&range);
tlb_finish_mmu(&tlb);
}
--- a/mm/memory.c~hugetlb-dont-delete-vma_lock-in-hugetlb-madv_dontneed-processing
+++ a/mm/memory.c
@@ -1720,7 +1720,7 @@ void unmap_vmas(struct mmu_gather *tlb,
{
struct mmu_notifier_range range;
struct zap_details details = {
- .zap_flags = ZAP_FLAG_DROP_MARKER,
+ .zap_flags = ZAP_FLAG_DROP_MARKER | ZAP_FLAG_UNMAP,
/* Careful - we need to zap private pages too! */
.even_cows = true,
};
@@ -1753,15 +1753,21 @@ void zap_page_range(struct vm_area_struc
MA_STATE(mas, mt, vma->vm_end, vma->vm_end);
lru_add_drain();
- mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
- start, start + size);
tlb_gather_mmu(&tlb, vma->vm_mm);
update_hiwater_rss(vma->vm_mm);
- mmu_notifier_invalidate_range_start(&range);
do {
- unmap_single_vma(&tlb, vma, start, range.end, NULL);
+ mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma,
+ vma->vm_mm,
+ max(start, vma->vm_start),
+ min(start + size, vma->vm_end));
+ if (is_vm_hugetlb_page(vma))
+ adjust_range_if_pmd_sharing_possible(vma,
+ &range.start,
+ &range.end);
+ mmu_notifier_invalidate_range_start(&range);
+ unmap_single_vma(&tlb, vma, start, start + size, NULL);
+ mmu_notifier_invalidate_range_end(&range);
} while ((vma = mas_find(&mas, end - 1)) != NULL);
- mmu_notifier_invalidate_range_end(&range);
tlb_finish_mmu(&tlb);
}
_
Patches currently in -mm which might be from mike.kravetz(a)oracle.com are
hugetlb-simplify-hugetlb-handling-in-follow_page_mask.patch
hugetlb-simplify-hugetlb-handling-in-follow_page_mask-v4.patch
hugetlb-simplify-hugetlb-handling-in-follow_page_mask-v5.patch