handshake_req_submit() replaces sk->sk_destruct but never restores it when
submission fails before the request is hashed. handshake_sk_destruct() then
returns early and the original destructor never runs, leaking the socket.
Restore sk_destruct on the error path.
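As a rough, self-contained illustration of the save-and-restore pattern
the fix applies (plain userspace C with made-up names, not the kernel
code):

#include <stdio.h>

struct fake_sock {
	void (*sk_destruct)(struct fake_sock *sk);
};

static void original_destruct(struct fake_sock *sk)
{
	printf("original socket teardown runs\n");
}

static void replacement_destruct(struct fake_sock *sk)
{
	/* In the kernel this returns early when the request was never
	 * hashed, so the saved destructor would not be chained to. */
}

static int fake_submit(struct fake_sock *sk, int fail)
{
	void (*saved)(struct fake_sock *) = sk->sk_destruct;

	sk->sk_destruct = replacement_destruct;
	if (fail) {
		sk->sk_destruct = saved;	/* restore on the error path */
		return -1;
	}
	return 0;
}

int main(void)
{
	struct fake_sock sk = { .sk_destruct = original_destruct };

	fake_submit(&sk, 1);
	sk.sk_destruct(&sk);	/* original teardown still runs */
	return 0;
}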
Fixes: 3b3009ea8abb ("net/handshake: Create a NETLINK service for handling handshake requests")
Signed-off-by: caoping <caoping(a)cmss.chinamobile.com>
---
net/handshake/request.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/net/handshake/request.c b/net/handshake/request.c
index 274d2c89b6b2..89435ed755cd 100644
--- a/net/handshake/request.c
+++ b/net/handshake/request.c
@@ -276,6 +276,8 @@ int handshake_req_submit(struct socket *sock, struct handshake_req *req,
out_unlock:
spin_unlock(&hn->hn_lock);
out_err:
+ /* Restore original destructor so socket teardown still runs on failure */
+ req->hr_sk->sk_destruct = req->hr_odestruct;
trace_handshake_submit_err(net, req, req->hr_sk, ret);
handshake_req_destroy(req);
return ret;
base-commit: 4a26e7032d7d57c998598c08a034872d6f0d3945
--
2.47.3
From: Maciej Wieczor-Retman <maciej.wieczor-retman(a)intel.com>
A KASAN tag mismatch, possibly causing a kernel panic, can be observed
on systems with tag-based KASAN enabled and multiple NUMA nodes. It was
reported on arm64 and reproduced on x86. It can be explained in the
following points:
1. There can be more than one virtual memory chunk.
2. Each chunk's base address carries a tag.
3. The returned base address points at the first chunk and thus
   carries the first chunk's tag.
4. The subsequent chunks will therefore be accessed with the tag of
   the first chunk.
5. Thus, the subsequent chunks need to have their tags set to match
   that of the first chunk.
Use the modified __kasan_unpoison_vmalloc() to pass the tag of the first
vm_struct's address when vm_structs are unpoisoned in
pcpu_get_vm_areas(). Assigning a common tag resolves the pcpu chunk
address mismatch.
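For illustration only, the kind of pointer-tag bookkeeping involved can
be sketched in plain userspace C (top-byte tags as in SW_TAGS; the names
and tag placement are assumptions, not the kernel implementation):

#include <stdint.h>
#include <stdio.h>

#define TAG_SHIFT	56

static uint8_t get_tag(uint64_t addr)
{
	return (uint8_t)(addr >> TAG_SHIFT);
}

static uint64_t set_tag(uint64_t addr, uint8_t tag)
{
	return (addr & ~((uint64_t)0xff << TAG_SHIFT)) |
	       ((uint64_t)tag << TAG_SHIFT);
}

int main(void)
{
	uint64_t chunks[3] = {
		0xab00000000001000ULL,	/* first chunk, tag 0xab */
		0xcd00000000002000ULL,	/* later chunks got random tags */
		0xef00000000003000ULL,
	};
	uint8_t tag = get_tag(chunks[0]);
	int i;

	/* Retag every chunk with the first chunk's tag, mirroring what
	 * the patch does for the areas unpoisoned in pcpu_get_vm_areas(). */
	for (i = 1; i < 3; i++)
		chunks[i] = set_tag(chunks[i], tag);

	for (i = 0; i < 3; i++)
		printf("chunk %d tag: 0x%02x\n", i, get_tag(chunks[i]));
	return 0;
}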
Fixes: 1d96320f8d53 ("kasan, vmalloc: add vmalloc tagging for SW_TAGS")
Cc: <stable(a)vger.kernel.org> # 6.1+
Signed-off-by: Maciej Wieczor-Retman <maciej.wieczor-retman(a)intel.com>
---
Changelog v2:
- Revise the whole patch to match the fixed refactorization from the
first patch.
Changelog v1:
- Rewrite the patch message to point at the user impact of the issue.
- Move helper to common.c so it can be compiled in all KASAN modes.
mm/kasan/common.c | 3 ++-
mm/kasan/hw_tags.c | 12 ++++++++----
mm/kasan/shadow.c | 15 +++++++++++----
3 files changed, 21 insertions(+), 9 deletions(-)
diff --git a/mm/kasan/common.c b/mm/kasan/common.c
index 7884ea7d13f9..e5a867a5670b 100644
--- a/mm/kasan/common.c
+++ b/mm/kasan/common.c
@@ -591,11 +591,12 @@ void kasan_unpoison_vmap_areas(struct vm_struct **vms, int nr_vms,
unsigned long size;
void *addr;
int area;
+ u8 tag = get_tag(vms[0]->addr);
for (area = 0 ; area < nr_vms ; area++) {
size = vms[area]->size;
addr = vms[area]->addr;
- vms[area]->addr = __kasan_unpoison_vmap_areas(addr, size, flags);
+ vms[area]->addr = __kasan_unpoison_vmap_areas(addr, size, flags, tag);
}
}
#endif
diff --git a/mm/kasan/hw_tags.c b/mm/kasan/hw_tags.c
index 4b7936a2bd6f..2a02b898b9d8 100644
--- a/mm/kasan/hw_tags.c
+++ b/mm/kasan/hw_tags.c
@@ -317,7 +317,7 @@ static void init_vmalloc_pages(const void *start, unsigned long size)
}
static void *__kasan_unpoison_vmalloc(const void *start, unsigned long size,
- kasan_vmalloc_flags_t flags)
+ kasan_vmalloc_flags_t flags, int unpoison_tag)
{
u8 tag;
unsigned long redzone_start, redzone_size;
@@ -361,7 +361,11 @@ static void *__kasan_unpoison_vmalloc(const void *start, unsigned long size,
return (void *)start;
}
- tag = kasan_random_tag();
+ if (unpoison_tag < 0)
+ tag = kasan_random_tag();
+ else
+ tag = unpoison_tag;
+
start = set_tag(start, tag);
/* Unpoison and initialize memory up to size. */
@@ -390,7 +394,7 @@ static void *__kasan_unpoison_vmalloc(const void *start, unsigned long size,
void *__kasan_random_unpoison_vmalloc(const void *start, unsigned long size,
kasan_vmalloc_flags_t flags)
{
- return __kasan_unpoison_vmalloc(start, size, flags);
+ return __kasan_unpoison_vmalloc(start, size, flags, -1);
}
void __kasan_poison_vmalloc(const void *start, unsigned long size)
@@ -405,7 +409,7 @@ void __kasan_poison_vmalloc(const void *start, unsigned long size)
void *__kasan_unpoison_vmap_areas(void *addr, unsigned long size,
kasan_vmalloc_flags_t flags, u8 tag)
{
- return __kasan_unpoison_vmalloc(addr, size, flags);
+ return __kasan_unpoison_vmalloc(addr, size, flags, tag);
}
#endif
diff --git a/mm/kasan/shadow.c b/mm/kasan/shadow.c
index 0a8d8bf6e9cf..7a66ffc1d5b3 100644
--- a/mm/kasan/shadow.c
+++ b/mm/kasan/shadow.c
@@ -625,8 +625,10 @@ void kasan_release_vmalloc(unsigned long start, unsigned long end,
}
static void *__kasan_unpoison_vmalloc(const void *start, unsigned long size,
- kasan_vmalloc_flags_t flags)
+ kasan_vmalloc_flags_t flags, int unpoison_tag)
{
+ u8 tag;
+
/*
* Software KASAN modes unpoison both VM_ALLOC and non-VM_ALLOC
* mappings, so the KASAN_VMALLOC_VM_ALLOC flag is ignored.
@@ -648,7 +650,12 @@ static void *__kasan_unpoison_vmalloc(const void *start, unsigned long size,
!(flags & KASAN_VMALLOC_PROT_NORMAL))
return (void *)start;
- start = set_tag(start, kasan_random_tag());
+ if (unpoison_tag < 0)
+ tag = kasan_random_tag();
+ else
+ tag = unpoison_tag;
+
+ start = set_tag(start, tag);
kasan_unpoison(start, size, false);
return (void *)start;
}
@@ -656,13 +663,13 @@ static void *__kasan_unpoison_vmalloc(const void *start, unsigned long size,
void *__kasan_random_unpoison_vmalloc(const void *start, unsigned long size,
kasan_vmalloc_flags_t flags)
{
- return __kasan_unpoison_vmalloc(start, size, flags);
+ return __kasan_unpoison_vmalloc(start, size, flags, -1);
}
void *__kasan_unpoison_vmap_areas(void *addr, unsigned long size,
kasan_vmalloc_flags_t flags, u8 tag)
{
- return __kasan_unpoison_vmalloc(addr, size, flags);
+ return __kasan_unpoison_vmalloc(addr, size, flags, tag);
}
/*
--
2.52.0
From: Baokun Li <libaokun1(a)huawei.com>
commit 72a6e22c604c95ddb3b10b5d3bb85b6ff4dbc34f upstream.
The fscache_cookie_lru_timer is initialized when the fscache module
is inserted, but is not deleted when the fscache module is removed.
If timer_reduce() is called before removing the fscache module,
the fscache_cookie_lru_timer will be added to the timer list of
the current CPU. Once the module has been removed, a use-after-free
is then triggered from softirq context, as follows:
==================================================================
BUG: unable to handle page fault for address: fffffbfff803c9e9
PF: supervisor read access in kernel mode
PF: error_code(0x0000) - not-present page
PGD 21ffea067 P4D 21ffea067 PUD 21ffe6067 PMD 110a7c067 PTE 0
Oops: Oops: 0000 [#1] PREEMPT SMP KASAN PTI
CPU: 1 UID: 0 PID: 0 Comm: swapper/1 Tainted: G W 6.11.0-rc3 #855
Tainted: [W]=WARN
RIP: 0010:__run_timer_base.part.0+0x254/0x8a0
Call Trace:
<IRQ>
tmigr_handle_remote_up+0x627/0x810
__walk_groups.isra.0+0x47/0x140
tmigr_handle_remote+0x1fa/0x2f0
handle_softirqs+0x180/0x590
irq_exit_rcu+0x84/0xb0
sysvec_apic_timer_interrupt+0x6e/0x90
</IRQ>
<TASK>
asm_sysvec_apic_timer_interrupt+0x1a/0x20
RIP: 0010:default_idle+0xf/0x20
default_idle_call+0x38/0x60
do_idle+0x2b5/0x300
cpu_startup_entry+0x54/0x60
start_secondary+0x20d/0x280
common_startup_64+0x13e/0x148
</TASK>
Modules linked in: [last unloaded: netfs]
==================================================================
Therefore, delete the fscache_cookie_lru_timer when removing the fscache module.
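As a minimal sketch of the pattern the fix follows (an illustrative
module, not the fscache code), any timer armed by a module has to be
shut down in its exit path so the callback cannot fire after the module
text is gone:

#include <linux/module.h>
#include <linux/timer.h>

static struct timer_list example_timer;

static void example_timer_func(struct timer_list *t)
{
	/* deferred work normally done by the module */
}

static int __init example_init(void)
{
	timer_setup(&example_timer, example_timer_func, 0);
	return 0;
}

static void __exit example_exit(void)
{
	/* Deactivate the timer and wait for a running callback to
	 * finish before the module is unloaded. */
	timer_shutdown_sync(&example_timer);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");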
Fixes: 12bb21a29c19 ("fscache: Implement cookie user counting and resource pinning")
Cc: stable(a)kernel.org
Signed-off-by: Baokun Li <libaokun1(a)huawei.com>
Link: https://lore.kernel.org/r/20240826112056.2458299-1-libaokun@huaweicloud.com
Acked-by: David Howells <dhowells(a)redhat.com>
Signed-off-by: Christian Brauner <brauner(a)kernel.org>
[ Changed the file path due to missing commit 47757ea83a54 ("netfs,
fscache: Move fs/fscache/* into fs/netfs/") ]
Signed-off-by: Chen Yu <xnguchen(a)sina.cn>
---
fs/fscache/main.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/fs/fscache/main.c b/fs/fscache/main.c
index dad85fd84f6f..7a60cd96e87e 100644
--- a/fs/fscache/main.c
+++ b/fs/fscache/main.c
@@ -114,6 +114,7 @@ static void __exit fscache_exit(void)
kmem_cache_destroy(fscache_cookie_jar);
fscache_proc_cleanup();
+ timer_shutdown_sync(&fscache_cookie_lru_timer);
destroy_workqueue(fscache_wq);
pr_notice("Unloaded\n");
}
--
2.17.1
From: Joanne Koong <joannelkoong(a)gmail.com>
[ Upstream commit 7aa6bc3e8766990824f66ca76c19596ce10daf3e ]
iomap_adjust_read_range() assumes that the position and length passed in
are block-aligned. This is not always the case however, as shown in the
syzbot generated case for erofs. This causes too many bytes to be
skipped for uptodate blocks, which results in returning the incorrect
position and length to read in. If all the blocks are uptodate, this
underflows length and returns a position beyond the folio.
Fix the calculation to also take into account the block offset when
calculating how many bytes can be skipped for uptodate blocks.
Signed-off-by: Joanne Koong <joannelkoong(a)gmail.com>
Tested-by: syzbot(a)syzkaller.appspotmail.com
Reviewed-by: Brian Foster <bfoster(a)redhat.com>
Reviewed-by: Christoph Hellwig <hch(a)lst.de>
Signed-off-by: Christian Brauner <brauner(a)kernel.org>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
LLM Generated explanations, may be completely bogus:
---
## COMPREHENSIVE ANALYSIS: iomap_adjust_read_range() Bug Fix
### 1. COMMIT MESSAGE ANALYSIS
**Subject:** `iomap: adjust read range correctly for non-block-aligned
positions`
**Key Points from Message:**
- The function `iomap_adjust_read_range()` **assumes block-aligned
positions**, but this assumption is violated
- Bug was discovered via **syzbot testing with erofs** filesystem
- Causes "too many bytes to be skipped for uptodate blocks"
- Results in **incorrect position and length to read**
- If all blocks are uptodate: **underflows length** and **returns
position beyond the folio**
**Tags Analysis:**
- ✅ `Tested-by: syzbot(a)syzkaller.appspotmail.com` - Automated testing
confirmed
- ✅ `Reviewed-by: Brian Foster <bfoster(a)redhat.com>` - Red Hat
filesystem expert
- ✅ `Reviewed-by: Christoph Hellwig <hch(a)lst.de>` - Major iomap author
- ✅ `Signed-off-by: Christian Brauner <brauner(a)kernel.org>` - VFS
maintainer
- ❌ **No `Cc: stable(a)vger.kernel.org`** tag
- ❌ **No `Fixes:` tag** pointing to original buggy commit
### 2. CODE CHANGE ANALYSIS
**Bug Location:** `fs/iomap/buffered-io.c`, function
`iomap_adjust_read_range()`
**The Buggy Code (lines 246-253 before fix):**
```c
for (i = first; i <= last; i++) {
	if (!ifs_block_is_uptodate(ifs, i))
		break;
	*pos += block_size;	// Bug: assumes pos is block-aligned
	poff += block_size;
	plen -= block_size;
	first++;
}
```
**Technical Root Cause:**
When `*pos` has a sub-block offset (e.g., `pos = 512` in a 1024-byte
block):
- `first = poff >> block_bits` computes the block index correctly (block
0)
- But the loop skips a full `block_size` per block, ignoring the offset
**Example demonstrating the bug:**
- Block size: 1024 bytes
- Position: 512 (half-way into block 0)
- Block 0 is uptodate
**Old buggy calculation:**
- Skip full 1024 bytes → `pos = 1536`, overshoots by 512 bytes
- `plen -= 1024` → underflows if `plen < 1024`
**Fixed calculation:**
```c
blocks_skipped = i - first;
if (blocks_skipped) {
	unsigned long block_offset = *pos & (block_size - 1);	// = 512
	unsigned bytes_skipped =
		(blocks_skipped << block_bits) - block_offset;	// = 1024 - 512 = 512

	*pos += bytes_skipped;	// Correct: pos = 1024
	poff += bytes_skipped;
	plen -= bytes_skipped;
}
```
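To make the arithmetic concrete, here is a tiny self-contained program
(illustrative only, not the kernel code) comparing the old and new skip
for the example above:

```c
#include <stdio.h>

int main(void)
{
	unsigned long block_size = 1024, block_bits = 10;
	unsigned long pos = 512;		/* non-block-aligned start */
	unsigned long blocks_skipped = 1;	/* block 0 is uptodate */

	unsigned long old_skip = blocks_skipped * block_size;
	unsigned long block_offset = pos & (block_size - 1);
	unsigned long new_skip = (blocks_skipped << block_bits) - block_offset;

	printf("old: pos -> %lu (overshoots by 512)\n", pos + old_skip);	/* 1536 */
	printf("new: pos -> %lu (next block boundary)\n", pos + new_skip);	/* 1024 */
	return 0;
}
```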
**Consequences of the Bug:**
1. **Integer underflow**: `plen` becomes huge (wraps around as unsigned)
2. **Position beyond folio**: `*pos` can point past the folio boundary
3. **Incorrect read ranges**: Wrong data read, potential corruption
4. **Potential crashes**: Invalid memory access from bad ranges
### 3. CLASSIFICATION
- **Type:** BUG FIX (arithmetic/logic error causing incorrect
calculations)
- **NOT:** Feature addition, cleanup, or optimization
- **Severity:** HIGH - affects filesystem data integrity
- **Scope:** Core iomap buffered I/O infrastructure
### 4. SCOPE AND RISK ASSESSMENT
**Change Size:**
- 13 insertions, 6 deletions
- Single file: `fs/iomap/buffered-io.c`
- Localized to one function
**Risk Level:** LOW
- Fix is mathematically straightforward
- Does not change control flow significantly
- Well-reviewed by iomap experts
- Tested by syzbot
**Affected Subsystem:** `fs/iomap/` - mature, widely-used infrastructure
**Affected Filesystems:** Multiple major filesystems use iomap:
- **XFS** - Primary enterprise Linux filesystem
- **GFS2** - Cluster filesystem
- **erofs** - Read-only filesystem (Android)
- **zonefs** - Zoned storage filesystem
- **fuse** - User-space filesystems
### 5. USER IMPACT
**Who is affected?**
- ALL users of XFS, GFS2, erofs, zonefs, and fuse filesystems
- Particularly affects systems with:
- Block sizes smaller than page size
- Partial folio reads/writes at non-block-aligned offsets
**How severe?**
- Can cause **incorrect data reads**
- Potential **data corruption** in read paths
- Possible **kernel crashes** from invalid memory access
- **Data integrity** issues for critical filesystems
**Triggered by:** syzbot (automated fuzzing) - confirms this is a real
triggerable bug
### 6. STABILITY INDICATORS
- ✅ `Tested-by: syzbot` - Confirmed fix works
- ✅ Two expert reviewers (Brian Foster, Christoph Hellwig)
- ✅ Merged by VFS maintainer (Christian Brauner)
- ⚠️ Relatively new commit (September 2025)
### 7. DEPENDENCY CHECK
**Bug Origin:** Commit `9dc55f1389f95` ("iomap: add support for
sub-pagesize buffered I/O without buffer heads") from **v4.19-rc1** (2018)
**Dependencies for backport:**
- The fix is self-contained
- May need minor adaptation for older kernels:
- v5.x: Uses `struct iomap_page` and pages (not folios)
- v6.x: Uses `struct iomap_folio_state` and folios
- Core logic remains the same across versions
**Related commits:**
- `8e3c15ee0d292` - Minor loop syntax change (`for` → `while`), NOT a
dependency
- `9d875e0eef8ec` - Related fix for unaligned END offsets (separate
issue)
### 8. HISTORICAL CONTEXT
The bug has existed for **~6 years** (since v4.19). It was latent
because:
- Most callers pass block-aligned positions
- erofs triggered an edge case with non-aligned positions
- syzbot's extensive testing exposed it
The original code assumed callers would always pass block-aligned
positions, which is a reasonable assumption violated in specific edge
cases.
---
## BACKPORT RECOMMENDATION
### Arguments FOR backporting:
1. **Fixes a real bug** discovered by automated testing (syzbot)
2. **Affects critical filesystems** (XFS, GFS2) used in production
3. **Small, surgical fix** - 13 lines added, 6 removed
4. **Low regression risk** - mathematically correct fix
5. **Expert-reviewed** by iomap maintainers
6. **Bug existed since v4.19** - all stable trees are affected
7. **Can cause data integrity issues** - serious for a filesystem bug
### Arguments AGAINST backporting:
1. **No `Cc: stable` tag** - maintainers didn't explicitly request
backport
2. **No `Fixes:` tag** - doesn't identify the buggy commit
3. **Relatively new commit** - hasn't had extensive mainline testing yet
4. **Needs adaptation** for older kernels (page vs folio APIs)
### Conclusion:
Despite the missing stable tags, this commit should be backported
because:
- It fixes a **real, triggerable bug** found by syzbot
- The bug can cause **incorrect data reads and potential corruption**
- It affects **major filesystems** (XFS, GFS2) used in production
environments
- The fix is **small, contained, and well-reviewed**
- The **risk of NOT fixing** (data corruption) outweighs the risk of the
fix (minimal)
The absence of `Cc: stable` may be an oversight, as this clearly meets
stable criteria.
**YES**
fs/iomap/buffered-io.c | 19 +++++++++++++------
1 file changed, 13 insertions(+), 6 deletions(-)
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 8b847a1e27f13..1c95a0a7b302d 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -240,17 +240,24 @@ static void iomap_adjust_read_range(struct inode *inode, struct folio *folio,
* to avoid reading in already uptodate ranges.
*/
if (ifs) {
- unsigned int i;
+ unsigned int i, blocks_skipped;
/* move forward for each leading block marked uptodate */
- for (i = first; i <= last; i++) {
+ for (i = first; i <= last; i++)
if (!ifs_block_is_uptodate(ifs, i))
break;
- *pos += block_size;
- poff += block_size;
- plen -= block_size;
- first++;
+
+ blocks_skipped = i - first;
+ if (blocks_skipped) {
+ unsigned long block_offset = *pos & (block_size - 1);
+ unsigned bytes_skipped =
+ (blocks_skipped << block_bits) - block_offset;
+
+ *pos += bytes_skipped;
+ poff += bytes_skipped;
+ plen -= bytes_skipped;
}
+ first = i;
/* truncate len if we find any trailing uptodate block(s) */
while (++i <= last) {
--
2.51.0
If a DIO read or an unbuffered read request extends beyond the EOF, the
server will return a short read and a status code indicating that EOF was
hit, which gets translated to -ENODATA. Note that the client does not cap
the request at i_size, but asks for the amount requested in case there's a
race on the server with a third party.
Now, on the client side, the request will get split into multiple
subrequests if rsize is smaller than the full request size. A subrequest
that starts before or at the EOF and returns short data up to the EOF will
be correctly handled, with the NETFS_SREQ_HIT_EOF flag being set,
indicating to netfslib that we can't read more.
If a subrequest, however, starts after the EOF and not at it, HIT_EOF will
not be flagged, its error will be set to -ENODATA and it will be abandoned.
This will cause the request as a whole to fail with -ENODATA.
Fix this by setting NETFS_SREQ_HIT_EOF on any subrequest that lies beyond
the EOF marker.
This can be reproduced by mounting with "cache=none,sign,vers=1.0" and
issuing a read that's significantly bigger than the size of the file
(e.g. attempting to read 64KiB from a 16KiB file).
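The heart of the check can be sketched as a self-contained helper
(illustrative names and values, not the cifs structures): a short read
counts as hitting EOF whenever the bytes transferred reach or pass the
server's file size, not only when they land exactly on it.

#include <stdbool.h>
#include <stdio.h>

static bool hit_eof(size_t start, size_t transferred, size_t len,
		    size_t remote_i_size)
{
	/* was '==' before the fix; '>=' also covers subrequests that
	 * start beyond the EOF */
	return transferred < len && start + transferred >= remote_i_size;
}

int main(void)
{
	size_t isize = 16 * 1024;	/* 16KiB file */

	/* subrequest starting exactly at the EOF: old and new check agree */
	printf("%d\n", hit_eof(16 * 1024, 0, 32 * 1024, isize));
	/* subrequest starting past the EOF: only '>=' flags HIT_EOF, so
	 * the whole read no longer fails with -ENODATA */
	printf("%d\n", hit_eof(32 * 1024, 0, 32 * 1024, isize));
	return 0;
}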
Fixes: a68c74865f51 ("cifs: Fix SMB1 readv/writev callback in the same way as SMB2/3")
Signed-off-by: David Howells <dhowells(a)redhat.com>
cc: Steve French <sfrench(a)samba.org>
cc: Paulo Alcantara <pc(a)manguebit.org>
cc: Shyam Prasad N <sprasad(a)microsoft.com>
cc: linux-cifs(a)vger.kernel.org
cc: netfs(a)lists.linux.dev
cc: linux-fsdevel(a)vger.kernel.org
---
fs/smb/client/cifssmb.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c
index 645831708e1b..1871d2c1a8e0 100644
--- a/fs/smb/client/cifssmb.c
+++ b/fs/smb/client/cifssmb.c
@@ -1395,7 +1395,7 @@ cifs_readv_callback(struct mid_q_entry *mid)
} else {
size_t trans = rdata->subreq.transferred + rdata->got_bytes;
if (trans < rdata->subreq.len &&
- rdata->subreq.start + trans == ictx->remote_i_size) {
+ rdata->subreq.start + trans >= ictx->remote_i_size) {
rdata->result = 0;
__set_bit(NETFS_SREQ_HIT_EOF, &rdata->subreq.flags);
} else if (rdata->got_bytes > 0) {
Hi there,
While running performance benchmarks for the 5.15.196 LTS tag, we
observed several regressions across different benchmarks compared to
the previous 5.15.193 kernel tag. An automated bisect between the two
narrowed the culprit commit down to:
- 5666bcc3c00f7 Revert "cpuidle: menu: Avoid discarding useful
information" for 5.15
Regressions on 5.15.196 include:
-9.3% : Phoronix pts/sqlite using 2 processes on OnPrem X6-2
-6.3% : Phoronix system/sqlite on OnPrem X6-2
-18% : rds-stress -M 1 (readonly rdma-mode) metrics with 1 depth & 1
thread & 1M buffer size on OnPrem X6-2
-4 -> -8% : rds-stress -M 2 (writeonly rdma-mode) metrics with 1 depth &
1 thread & 1M buffer size on OnPrem X6-2
Up to -30% : Some Netpipe metrics on OnPrem X5-2
The culprit commit's message mentions that this revert was done due to
performance regressions introduced on Intel Jasper Lake systems, but
the revert is unfortunately causing regressions on other systems. I
wanted to ask the maintainers' opinion on how we should proceed in
order to fix this. If we re-apply the original commit, the earlier
regressions on Jasper Lake systems come back; if we keep the revert, we
are stuck with the current regressions. If this problem has already
been reported and a fix is in the works, please let me know and I shall
follow that mail thread.
Thanks & Regards,
Harshvardhan