February 2023 - Linux-stable-mirror

[PATCH v6] ceph: blocklist the kclient when receiving corrupted snap trace

by xiubli＠redhat.com

From: Xiubo Li <xiubli(a)redhat.com> When receiving a corrupted snap trace we don't know what exactly has happened on the MDS side. And we shouldn't continue IOs and metadata access to the MDS, which may corrupt or get incorrect contents. This patch will just block all the further IO/MDS requests immediately and then evict the kclient itself. The reason why we still need to evict the kclient just after blocking all the further IOs is that the MDS could revoke the caps faster. Cc: stable(a)vger.kernel.org URL: https://tracker.ceph.com/issues/57686 Reviewed-by: Venky Shankar <vshankar(a)redhat.com> Signed-off-by: Xiubo Li <xiubli(a)redhat.com> --- V6: - switch to ceph_inode_is_shutdown() to check the mount state - fix two debug logs - use the WRITE_ONCE to set the FENCE_IO state V5: - s/CEPH_MOUNT_CORRUPTED/CEPH_MOUNT_FENCE_IO/g V4: - block all the IO/metadata requests before evicting the client. V3: - Fixed ERROR: spaces required around that ':' (ctx:VxW) V2: - Switched to WARN() to taint the Linux kernel. 
fs/ceph/addr.c | 17 +++++++++++++++-- fs/ceph/caps.c | 17 ++++++++++++++--- fs/ceph/file.c | 9 +++++++++ fs/ceph/mds_client.c | 28 +++++++++++++++++++++++++--- fs/ceph/snap.c | 38 ++++++++++++++++++++++++++++++++++++-- fs/ceph/super.h | 1 + 6 files changed, 100 insertions(+), 10 deletions(-) diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 6fb329a70ac1..13d1c24d2f53 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -305,13 +305,18 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq) struct inode *inode = rreq->inode; struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_fs_client *fsc = ceph_inode_to_client(inode); - struct ceph_osd_request *req; + struct ceph_osd_request *req = NULL; struct ceph_vino vino = ceph_vino(inode); struct iov_iter iter; int err = 0; u64 len = subreq->len; bool sparse = ceph_test_mount_opt(fsc, SPARSEREAD); + if (ceph_inode_is_shutdown(inode)) { + err = -EIO; + goto out; + } + if (ceph_has_inline_data(ci) && ceph_netfs_issue_op_inline(subreq)) return; @@ -557,6 +562,9 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) dout("writepage %p idx %lu\n", page, page->index); + if (ceph_inode_is_shutdown(inode)) + return -EIO; + /* verify this is a writeable snap context */ snapc = page_snap_context(page); if (!snapc) { @@ -1637,7 +1645,7 @@ int ceph_uninline_data(struct file *file) struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_fs_client *fsc = ceph_inode_to_client(inode); struct ceph_osd_request *req = NULL; - struct ceph_cap_flush *prealloc_cf; + struct ceph_cap_flush *prealloc_cf = NULL; struct folio *folio = NULL; u64 inline_version = CEPH_INLINE_NONE; struct page *pages[1]; @@ -1651,6 +1659,11 @@ int ceph_uninline_data(struct file *file) dout("uninline_data %p %llx.%llx inline_version %llu\n", inode, ceph_vinop(inode), inline_version); + if (ceph_inode_is_shutdown(inode)) { + err = -EIO; + goto out; + } + if (inline_version == CEPH_INLINE_NONE) return 0; diff --git 
a/fs/ceph/caps.c b/fs/ceph/caps.c index 948136f81fc8..5230ab64fff0 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -4134,6 +4134,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, void *p, *end; struct cap_extra_info extra_info = {}; bool queue_trunc; + bool close_sessions = false; dout("handle_caps from mds%d\n", session->s_mds); @@ -4275,9 +4276,13 @@ void ceph_handle_caps(struct ceph_mds_session *session, realm = NULL; if (snaptrace_len) { down_write(&mdsc->snap_rwsem); - ceph_update_snap_trace(mdsc, snaptrace, - snaptrace + snaptrace_len, - false, &realm); + if (ceph_update_snap_trace(mdsc, snaptrace, + snaptrace + snaptrace_len, + false, &realm)) { + up_write(&mdsc->snap_rwsem); + close_sessions = true; + goto done; + } downgrade_write(&mdsc->snap_rwsem); } else { down_read(&mdsc->snap_rwsem); @@ -4341,6 +4346,11 @@ void ceph_handle_caps(struct ceph_mds_session *session, iput(inode); out: ceph_put_string(extra_info.pool_ns); + + /* Defer closing the sessions after s_mutex lock being released */ + if (close_sessions) + ceph_mdsc_close_sessions(mdsc); + return; flush_cap_releases: @@ -4350,6 +4360,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, * cap). */ ceph_flush_cap_releases(mdsc, session); + goto done; bad: diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 59c89c436185..1ba3c07e242b 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -976,6 +976,9 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to, dout("sync_read on file %p %llu~%u %s\n", file, off, (unsigned)len, (file->f_flags & O_DIRECT) ? 
"O_DIRECT" : ""); + if (ceph_inode_is_shutdown(inode)) + return -EIO; + if (!len) return 0; /* @@ -1342,6 +1345,9 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, bool should_dirty = !write && user_backed_iter(iter); bool sparse = ceph_test_mount_opt(fsc, SPARSEREAD); + if (ceph_inode_is_shutdown(inode)) + return -EIO; + if (write && ceph_snap(file_inode(file)) != CEPH_NOSNAP) return -EROFS; @@ -2078,6 +2084,9 @@ static int ceph_zero_partial_object(struct inode *inode, loff_t zero = 0; int op; + if (ceph_inode_is_shutdown(inode)) + return -EIO; + if (!length) { op = offset ? CEPH_OSD_OP_DELETE : CEPH_OSD_OP_TRUNCATE; length = &zero; diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index cbbaf334b6b8..b60812707fce 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -957,6 +957,9 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, { struct ceph_mds_session *s; + if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_FENCE_IO) + return ERR_PTR(-EIO); + if (mds >= mdsc->mdsmap->possible_max_rank) return ERR_PTR(-EINVAL); @@ -1632,6 +1635,9 @@ static int __open_session(struct ceph_mds_client *mdsc, int mstate; int mds = session->s_mds; + if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_FENCE_IO) + return -EIO; + /* wait for mds to go active? 
*/ mstate = ceph_mdsmap_get_state(mdsc->mdsmap, mds); dout("open_session to mds%d (%s)\n", mds, @@ -3205,6 +3211,11 @@ static void __do_request(struct ceph_mds_client *mdsc, err = -ETIMEDOUT; goto finish; } + if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_FENCE_IO) { + dout("do_request metadata corrupted\n"); + err = -EIO; + goto finish; + } if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) { dout("do_request forced umount\n"); err = -EIO; @@ -3584,6 +3595,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) u64 tid; int err, result; int mds = session->s_mds; + bool close_sessions = false; if (msg->front.iov_len < sizeof(*head)) { pr_err("mdsc_handle_reply got corrupt (short) reply\n"); @@ -3698,10 +3710,15 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) realm = NULL; if (rinfo->snapblob_len) { down_write(&mdsc->snap_rwsem); - ceph_update_snap_trace(mdsc, rinfo->snapblob, + err = ceph_update_snap_trace(mdsc, rinfo->snapblob, rinfo->snapblob + rinfo->snapblob_len, le32_to_cpu(head->op) == CEPH_MDS_OP_RMSNAP, &realm); + if (err) { + up_write(&mdsc->snap_rwsem); + close_sessions = true; + goto out_err; + } downgrade_write(&mdsc->snap_rwsem); } else { down_read(&mdsc->snap_rwsem); @@ -3759,6 +3776,10 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) req->r_end_latency, err); out: ceph_mdsc_put_request(req); + + /* Defer closing the sessions after s_mutex lock being released */ + if (close_sessions) + ceph_mdsc_close_sessions(mdsc); return; } @@ -5358,7 +5379,7 @@ static bool done_closing_sessions(struct ceph_mds_client *mdsc, int skipped) } /* - * called after sb is ro. + * called after sb is ro or when metadata corrupted. 
*/ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc) { @@ -5648,7 +5669,8 @@ static void mds_peer_reset(struct ceph_connection *con) struct ceph_mds_client *mdsc = s->s_mdsc; pr_warn("mds%d closed our session\n", s->s_mds); - send_mds_reconnect(mdsc, s); + if (READ_ONCE(mdsc->fsc->mount_state) != CEPH_MOUNT_FENCE_IO) + send_mds_reconnect(mdsc, s); } static void mds_dispatch(struct ceph_connection *con, struct ceph_msg *msg) diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index c1c452afa84d..3d417ec8da0c 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/ceph/ceph_debug.h> +#include <linux/fs.h> #include <linux/sort.h> #include <linux/slab.h> #include <linux/iversion.h> @@ -767,8 +768,10 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc, struct ceph_snap_realm *realm; struct ceph_snap_realm *first_realm = NULL; struct ceph_snap_realm *realm_to_rebuild = NULL; + struct ceph_client *client = mdsc->fsc->client; int rebuild_snapcs; int err = -ENOMEM; + int ret; LIST_HEAD(dirty_realms); lockdep_assert_held_write(&mdsc->snap_rwsem); @@ -885,6 +888,27 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc, if (first_realm) ceph_put_snap_realm(mdsc, first_realm); pr_err("%s error %d\n", __func__, err); + + /* + * When receiving a corrupted snap trace we don't know what + * exactly has happened in MDS side. And we shouldn't continue + * writing to OSD, which may corrupt the snapshot contents. + * + * Just try to blocklist this kclient and then this kclient + * must be remounted to continue after the corrupted metadata + * fixed in the MDS side. + */ + WRITE_ONCE(mdsc->fsc->mount_state, CEPH_MOUNT_FENCE_IO); + ret = ceph_monc_blocklist_add(&client->monc, &client->msgr.inst.addr); + if (ret) + pr_err("%s blocklist of %s failed: %d", __func__, + ceph_pr_addr(&client->msgr.inst.addr), ret); + + WARN(1, "%s: %s%s do remount to continue%s", + __func__, ret ? 
"" : ceph_pr_addr(&client->msgr.inst.addr), + ret ? "" : " was blocklisted,", + err == -EIO ? " after corrupted snaptrace is fixed" : ""); + return err; } @@ -985,6 +1009,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc, __le64 *split_inos = NULL, *split_realms = NULL; int i; int locked_rwsem = 0; + bool close_sessions = false; /* decode */ if (msg->front.iov_len < sizeof(*h)) @@ -1093,8 +1118,12 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc, * update using the provided snap trace. if we are deleting a * snap, we can avoid queueing cap_snaps. */ - ceph_update_snap_trace(mdsc, p, e, - op == CEPH_SNAP_OP_DESTROY, NULL); + if (ceph_update_snap_trace(mdsc, p, e, + op == CEPH_SNAP_OP_DESTROY, + NULL)) { + close_sessions = true; + goto bad; + } if (op == CEPH_SNAP_OP_SPLIT) /* we took a reference when we created the realm, above */ @@ -1113,6 +1142,11 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc, out: if (locked_rwsem) up_write(&mdsc->snap_rwsem); + + /* Defer closing the sessions after s_mutex lock being released */ + if (close_sessions) + ceph_mdsc_close_sessions(mdsc); + return; } diff --git a/fs/ceph/super.h b/fs/ceph/super.h index ec1edfae20a0..22086f78732f 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -111,6 +111,7 @@ enum { CEPH_MOUNT_UNMOUNTED, CEPH_MOUNT_SHUTDOWN, CEPH_MOUNT_RECOVER, + CEPH_MOUNT_FENCE_IO, }; #define CEPH_ASYNC_CREATE_CONFLICT_BITS 8 -- 2.31.1

2 years, 5 months

4
4
0 0

[merged mm-hotfixes-stable] mm-memcg-fix-null-pointer-in-mem_cgroup_track_foreign_dirty_slowpath.patch removed from -mm tree

by Andrew Morton

The quilt patch titled Subject: mm: memcg: fix NULL pointer in mem_cgroup_track_foreign_dirty_slowpath() has been removed from the -mm tree. Its filename was mm-memcg-fix-null-pointer-in-mem_cgroup_track_foreign_dirty_slowpath.patch This patch was dropped because it was merged into the mm-hotfixes-stable branch of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm ------------------------------------------------------ From: Kefeng Wang <wangkefeng.wang(a)huawei.com> Subject: mm: memcg: fix NULL pointer in mem_cgroup_track_foreign_dirty_slowpath() Date: Sun, 29 Jan 2023 12:09:45 +0800 Since commit 18365225f044 ("hwpoison, memcg: forcibly uncharge LRU pages"), hwpoison will forcibly uncharge an LRU hwpoisoned page, so the folio_memcg could be NULL, and then mem_cgroup_track_foreign_dirty_slowpath() could hit a NULL pointer dereference. Let's not record the foreign writebacks when the folio memcg is NULL in mem_cgroup_track_foreign_dirty() to fix it. Link: https://lkml.kernel.org/r/20230129040945.180629-1-wangkefeng.wang@huawei.com Fixes: 97b27821b485 ("writeback, memcg: Implement foreign dirty flushing") Signed-off-by: Kefeng Wang <wangkefeng.wang(a)huawei.com> Reported-by: Ma Wupeng <mawupeng1(a)huawei.com> Tested-by: Miko Larsson <mikoxyzzz(a)gmail.com> Acked-by: Michal Hocko <mhocko(a)suse.com> Cc: Jan Kara <jack(a)suse.cz> Cc: Jens Axboe <axboe(a)kernel.dk> Cc: Kefeng Wang <wangkefeng.wang(a)huawei.com> Cc: Ma Wupeng <mawupeng1(a)huawei.com> Cc: Naoya Horiguchi <naoya.horiguchi(a)nec.com> Cc: Shakeel Butt <shakeelb(a)google.com> Cc: Tejun Heo <tj(a)kernel.org> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- include/linux/memcontrol.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) --- a/include/linux/memcontrol.h~mm-memcg-fix-null-pointer-in-mem_cgroup_track_foreign_dirty_slowpath +++ a/include/linux/memcontrol.h @@ -1666,10 +1666,13 @@ void mem_cgroup_track_foreign_dirty_slow static inline void 
mem_cgroup_track_foreign_dirty(struct folio *folio, struct bdi_writeback *wb) { + struct mem_cgroup *memcg; + if (mem_cgroup_disabled()) return; - if (unlikely(&folio_memcg(folio)->css != wb->memcg_css)) + memcg = folio_memcg(folio); + if (unlikely(memcg && &memcg->css != wb->memcg_css)) mem_cgroup_track_foreign_dirty_slowpath(folio, wb); } _ Patches currently in -mm which might be from wangkefeng.wang(a)huawei.com are mm-hwposion-support-recovery-from-ksm_might_need_to_copy.patch mm-hwposion-support-recovery-from-ksm_might_need_to_copy-v3.patch mm-madvise-use-vm_normal_folio-in-madvise_free_pte_range.patch

2 years, 5 months

1
0
0 0

[merged mm-hotfixes-stable] mm-swapfile-add-cond_resched-in-get_swap_pages.patch removed from -mm tree

by Andrew Morton

The quilt patch titled Subject: mm/swapfile: add cond_resched() in get_swap_pages() has been removed from the -mm tree. Its filename was mm-swapfile-add-cond_resched-in-get_swap_pages.patch This patch was dropped because it was merged into the mm-hotfixes-stable branch of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm ------------------------------------------------------ From: Longlong Xia <xialonglong1(a)huawei.com> Subject: mm/swapfile: add cond_resched() in get_swap_pages() Date: Sat, 28 Jan 2023 09:47:57 +0000 The softlockup still occurs in get_swap_pages() under memory pressure. 64 CPU cores, 64GB memory, and 28 zram devices, the disksize of each zram device is 50MB with same priority as si. Use the stress-ng tool to increase memory pressure, causing the system to oom frequently. The plist_for_each_entry_safe() loops in get_swap_pages() could reach tens of thousands of times to find available space (extreme case: cond_resched() is not called in scan_swap_map_slots()). Let's add cond_resched() into get_swap_pages() when failed to find available space to avoid softlockup. 
Link: https://lkml.kernel.org/r/20230128094757.1060525-1-xialonglong1@huawei.com Signed-off-by: Longlong Xia <xialonglong1(a)huawei.com> Reviewed-by: "Huang, Ying" <ying.huang(a)intel.com> Cc: Chen Wandun <chenwandun(a)huawei.com> Cc: Huang Ying <ying.huang(a)intel.com> Cc: Kefeng Wang <wangkefeng.wang(a)huawei.com> Cc: Nanyong Sun <sunnanyong(a)huawei.com> Cc: Hugh Dickins <hughd(a)google.com> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- mm/swapfile.c | 1 + 1 file changed, 1 insertion(+) --- a/mm/swapfile.c~mm-swapfile-add-cond_resched-in-get_swap_pages +++ a/mm/swapfile.c @@ -1100,6 +1100,7 @@ start_over: goto check_out; pr_debug("scan_swap_map of si %d failed to find offset\n", si->type); + cond_resched(); spin_lock(&swap_avail_lock); nextsi: _ Patches currently in -mm which might be from xialonglong1(a)huawei.com are mm-swapfile-remove-pr_debug-in-get_swap_pages.patch

2 years, 5 months

1
0
0 0

[merged mm-hotfixes-stable] squashfs-fix-handling-and-sanity-checking-of-xattr_ids-count.patch removed from -mm tree

by Andrew Morton

The quilt patch titled Subject: Squashfs: fix handling and sanity checking of xattr_ids count has been removed from the -mm tree. Its filename was squashfs-fix-handling-and-sanity-checking-of-xattr_ids-count.patch This patch was dropped because it was merged into the mm-hotfixes-stable branch of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm ------------------------------------------------------ From: Phillip Lougher <phillip(a)squashfs.org.uk> Subject: Squashfs: fix handling and sanity checking of xattr_ids count Date: Fri, 27 Jan 2023 06:18:42 +0000 A Syzbot [1] corrupted filesystem exposes two flaws in the handling and sanity checking of the xattr_ids count in the filesystem. Both of these flaws cause computation overflow due to incorrect typing. In the corrupted filesystem the xattr_ids value is 4294967071, which stored in a signed variable becomes the negative number -225. Flaw 1 (64-bit systems only): The signed integer xattr_ids variable causes sign extension. This causes variable overflow in the SQUASHFS_XATTR_*(A) macros. The variable is first multiplied by sizeof(struct squashfs_xattr_id) where the type of the sizeof operator is "unsigned long". On a 64-bit system this is 64-bits in size, and causes the negative number to be sign extended and widened to 64-bits and then become unsigned. This produces the very large number 18446744073709548016 or 2^64 - 3600. This number when rounded up by SQUASHFS_METADATA_SIZE - 1 (8191 bytes) and divided by SQUASHFS_METADATA_SIZE overflows and produces a length of 0 (stored in len). Flaw 2 (32-bit systems only): On a 32-bit system the integer variable is not widened by the unsigned long type of the sizeof operator (32-bits), and the signedness of the variable has no effect due to it always being treated as unsigned. The above corrupted xattr_ids value of 4294967071, when multiplied overflows and produces the number 4294963696 or 2^32 - 3600. 
This number when rounded up by SQUASHFS_METADATA_SIZE - 1 (8191 bytes) and divided by SQUASHFS_METADATA_SIZE overflows again and produces a length of 0. The effect of the 0 length computation: In conjunction with the corrupted xattr_ids field, the filesystem also has a corrupted xattr_table_start value, where it matches the end of filesystem value of 850. This causes the following sanity check code to fail because the incorrectly computed len of 0 matches the incorrect size of the table reported by the superblock (0 bytes). len = SQUASHFS_XATTR_BLOCK_BYTES(*xattr_ids); indexes = SQUASHFS_XATTR_BLOCKS(*xattr_ids); /* * The computed size of the index table (len bytes) should exactly * match the table start and end points */ start = table_start + sizeof(*id_table); end = msblk->bytes_used; if (len != (end - start)) return ERR_PTR(-EINVAL); Changing the xattr_ids variable to be "unsigned int" fixes the flaw on a 64-bit system. This relies on the fact the computation is widened by the unsigned long type of the sizeof operator. Casting the variable to u64 in the above macro fixes this flaw on a 32-bit system. It also means 64-bit systems do not implicitly rely on the type of the sizeof operator to widen the computation. 
[1] https://lore.kernel.org/lkml/000000000000cd44f005f1a0f17f@google.com/ Link: https://lkml.kernel.org/r/20230127061842.10965-1-phillip@squashfs.org.uk Fixes: 506220d2ba21 ("squashfs: add more sanity checks in xattr id lookup") Signed-off-by: Phillip Lougher <phillip(a)squashfs.org.uk> Reported-by: <syzbot+082fa4af80a5bb1a9843(a)syzkaller.appspotmail.com> Cc: Alexey Khoroshilov <khoroshilov(a)ispras.ru> Cc: Fedor Pchelkin <pchelkin(a)ispras.ru> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- fs/squashfs/squashfs_fs.h | 2 +- fs/squashfs/squashfs_fs_sb.h | 2 +- fs/squashfs/xattr.h | 4 ++-- fs/squashfs/xattr_id.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) --- a/fs/squashfs/squashfs_fs.h~squashfs-fix-handling-and-sanity-checking-of-xattr_ids-count +++ a/fs/squashfs/squashfs_fs.h @@ -183,7 +183,7 @@ static inline int squashfs_block_size(__ #define SQUASHFS_ID_BLOCK_BYTES(A) (SQUASHFS_ID_BLOCKS(A) *\ sizeof(u64)) /* xattr id lookup table defines */ -#define SQUASHFS_XATTR_BYTES(A) ((A) * sizeof(struct squashfs_xattr_id)) +#define SQUASHFS_XATTR_BYTES(A) (((u64) (A)) * sizeof(struct squashfs_xattr_id)) #define SQUASHFS_XATTR_BLOCK(A) (SQUASHFS_XATTR_BYTES(A) / \ SQUASHFS_METADATA_SIZE) --- a/fs/squashfs/squashfs_fs_sb.h~squashfs-fix-handling-and-sanity-checking-of-xattr_ids-count +++ a/fs/squashfs/squashfs_fs_sb.h @@ -63,7 +63,7 @@ struct squashfs_sb_info { long long bytes_used; unsigned int inodes; unsigned int fragments; - int xattr_ids; + unsigned int xattr_ids; unsigned int ids; bool panic_on_errors; const struct squashfs_decompressor_thread_ops *thread_ops; --- a/fs/squashfs/xattr.h~squashfs-fix-handling-and-sanity-checking-of-xattr_ids-count +++ a/fs/squashfs/xattr.h @@ -10,12 +10,12 @@ #ifdef CONFIG_SQUASHFS_XATTR extern __le64 *squashfs_read_xattr_id_table(struct super_block *, u64, - u64 *, int *); + u64 *, unsigned int *); extern int squashfs_xattr_lookup(struct super_block *, unsigned int, int *, 
unsigned int *, unsigned long long *); #else static inline __le64 *squashfs_read_xattr_id_table(struct super_block *sb, - u64 start, u64 *xattr_table_start, int *xattr_ids) + u64 start, u64 *xattr_table_start, unsigned int *xattr_ids) { struct squashfs_xattr_id_table *id_table; --- a/fs/squashfs/xattr_id.c~squashfs-fix-handling-and-sanity-checking-of-xattr_ids-count +++ a/fs/squashfs/xattr_id.c @@ -56,7 +56,7 @@ int squashfs_xattr_lookup(struct super_b * Read uncompressed xattr id lookup table indexes from disk into memory */ __le64 *squashfs_read_xattr_id_table(struct super_block *sb, u64 table_start, - u64 *xattr_table_start, int *xattr_ids) + u64 *xattr_table_start, unsigned int *xattr_ids) { struct squashfs_sb_info *msblk = sb->s_fs_info; unsigned int len, indexes; _ Patches currently in -mm which might be from phillip(a)squashfs.org.uk are

2 years, 5 months

1
0
0 0

[merged mm-hotfixes-stable] highmem-round-down-the-address-passed-to-kunmap_flush_on_unmap.patch removed from -mm tree

by Andrew Morton

The quilt patch titled Subject: highmem: round down the address passed to kunmap_flush_on_unmap() has been removed from the -mm tree. Its filename was highmem-round-down-the-address-passed-to-kunmap_flush_on_unmap.patch This patch was dropped because it was merged into the mm-hotfixes-stable branch of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm ------------------------------------------------------ From: "Matthew Wilcox (Oracle)" <willy(a)infradead.org> Subject: highmem: round down the address passed to kunmap_flush_on_unmap() Date: Thu, 26 Jan 2023 20:07:27 +0000 We already round down the address in kunmap_local_indexed() which is the other implementation of __kunmap_local(). The only implementation of kunmap_flush_on_unmap() is PA-RISC which is expecting a page-aligned address. This may be causing PA-RISC to be flushing the wrong addresses currently. Link: https://lkml.kernel.org/r/20230126200727.1680362-1-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) <willy(a)infradead.org> Fixes: 298fa1ad5571 ("highmem: Provide generic variant of kmap_atomic*") Reviewed-by: Ira Weiny <ira.weiny(a)intel.com> Cc: "Fabio M. 
De Francesco" <fmdefrancesco(a)gmail.com> Cc: Al Viro <viro(a)zeniv.linux.org.uk> Cc: Thomas Gleixner <tglx(a)linutronix.de> Cc: Helge Deller <deller(a)gmx.de> Cc: Alexander Potapenko <glider(a)google.com> Cc: Andrey Konovalov <andreyknvl(a)gmail.com> Cc: Bagas Sanjaya <bagasdotme(a)gmail.com> Cc: David Sterba <dsterba(a)suse.com> Cc: Kees Cook <keescook(a)chromium.org> Cc: Sebastian Andrzej Siewior <bigeasy(a)linutronix.de> Cc: Tony Luck <tony.luck(a)intel.com> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- include/linux/highmem-internal.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) --- a/include/linux/highmem-internal.h~highmem-round-down-the-address-passed-to-kunmap_flush_on_unmap +++ a/include/linux/highmem-internal.h @@ -200,7 +200,7 @@ static inline void *kmap_local_pfn(unsig static inline void __kunmap_local(const void *addr) { #ifdef ARCH_HAS_FLUSH_ON_KUNMAP - kunmap_flush_on_unmap(addr); + kunmap_flush_on_unmap(PTR_ALIGN_DOWN(addr, PAGE_SIZE)); #endif } @@ -227,7 +227,7 @@ static inline void *kmap_atomic_pfn(unsi static inline void __kunmap_atomic(const void *addr) { #ifdef ARCH_HAS_FLUSH_ON_KUNMAP - kunmap_flush_on_unmap(addr); + kunmap_flush_on_unmap(PTR_ALIGN_DOWN(addr, PAGE_SIZE)); #endif pagefault_enable(); if (IS_ENABLED(CONFIG_PREEMPT_RT)) _ Patches currently in -mm which might be from willy(a)infradead.org are mm-remove-folio_pincount_ptr-and-head_compound_pincount.patch mm-convert-head_subpages_mapcount-into-folio_nr_pages_mapped.patch doc-clarify-refcount-section-by-referring-to-folios-pages.patch mm-convert-total_compound_mapcount-to-folio_total_mapcount.patch mm-convert-page_remove_rmap-to-use-a-folio-internally.patch mm-convert-page_add_anon_rmap-to-use-a-folio-internally.patch mm-convert-page_add_file_rmap-to-use-a-folio-internally.patch mm-add-folio_add_new_anon_rmap.patch mm-add-folio_add_new_anon_rmap-fix-2.patch page_alloc-use-folio-fields-directly.patch 
mm-use-a-folio-in-hugepage_add_anon_rmap-and-hugepage_add_new_anon_rmap.patch mm-use-entire_mapcount-in-__page_dup_rmap.patch mm-debug-remove-call-to-head_compound_mapcount.patch hugetlb-remove-uses-of-folio_mapcount_ptr.patch mm-convert-page_mapcount-to-use-folio_entire_mapcount.patch mm-remove-head_compound_mapcount-and-_ptr-functions.patch mm-reimplement-compound_order.patch mm-reimplement-compound_nr.patch mm-reimplement-compound_nr-fix.patch mm-convert-set_compound_page_dtor-and-set_compound_order-to-folios.patch mm-convert-is_transparent_hugepage-to-use-a-folio.patch mm-convert-destroy_large_folio-to-use-folio_dtor.patch hugetlb-remove-uses-of-compound_dtor-and-compound_nr.patch mm-remove-first-tail-page-members-from-struct-page.patch doc-correct-struct-folio-kernel-doc.patch mm-move-page-deferred_list-to-folio-_deferred_list.patch mm-huge_memory-remove-page_deferred_list.patch mm-huge_memory-convert-get_deferred_split_queue-to-take-a-folio.patch mm-convert-deferred_split_huge_page-to-deferred_split_folio.patch shmem-convert-shmem_write_end-to-use-a-folio.patch mm-add-vma_alloc_zeroed_movable_folio.patch mm-convert-do_anonymous_page-to-use-a-folio.patch mm-convert-wp_page_copy-to-use-folios.patch mm-use-a-folio-in-copy_pte_range.patch mm-use-a-folio-in-copy_present_pte.patch mm-fs-convert-inode_attach_wb-to-take-a-folio.patch mm-convert-mem_cgroup_css_from_page-to-mem_cgroup_css_from_folio.patch mm-remove-page_evictable.patch mm-remove-mlock_vma_page.patch mm-remove-munlock_vma_page.patch mm-clean-up-mlock_page-munlock_page-references-in-comments.patch rmap-add-folio-parameter-to-__page_set_anon_rmap.patch filemap-convert-filemap_map_pmd-to-take-a-folio.patch filemap-convert-filemap_range_has_page-to-use-a-folio.patch readahead-convert-readahead_expand-to-use-a-folio.patch mm-add-memcpy_from_file_folio.patch fs-convert-writepage_t-callback-to-pass-a-folio.patch mpage-convert-__mpage_writepage-to-use-a-folio-more-fully.patch 
mpage-convert-__mpage_writepage-to-use-a-folio-more-fully-fix.patch

2 years, 5 months

1
0
0 0

[merged mm-hotfixes-stable] migrate-hugetlb-check-for-hugetlb-shared-pmd-in-node-migration.patch removed from -mm tree

by Andrew Morton

The quilt patch titled Subject: migrate: hugetlb: check for hugetlb shared PMD in node migration has been removed from the -mm tree. Its filename was migrate-hugetlb-check-for-hugetlb-shared-pmd-in-node-migration.patch This patch was dropped because it was merged into the mm-hotfixes-stable branch of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm ------------------------------------------------------ From: Mike Kravetz <mike.kravetz(a)oracle.com> Subject: migrate: hugetlb: check for hugetlb shared PMD in node migration Date: Thu, 26 Jan 2023 14:27:21 -0800 migrate_pages/mempolicy semantics state that CAP_SYS_NICE is required to move pages shared with another process to a different node. page_mapcount > 1 is being used to determine if a hugetlb page is shared. However, a hugetlb page will have a mapcount of 1 if mapped by multiple processes via a shared PMD. As a result, hugetlb pages shared by multiple processes and mapped with a shared PMD can be moved by a process without CAP_SYS_NICE. To fix, check for a shared PMD if mapcount is 1. If a shared PMD is found consider the page shared. 
Link: https://lkml.kernel.org/r/20230126222721.222195-3-mike.kravetz@oracle.com Fixes: e2d8cf405525 ("migrate: add hugepage migration code to migrate_pages()") Signed-off-by: Mike Kravetz <mike.kravetz(a)oracle.com> Acked-by: Peter Xu <peterx(a)redhat.com> Acked-by: David Hildenbrand <david(a)redhat.com> Cc: James Houghton <jthoughton(a)google.com> Cc: Matthew Wilcox <willy(a)infradead.org> Cc: Michal Hocko <mhocko(a)suse.com> Cc: Muchun Song <songmuchun(a)bytedance.com> Cc: Naoya Horiguchi <naoya.horiguchi(a)linux.dev> Cc: Vishal Moola (Oracle) <vishal.moola(a)gmail.com> Cc: Yang Shi <shy828301(a)gmail.com> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- mm/mempolicy.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) --- a/mm/mempolicy.c~migrate-hugetlb-check-for-hugetlb-shared-pmd-in-node-migration +++ a/mm/mempolicy.c @@ -600,7 +600,8 @@ static int queue_pages_hugetlb(pte_t *pt /* With MPOL_MF_MOVE, we migrate only unshared hugepage. */ if (flags & (MPOL_MF_MOVE_ALL) || - (flags & MPOL_MF_MOVE && page_mapcount(page) == 1)) { + (flags & MPOL_MF_MOVE && page_mapcount(page) == 1 && + !hugetlb_pmd_shared(pte))) { if (isolate_hugetlb(page, qp->pagelist) && (flags & MPOL_MF_STRICT)) /* _ Patches currently in -mm which might be from mike.kravetz(a)oracle.com are

2 years, 5 months

1
0
0 0

[merged mm-hotfixes-stable] mm-hugetlb-proc-check-for-hugetlb-shared-pmd-in-proc-pid-smaps.patch removed from -mm tree

by Andrew Morton

The quilt patch titled Subject: mm: hugetlb: proc: check for hugetlb shared PMD in /proc/PID/smaps has been removed from the -mm tree. Its filename was mm-hugetlb-proc-check-for-hugetlb-shared-pmd-in-proc-pid-smaps.patch This patch was dropped because it was merged into the mm-hotfixes-stable branch of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm ------------------------------------------------------ From: Mike Kravetz <mike.kravetz(a)oracle.com> Subject: mm: hugetlb: proc: check for hugetlb shared PMD in /proc/PID/smaps Date: Thu, 26 Jan 2023 14:27:20 -0800 Patch series "Fixes for hugetlb mapcount at most 1 for shared PMDs". This issue of mapcount in hugetlb pages referenced by shared PMDs was discussed in [1]. The following two patches address user visible behavior caused by this issue. [1] https://lore.kernel.org/linux-mm/Y9BF+OCdWnCSilEu@monkey/ This patch (of 2): A hugetlb page will have a mapcount of 1 if mapped by multiple processes via a shared PMD. This is because only the first process increases the map count, and subsequent processes just add the shared PMD page to their page table. page_mapcount is being used to decide if a hugetlb page is shared or private in /proc/PID/smaps. Pages referenced via a shared PMD were incorrectly being counted as private. To fix, check for a shared PMD if mapcount is 1. If a shared PMD is found count the hugetlb page as shared. A new helper to check for a shared PMD is added. 
[akpm(a)linux-foundation.org: simplification, per David] [akpm(a)linux-foundation.org: hugetlb.h: include page_ref.h for page_count()] Link: https://lkml.kernel.org/r/20230126222721.222195-2-mike.kravetz@oracle.com Fixes: 25ee01a2fca0 ("mm: hugetlb: proc: add hugetlb-related fields to /proc/PID/smaps") Signed-off-by: Mike Kravetz <mike.kravetz(a)oracle.com> Acked-by: Peter Xu <peterx(a)redhat.com> Cc: David Hildenbrand <david(a)redhat.com> Cc: James Houghton <jthoughton(a)google.com> Cc: Matthew Wilcox <willy(a)infradead.org> Cc: Michal Hocko <mhocko(a)suse.com> Cc: Muchun Song <songmuchun(a)bytedance.com> Cc: Naoya Horiguchi <naoya.horiguchi(a)linux.dev> Cc: Vishal Moola (Oracle) <vishal.moola(a)gmail.com> Cc: Yang Shi <shy828301(a)gmail.com> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- fs/proc/task_mmu.c | 4 +--- include/linux/hugetlb.h | 13 +++++++++++++ 2 files changed, 14 insertions(+), 3 deletions(-) --- a/fs/proc/task_mmu.c~mm-hugetlb-proc-check-for-hugetlb-shared-pmd-in-proc-pid-smaps +++ a/fs/proc/task_mmu.c @@ -745,9 +745,7 @@ static int smaps_hugetlb_range(pte_t *pt page = pfn_swap_entry_to_page(swpent); } if (page) { - int mapcount = page_mapcount(page); - - if (mapcount >= 2) + if (page_mapcount(page) >= 2 || hugetlb_pmd_shared(pte)) mss->shared_hugetlb += huge_page_size(hstate_vma(vma)); else mss->private_hugetlb += huge_page_size(hstate_vma(vma)); --- a/include/linux/hugetlb.h~mm-hugetlb-proc-check-for-hugetlb-shared-pmd-in-proc-pid-smaps +++ a/include/linux/hugetlb.h @@ -7,6 +7,7 @@ #include <linux/fs.h> #include <linux/hugetlb_inline.h> #include <linux/cgroup.h> +#include <linux/page_ref.h> #include <linux/list.h> #include <linux/kref.h> #include <linux/pgtable.h> @@ -1187,6 +1188,18 @@ static inline __init void hugetlb_cma_re } #endif +#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE +static inline bool hugetlb_pmd_shared(pte_t *pte) +{ + return page_count(virt_to_page(pte)) > 1; +} +#else +static inline bool 
hugetlb_pmd_shared(pte_t *pte) +{ + return false; +} +#endif + bool want_pmd_share(struct vm_area_struct *vma, unsigned long addr); #ifndef __HAVE_ARCH_FLUSH_HUGETLB_TLB_RANGE _ Patches currently in -mm which might be from mike.kravetz(a)oracle.com are

2 years, 5 months

1
0
0 0

[merged mm-hotfixes-stable] mm-madv_collapse-catch-none-huge-bad-pmd-lookups.patch removed from -mm tree

by Andrew Morton

The quilt patch titled Subject: mm/MADV_COLLAPSE: catch !none !huge !bad pmd lookups has been removed from the -mm tree. Its filename was mm-madv_collapse-catch-none-huge-bad-pmd-lookups.patch This patch was dropped because it was merged into the mm-hotfixes-stable branch of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm ------------------------------------------------------ From: "Zach O'Keefe" <zokeefe(a)google.com> Subject: mm/MADV_COLLAPSE: catch !none !huge !bad pmd lookups Date: Wed, 25 Jan 2023 14:53:58 -0800 In commit 34488399fa08 ("mm/madvise: add file and shmem support to MADV_COLLAPSE") we make the following change to find_pmd_or_thp_or_none(): - if (!pmd_present(pmde)) - return SCAN_PMD_NULL; + if (pmd_none(pmde)) + return SCAN_PMD_NONE; This was for use by MADV_COLLAPSE file/shmem codepaths, where MADV_COLLAPSE might identify a pte-mapped hugepage, only to have khugepaged race-in, free the pte table, and clear the pmd. Such codepaths include: A) If we find a suitably-aligned compound page of order HPAGE_PMD_ORDER already in the pagecache. B) In retract_page_tables(), if we fail to grab mmap_lock for the target mm/address. In these cases, collapse_pte_mapped_thp() really does expect a none (not just !present) pmd, and we want to suitably identify that case separate from the case where no pmd is found, or it's a bad-pmd (of course, many things could happen once we drop mmap_lock, and the pmd could plausibly undergo multiple transitions due to intervening fault, split, etc). Regardless, the code is prepared to install a huge-pmd only when the existing pmd entry is either a genuine pte-table-mapping-pmd, or the none-pmd. However, the commit introduces a logical hole; namely, that we've allowed !none- && !huge- && !bad-pmds to be classified as genuine pte-table-mapping-pmds. One such example that could leak through are swap entries.
The pmd values aren't checked again before use in pte_offset_map_lock(), which is expecting nothing less than a genuine pte-table-mapping-pmd. We want to put back the !pmd_present() check (below the pmd_none() check), but need to be careful to deal with subtleties in pmd transitions and treatments by various archs. The issue is that __split_huge_pmd_locked() temporarily clears the present bit (or otherwise marks the entry as invalid), but pmd_present() and pmd_trans_huge() still need to return true while the pmd is in this transitory state. For example, x86's pmd_present() also checks the _PAGE_PSE bit, riscv's version also checks the _PAGE_LEAF bit, and arm64 also checks a PMD_PRESENT_INVALID bit. Covering all 4 cases for x86 (all checks done on the same pmd value): 1) pmd_present() && pmd_trans_huge() All we actually know here is that the PSE bit is set. Either: a) We aren't racing with __split_huge_page(), and PRESENT or PROTNONE is set. => huge-pmd b) We are currently racing with __split_huge_page(). The danger here is that we proceed as-if we have a huge-pmd, but really we are looking at a pte-mapping-pmd. So, what is the risk of this danger? The only relevant path is: madvise_collapse() -> collapse_pte_mapped_thp() Where we might just incorrectly report back "success", when really the memory isn't pmd-backed. This is fine, since split could happen immediately after (actually) successful madvise_collapse(). So, it should be safe to just assume huge-pmd here. 2) pmd_present() && !pmd_trans_huge() Either: a) PSE not set and either PRESENT or PROTNONE is. => pte-table-mapping pmd (or PROT_NONE) b) devmap. This routine can be called immediately after unlocking/locking mmap_lock -- or called with no locks held (see khugepaged_scan_mm_slot()), so previous VMA checks have since been invalidated. 3) !pmd_present() && pmd_trans_huge() Not possible.
4) !pmd_present() && !pmd_trans_huge() Neither PRESENT nor PROTNONE set => not present I've checked all archs that implement pmd_trans_huge() (arm64, riscv, powerpc, loongarch, x86, mips, s390) and this logic roughly translates (though devmap treatment is unique to x86 and powerpc, and (3) doesn't necessarily hold in general -- but that doesn't matter since !pmd_present() always takes failure path). Also, add a comment above find_pmd_or_thp_or_none() to help future travelers reason about the validity of the code; namely, the possible mutations that might happen out from under us, depending on how mmap_lock is held (if at all). Link: https://lkml.kernel.org/r/20230125225358.2576151-1-zokeefe@google.com Fixes: 34488399fa08 ("mm/madvise: add file and shmem support to MADV_COLLAPSE") Signed-off-by: Zach O'Keefe <zokeefe(a)google.com> Reported-by: Hugh Dickins <hughd(a)google.com> Reviewed-by: Yang Shi <shy828301(a)gmail.com> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- mm/khugepaged.c | 8 ++++++++ 1 file changed, 8 insertions(+) --- a/mm/khugepaged.c~mm-madv_collapse-catch-none-huge-bad-pmd-lookups +++ a/mm/khugepaged.c @@ -847,6 +847,10 @@ static int hugepage_vma_revalidate(struc return SCAN_SUCCEED; } +/* + * See pmd_trans_unstable() for how the result may change out from + * underneath us, even if we hold mmap_lock in read. + */ static int find_pmd_or_thp_or_none(struct mm_struct *mm, unsigned long address, pmd_t **pmd) @@ -865,8 +869,12 @@ static int find_pmd_or_thp_or_none(struc #endif if (pmd_none(pmde)) return SCAN_PMD_NONE; + if (!pmd_present(pmde)) + return SCAN_PMD_NULL; if (pmd_trans_huge(pmde)) return SCAN_PMD_MAPPED; + if (pmd_devmap(pmde)) + return SCAN_PMD_NULL; if (pmd_bad(pmde)) return SCAN_PMD_NULL; return SCAN_SUCCEED; _ Patches currently in -mm which might be from zokeefe(a)google.com are

2 years, 5 months

1
0
0 0

[merged mm-hotfixes-stable] revert-mm-kmemleak-alloc-gray-object-for-reserved-region-with-direct-map.patch removed from -mm tree

by Andrew Morton

The quilt patch titled Subject: Revert "mm: kmemleak: alloc gray object for reserved region with direct map" has been removed from the -mm tree. Its filename was revert-mm-kmemleak-alloc-gray-object-for-reserved-region-with-direct-map.patch This patch was dropped because it was merged into the mm-hotfixes-stable branch of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm ------------------------------------------------------ From: "Isaac J. Manjarres" <isaacmanjarres(a)google.com> Subject: Revert "mm: kmemleak: alloc gray object for reserved region with direct map" Date: Tue, 24 Jan 2023 15:02:54 -0800 This reverts commit 972fa3a7c17c9d60212e32ecc0205dc585b1e769. Kmemleak operates by periodically scanning memory regions for pointers to allocated memory blocks to determine if they are leaked or not. However, reserved memory regions can be used for DMA transactions between a device and a CPU, and thus, wouldn't contain pointers to allocated memory blocks, making them inappropriate for kmemleak to scan. Thus, revert this commit. Link: https://lkml.kernel.org/r/20230124230254.295589-1-isaacmanjarres@google.com Fixes: 972fa3a7c17c9 ("mm: kmemleak: alloc gray object for reserved region with direct map") Signed-off-by: Isaac J. 
Manjarres <isaacmanjarres(a)google.com> Acked-by: Catalin Marinas <catalin.marinas(a)arm.com> Cc: Calvin Zhang <calvinzhang.cool(a)gmail.com> Cc: Frank Rowand <frowand.list(a)gmail.com> Cc: Rob Herring <robh+dt(a)kernel.org> Cc: Saravana Kannan <saravanak(a)google.com> Cc: <stable(a)vger.kernel.org> [5.17+] Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- drivers/of/fdt.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) --- a/drivers/of/fdt.c~revert-mm-kmemleak-alloc-gray-object-for-reserved-region-with-direct-map +++ a/drivers/of/fdt.c @@ -26,7 +26,6 @@ #include <linux/serial_core.h> #include <linux/sysfs.h> #include <linux/random.h> -#include <linux/kmemleak.h> #include <asm/setup.h> /* for COMMAND_LINE_SIZE */ #include <asm/page.h> @@ -525,12 +524,9 @@ static int __init __reserved_mem_reserve size = dt_mem_next_cell(dt_root_size_cells, &prop); if (size && - early_init_dt_reserve_memory(base, size, nomap) == 0) { + early_init_dt_reserve_memory(base, size, nomap) == 0) pr_debug("Reserved memory: reserved region for node '%s': base %pa, size %lu MiB\n", uname, &base, (unsigned long)(size / SZ_1M)); - if (!nomap) - kmemleak_alloc_phys(base, size, 0); - } else pr_err("Reserved memory: failed to reserve memory for node '%s': base %pa, size %lu MiB\n", uname, &base, (unsigned long)(size / SZ_1M)); _ Patches currently in -mm which might be from isaacmanjarres(a)google.com are

2 years, 5 months

1
0
0 0

[merged mm-hotfixes-stable] mm-mremap-fix-mremap-expanding-for-vmas-with-vm_ops-close.patch removed from -mm tree

by Andrew Morton

The quilt patch titled Subject: mm, mremap: fix mremap() expanding for vma's with vm_ops->close() has been removed from the -mm tree. Its filename was mm-mremap-fix-mremap-expanding-for-vmas-with-vm_ops-close.patch This patch was dropped because it was merged into the mm-hotfixes-stable branch of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm ------------------------------------------------------ From: Vlastimil Babka <vbabka(a)suse.cz> Subject: mm, mremap: fix mremap() expanding for vma's with vm_ops->close() Date: Tue, 17 Jan 2023 11:19:39 +0100 Fabian has reported another regression in 6.1 due to ca3d76b0aa80 ("mm: add merging after mremap resize"). The problem is that vma_merge() can fail when vma has a vm_ops->close() method, causing is_mergeable_vma() test to be negative. This was happening for vma mapping a file from fuse-overlayfs, which does have the method. But when we are simply expanding the vma, we never remove it due to the "merge" with the added area, so the test should not prevent the expansion. As a quick fix, check for such vmas and expand them using vma_adjust() directly as was done before commit ca3d76b0aa80. For a more robust long term solution we should try to limit the check for vm_ops->close only to cases that actually result in vma removal, so that no merge would be prevented unnecessarily.
[akpm(a)linux-foundation.org: fix indenting whitespace, reflow comment] Link: https://lkml.kernel.org/r/20230117101939.9753-1-vbabka@suse.cz Fixes: ca3d76b0aa80 ("mm: add merging after mremap resize") Signed-off-by: Vlastimil Babka <vbabka(a)suse.cz> Reported-by: Fabian Vogt <fvogt(a)suse.com> Link: https://bugzilla.suse.com/show_bug.cgi?id=1206359#c35 Tested-by: Fabian Vogt <fvogt(a)suse.com> Cc: Jakub Matěna <matenajakub(a)gmail.com> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- mm/mremap.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) --- a/mm/mremap.c~mm-mremap-fix-mremap-expanding-for-vmas-with-vm_ops-close +++ a/mm/mremap.c @@ -1027,16 +1027,29 @@ SYSCALL_DEFINE5(mremap, unsigned long, a } /* - * Function vma_merge() is called on the extension we are adding to - * the already existing vma, vma_merge() will merge this extension with - * the already existing vma (expand operation itself) and possibly also - * with the next vma if it becomes adjacent to the expanded vma and - * otherwise compatible. + * Function vma_merge() is called on the extension we + * are adding to the already existing vma, vma_merge() + * will merge this extension with the already existing + * vma (expand operation itself) and possibly also with + * the next vma if it becomes adjacent to the expanded + * vma and otherwise compatible. + * + * However, vma_merge() can currently fail due to + * is_mergeable_vma() check for vm_ops->close (see the + * comment there). Yet this should not prevent vma + * expanding, so perform a simple expand for such vma. + * Ideally the check for close op should be only done + * when a vma would be actually removed due to a merge.
*/ - vma = vma_merge(mm, vma, extension_start, extension_end, + if (!vma->vm_ops || !vma->vm_ops->close) { + vma = vma_merge(mm, vma, extension_start, extension_end, vma->vm_flags, vma->anon_vma, vma->vm_file, extension_pgoff, vma_policy(vma), vma->vm_userfaultfd_ctx, anon_vma_name(vma)); + } else if (vma_adjust(vma, vma->vm_start, addr + new_len, + vma->vm_pgoff, NULL)) { + vma = NULL; + } if (!vma) { vm_unacct_memory(pages); ret = -ENOMEM; _ Patches currently in -mm which might be from vbabka(a)suse.cz are revert-mm-compaction-fix-set-skip-in-fast_find_migrateblock.patch

2 years, 5 months

1
0
0 0

2025

2024

2023

2022

2021

2020

2019

2018

2017

Linux-stable-mirror February 2023