- Linux-stable-mirror - lists.linaro.org

[PATCH linux-6.12.y 1/2] selftests/bpf: Add a test for arena range tree algorithm

by Yifei Liu

From: Alexei Starovoitov <ast(a)kernel.org> [ Upstream commit e58358afa84e8e271a296459d35d1715c7572013 ] Add a test that verifies specific behavior of arena range tree algorithm and adjust existing big_alloc1 test due to use of global data in arena. Signed-off-by: Alexei Starovoitov <ast(a)kernel.org> Signed-off-by: Andrii Nakryiko <andrii(a)kernel.org> Acked-by: Kumar Kartikeya Dwivedi <memxor(a)gmail.com> Link: https://lore.kernel.org/bpf/20241108025616.17625-3-alexei.starovoitov@gmail… [Yifei: This commit fixes the failure of verifier_arena_large test over 64k page size kernels. This commit also introduce some new tests targeting the new feature, arena range tree algorithm, which is not in linux-6.12.y, I just comment out the test headers so that it would not be run here. If this feature is introduced later, we can just uncomment those two lines.] Signed-off-by: Yifei Liu <yifei.l.liu(a)oracle.com> --- .../bpf/progs/verifier_arena_large.c | 110 +++++++++++++++++- 1 file changed, 108 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/verifier_arena_large.c b/tools/testing/selftests/bpf/progs/verifier_arena_large.c index 6065f862d964..f318675814c6 100644 --- a/tools/testing/selftests/bpf/progs/verifier_arena_large.c +++ b/tools/testing/selftests/bpf/progs/verifier_arena_large.c @@ -29,12 +29,12 @@ int big_alloc1(void *ctx) if (!page1) return 1; *page1 = 1; - page2 = bpf_arena_alloc_pages(&arena, base + ARENA_SIZE - PAGE_SIZE, + page2 = bpf_arena_alloc_pages(&arena, base + ARENA_SIZE - PAGE_SIZE * 2, 1, NUMA_NO_NODE, 0); if (!page2) return 2; *page2 = 2; - no_page = bpf_arena_alloc_pages(&arena, base + ARENA_SIZE, + no_page = bpf_arena_alloc_pages(&arena, base + ARENA_SIZE - PAGE_SIZE, 1, NUMA_NO_NODE, 0); if (no_page) return 3; @@ -66,4 +66,110 @@ int big_alloc1(void *ctx) #endif return 0; } + +#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) +#define PAGE_CNT 100 +__u8 __arena * __arena page[PAGE_CNT]; /* occupies the first page */ +__u8 
__arena *base; + +/* + * Check that arena's range_tree algorithm allocates pages sequentially + * on the first pass and then fills in all gaps on the second pass. + */ +__noinline int alloc_pages(int page_cnt, int pages_atonce, bool first_pass, + int max_idx, int step) +{ + __u8 __arena *pg; + int i, pg_idx; + + for (i = 0; i < page_cnt; i++) { + pg = bpf_arena_alloc_pages(&arena, NULL, pages_atonce, + NUMA_NO_NODE, 0); + if (!pg) + return step; + pg_idx = (pg - base) / PAGE_SIZE; + if (first_pass) { + /* Pages must be allocated sequentially */ + if (pg_idx != i) + return step + 100; + } else { + /* Allocator must fill into gaps */ + if (pg_idx >= max_idx || (pg_idx & 1)) + return step + 200; + } + *pg = pg_idx; + page[pg_idx] = pg; + cond_break; + } + return 0; +} + +//SEC("syscall") +//__success __retval(0) +int big_alloc2(void *ctx) +{ + __u8 __arena *pg; + int i, err; + + base = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0); + if (!base) + return 1; + bpf_arena_free_pages(&arena, (void __arena *)base, 1); + + err = alloc_pages(PAGE_CNT, 1, true, PAGE_CNT, 2); + if (err) + return err; + + /* Clear all even pages */ + for (i = 0; i < PAGE_CNT; i += 2) { + pg = page[i]; + if (*pg != i) + return 3; + bpf_arena_free_pages(&arena, (void __arena *)pg, 1); + page[i] = NULL; + cond_break; + } + + /* Allocate into freed gaps */ + err = alloc_pages(PAGE_CNT / 2, 1, false, PAGE_CNT, 4); + if (err) + return err; + + /* Free pairs of pages */ + for (i = 0; i < PAGE_CNT; i += 4) { + pg = page[i]; + if (*pg != i) + return 5; + bpf_arena_free_pages(&arena, (void __arena *)pg, 2); + page[i] = NULL; + page[i + 1] = NULL; + cond_break; + } + + /* Allocate 2 pages at a time into freed gaps */ + err = alloc_pages(PAGE_CNT / 4, 2, false, PAGE_CNT, 6); + if (err) + return err; + + /* Check pages without freeing */ + for (i = 0; i < PAGE_CNT; i += 2) { + pg = page[i]; + if (*pg != i) + return 7; + cond_break; + } + + pg = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 
0); + + if (!pg) + return 8; + /* + * The first PAGE_CNT pages are occupied. The new page + * must be above. + */ + if ((pg - base) / PAGE_SIZE < PAGE_CNT) + return 9; + return 0; +} +#endif char _license[] SEC("license") = "GPL"; -- 2.46.0

1 month, 1 week

1
1
0 0

[PATCH net,v2] hv_netvsc: Fix panic during namespace deletion with VF

by Haiyang Zhang

From: Haiyang Zhang <haiyangz(a)microsoft.com> The existing code moves the VF NIC to a new namespace when NETDEV_REGISTER is received on netvsc NIC. During deletion of the namespace, default_device_exit_batch() >> default_device_exit_net() is called. When netvsc NIC is moved back and registered to the default namespace, it automatically brings VF NIC back to the default namespace. This will cause the default_device_exit_net() >> for_each_netdev_safe loop to be unable to detect the list end, and hit a NULL pointer dereference: [ 231.449420] mana 7870:00:00.0 enP30832s1: Moved VF to namespace with: eth0 [ 231.449656] BUG: kernel NULL pointer dereference, address: 0000000000000010 [ 231.450246] #PF: supervisor read access in kernel mode [ 231.450579] #PF: error_code(0x0000) - not-present page [ 231.450916] PGD 17b8a8067 P4D 0 [ 231.451163] Oops: Oops: 0000 [#1] SMP NOPTI [ 231.451450] CPU: 82 UID: 0 PID: 1394 Comm: kworker/u768:1 Not tainted 6.16.0-rc4+ #3 VOLUNTARY [ 231.452042] Hardware name: Microsoft Corporation Virtual Machine/Virtual Machine, BIOS Hyper-V UEFI Release v4.1 11/21/2024 [ 231.452692] Workqueue: netns cleanup_net [ 231.452947] RIP: 0010:default_device_exit_batch+0x16c/0x3f0 [ 231.453326] Code: c0 0c f5 b3 e8 d5 db fe ff 48 85 c0 74 15 48 c7 c2 f8 fd ca b2 be 10 00 00 00 48 8d 7d c0 e8 7b 77 25 00 49 8b 86 28 01 00 00 <48> 8b 50 10 4c 8b 2a 4c 8d 62 f0 49 83 ed 10 4c 39 e0 0f 84 d6 00 [ 231.454294] RSP: 0018:ff75fc7c9bf9fd00 EFLAGS: 00010246 [ 231.454610] RAX: 0000000000000000 RBX: 0000000000000002 RCX: 61c8864680b583eb [ 231.455094] RDX: ff1fa9f71462d800 RSI: ff75fc7c9bf9fd38 RDI: 0000000030766564 [ 231.455686] RBP: ff75fc7c9bf9fd78 R08: 0000000000000000 R09: 0000000000000000 [ 231.456126] R10: 0000000000000001 R11: 0000000000000004 R12: ff1fa9f70088e340 [ 231.456621] R13: ff1fa9f70088e340 R14: ffffffffb3f50c20 R15: ff1fa9f7103e6340 [ 231.457161] FS: 0000000000000000(0000) GS:ff1faa6783a08000(0000) knlGS:0000000000000000 [ 231.457707] CS: 0010 DS: 0000 ES: 0000 CR0: 
0000000080050033 [ 231.458031] CR2: 0000000000000010 CR3: 0000000179ab2006 CR4: 0000000000b73ef0 [ 231.458434] Call Trace: [ 231.458600] <TASK> [ 231.458777] ops_undo_list+0x100/0x220 [ 231.459015] cleanup_net+0x1b8/0x300 [ 231.459285] process_one_work+0x184/0x340 To fix it, move the ns change to a workqueue, and take rtnl_lock to avoid changing the netdev list when default_device_exit_net() is using it. Cc: stable(a)vger.kernel.org Fixes: 4c262801ea60 ("hv_netvsc: Fix VF namespace also in synthetic NIC NETDEV_REGISTER event") Signed-off-by: Haiyang Zhang <haiyangz(a)microsoft.com> --- v2: Moved the ns change to a workqueue as suggested by Jakub Kicinski. --- drivers/net/hyperv/hyperv_net.h | 3 +++ drivers/net/hyperv/netvsc_drv.c | 29 ++++++++++++++++++++++++++++- 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index cb6f5482d203..7397c693f984 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -1061,6 +1061,7 @@ struct net_device_context { struct net_device __rcu *vf_netdev; struct netvsc_vf_pcpu_stats __percpu *vf_stats; struct delayed_work vf_takeover; + struct delayed_work vfns_work; /* 1: allocated, serial number is valid. 0: not allocated */ u32 vf_alloc; @@ -1075,6 +1076,8 @@ struct net_device_context { struct netvsc_device_info *saved_netvsc_dev_info; }; +void netvsc_vfns_work(struct work_struct *w); + /* Azure hosts don't support non-TCP port numbers in hashing for fragmented * packets. We can use ethtool to change UDP hash level when necessary. 
*/ diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index f44753756358..39c892e46cb0 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -2522,6 +2522,7 @@ static int netvsc_probe(struct hv_device *dev, spin_lock_init(&net_device_ctx->lock); INIT_LIST_HEAD(&net_device_ctx->reconfig_events); INIT_DELAYED_WORK(&net_device_ctx->vf_takeover, netvsc_vf_setup); + INIT_DELAYED_WORK(&net_device_ctx->vfns_work, netvsc_vfns_work); net_device_ctx->vf_stats = netdev_alloc_pcpu_stats(struct netvsc_vf_pcpu_stats); @@ -2666,6 +2667,8 @@ static void netvsc_remove(struct hv_device *dev) cancel_delayed_work_sync(&ndev_ctx->dwork); rtnl_lock(); + cancel_delayed_work_sync(&ndev_ctx->vfns_work); + nvdev = rtnl_dereference(ndev_ctx->nvdev); if (nvdev) { cancel_work_sync(&nvdev->subchan_work); @@ -2707,6 +2710,7 @@ static int netvsc_suspend(struct hv_device *dev) cancel_delayed_work_sync(&ndev_ctx->dwork); rtnl_lock(); + cancel_delayed_work_sync(&ndev_ctx->vfns_work); nvdev = rtnl_dereference(ndev_ctx->nvdev); if (nvdev == NULL) { @@ -2800,6 +2804,27 @@ static void netvsc_event_set_vf_ns(struct net_device *ndev) } } +void netvsc_vfns_work(struct work_struct *w) +{ + struct net_device_context *ndev_ctx = + container_of(w, struct net_device_context, vfns_work.work); + struct net_device *ndev; + + if (!rtnl_trylock()) { + schedule_delayed_work(&ndev_ctx->vfns_work, 1); + return; + } + + ndev = hv_get_drvdata(ndev_ctx->device_ctx); + if (!ndev) + goto out; + + netvsc_event_set_vf_ns(ndev); + +out: + rtnl_unlock(); +} + /* * On Hyper-V, every VF interface is matched with a corresponding * synthetic interface. 
The synthetic interface is presented first @@ -2810,10 +2835,12 @@ static int netvsc_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *event_dev = netdev_notifier_info_to_dev(ptr); + struct net_device_context *ndev_ctx; int ret = 0; if (event_dev->netdev_ops == &device_ops && event == NETDEV_REGISTER) { - netvsc_event_set_vf_ns(event_dev); + ndev_ctx = netdev_priv(event_dev); + schedule_delayed_work(&ndev_ctx->vfns_work, 0); return NOTIFY_DONE; } -- 2.34.1

1 month, 1 week

2
1
0 0

[PATCH AUTOSEL 6.16-6.6] exfat: add cluster chain loop check for dir

by Sasha Levin

From: Yuezhang Mo <Yuezhang.Mo(a)sony.com> [ Upstream commit 99f9a97dce39ad413c39b92c90393bbd6778f3fd ] An infinite loop may occur if the following conditions occur due to file system corruption. (1) Condition for exfat_count_dir_entries() to loop infinitely. - The cluster chain includes a loop. - There is no UNUSED entry in the cluster chain. (2) Condition for exfat_create_upcase_table() to loop infinitely. - The cluster chain of the root directory includes a loop. - There are no UNUSED entry and up-case table entry in the cluster chain of the root directory. (3) Condition for exfat_load_bitmap() to loop infinitely. - The cluster chain of the root directory includes a loop. - There are no UNUSED entry and bitmap entry in the cluster chain of the root directory. (4) Condition for exfat_find_dir_entry() to loop infinitely. - The cluster chain includes a loop. - The unused directory entries were exhausted by some operation. (5) Condition for exfat_check_dir_empty() to loop infinitely. - The cluster chain includes a loop. - The unused directory entries were exhausted by some operation. - All files and sub-directories under the directory are deleted. This commit adds checks to break the above infinite loop. Signed-off-by: Yuezhang Mo <Yuezhang.Mo(a)sony.com> Signed-off-by: Namjae Jeon <linkinjeon(a)kernel.org> Signed-off-by: Sasha Levin <sashal(a)kernel.org> --- LLM Generated explanations, may be completely bogus: **YES** This commit should be backported to stable kernel trees for the following critical reasons: ## Security Impact - Prevents Multiple Infinite Loop Vulnerabilities The commit fixes **five distinct infinite loop conditions** that can occur due to filesystem corruption, each representing a potential denial-of-service vulnerability: 1. **exfat_count_dir_entries()** - Adds loop detection using `sbi->used_clusters` as a bound 2. **exfat_create_upcase_table()** - Addressed through root directory chain validation 3. 
**exfat_load_bitmap()** - Addressed through root directory chain validation 4. **exfat_find_dir_entry()** - Adds loop detection using `EXFAT_DATA_CLUSTER_COUNT(sbi)` 5. **exfat_check_dir_empty()** - Adds loop detection using `EXFAT_DATA_CLUSTER_COUNT(sbi)` ## Critical Bug Fix Characteristics 1. **Fixes Real Security Issues**: The infinite loops can cause system hangs and DoS conditions when mounting corrupted/malicious exFAT filesystems 2. **Small, Contained Changes**: The fix adds simple counter checks (4-5 lines per location) without architectural changes 3. **Clear Root Cause**: Addresses missing validation of cluster chain loops in directory traversal 4. **Pattern of Similar Fixes**: This follows three previous infinite loop fixes in the same subsystem (commits b0522303f672, a5324b3a488d, fee873761bd9), all of which fix similar issues dating back to the original exfat implementation ## Code Analysis Shows Low Risk The changes are minimal and safe: - Adds `unsigned int clu_count = 0` declarations - Increments counter when following cluster chains - Breaks traversal if counter exceeds valid cluster count - In `exfat_count_num_clusters()`: adds explicit loop detection with error message ## Follows Stable Kernel Rules ✓ Fixes critical bugs (infinite loops/DoS) ✓ Minimal code changes (~50 lines total) ✓ No new features or API changes ✓ Similar fixes already backported (the three previous infinite loop fixes) ✓ Clear error conditions with proper error returns (-EIO) The commit message explicitly states these are corruption-triggered infinite loops, and the pattern matches previous fixes that have "Fixes:" tags pointing to the original exfat implementation. This is a critical reliability and security fix that prevents system hangs when handling corrupted exFAT filesystems. 
fs/exfat/dir.c | 12 ++++++++++++ fs/exfat/fatent.c | 10 ++++++++++ fs/exfat/namei.c | 5 +++++ fs/exfat/super.c | 32 +++++++++++++++++++++----------- 4 files changed, 48 insertions(+), 11 deletions(-) diff --git a/fs/exfat/dir.c b/fs/exfat/dir.c index 3103b932b674..ee060e26f51d 100644 --- a/fs/exfat/dir.c +++ b/fs/exfat/dir.c @@ -996,6 +996,7 @@ int exfat_find_dir_entry(struct super_block *sb, struct exfat_inode_info *ei, struct exfat_hint_femp candi_empty; struct exfat_sb_info *sbi = EXFAT_SB(sb); int num_entries = exfat_calc_num_entries(p_uniname); + unsigned int clu_count = 0; if (num_entries < 0) return num_entries; @@ -1133,6 +1134,10 @@ int exfat_find_dir_entry(struct super_block *sb, struct exfat_inode_info *ei, } else { if (exfat_get_next_cluster(sb, &clu.dir)) return -EIO; + + /* break if the cluster chain includes a loop */ + if (unlikely(++clu_count > EXFAT_DATA_CLUSTER_COUNT(sbi))) + goto not_found; } } @@ -1195,6 +1200,7 @@ int exfat_count_dir_entries(struct super_block *sb, struct exfat_chain *p_dir) int i, count = 0; int dentries_per_clu; unsigned int entry_type; + unsigned int clu_count = 0; struct exfat_chain clu; struct exfat_dentry *ep; struct exfat_sb_info *sbi = EXFAT_SB(sb); @@ -1227,6 +1233,12 @@ int exfat_count_dir_entries(struct super_block *sb, struct exfat_chain *p_dir) } else { if (exfat_get_next_cluster(sb, &(clu.dir))) return -EIO; + + if (unlikely(++clu_count > sbi->used_clusters)) { + exfat_fs_error(sb, "FAT or bitmap is corrupted"); + return -EIO; + } + } } diff --git a/fs/exfat/fatent.c b/fs/exfat/fatent.c index 23065f948ae7..232cc7f8ab92 100644 --- a/fs/exfat/fatent.c +++ b/fs/exfat/fatent.c @@ -490,5 +490,15 @@ int exfat_count_num_clusters(struct super_block *sb, } *ret_count = count; + + /* + * since exfat_count_used_clusters() is not called, sbi->used_clusters + * cannot be used here. 
+ */ + if (unlikely(i == sbi->num_clusters && clu != EXFAT_EOF_CLUSTER)) { + exfat_fs_error(sb, "The cluster chain has a loop"); + return -EIO; + } + return 0; } diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c index fede0283d6e2..f5f1c4e8a29f 100644 --- a/fs/exfat/namei.c +++ b/fs/exfat/namei.c @@ -890,6 +890,7 @@ static int exfat_check_dir_empty(struct super_block *sb, { int i, dentries_per_clu; unsigned int type; + unsigned int clu_count = 0; struct exfat_chain clu; struct exfat_dentry *ep; struct exfat_sb_info *sbi = EXFAT_SB(sb); @@ -926,6 +927,10 @@ static int exfat_check_dir_empty(struct super_block *sb, } else { if (exfat_get_next_cluster(sb, &(clu.dir))) return -EIO; + + /* break if the cluster chain includes a loop */ + if (unlikely(++clu_count > EXFAT_DATA_CLUSTER_COUNT(sbi))) + break; } } diff --git a/fs/exfat/super.c b/fs/exfat/super.c index 7ed858937d45..3a9ec75ab452 100644 --- a/fs/exfat/super.c +++ b/fs/exfat/super.c @@ -341,13 +341,12 @@ static void exfat_hash_init(struct super_block *sb) INIT_HLIST_HEAD(&sbi->inode_hashtable[i]); } -static int exfat_read_root(struct inode *inode) +static int exfat_read_root(struct inode *inode, struct exfat_chain *root_clu) { struct super_block *sb = inode->i_sb; struct exfat_sb_info *sbi = EXFAT_SB(sb); struct exfat_inode_info *ei = EXFAT_I(inode); - struct exfat_chain cdir; - int num_subdirs, num_clu = 0; + int num_subdirs; exfat_chain_set(&ei->dir, sbi->root_dir, 0, ALLOC_FAT_CHAIN); ei->entry = -1; @@ -360,12 +359,9 @@ static int exfat_read_root(struct inode *inode) ei->hint_stat.clu = sbi->root_dir; ei->hint_femp.eidx = EXFAT_HINT_NONE; - exfat_chain_set(&cdir, sbi->root_dir, 0, ALLOC_FAT_CHAIN); - if (exfat_count_num_clusters(sb, &cdir, &num_clu)) - return -EIO; - i_size_write(inode, num_clu << sbi->cluster_size_bits); + i_size_write(inode, EXFAT_CLU_TO_B(root_clu->size, sbi)); - num_subdirs = exfat_count_dir_entries(sb, &cdir); + num_subdirs = exfat_count_dir_entries(sb, root_clu); if (num_subdirs < 0) 
return -EIO; set_nlink(inode, num_subdirs + EXFAT_MIN_SUBDIR); @@ -578,7 +574,8 @@ static int exfat_verify_boot_region(struct super_block *sb) } /* mount the file system volume */ -static int __exfat_fill_super(struct super_block *sb) +static int __exfat_fill_super(struct super_block *sb, + struct exfat_chain *root_clu) { int ret; struct exfat_sb_info *sbi = EXFAT_SB(sb); @@ -595,6 +592,18 @@ static int __exfat_fill_super(struct super_block *sb) goto free_bh; } + /* + * Call exfat_count_num_cluster() before searching for up-case and + * bitmap directory entries to avoid infinite loop if they are missing + * and the cluster chain includes a loop. + */ + exfat_chain_set(root_clu, sbi->root_dir, 0, ALLOC_FAT_CHAIN); + ret = exfat_count_num_clusters(sb, root_clu, &root_clu->size); + if (ret) { + exfat_err(sb, "failed to count the number of clusters in root"); + goto free_bh; + } + ret = exfat_create_upcase_table(sb); if (ret) { exfat_err(sb, "failed to load upcase table"); @@ -627,6 +636,7 @@ static int exfat_fill_super(struct super_block *sb, struct fs_context *fc) struct exfat_sb_info *sbi = sb->s_fs_info; struct exfat_mount_options *opts = &sbi->options; struct inode *root_inode; + struct exfat_chain root_clu; int err; if (opts->allow_utime == (unsigned short)-1) @@ -645,7 +655,7 @@ static int exfat_fill_super(struct super_block *sb, struct fs_context *fc) sb->s_time_min = EXFAT_MIN_TIMESTAMP_SECS; sb->s_time_max = EXFAT_MAX_TIMESTAMP_SECS; - err = __exfat_fill_super(sb); + err = __exfat_fill_super(sb, &root_clu); if (err) { exfat_err(sb, "failed to recognize exfat type"); goto check_nls_io; @@ -680,7 +690,7 @@ static int exfat_fill_super(struct super_block *sb, struct fs_context *fc) root_inode->i_ino = EXFAT_ROOT_INO; inode_set_iversion(root_inode, 1); - err = exfat_read_root(root_inode); + err = exfat_read_root(root_inode, &root_clu); if (err) { exfat_err(sb, "failed to initialize root inode"); goto put_inode; -- 2.39.5

1 month, 1 week

1
4
0 0

[PATCH 6.12 v2] ice/ptp: fix crosstimestamp reporting

by Markus Blöchl

From: Anton Nadezhdin <anton.nadezhdin(a)intel.com> commit a5a441ae283d54ec329aadc7426991dc32786d52 upstream. Set use_nsecs=true as timestamp is reported in ns. Lack of this results in a smaller timestamp error window, which causes an error during phc2sys execution on E825 NICs: phc2sys[1768.256]: ioctl PTP_SYS_OFFSET_PRECISE: Invalid argument This problem was introduced in the cited commit which omitted setting use_nsecs to true when converting the ice driver to use convert_base_to_cs(). Testing hints (ethX is PF netdev): phc2sys -s ethX -c CLOCK_REALTIME -O 37 -m phc2sys[1769.256]: CLOCK_REALTIME phc offset -5 s0 freq -0 delay 0 Fixes: d4bea547ebb57 ("ice/ptp: Remove convert_art_to_tsc()") Signed-off-by: Anton Nadezhdin <anton.nadezhdin(a)intel.com> Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov(a)intel.com> Reviewed-by: Arkadiusz Kubalewski <arkadiusz.kubalewski(a)intel.com> Tested-by: Rinitha S <sx.rinitha(a)intel.com> Signed-off-by: Tony Nguyen <anthony.l.nguyen(a)intel.com> Signed-off-by: Markus Blöchl <markus(a)blochl.de> --- Hi Greg, please consider this backport for linux-6.12.y It fixes a regression from the series around d4bea547ebb57 ("ice/ptp: Remove convert_art_to_tsc()") which affected multiple drivers and occasionally caused phc2sys to fail on ioctl(fd, PTP_SYS_OFFSET_PRECISE, ...). This was the initial fix for ice but apparently tagging it for stable was forgotten during submission. The hunk was moved around slightly in the upstream commit 92456e795ac6 ("ice: Add unified ice_capture_crosststamp") from ice_ptp_get_syncdevicetime() into another helper function ice_capture_crosststamp() so its indentation and context have changed. I adapted it to apply cleanly. 
--- Changes in v2: - Expand reference to upstream commit to full 40 character SHA - Add branch 6.12 target designator to PATCH prefix - Rebase onto current 6.12.41 - Link to v1: https://lore.kernel.org/r/20250725-ice_crosstimestamp_reporting-v1-1-3d0473… --- drivers/net/ethernet/intel/ice/ice_ptp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index 7c6f81beaee4602050b4cf366441a2584507d949..369c968a0117d0f7012241fd3e2c0a45a059bfa4 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -2226,6 +2226,7 @@ ice_ptp_get_syncdevicetime(ktime_t *device, hh_ts = ((u64)hh_ts_hi << 32) | hh_ts_lo; system->cycles = hh_ts; system->cs_id = CSID_X86_ART; + system->use_nsecs = true; /* Read Device source clock time */ hh_ts_lo = rd32(hw, GLTSYN_HHTIME_L(tmr_idx)); hh_ts_hi = rd32(hw, GLTSYN_HHTIME_H(tmr_idx)); --- base-commit: 8f5ff9784f3262e6e85c68d86f8b7931827f2983 change-id: 20250716-ice_crosstimestamp_reporting-b6236a246c48 Best regards, -- Markus Blöchl <markus(a)blochl.de> --

1 month, 1 week

2
1
0 0

[PATCH] mm/userfaultfd: fix missing PTE unmap for non-migration entries

by Sasha Levin

When handling non-swap entries in move_pages_pte(), the error handling for entries that are NOT migration entries fails to unmap the page table entries before jumping to the error handling label. This results in a kmap/kunmap imbalance which on CONFIG_HIGHPTE systems triggers a WARNING in kunmap_local_indexed() because the kmap stack is corrupted. Example call trace on ARM32 (CONFIG_HIGHPTE enabled): WARNING: CPU: 1 PID: 633 at mm/highmem.c:622 kunmap_local_indexed+0x178/0x17c Call trace: kunmap_local_indexed from move_pages+0x964/0x19f4 move_pages from userfaultfd_ioctl+0x129c/0x2144 userfaultfd_ioctl from sys_ioctl+0x558/0xd24 The issue was introduced with the UFFDIO_MOVE feature but became more frequent with the addition of guard pages (commit 7c53dfbdb024 ("mm: add PTE_MARKER_GUARD PTE marker")) which made the non-migration entry code path more commonly executed during userfaultfd operations. Fix this by ensuring PTEs are properly unmapped in all non-swap entry paths before jumping to the error handling label, not just for migration entries. Fixes: adef440691ba ("userfaultfd: UFFDIO_MOVE uABI") Cc: stable(a)vger.kernel.org Signed-off-by: Sasha Levin <sashal(a)kernel.org> --- mm/userfaultfd.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 8253978ee0fb1..7c298e9cbc18f 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -1384,14 +1384,15 @@ static int move_pages_pte(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, entry = pte_to_swp_entry(orig_src_pte); if (non_swap_entry(entry)) { + pte_unmap(src_pte); + pte_unmap(dst_pte); + src_pte = dst_pte = NULL; if (is_migration_entry(entry)) { - pte_unmap(src_pte); - pte_unmap(dst_pte); - src_pte = dst_pte = NULL; migration_entry_wait(mm, src_pmd, src_addr); err = -EAGAIN; - } else + } else { err = -EFAULT; + } goto out; } -- 2.39.5

1 month, 1 week

5
22
0 0

[PATCH v5 1/1] userfaultfd: fix a crash in UFFDIO_MOVE when PMD is a migration entry

by Suren Baghdasaryan

When UFFDIO_MOVE encounters a migration PMD entry, it proceeds with obtaining a folio and accessing it even though the entry is swp_entry_t. Add the missing check and let split_huge_pmd() handle migration entries. While at it also remove unnecessary folio check. Fixes: adef440691ba ("userfaultfd: UFFDIO_MOVE uABI") Reported-by: syzbot+b446dbe27035ef6bd6c2(a)syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/68794b5c.a70a0220.693ce.0050.GAE@google.com/ Signed-off-by: Suren Baghdasaryan <surenb(a)google.com> Reviewed-by: Peter Xu <peterx(a)redhat.com> Cc: stable(a)vger.kernel.org --- Applies to mm-unstable after reverting older v4 [1] version. Changes since v4 [1] - Removed extra folio check, per David Hildenbrand [1] https://lore.kernel.org/all/20250806220022.926763-1-surenb@google.com/ mm/userfaultfd.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 5431c9dd7fd7..aefdf3a812a1 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -1826,13 +1826,16 @@ ssize_t move_pages(struct userfaultfd_ctx *ctx, unsigned long dst_start, /* Check if we can move the pmd without splitting it. */ if (move_splits_huge_pmd(dst_addr, src_addr, src_start + len) || !pmd_none(dst_pmdval)) { - struct folio *folio = pmd_folio(*src_pmd); - - if (!folio || (!is_huge_zero_folio(folio) && - !PageAnonExclusive(&folio->page))) { - spin_unlock(ptl); - err = -EBUSY; - break; + /* Can be a migration entry */ + if (pmd_present(*src_pmd)) { + struct folio *folio = pmd_folio(*src_pmd); + + if (!is_huge_zero_folio(folio) && + !PageAnonExclusive(&folio->page)) { + spin_unlock(ptl); + err = -EBUSY; + break; + } } spin_unlock(ptl); -- 2.50.1.703.g449372360f-goog

1 month, 1 week

2
1
0 0

[PATCH] accel/ivpu: Fix potential Spectre issue in debugfs

by Jacek Lawrynowicz

Fix potential Spectre vulnerability reported by smatch: warn: potential spectre issue 'vdev->hw->hws.grace_period' [w] (local cap) warn: potential spectre issue 'vdev->hw->hws.process_grace_period' [w] (local cap) warn: potential spectre issue 'vdev->hw->hws.process_quantum' [w] (local cap) The priority_bands_fops_write() function in ivpu_debugfs.c uses an index 'band' derived from user input. This index is used to write to the vdev->hw->hws.grace_period, vdev->hw->hws.process_grace_period, and vdev->hw->hws.process_quantum arrays. This pattern presented a potential Spectre Variant 1 (Bounds Check Bypass) vulnerability. An attacker-controlled 'band' value could theoretically lead to speculative out-of-bounds array writes if the CPU speculatively executed these assignments before the bounds check on 'band' was fully resolved. This commit mitigates this potential vulnerability by sanitizing the 'band' index using array_index_nospec() before it is used in the array assignments. The array_index_nospec() function ensures that 'band' is constrained to the valid range [0, VPU_JOB_SCHEDULING_PRIORITY_BAND_COUNT - 1], even during speculative execution. 
Fixes: 320323d2e545 ("accel/ivpu: Add debugfs interface for setting HWS priority bands") Cc: <stable(a)vger.kernel.org> # v6.15+ Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz(a)linux.intel.com> --- drivers/accel/ivpu/ivpu_debugfs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/accel/ivpu/ivpu_debugfs.c b/drivers/accel/ivpu/ivpu_debugfs.c index cd24ccd20ba6c..2ffe5bf8f1fab 100644 --- a/drivers/accel/ivpu/ivpu_debugfs.c +++ b/drivers/accel/ivpu/ivpu_debugfs.c @@ -5,6 +5,7 @@ #include <linux/debugfs.h> #include <linux/fault-inject.h> +#include <linux/nospec.h> #include <drm/drm_debugfs.h> #include <drm/drm_file.h> @@ -464,6 +465,7 @@ priority_bands_fops_write(struct file *file, const char __user *user_buf, size_t if (band >= VPU_JOB_SCHEDULING_PRIORITY_BAND_COUNT) return -EINVAL; + band = array_index_nospec(band, VPU_JOB_SCHEDULING_PRIORITY_BAND_COUNT); vdev->hw->hws.grace_period[band] = grace_period; vdev->hw->hws.process_grace_period[band] = process_grace_period; vdev->hw->hws.process_quantum[band] = process_quantum; -- 2.45.1

1 month, 1 week

2
1
0 0

[PATCH v2] NFSD: Fix destination buffer size in nfsd4_ssc_setup_dul()

by Thorsten Blum

Commit 5304877936c0 ("NFSD: Fix strncpy() fortify warning") replaced strncpy(,, sizeof(..)) with strlcpy(,, sizeof(..) - 1), but strlcpy() already guaranteed NUL-termination of the destination buffer and subtracting one byte potentially truncated the source string. The incorrect size was then carried over in commit 72f78ae00a8e ("NFSD: move from strlcpy with unused retval to strscpy") when switching from strlcpy() to strscpy(). Fix this off-by-one error by using the full size of the destination buffer again. Cc: stable(a)vger.kernel.org Fixes: 5304877936c0 ("NFSD: Fix strncpy() fortify warning") Signed-off-by: Thorsten Blum <thorsten.blum(a)linux.dev> --- Changes in v2: - Use three parameter variant of strscpy() for easier backporting - Link to v1: https://lore.kernel.org/lkml/20250805175302.29386-2-thorsten.blum@linux.dev/ --- fs/nfsd/nfs4proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 71b428efcbb5..954543e92988 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1469,7 +1469,7 @@ static __be32 nfsd4_ssc_setup_dul(struct nfsd_net *nn, char *ipaddr, return 0; } if (work) { - strscpy(work->nsui_ipaddr, ipaddr, sizeof(work->nsui_ipaddr) - 1); + strscpy(work->nsui_ipaddr, ipaddr, sizeof(work->nsui_ipaddr)); refcount_set(&work->nsui_refcnt, 2); work->nsui_busy = true; list_add_tail(&work->nsui_list, &nn->nfsd_ssc_mount_list); -- 2.50.1

1 month, 1 week

2
1
0 0

[PATCH v3] fs: always return zero on success from replace_fd()

by Thomas Weißschuh

replace_fd() returns the number of the new file descriptor through the return value of do_dup2(). However its callers never care about the specific returned number. In fact the caller in receive_fd_replace() treats any non-zero return value as an error and therefore never calls __receive_sock() for most file descriptors, which is a bug. To fix the bug in receive_fd_replace() and to avoid the same issue happening in future callers, signal success through a plain zero. Suggested-by: Al Viro <viro(a)zeniv.linux.org.uk> Link: https://lore.kernel.org/lkml/20250801220215.GS222315@ZenIV/ Fixes: 173817151b15 ("fs: Expand __receive_fd() to accept existing fd") Fixes: 42eb0d54c08a ("fs: split receive_fd_replace from __receive_fd") Cc: stable(a)vger.kernel.org Signed-off-by: Thomas Weißschuh <thomas.weissschuh(a)linutronix.de> --- Changes in v3: - Make commit message slightly more precise - Avoid double-unlock of file_lock - Link to v2: https://lore.kernel.org/r/20250804-fix-receive_fd_replace-v2-1-ecb28c7b9129… Changes in v2: - Move the fix to replace_fd() (Al) - Link to v1: https://lore.kernel.org/r/20250801-fix-receive_fd_replace-v1-1-d46d600c74d6… --- Untested, it stuck out while reading the code. --- fs/file.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/file.c b/fs/file.c index 6d2275c3be9c6967d16c75d1b6521f9b58980926..80957d0813db5946ba8a635520e8283c722982b9 100644 --- a/fs/file.c +++ b/fs/file.c @@ -1330,7 +1330,8 @@ int replace_fd(unsigned fd, struct file *file, unsigned flags) err = expand_files(files, fd); if (unlikely(err < 0)) goto out_unlock; - return do_dup2(files, file, fd, flags); + err = do_dup2(files, file, fd, flags); + return err < 0 ? err : 0; out_unlock: spin_unlock(&files->file_lock); --- base-commit: d2eedaa3909be9102d648a4a0a50ccf64f96c54f change-id: 20250801-fix-receive_fd_replace-7fdd5ce6532d Best regards, -- Thomas Weißschuh <thomas.weissschuh(a)linutronix.de>

1 month, 1 week

2
1
0 0

[PATCH v3 0/2] fscontext: do not consume log entries when returning -EMSGSIZE

by Aleksa Sarai

Userspace generally expects APIs that return -EMSGSIZE to allow for them to adjust their buffer size and retry the operation. However, the fscontext log would previously clear the message even in the -EMSGSIZE case. Given that it is very cheap for us to check whether the buffer is too small before we remove the message from the ring buffer, let's just do that instead. While we're at it, refactor some fscontext_read() into a separate helper to make the ring buffer logic a bit easier to read. Fixes: 007ec26cdc9f ("vfs: Implement logging through fs_context") Signed-off-by: Aleksa Sarai <cyphar(a)cyphar.com> --- Changes in v3: - selftests: use EXPECT_STREQ() - v2: <https://lore.kernel.org/r/20250806-fscontext-log-cleanups-v2-0-88e9d34d142f…> Changes in v2: - Refactor message fetching to fetch_message_locked() which returns ERR_PTR() in error cases. [Al Viro] - v1: <https://lore.kernel.org/r/20250806-fscontext-log-cleanups-v1-0-880597d42a5a…> --- Aleksa Sarai (2): fscontext: do not consume log entries when returning -EMSGSIZE selftests/filesystems: add basic fscontext log tests fs/fsopen.c | 54 +++++----- tools/testing/selftests/filesystems/.gitignore | 1 + tools/testing/selftests/filesystems/Makefile | 2 +- tools/testing/selftests/filesystems/fclog.c | 130 +++++++++++++++++++++++++ 4 files changed, 162 insertions(+), 25 deletions(-) --- base-commit: 66639db858112bf6b0f76677f7517643d586e575 change-id: 20250806-fscontext-log-cleanups-50f0143674ae Best regards, -- Aleksa Sarai <cyphar(a)cyphar.com>

1 month, 1 week

3
6
0 0

[PATCH] media: v4l2-subdev: Fix alloc failure check in v4l2_subdev_call_state_try()

by Tomi Valkeinen

v4l2_subdev_call_state_try() macro allocates a subdev state with __v4l2_subdev_state_alloc(), but does not check the returned value. If __v4l2_subdev_state_alloc fails, it returns an ERR_PTR, and that would cause v4l2_subdev_call_state_try() to crash. Add proper error handling to v4l2_subdev_call_state_try(). Signed-off-by: Tomi Valkeinen <tomi.valkeinen(a)ideasonboard.com> Fixes: 982c0487185b ("media: subdev: Add v4l2_subdev_call_state_try() macro") Reported-by: Dan Carpenter <dan.carpenter(a)linaro.org> Closes: https://lore.kernel.org/all/aJTNtpDUbTz7eyJc%40stanley.mountain/ Cc: stable(a)vger.kernel.org --- include/media/v4l2-subdev.h | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/include/media/v4l2-subdev.h b/include/media/v4l2-subdev.h index 5dcf4065708f..398b57461677 100644 --- a/include/media/v4l2-subdev.h +++ b/include/media/v4l2-subdev.h @@ -1962,19 +1962,23 @@ extern const struct v4l2_subdev_ops v4l2_subdev_call_wrappers; * * Note: only legacy non-MC drivers may need this macro. */ -#define v4l2_subdev_call_state_try(sd, o, f, args...) \ - ({ \ - int __result; \ - static struct lock_class_key __key; \ - const char *name = KBUILD_BASENAME \ - ":" __stringify(__LINE__) ":state->lock"; \ - struct v4l2_subdev_state *state = \ - __v4l2_subdev_state_alloc(sd, name, &__key); \ - v4l2_subdev_lock_state(state); \ - __result = v4l2_subdev_call(sd, o, f, state, ##args); \ - v4l2_subdev_unlock_state(state); \ - __v4l2_subdev_state_free(state); \ - __result; \ +#define v4l2_subdev_call_state_try(sd, o, f, args...) 
\ + ({ \ + int __result; \ + static struct lock_class_key __key; \ + const char *name = KBUILD_BASENAME \ + ":" __stringify(__LINE__) ":state->lock"; \ + struct v4l2_subdev_state *state = \ + __v4l2_subdev_state_alloc(sd, name, &__key); \ + if (IS_ERR(state)) { \ + __result = PTR_ERR(state); \ + } else { \ + v4l2_subdev_lock_state(state); \ + __result = v4l2_subdev_call(sd, o, f, state, ##args); \ + v4l2_subdev_unlock_state(state); \ + __v4l2_subdev_state_free(state); \ + } \ + __result; \ }) /** --- base-commit: d968e50b5c26642754492dea23cbd3592bde62d8 change-id: 20250808-fix-subdev-call-state-try-e724fa6907f8 Best regards, -- Tomi Valkeinen <tomi.valkeinen(a)ideasonboard.com>

1 month, 1 week

2
1
0 0

[PATCH 0/2] open_tree_attr: do not allow id-mapping changes without OPEN_TREE_CLONE

by Aleksa Sarai

As described in commit 7a54947e727b ('Merge patch series "fs: allow changing idmappings"'), open_tree_attr(2) was necessary in order to allow for a detached mount to be created and have its idmappings changed without the risk of any racing threads operating on it. For this reason, mount_setattr(2) still does not allow for id-mappings to be changed. However, there was a bug in commit 2462651ffa76 ("fs: allow changing idmappings") which allowed users to bypass this restriction by calling open_tree_attr(2) *without* OPEN_TREE_CLONE. can_idmap_mount() prevented this bug from allowing an attached mountpoint's id-mapping to be modified (thanks to an is_anon_ns() check), but this still allows for detached (but visible) mounts to have their id-mapping changed. This risks the same UAF and locking issues as described in the merge commit, and was likely unintentional. For what it's worth, I found this while working on the open_tree_attr(2) man page, and was trying to figure out what open_tree_attr(2)'s behaviour was in the (slightly fruity) ~OPEN_TREE_CLONE case. Signed-off-by: Aleksa Sarai <cyphar(a)cyphar.com> --- Aleksa Sarai (2): open_tree_attr: do not allow id-mapping changes without OPEN_TREE_CLONE selftests/mount_setattr: add smoke tests for open_tree_attr(2) bug fs/namespace.c | 3 +- .../selftests/mount_setattr/mount_setattr_test.c | 77 ++++++++++++++++++---- 2 files changed, 66 insertions(+), 14 deletions(-) --- base-commit: 66639db858112bf6b0f76677f7517643d586e575 change-id: 20250808-open_tree_attr-bugfix-idmap-bb741166dc04 Best regards, -- Aleksa Sarai <cyphar(a)cyphar.com>

1 month, 1 week

2
2
0 0

[PATCH v2] usb: dwc3: Remove WARN_ON for device endpoint command timeouts

by Selvarasu Ganesan

This commit addresses a rarely observed endpoint command timeout which causes kernel panic due to warn when 'panic_on_warn' is enabled and unnecessary call trace prints when 'panic_on_warn' is disabled. It is seen during fast software-controlled connect/disconnect testcases. The following is one such endpoint command timeout that we observed: 1. Connect ======= ->dwc3_thread_interrupt ->dwc3_ep0_interrupt ->configfs_composite_setup ->composite_setup ->usb_ep_queue ->dwc3_gadget_ep0_queue ->__dwc3_gadget_ep0_queue ->__dwc3_ep0_do_control_data ->dwc3_send_gadget_ep_cmd 2. Disconnect ========== ->dwc3_thread_interrupt ->dwc3_gadget_disconnect_interrupt ->dwc3_ep0_reset_state ->dwc3_ep0_end_control_data ->dwc3_send_gadget_ep_cmd In the issue scenario, in Exynos platforms, we observed that control transfers for the previous connect have not yet been completed and end transfer command sent as a part of the disconnect sequence and processing of USB_ENDPOINT_HALT feature request from the host timeout. This may be an expected scenario since the controller is processing EP commands sent as a part of the previous connect. It may be better to remove WARN_ON in all places where device endpoint commands are sent to avoid unnecessary kernel panic due to warn. Cc: stable(a)vger.kernel.org Signed-off-by: Akash M <akash.m5(a)samsung.com> Signed-off-by: Selvarasu Ganesan <selvarasu.g(a)samsung.com> --- Changes in v2: - Removed the 'Fixes' tag from the commit message, as this patch does not contain a fix. - And Retained the 'stable' tag, as these changes are intended to be applied across all stable kernels. - Additionally, replaced 'dev_warn*' with 'dev_err*'." 
Link to v1: https://lore.kernel.org/all/20250807005638.thhsgjn73aaov2af@synopsys.com/ --- drivers/usb/dwc3/ep0.c | 20 ++++++++++++++++---- drivers/usb/dwc3/gadget.c | 10 ++++++++-- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c index 666ac432f52d..b4229aa13f37 100644 --- a/drivers/usb/dwc3/ep0.c +++ b/drivers/usb/dwc3/ep0.c @@ -288,7 +288,9 @@ void dwc3_ep0_out_start(struct dwc3 *dwc) dwc3_ep0_prepare_one_trb(dep, dwc->ep0_trb_addr, 8, DWC3_TRBCTL_CONTROL_SETUP, false); ret = dwc3_ep0_start_trans(dep); - WARN_ON(ret < 0); + if (ret < 0) + dev_err(dwc->dev, "ep0 out start transfer failed: %d\n", ret); + for (i = 2; i < DWC3_ENDPOINTS_NUM; i++) { struct dwc3_ep *dwc3_ep; @@ -1061,7 +1063,9 @@ static void __dwc3_ep0_do_control_data(struct dwc3 *dwc, ret = dwc3_ep0_start_trans(dep); } - WARN_ON(ret < 0); + if (ret < 0) + dev_err(dwc->dev, + "ep0 data phase start transfer failed: %d\n", ret); } static int dwc3_ep0_start_control_status(struct dwc3_ep *dep) @@ -1078,7 +1082,12 @@ static int dwc3_ep0_start_control_status(struct dwc3_ep *dep) static void __dwc3_ep0_do_control_status(struct dwc3 *dwc, struct dwc3_ep *dep) { - WARN_ON(dwc3_ep0_start_control_status(dep)); + int ret; + + ret = dwc3_ep0_start_control_status(dep); + if (ret) + dev_err(dwc->dev, + "ep0 status phase start transfer failed: %d\n", ret); } static void dwc3_ep0_do_control_status(struct dwc3 *dwc, @@ -1121,7 +1130,10 @@ void dwc3_ep0_end_control_data(struct dwc3 *dwc, struct dwc3_ep *dep) cmd |= DWC3_DEPCMD_PARAM(dep->resource_index); memset(&params, 0, sizeof(params)); ret = dwc3_send_gadget_ep_cmd(dep, cmd, &params); - WARN_ON_ONCE(ret); + if (ret) + dev_err_ratelimited(dwc->dev, + "ep0 data phase end transfer failed: %d\n", ret); + dep->resource_index = 0; } diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 4a3e97e606d1..4a3d076c1015 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1772,7 
+1772,11 @@ static int __dwc3_stop_active_transfer(struct dwc3_ep *dep, bool force, bool int dep->flags |= DWC3_EP_DELAY_STOP; return 0; } - WARN_ON_ONCE(ret); + + if (ret) + dev_err_ratelimited(dep->dwc->dev, + "end transfer failed: %d\n", ret); + dep->resource_index = 0; if (!interrupt) @@ -4039,7 +4043,9 @@ static void dwc3_clear_stall_all_ep(struct dwc3 *dwc) dep->flags &= ~DWC3_EP_STALL; ret = dwc3_send_clear_stall_ep_cmd(dep); - WARN_ON_ONCE(ret); + if (ret) + dev_err_ratelimited(dwc->dev, + "failed to clear STALL on %s\n", dep->name); } } -- 2.17.1

1 month, 1 week

3
7
0 0

[REGRESSION] vfio gpu passthrough stopped working

by cat

#regzbot introduced: v6.12.34..v6.12.35 After upgrading to kernel 6.12.35, vfio passthrough for my GPU has stopped working within a Windows VM: it sees the device in Device Manager but reports that it did not start correctly. I compared lspci logs in the VM before and after the upgrade to 6.12.35, and here are the changes I noticed: - the reported link speed for the passthrough GPU has changed from 2.5 to 16GT/s - the passthrough GPU has lost its 'BusMaster' and MSI enable flags - a latency measurement feature appeared These entries also began appearing within the VM in dmesg when the host kernel is 6.12.35 or above: [ 1.963177] nouveau 0000:01:00.0: sec2(gsp): mbox 1c503000 00000001 [ 1.963296] nouveau 0000:01:00.0: sec2(gsp):booter-load: boot failed: -5 ... [ 1.964580] nouveau 0000:01:00.0: gsp: init failed, -5 [ 1.964641] nouveau 0000:01:00.0: init failed with -5 [ 1.964681] nouveau: drm:00000000:00000080: init failed with -5 [ 1.964721] nouveau 0000:01:00.0: drm: Device allocation failed: -5 [ 1.966318] nouveau 0000:01:00.0: probe with driver nouveau failed with error -5 6.12.34 worked fine, and the latest 6.12 LTS does not work either. I am using an Intel CPU and an NVIDIA GPU (for passthrough, and as my GPU on the Linux system).

1 month, 1 week

3
5
0 0

BPF selftest: mptcp subtest failing

by Harshvardhan Jha

Hi there, I have explicitly disabled mptcp by default on my custom kernel and this seems to be causing the test case to fail. Even after enabling mptcp via the sysctl command or adding an entry to /etc/sysctl.conf, this fails. I don't think this test should be failing; shouldn't it account for cases where mptcp has not been enabled by default? These are the test logs: $ sudo tools/testing/selftests/bpf/test_progs -t mptcp Can't find bpf_testmod.ko kernel module: -2 WARNING! Selftests relying on bpf_testmod.ko will be skipped. run_test:PASS:bpf_prog_attach 0 nsec run_test:PASS:connect to fd 0 nsec verify_tsk:PASS:bpf_map_lookup_elem 0 nsec verify_tsk:PASS:unexpected invoked count 0 nsec verify_tsk:PASS:unexpected is_mptcp 0 nsec test_base:PASS:run_test tcp 0 nsec (network_helpers.c:107: errno: Protocol not available) Failed to create server socket test_base:FAIL:start_mptcp_server unexpected start_mptcp_server: actual -1 < expected 0 #178/1 mptcp/base:FAIL test_mptcpify:PASS:test__join_cgroup 0 nsec create_netns:PASS:ip netns add mptcp_ns 0 nsec create_netns:PASS:ip -net mptcp_ns link set dev lo up 0 nsec test_mptcpify:PASS:create_netns 0 nsec run_mptcpify:PASS:skel_open_load 0 nsec run_mptcpify:PASS:skel_attach 0 nsec (network_helpers.c:107: errno: Protocol not available) Failed to create server socket run_mptcpify:FAIL:start_server unexpected start_server: actual -1 < expected 0 test_mptcpify:FAIL:run_mptcpify unexpected error: -5 (errno 92) #178/2 mptcp/mptcpify:FAIL #178 mptcp:FAIL All error logs: test_base:PASS:test__join_cgroup 0 nsec create_netns:PASS:ip netns add mptcp_ns 0 nsec create_netns:PASS:ip -net mptcp_ns link set dev lo up 0 nsec test_base:PASS:create_netns 0 nsec test_base:PASS:start_server 0 nsec run_test:PASS:skel_open_load 0 nsec run_test:PASS:skel_attach 0 nsec run_test:PASS:bpf_prog_attach 0 nsec run_test:PASS:connect to fd 0 nsec verify_tsk:PASS:bpf_map_lookup_elem 0 nsec verify_tsk:PASS:unexpected invoked count 0 nsec verify_tsk:PASS:unexpected 
is_mptcp 0 nsec test_base:PASS:run_test tcp 0 nsec (network_helpers.c:107: errno: Protocol not available) Failed to create server socket test_base:FAIL:start_mptcp_server unexpected start_mptcp_server: actual -1 < expected 0 #178/1 mptcp/base:FAIL test_mptcpify:PASS:test__join_cgroup 0 nsec create_netns:PASS:ip netns add mptcp_ns 0 nsec create_netns:PASS:ip -net mptcp_ns link set dev lo up 0 nsec test_mptcpify:PASS:create_netns 0 nsec run_mptcpify:PASS:skel_open_load 0 nsec run_mptcpify:PASS:skel_attach 0 nsec (network_helpers.c:107: errno: Protocol not available) Failed to create server socket run_mptcpify:FAIL:start_server unexpected start_server: actual -1 < expected 0 test_mptcpify:FAIL:run_mptcpify unexpected error: -5 (errno 92) #178/2 mptcp/mptcpify:FAIL #178 mptcp:FAIL Summary: 0/0 PASSED, 0 SKIPPED, 1 FAILED This is the custom patch I had applied on the LTS v6.12.36 kernel and tested it: diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c index dd595d9b5e50c..bdcc4136e92ef 100644 --- a/net/mptcp/ctrl.c +++ b/net/mptcp/ctrl.c @@ -89,7 +89,7 @@ const char *mptcp_get_scheduler(const struct net *net) static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet) { - pernet->mptcp_enabled = 1; + pernet->mptcp_enabled = 0; pernet->add_addr_timeout = TCP_RTO_MAX; pernet->blackhole_timeout = 3600; atomic_set(&pernet->active_disable_times, 0); -- Thanks & Regards, Harshvardhan

1 month, 1 week

2
3
0 0

[PATCH v2] mailbox: pcc: Add missed acpi_put_table() to fix memory leak

by Zhen Ni

In pcc_mbox_probe(), the PCCT table acquired via acpi_get_table() is only released in error paths but not in the success path. This leads to a permanent ACPI memory leak when the driver successfully initializes. Fixes: ce028702ddbc ("mailbox: pcc: Move bulk of PCCT parsing into pcc_mbox_probe") Cc: stable(a)vger.kernel.org Signed-off-by: Zhen Ni <zhen.ni(a)easystack.cn> --- Changes in v2: - Add tags of 'Fixes' and 'Cc' - Change goto target from out_put_pcct to e_nomem --- drivers/mailbox/pcc.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/mailbox/pcc.c b/drivers/mailbox/pcc.c index f6714c233f5a..b5b4e3665593 100644 --- a/drivers/mailbox/pcc.c +++ b/drivers/mailbox/pcc.c @@ -763,19 +763,19 @@ static int pcc_mbox_probe(struct platform_device *pdev) GFP_KERNEL); if (!pcc_mbox_channels) { rc = -ENOMEM; - goto err; + goto e_nomem; } chan_info = devm_kcalloc(dev, count, sizeof(*chan_info), GFP_KERNEL); if (!chan_info) { rc = -ENOMEM; - goto err; + goto e_nomem; } pcc_mbox_ctrl = devm_kzalloc(dev, sizeof(*pcc_mbox_ctrl), GFP_KERNEL); if (!pcc_mbox_ctrl) { rc = -ENOMEM; - goto err; + goto e_nomem; } /* Point to the first PCC subspace entry */ @@ -796,17 +796,17 @@ static int pcc_mbox_probe(struct platform_device *pdev) !pcc_mbox_ctrl->txdone_irq) { pr_err("Platform Interrupt flag must be set to 1"); rc = -EINVAL; - goto err; + goto e_nomem; } if (pcc_mbox_ctrl->txdone_irq) { rc = pcc_parse_subspace_irq(pchan, pcct_entry); if (rc < 0) - goto err; + goto e_nomem; } rc = pcc_parse_subspace_db_reg(pchan, pcct_entry); if (rc < 0) - goto err; + goto e_nomem; pcc_parse_subspace_shmem(pchan, pcct_entry); @@ -827,9 +827,8 @@ static int pcc_mbox_probe(struct platform_device *pdev) rc = mbox_controller_register(pcc_mbox_ctrl); if (rc) pr_err("Err registering PCC as Mailbox controller: %d\n", rc); - else - return 0; -err: + +e_nomem: acpi_put_table(pcct_tbl); return rc; } -- 2.20.1

1 month, 1 week

6
12
0 0

[PATCH v2] mm: Fix possible deadlock in console_trylock_spinning

by Gu Bowen

Our syztester reported the lockdep WARNING [1]. kmemleak_scan_thread() invokes scan_block() which may invoke a normal printk() to print a warning message. This can cause a deadlock in the scenario reported below: CPU0 CPU1 ---- ---- lock(kmemleak_lock); lock(&port->lock); lock(kmemleak_lock); lock(console_owner); To solve this problem, switch to printk_safe mode before printing the warning message; this will redirect all printk()-s to a special per-CPU buffer, which will be flushed later from a safe context (irq work), and this deadlock problem can be avoided. The proper API to use should be printk_deferred_enter()/printk_deferred_exit() if we want to defer the printing [2]. This patch also fixes some similar cases that need to use the printk deferring [3]. [1] https://lore.kernel.org/all/20250730094914.566582-1-gubowen5@huawei.com/ [2] https://lore.kernel.org/all/5ca375cd-4a20-4807-b897-68b289626550@redhat.com/ [3] https://lore.kernel.org/all/aJCir5Wh362XzLSx@arm.com/ ==================== Cc: stable(a)vger.kernel.org # 5.10 Signed-off-by: Gu Bowen <gubowen5(a)huawei.com> --- mm/kmemleak.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 4801751cb6b6..381145dde54f 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -390,9 +390,15 @@ static struct kmemleak_object *lookup_object(unsigned long ptr, int alias) else if (object->pointer == ptr || alias) return object; else { + /* + * Printk deferring due to the kmemleak_lock held. + * This is done to avoid deadlock. + */ + printk_deferred_enter(); kmemleak_warn("Found object by alias at 0x%08lx\n", ptr); dump_object_info(object); + printk_deferred_exit(); break; } } @@ -433,8 +439,15 @@ static struct kmemleak_object *mem_pool_alloc(gfp_t gfp) list_del(&object->object_list); else if (mem_pool_free_count) object = &mem_pool[--mem_pool_free_count]; - else + else { + /* + * Printk deferring due to the kmemleak_lock held. + * This is done to avoid deadlock. 
+ */ + printk_deferred_enter(); pr_warn_once("Memory pool empty, consider increasing CONFIG_DEBUG_KMEMLEAK_MEM_POOL_SIZE\n"); + printk_deferred_exit(); + } raw_spin_unlock_irqrestore(&kmemleak_lock, flags); return object; @@ -632,6 +645,11 @@ static struct kmemleak_object *create_object(unsigned long ptr, size_t size, else if (parent->pointer + parent->size <= ptr) link = &parent->rb_node.rb_right; else { + /* + * Printk deferring due to the kmemleak_lock held. + * This is done to avoid deadlock. + */ + printk_deferred_enter(); kmemleak_stop("Cannot insert 0x%lx into the object search tree (overlaps existing)\n", ptr); /* @@ -639,6 +657,7 @@ static struct kmemleak_object *create_object(unsigned long ptr, size_t size, * be freed while the kmemleak_lock is held. */ dump_object_info(parent); + printk_deferred_exit(); kmem_cache_free(object_cache, object); object = NULL; goto out; -- 2.25.1

1 month, 1 week

2
2
0 0

+ proc-proc_maps_open-allow-proc_mem_open-to-return-null.patch added to mm-hotfixes-unstable branch

by Andrew Morton

The patch titled Subject: proc: proc_maps_open allow proc_mem_open to return NULL has been added to the -mm mm-hotfixes-unstable branch. Its filename is proc-proc_maps_open-allow-proc_mem_open-to-return-null.patch This patch will shortly appear at https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche… This patch will later appear in the mm-hotfixes-unstable branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm Before you just go and hit "reply", please: a) Consider who else should be cc'ed b) Prefer to cc a suitable mailing list as well c) Ideally: find the original patch on the mailing list and do a reply-to-all to that, adding suitable additional cc's *** Remember to use Documentation/process/submit-checklist.rst when testing your code *** The -mm tree is included into linux-next via the mm-everything branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm and is updated there every 2-3 working days ------------------------------------------------------ From: Jialin Wang <wjl.linux(a)gmail.com> Subject: proc: proc_maps_open allow proc_mem_open to return NULL Date: Fri, 8 Aug 2025 00:54:55 +0800 commit 65c66047259f ("proc: fix the issue of proc_mem_open returning NULL") breaks `perf record -g -p PID` when profiling a kernel thread. The strace of `perf record -g -p $(pgrep kswapd0)` shows: openat(AT_FDCWD, "/proc/65/task/65/maps", O_RDONLY) = -1 ESRCH (No such process) This patch partially reverts the commit to fix it. 
Link: https://lkml.kernel.org/r/20250807165455.73656-1-wjl.linux@gmail.com Fixes: 65c66047259f ("proc: fix the issue of proc_mem_open returning NULL") Signed-off-by: Jialin Wang <wjl.linux(a)gmail.com> Cc: Penglei Jiang <superman.xpt(a)gmail.com> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- fs/proc/task_mmu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) --- a/fs/proc/task_mmu.c~proc-proc_maps_open-allow-proc_mem_open-to-return-null +++ a/fs/proc/task_mmu.c @@ -340,8 +340,8 @@ static int proc_maps_open(struct inode * priv->inode = inode; priv->mm = proc_mem_open(inode, PTRACE_MODE_READ); - if (IS_ERR_OR_NULL(priv->mm)) { - int err = priv->mm ? PTR_ERR(priv->mm) : -ESRCH; + if (IS_ERR(priv->mm)) { + int err = PTR_ERR(priv->mm); seq_release_private(inode, file); return err; _ Patches currently in -mm which might be from wjl.linux(a)gmail.com are proc-proc_maps_open-allow-proc_mem_open-to-return-null.patch

1 month, 1 week

1
0
0 0

[to-be-updated] mm-fix-accounting-of-memmap-pages-for-early-sections.patch removed from -mm tree

by Andrew Morton

The quilt patch titled Subject: mm: fix accounting of memmap pages for early sections has been removed from the -mm tree. Its filename was mm-fix-accounting-of-memmap-pages-for-early-sections.patch This patch was dropped because an updated version will be issued ------------------------------------------------------ From: Sumanth Korikkar <sumanthk(a)linux.ibm.com> Subject: mm: fix accounting of memmap pages for early sections Date: Mon, 4 Aug 2025 10:40:15 +0200 memmap pages can be allocated either from the memblock (boot) allocator during early boot or from the buddy allocator. When these memmap pages are removed via arch_remove_memory(), the deallocation path depends on their source: * For pages from the buddy allocator, depopulate_section_memmap() is called, which also decrements the count of nr_memmap_pages. * For pages from the boot allocator, free_map_bootmem() is called. But it currently does not adjust the nr_memmap_boot_pages. To fix this inconsistency, update free_map_bootmem() to also decrement the nr_memmap_boot_pages count by invoking memmap_boot_pages_add(), mirroring how free_vmemmap_page() handles this for boot-allocated pages. This ensures correct tracking of memmap pages regardless of allocation source. 
Link: https://lkml.kernel.org/r/20250804084015.270570-1-sumanthk@linux.ibm.com Fixes: 15995a352474 ("mm: report per-page metadata information") Signed-off-by: Sumanth Korikkar <sumanthk(a)linux.ibm.com> Cc: Alexander Gordeev <agordeev(a)linux.ibm.com> Cc: David Hildenbrand <david(a)redhat.com> Cc: Gerald Schaefer <gerald.schaefer(a)linux.ibm.com> Cc: Heiko Carstens <hca(a)linux.ibm.com> Cc: Vasily Gorbik <gor(a)linux.ibm.com> Cc: David Rientjes <rientjes(a)google.com> Cc: Pasha Tatashin <pasha.tatashin(a)soleen.com> Cc: Sourav Panda <souravpanda(a)google.com> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- mm/sparse.c | 1 + 1 file changed, 1 insertion(+) --- a/mm/sparse.c~mm-fix-accounting-of-memmap-pages-for-early-sections +++ a/mm/sparse.c @@ -688,6 +688,7 @@ static void free_map_bootmem(struct page unsigned long start = (unsigned long)memmap; unsigned long end = (unsigned long)(memmap + PAGES_PER_SECTION); + memmap_boot_pages_add(-1L * (DIV_ROUND_UP(end - start, PAGE_SIZE))); vmemmap_free(start, end, NULL); } _ Patches currently in -mm which might be from sumanthk(a)linux.ibm.com are mm-fix-accounting-of-memmap-pages.patch

1 month, 1 week

1
0
0 0

+ mm-fix-accounting-of-memmap-pages.patch added to mm-hotfixes-unstable branch

by Andrew Morton

The patch titled Subject: mm: fix accounting of memmap pages has been added to the -mm mm-hotfixes-unstable branch. Its filename is mm-fix-accounting-of-memmap-pages.patch This patch will shortly appear at https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche… This patch will later appear in the mm-hotfixes-unstable branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm Before you just go and hit "reply", please: a) Consider who else should be cc'ed b) Prefer to cc a suitable mailing list as well c) Ideally: find the original patch on the mailing list and do a reply-to-all to that, adding suitable additional cc's *** Remember to use Documentation/process/submit-checklist.rst when testing your code *** The -mm tree is included into linux-next via the mm-everything branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm and is updated there every 2-3 working days ------------------------------------------------------ From: Sumanth Korikkar <sumanthk(a)linux.ibm.com> Subject: mm: fix accounting of memmap pages Date: Thu, 7 Aug 2025 20:35:45 +0200 For !CONFIG_SPARSEMEM_VMEMMAP, memmap page accounting is currently done upfront in sparse_buffer_init(). However, sparse_buffer_alloc() may return NULL in failure scenario. Also, memmap pages may be allocated either from the memblock allocator during early boot or from the buddy allocator. When removed via arch_remove_memory(), accounting of memmap pages must reflect the original allocation source. To ensure correctness: * Account memmap pages after successful allocation in sparse_init_nid() and section_activate(). * Account memmap pages in section_deactivate() based on allocation source. 
Link: https://lkml.kernel.org/r/20250807183545.1424509-1-sumanthk@linux.ibm.com Fixes: 15995a352474 ("mm: report per-page metadata information") Signed-off-by: Sumanth Korikkar <sumanthk(a)linux.ibm.com> Suggested-by: David Hildenbrand <david(a)redhat.com> Cc: Alexander Gordeev <agordeev(a)linux.ibm.com> Cc: Gerald Schaefer <gerald.schaefer(a)linux.ibm.com> Cc: Heiko Carstens <hca(a)linux.ibm.com> Cc: Vasily Gorbik <gor(a)linux.ibm.com> Cc: Wei Yang <richard.weiyang(a)gmail.com> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- mm/sparse-vmemmap.c | 5 ----- mm/sparse.c | 15 +++++++++------ 2 files changed, 9 insertions(+), 11 deletions(-) --- a/mm/sparse.c~mm-fix-accounting-of-memmap-pages +++ a/mm/sparse.c @@ -454,9 +454,6 @@ static void __init sparse_buffer_init(un */ sparsemap_buf = memmap_alloc(size, section_map_size(), addr, nid, true); sparsemap_buf_end = sparsemap_buf + size; -#ifndef CONFIG_SPARSEMEM_VMEMMAP - memmap_boot_pages_add(DIV_ROUND_UP(size, PAGE_SIZE)); -#endif } static void __init sparse_buffer_fini(void) @@ -567,6 +564,8 @@ static void __init sparse_init_nid(int n sparse_buffer_fini(); goto failed; } + memmap_boot_pages_add(DIV_ROUND_UP(PAGES_PER_SECTION * sizeof(struct page), + PAGE_SIZE)); sparse_init_early_section(nid, map, pnum, 0); } } @@ -680,7 +679,6 @@ static void depopulate_section_memmap(un unsigned long start = (unsigned long) pfn_to_page(pfn); unsigned long end = start + nr_pages * sizeof(struct page); - memmap_pages_add(-1L * (DIV_ROUND_UP(end - start, PAGE_SIZE))); vmemmap_free(start, end, altmap); } static void free_map_bootmem(struct page *memmap) @@ -857,10 +855,14 @@ static void section_deactivate(unsigned * The memmap of early sections is always fully populated. See * section_activate() and pfn_valid() . 
*/ - if (!section_is_early) + if (!section_is_early) { + memmap_pages_add(-1L * (DIV_ROUND_UP(nr_pages * sizeof(struct page), PAGE_SIZE))); depopulate_section_memmap(pfn, nr_pages, altmap); - else if (memmap) + } else if (memmap) { + memmap_boot_pages_add(-1L * (DIV_ROUND_UP(nr_pages * sizeof(struct page), + PAGE_SIZE))); free_map_bootmem(memmap); + } if (empty) ms->section_mem_map = (unsigned long)NULL; @@ -905,6 +907,7 @@ static struct page * __meminit section_a section_deactivate(pfn, nr_pages, altmap); return ERR_PTR(-ENOMEM); } + memmap_pages_add(DIV_ROUND_UP(nr_pages * sizeof(struct page), PAGE_SIZE)); return memmap; } --- a/mm/sparse-vmemmap.c~mm-fix-accounting-of-memmap-pages +++ a/mm/sparse-vmemmap.c @@ -578,11 +578,6 @@ struct page * __meminit __populate_secti if (r < 0) return NULL; - if (system_state == SYSTEM_BOOTING) - memmap_boot_pages_add(DIV_ROUND_UP(end - start, PAGE_SIZE)); - else - memmap_pages_add(DIV_ROUND_UP(end - start, PAGE_SIZE)); - return pfn_to_page(pfn); } _ Patches currently in -mm which might be from sumanthk(a)linux.ibm.com are mm-fix-accounting-of-memmap-pages-for-early-sections.patch mm-fix-accounting-of-memmap-pages.patch

1 month, 1 week

1
0
0 0

[PATCH v4 1/1] userfaultfd: fix a crash in UFFDIO_MOVE when PMD is a migration entry

by Suren Baghdasaryan

When UFFDIO_MOVE encounters a migration PMD entry, it proceeds with obtaining a folio and accessing it even though the entry is swp_entry_t. Add the missing check and let split_huge_pmd() handle migration entries. Fixes: adef440691ba ("userfaultfd: UFFDIO_MOVE uABI") Reported-by: syzbot+b446dbe27035ef6bd6c2(a)syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/68794b5c.a70a0220.693ce.0050.GAE@google.com/ Signed-off-by: Suren Baghdasaryan <surenb(a)google.com> Reviewed-by: Peter Xu <peterx(a)redhat.com> Cc: stable(a)vger.kernel.org --- Changes since v3 [1] - Updated the title and changelog, per Peter Xu - Added Reviewed-by: per Peter Xu [1] https://lore.kernel.org/all/20250806154015.769024-1-surenb@google.com/ mm/userfaultfd.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 5431c9dd7fd7..116481606be8 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -1826,13 +1826,16 @@ ssize_t move_pages(struct userfaultfd_ctx *ctx, unsigned long dst_start, /* Check if we can move the pmd without splitting it. */ if (move_splits_huge_pmd(dst_addr, src_addr, src_start + len) || !pmd_none(dst_pmdval)) { - struct folio *folio = pmd_folio(*src_pmd); - - if (!folio || (!is_huge_zero_folio(folio) && - !PageAnonExclusive(&folio->page))) { - spin_unlock(ptl); - err = -EBUSY; - break; + /* Can be a migration entry */ + if (pmd_present(*src_pmd)) { + struct folio *folio = pmd_folio(*src_pmd); + + if (!folio || (!is_huge_zero_folio(folio) && + !PageAnonExclusive(&folio->page))) { + spin_unlock(ptl); + err = -EBUSY; + break; + } } spin_unlock(ptl); base-commit: 8e7e0c6d09502e44aa7a8fce0821e042a6ec03d1 -- 2.50.1.565.gc32cd1483b-goog

1 month, 1 week

2
5
0 0

[PATCH] ext4: don't try to clear the orphan_present feature block device is r/o

by Theodore Ts'o

When the file system is frozen in preparation for taking an LVM snapshot, the journal is checkpointed and if the orphan_file feature is enabled, and the orphan file is empty, we clear the orphan_present feature flag. But if there are pending inodes that need to be removed the orphan_present feature flag can't be cleared. The problem comes if the block device is read-only. In that case, we can't process the orphan inode list, so it is skipped in ext4_orphan_cleanup(). But then in ext4_mark_recovery_complete(), this results in the ext4 error "Orphan file not empty on read-only fs" firing and the file system mount is aborted. Fix this by clearing the needs_recovery flag if the block device is read-only. We do this after the call to ext4_load_and_init_journal() since there are some error checks that need to be done in case the journal needs to be replayed and the block device is read-only, or if the block device containing the external journal is read-only, etc. Link: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1108271 Cc: stable(a)vger.kernel.org Fixes: 02f310fcf47f ("ext4: Speedup ext4 orphan inode handling") Signed-off-by: Theodore Ts'o <tytso(a)mit.edu> --- fs/ext4/super.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index c7d39da7e733..52a5f2b391fb 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -5414,6 +5414,8 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) err = ext4_load_and_init_journal(sb, es, ctx); if (err) goto failed_mount3a; + if (bdev_read_only(sb->s_bdev)) + needs_recovery = 0; } else if (test_opt(sb, NOLOAD) && !sb_rdonly(sb) && ext4_has_feature_journal_needs_recovery(sb)) { ext4_msg(sb, KERN_ERR, "required journal recovery " -- 2.47.2

1 month, 1 week

1
0
0 0

[PATCH 01/19] drm/i915/lnl+/tc: Fix handling of an enabled/disconnected dp-alt sink

by Imre Deak

The TypeC PHY HW readout during driver loading and system resume determines which TypeC mode the PHY is in (legacy/DP-alt/TBT-alt) and whether the PHY is connected, based on the PHY's Owned and Ready flags. For the PHY to be in DP-alt or legacy mode and for the PHY to be in the connected state in these modes, both the Owned (set by the BIOS/driver) and the Ready (set by the HW) flags should be set. On ICL-MTL the HW kept the PHY's Ready flag set after the driver connected the PHY by acquiring the PHY ownership (by setting the Owned flag), until the driver disconnected the PHY by releasing the PHY ownership (by clearing the Owned flag). On LNL+ this has changed, in that the HW clears the Ready flag as soon as the sink gets disconnected, even if the PHY ownership was acquired already and hence the PHY is being used by the display. When inheriting the HW state from BIOS for a PHY connected in DP-alt mode on which the sink got disconnected - i.e. in a case where the sink was connected while BIOS/GOP was running and so the sink got enabled connecting the PHY, but the user disconnected the sink by the time the driver loaded - the PHY Owned but not Ready state must be accounted for on LNL+ according to the above. Do that by assuming on LNL+ that the PHY is connected in DP-alt mode whenever the PHY Owned flag is set, regardless of the PHY Ready flag. This fixes a problem on LNL+, where the PHY TypeC mode / connected state was detected incorrectly for a DP-alt sink, which got connected and then disconnected by the user in the above way. 
Cc: stable(a)vger.kernel.org # v6.8+ Reported-by: Charlton Lin <charlton.lin(a)intel.com> Tested-by: Khaled Almahallawy <khaled.almahallawy(a)intel.com> Signed-off-by: Imre Deak <imre.deak(a)intel.com> --- drivers/gpu/drm/i915/display/intel_tc.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_tc.c b/drivers/gpu/drm/i915/display/intel_tc.c index 3bc57579fe53e..73a08bd84a70a 100644 --- a/drivers/gpu/drm/i915/display/intel_tc.c +++ b/drivers/gpu/drm/i915/display/intel_tc.c @@ -1226,14 +1226,18 @@ static void tc_phy_get_hw_state(struct intel_tc_port *tc) tc->phy_ops->get_hw_state(tc); } -static bool tc_phy_is_ready_and_owned(struct intel_tc_port *tc, - bool phy_is_ready, bool phy_is_owned) +static bool tc_phy_in_legacy_or_dp_alt_mode(struct intel_tc_port *tc, + bool phy_is_ready, bool phy_is_owned) { struct intel_display *display = to_intel_display(tc->dig_port); - drm_WARN_ON(display->drm, phy_is_owned && !phy_is_ready); + if (DISPLAY_VER(display) < 20) { + drm_WARN_ON(display->drm, phy_is_owned && !phy_is_ready); - return phy_is_ready && phy_is_owned; + return phy_is_ready && phy_is_owned; + } else { + return phy_is_owned; + } } static bool tc_phy_is_connected(struct intel_tc_port *tc, @@ -1244,7 +1248,7 @@ static bool tc_phy_is_connected(struct intel_tc_port *tc, bool phy_is_owned = tc_phy_is_owned(tc); bool is_connected; - if (tc_phy_is_ready_and_owned(tc, phy_is_ready, phy_is_owned)) + if (tc_phy_in_legacy_or_dp_alt_mode(tc, phy_is_ready, phy_is_owned)) is_connected = port_pll_type == ICL_PORT_DPLL_MG_PHY; else is_connected = port_pll_type == ICL_PORT_DPLL_DEFAULT; @@ -1352,7 +1356,7 @@ tc_phy_get_current_mode(struct intel_tc_port *tc) phy_is_ready = tc_phy_is_ready(tc); phy_is_owned = tc_phy_is_owned(tc); - if (!tc_phy_is_ready_and_owned(tc, phy_is_ready, phy_is_owned)) { + if (!tc_phy_in_legacy_or_dp_alt_mode(tc, phy_is_ready, phy_is_owned)) { mode = get_tc_mode_in_phy_not_owned_state(tc, 
live_mode); } else { drm_WARN_ON(display->drm, live_mode == TC_PORT_TBT_ALT); -- 2.49.1

1 month, 1 week

4
9
0 0

[PATCH] ACPI: APEI: EINJ: Fix einj_tab memleak in einj_probe()

by Zhen Ni

Fixes a permanent ACPI table memory leak when einj_probe() returns 0 by adding acpi_put_table(). Remove the assignment rc = -ENOMEM to allow the function to propagate the actual return value. Fixes: e40213450b53 ("ACPI, APEI, EINJ support") Cc: stable(a)vger.kernel.org Signed-off-by: Zhen Ni <zhen.ni(a)easystack.cn> --- drivers/acpi/apei/einj-core.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/acpi/apei/einj-core.c b/drivers/acpi/apei/einj-core.c index bf8dc92a373a..919caa819cf8 100644 --- a/drivers/acpi/apei/einj-core.c +++ b/drivers/acpi/apei/einj-core.c @@ -990,7 +990,7 @@ static bool setup_einjv2_component_files(void) static int __init einj_probe(struct faux_device *fdev) { - int rc; + int rc = 0; acpi_status status; struct apei_exec_context ctx; @@ -1015,7 +1015,6 @@ static int __init einj_probe(struct faux_device *fdev) if (rc) goto err_put_table; - rc = -ENOMEM; einj_debug_dir = debugfs_create_dir("einj", apei_get_debugfs_dir()); debugfs_create_file("available_error_type", S_IRUSR, einj_debug_dir, @@ -1078,7 +1077,7 @@ static int __init einj_probe(struct faux_device *fdev) pr_info("Error INJection is initialized.\n"); - return 0; + goto err_put_table; err_release: apei_resources_release(&einj_resources); -- 2.20.1

1 month, 1 week

2
1
0 0

[PATCH 05/19] drm/i915/icl+/tc: Convert AUX powered WARN to a debug message

by Imre Deak

The BIOS can leave the AUX power well enabled on an output, even if this isn't required (on platforms where the AUX power is only needed for an AUX access). This was observed at least on PTL. To avoid the WARN which would be triggered by this during the HW readout, convert the WARN to a debug message. Cc: stable(a)vger.kernel.org # v6.8+ Reported-by: Charlton Lin <charlton.lin(a)intel.com> Tested-by: Khaled Almahallawy <khaled.almahallawy(a)intel.com> Signed-off-by: Imre Deak <imre.deak(a)intel.com> --- drivers/gpu/drm/i915/display/intel_tc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_tc.c b/drivers/gpu/drm/i915/display/intel_tc.c index 14042a64375e1..dec54cb0c8c63 100644 --- a/drivers/gpu/drm/i915/display/intel_tc.c +++ b/drivers/gpu/drm/i915/display/intel_tc.c @@ -1494,11 +1494,11 @@ static void intel_tc_port_reset_mode(struct intel_tc_port *tc, intel_display_power_flush_work(display); if (!intel_tc_cold_requires_aux_pw(dig_port)) { enum intel_display_power_domain aux_domain; - bool aux_powered; aux_domain = intel_aux_power_domain(dig_port); - aux_powered = intel_display_power_is_enabled(display, aux_domain); - drm_WARN_ON(display->drm, aux_powered); + if (intel_display_power_is_enabled(display, aux_domain)) + drm_dbg_kms(display->drm, "Port %s: AUX unexpectedly powered\n", + tc->port_name); } tc_phy_disconnect(tc); -- 2.49.1

1 month, 1 week

2
1
0 0

2025

2024

2023

2022

2021

2020

2019

2018

2017

Linux-stable-mirror