commit e67e0eb6a98b261caf45048f9eb95fd7609289c0 upstream.
LoongArch's toolchain may change the default code model from normal to
medium. This is unnecessary for the kernel, and generates some relocations
which cannot be handled by the module loader. So explicitly specify the
code model to normal in Makefile (for Rust 'normal' is 'small').
Cc: stable(a)vger.kernel.org
Tested-by: Haiyong Sun <sunhaiyong(a)loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai(a)loongson.cn>
---
V2: Add upstream commit id.
arch/loongarch/Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
index a74bbcb05ee1..f2966745b058 100644
--- a/arch/loongarch/Makefile
+++ b/arch/loongarch/Makefile
@@ -55,7 +55,7 @@ endif
ifdef CONFIG_64BIT
ld-emul = $(64bit-emul)
-cflags-y += -mabi=lp64s
+cflags-y += -mabi=lp64s -mcmodel=normal
endif
cflags-y += -pipe -msoft-float
--
2.47.1
From: Zi Yan <ziy(a)nvidia.com>
commit a259945efe6ada94087ef666e9b38f8e34ea34ba upstream.
nr_failed was missing the large folio splits from migrate_pages_batch()
and can cause a mismatch between migrate_pages() return value and the
number of not migrated pages, i.e., when the return value of
migrate_pages() is 0, there are still pages left in the from page list.
It will happen when a non-PMD THP large folio fails to migrate due to
-ENOMEM and is split successfully but not all the split pages are
migrated, migrate_pages_batch() would return non-zero, but
astats.nr_thp_split = 0. nr_failed would be 0 and returned to the caller
of migrate_pages(), but the not migrated pages are left in the from page
list without being added back to LRU lists.
Fix it by adding a new nr_split counter for large folio splits and adding
it to nr_failed in migrate_pages_sync() after migrate_pages_batch() is
done.
Link: https://lkml.kernel.org/r/20231017163129.2025214-1-zi.yan@sent.com
Fixes: 2ef7dbb26990 ("migrate_pages: try migrate in batch asynchronously firstly")
Signed-off-by: Zi Yan <ziy(a)nvidia.com>
Acked-by: Huang Ying <ying.huang(a)intel.com>
Reviewed-by: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Matthew Wilcox <willy(a)infradead.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Signed-off-by: Huacai Chen <chenhuacai(a)loongson.cn>
---
V2: Add upstream commit id.
mm/migrate.c | 16 +++++++++++++---
1 file changed, 13 insertions(+), 3 deletions(-)
diff --git a/mm/migrate.c b/mm/migrate.c
index 1004b1def1c2..4ed470885217 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1504,6 +1504,7 @@ struct migrate_pages_stats {
int nr_thp_succeeded; /* THP migrated successfully */
int nr_thp_failed; /* THP failed to be migrated */
int nr_thp_split; /* THP split before migrating */
+ int nr_split; /* Large folio (include THP) split before migrating */
};
/*
@@ -1623,6 +1624,7 @@ static int migrate_pages_batch(struct list_head *from,
int nr_retry_pages = 0;
int pass = 0;
bool is_thp = false;
+ bool is_large = false;
struct folio *folio, *folio2, *dst = NULL, *dst2;
int rc, rc_saved = 0, nr_pages;
LIST_HEAD(unmap_folios);
@@ -1638,7 +1640,8 @@ static int migrate_pages_batch(struct list_head *from,
nr_retry_pages = 0;
list_for_each_entry_safe(folio, folio2, from, lru) {
- is_thp = folio_test_large(folio) && folio_test_pmd_mappable(folio);
+ is_large = folio_test_large(folio);
+ is_thp = is_large && folio_test_pmd_mappable(folio);
nr_pages = folio_nr_pages(folio);
cond_resched();
@@ -1658,6 +1661,7 @@ static int migrate_pages_batch(struct list_head *from,
stats->nr_thp_failed++;
if (!try_split_folio(folio, split_folios)) {
stats->nr_thp_split++;
+ stats->nr_split++;
continue;
}
stats->nr_failed_pages += nr_pages;
@@ -1686,11 +1690,12 @@ static int migrate_pages_batch(struct list_head *from,
nr_failed++;
stats->nr_thp_failed += is_thp;
/* Large folio NUMA faulting doesn't split to retry. */
- if (folio_test_large(folio) && !nosplit) {
+ if (is_large && !nosplit) {
int ret = try_split_folio(folio, split_folios);
if (!ret) {
stats->nr_thp_split += is_thp;
+ stats->nr_split += is_large;
break;
} else if (reason == MR_LONGTERM_PIN &&
ret == -EAGAIN) {
@@ -1836,6 +1841,7 @@ static int migrate_pages_sync(struct list_head *from, new_folio_t get_new_folio,
stats->nr_succeeded += astats.nr_succeeded;
stats->nr_thp_succeeded += astats.nr_thp_succeeded;
stats->nr_thp_split += astats.nr_thp_split;
+ stats->nr_split += astats.nr_split;
if (rc < 0) {
stats->nr_failed_pages += astats.nr_failed_pages;
stats->nr_thp_failed += astats.nr_thp_failed;
@@ -1843,7 +1849,11 @@ static int migrate_pages_sync(struct list_head *from, new_folio_t get_new_folio,
return rc;
}
stats->nr_thp_failed += astats.nr_thp_split;
- nr_failed += astats.nr_thp_split;
+ /*
+ * Do not count rc, as pages will be retried below.
+ * Count nr_split only, since it includes nr_thp_split.
+ */
+ nr_failed += astats.nr_split;
/*
* Fall back to migrate all failed folios one by one synchronously. All
* failed folios except split THPs will be retried, so their failure
--
2.47.1
From: Baokun Li <libaokun1(a)huawei.com>
[ Upstream commit b4b4fda34e535756f9e774fb2d09c4537b7dfd1c ]
In the following concurrency we will access the uninitialized rs->lock:
ext4_fill_super
ext4_register_sysfs
// sysfs registered msg_ratelimit_interval_ms
// Other processes modify rs->interval to
// non-zero via msg_ratelimit_interval_ms
ext4_orphan_cleanup
ext4_msg(sb, KERN_INFO, "Errors on filesystem, "
__ext4_msg
___ratelimit(&(EXT4_SB(sb)->s_msg_ratelimit_state)
if (!rs->interval) // do nothing if interval is 0
return 1;
raw_spin_trylock_irqsave(&rs->lock, flags)
raw_spin_trylock(lock)
_raw_spin_trylock
__raw_spin_trylock
spin_acquire(&lock->dep_map, 0, 1, _RET_IP_)
lock_acquire
__lock_acquire
register_lock_class
assign_lock_key
dump_stack();
ratelimit_state_init(&sbi->s_msg_ratelimit_state, 5 * HZ, 10);
raw_spin_lock_init(&rs->lock);
// init rs->lock here
and get the following dump_stack:
=========================================================
INFO: trying to register non-static key.
The code is fine but needs lockdep annotation, or maybe
you didn't initialize this object before use?
turning off the locking correctness validator.
CPU: 12 PID: 753 Comm: mount Tainted: G E 6.7.0-rc6-next-20231222 #504
[...]
Call Trace:
dump_stack_lvl+0xc5/0x170
dump_stack+0x18/0x30
register_lock_class+0x740/0x7c0
__lock_acquire+0x69/0x13a0
lock_acquire+0x120/0x450
_raw_spin_trylock+0x98/0xd0
___ratelimit+0xf6/0x220
__ext4_msg+0x7f/0x160 [ext4]
ext4_orphan_cleanup+0x665/0x740 [ext4]
__ext4_fill_super+0x21ea/0x2b10 [ext4]
ext4_fill_super+0x14d/0x360 [ext4]
[...]
=========================================================
Normally interval is 0 until s_msg_ratelimit_state is initialized, so
___ratelimit() does nothing. But registering sysfs precedes initializing
rs->lock, so it is possible to change rs->interval to a non-zero value
via the msg_ratelimit_interval_ms interface of sysfs while rs->lock is
uninitialized, and then a call to ext4_msg triggers the problem by
accessing an uninitialized rs->lock. Therefore register sysfs after all
initializations are complete to avoid such problems.
Signed-off-by: Baokun Li <libaokun1(a)huawei.com>
Reviewed-by: Jan Kara <jack(a)suse.cz>
Link: https://lore.kernel.org/r/20240102133730.1098120-1-libaokun1@huawei.com
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
[Minor context change fixed.]
Signed-off-by: Bin Lan <bin.lan.cn(a)windriver.com>
Signed-off-by: He Zhe <zhe.he(a)windriver.com>
---
Build test passed.
---
fs/ext4/super.c | 22 ++++++++++------------
1 file changed, 10 insertions(+), 12 deletions(-)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 7f0231b34905..8528f61854ab 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -5496,19 +5496,15 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
if (err)
goto failed_mount6;
- err = ext4_register_sysfs(sb);
- if (err)
- goto failed_mount7;
-
err = ext4_init_orphan_info(sb);
if (err)
- goto failed_mount8;
+ goto failed_mount7;
#ifdef CONFIG_QUOTA
/* Enable quota usage during mount. */
if (ext4_has_feature_quota(sb) && !sb_rdonly(sb)) {
err = ext4_enable_quotas(sb);
if (err)
- goto failed_mount9;
+ goto failed_mount8;
}
#endif /* CONFIG_QUOTA */
@@ -5534,7 +5530,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
ext4_msg(sb, KERN_INFO, "recovery complete");
err = ext4_mark_recovery_complete(sb, es);
if (err)
- goto failed_mount10;
+ goto failed_mount9;
}
if (test_opt(sb, DISCARD) && !bdev_max_discard_sectors(sb->s_bdev))
@@ -5551,15 +5547,17 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
atomic_set(&sbi->s_warning_count, 0);
atomic_set(&sbi->s_msg_count, 0);
+ /* Register sysfs after all initializations are complete. */
+ err = ext4_register_sysfs(sb);
+ if (err)
+ goto failed_mount9;
+
return 0;
-failed_mount10:
+failed_mount9:
ext4_quota_off_umount(sb);
-failed_mount9: __maybe_unused
+failed_mount8: __maybe_unused
ext4_release_orphan_info(sb);
-failed_mount8:
- ext4_unregister_sysfs(sb);
- kobject_put(&sbi->s_kobj);
failed_mount7:
ext4_unregister_li_request(sb);
failed_mount6:
--
2.34.1
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 94cff94634e506a4a44684bee1875d2dbf782722
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025051258-washbowl-alongside-de3d@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 94cff94634e506a4a44684bee1875d2dbf782722 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy(a)linutronix.de>
Date: Fri, 4 Apr 2025 15:31:16 +0200
Subject: [PATCH] clocksource/i8253: Use raw_spinlock_irqsave() in
clockevent_i8253_disable()
On x86 during boot, clockevent_i8253_disable() can be invoked via
x86_late_time_init -> hpet_time_init() -> pit_timer_init() which happens
with enabled interrupts.
If some of the old i8253 hardware is actually used then lockdep will notice
that i8253_lock is used in hard interrupt context. This causes lockdep to
complain because it observed the lock being acquired with interrupts
enabled and in hard interrupt context.
Make clockevent_i8253_disable() acquire the lock with
raw_spinlock_irqsave() to cure this.
[ tglx: Massage change log and use guard() ]
Fixes: c8c4076723dac ("x86/timer: Skip PIT initialization on modern chipsets")
Signed-off-by: Sebastian Andrzej Siewior <bigeasy(a)linutronix.de>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/all/20250404133116.p-XRWJXf@linutronix.de
diff --git a/drivers/clocksource/i8253.c b/drivers/clocksource/i8253.c
index 39f7c2d736d1..b603c25f3dfa 100644
--- a/drivers/clocksource/i8253.c
+++ b/drivers/clocksource/i8253.c
@@ -103,7 +103,7 @@ int __init clocksource_i8253_init(void)
#ifdef CONFIG_CLKEVT_I8253
void clockevent_i8253_disable(void)
{
- raw_spin_lock(&i8253_lock);
+ guard(raw_spinlock_irqsave)(&i8253_lock);
/*
* Writing the MODE register should stop the counter, according to
@@ -132,8 +132,6 @@ void clockevent_i8253_disable(void)
outb_p(0, PIT_CH0);
outb_p(0x30, PIT_MODE);
-
- raw_spin_unlock(&i8253_lock);
}
static int pit_shutdown(struct clock_event_device *evt)
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x ab00ddd802f80e31fc9639c652d736fe3913feae
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025051245-jailbreak-unlinked-27ec@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ab00ddd802f80e31fc9639c652d736fe3913feae Mon Sep 17 00:00:00 2001
From: Feng Tang <feng.tang(a)linux.alibaba.com>
Date: Wed, 23 Apr 2025 18:36:45 +0800
Subject: [PATCH] selftests/mm: compaction_test: support platform with huge
amount of memory
When running mm selftest to verify mm patches, 'compaction_test' case
failed on an x86 server with 1TB memory. And the root cause is that it
has more free memory than the test supports.
The test case tries to allocate 100000 huge pages, which is about 200 GB
for that x86 server, and when it succeeds, it expects it's larger than 1/3
of 80% of the free memory in system. This logic only works for platform
with 750 GB ( 200 / (1/3) / 80% ) or less free memory, and may raise false
alarm for others.
Fix it by changing the fixed page number to self-adjustable number
according to the real number of free memory.
Link: https://lkml.kernel.org/r/20250423103645.2758-1-feng.tang@linux.alibaba.com
Fixes: bd67d5c15cc1 ("Test compaction of mlocked memory")
Signed-off-by: Feng Tang <feng.tang(a)linux.alibaba.com>
Acked-by: Dev Jain <dev.jain(a)arm.com>
Reviewed-by: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Tested-by: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Cc: Shuah Khan <shuah(a)kernel.org>
Cc: Sri Jayaramappa <sjayaram(a)akamai.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/tools/testing/selftests/mm/compaction_test.c b/tools/testing/selftests/mm/compaction_test.c
index 2c3a0eb6b22d..9bc4591c7b16 100644
--- a/tools/testing/selftests/mm/compaction_test.c
+++ b/tools/testing/selftests/mm/compaction_test.c
@@ -90,6 +90,8 @@ int check_compaction(unsigned long mem_free, unsigned long hugepage_size,
int compaction_index = 0;
char nr_hugepages[20] = {0};
char init_nr_hugepages[24] = {0};
+ char target_nr_hugepages[24] = {0};
+ int slen;
snprintf(init_nr_hugepages, sizeof(init_nr_hugepages),
"%lu", initial_nr_hugepages);
@@ -106,11 +108,18 @@ int check_compaction(unsigned long mem_free, unsigned long hugepage_size,
goto out;
}
- /* Request a large number of huge pages. The Kernel will allocate
- as much as it can */
- if (write(fd, "100000", (6*sizeof(char))) != (6*sizeof(char))) {
- ksft_print_msg("Failed to write 100000 to /proc/sys/vm/nr_hugepages: %s\n",
- strerror(errno));
+ /*
+ * Request huge pages for about half of the free memory. The Kernel
+ * will allocate as much as it can, and we expect it will get at least 1/3
+ */
+ nr_hugepages_ul = mem_free / hugepage_size / 2;
+ snprintf(target_nr_hugepages, sizeof(target_nr_hugepages),
+ "%lu", nr_hugepages_ul);
+
+ slen = strlen(target_nr_hugepages);
+ if (write(fd, target_nr_hugepages, slen) != slen) {
+ ksft_print_msg("Failed to write %lu to /proc/sys/vm/nr_hugepages: %s\n",
+ nr_hugepages_ul, strerror(errno));
goto close_fd;
}
V2: not to add extra read-back in vcn_v4_0_5_start as there is a
read-back already. New comment for better understanding.
On VCN v4.0.5 there is a race condition where the WPTR is not
updated after starting from idle when doorbell is used. The read-back
of the regVCN_RB1_DB_CTRL register after it is written is to ensure the
doorbell_index is updated before it can work properly.
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/12528
Cc: stable(a)vger.kernel.org
Signed-off-by: David (Ming Qiang) Wu <David.Wu3(a)amd.com>
Reviewed-by: Mario Limonciello <mario.limonciello(a)amd.com>
Tested-by: Mario Limonciello <mario.limonciello(a)amd.com>
---
drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
index ed00d35039c13..e55b76d71367d 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
@@ -1034,6 +1034,10 @@ static int vcn_v4_0_5_start_dpg_mode(struct amdgpu_vcn_inst *vinst,
ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
VCN_RB1_DB_CTRL__EN_MASK);
+ /* Keeping one read-back to ensure all register writes are done, otherwise
+ * it may introduce race conditions */
+ RREG32_SOC15(VCN, inst_idx, regVCN_RB1_DB_CTRL);
+
return 0;
}
--
2.34.1
From: Ashish Kalra <ashish.kalra(a)amd.com>
When the shared pages are being made private during kdump preparation
there are additional checks to handle shared GHCB pages.
These additional checks include handling the case of GHCB page being
contained within a huge page.
The check for handling the case of GHCB contained within a huge
page incorrectly skips a page just below the GHCB page from being
transitioned back to private during kdump preparation.
This skipped page causes a 0x404 #VC exception when it is accessed
later while dumping guest memory during vmcore generation via kdump.
Correct the range to be checked for GHCB contained in a huge page.
Also ensure that the skipped huge page containing the GHCB page is
transitioned back to private by applying the correct address mask
later when changing GHCBs to private at end of kdump preparation.
Fixes: 3074152e56c9 ("x86/sev: Convert shared memory back to private on kexec")
Cc: stable(a)vger.kernel.org
Signed-off-by: Ashish Kalra <ashish.kalra(a)amd.com>
---
arch/x86/coco/sev/core.c | 11 +++++++----
1 file changed, 7 insertions(+), 4 deletions(-)
diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c
index d35fec7b164a..30b74e4e4e88 100644
--- a/arch/x86/coco/sev/core.c
+++ b/arch/x86/coco/sev/core.c
@@ -1019,7 +1019,8 @@ static void unshare_all_memory(void)
data = per_cpu(runtime_data, cpu);
ghcb = (unsigned long)&data->ghcb_page;
- if (addr <= ghcb && ghcb <= addr + size) {
+ /* Handle the case of a huge page containing the GHCB page */
+ if (addr <= ghcb && ghcb < addr + size) {
skipped_addr = true;
break;
}
@@ -1131,8 +1132,8 @@ static void shutdown_all_aps(void)
void snp_kexec_finish(void)
{
struct sev_es_runtime_data *data;
+ unsigned long size, addr;
unsigned int level, cpu;
- unsigned long size;
struct ghcb *ghcb;
pte_t *pte;
@@ -1160,8 +1161,10 @@ void snp_kexec_finish(void)
ghcb = &data->ghcb_page;
pte = lookup_address((unsigned long)ghcb, &level);
size = page_level_size(level);
- set_pte_enc(pte, level, (void *)ghcb);
- snp_set_memory_private((unsigned long)ghcb, (size / PAGE_SIZE));
+ /* Handle the case of a huge page containing the GHCB page */
+ addr = (unsigned long)ghcb & page_level_mask(level);
+ set_pte_enc(pte, level, (void *)addr);
+ snp_set_memory_private(addr, (size / PAGE_SIZE));
}
}
--
2.34.1
On VCN v4.0.5 there is a race condition where the WPTR is not
updated after starting from idle when doorbell is used. The read-back
of the regVCN_RB1_DB_CTRL register after it is written is to ensure the
doorbell_index is updated before it can work properly.
Link: https://gitlab.freedesktop.org/mesa/mesa/-/issues/12528
Signed-off-by: David (Ming Qiang) Wu <David.Wu3(a)amd.com>
---
drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
index ed00d35039c1..d6be8b05d7a2 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
@@ -1033,6 +1033,8 @@ static int vcn_v4_0_5_start_dpg_mode(struct amdgpu_vcn_inst *vinst,
WREG32_SOC15(VCN, inst_idx, regVCN_RB1_DB_CTRL,
ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
VCN_RB1_DB_CTRL__EN_MASK);
+ /* Read DB_CTRL to flush the write DB_CTRL command. */
+ RREG32_SOC15(VCN, inst_idx, regVCN_RB1_DB_CTRL);
return 0;
}
@@ -1195,6 +1197,8 @@ static int vcn_v4_0_5_start(struct amdgpu_vcn_inst *vinst)
WREG32_SOC15(VCN, i, regVCN_RB1_DB_CTRL,
ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
VCN_RB1_DB_CTRL__EN_MASK);
+ /* Read DB_CTRL to flush the write DB_CTRL command. */
+ RREG32_SOC15(VCN, i, regVCN_RB1_DB_CTRL);
WREG32_SOC15(VCN, i, regUVD_RB_BASE_LO, ring->gpu_addr);
WREG32_SOC15(VCN, i, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
--
2.49.0
The current HID bpf implementation assumes no output report/request will
go through it after hid_bpf_destroy_device() has been called. This leads
to a bug where unplugging certain types of HID devices causes a cleaned-
up SRCU to be accessed. The bug was previously a hidden failure until a
recent x86 percpu change [1] made it access not-present pages.
The bug will be triggered if the conditions below are met:
A) a device under the driver has some LEDs on
B) hid_ll_driver->request() is unimplemented (e.g., logitech-djreceiver)
If condition A is met, hidinput_led_worker() is always scheduled *after*
hid_bpf_destroy_device().
hid_destroy_device
` hid_bpf_destroy_device
` cleanup_srcu_struct(&hdev->bpf.srcu)
` hid_remove_device
` ...
` led_classdev_unregister
` led_trigger_set(led_cdev, NULL)
` led_set_brightness(led_cdev, LED_OFF)
` ...
` input_inject_event
` input_event_dispose
` hidinput_input_event
` schedule_work(&hid->led_work) [hidinput_led_worker]
This is fine when condition B is not met, where hidinput_led_worker()
calls hid_ll_driver->request(). This is the case for most HID drivers,
which implement it or use the generic one from usbhid. The driver itself
or an underlying driver will then abort processing the request.
Otherwise, hidinput_led_worker() tries hid_hw_output_report() and leads
to the bug.
hidinput_led_worker
` hid_hw_output_report
` dispatch_hid_bpf_output_report
` srcu_read_lock(&hdev->bpf.srcu)
` srcu_read_unlock(&hdev->bpf.srcu, idx)
The bug has existed since the introduction [2] of
dispatch_hid_bpf_output_report(). However, the same bug also exists in
dispatch_hid_bpf_raw_requests(), and I've reproduced (no visible effect
because of the lack of [1], but confirmed bpf.destroyed == 1) the bug
against the commit (i.e., the Fixes:) introducing the function. This is
because hidinput_led_worker() falls back to hid_hw_raw_request() when
hid_ll_driver->output_report() is unimplemented (e.g., logitech-
djreceiver).
hidinput_led_worker
` hid_hw_output_report: -ENOSYS
` hid_hw_raw_request
` dispatch_hid_bpf_raw_requests
` srcu_read_lock(&hdev->bpf.srcu)
` srcu_read_unlock(&hdev->bpf.srcu, idx)
Fix the issue by returning early in the two mentioned functions if
hid_bpf has been marked as destroyed. Though
dispatch_hid_bpf_device_event() handles input events, and there is no
evidence that it may be called after the destruction, the same check, as
a safety net, is also added to it to maintain the consistency among all
dispatch functions.
The impact of the bug on other architectures is unclear. Even if it acts
as a hidden failure, this is still dangerous because it corrupts
whatever is on the address calculated by SRCU. Thus, CC'ing the stable
list.
[1]: commit 9d7de2aa8b41 ("x86/percpu/64: Use relative percpu offsets")
[2]: commit 9286675a2aed ("HID: bpf: add HID-BPF hooks for
hid_hw_output_report")
Closes: https://lore.kernel.org/all/20250506145548.GGaBoi9Jzp3aeJizTR@fat_crate.loc…
Fixes: 8bd0488b5ea5 ("HID: bpf: add HID-BPF hooks for hid_hw_raw_requests")
Cc: stable(a)vger.kernel.org
Signed-off-by: Rong Zhang <i(a)rong.moe>
---
drivers/hid/bpf/hid_bpf_dispatch.c | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/drivers/hid/bpf/hid_bpf_dispatch.c b/drivers/hid/bpf/hid_bpf_dispatch.c
index 2e96ec6a3073..9a06f9b0e4ef 100644
--- a/drivers/hid/bpf/hid_bpf_dispatch.c
+++ b/drivers/hid/bpf/hid_bpf_dispatch.c
@@ -38,6 +38,9 @@ dispatch_hid_bpf_device_event(struct hid_device *hdev, enum hid_report_type type
struct hid_bpf_ops *e;
int ret;
+ if (unlikely(hdev->bpf.destroyed))
+ return ERR_PTR(-ENODEV);
+
if (type >= HID_REPORT_TYPES)
return ERR_PTR(-EINVAL);
@@ -93,6 +96,9 @@ int dispatch_hid_bpf_raw_requests(struct hid_device *hdev,
struct hid_bpf_ops *e;
int ret, idx;
+ if (unlikely(hdev->bpf.destroyed))
+ return -ENODEV;
+
if (rtype >= HID_REPORT_TYPES)
return -EINVAL;
@@ -130,6 +136,9 @@ int dispatch_hid_bpf_output_report(struct hid_device *hdev,
struct hid_bpf_ops *e;
int ret, idx;
+ if (unlikely(hdev->bpf.destroyed))
+ return -ENODEV;
+
idx = srcu_read_lock(&hdev->bpf.srcu);
list_for_each_entry_srcu(e, &hdev->bpf.prog_list, list,
srcu_read_lock_held(&hdev->bpf.srcu)) {
base-commit: 82f2b0b97b36ee3fcddf0f0780a9a0825d52fec3
--
2.49.0
There is a long standing bug which causes I2C communication not to
work on the Armada 3700 based boards. This small series restores
that functionality.
Signed-off-by: Gabor Juhos <j4g8y7(a)gmail.com>
Signed-off-by: Imre Kaloz <kaloz(a)openwrt.org>
---
Gabor Juhos (3):
i2c: add init_recovery() callback
i2c: pxa: prevent calling of the generic recovery init code
i2c: pxa: handle 'Early Bus Busy' condition on Armada 3700
drivers/i2c/busses/i2c-pxa.c | 25 +++++++++++++++++++------
drivers/i2c/i2c-core-base.c | 8 +++++++-
include/linux/i2c.h | 4 ++++
3 files changed, 30 insertions(+), 7 deletions(-)
---
base-commit: 92a09c47464d040866cf2b4cd052bc60555185fb
change-id: 20250510-i2c-pxa-fix-i2c-communication-3e6de1e3d0c6
Best regards,
--
Gabor Juhos <j4g8y7(a)gmail.com>
With CONFIG_CPU_MITIGATIONS unset, or more precisely with
CONFIG_MITIGATION_ITS unset,
I get a compilation error:
ld: arch/x86/net/bpf_jit_comp.o: in function `emit_indirect_jump':
bpf_jit_comp.c:(.text+0x8cb): undefined reference to
`__x86_indirect_its_thunk_array'
make[2]: *** [scripts/Makefile.vmlinux:34: vmlinux] Error 1
make[1]: *** [/usr/src/linux/Makefile:1182: vmlinux] Error 2
make: *** [Makefile:224: __sub-make] Error 2
From: Emanuele Ghidoli <emanuele.ghidoli(a)toradex.com>
If an input changes state during wake-up and is used as an interrupt
source, the IRQ handler reads the volatile input register to clear the
interrupt mask and deassert the IRQ line. However, the IRQ handler is
triggered before access to the register is granted, causing the read
operation to fail.
As a result, the IRQ handler enters a loop, repeatedly printing the
"failed reading register" message, until `pca953x_resume()` is eventually
called, which restores the driver context and enables access to
registers.
Fix by disabling the IRQ line before entering suspend mode, and
re-enabling it after the driver context is restored in `pca953x_resume()`.
An IRQ can be disabled with disable_irq() and still wake the system as
long as the IRQ has wake enabled, so the wake-up functionality is
preserved.
Fixes: b76574300504 ("gpio: pca953x: Restore registers after suspend/resume cycle")
Cc: stable(a)vger.kernel.org
Signed-off-by: Emanuele Ghidoli <emanuele.ghidoli(a)toradex.com>
Signed-off-by: Francesco Dolcini <francesco.dolcini(a)toradex.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko(a)linux.intel.com>
Tested-by: Geert Uytterhoeven <geert+renesas(a)glider.be>
---
v2 -> v3
- add r-b Andy, t-b Geert
- fixed commit message
v1 -> v2
- Instead of calling PM ops with disabled interrupts, just disable the
irq while going in suspend and re-enable it after restoring the
context in resume function.
---
drivers/gpio/gpio-pca953x.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c
index ab2c0fd428fb..b852e4997629 100644
--- a/drivers/gpio/gpio-pca953x.c
+++ b/drivers/gpio/gpio-pca953x.c
@@ -1226,6 +1226,8 @@ static int pca953x_restore_context(struct pca953x_chip *chip)
guard(mutex)(&chip->i2c_lock);
+ if (chip->client->irq > 0)
+ enable_irq(chip->client->irq);
regcache_cache_only(chip->regmap, false);
regcache_mark_dirty(chip->regmap);
ret = pca953x_regcache_sync(chip);
@@ -1238,6 +1240,10 @@ static int pca953x_restore_context(struct pca953x_chip *chip)
static void pca953x_save_context(struct pca953x_chip *chip)
{
guard(mutex)(&chip->i2c_lock);
+
+ /* Disable IRQ to prevent early triggering while regmap "cache only" is on */
+ if (chip->client->irq > 0)
+ disable_irq(chip->client->irq);
regcache_cache_only(chip->regmap, true);
}
--
2.39.5
A user reported on the Arch Linux Forums that their device is emitting
the following message in the kernel journal, which is fixed by adding
the quirk as submitted in this patch:
> kernel: usb 1-2: current rate 8436480 is different from the runtime rate 48000
There is also an entry for this product line that was added a long time ago.
Their specific device has the following ID:
$ lsusb | grep Audio
Bus 001 Device 002: ID 1101:0003 EasyPass Industrial Co., Ltd Audioengine D1
Link: https://bbs.archlinux.org/viewtopic.php?id=305494
Fixes: 93f9d1a4ac593 ("ALSA: usb-audio: Apply sample rate quirk for Audioengine D1")
Cc: stable(a)vger.kernel.org
Signed-off-by: Christian Heusel <christian(a)heusel.eu>
---
sound/usb/quirks.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index 9112313a9dbc005d8ab6076a6f2f4b0c0cecc64f..eb192834db68ca599770055cb563201e648f20ba 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -2250,6 +2250,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = {
QUIRK_FLAG_FIXED_RATE),
DEVICE_FLG(0x0fd9, 0x0008, /* Hauppauge HVR-950Q */
QUIRK_FLAG_SHARE_MEDIA_DEVICE | QUIRK_FLAG_ALIGN_TRANSFER),
+ DEVICE_FLG(0x1101, 0x0003, /* Audioengine D1 */
+ QUIRK_FLAG_GET_SAMPLE_RATE),
DEVICE_FLG(0x1224, 0x2a25, /* Jieli Technology USB PHY 2.0 */
QUIRK_FLAG_GET_SAMPLE_RATE | QUIRK_FLAG_MIC_RES_16),
DEVICE_FLG(0x1395, 0x740a, /* Sennheiser DECT */
---
base-commit: 82f2b0b97b36ee3fcddf0f0780a9a0825d52fec3
change-id: 20250512-audioengine-quirk-addition-718e6c86a2cc
Best regards,
--
Christian Heusel <christian(a)heusel.eu>
This patch series addresses a regression in Energy Efficient Ethernet
(EEE) handling for KSZ switches with integrated PHYs, introduced in
kernel v6.9 by commit fe0d4fd9285e ("net: phy: Keep track of EEE
configuration").
The first patch updates the DSA driver to allow phylink to properly
manage PHY EEE configuration. Since integrated PHYs handle LPI
internally and ports without integrated PHYs do not document MAC-level
LPI support, dummy MAC LPI callbacks are provided.
The second patch removes outdated EEE workarounds from the micrel PHY
driver, as they are no longer needed with correct phylink handling.
This series addresses the regression for mainline and kernels starting
from v6.14. It is not easily possible to fully fix older kernels due
to missing infrastructure changes.
Tested on KSZ9893 hardware.
Oleksij Rempel (2):
net: dsa: microchip: let phylink manage PHY EEE configuration on KSZ
switches
net: phy: micrel: remove KSZ9477 EEE quirks now handled by phylink
drivers/net/dsa/microchip/ksz_common.c | 135 ++++++++++++++++++++-----
drivers/net/phy/micrel.c | 7 --
include/linux/micrel_phy.h | 1 -
3 files changed, 107 insertions(+), 36 deletions(-)
--
2.39.5
While the set_msix() callback function in pcie-cadence-ep writes the
Table Size field correctly (N-1), the calculation of the PBA offset
is wrong because it calculates space for (N-1) entries instead of N.
This results in e.g. the following error when using QEMU with PCI
passthrough on a device which relies on the PCI endpoint subsystem:
failed to add PCI capability 0x11[0x50]@0xb0: table & pba overlap, or they don't fit in BARs, or don't align
Fix the calculation of PBA offset in the MSI-X capability.
Cc: stable(a)vger.kernel.org
Fixes: 3ef5d16f50f8 ("PCI: cadence: Add MSI-X support to Endpoint driver")
Reviewed-by: Wilfred Mallawa <wilfred.mallawa(a)wdc.com>
Reviewed-by: Damien Le Moal <dlemoal(a)kernel.org>
Signed-off-by: Niklas Cassel <cassel(a)kernel.org>
---
drivers/pci/controller/cadence/pcie-cadence-ep.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/drivers/pci/controller/cadence/pcie-cadence-ep.c b/drivers/pci/controller/cadence/pcie-cadence-ep.c
index 599ec4b1223e..112ae200b393 100644
--- a/drivers/pci/controller/cadence/pcie-cadence-ep.c
+++ b/drivers/pci/controller/cadence/pcie-cadence-ep.c
@@ -292,13 +292,14 @@ static int cdns_pcie_ep_set_msix(struct pci_epc *epc, u8 fn, u8 vfn,
struct cdns_pcie *pcie = &ep->pcie;
u32 cap = CDNS_PCIE_EP_FUNC_MSIX_CAP_OFFSET;
u32 val, reg;
+ u16 actual_interrupts = interrupts + 1;
fn = cdns_pcie_get_fn_from_vfn(pcie, fn, vfn);
reg = cap + PCI_MSIX_FLAGS;
val = cdns_pcie_ep_fn_readw(pcie, fn, reg);
val &= ~PCI_MSIX_FLAGS_QSIZE;
- val |= interrupts;
+ val |= interrupts; /* 0's based value */
cdns_pcie_ep_fn_writew(pcie, fn, reg, val);
/* Set MSI-X BAR and offset */
@@ -308,7 +309,7 @@ static int cdns_pcie_ep_set_msix(struct pci_epc *epc, u8 fn, u8 vfn,
/* Set PBA BAR and offset. BAR must match MSI-X BAR */
reg = cap + PCI_MSIX_PBA;
- val = (offset + (interrupts * PCI_MSIX_ENTRY_SIZE)) | bir;
+ val = (offset + (actual_interrupts * PCI_MSIX_ENTRY_SIZE)) | bir;
cdns_pcie_ep_fn_writel(pcie, fn, reg, val);
return 0;
--
2.49.0
While the set_msix() callback function in pcie-designware-ep writes the
Table Size field correctly (N-1), the calculation of the PBA offset
is wrong because it calculates space for (N-1) entries instead of N.
This results in e.g. the following error when using QEMU with PCI
passthrough on a device which relies on the PCI endpoint subsystem:
failed to add PCI capability 0x11[0x50]@0xb0: table & pba overlap, or they don't fit in BARs, or don't align
Fix the calculation of PBA offset in the MSI-X capability.
Cc: stable(a)vger.kernel.org
Fixes: 83153d9f36e2 ("PCI: endpoint: Fix ->set_msix() to take BIR and offset as arguments")
Reviewed-by: Wilfred Mallawa <wilfred.mallawa(a)wdc.com>
Reviewed-by: Damien Le Moal <dlemoal(a)kernel.org>
Signed-off-by: Niklas Cassel <cassel(a)kernel.org>
---
drivers/pci/controller/dwc/pcie-designware-ep.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c b/drivers/pci/controller/dwc/pcie-designware-ep.c
index 1a0bf9341542..24026f3f3413 100644
--- a/drivers/pci/controller/dwc/pcie-designware-ep.c
+++ b/drivers/pci/controller/dwc/pcie-designware-ep.c
@@ -585,6 +585,7 @@ static int dw_pcie_ep_set_msix(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
struct dw_pcie_ep_func *ep_func;
u32 val, reg;
+ u16 actual_interrupts = interrupts + 1;
ep_func = dw_pcie_ep_get_func_from_ep(ep, func_no);
if (!ep_func || !ep_func->msix_cap)
@@ -595,7 +596,7 @@ static int dw_pcie_ep_set_msix(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
reg = ep_func->msix_cap + PCI_MSIX_FLAGS;
val = dw_pcie_ep_readw_dbi(ep, func_no, reg);
val &= ~PCI_MSIX_FLAGS_QSIZE;
- val |= interrupts;
+ val |= interrupts; /* 0's based value */
dw_pcie_writew_dbi(pci, reg, val);
reg = ep_func->msix_cap + PCI_MSIX_TABLE;
@@ -603,7 +604,7 @@ static int dw_pcie_ep_set_msix(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
dw_pcie_ep_writel_dbi(ep, func_no, reg, val);
reg = ep_func->msix_cap + PCI_MSIX_PBA;
- val = (offset + (interrupts * PCI_MSIX_ENTRY_SIZE)) | bir;
+ val = (offset + (actual_interrupts * PCI_MSIX_ENTRY_SIZE)) | bir;
dw_pcie_ep_writel_dbi(ep, func_no, reg, val);
dw_pcie_dbi_ro_wr_dis(pci);
--
2.49.0
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 94cff94634e506a4a44684bee1875d2dbf782722
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025051256-encrust-scribe-9996@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 94cff94634e506a4a44684bee1875d2dbf782722 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy(a)linutronix.de>
Date: Fri, 4 Apr 2025 15:31:16 +0200
Subject: [PATCH] clocksource/i8253: Use raw_spinlock_irqsave() in
clockevent_i8253_disable()
On x86 during boot, clockevent_i8253_disable() can be invoked via
x86_late_time_init -> hpet_time_init() -> pit_timer_init() which happens
with enabled interrupts.
If some of the old i8253 hardware is actually used then lockdep will notice
that i8253_lock is used in hard interrupt context. This causes lockdep to
complain because it observed the lock being acquired with interrupts
enabled and in hard interrupt context.
Make clockevent_i8253_disable() acquire the lock with
raw_spinlock_irqsave() to cure this.
[ tglx: Massage change log and use guard() ]
Fixes: c8c4076723dac ("x86/timer: Skip PIT initialization on modern chipsets")
Signed-off-by: Sebastian Andrzej Siewior <bigeasy(a)linutronix.de>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/all/20250404133116.p-XRWJXf@linutronix.de
diff --git a/drivers/clocksource/i8253.c b/drivers/clocksource/i8253.c
index 39f7c2d736d1..b603c25f3dfa 100644
--- a/drivers/clocksource/i8253.c
+++ b/drivers/clocksource/i8253.c
@@ -103,7 +103,7 @@ int __init clocksource_i8253_init(void)
#ifdef CONFIG_CLKEVT_I8253
void clockevent_i8253_disable(void)
{
- raw_spin_lock(&i8253_lock);
+ guard(raw_spinlock_irqsave)(&i8253_lock);
/*
* Writing the MODE register should stop the counter, according to
@@ -132,8 +132,6 @@ void clockevent_i8253_disable(void)
outb_p(0, PIT_CH0);
outb_p(0x30, PIT_MODE);
-
- raw_spin_unlock(&i8253_lock);
}
static int pit_shutdown(struct clock_event_device *evt)
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 6beb6835c1fbb3f676aebb51a5fee6b77fed9308
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025050913-rubble-confirm-99ee@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 6beb6835c1fbb3f676aebb51a5fee6b77fed9308 Mon Sep 17 00:00:00 2001
From: Eelco Chaudron <echaudro(a)redhat.com>
Date: Tue, 6 May 2025 16:28:54 +0200
Subject: [PATCH] openvswitch: Fix unsafe attribute parsing in
output_userspace()
This patch replaces the manual Netlink attribute iteration in
output_userspace() with nla_for_each_nested(), which ensures that only
well-formed attributes are processed.
Fixes: ccb1352e76cf ("net: Add Open vSwitch kernel components.")
Signed-off-by: Eelco Chaudron <echaudro(a)redhat.com>
Acked-by: Ilya Maximets <i.maximets(a)ovn.org>
Acked-by: Aaron Conole <aconole(a)redhat.com>
Link: https://patch.msgid.link/0bd65949df61591d9171c0dc13e42cea8941da10.1746541734.git.echaudro@redhat.com
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 61fea7baae5d..2f22ca59586f 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -975,8 +975,7 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
upcall.cmd = OVS_PACKET_CMD_ACTION;
upcall.mru = OVS_CB(skb)->mru;
- for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
- a = nla_next(a, &rem)) {
+ nla_for_each_nested(a, attr, rem) {
switch (nla_type(a)) {
case OVS_USERSPACE_ATTR_USERDATA:
upcall.userdata = a;