The following commit has been merged into the sched/urgent branch of tip:
Commit-ID: 82c387ef7568c0d96a918a5a78d9cad6256cfa15
Gitweb: https://git.kernel.org/tip/82c387ef7568c0d96a918a5a78d9cad6256cfa15
Author: Thomas Gleixner <tglx(a)linutronix.de>
AuthorDate: Mon, 16 Dec 2024 14:20:56 +01:00
Committer: Ingo Molnar <mingo(a)kernel.org>
CommitterDate: Thu, 27 Feb 2025 21:13:57 +01:00
sched/core: Prevent rescheduling when interrupts are disabled
David reported a warning observed while loop testing kexec jump:
Interrupts enabled after irqrouter_resume+0x0/0x50
WARNING: CPU: 0 PID: 560 at drivers/base/syscore.c:103 syscore_resume+0x18a/0x220
kernel_kexec+0xf6/0x180
__do_sys_reboot+0x206/0x250
do_syscall_64+0x95/0x180
The corresponding interrupt flag trace:
hardirqs last enabled at (15573): [<ffffffffa8281b8e>] __up_console_sem+0x7e/0x90
hardirqs last disabled at (15580): [<ffffffffa8281b73>] __up_console_sem+0x63/0x90
That means __up_console_sem() was invoked with interrupts enabled. Further
instrumentation revealed that in the interrupt disabled section of kexec
jump one of the syscore_suspend() callbacks woke up a task, which set the
NEED_RESCHED flag. A later callback in the resume path invoked
cond_resched() which in turn led to the invocation of the scheduler:
__cond_resched+0x21/0x60
down_timeout+0x18/0x60
acpi_os_wait_semaphore+0x4c/0x80
acpi_ut_acquire_mutex+0x3d/0x100
acpi_ns_get_node+0x27/0x60
acpi_ns_evaluate+0x1cb/0x2d0
acpi_rs_set_srs_method_data+0x156/0x190
acpi_pci_link_set+0x11c/0x290
irqrouter_resume+0x54/0x60
syscore_resume+0x6a/0x200
kernel_kexec+0x145/0x1c0
__do_sys_reboot+0xeb/0x240
do_syscall_64+0x95/0x180
This is a long standing problem, which probably got more visible with
the recent printk changes. Something does a task wakeup and the
scheduler sets the NEED_RESCHED flag. cond_resched() sees it set and
invokes schedule() from a completely bogus context. The scheduler
enables interrupts after context switching, which causes the above
warning at the end.
Quite a few of the code paths in syscore_suspend()/resume() can result in
triggering a wakeup with exactly the same consequences. They might not
have done so yet, but as they share a lot of code with normal operations
it's just a question of time.
The problem only affects the PREEMPT_NONE and PREEMPT_VOLUNTARY scheduling
models. Full preemption is not affected as cond_resched() is disabled and
the preemption check preemptible() takes the interrupt disabled flag into
account.
Cure the problem by adding a corresponding check into cond_resched().
Reported-by: David Woodhouse <dwmw(a)amazon.co.uk>
Suggested-by: Peter Zijlstra <peterz(a)infradead.org>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Tested-by: David Woodhouse <dwmw(a)amazon.co.uk>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: stable(a)vger.kernel.org
Closes: https://lore.kernel.org/all/7717fe2ac0ce5f0a2c43fdab8b11f4483d54a2a4.camel@…
---
kernel/sched/core.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 9aecd91..6718990 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7285,7 +7285,7 @@ out_unlock:
#if !defined(CONFIG_PREEMPTION) || defined(CONFIG_PREEMPT_DYNAMIC)
int __sched __cond_resched(void)
{
- if (should_resched(0)) {
+ if (should_resched(0) && !irqs_disabled()) {
preempt_schedule_common();
return 1;
}
The following commit has been merged into the sched/urgent branch of tip:
Commit-ID: c092dc7d88c1214e109591790c9021a0f734677a
Gitweb: https://git.kernel.org/tip/c092dc7d88c1214e109591790c9021a0f734677a
Author: Thomas Gleixner <tglx(a)linutronix.de>
AuthorDate: Mon, 16 Dec 2024 14:20:56 +01:00
Committer: Ingo Molnar <mingo(a)kernel.org>
CommitterDate: Thu, 27 Feb 2025 20:55:16 +01:00
sched/core: Prevent rescheduling when interrupts are disabled
David reported a warning observed while loop testing kexec jump:
Interrupts enabled after irqrouter_resume+0x0/0x50
WARNING: CPU: 0 PID: 560 at drivers/base/syscore.c:103 syscore_resume+0x18a/0x220
kernel_kexec+0xf6/0x180
__do_sys_reboot+0x206/0x250
do_syscall_64+0x95/0x180
The corresponding interrupt flag trace:
hardirqs last enabled at (15573): [<ffffffffa8281b8e>] __up_console_sem+0x7e/0x90
hardirqs last disabled at (15580): [<ffffffffa8281b73>] __up_console_sem+0x63/0x90
That means __up_console_sem() was invoked with interrupts enabled. Further
instrumentation revealed that in the interrupt disabled section of kexec
jump one of the syscore_suspend() callbacks woke up a task, which set the
NEED_RESCHED flag. A later callback in the resume path invoked
cond_resched() which in turn led to the invocation of the scheduler:
__cond_resched+0x21/0x60
down_timeout+0x18/0x60
acpi_os_wait_semaphore+0x4c/0x80
acpi_ut_acquire_mutex+0x3d/0x100
acpi_ns_get_node+0x27/0x60
acpi_ns_evaluate+0x1cb/0x2d0
acpi_rs_set_srs_method_data+0x156/0x190
acpi_pci_link_set+0x11c/0x290
irqrouter_resume+0x54/0x60
syscore_resume+0x6a/0x200
kernel_kexec+0x145/0x1c0
__do_sys_reboot+0xeb/0x240
do_syscall_64+0x95/0x180
This is a long standing problem, which probably got more visible with
the recent printk changes. Something does a task wakeup and the
scheduler sets the NEED_RESCHED flag. cond_resched() sees it set and
invokes schedule() from a completely bogus context. The scheduler
enables interrupts after context switching, which causes the above
warning at the end.
Quite a few of the code paths in syscore_suspend()/resume() can result in
triggering a wakeup with exactly the same consequences. They might not
have done so yet, but as they share a lot of code with normal operations
it's just a question of time.
The problem only affects the PREEMPT_NONE and PREEMPT_VOLUNTARY scheduling
models. Full preemption is not affected as cond_resched() is disabled and
the preemption check preemptible() takes the interrupt disabled flag into
account.
Cure the problem by adding a corresponding check into cond_resched().
Reported-by: David Woodhouse <dwmw(a)amazon.co.uk>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Tested-by: David Woodhouse <dwmw(a)amazon.co.uk>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: stable(a)vger.kernel.org
Closes: https://lore.kernel.org/all/7717fe2ac0ce5f0a2c43fdab8b11f4483d54a2a4.camel@…
---
kernel/sched/core.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 9aecd91..6718990 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7285,7 +7285,7 @@ out_unlock:
#if !defined(CONFIG_PREEMPTION) || defined(CONFIG_PREEMPT_DYNAMIC)
int __sched __cond_resched(void)
{
- if (should_resched(0)) {
+ if (should_resched(0) && !irqs_disabled()) {
preempt_schedule_common();
return 1;
}
When CONFIG_RANDOM_KMALLOC_CACHES or other randomization infrastructure is
enabled, the idle_task's stack may differ between the booting kernel
and the target kernel. So when resuming from hibernation, an ACTION_BOOT_CPU
IPI wakes up the idle instruction in arch_cpu_idle_dead() and jumps to the
interrupt handler. But since the stack pointer has changed, the interrupt
handler cannot restore the correct context.
So rename the current arch_cpu_idle_dead() to idle_play_dead(), make it
the default version of play_dead(), and have the new arch_cpu_idle_dead()
call play_dead() directly. For hibernation, implement an arch-specific
hibernate_resume_nonboot_cpu_disable() to use the polling version (idle
instruction is replaced by nop, and irq is disabled) of play_dead(), i.e.
poll_play_dead(), to avoid the IPI handler corrupting the idle_task's stack
when resuming from hibernation.
This solution is a little similar to commit 406f992e4a372dafbe3c ("x86 /
hibernate: Use hlt_play_dead() when resuming from hibernation").
Cc: stable(a)vger.kernel.org
Signed-off-by: Huacai Chen <chenhuacai(a)loongson.cn>
---
arch/loongarch/kernel/smp.c | 40 ++++++++++++++++++++++++++++++++++++-
1 file changed, 39 insertions(+), 1 deletion(-)
diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c
index fbf747447f13..308478f29278 100644
--- a/arch/loongarch/kernel/smp.c
+++ b/arch/loongarch/kernel/smp.c
@@ -19,6 +19,7 @@
#include <linux/smp.h>
#include <linux/threads.h>
#include <linux/export.h>
+#include <linux/suspend.h>
#include <linux/syscore_ops.h>
#include <linux/time.h>
#include <linux/tracepoint.h>
@@ -423,7 +424,7 @@ void loongson_cpu_die(unsigned int cpu)
mb();
}
-void __noreturn arch_cpu_idle_dead(void)
+static void __noreturn idle_play_dead(void)
{
register uint64_t addr;
register void (*init_fn)(void);
@@ -447,6 +448,43 @@ void __noreturn arch_cpu_idle_dead(void)
BUG();
}
+static void __noreturn poll_play_dead(void)
+{
+ register uint64_t addr;
+ register void (*init_fn)(void);
+
+ idle_task_exit();
+ __this_cpu_write(cpu_state, CPU_DEAD);
+
+ __smp_mb();
+ do {
+ __asm__ __volatile__("nop\n\t");
+ addr = iocsr_read64(LOONGARCH_IOCSR_MBUF0);
+ } while (addr == 0);
+
+ init_fn = (void *)TO_CACHE(addr);
+ iocsr_write32(0xffffffff, LOONGARCH_IOCSR_IPI_CLEAR);
+
+ init_fn();
+ BUG();
+}
+
+static void (*play_dead)(void) = idle_play_dead;
+
+void __noreturn arch_cpu_idle_dead(void)
+{
+ play_dead();
+ BUG(); /* play_dead() doesn't return */
+}
+
+#ifdef CONFIG_HIBERNATION
+int hibernate_resume_nonboot_cpu_disable(void)
+{
+ play_dead = poll_play_dead;
+ return suspend_disable_secondary_cpus();
+}
+#endif
+
#endif
/*
--
2.47.1
When handling faults for anon shmem finish_fault() will attempt to install
ptes for the entire folio. Unfortunately if it encounters a single
non-pte_none entry in that range it will bail, even if the pte that
triggered the fault is still pte_none. When this situation happens the
fault will be retried endlessly never making forward progress.
This patch fixes this behavior and if it detects that a pte in the range
is not pte_none it will fall back to setting just the pte for the
address that triggered the fault.
Cc: stable(a)vger.kernel.org
Cc: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Cc: Hugh Dickins <hughd(a)google.com>
Fixes: 43e027e41423 ("mm: memory: extend finish_fault() to support large folio")
Reported-by: Marek Maslanka <mmaslanka(a)google.com>
Signed-off-by: Brian Geffon <bgeffon(a)google.com>
---
mm/memory.c | 19 ++++++++++++++++---
1 file changed, 16 insertions(+), 3 deletions(-)
diff --git a/mm/memory.c b/mm/memory.c
index b4d3d4893267..32de626ec1da 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -5258,9 +5258,22 @@ vm_fault_t finish_fault(struct vm_fault *vmf)
ret = VM_FAULT_NOPAGE;
goto unlock;
} else if (nr_pages > 1 && !pte_range_none(vmf->pte, nr_pages)) {
- update_mmu_tlb_range(vma, addr, vmf->pte, nr_pages);
- ret = VM_FAULT_NOPAGE;
- goto unlock;
+ /*
+ * We encountered a set pte, let's just try to install the
+ * pte for the original fault if that pte is still pte none.
+ */
+ pgoff_t idx = (vmf->address - addr) / PAGE_SIZE;
+
+ if (!pte_none(ptep_get_lockless(vmf->pte + idx))) {
+ update_mmu_tlb_range(vma, addr, vmf->pte, nr_pages);
+ ret = VM_FAULT_NOPAGE;
+ goto unlock;
+ }
+
+ vmf->pte = vmf->pte + idx;
+ page = folio_page(folio, idx);
+ addr = vmf->address;
+ nr_pages = 1;
}
folio_ref_add(folio, nr_pages - 1);
--
2.48.1.711.g2feabab25a-goog
The xHC resources allocated for USB devices are not released in the correct
order after resuming when a device was reconnected while the host was suspended.
This issue has been detected during the following scenario:
- connect hub HS to root port
- connect LS/FS device to hub port
- wait for enumeration to finish
- force host to suspend
- reconnect hub attached to root port
- wake host
For this scenario, during enumeration of the USB LS/FS device the Cadence xHC
reports a completion error code for xHC commands because the xHC resources
used for the devices have not been properly released.
The XHCI specification doesn't mention that devices can be reset in any order,
so we should not treat this issue as a Cadence xHC controller bug.
Similarly to disconnecting, in this case the device resources should
be cleared starting from the last USB device in the tree toward the root hub.
To fix this issue, the usbcore driver should call hcd->driver->reset_device
for all USB devices connected to a hub which was reconnected while
suspended.
Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver")
cc: <stable(a)vger.kernel.org>
Signed-off-by: Pawel Laszczak <pawell(a)cadence.com>
---
Changelog:
v2:
- Replaced disconnection procedure with releasing only the xHC resources
drivers/usb/core/hub.c | 33 +++++++++++++++++++++++++++++++++
1 file changed, 33 insertions(+)
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index a76bb50b6202..d3f89528a414 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -6065,6 +6065,36 @@ void usb_hub_cleanup(void)
usb_deregister(&hub_driver);
} /* usb_hub_cleanup() */
+/**
+ * hub_hc_release_resources - clear resources used by host controller
+ * @udev: pointer to device being released
+ *
+ * Context: task context, might sleep
+ *
+ * Function releases the host controller resources in correct order before
+ * making any operation on resuming usb device. The host controller resources
+ * allocated for devices in tree should be released starting from the last
+ * usb device in tree toward the root hub. This function is used only during
+ * resuming device when usb device require reinitialization - that is, when
+ * flag udev->reset_resume is set.
+ *
+ * This call is synchronous, and may not be used in an interrupt context.
+ */
+static void hub_hc_release_resources(struct usb_device *udev)
+{
+ struct usb_hub *hub = usb_hub_to_struct_hub(udev);
+ struct usb_hcd *hcd = bus_to_hcd(udev->bus);
+ int i;
+
+ /* Release up resources for all children before this device */
+ for (i = 0; i < udev->maxchild; i++)
+ if (hub->ports[i]->child)
+ hub_hc_release_resources(hub->ports[i]->child);
+
+ if (hcd->driver->reset_device)
+ hcd->driver->reset_device(hcd, udev);
+}
+
/**
* usb_reset_and_verify_device - perform a USB port reset to reinitialize a device
* @udev: device to reset (not in SUSPENDED or NOTATTACHED state)
@@ -6131,6 +6161,9 @@ static int usb_reset_and_verify_device(struct usb_device *udev)
mutex_lock(hcd->address0_mutex);
+ if (udev->reset_resume)
+ hub_hc_release_resources(udev);
+
for (i = 0; i < PORT_INIT_TRIES; ++i) {
if (hub_port_stop_enumerate(parent_hub, port1, i)) {
ret = -ENODEV;
--
2.43.0
The u2phy1_host should always have the same status as usb_host1_ehci
and usb_host1_ohci, otherwise the EHCI and OHCI drivers may be
initialized for a disabled usb port.
Per the NanoPi R4S schematic, the phy-supply for u2phy1_host is set to
the vdd_5v regulator.
Fixes: db792e9adbf8 ("rockchip: rk3399: Add support for FriendlyARM NanoPi R4S")
Cc: stable(a)vger.kernel.org
Signed-off-by: Justin Klaassen <justin(a)tidylabs.net>
Reviewed-by: Dragan Simic <dsimic(a)manjaro.org>
---
v1 -> v2: Updated commit message, added Fixes: and Cc: stable tags
arch/arm64/boot/dts/rockchip/rk3399-nanopi-r4s.dtsi | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-nanopi-r4s.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-nanopi-r4s.dtsi
index b1c9bd0e63ef..8d94d9f91a5c 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-nanopi-r4s.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3399-nanopi-r4s.dtsi
@@ -115,7 +115,7 @@ &u2phy0_host {
};
&u2phy1_host {
- status = "disabled";
+ phy-supply = <&vdd_5v>;
};
&uart0 {
--
2.47.1
This fixes incorrect pinmux on UART0 and UART5 for PX30 Ringneck on
Haikou.
Signed-off-by: Quentin Schulz <quentin.schulz(a)cherry.de>
---
Changes in v3:
- removed already merged patches (Device Tree overlays),
- rebased on top of master to avoid conflicts,
- added comment above pinctrl-0 in uart5 to explain we are only adding a
pinmux and not modifying anything else,
- Link to v2: https://lore.kernel.org/r/20250221-ringneck-dtbos-v2-0-310c0b9a3909@cherry.…
Changes in v2:
- rename uart5_rts_gpio to uart5_rts_pin to stop triggering a false
positive of the dtschema checker,
- remove PU from uart5_rts_pin,
- Link to v1: https://lore.kernel.org/r/20250220-ringneck-dtbos-v1-0-25c97f2385e6@cherry.…
---
Quentin Schulz (2):
arm64: dts: rockchip: fix pinmux of UART0 for PX30 Ringneck on Haikou
arm64: dts: rockchip: fix pinmux of UART5 for PX30 Ringneck on Haikou
arch/arm64/boot/dts/rockchip/px30-ringneck-haikou.dts | 10 ++++++++++
1 file changed, 10 insertions(+)
---
base-commit: d082ecbc71e9e0bf49883ee4afd435a77a5101b6
change-id: 20250128-ringneck-dtbos-98064839355e
Best regards,
--
Quentin Schulz <quentin.schulz(a)cherry.de>
[BUG]
When testing subpage block size btrfs (block size < page size), I hit
the following spin lock hang on x86_64, with the experimental 2K block
size support:
<TASK>
_raw_spin_lock_irq+0x2f/0x40
wait_subpage_spinlock+0x69/0x80 [btrfs]
btrfs_release_folio+0x46/0x70 [btrfs]
folio_unmap_invalidate+0xcb/0x250
folio_end_writeback+0x127/0x1b0
btrfs_subpage_clear_writeback+0xef/0x140 [btrfs]
end_bbio_data_write+0x13a/0x3c0 [btrfs]
btrfs_bio_end_io+0x6f/0xc0 [btrfs]
process_one_work+0x156/0x310
worker_thread+0x252/0x390
? __pfx_worker_thread+0x10/0x10
kthread+0xef/0x250
? finish_task_switch.isra.0+0x8a/0x250
? __pfx_kthread+0x10/0x10
ret_from_fork+0x34/0x50
? __pfx_kthread+0x10/0x10
ret_from_fork_asm+0x1a/0x30
</TASK>
[CAUSE]
It's a self deadlock with the following sequence:
btrfs_subpage_clear_writeback()
|- spin_lock_irqsave(&subpage->lock);
|- folio_end_writeback()
|- folio_end_dropbehind_write()
|- folio_unmap_invalidate()
|- btrfs_release_folio()
|- wait_subpage_spinlock()
|- spin_lock_irq(&subpage->lock);
!! DEADLOCK !!
We're trying to acquire the same spin lock already held by ourselves.
[FIX]
Move the folio_end_writeback() call out of the spin lock critical
section.
And since we no longer have all the bitmap operations and the writeback
flag clearing happening inside the critical section, we must do extra
checks to make sure only the last one clearing the writeback bitmap can
clear the folio writeback flag.
Fixes: 3470da3b7d87 ("btrfs: subpage: introduce helpers for writeback status")
Cc: stable(a)vger.kernel.org # 5.15+
Signed-off-by: Qu Wenruo <wqu(a)suse.com>
---
fs/btrfs/subpage.c | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/fs/btrfs/subpage.c b/fs/btrfs/subpage.c
index ebb40f506921..bedb5fac579b 100644
--- a/fs/btrfs/subpage.c
+++ b/fs/btrfs/subpage.c
@@ -466,15 +466,21 @@ void btrfs_subpage_clear_writeback(const struct btrfs_fs_info *fs_info,
struct btrfs_subpage *subpage = folio_get_private(folio);
unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
writeback, start, len);
+ bool was_writeback;
+ bool last = false;
unsigned long flags;
spin_lock_irqsave(&subpage->lock, flags);
+ was_writeback = !subpage_test_bitmap_all_zero(fs_info, folio, writeback);
bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
- if (subpage_test_bitmap_all_zero(fs_info, folio, writeback)) {
+ if (subpage_test_bitmap_all_zero(fs_info, folio, writeback) &&
+ was_writeback) {
ASSERT(folio_test_writeback(folio));
- folio_end_writeback(folio);
+ last = true;
}
spin_unlock_irqrestore(&subpage->lock, flags);
+ if (last)
+ folio_end_writeback(folio);
}
void btrfs_subpage_set_ordered(const struct btrfs_fs_info *fs_info,
--
2.48.1
This patch series fixes bugs and improves the code in drivers/of/*.
Signed-off-by: Zijun Hu <quic_zijuhu(a)quicinc.com>
---
Changes in v4:
- Remove 2 modalias relevant patches, and add more patches.
- Link to v3: https://lore.kernel.org/r/20241217-of_core_fix-v3-0-3bc49a2e8bda@quicinc.com
Changes in v3:
- Drop 2 applied patches and pick up patch 4/7 again
- Fix build error for patch 6/7.
- Include of_private.h instead of function declaration for patch 2/7
- Correct title and commit messages.
- Link to v2: https://lore.kernel.org/r/20241216-of_core_fix-v2-0-e69b8f60da63@quicinc.com
Changes in v2:
- Drop applied/conflict/TBD patches.
- Correct based on Rob's comments.
- Link to v1: https://lore.kernel.org/r/20241206-of_core_fix-v1-0-dc28ed56bec3@quicinc.com
---
Zijun Hu (14):
of: Correct child specifier used as input of the 2nd nexus node
of: Do not expose of_alias_scan() and correct its comments
of: Make of_property_present() applicable to all kinds of property
of: property: Use of_property_present() for of_fwnode_property_present()
of: Fix available buffer size calculating error in API of_device_uevent_modalias()
of: property: Avoiding using uninitialized variable @imaplen in parse_interrupt_map()
of: property: Fix potential fwnode reference's argument count got out of range
of: Remove a duplicated code block
of: reserved-memory: Fix using wrong number of cells to get property 'alignment'
of: reserved-memory: Do not make kmemleak ignore freed address
of: reserved-memory: Warn for missing static reserved memory regions
of: reserved-memory: Move an assignment to effective place in __reserved_mem_alloc_size()
of/fdt: Check fdt_get_mem_rsv() error in early_init_fdt_scan_reserved_mem()
of: Improve __of_add_property_sysfs() readability
drivers/of/address.c | 21 +++------------------
drivers/of/base.c | 7 +++----
drivers/of/device.c | 14 ++++++++++----
drivers/of/fdt.c | 7 ++++++-
drivers/of/fdt_address.c | 21 ++++-----------------
drivers/of/kobj.c | 3 ++-
drivers/of/of_private.h | 20 ++++++++++++++++++++
drivers/of/of_reserved_mem.c | 15 ++++++++++-----
drivers/of/pdt.c | 2 ++
drivers/of/property.c | 9 +++++++--
include/linux/of.h | 24 ++++++++++++------------
11 files changed, 79 insertions(+), 64 deletions(-)
---
base-commit: 456f3000f82571697d23c255c451cfcfb5c9ae75
change-id: 20241206-of_core_fix-dc3021a06418
Best regards,
--
Zijun Hu <quic_zijuhu(a)quicinc.com>
From: Arnd Bergmann <arnd(a)arndb.de>
An older cleanup of mine inadvertently removed geode-gx1 and geode-lx
from the list of CPUs that are known to support a working cmpxchg8b.
Fixes: 88a2b4edda3d ("x86/Kconfig: Rework CONFIG_X86_PAE dependency")
Cc: stable(a)vger.kernel.org
Signed-off-by: Arnd Bergmann <arnd(a)arndb.de>
---
arch/x86/Kconfig.cpu | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 2a7279d80460..42e6a40876ea 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -368,7 +368,7 @@ config X86_HAVE_PAE
config X86_CMPXCHG64
def_bool y
- depends on X86_HAVE_PAE || M586TSC || M586MMX || MK6 || MK7
+ depends on X86_HAVE_PAE || M586TSC || M586MMX || MK6 || MK7 || MGEODEGX1 || MGEODE_LX
# this should be set for all -march=.. options where the compiler
# generates cmov.
--
2.39.5
We create the stream encoders and attach connectors for each pipe we
have. As the number of pipes has increased, we've failed to update the
topology manager maximum number of payloads to match that. Bump up the
max stream count to match number of pipes, enabling the fourth stream on
platforms that support four pipes.
Cc: stable(a)vger.kernel.org
Cc: Imre Deak <imre.deak(a)intel.com>
Cc: Ville Syrjala <ville.syrjala(a)linux.intel.com>
Signed-off-by: Jani Nikula <jani.nikula(a)intel.com>
---
drivers/gpu/drm/i915/display/intel_dp_mst.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index 167e4a70ab12..822218d8cfd4 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -1896,7 +1896,8 @@ intel_dp_mst_encoder_init(struct intel_digital_port *dig_port, int conn_base_id)
/* create encoders */
mst_stream_encoders_create(dig_port);
ret = drm_dp_mst_topology_mgr_init(&intel_dp->mst_mgr, display->drm,
- &intel_dp->aux, 16, 3, conn_base_id);
+ &intel_dp->aux, 16,
+ INTEL_NUM_PIPES(display), conn_base_id);
if (ret) {
intel_dp->mst_mgr.cbs = NULL;
return ret;
--
2.39.5
Any rules using engine matching are currently broken due to RTP processing
happening too early in init, before the list of hardware engines has been
initialised.
Fix this by moving workaround processing to later in the driver probe
sequence, to just before the processed list is used for the first time.
Looking at the debugfs gt0/workarounds on ADL-P we notice 14011060649
should be present while we see, before:
GT Workarounds
14011059788
14015795083
And with the patch:
GT Workarounds
14011060649
14011059788
14015795083
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin(a)igalia.com>
Cc: Lucas De Marchi <lucas.demarchi(a)intel.com>
Cc: Matt Roper <matthew.d.roper(a)intel.com>
Cc: <stable(a)vger.kernel.org> # v6.11+
Reviewed-by: Lucas De Marchi <lucas.demarchi(a)intel.com>
---
drivers/gpu/drm/xe/xe_gt.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 650a0ee56e97..d59c03bc05b7 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -361,9 +361,7 @@ int xe_gt_init_early(struct xe_gt *gt)
if (err)
return err;
- xe_wa_process_gt(gt);
xe_wa_process_oob(gt);
- xe_tuning_process_gt(gt);
xe_force_wake_init_gt(gt, gt_to_fw(gt));
spin_lock_init(&gt->global_invl_lock);
@@ -450,6 +448,8 @@ static int all_fw_domain_init(struct xe_gt *gt)
}
xe_gt_mcr_set_implicit_defaults(gt);
+ xe_wa_process_gt(gt);
+ xe_tuning_process_gt(gt);
xe_reg_sr_apply_mmio(&gt->reg_sr, gt);
err = xe_gt_clock_init(gt);
--
2.48.0
arm64 supports multiple huge_pte sizes. Some of the sizes are covered by
a single pte entry at a particular level (PMD_SIZE, PUD_SIZE), and some
are covered by multiple ptes at a particular level (CONT_PTE_SIZE,
CONT_PMD_SIZE). So the function has to figure out the size from the
huge_pte pointer. This was previously done by walking the pgtable to
determine the level and by using the PTE_CONT bit to determine the
number of ptes at the level.
But the PTE_CONT bit is only valid when the pte is present. For
non-present pte values (e.g. markers, migration entries), the previous
implementation was therefore erroneously determining the size. There is
at least one known caller in core-mm, move_huge_pte(), which may call
huge_ptep_get_and_clear() for a non-present pte. So we must be robust to
this case. Additionally the "regular" ptep_get_and_clear() is robust to
being called for non-present ptes so it makes sense to follow the
behavior.
Fix this by using the new sz parameter which is now provided to the
function. Additionally when clearing each pte in a contig range, don't
gather the access and dirty bits if the pte is not present.
An alternative approach that would not require API changes would be to
store the PTE_CONT bit in a spare bit in the swap entry pte for the
non-present case. But it felt cleaner to follow other APIs' lead and
just pass in the size.
As an aside, PTE_CONT is bit 52, which corresponds to bit 40 in the swap
entry offset field (layout of non-present pte). Since hugetlb is never
swapped to disk, this field will only be populated for markers, which
always set this bit to 0 and hwpoison swap entries, which set the offset
field to a PFN; So it would only ever be 1 for a 52-bit PVA system where
memory in that high half was poisoned (I think!). So in practice, this
bit would almost always be zero for non-present ptes and we would only
clear the first entry if it was actually a contiguous block. That's
probably a less severe symptom than if it was always interpreted as 1
and cleared out potentially-present neighboring PTEs.
Cc: stable(a)vger.kernel.org
Fixes: 66b3923a1a0f ("arm64: hugetlb: add support for PTE contiguous bit")
Reviewed-by: Catalin Marinas <catalin.marinas(a)arm.com>
Signed-off-by: Ryan Roberts <ryan.roberts(a)arm.com>
tmp
---
arch/arm64/mm/hugetlbpage.c | 53 ++++++++++++++-----------------------
1 file changed, 20 insertions(+), 33 deletions(-)
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index 06db4649af91..b3a7fafe8892 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -100,20 +100,11 @@ static int find_num_contig(struct mm_struct *mm, unsigned long addr,
static inline int num_contig_ptes(unsigned long size, size_t *pgsize)
{
- int contig_ptes = 0;
+ int contig_ptes = 1;
*pgsize = size;
switch (size) {
-#ifndef __PAGETABLE_PMD_FOLDED
- case PUD_SIZE:
- if (pud_sect_supported())
- contig_ptes = 1;
- break;
-#endif
- case PMD_SIZE:
- contig_ptes = 1;
- break;
case CONT_PMD_SIZE:
*pgsize = PMD_SIZE;
contig_ptes = CONT_PMDS;
@@ -122,6 +113,8 @@ static inline int num_contig_ptes(unsigned long size, size_t *pgsize)
*pgsize = PAGE_SIZE;
contig_ptes = CONT_PTES;
break;
+ default:
+ WARN_ON(!__hugetlb_valid_size(size));
}
return contig_ptes;
@@ -163,24 +156,23 @@ static pte_t get_clear_contig(struct mm_struct *mm,
unsigned long pgsize,
unsigned long ncontig)
{
- pte_t orig_pte = __ptep_get(ptep);
- unsigned long i;
-
- for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) {
- pte_t pte = __ptep_get_and_clear(mm, addr, ptep);
-
- /*
- * If HW_AFDBM is enabled, then the HW could turn on
- * the dirty or accessed bit for any page in the set,
- * so check them all.
- */
- if (pte_dirty(pte))
- orig_pte = pte_mkdirty(orig_pte);
-
- if (pte_young(pte))
- orig_pte = pte_mkyoung(orig_pte);
+ pte_t pte, tmp_pte;
+ bool present;
+
+ pte = __ptep_get_and_clear(mm, addr, ptep);
+ present = pte_present(pte);
+ while (--ncontig) {
+ ptep++;
+ addr += pgsize;
+ tmp_pte = __ptep_get_and_clear(mm, addr, ptep);
+ if (present) {
+ if (pte_dirty(tmp_pte))
+ pte = pte_mkdirty(pte);
+ if (pte_young(tmp_pte))
+ pte = pte_mkyoung(pte);
+ }
}
- return orig_pte;
+ return pte;
}
static pte_t get_clear_contig_flush(struct mm_struct *mm,
@@ -401,13 +393,8 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
{
int ncontig;
size_t pgsize;
- pte_t orig_pte = __ptep_get(ptep);
-
- if (!pte_cont(orig_pte))
- return __ptep_get_and_clear(mm, addr, ptep);
-
- ncontig = find_num_contig(mm, addr, ptep, &pgsize);
+ ncontig = num_contig_ptes(sz, &pgsize);
return get_clear_contig(mm, addr, ptep, pgsize, ncontig);
}
--
2.43.0