When a card is present in the reader, the driver currently defers
autosuspend by returning -EAGAIN from the suspend callback to trigger
USB remote wakeup signaling. However, this does not guarantee that the
mmc child device has been resumed, and leaving it runtime-suspended
while the card is accessible can cause problems.
Ensure that a resume is explicitly requested for all child devices,
including the mmc host controller, before returning -EAGAIN. This fixes
a corner case introduced by the earlier remote wakeup handling and
improves the reliability of runtime PM when a card is inserted.
Fixes: 883a87ddf2f1 ("misc: rtsx_usb: Use USB remote wakeup signaling for card insertion detection")
Cc: stable@vger.kernel.org
Signed-off-by: Ricky Wu <ricky_wu@realtek.com>
---
drivers/misc/cardreader/rtsx_usb.c | 16 +++++++++-------
1 file changed, 9 insertions(+), 7 deletions(-)
diff --git a/drivers/misc/cardreader/rtsx_usb.c b/drivers/misc/cardreader/rtsx_usb.c
index 148107a4547c..d007a4455ce5 100644
--- a/drivers/misc/cardreader/rtsx_usb.c
+++ b/drivers/misc/cardreader/rtsx_usb.c
@@ -698,6 +698,12 @@ static void rtsx_usb_disconnect(struct usb_interface *intf)
}
#ifdef CONFIG_PM
+static int rtsx_usb_resume_child(struct device *dev, void *data)
+{
+ pm_request_resume(dev);
+ return 0;
+}
+
static int rtsx_usb_suspend(struct usb_interface *intf, pm_message_t message)
{
struct rtsx_ucr *ucr =
@@ -713,8 +719,10 @@ static int rtsx_usb_suspend(struct usb_interface *intf, pm_message_t message)
mutex_unlock(&ucr->dev_mutex);
/* Defer the autosuspend if card exists */
- if (val & (SD_CD | MS_CD))
+ if (val & (SD_CD | MS_CD)) {
+ device_for_each_child(&intf->dev, NULL, rtsx_usb_resume_child);
return -EAGAIN;
+ }
} else {
/* There is an ongoing operation*/
return -EAGAIN;
@@ -724,12 +732,6 @@ static int rtsx_usb_suspend(struct usb_interface *intf, pm_message_t message)
return 0;
}
-static int rtsx_usb_resume_child(struct device *dev, void *data)
-{
- pm_request_resume(dev);
- return 0;
-}
-
static int rtsx_usb_resume(struct usb_interface *intf)
{
device_for_each_child(&intf->dev, NULL, rtsx_usb_resume_child);
--
2.25.1
DIV_ROUND_CLOSEST(x, 100) returns either 0 or 1 if 0 < x <= 100, so the
division needs to be performed after the multiplication and not the
other way around, to properly scale the value.
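For example, with hypothetical values min = 0, max = 255:

  ac_level = 60:  before: (255 - 0) * DIV_ROUND_CLOSEST(60, 100) = 255 * 1 = 255
                  after:  DIV_ROUND_CLOSEST((255 - 0) * 60, 100) = 153
  dc_level = 40:  before: (255 - 0) * DIV_ROUND_CLOSEST(40, 100) = 255 * 0 = 0
                  after:  DIV_ROUND_CLOSEST((255 - 0) * 40, 100) = 102

Any level below 50 collapses to a brightness of 0 with the old expression.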
Fixes: 8b5f3a229a70 ("drm/amd/display: Fix default DC and AC levels")
Signed-off-by: Lauri Tirkkonen <lauri@hacktheplanet.fi>
Cc: stable@vger.kernel.org
---
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index f58fa5da7fe5..8a5b5dfad1ab 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -4941,9 +4941,9 @@ amdgpu_dm_register_backlight_device(struct amdgpu_dm_connector *aconnector)
caps = &dm->backlight_caps[aconnector->bl_idx];
if (get_brightness_range(caps, &min, &max)) {
if (power_supply_is_system_supplied() > 0)
- props.brightness = (max - min) * DIV_ROUND_CLOSEST(caps->ac_level, 100);
+ props.brightness = DIV_ROUND_CLOSEST((max - min) * caps->ac_level, 100);
else
- props.brightness = (max - min) * DIV_ROUND_CLOSEST(caps->dc_level, 100);
+ props.brightness = DIV_ROUND_CLOSEST((max - min) * caps->dc_level, 100);
/* min is zero, so max needs to be adjusted */
props.max_brightness = max - min;
drm_dbg(drm, "Backlight caps: min: %d, max: %d, ac %d, dc %d\n", min, max,
--
2.50.1
--
Lauri Tirkkonen | lotheac @ IRCnet
Hi,
I hit this regression on the stable kernel on Alpine with a Lenovo Yoga
Slim 7 Pro 17ACH5. During early boot, when the amdgpu module gets
loaded, backlight brightness is set to zero, resulting in a black
screen (and nothing in userspace is running yet to handle brightness
keys; I need to use an external monitor or type in my rootfs passphrase
blind).
#regzbot introduced: 6c56c8ec6f9762c33bd22f31d43af4194d12da53
bisect log:
git bisect start
# status: waiting for both good and bad commits
# good: [e60eb441596d1c70e4a264d2bac726c6cd2da067] Linux 6.15.4
git bisect good e60eb441596d1c70e4a264d2bac726c6cd2da067
# status: waiting for bad commit, 1 good commit known
# bad: [1562d948232546cfad45a1beddc70fe0c7b34950] Linux 6.15.6
git bisect bad 1562d948232546cfad45a1beddc70fe0c7b34950
# good: [5e10620cb8e76279fd86411536c3fa0f486cd634] drm/xe/vm: move rebind_work init earlier
git bisect good 5e10620cb8e76279fd86411536c3fa0f486cd634
# bad: [ece85751c3e46c0e3c4f772113f691b7aec81d5d] btrfs: record new subvolume in parent dir earlier to avoid dir logging races
git bisect bad ece85751c3e46c0e3c4f772113f691b7aec81d5d
# bad: [9f5d2487a9fad1d36bcf107d1f3b1ebc8b6796cf] iommufd/selftest: Add asserts testing global mfd
git bisect bad 9f5d2487a9fad1d36bcf107d1f3b1ebc8b6796cf
# good: [c0687ec5625b2261d48936d03c761e38657f4a4b] rust: completion: implement initial abstraction
git bisect good c0687ec5625b2261d48936d03c761e38657f4a4b
# bad: [889906e6eb5fab990c9b6b5fe8f1122b2416fc22] drm/amd/display: Export full brightness range to userspace
git bisect bad 889906e6eb5fab990c9b6b5fe8f1122b2416fc22
# good: [c7d15ba11c8561c5f325ffeb27ed8a4e82d4d322] io_uring/kbuf: flag partial buffer mappings
git bisect good c7d15ba11c8561c5f325ffeb27ed8a4e82d4d322
# good: [66089fa8c9ed162744037ab0375e38cc74c7f7ed] drm/amd/display: Add debugging message for brightness caps
git bisect good 66089fa8c9ed162744037ab0375e38cc74c7f7ed
# bad: [cd711c87c2862be5e71eee79901f94e1c943f9fc] drm/amd/display: Only read ACPI backlight caps once
git bisect bad cd711c87c2862be5e71eee79901f94e1c943f9fc
# bad: [6c56c8ec6f9762c33bd22f31d43af4194d12da53] drm/amd/display: Fix default DC and AC levels
git bisect bad 6c56c8ec6f9762c33bd22f31d43af4194d12da53
# first bad commit: [6c56c8ec6f9762c33bd22f31d43af4194d12da53] drm/amd/display: Fix default DC and AC levels
'dmesg|grep amd' on 6.15.7 on this machine:
[ 0.319726] perf/amd_iommu: Detected AMD IOMMU #0 (2 banks, 4 counters/bank).
[ 4.090573] [drm] amdgpu kernel modesetting enabled.
[ 4.094238] amdgpu: Virtual CRAT table created for CPU
[ 4.095389] amdgpu: Topology: Add CPU node
[ 4.096451] amdgpu 0000:03:00.0: enabling device (0006 -> 0007)
[ 4.174815] amdgpu 0000:03:00.0: amdgpu: detected ip block number 0 <soc15_common>
[ 4.176034] amdgpu 0000:03:00.0: amdgpu: detected ip block number 1 <gmc_v9_0>
[ 4.176992] amdgpu 0000:03:00.0: amdgpu: detected ip block number 2 <vega10_ih>
[ 4.177911] amdgpu 0000:03:00.0: amdgpu: detected ip block number 3 <psp>
[ 4.178799] amdgpu 0000:03:00.0: amdgpu: detected ip block number 4 <smu>
[ 4.179704] amdgpu 0000:03:00.0: amdgpu: detected ip block number 5 <dm>
[ 4.180594] amdgpu 0000:03:00.0: amdgpu: detected ip block number 6 <gfx_v9_0>
[ 4.181445] amdgpu 0000:03:00.0: amdgpu: detected ip block number 7 <sdma_v4_0>
[ 4.182299] amdgpu 0000:03:00.0: amdgpu: detected ip block number 8 <vcn_v2_0>
[ 4.183114] amdgpu 0000:03:00.0: amdgpu: detected ip block number 9 <jpeg_v2_0>
[ 4.183910] amdgpu 0000:03:00.0: amdgpu: Fetched VBIOS from VFCT
[ 4.184800] amdgpu: ATOM BIOS: 113-CEZANNE-017
[ 4.208484] amdgpu 0000:03:00.0: vgaarb: deactivate vga console
[ 4.208493] amdgpu 0000:03:00.0: amdgpu: Trusted Memory Zone (TMZ) feature enabled
[ 4.208509] amdgpu 0000:03:00.0: amdgpu: MODE2 reset
[ 4.209086] amdgpu 0000:03:00.0: amdgpu: VRAM: 2048M 0x000000F400000000 - 0x000000F47FFFFFFF (2048M used)
[ 4.209099] amdgpu 0000:03:00.0: amdgpu: GART: 1024M 0x0000000000000000 - 0x000000003FFFFFFF
[ 4.209376] [drm] amdgpu: 2048M of VRAM memory ready
[ 4.209386] [drm] amdgpu: 6912M of GTT memory ready.
[ 4.210517] amdgpu 0000:03:00.0: amdgpu: Found VCN firmware Version ENC: 1.24 DEC: 8 VEP: 0 Revision: 3
[ 4.927350] amdgpu 0000:03:00.0: amdgpu: reserve 0x400000 from 0xf47f400000 for PSP TMR
[ 5.010609] amdgpu 0000:03:00.0: amdgpu: RAS: optional ras ta ucode is not available
[ 5.021347] amdgpu 0000:03:00.0: amdgpu: RAP: optional rap ta ucode is not available
[ 5.021357] amdgpu 0000:03:00.0: amdgpu: SECUREDISPLAY: securedisplay ta ucode is not available
[ 5.021725] amdgpu 0000:03:00.0: amdgpu: SMU is initialized successfully!
[ 5.131949] amdgpu 0000:03:00.0: amdgpu: [drm] Using ACPI provided EDID for eDP-1
[ 5.385266] kfd kfd: amdgpu: Allocated 3969056 bytes on gart
[ 5.385286] kfd kfd: amdgpu: Total number of KFD nodes to be created: 1
[ 5.385435] amdgpu: Virtual CRAT table created for GPU
[ 5.385562] amdgpu: Topology: Add dGPU node [0x1638:0x1002]
[ 5.385569] kfd kfd: amdgpu: added device 1002:1638
[ 5.385582] amdgpu 0000:03:00.0: amdgpu: SE 1, SH per SE 1, CU per SH 8, active_cu_number 8
[ 5.385592] amdgpu 0000:03:00.0: amdgpu: ring gfx uses VM inv eng 0 on hub 0
[ 5.385598] amdgpu 0000:03:00.0: amdgpu: ring comp_1.0.0 uses VM inv eng 1 on hub 0
[ 5.385605] amdgpu 0000:03:00.0: amdgpu: ring comp_1.1.0 uses VM inv eng 4 on hub 0
[ 5.385612] amdgpu 0000:03:00.0: amdgpu: ring comp_1.2.0 uses VM inv eng 5 on hub 0
[ 5.385619] amdgpu 0000:03:00.0: amdgpu: ring comp_1.3.0 uses VM inv eng 6 on hub 0
[ 5.385625] amdgpu 0000:03:00.0: amdgpu: ring comp_1.0.1 uses VM inv eng 7 on hub 0
[ 5.385632] amdgpu 0000:03:00.0: amdgpu: ring comp_1.1.1 uses VM inv eng 8 on hub 0
[ 5.385639] amdgpu 0000:03:00.0: amdgpu: ring comp_1.2.1 uses VM inv eng 9 on hub 0
[ 5.385645] amdgpu 0000:03:00.0: amdgpu: ring comp_1.3.1 uses VM inv eng 10 on hub 0
[ 5.385652] amdgpu 0000:03:00.0: amdgpu: ring kiq_0.2.1.0 uses VM inv eng 11 on hub 0
[ 5.385659] amdgpu 0000:03:00.0: amdgpu: ring sdma0 uses VM inv eng 0 on hub 8
[ 5.385665] amdgpu 0000:03:00.0: amdgpu: ring vcn_dec uses VM inv eng 1 on hub 8
[ 5.385672] amdgpu 0000:03:00.0: amdgpu: ring vcn_enc0 uses VM inv eng 4 on hub 8
[ 5.385679] amdgpu 0000:03:00.0: amdgpu: ring vcn_enc1 uses VM inv eng 5 on hub 8
[ 5.385685] amdgpu 0000:03:00.0: amdgpu: ring jpeg_dec uses VM inv eng 6 on hub 8
[ 5.454665] amdgpu 0000:03:00.0: amdgpu: Runtime PM not available
[ 5.455003] amdgpu 0000:03:00.0: amdgpu: [drm] Using custom brightness curve
[ 5.455339] [drm] Initialized amdgpu 3.63.0 for 0000:03:00.0 on minor 1
[ 5.480731] fbcon: amdgpudrmfb (fb0) is primary device
[ 6.796057] amdgpu 0000:03:00.0: [drm] fb0: amdgpudrmfb frame buffer device
--
Lauri Tirkkonen | lotheac @ IRCnet
From: Ge Yang <yangge1116@126.com>
Commit d228814b1913 ("efi/libstub: Add get_event_log() support
for CC platforms") reuses the TPM2 support code for CC platforms. As a
result, when launching a TDX virtual machine with coco measurement
enabled, the following error log is generated:
[Firmware Bug]: Failed to parse event in TPM Final Events Log
Call Trace:
efi_config_parse_tables()
efi_tpm_eventlog_init()
tpm2_calc_event_log_size()
__calc_tpm2_event_size()
The pcr_idx value in the Intel TDX log header is 1, causing the function
__calc_tpm2_event_size() to fail to recognize the log header, ultimately
leading to the "Failed to parse event in TPM Final Events Log" error.
According to UEFI Specification 2.10, Section 38.4.1: For TDX, TPM PCR
0 maps to MRTD, so the log header uses TPM PCR 1 instead. To successfully
parse the TDX event log header, the check for a pcr_idx value of 0
must be skipped.
According to Table 6 in Section 10.2.1 of the TCG PC Client
Specification, the index field does not require the PCR index to be
fixed at zero. Therefore, skipping the check for a pcr_idx value of
0 for CC platforms is safe.
Link: https://uefi.org/specs/UEFI/2.10/38_Confidential_Computing.html#intel-trust…
Link: https://trustedcomputinggroup.org/wp-content/uploads/TCG_PCClient_PFP_r1p05…
Fixes: d228814b1913 ("efi/libstub: Add get_event_log() support for CC platforms")
Signed-off-by: Ge Yang <yangge1116@126.com>
Cc: stable@vger.kernel.org
---
V2:
- limit the fix to CC platforms only, as suggested by Jarkko and Sathyanarayanan
drivers/char/tpm/eventlog/tpm2.c | 3 ++-
drivers/firmware/efi/libstub/tpm.c | 13 +++++++++----
drivers/firmware/efi/tpm.c | 3 ++-
include/linux/tpm_eventlog.h | 14 +++++++++++---
4 files changed, 24 insertions(+), 9 deletions(-)
diff --git a/drivers/char/tpm/eventlog/tpm2.c b/drivers/char/tpm/eventlog/tpm2.c
index 37a0580..87a8b7f 100644
--- a/drivers/char/tpm/eventlog/tpm2.c
+++ b/drivers/char/tpm/eventlog/tpm2.c
@@ -36,7 +36,8 @@
static size_t calc_tpm2_event_size(struct tcg_pcr_event2_head *event,
struct tcg_pcr_event *event_header)
{
- return __calc_tpm2_event_size(event, event_header, false);
+ return __calc_tpm2_event_size(event, event_header, false,
+ cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT));
}
static void *tpm2_bios_measurements_start(struct seq_file *m, loff_t *pos)
diff --git a/drivers/firmware/efi/libstub/tpm.c b/drivers/firmware/efi/libstub/tpm.c
index a5c6c4f..9728060 100644
--- a/drivers/firmware/efi/libstub/tpm.c
+++ b/drivers/firmware/efi/libstub/tpm.c
@@ -50,7 +50,8 @@ void efi_enable_reset_attack_mitigation(void)
static void efi_retrieve_tcg2_eventlog(int version, efi_physical_addr_t log_location,
efi_physical_addr_t log_last_entry,
efi_bool_t truncated,
- struct efi_tcg2_final_events_table *final_events_table)
+ struct efi_tcg2_final_events_table *final_events_table,
+ bool is_cc_event)
{
efi_guid_t linux_eventlog_guid = LINUX_EFI_TPM_EVENT_LOG_GUID;
efi_status_t status;
@@ -87,7 +88,8 @@ static void efi_retrieve_tcg2_eventlog(int version, efi_physical_addr_t log_loca
last_entry_size =
__calc_tpm2_event_size((void *)last_entry_addr,
(void *)(long)log_location,
- false);
+ false,
+ is_cc_event);
} else {
last_entry_size = sizeof(struct tcpa_event) +
((struct tcpa_event *) last_entry_addr)->event_size;
@@ -123,7 +125,8 @@ static void efi_retrieve_tcg2_eventlog(int version, efi_physical_addr_t log_loca
header = data + offset + final_events_size;
event_size = __calc_tpm2_event_size(header,
(void *)(long)log_location,
- false);
+ false,
+ is_cc_event);
/* If calc fails this is a malformed log */
if (!event_size)
break;
@@ -157,6 +160,7 @@ void efi_retrieve_eventlog(void)
efi_tcg2_protocol_t *tpm2 = NULL;
efi_bool_t truncated;
efi_status_t status;
+ bool is_cc_event = false;
status = efi_bs_call(locate_protocol, &tpm2_guid, NULL, (void **)&tpm2);
if (status == EFI_SUCCESS) {
@@ -186,11 +190,12 @@ void efi_retrieve_eventlog(void)
final_events_table =
get_efi_config_table(EFI_CC_FINAL_EVENTS_TABLE_GUID);
+ is_cc_event = true;
}
if (status != EFI_SUCCESS || !log_location)
return;
efi_retrieve_tcg2_eventlog(version, log_location, log_last_entry,
- truncated, final_events_table);
+ truncated, final_events_table, is_cc_event);
}
diff --git a/drivers/firmware/efi/tpm.c b/drivers/firmware/efi/tpm.c
index cdd4310..a94816d 100644
--- a/drivers/firmware/efi/tpm.c
+++ b/drivers/firmware/efi/tpm.c
@@ -23,7 +23,8 @@ static int __init tpm2_calc_event_log_size(void *data, int count, void *size_inf
while (count > 0) {
header = data + size;
- event_size = __calc_tpm2_event_size(header, size_info, true);
+ event_size = __calc_tpm2_event_size(header, size_info, true,
+ cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT));
if (event_size == 0)
return -1;
size += event_size;
diff --git a/include/linux/tpm_eventlog.h b/include/linux/tpm_eventlog.h
index 891368e..b3380c9 100644
--- a/include/linux/tpm_eventlog.h
+++ b/include/linux/tpm_eventlog.h
@@ -143,6 +143,7 @@ struct tcg_algorithm_info {
* @event: Pointer to the event whose size should be calculated
* @event_header: Pointer to the initial event containing the digest lengths
* @do_mapping: Whether or not the event needs to be mapped
+ * @is_cc_event: Whether or not the event is from a CC platform
*
* The TPM2 event log format can contain multiple digests corresponding to
* separate PCR banks, and also contains a variable length of the data that
@@ -159,7 +160,8 @@ struct tcg_algorithm_info {
static __always_inline u32 __calc_tpm2_event_size(struct tcg_pcr_event2_head *event,
struct tcg_pcr_event *event_header,
- bool do_mapping)
+ bool do_mapping,
+ bool is_cc_event)
{
struct tcg_efi_specid_event_head *efispecid;
struct tcg_event_field *event_field;
@@ -201,8 +203,14 @@ static __always_inline u32 __calc_tpm2_event_size(struct tcg_pcr_event2_head *ev
count = event->count;
event_type = event->event_type;
- /* Verify that it's the log header */
- if (event_header->pcr_idx != 0 ||
+ /*
+ * Verify that it's the log header. According to the TCG PC Client
+ * Specification, when identifying a log header, the check for a
+ * pcr_idx value of 0 is not required. For CC platforms this check
+ * must be skipped when parsing the log header; otherwise the CC
+ * platform's log header may fail to be recognized.
+ */
+ if ((!is_cc_event && event_header->pcr_idx != 0) ||
event_header->event_type != NO_ACTION ||
memcmp(event_header->digest, zero_digest, sizeof(zero_digest))) {
size = 0;
--
2.7.4
Now that p*d_populate_kernel{,_init}() handles page table synchronization,
calling sync_global_pgds() is no longer necessary. Remove those
redundant calls.
Additionally, since arch_sync_kernel_mappings() is now the only remaining
caller of sync_global_pgds(), fold the function into its user.
Cc: stable@vger.kernel.org
Suggested-by: Dave Hansen <dave.hansen@linux.intel.com>
Signed-off-by: Harry Yoo <harry.yoo@oracle.com>
---
arch/x86/mm/init_64.c | 17 ++---------------
1 file changed, 2 insertions(+), 15 deletions(-)
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index e4922b9c8403..f1507de3b7a3 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -228,7 +228,7 @@ static void sync_global_pgds_l4(unsigned long start, unsigned long end)
* When memory was added make sure all the processes MM have
* suitable PGD entries in the local PGD level page.
*/
-static void sync_global_pgds(unsigned long start, unsigned long end)
+void arch_sync_kernel_mappings(unsigned long start, unsigned long end)
{
if (pgtable_l5_enabled())
sync_global_pgds_l5(start, end);
@@ -236,11 +236,6 @@ static void sync_global_pgds(unsigned long start, unsigned long end)
sync_global_pgds_l4(start, end);
}
-void arch_sync_kernel_mappings(unsigned long start, unsigned long end)
-{
- sync_global_pgds(start, end);
-}
-
/*
* NOTE: This function is marked __ref because it calls __init function
* (alloc_bootmem_pages). It's safe to do it ONLY when after_bootmem == 0.
@@ -746,13 +741,11 @@ __kernel_physical_mapping_init(unsigned long paddr_start,
unsigned long page_size_mask,
pgprot_t prot, bool init)
{
- bool pgd_changed = false;
- unsigned long vaddr, vaddr_start, vaddr_end, vaddr_next, paddr_last;
+ unsigned long vaddr, vaddr_end, vaddr_next, paddr_last;
paddr_last = paddr_end;
vaddr = (unsigned long)__va(paddr_start);
vaddr_end = (unsigned long)__va(paddr_end);
- vaddr_start = vaddr;
for (; vaddr < vaddr_end; vaddr = vaddr_next) {
pgd_t *pgd = pgd_offset_k(vaddr);
@@ -781,12 +774,8 @@ __kernel_physical_mapping_init(unsigned long paddr_start,
(pud_t *) p4d, init);
spin_unlock(&init_mm.page_table_lock);
- pgd_changed = true;
}
- if (pgd_changed)
- sync_global_pgds(vaddr_start, vaddr_end - 1);
-
return paddr_last;
}
@@ -1580,8 +1569,6 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
err = -ENOMEM;
} else
err = vmemmap_populate_basepages(start, end, node, NULL);
- if (!err)
- sync_global_pgds(start, end - 1);
return err;
}
--
2.43.0
Introduce and use {pgd,p4d}_populate_kernel() in core MM code when
populating PGD and P4D entries for the kernel address space.
These helpers ensure proper synchronization of page tables when
updating the kernel portion of top-level page tables.
Until now, the kernel has relied on each architecture to handle
synchronization of top-level page tables in an ad-hoc manner.
For example, see commit 9b861528a801 ("x86-64, mem: Update all PGDs for
direct mapping and vmemmap mapping changes").
However, this approach has proven fragile for the following reasons:
1) It is easy to forget to perform the necessary page table
synchronization when introducing new changes.
For instance, commit 4917f55b4ef9 ("mm/sparse-vmemmap: improve memory
savings for compound devmaps") overlooked the need to synchronize
page tables for the vmemmap area.
2) It is also easy to overlook that the vmemmap and direct mapping areas
must not be accessed before explicit page table synchronization.
For example, commit 8d400913c231 ("x86/vmemmap: handle unpopulated
sub-pmd ranges") caused crashes by accessing the vmemmap area
before calling sync_global_pgds().
To address this, as suggested by Dave Hansen, introduce _kernel() variants
of the page table population helpers, which invoke architecture-specific
hooks to properly synchronize page tables.
They reuse existing infrastructure for vmalloc and ioremap.
Synchronization requirements are determined by ARCH_PAGE_TABLE_SYNC_MASK,
and the actual synchronization is performed by arch_sync_kernel_mappings().
This change currently targets only x86_64, so only PGD and P4D level
helpers are introduced. In theory, PUD and PMD level helpers can be added
later if needed by other architectures.
Currently this is a no-op, since no architecture sets
PGTBL_{PGD,P4D}_MODIFIED in ARCH_PAGE_TABLE_SYNC_MASK.
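As an illustration (not part of this patch), a hypothetical architecture
that wants the new helpers to synchronize at the PGD/P4D level would
define the mask in its arch headers and provide the hook, roughly:

  /* hypothetical arch header: request sync when kernel PGD/P4D entries change */
  #define ARCH_PAGE_TABLE_SYNC_MASK (PGTBL_PGD_MODIFIED | PGTBL_P4D_MODIFIED)

  /* arch code: propagate new top-level kernel entries to all page tables */
  void arch_sync_kernel_mappings(unsigned long start, unsigned long end);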
Cc: stable@vger.kernel.org
Suggested-by: Dave Hansen <dave.hansen@linux.intel.com>
Signed-off-by: Harry Yoo <harry.yoo@oracle.com>
---
include/asm-generic/pgalloc.h | 18 ++++++++++++++++--
mm/kasan/init.c | 10 +++++-----
mm/percpu.c | 4 ++--
mm/sparse-vmemmap.c | 4 ++--
4 files changed, 25 insertions(+), 11 deletions(-)
diff --git a/include/asm-generic/pgalloc.h b/include/asm-generic/pgalloc.h
index 7ff5d7ca4cd6..c05fea06b3fd 100644
--- a/include/asm-generic/pgalloc.h
+++ b/include/asm-generic/pgalloc.h
@@ -298,8 +298,8 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
/*
* Architectures can set this mask to a combination of PGTBL_P?D_MODIFIED values
- * and let generic vmalloc and ioremap code know when arch_sync_kernel_mappings()
- * needs to be called.
+ * and let generic vmalloc, ioremap and page table update code know when
+ * arch_sync_kernel_mappings() needs to be called.
*/
#ifndef ARCH_PAGE_TABLE_SYNC_MASK
#define ARCH_PAGE_TABLE_SYNC_MASK 0
@@ -312,6 +312,20 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
*/
void arch_sync_kernel_mappings(unsigned long start, unsigned long end);
+#define pgd_populate_kernel(addr, pgd, p4d) \
+do { \
+ pgd_populate(&init_mm, pgd, p4d); \
+ if (ARCH_PAGE_TABLE_SYNC_MASK & PGTBL_PGD_MODIFIED) \
+ arch_sync_kernel_mappings(addr, addr); \
+} while (0)
+
+#define p4d_populate_kernel(addr, p4d, pud) \
+do { \
+ p4d_populate(&init_mm, p4d, pud); \
+ if (ARCH_PAGE_TABLE_SYNC_MASK & PGTBL_P4D_MODIFIED) \
+ arch_sync_kernel_mappings(addr, addr); \
+} while (0)
+
#endif /* CONFIG_MMU */
#endif /* __ASM_GENERIC_PGALLOC_H */
diff --git a/mm/kasan/init.c b/mm/kasan/init.c
index ced6b29fcf76..43de820ee282 100644
--- a/mm/kasan/init.c
+++ b/mm/kasan/init.c
@@ -191,7 +191,7 @@ static int __ref zero_p4d_populate(pgd_t *pgd, unsigned long addr,
pud_t *pud;
pmd_t *pmd;
- p4d_populate(&init_mm, p4d,
+ p4d_populate_kernel(addr, p4d,
lm_alias(kasan_early_shadow_pud));
pud = pud_offset(p4d, addr);
pud_populate(&init_mm, pud,
@@ -212,7 +212,7 @@ static int __ref zero_p4d_populate(pgd_t *pgd, unsigned long addr,
} else {
p = early_alloc(PAGE_SIZE, NUMA_NO_NODE);
pud_init(p);
- p4d_populate(&init_mm, p4d, p);
+ p4d_populate_kernel(addr, p4d, p);
}
}
zero_pud_populate(p4d, addr, next);
@@ -251,10 +251,10 @@ int __ref kasan_populate_early_shadow(const void *shadow_start,
* puds,pmds, so pgd_populate(), pud_populate()
* is noops.
*/
- pgd_populate(&init_mm, pgd,
+ pgd_populate_kernel(addr, pgd,
lm_alias(kasan_early_shadow_p4d));
p4d = p4d_offset(pgd, addr);
- p4d_populate(&init_mm, p4d,
+ p4d_populate_kernel(addr, p4d,
lm_alias(kasan_early_shadow_pud));
pud = pud_offset(p4d, addr);
pud_populate(&init_mm, pud,
@@ -273,7 +273,7 @@ int __ref kasan_populate_early_shadow(const void *shadow_start,
if (!p)
return -ENOMEM;
} else {
- pgd_populate(&init_mm, pgd,
+ pgd_populate_kernel(addr, pgd,
early_alloc(PAGE_SIZE, NUMA_NO_NODE));
}
}
diff --git a/mm/percpu.c b/mm/percpu.c
index 782cc148b39c..57450a03c432 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -3134,13 +3134,13 @@ void __init __weak pcpu_populate_pte(unsigned long addr)
if (pgd_none(*pgd)) {
p4d = memblock_alloc_or_panic(P4D_TABLE_SIZE, P4D_TABLE_SIZE);
- pgd_populate(&init_mm, pgd, p4d);
+ pgd_populate_kernel(addr, pgd, p4d);
}
p4d = p4d_offset(pgd, addr);
if (p4d_none(*p4d)) {
pud = memblock_alloc_or_panic(PUD_TABLE_SIZE, PUD_TABLE_SIZE);
- p4d_populate(&init_mm, p4d, pud);
+ p4d_populate_kernel(addr, p4d, pud);
}
pud = pud_offset(p4d, addr);
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index fd2ab5118e13..e275310ac708 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -229,7 +229,7 @@ p4d_t * __meminit vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node)
if (!p)
return NULL;
pud_init(p);
- p4d_populate(&init_mm, p4d, p);
+ p4d_populate_kernel(addr, p4d, p);
}
return p4d;
}
@@ -241,7 +241,7 @@ pgd_t * __meminit vmemmap_pgd_populate(unsigned long addr, int node)
void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
if (!p)
return NULL;
- pgd_populate(&init_mm, pgd, p);
+ pgd_populate_kernel(addr, pgd, p);
}
return pgd;
}
--
2.43.0