When using Secure TSC, the GUEST_TSC_FREQ MSR reports a frequency based on the nominal P0 frequency, which deviates slightly (typically ~0.2%) from the actual mean TSC frequency due to clocking parameters. Over extended VM uptime, this discrepancy accumulates, causing clock skew between the hypervisor and SEV-SNP VM, leading to early timer interrupts as perceived by the guest.
The guest kernel relies on the reported nominal frequency for TSC-based timekeeping, while the actual frequency set during SNP_LAUNCH_START may differ. This mismatch results in inaccurate time calculations, causing the guest to perceive hrtimers as firing earlier than expected.
Utilize the TSC_FACTOR from the SEV firmware's secrets page (see "Secrets Page Format" in the SNP Firmware ABI Specification) to calculate the mean TSC frequency, ensuring accurate timekeeping and mitigating clock skew in SEV-SNP VMs.
Use early_ioremap_encrypted() to map the secrets page as ioremap_encrypted() uses kmalloc() which is not available during early TSC initialization and causes a panic.
Fixes: 73bbf3b0fbba ("x86/tsc: Init the TSC for Secure TSC guests") Cc: stable@vger.kernel.org Signed-off-by: Nikunj A Dadhania nikunj@amd.com
--- v3: * Remove unnecessary parenthesis (Ingo) * To avoid type cast, harmonize the types of snp_tsc_freq_khz and securetsc_get_tsc_khz() (Ingo) * Use rdmsr for GUEST_TSC_FREQ and extract BIT[17:0] from lower 32-bit
v2: * Move the SNP TSC scaling constant to the header (Dionna) * Drop the unsigned long cast and add in securetsc_get_tsc_khz (Tom) * Drop the RB from Tom as the code has changed --- arch/x86/include/asm/sev.h | 18 +++++++++++++++++- arch/x86/coco/sev/core.c | 22 ++++++++++++++++++---- 2 files changed, 35 insertions(+), 5 deletions(-)
diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index a81769a32eaa..cfa3ace227e6 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -223,6 +223,19 @@ struct snp_tsc_info_resp { u8 rsvd2[100]; } __packed;
+ +/* + * Obtain the mean TSC frequency by decreasing the nominal TSC frequency with + * TSC_FACTOR as documented in the SNP Firmware ABI specification: + * + * GUEST_TSC_FREQ * (1 - (TSC_FACTOR * 0.00001)) + * + * which is equivalent to: + * + * GUEST_TSC_FREQ -= (GUEST_TSC_FREQ * TSC_FACTOR) / 100000; + */ +#define SNP_SCALE_TSC_FREQ(freq, factor) ((freq) - (freq) * (factor) / 100000) + struct snp_guest_req { void *req_buf; size_t req_sz; @@ -283,8 +296,11 @@ struct snp_secrets_page { u8 svsm_guest_vmpl; u8 rsvd3[3];
+ /* The percentage decrease from nominal to mean TSC frequency. */ + u32 tsc_factor; + /* Remainder of page */ - u8 rsvd4[3744]; + u8 rsvd4[3740]; } __packed;
struct snp_msg_desc { diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c index 46bd89578ec7..115a5750c40d 100644 --- a/arch/x86/coco/sev/core.c +++ b/arch/x86/coco/sev/core.c @@ -88,7 +88,7 @@ static const char * const sev_status_feat_names[] = { */ static u64 snp_tsc_scale __ro_after_init; static u64 snp_tsc_offset __ro_after_init; -static u64 snp_tsc_freq_khz __ro_after_init; +static unsigned long snp_tsc_freq_khz __ro_after_init;
DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data); DEFINE_PER_CPU(struct sev_es_save_area *, sev_vmsa); @@ -2174,15 +2174,29 @@ static unsigned long securetsc_get_tsc_khz(void)
void __init snp_secure_tsc_init(void) { - unsigned long long tsc_freq_mhz; + unsigned long tsc_freq_mhz, dummy; + struct snp_secrets_page *secrets; + void *mem;
if (!cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC)) return;
+ mem = early_memremap_encrypted(sev_secrets_pa, PAGE_SIZE); + if (!mem) { + pr_err("Unable to get TSC_FACTOR: failed to map the SNP secrets page.\n"); + sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SECURE_TSC); + } + + secrets = (__force struct snp_secrets_page *)mem; + setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ); - rdmsrq(MSR_AMD64_GUEST_TSC_FREQ, tsc_freq_mhz); - snp_tsc_freq_khz = (unsigned long)(tsc_freq_mhz * 1000); + rdmsr(MSR_AMD64_GUEST_TSC_FREQ, tsc_freq_mhz, dummy); + /* Extract the GUEST TSC MHZ from BIT[17:0], rest is reserved space */ + tsc_freq_mhz = tsc_freq_mhz & GENMASK_ULL(17, 0); + snp_tsc_freq_khz = SNP_SCALE_TSC_FREQ(tsc_freq_mhz * 1000, secrets->tsc_factor);
x86_platform.calibrate_cpu = securetsc_get_tsc_khz; x86_platform.calibrate_tsc = securetsc_get_tsc_khz; + + early_memunmap(mem, PAGE_SIZE); }
base-commit: 4da71e9f8939987cd2063e0b2ab5bb5eafc80a87
The following commit has been merged into the x86/urgent branch of tip:
Commit-ID: 4a35d2b5254af89595fd90dae9ee0c8f990a148d Gitweb: https://git.kernel.org/tip/4a35d2b5254af89595fd90dae9ee0c8f990a148d Author: Nikunj A Dadhania nikunj@amd.com AuthorDate: Mon, 30 Jun 2025 13:48:58 +05:30 Committer: Borislav Petkov (AMD) bp@alien8.de CommitterDate: Mon, 30 Jun 2025 11:11:46 +02:00
x86/sev: Use TSC_FACTOR for Secure TSC frequency calculation
When using Secure TSC, the GUEST_TSC_FREQ MSR reports a frequency based on the nominal P0 frequency, which deviates slightly (typically ~0.2%) from the actual mean TSC frequency due to clocking parameters.
Over extended VM uptime, this discrepancy accumulates, causing clock skew between the hypervisor and a SEV-SNP VM, leading to early timer interrupts as perceived by the guest.
The guest kernel relies on the reported nominal frequency for TSC-based timekeeping, while the actual frequency set during SNP_LAUNCH_START may differ. This mismatch results in inaccurate time calculations, causing the guest to perceive hrtimers as firing earlier than expected.
Utilize the TSC_FACTOR from the SEV firmware's secrets page (see "Secrets Page Format" in the SNP Firmware ABI Specification) to calculate the mean TSC frequency, ensuring accurate timekeeping and mitigating clock skew in SEV-SNP VMs.
Use early_ioremap_encrypted() to map the secrets page as ioremap_encrypted() uses kmalloc() which is not available during early TSC initialization and causes a panic.
Fixes: 73bbf3b0fbba ("x86/tsc: Init the TSC for Secure TSC guests") Signed-off-by: Nikunj A Dadhania nikunj@amd.com Signed-off-by: Borislav Petkov (AMD) bp@alien8.de Cc: stable@vger.kernel.org Link: https://lore.kernel.org/20250630081858.485187-1-nikunj@amd.com --- arch/x86/coco/sev/core.c | 22 ++++++++++++++++++---- arch/x86/include/asm/sev.h | 18 +++++++++++++++++- 2 files changed, 35 insertions(+), 5 deletions(-)
diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c index b6db4e0..47d10d9 100644 --- a/arch/x86/coco/sev/core.c +++ b/arch/x86/coco/sev/core.c @@ -88,7 +88,7 @@ static const char * const sev_status_feat_names[] = { */ static u64 snp_tsc_scale __ro_after_init; static u64 snp_tsc_offset __ro_after_init; -static u64 snp_tsc_freq_khz __ro_after_init; +static unsigned long snp_tsc_freq_khz __ro_after_init;
DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data); DEFINE_PER_CPU(struct sev_es_save_area *, sev_vmsa); @@ -2167,15 +2167,29 @@ static unsigned long securetsc_get_tsc_khz(void)
void __init snp_secure_tsc_init(void) { - unsigned long long tsc_freq_mhz; + unsigned long tsc_freq_mhz, dummy; + struct snp_secrets_page *secrets; + void *mem;
if (!cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC)) return;
+ mem = early_memremap_encrypted(sev_secrets_pa, PAGE_SIZE); + if (!mem) { + pr_err("Unable to get TSC_FACTOR: failed to map the SNP secrets page.\n"); + sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SECURE_TSC); + } + + secrets = (__force struct snp_secrets_page *)mem; + setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ); - rdmsrq(MSR_AMD64_GUEST_TSC_FREQ, tsc_freq_mhz); - snp_tsc_freq_khz = (unsigned long)(tsc_freq_mhz * 1000); + rdmsr(MSR_AMD64_GUEST_TSC_FREQ, tsc_freq_mhz, dummy); + /* Extract the GUEST TSC MHZ from BIT[17:0], rest is reserved space */ + tsc_freq_mhz = tsc_freq_mhz & GENMASK_ULL(17, 0); + snp_tsc_freq_khz = SNP_SCALE_TSC_FREQ(tsc_freq_mhz * 1000, secrets->tsc_factor);
x86_platform.calibrate_cpu = securetsc_get_tsc_khz; x86_platform.calibrate_tsc = securetsc_get_tsc_khz; + + early_memunmap(mem, PAGE_SIZE); } diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 58e028d..2624947 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -223,6 +223,19 @@ struct snp_tsc_info_resp { u8 rsvd2[100]; } __packed;
+ +/* + * Obtain the mean TSC frequency by decreasing the nominal TSC frequency with + * TSC_FACTOR as documented in the SNP Firmware ABI specification: + * + * GUEST_TSC_FREQ * (1 - (TSC_FACTOR * 0.00001)) + * + * which is equivalent to: + * + * GUEST_TSC_FREQ -= (GUEST_TSC_FREQ * TSC_FACTOR) / 100000; + */ +#define SNP_SCALE_TSC_FREQ(freq, factor) ((freq) - (freq) * (factor) / 100000) + struct snp_guest_req { void *req_buf; size_t req_sz; @@ -282,8 +295,11 @@ struct snp_secrets_page { u8 svsm_guest_vmpl; u8 rsvd3[3];
+ /* The percentage decrease from nominal to mean TSC frequency. */ + u32 tsc_factor; + /* Remainder of page */ - u8 rsvd4[3744]; + u8 rsvd4[3740]; } __packed;
struct snp_msg_desc {
The following commit has been merged into the x86/urgent branch of tip:
Commit-ID: 52e1a03e6cf61ae165f59f41c44394a653a0a788 Gitweb: https://git.kernel.org/tip/52e1a03e6cf61ae165f59f41c44394a653a0a788 Author: Nikunj A Dadhania nikunj@amd.com AuthorDate: Mon, 30 Jun 2025 13:48:58 +05:30 Committer: Borislav Petkov (AMD) bp@alien8.de CommitterDate: Tue, 01 Jul 2025 00:29:27 +02:00
x86/sev: Use TSC_FACTOR for Secure TSC frequency calculation
When using Secure TSC, the GUEST_TSC_FREQ MSR reports a frequency based on the nominal P0 frequency, which deviates slightly (typically ~0.2%) from the actual mean TSC frequency due to clocking parameters.
Over extended VM uptime, this discrepancy accumulates, causing clock skew between the hypervisor and a SEV-SNP VM, leading to early timer interrupts as perceived by the guest.
The guest kernel relies on the reported nominal frequency for TSC-based timekeeping, while the actual frequency set during SNP_LAUNCH_START may differ. This mismatch results in inaccurate time calculations, causing the guest to perceive hrtimers as firing earlier than expected.
Utilize the TSC_FACTOR from the SEV firmware's secrets page (see "Secrets Page Format" in the SNP Firmware ABI Specification) to calculate the mean TSC frequency, ensuring accurate timekeeping and mitigating clock skew in SEV-SNP VMs.
Use early_ioremap_encrypted() to map the secrets page as ioremap_encrypted() uses kmalloc() which is not available during early TSC initialization and causes a panic.
[ bp: Drop the silly dummy var: https://lore.kernel.org/r/20250630192726.GBaGLlHl84xIopx4Pt@fat_crate.local ]
Fixes: 73bbf3b0fbba ("x86/tsc: Init the TSC for Secure TSC guests") Signed-off-by: Nikunj A Dadhania nikunj@amd.com Signed-off-by: Borislav Petkov (AMD) bp@alien8.de Cc: stable@vger.kernel.org Link: https://lore.kernel.org/20250630081858.485187-1-nikunj@amd.com --- arch/x86/coco/sev/core.c | 22 +++++++++++++++++++--- arch/x86/include/asm/sev.h | 17 ++++++++++++++++- 2 files changed, 35 insertions(+), 4 deletions(-)
diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c index b6db4e0..7543a8b 100644 --- a/arch/x86/coco/sev/core.c +++ b/arch/x86/coco/sev/core.c @@ -88,7 +88,7 @@ static const char * const sev_status_feat_names[] = { */ static u64 snp_tsc_scale __ro_after_init; static u64 snp_tsc_offset __ro_after_init; -static u64 snp_tsc_freq_khz __ro_after_init; +static unsigned long snp_tsc_freq_khz __ro_after_init;
DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data); DEFINE_PER_CPU(struct sev_es_save_area *, sev_vmsa); @@ -2167,15 +2167,31 @@ static unsigned long securetsc_get_tsc_khz(void)
void __init snp_secure_tsc_init(void) { - unsigned long long tsc_freq_mhz; + struct snp_secrets_page *secrets; + unsigned long tsc_freq_mhz; + void *mem;
if (!cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC)) return;
+ mem = early_memremap_encrypted(sev_secrets_pa, PAGE_SIZE); + if (!mem) { + pr_err("Unable to get TSC_FACTOR: failed to map the SNP secrets page.\n"); + sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SECURE_TSC); + } + + secrets = (__force struct snp_secrets_page *)mem; + setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ); rdmsrq(MSR_AMD64_GUEST_TSC_FREQ, tsc_freq_mhz); - snp_tsc_freq_khz = (unsigned long)(tsc_freq_mhz * 1000); + + /* Extract the GUEST TSC MHZ from BIT[17:0], rest is reserved space */ + tsc_freq_mhz &= GENMASK_ULL(17, 0); + + snp_tsc_freq_khz = SNP_SCALE_TSC_FREQ(tsc_freq_mhz * 1000, secrets->tsc_factor);
x86_platform.calibrate_cpu = securetsc_get_tsc_khz; x86_platform.calibrate_tsc = securetsc_get_tsc_khz; + + early_memunmap(mem, PAGE_SIZE); } diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 58e028d..a631f7d 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -223,6 +223,18 @@ struct snp_tsc_info_resp { u8 rsvd2[100]; } __packed;
+/* + * Obtain the mean TSC frequency by decreasing the nominal TSC frequency with + * TSC_FACTOR as documented in the SNP Firmware ABI specification: + * + * GUEST_TSC_FREQ * (1 - (TSC_FACTOR * 0.00001)) + * + * which is equivalent to: + * + * GUEST_TSC_FREQ -= (GUEST_TSC_FREQ * TSC_FACTOR) / 100000; + */ +#define SNP_SCALE_TSC_FREQ(freq, factor) ((freq) - (freq) * (factor) / 100000) + struct snp_guest_req { void *req_buf; size_t req_sz; @@ -282,8 +294,11 @@ struct snp_secrets_page { u8 svsm_guest_vmpl; u8 rsvd3[3];
+ /* The percentage decrease from nominal to mean TSC frequency. */ + u32 tsc_factor; + /* Remainder of page */ - u8 rsvd4[3744]; + u8 rsvd4[3740]; } __packed;
struct snp_msg_desc {
linux-stable-mirror@lists.linaro.org