Hello Greg,
Following up on our discussion during the review of kernel 4.4.138-stable, I have backported a few patches that remove the remaining FPU lazy-mode dead code from 4.4.y and 4.9.y. To avoid confusion, I will send the explanations for each branch in separate e-mails:
Patches for 4.9.y:
[PATCH 1/4] x86/fpu: Remove use_eager_fpu() -> I only had to resolve a small conflict, caused by commit b22cbe404a9c ("x86/fpu: Fix invalid FPU ptrace state after execve()") having been applied to 4.9.y before this patch (upstream, it comes after).
[PATCH 2/4] x86/fpu: Remove struct fpu::counter -> This is just a git cherry-pick of 3913cc350757 ("x86/fpu: Remove struct fpu::counter"), which applies cleanly on 4.9.y. You can cherry-pick it instead of applying my patch.
[PATCH 3/4] Revert "perf: sync up x86/.../cpufeatures.h" -> Sorry for reverting your patch ;). This is only so that the next patch applies cleanly.
[PATCH 4/4] x86/fpu: Finish excising 'eagerfpu' -> Again, you can cherry-pick e63650840e8b ("x86/fpu: Finish excising 'eagerfpu'") instead of applying my patch; see the commands below.
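For reference, a sketch of the equivalent cherry-picks (assuming a local linux-4.9.y checkout with both upstream commits reachable from a fetched mainline remote):

  # -x records the upstream commit ID in the backported changelog,
  # which is what the stable workflow expects
  git checkout linux-4.9.y
  git cherry-pick -x 3913cc350757   # x86/fpu: Remove struct fpu::counter
  git cherry-pick -x e63650840e8b   # x86/fpu: Finish excising 'eagerfpu'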
Tested on x86_64 only with:
  - ./runltp -f math (all tests pass)
  - IEEE 754 tests (no regressions seen)
    Ref: http://www.math.utah.edu/~beebe/software/ieee/
[Note] I could not prepare an x86 32-bit machine to test no387.
[Note] If someone knows a better way to test the FPU, please let me know.
Best regards, Daniel Sangorrin
PS: This is my first time using git send-email with --cc-cmd="scripts/get_maintainer.pl". Apologies if this is not the right method.
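For anyone trying the same thing, a minimal sketch of such an invocation (the --to address and patch file names here are my own assumptions, not what was actually typed):

  # Cc whoever scripts/get_maintainer.pl reports for each patch
  git send-email --to=stable@vger.kernel.org \
      --cc-cmd="scripts/get_maintainer.pl" \
      000*.patch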
From: Andy Lutomirski <luto@kernel.org>
commit c592b57347069abfc0dcad3b3a302cf882602597 upstream
This removes all the obvious code paths that depend on lazy FPU mode. It shouldn't change the generated code at all.
Signed-off-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Rik van Riel <riel@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Quentin Casasnovas <quentin.casasnovas@oracle.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: pbonzini@redhat.com
Link: http://lkml.kernel.org/r/1475627678-20788-5-git-send-email-riel@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Daniel Sangorrin <daniel.sangorrin@toshiba.co.jp>
---
 arch/x86/crypto/crc32c-intel_glue.c | 17 ++++-------------
 arch/x86/include/asm/fpu/internal.h | 34 +---------------------------------
 arch/x86/kernel/fpu/core.c          | 36 ++++--------------------------------
 arch/x86/kernel/fpu/signal.c        |  8 +++-----
 arch/x86/kernel/fpu/xstate.c        |  9 ---------
 arch/x86/kvm/cpuid.c                |  4 +---
 arch/x86/kvm/x86.c                  | 10 ----------
 7 files changed, 13 insertions(+), 105 deletions(-)
diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-intel_glue.c
index dd19584..5773e11 100644
--- a/arch/x86/crypto/crc32c-intel_glue.c
+++ b/arch/x86/crypto/crc32c-intel_glue.c
@@ -48,21 +48,13 @@
 #ifdef CONFIG_X86_64
 /*
  * use carryless multiply version of crc32c when buffer
- * size is >= 512 (when eager fpu is enabled) or
- * >= 1024 (when eager fpu is disabled) to account
+ * size is >= 512 to account
  * for fpu state save/restore overhead.
  */
-#define CRC32C_PCL_BREAKEVEN_EAGERFPU		512
-#define CRC32C_PCL_BREAKEVEN_NOEAGERFPU		1024
+#define CRC32C_PCL_BREAKEVEN			512
 
 asmlinkage unsigned int crc_pcl(const u8 *buffer, int len,
 				unsigned int crc_init);
-static int crc32c_pcl_breakeven = CRC32C_PCL_BREAKEVEN_EAGERFPU;
-#define set_pcl_breakeven_point()					\
-do {									\
-	if (!use_eager_fpu())						\
-		crc32c_pcl_breakeven = CRC32C_PCL_BREAKEVEN_NOEAGERFPU;	\
-} while (0)
 #endif /* CONFIG_X86_64 */
 
 static u32 crc32c_intel_le_hw_byte(u32 crc, unsigned char const *data, size_t length)
@@ -185,7 +177,7 @@ static int crc32c_pcl_intel_update(struct shash_desc *desc, const u8 *data,
 	 * use faster PCL version if datasize is large enough to
 	 * overcome kernel fpu state save/restore overhead
 	 */
-	if (len >= crc32c_pcl_breakeven && irq_fpu_usable()) {
+	if (len >= CRC32C_PCL_BREAKEVEN && irq_fpu_usable()) {
 		kernel_fpu_begin();
 		*crcp = crc_pcl(data, len, *crcp);
 		kernel_fpu_end();
@@ -197,7 +189,7 @@ static int crc32c_pcl_intel_update(struct shash_desc *desc, const u8 *data,
 static int __crc32c_pcl_intel_finup(u32 *crcp, const u8 *data, unsigned int len,
 				u8 *out)
 {
-	if (len >= crc32c_pcl_breakeven && irq_fpu_usable()) {
+	if (len >= CRC32C_PCL_BREAKEVEN && irq_fpu_usable()) {
 		kernel_fpu_begin();
 		*(__le32 *)out = ~cpu_to_le32(crc_pcl(data, len, *crcp));
 		kernel_fpu_end();
@@ -257,7 +249,6 @@ static int __init crc32c_intel_mod_init(void)
 		alg.update = crc32c_pcl_intel_update;
 		alg.finup = crc32c_pcl_intel_finup;
 		alg.digest = crc32c_pcl_intel_digest;
-		set_pcl_breakeven_point();
 	}
 #endif
 	return crypto_register_shash(&alg);
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 8852e3a..7801d32 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -60,11 +60,6 @@ extern u64 fpu__get_supported_xfeatures_mask(void);
 /*
  * FPU related CPU feature flag helper routines:
  */
-static __always_inline __pure bool use_eager_fpu(void)
-{
-	return true;
-}
-
 static __always_inline __pure bool use_xsaveopt(void)
 {
 	return static_cpu_has(X86_FEATURE_XSAVEOPT);
@@ -501,24 +496,6 @@ static inline int fpu_want_lazy_restore(struct fpu *fpu, unsigned int cpu)
 }
 
-/*
- * Wrap lazy FPU TS handling in a 'hw fpregs activation/deactivation'
- * idiom, which is then paired with the sw-flag (fpregs_active) later on:
- */
-
-static inline void __fpregs_activate_hw(void)
-{
-	if (!use_eager_fpu())
-		clts();
-}
-
-static inline void __fpregs_deactivate_hw(void)
-{
-	if (!use_eager_fpu())
-		stts();
-}
-
-/* Must be paired with an 'stts' (fpregs_deactivate_hw()) after! */
 static inline void __fpregs_deactivate(struct fpu *fpu)
 {
 	WARN_ON_FPU(!fpu->fpregs_active);
@@ -528,7 +505,6 @@ static inline void __fpregs_deactivate(struct fpu *fpu)
 	trace_x86_fpu_regs_deactivated(fpu);
 }
 
-/* Must be paired with a 'clts' (fpregs_activate_hw()) before! */
 static inline void __fpregs_activate(struct fpu *fpu)
 {
 	WARN_ON_FPU(fpu->fpregs_active);
@@ -554,22 +530,17 @@ static inline int fpregs_active(void)
 }
 
 /*
- * Encapsulate the CR0.TS handling together with the
- * software flag.
- *
  * These generally need preemption protection to work,
  * do try to avoid using these on their own.
  */
 static inline void fpregs_activate(struct fpu *fpu)
 {
-	__fpregs_activate_hw();
 	__fpregs_activate(fpu);
 }
 
 static inline void fpregs_deactivate(struct fpu *fpu)
 {
 	__fpregs_deactivate(fpu);
-	__fpregs_deactivate_hw();
 }
 
 /*
@@ -596,8 +567,7 @@ switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu)
 	 * or if the past 5 consecutive context-switches used math.
 	 */
 	fpu.preload = static_cpu_has(X86_FEATURE_FPU) &&
-		      new_fpu->fpstate_active &&
-		      (use_eager_fpu() || new_fpu->counter > 5);
+		      new_fpu->fpstate_active;
 
 	if (old_fpu->fpregs_active) {
 		if (!copy_fpregs_to_fpstate(old_fpu))
@@ -615,8 +585,6 @@ switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu)
 			__fpregs_activate(new_fpu);
 			trace_x86_fpu_regs_activated(new_fpu);
 			prefetch(&new_fpu->state);
-		} else {
-			__fpregs_deactivate_hw();
 		}
 	} else {
 		old_fpu->counter = 0;
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 96d80df..2dc1927 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -58,27 +58,9 @@ static bool kernel_fpu_disabled(void)
 	return this_cpu_read(in_kernel_fpu);
 }
 
-/*
- * Were we in an interrupt that interrupted kernel mode?
- *
- * On others, we can do a kernel_fpu_begin/end() pair *ONLY* if that
- * pair does nothing at all: the thread must not have fpu (so
- * that we don't try to save the FPU state), and TS must
- * be set (so that the clts/stts pair does nothing that is
- * visible in the interrupted kernel thread).
- *
- * Except for the eagerfpu case when we return true; in the likely case
- * the thread has FPU but we are not going to set/clear TS.
- */
 static bool interrupted_kernel_fpu_idle(void)
 {
-	if (kernel_fpu_disabled())
-		return false;
-
-	if (use_eager_fpu())
-		return true;
-
-	return !current->thread.fpu.fpregs_active && (read_cr0() & X86_CR0_TS);
+	return !kernel_fpu_disabled();
 }
 
 /*
@@ -126,7 +108,6 @@ void __kernel_fpu_begin(void)
 		copy_fpregs_to_fpstate(fpu);
 	} else {
 		this_cpu_write(fpu_fpregs_owner_ctx, NULL);
-		__fpregs_activate_hw();
 	}
 }
 EXPORT_SYMBOL(__kernel_fpu_begin);
@@ -137,8 +118,6 @@ void __kernel_fpu_end(void)
 
 	if (fpu->fpregs_active)
 		copy_kernel_to_fpregs(&fpu->state);
-	else
-		__fpregs_deactivate_hw();
 
 	kernel_fpu_enable();
 }
@@ -200,10 +179,7 @@ void fpu__save(struct fpu *fpu)
 	trace_x86_fpu_before_save(fpu);
 	if (fpu->fpregs_active) {
 		if (!copy_fpregs_to_fpstate(fpu)) {
-			if (use_eager_fpu())
-				copy_kernel_to_fpregs(&fpu->state);
-			else
-				fpregs_deactivate(fpu);
+			copy_kernel_to_fpregs(&fpu->state);
 		}
 	}
 	trace_x86_fpu_after_save(fpu);
@@ -261,8 +237,7 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
 		 * Don't let 'init optimized' areas of the XSAVE area
 		 * leak into the child task:
 		 */
-		if (use_eager_fpu())
-			memset(&dst_fpu->state.xsave, 0, fpu_kernel_xstate_size);
+		memset(&dst_fpu->state.xsave, 0, fpu_kernel_xstate_size);
 
 		/*
 		 * Save current FPU registers directly into the child
@@ -284,10 +259,7 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
 		memcpy(&src_fpu->state, &dst_fpu->state, fpu_kernel_xstate_size);
 
-		if (use_eager_fpu())
-			copy_kernel_to_fpregs(&src_fpu->state);
-		else
-			fpregs_deactivate(src_fpu);
+		copy_kernel_to_fpregs(&src_fpu->state);
 	}
 	preempt_enable();
 
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 3ec0d2d..3a93186 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -344,11 +344,9 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
 		}
 
 		fpu->fpstate_active = 1;
-		if (use_eager_fpu()) {
-			preempt_disable();
-			fpu__restore(fpu);
-			preempt_enable();
-		}
+		preempt_disable();
+		fpu__restore(fpu);
+		preempt_enable();
 
 		return err;
 	} else {
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index abfbb61b..e9d7f46 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -890,15 +890,6 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
 	 */
 	if (!boot_cpu_has(X86_FEATURE_OSPKE))
 		return -EINVAL;
-	/*
-	 * For most XSAVE components, this would be an arduous task:
-	 * brining fpstate up to date with fpregs, updating fpstate,
-	 * then re-populating fpregs. But, for components that are
-	 * never lazily managed, we can just access the fpregs
-	 * directly. PKRU is never managed lazily, so we can just
-	 * manipulate it directly. Make sure it stays that way.
-	 */
-	WARN_ON_ONCE(!use_eager_fpu());
 
 	/* Set the bits we need in PKRU: */
 	if (init_val & PKEY_DISABLE_ACCESS)
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 7e5119c..c17d389 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -16,7 +16,6 @@
 #include <linux/export.h>
 #include <linux/vmalloc.h>
 #include <linux/uaccess.h>
-#include <asm/fpu/internal.h> /* For use_eager_fpu. Ugh! */
 #include <asm/user.h>
 #include <asm/fpu/xstate.h>
 #include "cpuid.h"
@@ -114,8 +113,7 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
 	if (best && (best->eax & (F(XSAVES) | F(XSAVEC))))
 		best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
 
-	if (use_eager_fpu())
-		kvm_x86_ops->fpu_activate(vcpu);
+	kvm_x86_ops->fpu_activate(vcpu);
 
 	/*
 	 * The existing code assumes virtual address is 48-bit in the canonical
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5ca23af..0754dae 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7513,16 +7513,6 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
 	copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu);
 	__kernel_fpu_end();
 	++vcpu->stat.fpu_reload;
-	/*
-	 * If using eager FPU mode, or if the guest is a frequent user
-	 * of the FPU, just leave the FPU active for next time.
-	 * Every 255 times fpu_counter rolls over to 0; a guest that uses
-	 * the FPU in bursts will revert to loading it on demand.
-	 */
-	if (!use_eager_fpu()) {
-		if (++vcpu->fpu_counter < 5)
-			kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu);
-	}
 	trace_kvm_fpu(0);
 }
From: Rik van Riel <riel@redhat.com>
With the lazy FPU code gone, we no longer use the counter field in struct fpu for anything. Get rid of it.
Signed-off-by: Rik van Riel <riel@redhat.com>
Reviewed-by: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Quentin Casasnovas <quentin.casasnovas@oracle.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: pbonzini@redhat.com
Link: http://lkml.kernel.org/r/1475627678-20788-6-git-send-email-riel@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/fpu/internal.h |  3 ---
 arch/x86/include/asm/fpu/types.h    | 11 -----------
 arch/x86/include/asm/trace/fpu.h    |  5 +----
 arch/x86/kernel/fpu/core.c          |  3 ---
 4 files changed, 1 insertion(+), 21 deletions(-)
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 7801d32..499d6ed 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -581,16 +581,13 @@ switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu)
 
 		/* Don't change CR0.TS if we just switch! */
 		if (fpu.preload) {
-			new_fpu->counter++;
 			__fpregs_activate(new_fpu);
 			trace_x86_fpu_regs_activated(new_fpu);
 			prefetch(&new_fpu->state);
 		}
 	} else {
-		old_fpu->counter = 0;
 		old_fpu->last_cpu = -1;
 		if (fpu.preload) {
-			new_fpu->counter++;
 			if (fpu_want_lazy_restore(new_fpu, cpu))
 				fpu.preload = 0;
 			else
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index 48df486..e31332d 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -322,17 +322,6 @@ struct fpu {
 	unsigned char			fpregs_active;
 
 	/*
-	 * @counter:
-	 *
-	 * This counter contains the number of consecutive context switches
-	 * during which the FPU stays used. If this is over a threshold, the
-	 * lazy FPU restore logic becomes eager, to save the trap overhead.
-	 * This is an unsigned char so that after 256 iterations the counter
-	 * wraps and the context switch behavior turns lazy again; this is to
-	 * deal with bursty apps that only use the FPU for a short time:
-	 */
-	unsigned char			counter;
-	/*
 	 * @state:
 	 *
 	 * In-memory copy of all FPU registers that we save/restore
diff --git a/arch/x86/include/asm/trace/fpu.h b/arch/x86/include/asm/trace/fpu.h
index 9217ab1..342e597 100644
--- a/arch/x86/include/asm/trace/fpu.h
+++ b/arch/x86/include/asm/trace/fpu.h
@@ -14,7 +14,6 @@ DECLARE_EVENT_CLASS(x86_fpu,
 		__field(struct fpu *, fpu)
 		__field(bool, fpregs_active)
 		__field(bool, fpstate_active)
-		__field(int, counter)
 		__field(u64, xfeatures)
 		__field(u64, xcomp_bv)
 	),
@@ -23,17 +22,15 @@ DECLARE_EVENT_CLASS(x86_fpu,
 		__entry->fpu		= fpu;
 		__entry->fpregs_active	= fpu->fpregs_active;
 		__entry->fpstate_active	= fpu->fpstate_active;
-		__entry->counter	= fpu->counter;
 		if (boot_cpu_has(X86_FEATURE_OSXSAVE)) {
 			__entry->xfeatures = fpu->state.xsave.header.xfeatures;
 			__entry->xcomp_bv  = fpu->state.xsave.header.xcomp_bv;
 		}
 	),
-	TP_printk("x86/fpu: %p fpregs_active: %d fpstate_active: %d counter: %d xfeatures: %llx xcomp_bv: %llx",
+	TP_printk("x86/fpu: %p fpregs_active: %d fpstate_active: %d xfeatures: %llx xcomp_bv: %llx",
 			__entry->fpu,
 			__entry->fpregs_active,
 			__entry->fpstate_active,
-			__entry->counter,
 			__entry->xfeatures,
 			__entry->xcomp_bv
 	)
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 2dc1927..76c094c 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -224,7 +224,6 @@ EXPORT_SYMBOL_GPL(fpstate_init);
 
 int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
 {
-	dst_fpu->counter = 0;
 	dst_fpu->fpregs_active = 0;
 	dst_fpu->last_cpu = -1;
 
@@ -432,7 +431,6 @@ void fpu__restore(struct fpu *fpu)
 	trace_x86_fpu_before_restore(fpu);
 	fpregs_activate(fpu);
 	copy_kernel_to_fpregs(&fpu->state);
-	fpu->counter++;
 	trace_x86_fpu_after_restore(fpu);
 	kernel_fpu_enable();
 }
@@ -450,7 +448,6 @@ EXPORT_SYMBOL_GPL(fpu__restore);
 void fpu__drop(struct fpu *fpu)
 {
 	preempt_disable();
-	fpu->counter = 0;
 
 	if (fpu->fpregs_active) {
 		/* Ignore delayed exceptions from user space */
This reverts commit f09a7b0eead737b33d940bf5c2509ca1441e9590 so that it does not conflict with e63650840e8b ("x86/fpu: Finish excising 'eagerfpu'").
Signed-off-by: Daniel Sangorrin <daniel.sangorrin@toshiba.co.jp>
---
 tools/arch/x86/include/asm/cpufeatures.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index aea30af..c278f27 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -104,7 +104,7 @@
 #define X86_FEATURE_EXTD_APICID	( 3*32+26) /* has extended APICID (8 bits) */
 #define X86_FEATURE_AMD_DCM	( 3*32+27) /* multi-node processor */
 #define X86_FEATURE_APERFMPERF	( 3*32+28) /* APERFMPERF */
-/* free, was #define X86_FEATURE_EAGER_FPU	( 3*32+29) * "eagerfpu" Non lazy FPU restore */
+#define X86_FEATURE_EAGER_FPU	( 3*32+29) /* "eagerfpu" Non lazy FPU restore */
 #define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
 
 /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
From: Andy Lutomirski <luto@kernel.org>
Now that eagerfpu= is gone, remove it from the docs and some comments. Also sync the changes to tools/.
Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Quentin Casasnovas <quentin.casasnovas@oracle.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/cf430dd4481d41280e93ac6cf0def1007a67fc8e.1476740397...
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 Documentation/kernel-parameters.txt      |  6 ------
 arch/x86/include/asm/cpufeatures.h       |  1 -
 arch/x86/include/asm/fpu/types.h         | 23 -----------------------
 arch/x86/mm/pkeys.c                      |  3 +--
 tools/arch/x86/include/asm/cpufeatures.h |  1 -
 5 files changed, 1 insertion(+), 33 deletions(-)
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 52240a6..cb6e169 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1084,12 +1084,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 	nopku		[X86] Disable Memory Protection Keys CPU feature found
 			in some Intel CPUs.
 
-	eagerfpu=	[X86]
-			on	enable eager fpu restore
-			off	disable eager fpu restore
-			auto	selects the default scheme, which automatically
-				enables eagerfpu restore for xsaveopt.
-
 	module.async_probe [KNL]
 			Enable asynchronous probe on this module.
 
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index aea30af..3d5df1c1 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -104,7 +104,6 @@
 #define X86_FEATURE_EXTD_APICID	( 3*32+26) /* has extended APICID (8 bits) */
 #define X86_FEATURE_AMD_DCM	( 3*32+27) /* multi-node processor */
 #define X86_FEATURE_APERFMPERF	( 3*32+28) /* APERFMPERF */
-/* free, was #define X86_FEATURE_EAGER_FPU	( 3*32+29) * "eagerfpu" Non lazy FPU restore */
 #define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
 
 /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index e31332d..3c80f5b 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -329,29 +329,6 @@ struct fpu {
 	 * the registers in the FPU are more recent than this state
 	 * copy. If the task context-switches away then they get
 	 * saved here and represent the FPU state.
-	 *
-	 * After context switches there may be a (short) time period
-	 * during which the in-FPU hardware registers are unchanged
-	 * and still perfectly match this state, if the tasks
-	 * scheduled afterwards are not using the FPU.
-	 *
-	 * This is the 'lazy restore' window of optimization, which
-	 * we track though 'fpu_fpregs_owner_ctx' and 'fpu->last_cpu'.
-	 *
-	 * We detect whether a subsequent task uses the FPU via setting
-	 * CR0::TS to 1, which causes any FPU use to raise a #NM fault.
-	 *
-	 * During this window, if the task gets scheduled again, we
-	 * might be able to skip having to do a restore from this
-	 * memory buffer to the hardware registers - at the cost of
-	 * incurring the overhead of #NM fault traps.
-	 *
-	 * Note that on modern CPUs that support the XSAVEOPT (or other
-	 * optimized XSAVE instructions), we don't use #NM traps anymore,
-	 * as the hardware can track whether FPU registers need saving
-	 * or not. On such CPUs we activate the non-lazy ('eagerfpu')
-	 * logic, which unconditionally saves/restores all FPU state
-	 * across context switches. (if FPU state exists.)
 	 */
 	union fpregs_state		state;
 	/*
diff --git a/arch/x86/mm/pkeys.c b/arch/x86/mm/pkeys.c
index 0bbec04..e2d2b3c 100644
--- a/arch/x86/mm/pkeys.c
+++ b/arch/x86/mm/pkeys.c
@@ -142,8 +142,7 @@ u32 init_pkru_value = PKRU_AD_KEY( 1) | PKRU_AD_KEY( 2) | PKRU_AD_KEY( 3) |
  * Called from the FPU code when creating a fresh set of FPU
  * registers. This is called from a very specific context where
  * we know the FPU regstiers are safe for use and we can use PKRU
- * directly. The fact that PKRU is only available when we are
- * using eagerfpu mode makes this possible.
+ * directly.
  */
 void copy_init_pkru_to_fpregs(void)
 {
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index c278f27..3d5df1c1 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -104,7 +104,6 @@
 #define X86_FEATURE_EXTD_APICID	( 3*32+26) /* has extended APICID (8 bits) */
 #define X86_FEATURE_AMD_DCM	( 3*32+27) /* multi-node processor */
 #define X86_FEATURE_APERFMPERF	( 3*32+28) /* APERFMPERF */
-#define X86_FEATURE_EAGER_FPU	( 3*32+29) /* "eagerfpu" Non lazy FPU restore */
 #define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
 
 /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */