Hello Greg,
This series contains the patches that complete the removal of the FPU lazy-mode code, documentation, and comments for kernel 4.4.y.

The patches are mostly cleanups, but they should also make it easier to backport future fixes if more bugs are found in the FPU code.
[PATCH v4 4.4 1/4] KVM: x86: remove eager_fpu field of struct
[PATCH v4 4.4 2/4] x86/fpu: Remove use_eager_fpu()
[PATCH v4 4.4 3/4] x86/fpu: Remove struct fpu::counter
[PATCH v4 4.4 4/4] x86/fpu: Finish excising 'eagerfpu'
Diffstat:
 Documentation/kernel-parameters.txt |  5 -----
 arch/x86/crypto/crc32c-intel_glue.c | 17 ++++-------------
 arch/x86/include/asm/cpufeatures.h  |  1 -
 arch/x86/include/asm/fpu/internal.h | 37 +------------------------------------
 arch/x86/include/asm/fpu/types.h    | 34 ----------------------------------
 arch/x86/include/asm/kvm_host.h     |  1 -
 arch/x86/kernel/fpu/core.c          | 41 +++++------------------------------------
 arch/x86/kernel/fpu/signal.c        |  8 +++-----
 arch/x86/kvm/cpuid.c                |  5 +----
 arch/x86/kvm/x86.c                  | 10 ----------
 10 files changed, 14 insertions(+), 145 deletions(-)
Thanks,
Daniel
From: Paolo Bonzini <pbonzini@redhat.com>
commit 5a5fbdc0e3f1159a734f1890da60fce70e98271d upstream.
It is now equal to use_eager_fpu(), which simply tests a cpufeature bit.
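For reference, the helper that the cached field now mirrors is already trivial in this tree. This is the 4.4 definition, taken from the hunk that patch 2/4 removes from arch/x86/include/asm/fpu/internal.h, with one explanatory comment added:

	static __always_inline __pure bool use_eager_fpu(void)
	{
		/* Lazy FPU mode is already disabled in this tree, so this is constant. */
		return true;
	}

Since the compiler folds this to 'true', caching its result in a per-vCPU eager_fpu field carries no extra information, which is why the field can go.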
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Daniel Sangorrin <daniel.sangorrin@toshiba.co.jp>
---
 arch/x86/include/asm/kvm_host.h | 1 -
 arch/x86/kvm/cpuid.c            | 3 +--
 arch/x86/kvm/x86.c              | 2 +-
 3 files changed, 2 insertions(+), 4 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 74fda1a..3a37cdb 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -439,7 +439,6 @@ struct kvm_vcpu_arch {
 	struct kvm_mmu_memory_cache mmu_page_header_cache;
 
 	struct fpu guest_fpu;
-	bool eager_fpu;
 	u64 xcr0;
 	u64 guest_supported_xcr0;
 	u32 guest_xstate_size;
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 83d6369..7b4ea5e 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -104,8 +104,7 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
 	if (best && (best->eax & (F(XSAVES) | F(XSAVEC))))
 		best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
 
-	vcpu->arch.eager_fpu = use_eager_fpu();
-	if (vcpu->arch.eager_fpu)
+	if (use_eager_fpu())
 		kvm_x86_ops->fpu_activate(vcpu);
 
 	/*
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 53d43d2..660d8a0 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7325,7 +7325,7 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
 	 * Every 255 times fpu_counter rolls over to 0; a guest that uses
 	 * the FPU in bursts will revert to loading it on demand.
 	 */
-	if (!vcpu->arch.eager_fpu) {
+	if (!use_eager_fpu()) {
 		if (++vcpu->fpu_counter < 5)
 			kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu);
 	}
From: Andy Lutomirski <luto@kernel.org>
commit c592b57347069abfc0dcad3b3a302cf882602597 upstream.
This removes all the obvious code paths that depend on lazy FPU mode. It shouldn't change the generated code at all.
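The reason the generated code is unchanged: use_eager_fpu() is an __always_inline __pure helper that returns a compile-time constant in this tree, so the compiler has already discarded every lazy branch guarded by it. A sketch based on the fpu__save() hunk below, before and after the patch (illustrative only, not the full function):

	/* Before: the 'else' arm is dead code once use_eager_fpu() is constant true */
	if (!copy_fpregs_to_fpstate(fpu)) {
		if (use_eager_fpu())
			copy_kernel_to_fpregs(&fpu->state);
		else
			fpregs_deactivate(fpu);
	}

	/* After: the same surviving path, written out directly */
	if (!copy_fpregs_to_fpstate(fpu))
		copy_kernel_to_fpregs(&fpu->state);

The rest of the patch applies the same simplification to the CR0.TS helpers, the crc32c breakeven selection and the KVM callers.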
Signed-off-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Rik van Riel <riel@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Quentin Casasnovas <quentin.casasnovas@oracle.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: pbonzini@redhat.com
Link: http://lkml.kernel.org/r/1475627678-20788-5-git-send-email-riel@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Daniel Sangorrin <daniel.sangorrin@toshiba.co.jp>
---
 arch/x86/crypto/crc32c-intel_glue.c | 17 ++++-------------
 arch/x86/include/asm/fpu/internal.h | 34 +---------------------------------
 arch/x86/kernel/fpu/core.c          | 38 +++++--------------------------------
 arch/x86/kernel/fpu/signal.c        |  8 +++-----
 arch/x86/kvm/cpuid.c                |  4 +---
 arch/x86/kvm/x86.c                  | 10 ----------
 6 files changed, 14 insertions(+), 97 deletions(-)
diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-intel_glue.c
index 715399b..c194d57 100644
--- a/arch/x86/crypto/crc32c-intel_glue.c
+++ b/arch/x86/crypto/crc32c-intel_glue.c
@@ -48,21 +48,13 @@
 #ifdef CONFIG_X86_64
 /*
  * use carryless multiply version of crc32c when buffer
- * size is >= 512 (when eager fpu is enabled) or
- * >= 1024 (when eager fpu is disabled) to account
+ * size is >= 512 to account
  * for fpu state save/restore overhead.
  */
-#define CRC32C_PCL_BREAKEVEN_EAGERFPU	512
-#define CRC32C_PCL_BREAKEVEN_NOEAGERFPU	1024
+#define CRC32C_PCL_BREAKEVEN	512
 
 asmlinkage unsigned int crc_pcl(const u8 *buffer, int len,
 				unsigned int crc_init);
-static int crc32c_pcl_breakeven = CRC32C_PCL_BREAKEVEN_EAGERFPU;
-#define set_pcl_breakeven_point()					\
-do {									\
-	if (!use_eager_fpu())						\
-		crc32c_pcl_breakeven = CRC32C_PCL_BREAKEVEN_NOEAGERFPU;\
-} while (0)
 #endif /* CONFIG_X86_64 */
 
 static u32 crc32c_intel_le_hw_byte(u32 crc, unsigned char const *data,
 				size_t length)
@@ -185,7 +177,7 @@ static int crc32c_pcl_intel_update(struct shash_desc *desc, const u8 *data,
 	 * use faster PCL version if datasize is large enough to
 	 * overcome kernel fpu state save/restore overhead
 	 */
-	if (len >= crc32c_pcl_breakeven && irq_fpu_usable()) {
+	if (len >= CRC32C_PCL_BREAKEVEN && irq_fpu_usable()) {
 		kernel_fpu_begin();
 		*crcp = crc_pcl(data, len, *crcp);
 		kernel_fpu_end();
@@ -197,7 +189,7 @@ static int __crc32c_pcl_intel_finup(u32 *crcp, const u8 *data, unsigned int len,
 				u8 *out)
 {
-	if (len >= crc32c_pcl_breakeven && irq_fpu_usable()) {
+	if (len >= CRC32C_PCL_BREAKEVEN && irq_fpu_usable()) {
 		kernel_fpu_begin();
 		*(__le32 *)out = ~cpu_to_le32(crc_pcl(data, len, *crcp));
 		kernel_fpu_end();
@@ -256,7 +248,6 @@ static int __init crc32c_intel_mod_init(void)
 		alg.update = crc32c_pcl_intel_update;
 		alg.finup = crc32c_pcl_intel_finup;
 		alg.digest = crc32c_pcl_intel_digest;
-		set_pcl_breakeven_point();
 	}
 #endif
 	return crypto_register_shash(&alg);
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index ec2aedb..2ff03cc 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -57,11 +57,6 @@ extern u64 fpu__get_supported_xfeatures_mask(void);
 /*
  * FPU related CPU feature flag helper routines:
  */
-static __always_inline __pure bool use_eager_fpu(void)
-{
-	return true;
-}
-
 static __always_inline __pure bool use_xsaveopt(void)
 {
 	return static_cpu_has(X86_FEATURE_XSAVEOPT);
@@ -498,24 +493,6 @@ static inline int fpu_want_lazy_restore(struct fpu *fpu, unsigned int cpu)
 }
 
-/*
- * Wrap lazy FPU TS handling in a 'hw fpregs activation/deactivation'
- * idiom, which is then paired with the sw-flag (fpregs_active) later on:
- */
-
-static inline void __fpregs_activate_hw(void)
-{
-	if (!use_eager_fpu())
-		clts();
-}
-
-static inline void __fpregs_deactivate_hw(void)
-{
-	if (!use_eager_fpu())
-		stts();
-}
-
-/* Must be paired with an 'stts' (fpregs_deactivate_hw()) after! */
 static inline void __fpregs_deactivate(struct fpu *fpu)
 {
 	WARN_ON_FPU(!fpu->fpregs_active);
@@ -524,7 +501,6 @@ static inline void __fpregs_deactivate(struct fpu *fpu)
 	this_cpu_write(fpu_fpregs_owner_ctx, NULL);
 }
 
-/* Must be paired with a 'clts' (fpregs_activate_hw()) before! */
 static inline void __fpregs_activate(struct fpu *fpu)
 {
 	WARN_ON_FPU(fpu->fpregs_active);
@@ -549,22 +525,17 @@ static inline int fpregs_active(void)
 }
 
 /*
- * Encapsulate the CR0.TS handling together with the
- * software flag.
- *
  * These generally need preemption protection to work,
  * do try to avoid using these on their own.
  */
 static inline void fpregs_activate(struct fpu *fpu)
 {
-	__fpregs_activate_hw();
 	__fpregs_activate(fpu);
 }
 
 static inline void fpregs_deactivate(struct fpu *fpu)
 {
 	__fpregs_deactivate(fpu);
-	__fpregs_deactivate_hw();
 }
 
 /*
@@ -591,8 +562,7 @@ switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu)
 	 * or if the past 5 consecutive context-switches used math.
 	 */
 	fpu.preload = static_cpu_has(X86_FEATURE_FPU) &&
-		      new_fpu->fpstate_active &&
-		      (use_eager_fpu() || new_fpu->counter > 5);
+		      new_fpu->fpstate_active;
 
 	if (old_fpu->fpregs_active) {
 		if (!copy_fpregs_to_fpstate(old_fpu))
@@ -608,8 +578,6 @@ switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu)
 			new_fpu->counter++;
 			__fpregs_activate(new_fpu);
 			prefetch(&new_fpu->state);
-		} else {
-			__fpregs_deactivate_hw();
 		}
 	} else {
 		old_fpu->counter = 0;
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 6aa0b51..b282364 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -53,27 +53,9 @@ static bool kernel_fpu_disabled(void)
 	return this_cpu_read(in_kernel_fpu);
 }
 
-/*
- * Were we in an interrupt that interrupted kernel mode?
- *
- * On others, we can do a kernel_fpu_begin/end() pair *ONLY* if that
- * pair does nothing at all: the thread must not have fpu (so
- * that we don't try to save the FPU state), and TS must
- * be set (so that the clts/stts pair does nothing that is
- * visible in the interrupted kernel thread).
- *
- * Except for the eagerfpu case when we return true; in the likely case
- * the thread has FPU but we are not going to set/clear TS.
- */
 static bool interrupted_kernel_fpu_idle(void)
 {
-	if (kernel_fpu_disabled())
-		return false;
-
-	if (use_eager_fpu())
-		return true;
-
-	return !current->thread.fpu.fpregs_active && (read_cr0() & X86_CR0_TS);
+	return !kernel_fpu_disabled();
 }
 
 /*
@@ -121,7 +103,6 @@ void __kernel_fpu_begin(void)
 		copy_fpregs_to_fpstate(fpu);
 	} else {
 		this_cpu_write(fpu_fpregs_owner_ctx, NULL);
-		__fpregs_activate_hw();
 	}
 }
 EXPORT_SYMBOL(__kernel_fpu_begin);
@@ -132,8 +113,6 @@ void __kernel_fpu_end(void)
 
 	if (fpu->fpregs_active)
 		copy_kernel_to_fpregs(&fpu->state);
-	else
-		__fpregs_deactivate_hw();
 
 	kernel_fpu_enable();
 }
@@ -194,10 +173,7 @@ void fpu__save(struct fpu *fpu)
 	preempt_disable();
 	if (fpu->fpregs_active) {
 		if (!copy_fpregs_to_fpstate(fpu)) {
-			if (use_eager_fpu())
-				copy_kernel_to_fpregs(&fpu->state);
-			else
-				fpregs_deactivate(fpu);
+			copy_kernel_to_fpregs(&fpu->state);
 		}
 	}
 	preempt_enable();
@@ -245,8 +221,7 @@ static void fpu_copy(struct fpu *dst_fpu, struct fpu *src_fpu)
 	 * Don't let 'init optimized' areas of the XSAVE area
 	 * leak into the child task:
 	 */
-	if (use_eager_fpu())
-		memset(&dst_fpu->state.xsave, 0, xstate_size);
+	memset(&dst_fpu->state.xsave, 0, xstate_size);
 
 	/*
 	 * Save current FPU registers directly into the child
@@ -268,10 +243,7 @@ static void fpu_copy(struct fpu *dst_fpu, struct fpu *src_fpu)
 	if (!copy_fpregs_to_fpstate(dst_fpu)) {
 		memcpy(&src_fpu->state, &dst_fpu->state, xstate_size);
 
-		if (use_eager_fpu())
-			copy_kernel_to_fpregs(&src_fpu->state);
-		else
-			fpregs_deactivate(src_fpu);
+		copy_kernel_to_fpregs(&src_fpu->state);
 	}
 	preempt_enable();
 }
@@ -437,7 +409,7 @@ void fpu__clear(struct fpu *fpu)
 {
 	WARN_ON_FPU(fpu != &current->thread.fpu); /* Almost certainly an anomaly */
 
-	if (!use_eager_fpu() || !static_cpu_has(X86_FEATURE_FPU)) {
+	if (!static_cpu_has(X86_FEATURE_FPU)) {
 		/* FPU state will be reallocated lazily at the first use. */
 		fpu__drop(fpu);
 	} else {
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 3de0771..9be3e79 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -319,11 +319,9 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
 		}
 
 		fpu->fpstate_active = 1;
-		if (use_eager_fpu()) {
-			preempt_disable();
-			fpu__restore(fpu);
-			preempt_enable();
-		}
+		preempt_disable();
+		fpu__restore(fpu);
+		preempt_enable();
 
 		return err;
 	} else {
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 7b4ea5e..338d13d 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -16,7 +16,6 @@
 #include <linux/module.h>
 #include <linux/vmalloc.h>
 #include <linux/uaccess.h>
-#include <asm/fpu/internal.h> /* For use_eager_fpu. Ugh! */
 #include <asm/user.h>
 #include <asm/fpu/xstate.h>
 #include "cpuid.h"
@@ -104,8 +103,7 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
 	if (best && (best->eax & (F(XSAVES) | F(XSAVEC))))
 		best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
 
-	if (use_eager_fpu())
-		kvm_x86_ops->fpu_activate(vcpu);
+	kvm_x86_ops->fpu_activate(vcpu);
 
 	/*
 	 * The existing code assumes virtual address is 48-bit in the canonical
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 660d8a0..e6ab034 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7319,16 +7319,6 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
 	copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu);
 	__kernel_fpu_end();
 	++vcpu->stat.fpu_reload;
-	/*
-	 * If using eager FPU mode, or if the guest is a frequent user
-	 * of the FPU, just leave the FPU active for next time.
-	 * Every 255 times fpu_counter rolls over to 0; a guest that uses
-	 * the FPU in bursts will revert to loading it on demand.
-	 */
-	if (!use_eager_fpu()) {
-		if (++vcpu->fpu_counter < 5)
-			kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu);
-	}
 	trace_kvm_fpu(0);
 }
From: Rik van Riel <riel@redhat.com>
commit 3913cc3507575273beb165a5e027a081913ed507 upstream.
With the lazy FPU code gone, we no longer use the counter field in struct fpu for anything. Get rid of it.
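For context, the heuristic this counter fed was the pre-series preload decision in switch_fpu_prepare(), already reduced by patch 2/4; a sketch of the old condition only:

	/* Old lazy heuristic: preload only if the past 5 consecutive
	 * context switches used the FPU (or eager mode is on). */
	fpu.preload = static_cpu_has(X86_FEATURE_FPU) &&
		      new_fpu->fpstate_active &&
		      (use_eager_fpu() || new_fpu->counter > 5);

With use_eager_fpu() always true, the counter no longer influences the decision, so the increments and resets removed below were already dead stores.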
Signed-off-by: Rik van Riel <riel@redhat.com>
Reviewed-by: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Quentin Casasnovas <quentin.casasnovas@oracle.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: pbonzini@redhat.com
Link: http://lkml.kernel.org/r/1475627678-20788-6-git-send-email-riel@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Daniel Sangorrin <daniel.sangorrin@toshiba.co.jp>
---
 arch/x86/include/asm/fpu/internal.h |  3 ---
 arch/x86/include/asm/fpu/types.h    | 11 -----------
 arch/x86/kernel/fpu/core.c          |  3 ---
 3 files changed, 17 deletions(-)
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 2ff03cc..16825dd 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -575,15 +575,12 @@ switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu)
 
 		/* Don't change CR0.TS if we just switch! */
 		if (fpu.preload) {
-			new_fpu->counter++;
 			__fpregs_activate(new_fpu);
 			prefetch(&new_fpu->state);
 		}
 	} else {
-		old_fpu->counter = 0;
 		old_fpu->last_cpu = -1;
 		if (fpu.preload) {
-			new_fpu->counter++;
 			if (fpu_want_lazy_restore(new_fpu, cpu))
 				fpu.preload = 0;
 			else
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index 1c6f6ac..dcb85a9 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -303,17 +303,6 @@ struct fpu {
 	unsigned char			fpregs_active;
 
 	/*
-	 * @counter:
-	 *
-	 * This counter contains the number of consecutive context switches
-	 * during which the FPU stays used. If this is over a threshold, the
-	 * lazy FPU restore logic becomes eager, to save the trap overhead.
-	 * This is an unsigned char so that after 256 iterations the counter
-	 * wraps and the context switch behavior turns lazy again; this is to
-	 * deal with bursty apps that only use the FPU for a short time:
-	 */
-	unsigned char			counter;
-	/*
 	 * @state:
 	 *
 	 * In-memory copy of all FPU registers that we save/restore
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index b282364..b322325 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -250,7 +250,6 @@ static void fpu_copy(struct fpu *dst_fpu, struct fpu *src_fpu)
 
 int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
 {
-	dst_fpu->counter = 0;
 	dst_fpu->fpregs_active = 0;
 	dst_fpu->last_cpu = -1;
 
@@ -353,7 +352,6 @@ void fpu__restore(struct fpu *fpu)
 	kernel_fpu_disable();
 	fpregs_activate(fpu);
 	copy_kernel_to_fpregs(&fpu->state);
-	fpu->counter++;
 	kernel_fpu_enable();
 }
 EXPORT_SYMBOL_GPL(fpu__restore);
@@ -370,7 +368,6 @@ EXPORT_SYMBOL_GPL(fpu__restore);
 void fpu__drop(struct fpu *fpu)
 {
 	preempt_disable();
-	fpu->counter = 0;
 
 	if (fpu->fpregs_active) {
 		/* Ignore delayed exceptions from user space */
From: Andy Lutomirski <luto@kernel.org>
commit e63650840e8b053aa09ad934877e87e9941ed135 upstream.
Now that eagerfpu= is gone, remove it from the docs and some comments. Also sync the changes to tools/.
Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Quentin Casasnovas <quentin.casasnovas@oracle.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/cf430dd4481d41280e93ac6cf0def1007a67fc8e.1476740397...
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Daniel Sangorrin <daniel.sangorrin@toshiba.co.jp>
---
 Documentation/kernel-parameters.txt |  5 -----
 arch/x86/include/asm/cpufeatures.h  |  1 -
 arch/x86/include/asm/fpu/types.h    | 23 -----------------------
 3 files changed, 29 deletions(-)
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 3fd53e1..da515c5 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -961,11 +961,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			See Documentation/x86/intel_mpx.txt for more
 			information about the feature.
 
-	eagerfpu=	[X86]
-			on	enable eager fpu restore
-			off	disable eager fpu restore
-			auto	selects the default scheme, which automatically
-				enables eagerfpu restore for xsaveopt.
-
 	module.async_probe [KNL]
 			Enable asynchronous probe on this module.
 
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index dd2269dc..a5fa319 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -104,7 +104,6 @@
 #define X86_FEATURE_EXTD_APICID	( 3*32+26) /* has extended APICID (8 bits) */
 #define X86_FEATURE_AMD_DCM		( 3*32+27) /* multi-node processor */
 #define X86_FEATURE_APERFMPERF	( 3*32+28) /* APERFMPERF */
-/* free, was #define X86_FEATURE_EAGER_FPU	( 3*32+29) * "eagerfpu" Non lazy FPU restore */
 #define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
 
 /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index dcb85a9..0d81c7d 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -310,29 +310,6 @@ struct fpu {
 	 * the registers in the FPU are more recent than this state
 	 * copy. If the task context-switches away then they get
 	 * saved here and represent the FPU state.
-	 *
-	 * After context switches there may be a (short) time period
-	 * during which the in-FPU hardware registers are unchanged
-	 * and still perfectly match this state, if the tasks
-	 * scheduled afterwards are not using the FPU.
-	 *
-	 * This is the 'lazy restore' window of optimization, which
-	 * we track though 'fpu_fpregs_owner_ctx' and 'fpu->last_cpu'.
-	 *
-	 * We detect whether a subsequent task uses the FPU via setting
-	 * CR0::TS to 1, which causes any FPU use to raise a #NM fault.
-	 *
-	 * During this window, if the task gets scheduled again, we
-	 * might be able to skip having to do a restore from this
-	 * memory buffer to the hardware registers - at the cost of
-	 * incurring the overhead of #NM fault traps.
-	 *
-	 * Note that on modern CPUs that support the XSAVEOPT (or other
-	 * optimized XSAVE instructions), we don't use #NM traps anymore,
-	 * as the hardware can track whether FPU registers need saving
-	 * or not. On such CPUs we activate the non-lazy ('eagerfpu')
-	 * logic, which unconditionally saves/restores all FPU state
-	 * across context switches. (if FPU state exists.)
 	 */
 	union fpregs_state		state;
 	/*
On Tue, Oct 16, 2018 at 03:59:59PM +0900, Daniel Sangorrin wrote:
> Hello Greg,
>
> This series contains the patches that complete the removal of the FPU
> lazy-mode code, documentation, and comments for kernel 4.4.y.
>
> The patches are mostly cleanups, but they should also make it easier to
> backport future fixes if more bugs are found in the FPU code.
>
> [PATCH v4 4.4 1/4] KVM: x86: remove eager_fpu field of struct
> [PATCH v4 4.4 2/4] x86/fpu: Remove use_eager_fpu()
> [PATCH v4 4.4 3/4] x86/fpu: Remove struct fpu::counter
> [PATCH v4 4.4 4/4] x86/fpu: Finish excising 'eagerfpu'
Thanks, all now queued up.
greg k-h