2
0
mirror of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git synced 2025-09-04 20:19:47 +08:00

x86/entry: Switch page fault exception to IDTENTRY_RAW

Convert page fault exceptions to IDTENTRY_RAW:

  - Implement the C entry point with DEFINE_IDTENTRY_RAW
  - Add the CR2 read into the exception handler
  - Add the idtentry_enter/exit_cond_rcu() invocations in
    the regular page fault handler and in the async PF
    part.
  - Emit the ASM stub with DECLARE_IDTENTRY_RAW
  - Remove the ASM idtentry in 64-bit
  - Remove the CR2 read from 64-bit
  - Remove the open coded ASM entry code in 32-bit
  - Fix up the XEN/PV code
  - Remove the old prototypes

No functional change.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Andy Lutomirski <luto@kernel.org>
Link: https://lore.kernel.org/r/20200521202118.238455120@linutronix.de
This commit is contained in:
Thomas Gleixner 2020-05-21 22:05:28 +02:00
parent 00cf8baf9c
commit 91eeafea1e
9 changed files with 63 additions and 92 deletions

View File

@ -1398,36 +1398,6 @@ BUILD_INTERRUPT3(hv_stimer0_callback_vector, HYPERV_STIMER0_VECTOR,
#endif /* CONFIG_HYPERV */ #endif /* CONFIG_HYPERV */
SYM_CODE_START(page_fault)
ASM_CLAC
pushl $do_page_fault
jmp common_exception_read_cr2
SYM_CODE_END(page_fault)
SYM_CODE_START_LOCAL_NOALIGN(common_exception_read_cr2)
/* the function address is in %gs's slot on the stack */
SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1
ENCODE_FRAME_POINTER
/* fixup %gs */
GS_TO_REG %ecx
movl PT_GS(%esp), %edi
REG_TO_PTGS %ecx
SET_KERNEL_GS %ecx
GET_CR2_INTO(%ecx) # might clobber %eax
/* fixup orig %eax */
movl PT_ORIG_EAX(%esp), %edx # get the error code
movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart
TRACE_IRQS_OFF
movl %esp, %eax # pt_regs pointer
CALL_NOSPEC edi
jmp ret_from_exception
SYM_CODE_END(common_exception_read_cr2)
SYM_CODE_START_LOCAL_NOALIGN(common_exception) SYM_CODE_START_LOCAL_NOALIGN(common_exception)
/* the function address is in %gs's slot on the stack */ /* the function address is in %gs's slot on the stack */
SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1 SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1

View File

@ -506,15 +506,6 @@ SYM_CODE_END(spurious_entries_start)
call error_entry call error_entry
UNWIND_HINT_REGS UNWIND_HINT_REGS
.if \vector == X86_TRAP_PF
/*
* Store CR2 early so subsequent faults cannot clobber it. Use R12 as
* intermediate storage as RDX can be clobbered in enter_from_user_mode().
* GET_CR2_INTO can clobber RAX.
*/
GET_CR2_INTO(%r12);
.endif
.if \sane == 0 .if \sane == 0
TRACE_IRQS_OFF TRACE_IRQS_OFF
@ -533,10 +524,6 @@ SYM_CODE_END(spurious_entries_start)
movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */ movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */
.endif .endif
.if \vector == X86_TRAP_PF
movq %r12, %rdx /* Move CR2 into 3rd argument */
.endif
call \cfunc call \cfunc
.if \sane == 0 .if \sane == 0
@ -1059,12 +1046,6 @@ apicinterrupt SPURIOUS_APIC_VECTOR spurious_interrupt smp_spurious_interrupt
apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
#endif #endif
/*
* Exception entry points.
*/
idtentry X86_TRAP_PF page_fault do_page_fault has_error_code=1
/* /*
* Reload gs selector with exception handling * Reload gs selector with exception handling
* edi: new selector * edi: new selector

View File

@ -387,7 +387,8 @@ DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_GP, exc_general_protection);
DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_AC, exc_alignment_check); DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_AC, exc_alignment_check);
/* Raw exception entries which need extra work */ /* Raw exception entries which need extra work */
DECLARE_IDTENTRY_RAW(X86_TRAP_BP, exc_int3); DECLARE_IDTENTRY_RAW(X86_TRAP_BP, exc_int3);
DECLARE_IDTENTRY_RAW_ERRORCODE(X86_TRAP_PF, exc_page_fault);
#ifdef CONFIG_X86_MCE #ifdef CONFIG_X86_MCE
DECLARE_IDTENTRY_MCE(X86_TRAP_MC, exc_machine_check); DECLARE_IDTENTRY_MCE(X86_TRAP_MC, exc_machine_check);

View File

@ -9,17 +9,6 @@
#include <asm/idtentry.h> #include <asm/idtentry.h>
#include <asm/siginfo.h> /* TRAP_TRACE, ... */ #include <asm/siginfo.h> /* TRAP_TRACE, ... */
#define dotraplinkage __visible
asmlinkage void page_fault(void);
asmlinkage void async_page_fault(void);
#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
asmlinkage void xen_page_fault(void);
#endif
dotraplinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address);
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs); asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs);
asmlinkage __visible notrace asmlinkage __visible notrace

View File

@ -62,7 +62,7 @@ static const __initconst struct idt_data early_idts[] = {
INTG(X86_TRAP_DB, asm_exc_debug), INTG(X86_TRAP_DB, asm_exc_debug),
SYSG(X86_TRAP_BP, asm_exc_int3), SYSG(X86_TRAP_BP, asm_exc_int3),
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
INTG(X86_TRAP_PF, page_fault), INTG(X86_TRAP_PF, asm_exc_page_fault),
#endif #endif
}; };
@ -156,7 +156,7 @@ static const __initconst struct idt_data apic_idts[] = {
* stacks work only after cpu_init(). * stacks work only after cpu_init().
*/ */
static const __initconst struct idt_data early_pf_idts[] = { static const __initconst struct idt_data early_pf_idts[] = {
INTG(X86_TRAP_PF, page_fault), INTG(X86_TRAP_PF, asm_exc_page_fault),
}; };
/* /*

View File

@ -218,7 +218,7 @@ again:
} }
EXPORT_SYMBOL_GPL(kvm_async_pf_task_wake); EXPORT_SYMBOL_GPL(kvm_async_pf_task_wake);
u32 kvm_read_and_reset_apf_flags(void) noinstr u32 kvm_read_and_reset_apf_flags(void)
{ {
u32 flags = 0; u32 flags = 0;
@ -230,11 +230,11 @@ u32 kvm_read_and_reset_apf_flags(void)
return flags; return flags;
} }
EXPORT_SYMBOL_GPL(kvm_read_and_reset_apf_flags); EXPORT_SYMBOL_GPL(kvm_read_and_reset_apf_flags);
NOKPROBE_SYMBOL(kvm_read_and_reset_apf_flags);
bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token) noinstr bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token)
{ {
u32 reason = kvm_read_and_reset_apf_flags(); u32 reason = kvm_read_and_reset_apf_flags();
bool rcu_exit;
switch (reason) { switch (reason) {
case KVM_PV_REASON_PAGE_NOT_PRESENT: case KVM_PV_REASON_PAGE_NOT_PRESENT:
@ -244,6 +244,9 @@ bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token)
return false; return false;
} }
rcu_exit = idtentry_enter_cond_rcu(regs);
instrumentation_begin();
/* /*
* If the host managed to inject an async #PF into an interrupt * If the host managed to inject an async #PF into an interrupt
* disabled region, then die hard as this is not going to end well * disabled region, then die hard as this is not going to end well
@ -258,13 +261,13 @@ bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token)
/* Page is swapped out by the host. */ /* Page is swapped out by the host. */
kvm_async_pf_task_wait_schedule(token); kvm_async_pf_task_wait_schedule(token);
} else { } else {
rcu_irq_enter();
kvm_async_pf_task_wake(token); kvm_async_pf_task_wake(token);
rcu_irq_exit();
} }
instrumentation_end();
idtentry_exit_cond_rcu(regs, rcu_exit);
return true; return true;
} }
NOKPROBE_SYMBOL(__kvm_handle_async_pf);
static void __init paravirt_ops_setup(void) static void __init paravirt_ops_setup(void)
{ {

View File

@ -1357,11 +1357,38 @@ trace_page_fault_entries(struct pt_regs *regs, unsigned long error_code,
trace_page_fault_kernel(address, regs, error_code); trace_page_fault_kernel(address, regs, error_code);
} }
dotraplinkage void static __always_inline void
do_page_fault(struct pt_regs *regs, unsigned long hw_error_code, handle_page_fault(struct pt_regs *regs, unsigned long error_code,
unsigned long address) unsigned long address)
{ {
trace_page_fault_entries(regs, error_code, address);
if (unlikely(kmmio_fault(regs, address)))
return;
/* Was the fault on kernel-controlled part of the address space? */
if (unlikely(fault_in_kernel_space(address))) {
do_kern_addr_fault(regs, error_code, address);
} else {
do_user_addr_fault(regs, error_code, address);
/*
* User address page fault handling might have reenabled
* interrupts. Fixing up all potential exit points of
* do_user_addr_fault() and its leaf functions is just not
* doable w/o creating an unholy mess or turning the code
* upside down.
*/
local_irq_disable();
}
}
DEFINE_IDTENTRY_RAW_ERRORCODE(exc_page_fault)
{
unsigned long address = read_cr2();
bool rcu_exit;
prefetchw(&current->mm->mmap_lock); prefetchw(&current->mm->mmap_lock);
/* /*
* KVM has two types of events that are, logically, interrupts, but * KVM has two types of events that are, logically, interrupts, but
* are unfortunately delivered using the #PF vector. These events are * are unfortunately delivered using the #PF vector. These events are
@ -1376,28 +1403,28 @@ do_page_fault(struct pt_regs *regs, unsigned long hw_error_code,
* getting values from real and async page faults mixed up. * getting values from real and async page faults mixed up.
* *
* Fingers crossed. * Fingers crossed.
*
* The async #PF handling code takes care of idtentry handling
* itself.
*/ */
if (kvm_handle_async_pf(regs, (u32)address)) if (kvm_handle_async_pf(regs, (u32)address))
return; return;
trace_page_fault_entries(regs, hw_error_code, address); /*
* Entry handling for valid #PF from kernel mode is slightly
* different: RCU is already watching and rcu_irq_enter() must not
* be invoked because a kernel fault on a user space address might
* sleep.
*
* In case the fault hit an RCU idle region, the conditional entry
* code reenabled RCU to avoid subsequent wreckage, which helps
* debuggability.
*/
rcu_exit = idtentry_enter_cond_rcu(regs);
if (unlikely(kmmio_fault(regs, address))) instrumentation_begin();
return; handle_page_fault(regs, error_code, address);
instrumentation_end();
/* Was the fault on kernel-controlled part of the address space? */ idtentry_exit_cond_rcu(regs, rcu_exit);
if (unlikely(fault_in_kernel_space(address))) {
do_kern_addr_fault(regs, hw_error_code, address);
} else {
do_user_addr_fault(regs, hw_error_code, address);
/*
* User address page fault handling might have reenabled
* interrupts. Fixing up all potential exit points of
* do_user_addr_fault() and its leaf functions is just not
* doable w/o creating an unholy mess or turning the code
* upside down.
*/
local_irq_disable();
}
} }
NOKPROBE_SYMBOL(do_page_fault);

View File

@ -626,7 +626,7 @@ static struct trap_array_entry trap_array[] = {
#ifdef CONFIG_IA32_EMULATION #ifdef CONFIG_IA32_EMULATION
{ entry_INT80_compat, xen_entry_INT80_compat, false }, { entry_INT80_compat, xen_entry_INT80_compat, false },
#endif #endif
{ page_fault, xen_page_fault, false }, TRAP_ENTRY(exc_page_fault, false ),
TRAP_ENTRY(exc_divide_error, false ), TRAP_ENTRY(exc_divide_error, false ),
TRAP_ENTRY(exc_bounds, false ), TRAP_ENTRY(exc_bounds, false ),
TRAP_ENTRY(exc_invalid_op, false ), TRAP_ENTRY(exc_invalid_op, false ),

View File

@ -43,7 +43,7 @@ xen_pv_trap asm_exc_invalid_tss
xen_pv_trap asm_exc_segment_not_present xen_pv_trap asm_exc_segment_not_present
xen_pv_trap asm_exc_stack_segment xen_pv_trap asm_exc_stack_segment
xen_pv_trap asm_exc_general_protection xen_pv_trap asm_exc_general_protection
xen_pv_trap page_fault xen_pv_trap asm_exc_page_fault
xen_pv_trap asm_exc_spurious_interrupt_bug xen_pv_trap asm_exc_spurious_interrupt_bug
xen_pv_trap asm_exc_coprocessor_error xen_pv_trap asm_exc_coprocessor_error
xen_pv_trap asm_exc_alignment_check xen_pv_trap asm_exc_alignment_check