mirror of
				git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
				synced 2025-09-04 20:19:47 +08:00 
			
		
		
		
	x86/entry: Switch page fault exception to IDTENTRY_RAW
Convert page fault exceptions to IDTENTRY_RAW:
  - Implement the C entry point with DEFINE_IDTENTRY_RAW
  - Add the CR2 read into the exception handler
  - Add the idtentry_enter/exit_cond_rcu() invocations in
    the regular page fault handler and in the async PF
    part.
  - Emit the ASM stub with DECLARE_IDTENTRY_RAW
  - Remove the ASM idtentry in 64-bit
  - Remove the CR2 read from 64-bit
  - Remove the open coded ASM entry code in 32-bit
  - Fix up the XEN/PV code
  - Remove the old prototypes
No functional change.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Andy Lutomirski <luto@kernel.org>
Link: https://lore.kernel.org/r/20200521202118.238455120@linutronix.de
			
			
This commit is contained in:
		
							parent
							
								
									00cf8baf9c
								
							
						
					
					
						commit
						91eeafea1e
					
				| @ -1398,36 +1398,6 @@ BUILD_INTERRUPT3(hv_stimer0_callback_vector, HYPERV_STIMER0_VECTOR, | |||||||
| 
 | 
 | ||||||
| #endif /* CONFIG_HYPERV */ | #endif /* CONFIG_HYPERV */ | ||||||
| 
 | 
 | ||||||
| SYM_CODE_START(page_fault) |  | ||||||
| 	ASM_CLAC |  | ||||||
| 	pushl	$do_page_fault |  | ||||||
| 	jmp	common_exception_read_cr2 |  | ||||||
| SYM_CODE_END(page_fault) |  | ||||||
| 
 |  | ||||||
| SYM_CODE_START_LOCAL_NOALIGN(common_exception_read_cr2) |  | ||||||
| 	/* the function address is in %gs's slot on the stack */ |  | ||||||
| 	SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1 |  | ||||||
| 
 |  | ||||||
| 	ENCODE_FRAME_POINTER |  | ||||||
| 
 |  | ||||||
| 	/* fixup %gs */ |  | ||||||
| 	GS_TO_REG %ecx |  | ||||||
| 	movl	PT_GS(%esp), %edi |  | ||||||
| 	REG_TO_PTGS %ecx |  | ||||||
| 	SET_KERNEL_GS %ecx |  | ||||||
| 
 |  | ||||||
| 	GET_CR2_INTO(%ecx)			# might clobber %eax |  | ||||||
| 
 |  | ||||||
| 	/* fixup orig %eax */ |  | ||||||
| 	movl	PT_ORIG_EAX(%esp), %edx		# get the error code |  | ||||||
| 	movl	$-1, PT_ORIG_EAX(%esp)		# no syscall to restart |  | ||||||
| 
 |  | ||||||
| 	TRACE_IRQS_OFF |  | ||||||
| 	movl	%esp, %eax			# pt_regs pointer |  | ||||||
| 	CALL_NOSPEC edi |  | ||||||
| 	jmp	ret_from_exception |  | ||||||
| SYM_CODE_END(common_exception_read_cr2) |  | ||||||
| 
 |  | ||||||
| SYM_CODE_START_LOCAL_NOALIGN(common_exception) | SYM_CODE_START_LOCAL_NOALIGN(common_exception) | ||||||
| 	/* the function address is in %gs's slot on the stack */ | 	/* the function address is in %gs's slot on the stack */ | ||||||
| 	SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1 | 	SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1 | ||||||
|  | |||||||
| @ -506,15 +506,6 @@ SYM_CODE_END(spurious_entries_start) | |||||||
| 	call	error_entry | 	call	error_entry | ||||||
| 	UNWIND_HINT_REGS | 	UNWIND_HINT_REGS | ||||||
| 
 | 
 | ||||||
| 	.if \vector == X86_TRAP_PF |  | ||||||
| 		/* |  | ||||||
| 		 * Store CR2 early so subsequent faults cannot clobber it. Use R12 as |  | ||||||
| 		 * intermediate storage as RDX can be clobbered in enter_from_user_mode(). |  | ||||||
| 		 * GET_CR2_INTO can clobber RAX. |  | ||||||
| 		 */ |  | ||||||
| 		GET_CR2_INTO(%r12);
 |  | ||||||
| 	.endif |  | ||||||
| 
 |  | ||||||
| 	.if \sane == 0 | 	.if \sane == 0 | ||||||
| 	TRACE_IRQS_OFF | 	TRACE_IRQS_OFF | ||||||
| 
 | 
 | ||||||
| @ -533,10 +524,6 @@ SYM_CODE_END(spurious_entries_start) | |||||||
| 		movq	$-1, ORIG_RAX(%rsp)	/* no syscall to restart */ | 		movq	$-1, ORIG_RAX(%rsp)	/* no syscall to restart */ | ||||||
| 	.endif | 	.endif | ||||||
| 
 | 
 | ||||||
| 	.if \vector == X86_TRAP_PF |  | ||||||
| 		movq	%r12, %rdx		/* Move CR2 into 3rd argument */ |  | ||||||
| 	.endif |  | ||||||
| 
 |  | ||||||
| 	call	\cfunc | 	call	\cfunc | ||||||
| 
 | 
 | ||||||
| 	.if \sane == 0 | 	.if \sane == 0 | ||||||
| @ -1059,12 +1046,6 @@ apicinterrupt SPURIOUS_APIC_VECTOR		spurious_interrupt		smp_spurious_interrupt | |||||||
| apicinterrupt IRQ_WORK_VECTOR			irq_work_interrupt		smp_irq_work_interrupt | apicinterrupt IRQ_WORK_VECTOR			irq_work_interrupt		smp_irq_work_interrupt | ||||||
| #endif | #endif | ||||||
| 
 | 
 | ||||||
| /* |  | ||||||
|  * Exception entry points. |  | ||||||
|  */ |  | ||||||
| 
 |  | ||||||
| idtentry	X86_TRAP_PF		page_fault		do_page_fault			has_error_code=1 |  | ||||||
| 
 |  | ||||||
| /* | /* | ||||||
|  * Reload gs selector with exception handling |  * Reload gs selector with exception handling | ||||||
|  * edi:  new selector |  * edi:  new selector | ||||||
|  | |||||||
| @ -387,7 +387,8 @@ DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_GP,	exc_general_protection); | |||||||
| DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_AC,	exc_alignment_check); | DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_AC,	exc_alignment_check); | ||||||
| 
 | 
 | ||||||
| /* Raw exception entries which need extra work */ | /* Raw exception entries which need extra work */ | ||||||
| DECLARE_IDTENTRY_RAW(X86_TRAP_BP,	exc_int3); | DECLARE_IDTENTRY_RAW(X86_TRAP_BP,		exc_int3); | ||||||
|  | DECLARE_IDTENTRY_RAW_ERRORCODE(X86_TRAP_PF,	exc_page_fault); | ||||||
| 
 | 
 | ||||||
| #ifdef CONFIG_X86_MCE | #ifdef CONFIG_X86_MCE | ||||||
| DECLARE_IDTENTRY_MCE(X86_TRAP_MC,	exc_machine_check); | DECLARE_IDTENTRY_MCE(X86_TRAP_MC,	exc_machine_check); | ||||||
|  | |||||||
| @ -9,17 +9,6 @@ | |||||||
| #include <asm/idtentry.h> | #include <asm/idtentry.h> | ||||||
| #include <asm/siginfo.h>			/* TRAP_TRACE, ... */ | #include <asm/siginfo.h>			/* TRAP_TRACE, ... */ | ||||||
| 
 | 
 | ||||||
| #define dotraplinkage __visible |  | ||||||
| 
 |  | ||||||
| asmlinkage void page_fault(void); |  | ||||||
| asmlinkage void async_page_fault(void); |  | ||||||
| 
 |  | ||||||
| #if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV) |  | ||||||
| asmlinkage void xen_page_fault(void); |  | ||||||
| #endif |  | ||||||
| 
 |  | ||||||
| dotraplinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address); |  | ||||||
| 
 |  | ||||||
| #ifdef CONFIG_X86_64 | #ifdef CONFIG_X86_64 | ||||||
| asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs); | asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs); | ||||||
| asmlinkage __visible notrace | asmlinkage __visible notrace | ||||||
|  | |||||||
| @ -62,7 +62,7 @@ static const __initconst struct idt_data early_idts[] = { | |||||||
| 	INTG(X86_TRAP_DB,		asm_exc_debug), | 	INTG(X86_TRAP_DB,		asm_exc_debug), | ||||||
| 	SYSG(X86_TRAP_BP,		asm_exc_int3), | 	SYSG(X86_TRAP_BP,		asm_exc_int3), | ||||||
| #ifdef CONFIG_X86_32 | #ifdef CONFIG_X86_32 | ||||||
| 	INTG(X86_TRAP_PF,		page_fault), | 	INTG(X86_TRAP_PF,		asm_exc_page_fault), | ||||||
| #endif | #endif | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| @ -156,7 +156,7 @@ static const __initconst struct idt_data apic_idts[] = { | |||||||
|  * stacks work only after cpu_init(). |  * stacks work only after cpu_init(). | ||||||
|  */ |  */ | ||||||
| static const __initconst struct idt_data early_pf_idts[] = { | static const __initconst struct idt_data early_pf_idts[] = { | ||||||
| 	INTG(X86_TRAP_PF,		page_fault), | 	INTG(X86_TRAP_PF,		asm_exc_page_fault), | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  | |||||||
| @ -218,7 +218,7 @@ again: | |||||||
| } | } | ||||||
| EXPORT_SYMBOL_GPL(kvm_async_pf_task_wake); | EXPORT_SYMBOL_GPL(kvm_async_pf_task_wake); | ||||||
| 
 | 
 | ||||||
| u32 kvm_read_and_reset_apf_flags(void) | noinstr u32 kvm_read_and_reset_apf_flags(void) | ||||||
| { | { | ||||||
| 	u32 flags = 0; | 	u32 flags = 0; | ||||||
| 
 | 
 | ||||||
| @ -230,11 +230,11 @@ u32 kvm_read_and_reset_apf_flags(void) | |||||||
| 	return flags; | 	return flags; | ||||||
| } | } | ||||||
| EXPORT_SYMBOL_GPL(kvm_read_and_reset_apf_flags); | EXPORT_SYMBOL_GPL(kvm_read_and_reset_apf_flags); | ||||||
| NOKPROBE_SYMBOL(kvm_read_and_reset_apf_flags); |  | ||||||
| 
 | 
 | ||||||
| bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token) | noinstr bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token) | ||||||
| { | { | ||||||
| 	u32 reason = kvm_read_and_reset_apf_flags(); | 	u32 reason = kvm_read_and_reset_apf_flags(); | ||||||
|  | 	bool rcu_exit; | ||||||
| 
 | 
 | ||||||
| 	switch (reason) { | 	switch (reason) { | ||||||
| 	case KVM_PV_REASON_PAGE_NOT_PRESENT: | 	case KVM_PV_REASON_PAGE_NOT_PRESENT: | ||||||
| @ -244,6 +244,9 @@ bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token) | |||||||
| 		return false; | 		return false; | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
|  | 	rcu_exit = idtentry_enter_cond_rcu(regs); | ||||||
|  | 	instrumentation_begin(); | ||||||
|  | 
 | ||||||
| 	/*
 | 	/*
 | ||||||
| 	 * If the host managed to inject an async #PF into an interrupt | 	 * If the host managed to inject an async #PF into an interrupt | ||||||
| 	 * disabled region, then die hard as this is not going to end well | 	 * disabled region, then die hard as this is not going to end well | ||||||
| @ -258,13 +261,13 @@ bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token) | |||||||
| 		/* Page is swapped out by the host. */ | 		/* Page is swapped out by the host. */ | ||||||
| 		kvm_async_pf_task_wait_schedule(token); | 		kvm_async_pf_task_wait_schedule(token); | ||||||
| 	} else { | 	} else { | ||||||
| 		rcu_irq_enter(); |  | ||||||
| 		kvm_async_pf_task_wake(token); | 		kvm_async_pf_task_wake(token); | ||||||
| 		rcu_irq_exit(); |  | ||||||
| 	} | 	} | ||||||
|  | 
 | ||||||
|  | 	instrumentation_end(); | ||||||
|  | 	idtentry_exit_cond_rcu(regs, rcu_exit); | ||||||
| 	return true; | 	return true; | ||||||
| } | } | ||||||
| NOKPROBE_SYMBOL(__kvm_handle_async_pf); |  | ||||||
| 
 | 
 | ||||||
| static void __init paravirt_ops_setup(void) | static void __init paravirt_ops_setup(void) | ||||||
| { | { | ||||||
|  | |||||||
| @ -1357,11 +1357,38 @@ trace_page_fault_entries(struct pt_regs *regs, unsigned long error_code, | |||||||
| 		trace_page_fault_kernel(address, regs, error_code); | 		trace_page_fault_kernel(address, regs, error_code); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| dotraplinkage void | static __always_inline void | ||||||
| do_page_fault(struct pt_regs *regs, unsigned long hw_error_code, | handle_page_fault(struct pt_regs *regs, unsigned long error_code, | ||||||
| 		unsigned long address) | 			      unsigned long address) | ||||||
| { | { | ||||||
|  | 	trace_page_fault_entries(regs, error_code, address); | ||||||
|  | 
 | ||||||
|  | 	if (unlikely(kmmio_fault(regs, address))) | ||||||
|  | 		return; | ||||||
|  | 
 | ||||||
|  | 	/* Was the fault on kernel-controlled part of the address space? */ | ||||||
|  | 	if (unlikely(fault_in_kernel_space(address))) { | ||||||
|  | 		do_kern_addr_fault(regs, error_code, address); | ||||||
|  | 	} else { | ||||||
|  | 		do_user_addr_fault(regs, error_code, address); | ||||||
|  | 		/*
 | ||||||
|  | 		 * User address page fault handling might have reenabled | ||||||
|  | 		 * interrupts. Fixing up all potential exit points of | ||||||
|  | 		 * do_user_addr_fault() and its leaf functions is just not | ||||||
|  | 		 * doable w/o creating an unholy mess or turning the code | ||||||
|  | 		 * upside down. | ||||||
|  | 		 */ | ||||||
|  | 		local_irq_disable(); | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | DEFINE_IDTENTRY_RAW_ERRORCODE(exc_page_fault) | ||||||
|  | { | ||||||
|  | 	unsigned long address = read_cr2(); | ||||||
|  | 	bool rcu_exit; | ||||||
|  | 
 | ||||||
| 	prefetchw(&current->mm->mmap_lock); | 	prefetchw(&current->mm->mmap_lock); | ||||||
|  | 
 | ||||||
| 	/*
 | 	/*
 | ||||||
| 	 * KVM has two types of events that are, logically, interrupts, but | 	 * KVM has two types of events that are, logically, interrupts, but | ||||||
| 	 * are unfortunately delivered using the #PF vector.  These events are | 	 * are unfortunately delivered using the #PF vector.  These events are | ||||||
| @ -1376,28 +1403,28 @@ do_page_fault(struct pt_regs *regs, unsigned long hw_error_code, | |||||||
| 	 * getting values from real and async page faults mixed up. | 	 * getting values from real and async page faults mixed up. | ||||||
| 	 * | 	 * | ||||||
| 	 * Fingers crossed. | 	 * Fingers crossed. | ||||||
|  | 	 * | ||||||
|  | 	 * The async #PF handling code takes care of idtentry handling | ||||||
|  | 	 * itself. | ||||||
| 	 */ | 	 */ | ||||||
| 	if (kvm_handle_async_pf(regs, (u32)address)) | 	if (kvm_handle_async_pf(regs, (u32)address)) | ||||||
| 		return; | 		return; | ||||||
| 
 | 
 | ||||||
| 	trace_page_fault_entries(regs, hw_error_code, address); | 	/*
 | ||||||
|  | 	 * Entry handling for valid #PF from kernel mode is slightly | ||||||
|  | 	 * different: RCU is already watching and rcu_irq_enter() must not | ||||||
|  | 	 * be invoked because a kernel fault on a user space address might | ||||||
|  | 	 * sleep. | ||||||
|  | 	 * | ||||||
|  | 	 * In case the fault hit a RCU idle region the conditional entry | ||||||
|  | 	 * code reenabled RCU to avoid subsequent wreckage which helps | ||||||
|  | 	 * debugability. | ||||||
|  | 	 */ | ||||||
|  | 	rcu_exit = idtentry_enter_cond_rcu(regs); | ||||||
| 
 | 
 | ||||||
| 	if (unlikely(kmmio_fault(regs, address))) | 	instrumentation_begin(); | ||||||
| 		return; | 	handle_page_fault(regs, error_code, address); | ||||||
|  | 	instrumentation_end(); | ||||||
| 
 | 
 | ||||||
| 	/* Was the fault on kernel-controlled part of the address space? */ | 	idtentry_exit_cond_rcu(regs, rcu_exit); | ||||||
| 	if (unlikely(fault_in_kernel_space(address))) { |  | ||||||
| 		do_kern_addr_fault(regs, hw_error_code, address); |  | ||||||
| 	} else { |  | ||||||
| 		do_user_addr_fault(regs, hw_error_code, address); |  | ||||||
| 		/*
 |  | ||||||
| 		 * User address page fault handling might have reenabled |  | ||||||
| 		 * interrupts. Fixing up all potential exit points of |  | ||||||
| 		 * do_user_addr_fault() and its leaf functions is just not |  | ||||||
| 		 * doable w/o creating an unholy mess or turning the code |  | ||||||
| 		 * upside down. |  | ||||||
| 		 */ |  | ||||||
| 		local_irq_disable(); |  | ||||||
| 	} |  | ||||||
| } | } | ||||||
| NOKPROBE_SYMBOL(do_page_fault); |  | ||||||
|  | |||||||
| @ -626,7 +626,7 @@ static struct trap_array_entry trap_array[] = { | |||||||
| #ifdef CONFIG_IA32_EMULATION | #ifdef CONFIG_IA32_EMULATION | ||||||
| 	{ entry_INT80_compat,          xen_entry_INT80_compat,          false }, | 	{ entry_INT80_compat,          xen_entry_INT80_compat,          false }, | ||||||
| #endif | #endif | ||||||
| 	{ page_fault,                  xen_page_fault,                  false }, | 	TRAP_ENTRY(exc_page_fault,			false ), | ||||||
| 	TRAP_ENTRY(exc_divide_error,			false ), | 	TRAP_ENTRY(exc_divide_error,			false ), | ||||||
| 	TRAP_ENTRY(exc_bounds,				false ), | 	TRAP_ENTRY(exc_bounds,				false ), | ||||||
| 	TRAP_ENTRY(exc_invalid_op,			false ), | 	TRAP_ENTRY(exc_invalid_op,			false ), | ||||||
|  | |||||||
| @ -43,7 +43,7 @@ xen_pv_trap asm_exc_invalid_tss | |||||||
| xen_pv_trap asm_exc_segment_not_present | xen_pv_trap asm_exc_segment_not_present | ||||||
| xen_pv_trap asm_exc_stack_segment | xen_pv_trap asm_exc_stack_segment | ||||||
| xen_pv_trap asm_exc_general_protection | xen_pv_trap asm_exc_general_protection | ||||||
| xen_pv_trap page_fault | xen_pv_trap asm_exc_page_fault | ||||||
| xen_pv_trap asm_exc_spurious_interrupt_bug | xen_pv_trap asm_exc_spurious_interrupt_bug | ||||||
| xen_pv_trap asm_exc_coprocessor_error | xen_pv_trap asm_exc_coprocessor_error | ||||||
| xen_pv_trap asm_exc_alignment_check | xen_pv_trap asm_exc_alignment_check | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 Thomas Gleixner
						Thomas Gleixner