mirror of
				git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
				synced 2025-09-04 20:19:47 +08:00 
			
		
		
		
	x86/entry/64: Handle FSGSBASE enabled paranoid entry/exit
Without FSGSBASE, user space cannot change GSBASE other than through a PRCTL. The kernel enforces that the user space GSBASE value is positive as negative values are used for detecting the kernel space GSBASE value in the paranoid entry code. If FSGSBASE is enabled, user space can set arbitrary GSBASE values without kernel intervention, including negative ones, which breaks the paranoid entry assumptions. To avoid this, paranoid entry needs to unconditionally save the current GSBASE value independent of the interrupted context, retrieve and write the kernel GSBASE and unconditionally restore the saved value on exit. The restore happens either in paranoid_exit or in the special exit path of the NMI low level code. All other entry code paths which use unconditional SWAPGS are not affected as they do not depend on the actual content. [ tglx: Massaged changelogs and comments ] Suggested-by: H. Peter Anvin <hpa@zytor.com> Suggested-by: Andy Lutomirski <luto@kernel.org> Suggested-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Chang S. Bae <chang.seok.bae@intel.com> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Sasha Levin <sashal@kernel.org> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Link: https://lkml.kernel.org/r/1557309753-24073-13-git-send-email-chang.seok.bae@intel.com Link: https://lkml.kernel.org/r/20200528201402.1708239-12-sashal@kernel.org
This commit is contained in:
		
							parent
							
								
									eaad981291
								
							
						
					
					
						commit
						c82965f9e5
					
				| @ -342,6 +342,12 @@ For 32-bit we have the following conventions - kernel is built with | |||||||
| #endif | #endif | ||||||
| .endm | .endm | ||||||
| 
 | 
 | ||||||
|  | .macro SAVE_AND_SET_GSBASE scratch_reg:req save_reg:req | ||||||
|  | 	rdgsbase \save_reg | ||||||
|  | 	GET_PERCPU_BASE \scratch_reg | ||||||
|  | 	wrgsbase \scratch_reg | ||||||
|  | .endm | ||||||
|  | 
 | ||||||
| #else /* CONFIG_X86_64 */ | #else /* CONFIG_X86_64 */ | ||||||
| # undef		UNWIND_HINT_IRET_REGS | # undef		UNWIND_HINT_IRET_REGS | ||||||
| # define	UNWIND_HINT_IRET_REGS | # define	UNWIND_HINT_IRET_REGS | ||||||
|  | |||||||
| @ -38,6 +38,7 @@ | |||||||
| #include <asm/frame.h> | #include <asm/frame.h> | ||||||
| #include <asm/trapnr.h> | #include <asm/trapnr.h> | ||||||
| #include <asm/nospec-branch.h> | #include <asm/nospec-branch.h> | ||||||
|  | #include <asm/fsgsbase.h> | ||||||
| #include <linux/err.h> | #include <linux/err.h> | ||||||
| 
 | 
 | ||||||
| #include "calling.h" | #include "calling.h" | ||||||
| @ -426,10 +427,7 @@ SYM_CODE_START(\asmsym) | |||||||
| 	testb	$3, CS-ORIG_RAX(%rsp) | 	testb	$3, CS-ORIG_RAX(%rsp) | ||||||
| 	jnz	.Lfrom_usermode_switch_stack_\@
 | 	jnz	.Lfrom_usermode_switch_stack_\@
 | ||||||
| 
 | 
 | ||||||
| 	/* | 	/* paranoid_entry returns GS information for paranoid_exit in EBX. */ | ||||||
| 	 * paranoid_entry returns SWAPGS flag for paranoid_exit in EBX. |  | ||||||
| 	 * EBX == 0 -> SWAPGS, EBX == 1 -> no SWAPGS |  | ||||||
| 	 */ |  | ||||||
| 	call	paranoid_entry | 	call	paranoid_entry | ||||||
| 
 | 
 | ||||||
| 	UNWIND_HINT_REGS | 	UNWIND_HINT_REGS | ||||||
| @ -458,10 +456,7 @@ SYM_CODE_START(\asmsym) | |||||||
| 	UNWIND_HINT_IRET_REGS offset=8 | 	UNWIND_HINT_IRET_REGS offset=8 | ||||||
| 	ASM_CLAC | 	ASM_CLAC | ||||||
| 
 | 
 | ||||||
| 	/* | 	/* paranoid_entry returns GS information for paranoid_exit in EBX. */ | ||||||
| 	 * paranoid_entry returns SWAPGS flag for paranoid_exit in EBX. |  | ||||||
| 	 * EBX == 0 -> SWAPGS, EBX == 1 -> no SWAPGS |  | ||||||
| 	 */ |  | ||||||
| 	call	paranoid_entry | 	call	paranoid_entry | ||||||
| 	UNWIND_HINT_REGS | 	UNWIND_HINT_REGS | ||||||
| 
 | 
 | ||||||
| @ -798,9 +793,14 @@ SYM_CODE_END(xen_failsafe_callback) | |||||||
| #endif /* CONFIG_XEN_PV */ | #endif /* CONFIG_XEN_PV */ | ||||||
| 
 | 
 | ||||||
| /* | /* | ||||||
|  * Save all registers in pt_regs, and switch gs if needed. |  * Save all registers in pt_regs. Return GSBASE related information | ||||||
|  * Use slow, but surefire "are we in kernel?" check. |  * in EBX depending on the availability of the FSGSBASE instructions: | ||||||
|  * Return: ebx=0: need swapgs on exit, ebx=1: otherwise |  * | ||||||
|  |  * FSGSBASE	R/EBX | ||||||
|  |  *     N        0 -> SWAPGS on exit | ||||||
|  |  *              1 -> no SWAPGS on exit | ||||||
|  |  * | ||||||
|  |  *     Y        GSBASE value at entry, must be restored in paranoid_exit | ||||||
|  */ |  */ | ||||||
| SYM_CODE_START_LOCAL(paranoid_entry) | SYM_CODE_START_LOCAL(paranoid_entry) | ||||||
| 	UNWIND_HINT_FUNC | 	UNWIND_HINT_FUNC | ||||||
| @ -808,7 +808,6 @@ SYM_CODE_START_LOCAL(paranoid_entry) | |||||||
| 	PUSH_AND_CLEAR_REGS save_ret=1 | 	PUSH_AND_CLEAR_REGS save_ret=1 | ||||||
| 	ENCODE_FRAME_POINTER 8 | 	ENCODE_FRAME_POINTER 8 | ||||||
| 
 | 
 | ||||||
| 1: |  | ||||||
| 	/* | 	/* | ||||||
| 	 * Always stash CR3 in %r14.  This value will be restored, | 	 * Always stash CR3 in %r14.  This value will be restored, | ||||||
| 	 * verbatim, at exit.  Needed if paranoid_entry interrupted | 	 * verbatim, at exit.  Needed if paranoid_entry interrupted | ||||||
| @ -826,6 +825,28 @@ SYM_CODE_START_LOCAL(paranoid_entry) | |||||||
| 	 */ | 	 */ | ||||||
| 	SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14 | 	SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14 | ||||||
| 
 | 
 | ||||||
|  | 	/* | ||||||
|  | 	 * Handling GSBASE depends on the availability of FSGSBASE. | ||||||
|  | 	 * | ||||||
|  | 	 * Without FSGSBASE the kernel enforces that negative GSBASE | ||||||
|  | 	 * values indicate kernel GSBASE. With FSGSBASE no assumptions | ||||||
|  | 	 * can be made about the GSBASE value when entering from user | ||||||
|  | 	 * space. | ||||||
|  | 	 */ | ||||||
|  | 	ALTERNATIVE "jmp .Lparanoid_entry_checkgs", "", X86_FEATURE_FSGSBASE | ||||||
|  | 
 | ||||||
|  | 	/* | ||||||
|  | 	 * Read the current GSBASE and store it in %rbx unconditionally, | ||||||
|  | 	 * retrieve and set the current CPU's kernel GSBASE. The stored value | ||||||
|  | 	 * has to be restored in paranoid_exit unconditionally. | ||||||
|  | 	 * | ||||||
|  | 	 * The MSR write ensures that no subsequent load is based on a | ||||||
|  | 	 * mispredicted GSBASE. No extra FENCE required. | ||||||
|  | 	 */ | ||||||
|  | 	SAVE_AND_SET_GSBASE scratch_reg=%rax save_reg=%rbx | ||||||
|  | 	ret | ||||||
|  | 
 | ||||||
|  | .Lparanoid_entry_checkgs: | ||||||
| 	/* EBX = 1 -> kernel GSBASE active, no restore required */ | 	/* EBX = 1 -> kernel GSBASE active, no restore required */ | ||||||
| 	movl	$1, %ebx | 	movl	$1, %ebx | ||||||
| 	/* | 	/* | ||||||
| @ -860,24 +881,45 @@ SYM_CODE_END(paranoid_entry) | |||||||
|  * |  * | ||||||
|  * We may be returning to very strange contexts (e.g. very early |  * We may be returning to very strange contexts (e.g. very early | ||||||
|  * in syscall entry), so checking for preemption here would |  * in syscall entry), so checking for preemption here would | ||||||
|  * be complicated.  Fortunately, we there's no good reason |  * be complicated.  Fortunately, there's no good reason to try | ||||||
|  * to try to handle preemption here. |  * to handle preemption here. | ||||||
|  * |  * | ||||||
|  * On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it) |  * R/EBX contains the GSBASE related information depending on the | ||||||
|  |  * availability of the FSGSBASE instructions: | ||||||
|  |  * | ||||||
|  |  * FSGSBASE	R/EBX | ||||||
|  |  *     N        0 -> SWAPGS on exit | ||||||
|  |  *              1 -> no SWAPGS on exit | ||||||
|  |  * | ||||||
|  |  *     Y        User space GSBASE, must be restored unconditionally | ||||||
|  */ |  */ | ||||||
| SYM_CODE_START_LOCAL(paranoid_exit) | SYM_CODE_START_LOCAL(paranoid_exit) | ||||||
| 	UNWIND_HINT_REGS | 	UNWIND_HINT_REGS | ||||||
| 	/* If EBX is 0, SWAPGS is required */ | 	/* | ||||||
| 	testl	%ebx, %ebx | 	 * The order of operations is important. RESTORE_CR3 requires | ||||||
| 	jnz	.Lparanoid_exit_no_swapgs | 	 * kernel GSBASE. | ||||||
| 	/* Always restore stashed CR3 value (see paranoid_entry) */ | 	 * | ||||||
| 	RESTORE_CR3	scratch_reg=%rbx save_reg=%r14 | 	 * NB to anyone to try to optimize this code: this code does | ||||||
|  | 	 * not execute at all for exceptions from user mode. Those | ||||||
|  | 	 * exceptions go through error_exit instead. | ||||||
|  | 	 */ | ||||||
|  | 	RESTORE_CR3	scratch_reg=%rax save_reg=%r14 | ||||||
|  | 
 | ||||||
|  | 	/* Handle the three GSBASE cases */ | ||||||
|  | 	ALTERNATIVE "jmp .Lparanoid_exit_checkgs", "", X86_FEATURE_FSGSBASE | ||||||
|  | 
 | ||||||
|  | 	/* With FSGSBASE enabled, unconditionally restore GSBASE */ | ||||||
|  | 	wrgsbase	%rbx | ||||||
|  | 	jmp		restore_regs_and_return_to_kernel | ||||||
|  | 
 | ||||||
|  | .Lparanoid_exit_checkgs: | ||||||
|  | 	/* On non-FSGSBASE systems, conditionally do SWAPGS */ | ||||||
|  | 	testl		%ebx, %ebx | ||||||
|  | 	jnz		restore_regs_and_return_to_kernel | ||||||
|  | 
 | ||||||
|  | 	/* We are returning to a context with user GSBASE */ | ||||||
| 	SWAPGS_UNSAFE_STACK | 	SWAPGS_UNSAFE_STACK | ||||||
| 	jmp	restore_regs_and_return_to_kernel | 	jmp		restore_regs_and_return_to_kernel | ||||||
| .Lparanoid_exit_no_swapgs: |  | ||||||
| 	/* Always restore stashed CR3 value (see paranoid_entry) */ |  | ||||||
| 	RESTORE_CR3	scratch_reg=%rbx save_reg=%r14 |  | ||||||
| 	jmp restore_regs_and_return_to_kernel |  | ||||||
| SYM_CODE_END(paranoid_exit) | SYM_CODE_END(paranoid_exit) | ||||||
| 
 | 
 | ||||||
| /* | /* | ||||||
| @ -1282,10 +1324,27 @@ end_repeat_nmi: | |||||||
| 	/* Always restore stashed CR3 value (see paranoid_entry) */ | 	/* Always restore stashed CR3 value (see paranoid_entry) */ | ||||||
| 	RESTORE_CR3 scratch_reg=%r15 save_reg=%r14 | 	RESTORE_CR3 scratch_reg=%r15 save_reg=%r14 | ||||||
| 
 | 
 | ||||||
| 	testl	%ebx, %ebx			/* swapgs needed? */ | 	/* | ||||||
|  | 	 * The above invocation of paranoid_entry stored the GSBASE | ||||||
|  | 	 * related information in R/EBX depending on the availability | ||||||
|  | 	 * of FSGSBASE. | ||||||
|  | 	 * | ||||||
|  | 	 * If FSGSBASE is enabled, restore the saved GSBASE value | ||||||
|  | 	 * unconditionally, otherwise take the conditional SWAPGS path. | ||||||
|  | 	 */ | ||||||
|  | 	ALTERNATIVE "jmp nmi_no_fsgsbase", "", X86_FEATURE_FSGSBASE | ||||||
|  | 
 | ||||||
|  | 	wrgsbase	%rbx | ||||||
|  | 	jmp	nmi_restore | ||||||
|  | 
 | ||||||
|  | nmi_no_fsgsbase: | ||||||
|  | 	/* EBX == 0 -> invoke SWAPGS */ | ||||||
|  | 	testl	%ebx, %ebx | ||||||
| 	jnz	nmi_restore | 	jnz	nmi_restore | ||||||
|  | 
 | ||||||
| nmi_swapgs: | nmi_swapgs: | ||||||
| 	SWAPGS_UNSAFE_STACK | 	SWAPGS_UNSAFE_STACK | ||||||
|  | 
 | ||||||
| nmi_restore: | nmi_restore: | ||||||
| 	POP_REGS | 	POP_REGS | ||||||
| 
 | 
 | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 Chang S. Bae
						Chang S. Bae