mirror of
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-09-04 20:19:47 +08:00

- A large and involved preparatory series to pave the way to add exception handling for relocate_kernel - which will be a debugging facility that has aided in the field to debug an exceptionally hard to debug early boot bug. Plus assorted cleanups and fixes that were discovered along the way, by David Woodhouse: - Clean up and document register use in relocate_kernel_64.S - Use named labels in swap_pages in relocate_kernel_64.S - Only swap pages for ::preserve_context mode - Allocate PGD for x86_64 transition page tables separately - Copy control page into place in machine_kexec_prepare() - Invoke copy of relocate_kernel() instead of the original - Move relocate_kernel to kernel .data section - Add data section to relocate_kernel - Drop page_list argument from relocate_kernel() - Eliminate writes through kernel mapping of relocate_kernel page - Clean up register usage in relocate_kernel() - Mark relocate_kernel page as ROX instead of RWX - Disable global pages before writing to control page - Ensure preserve_context flag is set on return to kernel - Use correct swap page in swap_pages function - Fix stack and handling of re-entry point for ::preserve_context - Mark machine_kexec() with __nocfi - Cope with relocate_kernel() not being at the start of the page - Use typedef for relocate_kernel_fn function prototype - Fix location of relocate_kernel with -ffunction-sections (fix by Nathan Chancellor) - A series to remove the last remaining absolute symbol references from .head.text, and enforce this at build time, by Ard Biesheuvel: - Avoid WARN()s and panic()s in early boot code - Don't hang but terminate on failure to remap SVSM CA - Determine VA/PA offset before entering C code - Avoid intentional absolute symbol references in .head.text - Disable UBSAN in early boot code - Move ENTRY_TEXT to the start of the image - Move .head.text into its own output section - Reject absolute references in .head.text - Which build-time enforcement uncovered a handful of bugs of 
essentially non-working code, and a wrokaround for a toolchain bug, fixed by Ard Biesheuvel as well: - Fix spurious undefined reference when CONFIG_X86_5LEVEL=n, on GCC-12 - Disable UBSAN on SEV code that may execute very early - Disable ftrace branch profiling in SEV startup code - And miscellaneous cleanups: - kexec_core: Add and update comments regarding the KEXEC_JUMP flow (Rafael J. Wysocki) - x86/sysfs: Constify 'struct bin_attribute' (Thomas Weißschuh) Signed-off-by: Ingo Molnar <mingo@kernel.org> -----BEGIN PGP SIGNATURE----- iQJFBAABCgAvFiEEBpT5eoXrXCwVQwEKEnMQ0APhK1gFAmeQDmURHG1pbmdvQGtl cm5lbC5vcmcACgkQEnMQ0APhK1inwRAAjD5QR/Yu7Yiv2nM/ncUwAItsFkv9Jk4Y HPGz9qNJoZxKxuZVj9bfQhWDe3g6VLnlDYgatht9BsyP5b12qZrUe+yp/TOH54Z3 wPD+U/jun4jiSr7oJkJC+bFn+a/tL39pB8Y6m+jblacgVglleO3SH5fBWNE1UbIV e2iiNxi0bfuHy3wquegnKaMyF1e7YLw1p5laGSwwk21g5FjT7cLQOqC0/9u8u9xX Ha+iaod7JOcjiQOqIt/MV57ldWEFCrUhQozRV3tK5Ptf5aoGFpisgQoRoduWUtFz UbHiHhv6zE4DOIUzaAbJjYfR1Z/LCviwON97XJgeOOkJaULF7yFCfhGxKSyQoMIh qZtlBs4VsGl2/dOl+iW6xKwgRiNundTzSQtt5D/xuFz5LnDxe/SrlZnYp8lOPP8R w9V2b/fC0YxmUzEW6EDhBqvfuScKiNWoic47qvYfZPaWyg1ESpvWTIh6AKB5ThUR upgJQdA4HW+y5C57uHW40TSe3xEeqM3+Slk0jxLElP7/yTul5r7jrjq2EkwaAv/j 6/0LsMSr33r9fVFeMP1qLXPUaipcqTWWTpeeTr8NBGUcvOKzw5SltEG4NihzCyhF 3/UMQhcQ6KE3iFMPlRu4hV7ZV4gErZmLoRwh9Uk28f2Xx8T95uoV8KTg1/sRZRTo uQLeRxYnyrw= =vGWS -----END PGP SIGNATURE----- Merge tag 'x86-boot-2025-01-21' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip Pull x86 boot updates from Ingo Molnar: - A large and involved preparatory series to pave the way to add exception handling for relocate_kernel - which will be a debugging facility that has aided in the field to debug an exceptionally hard to debug early boot bug. 
Plus assorted cleanups and fixes that were discovered along the way, by David Woodhouse: - Clean up and document register use in relocate_kernel_64.S - Use named labels in swap_pages in relocate_kernel_64.S - Only swap pages for ::preserve_context mode - Allocate PGD for x86_64 transition page tables separately - Copy control page into place in machine_kexec_prepare() - Invoke copy of relocate_kernel() instead of the original - Move relocate_kernel to kernel .data section - Add data section to relocate_kernel - Drop page_list argument from relocate_kernel() - Eliminate writes through kernel mapping of relocate_kernel page - Clean up register usage in relocate_kernel() - Mark relocate_kernel page as ROX instead of RWX - Disable global pages before writing to control page - Ensure preserve_context flag is set on return to kernel - Use correct swap page in swap_pages function - Fix stack and handling of re-entry point for ::preserve_context - Mark machine_kexec() with __nocfi - Cope with relocate_kernel() not being at the start of the page - Use typedef for relocate_kernel_fn function prototype - Fix location of relocate_kernel with -ffunction-sections (fix by Nathan Chancellor) - A series to remove the last remaining absolute symbol references from .head.text, and enforce this at build time, by Ard Biesheuvel: - Avoid WARN()s and panic()s in early boot code - Don't hang but terminate on failure to remap SVSM CA - Determine VA/PA offset before entering C code - Avoid intentional absolute symbol references in .head.text - Disable UBSAN in early boot code - Move ENTRY_TEXT to the start of the image - Move .head.text into its own output section - Reject absolute references in .head.text - The above build-time enforcement uncovered a handful of bugs of essentially non-working code, and a workaround for a toolchain bug, fixed by Ard Biesheuvel as well: - Fix spurious undefined reference when CONFIG_X86_5LEVEL=n, on GCC-12 - Disable UBSAN on SEV code that may execute very
early - Disable ftrace branch profiling in SEV startup code - And miscellaneous cleanups: - kexec_core: Add and update comments regarding the KEXEC_JUMP flow (Rafael J. Wysocki) - x86/sysfs: Constify 'struct bin_attribute' (Thomas Weißschuh)" * tag 'x86-boot-2025-01-21' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (33 commits) x86/sev: Disable ftrace branch profiling in SEV startup code x86/kexec: Use typedef for relocate_kernel_fn function prototype x86/kexec: Cope with relocate_kernel() not being at the start of the page kexec_core: Add and update comments regarding the KEXEC_JUMP flow x86/kexec: Mark machine_kexec() with __nocfi x86/kexec: Fix location of relocate_kernel with -ffunction-sections x86/kexec: Fix stack and handling of re-entry point for ::preserve_context x86/kexec: Use correct swap page in swap_pages function x86/kexec: Ensure preserve_context flag is set on return to kernel x86/kexec: Disable global pages before writing to control page x86/sev: Don't hang but terminate on failure to remap SVSM CA x86/sev: Disable UBSAN on SEV code that may execute very early x86/boot/64: Fix spurious undefined reference when CONFIG_X86_5LEVEL=n, on GCC-12 x86/sysfs: Constify 'struct bin_attribute' x86/kexec: Mark relocate_kernel page as ROX instead of RWX x86/kexec: Clean up register usage in relocate_kernel() x86/kexec: Eliminate writes through kernel mapping of relocate_kernel page x86/kexec: Drop page_list argument from relocate_kernel() x86/kexec: Add data section to relocate_kernel x86/kexec: Move relocate_kernel to kernel .data section ...
344 lines
7.9 KiB
ArmAsm
344 lines
7.9 KiB
ArmAsm
/* SPDX-License-Identifier: GPL-2.0-only */
|
|
/*
|
|
* relocate_kernel.S - put the kernel image in place to boot
|
|
* Copyright (C) 2002-2005 Eric Biederman <ebiederm@xmission.com>
|
|
*/
|
|
|
|
#include <linux/linkage.h>
|
|
#include <linux/stringify.h>
|
|
#include <asm/alternative.h>
|
|
#include <asm/page_types.h>
|
|
#include <asm/kexec.h>
|
|
#include <asm/processor-flags.h>
|
|
#include <asm/pgtable_types.h>
|
|
#include <asm/nospec-branch.h>
|
|
#include <asm/unwind_hints.h>
|
|
#include <asm/asm-offsets.h>
|
|
|
|
/*
|
|
* Must be relocatable PIC code callable as a C function, in particular
|
|
* there must be a plain RET and not jump to return thunk.
|
|
*/
|
|
|
|
#define PTR(x) (x << 3)		/* scale a table index by pointer size (8 bytes) */
#define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)

/*
 * The .text..relocate_kernel and .data..relocate_kernel sections are copied
 * into the control page, and the remainder of the page is used as the stack.
 */

	.section .data..relocate_kernel,"a";
/* Minimal CPU state, saved in relocate_kernel() and restored in virtual_mapped */
SYM_DATA_LOCAL(saved_rsp, .quad 0)	/* %rsp on entry to relocate_kernel() */
SYM_DATA_LOCAL(saved_cr0, .quad 0)
SYM_DATA_LOCAL(saved_cr3, .quad 0)	/* original page tables, restored on return */
SYM_DATA_LOCAL(saved_cr4, .quad 0)
/* other data — the kexec_* symbols are global; written by C code elsewhere */
SYM_DATA(kexec_va_control_page, .quad 0)	/* virtual address of the control page */
SYM_DATA(kexec_pa_table_page, .quad 0)		/* PA of the identity-mapped page tables */
SYM_DATA(kexec_pa_swap_page, .quad 0)		/* PA of the scratch page used by swap_pages */
SYM_DATA_LOCAL(pa_backup_pages_map, .quad 0)	/* indirection page, kept for the jump back */
|
|
|
	.section .text..relocate_kernel,"ax";
	.code64
SYM_CODE_START_NOALIGN(relocate_kernel)
	UNWIND_HINT_END_OF_STACK
	ANNOTATE_NOENDBR
	/*
	 * Entry point, called as a C function.  Arguments:
	 * %rdi indirection_page
	 * %rsi pa_control_page
	 * %rdx start address
	 * %rcx preserve_context
	 * %r8 host_mem_enc_active
	 */

	/* Save the CPU context, used for jumping back */
	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15
	pushf

	/* zero out flags, and disable interrupts */
	pushq	$0
	popfq

	/* Switch to the identity mapped page tables (old CR3 kept in %rax) */
	movq	%cr3, %rax
	movq	kexec_pa_table_page(%rip), %r9
	movq	%r9, %cr3

	/* Leave CR4 in %r13 to enable the right paging mode later. */
	movq	%cr4, %r13

	/* Disable global pages immediately to ensure this mapping is RWX */
	movq	%r13, %r12
	andq	$~(X86_CR4_PGE), %r12
	movq	%r12, %cr4

	/* Save %rsp and CRs. */
	movq	%r13, saved_cr4(%rip)
	movq	%rsp, saved_rsp(%rip)
	movq	%rax, saved_cr3(%rip)
	movq	%cr0, %rax
	movq	%rax, saved_cr0(%rip)

	/* save indirection list for jumping back */
	movq	%rdi, pa_backup_pages_map(%rip)

	/* Save the preserve_context to %r11 as swap_pages clobbers %rcx. */
	movq	%rcx, %r11

	/* setup a new stack at the end of the physical control page */
	lea	PAGE_SIZE(%rsi), %rsp

	/*
	 * Jump to the identity mapped page.  Compute the physical address of
	 * identity_mapped as pa_control_page plus its offset within the
	 * copied sections; the "0:" anchor makes the arithmetic PIC.
	 */
0:	addq	$identity_mapped - 0b, %rsi
	subq	$__relocate_kernel_start - 0b, %rsi
	ANNOTATE_RETPOLINE_SAFE
	jmp	*%rsi
SYM_CODE_END(relocate_kernel)
|
|
|
|
SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
	UNWIND_HINT_END_OF_STACK
	/*
	 * Now running from the copy in the control page, via the identity
	 * mapping.  Register state on entry:
	 * %rdi indirection page
	 * %rdx start address
	 * %r8 host_mem_enc_active
	 * %r9 page table page
	 * %r11 preserve_context
	 * %r13 original CR4 when relocate_kernel() was invoked
	 */

	/* store the start address on the stack */
	pushq	%rdx

	/*
	 * Clear X86_CR4_CET (if it was set) such that we can clear CR0_WP
	 * below.
	 */
	movq	%cr4, %rax
	andq	$~(X86_CR4_CET), %rax
	movq	%rax, %cr4

	/*
	 * Set cr0 to a known state:
	 * - Paging enabled
	 * - Alignment check disabled
	 * - Write protect disabled
	 * - No task switch
	 * - Don't do FP software emulation.
	 * - Protected mode enabled
	 */
	movq	%cr0, %rax
	andq	$~(X86_CR0_AM | X86_CR0_WP | X86_CR0_TS | X86_CR0_EM), %rax
	orl	$(X86_CR0_PG | X86_CR0_PE), %eax
	movq	%rax, %cr0

	/*
	 * Set cr4 to a known state:
	 * - physical address extension enabled
	 * - 5-level paging, if it was enabled before
	 * - Machine check exception on TDX guest, if it was enabled before.
	 *   Clearing MCE might not be allowed in TDX guests, depending on setup.
	 *
	 * Use R13 that contains the original CR4 value, read in relocate_kernel().
	 * PAE is always set in the original CR4.
	 */
	andl	$(X86_CR4_PAE | X86_CR4_LA57), %r13d
	ALTERNATIVE "", __stringify(orl $X86_CR4_MCE, %r13d), X86_FEATURE_TDX_GUEST
	movq	%r13, %cr4

	/* Flush the TLB (needed?) */
	movq	%r9, %cr3

	/*
	 * If SME is active, there could be old encrypted cache line
	 * entries that will conflict with the now unencrypted memory
	 * used by kexec. Flush the caches before copying the kernel.
	 */
	testq	%r8, %r8
	jz	.Lsme_off
	wbinvd
.Lsme_off:

	call	swap_pages

	/*
	 * To be certain of avoiding problems with self-modifying code
	 * I need to execute a serializing instruction here.
	 * So I flush the TLB by reloading %cr3 here, it's handy,
	 * and not processor dependent.
	 */
	movq	%cr3, %rax
	movq	%rax, %cr3

	testq	%r11, %r11	/* preserve_context */
	jnz	.Lrelocate

	/*
	 * Not preserving context: scrub all registers before jumping to the
	 * new kernel so no state leaks across.
	 * set all of the registers to known values
	 * leave %rsp alone
	 */
	xorl	%eax, %eax
	xorl	%ebx, %ebx
	xorl	%ecx, %ecx
	xorl	%edx, %edx
	xorl	%esi, %esi
	xorl	%edi, %edi
	xorl	%ebp, %ebp
	xorl	%r8d, %r8d
	xorl	%r9d, %r9d
	xorl	%r10d, %r10d
	xorl	%r11d, %r11d
	xorl	%r12d, %r12d
	xorl	%r13d, %r13d
	xorl	%r14d, %r14d
	xorl	%r15d, %r15d

	ANNOTATE_UNRET_SAFE
	ret			/* "returns" to the start address pushed above */
	int3

.Lrelocate:
	popq	%rdx		/* the start address saved at function entry */

	/* Use the swap page for the callee's stack */
	movq	kexec_pa_swap_page(%rip), %r10
	leaq	PAGE_SIZE(%r10), %rsp

	/* push the existing entry point onto the callee's stack */
	pushq	%rdx

	ANNOTATE_RETPOLINE_SAFE
	call	*%rdx

	/* get the re-entry point of the peer system */
	popq	%rbp
	movq	kexec_pa_swap_page(%rip), %r10
	movq	pa_backup_pages_map(%rip), %rdi
	movq	kexec_pa_table_page(%rip), %rax
	movq	%rax, %cr3

	/* Find start (and end) of this physical mapping of control page */
	leaq	(%rip), %r8
	ANNOTATE_NOENDBR
	andq	$PAGE_MASK, %r8
	lea	PAGE_SIZE(%r8), %rsp
	movl	$1, %r11d	/* Ensure preserve_context flag is set */
	call	swap_pages	/* swap back, restoring the original pages */
	movq	kexec_va_control_page(%rip), %rax
0:	addq	$virtual_mapped - 0b, %rax	/* PIC offset of virtual_mapped */
	subq	$__relocate_kernel_start - 0b, %rax
	pushq	%rax
	ANNOTATE_UNRET_SAFE
	ret
	int3
SYM_CODE_END(identity_mapped)
|
|
|
|
SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
	UNWIND_HINT_END_OF_STACK
	ANNOTATE_NOENDBR // RET target, above
	/*
	 * Back in the kernel's virtual mapping after a ::preserve_context
	 * round trip.  Restore the stack and control registers that
	 * relocate_kernel() saved; CR4 first, then CR3, then CR0.
	 */
	movq	saved_rsp(%rip), %rsp
	movq	saved_cr4(%rip), %rax
	movq	%rax, %cr4
	movq	saved_cr3(%rip), %rax
	movq	saved_cr0(%rip), %r8
	movq	%rax, %cr3
	movq	%r8, %cr0

#ifdef CONFIG_KEXEC_JUMP
	/* Saved in save_processor_state. */
	movq	$saved_context, %rax
	lgdt	saved_context_gdt_desc(%rax)
#endif

	/* relocate_kernel() returns the re-entry point for next time */
	movq	%rbp, %rax

	/* Undo the pushes from the top of relocate_kernel(), in reverse */
	popf
	popq	%r15
	popq	%r14
	popq	%r13
	popq	%r12
	popq	%rbp
	popq	%rbx
	ANNOTATE_UNRET_SAFE
	ret
	int3
SYM_CODE_END(virtual_mapped)
|
|
|
|
/* Do the copies */
SYM_CODE_START_LOCAL_NOALIGN(swap_pages)
	UNWIND_HINT_END_OF_STACK
	/*
	 * Walk the indirection list and copy (or, for ::preserve_context,
	 * swap) each source page into its destination.  Inputs:
	 * %rdi indirection page
	 * %r11 preserve_context
	 */
	movq	%rdi, %rcx	/* Put the indirection_page in %rcx */
	xorl	%edi, %edi	/* no destination page yet */
	xorl	%esi, %esi	/* no source page yet */
	jmp	.Lstart		/* Should start with an indirection record */

.Lloop:	/* top, read another word for the indirection page */

	movq	(%rbx), %rcx
	addq	$8, %rbx
.Lstart:
	/* The low bits of each entry encode its type; the rest is a page address */
	testb	$0x1, %cl	/* is it a destination page? */
	jz	.Lnotdest
	movq	%rcx, %rdi
	andq	$0xfffffffffffff000, %rdi	/* strip the flag bits */
	jmp	.Lloop
.Lnotdest:
	testb	$0x2, %cl	/* is it an indirection page? */
	jz	.Lnotind
	movq	%rcx, %rbx	/* continue walking from the new list page */
	andq	$0xfffffffffffff000, %rbx
	jmp	.Lloop
.Lnotind:
	testb	$0x4, %cl	/* is it the done indicator? */
	jz	.Lnotdone
	jmp	.Ldone
.Lnotdone:
	testb	$0x8, %cl	/* is it the source indicator? */
	jz	.Lloop		/* Ignore it otherwise */
	movq	%rcx, %rsi	/* For every source page do a copy */
	andq	$0xfffffffffffff000, %rsi

	movq	%rdi, %rdx	/* Save destination page to %rdx */
	movq	%rsi, %rax	/* Save source page to %rax */

	testq	%r11, %r11	/* Only actually swap for ::preserve_context */
	jz	.Lnoswap

	/* copy source page to swap page */
	movq	kexec_pa_swap_page(%rip), %rdi
	movl	$512, %ecx	/* 512 quadwords = one 4K page */
	rep ; movsq

	/* copy destination page to source page */
	movq	%rax, %rdi
	movq	%rdx, %rsi
	movl	$512, %ecx
	rep ; movsq

	/* copy swap page to destination page */
	movq	%rdx, %rdi
	movq	kexec_pa_swap_page(%rip), %rsi
.Lnoswap:
	movl	$512, %ecx
	rep ; movsq

	lea	PAGE_SIZE(%rax), %rsi	/* resume the walk past the source page */
	jmp	.Lloop
.Ldone:
	ANNOTATE_UNRET_SAFE
	ret
	int3
SYM_CODE_END(swap_pages)
|