linux/arch/x86/kernel/relocate_kernel_64.S
Linus Torvalds 5b7f7234ff x86/boot changes for v6.14

Merge tag 'x86-boot-2025-01-21' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 boot updates from Ingo Molnar:

 - A large and involved preparatory series to pave the way for adding
   exception handling to relocate_kernel - a debugging facility that
   has already proven useful in the field for tracking down an
   exceptionally hard-to-debug early boot bug. Plus assorted cleanups
   and fixes that were discovered along the way, by David Woodhouse
   (the resulting relocate_kernel() calling convention is sketched
   after this list):

      - Clean up and document register use in relocate_kernel_64.S
      - Use named labels in swap_pages in relocate_kernel_64.S
      - Only swap pages for ::preserve_context mode
      - Allocate PGD for x86_64 transition page tables separately
      - Copy control page into place in machine_kexec_prepare()
      - Invoke copy of relocate_kernel() instead of the original
      - Move relocate_kernel to kernel .data section
      - Add data section to relocate_kernel
      - Drop page_list argument from relocate_kernel()
      - Eliminate writes through kernel mapping of relocate_kernel page
      - Clean up register usage in relocate_kernel()
      - Mark relocate_kernel page as ROX instead of RWX
      - Disable global pages before writing to control page
      - Ensure preserve_context flag is set on return to kernel
      - Use correct swap page in swap_pages function
      - Fix stack and handling of re-entry point for ::preserve_context
      - Mark machine_kexec() with __nocfi
      - Cope with relocate_kernel() not being at the start of the page
      - Use typedef for relocate_kernel_fn function prototype
      - Fix location of relocate_kernel with -ffunction-sections (fix by Nathan Chancellor)
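
   To make the "typedef" and "invoke the copy" items above concrete,
   here is a hedged sketch of the relocate_kernel() calling convention
   implied by the register comments in relocate_kernel_64.S; this is
   not the literal kernel declaration, and call_relocate_kernel_copy()
   is an illustrative helper, not a real kernel function:

      typedef unsigned long
      relocate_kernel_fn(unsigned long indirection_page,     /* %rdi */
                         unsigned long pa_control_page,      /* %rsi */
                         unsigned long start_address,        /* %rdx */
                         unsigned int  preserve_context,     /* %rcx */
                         unsigned int  host_mem_enc_active); /* %r8  */

      /*
       * machine_kexec() invokes the copy of relocate_kernel() that
       * machine_kexec_prepare() placed in the control page, rather
       * than the original in kernel text (sketch):
       */
      static unsigned long
      call_relocate_kernel_copy(void *control_page, unsigned long offset,
                                unsigned long ind_page, unsigned long pa_ctrl,
                                unsigned long start, unsigned int preserve,
                                unsigned int mem_enc)
      {
              relocate_kernel_fn *fn =
                      (relocate_kernel_fn *)((char *)control_page + offset);

              return fn(ind_page, pa_ctrl, start, preserve, mem_enc);
      }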

 - A series to remove the last remaining absolute symbol references
   from .head.text, and enforce this at build time, by Ard Biesheuvel
   (the idea behind the build-time check is sketched after this list):

      - Avoid WARN()s and panic()s in early boot code
      - Don't hang but terminate on failure to remap SVSM CA
      - Determine VA/PA offset before entering C code
      - Avoid intentional absolute symbol references in .head.text
      - Disable UBSAN in early boot code
      - Move ENTRY_TEXT to the start of the image
      - Move .head.text into its own output section
      - Reject absolute references in .head.text
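
   Conceptually, the enforcement scans the relocation entries produced
   for .head.text and rejects absolute relocation types, since that
   code runs before the kernel is mapped at its link-time virtual
   address. A minimal sketch of the idea, under the assumption that
   the check lives in the x86 'relocs' build tool (struct and helper
   names are illustrative, not the actual tool code):

      #include <string.h>

      /* x86-64 relocation types that encode an absolute address */
      #define R_X86_64_64     1
      #define R_X86_64_32     10
      #define R_X86_64_32S    11

      struct reloc_info {
              const char   *sec_name; /* section the relocation applies to */
              unsigned int  type;     /* ELF relocation type */
      };

      static int head_text_reloc_ok(const struct reloc_info *r)
      {
              if (strcmp(r->sec_name, ".head.text") != 0)
                      return 1;       /* only .head.text is constrained */

              /* Absolute types bake a fixed virtual address into code
               * that may execute elsewhere; reject them at build time. */
              return r->type != R_X86_64_64 &&
                     r->type != R_X86_64_32 &&
                     r->type != R_X86_64_32S;
      }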

 - The above build-time enforcement uncovered a handful of bugs in
   essentially non-working code, and prompted a workaround for a
   toolchain bug, both addressed by Ard Biesheuvel as well:

      - Fix spurious undefined reference when CONFIG_X86_5LEVEL=n, on GCC-12
      - Disable UBSAN on SEV code that may execute very early
      - Disable ftrace branch profiling in SEV startup code

 - And miscellaneous cleanups:

      - kexec_core: Add and update comments regarding the KEXEC_JUMP flow (Rafael J. Wysocki)
      - x86/sysfs: Constify 'struct bin_attribute' (Thomas Weißschuh)

* tag 'x86-boot-2025-01-21' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (33 commits)
  x86/sev: Disable ftrace branch profiling in SEV startup code
  x86/kexec: Use typedef for relocate_kernel_fn function prototype
  x86/kexec: Cope with relocate_kernel() not being at the start of the page
  kexec_core: Add and update comments regarding the KEXEC_JUMP flow
  x86/kexec: Mark machine_kexec() with __nocfi
  x86/kexec: Fix location of relocate_kernel with -ffunction-sections
  x86/kexec: Fix stack and handling of re-entry point for ::preserve_context
  x86/kexec: Use correct swap page in swap_pages function
  x86/kexec: Ensure preserve_context flag is set on return to kernel
  x86/kexec: Disable global pages before writing to control page
  x86/sev: Don't hang but terminate on failure to remap SVSM CA
  x86/sev: Disable UBSAN on SEV code that may execute very early
  x86/boot/64: Fix spurious undefined reference when CONFIG_X86_5LEVEL=n, on GCC-12
  x86/sysfs: Constify 'struct bin_attribute'
  x86/kexec: Mark relocate_kernel page as ROX instead of RWX
  x86/kexec: Clean up register usage in relocate_kernel()
  x86/kexec: Eliminate writes through kernel mapping of relocate_kernel page
  x86/kexec: Drop page_list argument from relocate_kernel()
  x86/kexec: Add data section to relocate_kernel
  x86/kexec: Move relocate_kernel to kernel .data section
  ...
2025-01-24 05:54:26 -08:00


/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * relocate_kernel.S - put the kernel image in place to boot
 * Copyright (C) 2002-2005 Eric Biederman <ebiederm@xmission.com>
 */

#include <linux/linkage.h>
#include <linux/stringify.h>
#include <asm/alternative.h>
#include <asm/page_types.h>
#include <asm/kexec.h>
#include <asm/processor-flags.h>
#include <asm/pgtable_types.h>
#include <asm/nospec-branch.h>
#include <asm/unwind_hints.h>
#include <asm/asm-offsets.h>

/*
 * Must be relocatable PIC code callable as a C function, in particular
 * there must be a plain RET and not jump to return thunk.
 */

#define PTR(x) (x << 3)
#define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)

/*
 * The .text..relocate_kernel and .data..relocate_kernel sections are copied
 * into the control page, and the remainder of the page is used as the stack.
 */

        .section .data..relocate_kernel,"a";

/* Minimal CPU state */
SYM_DATA_LOCAL(saved_rsp, .quad 0)
SYM_DATA_LOCAL(saved_cr0, .quad 0)
SYM_DATA_LOCAL(saved_cr3, .quad 0)
SYM_DATA_LOCAL(saved_cr4, .quad 0)

/* other data */
SYM_DATA(kexec_va_control_page, .quad 0)
SYM_DATA(kexec_pa_table_page, .quad 0)
SYM_DATA(kexec_pa_swap_page, .quad 0)
SYM_DATA_LOCAL(pa_backup_pages_map, .quad 0)

        .section .text..relocate_kernel,"ax";
        .code64
SYM_CODE_START_NOALIGN(relocate_kernel)
        UNWIND_HINT_END_OF_STACK
        ANNOTATE_NOENDBR
        /*
         * %rdi indirection_page
         * %rsi pa_control_page
         * %rdx start address
         * %rcx preserve_context
         * %r8  host_mem_enc_active
         */

        /* Save the CPU context, used for jumping back */
        pushq   %rbx
        pushq   %rbp
        pushq   %r12
        pushq   %r13
        pushq   %r14
        pushq   %r15
        pushf

        /* zero out flags, and disable interrupts */
        pushq   $0
        popfq

        /* Switch to the identity mapped page tables */
        movq    %cr3, %rax
        movq    kexec_pa_table_page(%rip), %r9
        movq    %r9, %cr3

        /* Leave CR4 in %r13 to enable the right paging mode later. */
        movq    %cr4, %r13

        /* Disable global pages immediately to ensure this mapping is RWX */
        movq    %r13, %r12
        andq    $~(X86_CR4_PGE), %r12
        movq    %r12, %cr4

        /* Save %rsp and CRs. */
        movq    %r13, saved_cr4(%rip)
        movq    %rsp, saved_rsp(%rip)
        movq    %rax, saved_cr3(%rip)
        movq    %cr0, %rax
        movq    %rax, saved_cr0(%rip)

        /* save indirection list for jumping back */
        movq    %rdi, pa_backup_pages_map(%rip)

        /* Save the preserve_context to %r11 as swap_pages clobbers %rcx. */
        movq    %rcx, %r11

        /* setup a new stack at the end of the physical control page */
        lea     PAGE_SIZE(%rsi), %rsp

        /* jump to identity mapped page */
0:      addq    $identity_mapped - 0b, %rsi
        subq    $__relocate_kernel_start - 0b, %rsi
        ANNOTATE_RETPOLINE_SAFE
        jmp     *%rsi
SYM_CODE_END(relocate_kernel)

SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
        UNWIND_HINT_END_OF_STACK
        /*
         * %rdi indirection page
         * %rdx start address
         * %r8  host_mem_enc_active
         * %r9  page table page
         * %r11 preserve_context
         * %r13 original CR4 when relocate_kernel() was invoked
         */

        /* store the start address on the stack */
        pushq   %rdx

        /*
         * Clear X86_CR4_CET (if it was set) such that we can clear CR0_WP
         * below.
         */
        movq    %cr4, %rax
        andq    $~(X86_CR4_CET), %rax
        movq    %rax, %cr4

        /*
         * Set cr0 to a known state:
         * - Paging enabled
         * - Alignment check disabled
         * - Write protect disabled
         * - No task switch
         * - Don't do FP software emulation.
         * - Protected mode enabled
         */
        movq    %cr0, %rax
        andq    $~(X86_CR0_AM | X86_CR0_WP | X86_CR0_TS | X86_CR0_EM), %rax
        orl     $(X86_CR0_PG | X86_CR0_PE), %eax
        movq    %rax, %cr0

        /*
         * Set cr4 to a known state:
         * - physical address extension enabled
         * - 5-level paging, if it was enabled before
         * - Machine check exception on TDX guest, if it was enabled before.
         *   Clearing MCE might not be allowed in TDX guests, depending on setup.
         *
         * Use R13 that contains the original CR4 value, read in relocate_kernel().
         * PAE is always set in the original CR4.
         */
        andl    $(X86_CR4_PAE | X86_CR4_LA57), %r13d
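        /*
         * ALTERNATIVE: the 'orl $X86_CR4_MCE' below is patched in at boot
         * only on CPUs with X86_FEATURE_TDX_GUEST; everywhere else the
         * site is left as NOPs.
         */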
        ALTERNATIVE "", __stringify(orl $X86_CR4_MCE, %r13d), X86_FEATURE_TDX_GUEST
        movq    %r13, %cr4

        /* Flush the TLB (needed?) */
        movq    %r9, %cr3

        /*
         * If SME is active, there could be old encrypted cache line
         * entries that will conflict with the now unencrypted memory
         * used by kexec. Flush the caches before copying the kernel.
         */
        testq   %r8, %r8
        jz      .Lsme_off
        wbinvd
.Lsme_off:

        call    swap_pages

        /*
         * To be certain of avoiding problems with self-modifying code
         * I need to execute a serializing instruction here.
         * So I flush the TLB by reloading %cr3 here, it's handy,
         * and not processor dependent.
         */
        movq    %cr3, %rax
        movq    %rax, %cr3

        testq   %r11, %r11      /* preserve_context */
        jnz     .Lrelocate

        /*
         * set all of the registers to known values
         * leave %rsp alone
         */
        xorl    %eax, %eax
        xorl    %ebx, %ebx
        xorl    %ecx, %ecx
        xorl    %edx, %edx
        xorl    %esi, %esi
        xorl    %edi, %edi
        xorl    %ebp, %ebp
        xorl    %r8d, %r8d
        xorl    %r9d, %r9d
        xorl    %r10d, %r10d
        xorl    %r11d, %r11d
        xorl    %r12d, %r12d
        xorl    %r13d, %r13d
        xorl    %r14d, %r14d
        xorl    %r15d, %r15d
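
        /*
         * A plain RET (annotated unret-safe), as the header comment
         * requires: this code runs from the copied control page, where
         * jumping to the kernel's return thunk would be meaningless.
         * The INT3 stops straight-line speculation past the RET.
         */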
        ANNOTATE_UNRET_SAFE
        ret
        int3

.Lrelocate:
        popq    %rdx

        /* Use the swap page for the callee's stack */
        movq    kexec_pa_swap_page(%rip), %r10
        leaq    PAGE_SIZE(%r10), %rsp

        /* push the existing entry point onto the callee's stack */
        pushq   %rdx

        ANNOTATE_RETPOLINE_SAFE
        call    *%rdx

        /* get the re-entry point of the peer system */
        popq    %rbp
        movq    kexec_pa_swap_page(%rip), %r10
        movq    pa_backup_pages_map(%rip), %rdi
        movq    kexec_pa_table_page(%rip), %rax
        movq    %rax, %cr3

        /* Find start (and end) of this physical mapping of control page */
        leaq    (%rip), %r8
        ANNOTATE_NOENDBR
        andq    $PAGE_MASK, %r8
        lea     PAGE_SIZE(%r8), %rsp

        movl    $1, %r11d       /* Ensure preserve_context flag is set */
        call    swap_pages
        movq    kexec_va_control_page(%rip), %rax
0:      addq    $virtual_mapped - 0b, %rax
        subq    $__relocate_kernel_start - 0b, %rax
        pushq   %rax
        ANNOTATE_UNRET_SAFE
        ret
        int3
SYM_CODE_END(identity_mapped)

SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
        UNWIND_HINT_END_OF_STACK
        ANNOTATE_NOENDBR // RET target, above
        movq    saved_rsp(%rip), %rsp
        movq    saved_cr4(%rip), %rax
        movq    %rax, %cr4
        movq    saved_cr3(%rip), %rax
        movq    saved_cr0(%rip), %r8
        movq    %rax, %cr3
        movq    %r8, %cr0

#ifdef CONFIG_KEXEC_JUMP
        /* Saved in save_processor_state. */
        movq    $saved_context, %rax
        lgdt    saved_context_gdt_desc(%rax)
#endif

        /* relocate_kernel() returns the re-entry point for next time */
        movq    %rbp, %rax

        popf
        popq    %r15
        popq    %r14
        popq    %r13
        popq    %r12
        popq    %rbp
        popq    %rbx
        ANNOTATE_UNRET_SAFE
        ret
        int3
SYM_CODE_END(virtual_mapped)

/* Do the copies */
SYM_CODE_START_LOCAL_NOALIGN(swap_pages)
        UNWIND_HINT_END_OF_STACK
        /*
         * %rdi indirection page
         * %r11 preserve_context
         */
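        /*
         * Each 8-byte entry in the indirection list is a page address with
         * flag bits in the low bits: 0x1 destination, 0x2 indirection,
         * 0x4 done, 0x8 source (IND_DESTINATION, IND_INDIRECTION, IND_DONE
         * and IND_SOURCE in <linux/kexec.h>), which is what the testb
         * checks below decode.
         */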
        movq    %rdi, %rcx      /* Put the indirection_page in %rcx */
        xorl    %edi, %edi
        xorl    %esi, %esi
        jmp     .Lstart         /* Should start with an indirection record */

.Lloop: /* top, read another word from the indirection page */
        movq    (%rbx), %rcx
        addq    $8, %rbx
.Lstart:
        testb   $0x1, %cl       /* is it a destination page? */
        jz      .Lnotdest
        movq    %rcx, %rdi
        andq    $0xfffffffffffff000, %rdi
        jmp     .Lloop
.Lnotdest:
        testb   $0x2, %cl       /* is it an indirection page? */
        jz      .Lnotind
        movq    %rcx, %rbx
        andq    $0xfffffffffffff000, %rbx
        jmp     .Lloop
.Lnotind:
        testb   $0x4, %cl       /* is it the done indicator? */
        jz      .Lnotdone
        jmp     .Ldone
.Lnotdone:
        testb   $0x8, %cl       /* is it the source indicator? */
        jz      .Lloop          /* Ignore it otherwise */
        movq    %rcx, %rsi      /* For every source page do a copy */
        andq    $0xfffffffffffff000, %rsi

        movq    %rdi, %rdx      /* Save destination page to %rdx */
        movq    %rsi, %rax      /* Save source page to %rax */

        testq   %r11, %r11      /* Only actually swap for ::preserve_context */
        jz      .Lnoswap
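
        /*
         * Three-way swap via the swap page: source -> swap, destination ->
         * source, swap -> destination. The destination receives the new
         * contents while its old contents survive in the source page, so
         * the original kernel can be restored when jumping back.
         */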
        /* copy source page to swap page */
        movq    kexec_pa_swap_page(%rip), %rdi
        movl    $512, %ecx
        rep ; movsq

        /* copy destination page to source page */
        movq    %rax, %rdi
        movq    %rdx, %rsi
        movl    $512, %ecx
        rep ; movsq

        /* copy swap page to destination page */
        movq    %rdx, %rdi
        movq    kexec_pa_swap_page(%rip), %rsi
.Lnoswap:
        movl    $512, %ecx
        rep ; movsq

        lea     PAGE_SIZE(%rax), %rsi
        jmp     .Lloop

.Ldone:
        ANNOTATE_UNRET_SAFE
        ret
        int3
SYM_CODE_END(swap_pages)