
The set_p4d() and set_pgd() functions (in 4-level or 5-level page table setups respectively) assume that the root page table is actually an 8KiB allocation, with the userspace root immediately after the kernel root page table (so that the former can enforce NX on all the subordinate page tables, which are actually shared).

However, users of the kernel_ident_mapping_init() code do not give it an 8KiB allocation for its PGD. Both swsusp_arch_resume() and acpi_mp_setup_reset() allocate only a single 4KiB page. The kexec code on x86_64 currently gets away with it purely by chance, because it allocates 8KiB for its "control code page" and then actually uses the first half for the PGD, then copies the actual trampoline code into the second half only after the identmap code has finished scribbling over it.

Fix this by defining a _PAGE_NOPTISHADOW bit (which can use the same bit as _PAGE_SAVED_DIRTY, since one is only for the PGD/P4D root and the other is exclusively for leaf PTEs). This instructs __pti_set_user_pgtbl() not to write to the userspace 'shadow' PGD.

Strictly, the _PAGE_NOPTISHADOW bit doesn't need to be written out to the actual page tables; since __pti_set_user_pgtbl() returns the value to be written to the kernel page table, it could be filtered out. But there seems to be no benefit to actually doing so.

Suggested-by: Dave Hansen <dave.hansen@intel.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/412c90a4df7aef077141d9f68d19cbe5602d6c6d.camel@infradead.org
Cc: stable@kernel.org
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@surriel.com>
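To make the mechanism concrete, here is a minimal sketch of how the new bit and the PTI hook fit together. The bit definitions follow the commit's description (sharing the _PAGE_SAVED_DIRTY position); the body of __pti_set_user_pgtbl() is simplified, and the surrounding PTI plumbing (pgdp_maps_userspace(), kernel_to_user_pgdp()) is shown only in outline, not as a verbatim copy of the patch:

/* Sketch: reuse the _PAGE_SAVED_DIRTY bit position. Sharing is safe
 * because _PAGE_SAVED_DIRTY only has meaning in leaf PTEs, while this
 * bit is only ever set in PGD/P4D root entries. */
#define _PAGE_BIT_NOPTISHADOW	_PAGE_BIT_SAVED_DIRTY
#define _PAGE_NOPTISHADOW	(_AT(pteval_t, 1) << _PAGE_BIT_NOPTISHADOW)

pgd_t __pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd)
{
	/* Entries marked kernel-only are never propagated to the
	 * userspace 'shadow' PGD, which may not exist at all when the
	 * caller allocated only a bare 4KiB page for the root. */
	if (!pgdp_maps_userspace(pgdp) || (pgd.pgd & _PAGE_NOPTISHADOW))
		return pgd;

	/* ... otherwise mirror the entry into the shadow as before ... */
	kernel_to_user_pgdp(pgdp)->pgd = pgd.pgd;
	return pgd;
}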
234 lines
5.0 KiB
C
// SPDX-License-Identifier: GPL-2.0
/*
 * Helper routines for building identity mapping page tables. This is
 * included by both the compressed kernel and the regular kernel.
 */

static void free_pte(struct x86_mapping_info *info, pmd_t *pmd)
{
	pte_t *pte = pte_offset_kernel(pmd, 0);

	info->free_pgt_page(pte, info->context);
}

static void free_pmd(struct x86_mapping_info *info, pud_t *pud)
{
	pmd_t *pmd = pmd_offset(pud, 0);
	int i;

	for (i = 0; i < PTRS_PER_PMD; i++) {
		if (!pmd_present(pmd[i]))
			continue;

		if (pmd_leaf(pmd[i]))
			continue;

		free_pte(info, &pmd[i]);
	}

	info->free_pgt_page(pmd, info->context);
}

static void free_pud(struct x86_mapping_info *info, p4d_t *p4d)
{
	pud_t *pud = pud_offset(p4d, 0);
	int i;

	for (i = 0; i < PTRS_PER_PUD; i++) {
		if (!pud_present(pud[i]))
			continue;

		if (pud_leaf(pud[i]))
			continue;

		free_pmd(info, &pud[i]);
	}

	info->free_pgt_page(pud, info->context);
}

static void free_p4d(struct x86_mapping_info *info, pgd_t *pgd)
{
	p4d_t *p4d = p4d_offset(pgd, 0);
	int i;

	for (i = 0; i < PTRS_PER_P4D; i++) {
		if (!p4d_present(p4d[i]))
			continue;

		free_pud(info, &p4d[i]);
	}

	if (pgtable_l5_enabled())
		info->free_pgt_page(p4d, info->context);
}

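/*
 * Free a page-table tree built by kernel_ident_mapping_init(). Leaf
 * (large-page) entries map memory directly instead of pointing at a
 * lower-level table, which is why the walkers above skip them.
 */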
void kernel_ident_mapping_free(struct x86_mapping_info *info, pgd_t *pgd)
{
	int i;

	for (i = 0; i < PTRS_PER_PGD; i++) {
		if (!pgd_present(pgd[i]))
			continue;

		free_p4d(info, &pgd[i]);
	}

	info->free_pgt_page(pgd, info->context);
}

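/*
 * The builders below walk virtual addresses that were biased by
 * info->offset in kernel_ident_mapping_init(); subtracting that offset
 * again yields the physical address each leaf entry must map.
 */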
static void ident_pmd_init(struct x86_mapping_info *info, pmd_t *pmd_page,
			   unsigned long addr, unsigned long end)
{
	addr &= PMD_MASK;
	for (; addr < end; addr += PMD_SIZE) {
		pmd_t *pmd = pmd_page + pmd_index(addr);

		if (pmd_present(*pmd))
			continue;

		set_pmd(pmd, __pmd((addr - info->offset) | info->page_flag));
	}
}

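/*
 * A 1GiB leaf is only used when the request covers an entire aligned
 * gigabyte: mapping 0x40000000-0x80000000 can use a single gbpage, but
 * a request starting at 0x40200000 cannot, because the gbpage would
 * also map memory below the requested start.
 */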
static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
			  unsigned long addr, unsigned long end)
{
	unsigned long next;

	for (; addr < end; addr = next) {
		pud_t *pud = pud_page + pud_index(addr);
		pmd_t *pmd;
		bool use_gbpage;

		next = (addr & PUD_MASK) + PUD_SIZE;
		if (next > end)
			next = end;

		/* if this is already a gbpage, this portion is already mapped */
		if (pud_leaf(*pud))
			continue;

		/* Is using a gbpage allowed? */
		use_gbpage = info->direct_gbpages;

		/* Don't use gbpage if it maps more than the requested region. */
		/* at the beginning: */
		use_gbpage &= ((addr & ~PUD_MASK) == 0);
		/* ... or at the end: */
		use_gbpage &= ((next & ~PUD_MASK) == 0);

		/* Never overwrite existing mappings */
		use_gbpage &= !pud_present(*pud);

		if (use_gbpage) {
			pud_t pudval;

			pudval = __pud((addr - info->offset) | info->page_flag);
			set_pud(pud, pudval);
			continue;
		}

		if (pud_present(*pud)) {
			pmd = pmd_offset(pud, 0);
			ident_pmd_init(info, pmd, addr, next);
			continue;
		}
		pmd = (pmd_t *)info->alloc_pgt_page(info->context);
		if (!pmd)
			return -ENOMEM;
		ident_pmd_init(info, pmd, addr, next);
		set_pud(pud, __pud(__pa(pmd) | info->kernpg_flag));
	}

	return 0;
}

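/*
 * set_p4d() below writes the actual PGD root when p4d is folded
 * (4-level paging), and set_pgd() in kernel_ident_mapping_init() does
 * so under 5-level paging. Both OR in _PAGE_NOPTISHADOW so that, under
 * PTI, __pti_set_user_pgtbl() leaves the userspace shadow PGD alone:
 * callers such as swsusp_arch_resume() pass a bare 4KiB PGD with no
 * shadow half following it.
 */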
static int ident_p4d_init(struct x86_mapping_info *info, p4d_t *p4d_page,
			  unsigned long addr, unsigned long end)
{
	unsigned long next;
	int result;

	for (; addr < end; addr = next) {
		p4d_t *p4d = p4d_page + p4d_index(addr);
		pud_t *pud;

		next = (addr & P4D_MASK) + P4D_SIZE;
		if (next > end)
			next = end;

		if (p4d_present(*p4d)) {
			pud = pud_offset(p4d, 0);
			result = ident_pud_init(info, pud, addr, next);
			if (result)
				return result;

			continue;
		}
		pud = (pud_t *)info->alloc_pgt_page(info->context);
		if (!pud)
			return -ENOMEM;

		result = ident_pud_init(info, pud, addr, next);
		if (result)
			return result;

		set_p4d(p4d, __p4d(__pa(pud) | info->kernpg_flag | _PAGE_NOPTISHADOW));
	}

	return 0;
}

int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
			      unsigned long pstart, unsigned long pend)
{
	unsigned long addr = pstart + info->offset;
	unsigned long end = pend + info->offset;
	unsigned long next;
	int result;

	/* Set the default pagetable flags if not supplied */
	if (!info->kernpg_flag)
		info->kernpg_flag = _KERNPG_TABLE;

	/* Filter out unsupported __PAGE_KERNEL_* bits: */
	info->kernpg_flag &= __default_kernel_pte_mask;

	for (; addr < end; addr = next) {
		pgd_t *pgd = pgd_page + pgd_index(addr);
		p4d_t *p4d;

		next = (addr & PGDIR_MASK) + PGDIR_SIZE;
		if (next > end)
			next = end;

		if (pgd_present(*pgd)) {
			p4d = p4d_offset(pgd, 0);
			result = ident_p4d_init(info, p4d, addr, next);
			if (result)
				return result;
			continue;
		}

		p4d = (p4d_t *)info->alloc_pgt_page(info->context);
		if (!p4d)
			return -ENOMEM;
		result = ident_p4d_init(info, p4d, addr, next);
		if (result)
			return result;
		if (pgtable_l5_enabled()) {
			set_pgd(pgd, __pgd(__pa(p4d) | info->kernpg_flag | _PAGE_NOPTISHADOW));
		} else {
			/*
			 * With p4d folded, pgd is equal to p4d.
			 * The pgd entry has to point to the pud page table in this case.
			 */
			pud_t *pud = pud_offset(p4d, 0);
			set_pgd(pgd, __pgd(__pa(pud) | info->kernpg_flag | _PAGE_NOPTISHADOW));
		}
	}

	return 0;
}
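For reference, a minimal sketch of a caller, loosely modelled on what the swsusp and kexec paths do. The field names come from struct x86_mapping_info in arch/x86/include/asm/init.h; the callback implementations and map_range_identity() itself are hypothetical. With _PAGE_NOPTISHADOW in place, a single 4KiB page for the PGD is sufficient:

#include <linux/gfp.h>
#include <asm/init.h>
#include <asm/pgtable.h>

/* Hypothetical callbacks: back each page-table level with one zeroed page. */
static void *ident_alloc_pgt_page(void *context)
{
	return (void *)get_zeroed_page(GFP_KERNEL);
}

static void ident_free_pgt_page(void *pgt, void *context)
{
	free_page((unsigned long)pgt);
}

/* Hypothetical helper: identity-map [start, end) with virt == phys. */
static int map_range_identity(unsigned long start, unsigned long end)
{
	struct x86_mapping_info info = {
		.alloc_pgt_page	= ident_alloc_pgt_page,
		.free_pgt_page	= ident_free_pgt_page,
		.page_flag	= __PAGE_KERNEL_LARGE_EXEC,
		/* .offset left at 0: virtual addresses equal physical */
	};
	pgd_t *pgd = (pgd_t *)ident_alloc_pgt_page(NULL);
	int ret;

	if (!pgd)
		return -ENOMEM;

	ret = kernel_ident_mapping_init(&info, pgd, start, end);
	if (ret)
		kernel_ident_mapping_free(&info, pgd);
	return ret;
}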