From 0090c0a247cd3dc37181be4a9af4750ae3fedbd0 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sat, 26 Jul 2025 11:38:09 +0100 Subject: [PATCH 01/16] KVM: arm64: Add helper computing the state of 52bit PA support Track whether the guest is using 52bit PAs, either LPA or LPA2. This further simplifies the handling of LVA for 4k and 16k pages, as LPA2 implies LVA in this case. Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_nested.h | 1 + arch/arm64/kvm/at.c | 31 ++++++++++++++++++++++++----- 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h index 7fd76f41c296..038e35430bb2 100644 --- a/arch/arm64/include/asm/kvm_nested.h +++ b/arch/arm64/include/asm/kvm_nested.h @@ -299,6 +299,7 @@ struct s1_walk_info { bool pan; bool be; bool s2; + bool pa52bit; }; struct s1_walk_result { diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index d71ca4ddc9d1..8e275ea68cfa 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -31,6 +31,29 @@ static bool check_output_size(u64 ipa, struct s1_walk_info *wi) return wi->max_oa_bits < 48 && (ipa & GENMASK_ULL(47, wi->max_oa_bits)); } +static bool has_52bit_pa(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, u64 tcr) +{ + switch (BIT(wi->pgshift)) { + case SZ_64K: + default: /* IMPDEF: treat any other value as 64k */ + if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR0_EL1, PARANGE, 52)) + return false; + return ((wi->regime == TR_EL2 ? + FIELD_GET(TCR_EL2_PS_MASK, tcr) : + FIELD_GET(TCR_IPS_MASK, tcr)) == 0b0110); + case SZ_16K: + if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN16, 52_BIT)) + return false; + break; + case SZ_4K: + if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN4, 52_BIT)) + return false; + break; + } + + return (tcr & (wi->regime == TR_EL2 ? TCR_EL2_DS : TCR_DS)); +} + /* Return the translation regime that applies to an AT instruction */ static enum trans_regime compute_translation_regime(struct kvm_vcpu *vcpu, u32 op) { @@ -232,15 +255,13 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, goto transfault_l0; } + wi->pa52bit = has_52bit_pa(vcpu, wi, tcr); + /* R_GTJBY, R_SXWGM */ switch (BIT(wi->pgshift)) { case SZ_4K: - lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN4, 52_BIT); - lva &= tcr & (wi->regime == TR_EL2 ? TCR_EL2_DS : TCR_DS); - break; case SZ_16K: - lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN16, 52_BIT); - lva &= tcr & (wi->regime == TR_EL2 ? TCR_EL2_DS : TCR_DS); + lva = wi->pa52bit; break; case SZ_64K: lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, VARange, 52); From 23cf13def0c87d9ce234f12eb6132f6bf9442f29 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sat, 26 Jul 2025 11:52:34 +0100 Subject: [PATCH 02/16] KVM: arm64: Account for 52bit when computing maximum OA Adjust the computation of the max OA to account for 52bit PAs. Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_nested.h | 7 +++++-- arch/arm64/kvm/at.c | 2 +- arch/arm64/kvm/nested.c | 2 +- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h index 038e35430bb2..1a03095b03c5 100644 --- a/arch/arm64/include/asm/kvm_nested.h +++ b/arch/arm64/include/asm/kvm_nested.h @@ -265,7 +265,7 @@ static inline u64 decode_range_tlbi(u64 val, u64 *range, u16 *asid) return base; } -static inline unsigned int ps_to_output_size(unsigned int ps) +static inline unsigned int ps_to_output_size(unsigned int ps, bool pa52bit) { switch (ps) { case 0: return 32; @@ -273,7 +273,10 @@ static inline unsigned int ps_to_output_size(unsigned int ps) case 2: return 40; case 3: return 42; case 4: return 44; - case 5: + case 5: return 48; + case 6: if (pa52bit) + return 52; + fallthrough; default: return 48; } diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index 8e275ea68cfa..96452fdc90e2 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -295,7 +295,7 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, ps = (wi->regime == TR_EL2 ? FIELD_GET(TCR_EL2_PS_MASK, tcr) : FIELD_GET(TCR_IPS_MASK, tcr)); - wi->max_oa_bits = min(get_kvm_ipa_limit(), ps_to_output_size(ps)); + wi->max_oa_bits = min(get_kvm_ipa_limit(), ps_to_output_size(ps, wi->pa52bit)); /* Compute minimal alignment */ x = 3 + ia_bits - ((3 - wi->sl) * stride + wi->pgshift); diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index 77db81bae86f..cb36974e010a 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -349,7 +349,7 @@ static void vtcr_to_walk_info(u64 vtcr, struct s2_walk_info *wi) wi->sl = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr); /* Global limit for now, should eventually be per-VM */ wi->max_oa_bits = min(get_kvm_ipa_limit(), - ps_to_output_size(FIELD_GET(VTCR_EL2_PS_MASK, vtcr))); + ps_to_output_size(FIELD_GET(VTCR_EL2_PS_MASK, vtcr), false)); } int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa, From e645226a9c238db919d105d0ee7b4e09d80d13b1 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 27 Jul 2025 10:18:56 +0100 Subject: [PATCH 03/16] KVM: arm64: Compute 52bit TTBR address and alignment 52bit addresses from TTBR need extra adjustment and alignment checks. Implement the requirements of the architecture. Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier --- arch/arm64/kvm/at.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index 96452fdc90e2..e02e467fc2cc 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -28,6 +28,8 @@ static int get_ia_size(struct s1_walk_info *wi) /* Return true if the IPA is out of the OA range */ static bool check_output_size(u64 ipa, struct s1_walk_info *wi) { + if (wi->pa52bit) + return wi->max_oa_bits < 52 && (ipa & GENMASK_ULL(51, wi->max_oa_bits)); return wi->max_oa_bits < 48 && (ipa & GENMASK_ULL(47, wi->max_oa_bits)); } @@ -301,6 +303,16 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, x = 3 + ia_bits - ((3 - wi->sl) * stride + wi->pgshift); wi->baddr = ttbr & TTBRx_EL1_BADDR; + if (wi->pa52bit) { + /* + * Force the alignment on 64 bytes for top-level tables + * smaller than 8 entries, since TTBR.BADDR[5:2] are used to + * store bits [51:48] of the first level of lookup. + */ + x = max(x, 6); + + wi->baddr |= FIELD_GET(GENMASK_ULL(5, 2), ttbr) << 48; + } /* R_VPBBF */ if (check_output_size(wi->baddr, wi)) From df1d0197a2b939e28321686cafaead4a183980fa Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 27 Jul 2025 18:46:02 +0100 Subject: [PATCH 04/16] KVM: arm64: Decouple output address from the PT descriptor Add a helper converting the descriptor into a nicely formed OA, irrespective of the in-descriptor representation (< 52bit, LPA or LPA2). Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier --- arch/arm64/kvm/at.c | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index e02e467fc2cc..bdb2c3e22f24 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -56,6 +56,29 @@ static bool has_52bit_pa(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, u64 tcr return (tcr & (wi->regime == TR_EL2 ? TCR_EL2_DS : TCR_DS)); } +static u64 desc_to_oa(struct s1_walk_info *wi, u64 desc) +{ + u64 addr; + + if (!wi->pa52bit) + return desc & GENMASK_ULL(47, wi->pgshift); + + switch (BIT(wi->pgshift)) { + case SZ_4K: + case SZ_16K: + addr = desc & GENMASK_ULL(49, wi->pgshift); + addr |= FIELD_GET(KVM_PTE_ADDR_51_50_LPA2, desc) << 50; + break; + case SZ_64K: + default: /* IMPDEF: treat any other value as 64k */ + addr = desc & GENMASK_ULL(47, wi->pgshift); + addr |= FIELD_GET(KVM_PTE_ADDR_51_48, desc) << 48; + break; + } + + return addr; +} + /* Return the translation regime that applies to an AT instruction */ static enum trans_regime compute_translation_regime(struct kvm_vcpu *vcpu, u32 op) { @@ -402,7 +425,7 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, wr->PXNTable |= FIELD_GET(PMD_TABLE_PXN, desc); } - baddr = desc & GENMASK_ULL(47, wi->pgshift); + baddr = desc_to_oa(wi, desc); /* Check for out-of-range OA */ if (check_output_size(baddr, wi)) @@ -431,7 +454,8 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, goto transfault; } - if (check_output_size(desc & GENMASK(47, va_bottom), wi)) + baddr = desc_to_oa(wi, desc); + if (check_output_size(baddr & GENMASK(52, va_bottom), wi)) goto addrsz; if (!(desc & PTE_AF)) { @@ -444,7 +468,7 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, wr->failed = false; wr->level = level; wr->desc = desc; - wr->pa = desc & GENMASK(47, va_bottom); + wr->pa = baddr & GENMASK(52, va_bottom); wr->pa |= va & GENMASK_ULL(va_bottom - 1, 0); wr->nG = (wi->regime != TR_EL2) && (desc & PTE_NG); From e4bd479884a1353efd43aa950c996d333145642d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 25 Aug 2025 14:48:32 +0100 Subject: [PATCH 05/16] KVM: arm64: Pass the walk_info structure to compute_par_s1() Instead of just passing the translation regime, pass the full walk_info structure to compute_par_s1(). This will help further chamges that will require it. Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier --- arch/arm64/kvm/at.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index bdb2c3e22f24..acf6a5d49777 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -807,8 +807,8 @@ static u64 compute_par_s12(struct kvm_vcpu *vcpu, u64 s1_par, return par; } -static u64 compute_par_s1(struct kvm_vcpu *vcpu, struct s1_walk_result *wr, - enum trans_regime regime) +static u64 compute_par_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, + struct s1_walk_result *wr) { u64 par; @@ -823,7 +823,7 @@ static u64 compute_par_s1(struct kvm_vcpu *vcpu, struct s1_walk_result *wr, par = SYS_PAR_EL1_NSE; par |= wr->pa & GENMASK_ULL(47, 12); - if (regime == TR_EL10 && + if (wi->regime == TR_EL10 && (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_DC)) { par |= FIELD_PREP(SYS_PAR_EL1_ATTR, MEMATTR(WbRaWa, WbRaWa)); @@ -838,14 +838,14 @@ static u64 compute_par_s1(struct kvm_vcpu *vcpu, struct s1_walk_result *wr, par = SYS_PAR_EL1_NSE; - mair = (regime == TR_EL10 ? + mair = (wi->regime == TR_EL10 ? vcpu_read_sys_reg(vcpu, MAIR_EL1) : vcpu_read_sys_reg(vcpu, MAIR_EL2)); mair >>= FIELD_GET(PTE_ATTRINDX_MASK, wr->desc) * 8; mair &= 0xff; - sctlr = (regime == TR_EL10 ? + sctlr = (wi->regime == TR_EL10 ? vcpu_read_sys_reg(vcpu, SCTLR_EL1) : vcpu_read_sys_reg(vcpu, SCTLR_EL2)); @@ -1243,7 +1243,7 @@ static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) fail_s1_walk(&wr, ESR_ELx_FSC_PERM_L(wr.level), false); compute_par: - return compute_par_s1(vcpu, &wr, wi.regime); + return compute_par_s1(vcpu, &wi, &wr); } /* From c0cc438046eed8d906ac917bc70a7284b6cc3f03 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 27 Jul 2025 19:37:01 +0100 Subject: [PATCH 06/16] KVM: arm64: Compute shareability for LPA2 LPA2 gets the memory access shareability from TCR_ELx instead of getting it form the descriptors. Store it in the walk info struct so that it is passed around and evaluated as required. Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_nested.h | 1 + arch/arm64/kvm/at.c | 37 +++++++++++++++++++++++------ 2 files changed, 31 insertions(+), 7 deletions(-) diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h index 1a03095b03c5..f3135ded47b7 100644 --- a/arch/arm64/include/asm/kvm_nested.h +++ b/arch/arm64/include/asm/kvm_nested.h @@ -295,6 +295,7 @@ struct s1_walk_info { unsigned int pgshift; unsigned int txsz; int sl; + u8 sh; bool as_el0; bool hpd; bool e0poe; diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index acf6a5d49777..410d12b5b0ac 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -188,6 +188,12 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, if (!tbi && (u64)sign_extend64(va, 55) != va) goto addrsz; + wi->sh = (wi->regime == TR_EL2 ? + FIELD_GET(TCR_EL2_SH0_MASK, tcr) : + (va55 ? + FIELD_GET(TCR_SH1_MASK, tcr) : + FIELD_GET(TCR_SH0_MASK, tcr))); + va = (u64)sign_extend64(va, 55); /* Let's put the MMU disabled case aside immediately */ @@ -697,21 +703,36 @@ static u8 combine_s1_s2_attr(u8 s1, u8 s2) #define ATTR_OSH 0b10 #define ATTR_ISH 0b11 -static u8 compute_sh(u8 attr, u64 desc) +static u8 compute_final_sh(u8 attr, u8 sh) { - u8 sh; - /* Any form of device, as well as NC has SH[1:0]=0b10 */ if (MEMATTR_IS_DEVICE(attr) || attr == MEMATTR(NC, NC)) return ATTR_OSH; - sh = FIELD_GET(PTE_SHARED, desc); if (sh == ATTR_RSV) /* Reserved, mapped to NSH */ sh = ATTR_NSH; return sh; } +static u8 compute_s1_sh(struct s1_walk_info *wi, struct s1_walk_result *wr, + u8 attr) +{ + u8 sh; + + /* + * non-52bit and LPA have their basic shareability described in the + * descriptor. LPA2 gets it from the corresponding field in TCR, + * conveniently recorded in the walk info. + */ + if (!wi->pa52bit || BIT(wi->pgshift) == SZ_64K) + sh = FIELD_GET(KVM_PTE_LEAF_ATTR_LO_S1_SH, wr->desc); + else + sh = wi->sh; + + return compute_final_sh(attr, sh); +} + static u8 combine_sh(u8 s1_sh, u8 s2_sh) { if (s1_sh == ATTR_OSH || s2_sh == ATTR_OSH) @@ -725,7 +746,7 @@ static u8 combine_sh(u8 s1_sh, u8 s2_sh) static u64 compute_par_s12(struct kvm_vcpu *vcpu, u64 s1_par, struct kvm_s2_trans *tr) { - u8 s1_parattr, s2_memattr, final_attr; + u8 s1_parattr, s2_memattr, final_attr, s2_sh; u64 par; /* If S2 has failed to translate, report the damage */ @@ -798,11 +819,13 @@ static u64 compute_par_s12(struct kvm_vcpu *vcpu, u64 s1_par, !MEMATTR_IS_DEVICE(final_attr)) final_attr = MEMATTR(NC, NC); + s2_sh = FIELD_GET(KVM_PTE_LEAF_ATTR_LO_S2_SH, tr->desc); + par = FIELD_PREP(SYS_PAR_EL1_ATTR, final_attr); par |= tr->output & GENMASK(47, 12); par |= FIELD_PREP(SYS_PAR_EL1_SH, combine_sh(FIELD_GET(SYS_PAR_EL1_SH, s1_par), - compute_sh(final_attr, tr->desc))); + compute_final_sh(final_attr, s2_sh))); return par; } @@ -856,7 +879,7 @@ static u64 compute_par_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, par |= FIELD_PREP(SYS_PAR_EL1_ATTR, mair); par |= wr->pa & GENMASK_ULL(47, 12); - sh = compute_sh(mair, wr->desc); + sh = compute_s1_sh(wi, wr, mair); par |= FIELD_PREP(SYS_PAR_EL1_SH, sh); } From dd82412c2b2b30bf4aa08ef069eb38c7795cd9b8 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 27 Jul 2025 19:47:00 +0100 Subject: [PATCH 07/16] KVM: arm64: Populate PAR_EL1 with 52bit addresses Expand the output address populated in PAR_EL1 to 52bit addresses. Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier --- arch/arm64/kvm/at.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index 410d12b5b0ac..d06cf816f848 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -844,7 +844,7 @@ static u64 compute_par_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, } else if (wr->level == S1_MMU_DISABLED) { /* MMU off or HCR_EL2.DC == 1 */ par = SYS_PAR_EL1_NSE; - par |= wr->pa & GENMASK_ULL(47, 12); + par |= wr->pa & SYS_PAR_EL1_PA; if (wi->regime == TR_EL10 && (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_DC)) { @@ -877,7 +877,7 @@ static u64 compute_par_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, mair = MEMATTR(NC, NC); par |= FIELD_PREP(SYS_PAR_EL1_ATTR, mair); - par |= wr->pa & GENMASK_ULL(47, 12); + par |= wr->pa & SYS_PAR_EL1_PA; sh = compute_s1_sh(wi, wr, mair); par |= FIELD_PREP(SYS_PAR_EL1_SH, sh); From 5da3a3b27a0108562547086e0ba7d9593f147cfe Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 28 Jul 2025 16:29:56 +0100 Subject: [PATCH 08/16] KVM: arm64: Expand valid block mappings to FEAT_LPA/LPA2 support With 52bit PAs, block mappings can exist at different levels (such as level 0 for 4kB pages, or level 1 for 16kB and 64kB pages). Account for this in walk_s1(). Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier --- arch/arm64/kvm/at.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index d06cf816f848..e740df0d82f8 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -448,11 +448,11 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, switch (BIT(wi->pgshift)) { case SZ_4K: - valid_block = level == 1 || level == 2; + valid_block = level == 1 || level == 2 || (wi->pa52bit && level == 0); break; case SZ_16K: case SZ_64K: - valid_block = level == 2; + valid_block = level == 2 || (wi->pa52bit && level == 1); break; } From dabf9f73fed86e096255c5be12c7e1d08a939c67 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 28 Jul 2025 17:20:29 +0100 Subject: [PATCH 09/16] KVM: arm64: Report faults from S1 walk setup at the expected start level Translation faults from TTBR must be reported on the start level, and not level-0. Enforcing this requires moving quite a lot of code around so that the start level can be computed early enough that it is usable. Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier --- arch/arm64/kvm/at.c | 103 +++++++++++++++++++++++--------------------- 1 file changed, 54 insertions(+), 49 deletions(-) diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index e740df0d82f8..7b9711021db0 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -154,9 +154,6 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, va55 = va & BIT(55); - if (wi->regime == TR_EL2 && va55) - goto addrsz; - wi->s2 = wi->regime == TR_EL10 && (hcr & (HCR_VM | HCR_DC)); switch (wi->regime) { @@ -179,6 +176,46 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, BUG(); } + /* Someone was silly enough to encode TG0/TG1 differently */ + if (va55 && wi->regime != TR_EL2) { + wi->txsz = FIELD_GET(TCR_T1SZ_MASK, tcr); + tg = FIELD_GET(TCR_TG1_MASK, tcr); + + switch (tg << TCR_TG1_SHIFT) { + case TCR_TG1_4K: + wi->pgshift = 12; break; + case TCR_TG1_16K: + wi->pgshift = 14; break; + case TCR_TG1_64K: + default: /* IMPDEF: treat any other value as 64k */ + wi->pgshift = 16; break; + } + } else { + wi->txsz = FIELD_GET(TCR_T0SZ_MASK, tcr); + tg = FIELD_GET(TCR_TG0_MASK, tcr); + + switch (tg << TCR_TG0_SHIFT) { + case TCR_TG0_4K: + wi->pgshift = 12; break; + case TCR_TG0_16K: + wi->pgshift = 14; break; + case TCR_TG0_64K: + default: /* IMPDEF: treat any other value as 64k */ + wi->pgshift = 16; break; + } + } + + wi->pa52bit = has_52bit_pa(vcpu, wi, tcr); + + ia_bits = get_ia_size(wi); + + /* AArch64.S1StartLevel() */ + stride = wi->pgshift - 3; + wi->sl = 3 - (((ia_bits - 1) - wi->pgshift) / stride); + + if (wi->regime == TR_EL2 && va55) + goto addrsz; + tbi = (wi->regime == TR_EL2 ? FIELD_GET(TCR_EL2_TBI, tcr) : (va55 ? @@ -248,46 +285,15 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, /* R_BVXDG */ wi->hpd |= (wi->poe || wi->e0poe); - /* Someone was silly enough to encode TG0/TG1 differently */ - if (va55) { - wi->txsz = FIELD_GET(TCR_T1SZ_MASK, tcr); - tg = FIELD_GET(TCR_TG1_MASK, tcr); - - switch (tg << TCR_TG1_SHIFT) { - case TCR_TG1_4K: - wi->pgshift = 12; break; - case TCR_TG1_16K: - wi->pgshift = 14; break; - case TCR_TG1_64K: - default: /* IMPDEF: treat any other value as 64k */ - wi->pgshift = 16; break; - } - } else { - wi->txsz = FIELD_GET(TCR_T0SZ_MASK, tcr); - tg = FIELD_GET(TCR_TG0_MASK, tcr); - - switch (tg << TCR_TG0_SHIFT) { - case TCR_TG0_4K: - wi->pgshift = 12; break; - case TCR_TG0_16K: - wi->pgshift = 14; break; - case TCR_TG0_64K: - default: /* IMPDEF: treat any other value as 64k */ - wi->pgshift = 16; break; - } - } - /* R_PLCGL, R_YXNYW */ if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR2_EL1, ST, 48_47)) { if (wi->txsz > 39) - goto transfault_l0; + goto transfault; } else { if (wi->txsz > 48 || (BIT(wi->pgshift) == SZ_64K && wi->txsz > 47)) - goto transfault_l0; + goto transfault; } - wi->pa52bit = has_52bit_pa(vcpu, wi, tcr); - /* R_GTJBY, R_SXWGM */ switch (BIT(wi->pgshift)) { case SZ_4K: @@ -300,28 +306,22 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, } if ((lva && wi->txsz < 12) || (!lva && wi->txsz < 16)) - goto transfault_l0; - - ia_bits = get_ia_size(wi); + goto transfault; /* R_YYVYV, I_THCZK */ if ((!va55 && va > GENMASK(ia_bits - 1, 0)) || (va55 && va < GENMASK(63, ia_bits))) - goto transfault_l0; + goto transfault; /* I_ZFSYQ */ if (wi->regime != TR_EL2 && (tcr & (va55 ? TCR_EPD1_MASK : TCR_EPD0_MASK))) - goto transfault_l0; + goto transfault; /* R_BNDVG and following statements */ if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, E0PD, IMP) && wi->as_el0 && (tcr & (va55 ? TCR_E0PD1 : TCR_E0PD0))) - goto transfault_l0; - - /* AArch64.S1StartLevel() */ - stride = wi->pgshift - 3; - wi->sl = 3 - (((ia_bits - 1) - wi->pgshift) / stride); + goto transfault; ps = (wi->regime == TR_EL2 ? FIELD_GET(TCR_EL2_PS_MASK, tcr) : FIELD_GET(TCR_IPS_MASK, tcr)); @@ -351,12 +351,17 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, return 0; -addrsz: /* Address Size Fault level 0 */ +addrsz: + /* + * Address Size Fault level 0 to indicate it comes from TTBR. + * yes, this is an oddity. + */ fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(0), false); return -EFAULT; -transfault_l0: /* Translation Fault level 0 */ - fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(0), false); +transfault: + /* Translation Fault on start level */ + fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(wi->sl), false); return -EFAULT; } From 14d4802dc22acf670333c8aad4e1931e7d6ee412 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 29 Jul 2025 12:06:14 +0100 Subject: [PATCH 10/16] KVM: arm64: Allow use of S1 PTW for non-NV vcpus As we are about to use the S1 PTW in non-NV contexts, we must make sure that we don't evaluate the EL2 state when dealing with the EL1&0 translation regime. Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier --- arch/arm64/kvm/at.c | 58 ++++++++++++++++++++++----------------------- 1 file changed, 28 insertions(+), 30 deletions(-) diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index 7b9711021db0..9baa929c102f 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -98,21 +98,26 @@ static enum trans_regime compute_translation_regime(struct kvm_vcpu *vcpu, u32 o } } +static u64 effective_tcr2(struct kvm_vcpu *vcpu, enum trans_regime regime) +{ + if (regime == TR_EL10) { + if (vcpu_has_nv(vcpu) && + !(__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TCR2En)) + return 0; + + return __vcpu_sys_reg(vcpu, TCR2_EL1); + } + + return vcpu_read_sys_reg(vcpu, TCR2_EL2); +} + static bool s1pie_enabled(struct kvm_vcpu *vcpu, enum trans_regime regime) { if (!kvm_has_s1pie(vcpu->kvm)) return false; - switch (regime) { - case TR_EL2: - case TR_EL20: - return vcpu_read_sys_reg(vcpu, TCR2_EL2) & TCR2_EL2_PIE; - case TR_EL10: - return (__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TCR2En) && - (__vcpu_sys_reg(vcpu, TCR2_EL1) & TCR2_EL1_PIE); - default: - BUG(); - } + /* Abuse TCR2_EL1_PIE and use it for EL2 as well */ + return effective_tcr2(vcpu, regime) & TCR2_EL1_PIE; } static void compute_s1poe(struct kvm_vcpu *vcpu, struct s1_walk_info *wi) @@ -124,23 +129,11 @@ static void compute_s1poe(struct kvm_vcpu *vcpu, struct s1_walk_info *wi) return; } - switch (wi->regime) { - case TR_EL2: - case TR_EL20: - val = vcpu_read_sys_reg(vcpu, TCR2_EL2); - wi->poe = val & TCR2_EL2_POE; - wi->e0poe = (wi->regime == TR_EL20) && (val & TCR2_EL2_E0POE); - break; - case TR_EL10: - if (__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TCR2En) { - wi->poe = wi->e0poe = false; - return; - } + val = effective_tcr2(vcpu, wi->regime); - val = __vcpu_sys_reg(vcpu, TCR2_EL1); - wi->poe = val & TCR2_EL1_POE; - wi->e0poe = val & TCR2_EL1_E0POE; - } + /* Abuse TCR2_EL1_* for EL2 */ + wi->poe = val & TCR2_EL1_POE; + wi->e0poe = (wi->regime != TR_EL2) && (val & TCR2_EL1_E0POE); } static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, @@ -150,11 +143,16 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, unsigned int stride, x; bool va55, tbi, lva; - hcr = __vcpu_sys_reg(vcpu, HCR_EL2); - va55 = va & BIT(55); - wi->s2 = wi->regime == TR_EL10 && (hcr & (HCR_VM | HCR_DC)); + if (vcpu_has_nv(vcpu)) { + hcr = __vcpu_sys_reg(vcpu, HCR_EL2); + wi->s2 = wi->regime == TR_EL10 && (hcr & (HCR_VM | HCR_DC)); + } else { + WARN_ON_ONCE(wi->regime != TR_EL10); + wi->s2 = false; + hcr = 0; + } switch (wi->regime) { case TR_EL10: @@ -851,7 +849,7 @@ static u64 compute_par_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, par = SYS_PAR_EL1_NSE; par |= wr->pa & SYS_PAR_EL1_PA; - if (wi->regime == TR_EL10 && + if (wi->regime == TR_EL10 && vcpu_has_nv(vcpu) && (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_DC)) { par |= FIELD_PREP(SYS_PAR_EL1_ATTR, MEMATTR(WbRaWa, WbRaWa)); From cb1762904c5000220a0facf9bcab68ba687ec417 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 25 Aug 2025 15:20:06 +0100 Subject: [PATCH 11/16] KVM: arm64: Allow EL1 control registers to be accessed from the CPU state As we are about to plug the SW PTW into the EL1-only code, we can no longer assume that the EL1 state is not resident on the CPU, as we don't necessarily get there from EL2 traps. Turn the __vcpu_sys_reg() access on the EL1 state into calls to the vcpu_read_sys_reg() helper, which is guaranteed to do the right thing. Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier --- arch/arm64/kvm/at.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index 9baa929c102f..4efa2167116f 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -105,7 +105,7 @@ static u64 effective_tcr2(struct kvm_vcpu *vcpu, enum trans_regime regime) !(__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TCR2En)) return 0; - return __vcpu_sys_reg(vcpu, TCR2_EL1); + return vcpu_read_sys_reg(vcpu, TCR2_EL1); } return vcpu_read_sys_reg(vcpu, TCR2_EL2); @@ -956,7 +956,7 @@ static void compute_s1_direct_permissions(struct kvm_vcpu *vcpu, wxn = (vcpu_read_sys_reg(vcpu, SCTLR_EL2) & SCTLR_ELx_WXN); break; case TR_EL10: - wxn = (__vcpu_sys_reg(vcpu, SCTLR_EL1) & SCTLR_ELx_WXN); + wxn = (vcpu_read_sys_reg(vcpu, SCTLR_EL1) & SCTLR_ELx_WXN); break; } From 61b0280a670bdbb3a209ae474625f387788af0a8 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 25 Aug 2025 11:24:11 +0100 Subject: [PATCH 12/16] KVM: arm64: Don't switch MMU on translation from non-NV context If calling into the AT code from guest EL1, there is no need to consider any context switch, as we are guaranteed to be in the correct context. Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier --- arch/arm64/kvm/at.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index 4efa2167116f..c06a8e831f33 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -1285,7 +1285,7 @@ static u64 __kvm_at_s1e01_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) { struct mmu_config config; struct kvm_s2_mmu *mmu; - bool fail; + bool fail, mmu_cs; u64 par; par = SYS_PAR_EL1_F; @@ -1301,8 +1301,13 @@ static u64 __kvm_at_s1e01_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) * If HCR_EL2.{E2H,TGE} == {1,1}, the MMU context is already * the right one (as we trapped from vEL2). If not, save the * full MMU context. + * + * We are also guaranteed to be in the correct context if + * we're not in a nested VM. */ - if (vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu)) + mmu_cs = (vcpu_has_nv(vcpu) && + !(vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu))); + if (!mmu_cs) goto skip_mmu_switch; /* @@ -1370,7 +1375,7 @@ skip_mmu_switch: write_sysreg_hcr(HCR_HOST_VHE_FLAGS); - if (!(vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu))) + if (mmu_cs) __mmu_config_restore(&config); return par; From 0c5471408cb5c518bce76b851aff89719283a428 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 25 Aug 2025 11:28:19 +0100 Subject: [PATCH 13/16] KVM: arm64: Add filtering hook to S1 page table walk Add a filtering hook that can get called on each level of the walk, and providing access to the full state. Crucially, this is called *before* the access is made, so that it is possible to track down the level of a faulting access. Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_nested.h | 14 ++++++++++++++ arch/arm64/kvm/at.c | 11 +++++++++++ 2 files changed, 25 insertions(+) diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h index f3135ded47b7..cce0e4cb5448 100644 --- a/arch/arm64/include/asm/kvm_nested.h +++ b/arch/arm64/include/asm/kvm_nested.h @@ -288,7 +288,21 @@ enum trans_regime { TR_EL2, }; +struct s1_walk_info; + +struct s1_walk_context { + struct s1_walk_info *wi; + u64 table_ipa; + int level; +}; + +struct s1_walk_filter { + int (*fn)(struct s1_walk_context *, void *); + void *priv; +}; + struct s1_walk_info { + struct s1_walk_filter *filter; u64 baddr; enum trans_regime regime; unsigned int max_oa_bits; diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index c06a8e831f33..b70b777a3c20 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -404,6 +404,17 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, ipa = kvm_s2_trans_output(&s2_trans); } + if (wi->filter) { + ret = wi->filter->fn(&(struct s1_walk_context) + { + .wi = wi, + .table_ipa = baddr, + .level = level, + }, wi->filter->priv); + if (ret) + return ret; + } + ret = kvm_read_guest(vcpu->kvm, ipa, &desc, sizeof(desc)); if (ret) { fail_s1_walk(wr, ESR_ELx_FSC_SEA_TTW(level), false); From b8e625167a321138f83b1f6c99cf25d1290cb04e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 25 Aug 2025 11:31:33 +0100 Subject: [PATCH 14/16] KVM: arm64: Add S1 IPA to page table level walker Use the filtering hook infrastructure to implement a new walker that, for a given VA and an IPA, returns the level of the first occurence of this IPA in the walk from that VA. This will be used to improve our SEA syndrome reporting. Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_nested.h | 2 + arch/arm64/kvm/at.c | 65 +++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+) diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h index cce0e4cb5448..2be6c3de74e3 100644 --- a/arch/arm64/include/asm/kvm_nested.h +++ b/arch/arm64/include/asm/kvm_nested.h @@ -353,6 +353,8 @@ struct s1_walk_result { int __kvm_translate_va(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, struct s1_walk_result *wr, u64 va); +int __kvm_find_s1_desc_level(struct kvm_vcpu *vcpu, u64 va, u64 ipa, + int *level); /* VNCR management */ int kvm_vcpu_allocate_vncr_tlb(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index b70b777a3c20..20bb9af125b1 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -1569,3 +1569,68 @@ int __kvm_translate_va(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, return 0; } + +struct desc_match { + u64 ipa; + int level; +}; + +static int match_s1_desc(struct s1_walk_context *ctxt, void *priv) +{ + struct desc_match *dm = priv; + u64 ipa = dm->ipa; + + /* Use S1 granule alignment */ + ipa &= GENMASK(51, ctxt->wi->pgshift); + + /* Not the IPA we're looking for? Continue. */ + if (ipa != ctxt->table_ipa) + return 0; + + /* Note the level and interrupt the walk */ + dm->level = ctxt->level; + return -EINTR; +} + +int __kvm_find_s1_desc_level(struct kvm_vcpu *vcpu, u64 va, u64 ipa, int *level) +{ + struct desc_match dm = { + .ipa = ipa, + }; + struct s1_walk_info wi = { + .filter = &(struct s1_walk_filter){ + .fn = match_s1_desc, + .priv = &dm, + }, + .regime = TR_EL10, + .as_el0 = false, + .pan = false, + }; + struct s1_walk_result wr = {}; + int ret; + + ret = setup_s1_walk(vcpu, &wi, &wr, va); + if (ret) + return ret; + + /* We really expect the S1 MMU to be on here... */ + if (WARN_ON_ONCE(wr.level == S1_MMU_DISABLED)) { + *level = 0; + return 0; + } + + /* Walk the guest's PT, looking for a match along the way */ + ret = walk_s1(vcpu, &wi, &wr, va); + switch (ret) { + case -EINTR: + /* We interrupted the walk on a match, return the level */ + *level = dm.level; + return 0; + case 0: + /* The walk completed, we failed to find the entry */ + return -ENOENT; + default: + /* Any other error... */ + return ret; + } +} From 50f77dc87f133b09db44a5bbfdd64b1ca83a8d8e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 25 Aug 2025 13:13:56 +0100 Subject: [PATCH 15/16] KVM: arm64: Populate level on S1PTW SEA injection Our fault injection mechanism is mildly primitive, and doesn't really implement the architecture when it comes to reporting the level of a failing S1 PTW (we blindly report a SEA outside of a PTW). Now that we can walk the S1 page tables and look for a particular IPA in the descriptors, it is pretty easy to improve the SEA injection code. Note that we only do it for AArch64 guests, and that 32bit guests are left to their own device (oddly enough, I don't fancy writing a 32bit PTW...). Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier --- arch/arm64/kvm/inject_fault.c | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index 6745f38b64f9..dfcd66c65517 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -106,7 +106,30 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr { unsigned long cpsr = *vcpu_cpsr(vcpu); bool is_aarch32 = vcpu_mode_is_32bit(vcpu); - u64 esr = 0; + u64 esr = 0, fsc; + int level; + + /* + * If injecting an abort from a failed S1PTW, rewalk the S1 PTs to + * find the failing level. If we can't find it, assume the error was + * transient and restart without changing the state. + */ + if (kvm_vcpu_abt_iss1tw(vcpu)) { + u64 hpfar = kvm_vcpu_get_fault_ipa(vcpu); + int ret; + + if (hpfar == INVALID_GPA) + return; + + ret = __kvm_find_s1_desc_level(vcpu, addr, hpfar, &level); + if (ret) + return; + + WARN_ON_ONCE(level < -1 || level > 3); + fsc = ESR_ELx_FSC_SEA_TTW(level); + } else { + fsc = ESR_ELx_FSC_EXTABT; + } /* This delight is brought to you by FEAT_DoubleFault2. */ if (effective_sctlr2_ease(vcpu)) @@ -133,7 +156,7 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr if (!is_iabt) esr |= ESR_ELx_EC_DABT_LOW << ESR_ELx_EC_SHIFT; - esr |= ESR_ELx_FSC_EXTABT; + esr |= fsc; vcpu_write_sys_reg(vcpu, addr, exception_far_elx(vcpu)); vcpu_write_sys_reg(vcpu, esr, exception_esr_elx(vcpu)); From 00a37271c8a68070dc64f81a5d64644beb4cef2f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 26 Aug 2025 17:33:24 +0100 Subject: [PATCH 16/16] KVM: arm64: selftest: Expand external_aborts test to look for TTW levels Add a basic test corrupting a level-2 table entry to check that the resulting abort is a SEA on a PTW at level-3. Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier --- .../selftests/kvm/arm64/external_aborts.c | 42 +++++++++++++++++++ .../selftests/kvm/include/arm64/processor.h | 1 + .../selftests/kvm/lib/arm64/processor.c | 13 +++++- 3 files changed, 55 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/arm64/external_aborts.c b/tools/testing/selftests/kvm/arm64/external_aborts.c index 062bf84cced1..592b26ded779 100644 --- a/tools/testing/selftests/kvm/arm64/external_aborts.c +++ b/tools/testing/selftests/kvm/arm64/external_aborts.c @@ -250,6 +250,47 @@ static void test_serror(void) kvm_vm_free(vm); } +static void expect_sea_s1ptw_handler(struct ex_regs *regs) +{ + u64 esr = read_sysreg(esr_el1); + + GUEST_ASSERT_EQ(regs->pc, expected_abort_pc); + GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_CUR); + GUEST_ASSERT_EQ((esr & ESR_ELx_FSC), ESR_ELx_FSC_SEA_TTW(3)); + + GUEST_DONE(); +} + +static noinline void test_s1ptw_abort_guest(void) +{ + extern char test_s1ptw_abort_insn; + + WRITE_ONCE(expected_abort_pc, (u64)&test_s1ptw_abort_insn); + + asm volatile("test_s1ptw_abort_insn:\n\t" + "ldr x0, [%0]\n\t" + : : "r" (MMIO_ADDR) : "x0", "memory"); + + GUEST_FAIL("Load on S1PTW abort should not retire"); +} + +static void test_s1ptw_abort(void) +{ + struct kvm_vcpu *vcpu; + u64 *ptep, bad_pa; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_s1ptw_abort_guest, + expect_sea_s1ptw_handler); + + ptep = virt_get_pte_hva_at_level(vm, MMIO_ADDR, 2); + bad_pa = BIT(vm->pa_bits) - vm->page_size; + + *ptep &= ~GENMASK(47, 12); + *ptep |= bad_pa; + + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + static void test_serror_emulated_guest(void) { GUEST_ASSERT(!(read_sysreg(isr_el1) & ISR_EL1_A)); @@ -327,4 +368,5 @@ int main(void) test_serror_masked(); test_serror_emulated(); test_mmio_ease(); + test_s1ptw_abort(); } diff --git a/tools/testing/selftests/kvm/include/arm64/processor.h b/tools/testing/selftests/kvm/include/arm64/processor.h index 255fed769a8a..e3e916b1d9c4 100644 --- a/tools/testing/selftests/kvm/include/arm64/processor.h +++ b/tools/testing/selftests/kvm/include/arm64/processor.h @@ -175,6 +175,7 @@ void vm_install_exception_handler(struct kvm_vm *vm, void vm_install_sync_handler(struct kvm_vm *vm, int vector, int ec, handler_fn handler); +uint64_t *virt_get_pte_hva_at_level(struct kvm_vm *vm, vm_vaddr_t gva, int level); uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva); static inline void cpu_relax(void) diff --git a/tools/testing/selftests/kvm/lib/arm64/processor.c b/tools/testing/selftests/kvm/lib/arm64/processor.c index eb115123d741..bd7480a93f96 100644 --- a/tools/testing/selftests/kvm/lib/arm64/processor.c +++ b/tools/testing/selftests/kvm/lib/arm64/processor.c @@ -185,7 +185,7 @@ void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) _virt_pg_map(vm, vaddr, paddr, attr_idx); } -uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva) +uint64_t *virt_get_pte_hva_at_level(struct kvm_vm *vm, vm_vaddr_t gva, int level) { uint64_t *ptep; @@ -195,17 +195,23 @@ uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva) ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, gva) * 8; if (!ptep) goto unmapped_gva; + if (level == 0) + return ptep; switch (vm->pgtable_levels) { case 4: ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, gva) * 8; if (!ptep) goto unmapped_gva; + if (level == 1) + break; /* fall through */ case 3: ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, gva) * 8; if (!ptep) goto unmapped_gva; + if (level == 2) + break; /* fall through */ case 2: ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, gva) * 8; @@ -223,6 +229,11 @@ unmapped_gva: exit(EXIT_FAILURE); } +uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva) +{ + return virt_get_pte_hva_at_level(vm, gva, 3); +} + vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) { uint64_t *ptep = virt_get_pte_hva(vm, gva);