Merge branch kvm-arm64/52bit-at into kvmarm-master/next

* kvm-arm64/52bit-at:
  : .
  : Upgrade the S1 page table walker to support 52bit PA, and use it to
  : report the fault level when taking a S2 fault on S1PTW, which is required
  : by the architecture (20250915114451.660351-1-maz@kernel.org).
  : .
  KVM: arm64: selftest: Expand external_aborts test to look for TTW levels
  KVM: arm64: Populate level on S1PTW SEA injection
  KVM: arm64: Add S1 IPA to page table level walker
  KVM: arm64: Add filtering hook to S1 page table walk
  KVM: arm64: Don't switch MMU on translation from non-NV context
  KVM: arm64: Allow EL1 control registers to be accessed from the CPU state
  KVM: arm64: Allow use of S1 PTW for non-NV vcpus
  KVM: arm64: Report faults from S1 walk setup at the expected start level
  KVM: arm64: Expand valid block mappings to FEAT_LPA/LPA2 support
  KVM: arm64: Populate PAR_EL1 with 52bit addresses
  KVM: arm64: Compute shareability for LPA2
  KVM: arm64: Pass the walk_info structure to compute_par_s1()
  KVM: arm64: Decouple output address from the PT descriptor
  KVM: arm64: Compute 52bit TTBR address and alignment
  KVM: arm64: Account for 52bit when computing maximum OA
  KVM: arm64: Add helper computing the state of 52bit PA support

Signed-off-by: Marc Zyngier <maz@kernel.org>
This commit is contained in:
Marc Zyngier
2025-09-20 12:25:57 +01:00
7 changed files with 374 additions and 112 deletions

View File

@@ -265,7 +265,7 @@ static inline u64 decode_range_tlbi(u64 val, u64 *range, u16 *asid)
return base;
}
static inline unsigned int ps_to_output_size(unsigned int ps)
static inline unsigned int ps_to_output_size(unsigned int ps, bool pa52bit)
{
switch (ps) {
case 0: return 32;
@@ -273,7 +273,10 @@ static inline unsigned int ps_to_output_size(unsigned int ps)
case 2: return 40;
case 3: return 42;
case 4: return 44;
case 5:
case 5: return 48;
case 6: if (pa52bit)
return 52;
fallthrough;
default:
return 48;
}
@@ -285,13 +288,28 @@ enum trans_regime {
TR_EL2,
};
struct s1_walk_info;
struct s1_walk_context {
struct s1_walk_info *wi;
u64 table_ipa;
int level;
};
struct s1_walk_filter {
int (*fn)(struct s1_walk_context *, void *);
void *priv;
};
struct s1_walk_info {
struct s1_walk_filter *filter;
u64 baddr;
enum trans_regime regime;
unsigned int max_oa_bits;
unsigned int pgshift;
unsigned int txsz;
int sl;
u8 sh;
bool as_el0;
bool hpd;
bool e0poe;
@@ -299,6 +317,7 @@ struct s1_walk_info {
bool pan;
bool be;
bool s2;
bool pa52bit;
};
struct s1_walk_result {
@@ -334,6 +353,8 @@ struct s1_walk_result {
int __kvm_translate_va(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
struct s1_walk_result *wr, u64 va);
int __kvm_find_s1_desc_level(struct kvm_vcpu *vcpu, u64 va, u64 ipa,
int *level);
/* VNCR management */
int kvm_vcpu_allocate_vncr_tlb(struct kvm_vcpu *vcpu);

View File

@@ -28,9 +28,57 @@ static int get_ia_size(struct s1_walk_info *wi)
/* Return true if the IPA is out of the OA range */
static bool check_output_size(u64 ipa, struct s1_walk_info *wi)
{
if (wi->pa52bit)
return wi->max_oa_bits < 52 && (ipa & GENMASK_ULL(51, wi->max_oa_bits));
return wi->max_oa_bits < 48 && (ipa & GENMASK_ULL(47, wi->max_oa_bits));
}
static bool has_52bit_pa(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, u64 tcr)
{
switch (BIT(wi->pgshift)) {
case SZ_64K:
default: /* IMPDEF: treat any other value as 64k */
if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR0_EL1, PARANGE, 52))
return false;
return ((wi->regime == TR_EL2 ?
FIELD_GET(TCR_EL2_PS_MASK, tcr) :
FIELD_GET(TCR_IPS_MASK, tcr)) == 0b0110);
case SZ_16K:
if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN16, 52_BIT))
return false;
break;
case SZ_4K:
if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN4, 52_BIT))
return false;
break;
}
return (tcr & (wi->regime == TR_EL2 ? TCR_EL2_DS : TCR_DS));
}
static u64 desc_to_oa(struct s1_walk_info *wi, u64 desc)
{
u64 addr;
if (!wi->pa52bit)
return desc & GENMASK_ULL(47, wi->pgshift);
switch (BIT(wi->pgshift)) {
case SZ_4K:
case SZ_16K:
addr = desc & GENMASK_ULL(49, wi->pgshift);
addr |= FIELD_GET(KVM_PTE_ADDR_51_50_LPA2, desc) << 50;
break;
case SZ_64K:
default: /* IMPDEF: treat any other value as 64k */
addr = desc & GENMASK_ULL(47, wi->pgshift);
addr |= FIELD_GET(KVM_PTE_ADDR_51_48, desc) << 48;
break;
}
return addr;
}
/* Return the translation regime that applies to an AT instruction */
static enum trans_regime compute_translation_regime(struct kvm_vcpu *vcpu, u32 op)
{
@@ -50,21 +98,26 @@ static enum trans_regime compute_translation_regime(struct kvm_vcpu *vcpu, u32 o
}
}
static u64 effective_tcr2(struct kvm_vcpu *vcpu, enum trans_regime regime)
{
if (regime == TR_EL10) {
if (vcpu_has_nv(vcpu) &&
!(__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TCR2En))
return 0;
return vcpu_read_sys_reg(vcpu, TCR2_EL1);
}
return vcpu_read_sys_reg(vcpu, TCR2_EL2);
}
static bool s1pie_enabled(struct kvm_vcpu *vcpu, enum trans_regime regime)
{
if (!kvm_has_s1pie(vcpu->kvm))
return false;
switch (regime) {
case TR_EL2:
case TR_EL20:
return vcpu_read_sys_reg(vcpu, TCR2_EL2) & TCR2_EL2_PIE;
case TR_EL10:
return (__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TCR2En) &&
(__vcpu_sys_reg(vcpu, TCR2_EL1) & TCR2_EL1_PIE);
default:
BUG();
}
/* Abuse TCR2_EL1_PIE and use it for EL2 as well */
return effective_tcr2(vcpu, regime) & TCR2_EL1_PIE;
}
static void compute_s1poe(struct kvm_vcpu *vcpu, struct s1_walk_info *wi)
@@ -76,23 +129,11 @@ static void compute_s1poe(struct kvm_vcpu *vcpu, struct s1_walk_info *wi)
return;
}
switch (wi->regime) {
case TR_EL2:
case TR_EL20:
val = vcpu_read_sys_reg(vcpu, TCR2_EL2);
wi->poe = val & TCR2_EL2_POE;
wi->e0poe = (wi->regime == TR_EL20) && (val & TCR2_EL2_E0POE);
break;
case TR_EL10:
if (__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TCR2En) {
wi->poe = wi->e0poe = false;
return;
}
val = effective_tcr2(vcpu, wi->regime);
val = __vcpu_sys_reg(vcpu, TCR2_EL1);
wi->poe = val & TCR2_EL1_POE;
wi->e0poe = val & TCR2_EL1_E0POE;
}
/* Abuse TCR2_EL1_* for EL2 */
wi->poe = val & TCR2_EL1_POE;
wi->e0poe = (wi->regime != TR_EL2) && (val & TCR2_EL1_E0POE);
}
static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
@@ -102,14 +143,16 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
unsigned int stride, x;
bool va55, tbi, lva;
hcr = __vcpu_sys_reg(vcpu, HCR_EL2);
va55 = va & BIT(55);
if (wi->regime == TR_EL2 && va55)
goto addrsz;
wi->s2 = wi->regime == TR_EL10 && (hcr & (HCR_VM | HCR_DC));
if (vcpu_has_nv(vcpu)) {
hcr = __vcpu_sys_reg(vcpu, HCR_EL2);
wi->s2 = wi->regime == TR_EL10 && (hcr & (HCR_VM | HCR_DC));
} else {
WARN_ON_ONCE(wi->regime != TR_EL10);
wi->s2 = false;
hcr = 0;
}
switch (wi->regime) {
case TR_EL10:
@@ -131,6 +174,46 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
BUG();
}
/* Someone was silly enough to encode TG0/TG1 differently */
if (va55 && wi->regime != TR_EL2) {
wi->txsz = FIELD_GET(TCR_T1SZ_MASK, tcr);
tg = FIELD_GET(TCR_TG1_MASK, tcr);
switch (tg << TCR_TG1_SHIFT) {
case TCR_TG1_4K:
wi->pgshift = 12; break;
case TCR_TG1_16K:
wi->pgshift = 14; break;
case TCR_TG1_64K:
default: /* IMPDEF: treat any other value as 64k */
wi->pgshift = 16; break;
}
} else {
wi->txsz = FIELD_GET(TCR_T0SZ_MASK, tcr);
tg = FIELD_GET(TCR_TG0_MASK, tcr);
switch (tg << TCR_TG0_SHIFT) {
case TCR_TG0_4K:
wi->pgshift = 12; break;
case TCR_TG0_16K:
wi->pgshift = 14; break;
case TCR_TG0_64K:
default: /* IMPDEF: treat any other value as 64k */
wi->pgshift = 16; break;
}
}
wi->pa52bit = has_52bit_pa(vcpu, wi, tcr);
ia_bits = get_ia_size(wi);
/* AArch64.S1StartLevel() */
stride = wi->pgshift - 3;
wi->sl = 3 - (((ia_bits - 1) - wi->pgshift) / stride);
if (wi->regime == TR_EL2 && va55)
goto addrsz;
tbi = (wi->regime == TR_EL2 ?
FIELD_GET(TCR_EL2_TBI, tcr) :
(va55 ?
@@ -140,6 +223,12 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
if (!tbi && (u64)sign_extend64(va, 55) != va)
goto addrsz;
wi->sh = (wi->regime == TR_EL2 ?
FIELD_GET(TCR_EL2_SH0_MASK, tcr) :
(va55 ?
FIELD_GET(TCR_SH1_MASK, tcr) :
FIELD_GET(TCR_SH0_MASK, tcr)));
va = (u64)sign_extend64(va, 55);
/* Let's put the MMU disabled case aside immediately */
@@ -194,53 +283,20 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
/* R_BVXDG */
wi->hpd |= (wi->poe || wi->e0poe);
/* Someone was silly enough to encode TG0/TG1 differently */
if (va55) {
wi->txsz = FIELD_GET(TCR_T1SZ_MASK, tcr);
tg = FIELD_GET(TCR_TG1_MASK, tcr);
switch (tg << TCR_TG1_SHIFT) {
case TCR_TG1_4K:
wi->pgshift = 12; break;
case TCR_TG1_16K:
wi->pgshift = 14; break;
case TCR_TG1_64K:
default: /* IMPDEF: treat any other value as 64k */
wi->pgshift = 16; break;
}
} else {
wi->txsz = FIELD_GET(TCR_T0SZ_MASK, tcr);
tg = FIELD_GET(TCR_TG0_MASK, tcr);
switch (tg << TCR_TG0_SHIFT) {
case TCR_TG0_4K:
wi->pgshift = 12; break;
case TCR_TG0_16K:
wi->pgshift = 14; break;
case TCR_TG0_64K:
default: /* IMPDEF: treat any other value as 64k */
wi->pgshift = 16; break;
}
}
/* R_PLCGL, R_YXNYW */
if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR2_EL1, ST, 48_47)) {
if (wi->txsz > 39)
goto transfault_l0;
goto transfault;
} else {
if (wi->txsz > 48 || (BIT(wi->pgshift) == SZ_64K && wi->txsz > 47))
goto transfault_l0;
goto transfault;
}
/* R_GTJBY, R_SXWGM */
switch (BIT(wi->pgshift)) {
case SZ_4K:
lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN4, 52_BIT);
lva &= tcr & (wi->regime == TR_EL2 ? TCR_EL2_DS : TCR_DS);
break;
case SZ_16K:
lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN16, 52_BIT);
lva &= tcr & (wi->regime == TR_EL2 ? TCR_EL2_DS : TCR_DS);
lva = wi->pa52bit;
break;
case SZ_64K:
lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, VARange, 52);
@@ -248,38 +304,42 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
}
if ((lva && wi->txsz < 12) || (!lva && wi->txsz < 16))
goto transfault_l0;
ia_bits = get_ia_size(wi);
goto transfault;
/* R_YYVYV, I_THCZK */
if ((!va55 && va > GENMASK(ia_bits - 1, 0)) ||
(va55 && va < GENMASK(63, ia_bits)))
goto transfault_l0;
goto transfault;
/* I_ZFSYQ */
if (wi->regime != TR_EL2 &&
(tcr & (va55 ? TCR_EPD1_MASK : TCR_EPD0_MASK)))
goto transfault_l0;
goto transfault;
/* R_BNDVG and following statements */
if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, E0PD, IMP) &&
wi->as_el0 && (tcr & (va55 ? TCR_E0PD1 : TCR_E0PD0)))
goto transfault_l0;
/* AArch64.S1StartLevel() */
stride = wi->pgshift - 3;
wi->sl = 3 - (((ia_bits - 1) - wi->pgshift) / stride);
goto transfault;
ps = (wi->regime == TR_EL2 ?
FIELD_GET(TCR_EL2_PS_MASK, tcr) : FIELD_GET(TCR_IPS_MASK, tcr));
wi->max_oa_bits = min(get_kvm_ipa_limit(), ps_to_output_size(ps));
wi->max_oa_bits = min(get_kvm_ipa_limit(), ps_to_output_size(ps, wi->pa52bit));
/* Compute minimal alignment */
x = 3 + ia_bits - ((3 - wi->sl) * stride + wi->pgshift);
wi->baddr = ttbr & TTBRx_EL1_BADDR;
if (wi->pa52bit) {
/*
* Force the alignment on 64 bytes for top-level tables
* smaller than 8 entries, since TTBR.BADDR[5:2] are used to
* store bits [51:48] of the first level of lookup.
*/
x = max(x, 6);
wi->baddr |= FIELD_GET(GENMASK_ULL(5, 2), ttbr) << 48;
}
/* R_VPBBF */
if (check_output_size(wi->baddr, wi))
@@ -289,12 +349,17 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
return 0;
addrsz: /* Address Size Fault level 0 */
addrsz:
/*
* Address Size Fault level 0 to indicate it comes from TTBR.
* yes, this is an oddity.
*/
fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(0), false);
return -EFAULT;
transfault_l0: /* Translation Fault level 0 */
fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(0), false);
transfault:
/* Translation Fault on start level */
fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(wi->sl), false);
return -EFAULT;
}
@@ -339,6 +404,17 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
ipa = kvm_s2_trans_output(&s2_trans);
}
if (wi->filter) {
ret = wi->filter->fn(&(struct s1_walk_context)
{
.wi = wi,
.table_ipa = baddr,
.level = level,
}, wi->filter->priv);
if (ret)
return ret;
}
ret = kvm_read_guest(vcpu->kvm, ipa, &desc, sizeof(desc));
if (ret) {
fail_s1_walk(wr, ESR_ELx_FSC_SEA_TTW(level), false);
@@ -369,7 +445,7 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
wr->PXNTable |= FIELD_GET(PMD_TABLE_PXN, desc);
}
baddr = desc & GENMASK_ULL(47, wi->pgshift);
baddr = desc_to_oa(wi, desc);
/* Check for out-of-range OA */
if (check_output_size(baddr, wi))
@@ -386,11 +462,11 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
switch (BIT(wi->pgshift)) {
case SZ_4K:
valid_block = level == 1 || level == 2;
valid_block = level == 1 || level == 2 || (wi->pa52bit && level == 0);
break;
case SZ_16K:
case SZ_64K:
valid_block = level == 2;
valid_block = level == 2 || (wi->pa52bit && level == 1);
break;
}
@@ -398,7 +474,8 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
goto transfault;
}
if (check_output_size(desc & GENMASK(47, va_bottom), wi))
baddr = desc_to_oa(wi, desc);
if (check_output_size(baddr & GENMASK(52, va_bottom), wi))
goto addrsz;
if (!(desc & PTE_AF)) {
@@ -411,7 +488,7 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
wr->failed = false;
wr->level = level;
wr->desc = desc;
wr->pa = desc & GENMASK(47, va_bottom);
wr->pa = baddr & GENMASK(52, va_bottom);
wr->pa |= va & GENMASK_ULL(va_bottom - 1, 0);
wr->nG = (wi->regime != TR_EL2) && (desc & PTE_NG);
@@ -640,21 +717,36 @@ static u8 combine_s1_s2_attr(u8 s1, u8 s2)
#define ATTR_OSH 0b10
#define ATTR_ISH 0b11
static u8 compute_sh(u8 attr, u64 desc)
static u8 compute_final_sh(u8 attr, u8 sh)
{
u8 sh;
/* Any form of device, as well as NC has SH[1:0]=0b10 */
if (MEMATTR_IS_DEVICE(attr) || attr == MEMATTR(NC, NC))
return ATTR_OSH;
sh = FIELD_GET(PTE_SHARED, desc);
if (sh == ATTR_RSV) /* Reserved, mapped to NSH */
sh = ATTR_NSH;
return sh;
}
static u8 compute_s1_sh(struct s1_walk_info *wi, struct s1_walk_result *wr,
u8 attr)
{
u8 sh;
/*
* non-52bit and LPA have their basic shareability described in the
* descriptor. LPA2 gets it from the corresponding field in TCR,
* conveniently recorded in the walk info.
*/
if (!wi->pa52bit || BIT(wi->pgshift) == SZ_64K)
sh = FIELD_GET(KVM_PTE_LEAF_ATTR_LO_S1_SH, wr->desc);
else
sh = wi->sh;
return compute_final_sh(attr, sh);
}
static u8 combine_sh(u8 s1_sh, u8 s2_sh)
{
if (s1_sh == ATTR_OSH || s2_sh == ATTR_OSH)
@@ -668,7 +760,7 @@ static u8 combine_sh(u8 s1_sh, u8 s2_sh)
static u64 compute_par_s12(struct kvm_vcpu *vcpu, u64 s1_par,
struct kvm_s2_trans *tr)
{
u8 s1_parattr, s2_memattr, final_attr;
u8 s1_parattr, s2_memattr, final_attr, s2_sh;
u64 par;
/* If S2 has failed to translate, report the damage */
@@ -741,17 +833,19 @@ static u64 compute_par_s12(struct kvm_vcpu *vcpu, u64 s1_par,
!MEMATTR_IS_DEVICE(final_attr))
final_attr = MEMATTR(NC, NC);
s2_sh = FIELD_GET(KVM_PTE_LEAF_ATTR_LO_S2_SH, tr->desc);
par = FIELD_PREP(SYS_PAR_EL1_ATTR, final_attr);
par |= tr->output & GENMASK(47, 12);
par |= FIELD_PREP(SYS_PAR_EL1_SH,
combine_sh(FIELD_GET(SYS_PAR_EL1_SH, s1_par),
compute_sh(final_attr, tr->desc)));
compute_final_sh(final_attr, s2_sh)));
return par;
}
static u64 compute_par_s1(struct kvm_vcpu *vcpu, struct s1_walk_result *wr,
enum trans_regime regime)
static u64 compute_par_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
struct s1_walk_result *wr)
{
u64 par;
@@ -764,9 +858,9 @@ static u64 compute_par_s1(struct kvm_vcpu *vcpu, struct s1_walk_result *wr,
} else if (wr->level == S1_MMU_DISABLED) {
/* MMU off or HCR_EL2.DC == 1 */
par = SYS_PAR_EL1_NSE;
par |= wr->pa & GENMASK_ULL(47, 12);
par |= wr->pa & SYS_PAR_EL1_PA;
if (regime == TR_EL10 &&
if (wi->regime == TR_EL10 && vcpu_has_nv(vcpu) &&
(__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_DC)) {
par |= FIELD_PREP(SYS_PAR_EL1_ATTR,
MEMATTR(WbRaWa, WbRaWa));
@@ -781,14 +875,14 @@ static u64 compute_par_s1(struct kvm_vcpu *vcpu, struct s1_walk_result *wr,
par = SYS_PAR_EL1_NSE;
mair = (regime == TR_EL10 ?
mair = (wi->regime == TR_EL10 ?
vcpu_read_sys_reg(vcpu, MAIR_EL1) :
vcpu_read_sys_reg(vcpu, MAIR_EL2));
mair >>= FIELD_GET(PTE_ATTRINDX_MASK, wr->desc) * 8;
mair &= 0xff;
sctlr = (regime == TR_EL10 ?
sctlr = (wi->regime == TR_EL10 ?
vcpu_read_sys_reg(vcpu, SCTLR_EL1) :
vcpu_read_sys_reg(vcpu, SCTLR_EL2));
@@ -797,9 +891,9 @@ static u64 compute_par_s1(struct kvm_vcpu *vcpu, struct s1_walk_result *wr,
mair = MEMATTR(NC, NC);
par |= FIELD_PREP(SYS_PAR_EL1_ATTR, mair);
par |= wr->pa & GENMASK_ULL(47, 12);
par |= wr->pa & SYS_PAR_EL1_PA;
sh = compute_sh(mair, wr->desc);
sh = compute_s1_sh(wi, wr, mair);
par |= FIELD_PREP(SYS_PAR_EL1_SH, sh);
}
@@ -873,7 +967,7 @@ static void compute_s1_direct_permissions(struct kvm_vcpu *vcpu,
wxn = (vcpu_read_sys_reg(vcpu, SCTLR_EL2) & SCTLR_ELx_WXN);
break;
case TR_EL10:
wxn = (__vcpu_sys_reg(vcpu, SCTLR_EL1) & SCTLR_ELx_WXN);
wxn = (vcpu_read_sys_reg(vcpu, SCTLR_EL1) & SCTLR_ELx_WXN);
break;
}
@@ -1186,7 +1280,7 @@ static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
fail_s1_walk(&wr, ESR_ELx_FSC_PERM_L(wr.level), false);
compute_par:
return compute_par_s1(vcpu, &wr, wi.regime);
return compute_par_s1(vcpu, &wi, &wr);
}
/*
@@ -1202,7 +1296,7 @@ static u64 __kvm_at_s1e01_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
struct mmu_config config;
struct kvm_s2_mmu *mmu;
bool fail;
bool fail, mmu_cs;
u64 par;
par = SYS_PAR_EL1_F;
@@ -1218,8 +1312,13 @@ static u64 __kvm_at_s1e01_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
* If HCR_EL2.{E2H,TGE} == {1,1}, the MMU context is already
* the right one (as we trapped from vEL2). If not, save the
* full MMU context.
*
* We are also guaranteed to be in the correct context if
* we're not in a nested VM.
*/
if (vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu))
mmu_cs = (vcpu_has_nv(vcpu) &&
!(vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu)));
if (!mmu_cs)
goto skip_mmu_switch;
/*
@@ -1287,7 +1386,7 @@ skip_mmu_switch:
write_sysreg_hcr(HCR_HOST_VHE_FLAGS);
if (!(vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu)))
if (mmu_cs)
__mmu_config_restore(&config);
return par;
@@ -1470,3 +1569,68 @@ int __kvm_translate_va(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
return 0;
}
struct desc_match {
u64 ipa;
int level;
};
static int match_s1_desc(struct s1_walk_context *ctxt, void *priv)
{
struct desc_match *dm = priv;
u64 ipa = dm->ipa;
/* Use S1 granule alignment */
ipa &= GENMASK(51, ctxt->wi->pgshift);
/* Not the IPA we're looking for? Continue. */
if (ipa != ctxt->table_ipa)
return 0;
/* Note the level and interrupt the walk */
dm->level = ctxt->level;
return -EINTR;
}
int __kvm_find_s1_desc_level(struct kvm_vcpu *vcpu, u64 va, u64 ipa, int *level)
{
struct desc_match dm = {
.ipa = ipa,
};
struct s1_walk_info wi = {
.filter = &(struct s1_walk_filter){
.fn = match_s1_desc,
.priv = &dm,
},
.regime = TR_EL10,
.as_el0 = false,
.pan = false,
};
struct s1_walk_result wr = {};
int ret;
ret = setup_s1_walk(vcpu, &wi, &wr, va);
if (ret)
return ret;
/* We really expect the S1 MMU to be on here... */
if (WARN_ON_ONCE(wr.level == S1_MMU_DISABLED)) {
*level = 0;
return 0;
}
/* Walk the guest's PT, looking for a match along the way */
ret = walk_s1(vcpu, &wi, &wr, va);
switch (ret) {
case -EINTR:
/* We interrupted the walk on a match, return the level */
*level = dm.level;
return 0;
case 0:
/* The walk completed, we failed to find the entry */
return -ENOENT;
default:
/* Any other error... */
return ret;
}
}

View File

@@ -106,7 +106,30 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr
{
unsigned long cpsr = *vcpu_cpsr(vcpu);
bool is_aarch32 = vcpu_mode_is_32bit(vcpu);
u64 esr = 0;
u64 esr = 0, fsc;
int level;
/*
* If injecting an abort from a failed S1PTW, rewalk the S1 PTs to
* find the failing level. If we can't find it, assume the error was
* transient and restart without changing the state.
*/
if (kvm_vcpu_abt_iss1tw(vcpu)) {
u64 hpfar = kvm_vcpu_get_fault_ipa(vcpu);
int ret;
if (hpfar == INVALID_GPA)
return;
ret = __kvm_find_s1_desc_level(vcpu, addr, hpfar, &level);
if (ret)
return;
WARN_ON_ONCE(level < -1 || level > 3);
fsc = ESR_ELx_FSC_SEA_TTW(level);
} else {
fsc = ESR_ELx_FSC_EXTABT;
}
/* This delight is brought to you by FEAT_DoubleFault2. */
if (effective_sctlr2_ease(vcpu))
@@ -133,7 +156,7 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr
if (!is_iabt)
esr |= ESR_ELx_EC_DABT_LOW << ESR_ELx_EC_SHIFT;
esr |= ESR_ELx_FSC_EXTABT;
esr |= fsc;
vcpu_write_sys_reg(vcpu, addr, exception_far_elx(vcpu));
vcpu_write_sys_reg(vcpu, esr, exception_esr_elx(vcpu));

View File

@@ -349,7 +349,7 @@ static void vtcr_to_walk_info(u64 vtcr, struct s2_walk_info *wi)
wi->sl = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr);
/* Global limit for now, should eventually be per-VM */
wi->max_oa_bits = min(get_kvm_ipa_limit(),
ps_to_output_size(FIELD_GET(VTCR_EL2_PS_MASK, vtcr)));
ps_to_output_size(FIELD_GET(VTCR_EL2_PS_MASK, vtcr), false));
}
int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa,

View File

@@ -250,6 +250,47 @@ static void test_serror(void)
kvm_vm_free(vm);
}
static void expect_sea_s1ptw_handler(struct ex_regs *regs)
{
u64 esr = read_sysreg(esr_el1);
GUEST_ASSERT_EQ(regs->pc, expected_abort_pc);
GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_CUR);
GUEST_ASSERT_EQ((esr & ESR_ELx_FSC), ESR_ELx_FSC_SEA_TTW(3));
GUEST_DONE();
}
static noinline void test_s1ptw_abort_guest(void)
{
extern char test_s1ptw_abort_insn;
WRITE_ONCE(expected_abort_pc, (u64)&test_s1ptw_abort_insn);
asm volatile("test_s1ptw_abort_insn:\n\t"
"ldr x0, [%0]\n\t"
: : "r" (MMIO_ADDR) : "x0", "memory");
GUEST_FAIL("Load on S1PTW abort should not retire");
}
static void test_s1ptw_abort(void)
{
struct kvm_vcpu *vcpu;
u64 *ptep, bad_pa;
struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_s1ptw_abort_guest,
expect_sea_s1ptw_handler);
ptep = virt_get_pte_hva_at_level(vm, MMIO_ADDR, 2);
bad_pa = BIT(vm->pa_bits) - vm->page_size;
*ptep &= ~GENMASK(47, 12);
*ptep |= bad_pa;
vcpu_run_expect_done(vcpu);
kvm_vm_free(vm);
}
static void test_serror_emulated_guest(void)
{
GUEST_ASSERT(!(read_sysreg(isr_el1) & ISR_EL1_A));
@@ -327,4 +368,5 @@ int main(void)
test_serror_masked();
test_serror_emulated();
test_mmio_ease();
test_s1ptw_abort();
}

View File

@@ -175,6 +175,7 @@ void vm_install_exception_handler(struct kvm_vm *vm,
void vm_install_sync_handler(struct kvm_vm *vm,
int vector, int ec, handler_fn handler);
uint64_t *virt_get_pte_hva_at_level(struct kvm_vm *vm, vm_vaddr_t gva, int level);
uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva);
static inline void cpu_relax(void)

View File

@@ -185,7 +185,7 @@ void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
_virt_pg_map(vm, vaddr, paddr, attr_idx);
}
uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva)
uint64_t *virt_get_pte_hva_at_level(struct kvm_vm *vm, vm_vaddr_t gva, int level)
{
uint64_t *ptep;
@@ -195,17 +195,23 @@ uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva)
ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, gva) * 8;
if (!ptep)
goto unmapped_gva;
if (level == 0)
return ptep;
switch (vm->pgtable_levels) {
case 4:
ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, gva) * 8;
if (!ptep)
goto unmapped_gva;
if (level == 1)
break;
/* fall through */
case 3:
ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, gva) * 8;
if (!ptep)
goto unmapped_gva;
if (level == 2)
break;
/* fall through */
case 2:
ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, gva) * 8;
@@ -223,6 +229,11 @@ unmapped_gva:
exit(EXIT_FAILURE);
}
uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva)
{
return virt_get_pte_hva_at_level(vm, gva, 3);
}
vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
{
uint64_t *ptep = virt_get_pte_hva(vm, gva);