mirror of
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-04-05 14:09:10 +08:00
Merge branch kvm-arm64/52bit-at into kvmarm-master/next
* kvm-arm64/52bit-at:
  : .
  : Upgrade the S1 page table walker to support 52bit PA, and use it to
  : report the fault level when taking a S2 fault on S1PTW, which is required
  : by the architecture (20250915114451.660351-1-maz@kernel.org).
  : .
  KVM: arm64: selftest: Expand external_aborts test to look for TTW levels
  KVM: arm64: Populate level on S1PTW SEA injection
  KVM: arm64: Add S1 IPA to page table level walker
  KVM: arm64: Add filtering hook to S1 page table walk
  KVM: arm64: Don't switch MMU on translation from non-NV context
  KVM: arm64: Allow EL1 control registers to be accessed from the CPU state
  KVM: arm64: Allow use of S1 PTW for non-NV vcpus
  KVM: arm64: Report faults from S1 walk setup at the expected start level
  KVM: arm64: Expand valid block mappings to FEAT_LPA/LPA2 support
  KVM: arm64: Populate PAR_EL1 with 52bit addresses
  KVM: arm64: Compute shareability for LPA2
  KVM: arm64: Pass the walk_info structure to compute_par_s1()
  KVM: arm64: Decouple output address from the PT descriptor
  KVM: arm64: Compute 52bit TTBR address and alignment
  KVM: arm64: Account for 52bit when computing maximum OA
  KVM: arm64: Add helper computing the state of 52bit PA support

Signed-off-by: Marc Zyngier <maz@kernel.org>
This commit is contained in:
@@ -265,7 +265,7 @@ static inline u64 decode_range_tlbi(u64 val, u64 *range, u16 *asid)
|
||||
return base;
|
||||
}
|
||||
|
||||
static inline unsigned int ps_to_output_size(unsigned int ps)
|
||||
static inline unsigned int ps_to_output_size(unsigned int ps, bool pa52bit)
|
||||
{
|
||||
switch (ps) {
|
||||
case 0: return 32;
|
||||
@@ -273,7 +273,10 @@ static inline unsigned int ps_to_output_size(unsigned int ps)
|
||||
case 2: return 40;
|
||||
case 3: return 42;
|
||||
case 4: return 44;
|
||||
case 5:
|
||||
case 5: return 48;
|
||||
case 6: if (pa52bit)
|
||||
return 52;
|
||||
fallthrough;
|
||||
default:
|
||||
return 48;
|
||||
}
|
||||
@@ -285,13 +288,28 @@ enum trans_regime {
|
||||
TR_EL2,
|
||||
};
|
||||
|
||||
struct s1_walk_info;
|
||||
|
||||
struct s1_walk_context {
|
||||
struct s1_walk_info *wi;
|
||||
u64 table_ipa;
|
||||
int level;
|
||||
};
|
||||
|
||||
/*
 * Optional hook into the stage-1 page table walk.
 *
 * When a filter is attached to the walk info, walk_s1() calls @fn with the
 * current walk context and @priv before reading a descriptor. A non-zero
 * return value stops the walk and is handed back to the walker's caller.
 */
struct s1_walk_filter {
	int	(*fn)(struct s1_walk_context *ctxt, void *priv);
	/* Opaque cookie passed back to @fn */
	void	*priv;
};
|
||||
|
||||
struct s1_walk_info {
|
||||
struct s1_walk_filter *filter;
|
||||
u64 baddr;
|
||||
enum trans_regime regime;
|
||||
unsigned int max_oa_bits;
|
||||
unsigned int pgshift;
|
||||
unsigned int txsz;
|
||||
int sl;
|
||||
u8 sh;
|
||||
bool as_el0;
|
||||
bool hpd;
|
||||
bool e0poe;
|
||||
@@ -299,6 +317,7 @@ struct s1_walk_info {
|
||||
bool pan;
|
||||
bool be;
|
||||
bool s2;
|
||||
bool pa52bit;
|
||||
};
|
||||
|
||||
struct s1_walk_result {
|
||||
@@ -334,6 +353,8 @@ struct s1_walk_result {
|
||||
|
||||
int __kvm_translate_va(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
|
||||
struct s1_walk_result *wr, u64 va);
|
||||
int __kvm_find_s1_desc_level(struct kvm_vcpu *vcpu, u64 va, u64 ipa,
|
||||
int *level);
|
||||
|
||||
/* VNCR management */
|
||||
int kvm_vcpu_allocate_vncr_tlb(struct kvm_vcpu *vcpu);
|
||||
|
||||
@@ -28,9 +28,57 @@ static int get_ia_size(struct s1_walk_info *wi)
|
||||
/* Return true if the IPA is out of the OA range */
|
||||
static bool check_output_size(u64 ipa, struct s1_walk_info *wi)
|
||||
{
|
||||
if (wi->pa52bit)
|
||||
return wi->max_oa_bits < 52 && (ipa & GENMASK_ULL(51, wi->max_oa_bits));
|
||||
return wi->max_oa_bits < 48 && (ipa & GENMASK_ULL(47, wi->max_oa_bits));
|
||||
}
|
||||
|
||||
/*
 * Work out whether the stage-1 walk described by @wi uses 52bit PAs.
 *
 * @vcpu: vcpu whose VM feature set is consulted
 * @wi:   walk info; pgshift and regime must already be populated
 * @tcr:  TCR value for the translation regime
 *
 * For 4K and 16K granules, this requires the matching TGRANx 52_BIT
 * feature and the DS bit of the regime's TCR. For 64K (and any other
 * granule value, which is treated as 64K), it requires a 52bit PARANGE
 * and the regime's PS/IPS field being 0b0110.
 */
static bool has_52bit_pa(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, u64 tcr)
{
	bool el2 = (wi->regime == TR_EL2);
	bool granule_ok;
	u64 ps;

	switch (BIT(wi->pgshift)) {
	case SZ_4K:
		granule_ok = kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN4, 52_BIT);
		break;
	case SZ_16K:
		granule_ok = kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN16, 52_BIT);
		break;
	case SZ_64K:
	default:		/* IMPDEF: treat any other value as 64k */
		if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR0_EL1, PARANGE, 52))
			return false;

		ps = el2 ? FIELD_GET(TCR_EL2_PS_MASK, tcr) :
			   FIELD_GET(TCR_IPS_MASK, tcr);

		return ps == 0b0110;
	}

	if (!granule_ok)
		return false;

	/* 4K/16K: the DS bit is what actually turns 52bit on */
	return tcr & (el2 ? TCR_EL2_DS : TCR_DS);
}
|
||||
|
||||
/*
 * Extract the output address from a stage-1 descriptor.
 *
 * Without 52bit PAs, the address is simply bits [47:pgshift] of @desc.
 * With 52bit PAs, the upper bits live in a granule-dependent field:
 * 4K and 16K take bits [49:pgshift] in place and bits [51:50] from
 * KVM_PTE_ADDR_51_50_LPA2, while 64K takes bits [47:pgshift] in place
 * and bits [51:48] from KVM_PTE_ADDR_51_48.
 */
static u64 desc_to_oa(struct s1_walk_info *wi, u64 desc)
{
	if (!wi->pa52bit)
		return desc & GENMASK_ULL(47, wi->pgshift);

	if (BIT(wi->pgshift) == SZ_4K || BIT(wi->pgshift) == SZ_16K)
		return (desc & GENMASK_ULL(49, wi->pgshift)) |
		       (FIELD_GET(KVM_PTE_ADDR_51_50_LPA2, desc) << 50);

	/* 64k, or IMPDEF: treat any other value as 64k */
	return (desc & GENMASK_ULL(47, wi->pgshift)) |
	       (FIELD_GET(KVM_PTE_ADDR_51_48, desc) << 48);
}
|
||||
|
||||
/* Return the translation regime that applies to an AT instruction */
|
||||
static enum trans_regime compute_translation_regime(struct kvm_vcpu *vcpu, u32 op)
|
||||
{
|
||||
@@ -50,21 +98,26 @@ static enum trans_regime compute_translation_regime(struct kvm_vcpu *vcpu, u32 o
|
||||
}
|
||||
}
|
||||
|
||||
static u64 effective_tcr2(struct kvm_vcpu *vcpu, enum trans_regime regime)
|
||||
{
|
||||
if (regime == TR_EL10) {
|
||||
if (vcpu_has_nv(vcpu) &&
|
||||
!(__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TCR2En))
|
||||
return 0;
|
||||
|
||||
return vcpu_read_sys_reg(vcpu, TCR2_EL1);
|
||||
}
|
||||
|
||||
return vcpu_read_sys_reg(vcpu, TCR2_EL2);
|
||||
}
|
||||
|
||||
static bool s1pie_enabled(struct kvm_vcpu *vcpu, enum trans_regime regime)
|
||||
{
|
||||
if (!kvm_has_s1pie(vcpu->kvm))
|
||||
return false;
|
||||
|
||||
switch (regime) {
|
||||
case TR_EL2:
|
||||
case TR_EL20:
|
||||
return vcpu_read_sys_reg(vcpu, TCR2_EL2) & TCR2_EL2_PIE;
|
||||
case TR_EL10:
|
||||
return (__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TCR2En) &&
|
||||
(__vcpu_sys_reg(vcpu, TCR2_EL1) & TCR2_EL1_PIE);
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
/* Abuse TCR2_EL1_PIE and use it for EL2 as well */
|
||||
return effective_tcr2(vcpu, regime) & TCR2_EL1_PIE;
|
||||
}
|
||||
|
||||
static void compute_s1poe(struct kvm_vcpu *vcpu, struct s1_walk_info *wi)
|
||||
@@ -76,23 +129,11 @@ static void compute_s1poe(struct kvm_vcpu *vcpu, struct s1_walk_info *wi)
|
||||
return;
|
||||
}
|
||||
|
||||
switch (wi->regime) {
|
||||
case TR_EL2:
|
||||
case TR_EL20:
|
||||
val = vcpu_read_sys_reg(vcpu, TCR2_EL2);
|
||||
wi->poe = val & TCR2_EL2_POE;
|
||||
wi->e0poe = (wi->regime == TR_EL20) && (val & TCR2_EL2_E0POE);
|
||||
break;
|
||||
case TR_EL10:
|
||||
if (__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TCR2En) {
|
||||
wi->poe = wi->e0poe = false;
|
||||
return;
|
||||
}
|
||||
val = effective_tcr2(vcpu, wi->regime);
|
||||
|
||||
val = __vcpu_sys_reg(vcpu, TCR2_EL1);
|
||||
wi->poe = val & TCR2_EL1_POE;
|
||||
wi->e0poe = val & TCR2_EL1_E0POE;
|
||||
}
|
||||
/* Abuse TCR2_EL1_* for EL2 */
|
||||
wi->poe = val & TCR2_EL1_POE;
|
||||
wi->e0poe = (wi->regime != TR_EL2) && (val & TCR2_EL1_E0POE);
|
||||
}
|
||||
|
||||
static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
|
||||
@@ -102,14 +143,16 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
|
||||
unsigned int stride, x;
|
||||
bool va55, tbi, lva;
|
||||
|
||||
hcr = __vcpu_sys_reg(vcpu, HCR_EL2);
|
||||
|
||||
va55 = va & BIT(55);
|
||||
|
||||
if (wi->regime == TR_EL2 && va55)
|
||||
goto addrsz;
|
||||
|
||||
wi->s2 = wi->regime == TR_EL10 && (hcr & (HCR_VM | HCR_DC));
|
||||
if (vcpu_has_nv(vcpu)) {
|
||||
hcr = __vcpu_sys_reg(vcpu, HCR_EL2);
|
||||
wi->s2 = wi->regime == TR_EL10 && (hcr & (HCR_VM | HCR_DC));
|
||||
} else {
|
||||
WARN_ON_ONCE(wi->regime != TR_EL10);
|
||||
wi->s2 = false;
|
||||
hcr = 0;
|
||||
}
|
||||
|
||||
switch (wi->regime) {
|
||||
case TR_EL10:
|
||||
@@ -131,6 +174,46 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
|
||||
BUG();
|
||||
}
|
||||
|
||||
/* Someone was silly enough to encode TG0/TG1 differently */
|
||||
if (va55 && wi->regime != TR_EL2) {
|
||||
wi->txsz = FIELD_GET(TCR_T1SZ_MASK, tcr);
|
||||
tg = FIELD_GET(TCR_TG1_MASK, tcr);
|
||||
|
||||
switch (tg << TCR_TG1_SHIFT) {
|
||||
case TCR_TG1_4K:
|
||||
wi->pgshift = 12; break;
|
||||
case TCR_TG1_16K:
|
||||
wi->pgshift = 14; break;
|
||||
case TCR_TG1_64K:
|
||||
default: /* IMPDEF: treat any other value as 64k */
|
||||
wi->pgshift = 16; break;
|
||||
}
|
||||
} else {
|
||||
wi->txsz = FIELD_GET(TCR_T0SZ_MASK, tcr);
|
||||
tg = FIELD_GET(TCR_TG0_MASK, tcr);
|
||||
|
||||
switch (tg << TCR_TG0_SHIFT) {
|
||||
case TCR_TG0_4K:
|
||||
wi->pgshift = 12; break;
|
||||
case TCR_TG0_16K:
|
||||
wi->pgshift = 14; break;
|
||||
case TCR_TG0_64K:
|
||||
default: /* IMPDEF: treat any other value as 64k */
|
||||
wi->pgshift = 16; break;
|
||||
}
|
||||
}
|
||||
|
||||
wi->pa52bit = has_52bit_pa(vcpu, wi, tcr);
|
||||
|
||||
ia_bits = get_ia_size(wi);
|
||||
|
||||
/* AArch64.S1StartLevel() */
|
||||
stride = wi->pgshift - 3;
|
||||
wi->sl = 3 - (((ia_bits - 1) - wi->pgshift) / stride);
|
||||
|
||||
if (wi->regime == TR_EL2 && va55)
|
||||
goto addrsz;
|
||||
|
||||
tbi = (wi->regime == TR_EL2 ?
|
||||
FIELD_GET(TCR_EL2_TBI, tcr) :
|
||||
(va55 ?
|
||||
@@ -140,6 +223,12 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
|
||||
if (!tbi && (u64)sign_extend64(va, 55) != va)
|
||||
goto addrsz;
|
||||
|
||||
wi->sh = (wi->regime == TR_EL2 ?
|
||||
FIELD_GET(TCR_EL2_SH0_MASK, tcr) :
|
||||
(va55 ?
|
||||
FIELD_GET(TCR_SH1_MASK, tcr) :
|
||||
FIELD_GET(TCR_SH0_MASK, tcr)));
|
||||
|
||||
va = (u64)sign_extend64(va, 55);
|
||||
|
||||
/* Let's put the MMU disabled case aside immediately */
|
||||
@@ -194,53 +283,20 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
|
||||
/* R_BVXDG */
|
||||
wi->hpd |= (wi->poe || wi->e0poe);
|
||||
|
||||
/* Someone was silly enough to encode TG0/TG1 differently */
|
||||
if (va55) {
|
||||
wi->txsz = FIELD_GET(TCR_T1SZ_MASK, tcr);
|
||||
tg = FIELD_GET(TCR_TG1_MASK, tcr);
|
||||
|
||||
switch (tg << TCR_TG1_SHIFT) {
|
||||
case TCR_TG1_4K:
|
||||
wi->pgshift = 12; break;
|
||||
case TCR_TG1_16K:
|
||||
wi->pgshift = 14; break;
|
||||
case TCR_TG1_64K:
|
||||
default: /* IMPDEF: treat any other value as 64k */
|
||||
wi->pgshift = 16; break;
|
||||
}
|
||||
} else {
|
||||
wi->txsz = FIELD_GET(TCR_T0SZ_MASK, tcr);
|
||||
tg = FIELD_GET(TCR_TG0_MASK, tcr);
|
||||
|
||||
switch (tg << TCR_TG0_SHIFT) {
|
||||
case TCR_TG0_4K:
|
||||
wi->pgshift = 12; break;
|
||||
case TCR_TG0_16K:
|
||||
wi->pgshift = 14; break;
|
||||
case TCR_TG0_64K:
|
||||
default: /* IMPDEF: treat any other value as 64k */
|
||||
wi->pgshift = 16; break;
|
||||
}
|
||||
}
|
||||
|
||||
/* R_PLCGL, R_YXNYW */
|
||||
if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR2_EL1, ST, 48_47)) {
|
||||
if (wi->txsz > 39)
|
||||
goto transfault_l0;
|
||||
goto transfault;
|
||||
} else {
|
||||
if (wi->txsz > 48 || (BIT(wi->pgshift) == SZ_64K && wi->txsz > 47))
|
||||
goto transfault_l0;
|
||||
goto transfault;
|
||||
}
|
||||
|
||||
/* R_GTJBY, R_SXWGM */
|
||||
switch (BIT(wi->pgshift)) {
|
||||
case SZ_4K:
|
||||
lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN4, 52_BIT);
|
||||
lva &= tcr & (wi->regime == TR_EL2 ? TCR_EL2_DS : TCR_DS);
|
||||
break;
|
||||
case SZ_16K:
|
||||
lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN16, 52_BIT);
|
||||
lva &= tcr & (wi->regime == TR_EL2 ? TCR_EL2_DS : TCR_DS);
|
||||
lva = wi->pa52bit;
|
||||
break;
|
||||
case SZ_64K:
|
||||
lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, VARange, 52);
|
||||
@@ -248,38 +304,42 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
|
||||
}
|
||||
|
||||
if ((lva && wi->txsz < 12) || (!lva && wi->txsz < 16))
|
||||
goto transfault_l0;
|
||||
|
||||
ia_bits = get_ia_size(wi);
|
||||
goto transfault;
|
||||
|
||||
/* R_YYVYV, I_THCZK */
|
||||
if ((!va55 && va > GENMASK(ia_bits - 1, 0)) ||
|
||||
(va55 && va < GENMASK(63, ia_bits)))
|
||||
goto transfault_l0;
|
||||
goto transfault;
|
||||
|
||||
/* I_ZFSYQ */
|
||||
if (wi->regime != TR_EL2 &&
|
||||
(tcr & (va55 ? TCR_EPD1_MASK : TCR_EPD0_MASK)))
|
||||
goto transfault_l0;
|
||||
goto transfault;
|
||||
|
||||
/* R_BNDVG and following statements */
|
||||
if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, E0PD, IMP) &&
|
||||
wi->as_el0 && (tcr & (va55 ? TCR_E0PD1 : TCR_E0PD0)))
|
||||
goto transfault_l0;
|
||||
|
||||
/* AArch64.S1StartLevel() */
|
||||
stride = wi->pgshift - 3;
|
||||
wi->sl = 3 - (((ia_bits - 1) - wi->pgshift) / stride);
|
||||
goto transfault;
|
||||
|
||||
ps = (wi->regime == TR_EL2 ?
|
||||
FIELD_GET(TCR_EL2_PS_MASK, tcr) : FIELD_GET(TCR_IPS_MASK, tcr));
|
||||
|
||||
wi->max_oa_bits = min(get_kvm_ipa_limit(), ps_to_output_size(ps));
|
||||
wi->max_oa_bits = min(get_kvm_ipa_limit(), ps_to_output_size(ps, wi->pa52bit));
|
||||
|
||||
/* Compute minimal alignment */
|
||||
x = 3 + ia_bits - ((3 - wi->sl) * stride + wi->pgshift);
|
||||
|
||||
wi->baddr = ttbr & TTBRx_EL1_BADDR;
|
||||
if (wi->pa52bit) {
|
||||
/*
|
||||
* Force the alignment on 64 bytes for top-level tables
|
||||
* smaller than 8 entries, since TTBR.BADDR[5:2] are used to
|
||||
* store bits [51:48] of the first level of lookup.
|
||||
*/
|
||||
x = max(x, 6);
|
||||
|
||||
wi->baddr |= FIELD_GET(GENMASK_ULL(5, 2), ttbr) << 48;
|
||||
}
|
||||
|
||||
/* R_VPBBF */
|
||||
if (check_output_size(wi->baddr, wi))
|
||||
@@ -289,12 +349,17 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
|
||||
|
||||
return 0;
|
||||
|
||||
addrsz: /* Address Size Fault level 0 */
|
||||
addrsz:
|
||||
/*
|
||||
* Address Size Fault level 0 to indicate it comes from TTBR.
|
||||
* yes, this is an oddity.
|
||||
*/
|
||||
fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(0), false);
|
||||
return -EFAULT;
|
||||
|
||||
transfault_l0: /* Translation Fault level 0 */
|
||||
fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(0), false);
|
||||
transfault:
|
||||
/* Translation Fault on start level */
|
||||
fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(wi->sl), false);
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
@@ -339,6 +404,17 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
|
||||
ipa = kvm_s2_trans_output(&s2_trans);
|
||||
}
|
||||
|
||||
if (wi->filter) {
|
||||
ret = wi->filter->fn(&(struct s1_walk_context)
|
||||
{
|
||||
.wi = wi,
|
||||
.table_ipa = baddr,
|
||||
.level = level,
|
||||
}, wi->filter->priv);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = kvm_read_guest(vcpu->kvm, ipa, &desc, sizeof(desc));
|
||||
if (ret) {
|
||||
fail_s1_walk(wr, ESR_ELx_FSC_SEA_TTW(level), false);
|
||||
@@ -369,7 +445,7 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
|
||||
wr->PXNTable |= FIELD_GET(PMD_TABLE_PXN, desc);
|
||||
}
|
||||
|
||||
baddr = desc & GENMASK_ULL(47, wi->pgshift);
|
||||
baddr = desc_to_oa(wi, desc);
|
||||
|
||||
/* Check for out-of-range OA */
|
||||
if (check_output_size(baddr, wi))
|
||||
@@ -386,11 +462,11 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
|
||||
|
||||
switch (BIT(wi->pgshift)) {
|
||||
case SZ_4K:
|
||||
valid_block = level == 1 || level == 2;
|
||||
valid_block = level == 1 || level == 2 || (wi->pa52bit && level == 0);
|
||||
break;
|
||||
case SZ_16K:
|
||||
case SZ_64K:
|
||||
valid_block = level == 2;
|
||||
valid_block = level == 2 || (wi->pa52bit && level == 1);
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -398,7 +474,8 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
|
||||
goto transfault;
|
||||
}
|
||||
|
||||
if (check_output_size(desc & GENMASK(47, va_bottom), wi))
|
||||
baddr = desc_to_oa(wi, desc);
|
||||
if (check_output_size(baddr & GENMASK(52, va_bottom), wi))
|
||||
goto addrsz;
|
||||
|
||||
if (!(desc & PTE_AF)) {
|
||||
@@ -411,7 +488,7 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
|
||||
wr->failed = false;
|
||||
wr->level = level;
|
||||
wr->desc = desc;
|
||||
wr->pa = desc & GENMASK(47, va_bottom);
|
||||
wr->pa = baddr & GENMASK(52, va_bottom);
|
||||
wr->pa |= va & GENMASK_ULL(va_bottom - 1, 0);
|
||||
|
||||
wr->nG = (wi->regime != TR_EL2) && (desc & PTE_NG);
|
||||
@@ -640,21 +717,36 @@ static u8 combine_s1_s2_attr(u8 s1, u8 s2)
|
||||
#define ATTR_OSH 0b10
|
||||
#define ATTR_ISH 0b11
|
||||
|
||||
/*
 * Compute the final shareability for a memory attribute.
 *
 * @attr: memory attribute of the mapping
 * @sh:   raw shareability value selected by the caller
 *
 * Device memory of any kind and Normal Non-cacheable memory are always
 * reported as Outer Shareable. For everything else the caller's value is
 * used, with the reserved encoding mapped to Non-shareable.
 */
static u8 compute_final_sh(u8 attr, u8 sh)
{
	/* Any form of device, as well as NC has SH[1:0]=0b10 */
	if (MEMATTR_IS_DEVICE(attr) || attr == MEMATTR(NC, NC))
		return ATTR_OSH;

	if (sh == ATTR_RSV)		/* Reserved, mapped to NSH */
		sh = ATTR_NSH;

	return sh;
}
|
||||
|
||||
/*
 * Compute the stage-1 shareability reported for a completed walk.
 *
 * non-52bit and LPA (64K granule) have their basic shareability described
 * in the descriptor. LPA2 gets it from the corresponding field in TCR,
 * conveniently recorded in the walk info as wi->sh.
 */
static u8 compute_s1_sh(struct s1_walk_info *wi, struct s1_walk_result *wr,
			u8 attr)
{
	bool sh_in_desc = !wi->pa52bit || BIT(wi->pgshift) == SZ_64K;
	u8 raw_sh;

	raw_sh = sh_in_desc ? FIELD_GET(KVM_PTE_LEAF_ATTR_LO_S1_SH, wr->desc) :
			      wi->sh;

	return compute_final_sh(attr, raw_sh);
}
|
||||
|
||||
static u8 combine_sh(u8 s1_sh, u8 s2_sh)
|
||||
{
|
||||
if (s1_sh == ATTR_OSH || s2_sh == ATTR_OSH)
|
||||
@@ -668,7 +760,7 @@ static u8 combine_sh(u8 s1_sh, u8 s2_sh)
|
||||
static u64 compute_par_s12(struct kvm_vcpu *vcpu, u64 s1_par,
|
||||
struct kvm_s2_trans *tr)
|
||||
{
|
||||
u8 s1_parattr, s2_memattr, final_attr;
|
||||
u8 s1_parattr, s2_memattr, final_attr, s2_sh;
|
||||
u64 par;
|
||||
|
||||
/* If S2 has failed to translate, report the damage */
|
||||
@@ -741,17 +833,19 @@ static u64 compute_par_s12(struct kvm_vcpu *vcpu, u64 s1_par,
|
||||
!MEMATTR_IS_DEVICE(final_attr))
|
||||
final_attr = MEMATTR(NC, NC);
|
||||
|
||||
s2_sh = FIELD_GET(KVM_PTE_LEAF_ATTR_LO_S2_SH, tr->desc);
|
||||
|
||||
par = FIELD_PREP(SYS_PAR_EL1_ATTR, final_attr);
|
||||
par |= tr->output & GENMASK(47, 12);
|
||||
par |= FIELD_PREP(SYS_PAR_EL1_SH,
|
||||
combine_sh(FIELD_GET(SYS_PAR_EL1_SH, s1_par),
|
||||
compute_sh(final_attr, tr->desc)));
|
||||
compute_final_sh(final_attr, s2_sh)));
|
||||
|
||||
return par;
|
||||
}
|
||||
|
||||
static u64 compute_par_s1(struct kvm_vcpu *vcpu, struct s1_walk_result *wr,
|
||||
enum trans_regime regime)
|
||||
static u64 compute_par_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
|
||||
struct s1_walk_result *wr)
|
||||
{
|
||||
u64 par;
|
||||
|
||||
@@ -764,9 +858,9 @@ static u64 compute_par_s1(struct kvm_vcpu *vcpu, struct s1_walk_result *wr,
|
||||
} else if (wr->level == S1_MMU_DISABLED) {
|
||||
/* MMU off or HCR_EL2.DC == 1 */
|
||||
par = SYS_PAR_EL1_NSE;
|
||||
par |= wr->pa & GENMASK_ULL(47, 12);
|
||||
par |= wr->pa & SYS_PAR_EL1_PA;
|
||||
|
||||
if (regime == TR_EL10 &&
|
||||
if (wi->regime == TR_EL10 && vcpu_has_nv(vcpu) &&
|
||||
(__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_DC)) {
|
||||
par |= FIELD_PREP(SYS_PAR_EL1_ATTR,
|
||||
MEMATTR(WbRaWa, WbRaWa));
|
||||
@@ -781,14 +875,14 @@ static u64 compute_par_s1(struct kvm_vcpu *vcpu, struct s1_walk_result *wr,
|
||||
|
||||
par = SYS_PAR_EL1_NSE;
|
||||
|
||||
mair = (regime == TR_EL10 ?
|
||||
mair = (wi->regime == TR_EL10 ?
|
||||
vcpu_read_sys_reg(vcpu, MAIR_EL1) :
|
||||
vcpu_read_sys_reg(vcpu, MAIR_EL2));
|
||||
|
||||
mair >>= FIELD_GET(PTE_ATTRINDX_MASK, wr->desc) * 8;
|
||||
mair &= 0xff;
|
||||
|
||||
sctlr = (regime == TR_EL10 ?
|
||||
sctlr = (wi->regime == TR_EL10 ?
|
||||
vcpu_read_sys_reg(vcpu, SCTLR_EL1) :
|
||||
vcpu_read_sys_reg(vcpu, SCTLR_EL2));
|
||||
|
||||
@@ -797,9 +891,9 @@ static u64 compute_par_s1(struct kvm_vcpu *vcpu, struct s1_walk_result *wr,
|
||||
mair = MEMATTR(NC, NC);
|
||||
|
||||
par |= FIELD_PREP(SYS_PAR_EL1_ATTR, mair);
|
||||
par |= wr->pa & GENMASK_ULL(47, 12);
|
||||
par |= wr->pa & SYS_PAR_EL1_PA;
|
||||
|
||||
sh = compute_sh(mair, wr->desc);
|
||||
sh = compute_s1_sh(wi, wr, mair);
|
||||
par |= FIELD_PREP(SYS_PAR_EL1_SH, sh);
|
||||
}
|
||||
|
||||
@@ -873,7 +967,7 @@ static void compute_s1_direct_permissions(struct kvm_vcpu *vcpu,
|
||||
wxn = (vcpu_read_sys_reg(vcpu, SCTLR_EL2) & SCTLR_ELx_WXN);
|
||||
break;
|
||||
case TR_EL10:
|
||||
wxn = (__vcpu_sys_reg(vcpu, SCTLR_EL1) & SCTLR_ELx_WXN);
|
||||
wxn = (vcpu_read_sys_reg(vcpu, SCTLR_EL1) & SCTLR_ELx_WXN);
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -1186,7 +1280,7 @@ static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
|
||||
fail_s1_walk(&wr, ESR_ELx_FSC_PERM_L(wr.level), false);
|
||||
|
||||
compute_par:
|
||||
return compute_par_s1(vcpu, &wr, wi.regime);
|
||||
return compute_par_s1(vcpu, &wi, &wr);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1202,7 +1296,7 @@ static u64 __kvm_at_s1e01_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
|
||||
{
|
||||
struct mmu_config config;
|
||||
struct kvm_s2_mmu *mmu;
|
||||
bool fail;
|
||||
bool fail, mmu_cs;
|
||||
u64 par;
|
||||
|
||||
par = SYS_PAR_EL1_F;
|
||||
@@ -1218,8 +1312,13 @@ static u64 __kvm_at_s1e01_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
|
||||
* If HCR_EL2.{E2H,TGE} == {1,1}, the MMU context is already
|
||||
* the right one (as we trapped from vEL2). If not, save the
|
||||
* full MMU context.
|
||||
*
|
||||
* We are also guaranteed to be in the correct context if
|
||||
* we're not in a nested VM.
|
||||
*/
|
||||
if (vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu))
|
||||
mmu_cs = (vcpu_has_nv(vcpu) &&
|
||||
!(vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu)));
|
||||
if (!mmu_cs)
|
||||
goto skip_mmu_switch;
|
||||
|
||||
/*
|
||||
@@ -1287,7 +1386,7 @@ skip_mmu_switch:
|
||||
|
||||
write_sysreg_hcr(HCR_HOST_VHE_FLAGS);
|
||||
|
||||
if (!(vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu)))
|
||||
if (mmu_cs)
|
||||
__mmu_config_restore(&config);
|
||||
|
||||
return par;
|
||||
@@ -1470,3 +1569,68 @@ int __kvm_translate_va(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct desc_match {
|
||||
u64 ipa;
|
||||
int level;
|
||||
};
|
||||
|
||||
static int match_s1_desc(struct s1_walk_context *ctxt, void *priv)
|
||||
{
|
||||
struct desc_match *dm = priv;
|
||||
u64 ipa = dm->ipa;
|
||||
|
||||
/* Use S1 granule alignment */
|
||||
ipa &= GENMASK(51, ctxt->wi->pgshift);
|
||||
|
||||
/* Not the IPA we're looking for? Continue. */
|
||||
if (ipa != ctxt->table_ipa)
|
||||
return 0;
|
||||
|
||||
/* Note the level and interrupt the walk */
|
||||
dm->level = ctxt->level;
|
||||
return -EINTR;
|
||||
}
|
||||
|
||||
/*
 * Find the stage-1 walk level at which the table located at @ipa is used
 * when translating @va in the EL1&0 regime.
 *
 * @vcpu:  vcpu whose stage-1 tables are walked
 * @va:    virtual address to translate
 * @ipa:   IPA to match against the tables accessed during the walk
 * @level: set to the matching level on success
 *
 * Returns 0 with *@level set on a match (or with *@level = 0 if the S1 MMU
 * unexpectedly turns out to be disabled), -ENOENT if the walk completed
 * without accessing a table at @ipa, or the error reported by the walk
 * setup or the walk itself.
 */
int __kvm_find_s1_desc_level(struct kvm_vcpu *vcpu, u64 va, u64 ipa, int *level)
{
	struct desc_match match = {
		.ipa	= ipa,
	};
	struct s1_walk_filter filter = {
		.fn	= match_s1_desc,
		.priv	= &match,
	};
	struct s1_walk_info wi = {
		.filter	= &filter,
		.regime	= TR_EL10,
		.as_el0	= false,
		.pan	= false,
	};
	struct s1_walk_result wr = {};
	int ret;

	ret = setup_s1_walk(vcpu, &wi, &wr, va);
	if (ret)
		return ret;

	/* We really expect the S1 MMU to be on here... */
	if (WARN_ON_ONCE(wr.level == S1_MMU_DISABLED)) {
		*level = 0;
		return 0;
	}

	/* Walk the guest's PT, looking for a match along the way */
	ret = walk_s1(vcpu, &wi, &wr, va);
	if (ret == -EINTR) {
		/* The filter interrupted the walk on a match */
		*level = match.level;
		return 0;
	}

	/* A completed walk means no match; anything else is a real error */
	return ret ? ret : -ENOENT;
}
|
||||
|
||||
@@ -106,7 +106,30 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr
|
||||
{
|
||||
unsigned long cpsr = *vcpu_cpsr(vcpu);
|
||||
bool is_aarch32 = vcpu_mode_is_32bit(vcpu);
|
||||
u64 esr = 0;
|
||||
u64 esr = 0, fsc;
|
||||
int level;
|
||||
|
||||
/*
|
||||
* If injecting an abort from a failed S1PTW, rewalk the S1 PTs to
|
||||
* find the failing level. If we can't find it, assume the error was
|
||||
* transient and restart without changing the state.
|
||||
*/
|
||||
if (kvm_vcpu_abt_iss1tw(vcpu)) {
|
||||
u64 hpfar = kvm_vcpu_get_fault_ipa(vcpu);
|
||||
int ret;
|
||||
|
||||
if (hpfar == INVALID_GPA)
|
||||
return;
|
||||
|
||||
ret = __kvm_find_s1_desc_level(vcpu, addr, hpfar, &level);
|
||||
if (ret)
|
||||
return;
|
||||
|
||||
WARN_ON_ONCE(level < -1 || level > 3);
|
||||
fsc = ESR_ELx_FSC_SEA_TTW(level);
|
||||
} else {
|
||||
fsc = ESR_ELx_FSC_EXTABT;
|
||||
}
|
||||
|
||||
/* This delight is brought to you by FEAT_DoubleFault2. */
|
||||
if (effective_sctlr2_ease(vcpu))
|
||||
@@ -133,7 +156,7 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr
|
||||
if (!is_iabt)
|
||||
esr |= ESR_ELx_EC_DABT_LOW << ESR_ELx_EC_SHIFT;
|
||||
|
||||
esr |= ESR_ELx_FSC_EXTABT;
|
||||
esr |= fsc;
|
||||
|
||||
vcpu_write_sys_reg(vcpu, addr, exception_far_elx(vcpu));
|
||||
vcpu_write_sys_reg(vcpu, esr, exception_esr_elx(vcpu));
|
||||
|
||||
@@ -349,7 +349,7 @@ static void vtcr_to_walk_info(u64 vtcr, struct s2_walk_info *wi)
|
||||
wi->sl = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr);
|
||||
/* Global limit for now, should eventually be per-VM */
|
||||
wi->max_oa_bits = min(get_kvm_ipa_limit(),
|
||||
ps_to_output_size(FIELD_GET(VTCR_EL2_PS_MASK, vtcr)));
|
||||
ps_to_output_size(FIELD_GET(VTCR_EL2_PS_MASK, vtcr), false));
|
||||
}
|
||||
|
||||
int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa,
|
||||
|
||||
@@ -250,6 +250,47 @@ static void test_serror(void)
|
||||
kvm_vm_free(vm);
|
||||
}
|
||||
|
||||
static void expect_sea_s1ptw_handler(struct ex_regs *regs)
|
||||
{
|
||||
u64 esr = read_sysreg(esr_el1);
|
||||
|
||||
GUEST_ASSERT_EQ(regs->pc, expected_abort_pc);
|
||||
GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_CUR);
|
||||
GUEST_ASSERT_EQ((esr & ESR_ELx_FSC), ESR_ELx_FSC_SEA_TTW(3));
|
||||
|
||||
GUEST_DONE();
|
||||
}
|
||||
|
||||
static noinline void test_s1ptw_abort_guest(void)
|
||||
{
|
||||
extern char test_s1ptw_abort_insn;
|
||||
|
||||
WRITE_ONCE(expected_abort_pc, (u64)&test_s1ptw_abort_insn);
|
||||
|
||||
asm volatile("test_s1ptw_abort_insn:\n\t"
|
||||
"ldr x0, [%0]\n\t"
|
||||
: : "r" (MMIO_ADDR) : "x0", "memory");
|
||||
|
||||
GUEST_FAIL("Load on S1PTW abort should not retire");
|
||||
}
|
||||
|
||||
static void test_s1ptw_abort(void)
|
||||
{
|
||||
struct kvm_vcpu *vcpu;
|
||||
u64 *ptep, bad_pa;
|
||||
struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_s1ptw_abort_guest,
|
||||
expect_sea_s1ptw_handler);
|
||||
|
||||
ptep = virt_get_pte_hva_at_level(vm, MMIO_ADDR, 2);
|
||||
bad_pa = BIT(vm->pa_bits) - vm->page_size;
|
||||
|
||||
*ptep &= ~GENMASK(47, 12);
|
||||
*ptep |= bad_pa;
|
||||
|
||||
vcpu_run_expect_done(vcpu);
|
||||
kvm_vm_free(vm);
|
||||
}
|
||||
|
||||
static void test_serror_emulated_guest(void)
|
||||
{
|
||||
GUEST_ASSERT(!(read_sysreg(isr_el1) & ISR_EL1_A));
|
||||
@@ -327,4 +368,5 @@ int main(void)
|
||||
test_serror_masked();
|
||||
test_serror_emulated();
|
||||
test_mmio_ease();
|
||||
test_s1ptw_abort();
|
||||
}
|
||||
|
||||
@@ -175,6 +175,7 @@ void vm_install_exception_handler(struct kvm_vm *vm,
|
||||
void vm_install_sync_handler(struct kvm_vm *vm,
|
||||
int vector, int ec, handler_fn handler);
|
||||
|
||||
uint64_t *virt_get_pte_hva_at_level(struct kvm_vm *vm, vm_vaddr_t gva, int level);
|
||||
uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva);
|
||||
|
||||
static inline void cpu_relax(void)
|
||||
|
||||
@@ -185,7 +185,7 @@ void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
|
||||
_virt_pg_map(vm, vaddr, paddr, attr_idx);
|
||||
}
|
||||
|
||||
uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva)
|
||||
uint64_t *virt_get_pte_hva_at_level(struct kvm_vm *vm, vm_vaddr_t gva, int level)
|
||||
{
|
||||
uint64_t *ptep;
|
||||
|
||||
@@ -195,17 +195,23 @@ uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva)
|
||||
ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, gva) * 8;
|
||||
if (!ptep)
|
||||
goto unmapped_gva;
|
||||
if (level == 0)
|
||||
return ptep;
|
||||
|
||||
switch (vm->pgtable_levels) {
|
||||
case 4:
|
||||
ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, gva) * 8;
|
||||
if (!ptep)
|
||||
goto unmapped_gva;
|
||||
if (level == 1)
|
||||
break;
|
||||
/* fall through */
|
||||
case 3:
|
||||
ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, gva) * 8;
|
||||
if (!ptep)
|
||||
goto unmapped_gva;
|
||||
if (level == 2)
|
||||
break;
|
||||
/* fall through */
|
||||
case 2:
|
||||
ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, gva) * 8;
|
||||
@@ -223,6 +229,11 @@ unmapped_gva:
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
/*
 * Return the host virtual address of the last-level (level 3) page table
 * entry mapping @gva, by delegating to virt_get_pte_hva_at_level().
 */
uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva)
{
	const int leaf_level = 3;

	return virt_get_pte_hva_at_level(vm, gva, leaf_level);
}
|
||||
|
||||
vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
|
||||
{
|
||||
uint64_t *ptep = virt_get_pte_hva(vm, gva);
|
||||
|
||||
Reference in New Issue
Block a user