mirror of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
Merge tag 'kvm-x86-misc-6.16' of https://github.com/kvm-x86/linux into HEAD
KVM x86 misc changes for 6.16:

- Unify virtualization of IBRS on nested VM-Exit, and cross-vCPU IBPB, between SVM and VMX.

- Advertise support to userspace for WRMSRNS and PREFETCHI.

- Rescan I/O APIC routes after handling EOI that needed to be intercepted due to the old/previous routing, but not the new/current routing.

- Add a module param to control and enumerate support for device posted interrupts.

- Misc cleanups.
@@ -336,6 +336,7 @@
 #define X86_FEATURE_AMD_IBRS		(13*32+14) /* Indirect Branch Restricted Speculation */
 #define X86_FEATURE_AMD_STIBP		(13*32+15) /* Single Thread Indirect Branch Predictors */
 #define X86_FEATURE_AMD_STIBP_ALWAYS_ON	(13*32+17) /* Single Thread Indirect Branch Predictors always-on preferred */
+#define X86_FEATURE_AMD_IBRS_SAME_MODE	(13*32+19) /* Indirect Branch Restricted Speculation same mode protection */
 #define X86_FEATURE_AMD_PPIN		(13*32+23) /* "amd_ppin" Protected Processor Inventory Number */
 #define X86_FEATURE_AMD_SSBD		(13*32+24) /* Speculative Store Bypass Disable */
 #define X86_FEATURE_VIRT_SSBD		(13*32+25) /* "virt_ssbd" Virtualized Speculative Store Bypass Disable */
@@ -457,6 +458,7 @@
 #define X86_FEATURE_AUTOIBRS		(20*32+ 8) /* Automatic IBRS */
 #define X86_FEATURE_NO_SMM_CTL_MSR	(20*32+ 9) /* SMM_CTL MSR is not present */

+#define X86_FEATURE_PREFETCHI		(20*32+20) /* Prefetch Data/Instruction to Cache Level */
 #define X86_FEATURE_SBPB		(20*32+27) /* Selective Branch Prediction Barrier */
 #define X86_FEATURE_IBPB_BRTYPE		(20*32+28) /* MSR_PRED_CMD[IBPB] flushes all branch type predictions */
 #define X86_FEATURE_SRSO_NO		(20*32+29) /* CPU is not affected by SRSO */

@@ -1034,6 +1034,7 @@ struct kvm_vcpu_arch {

 	int pending_ioapic_eoi;
 	int pending_external_vector;
+	int highest_stale_pending_ioapic_eoi;

 	/* be preempted when it's in kernel-mode(cpl=0) */
 	bool preempted_in_kernel;
@@ -1941,6 +1942,7 @@ struct kvm_arch_async_pf {
 extern u32 __read_mostly kvm_nr_uret_msrs;
 extern bool __read_mostly allow_smaller_maxphyaddr;
 extern bool __read_mostly enable_apicv;
+extern bool __read_mostly enable_device_posted_irqs;
 extern struct kvm_x86_ops kvm_x86_ops;

 #define kvm_x86_call(func) static_call(kvm_x86_##func)
@@ -2444,7 +2446,7 @@ int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages);

 static inline bool kvm_arch_has_irq_bypass(void)
 {
-	return enable_apicv && irq_remapping_cap(IRQ_POSTING_CAP);
+	return enable_device_posted_irqs;
 }

 #endif /* _ASM_X86_KVM_HOST_H */
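Taken together, the kvm_host.h changes replace the open-coded "enable_apicv && irq_remapping_cap(IRQ_POSTING_CAP)" test with one module-param-backed flag: the param is defined in x86.c, exposed by both svm.c and vmx.c, and masked with its prerequisites once at vendor init (see the x86.c hunks below). A minimal standalone sketch of that flow — has_posting_cap() and posted_irq_init() are illustrative names, not kernel API:

#include <stdbool.h>

/* Module param (0444): read-only after load, defaults to enabled. */
static bool enable_device_posted_irqs = true;

/* One-time init: fold the prerequisites into the flag, mirroring the
 * "enable_device_posted_irqs &= ..." statement in kvm_x86_vendor_init(). */
static void posted_irq_init(bool enable_apicv, bool has_posting_cap)
{
	enable_device_posted_irqs &= enable_apicv && has_posting_cap;
}

/* Callers then reduce to a single test, as in kvm_arch_has_irq_bypass(). */
static bool arch_has_irq_bypass(void)
{
	return enable_device_posted_irqs;
}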
@@ -300,7 +300,7 @@ do { \
 #endif /* !CONFIG_PARAVIRT_XXL */

 /* Instruction opcode for WRMSRNS supported in binutils >= 2.40 */
-#define WRMSRNS _ASM_BYTES(0x0f,0x01,0xc6)
+#define ASM_WRMSRNS _ASM_BYTES(0x0f,0x01,0xc6)

 /* Non-serializing WRMSR, when available. Falls back to a serializing WRMSR. */
 static __always_inline void wrmsrns(u32 msr, u64 val)
@@ -309,7 +309,7 @@ static __always_inline void wrmsrns(u32 msr, u64 val)
	 * WRMSR is 2 bytes. WRMSRNS is 3 bytes. Pad WRMSR with a redundant
	 * DS prefix to avoid a trailing NOP.
	 */
-	asm volatile("1: " ALTERNATIVE("ds wrmsr", WRMSRNS, X86_FEATURE_WRMSRNS)
+	asm volatile("1: " ALTERNATIVE("ds wrmsr", ASM_WRMSRNS, X86_FEATURE_WRMSRNS)
		     "2: " _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_WRMSR)
		     : : "c" (msr), "a" ((u32)val), "d" ((u32)(val >> 32)));
 }
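The WRMSRNS -> ASM_WRMSRNS rename only retags the raw opcode macro; the wrmsrns() helper itself is unchanged and still patches itself via ALTERNATIVE(). A hedged usage sketch — the MSR index below is hypothetical, not from this diff; on CPUs without X86_FEATURE_WRMSRNS the helper executes the DS-prefixed (fully serializing) WRMSR instead:

#define MSR_HYPOTHETICAL 0xc0011234	/* illustrative index only */

static void update_fast_path_msr(u64 val)
{
	/* Non-serializing write when supported; correctness here depends
	 * only on the written value, not on serialization. */
	wrmsrns(MSR_HYPOTHETICAL, val);
}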
@@ -978,6 +978,7 @@ void kvm_set_cpu_caps(void)
 		F(FZRM),
 		F(FSRS),
 		F(FSRC),
+		F(WRMSRNS),
 		F(AMX_FP16),
 		F(AVX_IFMA),
 		F(LAM),
@@ -1093,6 +1094,7 @@ void kvm_set_cpu_caps(void)
 		F(AMD_SSB_NO),
 		F(AMD_STIBP),
 		F(AMD_STIBP_ALWAYS_ON),
+		F(AMD_IBRS_SAME_MODE),
 		F(AMD_PSFD),
 		F(AMD_IBPB_RET),
 	);
@@ -1150,6 +1152,7 @@ void kvm_set_cpu_caps(void)

 	kvm_cpu_cap_init(CPUID_8000_0021_EAX,
 		F(NO_NESTED_DATA_BP),
+		F(WRMSR_XX_BASE_NS),
 		/*
 		 * Synthesize "LFENCE is serializing" into the AMD-defined entry
 		 * in KVM's supported CPUID, i.e. if the feature is reported as
@@ -1163,10 +1166,13 @@ void kvm_set_cpu_caps(void)
 		SYNTHESIZED_F(LFENCE_RDTSC),
 		/* SmmPgCfgLock */
 		F(NULL_SEL_CLR_BASE),
 		/* UpperAddressIgnore */
 		F(AUTOIBRS),
+		F(PREFETCHI),
 		EMULATED_F(NO_SMM_CTL_MSR),
 		/* PrefetchCtlMsr */
-		F(WRMSR_XX_BASE_NS),
 		/* GpOnUserCpuid */
 		/* EPSF */
 		SYNTHESIZED_F(SBPB),
 		SYNTHESIZED_F(IBPB_BRTYPE),
 		SYNTHESIZED_F(SRSO_NO),
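After kvm_set_cpu_caps() runs, the new bits surface to userspace through KVM_GET_SUPPORTED_CPUID: WRMSRNS in CPUID.(EAX=7,ECX=1):EAX (bit 19 by current SDM numbering — an assumption, the bit position is not stated in this diff) and PREFETCHI in CPUID 0x80000021:EAX bit 20 (matching the 20*32+20 define above). A hedged userspace sketch:

#include <fcntl.h>
#include <linux/kvm.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>

int main(void)
{
	int kvm = open("/dev/kvm", O_RDONLY);
	struct { struct kvm_cpuid2 hdr; struct kvm_cpuid_entry2 ent[128]; } c;

	memset(&c, 0, sizeof(c));
	c.hdr.nent = 128;
	if (kvm < 0 || ioctl(kvm, KVM_GET_SUPPORTED_CPUID, &c.hdr) < 0) {
		perror("KVM_GET_SUPPORTED_CPUID");
		return 1;
	}
	for (unsigned int i = 0; i < c.hdr.nent; i++) {
		struct kvm_cpuid_entry2 *e = &c.ent[i];

		if (e->function == 7 && e->index == 1)
			printf("WRMSRNS:   %s\n", (e->eax >> 19) & 1 ? "yes" : "no");
		if (e->function == 0x80000021)
			printf("PREFETCHI: %s\n", (e->eax >> 20) & 1 ? "yes" : "no");
	}
	return 0;
}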
@@ -296,11 +296,8 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors)
 		    index == RTC_GSI) {
 			u16 dm = kvm_lapic_irq_dest_mode(!!e->fields.dest_mode);

-			if (kvm_apic_match_dest(vcpu, NULL, APIC_DEST_NOSHORT,
-						e->fields.dest_id, dm) ||
-			    kvm_apic_pending_eoi(vcpu, e->fields.vector))
-				__set_bit(e->fields.vector,
-					  ioapic_handled_vectors);
+			kvm_scan_ioapic_irq(vcpu, e->fields.dest_id, dm,
+					    e->fields.vector, ioapic_handled_vectors);
 		}
 	}
 	spin_unlock(&ioapic->lock);
@@ -120,4 +120,6 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu,
			   ulong *ioapic_handled_vectors);
 void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu,
			    ulong *ioapic_handled_vectors);
+void kvm_scan_ioapic_irq(struct kvm_vcpu *vcpu, u32 dest_id, u16 dest_mode,
+			 u8 vector, unsigned long *ioapic_handled_vectors);
 #endif
@@ -402,6 +402,33 @@ void kvm_arch_post_irq_routing_update(struct kvm *kvm)
 	kvm_make_scan_ioapic_request(kvm);
 }

+void kvm_scan_ioapic_irq(struct kvm_vcpu *vcpu, u32 dest_id, u16 dest_mode,
+			 u8 vector, unsigned long *ioapic_handled_vectors)
+{
+	/*
+	 * Intercept EOI if the vCPU is the target of the new IRQ routing, or
+	 * the vCPU has a pending IRQ from the old routing, i.e. if the vCPU
+	 * may receive a level-triggered IRQ in the future, or already received
+	 * a level-triggered IRQ.  The EOI needs to be intercepted and forwarded
+	 * to I/O APIC emulation so that the IRQ can be de-asserted.
+	 */
+	if (kvm_apic_match_dest(vcpu, NULL, APIC_DEST_NOSHORT, dest_id, dest_mode)) {
+		__set_bit(vector, ioapic_handled_vectors);
+	} else if (kvm_apic_pending_eoi(vcpu, vector)) {
+		__set_bit(vector, ioapic_handled_vectors);
+
+		/*
+		 * Track the highest pending EOI for which the vCPU is NOT the
+		 * target in the new routing.  Only the EOI for the IRQ that is
+		 * in-flight (for the old routing) needs to be intercepted, any
+		 * future IRQs that arrive on this vCPU will be coincidental to
+		 * the level-triggered routing and don't need to be intercepted.
+		 */
+		if ((int)vector > vcpu->arch.highest_stale_pending_ioapic_eoi)
+			vcpu->arch.highest_stale_pending_ioapic_eoi = vector;
+	}
+}
+
 void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu,
			    ulong *ioapic_handled_vectors)
 {
@@ -424,11 +451,11 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu,

 			kvm_set_msi_irq(vcpu->kvm, entry, &irq);

-			if (irq.trig_mode &&
-			    (kvm_apic_match_dest(vcpu, NULL, APIC_DEST_NOSHORT,
-						 irq.dest_id, irq.dest_mode) ||
-			     kvm_apic_pending_eoi(vcpu, irq.vector)))
-				__set_bit(irq.vector, ioapic_handled_vectors);
+			if (!irq.trig_mode)
+				continue;
+
+			kvm_scan_ioapic_irq(vcpu, irq.dest_id, irq.dest_mode,
+					    irq.vector, ioapic_handled_vectors);
 		}
 	}
 	srcu_read_unlock(&kvm->irq_srcu, idx);
@@ -1459,6 +1459,14 @@ static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
 	if (!kvm_ioapic_handles_vector(apic, vector))
 		return;

+	/*
+	 * If the intercepted EOI is for an IRQ that was pending from previous
+	 * routing, then re-scan the I/O APIC routes as EOIs for the IRQ likely
+	 * no longer need to be intercepted.
+	 */
+	if (apic->vcpu->arch.highest_stale_pending_ioapic_eoi == vector)
+		kvm_make_request(KVM_REQ_SCAN_IOAPIC, apic->vcpu);
+
 	/* Request a KVM exit to inform the userspace IOAPIC. */
 	if (irqchip_split(apic->vcpu->kvm)) {
 		apic->vcpu->arch.pending_ioapic_eoi = vector;
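End to end, the stale-EOI protocol has three moves: reset the tracker to -1 on every route scan (see the vcpu_scan_ioapic() hunk further down), record the highest pending vector that no longer targets the vCPU, and request a rescan when exactly that vector is EOI'd. A standalone C model of the protocol, not kernel code:

/* Per-vCPU tracker; -1 means no stale pending EOI. */
struct vcpu_model {
	int highest_stale_pending_ioapic_eoi;
};

/* Route scan: reset, then record the highest vector that is pending from
 * the old routing but no longer targets this vCPU. */
static void scan_routes(struct vcpu_model *v, const int *stale_vectors, int n)
{
	v->highest_stale_pending_ioapic_eoi = -1;
	for (int i = 0; i < n; i++)
		if (stale_vectors[i] > v->highest_stale_pending_ioapic_eoi)
			v->highest_stale_pending_ioapic_eoi = stale_vectors[i];
}

/* Returns true when the EOI should trigger another route scan, i.e. when
 * the intercepts installed for the old routing can likely be dropped. */
static bool handle_eoi(struct vcpu_model *v, int vector)
{
	return v->highest_stale_pending_ioapic_eoi == vector;
}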
@@ -1041,6 +1041,8 @@ int nested_svm_vmexit(struct vcpu_svm *svm)

 	nested_svm_copy_common_state(svm->nested.vmcb02.ptr, svm->vmcb01.ptr);

+	kvm_nested_vmexit_handle_ibrs(vcpu);
+
 	svm_switch_vmcb(svm, &svm->vmcb01);

 	/*
@@ -231,6 +231,8 @@ module_param(tsc_scaling, int, 0444);
 static bool avic;
 module_param(avic, bool, 0444);

+module_param(enable_device_posted_irqs, bool, 0444);
+
 bool __read_mostly dump_invalid_vmcb;
 module_param(dump_invalid_vmcb, bool, 0644);

@@ -1484,25 +1486,10 @@ out:
 	return err;
 }

-static void svm_clear_current_vmcb(struct vmcb *vmcb)
-{
-	int i;
-
-	for_each_online_cpu(i)
-		cmpxchg(per_cpu_ptr(&svm_data.current_vmcb, i), vmcb, NULL);
-}
-
 static void svm_vcpu_free(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);

-	/*
-	 * The vmcb page can be recycled, causing a false negative in
-	 * svm_vcpu_load(). So, ensure that no logical CPU has this
-	 * vmcb page recorded as its current vmcb.
-	 */
-	svm_clear_current_vmcb(svm->vmcb);
-
 	svm_leave_nested(vcpu);
 	svm_free_nested(svm);

@@ -1616,19 +1603,9 @@ static void svm_prepare_host_switch(struct kvm_vcpu *vcpu)

 static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
-	struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu);

 	if (vcpu->scheduled_out && !kvm_pause_in_guest(vcpu->kvm))
 		shrink_ple_window(vcpu);

-	if (sd->current_vmcb != svm->vmcb) {
-		sd->current_vmcb = svm->vmcb;
-
-		if (!cpu_feature_enabled(X86_FEATURE_IBPB_ON_VMEXIT) &&
-		    static_branch_likely(&switch_vcpu_ibpb))
-			indirect_branch_prediction_barrier();
-	}
 	if (kvm_vcpu_apicv_active(vcpu))
 		avic_vcpu_load(vcpu, cpu);
 }
@@ -340,8 +340,6 @@ struct svm_cpu_data {
 	struct vmcb *save_area;
 	unsigned long save_area_pa;

-	struct vmcb *current_vmcb;
-
 	/* index = sev_asid, value = vmcb pointer */
 	struct vmcb **sev_vmcbs;
 };
@@ -301,7 +301,7 @@ static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
 	cpu = get_cpu();
 	prev = vmx->loaded_vmcs;
 	vmx->loaded_vmcs = vmcs;
-	vmx_vcpu_load_vmcs(vcpu, cpu, prev);
+	vmx_vcpu_load_vmcs(vcpu, cpu);
 	vmx_sync_vmcs_host_state(vmx, prev);
 	put_cpu();

@@ -4520,12 +4520,12 @@ static void copy_vmcs02_to_vmcs12_rare(struct kvm_vcpu *vcpu,

 	cpu = get_cpu();
 	vmx->loaded_vmcs = &vmx->nested.vmcs02;
-	vmx_vcpu_load_vmcs(vcpu, cpu, &vmx->vmcs01);
+	vmx_vcpu_load_vmcs(vcpu, cpu);

 	sync_vmcs02_to_vmcs12_rare(vcpu, vmcs12);

 	vmx->loaded_vmcs = &vmx->vmcs01;
-	vmx_vcpu_load_vmcs(vcpu, cpu, &vmx->nested.vmcs02);
+	vmx_vcpu_load_vmcs(vcpu, cpu);
 	put_cpu();
 }

@@ -5020,16 +5020,7 @@ void __nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,

 	vmx_switch_vmcs(vcpu, &vmx->vmcs01);

-	/*
-	 * If IBRS is advertised to the vCPU, KVM must flush the indirect
-	 * branch predictors when transitioning from L2 to L1, as L1 expects
-	 * hardware (KVM in this case) to provide separate predictor modes.
-	 * Bare metal isolates VMX root (host) from VMX non-root (guest), but
-	 * doesn't isolate different VMCSs, i.e. in this case, doesn't provide
-	 * separate modes for L2 vs L1.
-	 */
-	if (guest_cpu_cap_has(vcpu, X86_FEATURE_SPEC_CTRL))
-		indirect_branch_prediction_barrier();
+	kvm_nested_vmexit_handle_ibrs(vcpu);

 	/* Update any VMCS fields that might have changed while L2 ran */
 	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
@@ -148,9 +148,8 @@ after_clear_sn:

 static bool vmx_can_use_vtd_pi(struct kvm *kvm)
 {
-	return irqchip_in_kernel(kvm) && enable_apicv &&
-		kvm_arch_has_assigned_device(kvm) &&
-		irq_remapping_cap(IRQ_POSTING_CAP);
+	return irqchip_in_kernel(kvm) && kvm_arch_has_irq_bypass() &&
+	       kvm_arch_has_assigned_device(kvm);
 }

 /*
@@ -281,7 +280,7 @@ bool pi_has_pending_interrupt(struct kvm_vcpu *vcpu)
  */
 void vmx_pi_start_assignment(struct kvm *kvm)
 {
-	if (!irq_remapping_cap(IRQ_POSTING_CAP))
+	if (!kvm_arch_has_irq_bypass())
 		return;

 	kvm_make_all_cpus_request(kvm, KVM_REQ_UNBLOCK);
@@ -116,6 +116,8 @@ module_param(enable_apicv, bool, 0444);
 bool __read_mostly enable_ipiv = true;
 module_param(enable_ipiv, bool, 0444);

+module_param(enable_device_posted_irqs, bool, 0444);
+
 /*
  * If nested=1, nested virtualization is supported, i.e., guests may use
  * VMX and be a hypervisor for its own guests. If nested=0, guests may not
@@ -1443,8 +1445,7 @@ static void shrink_ple_window(struct kvm_vcpu *vcpu)
 	}
 }

-void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu,
-			struct loaded_vmcs *buddy)
+void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	bool already_loaded = vmx->loaded_vmcs->cpu == cpu;
@@ -1471,17 +1472,6 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu,
 	if (prev != vmx->loaded_vmcs->vmcs) {
 		per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;
 		vmcs_load(vmx->loaded_vmcs->vmcs);
-
-		/*
-		 * No indirect branch prediction barrier needed when switching
-		 * the active VMCS within a vCPU, unless IBRS is advertised to
-		 * the vCPU. To minimize the number of IBPBs executed, KVM
-		 * performs IBPB on nested VM-Exit (a single nested transition
-		 * may switch the active VMCS multiple times).
-		 */
-		if (static_branch_likely(&switch_vcpu_ibpb) &&
-		    (!buddy || WARN_ON_ONCE(buddy->vmcs != prev)))
-			indirect_branch_prediction_barrier();
 	}

 	if (!already_loaded) {
@@ -1520,7 +1510,7 @@ void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	if (vcpu->scheduled_out && !kvm_pause_in_guest(vcpu->kvm))
 		shrink_ple_window(vcpu);

-	vmx_vcpu_load_vmcs(vcpu, cpu, NULL);
+	vmx_vcpu_load_vmcs(vcpu, cpu);

 	vmx_vcpu_pi_load(vcpu, cpu);
 }
@@ -354,8 +354,7 @@ static __always_inline u32 vmx_get_intr_info(struct kvm_vcpu *vcpu)
 	return vt->exit_intr_info;
 }

-void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu,
-			struct loaded_vmcs *buddy);
+void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu);
 int allocate_vpid(void);
 void free_vpid(int vpid);
 void vmx_set_constant_host_state(struct vcpu_vmx *vmx);
@@ -226,6 +226,9 @@ EXPORT_SYMBOL_GPL(allow_smaller_maxphyaddr);
 bool __read_mostly enable_apicv = true;
 EXPORT_SYMBOL_GPL(enable_apicv);

+bool __read_mostly enable_device_posted_irqs = true;
+EXPORT_SYMBOL_GPL(enable_device_posted_irqs);
+
 const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
 	KVM_GENERIC_VM_STATS(),
 	STATS_DESC_COUNTER(VM, mmu_shadow_zapped),
@@ -4990,6 +4993,8 @@ static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu)
 	return kvm_arch_has_noncoherent_dma(vcpu->kvm);
 }

+static DEFINE_PER_CPU(struct kvm_vcpu *, last_vcpu);
+
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
@@ -5012,6 +5017,19 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)

 	kvm_x86_call(vcpu_load)(vcpu, cpu);

+	if (vcpu != per_cpu(last_vcpu, cpu)) {
+		/*
+		 * Flush the branch predictor when switching vCPUs on the same
+		 * physical CPU, as each vCPU needs its own branch prediction
+		 * domain.  No IBPB is needed when switching between L1 and L2
+		 * on the same vCPU unless IBRS is advertised to the vCPU; that
+		 * is handled on the nested VM-Exit path.
+		 */
+		if (static_branch_likely(&switch_vcpu_ibpb))
+			indirect_branch_prediction_barrier();
+		per_cpu(last_vcpu, cpu) = vcpu;
+	}
+
 	/* Save host pkru register if supported */
 	vcpu->arch.host_pkru = read_pkru();
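This per-CPU last_vcpu tracking is the common-code replacement for the vendor-specific state deleted above (SVM's svm_cpu_data.current_vmcb and VMX's buddy-VMCS check): one pointer per physical CPU, with the barrier fired only on a genuine cross-vCPU switch. A standalone model, with a plain array standing in for per-CPU data and a bool for the static branch:

#include <stdbool.h>

#define NR_CPUS_MODEL 8

struct vcpu;				/* opaque in this model */

static struct vcpu *last_vcpu[NR_CPUS_MODEL];
static bool switch_vcpu_ibpb = true;	/* mitigation knob, default on */

/* Returns true when vcpu_load would issue an IBPB on this physical CPU. */
static bool vcpu_load_needs_ibpb(struct vcpu *v, int cpu)
{
	if (v == last_vcpu[cpu])
		return false;		/* same vCPU as last run: same domain */
	last_vcpu[cpu] = v;
	return switch_vcpu_ibpb;
}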
@@ -8023,7 +8041,7 @@ static int emulator_read_write(struct x86_emulate_ctxt *ctxt,
 		return rc;

 	if (!vcpu->mmio_nr_fragments)
-		return rc;
+		return X86EMUL_CONTINUE;

 	gpa = vcpu->mmio_fragments[0].gpa;

@@ -9811,6 +9829,9 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
 	if (r != 0)
 		goto out_mmu_exit;

+	enable_device_posted_irqs &= enable_apicv &&
+				     irq_remapping_cap(IRQ_POSTING_CAP);
+
 	kvm_ops_update(ops);

 	for_each_online_cpu(cpu) {
@@ -10694,6 +10715,7 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
 		return;

 	bitmap_zero(vcpu->arch.ioapic_handled_vectors, 256);
+	vcpu->arch.highest_stale_pending_ioapic_eoi = -1;

 	kvm_x86_call(sync_pir_to_irr)(vcpu);

@@ -12419,13 +12441,16 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)

 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 {
-	int idx;
+	int idx, cpu;

 	kvm_clear_async_pf_completion_queue(vcpu);
 	kvm_mmu_unload(vcpu);

 	kvmclock_reset(vcpu);

+	for_each_possible_cpu(cpu)
+		cmpxchg(per_cpu_ptr(&last_vcpu, cpu), vcpu, NULL);
+
 	kvm_x86_call(vcpu_free)(vcpu);

 	kmem_cache_free(x86_emulator_cache, vcpu->arch.emulate_ctxt);
@@ -121,6 +121,24 @@ static inline void kvm_leave_nested(struct kvm_vcpu *vcpu)
 	kvm_x86_ops.nested_ops->leave_nested(vcpu);
 }

+/*
+ * If IBRS is advertised to the vCPU, KVM must flush the indirect branch
+ * predictors when transitioning from L2 to L1, as L1 expects hardware (KVM in
+ * this case) to provide separate predictor modes.  Bare metal isolates the host
+ * from the guest, but doesn't isolate different guests from one another (in
+ * this case L1 and L2).  The exception is if bare metal supports same mode IBRS,
+ * which offers protection within the same mode, and hence protects L1 from L2.
+ */
+static inline void kvm_nested_vmexit_handle_ibrs(struct kvm_vcpu *vcpu)
+{
+	if (cpu_feature_enabled(X86_FEATURE_AMD_IBRS_SAME_MODE))
+		return;
+
+	if (guest_cpu_cap_has(vcpu, X86_FEATURE_SPEC_CTRL) ||
+	    guest_cpu_cap_has(vcpu, X86_FEATURE_AMD_IBRS))
+		indirect_branch_prediction_barrier();
+}
+
 static inline bool kvm_vcpu_has_run(struct kvm_vcpu *vcpu)
 {
 	return vcpu->arch.last_vmentry_cpu != -1;
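With the helper in x86.h, nested_svm_vmexit() and __nested_vmx_vmexit() (both hunks above) now share one IBPB-on-nested-VM-Exit policy instead of VMX carrying its own copy. Restated as a freestanding predicate — a reading aid only, with the booleans abstracting the cpufeature and guest-cap queries:

#include <stdbool.h>

static bool nested_vmexit_needs_ibpb(bool host_ibrs_same_mode,
				     bool guest_spec_ctrl, bool guest_amd_ibrs)
{
	if (host_ibrs_same_mode)
		return false;	/* hardware already isolates L1 from L2 */
	return guest_spec_ctrl || guest_amd_ibrs;
}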
@@ -336,6 +336,7 @@
 #define X86_FEATURE_AMD_IBRS		(13*32+14) /* Indirect Branch Restricted Speculation */
 #define X86_FEATURE_AMD_STIBP		(13*32+15) /* Single Thread Indirect Branch Predictors */
 #define X86_FEATURE_AMD_STIBP_ALWAYS_ON	(13*32+17) /* Single Thread Indirect Branch Predictors always-on preferred */
+#define X86_FEATURE_AMD_IBRS_SAME_MODE	(13*32+19) /* Indirect Branch Restricted Speculation same mode protection */
 #define X86_FEATURE_AMD_PPIN		(13*32+23) /* "amd_ppin" Protected Processor Inventory Number */
 #define X86_FEATURE_AMD_SSBD		(13*32+24) /* Speculative Store Bypass Disable */
 #define X86_FEATURE_VIRT_SSBD		(13*32+25) /* "virt_ssbd" Virtualized Speculative Store Bypass Disable */
@@ -5765,7 +5765,6 @@ static int __kvm_io_bus_write(struct kvm_vcpu *vcpu, struct kvm_io_bus *bus,
 	return -EOPNOTSUPP;
 }

-/* kvm_io_bus_write - called under kvm->slots_lock */
 int kvm_io_bus_write(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
		     int len, const void *val)
 {
@@ -5786,7 +5785,6 @@ int kvm_io_bus_write(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
 }
 EXPORT_SYMBOL_GPL(kvm_io_bus_write);

-/* kvm_io_bus_write_cookie - called under kvm->slots_lock */
 int kvm_io_bus_write_cookie(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx,
			    gpa_t addr, int len, const void *val, long cookie)
 {
@@ -5836,7 +5834,6 @@ static int __kvm_io_bus_read(struct kvm_vcpu *vcpu, struct kvm_io_bus *bus,
 	return -EOPNOTSUPP;
 }

-/* kvm_io_bus_read - called under kvm->slots_lock */
 int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
		    int len, void *val)
 {