Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM fixes from Paolo Bonzini:

 - Fix a bug where AVIC is incorrectly inhibited when running with
   x2AVIC disabled via module param (or on a system without x2AVIC)

 - Fix a dangling device posted IRQs bug by explicitly checking if the
   irqfd is still active (on the list) when handling an eventfd signal,
   instead of zeroing the irqfd's routing information when the irqfd is
   deassigned.

   Zeroing the irqfd's routing info causes arm64 and x86 to not
   disable posting for the IRQ (kvm_arch_irq_bypass_del_producer() looks
   for an MSI), incorrectly leaving the IRQ in posted mode (and leading
   to use-after-free and memory leaks on AMD in particular).

   This is both the most pressing and scariest, but it's been in -next
   for a while.

 - Disable FORTIFY_SOURCE for KVM selftests to prevent the compiler from
   generating calls to the checked versions of memset() and friends,
   which leads to unexpected page faults in guest code due to, e.g.,
   __memset_chk@plt not being resolved.

 - Explicitly configure the supported XSS capabilities from within
   {svm,vmx}_set_cpu_caps() to fix a bug where VMX will compute the
   reference VMCS configuration with SHSTK and IBT enabled, but then
   compute each CPU's local config with SHSTK and IBT disabled if not all
   CET xfeatures are enabled, e.g. if the kernel is built with
   X86_KERNEL_IBT=n.

   The mismatch in features results in differing nVMX settings, and
   ultimately causes kvm-intel.ko to refuse to load with nested=1.

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: x86: Explicitly configure supported XSS from {svm,vmx}_set_cpu_caps()
  KVM: selftests: Add -U_FORTIFY_SOURCE to avoid some unpredictable test failures
  KVM: x86: Assert that non-MSI doesn't have bypass vCPU when deleting producer
  KVM: Don't clobber irqfd routing type when deassigning irqfd
  KVM: SVM: Check vCPU ID against max x2AVIC ID if and only if x2AVIC is enabled
This commit is contained in:
Linus Torvalds
2026-02-04 10:38:56 -08:00
8 changed files with 52 additions and 36 deletions

View File

@@ -514,7 +514,8 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
*/
spin_lock_irq(&kvm->irqfds.lock);
if (irqfd->irq_entry.type == KVM_IRQ_ROUTING_MSI) {
if (irqfd->irq_entry.type == KVM_IRQ_ROUTING_MSI ||
WARN_ON_ONCE(irqfd->irq_bypass_vcpu)) {
ret = kvm_pi_update_irte(irqfd, NULL);
if (ret)
pr_info("irq bypass consumer (eventfd %p) unregistration fails: %d\n",

View File

@@ -376,6 +376,7 @@ void avic_init_vmcb(struct vcpu_svm *svm, struct vmcb *vmcb)
static int avic_init_backing_page(struct kvm_vcpu *vcpu)
{
u32 max_id = x2avic_enabled ? x2avic_max_physical_id : AVIC_MAX_PHYSICAL_ID;
struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
struct vcpu_svm *svm = to_svm(vcpu);
u32 id = vcpu->vcpu_id;
@@ -388,8 +389,7 @@ static int avic_init_backing_page(struct kvm_vcpu *vcpu)
* avic_vcpu_load() expects to be called if and only if the vCPU has
* fully initialized AVIC.
*/
if ((!x2avic_enabled && id > AVIC_MAX_PHYSICAL_ID) ||
(id > x2avic_max_physical_id)) {
if (id > max_id) {
kvm_set_apicv_inhibit(vcpu->kvm, APICV_INHIBIT_REASON_PHYSICAL_ID_TOO_BIG);
vcpu->arch.apic->apicv_active = false;
return 0;

View File

@@ -5284,6 +5284,8 @@ static __init void svm_set_cpu_caps(void)
*/
kvm_cpu_cap_clear(X86_FEATURE_BUS_LOCK_DETECT);
kvm_cpu_cap_clear(X86_FEATURE_MSR_IMM);
kvm_setup_xss_caps();
}
static __init int svm_hardware_setup(void)

View File

@@ -8051,6 +8051,8 @@ static __init void vmx_set_cpu_caps(void)
kvm_cpu_cap_clear(X86_FEATURE_SHSTK);
kvm_cpu_cap_clear(X86_FEATURE_IBT);
}
kvm_setup_xss_caps();
}
static bool vmx_is_io_intercepted(struct kvm_vcpu *vcpu,

View File

@@ -9953,6 +9953,23 @@ static struct notifier_block pvclock_gtod_notifier = {
};
#endif
/*
 * Finalize KVM's supported XSS capabilities after the vendor module has set
 * the raw CPU caps.  Invoked from {svm,vmx}_set_cpu_caps() (see the other
 * hunks in this commit) so that vendor code computes all derived state,
 * e.g. the nested VMX/VMCS configuration, with a consistent view of CET
 * (SHSTK/IBT) support.
 */
void kvm_setup_xss_caps(void)
{
/* XSS state is reachable only via XSAVES; without it, expose nothing. */
if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES))
kvm_caps.supported_xss = 0;
/* CET xfeatures are meaningless without at least one of SHSTK or IBT. */
if (!kvm_cpu_cap_has(X86_FEATURE_SHSTK) &&
!kvm_cpu_cap_has(X86_FEATURE_IBT))
kvm_caps.supported_xss &= ~XFEATURE_MASK_CET_ALL;
/*
 * CET is all-or-nothing: unless every CET xfeature can be exposed
 * (e.g. not the case when the kernel is built with X86_KERNEL_IBT=n),
 * drop SHSTK and IBT along with the CET xfeature bits.
 */
if ((kvm_caps.supported_xss & XFEATURE_MASK_CET_ALL) != XFEATURE_MASK_CET_ALL) {
kvm_cpu_cap_clear(X86_FEATURE_SHSTK);
kvm_cpu_cap_clear(X86_FEATURE_IBT);
kvm_caps.supported_xss &= ~XFEATURE_MASK_CET_ALL;
}
}
EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_setup_xss_caps);
static inline void kvm_ops_update(struct kvm_x86_init_ops *ops)
{
memcpy(&kvm_x86_ops, ops->runtime_ops, sizeof(kvm_x86_ops));
@@ -10125,19 +10142,6 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
if (!tdp_enabled)
kvm_caps.supported_quirks &= ~KVM_X86_QUIRK_IGNORE_GUEST_PAT;
if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES))
kvm_caps.supported_xss = 0;
if (!kvm_cpu_cap_has(X86_FEATURE_SHSTK) &&
!kvm_cpu_cap_has(X86_FEATURE_IBT))
kvm_caps.supported_xss &= ~XFEATURE_MASK_CET_ALL;
if ((kvm_caps.supported_xss & XFEATURE_MASK_CET_ALL) != XFEATURE_MASK_CET_ALL) {
kvm_cpu_cap_clear(X86_FEATURE_SHSTK);
kvm_cpu_cap_clear(X86_FEATURE_IBT);
kvm_caps.supported_xss &= ~XFEATURE_MASK_CET_ALL;
}
if (kvm_caps.has_tsc_control) {
/*
* Make sure the user can only configure tsc_khz values that

View File

@@ -471,6 +471,8 @@ extern struct kvm_host_values kvm_host;
extern bool enable_pmu;
void kvm_setup_xss_caps(void);
/*
* Get a filtered version of KVM's supported XCR0 that strips out dynamic
* features for which the current process doesn't (yet) have permission to use.

View File

@@ -251,6 +251,7 @@ LINUX_TOOL_INCLUDE = $(top_srcdir)/tools/include
LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include
CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \
-Wno-gnu-variable-sized-type-not-at-end -MD -MP -DCONFIG_64BIT \
-U_FORTIFY_SOURCE \
-fno-builtin-memcmp -fno-builtin-memcpy \
-fno-builtin-memset -fno-builtin-strnlen \
-fno-stack-protector -fno-PIE -fno-strict-aliasing \

View File

@@ -157,21 +157,28 @@ irqfd_shutdown(struct work_struct *work)
}
/* assumes kvm->irqfds.lock is held */
static bool
irqfd_is_active(struct kvm_kernel_irqfd *irqfd)
/*
 * Return true if the irqfd is still on kvm's irqfds list, i.e. hasn't been
 * deactivated by deassignment (irqfd_deactivate() does list_del_init()).
 */
static bool irqfd_is_active(struct kvm_kernel_irqfd *irqfd)
{
/*
 * Assert that either irqfds.lock or SRCU is held, as irqfds.lock must
 * be held to prevent false positives (on the irqfd being active), and
 * while false negatives are impossible as irqfds are never added back
 * to the list once they're deactivated, the caller must at least hold
 * SRCU to guard against routing changes if the irqfd is deactivated.
 */
lockdep_assert_once(lockdep_is_held(&irqfd->kvm->irqfds.lock) ||
srcu_read_lock_held(&irqfd->kvm->irq_srcu));
return list_empty(&irqfd->list) ? false : true;
}
/*
* Mark the irqfd as inactive and schedule it for removal
*
* assumes kvm->irqfds.lock is held
*/
static void
irqfd_deactivate(struct kvm_kernel_irqfd *irqfd)
static void irqfd_deactivate(struct kvm_kernel_irqfd *irqfd)
{
lockdep_assert_held(&irqfd->kvm->irqfds.lock);
BUG_ON(!irqfd_is_active(irqfd));
list_del_init(&irqfd->list);
@@ -217,8 +224,15 @@ irqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
seq = read_seqcount_begin(&irqfd->irq_entry_sc);
irq = irqfd->irq_entry;
} while (read_seqcount_retry(&irqfd->irq_entry_sc, seq));
/* An event has been signaled, inject an interrupt */
if (kvm_arch_set_irq_inatomic(&irq, kvm,
/*
* An event has been signaled, inject an interrupt unless the
* irqfd is being deassigned (isn't active), in which case the
* routing information may be stale (once the irqfd is removed
* from the list, it will stop receiving routing updates).
*/
if (unlikely(!irqfd_is_active(irqfd)) ||
kvm_arch_set_irq_inatomic(&irq, kvm,
KVM_USERSPACE_IRQ_SOURCE_ID, 1,
false) == -EWOULDBLOCK)
schedule_work(&irqfd->inject);
@@ -585,18 +599,8 @@ kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args)
spin_lock_irq(&kvm->irqfds.lock);
list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) {
if (irqfd->eventfd == eventfd && irqfd->gsi == args->gsi) {
/*
* This clearing of irq_entry.type is needed for when
* another thread calls kvm_irq_routing_update before
* we flush workqueue below (we synchronize with
* kvm_irq_routing_update using irqfds.lock).
*/
write_seqcount_begin(&irqfd->irq_entry_sc);
irqfd->irq_entry.type = 0;
write_seqcount_end(&irqfd->irq_entry_sc);
if (irqfd->eventfd == eventfd && irqfd->gsi == args->gsi)
irqfd_deactivate(irqfd);
}
}
spin_unlock_irq(&kvm->irqfds.lock);