mirror of
				git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
				synced 2025-09-04 20:19:47 +08:00 
			
		
		
		
	 137e553135
			
		
	
	
		137e553135
		
	
	
	
	
		
			
			With the VF Token interface, we can now expect that a vfio userspace driver must be in collaboration with the PF driver; an unwitting userspace driver will not be able to get past the GET_DEVICE_FD step in accessing the device. We can now move on to actually allowing SR-IOV to be enabled by vfio-pci on the PF. Support for this is not enabled by default in this commit, but it does provide a module option for this to be enabled (enable_sriov=1). Enabling VFs is rather straightforward, except we don't want to risk that a VF might get autoprobed and bound to other drivers, so a bus notifier is used to "capture" VFs to vfio-pci using the driver_override support. We assume any later action to bind the device to other drivers is condoned by the system admin and allow it with a log warning. vfio-pci will disable SR-IOV on a PF before releasing the device, allowing a VF driver to be assured other drivers cannot take over the PF and that any other userspace driver must know the shared VF token. This support also does not provide a mechanism for the PF userspace driver itself to manipulate SR-IOV through the vfio API. With this patch, SR-IOV can only be enabled via the host sysfs interface and the PF driver user cannot create or remove VFs. Reviewed-by: Cornelia Huck <cohuck@redhat.com> Reviewed-by: Kevin Tian <kevin.tian@intel.com> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
		
			
				
	
	
		
			200 lines
		
	
	
		
			5.4 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			200 lines
		
	
	
		
			5.4 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /* SPDX-License-Identifier: GPL-2.0-only */
 | |
| /*
 | |
|  * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
 | |
|  *     Author: Alex Williamson <alex.williamson@redhat.com>
 | |
|  *
 | |
|  * Derived from original vfio:
 | |
|  * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
 | |
|  * Author: Tom Lyon, pugs@cisco.com
 | |
|  */
 | |
| 
 | |
| #include <linux/mutex.h>
 | |
| #include <linux/pci.h>
 | |
| #include <linux/irqbypass.h>
 | |
| #include <linux/types.h>
 | |
| #include <linux/uuid.h>
 | |
| #include <linux/notifier.h>
 | |
| 
 | |
| #ifndef VFIO_PCI_PRIVATE_H
 | |
| #define VFIO_PCI_PRIVATE_H
 | |
| 
 | |
/*
 * An offset value is split at bit VFIO_PCI_OFFSET_SHIFT: the bits at and
 * above the shift hold an index, the bits below it (selected by
 * VFIO_PCI_OFFSET_MASK) hold the remainder.
 */
#define VFIO_PCI_OFFSET_SHIFT   40

/*
 * Index encoded in @off.  The argument is parenthesized so that expressions
 * using operators of lower precedence than '>>' (e.g. "a | b") are shifted
 * as a whole.
 */
#define VFIO_PCI_OFFSET_TO_INDEX(off)	((off) >> VFIO_PCI_OFFSET_SHIFT)
/* Offset value whose index part is @index and whose low bits are zero. */
#define VFIO_PCI_INDEX_TO_OFFSET(index)	((u64)(index) << VFIO_PCI_OFFSET_SHIFT)
/* Mask covering every bit below VFIO_PCI_OFFSET_SHIFT. */
#define VFIO_PCI_OFFSET_MASK	(((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1)
 | |
| 
 | |
/*
 * Special capability IDs with predefined access behaviour.  They take the
 * two highest 8-bit values (0xFF and 0xFE).
 */
#define PCI_CAP_ID_INVALID		0xFF	/* default raw access */
#define PCI_CAP_ID_INVALID_VIRT		0xFE	/* default virt access */

/* Cap maximum number of ioeventfds per device (arbitrary) */
#define VFIO_PCI_IOEVENTFD_MAX		1000
 | |
| 
 | |
| struct vfio_pci_ioeventfd {
 | |
| 	struct list_head	next;
 | |
| 	struct virqfd		*virqfd;
 | |
| 	void __iomem		*addr;
 | |
| 	uint64_t		data;
 | |
| 	loff_t			pos;
 | |
| 	int			bar;
 | |
| 	int			count;
 | |
| };
 | |
| 
 | |
| struct vfio_pci_irq_ctx {
 | |
| 	struct eventfd_ctx	*trigger;
 | |
| 	struct virqfd		*unmask;
 | |
| 	struct virqfd		*mask;
 | |
| 	char			*name;
 | |
| 	bool			masked;
 | |
| 	struct irq_bypass_producer	producer;
 | |
| };
 | |
| 
 | |
| struct vfio_pci_device;
 | |
| struct vfio_pci_region;
 | |
| 
 | |
| struct vfio_pci_regops {
 | |
| 	size_t	(*rw)(struct vfio_pci_device *vdev, char __user *buf,
 | |
| 		      size_t count, loff_t *ppos, bool iswrite);
 | |
| 	void	(*release)(struct vfio_pci_device *vdev,
 | |
| 			   struct vfio_pci_region *region);
 | |
| 	int	(*mmap)(struct vfio_pci_device *vdev,
 | |
| 			struct vfio_pci_region *region,
 | |
| 			struct vm_area_struct *vma);
 | |
| 	int	(*add_capability)(struct vfio_pci_device *vdev,
 | |
| 				  struct vfio_pci_region *region,
 | |
| 				  struct vfio_info_cap *caps);
 | |
| };
 | |
| 
 | |
| struct vfio_pci_region {
 | |
| 	u32				type;
 | |
| 	u32				subtype;
 | |
| 	const struct vfio_pci_regops	*ops;
 | |
| 	void				*data;
 | |
| 	size_t				size;
 | |
| 	u32				flags;
 | |
| };
 | |
| 
 | |
| struct vfio_pci_dummy_resource {
 | |
| 	struct resource		resource;
 | |
| 	int			index;
 | |
| 	struct list_head	res_next;
 | |
| };
 | |
| 
 | |
| struct vfio_pci_reflck {
 | |
| 	struct kref		kref;
 | |
| 	struct mutex		lock;
 | |
| };
 | |
| 
 | |
| struct vfio_pci_vf_token {
 | |
| 	struct mutex		lock;
 | |
| 	uuid_t			uuid;
 | |
| 	int			users;
 | |
| };
 | |
| 
 | |
| struct vfio_pci_device {
 | |
| 	struct pci_dev		*pdev;
 | |
| 	void __iomem		*barmap[PCI_STD_NUM_BARS];
 | |
| 	bool			bar_mmap_supported[PCI_STD_NUM_BARS];
 | |
| 	u8			*pci_config_map;
 | |
| 	u8			*vconfig;
 | |
| 	struct perm_bits	*msi_perm;
 | |
| 	spinlock_t		irqlock;
 | |
| 	struct mutex		igate;
 | |
| 	struct vfio_pci_irq_ctx	*ctx;
 | |
| 	int			num_ctx;
 | |
| 	int			irq_type;
 | |
| 	int			num_regions;
 | |
| 	struct vfio_pci_region	*region;
 | |
| 	u8			msi_qmax;
 | |
| 	u8			msix_bar;
 | |
| 	u16			msix_size;
 | |
| 	u32			msix_offset;
 | |
| 	u32			rbar[7];
 | |
| 	bool			pci_2_3;
 | |
| 	bool			virq_disabled;
 | |
| 	bool			reset_works;
 | |
| 	bool			extended_caps;
 | |
| 	bool			bardirty;
 | |
| 	bool			has_vga;
 | |
| 	bool			needs_reset;
 | |
| 	bool			nointx;
 | |
| 	bool			needs_pm_restore;
 | |
| 	struct pci_saved_state	*pci_saved_state;
 | |
| 	struct pci_saved_state	*pm_save;
 | |
| 	struct vfio_pci_reflck	*reflck;
 | |
| 	int			refcnt;
 | |
| 	int			ioeventfds_nr;
 | |
| 	struct eventfd_ctx	*err_trigger;
 | |
| 	struct eventfd_ctx	*req_trigger;
 | |
| 	struct list_head	dummy_resources_list;
 | |
| 	struct mutex		ioeventfds_lock;
 | |
| 	struct list_head	ioeventfds_list;
 | |
| 	struct vfio_pci_vf_token	*vf_token;
 | |
| 	struct notifier_block	nb;
 | |
| };
 | |
| 
 | |
/*
 * Predicates on a device's current ->irq_type.
 *
 * Every macro argument is parenthesized so that callers may pass arbitrary
 * expressions (e.g. "&dev" or "A | B") without the expansion re-associating
 * them.  Note that is_irq_none() evaluates @vdev up to three times.
 */
#define is_intx(vdev) ((vdev)->irq_type == VFIO_PCI_INTX_IRQ_INDEX)
#define is_msi(vdev) ((vdev)->irq_type == VFIO_PCI_MSI_IRQ_INDEX)
#define is_msix(vdev) ((vdev)->irq_type == VFIO_PCI_MSIX_IRQ_INDEX)
/* True when the current type is none of INTx, MSI or MSI-X. */
#define is_irq_none(vdev) (!(is_intx(vdev) || is_msi(vdev) || is_msix(vdev)))
/* True when the current type equals @type. */
#define irq_is(vdev, type) ((vdev)->irq_type == (type))
 | |
| 
 | |
/*
 * Interrupt helpers.  Only the prototypes live in this header; the
 * definitions are elsewhere in the driver.  Function declarations have
 * external linkage by default, so the redundant "extern" is omitted.
 */
void vfio_pci_intx_mask(struct vfio_pci_device *vdev);
void vfio_pci_intx_unmask(struct vfio_pci_device *vdev);

int vfio_pci_set_irqs_ioctl(struct vfio_pci_device *vdev,
			    uint32_t flags, unsigned int index,
			    unsigned int start, unsigned int count,
			    void *data);
 | |
| 
 | |
| extern ssize_t vfio_pci_config_rw(struct vfio_pci_device *vdev,
 | |
| 				  char __user *buf, size_t count,
 | |
| 				  loff_t *ppos, bool iswrite);
 | |
| 
 | |
| extern ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf,
 | |
| 			       size_t count, loff_t *ppos, bool iswrite);
 | |
| 
 | |
| extern ssize_t vfio_pci_vga_rw(struct vfio_pci_device *vdev, char __user *buf,
 | |
| 			       size_t count, loff_t *ppos, bool iswrite);
 | |
| 
 | |
| extern long vfio_pci_ioeventfd(struct vfio_pci_device *vdev, loff_t offset,
 | |
| 			       uint64_t data, int count, int fd);
 | |
| 
 | |
| extern int vfio_pci_init_perm_bits(void);
 | |
| extern void vfio_pci_uninit_perm_bits(void);
 | |
| 
 | |
| extern int vfio_config_init(struct vfio_pci_device *vdev);
 | |
| extern void vfio_config_free(struct vfio_pci_device *vdev);
 | |
| 
 | |
| extern int vfio_pci_register_dev_region(struct vfio_pci_device *vdev,
 | |
| 					unsigned int type, unsigned int subtype,
 | |
| 					const struct vfio_pci_regops *ops,
 | |
| 					size_t size, u32 flags, void *data);
 | |
| 
 | |
| extern int vfio_pci_set_power_state(struct vfio_pci_device *vdev,
 | |
| 				    pci_power_t state);
 | |
| 
 | |
/*
 * vfio_pci_igd_init(): with CONFIG_VFIO_PCI_IGD set only the prototype is
 * provided here (definition elsewhere); otherwise an inline stub returns
 * -ENODEV unconditionally.
 */
#ifdef CONFIG_VFIO_PCI_IGD
int vfio_pci_igd_init(struct vfio_pci_device *vdev);
#else
static inline int vfio_pci_igd_init(struct vfio_pci_device *vdev)
{
	return -ENODEV;
}
#endif
 | |
/*
 * NVLink2 init entry points: with CONFIG_VFIO_PCI_NVLINK2 set only the
 * prototypes are provided here (definitions elsewhere); otherwise both
 * inline stubs return -ENODEV unconditionally.
 *
 * The "nvdia" spelling is part of the existing symbol name and is kept so
 * that callers continue to link.
 */
#ifdef CONFIG_VFIO_PCI_NVLINK2
int vfio_pci_nvdia_v100_nvlink2_init(struct vfio_pci_device *vdev);
int vfio_pci_ibm_npu2_init(struct vfio_pci_device *vdev);
#else
static inline int vfio_pci_nvdia_v100_nvlink2_init(struct vfio_pci_device *vdev)
{
	return -ENODEV;
}

static inline int vfio_pci_ibm_npu2_init(struct vfio_pci_device *vdev)
{
	return -ENODEV;
}
#endif
 | |
| #endif /* VFIO_PCI_PRIVATE_H */
 |