mirror of
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-03-21 23:16:50 +08:00
Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost
Pull virtio updates from Michael Tsirkin: - in-order support in virtio core - multiple address space support in vduse - fixes, cleanups all over the place, notably dma alignment fixes for non-cache-coherent systems * tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost: (59 commits) vduse: avoid adding implicit padding vhost: fix caching attributes of MMIO regions by setting them explicitly vdpa/mlx5: update MAC address handling in mlx5_vdpa_set_attr() vdpa/mlx5: reuse common function for MAC address updates vdpa/mlx5: update mlx_features with driver state check crypto: virtio: Replace package id with numa node id crypto: virtio: Remove duplicated virtqueue_kick in virtio_crypto_skcipher_crypt_req crypto: virtio: Add spinlock protection with virtqueue notification Documentation: Add documentation for VDUSE Address Space IDs vduse: bump version number vduse: add vq group asid support vduse: merge tree search logic of IOTLB_GET_FD and IOTLB_GET_INFO ioctls vduse: take out allocations from vduse_dev_alloc_coherent vduse: remove unused vaddr parameter of vduse_domain_free_coherent vduse: refactor vdpa_dev_add for goto err handling vhost: forbid change vq groups ASID if DRIVER_OK is set vdpa: document set_group_asid thread safety vduse: return internal vq group struct as map token vduse: add vq group support vduse: add v1 API definition ...
This commit is contained in:
@@ -146,6 +146,58 @@ What about block I/O and networking buffers? The block I/O and
|
||||
networking subsystems make sure that the buffers they use are valid
|
||||
for you to DMA from/to.
|
||||
|
||||
__dma_from_device_group_begin/end annotations
|
||||
=============================================
|
||||
|
||||
As explained previously, when a structure contains a DMA_FROM_DEVICE /
|
||||
DMA_BIDIRECTIONAL buffer (device writes to memory) alongside fields that the
|
||||
CPU writes to, cache line sharing between the DMA buffer and CPU-written fields
|
||||
can cause data corruption on CPUs with DMA-incoherent caches.
|
||||
|
||||
The ``__dma_from_device_group_begin(GROUP)/__dma_from_device_group_end(GROUP)``
|
||||
macros ensure proper alignment to prevent this::
|
||||
|
||||
struct my_device {
|
||||
spinlock_t lock1;
|
||||
__dma_from_device_group_begin();
|
||||
char dma_buffer1[16];
|
||||
char dma_buffer2[16];
|
||||
__dma_from_device_group_end();
|
||||
spinlock_t lock2;
|
||||
};
|
||||
|
||||
To isolate a DMA buffer from adjacent fields, use
|
||||
``__dma_from_device_group_begin(GROUP)`` before the first DMA buffer
|
||||
field and ``__dma_from_device_group_end(GROUP)`` after the last DMA
|
||||
buffer field (with the same GROUP name). This protects both the head
|
||||
and tail of the buffer from cache line sharing.
|
||||
|
||||
The GROUP parameter is an optional identifier that names the DMA buffer group
|
||||
(in case you have several in the same structure)::
|
||||
|
||||
struct my_device {
|
||||
spinlock_t lock1;
|
||||
__dma_from_device_group_begin(buffer1);
|
||||
char dma_buffer1[16];
|
||||
__dma_from_device_group_end(buffer1);
|
||||
spinlock_t lock2;
|
||||
__dma_from_device_group_begin(buffer2);
|
||||
char dma_buffer2[16];
|
||||
__dma_from_device_group_end(buffer2);
|
||||
};
|
||||
|
||||
On cache-coherent platforms these macros expand to zero-length array markers.
|
||||
On non-coherent platforms, they also ensure the minimal DMA alignment, which
|
||||
can be as large as 128 bytes.
|
||||
|
||||
.. note::
|
||||
|
||||
It is allowed (though somewhat fragile) to include extra fields, not
|
||||
intended for DMA from the device, within the group (in order to pack the
|
||||
structure tightly) - but only as long as the CPU does not write these
|
||||
fields while any fields in the group are mapped for DMA_FROM_DEVICE or
|
||||
DMA_BIDIRECTIONAL.
|
||||
|
||||
DMA addressing capabilities
|
||||
===========================
|
||||
|
||||
|
||||
@@ -148,3 +148,12 @@ DMA_ATTR_MMIO is appropriate.
|
||||
For architectures that require cache flushing for DMA coherence
|
||||
DMA_ATTR_MMIO will not perform any cache flushing. The address
|
||||
provided must never be mapped cacheable into the CPU.
|
||||
|
||||
DMA_ATTR_CPU_CACHE_CLEAN
|
||||
------------------------
|
||||
|
||||
This attribute indicates the CPU will not dirty any cacheline overlapping this
|
||||
DMA_FROM_DEVICE/DMA_BIDIRECTIONAL buffer while it is mapped. This allows
|
||||
multiple small buffers to safely share a cacheline without risk of data
|
||||
corruption, suppressing DMA debug warnings about overlapping mappings.
|
||||
All mappings sharing a cacheline should have this attribute.
|
||||
|
||||
@@ -230,4 +230,57 @@ able to start the dataplane processing as follows:
|
||||
5. Inject an interrupt for specific virtqueue with the VDUSE_INJECT_VQ_IRQ ioctl
|
||||
after the used ring is filled.
|
||||
|
||||
Enabling ASID (API version 1)
|
||||
------------------------------
|
||||
|
||||
VDUSE supports per-address-space identifiers (ASIDs) starting with API
|
||||
version 1. Set it up with ioctl(VDUSE_SET_API_VERSION) on `/dev/vduse/control`
|
||||
and pass `VDUSE_API_VERSION_1` before creating a new VDUSE instance with
|
||||
ioctl(VDUSE_CREATE_DEV).
|
||||
|
||||
Afterwards, you can use the member asid of ioctl(VDUSE_VQ_SETUP) argument to
|
||||
select the address space of the IOTLB you are querying. The driver could
|
||||
change the address space of any virtqueue group by using the
|
||||
VDUSE_SET_VQ_GROUP_ASID VDUSE message type, and the VDUSE instance needs to
|
||||
reply with VDUSE_REQ_RESULT_OK if it was possible to change it.
|
||||
|
||||
Similarly, you can use ioctl(VDUSE_IOTLB_GET_FD2) to obtain the file descriptor
|
||||
describing an IOVA region of a specific ASID. Example usage:
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
static void *iova_to_va(int dev_fd, uint32_t asid, uint64_t iova,
|
||||
uint64_t *len)
|
||||
{
|
||||
int fd;
|
||||
void *addr;
|
||||
size_t size;
|
||||
struct vduse_iotlb_entry_v2 entry = { 0 };
|
||||
|
||||
entry.v1.start = iova;
|
||||
entry.v1.last = iova;
|
||||
entry.asid = asid;
|
||||
|
||||
fd = ioctl(dev_fd, VDUSE_IOTLB_GET_FD2, &entry);
|
||||
if (fd < 0)
|
||||
return NULL;
|
||||
|
||||
size = entry.v1.last - entry.v1.start + 1;
|
||||
*len = entry.v1.last - iova + 1;
|
||||
addr = mmap(0, size, perm_to_prot(entry.v1.perm), MAP_SHARED,
|
||||
fd, entry.v1.offset);
|
||||
close(fd);
|
||||
if (addr == MAP_FAILED)
|
||||
return NULL;
|
||||
|
||||
/*
|
||||
* Using some data structures such as linked list to store
|
||||
* the iotlb mapping. The munmap(2) should be called for the
|
||||
* cached mapping when the corresponding VDUSE_UPDATE_IOTLB
|
||||
* message is received or the device is reset.
|
||||
*/
|
||||
|
||||
return addr + iova - entry.v1.start;
|
||||
}
|
||||
|
||||
For more details on the uAPI, please see include/uapi/linux/vduse.h.
|
||||
|
||||
@@ -11,6 +11,7 @@
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/virtio.h>
|
||||
#include <linux/virtio_rng.h>
|
||||
#include <linux/dma-mapping.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/slab.h>
|
||||
|
||||
@@ -28,11 +29,13 @@ struct virtrng_info {
|
||||
unsigned int data_avail;
|
||||
unsigned int data_idx;
|
||||
/* minimal size returned by rng_buffer_size() */
|
||||
__dma_from_device_group_begin();
|
||||
#if SMP_CACHE_BYTES < 32
|
||||
u8 data[32];
|
||||
#else
|
||||
u8 data[SMP_CACHE_BYTES];
|
||||
#endif
|
||||
__dma_from_device_group_end();
|
||||
};
|
||||
|
||||
static void random_recv_done(struct virtqueue *vq)
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
*/
|
||||
|
||||
#include <linux/completion.h>
|
||||
#include <linux/dma-mapping.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/gpio/driver.h>
|
||||
#include <linux/io.h>
|
||||
@@ -24,9 +25,13 @@
|
||||
struct virtio_gpio_line {
|
||||
struct mutex lock; /* Protects line operation */
|
||||
struct completion completion;
|
||||
struct virtio_gpio_request req ____cacheline_aligned;
|
||||
struct virtio_gpio_response res ____cacheline_aligned;
|
||||
|
||||
unsigned int rxlen;
|
||||
|
||||
__dma_from_device_group_begin();
|
||||
struct virtio_gpio_request req;
|
||||
struct virtio_gpio_response res;
|
||||
__dma_from_device_group_end();
|
||||
};
|
||||
|
||||
struct vgpio_irq_line {
|
||||
@@ -37,8 +42,10 @@ struct vgpio_irq_line {
|
||||
bool update_pending;
|
||||
bool queue_pending;
|
||||
|
||||
struct virtio_gpio_irq_request ireq ____cacheline_aligned;
|
||||
struct virtio_gpio_irq_response ires ____cacheline_aligned;
|
||||
__dma_from_device_group_begin();
|
||||
struct virtio_gpio_irq_request ireq;
|
||||
struct virtio_gpio_irq_response ires;
|
||||
__dma_from_device_group_end();
|
||||
};
|
||||
|
||||
struct virtio_gpio {
|
||||
|
||||
@@ -29,6 +29,7 @@
|
||||
#include <scsi/scsi_tcq.h>
|
||||
#include <scsi/scsi_devinfo.h>
|
||||
#include <linux/seqlock.h>
|
||||
#include <linux/dma-mapping.h>
|
||||
|
||||
#include "sd.h"
|
||||
|
||||
@@ -61,7 +62,7 @@ struct virtio_scsi_cmd {
|
||||
|
||||
struct virtio_scsi_event_node {
|
||||
struct virtio_scsi *vscsi;
|
||||
struct virtio_scsi_event event;
|
||||
struct virtio_scsi_event *event;
|
||||
struct work_struct work;
|
||||
};
|
||||
|
||||
@@ -89,6 +90,11 @@ struct virtio_scsi {
|
||||
|
||||
struct virtio_scsi_vq ctrl_vq;
|
||||
struct virtio_scsi_vq event_vq;
|
||||
|
||||
__dma_from_device_group_begin();
|
||||
struct virtio_scsi_event events[VIRTIO_SCSI_EVENT_LEN];
|
||||
__dma_from_device_group_end();
|
||||
|
||||
struct virtio_scsi_vq req_vqs[];
|
||||
};
|
||||
|
||||
@@ -237,12 +243,12 @@ static int virtscsi_kick_event(struct virtio_scsi *vscsi,
|
||||
unsigned long flags;
|
||||
|
||||
INIT_WORK(&event_node->work, virtscsi_handle_event);
|
||||
sg_init_one(&sg, &event_node->event, sizeof(struct virtio_scsi_event));
|
||||
sg_init_one(&sg, event_node->event, sizeof(struct virtio_scsi_event));
|
||||
|
||||
spin_lock_irqsave(&vscsi->event_vq.vq_lock, flags);
|
||||
|
||||
err = virtqueue_add_inbuf(vscsi->event_vq.vq, &sg, 1, event_node,
|
||||
GFP_ATOMIC);
|
||||
err = virtqueue_add_inbuf_cache_clean(vscsi->event_vq.vq, &sg, 1, event_node,
|
||||
GFP_ATOMIC);
|
||||
if (!err)
|
||||
virtqueue_kick(vscsi->event_vq.vq);
|
||||
|
||||
@@ -257,6 +263,7 @@ static int virtscsi_kick_event_all(struct virtio_scsi *vscsi)
|
||||
|
||||
for (i = 0; i < VIRTIO_SCSI_EVENT_LEN; i++) {
|
||||
vscsi->event_list[i].vscsi = vscsi;
|
||||
vscsi->event_list[i].event = &vscsi->events[i];
|
||||
virtscsi_kick_event(vscsi, &vscsi->event_list[i]);
|
||||
}
|
||||
|
||||
@@ -380,7 +387,7 @@ static void virtscsi_handle_event(struct work_struct *work)
|
||||
struct virtio_scsi_event_node *event_node =
|
||||
container_of(work, struct virtio_scsi_event_node, work);
|
||||
struct virtio_scsi *vscsi = event_node->vscsi;
|
||||
struct virtio_scsi_event *event = &event_node->event;
|
||||
struct virtio_scsi_event *event = event_node->event;
|
||||
|
||||
if (event->event &
|
||||
cpu_to_virtio32(vscsi->vdev, VIRTIO_SCSI_T_EVENTS_MISSED)) {
|
||||
|
||||
@@ -2125,6 +2125,74 @@ static void teardown_steering(struct mlx5_vdpa_net *ndev)
|
||||
mlx5_destroy_flow_table(ndev->rxft);
|
||||
}
|
||||
|
||||
static int mlx5_vdpa_change_mac(struct mlx5_vdpa_net *ndev,
|
||||
struct mlx5_core_dev *pfmdev,
|
||||
const u8 *new_mac)
|
||||
{
|
||||
struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
|
||||
u8 old_mac[ETH_ALEN];
|
||||
|
||||
if (is_zero_ether_addr(new_mac))
|
||||
return -EINVAL;
|
||||
|
||||
if (!is_zero_ether_addr(ndev->config.mac)) {
|
||||
if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
|
||||
mlx5_vdpa_warn(mvdev, "failed to delete old MAC %pM from MPFS table\n",
|
||||
ndev->config.mac);
|
||||
return -EIO;
|
||||
}
|
||||
}
|
||||
|
||||
if (mlx5_mpfs_add_mac(pfmdev, (u8 *)new_mac)) {
|
||||
mlx5_vdpa_warn(mvdev, "failed to insert new MAC %pM into MPFS table\n",
|
||||
new_mac);
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
/* backup the original mac address so that if failed to add the forward rules
|
||||
* we could restore it
|
||||
*/
|
||||
ether_addr_copy(old_mac, ndev->config.mac);
|
||||
|
||||
ether_addr_copy(ndev->config.mac, new_mac);
|
||||
|
||||
/* Need recreate the flow table entry, so that the packet could forward back
|
||||
*/
|
||||
mac_vlan_del(ndev, old_mac, 0, false);
|
||||
|
||||
if (mac_vlan_add(ndev, ndev->config.mac, 0, false)) {
|
||||
mlx5_vdpa_warn(mvdev, "failed to insert forward rules, try to restore\n");
|
||||
|
||||
/* Although it hardly run here, we still need double check */
|
||||
if (is_zero_ether_addr(old_mac)) {
|
||||
mlx5_vdpa_warn(mvdev, "restore mac failed: Original MAC is zero\n");
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
/* Try to restore original mac address to MFPS table, and try to restore
|
||||
* the forward rule entry.
|
||||
*/
|
||||
if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
|
||||
mlx5_vdpa_warn(mvdev, "restore mac failed: delete MAC %pM from MPFS table failed\n",
|
||||
ndev->config.mac);
|
||||
}
|
||||
|
||||
if (mlx5_mpfs_add_mac(pfmdev, old_mac)) {
|
||||
mlx5_vdpa_warn(mvdev, "restore mac failed: insert old MAC %pM into MPFS table failed\n",
|
||||
old_mac);
|
||||
}
|
||||
|
||||
ether_addr_copy(ndev->config.mac, old_mac);
|
||||
|
||||
if (mac_vlan_add(ndev, ndev->config.mac, 0, false))
|
||||
mlx5_vdpa_warn(mvdev, "restore forward rules failed: insert forward rules failed\n");
|
||||
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd)
|
||||
{
|
||||
struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
|
||||
@@ -2132,12 +2200,13 @@ static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd)
|
||||
virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
|
||||
struct mlx5_core_dev *pfmdev;
|
||||
size_t read;
|
||||
u8 mac[ETH_ALEN], mac_back[ETH_ALEN];
|
||||
u8 mac[ETH_ALEN];
|
||||
|
||||
pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
|
||||
switch (cmd) {
|
||||
case VIRTIO_NET_CTRL_MAC_ADDR_SET:
|
||||
read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)mac, ETH_ALEN);
|
||||
read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov,
|
||||
(void *)mac, ETH_ALEN);
|
||||
if (read != ETH_ALEN)
|
||||
break;
|
||||
|
||||
@@ -2145,66 +2214,8 @@ static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd)
|
||||
status = VIRTIO_NET_OK;
|
||||
break;
|
||||
}
|
||||
|
||||
if (is_zero_ether_addr(mac))
|
||||
break;
|
||||
|
||||
if (!is_zero_ether_addr(ndev->config.mac)) {
|
||||
if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
|
||||
mlx5_vdpa_warn(mvdev, "failed to delete old MAC %pM from MPFS table\n",
|
||||
ndev->config.mac);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (mlx5_mpfs_add_mac(pfmdev, mac)) {
|
||||
mlx5_vdpa_warn(mvdev, "failed to insert new MAC %pM into MPFS table\n",
|
||||
mac);
|
||||
break;
|
||||
}
|
||||
|
||||
/* backup the original mac address so that if failed to add the forward rules
|
||||
* we could restore it
|
||||
*/
|
||||
memcpy(mac_back, ndev->config.mac, ETH_ALEN);
|
||||
|
||||
memcpy(ndev->config.mac, mac, ETH_ALEN);
|
||||
|
||||
/* Need recreate the flow table entry, so that the packet could forward back
|
||||
*/
|
||||
mac_vlan_del(ndev, mac_back, 0, false);
|
||||
|
||||
if (mac_vlan_add(ndev, ndev->config.mac, 0, false)) {
|
||||
mlx5_vdpa_warn(mvdev, "failed to insert forward rules, try to restore\n");
|
||||
|
||||
/* Although it hardly run here, we still need double check */
|
||||
if (is_zero_ether_addr(mac_back)) {
|
||||
mlx5_vdpa_warn(mvdev, "restore mac failed: Original MAC is zero\n");
|
||||
break;
|
||||
}
|
||||
|
||||
/* Try to restore original mac address to MFPS table, and try to restore
|
||||
* the forward rule entry.
|
||||
*/
|
||||
if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
|
||||
mlx5_vdpa_warn(mvdev, "restore mac failed: delete MAC %pM from MPFS table failed\n",
|
||||
ndev->config.mac);
|
||||
}
|
||||
|
||||
if (mlx5_mpfs_add_mac(pfmdev, mac_back)) {
|
||||
mlx5_vdpa_warn(mvdev, "restore mac failed: insert old MAC %pM into MPFS table failed\n",
|
||||
mac_back);
|
||||
}
|
||||
|
||||
memcpy(ndev->config.mac, mac_back, ETH_ALEN);
|
||||
|
||||
if (mac_vlan_add(ndev, ndev->config.mac, 0, false))
|
||||
mlx5_vdpa_warn(mvdev, "restore forward rules failed: insert forward rules failed\n");
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
status = VIRTIO_NET_OK;
|
||||
status = mlx5_vdpa_change_mac(ndev, pfmdev, mac) ? VIRTIO_NET_ERR :
|
||||
VIRTIO_NET_OK;
|
||||
break;
|
||||
|
||||
default:
|
||||
@@ -3640,9 +3651,6 @@ static int mlx5_set_group_asid(struct vdpa_device *vdev, u32 group,
|
||||
struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
|
||||
int err = 0;
|
||||
|
||||
if (group >= MLX5_VDPA_NUMVQ_GROUPS)
|
||||
return -EINVAL;
|
||||
|
||||
mvdev->mres.group2asid[group] = asid;
|
||||
|
||||
mutex_lock(&mvdev->mres.lock);
|
||||
@@ -4044,7 +4052,6 @@ static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *
|
||||
static int mlx5_vdpa_set_attr(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev,
|
||||
const struct vdpa_dev_set_config *add_config)
|
||||
{
|
||||
struct virtio_net_config *config;
|
||||
struct mlx5_core_dev *pfmdev;
|
||||
struct mlx5_vdpa_dev *mvdev;
|
||||
struct mlx5_vdpa_net *ndev;
|
||||
@@ -4054,16 +4061,23 @@ static int mlx5_vdpa_set_attr(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *
|
||||
mvdev = to_mvdev(dev);
|
||||
ndev = to_mlx5_vdpa_ndev(mvdev);
|
||||
mdev = mvdev->mdev;
|
||||
config = &ndev->config;
|
||||
|
||||
down_write(&ndev->reslock);
|
||||
if (add_config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR)) {
|
||||
|
||||
if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR)) {
|
||||
if (!(ndev->mvdev.status & VIRTIO_CONFIG_S_DRIVER_OK)) {
|
||||
ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_MAC);
|
||||
} else {
|
||||
mlx5_vdpa_warn(mvdev, "device running, skip updating MAC\n");
|
||||
err = -EBUSY;
|
||||
goto out;
|
||||
}
|
||||
pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev));
|
||||
err = mlx5_mpfs_add_mac(pfmdev, config->mac);
|
||||
if (!err)
|
||||
ether_addr_copy(config->mac, add_config->net.mac);
|
||||
err = mlx5_vdpa_change_mac(ndev, pfmdev,
|
||||
(u8 *)add_config->net.mac);
|
||||
}
|
||||
|
||||
out:
|
||||
up_write(&ndev->reslock);
|
||||
return err;
|
||||
}
|
||||
|
||||
@@ -606,12 +606,6 @@ static int vdpasim_set_group_asid(struct vdpa_device *vdpa, unsigned int group,
|
||||
struct vhost_iotlb *iommu;
|
||||
int i;
|
||||
|
||||
if (group > vdpasim->dev_attr.ngroups)
|
||||
return -EINVAL;
|
||||
|
||||
if (asid >= vdpasim->dev_attr.nas)
|
||||
return -EINVAL;
|
||||
|
||||
iommu = &vdpasim->iommu[asid];
|
||||
|
||||
mutex_lock(&vdpasim->mutex);
|
||||
|
||||
@@ -493,17 +493,15 @@ void vduse_domain_unmap_page(struct vduse_iova_domain *domain,
|
||||
vduse_domain_free_iova(iovad, dma_addr, size);
|
||||
}
|
||||
|
||||
void *vduse_domain_alloc_coherent(struct vduse_iova_domain *domain,
|
||||
size_t size, dma_addr_t *dma_addr,
|
||||
gfp_t flag)
|
||||
dma_addr_t vduse_domain_alloc_coherent(struct vduse_iova_domain *domain,
|
||||
size_t size, void *orig)
|
||||
{
|
||||
struct iova_domain *iovad = &domain->consistent_iovad;
|
||||
unsigned long limit = domain->iova_limit;
|
||||
dma_addr_t iova = vduse_domain_alloc_iova(iovad, size, limit);
|
||||
void *orig = alloc_pages_exact(size, flag);
|
||||
|
||||
if (!iova || !orig)
|
||||
goto err;
|
||||
if (!iova)
|
||||
return DMA_MAPPING_ERROR;
|
||||
|
||||
spin_lock(&domain->iotlb_lock);
|
||||
if (vduse_iotlb_add_range(domain, (u64)iova, (u64)iova + size - 1,
|
||||
@@ -514,27 +512,20 @@ void *vduse_domain_alloc_coherent(struct vduse_iova_domain *domain,
|
||||
}
|
||||
spin_unlock(&domain->iotlb_lock);
|
||||
|
||||
*dma_addr = iova;
|
||||
return iova;
|
||||
|
||||
return orig;
|
||||
err:
|
||||
*dma_addr = DMA_MAPPING_ERROR;
|
||||
if (orig)
|
||||
free_pages_exact(orig, size);
|
||||
if (iova)
|
||||
vduse_domain_free_iova(iovad, iova, size);
|
||||
vduse_domain_free_iova(iovad, iova, size);
|
||||
|
||||
return NULL;
|
||||
return DMA_MAPPING_ERROR;
|
||||
}
|
||||
|
||||
void vduse_domain_free_coherent(struct vduse_iova_domain *domain, size_t size,
|
||||
void *vaddr, dma_addr_t dma_addr,
|
||||
unsigned long attrs)
|
||||
dma_addr_t dma_addr, unsigned long attrs)
|
||||
{
|
||||
struct iova_domain *iovad = &domain->consistent_iovad;
|
||||
struct vhost_iotlb_map *map;
|
||||
struct vdpa_map_file *map_file;
|
||||
phys_addr_t pa;
|
||||
|
||||
spin_lock(&domain->iotlb_lock);
|
||||
map = vhost_iotlb_itree_first(domain->iotlb, (u64)dma_addr,
|
||||
@@ -546,12 +537,10 @@ void vduse_domain_free_coherent(struct vduse_iova_domain *domain, size_t size,
|
||||
map_file = (struct vdpa_map_file *)map->opaque;
|
||||
fput(map_file->file);
|
||||
kfree(map_file);
|
||||
pa = map->addr;
|
||||
vhost_iotlb_map_free(domain->iotlb, map);
|
||||
spin_unlock(&domain->iotlb_lock);
|
||||
|
||||
vduse_domain_free_iova(iovad, dma_addr, size);
|
||||
free_pages_exact(phys_to_virt(pa), size);
|
||||
}
|
||||
|
||||
static vm_fault_t vduse_domain_mmap_fault(struct vm_fault *vmf)
|
||||
|
||||
@@ -65,13 +65,11 @@ void vduse_domain_unmap_page(struct vduse_iova_domain *domain,
|
||||
dma_addr_t dma_addr, size_t size,
|
||||
enum dma_data_direction dir, unsigned long attrs);
|
||||
|
||||
void *vduse_domain_alloc_coherent(struct vduse_iova_domain *domain,
|
||||
size_t size, dma_addr_t *dma_addr,
|
||||
gfp_t flag);
|
||||
dma_addr_t vduse_domain_alloc_coherent(struct vduse_iova_domain *domain,
|
||||
size_t size, void *orig);
|
||||
|
||||
void vduse_domain_free_coherent(struct vduse_iova_domain *domain, size_t size,
|
||||
void *vaddr, dma_addr_t dma_addr,
|
||||
unsigned long attrs);
|
||||
dma_addr_t dma_addr, unsigned long attrs);
|
||||
|
||||
void vduse_domain_reset_bounce_map(struct vduse_iova_domain *domain);
|
||||
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
*/
|
||||
|
||||
#include "linux/virtio_net.h"
|
||||
#include <linux/cleanup.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/cdev.h>
|
||||
@@ -22,6 +23,7 @@
|
||||
#include <linux/uio.h>
|
||||
#include <linux/vdpa.h>
|
||||
#include <linux/nospec.h>
|
||||
#include <linux/virtio.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include <uapi/linux/vduse.h>
|
||||
@@ -39,6 +41,8 @@
|
||||
#define DRV_LICENSE "GPL v2"
|
||||
|
||||
#define VDUSE_DEV_MAX (1U << MINORBITS)
|
||||
#define VDUSE_DEV_MAX_GROUPS 0xffff
|
||||
#define VDUSE_DEV_MAX_AS 0xffff
|
||||
#define VDUSE_MAX_BOUNCE_SIZE (1024 * 1024 * 1024)
|
||||
#define VDUSE_MIN_BOUNCE_SIZE (1024 * 1024)
|
||||
#define VDUSE_BOUNCE_SIZE (64 * 1024 * 1024)
|
||||
@@ -48,6 +52,15 @@
|
||||
|
||||
#define IRQ_UNBOUND -1
|
||||
|
||||
/*
 * The VDUSE instance has not asked for the VDUSE API version, so assume 0.
 *
 * Old devices may not ask for the device version and assume it is 0. Keep
 * this value for them. From the moment the VDUSE instance asks for the
 * version, convert to the latest supported one and continue the regular flow.
 */
|
||||
#define VDUSE_API_VERSION_NOT_ASKED U64_MAX
|
||||
|
||||
struct vduse_virtqueue {
|
||||
u16 index;
|
||||
u16 num_max;
|
||||
@@ -58,6 +71,7 @@ struct vduse_virtqueue {
|
||||
struct vdpa_vq_state state;
|
||||
bool ready;
|
||||
bool kicked;
|
||||
u32 group;
|
||||
spinlock_t kick_lock;
|
||||
spinlock_t irq_lock;
|
||||
struct eventfd_ctx *kickfd;
|
||||
@@ -83,11 +97,23 @@ struct vduse_umem {
|
||||
struct mm_struct *mm;
|
||||
};
|
||||
|
||||
/*
 * State kept for one address space of a VDUSE device. A device holds an
 * array of these (dev->as), indexed by address space ID.
 */
struct vduse_as {
|
||||
struct vduse_iova_domain *domain; /* IOVA domain used for this address space's mappings */
|
||||
struct vduse_umem *umem; /* NOTE(review): presumably the registered user memory for this AS - confirm */
|
||||
struct mutex mem_lock; /* NOTE(review): presumably serializes access to umem - confirm */
|
||||
};
|
||||
|
||||
/*
 * A virtqueue group and the address space it is currently bound to.
 * A pointer to this structure is handed out as the virtio map token
 * (union virtio_map .group).
 */
struct vduse_vq_group {
|
||||
rwlock_t as_lock; /* taken by the vq_group_as_{read,write}_lock guards when dev->nas > 1 */
|
||||
struct vduse_as *as; /* Protected by as_lock */
|
||||
struct vduse_dev *dev; /* owning device; the lock guards read dev->nas through it */
|
||||
};
|
||||
|
||||
struct vduse_dev {
|
||||
struct vduse_vdpa *vdev;
|
||||
struct device *dev;
|
||||
struct vduse_virtqueue **vqs;
|
||||
struct vduse_iova_domain *domain;
|
||||
struct vduse_as *as;
|
||||
char *name;
|
||||
struct mutex lock;
|
||||
spinlock_t msg_lock;
|
||||
@@ -114,8 +140,9 @@ struct vduse_dev {
|
||||
u8 status;
|
||||
u32 vq_num;
|
||||
u32 vq_align;
|
||||
struct vduse_umem *umem;
|
||||
struct mutex mem_lock;
|
||||
u32 ngroups;
|
||||
u32 nas;
|
||||
struct vduse_vq_group *groups;
|
||||
unsigned int bounce_size;
|
||||
struct mutex domain_lock;
|
||||
};
|
||||
@@ -305,7 +332,7 @@ static int vduse_dev_set_status(struct vduse_dev *dev, u8 status)
|
||||
return vduse_dev_msg_sync(dev, &msg);
|
||||
}
|
||||
|
||||
static int vduse_dev_update_iotlb(struct vduse_dev *dev,
|
||||
static int vduse_dev_update_iotlb(struct vduse_dev *dev, u32 asid,
|
||||
u64 start, u64 last)
|
||||
{
|
||||
struct vduse_dev_msg msg = { 0 };
|
||||
@@ -314,8 +341,14 @@ static int vduse_dev_update_iotlb(struct vduse_dev *dev,
|
||||
return -EINVAL;
|
||||
|
||||
msg.req.type = VDUSE_UPDATE_IOTLB;
|
||||
msg.req.iova.start = start;
|
||||
msg.req.iova.last = last;
|
||||
if (dev->api_version < VDUSE_API_VERSION_1) {
|
||||
msg.req.iova.start = start;
|
||||
msg.req.iova.last = last;
|
||||
} else {
|
||||
msg.req.iova_v2.start = start;
|
||||
msg.req.iova_v2.last = last;
|
||||
msg.req.iova_v2.asid = asid;
|
||||
}
|
||||
|
||||
return vduse_dev_msg_sync(dev, &msg);
|
||||
}
|
||||
@@ -430,11 +463,14 @@ static __poll_t vduse_dev_poll(struct file *file, poll_table *wait)
|
||||
static void vduse_dev_reset(struct vduse_dev *dev)
|
||||
{
|
||||
int i;
|
||||
struct vduse_iova_domain *domain = dev->domain;
|
||||
|
||||
/* The coherent mappings are handled in vduse_dev_free_coherent() */
|
||||
if (domain && domain->bounce_map)
|
||||
vduse_domain_reset_bounce_map(domain);
|
||||
for (i = 0; i < dev->nas; i++) {
|
||||
struct vduse_iova_domain *domain = dev->as[i].domain;
|
||||
|
||||
if (domain && domain->bounce_map)
|
||||
vduse_domain_reset_bounce_map(domain);
|
||||
}
|
||||
|
||||
down_write(&dev->rwsem);
|
||||
|
||||
@@ -592,6 +628,63 @@ static int vduse_vdpa_set_vq_state(struct vdpa_device *vdpa, u16 idx,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Return the group that virtqueue @idx belongs to.
 *
 * Devices using an API version older than VDUSE_API_VERSION_1 always
 * report group 0; otherwise the group stored in the virtqueue is returned.
 */
static u32 vduse_get_vq_group(struct vdpa_device *vdpa, u16 idx)
{
	struct vduse_dev *vduse = vdpa_to_vduse(vdpa);
	bool has_groups = vduse->api_version >= VDUSE_API_VERSION_1;

	return has_groups ? vduse->vqs[idx]->group : 0;
}
|
||||
|
||||
/*
 * Return the map token for virtqueue @idx: a pointer to the device's
 * vq group structure that the virtqueue belongs to.
 */
static union virtio_map vduse_get_vq_map(struct vdpa_device *vdpa, u16 idx)
{
	struct vduse_dev *vduse = vdpa_to_vduse(vdpa);
	u32 group_idx = vduse_get_vq_group(vdpa, idx);

	return (union virtio_map){ .group = vduse->groups + group_idx };
}
|
||||
|
||||
/*
 * Scope guard for the read side of a vq group's as_lock, used as
 * guard(vq_group_as_read_lock)(group). The lock is taken and released
 * only when the owning device has more than one address space
 * (dev->nas > 1); otherwise both steps do nothing.
 */
DEFINE_GUARD(vq_group_as_read_lock, struct vduse_vq_group *,
|
||||
if (_T->dev->nas > 1)
|
||||
read_lock(&_T->as_lock),
|
||||
if (_T->dev->nas > 1)
|
||||
read_unlock(&_T->as_lock))
|
||||
|
||||
/*
 * Scope guard for the write side of a vq group's as_lock, used as
 * guard(vq_group_as_write_lock)(group). The lock is taken and released
 * only when the owning device has more than one address space
 * (dev->nas > 1); otherwise both steps do nothing.
 */
DEFINE_GUARD(vq_group_as_write_lock, struct vduse_vq_group *,
|
||||
if (_T->dev->nas > 1)
|
||||
write_lock(&_T->as_lock),
|
||||
if (_T->dev->nas > 1)
|
||||
write_unlock(&_T->as_lock))
|
||||
|
||||
static int vduse_set_group_asid(struct vdpa_device *vdpa, unsigned int group,
|
||||
unsigned int asid)
|
||||
{
|
||||
struct vduse_dev *dev = vdpa_to_vduse(vdpa);
|
||||
struct vduse_dev_msg msg = { 0 };
|
||||
int r;
|
||||
|
||||
if (dev->api_version < VDUSE_API_VERSION_1)
|
||||
return -EINVAL;
|
||||
|
||||
msg.req.type = VDUSE_SET_VQ_GROUP_ASID;
|
||||
msg.req.vq_group_asid.group = group;
|
||||
msg.req.vq_group_asid.asid = asid;
|
||||
|
||||
r = vduse_dev_msg_sync(dev, &msg);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
guard(vq_group_as_write_lock)(&dev->groups[group]);
|
||||
dev->groups[group].as = &dev->as[asid];
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int vduse_vdpa_get_vq_state(struct vdpa_device *vdpa, u16 idx,
|
||||
struct vdpa_vq_state *state)
|
||||
{
|
||||
@@ -763,13 +856,13 @@ static int vduse_vdpa_set_map(struct vdpa_device *vdpa,
|
||||
struct vduse_dev *dev = vdpa_to_vduse(vdpa);
|
||||
int ret;
|
||||
|
||||
ret = vduse_domain_set_map(dev->domain, iotlb);
|
||||
ret = vduse_domain_set_map(dev->as[asid].domain, iotlb);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = vduse_dev_update_iotlb(dev, 0ULL, ULLONG_MAX);
|
||||
ret = vduse_dev_update_iotlb(dev, asid, 0ULL, ULLONG_MAX);
|
||||
if (ret) {
|
||||
vduse_domain_clear_map(dev->domain, iotlb);
|
||||
vduse_domain_clear_map(dev->as[asid].domain, iotlb);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -789,6 +882,7 @@ static const struct vdpa_config_ops vduse_vdpa_config_ops = {
|
||||
.set_vq_cb = vduse_vdpa_set_vq_cb,
|
||||
.set_vq_num = vduse_vdpa_set_vq_num,
|
||||
.get_vq_size = vduse_vdpa_get_vq_size,
|
||||
.get_vq_group = vduse_get_vq_group,
|
||||
.set_vq_ready = vduse_vdpa_set_vq_ready,
|
||||
.get_vq_ready = vduse_vdpa_get_vq_ready,
|
||||
.set_vq_state = vduse_vdpa_set_vq_state,
|
||||
@@ -811,6 +905,8 @@ static const struct vdpa_config_ops vduse_vdpa_config_ops = {
|
||||
.get_vq_affinity = vduse_vdpa_get_vq_affinity,
|
||||
.reset = vduse_vdpa_reset,
|
||||
.set_map = vduse_vdpa_set_map,
|
||||
.set_group_asid = vduse_set_group_asid,
|
||||
.get_vq_map = vduse_get_vq_map,
|
||||
.free = vduse_vdpa_free,
|
||||
};
|
||||
|
||||
@@ -818,8 +914,13 @@ static void vduse_dev_sync_single_for_device(union virtio_map token,
|
||||
dma_addr_t dma_addr, size_t size,
|
||||
enum dma_data_direction dir)
|
||||
{
|
||||
struct vduse_iova_domain *domain = token.iova_domain;
|
||||
struct vduse_iova_domain *domain;
|
||||
|
||||
if (!token.group)
|
||||
return;
|
||||
|
||||
guard(vq_group_as_read_lock)(token.group);
|
||||
domain = token.group->as->domain;
|
||||
vduse_domain_sync_single_for_device(domain, dma_addr, size, dir);
|
||||
}
|
||||
|
||||
@@ -827,8 +928,13 @@ static void vduse_dev_sync_single_for_cpu(union virtio_map token,
|
||||
dma_addr_t dma_addr, size_t size,
|
||||
enum dma_data_direction dir)
|
||||
{
|
||||
struct vduse_iova_domain *domain = token.iova_domain;
|
||||
struct vduse_iova_domain *domain;
|
||||
|
||||
if (!token.group)
|
||||
return;
|
||||
|
||||
guard(vq_group_as_read_lock)(token.group);
|
||||
domain = token.group->as->domain;
|
||||
vduse_domain_sync_single_for_cpu(domain, dma_addr, size, dir);
|
||||
}
|
||||
|
||||
@@ -837,8 +943,13 @@ static dma_addr_t vduse_dev_map_page(union virtio_map token, struct page *page,
|
||||
enum dma_data_direction dir,
|
||||
unsigned long attrs)
|
||||
{
|
||||
struct vduse_iova_domain *domain = token.iova_domain;
|
||||
struct vduse_iova_domain *domain;
|
||||
|
||||
if (!token.group)
|
||||
return DMA_MAPPING_ERROR;
|
||||
|
||||
guard(vq_group_as_read_lock)(token.group);
|
||||
domain = token.group->as->domain;
|
||||
return vduse_domain_map_page(domain, page, offset, size, dir, attrs);
|
||||
}
|
||||
|
||||
@@ -846,43 +957,71 @@ static void vduse_dev_unmap_page(union virtio_map token, dma_addr_t dma_addr,
|
||||
size_t size, enum dma_data_direction dir,
|
||||
unsigned long attrs)
|
||||
{
|
||||
struct vduse_iova_domain *domain = token.iova_domain;
|
||||
struct vduse_iova_domain *domain;
|
||||
|
||||
return vduse_domain_unmap_page(domain, dma_addr, size, dir, attrs);
|
||||
if (!token.group)
|
||||
return;
|
||||
|
||||
guard(vq_group_as_read_lock)(token.group);
|
||||
domain = token.group->as->domain;
|
||||
vduse_domain_unmap_page(domain, dma_addr, size, dir, attrs);
|
||||
}
|
||||
|
||||
static void *vduse_dev_alloc_coherent(union virtio_map token, size_t size,
|
||||
dma_addr_t *dma_addr, gfp_t flag)
|
||||
{
|
||||
struct vduse_iova_domain *domain = token.iova_domain;
|
||||
unsigned long iova;
|
||||
void *addr;
|
||||
|
||||
*dma_addr = DMA_MAPPING_ERROR;
|
||||
addr = vduse_domain_alloc_coherent(domain, size,
|
||||
(dma_addr_t *)&iova, flag);
|
||||
if (!token.group)
|
||||
return NULL;
|
||||
|
||||
addr = alloc_pages_exact(size, flag);
|
||||
if (!addr)
|
||||
return NULL;
|
||||
|
||||
*dma_addr = (dma_addr_t)iova;
|
||||
{
|
||||
struct vduse_iova_domain *domain;
|
||||
|
||||
guard(vq_group_as_read_lock)(token.group);
|
||||
domain = token.group->as->domain;
|
||||
*dma_addr = vduse_domain_alloc_coherent(domain, size, addr);
|
||||
if (*dma_addr == DMA_MAPPING_ERROR)
|
||||
goto err;
|
||||
}
|
||||
|
||||
return addr;
|
||||
|
||||
err:
|
||||
free_pages_exact(addr, size);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void vduse_dev_free_coherent(union virtio_map token, size_t size,
|
||||
void *vaddr, dma_addr_t dma_addr,
|
||||
unsigned long attrs)
|
||||
{
|
||||
struct vduse_iova_domain *domain = token.iova_domain;
|
||||
if (!token.group)
|
||||
return;
|
||||
|
||||
vduse_domain_free_coherent(domain, size, vaddr, dma_addr, attrs);
|
||||
{
|
||||
struct vduse_iova_domain *domain;
|
||||
|
||||
guard(vq_group_as_read_lock)(token.group);
|
||||
domain = token.group->as->domain;
|
||||
vduse_domain_free_coherent(domain, size, dma_addr, attrs);
|
||||
}
|
||||
|
||||
free_pages_exact(vaddr, size);
|
||||
}
|
||||
|
||||
static bool vduse_dev_need_sync(union virtio_map token, dma_addr_t dma_addr)
|
||||
{
|
||||
struct vduse_iova_domain *domain = token.iova_domain;
|
||||
if (!token.group)
|
||||
return false;
|
||||
|
||||
return dma_addr < domain->bounce_size;
|
||||
guard(vq_group_as_read_lock)(token.group);
|
||||
return dma_addr < token.group->as->domain->bounce_size;
|
||||
}
|
||||
|
||||
static int vduse_dev_mapping_error(union virtio_map token, dma_addr_t dma_addr)
|
||||
@@ -894,9 +1033,11 @@ static int vduse_dev_mapping_error(union virtio_map token, dma_addr_t dma_addr)
|
||||
|
||||
static size_t vduse_dev_max_mapping_size(union virtio_map token)
|
||||
{
|
||||
struct vduse_iova_domain *domain = token.iova_domain;
|
||||
if (!token.group)
|
||||
return 0;
|
||||
|
||||
return domain->bounce_size;
|
||||
guard(vq_group_as_read_lock)(token.group);
|
||||
return token.group->as->domain->bounce_size;
|
||||
}
|
||||
|
||||
static const struct virtio_map_ops vduse_map_ops = {
|
||||
@@ -1036,39 +1177,40 @@ unlock:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int vduse_dev_dereg_umem(struct vduse_dev *dev,
|
||||
static int vduse_dev_dereg_umem(struct vduse_dev *dev, u32 asid,
|
||||
u64 iova, u64 size)
|
||||
{
|
||||
int ret;
|
||||
|
||||
mutex_lock(&dev->mem_lock);
|
||||
mutex_lock(&dev->as[asid].mem_lock);
|
||||
ret = -ENOENT;
|
||||
if (!dev->umem)
|
||||
if (!dev->as[asid].umem)
|
||||
goto unlock;
|
||||
|
||||
ret = -EINVAL;
|
||||
if (!dev->domain)
|
||||
if (!dev->as[asid].domain)
|
||||
goto unlock;
|
||||
|
||||
if (dev->umem->iova != iova || size != dev->domain->bounce_size)
|
||||
if (dev->as[asid].umem->iova != iova ||
|
||||
size != dev->as[asid].domain->bounce_size)
|
||||
goto unlock;
|
||||
|
||||
vduse_domain_remove_user_bounce_pages(dev->domain);
|
||||
unpin_user_pages_dirty_lock(dev->umem->pages,
|
||||
dev->umem->npages, true);
|
||||
atomic64_sub(dev->umem->npages, &dev->umem->mm->pinned_vm);
|
||||
mmdrop(dev->umem->mm);
|
||||
vfree(dev->umem->pages);
|
||||
kfree(dev->umem);
|
||||
dev->umem = NULL;
|
||||
vduse_domain_remove_user_bounce_pages(dev->as[asid].domain);
|
||||
unpin_user_pages_dirty_lock(dev->as[asid].umem->pages,
|
||||
dev->as[asid].umem->npages, true);
|
||||
atomic64_sub(dev->as[asid].umem->npages, &dev->as[asid].umem->mm->pinned_vm);
|
||||
mmdrop(dev->as[asid].umem->mm);
|
||||
vfree(dev->as[asid].umem->pages);
|
||||
kfree(dev->as[asid].umem);
|
||||
dev->as[asid].umem = NULL;
|
||||
ret = 0;
|
||||
unlock:
|
||||
mutex_unlock(&dev->mem_lock);
|
||||
mutex_unlock(&dev->as[asid].mem_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int vduse_dev_reg_umem(struct vduse_dev *dev,
|
||||
u64 iova, u64 uaddr, u64 size)
|
||||
u32 asid, u64 iova, u64 uaddr, u64 size)
|
||||
{
|
||||
struct page **page_list = NULL;
|
||||
struct vduse_umem *umem = NULL;
|
||||
@@ -1076,14 +1218,14 @@ static int vduse_dev_reg_umem(struct vduse_dev *dev,
|
||||
unsigned long npages, lock_limit;
|
||||
int ret;
|
||||
|
||||
if (!dev->domain || !dev->domain->bounce_map ||
|
||||
size != dev->domain->bounce_size ||
|
||||
if (!dev->as[asid].domain || !dev->as[asid].domain->bounce_map ||
|
||||
size != dev->as[asid].domain->bounce_size ||
|
||||
iova != 0 || uaddr & ~PAGE_MASK)
|
||||
return -EINVAL;
|
||||
|
||||
mutex_lock(&dev->mem_lock);
|
||||
mutex_lock(&dev->as[asid].mem_lock);
|
||||
ret = -EEXIST;
|
||||
if (dev->umem)
|
||||
if (dev->as[asid].umem)
|
||||
goto unlock;
|
||||
|
||||
ret = -ENOMEM;
|
||||
@@ -1107,7 +1249,7 @@ static int vduse_dev_reg_umem(struct vduse_dev *dev,
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = vduse_domain_add_user_bounce_pages(dev->domain,
|
||||
ret = vduse_domain_add_user_bounce_pages(dev->as[asid].domain,
|
||||
page_list, pinned);
|
||||
if (ret)
|
||||
goto out;
|
||||
@@ -1120,7 +1262,7 @@ static int vduse_dev_reg_umem(struct vduse_dev *dev,
|
||||
umem->mm = current->mm;
|
||||
mmgrab(current->mm);
|
||||
|
||||
dev->umem = umem;
|
||||
dev->as[asid].umem = umem;
|
||||
out:
|
||||
if (ret && pinned > 0)
|
||||
unpin_user_pages(page_list, pinned);
|
||||
@@ -1131,7 +1273,7 @@ unlock:
|
||||
vfree(page_list);
|
||||
kfree(umem);
|
||||
}
|
||||
mutex_unlock(&dev->mem_lock);
|
||||
mutex_unlock(&dev->as[asid].mem_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -1151,6 +1293,54 @@ static void vduse_vq_update_effective_cpu(struct vduse_virtqueue *vq)
|
||||
vq->irq_effective_cpu = curr_cpu;
|
||||
}
|
||||
|
||||
/*
 * vduse_dev_iotlb_entry - look up the first IOTLB mapping for an IOVA range
 * @dev: VDUSE device whose address spaces are searched
 * @entry: on input carries the IOVA range [start, last] and the ASID; on
 *         success start, last and perm are overwritten with those of the
 *         mapping found (and offset too, when @f is non-NULL)
 * @f: optional; on success receives the result of get_file() on the
 *     mapping's backing file, so the caller owns a reference to drop
 * @capability: optional; on success set to VDUSE_IOVA_CAP_UMEM when the
 *              mapping spans exactly [0, bounce_size - 1] of a domain that
 *              has a bounce map, and to 0 otherwise
 *
 * Returns 0 when a mapping was found. Returns -EINVAL when start > last,
 * the ASID is out of range, the address space has no domain yet, or no
 * mapping was found for the range.
 */
static int vduse_dev_iotlb_entry(struct vduse_dev *dev,
				 struct vduse_iotlb_entry_v2 *entry,
				 struct file **f, uint64_t *capability)
{
	struct vhost_iotlb_map *found;
	int ret = -EINVAL;
	u32 as_idx;

	if (entry->asid >= dev->nas || entry->start > entry->last)
		return -EINVAL;

	/* Bounds were checked above; also clamp the index under speculation. */
	as_idx = array_index_nospec(entry->asid, dev->nas);

	mutex_lock(&dev->domain_lock);
	if (!dev->as[as_idx].domain)
		goto unlock_domain;

	spin_lock(&dev->as[as_idx].domain->iotlb_lock);
	found = vhost_iotlb_itree_first(dev->as[as_idx].domain->iotlb,
					entry->start, entry->last);
	if (!found)
		goto unlock_iotlb;

	if (f) {
		const struct vdpa_map_file *backing;

		backing = (struct vdpa_map_file *)found->opaque;
		entry->offset = backing->offset;
		*f = get_file(backing->file);
	}

	entry->start = found->start;
	entry->last = found->last;
	entry->perm = found->perm;

	if (capability) {
		/* Only the whole bounce range is reported as umem-capable. */
		if (dev->as[as_idx].domain->bounce_map && found->start == 0 &&
		    found->last == dev->as[as_idx].domain->bounce_size - 1)
			*capability = VDUSE_IOVA_CAP_UMEM;
		else
			*capability = 0;
	}

	ret = 0;

unlock_iotlb:
	spin_unlock(&dev->as[as_idx].domain->iotlb_lock);
unlock_domain:
	mutex_unlock(&dev->domain_lock);
	return ret;
}
|
||||
|
||||
static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
|
||||
unsigned long arg)
|
||||
{
|
||||
@@ -1162,44 +1352,36 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
|
||||
return -EPERM;
|
||||
|
||||
switch (cmd) {
|
||||
case VDUSE_IOTLB_GET_FD: {
|
||||
struct vduse_iotlb_entry entry;
|
||||
struct vhost_iotlb_map *map;
|
||||
struct vdpa_map_file *map_file;
|
||||
case VDUSE_IOTLB_GET_FD:
|
||||
case VDUSE_IOTLB_GET_FD2: {
|
||||
struct vduse_iotlb_entry_v2 entry = {0};
|
||||
struct file *f = NULL;
|
||||
|
||||
ret = -ENOIOCTLCMD;
|
||||
if (dev->api_version < VDUSE_API_VERSION_1 &&
|
||||
cmd == VDUSE_IOTLB_GET_FD2)
|
||||
break;
|
||||
|
||||
ret = -EFAULT;
|
||||
if (copy_from_user(&entry, argp, sizeof(entry)))
|
||||
if (copy_from_user(&entry, argp, _IOC_SIZE(cmd)))
|
||||
break;
|
||||
|
||||
ret = -EINVAL;
|
||||
if (entry.start > entry.last)
|
||||
if (!is_mem_zero((const char *)entry.reserved,
|
||||
sizeof(entry.reserved)))
|
||||
break;
|
||||
|
||||
mutex_lock(&dev->domain_lock);
|
||||
if (!dev->domain) {
|
||||
mutex_unlock(&dev->domain_lock);
|
||||
ret = vduse_dev_iotlb_entry(dev, &entry, &f, NULL);
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
spin_lock(&dev->domain->iotlb_lock);
|
||||
map = vhost_iotlb_itree_first(dev->domain->iotlb,
|
||||
entry.start, entry.last);
|
||||
if (map) {
|
||||
map_file = (struct vdpa_map_file *)map->opaque;
|
||||
f = get_file(map_file->file);
|
||||
entry.offset = map_file->offset;
|
||||
entry.start = map->start;
|
||||
entry.last = map->last;
|
||||
entry.perm = map->perm;
|
||||
}
|
||||
spin_unlock(&dev->domain->iotlb_lock);
|
||||
mutex_unlock(&dev->domain_lock);
|
||||
|
||||
ret = -EINVAL;
|
||||
if (!f)
|
||||
break;
|
||||
|
||||
ret = -EFAULT;
|
||||
if (copy_to_user(argp, &entry, sizeof(entry))) {
|
||||
ret = copy_to_user(argp, &entry, _IOC_SIZE(cmd));
|
||||
if (ret) {
|
||||
ret = -EFAULT;
|
||||
fput(f);
|
||||
break;
|
||||
}
|
||||
@@ -1252,12 +1434,24 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
|
||||
if (config.index >= dev->vq_num)
|
||||
break;
|
||||
|
||||
if (!is_mem_zero((const char *)config.reserved,
|
||||
sizeof(config.reserved)))
|
||||
if (dev->api_version < VDUSE_API_VERSION_1) {
|
||||
if (config.group)
|
||||
break;
|
||||
} else {
|
||||
if (config.group >= dev->ngroups)
|
||||
break;
|
||||
if (dev->status & VIRTIO_CONFIG_S_DRIVER_OK)
|
||||
break;
|
||||
}
|
||||
|
||||
if (config.reserved1 ||
|
||||
!is_mem_zero((const char *)config.reserved2,
|
||||
sizeof(config.reserved2)))
|
||||
break;
|
||||
|
||||
index = array_index_nospec(config.index, dev->vq_num);
|
||||
dev->vqs[index]->num_max = config.max_size;
|
||||
dev->vqs[index]->group = config.group;
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
@@ -1336,6 +1530,7 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
|
||||
}
|
||||
case VDUSE_IOTLB_REG_UMEM: {
|
||||
struct vduse_iova_umem umem;
|
||||
u32 asid;
|
||||
|
||||
ret = -EFAULT;
|
||||
if (copy_from_user(&umem, argp, sizeof(umem)))
|
||||
@@ -1343,17 +1538,21 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
|
||||
|
||||
ret = -EINVAL;
|
||||
if (!is_mem_zero((const char *)umem.reserved,
|
||||
sizeof(umem.reserved)))
|
||||
sizeof(umem.reserved)) ||
|
||||
(dev->api_version < VDUSE_API_VERSION_1 &&
|
||||
umem.asid != 0) || umem.asid >= dev->nas)
|
||||
break;
|
||||
|
||||
mutex_lock(&dev->domain_lock);
|
||||
ret = vduse_dev_reg_umem(dev, umem.iova,
|
||||
asid = array_index_nospec(umem.asid, dev->nas);
|
||||
ret = vduse_dev_reg_umem(dev, asid, umem.iova,
|
||||
umem.uaddr, umem.size);
|
||||
mutex_unlock(&dev->domain_lock);
|
||||
break;
|
||||
}
|
||||
case VDUSE_IOTLB_DEREG_UMEM: {
|
||||
struct vduse_iova_umem umem;
|
||||
u32 asid;
|
||||
|
||||
ret = -EFAULT;
|
||||
if (copy_from_user(&umem, argp, sizeof(umem)))
|
||||
@@ -1361,51 +1560,49 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
|
||||
|
||||
ret = -EINVAL;
|
||||
if (!is_mem_zero((const char *)umem.reserved,
|
||||
sizeof(umem.reserved)))
|
||||
sizeof(umem.reserved)) ||
|
||||
(dev->api_version < VDUSE_API_VERSION_1 &&
|
||||
umem.asid != 0) ||
|
||||
umem.asid >= dev->nas)
|
||||
break;
|
||||
|
||||
mutex_lock(&dev->domain_lock);
|
||||
ret = vduse_dev_dereg_umem(dev, umem.iova,
|
||||
asid = array_index_nospec(umem.asid, dev->nas);
|
||||
ret = vduse_dev_dereg_umem(dev, asid, umem.iova,
|
||||
umem.size);
|
||||
mutex_unlock(&dev->domain_lock);
|
||||
break;
|
||||
}
|
||||
case VDUSE_IOTLB_GET_INFO: {
|
||||
struct vduse_iova_info info;
|
||||
struct vhost_iotlb_map *map;
|
||||
struct vduse_iotlb_entry_v2 entry;
|
||||
|
||||
ret = -EFAULT;
|
||||
if (copy_from_user(&info, argp, sizeof(info)))
|
||||
break;
|
||||
|
||||
ret = -EINVAL;
|
||||
if (info.start > info.last)
|
||||
break;
|
||||
|
||||
if (!is_mem_zero((const char *)info.reserved,
|
||||
sizeof(info.reserved)))
|
||||
break;
|
||||
|
||||
mutex_lock(&dev->domain_lock);
|
||||
if (!dev->domain) {
|
||||
mutex_unlock(&dev->domain_lock);
|
||||
if (dev->api_version < VDUSE_API_VERSION_1) {
|
||||
if (info.asid)
|
||||
break;
|
||||
} else if (info.asid >= dev->nas)
|
||||
break;
|
||||
}
|
||||
spin_lock(&dev->domain->iotlb_lock);
|
||||
map = vhost_iotlb_itree_first(dev->domain->iotlb,
|
||||
info.start, info.last);
|
||||
if (map) {
|
||||
info.start = map->start;
|
||||
info.last = map->last;
|
||||
info.capability = 0;
|
||||
if (dev->domain->bounce_map && map->start == 0 &&
|
||||
map->last == dev->domain->bounce_size - 1)
|
||||
info.capability |= VDUSE_IOVA_CAP_UMEM;
|
||||
}
|
||||
spin_unlock(&dev->domain->iotlb_lock);
|
||||
mutex_unlock(&dev->domain_lock);
|
||||
if (!map)
|
||||
|
||||
entry.start = info.start;
|
||||
entry.last = info.last;
|
||||
entry.asid = info.asid;
|
||||
ret = vduse_dev_iotlb_entry(dev, &entry, NULL,
|
||||
&info.capability);
|
||||
if (ret < 0)
|
||||
break;
|
||||
|
||||
info.start = entry.start;
|
||||
info.last = entry.last;
|
||||
info.asid = entry.asid;
|
||||
|
||||
ret = -EFAULT;
|
||||
if (copy_to_user(argp, &info, sizeof(info)))
|
||||
break;
|
||||
@@ -1426,8 +1623,10 @@ static int vduse_dev_release(struct inode *inode, struct file *file)
|
||||
struct vduse_dev *dev = file->private_data;
|
||||
|
||||
mutex_lock(&dev->domain_lock);
|
||||
if (dev->domain)
|
||||
vduse_dev_dereg_umem(dev, 0, dev->domain->bounce_size);
|
||||
for (int i = 0; i < dev->nas; i++)
|
||||
if (dev->as[i].domain)
|
||||
vduse_dev_dereg_umem(dev, i, 0,
|
||||
dev->as[i].domain->bounce_size);
|
||||
mutex_unlock(&dev->domain_lock);
|
||||
spin_lock(&dev->msg_lock);
|
||||
/* Make sure the inflight messages can be processed after reconnection */
|
||||
@@ -1646,7 +1845,6 @@ static struct vduse_dev *vduse_dev_create(void)
|
||||
return NULL;
|
||||
|
||||
mutex_init(&dev->lock);
|
||||
mutex_init(&dev->mem_lock);
|
||||
mutex_init(&dev->domain_lock);
|
||||
spin_lock_init(&dev->msg_lock);
|
||||
INIT_LIST_HEAD(&dev->send_list);
|
||||
@@ -1697,9 +1895,13 @@ static int vduse_destroy_dev(char *name)
|
||||
idr_remove(&vduse_idr, dev->minor);
|
||||
kvfree(dev->config);
|
||||
vduse_dev_deinit_vqs(dev);
|
||||
if (dev->domain)
|
||||
vduse_domain_destroy(dev->domain);
|
||||
for (int i = 0; i < dev->nas; i++) {
|
||||
if (dev->as[i].domain)
|
||||
vduse_domain_destroy(dev->as[i].domain);
|
||||
}
|
||||
kfree(dev->as);
|
||||
kfree(dev->name);
|
||||
kfree(dev->groups);
|
||||
vduse_dev_destroy(dev);
|
||||
module_put(THIS_MODULE);
|
||||
|
||||
@@ -1737,12 +1939,25 @@ static bool features_is_valid(struct vduse_dev_config *config)
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool vduse_validate_config(struct vduse_dev_config *config)
|
||||
static bool vduse_validate_config(struct vduse_dev_config *config,
|
||||
u64 api_version)
|
||||
{
|
||||
if (!is_mem_zero((const char *)config->reserved,
|
||||
sizeof(config->reserved)))
|
||||
return false;
|
||||
|
||||
if (api_version < VDUSE_API_VERSION_1 &&
|
||||
(config->ngroups || config->nas))
|
||||
return false;
|
||||
|
||||
if (api_version >= VDUSE_API_VERSION_1) {
|
||||
if (!config->ngroups || config->ngroups > VDUSE_DEV_MAX_GROUPS)
|
||||
return false;
|
||||
|
||||
if (!config->nas || config->nas > VDUSE_DEV_MAX_AS)
|
||||
return false;
|
||||
}
|
||||
|
||||
if (config->vq_align > PAGE_SIZE)
|
||||
return false;
|
||||
|
||||
@@ -1806,7 +2021,8 @@ static ssize_t bounce_size_store(struct device *device,
|
||||
|
||||
ret = -EPERM;
|
||||
mutex_lock(&dev->domain_lock);
|
||||
if (dev->domain)
|
||||
/* Assuming that if the first domain is allocated, all are allocated */
|
||||
if (dev->as[0].domain)
|
||||
goto unlock;
|
||||
|
||||
ret = kstrtouint(buf, 10, &bounce_size);
|
||||
@@ -1858,6 +2074,27 @@ static int vduse_create_dev(struct vduse_dev_config *config,
|
||||
dev->device_features = config->features;
|
||||
dev->device_id = config->device_id;
|
||||
dev->vendor_id = config->vendor_id;
|
||||
|
||||
dev->nas = (dev->api_version < VDUSE_API_VERSION_1) ? 1 : config->nas;
|
||||
dev->as = kcalloc(dev->nas, sizeof(dev->as[0]), GFP_KERNEL);
|
||||
if (!dev->as)
|
||||
goto err_as;
|
||||
for (int i = 0; i < dev->nas; i++)
|
||||
mutex_init(&dev->as[i].mem_lock);
|
||||
|
||||
dev->ngroups = (dev->api_version < VDUSE_API_VERSION_1)
|
||||
? 1
|
||||
: config->ngroups;
|
||||
dev->groups = kcalloc(dev->ngroups, sizeof(dev->groups[0]),
|
||||
GFP_KERNEL);
|
||||
if (!dev->groups)
|
||||
goto err_vq_groups;
|
||||
for (u32 i = 0; i < dev->ngroups; ++i) {
|
||||
dev->groups[i].dev = dev;
|
||||
rwlock_init(&dev->groups[i].as_lock);
|
||||
dev->groups[i].as = &dev->as[0];
|
||||
}
|
||||
|
||||
dev->name = kstrdup(config->name, GFP_KERNEL);
|
||||
if (!dev->name)
|
||||
goto err_str;
|
||||
@@ -1894,6 +2131,10 @@ err_dev:
|
||||
err_idr:
|
||||
kfree(dev->name);
|
||||
err_str:
|
||||
kfree(dev->groups);
|
||||
err_vq_groups:
|
||||
kfree(dev->as);
|
||||
err_as:
|
||||
vduse_dev_destroy(dev);
|
||||
err:
|
||||
return ret;
|
||||
@@ -1909,6 +2150,8 @@ static long vduse_ioctl(struct file *file, unsigned int cmd,
|
||||
mutex_lock(&vduse_lock);
|
||||
switch (cmd) {
|
||||
case VDUSE_GET_API_VERSION:
|
||||
if (control->api_version == VDUSE_API_VERSION_NOT_ASKED)
|
||||
control->api_version = VDUSE_API_VERSION_1;
|
||||
ret = put_user(control->api_version, (u64 __user *)argp);
|
||||
break;
|
||||
case VDUSE_SET_API_VERSION: {
|
||||
@@ -1919,7 +2162,7 @@ static long vduse_ioctl(struct file *file, unsigned int cmd,
|
||||
break;
|
||||
|
||||
ret = -EINVAL;
|
||||
if (api_version > VDUSE_API_VERSION)
|
||||
if (api_version > VDUSE_API_VERSION_1)
|
||||
break;
|
||||
|
||||
ret = 0;
|
||||
@@ -1936,7 +2179,9 @@ static long vduse_ioctl(struct file *file, unsigned int cmd,
|
||||
break;
|
||||
|
||||
ret = -EINVAL;
|
||||
if (vduse_validate_config(&config) == false)
|
||||
if (control->api_version == VDUSE_API_VERSION_NOT_ASKED)
|
||||
control->api_version = VDUSE_API_VERSION;
|
||||
if (!vduse_validate_config(&config, control->api_version))
|
||||
break;
|
||||
|
||||
buf = vmemdup_user(argp + size, config.config_size);
|
||||
@@ -1986,7 +2231,7 @@ static int vduse_open(struct inode *inode, struct file *file)
|
||||
if (!control)
|
||||
return -ENOMEM;
|
||||
|
||||
control->api_version = VDUSE_API_VERSION;
|
||||
control->api_version = VDUSE_API_VERSION_NOT_ASKED;
|
||||
file->private_data = control;
|
||||
|
||||
return 0;
|
||||
@@ -2017,7 +2262,7 @@ static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name)
|
||||
|
||||
vdev = vdpa_alloc_device(struct vduse_vdpa, vdpa, dev->dev,
|
||||
&vduse_vdpa_config_ops, &vduse_map_ops,
|
||||
1, 1, name, true);
|
||||
dev->ngroups, dev->nas, name, true);
|
||||
if (IS_ERR(vdev))
|
||||
return PTR_ERR(vdev);
|
||||
|
||||
@@ -2032,7 +2277,8 @@ static int vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
|
||||
const struct vdpa_dev_set_config *config)
|
||||
{
|
||||
struct vduse_dev *dev;
|
||||
int ret;
|
||||
size_t domain_bounce_size;
|
||||
int ret, i;
|
||||
|
||||
mutex_lock(&vduse_lock);
|
||||
dev = vduse_find_dev(name);
|
||||
@@ -2046,27 +2292,41 @@ static int vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
|
||||
return ret;
|
||||
|
||||
mutex_lock(&dev->domain_lock);
|
||||
if (!dev->domain)
|
||||
dev->domain = vduse_domain_create(VDUSE_IOVA_SIZE - 1,
|
||||
dev->bounce_size);
|
||||
mutex_unlock(&dev->domain_lock);
|
||||
if (!dev->domain) {
|
||||
put_device(&dev->vdev->vdpa.dev);
|
||||
return -ENOMEM;
|
||||
ret = 0;
|
||||
|
||||
domain_bounce_size = dev->bounce_size / dev->nas;
|
||||
for (i = 0; i < dev->nas; ++i) {
|
||||
dev->as[i].domain = vduse_domain_create(VDUSE_IOVA_SIZE - 1,
|
||||
domain_bounce_size);
|
||||
if (!dev->as[i].domain) {
|
||||
ret = -ENOMEM;
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
|
||||
dev->vdev->vdpa.vmap.iova_domain = dev->domain;
|
||||
mutex_unlock(&dev->domain_lock);
|
||||
|
||||
ret = _vdpa_register_device(&dev->vdev->vdpa, dev->vq_num);
|
||||
if (ret) {
|
||||
put_device(&dev->vdev->vdpa.dev);
|
||||
mutex_lock(&dev->domain_lock);
|
||||
vduse_domain_destroy(dev->domain);
|
||||
dev->domain = NULL;
|
||||
mutex_unlock(&dev->domain_lock);
|
||||
return ret;
|
||||
}
|
||||
if (ret)
|
||||
goto err_register;
|
||||
|
||||
return 0;
|
||||
|
||||
err_register:
|
||||
mutex_lock(&dev->domain_lock);
|
||||
|
||||
err:
|
||||
for (int j = 0; j < i; j++) {
|
||||
if (dev->as[j].domain) {
|
||||
vduse_domain_destroy(dev->as[j].domain);
|
||||
dev->as[j].domain = NULL;
|
||||
}
|
||||
}
|
||||
mutex_unlock(&dev->domain_lock);
|
||||
|
||||
put_device(&dev->vdev->vdpa.dev);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void vdpa_dev_del(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev)
|
||||
|
||||
@@ -680,8 +680,10 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
|
||||
case VHOST_VDPA_SET_GROUP_ASID:
|
||||
if (copy_from_user(&s, argp, sizeof(s)))
|
||||
return -EFAULT;
|
||||
if (s.num >= vdpa->nas)
|
||||
if (idx >= vdpa->ngroups || s.num >= vdpa->nas)
|
||||
return -EINVAL;
|
||||
if (ops->get_status(vdpa) & VIRTIO_CONFIG_S_DRIVER_OK)
|
||||
return -EBUSY;
|
||||
if (!ops->set_group_asid)
|
||||
return -EOPNOTSUPP;
|
||||
return ops->set_group_asid(vdpa, idx, s.num);
|
||||
@@ -1527,6 +1529,7 @@ static int vhost_vdpa_mmap(struct file *file, struct vm_area_struct *vma)
|
||||
if (vma->vm_end - vma->vm_start != notify.size)
|
||||
return -ENOTSUPP;
|
||||
|
||||
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
|
||||
vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP);
|
||||
vma->vm_ops = &vhost_vdpa_vm_ops;
|
||||
return 0;
|
||||
|
||||
@@ -1444,13 +1444,13 @@ static inline void __user *__vhost_get_user(struct vhost_virtqueue *vq,
|
||||
({ \
|
||||
int ret; \
|
||||
if (!vq->iotlb) { \
|
||||
ret = __put_user(x, ptr); \
|
||||
ret = put_user(x, ptr); \
|
||||
} else { \
|
||||
__typeof__(ptr) to = \
|
||||
(__typeof__(ptr)) __vhost_get_user(vq, ptr, \
|
||||
sizeof(*ptr), VHOST_ADDR_USED); \
|
||||
if (to != NULL) \
|
||||
ret = __put_user(x, to); \
|
||||
ret = put_user(x, to); \
|
||||
else \
|
||||
ret = -EFAULT; \
|
||||
} \
|
||||
@@ -1489,14 +1489,14 @@ static inline int vhost_put_used_idx(struct vhost_virtqueue *vq)
|
||||
({ \
|
||||
int ret; \
|
||||
if (!vq->iotlb) { \
|
||||
ret = __get_user(x, ptr); \
|
||||
ret = get_user(x, ptr); \
|
||||
} else { \
|
||||
__typeof__(ptr) from = \
|
||||
(__typeof__(ptr)) __vhost_get_user(vq, ptr, \
|
||||
sizeof(*ptr), \
|
||||
type); \
|
||||
if (from != NULL) \
|
||||
ret = __get_user(x, from); \
|
||||
ret = get_user(x, from); \
|
||||
else \
|
||||
ret = -EFAULT; \
|
||||
} \
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
#include <linux/virtio_config.h>
|
||||
#include <linux/input.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/dma-mapping.h>
|
||||
|
||||
#include <uapi/linux/virtio_ids.h>
|
||||
#include <uapi/linux/virtio_input.h>
|
||||
@@ -16,7 +17,9 @@ struct virtio_input {
|
||||
char serial[64];
|
||||
char phys[64];
|
||||
struct virtqueue *evt, *sts;
|
||||
__dma_from_device_group_begin();
|
||||
struct virtio_input_event evts[64];
|
||||
__dma_from_device_group_end();
|
||||
spinlock_t lock;
|
||||
bool ready;
|
||||
};
|
||||
@@ -27,7 +30,7 @@ static void virtinput_queue_evtbuf(struct virtio_input *vi,
|
||||
struct scatterlist sg[1];
|
||||
|
||||
sg_init_one(sg, evtbuf, sizeof(*evtbuf));
|
||||
virtqueue_add_inbuf(vi->evt, sg, 1, evtbuf, GFP_ATOMIC);
|
||||
virtqueue_add_inbuf_cache_clean(vi->evt, sg, 1, evtbuf, GFP_ATOMIC);
|
||||
}
|
||||
|
||||
static void virtinput_recv_events(struct virtqueue *vq)
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -7,6 +7,7 @@
|
||||
#include <linux/dma-direction.h>
|
||||
#include <linux/scatterlist.h>
|
||||
#include <linux/bug.h>
|
||||
#include <linux/cache.h>
|
||||
|
||||
/**
|
||||
* List of possible attributes associated with a DMA mapping. The semantics
|
||||
@@ -78,6 +79,13 @@
|
||||
*/
|
||||
#define DMA_ATTR_MMIO (1UL << 10)
|
||||
|
||||
/*
|
||||
* DMA_ATTR_CPU_CACHE_CLEAN: Indicates the CPU will not dirty any cacheline
|
||||
* overlapping this buffer while it is mapped for DMA. All mappings sharing
|
||||
* a cacheline must have this attribute for this to be considered safe.
|
||||
*/
|
||||
#define DMA_ATTR_CPU_CACHE_CLEAN (1UL << 11)
|
||||
|
||||
/*
|
||||
* A dma_addr_t can hold any valid DMA or bus address for the platform. It can
|
||||
* be given to a device to use as a DMA source or target. It is specific to a
|
||||
@@ -703,6 +711,18 @@ static inline int dma_get_cache_alignment(void)
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
 * Alignment attribute applied to the DMA group markers below: aligned to
 * ARCH_DMA_MINALIGN when the architecture defines ARCH_HAS_DMA_MINALIGN,
 * otherwise it expands to nothing.
 */
#if defined(ARCH_HAS_DMA_MINALIGN)
#define ____dma_from_device_aligned __aligned(ARCH_DMA_MINALIGN)
#else
#define ____dma_from_device_aligned
#endif

/* Place before the first member of a DMA buffer group inside a structure */
#define __dma_from_device_group_begin(GROUP)			\
	__cacheline_group_begin(GROUP) ____dma_from_device_aligned

/* Place after the last member of a DMA buffer group inside a structure */
#define __dma_from_device_group_end(GROUP)			\
	__cacheline_group_end(GROUP) ____dma_from_device_aligned
|
||||
|
||||
static inline void *dmam_alloc_coherent(struct device *dev, size_t size,
|
||||
dma_addr_t *dma_handle, gfp_t gfp)
|
||||
{
|
||||
|
||||
@@ -312,7 +312,9 @@ struct vdpa_map_file {
|
||||
* @idx: virtqueue index
|
||||
* Returns the affinity mask
|
||||
* @set_group_asid: Set address space identifier for a
|
||||
* virtqueue group (optional)
|
||||
* virtqueue group (optional). Caller must
|
||||
* prevent this from being executed concurrently
|
||||
* with set_status.
|
||||
* @vdev: vdpa device
|
||||
* @group: virtqueue group
|
||||
* @asid: address space id for this group
|
||||
|
||||
@@ -43,13 +43,13 @@ struct virtqueue {
|
||||
void *priv;
|
||||
};
|
||||
|
||||
struct vduse_iova_domain;
|
||||
struct vduse_vq_group;
|
||||
|
||||
union virtio_map {
|
||||
/* Device that performs DMA */
|
||||
struct device *dma_dev;
|
||||
/* VDUSE specific mapping data */
|
||||
struct vduse_iova_domain *iova_domain;
|
||||
/* VDUSE specific virtqueue group for doing map */
|
||||
struct vduse_vq_group *group;
|
||||
};
|
||||
|
||||
int virtqueue_add_outbuf(struct virtqueue *vq,
|
||||
@@ -62,6 +62,11 @@ int virtqueue_add_inbuf(struct virtqueue *vq,
|
||||
void *data,
|
||||
gfp_t gfp);
|
||||
|
||||
int virtqueue_add_inbuf_cache_clean(struct virtqueue *vq,
|
||||
struct scatterlist sg[], unsigned int num,
|
||||
void *data,
|
||||
gfp_t gfp);
|
||||
|
||||
int virtqueue_add_inbuf_ctx(struct virtqueue *vq,
|
||||
struct scatterlist sg[], unsigned int num,
|
||||
void *data,
|
||||
|
||||
@@ -10,6 +10,10 @@
|
||||
|
||||
#define VDUSE_API_VERSION 0
|
||||
|
||||
/* VQ groups and ASID support */
|
||||
|
||||
#define VDUSE_API_VERSION_1 1
|
||||
|
||||
/*
|
||||
* Get the version of VDUSE API that kernel supported (VDUSE_API_VERSION).
|
||||
* This is used for future extension.
|
||||
@@ -27,6 +31,8 @@
|
||||
* @features: virtio features
|
||||
* @vq_num: the number of virtqueues
|
||||
* @vq_align: the allocation alignment of virtqueue's metadata
|
||||
* @ngroups: number of vq groups that VDUSE device declares
|
||||
* @nas: number of address spaces that VDUSE device declares
|
||||
* @reserved: for future use, needs to be initialized to zero
|
||||
* @config_size: the size of the configuration space
|
||||
* @config: the buffer of the configuration space
|
||||
@@ -41,7 +47,9 @@ struct vduse_dev_config {
|
||||
__u64 features;
|
||||
__u32 vq_num;
|
||||
__u32 vq_align;
|
||||
__u32 reserved[13];
|
||||
__u32 ngroups; /* if VDUSE_API_VERSION >= 1 */
|
||||
__u32 nas; /* if VDUSE_API_VERSION >= 1 */
|
||||
__u32 reserved[11];
|
||||
__u32 config_size;
|
||||
__u8 config[];
|
||||
};
|
||||
@@ -118,14 +126,18 @@ struct vduse_config_data {
|
||||
* struct vduse_vq_config - basic configuration of a virtqueue
|
||||
* @index: virtqueue index
|
||||
* @max_size: the max size of virtqueue
|
||||
* @reserved: for future use, needs to be initialized to zero
|
||||
* @reserved1: for future use, needs to be initialized to zero
|
||||
* @group: virtqueue group
|
||||
* @reserved2: for future use, needs to be initialized to zero
|
||||
*
|
||||
* Structure used by VDUSE_VQ_SETUP ioctl to setup a virtqueue.
|
||||
*/
|
||||
struct vduse_vq_config {
|
||||
__u32 index;
|
||||
__u16 max_size;
|
||||
__u16 reserved[13];
|
||||
__u16 reserved1;
|
||||
__u32 group;
|
||||
__u16 reserved2[10];
|
||||
};
|
||||
|
||||
/*
|
||||
@@ -156,6 +168,16 @@ struct vduse_vq_state_packed {
|
||||
__u16 last_used_idx;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct vduse_vq_group_asid - virtqueue group ASID
|
||||
* @group: Index of the virtqueue group
|
||||
* @asid: Address space ID of the group
|
||||
*/
|
||||
struct vduse_vq_group_asid {
|
||||
__u32 group;
|
||||
__u32 asid;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct vduse_vq_info - information of a virtqueue
|
||||
* @index: virtqueue index
|
||||
@@ -215,6 +237,7 @@ struct vduse_vq_eventfd {
|
||||
* @uaddr: start address of userspace memory, it must be aligned to page size
|
||||
* @iova: start of the IOVA region
|
||||
* @size: size of the IOVA region
|
||||
* @asid: Address space ID of the IOVA region
|
||||
* @reserved: for future use, needs to be initialized to zero
|
||||
*
|
||||
* Structure used by VDUSE_IOTLB_REG_UMEM and VDUSE_IOTLB_DEREG_UMEM
|
||||
@@ -224,7 +247,8 @@ struct vduse_iova_umem {
|
||||
__u64 uaddr;
|
||||
__u64 iova;
|
||||
__u64 size;
|
||||
__u64 reserved[3];
|
||||
__u32 asid;
|
||||
__u32 reserved[5];
|
||||
};
|
||||
|
||||
/* Register userspace memory for IOVA regions */
|
||||
@@ -238,6 +262,7 @@ struct vduse_iova_umem {
|
||||
* @start: start of the IOVA region
|
||||
* @last: last of the IOVA region
|
||||
* @capability: capability of the IOVA region
|
||||
* @asid: Address space ID of the IOVA region, only if device API version >= 1
|
||||
* @reserved: for future use, needs to be initialized to zero
|
||||
*
|
||||
* Structure used by VDUSE_IOTLB_GET_INFO ioctl to get information of
|
||||
@@ -248,7 +273,8 @@ struct vduse_iova_info {
|
||||
__u64 last;
|
||||
#define VDUSE_IOVA_CAP_UMEM (1 << 0)
|
||||
__u64 capability;
|
||||
__u64 reserved[3];
|
||||
__u32 asid; /* Only if device API version >= 1 */
|
||||
__u32 reserved[5];
|
||||
};
|
||||
|
||||
/*
|
||||
@@ -257,6 +283,32 @@ struct vduse_iova_info {
|
||||
*/
|
||||
#define VDUSE_IOTLB_GET_INFO _IOWR(VDUSE_BASE, 0x1a, struct vduse_iova_info)
|
||||
|
||||
/**
|
||||
* struct vduse_iotlb_entry_v2 - entry of IOTLB to describe one IOVA region
|
||||
*
|
||||
* @v1: the original vduse_iotlb_entry
|
||||
* @asid: address space ID of the IOVA region
|
||||
* @reserved: for future use, needs to be initialized to zero
|
||||
*
|
||||
* Structure used by VDUSE_IOTLB_GET_FD2 ioctl to find an overlapped IOVA region.
|
||||
*/
|
||||
struct vduse_iotlb_entry_v2 {
|
||||
__u64 offset;
|
||||
__u64 start;
|
||||
__u64 last;
|
||||
__u8 perm;
|
||||
__u8 padding[7];
|
||||
__u32 asid;
|
||||
__u32 reserved[11];
|
||||
};
|
||||
|
||||
/*
|
||||
* Same as VDUSE_IOTLB_GET_FD but with vduse_iotlb_entry_v2 argument that
|
||||
* supports extra fields.
|
||||
*/
|
||||
#define VDUSE_IOTLB_GET_FD2 _IOWR(VDUSE_BASE, 0x1b, struct vduse_iotlb_entry_v2)
|
||||
|
||||
|
||||
/* The control messages definition for read(2)/write(2) on /dev/vduse/$NAME */
|
||||
|
||||
/**
|
||||
@@ -265,11 +317,14 @@ struct vduse_iova_info {
|
||||
* @VDUSE_SET_STATUS: set the device status
|
||||
* @VDUSE_UPDATE_IOTLB: Notify userspace to update the memory mapping for
|
||||
* specified IOVA range via VDUSE_IOTLB_GET_FD ioctl
|
||||
* @VDUSE_SET_VQ_GROUP_ASID: Notify userspace to update the address space of a
|
||||
* virtqueue group.
|
||||
*/
|
||||
enum vduse_req_type {
|
||||
VDUSE_GET_VQ_STATE,
|
||||
VDUSE_SET_STATUS,
|
||||
VDUSE_UPDATE_IOTLB,
|
||||
VDUSE_SET_VQ_GROUP_ASID,
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -304,6 +359,19 @@ struct vduse_iova_range {
|
||||
__u64 last;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct vduse_iova_range_v2 - IOVA range [start, last] if API_VERSION >= 1
|
||||
* @start: start of the IOVA range
|
||||
* @last: last of the IOVA range
|
||||
* @asid: address space ID of the IOVA range
|
||||
*/
|
||||
struct vduse_iova_range_v2 {
|
||||
__u64 start;
|
||||
__u64 last;
|
||||
__u32 asid;
|
||||
__u32 padding;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct vduse_dev_request - control request
|
||||
* @type: request type
|
||||
@@ -312,6 +380,8 @@ struct vduse_iova_range {
|
||||
* @vq_state: virtqueue state, only index field is available
|
||||
* @s: device status
|
||||
* @iova: IOVA range for updating
|
||||
* @iova_v2: IOVA range for updating if API_VERSION >= 1
|
||||
* @vq_group_asid: ASID of a virtqueue group
|
||||
* @padding: padding
|
||||
*
|
||||
* Structure used by read(2) on /dev/vduse/$NAME.
|
||||
@@ -324,6 +394,11 @@ struct vduse_dev_request {
|
||||
struct vduse_vq_state vq_state;
|
||||
struct vduse_dev_status s;
|
||||
struct vduse_iova_range iova;
|
||||
/* The following members, except padding, exist only if vduse api
|
||||
* version >= 1
|
||||
*/
|
||||
struct vduse_iova_range_v2 iova_v2;
|
||||
struct vduse_vq_group_asid vq_group_asid;
|
||||
__u32 padding[32];
|
||||
};
|
||||
};
|
||||
|
||||
@@ -31,9 +31,6 @@
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* Copyright Rusty Russell IBM Corporation 2007. */
|
||||
#ifndef __KERNEL__
|
||||
#include <stdint.h>
|
||||
#endif
|
||||
#include <linux/types.h>
|
||||
#include <linux/virtio_types.h>
|
||||
|
||||
@@ -202,7 +199,7 @@ static inline void vring_init(struct vring *vr, unsigned int num, void *p,
|
||||
vr->num = num;
|
||||
vr->desc = p;
|
||||
vr->avail = (struct vring_avail *)((char *)p + num * sizeof(struct vring_desc));
|
||||
vr->used = (void *)(((uintptr_t)&vr->avail->ring[num] + sizeof(__virtio16)
|
||||
vr->used = (void *)(((unsigned long)&vr->avail->ring[num] + sizeof(__virtio16)
|
||||
+ align-1) & ~(align - 1));
|
||||
}
|
||||
|
||||
|
||||
@@ -63,6 +63,7 @@ enum map_err_types {
|
||||
* @sg_mapped_ents: 'mapped_ents' from dma_map_sg
|
||||
* @paddr: physical start address of the mapping
|
||||
* @map_err_type: track whether dma_mapping_error() was checked
|
||||
* @is_cache_clean: driver promises not to write to buffer while mapped
|
||||
* @stack_len: number of backtrace entries in @stack_entries
|
||||
* @stack_entries: stack of backtrace history
|
||||
*/
|
||||
@@ -76,7 +77,8 @@ struct dma_debug_entry {
|
||||
int sg_call_ents;
|
||||
int sg_mapped_ents;
|
||||
phys_addr_t paddr;
|
||||
enum map_err_types map_err_type;
|
||||
enum map_err_types map_err_type;
|
||||
bool is_cache_clean;
|
||||
#ifdef CONFIG_STACKTRACE
|
||||
unsigned int stack_len;
|
||||
unsigned long stack_entries[DMA_DEBUG_STACKTRACE_ENTRIES];
|
||||
@@ -472,12 +474,15 @@ static int active_cacheline_dec_overlap(phys_addr_t cln)
|
||||
return active_cacheline_set_overlap(cln, --overlap);
|
||||
}
|
||||
|
||||
static int active_cacheline_insert(struct dma_debug_entry *entry)
|
||||
static int active_cacheline_insert(struct dma_debug_entry *entry,
|
||||
bool *overlap_cache_clean)
|
||||
{
|
||||
phys_addr_t cln = to_cacheline_number(entry);
|
||||
unsigned long flags;
|
||||
int rc;
|
||||
|
||||
*overlap_cache_clean = false;
|
||||
|
||||
/* If the device is not writing memory then we don't have any
|
||||
* concerns about the cpu consuming stale data. This mitigates
|
||||
* legitimate usages of overlapping mappings.
|
||||
@@ -487,8 +492,16 @@ static int active_cacheline_insert(struct dma_debug_entry *entry)
|
||||
|
||||
spin_lock_irqsave(&radix_lock, flags);
|
||||
rc = radix_tree_insert(&dma_active_cacheline, cln, entry);
|
||||
if (rc == -EEXIST)
|
||||
if (rc == -EEXIST) {
|
||||
struct dma_debug_entry *existing;
|
||||
|
||||
active_cacheline_inc_overlap(cln);
|
||||
existing = radix_tree_lookup(&dma_active_cacheline, cln);
|
||||
/* A lookup failure here after we got -EEXIST is unexpected. */
|
||||
WARN_ON(!existing);
|
||||
if (existing)
|
||||
*overlap_cache_clean = existing->is_cache_clean;
|
||||
}
|
||||
spin_unlock_irqrestore(&radix_lock, flags);
|
||||
|
||||
return rc;
|
||||
@@ -583,19 +596,24 @@ DEFINE_SHOW_ATTRIBUTE(dump);
|
||||
*/
|
||||
static void add_dma_entry(struct dma_debug_entry *entry, unsigned long attrs)
|
||||
{
|
||||
bool overlap_cache_clean;
|
||||
struct hash_bucket *bucket;
|
||||
unsigned long flags;
|
||||
int rc;
|
||||
|
||||
entry->is_cache_clean = !!(attrs & DMA_ATTR_CPU_CACHE_CLEAN);
|
||||
|
||||
bucket = get_hash_bucket(entry, &flags);
|
||||
hash_bucket_add(bucket, entry);
|
||||
put_hash_bucket(bucket, flags);
|
||||
|
||||
rc = active_cacheline_insert(entry);
|
||||
rc = active_cacheline_insert(entry, &overlap_cache_clean);
|
||||
if (rc == -ENOMEM) {
|
||||
pr_err_once("cacheline tracking ENOMEM, dma-debug disabled\n");
|
||||
global_disable = true;
|
||||
} else if (rc == -EEXIST && !(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
|
||||
} else if (rc == -EEXIST &&
|
||||
!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
|
||||
!(entry->is_cache_clean && overlap_cache_clean) &&
|
||||
!(IS_ENABLED(CONFIG_DMA_BOUNCE_UNALIGNED_KMALLOC) &&
|
||||
is_swiotlb_active(entry->dev))) {
|
||||
err_printk(entry->dev, entry,
|
||||
|
||||
@@ -17,6 +17,7 @@
|
||||
#include <linux/virtio_ids.h>
|
||||
#include <linux/virtio_config.h>
|
||||
#include <linux/virtio_vsock.h>
|
||||
#include <linux/dma-mapping.h>
|
||||
#include <net/sock.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <net/af_vsock.h>
|
||||
@@ -54,13 +55,6 @@ struct virtio_vsock {
|
||||
int rx_buf_nr;
|
||||
int rx_buf_max_nr;
|
||||
|
||||
/* The following fields are protected by event_lock.
|
||||
* vqs[VSOCK_VQ_EVENT] must be accessed with event_lock held.
|
||||
*/
|
||||
struct mutex event_lock;
|
||||
bool event_run;
|
||||
struct virtio_vsock_event event_list[8];
|
||||
|
||||
u32 guest_cid;
|
||||
bool seqpacket_allow;
|
||||
|
||||
@@ -74,6 +68,15 @@ struct virtio_vsock {
|
||||
*/
|
||||
struct scatterlist *out_sgs[MAX_SKB_FRAGS + 1];
|
||||
struct scatterlist out_bufs[MAX_SKB_FRAGS + 1];
|
||||
|
||||
/* The following fields are protected by event_lock.
|
||||
* vqs[VSOCK_VQ_EVENT] must be accessed with event_lock held.
|
||||
*/
|
||||
struct mutex event_lock;
|
||||
bool event_run;
|
||||
__dma_from_device_group_begin();
|
||||
struct virtio_vsock_event event_list[8];
|
||||
__dma_from_device_group_end();
|
||||
};
|
||||
|
||||
static u32 virtio_transport_get_local_cid(void)
|
||||
@@ -390,7 +393,7 @@ static int virtio_vsock_event_fill_one(struct virtio_vsock *vsock,
|
||||
|
||||
sg_init_one(&sg, event, sizeof(*event));
|
||||
|
||||
return virtqueue_add_inbuf(vq, &sg, 1, event, GFP_KERNEL);
|
||||
return virtqueue_add_inbuf_cache_clean(vq, &sg, 1, event, GFP_KERNEL);
|
||||
}
|
||||
|
||||
/* event_lock must be held */
|
||||
|
||||
@@ -1102,7 +1102,9 @@ our $declaration_macros = qr{(?x:
|
||||
(?:$Storage\s+)?(?:[A-Z_][A-Z0-9]*_){0,2}(?:DEFINE|DECLARE)(?:_[A-Z0-9]+){1,6}\s*\(|
|
||||
(?:$Storage\s+)?[HLP]?LIST_HEAD\s*\(|
|
||||
(?:SKCIPHER_REQUEST|SHASH_DESC|AHASH_REQUEST)_ON_STACK\s*\(|
|
||||
(?:$Storage\s+)?(?:XA_STATE|XA_STATE_ORDER)\s*\(
|
||||
(?:$Storage\s+)?(?:XA_STATE|XA_STATE_ORDER)\s*\(|
|
||||
__cacheline_group_(?:begin|end)(?:_aligned)?\s*\(|
|
||||
__dma_from_device_group_(?:begin|end)\s*\(
|
||||
)};
|
||||
|
||||
our %allow_repeated_words = (
|
||||
|
||||
Reference in New Issue
Block a user