Merge tag 'for-7.0/io_uring-zcrx-large-buffers-20260206' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux

Pull io_uring large rx buffer support from Jens Axboe:
 "Now that the networking updates are upstream, here's the support for
  large buffers for zcrx.

  Using larger (bigger than 4K) rx buffers can increase the efficiency of
  zcrx. For example, it's been shown that using 32K buffers can decrease
  CPU usage by ~30% compared to 4K buffers"

* tag 'for-7.0/io_uring-zcrx-large-buffers-20260206' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux:
  io_uring/zcrx: implement large rx buffer support
This commit is contained in:
Linus Torvalds
2026-02-12 15:07:50 -08:00
2 changed files with 34 additions and 6 deletions

View File

@@ -1104,7 +1104,7 @@ struct io_uring_zcrx_ifq_reg {
struct io_uring_zcrx_offsets offsets;
__u32 zcrx_id;
__u32 __resv2;
__u32 rx_buf_len;
__u64 __resv[3];
};

View File

@@ -55,6 +55,18 @@ static inline struct page *io_zcrx_iov_page(const struct net_iov *niov)
return area->mem.pages[net_iov_idx(niov) << niov_pages_shift];
}
static int io_area_max_shift(struct io_zcrx_mem *mem)
{
struct sg_table *sgt = mem->sgt;
struct scatterlist *sg;
unsigned shift = -1U;
unsigned i;
for_each_sgtable_dma_sg(sgt, sg, i)
shift = min(shift, __ffs(sg->length));
return shift;
}
static int io_populate_area_dma(struct io_zcrx_ifq *ifq,
struct io_zcrx_area *area)
{
@@ -417,12 +429,21 @@ static int io_zcrx_append_area(struct io_zcrx_ifq *ifq,
}
static int io_zcrx_create_area(struct io_zcrx_ifq *ifq,
struct io_uring_zcrx_area_reg *area_reg)
struct io_uring_zcrx_area_reg *area_reg,
struct io_uring_zcrx_ifq_reg *reg)
{
int buf_size_shift = PAGE_SHIFT;
struct io_zcrx_area *area;
unsigned nr_iovs;
int i, ret;
if (reg->rx_buf_len) {
if (!is_power_of_2(reg->rx_buf_len) ||
reg->rx_buf_len < PAGE_SIZE)
return -EINVAL;
buf_size_shift = ilog2(reg->rx_buf_len);
}
ret = -ENOMEM;
area = kzalloc(sizeof(*area), GFP_KERNEL);
if (!area)
@@ -433,7 +454,12 @@ static int io_zcrx_create_area(struct io_zcrx_ifq *ifq,
if (ret)
goto err;
ifq->niov_shift = PAGE_SHIFT;
if (buf_size_shift > io_area_max_shift(&area->mem)) {
ret = -ERANGE;
goto err;
}
ifq->niov_shift = buf_size_shift;
nr_iovs = area->mem.size >> ifq->niov_shift;
area->nia.num_niovs = nr_iovs;
@@ -743,8 +769,7 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
return -EINVAL;
if (copy_from_user(&reg, arg, sizeof(reg)))
return -EFAULT;
if (!mem_is_zero(&reg.__resv, sizeof(reg.__resv)) ||
reg.__resv2 || reg.zcrx_id)
if (!mem_is_zero(&reg.__resv, sizeof(reg.__resv)) || reg.zcrx_id)
return -EINVAL;
if (reg.flags & ZCRX_REG_IMPORT)
return import_zcrx(ctx, arg, &reg);
@@ -801,10 +826,11 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
}
get_device(ifq->dev);
ret = io_zcrx_create_area(ifq, &area);
ret = io_zcrx_create_area(ifq, &area, &reg);
if (ret)
goto netdev_put_unlock;
mp_param.rx_page_size = 1U << ifq->niov_shift;
mp_param.mp_ops = &io_uring_pp_zc_ops;
mp_param.mp_priv = ifq;
ret = __net_mp_open_rxq(ifq->netdev, reg.if_rxq, &mp_param, NULL);
@@ -822,6 +848,8 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
goto err;
}
reg.rx_buf_len = 1U << ifq->niov_shift;
if (copy_to_user(arg, &reg, sizeof(reg)) ||
copy_to_user(u64_to_user_ptr(reg.region_ptr), &rd, sizeof(rd)) ||
copy_to_user(u64_to_user_ptr(reg.area_ptr), &area, sizeof(area))) {