
Merge branch 'io_uring-6.16' into for-6.17/io_uring

Merge in 6.16 io_uring fixes, to avoid clashes with pending net and
settings changes.

* io_uring-6.16:
  io_uring: gate REQ_F_ISREG on !S_ANON_INODE as well
  io_uring/kbuf: flag partial buffer mappings
  io_uring/net: mark iov as dynamically allocated even for single segments
  io_uring: fix resource leak in io_import_dmabuf()
  io_uring: don't assume uaddr alignment in io_vec_fill_bvec
  io_uring/rsrc: don't rely on user vaddr alignment
  io_uring/rsrc: fix folio unpinning
  io_uring: make fallocate be hashed work
Jens Axboe 2025-07-06 16:42:23 -06:00
commit 1bc8890264
8 changed files with 54 additions and 25 deletions

io_uring/io_uring.c

@@ -1706,11 +1706,12 @@ static void io_iopoll_req_issued(struct io_kiocb *req, unsigned int issue_flags)
 
 io_req_flags_t io_file_get_flags(struct file *file)
 {
+	struct inode *inode = file_inode(file);
 	io_req_flags_t res = 0;
 
 	BUILD_BUG_ON(REQ_F_ISREG_BIT != REQ_F_SUPPORT_NOWAIT_BIT + 1);
 
-	if (S_ISREG(file_inode(file)->i_mode))
+	if (S_ISREG(inode->i_mode) && !(inode->i_flags & S_ANON_INODE))
		res |= REQ_F_ISREG;
 	if ((file->f_flags & O_NONBLOCK) || (file->f_mode & FMODE_NOWAIT))
 		res |= REQ_F_SUPPORT_NOWAIT;
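
This gates REQ_F_ISREG on the inode not being an anonymous inode, so io_uring no longer applies regular-file semantics (retry and NOWAIT handling) to anon-inode files whose mode claims S_ISREG. A minimal userspace mock of the gating logic; the struct, flag values, and helper below are illustrative stand-ins, not the kernel's definitions:

#include <stdio.h>

/* Illustrative stand-ins for the flags involved. */
#define MOCK_S_IFREG      0100000   /* regular-file bit in i_mode */
#define MOCK_S_ANON_INODE (1 << 0)  /* inode backed by anon_inode fs */
#define MOCK_REQ_F_ISREG  (1 << 1)  /* regular-file semantics flag */

struct mock_inode {
	unsigned int i_mode;
	unsigned int i_flags;
};

/* Mirrors the shape of the fixed check: mode alone is no longer enough. */
static unsigned int mock_file_get_flags(const struct mock_inode *inode)
{
	unsigned int res = 0;

	if ((inode->i_mode & MOCK_S_IFREG) && !(inode->i_flags & MOCK_S_ANON_INODE))
		res |= MOCK_REQ_F_ISREG;
	return res;
}

int main(void)
{
	struct mock_inode plain = { .i_mode = MOCK_S_IFREG, .i_flags = 0 };
	struct mock_inode anon  = { .i_mode = MOCK_S_IFREG, .i_flags = MOCK_S_ANON_INODE };

	printf("plain file: ISREG=%d\n", !!(mock_file_get_flags(&plain) & MOCK_REQ_F_ISREG));
	printf("anon inode: ISREG=%d\n", !!(mock_file_get_flags(&anon) & MOCK_REQ_F_ISREG));
	return 0;
}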

io_uring/kbuf.c

@@ -271,6 +271,7 @@ static int io_ring_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg,
 		if (len > arg->max_len) {
 			len = arg->max_len;
 			if (!(bl->flags & IOBL_INC)) {
+				arg->partial_map = 1;
 				if (iov != arg->iovs)
 					break;
 				buf->len = len;
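
The new arg->partial_map bit records that the selected buffer had to be truncated to the caller's max_len. A standalone sketch of that clamp-and-flag idea; the struct and function here are made up for illustration:

#include <stdio.h>
#include <stddef.h>

/* Hypothetical stand-in for the selection result. */
struct sel_result {
	size_t len;
	int    partial_map;	/* set when the mapping was truncated */
};

/* Clamp a buffer to the caller's limit and remember that we truncated. */
static void select_buffer(size_t buf_len, size_t max_len, struct sel_result *res)
{
	res->len = buf_len;
	res->partial_map = 0;
	if (buf_len > max_len) {
		res->len = max_len;
		res->partial_map = 1;	/* mirrors arg->partial_map = 1 above */
	}
}

int main(void)
{
	struct sel_result r;

	select_buffer(8192, 4096, &r);
	printf("len=%zu partial=%d\n", r.len, r.partial_map);
	return 0;
}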

io_uring/kbuf.h

@@ -58,7 +58,8 @@ struct buf_sel_arg {
 	size_t max_len;
 	unsigned short nr_iovs;
 	unsigned short mode;
-	unsigned buf_group;
+	unsigned short buf_group;
+	unsigned short partial_map;
 };
 
 void __user *io_buffer_select(struct io_kiocb *req, size_t *len,
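
Narrowing buf_group from unsigned to unsigned short makes room for partial_map without growing the struct on common ABIs. A quick, self-contained sizeof comparison of just the affected tail of the struct (assuming typical alignment rules):

#include <stdio.h>

/* Tail of the struct before the change (buf_group was a full unsigned). */
struct sel_arg_old {
	unsigned short nr_iovs;
	unsigned short mode;
	unsigned       buf_group;
};

/* After: two unsigned shorts pack into the space one unsigned used. */
struct sel_arg_new {
	unsigned short nr_iovs;
	unsigned short mode;
	unsigned short buf_group;
	unsigned short partial_map;
};

int main(void)
{
	/* On typical LP64/ILP32 targets both print 8 bytes. */
	printf("old tail: %zu bytes\n", sizeof(struct sel_arg_old));
	printf("new tail: %zu bytes\n", sizeof(struct sel_arg_new));
	return 0;
}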

io_uring/net.c

@@ -75,12 +75,17 @@ struct io_sr_msg {
 	u16				flags;
 	/* initialised and used only by !msg send variants */
 	u16				buf_group;
-	bool				retry;
+	unsigned short			retry_flags;
 	void __user			*msg_control;
 	/* used only for send zerocopy */
 	struct io_kiocb			*notif;
 };
 
+enum sr_retry_flags {
+	IO_SR_MSG_RETRY		= 1,
+	IO_SR_MSG_PARTIAL_MAP	= 2,
+};
+
 /*
  * Number of times we'll try and do receives if there's more data. If we
  * exceed this limit, then add us to the back of the queue and retry from
@@ -187,7 +192,7 @@ static inline void io_mshot_prep_retry(struct io_kiocb *req,
 	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
 
 	req->flags &= ~REQ_F_BL_EMPTY;
 	sr->done_io = 0;
-	sr->retry = false;
+	sr->retry_flags = 0;
 	sr->len = 0; /* get from the provided buffer */
 }
@@ -397,7 +402,7 @@ int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
 
 	sr->done_io = 0;
-	sr->retry = false;
+	sr->retry_flags = 0;
 	sr->len = READ_ONCE(sqe->len);
 	sr->flags = READ_ONCE(sqe->ioprio);
 	if (sr->flags & ~SENDMSG_FLAGS)
@@ -751,7 +756,7 @@ int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
 
 	sr->done_io = 0;
-	sr->retry = false;
+	sr->retry_flags = 0;
 
 	if (unlikely(sqe->file_index || sqe->addr2))
 		return -EINVAL;
@@ -823,7 +828,7 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
 
 		cflags |= io_put_kbufs(req, this_ret, io_bundle_nbufs(kmsg, this_ret),
 				      issue_flags);
-		if (sr->retry)
+		if (sr->retry_flags & IO_SR_MSG_RETRY)
 			cflags = req->cqe.flags | (cflags & CQE_F_MASK);
 		/* bundle with no more immediate buffers, we're done */
 		if (req->flags & REQ_F_BL_EMPTY)
@@ -832,12 +837,12 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
 		 * If more is available AND it was a full transfer, retry and
 		 * append to this one
 		 */
-		if (!sr->retry && kmsg->msg.msg_inq > 1 && this_ret > 0 &&
+		if (!sr->retry_flags && kmsg->msg.msg_inq > 1 && this_ret > 0 &&
 		    !iov_iter_count(&kmsg->msg.msg_iter)) {
 			req->cqe.flags = cflags & ~CQE_F_MASK;
 			sr->len = kmsg->msg.msg_inq;
 			sr->done_io += this_ret;
-			sr->retry = true;
+			sr->retry_flags |= IO_SR_MSG_RETRY;
 			return false;
 		}
 	} else {
@@ -1077,6 +1082,14 @@ static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg
 		if (unlikely(ret < 0))
 			return ret;
 
+		if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->vec.iovec) {
+			kmsg->vec.nr = ret;
+			kmsg->vec.iovec = arg.iovs;
+			req->flags |= REQ_F_NEED_CLEANUP;
+		}
+		if (arg.partial_map)
+			sr->retry_flags |= IO_SR_MSG_PARTIAL_MAP;
+
 		/* special case 1 vec, can be a fast path */
 		if (ret == 1) {
 			sr->buf = arg.iovs[0].iov_base;
@@ -1085,11 +1098,6 @@ static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg
 		}
 		iov_iter_init(&kmsg->msg.msg_iter, ITER_DEST, arg.iovs, ret,
 				arg.out_len);
-		if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->vec.iovec) {
-			kmsg->vec.nr = ret;
-			kmsg->vec.iovec = arg.iovs;
-			req->flags |= REQ_F_NEED_CLEANUP;
-		}
 	} else {
 		void __user *buf;
 
@@ -1275,7 +1283,7 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	int ret;
 
 	zc->done_io = 0;
-	zc->retry = false;
+	zc->retry_flags = 0;
 
 	if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3)))
 		return -EINVAL;
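
Replacing the bool retry with a retry_flags word lets the single test !sr->retry_flags in io_recv_finish() block bundle retries both after a previous retry and after a partial buffer mapping. A standalone sketch of the pattern, reusing the enum values from the hunk above:

#include <stdio.h>

enum sr_retry_flags {
	IO_SR_MSG_RETRY		= 1,
	IO_SR_MSG_PARTIAL_MAP	= 2,
};

/* A retry is only attempted when no flag at all is set, so a partial
 * mapping blocks retries just like a previous retry does. */
static int may_retry(unsigned short retry_flags)
{
	return retry_flags == 0;
}

int main(void)
{
	unsigned short flags = 0;

	printf("fresh request: may_retry=%d\n", may_retry(flags));
	flags |= IO_SR_MSG_PARTIAL_MAP;	/* e.g. set from arg.partial_map */
	printf("after partial map: may_retry=%d\n", may_retry(flags));
	flags |= IO_SR_MSG_RETRY;
	printf("after a retry: may_retry=%d\n", may_retry(flags));
	return 0;
}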

io_uring/opdef.c

@@ -216,6 +216,7 @@ const struct io_issue_def io_issue_defs[] = {
 	},
 	[IORING_OP_FALLOCATE] = {
 		.needs_file		= 1,
+		.hash_reg_file		= 1,
 		.prep			= io_fallocate_prep,
 		.issue			= io_fallocate,
 	},
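
Marking IORING_OP_FALLOCATE as hashed work means io-wq serializes fallocate items keyed on the file, instead of running them concurrently. A rough sketch of the bucketing idea; the hash and names are illustrative, not io-wq's actual implementation:

#include <stdio.h>
#include <stdint.h>

#define NR_HASH_BUCKETS 64

/* Hypothetical sketch: hashed work keys items off the file so that
 * items landing in the same bucket execute one after another. */
static unsigned int work_hash_bucket(const void *file)
{
	uintptr_t key = (uintptr_t)file;

	key ^= key >> 12;	/* cheap pointer hash for the demo */
	return (unsigned int)(key % NR_HASH_BUCKETS);
}

int main(void)
{
	int file_a, file_b;	/* stand-ins for two struct file pointers */

	/* Two fallocates on the same file share a bucket -> serialized. */
	printf("a: %u, a again: %u, b: %u\n",
	       work_hash_bucket(&file_a), work_hash_bucket(&file_a),
	       work_hash_bucket(&file_b));
	return 0;
}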

io_uring/rsrc.c

@@ -112,8 +112,11 @@ static void io_release_ubuf(void *priv)
 	struct io_mapped_ubuf *imu = priv;
 	unsigned int i;
 
-	for (i = 0; i < imu->nr_bvecs; i++)
-		unpin_user_page(imu->bvec[i].bv_page);
+	for (i = 0; i < imu->nr_bvecs; i++) {
+		struct folio *folio = page_folio(imu->bvec[i].bv_page);
+
+		unpin_user_folio(folio, 1);
+	}
 }
 
 static struct io_mapped_ubuf *io_alloc_imu(struct io_ring_ctx *ctx,
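
With coalesced (large-folio) registrations, each bvec entry represents a pinned folio rather than an individual page, so release must drop exactly one pin per folio. A toy model of that invariant; the types and helper are stand-ins, not the mm API:

#include <stdio.h>

/* Toy model: a folio pinned once must be unpinned exactly once, no
 * matter how many pages of it the buffer spans. */
struct mock_folio {
	int pin_count;
	int nr_pages;
};

static void mock_unpin_user_folio(struct mock_folio *folio, int npins)
{
	folio->pin_count -= npins;
}

int main(void)
{
	struct mock_folio huge = { .pin_count = 1, .nr_pages = 512 };

	/* Buggy pattern would unpin per page, over-releasing the folio:
	 *   for (i = 0; i < 512; i++) mock_unpin_user_folio(&huge, 1);
	 * Fixed pattern: one bvec entry == one folio == one unpin. */
	mock_unpin_user_folio(&huge, 1);
	printf("pin_count after release: %d\n", huge.pin_count);
	return 0;
}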
@@ -733,6 +736,7 @@ bool io_check_coalesce_buffer(struct page **page_array, int nr_pages,
 
 	data->nr_pages_mid = folio_nr_pages(folio);
 	data->folio_shift = folio_shift(folio);
+	data->first_folio_page_idx = folio_page_idx(folio, page_array[0]);
 
 	/*
 	 * Check if pages are contiguous inside a folio, and all folios have
@@ -826,7 +830,11 @@ static struct io_rsrc_node *io_sqe_buffer_register(struct io_ring_ctx *ctx,
 	if (coalesced)
 		imu->folio_shift = data.folio_shift;
 	refcount_set(&imu->refs, 1);
-	off = (unsigned long) iov->iov_base & ((1UL << imu->folio_shift) - 1);
+
+	off = (unsigned long)iov->iov_base & ~PAGE_MASK;
+	if (coalesced)
+		off += data.first_folio_page_idx << PAGE_SHIFT;
+
 	node->buf = imu;
 	ret = 0;
@@ -842,8 +850,10 @@ done:
 	if (ret) {
 		if (imu)
 			io_free_imu(ctx, imu);
-		if (pages)
-			unpin_user_pages(pages, nr_pages);
+		if (pages) {
+			for (i = 0; i < nr_pages; i++)
+				unpin_user_folio(page_folio(pages[i]), 1);
+		}
 		io_cache_free(&ctx->node_cache, node);
 		node = ERR_PTR(ret);
 	}
@@ -1331,7 +1341,6 @@ static int io_vec_fill_bvec(int ddir, struct iov_iter *iter,
 {
 	unsigned long folio_size = 1 << imu->folio_shift;
 	unsigned long folio_mask = folio_size - 1;
-	u64 folio_addr = imu->ubuf & ~folio_mask;
 	struct bio_vec *res_bvec = vec->bvec;
 	size_t total_len = 0;
 	unsigned bvec_idx = 0;
@@ -1353,8 +1362,13 @@ static int io_vec_fill_bvec(int ddir, struct iov_iter *iter,
 		if (unlikely(check_add_overflow(total_len, iov_len, &total_len)))
 			return -EOVERFLOW;
 
-		/* by using folio address it also accounts for bvec offset */
-		offset = buf_addr - folio_addr;
+		offset = buf_addr - imu->ubuf;
+		/*
+		 * Only the first bvec can have non zero bv_offset, account it
+		 * here and work with full folios below.
+		 */
+		offset += imu->bvec[0].bv_offset;
 		src_bvec = imu->bvec + (offset >> imu->folio_shift);
 		offset &= folio_mask;
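
Together, these rsrc.c hunks stop assuming the user virtual address is aligned to the folio: registration now derives the buffer offset from the in-page offset plus the first page's index within its folio, and io_vec_fill_bvec() derives it from imu->ubuf plus the first bvec's bv_offset. A worked example of the registration-side arithmetic under assumed values (4 KiB pages, buffer starting 5 pages + 100 bytes into a large folio):

#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define PAGE_MASK  (~(PAGE_SIZE - 1))

int main(void)
{
	/* Assumed example: buffer starts 5 pages + 100 bytes into a 2 MiB folio. */
	unsigned long iov_base = 0x200000UL * 3 + 5 * PAGE_SIZE + 100;
	unsigned long first_folio_page_idx = 5;	/* what folio_page_idx() reports */
	int coalesced = 1;

	/* Old math assumed iov_base shared the folio's alignment; the new
	 * math only uses the page offset plus the page index in the folio. */
	unsigned long off = iov_base & ~PAGE_MASK;
	if (coalesced)
		off += first_folio_page_idx << PAGE_SHIFT;

	printf("imu offset: %lu (5 pages + 100 bytes = 20580)\n", off);
	return 0;
}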

io_uring/rsrc.h

@@ -49,6 +49,7 @@ struct io_imu_folio_data {
 	unsigned int	nr_pages_mid;
 	unsigned int	folio_shift;
 	unsigned int	nr_folios;
+	unsigned long	first_folio_page_idx;
 };
 
 bool io_rsrc_cache_init(struct io_ring_ctx *ctx);

io_uring/zcrx.c

@@ -106,8 +106,10 @@ static int io_import_dmabuf(struct io_zcrx_ifq *ifq,
 	for_each_sgtable_dma_sg(mem->sgt, sg, i)
 		total_size += sg_dma_len(sg);
 
-	if (total_size < off + len)
-		return -EINVAL;
+	if (total_size < off + len) {
+		ret = -EINVAL;
+		goto err;
+	}
 
 	mem->dmabuf_offset = off;
 	mem->size = len;
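
The dmabuf fix routes the size-check failure through the function's common error label so resources acquired earlier are released; returning -EINVAL directly leaked them. A minimal sketch of the shape of that fix, with hypothetical acquire/release helpers standing in for the dma-buf attachment:

#include <stdio.h>
#include <stddef.h>

static int acquire(void)  { puts("acquire"); return 0; }
static void release(void) { puts("release"); }

static int import(size_t total_size, size_t off, size_t len)
{
	int ret = acquire();

	if (ret)
		return ret;

	if (total_size < off + len) {
		ret = -22;	/* -EINVAL */
		goto err;	/* returning directly here would leak */
	}
	return 0;
err:
	release();
	return ret;
}

int main(void)
{
	printf("import -> %d\n", import(4096, 4096, 1));
	return 0;
}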