Mirror of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-09-04 20:19:47 +08:00
Merge branch 'io_uring-6.16' into for-6.17/io_uring
Merge in 6.16 io_uring fixes, to avoid clashes with pending net and settings changes.

* io_uring-6.16:
  io_uring: gate REQ_F_ISREG on !S_ANON_INODE as well
  io_uring/kbuf: flag partial buffer mappings
  io_uring/net: mark iov as dynamically allocated even for single segments
  io_uring: fix resource leak in io_import_dmabuf()
  io_uring: don't assume uaddr alignment in io_vec_fill_bvec
  io_uring/rsrc: don't rely on user vaddr alignment
  io_uring/rsrc: fix folio unpinning
  io_uring: make fallocate be hashed work
commit 1bc8890264
io_uring/io_uring.c
@@ -1706,11 +1706,12 @@ static void io_iopoll_req_issued(struct io_kiocb *req, unsigned int issue_flags)
 
 io_req_flags_t io_file_get_flags(struct file *file)
 {
+	struct inode *inode = file_inode(file);
 	io_req_flags_t res = 0;
 
 	BUILD_BUG_ON(REQ_F_ISREG_BIT != REQ_F_SUPPORT_NOWAIT_BIT + 1);
 
-	if (S_ISREG(file_inode(file)->i_mode))
+	if (S_ISREG(inode->i_mode) && !(inode->i_flags & S_ANON_INODE))
 		res |= REQ_F_ISREG;
 	if ((file->f_flags & O_NONBLOCK) || (file->f_mode & FMODE_NOWAIT))
 		res |= REQ_F_SUPPORT_NOWAIT;
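The io_file_get_flags() change above keeps REQ_F_ISREG off for regular files that are backed by anonymous inodes. Below is a stand-alone, user-space model of that gating; the flag values and the fake_inode/fake_file types are illustrative stand-ins, not the kernel definitions.

/* Stand-alone model of the gating in io_file_get_flags(); all constants are stand-ins. */
#include <stdio.h>

#define MODE_REG		0x1	/* models S_ISREG() being true */
#define INODE_ANON		0x2	/* models S_ANON_INODE in inode->i_flags */

#define REQ_F_ISREG		(1u << 0)
#define REQ_F_SUPPORT_NOWAIT	(1u << 1)

struct fake_inode { unsigned mode; unsigned flags; };
struct fake_file  { struct fake_inode inode; int nonblock; };

static unsigned file_get_flags(const struct fake_file *f)
{
	unsigned res = 0;

	/* Regular file, but only if it is not backed by an anonymous inode. */
	if ((f->inode.mode & MODE_REG) && !(f->inode.flags & INODE_ANON))
		res |= REQ_F_ISREG;
	if (f->nonblock)
		res |= REQ_F_SUPPORT_NOWAIT;
	return res;
}

int main(void)
{
	struct fake_file disk_file = { { MODE_REG, 0 }, 0 };
	struct fake_file anon_file = { { MODE_REG, INODE_ANON }, 1 };

	/* prints 0x1: plain regular file keeps ISREG */
	printf("disk file flags: 0x%x\n", file_get_flags(&disk_file));
	/* prints 0x2: anon-inode file loses ISREG, keeps NOWAIT */
	printf("anon file flags: 0x%x\n", file_get_flags(&anon_file));
	return 0;
}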
io_uring/kbuf.c
@@ -271,6 +271,7 @@ static int io_ring_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg,
 		if (len > arg->max_len) {
 			len = arg->max_len;
 			if (!(bl->flags & IOBL_INC)) {
+				arg->partial_map = 1;
 				if (iov != arg->iovs)
 					break;
 				buf->len = len;
io_uring/kbuf.h
@@ -58,7 +58,8 @@ struct buf_sel_arg {
 	size_t max_len;
 	unsigned short nr_iovs;
 	unsigned short mode;
-	unsigned buf_group;
+	unsigned short buf_group;
+	unsigned short partial_map;
 };
 
 void __user *io_buffer_select(struct io_kiocb *req, size_t *len,
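The two kbuf hunks above record, via the new partial_map field, that a selected buffer had to be clamped to max_len; the net.c hunks below then consult it through IO_SR_MSG_PARTIAL_MAP. Here is a simplified, user-space sketch of that clamp-and-flag idea; sel_arg and select_buffer are made-up stand-ins for the kbuf machinery.

/* Simplified model of clamping a selected buffer and flagging it as partial. */
#include <stdio.h>
#include <stddef.h>

struct sel_arg {
	size_t max_len;
	unsigned short partial_map;	/* set when the mapped length was clamped */
};

/* Returns the usable length of a provided buffer of size buf_len. */
static size_t select_buffer(struct sel_arg *arg, size_t buf_len)
{
	size_t len = buf_len;

	if (len > arg->max_len) {
		/* Only part of the buffer fits: clamp and remember that. */
		len = arg->max_len;
		arg->partial_map = 1;
	}
	return len;
}

int main(void)
{
	struct sel_arg arg = { .max_len = 4096, .partial_map = 0 };
	size_t used = select_buffer(&arg, 16384);

	/* prints: used 4096 bytes, partial_map=1 */
	printf("used %zu bytes, partial_map=%u\n", used, (unsigned)arg.partial_map);
	return 0;
}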
io_uring/net.c
@@ -75,12 +75,17 @@ struct io_sr_msg {
 	u16 flags;
 	/* initialised and used only by !msg send variants */
 	u16 buf_group;
-	bool retry;
+	unsigned short retry_flags;
 	void __user *msg_control;
 	/* used only for send zerocopy */
 	struct io_kiocb *notif;
 };
 
+enum sr_retry_flags {
+	IO_SR_MSG_RETRY		= 1,
+	IO_SR_MSG_PARTIAL_MAP	= 2,
+};
+
 /*
  * Number of times we'll try and do receives if there's more data. If we
  * exceed this limit, then add us to the back of the queue and retry from
@@ -187,7 +192,7 @@ static inline void io_mshot_prep_retry(struct io_kiocb *req,
 
 	req->flags &= ~REQ_F_BL_EMPTY;
 	sr->done_io = 0;
-	sr->retry = false;
+	sr->retry_flags = 0;
 	sr->len = 0; /* get from the provided buffer */
 }
 
@@ -397,7 +402,7 @@ int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
 
 	sr->done_io = 0;
-	sr->retry = false;
+	sr->retry_flags = 0;
 	sr->len = READ_ONCE(sqe->len);
 	sr->flags = READ_ONCE(sqe->ioprio);
 	if (sr->flags & ~SENDMSG_FLAGS)
@@ -751,7 +756,7 @@ int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
 
 	sr->done_io = 0;
-	sr->retry = false;
+	sr->retry_flags = 0;
 
 	if (unlikely(sqe->file_index || sqe->addr2))
 		return -EINVAL;
@@ -823,7 +828,7 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
 
 		cflags |= io_put_kbufs(req, this_ret, io_bundle_nbufs(kmsg, this_ret),
 				      issue_flags);
-		if (sr->retry)
+		if (sr->retry_flags & IO_SR_MSG_RETRY)
 			cflags = req->cqe.flags | (cflags & CQE_F_MASK);
 		/* bundle with no more immediate buffers, we're done */
 		if (req->flags & REQ_F_BL_EMPTY)
@@ -832,12 +837,12 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
 		 * If more is available AND it was a full transfer, retry and
 		 * append to this one
 		 */
-		if (!sr->retry && kmsg->msg.msg_inq > 1 && this_ret > 0 &&
+		if (!sr->retry_flags && kmsg->msg.msg_inq > 1 && this_ret > 0 &&
 		    !iov_iter_count(&kmsg->msg.msg_iter)) {
 			req->cqe.flags = cflags & ~CQE_F_MASK;
 			sr->len = kmsg->msg.msg_inq;
 			sr->done_io += this_ret;
-			sr->retry = true;
+			sr->retry_flags |= IO_SR_MSG_RETRY;
 			return false;
 		}
 	} else {
@@ -1077,6 +1082,14 @@ static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg
 		if (unlikely(ret < 0))
 			return ret;
 
+		if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->vec.iovec) {
+			kmsg->vec.nr = ret;
+			kmsg->vec.iovec = arg.iovs;
+			req->flags |= REQ_F_NEED_CLEANUP;
+		}
+		if (arg.partial_map)
+			sr->retry_flags |= IO_SR_MSG_PARTIAL_MAP;
+
 		/* special case 1 vec, can be a fast path */
 		if (ret == 1) {
 			sr->buf = arg.iovs[0].iov_base;
@@ -1085,11 +1098,6 @@ static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg
 		}
 		iov_iter_init(&kmsg->msg.msg_iter, ITER_DEST, arg.iovs, ret,
 				arg.out_len);
-		if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->vec.iovec) {
-			kmsg->vec.nr = ret;
-			kmsg->vec.iovec = arg.iovs;
-			req->flags |= REQ_F_NEED_CLEANUP;
-		}
 	} else {
 		void __user *buf;
 
@@ -1275,7 +1283,7 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	int ret;
 
 	zc->done_io = 0;
-	zc->retry = false;
+	zc->retry_flags = 0;
 
 	if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3)))
 		return -EINVAL;
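The io_recv_buf_select() hunks above move the "this iovec array is heap-allocated, mark REQ_F_NEED_CLEANUP" bookkeeping in front of the single-segment fast path, so the array is freed even when that path is taken. A rough user-space sketch of the same ordering rule follows; pick, pick_vals and the other names are invented for illustration, not io_uring code.

/*
 * Sketch: record that a buffer array is heap-allocated *before* any
 * single-entry fast path returns, so the cleanup path always frees it.
 */
#include <stdio.h>
#include <stdlib.h>

struct pick {
	int *vals;		/* may point at inline_vals or at heap memory */
	int inline_vals[1];
	int nr;
	int needs_cleanup;	/* models REQ_F_NEED_CLEANUP */
};

/* Select 'want' values; uses heap storage whenever the inline slot is busy. */
static int pick_vals(struct pick *p, int want, int inline_busy)
{
	if (want == 1 && !inline_busy) {
		p->vals = p->inline_vals;
	} else {
		p->vals = calloc(want, sizeof(*p->vals));
		if (!p->vals)
			return -1;
	}
	p->nr = want;

	/* The fix: flag heap ownership before the fast path below. */
	if (p->vals != p->inline_vals)
		p->needs_cleanup = 1;

	if (want == 1)	/* single-entry fast path; may still be heap-backed */
		return 0;

	return 0;
}

static void pick_cleanup(struct pick *p)
{
	if (p->needs_cleanup)
		free(p->vals);
}

int main(void)
{
	struct pick p = { 0 };

	/* Single entry, but inline storage unavailable: heap is used and freed. */
	if (pick_vals(&p, 1, /*inline_busy=*/1) == 0)
		printf("nr=%d needs_cleanup=%d\n", p.nr, p.needs_cleanup);
	pick_cleanup(&p);
	return 0;
}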
io_uring/opdef.c
@@ -216,6 +216,7 @@ const struct io_issue_def io_issue_defs[] = {
 	},
 	[IORING_OP_FALLOCATE] = {
 		.needs_file		= 1,
+		.hash_reg_file		= 1,
 		.prep			= io_fallocate_prep,
 		.issue			= io_fallocate,
 	},
io_uring/rsrc.c
@@ -112,8 +112,11 @@ static void io_release_ubuf(void *priv)
 	struct io_mapped_ubuf *imu = priv;
 	unsigned int i;
 
-	for (i = 0; i < imu->nr_bvecs; i++)
-		unpin_user_page(imu->bvec[i].bv_page);
+	for (i = 0; i < imu->nr_bvecs; i++) {
+		struct folio *folio = page_folio(imu->bvec[i].bv_page);
+
+		unpin_user_folio(folio, 1);
+	}
 }
 
 static struct io_mapped_ubuf *io_alloc_imu(struct io_ring_ctx *ctx,
@@ -733,6 +736,7 @@ bool io_check_coalesce_buffer(struct page **page_array, int nr_pages,
 
 	data->nr_pages_mid = folio_nr_pages(folio);
 	data->folio_shift = folio_shift(folio);
+	data->first_folio_page_idx = folio_page_idx(folio, page_array[0]);
 
 	/*
 	 * Check if pages are contiguous inside a folio, and all folios have
@@ -826,7 +830,11 @@ static struct io_rsrc_node *io_sqe_buffer_register(struct io_ring_ctx *ctx,
 	if (coalesced)
 		imu->folio_shift = data.folio_shift;
 	refcount_set(&imu->refs, 1);
-	off = (unsigned long) iov->iov_base & ((1UL << imu->folio_shift) - 1);
+
+	off = (unsigned long)iov->iov_base & ~PAGE_MASK;
+	if (coalesced)
+		off += data.first_folio_page_idx << PAGE_SHIFT;
+
 	node->buf = imu;
 	ret = 0;
 
@@ -842,8 +850,10 @@ done:
 	if (ret) {
 		if (imu)
 			io_free_imu(ctx, imu);
-		if (pages)
-			unpin_user_pages(pages, nr_pages);
+		if (pages) {
+			for (i = 0; i < nr_pages; i++)
+				unpin_user_folio(page_folio(pages[i]), 1);
+		}
 		io_cache_free(&ctx->node_cache, node);
 		node = ERR_PTR(ret);
 	}
@@ -1331,7 +1341,6 @@ static int io_vec_fill_bvec(int ddir, struct iov_iter *iter,
 {
 	unsigned long folio_size = 1 << imu->folio_shift;
 	unsigned long folio_mask = folio_size - 1;
-	u64 folio_addr = imu->ubuf & ~folio_mask;
 	struct bio_vec *res_bvec = vec->bvec;
 	size_t total_len = 0;
 	unsigned bvec_idx = 0;
@@ -1353,8 +1362,13 @@
 		if (unlikely(check_add_overflow(total_len, iov_len, &total_len)))
 			return -EOVERFLOW;
 
-		/* by using folio address it also accounts for bvec offset */
-		offset = buf_addr - folio_addr;
+		offset = buf_addr - imu->ubuf;
+		/*
+		 * Only the first bvec can have non zero bv_offset, account it
+		 * here and work with full folios below.
+		 */
+		offset += imu->bvec[0].bv_offset;
+
 		src_bvec = imu->bvec + (offset >> imu->folio_shift);
 		offset &= folio_mask;
 
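The io_vec_fill_bvec() change above stops assuming the registered buffer's user address is folio-aligned: the offset is now taken relative to imu->ubuf and then adjusted by bvec[0].bv_offset before indexing the bvec table. A small worked example of that arithmetic follows, with made-up addresses and a made-up 64 KiB folio size; nothing in it is kernel API, only the offset math mirrors the hunk.

/* Worked example of the corrected offset math; all values are made up. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	unsigned folio_shift = 16;			/* 64 KiB folios */
	uint64_t folio_mask = (1ULL << folio_shift) - 1;

	uint64_t ubuf = 0x7f0000001200ULL;		/* registered buffer start, not folio-aligned */
	uint64_t first_bv_offset = ubuf & folio_mask;	/* 0x1200: bvec[0]'s offset in its folio */
	uint64_t buf_addr = 0x7f0000012345ULL;		/* address the request wants to use */

	/* Offset relative to the start of the registration ... */
	uint64_t offset = buf_addr - ubuf;
	/* ... plus bvec[0]'s own offset, so folio indexing below is exact. */
	offset += first_bv_offset;

	uint64_t bvec_idx = offset >> folio_shift;	/* which folio-sized bvec: 1 */
	uint64_t in_folio = offset & folio_mask;	/* offset within it: 0x2345 */

	printf("bvec index %llu, offset in folio 0x%llx\n",
	       (unsigned long long)bvec_idx, (unsigned long long)in_folio);
	return 0;
}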
io_uring/rsrc.h
@@ -49,6 +49,7 @@ struct io_imu_folio_data {
 	unsigned int nr_pages_mid;
 	unsigned int folio_shift;
 	unsigned int nr_folios;
+	unsigned long first_folio_page_idx;
 };
 
 bool io_rsrc_cache_init(struct io_ring_ctx *ctx);
io_uring/zcrx.c
@@ -106,8 +106,10 @@ static int io_import_dmabuf(struct io_zcrx_ifq *ifq,
 	for_each_sgtable_dma_sg(mem->sgt, sg, i)
 		total_size += sg_dma_len(sg);
 
-	if (total_size < off + len)
-		return -EINVAL;
+	if (total_size < off + len) {
+		ret = -EINVAL;
+		goto err;
+	}
 
 	mem->dmabuf_offset = off;
 	mem->size = len;
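The io_import_dmabuf() fix above turns an early return -EINVAL into goto err, so resources acquired earlier in the function are released when the size check fails. Below is a generic, user-space sketch of that acquire/validate/goto-unwind pattern; it uses plain malloc/free rather than the dma-buf API, and import_region is an invented name.

/* Generic acquire/validate/unwind sketch of the pattern the fix applies. */
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>

static int import_region(size_t total_size, size_t off, size_t len, char **out)
{
	int ret;
	char *backing = malloc(total_size);	/* resource acquired up front */

	if (!backing)
		return -ENOMEM;

	if (total_size < off + len) {
		/* Bad parameters: do NOT return directly, unwind first. */
		ret = -EINVAL;
		goto err;
	}

	*out = backing;				/* caller takes ownership on success */
	return 0;
err:
	free(backing);				/* release what was acquired */
	return ret;
}

int main(void)
{
	char *p = NULL;
	int ret = import_region(4096, 4000, 200, &p);

	printf("import: %d\n", ret);		/* prints -22 (EINVAL), nothing leaked */
	return 0;
}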