Mirror of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
Merge branch 'io_uring-6.16' into for-6.17/io_uring
Merge in 6.16 io_uring fixes, to avoid clashes with pending net and settings changes.

* io_uring-6.16:
  io_uring: gate REQ_F_ISREG on !S_ANON_INODE as well
  io_uring/kbuf: flag partial buffer mappings
  io_uring/net: mark iov as dynamically allocated even for single segments
  io_uring: fix resource leak in io_import_dmabuf()
  io_uring: don't assume uaddr alignment in io_vec_fill_bvec
  io_uring/rsrc: don't rely on user vaddr alignment
  io_uring/rsrc: fix folio unpinning
  io_uring: make fallocate be hashed work
commit 1bc8890264
@@ -1706,11 +1706,12 @@ static void io_iopoll_req_issued(struct io_kiocb *req, unsigned int issue_flags)
 
 io_req_flags_t io_file_get_flags(struct file *file)
 {
+	struct inode *inode = file_inode(file);
 	io_req_flags_t res = 0;
 
 	BUILD_BUG_ON(REQ_F_ISREG_BIT != REQ_F_SUPPORT_NOWAIT_BIT + 1);
 
-	if (S_ISREG(file_inode(file)->i_mode))
+	if (S_ISREG(inode->i_mode) && !(inode->i_flags & S_ANON_INODE))
 		res |= REQ_F_ISREG;
 	if ((file->f_flags & O_NONBLOCK) || (file->f_mode & FMODE_NOWAIT))
 		res |= REQ_F_SUPPORT_NOWAIT;
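The hunk above is the "gate REQ_F_ISREG on !S_ANON_INODE" fix from the merge list: files backed by anonymous inodes can still carry S_ISREG in i_mode, so the regular-file handling must check the anon-inode flag too. A hedged restatement as a helper, assuming the kernel's VFS definitions; the helper name is illustrative, not the kernel's:

	#include <linux/fs.h>

	/* Illustrative helper (not from the diff): a file only gets
	 * regular-file treatment when it is S_ISREG *and* not backed
	 * by an anonymous inode. */
	static inline bool io_file_is_regular(const struct inode *inode)
	{
		return S_ISREG(inode->i_mode) && !(inode->i_flags & S_ANON_INODE);
	}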
@@ -271,6 +271,7 @@ static int io_ring_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg,
 		if (len > arg->max_len) {
 			len = arg->max_len;
 			if (!(bl->flags & IOBL_INC)) {
+				arg->partial_map = 1;
 				if (iov != arg->iovs)
 					break;
 				buf->len = len;
@@ -58,7 +58,8 @@ struct buf_sel_arg {
 	size_t max_len;
 	unsigned short nr_iovs;
 	unsigned short mode;
-	unsigned buf_group;
+	unsigned short buf_group;
+	unsigned short partial_map;
 };
 
 void __user *io_buffer_select(struct io_kiocb *req, size_t *len,
@@ -75,12 +75,17 @@ struct io_sr_msg {
 	u16				flags;
 	/* initialised and used only by !msg send variants */
 	u16				buf_group;
-	bool				retry;
+	unsigned short			retry_flags;
 	void __user			*msg_control;
 	/* used only for send zerocopy */
 	struct io_kiocb			*notif;
 };
 
+enum sr_retry_flags {
+	IO_SR_MSG_RETRY		= 1,
+	IO_SR_MSG_PARTIAL_MAP	= 2,
+};
+
 /*
  * Number of times we'll try and do receives if there's more data. If we
  * exceed this limit, then add us to the back of the queue and retry from
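The `retry` bool becomes a `retry_flags` word here so the receive path can record two independent conditions: a requested retry and a partial provided-buffer mapping. A minimal stand-alone sketch of that idea (plain C, not kernel code; the names mirror the enum above):

	/* Stand-alone sketch of the bitmask that replaces the old bool:
	 * two independent conditions can now be recorded at once. */
	enum sr_retry_flags {
		IO_SR_MSG_RETRY		= 1,	/* more data queued, append to this CQE */
		IO_SR_MSG_PARTIAL_MAP	= 2,	/* provided-buffer mapping was cut short */
	};

	struct sr_state {
		unsigned short retry_flags;
	};

	static void note_partial_retry(struct sr_state *sr)
	{
		/* with a plain bool, recording the partial mapping would have
		 * clobbered (or been clobbered by) the retry decision */
		sr->retry_flags |= IO_SR_MSG_PARTIAL_MAP | IO_SR_MSG_RETRY;
	}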
@@ -187,7 +192,7 @@ static inline void io_mshot_prep_retry(struct io_kiocb *req,
 
 	req->flags &= ~REQ_F_BL_EMPTY;
 	sr->done_io = 0;
-	sr->retry = false;
+	sr->retry_flags = 0;
 	sr->len = 0;		/* get from the provided buffer */
 }
 
@@ -397,7 +402,7 @@ int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
 
 	sr->done_io = 0;
-	sr->retry = false;
+	sr->retry_flags = 0;
 	sr->len = READ_ONCE(sqe->len);
 	sr->flags = READ_ONCE(sqe->ioprio);
 	if (sr->flags & ~SENDMSG_FLAGS)
@@ -751,7 +756,7 @@ int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
 
 	sr->done_io = 0;
-	sr->retry = false;
+	sr->retry_flags = 0;
 
 	if (unlikely(sqe->file_index || sqe->addr2))
 		return -EINVAL;
@@ -823,7 +828,7 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
 
 		cflags |= io_put_kbufs(req, this_ret, io_bundle_nbufs(kmsg, this_ret),
 				      issue_flags);
-		if (sr->retry)
+		if (sr->retry_flags & IO_SR_MSG_RETRY)
 			cflags = req->cqe.flags | (cflags & CQE_F_MASK);
 		/* bundle with no more immediate buffers, we're done */
 		if (req->flags & REQ_F_BL_EMPTY)
@@ -832,12 +837,12 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
 		 * If more is available AND it was a full transfer, retry and
 		 * append to this one
 		 */
-		if (!sr->retry && kmsg->msg.msg_inq > 1 && this_ret > 0 &&
+		if (!sr->retry_flags && kmsg->msg.msg_inq > 1 && this_ret > 0 &&
 		    !iov_iter_count(&kmsg->msg.msg_iter)) {
 			req->cqe.flags = cflags & ~CQE_F_MASK;
 			sr->len = kmsg->msg.msg_inq;
 			sr->done_io += this_ret;
-			sr->retry = true;
+			sr->retry_flags |= IO_SR_MSG_RETRY;
 			return false;
 		}
 	} else {
@@ -1077,6 +1082,14 @@ static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg
 		if (unlikely(ret < 0))
 			return ret;
 
+		if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->vec.iovec) {
+			kmsg->vec.nr = ret;
+			kmsg->vec.iovec = arg.iovs;
+			req->flags |= REQ_F_NEED_CLEANUP;
+		}
+		if (arg.partial_map)
+			sr->retry_flags |= IO_SR_MSG_PARTIAL_MAP;
+
 		/* special case 1 vec, can be a fast path */
 		if (ret == 1) {
 			sr->buf = arg.iovs[0].iov_base;
@@ -1085,11 +1098,6 @@ static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg
 		}
 		iov_iter_init(&kmsg->msg.msg_iter, ITER_DEST, arg.iovs, ret,
 				arg.out_len);
-		if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->vec.iovec) {
-			kmsg->vec.nr = ret;
-			kmsg->vec.iovec = arg.iovs;
-			req->flags |= REQ_F_NEED_CLEANUP;
-		}
 	} else {
 		void __user *buf;
 
@@ -1275,7 +1283,7 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	int ret;
 
 	zc->done_io = 0;
-	zc->retry = false;
+	zc->retry_flags = 0;
 
 	if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3)))
 		return -EINVAL;
@@ -216,6 +216,7 @@ const struct io_issue_def io_issue_defs[] = {
 	},
 	[IORING_OP_FALLOCATE] = {
 		.needs_file		= 1,
+		.hash_reg_file		= 1,
 		.prep			= io_fallocate_prep,
 		.issue			= io_fallocate,
 	},
@@ -112,8 +112,11 @@ static void io_release_ubuf(void *priv)
 	struct io_mapped_ubuf *imu = priv;
 	unsigned int i;
 
-	for (i = 0; i < imu->nr_bvecs; i++)
-		unpin_user_page(imu->bvec[i].bv_page);
+	for (i = 0; i < imu->nr_bvecs; i++) {
+		struct folio *folio = page_folio(imu->bvec[i].bv_page);
+
+		unpin_user_folio(folio, 1);
+	}
 }
 
 static struct io_mapped_ubuf *io_alloc_imu(struct io_ring_ctx *ctx,
@@ -733,6 +736,7 @@ bool io_check_coalesce_buffer(struct page **page_array, int nr_pages,
 
 	data->nr_pages_mid = folio_nr_pages(folio);
 	data->folio_shift = folio_shift(folio);
+	data->first_folio_page_idx = folio_page_idx(folio, page_array[0]);
 
 	/*
 	 * Check if pages are contiguous inside a folio, and all folios have
@@ -826,7 +830,11 @@ static struct io_rsrc_node *io_sqe_buffer_register(struct io_ring_ctx *ctx,
 	if (coalesced)
 		imu->folio_shift = data.folio_shift;
 	refcount_set(&imu->refs, 1);
-	off = (unsigned long) iov->iov_base & ((1UL << imu->folio_shift) - 1);
+
+	off = (unsigned long)iov->iov_base & ~PAGE_MASK;
+	if (coalesced)
+		off += data.first_folio_page_idx << PAGE_SHIFT;
+
 	node->buf = imu;
 	ret = 0;
 
@@ -842,8 +850,10 @@ done:
 	if (ret) {
 		if (imu)
 			io_free_imu(ctx, imu);
-		if (pages)
-			unpin_user_pages(pages, nr_pages);
+		if (pages) {
+			for (i = 0; i < nr_pages; i++)
+				unpin_user_folio(page_folio(pages[i]), 1);
+		}
 		io_cache_free(&ctx->node_cache, node);
 		node = ERR_PTR(ret);
 	}
@@ -1331,7 +1341,6 @@ static int io_vec_fill_bvec(int ddir, struct iov_iter *iter,
 {
 	unsigned long folio_size = 1 << imu->folio_shift;
 	unsigned long folio_mask = folio_size - 1;
-	u64 folio_addr = imu->ubuf & ~folio_mask;
 	struct bio_vec *res_bvec = vec->bvec;
 	size_t total_len = 0;
 	unsigned bvec_idx = 0;
@@ -1353,8 +1362,13 @@
 		if (unlikely(check_add_overflow(total_len, iov_len, &total_len)))
 			return -EOVERFLOW;
 
-		/* by using folio address it also accounts for bvec offset */
-		offset = buf_addr - folio_addr;
+		offset = buf_addr - imu->ubuf;
+		/*
+		 * Only the first bvec can have non zero bv_offset, account it
+		 * here and work with full folios below.
+		 */
+		offset += imu->bvec[0].bv_offset;
+
 		src_bvec = imu->bvec + (offset >> imu->folio_shift);
 		offset &= folio_mask;
 
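The last two hunks drop the assumption that the registered buffer's user address is folio-aligned: the offset is now taken relative to imu->ubuf and then shifted by the first bvec's bv_offset before indexing into the bvec array. A small worked example with assumed numbers (not taken from the diff):

	#include <stdio.h>

	/* Assumed example values: a 16 KiB folio (folio_shift = 14), a
	 * registered buffer that begins 0x600 bytes into its first folio,
	 * and an I/O that starts 0x5000 bytes into that buffer. */
	int main(void)
	{
		unsigned int folio_shift = 14;
		unsigned long folio_mask = (1UL << folio_shift) - 1;
		unsigned long first_bv_offset = 0x600;          /* imu->bvec[0].bv_offset */
		unsigned long long ubuf = 0x7f0000000600ULL;    /* imu->ubuf */
		unsigned long long buf_addr = ubuf + 0x5000;    /* start of this I/O */

		unsigned long long offset = buf_addr - ubuf;    /* 0x5000, buffer-relative */
		offset += first_bv_offset;                      /* 0x5600, folio-relative */

		printf("bvec index %llu, offset in folio 0x%llx\n",
		       offset >> folio_shift, offset & folio_mask);  /* 1, 0x1600 */
		return 0;
	}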
@@ -49,6 +49,7 @@ struct io_imu_folio_data {
 	unsigned int	nr_pages_mid;
 	unsigned int	folio_shift;
 	unsigned int	nr_folios;
+	unsigned long	first_folio_page_idx;
 };
 
 bool io_rsrc_cache_init(struct io_ring_ctx *ctx);
@@ -106,8 +106,10 @@ static int io_import_dmabuf(struct io_zcrx_ifq *ifq,
 	for_each_sgtable_dma_sg(mem->sgt, sg, i)
 		total_size += sg_dma_len(sg);
 
-	if (total_size < off + len)
-		return -EINVAL;
+	if (total_size < off + len) {
+		ret = -EINVAL;
+		goto err;
+	}
 
 	mem->dmabuf_offset = off;
 	mem->size = len;
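This final hunk is the io_import_dmabuf() resource-leak fix: the early `return -EINVAL` skipped the function's unwind path, so references taken before the size check were never dropped. A generic, compilable sketch of the goto-unwind pattern the fix adopts; every name in it is a placeholder, not a kernel API:

	#include <errno.h>

	static int acquired;                 /* stands in for an attachment/mapping */

	static int acquire(void)  { acquired = 1; return 0; }
	static void release(void) { acquired = 0; }

	static int import(unsigned long total_size, unsigned long off, unsigned long len)
	{
		int ret = acquire();
		if (ret)
			return ret;          /* nothing to undo yet */

		if (total_size < off + len) {
			ret = -EINVAL;       /* was: return -EINVAL, leaking the acquisition */
			goto err;
		}
		return 0;
	err:
		release();
		return ret;
	}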