mirror of
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-03-22 07:27:12 +08:00
There are two filename-related problems in io_uring and its interplay with audit. Filenames are imported when a request is submitted and used when it is processed. Unfortunately, the latter may very well happen in a different thread. In that case the reference to the filename is put into the wrong audit_context - that of the submitting thread, not the processing one. Audit logic is called by the latter, and it really wants to be able to find the names in the audit_context of the current (== processing) thread. Another related problem is the headache with refcounts - normally all references to a given struct filename are visible only to one thread (the one that uses that struct filename). io_uring violates that - an extra reference is stashed in the audit_context of the submitter. It gets dropped when the submitter returns to userland, which can happen simultaneously with the processing thread deciding to drop the reference it got. We paper over that by making the refcount atomic, but that means a pointless headache for everyone. Solution: the notion of partially imported filenames. Namely, already copied from userland, but *not* exposed to audit yet. io_uring can create that in the submitter thread, and complete the import (obtaining the usual reference to struct filename) in the processing thread. Object: struct delayed_filename. Primitives for working with it: delayed_getname(&delayed_filename, user_string) - copies the name from userland, returning 0 and stashing the address of the (still incomplete) struct filename in delayed_filename on success and returning -E... on error. delayed_getname_uflags(&delayed_filename, user_string, atflags) - similar, in the same relation to delayed_getname() as getname_uflags() is to getname(). complete_getname(&delayed_filename) - completes the import of the filename stashed in delayed_filename and returns struct filename to the caller, emptying delayed_filename. CLASS(filename_complete_delayed, name)(&delayed_filename) - variant of CLASS(filename) with complete_getname() for the constructor. 
dismiss_delayed_filename(&delayed_filename) - destructor; drops whatever might be stashed in delayed_filename, emptying it. putname_to_delayed(&delayed_filename, name) - if name is shared, stashes its copy into delayed_filename and drops the reference to name, otherwise stashes the name itself in there. Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
437 lines
10 KiB
C
437 lines
10 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
#include <linux/kernel.h>
|
|
#include <linux/errno.h>
|
|
#include <linux/fs.h>
|
|
#include <linux/file.h>
|
|
#include <linux/fdtable.h>
|
|
#include <linux/fsnotify.h>
|
|
#include <linux/namei.h>
|
|
#include <linux/pipe_fs_i.h>
|
|
#include <linux/watch_queue.h>
|
|
#include <linux/io_uring.h>
|
|
|
|
#include <uapi/linux/io_uring.h>
|
|
|
|
#include "../fs/internal.h"
|
|
|
|
#include "filetable.h"
|
|
#include "io_uring.h"
|
|
#include "rsrc.h"
|
|
#include "openclose.h"
|
|
|
|
/* Per-request state for IORING_OP_OPENAT / IORING_OP_OPENAT2. */
struct io_open {
	/* NOTE(review): every cmd struct in this file leads with a file pointer */
	struct file *file;
	int dfd;			/* directory fd the path is resolved against */
	u32 file_slot;			/* non-zero: install into a fixed-file slot */
	/* name is partially imported at prep; import completed at issue time */
	struct delayed_filename filename;
	struct open_how how;		/* flags/mode/resolve for the open */
	unsigned long nofile;		/* RLIMIT_NOFILE snapshot taken at prep */
};
|
|
|
|
/* Per-request state for IORING_OP_CLOSE. */
struct io_close {
	struct file *file;
	int fd;		/* normal fd to close; mutually exclusive with file_slot */
	u32 file_slot;	/* if non-zero, 1-based fixed-file slot to remove */
};
|
|
|
|
/* Per-request state for IORING_OP_FIXED_FD_INSTALL. */
struct io_fixed_install {
	struct file *file;
	unsigned int o_flags;	/* O_CLOEXEC or 0, decided at prep time */
};
|
|
|
|
static bool io_openat_force_async(struct io_open *open)
|
|
{
|
|
/*
|
|
* Don't bother trying for O_TRUNC, O_CREAT, or O_TMPFILE open,
|
|
* it'll always -EAGAIN. Note that we test for __O_TMPFILE because
|
|
* O_TMPFILE includes O_DIRECTORY, which isn't a flag we need to force
|
|
* async for.
|
|
*/
|
|
return open->how.flags & (O_TRUNC | O_CREAT | __O_TMPFILE);
|
|
}
|
|
|
|
/*
 * Common prep for OPENAT/OPENAT2: validate the SQE, import the filename
 * from userspace and record the per-request open parameters.  The caller
 * must have filled in open->how already.
 */
static int __io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_open *open = io_kiocb_to_cmd(req, struct io_open);
	const char __user *fname;
	int ret;

	if (unlikely(sqe->buf_index))
		return -EINVAL;
	/* opening relative to an already-registered (fixed) file isn't supported */
	if (unlikely(req->flags & REQ_F_FIXED_FILE))
		return -EBADF;

	/* open.how should be already initialised */
	if (!(open->how.flags & O_PATH) && force_o_largefile())
		open->how.flags |= O_LARGEFILE;

	open->dfd = READ_ONCE(sqe->fd);
	fname = u64_to_user_ptr(READ_ONCE(sqe->addr));
	/* partial import only; completed in the issuing thread (see io_openat2()) */
	ret = delayed_getname(&open->filename, fname);
	if (unlikely(ret))
		return ret;
	/* from here on, cleanup must drop the stashed filename */
	req->flags |= REQ_F_NEED_CLEANUP;

	open->file_slot = READ_ONCE(sqe->file_index);
	/* fixed-file slots aren't real fds, so O_CLOEXEC is meaningless there */
	if (open->file_slot && (open->how.flags & O_CLOEXEC))
		return -EINVAL;

	open->nofile = rlimit(RLIMIT_NOFILE);
	if (io_openat_force_async(open))
		req->flags |= REQ_F_FORCE_ASYNC;
	return 0;
}
|
|
|
|
int io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
|
{
|
|
struct io_open *open = io_kiocb_to_cmd(req, struct io_open);
|
|
u64 mode = READ_ONCE(sqe->len);
|
|
u64 flags = READ_ONCE(sqe->open_flags);
|
|
|
|
open->how = build_open_how(flags, mode);
|
|
return __io_openat_prep(req, sqe);
|
|
}
|
|
|
|
int io_openat2_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
|
{
|
|
struct io_open *open = io_kiocb_to_cmd(req, struct io_open);
|
|
struct open_how __user *how;
|
|
size_t len;
|
|
int ret;
|
|
|
|
how = u64_to_user_ptr(READ_ONCE(sqe->addr2));
|
|
len = READ_ONCE(sqe->len);
|
|
if (len < OPEN_HOW_SIZE_VER0)
|
|
return -EINVAL;
|
|
|
|
ret = copy_struct_from_user(&open->how, sizeof(open->how), how, len);
|
|
if (ret)
|
|
return ret;
|
|
|
|
return __io_openat_prep(req, sqe);
|
|
}
|
|
|
|
/*
 * Issue side of OPENAT/OPENAT2.  Completes the delayed filename import
 * here, in the issuing thread, so audit finds the name in the right
 * audit_context, then performs the open and installs the result either
 * as a normal fd or into a fixed-file slot.
 */
int io_openat2(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_open *open = io_kiocb_to_cmd(req, struct io_open);
	struct open_flags op;
	struct file *file;
	bool resolve_nonblock, nonblock_set;
	bool fixed = !!open->file_slot;
	/* completes the import; the name is dropped automatically on scope exit */
	CLASS(filename_complete_delayed, name)(&open->filename);
	int ret;

	ret = build_open_flags(&open->how, &op);
	if (ret)
		goto err;
	nonblock_set = op.open_flag & O_NONBLOCK;
	resolve_nonblock = open->how.resolve & RESOLVE_CACHED;
	if (issue_flags & IO_URING_F_NONBLOCK) {
		/* force-async opens should never reach the nonblocking attempt */
		WARN_ON_ONCE(io_openat_force_async(open));
		op.lookup_flags |= LOOKUP_CACHED;
		op.open_flag |= O_NONBLOCK;
	}

	if (!fixed) {
		ret = __get_unused_fd_flags(open->how.flags, open->nofile);
		if (ret < 0)
			goto err;
	}

	file = do_filp_open(open->dfd, name, &op);
	if (IS_ERR(file)) {
		/*
		 * We could hang on to this 'fd' on retrying, but seems like
		 * marginal gain for something that is now known to be a slower
		 * path. So just put it, and we'll get a new one when we retry.
		 */
		if (!fixed)
			put_unused_fd(ret);

		ret = PTR_ERR(file);
		/* only retry if RESOLVE_CACHED wasn't already set by application */
		if (ret == -EAGAIN && !resolve_nonblock &&
		    (issue_flags & IO_URING_F_NONBLOCK)) {
			/*
			 * Re-stash the name for the async retry.
			 * no_free_ptr() transfers ownership out of the
			 * CLASS guard so its destructor won't drop it.
			 */
			ret = putname_to_delayed(&open->filename,
						 no_free_ptr(name));
			if (likely(!ret))
				return -EAGAIN;
		}
		goto err;
	}

	/* O_NONBLOCK was forced above only for the lookup; undo it if unwanted */
	if ((issue_flags & IO_URING_F_NONBLOCK) && !nonblock_set)
		file->f_flags &= ~O_NONBLOCK;

	if (!fixed)
		fd_install(ret, file);
	else
		ret = io_fixed_fd_install(req, issue_flags, file,
					  open->file_slot);
err:
	/* filename is consumed one way or another; cleanup has nothing to do */
	req->flags &= ~REQ_F_NEED_CLEANUP;
	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	return IOU_COMPLETE;
}
|
|
|
|
/*
 * Issue side of IORING_OP_OPENAT.  The two open opcodes only differ in
 * how open->how is built at prep time, so issue is shared with OPENAT2.
 */
int io_openat(struct io_kiocb *req, unsigned int issue_flags)
{
	return io_openat2(req, issue_flags);
}
|
|
|
|
void io_open_cleanup(struct io_kiocb *req)
|
|
{
|
|
struct io_open *open = io_kiocb_to_cmd(req, struct io_open);
|
|
|
|
dismiss_delayed_filename(&open->filename);
|
|
}
|
|
|
|
/*
 * Remove a file from the ring's fixed-file table at @offset.  The submit
 * lock is taken/released as dictated by @issue_flags.
 */
int __io_close_fixed(struct io_ring_ctx *ctx, unsigned int issue_flags,
		     unsigned int offset)
{
	int ret;

	io_ring_submit_lock(ctx, issue_flags);
	ret = io_fixed_fd_remove(ctx, offset);
	io_ring_submit_unlock(ctx, issue_flags);
	return ret;
}
|
|
|
|
static inline int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags)
|
|
{
|
|
struct io_close *close = io_kiocb_to_cmd(req, struct io_close);
|
|
|
|
return __io_close_fixed(req->ctx, issue_flags, close->file_slot - 1);
|
|
}
|
|
|
|
int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
|
{
|
|
struct io_close *close = io_kiocb_to_cmd(req, struct io_close);
|
|
|
|
if (sqe->off || sqe->addr || sqe->len || sqe->rw_flags || sqe->buf_index)
|
|
return -EINVAL;
|
|
if (req->flags & REQ_F_FIXED_FILE)
|
|
return -EBADF;
|
|
|
|
close->fd = READ_ONCE(sqe->fd);
|
|
close->file_slot = READ_ONCE(sqe->file_index);
|
|
if (close->file_slot && close->fd)
|
|
return -EINVAL;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Issue side of IORING_OP_CLOSE.  Fixed-file slots are handled via the
 * fixed-file table; for normal fds the lookup and removal both happen
 * under files->file_lock so the fd can't change identity in between.
 */
int io_close(struct io_kiocb *req, unsigned int issue_flags)
{
	struct files_struct *files = current->files;
	struct io_close *close = io_kiocb_to_cmd(req, struct io_close);
	struct file *file;
	int ret = -EBADF;

	if (close->file_slot) {
		ret = io_close_fixed(req, issue_flags);
		goto err;
	}

	spin_lock(&files->file_lock);
	file = files_lookup_fd_locked(files, close->fd);
	/* io_uring instance fds may not be closed through io_uring itself */
	if (!file || io_is_uring_fops(file)) {
		spin_unlock(&files->file_lock);
		goto err;
	}

	/* if the file has a flush method, be safe and punt to async */
	if (file->f_op->flush && (issue_flags & IO_URING_F_NONBLOCK)) {
		spin_unlock(&files->file_lock);
		return -EAGAIN;
	}

	/* detach the file from the fd table while still holding the lock */
	file = file_close_fd_locked(files, close->fd);
	spin_unlock(&files->file_lock);
	if (!file)
		goto err;

	/* No ->flush() or already async, safely close from here */
	ret = filp_close(file, current->files);
err:
	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	return IOU_COMPLETE;
}
|
|
|
|
int io_install_fixed_fd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
|
{
|
|
struct io_fixed_install *ifi;
|
|
unsigned int flags;
|
|
|
|
if (sqe->off || sqe->addr || sqe->len || sqe->buf_index ||
|
|
sqe->splice_fd_in || sqe->addr3)
|
|
return -EINVAL;
|
|
|
|
/* must be a fixed file */
|
|
if (!(req->flags & REQ_F_FIXED_FILE))
|
|
return -EBADF;
|
|
|
|
flags = READ_ONCE(sqe->install_fd_flags);
|
|
if (flags & ~IORING_FIXED_FD_NO_CLOEXEC)
|
|
return -EINVAL;
|
|
|
|
/* ensure the task's creds are used when installing/receiving fds */
|
|
if (req->flags & REQ_F_CREDS)
|
|
return -EPERM;
|
|
|
|
/* default to O_CLOEXEC, disable if IORING_FIXED_FD_NO_CLOEXEC is set */
|
|
ifi = io_kiocb_to_cmd(req, struct io_fixed_install);
|
|
ifi->o_flags = O_CLOEXEC;
|
|
if (flags & IORING_FIXED_FD_NO_CLOEXEC)
|
|
ifi->o_flags = 0;
|
|
|
|
return 0;
|
|
}
|
|
|
|
int io_install_fixed_fd(struct io_kiocb *req, unsigned int issue_flags)
|
|
{
|
|
struct io_fixed_install *ifi;
|
|
int ret;
|
|
|
|
ifi = io_kiocb_to_cmd(req, struct io_fixed_install);
|
|
ret = receive_fd(req->file, NULL, ifi->o_flags);
|
|
if (ret < 0)
|
|
req_set_fail(req);
|
|
io_req_set_res(req, ret, 0);
|
|
return IOU_COMPLETE;
|
|
}
|
|
|
|
/* Per-request state for IORING_OP_PIPE. */
struct io_pipe {
	struct file *file;
	int __user *fds;	/* userspace array that receives the two fds/slots */
	int flags;		/* pipe2()-style flags, validated at prep */
	int file_slot;		/* non-zero: install into fixed-file slots instead */
	unsigned long nofile;	/* RLIMIT_NOFILE snapshot taken at prep */
};
|
|
|
|
int io_pipe_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
|
{
|
|
struct io_pipe *p = io_kiocb_to_cmd(req, struct io_pipe);
|
|
|
|
if (sqe->fd || sqe->off || sqe->addr3)
|
|
return -EINVAL;
|
|
|
|
p->fds = u64_to_user_ptr(READ_ONCE(sqe->addr));
|
|
p->flags = READ_ONCE(sqe->pipe_flags);
|
|
if (p->flags & ~(O_CLOEXEC | O_NONBLOCK | O_DIRECT | O_NOTIFICATION_PIPE))
|
|
return -EINVAL;
|
|
|
|
p->file_slot = READ_ONCE(sqe->file_index);
|
|
p->nofile = rlimit(RLIMIT_NOFILE);
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Install both ends of a freshly created pipe into the fixed-file table.
 * Entries of files[] are NULLed once the table owns them; on failure any
 * slot already claimed is removed again and the caller remains
 * responsible for dropping whatever is still non-NULL in files[].
 */
static int io_pipe_fixed(struct io_kiocb *req, struct file **files,
			 unsigned int issue_flags)
{
	struct io_pipe *p = io_kiocb_to_cmd(req, struct io_pipe);
	struct io_ring_ctx *ctx = req->ctx;
	int ret, fds[2] = { -1, -1 };
	int slot = p->file_slot;

	/* fixed files are not fds; close-on-exec has no meaning for them */
	if (p->flags & O_CLOEXEC)
		return -EINVAL;

	io_ring_submit_lock(ctx, issue_flags);

	ret = __io_fixed_fd_install(ctx, files[0], slot);
	if (ret < 0)
		goto err;
	fds[0] = ret;
	files[0] = NULL;	/* ownership transferred to the table */

	/*
	 * If a specific slot is given, next one will be used for
	 * the write side.
	 */
	if (slot != IORING_FILE_INDEX_ALLOC)
		slot++;

	ret = __io_fixed_fd_install(ctx, files[1], slot);
	if (ret < 0)
		goto err;
	fds[1] = ret;
	files[1] = NULL;

	io_ring_submit_unlock(ctx, issue_flags);

	/* report the slot indices actually used back to userspace */
	if (!copy_to_user(p->fds, fds, sizeof(fds)))
		return 0;

	/* userspace never learned the slots: undo both installs under the lock */
	ret = -EFAULT;
	io_ring_submit_lock(ctx, issue_flags);
err:
	if (fds[0] != -1)
		io_fixed_fd_remove(ctx, fds[0]);
	if (fds[1] != -1)
		io_fixed_fd_remove(ctx, fds[1]);
	io_ring_submit_unlock(ctx, issue_flags);
	return ret;
}
|
|
|
|
/*
 * Hand out both pipe ends as normal fds.  fd_install() is deliberately
 * deferred until after copy_to_user() succeeds, so on any failure no fd
 * ever becomes visible and the caller still owns both files.
 */
static int io_pipe_fd(struct io_kiocb *req, struct file **files)
{
	struct io_pipe *p = io_kiocb_to_cmd(req, struct io_pipe);
	int ret, fds[2] = { -1, -1 };

	ret = __get_unused_fd_flags(p->flags, p->nofile);
	if (ret < 0)
		goto err;
	fds[0] = ret;

	ret = __get_unused_fd_flags(p->flags, p->nofile);
	if (ret < 0)
		goto err;
	fds[1] = ret;

	/* publish the fd numbers first, only then make them live */
	if (!copy_to_user(p->fds, fds, sizeof(fds))) {
		fd_install(fds[0], files[0]);
		fd_install(fds[1], files[1]);
		return 0;
	}
	ret = -EFAULT;
err:
	/* release fds that were reserved but never installed */
	if (fds[0] != -1)
		put_unused_fd(fds[0]);
	if (fds[1] != -1)
		put_unused_fd(fds[1]);
	return ret;
}
|
|
|
|
/*
 * Issue side of IORING_OP_PIPE: create the pipe files, then hand them
 * out either as fixed files or as normal fds.  On failure, any file the
 * helper did not consume (still non-NULL in files[]) is dropped here.
 */
int io_pipe(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_pipe *p = io_kiocb_to_cmd(req, struct io_pipe);
	struct file *files[2];
	int ret;

	ret = create_pipe_files(files, p->flags);
	if (ret)
		return ret;

	if (!!p->file_slot)
		ret = io_pipe_fixed(req, files, issue_flags);
	else
		ret = io_pipe_fd(req, files);

	io_req_set_res(req, ret, 0);
	if (!ret)
		return IOU_COMPLETE;

	req_set_fail(req);
	if (files[0])
		fput(files[0]);
	if (files[1])
		fput(files[1]);
	return ret;
}
|