
block-6.15-20250424

-----BEGIN PGP SIGNATURE-----
 
 iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAmgK7wMQHGF4Ym9lQGtl
 cm5lbC5kawAKCRD301j7KXHgppNwD/46vpEWhwGLeNXxFic5CCNMbUBUl04+Rc9I
 p22BY9rp1+7ooXJGJJTGaQAjwTFKP/kxyaQWZrJFXK8t2wYhJ8E2PWPVolk8jsOT
 0wSYaF9iW4kw5twcmWq+VqPM+joLGKxkwojDTvz4CiorKrq2J14yHkrtfp81R3d3
 rR7VzeggglSxEJAKkIBkbRWtMwTQ6WvImm4uufccI3AwfPJcM3qxSXGqq3wryA0O
 PyqFlkOdjDIbNP3Zu0QvqQ0xyefGCyGyAfKEPNEAn1oOpD8Y/SUvdMdlPzA9pJ93
 9+8F9pAg6fo8vgBEMavVGNjFOw4OrxNBNL9St3vlz+VMpid+HMyflolLwCTdQXCz
 HEZ+H75uwMwh3mskHp5paitdE4Y70tqXW6LWgr/5wXOsl8Lh5p1A7Ll0tP27gUe1
 vV1Yh+nwbg5TQ1qi+NmjhUThivT96hop+5nK9p5r7GHSZP1xiJdb7RsQqOhDXBmP
 I5sjc5Dny8S9b87ehX6b4VfpTbk3aRVhOaEJ4l6k4dwFFkTRP0ODa0/bWPf3Reb0
 4HI2/NYRsdLNyut2896P19RxcpcXz5PKEKvcCt5pAehwv4urIdpLoeNLgMdgwfcc
 qbtNbTkV2V/fOKG7pS6yUQmWGR/XXQZDFJ4gCFZemiYYVAXJCqjBB6dliMy8roFz
 p5Rc31AEMw==
 =s0NF
 -----END PGP SIGNATURE-----

Merge tag 'block-6.15-20250424' of git://git.kernel.dk/linux

Pull block fixes from Jens Axboe:

 - Fix autoloading of drivers from stat*(2)

 - Fix losing the read-ahead setting on suspend/resume, when a device is
   re-probed.

 - Fix race between setting the block size and page cache updates.
   Includes a helper that a coming XFS fix will use as well.

 - ublk cancelation fixes.

 - ublk selftest additions and fixes.

 - NVMe pull via Christoph:
      - fix an out-of-bounds access in nvmet_enable_port (Richard
        Weinberger)

* tag 'block-6.15-20250424' of git://git.kernel.dk/linux:
  ublk: fix race between io_uring_cmd_complete_in_task and ublk_cancel_cmd
  ublk: call ublk_dispatch_req() for handling UBLK_U_IO_NEED_GET_DATA
  block: don't autoload drivers on blk-cgroup configuration
  block: don't autoload drivers on stat
  block: remove the backing_inode variable in bdev_statx
  block: move blkdev_{get,put} _no_open prototypes out of blkdev.h
  block: never reduce ra_pages in blk_apply_bdi_limits
  selftests: ublk: common: fix _get_disk_dev_t for pre-9.0 coreutils
  selftests: ublk: remove useless 'delay_us' from 'struct dev_ctx'
  selftests: ublk: fix recover test
  block: hoist block size validation code to a separate function
  block: fix race between set_blocksize and read paths
  nvmet: fix out-of-bounds access in nvmet_enable_port
commit 7deea5634a
Linus Torvalds, 2025-04-25 11:34:39 -07:00
14 changed files with 121 additions and 47 deletions


@ -152,27 +152,65 @@ static void set_init_blocksize(struct block_device *bdev)
get_order(bsize));
}
/**
* bdev_validate_blocksize - check that this block size is acceptable
* @bdev: blockdevice to check
* @block_size: block size to check
*
* For block device users that do not use buffer heads or the block device
* page cache, make sure that this block size can be used with the device.
*
* Return: On success zero is returned, negative error code on failure.
*/
int bdev_validate_blocksize(struct block_device *bdev, int block_size)
{
if (blk_validate_block_size(block_size))
return -EINVAL;
/* Size cannot be smaller than the size supported by the device */
if (block_size < bdev_logical_block_size(bdev))
return -EINVAL;
return 0;
}
EXPORT_SYMBOL_GPL(bdev_validate_blocksize);
int set_blocksize(struct file *file, int size)
{
struct inode *inode = file->f_mapping->host;
struct block_device *bdev = I_BDEV(inode);
int ret;
if (blk_validate_block_size(size))
return -EINVAL;
/* Size cannot be smaller than the size supported by the device */
if (size < bdev_logical_block_size(bdev))
return -EINVAL;
ret = bdev_validate_blocksize(bdev, size);
if (ret)
return ret;
if (!file->private_data)
return -EINVAL;
/* Don't change the size if it is same as current */
if (inode->i_blkbits != blksize_bits(size)) {
/*
* Flush and truncate the pagecache before we reconfigure the
* mapping geometry because folio sizes are variable now. If a
* reader has already allocated a folio whose size is smaller
* than the new min_order but invokes readahead after the new
* min_order becomes visible, readahead will think there are
* "zero" blocks per folio and crash. Take the inode and
* invalidation locks to avoid racing with
* read/write/fallocate.
*/
inode_lock(inode);
filemap_invalidate_lock(inode->i_mapping);
sync_blockdev(bdev);
kill_bdev(bdev);
inode->i_blkbits = blksize_bits(size);
mapping_set_folio_min_order(inode->i_mapping, get_order(size));
kill_bdev(bdev);
filemap_invalidate_unlock(inode->i_mapping);
inode_unlock(inode);
}
return 0;
}
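Since the new helper is exported, the same validation can be reused by callers that manage their own caching of the device; below is a minimal sketch of a hypothetical filesystem-side caller (only bdev_validate_blocksize() comes from this series, the surrounding function and message are illustrative):

/*
 * Illustrative only: a filesystem that bypasses buffer heads and the
 * bdev page cache checking its block size against the device.
 * bdev_validate_blocksize() is the helper added above; everything else
 * in this function is a sketch.
 */
static int example_check_fs_blocksize(struct block_device *bdev, int fs_bsize)
{
	int ret;

	ret = bdev_validate_blocksize(bdev, fs_bsize);
	if (ret)
		pr_warn("block size %d not supported by %pg\n", fs_bsize, bdev);
	return ret;
}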
@ -777,13 +815,13 @@ static void blkdev_put_part(struct block_device *part)
blkdev_put_whole(whole);
}
struct block_device *blkdev_get_no_open(dev_t dev)
struct block_device *blkdev_get_no_open(dev_t dev, bool autoload)
{
struct block_device *bdev;
struct inode *inode;
inode = ilookup(blockdev_superblock, dev);
if (!inode && IS_ENABLED(CONFIG_BLOCK_LEGACY_AUTOLOAD)) {
if (!inode && autoload && IS_ENABLED(CONFIG_BLOCK_LEGACY_AUTOLOAD)) {
blk_request_module(dev);
inode = ilookup(blockdev_superblock, dev);
if (inode)
@ -1005,7 +1043,7 @@ struct file *bdev_file_open_by_dev(dev_t dev, blk_mode_t mode, void *holder,
if (ret)
return ERR_PTR(ret);
bdev = blkdev_get_no_open(dev);
bdev = blkdev_get_no_open(dev, true);
if (!bdev)
return ERR_PTR(-ENXIO);
@ -1274,18 +1312,15 @@ void sync_bdevs(bool wait)
*/
void bdev_statx(const struct path *path, struct kstat *stat, u32 request_mask)
{
struct inode *backing_inode;
struct block_device *bdev;
backing_inode = d_backing_inode(path->dentry);
/*
* Note that backing_inode is the inode of a block device node file,
* not the block device's internal inode. Therefore it is *not* valid
* to use I_BDEV() here; the block device has to be looked up by i_rdev
* Note that d_backing_inode() returns the block device node inode, not
* the block device's internal inode. Therefore it is *not* valid to
* use I_BDEV() here; the block device has to be looked up by i_rdev
* instead.
*/
bdev = blkdev_get_no_open(backing_inode->i_rdev);
bdev = blkdev_get_no_open(d_backing_inode(path->dentry)->i_rdev, false);
if (!bdev)
return;


@ -797,7 +797,7 @@ int blkg_conf_open_bdev(struct blkg_conf_ctx *ctx)
return -EINVAL;
input = skip_spaces(input);
bdev = blkdev_get_no_open(MKDEV(major, minor));
bdev = blkdev_get_no_open(MKDEV(major, minor), false);
if (!bdev)
return -ENODEV;
if (bdev_is_partition(bdev)) {


@ -61,8 +61,14 @@ void blk_apply_bdi_limits(struct backing_dev_info *bdi,
/*
* For read-ahead of large files to be effective, we need to read ahead
* at least twice the optimal I/O size.
*
* There is no hardware limitation for the read-ahead size and the user
* might have increased the read-ahead size through sysfs, so don't ever
* decrease it.
*/
bdi->ra_pages = max(lim->io_opt * 2 / PAGE_SIZE, VM_READAHEAD_PAGES);
bdi->ra_pages = max3(bdi->ra_pages,
lim->io_opt * 2 / PAGE_SIZE,
VM_READAHEAD_PAGES);
bdi->io_pages = lim->max_sectors >> PAGE_SECTORS_SHIFT;
}
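As an illustrative example (not from the patch): with 4 KiB pages VM_READAHEAD_PAGES is 32 pages (128 KiB), a device reporting a 1 MiB optimal I/O size gives lim->io_opt * 2 / PAGE_SIZE = 512 pages, and if the administrator had already raised read-ahead to 1024 pages through sysfs, max3() keeps 1024, so a re-probe during resume can no longer shrink the setting.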


@ -343,6 +343,7 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, blk_mode_t mode,
op = REQ_OP_ZONE_RESET;
/* Invalidate the page cache, including dirty pages. */
inode_lock(bdev->bd_mapping->host);
filemap_invalidate_lock(bdev->bd_mapping);
ret = blkdev_truncate_zone_range(bdev, mode, &zrange);
if (ret)
@ -364,8 +365,10 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, blk_mode_t mode,
ret = blkdev_zone_mgmt(bdev, op, zrange.sector, zrange.nr_sectors);
fail:
if (cmd == BLKRESETZONE)
if (cmd == BLKRESETZONE) {
filemap_invalidate_unlock(bdev->bd_mapping);
inode_unlock(bdev->bd_mapping->host);
}
return ret;
}


@ -94,6 +94,9 @@ static inline void blk_wait_io(struct completion *done)
wait_for_completion_io(done);
}
struct block_device *blkdev_get_no_open(dev_t dev, bool autoload);
void blkdev_put_no_open(struct block_device *bdev);
#define BIO_INLINE_VECS 4
struct bio_vec *bvec_alloc(mempool_t *pool, unsigned short *nr_vecs,
gfp_t gfp_mask);


@ -642,7 +642,7 @@ static int blkdev_open(struct inode *inode, struct file *filp)
if (ret)
return ret;
bdev = blkdev_get_no_open(inode->i_rdev);
bdev = blkdev_get_no_open(inode->i_rdev, true);
if (!bdev)
return -ENXIO;
@ -746,7 +746,14 @@ static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
ret = direct_write_fallback(iocb, from, ret,
blkdev_buffered_write(iocb, from));
} else {
/*
* Take i_rwsem and invalidate_lock to avoid racing with
* set_blocksize changing i_blkbits/folio order and punching
* out the pagecache.
*/
inode_lock_shared(bd_inode);
ret = blkdev_buffered_write(iocb, from);
inode_unlock_shared(bd_inode);
}
if (ret > 0)
@ -757,6 +764,7 @@ static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
struct inode *bd_inode = bdev_file_inode(iocb->ki_filp);
struct block_device *bdev = I_BDEV(iocb->ki_filp->f_mapping->host);
loff_t size = bdev_nr_bytes(bdev);
loff_t pos = iocb->ki_pos;
@ -793,7 +801,13 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
goto reexpand;
}
/*
* Take i_rwsem and invalidate_lock to avoid racing with set_blocksize
* changing i_blkbits/folio order and punching out the pagecache.
*/
inode_lock_shared(bd_inode);
ret = filemap_read(iocb, to, ret);
inode_unlock_shared(bd_inode);
reexpand:
if (unlikely(shorted))
@ -836,6 +850,7 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start,
if ((start | len) & (bdev_logical_block_size(bdev) - 1))
return -EINVAL;
inode_lock(inode);
filemap_invalidate_lock(inode->i_mapping);
/*
@ -868,6 +883,7 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start,
fail:
filemap_invalidate_unlock(inode->i_mapping);
inode_unlock(inode);
return error;
}


@ -142,6 +142,7 @@ static int blk_ioctl_discard(struct block_device *bdev, blk_mode_t mode,
if (err)
return err;
inode_lock(bdev->bd_mapping->host);
filemap_invalidate_lock(bdev->bd_mapping);
err = truncate_bdev_range(bdev, mode, start, start + len - 1);
if (err)
@ -174,6 +175,7 @@ out_unplug:
blk_finish_plug(&plug);
fail:
filemap_invalidate_unlock(bdev->bd_mapping);
inode_unlock(bdev->bd_mapping->host);
return err;
}
@ -199,12 +201,14 @@ static int blk_ioctl_secure_erase(struct block_device *bdev, blk_mode_t mode,
end > bdev_nr_bytes(bdev))
return -EINVAL;
inode_lock(bdev->bd_mapping->host);
filemap_invalidate_lock(bdev->bd_mapping);
err = truncate_bdev_range(bdev, mode, start, end - 1);
if (!err)
err = blkdev_issue_secure_erase(bdev, start >> 9, len >> 9,
GFP_KERNEL);
filemap_invalidate_unlock(bdev->bd_mapping);
inode_unlock(bdev->bd_mapping->host);
return err;
}
@ -236,6 +240,7 @@ static int blk_ioctl_zeroout(struct block_device *bdev, blk_mode_t mode,
return -EINVAL;
/* Invalidate the page cache, including dirty pages */
inode_lock(bdev->bd_mapping->host);
filemap_invalidate_lock(bdev->bd_mapping);
err = truncate_bdev_range(bdev, mode, start, end);
if (err)
@ -246,6 +251,7 @@ static int blk_ioctl_zeroout(struct block_device *bdev, blk_mode_t mode,
fail:
filemap_invalidate_unlock(bdev->bd_mapping);
inode_unlock(bdev->bd_mapping->host);
return err;
}
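Taken together these hunks keep one consistent lock order: the exclusive paths (set_blocksize, fallocate, discard, secure erase, zeroout) take i_rwsem before filemap_invalidate_lock(), while buffered reads and writes take i_rwsem shared only, which is what serializes them against a concurrent block size change.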


@ -1683,14 +1683,31 @@ static void ublk_start_cancel(struct ublk_queue *ubq)
ublk_put_disk(disk);
}
static void ublk_cancel_cmd(struct ublk_queue *ubq, struct ublk_io *io,
static void ublk_cancel_cmd(struct ublk_queue *ubq, unsigned tag,
unsigned int issue_flags)
{
struct ublk_io *io = &ubq->ios[tag];
struct ublk_device *ub = ubq->dev;
struct request *req;
bool done;
if (!(io->flags & UBLK_IO_FLAG_ACTIVE))
return;
/*
* Don't try to cancel this command if the request is started for
* avoiding race between io_uring_cmd_done() and
* io_uring_cmd_complete_in_task().
*
* Either the started request will be aborted via __ublk_abort_rq(),
* then this uring_cmd is canceled next time, or it will be done in
* task work function ublk_dispatch_req() because io_uring guarantees
* that ublk_dispatch_req() is always called
*/
req = blk_mq_tag_to_rq(ub->tag_set.tags[ubq->q_id], tag);
if (req && blk_mq_request_started(req))
return;
spin_lock(&ubq->cancel_lock);
done = !!(io->flags & UBLK_IO_FLAG_CANCELED);
if (!done)
@ -1722,7 +1739,6 @@ static void ublk_uring_cmd_cancel_fn(struct io_uring_cmd *cmd,
struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
struct ublk_queue *ubq = pdu->ubq;
struct task_struct *task;
struct ublk_io *io;
if (WARN_ON_ONCE(!ubq))
return;
@ -1737,9 +1753,8 @@ static void ublk_uring_cmd_cancel_fn(struct io_uring_cmd *cmd,
if (!ubq->canceling)
ublk_start_cancel(ubq);
io = &ubq->ios[pdu->tag];
WARN_ON_ONCE(io->cmd != cmd);
ublk_cancel_cmd(ubq, io, issue_flags);
WARN_ON_ONCE(ubq->ios[pdu->tag].cmd != cmd);
ublk_cancel_cmd(ubq, pdu->tag, issue_flags);
}
static inline bool ublk_queue_ready(struct ublk_queue *ubq)
@ -1752,7 +1767,7 @@ static void ublk_cancel_queue(struct ublk_queue *ubq)
int i;
for (i = 0; i < ubq->q_depth; i++)
ublk_cancel_cmd(ubq, &ubq->ios[i], IO_URING_F_UNLOCKED);
ublk_cancel_cmd(ubq, i, IO_URING_F_UNLOCKED);
}
/* Cancel all pending commands, must be called after del_gendisk() returns */
@ -1886,15 +1901,6 @@ static void ublk_mark_io_ready(struct ublk_device *ub, struct ublk_queue *ubq)
}
}
static void ublk_handle_need_get_data(struct ublk_device *ub, int q_id,
int tag)
{
struct ublk_queue *ubq = ublk_get_queue(ub, q_id);
struct request *req = blk_mq_tag_to_rq(ub->tag_set.tags[q_id], tag);
ublk_queue_cmd(ubq, req);
}
static inline int ublk_check_cmd_op(u32 cmd_op)
{
u32 ioc_type = _IOC_TYPE(cmd_op);
@ -2103,8 +2109,9 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV))
goto out;
ublk_fill_io_cmd(io, cmd, ub_cmd->addr);
ublk_handle_need_get_data(ub, ub_cmd->q_id, ub_cmd->tag);
break;
req = blk_mq_tag_to_rq(ub->tag_set.tags[ub_cmd->q_id], tag);
ublk_dispatch_req(ubq, req, issue_flags);
return -EIOCBQUEUED;
default:
goto out;
}


@ -324,6 +324,9 @@ int nvmet_enable_port(struct nvmet_port *port)
lockdep_assert_held(&nvmet_config_sem);
if (port->disc_addr.trtype == NVMF_TRTYPE_MAX)
return -EINVAL;
ops = nvmet_transports[port->disc_addr.trtype];
if (!ops) {
up_write(&nvmet_config_sem);
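For context: port->disc_addr.trtype is used directly as the index into nvmet_transports[] just below, so a transport type equal to NVMF_TRTYPE_MAX would read one entry past the end of that array; rejecting it before the lookup is what closes the out-of-bounds access named in the shortlog.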


@ -1637,6 +1637,7 @@ static inline void bio_end_io_acct(struct bio *bio, unsigned long start_time)
return bio_end_io_acct_remapped(bio, start_time, bio->bi_bdev);
}
int bdev_validate_blocksize(struct block_device *bdev, int block_size);
int set_blocksize(struct file *file, int size);
int lookup_bdev(const char *pathname, dev_t *dev);
@ -1693,10 +1694,6 @@ int bd_prepare_to_claim(struct block_device *bdev, void *holder,
const struct blk_holder_ops *hops);
void bd_abort_claiming(struct block_device *bdev, void *holder);
/* just for blk-cgroup, don't use elsewhere */
struct block_device *blkdev_get_no_open(dev_t dev);
void blkdev_put_no_open(struct block_device *bdev);
struct block_device *I_BDEV(struct inode *inode);
struct block_device *file_bdev(struct file *bdev_file);
bool disk_live(struct gendisk *disk);


@ -1354,6 +1354,7 @@ int main(int argc, char *argv[])
value = strtol(optarg, NULL, 10);
if (value)
ctx.flags |= UBLK_F_NEED_GET_DATA;
break;
case 0:
if (!strcmp(longopts[option_idx].name, "debug_mask"))
ublk_dbg_mask = strtol(optarg, NULL, 16);


@ -86,9 +86,6 @@ struct dev_ctx {
unsigned int fg:1;
unsigned int recovery:1;
/* fault_inject */
long long delay_us;
int _evtfd;
int _shmid;


@ -17,8 +17,8 @@ _get_disk_dev_t() {
local minor
dev=/dev/ublkb"${dev_id}"
major=$(stat -c '%Hr' "$dev")
minor=$(stat -c '%Lr' "$dev")
major="0x"$(stat -c '%t' "$dev")
minor="0x"$(stat -c '%T' "$dev")
echo $(( (major & 0xfff) << 20 | (minor & 0xfffff) ))
}
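The new stat invocations use the long-standing '%t'/'%T' hex device-type formats instead of '%Hr'/'%Lr', which only exist in coreutils 9.0 and later; the arithmetic on the last line then rebuilds the kernel dev_t (a 20-bit minor with the major number above it). A small C sketch of the same packing, with hypothetical SKETCH_* names so it is not mistaken for the real MKDEV() macro:

/* Sketch only: the packing done by the shell helper above,
 * i.e. a 20-bit minor with the major number in the bits above it. */
#define SKETCH_MINORBITS	20
#define SKETCH_MKDEV(major, minor) \
	((((major) & 0xfff) << SKETCH_MINORBITS) | ((minor) & 0xfffff))

/* Example: major 259, minor 2 packs to 0x10300002. */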


@ -3,7 +3,7 @@
. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
TID="generic_04"
TID="generic_05"
ERR_CODE=0
ublk_run_recover_test()