2
0
mirror of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git synced 2025-09-04 20:19:47 +08:00
linux/drivers/md/linear.c
Linus Torvalds a682e00354 Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md
Pull md updates from Shaohua Li:
 "Mainly fixes bugs and improves performance:

   - Improve scalability for raid1 from Coly

   - Improve raid5-cache read performance, disk efficiency and IO
     pattern from Song and me

   - Fix a race condition of disk hotplug for linear from Coly

   - A few cleanup patches from Ming and Byungchul

   - Fix a memory leak from Neil

   - Fix WRITE SAME IO failure from me

   - Add doc for raid5-cache from me"

* 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md: (23 commits)
  md/raid1: fix write behind issues introduced by bio_clone_bioset_partial
  md/raid1: handle flush request correctly
  md/linear: shutup lockdep warnning
  md/raid1: fix a use-after-free bug
  RAID1: avoid unnecessary spin locks in I/O barrier code
  RAID1: a new I/O barrier implementation to remove resync window
  md/raid5: Don't reinvent the wheel but use existing llist API
  md: fast clone bio in bio_clone_mddev()
  md: remove unnecessary check on mddev
  md/raid1: use bio_clone_bioset_partial() in case of write behind
  md: fail if mddev->bio_set can't be created
  block: introduce bio_clone_bioset_partial()
  md: disable WRITE SAME if it fails in underlayer disks
  md/raid5-cache: exclude reclaiming stripes in reclaim check
  md/raid5-cache: stripe reclaim only counts valid stripes
  MD: add doc for raid5-cache
  Documentation: move MD related doc into a separate dir
  md: ensure md devices are freed before module is unloaded.
  md/r5cache: improve journal device efficiency
  md/r5cache: enable chunk_aligned_read with write back cache
  ...
2017-02-24 14:42:19 -08:00

352 lines
9.0 KiB
C

/*
linear.c : Multiple Devices driver for Linux
Copyright (C) 1994-96 Marc ZYNGIER
<zyngier@ufr-info-p7.ibp.fr> or
<maz@gloups.fdn.fr>
Linear mode management functions.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
You should have received a copy of the GNU General Public License
(for example /usr/src/linux/COPYING); if not, write to the Free
Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <linux/blkdev.h>
#include <linux/raid/md_u.h>
#include <linux/seq_file.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <trace/events/block.h>
#include "md.h"
#include "linear.h"
/*
* find which device holds a particular offset
*/
static inline struct dev_info *which_dev(struct mddev *mddev, sector_t sector)
{
int lo, mid, hi;
struct linear_conf *conf;
lo = 0;
hi = mddev->raid_disks - 1;
conf = mddev->private;
/*
* Binary Search
*/
while (hi > lo) {
mid = (hi + lo) / 2;
if (sector < conf->disks[mid].end_sector)
hi = mid;
else
lo = mid + 1;
}
return conf->disks + lo;
}
/*
* In linear_congested() conf->raid_disks is used as a copy of
* mddev->raid_disks to iterate conf->disks[], because conf->raid_disks
* and conf->disks[] are created in linear_conf(), they are always
* consitent with each other, but mddev->raid_disks does not.
*/
static int linear_congested(struct mddev *mddev, int bits)
{
struct linear_conf *conf;
int i, ret = 0;
rcu_read_lock();
conf = rcu_dereference(mddev->private);
for (i = 0; i < conf->raid_disks && !ret ; i++) {
struct request_queue *q = bdev_get_queue(conf->disks[i].rdev->bdev);
ret |= bdi_congested(q->backing_dev_info, bits);
}
rcu_read_unlock();
return ret;
}
static sector_t linear_size(struct mddev *mddev, sector_t sectors, int raid_disks)
{
struct linear_conf *conf;
sector_t array_sectors;
conf = mddev->private;
WARN_ONCE(sectors || raid_disks,
"%s does not support generic reshape\n", __func__);
array_sectors = conf->array_sectors;
return array_sectors;
}
static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks)
{
struct linear_conf *conf;
struct md_rdev *rdev;
int i, cnt;
bool discard_supported = false;
conf = kzalloc (sizeof (*conf) + raid_disks*sizeof(struct dev_info),
GFP_KERNEL);
if (!conf)
return NULL;
cnt = 0;
conf->array_sectors = 0;
rdev_for_each(rdev, mddev) {
int j = rdev->raid_disk;
struct dev_info *disk = conf->disks + j;
sector_t sectors;
if (j < 0 || j >= raid_disks || disk->rdev) {
pr_warn("md/linear:%s: disk numbering problem. Aborting!\n",
mdname(mddev));
goto out;
}
disk->rdev = rdev;
if (mddev->chunk_sectors) {
sectors = rdev->sectors;
sector_div(sectors, mddev->chunk_sectors);
rdev->sectors = sectors * mddev->chunk_sectors;
}
disk_stack_limits(mddev->gendisk, rdev->bdev,
rdev->data_offset << 9);
conf->array_sectors += rdev->sectors;
cnt++;
if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
discard_supported = true;
}
if (cnt != raid_disks) {
pr_warn("md/linear:%s: not enough drives present. Aborting!\n",
mdname(mddev));
goto out;
}
if (!discard_supported)
queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
else
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
/*
* Here we calculate the device offsets.
*/
conf->disks[0].end_sector = conf->disks[0].rdev->sectors;
for (i = 1; i < raid_disks; i++)
conf->disks[i].end_sector =
conf->disks[i-1].end_sector +
conf->disks[i].rdev->sectors;
/*
* conf->raid_disks is copy of mddev->raid_disks. The reason to
* keep a copy of mddev->raid_disks in struct linear_conf is,
* mddev->raid_disks may not be consistent with pointers number of
* conf->disks[] when it is updated in linear_add() and used to
* iterate old conf->disks[] earray in linear_congested().
* Here conf->raid_disks is always consitent with number of
* pointers in conf->disks[] array, and mddev->private is updated
* with rcu_assign_pointer() in linear_addr(), such race can be
* avoided.
*/
conf->raid_disks = raid_disks;
return conf;
out:
kfree(conf);
return NULL;
}
static int linear_run (struct mddev *mddev)
{
struct linear_conf *conf;
int ret;
if (md_check_no_bitmap(mddev))
return -EINVAL;
conf = linear_conf(mddev, mddev->raid_disks);
if (!conf)
return 1;
mddev->private = conf;
md_set_array_sectors(mddev, linear_size(mddev, 0, 0));
ret = md_integrity_register(mddev);
if (ret) {
kfree(conf);
mddev->private = NULL;
}
return ret;
}
static int linear_add(struct mddev *mddev, struct md_rdev *rdev)
{
/* Adding a drive to a linear array allows the array to grow.
* It is permitted if the new drive has a matching superblock
* already on it, with raid_disk equal to raid_disks.
* It is achieved by creating a new linear_private_data structure
* and swapping it in in-place of the current one.
* The current one is never freed until the array is stopped.
* This avoids races.
*/
struct linear_conf *newconf, *oldconf;
if (rdev->saved_raid_disk != mddev->raid_disks)
return -EINVAL;
rdev->raid_disk = rdev->saved_raid_disk;
rdev->saved_raid_disk = -1;
newconf = linear_conf(mddev,mddev->raid_disks+1);
if (!newconf)
return -ENOMEM;
/* newconf->raid_disks already keeps a copy of * the increased
* value of mddev->raid_disks, WARN_ONCE() is just used to make
* sure of this. It is possible that oldconf is still referenced
* in linear_congested(), therefore kfree_rcu() is used to free
* oldconf until no one uses it anymore.
*/
mddev_suspend(mddev);
oldconf = rcu_dereference_protected(mddev->private,
lockdep_is_held(&mddev->reconfig_mutex));
mddev->raid_disks++;
WARN_ONCE(mddev->raid_disks != newconf->raid_disks,
"copied raid_disks doesn't match mddev->raid_disks");
rcu_assign_pointer(mddev->private, newconf);
md_set_array_sectors(mddev, linear_size(mddev, 0, 0));
set_capacity(mddev->gendisk, mddev->array_sectors);
mddev_resume(mddev);
revalidate_disk(mddev->gendisk);
kfree_rcu(oldconf, rcu);
return 0;
}
static void linear_free(struct mddev *mddev, void *priv)
{
struct linear_conf *conf = priv;
kfree(conf);
}
static void linear_make_request(struct mddev *mddev, struct bio *bio)
{
char b[BDEVNAME_SIZE];
struct dev_info *tmp_dev;
struct bio *split;
sector_t start_sector, end_sector, data_offset;
if (unlikely(bio->bi_opf & REQ_PREFLUSH)) {
md_flush_request(mddev, bio);
return;
}
do {
sector_t bio_sector = bio->bi_iter.bi_sector;
tmp_dev = which_dev(mddev, bio_sector);
start_sector = tmp_dev->end_sector - tmp_dev->rdev->sectors;
end_sector = tmp_dev->end_sector;
data_offset = tmp_dev->rdev->data_offset;
bio->bi_bdev = tmp_dev->rdev->bdev;
if (unlikely(bio_sector >= end_sector ||
bio_sector < start_sector))
goto out_of_bounds;
if (unlikely(bio_end_sector(bio) > end_sector)) {
/* This bio crosses a device boundary, so we have to
* split it.
*/
split = bio_split(bio, end_sector - bio_sector,
GFP_NOIO, fs_bio_set);
bio_chain(split, bio);
} else {
split = bio;
}
split->bi_iter.bi_sector = split->bi_iter.bi_sector -
start_sector + data_offset;
if (unlikely((bio_op(split) == REQ_OP_DISCARD) &&
!blk_queue_discard(bdev_get_queue(split->bi_bdev)))) {
/* Just ignore it */
bio_endio(split);
} else {
if (mddev->gendisk)
trace_block_bio_remap(bdev_get_queue(split->bi_bdev),
split, disk_devt(mddev->gendisk),
bio_sector);
mddev_check_writesame(mddev, split);
generic_make_request(split);
}
} while (split != bio);
return;
out_of_bounds:
pr_err("md/linear:%s: make_request: Sector %llu out of bounds on dev %s: %llu sectors, offset %llu\n",
mdname(mddev),
(unsigned long long)bio->bi_iter.bi_sector,
bdevname(tmp_dev->rdev->bdev, b),
(unsigned long long)tmp_dev->rdev->sectors,
(unsigned long long)start_sector);
bio_io_error(bio);
}
static void linear_status (struct seq_file *seq, struct mddev *mddev)
{
seq_printf(seq, " %dk rounding", mddev->chunk_sectors / 2);
}
static void linear_quiesce(struct mddev *mddev, int state)
{
}
static struct md_personality linear_personality =
{
.name = "linear",
.level = LEVEL_LINEAR,
.owner = THIS_MODULE,
.make_request = linear_make_request,
.run = linear_run,
.free = linear_free,
.status = linear_status,
.hot_add_disk = linear_add,
.size = linear_size,
.quiesce = linear_quiesce,
.congested = linear_congested,
};
static int __init linear_init (void)
{
return register_md_personality (&linear_personality);
}
static void linear_exit (void)
{
unregister_md_personality (&linear_personality);
}
module_init(linear_init);
module_exit(linear_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Linear device concatenation personality for MD");
MODULE_ALIAS("md-personality-1"); /* LINEAR - deprecated*/
MODULE_ALIAS("md-linear");
MODULE_ALIAS("md-level--1");