Mirror of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git (synced 2025-09-04 20:19:47 +08:00)
xfs: New code for 6.16
Signed-off-by: Carlos Maiolino <cem@kernel.org>

-----BEGIN PGP SIGNATURE-----

iJUEABMJAB0WIQSmtYVZ/MfVMGUq1GNcsMJ8RxYuYwUCaDQXTQAKCRBcsMJ8RxYu
YwUHAYDYYm9oit6AIr0AgTXBMJ+DHyqaszBy0VT2jQUP+yXxyrQc46QExXKU9YQV
ffmGRAsBgN7ZdDI8D5qWySyOynB3b1Jn3/0jY82GscFK0k0oX3EtxbN9MdrovbgK
qyO66BVx7w==
=pG5y
-----END PGP SIGNATURE-----

Merge tag 'xfs-merge-6.16' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull xfs updates from Carlos Maiolino:

 - Atomic writes for XFS

 - Remove experimental warnings for pNFS, scrub and parent pointers

* tag 'xfs-merge-6.16' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: (26 commits)
  xfs: add inode to zone caching for data placement
  xfs: free the item in xfs_mru_cache_insert on failure
  xfs: remove the EXPERIMENTAL warning for pNFS
  xfs: remove some EXPERIMENTAL warnings
  xfs: Remove deprecated xfs_bufd sysctl parameters
  xfs: stop using set_blocksize
  xfs: allow sysadmins to specify a maximum atomic write limit at mount time
  xfs: update atomic write limits
  xfs: add xfs_calc_atomic_write_unit_max()
  xfs: add xfs_file_dio_write_atomic()
  xfs: commit CoW-based atomic writes atomically
  xfs: add large atomic writes checks in xfs_direct_write_iomap_begin()
  xfs: add xfs_atomic_write_cow_iomap_begin()
  xfs: refine atomic write size check in xfs_file_write_iter()
  xfs: refactor xfs_reflink_end_cow_extent()
  xfs: allow block allocator to take an alignment hint
  xfs: ignore HW which cannot atomic write a single block
  xfs: add helpers to compute transaction reservation for finishing intent items
  xfs: add helpers to compute log item overhead
  xfs: separate out setting buftarg atomic writes limits
  ...
This commit is contained in: commit f83fcb87f8
@@ -151,6 +151,17 @@ When mounting an XFS filesystem, the following options are accepted.
 	optional, and the log section can be separate from the data
 	section or contained within it.
 
+  max_atomic_write=value
+	Set the maximum size of an atomic write. The size may be
+	specified in bytes, in kilobytes with a "k" suffix, in megabytes
+	with a "m" suffix, or in gigabytes with a "g" suffix. The size
+	cannot be larger than the maximum write size, larger than the
+	size of any allocation group, or larger than the size of a
+	remapping operation that the log can complete atomically.
+
+	The default value is to set the maximum I/O completion size
+	to allow each CPU to handle one at a time.
+
   max_open_zones=value
 	Specify the max number of zones to keep open for writing on a
 	zoned rt device. Many open zones aids file data separation
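For illustration of the mount option documented above, here is a hedged sketch of setting the limit at mount time through mount(2); the device path, mount point, and the 16m value are placeholders chosen for the example, not values taken from the patch.

/*
 * Hedged sketch only: roughly equivalent to
 *   mount -o max_atomic_write=16m /dev/sdX /mnt
 * The device, mount point, and limit below are illustrative placeholders.
 */
#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
	/* The option string is parsed by XFS as described in the hunk above. */
	if (mount("/dev/sdX", "/mnt", "xfs", 0, "max_atomic_write=16m") != 0) {
		perror("mount");
		return 1;
	}
	return 0;
}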
@@ -1335,7 +1335,8 @@ void bdev_statx(const struct path *path, struct kstat *stat, u32 request_mask)
 
 		generic_fill_statx_atomic_writes(stat,
 			queue_atomic_write_unit_min_bytes(bd_queue),
-			queue_atomic_write_unit_max_bytes(bd_queue));
+			queue_atomic_write_unit_max_bytes(bd_queue),
+			0);
 	}
 
 	stat->blksize = bdev_io_min(bdev);
@@ -5692,7 +5692,7 @@ int ext4_getattr(struct mnt_idmap *idmap, const struct path *path,
 			awu_max = sbi->s_awu_max;
 		}
 
-		generic_fill_statx_atomic_writes(stat, awu_min, awu_max);
+		generic_fill_statx_atomic_writes(stat, awu_min, awu_max, 0);
 	}
 
 	flags = ei->i_flags & EXT4_FL_USER_VISIBLE;
@@ -136,13 +136,15 @@ EXPORT_SYMBOL(generic_fill_statx_attr);
  * @stat: Where to fill in the attribute flags
  * @unit_min: Minimum supported atomic write length in bytes
  * @unit_max: Maximum supported atomic write length in bytes
+ * @unit_max_opt: Optimised maximum supported atomic write length in bytes
  *
  * Fill in the STATX{_ATTR}_WRITE_ATOMIC flags in the kstat structure from
  * atomic write unit_min and unit_max values.
  */
 void generic_fill_statx_atomic_writes(struct kstat *stat,
 				      unsigned int unit_min,
-				      unsigned int unit_max)
+				      unsigned int unit_max,
+				      unsigned int unit_max_opt)
 {
 	/* Confirm that the request type is known */
 	stat->result_mask |= STATX_WRITE_ATOMIC;
@@ -153,6 +155,7 @@ void generic_fill_statx_atomic_writes(struct kstat *stat,
 	if (unit_min) {
 		stat->atomic_write_unit_min = unit_min;
 		stat->atomic_write_unit_max = unit_max;
+		stat->atomic_write_unit_max_opt = unit_max_opt;
 
 		/* Initially only allow 1x segment */
 		stat->atomic_write_segments_max = 1;
@@ -741,6 +744,7 @@ cp_statx(const struct kstat *stat, struct statx __user *buffer)
 	tmp.stx_atomic_write_unit_min = stat->atomic_write_unit_min;
 	tmp.stx_atomic_write_unit_max = stat->atomic_write_unit_max;
 	tmp.stx_atomic_write_segments_max = stat->atomic_write_segments_max;
+	tmp.stx_atomic_write_unit_max_opt = stat->atomic_write_unit_max_opt;
 
 	return copy_to_user(buffer, &tmp, sizeof(tmp)) ? -EFAULT : 0;
 }
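As a companion to the cp_statx() change above, here is a hedged userspace sketch that reads back the atomic write geometry it copies out. It assumes kernel/libc headers new enough to define STATX_WRITE_ATOMIC and the stx_atomic_write_* fields; stx_atomic_write_unit_max_opt is the field added by this series and may be absent from older struct statx layouts, so it is not printed here.

/*
 * Hedged sketch: query the atomic write limits advertised via statx().
 * Assumes headers that define STATX_WRITE_ATOMIC and the fields below.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/stat.h>

int main(int argc, char **argv)
{
	struct statx stx;

	if (argc < 2)
		return 1;
	if (statx(AT_FDCWD, argv[1], 0, STATX_WRITE_ATOMIC, &stx) != 0) {
		perror("statx");
		return 1;
	}
	if (stx.stx_mask & STATX_WRITE_ATOMIC)
		printf("atomic write units: min %u max %u segments %u\n",
		       stx.stx_atomic_write_unit_min,
		       stx.stx_atomic_write_unit_max,
		       stx.stx_atomic_write_segments_max);
	return 0;
}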
@@ -3312,6 +3312,11 @@ xfs_bmap_compute_alignments(
 		align = xfs_get_cowextsz_hint(ap->ip);
 	else if (ap->datatype & XFS_ALLOC_USERDATA)
 		align = xfs_get_extsz_hint(ap->ip);
+
+	/* Try to align start block to any minimum allocation alignment */
+	if (align > 1 && (ap->flags & XFS_BMAPI_EXTSZALIGN))
+		args->alignment = align;
+
 	if (align) {
 		if (xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, align, 0,
 					ap->eof, 0, ap->conv, &ap->offset,
@@ -87,6 +87,9 @@ struct xfs_bmalloca {
 /* Do not update the rmap btree. Used for reconstructing bmbt from rmapbt. */
 #define XFS_BMAPI_NORMAP (1u << 10)
 
+/* Try to align allocations to the extent size hint */
+#define XFS_BMAPI_EXTSZALIGN (1u << 11)
+
 #define XFS_BMAPI_FLAGS \
 	{ XFS_BMAPI_ENTIRE, "ENTIRE" }, \
 	{ XFS_BMAPI_METADATA, "METADATA" }, \
@@ -98,7 +101,8 @@ struct xfs_bmalloca {
 	{ XFS_BMAPI_REMAP, "REMAP" }, \
 	{ XFS_BMAPI_COWFORK, "COWFORK" }, \
 	{ XFS_BMAPI_NODISCARD, "NODISCARD" }, \
-	{ XFS_BMAPI_NORMAP, "NORMAP" }
+	{ XFS_BMAPI_NORMAP, "NORMAP" },\
+	{ XFS_BMAPI_EXTSZALIGN, "EXTSZALIGN" }
 
 
 static inline int xfs_bmapi_aflag(int w)
@@ -91,6 +91,7 @@ xfs_log_calc_trans_resv_for_minlogblocks(
 	 */
 	if (xfs_want_minlogsize_fixes(&mp->m_sb)) {
 		xfs_trans_resv_calc(mp, resv);
+		resv->tr_atomic_ioend = M_RES(mp)->tr_atomic_ioend;
 		return;
 	}
 
@@ -107,6 +108,9 @@ xfs_log_calc_trans_resv_for_minlogblocks(
 
 	xfs_trans_resv_calc(mp, resv);
 
+	/* Copy the dynamic transaction reservation types from the running fs */
+	resv->tr_atomic_ioend = M_RES(mp)->tr_atomic_ioend;
+
 	if (xfs_has_reflink(mp)) {
 		/*
 		 * In the early days of reflink, typical log operation counts
@@ -22,6 +22,12 @@
 #include "xfs_rtbitmap.h"
 #include "xfs_attr_item.h"
 #include "xfs_log.h"
+#include "xfs_defer.h"
+#include "xfs_bmap_item.h"
+#include "xfs_extfree_item.h"
+#include "xfs_rmap_item.h"
+#include "xfs_refcount_item.h"
+#include "xfs_trace.h"
 
 #define _ALLOC true
 #define _FREE false
@@ -263,6 +269,42 @@ xfs_rtalloc_block_count(
  * register overflow from temporaries in the calculations.
  */
 
+/*
+ * Finishing a data device refcount updates (t1):
+ * the agfs of the ags containing the blocks: nr_ops * sector size
+ * the refcount btrees: nr_ops * 1 trees * (2 * max depth - 1) * block size
+ */
+inline unsigned int
+xfs_calc_finish_cui_reservation(
+	struct xfs_mount *mp,
+	unsigned int nr_ops)
+{
+	if (!xfs_has_reflink(mp))
+		return 0;
+
+	return xfs_calc_buf_res(nr_ops, mp->m_sb.sb_sectsize) +
+		xfs_calc_buf_res(xfs_refcountbt_block_count(mp, nr_ops),
+				mp->m_sb.sb_blocksize);
+}
+
+/*
+ * Realtime refcount updates (t2);
+ * the rt refcount inode
+ * the rtrefcount btrees: nr_ops * 1 trees * (2 * max depth - 1) * block size
+ */
+inline unsigned int
+xfs_calc_finish_rt_cui_reservation(
+	struct xfs_mount *mp,
+	unsigned int nr_ops)
+{
+	if (!xfs_has_rtreflink(mp))
+		return 0;
+
+	return xfs_calc_inode_res(mp, 1) +
+		xfs_calc_buf_res(xfs_rtrefcountbt_block_count(mp, nr_ops),
+				mp->m_sb.sb_blocksize);
+}
+
 /*
  * Compute the log reservation required to handle the refcount update
  * transaction. Refcount updates are always done via deferred log items.
@@ -280,19 +322,10 @@ xfs_calc_refcountbt_reservation(
 	struct xfs_mount *mp,
 	unsigned int nr_ops)
 {
-	unsigned int blksz = XFS_FSB_TO_B(mp, 1);
-	unsigned int t1, t2 = 0;
+	unsigned int t1, t2;
 
-	if (!xfs_has_reflink(mp))
-		return 0;
-
-	t1 = xfs_calc_buf_res(nr_ops, mp->m_sb.sb_sectsize) +
-		xfs_calc_buf_res(xfs_refcountbt_block_count(mp, nr_ops), blksz);
-
-	if (xfs_has_realtime(mp))
-		t2 = xfs_calc_inode_res(mp, 1) +
-			xfs_calc_buf_res(xfs_rtrefcountbt_block_count(mp, nr_ops),
-					blksz);
+	t1 = xfs_calc_finish_cui_reservation(mp, nr_ops);
+	t2 = xfs_calc_finish_rt_cui_reservation(mp, nr_ops);
 
 	return max(t1, t2);
 }
@ -379,6 +412,96 @@ xfs_calc_write_reservation_minlogsize(
|
|||||||
return xfs_calc_write_reservation(mp, true);
|
return xfs_calc_write_reservation(mp, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Finishing an EFI can free the blocks and bmap blocks (t2):
|
||||||
|
* the agf for each of the ags: nr * sector size
|
||||||
|
* the agfl for each of the ags: nr * sector size
|
||||||
|
* the super block to reflect the freed blocks: sector size
|
||||||
|
* worst case split in allocation btrees per extent assuming nr extents:
|
||||||
|
* nr exts * 2 trees * (2 * max depth - 1) * block size
|
||||||
|
*/
|
||||||
|
inline unsigned int
|
||||||
|
xfs_calc_finish_efi_reservation(
|
||||||
|
struct xfs_mount *mp,
|
||||||
|
unsigned int nr)
|
||||||
|
{
|
||||||
|
return xfs_calc_buf_res((2 * nr) + 1, mp->m_sb.sb_sectsize) +
|
||||||
|
xfs_calc_buf_res(xfs_allocfree_block_count(mp, nr),
|
||||||
|
mp->m_sb.sb_blocksize);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Or, if it's a realtime file (t3):
|
||||||
|
* the agf for each of the ags: 2 * sector size
|
||||||
|
* the agfl for each of the ags: 2 * sector size
|
||||||
|
* the super block to reflect the freed blocks: sector size
|
||||||
|
* the realtime bitmap:
|
||||||
|
* 2 exts * ((XFS_BMBT_MAX_EXTLEN / rtextsize) / NBBY) bytes
|
||||||
|
* the realtime summary: 2 exts * 1 block
|
||||||
|
* worst case split in allocation btrees per extent assuming 2 extents:
|
||||||
|
* 2 exts * 2 trees * (2 * max depth - 1) * block size
|
||||||
|
*/
|
||||||
|
inline unsigned int
|
||||||
|
xfs_calc_finish_rt_efi_reservation(
|
||||||
|
struct xfs_mount *mp,
|
||||||
|
unsigned int nr)
|
||||||
|
{
|
||||||
|
if (!xfs_has_realtime(mp))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
return xfs_calc_buf_res((2 * nr) + 1, mp->m_sb.sb_sectsize) +
|
||||||
|
xfs_calc_buf_res(xfs_rtalloc_block_count(mp, nr),
|
||||||
|
mp->m_sb.sb_blocksize) +
|
||||||
|
xfs_calc_buf_res(xfs_allocfree_block_count(mp, nr),
|
||||||
|
mp->m_sb.sb_blocksize);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Finishing an RUI is the same as an EFI. We can split the rmap btree twice
|
||||||
|
* on each end of the record, and that can cause the AGFL to be refilled or
|
||||||
|
* emptied out.
|
||||||
|
*/
|
||||||
|
inline unsigned int
|
||||||
|
xfs_calc_finish_rui_reservation(
|
||||||
|
struct xfs_mount *mp,
|
||||||
|
unsigned int nr)
|
||||||
|
{
|
||||||
|
if (!xfs_has_rmapbt(mp))
|
||||||
|
return 0;
|
||||||
|
return xfs_calc_finish_efi_reservation(mp, nr);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Finishing an RUI is the same as an EFI. We can split the rmap btree twice
|
||||||
|
* on each end of the record, and that can cause the AGFL to be refilled or
|
||||||
|
* emptied out.
|
||||||
|
*/
|
||||||
|
inline unsigned int
|
||||||
|
xfs_calc_finish_rt_rui_reservation(
|
||||||
|
struct xfs_mount *mp,
|
||||||
|
unsigned int nr)
|
||||||
|
{
|
||||||
|
if (!xfs_has_rtrmapbt(mp))
|
||||||
|
return 0;
|
||||||
|
return xfs_calc_finish_rt_efi_reservation(mp, nr);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* In finishing a BUI, we can modify:
|
||||||
|
* the inode being truncated: inode size
|
||||||
|
* dquots
|
||||||
|
* the inode's bmap btree: (max depth + 1) * block size
|
||||||
|
*/
|
||||||
|
inline unsigned int
|
||||||
|
xfs_calc_finish_bui_reservation(
|
||||||
|
struct xfs_mount *mp,
|
||||||
|
unsigned int nr)
|
||||||
|
{
|
||||||
|
return xfs_calc_inode_res(mp, 1) + XFS_DQUOT_LOGRES +
|
||||||
|
xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1,
|
||||||
|
mp->m_sb.sb_blocksize);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* In truncating a file we free up to two extents at once. We can modify (t1):
|
* In truncating a file we free up to two extents at once. We can modify (t1):
|
||||||
* the inode being truncated: inode size
|
* the inode being truncated: inode size
|
||||||
@@ -411,16 +534,8 @@ xfs_calc_itruncate_reservation(
 	t1 = xfs_calc_inode_res(mp, 1) +
 		xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1, blksz);
 
-	t2 = xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
-		xfs_calc_buf_res(xfs_allocfree_block_count(mp, 4), blksz);
+	t2 = xfs_calc_finish_efi_reservation(mp, 4);
+	t3 = xfs_calc_finish_rt_efi_reservation(mp, 2);
 
-	if (xfs_has_realtime(mp)) {
-		t3 = xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
-			xfs_calc_buf_res(xfs_rtalloc_block_count(mp, 2), blksz) +
-			xfs_calc_buf_res(xfs_allocfree_block_count(mp, 2), blksz);
-	} else {
-		t3 = 0;
-	}
-
 	/*
 	 * In the early days of reflink, we included enough reservation to log
@@ -501,9 +616,7 @@ xfs_calc_rename_reservation(
 		xfs_calc_buf_res(2 * XFS_DIROP_LOG_COUNT(mp),
 			XFS_FSB_TO_B(mp, 1));
 
-	t2 = xfs_calc_buf_res(7, mp->m_sb.sb_sectsize) +
-		xfs_calc_buf_res(xfs_allocfree_block_count(mp, 3),
-			XFS_FSB_TO_B(mp, 1));
+	t2 = xfs_calc_finish_efi_reservation(mp, 3);
 
 	if (xfs_has_parent(mp)) {
 		unsigned int rename_overhead, exchange_overhead;
@@ -611,9 +724,7 @@ xfs_calc_link_reservation(
 	overhead += xfs_calc_iunlink_remove_reservation(mp);
 	t1 = xfs_calc_inode_res(mp, 2) +
 		xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1));
-	t2 = xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
-		xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1),
-			XFS_FSB_TO_B(mp, 1));
+	t2 = xfs_calc_finish_efi_reservation(mp, 1);
 
 	if (xfs_has_parent(mp)) {
 		t3 = resp->tr_attrsetm.tr_logres;
@@ -676,9 +787,7 @@ xfs_calc_remove_reservation(
 
 	t1 = xfs_calc_inode_res(mp, 2) +
 		xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1));
-	t2 = xfs_calc_buf_res(4, mp->m_sb.sb_sectsize) +
-		xfs_calc_buf_res(xfs_allocfree_block_count(mp, 2),
-			XFS_FSB_TO_B(mp, 1));
+	t2 = xfs_calc_finish_efi_reservation(mp, 2);
 
 	if (xfs_has_parent(mp)) {
 		t3 = resp->tr_attrrm.tr_logres;
@@ -1181,6 +1290,15 @@ xfs_calc_namespace_reservations(
 	resp->tr_mkdir.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
 }
 
+STATIC void
+xfs_calc_default_atomic_ioend_reservation(
+	struct xfs_mount *mp,
+	struct xfs_trans_resv *resp)
+{
+	/* Pick a default that will scale reasonably for the log size. */
+	resp->tr_atomic_ioend = resp->tr_itruncate;
+}
+
 void
 xfs_trans_resv_calc(
 	struct xfs_mount *mp,
@ -1275,4 +1393,167 @@ xfs_trans_resv_calc(
|
|||||||
resp->tr_itruncate.tr_logcount += logcount_adj;
|
resp->tr_itruncate.tr_logcount += logcount_adj;
|
||||||
resp->tr_write.tr_logcount += logcount_adj;
|
resp->tr_write.tr_logcount += logcount_adj;
|
||||||
resp->tr_qm_dqalloc.tr_logcount += logcount_adj;
|
resp->tr_qm_dqalloc.tr_logcount += logcount_adj;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Now that we've finished computing the static reservations, we can
|
||||||
|
* compute the dynamic reservation for atomic writes.
|
||||||
|
*/
|
||||||
|
xfs_calc_default_atomic_ioend_reservation(mp, resp);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Return the per-extent and fixed transaction reservation sizes needed to
|
||||||
|
* complete an atomic write.
|
||||||
|
*/
|
||||||
|
STATIC unsigned int
|
||||||
|
xfs_calc_atomic_write_ioend_geometry(
|
||||||
|
struct xfs_mount *mp,
|
||||||
|
unsigned int *step_size)
|
||||||
|
{
|
||||||
|
const unsigned int efi = xfs_efi_log_space(1);
|
||||||
|
const unsigned int efd = xfs_efd_log_space(1);
|
||||||
|
const unsigned int rui = xfs_rui_log_space(1);
|
||||||
|
const unsigned int rud = xfs_rud_log_space();
|
||||||
|
const unsigned int cui = xfs_cui_log_space(1);
|
||||||
|
const unsigned int cud = xfs_cud_log_space();
|
||||||
|
const unsigned int bui = xfs_bui_log_space(1);
|
||||||
|
const unsigned int bud = xfs_bud_log_space();
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Maximum overhead to complete an atomic write ioend in software:
|
||||||
|
* remove data fork extent + remove cow fork extent + map extent into
|
||||||
|
* data fork.
|
||||||
|
*
|
||||||
|
* tx0: Creates a BUI and a CUI and that's all it needs.
|
||||||
|
*
|
||||||
|
* tx1: Roll to finish the BUI. Need space for the BUD, an RUI, and
|
||||||
|
* enough space to relog the CUI (== CUI + CUD).
|
||||||
|
*
|
||||||
|
* tx2: Roll again to finish the RUI. Need space for the RUD and space
|
||||||
|
* to relog the CUI.
|
||||||
|
*
|
||||||
|
* tx3: Roll again, need space for the CUD and possibly a new EFI.
|
||||||
|
*
|
||||||
|
* tx4: Roll again, need space for an EFD.
|
||||||
|
*
|
||||||
|
* If the extent referenced by the pair of BUI/CUI items is not the one
|
||||||
|
* being currently processed, then we need to reserve space to relog
|
||||||
|
* both items.
|
||||||
|
*/
|
||||||
|
const unsigned int tx0 = bui + cui;
|
||||||
|
const unsigned int tx1 = bud + rui + cui + cud;
|
||||||
|
const unsigned int tx2 = rud + cui + cud;
|
||||||
|
const unsigned int tx3 = cud + efi;
|
||||||
|
const unsigned int tx4 = efd;
|
||||||
|
const unsigned int relog = bui + bud + cui + cud;
|
||||||
|
|
||||||
|
const unsigned int per_intent = max(max3(tx0, tx1, tx2),
|
||||||
|
max3(tx3, tx4, relog));
|
||||||
|
|
||||||
|
/* Overhead to finish one step of each intent item type */
|
||||||
|
const unsigned int f1 = xfs_calc_finish_efi_reservation(mp, 1);
|
||||||
|
const unsigned int f2 = xfs_calc_finish_rui_reservation(mp, 1);
|
||||||
|
const unsigned int f3 = xfs_calc_finish_cui_reservation(mp, 1);
|
||||||
|
const unsigned int f4 = xfs_calc_finish_bui_reservation(mp, 1);
|
||||||
|
|
||||||
|
/* We only finish one item per transaction in a chain */
|
||||||
|
*step_size = max(f4, max3(f1, f2, f3));
|
||||||
|
|
||||||
|
return per_intent;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Compute the maximum size (in fsblocks) of atomic writes that we can complete
|
||||||
|
* given the existing log reservations.
|
||||||
|
*/
|
||||||
|
xfs_extlen_t
|
||||||
|
xfs_calc_max_atomic_write_fsblocks(
|
||||||
|
struct xfs_mount *mp)
|
||||||
|
{
|
||||||
|
const struct xfs_trans_res *resv = &M_RES(mp)->tr_atomic_ioend;
|
||||||
|
unsigned int per_intent = 0;
|
||||||
|
unsigned int step_size = 0;
|
||||||
|
unsigned int ret = 0;
|
||||||
|
|
||||||
|
if (resv->tr_logres > 0) {
|
||||||
|
per_intent = xfs_calc_atomic_write_ioend_geometry(mp,
|
||||||
|
&step_size);
|
||||||
|
|
||||||
|
if (resv->tr_logres >= step_size)
|
||||||
|
ret = (resv->tr_logres - step_size) / per_intent;
|
||||||
|
}
|
||||||
|
|
||||||
|
trace_xfs_calc_max_atomic_write_fsblocks(mp, per_intent, step_size,
|
||||||
|
resv->tr_logres, ret);
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Compute the log blocks and transaction reservation needed to complete an
|
||||||
|
* atomic write of a given number of blocks. Worst case, each block requires
|
||||||
|
* separate handling. A return value of 0 means something went wrong.
|
||||||
|
*/
|
||||||
|
xfs_extlen_t
|
||||||
|
xfs_calc_atomic_write_log_geometry(
|
||||||
|
struct xfs_mount *mp,
|
||||||
|
xfs_extlen_t blockcount,
|
||||||
|
unsigned int *new_logres)
|
||||||
|
{
|
||||||
|
struct xfs_trans_res *curr_res = &M_RES(mp)->tr_atomic_ioend;
|
||||||
|
uint old_logres = curr_res->tr_logres;
|
||||||
|
unsigned int per_intent, step_size;
|
||||||
|
unsigned int logres;
|
||||||
|
xfs_extlen_t min_logblocks;
|
||||||
|
|
||||||
|
ASSERT(blockcount > 0);
|
||||||
|
|
||||||
|
xfs_calc_default_atomic_ioend_reservation(mp, M_RES(mp));
|
||||||
|
|
||||||
|
per_intent = xfs_calc_atomic_write_ioend_geometry(mp, &step_size);
|
||||||
|
|
||||||
|
/* Check for overflows */
|
||||||
|
if (check_mul_overflow(blockcount, per_intent, &logres) ||
|
||||||
|
check_add_overflow(logres, step_size, &logres))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
curr_res->tr_logres = logres;
|
||||||
|
min_logblocks = xfs_log_calc_minimum_size(mp);
|
||||||
|
curr_res->tr_logres = old_logres;
|
||||||
|
|
||||||
|
trace_xfs_calc_max_atomic_write_log_geometry(mp, per_intent, step_size,
|
||||||
|
blockcount, min_logblocks, logres);
|
||||||
|
|
||||||
|
*new_logres = logres;
|
||||||
|
return min_logblocks;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Compute the transaction reservation needed to complete an out of place
|
||||||
|
* atomic write of a given number of blocks.
|
||||||
|
*/
|
||||||
|
int
|
||||||
|
xfs_calc_atomic_write_reservation(
|
||||||
|
struct xfs_mount *mp,
|
||||||
|
xfs_extlen_t blockcount)
|
||||||
|
{
|
||||||
|
unsigned int new_logres;
|
||||||
|
xfs_extlen_t min_logblocks;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If the caller doesn't ask for a specific atomic write size, then
|
||||||
|
* use the defaults.
|
||||||
|
*/
|
||||||
|
if (blockcount == 0) {
|
||||||
|
xfs_calc_default_atomic_ioend_reservation(mp, M_RES(mp));
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
min_logblocks = xfs_calc_atomic_write_log_geometry(mp, blockcount,
|
||||||
|
&new_logres);
|
||||||
|
if (!min_logblocks || min_logblocks > mp->m_sb.sb_logblocks)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
M_RES(mp)->tr_atomic_ioend.tr_logres = new_logres;
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
@@ -48,6 +48,7 @@ struct xfs_trans_resv {
 	struct xfs_trans_res tr_qm_dqalloc; /* allocate quota on disk */
 	struct xfs_trans_res tr_sb; /* modify superblock */
 	struct xfs_trans_res tr_fsyncts; /* update timestamps on fsync */
+	struct xfs_trans_res tr_atomic_ioend; /* untorn write completion */
 };
 
 /* shorthand way of accessing reservation structure */
@@ -98,8 +99,32 @@ struct xfs_trans_resv {
 void xfs_trans_resv_calc(struct xfs_mount *mp, struct xfs_trans_resv *resp);
 uint xfs_allocfree_block_count(struct xfs_mount *mp, uint num_ops);
 
+unsigned int xfs_calc_finish_bui_reservation(struct xfs_mount *mp,
+		unsigned int nr_ops);
+
+unsigned int xfs_calc_finish_efi_reservation(struct xfs_mount *mp,
+		unsigned int nr_ops);
+unsigned int xfs_calc_finish_rt_efi_reservation(struct xfs_mount *mp,
+		unsigned int nr_ops);
+
+unsigned int xfs_calc_finish_rui_reservation(struct xfs_mount *mp,
+		unsigned int nr_ops);
+unsigned int xfs_calc_finish_rt_rui_reservation(struct xfs_mount *mp,
+		unsigned int nr_ops);
+
+unsigned int xfs_calc_finish_cui_reservation(struct xfs_mount *mp,
+		unsigned int nr_ops);
+unsigned int xfs_calc_finish_rt_cui_reservation(struct xfs_mount *mp,
+		unsigned int nr_ops);
+
 unsigned int xfs_calc_itruncate_reservation_minlogsize(struct xfs_mount *mp);
 unsigned int xfs_calc_write_reservation_minlogsize(struct xfs_mount *mp);
 unsigned int xfs_calc_qm_dqalloc_reservation_minlogsize(struct xfs_mount *mp);
 
+xfs_extlen_t xfs_calc_max_atomic_write_fsblocks(struct xfs_mount *mp);
+xfs_extlen_t xfs_calc_atomic_write_log_geometry(struct xfs_mount *mp,
+		xfs_extlen_t blockcount, unsigned int *new_logres);
+int xfs_calc_atomic_write_reservation(struct xfs_mount *mp,
+		xfs_extlen_t blockcount);
+
 #endif /* __XFS_TRANS_RESV_H__ */
@@ -680,8 +680,6 @@ xfs_scrub_metadata(
 	if (error)
 		goto out;
 
-	xfs_warn_experimental(mp, XFS_EXPERIMENTAL_SCRUB);
-
 	sc = kzalloc(sizeof(struct xfs_scrub), XCHK_GFP_FLAGS);
 	if (!sc) {
 		error = -ENOMEM;
@@ -77,6 +77,11 @@ xfs_bui_item_size(
 	*nbytes += xfs_bui_log_format_sizeof(buip->bui_format.bui_nextents);
 }
 
+unsigned int xfs_bui_log_space(unsigned int nr)
+{
+	return xlog_item_space(1, xfs_bui_log_format_sizeof(nr));
+}
+
 /*
  * This is called to fill in the vector of log iovecs for the
  * given bui log item. We use only 1 iovec, and we point that
@@ -168,6 +173,11 @@ xfs_bud_item_size(
 	*nbytes += sizeof(struct xfs_bud_log_format);
 }
 
+unsigned int xfs_bud_log_space(void)
+{
+	return xlog_item_space(1, sizeof(struct xfs_bud_log_format));
+}
+
 /*
  * This is called to fill in the vector of log iovecs for the
  * given bud log item. We use only 1 iovec, and we point that
@@ -72,4 +72,7 @@ struct xfs_bmap_intent;
 
 void xfs_bmap_defer_add(struct xfs_trans *tp, struct xfs_bmap_intent *bi);
 
+unsigned int xfs_bui_log_space(unsigned int nr);
+unsigned int xfs_bud_log_space(void);
+
 #endif /* __XFS_BMAP_ITEM_H__ */
@ -1687,23 +1687,65 @@ xfs_free_buftarg(
|
|||||||
kfree(btp);
|
kfree(btp);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Configure this buffer target for hardware-assisted atomic writes if the
|
||||||
|
* underlying block device supports is congruent with the filesystem geometry.
|
||||||
|
*/
|
||||||
|
static inline void
|
||||||
|
xfs_configure_buftarg_atomic_writes(
|
||||||
|
struct xfs_buftarg *btp)
|
||||||
|
{
|
||||||
|
struct xfs_mount *mp = btp->bt_mount;
|
||||||
|
unsigned int min_bytes, max_bytes;
|
||||||
|
|
||||||
|
min_bytes = bdev_atomic_write_unit_min_bytes(btp->bt_bdev);
|
||||||
|
max_bytes = bdev_atomic_write_unit_max_bytes(btp->bt_bdev);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Ignore atomic write geometry that is nonsense or doesn't even cover
|
||||||
|
* a single fsblock.
|
||||||
|
*/
|
||||||
|
if (min_bytes > max_bytes ||
|
||||||
|
min_bytes > mp->m_sb.sb_blocksize ||
|
||||||
|
max_bytes < mp->m_sb.sb_blocksize) {
|
||||||
|
min_bytes = 0;
|
||||||
|
max_bytes = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
btp->bt_bdev_awu_min = min_bytes;
|
||||||
|
btp->bt_bdev_awu_max = max_bytes;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Configure a buffer target that abstracts a block device. */
|
||||||
int
|
int
|
||||||
xfs_setsize_buftarg(
|
xfs_configure_buftarg(
|
||||||
struct xfs_buftarg *btp,
|
struct xfs_buftarg *btp,
|
||||||
unsigned int sectorsize)
|
unsigned int sectorsize)
|
||||||
{
|
{
|
||||||
|
int error;
|
||||||
|
|
||||||
|
ASSERT(btp->bt_bdev != NULL);
|
||||||
|
|
||||||
/* Set up metadata sector size info */
|
/* Set up metadata sector size info */
|
||||||
btp->bt_meta_sectorsize = sectorsize;
|
btp->bt_meta_sectorsize = sectorsize;
|
||||||
btp->bt_meta_sectormask = sectorsize - 1;
|
btp->bt_meta_sectormask = sectorsize - 1;
|
||||||
|
|
||||||
if (set_blocksize(btp->bt_bdev_file, sectorsize)) {
|
error = bdev_validate_blocksize(btp->bt_bdev, sectorsize);
|
||||||
|
if (error) {
|
||||||
xfs_warn(btp->bt_mount,
|
xfs_warn(btp->bt_mount,
|
||||||
"Cannot set_blocksize to %u on device %pg",
|
"Cannot use blocksize %u on device %pg, err %d",
|
||||||
sectorsize, btp->bt_bdev);
|
sectorsize, btp->bt_bdev, error);
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
/*
|
||||||
|
* Flush the block device pagecache so our bios see anything dirtied
|
||||||
|
* before mount.
|
||||||
|
*/
|
||||||
|
if (bdev_can_atomic_write(btp->bt_bdev))
|
||||||
|
xfs_configure_buftarg_atomic_writes(btp);
|
||||||
|
|
||||||
|
return sync_blockdev(btp->bt_bdev);
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
@@ -1752,6 +1794,8 @@ xfs_alloc_buftarg(
 {
 	struct xfs_buftarg *btp;
 	const struct dax_holder_operations *ops = NULL;
+	int error;
+
 
 #if defined(CONFIG_FS_DAX) && defined(CONFIG_MEMORY_FAILURE)
 	ops = &xfs_dax_holder_operations;
@ -1765,28 +1809,31 @@ xfs_alloc_buftarg(
|
|||||||
btp->bt_daxdev = fs_dax_get_by_bdev(btp->bt_bdev, &btp->bt_dax_part_off,
|
btp->bt_daxdev = fs_dax_get_by_bdev(btp->bt_bdev, &btp->bt_dax_part_off,
|
||||||
mp, ops);
|
mp, ops);
|
||||||
|
|
||||||
if (bdev_can_atomic_write(btp->bt_bdev)) {
|
/*
|
||||||
btp->bt_bdev_awu_min = bdev_atomic_write_unit_min_bytes(
|
* Flush and invalidate all devices' pagecaches before reading any
|
||||||
btp->bt_bdev);
|
* metadata because XFS doesn't use the bdev pagecache.
|
||||||
btp->bt_bdev_awu_max = bdev_atomic_write_unit_max_bytes(
|
*/
|
||||||
btp->bt_bdev);
|
error = sync_blockdev(btp->bt_bdev);
|
||||||
}
|
if (error)
|
||||||
|
goto error_free;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* When allocating the buftargs we have not yet read the super block and
|
* When allocating the buftargs we have not yet read the super block and
|
||||||
* thus don't know the file system sector size yet.
|
* thus don't know the file system sector size yet.
|
||||||
*/
|
*/
|
||||||
if (xfs_setsize_buftarg(btp, bdev_logical_block_size(btp->bt_bdev)))
|
btp->bt_meta_sectorsize = bdev_logical_block_size(btp->bt_bdev);
|
||||||
goto error_free;
|
btp->bt_meta_sectormask = btp->bt_meta_sectorsize - 1;
|
||||||
if (xfs_init_buftarg(btp, bdev_logical_block_size(btp->bt_bdev),
|
|
||||||
mp->m_super->s_id))
|
error = xfs_init_buftarg(btp, btp->bt_meta_sectorsize,
|
||||||
|
mp->m_super->s_id);
|
||||||
|
if (error)
|
||||||
goto error_free;
|
goto error_free;
|
||||||
|
|
||||||
return btp;
|
return btp;
|
||||||
|
|
||||||
error_free:
|
error_free:
|
||||||
kfree(btp);
|
kfree(btp);
|
||||||
return NULL;
|
return ERR_PTR(error);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void
|
static inline void
|
||||||
|
@@ -112,7 +112,7 @@ struct xfs_buftarg {
 	struct percpu_counter bt_readahead_count;
 	struct ratelimit_state bt_ioerror_rl;
 
-	/* Atomic write unit values */
+	/* Atomic write unit values, bytes */
 	unsigned int bt_bdev_awu_min;
 	unsigned int bt_bdev_awu_max;
 
@@ -374,7 +374,7 @@ struct xfs_buftarg *xfs_alloc_buftarg(struct xfs_mount *mp,
 extern void xfs_free_buftarg(struct xfs_buftarg *);
 extern void xfs_buftarg_wait(struct xfs_buftarg *);
 extern void xfs_buftarg_drain(struct xfs_buftarg *);
-extern int xfs_setsize_buftarg(struct xfs_buftarg *, unsigned int);
+int xfs_configure_buftarg(struct xfs_buftarg *btp, unsigned int sectorsize);
 
 #define xfs_getsize_buftarg(buftarg) block_size((buftarg)->bt_bdev)
 #define xfs_readonly_buftarg(buftarg) bdev_read_only((buftarg)->bt_bdev)
@ -103,6 +103,25 @@ xfs_buf_item_size_segment(
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Compute the worst case log item overhead for an invalidated buffer with the
|
||||||
|
* given map count and block size.
|
||||||
|
*/
|
||||||
|
unsigned int
|
||||||
|
xfs_buf_inval_log_space(
|
||||||
|
unsigned int map_count,
|
||||||
|
unsigned int blocksize)
|
||||||
|
{
|
||||||
|
unsigned int chunks = DIV_ROUND_UP(blocksize, XFS_BLF_CHUNK);
|
||||||
|
unsigned int bitmap_size = DIV_ROUND_UP(chunks, NBWORD);
|
||||||
|
unsigned int ret =
|
||||||
|
offsetof(struct xfs_buf_log_format, blf_data_map) +
|
||||||
|
(bitmap_size * sizeof_field(struct xfs_buf_log_format,
|
||||||
|
blf_data_map[0]));
|
||||||
|
|
||||||
|
return ret * map_count;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Return the number of log iovecs and space needed to log the given buf log
|
* Return the number of log iovecs and space needed to log the given buf log
|
||||||
* item.
|
* item.
|
||||||
|
@@ -64,6 +64,9 @@ static inline void xfs_buf_dquot_iodone(struct xfs_buf *bp)
 void xfs_buf_iodone(struct xfs_buf *);
 bool xfs_buf_log_check_iovec(struct xfs_log_iovec *iovec);
 
+unsigned int xfs_buf_inval_log_space(unsigned int map_count,
+		unsigned int blocksize);
+
 extern struct kmem_cache *xfs_buf_item_cache;
 
 #endif /* __XFS_BUF_ITEM_H__ */
@ -167,6 +167,14 @@ xfs_discard_extents(
|
|||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Care must be taken setting up the trim cursor as the perags may not have been
|
||||||
|
* initialised when the cursor is initialised. e.g. a clean mount which hasn't
|
||||||
|
* read in AGFs and the first operation run on the mounted fs is a trim. This
|
||||||
|
* can result in perag fields that aren't initialised until
|
||||||
|
* xfs_trim_gather_extents() calls xfs_alloc_read_agf() to lock down the AG for
|
||||||
|
* the free space search.
|
||||||
|
*/
|
||||||
struct xfs_trim_cur {
|
struct xfs_trim_cur {
|
||||||
xfs_agblock_t start;
|
xfs_agblock_t start;
|
||||||
xfs_extlen_t count;
|
xfs_extlen_t count;
|
||||||
@ -204,6 +212,14 @@ xfs_trim_gather_extents(
|
|||||||
if (error)
|
if (error)
|
||||||
goto out_trans_cancel;
|
goto out_trans_cancel;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* First time through tcur->count will not have been initialised as
|
||||||
|
* pag->pagf_longest is not guaranteed to be valid before we read
|
||||||
|
* the AGF buffer above.
|
||||||
|
*/
|
||||||
|
if (!tcur->count)
|
||||||
|
tcur->count = pag->pagf_longest;
|
||||||
|
|
||||||
if (tcur->by_bno) {
|
if (tcur->by_bno) {
|
||||||
/* sub-AG discard request always starts at tcur->start */
|
/* sub-AG discard request always starts at tcur->start */
|
||||||
cur = xfs_bnobt_init_cursor(mp, tp, agbp, pag);
|
cur = xfs_bnobt_init_cursor(mp, tp, agbp, pag);
|
||||||
@ -350,7 +366,6 @@ xfs_trim_perag_extents(
|
|||||||
{
|
{
|
||||||
struct xfs_trim_cur tcur = {
|
struct xfs_trim_cur tcur = {
|
||||||
.start = start,
|
.start = start,
|
||||||
.count = pag->pagf_longest,
|
|
||||||
.end = end,
|
.end = end,
|
||||||
.minlen = minlen,
|
.minlen = minlen,
|
||||||
};
|
};
|
||||||
|
@@ -83,6 +83,11 @@ xfs_efi_item_size(
 	*nbytes += xfs_efi_log_format_sizeof(efip->efi_format.efi_nextents);
 }
 
+unsigned int xfs_efi_log_space(unsigned int nr)
+{
+	return xlog_item_space(1, xfs_efi_log_format_sizeof(nr));
+}
+
 /*
  * This is called to fill in the vector of log iovecs for the
  * given efi log item. We use only 1 iovec, and we point that
@@ -254,6 +259,11 @@ xfs_efd_item_size(
 	*nbytes += xfs_efd_log_format_sizeof(efdp->efd_format.efd_nextents);
 }
 
+unsigned int xfs_efd_log_space(unsigned int nr)
+{
+	return xlog_item_space(1, xfs_efd_log_format_sizeof(nr));
+}
+
 /*
  * This is called to fill in the vector of log iovecs for the
  * given efd log item. We use only 1 iovec, and we point that
@@ -94,4 +94,7 @@ void xfs_extent_free_defer_add(struct xfs_trans *tp,
 		struct xfs_extent_free_item *xefi,
 		struct xfs_defer_pending **dfpp);
 
+unsigned int xfs_efi_log_space(unsigned int nr);
+unsigned int xfs_efd_log_space(unsigned int nr);
+
 #endif /* __XFS_EXTFREE_ITEM_H__ */
@@ -576,6 +576,9 @@ xfs_dio_write_end_io(
 	nofs_flag = memalloc_nofs_save();
 
 	if (flags & IOMAP_DIO_COW) {
+		if (iocb->ki_flags & IOCB_ATOMIC)
+			error = xfs_reflink_end_atomic_cow(ip, offset, size);
+		else
 			error = xfs_reflink_end_cow(ip, offset, size);
 		if (error)
 			goto out;
@ -725,6 +728,72 @@ xfs_file_dio_write_zoned(
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Handle block atomic writes
|
||||||
|
*
|
||||||
|
* Two methods of atomic writes are supported:
|
||||||
|
* - REQ_ATOMIC-based, which would typically use some form of HW offload in the
|
||||||
|
* disk
|
||||||
|
* - COW-based, which uses a COW fork as a staging extent for data updates
|
||||||
|
* before atomically updating extent mappings for the range being written
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
static noinline ssize_t
|
||||||
|
xfs_file_dio_write_atomic(
|
||||||
|
struct xfs_inode *ip,
|
||||||
|
struct kiocb *iocb,
|
||||||
|
struct iov_iter *from)
|
||||||
|
{
|
||||||
|
unsigned int iolock = XFS_IOLOCK_SHARED;
|
||||||
|
ssize_t ret, ocount = iov_iter_count(from);
|
||||||
|
const struct iomap_ops *dops;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* HW offload should be faster, so try that first if it is already
|
||||||
|
* known that the write length is not too large.
|
||||||
|
*/
|
||||||
|
if (ocount > xfs_inode_buftarg(ip)->bt_bdev_awu_max)
|
||||||
|
dops = &xfs_atomic_write_cow_iomap_ops;
|
||||||
|
else
|
||||||
|
dops = &xfs_direct_write_iomap_ops;
|
||||||
|
|
||||||
|
retry:
|
||||||
|
ret = xfs_ilock_iocb_for_write(iocb, &iolock);
|
||||||
|
if (ret)
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
ret = xfs_file_write_checks(iocb, from, &iolock, NULL);
|
||||||
|
if (ret)
|
||||||
|
goto out_unlock;
|
||||||
|
|
||||||
|
/* Demote similar to xfs_file_dio_write_aligned() */
|
||||||
|
if (iolock == XFS_IOLOCK_EXCL) {
|
||||||
|
xfs_ilock_demote(ip, XFS_IOLOCK_EXCL);
|
||||||
|
iolock = XFS_IOLOCK_SHARED;
|
||||||
|
}
|
||||||
|
|
||||||
|
trace_xfs_file_direct_write(iocb, from);
|
||||||
|
ret = iomap_dio_rw(iocb, from, dops, &xfs_dio_write_ops,
|
||||||
|
0, NULL, 0);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The retry mechanism is based on the ->iomap_begin method returning
|
||||||
|
* -ENOPROTOOPT, which would be when the REQ_ATOMIC-based write is not
|
||||||
|
* possible. The REQ_ATOMIC-based method typically not be possible if
|
||||||
|
* the write spans multiple extents or the disk blocks are misaligned.
|
||||||
|
*/
|
||||||
|
if (ret == -ENOPROTOOPT && dops == &xfs_direct_write_iomap_ops) {
|
||||||
|
xfs_iunlock(ip, iolock);
|
||||||
|
dops = &xfs_atomic_write_cow_iomap_ops;
|
||||||
|
goto retry;
|
||||||
|
}
|
||||||
|
|
||||||
|
out_unlock:
|
||||||
|
if (iolock)
|
||||||
|
xfs_iunlock(ip, iolock);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Handle block unaligned direct I/O writes
|
* Handle block unaligned direct I/O writes
|
||||||
*
|
*
|
||||||
@@ -840,6 +909,8 @@ xfs_file_dio_write(
 		return xfs_file_dio_write_unaligned(ip, iocb, from);
 	if (xfs_is_zoned_inode(ip))
 		return xfs_file_dio_write_zoned(ip, iocb, from);
+	if (iocb->ki_flags & IOCB_ATOMIC)
+		return xfs_file_dio_write_atomic(ip, iocb, from);
 	return xfs_file_dio_write_aligned(ip, iocb, from,
 			&xfs_direct_write_iomap_ops, &xfs_dio_write_ops, NULL);
 }
@@ -1032,14 +1103,12 @@ xfs_file_write_iter(
 		return xfs_file_dax_write(iocb, from);
 
 	if (iocb->ki_flags & IOCB_ATOMIC) {
-		/*
-		 * Currently only atomic writing of a single FS block is
-		 * supported. It would be possible to atomic write smaller than
-		 * a FS block, but there is no requirement to support this.
-		 * Note that iomap also does not support this yet.
-		 */
-		if (ocount != ip->i_mount->m_sb.sb_blocksize)
+		if (ocount < xfs_get_atomic_write_min(ip))
 			return -EINVAL;
+
+		if (ocount > xfs_get_atomic_write_max(ip))
+			return -EINVAL;
+
 		ret = generic_atomic_write_valid(iocb, from);
 		if (ret)
 			return ret;
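For context on what the refined size check above constrains, here is a hedged userspace sketch that submits one atomic write with pwritev2() and RWF_ATOMIC. It assumes the file descriptor was opened with O_DIRECT, that headers define RWF_ATOMIC, and that the length passed in is a power of two lying within the statx-reported atomic write unit min/max with the offset naturally aligned; those assumptions are noted in the comments rather than taken from the patch.

/*
 * Hedged sketch: issue a single atomic (untorn) direct write.
 * Assumes fd was opened with O_DIRECT and len satisfies the advertised
 * atomic write unit limits (power of two, naturally aligned offset).
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <sys/uio.h>

static ssize_t write_one_atomic_unit(int fd, off_t pos, size_t len)
{
	struct iovec iov;
	void *buf;
	ssize_t ret;

	/* O_DIRECT wants an aligned buffer; align it to len here. */
	if (posix_memalign(&buf, len, len))
		return -1;
	memset(buf, 0xab, len);

	iov.iov_base = buf;
	iov.iov_len = len;
	ret = pwritev2(fd, &iov, 1, pos, RWF_ATOMIC);
	free(buf);
	return ret;
}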
@@ -1488,7 +1557,7 @@ xfs_file_open(
 	if (xfs_is_shutdown(XFS_M(inode->i_sb)))
 		return -EIO;
 	file->f_mode |= FMODE_NOWAIT | FMODE_CAN_ODIRECT;
-	if (xfs_inode_can_atomicwrite(XFS_I(inode)))
+	if (xfs_get_atomic_write_min(XFS_I(inode)) > 0)
 		file->f_mode |= FMODE_CAN_ATOMIC_WRITE;
 	return generic_file_open(inode, file);
 }
@@ -304,11 +304,9 @@ xfs_filestream_create_association(
 	 * for us, so all we need to do here is take another active reference to
 	 * the perag for the cached association.
 	 *
-	 * If we fail to store the association, we need to drop the fstrms
-	 * counter as well as drop the perag reference we take here for the
-	 * item. We do not need to return an error for this failure - as long as
-	 * we return a referenced AG, the allocation can still go ahead just
-	 * fine.
+	 * If we fail to store the association, we do not need to return an
+	 * error for this failure - as long as we return a referenced AG, the
+	 * allocation can still go ahead just fine.
 	 */
 	item = kmalloc(sizeof(*item), GFP_KERNEL | __GFP_RETRY_MAYFAIL);
 	if (!item)
@@ -316,14 +314,9 @@ xfs_filestream_create_association(
 
 	atomic_inc(&pag_group(args->pag)->xg_active_ref);
 	item->pag = args->pag;
-	error = xfs_mru_cache_insert(mp->m_filestream, pino, &item->mru);
-	if (error)
-		goto out_free_item;
+	xfs_mru_cache_insert(mp->m_filestream, pino, &item->mru);
 	return 0;
 
-out_free_item:
-	xfs_perag_rele(item->pag);
-	kfree(item);
 out_put_fstrms:
 	atomic_dec(&args->pag->pagf_fstrms);
 	return 0;
@@ -23,8 +23,6 @@ xfs_param_t xfs_params = {
 	.inherit_sync = { 0, 1, 1 },
 	.inherit_nodump = { 0, 1, 1 },
 	.inherit_noatim = { 0, 1, 1 },
-	.xfs_buf_timer = { 100/2, 1*100, 30*100 },
-	.xfs_buf_age = { 1*100, 15*100, 7200*100},
 	.inherit_nosym = { 0, 0, 1 },
 	.rotorstep = { 1, 1, 255 },
 	.inherit_nodfrg = { 0, 1, 1 },
@@ -356,19 +356,9 @@ static inline bool xfs_inode_has_bigrtalloc(const struct xfs_inode *ip)
 	(XFS_IS_REALTIME_INODE(ip) ? \
 		(ip)->i_mount->m_rtdev_targp : (ip)->i_mount->m_ddev_targp)
 
-static inline bool
-xfs_inode_can_atomicwrite(
-	struct xfs_inode *ip)
+static inline bool xfs_inode_can_hw_atomic_write(const struct xfs_inode *ip)
 {
-	struct xfs_mount *mp = ip->i_mount;
-	struct xfs_buftarg *target = xfs_inode_buftarg(ip);
-
-	if (mp->m_sb.sb_blocksize < target->bt_bdev_awu_min)
-		return false;
-	if (mp->m_sb.sb_blocksize > target->bt_bdev_awu_max)
-		return false;
-
-	return true;
+	return xfs_inode_buftarg(ip)->bt_bdev_awu_max > 0;
 }
 
 /*
@ -798,6 +798,38 @@ imap_spans_range(
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
xfs_bmap_hw_atomic_write_possible(
|
||||||
|
struct xfs_inode *ip,
|
||||||
|
struct xfs_bmbt_irec *imap,
|
||||||
|
xfs_fileoff_t offset_fsb,
|
||||||
|
xfs_fileoff_t end_fsb)
|
||||||
|
{
|
||||||
|
struct xfs_mount *mp = ip->i_mount;
|
||||||
|
xfs_fsize_t len = XFS_FSB_TO_B(mp, end_fsb - offset_fsb);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* atomic writes are required to be naturally aligned for disk blocks,
|
||||||
|
* which ensures that we adhere to block layer rules that we won't
|
||||||
|
* straddle any boundary or violate write alignment requirement.
|
||||||
|
*/
|
||||||
|
if (!IS_ALIGNED(imap->br_startblock, imap->br_blockcount))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Spanning multiple extents would mean that multiple BIOs would be
|
||||||
|
* issued, and so would lose atomicity required for REQ_ATOMIC-based
|
||||||
|
* atomics.
|
||||||
|
*/
|
||||||
|
if (!imap_spans_range(imap, offset_fsb, end_fsb))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The ->iomap_begin caller should ensure this, but check anyway.
|
||||||
|
*/
|
||||||
|
return len <= xfs_inode_buftarg(ip)->bt_bdev_awu_max;
|
||||||
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
xfs_direct_write_iomap_begin(
|
xfs_direct_write_iomap_begin(
|
||||||
struct inode *inode,
|
struct inode *inode,
|
||||||
@ -812,9 +844,11 @@ xfs_direct_write_iomap_begin(
|
|||||||
struct xfs_bmbt_irec imap, cmap;
|
struct xfs_bmbt_irec imap, cmap;
|
||||||
xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset);
|
xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset);
|
||||||
xfs_fileoff_t end_fsb = xfs_iomap_end_fsb(mp, offset, length);
|
xfs_fileoff_t end_fsb = xfs_iomap_end_fsb(mp, offset, length);
|
||||||
|
xfs_fileoff_t orig_end_fsb = end_fsb;
|
||||||
int nimaps = 1, error = 0;
|
int nimaps = 1, error = 0;
|
||||||
bool shared = false;
|
bool shared = false;
|
||||||
u16 iomap_flags = 0;
|
u16 iomap_flags = 0;
|
||||||
|
bool needs_alloc;
|
||||||
unsigned int lockmode;
|
unsigned int lockmode;
|
||||||
u64 seq;
|
u64 seq;
|
||||||
|
|
||||||
@ -875,13 +909,37 @@ relock:
|
|||||||
(flags & IOMAP_DIRECT) || IS_DAX(inode));
|
(flags & IOMAP_DIRECT) || IS_DAX(inode));
|
||||||
if (error)
|
if (error)
|
||||||
goto out_unlock;
|
goto out_unlock;
|
||||||
if (shared)
|
if (shared) {
|
||||||
|
if ((flags & IOMAP_ATOMIC) &&
|
||||||
|
!xfs_bmap_hw_atomic_write_possible(ip, &cmap,
|
||||||
|
offset_fsb, end_fsb)) {
|
||||||
|
error = -ENOPROTOOPT;
|
||||||
|
goto out_unlock;
|
||||||
|
}
|
||||||
goto out_found_cow;
|
goto out_found_cow;
|
||||||
|
}
|
||||||
end_fsb = imap.br_startoff + imap.br_blockcount;
|
end_fsb = imap.br_startoff + imap.br_blockcount;
|
||||||
length = XFS_FSB_TO_B(mp, end_fsb) - offset;
|
length = XFS_FSB_TO_B(mp, end_fsb) - offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (imap_needs_alloc(inode, flags, &imap, nimaps))
|
needs_alloc = imap_needs_alloc(inode, flags, &imap, nimaps);
|
||||||
|
|
||||||
|
if (flags & IOMAP_ATOMIC) {
|
||||||
|
error = -ENOPROTOOPT;
|
||||||
|
/*
|
||||||
|
* If we allocate less than what is required for the write
|
||||||
|
* then we may end up with multiple extents, which means that
|
||||||
|
* REQ_ATOMIC-based cannot be used, so avoid this possibility.
|
||||||
|
*/
|
||||||
|
if (needs_alloc && orig_end_fsb - offset_fsb > 1)
|
||||||
|
goto out_unlock;
|
||||||
|
|
||||||
|
if (!xfs_bmap_hw_atomic_write_possible(ip, &imap, offset_fsb,
|
||||||
|
orig_end_fsb))
|
||||||
|
goto out_unlock;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (needs_alloc)
|
||||||
goto allocate_blocks;
|
goto allocate_blocks;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -1022,6 +1080,134 @@ const struct iomap_ops xfs_zoned_direct_write_iomap_ops = {
 };
 #endif /* CONFIG_XFS_RT */
 
+static int
+xfs_atomic_write_cow_iomap_begin(
+	struct inode		*inode,
+	loff_t			offset,
+	loff_t			length,
+	unsigned		flags,
+	struct iomap		*iomap,
+	struct iomap		*srcmap)
+{
+	struct xfs_inode	*ip = XFS_I(inode);
+	struct xfs_mount	*mp = ip->i_mount;
+	const xfs_fileoff_t	offset_fsb = XFS_B_TO_FSBT(mp, offset);
+	xfs_fileoff_t		end_fsb = xfs_iomap_end_fsb(mp, offset, length);
+	xfs_filblks_t		count_fsb = end_fsb - offset_fsb;
+	int			nmaps = 1;
+	xfs_filblks_t		resaligned;
+	struct xfs_bmbt_irec	cmap;
+	struct xfs_iext_cursor	icur;
+	struct xfs_trans	*tp;
+	unsigned int		dblocks = 0, rblocks = 0;
+	int			error;
+	u64			seq;
+
+	ASSERT(flags & IOMAP_WRITE);
+	ASSERT(flags & IOMAP_DIRECT);
+
+	if (xfs_is_shutdown(mp))
+		return -EIO;
+
+	if (!xfs_can_sw_atomic_write(mp)) {
+		ASSERT(xfs_can_sw_atomic_write(mp));
+		return -EINVAL;
+	}
+
+	/* blocks are always allocated in this path */
+	if (flags & IOMAP_NOWAIT)
+		return -EAGAIN;
+
+	trace_xfs_iomap_atomic_write_cow(ip, offset, length);
+
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+
+	if (!ip->i_cowfp) {
+		ASSERT(!xfs_is_reflink_inode(ip));
+		xfs_ifork_init_cow(ip);
+	}
+
+	if (!xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &cmap))
+		cmap.br_startoff = end_fsb;
+	if (cmap.br_startoff <= offset_fsb) {
+		xfs_trim_extent(&cmap, offset_fsb, count_fsb);
+		goto found;
+	}
+
+	end_fsb = cmap.br_startoff;
+	count_fsb = end_fsb - offset_fsb;
+
+	resaligned = xfs_aligned_fsb_count(offset_fsb, count_fsb,
+			xfs_get_cowextsz_hint(ip));
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+	if (XFS_IS_REALTIME_INODE(ip)) {
+		dblocks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
+		rblocks = resaligned;
+	} else {
+		dblocks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned);
+		rblocks = 0;
+	}
+
+	error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_write, dblocks,
+			rblocks, false, &tp);
+	if (error)
+		return error;
+
+	/* extent layout could have changed since the unlock, so check again */
+	if (!xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &cmap))
+		cmap.br_startoff = end_fsb;
+	if (cmap.br_startoff <= offset_fsb) {
+		xfs_trim_extent(&cmap, offset_fsb, count_fsb);
+		xfs_trans_cancel(tp);
+		goto found;
+	}
+
+	/*
+	 * Allocate the entire reservation as unwritten blocks.
+	 *
+	 * Use XFS_BMAPI_EXTSZALIGN to hint at aligning new extents according to
+	 * extszhint, such that there will be a greater chance that future
+	 * atomic writes to that same range will be aligned (and don't require
+	 * this COW-based method).
+	 */
+	error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb,
+			XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC |
+			XFS_BMAPI_EXTSZALIGN, 0, &cmap, &nmaps);
+	if (error) {
+		xfs_trans_cancel(tp);
+		goto out_unlock;
+	}
+
+	xfs_inode_set_cowblocks_tag(ip);
+	error = xfs_trans_commit(tp);
+	if (error)
+		goto out_unlock;
+
+found:
+	if (cmap.br_state != XFS_EXT_NORM) {
+		error = xfs_reflink_convert_cow_locked(ip, offset_fsb,
+				count_fsb);
+		if (error)
+			goto out_unlock;
+		cmap.br_state = XFS_EXT_NORM;
+	}
+
+	length = XFS_FSB_TO_B(mp, cmap.br_startoff + cmap.br_blockcount);
+	trace_xfs_iomap_found(ip, offset, length - offset, XFS_COW_FORK, &cmap);
+	seq = xfs_iomap_inode_sequence(ip, IOMAP_F_SHARED);
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+	return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, IOMAP_F_SHARED, seq);
+
+out_unlock:
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+	return error;
+}
+
+const struct iomap_ops xfs_atomic_write_cow_iomap_ops = {
+	.iomap_begin		= xfs_atomic_write_cow_iomap_begin,
+};
+
 static int
 xfs_dax_write_iomap_end(
 	struct inode		*inode,
@@ -56,5 +56,6 @@ extern const struct iomap_ops xfs_read_iomap_ops;
 extern const struct iomap_ops xfs_seek_iomap_ops;
 extern const struct iomap_ops xfs_xattr_iomap_ops;
 extern const struct iomap_ops xfs_dax_write_iomap_ops;
+extern const struct iomap_ops xfs_atomic_write_cow_iomap_ops;
 
 #endif /* __XFS_IOMAP_H__*/
@@ -601,16 +601,82 @@ xfs_report_dioalign(
 		stat->dio_offset_align = stat->dio_read_offset_align;
 }
 
+unsigned int
+xfs_get_atomic_write_min(
+	struct xfs_inode	*ip)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+
+	/*
+	 * If we can complete an atomic write via atomic out of place writes,
+	 * then advertise a minimum size of one fsblock.  Without this
+	 * mechanism, we can only guarantee atomic writes up to a single LBA.
+	 *
+	 * If out of place writes are not available, we can guarantee an atomic
+	 * write of exactly one single fsblock if the bdev will make that
+	 * guarantee for us.
+	 */
+	if (xfs_inode_can_hw_atomic_write(ip) || xfs_can_sw_atomic_write(mp))
+		return mp->m_sb.sb_blocksize;
+
+	return 0;
+}
+
+unsigned int
+xfs_get_atomic_write_max(
+	struct xfs_inode	*ip)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+
+	/*
+	 * If out of place writes are not available, we can guarantee an atomic
+	 * write of exactly one single fsblock if the bdev will make that
+	 * guarantee for us.
+	 */
+	if (!xfs_can_sw_atomic_write(mp)) {
+		if (xfs_inode_can_hw_atomic_write(ip))
+			return mp->m_sb.sb_blocksize;
+		return 0;
+	}
+
+	/*
+	 * If we can complete an atomic write via atomic out of place writes,
+	 * then advertise a maximum size of whatever we can complete through
+	 * that means. Hardware support is reported via max_opt, not here.
+	 */
+	if (XFS_IS_REALTIME_INODE(ip))
+		return XFS_FSB_TO_B(mp, mp->m_groups[XG_TYPE_RTG].awu_max);
+	return XFS_FSB_TO_B(mp, mp->m_groups[XG_TYPE_AG].awu_max);
+}
+
+unsigned int
+xfs_get_atomic_write_max_opt(
+	struct xfs_inode	*ip)
+{
+	unsigned int		awu_max = xfs_get_atomic_write_max(ip);
+
+	/* if the max is 1x block, then just keep behaviour that opt is 0 */
+	if (awu_max <= ip->i_mount->m_sb.sb_blocksize)
+		return 0;
+
+	/*
+	 * Advertise the maximum size of an atomic write that we can tell the
+	 * block device to perform for us. In general the bdev limit will be
+	 * less than our out of place write limit, but we don't want to exceed
+	 * the awu_max.
+	 */
+	return min(awu_max, xfs_inode_buftarg(ip)->bt_bdev_awu_max);
+}
+
 static void
 xfs_report_atomic_write(
 	struct xfs_inode	*ip,
 	struct kstat		*stat)
 {
-	unsigned int		unit_min = 0, unit_max = 0;
-
-	if (xfs_inode_can_atomicwrite(ip))
-		unit_min = unit_max = ip->i_mount->m_sb.sb_blocksize;
-	generic_fill_statx_atomic_writes(stat, unit_min, unit_max);
+	generic_fill_statx_atomic_writes(stat,
+			xfs_get_atomic_write_min(ip),
+			xfs_get_atomic_write_max(ip),
+			xfs_get_atomic_write_max_opt(ip));
 }
 
 STATIC int
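For readers following the new statx reporting above from userspace, here is a minimal sketch (not part of the patch set) that queries the advertised atomic write limits and issues one RWF_ATOMIC direct write. It only uses interfaces that predate this series (STATX_WRITE_ATOMIC, stx_atomic_write_unit_min/max, pwritev2); the newer "max_opt" field is deliberately not read, and the RWF_ATOMIC fallback value is an assumption for older uapi headers.

/*
 * Hedged userspace example: read the atomic write geometry xfs reports via
 * statx() and perform one all-or-nothing write of the minimum unit.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/uio.h>
#include <unistd.h>
#include <linux/stat.h>

#ifndef RWF_ATOMIC
#define RWF_ATOMIC	0x00000040	/* assumed value from 6.11+ uapi headers */
#endif

int main(int argc, char **argv)
{
	struct statx stx;
	struct iovec iov;
	char *buf;
	int fd;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <file on xfs>\n", argv[0]);
		return 1;
	}
	fd = open(argv[1], O_RDWR | O_DIRECT);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* Ask for the atomic write geometry of this file. */
	if (statx(AT_FDCWD, argv[1], 0,
		  STATX_BASIC_STATS | STATX_WRITE_ATOMIC, &stx) < 0) {
		perror("statx");
		return 1;
	}
	if (!(stx.stx_mask & STATX_WRITE_ATOMIC) ||
	    stx.stx_atomic_write_unit_min == 0) {
		printf("no atomic write support reported\n");
		return 0;
	}
	printf("atomic write unit min %u max %u\n",
	       stx.stx_atomic_write_unit_min, stx.stx_atomic_write_unit_max);

	/* Write one minimum-sized unit, all-or-nothing. */
	if (posix_memalign((void **)&buf, 4096, stx.stx_atomic_write_unit_min))
		return 1;
	memset(buf, 0xab, stx.stx_atomic_write_unit_min);
	iov.iov_base = buf;
	iov.iov_len = stx.stx_atomic_write_unit_min;
	if (pwritev2(fd, &iov, 1, 0, RWF_ATOMIC) < 0)
		perror("pwritev2(RWF_ATOMIC)");
	free(buf);
	close(fd);
	return 0;
}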
@@ -19,5 +19,8 @@ int xfs_inode_init_security(struct inode *inode, struct inode *dir,
 extern void xfs_setup_inode(struct xfs_inode *ip);
 extern void xfs_setup_iops(struct xfs_inode *ip);
 extern void xfs_diflags_to_iflags(struct xfs_inode *ip, bool init);
+unsigned int xfs_get_atomic_write_min(struct xfs_inode *ip);
+unsigned int xfs_get_atomic_write_max(struct xfs_inode *ip);
+unsigned int xfs_get_atomic_write_max_opt(struct xfs_inode *ip);
 
 #endif /* __XFS_IOPS_H__ */
@@ -309,9 +309,7 @@ xlog_cil_alloc_shadow_bufs(
 		 * Then round nbytes up to 64-bit alignment so that the initial
 		 * buffer alignment is easy to calculate and verify.
 		 */
-		nbytes += niovecs *
-			(sizeof(uint64_t) + sizeof(struct xlog_op_header));
-		nbytes = round_up(nbytes, sizeof(uint64_t));
+		nbytes = xlog_item_space(niovecs, nbytes);
 
 		/*
 		 * The data buffer needs to start 64-bit aligned, so round up
@@ -698,4 +698,17 @@ xlog_kvmalloc(
 	return p;
 }
 
+/*
+ * Given a count of iovecs and space for a log item, compute the space we need
+ * in the log to store that data plus the log headers.
+ */
+static inline unsigned int
+xlog_item_space(
+	unsigned int	niovecs,
+	unsigned int	nbytes)
+{
+	nbytes += niovecs * (sizeof(uint64_t) + sizeof(struct xlog_op_header));
+	return round_up(nbytes, sizeof(uint64_t));
+}
+
 #endif	/* __XFS_LOG_PRIV_H__ */
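The helper above is what the new xfs_cui_log_space()/xfs_rui_log_space() callers later in this series use to size intent items. A standalone sketch of the same arithmetic, with a worked example, is below; it is not kernel code, and the 16-byte op-header size is only an assumed value for illustration.

/* Illustration of the xlog_item_space() rounding; not part of the patch. */
#include <stdint.h>
#include <stdio.h>

#define ASSUMED_OP_HEADER_SIZE	16u	/* stand-in for sizeof(struct xlog_op_header) */

static unsigned int round_up_u(unsigned int x, unsigned int a)
{
	return (x + a - 1) & ~(a - 1);	/* a must be a power of two */
}

static unsigned int item_space(unsigned int niovecs, unsigned int nbytes)
{
	nbytes += niovecs * (sizeof(uint64_t) + ASSUMED_OP_HEADER_SIZE);
	return round_up_u(nbytes, sizeof(uint64_t));
}

int main(void)
{
	/* one iovec carrying a 30-byte log format structure */
	printf("%u\n", item_space(1, 30));	/* 30 + 24 = 54, rounds to 56 */
	return 0;
}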
@@ -141,14 +141,6 @@ xfs_warn_experimental(
 		const char		*name;
 		long			opstate;
 	} features[] = {
-		[XFS_EXPERIMENTAL_PNFS] = {
-			.opstate	= XFS_OPSTATE_WARNED_PNFS,
-			.name		= "pNFS",
-		},
-		[XFS_EXPERIMENTAL_SCRUB] = {
-			.opstate	= XFS_OPSTATE_WARNED_SCRUB,
-			.name		= "online scrub",
-		},
 		[XFS_EXPERIMENTAL_SHRINK] = {
 			.opstate	= XFS_OPSTATE_WARNED_SHRINK,
 			.name		= "online shrink",
@@ -161,14 +153,6 @@ xfs_warn_experimental(
 			.opstate	= XFS_OPSTATE_WARNED_LBS,
 			.name		= "large block size",
 		},
-		[XFS_EXPERIMENTAL_EXCHRANGE] = {
-			.opstate	= XFS_OPSTATE_WARNED_EXCHRANGE,
-			.name		= "exchange range",
-		},
-		[XFS_EXPERIMENTAL_PPTR] = {
-			.opstate	= XFS_OPSTATE_WARNED_PPTR,
-			.name		= "parent pointer",
-		},
 		[XFS_EXPERIMENTAL_METADIR] = {
 			.opstate	= XFS_OPSTATE_WARNED_METADIR,
 			.name		= "metadata directory tree",
@@ -91,13 +91,9 @@ void xfs_buf_alert_ratelimited(struct xfs_buf *bp, const char *rlmsg,
 			const char *fmt, ...);
 
 enum xfs_experimental_feat {
-	XFS_EXPERIMENTAL_PNFS,
-	XFS_EXPERIMENTAL_SCRUB,
 	XFS_EXPERIMENTAL_SHRINK,
 	XFS_EXPERIMENTAL_LARP,
 	XFS_EXPERIMENTAL_LBS,
-	XFS_EXPERIMENTAL_EXCHRANGE,
-	XFS_EXPERIMENTAL_PPTR,
 	XFS_EXPERIMENTAL_METADIR,
 	XFS_EXPERIMENTAL_ZONED,
 
@@ -666,6 +666,158 @@ xfs_agbtree_compute_maxlevels(
 	mp->m_agbtree_maxlevels = max(levels, mp->m_refc_maxlevels);
 }
 
+/* Maximum atomic write IO size that the kernel allows. */
+static inline xfs_extlen_t xfs_calc_atomic_write_max(struct xfs_mount *mp)
+{
+	return rounddown_pow_of_two(XFS_B_TO_FSB(mp, MAX_RW_COUNT));
+}
+
+static inline unsigned int max_pow_of_two_factor(const unsigned int nr)
+{
+	return 1 << (ffs(nr) - 1);
+}
+
+/*
+ * If the data device advertises atomic write support, limit the size of data
+ * device atomic writes to the greatest power-of-two factor of the AG size so
+ * that every atomic write unit aligns with the start of every AG.  This is
+ * required so that the per-AG allocations for an atomic write will always be
+ * aligned compatibly with the alignment requirements of the storage.
+ *
+ * If the data device doesn't advertise atomic writes, then there are no
+ * alignment restrictions and the largest out-of-place write we can do
+ * ourselves is the number of blocks that user files can allocate from any AG.
+ */
+static inline xfs_extlen_t xfs_calc_perag_awu_max(struct xfs_mount *mp)
+{
+	if (mp->m_ddev_targp->bt_bdev_awu_min > 0)
+		return max_pow_of_two_factor(mp->m_sb.sb_agblocks);
+	return rounddown_pow_of_two(mp->m_ag_max_usable);
+}
+
+/*
+ * Reflink on the realtime device requires rtgroups, and atomic writes require
+ * reflink.
+ *
+ * If the realtime device advertises atomic write support, limit the size of
+ * data device atomic writes to the greatest power-of-two factor of the rtgroup
+ * size so that every atomic write unit aligns with the start of every rtgroup.
+ * This is required so that the per-rtgroup allocations for an atomic write
+ * will always be aligned compatibly with the alignment requirements of the
+ * storage.
+ *
+ * If the rt device doesn't advertise atomic writes, then there are no
+ * alignment restrictions and the largest out-of-place write we can do
+ * ourselves is the number of blocks that user files can allocate from any
+ * rtgroup.
+ */
+static inline xfs_extlen_t xfs_calc_rtgroup_awu_max(struct xfs_mount *mp)
+{
+	struct xfs_groups	*rgs = &mp->m_groups[XG_TYPE_RTG];
+
+	if (rgs->blocks == 0)
+		return 0;
+	if (mp->m_rtdev_targp && mp->m_rtdev_targp->bt_bdev_awu_min > 0)
+		return max_pow_of_two_factor(rgs->blocks);
+	return rounddown_pow_of_two(rgs->blocks);
+}
+
+/* Compute the maximum atomic write unit size for each section. */
+static inline void
+xfs_calc_atomic_write_unit_max(
+	struct xfs_mount	*mp)
+{
+	struct xfs_groups	*ags = &mp->m_groups[XG_TYPE_AG];
+	struct xfs_groups	*rgs = &mp->m_groups[XG_TYPE_RTG];
+
+	const xfs_extlen_t	max_write = xfs_calc_atomic_write_max(mp);
+	const xfs_extlen_t	max_ioend = xfs_reflink_max_atomic_cow(mp);
+	const xfs_extlen_t	max_agsize = xfs_calc_perag_awu_max(mp);
+	const xfs_extlen_t	max_rgsize = xfs_calc_rtgroup_awu_max(mp);
+
+	ags->awu_max = min3(max_write, max_ioend, max_agsize);
+	rgs->awu_max = min3(max_write, max_ioend, max_rgsize);
+
+	trace_xfs_calc_atomic_write_unit_max(mp, max_write, max_ioend,
+			max_agsize, max_rgsize);
+}
+
+/*
+ * Try to set the atomic write maximum to a new value that we got from
+ * userspace via mount option.
+ */
+int
+xfs_set_max_atomic_write_opt(
+	struct xfs_mount	*mp,
+	unsigned long long	new_max_bytes)
+{
+	const xfs_filblks_t	new_max_fsbs = XFS_B_TO_FSBT(mp, new_max_bytes);
+	const xfs_extlen_t	max_write = xfs_calc_atomic_write_max(mp);
+	const xfs_extlen_t	max_group =
+		max(mp->m_groups[XG_TYPE_AG].blocks,
+		    mp->m_groups[XG_TYPE_RTG].blocks);
+	const xfs_extlen_t	max_group_write =
+		max(xfs_calc_perag_awu_max(mp), xfs_calc_rtgroup_awu_max(mp));
+	int			error;
+
+	if (new_max_bytes == 0)
+		goto set_limit;
+
+	ASSERT(max_write <= U32_MAX);
+
+	/* generic_atomic_write_valid enforces power of two length */
+	if (!is_power_of_2(new_max_bytes)) {
+		xfs_warn(mp,
+	"max atomic write size of %llu bytes is not a power of 2",
+			new_max_bytes);
+		return -EINVAL;
+	}
+
+	if (new_max_bytes & mp->m_blockmask) {
+		xfs_warn(mp,
+	"max atomic write size of %llu bytes not aligned with fsblock",
+			new_max_bytes);
+		return -EINVAL;
+	}
+
+	if (new_max_fsbs > max_write) {
+		xfs_warn(mp,
+	"max atomic write size of %lluk cannot be larger than max write size %lluk",
+			new_max_bytes >> 10,
+			XFS_FSB_TO_B(mp, max_write) >> 10);
+		return -EINVAL;
+	}
+
+	if (new_max_fsbs > max_group) {
+		xfs_warn(mp,
+	"max atomic write size of %lluk cannot be larger than allocation group size %lluk",
+			new_max_bytes >> 10,
+			XFS_FSB_TO_B(mp, max_group) >> 10);
+		return -EINVAL;
+	}
+
+	if (new_max_fsbs > max_group_write) {
+		xfs_warn(mp,
+	"max atomic write size of %lluk cannot be larger than max allocation group write size %lluk",
+			new_max_bytes >> 10,
+			XFS_FSB_TO_B(mp, max_group_write) >> 10);
+		return -EINVAL;
+	}
+
+set_limit:
+	error = xfs_calc_atomic_write_reservation(mp, new_max_fsbs);
+	if (error) {
+		xfs_warn(mp,
+	"cannot support completing atomic writes of %lluk",
+			new_max_bytes >> 10);
+		return error;
+	}
+
+	xfs_calc_atomic_write_unit_max(mp);
+	mp->m_awu_max_bytes = new_max_bytes;
+	return 0;
+}
+
 /* Compute maximum possible height for realtime btree types for this fs. */
 static inline void
 xfs_rtbtree_compute_maxlevels(
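To make the clamping above concrete, here is a small userspace sketch (not kernel code) of how awu_max ends up as the minimum of the write-size limit, the largest ioend the log can complete, and the largest power-of-two factor of the group size. All numbers are invented example values; the kernel helper uses ffs(), for which nr & -nr is an equivalent lowest-set-bit trick.

/* Illustration only: clamp an atomic write unit the way the patch does. */
#include <stdio.h>

static unsigned int max_pow_of_two_factor(unsigned int nr)
{
	return nr & -nr;	/* same as 1 << (ffs(nr) - 1) for nr != 0 */
}

static unsigned int rounddown_pow_of_two(unsigned int nr)
{
	while (nr & (nr - 1))
		nr &= nr - 1;	/* keep clearing low bits until one remains */
	return nr;
}

static unsigned int min3u(unsigned int a, unsigned int b, unsigned int c)
{
	unsigned int m = a < b ? a : b;
	return m < c ? m : c;
}

int main(void)
{
	unsigned int max_write = rounddown_pow_of_two(524288);	/* fsblocks */
	unsigned int max_ioend = 4096;				/* fsblocks */
	unsigned int agblocks = 229376;				/* 7 * 2^15 */
	unsigned int max_agsize = max_pow_of_two_factor(agblocks); /* 32768 */

	printf("awu_max = %u fsblocks\n",
	       min3u(max_write, max_ioend, max_agsize));	/* prints 4096 */
	return 0;
}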
@@ -1082,6 +1234,15 @@ xfs_mountfs(
 			xfs_zone_gc_start(mp);
 	}
 
+	/*
+	 * Pre-calculate atomic write unit max.  This involves computations
+	 * derived from transaction reservations, so we must do this after the
+	 * log is fully initialized.
+	 */
+	error = xfs_set_max_atomic_write_opt(mp, mp->m_awu_max_bytes);
+	if (error)
+		goto out_agresv;
+
 	return 0;
 
 out_agresv:
@@ -119,6 +119,12 @@ struct xfs_groups {
 	 * SMR hard drives.
 	 */
 	xfs_fsblock_t		start_fsb;
+
+	/*
+	 * Maximum length of an atomic write for files stored in this
+	 * collection of allocation groups, in fsblocks.
+	 */
+	xfs_extlen_t		awu_max;
 };
 
 struct xfs_freecounter {
@@ -230,6 +236,10 @@ typedef struct xfs_mount {
 	bool			m_update_sb;	/* sb needs update in mount */
 	unsigned int		m_max_open_zones;
 	unsigned int		m_zonegc_low_space;
+	struct xfs_mru_cache	*m_zone_cache;	/* Inode to open zone cache */
+
+	/* max_atomic_write mount option value */
+	unsigned long long	m_awu_max_bytes;
 
 	/*
 	 * Bitsets of per-fs metadata that have been checked and/or are sick.
@@ -464,6 +474,11 @@ static inline bool xfs_has_nonzoned(const struct xfs_mount *mp)
 	return !xfs_has_zoned(mp);
 }
 
+static inline bool xfs_can_sw_atomic_write(struct xfs_mount *mp)
+{
+	return xfs_has_reflink(mp);
+}
+
 /*
  * Some features are always on for v5 file systems, allow the compiler to
  * eliminiate dead code when building without v4 support.
@@ -543,10 +558,6 @@ __XFS_HAS_FEAT(nouuid, NOUUID)
  */
 #define XFS_OPSTATE_BLOCKGC_ENABLED	6
 
-/* Kernel has logged a warning about pNFS being used on this fs. */
-#define XFS_OPSTATE_WARNED_PNFS		7
-/* Kernel has logged a warning about online fsck being used on this fs. */
-#define XFS_OPSTATE_WARNED_SCRUB	8
 /* Kernel has logged a warning about shrink being used on this fs. */
 #define XFS_OPSTATE_WARNED_SHRINK	9
 /* Kernel has logged a warning about logged xattr updates being used. */
@@ -559,10 +570,6 @@ __XFS_HAS_FEAT(nouuid, NOUUID)
 #define XFS_OPSTATE_USE_LARP		13
 /* Kernel has logged a warning about blocksize > pagesize on this fs. */
 #define XFS_OPSTATE_WARNED_LBS		14
-/* Kernel has logged a warning about exchange-range being used on this fs. */
-#define XFS_OPSTATE_WARNED_EXCHRANGE	15
-/* Kernel has logged a warning about parent pointers being used on this fs. */
-#define XFS_OPSTATE_WARNED_PPTR		16
 /* Kernel has logged a warning about metadata dirs being used on this fs. */
 #define XFS_OPSTATE_WARNED_METADIR	17
 /* Filesystem should use qflags to determine quotaon status */
@@ -631,7 +638,6 @@ xfs_should_warn(struct xfs_mount *mp, long nr)
 	{ (1UL << XFS_OPSTATE_READONLY),		"read_only" }, \
 	{ (1UL << XFS_OPSTATE_INODEGC_ENABLED),		"inodegc" }, \
 	{ (1UL << XFS_OPSTATE_BLOCKGC_ENABLED),		"blockgc" }, \
-	{ (1UL << XFS_OPSTATE_WARNED_SCRUB),		"wscrub" }, \
 	{ (1UL << XFS_OPSTATE_WARNED_SHRINK),		"wshrink" }, \
 	{ (1UL << XFS_OPSTATE_WARNED_LARP),		"wlarp" }, \
 	{ (1UL << XFS_OPSTATE_QUOTACHECK_RUNNING),	"quotacheck" }, \
@@ -793,4 +799,7 @@ static inline void xfs_mod_sb_delalloc(struct xfs_mount *mp, int64_t delta)
 	percpu_counter_add(&mp->m_delalloc_blks, delta);
 }
 
+int xfs_set_max_atomic_write_opt(struct xfs_mount *mp,
+		unsigned long long new_max_bytes);
+
 #endif	/* __XFS_MOUNT_H__ */
@@ -414,6 +414,8 @@ xfs_mru_cache_destroy(
  * To insert an element, call xfs_mru_cache_insert() with the data store, the
  * element's key and the client data pointer.  This function returns 0 on
  * success or ENOMEM if memory for the data element couldn't be allocated.
+ *
+ * The passed in elem is freed through the per-cache free_func on failure.
  */
 int
 xfs_mru_cache_insert(
@@ -421,14 +423,15 @@ xfs_mru_cache_insert(
 	unsigned long		key,
 	struct xfs_mru_cache_elem *elem)
 {
-	int			error;
+	int			error = -EINVAL;
 
 	ASSERT(mru && mru->lists);
 	if (!mru || !mru->lists)
-		return -EINVAL;
+		goto out_free;
 
+	error = -ENOMEM;
 	if (radix_tree_preload(GFP_KERNEL))
-		return -ENOMEM;
+		goto out_free;
 
 	INIT_LIST_HEAD(&elem->list_node);
 	elem->key = key;
@@ -440,6 +443,12 @@ xfs_mru_cache_insert(
 	_xfs_mru_cache_list_insert(mru, elem);
 	spin_unlock(&mru->lock);
 
+	if (error)
+		goto out_free;
+	return 0;
+
+out_free:
+	mru->free_func(mru->data, elem);
 	return error;
 }
 
@@ -58,8 +58,6 @@ xfs_fs_get_uuid(
 {
 	struct xfs_mount	*mp = XFS_M(sb);
 
-	xfs_warn_experimental(mp, XFS_EXPERIMENTAL_PNFS);
-
 	if (*len < sizeof(uuid_t))
 		return -EINVAL;
 
@@ -78,6 +78,11 @@ xfs_cui_item_size(
 	*nbytes += xfs_cui_log_format_sizeof(cuip->cui_format.cui_nextents);
 }
 
+unsigned int xfs_cui_log_space(unsigned int nr)
+{
+	return xlog_item_space(1, xfs_cui_log_format_sizeof(nr));
+}
+
 /*
  * This is called to fill in the vector of log iovecs for the
  * given cui log item. We use only 1 iovec, and we point that
@@ -179,6 +184,11 @@ xfs_cud_item_size(
 	*nbytes += sizeof(struct xfs_cud_log_format);
 }
 
+unsigned int xfs_cud_log_space(void)
+{
+	return xlog_item_space(1, sizeof(struct xfs_cud_log_format));
+}
+
 /*
  * This is called to fill in the vector of log iovecs for the
  * given cud log item. We use only 1 iovec, and we point that
@@ -76,4 +76,7 @@ struct xfs_refcount_intent;
 void xfs_refcount_defer_add(struct xfs_trans *tp,
 		struct xfs_refcount_intent *ri);
 
+unsigned int xfs_cui_log_space(unsigned int nr);
+unsigned int xfs_cud_log_space(void);
+
 #endif	/* __XFS_REFCOUNT_ITEM_H__ */
@@ -293,7 +293,7 @@ xfs_bmap_trim_cow(
 	return xfs_reflink_trim_around_shared(ip, imap, shared);
 }
 
-static int
+int
 xfs_reflink_convert_cow_locked(
 	struct xfs_inode	*ip,
 	xfs_fileoff_t		offset_fsb,
@@ -786,35 +786,19 @@ xfs_reflink_update_quota(
  * requirements as low as possible.
  */
 STATIC int
-xfs_reflink_end_cow_extent(
+xfs_reflink_end_cow_extent_locked(
+	struct xfs_trans	*tp,
 	struct xfs_inode	*ip,
 	xfs_fileoff_t		*offset_fsb,
 	xfs_fileoff_t		end_fsb)
 {
 	struct xfs_iext_cursor	icur;
 	struct xfs_bmbt_irec	got, del, data;
-	struct xfs_mount	*mp = ip->i_mount;
-	struct xfs_trans	*tp;
 	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, XFS_COW_FORK);
-	unsigned int		resblks;
 	int			nmaps;
 	bool			isrt = XFS_IS_REALTIME_INODE(ip);
 	int			error;
 
-	resblks = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK);
-	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0,
-			XFS_TRANS_RESERVE, &tp);
-	if (error)
-		return error;
-
-	/*
-	 * Lock the inode.  We have to ijoin without automatic unlock because
-	 * the lead transaction is the refcountbt record deletion; the data
-	 * fork update follows as a deferred log item.
-	 */
-	xfs_ilock(ip, XFS_ILOCK_EXCL);
-	xfs_trans_ijoin(tp, ip, 0);
-
 	/*
 	 * In case of racing, overlapping AIO writes no COW extents might be
 	 * left by the time I/O completes for the loser of the race.  In that
@@ -823,7 +807,7 @@ xfs_reflink_end_cow_extent(
 	if (!xfs_iext_lookup_extent(ip, ifp, *offset_fsb, &icur, &got) ||
 	    got.br_startoff >= end_fsb) {
 		*offset_fsb = end_fsb;
-		goto out_cancel;
+		return 0;
 	}
 
 	/*
@@ -837,7 +821,7 @@ xfs_reflink_end_cow_extent(
 		if (!xfs_iext_next_extent(ifp, &icur, &got) ||
 		    got.br_startoff >= end_fsb) {
 			*offset_fsb = end_fsb;
-			goto out_cancel;
+			return 0;
 		}
 	}
 	del = got;
@@ -846,14 +830,14 @@ xfs_reflink_end_cow_extent(
 	error = xfs_iext_count_extend(tp, ip, XFS_DATA_FORK,
 			XFS_IEXT_REFLINK_END_COW_CNT);
 	if (error)
-		goto out_cancel;
+		return error;
 
 	/* Grab the corresponding mapping in the data fork. */
 	nmaps = 1;
 	error = xfs_bmapi_read(ip, del.br_startoff, del.br_blockcount, &data,
 			&nmaps, 0);
 	if (error)
-		goto out_cancel;
+		return error;
 
 	/* We can only remap the smaller of the two extent sizes. */
 	data.br_blockcount = min(data.br_blockcount, del.br_blockcount);
@@ -882,7 +866,7 @@ xfs_reflink_end_cow_extent(
 		error = xfs_bunmapi(NULL, ip, data.br_startoff,
 				data.br_blockcount, 0, 1, &done);
 		if (error)
-			goto out_cancel;
+			return error;
 		ASSERT(done);
 	}
 
@@ -899,17 +883,45 @@ xfs_reflink_end_cow_extent(
 	/* Remove the mapping from the CoW fork. */
 	xfs_bmap_del_extent_cow(ip, &icur, &got, &del);
 
-	error = xfs_trans_commit(tp);
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
-	if (error)
-		return error;
-
 	/* Update the caller about how much progress we made. */
 	*offset_fsb = del.br_startoff + del.br_blockcount;
 	return 0;
+}
 
-out_cancel:
+/*
+ * Remap part of the CoW fork into the data fork.
+ *
+ * We aim to remap the range starting at @offset_fsb and ending at @end_fsb
+ * into the data fork; this function will remap what it can (at the end of the
+ * range) and update @end_fsb appropriately.  Each remap gets its own
+ * transaction because we can end up merging and splitting bmbt blocks for
+ * every remap operation and we'd like to keep the block reservation
+ * requirements as low as possible.
+ */
+STATIC int
+xfs_reflink_end_cow_extent(
+	struct xfs_inode	*ip,
+	xfs_fileoff_t		*offset_fsb,
+	xfs_fileoff_t		end_fsb)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_trans	*tp;
+	unsigned int		resblks;
+	int			error;
+
+	resblks = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK);
+	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0,
+			XFS_TRANS_RESERVE, &tp);
+	if (error)
+		return error;
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+	xfs_trans_ijoin(tp, ip, 0);
+
+	error = xfs_reflink_end_cow_extent_locked(tp, ip, offset_fsb, end_fsb);
+	if (error)
 		xfs_trans_cancel(tp);
+	else
+		error = xfs_trans_commit(tp);
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	return error;
 }
@@ -972,6 +984,78 @@ xfs_reflink_end_cow(
 	return error;
 }
 
+/*
+ * Fully remap all of the file's data fork at once, which is the critical part
+ * in achieving atomic behaviour.
+ * The regular CoW end path does not use function as to keep the block
+ * reservation per transaction as low as possible.
+ */
+int
+xfs_reflink_end_atomic_cow(
+	struct xfs_inode	*ip,
+	xfs_off_t		offset,
+	xfs_off_t		count)
+{
+	xfs_fileoff_t		offset_fsb;
+	xfs_fileoff_t		end_fsb;
+	int			error = 0;
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_trans	*tp;
+	unsigned int		resblks;
+
+	trace_xfs_reflink_end_cow(ip, offset, count);
+
+	offset_fsb = XFS_B_TO_FSBT(mp, offset);
+	end_fsb = XFS_B_TO_FSB(mp, offset + count);
+
+	/*
+	 * Each remapping operation could cause a btree split, so in the worst
+	 * case that's one for each block.
+	 */
+	resblks = (end_fsb - offset_fsb) *
+			XFS_NEXTENTADD_SPACE_RES(mp, 1, XFS_DATA_FORK);
+
+	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_atomic_ioend, resblks, 0,
+			XFS_TRANS_RESERVE, &tp);
+	if (error)
+		return error;
+
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+	xfs_trans_ijoin(tp, ip, 0);
+
+	while (end_fsb > offset_fsb && !error) {
+		error = xfs_reflink_end_cow_extent_locked(tp, ip, &offset_fsb,
+				end_fsb);
+	}
+	if (error) {
+		trace_xfs_reflink_end_cow_error(ip, error, _RET_IP_);
+		goto out_cancel;
+	}
+	error = xfs_trans_commit(tp);
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+	return error;
+out_cancel:
+	xfs_trans_cancel(tp);
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+	return error;
+}
+
+/* Compute the largest atomic write that we can complete through software. */
+xfs_extlen_t
+xfs_reflink_max_atomic_cow(
+	struct xfs_mount	*mp)
+{
+	/* We cannot do any atomic writes without out of place writes. */
+	if (!xfs_can_sw_atomic_write(mp))
+		return 0;
+
+	/*
+	 * Atomic write limits must always be a power-of-2, according to
+	 * generic_atomic_write_valid.
+	 */
+	return rounddown_pow_of_two(xfs_calc_max_atomic_write_fsblocks(mp));
+}
+
 /*
  * Free all CoW staging blocks that are still referenced by the ondisk refcount
  * metadata.  The ondisk metadata does not track which inode created the
@@ -35,6 +35,8 @@ int xfs_reflink_allocate_cow(struct xfs_inode *ip, struct xfs_bmbt_irec *imap,
 		bool convert_now);
 extern int xfs_reflink_convert_cow(struct xfs_inode *ip, xfs_off_t offset,
 		xfs_off_t count);
+int xfs_reflink_convert_cow_locked(struct xfs_inode *ip,
+		xfs_fileoff_t offset_fsb, xfs_filblks_t count_fsb);
 
 extern int xfs_reflink_cancel_cow_blocks(struct xfs_inode *ip,
 		struct xfs_trans **tpp, xfs_fileoff_t offset_fsb,
@@ -43,6 +45,8 @@ extern int xfs_reflink_cancel_cow_range(struct xfs_inode *ip, xfs_off_t offset,
 		xfs_off_t count, bool cancel_real);
 extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset,
 		xfs_off_t count);
+int xfs_reflink_end_atomic_cow(struct xfs_inode *ip, xfs_off_t offset,
+		xfs_off_t count);
 extern int xfs_reflink_recover_cow(struct xfs_mount *mp);
 extern loff_t xfs_reflink_remap_range(struct file *file_in, loff_t pos_in,
 		struct file *file_out, loff_t pos_out, loff_t len,
@@ -64,4 +68,6 @@ extern int xfs_reflink_update_dest(struct xfs_inode *dest, xfs_off_t newlen,
 
 bool xfs_reflink_supports_rextsize(struct xfs_mount *mp, unsigned int rextsize);
 
+xfs_extlen_t xfs_reflink_max_atomic_cow(struct xfs_mount *mp);
+
 #endif /* __XFS_REFLINK_H */
@@ -77,6 +77,11 @@ xfs_rui_item_size(
 	*nbytes += xfs_rui_log_format_sizeof(ruip->rui_format.rui_nextents);
 }
 
+unsigned int xfs_rui_log_space(unsigned int nr)
+{
+	return xlog_item_space(1, xfs_rui_log_format_sizeof(nr));
+}
+
 /*
  * This is called to fill in the vector of log iovecs for the
  * given rui log item. We use only 1 iovec, and we point that
@@ -180,6 +185,11 @@ xfs_rud_item_size(
 	*nbytes += sizeof(struct xfs_rud_log_format);
 }
 
+unsigned int xfs_rud_log_space(void)
+{
+	return xlog_item_space(1, sizeof(struct xfs_rud_log_format));
+}
+
 /*
  * This is called to fill in the vector of log iovecs for the
  * given rud log item. We use only 1 iovec, and we point that
@@ -75,4 +75,7 @@ struct xfs_rmap_intent;
 
 void xfs_rmap_defer_add(struct xfs_trans *tp, struct xfs_rmap_intent *ri);
 
+unsigned int xfs_rui_log_space(unsigned int nr);
+unsigned int xfs_rud_log_space(void);
+
 #endif /* __XFS_RMAP_ITEM_H__ */
@@ -111,7 +111,7 @@ enum {
 	Opt_prjquota, Opt_uquota, Opt_gquota, Opt_pquota,
 	Opt_uqnoenforce, Opt_gqnoenforce, Opt_pqnoenforce, Opt_qnoenforce,
 	Opt_discard, Opt_nodiscard, Opt_dax, Opt_dax_enum, Opt_max_open_zones,
-	Opt_lifetime, Opt_nolifetime,
+	Opt_lifetime, Opt_nolifetime, Opt_max_atomic_write,
 };
 
 static const struct fs_parameter_spec xfs_fs_parameters[] = {
@@ -159,6 +159,7 @@ static const struct fs_parameter_spec xfs_fs_parameters[] = {
 	fsparam_u32("max_open_zones",	Opt_max_open_zones),
 	fsparam_flag("lifetime",	Opt_lifetime),
 	fsparam_flag("nolifetime",	Opt_nolifetime),
+	fsparam_string("max_atomic_write",	Opt_max_atomic_write),
 	{}
 };
 
@@ -241,6 +242,9 @@ xfs_fs_show_options(
 
 	if (mp->m_max_open_zones)
 		seq_printf(m, ",max_open_zones=%u", mp->m_max_open_zones);
+	if (mp->m_awu_max_bytes)
+		seq_printf(m, ",max_atomic_write=%lluk",
+				mp->m_awu_max_bytes >> 10);
 
 	return 0;
 }
@@ -380,9 +384,10 @@ xfs_blkdev_get(
 	struct file		**bdev_filep)
 {
 	int			error = 0;
+	blk_mode_t		mode;
 
-	*bdev_filep = bdev_file_open_by_path(name,
-		BLK_OPEN_READ | BLK_OPEN_WRITE | BLK_OPEN_RESTRICT_WRITES,
+	mode = sb_open_mode(mp->m_super->s_flags);
+	*bdev_filep = bdev_file_open_by_path(name, mode,
 		mp->m_super, &fs_holder_ops);
 	if (IS_ERR(*bdev_filep)) {
 		error = PTR_ERR(*bdev_filep);
@@ -481,21 +486,29 @@ xfs_open_devices(
 	/*
 	 * Setup xfs_mount buffer target pointers
 	 */
-	error = -ENOMEM;
 	mp->m_ddev_targp = xfs_alloc_buftarg(mp, sb->s_bdev_file);
-	if (!mp->m_ddev_targp)
+	if (IS_ERR(mp->m_ddev_targp)) {
+		error = PTR_ERR(mp->m_ddev_targp);
+		mp->m_ddev_targp = NULL;
 		goto out_close_rtdev;
+	}
 
 	if (rtdev_file) {
 		mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev_file);
-		if (!mp->m_rtdev_targp)
+		if (IS_ERR(mp->m_rtdev_targp)) {
+			error = PTR_ERR(mp->m_rtdev_targp);
+			mp->m_rtdev_targp = NULL;
 			goto out_free_ddev_targ;
+		}
 	}
 
 	if (logdev_file && file_bdev(logdev_file) != ddev) {
 		mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev_file);
-		if (!mp->m_logdev_targp)
+		if (IS_ERR(mp->m_logdev_targp)) {
+			error = PTR_ERR(mp->m_logdev_targp);
+			mp->m_logdev_targp = NULL;
 			goto out_free_rtdev_targ;
+		}
 	} else {
 		mp->m_logdev_targp = mp->m_ddev_targp;
 		/* Handle won't be used, drop it */
@@ -528,7 +541,7 @@ xfs_setup_devices(
 {
 	int			error;
 
-	error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_sectsize);
+	error = xfs_configure_buftarg(mp->m_ddev_targp, mp->m_sb.sb_sectsize);
 	if (error)
 		return error;
 
@@ -537,7 +550,7 @@ xfs_setup_devices(
 
 		if (xfs_has_sector(mp))
 			log_sector_size = mp->m_sb.sb_logsectsize;
-		error = xfs_setsize_buftarg(mp->m_logdev_targp,
+		error = xfs_configure_buftarg(mp->m_logdev_targp,
 					    log_sector_size);
 		if (error)
 			return error;
@@ -551,7 +564,7 @@ xfs_setup_devices(
 		}
 		mp->m_rtdev_targp = mp->m_ddev_targp;
 	} else if (mp->m_rtname) {
-		error = xfs_setsize_buftarg(mp->m_rtdev_targp,
+		error = xfs_configure_buftarg(mp->m_rtdev_targp,
 					    mp->m_sb.sb_sectsize);
 		if (error)
 			return error;
@@ -1334,6 +1347,42 @@ suffix_kstrtoint(
 	return ret;
 }
 
+static int
+suffix_kstrtoull(
+	const char		*s,
+	unsigned int		base,
+	unsigned long long	*res)
+{
+	int			last, shift_left_factor = 0;
+	unsigned long long	_res;
+	char			*value;
+	int			ret = 0;
+
+	value = kstrdup(s, GFP_KERNEL);
+	if (!value)
+		return -ENOMEM;
+
+	last = strlen(value) - 1;
+	if (value[last] == 'K' || value[last] == 'k') {
+		shift_left_factor = 10;
+		value[last] = '\0';
+	}
+	if (value[last] == 'M' || value[last] == 'm') {
+		shift_left_factor = 20;
+		value[last] = '\0';
+	}
+	if (value[last] == 'G' || value[last] == 'g') {
+		shift_left_factor = 30;
+		value[last] = '\0';
+	}
+
+	if (kstrtoull(value, base, &_res))
+		ret = -EINVAL;
+	kfree(value);
+	*res = _res << shift_left_factor;
+	return ret;
+}
+
 static inline void
 xfs_fs_warn_deprecated(
 	struct fs_context	*fc,
@@ -1518,6 +1567,14 @@ xfs_fs_parse_param(
 	case Opt_nolifetime:
 		parsing_mp->m_features |= XFS_FEAT_NOLIFETIME;
 		return 0;
+	case Opt_max_atomic_write:
+		if (suffix_kstrtoull(param->string, 10,
+				     &parsing_mp->m_awu_max_bytes)) {
+			xfs_warn(parsing_mp,
+	"max atomic write size must be positive integer");
+			return -EINVAL;
+		}
+		return 0;
 	default:
 		xfs_warn(parsing_mp, "unknown mount option [%s].", param->key);
 		return -EINVAL;
@@ -1897,13 +1954,6 @@ xfs_fs_fill_super(
 		}
 	}
 
-
-	if (xfs_has_exchange_range(mp))
-		xfs_warn_experimental(mp, XFS_EXPERIMENTAL_EXCHRANGE);
-
-	if (xfs_has_parent(mp))
-		xfs_warn_experimental(mp, XFS_EXPERIMENTAL_PPTR);
-
 	/*
 	 * If no quota mount options were provided, maybe we'll try to pick
 	 * up the quota accounting and enforcement flags from the ondisk sb.
@@ -1969,6 +2019,20 @@ xfs_remount_rw(
 	struct xfs_sb		*sbp = &mp->m_sb;
 	int error;
 
+	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp &&
+	    bdev_read_only(mp->m_logdev_targp->bt_bdev)) {
+		xfs_warn(mp,
+			"ro->rw transition prohibited by read-only logdev");
+		return -EACCES;
+	}
+
+	if (mp->m_rtdev_targp &&
+	    bdev_read_only(mp->m_rtdev_targp->bt_bdev)) {
+		xfs_warn(mp,
+			"ro->rw transition prohibited by read-only rtdev");
+		return -EACCES;
+	}
+
 	if (xfs_has_norecovery(mp)) {
 		xfs_warn(mp,
 			"ro->rw transition prohibited on norecovery mount");
@@ -2129,6 +2193,14 @@ xfs_fs_reconfigure(
 			mp->m_features |= XFS_FEAT_ATTR2;
 	}
 
+	/* Validate new max_atomic_write option before making other changes */
+	if (mp->m_awu_max_bytes != new_mp->m_awu_max_bytes) {
+		error = xfs_set_max_atomic_write_opt(mp,
+				new_mp->m_awu_max_bytes);
+		if (error)
+			return error;
+	}
+
 	/* inode32 -> inode64 */
 	if (xfs_has_small_inums(mp) && !xfs_has_small_inums(new_mp)) {
 		mp->m_features &= ~XFS_FEAT_SMALL_INUMS;
@@ -29,8 +29,6 @@ typedef struct xfs_param {
 	xfs_sysctl_val_t inherit_sync;	/* Inherit the "sync" inode flag. */
 	xfs_sysctl_val_t inherit_nodump;/* Inherit the "nodump" inode flag. */
 	xfs_sysctl_val_t inherit_noatim;/* Inherit the "noatime" inode flag. */
-	xfs_sysctl_val_t xfs_buf_timer;	/* Interval between xfsbufd wakeups. */
-	xfs_sysctl_val_t xfs_buf_age;	/* Metadata buffer age before flush. */
 	xfs_sysctl_val_t inherit_nosym;	/* Inherit the "nosymlinks" flag. */
 	xfs_sysctl_val_t rotorstep;	/* inode32 AG rotoring control knob */
 	xfs_sysctl_val_t inherit_nodfrg;/* Inherit the "nodefrag" inode flag. */
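The max_atomic_write= value parsed by suffix_kstrtoull() in the xfs_super.c hunk above accepts a plain byte count or a k/m/g power-of-two suffix. A rough userspace equivalent of that parsing, shown only to illustrate the accepted syntax and not mirroring the kernel helper exactly, is:

/* Illustration only: parse a size string like the mount option does. */
#include <stdio.h>
#include <stdlib.h>

static int parse_size(const char *s, unsigned long long *res)
{
	char *end;
	int shift = 0;
	unsigned long long v = strtoull(s, &end, 10);

	if (end == s)
		return -1;
	switch (*end) {
	case 'k': case 'K': shift = 10; end++; break;
	case 'm': case 'M': shift = 20; end++; break;
	case 'g': case 'G': shift = 30; end++; break;
	}
	if (*end != '\0')
		return -1;
	*res = v << shift;
	return 0;
}

int main(void)
{
	unsigned long long v;

	if (parse_size("16m", &v) == 0)
		printf("%llu\n", v);	/* prints 16777216 */
	return 0;
}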
||||||
|
@ -170,6 +170,99 @@ DEFINE_ATTR_LIST_EVENT(xfs_attr_list_notfound);
|
|||||||
DEFINE_ATTR_LIST_EVENT(xfs_attr_leaf_list);
|
DEFINE_ATTR_LIST_EVENT(xfs_attr_leaf_list);
|
||||||
DEFINE_ATTR_LIST_EVENT(xfs_attr_node_list);
|
DEFINE_ATTR_LIST_EVENT(xfs_attr_node_list);
|
||||||
|
|
||||||
|
TRACE_EVENT(xfs_calc_atomic_write_unit_max,
|
||||||
|
+	TP_PROTO(struct xfs_mount *mp, unsigned int max_write,
+		 unsigned int max_ioend, unsigned int max_agsize,
+		 unsigned int max_rgsize),
+	TP_ARGS(mp, max_write, max_ioend, max_agsize, max_rgsize),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(unsigned int, max_write)
+		__field(unsigned int, max_ioend)
+		__field(unsigned int, max_agsize)
+		__field(unsigned int, max_rgsize)
+		__field(unsigned int, data_awu_max)
+		__field(unsigned int, rt_awu_max)
+	),
+	TP_fast_assign(
+		__entry->dev = mp->m_super->s_dev;
+		__entry->max_write = max_write;
+		__entry->max_ioend = max_ioend;
+		__entry->max_agsize = max_agsize;
+		__entry->max_rgsize = max_rgsize;
+		__entry->data_awu_max = mp->m_groups[XG_TYPE_AG].awu_max;
+		__entry->rt_awu_max = mp->m_groups[XG_TYPE_RTG].awu_max;
+	),
+	TP_printk("dev %d:%d max_write %u max_ioend %u max_agsize %u max_rgsize %u data_awu_max %u rt_awu_max %u",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->max_write,
+		  __entry->max_ioend,
+		  __entry->max_agsize,
+		  __entry->max_rgsize,
+		  __entry->data_awu_max,
+		  __entry->rt_awu_max)
+);
+
+TRACE_EVENT(xfs_calc_max_atomic_write_fsblocks,
+	TP_PROTO(struct xfs_mount *mp, unsigned int per_intent,
+		 unsigned int step_size, unsigned int logres,
+		 unsigned int blockcount),
+	TP_ARGS(mp, per_intent, step_size, logres, blockcount),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(unsigned int, per_intent)
+		__field(unsigned int, step_size)
+		__field(unsigned int, logres)
+		__field(unsigned int, blockcount)
+	),
+	TP_fast_assign(
+		__entry->dev = mp->m_super->s_dev;
+		__entry->per_intent = per_intent;
+		__entry->step_size = step_size;
+		__entry->logres = logres;
+		__entry->blockcount = blockcount;
+	),
+	TP_printk("dev %d:%d per_intent %u step_size %u logres %u blockcount %u",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->per_intent,
+		  __entry->step_size,
+		  __entry->logres,
+		  __entry->blockcount)
+);
+
+TRACE_EVENT(xfs_calc_max_atomic_write_log_geometry,
+	TP_PROTO(struct xfs_mount *mp, unsigned int per_intent,
+		 unsigned int step_size, unsigned int blockcount,
+		 unsigned int min_logblocks, unsigned int logres),
+	TP_ARGS(mp, per_intent, step_size, blockcount, min_logblocks, logres),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(unsigned int, per_intent)
+		__field(unsigned int, step_size)
+		__field(unsigned int, blockcount)
+		__field(unsigned int, min_logblocks)
+		__field(unsigned int, cur_logblocks)
+		__field(unsigned int, logres)
+	),
+	TP_fast_assign(
+		__entry->dev = mp->m_super->s_dev;
+		__entry->per_intent = per_intent;
+		__entry->step_size = step_size;
+		__entry->blockcount = blockcount;
+		__entry->min_logblocks = min_logblocks;
+		__entry->cur_logblocks = mp->m_sb.sb_logblocks;
+		__entry->logres = logres;
+	),
+	TP_printk("dev %d:%d per_intent %u step_size %u blockcount %u min_logblocks %u logblocks %u logres %u",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->per_intent,
+		  __entry->step_size,
+		  __entry->blockcount,
+		  __entry->min_logblocks,
+		  __entry->cur_logblocks,
+		  __entry->logres)
+);
 
 TRACE_EVENT(xlog_intent_recovery_failed,
 	TP_PROTO(struct xfs_mount *mp, const struct xfs_defer_op_type *ops,
		 int error),
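
The three tracepoints above are plain TRACE_EVENTs in the xfs subsystem, so once this series is applied they appear under tracefs like any other XFS event. The userspace sketch below is illustrative only and is not part of the series; it assumes tracefs is mounted at /sys/kernel/tracing (older setups use /sys/kernel/debug/tracing) and that the events fire while XFS works out its atomic write limits, for example during mount.

/* Illustrative only: enable one of the new XFS tracepoints and stream the trace pipe. */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

#define TRACEFS "/sys/kernel/tracing"	/* assumed tracefs mount point */

int main(void)
{
	char buf[4096];
	ssize_t n;
	int fd;

	/* Turn on one of the events added above. */
	fd = open(TRACEFS "/events/xfs/xfs_calc_max_atomic_write_fsblocks/enable",
		  O_WRONLY);
	if (fd < 0) {
		perror("open enable");
		return 1;
	}
	if (write(fd, "1", 1) != 1)
		perror("write enable");
	close(fd);

	/* Stream trace output; the event fires when XFS computes atomic write limits. */
	fd = open(TRACEFS "/trace_pipe", O_RDONLY);
	if (fd < 0) {
		perror("open trace_pipe");
		return 1;
	}
	while ((n = read(fd, buf, sizeof(buf) - 1)) > 0) {
		buf[n] = '\0';
		fputs(buf, stdout);
	}
	close(fd);
	return 0;
}

Enabling the event before mounting a filesystem and then reading trace_pipe should show one line per limit calculation, matching the TP_printk format strings above.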
@@ -1657,6 +1750,28 @@ DEFINE_RW_EVENT(xfs_file_direct_write);
 DEFINE_RW_EVENT(xfs_file_dax_write);
 DEFINE_RW_EVENT(xfs_reflink_bounce_dio_write);
+
+TRACE_EVENT(xfs_iomap_atomic_write_cow,
+	TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count),
+	TP_ARGS(ip, offset, count),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_ino_t, ino)
+		__field(xfs_off_t, offset)
+		__field(ssize_t, count)
+	),
+	TP_fast_assign(
+		__entry->dev = VFS_I(ip)->i_sb->s_dev;
+		__entry->ino = ip->i_ino;
+		__entry->offset = offset;
+		__entry->count = count;
+	),
+	TP_printk("dev %d:%d ino 0x%llx pos 0x%llx bytecount 0x%zx",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->ino,
+		  __entry->offset,
+		  __entry->count)
+)
 
 DECLARE_EVENT_CLASS(xfs_imap_class,
 	TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count,
		 int whichfork, struct xfs_bmbt_irec *irec),
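
The xfs_iomap_atomic_write_cow event above appears to cover the CoW-based untorn write path mentioned in the merge description. For context, the userspace trigger for that path is a direct write submitted with RWF_ATOMIC; the sketch below is an assumption-laden illustration, not part of the patch. The RWF_ATOMIC fallback value and the fixed 4096-byte length are assumptions: the length must match a unit the file actually advertises through statx().

#define _GNU_SOURCE		/* pwritev2(), O_DIRECT */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/uio.h>
#include <unistd.h>

#ifndef RWF_ATOMIC		/* assumed value from recent uapi headers */
#define RWF_ATOMIC 0x00000040
#endif

int main(int argc, char **argv)
{
	size_t len = 4096;	/* must be a supported atomic write unit */
	struct iovec iov;
	void *buf;
	int fd;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <file>\n", argv[0]);
		return 1;
	}
	fd = open(argv[1], O_WRONLY | O_DIRECT);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (posix_memalign(&buf, len, len)) {
		perror("posix_memalign");
		return 1;
	}
	memset(buf, 0xab, len);
	iov.iov_base = buf;
	iov.iov_len = len;

	/* One untorn write of len bytes at offset 0. */
	if (pwritev2(fd, &iov, 1, 0, RWF_ATOMIC) < 0)
		perror("pwritev2(RWF_ATOMIC)");

	free(buf);
	close(fd);
	return 0;
}

If the requested size or alignment cannot be honoured untorn, the write should fail (typically with EINVAL) rather than tear.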
@@ -24,6 +24,7 @@
 #include "xfs_zone_priv.h"
 #include "xfs_zones.h"
 #include "xfs_trace.h"
+#include "xfs_mru_cache.h"
 
 void
 xfs_open_zone_put(
@@ -796,6 +797,100 @@ xfs_submit_zoned_bio(
 	submit_bio(&ioend->io_bio);
 }
 
+/*
+ * Cache the last zone written to for an inode so that it is considered first
+ * for subsequent writes.
+ */
+struct xfs_zone_cache_item {
+	struct xfs_mru_cache_elem	mru;
+	struct xfs_open_zone		*oz;
+};
+
+static inline struct xfs_zone_cache_item *
+xfs_zone_cache_item(struct xfs_mru_cache_elem *mru)
+{
+	return container_of(mru, struct xfs_zone_cache_item, mru);
+}
+
+static void
+xfs_zone_cache_free_func(
+	void			*data,
+	struct xfs_mru_cache_elem *mru)
+{
+	struct xfs_zone_cache_item *item = xfs_zone_cache_item(mru);
+
+	xfs_open_zone_put(item->oz);
+	kfree(item);
+}
+
+/*
+ * Check if we have a cached last open zone available for the inode and
+ * if yes return a reference to it.
+ */
+static struct xfs_open_zone *
+xfs_cached_zone(
+	struct xfs_mount	*mp,
+	struct xfs_inode	*ip)
+{
+	struct xfs_mru_cache_elem *mru;
+	struct xfs_open_zone	*oz;
+
+	mru = xfs_mru_cache_lookup(mp->m_zone_cache, ip->i_ino);
+	if (!mru)
+		return NULL;
+	oz = xfs_zone_cache_item(mru)->oz;
+	if (oz) {
+		/*
+		 * GC only steals open zones at mount time, so no GC zones
+		 * should end up in the cache.
+		 */
+		ASSERT(!oz->oz_is_gc);
+		ASSERT(atomic_read(&oz->oz_ref) > 0);
+		atomic_inc(&oz->oz_ref);
+	}
+	xfs_mru_cache_done(mp->m_zone_cache);
+	return oz;
+}
+
+/*
+ * Update the last used zone cache for a given inode.
+ *
+ * The caller must have a reference on the open zone.
+ */
+static void
+xfs_zone_cache_create_association(
+	struct xfs_inode	*ip,
+	struct xfs_open_zone	*oz)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_zone_cache_item *item = NULL;
+	struct xfs_mru_cache_elem *mru;
+
+	ASSERT(atomic_read(&oz->oz_ref) > 0);
+	atomic_inc(&oz->oz_ref);
+
+	mru = xfs_mru_cache_lookup(mp->m_zone_cache, ip->i_ino);
+	if (mru) {
+		/*
+		 * If we have an association already, update it to point to the
+		 * new zone.
+		 */
+		item = xfs_zone_cache_item(mru);
+		xfs_open_zone_put(item->oz);
+		item->oz = oz;
+		xfs_mru_cache_done(mp->m_zone_cache);
+		return;
+	}
+
+	item = kmalloc(sizeof(*item), GFP_KERNEL);
+	if (!item) {
+		xfs_open_zone_put(oz);
+		return;
+	}
+	item->oz = oz;
+	xfs_mru_cache_insert(mp->m_zone_cache, ip->i_ino, &item->mru);
+}
+
 void
 xfs_zone_alloc_and_submit(
 	struct iomap_ioend	*ioend,
@@ -819,11 +914,16 @@ xfs_zone_alloc_and_submit(
 	 */
 	if (!*oz && ioend->io_offset)
 		*oz = xfs_last_used_zone(ioend);
+	if (!*oz)
+		*oz = xfs_cached_zone(mp, ip);
+
 	if (!*oz) {
 select_zone:
 		*oz = xfs_select_zone(mp, write_hint, pack_tight);
 		if (!*oz)
 			goto out_error;
+
+		xfs_zone_cache_create_association(ip, *oz);
 	}
 
 	alloc_len = xfs_zone_alloc_blocks(*oz, XFS_B_TO_FSB(mp, ioend->io_size),
@@ -1211,6 +1311,14 @@ xfs_mount_zones(
 	error = xfs_zone_gc_mount(mp);
 	if (error)
 		goto out_free_zone_info;
+
+	/*
+	 * Set up a mru cache to track inode to open zone for data placement
+	 * purposes. The magic values for group count and life time is the
+	 * same as the defaults for file streams, which seems sane enough.
+	 */
+	xfs_mru_cache_create(&mp->m_zone_cache, mp,
+			5000, 10, xfs_zone_cache_free_func);
 	return 0;
 
 out_free_zone_info:
@@ -1224,4 +1332,5 @@ xfs_unmount_zones(
 {
 	xfs_zone_gc_unmount(mp);
 	xfs_free_zone_info(mp->m_zone_info);
+	xfs_mru_cache_destroy(mp->m_zone_cache);
 }
@@ -3502,7 +3502,8 @@ void generic_fillattr(struct mnt_idmap *, u32, struct inode *, struct kstat *);
 void generic_fill_statx_attr(struct inode *inode, struct kstat *stat);
 void generic_fill_statx_atomic_writes(struct kstat *stat,
 				      unsigned int unit_min,
-				      unsigned int unit_max);
+				      unsigned int unit_max,
+				      unsigned int unit_max_opt);
 extern int vfs_getattr_nosec(const struct path *, struct kstat *, u32, unsigned int);
 extern int vfs_getattr(const struct path *, struct kstat *, u32, unsigned int);
 void __inode_add_bytes(struct inode *inode, loff_t bytes);
@@ -57,6 +57,7 @@ struct kstat {
 	u32		dio_read_offset_align;
 	u32		atomic_write_unit_min;
 	u32		atomic_write_unit_max;
+	u32		atomic_write_unit_max_opt;
 	u32		atomic_write_segments_max;
 };
 
@@ -182,8 +182,12 @@ struct statx {
 	/* File offset alignment for direct I/O reads */
 	__u32	stx_dio_read_offset_align;
 
-	/* 0xb8 */
-	__u64	__spare3[9];	/* Spare space for future expansion */
+	/* Optimised max atomic write unit in bytes */
+	__u32	stx_atomic_write_unit_max_opt;
+	__u32	__spare2[1];
+
+	/* 0xc0 */
+	__u64	__spare3[8];	/* Spare space for future expansion */
 
 	/* 0x100 */
 };
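
The new statx field is the userspace-visible end of the series: applications can request STATX_WRITE_ATOMIC and read the untorn write geometry, now including the optimised maximum. The sketch below is illustrative only; it assumes uapi headers new enough to carry stx_atomic_write_unit_max_opt, and the fallback mask value mirrors recent uapi headers rather than anything stated in this diff.

/* Illustrative only: query a file's atomic write geometry via statx(2). */
#include <fcntl.h>		/* AT_FDCWD */
#include <linux/stat.h>		/* struct statx, STATX_* */
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef STATX_WRITE_ATOMIC	/* assumed value from recent uapi headers */
#define STATX_WRITE_ATOMIC 0x00010000U
#endif

int main(int argc, char **argv)
{
	struct statx stx;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <file>\n", argv[0]);
		return 1;
	}
	memset(&stx, 0, sizeof(stx));

	/* Ask the kernel to fill in the atomic write fields. */
	if (syscall(SYS_statx, AT_FDCWD, argv[1], 0, STATX_WRITE_ATOMIC, &stx)) {
		perror("statx");
		return 1;
	}
	if (!(stx.stx_mask & STATX_WRITE_ATOMIC)) {
		fprintf(stderr, "kernel did not report atomic write limits\n");
		return 1;
	}
	printf("unit_min      %u\n", stx.stx_atomic_write_unit_min);
	printf("unit_max      %u\n", stx.stx_atomic_write_unit_max);
	printf("unit_max_opt  %u\n", stx.stx_atomic_write_unit_max_opt);
	printf("segments_max  %u\n", stx.stx_atomic_write_segments_max);
	return 0;
}

A zero stx_atomic_write_unit_max_opt appears to mean that no separate optimised limit is reported, matching the zero passed by callers that only know a single maximum.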