mirror of
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-03-22 07:27:12 +08:00
Merge tag 'xfs-merge-6.18' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux
Pull xfs updates from Carlos Maiolino:
"For this merge window, there are really no new features, but there are
a few things worth emphasizing:
- Deprecated for years already, the (no)attr2 and (no)ikeep mount
options have been removed for good
- Several cleanups (especially of typedefs) and bug fixes
- Improvements made in the online repair reap calculations
- online fsck is now enabled by default"
* tag 'xfs-merge-6.18' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: (53 commits)
xfs: rework datasync tracking and execution
xfs: rearrange code in xfs_inode_item_precommit
xfs: scrub: use kstrdup_const() for metapath scan setups
xfs: use bt_nr_sectors in xfs_dax_translate_range
xfs: track the number of blocks in each buftarg
xfs: constify xfs_errortag_random_default
xfs: improve default maximum number of open zones
xfs: improve zone statistics message
xfs: centralize error tag definitions
xfs: remove pointless externs in xfs_error.h
xfs: remove the expr argument to XFS_TEST_ERROR
xfs: remove xfs_errortag_set
xfs: remove xfs_errortag_get
xfs: move the XLOG_REG_ constants out of xfs_log_format.h
xfs: adjust the hint based zone allocation policy
xfs: refactor hint based zone allocation
fs: add an enum for number of life time hints
xfs: fix log CRC mismatches between i386 and other architectures
xfs: rename the old_crc variable in xlog_recover_process
xfs: remove the unused xfs_log_iovec_t typedef
...
This commit is contained in:
@@ -34,22 +34,6 @@ When mounting an XFS filesystem, the following options are accepted.
|
||||
to the file. Specifying a fixed ``allocsize`` value turns off
|
||||
the dynamic behaviour.
|
||||
|
||||
attr2 or noattr2
|
||||
The options enable/disable an "opportunistic" improvement to
|
||||
be made in the way inline extended attributes are stored
|
||||
on-disk. When the new form is used for the first time when
|
||||
``attr2`` is selected (either when setting or removing extended
|
||||
attributes) the on-disk superblock feature bit field will be
|
||||
updated to reflect this format being in use.
|
||||
|
||||
The default behaviour is determined by the on-disk feature
|
||||
bit indicating that ``attr2`` behaviour is active. If either
|
||||
mount option is set, then that becomes the new default used
|
||||
by the filesystem.
|
||||
|
||||
CRC enabled filesystems always use the ``attr2`` format, and so
|
||||
will reject the ``noattr2`` mount option if it is set.
|
||||
|
||||
discard or nodiscard (default)
|
||||
Enable/disable the issuing of commands to let the block
|
||||
device reclaim space freed by the filesystem. This is
|
||||
@@ -75,12 +59,6 @@ When mounting an XFS filesystem, the following options are accepted.
|
||||
across the entire filesystem rather than just on directories
|
||||
configured to use it.
|
||||
|
||||
ikeep or noikeep (default)
|
||||
When ``ikeep`` is specified, XFS does not delete empty inode
|
||||
clusters and keeps them around on disk. When ``noikeep`` is
|
||||
specified, empty inode clusters are returned to the free
|
||||
space pool.
|
||||
|
||||
inode32 or inode64 (default)
|
||||
When ``inode32`` is specified, it indicates that XFS limits
|
||||
inode creation to locations which will not result in inode
|
||||
@@ -253,9 +231,8 @@ latest version and try again.
|
||||
|
||||
The deprecation will take place in two parts. Support for mounting V4
|
||||
filesystems can now be disabled at kernel build time via Kconfig option.
|
||||
The option will default to yes until September 2025, at which time it
|
||||
will be changed to default to no. In September 2030, support will be
|
||||
removed from the codebase entirely.
|
||||
These options were changed to default to no in September 2025. In
|
||||
September 2030, support will be removed from the codebase entirely.
|
||||
|
||||
Note: Distributors may choose to withdraw V4 format support earlier than
|
||||
the dates listed above.
|
||||
@@ -268,8 +245,6 @@ Deprecated Mount Options
|
||||
============================ ================
|
||||
Mounting with V4 filesystem September 2030
|
||||
Mounting ascii-ci filesystem September 2030
|
||||
ikeep/noikeep September 2025
|
||||
attr2/noattr2 September 2025
|
||||
============================ ================
|
||||
|
||||
|
||||
@@ -285,6 +260,8 @@ Removed Mount Options
|
||||
osyncisdsync/osyncisosync v4.0
|
||||
barrier v4.19
|
||||
nobarrier v4.19
|
||||
ikeep/noikeep v6.18
|
||||
attr2/noattr2 v6.18
|
||||
=========================== =======
|
||||
|
||||
sysctls
|
||||
@@ -312,9 +289,6 @@ The following sysctls are available for the XFS filesystem:
|
||||
removes unused preallocation from clean inodes and releases
|
||||
the unused space back to the free pool.
|
||||
|
||||
fs.xfs.speculative_cow_prealloc_lifetime
|
||||
This is an alias for speculative_prealloc_lifetime.
|
||||
|
||||
fs.xfs.error_level (Min: 0 Default: 3 Max: 11)
|
||||
A volume knob for error reporting when internal errors occur.
|
||||
This will generate detailed messages & backtraces for filesystem
|
||||
@@ -341,17 +315,6 @@ The following sysctls are available for the XFS filesystem:
|
||||
|
||||
This option is intended for debugging only.
|
||||
|
||||
fs.xfs.irix_symlink_mode (Min: 0 Default: 0 Max: 1)
|
||||
Controls whether symlinks are created with mode 0777 (default)
|
||||
or whether their mode is affected by the umask (irix mode).
|
||||
|
||||
fs.xfs.irix_sgid_inherit (Min: 0 Default: 0 Max: 1)
|
||||
Controls files created in SGID directories.
|
||||
If the group ID of the new file does not match the effective group
|
||||
ID or one of the supplementary group IDs of the parent dir, the
|
||||
ISGID bit is cleared if the irix_sgid_inherit compatibility sysctl
|
||||
is set.
|
||||
|
||||
fs.xfs.inherit_sync (Min: 0 Default: 1 Max: 1)
|
||||
Setting this to "1" will cause the "sync" flag set
|
||||
by the **xfs_io(8)** chattr command on a directory to be
|
||||
@@ -387,24 +350,20 @@ The following sysctls are available for the XFS filesystem:
|
||||
Deprecated Sysctls
|
||||
==================
|
||||
|
||||
=========================================== ================
|
||||
Name Removal Schedule
|
||||
=========================================== ================
|
||||
fs.xfs.irix_sgid_inherit September 2025
|
||||
fs.xfs.irix_symlink_mode September 2025
|
||||
fs.xfs.speculative_cow_prealloc_lifetime September 2025
|
||||
=========================================== ================
|
||||
|
||||
None currently.
|
||||
|
||||
Removed Sysctls
|
||||
===============
|
||||
|
||||
============================= =======
|
||||
Name Removed
|
||||
============================= =======
|
||||
fs.xfs.xfsbufd_centisec v4.0
|
||||
fs.xfs.age_buffer_centisecs v4.0
|
||||
============================= =======
|
||||
========================================== =======
|
||||
Name Removed
|
||||
========================================== =======
|
||||
fs.xfs.xfsbufd_centisec v4.0
|
||||
fs.xfs.age_buffer_centisecs v4.0
|
||||
fs.xfs.irix_symlink_mode v6.18
|
||||
fs.xfs.irix_sgid_inherit v6.18
|
||||
fs.xfs.speculative_cow_prealloc_lifetime v6.18
|
||||
========================================== =======
|
||||
|
||||
Error handling
|
||||
==============
|
||||
|
||||
@@ -25,7 +25,7 @@ config XFS_FS
|
||||
config XFS_SUPPORT_V4
|
||||
bool "Support deprecated V4 (crc=0) format"
|
||||
depends on XFS_FS
|
||||
default y
|
||||
default n
|
||||
help
|
||||
The V4 filesystem format lacks certain features that are supported
|
||||
by the V5 format, such as metadata checksumming, strengthened
|
||||
@@ -40,7 +40,7 @@ config XFS_SUPPORT_V4
|
||||
filesystem is a V4 filesystem. If no such string is found, please
|
||||
upgrade xfsprogs to the latest version and try again.
|
||||
|
||||
This option will become default N in September 2025. Support for the
|
||||
This option became default N in September 2025. Support for the
|
||||
V4 format will be removed entirely in September 2030. Distributors
|
||||
can say N here to withdraw support earlier.
|
||||
|
||||
@@ -50,7 +50,7 @@ config XFS_SUPPORT_V4
|
||||
config XFS_SUPPORT_ASCII_CI
|
||||
bool "Support deprecated case-insensitive ascii (ascii-ci=1) format"
|
||||
depends on XFS_FS
|
||||
default y
|
||||
default n
|
||||
help
|
||||
The ASCII case insensitivity filesystem feature only works correctly
|
||||
on systems that have been coerced into using ISO 8859-1, and it does
|
||||
@@ -67,7 +67,7 @@ config XFS_SUPPORT_ASCII_CI
|
||||
filesystem is a case-insensitive filesystem. If no such string is
|
||||
found, please upgrade xfsprogs to the latest version and try again.
|
||||
|
||||
This option will become default N in September 2025. Support for the
|
||||
This option became default N in September 2025. Support for the
|
||||
feature will be removed entirely in September 2030. Distributors
|
||||
can say N here to withdraw support earlier.
|
||||
|
||||
@@ -137,7 +137,7 @@ config XFS_BTREE_IN_MEM
|
||||
|
||||
config XFS_ONLINE_SCRUB
|
||||
bool "XFS online metadata check support"
|
||||
default n
|
||||
default y
|
||||
depends on XFS_FS
|
||||
depends on TMPFS && SHMEM
|
||||
select XFS_LIVE_HOOKS
|
||||
@@ -150,12 +150,8 @@ config XFS_ONLINE_SCRUB
|
||||
advantage here is to look for problems proactively so that
|
||||
they can be dealt with in a controlled manner.
|
||||
|
||||
This feature is considered EXPERIMENTAL. Use with caution!
|
||||
|
||||
See the xfs_scrub man page in section 8 for additional information.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config XFS_ONLINE_SCRUB_STATS
|
||||
bool "XFS online metadata check usage data collection"
|
||||
default y
|
||||
@@ -171,11 +167,9 @@ config XFS_ONLINE_SCRUB_STATS
|
||||
|
||||
Usage data are collected in /sys/kernel/debug/xfs/scrub.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config XFS_ONLINE_REPAIR
|
||||
bool "XFS online metadata repair support"
|
||||
default n
|
||||
default y
|
||||
depends on XFS_FS && XFS_ONLINE_SCRUB
|
||||
select XFS_BTREE_IN_MEM
|
||||
help
|
||||
@@ -186,12 +180,8 @@ config XFS_ONLINE_REPAIR
|
||||
formatted with secondary metadata, such as reverse mappings and inode
|
||||
parent pointers.
|
||||
|
||||
This feature is considered EXPERIMENTAL. Use with caution!
|
||||
|
||||
See the xfs_scrub man page in section 8 for additional information.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config XFS_WARN
|
||||
bool "XFS Verbose Warnings"
|
||||
depends on XFS_FS && !XFS_DEBUG
|
||||
|
||||
@@ -92,9 +92,8 @@ xfs_ag_resv_critical(
|
||||
trace_xfs_ag_resv_critical(pag, type, avail);
|
||||
|
||||
/* Critically low if less than 10% or max btree height remains. */
|
||||
return XFS_TEST_ERROR(avail < orig / 10 ||
|
||||
avail < mp->m_agbtree_maxlevels,
|
||||
mp, XFS_ERRTAG_AG_RESV_CRITICAL);
|
||||
return avail < orig / 10 || avail < mp->m_agbtree_maxlevels ||
|
||||
XFS_TEST_ERROR(mp, XFS_ERRTAG_AG_RESV_CRITICAL);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -203,7 +202,7 @@ __xfs_ag_resv_init(
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_AG_RESV_FAIL))
|
||||
if (XFS_TEST_ERROR(mp, XFS_ERRTAG_AG_RESV_FAIL))
|
||||
error = -ENOSPC;
|
||||
else
|
||||
error = xfs_dec_fdblocks(mp, hidden_space, true);
|
||||
|
||||
@@ -3321,7 +3321,7 @@ xfs_agf_read_verify(
|
||||
xfs_verifier_error(bp, -EFSBADCRC, __this_address);
|
||||
else {
|
||||
fa = xfs_agf_verify(bp);
|
||||
if (XFS_TEST_ERROR(fa, mp, XFS_ERRTAG_ALLOC_READ_AGF))
|
||||
if (fa || XFS_TEST_ERROR(mp, XFS_ERRTAG_ALLOC_READ_AGF))
|
||||
xfs_verifier_error(bp, -EFSCORRUPTED, fa);
|
||||
}
|
||||
}
|
||||
@@ -4019,8 +4019,7 @@ __xfs_free_extent(
|
||||
ASSERT(len != 0);
|
||||
ASSERT(type != XFS_AG_RESV_AGFL);
|
||||
|
||||
if (XFS_TEST_ERROR(false, mp,
|
||||
XFS_ERRTAG_FREE_EXTENT))
|
||||
if (XFS_TEST_ERROR(mp, XFS_ERRTAG_FREE_EXTENT))
|
||||
return -EIO;
|
||||
|
||||
error = xfs_free_extent_fix_freelist(tp, pag, &agbp);
|
||||
|
||||
@@ -667,12 +667,8 @@ xfs_attr_shortform_bytesfit(
|
||||
|
||||
/*
|
||||
* For attr2 we can try to move the forkoff if there is space in the
|
||||
* literal area, but for the old format we are done if there is no
|
||||
* space in the fixed attribute fork.
|
||||
* literal area
|
||||
*/
|
||||
if (!xfs_has_attr2(mp))
|
||||
return 0;
|
||||
|
||||
dsize = dp->i_df.if_bytes;
|
||||
|
||||
switch (dp->i_df.if_format) {
|
||||
@@ -723,22 +719,16 @@ xfs_attr_shortform_bytesfit(
|
||||
}
|
||||
|
||||
/*
|
||||
* Switch on the ATTR2 superblock bit (implies also FEATURES2) unless:
|
||||
* - noattr2 mount option is set,
|
||||
* - on-disk version bit says it is already set, or
|
||||
* - the attr2 mount option is not set to enable automatic upgrade from attr1.
|
||||
* Switch on the ATTR2 superblock bit (implies also FEATURES2) unless
|
||||
* on-disk version bit says it is already set
|
||||
*/
|
||||
STATIC void
|
||||
xfs_sbversion_add_attr2(
|
||||
struct xfs_mount *mp,
|
||||
struct xfs_trans *tp)
|
||||
{
|
||||
if (xfs_has_noattr2(mp))
|
||||
return;
|
||||
if (mp->m_sb.sb_features2 & XFS_SB_VERSION2_ATTR2BIT)
|
||||
return;
|
||||
if (!xfs_has_attr2(mp))
|
||||
return;
|
||||
|
||||
spin_lock(&mp->m_sb_lock);
|
||||
xfs_add_attr2(mp);
|
||||
@@ -889,7 +879,7 @@ xfs_attr_sf_removename(
|
||||
/*
|
||||
* Fix up the start offset of the attribute fork
|
||||
*/
|
||||
if (totsize == sizeof(struct xfs_attr_sf_hdr) && xfs_has_attr2(mp) &&
|
||||
if (totsize == sizeof(struct xfs_attr_sf_hdr) &&
|
||||
(dp->i_df.if_format != XFS_DINODE_FMT_BTREE) &&
|
||||
!(args->op_flags & (XFS_DA_OP_ADDNAME | XFS_DA_OP_REPLACE)) &&
|
||||
!xfs_has_parent(mp)) {
|
||||
@@ -900,7 +890,6 @@ xfs_attr_sf_removename(
|
||||
ASSERT(dp->i_forkoff);
|
||||
ASSERT(totsize > sizeof(struct xfs_attr_sf_hdr) ||
|
||||
(args->op_flags & XFS_DA_OP_ADDNAME) ||
|
||||
!xfs_has_attr2(mp) ||
|
||||
dp->i_df.if_format == XFS_DINODE_FMT_BTREE ||
|
||||
xfs_has_parent(mp));
|
||||
xfs_trans_log_inode(args->trans, dp,
|
||||
@@ -1040,8 +1029,7 @@ xfs_attr_shortform_allfit(
|
||||
bytes += xfs_attr_sf_entsize_byname(name_loc->namelen,
|
||||
be16_to_cpu(name_loc->valuelen));
|
||||
}
|
||||
if (xfs_has_attr2(dp->i_mount) &&
|
||||
(dp->i_df.if_format != XFS_DINODE_FMT_BTREE) &&
|
||||
if ((dp->i_df.if_format != XFS_DINODE_FMT_BTREE) &&
|
||||
(bytes == sizeof(struct xfs_attr_sf_hdr)))
|
||||
return -1;
|
||||
return xfs_attr_shortform_bytesfit(dp, bytes);
|
||||
@@ -1161,7 +1149,6 @@ xfs_attr3_leaf_to_shortform(
|
||||
* this case.
|
||||
*/
|
||||
if (!(args->op_flags & XFS_DA_OP_REPLACE)) {
|
||||
ASSERT(xfs_has_attr2(dp->i_mount));
|
||||
ASSERT(dp->i_df.if_format != XFS_DINODE_FMT_BTREE);
|
||||
xfs_attr_fork_remove(dp, args->trans);
|
||||
}
|
||||
@@ -1225,7 +1212,7 @@ xfs_attr3_leaf_to_node(
|
||||
|
||||
trace_xfs_attr_leaf_to_node(args);
|
||||
|
||||
if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_ATTR_LEAF_TO_NODE)) {
|
||||
if (XFS_TEST_ERROR(mp, XFS_ERRTAG_ATTR_LEAF_TO_NODE)) {
|
||||
error = -EIO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
@@ -997,8 +997,7 @@ xfs_bmap_add_attrfork_local(
|
||||
static int
|
||||
xfs_bmap_set_attrforkoff(
|
||||
struct xfs_inode *ip,
|
||||
int size,
|
||||
int *version)
|
||||
int size)
|
||||
{
|
||||
int default_size = xfs_default_attroffset(ip) >> 3;
|
||||
|
||||
@@ -1012,8 +1011,6 @@ xfs_bmap_set_attrforkoff(
|
||||
ip->i_forkoff = xfs_attr_shortform_bytesfit(ip, size);
|
||||
if (!ip->i_forkoff)
|
||||
ip->i_forkoff = default_size;
|
||||
else if (xfs_has_attr2(ip->i_mount) && version)
|
||||
*version = 2;
|
||||
break;
|
||||
default:
|
||||
ASSERT(0);
|
||||
@@ -1035,7 +1032,6 @@ xfs_bmap_add_attrfork(
|
||||
int rsvd) /* xact may use reserved blks */
|
||||
{
|
||||
struct xfs_mount *mp = tp->t_mountp;
|
||||
int version = 1; /* superblock attr version */
|
||||
int logflags; /* logging flags */
|
||||
int error; /* error return value */
|
||||
|
||||
@@ -1045,7 +1041,7 @@ xfs_bmap_add_attrfork(
|
||||
ASSERT(!xfs_inode_has_attr_fork(ip));
|
||||
|
||||
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
|
||||
error = xfs_bmap_set_attrforkoff(ip, size, &version);
|
||||
error = xfs_bmap_set_attrforkoff(ip, size);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
@@ -1069,16 +1065,12 @@ xfs_bmap_add_attrfork(
|
||||
xfs_trans_log_inode(tp, ip, logflags);
|
||||
if (error)
|
||||
return error;
|
||||
if (!xfs_has_attr(mp) ||
|
||||
(!xfs_has_attr2(mp) && version == 2)) {
|
||||
if (!xfs_has_attr(mp)) {
|
||||
bool log_sb = false;
|
||||
|
||||
spin_lock(&mp->m_sb_lock);
|
||||
if (!xfs_has_attr(mp)) {
|
||||
xfs_add_attr(mp);
|
||||
log_sb = true;
|
||||
}
|
||||
if (!xfs_has_attr2(mp) && version == 2) {
|
||||
xfs_add_attr2(mp);
|
||||
log_sb = true;
|
||||
}
|
||||
@@ -3662,8 +3654,7 @@ xfs_bmap_btalloc(
|
||||
/* Trim the allocation back to the maximum an AG can fit. */
|
||||
args.maxlen = min(ap->length, mp->m_ag_max_usable);
|
||||
|
||||
if (unlikely(XFS_TEST_ERROR(false, mp,
|
||||
XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT)))
|
||||
if (unlikely(XFS_TEST_ERROR(mp, XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT)))
|
||||
error = xfs_bmap_exact_minlen_extent_alloc(ap, &args);
|
||||
else if ((ap->datatype & XFS_ALLOC_USERDATA) &&
|
||||
xfs_inode_is_filestream(ap->ip))
|
||||
@@ -3849,7 +3840,7 @@ xfs_bmapi_read(
|
||||
}
|
||||
|
||||
if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
|
||||
XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
|
||||
XFS_TEST_ERROR(mp, XFS_ERRTAG_BMAPIFORMAT)) {
|
||||
xfs_bmap_mark_sick(ip, whichfork);
|
||||
return -EFSCORRUPTED;
|
||||
}
|
||||
@@ -4200,7 +4191,7 @@ xfs_bmapi_write(
|
||||
(XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO));
|
||||
|
||||
if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
|
||||
XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
|
||||
XFS_TEST_ERROR(mp, XFS_ERRTAG_BMAPIFORMAT)) {
|
||||
xfs_bmap_mark_sick(ip, whichfork);
|
||||
return -EFSCORRUPTED;
|
||||
}
|
||||
@@ -4545,7 +4536,7 @@ xfs_bmapi_remap(
|
||||
(XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC));
|
||||
|
||||
if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
|
||||
XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
|
||||
XFS_TEST_ERROR(mp, XFS_ERRTAG_BMAPIFORMAT)) {
|
||||
xfs_bmap_mark_sick(ip, whichfork);
|
||||
return -EFSCORRUPTED;
|
||||
}
|
||||
@@ -5679,7 +5670,7 @@ xfs_bmap_collapse_extents(
|
||||
int logflags = 0;
|
||||
|
||||
if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
|
||||
XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
|
||||
XFS_TEST_ERROR(mp, XFS_ERRTAG_BMAPIFORMAT)) {
|
||||
xfs_bmap_mark_sick(ip, whichfork);
|
||||
return -EFSCORRUPTED;
|
||||
}
|
||||
@@ -5795,7 +5786,7 @@ xfs_bmap_insert_extents(
|
||||
int logflags = 0;
|
||||
|
||||
if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
|
||||
XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
|
||||
XFS_TEST_ERROR(mp, XFS_ERRTAG_BMAPIFORMAT)) {
|
||||
xfs_bmap_mark_sick(ip, whichfork);
|
||||
return -EFSCORRUPTED;
|
||||
}
|
||||
@@ -5900,7 +5891,7 @@ xfs_bmap_split_extent(
|
||||
int i = 0;
|
||||
|
||||
if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
|
||||
XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
|
||||
XFS_TEST_ERROR(mp, XFS_ERRTAG_BMAPIFORMAT)) {
|
||||
xfs_bmap_mark_sick(ip, whichfork);
|
||||
return -EFSCORRUPTED;
|
||||
}
|
||||
@@ -6065,7 +6056,7 @@ xfs_bmap_finish_one(
|
||||
|
||||
trace_xfs_bmap_deferred(bi);
|
||||
|
||||
if (XFS_TEST_ERROR(false, tp->t_mountp, XFS_ERRTAG_BMAP_FINISH_ONE))
|
||||
if (XFS_TEST_ERROR(tp->t_mountp, XFS_ERRTAG_BMAP_FINISH_ONE))
|
||||
return -EIO;
|
||||
|
||||
switch (bi->bi_type) {
|
||||
|
||||
@@ -306,7 +306,7 @@ xfs_btree_check_block(
|
||||
|
||||
fa = __xfs_btree_check_block(cur, block, level, bp);
|
||||
if (XFS_IS_CORRUPT(mp, fa != NULL) ||
|
||||
XFS_TEST_ERROR(false, mp, xfs_btree_block_errtag(cur))) {
|
||||
XFS_TEST_ERROR(mp, xfs_btree_block_errtag(cur))) {
|
||||
if (bp)
|
||||
trace_xfs_btree_corrupt(bp, _RET_IP_);
|
||||
xfs_btree_mark_sick(cur);
|
||||
|
||||
@@ -565,7 +565,7 @@ xfs_da3_split(
|
||||
|
||||
trace_xfs_da_split(state->args);
|
||||
|
||||
if (XFS_TEST_ERROR(false, state->mp, XFS_ERRTAG_DA_LEAF_SPLIT))
|
||||
if (XFS_TEST_ERROR(state->mp, XFS_ERRTAG_DA_LEAF_SPLIT))
|
||||
return -EIO;
|
||||
|
||||
/*
|
||||
|
||||
@@ -223,7 +223,7 @@ xfs_dir_ino_validate(
|
||||
bool ino_ok = xfs_verify_dir_ino(mp, ino);
|
||||
|
||||
if (XFS_IS_CORRUPT(mp, !ino_ok) ||
|
||||
XFS_TEST_ERROR(false, mp, XFS_ERRTAG_DIR_INO_VALIDATE)) {
|
||||
XFS_TEST_ERROR(mp, XFS_ERRTAG_DIR_INO_VALIDATE)) {
|
||||
xfs_warn(mp, "Invalid inode number 0x%Lx",
|
||||
(unsigned long long) ino);
|
||||
return -EFSCORRUPTED;
|
||||
|
||||
@@ -4,14 +4,22 @@
|
||||
* Copyright (C) 2017 Oracle.
|
||||
* All Rights Reserved.
|
||||
*/
|
||||
#ifndef __XFS_ERRORTAG_H_
|
||||
#if !defined(__XFS_ERRORTAG_H_) || defined(XFS_ERRTAG)
|
||||
#define __XFS_ERRORTAG_H_
|
||||
|
||||
/*
|
||||
* error injection tags - the labels can be anything you want
|
||||
* but each tag should have its own unique number
|
||||
* There are two ways to use this header file. The first way is to #include it
|
||||
* bare, which will define all the XFS_ERRTAG_* error injection knobs for use
|
||||
* with the XFS_TEST_ERROR macro. The second way is to enclose the #include
|
||||
* with a #define for an XFS_ERRTAG macro, in which case the header will define
|
||||
* an XFS_ERRTAGS macro that expands to invoke that XFS_ERRTAG macro for each
|
||||
* defined error injection knob.
|
||||
*/
|
||||
|
||||
/*
|
||||
* These are the actual error injection tags. The numbers should be consecutive
|
||||
* because arrays are sized based on the maximum.
|
||||
*/
|
||||
#define XFS_ERRTAG_NOERROR 0
|
||||
#define XFS_ERRTAG_IFLUSH_1 1
|
||||
#define XFS_ERRTAG_IFLUSH_2 2
|
||||
@@ -71,49 +79,61 @@
|
||||
* Random factors for above tags, 1 means always, 2 means 1/2 time, etc.
|
||||
*/
|
||||
#define XFS_RANDOM_DEFAULT 100
|
||||
#define XFS_RANDOM_IFLUSH_1 XFS_RANDOM_DEFAULT
|
||||
#define XFS_RANDOM_IFLUSH_2 XFS_RANDOM_DEFAULT
|
||||
#define XFS_RANDOM_IFLUSH_3 XFS_RANDOM_DEFAULT
|
||||
#define XFS_RANDOM_IFLUSH_4 XFS_RANDOM_DEFAULT
|
||||
#define XFS_RANDOM_IFLUSH_5 XFS_RANDOM_DEFAULT
|
||||
#define XFS_RANDOM_IFLUSH_6 XFS_RANDOM_DEFAULT
|
||||
#define XFS_RANDOM_DA_READ_BUF XFS_RANDOM_DEFAULT
|
||||
#define XFS_RANDOM_BTREE_CHECK_LBLOCK (XFS_RANDOM_DEFAULT/4)
|
||||
#define XFS_RANDOM_BTREE_CHECK_SBLOCK XFS_RANDOM_DEFAULT
|
||||
#define XFS_RANDOM_ALLOC_READ_AGF XFS_RANDOM_DEFAULT
|
||||
#define XFS_RANDOM_IALLOC_READ_AGI XFS_RANDOM_DEFAULT
|
||||
#define XFS_RANDOM_ITOBP_INOTOBP XFS_RANDOM_DEFAULT
|
||||
#define XFS_RANDOM_IUNLINK XFS_RANDOM_DEFAULT
|
||||
#define XFS_RANDOM_IUNLINK_REMOVE XFS_RANDOM_DEFAULT
|
||||
#define XFS_RANDOM_DIR_INO_VALIDATE XFS_RANDOM_DEFAULT
|
||||
#define XFS_RANDOM_BULKSTAT_READ_CHUNK XFS_RANDOM_DEFAULT
|
||||
#define XFS_RANDOM_IODONE_IOERR (XFS_RANDOM_DEFAULT/10)
|
||||
#define XFS_RANDOM_STRATREAD_IOERR (XFS_RANDOM_DEFAULT/10)
|
||||
#define XFS_RANDOM_STRATCMPL_IOERR (XFS_RANDOM_DEFAULT/10)
|
||||
#define XFS_RANDOM_DIOWRITE_IOERR (XFS_RANDOM_DEFAULT/10)
|
||||
#define XFS_RANDOM_BMAPIFORMAT XFS_RANDOM_DEFAULT
|
||||
#define XFS_RANDOM_FREE_EXTENT 1
|
||||
#define XFS_RANDOM_RMAP_FINISH_ONE 1
|
||||
#define XFS_RANDOM_REFCOUNT_CONTINUE_UPDATE 1
|
||||
#define XFS_RANDOM_REFCOUNT_FINISH_ONE 1
|
||||
#define XFS_RANDOM_BMAP_FINISH_ONE 1
|
||||
#define XFS_RANDOM_AG_RESV_CRITICAL 4
|
||||
#define XFS_RANDOM_LOG_BAD_CRC 1
|
||||
#define XFS_RANDOM_LOG_ITEM_PIN 1
|
||||
#define XFS_RANDOM_BUF_LRU_REF 2
|
||||
#define XFS_RANDOM_FORCE_SCRUB_REPAIR 1
|
||||
#define XFS_RANDOM_FORCE_SUMMARY_RECALC 1
|
||||
#define XFS_RANDOM_IUNLINK_FALLBACK (XFS_RANDOM_DEFAULT/10)
|
||||
#define XFS_RANDOM_BUF_IOERROR XFS_RANDOM_DEFAULT
|
||||
#define XFS_RANDOM_REDUCE_MAX_IEXTENTS 1
|
||||
#define XFS_RANDOM_BMAP_ALLOC_MINLEN_EXTENT 1
|
||||
#define XFS_RANDOM_AG_RESV_FAIL 1
|
||||
#define XFS_RANDOM_LARP 1
|
||||
#define XFS_RANDOM_DA_LEAF_SPLIT 1
|
||||
#define XFS_RANDOM_ATTR_LEAF_TO_NODE 1
|
||||
#define XFS_RANDOM_WB_DELAY_MS 3000
|
||||
#define XFS_RANDOM_WRITE_DELAY_MS 3000
|
||||
#define XFS_RANDOM_EXCHMAPS_FINISH_ONE 1
|
||||
#define XFS_RANDOM_METAFILE_RESV_CRITICAL 4
|
||||
|
||||
/*
|
||||
* Table of error injection knobs.  The parameters to the XFS_ERRTAG macro are:
|
||||
* 1. The XFS_ERRTAG_ flag but without the prefix;
|
||||
* 2. The name of the sysfs knob; and
|
||||
* 3. The default value for the knob.
|
||||
*/
|
||||
#ifdef XFS_ERRTAG
|
||||
# undef XFS_ERRTAGS
|
||||
# define XFS_ERRTAGS \
|
||||
XFS_ERRTAG(NOERROR, noerror, XFS_RANDOM_DEFAULT) \
|
||||
XFS_ERRTAG(IFLUSH_1, iflush1, XFS_RANDOM_DEFAULT) \
|
||||
XFS_ERRTAG(IFLUSH_2, iflush2, XFS_RANDOM_DEFAULT) \
|
||||
XFS_ERRTAG(IFLUSH_3, iflush3, XFS_RANDOM_DEFAULT) \
|
||||
XFS_ERRTAG(IFLUSH_4, iflush4, XFS_RANDOM_DEFAULT) \
|
||||
XFS_ERRTAG(IFLUSH_5, iflush5, XFS_RANDOM_DEFAULT) \
|
||||
XFS_ERRTAG(IFLUSH_6, iflush6, XFS_RANDOM_DEFAULT) \
|
||||
XFS_ERRTAG(DA_READ_BUF, dareadbuf, XFS_RANDOM_DEFAULT) \
|
||||
XFS_ERRTAG(BTREE_CHECK_LBLOCK, btree_chk_lblk, XFS_RANDOM_DEFAULT/4) \
|
||||
XFS_ERRTAG(BTREE_CHECK_SBLOCK, btree_chk_sblk, XFS_RANDOM_DEFAULT) \
|
||||
XFS_ERRTAG(ALLOC_READ_AGF, readagf, XFS_RANDOM_DEFAULT) \
|
||||
XFS_ERRTAG(IALLOC_READ_AGI, readagi, XFS_RANDOM_DEFAULT) \
|
||||
XFS_ERRTAG(ITOBP_INOTOBP, itobp, XFS_RANDOM_DEFAULT) \
|
||||
XFS_ERRTAG(IUNLINK, iunlink, XFS_RANDOM_DEFAULT) \
|
||||
XFS_ERRTAG(IUNLINK_REMOVE, iunlinkrm, XFS_RANDOM_DEFAULT) \
|
||||
XFS_ERRTAG(DIR_INO_VALIDATE, dirinovalid, XFS_RANDOM_DEFAULT) \
|
||||
XFS_ERRTAG(BULKSTAT_READ_CHUNK, bulkstat, XFS_RANDOM_DEFAULT) \
|
||||
XFS_ERRTAG(IODONE_IOERR, logiodone, XFS_RANDOM_DEFAULT/10) \
|
||||
XFS_ERRTAG(STRATREAD_IOERR, stratread, XFS_RANDOM_DEFAULT/10) \
|
||||
XFS_ERRTAG(STRATCMPL_IOERR, stratcmpl, XFS_RANDOM_DEFAULT/10) \
|
||||
XFS_ERRTAG(DIOWRITE_IOERR, diowrite, XFS_RANDOM_DEFAULT/10) \
|
||||
XFS_ERRTAG(BMAPIFORMAT, bmapifmt, XFS_RANDOM_DEFAULT) \
|
||||
XFS_ERRTAG(FREE_EXTENT, free_extent, 1) \
|
||||
XFS_ERRTAG(RMAP_FINISH_ONE, rmap_finish_one, 1) \
|
||||
XFS_ERRTAG(REFCOUNT_CONTINUE_UPDATE, refcount_continue_update, 1) \
|
||||
XFS_ERRTAG(REFCOUNT_FINISH_ONE, refcount_finish_one, 1) \
|
||||
XFS_ERRTAG(BMAP_FINISH_ONE, bmap_finish_one, 1) \
|
||||
XFS_ERRTAG(AG_RESV_CRITICAL, ag_resv_critical, 4) \
|
||||
XFS_ERRTAG(LOG_BAD_CRC, log_bad_crc, 1) \
|
||||
XFS_ERRTAG(LOG_ITEM_PIN, log_item_pin, 1) \
|
||||
XFS_ERRTAG(BUF_LRU_REF, buf_lru_ref, 2) \
|
||||
XFS_ERRTAG(FORCE_SCRUB_REPAIR, force_repair, 1) \
|
||||
XFS_ERRTAG(FORCE_SUMMARY_RECALC, bad_summary, 1) \
|
||||
XFS_ERRTAG(IUNLINK_FALLBACK, iunlink_fallback, XFS_RANDOM_DEFAULT/10) \
|
||||
XFS_ERRTAG(BUF_IOERROR, buf_ioerror, XFS_RANDOM_DEFAULT) \
|
||||
XFS_ERRTAG(REDUCE_MAX_IEXTENTS, reduce_max_iextents, 1) \
|
||||
XFS_ERRTAG(BMAP_ALLOC_MINLEN_EXTENT, bmap_alloc_minlen_extent, 1) \
|
||||
XFS_ERRTAG(AG_RESV_FAIL, ag_resv_fail, 1) \
|
||||
XFS_ERRTAG(LARP, larp, 1) \
|
||||
XFS_ERRTAG(DA_LEAF_SPLIT, da_leaf_split, 1) \
|
||||
XFS_ERRTAG(ATTR_LEAF_TO_NODE, attr_leaf_to_node, 1) \
|
||||
XFS_ERRTAG(WB_DELAY_MS, wb_delay_ms, 3000) \
|
||||
XFS_ERRTAG(WRITE_DELAY_MS, write_delay_ms, 3000) \
|
||||
XFS_ERRTAG(EXCHMAPS_FINISH_ONE, exchmaps_finish_one, 1) \
|
||||
XFS_ERRTAG(METAFILE_RESV_CRITICAL, metafile_resv_crit, 4)
|
||||
#endif /* XFS_ERRTAG */
|
||||
|
||||
#endif /* __XFS_ERRORTAG_H_ */
|
||||
|
||||
@@ -616,7 +616,7 @@ xfs_exchmaps_finish_one(
|
||||
return error;
|
||||
}
|
||||
|
||||
if (XFS_TEST_ERROR(false, tp->t_mountp, XFS_ERRTAG_EXCHMAPS_FINISH_ONE))
|
||||
if (XFS_TEST_ERROR(tp->t_mountp, XFS_ERRTAG_EXCHMAPS_FINISH_ONE))
|
||||
return -EIO;
|
||||
|
||||
/* If we still have work to do, ask for a new transaction. */
|
||||
@@ -882,7 +882,7 @@ xmi_ensure_delta_nextents(
|
||||
&new_nextents))
|
||||
return -EFBIG;
|
||||
|
||||
if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_REDUCE_MAX_IEXTENTS) &&
|
||||
if (XFS_TEST_ERROR(mp, XFS_ERRTAG_REDUCE_MAX_IEXTENTS) &&
|
||||
new_nextents > 10)
|
||||
return -EFBIG;
|
||||
|
||||
|
||||
@@ -2140,7 +2140,7 @@ xfs_difree_inobt(
|
||||
* remove the chunk if the block size is large enough for multiple inode
|
||||
* chunks (that might not be free).
|
||||
*/
|
||||
if (!xfs_has_ikeep(mp) && rec.ir_free == XFS_INOBT_ALL_FREE &&
|
||||
if (rec.ir_free == XFS_INOBT_ALL_FREE &&
|
||||
mp->m_sb.sb_inopblock <= XFS_INODES_PER_CHUNK) {
|
||||
xic->deleted = true;
|
||||
xic->first_ino = xfs_agino_to_ino(pag, rec.ir_startino);
|
||||
@@ -2286,7 +2286,7 @@ xfs_difree_finobt(
|
||||
* enough for multiple chunks. Leave the finobt record to remain in sync
|
||||
* with the inobt.
|
||||
*/
|
||||
if (!xfs_has_ikeep(mp) && rec.ir_free == XFS_INOBT_ALL_FREE &&
|
||||
if (rec.ir_free == XFS_INOBT_ALL_FREE &&
|
||||
mp->m_sb.sb_inopblock <= XFS_INODES_PER_CHUNK) {
|
||||
error = xfs_btree_delete(cur, &i);
|
||||
if (error)
|
||||
@@ -2706,7 +2706,7 @@ xfs_agi_read_verify(
|
||||
xfs_verifier_error(bp, -EFSBADCRC, __this_address);
|
||||
else {
|
||||
fa = xfs_agi_verify(bp);
|
||||
if (XFS_TEST_ERROR(fa, mp, XFS_ERRTAG_IALLOC_READ_AGI))
|
||||
if (fa || XFS_TEST_ERROR(mp, XFS_ERRTAG_IALLOC_READ_AGI))
|
||||
xfs_verifier_error(bp, -EFSCORRUPTED, fa);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -61,8 +61,8 @@ xfs_inode_buf_verify(
|
||||
di_ok = xfs_verify_magic16(bp, dip->di_magic) &&
|
||||
xfs_dinode_good_version(mp, dip->di_version) &&
|
||||
xfs_verify_agino_or_null(bp->b_pag, unlinked_ino);
|
||||
if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
|
||||
XFS_ERRTAG_ITOBP_INOTOBP))) {
|
||||
if (unlikely(!di_ok ||
|
||||
XFS_TEST_ERROR(mp, XFS_ERRTAG_ITOBP_INOTOBP))) {
|
||||
if (readahead) {
|
||||
bp->b_flags &= ~XBF_DONE;
|
||||
xfs_buf_ioerror(bp, -EIO);
|
||||
|
||||
@@ -756,8 +756,7 @@ xfs_iext_count_extend(
|
||||
if (nr_exts < ifp->if_nextents)
|
||||
return -EFBIG;
|
||||
|
||||
if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_REDUCE_MAX_IEXTENTS) &&
|
||||
nr_exts > 10)
|
||||
if (XFS_TEST_ERROR(mp, XFS_ERRTAG_REDUCE_MAX_IEXTENTS) && nr_exts > 10)
|
||||
return -EFBIG;
|
||||
|
||||
if (nr_exts > xfs_iext_max_nextents(has_large, whichfork)) {
|
||||
|
||||
@@ -299,17 +299,6 @@ xfs_inode_init(
|
||||
} else {
|
||||
inode_init_owner(args->idmap, inode, dir, args->mode);
|
||||
}
|
||||
|
||||
/*
|
||||
* If the group ID of the new file does not match the effective
|
||||
* group ID or one of the supplementary group IDs, the S_ISGID
|
||||
* bit is cleared (and only if the irix_sgid_inherit
|
||||
* compatibility variable is set).
|
||||
*/
|
||||
if (irix_sgid_inherit && (inode->i_mode & S_ISGID) &&
|
||||
!vfsgid_in_group_p(i_gid_into_vfsgid(args->idmap, inode)))
|
||||
inode->i_mode &= ~S_ISGID;
|
||||
|
||||
ip->i_projid = xfs_get_initial_prid(pip);
|
||||
}
|
||||
|
||||
|
||||
@@ -86,43 +86,6 @@ struct xfs_unmount_log_format {
|
||||
uint32_t pad2; /* may as well make it 64 bits */
|
||||
};
|
||||
|
||||
/* Region types for iovec's i_type */
|
||||
#define XLOG_REG_TYPE_BFORMAT 1
|
||||
#define XLOG_REG_TYPE_BCHUNK 2
|
||||
#define XLOG_REG_TYPE_EFI_FORMAT 3
|
||||
#define XLOG_REG_TYPE_EFD_FORMAT 4
|
||||
#define XLOG_REG_TYPE_IFORMAT 5
|
||||
#define XLOG_REG_TYPE_ICORE 6
|
||||
#define XLOG_REG_TYPE_IEXT 7
|
||||
#define XLOG_REG_TYPE_IBROOT 8
|
||||
#define XLOG_REG_TYPE_ILOCAL 9
|
||||
#define XLOG_REG_TYPE_IATTR_EXT 10
|
||||
#define XLOG_REG_TYPE_IATTR_BROOT 11
|
||||
#define XLOG_REG_TYPE_IATTR_LOCAL 12
|
||||
#define XLOG_REG_TYPE_QFORMAT 13
|
||||
#define XLOG_REG_TYPE_DQUOT 14
|
||||
#define XLOG_REG_TYPE_QUOTAOFF 15
|
||||
#define XLOG_REG_TYPE_LRHEADER 16
|
||||
#define XLOG_REG_TYPE_UNMOUNT 17
|
||||
#define XLOG_REG_TYPE_COMMIT 18
|
||||
#define XLOG_REG_TYPE_TRANSHDR 19
|
||||
#define XLOG_REG_TYPE_ICREATE 20
|
||||
#define XLOG_REG_TYPE_RUI_FORMAT 21
|
||||
#define XLOG_REG_TYPE_RUD_FORMAT 22
|
||||
#define XLOG_REG_TYPE_CUI_FORMAT 23
|
||||
#define XLOG_REG_TYPE_CUD_FORMAT 24
|
||||
#define XLOG_REG_TYPE_BUI_FORMAT 25
|
||||
#define XLOG_REG_TYPE_BUD_FORMAT 26
|
||||
#define XLOG_REG_TYPE_ATTRI_FORMAT 27
|
||||
#define XLOG_REG_TYPE_ATTRD_FORMAT 28
|
||||
#define XLOG_REG_TYPE_ATTR_NAME 29
|
||||
#define XLOG_REG_TYPE_ATTR_VALUE 30
|
||||
#define XLOG_REG_TYPE_XMI_FORMAT 31
|
||||
#define XLOG_REG_TYPE_XMD_FORMAT 32
|
||||
#define XLOG_REG_TYPE_ATTR_NEWNAME 33
|
||||
#define XLOG_REG_TYPE_ATTR_NEWVALUE 34
|
||||
#define XLOG_REG_TYPE_MAX 34
|
||||
|
||||
/*
|
||||
* Flags to log operation header
|
||||
*
|
||||
@@ -141,14 +104,13 @@ struct xfs_unmount_log_format {
|
||||
#define XLOG_END_TRANS 0x10 /* End a continued transaction */
|
||||
#define XLOG_UNMOUNT_TRANS 0x20 /* Unmount a filesystem transaction */
|
||||
|
||||
|
||||
typedef struct xlog_op_header {
|
||||
struct xlog_op_header {
|
||||
__be32 oh_tid; /* transaction id of operation : 4 b */
|
||||
__be32 oh_len; /* bytes in data region : 4 b */
|
||||
__u8 oh_clientid; /* who sent me this : 1 b */
|
||||
__u8 oh_flags; /* : 1 b */
|
||||
__u16 oh_res2; /* 32 bit align : 2 b */
|
||||
} xlog_op_header_t;
|
||||
};
|
||||
|
||||
/* valid values for h_fmt */
|
||||
#define XLOG_FMT_UNKNOWN 0
|
||||
@@ -174,12 +136,40 @@ typedef struct xlog_rec_header {
|
||||
__be32 h_prev_block; /* block number to previous LR : 4 */
|
||||
__be32 h_num_logops; /* number of log operations in this LR : 4 */
|
||||
__be32 h_cycle_data[XLOG_HEADER_CYCLE_SIZE / BBSIZE];
|
||||
/* new fields */
|
||||
|
||||
/* fields added by the Linux port: */
|
||||
__be32 h_fmt; /* format of log record : 4 */
|
||||
uuid_t h_fs_uuid; /* uuid of FS : 16 */
|
||||
|
||||
/* fields added for log v2: */
|
||||
__be32 h_size; /* iclog size : 4 */
|
||||
|
||||
/*
|
||||
* When h_size was added for log v2 support, it caused the structure to have
|
||||
* a different size on i386 vs all other architectures because the
|
||||
* sum of the sizes of the members is not aligned by that of the largest
|
||||
* __be64-sized member, and i386 has really odd struct alignment rules.
|
||||
*
|
||||
* Due to the way the log headers are placed out on-disk that alone is
|
||||
* not a problem because the xlog_rec_header always sits alone in a
|
||||
* BBSIZEs area, and the rest of that area is padded with zeroes.
|
||||
* But xlog_cksum used to calculate the checksum based on the structure
|
||||
* size, and thus gives different checksums for i386 vs the rest.
|
||||
* We now do two checksum validation passes for both sizes to allow
|
||||
* moving v5 file systems with unclean logs between i386 and other
|
||||
* (little-endian) architectures.
|
||||
*/
|
||||
__u32 h_pad0;
|
||||
} xlog_rec_header_t;
|
||||
|
||||
#ifdef __i386__
|
||||
#define XLOG_REC_SIZE offsetofend(struct xlog_rec_header, h_size)
|
||||
#define XLOG_REC_SIZE_OTHER sizeof(struct xlog_rec_header)
|
||||
#else
|
||||
#define XLOG_REC_SIZE sizeof(struct xlog_rec_header)
|
||||
#define XLOG_REC_SIZE_OTHER offsetofend(struct xlog_rec_header, h_size)
|
||||
#endif /* __i386__ */
|
||||
|
||||
typedef struct xlog_rec_ext_header {
|
||||
__be32 xh_cycle; /* write cycle of log : 4 */
|
||||
__be32 xh_cycle_data[XLOG_HEADER_CYCLE_SIZE / BBSIZE]; /* : 256 */
|
||||
@@ -195,12 +185,11 @@ typedef union xlog_in_core2 {
|
||||
} xlog_in_core_2_t;
|
||||
|
||||
/* not an on-disk structure, but needed by log recovery in userspace */
|
||||
typedef struct xfs_log_iovec {
|
||||
struct xfs_log_iovec {
|
||||
void *i_addr; /* beginning address of region */
|
||||
int i_len; /* length in bytes of region */
|
||||
uint i_type; /* type of region */
|
||||
} xfs_log_iovec_t;
|
||||
|
||||
};
|
||||
|
||||
/*
|
||||
* Transaction Header definitions.
|
||||
@@ -213,12 +202,12 @@ typedef struct xfs_log_iovec {
|
||||
* Do not change the below structure without redoing the code in
|
||||
* xlog_recover_add_to_trans() and xlog_recover_add_to_cont_trans().
|
||||
*/
|
||||
typedef struct xfs_trans_header {
|
||||
struct xfs_trans_header {
|
||||
uint th_magic; /* magic number */
|
||||
uint th_type; /* transaction type */
|
||||
int32_t th_tid; /* transaction id (unused) */
|
||||
uint th_num_items; /* num items logged by trans */
|
||||
} xfs_trans_header_t;
|
||||
};
|
||||
|
||||
#define XFS_TRANS_HEADER_MAGIC 0x5452414e /* TRAN */
|
||||
|
||||
@@ -542,7 +531,7 @@ struct xfs_log_dinode {
|
||||
#define __XFS_BLF_DATAMAP_SIZE ((XFS_MAX_BLOCKSIZE / XFS_BLF_CHUNK) / NBWORD)
|
||||
#define XFS_BLF_DATAMAP_SIZE (__XFS_BLF_DATAMAP_SIZE + 1)
|
||||
|
||||
typedef struct xfs_buf_log_format {
|
||||
struct xfs_buf_log_format {
|
||||
unsigned short blf_type; /* buf log item type indicator */
|
||||
unsigned short blf_size; /* size of this item */
|
||||
unsigned short blf_flags; /* misc state */
|
||||
@@ -550,7 +539,7 @@ typedef struct xfs_buf_log_format {
|
||||
int64_t blf_blkno; /* starting blkno of this buf */
|
||||
unsigned int blf_map_size; /* used size of data bitmap in words */
|
||||
unsigned int blf_data_map[XFS_BLF_DATAMAP_SIZE]; /* dirty bitmap */
|
||||
} xfs_buf_log_format_t;
|
||||
};
|
||||
|
||||
/*
|
||||
* All buffers now need to tell recovery where the magic number
|
||||
@@ -606,40 +595,41 @@ xfs_blft_from_flags(struct xfs_buf_log_format *blf)
|
||||
/*
|
||||
* EFI/EFD log format definitions
|
||||
*/
|
||||
typedef struct xfs_extent {
|
||||
struct xfs_extent {
|
||||
xfs_fsblock_t ext_start;
|
||||
xfs_extlen_t ext_len;
|
||||
} xfs_extent_t;
|
||||
};
|
||||
|
||||
/*
|
||||
* Since an xfs_extent_t has types (start:64, len: 32)
|
||||
* there are different alignments on 32 bit and 64 bit kernels.
|
||||
* So we provide the different variants for use by a
|
||||
* conversion routine.
|
||||
* Since the fields in struct xfs_extent add up to 96 bits, it has
|
||||
* different alignments on i386 vs all other architectures, because i386
|
||||
* does not pad structures to their natural alignment.
|
||||
*
|
||||
* Provide the different variants for use by a conversion routine.
|
||||
*/
|
||||
typedef struct xfs_extent_32 {
|
||||
struct xfs_extent_32 {
|
||||
uint64_t ext_start;
|
||||
uint32_t ext_len;
|
||||
} __attribute__((packed)) xfs_extent_32_t;
|
||||
} __attribute__((packed));
|
||||
|
||||
typedef struct xfs_extent_64 {
|
||||
struct xfs_extent_64 {
|
||||
uint64_t ext_start;
|
||||
uint32_t ext_len;
|
||||
uint32_t ext_pad;
|
||||
} xfs_extent_64_t;
|
||||
};
|
||||
|
||||
/*
|
||||
* This is the structure used to lay out an efi log item in the
|
||||
* log. The efi_extents field is a variable size array whose
|
||||
* size is given by efi_nextents.
|
||||
*/
|
||||
typedef struct xfs_efi_log_format {
|
||||
struct xfs_efi_log_format {
|
||||
uint16_t efi_type; /* efi log item type */
|
||||
uint16_t efi_size; /* size of this item */
|
||||
uint32_t efi_nextents; /* # extents to free */
|
||||
uint64_t efi_id; /* efi identifier */
|
||||
xfs_extent_t efi_extents[]; /* array of extents to free */
|
||||
} xfs_efi_log_format_t;
|
||||
struct xfs_extent efi_extents[]; /* array of extents to free */
|
||||
};
|
||||
|
||||
static inline size_t
|
||||
xfs_efi_log_format_sizeof(
|
||||
@@ -649,13 +639,13 @@ xfs_efi_log_format_sizeof(
|
||||
nr * sizeof(struct xfs_extent);
|
||||
}
|
||||
|
||||
typedef struct xfs_efi_log_format_32 {
|
||||
struct xfs_efi_log_format_32 {
|
||||
uint16_t efi_type; /* efi log item type */
|
||||
uint16_t efi_size; /* size of this item */
|
||||
uint32_t efi_nextents; /* # extents to free */
|
||||
uint64_t efi_id; /* efi identifier */
|
||||
xfs_extent_32_t efi_extents[]; /* array of extents to free */
|
||||
} __attribute__((packed)) xfs_efi_log_format_32_t;
|
||||
struct xfs_extent_32 efi_extents[]; /* array of extents to free */
|
||||
} __attribute__((packed));
|
||||
|
||||
static inline size_t
|
||||
xfs_efi_log_format32_sizeof(
|
||||
@@ -665,13 +655,13 @@ xfs_efi_log_format32_sizeof(
|
||||
nr * sizeof(struct xfs_extent_32);
|
||||
}
|
||||
|
||||
typedef struct xfs_efi_log_format_64 {
|
||||
struct xfs_efi_log_format_64 {
|
||||
uint16_t efi_type; /* efi log item type */
|
||||
uint16_t efi_size; /* size of this item */
|
||||
uint32_t efi_nextents; /* # extents to free */
|
||||
uint64_t efi_id; /* efi identifier */
|
||||
xfs_extent_64_t efi_extents[]; /* array of extents to free */
|
||||
} xfs_efi_log_format_64_t;
|
||||
struct xfs_extent_64 efi_extents[]; /* array of extents to free */
|
||||
};
|
||||
|
||||
static inline size_t
|
||||
xfs_efi_log_format64_sizeof(
|
||||
@@ -686,13 +676,13 @@ xfs_efi_log_format64_sizeof(
|
||||
* log. The efd_extents array is a variable size array whose
|
||||
* size is given by efd_nextents;
|
||||
*/
|
||||
typedef struct xfs_efd_log_format {
|
||||
struct xfs_efd_log_format {
|
||||
uint16_t efd_type; /* efd log item type */
|
||||
uint16_t efd_size; /* size of this item */
|
||||
uint32_t efd_nextents; /* # of extents freed */
|
||||
uint64_t efd_efi_id; /* id of corresponding efi */
|
||||
xfs_extent_t efd_extents[]; /* array of extents freed */
|
||||
} xfs_efd_log_format_t;
|
||||
struct xfs_extent efd_extents[]; /* array of extents freed */
|
||||
};
|
||||
|
||||
static inline size_t
|
||||
xfs_efd_log_format_sizeof(
|
||||
@@ -702,13 +692,13 @@ xfs_efd_log_format_sizeof(
|
||||
nr * sizeof(struct xfs_extent);
|
||||
}
|
||||
|
||||
typedef struct xfs_efd_log_format_32 {
|
||||
struct xfs_efd_log_format_32 {
|
||||
uint16_t efd_type; /* efd log item type */
|
||||
uint16_t efd_size; /* size of this item */
|
||||
uint32_t efd_nextents; /* # of extents freed */
|
||||
uint64_t efd_efi_id; /* id of corresponding efi */
|
||||
xfs_extent_32_t efd_extents[]; /* array of extents freed */
|
||||
} __attribute__((packed)) xfs_efd_log_format_32_t;
|
||||
struct xfs_extent_32 efd_extents[]; /* array of extents freed */
|
||||
} __attribute__((packed));
|
||||
|
||||
static inline size_t
|
||||
xfs_efd_log_format32_sizeof(
|
||||
@@ -718,13 +708,13 @@ xfs_efd_log_format32_sizeof(
|
||||
nr * sizeof(struct xfs_extent_32);
|
||||
}
|
||||
|
||||
typedef struct xfs_efd_log_format_64 {
|
||||
struct xfs_efd_log_format_64 {
|
||||
uint16_t efd_type; /* efd log item type */
|
||||
uint16_t efd_size; /* size of this item */
|
||||
uint32_t efd_nextents; /* # of extents freed */
|
||||
uint64_t efd_efi_id; /* id of corresponding efi */
|
||||
xfs_extent_64_t efd_extents[]; /* array of extents freed */
|
||||
} xfs_efd_log_format_64_t;
|
||||
struct xfs_extent_64 efd_extents[]; /* array of extents freed */
|
||||
};
|
||||
|
||||
static inline size_t
|
||||
xfs_efd_log_format64_sizeof(
|
||||
@@ -957,14 +947,14 @@ struct xfs_xmd_log_format {
|
||||
* The first two fields must be the type and size fitting into
|
||||
* 32 bits : log_recovery code assumes that.
|
||||
*/
|
||||
typedef struct xfs_dq_logformat {
|
||||
struct xfs_dq_logformat {
|
||||
uint16_t qlf_type; /* dquot log item type */
|
||||
uint16_t qlf_size; /* size of this item */
|
||||
xfs_dqid_t qlf_id; /* usr/grp/proj id : 32 bits */
|
||||
int64_t qlf_blkno; /* blkno of dquot buffer */
|
||||
int32_t qlf_len; /* len of dquot buffer */
|
||||
uint32_t qlf_boffset; /* off of dquot in buffer */
|
||||
} xfs_dq_logformat_t;
|
||||
};
|
||||
|
||||
/*
|
||||
* log format struct for QUOTAOFF records.
|
||||
@@ -974,12 +964,12 @@ typedef struct xfs_dq_logformat {
|
||||
* to the first and ensures that the first logitem is taken out of the AIL
|
||||
* only when the last one is securely committed.
|
||||
*/
|
||||
typedef struct xfs_qoff_logformat {
|
||||
struct xfs_qoff_logformat {
|
||||
unsigned short qf_type; /* quotaoff log item type */
|
||||
unsigned short qf_size; /* size of this item */
|
||||
unsigned int qf_flags; /* USR and/or GRP */
|
||||
char qf_pad[12]; /* padding for future */
|
||||
} xfs_qoff_logformat_t;
|
||||
};
|
||||
|
||||
/*
|
||||
* Disk quotas status in m_qflags, and also sb_qflags. 16 bits.
|
||||
|
||||
@@ -111,7 +111,7 @@ struct xlog_recover_item {
|
||||
struct xlog_recover {
|
||||
struct hlist_node r_list;
|
||||
xlog_tid_t r_log_tid; /* log's transaction id */
|
||||
xfs_trans_header_t r_theader; /* trans header for partial */
|
||||
struct xfs_trans_header r_theader; /* trans header for partial */
|
||||
int r_state; /* not needed */
|
||||
xfs_lsn_t r_lsn; /* xact lsn */
|
||||
struct list_head r_itemq; /* q for items */
|
||||
|
||||
@@ -121,7 +121,7 @@ xfs_metafile_resv_critical(
|
||||
div_u64(mp->m_metafile_resv_target, 10)))
|
||||
return true;
|
||||
|
||||
return XFS_TEST_ERROR(false, mp, XFS_ERRTAG_METAFILE_RESV_CRITICAL);
|
||||
return XFS_TEST_ERROR(mp, XFS_ERRTAG_METAFILE_RESV_CRITICAL);
|
||||
}
|
||||
|
||||
/* Allocate a block from the metadata file's reservation. */
|
||||
|
||||
@@ -174,6 +174,8 @@ xfs_check_ondisk_structs(void)
|
||||
XFS_CHECK_STRUCT_SIZE(struct xfs_rud_log_format, 16);
|
||||
XFS_CHECK_STRUCT_SIZE(struct xfs_map_extent, 32);
|
||||
XFS_CHECK_STRUCT_SIZE(struct xfs_phys_extent, 16);
|
||||
XFS_CHECK_STRUCT_SIZE(struct xlog_rec_header, 328);
|
||||
XFS_CHECK_STRUCT_SIZE(struct xlog_rec_ext_header, 260);
|
||||
|
||||
XFS_CHECK_OFFSET(struct xfs_bui_log_format, bui_extents, 16);
|
||||
XFS_CHECK_OFFSET(struct xfs_cui_log_format, cui_extents, 16);
|
||||
|
||||
@@ -1113,8 +1113,7 @@ xfs_refcount_still_have_space(
|
||||
* refcount continue update "error" has been injected.
|
||||
*/
|
||||
if (cur->bc_refc.nr_ops > 2 &&
|
||||
XFS_TEST_ERROR(false, cur->bc_mp,
|
||||
XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE))
|
||||
XFS_TEST_ERROR(cur->bc_mp, XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE))
|
||||
return false;
|
||||
|
||||
if (cur->bc_refc.nr_ops == 0)
|
||||
@@ -1398,7 +1397,7 @@ xfs_refcount_finish_one(
|
||||
|
||||
trace_xfs_refcount_deferred(mp, ri);
|
||||
|
||||
if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_REFCOUNT_FINISH_ONE))
|
||||
if (XFS_TEST_ERROR(mp, XFS_ERRTAG_REFCOUNT_FINISH_ONE))
|
||||
return -EIO;
|
||||
|
||||
/*
|
||||
@@ -1511,7 +1510,7 @@ xfs_rtrefcount_finish_one(
|
||||
|
||||
trace_xfs_refcount_deferred(mp, ri);
|
||||
|
||||
if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_REFCOUNT_FINISH_ONE))
|
||||
if (XFS_TEST_ERROR(mp, XFS_ERRTAG_REFCOUNT_FINISH_ONE))
|
||||
return -EIO;
|
||||
|
||||
/*
|
||||
|
||||
@@ -2690,7 +2690,7 @@ xfs_rmap_finish_one(
|
||||
|
||||
trace_xfs_rmap_deferred(mp, ri);
|
||||
|
||||
if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_RMAP_FINISH_ONE))
|
||||
if (XFS_TEST_ERROR(mp, XFS_ERRTAG_RMAP_FINISH_ONE))
|
||||
return -EIO;
|
||||
|
||||
/*
|
||||
|
||||
@@ -1067,7 +1067,7 @@ xfs_rtfree_extent(
|
||||
ASSERT(rbmip->i_itemp != NULL);
|
||||
xfs_assert_ilocked(rbmip, XFS_ILOCK_EXCL);
|
||||
|
||||
if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_FREE_EXTENT))
|
||||
if (XFS_TEST_ERROR(mp, XFS_ERRTAG_FREE_EXTENT))
|
||||
return -EIO;
|
||||
|
||||
error = xfs_rtcheck_alloc_range(&args, start, len);
|
||||
|
||||
@@ -142,8 +142,6 @@ xfs_sb_version_to_features(
|
||||
if (sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT) {
|
||||
if (sbp->sb_features2 & XFS_SB_VERSION2_LAZYSBCOUNTBIT)
|
||||
features |= XFS_FEAT_LAZYSBCOUNT;
|
||||
if (sbp->sb_features2 & XFS_SB_VERSION2_ATTR2BIT)
|
||||
features |= XFS_FEAT_ATTR2;
|
||||
if (sbp->sb_features2 & XFS_SB_VERSION2_PROJID32BIT)
|
||||
features |= XFS_FEAT_PROJID32;
|
||||
if (sbp->sb_features2 & XFS_SB_VERSION2_FTYPE)
|
||||
@@ -155,7 +153,7 @@ xfs_sb_version_to_features(
|
||||
|
||||
/* Always on V5 features */
|
||||
features |= XFS_FEAT_ALIGN | XFS_FEAT_LOGV2 | XFS_FEAT_EXTFLG |
|
||||
XFS_FEAT_LAZYSBCOUNT | XFS_FEAT_ATTR2 | XFS_FEAT_PROJID32 |
|
||||
XFS_FEAT_LAZYSBCOUNT | XFS_FEAT_PROJID32 |
|
||||
XFS_FEAT_V3INODES | XFS_FEAT_CRC | XFS_FEAT_PQUOTINO;
|
||||
|
||||
/* Optional V5 features */
|
||||
@@ -1524,7 +1522,8 @@ xfs_fs_geometry(
|
||||
geo->version = XFS_FSOP_GEOM_VERSION;
|
||||
geo->flags = XFS_FSOP_GEOM_FLAGS_NLINK |
|
||||
XFS_FSOP_GEOM_FLAGS_DIRV2 |
|
||||
XFS_FSOP_GEOM_FLAGS_EXTFLG;
|
||||
XFS_FSOP_GEOM_FLAGS_EXTFLG |
|
||||
XFS_FSOP_GEOM_FLAGS_ATTR2;
|
||||
if (xfs_has_attr(mp))
|
||||
geo->flags |= XFS_FSOP_GEOM_FLAGS_ATTR;
|
||||
if (xfs_has_quota(mp))
|
||||
@@ -1537,8 +1536,6 @@ xfs_fs_geometry(
|
||||
geo->flags |= XFS_FSOP_GEOM_FLAGS_DIRV2CI;
|
||||
if (xfs_has_lazysbcount(mp))
|
||||
geo->flags |= XFS_FSOP_GEOM_FLAGS_LAZYSB;
|
||||
if (xfs_has_attr2(mp))
|
||||
geo->flags |= XFS_FSOP_GEOM_FLAGS_ATTR2;
|
||||
if (xfs_has_projid32(mp))
|
||||
geo->flags |= XFS_FSOP_GEOM_FLAGS_PROJID32;
|
||||
if (xfs_has_crc(mp))
|
||||
|
||||
@@ -29,6 +29,13 @@ struct xfs_rtgroup;
|
||||
#define XFS_OPEN_GC_ZONES 1U
|
||||
#define XFS_MIN_OPEN_ZONES (XFS_OPEN_GC_ZONES + 1U)
|
||||
|
||||
/*
|
||||
* For zoned devices that do not have a limit on the number of open zones, and
|
||||
* for regular devices using the zoned allocator, use the most common SMR disks
|
||||
* limit (128) as the default limit on the number of open zones.
|
||||
*/
|
||||
#define XFS_DEFAULT_MAX_OPEN_ZONES 128
|
||||
|
||||
bool xfs_zone_validate(struct blk_zone *zone, struct xfs_rtgroup *rtg,
|
||||
xfs_rgblock_t *write_pointer);
|
||||
|
||||
|
||||
@@ -300,7 +300,7 @@ xrep_cow_find_bad(
|
||||
* on the debugging knob, replace everything in the CoW fork.
|
||||
*/
|
||||
if ((sc->sm->sm_flags & XFS_SCRUB_IFLAG_FORCE_REBUILD) ||
|
||||
XFS_TEST_ERROR(false, sc->mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR)) {
|
||||
XFS_TEST_ERROR(sc->mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR)) {
|
||||
error = xrep_cow_mark_file_range(xc, xc->irec.br_startblock,
|
||||
xc->irec.br_blockcount);
|
||||
if (error)
|
||||
@@ -385,7 +385,7 @@ xrep_cow_find_bad_rt(
|
||||
* CoW fork and then scan for staging extents in the refcountbt.
|
||||
*/
|
||||
if ((sc->sm->sm_flags & XFS_SCRUB_IFLAG_FORCE_REBUILD) ||
|
||||
XFS_TEST_ERROR(false, sc->mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR)) {
|
||||
XFS_TEST_ERROR(sc->mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR)) {
|
||||
error = xrep_cow_mark_file_range(xc, xc->irec.br_startblock,
|
||||
xc->irec.br_blockcount);
|
||||
if (error)
|
||||
|
||||
@@ -79,7 +79,7 @@ xchk_metapath_cleanup(
|
||||
|
||||
if (mpath->dp_ilock_flags)
|
||||
xfs_iunlock(mpath->dp, mpath->dp_ilock_flags);
|
||||
kfree(mpath->path);
|
||||
kfree_const(mpath->path);
|
||||
}
|
||||
|
||||
/* Set up a metadir path scan. @path must come from kstrdup_const(); it is released with kfree_const(). */
|
||||
@@ -98,13 +98,13 @@ xchk_setup_metapath_scan(
|
||||
|
||||
error = xchk_install_live_inode(sc, ip);
|
||||
if (error) {
|
||||
kfree(path);
|
||||
kfree_const(path);
|
||||
return error;
|
||||
}
|
||||
|
||||
mpath = kzalloc(sizeof(struct xchk_metapath), XCHK_GFP_FLAGS);
|
||||
if (!mpath) {
|
||||
kfree(path);
|
||||
kfree_const(path);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
@@ -132,7 +132,7 @@ xchk_setup_metapath_rtdir(
|
||||
return -ENOENT;
|
||||
|
||||
return xchk_setup_metapath_scan(sc, sc->mp->m_metadirip,
|
||||
kasprintf(GFP_KERNEL, "rtgroups"), sc->mp->m_rtdirip);
|
||||
kstrdup_const("rtgroups", GFP_KERNEL), sc->mp->m_rtdirip);
|
||||
}
|
||||
|
||||
/* Scan a rtgroup inode under the /rtgroups directory. */
|
||||
@@ -179,7 +179,7 @@ xchk_setup_metapath_quotadir(
|
||||
return -ENOENT;
|
||||
|
||||
return xchk_setup_metapath_scan(sc, sc->mp->m_metadirip,
|
||||
kstrdup("quota", GFP_KERNEL), qi->qi_dirip);
|
||||
kstrdup_const("quota", GFP_KERNEL), qi->qi_dirip);
|
||||
}
|
||||
|
||||
/* Scan a quota inode under the /quota directory. */
|
||||
@@ -212,7 +212,7 @@ xchk_setup_metapath_dqinode(
|
||||
return -ENOENT;
|
||||
|
||||
return xchk_setup_metapath_scan(sc, qi->qi_dirip,
|
||||
kstrdup(xfs_dqinode_path(type), GFP_KERNEL), ip);
|
||||
kstrdup_const(xfs_dqinode_path(type), GFP_KERNEL), ip);
|
||||
}
|
||||
#else
|
||||
# define xchk_setup_metapath_quotadir(...) (-ENOENT)
|
||||
|
||||
@@ -27,6 +27,15 @@
|
||||
#include "scrub/repair.h"
|
||||
#include "scrub/newbt.h"
|
||||
|
||||
/*
|
||||
* This is the maximum number of deferred extent freeing item extents (EFIs)
|
||||
* that we'll attach to a transaction without rolling the transaction to avoid
|
||||
* overrunning a tr_itruncate reservation. The newbt code should reserve
|
||||
* exactly the correct number of blocks to rebuild the btree, so there should
|
||||
* not be any excess blocks to free when committing a new btree.
|
||||
*/
|
||||
#define XREP_MAX_ITRUNCATE_EFIS (128)
|
||||
|
||||
/*
|
||||
* Estimate proper slack values for a btree that's being reloaded.
|
||||
*
|
||||
|
||||
@@ -36,6 +36,12 @@
|
||||
#include "xfs_metafile.h"
|
||||
#include "xfs_rtgroup.h"
|
||||
#include "xfs_rtrmap_btree.h"
|
||||
#include "xfs_extfree_item.h"
|
||||
#include "xfs_rmap_item.h"
|
||||
#include "xfs_refcount_item.h"
|
||||
#include "xfs_buf_item.h"
|
||||
#include "xfs_bmap_item.h"
|
||||
#include "xfs_bmap_btree.h"
|
||||
#include "scrub/scrub.h"
|
||||
#include "scrub/common.h"
|
||||
#include "scrub/trace.h"
|
||||
@@ -91,21 +97,33 @@
|
||||
struct xreap_state {
|
||||
struct xfs_scrub *sc;
|
||||
|
||||
/* Reverse mapping owner and metadata reservation type. */
|
||||
const struct xfs_owner_info *oinfo;
|
||||
enum xfs_ag_resv_type resv;
|
||||
|
||||
/* If true, roll the transaction before reaping the next extent. */
|
||||
bool force_roll;
|
||||
|
||||
/* Number of deferred reaps attached to the current transaction. */
|
||||
unsigned int deferred;
|
||||
union {
|
||||
struct {
|
||||
/*
|
||||
* For AG blocks, this is reverse mapping owner and
|
||||
* metadata reservation type.
|
||||
*/
|
||||
const struct xfs_owner_info *oinfo;
|
||||
enum xfs_ag_resv_type resv;
|
||||
};
|
||||
struct {
|
||||
/* For file blocks, this is the inode and fork. */
|
||||
struct xfs_inode *ip;
|
||||
int whichfork;
|
||||
};
|
||||
};
|
||||
|
||||
/* Number of invalidated buffers logged to the current transaction. */
|
||||
unsigned int invalidated;
|
||||
unsigned int nr_binval;
|
||||
|
||||
/* Number of deferred reaps queued during the whole reap sequence. */
|
||||
unsigned long long total_deferred;
|
||||
/* Maximum number of buffers we can invalidate in a single tx. */
|
||||
unsigned int max_binval;
|
||||
|
||||
/* Number of deferred reaps attached to the current transaction. */
|
||||
unsigned int nr_deferred;
|
||||
|
||||
/* Maximum number of intents we can reap in a single transaction. */
|
||||
unsigned int max_deferred;
|
||||
};
|
||||
|
||||
/* Put a block back on the AGFL. */
|
||||
@@ -148,71 +166,79 @@ xreap_put_freelist(
|
||||
}
|
||||
|
||||
/* Are there any uncommitted reap operations? */
|
||||
static inline bool xreap_dirty(const struct xreap_state *rs)
|
||||
static inline bool xreap_is_dirty(const struct xreap_state *rs)
|
||||
{
|
||||
if (rs->force_roll)
|
||||
return true;
|
||||
if (rs->deferred)
|
||||
return true;
|
||||
if (rs->invalidated)
|
||||
return true;
|
||||
if (rs->total_deferred)
|
||||
return true;
|
||||
return false;
|
||||
return rs->nr_binval > 0 || rs->nr_deferred > 0;
|
||||
}
|
||||
|
||||
#define XREAP_MAX_BINVAL (2048)
|
||||
|
||||
/*
|
||||
* Decide if we want to roll the transaction after reaping an extent. We don't
|
||||
* want to overrun the transaction reservation, so we prohibit more than
|
||||
* 128 EFIs per transaction. For the same reason, we limit the number
|
||||
* of buffer invalidations to 2048.
|
||||
* Decide if we need to roll the transaction to clear out the log
|
||||
* reservation that we allocated to buffer invalidations.
|
||||
*/
|
||||
static inline bool xreap_want_roll(const struct xreap_state *rs)
|
||||
static inline bool xreap_want_binval_roll(const struct xreap_state *rs)
|
||||
{
|
||||
if (rs->force_roll)
|
||||
return true;
|
||||
if (rs->deferred > XREP_MAX_ITRUNCATE_EFIS)
|
||||
return true;
|
||||
if (rs->invalidated > XREAP_MAX_BINVAL)
|
||||
return true;
|
||||
return false;
|
||||
return rs->nr_binval >= rs->max_binval;
|
||||
}
|
||||
|
||||
static inline void xreap_reset(struct xreap_state *rs)
|
||||
/* Reset the buffer invalidation count after rolling. */
|
||||
static inline void xreap_binval_reset(struct xreap_state *rs)
|
||||
{
|
||||
rs->total_deferred += rs->deferred;
|
||||
rs->deferred = 0;
|
||||
rs->invalidated = 0;
|
||||
rs->force_roll = false;
|
||||
rs->nr_binval = 0;
|
||||
}
|
||||
|
||||
#define XREAP_MAX_DEFER_CHAIN (2048)
|
||||
/*
|
||||
* Bump the number of invalidated buffers, and return true if we can continue,
|
||||
* or false if we need to roll the transaction.
|
||||
*/
|
||||
static inline bool xreap_inc_binval(struct xreap_state *rs)
|
||||
{
|
||||
rs->nr_binval++;
|
||||
return rs->nr_binval < rs->max_binval;
|
||||
}
|
||||
|
||||
/*
|
||||
* Decide if we want to finish the deferred ops that are attached to the scrub
|
||||
* transaction. We don't want to queue huge chains of deferred ops because
|
||||
* that can consume a lot of log space and kernel memory. Hence we trigger a
|
||||
* xfs_defer_finish if there are more than 2048 deferred reap operations or the
|
||||
* caller did some real work.
|
||||
* xfs_defer_finish if there are too many deferred reap operations or we've run
|
||||
* out of space for invalidations.
|
||||
*/
|
||||
static inline bool
|
||||
xreap_want_defer_finish(const struct xreap_state *rs)
|
||||
static inline bool xreap_want_defer_finish(const struct xreap_state *rs)
|
||||
{
|
||||
if (rs->force_roll)
|
||||
return true;
|
||||
if (rs->total_deferred > XREAP_MAX_DEFER_CHAIN)
|
||||
return true;
|
||||
return false;
|
||||
return rs->nr_deferred >= rs->max_deferred;
|
||||
}
|
||||
|
||||
/*
|
||||
* Reset the defer chain length and buffer invalidation count after finishing
|
||||
* items.
|
||||
*/
|
||||
static inline void xreap_defer_finish_reset(struct xreap_state *rs)
|
||||
{
|
||||
rs->total_deferred = 0;
|
||||
rs->deferred = 0;
|
||||
rs->invalidated = 0;
|
||||
rs->force_roll = false;
|
||||
rs->nr_deferred = 0;
|
||||
rs->nr_binval = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Bump the number of deferred extent reaps.
|
||||
*/
|
||||
static inline void xreap_inc_defer(struct xreap_state *rs)
|
||||
{
|
||||
rs->nr_deferred++;
|
||||
}
|
||||
|
||||
/* Force the caller to finish a deferred item chain. */
|
||||
static inline void xreap_force_defer_finish(struct xreap_state *rs)
|
||||
{
|
||||
rs->nr_deferred = rs->max_deferred;
|
||||
}
|
||||
|
||||
/* Maximum number of fsblocks that we might find in a buffer to invalidate. */
|
||||
static inline unsigned int
|
||||
xrep_binval_max_fsblocks(
|
||||
struct xfs_mount *mp)
|
||||
{
|
||||
/* Remote xattr values are the largest buffers that we support. */
|
||||
return xfs_attr3_max_rmt_blocks(mp);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -224,12 +250,8 @@ xrep_bufscan_max_sectors(
|
||||
struct xfs_mount *mp,
|
||||
xfs_extlen_t fsblocks)
|
||||
{
|
||||
int max_fsbs;
|
||||
|
||||
/* Remote xattr values are the largest buffers that we support. */
|
||||
max_fsbs = xfs_attr3_max_rmt_blocks(mp);
|
||||
|
||||
return XFS_FSB_TO_BB(mp, min_t(xfs_extlen_t, fsblocks, max_fsbs));
|
||||
return XFS_FSB_TO_BB(mp, min_t(xfs_extlen_t, fsblocks,
|
||||
xrep_binval_max_fsblocks(mp)));
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -297,14 +319,13 @@ xreap_agextent_binval(
|
||||
while ((bp = xrep_bufscan_advance(mp, &scan)) != NULL) {
|
||||
xfs_trans_bjoin(sc->tp, bp);
|
||||
xfs_trans_binval(sc->tp, bp);
|
||||
rs->invalidated++;
|
||||
|
||||
/*
|
||||
* Stop invalidating if we've hit the limit; we should
|
||||
* still have enough reservation left to free however
|
||||
* far we've gotten.
|
||||
*/
|
||||
if (rs->invalidated > XREAP_MAX_BINVAL) {
|
||||
if (!xreap_inc_binval(rs)) {
|
||||
*aglenp -= agbno_next - bno;
|
||||
goto out;
|
||||
}
|
||||
@@ -416,21 +437,23 @@ xreap_agextent_iter(
|
||||
trace_xreap_dispose_unmap_extent(pag_group(sc->sa.pag), agbno,
|
||||
*aglenp);
|
||||
|
||||
rs->force_roll = true;
|
||||
|
||||
if (rs->oinfo == &XFS_RMAP_OINFO_COW) {
|
||||
/*
|
||||
* If we're unmapping CoW staging extents, remove the
|
||||
* t0: Unmapping CoW staging extents, remove the
|
||||
* records from the refcountbt, which will remove the
|
||||
* rmap record as well.
|
||||
*/
|
||||
xfs_refcount_free_cow_extent(sc->tp, false, fsbno,
|
||||
*aglenp);
|
||||
xreap_inc_defer(rs);
|
||||
return 0;
|
||||
}
|
||||
|
||||
return xfs_rmap_free(sc->tp, sc->sa.agf_bp, sc->sa.pag, agbno,
|
||||
*aglenp, rs->oinfo);
|
||||
/* t1: unmap crosslinked metadata blocks */
|
||||
xfs_rmap_free_extent(sc->tp, false, fsbno, *aglenp,
|
||||
rs->oinfo->oi_owner);
|
||||
xreap_inc_defer(rs);
|
||||
return 0;
|
||||
}
|
||||
|
||||
trace_xreap_dispose_free_extent(pag_group(sc->sa.pag), agbno, *aglenp);
|
||||
@@ -443,12 +466,12 @@ xreap_agextent_iter(
|
||||
*/
|
||||
xreap_agextent_binval(rs, agbno, aglenp);
|
||||
if (*aglenp == 0) {
|
||||
ASSERT(xreap_want_roll(rs));
|
||||
ASSERT(xreap_want_binval_roll(rs));
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we're getting rid of CoW staging extents, use deferred work items
|
||||
* t2: To get rid of CoW staging extents, use deferred work items
|
||||
* to remove the refcountbt records (which removes the rmap records)
|
||||
* and free the extent. We're not worried about the system going down
|
||||
* here because log recovery walks the refcount btree to clean out the
|
||||
@@ -463,23 +486,23 @@ xreap_agextent_iter(
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
rs->force_roll = true;
|
||||
xreap_inc_defer(rs);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Put blocks back on the AGFL one at a time. */
|
||||
/* t3: Put blocks back on the AGFL one at a time. */
|
||||
if (rs->resv == XFS_AG_RESV_AGFL) {
|
||||
ASSERT(*aglenp == 1);
|
||||
error = xreap_put_freelist(sc, agbno);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
rs->force_roll = true;
|
||||
xreap_force_defer_finish(rs);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Use deferred frees to get rid of the old btree blocks to try to
|
||||
* t4: Use deferred frees to get rid of the old btree blocks to try to
|
||||
* minimize the window in which we could crash and lose the old blocks.
|
||||
* Add a defer ops barrier every other extent to avoid stressing the
|
||||
* system with large EFIs.
|
||||
@@ -489,12 +512,194 @@ xreap_agextent_iter(
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
rs->deferred++;
|
||||
if (rs->deferred % 2 == 0)
|
||||
xreap_inc_defer(rs);
|
||||
if (rs->nr_deferred % 2 == 0)
|
||||
xfs_defer_add_barrier(sc->tp);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Configure the deferral and invalidation limits */
|
||||
static inline void
|
||||
xreap_configure_limits(
|
||||
struct xreap_state *rs,
|
||||
unsigned int fixed_overhead,
|
||||
unsigned int variable_overhead,
|
||||
unsigned int per_intent,
|
||||
unsigned int per_binval)
|
||||
{
|
||||
struct xfs_scrub *sc = rs->sc;
|
||||
unsigned int res = sc->tp->t_log_res - fixed_overhead;
|
||||
|
||||
/* Don't underflow the reservation */
|
||||
if (sc->tp->t_log_res < (fixed_overhead + variable_overhead)) {
|
||||
ASSERT(sc->tp->t_log_res >=
|
||||
(fixed_overhead + variable_overhead));
|
||||
xfs_force_shutdown(sc->mp, SHUTDOWN_CORRUPT_INCORE);
|
||||
return;
|
||||
}
|
||||
|
||||
rs->max_deferred = per_intent ? res / variable_overhead : 0;
|
||||
res -= rs->max_deferred * per_intent;
|
||||
rs->max_binval = per_binval ? res / per_binval : 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Compute the maximum number of intent items that reaping can attach to the
|
||||
* scrub transaction given the worst case log overhead of the intent items
|
||||
* needed to reap a single per-AG space extent. This is not for freeing CoW
|
||||
* staging extents.
|
||||
*/
|
||||
STATIC void
|
||||
xreap_configure_agextent_limits(
|
||||
struct xreap_state *rs)
|
||||
{
|
||||
struct xfs_scrub *sc = rs->sc;
|
||||
struct xfs_mount *mp = sc->mp;
|
||||
|
||||
/*
|
||||
* In the worst case, relogging an intent item causes both an intent
|
||||
* item and a done item to be attached to a transaction for each extent
|
||||
* that we'd like to process.
|
||||
*/
|
||||
const unsigned int efi = xfs_efi_log_space(1) +
|
||||
xfs_efd_log_space(1);
|
||||
const unsigned int rui = xfs_rui_log_space(1) +
|
||||
xfs_rud_log_space();
|
||||
|
||||
/*
|
||||
* Various things can happen when reaping non-CoW metadata blocks:
|
||||
*
|
||||
* t1: Unmapping crosslinked metadata blocks: deferred removal of rmap
|
||||
* record.
|
||||
*
|
||||
* t3: Freeing to AGFL: roll and finish deferred items for every block.
|
||||
* Limits here do not matter.
|
||||
*
|
||||
* t4: Freeing metadata blocks: deferred freeing of the space, which
|
||||
* also removes the rmap record.
|
||||
*
|
||||
* For simplicity, we'll use the worst-case intents size to determine
|
||||
* the maximum number of deferred extents before we have to finish the
|
||||
* whole chain. If we're trying to reap a btree larger than this size,
|
||||
* a crash midway through reaping can result in leaked blocks.
|
||||
*/
|
||||
const unsigned int t1 = rui;
|
||||
const unsigned int t4 = rui + efi;
|
||||
const unsigned int per_intent = max(t1, t4);
|
||||
|
||||
/*
|
||||
* For each transaction in a reap chain, we must be able to take one
|
||||
* step in the defer item chain, which should only consist of EFI or
|
||||
* RUI items.
|
||||
*/
|
||||
const unsigned int f1 = xfs_calc_finish_efi_reservation(mp, 1);
|
||||
const unsigned int f2 = xfs_calc_finish_rui_reservation(mp, 1);
|
||||
const unsigned int step_size = max(f1, f2);
|
||||
|
||||
/* Largest buffer size (in fsblocks) that can be invalidated. */
|
||||
const unsigned int max_binval = xrep_binval_max_fsblocks(mp);
|
||||
|
||||
/*
* Maximum overhead of invalidating one buffer.
*
* NOTE(review): max_binval is described above as a count of fsblocks,
* yet it is passed through XFS_B_TO_FSBT() before being handed to
* xfs_buf_inval_log_space() -- confirm the intended unit of that
* argument.
*/
|
||||
const unsigned int per_binval =
|
||||
xfs_buf_inval_log_space(1, XFS_B_TO_FSBT(mp, max_binval));
|
||||
|
||||
/*
|
||||
* For each transaction in a reap chain, we can delete some number of
|
||||
* extents and invalidate some number of blocks. We assume that btree
|
||||
* blocks aren't usually contiguous; and that scrub likely pulled all
|
||||
* the buffers into memory. From these assumptions, set the maximum
|
||||
* number of deferrals we can queue before flushing the defer chain,
|
||||
* and the number of invalidations we can queue before rolling to a
|
||||
* clean transaction (and possibly relogging some of the deferrals) to
|
||||
* the same quantity.
|
||||
*/
|
||||
const unsigned int variable_overhead = per_intent + per_binval;
|
||||
|
||||
xreap_configure_limits(rs, step_size, variable_overhead, per_intent,
|
||||
per_binval);
|
||||
|
||||
trace_xreap_agextent_limits(sc->tp, per_binval, rs->max_binval,
|
||||
step_size, per_intent, rs->max_deferred);
|
||||
}
|
||||
|
||||
/*
|
||||
* Compute the maximum number of intent items that reaping can attach to the
|
||||
* scrub transaction given the worst case log overhead of the intent items
|
||||
* needed to reap a single CoW staging extent. This is not for freeing
|
||||
* metadata blocks.
|
||||
*/
|
||||
STATIC void
|
||||
xreap_configure_agcow_limits(
|
||||
struct xreap_state *rs)
|
||||
{
|
||||
struct xfs_scrub *sc = rs->sc;
|
||||
struct xfs_mount *mp = sc->mp;
|
||||
|
||||
/*
|
||||
* In the worst case, relogging an intent item causes both an intent
|
||||
* item and a done item to be attached to a transaction for each extent
|
||||
* that we'd like to process.
|
||||
*/
|
||||
const unsigned int efi = xfs_efi_log_space(1) +
|
||||
xfs_efd_log_space(1);
|
||||
const unsigned int rui = xfs_rui_log_space(1) +
|
||||
xfs_rud_log_space();
|
||||
const unsigned int cui = xfs_cui_log_space(1) +
|
||||
xfs_cud_log_space();
|
||||
|
||||
/*
|
||||
* Various things can happen when reaping CoW staging extents:
|
||||
*
|
||||
* t0: Unmapping crosslinked CoW blocks: deferred removal of refcount
|
||||
* record, which defers removal of rmap record
|
||||
*
|
||||
* t2: Freeing CoW blocks: deferred removal of refcount record, which
|
||||
* defers removal of rmap record; and deferred removal of the space
|
||||
*
|
||||
* For simplicity, we'll use the worst-case intents size to determine
|
||||
* the maximum number of deferred extents before we have to finish the
|
||||
* whole chain. If we're trying to reap a btree larger than this size,
|
||||
* a crash midway through reaping can result in leaked blocks.
|
||||
*/
|
||||
const unsigned int t0 = cui + rui;
|
||||
const unsigned int t2 = cui + rui + efi;
|
||||
const unsigned int per_intent = max(t0, t2);
|
||||
|
||||
/*
|
||||
* For each transaction in a reap chain, we must be able to take one
|
||||
* step in the defer item chain, which should only consist of CUI, EFI,
|
||||
* or RUI items.
|
||||
*/
|
||||
const unsigned int f1 = xfs_calc_finish_efi_reservation(mp, 1);
|
||||
const unsigned int f2 = xfs_calc_finish_rui_reservation(mp, 1);
|
||||
const unsigned int f3 = xfs_calc_finish_cui_reservation(mp, 1);
|
||||
const unsigned int step_size = max3(f1, f2, f3);
|
||||
|
||||
/* Largest buffer size (in fsblocks) that can be invalidated. */
|
||||
const unsigned int max_binval = xrep_binval_max_fsblocks(mp);
|
||||
|
||||
/*
* Overhead of invalidating one buffer.
*
* NOTE(review): max_binval is described above as a count of fsblocks,
* yet it is passed through XFS_B_TO_FSBT() before being handed to
* xfs_buf_inval_log_space() -- confirm the intended unit of that
* argument.
*/
|
||||
const unsigned int per_binval =
|
||||
xfs_buf_inval_log_space(1, XFS_B_TO_FSBT(mp, max_binval));
|
||||
|
||||
/*
|
||||
* For each transaction in a reap chain, we can delete some number of
|
||||
* extents and invalidate some number of blocks. We assume that CoW
|
||||
* staging extents are usually more than 1 fsblock, and that there
|
||||
* shouldn't be any buffers for those blocks. From the assumptions,
|
||||
* set the number of deferrals to use as much of the reservation as
|
||||
* it can, but leave space to invalidate 1/8th that number of buffers.
|
||||
*/
|
||||
const unsigned int variable_overhead = per_intent +
|
||||
(per_binval / 8);
|
||||
|
||||
xreap_configure_limits(rs, step_size, variable_overhead, per_intent,
|
||||
per_binval);
|
||||
|
||||
trace_xreap_agcow_limits(sc->tp, per_binval, rs->max_binval, step_size,
|
||||
per_intent, rs->max_deferred);
|
||||
}
|
||||
|
||||
/*
|
||||
* Break an AG metadata extent into sub-extents by fate (crosslinked, not
|
||||
* crosslinked), and dispose of each sub-extent separately.
|
||||
@@ -531,11 +736,11 @@ xreap_agmeta_extent(
|
||||
if (error)
|
||||
return error;
|
||||
xreap_defer_finish_reset(rs);
|
||||
} else if (xreap_want_roll(rs)) {
|
||||
} else if (xreap_want_binval_roll(rs)) {
|
||||
error = xrep_roll_ag_trans(sc);
|
||||
if (error)
|
||||
return error;
|
||||
xreap_reset(rs);
|
||||
xreap_binval_reset(rs);
|
||||
}
|
||||
|
||||
agbno += aglen;
|
||||
@@ -562,11 +767,12 @@ xrep_reap_agblocks(
|
||||
ASSERT(xfs_has_rmapbt(sc->mp));
|
||||
ASSERT(sc->ip == NULL);
|
||||
|
||||
xreap_configure_agextent_limits(&rs);
|
||||
error = xagb_bitmap_walk(bitmap, xreap_agmeta_extent, &rs);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
if (xreap_dirty(&rs))
|
||||
if (xreap_is_dirty(&rs))
|
||||
return xrep_defer_finish(sc);
|
||||
|
||||
return 0;
|
||||
@@ -628,7 +834,7 @@ xreap_fsmeta_extent(
|
||||
if (error)
|
||||
goto out_agf;
|
||||
xreap_defer_finish_reset(rs);
|
||||
} else if (xreap_want_roll(rs)) {
|
||||
} else if (xreap_want_binval_roll(rs)) {
|
||||
/*
|
||||
* Hold the AGF buffer across the transaction roll so
|
||||
* that we don't have to reattach it to the scrub
|
||||
@@ -639,7 +845,7 @@ xreap_fsmeta_extent(
|
||||
xfs_trans_bjoin(sc->tp, sc->sa.agf_bp);
|
||||
if (error)
|
||||
goto out_agf;
|
||||
xreap_reset(rs);
|
||||
xreap_binval_reset(rs);
|
||||
}
|
||||
|
||||
agbno += aglen;
|
||||
@@ -674,11 +880,15 @@ xrep_reap_fsblocks(
|
||||
ASSERT(xfs_has_rmapbt(sc->mp));
|
||||
ASSERT(sc->ip != NULL);
|
||||
|
||||
if (oinfo == &XFS_RMAP_OINFO_COW)
|
||||
xreap_configure_agcow_limits(&rs);
|
||||
else
|
||||
xreap_configure_agextent_limits(&rs);
|
||||
error = xfsb_bitmap_walk(bitmap, xreap_fsmeta_extent, &rs);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
if (xreap_dirty(&rs))
|
||||
if (xreap_is_dirty(&rs))
|
||||
return xrep_defer_finish(sc);
|
||||
|
||||
return 0;
|
||||
@@ -770,7 +980,7 @@ xreap_rgextent_iter(
|
||||
rtbno = xfs_rgbno_to_rtb(sc->sr.rtg, rgbno);
|
||||
|
||||
/*
|
||||
* If there are other rmappings, this block is cross linked and must
|
||||
* t1: There are other rmappings; this block is cross linked and must
|
||||
* not be freed. Remove the forward and reverse mapping and move on.
|
||||
*/
|
||||
if (crosslinked) {
|
||||
@@ -778,14 +988,14 @@ xreap_rgextent_iter(
|
||||
*rglenp);
|
||||
|
||||
xfs_refcount_free_cow_extent(sc->tp, true, rtbno, *rglenp);
|
||||
rs->deferred++;
|
||||
xreap_inc_defer(rs);
|
||||
return 0;
|
||||
}
|
||||
|
||||
trace_xreap_dispose_free_extent(rtg_group(sc->sr.rtg), rgbno, *rglenp);
|
||||
|
||||
/*
|
||||
* The CoW staging extent is not crosslinked. Use deferred work items
|
||||
* t2: The CoW staging extent is not crosslinked. Use deferred work
|
||||
* to remove the refcountbt records (which removes the rmap records)
|
||||
* and free the extent. We're not worried about the system going down
|
||||
* here because log recovery walks the refcount btree to clean out the
|
||||
@@ -799,10 +1009,73 @@ xreap_rgextent_iter(
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
rs->deferred++;
|
||||
xreap_inc_defer(rs);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Compute the maximum number of intent items that reaping can attach to the
|
||||
* scrub transaction given the worst case log overhead of the intent items
|
||||
* needed to reap a single realtime CoW staging extent. This is not for freeing
|
||||
* metadata blocks.
|
||||
*/
|
||||
STATIC void
|
||||
xreap_configure_rgcow_limits(
|
||||
struct xreap_state *rs)
|
||||
{
|
||||
struct xfs_scrub *sc = rs->sc;
|
||||
struct xfs_mount *mp = sc->mp;
|
||||
|
||||
/*
|
||||
* In the worst case, relogging an intent item causes both an intent
|
||||
* item and a done item to be attached to a transaction for each extent
|
||||
* that we'd like to process.
|
||||
*/
|
||||
const unsigned int efi = xfs_efi_log_space(1) +
|
||||
xfs_efd_log_space(1);
|
||||
const unsigned int rui = xfs_rui_log_space(1) +
|
||||
xfs_rud_log_space();
|
||||
const unsigned int cui = xfs_cui_log_space(1) +
|
||||
xfs_cud_log_space();
|
||||
|
||||
/*
|
||||
* Various things can happen when reaping CoW staging extents:
|
||||
*
|
||||
* t1: Unmapping crosslinked CoW blocks: deferred removal of refcount
|
||||
* record, which defers removal of rmap record
|
||||
*
|
||||
* t2: Freeing CoW blocks: deferred removal of refcount record, which
|
||||
* defers removal of rmap record; and deferred removal of the space
|
||||
*
|
||||
* For simplicity, we'll use the worst-case intents size to determine
|
||||
* the maximum number of deferred extents before we have to finish the
|
||||
* whole chain. If we're trying to reap a btree larger than this size,
|
||||
* a crash midway through reaping can result in leaked blocks.
|
||||
*/
|
||||
const unsigned int t1 = cui + rui;
|
||||
const unsigned int t2 = cui + rui + efi;
|
||||
const unsigned int per_intent = max(t1, t2);
|
||||
|
||||
/*
|
||||
* For each transaction in a reap chain, we must be able to take one
|
||||
* step in the defer item chain, which should only consist of CUI, EFI,
|
||||
* or RUI items.
|
||||
*/
|
||||
const unsigned int f1 = xfs_calc_finish_rt_efi_reservation(mp, 1);
|
||||
const unsigned int f2 = xfs_calc_finish_rt_rui_reservation(mp, 1);
|
||||
const unsigned int f3 = xfs_calc_finish_rt_cui_reservation(mp, 1);
|
||||
const unsigned int step_size = max3(f1, f2, f3);
|
||||
|
||||
/*
|
||||
* The only buffer for the rt device is the rtgroup super, so we don't
|
||||
* need to save space for buffer invalidations.
|
||||
*/
|
||||
xreap_configure_limits(rs, step_size, per_intent, per_intent, 0);
|
||||
|
||||
trace_xreap_rgcow_limits(sc->tp, 0, 0, step_size, per_intent,
|
||||
rs->max_deferred);
|
||||
}
|
||||
|
||||
/* Union of the bitmap, rmap, and refcount rtgroup lock flags. */
#define XREAP_RTGLOCK_ALL (XFS_RTGLOCK_BITMAP | \
|
||||
XFS_RTGLOCK_RMAP | \
|
||||
XFS_RTGLOCK_REFCOUNT)
|
||||
@@ -855,11 +1128,11 @@ xreap_rtmeta_extent(
|
||||
if (error)
|
||||
goto out_unlock;
|
||||
xreap_defer_finish_reset(rs);
|
||||
} else if (xreap_want_roll(rs)) {
|
||||
} else if (xreap_want_binval_roll(rs)) {
|
||||
error = xfs_trans_roll_inode(&sc->tp, sc->ip);
|
||||
if (error)
|
||||
goto out_unlock;
|
||||
xreap_reset(rs);
|
||||
xreap_binval_reset(rs);
|
||||
}
|
||||
|
||||
rgbno += rglen;
|
||||
@@ -891,12 +1164,14 @@ xrep_reap_rtblocks(
|
||||
|
||||
ASSERT(xfs_has_rmapbt(sc->mp));
|
||||
ASSERT(sc->ip != NULL);
|
||||
ASSERT(oinfo == &XFS_RMAP_OINFO_COW);
|
||||
|
||||
xreap_configure_rgcow_limits(&rs);
|
||||
error = xrtb_bitmap_walk(bitmap, xreap_rtmeta_extent, &rs);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
if (xreap_dirty(&rs))
|
||||
if (xreap_is_dirty(&rs))
|
||||
return xrep_defer_finish(sc);
|
||||
|
||||
return 0;
|
||||
@@ -929,13 +1204,13 @@ xrep_reap_metadir_fsblocks(
|
||||
ASSERT(sc->ip != NULL);
|
||||
ASSERT(xfs_is_metadir_inode(sc->ip));
|
||||
|
||||
xreap_configure_agextent_limits(&rs);
|
||||
xfs_rmap_ino_bmbt_owner(&oinfo, sc->ip->i_ino, XFS_DATA_FORK);
|
||||
|
||||
error = xfsb_bitmap_walk(bitmap, xreap_fsmeta_extent, &rs);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
if (xreap_dirty(&rs)) {
|
||||
if (xreap_is_dirty(&rs)) {
|
||||
error = xrep_defer_finish(sc);
|
||||
if (error)
|
||||
return error;
|
||||
@@ -955,13 +1230,12 @@ xrep_reap_metadir_fsblocks(
|
||||
*/
|
||||
STATIC int
|
||||
xreap_bmapi_select(
|
||||
struct xfs_scrub *sc,
|
||||
struct xfs_inode *ip,
|
||||
int whichfork,
|
||||
struct xreap_state *rs,
|
||||
struct xfs_bmbt_irec *imap,
|
||||
bool *crosslinked)
|
||||
{
|
||||
struct xfs_owner_info oinfo;
|
||||
struct xfs_scrub *sc = rs->sc;
|
||||
struct xfs_btree_cur *cur;
|
||||
xfs_filblks_t len = 1;
|
||||
xfs_agblock_t bno;
|
||||
@@ -975,7 +1249,8 @@ xreap_bmapi_select(
|
||||
cur = xfs_rmapbt_init_cursor(sc->mp, sc->tp, sc->sa.agf_bp,
|
||||
sc->sa.pag);
|
||||
|
||||
xfs_rmap_ino_owner(&oinfo, ip->i_ino, whichfork, imap->br_startoff);
|
||||
xfs_rmap_ino_owner(&oinfo, rs->ip->i_ino, rs->whichfork,
|
||||
imap->br_startoff);
|
||||
error = xfs_rmap_has_other_keys(cur, agbno, 1, &oinfo, crosslinked);
|
||||
if (error)
|
||||
goto out_cur;
|
||||
@@ -1038,21 +1313,19 @@ xreap_buf_loggable(
|
||||
*/
|
||||
STATIC int
|
||||
xreap_bmapi_binval(
|
||||
struct xfs_scrub *sc,
|
||||
struct xfs_inode *ip,
|
||||
int whichfork,
|
||||
struct xreap_state *rs,
|
||||
struct xfs_bmbt_irec *imap)
|
||||
{
|
||||
struct xfs_scrub *sc = rs->sc;
|
||||
struct xfs_mount *mp = sc->mp;
|
||||
struct xfs_perag *pag = sc->sa.pag;
|
||||
int bmap_flags = xfs_bmapi_aflag(whichfork);
|
||||
int bmap_flags = xfs_bmapi_aflag(rs->whichfork);
|
||||
xfs_fileoff_t off;
|
||||
xfs_fileoff_t max_off;
|
||||
xfs_extlen_t scan_blocks;
|
||||
xfs_agblock_t bno;
|
||||
xfs_agblock_t agbno;
|
||||
xfs_agblock_t agbno_next;
|
||||
unsigned int invalidated = 0;
|
||||
int error;
|
||||
|
||||
/*
|
||||
@@ -1079,7 +1352,7 @@ xreap_bmapi_binval(
|
||||
struct xfs_bmbt_irec hmap;
|
||||
int nhmaps = 1;
|
||||
|
||||
error = xfs_bmapi_read(ip, off, max_off - off, &hmap,
|
||||
error = xfs_bmapi_read(rs->ip, off, max_off - off, &hmap,
|
||||
&nhmaps, bmap_flags);
|
||||
if (error)
|
||||
return error;
|
||||
@@ -1120,14 +1393,13 @@ xreap_bmapi_binval(
|
||||
xfs_buf_stale(bp);
|
||||
xfs_buf_relse(bp);
|
||||
}
|
||||
invalidated++;
|
||||
|
||||
/*
|
||||
* Stop invalidating if we've hit the limit; we should
|
||||
* still have enough reservation left to free however
|
||||
* much of the mapping we've seen so far.
|
||||
* far we've gotten.
|
||||
*/
|
||||
if (invalidated > XREAP_MAX_BINVAL) {
|
||||
if (!xreap_inc_binval(rs)) {
|
||||
imap->br_blockcount = agbno_next - bno;
|
||||
goto out;
|
||||
}
|
||||
@@ -1149,12 +1421,11 @@ out:
|
||||
*/
|
||||
STATIC int
|
||||
xrep_reap_bmapi_iter(
|
||||
struct xfs_scrub *sc,
|
||||
struct xfs_inode *ip,
|
||||
int whichfork,
|
||||
struct xreap_state *rs,
|
||||
struct xfs_bmbt_irec *imap,
|
||||
bool crosslinked)
|
||||
{
|
||||
struct xfs_scrub *sc = rs->sc;
|
||||
int error;
|
||||
|
||||
if (crosslinked) {
|
||||
@@ -1171,14 +1442,14 @@ xrep_reap_bmapi_iter(
|
||||
imap->br_blockcount);
|
||||
|
||||
/*
|
||||
* Schedule removal of the mapping from the fork. We use
|
||||
* t0: Schedule removal of the mapping from the fork. We use
|
||||
* deferred log intents in this function to control the exact
|
||||
* sequence of metadata updates.
|
||||
*/
|
||||
xfs_bmap_unmap_extent(sc->tp, ip, whichfork, imap);
|
||||
xfs_trans_mod_dquot_byino(sc->tp, ip, XFS_TRANS_DQ_BCOUNT,
|
||||
xfs_bmap_unmap_extent(sc->tp, rs->ip, rs->whichfork, imap);
|
||||
xfs_trans_mod_dquot_byino(sc->tp, rs->ip, XFS_TRANS_DQ_BCOUNT,
|
||||
-(int64_t)imap->br_blockcount);
|
||||
xfs_rmap_unmap_extent(sc->tp, ip, whichfork, imap);
|
||||
xfs_rmap_unmap_extent(sc->tp, rs->ip, rs->whichfork, imap);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -1199,41 +1470,139 @@ xrep_reap_bmapi_iter(
|
||||
* transaction is full of logged buffer invalidations, so we need to
|
||||
* return early so that we can roll and retry.
|
||||
*/
|
||||
error = xreap_bmapi_binval(sc, ip, whichfork, imap);
|
||||
error = xreap_bmapi_binval(rs, imap);
|
||||
if (error || imap->br_blockcount == 0)
|
||||
return error;
|
||||
|
||||
/*
|
||||
* Schedule removal of the mapping from the fork. We use deferred log
|
||||
* intents in this function to control the exact sequence of metadata
|
||||
* t1: Schedule removal of the mapping from the fork. We use deferred
|
||||
* work in this function to control the exact sequence of metadata
|
||||
* updates.
|
||||
*/
|
||||
xfs_bmap_unmap_extent(sc->tp, ip, whichfork, imap);
|
||||
xfs_trans_mod_dquot_byino(sc->tp, ip, XFS_TRANS_DQ_BCOUNT,
|
||||
xfs_bmap_unmap_extent(sc->tp, rs->ip, rs->whichfork, imap);
|
||||
xfs_trans_mod_dquot_byino(sc->tp, rs->ip, XFS_TRANS_DQ_BCOUNT,
|
||||
-(int64_t)imap->br_blockcount);
|
||||
return xfs_free_extent_later(sc->tp, imap->br_startblock,
|
||||
imap->br_blockcount, NULL, XFS_AG_RESV_NONE,
|
||||
XFS_FREE_EXTENT_SKIP_DISCARD);
|
||||
}
|
||||
|
||||
/*
* Compute the maximum mapcount of a file buffer.
*
* Returns the directory geometry's fsbcount when the scrub type is
* XFS_SCRUB_TYPE_DIR, and 1 for every other scrub type.
*/
|
||||
static unsigned int
|
||||
xreap_bmapi_binval_mapcount(
|
||||
struct xfs_scrub *sc)
|
||||
{
|
||||
/* directory blocks can span multiple fsblocks and be discontiguous */
|
||||
if (sc->sm->sm_type == XFS_SCRUB_TYPE_DIR)
|
||||
return sc->mp->m_dir_geo->fsbcount;
|
||||
|
||||
/* all other file xattr/symlink blocks must be contiguous */
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
* Compute the maximum block size of a file buffer.
*
* Returns the directory geometry's blksize for XFS_SCRUB_TYPE_DIR,
* XFS_XATTR_SIZE_MAX for XFS_SCRUB_TYPE_XATTR and XFS_SCRUB_TYPE_PARENT,
* and the superblock's sb_blocksize for every other scrub type.
*/
|
||||
static unsigned int
|
||||
xreap_bmapi_binval_blocksize(
|
||||
struct xfs_scrub *sc)
|
||||
{
|
||||
/* Scrub types not listed here fall out of the switch to the default. */
switch (sc->sm->sm_type) {
|
||||
case XFS_SCRUB_TYPE_DIR:
|
||||
return sc->mp->m_dir_geo->blksize;
|
||||
case XFS_SCRUB_TYPE_XATTR:
|
||||
case XFS_SCRUB_TYPE_PARENT:
|
||||
/*
|
||||
* The xattr structure itself consists of single fsblocks, but
|
||||
* there could be remote xattr blocks to invalidate.
|
||||
*/
|
||||
return XFS_XATTR_SIZE_MAX;
|
||||
}
|
||||
|
||||
/* everything else is a single block */
|
||||
return sc->mp->m_sb.sb_blocksize;
|
||||
}
|
||||
|
||||
/*
|
||||
* Compute the maximum number of buffer invalidations that we can do while
|
||||
* reaping a single extent from a file fork.
|
||||
*/
|
||||
STATIC void
|
||||
xreap_configure_bmapi_limits(
|
||||
struct xreap_state *rs)
|
||||
{
|
||||
struct xfs_scrub *sc = rs->sc;
|
||||
struct xfs_mount *mp = sc->mp;
|
||||
|
||||
/* overhead of invalidating a buffer */
|
||||
const unsigned int per_binval =
|
||||
xfs_buf_inval_log_space(xreap_bmapi_binval_mapcount(sc),
|
||||
xreap_bmapi_binval_blocksize(sc));
|
||||
|
||||
/*
|
||||
* In the worst case, relogging an intent item causes both an intent
|
||||
* item and a done item to be attached to a transaction for each extent
|
||||
* that we'd like to process.
|
||||
*/
|
||||
const unsigned int efi = xfs_efi_log_space(1) +
|
||||
xfs_efd_log_space(1);
|
||||
const unsigned int rui = xfs_rui_log_space(1) +
|
||||
xfs_rud_log_space();
|
||||
const unsigned int bui = xfs_bui_log_space(1) +
|
||||
xfs_bud_log_space();
|
||||
|
||||
/*
|
||||
* t1: Unmapping crosslinked file data blocks: one bmap deletion,
|
||||
* possibly an EFI for underfilled bmbt blocks, and an rmap deletion.
|
||||
*
|
||||
* t2: Freeing file data blocks: one bmap deletion, possibly an
|
||||
* EFI for underfilled bmbt blocks, and another EFI for the space
|
||||
* itself.
|
||||
*/
|
||||
const unsigned int t1 = (bui + efi) + rui;
|
||||
const unsigned int t2 = (bui + efi) + efi;
|
||||
const unsigned int per_intent = max(t1, t2);
|
||||
|
||||
/*
|
||||
* For each transaction in a reap chain, we must be able to take one
|
||||
* step in the defer item chain, which should only consist of BUI, EFI,
|
||||
* or RUI items.
|
||||
*/
|
||||
const unsigned int f1 = xfs_calc_finish_efi_reservation(mp, 1);
|
||||
const unsigned int f2 = xfs_calc_finish_rui_reservation(mp, 1);
|
||||
const unsigned int f3 = xfs_calc_finish_bui_reservation(mp, 1);
|
||||
const unsigned int step_size = max3(f1, f2, f3);
|
||||
|
||||
/*
|
||||
* Each call to xreap_ifork_extent starts with a clean transaction and
|
||||
* operates on a single mapping by creating a chain of log intent items
|
||||
* for that mapping. We need to leave enough reservation in the
|
||||
* transaction to log btree buffer and inode updates for each step in
|
||||
* the chain, and to relog the log intents.
|
||||
*/
|
||||
const unsigned int per_extent_res = per_intent + step_size;
|
||||
|
||||
/* per_intent is passed as 0, so only the invalidation limit is derived. */
xreap_configure_limits(rs, per_extent_res, per_binval, 0, per_binval);
|
||||
|
||||
/* The tracepoint is given a literal 1 for max_deferred. */
trace_xreap_bmapi_limits(sc->tp, per_binval, rs->max_binval,
|
||||
step_size, per_intent, 1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Dispose of as much of this file extent as we can. Upon successful return,
|
||||
* the imap will reflect the mapping that was removed from the fork.
|
||||
*/
|
||||
STATIC int
|
||||
xreap_ifork_extent(
|
||||
struct xfs_scrub *sc,
|
||||
struct xfs_inode *ip,
|
||||
int whichfork,
|
||||
struct xreap_state *rs,
|
||||
struct xfs_bmbt_irec *imap)
|
||||
{
|
||||
struct xfs_scrub *sc = rs->sc;
|
||||
xfs_agnumber_t agno;
|
||||
bool crosslinked;
|
||||
int error;
|
||||
|
||||
ASSERT(sc->sa.pag == NULL);
|
||||
|
||||
trace_xreap_ifork_extent(sc, ip, whichfork, imap);
|
||||
trace_xreap_ifork_extent(sc, rs->ip, rs->whichfork, imap);
|
||||
|
||||
agno = XFS_FSB_TO_AGNO(sc->mp, imap->br_startblock);
|
||||
sc->sa.pag = xfs_perag_get(sc->mp, agno);
|
||||
@@ -1248,11 +1617,11 @@ xreap_ifork_extent(
|
||||
* Decide the fate of the blocks at the beginning of the mapping, then
|
||||
* update the mapping to use it with the unmap calls.
|
||||
*/
|
||||
error = xreap_bmapi_select(sc, ip, whichfork, imap, &crosslinked);
|
||||
error = xreap_bmapi_select(rs, imap, &crosslinked);
|
||||
if (error)
|
||||
goto out_agf;
|
||||
|
||||
error = xrep_reap_bmapi_iter(sc, ip, whichfork, imap, crosslinked);
|
||||
error = xrep_reap_bmapi_iter(rs, imap, crosslinked);
|
||||
if (error)
|
||||
goto out_agf;
|
||||
|
||||
@@ -1276,6 +1645,11 @@ xrep_reap_ifork(
|
||||
struct xfs_inode *ip,
|
||||
int whichfork)
|
||||
{
|
||||
struct xreap_state rs = {
|
||||
.sc = sc,
|
||||
.ip = ip,
|
||||
.whichfork = whichfork,
|
||||
};
|
||||
xfs_fileoff_t off = 0;
|
||||
int bmap_flags = xfs_bmapi_aflag(whichfork);
|
||||
int error;
|
||||
@@ -1284,6 +1658,7 @@ xrep_reap_ifork(
|
||||
ASSERT(ip == sc->ip || ip == sc->tempip);
|
||||
ASSERT(whichfork == XFS_ATTR_FORK || !XFS_IS_REALTIME_INODE(ip));
|
||||
|
||||
xreap_configure_bmapi_limits(&rs);
|
||||
while (off < XFS_MAX_FILEOFF) {
|
||||
struct xfs_bmbt_irec imap;
|
||||
int nimaps = 1;
|
||||
@@ -1303,13 +1678,14 @@ xrep_reap_ifork(
|
||||
* can in a single transaction.
|
||||
*/
|
||||
if (xfs_bmap_is_real_extent(&imap)) {
|
||||
error = xreap_ifork_extent(sc, ip, whichfork, &imap);
|
||||
error = xreap_ifork_extent(&rs, &imap);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
error = xfs_defer_finish(&sc->tp);
|
||||
if (error)
|
||||
return error;
|
||||
xreap_defer_finish_reset(&rs);
|
||||
}
|
||||
|
||||
off = imap.br_startoff + imap.br_blockcount;
|
||||
|
||||
@@ -1110,7 +1110,7 @@ xrep_will_attempt(
|
||||
return true;
|
||||
|
||||
/* Let debug users force us into the repair routines. */
|
||||
if (XFS_TEST_ERROR(false, sc->mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR))
|
||||
if (XFS_TEST_ERROR(sc->mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR))
|
||||
return true;
|
||||
|
||||
/* Metadata is corrupt or failed cross-referencing. */
|
||||
|
||||
@@ -18,14 +18,6 @@ static inline int xrep_notsupported(struct xfs_scrub *sc)
|
||||
|
||||
#ifdef CONFIG_XFS_ONLINE_REPAIR
|
||||
|
||||
/*
|
||||
* This is the maximum number of deferred extent freeing item extents (EFIs)
|
||||
* that we'll attach to a transaction without rolling the transaction to avoid
|
||||
* overrunning a tr_itruncate reservation.
|
||||
*/
|
||||
#define XREP_MAX_ITRUNCATE_EFIS (128)
|
||||
|
||||
|
||||
/* Repair helpers */
|
||||
|
||||
int xrep_attempt(struct xfs_scrub *sc, struct xchk_stats_run *run);
|
||||
|
||||
@@ -185,7 +185,7 @@ xrep_symlink_salvage_inline(
|
||||
return 0;
|
||||
|
||||
nr = min(XFS_SYMLINK_MAXLEN, xfs_inode_data_fork_size(ip));
|
||||
strncpy(target_buf, ifp->if_data, nr);
|
||||
memcpy(target_buf, ifp->if_data, nr);
|
||||
return nr;
|
||||
}
|
||||
|
||||
|
||||
@@ -22,6 +22,7 @@
|
||||
#include "xfs_parent.h"
|
||||
#include "xfs_metafile.h"
|
||||
#include "xfs_rtgroup.h"
|
||||
#include "xfs_trans.h"
|
||||
#include "scrub/scrub.h"
|
||||
#include "scrub/xfile.h"
|
||||
#include "scrub/xfarray.h"
|
||||
|
||||
@@ -2000,6 +2000,51 @@ DEFINE_REPAIR_EXTENT_EVENT(xreap_agextent_binval);
|
||||
DEFINE_REPAIR_EXTENT_EVENT(xreap_bmapi_binval);
|
||||
DEFINE_REPAIR_EXTENT_EVENT(xrep_agfl_insert);
|
||||
|
||||
/*
* Tracepoint class for reporting reap limits: records the device and log
* reservation (t_log_res) of the transaction alongside the per-invalidation
* overhead, invalidation limit, step size, per-intent overhead, and deferral
* limit supplied by the caller.
*/
DECLARE_EVENT_CLASS(xrep_reap_limits_class,
|
||||
TP_PROTO(const struct xfs_trans *tp, unsigned int per_binval,
|
||||
unsigned int max_binval, unsigned int step_size,
|
||||
unsigned int per_intent,
|
||||
unsigned int max_deferred),
|
||||
TP_ARGS(tp, per_binval, max_binval, step_size, per_intent, max_deferred),
|
||||
TP_STRUCT__entry(
|
||||
__field(dev_t, dev)
|
||||
__field(unsigned int, log_res)
|
||||
__field(unsigned int, per_binval)
|
||||
__field(unsigned int, max_binval)
|
||||
__field(unsigned int, step_size)
|
||||
__field(unsigned int, per_intent)
|
||||
__field(unsigned int, max_deferred)
|
||||
),
|
||||
TP_fast_assign(
|
||||
__entry->dev = tp->t_mountp->m_super->s_dev;
|
||||
__entry->log_res = tp->t_log_res;
|
||||
__entry->per_binval = per_binval;
|
||||
__entry->max_binval = max_binval;
|
||||
__entry->step_size = step_size;
|
||||
__entry->per_intent = per_intent;
|
||||
__entry->max_deferred = max_deferred;
|
||||
),
|
||||
TP_printk("dev %d:%d logres %u per_binval %u max_binval %u step_size %u per_intent %u max_deferred %u",
|
||||
MAJOR(__entry->dev), MINOR(__entry->dev),
|
||||
__entry->log_res,
|
||||
__entry->per_binval,
|
||||
__entry->max_binval,
|
||||
__entry->step_size,
|
||||
__entry->per_intent,
|
||||
__entry->max_deferred)
|
||||
);
|
||||
/* Define one event of the reap limits class with the shared prototype. */
#define DEFINE_REPAIR_REAP_LIMITS_EVENT(name) \
|
||||
DEFINE_EVENT(xrep_reap_limits_class, name, \
|
||||
TP_PROTO(const struct xfs_trans *tp, unsigned int per_binval, \
|
||||
unsigned int max_binval, unsigned int step_size, \
|
||||
unsigned int per_intent, \
|
||||
unsigned int max_deferred), \
|
||||
TP_ARGS(tp, per_binval, max_binval, step_size, per_intent, max_deferred))
|
||||
DEFINE_REPAIR_REAP_LIMITS_EVENT(xreap_agextent_limits);
|
||||
DEFINE_REPAIR_REAP_LIMITS_EVENT(xreap_agcow_limits);
|
||||
DEFINE_REPAIR_REAP_LIMITS_EVENT(xreap_rgcow_limits);
|
||||
DEFINE_REPAIR_REAP_LIMITS_EVENT(xreap_bmapi_limits);
|
||||
|
||||
DECLARE_EVENT_CLASS(xrep_reap_find_class,
|
||||
TP_PROTO(const struct xfs_group *xg, xfs_agblock_t agbno,
|
||||
xfs_extlen_t len, bool crosslinked),
|
||||
|
||||
@@ -491,7 +491,7 @@ xfs_attr_finish_item(
|
||||
/* Reset trans after EAGAIN cycle since the transaction is new */
|
||||
args->trans = tp;
|
||||
|
||||
if (XFS_TEST_ERROR(false, args->dp->i_mount, XFS_ERRTAG_LARP)) {
|
||||
if (XFS_TEST_ERROR(args->dp->i_mount, XFS_ERRTAG_LARP)) {
|
||||
error = -EIO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
@@ -387,8 +387,6 @@ xfs_buf_map_verify(
|
||||
struct xfs_buftarg *btp,
|
||||
struct xfs_buf_map *map)
|
||||
{
|
||||
xfs_daddr_t eofs;
|
||||
|
||||
/* Check for IOs smaller than the sector size / not sector aligned */
|
||||
ASSERT(!(BBTOB(map->bm_len) < btp->bt_meta_sectorsize));
|
||||
ASSERT(!(BBTOB(map->bm_bn) & (xfs_off_t)btp->bt_meta_sectormask));
|
||||
@@ -397,11 +395,10 @@ xfs_buf_map_verify(
|
||||
* Corrupted block numbers can get through to here, unfortunately, so we
|
||||
* have to check that the buffer falls within the filesystem bounds.
|
||||
*/
|
||||
eofs = XFS_FSB_TO_BB(btp->bt_mount, btp->bt_mount->m_sb.sb_dblocks);
|
||||
if (map->bm_bn < 0 || map->bm_bn >= eofs) {
|
||||
if (map->bm_bn < 0 || map->bm_bn >= btp->bt_nr_sectors) {
|
||||
xfs_alert(btp->bt_mount,
|
||||
"%s: daddr 0x%llx out of range, EOFS 0x%llx",
|
||||
__func__, map->bm_bn, eofs);
|
||||
__func__, map->bm_bn, btp->bt_nr_sectors);
|
||||
WARN_ON(1);
|
||||
return -EFSCORRUPTED;
|
||||
}
|
||||
@@ -1299,7 +1296,7 @@ xfs_buf_bio_end_io(
|
||||
if (bio->bi_status)
|
||||
xfs_buf_ioerror(bp, blk_status_to_errno(bio->bi_status));
|
||||
else if ((bp->b_flags & XBF_WRITE) && (bp->b_flags & XBF_ASYNC) &&
|
||||
XFS_TEST_ERROR(false, bp->b_mount, XFS_ERRTAG_BUF_IOERROR))
|
||||
XFS_TEST_ERROR(bp->b_mount, XFS_ERRTAG_BUF_IOERROR))
|
||||
xfs_buf_ioerror(bp, -EIO);
|
||||
|
||||
if (bp->b_flags & XBF_ASYNC) {
|
||||
@@ -1720,26 +1717,30 @@ xfs_configure_buftarg_atomic_writes(
|
||||
int
|
||||
xfs_configure_buftarg(
|
||||
struct xfs_buftarg *btp,
|
||||
unsigned int sectorsize)
|
||||
unsigned int sectorsize,
|
||||
xfs_rfsblock_t nr_blocks)
|
||||
{
|
||||
int error;
|
||||
struct xfs_mount *mp = btp->bt_mount;
|
||||
|
||||
ASSERT(btp->bt_bdev != NULL);
|
||||
if (btp->bt_bdev) {
|
||||
int error;
|
||||
|
||||
/* Set up metadata sector size info */
|
||||
btp->bt_meta_sectorsize = sectorsize;
|
||||
btp->bt_meta_sectormask = sectorsize - 1;
|
||||
error = bdev_validate_blocksize(btp->bt_bdev, sectorsize);
|
||||
if (error) {
|
||||
xfs_warn(mp,
|
||||
"Cannot use blocksize %u on device %pg, err %d",
|
||||
sectorsize, btp->bt_bdev, error);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
error = bdev_validate_blocksize(btp->bt_bdev, sectorsize);
|
||||
if (error) {
|
||||
xfs_warn(btp->bt_mount,
|
||||
"Cannot use blocksize %u on device %pg, err %d",
|
||||
sectorsize, btp->bt_bdev, error);
|
||||
return -EINVAL;
|
||||
if (bdev_can_atomic_write(btp->bt_bdev))
|
||||
xfs_configure_buftarg_atomic_writes(btp);
|
||||
}
|
||||
|
||||
if (bdev_can_atomic_write(btp->bt_bdev))
|
||||
xfs_configure_buftarg_atomic_writes(btp);
|
||||
btp->bt_meta_sectorsize = sectorsize;
|
||||
btp->bt_meta_sectormask = sectorsize - 1;
|
||||
/* m_blkbb_log is not set up yet */
|
||||
btp->bt_nr_sectors = nr_blocks << (mp->m_sb.sb_blocklog - BBSHIFT);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -1749,6 +1750,9 @@ xfs_init_buftarg(
|
||||
size_t logical_sectorsize,
|
||||
const char *descr)
|
||||
{
|
||||
/* The maximum size of the buftarg is only known once the sb is read. */
|
||||
btp->bt_nr_sectors = (xfs_daddr_t)-1;
|
||||
|
||||
/* Set up device logical sector size mask */
|
||||
btp->bt_logical_sectorsize = logical_sectorsize;
|
||||
btp->bt_logical_sectormask = logical_sectorsize - 1;
|
||||
@@ -2084,7 +2088,7 @@ void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref)
|
||||
* This allows userspace to disrupt buffer caching for debug/testing
|
||||
* purposes.
|
||||
*/
|
||||
if (XFS_TEST_ERROR(false, bp->b_mount, XFS_ERRTAG_BUF_LRU_REF))
|
||||
if (XFS_TEST_ERROR(bp->b_mount, XFS_ERRTAG_BUF_LRU_REF))
|
||||
lru_ref = 0;
|
||||
|
||||
atomic_set(&bp->b_lru_ref, lru_ref);
|
||||
|
||||
@@ -103,6 +103,7 @@ struct xfs_buftarg {
|
||||
size_t bt_meta_sectormask;
|
||||
size_t bt_logical_sectorsize;
|
||||
size_t bt_logical_sectormask;
|
||||
xfs_daddr_t bt_nr_sectors;
|
||||
|
||||
/* LRU control structures */
|
||||
struct shrinker *bt_shrinker;
|
||||
@@ -372,7 +373,8 @@ struct xfs_buftarg *xfs_alloc_buftarg(struct xfs_mount *mp,
|
||||
extern void xfs_free_buftarg(struct xfs_buftarg *);
|
||||
extern void xfs_buftarg_wait(struct xfs_buftarg *);
|
||||
extern void xfs_buftarg_drain(struct xfs_buftarg *);
|
||||
int xfs_configure_buftarg(struct xfs_buftarg *btp, unsigned int sectorsize);
|
||||
int xfs_configure_buftarg(struct xfs_buftarg *btp, unsigned int sectorsize,
|
||||
xfs_fsblock_t nr_blocks);
|
||||
|
||||
#define xfs_readonly_buftarg(buftarg) bdev_read_only((buftarg)->bt_bdev)
|
||||
|
||||
|
||||
@@ -736,6 +736,16 @@ xlog_recover_do_primary_sb_buffer(
|
||||
*/
|
||||
xfs_sb_from_disk(&mp->m_sb, dsb);
|
||||
|
||||
/*
|
||||
* Grow can change the device size. Mirror that into the buftarg.
|
||||
*/
|
||||
mp->m_ddev_targp->bt_nr_sectors =
|
||||
XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
|
||||
if (mp->m_rtdev_targp && mp->m_rtdev_targp != mp->m_ddev_targp) {
|
||||
mp->m_rtdev_targp->bt_nr_sectors =
|
||||
XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks);
|
||||
}
|
||||
|
||||
if (mp->m_sb.sb_agcount < orig_agcount) {
|
||||
xfs_alert(mp, "Shrinking AG count in log recovery not supported");
|
||||
return -EFSCORRUPTED;
|
||||
|
||||
@@ -10,61 +10,17 @@
|
||||
#include "xfs_log_format.h"
|
||||
#include "xfs_trans_resv.h"
|
||||
#include "xfs_mount.h"
|
||||
#include "xfs_errortag.h"
|
||||
#include "xfs_error.h"
|
||||
#include "xfs_sysfs.h"
|
||||
#include "xfs_inode.h"
|
||||
|
||||
#ifdef DEBUG
|
||||
|
||||
static unsigned int xfs_errortag_random_default[] = {
|
||||
XFS_RANDOM_DEFAULT,
|
||||
XFS_RANDOM_IFLUSH_1,
|
||||
XFS_RANDOM_IFLUSH_2,
|
||||
XFS_RANDOM_IFLUSH_3,
|
||||
XFS_RANDOM_IFLUSH_4,
|
||||
XFS_RANDOM_IFLUSH_5,
|
||||
XFS_RANDOM_IFLUSH_6,
|
||||
XFS_RANDOM_DA_READ_BUF,
|
||||
XFS_RANDOM_BTREE_CHECK_LBLOCK,
|
||||
XFS_RANDOM_BTREE_CHECK_SBLOCK,
|
||||
XFS_RANDOM_ALLOC_READ_AGF,
|
||||
XFS_RANDOM_IALLOC_READ_AGI,
|
||||
XFS_RANDOM_ITOBP_INOTOBP,
|
||||
XFS_RANDOM_IUNLINK,
|
||||
XFS_RANDOM_IUNLINK_REMOVE,
|
||||
XFS_RANDOM_DIR_INO_VALIDATE,
|
||||
XFS_RANDOM_BULKSTAT_READ_CHUNK,
|
||||
XFS_RANDOM_IODONE_IOERR,
|
||||
XFS_RANDOM_STRATREAD_IOERR,
|
||||
XFS_RANDOM_STRATCMPL_IOERR,
|
||||
XFS_RANDOM_DIOWRITE_IOERR,
|
||||
XFS_RANDOM_BMAPIFORMAT,
|
||||
XFS_RANDOM_FREE_EXTENT,
|
||||
XFS_RANDOM_RMAP_FINISH_ONE,
|
||||
XFS_RANDOM_REFCOUNT_CONTINUE_UPDATE,
|
||||
XFS_RANDOM_REFCOUNT_FINISH_ONE,
|
||||
XFS_RANDOM_BMAP_FINISH_ONE,
|
||||
XFS_RANDOM_AG_RESV_CRITICAL,
|
||||
0, /* XFS_RANDOM_DROP_WRITES has been removed */
|
||||
XFS_RANDOM_LOG_BAD_CRC,
|
||||
XFS_RANDOM_LOG_ITEM_PIN,
|
||||
XFS_RANDOM_BUF_LRU_REF,
|
||||
XFS_RANDOM_FORCE_SCRUB_REPAIR,
|
||||
XFS_RANDOM_FORCE_SUMMARY_RECALC,
|
||||
XFS_RANDOM_IUNLINK_FALLBACK,
|
||||
XFS_RANDOM_BUF_IOERROR,
|
||||
XFS_RANDOM_REDUCE_MAX_IEXTENTS,
|
||||
XFS_RANDOM_BMAP_ALLOC_MINLEN_EXTENT,
|
||||
XFS_RANDOM_AG_RESV_FAIL,
|
||||
XFS_RANDOM_LARP,
|
||||
XFS_RANDOM_DA_LEAF_SPLIT,
|
||||
XFS_RANDOM_ATTR_LEAF_TO_NODE,
|
||||
XFS_RANDOM_WB_DELAY_MS,
|
||||
XFS_RANDOM_WRITE_DELAY_MS,
|
||||
XFS_RANDOM_EXCHMAPS_FINISH_ONE,
|
||||
XFS_RANDOM_METAFILE_RESV_CRITICAL,
|
||||
};
|
||||
#define XFS_ERRTAG(_tag, _name, _default) \
|
||||
[XFS_ERRTAG_##_tag] = (_default),
|
||||
#include "xfs_errortag.h"
|
||||
static const unsigned int xfs_errortag_random_default[] = { XFS_ERRTAGS };
|
||||
#undef XFS_ERRTAG
|
||||
|
||||
struct xfs_errortag_attr {
|
||||
struct attribute attr;
|
||||
@@ -93,21 +49,18 @@ xfs_errortag_attr_store(
|
||||
size_t count)
|
||||
{
|
||||
struct xfs_mount *mp = to_mp(kobject);
|
||||
struct xfs_errortag_attr *xfs_attr = to_attr(attr);
|
||||
unsigned int error_tag = to_attr(attr)->tag;
|
||||
int ret;
|
||||
unsigned int val;
|
||||
|
||||
if (strcmp(buf, "default") == 0) {
|
||||
val = xfs_errortag_random_default[xfs_attr->tag];
|
||||
mp->m_errortag[error_tag] =
|
||||
xfs_errortag_random_default[error_tag];
|
||||
} else {
|
||||
ret = kstrtouint(buf, 0, &val);
|
||||
ret = kstrtouint(buf, 0, &mp->m_errortag[error_tag]);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = xfs_errortag_set(mp, xfs_attr->tag, val);
|
||||
if (ret)
|
||||
return ret;
|
||||
return count;
|
||||
}
|
||||
|
||||
@@ -118,10 +71,9 @@ xfs_errortag_attr_show(
|
||||
char *buf)
|
||||
{
|
||||
struct xfs_mount *mp = to_mp(kobject);
|
||||
struct xfs_errortag_attr *xfs_attr = to_attr(attr);
|
||||
unsigned int error_tag = to_attr(attr)->tag;
|
||||
|
||||
return snprintf(buf, PAGE_SIZE, "%u\n",
|
||||
xfs_errortag_get(mp, xfs_attr->tag));
|
||||
return snprintf(buf, PAGE_SIZE, "%u\n", mp->m_errortag[error_tag]);
|
||||
}
|
||||
|
||||
static const struct sysfs_ops xfs_errortag_sysfs_ops = {
|
||||
@@ -129,110 +81,28 @@ static const struct sysfs_ops xfs_errortag_sysfs_ops = {
|
||||
.store = xfs_errortag_attr_store,
|
||||
};
|
||||
|
||||
#define XFS_ERRORTAG_ATTR_RW(_name, _tag) \
|
||||
#define XFS_ERRTAG(_tag, _name, _default) \
|
||||
static struct xfs_errortag_attr xfs_errortag_attr_##_name = { \
|
||||
.attr = {.name = __stringify(_name), \
|
||||
.mode = VERIFY_OCTAL_PERMISSIONS(S_IWUSR | S_IRUGO) }, \
|
||||
.tag = (_tag), \
|
||||
}
|
||||
|
||||
#define XFS_ERRORTAG_ATTR_LIST(_name) &xfs_errortag_attr_##_name.attr
|
||||
|
||||
XFS_ERRORTAG_ATTR_RW(noerror, XFS_ERRTAG_NOERROR);
|
||||
XFS_ERRORTAG_ATTR_RW(iflush1, XFS_ERRTAG_IFLUSH_1);
|
||||
XFS_ERRORTAG_ATTR_RW(iflush2, XFS_ERRTAG_IFLUSH_2);
|
||||
XFS_ERRORTAG_ATTR_RW(iflush3, XFS_ERRTAG_IFLUSH_3);
|
||||
XFS_ERRORTAG_ATTR_RW(iflush4, XFS_ERRTAG_IFLUSH_4);
|
||||
XFS_ERRORTAG_ATTR_RW(iflush5, XFS_ERRTAG_IFLUSH_5);
|
||||
XFS_ERRORTAG_ATTR_RW(iflush6, XFS_ERRTAG_IFLUSH_6);
|
||||
XFS_ERRORTAG_ATTR_RW(dareadbuf, XFS_ERRTAG_DA_READ_BUF);
|
||||
XFS_ERRORTAG_ATTR_RW(btree_chk_lblk, XFS_ERRTAG_BTREE_CHECK_LBLOCK);
|
||||
XFS_ERRORTAG_ATTR_RW(btree_chk_sblk, XFS_ERRTAG_BTREE_CHECK_SBLOCK);
|
||||
XFS_ERRORTAG_ATTR_RW(readagf, XFS_ERRTAG_ALLOC_READ_AGF);
|
||||
XFS_ERRORTAG_ATTR_RW(readagi, XFS_ERRTAG_IALLOC_READ_AGI);
|
||||
XFS_ERRORTAG_ATTR_RW(itobp, XFS_ERRTAG_ITOBP_INOTOBP);
|
||||
XFS_ERRORTAG_ATTR_RW(iunlink, XFS_ERRTAG_IUNLINK);
|
||||
XFS_ERRORTAG_ATTR_RW(iunlinkrm, XFS_ERRTAG_IUNLINK_REMOVE);
|
||||
XFS_ERRORTAG_ATTR_RW(dirinovalid, XFS_ERRTAG_DIR_INO_VALIDATE);
|
||||
XFS_ERRORTAG_ATTR_RW(bulkstat, XFS_ERRTAG_BULKSTAT_READ_CHUNK);
|
||||
XFS_ERRORTAG_ATTR_RW(logiodone, XFS_ERRTAG_IODONE_IOERR);
|
||||
XFS_ERRORTAG_ATTR_RW(stratread, XFS_ERRTAG_STRATREAD_IOERR);
|
||||
XFS_ERRORTAG_ATTR_RW(stratcmpl, XFS_ERRTAG_STRATCMPL_IOERR);
|
||||
XFS_ERRORTAG_ATTR_RW(diowrite, XFS_ERRTAG_DIOWRITE_IOERR);
|
||||
XFS_ERRORTAG_ATTR_RW(bmapifmt, XFS_ERRTAG_BMAPIFORMAT);
|
||||
XFS_ERRORTAG_ATTR_RW(free_extent, XFS_ERRTAG_FREE_EXTENT);
|
||||
XFS_ERRORTAG_ATTR_RW(rmap_finish_one, XFS_ERRTAG_RMAP_FINISH_ONE);
|
||||
XFS_ERRORTAG_ATTR_RW(refcount_continue_update, XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE);
|
||||
XFS_ERRORTAG_ATTR_RW(refcount_finish_one, XFS_ERRTAG_REFCOUNT_FINISH_ONE);
|
||||
XFS_ERRORTAG_ATTR_RW(bmap_finish_one, XFS_ERRTAG_BMAP_FINISH_ONE);
|
||||
XFS_ERRORTAG_ATTR_RW(ag_resv_critical, XFS_ERRTAG_AG_RESV_CRITICAL);
|
||||
XFS_ERRORTAG_ATTR_RW(log_bad_crc, XFS_ERRTAG_LOG_BAD_CRC);
|
||||
XFS_ERRORTAG_ATTR_RW(log_item_pin, XFS_ERRTAG_LOG_ITEM_PIN);
|
||||
XFS_ERRORTAG_ATTR_RW(buf_lru_ref, XFS_ERRTAG_BUF_LRU_REF);
|
||||
XFS_ERRORTAG_ATTR_RW(force_repair, XFS_ERRTAG_FORCE_SCRUB_REPAIR);
|
||||
XFS_ERRORTAG_ATTR_RW(bad_summary, XFS_ERRTAG_FORCE_SUMMARY_RECALC);
|
||||
XFS_ERRORTAG_ATTR_RW(iunlink_fallback, XFS_ERRTAG_IUNLINK_FALLBACK);
|
||||
XFS_ERRORTAG_ATTR_RW(buf_ioerror, XFS_ERRTAG_BUF_IOERROR);
|
||||
XFS_ERRORTAG_ATTR_RW(reduce_max_iextents, XFS_ERRTAG_REDUCE_MAX_IEXTENTS);
|
||||
XFS_ERRORTAG_ATTR_RW(bmap_alloc_minlen_extent, XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT);
|
||||
XFS_ERRORTAG_ATTR_RW(ag_resv_fail, XFS_ERRTAG_AG_RESV_FAIL);
|
||||
XFS_ERRORTAG_ATTR_RW(larp, XFS_ERRTAG_LARP);
|
||||
XFS_ERRORTAG_ATTR_RW(da_leaf_split, XFS_ERRTAG_DA_LEAF_SPLIT);
|
||||
XFS_ERRORTAG_ATTR_RW(attr_leaf_to_node, XFS_ERRTAG_ATTR_LEAF_TO_NODE);
|
||||
XFS_ERRORTAG_ATTR_RW(wb_delay_ms, XFS_ERRTAG_WB_DELAY_MS);
|
||||
XFS_ERRORTAG_ATTR_RW(write_delay_ms, XFS_ERRTAG_WRITE_DELAY_MS);
|
||||
XFS_ERRORTAG_ATTR_RW(exchmaps_finish_one, XFS_ERRTAG_EXCHMAPS_FINISH_ONE);
|
||||
XFS_ERRORTAG_ATTR_RW(metafile_resv_crit, XFS_ERRTAG_METAFILE_RESV_CRITICAL);
|
||||
.tag = XFS_ERRTAG_##_tag, \
|
||||
};
|
||||
#include "xfs_errortag.h"
|
||||
XFS_ERRTAGS
|
||||
#undef XFS_ERRTAG
|
||||
|
||||
#define XFS_ERRTAG(_tag, _name, _default) \
|
||||
&xfs_errortag_attr_##_name.attr,
|
||||
#include "xfs_errortag.h"
|
||||
static struct attribute *xfs_errortag_attrs[] = {
|
||||
XFS_ERRORTAG_ATTR_LIST(noerror),
|
||||
XFS_ERRORTAG_ATTR_LIST(iflush1),
|
||||
XFS_ERRORTAG_ATTR_LIST(iflush2),
|
||||
XFS_ERRORTAG_ATTR_LIST(iflush3),
|
||||
XFS_ERRORTAG_ATTR_LIST(iflush4),
|
||||
XFS_ERRORTAG_ATTR_LIST(iflush5),
|
||||
XFS_ERRORTAG_ATTR_LIST(iflush6),
|
||||
XFS_ERRORTAG_ATTR_LIST(dareadbuf),
|
||||
XFS_ERRORTAG_ATTR_LIST(btree_chk_lblk),
|
||||
XFS_ERRORTAG_ATTR_LIST(btree_chk_sblk),
|
||||
XFS_ERRORTAG_ATTR_LIST(readagf),
|
||||
XFS_ERRORTAG_ATTR_LIST(readagi),
|
||||
XFS_ERRORTAG_ATTR_LIST(itobp),
|
||||
XFS_ERRORTAG_ATTR_LIST(iunlink),
|
||||
XFS_ERRORTAG_ATTR_LIST(iunlinkrm),
|
||||
XFS_ERRORTAG_ATTR_LIST(dirinovalid),
|
||||
XFS_ERRORTAG_ATTR_LIST(bulkstat),
|
||||
XFS_ERRORTAG_ATTR_LIST(logiodone),
|
||||
XFS_ERRORTAG_ATTR_LIST(stratread),
|
||||
XFS_ERRORTAG_ATTR_LIST(stratcmpl),
|
||||
XFS_ERRORTAG_ATTR_LIST(diowrite),
|
||||
XFS_ERRORTAG_ATTR_LIST(bmapifmt),
|
||||
XFS_ERRORTAG_ATTR_LIST(free_extent),
|
||||
XFS_ERRORTAG_ATTR_LIST(rmap_finish_one),
|
||||
XFS_ERRORTAG_ATTR_LIST(refcount_continue_update),
|
||||
XFS_ERRORTAG_ATTR_LIST(refcount_finish_one),
|
||||
XFS_ERRORTAG_ATTR_LIST(bmap_finish_one),
|
||||
XFS_ERRORTAG_ATTR_LIST(ag_resv_critical),
|
||||
XFS_ERRORTAG_ATTR_LIST(log_bad_crc),
|
||||
XFS_ERRORTAG_ATTR_LIST(log_item_pin),
|
||||
XFS_ERRORTAG_ATTR_LIST(buf_lru_ref),
|
||||
XFS_ERRORTAG_ATTR_LIST(force_repair),
|
||||
XFS_ERRORTAG_ATTR_LIST(bad_summary),
|
||||
XFS_ERRORTAG_ATTR_LIST(iunlink_fallback),
|
||||
XFS_ERRORTAG_ATTR_LIST(buf_ioerror),
|
||||
XFS_ERRORTAG_ATTR_LIST(reduce_max_iextents),
|
||||
XFS_ERRORTAG_ATTR_LIST(bmap_alloc_minlen_extent),
|
||||
XFS_ERRORTAG_ATTR_LIST(ag_resv_fail),
|
||||
XFS_ERRORTAG_ATTR_LIST(larp),
|
||||
XFS_ERRORTAG_ATTR_LIST(da_leaf_split),
|
||||
XFS_ERRORTAG_ATTR_LIST(attr_leaf_to_node),
|
||||
XFS_ERRORTAG_ATTR_LIST(wb_delay_ms),
|
||||
XFS_ERRORTAG_ATTR_LIST(write_delay_ms),
|
||||
XFS_ERRORTAG_ATTR_LIST(exchmaps_finish_one),
|
||||
XFS_ERRORTAG_ATTR_LIST(metafile_resv_crit),
|
||||
NULL,
|
||||
XFS_ERRTAGS
|
||||
NULL
|
||||
};
|
||||
ATTRIBUTE_GROUPS(xfs_errortag);
|
||||
#undef XFS_ERRTAG
|
||||
|
||||
/* -1 because XFS_ERRTAG_DROP_WRITES got removed, + 1 for NULL termination */
|
||||
static_assert(ARRAY_SIZE(xfs_errortag_attrs) == XFS_ERRTAG_MAX);
|
||||
|
||||
static const struct kobj_type xfs_errortag_ktype = {
|
||||
.release = xfs_sysfs_release,
|
||||
@@ -295,7 +165,6 @@ xfs_errortag_enabled(
|
||||
bool
|
||||
xfs_errortag_test(
|
||||
struct xfs_mount *mp,
|
||||
const char *expression,
|
||||
const char *file,
|
||||
int line,
|
||||
unsigned int error_tag)
|
||||
@@ -321,35 +190,11 @@ xfs_errortag_test(
|
||||
return false;
|
||||
|
||||
xfs_warn_ratelimited(mp,
|
||||
"Injecting error (%s) at file %s, line %d, on filesystem \"%s\"",
|
||||
expression, file, line, mp->m_super->s_id);
|
||||
"Injecting error at file %s, line %d, on filesystem \"%s\"",
|
||||
file, line, mp->m_super->s_id);
|
||||
return true;
|
||||
}
|
||||
|
||||
int
|
||||
xfs_errortag_get(
|
||||
struct xfs_mount *mp,
|
||||
unsigned int error_tag)
|
||||
{
|
||||
if (!xfs_errortag_valid(error_tag))
|
||||
return -EINVAL;
|
||||
|
||||
return mp->m_errortag[error_tag];
|
||||
}
|
||||
|
||||
int
|
||||
xfs_errortag_set(
|
||||
struct xfs_mount *mp,
|
||||
unsigned int error_tag,
|
||||
unsigned int tag_value)
|
||||
{
|
||||
if (!xfs_errortag_valid(error_tag))
|
||||
return -EINVAL;
|
||||
|
||||
mp->m_errortag[error_tag] = tag_value;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
xfs_errortag_add(
|
||||
struct xfs_mount *mp,
|
||||
@@ -359,9 +204,8 @@ xfs_errortag_add(
|
||||
|
||||
if (!xfs_errortag_valid(error_tag))
|
||||
return -EINVAL;
|
||||
|
||||
return xfs_errortag_set(mp, error_tag,
|
||||
xfs_errortag_random_default[error_tag]);
|
||||
mp->m_errortag[error_tag] = xfs_errortag_random_default[error_tag];
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
|
||||
@@ -8,22 +8,17 @@
|
||||
|
||||
struct xfs_mount;
|
||||
|
||||
extern void xfs_error_report(const char *tag, int level, struct xfs_mount *mp,
|
||||
const char *filename, int linenum,
|
||||
xfs_failaddr_t failaddr);
|
||||
extern void xfs_corruption_error(const char *tag, int level,
|
||||
struct xfs_mount *mp, const void *buf, size_t bufsize,
|
||||
const char *filename, int linenum,
|
||||
xfs_failaddr_t failaddr);
|
||||
void xfs_error_report(const char *tag, int level, struct xfs_mount *mp,
|
||||
const char *filename, int linenum, xfs_failaddr_t failaddr);
|
||||
void xfs_corruption_error(const char *tag, int level, struct xfs_mount *mp,
|
||||
const void *buf, size_t bufsize, const char *filename,
|
||||
int linenum, xfs_failaddr_t failaddr);
|
||||
void xfs_buf_corruption_error(struct xfs_buf *bp, xfs_failaddr_t fa);
|
||||
extern void xfs_buf_verifier_error(struct xfs_buf *bp, int error,
|
||||
const char *name, const void *buf, size_t bufsz,
|
||||
xfs_failaddr_t failaddr);
|
||||
extern void xfs_verifier_error(struct xfs_buf *bp, int error,
|
||||
xfs_failaddr_t failaddr);
|
||||
extern void xfs_inode_verifier_error(struct xfs_inode *ip, int error,
|
||||
const char *name, const void *buf, size_t bufsz,
|
||||
xfs_failaddr_t failaddr);
|
||||
void xfs_buf_verifier_error(struct xfs_buf *bp, int error, const char *name,
|
||||
const void *buf, size_t bufsz, xfs_failaddr_t failaddr);
|
||||
void xfs_verifier_error(struct xfs_buf *bp, int error, xfs_failaddr_t failaddr);
|
||||
void xfs_inode_verifier_error(struct xfs_inode *ip, int error, const char *name,
|
||||
const void *buf, size_t bufsz, xfs_failaddr_t failaddr);
|
||||
|
||||
#define XFS_ERROR_REPORT(e, lvl, mp) \
|
||||
xfs_error_report(e, lvl, mp, __FILE__, __LINE__, __return_address)
|
||||
@@ -39,12 +34,12 @@ extern void xfs_inode_verifier_error(struct xfs_inode *ip, int error,
|
||||
#define XFS_CORRUPTION_DUMP_LEN (128)
|
||||
|
||||
#ifdef DEBUG
|
||||
extern int xfs_errortag_init(struct xfs_mount *mp);
|
||||
extern void xfs_errortag_del(struct xfs_mount *mp);
|
||||
extern bool xfs_errortag_test(struct xfs_mount *mp, const char *expression,
|
||||
const char *file, int line, unsigned int error_tag);
|
||||
#define XFS_TEST_ERROR(expr, mp, tag) \
|
||||
((expr) || xfs_errortag_test((mp), #expr, __FILE__, __LINE__, (tag)))
|
||||
int xfs_errortag_init(struct xfs_mount *mp);
|
||||
void xfs_errortag_del(struct xfs_mount *mp);
|
||||
bool xfs_errortag_test(struct xfs_mount *mp, const char *file, int line,
|
||||
unsigned int error_tag);
|
||||
#define XFS_TEST_ERROR(mp, tag) \
|
||||
xfs_errortag_test((mp), __FILE__, __LINE__, (tag))
|
||||
bool xfs_errortag_enabled(struct xfs_mount *mp, unsigned int tag);
|
||||
#define XFS_ERRORTAG_DELAY(mp, tag) \
|
||||
do { \
|
||||
@@ -58,17 +53,13 @@ bool xfs_errortag_enabled(struct xfs_mount *mp, unsigned int tag);
|
||||
mdelay((mp)->m_errortag[(tag)]); \
|
||||
} while (0)
|
||||
|
||||
extern int xfs_errortag_get(struct xfs_mount *mp, unsigned int error_tag);
|
||||
extern int xfs_errortag_set(struct xfs_mount *mp, unsigned int error_tag,
|
||||
unsigned int tag_value);
|
||||
extern int xfs_errortag_add(struct xfs_mount *mp, unsigned int error_tag);
|
||||
extern int xfs_errortag_clearall(struct xfs_mount *mp);
|
||||
int xfs_errortag_add(struct xfs_mount *mp, unsigned int error_tag);
|
||||
int xfs_errortag_clearall(struct xfs_mount *mp);
|
||||
#else
|
||||
#define xfs_errortag_init(mp) (0)
|
||||
#define xfs_errortag_del(mp)
|
||||
#define XFS_TEST_ERROR(expr, mp, tag) (expr)
|
||||
#define XFS_TEST_ERROR(mp, tag) (false)
|
||||
#define XFS_ERRORTAG_DELAY(mp, tag) ((void)0)
|
||||
#define xfs_errortag_set(mp, tag, val) (ENOSYS)
|
||||
#define xfs_errortag_add(mp, tag) (ENOSYS)
|
||||
#define xfs_errortag_clearall(mp) (ENOSYS)
|
||||
#endif /* DEBUG */
|
||||
|
||||
@@ -202,7 +202,7 @@ xfs_efi_copy_format(
|
||||
sizeof(struct xfs_extent));
|
||||
return 0;
|
||||
} else if (buf->iov_len == len32) {
|
||||
xfs_efi_log_format_32_t *src_efi_fmt_32 = buf->iov_base;
|
||||
struct xfs_efi_log_format_32 *src_efi_fmt_32 = buf->iov_base;
|
||||
|
||||
dst_efi_fmt->efi_type = src_efi_fmt_32->efi_type;
|
||||
dst_efi_fmt->efi_size = src_efi_fmt_32->efi_size;
|
||||
@@ -216,7 +216,7 @@ xfs_efi_copy_format(
|
||||
}
|
||||
return 0;
|
||||
} else if (buf->iov_len == len64) {
|
||||
xfs_efi_log_format_64_t *src_efi_fmt_64 = buf->iov_base;
|
||||
struct xfs_efi_log_format_64 *src_efi_fmt_64 = buf->iov_base;
|
||||
|
||||
dst_efi_fmt->efi_type = src_efi_fmt_64->efi_type;
|
||||
dst_efi_fmt->efi_size = src_efi_fmt_64->efi_size;
|
||||
|
||||
@@ -49,7 +49,7 @@ struct xfs_efi_log_item {
|
||||
struct xfs_log_item efi_item;
|
||||
atomic_t efi_refcount;
|
||||
atomic_t efi_next_extent;
|
||||
xfs_efi_log_format_t efi_format;
|
||||
struct xfs_efi_log_format efi_format;
|
||||
};
|
||||
|
||||
static inline size_t
|
||||
@@ -69,7 +69,7 @@ struct xfs_efd_log_item {
|
||||
struct xfs_log_item efd_item;
|
||||
struct xfs_efi_log_item *efd_efip;
|
||||
uint efd_next_extent;
|
||||
xfs_efd_log_format_t efd_format;
|
||||
struct xfs_efd_log_format efd_format;
|
||||
};
|
||||
|
||||
static inline size_t
|
||||
|
||||
@@ -75,52 +75,47 @@ xfs_dir_fsync(
|
||||
return xfs_log_force_inode(ip);
|
||||
}
|
||||
|
||||
static xfs_csn_t
|
||||
xfs_fsync_seq(
|
||||
struct xfs_inode *ip,
|
||||
bool datasync)
|
||||
{
|
||||
if (!xfs_ipincount(ip))
|
||||
return 0;
|
||||
if (datasync && !(ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP))
|
||||
return 0;
|
||||
return ip->i_itemp->ili_commit_seq;
|
||||
}
|
||||
|
||||
/*
|
||||
* All metadata updates are logged, which means that we just have to flush the
|
||||
* log up to the latest LSN that touched the inode.
|
||||
* All metadata updates are logged, which means that we just have to push the
|
||||
* journal to the required sequence number than holds the updates. We track
|
||||
* datasync commits separately to full sync commits, and hence only need to
|
||||
* select the correct sequence number for the log force here.
|
||||
*
|
||||
* If we have concurrent fsync/fdatasync() calls, we need them to all block on
|
||||
* the log force before we clear the ili_fsync_fields field. This ensures that
|
||||
* we don't get a racing sync operation that does not wait for the metadata to
|
||||
* hit the journal before returning. If we race with clearing ili_fsync_fields,
|
||||
* then all that will happen is the log force will do nothing as the lsn will
|
||||
* already be on disk. We can't race with setting ili_fsync_fields because that
|
||||
* is done under XFS_ILOCK_EXCL, and that can't happen because we hold the lock
|
||||
* shared until after the ili_fsync_fields is cleared.
|
||||
* We don't have to serialise against concurrent modifications, as we do not
|
||||
* have to wait for modifications that have not yet completed. We define a
|
||||
* transaction commit as completing when the commit sequence number is updated,
|
||||
* hence if the sequence number has not updated, the sync operation has been
|
||||
* run before the commit completed and we don't have to wait for it.
|
||||
*
|
||||
* If we have concurrent fsync/fdatasync() calls, the sequence numbers remain
|
||||
* set on the log item until - at least - the journal flush completes. In
|
||||
* reality, they are only cleared when the inode is fully unpinned (i.e.
|
||||
* persistent in the journal and not dirty in the CIL), and so we rely on
|
||||
* xfs_log_force_seq() either skipping sequences that have been persisted or
|
||||
* waiting on sequences that are still in flight to correctly order concurrent
|
||||
* sync operations.
|
||||
*/
|
||||
static int
|
||||
static int
|
||||
xfs_fsync_flush_log(
|
||||
struct xfs_inode *ip,
|
||||
bool datasync,
|
||||
int *log_flushed)
|
||||
{
|
||||
int error = 0;
|
||||
xfs_csn_t seq;
|
||||
struct xfs_inode_log_item *iip = ip->i_itemp;
|
||||
xfs_csn_t seq = 0;
|
||||
|
||||
xfs_ilock(ip, XFS_ILOCK_SHARED);
|
||||
seq = xfs_fsync_seq(ip, datasync);
|
||||
if (seq) {
|
||||
error = xfs_log_force_seq(ip->i_mount, seq, XFS_LOG_SYNC,
|
||||
spin_lock(&iip->ili_lock);
|
||||
if (datasync)
|
||||
seq = iip->ili_datasync_seq;
|
||||
else
|
||||
seq = iip->ili_commit_seq;
|
||||
spin_unlock(&iip->ili_lock);
|
||||
|
||||
if (!seq)
|
||||
return 0;
|
||||
|
||||
return xfs_log_force_seq(ip->i_mount, seq, XFS_LOG_SYNC,
|
||||
log_flushed);
|
||||
|
||||
spin_lock(&ip->i_itemp->ili_lock);
|
||||
ip->i_itemp->ili_fsync_fields = 0;
|
||||
spin_unlock(&ip->i_itemp->ili_lock);
|
||||
}
|
||||
xfs_iunlock(ip, XFS_ILOCK_SHARED);
|
||||
return error;
|
||||
}
|
||||
|
||||
STATIC int
|
||||
@@ -158,12 +153,10 @@ xfs_file_fsync(
|
||||
error = blkdev_issue_flush(mp->m_ddev_targp->bt_bdev);
|
||||
|
||||
/*
|
||||
* Any inode that has dirty modifications in the log is pinned. The
|
||||
* racy check here for a pinned inode will not catch modifications
|
||||
* that happen concurrently to the fsync call, but fsync semantics
|
||||
* only require to sync previously completed I/O.
|
||||
* If the inode has a inode log item attached, it may need the journal
|
||||
* flushed to persist any changes the log item might be tracking.
|
||||
*/
|
||||
if (xfs_ipincount(ip)) {
|
||||
if (ip->i_itemp) {
|
||||
err2 = xfs_fsync_flush_log(ip, datasync, &log_flushed);
|
||||
if (err2 && !error)
|
||||
error = err2;
|
||||
|
||||
@@ -14,8 +14,6 @@
|
||||
*/
|
||||
xfs_param_t xfs_params = {
|
||||
/* MIN DFLT MAX */
|
||||
.sgid_inherit = { 0, 0, 1 },
|
||||
.symlink_mode = { 0, 0, 1 },
|
||||
.panic_mask = { 0, 0, XFS_PTAG_MASK},
|
||||
.error_level = { 0, 3, 11 },
|
||||
.syncd_timer = { 1*100, 30*100, 7200*100},
|
||||
|
||||
@@ -646,8 +646,7 @@ xfs_iget_cache_miss(
|
||||
goto out_destroy;
|
||||
|
||||
/*
|
||||
* For version 5 superblocks, if we are initialising a new inode and we
|
||||
* are not utilising the XFS_FEAT_IKEEP inode cluster mode, we can
|
||||
* For version 5 superblocks, if we are initialising a new inode, we
|
||||
* simply build the new inode core with a random generation number.
|
||||
*
|
||||
* For version 4 (and older) superblocks, log recovery is dependent on
|
||||
@@ -655,8 +654,7 @@ xfs_iget_cache_miss(
|
||||
* value and hence we must also read the inode off disk even when
|
||||
* initializing new inodes.
|
||||
*/
|
||||
if (xfs_has_v3inodes(mp) &&
|
||||
(flags & XFS_IGET_CREATE) && !xfs_has_ikeep(mp)) {
|
||||
if (xfs_has_v3inodes(mp) && (flags & XFS_IGET_CREATE)) {
|
||||
VFS_I(ip)->i_generation = get_random_u32();
|
||||
} else {
|
||||
struct xfs_buf *bp;
|
||||
|
||||
@@ -877,6 +877,35 @@ xfs_create_tmpfile(
|
||||
return error;
|
||||
}
|
||||
|
||||
static inline int
|
||||
xfs_projid_differ(
|
||||
struct xfs_inode *tdp,
|
||||
struct xfs_inode *sip)
|
||||
{
|
||||
/*
|
||||
* If we are using project inheritance, we only allow hard link/renames
|
||||
* creation in our tree when the project IDs are the same; else
|
||||
* the tree quota mechanism could be circumvented.
|
||||
*/
|
||||
if (unlikely((tdp->i_diflags & XFS_DIFLAG_PROJINHERIT) &&
|
||||
tdp->i_projid != sip->i_projid)) {
|
||||
/*
|
||||
* Project quota setup skips special files which can
|
||||
* leave inodes in a PROJINHERIT directory without a
|
||||
* project ID set. We need to allow links to be made
|
||||
* to these "project-less" inodes because userspace
|
||||
* expects them to succeed after project ID setup,
|
||||
* but everything else should be rejected.
|
||||
*/
|
||||
if (!special_file(VFS_I(sip)->i_mode) ||
|
||||
sip->i_projid != 0) {
|
||||
return -EXDEV;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
xfs_link(
|
||||
struct xfs_inode *tdp,
|
||||
@@ -930,27 +959,9 @@ xfs_link(
|
||||
goto error_return;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we are using project inheritance, we only allow hard link
|
||||
* creation in our tree when the project IDs are the same; else
|
||||
* the tree quota mechanism could be circumvented.
|
||||
*/
|
||||
if (unlikely((tdp->i_diflags & XFS_DIFLAG_PROJINHERIT) &&
|
||||
tdp->i_projid != sip->i_projid)) {
|
||||
/*
|
||||
* Project quota setup skips special files which can
|
||||
* leave inodes in a PROJINHERIT directory without a
|
||||
* project ID set. We need to allow links to be made
|
||||
* to these "project-less" inodes because userspace
|
||||
* expects them to succeed after project ID setup,
|
||||
* but everything else should be rejected.
|
||||
*/
|
||||
if (!special_file(VFS_I(sip)->i_mode) ||
|
||||
sip->i_projid != 0) {
|
||||
error = -EXDEV;
|
||||
goto error_return;
|
||||
}
|
||||
}
|
||||
error = xfs_projid_differ(tdp, sip);
|
||||
if (error)
|
||||
goto error_return;
|
||||
|
||||
error = xfs_dir_add_child(tp, resblks, &du);
|
||||
if (error)
|
||||
@@ -1656,7 +1667,6 @@ retry:
|
||||
spin_lock(&iip->ili_lock);
|
||||
iip->ili_last_fields = iip->ili_fields;
|
||||
iip->ili_fields = 0;
|
||||
iip->ili_fsync_fields = 0;
|
||||
spin_unlock(&iip->ili_lock);
|
||||
ASSERT(iip->ili_last_fields);
|
||||
|
||||
@@ -1821,12 +1831,20 @@ static void
|
||||
xfs_iunpin(
|
||||
struct xfs_inode *ip)
|
||||
{
|
||||
xfs_assert_ilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED);
|
||||
struct xfs_inode_log_item *iip = ip->i_itemp;
|
||||
xfs_csn_t seq = 0;
|
||||
|
||||
trace_xfs_inode_unpin_nowait(ip, _RET_IP_);
|
||||
xfs_assert_ilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED);
|
||||
|
||||
spin_lock(&iip->ili_lock);
|
||||
seq = iip->ili_commit_seq;
|
||||
spin_unlock(&iip->ili_lock);
|
||||
if (!seq)
|
||||
return;
|
||||
|
||||
/* Give the log a push to start the unpinning I/O */
|
||||
xfs_log_force_seq(ip->i_mount, ip->i_itemp->ili_commit_seq, 0, NULL);
|
||||
xfs_log_force_seq(ip->i_mount, seq, 0, NULL);
|
||||
|
||||
}
|
||||
|
||||
@@ -2227,16 +2245,9 @@ retry:
|
||||
if (du_wip.ip)
|
||||
xfs_trans_ijoin(tp, du_wip.ip, 0);
|
||||
|
||||
/*
|
||||
* If we are using project inheritance, we only allow renames
|
||||
* into our tree when the project IDs are the same; else the
|
||||
* tree quota mechanism would be circumvented.
|
||||
*/
|
||||
if (unlikely((target_dp->i_diflags & XFS_DIFLAG_PROJINHERIT) &&
|
||||
target_dp->i_projid != src_ip->i_projid)) {
|
||||
error = -EXDEV;
|
||||
error = xfs_projid_differ(target_dp, src_ip);
|
||||
if (error)
|
||||
goto out_trans_cancel;
|
||||
}
|
||||
|
||||
/* RENAME_EXCHANGE is unique from here on. */
|
||||
if (flags & RENAME_EXCHANGE) {
|
||||
@@ -2377,8 +2388,8 @@ xfs_iflush(
|
||||
* error handling as the caller will shutdown and fail the buffer.
|
||||
*/
|
||||
error = -EFSCORRUPTED;
|
||||
if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC),
|
||||
mp, XFS_ERRTAG_IFLUSH_1)) {
|
||||
if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC) ||
|
||||
XFS_TEST_ERROR(mp, XFS_ERRTAG_IFLUSH_1)) {
|
||||
xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
|
||||
"%s: Bad inode %llu magic number 0x%x, ptr "PTR_FMT,
|
||||
__func__, ip->i_ino, be16_to_cpu(dip->di_magic), dip);
|
||||
@@ -2394,29 +2405,27 @@ xfs_iflush(
|
||||
goto flush_out;
|
||||
}
|
||||
} else if (S_ISREG(VFS_I(ip)->i_mode)) {
|
||||
if (XFS_TEST_ERROR(
|
||||
ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS &&
|
||||
ip->i_df.if_format != XFS_DINODE_FMT_BTREE,
|
||||
mp, XFS_ERRTAG_IFLUSH_3)) {
|
||||
if ((ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS &&
|
||||
ip->i_df.if_format != XFS_DINODE_FMT_BTREE) ||
|
||||
XFS_TEST_ERROR(mp, XFS_ERRTAG_IFLUSH_3)) {
|
||||
xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
|
||||
"%s: Bad regular inode %llu, ptr "PTR_FMT,
|
||||
__func__, ip->i_ino, ip);
|
||||
goto flush_out;
|
||||
}
|
||||
} else if (S_ISDIR(VFS_I(ip)->i_mode)) {
|
||||
if (XFS_TEST_ERROR(
|
||||
ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS &&
|
||||
ip->i_df.if_format != XFS_DINODE_FMT_BTREE &&
|
||||
ip->i_df.if_format != XFS_DINODE_FMT_LOCAL,
|
||||
mp, XFS_ERRTAG_IFLUSH_4)) {
|
||||
if ((ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS &&
|
||||
ip->i_df.if_format != XFS_DINODE_FMT_BTREE &&
|
||||
ip->i_df.if_format != XFS_DINODE_FMT_LOCAL) ||
|
||||
XFS_TEST_ERROR(mp, XFS_ERRTAG_IFLUSH_4)) {
|
||||
xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
|
||||
"%s: Bad directory inode %llu, ptr "PTR_FMT,
|
||||
__func__, ip->i_ino, ip);
|
||||
goto flush_out;
|
||||
}
|
||||
}
|
||||
if (XFS_TEST_ERROR(ip->i_df.if_nextents + xfs_ifork_nextents(&ip->i_af) >
|
||||
ip->i_nblocks, mp, XFS_ERRTAG_IFLUSH_5)) {
|
||||
if (ip->i_df.if_nextents + xfs_ifork_nextents(&ip->i_af) >
|
||||
ip->i_nblocks || XFS_TEST_ERROR(mp, XFS_ERRTAG_IFLUSH_5)) {
|
||||
xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
|
||||
"%s: detected corrupt incore inode %llu, "
|
||||
"total extents = %llu nblocks = %lld, ptr "PTR_FMT,
|
||||
@@ -2425,8 +2434,8 @@ xfs_iflush(
|
||||
ip->i_nblocks, ip);
|
||||
goto flush_out;
|
||||
}
|
||||
if (XFS_TEST_ERROR(ip->i_forkoff > mp->m_sb.sb_inodesize,
|
||||
mp, XFS_ERRTAG_IFLUSH_6)) {
|
||||
if (ip->i_forkoff > mp->m_sb.sb_inodesize ||
|
||||
XFS_TEST_ERROR(mp, XFS_ERRTAG_IFLUSH_6)) {
|
||||
xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
|
||||
"%s: bad inode %llu, forkoff 0x%x, ptr "PTR_FMT,
|
||||
__func__, ip->i_ino, ip->i_forkoff, ip);
|
||||
@@ -2502,7 +2511,6 @@ flush_out:
|
||||
spin_lock(&iip->ili_lock);
|
||||
iip->ili_last_fields = iip->ili_fields;
|
||||
iip->ili_fields = 0;
|
||||
iip->ili_fsync_fields = 0;
|
||||
set_bit(XFS_LI_FLUSHING, &iip->ili_item.li_flags);
|
||||
spin_unlock(&iip->ili_lock);
|
||||
|
||||
@@ -2661,12 +2669,15 @@ int
|
||||
xfs_log_force_inode(
|
||||
struct xfs_inode *ip)
|
||||
{
|
||||
struct xfs_inode_log_item *iip = ip->i_itemp;
|
||||
xfs_csn_t seq = 0;
|
||||
|
||||
xfs_ilock(ip, XFS_ILOCK_SHARED);
|
||||
if (xfs_ipincount(ip))
|
||||
seq = ip->i_itemp->ili_commit_seq;
|
||||
xfs_iunlock(ip, XFS_ILOCK_SHARED);
|
||||
if (!iip)
|
||||
return 0;
|
||||
|
||||
spin_lock(&iip->ili_lock);
|
||||
seq = iip->ili_commit_seq;
|
||||
spin_unlock(&iip->ili_lock);
|
||||
|
||||
if (!seq)
|
||||
return 0;
|
||||
|
||||
@@ -131,46 +131,28 @@ xfs_inode_item_precommit(
|
||||
}
|
||||
|
||||
/*
|
||||
* Inode verifiers do not check that the extent size hint is an integer
|
||||
* multiple of the rt extent size on a directory with both rtinherit
|
||||
* and extszinherit flags set. If we're logging a directory that is
|
||||
* misconfigured in this way, clear the hint.
|
||||
* Inode verifiers do not check that the extent size hints are an
|
||||
* integer multiple of the rt extent size on a directory with
|
||||
* rtinherit flags set. If we're logging a directory that is
|
||||
* misconfigured in this way, clear the bad hints.
|
||||
*/
|
||||
if ((ip->i_diflags & XFS_DIFLAG_RTINHERIT) &&
|
||||
(ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) &&
|
||||
xfs_extlen_to_rtxmod(ip->i_mount, ip->i_extsize) > 0) {
|
||||
ip->i_diflags &= ~(XFS_DIFLAG_EXTSIZE |
|
||||
XFS_DIFLAG_EXTSZINHERIT);
|
||||
ip->i_extsize = 0;
|
||||
flags |= XFS_ILOG_CORE;
|
||||
if (ip->i_diflags & XFS_DIFLAG_RTINHERIT) {
|
||||
if ((ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) &&
|
||||
xfs_extlen_to_rtxmod(ip->i_mount, ip->i_extsize) > 0) {
|
||||
ip->i_diflags &= ~(XFS_DIFLAG_EXTSIZE |
|
||||
XFS_DIFLAG_EXTSZINHERIT);
|
||||
ip->i_extsize = 0;
|
||||
flags |= XFS_ILOG_CORE;
|
||||
}
|
||||
if ((ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) &&
|
||||
xfs_extlen_to_rtxmod(ip->i_mount, ip->i_cowextsize) > 0) {
|
||||
ip->i_diflags2 &= ~XFS_DIFLAG2_COWEXTSIZE;
|
||||
ip->i_cowextsize = 0;
|
||||
flags |= XFS_ILOG_CORE;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Record the specific change for fdatasync optimisation. This allows
|
||||
* fdatasync to skip log forces for inodes that are only timestamp
|
||||
* dirty. Once we've processed the XFS_ILOG_IVERSION flag, convert it
|
||||
* to XFS_ILOG_CORE so that the actual on-disk dirty tracking
|
||||
* (ili_fields) correctly tracks that the version has changed.
|
||||
*/
|
||||
spin_lock(&iip->ili_lock);
|
||||
iip->ili_fsync_fields |= (flags & ~XFS_ILOG_IVERSION);
|
||||
if (flags & XFS_ILOG_IVERSION)
|
||||
flags = ((flags & ~XFS_ILOG_IVERSION) | XFS_ILOG_CORE);
|
||||
|
||||
/*
|
||||
* Inode verifiers do not check that the CoW extent size hint is an
|
||||
* integer multiple of the rt extent size on a directory with both
|
||||
* rtinherit and cowextsize flags set. If we're logging a directory
|
||||
* that is misconfigured in this way, clear the hint.
|
||||
*/
|
||||
if ((ip->i_diflags & XFS_DIFLAG_RTINHERIT) &&
|
||||
(ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) &&
|
||||
xfs_extlen_to_rtxmod(ip->i_mount, ip->i_cowextsize) > 0) {
|
||||
ip->i_diflags2 &= ~XFS_DIFLAG2_COWEXTSIZE;
|
||||
ip->i_cowextsize = 0;
|
||||
flags |= XFS_ILOG_CORE;
|
||||
}
|
||||
|
||||
if (!iip->ili_item.li_buf) {
|
||||
struct xfs_buf *bp;
|
||||
int error;
|
||||
@@ -204,6 +186,20 @@ xfs_inode_item_precommit(
|
||||
xfs_trans_brelse(tp, bp);
|
||||
}
|
||||
|
||||
/*
|
||||
* Store the dirty flags back into the inode item as this state is used
|
||||
* later on in xfs_inode_item_committing() to determine whether the
|
||||
* transaction is relevant to fsync state or not.
|
||||
*/
|
||||
iip->ili_dirty_flags = flags;
|
||||
|
||||
/*
|
||||
* Convert the flags on-disk fields that have been modified in the
|
||||
* transaction so that ili_fields tracks the changes correctly.
|
||||
*/
|
||||
if (flags & XFS_ILOG_IVERSION)
|
||||
flags = ((flags & ~XFS_ILOG_IVERSION) | XFS_ILOG_CORE);
|
||||
|
||||
/*
|
||||
* Always OR in the bits from the ili_last_fields field. This is to
|
||||
* coordinate with the xfs_iflush() and xfs_buf_inode_iodone() routines
|
||||
@@ -214,12 +210,6 @@ xfs_inode_item_precommit(
|
||||
spin_unlock(&iip->ili_lock);
|
||||
|
||||
xfs_inode_item_precommit_check(ip);
|
||||
|
||||
/*
|
||||
* We are done with the log item transaction dirty state, so clear it so
|
||||
* that it doesn't pollute future transactions.
|
||||
*/
|
||||
iip->ili_dirty_flags = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -729,13 +719,24 @@ xfs_inode_item_unpin(
|
||||
struct xfs_log_item *lip,
|
||||
int remove)
|
||||
{
|
||||
struct xfs_inode *ip = INODE_ITEM(lip)->ili_inode;
|
||||
struct xfs_inode_log_item *iip = INODE_ITEM(lip);
|
||||
struct xfs_inode *ip = iip->ili_inode;
|
||||
|
||||
trace_xfs_inode_unpin(ip, _RET_IP_);
|
||||
ASSERT(lip->li_buf || xfs_iflags_test(ip, XFS_ISTALE));
|
||||
ASSERT(atomic_read(&ip->i_pincount) > 0);
|
||||
if (atomic_dec_and_test(&ip->i_pincount))
|
||||
|
||||
/*
|
||||
* If this is the last unpin, then the inode no longer needs a journal
|
||||
* flush to persist it. Hence we can clear the commit sequence numbers
|
||||
* as a fsync/fdatasync operation on the inode at this point is a no-op.
|
||||
*/
|
||||
if (atomic_dec_and_lock(&ip->i_pincount, &iip->ili_lock)) {
|
||||
iip->ili_commit_seq = 0;
|
||||
iip->ili_datasync_seq = 0;
|
||||
spin_unlock(&iip->ili_lock);
|
||||
wake_up_bit(&ip->i_flags, __XFS_IPINNED_BIT);
|
||||
}
|
||||
}
|
||||
|
||||
STATIC uint
|
||||
@@ -858,12 +859,45 @@ xfs_inode_item_committed(
|
||||
return lsn;
|
||||
}
|
||||
|
||||
/*
|
||||
* The modification is now complete, so before we unlock the inode we need to
|
||||
* update the commit sequence numbers for data integrity journal flushes. We
|
||||
* always record the commit sequence number (ili_commit_seq) so that anything
|
||||
* that needs a full journal sync will capture all of this modification.
|
||||
*
|
||||
* We then
|
||||
* check if the changes will impact a datasync (O_DSYNC) journal flush. If the
|
||||
* changes will require a datasync flush, then we also record the sequence in
|
||||
* ili_datasync_seq.
|
||||
*
|
||||
* These commit sequence numbers will get cleared atomically with the inode being
|
||||
* unpinned (i.e. pin count goes to zero), and so it will only be set when the
|
||||
* inode is dirty in the journal. This removes the need for checking if the
|
||||
* inode is pinned to determine if a journal flush is necessary, and hence
|
||||
* removes the need for holding the ILOCK_SHARED in xfs_file_fsync() to
|
||||
* serialise pin counts against commit sequence number updates.
|
||||
*
|
||||
*/
|
||||
STATIC void
|
||||
xfs_inode_item_committing(
|
||||
struct xfs_log_item *lip,
|
||||
xfs_csn_t seq)
|
||||
{
|
||||
INODE_ITEM(lip)->ili_commit_seq = seq;
|
||||
struct xfs_inode_log_item *iip = INODE_ITEM(lip);
|
||||
|
||||
spin_lock(&iip->ili_lock);
|
||||
iip->ili_commit_seq = seq;
|
||||
if (iip->ili_dirty_flags & ~(XFS_ILOG_IVERSION | XFS_ILOG_TIMESTAMP))
|
||||
iip->ili_datasync_seq = seq;
|
||||
spin_unlock(&iip->ili_lock);
|
||||
|
||||
/*
|
||||
* Clear the per-transaction dirty flags now that we have finished
|
||||
* recording the transaction's inode modifications in the CIL and are
|
||||
* about to release and (maybe) unlock the inode.
|
||||
*/
|
||||
iip->ili_dirty_flags = 0;
|
||||
|
||||
return xfs_inode_item_release(lip);
|
||||
}
|
||||
|
||||
@@ -1055,7 +1089,6 @@ xfs_iflush_abort_clean(
|
||||
{
|
||||
iip->ili_last_fields = 0;
|
||||
iip->ili_fields = 0;
|
||||
iip->ili_fsync_fields = 0;
|
||||
iip->ili_flush_lsn = 0;
|
||||
iip->ili_item.li_buf = NULL;
|
||||
list_del_init(&iip->ili_item.li_bio_list);
|
||||
|
||||
@@ -32,9 +32,17 @@ struct xfs_inode_log_item {
|
||||
spinlock_t ili_lock; /* flush state lock */
|
||||
unsigned int ili_last_fields; /* fields when flushed */
|
||||
unsigned int ili_fields; /* fields to be logged */
|
||||
unsigned int ili_fsync_fields; /* logged since last fsync */
|
||||
xfs_lsn_t ili_flush_lsn; /* lsn at last flush */
|
||||
|
||||
/*
|
||||
* We record the sequence number for every inode modification, as
|
||||
* well as those that only require fdatasync operations for data
|
||||
* integrity. This allows optimisation of the O_DSYNC/fdatasync path
|
||||
* without needing to track what modifications the journal is currently
|
||||
* carrying for the inode. These are protected by the above ili_lock.
|
||||
*/
|
||||
xfs_csn_t ili_commit_seq; /* last transaction commit */
|
||||
xfs_csn_t ili_datasync_seq; /* for datasync optimisation */
|
||||
};
|
||||
|
||||
static inline int xfs_inode_clean(struct xfs_inode *ip)
|
||||
|
||||
@@ -512,9 +512,6 @@ xfs_fileattr_get(
|
||||
{
|
||||
struct xfs_inode *ip = XFS_I(d_inode(dentry));
|
||||
|
||||
if (d_is_special(dentry))
|
||||
return -ENOTTY;
|
||||
|
||||
xfs_ilock(ip, XFS_ILOCK_SHARED);
|
||||
xfs_fill_fsxattr(ip, XFS_DATA_FORK, fa);
|
||||
xfs_iunlock(ip, XFS_ILOCK_SHARED);
|
||||
@@ -736,9 +733,6 @@ xfs_fileattr_set(
|
||||
|
||||
trace_xfs_ioctl_setattr(ip);
|
||||
|
||||
if (d_is_special(dentry))
|
||||
return -ENOTTY;
|
||||
|
||||
if (!fa->fsx_valid) {
|
||||
if (fa->flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL |
|
||||
FS_NOATIME_FL | FS_NODUMP_FL |
|
||||
@@ -1209,21 +1203,21 @@ xfs_file_ioctl(
|
||||
current->comm);
|
||||
return -ENOTTY;
|
||||
case XFS_IOC_DIOINFO: {
|
||||
struct xfs_buftarg *target = xfs_inode_buftarg(ip);
|
||||
struct kstat st;
|
||||
struct dioattr da;
|
||||
|
||||
da.d_mem = target->bt_logical_sectorsize;
|
||||
error = vfs_getattr(&filp->f_path, &st, STATX_DIOALIGN, 0);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
/*
|
||||
* See xfs_report_dioalign() for an explanation about why this
|
||||
* reports a value larger than the sector size for COW inodes.
|
||||
* Some userspace directly feeds the return value to
|
||||
* posix_memalign, which fails for values that are smaller than
|
||||
* the pointer size. Round up the value to not break userspace.
|
||||
*/
|
||||
if (xfs_is_cow_inode(ip))
|
||||
da.d_miniosz = xfs_inode_alloc_unitsize(ip);
|
||||
else
|
||||
da.d_miniosz = target->bt_logical_sectorsize;
|
||||
da.d_mem = roundup(st.dio_mem_align, sizeof(void *));
|
||||
da.d_miniosz = st.dio_offset_align;
|
||||
da.d_maxiosz = INT_MAX & ~(da.d_miniosz - 1);
|
||||
|
||||
if (copy_to_user(arg, &da, sizeof(da)))
|
||||
return -EFAULT;
|
||||
return 0;
|
||||
|
||||
@@ -149,9 +149,18 @@ xfs_bmbt_to_iomap(
|
||||
iomap->bdev = target->bt_bdev;
|
||||
iomap->flags = iomap_flags;
|
||||
|
||||
if (xfs_ipincount(ip) &&
|
||||
(ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP))
|
||||
iomap->flags |= IOMAP_F_DIRTY;
|
||||
/*
|
||||
* If the inode is dirty for datasync purposes, let iomap know so it
|
||||
* doesn't elide the IO completion journal flushes on O_DSYNC IO.
|
||||
*/
|
||||
if (ip->i_itemp) {
|
||||
struct xfs_inode_log_item *iip = ip->i_itemp;
|
||||
|
||||
spin_lock(&iip->ili_lock);
|
||||
if (iip->ili_datasync_seq)
|
||||
iomap->flags |= IOMAP_F_DIRTY;
|
||||
spin_unlock(&iip->ili_lock);
|
||||
}
|
||||
|
||||
iomap->validity_cookie = sequence_cookie;
|
||||
return 0;
|
||||
@@ -1554,7 +1563,7 @@ xfs_zoned_buffered_write_iomap_begin(
|
||||
return error;
|
||||
|
||||
if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(&ip->i_df)) ||
|
||||
XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
|
||||
XFS_TEST_ERROR(mp, XFS_ERRTAG_BMAPIFORMAT)) {
|
||||
xfs_bmap_mark_sick(ip, XFS_DATA_FORK);
|
||||
error = -EFSCORRUPTED;
|
||||
goto out_unlock;
|
||||
@@ -1728,7 +1737,7 @@ xfs_buffered_write_iomap_begin(
|
||||
return error;
|
||||
|
||||
if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(&ip->i_df)) ||
|
||||
XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
|
||||
XFS_TEST_ERROR(mp, XFS_ERRTAG_BMAPIFORMAT)) {
|
||||
xfs_bmap_mark_sick(ip, XFS_DATA_FORK);
|
||||
error = -EFSCORRUPTED;
|
||||
goto out_unlock;
|
||||
|
||||
@@ -431,14 +431,12 @@ xfs_vn_symlink(
|
||||
struct dentry *dentry,
|
||||
const char *symname)
|
||||
{
|
||||
struct inode *inode;
|
||||
struct xfs_inode *cip = NULL;
|
||||
struct xfs_name name;
|
||||
int error;
|
||||
umode_t mode;
|
||||
struct inode *inode;
|
||||
struct xfs_inode *cip = NULL;
|
||||
struct xfs_name name;
|
||||
int error;
|
||||
umode_t mode = S_IFLNK | S_IRWXUGO;
|
||||
|
||||
mode = S_IFLNK |
|
||||
(irix_symlink_mode ? 0777 & ~current_umask() : S_IRWXUGO);
|
||||
error = xfs_dentry_mode_to_name(&name, dentry, mode);
|
||||
if (unlikely(error))
|
||||
goto out;
|
||||
@@ -1335,6 +1333,8 @@ static const struct inode_operations xfs_symlink_inode_operations = {
|
||||
.setattr = xfs_vn_setattr,
|
||||
.listxattr = xfs_vn_listxattr,
|
||||
.update_time = xfs_vn_update_time,
|
||||
.fileattr_get = xfs_fileattr_get,
|
||||
.fileattr_set = xfs_fileattr_set,
|
||||
};
|
||||
|
||||
/* Figure out if this file actually supports DAX. */
|
||||
|
||||
@@ -89,8 +89,6 @@ typedef __u32 xfs_nlink_t;
|
||||
#undef XFS_NATIVE_HOST
|
||||
#endif
|
||||
|
||||
#define irix_sgid_inherit xfs_params.sgid_inherit.val
|
||||
#define irix_symlink_mode xfs_params.symlink_mode.val
|
||||
#define xfs_panic_mask xfs_params.panic_mask.val
|
||||
#define xfs_error_level xfs_params.error_level.val
|
||||
#define xfs_syncd_centisecs xfs_params.syncd_timer.val
|
||||
|
||||
@@ -969,8 +969,8 @@ xfs_log_unmount_write(
|
||||
* counters will be recalculated. Refer to xlog_check_unmount_rec for
|
||||
* more details.
|
||||
*/
|
||||
if (XFS_TEST_ERROR(xfs_fs_has_sickness(mp, XFS_SICK_FS_COUNTERS), mp,
|
||||
XFS_ERRTAG_FORCE_SUMMARY_RECALC)) {
|
||||
if (xfs_fs_has_sickness(mp, XFS_SICK_FS_COUNTERS) ||
|
||||
XFS_TEST_ERROR(mp, XFS_ERRTAG_FORCE_SUMMARY_RECALC)) {
|
||||
xfs_alert(mp, "%s: will fix summary counters at next mount",
|
||||
__func__);
|
||||
return;
|
||||
@@ -1240,7 +1240,7 @@ xlog_ioend_work(
|
||||
/*
|
||||
* Race to shutdown the filesystem if we see an error.
|
||||
*/
|
||||
if (XFS_TEST_ERROR(error, log->l_mp, XFS_ERRTAG_IODONE_IOERR)) {
|
||||
if (error || XFS_TEST_ERROR(log->l_mp, XFS_ERRTAG_IODONE_IOERR)) {
|
||||
xfs_alert(log->l_mp, "log I/O error %d", error);
|
||||
xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR);
|
||||
}
|
||||
@@ -1567,13 +1567,13 @@ xlog_cksum(
|
||||
struct xlog *log,
|
||||
struct xlog_rec_header *rhead,
|
||||
char *dp,
|
||||
int size)
|
||||
unsigned int hdrsize,
|
||||
unsigned int size)
|
||||
{
|
||||
uint32_t crc;
|
||||
|
||||
/* first generate the crc for the record header ... */
|
||||
crc = xfs_start_cksum_update((char *)rhead,
|
||||
sizeof(struct xlog_rec_header),
|
||||
crc = xfs_start_cksum_update((char *)rhead, hdrsize,
|
||||
offsetof(struct xlog_rec_header, h_crc));
|
||||
|
||||
/* ... then for additional cycle data for v2 logs ... */
|
||||
@@ -1817,7 +1817,7 @@ xlog_sync(
|
||||
|
||||
/* calculate the checksum */
|
||||
iclog->ic_header.h_crc = xlog_cksum(log, &iclog->ic_header,
|
||||
iclog->ic_datap, size);
|
||||
iclog->ic_datap, XLOG_REC_SIZE, size);
|
||||
/*
|
||||
* Intentionally corrupt the log record CRC based on the error injection
|
||||
* frequency, if defined. This facilitates testing log recovery in the
|
||||
@@ -1826,7 +1826,7 @@ xlog_sync(
|
||||
* detects the bad CRC and attempts to recover.
|
||||
*/
|
||||
#ifdef DEBUG
|
||||
if (XFS_TEST_ERROR(false, log->l_mp, XFS_ERRTAG_LOG_BAD_CRC)) {
|
||||
if (XFS_TEST_ERROR(log->l_mp, XFS_ERRTAG_LOG_BAD_CRC)) {
|
||||
iclog->ic_header.h_crc &= cpu_to_le32(0xAAAAAAAA);
|
||||
iclog->ic_fail_crc = true;
|
||||
xfs_warn(log->l_mp,
|
||||
@@ -2655,10 +2655,11 @@ restart:
|
||||
* until you know exactly how many bytes get copied. Therefore, wait
|
||||
* until later to update ic_offset.
|
||||
*
|
||||
* xlog_write() algorithm assumes that at least 2 xlog_op_header_t's
|
||||
* xlog_write() algorithm assumes that at least 2 xlog_op_header's
|
||||
* can fit into remaining data section.
|
||||
*/
|
||||
if (iclog->ic_size - iclog->ic_offset < 2*sizeof(xlog_op_header_t)) {
|
||||
if (iclog->ic_size - iclog->ic_offset <
|
||||
2 * sizeof(struct xlog_op_header)) {
|
||||
int error = 0;
|
||||
|
||||
xlog_state_switch_iclogs(log, iclog, iclog->ic_size);
|
||||
@@ -3152,11 +3153,11 @@ xlog_calc_unit_res(
|
||||
*/
|
||||
|
||||
/* for trans header */
|
||||
unit_bytes += sizeof(xlog_op_header_t);
|
||||
unit_bytes += sizeof(xfs_trans_header_t);
|
||||
unit_bytes += sizeof(struct xlog_op_header);
|
||||
unit_bytes += sizeof(struct xfs_trans_header);
|
||||
|
||||
/* for start-rec */
|
||||
unit_bytes += sizeof(xlog_op_header_t);
|
||||
unit_bytes += sizeof(struct xlog_op_header);
|
||||
|
||||
/*
|
||||
* for LR headers - the space for data in an iclog is the size minus
|
||||
@@ -3179,12 +3180,12 @@ xlog_calc_unit_res(
|
||||
num_headers = howmany(unit_bytes, iclog_space);
|
||||
|
||||
/* for split-recs - ophdrs added when data split over LRs */
|
||||
unit_bytes += sizeof(xlog_op_header_t) * num_headers;
|
||||
unit_bytes += sizeof(struct xlog_op_header) * num_headers;
|
||||
|
||||
/* add extra header reservations if we overrun */
|
||||
while (!num_headers ||
|
||||
howmany(unit_bytes, iclog_space) > num_headers) {
|
||||
unit_bytes += sizeof(xlog_op_header_t);
|
||||
unit_bytes += sizeof(struct xlog_op_header);
|
||||
num_headers++;
|
||||
}
|
||||
unit_bytes += log->l_iclog_hsize * num_headers;
|
||||
@@ -3321,7 +3322,7 @@ xlog_verify_iclog(
|
||||
struct xlog_in_core *iclog,
|
||||
int count)
|
||||
{
|
||||
xlog_op_header_t *ophead;
|
||||
struct xlog_op_header *ophead;
|
||||
xlog_in_core_t *icptr;
|
||||
xlog_in_core_2_t *xhdr;
|
||||
void *base_ptr, *ptr, *p;
|
||||
@@ -3399,7 +3400,7 @@ xlog_verify_iclog(
|
||||
op_len = be32_to_cpu(iclog->ic_header.h_cycle_data[idx]);
|
||||
}
|
||||
}
|
||||
ptr += sizeof(xlog_op_header_t) + op_len;
|
||||
ptr += sizeof(struct xlog_op_header) + op_len;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -20,6 +20,43 @@ struct xfs_log_vec {
|
||||
int lv_alloc_size; /* size of allocated lv */
|
||||
};
|
||||
|
||||
/* Region types for iovec's i_type */
|
||||
#define XLOG_REG_TYPE_BFORMAT 1
|
||||
#define XLOG_REG_TYPE_BCHUNK 2
|
||||
#define XLOG_REG_TYPE_EFI_FORMAT 3
|
||||
#define XLOG_REG_TYPE_EFD_FORMAT 4
|
||||
#define XLOG_REG_TYPE_IFORMAT 5
|
||||
#define XLOG_REG_TYPE_ICORE 6
|
||||
#define XLOG_REG_TYPE_IEXT 7
|
||||
#define XLOG_REG_TYPE_IBROOT 8
|
||||
#define XLOG_REG_TYPE_ILOCAL 9
|
||||
#define XLOG_REG_TYPE_IATTR_EXT 10
|
||||
#define XLOG_REG_TYPE_IATTR_BROOT 11
|
||||
#define XLOG_REG_TYPE_IATTR_LOCAL 12
|
||||
#define XLOG_REG_TYPE_QFORMAT 13
|
||||
#define XLOG_REG_TYPE_DQUOT 14
|
||||
#define XLOG_REG_TYPE_QUOTAOFF 15
|
||||
#define XLOG_REG_TYPE_LRHEADER 16
|
||||
#define XLOG_REG_TYPE_UNMOUNT 17
|
||||
#define XLOG_REG_TYPE_COMMIT 18
|
||||
#define XLOG_REG_TYPE_TRANSHDR 19
|
||||
#define XLOG_REG_TYPE_ICREATE 20
|
||||
#define XLOG_REG_TYPE_RUI_FORMAT 21
|
||||
#define XLOG_REG_TYPE_RUD_FORMAT 22
|
||||
#define XLOG_REG_TYPE_CUI_FORMAT 23
|
||||
#define XLOG_REG_TYPE_CUD_FORMAT 24
|
||||
#define XLOG_REG_TYPE_BUI_FORMAT 25
|
||||
#define XLOG_REG_TYPE_BUD_FORMAT 26
|
||||
#define XLOG_REG_TYPE_ATTRI_FORMAT 27
|
||||
#define XLOG_REG_TYPE_ATTRD_FORMAT 28
|
||||
#define XLOG_REG_TYPE_ATTR_NAME 29
|
||||
#define XLOG_REG_TYPE_ATTR_VALUE 30
|
||||
#define XLOG_REG_TYPE_XMI_FORMAT 31
|
||||
#define XLOG_REG_TYPE_XMD_FORMAT 32
|
||||
#define XLOG_REG_TYPE_ATTR_NEWNAME 33
|
||||
#define XLOG_REG_TYPE_ATTR_NEWVALUE 34
|
||||
#define XLOG_REG_TYPE_MAX 34
|
||||
|
||||
#define XFS_LOG_VEC_ORDERED (-1)
|
||||
|
||||
/*
|
||||
|
||||
@@ -499,8 +499,8 @@ xlog_recover_finish(
|
||||
extern void
|
||||
xlog_recover_cancel(struct xlog *);
|
||||
|
||||
extern __le32 xlog_cksum(struct xlog *log, struct xlog_rec_header *rhead,
|
||||
char *dp, int size);
|
||||
__le32 xlog_cksum(struct xlog *log, struct xlog_rec_header *rhead,
|
||||
char *dp, unsigned int hdrsize, unsigned int size);
|
||||
|
||||
extern struct kmem_cache *xfs_log_ticket_cache;
|
||||
struct xlog_ticket *xlog_ticket_alloc(struct xlog *log, int unit_bytes,
|
||||
|
||||
@@ -2894,20 +2894,34 @@ xlog_recover_process(
|
||||
int pass,
|
||||
struct list_head *buffer_list)
|
||||
{
|
||||
__le32 old_crc = rhead->h_crc;
|
||||
__le32 crc;
|
||||
__le32 expected_crc = rhead->h_crc, crc, other_crc;
|
||||
|
||||
crc = xlog_cksum(log, rhead, dp, be32_to_cpu(rhead->h_len));
|
||||
crc = xlog_cksum(log, rhead, dp, XLOG_REC_SIZE,
|
||||
be32_to_cpu(rhead->h_len));
|
||||
|
||||
/*
|
||||
* Look at the end of the struct xlog_rec_header definition in
|
||||
* xfs_log_format.h for the glory details.
|
||||
*/
|
||||
if (expected_crc && crc != expected_crc) {
|
||||
other_crc = xlog_cksum(log, rhead, dp, XLOG_REC_SIZE_OTHER,
|
||||
be32_to_cpu(rhead->h_len));
|
||||
if (other_crc == expected_crc) {
|
||||
xfs_notice_once(log->l_mp,
|
||||
"Fixing up incorrect CRC due to padding.");
|
||||
crc = other_crc;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Nothing else to do if this is a CRC verification pass. Just return
|
||||
* if this is a record with a non-zero crc. Unfortunately, mkfs always
|
||||
* sets old_crc to 0 so we must consider this valid even on v5 supers.
|
||||
* Otherwise, return EFSBADCRC on failure so the callers up the stack
|
||||
* know precisely what failed.
|
||||
* sets expected_crc to 0 so we must consider this valid even on v5
|
||||
* supers. Otherwise, return EFSBADCRC on failure so the callers up the
|
||||
* stack know precisely what failed.
|
||||
*/
|
||||
if (pass == XLOG_RECOVER_CRCPASS) {
|
||||
if (old_crc && crc != old_crc)
|
||||
if (expected_crc && crc != expected_crc)
|
||||
return -EFSBADCRC;
|
||||
return 0;
|
||||
}
|
||||
@@ -2918,11 +2932,11 @@ xlog_recover_process(
|
||||
* zero CRC check prevents warnings from being emitted when upgrading
|
||||
* the kernel from one that does not add CRCs by default.
|
||||
*/
|
||||
if (crc != old_crc) {
|
||||
if (old_crc || xfs_has_crc(log->l_mp)) {
|
||||
if (crc != expected_crc) {
|
||||
if (expected_crc || xfs_has_crc(log->l_mp)) {
|
||||
xfs_alert(log->l_mp,
|
||||
"log record CRC mismatch: found 0x%x, expected 0x%x.",
|
||||
le32_to_cpu(old_crc),
|
||||
le32_to_cpu(expected_crc),
|
||||
le32_to_cpu(crc));
|
||||
xfs_hex_dump(dp, 32);
|
||||
}
|
||||
|
||||
@@ -1057,19 +1057,6 @@ xfs_mountfs(
|
||||
xfs_inodegc_start(mp);
|
||||
xfs_blockgc_start(mp);
|
||||
|
||||
/*
|
||||
* Now that we've recovered any pending superblock feature bit
|
||||
* additions, we can finish setting up the attr2 behaviour for the
|
||||
* mount. The noattr2 option overrides the superblock flag, so only
|
||||
* check the superblock feature flag if the mount option is not set.
|
||||
*/
|
||||
if (xfs_has_noattr2(mp)) {
|
||||
mp->m_features &= ~XFS_FEAT_ATTR2;
|
||||
} else if (!xfs_has_attr2(mp) &&
|
||||
(mp->m_sb.sb_features2 & XFS_SB_VERSION2_ATTR2BIT)) {
|
||||
mp->m_features |= XFS_FEAT_ATTR2;
|
||||
}
|
||||
|
||||
if (xfs_has_metadir(mp)) {
|
||||
error = xfs_mount_setup_metadir(mp);
|
||||
if (error)
|
||||
|
||||
@@ -363,7 +363,6 @@ typedef struct xfs_mount {
|
||||
#define XFS_FEAT_EXTFLG (1ULL << 7) /* unwritten extents */
|
||||
#define XFS_FEAT_ASCIICI (1ULL << 8) /* ASCII only case-insens. */
|
||||
#define XFS_FEAT_LAZYSBCOUNT (1ULL << 9) /* Superblk counters */
|
||||
#define XFS_FEAT_ATTR2 (1ULL << 10) /* dynamic attr fork */
|
||||
#define XFS_FEAT_PARENT (1ULL << 11) /* parent pointers */
|
||||
#define XFS_FEAT_PROJID32 (1ULL << 12) /* 32 bit project id */
|
||||
#define XFS_FEAT_CRC (1ULL << 13) /* metadata CRCs */
|
||||
@@ -386,7 +385,6 @@ typedef struct xfs_mount {
|
||||
|
||||
/* Mount features */
|
||||
#define XFS_FEAT_NOLIFETIME (1ULL << 47) /* disable lifetime hints */
|
||||
#define XFS_FEAT_NOATTR2 (1ULL << 48) /* disable attr2 creation */
|
||||
#define XFS_FEAT_NOALIGN (1ULL << 49) /* ignore alignment */
|
||||
#define XFS_FEAT_ALLOCSIZE (1ULL << 50) /* user specified allocation size */
|
||||
#define XFS_FEAT_LARGE_IOSIZE (1ULL << 51) /* report large preferred
|
||||
@@ -396,7 +394,6 @@ typedef struct xfs_mount {
|
||||
#define XFS_FEAT_DISCARD (1ULL << 54) /* discard unused blocks */
|
||||
#define XFS_FEAT_GRPID (1ULL << 55) /* group-ID assigned from directory */
|
||||
#define XFS_FEAT_SMALL_INUMS (1ULL << 56) /* user wants 32bit inodes */
|
||||
#define XFS_FEAT_IKEEP (1ULL << 57) /* keep empty inode clusters*/
|
||||
#define XFS_FEAT_SWALLOC (1ULL << 58) /* stripe width allocation */
|
||||
#define XFS_FEAT_FILESTREAMS (1ULL << 59) /* use filestreams allocator */
|
||||
#define XFS_FEAT_DAX_ALWAYS (1ULL << 60) /* DAX always enabled */
|
||||
@@ -504,12 +501,17 @@ __XFS_HAS_V4_FEAT(align, ALIGN)
|
||||
__XFS_HAS_V4_FEAT(logv2, LOGV2)
|
||||
__XFS_HAS_V4_FEAT(extflg, EXTFLG)
|
||||
__XFS_HAS_V4_FEAT(lazysbcount, LAZYSBCOUNT)
|
||||
__XFS_ADD_V4_FEAT(attr2, ATTR2)
|
||||
__XFS_ADD_V4_FEAT(projid32, PROJID32)
|
||||
__XFS_HAS_V4_FEAT(v3inodes, V3INODES)
|
||||
__XFS_HAS_V4_FEAT(crc, CRC)
|
||||
__XFS_HAS_V4_FEAT(pquotino, PQUOTINO)
|
||||
|
||||
static inline void xfs_add_attr2(struct xfs_mount *mp)
|
||||
{
|
||||
if (IS_ENABLED(CONFIG_XFS_SUPPORT_V4))
|
||||
xfs_sb_version_addattr2(&mp->m_sb);
|
||||
}
|
||||
|
||||
/*
|
||||
* Mount features
|
||||
*
|
||||
@@ -517,7 +519,6 @@ __XFS_HAS_V4_FEAT(pquotino, PQUOTINO)
|
||||
* bit inodes and read-only state, are kept as operational state rather than
|
||||
* features.
|
||||
*/
|
||||
__XFS_HAS_FEAT(noattr2, NOATTR2)
|
||||
__XFS_HAS_FEAT(noalign, NOALIGN)
|
||||
__XFS_HAS_FEAT(allocsize, ALLOCSIZE)
|
||||
__XFS_HAS_FEAT(large_iosize, LARGE_IOSIZE)
|
||||
@@ -526,7 +527,6 @@ __XFS_HAS_FEAT(dirsync, DIRSYNC)
|
||||
__XFS_HAS_FEAT(discard, DISCARD)
|
||||
__XFS_HAS_FEAT(grpid, GRPID)
|
||||
__XFS_HAS_FEAT(small_inums, SMALL_INUMS)
|
||||
__XFS_HAS_FEAT(ikeep, IKEEP)
|
||||
__XFS_HAS_FEAT(swalloc, SWALLOC)
|
||||
__XFS_HAS_FEAT(filestreams, FILESTREAMS)
|
||||
__XFS_HAS_FEAT(dax_always, DAX_ALWAYS)
|
||||
|
||||
@@ -165,7 +165,7 @@ xfs_dax_translate_range(
|
||||
uint64_t *bblen)
|
||||
{
|
||||
u64 dev_start = btp->bt_dax_part_off;
|
||||
u64 dev_len = bdev_nr_bytes(btp->bt_bdev);
|
||||
u64 dev_len = BBTOB(btp->bt_nr_sectors);
|
||||
u64 dev_end = dev_start + dev_len - 1;
|
||||
|
||||
/* Notify failure on the whole device. */
|
||||
|
||||
@@ -105,8 +105,8 @@ enum {
|
||||
Opt_logbufs, Opt_logbsize, Opt_logdev, Opt_rtdev,
|
||||
Opt_wsync, Opt_noalign, Opt_swalloc, Opt_sunit, Opt_swidth, Opt_nouuid,
|
||||
Opt_grpid, Opt_nogrpid, Opt_bsdgroups, Opt_sysvgroups,
|
||||
Opt_allocsize, Opt_norecovery, Opt_inode64, Opt_inode32, Opt_ikeep,
|
||||
Opt_noikeep, Opt_largeio, Opt_nolargeio, Opt_attr2, Opt_noattr2,
|
||||
Opt_allocsize, Opt_norecovery, Opt_inode64, Opt_inode32,
|
||||
Opt_largeio, Opt_nolargeio,
|
||||
Opt_filestreams, Opt_quota, Opt_noquota, Opt_usrquota, Opt_grpquota,
|
||||
Opt_prjquota, Opt_uquota, Opt_gquota, Opt_pquota,
|
||||
Opt_uqnoenforce, Opt_gqnoenforce, Opt_pqnoenforce, Opt_qnoenforce,
|
||||
@@ -133,12 +133,8 @@ static const struct fs_parameter_spec xfs_fs_parameters[] = {
|
||||
fsparam_flag("norecovery", Opt_norecovery),
|
||||
fsparam_flag("inode64", Opt_inode64),
|
||||
fsparam_flag("inode32", Opt_inode32),
|
||||
fsparam_flag("ikeep", Opt_ikeep),
|
||||
fsparam_flag("noikeep", Opt_noikeep),
|
||||
fsparam_flag("largeio", Opt_largeio),
|
||||
fsparam_flag("nolargeio", Opt_nolargeio),
|
||||
fsparam_flag("attr2", Opt_attr2),
|
||||
fsparam_flag("noattr2", Opt_noattr2),
|
||||
fsparam_flag("filestreams", Opt_filestreams),
|
||||
fsparam_flag("quota", Opt_quota),
|
||||
fsparam_flag("noquota", Opt_noquota),
|
||||
@@ -175,13 +171,11 @@ xfs_fs_show_options(
|
||||
{
|
||||
static struct proc_xfs_info xfs_info_set[] = {
|
||||
/* the few simple ones we can get from the mount struct */
|
||||
{ XFS_FEAT_IKEEP, ",ikeep" },
|
||||
{ XFS_FEAT_WSYNC, ",wsync" },
|
||||
{ XFS_FEAT_NOALIGN, ",noalign" },
|
||||
{ XFS_FEAT_SWALLOC, ",swalloc" },
|
||||
{ XFS_FEAT_NOUUID, ",nouuid" },
|
||||
{ XFS_FEAT_NORECOVERY, ",norecovery" },
|
||||
{ XFS_FEAT_ATTR2, ",attr2" },
|
||||
{ XFS_FEAT_FILESTREAMS, ",filestreams" },
|
||||
{ XFS_FEAT_GRPID, ",grpid" },
|
||||
{ XFS_FEAT_DISCARD, ",discard" },
|
||||
@@ -541,7 +535,8 @@ xfs_setup_devices(
|
||||
{
|
||||
int error;
|
||||
|
||||
error = xfs_configure_buftarg(mp->m_ddev_targp, mp->m_sb.sb_sectsize);
|
||||
error = xfs_configure_buftarg(mp->m_ddev_targp, mp->m_sb.sb_sectsize,
|
||||
mp->m_sb.sb_dblocks);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
@@ -551,7 +546,7 @@ xfs_setup_devices(
|
||||
if (xfs_has_sector(mp))
|
||||
log_sector_size = mp->m_sb.sb_logsectsize;
|
||||
error = xfs_configure_buftarg(mp->m_logdev_targp,
|
||||
log_sector_size);
|
||||
log_sector_size, mp->m_sb.sb_logblocks);
|
||||
if (error)
|
||||
return error;
|
||||
}
|
||||
@@ -565,7 +560,7 @@ xfs_setup_devices(
|
||||
mp->m_rtdev_targp = mp->m_ddev_targp;
|
||||
} else if (mp->m_rtname) {
|
||||
error = xfs_configure_buftarg(mp->m_rtdev_targp,
|
||||
mp->m_sb.sb_sectsize);
|
||||
mp->m_sb.sb_sectsize, mp->m_sb.sb_rblocks);
|
||||
if (error)
|
||||
return error;
|
||||
}
|
||||
@@ -1088,15 +1083,6 @@ xfs_finish_flags(
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* V5 filesystems always use attr2 format for attributes.
|
||||
*/
|
||||
if (xfs_has_crc(mp) && xfs_has_noattr2(mp)) {
|
||||
xfs_warn(mp, "Cannot mount a V5 filesystem as noattr2. "
|
||||
"attr2 is always enabled for V5 filesystems.");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/*
|
||||
* prohibit r/w mounts of read-only filesystems
|
||||
*/
|
||||
@@ -1543,22 +1529,6 @@ xfs_fs_parse_param(
|
||||
return 0;
|
||||
#endif
|
||||
/* Following mount options will be removed in September 2025 */
|
||||
case Opt_ikeep:
|
||||
xfs_fs_warn_deprecated(fc, param, XFS_FEAT_IKEEP, true);
|
||||
parsing_mp->m_features |= XFS_FEAT_IKEEP;
|
||||
return 0;
|
||||
case Opt_noikeep:
|
||||
xfs_fs_warn_deprecated(fc, param, XFS_FEAT_IKEEP, false);
|
||||
parsing_mp->m_features &= ~XFS_FEAT_IKEEP;
|
||||
return 0;
|
||||
case Opt_attr2:
|
||||
xfs_fs_warn_deprecated(fc, param, XFS_FEAT_ATTR2, true);
|
||||
parsing_mp->m_features |= XFS_FEAT_ATTR2;
|
||||
return 0;
|
||||
case Opt_noattr2:
|
||||
xfs_fs_warn_deprecated(fc, param, XFS_FEAT_NOATTR2, true);
|
||||
parsing_mp->m_features |= XFS_FEAT_NOATTR2;
|
||||
return 0;
|
||||
case Opt_max_open_zones:
|
||||
parsing_mp->m_max_open_zones = result.uint_32;
|
||||
return 0;
|
||||
@@ -1594,16 +1564,6 @@ xfs_fs_validate_params(
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/*
|
||||
* We have not read the superblock at this point, so only the attr2
|
||||
* mount option can set the attr2 feature by this stage.
|
||||
*/
|
||||
if (xfs_has_attr2(mp) && xfs_has_noattr2(mp)) {
|
||||
xfs_warn(mp, "attr2 and noattr2 cannot both be specified.");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
|
||||
if (xfs_has_noalign(mp) && (mp->m_dalign || mp->m_swidth)) {
|
||||
xfs_warn(mp,
|
||||
"sunit and swidth options incompatible with the noalign option");
|
||||
@@ -2178,21 +2138,6 @@ xfs_fs_reconfigure(
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
/* attr2 -> noattr2 */
|
||||
if (xfs_has_noattr2(new_mp)) {
|
||||
if (xfs_has_crc(mp)) {
|
||||
xfs_warn(mp,
|
||||
"attr2 is always enabled for a V5 filesystem - can't be changed.");
|
||||
return -EINVAL;
|
||||
}
|
||||
mp->m_features &= ~XFS_FEAT_ATTR2;
|
||||
mp->m_features |= XFS_FEAT_NOATTR2;
|
||||
} else if (xfs_has_attr2(new_mp)) {
|
||||
/* noattr2 -> attr2 */
|
||||
mp->m_features &= ~XFS_FEAT_NOATTR2;
|
||||
mp->m_features |= XFS_FEAT_ATTR2;
|
||||
}
|
||||
|
||||
/* Validate new max_atomic_write option before making other changes */
|
||||
if (mp->m_awu_max_bytes != new_mp->m_awu_max_bytes) {
|
||||
error = xfs_set_max_atomic_write_opt(mp,
|
||||
|
||||
@@ -50,7 +50,7 @@ xfs_panic_mask_proc_handler(
|
||||
}
|
||||
#endif /* CONFIG_PROC_FS */
|
||||
|
||||
STATIC int
|
||||
static inline int
|
||||
xfs_deprecated_dointvec_minmax(
|
||||
const struct ctl_table *ctl,
|
||||
int write,
|
||||
@@ -67,24 +67,6 @@ xfs_deprecated_dointvec_minmax(
|
||||
}
|
||||
|
||||
static const struct ctl_table xfs_table[] = {
|
||||
{
|
||||
.procname = "irix_sgid_inherit",
|
||||
.data = &xfs_params.sgid_inherit.val,
|
||||
.maxlen = sizeof(int),
|
||||
.mode = 0644,
|
||||
.proc_handler = xfs_deprecated_dointvec_minmax,
|
||||
.extra1 = &xfs_params.sgid_inherit.min,
|
||||
.extra2 = &xfs_params.sgid_inherit.max
|
||||
},
|
||||
{
|
||||
.procname = "irix_symlink_mode",
|
||||
.data = &xfs_params.symlink_mode.val,
|
||||
.maxlen = sizeof(int),
|
||||
.mode = 0644,
|
||||
.proc_handler = xfs_deprecated_dointvec_minmax,
|
||||
.extra1 = &xfs_params.symlink_mode.min,
|
||||
.extra2 = &xfs_params.symlink_mode.max
|
||||
},
|
||||
{
|
||||
.procname = "panic_mask",
|
||||
.data = &xfs_params.panic_mask.val,
|
||||
@@ -185,15 +167,6 @@ static const struct ctl_table xfs_table[] = {
|
||||
.extra1 = &xfs_params.blockgc_timer.min,
|
||||
.extra2 = &xfs_params.blockgc_timer.max,
|
||||
},
|
||||
{
|
||||
.procname = "speculative_cow_prealloc_lifetime",
|
||||
.data = &xfs_params.blockgc_timer.val,
|
||||
.maxlen = sizeof(int),
|
||||
.mode = 0644,
|
||||
.proc_handler = xfs_deprecated_dointvec_minmax,
|
||||
.extra1 = &xfs_params.blockgc_timer.min,
|
||||
.extra2 = &xfs_params.blockgc_timer.max,
|
||||
},
|
||||
/* please keep this the last entry */
|
||||
#ifdef CONFIG_PROC_FS
|
||||
{
|
||||
|
||||
@@ -19,9 +19,6 @@ typedef struct xfs_sysctl_val {
|
||||
} xfs_sysctl_val_t;
|
||||
|
||||
typedef struct xfs_param {
|
||||
xfs_sysctl_val_t sgid_inherit; /* Inherit S_ISGID if process' GID is
|
||||
* not a member of parent dir GID. */
|
||||
xfs_sysctl_val_t symlink_mode; /* Link creat mode affected by umask */
|
||||
xfs_sysctl_val_t panic_mask; /* bitmask to cause panic on errors. */
|
||||
xfs_sysctl_val_t error_level; /* Degree of reporting for problems */
|
||||
xfs_sysctl_val_t syncd_timer; /* Interval between xfssyncd wakeups */
|
||||
|
||||
@@ -452,19 +452,17 @@ xfs_trans_mod_sb(
|
||||
*/
|
||||
STATIC void
|
||||
xfs_trans_apply_sb_deltas(
|
||||
xfs_trans_t *tp)
|
||||
struct xfs_trans *tp)
|
||||
{
|
||||
struct xfs_dsb *sbp;
|
||||
struct xfs_buf *bp;
|
||||
int whole = 0;
|
||||
|
||||
bp = xfs_trans_getsb(tp);
|
||||
sbp = bp->b_addr;
|
||||
struct xfs_mount *mp = tp->t_mountp;
|
||||
struct xfs_buf *bp = xfs_trans_getsb(tp);
|
||||
struct xfs_dsb *sbp = bp->b_addr;
|
||||
int whole = 0;
|
||||
|
||||
/*
|
||||
* Only update the superblock counters if we are logging them
|
||||
*/
|
||||
if (!xfs_has_lazysbcount((tp->t_mountp))) {
|
||||
if (!xfs_has_lazysbcount(mp)) {
|
||||
if (tp->t_icount_delta)
|
||||
be64_add_cpu(&sbp->sb_icount, tp->t_icount_delta);
|
||||
if (tp->t_ifree_delta)
|
||||
@@ -491,8 +489,7 @@ xfs_trans_apply_sb_deltas(
|
||||
* write the correct value ondisk.
|
||||
*/
|
||||
if ((tp->t_frextents_delta || tp->t_res_frextents_delta) &&
|
||||
!xfs_has_rtgroups(tp->t_mountp)) {
|
||||
struct xfs_mount *mp = tp->t_mountp;
|
||||
!xfs_has_rtgroups(mp)) {
|
||||
int64_t rtxdelta;
|
||||
|
||||
rtxdelta = tp->t_frextents_delta + tp->t_res_frextents_delta;
|
||||
@@ -505,6 +502,8 @@ xfs_trans_apply_sb_deltas(
|
||||
|
||||
if (tp->t_dblocks_delta) {
|
||||
be64_add_cpu(&sbp->sb_dblocks, tp->t_dblocks_delta);
|
||||
mp->m_ddev_targp->bt_nr_sectors +=
|
||||
XFS_FSB_TO_BB(mp, tp->t_dblocks_delta);
|
||||
whole = 1;
|
||||
}
|
||||
if (tp->t_agcount_delta) {
|
||||
@@ -524,7 +523,7 @@ xfs_trans_apply_sb_deltas(
|
||||
* recompute the ondisk rtgroup block log. The incore values
|
||||
* will be recomputed in xfs_trans_unreserve_and_mod_sb.
|
||||
*/
|
||||
if (xfs_has_rtgroups(tp->t_mountp)) {
|
||||
if (xfs_has_rtgroups(mp)) {
|
||||
sbp->sb_rgblklog = xfs_compute_rgblklog(
|
||||
be32_to_cpu(sbp->sb_rgextents),
|
||||
be32_to_cpu(sbp->sb_rextsize));
|
||||
@@ -537,6 +536,8 @@ xfs_trans_apply_sb_deltas(
|
||||
}
|
||||
if (tp->t_rblocks_delta) {
|
||||
be64_add_cpu(&sbp->sb_rblocks, tp->t_rblocks_delta);
|
||||
mp->m_rtdev_targp->bt_nr_sectors +=
|
||||
XFS_FSB_TO_BB(mp, tp->t_rblocks_delta);
|
||||
whole = 1;
|
||||
}
|
||||
if (tp->t_rextents_delta) {
|
||||
|
||||
@@ -374,7 +374,7 @@ xfsaild_push_item(
|
||||
* If log item pinning is enabled, skip the push and track the item as
|
||||
* pinned. This can help induce head-behind-tail conditions.
|
||||
*/
|
||||
if (XFS_TEST_ERROR(false, ailp->ail_log->l_mp, XFS_ERRTAG_LOG_ITEM_PIN))
|
||||
if (XFS_TEST_ERROR(ailp->ail_log->l_mp, XFS_ERRTAG_LOG_ITEM_PIN))
|
||||
return XFS_ITEM_PINNED;
|
||||
|
||||
/*
|
||||
|
||||
@@ -493,64 +493,58 @@ xfs_try_open_zone(
|
||||
return oz;
|
||||
}
|
||||
|
||||
/*
|
||||
* For data with short or medium lifetime, try to colocate it into an
|
||||
* already open zone with a matching temperature.
|
||||
*/
|
||||
static bool
|
||||
xfs_colocate_eagerly(
|
||||
enum rw_hint file_hint)
|
||||
{
|
||||
switch (file_hint) {
|
||||
case WRITE_LIFE_MEDIUM:
|
||||
case WRITE_LIFE_SHORT:
|
||||
case WRITE_LIFE_NONE:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
enum xfs_zone_alloc_score {
|
||||
/* Any open zone will do it, we're desperate */
|
||||
XFS_ZONE_ALLOC_ANY = 0,
|
||||
|
||||
static bool
|
||||
xfs_good_hint_match(
|
||||
struct xfs_open_zone *oz,
|
||||
enum rw_hint file_hint)
|
||||
{
|
||||
switch (oz->oz_write_hint) {
|
||||
case WRITE_LIFE_LONG:
|
||||
case WRITE_LIFE_EXTREME:
|
||||
/* colocate long and extreme */
|
||||
if (file_hint == WRITE_LIFE_LONG ||
|
||||
file_hint == WRITE_LIFE_EXTREME)
|
||||
return true;
|
||||
break;
|
||||
case WRITE_LIFE_MEDIUM:
|
||||
/* colocate medium with medium */
|
||||
if (file_hint == WRITE_LIFE_MEDIUM)
|
||||
return true;
|
||||
break;
|
||||
case WRITE_LIFE_SHORT:
|
||||
case WRITE_LIFE_NONE:
|
||||
case WRITE_LIFE_NOT_SET:
|
||||
/* colocate short and none */
|
||||
if (file_hint <= WRITE_LIFE_SHORT)
|
||||
return true;
|
||||
break;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
/* It better fit somehow */
|
||||
XFS_ZONE_ALLOC_OK = 1,
|
||||
|
||||
/* Only reuse a zone if it fits really well. */
|
||||
XFS_ZONE_ALLOC_GOOD = 2,
|
||||
};
|
||||
|
||||
/*
|
||||
* Life time hint co-location matrix. Fields not set default to 0
|
||||
* aka XFS_ZONE_ALLOC_ANY.
|
||||
*/
|
||||
static const unsigned int
|
||||
xfs_zoned_hint_score[WRITE_LIFE_HINT_NR][WRITE_LIFE_HINT_NR] = {
|
||||
[WRITE_LIFE_NOT_SET] = {
|
||||
[WRITE_LIFE_NOT_SET] = XFS_ZONE_ALLOC_OK,
|
||||
},
|
||||
[WRITE_LIFE_NONE] = {
|
||||
[WRITE_LIFE_NONE] = XFS_ZONE_ALLOC_OK,
|
||||
},
|
||||
[WRITE_LIFE_SHORT] = {
|
||||
[WRITE_LIFE_SHORT] = XFS_ZONE_ALLOC_GOOD,
|
||||
},
|
||||
[WRITE_LIFE_MEDIUM] = {
|
||||
[WRITE_LIFE_MEDIUM] = XFS_ZONE_ALLOC_GOOD,
|
||||
},
|
||||
[WRITE_LIFE_LONG] = {
|
||||
[WRITE_LIFE_LONG] = XFS_ZONE_ALLOC_OK,
|
||||
[WRITE_LIFE_EXTREME] = XFS_ZONE_ALLOC_OK,
|
||||
},
|
||||
[WRITE_LIFE_EXTREME] = {
|
||||
[WRITE_LIFE_LONG] = XFS_ZONE_ALLOC_OK,
|
||||
[WRITE_LIFE_EXTREME] = XFS_ZONE_ALLOC_OK,
|
||||
},
|
||||
};
|
||||
|
||||
static bool
|
||||
xfs_try_use_zone(
|
||||
struct xfs_zone_info *zi,
|
||||
enum rw_hint file_hint,
|
||||
struct xfs_open_zone *oz,
|
||||
bool lowspace)
|
||||
unsigned int goodness)
|
||||
{
|
||||
if (oz->oz_allocated == rtg_blocks(oz->oz_rtg))
|
||||
return false;
|
||||
if (!lowspace && !xfs_good_hint_match(oz, file_hint))
|
||||
|
||||
if (xfs_zoned_hint_score[oz->oz_write_hint][file_hint] < goodness)
|
||||
return false;
|
||||
|
||||
if (!atomic_inc_not_zero(&oz->oz_ref))
|
||||
return false;
|
||||
|
||||
@@ -581,14 +575,14 @@ static struct xfs_open_zone *
|
||||
xfs_select_open_zone_lru(
|
||||
struct xfs_zone_info *zi,
|
||||
enum rw_hint file_hint,
|
||||
bool lowspace)
|
||||
unsigned int goodness)
|
||||
{
|
||||
struct xfs_open_zone *oz;
|
||||
|
||||
lockdep_assert_held(&zi->zi_open_zones_lock);
|
||||
|
||||
list_for_each_entry(oz, &zi->zi_open_zones, oz_entry)
|
||||
if (xfs_try_use_zone(zi, file_hint, oz, lowspace))
|
||||
if (xfs_try_use_zone(zi, file_hint, oz, goodness))
|
||||
return oz;
|
||||
|
||||
cond_resched_lock(&zi->zi_open_zones_lock);
|
||||
@@ -651,9 +645,11 @@ xfs_select_zone_nowait(
|
||||
* data.
|
||||
*/
|
||||
spin_lock(&zi->zi_open_zones_lock);
|
||||
if (xfs_colocate_eagerly(write_hint))
|
||||
oz = xfs_select_open_zone_lru(zi, write_hint, false);
|
||||
else if (pack_tight)
|
||||
oz = xfs_select_open_zone_lru(zi, write_hint, XFS_ZONE_ALLOC_GOOD);
|
||||
if (oz)
|
||||
goto out_unlock;
|
||||
|
||||
if (pack_tight)
|
||||
oz = xfs_select_open_zone_mru(zi, write_hint);
|
||||
if (oz)
|
||||
goto out_unlock;
|
||||
@@ -667,16 +663,16 @@ xfs_select_zone_nowait(
|
||||
goto out_unlock;
|
||||
|
||||
/*
|
||||
* Try to colocate cold data with other cold data if we failed to open a
|
||||
* new zone for it.
|
||||
* Try to find a zone that is an ok match to colocate data with.
|
||||
*/
|
||||
if (write_hint != WRITE_LIFE_NOT_SET &&
|
||||
!xfs_colocate_eagerly(write_hint))
|
||||
oz = xfs_select_open_zone_lru(zi, write_hint, false);
|
||||
if (!oz)
|
||||
oz = xfs_select_open_zone_lru(zi, WRITE_LIFE_NOT_SET, false);
|
||||
if (!oz)
|
||||
oz = xfs_select_open_zone_lru(zi, WRITE_LIFE_NOT_SET, true);
|
||||
oz = xfs_select_open_zone_lru(zi, write_hint, XFS_ZONE_ALLOC_OK);
|
||||
if (oz)
|
||||
goto out_unlock;
|
||||
|
||||
/*
|
||||
* Pick the least recently used zone, regardless of hint match
|
||||
*/
|
||||
oz = xfs_select_open_zone_lru(zi, write_hint, XFS_ZONE_ALLOC_ANY);
|
||||
out_unlock:
|
||||
spin_unlock(&zi->zi_open_zones_lock);
|
||||
return oz;
|
||||
@@ -1135,7 +1131,7 @@ xfs_calc_open_zones(
|
||||
if (bdev_open_zones)
|
||||
mp->m_max_open_zones = bdev_open_zones;
|
||||
else
|
||||
mp->m_max_open_zones = xfs_max_open_zones(mp);
|
||||
mp->m_max_open_zones = XFS_DEFAULT_MAX_OPEN_ZONES;
|
||||
}
|
||||
|
||||
if (mp->m_max_open_zones < XFS_MIN_OPEN_ZONES) {
|
||||
@@ -1248,7 +1244,7 @@ xfs_mount_zones(
|
||||
if (!mp->m_zone_info)
|
||||
return -ENOMEM;
|
||||
|
||||
xfs_info(mp, "%u zones of %u blocks size (%u max open)",
|
||||
xfs_info(mp, "%u zones of %u blocks (%u max open zones)",
|
||||
mp->m_sb.sb_rgcount, mp->m_groups[XG_TYPE_RTG].blocks,
|
||||
mp->m_max_open_zones);
|
||||
trace_xfs_zones_mount(mp);
|
||||
|
||||
@@ -14,6 +14,7 @@ enum rw_hint {
|
||||
WRITE_LIFE_MEDIUM = RWH_WRITE_LIFE_MEDIUM,
|
||||
WRITE_LIFE_LONG = RWH_WRITE_LIFE_LONG,
|
||||
WRITE_LIFE_EXTREME = RWH_WRITE_LIFE_EXTREME,
|
||||
WRITE_LIFE_HINT_NR,
|
||||
} __packed;
|
||||
|
||||
/* Sparse ignores __packed annotations on enums, hence the #ifndef below. */
|
||||
|
||||
Reference in New Issue
Block a user