mirror of
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-09-04 20:19:47 +08:00
for-6.17-rc4-tag
-----BEGIN PGP SIGNATURE----- iQIzBAABCgAdFiEE8rQSAMVO+zA4DBdWxWXV+ddtWDsFAmi3Od8ACgkQxWXV+ddt WDvZrA/7BOm51t1iNjw46NFviYq5u9/uTjL9hnYJoaXIH/DUdK5PyTACwlDJsrKk aFYTSH04OfFhmvy2Fje1YxmkuCx+VQ49qUdUBZdbx5wD7Rj+FthXcSLxrIATfQ7W qrLyHr65pxpVI9zWdZh+E2Ls5beCDqi/Rcdidii+NqTdnJJ/l21LXNsLPw6O09rh dRIJlb4fr+3ioEkAMDfC5p5lLUb/76lAeB+3CHc2cB4m3hPRMZeET/bd8pXo4g2D C9vjOp2asRzawM9bg33jb6LTQlHW+Yug1N13MN0tx01pTvnRbs/mGq7qlnCPtDXo SbFUTDqMIXnkq9ohdaOXJH9mPksG/vq+2GoS5bCORvsl+cDbFtzQsjWoC7wkcXa+ e8JfcKrjTUKGSaCOacbz+CHEzAhiv5yCdIloofUQSQJfw+R0rsQbGlZuCP9dlzD7 GGlnQKD23duaLs0u9wA+KC6T+Ifz7z42GRDBlzo/9ZXQcSunJJvK5hsE6S5wejka aZqSPfyb95gvYb+LnXepHCrRVi3lCSfV3W6m6xg2fDEgwBvxe8iEZnzSUmcMMcvu nY+sXS3ezpBLym+3NRYVWMlbk+Jre28TkeUCrTMTvd6nPCK9yIOeygx6XR8jKZRb tJG0nhs3daqcG/EUmEKwQOar5lF7dpr8kAI+LbK+Wi8a92tTxL4= =fL6z -----END PGP SIGNATURE----- Merge tag 'for-6.17-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux Pull btrfs fixes from David Sterba: - fix a few races related to inode link count - fix inode leak on failure to add link to inode - move transaction aborts closer to where they happen * tag 'for-6.17-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: btrfs: avoid load/store tearing races when checking if an inode was logged btrfs: fix race between setting last_dir_index_offset and inode logging btrfs: fix race between logging inode and checking if it was logged before btrfs: simplify error handling logic for btrfs_link() btrfs: fix inode leak on failure to add link to inode btrfs: abort transaction on failure to add link to inode
This commit is contained in:
commit
e3c94a539e
@ -248,7 +248,7 @@ struct btrfs_inode {
|
|||||||
u64 new_delalloc_bytes;
|
u64 new_delalloc_bytes;
|
||||||
/*
|
/*
|
||||||
* The offset of the last dir index key that was logged.
|
* The offset of the last dir index key that was logged.
|
||||||
* This is used only for directories.
|
* This is used only for directories. Protected by 'log_mutex'.
|
||||||
*/
|
*/
|
||||||
u64 last_dir_index_offset;
|
u64 last_dir_index_offset;
|
||||||
};
|
};
|
||||||
|
@ -6805,7 +6805,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
|
|||||||
struct fscrypt_name fname;
|
struct fscrypt_name fname;
|
||||||
u64 index;
|
u64 index;
|
||||||
int ret;
|
int ret;
|
||||||
int drop_inode = 0;
|
|
||||||
|
|
||||||
/* do not allow sys_link's with other subvols of the same device */
|
/* do not allow sys_link's with other subvols of the same device */
|
||||||
if (btrfs_root_id(root) != btrfs_root_id(BTRFS_I(inode)->root))
|
if (btrfs_root_id(root) != btrfs_root_id(BTRFS_I(inode)->root))
|
||||||
@ -6837,44 +6836,44 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
|
|||||||
|
|
||||||
/* There are several dir indexes for this inode, clear the cache. */
|
/* There are several dir indexes for this inode, clear the cache. */
|
||||||
BTRFS_I(inode)->dir_index = 0ULL;
|
BTRFS_I(inode)->dir_index = 0ULL;
|
||||||
inc_nlink(inode);
|
|
||||||
inode_inc_iversion(inode);
|
inode_inc_iversion(inode);
|
||||||
inode_set_ctime_current(inode);
|
inode_set_ctime_current(inode);
|
||||||
ihold(inode);
|
|
||||||
set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags);
|
set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags);
|
||||||
|
|
||||||
ret = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode),
|
ret = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode),
|
||||||
&fname.disk_name, 1, index);
|
&fname.disk_name, 1, index);
|
||||||
|
if (ret)
|
||||||
|
goto fail;
|
||||||
|
|
||||||
|
/* Link added now we update the inode item with the new link count. */
|
||||||
|
inc_nlink(inode);
|
||||||
|
ret = btrfs_update_inode(trans, BTRFS_I(inode));
|
||||||
if (ret) {
|
if (ret) {
|
||||||
drop_inode = 1;
|
btrfs_abort_transaction(trans, ret);
|
||||||
} else {
|
goto fail;
|
||||||
struct dentry *parent = dentry->d_parent;
|
|
||||||
|
|
||||||
ret = btrfs_update_inode(trans, BTRFS_I(inode));
|
|
||||||
if (ret)
|
|
||||||
goto fail;
|
|
||||||
if (inode->i_nlink == 1) {
|
|
||||||
/*
|
|
||||||
* If new hard link count is 1, it's a file created
|
|
||||||
* with open(2) O_TMPFILE flag.
|
|
||||||
*/
|
|
||||||
ret = btrfs_orphan_del(trans, BTRFS_I(inode));
|
|
||||||
if (ret)
|
|
||||||
goto fail;
|
|
||||||
}
|
|
||||||
d_instantiate(dentry, inode);
|
|
||||||
btrfs_log_new_name(trans, old_dentry, NULL, 0, parent);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (inode->i_nlink == 1) {
|
||||||
|
/*
|
||||||
|
* If the new hard link count is 1, it's a file created with the
|
||||||
|
* open(2) O_TMPFILE flag.
|
||||||
|
*/
|
||||||
|
ret = btrfs_orphan_del(trans, BTRFS_I(inode));
|
||||||
|
if (ret) {
|
||||||
|
btrfs_abort_transaction(trans, ret);
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Grab reference for the new dentry passed to d_instantiate(). */
|
||||||
|
ihold(inode);
|
||||||
|
d_instantiate(dentry, inode);
|
||||||
|
btrfs_log_new_name(trans, old_dentry, NULL, 0, dentry->d_parent);
|
||||||
|
|
||||||
fail:
|
fail:
|
||||||
fscrypt_free_filename(&fname);
|
fscrypt_free_filename(&fname);
|
||||||
if (trans)
|
if (trans)
|
||||||
btrfs_end_transaction(trans);
|
btrfs_end_transaction(trans);
|
||||||
if (drop_inode) {
|
|
||||||
inode_dec_link_count(inode);
|
|
||||||
iput(inode);
|
|
||||||
}
|
|
||||||
btrfs_btree_balance_dirty(fs_info);
|
btrfs_btree_balance_dirty(fs_info);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
@ -7830,6 +7829,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
|
|||||||
ei->last_sub_trans = 0;
|
ei->last_sub_trans = 0;
|
||||||
ei->logged_trans = 0;
|
ei->logged_trans = 0;
|
||||||
ei->delalloc_bytes = 0;
|
ei->delalloc_bytes = 0;
|
||||||
|
/* new_delalloc_bytes and last_dir_index_offset are in a union. */
|
||||||
ei->new_delalloc_bytes = 0;
|
ei->new_delalloc_bytes = 0;
|
||||||
ei->defrag_bytes = 0;
|
ei->defrag_bytes = 0;
|
||||||
ei->disk_i_size = 0;
|
ei->disk_i_size = 0;
|
||||||
|
@ -3340,6 +3340,31 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool mark_inode_as_not_logged(const struct btrfs_trans_handle *trans,
|
||||||
|
struct btrfs_inode *inode)
|
||||||
|
{
|
||||||
|
bool ret = false;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Do this only if ->logged_trans is still 0 to prevent races with
|
||||||
|
* concurrent logging as we may see the inode not logged when
|
||||||
|
* inode_logged() is called but it gets logged after inode_logged() did
|
||||||
|
* not find it in the log tree and we end up setting ->logged_trans to a
|
||||||
|
* value less than trans->transid after the concurrent logging task has
|
||||||
|
* set it to trans->transid. As a consequence, subsequent rename, unlink
|
||||||
|
* and link operations may end up not logging new names and removing old
|
||||||
|
* names from the log.
|
||||||
|
*/
|
||||||
|
spin_lock(&inode->lock);
|
||||||
|
if (inode->logged_trans == 0)
|
||||||
|
inode->logged_trans = trans->transid - 1;
|
||||||
|
else if (inode->logged_trans == trans->transid)
|
||||||
|
ret = true;
|
||||||
|
spin_unlock(&inode->lock);
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Check if an inode was logged in the current transaction. This correctly deals
|
* Check if an inode was logged in the current transaction. This correctly deals
|
||||||
* with the case where the inode was logged but has a logged_trans of 0, which
|
* with the case where the inode was logged but has a logged_trans of 0, which
|
||||||
@ -3357,15 +3382,32 @@ static int inode_logged(const struct btrfs_trans_handle *trans,
|
|||||||
struct btrfs_key key;
|
struct btrfs_key key;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
if (inode->logged_trans == trans->transid)
|
/*
|
||||||
|
* Quick lockless call, since once ->logged_trans is set to the current
|
||||||
|
* transaction, we never set it to a lower value anywhere else.
|
||||||
|
*/
|
||||||
|
if (data_race(inode->logged_trans) == trans->transid)
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If logged_trans is not 0, then we know the inode logged was not logged
|
* If logged_trans is not 0 and not trans->transid, then we know the
|
||||||
* in this transaction, so we can return false right away.
|
* inode was not logged in this transaction, so we can return false
|
||||||
|
* right away. We take the lock to avoid a race caused by load/store
|
||||||
|
* tearing with a concurrent btrfs_log_inode() call or a concurrent task
|
||||||
|
* in this function further below - an update to trans->transid can be
|
||||||
|
* torn into two 32-bit updates for example, in which case we could
|
||||||
|
* see a positive value that is not trans->transid and assume the inode
|
||||||
|
* was not logged when it was.
|
||||||
*/
|
*/
|
||||||
if (inode->logged_trans > 0)
|
spin_lock(&inode->lock);
|
||||||
|
if (inode->logged_trans == trans->transid) {
|
||||||
|
spin_unlock(&inode->lock);
|
||||||
|
return 1;
|
||||||
|
} else if (inode->logged_trans > 0) {
|
||||||
|
spin_unlock(&inode->lock);
|
||||||
return 0;
|
return 0;
|
||||||
|
}
|
||||||
|
spin_unlock(&inode->lock);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If no log tree was created for this root in this transaction, then
|
* If no log tree was created for this root in this transaction, then
|
||||||
@ -3374,10 +3416,8 @@ static int inode_logged(const struct btrfs_trans_handle *trans,
|
|||||||
* transaction's ID, to avoid the search below in a future call in case
|
* transaction's ID, to avoid the search below in a future call in case
|
||||||
* a log tree gets created after this.
|
* a log tree gets created after this.
|
||||||
*/
|
*/
|
||||||
if (!test_bit(BTRFS_ROOT_HAS_LOG_TREE, &inode->root->state)) {
|
if (!test_bit(BTRFS_ROOT_HAS_LOG_TREE, &inode->root->state))
|
||||||
inode->logged_trans = trans->transid - 1;
|
return mark_inode_as_not_logged(trans, inode);
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We have a log tree and the inode's logged_trans is 0. We can't tell
|
* We have a log tree and the inode's logged_trans is 0. We can't tell
|
||||||
@ -3431,8 +3471,7 @@ static int inode_logged(const struct btrfs_trans_handle *trans,
|
|||||||
* Set logged_trans to a value greater than 0 and less than the
|
* Set logged_trans to a value greater than 0 and less than the
|
||||||
* current transaction to avoid doing the search in future calls.
|
* current transaction to avoid doing the search in future calls.
|
||||||
*/
|
*/
|
||||||
inode->logged_trans = trans->transid - 1;
|
return mark_inode_as_not_logged(trans, inode);
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -3440,20 +3479,9 @@ static int inode_logged(const struct btrfs_trans_handle *trans,
|
|||||||
* the current transaction's ID, to avoid future tree searches as long as
|
* the current transaction's ID, to avoid future tree searches as long as
|
||||||
* the inode is not evicted again.
|
* the inode is not evicted again.
|
||||||
*/
|
*/
|
||||||
|
spin_lock(&inode->lock);
|
||||||
inode->logged_trans = trans->transid;
|
inode->logged_trans = trans->transid;
|
||||||
|
spin_unlock(&inode->lock);
|
||||||
/*
|
|
||||||
* If it's a directory, then we must set last_dir_index_offset to the
|
|
||||||
* maximum possible value, so that the next attempt to log the inode does
|
|
||||||
* not skip checking if dir index keys found in modified subvolume tree
|
|
||||||
* leaves have been logged before, otherwise it would result in attempts
|
|
||||||
* to insert duplicate dir index keys in the log tree. This must be done
|
|
||||||
* because last_dir_index_offset is an in-memory only field, not persisted
|
|
||||||
* in the inode item or any other on-disk structure, so its value is lost
|
|
||||||
* once the inode is evicted.
|
|
||||||
*/
|
|
||||||
if (S_ISDIR(inode->vfs_inode.i_mode))
|
|
||||||
inode->last_dir_index_offset = (u64)-1;
|
|
||||||
|
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
@ -4045,7 +4073,7 @@ done:
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* If the inode was logged before and it was evicted, then its
|
* If the inode was logged before and it was evicted, then its
|
||||||
* last_dir_index_offset is (u64)-1, so we don't the value of the last index
|
* last_dir_index_offset is 0, so we don't know the value of the last index
|
||||||
* key offset. If that's the case, search for it and update the inode. This
|
* key offset. If that's the case, search for it and update the inode. This
|
||||||
* is to avoid lookups in the log tree every time we try to insert a dir index
|
* is to avoid lookups in the log tree every time we try to insert a dir index
|
||||||
* key from a leaf changed in the current transaction, and to allow us to always
|
* key from a leaf changed in the current transaction, and to allow us to always
|
||||||
@ -4061,7 +4089,7 @@ static int update_last_dir_index_offset(struct btrfs_inode *inode,
|
|||||||
|
|
||||||
lockdep_assert_held(&inode->log_mutex);
|
lockdep_assert_held(&inode->log_mutex);
|
||||||
|
|
||||||
if (inode->last_dir_index_offset != (u64)-1)
|
if (inode->last_dir_index_offset != 0)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
if (!ctx->logged_before) {
|
if (!ctx->logged_before) {
|
||||||
|
Loading…
Reference in New Issue
Block a user