2
0
mirror of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git synced 2025-09-04 20:19:47 +08:00

for-6.17-rc4-tag

-----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEE8rQSAMVO+zA4DBdWxWXV+ddtWDsFAmi3Od8ACgkQxWXV+ddt
 WDvZrA/7BOm51t1iNjw46NFviYq5u9/uTjL9hnYJoaXIH/DUdK5PyTACwlDJsrKk
 aFYTSH04OfFhmvy2Fje1YxmkuCx+VQ49qUdUBZdbx5wD7Rj+FthXcSLxrIATfQ7W
 qrLyHr65pxpVI9zWdZh+E2Ls5beCDqi/Rcdidii+NqTdnJJ/l21LXNsLPw6O09rh
 dRIJlb4fr+3ioEkAMDfC5p5lLUb/76lAeB+3CHc2cB4m3hPRMZeET/bd8pXo4g2D
 C9vjOp2asRzawM9bg33jb6LTQlHW+Yug1N13MN0tx01pTvnRbs/mGq7qlnCPtDXo
 SbFUTDqMIXnkq9ohdaOXJH9mPksG/vq+2GoS5bCORvsl+cDbFtzQsjWoC7wkcXa+
 e8JfcKrjTUKGSaCOacbz+CHEzAhiv5yCdIloofUQSQJfw+R0rsQbGlZuCP9dlzD7
 GGlnQKD23duaLs0u9wA+KC6T+Ifz7z42GRDBlzo/9ZXQcSunJJvK5hsE6S5wejka
 aZqSPfyb95gvYb+LnXepHCrRVi3lCSfV3W6m6xg2fDEgwBvxe8iEZnzSUmcMMcvu
 nY+sXS3ezpBLym+3NRYVWMlbk+Jre28TkeUCrTMTvd6nPCK9yIOeygx6XR8jKZRb
 tJG0nhs3daqcG/EUmEKwQOar5lF7dpr8kAI+LbK+Wi8a92tTxL4=
 =fL6z
 -----END PGP SIGNATURE-----

Merge tag 'for-6.17-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs fixes from David Sterba:

 - fix a few races related to inode link count

 - fix inode leak on failure to add link to inode

 - move transaction aborts closer to where they happen

* tag 'for-6.17-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: avoid load/store tearing races when checking if an inode was logged
  btrfs: fix race between setting last_dir_index_offset and inode logging
  btrfs: fix race between logging inode and checking if it was logged before
  btrfs: simplify error handling logic for btrfs_link()
  btrfs: fix inode leak on failure to add link to inode
  btrfs: abort transaction on failure to add link to inode
This commit is contained in:
Linus Torvalds 2025-09-02 13:13:22 -07:00
commit e3c94a539e
3 changed files with 79 additions and 51 deletions

View File

@ -248,7 +248,7 @@ struct btrfs_inode {
u64 new_delalloc_bytes; u64 new_delalloc_bytes;
/* /*
* The offset of the last dir index key that was logged. * The offset of the last dir index key that was logged.
* This is used only for directories. * This is used only for directories. Protected by 'log_mutex'.
*/ */
u64 last_dir_index_offset; u64 last_dir_index_offset;
}; };

View File

@ -6805,7 +6805,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
struct fscrypt_name fname; struct fscrypt_name fname;
u64 index; u64 index;
int ret; int ret;
int drop_inode = 0;
/* do not allow sys_link's with other subvols of the same device */ /* do not allow sys_link's with other subvols of the same device */
if (btrfs_root_id(root) != btrfs_root_id(BTRFS_I(inode)->root)) if (btrfs_root_id(root) != btrfs_root_id(BTRFS_I(inode)->root))
@ -6837,44 +6836,44 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
/* There are several dir indexes for this inode, clear the cache. */ /* There are several dir indexes for this inode, clear the cache. */
BTRFS_I(inode)->dir_index = 0ULL; BTRFS_I(inode)->dir_index = 0ULL;
inc_nlink(inode);
inode_inc_iversion(inode); inode_inc_iversion(inode);
inode_set_ctime_current(inode); inode_set_ctime_current(inode);
ihold(inode);
set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags); set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags);
ret = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode), ret = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode),
&fname.disk_name, 1, index); &fname.disk_name, 1, index);
if (ret) {
drop_inode = 1;
} else {
struct dentry *parent = dentry->d_parent;
ret = btrfs_update_inode(trans, BTRFS_I(inode));
if (ret) if (ret)
goto fail; goto fail;
/* Link added now we update the inode item with the new link count. */
inc_nlink(inode);
ret = btrfs_update_inode(trans, BTRFS_I(inode));
if (ret) {
btrfs_abort_transaction(trans, ret);
goto fail;
}
if (inode->i_nlink == 1) { if (inode->i_nlink == 1) {
/* /*
* If new hard link count is 1, it's a file created * If the new hard link count is 1, it's a file created with the
* with open(2) O_TMPFILE flag. * open(2) O_TMPFILE flag.
*/ */
ret = btrfs_orphan_del(trans, BTRFS_I(inode)); ret = btrfs_orphan_del(trans, BTRFS_I(inode));
if (ret) if (ret) {
btrfs_abort_transaction(trans, ret);
goto fail; goto fail;
} }
d_instantiate(dentry, inode);
btrfs_log_new_name(trans, old_dentry, NULL, 0, parent);
} }
/* Grab reference for the new dentry passed to d_instantiate(). */
ihold(inode);
d_instantiate(dentry, inode);
btrfs_log_new_name(trans, old_dentry, NULL, 0, dentry->d_parent);
fail: fail:
fscrypt_free_filename(&fname); fscrypt_free_filename(&fname);
if (trans) if (trans)
btrfs_end_transaction(trans); btrfs_end_transaction(trans);
if (drop_inode) {
inode_dec_link_count(inode);
iput(inode);
}
btrfs_btree_balance_dirty(fs_info); btrfs_btree_balance_dirty(fs_info);
return ret; return ret;
} }
@ -7830,6 +7829,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
ei->last_sub_trans = 0; ei->last_sub_trans = 0;
ei->logged_trans = 0; ei->logged_trans = 0;
ei->delalloc_bytes = 0; ei->delalloc_bytes = 0;
/* new_delalloc_bytes and last_dir_index_offset are in a union. */
ei->new_delalloc_bytes = 0; ei->new_delalloc_bytes = 0;
ei->defrag_bytes = 0; ei->defrag_bytes = 0;
ei->disk_i_size = 0; ei->disk_i_size = 0;

View File

@ -3340,6 +3340,31 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
return 0; return 0;
} }
/*
 * Record that an inode has not been logged in the current transaction, unless
 * a concurrent task already changed ->logged_trans.
 *
 * The update is only applied while ->logged_trans is still 0. A caller may
 * have concluded the inode is not in the log tree, while another task logged
 * it right afterwards and stored trans->transid in ->logged_trans. Blindly
 * overwriting that with a smaller value would make later rename, unlink and
 * link operations skip logging new names and skip removing old names from
 * the log.
 *
 * The check and the store are done under inode->lock.
 *
 * Returns true if ->logged_trans was found to be equal to trans->transid
 * (the inode was logged in this transaction after all), false otherwise.
 */
static bool mark_inode_as_not_logged(const struct btrfs_trans_handle *trans,
				     struct btrfs_inode *inode)
{
	bool logged_in_trans;

	spin_lock(&inode->lock);
	if (inode->logged_trans != 0) {
		/* Someone else set it; report whether it is this transaction. */
		logged_in_trans = (inode->logged_trans == trans->transid);
	} else {
		/* Any value in (0, transid) means "not logged in this transaction". */
		inode->logged_trans = trans->transid - 1;
		logged_in_trans = false;
	}
	spin_unlock(&inode->lock);

	return logged_in_trans;
}
/* /*
* Check if an inode was logged in the current transaction. This correctly deals * Check if an inode was logged in the current transaction. This correctly deals
* with the case where the inode was logged but has a logged_trans of 0, which * with the case where the inode was logged but has a logged_trans of 0, which
@ -3357,15 +3382,32 @@ static int inode_logged(const struct btrfs_trans_handle *trans,
struct btrfs_key key; struct btrfs_key key;
int ret; int ret;
if (inode->logged_trans == trans->transid) /*
* Quick lockless call, since once ->logged_trans is set to the current
* transaction, we never set it to a lower value anywhere else.
*/
if (data_race(inode->logged_trans) == trans->transid)
return 1; return 1;
/* /*
* If logged_trans is not 0, then we know the inode logged was not logged * If logged_trans is not 0 and not trans->transid, then we know the
* in this transaction, so we can return false right away. * inode was not logged in this transaction, so we can return false
* right away. We take the lock to avoid a race caused by load/store
* tearing with a concurrent btrfs_log_inode() call or a concurrent task
* in this function further below - an update to trans->transid can be
* torn into two 32-bit updates for example, in which case we could
* see a positive value that is not trans->transid and assume the inode
* was not logged when it was.
*/ */
if (inode->logged_trans > 0) spin_lock(&inode->lock);
if (inode->logged_trans == trans->transid) {
spin_unlock(&inode->lock);
return 1;
} else if (inode->logged_trans > 0) {
spin_unlock(&inode->lock);
return 0; return 0;
}
spin_unlock(&inode->lock);
/* /*
* If no log tree was created for this root in this transaction, then * If no log tree was created for this root in this transaction, then
@ -3374,10 +3416,8 @@ static int inode_logged(const struct btrfs_trans_handle *trans,
* transaction's ID, to avoid the search below in a future call in case * transaction's ID, to avoid the search below in a future call in case
* a log tree gets created after this. * a log tree gets created after this.
*/ */
if (!test_bit(BTRFS_ROOT_HAS_LOG_TREE, &inode->root->state)) { if (!test_bit(BTRFS_ROOT_HAS_LOG_TREE, &inode->root->state))
inode->logged_trans = trans->transid - 1; return mark_inode_as_not_logged(trans, inode);
return 0;
}
/* /*
* We have a log tree and the inode's logged_trans is 0. We can't tell * We have a log tree and the inode's logged_trans is 0. We can't tell
@ -3431,8 +3471,7 @@ static int inode_logged(const struct btrfs_trans_handle *trans,
* Set logged_trans to a value greater than 0 and less than the * Set logged_trans to a value greater than 0 and less than the
* current transaction to avoid doing the search in future calls. * current transaction to avoid doing the search in future calls.
*/ */
inode->logged_trans = trans->transid - 1; return mark_inode_as_not_logged(trans, inode);
return 0;
} }
/* /*
@ -3440,20 +3479,9 @@ static int inode_logged(const struct btrfs_trans_handle *trans,
* the current transaction's ID, to avoid future tree searches as long as * the current transaction's ID, to avoid future tree searches as long as
* the inode is not evicted again. * the inode is not evicted again.
*/ */
spin_lock(&inode->lock);
inode->logged_trans = trans->transid; inode->logged_trans = trans->transid;
spin_unlock(&inode->lock);
/*
* If it's a directory, then we must set last_dir_index_offset to the
* maximum possible value, so that the next attempt to log the inode does
* not skip checking if dir index keys found in modified subvolume tree
* leaves have been logged before, otherwise it would result in attempts
* to insert duplicate dir index keys in the log tree. This must be done
* because last_dir_index_offset is an in-memory only field, not persisted
* in the inode item or any other on-disk structure, so its value is lost
* once the inode is evicted.
*/
if (S_ISDIR(inode->vfs_inode.i_mode))
inode->last_dir_index_offset = (u64)-1;
return 1; return 1;
} }
@ -4045,7 +4073,7 @@ done:
/* /*
* If the inode was logged before and it was evicted, then its * If the inode was logged before and it was evicted, then its
* last_dir_index_offset is (u64)-1, so we don't the value of the last index * last_dir_index_offset is 0, so we don't know the value of the last index
* key offset. If that's the case, search for it and update the inode. This * key offset. If that's the case, search for it and update the inode. This
* is to avoid lookups in the log tree every time we try to insert a dir index * is to avoid lookups in the log tree every time we try to insert a dir index
* key from a leaf changed in the current transaction, and to allow us to always * key from a leaf changed in the current transaction, and to allow us to always
@ -4061,7 +4089,7 @@ static int update_last_dir_index_offset(struct btrfs_inode *inode,
lockdep_assert_held(&inode->log_mutex); lockdep_assert_held(&inode->log_mutex);
if (inode->last_dir_index_offset != (u64)-1) if (inode->last_dir_index_offset != 0)
return 0; return 0;
if (!ctx->logged_before) { if (!ctx->logged_before) {