
Merge tag 'ext4_for_linus_6.17-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 updates from Ted Ts'o:
 "Major ext4 changes for 6.17:

   - Better scalability for ext4 block allocation

   - Fix insufficient credits when writing back large folios

  Miscellaneous bug fixes, especially when handling extended attributes,
  inline data, and fast commit"

* tag 'ext4_for_linus_6.17-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (39 commits)
  ext4: do not BUG when INLINE_DATA_FL lacks system.data xattr
  ext4: implement linear-like traversal across order xarrays
  ext4: refactor choose group to scan group
  ext4: convert free groups order lists to xarrays
  ext4: factor out ext4_mb_scan_group()
  ext4: factor out ext4_mb_might_prefetch()
  ext4: factor out __ext4_mb_scan_group()
  ext4: fix largest free orders lists corruption on mb_optimize_scan switch
  ext4: fix zombie groups in average fragment size lists
  ext4: merge freed extent with existing extents before insertion
  ext4: convert sbi->s_mb_free_pending to atomic_t
  ext4: fix typo in CR_GOAL_LEN_SLOW comment
  ext4: get rid of some obsolete EXT4_MB_HINT flags
  ext4: utilize multiple global goals to reduce contention
  ext4: remove unnecessary s_md_lock on update s_mb_last_group
  ext4: remove unnecessary s_mb_last_start
  ext4: separate stream goal hits from s_bal_goals for better tracking
  ext4: add ext4_try_lock_group() to skip busy groups
  ext4: initialize superblock fields in the kballoc-test.c kunit tests
  ext4: refactor the inline directory conversion and new directory codepaths
  ...
Linus Torvalds · 2025-07-31 10:02:44 -07:00 · commit ff7dcfedf9
15 changed files with 902 additions and 694 deletions

@@ -703,7 +703,7 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries)
	 * possible we just missed a transaction commit that did so
	 */
	smp_mb();
-	if (sbi->s_mb_free_pending == 0) {
+	if (atomic_read(&sbi->s_mb_free_pending) == 0) {
		if (test_opt(sb, DISCARD)) {
			atomic_inc(&sbi->s_retry_alloc_pending);
			flush_work(&sbi->s_discard_work);
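
The read above pairs with the conversion of sbi->s_mb_free_pending from a plain integer to an atomic_t ("ext4: convert sbi->s_mb_free_pending to atomic_t" in the commit list), so this path can poll the counter without taking a lock. A minimal userspace sketch of the same counter pattern, using C11 atomics and invented names:

    #include <stdatomic.h>

    static atomic_int free_pending;     /* clusters freed but not yet committed */

    static void note_freed_clusters(int n)
    {
            atomic_fetch_add(&free_pending, n);     /* writer side */
    }

    static _Bool commit_may_free_blocks(void)
    {
            /* Reader side; the kernel code additionally issues smp_mb()
             * to order this load against the preceding checks. */
            return atomic_load(&free_pending) != 0;
    }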


@@ -157,7 +157,7 @@ enum criteria {
	/*
	 * Reads each block group sequentially, performing disk IO if
-	 * necessary, to find find_suitable block group. Tries to
+	 * necessary, to find suitable block group. Tries to
	 * allocate goal length but might trim the request if nothing
	 * is found after enough tries.
	 */

@@ -185,14 +185,8 @@ enum criteria {
 /* prefer goal again. length */
 #define EXT4_MB_HINT_MERGE		0x0001
-/* blocks already reserved */
-#define EXT4_MB_HINT_RESERVED		0x0002
-/* metadata is being allocated */
-#define EXT4_MB_HINT_METADATA		0x0004
 /* first blocks in the file */
 #define EXT4_MB_HINT_FIRST		0x0008
-/* search for the best chunk */
-#define EXT4_MB_HINT_BEST		0x0010
 /* data is being allocated */
 #define EXT4_MB_HINT_DATA		0x0020
 /* don't preallocate (for tails) */

@@ -213,15 +207,6 @@ enum criteria {
 #define EXT4_MB_USE_RESERVED		0x2000
 /* Do strict check for free blocks while retrying block allocation */
 #define EXT4_MB_STRICT_CHECK		0x4000
-/* Large fragment size list lookup succeeded at least once for
- * CR_POWER2_ALIGNED */
-#define EXT4_MB_CR_POWER2_ALIGNED_OPTIMIZED		0x8000
-/* Avg fragment size rb tree lookup succeeded at least once for
- * CR_GOAL_LEN_FAST */
-#define EXT4_MB_CR_GOAL_LEN_FAST_OPTIMIZED		0x00010000
-/* Avg fragment size rb tree lookup succeeded at least once for
- * CR_BEST_AVAIL_LEN */
-#define EXT4_MB_CR_BEST_AVAIL_LEN_OPTIMIZED		0x00020000

 struct ext4_allocation_request {
	/* target inode for block we're allocating */

@@ -1608,16 +1593,14 @@ struct ext4_sb_info {
	unsigned short *s_mb_offsets;
	unsigned int *s_mb_maxs;
	unsigned int s_group_info_size;
-	unsigned int s_mb_free_pending;
+	atomic_t s_mb_free_pending;
	struct list_head s_freed_data_list[2];	/* List of blocks to be freed
						   after commit completed */
	struct list_head s_discard_list;
	struct work_struct s_discard_work;
	atomic_t s_retry_alloc_pending;
-	struct list_head *s_mb_avg_fragment_size;
-	rwlock_t *s_mb_avg_fragment_size_locks;
-	struct list_head *s_mb_largest_free_orders;
-	rwlock_t *s_mb_largest_free_orders_locks;
+	struct xarray *s_mb_avg_fragment_size;
+	struct xarray *s_mb_largest_free_orders;

	/* tunables */
	unsigned long s_stripe;
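
The hunk above also swaps the per-order group lists and their rwlocks for xarrays (see "ext4: convert free groups order lists to xarrays" in the commit list), so scanners can walk the groups of a given order without a rwlock round-trip. A hedged sketch of the lookup this enables; the xa_find() call is the real kernel API, the surrounding names are illustrative only:

    struct xarray *xa = &sbi->s_mb_largest_free_orders[order]; /* illustrative */
    unsigned long group = start_group;
    struct ext4_group_info *grp;

    /* Find the next present group at this order, starting from 'group'. */
    grp = xa_find(xa, &group, ULONG_MAX, XA_PRESENT);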
@@ -1629,15 +1612,16 @@ struct ext4_sb_info {
	unsigned int s_mb_order2_reqs;
	unsigned int s_mb_group_prealloc;
	unsigned int s_max_dir_size_kb;
-	/* where last allocation was done - for stream allocation */
-	unsigned long s_mb_last_group;
-	unsigned long s_mb_last_start;
	unsigned int s_mb_prefetch;
	unsigned int s_mb_prefetch_limit;
	unsigned int s_mb_best_avail_max_trim_order;
	unsigned int s_sb_update_sec;
	unsigned int s_sb_update_kb;
+	/* where last allocation was done - for stream allocation */
+	ext4_group_t *s_mb_last_groups;
+	unsigned int s_mb_nr_global_goals;

	/* stats for buddy allocator */
	atomic_t s_bal_reqs;	/* number of reqs with len > 1 */
	atomic_t s_bal_success;	/* we found long enough chunks */

@@ -1646,12 +1630,10 @@ struct ext4_sb_info {
	atomic_t s_bal_cX_ex_scanned[EXT4_MB_NUM_CRS];	/* total extents scanned */
	atomic_t s_bal_groups_scanned;	/* number of groups scanned */
	atomic_t s_bal_goals;	/* goal hits */
+	atomic_t s_bal_stream_goals;	/* stream allocation global goal hits */
	atomic_t s_bal_len_goals;	/* len goal hits */
	atomic_t s_bal_breaks;	/* too long searches */
	atomic_t s_bal_2orders;	/* 2^order hits */
-	atomic_t s_bal_p2_aligned_bad_suggestions;
-	atomic_t s_bal_goal_fast_bad_suggestions;
-	atomic_t s_bal_best_avail_bad_suggestions;
	atomic64_t s_bal_cX_groups_considered[EXT4_MB_NUM_CRS];
	atomic64_t s_bal_cX_hits[EXT4_MB_NUM_CRS];
	atomic64_t s_bal_cX_failed[EXT4_MB_NUM_CRS];	/* cX loop didn't find blocks */

@@ -3020,7 +3002,7 @@ int ext4_walk_page_buffers(handle_t *handle,
				     struct buffer_head *bh));
 int do_journal_get_write_access(handle_t *handle, struct inode *inode,
				struct buffer_head *bh);
-bool ext4_should_enable_large_folio(struct inode *inode);
+void ext4_set_inode_mapping_order(struct inode *inode);
 #define FALL_BACK_TO_NONDELALLOC	1
 #define CONVERT_INLINE_DATA		2

@@ -3064,9 +3046,9 @@ extern int ext4_punch_hole(struct file *file, loff_t offset, loff_t length);
 extern void ext4_set_inode_flags(struct inode *, bool init);
 extern int ext4_alloc_da_blocks(struct inode *inode);
 extern void ext4_set_aops(struct inode *inode);
-extern int ext4_writepage_trans_blocks(struct inode *);
 extern int ext4_normal_submit_inode_data_buffers(struct jbd2_inode *jinode);
 extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
+extern int ext4_chunk_trans_extent(struct inode *inode, int nrblocks);
 extern int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
				  int pextents);
 extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,

@@ -3489,8 +3471,6 @@ struct ext4_group_info {
	void *bb_bitmap;
 #endif
	struct rw_semaphore alloc_sem;
-	struct list_head bb_avg_fragment_size_node;
-	struct list_head bb_largest_free_order_node;
	ext4_grpblk_t bb_counters[];	/* Nr of free power-of-two-block
					 * regions, index is order.
					 * bb_counters[3] = 5 means
@@ -3541,23 +3521,28 @@ static inline int ext4_fs_is_busy(struct ext4_sb_info *sbi)
	return (atomic_read(&sbi->s_lock_busy) > EXT4_CONTENTION_THRESHOLD);
 }

+static inline bool ext4_try_lock_group(struct super_block *sb, ext4_group_t group)
+{
+	if (!spin_trylock(ext4_group_lock_ptr(sb, group)))
+		return false;
+	/*
+	 * We're able to grab the lock right away, so drop the lock
+	 * contention counter.
+	 */
+	atomic_add_unless(&EXT4_SB(sb)->s_lock_busy, -1, 0);
+	return true;
+}
+
 static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
 {
-	spinlock_t *lock = ext4_group_lock_ptr(sb, group);
-
-	if (spin_trylock(lock))
-		/*
-		 * We're able to grab the lock right away, so drop the
-		 * lock contention counter.
-		 */
-		atomic_add_unless(&EXT4_SB(sb)->s_lock_busy, -1, 0);
-	else {
+	if (!ext4_try_lock_group(sb, group)) {
		/*
		 * The lock is busy, so bump the contention counter,
		 * and then wait on the spin lock.
		 */
		atomic_add_unless(&EXT4_SB(sb)->s_lock_busy, 1,
				  EXT4_MAX_CONTENTION);
-		spin_lock(lock);
+		spin_lock(ext4_group_lock_ptr(sb, group));
	}
 }
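
ext4_try_lock_group() gives scanning paths a way to skip a contended group instead of queueing on its spinlock ("ext4: add ext4_try_lock_group() to skip busy groups" in the commit list). A hypothetical scan loop built on it; scan_group() is a stand-in name, not an ext4 function:

    for (group = 0; group < ngroups; group++) {
            if (!ext4_try_lock_group(sb, group))
                    continue;       /* busy: move on, revisit later if needed */
            ret = scan_group(sb, group);    /* stand-in for the real scanner */
            ext4_unlock_group(sb, group);
            if (ret > 0)
                    break;          /* found a usable group */
    }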
@@ -3612,6 +3597,7 @@ extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin);
 extern int ext4_get_max_inline_size(struct inode *inode);
 extern int ext4_find_inline_data_nolock(struct inode *inode);
 extern int ext4_destroy_inline_data(handle_t *handle, struct inode *inode);
+extern void ext4_update_final_de(void *de_buf, int old_size, int new_size);

 int ext4_readpage_inline(struct inode *inode, struct folio *folio);
 extern int ext4_try_to_write_inline_data(struct address_space *mapping,

@@ -3671,10 +3657,10 @@ static inline int ext4_has_inline_data(struct inode *inode)
 extern const struct inode_operations ext4_dir_inode_operations;
 extern const struct inode_operations ext4_special_inode_operations;
 extern struct dentry *ext4_get_parent(struct dentry *child);
-extern struct ext4_dir_entry_2 *ext4_init_dot_dotdot(struct inode *inode,
-				 struct ext4_dir_entry_2 *de,
-				 int blocksize, int csum_size,
-				 unsigned int parent_ino, int dotdot_real_len);
+extern int ext4_init_dirblock(handle_t *handle, struct inode *inode,
+			      struct buffer_head *dir_block,
+			      unsigned int parent_ino, void *inline_buf,
+			      int inline_size);
 extern void ext4_initialize_dirent_tail(struct buffer_head *bh,
					unsigned int blocksize);
 extern int ext4_handle_dirty_dirblock(handle_t *handle, struct inode *inode,


@@ -30,13 +30,6 @@
  */
 #define CHECK_BINSEARCH__

-/*
- * If EXT_STATS is defined then stats numbers are collected.
- * These number will be displayed at umount time.
- */
-#define EXT_STATS_
-
 /*
  * ext4_inode has i_block array (60 bytes total).
  * The first 12 bytes store ext4_extent_header;


@@ -5215,7 +5215,7 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
			credits = depth + 2;
		}

-		restart_credits = ext4_writepage_trans_blocks(inode);
+		restart_credits = ext4_chunk_trans_extent(inode, 0);
		err = ext4_datasem_ensure_credits(handle, inode, credits,
				restart_credits, 0);
		if (err) {

@@ -5475,7 +5475,7 @@ static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len)
	truncate_pagecache(inode, start);

-	credits = ext4_writepage_trans_blocks(inode);
+	credits = ext4_chunk_trans_extent(inode, 0);
	handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
	if (IS_ERR(handle))
		return PTR_ERR(handle);

@@ -5571,7 +5571,7 @@ static int ext4_insert_range(struct file *file, loff_t offset, loff_t len)
	truncate_pagecache(inode, start);

-	credits = ext4_writepage_trans_blocks(inode);
+	credits = ext4_chunk_trans_extent(inode, 0);
	handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
	if (IS_ERR(handle))
		return PTR_ERR(handle);


@@ -1335,8 +1335,7 @@ got:
		}
	}

-	if (ext4_should_enable_large_folio(inode))
-		mapping_set_large_folios(inode->i_mapping);
+	ext4_set_inode_mapping_order(inode);

	ext4_update_inode_fsync_trans(handle, inode, 1);


@@ -303,7 +303,11 @@ static int ext4_create_inline_data(handle_t *handle,
	if (error)
		goto out;

-	BUG_ON(!is.s.not_found);
+	if (!is.s.not_found) {
+		EXT4_ERROR_INODE(inode, "unexpected inline data xattr");
+		error = -EFSCORRUPTED;
+		goto out;
+	}

	error = ext4_xattr_ibody_set(handle, inode, &i, &is);
	if (error) {

@@ -354,7 +358,11 @@ static int ext4_update_inline_data(handle_t *handle, struct inode *inode,
	if (error)
		goto out;

-	BUG_ON(is.s.not_found);
+	if (is.s.not_found) {
+		EXT4_ERROR_INODE(inode, "missing inline data xattr");
+		error = -EFSCORRUPTED;
+		goto out;
+	}

	len -= EXT4_MIN_INLINE_DATA_SIZE;
	value = kzalloc(len, GFP_NOFS);

@@ -562,7 +570,7 @@ static int ext4_convert_inline_data_to_extent(struct address_space *mapping,
		return 0;
	}

-	needed_blocks = ext4_writepage_trans_blocks(inode);
+	needed_blocks = ext4_chunk_trans_extent(inode, 1);

	ret = ext4_get_inode_loc(inode, &iloc);
	if (ret)

@@ -612,6 +620,7 @@ retry:
	} else
		ret = ext4_block_write_begin(handle, folio, from, to,
					     ext4_get_block);
+	clear_buffer_new(folio_buffers(folio));

	if (!ret && ext4_should_journal_data(inode)) {
		ret = ext4_walk_page_buffers(handle, inode,

@@ -891,6 +900,7 @@ static int ext4_da_convert_inline_data_to_extent(struct address_space *mapping,
		return ret;
	}

+	clear_buffer_new(folio_buffers(folio));
	folio_mark_dirty(folio);
	folio_mark_uptodate(folio);
	ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);

@@ -995,7 +1005,7 @@ static void *ext4_get_inline_xattr_pos(struct inode *inode,
 }

 /* Set the final de to cover the whole block. */
-static void ext4_update_final_de(void *de_buf, int old_size, int new_size)
+void ext4_update_final_de(void *de_buf, int old_size, int new_size)
 {
	struct ext4_dir_entry_2 *de, *prev_de;
	void *limit;

@@ -1059,51 +1069,6 @@ static void ext4_restore_inline_data(handle_t *handle, struct inode *inode,
	ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
 }

-static int ext4_finish_convert_inline_dir(handle_t *handle,
-					  struct inode *inode,
-					  struct buffer_head *dir_block,
-					  void *buf,
-					  int inline_size)
-{
-	int err, csum_size = 0, header_size = 0;
-	struct ext4_dir_entry_2 *de;
-	void *target = dir_block->b_data;
-
-	/*
-	 * First create "." and ".." and then copy the dir information
-	 * back to the block.
-	 */
-	de = target;
-	de = ext4_init_dot_dotdot(inode, de,
-		inode->i_sb->s_blocksize, csum_size,
-		le32_to_cpu(((struct ext4_dir_entry_2 *)buf)->inode), 1);
-	header_size = (void *)de - target;
-
-	memcpy((void *)de, buf + EXT4_INLINE_DOTDOT_SIZE,
-		inline_size - EXT4_INLINE_DOTDOT_SIZE);
-
-	if (ext4_has_feature_metadata_csum(inode->i_sb))
-		csum_size = sizeof(struct ext4_dir_entry_tail);
-
-	inode->i_size = inode->i_sb->s_blocksize;
-	i_size_write(inode, inode->i_sb->s_blocksize);
-	EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize;
-	ext4_update_final_de(dir_block->b_data,
-			inline_size - EXT4_INLINE_DOTDOT_SIZE + header_size,
-			inode->i_sb->s_blocksize - csum_size);
-
-	if (csum_size)
-		ext4_initialize_dirent_tail(dir_block,
-					    inode->i_sb->s_blocksize);
-	set_buffer_uptodate(dir_block);
-	unlock_buffer(dir_block);
-	err = ext4_handle_dirty_dirblock(handle, inode, dir_block);
-	if (err)
-		return err;
-	set_buffer_verified(dir_block);
-	return ext4_mark_inode_dirty(handle, inode);
-}
-
 static int ext4_convert_inline_data_nolock(handle_t *handle,
					   struct inode *inode,
					   struct ext4_iloc *iloc)

@@ -1175,8 +1140,17 @@ static int ext4_convert_inline_data_nolock(handle_t *handle,
		error = ext4_handle_dirty_metadata(handle,
						   inode, data_bh);
	} else {
-		error = ext4_finish_convert_inline_dir(handle, inode, data_bh,
-						       buf, inline_size);
+		unlock_buffer(data_bh);
+		inode->i_size = inode->i_sb->s_blocksize;
+		i_size_write(inode, inode->i_sb->s_blocksize);
+		EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize;
+
+		error = ext4_init_dirblock(handle, inode, data_bh,
+			le32_to_cpu(((struct ext4_dir_entry_2 *)buf)->inode),
+			buf + EXT4_INLINE_DOTDOT_SIZE,
+			inline_size - EXT4_INLINE_DOTDOT_SIZE);
+		if (!error)
+			error = ext4_mark_inode_dirty(handle, inode);
	}

 out_restore:

@@ -1315,7 +1289,7 @@ int ext4_inlinedir_to_tree(struct file *dir_file,
		if (pos == 0) {
			fake.inode = cpu_to_le32(inode->i_ino);
			fake.name_len = 1;
-			strcpy(fake.name, ".");
+			memcpy(fake.name, ".", 2);
			fake.rec_len = ext4_rec_len_to_disk(
					  ext4_dir_rec_len(fake.name_len, NULL),
					  inline_size);

@@ -1325,7 +1299,7 @@ int ext4_inlinedir_to_tree(struct file *dir_file,
		} else if (pos == EXT4_INLINE_DOTDOT_OFFSET) {
			fake.inode = cpu_to_le32(parent_ino);
			fake.name_len = 2;
-			strcpy(fake.name, "..");
+			memcpy(fake.name, "..", 3);
			fake.rec_len = ext4_rec_len_to_disk(
					  ext4_dir_rec_len(fake.name_len, NULL),
					  inline_size);

@@ -1864,7 +1838,7 @@ int ext4_inline_data_truncate(struct inode *inode, int *has_inline)
	};

-	needed_blocks = ext4_writepage_trans_blocks(inode);
+	needed_blocks = ext4_chunk_trans_extent(inode, 1);
	handle = ext4_journal_start(inode, EXT4_HT_INODE, needed_blocks);
	if (IS_ERR(handle))
		return PTR_ERR(handle);

@@ -1903,7 +1877,12 @@ int ext4_inline_data_truncate(struct inode *inode, int *has_inline)
		if ((err = ext4_xattr_ibody_find(inode, &i, &is)) != 0)
			goto out_error;

-		BUG_ON(is.s.not_found);
+		if (is.s.not_found) {
+			EXT4_ERROR_INODE(inode,
+					 "missing inline data xattr");
+			err = -EFSCORRUPTED;
+			goto out_error;
+		}

		value_len = le32_to_cpu(is.s.here->e_value_size);
		value = kmalloc(value_len, GFP_NOFS);

@@ -1979,7 +1958,7 @@ int ext4_convert_inline_data(struct inode *inode)
		return 0;
	}

-	needed_blocks = ext4_writepage_trans_blocks(inode);
+	needed_blocks = ext4_chunk_trans_extent(inode, 1);

	iloc.bh = NULL;
	error = ext4_get_inode_loc(inode, &iloc);


@@ -723,8 +723,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
	ext4_check_map_extents_env(inode);

	/* Lookup extent status tree firstly */
-	if (!(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) &&
-	    ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es)) {
+	if (ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es)) {
		if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) {
			map->m_pblk = ext4_es_pblock(&es) +
					map->m_lblk - es.es_lblk;

@@ -757,8 +756,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
		    orig_mlen == map->m_len)
			goto found;

-		if (flags & EXT4_GET_BLOCKS_QUERY_LAST_IN_LEAF)
-			map->m_len = orig_mlen;
+		map->m_len = orig_mlen;
	}
	/*
	 * In the query cache no-wait mode, nothing we can do more if we

@@ -877,6 +875,26 @@ static void ext4_update_bh_state(struct buffer_head *bh, unsigned long flags)
	} while (unlikely(!try_cmpxchg(&bh->b_state, &old_state, new_state)));
 }

+/*
+ * Make sure that the current journal transaction has enough credits to map
+ * one extent. Return -EAGAIN if it cannot extend the current running
+ * transaction.
+ */
+static inline int ext4_journal_ensure_extent_credits(handle_t *handle,
+						     struct inode *inode)
+{
+	int credits;
+	int ret;
+
+	/* Called from ext4_da_write_begin() which has no handle started? */
+	if (!handle)
+		return 0;
+
+	credits = ext4_chunk_trans_blocks(inode, 1);
+	ret = __ext4_journal_ensure_credits(handle, credits, credits, 0);
+	return ret <= 0 ? ret : -EAGAIN;
+}
+
 static int _ext4_get_block(struct inode *inode, sector_t iblock,
			   struct buffer_head *bh, int flags)
 {
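
The helper's contract, as used throughout this series: 0 means the handle already has, or was successfully extended to, enough credits for one extent; -EAGAIN means the transaction could not be extended and the caller must stop the handle and restart. A condensed sketch of the caller pattern, mirroring ext4_block_write_begin() and ext4_write_begin() below:

    err = ext4_journal_ensure_extent_credits(handle, inode);
    if (!err)
            err = get_block(inode, block, bh, 1);   /* credits reserved */
    if (err == -EAGAIN)
            goto retry_journal;     /* restart with a fresh handle */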
@@ -1171,11 +1189,13 @@ int ext4_block_write_begin(handle_t *handle, struct folio *folio,
			}
			continue;
		}
-		if (buffer_new(bh))
+		if (WARN_ON_ONCE(buffer_new(bh)))
			clear_buffer_new(bh);
		if (!buffer_mapped(bh)) {
			WARN_ON(bh->b_size != blocksize);
-			err = get_block(inode, block, bh, 1);
+			err = ext4_journal_ensure_extent_credits(handle, inode);
+			if (!err)
+				err = get_block(inode, block, bh, 1);
			if (err)
				break;
			if (buffer_new(bh)) {

@@ -1274,7 +1294,8 @@ static int ext4_write_begin(const struct kiocb *iocb,
	 * Reserve one block more for addition to orphan list in case
	 * we allocate blocks but write fails for some reason
	 */
-	needed_blocks = ext4_writepage_trans_blocks(inode) + 1;
+	needed_blocks = ext4_chunk_trans_extent(inode,
+			ext4_journal_blocks_per_folio(inode)) + 1;
	index = pos >> PAGE_SHIFT;

	if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) {

@@ -1372,8 +1393,9 @@ retry_journal:
			ext4_orphan_del(NULL, inode);
	}

-	if (ret == -ENOSPC &&
-	    ext4_should_retry_alloc(inode->i_sb, &retries))
+	if (ret == -EAGAIN ||
+	    (ret == -ENOSPC &&
+	     ext4_should_retry_alloc(inode->i_sb, &retries)))
		goto retry_journal;
	folio_put(folio);
	return ret;

@@ -1393,6 +1415,7 @@ static int write_end_fn(handle_t *handle, struct inode *inode,
	ret = ext4_dirty_journalled_data(handle, bh);
	clear_buffer_meta(bh);
	clear_buffer_prio(bh);
+	clear_buffer_new(bh);
	return ret;
 }

@@ -1665,11 +1688,12 @@ struct mpage_da_data {
	unsigned int can_map:1;	/* Can writepages call map blocks? */

	/* These are internal state of ext4_do_writepages() */
-	pgoff_t first_page;	/* The first page to write */
-	pgoff_t next_page;	/* Current page to examine */
-	pgoff_t last_page;	/* Last page to examine */
+	loff_t start_pos;	/* The start pos to write */
+	loff_t next_pos;	/* Current pos to examine */
+	loff_t end_pos;		/* Last pos to examine */
+
	/*
-	 * Extent to map - this can be after first_page because that can be
+	 * Extent to map - this can be after start_pos because that can be
	 * fully mapped. We somewhat abuse m_flags to store whether the extent
	 * is delalloc or unwritten.
	 */

@@ -1689,38 +1713,38 @@ static void mpage_release_unused_pages(struct mpage_da_data *mpd,
	struct inode *inode = mpd->inode;
	struct address_space *mapping = inode->i_mapping;

-	/* This is necessary when next_page == 0. */
-	if (mpd->first_page >= mpd->next_page)
+	/* This is necessary when next_pos == 0. */
+	if (mpd->start_pos >= mpd->next_pos)
		return;

	mpd->scanned_until_end = 0;
-	index = mpd->first_page;
-	end   = mpd->next_page - 1;
	if (invalidate) {
		ext4_lblk_t start, last;
-		start = index << (PAGE_SHIFT - inode->i_blkbits);
-		last = end << (PAGE_SHIFT - inode->i_blkbits);
+		start = EXT4_B_TO_LBLK(inode, mpd->start_pos);
+		last = mpd->next_pos >> inode->i_blkbits;

		/*
		 * avoid racing with extent status tree scans made by
		 * ext4_insert_delayed_block()
		 */
		down_write(&EXT4_I(inode)->i_data_sem);
-		ext4_es_remove_extent(inode, start, last - start + 1);
+		ext4_es_remove_extent(inode, start, last - start);
		up_write(&EXT4_I(inode)->i_data_sem);
	}

	folio_batch_init(&fbatch);
-	while (index <= end) {
-		nr = filemap_get_folios(mapping, &index, end, &fbatch);
+	index = mpd->start_pos >> PAGE_SHIFT;
+	end = mpd->next_pos >> PAGE_SHIFT;
+	while (index < end) {
+		nr = filemap_get_folios(mapping, &index, end - 1, &fbatch);
		if (nr == 0)
			break;
		for (i = 0; i < nr; i++) {
			struct folio *folio = fbatch.folios[i];

-			if (folio->index < mpd->first_page)
+			if (folio_pos(folio) < mpd->start_pos)
				continue;
-			if (folio_next_index(folio) - 1 > end)
+			if (folio_next_index(folio) > end)
				continue;
			BUG_ON(!folio_test_locked(folio));
			BUG_ON(folio_test_writeback(folio));

@@ -2022,7 +2046,8 @@ int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,

 static void mpage_folio_done(struct mpage_da_data *mpd, struct folio *folio)
 {
-	mpd->first_page += folio_nr_pages(folio);
+	mpd->start_pos += folio_size(folio);
+	mpd->wbc->nr_to_write -= folio_nr_pages(folio);
	folio_unlock(folio);
 }

@@ -2032,7 +2057,7 @@ static int mpage_submit_folio(struct mpage_da_data *mpd, struct folio *folio)
	loff_t size;
	int err;

-	BUG_ON(folio->index != mpd->first_page);
+	WARN_ON_ONCE(folio_pos(folio) != mpd->start_pos);
	folio_clear_dirty_for_io(folio);
	/*
	 * We have to be very careful here! Nothing protects writeback path

@@ -2053,8 +2078,6 @@ static int mpage_submit_folio(struct mpage_da_data *mpd, struct folio *folio)
	    !ext4_verity_in_progress(mpd->inode))
		len = size & (len - 1);
	err = ext4_bio_write_folio(&mpd->io_submit, folio, len);
-	if (!err)
-		mpd->wbc->nr_to_write -= folio_nr_pages(folio);

	return err;
 }

@@ -2321,6 +2344,11 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd)
	int get_blocks_flags;
	int err, dioread_nolock;

+	/* Make sure transaction has enough credits for this extent */
+	err = ext4_journal_ensure_extent_credits(handle, inode);
+	if (err < 0)
+		return err;
+
	trace_ext4_da_write_pages_extent(inode, map);
	/*
	 * Call ext4_map_blocks() to allocate any delayed allocation blocks, or

@@ -2359,6 +2387,47 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd)
	return 0;
 }

+/*
+ * This is used to submit mapped buffers in a single folio that is not fully
+ * mapped for various reasons, such as insufficient space or journal credits.
+ */
+static int mpage_submit_partial_folio(struct mpage_da_data *mpd)
+{
+	struct inode *inode = mpd->inode;
+	struct folio *folio;
+	loff_t pos;
+	int ret;
+
+	folio = filemap_get_folio(inode->i_mapping,
+				  mpd->start_pos >> PAGE_SHIFT);
+	if (IS_ERR(folio))
+		return PTR_ERR(folio);
+
+	/*
+	 * The mapped position should be within the current processing folio
+	 * but must not be the folio start position.
+	 */
+	pos = ((loff_t)mpd->map.m_lblk) << inode->i_blkbits;
+	if (WARN_ON_ONCE((folio_pos(folio) == pos) ||
+			 !folio_contains(folio, pos >> PAGE_SHIFT)))
+		return -EINVAL;
+
+	ret = mpage_submit_folio(mpd, folio);
+	if (ret)
+		goto out;
+
+	/*
+	 * Update start_pos to prevent this folio from being released in
+	 * mpage_release_unused_pages(), it will be reset to the aligned folio
+	 * pos when this folio is written again in the next round. Additionally,
+	 * do not update wbc->nr_to_write here, as it will be updated once the
+	 * entire folio has finished processing.
+	 */
+	mpd->start_pos = pos;
+out:
+	folio_unlock(folio);
+	folio_put(folio);
+	return ret;
+}
+
 /*
  * mpage_map_and_submit_extent - map extent starting at mpd->lblk of length
  * mpd->len and submit pages underlying it for IO
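
A worked example of the position math in mpage_submit_partial_folio(), with all numbers assumed: 4 KiB pages and blocks (PAGE_SHIFT == i_blkbits == 12), a 64 KiB folio at file offset 0, and only the first 8 blocks mapped when credits ran out:

    /* mpd->map.m_lblk == 8, so: */
    pos = ((loff_t)8) << 12;    /* 32768: inside the folio, not its start */
    /* The WARN_ON_ONCE() stays quiet: folio_pos() == 0 != pos, and page
     * index 32768 >> 12 == 8 lies within the folio. After submission,
     * mpd->start_pos = 32768 keeps the folio's unmapped tail dirty for
     * the next writeback round. */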
@@ -2407,10 +2476,18 @@ static int mpage_map_and_submit_extent(handle_t *handle,
			 * In the case of ENOSPC, if ext4_count_free_blocks()
			 * is non-zero, a commit should free up blocks.
			 */
-			if ((err == -ENOMEM) ||
+			if ((err == -ENOMEM) || (err == -EAGAIN) ||
			    (err == -ENOSPC && ext4_count_free_clusters(sb))) {
-				if (progress)
+				/*
+				 * We may have already allocated extents for
+				 * some bhs inside the folio, issue the
+				 * corresponding data to prevent stale data.
+				 */
+				if (progress) {
+					if (mpage_submit_partial_folio(mpd))
+						goto invalidate_dirty_pages;
					goto update_disksize;
+				}
				return err;
			}
			ext4_msg(sb, KERN_CRIT,

@@ -2444,7 +2521,7 @@ update_disksize:
	 * Update on-disk size after IO is submitted. Races with
	 * truncate are avoided by checking i_size under i_data_sem.
	 */
-	disksize = ((loff_t)mpd->first_page) << PAGE_SHIFT;
+	disksize = mpd->start_pos;
	if (disksize > READ_ONCE(EXT4_I(inode)->i_disksize)) {
		int err2;
		loff_t i_size;

@@ -2468,21 +2545,6 @@ update_disksize:
	return err;
 }

-/*
- * Calculate the total number of credits to reserve for one writepages
- * iteration. This is called from ext4_writepages(). We map an extent of
- * up to MAX_WRITEPAGES_EXTENT_LEN blocks and then we go on and finish mapping
- * the last partial page. So in total we can map MAX_WRITEPAGES_EXTENT_LEN +
- * bpp - 1 blocks in bpp different extents.
- */
-static int ext4_da_writepages_trans_blocks(struct inode *inode)
-{
-	int bpp = ext4_journal_blocks_per_folio(inode);
-
-	return ext4_meta_trans_blocks(inode,
-				MAX_WRITEPAGES_EXTENT_LEN + bpp - 1, bpp);
-}
-
 static int ext4_journal_folio_buffers(handle_t *handle, struct folio *folio,
				      size_t len)
 {

@@ -2547,8 +2609,8 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
	struct address_space *mapping = mpd->inode->i_mapping;
	struct folio_batch fbatch;
	unsigned int nr_folios;
-	pgoff_t index = mpd->first_page;
-	pgoff_t end = mpd->last_page;
+	pgoff_t index = mpd->start_pos >> PAGE_SHIFT;
+	pgoff_t end = mpd->end_pos >> PAGE_SHIFT;
	xa_mark_t tag;
	int i, err = 0;
	int blkbits = mpd->inode->i_blkbits;

@@ -2563,7 +2625,7 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
		tag = PAGECACHE_TAG_DIRTY;

	mpd->map.m_len = 0;
-	mpd->next_page = index;
+	mpd->next_pos = mpd->start_pos;
	if (ext4_should_journal_data(mpd->inode)) {
		handle = ext4_journal_start(mpd->inode, EXT4_HT_WRITE_PAGE,
					    bpp);

@@ -2594,7 +2656,8 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
				goto out;

			/* If we can't merge this page, we are done. */
-			if (mpd->map.m_len > 0 && mpd->next_page != folio->index)
+			if (mpd->map.m_len > 0 &&
+			    mpd->next_pos != folio_pos(folio))
				goto out;

			if (handle) {

@@ -2640,8 +2703,8 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
			}

			if (mpd->map.m_len == 0)
-				mpd->first_page = folio->index;
-			mpd->next_page = folio_next_index(folio);
+				mpd->start_pos = folio_pos(folio);
+			mpd->next_pos = folio_pos(folio) + folio_size(folio);
			/*
			 * Writeout when we cannot modify metadata is simple.
			 * Just submit the page. For data=journal mode we

@@ -2769,12 +2832,12 @@ static int ext4_do_writepages(struct mpage_da_data *mpd)
	mpd->journalled_more_data = 0;

	if (ext4_should_dioread_nolock(inode)) {
+		int bpf = ext4_journal_blocks_per_folio(inode);
		/*
		 * We may need to convert up to one extent per block in
-		 * the page and we may dirty the inode.
+		 * the folio and we may dirty the inode.
		 */
-		rsv_blocks = 1 + ext4_chunk_trans_blocks(inode,
-				PAGE_SIZE >> inode->i_blkbits);
+		rsv_blocks = 1 + ext4_ext_index_trans_blocks(inode, bpf);
	}

	if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)

@@ -2784,18 +2847,18 @@ static int ext4_do_writepages(struct mpage_da_data *mpd)
		writeback_index = mapping->writeback_index;
		if (writeback_index)
			cycled = 0;
-		mpd->first_page = writeback_index;
-		mpd->last_page = -1;
+		mpd->start_pos = writeback_index << PAGE_SHIFT;
+		mpd->end_pos = LLONG_MAX;
	} else {
-		mpd->first_page = wbc->range_start >> PAGE_SHIFT;
-		mpd->last_page = wbc->range_end >> PAGE_SHIFT;
+		mpd->start_pos = wbc->range_start;
+		mpd->end_pos = wbc->range_end;
	}

	ext4_io_submit_init(&mpd->io_submit, wbc);
 retry:
	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
-		tag_pages_for_writeback(mapping, mpd->first_page,
-					mpd->last_page);
+		tag_pages_for_writeback(mapping, mpd->start_pos >> PAGE_SHIFT,
+					mpd->end_pos >> PAGE_SHIFT);
	blk_start_plug(&plug);

	/*

@@ -2838,8 +2901,14 @@ retry:
			 * not supported by delalloc.
			 */
			BUG_ON(ext4_should_journal_data(inode));
-			needed_blocks = ext4_da_writepages_trans_blocks(inode);
-
+			/*
+			 * Calculate the number of credits needed to reserve for one
+			 * extent of up to MAX_WRITEPAGES_EXTENT_LEN blocks. It will
+			 * attempt to extend the transaction or start a new iteration
+			 * if the reserved credits are insufficient.
+			 */
+			needed_blocks = ext4_chunk_trans_blocks(inode,
+						MAX_WRITEPAGES_EXTENT_LEN);
			/* start a new transaction */
			handle = ext4_journal_start_with_reserve(inode,
					EXT4_HT_WRITE_PAGE, needed_blocks, rsv_blocks);

@@ -2855,7 +2924,8 @@ retry:
			}
			mpd->do_map = 1;

-			trace_ext4_da_write_pages(inode, mpd->first_page, wbc);
+			trace_ext4_da_write_folios_start(inode, mpd->start_pos,
+					mpd->next_pos, wbc);
			ret = mpage_prepare_extent_to_map(mpd);
			if (!ret && mpd->map.m_len)
				ret = mpage_map_and_submit_extent(handle, mpd,

@@ -2893,6 +2963,8 @@ retry:
		} else
			ext4_put_io_end(mpd->io_submit.io_end);
		mpd->io_submit.io_end = NULL;
+		trace_ext4_da_write_folios_end(inode, mpd->start_pos,
+				mpd->next_pos, wbc, ret);

		if (ret == -ENOSPC && sbi->s_journal) {
			/*

@@ -2904,6 +2976,8 @@ retry:
			ret = 0;
			continue;
		}
+		if (ret == -EAGAIN)
+			ret = 0;
		/* Fatal error - ENOMEM, EIO... */
		if (ret)
			break;

@@ -2912,8 +2986,8 @@ unplug:
	blk_finish_plug(&plug);
	if (!ret && !cycled && wbc->nr_to_write > 0) {
		cycled = 1;
-		mpd->last_page = writeback_index - 1;
-		mpd->first_page = 0;
+		mpd->end_pos = (writeback_index << PAGE_SHIFT) - 1;
+		mpd->start_pos = 0;
		goto retry;
	}

@@ -2923,7 +2997,7 @@ unplug:
	 * Set the writeback_index so that range_cyclic
	 * mode will write it back later
	 */
-	mapping->writeback_index = mpd->first_page;
+	mapping->writeback_index = mpd->start_pos >> PAGE_SHIFT;

 out_writepages:
	trace_ext4_writepages_result(inode, wbc, ret,

@@ -4384,7 +4458,7 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
		return ret;

	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
-		credits = ext4_writepage_trans_blocks(inode);
+		credits = ext4_chunk_trans_extent(inode, 2);
	else
		credits = ext4_blocks_for_truncate(inode);
	handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);

@@ -4533,7 +4607,7 @@ int ext4_truncate(struct inode *inode)
	}

	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
-		credits = ext4_writepage_trans_blocks(inode);
+		credits = ext4_chunk_trans_extent(inode, 1);
	else
		credits = ext4_blocks_for_truncate(inode);

@@ -5101,7 +5175,7 @@ error:
	return -EFSCORRUPTED;
 }

-bool ext4_should_enable_large_folio(struct inode *inode)
+static bool ext4_should_enable_large_folio(struct inode *inode)
 {
	struct super_block *sb = inode->i_sb;

@@ -5118,6 +5192,22 @@ bool ext4_should_enable_large_folio(struct inode *inode)
	return true;
 }

+/*
+ * Limit the maximum folio order to 2048 blocks to prevent overestimation
+ * of reserve handle credits during the folio writeback in environments
+ * where the PAGE_SIZE exceeds 4KB.
+ */
+#define EXT4_MAX_PAGECACHE_ORDER(i)		\
+	umin(MAX_PAGECACHE_ORDER, (11 + (i)->i_blkbits - PAGE_SHIFT))
+
+void ext4_set_inode_mapping_order(struct inode *inode)
+{
+	if (!ext4_should_enable_large_folio(inode))
+		return;
+
+	mapping_set_folio_order_range(inode->i_mapping, 0,
+				      EXT4_MAX_PAGECACHE_ORDER(inode));
+}
+
 struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
			  ext4_iget_flags flags, const char *function,
			  unsigned int line)
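
The 2048-block cap works out the same for any page size; two assumed geometries for illustration:

    /* 4 KiB pages (PAGE_SHIFT == 12), 4 KiB blocks (i_blkbits == 12):
     *   order <= 11 + 12 - 12 = 11, so 2^11 pages * 4 KiB = 8 MiB = 2048 blocks
     * 64 KiB pages (PAGE_SHIFT == 16), 4 KiB blocks:
     *   order <= 11 + 12 - 16 = 7,  so 2^7 pages * 64 KiB = 8 MiB = 2048 blocks
     * Either way a folio spans at most 2048 blocks, which bounds the journal
     * credits that folio writeback has to reserve. */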
@@ -5435,8 +5525,8 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
		ret = -EFSCORRUPTED;
		goto bad_inode;
	}
-	if (ext4_should_enable_large_folio(inode))
-		mapping_set_large_folios(inode->i_mapping);
+
+	ext4_set_inode_mapping_order(inode);

	ret = check_igot_inode(inode, flags, function, line);
	/*

@@ -6134,7 +6224,7 @@ int ext4_meta_trans_blocks(struct inode *inode, int lblocks, int pextents)
	int ret;

	/*
-	 * How many index and lead blocks need to touch to map @lblocks
+	 * How many index and leaf blocks need to touch to map @lblocks
	 * logical blocks to @pextents physical extents?
	 */
	idxblocks = ext4_index_trans_blocks(inode, lblocks, pextents);

@@ -6143,7 +6233,7 @@ int ext4_meta_trans_blocks(struct inode *inode, int lblocks, int pextents)
	 * Now let's see how many group bitmaps and group descriptors need
	 * to account
	 */
-	groups = idxblocks;
+	groups = idxblocks + pextents;
	gdpblocks = groups;
	if (groups > ngroups)
		groups = ngroups;

@@ -6160,25 +6250,19 @@ int ext4_meta_trans_blocks(struct inode *inode, int lblocks, int pextents)
 }

 /*
- * Calculate the total number of credits to reserve to fit
- * the modification of a single pages into a single transaction,
- * which may include multiple chunks of block allocations.
- *
- * This could be called via ext4_write_begin()
- *
- * We need to consider the worse case, when
- * one new block per extent.
+ * Calculate the journal credits for modifying the number of blocks
+ * in a single extent within one transaction. 'nrblocks' is used only
+ * for non-extent inodes. For extent type inodes, 'nrblocks' can be
+ * zero if the exact number of blocks is unknown.
  */
-int ext4_writepage_trans_blocks(struct inode *inode)
+int ext4_chunk_trans_extent(struct inode *inode, int nrblocks)
 {
-	int bpp = ext4_journal_blocks_per_folio(inode);
	int ret;

-	ret = ext4_meta_trans_blocks(inode, bpp, bpp);
+	ret = ext4_meta_trans_blocks(inode, nrblocks, 1);

	/* Account for data blocks for journalled mode */
	if (ext4_should_journal_data(inode))
-		ret += bpp;
+		ret += nrblocks;
	return ret;
 }

@@ -6550,6 +6634,55 @@ static int ext4_bh_unmapped(handle_t *handle, struct inode *inode,
	return !buffer_mapped(bh);
 }

+static int ext4_block_page_mkwrite(struct inode *inode, struct folio *folio,
+				   get_block_t get_block)
+{
+	handle_t *handle;
+	loff_t size;
+	unsigned long len;
+	int credits;
+	int ret;
+
+	credits = ext4_chunk_trans_extent(inode,
+			ext4_journal_blocks_per_folio(inode));
+	handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, credits);
+	if (IS_ERR(handle))
+		return PTR_ERR(handle);
+
+	folio_lock(folio);
+	size = i_size_read(inode);
+	/* Page got truncated from under us? */
+	if (folio->mapping != inode->i_mapping || folio_pos(folio) > size) {
+		ret = -EFAULT;
+		goto out_error;
+	}
+
+	len = folio_size(folio);
+	if (folio_pos(folio) + len > size)
+		len = size - folio_pos(folio);
+
+	ret = ext4_block_write_begin(handle, folio, 0, len, get_block);
+	if (ret)
+		goto out_error;
+
+	if (!ext4_should_journal_data(inode)) {
+		block_commit_write(folio, 0, len);
+		folio_mark_dirty(folio);
+	} else {
+		ret = ext4_journal_folio_buffers(handle, folio, len);
+		if (ret)
+			goto out_error;
+	}
+	ext4_journal_stop(handle);
+	folio_wait_stable(folio);
+	return ret;
+
+out_error:
+	folio_unlock(folio);
+	ext4_journal_stop(handle);
+	return ret;
+}
+
 vm_fault_t ext4_page_mkwrite(struct vm_fault *vmf)
 {
	struct vm_area_struct *vma = vmf->vma;

@@ -6561,8 +6694,7 @@ vm_fault_t ext4_page_mkwrite(struct vm_fault *vmf)
	struct file *file = vma->vm_file;
	struct inode *inode = file_inode(file);
	struct address_space *mapping = inode->i_mapping;
-	handle_t *handle;
-	get_block_t *get_block;
+	get_block_t *get_block = ext4_get_block;
	int retries = 0;

	if (unlikely(IS_IMMUTABLE(inode)))

@@ -6630,47 +6762,11 @@ vm_fault_t ext4_page_mkwrite(struct vm_fault *vmf)
	/* OK, we need to fill the hole... */
	if (ext4_should_dioread_nolock(inode))
		get_block = ext4_get_block_unwritten;
-	else
-		get_block = ext4_get_block;
 retry_alloc:
-	handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE,
-				    ext4_writepage_trans_blocks(inode));
-	if (IS_ERR(handle)) {
-		ret = VM_FAULT_SIGBUS;
-		goto out;
-	}
-	/*
-	 * Data journalling can't use block_page_mkwrite() because it
-	 * will set_buffer_dirty() before do_journal_get_write_access()
-	 * thus might hit warning messages for dirty metadata buffers.
-	 */
-	if (!ext4_should_journal_data(inode)) {
-		err = block_page_mkwrite(vma, vmf, get_block);
-	} else {
-		folio_lock(folio);
-		size = i_size_read(inode);
-		/* Page got truncated from under us? */
-		if (folio->mapping != mapping || folio_pos(folio) > size) {
-			ret = VM_FAULT_NOPAGE;
-			goto out_error;
-		}
-
-		len = folio_size(folio);
-		if (folio_pos(folio) + len > size)
-			len = size - folio_pos(folio);
-
-		err = ext4_block_write_begin(handle, folio, 0, len,
-					     ext4_get_block);
-		if (!err) {
-			ret = VM_FAULT_SIGBUS;
-			if (ext4_journal_folio_buffers(handle, folio, len))
-				goto out_error;
-		} else {
-			folio_unlock(folio);
-		}
-	}
-	ext4_journal_stop(handle);
-	if (err == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
+	/* Start journal and allocate blocks */
+	err = ext4_block_page_mkwrite(inode, folio, get_block);
+	if (err == -EAGAIN ||
+	    (err == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)))
		goto retry_alloc;
 out_ret:
	ret = vmf_fs_error(err);

@@ -6678,8 +6774,4 @@ out:
	filemap_invalidate_unlock_shared(mapping);
	sb_end_pagefault(inode->i_sb);
	return ret;
-out_error:
-	folio_unlock(folio);
-	ext4_journal_stop(handle);
-	goto out;
 }


@@ -155,6 +155,7 @@ static struct super_block *mbt_ext4_alloc_super_block(void)
	bgl_lock_init(sbi->s_blockgroup_lock);

	sbi->s_es = &fsb->es;
+	sbi->s_sb = sb;
	sb->s_fs_info = sbi;

	up_write(&sb->s_umount);

@@ -802,6 +803,8 @@ static void test_mb_mark_used(struct kunit *test)
	KUNIT_ASSERT_EQ(test, ret, 0);

	grp->bb_free = EXT4_CLUSTERS_PER_GROUP(sb);
+	grp->bb_largest_free_order = -1;
+	grp->bb_avg_fragment_size_order = -1;
	mbt_generate_test_ranges(sb, ranges, TEST_RANGE_COUNT);
	for (i = 0; i < TEST_RANGE_COUNT; i++)
		test_mb_mark_used_range(test, &e4b, ranges[i].start,

@@ -875,6 +878,8 @@ static void test_mb_free_blocks(struct kunit *test)
	ext4_unlock_group(sb, TEST_GOAL_GROUP);

	grp->bb_free = 0;
+	grp->bb_largest_free_order = -1;
+	grp->bb_avg_fragment_size_order = -1;
	memset(bitmap, 0xff, sb->s_blocksize);
	mbt_generate_test_ranges(sb, ranges, TEST_RANGE_COUNT);

(File diff suppressed because it is too large.)


@@ -192,8 +192,13 @@ struct ext4_allocation_context {
	 */
	ext4_grpblk_t	ac_orig_goal_len;

+	ext4_group_t	ac_prefetch_grp;
+	unsigned int	ac_prefetch_ios;
+	unsigned int	ac_prefetch_nr;
+
+	int		ac_first_err;
+
	__u32 ac_flags;		/* allocation hints */
-	__u32 ac_groups_linear_remaining;
	__u16 ac_groups_scanned;
	__u16 ac_found;
	__u16 ac_cX_found[EXT4_MB_NUM_CRS];

@@ -204,6 +209,8 @@ struct ext4_allocation_context {
	__u8 ac_2order;		/* if request is to allocate 2^N blocks and
				 * N > 0, the field stores N, otherwise 0 */
	__u8 ac_op;		/* operation, for history only */
+
+	struct ext4_buddy *ac_e4b;
	struct folio *ac_bitmap_folio;
	struct folio *ac_buddy_folio;
	struct ext4_prealloc_space *ac_pa;


@@ -280,7 +280,8 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
	 */
 again:
	*err = 0;
-	jblocks = ext4_writepage_trans_blocks(orig_inode) * 2;
+	jblocks = ext4_meta_trans_blocks(orig_inode, block_len_in_page,
+					 block_len_in_page) * 2;
	handle = ext4_journal_start(orig_inode, EXT4_HT_MOVE_EXTENTS, jblocks);
	if (IS_ERR(handle)) {
		*err = PTR_ERR(handle);


@@ -2915,33 +2915,50 @@ err_unlock_inode:
	return err;
 }

-struct ext4_dir_entry_2 *ext4_init_dot_dotdot(struct inode *inode,
-			  struct ext4_dir_entry_2 *de,
-			  int blocksize, int csum_size,
-			  unsigned int parent_ino, int dotdot_real_len)
+int ext4_init_dirblock(handle_t *handle, struct inode *inode,
+		       struct buffer_head *bh, unsigned int parent_ino,
+		       void *inline_buf, int inline_size)
 {
+	struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *) bh->b_data;
+	size_t blocksize = bh->b_size;
+	int csum_size = 0, header_size;
+
+	if (ext4_has_feature_metadata_csum(inode->i_sb))
+		csum_size = sizeof(struct ext4_dir_entry_tail);
+
	de->inode = cpu_to_le32(inode->i_ino);
	de->name_len = 1;
	de->rec_len = ext4_rec_len_to_disk(ext4_dir_rec_len(de->name_len, NULL),
					   blocksize);
-	strcpy(de->name, ".");
+	memcpy(de->name, ".", 2);
	ext4_set_de_type(inode->i_sb, de, S_IFDIR);

	de = ext4_next_entry(de, blocksize);
	de->inode = cpu_to_le32(parent_ino);
	de->name_len = 2;
-	if (!dotdot_real_len)
-		de->rec_len = ext4_rec_len_to_disk(blocksize -
-					(csum_size + ext4_dir_rec_len(1, NULL)),
-					blocksize);
-	else
+	memcpy(de->name, "..", 3);
+	ext4_set_de_type(inode->i_sb, de, S_IFDIR);
+	if (inline_buf) {
		de->rec_len = ext4_rec_len_to_disk(
					ext4_dir_rec_len(de->name_len, NULL),
					blocksize);
-	strcpy(de->name, "..");
-	ext4_set_de_type(inode->i_sb, de, S_IFDIR);
+		de = ext4_next_entry(de, blocksize);
+		header_size = (char *)de - bh->b_data;
+		memcpy((void *)de, inline_buf, inline_size);
+		ext4_update_final_de(bh->b_data, inline_size + header_size,
+				     blocksize - csum_size);
+	} else {
+		de->rec_len = ext4_rec_len_to_disk(blocksize -
					(csum_size + ext4_dir_rec_len(1, NULL)),
					blocksize);
+	}

-	return ext4_next_entry(de, blocksize);
+	if (csum_size)
+		ext4_initialize_dirent_tail(bh, blocksize);
+	BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata");
+	set_buffer_uptodate(bh);
+	set_buffer_verified(bh);
+	return ext4_handle_dirty_dirblock(handle, inode, bh);
 }

 int ext4_init_new_dir(handle_t *handle, struct inode *dir,
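
For a fresh (non-inline) directory block, the function above produces the following layout; a 4096-byte block and metadata_csum enabled are assumed for the arithmetic:

    /* offset 0:    "."  entry, rec_len = 12   (ext4_dir_rec_len(1) == 12)
     * offset 12:   ".." entry, rec_len = 4096 - (12 + 12) = 4072
     * offset 4084: struct ext4_dir_entry_tail (12 bytes, checksum)
     *
     * With inline_buf != NULL, ".." instead keeps rec_len == 12, the former
     * inline entries are copied in after it, and ext4_update_final_de()
     * stretches the last one to end at 4096 - 12. */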
@@ -2950,13 +2967,8 @@ int ext4_init_new_dir(handle_t *handle, struct inode *dir,
	struct buffer_head *dir_block = NULL;
	struct ext4_dir_entry_2 *de;
	ext4_lblk_t block = 0;
-	unsigned int blocksize = dir->i_sb->s_blocksize;
-	int csum_size = 0;
	int err;

-	if (ext4_has_feature_metadata_csum(dir->i_sb))
-		csum_size = sizeof(struct ext4_dir_entry_tail);
-
	if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) {
		err = ext4_try_create_inline_dir(handle, dir, inode);
		if (err < 0 && err != -ENOSPC)

@@ -2965,21 +2977,15 @@ int ext4_init_new_dir(handle_t *handle, struct inode *dir,
			goto out;
	}

+	set_nlink(inode, 2);
	inode->i_size = 0;
	dir_block = ext4_append(handle, inode, &block);
	if (IS_ERR(dir_block))
		return PTR_ERR(dir_block);
	de = (struct ext4_dir_entry_2 *)dir_block->b_data;
-	ext4_init_dot_dotdot(inode, de, blocksize, csum_size, dir->i_ino, 0);
-	set_nlink(inode, 2);
-	if (csum_size)
-		ext4_initialize_dirent_tail(dir_block, blocksize);
-
-	BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata");
-	err = ext4_handle_dirty_dirblock(handle, inode, dir_block);
+	err = ext4_init_dirblock(handle, inode, dir_block, dir->i_ino, NULL, 0);
	if (err)
		goto out;
-	set_buffer_verified(dir_block);
 out:
	brelse(dir_block);
	return err;
@@ -3082,7 +3088,8 @@ bool ext4_empty_dir(struct inode *inode)
 	de = (struct ext4_dir_entry_2 *) bh->b_data;
 	if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data, bh->b_size,
 				 0) ||
-	    le32_to_cpu(de->inode) != inode->i_ino || strcmp(".", de->name)) {
+	    le32_to_cpu(de->inode) != inode->i_ino || de->name_len != 1 ||
+	    de->name[0] != '.') {
 		ext4_warning_inode(inode, "directory missing '.'");
 		brelse(bh);
 		return false;
@@ -3091,7 +3098,8 @@ bool ext4_empty_dir(struct inode *inode)
 	de = ext4_next_entry(de, sb->s_blocksize);
 	if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data, bh->b_size,
 				 offset) ||
-	    le32_to_cpu(de->inode) == 0 || strcmp("..", de->name)) {
+	    le32_to_cpu(de->inode) == 0 || de->name_len != 2 ||
+	    de->name[0] != '.' || de->name[1] != '.') {
 		ext4_warning_inode(inode, "directory missing '..'");
 		brelse(bh);
 		return false;
@@ -3532,7 +3540,7 @@ static struct buffer_head *ext4_get_first_dir_block(handle_t *handle,
 	if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data,
 				 bh->b_size, 0) ||
 	    le32_to_cpu(de->inode) != inode->i_ino ||
-	    strcmp(".", de->name)) {
+	    de->name_len != 1 || de->name[0] != '.') {
 		EXT4_ERROR_INODE(inode, "directory missing '.'");
 		brelse(bh);
 		*retval = -EFSCORRUPTED;
@@ -3543,7 +3551,8 @@ static struct buffer_head *ext4_get_first_dir_block(handle_t *handle,
 	de = ext4_next_entry(de, inode->i_sb->s_blocksize);
 	if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data,
 				 bh->b_size, offset) ||
-	    le32_to_cpu(de->inode) == 0 || strcmp("..", de->name)) {
+	    le32_to_cpu(de->inode) == 0 || de->name_len != 2 ||
+	    de->name[0] != '.' || de->name[1] != '.') {
 		EXT4_ERROR_INODE(inode, "directory missing '..'");
 		brelse(bh);
 		*retval = -EFSCORRUPTED;
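The four hunks above replace strcmp()-based checks for "." and ".." with explicit name_len and byte comparisons. ext4 directory entry names are length-prefixed and not guaranteed to be NUL-terminated on disk, so strcmp() against de->name can read past the entry's name bytes on a corrupted filesystem; comparing name_len and the individual characters is exact and never walks off the record. A standalone illustration (the struct and helper names are invented for the example):

#include <stdbool.h>
#include <stdio.h>

struct demo_dirent {
	unsigned char name_len;
	char name[255];		/* not NUL-terminated on disk */
};

static bool demo_is_dot(const struct demo_dirent *de)
{
	return de->name_len == 1 && de->name[0] == '.';
}

static bool demo_is_dotdot(const struct demo_dirent *de)
{
	return de->name_len == 2 &&
	       de->name[0] == '.' && de->name[1] == '.';
}

int main(void)
{
	struct demo_dirent de = { .name_len = 2, .name = ".." };

	printf("%d %d\n", demo_is_dot(&de), demo_is_dotdot(&de)); /* 0 1 */
	return 0;
}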

View File

@@ -236,10 +236,12 @@ static void dump_completed_IO(struct inode *inode, struct list_head *head)
 static bool ext4_io_end_defer_completion(ext4_io_end_t *io_end)
 {
-	if (io_end->flag & EXT4_IO_END_UNWRITTEN)
+	if (io_end->flag & EXT4_IO_END_UNWRITTEN &&
+	    !list_empty(&io_end->list_vec))
 		return true;
 	if (test_opt(io_end->inode->i_sb, DATA_ERR_ABORT) &&
-	    io_end->flag & EXT4_IO_END_FAILED)
+	    io_end->flag & EXT4_IO_END_FAILED &&
+	    !ext4_emergency_state(io_end->inode->i_sb))
 		return true;
 	return false;
 }
@@ -256,6 +258,7 @@ static void ext4_add_complete_io(ext4_io_end_t *io_end)
 	WARN_ON(!(io_end->flag & EXT4_IO_END_DEFER_COMPLETION));
 	WARN_ON(io_end->flag & EXT4_IO_END_UNWRITTEN &&
 		!io_end->handle && sbi->s_journal);
+	WARN_ON(!io_end->bio);
 
 	spin_lock_irqsave(&ei->i_completed_io_lock, flags);
 	wq = sbi->rsv_conversion_wq;
@@ -318,12 +321,9 @@ ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
 void ext4_put_io_end_defer(ext4_io_end_t *io_end)
 {
 	if (refcount_dec_and_test(&io_end->count)) {
-		if (io_end->flag & EXT4_IO_END_FAILED ||
-		    (io_end->flag & EXT4_IO_END_UNWRITTEN &&
-		     !list_empty(&io_end->list_vec))) {
-			ext4_add_complete_io(io_end);
-			return;
-		}
+		if (ext4_io_end_defer_completion(io_end))
+			return ext4_add_complete_io(io_end);
+
 		ext4_release_io_end(io_end);
 	}
 }
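Taken together, these hunks make ext4_io_end_defer_completion() the single place that decides whether an io_end must be completed from the conversion workqueue; ext4_put_io_end_defer() previously open-coded its own variant of the test. The predicate is also tightened: an UNWRITTEN io_end with an empty list_vec has no extents to convert and can be released directly, and a failed io_end is no longer deferred once ext4_emergency_state() reports the filesystem is shutting down. In miniature, with simplified stand-in types (nothing below is kernel API):

#include <stdbool.h>
#include <stdio.h>

struct demo_io_end { bool unwritten; bool has_extents; bool failed; };

/* One shared predicate: must completion run from the workqueue? */
static bool demo_defer_completion(const struct demo_io_end *io)
{
	if (io->unwritten && io->has_extents)
		return true;
	return io->failed;	/* kernel also checks mount opts / emergency state */
}

static void demo_put_io_end(struct demo_io_end *io)
{
	if (demo_defer_completion(io)) {
		puts("queue for deferred completion");
		return;
	}
	puts("release immediately");
}

int main(void)
{
	struct demo_io_end a = { .unwritten = true, .has_extents = false };
	struct demo_io_end b = { .unwritten = true, .has_extents = true };

	demo_put_io_end(&a);	/* released at once: nothing to convert */
	demo_put_io_end(&b);	/* queued for deferred completion */
	return 0;
}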

View File

@@ -338,7 +338,7 @@ xattr_find_entry(struct inode *inode, struct ext4_xattr_entry **pentry,
 		cmp = name_len - entry->e_name_len;
 		if (!cmp)
 			cmp = memcmp(name, entry->e_name, name_len);
-		if (cmp <= 0 && (sorted || cmp == 0))
+		if (!cmp || (cmp < 0 && sorted))
 			break;
 	}
 	*pentry = entry;
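The old condition cmp <= 0 && (sorted || cmp == 0) and the new !cmp || (cmp < 0 && sorted) accept exactly the same cases; the rewrite just states the intent directly: stop on an exact match or, when the entry list is sorted, as soon as the search has passed the point where the name would have to appear, at which point it is provably absent. The same early-exit pattern in a standalone sketch (all names invented for the example):

#include <stdio.h>
#include <string.h>

static int find_entry(const char *names[], int n, const char *want, int sorted)
{
	for (int i = 0; i < n; i++) {
		int cmp = strcmp(want, names[i]);

		if (!cmp || (cmp < 0 && sorted))
			return cmp ? -1 : i;	/* found, or provably absent */
	}
	return -1;
}

int main(void)
{
	const char *names[] = { "alpha", "beta", "delta" };

	printf("%d\n", find_entry(names, 3, "beta", 1));	/* 1 */
	printf("%d\n", find_entry(names, 3, "charlie", 1));	/* -1, stops at "delta" */
	return 0;
}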
@@ -962,7 +962,7 @@ int __ext4_xattr_set_credits(struct super_block *sb, struct inode *inode,
 	 * so we need to reserve credits for this eventuality
 	 */
 	if (inode && ext4_has_inline_data(inode))
-		credits += ext4_writepage_trans_blocks(inode) + 1;
+		credits += ext4_chunk_trans_extent(inode, 1) + 1;
 
 	/* We are done if ea_inode feature is not enabled. */
 	if (!ext4_has_feature_ea_inode(sb))
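This path reserves journal credits for pushing inline data out to a real block before the xattr change can proceed. ext4_writepage_trans_blocks() is sized for writing back a whole mapping range, which over-reserves here; ext4_chunk_trans_extent(inode, 1), which judging by its name and this usage covers allocating and mapping a single one-block extent, matches the actual worst case, with the extra +1 credit unchanged. A hypothetical sketch of the budgeting shape (not the kernel helpers):

#include <stdbool.h>
#include <stdio.h>

static int demo_xattr_credits(bool has_inline_data, int one_extent_credits)
{
	int credits = 0;

	if (has_inline_data)	/* inline data may have to move to a block */
		credits += one_extent_credits + 1;
	return credits;
}

int main(void)
{
	printf("%d\n", demo_xattr_credits(true, 8));	/* 9 */
	return 0;
}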

View File

@@ -23,10 +23,7 @@ struct partial_cluster;
 #define show_mballoc_flags(flags) __print_flags(flags, "|",	\
 	{ EXT4_MB_HINT_MERGE,		"HINT_MERGE" },		\
-	{ EXT4_MB_HINT_RESERVED,	"HINT_RESV" },		\
-	{ EXT4_MB_HINT_METADATA,	"HINT_MDATA" },		\
 	{ EXT4_MB_HINT_FIRST,		"HINT_FIRST" },		\
-	{ EXT4_MB_HINT_BEST,		"HINT_BEST" },		\
 	{ EXT4_MB_HINT_DATA,		"HINT_DATA" },		\
 	{ EXT4_MB_HINT_NOPREALLOC,	"HINT_NOPREALLOC" },	\
 	{ EXT4_MB_HINT_GROUP_ALLOC,	"HINT_GRP_ALLOC" },	\
@@ -483,16 +480,17 @@ TRACE_EVENT(ext4_writepages,
 		  (unsigned long) __entry->writeback_index)
 );
 
-TRACE_EVENT(ext4_da_write_pages,
-	TP_PROTO(struct inode *inode, pgoff_t first_page,
+TRACE_EVENT(ext4_da_write_folios_start,
+	TP_PROTO(struct inode *inode, loff_t start_pos, loff_t next_pos,
 		 struct writeback_control *wbc),
 
-	TP_ARGS(inode, first_page, wbc),
+	TP_ARGS(inode, start_pos, next_pos, wbc),
 
 	TP_STRUCT__entry(
 		__field( dev_t, dev )
 		__field( ino_t, ino )
-		__field( pgoff_t, first_page )
+		__field( loff_t, start_pos )
+		__field( loff_t, next_pos )
 		__field( long, nr_to_write )
 		__field( int, sync_mode )
 	),
@@ -500,18 +498,48 @@ TRACE_EVENT(ext4_da_write_pages,
 	TP_fast_assign(
 		__entry->dev = inode->i_sb->s_dev;
 		__entry->ino = inode->i_ino;
-		__entry->first_page = first_page;
+		__entry->start_pos = start_pos;
+		__entry->next_pos = next_pos;
 		__entry->nr_to_write = wbc->nr_to_write;
 		__entry->sync_mode = wbc->sync_mode;
 	),
 
-	TP_printk("dev %d,%d ino %lu first_page %lu nr_to_write %ld "
-		  "sync_mode %d",
+	TP_printk("dev %d,%d ino %lu start_pos 0x%llx next_pos 0x%llx nr_to_write %ld sync_mode %d",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  (unsigned long) __entry->ino, __entry->first_page,
+		  (unsigned long) __entry->ino, __entry->start_pos, __entry->next_pos,
 		  __entry->nr_to_write, __entry->sync_mode)
 );
 
+TRACE_EVENT(ext4_da_write_folios_end,
+	TP_PROTO(struct inode *inode, loff_t start_pos, loff_t next_pos,
+		 struct writeback_control *wbc, int ret),
+
+	TP_ARGS(inode, start_pos, next_pos, wbc, ret),
+
+	TP_STRUCT__entry(
+		__field( dev_t, dev )
+		__field( ino_t, ino )
+		__field( loff_t, start_pos )
+		__field( loff_t, next_pos )
+		__field( long, nr_to_write )
+		__field( int, ret )
+	),
+
+	TP_fast_assign(
+		__entry->dev = inode->i_sb->s_dev;
+		__entry->ino = inode->i_ino;
+		__entry->start_pos = start_pos;
+		__entry->next_pos = next_pos;
+		__entry->nr_to_write = wbc->nr_to_write;
+		__entry->ret = ret;
+	),
+
+	TP_printk("dev %d,%d ino %lu start_pos 0x%llx next_pos 0x%llx nr_to_write %ld ret %d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  (unsigned long) __entry->ino, __entry->start_pos, __entry->next_pos,
+		  __entry->nr_to_write, __entry->ret)
+);
+
 TRACE_EVENT(ext4_da_write_pages_extent,
 	TP_PROTO(struct inode *inode, struct ext4_map_blocks *map),
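The renamed start/end tracepoint pair follows ext4's delalloc writeback moving from page-index batches to folio batches: a pgoff_t first_page only describes the range at PAGE_SIZE granularity, while loff_t byte positions stay exact for arbitrarily sized folios, and the new _end event additionally records the outcome. The old field maps onto the new one as in this small sketch (constants are illustrative):

#include <stdio.h>

#define DEMO_PAGE_SHIFT 12	/* 4 KiB pages, for illustration only */

int main(void)
{
	unsigned long first_page = 25;	/* what the old event logged */
	long long start_pos = (long long)first_page << DEMO_PAGE_SHIFT;

	/* prints: first_page 25 -> start_pos 0x19000 */
	printf("first_page %lu -> start_pos 0x%llx\n", first_page, start_pos);
	return 0;
}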