mirror of
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-09-04 20:19:47 +08:00

acc->k.data should be used with the lock hold: 00221 ========= TEST generic/187 00221 run fstests generic/187 at 2025-02-09 21:08:10 00221 spectre-v4 mitigation disabled by command-line option 00222 bcachefs (vdc): starting version 1.20: directory_size opts=errors=ro 00222 bcachefs (vdc): initializing new filesystem 00222 bcachefs (vdc): going read-write 00222 bcachefs (vdc): marking superblocks 00222 bcachefs (vdc): initializing freespace 00222 bcachefs (vdc): done initializing freespace 00222 bcachefs (vdc): reading snapshots table 00222 bcachefs (vdc): reading snapshots done 00222 bcachefs (vdc): done starting filesystem 00222 bcachefs (vdc): shutting down 00222 bcachefs (vdc): going read-only 00222 bcachefs (vdc): finished waiting for writes to stop 00223 bcachefs (vdc): flushing journal and stopping allocators, journal seq 6 00223 bcachefs (vdc): flushing journal and stopping allocators complete, journal seq 8 00223 bcachefs (vdc): clean shutdown complete, journal seq 9 00223 bcachefs (vdc): marking filesystem clean 00223 bcachefs (vdc): shutdown complete 00223 bcachefs (vdc): starting version 1.20: directory_size opts=errors=ro 00223 bcachefs (vdc): initializing new filesystem 00223 bcachefs (vdc): going read-write 00223 bcachefs (vdc): marking superblocks 00223 bcachefs (vdc): initializing freespace 00223 bcachefs (vdc): done initializing freespace 00223 bcachefs (vdc): reading snapshots table 00223 bcachefs (vdc): reading snapshots done 00223 bcachefs (vdc): done starting filesystem 00244 hrtimer: interrupt took 123350440 ns 00264 bcachefs (vdc): shutting down 00264 bcachefs (vdc): going read-only 00264 bcachefs (vdc): finished waiting for writes to stop 00264 bcachefs (vdc): flushing journal and stopping allocators, journal seq 97 00265 bcachefs (vdc): flushing journal and stopping allocators complete, journal seq 101 00265 bcachefs (vdc): clean shutdown complete, journal seq 102 00265 bcachefs (vdc): marking filesystem clean 00265 bcachefs (vdc): shutdown 
complete 00265 bcachefs (vdc): starting version 1.20: directory_size opts=errors=ro 00265 bcachefs (vdc): recovering from clean shutdown, journal seq 102 00265 bcachefs (vdc): accounting_read... 00265 ================================================================== 00265 done 00265 BUG: KASAN: slab-use-after-free in bch2_fs_to_text+0x12b4/0x1728 00265 bcachefs (vdc): alloc_read... done 00265 bcachefs (vdc): stripes_read... done 00265 Read of size 4 at addr ffffff80c57eac00 by task cat/7531 00265 bcachefs (vdc): snapshots_read... done 00265 00265 CPU: 6 UID: 0 PID: 7531 Comm: cat Not tainted 6.13.0-rc3-ktest-g16fc6fa3819d #14103 00265 Hardware name: linux,dummy-virt (DT) 00265 Call trace: 00265 show_stack+0x1c/0x30 (C) 00265 dump_stack_lvl+0x6c/0x80 00265 print_report+0xf8/0x5d8 00265 kasan_report+0x90/0xd0 00265 __asan_report_load4_noabort+0x1c/0x28 00265 bch2_fs_to_text+0x12b4/0x1728 00265 bch2_fs_show+0x94/0x188 00265 sysfs_kf_seq_show+0x1a4/0x348 00265 kernfs_seq_show+0x12c/0x198 00265 seq_read_iter+0x27c/0xfd0 00265 kernfs_fop_read_iter+0x390/0x4f8 00265 vfs_read+0x480/0x7f0 00265 ksys_read+0xe0/0x1e8 00265 __arm64_sys_read+0x70/0xa8 00265 invoke_syscall.constprop.0+0x74/0x1e8 00265 do_el0_svc+0xc8/0x1c8 00265 el0_svc+0x20/0x60 00265 el0t_64_sync_handler+0x104/0x130 00265 el0t_64_sync+0x154/0x158 00265 00265 Allocated by task 7510: 00265 kasan_save_stack+0x28/0x50 00265 kasan_save_track+0x1c/0x38 00265 kasan_save_alloc_info+0x3c/0x50 00265 __kasan_kmalloc+0xac/0xb0 00265 __kmalloc_node_noprof+0x168/0x348 00265 __kvmalloc_node_noprof+0x20/0x140 00265 __bch2_darray_resize_noprof+0x90/0x1b0 00265 __bch2_accounting_mem_insert+0x76c/0xb08 00265 bch2_accounting_mem_insert+0x224/0x3b8 00265 bch2_accounting_mem_mod_locked+0x480/0xc58 00265 bch2_accounting_read+0xa94/0x3eb8 00265 bch2_run_recovery_pass+0x80/0x178 00265 bch2_run_recovery_passes+0x340/0x698 00265 bch2_fs_recovery+0x1c98/0x2bd8 00265 bch2_fs_start+0x240/0x490 00265 bch2_fs_get_tree+0xe1c/0x1458 00265 
vfs_get_tree+0x7c/0x250 00265 path_mount+0xe24/0x1648 00265 __arm64_sys_mount+0x240/0x438 00265 invoke_syscall.constprop.0+0x74/0x1e8 00265 do_el0_svc+0xc8/0x1c8 00265 el0_svc+0x20/0x60 00265 el0t_64_sync_handler+0x104/0x130 00265 el0t_64_sync+0x154/0x158 00265 00265 Freed by task 7510: 00265 kasan_save_stack+0x28/0x50 00265 kasan_save_track+0x1c/0x38 00265 kasan_save_free_info+0x48/0x88 00265 __kasan_slab_free+0x48/0x60 00265 kfree+0x188/0x408 00265 kvfree+0x3c/0x50 00265 __bch2_darray_resize_noprof+0xe0/0x1b0 00265 __bch2_accounting_mem_insert+0x76c/0xb08 00265 bch2_accounting_mem_insert+0x224/0x3b8 00265 bch2_accounting_mem_mod_locked+0x480/0xc58 00265 bch2_accounting_read+0xa94/0x3eb8 00265 bch2_run_recovery_pass+0x80/0x178 00265 bch2_run_recovery_passes+0x340/0x698 00265 bch2_fs_recovery+0x1c98/0x2bd8 00265 bch2_fs_start+0x240/0x490 00265 bch2_fs_get_tree+0xe1c/0x1458 00265 vfs_get_tree+0x7c/0x250 00265 path_mount+0xe24/0x1648 00265 bcachefs (vdc): going read-write 00265 __arm64_sys_mount+0x240/0x438 00265 invoke_syscall.constprop.0+0x74/0x1e8 00265 do_el0_svc+0xc8/0x1c8 00265 el0_svc+0x20/0x60 00265 el0t_64_sync_handler+0x104/0x130 00265 el0t_64_sync+0x154/0x158 00265 00265 The buggy address belongs to the object at ffffff80c57eac00 00265 which belongs to the cache kmalloc-128 of size 128 00265 The buggy address is located 0 bytes inside of 00265 freed 128-byte region [ffffff80c57eac00, ffffff80c57eac80) 00265 00265 The buggy address belongs to the physical page: 00265 page: refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x1057ea 00265 head: order:1 mapcount:0 entire_mapcount:0 nr_pages_mapped:0 pincount:0 00265 flags: 0x8000000000000040(head|zone=2) 00265 page_type: f5(slab) 00265 raw: 8000000000000040 ffffff80c0002800 dead000000000100 dead000000000122 00265 raw: 0000000000000000 0000000000200020 00000001f5000000 ffffff80c57a6400 00265 head: 8000000000000040 ffffff80c0002800 dead000000000100 dead000000000122 00265 head: 0000000000000000 
0000000000200020 00000001f5000000 ffffff80c57a6400 00265 head: 8000000000000001 fffffffec315fa81 ffffffffffffffff 0000000000000000 00265 head: 0000000000000002 0000000000000000 00000000ffffffff 0000000000000000 00265 page dumped because: kasan: bad access detected 00265 00265 Memory state around the buggy address: 00265 ffffff80c57eab00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00265 ffffff80c57eab80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc 00265 >ffffff80c57eac00: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb 00265 ^ 00265 ffffff80c57eac80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc 00265 ffffff80c57ead00: 00 00 00 00 00 00 00 00 00 00 00 00 00 fc fc fc 00265 ================================================================== 00265 Kernel panic - not syncing: kasan.fault=panic set ... 00265 CPU: 6 UID: 0 PID: 7531 Comm: cat Not tainted 6.13.0-rc3-ktest-g16fc6fa3819d #14103 00265 Hardware name: linux,dummy-virt (DT) 00265 Call trace: 00265 show_stack+0x1c/0x30 (C) 00265 dump_stack_lvl+0x30/0x80 00265 dump_stack+0x18/0x20 00265 panic+0x4d4/0x518 00265 start_report.constprop.0+0x0/0x90 00265 kasan_report+0xa0/0xd0 00265 __asan_report_load4_noabort+0x1c/0x28 00265 bch2_fs_to_text+0x12b4/0x1728 00265 bch2_fs_show+0x94/0x188 00265 sysfs_kf_seq_show+0x1a4/0x348 00265 kernfs_seq_show+0x12c/0x198 00265 seq_read_iter+0x27c/0xfd0 00265 kernfs_fop_read_iter+0x390/0x4f8 00265 vfs_read+0x480/0x7f0 00265 ksys_read+0xe0/0x1e8 00265 __arm64_sys_read+0x70/0xa8 00265 invoke_syscall.constprop.0+0x74/0x1e8 00265 do_el0_svc+0xc8/0x1c8 00265 el0_svc+0x20/0x60 00265 el0t_64_sync_handler+0x104/0x130 00265 el0t_64_sync+0x154/0x158 00265 SMP: stopping secondary CPUs 00265 Kernel Offset: disabled 00265 CPU features: 0x000,00000070,00000010,8240500b 00265 Memory Limit: none 00265 ---[ end Kernel panic - not syncing: kasan.fault=panic set ... 
]--- 00270 ========= FAILED TIMEOUT generic.187 in 1200s Signed-off-by: Alan Huang <mmpgouride@gmail.com> Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
276 lines
7.8 KiB
C
276 lines
7.8 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef _BCACHEFS_DISK_ACCOUNTING_H
|
|
#define _BCACHEFS_DISK_ACCOUNTING_H
|
|
|
|
#include "btree_update.h"
|
|
#include "eytzinger.h"
|
|
#include "sb-members.h"
|
|
|
|
/* Negate, in place, each of the @nr u64 values starting at @v. */
static inline void bch2_u64s_neg(u64 *v, unsigned nr)
{
	u64 *end = v + nr;

	while (v < end) {
		*v = -*v;
		v++;
	}
}
|
|
|
|
static inline unsigned bch2_accounting_counters(const struct bkey *k)
|
|
{
|
|
return bkey_val_u64s(k) - offsetof(struct bch_accounting, d) / sizeof(u64);
|
|
}
|
|
|
|
/* Negate every counter of accounting key @a in place. */
static inline void bch2_accounting_neg(struct bkey_s_accounting a)
{
	unsigned nr = bch2_accounting_counters(a.k);

	bch2_u64s_neg(a.v->d, nr);
}
|
|
|
|
/* Returns true when every counter of accounting key @a is zero. */
static inline bool bch2_accounting_key_is_zero(struct bkey_s_c_accounting a)
{
	unsigned nr = bch2_accounting_counters(a.k);
	unsigned i = 0;

	/* Stop at the first nonzero counter, if any */
	while (i < nr && !a.v->d[i])
		i++;

	return i == nr;
}
|
|
|
|
static inline void bch2_accounting_accumulate(struct bkey_i_accounting *dst,
|
|
struct bkey_s_c_accounting src)
|
|
{
|
|
EBUG_ON(dst->k.u64s != src.k->u64s);
|
|
|
|
for (unsigned i = 0; i < bch2_accounting_counters(&dst->k); i++)
|
|
dst->v.d[i] += src.v->d[i];
|
|
if (bversion_cmp(dst->k.bversion, src.k->bversion) < 0)
|
|
dst->k.bversion = src.k->bversion;
|
|
}
|
|
|
|
static inline void fs_usage_data_type_to_base(struct bch_fs_usage_base *fs_usage,
|
|
enum bch_data_type data_type,
|
|
s64 sectors)
|
|
{
|
|
switch (data_type) {
|
|
case BCH_DATA_btree:
|
|
fs_usage->btree += sectors;
|
|
break;
|
|
case BCH_DATA_user:
|
|
case BCH_DATA_parity:
|
|
fs_usage->data += sectors;
|
|
break;
|
|
case BCH_DATA_cached:
|
|
fs_usage->cached += sectors;
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
|
|
static inline void bpos_to_disk_accounting_pos(struct disk_accounting_pos *acc, struct bpos p)
|
|
{
|
|
BUILD_BUG_ON(sizeof(*acc) != sizeof(p));
|
|
|
|
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
|
|
acc->_pad = p;
|
|
#else
|
|
memcpy_swab(acc, &p, sizeof(p));
|
|
#endif
|
|
}
|
|
|
|
static inline struct bpos disk_accounting_pos_to_bpos(struct disk_accounting_pos *acc)
|
|
{
|
|
struct bpos p;
|
|
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
|
|
p = acc->_pad;
|
|
#else
|
|
memcpy_swab(&p, acc, sizeof(p));
|
|
#endif
|
|
return p;
|
|
}
|
|
|
|
int bch2_disk_accounting_mod(struct btree_trans *, struct disk_accounting_pos *,
|
|
s64 *, unsigned, bool);
|
|
int bch2_mod_dev_cached_sectors(struct btree_trans *, unsigned, s64, bool);
|
|
|
|
int bch2_accounting_validate(struct bch_fs *, struct bkey_s_c,
|
|
struct bkey_validate_context);
|
|
void bch2_accounting_key_to_text(struct printbuf *, struct disk_accounting_pos *);
|
|
void bch2_accounting_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
|
|
void bch2_accounting_swab(struct bkey_s);
|
|
|
|
/*
 * struct bkey_ops initializer for accounting keys: validation, value printing
 * and byte swapping are routed to the bch2_accounting_*() functions declared in
 * this header, and .min_val_size is 8.
 */
#define bch2_bkey_ops_accounting ((struct bkey_ops) {	\
	.key_validate	= bch2_accounting_validate,	\
	.val_to_text	= bch2_accounting_to_text,	\
	.swab		= bch2_accounting_swab,		\
	.min_val_size	= 8,				\
})
|
|
|
|
/* Defined out of line; NOTE(review): judging by the name this updates the superblock - confirm. */
int bch2_accounting_update_sb(struct btree_trans *);
|
|
|
|
static inline int accounting_pos_cmp(const void *_l, const void *_r)
|
|
{
|
|
const struct bpos *l = _l, *r = _r;
|
|
|
|
return bpos_cmp(*l, *r);
|
|
}
|
|
|
|
/*
 * How an accounting update is applied to the in-memory counters, as consumed
 * by bch2_accounting_mem_mod_locked() in this header.
 */
enum bch_accounting_mode {
	/* Counter set 0; also feeds trans->fs_usage_delta and per-device usage */
	BCH_ACCOUNTING_normal,
	/* Counter set 1; ignored unless acc->gc_running is set */
	BCH_ACCOUNTING_gc,
	/* Counter set 0 only; presumably used by bch2_accounting_read() - confirm */
	BCH_ACCOUNTING_read,
};
|
|
|
|
/* In-memory accounting table maintenance; defined out of line. */
int bch2_accounting_mem_insert(struct bch_fs *, struct bkey_s_c_accounting,
			       enum bch_accounting_mode);
void bch2_accounting_mem_gc(struct bch_fs *);
|
|
|
|
static inline bool bch2_accounting_is_mem(struct disk_accounting_pos acc)
|
|
{
|
|
return acc.type < BCH_DISK_ACCOUNTING_TYPE_NR &&
|
|
acc.type != BCH_DISK_ACCOUNTING_inum;
|
|
}
|
|
|
|
/*
|
|
* Update in memory counters so they match the btree update we're doing; called
|
|
* from transaction commit path
|
|
*/
|
|
static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans,
|
|
struct bkey_s_c_accounting a,
|
|
enum bch_accounting_mode mode)
|
|
{
|
|
struct bch_fs *c = trans->c;
|
|
struct bch_accounting_mem *acc = &c->accounting;
|
|
struct disk_accounting_pos acc_k;
|
|
bpos_to_disk_accounting_pos(&acc_k, a.k->p);
|
|
bool gc = mode == BCH_ACCOUNTING_gc;
|
|
|
|
if (gc && !acc->gc_running)
|
|
return 0;
|
|
|
|
if (!bch2_accounting_is_mem(acc_k))
|
|
return 0;
|
|
|
|
if (mode == BCH_ACCOUNTING_normal) {
|
|
switch (acc_k.type) {
|
|
case BCH_DISK_ACCOUNTING_persistent_reserved:
|
|
trans->fs_usage_delta.reserved += acc_k.persistent_reserved.nr_replicas * a.v->d[0];
|
|
break;
|
|
case BCH_DISK_ACCOUNTING_replicas:
|
|
fs_usage_data_type_to_base(&trans->fs_usage_delta, acc_k.replicas.data_type, a.v->d[0]);
|
|
break;
|
|
case BCH_DISK_ACCOUNTING_dev_data_type:
|
|
rcu_read_lock();
|
|
struct bch_dev *ca = bch2_dev_rcu_noerror(c, acc_k.dev_data_type.dev);
|
|
if (ca) {
|
|
this_cpu_add(ca->usage->d[acc_k.dev_data_type.data_type].buckets, a.v->d[0]);
|
|
this_cpu_add(ca->usage->d[acc_k.dev_data_type.data_type].sectors, a.v->d[1]);
|
|
this_cpu_add(ca->usage->d[acc_k.dev_data_type.data_type].fragmented, a.v->d[2]);
|
|
}
|
|
rcu_read_unlock();
|
|
break;
|
|
}
|
|
}
|
|
|
|
unsigned idx;
|
|
|
|
while ((idx = eytzinger0_find(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]),
|
|
accounting_pos_cmp, &a.k->p)) >= acc->k.nr) {
|
|
int ret = bch2_accounting_mem_insert(c, a, mode);
|
|
if (ret)
|
|
return ret;
|
|
}
|
|
|
|
struct accounting_mem_entry *e = &acc->k.data[idx];
|
|
|
|
EBUG_ON(bch2_accounting_counters(a.k) != e->nr_counters);
|
|
|
|
for (unsigned i = 0; i < bch2_accounting_counters(a.k); i++)
|
|
this_cpu_add(e->v[gc][i], a.v->d[i]);
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Apply accounting key @a to the in-memory counters with c->mark_lock held for
 * read around the update.  @gc selects BCH_ACCOUNTING_gc, otherwise
 * BCH_ACCOUNTING_normal is used.  Returns what
 * bch2_accounting_mem_mod_locked() returns.
 */
static inline int bch2_accounting_mem_add(struct btree_trans *trans, struct bkey_s_c_accounting a, bool gc)
{
	struct bch_fs *c = trans->c;
	enum bch_accounting_mode mode = gc
		? BCH_ACCOUNTING_gc
		: BCH_ACCOUNTING_normal;
	int ret;

	percpu_down_read(&c->mark_lock);
	ret = bch2_accounting_mem_mod_locked(trans, a, mode);
	percpu_up_read(&c->mark_lock);

	return ret;
}
|
|
|
|
/*
 * Read up to @nr counters of in-memory entry @idx into @v.
 *
 * @v is always zeroed first, so an out-of-range @idx (>= acc->k.nr) or an
 * entry with fewer than @nr counters leaves the remaining slots at 0.  @gc
 * selects which of the entry's two counter sets is read.
 *
 * This function takes no lock itself; bch2_accounting_mem_read() calls it with
 * c->mark_lock held for read.
 */
static inline void bch2_accounting_mem_read_counters(struct bch_accounting_mem *acc,
						     unsigned idx, u64 *v, unsigned nr, bool gc)
{
	memset(v, 0, sizeof(*v) * nr);

	if (unlikely(idx >= acc->k.nr))
		return;

	struct accounting_mem_entry *e = &acc->k.data[idx];
	unsigned nr_read = min_t(unsigned, nr, e->nr_counters);

	for (unsigned i = 0; i < nr_read; i++)
		v[i] = percpu_u64_get(&e->v[gc][i]);
}
|
|
|
|
static inline void bch2_accounting_mem_read(struct bch_fs *c, struct bpos p,
|
|
u64 *v, unsigned nr)
|
|
{
|
|
percpu_down_read(&c->mark_lock);
|
|
struct bch_accounting_mem *acc = &c->accounting;
|
|
unsigned idx = eytzinger0_find(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]),
|
|
accounting_pos_cmp, &p);
|
|
|
|
bch2_accounting_mem_read_counters(acc, idx, v, nr, false);
|
|
percpu_up_read(&c->mark_lock);
|
|
}
|
|
|
|
static inline struct bversion journal_pos_to_bversion(struct journal_res *res, unsigned offset)
|
|
{
|
|
EBUG_ON(!res->ref);
|
|
|
|
return (struct bversion) {
|
|
.hi = res->seq >> 32,
|
|
.lo = (res->seq << 32) | (res->offset + offset),
|
|
};
|
|
}
|
|
|
|
static inline int bch2_accounting_trans_commit_hook(struct btree_trans *trans,
|
|
struct bkey_i_accounting *a,
|
|
unsigned commit_flags)
|
|
{
|
|
a->k.bversion = journal_pos_to_bversion(&trans->journal_res,
|
|
(u64 *) a - (u64 *) trans->journal_entries);
|
|
|
|
EBUG_ON(bversion_zero(a->k.bversion));
|
|
|
|
return likely(!(commit_flags & BCH_TRANS_COMMIT_skip_accounting_apply))
|
|
? bch2_accounting_mem_mod_locked(trans, accounting_i_to_s_c(a), BCH_ACCOUNTING_normal)
|
|
: 0;
|
|
}
|
|
|
|
static inline void bch2_accounting_trans_commit_revert(struct btree_trans *trans,
|
|
struct bkey_i_accounting *a_i,
|
|
unsigned commit_flags)
|
|
{
|
|
if (likely(!(commit_flags & BCH_TRANS_COMMIT_skip_accounting_apply))) {
|
|
struct bkey_s_accounting a = accounting_i_to_s(a_i);
|
|
|
|
bch2_accounting_neg(a);
|
|
bch2_accounting_mem_mod_locked(trans, a.c, BCH_ACCOUNTING_normal);
|
|
bch2_accounting_neg(a);
|
|
}
|
|
}
|
|
|
|
int bch2_fs_replicas_usage_read(struct bch_fs *, darray_char *);
|
|
int bch2_fs_accounting_read(struct bch_fs *, darray_char *, unsigned);
|
|
|
|
int bch2_gc_accounting_start(struct bch_fs *);
|
|
int bch2_gc_accounting_done(struct bch_fs *);
|
|
|
|
int bch2_accounting_read(struct bch_fs *);
|
|
|
|
int bch2_dev_usage_remove(struct bch_fs *, unsigned);
|
|
int bch2_dev_usage_init(struct bch_dev *, bool);
|
|
|
|
void bch2_verify_accounting_clean(struct bch_fs *c);
|
|
|
|
void bch2_accounting_gc_free(struct bch_fs *);
|
|
void bch2_fs_accounting_exit(struct bch_fs *);
|
|
|
|
#endif /* _BCACHEFS_DISK_ACCOUNTING_H */
|