mirror of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-09-04 20:19:47 +08:00

We periodically check the available rt blocks when filling up zones
and start GC if needed, but we may run completely out in between
filling zones, so start GC (unless it is already running) if we can't
reserve writable space.

This should only happen as a corner case in setups with very few
backing zones.
Fixes: 080d01c41d ("xfs: implement zoned garbage collection")
Signed-off-by: Hans Holmberg <hans.holmberg@wdc.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
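
For orientation, the change described above shows up in the reservation wait
loop of xfs_zoned_reserve_available() in the file below; the core of it is the
GC kick (a minimal excerpt, not the full diff):

	if (!xfs_is_zonegc_running(mp))
		wake_up_process(zi->zi_gc_thread);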
264 lines
7.1 KiB
C
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2023-2025 Christoph Hellwig.
 * Copyright (c) 2024-2025, Western Digital Corporation or its affiliates.
 */
#include "xfs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_rtbitmap.h"
#include "xfs_zone_alloc.h"
#include "xfs_zone_priv.h"
#include "xfs_zones.h"

/*
 * Note: the zoned allocator does not support a rtextsize > 1, so this code and
 * the allocator itself uses file system blocks interchangeably with realtime
 * extents without doing the otherwise required conversions.
 */

/*
 * Per-task space reservation.
 *
 * Tasks that need to wait for GC to free up space allocate one of these
 * on-stack and add it to the per-mount zi_reclaim_reservations list.
 * The GC thread will then wake the tasks in order when space becomes available.
 */
struct xfs_zone_reservation {
	struct list_head	entry;
	struct task_struct	*task;
	xfs_filblks_t		count_fsb;
};

/*
 * Calculate the number of reserved blocks.
 *
 * XC_FREE_RTEXTENTS counts the user available capacity, to which the file
 * system can be filled, while XC_FREE_RTAVAILABLE counts the blocks instantly
 * available for writes without waiting for GC.
 *
 * For XC_FREE_RTAVAILABLE only the smaller reservation required for GC and
 * block zeroing is excluded from the user capacity, while XC_FREE_RTEXTENTS
 * is further restricted by at least one zone as well as the optional
 * persistently reserved blocks.  This allows the allocator to run more
 * smoothly by not always triggering GC.
 */
uint64_t
xfs_zoned_default_resblks(
	struct xfs_mount	*mp,
	enum xfs_free_counter	ctr)
{
	switch (ctr) {
	case XC_FREE_RTEXTENTS:
		return (uint64_t)XFS_RESERVED_ZONES *
			mp->m_groups[XG_TYPE_RTG].blocks +
			mp->m_sb.sb_rtreserved;
	case XC_FREE_RTAVAILABLE:
		return (uint64_t)XFS_GC_ZONES *
			mp->m_groups[XG_TYPE_RTG].blocks;
	default:
		ASSERT(0);
		return 0;
	}
}
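
/*
 * Wake every task waiting on the zi_reclaim_reservations list so that it
 * re-checks the free space counters (and any shutdown or fatal signal).
 */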
void
xfs_zoned_resv_wake_all(
	struct xfs_mount	*mp)
{
	struct xfs_zone_info	*zi = mp->m_zone_info;
	struct xfs_zone_reservation *reservation;

	spin_lock(&zi->zi_reservation_lock);
	list_for_each_entry(reservation, &zi->zi_reclaim_reservations, entry)
		wake_up_process(reservation->task);
	spin_unlock(&zi->zi_reservation_lock);
}
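
/*
 * Return freed blocks to the XC_FREE_RTAVAILABLE counter and wake waiting
 * reservations in list order for as long as their requests fit into the
 * updated total.
 */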
void
xfs_zoned_add_available(
	struct xfs_mount	*mp,
	xfs_filblks_t		count_fsb)
{
	struct xfs_zone_info	*zi = mp->m_zone_info;
	struct xfs_zone_reservation *reservation;

	if (list_empty_careful(&zi->zi_reclaim_reservations)) {
		xfs_add_freecounter(mp, XC_FREE_RTAVAILABLE, count_fsb);
		return;
	}

	spin_lock(&zi->zi_reservation_lock);
	xfs_add_freecounter(mp, XC_FREE_RTAVAILABLE, count_fsb);
	count_fsb = xfs_sum_freecounter(mp, XC_FREE_RTAVAILABLE);
	list_for_each_entry(reservation, &zi->zi_reclaim_reservations, entry) {
		if (reservation->count_fsb > count_fsb)
			break;
		wake_up_process(reservation->task);
		count_fsb -= reservation->count_fsb;
	}
	spin_unlock(&zi->zi_reservation_lock);
}
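
/*
 * Check for conditions that terminate the wait for space: a file system
 * shutdown or a fatal signal pending for the waiting task.
 */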
static int
xfs_zoned_space_wait_error(
	struct xfs_mount	*mp)
{
	if (xfs_is_shutdown(mp))
		return -EIO;
	if (fatal_signal_pending(current))
		return -EINTR;
	return 0;
}
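
/*
 * Reserve count_fsb blocks of instantly writable space (XC_FREE_RTAVAILABLE),
 * waiting for GC to free up space unless XFS_ZR_NOWAIT is set.
 */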
static int
xfs_zoned_reserve_available(
	struct xfs_inode	*ip,
	xfs_filblks_t		count_fsb,
	unsigned int		flags)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_zone_info	*zi = mp->m_zone_info;
	struct xfs_zone_reservation reservation = {
		.task		= current,
		.count_fsb	= count_fsb,
	};
	int			error;

	/*
	 * If there are no waiters, try to directly grab the available blocks
	 * from the percpu counter.
	 *
	 * If the caller wants to dip into the reserved pool also bypass the
	 * wait list.  This relies on the fact that we have a very graciously
	 * sized reserved pool that always has enough space.  If the reserved
	 * allocations fail we're in trouble.
	 */
	if (likely(list_empty_careful(&zi->zi_reclaim_reservations) ||
	    (flags & XFS_ZR_RESERVED))) {
		error = xfs_dec_freecounter(mp, XC_FREE_RTAVAILABLE, count_fsb,
				flags & XFS_ZR_RESERVED);
		if (error != -ENOSPC)
			return error;
	}

	if (flags & XFS_ZR_NOWAIT)
		return -EAGAIN;

	spin_lock(&zi->zi_reservation_lock);
	list_add_tail(&reservation.entry, &zi->zi_reclaim_reservations);
	while ((error = xfs_zoned_space_wait_error(mp)) == 0) {
		set_current_state(TASK_KILLABLE);

		error = xfs_dec_freecounter(mp, XC_FREE_RTAVAILABLE, count_fsb,
				flags & XFS_ZR_RESERVED);
		if (error != -ENOSPC)
			break;

		/*
		 * Make sure to start GC if it is not running already.  As we
		 * check the rtavailable count when filling up zones, GC is
		 * normally already running at this point, but in some setups
		 * with very few zones we may completely run out of non-
		 * reserved blocks in between filling zones.
		 */
		if (!xfs_is_zonegc_running(mp))
			wake_up_process(zi->zi_gc_thread);

		/*
		 * If there is no reclaimable group left and we aren't still
		 * processing a pending GC request give up as we're fully out
		 * of space.
		 */
		if (!xfs_group_marked(mp, XG_TYPE_RTG, XFS_RTG_RECLAIMABLE) &&
		    !xfs_is_zonegc_running(mp))
			break;

		spin_unlock(&zi->zi_reservation_lock);
		schedule();
		spin_lock(&zi->zi_reservation_lock);
	}
	list_del(&reservation.entry);
	spin_unlock(&zi->zi_reservation_lock);

	__set_current_state(TASK_RUNNING);
	return error;
}

/*
 * Implement greedy space allocation for short writes by trying to grab all
 * that is left after locking out other threads from trying to do the same.
 *
 * This isn't exactly optimal and can hopefully be replaced by a proper
 * percpu_counter primitive one day.
 */
static int
xfs_zoned_reserve_extents_greedy(
	struct xfs_inode	*ip,
	xfs_filblks_t		*count_fsb,
	unsigned int		flags)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_zone_info	*zi = mp->m_zone_info;
	s64			len = *count_fsb;
	int			error = -ENOSPC;

	spin_lock(&zi->zi_reservation_lock);
	len = min(len, xfs_sum_freecounter(mp, XC_FREE_RTEXTENTS));
	if (len > 0) {
		*count_fsb = len;
		error = xfs_dec_freecounter(mp, XC_FREE_RTEXTENTS, *count_fsb,
				flags & XFS_ZR_RESERVED);
	}
	spin_unlock(&zi->zi_reservation_lock);
	return error;
}
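
/*
 * Reserve space for a zoned write: take count_fsb blocks from the user
 * capacity (XC_FREE_RTEXTENTS) as well as from the instantly writable space,
 * and record the reservation in the allocation context.
 */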
int
xfs_zoned_space_reserve(
	struct xfs_inode	*ip,
	xfs_filblks_t		count_fsb,
	unsigned int		flags,
	struct xfs_zone_alloc_ctx *ac)
{
	struct xfs_mount	*mp = ip->i_mount;
	int			error;

	ASSERT(ac->reserved_blocks == 0);
	ASSERT(ac->open_zone == NULL);

	error = xfs_dec_freecounter(mp, XC_FREE_RTEXTENTS, count_fsb,
			flags & XFS_ZR_RESERVED);
	if (error == -ENOSPC && (flags & XFS_ZR_GREEDY) && count_fsb > 1)
		error = xfs_zoned_reserve_extents_greedy(ip, &count_fsb, flags);
	if (error)
		return error;

	error = xfs_zoned_reserve_available(ip, count_fsb, flags);
	if (error) {
		xfs_add_freecounter(mp, XC_FREE_RTEXTENTS, count_fsb);
		return error;
	}
	ac->reserved_blocks = count_fsb;
	return 0;
}
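
/*
 * Undo a space reservation: return any remaining reserved blocks to both
 * free space counters and drop the reference to the cached open zone.
 */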
void
xfs_zoned_space_unreserve(
	struct xfs_inode	*ip,
	struct xfs_zone_alloc_ctx *ac)
{
	if (ac->reserved_blocks > 0) {
		struct xfs_mount	*mp = ip->i_mount;

		xfs_zoned_add_available(mp, ac->reserved_blocks);
		xfs_add_freecounter(mp, XC_FREE_RTEXTENTS, ac->reserved_blocks);
	}
	if (ac->open_zone)
		xfs_open_zone_put(ac->open_zone);
}