2
0
mirror of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git synced 2025-09-04 20:19:47 +08:00

mm/page_alloc: pageblock flags functions clean up

Patch series "Make MIGRATE_ISOLATE a standalone bit", v10.

This patchset moves MIGRATE_ISOLATE to a standalone bit to avoid being
overwritten during pageblock isolation process.  Currently,
MIGRATE_ISOLATE is part of enum migratetype (in include/linux/mmzone.h),
thus, setting a pageblock to MIGRATE_ISOLATE overwrites its original
migratetype.  This causes pageblock migratetype loss during
alloc_contig_range() and memory offline, especially when the process fails
due to a failed pageblock isolation and the code tries to undo the
finished pageblock isolations.

In terms of performance for changing pageblock types, no performance
change is observed:

1. I used perf to collect stats of offlining and onlining all memory
   of a 40GB VM 10 times and see that get_pfnblock_flags_mask() and
   set_pfnblock_flags_mask() take about 0.12% and 0.02% of the whole
   process respectively with and without this patchset across 3 runs.

2. I used perf to collect stats of dd from /dev/random to a 40GB tmpfs
   file and find get_pfnblock_flags_mask() takes about 0.05% of the
   process with and without this patchset across 3 runs.


This patch (of 6):

No functional change is intended.

1. Add __NR_PAGEBLOCK_BITS for the number of pageblock flag bits and use
   roundup_pow_of_two(__NR_PAGEBLOCK_BITS) as NR_PAGEBLOCK_BITS to take
   right amount of bits for pageblock flags.
2. Rename PB_migrate_skip to PB_compact_skip.
3. Add {get,set,clear}_pfnblock_bit() to operate on a standalone bit,
   like PB_compact_skip.
4. Make {get,set}_pfnblock_flags_mask() internal functions and use
   {get,set}_pfnblock_migratetype() for pageblock migratetype operations.
5. Move pageblock flags common code to get_pfnblock_bitmap_bitidx().
6. Use MIGRATETYPE_MASK to get the migratetype of a pageblock from its
   flags.
7. Use PB_migrate_end in the definition of MIGRATETYPE_MASK instead of
   PB_migrate_bits.
8. Add a comment on is_migrate_cma_folio() to prevent one from changing it
   to use get_pageblock_migratetype() and causing issues.

Link: https://lkml.kernel.org/r/20250617021115.2331563-1-ziy@nvidia.com
Link: https://lkml.kernel.org/r/20250617021115.2331563-2-ziy@nvidia.com
Signed-off-by: Zi Yan <ziy@nvidia.com>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Brendan Jackman <jackmanb@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Richard Chang <richardycc@google.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
Zi Yan 2025-06-16 22:11:09 -04:00 committed by Andrew Morton
parent d2a9721d80
commit 42f46ed99a
6 changed files with 162 additions and 67 deletions

View File

@ -584,7 +584,7 @@ Compaction control
``compact_blockskip_flush`` ``compact_blockskip_flush``
Set to true when compaction migration scanner and free scanner meet, which Set to true when compaction migration scanner and free scanner meet, which
means the ``PB_migrate_skip`` bits should be cleared. means the ``PB_compact_skip`` bits should be cleared.
``contiguous`` ``contiguous``
Set to true when the zone is contiguous (in other words, no hole). Set to true when the zone is contiguous (in other words, no hole).

View File

@ -92,8 +92,12 @@ extern const char * const migratetype_names[MIGRATE_TYPES];
#ifdef CONFIG_CMA #ifdef CONFIG_CMA
# define is_migrate_cma(migratetype) unlikely((migratetype) == MIGRATE_CMA) # define is_migrate_cma(migratetype) unlikely((migratetype) == MIGRATE_CMA)
# define is_migrate_cma_page(_page) (get_pageblock_migratetype(_page) == MIGRATE_CMA) # define is_migrate_cma_page(_page) (get_pageblock_migratetype(_page) == MIGRATE_CMA)
# define is_migrate_cma_folio(folio, pfn) (MIGRATE_CMA == \ /*
get_pfnblock_flags_mask(&folio->page, pfn, MIGRATETYPE_MASK)) * __dump_folio() in mm/debug.c passes a folio pointer to on-stack struct folio,
* so folio_pfn() cannot be used and pfn is needed.
*/
# define is_migrate_cma_folio(folio, pfn) \
(get_pfnblock_migratetype(&folio->page, pfn) == MIGRATE_CMA)
#else #else
# define is_migrate_cma(migratetype) false # define is_migrate_cma(migratetype) false
# define is_migrate_cma_page(_page) false # define is_migrate_cma_page(_page) false
@ -122,14 +126,12 @@ static inline bool migratetype_is_mergeable(int mt)
extern int page_group_by_mobility_disabled; extern int page_group_by_mobility_disabled;
#define MIGRATETYPE_MASK ((1UL << PB_migratetype_bits) - 1)
#define get_pageblock_migratetype(page) \ #define get_pageblock_migratetype(page) \
get_pfnblock_flags_mask(page, page_to_pfn(page), MIGRATETYPE_MASK) get_pfnblock_migratetype(page, page_to_pfn(page))
#define folio_migratetype(folio) \ #define folio_migratetype(folio) \
get_pfnblock_flags_mask(&folio->page, folio_pfn(folio), \ get_pageblock_migratetype(&folio->page)
MIGRATETYPE_MASK)
struct free_area { struct free_area {
struct list_head free_list[MIGRATE_TYPES]; struct list_head free_list[MIGRATE_TYPES];
unsigned long nr_free; unsigned long nr_free;

View File

@ -25,7 +25,7 @@ static inline bool is_migrate_isolate(int migratetype)
#define MEMORY_OFFLINE 0x1 #define MEMORY_OFFLINE 0x1
#define REPORT_FAILURE 0x2 #define REPORT_FAILURE 0x2
void set_pageblock_migratetype(struct page *page, int migratetype); void set_pageblock_migratetype(struct page *page, enum migratetype migratetype);
bool move_freepages_block_isolate(struct zone *zone, struct page *page, bool move_freepages_block_isolate(struct zone *zone, struct page *page,
int migratetype); int migratetype);

View File

@ -19,15 +19,19 @@ enum pageblock_bits {
PB_migrate, PB_migrate,
PB_migrate_end = PB_migrate + PB_migratetype_bits - 1, PB_migrate_end = PB_migrate + PB_migratetype_bits - 1,
/* 3 bits required for migrate types */ /* 3 bits required for migrate types */
PB_migrate_skip,/* If set the block is skipped by compaction */ PB_compact_skip,/* If set the block is skipped by compaction */
/* /*
* Assume the bits will always align on a word. If this assumption * Assume the bits will always align on a word. If this assumption
* changes then get/set pageblock needs updating. * changes then get/set pageblock needs updating.
*/ */
NR_PAGEBLOCK_BITS __NR_PAGEBLOCK_BITS
}; };
#define NR_PAGEBLOCK_BITS (roundup_pow_of_two(__NR_PAGEBLOCK_BITS))
#define MIGRATETYPE_MASK ((1UL << (PB_migrate_end + 1)) - 1)
#if defined(CONFIG_HUGETLB_PAGE) #if defined(CONFIG_HUGETLB_PAGE)
#ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
@ -65,27 +69,23 @@ extern unsigned int pageblock_order;
/* Forward declaration */ /* Forward declaration */
struct page; struct page;
unsigned long get_pfnblock_flags_mask(const struct page *page, enum migratetype get_pfnblock_migratetype(const struct page *page,
unsigned long pfn, unsigned long pfn);
unsigned long mask); bool get_pfnblock_bit(const struct page *page, unsigned long pfn,
enum pageblock_bits pb_bit);
void set_pfnblock_flags_mask(struct page *page, void set_pfnblock_bit(const struct page *page, unsigned long pfn,
unsigned long flags, enum pageblock_bits pb_bit);
unsigned long pfn, void clear_pfnblock_bit(const struct page *page, unsigned long pfn,
unsigned long mask); enum pageblock_bits pb_bit);
/* Declarations for getting and setting flags. See mm/page_alloc.c */ /* Declarations for getting and setting flags. See mm/page_alloc.c */
#ifdef CONFIG_COMPACTION #ifdef CONFIG_COMPACTION
#define get_pageblock_skip(page) \ #define get_pageblock_skip(page) \
get_pfnblock_flags_mask(page, page_to_pfn(page), \ get_pfnblock_bit(page, page_to_pfn(page), PB_compact_skip)
(1 << (PB_migrate_skip)))
#define clear_pageblock_skip(page) \ #define clear_pageblock_skip(page) \
set_pfnblock_flags_mask(page, 0, page_to_pfn(page), \ clear_pfnblock_bit(page, page_to_pfn(page), PB_compact_skip)
(1 << PB_migrate_skip))
#define set_pageblock_skip(page) \ #define set_pageblock_skip(page) \
set_pfnblock_flags_mask(page, (1 << PB_migrate_skip), \ set_pfnblock_bit(page, page_to_pfn(page), PB_compact_skip)
page_to_pfn(page), \
(1 << PB_migrate_skip))
#else #else
static inline bool get_pageblock_skip(struct page *page) static inline bool get_pageblock_skip(struct page *page)
{ {

View File

@ -774,7 +774,7 @@ void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
/* /*
* TODO now we have a visible range of pages which are not associated * TODO now we have a visible range of pages which are not associated
* with their zone properly. Not nice but set_pfnblock_flags_mask * with their zone properly. Not nice but set_pfnblock_migratetype()
* expects the zone spans the pfn range. All the pages in the range * expects the zone spans the pfn range. All the pages in the range
* are reserved so nobody should be touching them so we should be safe * are reserved so nobody should be touching them so we should be safe
*/ */

View File

@ -353,81 +353,174 @@ static inline int pfn_to_bitidx(const struct page *page, unsigned long pfn)
return (pfn >> pageblock_order) * NR_PAGEBLOCK_BITS; return (pfn >> pageblock_order) * NR_PAGEBLOCK_BITS;
} }
static __always_inline bool is_standalone_pb_bit(enum pageblock_bits pb_bit)
{
return pb_bit > PB_migrate_end && pb_bit < __NR_PAGEBLOCK_BITS;
}
static __always_inline void
get_pfnblock_bitmap_bitidx(const struct page *page, unsigned long pfn,
unsigned long **bitmap_word, unsigned long *bitidx)
{
unsigned long *bitmap;
unsigned long word_bitidx;
BUILD_BUG_ON(NR_PAGEBLOCK_BITS != 4);
BUILD_BUG_ON(MIGRATE_TYPES > (1 << PB_migratetype_bits));
VM_BUG_ON_PAGE(!zone_spans_pfn(page_zone(page), pfn), page);
bitmap = get_pageblock_bitmap(page, pfn);
*bitidx = pfn_to_bitidx(page, pfn);
word_bitidx = *bitidx / BITS_PER_LONG;
*bitidx &= (BITS_PER_LONG - 1);
*bitmap_word = &bitmap[word_bitidx];
}
/** /**
* get_pfnblock_flags_mask - Return the requested group of flags for the pageblock_nr_pages block of pages * __get_pfnblock_flags_mask - Return the requested group of flags for
* a pageblock_nr_pages block of pages
* @page: The page within the block of interest * @page: The page within the block of interest
* @pfn: The target page frame number * @pfn: The target page frame number
* @mask: mask of bits that the caller is interested in * @mask: mask of bits that the caller is interested in
* *
* Return: pageblock_bits flags * Return: pageblock_bits flags
*/ */
unsigned long get_pfnblock_flags_mask(const struct page *page, static unsigned long __get_pfnblock_flags_mask(const struct page *page,
unsigned long pfn, unsigned long mask)
{
unsigned long *bitmap;
unsigned long bitidx, word_bitidx;
unsigned long word;
bitmap = get_pageblock_bitmap(page, pfn);
bitidx = pfn_to_bitidx(page, pfn);
word_bitidx = bitidx / BITS_PER_LONG;
bitidx &= (BITS_PER_LONG-1);
/*
* This races, without locks, with set_pfnblock_flags_mask(). Ensure
* a consistent read of the memory array, so that results, even though
* racy, are not corrupted.
*/
word = READ_ONCE(bitmap[word_bitidx]);
return (word >> bitidx) & mask;
}
static __always_inline int get_pfnblock_migratetype(const struct page *page,
unsigned long pfn)
{
return get_pfnblock_flags_mask(page, pfn, MIGRATETYPE_MASK);
}
/**
* set_pfnblock_flags_mask - Set the requested group of flags for a pageblock_nr_pages block of pages
* @page: The page within the block of interest
* @flags: The flags to set
* @pfn: The target page frame number
* @mask: mask of bits that the caller is interested in
*/
void set_pfnblock_flags_mask(struct page *page, unsigned long flags,
unsigned long pfn, unsigned long pfn,
unsigned long mask) unsigned long mask)
{ {
unsigned long *bitmap; unsigned long *bitmap_word;
unsigned long bitidx, word_bitidx; unsigned long bitidx;
unsigned long word; unsigned long word;
BUILD_BUG_ON(NR_PAGEBLOCK_BITS != 4); get_pfnblock_bitmap_bitidx(page, pfn, &bitmap_word, &bitidx);
BUILD_BUG_ON(MIGRATE_TYPES > (1 << PB_migratetype_bits)); /*
* This races, without locks, with set_pfnblock_migratetype(). Ensure
* a consistent read of the memory array, so that results, even though
* racy, are not corrupted.
*/
word = READ_ONCE(*bitmap_word);
return (word >> bitidx) & mask;
}
bitmap = get_pageblock_bitmap(page, pfn); /**
bitidx = pfn_to_bitidx(page, pfn); * get_pfnblock_bit - Check if a standalone bit of a pageblock is set
word_bitidx = bitidx / BITS_PER_LONG; * @page: The page within the block of interest
bitidx &= (BITS_PER_LONG-1); * @pfn: The target page frame number
* @pb_bit: pageblock bit to check
*
* Return: true if the bit is set, otherwise false
*/
bool get_pfnblock_bit(const struct page *page, unsigned long pfn,
enum pageblock_bits pb_bit)
{
unsigned long *bitmap_word;
unsigned long bitidx;
VM_BUG_ON_PAGE(!zone_spans_pfn(page_zone(page), pfn), page); if (WARN_ON_ONCE(!is_standalone_pb_bit(pb_bit)))
return false;
get_pfnblock_bitmap_bitidx(page, pfn, &bitmap_word, &bitidx);
return test_bit(bitidx + pb_bit, bitmap_word);
}
/**
* get_pfnblock_migratetype - Return the migratetype of a pageblock
* @page: The page within the block of interest
* @pfn: The target page frame number
*
* Return: The migratetype of the pageblock
*
* Use get_pfnblock_migratetype() if caller already has both @page and @pfn
* to save a call to page_to_pfn().
*/
__always_inline enum migratetype
get_pfnblock_migratetype(const struct page *page, unsigned long pfn)
{
return __get_pfnblock_flags_mask(page, pfn, MIGRATETYPE_MASK);
}
/**
* __set_pfnblock_flags_mask - Set the requested group of flags for
* a pageblock_nr_pages block of pages
* @page: The page within the block of interest
* @pfn: The target page frame number
* @flags: The flags to set
* @mask: mask of bits that the caller is interested in
*/
static void __set_pfnblock_flags_mask(struct page *page, unsigned long pfn,
unsigned long flags, unsigned long mask)
{
unsigned long *bitmap_word;
unsigned long bitidx;
unsigned long word;
get_pfnblock_bitmap_bitidx(page, pfn, &bitmap_word, &bitidx);
mask <<= bitidx; mask <<= bitidx;
flags <<= bitidx; flags <<= bitidx;
word = READ_ONCE(bitmap[word_bitidx]); word = READ_ONCE(*bitmap_word);
do { do {
} while (!try_cmpxchg(&bitmap[word_bitidx], &word, (word & ~mask) | flags)); } while (!try_cmpxchg(bitmap_word, &word, (word & ~mask) | flags));
} }
void set_pageblock_migratetype(struct page *page, int migratetype) /**
* set_pfnblock_bit - Set a standalone bit of a pageblock
* @page: The page within the block of interest
* @pfn: The target page frame number
* @pb_bit: pageblock bit to set
*/
void set_pfnblock_bit(const struct page *page, unsigned long pfn,
enum pageblock_bits pb_bit)
{
unsigned long *bitmap_word;
unsigned long bitidx;
if (WARN_ON_ONCE(!is_standalone_pb_bit(pb_bit)))
return;
get_pfnblock_bitmap_bitidx(page, pfn, &bitmap_word, &bitidx);
set_bit(bitidx + pb_bit, bitmap_word);
}
/**
* clear_pfnblock_bit - Clear a standalone bit of a pageblock
* @page: The page within the block of interest
* @pfn: The target page frame number
* @pb_bit: pageblock bit to clear
*/
void clear_pfnblock_bit(const struct page *page, unsigned long pfn,
enum pageblock_bits pb_bit)
{
unsigned long *bitmap_word;
unsigned long bitidx;
if (WARN_ON_ONCE(!is_standalone_pb_bit(pb_bit)))
return;
get_pfnblock_bitmap_bitidx(page, pfn, &bitmap_word, &bitidx);
clear_bit(bitidx + pb_bit, bitmap_word);
}
/**
* set_pageblock_migratetype - Set the migratetype of a pageblock
* @page: The page within the block of interest
* @migratetype: migratetype to set
*/
__always_inline void set_pageblock_migratetype(struct page *page,
enum migratetype migratetype)
{ {
if (unlikely(page_group_by_mobility_disabled && if (unlikely(page_group_by_mobility_disabled &&
migratetype < MIGRATE_PCPTYPES)) migratetype < MIGRATE_PCPTYPES))
migratetype = MIGRATE_UNMOVABLE; migratetype = MIGRATE_UNMOVABLE;
set_pfnblock_flags_mask(page, (unsigned long)migratetype, __set_pfnblock_flags_mask(page, page_to_pfn(page),
page_to_pfn(page), MIGRATETYPE_MASK); (unsigned long)migratetype, MIGRATETYPE_MASK);
} }
#ifdef CONFIG_DEBUG_VM #ifdef CONFIG_DEBUG_VM
@ -667,7 +760,7 @@ static inline void __add_to_free_list(struct page *page, struct zone *zone,
int nr_pages = 1 << order; int nr_pages = 1 << order;
VM_WARN_ONCE(get_pageblock_migratetype(page) != migratetype, VM_WARN_ONCE(get_pageblock_migratetype(page) != migratetype,
"page type is %lu, passed migratetype is %d (nr=%d)\n", "page type is %d, passed migratetype is %d (nr=%d)\n",
get_pageblock_migratetype(page), migratetype, nr_pages); get_pageblock_migratetype(page), migratetype, nr_pages);
if (tail) if (tail)
@ -693,7 +786,7 @@ static inline void move_to_free_list(struct page *page, struct zone *zone,
/* Free page moving can fail, so it happens before the type update */ /* Free page moving can fail, so it happens before the type update */
VM_WARN_ONCE(get_pageblock_migratetype(page) != old_mt, VM_WARN_ONCE(get_pageblock_migratetype(page) != old_mt,
"page type is %lu, passed migratetype is %d (nr=%d)\n", "page type is %d, passed migratetype is %d (nr=%d)\n",
get_pageblock_migratetype(page), old_mt, nr_pages); get_pageblock_migratetype(page), old_mt, nr_pages);
list_move_tail(&page->buddy_list, &area->free_list[new_mt]); list_move_tail(&page->buddy_list, &area->free_list[new_mt]);
@ -715,7 +808,7 @@ static inline void __del_page_from_free_list(struct page *page, struct zone *zon
int nr_pages = 1 << order; int nr_pages = 1 << order;
VM_WARN_ONCE(get_pageblock_migratetype(page) != migratetype, VM_WARN_ONCE(get_pageblock_migratetype(page) != migratetype,
"page type is %lu, passed migratetype is %d (nr=%d)\n", "page type is %d, passed migratetype is %d (nr=%d)\n",
get_pageblock_migratetype(page), migratetype, nr_pages); get_pageblock_migratetype(page), migratetype, nr_pages);
/* clear reported state and update reported page count */ /* clear reported state and update reported page count */
@ -3123,7 +3216,7 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
/* /*
* Do not instrument rmqueue() with KMSAN. This function may call * Do not instrument rmqueue() with KMSAN. This function may call
* __msan_poison_alloca() through a call to set_pfnblock_flags_mask(). * __msan_poison_alloca() through a call to set_pfnblock_migratetype().
* If __msan_poison_alloca() attempts to allocate pages for the stack depot, it * If __msan_poison_alloca() attempts to allocate pages for the stack depot, it
* may call rmqueue() again, which will result in a deadlock. * may call rmqueue() again, which will result in a deadlock.
*/ */