
mm/page_alloc: pageblock flags functions clean up

Patch series "Make MIGRATE_ISOLATE a standalone bit", v10.

This patchset moves MIGRATE_ISOLATE to a standalone bit so that isolating a
pageblock no longer overwrites its original migratetype.  Currently,
MIGRATE_ISOLATE is part of enum migratetype (in include/linux/mmzone.h), so
setting a pageblock to MIGRATE_ISOLATE overwrites its original migratetype.
This causes pageblock migratetype loss during alloc_contig_range() and
memory offline, especially when the operation fails due to a failed
pageblock isolation and the code tries to undo the isolations that have
already been completed.
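
As a minimal sketch of the problem (the caller is hypothetical; the helpers
are the existing kernel ones):

	/* Hypothetical undo path, before this series. */
	static void isolation_undo_sketch(struct page *page)
	{
		/* The pageblock's migratetype bits hold a single value, so
		 * isolating a block that was, say, MIGRATE_CMA overwrites
		 * that type: */
		set_pageblock_migratetype(page, MIGRATE_ISOLATE);

		/* When a later pageblock fails to isolate and the undo path
		 * runs, the original type is unrecoverable; restoring a
		 * guess such as MIGRATE_MOVABLE silently drops the block's
		 * CMA designation. */
		set_pageblock_migratetype(page, MIGRATE_MOVABLE);
	}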

In terms of the performance of changing pageblock types, no change is
observed:

1. I used perf to collect stats while offlining and onlining all memory
   of a 40GB VM 10 times, and saw that get_pfnblock_flags_mask() and
   set_pfnblock_flags_mask() take about 0.12% and 0.02% of the whole
   process respectively, both with and without this patchset, across 3
   runs.

2. I used perf to collect stats while dd'ing from /dev/random to a 40GB
   tmpfs file, and found that get_pfnblock_flags_mask() takes about 0.05%
   of the process, both with and without this patchset, across 3 runs.


This patch (of 6):

No functional change is intended.

1. Add __NR_PAGEBLOCK_BITS for the number of pageblock flag bits and use
   roundup_pow_of_two(__NR_PAGEBLOCK_BITS) as NR_PAGEBLOCK_BITS to take
   the right number of bits for pageblock flags.
2. Rename PB_migrate_skip to PB_compact_skip.
3. Add {get,set,clear}_pfnblock_bit() to operate on a standalone bit,
   like PB_compact_skip (see the sketch after this list).
4. Make {get,set}_pfnblock_flags_mask() internal functions and use
   {get,set}_pfnblock_migratetype() for pageblock migratetype operations.
5. Move pageblock flags common code to get_pfnblock_bitmap_bitidx().
6. Use MIGRATETYPE_MASK to get the migratetype of a pageblock from its
   flags.
7. Use PB_migrate_end in the definition of MIGRATETYPE_MASK instead of
   PB_migratetype_bits.
8. Add a comment on is_migrate_cma_folio() to prevent one from changing it
   to use get_pageblock_migratetype() and causing issues.
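
As a companion, a minimal sketch of how the reworked helpers fit together
after this patch (the caller is hypothetical; the helpers and
PB_compact_skip are the ones introduced below):

	/* Hypothetical caller; assumes page lies within its zone's span. */
	static void pageblock_api_sketch(struct page *page)
	{
		unsigned long pfn = page_to_pfn(page);
		enum migratetype mt;

		/* Migratetype reads use the new dedicated helper ... */
		mt = get_pfnblock_migratetype(page, pfn);

		/* ... and writes use set_pageblock_migratetype(), which now
		 * takes enum migratetype and wraps the internal
		 * __set_pfnblock_flags_mask(). */
		set_pageblock_migratetype(page, mt);

		/* Standalone bits such as PB_compact_skip use atomic bitops. */
		set_pfnblock_bit(page, pfn, PB_compact_skip);
		if (get_pfnblock_bit(page, pfn, PB_compact_skip))
			clear_pfnblock_bit(page, pfn, PB_compact_skip);
	}

For the bit arithmetic: with PB_migratetype_bits == 3, PB_migrate_end == 2
and __NR_PAGEBLOCK_BITS == 4, so MIGRATETYPE_MASK == (1UL << 3) - 1 == 0x7,
and roundup_pow_of_two(4) leaves NR_PAGEBLOCK_BITS at 4, matching the
BUILD_BUG_ON(NR_PAGEBLOCK_BITS != 4) below.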

Link: https://lkml.kernel.org/r/20250617021115.2331563-1-ziy@nvidia.com
Link: https://lkml.kernel.org/r/20250617021115.2331563-2-ziy@nvidia.com
Signed-off-by: Zi Yan <ziy@nvidia.com>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Brendan Jackman <jackmanb@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Richard Chang <richardycc@google.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Author: Zi Yan <ziy@nvidia.com>, 2025-06-16 22:11:09 -04:00
Committer: Andrew Morton <akpm@linux-foundation.org>
commit 42f46ed99a (parent d2a9721d80)
6 changed files with 162 additions and 67 deletions

Documentation/mm/physical_memory.rst

@@ -584,7 +584,7 @@ Compaction control
 ``compact_blockskip_flush``
   Set to true when compaction migration scanner and free scanner meet, which
-  means the ``PB_migrate_skip`` bits should be cleared.
+  means the ``PB_compact_skip`` bits should be cleared.
 
 ``contiguous``
   Set to true when the zone is contiguous (in other words, no hole).

include/linux/mmzone.h

@@ -92,8 +92,12 @@ extern const char * const migratetype_names[MIGRATE_TYPES];
 #ifdef CONFIG_CMA
 #  define is_migrate_cma(migratetype) unlikely((migratetype) == MIGRATE_CMA)
 #  define is_migrate_cma_page(_page) (get_pageblock_migratetype(_page) == MIGRATE_CMA)
-#  define is_migrate_cma_folio(folio, pfn) (MIGRATE_CMA == \
-	get_pfnblock_flags_mask(&folio->page, pfn, MIGRATETYPE_MASK))
+/*
+ * __dump_folio() in mm/debug.c passes a folio pointer to on-stack struct folio,
+ * so folio_pfn() cannot be used and pfn is needed.
+ */
+#  define is_migrate_cma_folio(folio, pfn) \
+	(get_pfnblock_migratetype(&folio->page, pfn) == MIGRATE_CMA)
 #else
 #  define is_migrate_cma(migratetype) false
 #  define is_migrate_cma_page(_page) false
@@ -122,14 +126,12 @@ static inline bool migratetype_is_mergeable(int mt)
 
 extern int page_group_by_mobility_disabled;
 
-#define MIGRATETYPE_MASK ((1UL << PB_migratetype_bits) - 1)
-
 #define get_pageblock_migratetype(page) \
-	get_pfnblock_flags_mask(page, page_to_pfn(page), MIGRATETYPE_MASK)
+	get_pfnblock_migratetype(page, page_to_pfn(page))
 
 #define folio_migratetype(folio) \
-	get_pfnblock_flags_mask(&folio->page, folio_pfn(folio), \
-			MIGRATETYPE_MASK)
+	get_pageblock_migratetype(&folio->page)
 
 struct free_area {
 	struct list_head free_list[MIGRATE_TYPES];
 	unsigned long nr_free;

include/linux/page-isolation.h

@@ -25,7 +25,7 @@ static inline bool is_migrate_isolate(int migratetype)
 #define MEMORY_OFFLINE	0x1
 #define REPORT_FAILURE	0x2
 
-void set_pageblock_migratetype(struct page *page, int migratetype);
+void set_pageblock_migratetype(struct page *page, enum migratetype migratetype);
 
 bool move_freepages_block_isolate(struct zone *zone, struct page *page,
 				  int migratetype);

include/linux/pageblock-flags.h

@@ -19,15 +19,19 @@ enum pageblock_bits {
 	PB_migrate,
 	PB_migrate_end = PB_migrate + PB_migratetype_bits - 1,
 			/* 3 bits required for migrate types */
-	PB_migrate_skip,/* If set the block is skipped by compaction */
+	PB_compact_skip,/* If set the block is skipped by compaction */
 
 	/*
 	 * Assume the bits will always align on a word. If this assumption
 	 * changes then get/set pageblock needs updating.
 	 */
-	NR_PAGEBLOCK_BITS
+	__NR_PAGEBLOCK_BITS
 };
 
+#define NR_PAGEBLOCK_BITS (roundup_pow_of_two(__NR_PAGEBLOCK_BITS))
+
+#define MIGRATETYPE_MASK ((1UL << (PB_migrate_end + 1)) - 1)
+
 #if defined(CONFIG_HUGETLB_PAGE)
 
 #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
@@ -65,27 +69,23 @@ extern unsigned int pageblock_order;
 /* Forward declaration */
 struct page;
 
-unsigned long get_pfnblock_flags_mask(const struct page *page,
-				      unsigned long pfn,
-				      unsigned long mask);
-
-void set_pfnblock_flags_mask(struct page *page,
-			     unsigned long flags,
-			     unsigned long pfn,
-			     unsigned long mask);
+enum migratetype get_pfnblock_migratetype(const struct page *page,
+					  unsigned long pfn);
+bool get_pfnblock_bit(const struct page *page, unsigned long pfn,
+		      enum pageblock_bits pb_bit);
+void set_pfnblock_bit(const struct page *page, unsigned long pfn,
+		      enum pageblock_bits pb_bit);
+void clear_pfnblock_bit(const struct page *page, unsigned long pfn,
+			enum pageblock_bits pb_bit);
 
 /* Declarations for getting and setting flags. See mm/page_alloc.c */
 #ifdef CONFIG_COMPACTION
 #define get_pageblock_skip(page) \
-	get_pfnblock_flags_mask(page, page_to_pfn(page), \
-			(1 << (PB_migrate_skip)))
+	get_pfnblock_bit(page, page_to_pfn(page), PB_compact_skip)
 #define clear_pageblock_skip(page) \
-	set_pfnblock_flags_mask(page, 0, page_to_pfn(page), \
-			(1 << PB_migrate_skip))
+	clear_pfnblock_bit(page, page_to_pfn(page), PB_compact_skip)
 #define set_pageblock_skip(page) \
-	set_pfnblock_flags_mask(page, (1 << PB_migrate_skip), \
-			page_to_pfn(page), \
-			(1 << PB_migrate_skip))
+	set_pfnblock_bit(page, page_to_pfn(page), PB_compact_skip)
 #else
 static inline bool get_pageblock_skip(struct page *page)
 {

mm/memory_hotplug.c

@@ -774,7 +774,7 @@ void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
 
 	/*
 	 * TODO now we have a visible range of pages which are not associated
-	 * with their zone properly. Not nice but set_pfnblock_flags_mask
+	 * with their zone properly. Not nice but set_pfnblock_migratetype()
 	 * expects the zone spans the pfn range. All the pages in the range
 	 * are reserved so nobody should be touching them so we should be safe
 	 */

mm/page_alloc.c

@@ -353,81 +353,174 @@ static inline int pfn_to_bitidx(const struct page *page, unsigned long pfn)
 	return (pfn >> pageblock_order) * NR_PAGEBLOCK_BITS;
 }
 
+static __always_inline bool is_standalone_pb_bit(enum pageblock_bits pb_bit)
+{
+	return pb_bit > PB_migrate_end && pb_bit < __NR_PAGEBLOCK_BITS;
+}
+
+static __always_inline void
+get_pfnblock_bitmap_bitidx(const struct page *page, unsigned long pfn,
+			   unsigned long **bitmap_word, unsigned long *bitidx)
+{
+	unsigned long *bitmap;
+	unsigned long word_bitidx;
+
+	BUILD_BUG_ON(NR_PAGEBLOCK_BITS != 4);
+	BUILD_BUG_ON(MIGRATE_TYPES > (1 << PB_migratetype_bits));
+	VM_BUG_ON_PAGE(!zone_spans_pfn(page_zone(page), pfn), page);
+
+	bitmap = get_pageblock_bitmap(page, pfn);
+	*bitidx = pfn_to_bitidx(page, pfn);
+	word_bitidx = *bitidx / BITS_PER_LONG;
+	*bitidx &= (BITS_PER_LONG - 1);
+	*bitmap_word = &bitmap[word_bitidx];
+}
+
 /**
- * get_pfnblock_flags_mask - Return the requested group of flags for the pageblock_nr_pages block of pages
+ * __get_pfnblock_flags_mask - Return the requested group of flags for
+ * a pageblock_nr_pages block of pages
  * @page: The page within the block of interest
  * @pfn: The target page frame number
  * @mask: mask of bits that the caller is interested in
  *
  * Return: pageblock_bits flags
  */
-unsigned long get_pfnblock_flags_mask(const struct page *page,
-					unsigned long pfn, unsigned long mask)
-{
-	unsigned long *bitmap;
-	unsigned long bitidx, word_bitidx;
-	unsigned long word;
-
-	bitmap = get_pageblock_bitmap(page, pfn);
-	bitidx = pfn_to_bitidx(page, pfn);
-	word_bitidx = bitidx / BITS_PER_LONG;
-	bitidx &= (BITS_PER_LONG-1);
-	/*
-	 * This races, without locks, with set_pfnblock_flags_mask(). Ensure
-	 * a consistent read of the memory array, so that results, even though
-	 * racy, are not corrupted.
-	 */
-	word = READ_ONCE(bitmap[word_bitidx]);
-	return (word >> bitidx) & mask;
-}
-
-static __always_inline int get_pfnblock_migratetype(const struct page *page,
-					unsigned long pfn)
-{
-	return get_pfnblock_flags_mask(page, pfn, MIGRATETYPE_MASK);
-}
-
-/**
- * set_pfnblock_flags_mask - Set the requested group of flags for a pageblock_nr_pages block of pages
- * @page: The page within the block of interest
- * @flags: The flags to set
- * @pfn: The target page frame number
- * @mask: mask of bits that the caller is interested in
- */
-void set_pfnblock_flags_mask(struct page *page, unsigned long flags,
+static unsigned long __get_pfnblock_flags_mask(const struct page *page,
 					unsigned long pfn,
 					unsigned long mask)
 {
-	unsigned long *bitmap;
-	unsigned long bitidx, word_bitidx;
+	unsigned long *bitmap_word;
+	unsigned long bitidx;
 	unsigned long word;
 
-	BUILD_BUG_ON(NR_PAGEBLOCK_BITS != 4);
-	BUILD_BUG_ON(MIGRATE_TYPES > (1 << PB_migratetype_bits));
+	get_pfnblock_bitmap_bitidx(page, pfn, &bitmap_word, &bitidx);
+	/*
+	 * This races, without locks, with set_pfnblock_migratetype(). Ensure
+	 * a consistent read of the memory array, so that results, even though
+	 * racy, are not corrupted.
+	 */
+	word = READ_ONCE(*bitmap_word);
+	return (word >> bitidx) & mask;
+}
 
-	bitmap = get_pageblock_bitmap(page, pfn);
-	bitidx = pfn_to_bitidx(page, pfn);
-	word_bitidx = bitidx / BITS_PER_LONG;
-	bitidx &= (BITS_PER_LONG-1);
+/**
+ * get_pfnblock_bit - Check if a standalone bit of a pageblock is set
+ * @page: The page within the block of interest
+ * @pfn: The target page frame number
+ * @pb_bit: pageblock bit to check
+ *
+ * Return: true if the bit is set, otherwise false
+ */
+bool get_pfnblock_bit(const struct page *page, unsigned long pfn,
+		      enum pageblock_bits pb_bit)
+{
+	unsigned long *bitmap_word;
+	unsigned long bitidx;
 
-	VM_BUG_ON_PAGE(!zone_spans_pfn(page_zone(page), pfn), page);
+	if (WARN_ON_ONCE(!is_standalone_pb_bit(pb_bit)))
+		return false;
+
+	get_pfnblock_bitmap_bitidx(page, pfn, &bitmap_word, &bitidx);
+
+	return test_bit(bitidx + pb_bit, bitmap_word);
+}
+
+/**
+ * get_pfnblock_migratetype - Return the migratetype of a pageblock
+ * @page: The page within the block of interest
+ * @pfn: The target page frame number
+ *
+ * Return: The migratetype of the pageblock
+ *
+ * Use get_pfnblock_migratetype() if caller already has both @page and @pfn
+ * to save a call to page_to_pfn().
+ */
+__always_inline enum migratetype
+get_pfnblock_migratetype(const struct page *page, unsigned long pfn)
+{
+	return __get_pfnblock_flags_mask(page, pfn, MIGRATETYPE_MASK);
+}
+
+/**
+ * __set_pfnblock_flags_mask - Set the requested group of flags for
+ * a pageblock_nr_pages block of pages
+ * @page: The page within the block of interest
+ * @pfn: The target page frame number
+ * @flags: The flags to set
+ * @mask: mask of bits that the caller is interested in
+ */
+static void __set_pfnblock_flags_mask(struct page *page, unsigned long pfn,
+				      unsigned long flags, unsigned long mask)
+{
+	unsigned long *bitmap_word;
+	unsigned long bitidx;
+	unsigned long word;
+
+	get_pfnblock_bitmap_bitidx(page, pfn, &bitmap_word, &bitidx);
 
 	mask <<= bitidx;
 	flags <<= bitidx;
 
-	word = READ_ONCE(bitmap[word_bitidx]);
+	word = READ_ONCE(*bitmap_word);
 	do {
-	} while (!try_cmpxchg(&bitmap[word_bitidx], &word, (word & ~mask) | flags));
+	} while (!try_cmpxchg(bitmap_word, &word, (word & ~mask) | flags));
+}
+
+/**
+ * set_pfnblock_bit - Set a standalone bit of a pageblock
+ * @page: The page within the block of interest
+ * @pfn: The target page frame number
+ * @pb_bit: pageblock bit to set
+ */
+void set_pfnblock_bit(const struct page *page, unsigned long pfn,
+		      enum pageblock_bits pb_bit)
+{
+	unsigned long *bitmap_word;
+	unsigned long bitidx;
+
+	if (WARN_ON_ONCE(!is_standalone_pb_bit(pb_bit)))
+		return;
+
+	get_pfnblock_bitmap_bitidx(page, pfn, &bitmap_word, &bitidx);
+
+	set_bit(bitidx + pb_bit, bitmap_word);
+}
+
+/**
+ * clear_pfnblock_bit - Clear a standalone bit of a pageblock
+ * @page: The page within the block of interest
+ * @pfn: The target page frame number
+ * @pb_bit: pageblock bit to clear
+ */
+void clear_pfnblock_bit(const struct page *page, unsigned long pfn,
+			enum pageblock_bits pb_bit)
+{
+	unsigned long *bitmap_word;
+	unsigned long bitidx;
+
+	if (WARN_ON_ONCE(!is_standalone_pb_bit(pb_bit)))
+		return;
+
+	get_pfnblock_bitmap_bitidx(page, pfn, &bitmap_word, &bitidx);
+
+	clear_bit(bitidx + pb_bit, bitmap_word);
 }
 
-void set_pageblock_migratetype(struct page *page, int migratetype)
+/**
+ * set_pageblock_migratetype - Set the migratetype of a pageblock
+ * @page: The page within the block of interest
+ * @migratetype: migratetype to set
+ */
+__always_inline void set_pageblock_migratetype(struct page *page,
+					       enum migratetype migratetype)
 {
 	if (unlikely(page_group_by_mobility_disabled &&
 		     migratetype < MIGRATE_PCPTYPES))
 		migratetype = MIGRATE_UNMOVABLE;
 
-	set_pfnblock_flags_mask(page, (unsigned long)migratetype,
-				page_to_pfn(page), MIGRATETYPE_MASK);
+	__set_pfnblock_flags_mask(page, page_to_pfn(page),
+				  (unsigned long)migratetype, MIGRATETYPE_MASK);
 }
 
 #ifdef CONFIG_DEBUG_VM
@@ -667,7 +760,7 @@ static inline void __add_to_free_list(struct page *page, struct zone *zone,
 	int nr_pages = 1 << order;
 
 	VM_WARN_ONCE(get_pageblock_migratetype(page) != migratetype,
-		     "page type is %lu, passed migratetype is %d (nr=%d)\n",
+		     "page type is %d, passed migratetype is %d (nr=%d)\n",
 		     get_pageblock_migratetype(page), migratetype, nr_pages);
 
 	if (tail)
@@ -693,7 +786,7 @@ static inline void move_to_free_list(struct page *page, struct zone *zone,
 
 	/* Free page moving can fail, so it happens before the type update */
 	VM_WARN_ONCE(get_pageblock_migratetype(page) != old_mt,
-		     "page type is %lu, passed migratetype is %d (nr=%d)\n",
+		     "page type is %d, passed migratetype is %d (nr=%d)\n",
 		     get_pageblock_migratetype(page), old_mt, nr_pages);
 
 	list_move_tail(&page->buddy_list, &area->free_list[new_mt]);
@@ -715,7 +808,7 @@ static inline void __del_page_from_free_list(struct page *page, struct zone *zon
 	int nr_pages = 1 << order;
 
 	VM_WARN_ONCE(get_pageblock_migratetype(page) != migratetype,
-		     "page type is %lu, passed migratetype is %d (nr=%d)\n",
+		     "page type is %d, passed migratetype is %d (nr=%d)\n",
 		     get_pageblock_migratetype(page), migratetype, nr_pages);
 
 	/* clear reported state and update reported page count */
@@ -3123,7 +3216,7 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
 
 /*
  * Do not instrument rmqueue() with KMSAN. This function may call
- * __msan_poison_alloca() through a call to set_pfnblock_flags_mask().
+ * __msan_poison_alloca() through a call to set_pfnblock_migratetype().
  * If __msan_poison_alloca() attempts to allocate pages for the stack depot, it
  * may call rmqueue() again, which will result in a deadlock.
  */