mirror of
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-09-04 20:19:47 +08:00
mm/page_alloc: pageblock flags functions clean up
Patch series "Make MIGRATE_ISOLATE a standalone bit", v10. This patchset moves MIGRATE_ISOLATE to a standalone bit to avoid being overwritten during pageblock isolation process. Currently, MIGRATE_ISOLATE is part of enum migratetype (in include/linux/mmzone.h), thus, setting a pageblock to MIGRATE_ISOLATE overwrites its original migratetype. This causes pageblock migratetype loss during alloc_contig_range() and memory offline, especially when the process fails due to a failed pageblock isolation and the code tries to undo the finished pageblock isolations. In terms of performance for changing pageblock types, no performance change is observed: 1. I used perf to collect stats of offlining and onlining all memory of a 40GB VM 10 times and see that get_pfnblock_flags_mask() and set_pfnblock_flags_mask() take about 0.12% and 0.02% of the whole process respectively with and without this patchset across 3 runs. 2. I used perf to collect stats of dd from /dev/random to a 40GB tmpfs file and find get_pfnblock_flags_mask() takes about 0.05% of the process with and without this patchset across 3 runs. This patch (of 6): No functional change is intended. 1. Add __NR_PAGEBLOCK_BITS for the number of pageblock flag bits and use roundup_pow_of_two(__NR_PAGEBLOCK_BITS) as NR_PAGEBLOCK_BITS to take the right amount of bits for pageblock flags. 2. Rename PB_migrate_skip to PB_compact_skip. 3. Add {get,set,clear}_pfnblock_bit() to operate on a standalone bit, like PB_compact_skip. 4. Make {get,set}_pfnblock_flags_mask() internal functions and use {get,set}_pfnblock_migratetype() for pageblock migratetype operations. 5. Move pageblock flags common code to get_pfnblock_bitmap_bitidx(). 6. Use MIGRATETYPE_MASK to get the migratetype of a pageblock from its flags. 7. Use PB_migrate_end in the definition of MIGRATETYPE_MASK instead of PB_migrate_bits. 8. Add a comment on is_migrate_cma_folio() to prevent one from changing it to use get_pageblock_migratetype() and causing issues. 
Link: https://lkml.kernel.org/r/20250617021115.2331563-1-ziy@nvidia.com Link: https://lkml.kernel.org/r/20250617021115.2331563-2-ziy@nvidia.com Signed-off-by: Zi Yan <ziy@nvidia.com> Reviewed-by: Vlastimil Babka <vbabka@suse.cz> Acked-by: David Hildenbrand <david@redhat.com> Cc: Baolin Wang <baolin.wang@linux.alibaba.com> Cc: Brendan Jackman <jackmanb@google.com> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Cc: Mel Gorman <mgorman@techsingularity.net> Cc: Michal Hocko <mhocko@suse.com> Cc: Oscar Salvador <osalvador@suse.de> Cc: Richard Chang <richardycc@google.com> Cc: Suren Baghdasaryan <surenb@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
parent
d2a9721d80
commit
42f46ed99a
@ -584,7 +584,7 @@ Compaction control
|
||||
|
||||
``compact_blockskip_flush``
|
||||
Set to true when compaction migration scanner and free scanner meet, which
|
||||
means the ``PB_migrate_skip`` bits should be cleared.
|
||||
means the ``PB_compact_skip`` bits should be cleared.
|
||||
|
||||
``contiguous``
|
||||
Set to true when the zone is contiguous (in other words, no hole).
|
||||
|
@ -92,8 +92,12 @@ extern const char * const migratetype_names[MIGRATE_TYPES];
|
||||
#ifdef CONFIG_CMA
|
||||
# define is_migrate_cma(migratetype) unlikely((migratetype) == MIGRATE_CMA)
|
||||
# define is_migrate_cma_page(_page) (get_pageblock_migratetype(_page) == MIGRATE_CMA)
|
||||
# define is_migrate_cma_folio(folio, pfn) (MIGRATE_CMA == \
|
||||
get_pfnblock_flags_mask(&folio->page, pfn, MIGRATETYPE_MASK))
|
||||
/*
|
||||
* __dump_folio() in mm/debug.c passes a folio pointer to on-stack struct folio,
|
||||
* so folio_pfn() cannot be used and pfn is needed.
|
||||
*/
|
||||
# define is_migrate_cma_folio(folio, pfn) \
|
||||
(get_pfnblock_migratetype(&folio->page, pfn) == MIGRATE_CMA)
|
||||
#else
|
||||
# define is_migrate_cma(migratetype) false
|
||||
# define is_migrate_cma_page(_page) false
|
||||
@ -122,14 +126,12 @@ static inline bool migratetype_is_mergeable(int mt)
|
||||
|
||||
extern int page_group_by_mobility_disabled;
|
||||
|
||||
#define MIGRATETYPE_MASK ((1UL << PB_migratetype_bits) - 1)
|
||||
#define get_pageblock_migratetype(page) \
|
||||
get_pfnblock_migratetype(page, page_to_pfn(page))
|
||||
|
||||
#define get_pageblock_migratetype(page) \
|
||||
get_pfnblock_flags_mask(page, page_to_pfn(page), MIGRATETYPE_MASK)
|
||||
#define folio_migratetype(folio) \
|
||||
get_pageblock_migratetype(&folio->page)
|
||||
|
||||
#define folio_migratetype(folio) \
|
||||
get_pfnblock_flags_mask(&folio->page, folio_pfn(folio), \
|
||||
MIGRATETYPE_MASK)
|
||||
struct free_area {
|
||||
struct list_head free_list[MIGRATE_TYPES];
|
||||
unsigned long nr_free;
|
||||
|
@ -25,7 +25,7 @@ static inline bool is_migrate_isolate(int migratetype)
|
||||
#define MEMORY_OFFLINE 0x1
|
||||
#define REPORT_FAILURE 0x2
|
||||
|
||||
void set_pageblock_migratetype(struct page *page, int migratetype);
|
||||
void set_pageblock_migratetype(struct page *page, enum migratetype migratetype);
|
||||
|
||||
bool move_freepages_block_isolate(struct zone *zone, struct page *page,
|
||||
int migratetype);
|
||||
|
@ -19,15 +19,19 @@ enum pageblock_bits {
|
||||
PB_migrate,
|
||||
PB_migrate_end = PB_migrate + PB_migratetype_bits - 1,
|
||||
/* 3 bits required for migrate types */
|
||||
PB_migrate_skip,/* If set the block is skipped by compaction */
|
||||
PB_compact_skip,/* If set the block is skipped by compaction */
|
||||
|
||||
/*
|
||||
* Assume the bits will always align on a word. If this assumption
|
||||
* changes then get/set pageblock needs updating.
|
||||
*/
|
||||
NR_PAGEBLOCK_BITS
|
||||
__NR_PAGEBLOCK_BITS
|
||||
};
|
||||
|
||||
#define NR_PAGEBLOCK_BITS (roundup_pow_of_two(__NR_PAGEBLOCK_BITS))
|
||||
|
||||
#define MIGRATETYPE_MASK ((1UL << (PB_migrate_end + 1)) - 1)
|
||||
|
||||
#if defined(CONFIG_HUGETLB_PAGE)
|
||||
|
||||
#ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
|
||||
@ -65,27 +69,23 @@ extern unsigned int pageblock_order;
|
||||
/* Forward declaration */
|
||||
struct page;
|
||||
|
||||
unsigned long get_pfnblock_flags_mask(const struct page *page,
|
||||
unsigned long pfn,
|
||||
unsigned long mask);
|
||||
|
||||
void set_pfnblock_flags_mask(struct page *page,
|
||||
unsigned long flags,
|
||||
unsigned long pfn,
|
||||
unsigned long mask);
|
||||
enum migratetype get_pfnblock_migratetype(const struct page *page,
|
||||
unsigned long pfn);
|
||||
bool get_pfnblock_bit(const struct page *page, unsigned long pfn,
|
||||
enum pageblock_bits pb_bit);
|
||||
void set_pfnblock_bit(const struct page *page, unsigned long pfn,
|
||||
enum pageblock_bits pb_bit);
|
||||
void clear_pfnblock_bit(const struct page *page, unsigned long pfn,
|
||||
enum pageblock_bits pb_bit);
|
||||
|
||||
/* Declarations for getting and setting flags. See mm/page_alloc.c */
|
||||
#ifdef CONFIG_COMPACTION
|
||||
#define get_pageblock_skip(page) \
|
||||
get_pfnblock_flags_mask(page, page_to_pfn(page), \
|
||||
(1 << (PB_migrate_skip)))
|
||||
get_pfnblock_bit(page, page_to_pfn(page), PB_compact_skip)
|
||||
#define clear_pageblock_skip(page) \
|
||||
set_pfnblock_flags_mask(page, 0, page_to_pfn(page), \
|
||||
(1 << PB_migrate_skip))
|
||||
clear_pfnblock_bit(page, page_to_pfn(page), PB_compact_skip)
|
||||
#define set_pageblock_skip(page) \
|
||||
set_pfnblock_flags_mask(page, (1 << PB_migrate_skip), \
|
||||
page_to_pfn(page), \
|
||||
(1 << PB_migrate_skip))
|
||||
set_pfnblock_bit(page, page_to_pfn(page), PB_compact_skip)
|
||||
#else
|
||||
static inline bool get_pageblock_skip(struct page *page)
|
||||
{
|
||||
|
@ -774,7 +774,7 @@ void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
|
||||
|
||||
/*
|
||||
* TODO now we have a visible range of pages which are not associated
|
||||
* with their zone properly. Not nice but set_pfnblock_flags_mask
|
||||
* with their zone properly. Not nice but set_pfnblock_migratetype()
|
||||
* expects the zone spans the pfn range. All the pages in the range
|
||||
* are reserved so nobody should be touching them so we should be safe
|
||||
*/
|
||||
|
171
mm/page_alloc.c
171
mm/page_alloc.c
@ -353,81 +353,174 @@ static inline int pfn_to_bitidx(const struct page *page, unsigned long pfn)
|
||||
return (pfn >> pageblock_order) * NR_PAGEBLOCK_BITS;
|
||||
}
|
||||
|
||||
static __always_inline bool is_standalone_pb_bit(enum pageblock_bits pb_bit)
|
||||
{
|
||||
return pb_bit > PB_migrate_end && pb_bit < __NR_PAGEBLOCK_BITS;
|
||||
}
|
||||
|
||||
static __always_inline void
|
||||
get_pfnblock_bitmap_bitidx(const struct page *page, unsigned long pfn,
|
||||
unsigned long **bitmap_word, unsigned long *bitidx)
|
||||
{
|
||||
unsigned long *bitmap;
|
||||
unsigned long word_bitidx;
|
||||
|
||||
BUILD_BUG_ON(NR_PAGEBLOCK_BITS != 4);
|
||||
BUILD_BUG_ON(MIGRATE_TYPES > (1 << PB_migratetype_bits));
|
||||
VM_BUG_ON_PAGE(!zone_spans_pfn(page_zone(page), pfn), page);
|
||||
|
||||
bitmap = get_pageblock_bitmap(page, pfn);
|
||||
*bitidx = pfn_to_bitidx(page, pfn);
|
||||
word_bitidx = *bitidx / BITS_PER_LONG;
|
||||
*bitidx &= (BITS_PER_LONG - 1);
|
||||
*bitmap_word = &bitmap[word_bitidx];
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* get_pfnblock_flags_mask - Return the requested group of flags for the pageblock_nr_pages block of pages
|
||||
* __get_pfnblock_flags_mask - Return the requested group of flags for
|
||||
* a pageblock_nr_pages block of pages
|
||||
* @page: The page within the block of interest
|
||||
* @pfn: The target page frame number
|
||||
* @mask: mask of bits that the caller is interested in
|
||||
*
|
||||
* Return: pageblock_bits flags
|
||||
*/
|
||||
unsigned long get_pfnblock_flags_mask(const struct page *page,
|
||||
unsigned long pfn, unsigned long mask)
|
||||
static unsigned long __get_pfnblock_flags_mask(const struct page *page,
|
||||
unsigned long pfn,
|
||||
unsigned long mask)
|
||||
{
|
||||
unsigned long *bitmap;
|
||||
unsigned long bitidx, word_bitidx;
|
||||
unsigned long *bitmap_word;
|
||||
unsigned long bitidx;
|
||||
unsigned long word;
|
||||
|
||||
bitmap = get_pageblock_bitmap(page, pfn);
|
||||
bitidx = pfn_to_bitidx(page, pfn);
|
||||
word_bitidx = bitidx / BITS_PER_LONG;
|
||||
bitidx &= (BITS_PER_LONG-1);
|
||||
get_pfnblock_bitmap_bitidx(page, pfn, &bitmap_word, &bitidx);
|
||||
/*
|
||||
* This races, without locks, with set_pfnblock_flags_mask(). Ensure
|
||||
* This races, without locks, with set_pfnblock_migratetype(). Ensure
|
||||
* a consistent read of the memory array, so that results, even though
|
||||
* racy, are not corrupted.
|
||||
*/
|
||||
word = READ_ONCE(bitmap[word_bitidx]);
|
||||
word = READ_ONCE(*bitmap_word);
|
||||
return (word >> bitidx) & mask;
|
||||
}
|
||||
|
||||
static __always_inline int get_pfnblock_migratetype(const struct page *page,
|
||||
unsigned long pfn)
|
||||
/**
|
||||
* get_pfnblock_bit - Check if a standalone bit of a pageblock is set
|
||||
* @page: The page within the block of interest
|
||||
* @pfn: The target page frame number
|
||||
* @pb_bit: pageblock bit to check
|
||||
*
|
||||
* Return: true if the bit is set, otherwise false
|
||||
*/
|
||||
bool get_pfnblock_bit(const struct page *page, unsigned long pfn,
|
||||
enum pageblock_bits pb_bit)
|
||||
{
|
||||
return get_pfnblock_flags_mask(page, pfn, MIGRATETYPE_MASK);
|
||||
unsigned long *bitmap_word;
|
||||
unsigned long bitidx;
|
||||
|
||||
if (WARN_ON_ONCE(!is_standalone_pb_bit(pb_bit)))
|
||||
return false;
|
||||
|
||||
get_pfnblock_bitmap_bitidx(page, pfn, &bitmap_word, &bitidx);
|
||||
|
||||
return test_bit(bitidx + pb_bit, bitmap_word);
|
||||
}
|
||||
|
||||
/**
|
||||
* set_pfnblock_flags_mask - Set the requested group of flags for a pageblock_nr_pages block of pages
|
||||
* get_pfnblock_migratetype - Return the migratetype of a pageblock
|
||||
* @page: The page within the block of interest
|
||||
* @flags: The flags to set
|
||||
* @pfn: The target page frame number
|
||||
*
|
||||
* Return: The migratetype of the pageblock
|
||||
*
|
||||
* Use get_pfnblock_migratetype() if caller already has both @page and @pfn
|
||||
* to save a call to page_to_pfn().
|
||||
*/
|
||||
__always_inline enum migratetype
|
||||
get_pfnblock_migratetype(const struct page *page, unsigned long pfn)
|
||||
{
|
||||
return __get_pfnblock_flags_mask(page, pfn, MIGRATETYPE_MASK);
|
||||
}
|
||||
|
||||
/**
|
||||
* __set_pfnblock_flags_mask - Set the requested group of flags for
|
||||
* a pageblock_nr_pages block of pages
|
||||
* @page: The page within the block of interest
|
||||
* @pfn: The target page frame number
|
||||
* @flags: The flags to set
|
||||
* @mask: mask of bits that the caller is interested in
|
||||
*/
|
||||
void set_pfnblock_flags_mask(struct page *page, unsigned long flags,
|
||||
unsigned long pfn,
|
||||
unsigned long mask)
|
||||
static void __set_pfnblock_flags_mask(struct page *page, unsigned long pfn,
|
||||
unsigned long flags, unsigned long mask)
|
||||
{
|
||||
unsigned long *bitmap;
|
||||
unsigned long bitidx, word_bitidx;
|
||||
unsigned long *bitmap_word;
|
||||
unsigned long bitidx;
|
||||
unsigned long word;
|
||||
|
||||
BUILD_BUG_ON(NR_PAGEBLOCK_BITS != 4);
|
||||
BUILD_BUG_ON(MIGRATE_TYPES > (1 << PB_migratetype_bits));
|
||||
|
||||
bitmap = get_pageblock_bitmap(page, pfn);
|
||||
bitidx = pfn_to_bitidx(page, pfn);
|
||||
word_bitidx = bitidx / BITS_PER_LONG;
|
||||
bitidx &= (BITS_PER_LONG-1);
|
||||
|
||||
VM_BUG_ON_PAGE(!zone_spans_pfn(page_zone(page), pfn), page);
|
||||
get_pfnblock_bitmap_bitidx(page, pfn, &bitmap_word, &bitidx);
|
||||
|
||||
mask <<= bitidx;
|
||||
flags <<= bitidx;
|
||||
|
||||
word = READ_ONCE(bitmap[word_bitidx]);
|
||||
word = READ_ONCE(*bitmap_word);
|
||||
do {
|
||||
} while (!try_cmpxchg(&bitmap[word_bitidx], &word, (word & ~mask) | flags));
|
||||
} while (!try_cmpxchg(bitmap_word, &word, (word & ~mask) | flags));
|
||||
}
|
||||
|
||||
void set_pageblock_migratetype(struct page *page, int migratetype)
|
||||
/**
|
||||
* set_pfnblock_bit - Set a standalone bit of a pageblock
|
||||
* @page: The page within the block of interest
|
||||
* @pfn: The target page frame number
|
||||
* @pb_bit: pageblock bit to set
|
||||
*/
|
||||
void set_pfnblock_bit(const struct page *page, unsigned long pfn,
|
||||
enum pageblock_bits pb_bit)
|
||||
{
|
||||
unsigned long *bitmap_word;
|
||||
unsigned long bitidx;
|
||||
|
||||
if (WARN_ON_ONCE(!is_standalone_pb_bit(pb_bit)))
|
||||
return;
|
||||
|
||||
get_pfnblock_bitmap_bitidx(page, pfn, &bitmap_word, &bitidx);
|
||||
|
||||
set_bit(bitidx + pb_bit, bitmap_word);
|
||||
}
|
||||
|
||||
/**
|
||||
* clear_pfnblock_bit - Clear a standalone bit of a pageblock
|
||||
* @page: The page within the block of interest
|
||||
* @pfn: The target page frame number
|
||||
* @pb_bit: pageblock bit to clear
|
||||
*/
|
||||
void clear_pfnblock_bit(const struct page *page, unsigned long pfn,
|
||||
enum pageblock_bits pb_bit)
|
||||
{
|
||||
unsigned long *bitmap_word;
|
||||
unsigned long bitidx;
|
||||
|
||||
if (WARN_ON_ONCE(!is_standalone_pb_bit(pb_bit)))
|
||||
return;
|
||||
|
||||
get_pfnblock_bitmap_bitidx(page, pfn, &bitmap_word, &bitidx);
|
||||
|
||||
clear_bit(bitidx + pb_bit, bitmap_word);
|
||||
}
|
||||
|
||||
/**
|
||||
* set_pageblock_migratetype - Set the migratetype of a pageblock
|
||||
* @page: The page within the block of interest
|
||||
* @migratetype: migratetype to set
|
||||
*/
|
||||
__always_inline void set_pageblock_migratetype(struct page *page,
|
||||
enum migratetype migratetype)
|
||||
{
|
||||
if (unlikely(page_group_by_mobility_disabled &&
|
||||
migratetype < MIGRATE_PCPTYPES))
|
||||
migratetype = MIGRATE_UNMOVABLE;
|
||||
|
||||
set_pfnblock_flags_mask(page, (unsigned long)migratetype,
|
||||
page_to_pfn(page), MIGRATETYPE_MASK);
|
||||
__set_pfnblock_flags_mask(page, page_to_pfn(page),
|
||||
(unsigned long)migratetype, MIGRATETYPE_MASK);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_DEBUG_VM
|
||||
@ -667,7 +760,7 @@ static inline void __add_to_free_list(struct page *page, struct zone *zone,
|
||||
int nr_pages = 1 << order;
|
||||
|
||||
VM_WARN_ONCE(get_pageblock_migratetype(page) != migratetype,
|
||||
"page type is %lu, passed migratetype is %d (nr=%d)\n",
|
||||
"page type is %d, passed migratetype is %d (nr=%d)\n",
|
||||
get_pageblock_migratetype(page), migratetype, nr_pages);
|
||||
|
||||
if (tail)
|
||||
@ -693,7 +786,7 @@ static inline void move_to_free_list(struct page *page, struct zone *zone,
|
||||
|
||||
/* Free page moving can fail, so it happens before the type update */
|
||||
VM_WARN_ONCE(get_pageblock_migratetype(page) != old_mt,
|
||||
"page type is %lu, passed migratetype is %d (nr=%d)\n",
|
||||
"page type is %d, passed migratetype is %d (nr=%d)\n",
|
||||
get_pageblock_migratetype(page), old_mt, nr_pages);
|
||||
|
||||
list_move_tail(&page->buddy_list, &area->free_list[new_mt]);
|
||||
@ -715,7 +808,7 @@ static inline void __del_page_from_free_list(struct page *page, struct zone *zon
|
||||
int nr_pages = 1 << order;
|
||||
|
||||
VM_WARN_ONCE(get_pageblock_migratetype(page) != migratetype,
|
||||
"page type is %lu, passed migratetype is %d (nr=%d)\n",
|
||||
"page type is %d, passed migratetype is %d (nr=%d)\n",
|
||||
get_pageblock_migratetype(page), migratetype, nr_pages);
|
||||
|
||||
/* clear reported state and update reported page count */
|
||||
@ -3123,7 +3216,7 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
|
||||
|
||||
/*
|
||||
* Do not instrument rmqueue() with KMSAN. This function may call
|
||||
* __msan_poison_alloca() through a call to set_pfnblock_flags_mask().
|
||||
* __msan_poison_alloca() through a call to set_pfnblock_migratetype().
|
||||
* If __msan_poison_alloca() attempts to allocate pages for the stack depot, it
|
||||
* may call rmqueue() again, which will result in a deadlock.
|
||||
*/
|
||||
|
Loading…
Reference in New Issue
Block a user