mirror of
				git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
				synced 2025-09-04 20:19:47 +08:00 
			
		
		
		
	 682a3385e7
			
		
	
	
		682a3385e7
		
	
	
	
	
		
			
			The page allocator iterates through a zonelist for zones that match the
addressing limitations and nodemask of the caller but many allocations
will not be restricted.  Despite this, there is always function call
overhead which builds up.
This patch inlines the optimistic basic case and only calls the iterator
function for the complex case.  A hindrance was the fact that
cpuset_current_mems_allowed is used in the fastpath as the allowed
nodemask even though all nodes are allowed on most systems.  The patch
handles this by only considering cpuset_current_mems_allowed if a cpuset
exists.  As well as being faster in the fast-path, this removes some
junk in the slowpath.
The performance difference on a page allocator microbenchmark is:
                                             4.6.0-rc2                  4.6.0-rc2
                                      statinline-v1r20              optiter-v1r20
  Min      alloc-odr0-1               412.00 (  0.00%)           382.00 (  7.28%)
  Min      alloc-odr0-2               301.00 (  0.00%)           282.00 (  6.31%)
  Min      alloc-odr0-4               247.00 (  0.00%)           233.00 (  5.67%)
  Min      alloc-odr0-8               215.00 (  0.00%)           203.00 (  5.58%)
  Min      alloc-odr0-16              199.00 (  0.00%)           188.00 (  5.53%)
  Min      alloc-odr0-32              191.00 (  0.00%)           182.00 (  4.71%)
  Min      alloc-odr0-64              187.00 (  0.00%)           177.00 (  5.35%)
  Min      alloc-odr0-128             185.00 (  0.00%)           175.00 (  5.41%)
  Min      alloc-odr0-256             193.00 (  0.00%)           184.00 (  4.66%)
  Min      alloc-odr0-512             207.00 (  0.00%)           197.00 (  4.83%)
  Min      alloc-odr0-1024            213.00 (  0.00%)           203.00 (  4.69%)
  Min      alloc-odr0-2048            220.00 (  0.00%)           209.00 (  5.00%)
  Min      alloc-odr0-4096            226.00 (  0.00%)           214.00 (  5.31%)
  Min      alloc-odr0-8192            229.00 (  0.00%)           218.00 (  4.80%)
  Min      alloc-odr0-16384           229.00 (  0.00%)           219.00 (  4.37%)
perf indicated that next_zones_zonelist disappeared in the profile and
__next_zones_zonelist did not appear.  This is expected as the
micro-benchmark would hit the inlined fast-path every time.
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
		
	
			
		
			
				
	
	
		
			115 lines
		
	
	
		
			2.4 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			115 lines
		
	
	
		
			2.4 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*
 | |
|  * linux/mm/mmzone.c
 | |
|  *
 | |
|  * management codes for pgdats, zones and page flags
 | |
|  */
 | |
| 
 | |
| 
 | |
| #include <linux/stddef.h>
 | |
| #include <linux/mm.h>
 | |
| #include <linux/mmzone.h>
 | |
| 
 | |
| struct pglist_data *first_online_pgdat(void)
 | |
| {
 | |
| 	return NODE_DATA(first_online_node);
 | |
| }
 | |
| 
 | |
| struct pglist_data *next_online_pgdat(struct pglist_data *pgdat)
 | |
| {
 | |
| 	int nid = next_online_node(pgdat->node_id);
 | |
| 
 | |
| 	if (nid == MAX_NUMNODES)
 | |
| 		return NULL;
 | |
| 	return NODE_DATA(nid);
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * next_zone - helper magic for for_each_zone()
 | |
|  */
 | |
| struct zone *next_zone(struct zone *zone)
 | |
| {
 | |
| 	pg_data_t *pgdat = zone->zone_pgdat;
 | |
| 
 | |
| 	if (zone < pgdat->node_zones + MAX_NR_ZONES - 1)
 | |
| 		zone++;
 | |
| 	else {
 | |
| 		pgdat = next_online_pgdat(pgdat);
 | |
| 		if (pgdat)
 | |
| 			zone = pgdat->node_zones;
 | |
| 		else
 | |
| 			zone = NULL;
 | |
| 	}
 | |
| 	return zone;
 | |
| }
 | |
| 
 | |
| static inline int zref_in_nodemask(struct zoneref *zref, nodemask_t *nodes)
 | |
| {
 | |
| #ifdef CONFIG_NUMA
 | |
| 	return node_isset(zonelist_node_idx(zref), *nodes);
 | |
| #else
 | |
| 	return 1;
 | |
| #endif /* CONFIG_NUMA */
 | |
| }
 | |
| 
 | |
| /* Returns the next zone at or below highest_zoneidx in a zonelist */
 | |
| struct zoneref *__next_zones_zonelist(struct zoneref *z,
 | |
| 					enum zone_type highest_zoneidx,
 | |
| 					nodemask_t *nodes)
 | |
| {
 | |
| 	/*
 | |
| 	 * Find the next suitable zone to use for the allocation.
 | |
| 	 * Only filter based on nodemask if it's set
 | |
| 	 */
 | |
| 	if (likely(nodes == NULL))
 | |
| 		while (zonelist_zone_idx(z) > highest_zoneidx)
 | |
| 			z++;
 | |
| 	else
 | |
| 		while (zonelist_zone_idx(z) > highest_zoneidx ||
 | |
| 				(z->zone && !zref_in_nodemask(z, nodes)))
 | |
| 			z++;
 | |
| 
 | |
| 	return z;
 | |
| }
 | |
| 
 | |
#ifdef CONFIG_ARCH_HAS_HOLES_MEMORYMODEL
/*
 * memmap_valid_within - sanity-check a struct page against its pfn and zone
 * @pfn: page frame number @page is expected to map back to
 * @page: struct page under test
 * @zone: zone @page is expected to belong to
 *
 * Returns true only when page_to_pfn(@page) equals @pfn and
 * page_zone(@page) equals @zone.  The pfn comparison runs first and the
 * zone is only looked up if it succeeds.
 */
bool memmap_valid_within(unsigned long pfn,
					struct page *page, struct zone *zone)
{
	return page_to_pfn(page) == pfn && page_zone(page) == zone;
}
#endif /* CONFIG_ARCH_HAS_HOLES_MEMORYMODEL */
 | |
| 
 | |
| void lruvec_init(struct lruvec *lruvec)
 | |
| {
 | |
| 	enum lru_list lru;
 | |
| 
 | |
| 	memset(lruvec, 0, sizeof(struct lruvec));
 | |
| 
 | |
| 	for_each_lru(lru)
 | |
| 		INIT_LIST_HEAD(&lruvec->lists[lru]);
 | |
| }
 | |
| 
 | |
#if defined(CONFIG_NUMA_BALANCING) && !defined(LAST_CPUPID_NOT_IN_PAGE_FLAGS)
/*
 * page_cpupid_xchg_last - store a new last-cpupid in page->flags
 * @page: page whose flags word is updated
 * @cpupid: value to store; it is masked with LAST_CPUPID_MASK first
 *
 * The LAST_CPUPID field of page->flags is replaced using a cmpxchg() retry
 * loop, so a concurrent change to the flags word makes the update start
 * over instead of being overwritten.
 *
 * Returns the value page_cpupid_last() reported during the iteration whose
 * cmpxchg() succeeded.
 */
int page_cpupid_xchg_last(struct page *page, int cpupid)
{
	unsigned long snapshot, updated;
	int prev_cpupid;

	do {
		snapshot = page->flags;
		updated = snapshot;
		prev_cpupid = page_cpupid_last(page);

		/* Clear the old field, then insert the new value. */
		updated &= ~(LAST_CPUPID_MASK << LAST_CPUPID_PGSHIFT);
		updated |= (cpupid & LAST_CPUPID_MASK) << LAST_CPUPID_PGSHIFT;
	} while (unlikely(cmpxchg(&page->flags, snapshot, updated) != snapshot));

	return prev_cpupid;
}
#endif
 |