mirror of
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-09-04 20:19:47 +08:00
Patch series "make vm_committed_as_batch aware of vm overcommit policy", v6.
When checking a performance change for will-it-scale scalability mmap test
[1], we found very high lock contention for spinlock of percpu counter
'vm_committed_as':
94.14% 0.35% [kernel.kallsyms] [k] _raw_spin_lock_irqsave
48.21% _raw_spin_lock_irqsave;percpu_counter_add_batch;__vm_enough_memory;mmap_region;do_mmap;
45.91% _raw_spin_lock_irqsave;percpu_counter_add_batch;__do_munmap;
Actually this heavy lock contention is not always necessary. The
'vm_committed_as' needs to be very precise when the strict
OVERCOMMIT_NEVER policy is set, which requires a rather small batch number
for the percpu counter.
So keep 'batch' number unchanged for strict OVERCOMMIT_NEVER policy, and
enlarge it for not-so-strict OVERCOMMIT_ALWAYS and OVERCOMMIT_GUESS
policies.
Benchmark with the same testcase in [1] shows 53% improvement on an 8C/16T
desktop, and 2097% (20X) on a 4S/72C/144T server. For that case,
whether it shows an improvement depends on whether the test mmap size is
bigger than the computed batch number.
We tested 10+ platforms in 0day (server, desktop and laptop). If we lift
it to 64X, 80%+ platforms show improvements, and for 16X lift, 1/3 of the
platforms will show improvements.
And generally it should help mmap/unmap usage, as Michal Hocko
mentioned:
: I believe that there are non-synthetic workloads which would benefit
: from a larger batch. E.g. large in memory databases which do large
: mmaps during startups from multiple threads.
Note: There are some style complaints from checkpatch for patch 4, as the
sysctl handler declaration follows the same format as its sibling functions.
[1] https://lore.kernel.org/lkml/20200305062138.GI5972@shao2-debian/
This patch (of 4):
Use the existing vm_memory_committed() helper instead of reading
'vm_committed_as' directly, which also makes future changes easier.
Signed-off-by: Feng Tang <feng.tang@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Qian Cai <cai@lca.pw>
Cc: Kees Cook <keescook@chromium.org>
Cc: Andi Kleen <andi.kleen@intel.com>
Cc: Tim Chen <tim.c.chen@intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Huang Ying <ying.huang@intel.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Dennis Zhou <dennis@kernel.org>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Cc: kernel test robot <rong.a.chen@intel.com>
Cc: "K. Y. Srinivasan" <kys@microsoft.com>
Cc: Tejun Heo <tj@kernel.org>
Link: http://lkml.kernel.org/r/1594389708-60781-1-git-send-email-feng.tang@intel.com
Link: http://lkml.kernel.org/r/1594389708-60781-2-git-send-email-feng.tang@intel.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
162 lines
5.2 KiB
C
162 lines
5.2 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
#include <linux/fs.h>
|
|
#include <linux/init.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/hugetlb.h>
|
|
#include <linux/mman.h>
|
|
#include <linux/mmzone.h>
|
|
#include <linux/proc_fs.h>
|
|
#include <linux/percpu.h>
|
|
#include <linux/seq_file.h>
|
|
#include <linux/swap.h>
|
|
#include <linux/vmstat.h>
|
|
#include <linux/atomic.h>
|
|
#include <linux/vmalloc.h>
|
|
#ifdef CONFIG_CMA
|
|
#include <linux/cma.h>
|
|
#endif
|
|
#include <asm/page.h>
|
|
#include "internal.h"
|
|
|
|
/*
 * Fallback for the hook that meminfo_proc_show() invokes as its last
 * reporting step.
 *
 * The symbol is declared weak, so a regular (non-weak) definition of
 * arch_report_meminfo() elsewhere in the link takes precedence over this
 * one.  This default writes nothing to @m.
 */
__attribute__((weak)) void arch_report_meminfo(struct seq_file *m)
{
	/* Intentionally empty. */
}
|
|
|
|
static void show_val_kb(struct seq_file *m, const char *s, unsigned long num)
|
|
{
|
|
seq_put_decimal_ull_width(m, s, num << (PAGE_SHIFT - 10), 8);
|
|
seq_write(m, " kB\n", 4);
|
|
}
|
|
|
|
static int meminfo_proc_show(struct seq_file *m, void *v)
|
|
{
|
|
struct sysinfo i;
|
|
unsigned long committed;
|
|
long cached;
|
|
long available;
|
|
unsigned long pages[NR_LRU_LISTS];
|
|
unsigned long sreclaimable, sunreclaim;
|
|
int lru;
|
|
|
|
si_meminfo(&i);
|
|
si_swapinfo(&i);
|
|
committed = vm_memory_committed();
|
|
|
|
cached = global_node_page_state(NR_FILE_PAGES) -
|
|
total_swapcache_pages() - i.bufferram;
|
|
if (cached < 0)
|
|
cached = 0;
|
|
|
|
for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++)
|
|
pages[lru] = global_node_page_state(NR_LRU_BASE + lru);
|
|
|
|
available = si_mem_available();
|
|
sreclaimable = global_node_page_state_pages(NR_SLAB_RECLAIMABLE_B);
|
|
sunreclaim = global_node_page_state_pages(NR_SLAB_UNRECLAIMABLE_B);
|
|
|
|
show_val_kb(m, "MemTotal: ", i.totalram);
|
|
show_val_kb(m, "MemFree: ", i.freeram);
|
|
show_val_kb(m, "MemAvailable: ", available);
|
|
show_val_kb(m, "Buffers: ", i.bufferram);
|
|
show_val_kb(m, "Cached: ", cached);
|
|
show_val_kb(m, "SwapCached: ", total_swapcache_pages());
|
|
show_val_kb(m, "Active: ", pages[LRU_ACTIVE_ANON] +
|
|
pages[LRU_ACTIVE_FILE]);
|
|
show_val_kb(m, "Inactive: ", pages[LRU_INACTIVE_ANON] +
|
|
pages[LRU_INACTIVE_FILE]);
|
|
show_val_kb(m, "Active(anon): ", pages[LRU_ACTIVE_ANON]);
|
|
show_val_kb(m, "Inactive(anon): ", pages[LRU_INACTIVE_ANON]);
|
|
show_val_kb(m, "Active(file): ", pages[LRU_ACTIVE_FILE]);
|
|
show_val_kb(m, "Inactive(file): ", pages[LRU_INACTIVE_FILE]);
|
|
show_val_kb(m, "Unevictable: ", pages[LRU_UNEVICTABLE]);
|
|
show_val_kb(m, "Mlocked: ", global_zone_page_state(NR_MLOCK));
|
|
|
|
#ifdef CONFIG_HIGHMEM
|
|
show_val_kb(m, "HighTotal: ", i.totalhigh);
|
|
show_val_kb(m, "HighFree: ", i.freehigh);
|
|
show_val_kb(m, "LowTotal: ", i.totalram - i.totalhigh);
|
|
show_val_kb(m, "LowFree: ", i.freeram - i.freehigh);
|
|
#endif
|
|
|
|
#ifndef CONFIG_MMU
|
|
show_val_kb(m, "MmapCopy: ",
|
|
(unsigned long)atomic_long_read(&mmap_pages_allocated));
|
|
#endif
|
|
|
|
show_val_kb(m, "SwapTotal: ", i.totalswap);
|
|
show_val_kb(m, "SwapFree: ", i.freeswap);
|
|
show_val_kb(m, "Dirty: ",
|
|
global_node_page_state(NR_FILE_DIRTY));
|
|
show_val_kb(m, "Writeback: ",
|
|
global_node_page_state(NR_WRITEBACK));
|
|
show_val_kb(m, "AnonPages: ",
|
|
global_node_page_state(NR_ANON_MAPPED));
|
|
show_val_kb(m, "Mapped: ",
|
|
global_node_page_state(NR_FILE_MAPPED));
|
|
show_val_kb(m, "Shmem: ", i.sharedram);
|
|
show_val_kb(m, "KReclaimable: ", sreclaimable +
|
|
global_node_page_state(NR_KERNEL_MISC_RECLAIMABLE));
|
|
show_val_kb(m, "Slab: ", sreclaimable + sunreclaim);
|
|
show_val_kb(m, "SReclaimable: ", sreclaimable);
|
|
show_val_kb(m, "SUnreclaim: ", sunreclaim);
|
|
seq_printf(m, "KernelStack: %8lu kB\n",
|
|
global_node_page_state(NR_KERNEL_STACK_KB));
|
|
#ifdef CONFIG_SHADOW_CALL_STACK
|
|
seq_printf(m, "ShadowCallStack:%8lu kB\n",
|
|
global_node_page_state(NR_KERNEL_SCS_KB));
|
|
#endif
|
|
show_val_kb(m, "PageTables: ",
|
|
global_zone_page_state(NR_PAGETABLE));
|
|
|
|
show_val_kb(m, "NFS_Unstable: ", 0);
|
|
show_val_kb(m, "Bounce: ",
|
|
global_zone_page_state(NR_BOUNCE));
|
|
show_val_kb(m, "WritebackTmp: ",
|
|
global_node_page_state(NR_WRITEBACK_TEMP));
|
|
show_val_kb(m, "CommitLimit: ", vm_commit_limit());
|
|
show_val_kb(m, "Committed_AS: ", committed);
|
|
seq_printf(m, "VmallocTotal: %8lu kB\n",
|
|
(unsigned long)VMALLOC_TOTAL >> 10);
|
|
show_val_kb(m, "VmallocUsed: ", vmalloc_nr_pages());
|
|
show_val_kb(m, "VmallocChunk: ", 0ul);
|
|
show_val_kb(m, "Percpu: ", pcpu_nr_pages());
|
|
|
|
#ifdef CONFIG_MEMORY_FAILURE
|
|
seq_printf(m, "HardwareCorrupted: %5lu kB\n",
|
|
atomic_long_read(&num_poisoned_pages) << (PAGE_SHIFT - 10));
|
|
#endif
|
|
|
|
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
|
show_val_kb(m, "AnonHugePages: ",
|
|
global_node_page_state(NR_ANON_THPS) * HPAGE_PMD_NR);
|
|
show_val_kb(m, "ShmemHugePages: ",
|
|
global_node_page_state(NR_SHMEM_THPS) * HPAGE_PMD_NR);
|
|
show_val_kb(m, "ShmemPmdMapped: ",
|
|
global_node_page_state(NR_SHMEM_PMDMAPPED) * HPAGE_PMD_NR);
|
|
show_val_kb(m, "FileHugePages: ",
|
|
global_node_page_state(NR_FILE_THPS) * HPAGE_PMD_NR);
|
|
show_val_kb(m, "FilePmdMapped: ",
|
|
global_node_page_state(NR_FILE_PMDMAPPED) * HPAGE_PMD_NR);
|
|
#endif
|
|
|
|
#ifdef CONFIG_CMA
|
|
show_val_kb(m, "CmaTotal: ", totalcma_pages);
|
|
show_val_kb(m, "CmaFree: ",
|
|
global_zone_page_state(NR_FREE_CMA_PAGES));
|
|
#endif
|
|
|
|
hugetlb_report_meminfo(m);
|
|
|
|
arch_report_meminfo(m);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Create the "meminfo" proc entry, with meminfo_proc_show() as its show
 * callback.  Hooked up through fs_initcall() below.
 *
 * The result of proc_create_single() is not examined; this function
 * always returns 0.
 */
static int __init proc_meminfo_init(void)
{
	/* Mode 0 and a NULL parent are passed straight to proc_create_single(). */
	proc_create_single("meminfo", 0, NULL, meminfo_proc_show);

	return 0;
}
fs_initcall(proc_meminfo_init);
|