mirror of
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-09-04 20:19:47 +08:00
Benchmark the various operations allowed for epoll_ctl(2). The idea is
to concurrently stress a single epoll instance doing add/mod/del
operations.
Committer testing:
# perf bench epoll ctl
# Running 'epoll/ctl' benchmark:
Run summary [PID 20344]: 4 threads doing epoll_ctl ops 64 file-descriptors for 8 secs.
[thread 0] fdmap: 0x21a46b0 ... 0x21a47ac [ add: 1680960 ops; mod: 1680960 ops; del: 1680960 ops ]
[thread 1] fdmap: 0x21a4960 ... 0x21a4a5c [ add: 1685440 ops; mod: 1685440 ops; del: 1685440 ops ]
[thread 2] fdmap: 0x21a4c10 ... 0x21a4d0c [ add: 1674368 ops; mod: 1674368 ops; del: 1674368 ops ]
[thread 3] fdmap: 0x21a4ec0 ... 0x21a4fbc [ add: 1677568 ops; mod: 1677568 ops; del: 1677568 ops ]
Averaged 1679584 ADD operations (+- 0.14%)
Averaged 1679584 MOD operations (+- 0.14%)
Averaged 1679584 DEL operations (+- 0.14%)
#
Let's measure those calls with 'perf trace' to get a glimpse of what this
benchmark is doing in terms of syscalls:
# perf trace -m32768 -s perf bench epoll ctl
# Running 'epoll/ctl' benchmark:
Run summary [PID 20405]: 4 threads doing epoll_ctl ops 64 file-descriptors for 8 secs.
[thread 0] fdmap: 0x21764e0 ... 0x21765dc [ add: 1100480 ops; mod: 1100480 ops; del: 1100480 ops ]
[thread 1] fdmap: 0x2176790 ... 0x217688c [ add: 1250176 ops; mod: 1250176 ops; del: 1250176 ops ]
[thread 2] fdmap: 0x2176a40 ... 0x2176b3c [ add: 1022464 ops; mod: 1022464 ops; del: 1022464 ops ]
[thread 3] fdmap: 0x2176cf0 ... 0x2176dec [ add: 705472 ops; mod: 705472 ops; del: 705472 ops ]
Averaged 1019648 ADD operations (+- 11.27%)
Averaged 1019648 MOD operations (+- 11.27%)
Averaged 1019648 DEL operations (+- 11.27%)
Summary of events:
epoll-ctl (20405), 1264 events, 0.0%
syscall calls total min avg max stddev
(msec) (msec) (msec) (msec) (%)
--------------- -------- --------- --------- --------- --------- ------
eventfd2 256 9.514 0.001 0.037 5.243 68.00%
clone 4 1.245 0.204 0.311 0.531 24.13%
mprotect 66 0.345 0.002 0.005 0.021 7.43%
openat 45 0.313 0.004 0.007 0.073 21.93%
mmap 88 0.302 0.002 0.003 0.013 5.02%
futex 4 0.160 0.002 0.040 0.140 83.43%
sched_setaffinity 4 0.124 0.005 0.031 0.070 49.39%
read 44 0.103 0.001 0.002 0.013 15.54%
fstat 40 0.052 0.001 0.001 0.003 5.43%
close 39 0.039 0.001 0.001 0.001 1.48%
stat 9 0.034 0.003 0.004 0.006 7.30%
access 3 0.023 0.007 0.008 0.008 4.25%
open 2 0.021 0.008 0.011 0.013 22.60%
getdents 4 0.019 0.001 0.005 0.009 37.15%
write 2 0.013 0.004 0.007 0.009 38.48%
munmap 1 0.010 0.010 0.010 0.010 0.00%
brk 3 0.006 0.001 0.002 0.003 26.34%
rt_sigprocmask 2 0.004 0.001 0.002 0.003 43.95%
rt_sigaction 3 0.004 0.001 0.001 0.002 16.07%
prlimit64 3 0.004 0.001 0.001 0.001 5.39%
prctl 1 0.003 0.003 0.003 0.003 0.00%
epoll_create 1 0.003 0.003 0.003 0.003 0.00%
lseek 2 0.002 0.001 0.001 0.001 11.42%
sched_getaffinity 1 0.002 0.002 0.002 0.002 0.00%
arch_prctl 1 0.002 0.002 0.002 0.002 0.00%
set_tid_address 1 0.001 0.001 0.001 0.001 0.00%
getpid 1 0.001 0.001 0.001 0.001 0.00%
set_robust_list 1 0.001 0.001 0.001 0.001 0.00%
execve 1 0.000 0.000 0.000 0.000 0.00%
epoll-ctl (20406), 1245480 events, 14.6%
syscall calls total min avg max stddev
(msec) (msec) (msec) (msec) (%)
--------------- -------- --------- --------- --------- --------- ------
epoll_ctl 619511 1034.927 0.001 0.002 6.691 0.67%
nanosleep 3226 616.114 0.006 0.191 10.376 7.57%
futex 2 11.336 0.002 5.668 11.334 99.97%
set_robust_list 1 0.001 0.001 0.001 0.001 0.00%
clone 1 0.000 0.000 0.000 0.000 0.00%
epoll-ctl (20407), 1243151 events, 14.5%
syscall calls total min avg max stddev
(msec) (msec) (msec) (msec) (%)
--------------- -------- --------- --------- --------- --------- ------
epoll_ctl 618350 1042.181 0.001 0.002 2.512 0.40%
nanosleep 3220 366.261 0.012 0.114 18.162 9.59%
futex 4 5.463 0.001 1.366 5.427 99.12%
set_robust_list 1 0.002 0.002 0.002 0.002 0.00%
epoll-ctl (20408), 1801690 events, 21.1%
syscall calls total min avg max stddev
(msec) (msec) (msec) (msec) (%)
--------------- -------- --------- --------- --------- --------- ------
epoll_ctl 896174 1540.581 0.001 0.002 6.987 0.74%
nanosleep 4667 783.393 0.006 0.168 10.419 7.10%
futex 2 4.682 0.002 2.341 4.681 99.93%
set_robust_list 1 0.002 0.002 0.002 0.002 0.00%
clone 1 0.000 0.000 0.000 0.000 0.00%
epoll-ctl (20409), 4254890 events, 49.8%
syscall calls total min avg max stddev
(msec) (msec) (msec) (msec) (%)
--------------- -------- --------- --------- --------- --------- ------
epoll_ctl 2116416 3768.097 0.001 0.002 9.956 0.41%
nanosleep 11023 1141.778 0.006 0.104 9.447 4.95%
futex 3 0.037 0.002 0.012 0.029 70.50%
set_robust_list 1 0.008 0.008 0.008 0.008 0.00%
madvise 1 0.005 0.005 0.005 0.005 0.00%
clone 1 0.000 0.000 0.000 0.000 0.00%
#
Committer notes:
Fix build on fedora:24-x-ARC-uClibc, debian:experimental-x-mips,
debian:experimental-x-mipsel, ubuntu:16.04-x-arm and ubuntu:16.04-x-powerpc
CC /tmp/build/perf/bench/epoll-ctl.o
bench/epoll-ctl.c: In function 'init_fdmaps':
bench/epoll-ctl.c:214:16: error: comparison between signed and unsigned integer expressions [-Werror=sign-compare]
for (i = 0; i < nfds; i+=inc) {
^
bench/epoll-ctl.c: In function 'bench_epoll_ctl':
bench/epoll-ctl.c:377:16: error: comparison between signed and unsigned integer expressions [-Werror=sign-compare]
for (i = 0; i < nthreads; i++) {
^
bench/epoll-ctl.c:388:16: error: comparison between signed and unsigned integer expressions [-Werror=sign-compare]
for (i = 0; i < nthreads; i++) {
^
cc1: all warnings being treated as errors
Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Davidlohr Bueso <dbueso@suse.de>
Cc: Jason Baron <jbaron@akamai.com>
Link: http://lkml.kernel.org/r/20181106152226.20883-3-dave@stgolabs.net
[ Use inttypes.h to print rlim_t fields, fixing the build on Alpine Linux / musl libc ]
[ Check if eventfd() is available, i.e. if HAVE_EVENTFD is defined ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
307 lines
7.4 KiB
C
307 lines
7.4 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
 * builtin-bench.c
 *
 * General benchmarking collections provided by perf
 *
 * Copyright (C) 2009, Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
 */
|
|
|
|
/*
 * Available benchmark collection list:
 *
 *  sched ... scheduler and IPC performance
 *  mem   ... memory access performance
 *  numa  ... NUMA scheduling and MM performance
 *  futex ... Futex performance
 *  epoll ... Event poll performance
 */
|
|
#include "perf.h"
|
|
#include "util/util.h"
|
|
#include <subcmd/parse-options.h>
|
|
#include "builtin.h"
|
|
#include "bench/bench.h"
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <sys/prctl.h>
|
|
|
|
/*
 * Signature shared by every benchmark entry point: it receives an
 * argc/argv pair and returns an int, which cmd_bench() hands back as its
 * own result when a single benchmark is run.
 */
typedef int (*bench_fn_t)(int argc, const char **argv);
|
|
|
|
struct bench {
|
|
const char *name;
|
|
const char *summary;
|
|
bench_fn_t fn;
|
|
};
|
|
|
|
#ifdef HAVE_LIBNUMA_SUPPORT
/* Benchmarks of the "numa" collection, present only with HAVE_LIBNUMA_SUPPORT. */
static struct bench numa_benchmarks[] = {
	{
		.name	 = "mem",
		.summary = "Benchmark for NUMA workloads",
		.fn	 = bench_numa,
	},
	{
		.name	 = "all",
		.summary = "Run all NUMA benchmarks",
		.fn	 = NULL,
	},
	/* sentinel */
	{ .name = NULL, .summary = NULL, .fn = NULL },
};
#endif
|
|
|
|
static struct bench sched_benchmarks[] = {
|
|
{ "messaging", "Benchmark for scheduling and IPC", bench_sched_messaging },
|
|
{ "pipe", "Benchmark for pipe() between two processes", bench_sched_pipe },
|
|
{ "all", "Run all scheduler benchmarks", NULL },
|
|
{ NULL, NULL, NULL }
|
|
};
|
|
|
|
static struct bench mem_benchmarks[] = {
|
|
{ "memcpy", "Benchmark for memcpy() functions", bench_mem_memcpy },
|
|
{ "memset", "Benchmark for memset() functions", bench_mem_memset },
|
|
{ "all", "Run all memory access benchmarks", NULL },
|
|
{ NULL, NULL, NULL }
|
|
};
|
|
|
|
static struct bench futex_benchmarks[] = {
|
|
{ "hash", "Benchmark for futex hash table", bench_futex_hash },
|
|
{ "wake", "Benchmark for futex wake calls", bench_futex_wake },
|
|
{ "wake-parallel", "Benchmark for parallel futex wake calls", bench_futex_wake_parallel },
|
|
{ "requeue", "Benchmark for futex requeue calls", bench_futex_requeue },
|
|
/* pi-futexes */
|
|
{ "lock-pi", "Benchmark for futex lock_pi calls", bench_futex_lock_pi },
|
|
{ "all", "Run all futex benchmarks", NULL },
|
|
{ NULL, NULL, NULL }
|
|
};
|
|
|
|
#ifdef HAVE_EVENTFD
/*
 * Benchmarks of the "epoll" collection, present only when HAVE_EVENTFD is
 * defined.
 *
 * The "all" entry previously carried the futex collection's summary
 * ("Run all futex benchmarks"), a copy-paste slip that showed up in the
 * listing printed by dump_benchmarks().
 */
static struct bench epoll_benchmarks[] = {
	{ "wait",	"Benchmark epoll concurrent epoll_waits",	bench_epoll_wait	},
	{ "ctl",	"Benchmark epoll concurrent epoll_ctls",	bench_epoll_ctl		},
	{ "all",	"Run all epoll benchmarks",			NULL			},
	{ NULL,		NULL,						NULL			}
};
#endif // HAVE_EVENTFD
|
|
|
|
/*
 * A named group of benchmarks.
 *
 * The field order matters: the tables in this file may use positional
 * initializers.
 */
struct collection {
	const char	*name;		/* matched against the command-line word */
	const char	*summary;	/* one-line description shown in the usage text */
	struct bench	*benchmarks;	/* NULL-name terminated table; NULL for "all" */
};
|
|
|
|
static struct collection collections[] = {
|
|
{ "sched", "Scheduler and IPC benchmarks", sched_benchmarks },
|
|
{ "mem", "Memory access benchmarks", mem_benchmarks },
|
|
#ifdef HAVE_LIBNUMA_SUPPORT
|
|
{ "numa", "NUMA scheduling and MM benchmarks", numa_benchmarks },
|
|
#endif
|
|
{"futex", "Futex stressing benchmarks", futex_benchmarks },
|
|
#ifdef HAVE_EVENTFD
|
|
{"epoll", "Epoll stressing benchmarks", epoll_benchmarks },
|
|
#endif
|
|
{ "all", "All benchmarks", NULL },
|
|
{ NULL, NULL, NULL }
|
|
};
|
|
|
|
/*
 * Iterate over all benchmark collections.
 *
 * @coll: lvalue of type 'struct collection *' used as the cursor. It walks
 *        the 'collections' table and stops at the entry whose name is NULL,
 *        so after a full pass it points at that sentinel.
 *
 * The argument is parenthesized so that any lvalue expression (e.g. '*pp')
 * expands correctly.
 */
#define for_each_collection(coll) \
	for ((coll) = collections; (coll)->name; (coll)++)
|
|
|
|
/*
 * Iterate over all benchmarks within a collection.
 *
 * @coll:  expression of type 'struct collection *'.
 * @bench: lvalue of type 'struct bench *' used as the cursor.
 *
 * A collection whose 'benchmarks' pointer is NULL yields no iterations;
 * otherwise the walk ends at the entry whose name is NULL.
 *
 * Both arguments are parenthesized so that expressions such as '&c' expand
 * correctly.
 */
#define for_each_bench(coll, bench) \
	for ((bench) = (coll)->benchmarks; (bench) && (bench)->name; (bench)++)
|
|
|
|
static void dump_benchmarks(struct collection *coll)
|
|
{
|
|
struct bench *bench;
|
|
|
|
printf("\n # List of available benchmarks for collection '%s':\n\n", coll->name);
|
|
|
|
for_each_bench(coll, bench)
|
|
printf("%14s: %s\n", bench->name, bench->summary);
|
|
|
|
printf("\n");
|
|
}
|
|
|
|
static const char *bench_format_str;
|
|
|
|
/* Output/formatting style, exported to benchmark modules: */
|
|
int bench_format = BENCH_FORMAT_DEFAULT;
|
|
unsigned int bench_repeat = 10; /* default number of times to repeat the run */
|
|
|
|
static const struct option bench_options[] = {
|
|
OPT_STRING('f', "format", &bench_format_str, "default|simple", "Specify the output formatting style"),
|
|
OPT_UINTEGER('r', "repeat", &bench_repeat, "Specify amount of times to repeat the run"),
|
|
OPT_END()
|
|
};
|
|
|
|
/* NULL-terminated usage lines, shared by parse_options() and print_usage(). */
static const char * const bench_usage[] = {
	"perf bench [<common options>] <collection> <benchmark> [<options>]",
	NULL
};
|
|
|
|
static void print_usage(void)
|
|
{
|
|
struct collection *coll;
|
|
int i;
|
|
|
|
printf("Usage: \n");
|
|
for (i = 0; bench_usage[i]; i++)
|
|
printf("\t%s\n", bench_usage[i]);
|
|
printf("\n");
|
|
|
|
printf(" # List of all available benchmark collections:\n\n");
|
|
|
|
for_each_collection(coll)
|
|
printf("%14s: %s\n", coll->name, coll->summary);
|
|
printf("\n");
|
|
}
|
|
|
|
static int bench_str2int(const char *str)
|
|
{
|
|
if (!str)
|
|
return BENCH_FORMAT_DEFAULT;
|
|
|
|
if (!strcmp(str, BENCH_FORMAT_DEFAULT_STR))
|
|
return BENCH_FORMAT_DEFAULT;
|
|
else if (!strcmp(str, BENCH_FORMAT_SIMPLE_STR))
|
|
return BENCH_FORMAT_SIMPLE;
|
|
|
|
return BENCH_FORMAT_UNKNOWN;
|
|
}
|
|
|
|
/*
|
|
* Run a specific benchmark but first rename the running task's ->comm[]
|
|
* to something meaningful:
|
|
*/
|
|
static int run_bench(const char *coll_name, const char *bench_name, bench_fn_t fn,
|
|
int argc, const char **argv)
|
|
{
|
|
int size;
|
|
char *name;
|
|
int ret;
|
|
|
|
size = strlen(coll_name) + 1 + strlen(bench_name) + 1;
|
|
|
|
name = zalloc(size);
|
|
BUG_ON(!name);
|
|
|
|
scnprintf(name, size, "%s-%s", coll_name, bench_name);
|
|
|
|
prctl(PR_SET_NAME, name);
|
|
argv[0] = name;
|
|
|
|
ret = fn(argc, argv);
|
|
|
|
free(name);
|
|
|
|
return ret;
|
|
}
|
|
|
|
static void run_collection(struct collection *coll)
|
|
{
|
|
struct bench *bench;
|
|
const char *argv[2];
|
|
|
|
argv[1] = NULL;
|
|
/*
|
|
* TODO:
|
|
*
|
|
* Preparing preset parameters for
|
|
* embedded, ordinary PC, HPC, etc...
|
|
* would be helpful.
|
|
*/
|
|
for_each_bench(coll, bench) {
|
|
if (!bench->fn)
|
|
break;
|
|
printf("# Running %s/%s benchmark...\n", coll->name, bench->name);
|
|
fflush(stdout);
|
|
|
|
argv[1] = bench->name;
|
|
run_bench(coll->name, bench->name, bench->fn, 1, argv);
|
|
printf("\n");
|
|
}
|
|
}
|
|
|
|
static void run_all_collections(void)
|
|
{
|
|
struct collection *coll;
|
|
|
|
for_each_collection(coll)
|
|
run_collection(coll);
|
|
}
|
|
|
|
int cmd_bench(int argc, const char **argv)
|
|
{
|
|
struct collection *coll;
|
|
int ret = 0;
|
|
|
|
if (argc < 2) {
|
|
/* No collection specified. */
|
|
print_usage();
|
|
goto end;
|
|
}
|
|
|
|
argc = parse_options(argc, argv, bench_options, bench_usage,
|
|
PARSE_OPT_STOP_AT_NON_OPTION);
|
|
|
|
bench_format = bench_str2int(bench_format_str);
|
|
if (bench_format == BENCH_FORMAT_UNKNOWN) {
|
|
printf("Unknown format descriptor: '%s'\n", bench_format_str);
|
|
goto end;
|
|
}
|
|
|
|
if (bench_repeat == 0) {
|
|
printf("Invalid repeat option: Must specify a positive value\n");
|
|
goto end;
|
|
}
|
|
|
|
if (argc < 1) {
|
|
print_usage();
|
|
goto end;
|
|
}
|
|
|
|
if (!strcmp(argv[0], "all")) {
|
|
run_all_collections();
|
|
goto end;
|
|
}
|
|
|
|
for_each_collection(coll) {
|
|
struct bench *bench;
|
|
|
|
if (strcmp(coll->name, argv[0]))
|
|
continue;
|
|
|
|
if (argc < 2) {
|
|
/* No bench specified. */
|
|
dump_benchmarks(coll);
|
|
goto end;
|
|
}
|
|
|
|
if (!strcmp(argv[1], "all")) {
|
|
run_collection(coll);
|
|
goto end;
|
|
}
|
|
|
|
for_each_bench(coll, bench) {
|
|
if (strcmp(bench->name, argv[1]))
|
|
continue;
|
|
|
|
if (bench_format == BENCH_FORMAT_DEFAULT)
|
|
printf("# Running '%s/%s' benchmark:\n", coll->name, bench->name);
|
|
fflush(stdout);
|
|
ret = run_bench(coll->name, bench->name, bench->fn, argc-1, argv+1);
|
|
goto end;
|
|
}
|
|
|
|
if (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) {
|
|
dump_benchmarks(coll);
|
|
goto end;
|
|
}
|
|
|
|
printf("Unknown benchmark: '%s' for collection '%s'\n", argv[1], argv[0]);
|
|
ret = 1;
|
|
goto end;
|
|
}
|
|
|
|
printf("Unknown collection: '%s'\n", argv[0]);
|
|
ret = 1;
|
|
|
|
end:
|
|
return ret;
|
|
}
|