mirror of
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-09-04 20:19:47 +08:00

This patch tests bpf_loop in pyperf and strobemeta, and measures the
verifier performance of replacing the traditional for loop with bpf_loop.

The results are as follows:

~strobemeta~

Baseline
    verification time 6808200 usec
    stack depth 496
    processed 554252 insns (limit 1000000) max_states_per_insn 16
    total_states 15878 peak_states 13489 mark_read 3110
    #192 verif_scale_strobemeta:OK (unrolled loop)

Using bpf_loop
    verification time 31589 usec
    stack depth 96+400
    processed 1513 insns (limit 1000000) max_states_per_insn 2
    total_states 106 peak_states 106 mark_read 60
    #193 verif_scale_strobemeta_bpf_loop:OK

~pyperf600~

Baseline
    verification time 29702486 usec
    stack depth 368
    processed 626838 insns (limit 1000000) max_states_per_insn 7
    total_states 30368 peak_states 30279 mark_read 748
    #182 verif_scale_pyperf600:OK (unrolled loop)

Using bpf_loop
    verification time 148488 usec
    stack depth 320+40
    processed 10518 insns (limit 1000000) max_states_per_insn 10
    total_states 705 peak_states 517 mark_read 38
    #183 verif_scale_pyperf600_bpf_loop:OK

Using the bpf_loop helper led to approximately a 99% decrease in the
verification time and in the number of instructions.

Signed-off-by: Joanne Koong <joannekoong@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20211130030622.4131246-4-joannekoong@fb.com
350 lines
8.3 KiB
C
350 lines
8.3 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
// Copyright (c) 2019 Facebook
|
|
#include <linux/sched.h>
|
|
#include <linux/ptrace.h>
|
|
#include <stdint.h>
|
|
#include <stddef.h>
|
|
#include <stdbool.h>
|
|
#include <linux/bpf.h>
|
|
#include <bpf/bpf_helpers.h>
|
|
|
|
/* Size in bytes of the Symbol.name buffer. */
#define FUNCTION_NAME_LEN 64
/* Size in bytes of the Symbol.file buffer. */
#define FILE_NAME_LEN 128
/* Size in bytes of the Event.comm buffer. */
#define TASK_COMM_LEN 16
|
|
|
|
/*
 * Byte offsets that are added to user-space object addresses before they
 * are read with bpf_probe_read_user(). Each member is named after the
 * structure and field it locates.
 */
typedef struct {
	int PyThreadState_frame;	/* added to the thread state pointer to read the first frame */
	int PyThreadState_thread;	/* added to the thread state pointer to read the creating pthread */
	int PyFrameObject_back;		/* added to a frame pointer to read FrameData.f_back */
	int PyFrameObject_code;		/* added to a frame pointer to read FrameData.f_code */
	int PyFrameObject_lineno;	/* not referenced by the code in this file */
	int PyCodeObject_filename;	/* added to f_code to read FrameData.co_filename */
	int PyCodeObject_name;		/* added to f_code to read FrameData.co_name */
	int String_data;		/* added to a string object pointer to reach its characters */
	int String_size;		/* not referenced by the code in this file */
} OffsetConfig;
|
|
|
|
typedef struct {
|
|
uintptr_t current_state_addr;
|
|
uintptr_t tls_key_addr;
|
|
OffsetConfig offsets;
|
|
bool use_tls;
|
|
} PidData;
|
|
|
|
/* Value type of statsmap. */
typedef struct {
	uint32_t success;	/* incremented once per __on_event() call that reaches the output step */
} Stats;
|
|
|
|
typedef struct {
|
|
char name[FUNCTION_NAME_LEN];
|
|
char file[FILE_NAME_LEN];
|
|
} Symbol;
|
|
|
|
typedef struct {
|
|
uint32_t pid;
|
|
uint32_t tid;
|
|
char comm[TASK_COMM_LEN];
|
|
int32_t kernel_stack_id;
|
|
int32_t user_stack_id;
|
|
bool thread_current;
|
|
bool pthread_match;
|
|
bool stack_complete;
|
|
int16_t stack_len;
|
|
int32_t stack[STACK_MAX_LEN];
|
|
|
|
int has_meta;
|
|
int metadata;
|
|
char dummy_safeguard;
|
|
} Event;
|
|
|
|
|
|
/* Integer type used for the pid/tid halves of bpf_get_current_pid_tgid(). */
typedef int pid_t;
|
|
|
|
/*
 * Pointers read out of one user-space frame by get_frame_data(). All of
 * them are user addresses and are only ever used as bpf_probe_read_user*()
 * source addresses.
 */
typedef struct {
	void *f_back;		/* PyFrameObject.f_back, previous frame */
	void *f_code;		/* PyFrameObject.f_code, pointer to PyCodeObject */
	void *co_filename;	/* PyCodeObject.co_filename */
	void *co_name;		/* PyCodeObject.co_name */
} FrameData;
|
|
|
|
#ifdef SUBPROGS
|
|
__noinline
|
|
#else
|
|
__always_inline
|
|
#endif
|
|
static void *get_thread_state(void *tls_base, PidData *pidData)
|
|
{
|
|
void* thread_state;
|
|
int key;
|
|
|
|
bpf_probe_read_user(&key, sizeof(key), (void*)(long)pidData->tls_key_addr);
|
|
bpf_probe_read_user(&thread_state, sizeof(thread_state),
|
|
tls_base + 0x310 + key * 0x10 + 0x08);
|
|
return thread_state;
|
|
}
|
|
|
|
static __always_inline bool get_frame_data(void *frame_ptr, PidData *pidData,
|
|
FrameData *frame, Symbol *symbol)
|
|
{
|
|
// read data from PyFrameObject
|
|
bpf_probe_read_user(&frame->f_back,
|
|
sizeof(frame->f_back),
|
|
frame_ptr + pidData->offsets.PyFrameObject_back);
|
|
bpf_probe_read_user(&frame->f_code,
|
|
sizeof(frame->f_code),
|
|
frame_ptr + pidData->offsets.PyFrameObject_code);
|
|
|
|
// read data from PyCodeObject
|
|
if (!frame->f_code)
|
|
return false;
|
|
bpf_probe_read_user(&frame->co_filename,
|
|
sizeof(frame->co_filename),
|
|
frame->f_code + pidData->offsets.PyCodeObject_filename);
|
|
bpf_probe_read_user(&frame->co_name,
|
|
sizeof(frame->co_name),
|
|
frame->f_code + pidData->offsets.PyCodeObject_name);
|
|
// read actual names into symbol
|
|
if (frame->co_filename)
|
|
bpf_probe_read_user_str(&symbol->file,
|
|
sizeof(symbol->file),
|
|
frame->co_filename +
|
|
pidData->offsets.String_data);
|
|
if (frame->co_name)
|
|
bpf_probe_read_user_str(&symbol->name,
|
|
sizeof(symbol->name),
|
|
frame->co_name +
|
|
pidData->offsets.String_data);
|
|
return true;
|
|
}
|
|
|
|
struct {
|
|
__uint(type, BPF_MAP_TYPE_HASH);
|
|
__uint(max_entries, 1);
|
|
__type(key, int);
|
|
__type(value, PidData);
|
|
} pidmap SEC(".maps");
|
|
|
|
struct {
|
|
__uint(type, BPF_MAP_TYPE_HASH);
|
|
__uint(max_entries, 1);
|
|
__type(key, int);
|
|
__type(value, Event);
|
|
} eventmap SEC(".maps");
|
|
|
|
struct {
|
|
__uint(type, BPF_MAP_TYPE_HASH);
|
|
__uint(max_entries, 1);
|
|
__type(key, Symbol);
|
|
__type(value, int);
|
|
} symbolmap SEC(".maps");
|
|
|
|
struct {
|
|
__uint(type, BPF_MAP_TYPE_ARRAY);
|
|
__uint(max_entries, 1);
|
|
__type(key, int);
|
|
__type(value, Stats);
|
|
} statsmap SEC(".maps");
|
|
|
|
struct {
|
|
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
|
|
__uint(max_entries, 32);
|
|
__uint(key_size, sizeof(int));
|
|
__uint(value_size, sizeof(int));
|
|
} perfmap SEC(".maps");
|
|
|
|
struct {
|
|
__uint(type, BPF_MAP_TYPE_STACK_TRACE);
|
|
__uint(max_entries, 1000);
|
|
__uint(key_size, sizeof(int));
|
|
__uint(value_size, sizeof(long long) * 127);
|
|
} stackmap SEC(".maps");
|
|
|
|
#ifdef USE_BPF_LOOP
|
|
struct process_frame_ctx {
|
|
int cur_cpu;
|
|
int32_t *symbol_counter;
|
|
void *frame_ptr;
|
|
FrameData *frame;
|
|
PidData *pidData;
|
|
Symbol *sym;
|
|
Event *event;
|
|
bool done;
|
|
};
|
|
|
|
/*
 * Empty volatile asm statement that takes var as a register input and
 * yields it as the output in the same register, so the compiler has to
 * treat var as possibly modified at this point.
 */
#define barrier_var(var) asm volatile("" : "=r"(var) : "0"(var))
|
|
|
|
/*
 * bpf_loop() callback: process the frame at ctx->frame_ptr and store its
 * symbol id in slot @i of the event stack.
 *
 * @i:   iteration index supplied by bpf_loop()
 * @ctx: walk state, see struct process_frame_ctx
 *
 * Returns 0 to continue with the next iteration, or 1 to stop the loop
 * (a symbolmap lookup failed right after an update, in which case
 * ctx->done is also set, or @i is outside the stack array).
 *
 * After a frame has been recorded, ctx->frame_ptr is advanced to the
 * frame's f_back so that the next iteration processes the previous
 * frame, as the non-bpf_loop variant of the walk does. The original code
 * assigned f_back to a local copy only, so the context never advanced.
 */
static int process_frame_callback(__u32 i, struct process_frame_ctx *ctx)
{
	int zero = 0;
	void *frame_ptr = ctx->frame_ptr;
	PidData *pidData = ctx->pidData;
	FrameData *frame = ctx->frame;
	int32_t *symbol_counter = ctx->symbol_counter;
	int cur_cpu = ctx->cur_cpu;
	Event *event = ctx->event;
	Symbol *sym = ctx->sym;

	if (frame_ptr && get_frame_data(frame_ptr, pidData, frame, sym)) {
		int32_t new_symbol_id = *symbol_counter * 64 + cur_cpu;
		int32_t *symbol_id = bpf_map_lookup_elem(&symbolmap, sym);

		if (!symbol_id) {
			/* Unknown symbol: insert it, then fetch the stored value. */
			bpf_map_update_elem(&symbolmap, sym, &zero, 0);
			symbol_id = bpf_map_lookup_elem(&symbolmap, sym);
			if (!symbol_id) {
				ctx->done = true;
				return 1;
			}
		}
		if (*symbol_id == new_symbol_id)
			(*symbol_counter)++;

		/* Keep the explicit bounds check on i ahead of the array store. */
		barrier_var(i);
		if (i >= STACK_MAX_LEN)
			return 1;

		event->stack[i] = *symbol_id;

		event->stack_len = i + 1;
		/* Advance the walk for the next iteration. */
		ctx->frame_ptr = frame->f_back;
	}
	return 0;
}
|
|
#endif /* USE_BPF_LOOP */
|
|
|
|
#ifdef GLOBAL_FUNC
|
|
__noinline
|
|
#elif defined(SUBPROGS)
|
|
static __noinline
|
|
#else
|
|
static __always_inline
|
|
#endif
|
|
int __on_event(struct bpf_raw_tracepoint_args *ctx)
|
|
{
|
|
uint64_t pid_tgid = bpf_get_current_pid_tgid();
|
|
pid_t pid = (pid_t)(pid_tgid >> 32);
|
|
PidData* pidData = bpf_map_lookup_elem(&pidmap, &pid);
|
|
if (!pidData)
|
|
return 0;
|
|
|
|
int zero = 0;
|
|
Event* event = bpf_map_lookup_elem(&eventmap, &zero);
|
|
if (!event)
|
|
return 0;
|
|
|
|
event->pid = pid;
|
|
|
|
event->tid = (pid_t)pid_tgid;
|
|
bpf_get_current_comm(&event->comm, sizeof(event->comm));
|
|
|
|
event->user_stack_id = bpf_get_stackid(ctx, &stackmap, BPF_F_USER_STACK);
|
|
event->kernel_stack_id = bpf_get_stackid(ctx, &stackmap, 0);
|
|
|
|
void* thread_state_current = (void*)0;
|
|
bpf_probe_read_user(&thread_state_current,
|
|
sizeof(thread_state_current),
|
|
(void*)(long)pidData->current_state_addr);
|
|
|
|
struct task_struct* task = (struct task_struct*)bpf_get_current_task();
|
|
void* tls_base = (void*)task;
|
|
|
|
void* thread_state = pidData->use_tls ? get_thread_state(tls_base, pidData)
|
|
: thread_state_current;
|
|
event->thread_current = thread_state == thread_state_current;
|
|
|
|
if (pidData->use_tls) {
|
|
uint64_t pthread_created;
|
|
uint64_t pthread_self;
|
|
bpf_probe_read_user(&pthread_self, sizeof(pthread_self),
|
|
tls_base + 0x10);
|
|
|
|
bpf_probe_read_user(&pthread_created,
|
|
sizeof(pthread_created),
|
|
thread_state +
|
|
pidData->offsets.PyThreadState_thread);
|
|
event->pthread_match = pthread_created == pthread_self;
|
|
} else {
|
|
event->pthread_match = 1;
|
|
}
|
|
|
|
if (event->pthread_match || !pidData->use_tls) {
|
|
void* frame_ptr;
|
|
FrameData frame;
|
|
Symbol sym = {};
|
|
int cur_cpu = bpf_get_smp_processor_id();
|
|
|
|
bpf_probe_read_user(&frame_ptr,
|
|
sizeof(frame_ptr),
|
|
thread_state +
|
|
pidData->offsets.PyThreadState_frame);
|
|
|
|
int32_t* symbol_counter = bpf_map_lookup_elem(&symbolmap, &sym);
|
|
if (symbol_counter == NULL)
|
|
return 0;
|
|
#ifdef USE_BPF_LOOP
|
|
struct process_frame_ctx ctx = {
|
|
.cur_cpu = cur_cpu,
|
|
.symbol_counter = symbol_counter,
|
|
.frame_ptr = frame_ptr,
|
|
.frame = &frame,
|
|
.pidData = pidData,
|
|
.sym = &sym,
|
|
.event = event,
|
|
};
|
|
|
|
bpf_loop(STACK_MAX_LEN, process_frame_callback, &ctx, 0);
|
|
if (ctx.done)
|
|
return 0;
|
|
#else
|
|
#ifdef NO_UNROLL
|
|
#pragma clang loop unroll(disable)
|
|
#else
|
|
#pragma clang loop unroll(full)
|
|
#endif /* NO_UNROLL */
|
|
/* Unwind python stack */
|
|
for (int i = 0; i < STACK_MAX_LEN; ++i) {
|
|
if (frame_ptr && get_frame_data(frame_ptr, pidData, &frame, &sym)) {
|
|
int32_t new_symbol_id = *symbol_counter * 64 + cur_cpu;
|
|
int32_t *symbol_id = bpf_map_lookup_elem(&symbolmap, &sym);
|
|
if (!symbol_id) {
|
|
bpf_map_update_elem(&symbolmap, &sym, &zero, 0);
|
|
symbol_id = bpf_map_lookup_elem(&symbolmap, &sym);
|
|
if (!symbol_id)
|
|
return 0;
|
|
}
|
|
if (*symbol_id == new_symbol_id)
|
|
(*symbol_counter)++;
|
|
event->stack[i] = *symbol_id;
|
|
event->stack_len = i + 1;
|
|
frame_ptr = frame.f_back;
|
|
}
|
|
}
|
|
#endif /* USE_BPF_LOOP */
|
|
event->stack_complete = frame_ptr == NULL;
|
|
} else {
|
|
event->stack_complete = 1;
|
|
}
|
|
|
|
Stats* stats = bpf_map_lookup_elem(&statsmap, &zero);
|
|
if (stats)
|
|
stats->success++;
|
|
|
|
event->has_meta = 0;
|
|
bpf_perf_event_output(ctx, &perfmap, 0, event, offsetof(Event, metadata));
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Program entry point, placed in the raw_tracepoint/kfree_skb section.
 * Runs __on_event() five times on the same context and returns the
 * bitwise OR of the results.
 * NOTE(review): the repetition presumably exists to enlarge the program
 * for verifier scale testing -- confirm.
 */
SEC("raw_tracepoint/kfree_skb")
int on_event(struct bpf_raw_tracepoint_args *ctx)
{
	int ret = 0;

	ret |= __on_event(ctx);
	ret |= __on_event(ctx);
	ret |= __on_event(ctx);
	ret |= __on_event(ctx);
	ret |= __on_event(ctx);
	return ret;
}
|
|
|
|
/* License string emitted into the object's "license" section. */
char _license[] SEC("license") = "GPL";
|