mirror of
				git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
				synced 2025-09-04 20:19:47 +08:00 
			
		
		
		
	 f0c328f8af
			
		
	
	
		f0c328f8af
		
	
	
	
	
		
			
			For the problem of increasing fragmentation of the bpf loader programs,
instead of using bpf_loader.o, which is used in samples/bpf, this
commit refactors the existing tracepoint tracing programs with the libbpf
bpf loader.
    - Adding a tracepoint event and attaching a bpf program to it is done
    through bpf_program__attach().
    - Instead of using the existing BPF MAP definition, MAP definition
    has been refactored with the new BTF-defined MAP format.
Signed-off-by: Daniel T. Lee <danieltimlee@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200823085334.9413-4-danieltimlee@gmail.com
		
	
			
		
			
				
	
	
		
			158 lines
		
	
	
		
			3.8 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			158 lines
		
	
	
		
			3.8 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /* Copyright (c) 2016 Facebook
 | |
|  *
 | |
|  * This program is free software; you can redistribute it and/or
 | |
|  * modify it under the terms of version 2 of the GNU General Public
 | |
|  * License as published by the Free Software Foundation.
 | |
|  */
 | |
| #include <uapi/linux/bpf.h>
 | |
| #include <uapi/linux/ptrace.h>
 | |
| #include <uapi/linux/perf_event.h>
 | |
| #include <linux/version.h>
 | |
| #include <linux/sched.h>
 | |
| #include <bpf/bpf_helpers.h>
 | |
| #include <bpf/bpf_tracing.h>
 | |
| 
 | |
/*
 * _(P) - read the lvalue P through bpf_probe_read_kernel().
 *
 * Declares a temporary with the same type as P, asks the helper to copy
 * sizeof(P) bytes from the address of P into it, and evaluates to that
 * temporary.
 */
#define _(P)                                                                   \
	({                                                                     \
		typeof(P) __v;                                                 \
		bpf_probe_read_kernel(&__v, sizeof(__v), &(P));                \
		__v;                                                           \
	})
 | |
| 
 | |
/* Intervals shorter than this many microseconds are dropped by oncpu(). */
#define MINBLOCK_US 1
 | |
| 
 | |
| struct key_t {
 | |
| 	char waker[TASK_COMM_LEN];
 | |
| 	char target[TASK_COMM_LEN];
 | |
| 	u32 wret;
 | |
| 	u32 tret;
 | |
| };
 | |
| 
 | |
| struct {
 | |
| 	__uint(type, BPF_MAP_TYPE_HASH);
 | |
| 	__type(key, struct key_t);
 | |
| 	__type(value, u64);
 | |
| 	__uint(max_entries, 10000);
 | |
| } counts SEC(".maps");
 | |
| 
 | |
| struct {
 | |
| 	__uint(type, BPF_MAP_TYPE_HASH);
 | |
| 	__type(key, u32);
 | |
| 	__type(value, u64);
 | |
| 	__uint(max_entries, 10000);
 | |
| } start SEC(".maps");
 | |
| 
 | |
| struct wokeby_t {
 | |
| 	char name[TASK_COMM_LEN];
 | |
| 	u32 ret;
 | |
| };
 | |
| 
 | |
| struct {
 | |
| 	__uint(type, BPF_MAP_TYPE_HASH);
 | |
| 	__type(key, u32);
 | |
| 	__type(value, struct wokeby_t);
 | |
| 	__uint(max_entries, 10000);
 | |
| } wokeby SEC(".maps");
 | |
| 
 | |
| struct {
 | |
| 	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
 | |
| 	__uint(key_size, sizeof(u32));
 | |
| 	__uint(value_size, PERF_MAX_STACK_DEPTH * sizeof(u64));
 | |
| 	__uint(max_entries, 10000);
 | |
| } stackmap SEC(".maps");
 | |
| 
 | |
/* Flags for every bpf_get_stackid() call in this file; no other bit is set. */
#define STACKID_FLAGS (BPF_F_FAST_STACK_CMP)
 | |
| 
 | |
| SEC("kprobe/try_to_wake_up")
 | |
| int waker(struct pt_regs *ctx)
 | |
| {
 | |
| 	struct task_struct *p = (void *) PT_REGS_PARM1(ctx);
 | |
| 	struct wokeby_t woke;
 | |
| 	u32 pid;
 | |
| 
 | |
| 	pid = _(p->pid);
 | |
| 
 | |
| 	bpf_get_current_comm(&woke.name, sizeof(woke.name));
 | |
| 	woke.ret = bpf_get_stackid(ctx, &stackmap, STACKID_FLAGS);
 | |
| 
 | |
| 	bpf_map_update_elem(&wokeby, &pid, &woke, BPF_ANY);
 | |
| 	return 0;
 | |
| }
 | |
| 
 | |
| static inline int update_counts(void *ctx, u32 pid, u64 delta)
 | |
| {
 | |
| 	struct wokeby_t *woke;
 | |
| 	u64 zero = 0, *val;
 | |
| 	struct key_t key;
 | |
| 
 | |
| 	__builtin_memset(&key.waker, 0, sizeof(key.waker));
 | |
| 	bpf_get_current_comm(&key.target, sizeof(key.target));
 | |
| 	key.tret = bpf_get_stackid(ctx, &stackmap, STACKID_FLAGS);
 | |
| 	key.wret = 0;
 | |
| 
 | |
| 	woke = bpf_map_lookup_elem(&wokeby, &pid);
 | |
| 	if (woke) {
 | |
| 		key.wret = woke->ret;
 | |
| 		__builtin_memcpy(&key.waker, woke->name, sizeof(key.waker));
 | |
| 		bpf_map_delete_elem(&wokeby, &pid);
 | |
| 	}
 | |
| 
 | |
| 	val = bpf_map_lookup_elem(&counts, &key);
 | |
| 	if (!val) {
 | |
| 		bpf_map_update_elem(&counts, &key, &zero, BPF_NOEXIST);
 | |
| 		val = bpf_map_lookup_elem(&counts, &key);
 | |
| 		if (!val)
 | |
| 			return 0;
 | |
| 	}
 | |
| 	(*val) += delta;
 | |
| 	return 0;
 | |
| }
 | |
| 
 | |
| #if 1
 | |
| /* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
 | |
| struct sched_switch_args {
 | |
| 	unsigned long long pad;
 | |
| 	char prev_comm[16];
 | |
| 	int prev_pid;
 | |
| 	int prev_prio;
 | |
| 	long long prev_state;
 | |
| 	char next_comm[16];
 | |
| 	int next_pid;
 | |
| 	int next_prio;
 | |
| };
 | |
| SEC("tracepoint/sched/sched_switch")
 | |
| int oncpu(struct sched_switch_args *ctx)
 | |
| {
 | |
| 	/* record previous thread sleep time */
 | |
| 	u32 pid = ctx->prev_pid;
 | |
| #else
 | |
| SEC("kprobe/finish_task_switch")
 | |
| int oncpu(struct pt_regs *ctx)
 | |
| {
 | |
| 	struct task_struct *p = (void *) PT_REGS_PARM1(ctx);
 | |
| 	/* record previous thread sleep time */
 | |
| 	u32 pid = _(p->pid);
 | |
| #endif
 | |
| 	u64 delta, ts, *tsp;
 | |
| 
 | |
| 	ts = bpf_ktime_get_ns();
 | |
| 	bpf_map_update_elem(&start, &pid, &ts, BPF_ANY);
 | |
| 
 | |
| 	/* calculate current thread's delta time */
 | |
| 	pid = bpf_get_current_pid_tgid();
 | |
| 	tsp = bpf_map_lookup_elem(&start, &pid);
 | |
| 	if (!tsp)
 | |
| 		/* missed start or filtered */
 | |
| 		return 0;
 | |
| 
 | |
| 	delta = bpf_ktime_get_ns() - *tsp;
 | |
| 	bpf_map_delete_elem(&start, &pid);
 | |
| 	delta = delta / 1000;
 | |
| 	if (delta < MINBLOCK_US)
 | |
| 		return 0;
 | |
| 
 | |
| 	return update_counts(ctx, pid, delta);
 | |
| }
 | |
| char _license[] SEC("license") = "GPL";
 | |
| u32 _version SEC("version") = LINUX_VERSION_CODE;
 |