eBPF tool in practice - AmpereComputing/ampere-lts-kernel---DEPRECATED GitHub Wiki
This wiki shows real-world use cases of eBPF:
eventfd is used in virtio for event notification between the virtio driver (in the guest) and the virtio device (the backend in the host).
With the bcc stackcount tool it is easy to see how many times the kernel ‘eventfd_*’ functions are called. Here is an example:
- On the host, run the stackcount tool to trace the ‘eventfd_*’ kernel functions.
# /usr/share/bcc/tools/stackcount -P 'eventfd_*'
Tracing 12 functions for "eventfd_*"... Hit Ctrl-C to end.
- Run ‘fio’ in a VM for 60 seconds. The virtual block device is a qcow2 file (virtio-blk-pci):
# fio -name=fio-4k -filename=/dev/vda1 -ioengine=libaio -runtime=60 -time_based=1 -direct=1 -bs=4k -iodepth=16 -numjobs=1 -rw=randread
- stackcount Output:
… ….
b'eventfd_write'
b'ksys_write'
b'__arm64_sys_write'
b'do_el0_svc'
b'el0_svc'
b'el0_sync_handler'
b'el0_sync'
b'__libc_write'
b'[unknown]'
b'[unknown]'
b'[unknown]'
b'[unknown]'
b'[unknown]'
b'g_main_context_dispatch'
b'[unknown]'
b'[unknown]'
b'[unknown]'
b'__libc_start_main'
b'[unknown]'
b'qemu-system-aar' [6445] <----- QEMU process. This shows the call stack of eventfd_write()
549545 <----- Number of times this stack was hit while tracing
b'eventfd_poll'
b'__arm64_sys_ppoll'
b'do_el0_svc'
b'el0_svc'
b'el0_sync_handler'
b'el0_sync'
b'ppoll'
b'[unknown]'
b'[unknown]'
b'[unknown]'
b'[unknown]'
b'__libc_start_main'
b'[unknown]'
b'qemu-system-aar' [6445]
69035104
- Prepare a function whose execution time will be measured
# cat test.c
#include <stdio.h>
#include <sys/time.h>
/* Timeout passed to select() by do_func(). */
struct timeval tv;

/*
 * Probe target: call select() with no file descriptors and the global
 * timeout `tv`, then return the constant 2.
 */
int do_func(void)
{
    select(0, NULL, NULL, NULL, &tv);
    return 2;
}
/*
 * Endless driver loop: reset the global timeout to one second and call
 * do_func() again on every iteration.
 */
int main(void)
{
    for (;;) {
        tv.tv_usec = 0;
        tv.tv_sec = 1;
        do_func();
    }
    return 0;   /* not reached */
}
# gcc test.c -o test
# ./test
- Prepare the eBPF code:
# cat test.py
#!/usr/bin/python3
# Measure how long each call to do_func() in the test binary takes, using a
# uprobe on function entry and a uretprobe on function return.
from bcc import BPF

prog = '''
#include <uapi/linux/ptrace.h>

/* Entry timestamp (ns) of the do_func() call in flight, keyed by thread. */
BPF_HASH(start_time);

/* uprobe handler: record when the current thread entered do_func(). */
int do_func_start(struct pt_regs *ctx) {
    u64 tid = bpf_get_current_pid_tgid();
    u64 start_ns = bpf_ktime_get_ns();

    start_time.update(&tid, &start_ns);
    return 0;
}

/* uretprobe handler: print do_func()'s return value and elapsed time. */
int do_func_end(struct pt_regs *ctx) {
    u64 tid = bpf_get_current_pid_tgid();
    u64 *start_ns = start_time.lookup(&tid);
    u64 diff;
    int retval;

    /* No recorded entry for this thread: nothing to report. */
    if (start_ns == NULL)
        return 0;

    retval = PT_REGS_RC(ctx);
    diff = bpf_ktime_get_ns() - *start_ns;
    /* Remove the entry so a stale timestamp is never reused. */
    start_time.delete(&tid);
    /* diff is a u64, so it is printed with %llu rather than %d. */
    bpf_trace_printk("do_func_end retval=%d, time_ns=%llu\\n", retval, diff);
    return 0;
}
'''

# Load the BPF program.
b = BPF(text=prog)
# Attach the entry and return handlers to do_func in the test binary.
b.attach_uprobe(name="/root/workspace/ebpf/test", sym="do_func", fn_name="do_func_start")
b.attach_uretprobe(name="/root/workspace/ebpf/test", sym="do_func", fn_name="do_func_end")
# Stream the bpf_trace_printk() output until interrupted.
b.trace_print()
# python3 test.py
- Result:
b' <...>-497355 [155] .... 599073.722333: 0: do_func_end retval=2, time_ns=180040'
b' <...>-497355 [155] .... 599073.722517: 0: do_func_end retval=2, time_ns=180080'
b' <...>-497355 [155] .... 599073.722704: 0: do_func_end retval=2, time_ns=179760'
b' <...>-497355 [155] .... 599073.722886: 0: do_func_end retval=2, time_ns=179640'
- Prepare a function whose execution time will be measured
# cat test.c
#include <stdio.h>
#include <sys/time.h>
/* Timeout passed to select() by do_func(). */
struct timeval tv;

/*
 * Probe target: call select() with no file descriptors and the global
 * timeout `tv`, then return the constant 2.
 */
int do_func(void)
{
    select(0, NULL, NULL, NULL, &tv);
    return 2;
}
/*
 * Endless driver loop: reset the global timeout to one second and call
 * do_func() again on every iteration.
 */
int main(void)
{
    for (;;) {
        tv.tv_usec = 0;
        tv.tv_sec = 1;
        do_func();
    }
    return 0;   /* not reached */
}
# gcc test.c -o test
# objdump -t test | grep do_func
000000000000077c g F .text 0000000000000030 do_func
# ./test
- start uprobes
# Register an entry probe and a return probe at 0x77c, the do_func address
# reported by objdump. Single quotes keep the shell from expanding $retval
# before the probe definition reaches the kernel.
echo 'p:do_func_entry /root/workspace/ebpf/test:0x77c' > /sys/kernel/debug/tracing/uprobe_events
echo 'r:do_func_exit /root/workspace/ebpf/test:0x77c $retval' >> /sys/kernel/debug/tracing/uprobe_events
# Enable the uprobe events, then read the trace buffer.
echo 1 > /sys/kernel/debug/tracing/events/uprobes/enable
cat /sys/kernel/debug/tracing/trace
- stop uprobes
# Disable the uprobe events.
echo 0 > /sys/kernel/debug/tracing/events/uprobes/enable
# Overwrite uprobe_events with an empty line; per the kernel uprobe tracer
# documentation this clears the registered probe definitions.
echo > /sys/kernel/debug/tracing/uprobe_events