eBPF Development
This guide covers developing eBPF programs for the Antimetal Agent, focusing on CO-RE (Compile Once - Run Everywhere) technology for cross-kernel compatibility and best practices for performance monitoring collectors.
The Antimetal Agent uses eBPF for deep kernel observability with minimal overhead. Our eBPF programs are designed for portability across kernel versions 4.18+ using CO-RE technology and the Cilium eBPF library.
- CO-RE (Compile Once - Run Everywhere): Portable eBPF across kernel versions
- BTF (BPF Type Format): Kernel structure information for relocations
- Cilium eBPF: Go library for eBPF program management
- Ring Buffer Maps: High-performance data streaming
- libbpf: User-space eBPF loading and management
ebpf/
├── include/ # Shared header files
│ └── profiler_types.h
├── src/ # eBPF C programs
│ └── profiler.bpf.c
└── build/ # Generated artifacts (gitignored)
pkg/performance/collectors/
├── profiler.go # Main collector
└── profiler_bpfel.go # Generated eBPF bindings
Shared data structures live in ebpf/include/:
// profiler_types.h - Shared between eBPF and userspace
#ifndef PROFILER_TYPES_H
#define PROFILER_TYPES_H
// 32-byte profile event for ring buffer efficiency
struct profile_event {
    __u64 timestamp;        // 8 bytes - nanoseconds since boot
    __s32 pid;              // 4 bytes - process ID
    __s32 tid;              // 4 bytes - thread ID
    __s32 user_stack_id;    // 4 bytes - stack trace ID
    __s32 kernel_stack_id;  // 4 bytes - stack trace ID
    __u32 cpu;              // 4 bytes - CPU number
    __u32 flags;            // 4 bytes - event flags
} __attribute__((packed));
// Event flags for metadata
#define PROFILE_FLAG_USER_STACK_TRUNCATED (1 << 0)
#define PROFILE_FLAG_KERNEL_STACK_TRUNCATED (1 << 1)
#define PROFILE_FLAG_STACK_COLLISION (1 << 2)
#endif
Example profiler program in ebpf/src/profiler.bpf.c:
//go:build ignore
#include "vmlinux.h"
#include "profiler_types.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_core_read.h>
// Ring buffer for streaming events (8MB capacity)
struct {
    __uint(type, BPF_MAP_TYPE_RINGBUF);
    __uint(max_entries, 8 * 1024 * 1024); // 8MB
} events SEC(".maps");

// PERF_MAX_STACK_DEPTH (127) is not always exposed via vmlinux.h, so guard it
#ifndef PERF_MAX_STACK_DEPTH
#define PERF_MAX_STACK_DEPTH 127
#endif

// Stack trace map for call stack collection
struct {
    __uint(type, BPF_MAP_TYPE_STACK_TRACE);
    __uint(max_entries, 10000);
    __uint(key_size, sizeof(__u32));
    __uint(value_size, PERF_MAX_STACK_DEPTH * sizeof(__u64));
} stacks SEC(".maps");
// Perf event program attached to PMU events
SEC("perf_event")
int profile(struct bpf_perf_event_data *ctx)
{
    struct profile_event *event;
    __u64 id = bpf_get_current_pid_tgid();

    // Reserve space in ring buffer
    event = bpf_ringbuf_reserve(&events, sizeof(*event), 0);
    if (!event) {
        return 0; // Ring buffer full, drop sample
    }

    // Populate event data
    event->timestamp = bpf_ktime_get_ns();
    event->pid = id >> 32;   // upper 32 bits: tgid (process ID)
    event->tid = (__u32)id;  // lower 32 bits: pid (thread ID)
    event->cpu = bpf_get_smp_processor_id();
    event->flags = 0;

    // Collect stack traces (optional)
    event->user_stack_id = bpf_get_stackid(ctx, &stacks, BPF_F_USER_STACK);
    event->kernel_stack_id = bpf_get_stackid(ctx, &stacks, 0);

    // Flag stack collection failures (bpf_get_stackid returns a negative errno)
    if (event->user_stack_id < 0) {
        event->flags |= PROFILE_FLAG_USER_STACK_TRUNCATED;
        event->user_stack_id = -1;
    }
    if (event->kernel_stack_id < 0) {
        event->flags |= PROFILE_FLAG_KERNEL_STACK_TRUNCATED;
        event->kernel_stack_id = -1;
    }

    // Submit event to ring buffer
    bpf_ringbuf_submit(event, 0);
    return 0;
}
char _license[] SEC("license") = "GPL";
Use bpf2go to generate Go bindings:
//go:generate go run github.com/cilium/ebpf/cmd/bpf2go -cc clang -cflags "-I../../../ebpf/include -Wall -Werror -g -O2 -D__TARGET_ARCH_x86 -fdebug-types-section -fno-stack-protector" -target bpfel profiler ../../../ebpf/src/profiler.bpf.c -- -I../../../ebpf/include
This generates:
- profiler_bpfel.go - eBPF bytecode and loading functions
- profiler_bpfel.o - Compiled eBPF object file
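The generated bindings follow bpf2go's naming conventions: the profiler identifier yields a loadProfiler function that returns the embedded *ebpf.CollectionSpec, plus a profilerObjects struct whose fields mirror the program and map names in the C source. Roughly (an illustrative sketch; the exact contents live in the generated file):
// Illustrative shape of the generated objects struct
type profilerObjects struct {
	Profile *ebpf.Program `ebpf:"profile"`
	Events  *ebpf.Map     `ebpf:"events"`
	Stacks  *ebpf.Map     `ebpf:"stacks"`
}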
import (
	"context"
	"fmt"
	"runtime"

	"github.com/cilium/ebpf/link"
	"github.com/cilium/ebpf/ringbuf"
	"github.com/cilium/ebpf/rlimit"
	"golang.org/x/sys/unix"
)

type ProfilerCollector struct {
	objs       *profilerObjects // Generated eBPF objects
	links      []link.Link      // Program attachments (kprobes, tracepoints, etc.)
	perfFDs    []int            // Raw perf event file descriptors
	ringReader *ringbuf.Reader  // Ring buffer reader
}
func (p *ProfilerCollector) Start(ctx context.Context) error {
	// Remove memory limit for eBPF
	if err := rlimit.RemoveMemlock(); err != nil {
		return fmt.Errorf("failed to remove memory limit: %w", err)
	}

	// Load eBPF spec and objects
	spec, err := loadProfiler()
	if err != nil {
		return fmt.Errorf("failed to load eBPF spec: %w", err)
	}
	p.objs = &profilerObjects{}
	if err := spec.LoadAndAssign(p.objs, nil); err != nil {
		return fmt.Errorf("failed to load eBPF objects: %w", err)
	}

	// Create ring buffer reader
	p.ringReader, err = ringbuf.NewReader(p.objs.Events)
	if err != nil {
		return fmt.Errorf("failed to create ring buffer reader: %w", err)
	}

	// Attach to CPU-cycle perf events, one per CPU. cilium/ebpf has no
	// generic PMU attach helper, so we open the events with
	// perf_event_open(2) and attach via the PERF_EVENT_IOC_SET_BPF ioctl.
	for cpu := 0; cpu < runtime.NumCPU(); cpu++ {
		fd, err := unix.PerfEventOpen(&unix.PerfEventAttr{
			Type:   unix.PERF_TYPE_HARDWARE,
			Config: unix.PERF_COUNT_HW_CPU_CYCLES,
			Sample: 99, // 99Hz sampling
			Bits:   unix.PerfBitFreq,
		}, -1, cpu, -1, unix.PERF_FLAG_FD_CLOEXEC)
		if err != nil {
			return fmt.Errorf("failed to open perf event on CPU %d: %w", cpu, err)
		}
		if err := unix.IoctlSetInt(fd, unix.PERF_EVENT_IOC_SET_BPF, p.objs.Profile.FD()); err != nil {
			unix.Close(fd)
			return fmt.Errorf("failed to attach eBPF program: %w", err)
		}
		if err := unix.IoctlSetInt(fd, unix.PERF_EVENT_IOC_ENABLE, 0); err != nil {
			unix.Close(fd)
			return fmt.Errorf("failed to enable perf event: %w", err)
		}
		p.perfFDs = append(p.perfFDs, fd)
	}
	return nil
}
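Events are then drained from the ring buffer on the Go side. A minimal sketch of the read loop (handleEvent is a hypothetical downstream consumer; assumes the bytes, encoding/binary, and errors imports):
// readLoop drains the ring buffer until the reader is closed.
func (p *ProfilerCollector) readLoop() {
	for {
		record, err := p.ringReader.Read()
		if err != nil {
			if errors.Is(err, ringbuf.ErrClosed) {
				return // Reader closed during shutdown
			}
			continue // Transient error, keep reading
		}
		var event profileEvent
		if err := binary.Read(bytes.NewReader(record.RawSample), binary.LittleEndian, &event); err != nil {
			continue // Malformed sample, skip
		}
		p.handleEvent(&event) // hypothetical consumer
	}
}

// profileEvent mirrors struct profile_event from profiler_types.h
// (packed, little-endian on bpfel targets).
type profileEvent struct {
	Timestamp     uint64
	PID           int32
	TID           int32
	UserStackID   int32
	KernelStackID int32
	CPU           uint32
	Flags         uint32
}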
Key targets in the main Makefile:
# Build eBPF programs
.PHONY: build-ebpf
build-ebpf:
	@echo "Building eBPF programs..."
	go generate ./pkg/performance/collectors/...

# Generate eBPF bindings
.PHONY: generate-ebpf-bindings
generate-ebpf-bindings:
	@echo "Generating eBPF Go bindings..."
	go generate -x ./pkg/performance/collectors/

# Format eBPF C code
.PHONY: fmt.clang
fmt.clang:
	find ebpf -name "*.c" -o -name "*.h" | xargs clang-format -i

# Clean eBPF artifacts
.PHONY: clean-ebpf
clean-ebpf:
	rm -f pkg/performance/collectors/*_bpfel.go
	rm -f pkg/performance/collectors/*_bpfel.o
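A typical local workflow chains these targets:
# Regenerate bindings from scratch
make clean-ebpf build-ebpf
# Keep C sources formatted before committing
make fmt.clang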
Ensure required tools are installed:
# Clang and LLVM for eBPF compilation
apt-get install clang llvm
# libbpf development headers
apt-get install libbpf-dev
# BTF support (kernel headers)
apt-get install linux-headers-$(uname -r)
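Before building, confirm the toolchain and kernel BTF support, and regenerate vmlinux.h if needed (the output path below matches this repo's layout but is otherwise illustrative):
# Clang 10+ is needed for full CO-RE support
clang --version
# The kernel exposes BTF here when CONFIG_DEBUG_INFO_BTF=y
ls /sys/kernel/btf/vmlinux
# Regenerate vmlinux.h for CO-RE builds
bpftool btf dump file /sys/kernel/btf/vmlinux format c > ebpf/include/vmlinux.h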
Test eBPF loading and basic functionality:
func TestProfilerLoad(t *testing.T) {
	// Load eBPF spec
	spec, err := loadProfiler()
	require.NoError(t, err)

	// Test object loading
	objs := &profilerObjects{}
	err = spec.LoadAndAssign(objs, nil)
	require.NoError(t, err)
	defer objs.Close()

	// Verify map properties
	assert.Equal(t, ebpf.RingBuf, objs.Events.Type())
	assert.Equal(t, uint32(8*1024*1024), objs.Events.MaxEntries())
}
Test with real perf events:
func TestProfilerIntegration(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping integration test")
	}

	collector, err := NewProfilerCollector(logger, config)
	require.NoError(t, err)

	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	eventChan, err := collector.Start(ctx)
	require.NoError(t, err)
	defer collector.Stop()

	// Verify events are received
	select {
	case event := <-eventChan:
		profileEvent := event.(*ProfileEvent)
		assert.NotZero(t, profileEvent.Timestamp)
		assert.NotZero(t, profileEvent.PID)
	case <-time.After(time.Second):
		t.Fatal("no events received within timeout")
	}
}
Test on bare metal with PMU access:
//go:build hardware

func TestProfilerHardwarePMU(t *testing.T) {
	// Verify PMU availability
	if !isPerfEventAvailable(PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES) {
		t.Skip("hardware PMU not available")
	}

	// Test hardware event attachment
	collector, err := NewProfilerCollector(logger, config)
	require.NoError(t, err)

	// Should succeed on bare metal
	_, err = collector.Start(context.Background())
	assert.NoError(t, err)
}
- Efficient Data Structures
  - Use packed structs for ring buffer data
  - Minimize per-event data size
  - Align data structures properly
- Ring Buffer Sizing
  - Size based on event rate and processing latency
  - Monitor buffer usage to prevent overflows (see the drop-counter sketch after this list)
  - Use non-blocking submission to avoid kernel stalls
- Stack Collection
  - Collect stacks only when needed
  - Use appropriate stack depth limits
  - Handle stack collection failures gracefully
- Graceful Degradation
  event = bpf_ringbuf_reserve(&events, sizeof(*event), 0);
  if (!event) {
      return 0; // Drop sample, don't fail program
  }
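To make drops visible, one option is a per-CPU drop counter that the userspace reader polls to decide whether the buffer needs resizing. A minimal sketch (the map name dropped_samples is illustrative, not part of the agent):
// Hypothetical per-CPU drop counter, polled from userspace
struct {
    __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
    __uint(max_entries, 1);
    __type(key, __u32);
    __type(value, __u64);
} dropped_samples SEC(".maps");

// In the program, on reservation failure:
event = bpf_ringbuf_reserve(&events, sizeof(*event), 0);
if (!event) {
    __u32 key = 0;
    __u64 *drops = bpf_map_lookup_elem(&dropped_samples, &key);
    if (drops)
        __sync_fetch_and_add(drops, 1);
    return 0;
}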
- Capability Checking
  - Check capabilities before loading programs (a sketch follows this list)
  - Provide actionable error messages
  - Suggest alternatives for common failures
- Resource Cleanup
  defer func() {
      for _, l := range p.links {
          l.Close()
      }
      for _, fd := range p.perfFDs {
          unix.Close(fd)
      }
      if p.ringReader != nil {
          p.ringReader.Close()
      }
      if p.objs != nil {
          p.objs.Close()
      }
  }()
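A capability check can be as simple as reading the process's effective capability mask. A hedged sketch (the helper name hasCapability is hypothetical; assumes the os, strings, strconv, and fmt imports):
// hasCapability reports whether the given capability bit is in CapEff.
// Bit numbers: CAP_SYS_ADMIN = 21, CAP_BPF = 39 (kernel 5.8+).
func hasCapability(capBit uint) (bool, error) {
	data, err := os.ReadFile("/proc/self/status")
	if err != nil {
		return false, err
	}
	for _, line := range strings.Split(string(data), "\n") {
		if strings.HasPrefix(line, "CapEff:") {
			mask, err := strconv.ParseUint(strings.TrimSpace(strings.TrimPrefix(line, "CapEff:")), 16, 64)
			if err != nil {
				return false, err
			}
			return mask&(1<<capBit) != 0, nil
		}
	}
	return false, fmt.Errorf("CapEff not found in /proc/self/status")
}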
- Privilege Requirements
  - Document required capabilities clearly
  - Use least-privilege principle
  - Support unprivileged operation when possible
- Input Validation
  - Validate all user-provided configuration
  - Sanitize kernel data before processing
  - Use appropriate bounds checking
- Resource Limits
  - Set appropriate map sizes
  - Implement rate limiting for high-frequency events (see the sampling sketch after this list)
  - Monitor memory usage
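For rate limiting, per-CPU 1-in-N sampling is a common pattern. A sketch (the map name sample_counter and the 1-in-100 ratio are illustrative):
// Hypothetical 1-in-N sampling to bound event rate on hot paths
struct {
    __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
    __uint(max_entries, 1);
    __type(key, __u32);
    __type(value, __u64);
} sample_counter SEC(".maps");

// At the top of the program: keep only every 100th event per CPU
__u32 key = 0;
__u64 *count = bpf_map_lookup_elem(&sample_counter, &key);
if (!count)
    return 0;
(*count)++;
if (*count % 100 != 0)
    return 0;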
- Missing headers: Install linux-headers-$(uname -r)
- Clang version: Use clang 10+ for full CO-RE support
- BTF missing: Enable BTF in kernel config or install BTF data
- Permission denied: Check capabilities (CAP_BPF, CAP_SYS_ADMIN)
- Program load failed: Check kernel version (4.18+ required)
- Ring buffer full: Increase buffer size or improve processing speed
- High overhead: Reduce sampling frequency or optimize event processing
- Lost events: Increase ring buffer size or use non-blocking operations
- Memory usage: Monitor map sizes and implement appropriate limits
# Check eBPF program status
bpftool prog list
# Inspect eBPF maps
bpftool map list
bpftool map dump id <map_id>
# Debug program loading
strace -e bpf ./your-program
# Monitor kernel logs
dmesg | grep -i bpf
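If a program uses bpf_printk, its output can be followed with (assuming tracefs is mounted):
# Follow bpf_printk output
bpftool prog tracelog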
For complex collectors, organize multiple programs:
SEC("perf_event/cpu_cycles")
int profile_cpu_cycles(struct bpf_perf_event_data *ctx) { ... }
SEC("perf_event/instructions")
int profile_instructions(struct bpf_perf_event_data *ctx) { ... }
Implement event filtering in eBPF:
// Process filtering: drop foreign events before they reach userspace
// (target_pid comes from userspace configuration; see the map sketch below)
if (event->pid != target_pid) {
    bpf_ringbuf_discard(event, 0);
    return 0;
}
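A hedged sketch of where target_pid could come from: a single-slot array map that userspace populates at startup (the map and variable names are illustrative):
// Illustrative single-slot config map written by userspace at startup
struct {
    __uint(type, BPF_MAP_TYPE_ARRAY);
    __uint(max_entries, 1);
    __type(key, __u32);
    __type(value, __s32);
} config SEC(".maps");

// Reading it in the program:
__u32 key = 0;
__s32 *target = bpf_map_lookup_elem(&config, &key);
__s32 target_pid = target ? *target : -1;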
Use CO-RE for portable field access:
// Portable field access across kernel versions
struct task_struct *task = (struct task_struct *)bpf_get_current_task();
pid_t pid = BPF_CORE_READ(task, pid); // note: task->pid is the thread ID; use tgid for the process ID
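BPF_CORE_READ also chains through pointers, applying a CO-RE relocation at each step. For example, reading the parent process ID:
// Follow task->real_parent->tgid with relocations at each access
pid_t ppid = BPF_CORE_READ(task, real_parent, tgid);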
This guide provides the foundation for developing robust, portable eBPF programs for the Antimetal Agent. Always test across different kernel versions and environments to ensure compatibility.