Introduction

eBPF is not a tool; it’s a technology that lets engineers write programs that are executed safely as bytecode by a virtual machine inside the Linux kernel. The use cases are numerous, but they mainly focus on networking, security, tracing and performance analysis.

Libbpf

In this first blog I will give a short introduction to libbpf and show how to write a simple BPF program in C. These small programs are portable, an approach also known as BPF CO-RE (Compile Once – Run Everywhere).

Preparation

Before we can start building our first hello program, we must ensure that the main prerequisites are in place. Since this is not a full-blown tutorial, I only list them here.

  • Ensure that the Linux kernel headers are installed and available.
  • Ensure that you have the latest version of Clang/LLVM installed. Clang will be used by our Makefile to compile the actual C program.
  • For compiling everything you must have the libbpf sources available at libbpf-bootstrap/libbpf. Or just customize the Makefile for this.
# eBPF development specific repos for Debian Buster
deb https://deb.debian.org/debian buster-backports main contrib non-free
deb http://apt.llvm.org/buster/ llvm-toolchain-buster main
deb-src http://apt.llvm.org/buster/ llvm-toolchain-buster main
deb http://apt.llvm.org/buster/ llvm-toolchain-buster-10 main
deb-src http://apt.llvm.org/buster/ llvm-toolchain-buster-10 main
deb http://apt.llvm.org/buster/ llvm-toolchain-buster-11 main
deb-src http://apt.llvm.org/buster/ llvm-toolchain-buster-11 main

Hello reader example

Hopefully you have a Linux box ready to create your first minimal “Hello reader” example program. A good start is cloning the libbpf-bootstrap repository. You can already test your development environment by building the example C programs like minimal and bootstrap.

/* License string of the BPF object, placed in the "license" section.
 * NOTE(review): presumably this is what allows the use of GPL-only helpers
 * such as the one behind bpf_printk() — confirm against the kernel docs. */
char LICENSE[] SEC("license") = "Dual BSD/GPL";
/*
 * Handler attached to the sys_enter_kill syscall tracepoint.
 *
 * Takes the first two syscall arguments from the tracepoint context (the
 * target PID and the signal number of the kill() call), looks up the ID of
 * the calling process and writes one line through bpf_printk().
 * Always returns 0.
 */
SEC("tp/syscalls/sys_enter_kill")
int handle_tp(struct trace_event_raw_sys_enter *ctx)
{
	int target = ctx->args[0];
	int signo = ctx->args[1];
	/* the caller's ID sits in the upper 32 bits of the helper's result */
	int caller = bpf_get_current_pid_tgid() >> 32;

	bpf_printk("Hello reader, we have TPID: %d killed with Signal: %d by PID:%d\n", target, signo, caller);

	return 0;
}
name: sys_enter_kill
ID: 174
format:
field:unsigned short common_type; offset:0; size:2; signed:0;
field:unsigned char common_flags; offset:2; size:1; signed:0;
field:unsigned char common_preempt_count; offset:3; size:1; signed:0;
field:int common_pid; offset:4; size:4; signed:1;
field:int __syscall_nr; offset:8; size:4; signed:1;
field:pid_t pid; offset:16; size:8; signed:0;
field:int sig; offset:24; size:8; signed:0;
print fmt: "pid: 0x%08lx, sig: 0x%08lx", ((unsigned long)(REC->pid)), ((unsigned long)(REC->sig))
// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
/* Copyright (c) 2020 Facebook */
#include <stdio.h>
#include <unistd.h>
#include <sys/resource.h>
#include <bpf/libbpf.h>
#include "hello.skel.h"
/*
 * Print callback registered with libbpf_set_print(): forwards every libbpf
 * message, whatever its level, to stderr.
 *
 * Returns the value returned by vfprintf().
 */
static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
{
	int written = vfprintf(stderr, format, args);

	return written;
}
/*
 * Raise both the soft and the hard RLIMIT_MEMLOCK limit to RLIM_INFINITY.
 *
 * If setrlimit() fails, a message is written to stderr and the process
 * exits with status 1.
 */
static void bump_memlock_rlimit(void)
{
	struct rlimit unlimited;

	unlimited.rlim_cur = RLIM_INFINITY;
	unlimited.rlim_max = RLIM_INFINITY;

	if (setrlimit(RLIMIT_MEMLOCK, &unlimited) == 0)
		return;

	fprintf(stderr, "Failed to increase RLIMIT_MEMLOCK limit!\n");
	exit(1);
}
/*
 * Entry point of the "Hello reader" loader.
 *
 * Opens, loads and attaches the hello BPF skeleton, then prints a dot to
 * stderr once per second forever. The cleanup label is reached only when
 * loading or attaching fails; the process then returns the negated error
 * code. A failed open returns 1.
 */
int main(int argc, char **argv)
{
	struct hello_bpf *obj;
	int rc;

	/* Route libbpf's errors and debug output through our callback */
	libbpf_set_print(libbpf_print_fn);

	/* Lift RLIMIT_MEMLOCK before using the BPF sub-system */
	bump_memlock_rlimit();

	/* Step 1: open the BPF application */
	obj = hello_bpf__open();
	if (obj == NULL) {
		fprintf(stderr, "Failed to open BPF skeleton\n");
		return 1;
	}

	/* Step 2: load and verify the BPF programs */
	rc = hello_bpf__load(obj);
	if (rc != 0) {
		fprintf(stderr, "Failed to load and verify BPF skeleton\n");
		goto cleanup;
	}

	/* Step 3: attach the tracepoint handler */
	rc = hello_bpf__attach(obj);
	if (rc != 0) {
		fprintf(stderr, "Failed to attach BPF skeleton\n");
		goto cleanup;
	}

	printf("Successfully started! Please run `sudo cat /sys/kernel/debug/tracing/trace_pipe` "
	       "to see output of the BPF programs.\n");

	/* Idle forever, emitting one progress dot per second */
	while (1) {
		fprintf(stderr, ".");
		sleep(1);
	}

cleanup:
	hello_bpf__destroy(obj);
	return -rc;
}
Successfully started! Please run `sudo cat /sys/kernel/debug/tracing/trace_pipe` to see output of the BPF programs
$ top &
[5] 9359
$ kill -9 9359
bash-8946    [000] d... 204503.703946: bpf_trace_printk: Hello reader, we have TPID: 9359 killed with Signal: 9 by PID:8946

hard kill tracer

Now let’s take it to the next step by creating a full-blown executable tool. Again this is based on the bootstrap example.

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_core_read.h>
#include "hkill.h"
/* License string of the BPF object, placed in the "license" section. */
char LICENSE[] SEC("license") = "Dual BSD/GPL";
/* Hash map with pid_t keys and u64 values, limited to 8192 entries. */
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__uint(max_entries, 8192);
__type(key, pid_t);
__type(value, u64);
} exec_start SEC(".maps");
/* Ring buffer map through which struct event samples are handed to user space.
 * NOTE(review): max_entries (256 * 1024) is presumably the buffer size in
 * bytes for this map type — confirm against the ring buffer documentation. */
struct {
__uint(type, BPF_MAP_TYPE_RINGBUF);
__uint(max_entries, 256 * 1024);
} rb SEC(".maps");
/* Signal number that handle_tp reports; kill() calls with any other signal
 * are ignored. 9 is the "hard kill" signal used throughout this example. */
int bad_sig = 9;
/*
 * Tracepoint handler for sys_enter_kill: reports "hard kills" to user space.
 *
 * args[0] and args[1] of the tracepoint context carry the target PID and the
 * signal number of the kill() call. Calls whose signal differs from bad_sig
 * are ignored. For a matching call one struct event is reserved in the rb
 * ring buffer, filled in and submitted.
 *
 * The handler deliberately stores nothing in exec_start: no code reads or
 * deletes entries of that map, so inserting one per kill() call would only
 * pile up stale entries until the map is full.
 *
 * Always returns 0, also when no ring buffer space could be reserved (the
 * event is dropped in that case).
 */
SEC("tp/syscalls/sys_enter_kill")
int handle_tp(struct trace_event_raw_sys_enter *ctx)
{
	struct event *e;
	int tpid = ctx->args[0];
	int sig = ctx->args[1];

	/* only catch hard kills; bail out before doing any other work */
	if (sig != bad_sig)
		return 0;

	/* reserve sample from BPF ringbuf */
	e = bpf_ringbuf_reserve(&rb, sizeof(*e), 0);
	if (!e)
		return 0;

	/* fill out every field of the sample */
	e->pid = bpf_get_current_pid_tgid() >> 32;
	e->signal = sig;
	e->tpid = tpid;
	bpf_get_current_comm(&e->comm, sizeof(e->comm));

	/* send data to user-space for post-processing */
	bpf_ringbuf_submit(e, 0);
	return 0;
}
/* hkill.h: definitions included by both the BPF program and the user-space loader. */
#ifndef __HKILL_H
#define __HKILL_H
/* Size in bytes of the command-name buffer in struct event.
 * NOTE(review): 16 presumably mirrors the kernel's TASK_COMM_LEN — confirm. */
#define COMM_LEN 16
/* One sample sent through the ring buffer for each reported kill() call. */
struct event {
/* ID of the process that issued kill() (bpf_get_current_pid_tgid() >> 32) */
int pid;
/* Command name of that process, as filled in by bpf_get_current_comm() */
char comm[COMM_LEN];
/* Target PID: first argument of the kill() syscall */
int tpid;
/* Signal number: second argument of the kill() syscall */
int signal;
};
#endif /* __HKILL_H */
#include <argp.h>
#include <signal.h>
#include <stdio.h>
#include <time.h>
#include <sys/resource.h>
#include <bpf/libbpf.h>
#include "hkill.h"
#include "hkill.skel.h"
/* Run-time options gathered from the command line. */
static struct env {
/* Set by -v/--verbose; when false, libbpf debug-level messages are suppressed */
bool verbose;
} env;
/* Version and bug-report address: well-known global names that argp looks up
 * (NOTE(review): used for --version and the help footer per the glibc argp
 * documentation — confirm). */
const char *argp_program_version = "hkill 0.1";
const char *argp_program_bug_address = "<arnold@avwsolutions.nl>";
/* Help text, passed to argp through the .doc member of the argp structure. */
const char argp_program_doc[] =
"BPF hkill demo application.\n"
"\n"
"It traces process hard kills and shows associated \n"
"information (Timestamp, causing PID, causing COMM, Target PID and SIGnal level).\n"
"\n"
"USAGE: ./hkill [-v]\n";
/* Option table: a single -v/--verbose switch, terminated by an empty entry. */
static const struct argp_option opts[] = {
{ "verbose", 'v', NULL, 0, "Verbose debug output" },
{},
};
/*
 * argp parser callback.
 *
 * key   - option key or ARGP_KEY_* code being processed
 * arg   - option argument (unused; no option takes one)
 * state - argp parser state, forwarded to argp_usage()
 *
 * 'v' switches on env.verbose. A positional argument is not accepted and
 * triggers argp_usage(). Both cases return 0; every other key returns
 * ARGP_ERR_UNKNOWN.
 */
static error_t parse_arg(int key, char *arg, struct argp_state *state)
{
	if (key == 'v') {
		env.verbose = true;
		return 0;
	}

	if (key == ARGP_KEY_ARG) {
		argp_usage(state);
		return 0;
	}

	return ARGP_ERR_UNKNOWN;
}
/* Parser description handed to argp_parse(): option table, per-key callback
 * and help text. */
static const struct argp argp = {
.options = opts,
.parser = parse_arg,
.doc = argp_program_doc,
};
/*
 * Print callback registered with libbpf_set_print().
 *
 * Debug-level messages are dropped unless env.verbose is set; everything
 * else is written to stderr. Returns 0 for a dropped message, otherwise the
 * value returned by vfprintf().
 */
static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
{
	bool suppressed = (level == LIBBPF_DEBUG) && !env.verbose;

	return suppressed ? 0 : vfprintf(stderr, format, args);
}
/*
 * Raise both the soft and the hard RLIMIT_MEMLOCK limit to RLIM_INFINITY.
 *
 * If setrlimit() fails, a message is written to stderr and the process
 * exits with status 1.
 */
static void bump_memlock_rlimit(void)
{
	struct rlimit no_limit;

	no_limit.rlim_cur = RLIM_INFINITY;
	no_limit.rlim_max = RLIM_INFINITY;

	if (setrlimit(RLIMIT_MEMLOCK, &no_limit) != 0) {
		fputs("Failed to increase RLIMIT_MEMLOCK limit!\n", stderr);
		exit(1);
	}
}
/*
 * Shutdown request flag: set by sig_handler() and tested by the main polling
 * loop. volatile sig_atomic_t is the object type the C standard guarantees
 * can be written from an asynchronous signal handler.
 */
static volatile sig_atomic_t exiting = 0;

/*
 * Signal handler installed for SIGINT and SIGTERM. It only raises the exit
 * flag; the signal number itself is not needed.
 */
static void sig_handler(int sig)
{
	(void)sig;
	exiting = 1;
}
static int handle_event(void *ctx, void *data, size_t data_sz)
{
const struct event *e = data;
struct tm *tm;
char ts[32];
time_t t;
time(&t);
tm = localtime(&t);
strftime(ts, sizeof(ts), "%H:%M:%S", tm);
printf("%-8s %-6d %-8s %-8d %-8d\n", ts, e->pid, e->comm, e->tpid, e->signal);
return 0;
}
/*
 * Entry point of the hkill tracer.
 *
 * Parses the command line, opens/loads/attaches the hkill BPF skeleton,
 * creates a ring buffer consumer on the rb map and polls it until SIGINT or
 * SIGTERM sets the exiting flag or polling fails. Each received sample is
 * printed by handle_event().
 *
 * Returns 0 on a clean shutdown, 1 when the skeleton cannot be opened, the
 * argp error code when argument parsing fails, and otherwise the negated
 * (positive) error code of the step that failed.
 */
int main(int argc, char **argv)
{
	struct ring_buffer *rb = NULL;
	struct hkill_bpf *skel;
	int err;

	/* Parse command line arguments */
	err = argp_parse(&argp, argc, argv, 0, NULL, NULL);
	if (err)
		return err;

	/* Set up libbpf errors and debug info callback */
	libbpf_set_print(libbpf_print_fn);

	/* Bump RLIMIT_MEMLOCK to create BPF maps */
	bump_memlock_rlimit();

	/* Cleaner handling of Ctrl-C */
	signal(SIGINT, sig_handler);
	signal(SIGTERM, sig_handler);

	/* Open BPF application (loading happens in the next step) */
	skel = hkill_bpf__open();
	if (!skel) {
		fprintf(stderr, "Failed to open BPF skeleton\n");
		return 1;
	}

	/* Load & verify BPF programs */
	err = hkill_bpf__load(skel);
	if (err) {
		fprintf(stderr, "Failed to load and verify BPF skeleton\n");
		goto cleanup;
	}

	/* Attach tracepoints */
	err = hkill_bpf__attach(skel);
	if (err) {
		fprintf(stderr, "Failed to attach BPF skeleton\n");
		goto cleanup;
	}

	/* Set up ring buffer polling */
	rb = ring_buffer__new(bpf_map__fd(skel->maps.rb), handle_event, NULL, NULL);
	if (!rb) {
		err = -1;
		fprintf(stderr, "Failed to create ring buffer\n");
		goto cleanup;
	}

	/* Process events: table header first, then one row per sample */
	printf("%-8s %-6s %-8s %-8s %-6s\n",
	       "TIME","PID", "COMM", "TPID", "SIG");
	while (!exiting) {
		err = ring_buffer__poll(rb, 100 /* timeout, ms */);
		/* Ctrl-C will cause -EINTR */
		if (err == -EINTR) {
			err = 0;
			break;
		}
		if (err < 0) {
			/* diagnostics go to stderr like every other error here */
			fprintf(stderr, "Error polling ring buffer: %d\n", err);
			break;
		}
	}

cleanup:
	/* Clean up; rb is still NULL when we get here before it was created */
	ring_buffer__free(rb);
	hkill_bpf__destroy(skel);

	/* a positive err is the last poll's non-error result, so report success */
	return err < 0 ? -err : 0;
}
TIME     PID    COMM     TPID     SIG
12:34:39 8946   bash     9682     9
12:35:45 8946 bash 9703 9
12:36:11 8946 bash 9811 9
12:37:30 8946 bash 9823 9
12:37:55 8946 bash 9902 9

More blogs to come

I have also included a hello2 example, which shows how you can use bpf_probe_read_user_str to extract a readable string instead of the actual inode.

Interested in more about libbpf-bootstrap

If you are interested in reading and learning more about libbpf, I can really recommend the blogs of Andrii Nakryiko. Another great source is of course the GitHub repository, where you can learn from the code examples, which is how I started when creating this blog.

Observability 🥑 @Fullstaq , ex- @Devoteam ; Occasional speaker, I ❤️ #k8s #linux #o11y #ChaosEngineering #SRE #Monitoringlove #CloudNative #DevSecOps