Skip to content

Instantly share code, notes, and snippets.

@ii64
Created February 16, 2022 22:00
Show Gist options
  • Save ii64/1de9b90308ce10654bb7767c7e4d4558 to your computer and use it in GitHub Desktop.
Save ii64/1de9b90308ce10654bb7767c7e4d4558 to your computer and use it in GitHub Desktop.
Go eBPF tracepoint example for kmem:mm_page_alloc, syscalls:sys_enter_open, and syscalls:sys_enter_openat
//go:build linux
// +build linux
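// This program demonstrates attaching eBPF programs to kernel tracepoints.
// Programs are attached to the kmem:mm_page_alloc, syscalls:sys_enter_openat
// and syscalls:sys_enter_open tracepoints. Each program counts how many times
// its tracepoint has been reached, and the counts are logged once per second.
// The sys_enter_open program additionally reports every call (pid, flags,
// mode, filename) to user space through a ring buffer.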
package main
import (
"errors"
"fmt"
"log"
"time"
"unsafe"
"github.com/cilium/ebpf/link"
"github.com/cilium/ebpf/ringbuf"
"github.com/cilium/ebpf/rlimit"
"golang.org/x/sys/unix"
)
// $BPF_CLANG and $BPF_CFLAGS are set by the Makefile.
//go:generate go run github.com/cilium/ebpf/cmd/bpf2go -cc $BPF_CLANG -cflags $BPF_CFLAGS bpf bpf_obj/tracepoint.c -- -I../headers
// Event is the user-space view of one record read from the "events" ring
// buffer. Field order and sizes mirror struct event_t in the accompanying C
// source (four 64-bit values followed by a 64-byte name buffer), because
// UnmarshalBinary reinterprets the raw sample bytes as this struct.
type Event struct {
	// pid holds the raw result of bpf_get_current_pid_tgid(); syscallNr,
	// flags and mode are copied from the tracepoint context by the probe.
	pid, syscallNr, flags, mode uint64
	// filename is the path captured by the probe; String reads it up to the
	// first NUL byte.
	filename [64]byte
}
// String renders the event on a single line for logging.
//
// pid stores the value returned by bpf_get_current_pid_tgid(), which packs
// the thread group id into the upper 32 bits and the thread id into the lower
// 32 bits. Both halves are extracted as full 32-bit values so that ids above
// 65535 are not truncated.
func (e Event) String() string {
	tgid := uint32(e.pid >> 32)
	pid := uint32(e.pid) // low 32 bits
	return fmt.Sprintf("event { {tgid:%d, pid:%d}, syscallNr:%+#v, flags:%+#v, mode:%+#v, filename:%q}",
		tgid, pid,
		e.syscallNr,
		e.flags,
		e.mode,
		unix.ByteSliceToString(e.filename[:]),
	)
}
// __sz_event is the size in bytes of Event, which is also the exact length
// every ring buffer sample is expected to have.
const __sz_event = unsafe.Sizeof(Event{})

// UnmarshalBinary overwrites *e with the event encoded in b.
//
// b must be exactly __sz_event bytes long and laid out like the in-memory
// representation of Event; any other length is treated as fatal and
// terminates the process via log.Fatalf.
//
// The bytes are copied into e instead of dereferencing b through an *Event:
// a byte slice carries no alignment guarantee, and converting a misaligned
// pointer to *Event is not a valid unsafe.Pointer use.
func (e *Event) UnmarshalBinary(b []byte) {
	if len(b) != int(__sz_event) {
		// log.Fatalf exits the process, so nothing follows it.
		log.Fatalf("expected %d got %d", __sz_event, len(b))
	}
	// Event contains no pointers, so a plain byte copy is a safe way to fill it.
	dst := (*[__sz_event]byte)(unsafe.Pointer(e))
	copy(dst[:], b)
}
// Indexes into the eBPF counting_map array. Each tracepoint program
// increments its own slot.
const (
	mapKey  uint32 = 0 // kmem:mm_page_alloc
	mapKey1 uint32 = 1 // syscalls:sys_enter_openat
	mapKey2 uint32 = 2 // syscalls:sys_enter_open
)
// objs holds the eBPF programs and maps. It starts out as the zero value, is
// filled in by loadBpfObjects in main, and is then read by every goroutine.
var objs bpfObjects
// syscallOpenAt attaches the SysEnterOpenat program to the
// syscalls:sys_enter_openat tracepoint and then, once per second, logs the
// hit counter stored under mapKey1 in the counting map.
//
// The polling loop never ends, so this is meant to run in its own goroutine
// after objs has been loaded. Failing to attach the tracepoint is fatal.
func syscallOpenAt() {
	// Counter slot used by the sys_enter_openat program. A distinct name is
	// used so the package-level mapKey constant is not shadowed.
	const key = mapKey1

	tp, err := link.Tracepoint("syscalls", "sys_enter_openat", objs.SysEnterOpenat)
	if err != nil {
		log.Fatalf("opening tracepoint sys_enter_openat: %s", err)
	}
	defer tp.Close()
	fmt.Printf("%+#v\n", tp)

	ticker := time.NewTicker(1 * time.Second)
	defer ticker.Stop()

	log.Println("Waiting for events..")
	for range ticker.C {
		var value uint64
		if err := objs.CountingMap.Lookup(key, &value); err != nil {
			// Report the failure instead of hiding it, but keep polling.
			log.Printf("reading map key %d: %v", key, err)
			continue
		}
		log.Printf("key: %d - %v times", key, value)
	}
}
// syscallOpen attaches the SysEnterOpen program to the
// syscalls:sys_enter_open tracepoint, starts a goroutine that logs every
// record the program pushes into the Events ring buffer, and then, once per
// second, logs the hit counter stored under mapKey2 in the counting map.
//
// The polling loop never ends, so this is meant to run in its own goroutine
// after objs has been loaded. Failing to attach the tracepoint is fatal.
func syscallOpen() {
	// Counter slot used by the sys_enter_open program. A distinct name is
	// used so the package-level mapKey constant is not shadowed.
	const key = mapKey2

	tp, err := link.Tracepoint("syscalls", "sys_enter_open", objs.SysEnterOpen)
	if err != nil {
		log.Fatalf("opening tracepoint sys_enter_open: %s", err)
	}
	defer tp.Close()
	fmt.Printf("%+#v\n", tp)

	ticker := time.NewTicker(1 * time.Second)
	defer ticker.Stop()

	log.Println("Waiting for events..")
	go logOpenEvents()

	for range ticker.C {
		var value uint64
		if err := objs.CountingMap.Lookup(key, &value); err != nil {
			// Report the failure instead of hiding it, but keep polling.
			log.Printf("reading map key %d: %v", key, err)
			continue
		}
		log.Printf("key: %d - %v times", key, value)
	}
}

// logOpenEvents reads records from the Events ring buffer until the reader is
// closed, decoding each one into an Event and logging it. Failing to create
// the reader is fatal; other read errors are logged and reading continues.
func logOpenEvents() {
	rd, err := ringbuf.NewReader(objs.bpfMaps.Events)
	if err != nil {
		log.Fatalf("opening ringbuf reader: %s", err)
	}
	defer rd.Close()

	var ev Event
	for {
		rec, err := rd.Read()
		if err != nil {
			if errors.Is(err, ringbuf.ErrClosed) {
				// Nothing in this program handles signals; the only way to
				// get here is the reader having been closed.
				log.Println("ring buffer reader closed, exiting...")
				return
			}
			log.Printf("reading from reader: %s\n", err)
			continue
		}
		ev.UnmarshalBinary(rec.RawSample)
		log.Printf("events: %s\n", ev.String())
	}
}
// main loads the pre-compiled eBPF objects, starts the two syscall tracepoint
// helpers in their own goroutines, attaches the MmPageAlloc program to the
// kmem:mm_page_alloc tracepoint and then logs that program's hit counter
// (slot mapKey of the counting map) once per second, forever.
func main() {
	// Allow the current process to lock memory for eBPF resources.
	if err := rlimit.RemoveMemlock(); err != nil {
		log.Fatal(err)
	}

	// Load pre-compiled programs and maps into the kernel.
	if err := loadBpfObjects(&objs, nil); err != nil {
		log.Fatalf("loading objects: %v", err)
	}
	defer objs.Close()

	// Each helper attaches its own tracepoint and polls its own counter slot.
	go syscallOpenAt()
	go syscallOpen()

	// Open a tracepoint and attach the pre-compiled program. Each time the
	// tracepoint is hit, the program increments its counter by 1. The read
	// loop below polls this map value once per second.
	// The first two arguments are taken from the following pathname:
	// /sys/kernel/debug/tracing/events/kmem/mm_page_alloc
	kp, err := link.Tracepoint("kmem", "mm_page_alloc", objs.MmPageAlloc)
	if err != nil {
		log.Fatalf("opening tracepoint: %s", err)
	}
	defer kp.Close()
	fmt.Printf("%+#v\n", kp)

	// Read loop reporting the total number of times the tracepoint was hit,
	// once per second.
	ticker := time.NewTicker(1 * time.Second)
	defer ticker.Stop()

	log.Println("Waiting for events..")
	for range ticker.C {
		var value uint64
		if err := objs.CountingMap.Lookup(mapKey, &value); err != nil {
			// Report the failure instead of hiding it, but keep polling.
			log.Printf("reading map key %d: %v", mapKey, err)
			continue
		}
		log.Printf("key: %d - %v times", mapKey, value)
	}
}
// +build ignore
#include "common.h"
#include "bpf_helpers.h"
// License string, placed in the "license" section of the compiled object.
char __license[] SEC("license") = "Dual MIT/GPL";
struct bpf_map_def SEC("maps") counting_map = {
.type = BPF_MAP_TYPE_ARRAY,
.key_size = sizeof(u32),
.value_size = sizeof(u64),
.max_entries = 3,
};
// Context layout handed to the kmem:mm_page_alloc program. It follows the
// tracepoint's format file:
// /sys/kernel/debug/tracing/events/kmem/mm_page_alloc/format
struct alloc_info {
	unsigned long pad; /* leading 8 bytes: the program is not allowed to read them */
	unsigned long pfn;
	unsigned int order;
	unsigned int gfp_flags;
	int migratetype;
};
// Program for the kmem:mm_page_alloc tracepoint, which is defined in
// mm/page_alloc.c:__alloc_pages_nodemask().
// Userspace pathname: /sys/kernel/debug/tracing/events/kmem/mm_page_alloc
//
// Adds one to counting_map[0] on every hit. If the lookup yields no entry,
// the slot is seeded with 1 instead. Always returns 0.
SEC("tracepoint/kmem/mm_page_alloc")
int mm_page_alloc(struct alloc_info *info)
{
	u32 slot = 0;
	u64 first = 1;
	u64 *count = bpf_map_lookup_elem(&counting_map, &slot);

	if (count) {
		// Atomic add: the counter may be updated from several CPUs.
		__sync_fetch_and_add(count, 1);
	} else {
		bpf_map_update_elem(&counting_map, &slot, &first, BPF_ANY);
	}
	return 0;
}
// Ring buffer through which klog_event() hands struct event_t records to user
// space. max_entries is 1 << 24 (16777216); for a ring buffer map this is
// presumably the buffer size in bytes — confirm against the kernel ringbuf docs.
struct {
	__uint(type, BPF_MAP_TYPE_RINGBUF);
	__uint(max_entries, 1 << 24);
} events SEC(".maps");
// Context layout used by the sys_enter_open* programs in this file, modelled on
// https://github.com/torvalds/linux/blob/master/samples/bpf/syscall_tp_kern.c
struct syscalls_enter_open_args {
	unsigned long long unused; /* leading 8 bytes, never accessed here */
	long syscall_nr;
	long filename_ptr; /* cast to char * by klog_event() */
	long flags;
	long mode;
};
// Context layout for the exit side of the open syscalls: syscall number plus
// return value. No program in this part of the file references it.
struct syscalls_exit_open_args {
	unsigned long long unused; /* leading 8 bytes */
	long syscall_nr;
	long ret;
};
// Record written to the "events" ring buffer by klog_event(). The Go side
// decodes it into its Event struct, which declares the same fields in the
// same order, so the two definitions must be kept in sync.
struct event_t {
	u64 pid;           /* raw bpf_get_current_pid_tgid() value */
	u64 syscall_nr;    /* copied from the tracepoint context */
	u64 flags;         /* copied from the tracepoint context */
	u64 mode;          /* copied from the tracepoint context */
	char filename[64]; /* path string read from filename_ptr */
};
inline int klog_event(struct syscalls_enter_open_args *ctx) {
struct event_t *event;
char *fname = (char *)(ctx->filename_ptr);
event = bpf_ringbuf_reserve(&events, sizeof(struct event_t), 0);
if (!event) {
return -1;
}
event->pid = bpf_get_current_pid_tgid();
event->syscall_nr = ctx->syscall_nr;
event->flags = ctx->flags;
event->mode = ctx->mode;
if (fname) {
bpf_probe_read_str(&event->filename, sizeof(event->filename), fname);
}
bpf_ringbuf_submit(event, 0);
return 0;
}
// Program for the syscalls:sys_enter_openat tracepoint.
//
// Adds one to counting_map[1] on every hit. If the lookup yields no entry,
// the slot is seeded with 1 instead. Always returns 0.
//
// NOTE(review): event emission via klog_event(ctx) is left disabled here.
// openat takes a directory fd before the path, so struct
// syscalls_enter_open_args probably does not describe this tracepoint's
// arguments — check
// /sys/kernel/debug/tracing/events/syscalls/sys_enter_openat/format
// before enabling it.
SEC("tracepoint/syscalls/sys_enter_openat")
int sys_enter_openat(struct syscalls_enter_open_args *ctx)
{
	u32 slot = 1;
	u64 first = 1;
	u64 *count = bpf_map_lookup_elem(&counting_map, &slot);

	if (count) {
		// Atomic add: the counter may be updated from several CPUs.
		__sync_fetch_and_add(count, 1);
	} else {
		bpf_map_update_elem(&counting_map, &slot, &first, BPF_ANY);
	}
	return 0;
}
// Program for the syscalls:sys_enter_open tracepoint.
//
// First publishes the call to user space through klog_event() (its result is
// ignored, so a full ring buffer only drops the event), then adds one to
// counting_map[2]. If the lookup yields no entry, the slot is seeded with 1
// instead. Always returns 0.
SEC("tracepoint/syscalls/sys_enter_open")
int sys_enter_open(struct syscalls_enter_open_args *ctx)
{
	u32 slot = 2;
	u64 first = 1;
	u64 *count;

	klog_event(ctx);

	count = bpf_map_lookup_elem(&counting_map, &slot);
	if (count) {
		// Atomic add: the counter may be updated from several CPUs.
		__sync_fetch_and_add(count, 1);
	} else {
		bpf_map_update_elem(&counting_map, &slot, &first, BPF_ANY);
	}
	return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment