Skip to content

Commit 03f7330

Browse files
committed
add ebpf test program
Signed-off-by: vsoch <[email protected]>
1 parent e6bc146 commit 03f7330

File tree

8 files changed

+428
-26
lines changed

8 files changed

+428
-26
lines changed

README.md

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,10 +44,32 @@ This example will walk through running lammps. Other example runs are [also prov
4444
4545
### 1. Setup the Cluster
4646
47+
For simple local development:
48+
4749
```bash
4850
# Create the cluster
4951
kind create cluster --config ./kind-config.yaml
52+
```
53+
54+
For eBPF (which requires mounting the host), I recommend a cloud cluster:
55+
56+
```bash
57+
NODES=2
58+
GOOGLE_PROJECT=myproject
59+
INSTANCE=h3-standard-88
60+
61+
time gcloud container clusters create test-cluster \
62+
--threads-per-core=1 \
63+
--num-nodes=$NODES \
64+
--machine-type=$INSTANCE \
65+
--placement-type=COMPACT \
66+
--image-type=UBUNTU_CONTAINERD \
67+
--region=us-central1-a --project=${GOOGLE_PROJECT}
68+
```
69+
70+
Finally, install the Flux Operator
5071

72+
```bash
5173
# Install the Flux Operator
5274
kubectl apply -f https://raw.githubusercontent.com/flux-framework/flux-operator/refs/heads/main/examples/dist/flux-operator.yaml
5375
```
@@ -115,9 +137,10 @@ helm install lammps lammps-reax/ --debug --dry-run
115137
Then install the chart. This will deploy the Flux MiniCluster and run lammps for some number of iterations. All variables are technically defined so you don't need any `--set`.
116138

117139
```bash
140+
container=$(ocifit ghcr.io/converged-computing/lammps-reax --instance)
118141
helm install \
119142
--set minicluster.size=1 \
120-
--set minicluster.image=ghcr.io/converged-computing/metric-lammps-cpu:zen4-reax \
143+
--set minicluster.image=${container} \
121144
--set minicluster.addFlux=true \
122145
lammps ./lammps-reax
123146
```
@@ -534,6 +557,21 @@ helm install \
534557
lammps ./lammps-reax
535558
```
536559

560+
You'll need to look at the logs to see the sidecar vs. lammps.
561+
562+
```bash
563+
kubectl logs <pod-name> -c <container-name>
564+
```
565+
566+
Try changing the command:
567+
568+
```bash
569+
helm install \
570+
--set experiment.monitor=true \
571+
--set minicluster.save_logs=true \
572+
--set minicluster.monitor_command="tcplife-bpfcc -stT" \
573+
lammps ./lammps-reax
574+
```
537575

538576
##### 4. Recording
539577

base-template/docker/bcc-sidecar/Dockerfile

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
FROM ubuntu:22.04
2-
1+
FROM ubuntu:24.04
32
ENV DEBIAN_FRONTEND=noninteractive
43

54
RUN apt-get update && \
@@ -11,19 +10,15 @@ RUN apt-get update && \
1110
cmake \
1211
flex \
1312
libedit-dev \
14-
libllvm15 \
15-
llvm-15-dev \
16-
llvm-15-tools \
17-
libclang-15-dev \
18-
clang-15 \
13+
libllvm-18-ocaml-dev libpolly-18-dev libllvm18 llvm-18-dev llvm-18-tools \
14+
libclang-18-dev clang-18 \
1915
zlib1g-dev \
2016
libelf-dev \
2117
libfl-dev \
2218
pkg-config \
2319
python3 \
2420
python3-pip \
2521
python3-setuptools \
26-
python3-distutils \
2722
netcat-openbsd \
2823
iperf3 \
2924
netperf \
Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
#include <uapi/linux/ptrace.h>
2+
#include <linux/sched.h>
3+
4+
/* Size in bytes of the filename buffers in struct data_t and struct temp_filename_t. */
#define MAX_FILENAME_LEN_EBPF 256
5+
/* Size in bytes of the comm buffer in struct data_t. */
#define TASK_COMM_LEN_EBPF 16
6+
7+
/* Record kind stored in data_t.type: EVENT_OPEN for open records, EVENT_CLOSE for close records. */
enum event_type { EVENT_OPEN = 0, EVENT_CLOSE = 1 };
8+
9+
/*
 * One record submitted to the `events` ring buffer.
 *
 *   timestamp_ns  value of bpf_ktime_get_ns() when the record was filled in
 *   pid           upper 32 bits of bpf_get_current_pid_tgid()
 *   comm          filled by bpf_get_current_comm(); last byte forced to '\0'
 *   type          EVENT_OPEN or EVENT_CLOSE
 *   filename      EVENT_OPEN: copy of the name saved at openat entry;
 *                 EVENT_CLOSE: only filename[0] is written (set to '\0')
 *   fd            EVENT_OPEN: the probe's return value; EVENT_CLOSE: fd_to_close
 *   ret_val       EVENT_OPEN: same value as fd; EVENT_CLOSE: 0
 *
 * NOTE(review): the consumer of this ring buffer is not in this file; it
 * presumably mirrors this exact field order and sizes -- confirm before
 * changing the layout.
 */
struct data_t {
10+
u64 timestamp_ns; u32 pid; char comm[TASK_COMM_LEN_EBPF];
11+
enum event_type type; char filename[MAX_FILENAME_LEN_EBPF];
12+
int fd; int ret_val;
13+
};
14+
/* Ring buffer carrying struct data_t records; 8 is the macro's second (page count) argument. */
BPF_RINGBUF_OUTPUT(events, 8);
15+
16+
/* Map value: the filename captured at openat entry, held until the return probe consumes it. */
struct temp_filename_t { char fname[MAX_FILENAME_LEN_EBPF]; };
17+
/*
 * Hash map from the u64 returned by bpf_get_current_pid_tgid() to the pending
 * filename. Written by the sys_enter_openat probe; looked up and deleted by
 * trace_openat_return_kretprobe.
 */
BPF_HASH(open_filenames_map, u64, struct temp_filename_t);
18+
19+
/*
 * Checkpoints reported on the debug ring buffer.
 *   DBG_OPEN_ENTRY_START         start of the sys_enter_openat probe
 *   DBG_OPEN_ENTRY_READ_DONE     after the user filename read in that probe
 *   DBG_OPEN_RETURN_START        start of trace_openat_return_kretprobe
 *   DBG_OPEN_RETURN_LOOKUP_DONE  after the open_filenames_map lookup there
 */
enum debug_stage {
20+
DBG_OPEN_ENTRY_START = 100, DBG_OPEN_ENTRY_READ_DONE = 101,
21+
DBG_OPEN_RETURN_START = 200, DBG_OPEN_RETURN_LOOKUP_DONE = 201,
22+
};
23+
/*
 * One debug record. `id` is the bpf_get_current_pid_tgid() value; the meaning
 * of val1/val2 depends on `stage`:
 *   ENTRY_START:        val1 = 0, val2 = 0
 *   ENTRY_READ_DONE:    val1 = read result, val2 = 1 if that result is > 0 else 0
 *   RETURN_START:       val1 = return value, val2 = 0
 *   RETURN_LOOKUP_DONE: val1 = return value, val2 = 1 if the map lookup hit else 0
 */
struct debug_event_t { u64 id; enum debug_stage stage; long val1; long val2; };
/* Ring buffer carrying struct debug_event_t records; 4 is the macro's second (page count) argument. */
24+
BPF_RINGBUF_OUTPUT(debug_events_rb, 4);
25+
26+
/*
 * Probe on the syscalls:sys_enter_openat tracepoint.
 *
 * Copies the filename argument from user memory into a local buffer and saves
 * it in open_filenames_map under the current pid_tgid, so that
 * trace_openat_return_kretprobe can pair it with the call's result. Two debug
 * records (DBG_OPEN_ENTRY_START, DBG_OPEN_ENTRY_READ_DONE) are emitted on
 * debug_events_rb when ring buffer space can be reserved.
 *
 * Always returns 0. Nothing is stored in the map when the read result is <= 0.
 */
TRACEPOINT_PROBE(syscalls, sys_enter_openat) {
27+
// Map key; the return probe computes the same value to find this entry.
u64 id = bpf_get_current_pid_tgid();
28+
// Zero-initialised so bytes past the copied string are 0 when stored in the map.
struct temp_filename_t temp_fn_data = {};
29+
long read_res = 0;
30+
const char __user *filename_ptr_from_args = (const char __user *)args->filename;
31+
32+
// Debug checkpoint 1: probe entered. Skipped silently if the reserve fails.
struct debug_event_t *dbg_evt = debug_events_rb.ringbuf_reserve(sizeof(struct debug_event_t));
33+
if (dbg_evt) {
34+
dbg_evt->id = id; dbg_evt->stage = DBG_OPEN_ENTRY_START;
35+
dbg_evt->val1 = 0; dbg_evt->val2 = 0;
36+
debug_events_rb.ringbuf_submit(dbg_evt, 0);
37+
}
38+
39+
// Copy at most sizeof(fname) bytes of the user-space string; result is checked below.
read_res = bpf_probe_read_user_str(&temp_fn_data.fname, sizeof(temp_fn_data.fname), (void *)filename_ptr_from_args);
40+
41+
// Debug checkpoint 2: report the raw read result and whether it was > 0.
dbg_evt = debug_events_rb.ringbuf_reserve(sizeof(struct debug_event_t));
42+
if (dbg_evt) {
43+
dbg_evt->id = id; dbg_evt->stage = DBG_OPEN_ENTRY_READ_DONE;
44+
dbg_evt->val1 = read_res; dbg_evt->val2 = (read_res > 0) ? 1 : 0;
45+
debug_events_rb.ringbuf_submit(dbg_evt, 0);
46+
}
47+
48+
// NOTE(review): on this early return any entry already stored under `id` is
// left in the map -- confirm whether a stale entry can exist at this point.
if (read_res <= 0) { return 0; }
49+
temp_fn_data.fname[MAX_FILENAME_LEN_EBPF - 1] = '\0'; // force the final byte to NUL
50+
// Hand the filename over to the return probe.
open_filenames_map.update(&id, &temp_fn_data);
51+
return 0;
52+
}
53+
54+
/*
 * Return-side probe for openat (named as a kretprobe; the attach point is
 * configured outside this file).
 *
 * Reads the probed function's return value, looks up the filename saved by the
 * sys_enter_openat probe under the current pid_tgid, and submits an EVENT_OPEN
 * record to `events`. Two debug records (DBG_OPEN_RETURN_START,
 * DBG_OPEN_RETURN_LOOKUP_DONE) are emitted when space can be reserved.
 *
 * The map entry for this pid_tgid is deleted on every path that reaches past
 * the lookup with a hit, and also when the return value is negative.
 *
 * ctx: register context of the probe; only PT_REGS_RC(ctx) is read.
 * Always returns 0.
 */
int trace_openat_return_kretprobe(struct pt_regs *ctx) {
55+
u64 id = bpf_get_current_pid_tgid();
56+
// Return value of the probed call, truncated to int.
int ret_fd = PT_REGS_RC(ctx);
57+
struct temp_filename_t *temp_fn_ptr = NULL;
58+
long lookup_success = 0;
59+
60+
// Debug checkpoint: probe entered, val1 carries the return value.
struct debug_event_t *dbg_evt = debug_events_rb.ringbuf_reserve(sizeof(struct debug_event_t));
61+
if (dbg_evt) {
62+
dbg_evt->id = id; dbg_evt->stage = DBG_OPEN_RETURN_START;
63+
dbg_evt->val1 = ret_fd; dbg_evt->val2 = 0;
64+
debug_events_rb.ringbuf_submit(dbg_evt, 0);
65+
}
66+
67+
// Negative return: no event is emitted; just drop the saved filename.
if (ret_fd < 0) {
68+
open_filenames_map.delete(&id);
69+
return 0;
70+
}
71+
72+
temp_fn_ptr = open_filenames_map.lookup(&id);
73+
lookup_success = (temp_fn_ptr != NULL) ? 1 : 0;
74+
75+
// Debug checkpoint: report whether a saved filename was found.
dbg_evt = debug_events_rb.ringbuf_reserve(sizeof(struct debug_event_t));
76+
if (dbg_evt) {
77+
dbg_evt->id = id; dbg_evt->stage = DBG_OPEN_RETURN_LOOKUP_DONE;
78+
dbg_evt->val1 = ret_fd; dbg_evt->val2 = lookup_success;
79+
debug_events_rb.ringbuf_submit(dbg_evt, 0);
80+
}
81+
82+
// No saved filename for this pid_tgid: nothing to report.
if (!temp_fn_ptr) { return 0; }
83+
84+
struct data_t *event_data_ptr = events.ringbuf_reserve(sizeof(struct data_t));
85+
// Ring buffer reserve failed: the event is dropped, but the map entry is still removed.
if (!event_data_ptr) {
86+
open_filenames_map.delete(&id);
87+
return 0;
88+
}
89+
event_data_ptr->timestamp_ns = bpf_ktime_get_ns();
90+
// Upper 32 bits of the pid_tgid value.
event_data_ptr->pid = id >> 32;
91+
bpf_get_current_comm(&event_data_ptr->comm, sizeof(event_data_ptr->comm));
92+
event_data_ptr->comm[TASK_COMM_LEN_EBPF - 1] = '\0'; // force the final byte to NUL
93+
event_data_ptr->type = EVENT_OPEN;
94+
// fd and ret_val both carry the same return value for open events.
event_data_ptr->fd = ret_fd;
95+
event_data_ptr->ret_val = ret_fd;
96+
// Copy the whole fixed-size buffer out of the map value before deleting the entry.
__builtin_memcpy(event_data_ptr->filename, temp_fn_ptr->fname, MAX_FILENAME_LEN_EBPF);
97+
event_data_ptr->filename[MAX_FILENAME_LEN_EBPF - 1] = '\0'; // force the final byte to NUL
98+
events.ringbuf_submit(event_data_ptr, 0);
99+
open_filenames_map.delete(&id);
100+
return 0;
101+
}
102+
103+
/*
 * Entry-side probe for close (named as a kprobe; the attach point is
 * configured outside this file).
 *
 * Submits an EVENT_CLOSE record to `events` carrying the current pid, comm,
 * timestamp and fd_to_close. If ring buffer space cannot be reserved the event
 * is dropped.
 *
 * ctx:          register context of the probe (not read directly here).
 * fd_to_close:  value stored in the record's fd field.
 * Always returns 0.
 *
 * NOTE(review): fd_to_close is populated from the probed function's first
 * argument. If the loader attaches this to an architecture syscall wrapper
 * (e.g. __x64_sys_close), that first argument may be a struct pt_regs *
 * rather than the fd -- confirm the attach target in the loader.
 */
int trace_close_entry_kprobe(struct pt_regs *ctx, int fd_to_close) {
104+
u64 id = bpf_get_current_pid_tgid();
105+
struct data_t *event_data_ptr = events.ringbuf_reserve(sizeof(struct data_t));
106+
if (!event_data_ptr) { return 0; }
107+
event_data_ptr->timestamp_ns = bpf_ktime_get_ns();
108+
// Upper 32 bits of the pid_tgid value.
event_data_ptr->pid = id >> 32;
109+
bpf_get_current_comm(&event_data_ptr->comm, sizeof(event_data_ptr->comm));
110+
event_data_ptr->comm[TASK_COMM_LEN_EBPF - 1] = '\0'; // force the final byte to NUL
111+
event_data_ptr->type = EVENT_CLOSE;
112+
event_data_ptr->fd = fd_to_close;
113+
// Close events carry no filename: only the first byte is written.
// NOTE(review): the remaining filename bytes are not written here -- confirm
// the consumer treats filename strictly as a NUL-terminated string.
event_data_ptr->filename[0] = '\0'; // empty string
114+
event_data_ptr->ret_val = 0;
115+
events.ringbuf_submit(event_data_ptr, 0);
116+
return 0;
117+
}

0 commit comments

Comments
 (0)