Skip to content

Commit 8580bc3

Browse files
committed
add shared memory prototype
Signed-off-by: vsoch <[email protected]>
1 parent 6adb8a2 commit 8580bc3

File tree

6 files changed

+566
-35
lines changed

6 files changed

+566
-35
lines changed

base-template/docker/bcc-sidecar/programs/cpu/ebpf-collect.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,9 @@ int tp_sched_switch(struct trace_event_raw_sched_switch *ctx) {
118118

119119
wakeup_ts_ptr = task_wakeup_ts.lookup(&next_tid);
120120
if (wakeup_ts_ptr) {
121+
122+
// This means it's ready and wants to use the CPU,
123+
// but was waiting for a CPU core to become available.
121124
u64 runq_latency = current_ts - *wakeup_ts_ptr;
122125
task_wakeup_ts.delete(&next_tid);
123126

base-template/docker/bcc-sidecar/programs/cpu/run-ebpf-collect.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,25 @@
88
import json
99
import signal
1010

11+
# did you ever see a goose, kissing a moose?
12+
# did you ever see a whale with a polkadot tail?
13+
# did you ever see a fly wearing a tie?
14+
# did you ever see a spider drinking a cider?
15+
# did you ever see a teacher kissing a creature?
16+
# did you ever see a bear combing his hair?
17+
# did you ever see llamas eating their pajamas
18+
# did you ever see a snail delivering the mail?
19+
# did you ever have a time when you couldn't make it rhyme?
20+
21+
# This code effectively reconstructs two important metrics for each scheduling cycle of a thread:
22+
# How long it ran on the CPU.
23+
# How long it waited in the run queue after being woken up before it got to run.
24+
# high runq_latency_ns values from the CPU scheduler script can be a strong symptom of CPU throttling,
25+
# especially if caused by cgroup CPU quotas. It can also indicate general CPU contention due to system overload.
26+
27+
# memory pressure
28+
29+
# If a process is trying to access shared memory and the shared memory is insufficient (too small for what is needed), it requires access to another tier because of it, e.g. access to cache. Where we place the processes and threads in terms of NUMA node, and how we bind them to shared cache, can have a profound impact on the performance of certain applications (NUMA node binding vs. access time). Open question: how do memory access calls play into this?
1130

1231
# Global state
1332
running = True
@@ -24,6 +43,7 @@
2443
if not os.path.exists(filename):
2544
sys.exit(f"Missing c code {filename}")
2645

46+
2747
def get_cgroup_filter(cgroup_indicator_file):
2848
"""
2949
Filtering to a cgroup id can scope the results to one container.
@@ -91,8 +111,10 @@ def print_event(cpu, data, size):
91111
on_cpu_ms = 0.0
92112
runq_latency_ms = 0.0
93113

114+
# This is how long it ran on the cpu
94115
if event.on_cpu_ns > 0:
95116
on_cpu_ms = event.on_cpu_ns / 1000000.0
117+
# This is how long it waited after being woken up
96118
if event.runq_latency_ns > 0:
97119
runq_latency_ms = event.runq_latency_ns / 1000000.0
98120

base-template/docker/bcc-sidecar/programs/futex/run-ebpf-collect.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ def get_futex_operation(op_full):
107107
"""
108108
# Basic command part (ignoring FUTEX_CLOCK_REALTIME for this simple string conversion)
109109
# 256 is FUTEX_CLOCK_REALTIME
110-
op_cmd = op_full & ~(FUTEX_PRIVATE_FLAG_PY | 256)
110+
op_cmd = op_full & ~(FUTEX_PRIVATE_FLAG_PY | 256)
111111

112112
s = ""
113113
if op_cmd == FUTEX_WAIT_PY:
Lines changed: 263 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,263 @@
1+
// shm_monitor_final.bpf.c
2+
// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
3+
4+
// For PT_REGS_PARMx access
5+
#include <uapi/linux/ptrace.h>
6+
// For bpf_trace_printk, often included by bcc itself, but can be explicit if needed
7+
// #include <bcc/proto.h>
8+
9+
#include <uapi/linux/shm.h>
10+
#include <uapi/linux/fcntl.h>
11+
#include <uapi/linux/mman.h>
12+
13+
// Define SEC macro
14+
#ifndef SEC
15+
# define SEC(name) __attribute__((section(name), used))
16+
#endif
17+
18+
typedef unsigned long long u64;
19+
typedef unsigned int u32;
20+
#define TASK_COMM_LEN 16
21+
#define SHM_NAME_LEN 64
22+
23+
// --- Manually define tracepoint context structures ---
24+
/*
 * Hand-written view of the record passed to sys_enter_* tracepoint handlers.
 * The first member is a placeholder (presumably covering the common
 * tracepoint header -- confirm against the tracepoint format file); the
 * handlers in this file only read args[].
 */
struct trace_event_raw_sys_enter {
    unsigned long long __unused_common_fields; /* placeholder, never read */
    long syscall_nr;                           /* syscall number slot */
    unsigned long args[6];                     /* raw syscall arguments 0..5 */
};

/*
 * Hand-written view of the record passed to sys_exit_* tracepoint handlers.
 * The handlers in this file only read ret.
 */
struct trace_event_raw_sys_exit {
    unsigned long long __unused_common_fields; /* placeholder, never read */
    long syscall_nr;                           /* syscall number slot */
    long ret;                                  /* raw syscall return value */
};
35+
36+
// --- Aggregated Stats Structures ---
37+
struct shm_tgid_stats_t {
38+
u64 shmget_calls;
39+
u64 shmget_success;
40+
u64 shmat_calls;
41+
u64 shmdt_calls;
42+
u64 shmctl_rmid_calls;
43+
u64 total_shmget_size_bytes;
44+
u64 shm_open_calls;
45+
u64 shm_open_success;
46+
u64 shm_unlink_calls;
47+
u64 mmap_shared_calls;
48+
u64 munmap_shared_calls;
49+
u64 total_mmap_shared_size_bytes;
50+
};
51+
52+
BPF_HASH(proc_shm_stats, u32, struct shm_tgid_stats_t);
53+
54+
struct shmget_args_t {
55+
size_t size;
56+
};
57+
BPF_HASH(active_shmgets, u32, struct shmget_args_t);
58+
59+
struct mmap_args_t {
60+
u64 len;
61+
int flags;
62+
int fd;
63+
};
64+
BPF_HASH(active_mmaps, u32, struct mmap_args_t);
65+
66+
struct shm_open_kprobe_args_t {
67+
char name[SHM_NAME_LEN];
68+
};
69+
BPF_HASH(active_shm_opens_kprobe, u32, struct shm_open_kprobe_args_t);
70+
71+
72+
/*
 * Look up the counters for a process in proc_shm_stats, inserting a zeroed
 * entry if none exists yet.
 *
 * tgid: process id (upper 32 bits of bpf_get_current_pid_tgid()).
 *
 * Returns a pointer into the map, or NULL if the entry could not be found or
 * created; a diagnostic line is written to the trace pipe in that case.
 * Callers must check for NULL before dereferencing.
 */
static __always_inline struct shm_tgid_stats_t *get_tgid_stats(u32 tgid) {
    struct shm_tgid_stats_t zero_stats = {0};
    struct shm_tgid_stats_t *stats = proc_shm_stats.lookup_or_try_init(&tgid, &zero_stats);

    if (!stats) {
        /*
         * BCC's bpf_trace_printk() takes only the format and its values; BCC
         * supplies the format size itself.  Passing sizeof() explicitly made
         * %u print the format length instead of the TGID.
         */
        bpf_trace_printk("BPF_ERR: get_stats failed TGID: %u\n", tgid);
    }
    return stats;
}
80+
81+
// --- SysV Shared Memory Syscalls (Using Tracepoints - Unchanged) ---
82+
/*
 * sys_enter_shmget: count the call for the calling process and remember the
 * requested size (syscall argument 1), keyed by thread id, so that
 * trace_exit_shmget can account for it if the call succeeds.
 *
 * Always returns 0.
 */
SEC("tracepoint/syscalls/sys_enter_shmget")
int trace_enter_shmget(struct trace_event_raw_sys_enter *ctx) {
    u64 pid_tgid = bpf_get_current_pid_tgid();
    u32 tgid = pid_tgid >> 32;
    u32 tid = (u32)pid_tgid;

    /* BCC adds the format size itself; only the values are passed. */
    bpf_trace_printk("BPF_TP: enter_shmget TGID: %u\n", tgid);

    struct shm_tgid_stats_t *stats = get_tgid_stats(tgid);
    if (!stats) {
        return 0;
    }
    stats->shmget_calls++;

    struct shmget_args_t args = {};
    args.size = (size_t)ctx->args[1];
    active_shmgets.update(&tid, &args);
    return 0;
}
96+
/*
 * sys_exit_shmget: pair up with the entry recorded by trace_enter_shmget for
 * this thread.  When the syscall returned a non-negative value, bump the
 * success counter and add the requested size to the running total.  The
 * in-flight entry is removed on every path that found one.
 *
 * Always returns 0.
 */
SEC("tracepoint/syscalls/sys_exit_shmget")
int trace_exit_shmget(struct trace_event_raw_sys_exit *ctx) {
    u64 pid_tgid = bpf_get_current_pid_tgid();
    u32 tgid = pid_tgid >> 32;
    u32 tid = (u32)pid_tgid;
    long retval = ctx->ret;

    /*
     * BCC adds the format size itself.  With the explicit sizeof() argument
     * %u printed the format length and %ld printed the TGID.
     */
    bpf_trace_printk("BPF_TP: exit_shmget TGID: %u ret: %ld\n", tgid, retval);

    struct shmget_args_t *entry_args = active_shmgets.lookup(&tid);
    if (!entry_args) {
        return 0; /* no matching entry was recorded for this thread */
    }

    if (retval >= 0) {
        struct shm_tgid_stats_t *stats = get_tgid_stats(tgid);
        if (stats) {
            stats->shmget_success++;
            stats->total_shmget_size_bytes += entry_args->size;
        }
    }

    active_shmgets.delete(&tid);
    return 0;
}
114+
/*
 * sys_enter_shmat: count one shmat() call for the calling process.
 *
 * Always returns 0.
 */
SEC("tracepoint/syscalls/sys_enter_shmat")
int trace_enter_shmat(struct trace_event_raw_sys_enter *ctx) {
    u32 tgid = bpf_get_current_pid_tgid() >> 32;

    struct shm_tgid_stats_t *stats = get_tgid_stats(tgid);
    if (!stats) {
        return 0;
    }
    stats->shmat_calls++;

    /* BCC adds the format size itself; only the values are passed. */
    bpf_trace_printk("BPF_TP: enter_shmat TGID: %u\n", tgid);
    return 0;
}
123+
/*
 * sys_enter_shmdt: count one shmdt() call for the calling process.
 *
 * Always returns 0.
 */
SEC("tracepoint/syscalls/sys_enter_shmdt")
int trace_enter_shmdt(struct trace_event_raw_sys_enter *ctx) {
    u32 tgid = bpf_get_current_pid_tgid() >> 32;

    struct shm_tgid_stats_t *stats = get_tgid_stats(tgid);
    if (!stats) {
        return 0;
    }
    stats->shmdt_calls++;

    /* BCC adds the format size itself; only the values are passed. */
    bpf_trace_printk("BPF_TP: enter_shmdt TGID: %u\n", tgid);
    return 0;
}
132+
/*
 * sys_enter_shmctl: count shmctl() calls whose command (syscall argument 1)
 * is IPC_RMID; every other command is ignored.
 *
 * Always returns 0.
 */
SEC("tracepoint/syscalls/sys_enter_shmctl")
int trace_enter_shmctl(struct trace_event_raw_sys_enter *ctx) {
    int cmd = (int)ctx->args[1];
    if (cmd != IPC_RMID) {
        return 0;
    }

    u32 tgid = bpf_get_current_pid_tgid() >> 32;
    struct shm_tgid_stats_t *stats = get_tgid_stats(tgid);
    if (!stats) {
        return 0;
    }
    stats->shmctl_rmid_calls++;

    /* BCC adds the format size itself; only the values are passed. */
    bpf_trace_printk("BPF_TP: enter_shmctl (IPC_RMID) TGID: %u\n", tgid);
    return 0;
}
144+
145+
146+
// --- POSIX Shared Memory (shm_open, shm_unlink via Kprobes) ---
147+
148+
// Standard kprobe signature: first arg is struct pt_regs *ctx
149+
// Kernel function shm_open(const char *name, int oflag, mode_t mode)
150+
SEC("kprobe/shm_open")
151+
int kp_shm_open(struct pt_regs *ctx) { // Changed signature
152+
u32 tgid = bpf_get_current_pid_tgid() >> 32;
153+
u32 tid = (u32)bpf_get_current_pid_tgid();
154+
bpf_trace_printk("BPF_KP: kp_shm_open TGID: %u\n", sizeof("BPF_KP: kp_shm_open TGID: %u\n")-1, tgid);
155+
156+
struct shm_tgid_stats_t *stats = get_tgid_stats(tgid);
157+
if (!stats) return 0;
158+
stats->shm_open_calls++;
159+
160+
// Access arguments using PT_REGS_PARMx(ctx)
161+
// These macros depend on architecture (e.g. x86_64 uses di, si, dx, cx, r8, r9)
162+
// PT_REGS_PARM1(ctx) is the first argument, PT_REGS_PARM2(ctx) the second, etc.
163+
// The types must match the kernel function's arguments.
164+
const char *name_user_ptr = (const char *)PT_REGS_PARM1(ctx);
165+
// int oflag = (int)PT_REGS_PARM2(ctx); // If needed
166+
// mode_t mode = (mode_t)PT_REGS_PARM3(ctx); // If needed
167+
168+
169+
struct shm_open_kprobe_args_t k_args = {};
170+
bpf_probe_read_user_str(&k_args.name, sizeof(k_args.name), (void *)name_user_ptr);
171+
active_shm_opens_kprobe.update(&tid, &k_args);
172+
173+
return 0;
174+
}
175+
176+
// Standard kretprobe signature: first arg is struct pt_regs *ctx
177+
// The return value of the probed function is in PT_REGS_RC(ctx)
178+
SEC("kretprobe/shm_open")
179+
int krp_shm_open(struct pt_regs *ctx) { // Changed signature
180+
int ret_val = (int)PT_REGS_RC(ctx); // Get return value
181+
182+
u32 tgid = bpf_get_current_pid_tgid() >> 32;
183+
u32 tid = (u32)bpf_get_current_pid_tgid();
184+
bpf_trace_printk("BPF_KP: krp_shm_open TGID: %u, ret: %d\n", sizeof("BPF_KP: krp_shm_open TGID: %u, ret: %d\n")-1, tgid, ret_val);
185+
186+
struct shm_open_kprobe_args_t *k_args __attribute__((unused)) = active_shm_opens_kprobe.lookup(&tid);
187+
active_shm_opens_kprobe.delete(&tid);
188+
189+
if (ret_val >= 0) { // Success (ret_val is fd)
190+
struct shm_tgid_stats_t *stats = get_tgid_stats(tgid);
191+
if (!stats) return 0;
192+
stats->shm_open_success++;
193+
}
194+
return 0;
195+
}
196+
197+
// Kernel function shm_unlink(const char *name)
198+
SEC("kprobe/shm_unlink")
199+
int kp_shm_unlink(struct pt_regs *ctx) { // Changed signature
200+
u32 tgid = bpf_get_current_pid_tgid() >> 32;
201+
bpf_trace_printk("BPF_KP: kp_shm_unlink TGID: %u\n", sizeof("BPF_KP: kp_shm_unlink TGID: %u\n")-1, tgid);
202+
203+
// const char *name_user_ptr = (const char *)PT_REGS_PARM1(ctx); // If you need to read the name
204+
// char name_buf[SHM_NAME_LEN];
205+
// bpf_probe_read_user_str(&name_buf, sizeof(name_buf), (void *)name_user_ptr);
206+
// bpf_trace_printk("BPF_KP: shm_unlink path: %s (cannot print directly)\n", ...);
207+
208+
209+
struct shm_tgid_stats_t *stats = get_tgid_stats(tgid);
210+
if (!stats) return 0;
211+
stats->shm_unlink_calls++;
212+
return 0;
213+
}
214+
215+
216+
// --- mmap/munmap (Using Tracepoints - Unchanged) ---
217+
/*
 * sys_enter_mmap: for mappings whose flags (syscall argument 3) include
 * MAP_SHARED, remember length, flags and fd keyed by thread id so that
 * trace_exit_mmap can account for the mapping.  Other mappings are ignored.
 *
 * Always returns 0.
 */
SEC("tracepoint/syscalls/sys_enter_mmap")
int trace_enter_mmap(struct trace_event_raw_sys_enter *ctx) {
    int flags_arg = (int)ctx->args[3];
    if (!(flags_arg & MAP_SHARED)) {
        return 0;
    }

    u64 pid_tgid = bpf_get_current_pid_tgid();
    u32 current_tgid = pid_tgid >> 32;
    u32 tid = (u32)pid_tgid;

    /* BCC adds the format size itself; only the values are passed. */
    bpf_trace_printk("BPF_TP: enter_mmap (SHARED) TGID: %u, flags: 0x%x\n",
                     current_tgid, (u64)flags_arg);

    struct mmap_args_t args = {};
    args.len = (u64)ctx->args[1];
    args.flags = flags_arg;
    args.fd = (int)ctx->args[4];
    active_mmaps.update(&tid, &args);
    return 0;
}
233+
/*
 * sys_exit_mmap: pair up with the entry recorded by trace_enter_mmap for this
 * thread.  A successful MAP_SHARED mapping bumps mmap_shared_calls and adds
 * its length to the running total.  The in-flight entry is removed on every
 * path that found one.
 *
 * Always returns 0.
 */
SEC("tracepoint/syscalls/sys_exit_mmap")
int trace_exit_mmap(struct trace_event_raw_sys_exit *ctx) {
    u64 pid_tgid = bpf_get_current_pid_tgid();
    u32 tgid = pid_tgid >> 32;
    u32 tid = (u32)pid_tgid;

    struct mmap_args_t *entry_args = active_mmaps.lookup(&tid);
    if (!entry_args) {
        return 0; /* not a tracked (MAP_SHARED) mmap */
    }

    if (entry_args->flags & MAP_SHARED) {
        u64 ret_val_u64 = (u64)ctx->ret;

        /* BCC adds the format size itself; only the values are passed. */
        bpf_trace_printk("BPF_TP: exit_mmap (SHARED) TGID: %u, ret: 0x%lx\n",
                         tgid, ret_val_u64);

        /*
         * The raw syscall return is -errno on failure, not MAP_FAILED (-1);
         * that value is produced by the C library.  Comparing against -1
         * only caught -EPERM.  Treat the whole errno range [-4095, -1] as
         * failure, as the kernel's IS_ERR_VALUE() does.
         */
        int failed = (unsigned long)ctx->ret >= (unsigned long)-4095L;
        if (!failed) {
            struct shm_tgid_stats_t *stats = get_tgid_stats(tgid);
            if (stats) {
                stats->mmap_shared_calls++;
                stats->total_mmap_shared_size_bytes += entry_args->len;
            }
        }
    }

    active_mmaps.delete(&tid);
    return 0;
}
254+
/*
 * sys_enter_munmap: count one munmap() call for the calling process.
 *
 * NOTE(review): the counter is named munmap_shared_calls, but no check is
 * made here that the region being unmapped was a MAP_SHARED mapping, so
 * every munmap() is counted.
 *
 * Always returns 0.
 */
SEC("tracepoint/syscalls/sys_enter_munmap")
int trace_enter_munmap(struct trace_event_raw_sys_enter *ctx) {
    u32 tgid = bpf_get_current_pid_tgid() >> 32;

    struct shm_tgid_stats_t *stats = get_tgid_stats(tgid);
    if (!stats) {
        return 0;
    }
    stats->munmap_shared_calls++;

    /* BCC adds the format size itself; only the values are passed. */
    bpf_trace_printk("BPF_TP: enter_munmap TGID: %u\n", tgid);
    return 0;
}
263+

0 commit comments

Comments
 (0)