// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)

// SEC() is not provided by the headers BCC pulls in, so define it here for the section annotations below
#ifndef SEC
# define SEC(name) __attribute__((section(name), used))
#endif

// Basic integer type aliases (not provided by the headers BCC includes here)
typedef unsigned long long u64;
typedef unsigned int u32;
typedef unsigned char u8;
// pid_t is usually defined by the system headers included by BCC

#define TASK_COMM_LEN 16

// --- Manually defined tracepoint context structures ---
// Layouts mirror /sys/kernel/debug/tracing/events/sched/*/format; the leading
// u64 stands in for the common trace_entry header.
struct trace_event_raw_sched_switch {
    unsigned long long __unused_header;
    char prev_comm[TASK_COMM_LEN];
    pid_t prev_pid;
    int prev_prio;
    long prev_state;
    char next_comm[TASK_COMM_LEN];
    pid_t next_pid;
    int next_prio;
};

struct trace_event_raw_sched_wakeup {
    unsigned long long __unused_header;
    char comm[TASK_COMM_LEN];
    pid_t pid;
    int prio;
    int success;    // dropped from this tracepoint's format on newer kernels; unused below
    int target_cpu;
};

// Event types sent to user space
#define EVENT_TYPE_SCHED_STATS 1

// Data structure for events sent to user space
struct sched_event_data {
    u64 timestamp_ns;
    u32 tgid;
    u32 tid;
    u64 cgroup_id;
    char comm[TASK_COMM_LEN];
    u64 on_cpu_ns;
    u64 runq_latency_ns;
    u8 event_type;
    u8 prev_state_task_switched_out;
};

// --- BCC-style map definitions ---
// Both maps are keyed by TID and store a timestamp in nanoseconds.
BPF_HASH(task_scheduled_in_ts, pid_t, u64, 10240);
BPF_HASH(task_wakeup_ts, pid_t, u64, 10240);

// Use BCC's perf output mechanism
BPF_PERF_OUTPUT(events_out);


// --- BPF program functions ---
// Under BCC these are attached from user space by function name
// (attach_tracepoint); the SEC() names are informational.

// Record the wakeup timestamp so sched_switch can compute run-queue latency.
SEC("tracepoint/sched/sched_wakeup")
int tp_sched_wakeup(struct trace_event_raw_sched_wakeup *ctx) {
    pid_t tid = ctx->pid;
    u64 ts = bpf_ktime_get_ns();
    task_wakeup_ts.update(&tid, &ts);
    return 0;
}

// Same bookkeeping for newly created tasks.
SEC("tracepoint/sched/sched_wakeup_new")
int tp_sched_wakeup_new(struct trace_event_raw_sched_wakeup *ctx) {
    pid_t tid = ctx->pid;
    u64 ts = bpf_ktime_get_ns();
    task_wakeup_ts.update(&tid, &ts);
    return 0;
}

| 79 | +SEC("tracepoint/sched/sched_switch") |
| 80 | +int tp_sched_switch(struct trace_event_raw_sched_switch *ctx) { |
| 81 | + u64 current_ts = bpf_ktime_get_ns(); |
| 82 | + pid_t prev_tid = ctx->prev_pid; |
| 83 | + pid_t next_tid = ctx->next_pid; |
| 84 | + // This initializes to zero |
| 85 | + struct sched_event_data data = {}; |
| 86 | + u64 *scheduled_in_ts_ptr; |
| 87 | + u64 *wakeup_ts_ptr; |
| 88 | + |
    // --- Handle the previous task switching out ---
    scheduled_in_ts_ptr = task_scheduled_in_ts.lookup(&prev_tid);

    if (scheduled_in_ts_ptr) {
        u64 on_cpu_duration = current_ts - *scheduled_in_ts_ptr;
        task_scheduled_in_ts.delete(&prev_tid);

        data.timestamp_ns = current_ts;
        // sched_switch fires while the outgoing task is still current,
        // so bpf_get_current_pid_tgid() yields prev's TGID.
        data.tgid = bpf_get_current_pid_tgid() >> 32;
        data.tid = prev_tid;

        for (int i = 0; i < TASK_COMM_LEN; ++i) {
            data.comm[i] = ctx->prev_comm[i];
            if (ctx->prev_comm[i] == '\0') break;
        }
        data.comm[TASK_COMM_LEN - 1] = '\0';

        // current is still the outgoing task, so this is prev's cgroup id.
        data.cgroup_id = bpf_get_current_cgroup_id();
        data.on_cpu_ns = on_cpu_duration;
        data.runq_latency_ns = 0;
        data.event_type = EVENT_TYPE_SCHED_STATS;
        // Truncated to the low 8 bits of prev_state.
        data.prev_state_task_switched_out = (u8)ctx->prev_state;

        events_out.perf_submit(ctx, &data, sizeof(data));
    }

    // --- Handle the next task switching in ---
    task_scheduled_in_ts.update(&next_tid, &current_ts);

    wakeup_ts_ptr = task_wakeup_ts.lookup(&next_tid);
    if (wakeup_ts_ptr) {
        u64 runq_latency = current_ts - *wakeup_ts_ptr;
        task_wakeup_ts.delete(&next_tid);

        struct sched_event_data data_next = {};
        data_next.timestamp_ns = current_ts;
        // The tracepoint does not expose next's TGID (current is still the
        // outgoing task here), so report the TID; user space can resolve
        // the TGID from it if needed.
        data_next.tgid = next_tid;
        data_next.tid = next_tid;

        for (int i = 0; i < TASK_COMM_LEN; ++i) {
            data_next.comm[i] = ctx->next_comm[i];
            if (ctx->next_comm[i] == '\0') break;
        }
        data_next.comm[TASK_COMM_LEN - 1] = '\0';

        // Caveat: this is the outgoing task's cgroup id, not next's, because
        // current has not switched yet when this tracepoint fires.
        data_next.cgroup_id = bpf_get_current_cgroup_id();
        data_next.on_cpu_ns = 0;
        data_next.runq_latency_ns = runq_latency;
        data_next.event_type = EVENT_TYPE_SCHED_STATS;
        data_next.prev_state_task_switched_out = 0;

        events_out.perf_submit(ctx, &data_next, sizeof(data_next));
    }
    return 0;
}
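
The BPF_HASH / BPF_PERF_OUTPUT macros and the map .lookup()/.update() call style above are BCC rewriter features, so this file is compiled and attached by a user-space BCC front-end rather than loaded directly with libbpf. Below is a minimal, illustrative Python sketch of such a loader; the file name sched_events.c, the attach-by-fn_name choice, and the print format are assumptions, not part of this change.

```python
#!/usr/bin/env python3
# Minimal BCC front-end sketch for the program above (illustrative only).
# Assumes the BPF C code is saved as "sched_events.c" next to this script.
from bcc import BPF

b = BPF(src_file="sched_events.c")

# BCC attaches tracepoints by function name; the SEC() names are not used here.
b.attach_tracepoint(tp="sched:sched_wakeup",     fn_name="tp_sched_wakeup")
b.attach_tracepoint(tp="sched:sched_wakeup_new", fn_name="tp_sched_wakeup_new")
b.attach_tracepoint(tp="sched:sched_switch",     fn_name="tp_sched_switch")

def handle_event(cpu, data, size):
    # BCC generates a ctypes view of struct sched_event_data automatically.
    event = b["events_out"].event(data)
    print("%-16s tid=%-6d on_cpu_ns=%-12d runq_ns=%-12d" % (
        event.comm.decode("utf-8", "replace"),
        event.tid,
        event.on_cpu_ns,
        event.runq_latency_ns,
    ))

b["events_out"].open_perf_buffer(handle_event)
while True:
    try:
        b.perf_buffer_poll()
    except KeyboardInterrupt:
        break
```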