@@ -25,6 +25,61 @@ BPF_RODATA_VAR(u32, task_stack_offset, 0)
2525// The offset of struct pt_regs within the kernel entry stack.
2626BPF_RODATA_VAR (u32 , stack_ptregs_offset , 0 )
2727
// If enabled, the profiler translates host-level PIDs/TGIDs into the
// corresponding IDs within a specific PID namespace. This is essential
// for sidecar deployments to report PIDs consistent with the container's
// internal view (e.g., reporting PID 1 instead of the host PID).
// It requires a kernel with BTF support.
BPF_RODATA_VAR(bool, pid_ns_translation_enabled, false)
34+
// The inode number of the target PID namespace.
// Obtained by calling stat() on /proc/[pid]/ns/pid.
// While pid_ns_translation_enabled is set, native_tracer_entry only collects
// traces for tasks whose PID namespace inode equals this value.
BPF_RODATA_VAR(u64, target_pid_ns_inode, 0)
38+
// The device ID (st_dev) of the target PID namespace inode.
// Required by the bpf_get_ns_current_pid_tgid helper to uniquely
// identify the namespace filesystem (nsfs) instance.
// NOTE(review): the offset-based translation in get_current_ns_pid_tgid does not
// read this value - confirm whether another code path still needs it.
BPF_RODATA_VAR(u64, target_pid_ns_dev, 0)
43+
44+ // Offsets for walking kernel structures to translate host PIDs into a target PID
45+ // namespace (see parseBTFForNsTranslation). Hierarchy of related kernel types:
46+ //
47+ // task_struct nsproxy pid_namespace
48+ // +------------------+ +----------------+ +------------------+
49+ // | nsproxy |---------->| pid_ns_for_ |------>| ns.inum |
50+ // | thread_pid |--+ | children | +------------------+
51+ // | group_leader |--| +----------------+ ^
52+ // +------------------+ | |
53+ // | v |
54+ // | struct pid |
55+ // | +------------------+ |
56+ // +-------->| level | |
57+ // | numbers[] |--+ (array of struct upid)
58+ // +------------------+ | |
59+ // v v
60+ // struct upid struct upid ...
61+ // +----------+ +----------+
62+ // | nr | | nr | (PID value per level)
63+ // +----------+ +----------+
64+ //
// All offsets below default to 0 and must be filled in at load time before
// pid_ns_translation_enabled is set; get_current_ns_pid_tgid adds them to raw
// kernel pointers.
//
// task_struct:
//   offset of nsproxy (-> struct nsproxy)
BPF_RODATA_VAR(u32, task_nsproxy_off, 0)
//   offset of thread_pid (-> struct pid)
BPF_RODATA_VAR(u32, task_thread_pid_off, 0)
//   offset of group_leader (-> task_struct of main thread)
BPF_RODATA_VAR(u32, task_group_leader_off, 0)
// nsproxy:
//   offset of pid_ns_for_children (-> struct pid_namespace)
BPF_RODATA_VAR(u32, nsproxy_pid_ns_for_children_off, 0)
// pid_namespace: one per PID namespace; ns.inum is the inode number (e.g. for /proc/pid/ns/pid).
BPF_RODATA_VAR(u32, pid_ns_inum_off, 0) // offset of ns.inum within pid_namespace
// pid: represents a PID across namespace levels; numbers[] has one upid per level.
BPF_RODATA_VAR(u32, pid_level_off, 0)   // offset of level
BPF_RODATA_VAR(u32, pid_numbers_off, 0) // offset of numbers (array of struct upid)
// upid: PID value in a single namespace; nr is the numeric PID in that namespace.
BPF_RODATA_VAR(u32, upid_nr_off, 0) // offset of nr within struct upid
BPF_RODATA_VAR(u32, upid_size, 0)   // sizeof(struct upid), stride of pid.numbers[]
82+
2883// Macro to create a map named exe_id_to_X_stack_deltas that is a nested maps with a fileID for the
2984// outer map and an array as inner map that holds up to 2^X stack delta entries for the given
3085// fileID.
@@ -607,17 +662,93 @@ static EBPF_INLINE int unwind_native(struct pt_regs *ctx)
607662 return -1 ;
608663}
609664
610- SEC ("perf_event/native_tracer_entry" )
611- int native_tracer_entry (struct bpf_perf_event_data * ctx )
// Output of get_current_ns_pid_tgid(). Every field is reset to 0 before any
// kernel memory is read, so a field that could not be read stays 0.
struct ns_pid_info {
  // ns.inum of the pid_namespace reached via task->nsproxy->pid_ns_for_children.
  u32 ns_inode;
  // ID of the current task itself (the thread), taken from
  // task->thread_pid->numbers[level].nr.
  u32 vpid;
  // ID of the thread group leader, taken from
  // task->group_leader->thread_pid->numbers[level].nr.
  u32 vtgid;
};
670+
671+ #ifdef TESTING_COREDUMP
672+ static int get_current_ns_pid_tgid (struct ns_pid_info * pid_info )
612673{
613- // Get the PID and TGID register.
614- u64 id = bpf_get_current_pid_tgid ();
615- u32 pid = id >> 32 ;
616- u32 tid = id & 0xFFFFFFFF ;
674+ * pid_info = (struct ns_pid_info ){0 , 0 , 0 };
675+ return 0 ;
676+ }
677+ #else
678+ // Get namespace inode, virtual PID and virtual TGID using only offsets (no kernel struct defs).
679+ // Offsets must be set at load time via BPF_RODATA;
680+ // Return 0 on success, -1 on failure.
681+ static int get_current_ns_pid_tgid (struct ns_pid_info * pid_info )
682+ {
683+ * pid_info = (struct ns_pid_info ){0 , 0 , 0 };
684+
685+ u64 ptr_val ;
686+ u32 level ;
617687
618- if (pid == 0 && filter_idle_frames ) {
688+ if (! pid_ns_translation_enabled ) {
619689 return 0 ;
620690 }
691+ void * task = (void * )bpf_get_current_task ();
692+ char * t = (char * )task ;
693+
694+ if (bpf_probe_read_kernel (& ptr_val , sizeof (ptr_val ), t + task_nsproxy_off ) == 0 && ptr_val ) {
695+ if (
696+ bpf_probe_read_kernel (
697+ & ptr_val , sizeof (ptr_val ), (char * )ptr_val + nsproxy_pid_ns_for_children_off ) == 0 &&
698+ ptr_val ) {
699+ bpf_probe_read_kernel (
700+ & pid_info -> ns_inode , sizeof (pid_info -> ns_inode ), (char * )ptr_val + pid_ns_inum_off );
701+ }
702+ }
703+
704+ if (bpf_probe_read_kernel (& ptr_val , sizeof (ptr_val ), t + task_thread_pid_off ) != 0 ) {
705+ return -1 ;
706+ }
707+ if (
708+ bpf_probe_read_kernel (& level , sizeof (level ), (char * )ptr_val + pid_level_off ) != 0 ||
709+ level > 32 ) {
710+ return -1 ;
711+ }
712+ bpf_probe_read_kernel (
713+ & pid_info -> vpid ,
714+ sizeof (pid_info -> vpid ),
715+ (char * )ptr_val + pid_numbers_off + level * upid_size + upid_nr_off );
716+
717+ if (bpf_probe_read_kernel (& ptr_val , sizeof (ptr_val ), t + task_group_leader_off ) == 0 && ptr_val ) {
718+ if (
719+ bpf_probe_read_kernel (& ptr_val , sizeof (ptr_val ), (char * )ptr_val + task_thread_pid_off ) ==
720+ 0 &&
721+ ptr_val ) {
722+ bpf_probe_read_kernel (
723+ & pid_info -> vtgid ,
724+ sizeof (pid_info -> vtgid ),
725+ (char * )ptr_val + pid_numbers_off + level * upid_size + upid_nr_off );
726+ }
727+ }
728+ return 0 ;
729+ }
730+ #endif
731+
732+ SEC ("perf_event/native_tracer_entry" )
733+ int native_tracer_entry (struct bpf_perf_event_data * ctx )
734+ {
735+ u32 pid = 0 ;
736+ u32 tid = 0 ;
737+ if (pid_ns_translation_enabled ) {
738+ struct ns_pid_info pid_info = {0 };
739+ if (get_current_ns_pid_tgid (& pid_info ) != 0 ) {
740+ return 0 ;
741+ }
742+ if (pid_info .ns_inode != target_pid_ns_inode ) {
743+ return 0 ;
744+ }
745+ pid = pid_info .vpid ;
746+ tid = pid_info .vtgid ;
747+ } else {
748+ u64 id = bpf_get_current_pid_tgid ();
749+ pid = id >> 32 ;
750+ tid = id & 0xFFFFFFFF ;
751+ }
621752
622753 u64 ts = bpf_ktime_get_ns ();
623754 return collect_trace ((struct pt_regs * )& ctx -> regs , TRACE_SAMPLING , pid , tid , ts , 0 );
0 commit comments