1
1
// Copyright 2025-Present Datadog, Inc. https://www.datadoghq.com/
2
2
// SPDX-License-Identifier: Apache-2.0
3
3
4
+ //! Crash data collector process management for Unix socket communication.
5
+ //!
6
+ //! This module manages the collector process that writes crash data to Unix sockets.
7
+ //! The collector runs in a forked child process and is responsible for serializing
8
+ //! and transmitting crash information to the receiver process.
9
+ //!
10
+ //! ## Communication Flow (Collector Side)
11
+ //!
12
+ //! The collector performs these steps to transmit crash data:
13
+ //!
14
+ //! 1. **Process Setup**: Forks from crashing process, closes stdio, disables SIGPIPE
15
+ //! 2. **Socket Creation**: Creates `UnixStream` from inherited file descriptor
16
+ //! 3. **Data Serialization**: Calls [`emit_crashreport()`] to write structured crash data
17
+ //! 4. **Graceful Exit**: Flushes data and exits with `libc::_exit(0)`
18
+ //!
19
+ //! ```text
20
+ //! ┌─────────────────────┐ ┌──────────────────────┐
21
+ //! │ Signal Handler │ │ Collector Process │
22
+ //! │ (Original Process) │ │ (Forked Child) │
23
+ //! │ │ │ │
24
+ //! │ 1. Catch crash │────fork()──────────►│ 2. Close stdio │
25
+ //! │ 2. Fork collector │ │ 3. Create UnixStream │
26
+ //! │ 3. Wait for child │ │ 4. Write crash data │
27
+ //! │ │◄────wait()──────────│ 5. Exit cleanly │
28
+ //! └─────────────────────┘ └──────────────────────┘
29
+ //! ```
30
+ //!
31
+ //! ## Signal Safety
32
+ //!
33
+ //! All collector operations use only async-signal-safe functions since the collector
34
+ //! process is forked from within a signal handler:
35
+ //!
36
+ //! - No memory allocations
37
+ //! - Pre-prepared data structures
38
+ //! - Only safe system calls
39
+ //!
40
+ //! For complete protocol documentation, see [`crate::shared::unix_socket_communication`].
41
+ //!
42
+ //! [`emit_crashreport()`]: crate::collector::emitters::emit_crashreport
43
+
4
44
use super :: process_handle:: ProcessHandle ;
5
45
use super :: receiver_manager:: Receiver ;
6
46
use ddcommon:: timeout:: TimeoutManager ;
@@ -25,6 +65,42 @@ pub enum CollectorSpawnError {
25
65
}
26
66
27
67
impl Collector {
68
+ /// Spawns a collector process to write crash data to the Unix socket.
69
+ ///
70
+ /// This method forks a child process that will serialize and transmit crash data
71
+ /// to the receiver process via the Unix socket established in the receiver.
72
+ ///
73
+ /// ## Process Architecture
74
+ ///
75
+ /// ```text
76
+ /// Parent Process (Signal Handler) Child Process (Collector)
77
+ /// ┌─────────────────────────────┐ ┌─────────────────────────────┐
78
+ /// │ 1. Catches crash signal │ │ 4. Closes stdio (0,1,2) │
79
+ /// │ 2. Forks collector process │──►│ 5. Disables SIGPIPE │
80
+ /// │ 3. Returns to caller │ │ 6. Creates UnixStream │
81
+ /// │ │ │ 7. Calls emit_crashreport() │
82
+ /// │ │ │ 8. Exits with _exit(0) │
83
+ /// └─────────────────────────────┘ └─────────────────────────────┘
84
+ /// ```
85
+ ///
86
+ /// ## Arguments
87
+ ///
88
+ /// * `receiver` - The receiver process that will read crash data from the Unix socket
89
+ /// * `config` - Crash tracker configuration
90
+ /// * `config_str` - JSON-serialized configuration string
91
+ /// * `metadata_str` - JSON-serialized metadata string
92
+ /// * `sig_info` - Signal information from the crash
93
+ /// * `ucontext` - Processor context (`ucontext_t`) at crash time
94
+ ///
95
+ /// ## Returns
96
+ ///
97
+ /// * `Ok(Collector)` - Handle to the spawned collector process
98
+ /// * `Err(CollectorSpawnError::ForkFailed)` - If the fork operation fails
99
+ ///
100
+ /// ## Safety
101
+ ///
102
+ /// This function is called from signal handler context and uses only async-signal-safe operations.
103
+ /// The child process performs all potentially unsafe operations after fork.
28
104
pub ( crate ) fn spawn (
29
105
receiver : & Receiver ,
30
106
config : & CrashtrackerConfiguration ,
@@ -33,8 +109,8 @@ impl Collector {
33
109
sig_info : * const siginfo_t ,
34
110
ucontext : * const ucontext_t ,
35
111
) -> Result < Self , CollectorSpawnError > {
36
- // When we spawn the child, our pid becomes the ppid.
37
- // SAFETY: This function has no safety requirements .
112
+ // When we spawn the child, our pid becomes the ppid for process tracking .
113
+ // SAFETY: getpid() is async-signal-safe .
38
114
let pid = unsafe { libc:: getpid ( ) } ;
39
115
40
116
let fork_result = alt_fork ( ) ;
@@ -66,6 +142,42 @@ impl Collector {
66
142
}
67
143
}
68
144
145
+ /// Collector child process entry point - serializes and transmits crash data via Unix socket.
146
+ ///
147
+ /// This function runs in the forked collector process and performs the actual crash data
148
+ /// transmission. It establishes the Unix socket connection and writes all crash information
149
+ /// using the structured protocol.
150
+ ///
151
+ /// ## Process Flow
152
+ ///
153
+ /// 1. **Isolate from parent**: Closes stdin, stdout, stderr to prevent interference
154
+ /// 2. **Signal handling**: Ignores SIGPIPE so a broken pipe yields a write error, not process death
155
+ /// 3. **Socket setup**: Creates `UnixStream` from inherited file descriptor
156
+ /// 4. **Data transmission**: Calls [`emit_crashreport()`] to write structured crash data
157
+ /// 5. **Clean exit**: Exits with `_exit(0)` to avoid cleanup issues
158
+ ///
159
+ /// ## Communication Protocol
160
+ ///
161
+ /// The crash data is written as a structured stream with delimited sections:
162
+ /// - Metadata, Configuration, Signal Info, Process Context
163
+ /// - Counters, Spans, Tags, Traces, Memory Maps, Stack Trace
164
+ /// - Completion marker
165
+ ///
166
+ /// For details, see [`crate::shared::unix_socket_communication`].
167
+ ///
168
+ /// ## Arguments
169
+ ///
170
+ /// * `config` - Crash tracker configuration object
171
+ /// * `config_str` - JSON-serialized configuration for receiver
172
+ /// * `metadata_str` - JSON-serialized metadata for receiver
173
+ /// * `sig_info` - Signal information from crash context
174
+ /// * `ucontext` - Processor context at crash time
175
+ /// * `uds_fd` - Unix socket file descriptor for writing crash data
176
+ /// * `ppid` - Parent process ID for identification
177
+ ///
178
+ /// This function never returns - it always exits via `_exit(0)` or `terminate()`.
179
+ ///
180
+ /// [`emit_crashreport()`]: crate::collector::emitters::emit_crashreport
69
181
pub ( crate ) fn run_collector_child (
70
182
config : & CrashtrackerConfiguration ,
71
183
config_str : & str ,
@@ -75,22 +187,24 @@ pub(crate) fn run_collector_child(
75
187
uds_fd : RawFd ,
76
188
ppid : libc:: pid_t ,
77
189
) -> ! {
78
- // Close stdio
79
- let _ = unsafe { libc:: close ( 0 ) } ;
80
- let _ = unsafe { libc:: close ( 1 ) } ;
81
- let _ = unsafe { libc:: close ( 2 ) } ;
190
+ // Close stdio to isolate from parent process and prevent interference with crash data transmission
191
+ let _ = unsafe { libc:: close ( 0 ) } ; // stdin
192
+ let _ = unsafe { libc:: close ( 1 ) } ; // stdout
193
+ let _ = unsafe { libc:: close ( 2 ) } ; // stderr
82
194
83
- // Disable SIGPIPE
195
+ // Disable SIGPIPE - if receiver closes socket early, we want to handle it gracefully
196
+ // rather than being killed by SIGPIPE
84
197
let _ = unsafe {
85
198
signal:: sigaction (
86
199
signal:: SIGPIPE ,
87
200
& SigAction :: new ( SigHandler :: SigIgn , SaFlags :: empty ( ) , SigSet :: empty ( ) ) ,
88
201
)
89
202
} ;
90
203
91
- // Emit crashreport
204
+ // Create Unix socket stream for crash data transmission
92
205
let mut unix_stream = unsafe { UnixStream :: from_raw_fd ( uds_fd) } ;
93
206
207
+ // Serialize and transmit all crash data using structured protocol
94
208
let report = emit_crashreport (
95
209
& mut unix_stream,
96
210
config,
0 commit comments