Skip to content

Commit 814e8d4

Browse files
committed
intercept: preload is more robust against environment changes
1 parent f0f3c09 commit 814e8d4

File tree

9 files changed

+909
-160
lines changed

9 files changed

+909
-160
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

bear/src/environment.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
use std::collections::HashSet;
44

5-
pub const KEY_DESTINATION: &str = "INTERCEPT_COLLECTOR_ADDRESS";
5+
pub const KEY_INTERCEPT_STATE: &str = "BEAR_INTERCEPT";
66

77
// man page for `ld.so` (Linux dynamic linker/loader)
88
pub const KEY_OS__PRELOAD_PATH: &str = "LD_PRELOAD";
@@ -105,7 +105,7 @@ static GCC_INCLUDE_KEYS: std::sync::LazyLock<HashSet<&'static str>> = std::sync:
105105
});
106106

107107
pub fn relevant_env(key: &str) -> bool {
108-
matches!(key, KEY_DESTINATION | KEY_OS__PRELOAD_PATH | KEY_OS__MACOS_PRELOAD_PATH | KEY_OS__MACOS_FLAT_NAMESPACE)
108+
matches!(key, KEY_INTERCEPT_STATE | KEY_OS__PRELOAD_PATH | KEY_OS__MACOS_PRELOAD_PATH | KEY_OS__MACOS_FLAT_NAMESPACE)
109109
|| MAKE_PROGRAM_KEYS.contains(key)
110110
|| MAKE_FLAGS_KEYS.contains(key)
111111
|| CARGO_PROGRAM_KEYS.contains(key)

bear/src/intercept/environment.rs

Lines changed: 47 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
#[cfg(not(target_os = "macos"))]
44
use crate::environment::KEY_OS__PRELOAD_PATH;
5-
use crate::environment::{KEY_DESTINATION, KEY_OS__PATH};
5+
use crate::environment::{KEY_INTERCEPT_STATE, KEY_OS__PATH};
66
#[cfg(target_os = "macos")]
77
use crate::environment::{KEY_OS__MACOS_FLAT_NAMESPACE, KEY_OS__MACOS_PRELOAD_PATH};
88
use crate::intercept::supervise;
@@ -20,6 +20,42 @@ use thiserror::Error;
2020

2121
use crate::intercept::wrapper::{WrapperDirectory, WrapperDirectoryBuilder, WrapperDirectoryError};
2222

23+
/// Represents the state information needed for preload-based interception.
24+
///
25+
/// This struct is serialized to JSON and passed to the preloaded library via
26+
/// an environment variable. It contains all the information the library needs
27+
/// to report execution events back to the Bear process.
28+
#[derive(Clone, Debug, Eq, PartialEq, serde::Deserialize, serde::Serialize)]
29+
pub struct PreloadState {
30+
/// The socket address where execution events should be reported
31+
pub destination: SocketAddr,
32+
/// The path to the preload library itself
33+
pub library: PathBuf,
34+
}
35+
36+
impl TryInto<String> for PreloadState {
37+
type Error = serde_json::Error;
38+
39+
fn try_into(self) -> Result<String, Self::Error> {
40+
serde_json::to_string(&self)
41+
}
42+
}
43+
impl TryFrom<&str> for PreloadState {
44+
type Error = serde_json::Error;
45+
46+
fn try_from(value: &str) -> Result<Self, Self::Error> {
47+
serde_json::from_str(value)
48+
}
49+
}
50+
51+
impl TryFrom<String> for PreloadState {
52+
type Error = serde_json::Error;
53+
54+
fn try_from(value: String) -> Result<Self, Self::Error> {
55+
serde_json::from_str(&value)
56+
}
57+
}
58+
2359
/// Manages the environment setup for intercepting build commands during compilation.
2460
///
2561
/// `BuildEnvironment` is responsible for configuring the execution environment to enable
@@ -248,7 +284,11 @@ impl BuildEnvironment {
248284
environment_overrides.insert(KEY_OS__PRELOAD_PATH.to_string(), preload_updated);
249285
}
250286

251-
environment_overrides.insert(KEY_DESTINATION.to_string(), address.to_string());
287+
// Make the current state available as a single environment variable
288+
let state: String = PreloadState { destination: address, library: path.to_path_buf() }
289+
.try_into()
290+
.map_err(|_| ConfigurationError::PathNotFound)?;
291+
environment_overrides.insert(KEY_INTERCEPT_STATE.to_string(), state);
252292

253293
Ok(Self { environment_overrides, _wrapper_directory: None })
254294
}
@@ -331,7 +371,7 @@ pub enum ConfigurationError {
331371
/// - If `first` already exists in `original`, it's moved to the front
332372
/// - If `first` doesn't exist, it's prepended to the existing paths
333373
/// - Uses platform-appropriate path separators and handles path encoding
334-
fn insert_to_path<P: AsRef<Path>>(original: &str, first: P) -> Result<String, JoinPathsError> {
374+
pub fn insert_to_path<P: AsRef<Path>>(original: &str, first: P) -> Result<String, JoinPathsError> {
335375
let first_path = first.as_ref();
336376

337377
if original.is_empty() {
@@ -519,7 +559,10 @@ mod test {
519559
};
520560

521561
// Check that the serialized intercept state (destination and library) is set
522-
assert_eq!(sut.environment_overrides.get(KEY_DESTINATION), Some(&"127.0.0.1:8080".to_string()));
562+
assert_eq!(
563+
sut.environment_overrides.get(KEY_INTERCEPT_STATE),
564+
Some(&r#"{"destination":"127.0.0.1:8080","library":"/usr/local/lib/libexec.so"}"#.to_string())
565+
);
523566

524567
// Check platform-specific preload configuration
525568
#[cfg(target_os = "macos")]

bear/src/intercept/reporter.rs

Lines changed: 1 addition & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -10,16 +10,9 @@
1010
//! - Defining the `Reporter` trait for sending events.
1111
//! - Providing error types for initialization and reporting.
1212
//! - Implementing a factory to create TCP-based reporters.
13-
//!
14-
//! # Usage
15-
//!
16-
//! Use `ReporterFactory::create()` to instantiate a reporter with a socket address, or
17-
//! `ReporterFactory::create_as_ptr()` to obtain a raw pointer suitable for static/global usage.
18-
//! The reporter sends events to a remote collector at the specified address.
1913
2014
use crate::intercept::{Event, tcp};
2115
use std::net::SocketAddr;
22-
use std::sync::atomic::AtomicPtr;
2316
use thiserror::Error;
2417

2518
/// Trait for reporting intercepted events to a remote collector.
@@ -47,36 +40,7 @@ impl ReporterFactory {
4740
/// Creates a new TCP-based reporter for the given destination address.
4841
///
4942
/// The created reporter is not connected yet; it only stores the destination address.
50-
pub fn create(address: SocketAddr) -> impl Reporter {
43+
pub fn create(address: SocketAddr) -> tcp::ReporterOnTcp {
5144
tcp::ReporterOnTcp::new(address)
5245
}
53-
54-
/// Creates a new reporter and returns it as an atomic pointer.
55-
///
56-
/// This is useful for static/global usage where a stable pointer is required
57-
/// for the program's lifetime.
58-
///
59-
/// # Safety
60-
///
61-
/// The caller is responsible for ensuring the returned pointer is not used after
62-
/// the program terminates. The memory will be leaked intentionally to provide a
63-
/// stable pointer for the lifetime of the program.
64-
///
65-
/// Returns a null pointer if reporter creation fails. Caller must check for null
66-
/// before dereferencing.
67-
pub fn create_as_ptr(address_str: &str) -> AtomicPtr<tcp::ReporterOnTcp> {
68-
match address_str.parse::<SocketAddr>() {
69-
Ok(address) => {
70-
// Leak the reporter to get a stable pointer for the lifetime of the program
71-
let boxed_reporter = Box::new(tcp::ReporterOnTcp::new(address));
72-
let ptr = Box::into_raw(boxed_reporter);
73-
74-
AtomicPtr::new(ptr)
75-
}
76-
Err(err) => {
77-
log::warn!("Failed to create reporter: {err}");
78-
AtomicPtr::new(std::ptr::null_mut())
79-
}
80-
}
81-
}
8246
}

intercept-preload/Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,9 @@ libc.workspace = true
3333
platform-checks = { path = "../platform-checks" }
3434
cc.workspace = true
3535

36+
[dev-dependencies]
37+
serde_json.workspace = true
38+
3639
[lints.rust]
3740
unexpected_cfgs = { level = "warn", check-cfg = [
3841
'cfg(has_header_dlfcn_h)',

intercept-preload/src/c/shim.c

Lines changed: 45 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -31,15 +31,32 @@
3131
// Ensure symbols are exported from the shared library
3232
#define EXPORT __attribute__((visibility("default")))
3333

34+
// Platform-specific environment access
35+
//
36+
// When the dynamic linker loads the library, the `environ` variable might not
37+
// be available yet. This is the case on macOS where we need to use
38+
// `_NSGetEnviron()` to reliably access the environment during library
39+
// initialization.
40+
41+
#if defined(__APPLE__)
42+
#include <crt_externs.h>
43+
#define get_environ() (*_NSGetEnviron())
44+
#else
45+
extern char **environ;
46+
#define get_environ() environ
47+
#endif
48+
3449
// Rust implementation functions
3550
//
3651
// These are defined in implementation.rs with #[no_mangle] and handle:
3752
// - Reporting the execution to the collector
3853
// - Calling the real function via dlsym(RTLD_NEXT, ...)
3954

40-
extern int rust_execv(const char *path, char *const argv[]);
55+
// Session initialization - called from constructor to capture environment
56+
extern void rust_session_init(char *const *envp);
57+
58+
// Exec family functions
4159
extern int rust_execve(const char *path, char *const argv[], char *const envp[]);
42-
extern int rust_execvp(const char *file, char *const argv[]);
4360
extern int rust_execvpe(const char *file, char *const argv[], char *const envp[]);
4461
extern int rust_execvP(const char *file, const char *search_path, char *const argv[]);
4562
extern int rust_exect(const char *path, char *const argv[], char *const envp[]);
@@ -54,6 +71,27 @@ extern int rust_posix_spawnp(pid_t *pid, const char *file,
5471
extern FILE *rust_popen(const char *command, const char *mode);
5572
extern int rust_system(const char *command);
5673

74+
// Library constructor
75+
//
76+
// This function is called when the library is loaded into memory. It captures
77+
// the current environment and passes it to Rust for session initialization.
78+
// This is critical because:
79+
//
80+
// 1. On macOS, `environ` is not available during early library initialization,
81+
// so we use `_NSGetEnviron()` instead.
82+
// 2. Build systems may clear or modify environment variables like LD_PRELOAD
83+
// and INTERCEPT_COLLECTOR_ADDRESS. By capturing them early, we can restore
84+
// them when executing child processes.
85+
86+
__attribute__((constructor))
87+
static void on_load(void)
88+
{
89+
char *const *envp = get_environ();
90+
if (envp != NULL) {
91+
rust_session_init(envp);
92+
}
93+
}
94+
5795
// Count variadic arguments until NULL terminator
5896
// The va_list is consumed by this function
5997
static size_t va_count_args(va_list ap)
@@ -97,7 +135,7 @@ EXPORT int execl(const char *path, const char *arg0, ...)
97135

98136
va_end(ap);
99137

100-
return rust_execv(path, argv);
138+
return rust_execve(path, argv, get_environ());
101139
}
102140
#endif
103141

@@ -125,7 +163,7 @@ EXPORT int execlp(const char *file, const char *arg0, ...)
125163

126164
va_end(ap);
127165

128-
return rust_execvp(file, argv);
166+
return rust_execvpe(file, argv, get_environ());
129167
}
130168
#endif
131169

@@ -168,7 +206,7 @@ EXPORT int execle(const char *path, const char *arg0, ...)
168206
#if defined(has_symbol_execv)
169207
EXPORT int execv(const char *path, char *const argv[])
170208
{
171-
return rust_execv(path, argv);
209+
return rust_execve(path, argv, get_environ());
172210
}
173211
#endif
174212

@@ -188,7 +226,7 @@ EXPORT int execve(const char *path, char *const argv[], char *const envp[])
188226
#if defined(has_symbol_execvp)
189227
EXPORT int execvp(const char *file, char *const argv[])
190228
{
191-
return rust_execvp(file, argv);
229+
return rust_execvpe(file, argv, get_environ());
192230
}
193231
#endif
194232

@@ -266,4 +304,4 @@ EXPORT int system(const char *command)
266304
{
267305
return rust_system(command);
268306
}
269-
#endif
307+
#endif

0 commit comments

Comments
 (0)