Skip to content

Commit 05f6fc7

Browse files
committed
refactor(profiling): extract profiling into dedicated crate
Move duplicated profiling functionality from multiple crates into a single shared 'profiler' crate. Key changes: - Create new 'profiler' crate with optimized implementation - Use AtomicU64 instead of Mutex for snapshot counter - Add proper error handling with thiserror - Ensure proper directory structure for profiling output - Make profiling opt-in via a feature flag
1 parent 9217e7e commit 05f6fc7

File tree

11 files changed

+300
-385
lines changed

11 files changed

+300
-385
lines changed

Cargo.lock

Lines changed: 13 additions & 16 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ members = [
66
"crates/dips",
77
"crates/indexer-receipt",
88
"crates/monitor",
9+
"crates/profiler",
910
"crates/query",
1011
"crates/service",
1112
"crates/tap-agent",

crates/profiler/Cargo.toml

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
[package]
2+
name = "profiler"
3+
version = "0.1.0"
4+
edition = "2021"
5+
6+
7+
[features]
8+
default = ["flamegraph", "protobuf-codec"]
9+
flamegraph = ["pprof/flamegraph"]
10+
protobuf-codec = ["pprof/protobuf-codec"]
11+
12+
[dependencies]
13+
tracing.workspace = true
14+
thiserror.workspace = true
15+
16+
pprof = { version = "0.14", default-features = false }
17+
timer = { version = "0.2" }
18+
chrono = { version = "0.4" }

crates/profiler/src/error.rs

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
// Copyright 2023-, Edge & Node, GraphOps, and Semiotic Labs.
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
use thiserror::Error;
5+
6+
#[derive(Error, Debug)]
7+
pub enum ProfilerError {
8+
#[error("IO Error: {0}")]
9+
IoError(#[from] std::io::Error),
10+
11+
#[error("Failed to create flamegraph: {0}")]
12+
FlamegraphCreationError(String),
13+
14+
#[error("Failed to generate protobuf: {0}")]
15+
ProtobufError(String),
16+
17+
#[error("Failed to generate profile report: {0}")]
18+
ReportError(String),
19+
20+
#[error("Failed to serialize profile: {0}")]
21+
SerializationError(String),
22+
23+
#[error("System time error: {0}")]
24+
TimeError(#[from] std::time::SystemTimeError),
25+
}

crates/profiler/src/lib.rs

Lines changed: 210 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,210 @@
1+
// Copyright 2023-, Edge & Node, GraphOps, and Semiotic Labs.
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
use std::fs::{self, File};
5+
use std::io::Write;
6+
use std::path::{Path, PathBuf};
7+
use std::sync::atomic::{AtomicU64, Ordering};
8+
use std::sync::Arc;
9+
use std::thread;
10+
use std::time::{Duration, SystemTime};
11+
12+
use pprof::protos::Message;
13+
14+
mod error;
15+
pub use error::ProfilerError;
16+
17+
/// Save a flamegraph to the specified path
18+
fn save_flamegraph(
19+
report: &pprof::Report,
20+
path: &Path,
21+
options: &mut pprof::flamegraph::Options,
22+
) -> Result<(), ProfilerError> {
23+
let file = File::create(path)?;
24+
25+
report
26+
.flamegraph_with_options(file, options)
27+
.map_err(|e| ProfilerError::FlamegraphCreationError(e.to_string()))?;
28+
29+
tracing::info!("✅ Generated flamegraph: {:?}", path);
30+
Ok(())
31+
}
32+
33+
/// Save a protobuf profile to the specified path
34+
fn save_protobuf(profile: &pprof::protos::Profile, path: &Path) -> Result<(), ProfilerError> {
35+
// Try write_to_bytes first
36+
match profile.write_to_bytes() {
37+
Ok(bytes) => {
38+
let mut file = File::create(path)?;
39+
file.write_all(&bytes)?;
40+
}
41+
Err(e) => {
42+
// Alternative approach: try direct file writing
43+
tracing::info!(
44+
"⚠️ Failed to serialize profile: {}, trying direct writer",
45+
e
46+
);
47+
48+
let mut file = File::create(path)?;
49+
profile
50+
.write_to_writer(&mut file)
51+
.map_err(|e| ProfilerError::SerializationError(e.to_string()))?;
52+
}
53+
}
54+
55+
tracing::info!("✅ Generated protobuf profile: {:?}", path);
56+
Ok(())
57+
}
58+
59+
/// Generate a unique filename with timestamp and counter
60+
fn generate_filename(
61+
base_path: &str,
62+
prefix: &str,
63+
extension: &str,
64+
counter: u64,
65+
) -> Result<PathBuf, ProfilerError> {
66+
let timestamp = SystemTime::now()
67+
.duration_since(SystemTime::UNIX_EPOCH)?
68+
.as_secs()
69+
.to_string();
70+
71+
let filename = format!("{}-{}-{}.{}", prefix, timestamp, counter, extension);
72+
Ok(Path::new(base_path).join(filename))
73+
}
74+
75+
/// Process a single profiling report
76+
fn process_profiling_report(
77+
guard: &pprof::ProfilerGuard<'_>,
78+
path: &str,
79+
counter: u64,
80+
options: &mut pprof::flamegraph::Options,
81+
) -> Result<(), ProfilerError> {
82+
let report = guard
83+
.report()
84+
.build()
85+
.map_err(|e| ProfilerError::ReportError(e.to_string()))?;
86+
87+
// Generate flamegraph
88+
let flamegraph_path = generate_filename(path, "flamegraph", "svg", counter)?;
89+
if let Err(e) = save_flamegraph(&report, &flamegraph_path, options) {
90+
tracing::error!("Failed to save flamegraph: {}", e);
91+
// Continue execution to try saving protobuf
92+
}
93+
94+
// Generate protobuf profile
95+
match report.pprof() {
96+
Ok(profile) => {
97+
let proto_path = generate_filename(path, "profile", "pb", counter)?;
98+
if let Err(e) = save_protobuf(&profile, &proto_path) {
99+
tracing::error!("Failed to save protobuf: {}", e);
100+
}
101+
}
102+
Err(e) => {
103+
tracing::error!("Failed to generate pprof profile: {}", e);
104+
}
105+
}
106+
107+
Ok(())
108+
}
109+
110+
fn setup(path: String, frequency: i32, interval: u64, name: String) -> Result<(), ProfilerError> {
111+
// Ensure the profiling directory exists
112+
let profile_dir = Path::new(&path);
113+
if !profile_dir.exists() {
114+
fs::create_dir_all(profile_dir)?;
115+
}
116+
117+
// Create a background thread for continuous profiling
118+
let path_clone = path.clone();
119+
thread::spawn(move || {
120+
// Wait a bit for the application to start
121+
thread::sleep(Duration::from_secs(10));
122+
tracing::info!("🔍 Starting continuous profiling...");
123+
124+
// Counter for tracking report generation
125+
let counter = Arc::new(AtomicU64::new(0));
126+
127+
// Create a separate thread for continuous data collection
128+
thread::spawn(move || {
129+
// Start continuous profiling
130+
let guard = match pprof::ProfilerGuardBuilder::default()
131+
.frequency(frequency)
132+
.blocklist(&["libc", "libgcc", "pthread", "vdso"])
133+
.build()
134+
{
135+
Ok(guard) => guard,
136+
Err(e) => {
137+
tracing::error!("Failed to initialize profiler: {}", e);
138+
return;
139+
}
140+
};
141+
142+
tracing::info!("📊 Continuous profiling active");
143+
let mut options = pprof::flamegraph::Options::default();
144+
options.title = name;
145+
146+
// Create a timer thread to periodically save reports
147+
thread::spawn(move || {
148+
loop {
149+
// Sleep for `interval` seconds before saving reports
150+
thread::sleep(Duration::from_secs(interval));
151+
152+
let current_counter = counter.fetch_add(1, Ordering::Relaxed);
153+
154+
tracing::info!("💾 Saving profiling snapshot #{}...", current_counter);
155+
156+
if let Err(e) =
157+
process_profiling_report(&guard, &path_clone, current_counter, &mut options)
158+
{
159+
tracing::error!("Error processing profiling report: {}", e);
160+
}
161+
}
162+
});
163+
164+
// Keep profiling thread alive
165+
loop {
166+
thread::sleep(Duration::from_secs(3600));
167+
}
168+
});
169+
});
170+
171+
Ok(())
172+
}
173+
174+
/// Sets up continuous CPU profiling with flamegraph and protobuf output.
175+
///
176+
/// # Arguments
177+
///
178+
/// * `path` - Directory where profiling data will be stored
179+
/// * `frequency` - Sampling frequency in Hz
180+
/// * `interval` - Time between saving reports in seconds
181+
/// * `name` - Optional service name for labeling profiles
182+
///
183+
/// # Errors
184+
///
185+
/// Returns `ProfilerError` if directory creation fails
186+
///
187+
/// # Examples
188+
///
189+
/// ```
190+
/// profiler::setup_profiling(
191+
/// "/opt/profiling/my-service".to_string(),
192+
/// 150,
193+
/// 120,
194+
/// Some("My Service".to_string())
195+
/// )?;
196+
/// ```
197+
pub fn setup_profiling(
198+
path: String,
199+
frequency: i32,
200+
interval: u64,
201+
name: Option<String>,
202+
) -> Result<(), ProfilerError> {
203+
tracing::info!("🔍 Setting up profiling...");
204+
setup(
205+
path,
206+
frequency,
207+
interval,
208+
name.unwrap_or("Service".to_string()),
209+
)
210+
}

crates/service/Cargo.toml

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ indexer-config = { path = "../config" }
1414
indexer-dips = { path = "../dips" }
1515
indexer-query = { path = "../query" }
1616
indexer-receipt = { path = "../indexer-receipt" }
17+
profiler = { path = "../profiler", optional = true }
1718
anyhow = { workspace = true }
1819
prometheus = { workspace = true }
1920
reqwest = { workspace = true }
@@ -62,12 +63,6 @@ pin-project = "1.1.7"
6263
tonic.workspace = true
6364
itertools = "0.14.0"
6465
graph-networks-registry.workspace = true
65-
pprof = { version = "0.14", features = [
66-
"flamegraph",
67-
"protobuf-codec",
68-
], optional = true }
69-
timer = { version = "0.2", optional = true }
70-
chrono = { version = "0.4", optional = true }
7166

7267

7368
[dev-dependencies]
@@ -88,4 +83,4 @@ build-info-build.workspace = true
8883

8984
[features]
9085
default = []
91-
profiling = ["pprof", "timer", "dep:chrono"]
86+
profiling = ["profiler"]

0 commit comments

Comments
 (0)