Skip to content

Commit 6cf21d5

Browse files
committed
split service&instance metrics, serve metrics for build-server & watcher
* split service & instance metrics into separate objects, serve them via separate endpoints * add a small webserver to serve metrics for the build-server & registry watcher
1 parent 4b18bfe commit 6cf21d5

File tree

17 files changed

+322
-102
lines changed

17 files changed

+322
-102
lines changed

src/bin/cratesfyi.rs

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@ use docs_rs::utils::{
1414
remove_crate_priority, set_crate_priority, ConfigName,
1515
};
1616
use docs_rs::{
17-
start_web_server, BuildQueue, Config, Context, Index, Metrics, PackageKind, RustwideBuilder,
18-
Storage,
17+
start_background_metrics_webserver, start_web_server, BuildQueue, Config, Context, Index,
18+
Metrics, PackageKind, RustwideBuilder, Storage,
1919
};
2020
use humantime::Duration;
2121
use once_cell::sync::OnceCell;
@@ -111,6 +111,8 @@ enum CommandLine {
111111
},
112112

113113
StartRegistryWatcher {
114+
#[arg(name = "SOCKET_ADDR", default_value = "0.0.0.0:3000")]
115+
metric_server_socket_addr: String,
114116
/// Enable or disable the repository stats updater
115117
#[arg(
116118
long = "repository-stats-updater",
@@ -122,7 +124,10 @@ enum CommandLine {
122124
cdn_invalidator: Toggle,
123125
},
124126

125-
StartBuildServer,
127+
StartBuildServer {
128+
#[arg(name = "SOCKET_ADDR", default_value = "0.0.0.0:3000")]
129+
metric_server_socket_addr: String,
130+
},
126131

127132
/// Starts the daemon
128133
Daemon {
@@ -154,6 +159,7 @@ impl CommandLine {
154159
subcommand,
155160
} => subcommand.handle_args(ctx, skip_if_exists)?,
156161
Self::StartRegistryWatcher {
162+
metric_server_socket_addr,
157163
repository_stats_updater,
158164
cdn_invalidator,
159165
} => {
@@ -164,9 +170,15 @@ impl CommandLine {
164170
docs_rs::utils::daemon::start_background_cdn_invalidator(&ctx)?;
165171
}
166172

173+
start_background_metrics_webserver(Some(&metric_server_socket_addr), &ctx)?;
174+
167175
docs_rs::utils::watch_registry(ctx.build_queue()?, ctx.config()?, ctx.index()?)?;
168176
}
169-
Self::StartBuildServer => {
177+
Self::StartBuildServer {
178+
metric_server_socket_addr,
179+
} => {
180+
start_background_metrics_webserver(Some(&metric_server_socket_addr), &ctx)?;
181+
170182
let build_queue = ctx.build_queue()?;
171183
let rustwide_builder = RustwideBuilder::init(&ctx)?;
172184
queue_builder(rustwide_builder, build_queue)?;

src/build_queue.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,7 @@ impl BuildQueue {
204204
to_process.name, to_process.version
205205
)
206206
});
207-
self.metrics.total_builds.inc();
207+
self.metrics.instance.total_builds.inc();
208208
if let Err(err) =
209209
cdn::queue_crate_invalidation(&mut transaction, &self.config, &to_process.name)
210210
{
@@ -225,7 +225,7 @@ impl BuildQueue {
225225
.get(0);
226226

227227
if attempt >= self.max_attempts {
228-
self.metrics.failed_builds.inc();
228+
self.metrics.instance.failed_builds.inc();
229229
}
230230

231231
report_error(&e);
@@ -320,7 +320,7 @@ impl BuildQueue {
320320
"{}-{} added into build queue",
321321
release.name, release.version
322322
);
323-
self.metrics.queued_builds.inc();
323+
self.metrics.instance.queued_builds.inc();
324324
crates_added += 1;
325325
}
326326
Err(err) => report_error(&err),
@@ -601,8 +601,8 @@ mod tests {
601601

602602
// Ensure metrics were recorded correctly
603603
let metrics = env.metrics();
604-
assert_eq!(metrics.total_builds.get(), 9);
605-
assert_eq!(metrics.failed_builds.get(), 1);
604+
assert_eq!(metrics.instance.total_builds.get(), 9);
605+
assert_eq!(metrics.instance.failed_builds.get(), 1);
606606

607607
// no invalidations were run since we don't have a distribution id configured
608608
assert!(cdn::queued_or_active_crate_invalidations(&mut *env.db().conn())?.is_empty());

src/cdn.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -363,6 +363,7 @@ pub(crate) fn handle_queued_invalidation_requests(
363363
if let Ok(duration) = (now - row.get::<_, DateTime<Utc>>(0)).to_std() {
364364
// This can only fail when the duration is negative, which can't happen anyways
365365
metrics
366+
.instance
366367
.cdn_invalidation_time
367368
.with_label_values(&[distribution_id])
368369
.observe(duration_to_seconds(duration));
@@ -400,6 +401,7 @@ pub(crate) fn handle_queued_invalidation_requests(
400401
if let Ok(duration) = (now - row.get::<_, DateTime<Utc>>("queued")).to_std() {
401402
// This can only fail when the duration is negative, which can't happen anyways
402403
metrics
404+
.instance
403405
.cdn_queue_time
404406
.with_label_values(&[distribution_id])
405407
.observe(duration_to_seconds(duration));

src/db/pool.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ impl Pool {
7777
match self.with_pool(|p| p.get()) {
7878
Ok(conn) => Ok(conn),
7979
Err(err) => {
80-
self.metrics.failed_db_connections.inc();
80+
self.metrics.instance.failed_db_connections.inc();
8181
Err(PoolError::ClientError(err))
8282
}
8383
}

src/docbuilder/rustwide_builder.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -484,11 +484,11 @@ impl RustwideBuilder {
484484

485485
let has_examples = build.host_source_dir().join("examples").is_dir();
486486
if res.result.successful {
487-
self.metrics.successful_builds.inc();
487+
self.metrics.instance.successful_builds.inc();
488488
} else if res.cargo_metadata.root().is_library() {
489-
self.metrics.failed_builds.inc();
489+
self.metrics.instance.failed_builds.inc();
490490
} else {
491-
self.metrics.non_library_builds.inc();
491+
self.metrics.instance.non_library_builds.inc();
492492
}
493493

494494
let release_data = if !is_local {

src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ pub use self::docbuilder::RustwideBuilder;
1010
pub use self::index::Index;
1111
pub use self::metrics::Metrics;
1212
pub use self::storage::Storage;
13-
pub use self::web::start_web_server;
13+
pub use self::web::{start_background_metrics_webserver, start_web_server};
1414

1515
mod build_queue;
1616
pub mod cdn;

src/metrics/macros.rs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ pub(super) trait MetricFromOpts: Sized {
22
fn from_opts(opts: prometheus::Opts) -> Result<Self, prometheus::Error>;
33
}
44

5-
#[macro_export]
65
macro_rules! metrics {
76
(
87
$vis:vis struct $name:ident {
@@ -82,7 +81,6 @@ macro_rules! metrics {
8281
};
8382
}
8483

85-
#[macro_export]
8684
macro_rules! load_metric_type {
8785
($name:ident as single) => {
8886
use prometheus::$name;

src/metrics/mod.rs

Lines changed: 116 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -35,22 +35,23 @@ pub const CDN_INVALIDATION_HISTOGRAM_BUCKETS: &[f64; 11] = &[
3535
24000.0, // 240
3636
];
3737

38-
metrics! {
39-
pub struct Metrics {
40-
/// Number of crates in the build queue
41-
queued_crates_count: IntGauge,
42-
/// Number of crates in the build queue that have a positive priority
43-
prioritized_crates_count: IntGauge,
44-
/// Number of crates that failed to build
45-
failed_crates_count: IntGauge,
46-
/// Whether the build queue is locked
47-
queue_is_locked: IntGauge,
48-
/// queued crates by priority
49-
queued_crates_count_by_priority: IntGaugeVec["priority"],
50-
51-
/// queued CDN invalidations
52-
queued_cdn_invalidations_by_distribution: IntGaugeVec["distribution"],
38+
#[derive(Debug)]
39+
pub struct Metrics {
40+
pub instance: InstanceMetrics,
41+
pub service: ServiceMetrics,
42+
}
43+
44+
impl Metrics {
45+
pub fn new() -> Result<Self, prometheus::Error> {
46+
Ok(Self {
47+
instance: InstanceMetrics::new()?,
48+
service: ServiceMetrics::new()?,
49+
})
50+
}
51+
}
5352

53+
metrics! {
54+
pub struct InstanceMetrics {
5455
/// The number of idle database connections
5556
idle_db_connections: IntGauge,
5657
/// The number of used database connections
@@ -143,7 +144,7 @@ impl RecentlyAccessedReleases {
143144
.insert((version, TargetAtom::from(target)), now);
144145
}
145146

146-
pub(crate) fn gather(&self, metrics: &Metrics) {
147+
pub(crate) fn gather(&self, metrics: &InstanceMetrics) {
147148
fn inner<K: std::hash::Hash + Eq>(map: &DashMap<K, Instant>, metric: &IntGaugeVec) {
148149
let mut hour_count = 0;
149150
let mut half_hour_count = 0;
@@ -182,18 +183,112 @@ impl RecentlyAccessedReleases {
182183
}
183184
}
184185

185-
impl Metrics {
186+
impl InstanceMetrics {
187+
pub(crate) fn gather(&self, pool: &Pool) -> Result<Vec<MetricFamily>, Error> {
188+
self.idle_db_connections.set(pool.idle_connections() as i64);
189+
self.used_db_connections.set(pool.used_connections() as i64);
190+
self.max_db_connections.set(pool.max_size() as i64);
191+
192+
self.recently_accessed_releases.gather(self);
193+
self.gather_system_performance();
194+
Ok(self.registry.gather())
195+
}
196+
197+
#[cfg(not(target_os = "linux"))]
198+
fn gather_system_performance(&self) {}
199+
200+
#[cfg(target_os = "linux")]
201+
fn gather_system_performance(&self) {
202+
use procfs::process::Process;
203+
204+
let process = Process::myself().unwrap();
205+
self.open_file_descriptors
206+
.set(process.fd_count().unwrap() as i64);
207+
self.running_threads
208+
.set(process.stat().unwrap().num_threads);
209+
}
210+
}
211+
212+
fn metric_from_opts<T: MetricFromOpts + Clone + prometheus::core::Collector + 'static>(
213+
registry: &prometheus::Registry,
214+
metric: &str,
215+
help: &str,
216+
variable_label: Option<&str>,
217+
) -> Result<T, prometheus::Error> {
218+
let mut opts = prometheus::Opts::new(metric, help).namespace("docsrs");
219+
220+
if let Some(label) = variable_label {
221+
opts = opts.variable_label(label);
222+
}
223+
224+
let metric = T::from_opts(opts)?;
225+
registry.register(Box::new(metric.clone()))?;
226+
Ok(metric)
227+
}
228+
229+
#[derive(Debug)]
230+
pub struct ServiceMetrics {
231+
pub queued_crates_count: IntGauge,
232+
pub prioritized_crates_count: IntGauge,
233+
pub failed_crates_count: IntGauge,
234+
pub queue_is_locked: IntGauge,
235+
pub queued_crates_count_by_priority: IntGaugeVec,
236+
pub queued_cdn_invalidations_by_distribution: IntGaugeVec,
237+
238+
registry: prometheus::Registry,
239+
}
240+
241+
impl ServiceMetrics {
242+
pub fn new() -> Result<Self, prometheus::Error> {
243+
let registry = prometheus::Registry::new();
244+
Ok(Self {
245+
registry: registry.clone(),
246+
queued_crates_count: metric_from_opts(
247+
&registry,
248+
"queued_crates_count",
249+
"Number of crates in the build queue",
250+
None,
251+
)?,
252+
prioritized_crates_count: metric_from_opts(
253+
&registry,
254+
"prioritized_crates_count",
255+
"Number of crates in the build queue that have a positive priority",
256+
None,
257+
)?,
258+
failed_crates_count: metric_from_opts(
259+
&registry,
260+
"failed_crates_count",
261+
"Number of crates that failed to build",
262+
None,
263+
)?,
264+
queue_is_locked: metric_from_opts(
265+
&registry,
266+
"queue_is_locked",
267+
"Whether the build queue is locked",
268+
None,
269+
)?,
270+
queued_crates_count_by_priority: metric_from_opts(
271+
&registry,
272+
"queued_crates_count_by_priority",
273+
"queued crates by priority",
274+
Some("priority"),
275+
)?,
276+
queued_cdn_invalidations_by_distribution: metric_from_opts(
277+
&registry,
278+
"queued_cdn_invalidations_by_distribution",
279+
"queued CDN invalidations",
280+
Some("distribution"),
281+
)?,
282+
})
283+
}
284+
186285
pub(crate) fn gather(
187286
&self,
188287
pool: &Pool,
189288
queue: &BuildQueue,
190289
config: &Config,
191290
) -> Result<Vec<MetricFamily>, Error> {
192-
self.idle_db_connections.set(pool.idle_connections() as i64);
193-
self.used_db_connections.set(pool.used_connections() as i64);
194-
self.max_db_connections.set(pool.max_size() as i64);
195291
self.queue_is_locked.set(queue.is_locked()? as i64);
196-
197292
self.queued_crates_count.set(queue.pending_count()? as i64);
198293
self.prioritized_crates_count
199294
.set(queue.prioritized_count()? as i64);
@@ -216,23 +311,6 @@ impl Metrics {
216311
}
217312

218313
self.failed_crates_count.set(queue.failed_count()? as i64);
219-
220-
self.recently_accessed_releases.gather(self);
221-
self.gather_system_performance();
222314
Ok(self.registry.gather())
223315
}
224-
225-
#[cfg(not(target_os = "linux"))]
226-
fn gather_system_performance(&self) {}
227-
228-
#[cfg(target_os = "linux")]
229-
fn gather_system_performance(&self) {
230-
use procfs::process::Process;
231-
232-
let process = Process::myself().unwrap();
233-
self.open_file_descriptors
234-
.set(process.fd_count().unwrap() as i64);
235-
self.running_threads
236-
.set(process.stat().unwrap().num_threads);
237-
}
238316
}

src/storage/database.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ impl<'a> StorageTransaction for DatabaseStorageTransaction<'a> {
160160
SET mime = EXCLUDED.mime, content = EXCLUDED.content, compression = EXCLUDED.compression",
161161
&[&blob.path, &blob.mime, &blob.content, &compression],
162162
)?;
163-
self.metrics.uploaded_files_total.inc();
163+
self.metrics.instance.uploaded_files_total.inc();
164164
}
165165
Ok(())
166166
}

src/storage/mod.rs

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -810,7 +810,10 @@ mod backend_tests {
810810
assert_eq!(blob.mime, actual.mime);
811811
}
812812

813-
assert_eq!(NAMES.len(), metrics.uploaded_files_total.get() as usize);
813+
assert_eq!(
814+
NAMES.len(),
815+
metrics.instance.uploaded_files_total.get() as usize
816+
);
814817

815818
Ok(())
816819
}
@@ -886,7 +889,7 @@ mod backend_tests {
886889
assert_eq!(file.mime, "text/rust");
887890
assert_eq!(file.path, "folder/test.zip/src/main.rs");
888891

889-
assert_eq!(2, metrics.uploaded_files_total.get());
892+
assert_eq!(2, metrics.instance.uploaded_files_total.get());
890893

891894
Ok(())
892895
}
@@ -933,7 +936,7 @@ mod backend_tests {
933936
expected_algs.insert(CompressionAlgorithm::default());
934937
assert_eq!(algs, expected_algs);
935938

936-
assert_eq!(2, metrics.uploaded_files_total.get());
939+
assert_eq!(2, metrics.instance.uploaded_files_total.get());
937940

938941
Ok(())
939942
}

0 commit comments

Comments
 (0)