Skip to content

Commit ba6ba30

Browse files
feat: add histogram metric for file access analysis (openobserve#6982)
This histogram metric should allow understanding the age of files handled by querier. Example queries `histogram_quantile(0.95, rate(zo_file_access_time_bucket{stream_type='logs'}[5m]))` --------- Co-authored-by: Hengfei Yang <[email protected]>
1 parent 63d6db9 commit ba6ba30

File tree

2 files changed

+47
-1
lines changed

2 files changed

+47
-1
lines changed

src/config/src/metrics.rs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -852,6 +852,21 @@ pub static FILE_DOWNLOADER_PRIORITY_QUEUE_SIZE: Lazy<IntGaugeVec> = Lazy::new(||
852852
.expect("Metric created")
853853
});
854854

855+
// File access time bucket histogram
856+
pub static FILE_ACCESS_TIME: Lazy<HistogramVec> = Lazy::new(|| {
857+
HistogramVec::new(
858+
HistogramOpts::new(
859+
"file_access_time",
860+
"Histogram showing query counts within time windows from 1h to 1week (1h, 2h, 3h, 6h, 12h, 24h, 48h, 96h, 168h)"
861+
)
862+
.namespace(NAMESPACE)
863+
.buckets(vec![1.0, 2.0, 3.0, 6.0, 12.0, 24.0, 48.0, 96.0, 168.0])
864+
.const_labels(create_const_labels()),
865+
&["stream_type"],
866+
)
867+
.expect("Metric created")
868+
});
869+
855870
fn register_metrics(registry: &Registry) {
856871
// http latency
857872
registry
@@ -1080,6 +1095,9 @@ fn register_metrics(registry: &Registry) {
10801095
registry
10811096
.register(Box::new(FILE_DOWNLOADER_PRIORITY_QUEUE_SIZE.clone()))
10821097
.expect("Metric registered");
1098+
registry
1099+
.register(Box::new(FILE_ACCESS_TIME.clone()))
1100+
.expect("Metric registered");
10831101
}
10841102

10851103
fn create_const_labels() -> HashMap<String, String> {

src/service/search/grpc/storage.rs

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -411,7 +411,8 @@ pub async fn cache_files(
411411
// check how many files already cached
412412
let mut cached_files = HashSet::with_capacity(files.len());
413413
let (mut cache_hits, mut cache_misses) = (0, 0);
414-
for (_id, _account, file, _size, _ts) in files.iter() {
414+
415+
for (_id, _account, file, _size, max_ts) in files.iter() {
415416
if file_data::memory::exist(file).await {
416417
scan_stats.querier_memory_cached_files += 1;
417418
cached_files.insert(file);
@@ -422,8 +423,35 @@ pub async fn cache_files(
422423
cache_hits += 1;
423424
} else {
424425
cache_misses += 1;
426+
};
427+
428+
// Record file access metrics
429+
let stream_type = if file_type == "index" {
430+
config::meta::stream::StreamType::Index
431+
} else {
432+
// Determine stream type from the file path
433+
if file.contains("/logs/") {
434+
config::meta::stream::StreamType::Logs
435+
} else if file.contains("/metrics/") {
436+
config::meta::stream::StreamType::Metrics
437+
} else if file.contains("/traces/") {
438+
config::meta::stream::StreamType::Traces
439+
} else {
440+
config::meta::stream::StreamType::Logs // Default
441+
}
442+
};
443+
444+
let current_time = chrono::Utc::now().timestamp_micros();
445+
let file_age_seconds = (current_time - max_ts) / 1_000_000;
446+
let file_age_hours = file_age_seconds as f64 / 3600.0;
447+
448+
if file_age_hours > 0.0 {
449+
config::metrics::FILE_ACCESS_TIME
450+
.with_label_values(&[&stream_type.to_string()])
451+
.observe(file_age_hours);
425452
}
426453
}
454+
427455
let files_num = files.len() as i64;
428456
if files_num == scan_stats.querier_memory_cached_files + scan_stats.querier_disk_cached_files {
429457
// all files are cached

0 commit comments

Comments
 (0)