Skip to content

Commit b6e89c6

Browse files
committed
Improve search error logs and metrics
1 parent 0358ca3 commit b6e89c6

File tree

3 files changed

+85
-56
lines changed

3 files changed

+85
-56
lines changed

quickwit/quickwit-search/src/leaf.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1177,7 +1177,7 @@ impl CanSplitDoBetter {
11771177

11781178
/// Searches multiple splits, potentially in multiple indexes, sitting on different storages and
11791179
/// having different doc mappings.
1180-
#[instrument(skip_all, fields(index = ?leaf_search_request.search_request.as_ref().unwrap().index_id_patterns))]
1180+
#[instrument(skip_all, fields(index = ?PrettySample::new(&leaf_search_request.search_request.as_ref().unwrap().index_id_patterns, 5)))]
11811181
pub async fn multi_index_leaf_search(
11821182
searcher_context: Arc<SearcherContext>,
11831183
leaf_search_request: LeafSearchRequest,

quickwit/quickwit-search/src/metrics_trackers.rs

Lines changed: 80 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -19,56 +19,81 @@ use std::task::{Context, Poll, ready};
1919
use std::time::Instant;
2020

2121
use pin_project::{pin_project, pinned_drop};
22-
use quickwit_proto::search::LeafSearchResponse;
22+
use quickwit_proto::search::{LeafSearchResponse, SearchResponse};
2323

2424
use crate::SearchError;
2525
use crate::metrics::SEARCH_METRICS;
2626

27-
// root
27+
// planning
2828

29-
pub enum RootSearchMetricsStep {
30-
Plan,
31-
Exec { num_targeted_splits: usize },
29+
/// Wrapper around the plan future to tracks error/cancellation metrics.
30+
/// Planning phase success isn't explicitely recorded as it can be deduced from
31+
/// the search phase metrics.
32+
#[pin_project(PinnedDrop)]
33+
pub struct SearchPlanMetricsFuture<F> {
34+
#[pin]
35+
pub tracked: F,
36+
pub start: Instant,
37+
pub is_success: Option<bool>,
38+
}
39+
40+
#[pinned_drop]
41+
impl<F> PinnedDrop for SearchPlanMetricsFuture<F> {
42+
fn drop(self: Pin<&mut Self>) {
43+
let status = match self.is_success {
44+
// this is a partial success, actual status will be recorded during the search step
45+
Some(true) => return,
46+
Some(false) => "plan-error",
47+
None => "plan-cancelled",
48+
};
49+
50+
let label_values = [status];
51+
SEARCH_METRICS
52+
.root_search_requests_total
53+
.with_label_values(label_values)
54+
.inc();
55+
SEARCH_METRICS
56+
.root_search_request_duration_seconds
57+
.with_label_values(label_values)
58+
.observe(self.start.elapsed().as_secs_f64());
59+
}
3260
}
3361

34-
/// Wrapper around the plan and search futures to track metrics.
62+
impl<F, R> Future for SearchPlanMetricsFuture<F>
63+
where F: Future<Output = crate::Result<R>>
64+
{
65+
type Output = crate::Result<R>;
66+
67+
fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
68+
let this = self.project();
69+
let response = ready!(this.tracked.poll(cx));
70+
if let Err(err) = &response {
71+
tracing::error!(?err, "root search planning failed");
72+
}
73+
*this.is_success = Some(response.is_ok());
74+
Poll::Ready(Ok(response?))
75+
}
76+
}
77+
78+
// root search
79+
80+
/// Wrapper around the root search futures to track metrics.
3581
#[pin_project(PinnedDrop)]
3682
pub struct RootSearchMetricsFuture<F> {
3783
#[pin]
3884
pub tracked: F,
3985
pub start: Instant,
40-
pub step: RootSearchMetricsStep,
41-
pub is_success: Option<bool>,
86+
pub num_targeted_splits: usize,
87+
pub status: Option<&'static str>,
4288
}
4389

4490
#[pinned_drop]
4591
impl<F> PinnedDrop for RootSearchMetricsFuture<F> {
4692
fn drop(self: Pin<&mut Self>) {
47-
let (num_targeted_splits, status) = match (&self.step, self.is_success) {
48-
// is is a partial success, actual success is recorded during the search step
49-
(RootSearchMetricsStep::Plan, Some(true)) => return,
50-
(RootSearchMetricsStep::Plan, Some(false)) => (0, "plan-error"),
51-
(RootSearchMetricsStep::Plan, None) => (0, "plan-cancelled"),
52-
(
53-
RootSearchMetricsStep::Exec {
54-
num_targeted_splits,
55-
},
56-
Some(true),
57-
) => (*num_targeted_splits, "success"),
58-
(
59-
RootSearchMetricsStep::Exec {
60-
num_targeted_splits,
61-
},
62-
Some(false),
63-
) => (*num_targeted_splits, "error"),
64-
(
65-
RootSearchMetricsStep::Exec {
66-
num_targeted_splits,
67-
},
68-
None,
69-
) => (*num_targeted_splits, "cancelled"),
93+
let status = match self.status {
94+
Some(val) => val,
95+
None => "cancelled",
7096
};
71-
7297
let label_values = [status];
7398
SEARCH_METRICS
7499
.root_search_requests_total
@@ -81,30 +106,39 @@ impl<F> PinnedDrop for RootSearchMetricsFuture<F> {
81106
SEARCH_METRICS
82107
.root_search_targeted_splits
83108
.with_label_values(label_values)
84-
.observe(num_targeted_splits as f64);
109+
.observe(self.num_targeted_splits as f64);
85110
}
86111
}
87112

88-
impl<F, R, E> Future for RootSearchMetricsFuture<F>
89-
where F: Future<Output = Result<R, E>>
113+
impl<F> Future for RootSearchMetricsFuture<F>
114+
where F: Future<Output = crate::Result<SearchResponse>>
90115
{
91-
type Output = Result<R, E>;
116+
type Output = crate::Result<SearchResponse>;
92117

93118
fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
94119
let this = self.project();
95120
let response = ready!(this.tracked.poll(cx));
96-
*this.is_success = Some(response.is_ok());
121+
if let Err(err) = &response {
122+
tracing::error!(?err, "root search failed");
123+
}
124+
if let Ok(resp) = &response {
125+
if resp.failed_splits.is_empty() {
126+
*this.status = Some("success");
127+
} else {
128+
*this.status = Some("partial-success");
129+
}
130+
} else {
131+
*this.status = Some("error");
132+
}
97133
Poll::Ready(Ok(response?))
98134
}
99135
}
100136

101-
// leaf
137+
// leaf search
102138

103139
/// Wrapper around the search future to track metrics.
104140
#[pin_project(PinnedDrop)]
105-
pub struct LeafSearchMetricsFuture<F>
106-
where F: Future<Output = Result<LeafSearchResponse, SearchError>>
107-
{
141+
pub struct LeafSearchMetricsFuture<F> {
108142
#[pin]
109143
pub tracked: F,
110144
pub start: Instant,
@@ -113,9 +147,7 @@ where F: Future<Output = Result<LeafSearchResponse, SearchError>>
113147
}
114148

115149
#[pinned_drop]
116-
impl<F> PinnedDrop for LeafSearchMetricsFuture<F>
117-
where F: Future<Output = Result<LeafSearchResponse, SearchError>>
118-
{
150+
impl<F> PinnedDrop for LeafSearchMetricsFuture<F> {
119151
fn drop(self: Pin<&mut Self>) {
120152
let label_values = [self.status.unwrap_or("cancelled")];
121153
SEARCH_METRICS
@@ -141,10 +173,10 @@ where F: Future<Output = Result<LeafSearchResponse, SearchError>>
141173
fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
142174
let this = self.project();
143175
let response = ready!(this.tracked.poll(cx));
144-
*this.status = if response.is_ok() {
145-
Some("success")
146-
} else {
147-
Some("error")
176+
*this.status = match &response {
177+
Ok(resp) if !resp.failed_splits.is_empty() => Some("partial-success"),
178+
Ok(_) => Some("success"),
179+
Err(_) => Some("error"),
148180
};
149181
Poll::Ready(Ok(response?))
150182
}

quickwit/quickwit-search/src/root.rs

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ use tracing::{debug, info, info_span, instrument};
4949

5050
use crate::cluster_client::ClusterClient;
5151
use crate::collector::{QuickwitAggregations, make_merge_collector};
52-
use crate::metrics_trackers::{RootSearchMetricsFuture, RootSearchMetricsStep};
52+
use crate::metrics_trackers::{RootSearchMetricsFuture, SearchPlanMetricsFuture};
5353
use crate::scroll_context::{ScrollContext, ScrollKeyAndStartOffset};
5454
use crate::search_job_placer::{Job, group_by, group_jobs_by_index_id};
5555
use crate::search_response_rest::StorageRequestCount;
@@ -1201,11 +1201,10 @@ pub async fn root_search(
12011201
) -> crate::Result<SearchResponse> {
12021202
let start_instant = Instant::now();
12031203

1204-
let (split_metadatas, indexes_meta_for_leaf_search) = RootSearchMetricsFuture {
1204+
let (split_metadatas, indexes_meta_for_leaf_search) = SearchPlanMetricsFuture {
12051205
start: start_instant,
12061206
tracked: plan_splits_for_root_search(&mut search_request, &mut metastore),
12071207
is_success: None,
1208-
step: RootSearchMetricsStep::Plan,
12091208
}
12101209
.await?;
12111210

@@ -1233,10 +1232,8 @@ pub async fn root_search(
12331232
split_metadatas,
12341233
cluster_client,
12351234
),
1236-
is_success: None,
1237-
step: RootSearchMetricsStep::Exec {
1238-
num_targeted_splits: num_splits,
1239-
},
1235+
status: None,
1236+
num_targeted_splits: num_splits,
12401237
}
12411238
.await;
12421239

0 commit comments

Comments
 (0)