diff --git a/quickwit/quickwit-search/src/leaf.rs b/quickwit/quickwit-search/src/leaf.rs index f8aa60cf98f..2a99de4287a 100644 --- a/quickwit/quickwit-search/src/leaf.rs +++ b/quickwit/quickwit-search/src/leaf.rs @@ -1177,7 +1177,7 @@ impl CanSplitDoBetter { /// Searches multiple splits, potentially in multiple indexes, sitting on different storages and /// having different doc mappings. -#[instrument(skip_all, fields(index = ?leaf_search_request.search_request.as_ref().unwrap().index_id_patterns))] +#[instrument(skip_all, fields(index = ?PrettySample::new(&leaf_search_request.search_request.as_ref().unwrap().index_id_patterns, 5)))] pub async fn multi_index_leaf_search( searcher_context: Arc, leaf_search_request: LeafSearchRequest, diff --git a/quickwit/quickwit-search/src/metrics_trackers.rs b/quickwit/quickwit-search/src/metrics_trackers.rs index 7f2f9fbbfb3..adf7f93929f 100644 --- a/quickwit/quickwit-search/src/metrics_trackers.rs +++ b/quickwit/quickwit-search/src/metrics_trackers.rs @@ -19,56 +19,78 @@ use std::task::{Context, Poll, ready}; use std::time::Instant; use pin_project::{pin_project, pinned_drop}; -use quickwit_proto::search::LeafSearchResponse; +use quickwit_proto::search::{LeafSearchResponse, SearchResponse}; use crate::SearchError; use crate::metrics::SEARCH_METRICS; -// root +// planning -pub enum RootSearchMetricsStep { - Plan, - Exec { num_targeted_splits: usize }, +/// Wrapper around the plan future to tracks error/cancellation metrics. +/// Planning phase success isn't explicitely recorded as it can be deduced from +/// the search phase metrics. +#[pin_project(PinnedDrop)] +pub struct SearchPlanMetricsFuture { + #[pin] + pub tracked: F, + pub start: Instant, + pub is_success: Option, +} + +#[pinned_drop] +impl PinnedDrop for SearchPlanMetricsFuture { + fn drop(self: Pin<&mut Self>) { + let status = match self.is_success { + // this is a partial success, actual status will be recorded during the search step + Some(true) => return, + Some(false) => "plan-error", + None => "plan-cancelled", + }; + + let label_values = [status]; + SEARCH_METRICS + .root_search_requests_total + .with_label_values(label_values) + .inc(); + SEARCH_METRICS + .root_search_request_duration_seconds + .with_label_values(label_values) + .observe(self.start.elapsed().as_secs_f64()); + } } -/// Wrapper around the plan and search futures to track metrics. +impl Future for SearchPlanMetricsFuture +where F: Future> +{ + type Output = crate::Result; + + fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + let this = self.project(); + let response = ready!(this.tracked.poll(cx)); + if let Err(err) = &response { + tracing::error!(?err, "root search planning failed"); + } + *this.is_success = Some(response.is_ok()); + Poll::Ready(Ok(response?)) + } +} + +// root search + +/// Wrapper around the root search futures to track metrics. #[pin_project(PinnedDrop)] pub struct RootSearchMetricsFuture { #[pin] pub tracked: F, pub start: Instant, - pub step: RootSearchMetricsStep, - pub is_success: Option, + pub num_targeted_splits: usize, + pub status: Option<&'static str>, } #[pinned_drop] impl PinnedDrop for RootSearchMetricsFuture { fn drop(self: Pin<&mut Self>) { - let (num_targeted_splits, status) = match (&self.step, self.is_success) { - // is is a partial success, actual success is recorded during the search step - (RootSearchMetricsStep::Plan, Some(true)) => return, - (RootSearchMetricsStep::Plan, Some(false)) => (0, "plan-error"), - (RootSearchMetricsStep::Plan, None) => (0, "plan-cancelled"), - ( - RootSearchMetricsStep::Exec { - num_targeted_splits, - }, - Some(true), - ) => (*num_targeted_splits, "success"), - ( - RootSearchMetricsStep::Exec { - num_targeted_splits, - }, - Some(false), - ) => (*num_targeted_splits, "error"), - ( - RootSearchMetricsStep::Exec { - num_targeted_splits, - }, - None, - ) => (*num_targeted_splits, "cancelled"), - }; - + let status = self.status.unwrap_or("cancelled"); let label_values = [status]; SEARCH_METRICS .root_search_requests_total @@ -81,30 +103,39 @@ impl PinnedDrop for RootSearchMetricsFuture { SEARCH_METRICS .root_search_targeted_splits .with_label_values(label_values) - .observe(num_targeted_splits as f64); + .observe(self.num_targeted_splits as f64); } } -impl Future for RootSearchMetricsFuture -where F: Future> +impl Future for RootSearchMetricsFuture +where F: Future> { - type Output = Result; + type Output = crate::Result; fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { let this = self.project(); let response = ready!(this.tracked.poll(cx)); - *this.is_success = Some(response.is_ok()); + if let Err(err) = &response { + tracing::error!(?err, "root search failed"); + } + if let Ok(resp) = &response { + if resp.failed_splits.is_empty() { + *this.status = Some("success"); + } else { + *this.status = Some("partial-success"); + } + } else { + *this.status = Some("error"); + } Poll::Ready(Ok(response?)) } } -// leaf +// leaf search /// Wrapper around the search future to track metrics. #[pin_project(PinnedDrop)] -pub struct LeafSearchMetricsFuture -where F: Future> -{ +pub struct LeafSearchMetricsFuture { #[pin] pub tracked: F, pub start: Instant, @@ -113,9 +144,7 @@ where F: Future> } #[pinned_drop] -impl PinnedDrop for LeafSearchMetricsFuture -where F: Future> -{ +impl PinnedDrop for LeafSearchMetricsFuture { fn drop(self: Pin<&mut Self>) { let label_values = [self.status.unwrap_or("cancelled")]; SEARCH_METRICS @@ -141,10 +170,10 @@ where F: Future> fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { let this = self.project(); let response = ready!(this.tracked.poll(cx)); - *this.status = if response.is_ok() { - Some("success") - } else { - Some("error") + *this.status = match &response { + Ok(resp) if !resp.failed_splits.is_empty() => Some("partial-success"), + Ok(_) => Some("success"), + Err(_) => Some("error"), }; Poll::Ready(Ok(response?)) } diff --git a/quickwit/quickwit-search/src/root.rs b/quickwit/quickwit-search/src/root.rs index c2225b7524e..483c401d178 100644 --- a/quickwit/quickwit-search/src/root.rs +++ b/quickwit/quickwit-search/src/root.rs @@ -49,7 +49,7 @@ use tracing::{debug, info, info_span, instrument}; use crate::cluster_client::ClusterClient; use crate::collector::{QuickwitAggregations, make_merge_collector}; -use crate::metrics_trackers::{RootSearchMetricsFuture, RootSearchMetricsStep}; +use crate::metrics_trackers::{RootSearchMetricsFuture, SearchPlanMetricsFuture}; use crate::scroll_context::{ScrollContext, ScrollKeyAndStartOffset}; use crate::search_job_placer::{Job, group_by, group_jobs_by_index_id}; use crate::search_response_rest::StorageRequestCount; @@ -1201,11 +1201,10 @@ pub async fn root_search( ) -> crate::Result { let start_instant = Instant::now(); - let (split_metadatas, indexes_meta_for_leaf_search) = RootSearchMetricsFuture { + let (split_metadatas, indexes_meta_for_leaf_search) = SearchPlanMetricsFuture { start: start_instant, tracked: plan_splits_for_root_search(&mut search_request, &mut metastore), is_success: None, - step: RootSearchMetricsStep::Plan, } .await?; @@ -1233,10 +1232,8 @@ pub async fn root_search( split_metadatas, cluster_client, ), - is_success: None, - step: RootSearchMetricsStep::Exec { - num_targeted_splits: num_splits, - }, + status: None, + num_targeted_splits: num_splits, } .await;