@@ -27,7 +27,7 @@ use futures::{Future, StreamExt};
2727use itertools:: Itertools ;
2828use quickwit_common:: metrics:: IntCounter ;
2929use quickwit_common:: pretty:: PrettySample ;
30- use quickwit_common:: Progress ;
30+ use quickwit_common:: { rate_limited_info , Progress } ;
3131use quickwit_metastore:: {
3232 ListSplitsQuery , ListSplitsRequestExt , MetastoreServiceStreamSplitsExt , SplitInfo ,
3333 SplitMetadata , SplitState ,
@@ -122,8 +122,8 @@ pub async fn run_garbage_collect(
122122
123123 let index_uids: Vec < IndexUid > = indexes. keys ( ) . cloned ( ) . collect ( ) ;
124124
125- let Some ( list_splits_query_for_index_uids ) =
126- ListSplitsQuery :: try_from_index_uids ( index_uids. clone ( ) )
125+ // TODO: maybe use `ListSplitsQuery::for_all_indexes` here and post-filter the results ourselves.
126+ let Some ( list_splits_query_for_index_uids ) = ListSplitsQuery :: try_from_index_uids ( index_uids)
127127 else {
128128 return Ok ( SplitRemovalInfo :: default ( ) ) ;
129129 } ;
@@ -187,7 +187,6 @@ pub async fn run_garbage_collect(
187187 OffsetDateTime :: now_utc ( ) . unix_timestamp ( ) - deletion_grace_period. as_secs ( ) as i64 ;
188188
189189 Ok ( delete_splits_marked_for_deletion_several_indexes (
190- index_uids,
191190 updated_before_timestamp,
192191 metastore,
193192 indexes,
@@ -221,20 +220,15 @@ async fn delete_splits(
221220 )
222221 . await
223222 } else {
224- error ! (
225- "we are trying to GC without knowing the storage, this shouldn't \
226- happen"
223+ // In practice this can happen if the index was created between the start of
224+ // the run and now and one of its splits has already expired (which likely
225+ // means a very long GC run), or if we run GC on a single index from the CLI.
226+ quickwit_common:: rate_limited_warn!(
227+ limit_per_min = 2 ,
228+ index_uid=%index_uid,
229+ "we are trying to GC without knowing the storage" ,
227230 ) ;
228- Err ( DeleteSplitsError {
229- successes : Vec :: new ( ) ,
230- storage_error : None ,
231- storage_failures : splits_metadata_to_delete
232- . into_iter ( )
233- . map ( |split| split. as_split_info ( ) )
234- . collect ( ) ,
235- metastore_error : None ,
236- metastore_failures : Vec :: new ( ) ,
237- } )
231+ Ok ( Vec :: new ( ) )
238232 }
239233 }
240234 } )
@@ -304,11 +298,12 @@ async fn list_splits_metadata(
304298/// Removes any splits marked for deletion which haven't been
305299/// updated after `updated_before_timestamp` in batches of 1000 splits.
306300///
301+ /// Only splits whose index UID is a key of the `storages` map will be deleted.
302+ ///
307303/// The aim of this is to spread the load out across a longer period
308304/// rather than short, heavy bursts on the metastore and storage system itself.
309- #[ instrument( skip( index_uids , storages, metastore, progress_opt, metrics) , fields( num_indexes=%index_uids . len( ) ) ) ]
305+ #[ instrument( skip( storages, metastore, progress_opt, metrics) , fields( num_indexes=%storages . len( ) ) ) ]
310306async fn delete_splits_marked_for_deletion_several_indexes (
311- index_uids : Vec < IndexUid > ,
312307 updated_before_timestamp : i64 ,
313308 metastore : MetastoreServiceClient ,
314309 storages : HashMap < IndexUid , Arc < dyn Storage > > ,
@@ -317,18 +312,22 @@ async fn delete_splits_marked_for_deletion_several_indexes(
317312) -> SplitRemovalInfo {
318313 let mut split_removal_info = SplitRemovalInfo :: default ( ) ;
319314
320- let Some ( list_splits_query) = ListSplitsQuery :: try_from_index_uids ( index_uids) else {
321- error ! ( "failed to create list splits query. this should never happen" ) ;
322- return split_removal_info;
323- } ;
315+ // We ask for all indexes because the query is more efficient and we almost always want all
316+ // indexes anyway. The exception is when garbage collecting a single index from the command
317+ // line: in that case, we will log a bunch of warnings. I (trinity) consider it worth it for the
318+ // more generic code, which needs fewer special cases while testing, but we could check the
319+ // number of indexes in `storages` if we think it's a better idea.
320+ let list_splits_query = ListSplitsQuery :: for_all_indexes ( ) ;
324321
325322 let mut list_splits_query = list_splits_query
326323 . with_split_state ( SplitState :: MarkedForDeletion )
327324 . with_update_timestamp_lte ( updated_before_timestamp)
328325 . with_limit ( DELETE_SPLITS_BATCH_SIZE )
329326 . sort_by_index_uid ( ) ;
330327
331- loop {
328+ let mut splits_to_delete_possibly_remaining = true ;
329+
330+ while splits_to_delete_possibly_remaining {
332331 let splits_metadata_to_delete: Vec < SplitMetadata > = match protect_future (
333332 progress_opt,
334333 list_splits_metadata ( & metastore, & list_splits_query) ,
@@ -342,19 +341,32 @@ async fn delete_splits_marked_for_deletion_several_indexes(
342341 }
343342 } ;
344343
344+ // We page through the list of splits to delete using a limit and a `search_after` trick.
345+ // To detect if this is the last page, we check if the number of splits is less than the
346+ // limit.
347+ assert ! ( splits_metadata_to_delete. len( ) <= DELETE_SPLITS_BATCH_SIZE ) ;
348+ splits_to_delete_possibly_remaining =
349+ splits_metadata_to_delete. len ( ) == DELETE_SPLITS_BATCH_SIZE ;
350+
345351 // set split after which to search for the next loop
346352 let Some ( last_split_metadata) = splits_metadata_to_delete. last ( ) else {
347353 break ;
348354 } ;
349355 list_splits_query = list_splits_query. after_split ( last_split_metadata) ;
350356
351- let num_splits_to_delete = splits_metadata_to_delete. len ( ) ;
357+ let mut splits_metadata_to_delete_per_index: HashMap < IndexUid , Vec < SplitMetadata > > =
358+ HashMap :: with_capacity ( storages. len ( ) ) ;
352359
353- let splits_metadata_to_delete_per_index: HashMap < IndexUid , Vec < SplitMetadata > > =
354- splits_metadata_to_delete
355- . into_iter ( )
356- . map ( |meta| ( meta. index_uid . clone ( ) , meta) )
357- . into_group_map ( ) ;
360+ for meta in splits_metadata_to_delete {
361+ if !storages. contains_key ( & meta. index_uid ) {
362+ rate_limited_info ! ( limit_per_min=6 , index_uid=?meta. index_uid, "split not listed in storage map: skipping" ) ;
363+ continue ;
364+ }
365+ splits_metadata_to_delete_per_index
366+ . entry ( meta. index_uid . clone ( ) )
367+ . or_default ( )
368+ . push ( meta) ;
369+ }
358370
359371 // ignore return we continue either way
360372 let _: Result < ( ) , ( ) > = delete_splits (
@@ -366,12 +378,6 @@ async fn delete_splits_marked_for_deletion_several_indexes(
366378 & mut split_removal_info,
367379 )
368380 . await ;
369-
370- if num_splits_to_delete < DELETE_SPLITS_BATCH_SIZE {
371- // stop the gc if this was the last batch
372- // we are guaranteed to make progress due to .after_split()
373- break ;
374- }
375381 }
376382
377383 split_removal_info
0 commit comments