@@ -699,10 +699,11 @@ def __init__(self, workload, params, **kwargs):
699699 raise exceptions .InvalidSyntax ("'batch-size' must be numeric" )
700700
701701 self .ingest_percentage = self .float_param (params , name = "ingest-percentage" , default_value = 100 , min_value = 0 , max_value = 100 )
702+ self .looped = params .get ("looped" , False )
702703 self .param_source = PartitionBulkIndexParamSource (self .corpora , self .batch_size , self .bulk_size ,
703704 self .ingest_percentage , self .id_conflicts ,
704705 self .conflict_probability , self .on_conflict ,
705- self .recency , self .pipeline , self ._params )
706+ self .recency , self .pipeline , self .looped , self . _params )
706707
707708 def float_param (self , params , name , default_value , min_value , max_value , min_operator = operator .le ):
708709 try :
@@ -749,7 +750,7 @@ def params(self):
749750
750751class PartitionBulkIndexParamSource :
751752 def __init__ (self , corpora , batch_size , bulk_size , ingest_percentage , id_conflicts , conflict_probability ,
752- on_conflict , recency , pipeline = None , original_params = None ):
753+ on_conflict , recency , pipeline = None , looped = False , original_params = None ):
753754 """
754755
755756 :param corpora: Specification of affected document corpora.
@@ -762,6 +763,7 @@ def __init__(self, corpora, batch_size, bulk_size, ingest_percentage, id_conflic
762763 :param recency: A number between [0.0, 1.0] indicating whether to bias generation of conflicting ids towards more recent ones.
763764 May be None.
764765 :param pipeline: The name of the ingest pipeline to run.
766+ :param looped: Set to True for looped mode where bulk requests are repeated from the beginning when entire corpus was ingested.
765767 :param original_params: The original dict passed to the parent parameter source.
766768 """
767769 self .corpora = corpora
@@ -775,6 +777,7 @@ def __init__(self, corpora, batch_size, bulk_size, ingest_percentage, id_conflic
775777 self .on_conflict = on_conflict
776778 self .recency = recency
777779 self .pipeline = pipeline
780+ self .looped = looped
778781 self .original_params = original_params
779782 # this is only intended for unit-testing
780783 self .create_reader = original_params .pop ("__create_reader" , create_default_reader )
@@ -798,7 +801,11 @@ def params(self):
798801 # self.internal_params always reads all files. This is necessary to ensure we terminate early in case
799802 # the user has specified ingest percentage.
800803 if not self .streaming_ingestion and self .current_bulk == self .total_bulks :
801- raise StopIteration ()
804+ if self .looped :
805+ self .current_bulk = 0
806+ self ._init_internal_params ()
807+ else :
808+ raise StopIteration ()
802809 self .current_bulk += 1
803810 return next (self .internal_params )
804811
0 commit comments