@@ -32,8 +32,11 @@ def run_extraction(record_dict_list):
 
 def run_checks(progress):
     if progress:
-        print(f"Tokenization is still in progress. Currently {progress}% done.")
-        print("Function will run with reduced set.")
+        print(
+            f"Tokenization is still in progress. Currently {progress}% done.",
+            flush=True,
+        )
+        print("Function will run with reduced set.", flush=True)
 
 
 # https://www.delftstack.com/howto/python/python-split-list-into-chunks/#split-list-in-python-to-chunks-using-the-lambda-islice-method
@@ -72,7 +75,7 @@ def parse_data_to_record_dict(record_chunk):
 if __name__ == "__main__":
     _, progress, iso2_code, payload_url = sys.argv
     run_checks(progress)
-    print("Preparing data for labeling function.")
+    print("Preparing data for labeling function.", flush=True)
     # This import statement will always be highlighted as a potential error, as during devtime,
     # the script `labeling_functions` does not exist. It will be inserted at runtime
     from labeling_functions import lf
@@ -83,7 +86,7 @@ def parse_data_to_record_dict(record_chunk):
         docbin_data = json.load(infile)
 
     is_extraction = inspect.isgeneratorfunction(lf)
-    print("Running labeling function.")
+    print("Running labeling function.", flush=True)
     workload = len(docbin_data)
     lf_results_by_record_id = {}
     chunk_size = 100
@@ -93,6 +96,8 @@ def parse_data_to_record_dict(record_chunk):
             lf_results_by_record_id.update(run_extraction(record_dict_list))
         else:
             lf_results_by_record_id.update(run_classification(record_dict_list))
+        progress = (idx * chunk_size) / workload
+        print("progress: ", progress, flush=True)
 
-    print("Finished execution.")
+    print("Finished execution.", flush=True)
     requests.put(payload_url, json=lf_results_by_record_id)
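The pattern this commit introduces is chunked processing with a flushed progress line after each chunk. Below is a minimal, self-contained sketch of that pattern under stated assumptions: process_chunk and the fake record list are illustrative stand-ins, not part of the actual script.

import time

def process_chunk(chunk):
    # Stand-in for run_extraction / run_classification.
    time.sleep(0.01)

docbin_data = list(range(1000))   # stand-in for the loaded docbin records
workload = len(docbin_data)
chunk_size = 100
chunks = [docbin_data[i:i + chunk_size] for i in range(0, workload, chunk_size)]

for idx, record_chunk in enumerate(chunks):
    process_chunk(record_chunk)
    # Same arithmetic as the commit: the fraction of records covered by the
    # chunks before this one. With idx starting at 0, the first report is 0.0
    # and the last is (n_chunks - 1) / n_chunks rather than 1.0.
    progress = (idx * chunk_size) / workload
    # flush=True pushes each line through a pipe-buffered stdout immediately,
    # e.g. when the script runs in a container and its logs are tailed live;
    # without it, prints can sit in the buffer until the process exits.
    print("progress: ", progress, flush=True)

Reporting with (idx + 1) * chunk_size instead would count a chunk once it has finished and end at (or just above) 1.0; the committed version reports the work already behind it, which is a reasonable choice when the print happens right after the chunk completes.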