@@ -55,6 +55,8 @@ def __check_data_type_text(attr_value):
5555 return False
5656 return True
5757
# Writes a single "progress: <value>" line to stdout and flushes immediately
# (flush=True), so the line is not held back in the output buffer.
# In the visible code it is called with 0.0 before the calculation loop,
# with round(idx / amount, 2) inside it, and with 1.0 after it.
# NOTE(review): presumably an external consumer reads these lines from
# stdout to track the run -- confirm against whatever launches this script.
58+ def __print_progress (progress : float ) -> None :
59+ print (f"progress: { progress } " , flush = True )
5860
5961def load_data_dict (record ):
6062 if record ["bytes" ][:2 ] == "\\ x" :
@@ -66,7 +68,7 @@ def load_data_dict(record):
6668 doc_bin_loaded = DocBin ().from_bytes (byte )
6769 docs = list (doc_bin_loaded .get_docs (vocab ))
6870 data_dict = {}
69- for ( col , doc ) in zip (record ["columns" ], docs ):
71+ for col , doc in zip (record ["columns" ], docs ):
7072 data_dict [col ] = doc
7173
7274 for key in record :
@@ -103,15 +105,23 @@ def parse_data_to_record_dict(record_chunk):
103105
# Attribute calculation: runs ac() on the "data" entry of every record,
# validates the result with check_data_type(), collects the values keyed by
# the record's "id", and finally PUTs the collected mapping as JSON to
# payload_url.
104106 print ("Running attribute calculation." )
105107 calculated_attribute_by_record_id = {}
# Progress reporting: 0.0 is emitted up front; inside the loop a fraction
# idx / amount (rounded to 2 decimals) is emitted each time idx reaches a
# multiple of progress_size; 1.0 is emitted once the loop has finished.
# The division is only evaluated when idx is a non-zero multiple of
# progress_size, so amount is at least progress_size at that point.
108+ idx = 0
109+ progress_size = 100
110+ amount = len (record_dict_list )
111+ __print_progress (0.0 )
106112 for record_dict in record_dict_list :
113+ idx += 1
114+ if idx % progress_size == 0 :
115+ progress = round (idx / amount , 2 )
116+ __print_progress (progress )
107117 attr_value = ac (record_dict ["data" ])
# A value rejected by check_data_type() aborts the whole run with a
# ValueError; nothing is sent to payload_url in that case.
108118 if not check_data_type (attr_value ):
109119 raise ValueError (
110120 f"Attribute value `{ attr_value } ` is of type { type (attr_value )} , "
111121 f"but data_type { data_type } requires "
112122 f"{ str (py_data_types ) if len (py_data_types ) > 1 else str (py_data_types [0 ])} ."
113123 )
# The removed line called ac() a second time for the same record; the added
# line stores the attr_value that was already computed and type-checked.
114- calculated_attribute_by_record_id [record_dict ["id" ]] = ac ( record_dict [ "data" ])
115-
124+ calculated_attribute_by_record_id [record_dict ["id" ]] = attr_value
125+ __print_progress ( 1.0 )
116126 print ("Finished execution." )
117127 requests .put (payload_url , json = calculated_attribute_by_record_id )
0 commit comments