Skip to content

Commit a6fbff0

Browse files
JWittmeyerJWittmeyer
andauthored
Adds example progress implementation (#19)
* Adds example progress implementation * Flush prints --------- Co-authored-by: JWittmeyer <[email protected]>
1 parent 2761f0a commit a6fbff0

File tree

1 file changed

+10
-5
lines changed

1 file changed

+10
-5
lines changed

run_lf.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,11 @@ def run_extraction(record_dict_list):
3232

3333
def run_checks(progress):
3434
if progress:
35-
print(f"Tokenization is still in progress. Currently {progress}% done.")
36-
print("Function will run with reduced set.")
35+
print(
36+
f"Tokenization is still in progress. Currently {progress}% done.",
37+
flush=True,
38+
)
39+
print("Function will run with reduced set.", flush=True)
3740

3841

3942
# https://www.delftstack.com/howto/python/python-split-list-into-chunks/#split-list-in-python-to-chunks-using-the-lambda-islice-method
@@ -72,7 +75,7 @@ def parse_data_to_record_dict(record_chunk):
7275
if __name__ == "__main__":
7376
_, progress, iso2_code, payload_url = sys.argv
7477
run_checks(progress)
75-
print("Preparing data for labeling function.")
78+
print("Preparing data for labeling function.", flush=True)
7679
# This import statement will always be highlighted as a potential error, as during devtime,
7780
# the script `labeling_functions` does not exist. It will be inserted at runtime
7881
from labeling_functions import lf
@@ -83,7 +86,7 @@ def parse_data_to_record_dict(record_chunk):
8386
docbin_data = json.load(infile)
8487

8588
is_extraction = inspect.isgeneratorfunction(lf)
86-
print("Running labeling function.")
89+
print("Running labeling function.", flush=True)
8790
workload = len(docbin_data)
8891
lf_results_by_record_id = {}
8992
chunk_size = 100
@@ -93,6 +96,8 @@ def parse_data_to_record_dict(record_chunk):
9396
lf_results_by_record_id.update(run_extraction(record_dict_list))
9497
else:
9598
lf_results_by_record_id.update(run_classification(record_dict_list))
99+
progress = (idx * chunk_size) / workload
100+
print("progress: ", progress, flush=True)
96101

97-
print("Finished execution.")
102+
print("Finished execution.", flush=True)
98103
requests.put(payload_url, json=lf_results_by_record_id)

0 commit comments

Comments
 (0)