@@ -729,6 +729,7 @@ def convert(
729
729
vcfs , out_path , * , column_chunk_size = 16 , worker_processes = 1 , show_progress = False
730
730
):
731
731
out_path = pathlib .Path (out_path )
732
+ # TODO make scan work in parallel using general progress code too
732
733
vcf_metadata = scan_vcfs (vcfs , show_progress = show_progress )
733
734
pcvcf = PickleChunkedVcf (out_path , vcf_metadata )
734
735
pcvcf .mkdirs ()
@@ -741,29 +742,16 @@ def convert(
741
742
f"Exploding { pcvcf .num_columns } columns { total_variants } variants "
742
743
f"{ pcvcf .num_samples } samples"
743
744
)
744
- global progress_counter
745
- progress_counter = multiprocessing .Value ("Q" , 0 )
746
-
747
- # start update progress bar process
748
- bar_thread = None
745
+ progress_config = None
749
746
if show_progress :
750
- bar_thread = threading .Thread (
751
- target = update_bar ,
752
- args = (progress_counter , total_variants , "Explode" , "vars" ),
753
- name = "progress" ,
754
- daemon = True ,
755
- )
756
- bar_thread .start ()
747
+ progress_config = core .ProgressConfig (
748
+ total = total_variants , units = "vars" , title = "Explode" )
749
+ with core .ParallelWorkManager (worker_processes , progress_config ) as pwm :
757
750
758
- with cf .ProcessPoolExecutor (
759
- max_workers = worker_processes ,
760
- initializer = init_workers ,
761
- initargs = (progress_counter ,),
762
- ) as executor :
763
751
futures = []
764
752
for j , partition in enumerate (vcf_metadata .partitions ):
765
753
futures .append (
766
- executor .submit (
754
+ pwm . executor .submit (
767
755
PickleChunkedVcf .convert_partition ,
768
756
vcf_metadata ,
769
757
j ,
@@ -775,9 +763,43 @@ def convert(
775
763
future .result () for future in cf .as_completed (futures )
776
764
]
777
765
778
- assert progress_counter .value == total_variants
779
- if bar_thread is not None :
780
- bar_thread .join ()
766
+ # global progress_counter
767
+ # progress_counter = multiprocessing.Value("Q", 0)
768
+
769
+ # # start update progress bar process
770
+ # bar_thread = None
771
+ # if show_progress:
772
+ # bar_thread = threading.Thread(
773
+ # target=update_bar,
774
+ # args=(progress_counter, total_variants, "Explode", "vars"),
775
+ # name="progress",
776
+ # daemon=True,
777
+ # )
778
+ # bar_thread.start()
779
+
780
+ # with cf.ProcessPoolExecutor(
781
+ # max_workers=worker_processes,
782
+ # initializer=init_workers,
783
+ # initargs=(progress_counter,),
784
+ # ) as executor:
785
+ # futures = []
786
+ # for j, partition in enumerate(vcf_metadata.partitions):
787
+ # futures.append(
788
+ # executor.submit(
789
+ # PickleChunkedVcf.convert_partition,
790
+ # vcf_metadata,
791
+ # j,
792
+ # out_path,
793
+ # column_chunk_size=column_chunk_size,
794
+ # )
795
+ # )
796
+ # partition_summaries = [
797
+ # future.result() for future in cf.as_completed(futures)
798
+ # ]
799
+
800
+ # assert progress_counter.value == total_variants
801
+ # if bar_thread is not None:
802
+ # bar_thread.join()
781
803
782
804
for field in vcf_metadata .fields :
783
805
for summary in partition_summaries :
@@ -862,11 +884,11 @@ def service_futures(max_waiting=2 * flush_threads):
862
884
863
885
service_futures ()
864
886
887
+
865
888
# Note: an issue with updating the progress per variant here like this
866
889
# is that we get a significant pause at the end of the counter while
867
890
# all the "small" fields get flushed. Possibly not much to be done about it.
868
- with progress_counter .get_lock ():
869
- progress_counter .value += 1
891
+ core .update_progress (1 )
870
892
871
893
for col in columns .values ():
872
894
col .flush ()
@@ -876,21 +898,21 @@ def service_futures(max_waiting=2 * flush_threads):
876
898
return summaries
877
899
878
900
879
- def update_bar (progress_counter , total , title , units ):
880
- pbar = tqdm .tqdm (
881
- total = total , desc = title , unit_scale = True , unit = units , smoothing = 0.1
882
- )
901
+ # def update_bar(progress_counter, total, title, units):
902
+ # pbar = tqdm.tqdm(
903
+ # total=total, desc=title, unit_scale=True, unit=units, smoothing=0.1
904
+ # )
883
905
884
- while (current := progress_counter .value ) < total :
885
- inc = current - pbar .n
886
- pbar .update (inc )
887
- time .sleep (0.1 )
888
- pbar .close ()
906
+ # while (current := progress_counter.value) < total:
907
+ # inc = current - pbar.n
908
+ # pbar.update(inc)
909
+ # time.sleep(0.1)
910
+ # pbar.close()
889
911
890
912
891
- def init_workers (counter ):
892
- global progress_counter
893
- progress_counter = counter
913
+ # def init_workers(counter):
914
+ # global progress_counter
915
+ # progress_counter = counter
894
916
895
917
896
918
def explode (
@@ -1418,7 +1440,9 @@ def convert_vcf(
1418
1440
)
1419
1441
1420
1442
1421
- def encode_bed_partition_genotypes (bed_path , zarr_path , start_variant , end_variant , encoder_threads = 8 ):
1443
+ def encode_bed_partition_genotypes (
1444
+ bed_path , zarr_path , start_variant , end_variant , encoder_threads = 8
1445
+ ):
1422
1446
bed = bed_reader .open_bed (bed_path , num_threads = 1 )
1423
1447
1424
1448
store = zarr .DirectoryStore (zarr_path )
@@ -1432,7 +1456,6 @@ def encode_bed_partition_genotypes(bed_path, zarr_path, start_variant, end_varia
1432
1456
buffered_arrays = [gt , gt_phased , gt_mask ]
1433
1457
1434
1458
with core .ThreadedZarrEncoder (buffered_arrays , encoder_threads ) as te :
1435
-
1436
1459
start = start_variant
1437
1460
while start < end_variant :
1438
1461
stop = min (start + chunk_length , end_variant )
0 commit comments