@@ -890,6 +890,15 @@ def num_columns(self):
890
890
return len (self .columns )
891
891
892
892
893
+
894
def mkdir_with_progress(path):
    """Create the directory ``path`` and advance the progress counter by one.

    Args:
        path: Object exposing ``mkdir(parents=...)``; presumably a
            ``pathlib.Path`` (verify against callers).

    Raises:
        FileExistsError: If ``path`` itself already exists, assuming
            ``pathlib.Path.mkdir`` semantics (``exist_ok`` is left at its
            default of False).
    """
    # The placeholder was previously written as "f{path}", which logged a
    # stray literal "f" in front of every path.
    logger.debug(f"mkdir {path} ")
    # NOTE: several of these calls may run concurrently and share missing
    # parent directories. parents=True asks mkdir to create those parents as
    # needed; only the final component is required not to exist already.
    path.mkdir(parents=True)
    core.update_progress(1)
900
+
901
+
893
902
class IntermediateColumnarFormatWriter :
894
903
def __init__ (self , path ):
895
904
self .path = pathlib .Path (path )
@@ -932,7 +941,7 @@ def init(
932
941
# dependencies as well.
933
942
self .metadata .provenance = {"source" : f"bio2zarr-{ provenance .__version__ } " }
934
943
935
- self .mkdirs (worker_processes )
944
+ self .mkdirs (worker_processes , show_progress = show_progress )
936
945
937
946
# Note: this is needed for the current version of the vcfzarr spec, but it's
938
947
# probably going to be dropped.
@@ -947,30 +956,30 @@ def init(
947
956
json .dump (self .metadata .asdict (), f , indent = 4 )
948
957
return self .num_partitions
949
958
950
def mkdirs(self, worker_processes=1, show_progress=False):
    """Create the output directory tree: one directory per field and partition.

    The root (``self.path``) and work-in-progress (``self.wip_path``)
    directories are created synchronously; the per-field and per-partition
    directories are then created through a ``core.ParallelWorkManager``.

    Args:
        worker_processes: Passed through to ``core.ParallelWorkManager``.
        show_progress: Passed to ``core.ProgressConfig`` as ``show``.

    Raises:
        FileExistsError: If ``self.path`` already exists (it is created
            without ``exist_ok``); presumably the same holds for
            ``self.wip_path`` - confirm its type.
    """
    # Only the partition directories count towards progress.
    num_dirs = len(self.metadata.fields) * self.num_partitions
    logger.info(f"Creating {num_dirs} directories")
    self.path.mkdir()
    self.wip_path.mkdir()
    # Due to high latency batch system filesystems, we create all the
    # directories in parallel
    progress_config = core.ProgressConfig(
        total=num_dirs,
        units="dirs ",
        title="Mkdirs ",
        show=show_progress,
    )
    with core.ParallelWorkManager(
        worker_processes=worker_processes, progress_config=progress_config
    ) as manager:
        for field in self.metadata.fields:
            col_path = get_vcf_field_path(self.path, field)
            # Don't bother trying to count the intermediate directories
            # towards progress.
            # exist_ok=True: a partition task submitted below may create this
            # field directory first (as a missing parent of its own target).
            # Without exist_ok this task would then fail with
            # FileExistsError, depending purely on worker scheduling.
            manager.submit(col_path.mkdir, parents=True, exist_ok=True)
            for j in range(self.num_partitions):
                part_path = col_path / f"p{j}"
                manager.submit(mkdir_with_progress, part_path)
974
983
975
984
def load_partition_summaries (self ):
976
985
summaries = []
0 commit comments