@@ -88,44 +88,40 @@ def explode(vcfs, zarr_path, verbose, worker_processes, column_chunk_size):
8888@click .argument ("vcfs" , nargs = - 1 , required = True )
8989@click .argument ("icf_path" , type = click .Path ())
9090@click .argument ("num_partitions" , type = int )
91+ @column_chunk_size
9192@verbose
9293@worker_processes
93- def dexplode_init (vcfs , icf_path , num_partitions , verbose , worker_processes ):
94+ def dexplode_init (
95+ vcfs , icf_path , num_partitions , column_chunk_size , verbose , worker_processes
96+ ):
9497 """
9598 Initial step for parallel conversion of VCF(s) to intermediate columnar format
9699 over the requested number of partitions.
97100 """
98101 setup_logging (verbose )
99102 num_partitions = vcf .explode_init (
100- vcfs ,
101103 icf_path ,
104+ vcfs ,
102105 target_num_partitions = num_partitions ,
106+ column_chunk_size = column_chunk_size ,
103107 worker_processes = worker_processes ,
104108 show_progress = True ,
105109 )
106110 click .echo (num_partitions )
107111
108112
109113@click .command
110- @click .argument ("path" , type = click .Path (), required = True )
111- @click .argument ("start" , type = int )
112- @click .argument ("end" , type = int )
114+ @click .argument ("icf_path" , type = click .Path ())
115+ @click .argument ("partition" , type = int )
113116@verbose
114- @worker_processes
115- @column_chunk_size
116- def dexplode_slice (path , start , end , verbose , worker_processes , column_chunk_size ):
117+ def dexplode_partition (icf_path , partition , verbose ):
117118 """
118- Convert VCF(s) to intermediate columnar format
119+ Convert a VCF partition into intermediate columnar format. Must be called *after*
120+ the ICF path has been initialised with dexplode_init. Partition indexes must be
121+ from 0 (inclusive) to the number of partitions returned by dexplode_init (exclusive).
119122 """
120123 setup_logging (verbose )
121- vcf .explode_slice (
122- path ,
123- start ,
124- end ,
125- worker_processes = worker_processes ,
126- column_chunk_size = column_chunk_size ,
127- show_progress = True ,
128- )
124+ vcf .explode_partition (icf_path , partition , show_progress = True )
129125
130126
131127@click .command
@@ -297,7 +293,7 @@ def vcf2zarr():
297293
298294 \b
299295 $ vcf2zarr dexplode-init [VCF_FILE_1] ... [VCF_FILE_N] [ICF_PATH] [NUM_PARTITIONS]
300- $ vcf2zarr dexplode-slice [ICF_PATH] [START] [STOP ]
296+ $ vcf2zarr dexplode-partition [ICF_PATH] [PARTITION_INDEX ]
301297 $ vcf2zarr dexplode-finalise [ICF_PATH]
302298
303299 See the online documentation at [FIXME] for more details on distributed explode.
@@ -311,7 +307,7 @@ def vcf2zarr():
311307vcf2zarr .add_command (mkschema )
312308vcf2zarr .add_command (encode )
313309vcf2zarr .add_command (dexplode_init )
314- vcf2zarr .add_command (dexplode_slice )
310+ vcf2zarr .add_command (dexplode_partition )
315311vcf2zarr .add_command (dexplode_finalise )
316312vcf2zarr .add_command (validate )
317313
0 commit comments