@@ -88,44 +88,40 @@ def explode(vcfs, zarr_path, verbose, worker_processes, column_chunk_size):
88
88
@click .argument ("vcfs" , nargs = - 1 , required = True )
89
89
@click .argument ("icf_path" , type = click .Path ())
90
90
@click .argument ("num_partitions" , type = int )
91
+ @column_chunk_size
91
92
@verbose
92
93
@worker_processes
93
- def dexplode_init (vcfs , icf_path , num_partitions , verbose , worker_processes ):
94
+ def dexplode_init (
95
+ vcfs , icf_path , num_partitions , column_chunk_size , verbose , worker_processes
96
+ ):
94
97
"""
95
98
Initial step for parallel conversion of VCF(s) to intermediate columnar format
96
99
over the requested number of paritions.
97
100
"""
98
101
setup_logging (verbose )
99
102
num_partitions = vcf .explode_init (
100
- vcfs ,
101
103
icf_path ,
104
+ vcfs ,
102
105
target_num_partitions = num_partitions ,
106
+ column_chunk_size = column_chunk_size ,
103
107
worker_processes = worker_processes ,
104
108
show_progress = True ,
105
109
)
106
110
click .echo (num_partitions )
107
111
108
112
109
113
@click .command
110
- @click .argument ("path" , type = click .Path (), required = True )
111
- @click .argument ("start" , type = int )
112
- @click .argument ("end" , type = int )
114
+ @click .argument ("icf_path" , type = click .Path ())
115
+ @click .argument ("partition" , type = int )
113
116
@verbose
114
- @worker_processes
115
- @column_chunk_size
116
- def dexplode_slice (path , start , end , verbose , worker_processes , column_chunk_size ):
117
+ def dexplode_partition (icf_path , partition , verbose ):
117
118
"""
118
- Convert VCF(s) to intermediate columnar format
119
+ Convert a VCF partition into intermediate columnar format. Must be called *after*
120
+ the ICF path has been initialised with dexplode_init. Partition indexes must be
121
+ from 0 (inclusive) to the number of paritions returned by dexplode_init (exclusive).
119
122
"""
120
123
setup_logging (verbose )
121
- vcf .explode_slice (
122
- path ,
123
- start ,
124
- end ,
125
- worker_processes = worker_processes ,
126
- column_chunk_size = column_chunk_size ,
127
- show_progress = True ,
128
- )
124
+ vcf .explode_partition (icf_path , partition , show_progress = True )
129
125
130
126
131
127
@click .command
@@ -297,7 +293,7 @@ def vcf2zarr():
297
293
298
294
\b
299
295
$ vcf2zarr dexplode-init [VCF_FILE_1] ... [VCF_FILE_N] [ICF_PATH] [NUM_PARTITIONS]
300
- $ vcf2zarr dexplode-slice [ICF_PATH] [START] [STOP ]
296
+ $ vcf2zarr dexplode-partition [ICF_PATH] [PARTITION_INDEX ]
301
297
$ vcf2zarr dexplode-finalise [ICF_PATH]
302
298
303
299
See the online documentation at [FIXME] for more details on distributed explode.
@@ -311,7 +307,7 @@ def vcf2zarr():
311
307
vcf2zarr .add_command (mkschema )
312
308
vcf2zarr .add_command (encode )
313
309
vcf2zarr .add_command (dexplode_init )
314
- vcf2zarr .add_command (dexplode_slice )
310
+ vcf2zarr .add_command (dexplode_partition )
315
311
vcf2zarr .add_command (dexplode_finalise )
316
312
vcf2zarr .add_command (validate )
317
313
0 commit comments