1+ import logging
2+ import os
3+ import pathlib
4+ import shutil
5+
16import click
27import tabulate
38import coloredlogs
813from . import provenance
914
1015
16+ logger = logging .getLogger (__name__ )
17+
18+
1119class NaturalOrderGroup (click .Group ):
1220 """
1321 List commands in the order they are provided in the help text.
@@ -18,8 +26,32 @@ def list_commands(self, ctx):
1826
1927
2028# Common arguments/options
# Reusable click decorators shared by the sub-commands below, so that every
# command validates its paths and flags in the same way.

# One or more input VCF files; each must already exist and must not be a
# directory.
vcfs = click.argument(
    "vcfs", nargs=-1, required=True, type=click.Path(exists=True, dir_okay=False)
)

# Destination ICF directory. No existence check is made here because the
# command is expected to create it.
new_icf_path = click.argument(
    "icf_path", type=click.Path(file_okay=False, dir_okay=True)
)

# ICF directory that must already exist.
icf_path = click.argument(
    "icf_path", type=click.Path(exists=True, file_okay=False, dir_okay=True)
)

# Destination Zarr directory. No existence check is made here because the
# command is expected to create it.
new_zarr_path = click.argument(
    "zarr_path", type=click.Path(file_okay=False, dir_okay=True)
)

# Counted flag: each additional -v raises the integer passed to the command.
verbose = click.option("-v", "--verbose", count=True, help="Increase verbosity")

# Boolean flag allowing an existing output directory to be replaced without
# an interactive confirmation.
force = click.option(
    "-f",
    "--force",
    is_flag=True,
    flag_value=True,
    help="Force overwriting of existing directories",
)

# The version text must not carry any padding inside the string literal.
version = click.version_option(version=f"{provenance.__version__}")
2456
2557worker_processes = click .option (
@@ -64,41 +96,62 @@ def setup_logging(verbosity):
6496 coloredlogs .install (level = level )
6597
6698
def check_overwrite_dir(path, force):
    """
    Remove an existing output directory so that it can be written afresh.

    Does nothing if ``path`` does not exist. Otherwise the user is asked to
    confirm the overwrite unless ``force`` is true; declining aborts the
    command (``click.confirm`` is called with ``abort=True``). The directory
    is then renamed to a sibling ``<name>.<pid>.DELETING`` and that renamed
    tree is removed recursively.

    :param path: Path of the directory that is about to be (re)created.
    :param force: If true, skip the interactive confirmation.
    """
    path = pathlib.Path(path)
    if not path.exists():
        return
    if not force:
        click.confirm(
            f"Do you want to overwrite {path}? (use --force to skip this check)",
            abort=True,
        )
    # These trees can be mondo-big and on slow file systems, so it's entirely
    # feasible that the delete would fail or be killed. Renaming first makes it
    # less likely that partially deleted paths are mistaken for good paths.
    # with_name appends to the full final component, so it works whether or
    # not the directory name already has a suffix.
    tmp_delete_path = path.with_name(f"{path.name}.{os.getpid()}.DELETING")
    logger.info(
        "Deleting %s (renamed to %s while in progress)", path, tmp_delete_path
    )
    os.rename(path, tmp_delete_path)
    shutil.rmtree(tmp_delete_path)
114+
115+
@click.command
@vcfs
@new_icf_path
@force
@verbose
@worker_processes
@column_chunk_size
def explode(vcfs, icf_path, force, verbose, worker_processes, column_chunk_size):
    """
    Convert VCF(s) to intermediate columnar format
    """
    # NOTE: the docstring above is the command's help text, so it is kept
    # short; parameter documentation lives on the shared decorators.
    setup_logging(verbose)
    # Clear any existing output directory first (prompting unless --force was
    # given) so that the conversion starts from an empty destination.
    check_overwrite_dir(icf_path, force)
    vcf.explode(
        vcfs,
        icf_path,
        worker_processes=worker_processes,
        column_chunk_size=column_chunk_size,
        show_progress=True,
    )
85136
86137
87138@click .command
88- @click .argument ("vcfs" , nargs = - 1 , required = True )
89- @click .argument ("icf_path" , type = click .Path ())
90- @click .argument ("num_partitions" , type = int )
139+ @vcfs
140+ @new_icf_path
141+ @click .argument ("num_partitions" , type = click .IntRange (min = 1 ))
142+ @force
91143@column_chunk_size
92144@verbose
93145@worker_processes
94146def dexplode_init (
95- vcfs , icf_path , num_partitions , column_chunk_size , verbose , worker_processes
147+ vcfs , icf_path , num_partitions , force , column_chunk_size , verbose , worker_processes
96148):
97149 """
98- Initial step for parallel conversion of VCF(s) to intermediate columnar format
150+ Initial step for distributed conversion of VCF(s) to intermediate columnar format
99151 over the requested number of partitions.
100152 """
101153 setup_logging (verbose )
154+ check_overwrite_dir (icf_path , force )
102155 num_partitions = vcf .explode_init (
103156 icf_path ,
104157 vcfs ,
@@ -111,12 +164,12 @@ def dexplode_init(
111164
112165
113166@click .command
114- @click . argument ( " icf_path" , type = click . Path ())
115- @click .argument ("partition" , type = int )
167+ @icf_path
168+ @click .argument ("partition" , type = click . IntRange ( min = 0 ) )
116169@verbose
117170def dexplode_partition (icf_path , partition , verbose ):
118171 """
119- Convert a VCF partition into intermediate columnar format. Must be called *after*
172+ Convert a VCF partition to intermediate columnar format. Must be called *after*
120173 the ICF path has been initialised with dexplode_init. Partition indexes must be
121174 from 0 (inclusive) to the number of partitions returned by dexplode_init (exclusive).
122175 """
@@ -129,26 +182,26 @@ def dexplode_partition(icf_path, partition, verbose):
129182@verbose
130183def dexplode_finalise (path , verbose ):
131184 """
132- Final step for parallel conversion of VCF(s) to intermediate columnar format
185+ Final step for distributed conversion of VCF(s) to intermediate columnar format.
133186 """
134187 setup_logging (verbose )
135188 vcf .explode_finalise (path )
136189
137190
@click.command
@click.argument("path", type=click.Path(exists=True))
@verbose
def inspect(path, verbose):
    """
    Inspect an intermediate columnar format or Zarr path.
    """
    # NOTE: the docstring above is the command's help text. ``path`` is
    # required to exist so that a mistyped location is reported by click as a
    # usage error before any inspection is attempted.
    setup_logging(verbose)
    data = vcf.inspect(path)
    # Render the returned records as a table, using their keys as headers.
    click.echo(tabulate.tabulate(data, headers="keys"))
148201
149202
150203@click .command
151- @click . argument ( " icf_path" , type = click . Path ())
204+ @icf_path
152205def mkschema (icf_path ):
153206 """
154207 Generate a schema for zarr encoding
@@ -158,8 +211,9 @@ def mkschema(icf_path):
158211
159212
160213@click .command
161- @click .argument ("icf_path" , type = click .Path ())
162- @click .argument ("zarr_path" , type = click .Path ())
214+ @icf_path
215+ @new_zarr_path
216+ @force
163217@verbose
164218@click .option ("-s" , "--schema" , default = None , type = click .Path (exists = True ))
165219@variants_chunk_size
@@ -186,6 +240,7 @@ def mkschema(icf_path):
186240def encode (
187241 icf_path ,
188242 zarr_path ,
243+ force ,
189244 verbose ,
190245 schema ,
191246 variants_chunk_size ,
@@ -198,10 +253,11 @@ def encode(
198253 Encode intermediate columnar format (see explode) to vcfzarr.
199254 """
200255 setup_logging (verbose )
256+ check_overwrite_dir (zarr_path , force )
201257 vcf .encode (
202258 icf_path ,
203259 zarr_path ,
204- schema ,
260+ schema_path = schema ,
205261 variants_chunk_size = variants_chunk_size ,
206262 samples_chunk_size = samples_chunk_size ,
207263 max_v_chunks = max_variant_chunks ,
@@ -212,8 +268,8 @@ def encode(
212268
213269
214270@click .command (name = "convert" )
215- @click . argument ( " vcfs" , nargs = - 1 , required = True )
216- @click . argument ( "zarr_path" , type = click . Path ())
271+ @vcfs
272+ @new_zarr_path
217273@variants_chunk_size
218274@samples_chunk_size
219275@verbose
@@ -235,17 +291,6 @@ def convert_vcf(
235291 )
236292
237293
238- @click .command
239- @click .argument ("vcfs" , nargs = - 1 , required = True )
240- @click .argument ("zarr_path" , type = click .Path ())
241- def validate (vcfs , zarr_path ):
242- """
243- Development only, do not use. Will be removed before release.
244- """
245- # FIXME! Will silently not look at remaining VCFs
246- vcf .validate (vcfs [0 ], zarr_path , show_progress = True )
247-
248-
249294@version
250295@click .group (cls = NaturalOrderGroup )
251296def vcf2zarr ():
@@ -309,7 +354,6 @@ def vcf2zarr():
309354vcf2zarr .add_command (dexplode_init )
310355vcf2zarr .add_command (dexplode_partition )
311356vcf2zarr .add_command (dexplode_finalise )
312- vcf2zarr .add_command (validate )
313357
314358
315359@click .command (name = "convert" )
0 commit comments