1
+ import logging
2
+ import os
3
+ import pathlib
4
+ import shutil
5
+
1
6
import click
2
7
import tabulate
3
8
import coloredlogs
8
13
from . import provenance
9
14
10
15
16
+ logger = logging .getLogger (__name__ )
17
+
18
+
11
19
class NaturalOrderGroup (click .Group ):
12
20
"""
13
21
List commands in the order they are provided in the help text.
@@ -18,8 +26,32 @@ def list_commands(self, ctx):
18
26
19
27
20
28
# Common arguments/options
#
# Each name below is a reusable click decorator, so subcommands that share a
# parameter declare it in exactly the same way.

# One or more input VCF paths; every one must exist and must not be a directory.
vcfs = click.argument(
    "vcfs",
    nargs=-1,
    required=True,
    type=click.Path(exists=True, dir_okay=False),
)

# ICF output location: must not be a regular file, but is not required to
# exist beforehand.
new_icf_path = click.argument(
    "icf_path",
    type=click.Path(file_okay=False, dir_okay=True),
)

# ICF input location: must be an existing directory.
icf_path = click.argument(
    "icf_path",
    type=click.Path(exists=True, file_okay=False, dir_okay=True),
)

# Zarr output location: must not be a regular file, but is not required to
# exist beforehand.
new_zarr_path = click.argument(
    "zarr_path",
    type=click.Path(file_okay=False, dir_okay=True),
)

# Repeatable flag; the value passed to the command is the number of times
# it was given.
verbose = click.option("-v", "--verbose", count=True, help="Increase verbosity")

# Boolean flag used by commands that may replace an existing output directory.
force = click.option(
    "-f",
    "--force",
    is_flag=True,
    flag_value=True,
    help="Force overwriting of existing directories",
)

version = click.version_option(version=f"{provenance.__version__}")
24
56
25
57
worker_processes = click .option (
@@ -64,41 +96,62 @@ def setup_logging(verbosity):
64
96
coloredlogs .install (level = level )
65
97
66
98
99
def check_overwrite_dir(path, force):
    """
    Clear an existing output path so that it can be written afresh.

    Does nothing when ``path`` does not exist. Otherwise, unless ``force`` is
    true, the user is asked to confirm the overwrite and the command aborts
    if they decline. The path is then renamed to a process-specific
    ``.DELETING`` name and that renamed tree is removed recursively.
    """
    target = pathlib.Path(path)
    if not target.exists():
        return

    if not force:
        click.confirm(
            f"Do you want to overwrite {target}? (use --force to skip this check)",
            abort=True,
        )

    # These trees can be mondo-big and on slow file systems, so it's entirely
    # feasible that the delete would fail or be killed. Renaming first makes it
    # less likely that partially deleted paths are mistaken for good paths.
    pid = os.getpid()
    doomed = target.with_suffix(f"{target.suffix}.{pid}.DELETING")
    logger.info(f"Deleting {target} (renamed to {doomed} while in progress)")
    os.rename(target, doomed)
    shutil.rmtree(doomed)
114
+
115
+
67
116
@click.command
@vcfs
@new_icf_path
@force
@verbose
@worker_processes
@column_chunk_size
def explode(vcfs, icf_path, force, verbose, worker_processes, column_chunk_size):
    """
    Convert VCF(s) to intermediate columnar format
    """
    # Logging is configured first so the overwrite check can report what it does.
    setup_logging(verbose)
    # Clear any existing output directory (prompting unless --force was given)
    # before the conversion starts writing to it.
    check_overwrite_dir(icf_path, force)
    explode_options = dict(
        worker_processes=worker_processes,
        column_chunk_size=column_chunk_size,
        show_progress=True,
    )
    vcf.explode(vcfs, icf_path, **explode_options)
85
136
86
137
87
138
@click .command
88
- @click .argument ("vcfs" , nargs = - 1 , required = True )
89
- @click .argument ("icf_path" , type = click .Path ())
90
- @click .argument ("num_partitions" , type = int )
139
+ @vcfs
140
+ @new_icf_path
141
+ @click .argument ("num_partitions" , type = click .IntRange (min = 1 ))
142
+ @force
91
143
@column_chunk_size
92
144
@verbose
93
145
@worker_processes
94
146
def dexplode_init (
95
- vcfs , icf_path , num_partitions , column_chunk_size , verbose , worker_processes
147
+ vcfs , icf_path , num_partitions , force , column_chunk_size , verbose , worker_processes
96
148
):
97
149
"""
98
- Initial step for parallel conversion of VCF(s) to intermediate columnar format
150
+ Initial step for distributed conversion of VCF(s) to intermediate columnar format
99
151
over the requested number of partitions.
100
152
"""
101
153
setup_logging (verbose )
154
+ check_overwrite_dir (icf_path , force )
102
155
num_partitions = vcf .explode_init (
103
156
icf_path ,
104
157
vcfs ,
@@ -111,12 +164,12 @@ def dexplode_init(
111
164
112
165
113
166
@click .command
114
- @click . argument ( " icf_path" , type = click . Path ())
115
- @click .argument ("partition" , type = int )
167
+ @icf_path
168
+ @click .argument ("partition" , type = click . IntRange ( min = 0 ) )
116
169
@verbose
117
170
def dexplode_partition (icf_path , partition , verbose ):
118
171
"""
119
- Convert a VCF partition into intermediate columnar format. Must be called *after*
172
+ Convert a VCF partition to intermediate columnar format. Must be called *after*
120
173
the ICF path has been initialised with dexplode_init. Partition indexes must be
121
174
from 0 (inclusive) to the number of partitions returned by dexplode_init (exclusive).
122
175
"""
@@ -129,26 +182,26 @@ def dexplode_partition(icf_path, partition, verbose):
129
182
@verbose
130
183
def dexplode_finalise (path , verbose ):
131
184
"""
132
- Final step for parallel conversion of VCF(s) to intermediate columnar format
185
+ Final step for distributed conversion of VCF(s) to intermediate columnar format.
133
186
"""
134
187
setup_logging (verbose )
135
188
vcf .explode_finalise (path )
136
189
137
190
138
191
@click.command
@click.argument("path", type=click.Path())
@verbose
def inspect(path, verbose):
    """
    Inspect an intermediate columnar format or Zarr path.
    """
    setup_logging(verbose)
    summary = vcf.inspect(path)
    # headers="keys" takes the column titles from the keys of the returned data.
    table = tabulate.tabulate(summary, headers="keys")
    click.echo(table)
148
201
149
202
150
203
@click .command
151
- @click . argument ( " icf_path" , type = click . Path ())
204
+ @icf_path
152
205
def mkschema (icf_path ):
153
206
"""
154
207
Generate a schema for zarr encoding
@@ -158,8 +211,9 @@ def mkschema(icf_path):
158
211
159
212
160
213
@click .command
161
- @click .argument ("icf_path" , type = click .Path ())
162
- @click .argument ("zarr_path" , type = click .Path ())
214
+ @icf_path
215
+ @new_zarr_path
216
+ @force
163
217
@verbose
164
218
@click .option ("-s" , "--schema" , default = None , type = click .Path (exists = True ))
165
219
@variants_chunk_size
@@ -186,6 +240,7 @@ def mkschema(icf_path):
186
240
def encode (
187
241
icf_path ,
188
242
zarr_path ,
243
+ force ,
189
244
verbose ,
190
245
schema ,
191
246
variants_chunk_size ,
@@ -198,10 +253,11 @@ def encode(
198
253
Encode intermediate columnar format (see explode) to vcfzarr.
199
254
"""
200
255
setup_logging (verbose )
256
+ check_overwrite_dir (zarr_path , force )
201
257
vcf .encode (
202
258
icf_path ,
203
259
zarr_path ,
204
- schema ,
260
+ schema_path = schema ,
205
261
variants_chunk_size = variants_chunk_size ,
206
262
samples_chunk_size = samples_chunk_size ,
207
263
max_v_chunks = max_variant_chunks ,
@@ -212,8 +268,8 @@ def encode(
212
268
213
269
214
270
@click .command (name = "convert" )
215
- @click . argument ( " vcfs" , nargs = - 1 , required = True )
216
- @click . argument ( "zarr_path" , type = click . Path ())
271
+ @vcfs
272
+ @new_zarr_path
217
273
@variants_chunk_size
218
274
@samples_chunk_size
219
275
@verbose
@@ -235,17 +291,6 @@ def convert_vcf(
235
291
)
236
292
237
293
238
- @click .command
239
- @click .argument ("vcfs" , nargs = - 1 , required = True )
240
- @click .argument ("zarr_path" , type = click .Path ())
241
- def validate (vcfs , zarr_path ):
242
- """
243
- Development only, do not use. Will be removed before release.
244
- """
245
- # FIXME! Will silently not look at remaining VCFs
246
- vcf .validate (vcfs [0 ], zarr_path , show_progress = True )
247
-
248
-
249
294
@version
250
295
@click .group (cls = NaturalOrderGroup )
251
296
def vcf2zarr ():
@@ -309,7 +354,6 @@ def vcf2zarr():
309
354
vcf2zarr .add_command (dexplode_init )
310
355
vcf2zarr .add_command (dexplode_partition )
311
356
vcf2zarr .add_command (dexplode_finalise )
312
- vcf2zarr .add_command (validate )
313
357
314
358
315
359
@click .command (name = "convert" )
0 commit comments