1
+ import logging
2
+ import os
3
+ import pathlib
4
+ import shutil
5
+
1
6
import click
2
7
import tabulate
3
8
import coloredlogs
8
13
from . import provenance
9
14
10
15
16
+ logger = logging .getLogger (__name__ )
17
+
11
18
class NaturalOrderGroup (click .Group ):
12
19
"""
13
20
List commands in the order they are provided in the help text.
@@ -22,10 +29,13 @@ def list_commands(self, ctx):
22
29
"vcfs" , nargs = - 1 , required = True , type = click .Path (exists = True , dir_okay = False )
23
30
)
24
31
25
- icf_path = click .argument ("icf_path" , type = click .Path ())
32
# Positional argument naming the intermediate columnar format location.
# Regular files are rejected; only a directory path is accepted.
icf_path = click.argument(
    "icf_path",
    type=click.Path(file_okay=False, dir_okay=True),
)

# Repeatable flag: click counts how many times -v/--verbose was given.
verbose = click.option(
    "-v",
    "--verbose",
    count=True,
    help="Increase verbosity",
)

# Boolean switch used to skip the interactive overwrite confirmation.
force = click.option(
    "-f",
    "--force",
    is_flag=True,
    flag_value=True,
    help="Force overwriting of existing directories",
)
38
+
29
39
version = click .version_option (version = f"{ provenance .__version__ } " )
30
40
31
41
worker_processes = click .option (
@@ -70,20 +80,36 @@ def setup_logging(verbosity):
70
80
coloredlogs .install (level = level )
71
81
72
82
83
def check_overwrite_dir(path, force):
    """
    Clear an existing output path so that it can be written afresh.

    Does nothing if ``path`` does not exist. If it exists and ``force`` is
    false, the user is asked to confirm on the terminal; declining aborts
    the command and leaves the path untouched. Otherwise the path is renamed
    to ``<name>.<pid>.DELETING`` alongside the original and the renamed tree
    is removed with ``shutil.rmtree``.

    :param path: Location that is about to be overwritten (str or path-like).
    :param force: If true, delete without asking for confirmation.
    :raises click.Abort: If the user answers "no" at the confirmation prompt.
    """
    path = pathlib.Path(path)
    if not path.exists():
        return
    if not force:
        # abort=True makes a negative answer raise click.Abort. Without it the
        # answer is merely returned (and was previously ignored), so the tree
        # would be deleted even when the user said "no".
        click.confirm(
            f"Do you want to overwrite {path}? (use --force to skip this check)",
            abort=True,
        )
    # These trees can be mondo-big and on slow file systems, so it's entirely
    # feasible that the delete would fail or be killed. This makes it less likely
    # that partially deleted paths are mistaken for good paths.
    tmp_delete_path = path.with_name(f"{path.name}.{os.getpid()}.DELETING")
    logger.info(
        "Deleting %s (renamed to %s while in progress)", path, tmp_delete_path
    )
    os.rename(path, tmp_delete_path)
    shutil.rmtree(tmp_delete_path)
95
+
96
+
73
97
@click .command
74
98
@vcfs
75
- @click .argument ("zarr_path" , type = click .Path ())
99
+ @icf_path
100
+ @force
76
101
@verbose
77
102
@worker_processes
78
103
@column_chunk_size
79
- def explode (vcfs , zarr_path , verbose , worker_processes , column_chunk_size ):
104
+ def explode (vcfs , icf_path , force , verbose , worker_processes , column_chunk_size ):
80
105
"""
81
106
Convert VCF(s) to intermediate columnar format
82
107
"""
83
108
setup_logging (verbose )
109
+ check_overwrite_dir (icf_path , force )
84
110
vcf .explode (
85
111
vcfs ,
86
- zarr_path ,
112
+ icf_path ,
87
113
worker_processes = worker_processes ,
88
114
column_chunk_size = column_chunk_size ,
89
115
show_progress = True ,
@@ -94,17 +120,19 @@ def explode(vcfs, zarr_path, verbose, worker_processes, column_chunk_size):
94
120
@vcfs
95
121
@icf_path
96
122
@click .argument ("num_partitions" , type = int )
123
+ @force
97
124
@column_chunk_size
98
125
@verbose
99
126
@worker_processes
100
127
def dexplode_init (
101
- vcfs , icf_path , num_partitions , column_chunk_size , verbose , worker_processes
128
+ vcfs , icf_path , num_partitions , force , column_chunk_size , verbose , worker_processes
102
129
):
103
130
"""
104
131
Initial step for distributed conversion of VCF(s) to intermediate columnar format
105
132
over the requested number of partitions.
106
133
"""
107
134
setup_logging (verbose )
135
+ check_overwrite_dir (icf_path , force )
108
136
num_partitions = vcf .explode_init (
109
137
icf_path ,
110
138
vcfs ,
0 commit comments