1+ import logging
2+ import os
3+ import pathlib
4+ import shutil
5+
16import click
27import tabulate
38import coloredlogs
813from . import provenance
914
1015
16+ logger = logging .getLogger (__name__ )
17+
1118class NaturalOrderGroup (click .Group ):
1219 """
1320 List commands in the order they are provided in the help text.
@@ -22,10 +29,13 @@ def list_commands(self, ctx):
2229 "vcfs" , nargs = - 1 , required = True , type = click .Path (exists = True , dir_okay = False )
2330)
2431
25- icf_path = click .argument ("icf_path" , type = click .Path ())
32+ icf_path = click .argument ("icf_path" , type = click .Path (file_okay = False , dir_okay = True ))
2633
2734verbose = click .option ("-v" , "--verbose" , count = True , help = "Increase verbosity" )
2835
36+ force = click .option ("-f" , "--force" , is_flag = True , flag_value = True ,
37+ help = "Force overwriting of existing directories" )
38+
2939version = click .version_option (version = f"{ provenance .__version__ } " )
3040
3141worker_processes = click .option (
@@ -70,20 +80,36 @@ def setup_logging(verbosity):
7080 coloredlogs .install (level = level )
7181
7282
def check_overwrite_dir(path, force):
    """
    Remove an existing output directory at ``path`` so it can be rewritten.

    If ``path`` does not exist this is a no-op. If it exists and ``force``
    is false, the user is asked to confirm; a negative answer raises
    ``click.Abort`` and nothing on disk is touched. Otherwise the tree is
    renamed to a temporary ``*.<pid>.DELETING`` sibling and then removed.

    :param path: Path (str or path-like) of the directory to overwrite.
    :param force: If true, skip the interactive confirmation.
    :raises click.Abort: If the user declines the confirmation prompt.
    :raises OSError: If the rename or the recursive delete fails.
    """
    path = pathlib.Path(path)
    if not path.exists():
        return
    if not force:
        # abort=True is essential: without it the answer is ignored and a
        # "no" from the user would still fall through to the delete below.
        click.confirm(
            f"Do you want to overwrite {path}? (use --force to skip this check)",
            abort=True,
        )
    # These trees can be mondo-big and on slow file systems, so it's entirely
    # feasible that the delete would fail or be killed. This makes it less likely
    # that partially deleted paths are mistaken for good paths.
    tmp_delete_path = path.with_suffix(f"{path.suffix}.{os.getpid()}.DELETING")
    logger.info(f"Deleting {path} (renamed to {tmp_delete_path} while in progress)")
    os.rename(path, tmp_delete_path)
    shutil.rmtree(tmp_delete_path)
95+
96+
7397@click .command
7498@vcfs
75- @click .argument ("zarr_path" , type = click .Path ())
99+ @icf_path
100+ @force
76101@verbose
77102@worker_processes
78103@column_chunk_size
79- def explode (vcfs , zarr_path , verbose , worker_processes , column_chunk_size ):
104+ def explode (vcfs , icf_path , force , verbose , worker_processes , column_chunk_size ):
80105 """
81106 Convert VCF(s) to intermediate columnar format
82107 """
83108 setup_logging (verbose )
109+ check_overwrite_dir (icf_path , force )
84110 vcf .explode (
85111 vcfs ,
86- zarr_path ,
112+ icf_path ,
87113 worker_processes = worker_processes ,
88114 column_chunk_size = column_chunk_size ,
89115 show_progress = True ,
@@ -94,17 +120,19 @@ def explode(vcfs, zarr_path, verbose, worker_processes, column_chunk_size):
94120@vcfs
95121@icf_path
96122@click .argument ("num_partitions" , type = int )
123+ @force
97124@column_chunk_size
98125@verbose
99126@worker_processes
100127def dexplode_init (
101- vcfs , icf_path , num_partitions , column_chunk_size , verbose , worker_processes
128+ vcfs , icf_path , num_partitions , force , column_chunk_size , verbose , worker_processes
102129):
103130 """
104131 Initial step for distributed conversion of VCF(s) to intermediate columnar format
105132 over the requested number of partitions.
106133 """
107134 setup_logging (verbose )
135+ check_overwrite_dir (icf_path , force )
108136 num_partitions = vcf .explode_init (
109137 icf_path ,
110138 vcfs ,
0 commit comments