@@ -161,14 +161,20 @@ def import_alignments(dataset, fastas, initialise, progress, verbose):
161161@click .command ()
162162@click .argument ("dataset" , type = click .Path (dir_okay = True , file_okay = False ))
163163@click .argument ("metadata" , type = click .Path (dir_okay = False , file_okay = True ))
164+ @click .option (
165+ "--field-descriptions" ,
166+ type = click .File (mode = "r" ),
167+ default = None ,
168+ help = "JSON formatted file of field descriptions" ,
169+ )
164170@click .option (
165171 "--viridian" ,
166172 is_flag = True ,
167173 help = "Do some preprocessing appropriate for the Viridian metadata "
168174 "(Available at https://figshare.com/ndownloader/files/49694808)" ,
169175)
170176@verbose
171- def import_metadata (dataset , metadata , viridian , verbose ):
177+ def import_metadata (dataset , metadata , field_descriptions , viridian , verbose ):
172178 """
173179 Import a CSV/TSV metadata file into the dataset.
174180 """
@@ -178,48 +184,14 @@ def import_metadata(dataset, metadata, viridian, verbose):
178184 if viridian :
179185 dtype = {"Artic_primer_version" : str }
180186 df_in = pd .read_csv (metadata , sep = "\t " , dtype = dtype )
181- date_field = "date"
182187 index_field = "Run"
183188 if viridian :
184189 df_in = sc2ts .massage_viridian_metadata (df_in )
185190 df = df_in .set_index (index_field )
186- sc2ts .Dataset .add_metadata (dataset , df )
187-
188-
189- @click .command ()
190- @click .argument ("in_dataset" , type = click .Path (dir_okay = True , file_okay = False ))
191- @click .argument ("out_dataset" , type = click .Path (dir_okay = True , file_okay = False ))
192- @click .option (
193- "--date-field" , default = "date" , help = "The metadata field to use for dates"
194- )
195- @click .option (
196- "-a" ,
197- "--additional-field" ,
198- default = [],
199- help = "Additional fields to sort by" ,
200- multiple = True ,
201- )
202- @chunk_cache_size
203- @progress
204- @verbose
205- def reorder_dataset (
206- in_dataset ,
207- out_dataset ,
208- chunk_cache_size ,
209- date_field ,
210- additional_field ,
211- progress ,
212- verbose ,
213- ):
214- """
215- Create a copy of the specified dataset where the samples are reordered by
216- date (and optionally other fields).
217- """
218- setup_logging (verbose )
219- ds = sc2ts .Dataset (
220- in_dataset , chunk_cache_size = chunk_cache_size , date_field = date_field
221- )
222- ds .reorder (out_dataset , show_progress = progress , additional_fields = additional_field )
191+ d = {}
192+ if field_descriptions is not None :
193+ d = json .load (field_descriptions )
194+ sc2ts .Dataset .add_metadata (dataset , df , field_descriptions = d )
223195
224196
225197@click .command ()
@@ -415,6 +387,11 @@ def infer(config_file, start, stop, force):
415387@dataset
416388@click .argument ("ts_file" )
417389@deletions_as_missing
390+ @click .option (
391+ "--date-field" ,
392+ default = None ,
393+ help = "Specify date field to use. Required for metadata." ,
394+ )
418395@click .option (
419396 "--genotypes/--no-genotypes" ,
420397 default = True ,
@@ -440,6 +417,7 @@ def infer(config_file, start, stop, force):
440417def validate (
441418 dataset ,
442419 ts_file ,
420+ date_field ,
443421 deletions_as_missing ,
444422 genotypes ,
445423 metadata ,
@@ -453,7 +431,9 @@ def validate(
453431 setup_logging (verbose )
454432
455433 ts = tszip .load (ts_file )
456- ds = sc2ts .Dataset (dataset , chunk_cache_size = chunk_cache_size )
434+ ds = sc2ts .Dataset (
435+ dataset , date_field = date_field , chunk_cache_size = chunk_cache_size
436+ )
457437 if genotypes :
458438 sc2ts .validate_genotypes (ts , ds , deletions_as_missing , show_progress = True )
459439 if metadata :
@@ -564,7 +544,6 @@ def cli():
564544
565545cli .add_command (import_alignments )
566546cli .add_command (import_metadata )
567- cli .add_command (reorder_dataset )
568547
569548cli .add_command (info_dataset )
570549cli .add_command (info_matches )
0 commit comments