@@ -162,37 +162,53 @@ def run_infer(args):
162
162
"via the Python function `tsinfer.SampleData.from_tree_sequence()`)."
163
163
)
164
164
sample_data = tsinfer .SampleData .load (args .samples )
165
- ts = tsinfer .infer (
166
- sample_data ,
167
- progress_monitor = args .progress ,
168
- num_threads = args .num_threads ,
169
- recombination_rate = get_recombination_map (args ),
170
- mismatch_ratio = args .mismatch_ratio ,
171
- record_provenance = False ,
172
- )
173
- output_trees = get_output_trees_path (args .output_trees , args .samples )
174
- write_ts (ts , output_trees )
165
+ if args .keep_intermediates :
166
+ run_generate_ancestors (args , usage_summary = False )
167
+ run_match_ancestors (args , usage_summary = False )
168
+ run_match_samples (args , usage_summary = False )
169
+ else :
170
+ if args .ancestors is not None :
171
+ raise ValueError (
172
+ "Must specify --keep-intermediates to save an ancestors file"
173
+ )
174
+ if args .ancestors_trees is not None :
175
+ raise ValueError (
176
+ "Must specify --keep-intermediates to save an ancestors tree sequence"
177
+ )
178
+
179
+ ts = tsinfer .infer (
180
+ sample_data ,
181
+ progress_monitor = args .progress ,
182
+ num_threads = args .num_threads ,
183
+ recombination_rate = get_recombination_map (args ),
184
+ mismatch_ratio = args .mismatch_ratio ,
185
+ path_compression = not args .no_path_compression ,
186
+ record_provenance = False ,
187
+ )
188
+ output_trees = get_output_trees_path (args .output_trees , args .samples )
189
+ write_ts (ts , output_trees )
175
190
summarise_usage ()
176
191
177
192
178
def run_generate_ancestors(args, usage_summary=True):
    """Generate the ancestors file for the sample data named in ``args``.

    Sets up logging, resolves the ancestors output path from
    ``args.ancestors`` and ``args.samples``, loads the sample data and calls
    ``tsinfer.generate_ancestors`` so the result is written to that path.

    :param args: Parsed command-line namespace. Reads ``samples``,
        ``ancestors``, ``progress`` and ``num_threads``; ``num_flush_threads``
        is optional and falls back to 0 when the calling sub-command does not
        define it.
    :param usage_summary: When true (the default), call ``summarise_usage()``
        once the ancestors have been generated. Callers that chain several
        steps together pass ``False`` and summarise once at the end.
    """
    setup_logging(args)
    ancestors_path = get_ancestors_path(args.ancestors, args.samples)
    sample_data = tsinfer.SampleData.load(args.samples)
    # Not every sub-command that reaches this function registers
    # --num-flush-threads, so look it up defensively. The attribute name must
    # match the argparse destination exactly (no stray whitespace), otherwise
    # the user-supplied value is silently replaced by the default.
    num_flush_threads = getattr(args, "num_flush_threads", 0)
    tsinfer.generate_ancestors(
        sample_data,
        progress_monitor=args.progress,
        num_flush_threads=num_flush_threads,
        num_threads=args.num_threads,
        path=ancestors_path,
        record_provenance=False,
    )
    # NB: ideally we should store the cli provenance in here, but this creates
    # perf issues - see https://github.com/tskit-dev/tsinfer/issues/743
    if usage_summary:
        summarise_usage()
193
209
194
210
195
- def run_match_ancestors (args ):
211
+ def run_match_ancestors (args , usage_summary = True ):
196
212
setup_logging (args )
197
213
ancestors_path = get_ancestors_path (args .ancestors , args .samples )
198
214
logger .info (f"Loading ancestral haplotypes from { ancestors_path } " )
@@ -210,10 +226,11 @@ def run_match_ancestors(args):
210
226
record_provenance = False ,
211
227
)
212
228
write_ts (ts , ancestors_trees )
213
- summarise_usage ()
229
+ if usage_summary :
230
+ summarise_usage ()
214
231
215
232
216
- def run_augment_ancestors (args ):
233
+ def run_augment_ancestors (args , usage_summary = True ):
217
234
setup_logging (args )
218
235
219
236
sample_data = tsinfer .SampleData .load (args .samples )
@@ -241,10 +258,11 @@ def run_augment_ancestors(args):
241
258
)
242
259
logger .info (f"Writing output tree sequence to { output_path } " )
243
260
ts .dump (output_path )
244
- summarise_usage ()
261
+ if usage_summary :
262
+ summarise_usage ()
245
263
246
264
247
- def run_match_samples (args ):
265
+ def run_match_samples (args , usage_summary = True ):
248
266
setup_logging (args )
249
267
250
268
sample_data = tsinfer .SampleData .load (args .samples )
@@ -264,7 +282,8 @@ def run_match_samples(args):
264
282
record_provenance = False ,
265
283
)
266
284
write_ts (ts , output_trees )
267
- summarise_usage ()
285
+ if usage_summary :
286
+ summarise_usage ()
268
287
269
288
270
289
def run_verify (args ):
@@ -425,6 +444,19 @@ def add_num_flush_threads_argument(parser):
425
444
)
426
445
427
446
447
def add_keep_intermediates_argument(parser):
    """Register the boolean ``--keep-intermediates`` / ``-k`` flag on ``parser``.

    The flag is a ``store_true`` option, so it is ``False`` unless given on
    the command line.
    """
    help_text = (
        "Keep the intermediate ancestors and ancestors-tree-sequence files. "
        "To override the default locations where these files are saved, use the "
        "--ancestors and --ancestors-trees options"
    )
    parser.add_argument(
        "--keep-intermediates", "-k", action="store_true", help=help_text
    )
458
+
459
+
428
460
def get_cli_parser ():
429
461
top_parser = argparse .ArgumentParser (
430
462
description = "Command line interface for tsinfer."
@@ -525,17 +557,21 @@ def get_cli_parser():
525
557
"infer" ,
526
558
help = (
527
559
"Runs the generate-ancestors, match-ancestors and match-samples "
528
- "commands without writing the intermediate files to disk. Not "
529
- "recommended for large inferences."
560
+ "steps in one go. Not recommended for large inferences."
530
561
),
531
562
)
532
563
add_samples_file_argument (parser )
533
564
add_logging_arguments (parser )
534
565
add_output_trees_argument (parser )
566
+ add_path_compression_argument (parser )
535
567
add_num_threads_argument (parser )
536
568
add_progress_argument (parser )
569
+ add_postprocess_argument (parser )
537
570
add_recombination_arguments (parser )
538
571
add_mismatch_argument (parser )
572
+ add_keep_intermediates_argument (parser )
573
+ add_ancestors_file_argument (parser ) # Only used if keep-intermediates
574
+ add_ancestors_trees_argument (parser ) # Only used if keep-intermediates
539
575
parser .set_defaults (runner = run_infer )
540
576
541
577
parser = subparsers .add_parser (
0 commit comments