1+
2+ # This is a path to the dataset, in VCZ format.
13dataset =" viridian_mafft_2024-10-14_v1.vcz.zip"
4+ # The metadata field used for dates. For the Viridian dataset, this is
5+ # "Date_tree" (which means, "date used to partition samples when building
6+ # the Viridian tree")
27date_field =" Date_tree"
38
9+ # The run_id is a prefix added to all output files. This is useful when
10+ # running lots of different parameter combinations.
411run_id =" ex1"
512# Configure where the result files are stored. For simplicity
613# we put them all in the "example_inference" directory.
@@ -13,8 +20,9 @@ matches_dir= "example_inference/"
1320# This is full debug output, which is verbose (but useful!)
1421log_level = 2
1522
16- # Dates to exclude from inference. This one is a large outlier in terms of the
17- # numbers of samples, and enriched for incorrectly assigned dates.
23+ # Dates to exclude from inference. This one is a large outlier in the
24+ # Viridian data in terms of the numbers of samples, and enriched for
25+ # incorrectly assigned dates.
1826exclude_dates = [" 2020-12-31" ]
1927
2028# The set of site positions to mask during inference (list of integers).
@@ -23,24 +31,49 @@ exclude_dates = ["2020-12-31"]
2331exclude_sites = []
2432
2533[extend_parameters ]
34+ # The recombination penalty "k" parameter
2635num_mismatches =4
36+ # Any samples with an HMM cost <= this value are included in the ARG
2737hmm_cost_threshold =7
38+ # The maximum number of missing sites for a sample to be considered
2839max_missing_sites =500
40+ # Do we mask deletions as missing data?
2941deletions_as_missing =true
42+ # The maximum number of samples to consider, per day
3043# max_daily_samples=1000
3144
32- # Knobs for tuning retro group insertion
45+ # # Various knobs for tuning retro group insertion:
46+
47+ # The minimum number of samples in a retro group
3348min_group_size =10
49+ # The minimum number of mutations shared by all samples
3450min_root_mutations =2
51+ # The maximum number of recurrent mutations in the group tree
3552max_recurrent_mutations =2
53+ # The maximum number of mutations per sample, overall
3654max_mutations_per_sample =5
55+ # The size of the window in which to consider samples for retrospective
56+ # inclusion, in days.
3757retrospective_window =7
3858
59+ # # Performance parameters.
60+
61+ # The number of matching threads to use. -1 means use all available cores.
62+ # Note that this will likely not make much difference until large numbers
63+ # of samples per day are involved.
3964num_threads =-1
65+ # An approximate ceiling on the total amount of memory used (in GiB) by HMM
66+ # matching. Once the memory used goes above this value, new HMM match jobs are
67+ # held back until it goes under it again. If many memory-intensive match jobs
68+ # are run at once, however, this will not prevent them from exceeding this
69+ # limit.
4070memory_limit =32
4171
72+ # A list of sample IDs (strings) for unconditional inclusion (e.g., to
73+ # help seed major saltation events).
4274include_samples =[]
4375
76+ # Override specific parameter values over a time period.
4477[[override ]]
4578start = " 2020-01-01"
4679stop = " 2020-03-01"
0 commit comments