Skip to content

Commit 31a0f2a

Browse files
dcdatcudcdehaas
authored andcommitted
Add -recomb_map option for parallel_singer
singer_master already supports it.
1 parent a214631 commit 31a0f2a

File tree

5 files changed

+36
-26
lines changed

5 files changed

+36
-26
lines changed

SINGER/SINGER/Sampler.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ Sampler::Sampler(double pop_size, Rate_map &rm, Rate_map &mm) {
2121
recomb_rate = rm.mean_rate()*Ne;
2222
recomb_map = rm;
2323
mut_map = mm;
24+
has_map = true;
2425
}
2526

2627
void Sampler::set_precision(double c, double q) {
@@ -306,7 +307,7 @@ void Sampler::build_singleton_arg() {
306307
arg = ARG(Ne, sequence_length);
307308
arg.discretize(bin_size);
308309
arg.build_singleton_arg(n);
309-
if (mut_rate > 0 and recomb_rate > 0) {
310+
if (!has_map) {
310311
arg.compute_rhos_thetas(recomb_rate, mut_rate);
311312
} else {
312313
arg.compute_rhos_thetas(recomb_map, mut_map);
@@ -787,7 +788,7 @@ void Sampler::load_resume_arg() {
787788
coord_file = output_prefix + "_coordinates.txt";
788789
arg.read(node_file, branch_file, recomb_file, mut_file);
789790
arg.read_coordinates(coord_file);
790-
if (mut_rate > 0 and recomb_rate > 0) {
791+
if (!has_map) {
791792
arg.compute_rhos_thetas(recomb_rate, mut_rate);
792793
} else {
793794
arg.compute_rhos_thetas(recomb_map, mut_map);

SINGER/SINGER/Sampler.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ class Sampler {
4848
vector<Node_ptr> ordered_sample_nodes = {};
4949
unordered_map<double, set<Node_ptr>> carriers = {};
5050
unordered_map<Node_ptr, set<double>> mutation_sets = {};
51+
bool has_map = false;
5152

5253
Sampler();
5354

SINGER/SINGER/main.cpp

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -224,16 +224,11 @@ int main(int argc, const char * argv[]) {
224224
exit(1);
225225
}
226226
}
227-
if (r < 0) {
228-
cerr << "-r flag missing or invalid value. " << endl;
229-
exit(1);
230-
}
231-
if (m < 0) {
232-
cerr << "-m flag missing or invalid value. " << endl;
233-
exit(1);
234-
}
235-
if (Ne < 0) {
236-
cerr << "-Ne flag missing or invalid value. " << endl;
227+
bool bothRates = (r > 0 && m > 0);
228+
bool bothMaps = !(recomb_map_filename.empty() || mut_map_filename.empty());
229+
if (!(bothRates || bothMaps)) {
230+
cerr << "Must provide either valid values for -r and -m, OR provide a valid "
231+
<< "mutation map and recombination map. " << endl;
237232
exit(1);
238233
}
239234
if (input_filename.size() == 0) {
@@ -253,7 +248,7 @@ int main(int argc, const char * argv[]) {
253248
exit(1);
254249
}
255250
Sampler sampler;
256-
if (r > 0 and m > 0) {
251+
if (bothRates) {
257252
sampler = Sampler(Ne, r, m);
258253
} else {
259254
Rate_map recomb_map = Rate_map();

SINGER/SINGER/parallel_singer

Lines changed: 23 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ def index_vcf(vcf_prefix, block_length):
1414
subprocess.run(["python", indexer, vcf_prefix, str(block_length)])
1515

1616

17-
def run_singer_in_parallel(vcf_prefix, output_prefix, mutation_rate, ratio, block_length, num_iters, thinning_interval, Ne, polar, num_cores):
17+
def run_singer_in_parallel(vcf_prefix, output_prefix, mutation_rate, ratio, block_length, num_iters, thinning_interval, Ne, polar, num_cores, recomb_map, mut_map):
1818
"""Run singer in parallel using the specified parameters."""
1919

2020
# Read the breakpoints from the index file
@@ -30,8 +30,18 @@ def run_singer_in_parallel(vcf_prefix, output_prefix, mutation_rate, ratio, bloc
3030
start = breakpoints[i]
3131

3232
# Base command
33-
cmd = f"{singer_master_executable} -Ne {Ne} -m {mutation_rate} -ratio {ratio} -vcf {vcf_prefix} -output {output_prefix}_{i}_{i+1} -start {start} -end {start + block_length} -n {num_iters} -thin {thinning_interval} -polar {polar}"
34-
33+
cmd = f"{singer_master_executable} -vcf {vcf_prefix} -output {output_prefix}_{i}_{i+1} -start {start} -end {start + block_length} -n {num_iters} -thin {thinning_interval} -polar {polar}"
34+
if Ne is not None:
35+
cmd += f" -Ne {Ne}"
36+
if recomb_map:
37+
cmd += f" -recomb_map \"{recomb_map}\""
38+
else:
39+
cmd += f" -ratio {ratio}"
40+
if mut_map is not None:
41+
cmd += f" -mut_map {mut_map}"
42+
else:
43+
cmd += f" -m {mutation_rate}"
44+
3545
cmd_list.append(cmd)
3646

3747
# Execute the commands in parallel
@@ -47,28 +57,32 @@ def convert_long_ARG(vcf_prefix, output_prefix, num_iters, freq):
4757

4858
def main():
4959
parser = argparse.ArgumentParser(description="Parallelize singer runs by cutting the chromosome")
50-
parser.add_argument('-Ne', type=float, required=True, help='Effective population size. Default: 1e4.')
51-
parser.add_argument("-m", type=float, required=True, help="Mutation rate.")
60+
parser.add_argument('-Ne', type=float, required=False, help='Effective population size. Default: 1e4.')
61+
parser.add_argument("-m", type=float, required=False, help="Mutation rate.")
62+
parser.add_argument("-mut_map", type=str, help="Filename for the mutation rate map.")
5263
parser.add_argument("-ratio", type=float, default=1, help="Recombination to mutation ratio. Default: 1.")
64+
parser.add_argument("-recomb_map", default=None, required=False, help="Filename for the recombination rate map.")
5365
parser.add_argument("-L", type=int, default=int(1e6), help="Block length. Default: 1e6.")
5466
parser.add_argument("-vcf", type=str, required=True, help="VCF file prefix (without .vcf or .vcf.gz extension).")
5567
parser.add_argument("-output", type=str, required=True, help="Output file prefix.")
5668
parser.add_argument("-n", type=int, required=True, help="Number of MCMC samples.")
5769
parser.add_argument("-thin", type=int, required=True, help="Thinning interval length.")
5870
parser.add_argument("-polar", type=float, default=0.5, required=False, help="Site flip probability. Default: 0.5.")
5971
parser.add_argument("-freq", type=float, default=1, required=False, help="Convert to tskit every {freq} samples. Default: 1.")
60-
parser.add_argument("-num_cores", type=int, default=20, required=False, help="Number of cores. Default: 20.")
61-
72+
parser.add_argument("-num_cores", type=int, default=20, required=False, help="Number of cores. Default: 20.")
73+
6274
if len(sys.argv) == 1:
6375
parser.print_help(sys.stderr)
6476
sys.exit(1)
6577

6678
args = parser.parse_args()
6779

6880
print("Parameters:")
69-
print(f"Effective population size: {args.Ne}")
81+
print(f"Effective population size: {args.Ne if args.Ne is not None else 'estimated'}")
7082
print(f"Mutation rate: {args.m}")
83+
print(f"Mutation map: {args.mut_map}")
7184
print(f"Recombination to mutation ratio: {args.ratio}")
85+
print(f"Recombination map: {args.recomb_map}")
7286
print(f"Block length: {args.L}")
7387
print(f"VCF file prefix: {args.vcf}")
7488
print(f"Output file prefix: {args.output}")
@@ -79,7 +93,7 @@ def main():
7993
print(f"Number of cores: {args.num_cores}")
8094

8195
index_vcf(args.vcf, args.L)
82-
run_singer_in_parallel(args.vcf, args.output, args.m, args.ratio, args.L, args.n, args.thin, args.Ne, args.polar, args.num_cores)
96+
run_singer_in_parallel(args.vcf, args.output, args.m, args.ratio, args.L, args.n, args.thin, args.Ne, args.polar, args.num_cores, args.recomb_map, args.mut_map)
8397
convert_long_ARG(args.vcf, args.output, args.n, args.freq)
8498

8599
if __name__ == "__main__":

SINGER/SINGER/singer_master

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -293,16 +293,15 @@ def main():
293293

294294

295295
if args.m:
296+
if args.recomb_map:
297+
parser.error("-recomb_map is not compatible with -m; you must use -mut_map")
296298
if args.Ne < 0:
297299
args.Ne = compute_Ne(args.vcf, args.start, args.end, args.m)
298300
if (args.resume):
299301
resume_rate_singer(args.Ne, args.m, args.m*args.ratio, args.start, args.end, args.vcf, args.output, args.n, args.thin, args.polar, args.seed)
300302
else:
301303
run_rate_singer(args.Ne, args.m, args.m*args.ratio, args.start, args.end, args.vcf, args.output, args.n, args.thin, args.polar, args.seed)
302-
return
303-
304-
if args.mut_map and not args.m:
305-
args.m = compute_average_rate(args.mut_map, args.start, args.end)
304+
elif args.mut_map:
306305
if not args.recomb_map:
307306
parser.error("You must provide -recomb_map when using the -mut_map flag")
308307

0 commit comments

Comments
 (0)