Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 16 additions & 4 deletions SINGER/SINGER/Rate_map.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,12 @@

#include "Rate_map.hpp"

Rate_map::Rate_map() {}
// Even if there are regions of truly no recombination, we have to pretend there is
// a really low rate to make the math work out currently.
static constexpr double MIN_RECOMB_RATE = 1e-16;

Rate_map::Rate_map(double poffset)
: offset(poffset) {}

void Rate_map::load_map(string mut_map_file) {
ifstream fin(mut_map_file);
Expand All @@ -21,8 +26,12 @@ void Rate_map::load_map(string mut_map_file) {
double rate;
double mut_dist;
while (fin >> left >> right >> rate) {
if (rate < MIN_RECOMB_RATE) {
rate = MIN_RECOMB_RATE;
}
coordinates.push_back(left);
mut_dist = rate_distances.back() + rate*(right - left);
assert(mut_dist != rate_distances.back());
rate_distances.push_back(mut_dist);
}
sequence_length = right;
Expand All @@ -38,16 +47,19 @@ int Rate_map::find_index(double x) {
}

double Rate_map::cumulative_distance(double x) {
int index = find_index(x);
const double actualCoordinate = x + offset;
int index = find_index(actualCoordinate);
double prev_dist = rate_distances[index];
double next_dist = rate_distances[index+1];
double p = (x - coordinates[index])/(coordinates[index+1] - coordinates[index]);
double p = (actualCoordinate - coordinates[index])/(coordinates[index+1] - coordinates[index]);
double dist = (1-p)*prev_dist + p*next_dist;
return dist;
}
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

likely Rate_map::mean_rate() also needs to be adjusted based on the region between start_pos and end_pos instead of taking the mean of the entire map. hopefully it is not a big deal, given that parallel_singer uses regions of 1Mbp that hopefully have means close to the full mean.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This fix makes sense (to me) because the alternative is that user has to adjust the recombination map many times, which could be cumbersome.


double Rate_map::segment_distance(double x, double y) {
return cumulative_distance(y) - cumulative_distance(x);
double cd = cumulative_distance(y) - cumulative_distance(x);
assert(cd > 0.0);
return cd;
}

double Rate_map::mean_rate() {
Expand Down
12 changes: 8 additions & 4 deletions SINGER/SINGER/Rate_map.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,28 +9,32 @@
#define Rate_map_hpp

#include <stdio.h>
#include <limits>
#include "Node.hpp"

class Rate_map {

public:

double sequence_length = INT_MAX;
// The rate map is for the entire chromosome, typically, but you may be running SINGER on only
// a subset of that chromosome. This offset is where SINGER's concept of 0 starts.
double offset = std::numeric_limits<double>::max();
double sequence_length = std::numeric_limits<double>::max();
vector<double> coordinates = {};
vector<double> rate_distances = {};

Rate_map();
Rate_map(double poffset = 0.0);

void load_map(string mut_map_file);

int find_index(double x);

double cumulative_distance(double x);

double segment_distance(double x, double y);

double mean_rate();

private:
int find_index(double x);
};

#endif /* Rate_map_hpp */
5 changes: 3 additions & 2 deletions SINGER/SINGER/Sampler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ Sampler::Sampler(double pop_size, Rate_map &rm, Rate_map &mm) {
recomb_rate = rm.mean_rate()*Ne;
recomb_map = rm;
mut_map = mm;
has_map = true;
}

void Sampler::set_precision(double c, double q) {
Expand Down Expand Up @@ -306,7 +307,7 @@ void Sampler::build_singleton_arg() {
arg = ARG(Ne, sequence_length);
arg.discretize(bin_size);
arg.build_singleton_arg(n);
if (mut_rate > 0 and recomb_rate > 0) {
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this was always true, because of lines 20 and 21 above.

if (!has_map) {
arg.compute_rhos_thetas(recomb_rate, mut_rate);
} else {
arg.compute_rhos_thetas(recomb_map, mut_map);
Expand Down Expand Up @@ -787,7 +788,7 @@ void Sampler::load_resume_arg() {
coord_file = output_prefix + "_coordinates.txt";
arg.read(node_file, branch_file, recomb_file, mut_file);
arg.read_coordinates(coord_file);
if (mut_rate > 0 and recomb_rate > 0) {
if (!has_map) {
arg.compute_rhos_thetas(recomb_rate, mut_rate);
} else {
arg.compute_rhos_thetas(recomb_map, mut_map);
Expand Down
1 change: 1 addition & 0 deletions SINGER/SINGER/Sampler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ class Sampler {
vector<Node_ptr> ordered_sample_nodes = {};
unordered_map<double, set<Node_ptr>> carriers = {};
unordered_map<Node_ptr, set<double>> mutation_sets = {};
bool has_map = false;

Sampler();

Expand Down
21 changes: 8 additions & 13 deletions SINGER/SINGER/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -224,16 +224,11 @@ int main(int argc, const char * argv[]) {
exit(1);
}
}
if (r < 0) {
cerr << "-r flag missing or invalid value. " << endl;
exit(1);
}
if (m < 0) {
cerr << "-m flag missing or invalid value. " << endl;
exit(1);
}
if (Ne < 0) {
cerr << "-Ne flag missing or invalid value. " << endl;
bool bothRates = (r > 0 && m > 0);
bool bothMaps = !(recomb_map_filename.empty() || mut_map_filename.empty());
if (!(bothRates || bothMaps)) {
cerr << "Must provide either valid values for -r and -m, OR provide a valid "
<< "mutation map and recombination map. " << endl;
exit(1);
}
if (input_filename.size() == 0) {
Expand All @@ -253,12 +248,12 @@ int main(int argc, const char * argv[]) {
exit(1);
}
Sampler sampler;
if (r > 0 and m > 0) {
if (bothRates) {
sampler = Sampler(Ne, r, m);
} else {
Rate_map recomb_map = Rate_map();
Rate_map recomb_map = Rate_map(start_pos);
recomb_map.load_map(recomb_map_filename);
Rate_map mut_map = Rate_map();
Rate_map mut_map = Rate_map(start_pos);
mut_map.load_map(mut_map_filename);
sampler = Sampler(Ne, recomb_map, mut_map);
}
Expand Down
32 changes: 23 additions & 9 deletions SINGER/SINGER/parallel_singer
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def index_vcf(vcf_prefix, block_length):
subprocess.run(["python", indexer, vcf_prefix, str(block_length)])


def run_singer_in_parallel(vcf_prefix, output_prefix, mutation_rate, ratio, block_length, num_iters, thinning_interval, Ne, polar, num_cores):
def run_singer_in_parallel(vcf_prefix, output_prefix, mutation_rate, ratio, block_length, num_iters, thinning_interval, Ne, polar, num_cores, recomb_map, mut_map):
"""Run singer in parallel using the specified parameters."""

# Read the breakpoints from the index file
Expand All @@ -30,8 +30,18 @@ def run_singer_in_parallel(vcf_prefix, output_prefix, mutation_rate, ratio, bloc
start = breakpoints[i]

# Base command
cmd = f"{singer_master_executable} -Ne {Ne} -m {mutation_rate} -ratio {ratio} -vcf {vcf_prefix} -output {output_prefix}_{i}_{i+1} -start {start} -end {start + block_length} -n {num_iters} -thin {thinning_interval} -polar {polar}"

cmd = f"{singer_master_executable} -vcf {vcf_prefix} -output {output_prefix}_{i}_{i+1} -start {start} -end {start + block_length} -n {num_iters} -thin {thinning_interval} -polar {polar}"
if Ne is not None:
cmd += f" -Ne {Ne}"
if recomb_map:
cmd += f" -recomb_map \"{recomb_map}\""
else:
cmd += f" -ratio {ratio}"
if mut_map is not None:
cmd += f" -mut_map {mut_map}"
else:
cmd += f" -m {mutation_rate}"

cmd_list.append(cmd)

# Execute the commands in parallel
Expand All @@ -47,28 +57,32 @@ def convert_long_ARG(vcf_prefix, output_prefix, num_iters, freq):

def main():
parser = argparse.ArgumentParser(description="Parallelize singer runs by cutting the chromosome")
parser.add_argument('-Ne', type=float, required=True, help='Effective population size. Default: 1e4.')
parser.add_argument("-m", type=float, required=True, help="Mutation rate.")
parser.add_argument('-Ne', type=float, required=False, help='Effective population size. Default: 1e4.')
parser.add_argument("-m", type=float, required=False, help="Mutation rate.")
parser.add_argument("-mut_map", type=str, help="Filename for the mutation rate map.")
parser.add_argument("-ratio", type=float, default=1, help="Recombination to mutation ratio. Default: 1.")
parser.add_argument("-recomb_map", default=None, required=False, help="Filename for the recombination rate map.")
parser.add_argument("-L", type=int, default=int(1e6), help="Block length. Default: 1e6.")
parser.add_argument("-vcf", type=str, required=True, help="VCF file prefix (without .vcf or .vcf.gz extension).")
parser.add_argument("-output", type=str, required=True, help="Output file prefix.")
parser.add_argument("-n", type=int, required=True, help="Number of MCMC samples.")
parser.add_argument("-thin", type=int, required=True, help="Thinning interval length.")
parser.add_argument("-polar", type=float, default=0.5, required=False, help="Site flip probability. Default: 0.5.")
parser.add_argument("-freq", type=float, default=1, required=False, help="Convert to tskit every {freq} samples. Default: 1.")
parser.add_argument("-num_cores", type=int, default=20, required=False, help="Number of cores. Default: 20.")
parser.add_argument("-num_cores", type=int, default=20, required=False, help="Number of cores. Default: 20.")

if len(sys.argv) == 1:
parser.print_help(sys.stderr)
sys.exit(1)

args = parser.parse_args()

print("Parameters:")
print(f"Effective population size: {args.Ne}")
print(f"Effective population size: {args.Ne if args.Ne is not None else 'estimated'}")
print(f"Mutation rate: {args.m}")
print(f"Mutation map: {args.mut_map}")
print(f"Recombination to mutation ratio: {args.ratio}")
print(f"Recombination map: {args.recomb_map}")
print(f"Block length: {args.L}")
print(f"VCF file prefix: {args.vcf}")
print(f"Output file prefix: {args.output}")
Expand All @@ -79,7 +93,7 @@ def main():
print(f"Number of cores: {args.num_cores}")

index_vcf(args.vcf, args.L)
run_singer_in_parallel(args.vcf, args.output, args.m, args.ratio, args.L, args.n, args.thin, args.Ne, args.polar, args.num_cores)
run_singer_in_parallel(args.vcf, args.output, args.m, args.ratio, args.L, args.n, args.thin, args.Ne, args.polar, args.num_cores, args.recomb_map, args.mut_map)
convert_long_ARG(args.vcf, args.output, args.n, args.freq)

if __name__ == "__main__":
Expand Down
7 changes: 3 additions & 4 deletions SINGER/SINGER/singer_master
Original file line number Diff line number Diff line change
Expand Up @@ -293,16 +293,15 @@ def main():


if args.m:
if args.recomb_map:
parser.error("-recomb_map is not compatible with -m; you must use -mut_map")
if args.Ne < 0:
args.Ne = compute_Ne(args.vcf, args.start, args.end, args.m)
if (args.resume):
resume_rate_singer(args.Ne, args.m, args.m*args.ratio, args.start, args.end, args.vcf, args.output, args.n, args.thin, args.polar, args.seed)
else:
run_rate_singer(args.Ne, args.m, args.m*args.ratio, args.start, args.end, args.vcf, args.output, args.n, args.thin, args.polar, args.seed)
return

if args.mut_map and not args.m:
args.m = compute_average_rate(args.mut_map, args.start, args.end)
elif args.mut_map:
if not args.recomb_map:
parser.error("You must provide -recomb_map when using the -mut_map flag")

Expand Down