|
| 1 | +#!/usr/bin/env python2 |
| 2 | +# -*- coding: utf-8 -*- |
| 3 | + |
| 4 | +# bootstrap_hyde.py |
| 5 | +# Written by PD Blischak |
| 6 | + |
| 7 | +""" |
| 8 | +<< bootstrap_hyde.py >> |
| 9 | +
|
| 10 | +Bootstrap resample individuals within a putative hybrid population for a |
| 11 | +specified triple. |
| 12 | +
|
| 13 | +Arguments |
| 14 | +--------- |
| 15 | +
|
| 16 | + - infile <string> : name of the DNA sequence data file. |
| 17 | + - mapfile <string> : name of the taxon map file. |
| 18 | + - outgroup <string> : name of the outgroup. |
| 19 | + - triples <string> : name of the file containing triples for testing. |
| 20 | + - reps <int> : number of bootstrap replicates (default=100). |
| 21 | + - nind <int> : number of sampled individuals. |
| 22 | + - nsites <int> : number of sampled sites. |
| 23 | + - ntaxa <int> : number of sampled taxa/populations. |
| 24 | + - prefix <string> : name added to the beginning of output file. |
| 25 | + - quiet <flag> : suppress printing to stdout. |
| 26 | +
|
| 27 | +Output |
| 28 | +------ |
| 29 | +
|
| 30 | + Writes a file ('hyde-boot.txt') with the bootstrap results for each triple. These are Written |
| 31 | + in the same format as the normal hyde results file except that the bootstrap |
| 32 | + replicates for each triple are separated by four pound symbols and a new line |
| 33 | + "####\\n" (can be used to split the bootstrap replicates with the split() |
| 34 | + function). |
| 35 | +""" |
| 36 | + |
| 37 | +from __future__ import print_function |
| 38 | +import phyde as hd |
| 39 | +import argparse |
| 40 | +import sys |
| 41 | +import os |
| 42 | +import multiprocess as mp |
| 43 | + |
| 44 | +def parse_triples(triples_file): |
| 45 | + """ |
| 46 | + Parse a three column table or previous results file to get the names |
| 47 | + of the taxa that are going to be tested for hybridization. |
| 48 | +
|
| 49 | + Returns a list of three-tuples of the form (P1, Hybrid, P2) for all triples |
| 50 | + that are to be tested. |
| 51 | + """ |
| 52 | + triples = [] |
| 53 | + with open(triples_file) as f: |
| 54 | + lines = f.read().splitlines() |
| 55 | + # remove header information if reading in a previous results file |
| 56 | + if lines[0].split()[0] == "P1" and lines[0].split()[1] == "Hybrid" and lines[0].split()[2] == "P2": |
| 57 | + lines = lines[1:] |
| 58 | + # catch the case where the last line in the file is blank |
| 59 | + if len(lines[-1]) == 0: |
| 60 | + triples = [(l.split()[0], l.split()[1], l.split()[2]) for l in lines[:-1]] |
| 61 | + else: |
| 62 | + triples = [(l.split()[0], l.split()[1], l.split()[2]) for l in lines] |
| 63 | + return triples |
| 64 | + |
| 65 | +def write_boot(boot, triple, outfile): |
| 66 | + """ |
| 67 | + Write the current dictionary of bootstrapping output from bootstrap_triple() |
| 68 | + to the file passed as an argument to the function. |
| 69 | + """ |
| 70 | + print("P1\tHybrid\tP2\tZscore\tPvalue\tGamma\tAAAA\tAAAB\tAABA\tAABB\tAABC\tABAA\tABAB\tABAC\tABBA\tBAAA\tABBC\tCABC\tBACA\tBCAA\tABCD\n", end='', file=outfile) |
| 71 | + for v in boot.values(): |
| 72 | + print(triple[0], "\t", triple[1], "\t", triple[2], "\t", sep='', end='', file=outfile) |
| 73 | + print(v['Zscore'], "\t", sep='', end='', file=outfile) |
| 74 | + print(v['Pvalue'], "\t", sep='', end='', file=outfile) |
| 75 | + print(v['Gamma'], "\t", sep='', end='', file=outfile) |
| 76 | + print(v['AAAA'], "\t", sep='', end='', file=outfile) |
| 77 | + print(v['AAAB'], "\t", sep='', end='', file=outfile) |
| 78 | + print(v['AABA'], "\t", sep='', end='', file=outfile) |
| 79 | + print(v['AABB'], "\t", sep='', end='', file=outfile) |
| 80 | + print(v['AABC'], "\t", sep='', end='', file=outfile) |
| 81 | + print(v['ABAA'], "\t", sep='', end='', file=outfile) |
| 82 | + print(v['ABAB'], "\t", sep='', end='', file=outfile) |
| 83 | + print(v['ABAC'], "\t", sep='', end='', file=outfile) |
| 84 | + print(v['ABBA'], "\t", sep='', end='', file=outfile) |
| 85 | + print(v['BAAA'], "\t", sep='', end='', file=outfile) |
| 86 | + print(v['ABBC'], "\t", sep='', end='', file=outfile) |
| 87 | + print(v['CABC'], "\t", sep='', end='', file=outfile) |
| 88 | + print(v['BACA'], "\t", sep='', end='', file=outfile) |
| 89 | + print(v['BCAA'], "\t", sep='', end='', file=outfile) |
| 90 | + print(v['ABCD'], "\n", sep='', end='', file=outfile) |
| 91 | + |
| 92 | +if __name__ == "__main__": |
| 93 | + """ |
| 94 | + Runs the script. |
| 95 | + """ |
| 96 | + # print docstring if only the name of the script is given |
| 97 | + if len(sys.argv) < 2: |
| 98 | + print(__doc__) |
| 99 | + sys.exit(0) |
| 100 | + |
| 101 | + parser = argparse.ArgumentParser(description="Options for bootstrap_hyde.py", |
| 102 | + add_help=True) |
| 103 | + |
| 104 | + required = parser.add_argument_group("required arguments") |
| 105 | + required.add_argument('-i', '--infile', action="store", type=str, required=True, |
| 106 | + metavar='\b', help="name of the data input file") |
| 107 | + required.add_argument('-m', '--map', action="store", type=str, required=True, |
| 108 | + metavar='\b', help="map of individuals to taxa") |
| 109 | + required.add_argument('-o', '--outgroup', action="store", type=str, required=True, |
| 110 | + metavar='\b', help="name of the outgroup (only one accepted)") |
| 111 | + required.add_argument('-tr','--triples', action="store", type=str, required=True, |
| 112 | + metavar='\b', help="table of triples to be analyzed.") |
| 113 | + required.add_argument('-n', '--num_ind', action="store", type=int, required=True, |
| 114 | + metavar='\b', help="number of individuals in data matrix") |
| 115 | + required.add_argument('-t', '--num_taxa', action="store", type=int, required=True, |
| 116 | + metavar='\b', help="number of taxa (species, OTUs)") |
| 117 | + required.add_argument('-s', '--num_sites', action="store", type=int, required=True, |
| 118 | + metavar='\b', help="number of sites in the data matrix") |
| 119 | + |
| 120 | + |
| 121 | + additional = parser.add_argument_group("additional arguments") |
| 122 | + additional.add_argument('-r','--reps', action="store", type=int, default=100, |
| 123 | + metavar='\b', help="number of bootstrap replicates [default=100]") |
| 124 | + additional.add_argument('-j','--threads', action="store", type=int, default=mp.cpu_count(), |
| 125 | + metavar='\b', help="number of threads [default=all available]") |
| 126 | + additional.add_argument('--prefix', action="store", type=str, default="hyde", |
| 127 | + metavar='\b', help="prefix appended to output files [default=hyde]") |
| 128 | + additional.add_argument('-q', '--quiet', action="store_true", |
| 129 | + help="supress printing to stdout") |
| 130 | + |
| 131 | + args = parser.parse_args() |
| 132 | + infile = args.infile |
| 133 | + mapfile = args.map |
| 134 | + outgroup = args.outgroup |
| 135 | + triples = parse_triples(args.triples) |
| 136 | + nind = args.num_ind |
| 137 | + ntaxa = args.num_taxa |
| 138 | + nsites = args.num_sites |
| 139 | + reps = args.reps |
| 140 | + prefix = args.prefix |
| 141 | + quiet = args.quiet |
| 142 | + threads = args.threads |
| 143 | + |
| 144 | + if not quiet: print("\nCurrently using ", threads, " thread(s).", sep='') |
| 145 | + # Read data into a HydeData object |
| 146 | + data = hd.HydeData(infile, mapfile, outgroup, nind, ntaxa, nsites) |
| 147 | + |
| 148 | + if os.path.exists(prefix+"-boot.txt"): |
| 149 | + if not quiet: print("\n** Warning: File '"+prefix+"-boot.txt' already exists. **") |
| 150 | + if not quiet: print("** Renaming to 'old-"+prefix+"-boot.txt'. **\n") |
| 151 | + os.rename(prefix+"-boot.txt", "old-"+prefix+"-boot.txt") |
| 152 | + outfile = open(prefix+"-boot.txt", 'wa') |
| 153 | + else: |
| 154 | + outfile = open(prefix+"-boot.txt", 'wa') |
| 155 | + |
| 156 | + def wrap_test((p1, hyb, p2)): |
| 157 | + """ |
| 158 | + Wrapper function for running a hypothesis |
| 159 | + test on a given triple. |
| 160 | + """ |
| 161 | + res = {} |
| 162 | + res[(p1, hyb, p2)] = data.bootstrap_triple(p1, hyb, p2, reps) |
| 163 | + return res |
| 164 | + |
| 165 | + def mp_run(): |
| 166 | + """ |
| 167 | + Run tests on multiple threads. |
| 168 | + """ |
| 169 | + p = mp.Pool(threads) |
| 170 | + res = p.map(wrap_test, triples) |
| 171 | + return res |
| 172 | + |
| 173 | + # run the bootsrap replicates |
| 174 | + out = mp_run() |
| 175 | + counter = 0 |
| 176 | + for o in out: |
| 177 | + key = o.keys()[0] |
| 178 | + value = o.values()[0] |
| 179 | + write_boot(value, key, outfile) |
| 180 | + counter += 1 |
| 181 | + if counter != len(triples): print("####\n", end='', file=outfile) |
0 commit comments