Skip to content

Commit cd0fd15

Browse files
committed
made individual and bootstrapping scripts multithreaded; updating docs and other code accordingly
1 parent 91aeae7 commit cd0fd15

File tree

8 files changed

+388
-25
lines changed

8 files changed

+388
-25
lines changed

.travis.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ before_install:
88
- sudo apt-get update -qq
99
- sudo apt-get install g++-4.8 gcc -y -qq
1010
- make CXX=g++-4.8
11-
- pip install numpy pandas scipy cython matplotlib seaborn multiprocessing
11+
- pip install numpy pandas scipy cython matplotlib seaborn multiprocess
1212

1313
install:
1414
- python setup.py install

CHANGELOG.md

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,14 @@
22

33
## v0.3.3
44

5-
**New multithreaded script**
5+
**New multithreaded scripts**
66

7-
- The `run_hyde.py` script now has a multithreaded version: `run_hyde_mp.py`.
8-
It works best for data sets with a lot of species.
7+
- All of the `*_hyde.py` scripts now have multithreaded versions:
8+
`run_hyde_mp.py`, `individual_hyde_mp.py`, and `bootstrap_hyde_mp.py`.
9+
Parallelization with these scripts works best with data sets that have a lot of species.
10+
We use the `multiprocess` module, which will need to be installed: `pip install multiprocess`.
911

10-
- We also added a quiet option (`-q` or `--quiet`) to supress printing to stdout
12+
- We also added a quiet option (`-q` or `--quiet`) to suppress printing to stdout
1113
while the program is running.
1214

1315
## v0.3.2

README.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,11 @@ If you only have a few hypotheses that you want to test, then you can also pass
5454
a triples file to ``run_hyde.py`` and it will only test those hypotheses rather than
5555
testing everything.
5656

57+
Multithreaded versions of these scripts are also available (``run_hyde_mp.py``,
58+
``individual_hyde_mp.py``, and ``bootstrap_hyde_mp.py``).
59+
Make sure you have the ``multiprocess`` module installed before you use them:
60+
``pip install multiprocess``.
61+
5762
Getting help
5863
------------
5964

docs/conf.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,9 +55,9 @@
5555
# built documents.
5656
#
5757
# The short X.Y version.
58-
version = u'0.3.0'
58+
version = u'0.3.3'
5959
# The full version, including alpha/beta/rc tags.
60-
release = u'0.3.0'
60+
release = u'0.3.3'
6161

6262
# The language for content autogenerated by Sphinx. Refer to documentation
6363
# for a list of supported languages.

scripts/bootstrap_hyde_mp.py

Lines changed: 181 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,181 @@
1+
#!/usr/bin/env python2
2+
# -*- coding: utf-8 -*-
3+
4+
# bootstrap_hyde.py
5+
# Written by PD Blischak
6+
7+
"""
8+
<< bootstrap_hyde.py >>
9+
10+
Bootstrap resample individuals within a putative hybrid population for a
11+
specified triple.
12+
13+
Arguments
14+
---------
15+
16+
- infile <string> : name of the DNA sequence data file.
17+
- mapfile <string> : name of the taxon map file.
18+
- outgroup <string> : name of the outgroup.
19+
- triples <string> : name of the file containing triples for testing.
20+
- reps <int> : number of bootstrap replicates (default=100).
21+
- nind <int> : number of sampled individuals.
22+
- nsites <int> : number of sampled sites.
23+
- ntaxa <int> : number of sampled taxa/populations.
24+
- prefix <string> : name added to the beginning of output file.
25+
- quiet <flag> : suppress printing to stdout.
26+
27+
Output
28+
------
29+
30+
Writes a file ('hyde-boot.txt') with the bootstrap results for each triple. These are Written
31+
in the same format as the normal hyde results file except that the bootstrap
32+
replicates for each triple are separated by four pound symbols and a new line
33+
"####\\n" (can be used to split the bootstrap replicates with the split()
34+
function).
35+
"""
36+
37+
from __future__ import print_function
38+
import phyde as hd
39+
import argparse
40+
import sys
41+
import os
42+
import multiprocess as mp
43+
44+
def parse_triples(triples_file):
45+
"""
46+
Parse a three column table or previous results file to get the names
47+
of the taxa that are going to be tested for hybridization.
48+
49+
Returns a list of three-tuples of the form (P1, Hybrid, P2) for all triples
50+
that are to be tested.
51+
"""
52+
triples = []
53+
with open(triples_file) as f:
54+
lines = f.read().splitlines()
55+
# remove header information if reading in a previous results file
56+
if lines[0].split()[0] == "P1" and lines[0].split()[1] == "Hybrid" and lines[0].split()[2] == "P2":
57+
lines = lines[1:]
58+
# catch the case where the last line in the file is blank
59+
if len(lines[-1]) == 0:
60+
triples = [(l.split()[0], l.split()[1], l.split()[2]) for l in lines[:-1]]
61+
else:
62+
triples = [(l.split()[0], l.split()[1], l.split()[2]) for l in lines]
63+
return triples
64+
65+
def write_boot(boot, triple, outfile):
66+
"""
67+
Write the current dictionary of bootstrapping output from bootstrap_triple()
68+
to the file passed as an argument to the function.
69+
"""
70+
print("P1\tHybrid\tP2\tZscore\tPvalue\tGamma\tAAAA\tAAAB\tAABA\tAABB\tAABC\tABAA\tABAB\tABAC\tABBA\tBAAA\tABBC\tCABC\tBACA\tBCAA\tABCD\n", end='', file=outfile)
71+
for v in boot.values():
72+
print(triple[0], "\t", triple[1], "\t", triple[2], "\t", sep='', end='', file=outfile)
73+
print(v['Zscore'], "\t", sep='', end='', file=outfile)
74+
print(v['Pvalue'], "\t", sep='', end='', file=outfile)
75+
print(v['Gamma'], "\t", sep='', end='', file=outfile)
76+
print(v['AAAA'], "\t", sep='', end='', file=outfile)
77+
print(v['AAAB'], "\t", sep='', end='', file=outfile)
78+
print(v['AABA'], "\t", sep='', end='', file=outfile)
79+
print(v['AABB'], "\t", sep='', end='', file=outfile)
80+
print(v['AABC'], "\t", sep='', end='', file=outfile)
81+
print(v['ABAA'], "\t", sep='', end='', file=outfile)
82+
print(v['ABAB'], "\t", sep='', end='', file=outfile)
83+
print(v['ABAC'], "\t", sep='', end='', file=outfile)
84+
print(v['ABBA'], "\t", sep='', end='', file=outfile)
85+
print(v['BAAA'], "\t", sep='', end='', file=outfile)
86+
print(v['ABBC'], "\t", sep='', end='', file=outfile)
87+
print(v['CABC'], "\t", sep='', end='', file=outfile)
88+
print(v['BACA'], "\t", sep='', end='', file=outfile)
89+
print(v['BCAA'], "\t", sep='', end='', file=outfile)
90+
print(v['ABCD'], "\n", sep='', end='', file=outfile)
91+
92+
if __name__ == "__main__":
93+
"""
94+
Runs the script.
95+
"""
96+
# print docstring if only the name of the script is given
97+
if len(sys.argv) < 2:
98+
print(__doc__)
99+
sys.exit(0)
100+
101+
parser = argparse.ArgumentParser(description="Options for bootstrap_hyde.py",
102+
add_help=True)
103+
104+
required = parser.add_argument_group("required arguments")
105+
required.add_argument('-i', '--infile', action="store", type=str, required=True,
106+
metavar='\b', help="name of the data input file")
107+
required.add_argument('-m', '--map', action="store", type=str, required=True,
108+
metavar='\b', help="map of individuals to taxa")
109+
required.add_argument('-o', '--outgroup', action="store", type=str, required=True,
110+
metavar='\b', help="name of the outgroup (only one accepted)")
111+
required.add_argument('-tr','--triples', action="store", type=str, required=True,
112+
metavar='\b', help="table of triples to be analyzed.")
113+
required.add_argument('-n', '--num_ind', action="store", type=int, required=True,
114+
metavar='\b', help="number of individuals in data matrix")
115+
required.add_argument('-t', '--num_taxa', action="store", type=int, required=True,
116+
metavar='\b', help="number of taxa (species, OTUs)")
117+
required.add_argument('-s', '--num_sites', action="store", type=int, required=True,
118+
metavar='\b', help="number of sites in the data matrix")
119+
120+
121+
additional = parser.add_argument_group("additional arguments")
122+
additional.add_argument('-r','--reps', action="store", type=int, default=100,
123+
metavar='\b', help="number of bootstrap replicates [default=100]")
124+
additional.add_argument('-j','--threads', action="store", type=int, default=mp.cpu_count(),
125+
metavar='\b', help="number of threads [default=all available]")
126+
additional.add_argument('--prefix', action="store", type=str, default="hyde",
127+
metavar='\b', help="prefix appended to output files [default=hyde]")
128+
additional.add_argument('-q', '--quiet', action="store_true",
129+
help="supress printing to stdout")
130+
131+
args = parser.parse_args()
132+
infile = args.infile
133+
mapfile = args.map
134+
outgroup = args.outgroup
135+
triples = parse_triples(args.triples)
136+
nind = args.num_ind
137+
ntaxa = args.num_taxa
138+
nsites = args.num_sites
139+
reps = args.reps
140+
prefix = args.prefix
141+
quiet = args.quiet
142+
threads = args.threads
143+
144+
if not quiet: print("\nCurrently using ", threads, " thread(s).", sep='')
145+
# Read data into a HydeData object
146+
data = hd.HydeData(infile, mapfile, outgroup, nind, ntaxa, nsites)
147+
148+
if os.path.exists(prefix+"-boot.txt"):
149+
if not quiet: print("\n** Warning: File '"+prefix+"-boot.txt' already exists. **")
150+
if not quiet: print("** Renaming to 'old-"+prefix+"-boot.txt'. **\n")
151+
os.rename(prefix+"-boot.txt", "old-"+prefix+"-boot.txt")
152+
outfile = open(prefix+"-boot.txt", 'wa')
153+
else:
154+
outfile = open(prefix+"-boot.txt", 'wa')
155+
156+
def wrap_test((p1, hyb, p2)):
157+
"""
158+
Wrapper function for running a hypothesis
159+
test on a given triple.
160+
"""
161+
res = {}
162+
res[(p1, hyb, p2)] = data.bootstrap_triple(p1, hyb, p2, reps)
163+
return res
164+
165+
def mp_run():
166+
"""
167+
Run tests on multiple threads.
168+
"""
169+
p = mp.Pool(threads)
170+
res = p.map(wrap_test, triples)
171+
return res
172+
173+
# run the bootsrap replicates
174+
out = mp_run()
175+
counter = 0
176+
for o in out:
177+
key = o.keys()[0]
178+
value = o.values()[0]
179+
write_boot(value, key, outfile)
180+
counter += 1
181+
if counter != len(triples): print("####\n", end='', file=outfile)

0 commit comments

Comments
 (0)