Skip to content

Commit 844ea0d

Browse files
committed
Merge branch 'usepopinfo'
2 parents a366038 + faad1c0 commit 844ea0d

File tree

3 files changed

+96
-48
lines changed

3 files changed

+96
-48
lines changed

.travis.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
language: python
22

3+
os:
4+
- linux
5+
36
python:
47
- "3.5"
58

structure_threader/plotter/structplot.py

Lines changed: 92 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,66 @@
1616
# along with structure_threader. If not, see <http://www.gnu.org/licenses/>.
1717

1818

19+
import os
20+
from collections import Counter, defaultdict
21+
import numpy as np
1922
import matplotlib
2023
matplotlib.use('Agg')
21-
2224
import matplotlib.pyplot as plt
23-
import numpy as np
24-
from collections import Counter
25-
import os
25+
26+
27+
def parse_usepopinfo(fhandle, end_string):
28+
"""
29+
Parses Structure results when using the USEPOPINFO flag.
30+
"""
31+
qvalues = np.array([])
32+
poplist = []
33+
# Skip subheader
34+
next(fhandle)
35+
for line in fhandle:
36+
if line.strip() != "":
37+
if line.strip().lower().startswith(end_string):
38+
return qvalues, poplist
39+
40+
qvalues_dic = defaultdict(float)
41+
fields = line.strip().split("|")[:-1]
42+
# Assumed pop
43+
qvalues_dic[fields[0].split()[3]] = float(fields[0].split()[5])
44+
# Other pops
45+
for pop in fields[1:]:
46+
prob = sum(map(float, pop.split()[-3:]))
47+
qvalues_dic[pop.split()[1][:-1]] = prob
48+
clv = []
49+
for percents in sorted(list(map(int, list(qvalues_dic.keys())))):
50+
clv.append(qvalues_dic[str(percents)])
51+
try:
52+
qvalues = np.vstack((qvalues, clv))
53+
except ValueError:
54+
qvalues = np.array(clv)
55+
poplist.append(int(fields[0].split()[3]))
56+
57+
58+
def parse_nousepopinfo(fhandle, end_string):
59+
"""
60+
Parses Structure results when **not** using the USEPOPINFO flag.
61+
"""
62+
qvalues = np.array([])
63+
poplist = []
64+
65+
for line in fhandle:
66+
if line.strip() != "":
67+
if line.strip().lower().startswith(end_string):
68+
return qvalues, poplist
69+
70+
fields = line.strip().split()
71+
# Get cluster values
72+
clv = [float(x) for x in fields[5:]]
73+
try:
74+
qvalues = np.vstack((qvalues, clv))
75+
except ValueError:
76+
qvalues = np.array(clv)
77+
# Get population
78+
poplist.append(int(fields[3]))
2679

2780

2881
def dataminer(indfile_name, fmt, popfile=None):
@@ -52,43 +105,34 @@ def dataminer(indfile_name, fmt, popfile=None):
52105
[x[2] for x in poparray])]
53106

54107
# Parse structure/faststructure output file
55-
if fmt == "fastStructure":
108+
if fmt == "fastStructure": # fastStructure
56109
qvalues = np.genfromtxt(indfile_name)
57110

58-
else:
59-
qvalues = np.array([])
111+
else: # STRUCTURE
112+
parsing_string = "inferred ancestry of individuals:"
113+
popinfo_string = ("probability of being from assumed population | " +
114+
"prob of other pops")
115+
end_parsing_string = "estimated allele frequencies in each cluster"
60116

61-
# Start file parsing
62-
parse = False
63117
with open(indfile_name) as file_handle:
64118
for line in file_handle:
65-
if line.strip().lower().startswith("inferred ancestry of "
66-
"individuals:"):
67-
# Enter parse mode ON
68-
parse = True
69-
# Skip subheader
70-
next(file_handle)
71-
elif line.strip().lower().startswith("estimated allele "
72-
"frequencies in each "
73-
"cluster"):
74-
# parse mode OFF
75-
parse = False
76-
elif parse:
77-
if line.strip() != "":
78-
fields = line.strip().split()
79-
# Get cluster values
80-
cl = [float(x) for x in fields[5:]]
81-
try:
82-
qvalues = np.vstack((qvalues, cl))
83-
except ValueError:
84-
qvalues = np.array(cl)
85-
if not popfile:
86-
# Get population
87-
poplist.append(int(fields[3]))
119+
if line.strip().lower().startswith(parsing_string):
120+
if next(file_handle).lower().startswith(popinfo_string):
121+
qvalues, numlist = parse_usepopinfo(file_handle,
122+
end_parsing_string)
123+
break
124+
else:
125+
qvalues, numlist = parse_nousepopinfo(file_handle,
126+
end_parsing_string)
127+
break
128+
#if not popfile:
129+
#poplist = numlist
130+
88131

89132
if not popfile:
90133
# Transform poplist in convenient format, in which each element
91134
# is the boundary of a population in the x-axis
135+
poplist = numlist
92136
poplist = Counter(poplist)
93137
poplist = [([x]*y, None) for x, y in poplist.items()]
94138

@@ -159,7 +203,7 @@ def plotter(qvalues, poplist, outfile):
159203
plt.rcParams["figure.figsize"] = (8 * numinds * .01, 2.64)
160204

161205
fig = plt.figure()
162-
ax = fig.add_subplot(111, xlim=(0, numinds), ylim=(0, 1))
206+
axe = fig.add_subplot(111, xlim=(0, numinds), ylim=(0, 1))
163207

164208
for i in range(qvalues.shape[1]):
165209
# Get bar color. If K exceeds the 12 colors, generate random color
@@ -169,19 +213,19 @@ def plotter(qvalues, poplist, outfile):
169213
clr = np.random.rand(3, 1)
170214

171215
if i == 0:
172-
ax.bar(range(numinds), qvalues[:, i], facecolor=clr,
173-
edgecolor="none", width=1)
216+
axe.bar(range(numinds), qvalues[:, i], facecolor=clr,
217+
edgecolor="none", width=1)
174218
former_q = qvalues[:, i]
175219
else:
176-
ax.bar(range(numinds), qvalues[:, i], bottom=former_q,
177-
facecolor=clr, edgecolor="none", width=1)
220+
axe.bar(range(numinds), qvalues[:, i], bottom=former_q,
221+
facecolor=clr, edgecolor="none", width=1)
178222
former_q = former_q + qvalues[:, i]
179223

180224
# Annotate population info
181225
if poplist:
182226
for i in zip(np.cumsum([len(x[0]) for x in poplist]), poplist):
183227
orderings = [(x, y[0][0]) for x, y in
184-
zip(np.cumsum([len(x[0]) for x in poplist]), poplist)]
228+
zip(np.cumsum([len(x[0]) for x in poplist]), poplist)]
185229
count = 1
186230
for ppl, vals in enumerate(orderings):
187231

@@ -194,19 +238,20 @@ def plotter(qvalues, poplist, outfile):
194238
else vals[0] / 2
195239

196240
# Draw text
197-
ax.text(xpos, -0.05, vals[1] if vals[1] else "Pop{}".format(count),
198-
rotation=45, va="top", ha="right", fontsize=6,
199-
weight="bold")
241+
axe.text(xpos, -0.05, vals[1] if vals[1] else "Pop{}".format(count),
242+
rotation=45, va="top", ha="right", fontsize=6,
243+
weight="bold")
200244
count += 1
201245

202246
for axis in ["top", "bottom", "left", "right"]:
203-
ax.spines[axis].set_linewidth(2)
204-
ax.spines[axis].set_color("black")
247+
axe.spines[axis].set_linewidth(2)
248+
axe.spines[axis].set_color("black")
205249

206250
plt.yticks([])
207251
plt.xticks([])
208252

209-
plt.savefig("{}.pdf".format(outfile), bbox_inches="tight")
253+
254+
plt.savefig("{}.svg".format(outfile), bbox_inches="tight")
210255

211256

212257
def main(result_files, fmt, outdir, popfile=None):
@@ -225,6 +270,6 @@ def main(result_files, fmt, outdir, popfile=None):
225270
if __name__ == "__main__":
226271
from sys import argv
227272
# Usage: structplot.py results_file format outdir
228-
datafile = []
229-
datafile.append(argv[1])
230-
main(datafile, argv[2], argv[3], argv[4])
273+
DATAFILES = []
274+
DATAFILES.append(argv[1])
275+
main(DATAFILES, argv[2], argv[3])

structure_threader/structure_threader.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
def gracious_exit(*args):
3838
"""Graciously exit the program."""
3939
print("\rExiting graciously, murdering child processes and cleaning output"
40-
" directory", end="")
40+
" directory.", end="")
4141
os.chdir(CWD)
4242
sys.exit(0)
4343

0 commit comments

Comments
 (0)