1616# along with structure_threader. If not, see <http://www.gnu.org/licenses/>.
1717
1818
19+ import os
20+ from collections import Counter , defaultdict
21+ import numpy as np
1922import matplotlib
2023matplotlib .use ('Agg' )
21-
2224import matplotlib .pyplot as plt
23- import numpy as np
24- from collections import Counter
25- import os
25+
26+
27+ def parse_usepopinfo (fhandle , end_string ):
28+ """
29+ Parses Structure results when using the USEPOPINFO flag.
30+ """
31+ qvalues = np .array ([])
32+ poplist = []
33+ # Skip subheader
34+ next (fhandle )
35+ for line in fhandle :
36+ if line .strip () != "" :
37+ if line .strip ().lower ().startswith (end_string ):
38+ return qvalues , poplist
39+
40+ qvalues_dic = defaultdict (float )
41+ fields = line .strip ().split ("|" )[:- 1 ]
42+ # Assumed pop
43+ qvalues_dic [fields [0 ].split ()[3 ]] = float (fields [0 ].split ()[5 ])
44+ # Other pops
45+ for pop in fields [1 :]:
46+ prob = sum (map (float , pop .split ()[- 3 :]))
47+ qvalues_dic [pop .split ()[1 ][:- 1 ]] = prob
48+ clv = []
49+ for percents in sorted (list (map (int , list (qvalues_dic .keys ())))):
50+ clv .append (qvalues_dic [str (percents )])
51+ try :
52+ qvalues = np .vstack ((qvalues , clv ))
53+ except ValueError :
54+ qvalues = np .array (clv )
55+ poplist .append (int (fields [0 ].split ()[3 ]))
56+
57+
58+ def parse_nousepopinfo (fhandle , end_string ):
59+ """
60+ Parses Structure results when **not** using the USEPOPINFO flag.
61+ """
62+ qvalues = np .array ([])
63+ poplist = []
64+
65+ for line in fhandle :
66+ if line .strip () != "" :
67+ if line .strip ().lower ().startswith (end_string ):
68+ return qvalues , poplist
69+
70+ fields = line .strip ().split ()
71+ # Get cluster values
72+ clv = [float (x ) for x in fields [5 :]]
73+ try :
74+ qvalues = np .vstack ((qvalues , clv ))
75+ except ValueError :
76+ qvalues = np .array (clv )
77+ # Get population
78+ poplist .append (int (fields [3 ]))
2679
2780
2881def dataminer (indfile_name , fmt , popfile = None ):
@@ -52,43 +105,34 @@ def dataminer(indfile_name, fmt, popfile=None):
52105 [x [2 ] for x in poparray ])]
53106
54107 # Parse structure/faststructure output file
55- if fmt == "fastStructure" :
108+ if fmt == "fastStructure" : # fastStructure
56109 qvalues = np .genfromtxt (indfile_name )
57110
58- else :
59- qvalues = np .array ([])
111+ else : # STRUCTURE
112+ parsing_string = "inferred ancestry of individuals:"
113+ popinfo_string = ("probability of being from assumed population | " +
114+ "prob of other pops" )
115+ end_parsing_string = "estimated allele frequencies in each cluster"
60116
61- # Start file parsing
62- parse = False
63117 with open (indfile_name ) as file_handle :
64118 for line in file_handle :
65- if line .strip ().lower ().startswith ("inferred ancestry of "
66- "individuals:" ):
67- # Enter parse mode ON
68- parse = True
69- # Skip subheader
70- next (file_handle )
71- elif line .strip ().lower ().startswith ("estimated allele "
72- "frequencies in each "
73- "cluster" ):
74- # parse mode OFF
75- parse = False
76- elif parse :
77- if line .strip () != "" :
78- fields = line .strip ().split ()
79- # Get cluster values
80- cl = [float (x ) for x in fields [5 :]]
81- try :
82- qvalues = np .vstack ((qvalues , cl ))
83- except ValueError :
84- qvalues = np .array (cl )
85- if not popfile :
86- # Get population
87- poplist .append (int (fields [3 ]))
119+ if line .strip ().lower ().startswith (parsing_string ):
120+ if next (file_handle ).lower ().startswith (popinfo_string ):
121+ qvalues , numlist = parse_usepopinfo (file_handle ,
122+ end_parsing_string )
123+ break
124+ else :
125+ qvalues , numlist = parse_nousepopinfo (file_handle ,
126+ end_parsing_string )
127+ break
128+ #if not popfile:
129+ #poplist = numlist
130+
88131
89132 if not popfile :
90133 # Transform poplist in convenient format, in which each element
91134 # is the boundary of a population in the x-axis
135+ poplist = numlist
92136 poplist = Counter (poplist )
93137 poplist = [([x ]* y , None ) for x , y in poplist .items ()]
94138
@@ -159,7 +203,7 @@ def plotter(qvalues, poplist, outfile):
159203 plt .rcParams ["figure.figsize" ] = (8 * numinds * .01 , 2.64 )
160204
161205 fig = plt .figure ()
162- ax = fig .add_subplot (111 , xlim = (0 , numinds ), ylim = (0 , 1 ))
206+ axe = fig .add_subplot (111 , xlim = (0 , numinds ), ylim = (0 , 1 ))
163207
164208 for i in range (qvalues .shape [1 ]):
165209 # Get bar color. If K exceeds the 12 colors, generate random color
@@ -169,19 +213,19 @@ def plotter(qvalues, poplist, outfile):
169213 clr = np .random .rand (3 , 1 )
170214
171215 if i == 0 :
172- ax .bar (range (numinds ), qvalues [:, i ], facecolor = clr ,
173- edgecolor = "none" , width = 1 )
216+ axe .bar (range (numinds ), qvalues [:, i ], facecolor = clr ,
217+ edgecolor = "none" , width = 1 )
174218 former_q = qvalues [:, i ]
175219 else :
176- ax .bar (range (numinds ), qvalues [:, i ], bottom = former_q ,
177- facecolor = clr , edgecolor = "none" , width = 1 )
220+ axe .bar (range (numinds ), qvalues [:, i ], bottom = former_q ,
221+ facecolor = clr , edgecolor = "none" , width = 1 )
178222 former_q = former_q + qvalues [:, i ]
179223
180224 # Annotate population info
181225 if poplist :
182226 for i in zip (np .cumsum ([len (x [0 ]) for x in poplist ]), poplist ):
183227 orderings = [(x , y [0 ][0 ]) for x , y in
184- zip (np .cumsum ([len (x [0 ]) for x in poplist ]), poplist )]
228+ zip (np .cumsum ([len (x [0 ]) for x in poplist ]), poplist )]
185229 count = 1
186230 for ppl , vals in enumerate (orderings ):
187231
@@ -194,19 +238,20 @@ def plotter(qvalues, poplist, outfile):
194238 else vals [0 ] / 2
195239
196240 # Draw text
197- ax .text (xpos , - 0.05 , vals [1 ] if vals [1 ] else "Pop{}" .format (count ),
198- rotation = 45 , va = "top" , ha = "right" , fontsize = 6 ,
199- weight = "bold" )
241+ axe .text (xpos , - 0.05 , vals [1 ] if vals [1 ] else "Pop{}" .format (count ),
242+ rotation = 45 , va = "top" , ha = "right" , fontsize = 6 ,
243+ weight = "bold" )
200244 count += 1
201245
202246 for axis in ["top" , "bottom" , "left" , "right" ]:
203- ax .spines [axis ].set_linewidth (2 )
204- ax .spines [axis ].set_color ("black" )
247+ axe .spines [axis ].set_linewidth (2 )
248+ axe .spines [axis ].set_color ("black" )
205249
206250 plt .yticks ([])
207251 plt .xticks ([])
208252
209- plt .savefig ("{}.pdf" .format (outfile ), bbox_inches = "tight" )
253+
254+ plt .savefig ("{}.svg" .format (outfile ), bbox_inches = "tight" )
210255
211256
212257def main (result_files , fmt , outdir , popfile = None ):
@@ -225,6 +270,6 @@ def main(result_files, fmt, outdir, popfile=None):
225270if __name__ == "__main__" :
226271 from sys import argv
227272 # Usage: structplot.py results_file format outdir
228- datafile = []
229- datafile .append (argv [1 ])
230- main (datafile , argv [2 ], argv [3 ], argv [ 4 ])
273+ DATAFILES = []
274+ DATAFILES .append (argv [1 ])
275+ main (DATAFILES , argv [2 ], argv [3 ])
0 commit comments