File tree Expand file tree Collapse file tree 4 files changed +96
-2
lines changed Expand file tree Collapse file tree 4 files changed +96
-2
lines changed Original file line number Diff line number Diff line change 1+ from glstring import flatten
2+ import argparse
3+
4+
def main():
    """Flatten the GL string on every record of an input file.

    Command-line options:
        -i / --infile   path of the file to read (required)
        -o / --outfile  path of the file to write (required)

    Each non-blank input line must have the form ``<id>%<glstring>``.  The
    GL string is passed through ``flatten`` and the record is written to the
    output file as ``<id>%<flattened glstring>``, one record per line.

    :raises ValueError: if a non-blank line does not contain exactly one "%"
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--infile",
                        required=True,
                        help="input file",
                        type=str)
    parser.add_argument("-o", "--outfile",
                        required=True,
                        help="output file",
                        type=str)
    args = parser.parse_args()

    infile = args.infile
    outfile = args.outfile
    print("reading from ", infile)
    print("writing to ", outfile)

    # A single ``with`` owns both handles, so they are closed even when a
    # malformed record raises part-way through the file.
    with open(infile, 'r') as fin, open(outfile, 'w') as fout:
        for line in fin:
            record = line.rstrip()
            if not record:
                # Blank lines (e.g. a trailing newline at end of file)
                # carry no record; skip them instead of failing to unpack.
                continue
            sample_id, gl = record.split('%')
            # Terminate each record with a bare newline so the next record
            # starts in column 0.
            fout.write('%'.join([sample_id, flatten(gl)]) + '\n')
33+
34+
if __name__ == '__main__':
    # Entry point when the file is executed directly; nothing runs here
    # when the file is imported as a module.
    main()
Original file line number Diff line number Diff line change 1+ #! /bin/bash
# Run flatten_glstring.py over the donor and patient input file of every
# population, reading from INDIR and writing *.flat.gl files to OUTDIR.
INDIR=/vol/bio/wmda_simulator/graph/PlanA
OUTDIR=/vol/bio/wmda_simulator/graph/PlanA/flatgl

# Make sure the output directory exists before the python step tries to
# create files inside it.
mkdir -p "${OUTDIR}"

for pop in AAFA_CARB AAFA_NAMER FILII_NAMER MENAFC_NAMER
do
    for popcat in donor patient
    do
        INFILE="${INDIR}/${pop}_GraphVal_PlanA_${popcat}.in"
        OUTFILE="${OUTDIR}/${pop}_GraphVal_PlanA_${popcat}.flat.gl"
        # Quoted so each path reaches python as exactly one argument.
        python flatten_glstring.py -i "${INFILE}" -o "${OUTFILE}"
    done
done
13+
14+
Original file line number Diff line number Diff line change 1+ # glstring
2+ # module for working with glstrings
3+
4+ import re
5+
6+
7+ # convert genotype ambiguity into allele ambiguity
8+
# NOTE: the "^" character is handled by flatten(), which splits on it and flattens each locus separately
10+
def flatten(gls):
    """Flatten every locus of a GL string.

    Loci are separated by ``^``.  Each locus is passed to ``flatten_loc``
    and the results are joined back together with ``^`` in their original
    order.  A string without ``^`` is treated as a single locus.

    :param gls: GL string, possibly covering several loci
    :return: the GL string with each locus flattened
    """
    # str.split returns [gls] when there is no "^", so this one expression
    # covers both the single-locus and the multi-locus case, with no regex
    # (the old "\^" pattern was an invalid escape in a non-raw string).
    return "^".join(flatten_loc(locus) for locus in gls.split("^"))
18+
19+
def flatten_loc(gls):
    """Convert the genotype ambiguity of one locus into allele ambiguity.

    The input is split into genotypes on ``|`` and every genotype into two
    typings on ``+``.  The distinct first typings and the distinct second
    typings are each sorted and joined with ``/``, and the two lists are
    then joined with ``+``.  A string without ``|`` is returned unchanged.

    Typings are compared as opaque strings: one that already contains ``/``
    is not split further, so ``"A/B+C|A+C"`` yields ``"A/A/B+C"``.

    :param gls: GL string for a single locus
    :return: the flattened GL string
    :raises ValueError: if a genotype is not exactly two "+"-separated
        typings
    """
    # Nothing to flatten without genotype ambiguity.
    if "|" not in gls:
        return gls

    first_typings = set()
    second_typings = set()
    for geno in gls.split("|"):
        typings = geno.split("+")
        if len(typings) != 2:
            # Fail with the offending genotype in the message rather than
            # an opaque tuple-unpacking error.
            raise ValueError(
                "genotype %r must contain exactly one '+'" % geno)
        first_typings.add(typings[0])
        second_typings.add(typings[1])

    # Sorting keeps the output deterministic regardless of input order.
    return "+".join(["/".join(sorted(first_typings)),
                     "/".join(sorted(second_typings))])
Original file line number Diff line number Diff line change 11# -*- coding: utf-8 -*-
22
33#
4- # seqann Sequence Annotation
5- # Copyright (c) 2017 Be The Match operated by National Marrow Donor Program. All Rights Reserved.
4+ # pyard pyARD
5+ # Copyright (c) 2018 Be The Match operated by National Marrow Donor Program. All Rights Reserved.
66#
77# This library is free software; you can redistribute it and/or modify it
88# under the terms of the GNU Lesser General Public License as published
You can’t perform that action at this time.
0 commit comments