Skip to content

Commit 7ec954f

Browse files
committed
code for flattening glstrings
1 parent 2c72440 commit 7ec954f

File tree

4 files changed

+96
-2
lines changed

4 files changed

+96
-2
lines changed

pyard/flatten_glstring.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
from glstring import flatten
2+
import argparse
3+
4+
5+
def main():
    """Flatten the GL string on every line of an input file.

    Command-line arguments:
        -i/--infile:  path of the file to read; each non-blank line is
            expected to have the form ``<id>%<glstring>``.
        -o/--outfile: path of the file to write; each output line has the
            form ``<id>%<flattened glstring>``.

    Raises:
        ValueError: if a non-blank line does not contain exactly one ``%``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--infile",
                        required=True,
                        help="input file",
                        type=str)

    parser.add_argument("-o", "--outfile",
                        required=True,
                        help="output file",
                        type=str)
    args = parser.parse_args()

    infile = args.infile
    outfile = args.outfile
    print("reading from ", infile)
    print("writing to ", outfile)

    # Open both files in a single ``with`` so each one is closed even if a
    # line fails to parse or flatten() raises part-way through.
    with open(infile, 'r') as fin, open(outfile, 'w') as fout:
        for line in fin:
            record = line.rstrip()
            if not record:
                # Tolerate blank lines (e.g. a trailing empty line at EOF)
                # instead of failing on the tuple unpacking below.
                continue
            # ``sample_id`` rather than ``id`` to avoid shadowing the builtin.
            sample_id, gl = record.split('%')
            fout.write('%'.join([sample_id, flatten(gl)]) + '\n')
33+
34+
35+
if __name__ == '__main__':
    # Run main() only when this file is executed directly, not when it is
    # imported as a module.
    main()

pyard/flatten_glstring.sh

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
#!/bin/bash
# Run flatten_glstring.py once for every population / category combination,
# reading the *.in files under INDIR and writing *.flat.gl files to OUTDIR.
INDIR=/vol/bio/wmda_simulator/graph/PlanA
OUTDIR=/vol/bio/wmda_simulator/graph/PlanA/flatgl

# Make sure the output directory exists before the first file is written.
mkdir -p "${OUTDIR}"

for pop in AAFA_CARB AAFA_NAMER FILII_NAMER MENAFC_NAMER
do
    for popcat in donor patient
    do
        INFILE="${INDIR}/${pop}_GraphVal_PlanA_${popcat}.in"
        OUTFILE="${OUTDIR}/${pop}_GraphVal_PlanA_${popcat}.flat.gl"
        # Quote the paths so they are not subject to word splitting or globbing.
        python flatten_glstring.py -i "${INFILE}" -o "${OUTFILE}"
    done
done
13+
14+

pyard/glstring.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
# glstring
2+
# module for working with glstrings
3+
4+
import re
5+
6+
7+
# convert genotype ambiguity into allele ambiguity
8+
9+
# TODO: need to handle "^" character
10+
11+
def flatten(gls):
    """Flatten a GL string that may span several loci.

    The string is split on ``^``, each piece is passed to ``flatten_loc``,
    and the results are re-joined with ``^``.  A string without ``^`` is
    simply a single piece, so it is handed to ``flatten_loc`` as-is.

    Args:
        gls: GL string to flatten.

    Returns:
        The GL string with every ``^``-separated piece flattened.
    """
    # str.split returns [gls] when there is no "^", so no separate branch
    # (and no regex with an invalid "\^" escape) is needed.
    return "^".join(flatten_loc(locus) for locus in gls.split("^"))
18+
19+
20+
def flatten_loc(gls):
    """Convert genotype ambiguity at one locus into allele ambiguity.

    A string of ``|``-separated genotypes, each of the form ``t1+t2``, is
    rewritten as one genotype whose two sides are the sorted, ``/``-joined
    distinct values seen on the left and on the right of ``+`` respectively.
    A string that contains no ``|`` is returned unchanged.

    Args:
        gls: GL string for a single locus.

    Returns:
        The flattened GL string.

    Raises:
        ValueError: if any ``|``-separated genotype does not consist of
            exactly two ``+``-separated parts.
    """
    # Nothing to flatten when there is only one genotype.
    if "|" not in gls:
        return gls

    first_types = set()
    second_types = set()
    for geno in gls.split("|"):
        parts = geno.split("+")
        if len(parts) != 2:
            # Fail with a message that names the offending genotype rather
            # than an opaque tuple-unpacking error.
            raise ValueError(
                "genotype %r in %r must have exactly two '+'-separated parts"
                % (geno, gls))
        first, second = parts
        first_types.add(first)
        second_types.add(second)

    # Sort so the output is deterministic regardless of input order.
    new_first = "/".join(sorted(first_types))
    new_second = "/".join(sorted(second_types))
    return "+".join([new_first, new_second])

pyard/util.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
# -*- coding: utf-8 -*-
22

33
#
4-
# seqann Sequence Annotation
5-
# Copyright (c) 2017 Be The Match operated by National Marrow Donor Program. All Rights Reserved.
4+
# pyard pyARD
5+
# Copyright (c) 2018 Be The Match operated by National Marrow Donor Program. All Rights Reserved.
66
#
77
# This library is free software; you can redistribute it and/or modify it
88
# under the terms of the GNU Lesser General Public License as published

0 commit comments

Comments
 (0)