Skip to content

Commit a81b6ed

Browse files
G2gl support added
G2gl added
2 parents bafb5ff + 7ec954f commit a81b6ed

File tree

6 files changed

+137
-3
lines changed

6 files changed

+137
-3
lines changed

pyard/flatten_glstring.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
from glstring import flatten
2+
import argparse
3+
4+
5+
def main():
    """Flatten the GL string of every record in a ``%``-delimited file.

    Command-line arguments:

    * ``-i`` / ``--infile``: input file; each line is ``<id>%<glstring>``.
    * ``-o`` / ``--outfile``: output file; each line is
      ``<id>%<flattened glstring>``.

    :raises ValueError: if a non-blank line does not split into exactly
        two ``%``-separated fields.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--infile",
                        required=True,
                        help="input file",
                        type=str)

    parser.add_argument("-o", "--outfile",
                        required=True,
                        help="output file",
                        type=str)
    args = parser.parse_args()

    infile = args.infile
    outfile = args.outfile
    print("reading from ", infile)
    print("writing to ", outfile)

    # Manage both handles with one ``with`` so they are closed even when a
    # malformed line raises part-way through the file.
    with open(infile, 'r') as fin, open(outfile, 'w') as fout:
        for line in fin:
            record = line.rstrip()
            if not record:
                # A blank line (e.g. a trailing newline at end of file)
                # carries no record; skip it instead of failing the unpack.
                continue
            (record_id, gl) = record.split('%')
            fgl = flatten(gl)
            fout.write('%'.join([record_id, fgl]) + '\n')
33+
34+
35+
if __name__ == '__main__':
    # Runs only when this file is executed directly, not when it is
    # imported as a module.
    main()

pyard/flatten_glstring.sh

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
#!/bin/bash
# Run flatten_glstring.py over every population / category input file in
# INDIR and write the flattened GL strings into OUTDIR.
INDIR=/vol/bio/wmda_simulator/graph/PlanA
OUTDIR=/vol/bio/wmda_simulator/graph/PlanA/flatgl

# The output files are created inside OUTDIR, so make sure it exists first.
mkdir -p "${OUTDIR}"

for pop in AAFA_CARB AAFA_NAMER FILII_NAMER MENAFC_NAMER
do
    for popcat in donor patient
    do
        # Quote every expansion so the paths survive word splitting/globbing.
        INFILE="${INDIR}/${pop}_GraphVal_PlanA_${popcat}.in"
        OUTFILE="${OUTDIR}/${pop}_GraphVal_PlanA_${popcat}.flat.gl"
        python flatten_glstring.py -i "${INFILE}" -o "${OUTFILE}"
    done
done
13+
14+

pyard/glstring.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
# glstring
2+
# module for working with glstrings
3+
4+
import re
5+
6+
7+
# convert genotype ambiguity into allele ambiguity
8+
9+
# TODO: need to handle "^" character
10+
11+
def flatten(gls):
    """Flatten genotype ambiguity into allele ambiguity for a GL string.

    The string is split on the ``^`` delimiter, every piece is flattened
    independently with ``flatten_loc`` and the results are re-joined with
    ``^`` in their original order.

    :param gls: GL string, optionally containing several ``^``-separated
        parts.
    :type gls: str
    :return: the flattened GL string
    :rtype: str
    """
    # A string without "^" splits into a single piece, so the same
    # expression covers both the multi-part and the single-part case.
    return "^".join(flatten_loc(locus) for locus in gls.split("^"))
18+
19+
20+
def flatten_loc(gls):
    """Flatten the genotype ambiguity of a GL string without ``^``.

    Every ``|``-separated genotype is split on ``+`` into a first and a
    second typing.  The distinct first typings are sorted and joined with
    ``/``, likewise the second typings, and the two lists are joined with
    ``+``.  A string without ``|`` is returned unchanged.

    Example: ``"A+B|C+D"`` becomes ``"A/C+B/D"``.

    :param gls: GL string to flatten
    :type gls: str
    :return: the flattened GL string
    :rtype: str
    :raises ValueError: if a genotype does not consist of exactly two
        ``+``-separated typings.
    """
    if "|" not in gls:
        # No genotype ambiguity: nothing to flatten.
        return gls

    first_typings = set()
    second_typings = set()
    for geno in gls.split("|"):
        typings = geno.split("+")
        if len(typings) != 2:
            # Fail with a message naming the offending genotype rather than
            # an anonymous tuple-unpacking error.
            raise ValueError(
                "genotype {!r} must contain exactly one '+'".format(geno))
        first, second = typings
        first_typings.add(first)
        second_typings.add(second)

    # Sort for a deterministic result that does not depend on input order.
    return "+".join(["/".join(sorted(first_typings)),
                     "/".join(sorted(second_typings))])

pyard/pyard.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -355,6 +355,7 @@ def redux(self, allele: str, ars_type: str) -> str:
355355
:return: ARS reduced allele
356356
:rtype: str
357357
"""
358+
358359
if re.search("HLA-", allele):
359360
hla, allele_name = allele.split("-")
360361
return "-".join(["HLA", self.redux(allele_name, ars_type)])
@@ -389,6 +390,9 @@ def redux_gl(self, glstring: str, redux_type: str) -> str:
389390
:rtype: str
390391
"""
391392

393+
if not self.isvalid_gl(glstring):
394+
return ""
395+
392396
if re.search("\^", glstring):
393397
return "^".join(sorted(set([self.redux_gl(a, redux_type) for a in glstring.split("^")]), key=functools.cmp_to_key(loci_sort)))
394398

@@ -429,6 +433,42 @@ def redux_gl(self, glstring: str, redux_type: str) -> str:
429433
return self.redux_gl("/".join(sorted(alleles, key=functools.cmp_to_key(loci_sort))), redux_type)
430434
return self.redux(glstring, redux_type)
431435

436+
def isvalid(self, allele: str) -> bool:
    """
    Determines validity of an allele

    :param allele: An HLA allele.
    :type: str
    :return: True if the allele is contained in ``self.valid``
    :rtype: bool
    """
    # NOTE(review): self.valid is populated outside this method; presumably
    # it is the collection of known allele names - confirm.
    return allele in self.valid
447+
448+
def isvalid_gl(self, glstring: str) -> bool:
    """
    Determine validity of glstring

    The string is split on the first delimiter it contains, checked in the
    order ``^``, ``|``, ``+``, ``~``, ``/``, and each part is validated
    recursively.  A string with no delimiter is treated as a single allele
    and passed to ``isvalid``.

    :param glstring: GL string to validate
    :type: str
    :return: True only if every allele in the GL string is valid
    :rtype: bool
    """
    # Check the delimiters in this fixed order so the outermost level of
    # the GL string is always split first.
    for delimiter in ("^", "|", "+", "~", "/"):
        if delimiter in glstring:
            return all(self.isvalid_gl(part)
                       for part in glstring.split(delimiter))

    # what falls through here is an allele
    return self.isvalid(glstring)
471+
432472
def mac_toG(self, allele: str) -> str:
433473
"""
434474
Does ARS reduction with allele and ARS type

pyard/util.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
# -*- coding: utf-8 -*-
22

33
#
4-
# seqann Sequence Annotation
5-
# Copyright (c) 2017 Be The Match operated by National Marrow Donor Program. All Rights Reserved.
4+
# pyard pyARD
5+
# Copyright (c) 2018 Be The Match operated by National Marrow Donor Program. All Rights Reserved.
66
#
77
# This library is free software; you can redistribute it and/or modify it
88
# under the terms of the GNU Lesser General Public License as published

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
# -*- coding: utf-8 -*-
33

44
#
5-
# pyars pyARS.
5+
# pyard pyARD.
66
# Copyright (c) 2018 Be The Match operated by National Marrow Donor Program. All Rights Reserved.
77
#
88
# This library is free software; you can redistribute it and/or modify it

0 commit comments

Comments
 (0)