-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathMtDNA.GeneOrder.check.py
More file actions
executable file
·98 lines (78 loc) · 2.66 KB
/
MtDNA.GeneOrder.check.py
File metadata and controls
executable file
·98 lines (78 loc) · 2.66 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#!/usr/bin/python
from sys import argv
from collections import defaultdict
def geneOrder(geneOrder, geneOrder_dct):
for i in range(0,len(geneOrder)):
if i == 0:
geneOrder_dct[geneOrder[i]].append((geneOrder[-1], geneOrder[i+1]))
elif i == len(geneOrder)-1:
geneOrder_dct[geneOrder[i]].append((geneOrder[i-1], geneOrder[0]))
else:
geneOrder_dct[geneOrder[i]].append((geneOrder[i-1], geneOrder[i+1]))
return geneOrder_dct
# Order to check
contig=open(argv[1], 'r').readlines()
# Known order
Order=open(argv[2], 'r').readline()
#
cntg_id=argv[3]
# Strand
strand=argv[4]
Order=Order.strip().replace('-','').split()
TrueOrder_dict=defaultdict(list)
# Giving the gene order list 'argv[2]', this function create the dictionary
# that will be used to check the order of the given contig
geneOrder(Order, TrueOrder_dict)
ContigOrder=[]
#for gene in TrueOrder_dict.keys():
# print gene, TrueOrder_dict[gene]
if len(contig) > 1:
for gene in contig:
gene=gene.strip()
ContigOrder.append(gene)
# print gene
num_ann_genes=len(ContigOrder)
ordered=0
for i in range(0,len(ContigOrder)):
gene=ContigOrder[i]
if i == 0:
n_gene=ContigOrder[i+1]
if strand == '+':
if n_gene == TrueOrder_dict[gene][0][1]:
ordered+=1
#print gene, n_gene, TrueOrder_dict[gene]
#else: print gene, 'Not in the right order'
elif strand == '-':
if n_gene == TrueOrder_dict[gene][0][0]:
ordered+=1
#print gene, n_gene, TrueOrder_dict[gene]
#else: print gene, 'Not in the right order'
elif i == len(ContigOrder)-1:
p_gene=ContigOrder[i-1]
if strand == '+':
if p_gene == TrueOrder_dict[gene][0][0]:
ordered+=1
#print gene, p_gene, TrueOrder_dict[gene]
#else: print gene, 'Not in the right order'
elif strand == '-':
if p_gene == TrueOrder_dict[gene][0][1]:
ordered+=1
#print gene, p_gene, TrueOrder_dict[gene]
#else: print gene, 'Not in the right order'
else:
n_gene=ContigOrder[i+1]
p_gene=ContigOrder[i-1]
if strand == '+':
if (n_gene == TrueOrder_dict[gene][0][1]) and (p_gene == TrueOrder_dict[gene][0][0]):
ordered+=1
#print gene, p_gene, n_gene, TrueOrder_dict[gene]
#else: print gene, 'Not in the right order'
elif strand == '-':
if (n_gene == TrueOrder_dict[gene][0][0]) and (p_gene == TrueOrder_dict[gene][0][1]):
ordered+=1
#print gene, p_gene, n_gene, TrueOrder_dict[gene]
#else: print gene, 'Not in the right order'
print '%s: %i genes, %i are ordered. Strand %s' % (cntg_id, len(ContigOrder), ordered, strand)
else:
ContigOrder.append(contig[0].strip())
print '%s: contains only %i gene: %s on strand %s' % (cntg_id, len(ContigOrder), ContigOrder[0], strand)