# utility.py
from __future__ import division
from split import chop
import numpy as np
import pickle
def calculate_roc(predict, true):
    """Compute sensitivity and specificity for binary predictions.

    Labels are compared against ``1.0`` (positive) and ``0.0`` (negative).
    A position whose predicted or true label is neither value is not
    counted in any of the four confusion-matrix cells.

    Args:
        predict: Sequence of predicted labels.
        true: Sequence of ground-truth labels, indexed at the same
            positions as ``predict``.

    Returns:
        A ``(sensitivity, specificity)`` tuple of floats, where
        sensitivity is TP / (TP + FN) and specificity is TN / (FP + TN).
        A tiny epsilon is added to each denominator, so a class with no
        samples yields ``0.0`` instead of a division error (and the
        ratios are very slightly below their exact values).

    Raises:
        IndexError: May be raised if ``true`` is shorter than ``predict``.
    """
    # Keeps the denominators non-zero when a class is absent.
    epsilon = 0.00000000001
    TP = FP = TN = FN = 0

    # enumerate() replaces the xrange() index loop, which only exists on
    # Python 2; ``true`` is still read by index, and only for positions
    # whose prediction is 0 or 1, exactly as before.
    for i, predicted in enumerate(predict):
        if predicted == 1.0:
            actual = true[i]
            if actual == 1.0:
                TP += 1
            elif actual == 0.0:
                FP += 1
        elif predicted == 0.0:
            actual = true[i]
            if actual == 0.0:
                TN += 1
            elif actual == 1.0:
                FN += 1

    sensitivity = TP / (TP + FN + epsilon)
    specificity = TN / (FP + TN + epsilon)
    return sensitivity, specificity
def load_obj(name):
    """Unpickle and return the object stored in ``<name>.pkl``.

    Args:
        name: Path of the pickle file without its ``.pkl`` suffix.

    Returns:
        Whatever object ``pickle.load`` reconstructs from the file.
    """
    pickle_path = name + '.pkl'
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(pickle_path, 'rb') as handle:
        loaded = pickle.load(handle)
    return loaded
def initialize():
    """Return the object unpickled from ``protVec.pkl``.

    The file name is relative, so it is resolved against the current
    working directory. Presumably this is the k-mer embedding table
    consumed by ``split`` -- confirm against the pickle's contents.
    """
    model_name = 'protVec'
    return load_obj(model_name)
def split(start, model, seq, lis):
    """Average the model vectors of the 3-mers of ``seq`` in one frame.

    Starting at ``start`` and stepping by 3, each complete 3-character
    window of ``seq`` is UTF-8 encoded and looked up in ``model``;
    windows that are not present fall back to ``model['<unk>']``. A
    trailing window shorter than 3 characters is skipped.

    Args:
        start: Index of the first window (the reading-frame offset).
        model: Mapping supporting ``in`` and ``[]`` from k-mer to vector;
            must contain ``'<unk>'`` if any window may be missing.
        seq: Text sequence to cut into 3-mers.
        lis: List that receives one ``np.array`` per window. It is
            appended to in place, and any vectors already in it take
            part in the average.

    Returns:
        The element-wise mean (``axis=0``) of the vectors in ``lis`` as a
        plain Python list. If ``lis`` ends up empty (``seq`` too short
        for this ``start``), ``np.mean`` of the empty list gives ``nan``.
    """
    # range() works on both Python 2 and 3; xrange() is Python-2-only.
    for index in range(start, len(seq) - 2, 3):
        # NOTE(review): on Python 3 ``str.encode`` returns bytes, so the
        # k-mer keys of ``model`` must be bytes there -- confirm the key
        # type of the loaded model.
        kmer = seq[index:index + 3].encode('utf-8')
        key = kmer if kmer in model else '<unk>'
        lis.append(np.array(model[key]))
    return np.mean(lis, axis=0).tolist()
def embedding(model, seq):
    """Embed ``seq`` once for each of its three reading frames.

    ``split`` is called with start offsets 0, 1 and 2, each time with
    its own fresh, empty accumulator list.

    Args:
        model: K-mer lookup passed straight through to ``split``.
        seq: Sequence passed straight through to ``split``.

    Returns:
        A three-element list holding the ``split`` results for offsets
        0, 1 and 2, in that order.
    """
    frame_offsets = (0, 1, 2)
    return [split(offset, model, seq, []) for offset in frame_offsets]
def load_test(data):
    """Read the text file ``data`` and group its lines via ``chop(3, ...)``.

    Newline characters are stripped from both ends of every line before
    the lines are handed to ``chop``.

    Args:
        data: Path of the text file to read.

    Returns:
        ``list(chop(3, lines))`` -- presumably consecutive groups of
        three lines; confirm against ``split.chop``.
    """
    with open(data, 'r') as handle:
        lines = [raw.strip('\n') for raw in handle]
    return list(chop(3, lines))
def create_seq194(data, output='seq194_dataset.data'):
    """Convert the records of ``data`` into a tab-separated dataset file.

    The input is parsed with ``load_test`` (the same newline stripping
    and ``chop(3, ...)`` grouping this function previously duplicated).
    For every record, the characters of element 1 and of element 2 are
    each joined with single spaces, and the two results are written on
    one line separated by a tab.

    Args:
        data: Path of the input text file.
        output: Path of the file to write. Defaults to
            ``'seq194_dataset.data'``, the previously hard-coded name,
            so existing callers are unaffected.

    Returns:
        None. The result is the file written at ``output``.
    """
    # The input is read completely before the output file is opened.
    records = load_test(data)
    with open(output, 'w') as out:
        for record in records:
            out.write(' '.join(record[1]) + '\t' + ' '.join(record[2]) + '\n')