Skip to content

Commit e7e6581

Browse files
committed
added test
1 parent 373f436 commit e7e6581

File tree

4 files changed

+130
-7
lines changed

4 files changed

+130
-7
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,3 +40,6 @@ chr20.fa
4040
chr20.fa.fxi
4141
hg19.fa
4242
stria.cp38-win_amd64.pyd
43+
Homo_sapiens.GRCh38.dna.toplevel.fa.gz.fxi
44+
Homo_sapiens.GRCh38.dna.toplevel.fa.gz
45+
stria.cpython-310-x86_64-linux-gnu.so

benchmark.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
import time
2+
import stria
3+
import pyfastx
4+
gfile = 'Homo_sapiens.GRCh38.dna.toplevel.fa.gz'


def _elapsed(func):
    """Call ``func()`` once and return how many seconds the call took.

    The result is kept alive until after the clock is read, so the cost of
    freeing a (potentially huge) result list is never charged to the call
    being measured or to the next measurement.
    """
    began = time.perf_counter()
    result = func()
    seconds = time.perf_counter() - began
    del result
    return seconds


# Only the first record of the file is benchmarked.
for name, seq in pyfastx.Fastx(gfile):
    break
else:
    # Without this guard an empty file fails later with a NameError.
    raise SystemExit('no sequences found in ' + gfile)

finder = stria.SSRMiner(name, seq)

# Same two measurements, in the same order, one elapsed time per line.
print(_elapsed(finder.as_list))
print(_elapsed(finder.as_test))

src/ssr.c

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -217,8 +217,58 @@ static PyObject* stria_ssrminer_as_list(stria_SSRMiner *self) {
217217
return ssrs;
218218
}
219219

220+
/*
 * Experimental variant of as_list(): scan self->seq for perfect tandem
 * repeats with motif lengths 1..6 and return them as a new Python list.
 *
 * Each list item is a tuple
 *   (seqname, start, end, motif, motif_length, repeats, length)
 * where start is 1-based, end == start + length - 1, and length counts only
 * complete motif copies (repeats * motif_length).
 *
 * At every start position the motif lengths are tried in increasing order
 * and the first one whose run reaches self->min_lens[j] wins. Positions
 * holding 'N' are never used as a start.
 *
 * Returns a new reference, or NULL with a Python exception set when a list
 * or tuple allocation fails.
 */
static PyObject* stria_ssrminer_as_test(stria_SSRMiner *self) {
	PyObject *ssrs = PyList_New(0);
	char motif[7]; /* longest motif is 6 bases plus the terminating NUL */

	if (ssrs == NULL) {
		return NULL;
	}

	for (Py_ssize_t i = 0; i < self->size; ++i) {
		if (self->seq[i] == 'N') {
			continue;
		}

		const Py_ssize_t current_start = i;

		for (int j = 1; j < 7; ++j) {
			/* last index that still has a partner j bases downstream */
			const Py_ssize_t boundary = self->size - j;

			while ((i < boundary) && (self->seq[i] == self->seq[i+j])) {
				++i;
			}

			/* run length including the final, unmatched motif copy */
			const Py_ssize_t replen = i + j - current_start;

			if (replen >= self->min_lens[j]) {
				/* drop a trailing partial motif copy */
				const Py_ssize_t repeats = replen / j;
				const Py_ssize_t length = repeats * j;
				const Py_ssize_t ssr_end = current_start + length;
				PyObject *tmp;
				int status;

				memcpy(motif, self->seq+current_start, j);
				motif[j] = '\0';

				tmp = Py_BuildValue("Onnsinn", self->seqname, current_start+1, ssr_end, motif, j, repeats, length);
				if (tmp == NULL) {
					Py_DECREF(ssrs);
					return NULL;
				}

				status = PyList_Append(ssrs, tmp);
				Py_DECREF(tmp);
				if (status < 0) {
					Py_DECREF(ssrs);
					return NULL;
				}

				/*
				 * ssr_end is the 0-based index of the first base after the
				 * repeat. The outer loop's ++i runs next, so step back one to
				 * resume exactly there; otherwise that base is skipped and a
				 * directly adjacent repeat can be missed.
				 */
				i = ssr_end - 1;
				break;
			}

			/* this motif length failed: rewind and try the next one */
			i = current_start;
		}
	}

	return ssrs;
}
}
268+
220269
static PyMethodDef stria_ssrminer_methods[] = {
221270
{"as_list", (PyCFunction)stria_ssrminer_as_list, METH_NOARGS, NULL},
271+
{"as_test", (PyCFunction)stria_ssrminer_as_test, METH_NOARGS, NULL},
222272
{"reset_min_repeats", (PyCFunction)stria_ssrminer_reset_min_repeats, METH_VARARGS|METH_KEYWORDS, NULL},
223273
{NULL, NULL, 0, NULL}
224274
};

test.py

Lines changed: 60 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,63 @@
1+
import sys
12
import time
2-
import stria
33
import pyfastx
44

5-
start = time.time()
6-
for name, seq, _ in pyfastx.Fastx('chr1.fa'):
7-
ssrs = stria.test(name, seq)
8-
#ssrs = stria.SSRMiner(name,seq).as_list()
9-
print(time.time()-start)
10-
print(len(ssrs))
5+
# Minimum run length (in bases) required for each motif length 1..6;
# index 0 is unused padding so the list can be indexed by motif length.
mins = [0, 12, 14, 15, 16, 20, 24]


def count_ssrs(seq, min_lens):
    """Count perfect tandem repeats in ``seq`` (pure-Python reference scan).

    At every start position motif lengths 1..6 are tried in increasing order;
    the first one whose run reaches ``min_lens[j]`` bases is counted and the
    scan resumes at the first base after the repeat (complete motif copies
    only). Positions holding 'N' are never used as a start.

    seq      -- sequence string to scan
    min_lens -- list indexable by motif length 1..6 giving the minimum run
                length in bases
    Returns the number of repeats found.
    """
    size = len(seq)
    count = 0
    i = 0

    while i < size:
        if seq[i] == 'N':
            i += 1
            continue

        start = i

        for j in range(1, 7):
            # Disabled heuristic, kept for reference: skip motif length j
            # when a shorter motif that divides it already produced a run of
            # at least j bases (run[1] >= j for any j > 1; run[2] >= 4 for
            # j == 4; run[2] >= 6 or run[3] >= 6 for j == 6), counting each
            # skip in `jump`.

            # last index that still has a partner j bases downstream
            b = size - j

            while i < b and seq[i] == seq[i + j]:
                i += 1

            run = i + j - start

            if run >= min_lens[j]:
                count += 1
                # Resume at the first base after the last complete motif
                # copy. Do not add 1 here: that base may itself start an
                # adjacent repeat.
                i = start + (run // j) * j
                break

            # this motif length failed: rewind and try the next one
            i = start
        else:
            # no motif length matched at this start; move on one base
            i += 1

    return count


if __name__ == '__main__':
    if len(sys.argv) < 2:
        sys.exit('usage: test.py <sequence file>')

    # Timing deliberately includes reading the file, as before.
    start_time = time.perf_counter()

    # Only the last record of the file is scanned.
    seq = None
    for name, seq in pyfastx.Fastx(sys.argv[1]):
        pass

    if seq is None:
        sys.exit('no sequences found in ' + sys.argv[1])

    size = len(seq)
    count = count_ssrs(seq, mins)
    # The skip heuristic is disabled, so the jump counter stays at zero; it
    # is still printed to keep the output format unchanged.
    jump = 0

    end_time = time.perf_counter()

    print(end_time - start_time)
    print(count)
    print(jump)
    print(size)

0 commit comments

Comments
 (0)