-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathsequence_reversal.py
More file actions
97 lines (79 loc) · 3.03 KB
/
sequence_reversal.py
File metadata and controls
97 lines (79 loc) · 3.03 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
from string_reversal import *
from base_complement import *
print "This script will only slice windows of a user-specified size in the genome file provided--if you need to reverse complement or anything like that, run that before running this script."
print "For now it'll get you windows symmetric around the cleavage site. I'll build in more options to get different types of windows as needed later."
#middle = int(raw_input("At what index [you should be able to find this on Mochiview] is the cleavage site? \n"))
###I'm getting some weird index offsets between what I'm seeing in mochiview and the file itself...bear with me
genetype = raw_input("What cleavage site is this? \n")
genetype = genetype.lower()
#bear with me here
#finding the index in NC_000964 for each cleavage site
if 'cggr' in genetype:
middle = 3482769
offset = 49756 #why the weird offsets? I'm basing this off what I see in Mochiview...if I don't do this my window is not at the indices I'd expect
middle += offset
elif 'atpi' in genetype:
middle = 3783696
offset = 49756+8237+127
middle += offset
elif 'cwlo' in genetype:
middle = 3575886
offset = 49756+8237+127-6925-98
middle += offset
elif 'daca' in genetype:
middle = 17476
offset = 263
middle += offset
elif 'ddl' in genetype:
middle = 508162
offset = 7169+103
middle += offset
elif 'glna' in genetype:
middle = 1878263
offset = 2650+23853+341
middle += offset
elif 'mena' in genetype:
middle = 3951710
offset = 55669+796
middle += offset
#defining window size
windowsize = int(raw_input("How big should be the window be?\n"))
if windowsize%2==1:
print "Your window size is odd so I'm going to assume you want two even-sized windows flanking the cleavage site."
windowsize -=1
else:
print "Your window size is even so I'm going to assume you want two even-sized windows flanking the cleavage site."
flanking = windowsize/2
start = middle - flanking
if start<0:
print "Your window size is out of bounds so I'm going to start it at the beginning of the file."
if windowsize%2==0:
end = middle+flanking+2 #adding the +1 because of how Python slices arrays
#open genome file and extract window
genomefile = "../B_subtilis_NC_000964.txt" #edit path as needed
#genomefile = "hello.txt" #use for debugging purposes
genome = open(genomefile, 'r')
genomelist = []
for line in genome:
if '>' in line:
pass
for char in line:
genomelist.append(char)
if end>len(genomelist):
print "Your window size is out of bounds so I'm going to end it at the end of the file."
end = len(genomelist)
genome.close()
window = genomelist[start:end]
endwindow = "".join(window)
finalwindow = endwindow.rstrip('\n')
#reverse complement if the transcript is on the minus strand
#either way, print out the window here
strandedness = raw_input("Is this on the + strand?\n")
if 'n' in strandedness or 'N' in strandedness:
reverse = reversal(finalwindow)
comp = seqcomplement(reverse, 'DNA')
print "Here's the window for the minus strand: \n"
print comp
else:
print "Here's the window for the plus strand: \n"
print finalwindow