Skip to content

Commit 5b1dc20

Browse files
committed
working for some circular sequences, not all
1 parent 31938c7 commit 5b1dc20

File tree

4 files changed

+53
-24
lines changed

4 files changed

+53
-24
lines changed

dummy2.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,17 @@
77

88
shifted_cutsite_pairs = seq.shift_melt_cutsite_pairs(cutsite_pairs)
99

10-
print(cutsite_pairs)
11-
print(shifted_cutsite_pairs)
1210
assert shifted_cutsite_pairs == [((10, 6), None), ((7, 5), None)]
1311

1412
expected_product = seq.apply_cut(((10, 6), None), ((7, 5), None), allow_overlap=True)
13+
14+
for shift in range(len(seq)):
15+
print(shift)
16+
new_seq = seq.shifted(shift)
17+
18+
cutsite_pairs = new_seq.get_cutsite_pairs(new_seq.get_ds_meltsites(3))
19+
shifted_cutsite_pairs = new_seq.shift_melt_cutsite_pairs(cutsite_pairs)
20+
print(shifted_cutsite_pairs)
21+
assert len(shifted_cutsite_pairs) == 2
22+
# prod = new_seq.apply_cut(*shifted_cutsite_pairs, allow_overlap=True)
23+
# assert prod == expected_product

src/pydna/cre_lox.py

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from Bio.Seq import reverse_complement
66
from pydna.sequence_regex import compute_regex_site, dseqrecord_finditer
77
from Bio.SeqFeature import Location, SimpleLocation, SeqFeature
8-
from pydna.utils import shift_location
8+
from pydna.utils import shift_location, deduplicate
99

1010
# We create a dictionary to map ambiguous bases to their consensus base
1111
# For example, ambigous_base_dict['ACGT'] -> 'N'
@@ -58,12 +58,7 @@ def cre_loxP_overlap(
5858
value_y = match_y.group()
5959
if value_x[13:21] == value_y[13:21]:
6060
out.append((match_x.start() + 13, match_y.start() + 13, 8))
61-
# Unique values (keeping the order)
62-
unique_out = []
63-
for item in out:
64-
if item not in unique_out:
65-
unique_out.append(item)
66-
return unique_out
61+
return deduplicate(out)
6762

6863

6964
loxP_dict = {

src/pydna/dseq.py

Lines changed: 23 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
from pydna.utils import rc
3434
from pydna.utils import flatten
3535
from pydna.utils import cuts_overlap
36+
from pydna.utils import deduplicate
3637

3738
from pydna.alphabet import basepair_dict
3839
from pydna.alphabet import dscode_to_watson_table
@@ -2762,6 +2763,9 @@ def apply_cut(
27622763
left_watson, left_crick, ovhg_left = self.get_cut_parameters(left_cut, True)
27632764
right_watson, right_crick, _ = self.get_cut_parameters(right_cut, False)
27642765

2766+
print("%%")
2767+
print(self[left_watson:right_watson])
2768+
print(self[left_crick:right_crick])
27652769
return Dseq(
27662770
self[left_watson:right_watson]._data.translate(dscode_to_watson_table),
27672771
self[left_crick:right_crick]
@@ -2831,42 +2835,46 @@ def shift_melt_cutsite_pairs(
28312835
"""
28322836
ss_crick_bytes = set(ss_letters_crick.encode("ascii"))
28332837
ss_watson_bytes = set(ss_letters_watson.encode("ascii"))
2838+
n = len(self._data)
2839+
data = self._data * 2 if self.circular else self._data
28342840

28352841
new_cutsite_pairs = []
28362842
for left_cut, right_cut in cutsite_pairs:
28372843
if left_cut is not None:
28382844
(watson, ovhg), enz = left_cut
28392845
crick = watson - ovhg
28402846
if ovhg > 0:
2841-
while (
2842-
watson < len(self._data)
2843-
and self._data[watson] in ss_crick_bytes
2844-
):
2847+
while watson < len(data) and data[watson] in ss_crick_bytes:
28452848
watson += 1
2846-
ovhg += 1
28472849
elif ovhg < 0:
2848-
while (
2849-
crick < len(self._data) and self._data[crick] in ss_watson_bytes
2850-
):
2850+
while crick < len(data) and data[crick] in ss_watson_bytes:
28512851
crick += 1
2852-
ovhg -= 1
2853-
left_cut = ((watson, ovhg), enz)
2852+
if self.circular:
2853+
left_cut = ((watson % n, (watson % n) - (crick % n)), enz)
2854+
else:
2855+
left_cut = ((watson, watson - crick), enz)
28542856

28552857
if right_cut is not None:
28562858
(watson, ovhg), enz = right_cut
28572859
crick = watson - ovhg
28582860
if ovhg > 0:
2859-
while crick > 0 and self._data[crick - 1] in ss_watson_bytes:
2861+
while crick > 0 and data[crick - 1] in ss_watson_bytes:
28602862
crick -= 1
2861-
ovhg += 1
28622863
elif ovhg < 0:
2863-
while watson > 0 and self._data[watson - 1] in ss_crick_bytes:
2864+
while watson > 0 and data[watson - 1] in ss_crick_bytes:
28642865
watson -= 1
2865-
ovhg -= 1
2866-
right_cut = ((watson, ovhg), enz)
2866+
if self.circular:
2867+
right_cut = ((watson % n, (watson % n) - (crick % n)), enz)
2868+
else:
2869+
right_cut = ((watson, watson - crick), enz)
28672870

28682871
new_cutsite_pairs.append((left_cut, right_cut))
28692872

2873+
if self.circular:
2874+
new_cutsite_pairs = [
2875+
cut for pair in deduplicate(new_cutsite_pairs) for cut in pair
2876+
]
2877+
28702878
return new_cutsite_pairs
28712879

28722880
def get_parts(self):

src/pydna/utils.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -879,3 +879,20 @@ def create_location(
879879
return SimpleLocation(start, end, strand)
880880
else:
881881
return shift_location(SimpleLocation(start, end + lim, strand), 0, lim)
882+
883+
884+
def deduplicate(iterable):
    """Remove duplicates from an iterable while preserving order.

    The first occurrence of each item is kept; later items that compare
    equal to an already-kept item are dropped.

    Hashable items are tracked in a set, so the common case runs in linear
    time. Unhashable items (for example lists, dicts, or tuples containing
    them) cannot be stored in a set; instead of raising ``TypeError`` they
    are compared by equality against the unhashable items kept so far.

    Parameters
    ----------
    iterable : iterable
        Items to deduplicate. It is consumed exactly once.

    Returns
    -------
    list
        The unique items, in order of first appearance.

    >>> deduplicate([3, 1, 2, 1, 3, 4])
    [3, 1, 2, 4]
    >>> deduplicate([(1, 2), (3, 4), (1, 2)])
    [(1, 2), (3, 4)]
    >>> deduplicate([[1, 2], [3], [1, 2]])
    [[1, 2], [3]]
    """
    seen = set()
    seen_unhashable = []
    result = []
    for item in iterable:
        try:
            # Both the membership test and ``add`` hash the item, so an
            # unhashable item raises TypeError before ``seen`` is modified.
            if item in seen:
                continue
            seen.add(item)
        except TypeError:
            # Unhashable item: fall back to a linear equality scan.
            if item in seen_unhashable:
                continue
            seen_unhashable.append(item)
        result.append(item)
    return result

0 commit comments

Comments
 (0)