working for some circular sequences, not all

manulera · manulera · commit 5b1dc200ddc7 · 2026-02-26T12:27:47.000Z
diff --git a/dummy2.py b/dummy2.py
@@ -7,8 +7,17 @@
 
 shifted_cutsite_pairs = seq.shift_melt_cutsite_pairs(cutsite_pairs)
 
-print(cutsite_pairs)
-print(shifted_cutsite_pairs)
 assert shifted_cutsite_pairs == [((10, 6), None), ((7, 5), None)]
 
 expected_product = seq.apply_cut(((10, 6), None), ((7, 5), None), allow_overlap=True)
+
+for shift in range(len(seq)):
+    print(shift)
+    new_seq = seq.shifted(shift)
+
+    cutsite_pairs = new_seq.get_cutsite_pairs(new_seq.get_ds_meltsites(3))
+    shifted_cutsite_pairs = new_seq.shift_melt_cutsite_pairs(cutsite_pairs)
+    print(shifted_cutsite_pairs)
+    assert len(shifted_cutsite_pairs) == 2
+    # prod = new_seq.apply_cut(*shifted_cutsite_pairs, allow_overlap=True)
+    # assert prod == expected_product
diff --git a/src/pydna/cre_lox.py b/src/pydna/cre_lox.py
@@ -5,7 +5,7 @@
 from Bio.Seq import reverse_complement
 from pydna.sequence_regex import compute_regex_site, dseqrecord_finditer
 from Bio.SeqFeature import Location, SimpleLocation, SeqFeature
-from pydna.utils import shift_location
+from pydna.utils import shift_location, deduplicate
 
 # We create a dictionary to map ambiguous bases to their consensus base
 # For example, ambigous_base_dict['ACGT'] -> 'N'
@@ -58,12 +58,7 @@ def cre_loxP_overlap(
             value_y = match_y.group()
             if value_x[13:21] == value_y[13:21]:
                 out.append((match_x.start() + 13, match_y.start() + 13, 8))
-    # Unique values (keeping the order)
-    unique_out = []
-    for item in out:
-        if item not in unique_out:
-            unique_out.append(item)
-    return unique_out
+    return deduplicate(out)
 
 
 loxP_dict = {
diff --git a/src/pydna/dseq.py b/src/pydna/dseq.py
@@ -33,6 +33,7 @@
 from pydna.utils import rc
 from pydna.utils import flatten
 from pydna.utils import cuts_overlap
+from pydna.utils import deduplicate
 
 from pydna.alphabet import basepair_dict
 from pydna.alphabet import dscode_to_watson_table
@@ -2762,6 +2763,9 @@ def apply_cut(
         left_watson, left_crick, ovhg_left = self.get_cut_parameters(left_cut, True)
         right_watson, right_crick, _ = self.get_cut_parameters(right_cut, False)
 
+        print("%%")
+        print(self[left_watson:right_watson])
+        print(self[left_crick:right_crick])
         return Dseq(
             self[left_watson:right_watson]._data.translate(dscode_to_watson_table),
             self[left_crick:right_crick]
@@ -2831,42 +2835,46 @@ def shift_melt_cutsite_pairs(
         """
         ss_crick_bytes = set(ss_letters_crick.encode("ascii"))
         ss_watson_bytes = set(ss_letters_watson.encode("ascii"))
+        n = len(self._data)
+        data = self._data * 2 if self.circular else self._data
 
         new_cutsite_pairs = []
         for left_cut, right_cut in cutsite_pairs:
             if left_cut is not None:
                 (watson, ovhg), enz = left_cut
                 crick = watson - ovhg
                 if ovhg > 0:
-                    while (
-                        watson < len(self._data)
-                        and self._data[watson] in ss_crick_bytes
-                    ):
+                    while watson < len(data) and data[watson] in ss_crick_bytes:
                         watson += 1
-                        ovhg += 1
                 elif ovhg < 0:
-                    while (
-                        crick < len(self._data) and self._data[crick] in ss_watson_bytes
-                    ):
+                    while crick < len(data) and data[crick] in ss_watson_bytes:
                         crick += 1
-                        ovhg -= 1
-                left_cut = ((watson, ovhg), enz)
+                if self.circular:
+                    left_cut = ((watson % n, (watson % n) - (crick % n)), enz)
+                else:
+                    left_cut = ((watson, watson - crick), enz)
 
             if right_cut is not None:
                 (watson, ovhg), enz = right_cut
                 crick = watson - ovhg
                 if ovhg > 0:
-                    while crick > 0 and self._data[crick - 1] in ss_watson_bytes:
+                    while crick > 0 and data[crick - 1] in ss_watson_bytes:
                         crick -= 1
-                        ovhg += 1
                 elif ovhg < 0:
-                    while watson > 0 and self._data[watson - 1] in ss_crick_bytes:
+                    while watson > 0 and data[watson - 1] in ss_crick_bytes:
                         watson -= 1
-                        ovhg -= 1
-                right_cut = ((watson, ovhg), enz)
+                if self.circular:
+                    right_cut = ((watson % n, (watson % n) - (crick % n)), enz)
+                else:
+                    right_cut = ((watson, watson - crick), enz)
 
             new_cutsite_pairs.append((left_cut, right_cut))
 
+        if self.circular:
+            new_cutsite_pairs = [
+                cut for pair in deduplicate(new_cutsite_pairs) for cut in pair
+            ]
+
         return new_cutsite_pairs
 
     def get_parts(self):
diff --git a/src/pydna/utils.py b/src/pydna/utils.py
@@ -879,3 +879,20 @@ def create_location(
         return SimpleLocation(start, end, strand)
     else:
         return shift_location(SimpleLocation(start, end + lim, strand), 0, lim)
+
+
+def deduplicate(iterable):
+    """Remove duplicates from an iterable while preserving order.
+
+    >>> deduplicate([3, 1, 2, 1, 3, 4])
+    [3, 1, 2, 4]
+    >>> deduplicate([(1, 2), (3, 4), (1, 2)])
+    [(1, 2), (3, 4)]
+    """
+    seen = set()  # values already emitted; being a set, it needs hashable items
+    result = []  # first occurrences in input order; always a new list
+    for item in iterable:
+        if item not in seen:  # later items equal to an earlier one are dropped
+            seen.add(item)
+            result.append(item)
+    return result