From 82b924c55abbdba18ff230c6e877c62da9ff3493 Mon Sep 17 00:00:00 2001
From: areebamomin <areebam@umich.edu>
Date: Wed, 10 Dec 2025 16:27:44 -0500
Subject: [PATCH 01/10] continuing to implement feature, vibe coded and fixed
 some, still need to check tests

Add recombinase assembly algorithm for attB/attP
---
 src/pydna/assembly2.py           |  70 ++++++++
 src/pydna/opencloning_models.py  |   4 +
 src/pydna/recombinase.py         | 251 +++++++++++++++++++++++++++
 tests/test_module_recombinase.py | 288 +++++++++++++++++++++++++++++++
 4 files changed, 613 insertions(+)
 create mode 100644 src/pydna/recombinase.py
 create mode 100644 tests/test_module_recombinase.py

diff --git a/src/pydna/assembly2.py b/src/pydna/assembly2.py
index d62f8fb7..85b75005 100644
--- a/src/pydna/assembly2.py
+++ b/src/pydna/assembly2.py
@@ -39,6 +39,7 @@
 from pydna.gateway import gateway_overlap, find_gateway_sites
 from pydna.cre_lox import cre_loxP_overlap
 from pydna.alphabet import anneal_strands
+from pydna.recombinase import Recombinase
 
 from typing import TYPE_CHECKING, Callable, Literal
 from pydna.opencloning_models import (
@@ -52,6 +53,7 @@
     GatewaySource,
     HomologousRecombinationSource,
     CreLoxRecombinationSource,
+    RecombinaseSource,
     PCRSource,
     SourceInput,
     CRISPRSource,
@@ -2801,6 +2803,74 @@ def cre_lox_excision(genome: Dseqrecord) -> list[Dseqrecord]:
     return _recast_sources(products, CreLoxRecombinationSource)
 
 
+def recombinase_excision(
+    genome: Dseqrecord,
+    site1: str,
+    site2: str,
+) -> list[Dseqrecord]:
+    """Returns the products for recombinase-mediated excision.
+
+    Parameters
+    ----------
+    genome : Dseqrecord
+        Target genome sequence containing two recombinase sites.
+    site1 : str
+        First recombinase recognition site (lowercase = overlap core).
+    site2 : str
+        Second recombinase recognition site (lowercase = overlap core).
+
+    Returns
+    -------
+    list[Dseqrecord]
+        List containing excised plasmid and remaining genome sequence.
+    """
+    rec = Recombinase(site1, site2)
+    products = common_function_excision_products(genome, None, rec.overlap)
+    return _recast_sources(products, RecombinaseSource)
+
+
+def recombinase_integration(
+    genome: Dseqrecord,
+    inserts: list[Dseqrecord],
+    site1: str,
+    site2: str,
+) -> list[Dseqrecord]:
+    """Returns the products resulting from recombinase-mediated integration.
+
+    Parameters
+    ----------
+    genome : Dseqrecord
+        Target genome sequence.
+    inserts : list[Dseqrecord]
+        DNA fragment(s) to insert.
+    site1 : str
+        First recombinase recognition site (lowercase = overlap core).
+    site2 : str
+        Second recombinase recognition site (lowercase = overlap core).
+
+    Returns
+    -------
+    list[Dseqrecord]
+        List of integrated DNA molecules.
+
+    Examples
+    --------
+    >>> from pydna.dseqrecord import Dseqrecord
+    >>> from pydna.assembly2 import recombinase_integration, recombinase_excision
+    >>> site1 = "ATGCCCTAAaaTT"
+    >>> site2 = "AAaaTTTTTTTCCCT"
+    >>> genome = Dseqrecord(f"cccccc{site1.upper()}aaaaa")
+    >>> insert = Dseqrecord(f"{site2.upper()}bbbbb", circular=True)
+    >>> products = recombinase_integration(genome, [insert], site1, site2)
+    >>> len(products) >= 1
+    True
+    """
+    fragments = common_handle_insertion_fragments(genome, inserts)
+    rec = Recombinase(site1, site2)
+    products = common_function_integration_products(fragments, None, rec.overlap)
+    return _recast_sources(products, RecombinaseSource)
+
+
 def crispr_integration(
     genome: Dseqrecord,
     inserts: list[Dseqrecord],
diff --git a/src/pydna/opencloning_models.py b/src/pydna/opencloning_models.py
index 1353a763..9efc13ab 100644
--- a/src/pydna/opencloning_models.py
+++ b/src/pydna/opencloning_models.py
@@ -524,6 +524,10 @@ class CreLoxRecombinationSource(AssemblySource):
     )
 
 
+class RecombinaseSource(AssemblySource):
+    pass
+
+
 class PCRSource(AssemblySource):
     TARGET_MODEL: ClassVar[Type[_PCRSource]] = _PCRSource
     add_primer_features: bool = Field(default=False)
diff --git a/src/pydna/recombinase.py b/src/pydna/recombinase.py
new file mode 100644
index 00000000..4436dee2
--- /dev/null
+++ b/src/pydna/recombinase.py
@@ -0,0 +1,251 @@
+# -*- coding: utf-8 -*-
+"""
+Generic site-specific recombinase functionality.
+
+This module provides tools for simulating site-specific recombination reactions
+mediated by recombinases (e.g. phage integrases such as phiC31, Bxb1, etc.).
+
+Recombinase sites are specified as strings where the **lowercase** portion
+indicates the homology/overlap core shared between the two sites. The
+uppercase portion represents the flanking recognition arms.
+
+For example::
+
+    site1 = "ATGCCCTAAaaTT"
+    site2 = "AAaaTTTTTTTCCCT"
+
+The lowercase ``aa`` is the overlap that will appear in the assembled product.
+
+Sites may contain IUPAC degenerate bases (N, W, S, etc.) and the search
+handles both linear and circular sequences.
+"""
+
+import re
+import itertools as _itertools
+
+from Bio.Seq import reverse_complement
+from Bio.SeqFeature import SimpleLocation, SeqFeature
+
+from pydna.dseqrecord import Dseqrecord as _Dseqrecord
+from pydna.utils import shift_location
+from pydna.sequence_regex import compute_regex_site, dseqrecord_finditer
+from pydna.types import SequenceOverlap
+
+
+def _recombinase_homology_offset_and_length(site: str) -> tuple[int, int]:
+    """
+    Return (offset, length) of the lowercase homology region inside a
+    recombinase recognition site string.
+
+    The lowercase segment represents the shared homology between the two
+    recombinase sites.
+
+    Parameters
+    ----------
+    site : str
+        The recognition site sequence, with the homology region in lowercase.
+        Must contain only letters (no digits or symbols).
+
+    Returns
+    -------
+    offset : int
+        Index of the first lowercase character in the site.
+    length : int
+        Length of the contiguous lowercase homology region.
+
+    Raises
+    ------
+    ValueError
+        If the site contains invalid characters or no lowercase region.
+    """
+    if not re.fullmatch(r"[A-Z]+[a-z]+[A-Z]+", site):
+        raise ValueError(
+            "Recombinase recognition site is not in the expected format."
+            "Expected format: [A-Z]+[a-z]+[A-Z]+, e.g. 'ATGCCCTAAaaTT'"
+        )
+    match = re.search(r"[a-z]+", site)
+    return match.start(), len(match.group())
+
+
+class Recombinase:
+    """A site-specific recombinase defined by two recognition sites. Homology cores must match.
+
+    Parameters
+    ----------
+    site1 : str
+        The first recognition site. The homology core must be in lowercase.
+    site2 : str
+        The second recognition site. The homology core must be in lowercase.
+
+    Examples
+    --------
+    >>> from pydna.dseqrecord import Dseqrecord
+    >>> from pydna.recombinase import Recombinase
+    >>> rec = Recombinase("ATGCCCTAAaaTT", "AAaaTTTTTTTCCCT")
+    >>> seqA = Dseqrecord("aaaATGCCCTAAaaTTtt")
+    >>> seqB = Dseqrecord("tataAAaaTTTTTTTCCCTaaa")
+    >>> rec.overlap(seqA, seqB)
+    [(12, 6, 2)]
+    """
+
+    def __init__(self, site1: str, site2: str):
+        self.site1 = site1
+        self.site2 = site2
+
+        off1, len1 = _recombinase_homology_offset_and_length(site1)
+        off2, len2 = _recombinase_homology_offset_and_length(site2)
+        if site1[off1 : off1 + len1] != site2[off2 : off2 + len2]:
+            raise ValueError(
+                "Recombinase recognition sites do not have matching homology cores."
+                f"Expected {site1[off1:off1 + len1]} == {site2[off2:off2 + len2]}"
+            )
+        self._homology_len = len1
+
+        rc1 = reverse_complement(site1)
+        rc2 = reverse_complement(site2)
+        off1_rev, _ = _recombinase_homology_offset_and_length(rc1)
+        off2_rev, _ = _recombinase_homology_offset_and_length(rc2)
+
+        self._site1_fwd = compute_regex_site(site1)
+        self._site1_rev = compute_regex_site(rc1)
+        self._site2_fwd = compute_regex_site(site2)
+        self._site2_rev = compute_regex_site(rc2)
+
+        self._configs = [
+            (self._site1_fwd, self._site2_fwd, off1, off2),
+            (self._site1_rev, self._site2_rev, off1_rev, off2_rev),
+        ]
+
+    def overlap(
+        self,
+        seqx: _Dseqrecord,
+        seqy: _Dseqrecord,
+        limit: None = None,
+    ) -> list[SequenceOverlap]:
+        """Find overlaps between *seqx* and *seqy* mediated by the sites.
+
+        Parameters
+        ----------
+        seqx : Dseqrecord
+            The first sequence.
+        seqy : Dseqrecord
+            The second sequence.
+        limit : None
+            Not used. Present only to satisfy the
+            ``AssemblyAlgorithmType`` convention.
+
+        Returns
+        -------
+        list[SequenceOverlap]
+            A list of ``(start_in_x, start_in_y, length)`` tuples.
+        """
+        matches: list[SequenceOverlap] = []
+
+        for site_x_re, site_y_re, off_x, off_y in self._configs:
+            matches_x = list(dseqrecord_finditer(site_x_re, seqx))
+            if not matches_x:
+                continue
+            matches_y = list(dseqrecord_finditer(site_y_re, seqy))
+            if not matches_y:
+                continue
+
+            for mx, my in _itertools.product(matches_x, matches_y):
+                # Here we wrap in case the sequence is circular and the match spans the origin,
+                # and the offset makes the match go beyond the origin.
+                matches.append(
+                    (
+                        (mx.start() + off_x) % len(seqx),
+                        (my.start() + off_y) % len(seqy),
+                        self._homology_len,
+                    )
+                )
+
+        # Deduplicate while preserving order
+        seen: set[SequenceOverlap] = set()
+        unique: list[SequenceOverlap] = []
+        for m in matches:
+            if m not in seen:
+                seen.add(m)
+                unique.append(m)
+        return unique
+
+
+def find_recombinase_sites(
+    seq: _Dseqrecord,
+    site1: str,
+    site2: str,
+    site1_name: str = "site1",
+    site2_name: str = "site2",
+) -> dict[str, list[SimpleLocation]]:
+    """Find all occurrences of the recombinase sites in *seq*.
+
+    Parameters
+    ----------
+    seq : Dseqrecord
+        Sequence to search.
+    site1 : str
+        First recognition site (may contain degenerate bases).
+    site2 : str
+        Second recognition site (may contain degenerate bases).
+    site1_name : str
+        Label for site1 in the returned dictionary.
+    site2_name : str
+        Label for site2 in the returned dictionary.
+
+    Returns
+    -------
+    dict[str, list[SimpleLocation]]
+        Dictionary mapping site names to lists of locations.
+    """
+    out: dict[str, list[SimpleLocation]] = {}
+    for name, raw_site in [(site1_name, site1), (site2_name, site2)]:
+        fwd_re = compute_regex_site(raw_site)
+        rev_re = compute_regex_site(reverse_complement(raw_site))
+        for pattern, strand in [(fwd_re, 1), (rev_re, -1)]:
+            for match in dseqrecord_finditer(pattern, seq):
+                loc = SimpleLocation(match.start(), match.end(), strand)
+                loc = shift_location(loc, 0, len(seq))
+                out.setdefault(name, []).append(loc)
+    return out
+
+
+def annotate_recombinase_sites(
+    seq: _Dseqrecord,
+    site1: str,
+    site2: str,
+    site1_name: str = "site1",
+    site2_name: str = "site2",
+) -> _Dseqrecord:
+    """Annotate *seq* with features for all occurrences of the recombinase sites.
+
+    Parameters
+    ----------
+    seq : Dseqrecord
+        Sequence to annotate (modified in place and returned).
+    site1 : str
+        First recognition site.
+    site2 : str
+        Second recognition site.
+    site1_name : str
+        Label for site1 features.
+    site2_name : str
+        Label for site2 features.
+
+    Returns
+    -------
+    Dseqrecord
+        The same sequence with added features.
+    """
+    sites = find_recombinase_sites(seq, site1, site2, site1_name, site2_name)
+    for name, locs in sites.items():
+        for loc in locs:
+            if not any(
+                f.location == loc
+                and f.type == "protein_bind"
+                and f.qualifiers.get("label", []) == [name]
+                for f in seq.features
+            ):
+                seq.features.append(
+                    SeqFeature(loc, type="protein_bind", qualifiers={"label": [name]})
+                )
+    return seq
diff --git a/tests/test_module_recombinase.py b/tests/test_module_recombinase.py
new file mode 100644
index 00000000..f1c462c6
--- /dev/null
+++ b/tests/test_module_recombinase.py
@@ -0,0 +1,288 @@
+import pytest
+
+from pydna.dseqrecord import Dseqrecord
+from pydna.assembly2 import (
+    Assembly,
+    recombinase_integration,
+    recombinase_excision,
+)
+from pydna.recombinase import (
+    _recombinase_homology_offset_and_length,
+    Recombinase,
+    find_recombinase_sites,
+    annotate_recombinase_sites,
+)
+
+
+# ---------------------------------------------------------------------------
+# _recombinase_homology_offset_and_length
+# ---------------------------------------------------------------------------
+
+
+def test_recombinase_homology_offset_and_length_basic():
+    site1 = "ATGCCCTAAaaTT"  # lowercase 'aa' at positions 9-10
+    site2 = "AAaaTTTTTTTCCCT"  # lowercase 'aa' at positions 2-3
+
+    off1, len1 = _recombinase_homology_offset_and_length(site1)
+    off2, len2 = _recombinase_homology_offset_and_length(site2)
+
+    assert (off1, len1) == (9, 2)
+    assert (off2, len2) == (2, 2)
+
+
+def test_recombinase_homology_offset_and_length_wrong_pattern_raises():
+    for pattern in ["ATGCCCTAAATT", "ATGaa", "aaATG", "ATGaaCCaa", "ATGaaCCaaTT"]:
+        with pytest.raises(ValueError):
+            _recombinase_homology_offset_and_length(pattern)
+
+
+def test_recombinase_homology_offset_and_length_invalid_chars():
+    with pytest.raises(ValueError):
+        _recombinase_homology_offset_and_length("ATGaa123")
+    with pytest.raises(ValueError):
+        _recombinase_homology_offset_and_length("ATGaa-CC")
+
+
+# ---------------------------------------------------------------------------
+# Recombinase.overlap – basic (linear, exact)
+# ---------------------------------------------------------------------------
+
+
+def test_recombinase_overlap_single_match():
+    site1 = "ATGCCCTAAaaTT"
+    site2 = "AAaaTTTTTTTCCCT"
+
+    seqA = Dseqrecord("aaaATGCCCTAAaaTTtt")
+    seqB = Dseqrecord("tataAAaaTTTTTTTCCCTaaa")
+
+    rec = Recombinase(site1, site2)
+    matches = rec.overlap(seqA, seqB, limit=None)
+    assert matches == [(12, 6, 2)]
+
+
+def test_recombinase_overlap_no_site_returns_empty():
+    site1 = "ATGCCCTAAaaTT"
+    site2 = "AAaaTTTTTTTCCCT"
+
+    seqA = Dseqrecord("aaaATGCCCTAAaaTTtt")
+    seqB = Dseqrecord("tataAAccTTTTTTTCCCTaaa")
+
+    rec = Recombinase(site1, site2)
+    matches = rec.overlap(seqA, seqB, limit=None)
+    assert matches == []
+
+
+def test_recombinase_algorithm_with_assembly():
+    site1 = "ATGCCCTAAaaTT"
+    site2 = "AAaaTTTTTTTCCCT"
+
+    seqA = Dseqrecord("aaaATGCCCTAAaaTTtt")
+    seqB = Dseqrecord("tataAAaaTTTTTTTCCCTaaa")
+
+    rec = Recombinase(site1, site2)
+    asm = Assembly([seqA, seqB], algorithm=rec.overlap, use_fragment_order=False)
+    products = asm.assemble_linear()
+
+    assert len(products) == 2
+    assert str(products[0].seq) == "aaaATGCCCTAAaaTTTTTTTCCCTaaa"
+    assert str(products[1].seq) == "tataAAaaTTtt"
+
+
+# ---------------------------------------------------------------------------
+# Circular sequences
+# ---------------------------------------------------------------------------
+
+
+def test_circular_site_spanning_origin():
+    """Sites that span the origin of a circular sequence should be found."""
+    site1 = "ATGCCCTAAaaTT"
+    site2 = "AAaaTTTTTTTCCCT"
+
+    circ_seq = Dseqrecord("aaTTgggggATGCCCTAA", circular=True)
+    linear_seq = Dseqrecord("tataAAaaTTTTTTTCCCTaaa")
+
+    rec = Recombinase(site1, site2)
+    for shift in range(len(circ_seq)):
+        circ_seq_shifted = circ_seq.shifted(shift)
+        matches = rec.overlap(circ_seq_shifted, linear_seq, limit=None)
+        assert len(matches) == 1
+        start, _, length = matches[0]
+        assert (
+            str(circ_seq_shifted.seq[start : (start + length) % len(circ_seq_shifted)])
+            == "aa"
+        )
+
+
+def test_circular_both_sequences():
+    """Both sequences circular, sites should still be found."""
+    site1 = "GGGaaaCCC"
+    site2 = "TTaaaTT"
+
+    circ_x = Dseqrecord("aaaCCCcaGGG", circular=True)
+    circ_y = Dseqrecord("aaaTTgaTT", circular=True)
+
+    rec = Recombinase(site1, site2)
+    matches = rec.overlap(circ_x, circ_y, limit=None)
+    assert len(matches) == 1
+    assert matches[0] == (0, 0, 3)
+
+
+# ---------------------------------------------------------------------------
+# Degenerate sequences
+# ---------------------------------------------------------------------------
+
+
+def test_degenerate_site():
+    """Sites with IUPAC ambiguity codes should match concrete sequences."""
+    site1 = "ATGNNNaaTT"
+    site2 = "CCNNaaTTGG"
+
+    seqA = Dseqrecord("xxxATGCCCaaTTyyy")
+    seqB = Dseqrecord("xxxCCGGaaTTGGyyy")
+
+    rec = Recombinase(site1, site2)
+    matches = rec.overlap(seqA, seqB, limit=None)
+    assert len(matches) == 1
+    assert matches[0] == (9, 7, 2)
+
+
+# ---------------------------------------------------------------------------
+# Reverse complement
+# ---------------------------------------------------------------------------
+
+# TODO: check tests below
+
+
+def test_reverse_complement_sites():
+    """Sites on the reverse strand should be detected."""
+    from Bio.Seq import reverse_complement
+
+    site1 = "ATGCCCTAAaaTT"
+    site2 = "AAaaTTTTTTTCCCT"
+
+    rc_site1 = reverse_complement(site1)
+    seqA = Dseqrecord(f"ggg{rc_site1}ggg")
+    seqB = Dseqrecord(f"ttt{reverse_complement(site2)}ttt")
+
+    rec = Recombinase(site1, site2)
+    matches = rec.overlap(seqA, seqB, limit=None)
+    assert len(matches) >= 1
+
+
+# ---------------------------------------------------------------------------
+# Recombinase class
+# ---------------------------------------------------------------------------
+
+
+def test_recombinase_class_basic():
+    rec = Recombinase("ATGCCCTAAaaTT", "AAaaTTTTTTTCCCT")
+
+    seqA = Dseqrecord("aaaATGCCCTAAaaTTtt")
+    seqB = Dseqrecord("tataAAaaTTTTTTTCCCTaaa")
+
+    matches = rec.overlap(seqA, seqB)
+    assert matches == [(12, 6, 2)]
+
+
+# ---------------------------------------------------------------------------
+# Site annotation
+# ---------------------------------------------------------------------------
+
+
+def test_find_recombinase_sites():
+    site1 = "ATGCCCTAAaaTT"
+    seq = Dseqrecord(f"ggg{site1.upper()}ttt")
+    sites = find_recombinase_sites(
+        seq, site1, "GGGaaaCCC", site1_name="s1", site2_name="s2"
+    )
+    assert "s1" in sites
+    assert len(sites["s1"]) >= 1
+
+
+def test_annotate_recombinase_sites():
+    site1 = "ATGCCCTAAaaTT"
+    seq = Dseqrecord(f"ggg{site1.upper()}ttt")
+    assert len(seq.features) == 0
+    annotate_recombinase_sites(
+        seq, site1, "GGGaaaCCC", site1_name="mysite1", site2_name="mysite2"
+    )
+    assert len(seq.features) >= 1
+    assert any(f.qualifiers.get("label", []) == ["mysite1"] for f in seq.features)
+
+    # Calling again should not duplicate
+    n_before = len(seq.features)
+    annotate_recombinase_sites(
+        seq, site1, "GGGaaaCCC", site1_name="mysite1", site2_name="mysite2"
+    )
+    assert len(seq.features) == n_before
+
+
+# ---------------------------------------------------------------------------
+# Symmetry: site1 in seqy and site2 in seqx should also work
+# ---------------------------------------------------------------------------
+
+
+def test_sites_swapped_between_sequences():
+    """With site1 in seqx and site2 in seqy (can be either order in Assembly)."""
+    site1 = "ATGCCCTAAaaTT"
+    site2 = "AAaaTTTTTTTCCCT"
+
+    seqA = Dseqrecord("aaaATGCCCTAAaaTTtt")
+    seqB = Dseqrecord("tataAAaaTTTTTTTCCCTaaa")
+
+    rec = Recombinase(site1, site2)
+    matches = rec.overlap(seqA, seqB, limit=None)
+    assert len(matches) >= 1
+
+
+# ---------------------------------------------------------------------------
+# recombinase_integration and recombinase_excision
+# ---------------------------------------------------------------------------
+
+
+def test_recombinase_integration():
+    """Integration of insert into genome via recombinase sites."""
+    site1 = "ATGCCCTAAaaTT"
+    site2 = "AAaaTTTTTTTCCCT"
+
+    genome = Dseqrecord(f"cccccc{site1.upper()}aaaaa")
+    insert = Dseqrecord(f"{site2.upper()}bbbbb", circular=True)
+
+    products = recombinase_integration(genome, [insert], site1, site2)
+    assert len(products) >= 1
+    # Integrated product should contain both flanking regions and insert
+    seq_str = str(products[0].seq).upper()
+    assert "ATGCCCTAA" in seq_str
+    assert "TTTTTTTCCCT" in seq_str
+
+
+def test_recombinase_excision():
+    """Excision of plasmid from genome with two recombinase sites."""
+    site1 = "ATGCCCTAAaaTT"
+    site2 = "AAaaTTTTTTTCCCT"
+
+    genome = Dseqrecord(
+        f"cccccc{site1.upper()}xxxxx{site2.upper()}aaaaa",
+        circular=True,
+    )
+
+    products = recombinase_excision(genome, site1, site2)
+    assert len(products) >= 1
+
+
+def test_recombinase_integration_excision_reversibility():
+    """With same site on both sides (Cre-Lox style), integrate then excise returns originals."""
+    site = "ATGCCCTAAaaTT"
+
+    genome = Dseqrecord(f"cccccc{site.upper()}aaaaa")
+    insert = Dseqrecord(f"{site.upper()}bbbbb", circular=True)
+
+    products = recombinase_integration(genome, [insert], site, site)
+    assert len(products) == 1
+    integrated = products[0]
+
+    excised = recombinase_excision(integrated, site, site)
+    assert len(excised) == 2
+
+    seqs = [str(p.seq).upper() for p in excised]
+    assert any(site.upper() in s for s in seqs)

From 850559ad0cfc5ee50c5dad7e97a9b236351aa839 Mon Sep 17 00:00:00 2001
From: Manuel Lera-Ramirez <manulera14@gmail.com>
Date: Wed, 25 Feb 2026 01:15:21 +0000
Subject: [PATCH 02/10] some more fixing, but excision not giving all products

---
 dummy.py                         |  25 +++++
 src/pydna/assembly2.py           |  34 +++----
 src/pydna/recombinase.py         | 155 +++++++++++++++----------------
 tests/test_module_recombinase.py | 109 +++++++++-------------
 4 files changed, 160 insertions(+), 163 deletions(-)
 create mode 100644 dummy.py

diff --git a/dummy.py b/dummy.py
new file mode 100644
index 00000000..c770d819
--- /dev/null
+++ b/dummy.py
@@ -0,0 +1,25 @@
+# -*- coding: utf-8 -*-
+from pydna.dseqrecord import Dseqrecord
+from pydna.assembly2 import recombinase_excision
+from pydna.recombinase import Recombinase
+from pydna.assembly2 import SingleFragmentAssembly
+
+
+site1 = "ATGCCCTAAaaCT"
+site2 = "AAaaTTTTTTTCCCT"
+
+rec = Recombinase(site1, site2)
+genome = Dseqrecord(f"cccccc{site1.upper()}tttt{site2.upper()}aaaaa")
+
+products = recombinase_excision(genome, rec)
+
+asm = SingleFragmentAssembly([genome], None, rec.overlap)
+print(*asm.G.edges(data=True), sep="\n")
+print(asm.get_circular_assemblies())
+print()
+
+seq2 = Dseqrecord(f"ag{site1.upper()}gtca{site1.upper()}aa")
+
+asm = SingleFragmentAssembly([seq2], limit=13)
+print(*asm.G.edges(data=True), sep="\n")
+# print(asm.get_circular_assemblies())
diff --git a/src/pydna/assembly2.py b/src/pydna/assembly2.py
index 85b75005..2c874dc3 100644
--- a/src/pydna/assembly2.py
+++ b/src/pydna/assembly2.py
@@ -1970,11 +1970,15 @@ def __init__(self, frags: [Dseqrecord], limit=25, algorithm=common_sub_strings):
         self.G.add_node(1, seq=frag)
 
         matches = algorithm(frag, frag, limit)
+        # Remove matches where the whole sequence matches
+        matches = [match for match in matches if match[2] != len(frag)]
+
         for match in matches:
             self.add_edges_from_match(match, 1, 1, frag, frag)
 
         # To avoid duplicated outputs
-        self.G.remove_edges_from([(-1, -1)])
+        while (-1, -1) in self.G.edges():
+            self.G.remove_edges_from([(-1, -1)])
 
         # These two are constrained
         self.use_fragment_order = True
@@ -2805,8 +2809,7 @@ def cre_lox_excision(genome: Dseqrecord) -> list[Dseqrecord]:
 
 def recombinase_excision(
     genome: Dseqrecord,
-    site1: str,
-    site2: str,
+    recombinase: Recombinase,
 ) -> list[Dseqrecord]:
     """Returns the products for recombinase-mediated excision.
 
@@ -2814,26 +2817,23 @@ def recombinase_excision(
     ----------
     genome : Dseqrecord
         Target genome sequence containing two recombinase sites.
-    site1 : str
-        First recombinase recognition site (lowercase = overlap core).
-    site2 : str
-        Second recombinase recognition site (lowercase = overlap core).
+    recombinase : Recombinase
+        Recombinase object.
 
     Returns
     -------
     list[Dseqrecord]
         List containing excised plasmid and remaining genome sequence.
     """
-    rec = Recombinase(site1, site2)
-    products = common_function_excision_products(genome, None, rec.overlap)
+    products = common_function_excision_products(genome, None, recombinase.overlap)
+    products = [recombinase.annotate(p) for p in products]
     return _recast_sources(products, RecombinaseSource)
 
 
 def recombinase_integration(
     genome: Dseqrecord,
     inserts: list[Dseqrecord],
-    site1: str,
-    site2: str,
+    recombinase: Recombinase,
 ) -> list[Dseqrecord]:
     """Returns the products resulting from recombinase-mediated integration.
 
@@ -2843,10 +2843,8 @@ def recombinase_integration(
         Target genome sequence.
     inserts : list[Dseqrecord]
         DNA fragment(s) to insert.
-    site1 : str
-        First recombinase recognition site (lowercase = overlap core).
-    site2 : str
-        Second recombinase recognition site (lowercase = overlap core).
+    recombinase : Recombinase
+        Recombinase object.
 
     Returns
     -------
@@ -2866,8 +2864,10 @@ def recombinase_integration(
     True
     """
     fragments = common_handle_insertion_fragments(genome, inserts)
-    rec = Recombinase(site1, site2)
-    products = common_function_integration_products(fragments, None, rec.overlap)
+    products = common_function_integration_products(
+        fragments, None, recombinase.overlap
+    )
+    products = [recombinase.annotate(p) for p in products]
     return _recast_sources(products, RecombinaseSource)
 
 
diff --git a/src/pydna/recombinase.py b/src/pydna/recombinase.py
index 4436dee2..3f2669eb 100644
--- a/src/pydna/recombinase.py
+++ b/src/pydna/recombinase.py
@@ -21,12 +21,13 @@
 """
 
 import re
-import itertools as _itertools
+import itertools
+import copy
 
 from Bio.Seq import reverse_complement
 from Bio.SeqFeature import SimpleLocation, SeqFeature
 
-from pydna.dseqrecord import Dseqrecord as _Dseqrecord
+from pydna.dseqrecord import Dseqrecord
 from pydna.utils import shift_location
 from pydna.sequence_regex import compute_regex_site, dseqrecord_finditer
 from pydna.types import SequenceOverlap
@@ -76,6 +77,10 @@ class Recombinase:
         The first recognition site. The homology core must be in lowercase.
     site2 : str
         The second recognition site. The homology core must be in lowercase.
+    site1_name : str, optional
+        Label for site1 in find/annotate output. Default is "site1".
+    site2_name : str, optional
+        Label for site2 in find/annotate output. Default is "site2".
 
     Examples
     --------
@@ -86,11 +91,21 @@ class Recombinase:
     >>> seqB = Dseqrecord("tataAAaaTTTTTTTCCCTaaa")
     >>> rec.overlap(seqA, seqB)
     [(12, 6, 2)]
+    >>> sites = rec.find(seqA)
+    >>> rec.annotate(seqA)
     """
 
-    def __init__(self, site1: str, site2: str):
+    def __init__(
+        self,
+        site1: str,
+        site2: str,
+        site1_name: str = "site1",
+        site2_name: str = "site2",
+    ):
         self.site1 = site1
         self.site2 = site2
+        self.site1_name = site1_name
+        self.site2_name = site2_name
 
         off1, len1 = _recombinase_homology_offset_and_length(site1)
         off2, len2 = _recombinase_homology_offset_and_length(site2)
@@ -118,8 +133,8 @@ def __init__(self, site1: str, site2: str):
 
     def overlap(
         self,
-        seqx: _Dseqrecord,
-        seqy: _Dseqrecord,
+        seqx: Dseqrecord,
+        seqy: Dseqrecord,
         limit: None = None,
     ) -> list[SequenceOverlap]:
         """Find overlaps between *seqx* and *seqy* mediated by the sites.
@@ -149,7 +164,7 @@ def overlap(
             if not matches_y:
                 continue
 
-            for mx, my in _itertools.product(matches_x, matches_y):
+            for mx, my in itertools.product(matches_x, matches_y):
                 # Here we wrap in case the sequence is circular and the match spans the origin,
                 # and the offset makes the match go beyond the origin.
                 matches.append(
@@ -169,83 +184,59 @@ def overlap(
                 unique.append(m)
         return unique
 
+    def find(self, seq: Dseqrecord) -> dict[str, list[SimpleLocation]]:
+        """Find all occurrences of the recombinase sites in *seq*.
 
-def find_recombinase_sites(
-    seq: _Dseqrecord,
-    site1: str,
-    site2: str,
-    site1_name: str = "site1",
-    site2_name: str = "site2",
-) -> dict[str, list[SimpleLocation]]:
-    """Find all occurrences of the recombinase sites in *seq*.
-
-    Parameters
-    ----------
-    seq : Dseqrecord
-        Sequence to search.
-    site1 : str
-        First recognition site (may contain degenerate bases).
-    site2 : str
-        Second recognition site (may contain degenerate bases).
-    site1_name : str
-        Label for site1 in the returned dictionary.
-    site2_name : str
-        Label for site2 in the returned dictionary.
+        Parameters
+        ----------
+        seq : Dseqrecord
+            Sequence to search.
 
-    Returns
-    -------
-    dict[str, list[SimpleLocation]]
-        Dictionary mapping site names to lists of locations.
-    """
-    out: dict[str, list[SimpleLocation]] = {}
-    for name, raw_site in [(site1_name, site1), (site2_name, site2)]:
-        fwd_re = compute_regex_site(raw_site)
-        rev_re = compute_regex_site(reverse_complement(raw_site))
-        for pattern, strand in [(fwd_re, 1), (rev_re, -1)]:
-            for match in dseqrecord_finditer(pattern, seq):
-                loc = SimpleLocation(match.start(), match.end(), strand)
-                loc = shift_location(loc, 0, len(seq))
-                out.setdefault(name, []).append(loc)
-    return out
-
-
-def annotate_recombinase_sites(
-    seq: _Dseqrecord,
-    site1: str,
-    site2: str,
-    site1_name: str = "site1",
-    site2_name: str = "site2",
-) -> _Dseqrecord:
-    """Annotate *seq* with features for all occurrences of the recombinase sites.
+        Returns
+        -------
+        dict[str, list[SimpleLocation]]
+            Dictionary mapping site names to lists of locations.
+        """
+        out: dict[str, list[SimpleLocation]] = {}
+        for name, raw_site in [
+            (self.site1_name, self.site1),
+            (self.site2_name, self.site2),
+        ]:
+            fwd_re = compute_regex_site(raw_site)
+            rev_re = compute_regex_site(reverse_complement(raw_site))
+            for pattern, strand in [(fwd_re, 1), (rev_re, -1)]:
+                for match in dseqrecord_finditer(pattern, seq):
+                    loc = SimpleLocation(match.start(), match.end(), strand)
+                    loc = shift_location(loc, 0, len(seq))
+                    out.setdefault(name, []).append(loc)
+        return out
+
+    def annotate(self, seq: Dseqrecord) -> Dseqrecord:
+        """Annotate *seq* with features for all occurrences of the recombinase sites.
 
-    Parameters
-    ----------
-    seq : Dseqrecord
-        Sequence to annotate (modified in place and returned).
-    site1 : str
-        First recognition site.
-    site2 : str
-        Second recognition site.
-    site1_name : str
-        Label for site1 features.
-    site2_name : str
-        Label for site2 features.
+        Parameters
+        ----------
+        seq : Dseqrecord
+            Sequence to annotate (modified in place and returned).
 
-    Returns
-    -------
-    Dseqrecord
-        The same sequence with added features.
-    """
-    sites = find_recombinase_sites(seq, site1, site2, site1_name, site2_name)
-    for name, locs in sites.items():
-        for loc in locs:
-            if not any(
-                f.location == loc
-                and f.type == "protein_bind"
-                and f.qualifiers.get("label", []) == [name]
-                for f in seq.features
-            ):
-                seq.features.append(
-                    SeqFeature(loc, type="protein_bind", qualifiers={"label": [name]})
-                )
-    return seq
+        Returns
+        -------
+        Dseqrecord
+            The same sequence with added features.
+        """
+        out_seq = copy.deepcopy(seq)
+        sites = self.find(out_seq)
+        for name, locs in sites.items():
+            for loc in locs:
+                if not any(
+                    f.location == loc
+                    and f.type == "protein_bind"
+                    and f.qualifiers.get("label", []) == [name]
+                    for f in out_seq.features
+                ):
+                    out_seq.features.append(
+                        SeqFeature(
+                            loc, type="protein_bind", qualifiers={"label": [name]}
+                        )
+                    )
+        return out_seq
diff --git a/tests/test_module_recombinase.py b/tests/test_module_recombinase.py
index f1c462c6..b0aaf236 100644
--- a/tests/test_module_recombinase.py
+++ b/tests/test_module_recombinase.py
@@ -1,5 +1,6 @@
+# -*- coding: utf-8 -*-
 import pytest
-
+from Bio.SeqFeature import SimpleLocation
 from pydna.dseqrecord import Dseqrecord
 from pydna.assembly2 import (
     Assembly,
@@ -9,8 +10,6 @@
 from pydna.recombinase import (
     _recombinase_homology_offset_and_length,
     Recombinase,
-    find_recombinase_sites,
-    annotate_recombinase_sites,
 )
 
 
@@ -161,27 +160,14 @@ def test_reverse_complement_sites():
     site2 = "AAaaTTTTTTTCCCT"
 
     rc_site1 = reverse_complement(site1)
+    rc_site2 = reverse_complement(site2)
     seqA = Dseqrecord(f"ggg{rc_site1}ggg")
-    seqB = Dseqrecord(f"ttt{reverse_complement(site2)}ttt")
+    seqB = Dseqrecord(f"ttt{rc_site2}ttt")
 
     rec = Recombinase(site1, site2)
     matches = rec.overlap(seqA, seqB, limit=None)
-    assert len(matches) >= 1
-
-
-# ---------------------------------------------------------------------------
-# Recombinase class
-# ---------------------------------------------------------------------------
-
-
-def test_recombinase_class_basic():
-    rec = Recombinase("ATGCCCTAAaaTT", "AAaaTTTTTTTCCCT")
-
-    seqA = Dseqrecord("aaaATGCCCTAAaaTTtt")
-    seqB = Dseqrecord("tataAAaaTTTTTTTCCCTaaa")
-
-    matches = rec.overlap(seqA, seqB)
-    assert matches == [(12, 6, 2)]
+    assert len(matches) == 1
+    assert matches[0] == (5, 14, 2)
 
 
 # ---------------------------------------------------------------------------
@@ -191,48 +177,40 @@ def test_recombinase_class_basic():
 
 def test_find_recombinase_sites():
     site1 = "ATGCCCTAAaaTT"
+    site2 = "AAaaTTTTTTTCCCT"
+    rec = Recombinase(site1, site2, site1_name="s1", site2_name="s2")
     seq = Dseqrecord(f"ggg{site1.upper()}ttt")
-    sites = find_recombinase_sites(
-        seq, site1, "GGGaaaCCC", site1_name="s1", site2_name="s2"
-    )
+    sites = rec.find(seq)
     assert "s1" in sites
-    assert len(sites["s1"]) >= 1
+    assert sites["s1"] == [SimpleLocation(3, 3 + len(site1), 1)]
 
+    # Works origin-spanning sites
+    seq_looped = seq.looped()
+    for shift in range(len(seq_looped)):
+        seq_looped_shifted = seq_looped.shifted(shift)
+        sites = rec.find(seq_looped_shifted)
+        assert str(sites["s1"][0].extract(seq_looped_shifted.seq)) == site1.upper()
 
-def test_annotate_recombinase_sites():
-    site1 = "ATGCCCTAAaaTT"
-    seq = Dseqrecord(f"ggg{site1.upper()}ttt")
-    assert len(seq.features) == 0
-    annotate_recombinase_sites(
-        seq, site1, "GGGaaaCCC", site1_name="mysite1", site2_name="mysite2"
-    )
-    assert len(seq.features) >= 1
-    assert any(f.qualifiers.get("label", []) == ["mysite1"] for f in seq.features)
+    # Works with multiple sites
+    seq = Dseqrecord(f"ggg{site1.upper()}ttt{site2.upper()}ttt{site1.upper()}ttt")
+    sites = rec.find(seq)
+    assert len(sites["s1"]) == 2
+    assert len(sites["s2"]) == 1
 
-    # Calling again should not duplicate
-    n_before = len(seq.features)
-    annotate_recombinase_sites(
-        seq, site1, "GGGaaaCCC", site1_name="mysite1", site2_name="mysite2"
-    )
-    assert len(seq.features) == n_before
 
-
-# ---------------------------------------------------------------------------
-# Symmetry: site1 in seqy and site2 in seqx should also work
-# ---------------------------------------------------------------------------
-
-
-def test_sites_swapped_between_sequences():
-    """With site1 in seqx and site2 in seqy (can be either order in Assembly)."""
+def test_annotate_recombinase_sites():
     site1 = "ATGCCCTAAaaTT"
     site2 = "AAaaTTTTTTTCCCT"
+    rec = Recombinase(site1, site2, site1_name="mysite1", site2_name="mysite2")
+    seq = Dseqrecord(f"ggg{site1.upper()}ttt")
 
-    seqA = Dseqrecord("aaaATGCCCTAAaaTTtt")
-    seqB = Dseqrecord("tataAAaaTTTTTTTCCCTaaa")
+    annotated_seq = rec.annotate(seq)
+    assert len(annotated_seq.features) == 1
+    assert annotated_seq.features[0].qualifiers.get("label", []) == ["mysite1"]
+    assert annotated_seq.features[0].location == SimpleLocation(3, 3 + len(site1), 1)
 
-    rec = Recombinase(site1, site2)
-    matches = rec.overlap(seqA, seqB, limit=None)
-    assert len(matches) >= 1
+    # Does not change original sequence
+    assert len(seq.features) == 0
 
 
 # ---------------------------------------------------------------------------
@@ -242,32 +220,35 @@ def test_sites_swapped_between_sequences():
 
 def test_recombinase_integration():
     """Integration of insert into genome via recombinase sites."""
-    site1 = "ATGCCCTAAaaTT"
-    site2 = "AAaaTTTTTTTCCCT"
+    site1 = "ATGCCCTAAaaCT"
+    site2 = "CAaaTTTTTTTCCCT"
 
     genome = Dseqrecord(f"cccccc{site1.upper()}aaaaa")
     insert = Dseqrecord(f"{site2.upper()}bbbbb", circular=True)
+    rec = Recombinase(site1, site2)
 
-    products = recombinase_integration(genome, [insert], site1, site2)
-    assert len(products) >= 1
-    # Integrated product should contain both flanking regions and insert
-    seq_str = str(products[0].seq).upper()
-    assert "ATGCCCTAA" in seq_str
-    assert "TTTTTTTCCCT" in seq_str
+    products = recombinase_integration(genome, [insert], rec)
+
+    assert len(products) == 1
+    assert len(products[0].features) == 0
+    assert str(products[0].seq) == "ccccccATGCCCTAAAATTTTTTTCCCTbbbbbCAAACTaaaaa"
 
 
 def test_recombinase_excision():
     """Excision of plasmid from genome with two recombinase sites."""
-    site1 = "ATGCCCTAAaaTT"
+    site1 = "ATGCCCTAAaaCT"
     site2 = "AAaaTTTTTTTCCCT"
 
+    rec = Recombinase(site1, site2)
     genome = Dseqrecord(
-        f"cccccc{site1.upper()}xxxxx{site2.upper()}aaaaa",
+        f"cccccc{site1.upper()}tttt{site2.upper()}aaaaa",
         circular=True,
     )
 
-    products = recombinase_excision(genome, site1, site2)
-    assert len(products) >= 1
+    products = recombinase_excision(genome, rec)
+    assert len(products) == 2
+    assert str(products[0].seq) == "ccccccATGCCCTAAaaTTTTTTTCCCTtttt"
+    # assert str(products[1].seq) == "ccccccATGCCCTAAAATTTTTTTCCCTbbbbbCAAACTaaaaa"
 
 
 def test_recombinase_integration_excision_reversibility():

From f14093d37fa1c3ecf403ada0a296914543344ec4 Mon Sep 17 00:00:00 2001
From: Manuel Lera-Ramirez <manulera14@gmail.com>
Date: Wed, 25 Feb 2026 12:12:01 +0000
Subject: [PATCH 03/10] tests passing

---
 dummy.py                         | 25 -------------------------
 src/pydna/recombinase.py         |  2 ++
 tests/test_module_recombinase.py | 24 +++++++++++++-----------
 3 files changed, 15 insertions(+), 36 deletions(-)
 delete mode 100644 dummy.py

diff --git a/dummy.py b/dummy.py
deleted file mode 100644
index c770d819..00000000
--- a/dummy.py
+++ /dev/null
@@ -1,25 +0,0 @@
-# -*- coding: utf-8 -*-
-from pydna.dseqrecord import Dseqrecord
-from pydna.assembly2 import recombinase_excision
-from pydna.recombinase import Recombinase
-from pydna.assembly2 import SingleFragmentAssembly
-
-
-site1 = "ATGCCCTAAaaCT"
-site2 = "AAaaTTTTTTTCCCT"
-
-rec = Recombinase(site1, site2)
-genome = Dseqrecord(f"cccccc{site1.upper()}tttt{site2.upper()}aaaaa")
-
-products = recombinase_excision(genome, rec)
-
-asm = SingleFragmentAssembly([genome], None, rec.overlap)
-print(*asm.G.edges(data=True), sep="\n")
-print(asm.get_circular_assemblies())
-print()
-
-seq2 = Dseqrecord(f"ag{site1.upper()}gtca{site1.upper()}aa")
-
-asm = SingleFragmentAssembly([seq2], limit=13)
-print(*asm.G.edges(data=True), sep="\n")
-# print(asm.get_circular_assemblies())
diff --git a/src/pydna/recombinase.py b/src/pydna/recombinase.py
index 3f2669eb..87e65171 100644
--- a/src/pydna/recombinase.py
+++ b/src/pydna/recombinase.py
@@ -129,6 +129,8 @@ def __init__(
         self._configs = [
             (self._site1_fwd, self._site2_fwd, off1, off2),
             (self._site1_rev, self._site2_rev, off1_rev, off2_rev),
+            (self._site2_fwd, self._site1_fwd, off2, off1),
+            (self._site2_rev, self._site1_rev, off2_rev, off1_rev),
         ]
 
     def overlap(
diff --git a/tests/test_module_recombinase.py b/tests/test_module_recombinase.py
index b0aaf236..88891a37 100644
--- a/tests/test_module_recombinase.py
+++ b/tests/test_module_recombinase.py
@@ -2,6 +2,7 @@
 import pytest
 from Bio.SeqFeature import SimpleLocation
 from pydna.dseqrecord import Dseqrecord
+from pydna.dseq import Dseq
 from pydna.assembly2 import (
     Assembly,
     recombinase_integration,
@@ -240,30 +241,31 @@ def test_recombinase_excision():
     site2 = "AAaaTTTTTTTCCCT"
 
     rec = Recombinase(site1, site2)
-    genome = Dseqrecord(
-        f"cccccc{site1.upper()}tttt{site2.upper()}aaaaa",
-        circular=True,
-    )
+    genome = Dseqrecord(f"cccccc{site1.upper()}tttt{site2.upper()}aaaaa")
 
     products = recombinase_excision(genome, rec)
     assert len(products) == 2
-    assert str(products[0].seq) == "ccccccATGCCCTAAaaTTTTTTTCCCTtttt"
-    # assert str(products[1].seq) == "ccccccATGCCCTAAAATTTTTTTCCCTbbbbbCAAACTaaaaa"
+    assert products[0].seq.upper() == Dseq("AACTttttAA".upper())
+    assert (
+        products[1].seq.upper()
+        == Dseq("ccccccATGCCCTAAAATTTTTTTCCCTaaaaa", circular=True).upper()
+    )
 
 
 def test_recombinase_integration_excision_reversibility():
     """With same site on both sides (Cre-Lox style), integrate then excise returns originals."""
-    site = "ATGCCCTAAaaTT"
+    site = "ATGaaGTA"
 
     genome = Dseqrecord(f"cccccc{site.upper()}aaaaa")
     insert = Dseqrecord(f"{site.upper()}bbbbb", circular=True)
+    rec = Recombinase(site, site)
 
-    products = recombinase_integration(genome, [insert], site, site)
+    products = recombinase_integration(genome, [insert], rec)
     assert len(products) == 1
     integrated = products[0]
 
-    excised = recombinase_excision(integrated, site, site)
+    excised = recombinase_excision(integrated, rec)
     assert len(excised) == 2
 
-    seqs = [str(p.seq).upper() for p in excised]
-    assert any(site.upper() in s for s in seqs)
+    assert excised[1].seq.seguid() == genome.seq.seguid()
+    assert excised[0].seq.seguid() == insert.seq.seguid()

From 73474961bfd107818f522577be91650bf4677008 Mon Sep 17 00:00:00 2001
From: Manuel Lera-Ramirez <manulera14@gmail.com>
Date: Wed, 25 Feb 2026 12:26:56 +0000
Subject: [PATCH 04/10] allow reversing recombinase

---
 src/pydna/recombinase.py         | 39 ++++++++++++++++++++++++++++++++
 tests/test_module_recombinase.py | 11 +++++++++
 2 files changed, 50 insertions(+)

diff --git a/src/pydna/recombinase.py b/src/pydna/recombinase.py
index 87e65171..f41dedd8 100644
--- a/src/pydna/recombinase.py
+++ b/src/pydna/recombinase.py
@@ -133,6 +133,45 @@ def __init__(
             (self._site2_rev, self._site1_rev, off2_rev, off1_rev),
         ]
 
+    def __repr__(self) -> str:
+        return f"Recombinase(site1={self.site1}, site2={self.site2}, site1_name={self.site1_name}, site2_name={self.site2_name})"
+
+    def get_reverse_recombinase(
+        self, site12_name: str = "site12", site21_name: str = "site21"
+    ) -> "Recombinase":
+        """Get a recombinase that does the reverse reaction.
+
+        Parameters
+        ----------
+        site12_name : str, optional
+            Label for site12 in find/annotate output. Default is "site12".
+        site21_name : str, optional
+            Label for site21 in find/annotate output. Default is "site21".
+
+        Returns
+        -------
+        Recombinase
+            A recombinase that does the reverse reaction.
+
+        Examples
+        --------
+        >>> from pydna.dseqrecord import Dseqrecord
+        >>> from pydna.recombinase import Recombinase
+        >>> rec = Recombinase("ATGCCCTAAaaTT", "AAaaTTTTTTTCCCT")
+        >>> rec.get_reverse_recombinase()
+        Recombinase(site1=ATGCCCTAAaaTTTTTTTCCCT, site2=AAaaTT, site1_name=site12, site2_name=site21)
+        """
+        _, _, off1, off2 = self._configs[0]
+        site12 = (
+            self.site1[: off1 + self._homology_len]
+            + self.site2[off2 + self._homology_len :]
+        )
+        site21 = (
+            self.site2[: off2 + self._homology_len]
+            + self.site1[off1 + self._homology_len :]
+        )
+        return Recombinase(site12, site21, site12_name, site21_name)
+
     def overlap(
         self,
         seqx: Dseqrecord,
diff --git a/tests/test_module_recombinase.py b/tests/test_module_recombinase.py
index 88891a37..6ed3ece1 100644
--- a/tests/test_module_recombinase.py
+++ b/tests/test_module_recombinase.py
@@ -269,3 +269,14 @@ def test_recombinase_integration_excision_reversibility():
 
     assert excised[1].seq.seguid() == genome.seq.seguid()
     assert excised[0].seq.seguid() == insert.seq.seguid()
+
+
+def test_recombinase_reverse():
+    site1 = "ATGCCCTAAaaTT"
+    site2 = "AAaaTTTTTTTCCCT"
+    rec = Recombinase(site1, site2)
+    rev_rec = rec.get_reverse_recombinase()
+    assert rev_rec.site1 == "ATGCCCTAAaaTTTTTTTCCCT"
+    assert rev_rec.site2 == "AAaaTT"
+    assert rev_rec.site1_name == "site12"
+    assert rev_rec.site2_name == "site21"

From 450b340f145f8c904c655db9d6848593d7c8a145 Mon Sep 17 00:00:00 2001
From: Manuel Lera-Ramirez <manulera14@gmail.com>
Date: Wed, 25 Feb 2026 16:17:49 +0000
Subject: [PATCH 05/10] used recombinase class in gateway

---
 src/pydna/gateway.py           | 241 ++++++++++++++-------------------
 tests/test_module_assembly2.py |   2 +-
 2 files changed, 102 insertions(+), 141 deletions(-)

diff --git a/src/pydna/gateway.py b/src/pydna/gateway.py
index e9a00e2e..0076f71e 100644
--- a/src/pydna/gateway.py
+++ b/src/pydna/gateway.py
@@ -1,164 +1,125 @@
 # -*- coding: utf-8 -*-
-from Bio.Seq import reverse_complement
 from pydna.dseqrecord import Dseqrecord
-import re
-import itertools
-from Bio.SeqFeature import SimpleLocation, SeqFeature
-from pydna.utils import shift_location
-from pydna.sequence_regex import compute_regex_site, dseqrecord_finditer
-
-
-raw_gateway_common = {
-    "attB1": "CHWVTWTGTACAAAAAANNNG",
-    "attB2": "CHWVTWTGTACAAGAAANNNG",
-    "attB3": "CHWVTWTGTATAATAAANNNG",
-    "attB4": "CHWVTWTGTATAGAAAANNNG",
-    "attB5": "CHWVTWTGTATACAAAANNNG",
-    "attL1": "VAAWWAWKRWTTTWWTTYGACTGATAGTGACCTGTWCGTYGMAACAVATTGATRAGCAATKMTTTYYTATAWTGHCMASTWTGTACAAAAAANNNG",
-    "attL2": "VAAWWAWKRWTTTWWTTYGACTGATAGTGACCTGTWCGTYGMAACAVATTGATRAGCAATKMTTTYYTATAWTGHCMASTWTGTACAAGAAANNNG",
-    "attL3": "VAAWWAWKRWTTTWWTTYGACTGATAGTGACCTGTWCGTYGMAACAVATTGATRAGCAATKMTTTYYTATAWTGHCMASTWTGTATAATAAANNNG",
-    "attL4": "VAAWWAWKRWTTTWWTTYGACTGATAGTGACCTGTWCGTYGMAACAVATTGATRAGCAATKMTTTYYTATAWTGHCMASTWTGTATAGAAAANNNG",
-    "attL5": "VAAWWAWKRWTTTWWTTYGACTGATAGTGACCTGTWCGTYGMAACAVATTGATRAGCAATKMTTTYYTATAWTGHCMASTWTGTATACAAAANNNG",
-    "attR1": "CHWVTWTGTACAAAAAAGYWGARCGAGAARCGTAARRTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATRCTGTAARACACAACATATBCAGTCV",
-    "attR2": "CHWVTWTGTACAAGAAAGYWGARCGAGAARCGTAARRTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATRCTGTAARACACAACATATBCAGTCV",
-    "attR3": "CHWVTWTGTATAATAAAGYWGARCGAGAARCGTAARRTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATRCTGTAARACACAACATATBCAGTCV",
-    "attR4": "CHWVTWTGTATAGAAAAGYWGARCGAGAARCGTAARRTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATRCTGTAARACACAACATATBCAGTCV",
-    "attR5": "CHWVTWTGTATACAAAAGYWGARCGAGAARCGTAARRTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATRCTGTAARACACAACATATBCAGTCV",
-    "overlap_1": "twtGTACAAAaaa",
-    "overlap_2": "twtGTACAAGaaa",
-    "overlap_3": "twtGTATAATaaa",
-    "overlap_4": "twtGTATAGAaaa",
-    "overlap_5": "twtGTATACAaaa",
-}
-
-
-raw_gateway_sites_greedy = {
-    **raw_gateway_common,
-    "attP1": "VAAWWAWKRWTTTWWTTYGACTGATAGTGACCTGTWCGTYGMAACAVATTGATRAGCAATKMTTTYYTATAWTGHCMASTWTGTACAAAAAAGYWGARCGAGAARCGTAARRTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATRCTGTAARACACAACATATBCAGTCV",
-    "attP2": "VAAWWAWKRWTTTWWTTYGACTGATAGTGACCTGTWCGTYGMAACAVATTGATRAGCAATKMTTTYYTATAWTGHCMASTWTGTACAAGAAAGYWGARCGAGAARCGTAARRTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATRCTGTAARACACAACATATBCAGTCV",
-    "attP3": "VAAWWAWKRWTTTWWTTYGACTGATAGTGACCTGTWCGTYGMAACAVATTGATRAGCAATKMTTTYYTATAWTGHCMASTWTGTATAATAAAGYWGARCGAGAARCGTAARRTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATRCTGTAARACACAACATATBCAGTCV",
-    "attP4": "VAAWWAWKRWTTTWWTTYGACTGATAGTGACCTGTWCGTYGMAACAVATTGATRAGCAATKMTTTYYTATAWTGHCMASTWTGTATAGAAAAGYWGARCGAGAARCGTAARRTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATRCTGTAARACACAACATATBCAGTCV",
-    "attP5": "VAAWWAWKRWTTTWWTTYGACTGATAGTGACCTGTWCGTYGMAACAVATTGATRAGCAATKMTTTYYTATAWTGHCMASTWTGTATACAAAAGYWGARCGAGAARCGTAARRTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATRCTGTAARACACAACATATBCAGTCV",
-}
-
-raw_gateway_sites_conservative = {
-    **raw_gateway_common,
-    "attP1": "AAAWWAWKRWTTTWWTTTGACTGATAGTGACCTGTTCGTTGCAACAMATTGATRAGCAATGCTTTYTTATAATGCCMASTTTGTACAAAAAAGYWGAACGAGAARCGTAAARTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATACTGTAAAACACAACATATSCAGTCACTATGAAYCAACTACTTAGATGGTATTAGTGACCTGTA",
-    "attP2": "AAAWWAWKRWTTTWWTTTGACTGATAGTGACCTGTTCGTTGCAACAMATTGATRAGCAATGCTTTYTTATAATGCCMASTTTGTACAAGAAAGYWGAACGAGAARCGTAAARTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATACTGTAAAACACAACATATSCAGTCACTATGAAYCAACTACTTAGATGGTATTAGTGACCTGTA",
-    "attP3": "AAAWWAWKRWTTTWWTTTGACTGATAGTGACCTGTTCGTTGCAACAMATTGATRAGCAATGCTTTYTTATAATGCCMASTTTGTATAATAAAGYWGAACGAGAARCGTAAARTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATACTGTAAAACACAACATATSCAGTCACTATGAAYCAACTACTTAGATGGTATTAGTGACCTGTA",
-    "attP4": "AAAWWAWKRWTTTWWTTTGACTGATAGTGACCTGTTCGTTGCAACAMATTGATRAGCAATGCTTTYTTATAATGCCMASTTTGTATAGAAAAGYWGAACGAGAARCGTAAARTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATACTGTAAAACACAACATATSCAGTCACTATGAAYCAACTACTTAGATGGTATTAGTGACCTGTA",
-    "attP5": "AAAWWAWKRWTTTWWTTTGACTGATAGTGACCTGTTCGTTGCAACAMATTGATRAGCAATGCTTTYTTATAATGCCMASTTTGTATACAAAAGYWGAACGAGAARCGTAAARTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATACTGTAAAACACAACATATSCAGTCACTATGAAYCAACTACTTAGATGGTATTAGTGACCTGTA",
-}
-
-gateway_sites_greedy = {
-    k: {
-        "forward_regex": compute_regex_site(v),
-        "reverse_regex": compute_regex_site(reverse_complement(v)),
-        "consensus_sequence": v,
+from Bio.SeqFeature import SimpleLocation
+from pydna.recombinase import Recombinase
+
+
+def create_recombinase_dict() -> dict[str, dict[str, list[Recombinase]]]:
+    """Create a dictionary of recombinases for the Gateway reaction."""
+    raw_gateway_common = {
+        "attB1": "CHWVTWTgtacaaaAAANNNG",
+        "attB2": "CHWVTWTgtacaagAAANNNG",
+        "attB3": "CHWVTWTgtataatAAANNNG",
+        "attB4": "CHWVTWTgtatagaAAANNNG",
+        "attB5": "CHWVTWTgtatacaAAANNNG",
+        "attL1": "VAAWWAWKRWTTTWWTTYGACTGATAGTGACCTGTWCGTYGMAACAVATTGATRAGCAATKMTTTYYTATAWTGHCMASTWTgtacaaaAAANNNG",
+        "attL2": "VAAWWAWKRWTTTWWTTYGACTGATAGTGACCTGTWCGTYGMAACAVATTGATRAGCAATKMTTTYYTATAWTGHCMASTWTgtacaagAAANNNG",
+        "attL3": "VAAWWAWKRWTTTWWTTYGACTGATAGTGACCTGTWCGTYGMAACAVATTGATRAGCAATKMTTTYYTATAWTGHCMASTWTgtataatAAANNNG",
+        "attL4": "VAAWWAWKRWTTTWWTTYGACTGATAGTGACCTGTWCGTYGMAACAVATTGATRAGCAATKMTTTYYTATAWTGHCMASTWTgtatagaAAANNNG",
+        "attL5": "VAAWWAWKRWTTTWWTTYGACTGATAGTGACCTGTWCGTYGMAACAVATTGATRAGCAATKMTTTYYTATAWTGHCMASTWTgtatacaAAANNNG",
+        "attR1": "CHWVTWTgtacaaaAAAGYWGARCGAGAARCGTAARRTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATRCTGTAARACACAACATATBCAGTCV",
+        "attR2": "CHWVTWTgtacaagAAAGYWGARCGAGAARCGTAARRTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATRCTGTAARACACAACATATBCAGTCV",
+        "attR3": "CHWVTWTgtataatAAAGYWGARCGAGAARCGTAARRTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATRCTGTAARACACAACATATBCAGTCV",
+        "attR4": "CHWVTWTgtatagaAAAGYWGARCGAGAARCGTAARRTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATRCTGTAARACACAACATATBCAGTCV",
+        "attR5": "CHWVTWTgtatacaAAAGYWGARCGAGAARCGTAARRTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATRCTGTAARACACAACATATBCAGTCV",
+        "overlap_1": "TWTgtacaaaAAA",
+        "overlap_2": "TWTgtacaagAAA",
+        "overlap_3": "TWTgtataatAAA",
+        "overlap_4": "TWTgtatagaAAA",
+        "overlap_5": "TWTgtatacaAAA",
     }
-    for k, v in raw_gateway_sites_greedy.items()
-}
-
-gateway_sites_conservative = {
-    k: {
-        "forward_regex": compute_regex_site(v),
-        "reverse_regex": compute_regex_site(reverse_complement(v)),
-        "consensus_sequence": v,
+
+    raw_gateway_sites_greedy = {
+        **raw_gateway_common,
+        "attP1": "VAAWWAWKRWTTTWWTTYGACTGATAGTGACCTGTWCGTYGMAACAVATTGATRAGCAATKMTTTYYTATAWTGHCMASTWTgtacaaaAAAGYWGARCGAGAARCGTAARRTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATRCTGTAARACACAACATATBCAGTCV",
+        "attP2": "VAAWWAWKRWTTTWWTTYGACTGATAGTGACCTGTWCGTYGMAACAVATTGATRAGCAATKMTTTYYTATAWTGHCMASTWTgtacaagAAAGYWGARCGAGAARCGTAARRTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATRCTGTAARACACAACATATBCAGTCV",
+        "attP3": "VAAWWAWKRWTTTWWTTYGACTGATAGTGACCTGTWCGTYGMAACAVATTGATRAGCAATKMTTTYYTATAWTGHCMASTWTgtataatAAAGYWGARCGAGAARCGTAARRTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATRCTGTAARACACAACATATBCAGTCV",
+        "attP4": "VAAWWAWKRWTTTWWTTYGACTGATAGTGACCTGTWCGTYGMAACAVATTGATRAGCAATKMTTTYYTATAWTGHCMASTWTgtatagaAAAGYWGARCGAGAARCGTAARRTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATRCTGTAARACACAACATATBCAGTCV",
+        "attP5": "VAAWWAWKRWTTTWWTTYGACTGATAGTGACCTGTWCGTYGMAACAVATTGATRAGCAATKMTTTYYTATAWTGHCMASTWTgtatacaAAAGYWGARCGAGAARCGTAARRTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATRCTGTAARACACAACATATBCAGTCV",
+    }
+
+    raw_gateway_sites_conservative = {
+        **raw_gateway_common,
+        "attP1": "AAAWWAWKRWTTTWWTTTGACTGATAGTGACCTGTTCGTTGCAACAMATTGATRAGCAATGCTTTYTTATAATGCCMASTTTgtacaaaAAAGYWGAACGAGAARCGTAAARTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATACTGTAAAACACAACATATSCAGTCACTATGAAYCAACTACTTAGATGGTATTAGTGACCTGTA",
+        "attP2": "AAAWWAWKRWTTTWWTTTGACTGATAGTGACCTGTTCGTTGCAACAMATTGATRAGCAATGCTTTYTTATAATGCCMASTTTgtacaagAAAGYWGAACGAGAARCGTAAARTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATACTGTAAAACACAACATATSCAGTCACTATGAAYCAACTACTTAGATGGTATTAGTGACCTGTA",
+        "attP3": "AAAWWAWKRWTTTWWTTTGACTGATAGTGACCTGTTCGTTGCAACAMATTGATRAGCAATGCTTTYTTATAATGCCMASTTTgtataatAAAGYWGAACGAGAARCGTAAARTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATACTGTAAAACACAACATATSCAGTCACTATGAAYCAACTACTTAGATGGTATTAGTGACCTGTA",
+        "attP4": "AAAWWAWKRWTTTWWTTTGACTGATAGTGACCTGTTCGTTGCAACAMATTGATRAGCAATGCTTTYTTATAATGCCMASTTTgtatagaAAAGYWGAACGAGAARCGTAAARTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATACTGTAAAACACAACATATSCAGTCACTATGAAYCAACTACTTAGATGGTATTAGTGACCTGTA",
+        "attP5": "AAAWWAWKRWTTTWWTTTGACTGATAGTGACCTGTTCGTTGCAACAMATTGATRAGCAATGCTTTYTTATAATGCCMASTTTgtatacaAAAGYWGAACGAGAARCGTAAARTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATACTGTAAAACACAACATATSCAGTCACTATGAAYCAACTACTTAGATGGTATTAGTGACCTGTA",
     }
-    for k, v in raw_gateway_sites_conservative.items()
-}
 
-# From snapgene - ask Valerie
-primer_design_attB = {
-    "attB1": "ACAAGTTTGTACAAAAAAGCAGGCT",
-    "attB2": "ACCACTTTGTACAAGAAAGCTGGGT",
-    "attB3": "ACAACTTTGTATAATAAAGTTGTA",
-    "attB4": "ACAACTTTGTATAGAAAAGTTGTA",
-    "attB5": "ACAACTTTGTATACAAAAGTTGTA",
-}
+    out_dict = {}
+
+    for reaction in ["BP", "LR"]:
+        left, right = reaction
+        conservative: list[Recombinase] = []
+        greedy: list[Recombinase] = []
+        for i in range(1, 6):
+            site1 = f"att{left}{i}"
+            site2 = f"att{right}{i}"
+            seq1_conservative = raw_gateway_sites_conservative[site1]
+            seq2_conservative = raw_gateway_sites_conservative[site2]
+            seq1_greedy = raw_gateway_sites_greedy[site1]
+            seq2_greedy = raw_gateway_sites_greedy[site2]
+            conservative.append(
+                Recombinase(seq1_conservative, seq2_conservative, site1, site2)
+            )
+            greedy.append(Recombinase(seq1_greedy, seq2_greedy, site1, site2))
+
+        out_dict[reaction] = {
+            "conservative": conservative,
+            "greedy": greedy,
+        }
+    return out_dict
+
+
+recombinase_dict = create_recombinase_dict()
 
 
 def gateway_overlap(
     seqx: Dseqrecord, seqy: Dseqrecord, reaction: str, greedy: bool
 ) -> list[tuple[int, int, int]]:
     """
-    Find gateway overlaps. If greedy is True, it uses a more greedy consensus site to find attP sites,
-    which might give false positives
+    Assembly Algorithm: Find gateway overlaps. If greedy is True, it uses a more greedy consensus site to find attP sites,
+    which might give false positives.
+
+    Parameters
+    ----------
+    seqx : Dseqrecord
+        First sequence to find overlaps.
+    seqy : Dseqrecord
+        Second sequence to find overlaps.
+    reaction : str
+        Type of Gateway reaction (BP or LR).
+    greedy : bool
+        If True, use greedy gateway consensus sites.
+
+    Returns
+    -------
+    list[tuple[int, int, int]] A list of overlaps between the two sequences.
     """
-    if reaction not in ["BP", "LR"]:
-        raise ValueError(f"Invalid overlap type: {reaction}")
-
-    gateway_sites = gateway_sites_greedy if greedy else gateway_sites_conservative
-    out = list()
-    # Iterate over the four possible att sites
-    for num in range(1, 5):
-        # Iterate over the two possible orientations
-        # The sites have to be in the same orientation (fwd + fwd or rev + rev)
-        for pattern in ["forward_regex", "reverse_regex"]:
-            # The overlap regex is the same for all types
-            overlap_regex = gateway_sites[f"overlap_{num}"][pattern]
-
-            # Iterate over pairs B, P and P, B for BP and L, R and R, L for LR
-            for site_x, site_y in zip(reaction, reaction[::-1]):
-                site_x_regex = gateway_sites[f"att{site_x}{num}"][pattern]
-                matches_x = list(dseqrecord_finditer(site_x_regex, seqx))
-                if len(matches_x) == 0:
-                    continue
-
-                site_y_regex = gateway_sites[f"att{site_y}{num}"][pattern]
-                matches_y = list(dseqrecord_finditer(site_y_regex, seqy))
-                if len(matches_y) == 0:
-                    continue
-
-                for match_x, match_y in itertools.product(matches_x, matches_y):
-                    # Find the overlap sequence within each match, and use the
-                    # core 7 pbs that are constant
-                    overlap_x = re.search(overlap_regex, match_x.group())
-                    overlap_y = re.search(overlap_regex, match_y.group())
-
-                    # Sanity check
-                    assert (
-                        overlap_x is not None and overlap_y is not None
-                    ), "Something went wrong, no overlap found within the matches"
-
-                    out.append(
-                        (
-                            match_x.start() + overlap_x.start() + 3,
-                            match_y.start() + overlap_y.start() + 3,
-                            7,
-                        )
-                    )
-
-    return out
+    type = "greedy" if greedy else "conservative"
+    recombinases = recombinase_dict[reaction][type]
+    return sum((r.overlap(seqx, seqy) for r in recombinases), [])
 
 
 def find_gateway_sites(
     seq: Dseqrecord, greedy: bool
 ) -> dict[str, list[SimpleLocation]]:
     """Find all gateway sites in a sequence and return a dictionary with the name and positions of the sites."""
-    gateway_sites = gateway_sites_greedy if greedy else gateway_sites_conservative
+
+    type = "greedy" if greedy else "conservative"
     out = dict()
-    for site in gateway_sites:
-        if not site.startswith("att"):
-            continue
-
-        for pattern in ["forward_regex", "reverse_regex"]:
-            matches = list(dseqrecord_finditer(gateway_sites[site][pattern], seq))
-            for match in matches:
-                if site not in out:
-                    out[site] = []
-                strand = 1 if pattern == "forward_regex" else -1
-                loc = SimpleLocation(match.start(), match.end(), strand)
-                loc = shift_location(loc, 0, len(seq))
-                out[site].append(loc)
+    for reaction in ["BP", "LR"]:
+        for rec in recombinase_dict[reaction][type]:
+            out.update(rec.find(seq))
     return out
 
 
 def annotate_gateway_sites(seq: Dseqrecord, greedy: bool) -> Dseqrecord:
-    sites = find_gateway_sites(seq, greedy)
-    for site in sites:
-        for loc in sites[site]:
-            seq.features.append(
-                SeqFeature(loc, type="protein_bind", qualifiers={"label": [site]})
-            )
+    """Annotate gateway sites in a sequence."""
+    type = "greedy" if greedy else "conservative"
+    out = seq
+    for reaction in ["BP", "LR"]:
+        for rec in recombinase_dict[reaction][type]:
+            out = rec.annotate(out)
     return seq
diff --git a/tests/test_module_assembly2.py b/tests/test_module_assembly2.py
index 19ee9084..8f1ca2d7 100644
--- a/tests/test_module_assembly2.py
+++ b/tests/test_module_assembly2.py
@@ -2638,7 +2638,7 @@ def test_gateway_assembly():
     # Test that greedy is being used (finds)
     with pytest.raises(ValueError) as e:
         assembly.gateway_assembly(products_LR, "LR", greedy=True)
-    assert "fragment 2: attB1, attL1, attR1, attP1" in str(e.value)
+    assert "fragment 2: attB1, attP1, attL1, attR1" in str(e.value)
 
     # Test multi site only
     seq1 = Dseqrecord("aaa" + attB1 + "ggg" + attB2 + "ccc", circular=True)

From e526a51dd1a8775442453f7f5c7cf84899dc7229 Mon Sep 17 00:00:00 2001
From: Manuel Lera-Ramirez <manulera14@gmail.com>
Date: Wed, 25 Feb 2026 17:22:43 +0000
Subject: [PATCH 06/10] add collection of recombinases

---
 src/pydna/assembly2.py           |   4 +-
 src/pydna/gateway.py             |  31 +++++----
 src/pydna/recombinase.py         | 108 +++++++++++++++++++++++++++++--
 tests/test_module_recombinase.py |  61 +++++++++++++++++
 4 files changed, 182 insertions(+), 22 deletions(-)

diff --git a/src/pydna/assembly2.py b/src/pydna/assembly2.py
index 2c874dc3..ac86722e 100644
--- a/src/pydna/assembly2.py
+++ b/src/pydna/assembly2.py
@@ -2855,11 +2855,13 @@ def recombinase_integration(
     --------
     >>> from pydna.dseqrecord import Dseqrecord
     >>> from pydna.assembly2 import recombinase_integration, recombinase_excision
+    >>> from pydna.recombinase import Recombinase
     >>> site1 = "ATGCCCTAAaaTT"
     >>> site2 = "AAaaTTTTTTTCCCT"
     >>> genome = Dseqrecord(f"cccccc{site1.upper()}aaaaa")
     >>> insert = Dseqrecord(f"{site2.upper()}bbbbb", circular=True)
-    >>> products = recombinase_integration(genome, [insert], site1, site2)
+    >>> recombinase = Recombinase(site1, site2)
+    >>> products = recombinase_integration(genome, [insert], recombinase)
     >>> len(products) >= 1
     True
     """
diff --git a/src/pydna/gateway.py b/src/pydna/gateway.py
index 0076f71e..212eaee3 100644
--- a/src/pydna/gateway.py
+++ b/src/pydna/gateway.py
@@ -1,10 +1,10 @@
 # -*- coding: utf-8 -*-
 from pydna.dseqrecord import Dseqrecord
 from Bio.SeqFeature import SimpleLocation
-from pydna.recombinase import Recombinase
+from pydna.recombinase import Recombinase, RecombinaseCollection
 
 
-def create_recombinase_dict() -> dict[str, dict[str, list[Recombinase]]]:
+def create_recombinase_dict() -> dict[str, dict[str, RecombinaseCollection]]:
     """Create a dictionary of recombinases for the Gateway reaction."""
     raw_gateway_common = {
         "attB1": "CHWVTWTgtacaaaAAANNNG",
@@ -66,8 +66,8 @@ def create_recombinase_dict() -> dict[str, dict[str, list[Recombinase]]]:
             greedy.append(Recombinase(seq1_greedy, seq2_greedy, site1, site2))
 
         out_dict[reaction] = {
-            "conservative": conservative,
-            "greedy": greedy,
+            "conservative": RecombinaseCollection(conservative),
+            "greedy": RecombinaseCollection(greedy),
         }
     return out_dict
 
@@ -98,8 +98,7 @@ def gateway_overlap(
     list[tuple[int, int, int]] A list of overlaps between the two sequences.
     """
     type = "greedy" if greedy else "conservative"
-    recombinases = recombinase_dict[reaction][type]
-    return sum((r.overlap(seqx, seqy) for r in recombinases), [])
+    return recombinase_dict[reaction][type].overlap(seqx, seqy)
 
 
 def find_gateway_sites(
@@ -108,18 +107,18 @@ def find_gateway_sites(
     """Find all gateway sites in a sequence and return a dictionary with the name and positions of the sites."""
 
     type = "greedy" if greedy else "conservative"
-    out = dict()
-    for reaction in ["BP", "LR"]:
-        for rec in recombinase_dict[reaction][type]:
-            out.update(rec.find(seq))
-    return out
+    collection = RecombinaseCollection(
+        recombinase_dict["BP"][type].recombinases
+        + recombinase_dict["LR"][type].recombinases
+    )
+    return collection.find(seq)
 
 
 def annotate_gateway_sites(seq: Dseqrecord, greedy: bool) -> Dseqrecord:
     """Annotate gateway sites in a sequence."""
     type = "greedy" if greedy else "conservative"
-    out = seq
-    for reaction in ["BP", "LR"]:
-        for rec in recombinase_dict[reaction][type]:
-            out = rec.annotate(out)
-    return seq
+    collection = RecombinaseCollection(
+        recombinase_dict["BP"][type].recombinases
+        + recombinase_dict["LR"][type].recombinases
+    )
+    return collection.annotate(seq)
diff --git a/src/pydna/recombinase.py b/src/pydna/recombinase.py
index f41dedd8..98754287 100644
--- a/src/pydna/recombinase.py
+++ b/src/pydna/recombinase.py
@@ -11,13 +11,79 @@
 
 For example::
 
-    site1 = "ATGCCCTAAaaTT"
-    site2 = "AAaaTTTTTTTCCCT"
+    >>> site1 = "ATGCCCTAAaaTT"
+    >>> site2 = "AAaaTTTTTTTCCCT"
 
 The lowercase ``aa`` is the overlap that will appear in the assembled product.
 
 Sites may contain IUPAC degenerate bases (N, W, S, etc.) and the search
 handles both linear and circular sequences.
+
+The easiest way to use it is with the recombinase_integration and
+recombinase_excision functions from the assembly2 module.
+
+Integration and excision with a single recombinase::
+
+    >>> from pydna.dseqrecord import Dseqrecord
+    >>> from pydna.recombinase import Recombinase
+    >>> from pydna.assembly2 import recombinase_integration, recombinase_excision
+    >>> site1 = "ATGCCCTAAaaCT"
+    >>> site2 = "CAaaTTTTTTTCCCT"
+    >>> genome = Dseqrecord("ccccccATGCCCTAAAACTaaaaa")
+    >>> insert = Dseqrecord("CAAATTTTTTTCCCTbbbbb", circular=True)
+    >>> rec = Recombinase(site1, site2)
+    >>> products = recombinase_integration(genome, [insert], rec)
+    >>>
+    >>> # Cre-Lox style (same site on both sides): integrate then excise returns originals
+    >>> site = "ATGaaGTA"
+    >>> genome = Dseqrecord("ccccccATGAAGTAAAAA")
+    >>> insert = Dseqrecord("ATGAAGTABbbbb", circular=True)
+    >>> rec = Recombinase(site, site)
+    >>> integrated = recombinase_integration(genome, [insert], rec)[0]
+    >>> excised = recombinase_excision(integrated, rec)
+
+Find and annotate sites in a sequence::
+
+    >>> from pydna.dseqrecord import Dseqrecord
+    >>> from pydna.recombinase import Recombinase
+    >>> site1, site2 = "ATGCCCTAAaaTT", "AAaaTTTTTTTCCCT"
+    >>> rec = Recombinase(site1, site2, site1_name="mysite1", site2_name="mysite2")
+    >>> seq = Dseqrecord("gggATGCCCTAAaaTTttt")
+    >>> sites = rec.find(seq)
+    >>> annotated = rec.annotate(seq)
+
+When several sites are possible, or when using multiple recombinases, you can
+create a RecombinaseCollection. It has the methods overlap, find, and annotate,
+so you can use it as a single Recombinase. For an example, check the gateway module::
+
+    >>> from pydna.dseqrecord import Dseqrecord
+    >>> from pydna.recombinase import Recombinase, RecombinaseCollection
+    >>> rec1 = Recombinase("AAaaTTC", "CCaaTTC", site1_name="s1", site2_name="s2")
+    >>> rec2 = Recombinase("GAccACC", "TCccAAC", site1_name="s3", site2_name="s4")
+    >>> collection = RecombinaseCollection([rec1, rec2])
+    >>> seq = Dseqrecord("gggAAAATTCTtttGACCACCTttt")
+    >>> sites = collection.find(seq)
+    >>> annotated = collection.annotate(seq)
+
+IUPAC degenerate bases (e.g. N matches any base)::
+
+    >>> site1 = "ATGNNNaaTT"
+    >>> site2 = "CCNNaaTTGG"
+    >>> rec = Recombinase(site1, site2)
+
+Using a Recombinase as Assembly algorithm::
+
+    >>> from pydna.dseqrecord import Dseqrecord
+    >>> from pydna.recombinase import Recombinase
+    >>> from pydna.assembly2 import Assembly
+    >>> site1 = "ATGCCCTAAaaTT"
+    >>> site2 = "AAaaTTTTTTTCCCT"
+    >>> seqA = Dseqrecord("aaaATGCCCTAAaaTTtt")
+    >>> seqB = Dseqrecord("tataAAaaTTTTTTTCCCTaaa")
+    >>> rec = Recombinase(site1, site2)
+    >>> asm = Assembly([seqA, seqB], algorithm=rec.overlap, use_fragment_order=False)
+    >>> products = asm.assemble_linear()
+
 """
 
 import re
@@ -89,10 +155,9 @@ class Recombinase:
     >>> rec = Recombinase("ATGCCCTAAaaTT", "AAaaTTTTTTTCCCT")
     >>> seqA = Dseqrecord("aaaATGCCCTAAaaTTtt")
     >>> seqB = Dseqrecord("tataAAaaTTTTTTTCCCTaaa")
-    >>> rec.overlap(seqA, seqB)
-    [(12, 6, 2)]
+    >>> _ = rec.overlap(seqA, seqB)
     >>> sites = rec.find(seqA)
-    >>> rec.annotate(seqA)
+    >>> _ = rec.annotate(seqA)
     """
 
     def __init__(
@@ -281,3 +346,36 @@ def annotate(self, seq: Dseqrecord) -> Dseqrecord:
                         )
                     )
         return out_seq
+
+
+class RecombinaseCollection:
+    """A collection of recombinases."""
+
+    def __init__(self, recombinases: list[Recombinase]):
+        if not isinstance(recombinases, list):
+            raise ValueError("recombinases must be a list of Recombinase objects")
+        if not all(isinstance(r, Recombinase) for r in recombinases):
+            raise ValueError("recombinases must be a list of Recombinase objects")
+        if len(recombinases) == 0:
+            raise ValueError("recombinases must be a non-empty list")
+        self.recombinases = recombinases
+
+    def overlap(self, seqx: Dseqrecord, seqy: Dseqrecord) -> list[SequenceOverlap]:
+        """Find overlaps between *seqx* and *seqy* mediated by the recombinases."""
+        return sum((r.overlap(seqx, seqy) for r in self.recombinases), [])
+
+    def find(self, seq: Dseqrecord) -> dict[str, list[SimpleLocation]]:
+        """Find all occurrences of the recombinase sites in *seq*."""
+        out = dict()
+        for rec in self.recombinases:
+            rec_sites = rec.find(seq)
+            for k, v in rec_sites.items():
+                out.setdefault(k, []).extend(v)
+        return out
+
+    def annotate(self, seq: Dseqrecord) -> Dseqrecord:
+        """Annotate *seq* with features for all occurrences of the recombinase sites."""
+        out = seq
+        for rec in self.recombinases:
+            out = rec.annotate(out)
+        return out
diff --git a/tests/test_module_recombinase.py b/tests/test_module_recombinase.py
index 6ed3ece1..0f0ce93c 100644
--- a/tests/test_module_recombinase.py
+++ b/tests/test_module_recombinase.py
@@ -11,6 +11,7 @@
 from pydna.recombinase import (
     _recombinase_homology_offset_and_length,
     Recombinase,
+    RecombinaseCollection,
 )
 
 
@@ -280,3 +281,63 @@ def test_recombinase_reverse():
     assert rev_rec.site2 == "AAaaTT"
     assert rev_rec.site1_name == "site12"
     assert rev_rec.site2_name == "site21"
+
+
+# ---------------------------------------------------------------------------
+# RecombinaseCollection
+# ---------------------------------------------------------------------------
+
+
+def test_recombinase_collection():
+    site1 = "AAaaTTC"
+    site2 = "CCaaTTC"
+    site3 = "GAccACC"
+    site4 = "TCccAAC"
+    rec1 = Recombinase(site1, site2, site1_name="s1", site2_name="s2")
+    rec2 = Recombinase(site3, site4, site1_name="s3", site2_name="s4")
+    collection = RecombinaseCollection([rec1, rec2])
+    seq1 = Dseqrecord(f"ggg{site1.upper()}ttt{site3.upper()}ttt")
+    seq2 = Dseqrecord(f"gggc{site2.upper()}ttt{site4.upper()}ttt")
+    print(collection.overlap(seq1, seq2))
+    assert collection.overlap(seq1, seq2) == [(5, 6, 2), (15, 16, 2)]
+
+
+def test_recombinase_collection_find():
+    # Here the important thing to test is that if
+    # two recombinases have the same site name, the find method should return
+    # both locations.
+    site1 = "AAaaTTC"
+    site2 = "CCaaTTC"
+    site3 = "GAccACC"
+    site4 = "TCccAAC"
+    rec1 = Recombinase(site1, site2, site1_name="s1", site2_name="s2")
+    rec2 = Recombinase(site3, site4, site1_name="s1", site2_name="s2")
+    collection = RecombinaseCollection([rec1, rec2])
+    seq = Dseqrecord(f"ggg{site1.upper()}ttt{site3.upper()}ttt")
+    assert len(collection.find(seq)["s1"]) == 2
+
+
+def test_recombinase_collection_annotate():
+    # Same as above, checking that two sites with the same name are annotated.
+    site1 = "AAaaTTC"
+    site2 = "CCaaTTC"
+    site3 = "GAccACC"
+    site4 = "TCccAAC"
+    rec1 = Recombinase(site1, site2, site1_name="s1", site2_name="s2")
+    rec2 = Recombinase(site3, site4, site1_name="s1", site2_name="s2")
+    collection = RecombinaseCollection([rec1, rec2])
+    seq = Dseqrecord(f"ggg{site1.upper()}ttt{site3.upper()}ttt")
+    annotated_seq = collection.annotate(seq)
+    assert len(annotated_seq.features) == 2
+    assert annotated_seq.features[0].qualifiers.get("label", []) == ["s1"]
+    assert annotated_seq.features[1].qualifiers.get("label", []) == ["s1"]
+
+
+def test_recombinase_collection_init_errors():
+    # Check that the init method raises errors for invalid inputs.
+    with pytest.raises(ValueError):
+        RecombinaseCollection(None)
+    with pytest.raises(ValueError):
+        RecombinaseCollection([])
+    with pytest.raises(ValueError):
+        RecombinaseCollection([Recombinase("AAaaTTC", "CCaaTTC"), "blah"])

From 49f10d07d3de50c6a876808c96a048cec67a5fc4 Mon Sep 17 00:00:00 2001
From: Manuel Lera-Ramirez <manulera14@gmail.com>
Date: Wed, 25 Feb 2026 17:28:48 +0000
Subject: [PATCH 07/10] extra docs

---
 src/pydna/recombinase.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/pydna/recombinase.py b/src/pydna/recombinase.py
index 98754287..38a32d35 100644
--- a/src/pydna/recombinase.py
+++ b/src/pydna/recombinase.py
@@ -65,12 +65,20 @@
     >>> sites = collection.find(seq)
     >>> annotated = collection.annotate(seq)
 
-IUPAC degenerate bases (e.g. N matches any base)::
+IUPAC degenerate bases work (e.g. N matches any base)::
 
     >>> site1 = "ATGNNNaaTT"
     >>> site2 = "CCNNaaTTGG"
     >>> rec = Recombinase(site1, site2)
 
+If the recombinase reaction is reversible, you can get the reverse recombinase with the get_reverse_recombinase method.
+
+    >>> rec = Recombinase('AAAcgCG', 'TTAcgATGA', site1_name="s1", site2_name="s2")
+    >>> rec
+    Recombinase(site1=AAAcgCG, site2=TTAcgATGA, site1_name=s1, site2_name=s2)
+    >>> rec.get_reverse_recombinase(site12_name="s1+s2", site21_name="s2+s1")
+    Recombinase(site1=AAAcgATGA, site2=TTAcgCG, site1_name=s1+s2, site2_name=s2+s1)
+
 Using a Recombinase as Assembly algorithm::
 
     >>> from pydna.dseqrecord import Dseqrecord

From a7e56176548acbaf73439009166bb182b00ad228 Mon Sep 17 00:00:00 2001
From: Manuel Lera-Ramirez <manulera14@gmail.com>
Date: Wed, 25 Feb 2026 17:46:41 +0000
Subject: [PATCH 08/10] improve test coverage

---
 tests/test_module_gateway.py     | 9 ++++++++-
 tests/test_module_recombinase.py | 7 +++++++
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/tests/test_module_gateway.py b/tests/test_module_gateway.py
index 3d24a3b6..402a15d7 100644
--- a/tests/test_module_gateway.py
+++ b/tests/test_module_gateway.py
@@ -1,4 +1,5 @@
-from pydna.gateway import gateway_overlap, find_gateway_sites
+# -*- coding: utf-8 -*-
+from pydna.gateway import annotate_gateway_sites, gateway_overlap, find_gateway_sites
 import pydna.assembly2 as assembly
 import glob
 from pydna.parsers import parse_snapgene
@@ -119,3 +120,9 @@ def test_find_gateway_sites():
     assert find_gateway_sites(seq, greedy=False) == {
         "attB3": [SimpleLocation(0, 21, 1), SimpleLocation(24, 45, 1)]
     }
+
+
+def test_annotate_gateway_sites():
+    seq = Dseqrecord("CAACTTTGTATACAAAAGTTGaaaCAACTTTGTATAATAAAGTTG")
+    annotated_seq = annotate_gateway_sites(seq, greedy=False)
+    assert len(annotated_seq.features) == 2
diff --git a/tests/test_module_recombinase.py b/tests/test_module_recombinase.py
index 0f0ce93c..a5b83304 100644
--- a/tests/test_module_recombinase.py
+++ b/tests/test_module_recombinase.py
@@ -49,6 +49,13 @@ def test_recombinase_homology_offset_and_length_invalid_chars():
 # ---------------------------------------------------------------------------
 
 
+def test_recombinase_init_errors():
+    # Different cores
+    with pytest.raises(ValueError) as e:
+        Recombinase("ATGacCTAAATT", "AAaaTTTTTTTCCCT")
+    assert "Recombinase recognition sites do not have matching" in str(e.value)
+
+
 def test_recombinase_overlap_single_match():
     site1 = "ATGCCCTAAaaTT"
     site2 = "AAaaTTTTTTTCCCT"

From 424ac4200f988b0fa9d349ec43535cb5bab14610 Mon Sep 17 00:00:00 2001
From: Manuel Lera-Ramirez <manulera14@gmail.com>
Date: Wed, 25 Feb 2026 17:50:12 +0000
Subject: [PATCH 09/10] copilot feedback

---
 src/pydna/gateway.py     | 16 ++++++++--------
 src/pydna/recombinase.py |  4 ++--
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/src/pydna/gateway.py b/src/pydna/gateway.py
index 212eaee3..471beb85 100644
--- a/src/pydna/gateway.py
+++ b/src/pydna/gateway.py
@@ -97,8 +97,8 @@ def gateway_overlap(
     -------
     list[tuple[int, int, int]] A list of overlaps between the two sequences.
     """
-    type = "greedy" if greedy else "conservative"
-    return recombinase_dict[reaction][type].overlap(seqx, seqy)
+    mode = "greedy" if greedy else "conservative"
+    return recombinase_dict[reaction][mode].overlap(seqx, seqy)
 
 
 def find_gateway_sites(
@@ -106,19 +106,19 @@ def find_gateway_sites(
 ) -> dict[str, list[SimpleLocation]]:
     """Find all gateway sites in a sequence and return a dictionary with the name and positions of the sites."""
 
-    type = "greedy" if greedy else "conservative"
+    mode = "greedy" if greedy else "conservative"
     collection = RecombinaseCollection(
-        recombinase_dict["BP"][type].recombinases
-        + recombinase_dict["LR"][type].recombinases
+        recombinase_dict["BP"][mode].recombinases
+        + recombinase_dict["LR"][mode].recombinases
     )
     return collection.find(seq)
 
 
 def annotate_gateway_sites(seq: Dseqrecord, greedy: bool) -> Dseqrecord:
     """Annotate gateway sites in a sequence."""
-    type = "greedy" if greedy else "conservative"
+    mode = "greedy" if greedy else "conservative"
     collection = RecombinaseCollection(
-        recombinase_dict["BP"][type].recombinases
-        + recombinase_dict["LR"][type].recombinases
+        recombinase_dict["BP"][mode].recombinases
+        + recombinase_dict["LR"][mode].recombinases
     )
     return collection.annotate(seq)
diff --git a/src/pydna/recombinase.py b/src/pydna/recombinase.py
index 38a32d35..7759da94 100644
--- a/src/pydna/recombinase.py
+++ b/src/pydna/recombinase.py
@@ -331,12 +331,12 @@ def annotate(self, seq: Dseqrecord) -> Dseqrecord:
         Parameters
         ----------
         seq : Dseqrecord
-            Sequence to annotate (modified in place and returned).
+            Sequence to annotate.
 
         Returns
         -------
         Dseqrecord
-            The same sequence with added features.
+            A copy of the sequence with added features.
         """
         out_seq = copy.deepcopy(seq)
         sites = self.find(out_seq)

From 56ad9b2dd2a83a80b003af6f36f9ec0fa95f84fc Mon Sep 17 00:00:00 2001
From: Manuel Lera-Ramirez <manulera14@gmail.com>
Date: Wed, 25 Feb 2026 17:57:03 +0000
Subject: [PATCH 10/10] improve test coverage

---
 tests/test_module_recombinase.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tests/test_module_recombinase.py b/tests/test_module_recombinase.py
index a5b83304..b2984bb5 100644
--- a/tests/test_module_recombinase.py
+++ b/tests/test_module_recombinase.py
@@ -339,6 +339,10 @@ def test_recombinase_collection_annotate():
     assert annotated_seq.features[0].qualifiers.get("label", []) == ["s1"]
     assert annotated_seq.features[1].qualifiers.get("label", []) == ["s1"]
 
+    # It does not re-annotate if the sequence is already annotated.
+    annotated_seq = collection.annotate(annotated_seq)
+    assert len(annotated_seq.features) == 2
+
 
 def test_recombinase_collection_init_errors():
     # Check that the init method raises errors for invalid inputs.