Skip to content

Commit 450b340

Browse files
committed
used recombinase class in gateway
1 parent 7347496 commit 450b340

File tree

2 files changed

+102
-141
lines changed

2 files changed

+102
-141
lines changed

src/pydna/gateway.py

Lines changed: 101 additions & 140 deletions
Original file line numberDiff line numberDiff line change
@@ -1,164 +1,125 @@
11
# -*- coding: utf-8 -*-
2-
from Bio.Seq import reverse_complement
32
from pydna.dseqrecord import Dseqrecord
4-
import re
5-
import itertools
6-
from Bio.SeqFeature import SimpleLocation, SeqFeature
7-
from pydna.utils import shift_location
8-
from pydna.sequence_regex import compute_regex_site, dseqrecord_finditer
9-
10-
11-
raw_gateway_common = {
12-
"attB1": "CHWVTWTGTACAAAAAANNNG",
13-
"attB2": "CHWVTWTGTACAAGAAANNNG",
14-
"attB3": "CHWVTWTGTATAATAAANNNG",
15-
"attB4": "CHWVTWTGTATAGAAAANNNG",
16-
"attB5": "CHWVTWTGTATACAAAANNNG",
17-
"attL1": "VAAWWAWKRWTTTWWTTYGACTGATAGTGACCTGTWCGTYGMAACAVATTGATRAGCAATKMTTTYYTATAWTGHCMASTWTGTACAAAAAANNNG",
18-
"attL2": "VAAWWAWKRWTTTWWTTYGACTGATAGTGACCTGTWCGTYGMAACAVATTGATRAGCAATKMTTTYYTATAWTGHCMASTWTGTACAAGAAANNNG",
19-
"attL3": "VAAWWAWKRWTTTWWTTYGACTGATAGTGACCTGTWCGTYGMAACAVATTGATRAGCAATKMTTTYYTATAWTGHCMASTWTGTATAATAAANNNG",
20-
"attL4": "VAAWWAWKRWTTTWWTTYGACTGATAGTGACCTGTWCGTYGMAACAVATTGATRAGCAATKMTTTYYTATAWTGHCMASTWTGTATAGAAAANNNG",
21-
"attL5": "VAAWWAWKRWTTTWWTTYGACTGATAGTGACCTGTWCGTYGMAACAVATTGATRAGCAATKMTTTYYTATAWTGHCMASTWTGTATACAAAANNNG",
22-
"attR1": "CHWVTWTGTACAAAAAAGYWGARCGAGAARCGTAARRTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATRCTGTAARACACAACATATBCAGTCV",
23-
"attR2": "CHWVTWTGTACAAGAAAGYWGARCGAGAARCGTAARRTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATRCTGTAARACACAACATATBCAGTCV",
24-
"attR3": "CHWVTWTGTATAATAAAGYWGARCGAGAARCGTAARRTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATRCTGTAARACACAACATATBCAGTCV",
25-
"attR4": "CHWVTWTGTATAGAAAAGYWGARCGAGAARCGTAARRTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATRCTGTAARACACAACATATBCAGTCV",
26-
"attR5": "CHWVTWTGTATACAAAAGYWGARCGAGAARCGTAARRTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATRCTGTAARACACAACATATBCAGTCV",
27-
"overlap_1": "twtGTACAAAaaa",
28-
"overlap_2": "twtGTACAAGaaa",
29-
"overlap_3": "twtGTATAATaaa",
30-
"overlap_4": "twtGTATAGAaaa",
31-
"overlap_5": "twtGTATACAaaa",
32-
}
33-
34-
35-
raw_gateway_sites_greedy = {
36-
**raw_gateway_common,
37-
"attP1": "VAAWWAWKRWTTTWWTTYGACTGATAGTGACCTGTWCGTYGMAACAVATTGATRAGCAATKMTTTYYTATAWTGHCMASTWTGTACAAAAAAGYWGARCGAGAARCGTAARRTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATRCTGTAARACACAACATATBCAGTCV",
38-
"attP2": "VAAWWAWKRWTTTWWTTYGACTGATAGTGACCTGTWCGTYGMAACAVATTGATRAGCAATKMTTTYYTATAWTGHCMASTWTGTACAAGAAAGYWGARCGAGAARCGTAARRTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATRCTGTAARACACAACATATBCAGTCV",
39-
"attP3": "VAAWWAWKRWTTTWWTTYGACTGATAGTGACCTGTWCGTYGMAACAVATTGATRAGCAATKMTTTYYTATAWTGHCMASTWTGTATAATAAAGYWGARCGAGAARCGTAARRTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATRCTGTAARACACAACATATBCAGTCV",
40-
"attP4": "VAAWWAWKRWTTTWWTTYGACTGATAGTGACCTGTWCGTYGMAACAVATTGATRAGCAATKMTTTYYTATAWTGHCMASTWTGTATAGAAAAGYWGARCGAGAARCGTAARRTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATRCTGTAARACACAACATATBCAGTCV",
41-
"attP5": "VAAWWAWKRWTTTWWTTYGACTGATAGTGACCTGTWCGTYGMAACAVATTGATRAGCAATKMTTTYYTATAWTGHCMASTWTGTATACAAAAGYWGARCGAGAARCGTAARRTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATRCTGTAARACACAACATATBCAGTCV",
42-
}
43-
44-
raw_gateway_sites_conservative = {
45-
**raw_gateway_common,
46-
"attP1": "AAAWWAWKRWTTTWWTTTGACTGATAGTGACCTGTTCGTTGCAACAMATTGATRAGCAATGCTTTYTTATAATGCCMASTTTGTACAAAAAAGYWGAACGAGAARCGTAAARTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATACTGTAAAACACAACATATSCAGTCACTATGAAYCAACTACTTAGATGGTATTAGTGACCTGTA",
47-
"attP2": "AAAWWAWKRWTTTWWTTTGACTGATAGTGACCTGTTCGTTGCAACAMATTGATRAGCAATGCTTTYTTATAATGCCMASTTTGTACAAGAAAGYWGAACGAGAARCGTAAARTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATACTGTAAAACACAACATATSCAGTCACTATGAAYCAACTACTTAGATGGTATTAGTGACCTGTA",
48-
"attP3": "AAAWWAWKRWTTTWWTTTGACTGATAGTGACCTGTTCGTTGCAACAMATTGATRAGCAATGCTTTYTTATAATGCCMASTTTGTATAATAAAGYWGAACGAGAARCGTAAARTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATACTGTAAAACACAACATATSCAGTCACTATGAAYCAACTACTTAGATGGTATTAGTGACCTGTA",
49-
"attP4": "AAAWWAWKRWTTTWWTTTGACTGATAGTGACCTGTTCGTTGCAACAMATTGATRAGCAATGCTTTYTTATAATGCCMASTTTGTATAGAAAAGYWGAACGAGAARCGTAAARTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATACTGTAAAACACAACATATSCAGTCACTATGAAYCAACTACTTAGATGGTATTAGTGACCTGTA",
50-
"attP5": "AAAWWAWKRWTTTWWTTTGACTGATAGTGACCTGTTCGTTGCAACAMATTGATRAGCAATGCTTTYTTATAATGCCMASTTTGTATACAAAAGYWGAACGAGAARCGTAAARTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATACTGTAAAACACAACATATSCAGTCACTATGAAYCAACTACTTAGATGGTATTAGTGACCTGTA",
51-
}
52-
53-
gateway_sites_greedy = {
54-
k: {
55-
"forward_regex": compute_regex_site(v),
56-
"reverse_regex": compute_regex_site(reverse_complement(v)),
57-
"consensus_sequence": v,
3+
from Bio.SeqFeature import SimpleLocation
4+
from pydna.recombinase import Recombinase
5+
6+
7+
def create_recombinase_dict() -> dict[str, dict[str, list[Recombinase]]]:
8+
"""Create a dictionary of recombinases for the Gateway reaction."""
9+
raw_gateway_common = {
10+
"attB1": "CHWVTWTgtacaaaAAANNNG",
11+
"attB2": "CHWVTWTgtacaagAAANNNG",
12+
"attB3": "CHWVTWTgtataatAAANNNG",
13+
"attB4": "CHWVTWTgtatagaAAANNNG",
14+
"attB5": "CHWVTWTgtatacaAAANNNG",
15+
"attL1": "VAAWWAWKRWTTTWWTTYGACTGATAGTGACCTGTWCGTYGMAACAVATTGATRAGCAATKMTTTYYTATAWTGHCMASTWTgtacaaaAAANNNG",
16+
"attL2": "VAAWWAWKRWTTTWWTTYGACTGATAGTGACCTGTWCGTYGMAACAVATTGATRAGCAATKMTTTYYTATAWTGHCMASTWTgtacaagAAANNNG",
17+
"attL3": "VAAWWAWKRWTTTWWTTYGACTGATAGTGACCTGTWCGTYGMAACAVATTGATRAGCAATKMTTTYYTATAWTGHCMASTWTgtataatAAANNNG",
18+
"attL4": "VAAWWAWKRWTTTWWTTYGACTGATAGTGACCTGTWCGTYGMAACAVATTGATRAGCAATKMTTTYYTATAWTGHCMASTWTgtatagaAAANNNG",
19+
"attL5": "VAAWWAWKRWTTTWWTTYGACTGATAGTGACCTGTWCGTYGMAACAVATTGATRAGCAATKMTTTYYTATAWTGHCMASTWTgtatacaAAANNNG",
20+
"attR1": "CHWVTWTgtacaaaAAAGYWGARCGAGAARCGTAARRTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATRCTGTAARACACAACATATBCAGTCV",
21+
"attR2": "CHWVTWTgtacaagAAAGYWGARCGAGAARCGTAARRTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATRCTGTAARACACAACATATBCAGTCV",
22+
"attR3": "CHWVTWTgtataatAAAGYWGARCGAGAARCGTAARRTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATRCTGTAARACACAACATATBCAGTCV",
23+
"attR4": "CHWVTWTgtatagaAAAGYWGARCGAGAARCGTAARRTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATRCTGTAARACACAACATATBCAGTCV",
24+
"attR5": "CHWVTWTgtatacaAAAGYWGARCGAGAARCGTAARRTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATRCTGTAARACACAACATATBCAGTCV",
25+
"overlap_1": "TWTgtacaaaAAA",
26+
"overlap_2": "TWTgtacaagAAA",
27+
"overlap_3": "TWTgtataatAAA",
28+
"overlap_4": "TWTgtatagaAAA",
29+
"overlap_5": "TWTgtatacaAAA",
5830
}
59-
for k, v in raw_gateway_sites_greedy.items()
60-
}
61-
62-
gateway_sites_conservative = {
63-
k: {
64-
"forward_regex": compute_regex_site(v),
65-
"reverse_regex": compute_regex_site(reverse_complement(v)),
66-
"consensus_sequence": v,
31+
32+
raw_gateway_sites_greedy = {
33+
**raw_gateway_common,
34+
"attP1": "VAAWWAWKRWTTTWWTTYGACTGATAGTGACCTGTWCGTYGMAACAVATTGATRAGCAATKMTTTYYTATAWTGHCMASTWTgtacaaaAAAGYWGARCGAGAARCGTAARRTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATRCTGTAARACACAACATATBCAGTCV",
35+
"attP2": "VAAWWAWKRWTTTWWTTYGACTGATAGTGACCTGTWCGTYGMAACAVATTGATRAGCAATKMTTTYYTATAWTGHCMASTWTgtacaagAAAGYWGARCGAGAARCGTAARRTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATRCTGTAARACACAACATATBCAGTCV",
36+
"attP3": "VAAWWAWKRWTTTWWTTYGACTGATAGTGACCTGTWCGTYGMAACAVATTGATRAGCAATKMTTTYYTATAWTGHCMASTWTgtataatAAAGYWGARCGAGAARCGTAARRTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATRCTGTAARACACAACATATBCAGTCV",
37+
"attP4": "VAAWWAWKRWTTTWWTTYGACTGATAGTGACCTGTWCGTYGMAACAVATTGATRAGCAATKMTTTYYTATAWTGHCMASTWTgtatagaAAAGYWGARCGAGAARCGTAARRTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATRCTGTAARACACAACATATBCAGTCV",
38+
"attP5": "VAAWWAWKRWTTTWWTTYGACTGATAGTGACCTGTWCGTYGMAACAVATTGATRAGCAATKMTTTYYTATAWTGHCMASTWTgtatacaAAAGYWGARCGAGAARCGTAARRTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATRCTGTAARACACAACATATBCAGTCV",
39+
}
40+
41+
raw_gateway_sites_conservative = {
42+
**raw_gateway_common,
43+
"attP1": "AAAWWAWKRWTTTWWTTTGACTGATAGTGACCTGTTCGTTGCAACAMATTGATRAGCAATGCTTTYTTATAATGCCMASTTTgtacaaaAAAGYWGAACGAGAARCGTAAARTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATACTGTAAAACACAACATATSCAGTCACTATGAAYCAACTACTTAGATGGTATTAGTGACCTGTA",
44+
"attP2": "AAAWWAWKRWTTTWWTTTGACTGATAGTGACCTGTTCGTTGCAACAMATTGATRAGCAATGCTTTYTTATAATGCCMASTTTgtacaagAAAGYWGAACGAGAARCGTAAARTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATACTGTAAAACACAACATATSCAGTCACTATGAAYCAACTACTTAGATGGTATTAGTGACCTGTA",
45+
"attP3": "AAAWWAWKRWTTTWWTTTGACTGATAGTGACCTGTTCGTTGCAACAMATTGATRAGCAATGCTTTYTTATAATGCCMASTTTgtataatAAAGYWGAACGAGAARCGTAAARTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATACTGTAAAACACAACATATSCAGTCACTATGAAYCAACTACTTAGATGGTATTAGTGACCTGTA",
46+
"attP4": "AAAWWAWKRWTTTWWTTTGACTGATAGTGACCTGTTCGTTGCAACAMATTGATRAGCAATGCTTTYTTATAATGCCMASTTTgtatagaAAAGYWGAACGAGAARCGTAAARTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATACTGTAAAACACAACATATSCAGTCACTATGAAYCAACTACTTAGATGGTATTAGTGACCTGTA",
47+
"attP5": "AAAWWAWKRWTTTWWTTTGACTGATAGTGACCTGTTCGTTGCAACAMATTGATRAGCAATGCTTTYTTATAATGCCMASTTTgtatacaAAAGYWGAACGAGAARCGTAAARTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATACTGTAAAACACAACATATSCAGTCACTATGAAYCAACTACTTAGATGGTATTAGTGACCTGTA",
6748
}
68-
for k, v in raw_gateway_sites_conservative.items()
69-
}
7049

71-
# From snapgene - ask Valerie
72-
primer_design_attB = {
73-
"attB1": "ACAAGTTTGTACAAAAAAGCAGGCT",
74-
"attB2": "ACCACTTTGTACAAGAAAGCTGGGT",
75-
"attB3": "ACAACTTTGTATAATAAAGTTGTA",
76-
"attB4": "ACAACTTTGTATAGAAAAGTTGTA",
77-
"attB5": "ACAACTTTGTATACAAAAGTTGTA",
78-
}
50+
out_dict = {}
51+
52+
for reaction in ["BP", "LR"]:
53+
left, right = reaction
54+
conservative: list[Recombinase] = []
55+
greedy: list[Recombinase] = []
56+
for i in range(1, 6):
57+
site1 = f"att{left}{i}"
58+
site2 = f"att{right}{i}"
59+
seq1_conservative = raw_gateway_sites_conservative[site1]
60+
seq2_conservative = raw_gateway_sites_conservative[site2]
61+
seq1_greedy = raw_gateway_sites_greedy[site1]
62+
seq2_greedy = raw_gateway_sites_greedy[site2]
63+
conservative.append(
64+
Recombinase(seq1_conservative, seq2_conservative, site1, site2)
65+
)
66+
greedy.append(Recombinase(seq1_greedy, seq2_greedy, site1, site2))
67+
68+
out_dict[reaction] = {
69+
"conservative": conservative,
70+
"greedy": greedy,
71+
}
72+
return out_dict
73+
74+
75+
recombinase_dict = create_recombinase_dict()
7976

8077

8178
def gateway_overlap(
8279
seqx: Dseqrecord, seqy: Dseqrecord, reaction: str, greedy: bool
8380
) -> list[tuple[int, int, int]]:
8481
"""
85-
Find gateway overlaps. If greedy is True, it uses a more greedy consensus site to find attP sites,
86-
which might give false positives
82+
Assembly Algorithm: Find gateway overlaps. If greedy is True, it uses a more greedy consensus site to find attP sites,
83+
which might give false positives.
84+
85+
Parameters
86+
----------
87+
seqx : Dseqrecord
88+
First sequence to find overlaps.
89+
seqy : Dseqrecord
90+
Second sequence to find overlaps.
91+
reaction : str
92+
Type of Gateway reaction (BP or LR).
93+
greedy : bool
94+
If True, use greedy gateway consensus sites.
95+
96+
Returns
97+
-------
98+
list[tuple[int, int, int]]
A list of overlaps between the two sequences.
8799
"""
88-
if reaction not in ["BP", "LR"]:
89-
raise ValueError(f"Invalid overlap type: {reaction}")
90-
91-
gateway_sites = gateway_sites_greedy if greedy else gateway_sites_conservative
92-
out = list()
93-
# Iterate over the four possible att sites
94-
for num in range(1, 5):
95-
# Iterate over the two possible orientations
96-
# The sites have to be in the same orientation (fwd + fwd or rev + rev)
97-
for pattern in ["forward_regex", "reverse_regex"]:
98-
# The overlap regex is the same for all types
99-
overlap_regex = gateway_sites[f"overlap_{num}"][pattern]
100-
101-
# Iterate over pairs B, P and P, B for BP and L, R and R, L for LR
102-
for site_x, site_y in zip(reaction, reaction[::-1]):
103-
site_x_regex = gateway_sites[f"att{site_x}{num}"][pattern]
104-
matches_x = list(dseqrecord_finditer(site_x_regex, seqx))
105-
if len(matches_x) == 0:
106-
continue
107-
108-
site_y_regex = gateway_sites[f"att{site_y}{num}"][pattern]
109-
matches_y = list(dseqrecord_finditer(site_y_regex, seqy))
110-
if len(matches_y) == 0:
111-
continue
112-
113-
for match_x, match_y in itertools.product(matches_x, matches_y):
114-
# Find the overlap sequence within each match, and use the
115-
# core 7 bp that are constant
116-
overlap_x = re.search(overlap_regex, match_x.group())
117-
overlap_y = re.search(overlap_regex, match_y.group())
118-
119-
# Sanity check
120-
assert (
121-
overlap_x is not None and overlap_y is not None
122-
), "Something went wrong, no overlap found within the matches"
123-
124-
out.append(
125-
(
126-
match_x.start() + overlap_x.start() + 3,
127-
match_y.start() + overlap_y.start() + 3,
128-
7,
129-
)
130-
)
131-
132-
return out
100+
type = "greedy" if greedy else "conservative"
101+
recombinases = recombinase_dict[reaction][type]
102+
return sum((r.overlap(seqx, seqy) for r in recombinases), [])
133103

134104

135105
def find_gateway_sites(
136106
seq: Dseqrecord, greedy: bool
137107
) -> dict[str, list[SimpleLocation]]:
138108
"""Find all gateway sites in a sequence and return a dictionary with the name and positions of the sites."""
139-
gateway_sites = gateway_sites_greedy if greedy else gateway_sites_conservative
109+
110+
type = "greedy" if greedy else "conservative"
140111
out = dict()
141-
for site in gateway_sites:
142-
if not site.startswith("att"):
143-
continue
144-
145-
for pattern in ["forward_regex", "reverse_regex"]:
146-
matches = list(dseqrecord_finditer(gateway_sites[site][pattern], seq))
147-
for match in matches:
148-
if site not in out:
149-
out[site] = []
150-
strand = 1 if pattern == "forward_regex" else -1
151-
loc = SimpleLocation(match.start(), match.end(), strand)
152-
loc = shift_location(loc, 0, len(seq))
153-
out[site].append(loc)
112+
for reaction in ["BP", "LR"]:
113+
for rec in recombinase_dict[reaction][type]:
114+
out.update(rec.find(seq))
154115
return out
155116

156117

157118
def annotate_gateway_sites(seq: Dseqrecord, greedy: bool) -> Dseqrecord:
158-
sites = find_gateway_sites(seq, greedy)
159-
for site in sites:
160-
for loc in sites[site]:
161-
seq.features.append(
162-
SeqFeature(loc, type="protein_bind", qualifiers={"label": [site]})
163-
)
119+
"""Annotate gateway sites in a sequence."""
120+
type = "greedy" if greedy else "conservative"
121+
out = seq
122+
for reaction in ["BP", "LR"]:
123+
for rec in recombinase_dict[reaction][type]:
124+
out = rec.annotate(out)
164125
return seq

tests/test_module_assembly2.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2638,7 +2638,7 @@ def test_gateway_assembly():
26382638
# Test that greedy is being used (finds attB1, attP1, attL1 and attR1 in fragment 2)
26392639
with pytest.raises(ValueError) as e:
26402640
assembly.gateway_assembly(products_LR, "LR", greedy=True)
2641-
assert "fragment 2: attB1, attL1, attR1, attP1" in str(e.value)
2641+
assert "fragment 2: attB1, attP1, attL1, attR1" in str(e.value)
26422642

26432643
# Test multi site only
26442644
seq1 = Dseqrecord("aaa" + attB1 + "ggg" + attB2 + "ccc", circular=True)

0 commit comments

Comments
 (0)