|
| 1 | +/* |
| 2 | +Package pcr designs and simulates simple PCR reactions. |
| 3 | +
|
| 4 | +PCR, or polymerase chain reaction, is a method developed in 1983 to copy DNA |
| 5 | +templates using small fragments of synthesized single-stranded DNA, amplifying |
| 6 | +those DNA templates to ~x1,000,000,000 their starting concentration. These small |
| 7 | +fragments, referred to as "primers" or "oligos", can be designed on a computer |
| 8 | +and then synthesized for amplifying a variety of different templates. |
| 9 | +
|
| 10 | +This package allows users to simulate a PCR reaction or design new primers to |
| 11 | +amplify a given template. This package assumes perfect annealing to template at |
| 12 | +a target temperature, so should only be used for PCR reactions where this is a |
| 13 | +reasonable assumption. |
| 14 | +
|
| 15 | +If you are trying to simulate amplification out of a large pool, such as an |
| 16 | +oligo pool, use the `Simulate` rather than `SimulateSimple` function to detect |
| 17 | +if there is concatemerization happening in your multiplex reaction. In most |
| 18 | +other cases, use `SimulateSimple`. |
| 19 | +
|
| 20 | +IMPORTANT! The targetTm in all functions is specifically for Taq polymerase. |
| 21 | +*/ |
| 22 | +package pcr |
| 23 | + |
| 24 | +import ( |
| 25 | + "errors" |
| 26 | + "github.com/TimothyStiles/poly/primers" |
| 27 | + "github.com/TimothyStiles/poly/transform" |
| 28 | + "index/suffixarray" |
| 29 | + "sort" |
| 30 | + "strings" |
| 31 | +) |
| 32 | + |
| 33 | +// https://doi.org/10.1089/dna.1994.13.75 |
| 34 | +var minimalPrimerLength int = 15 |
| 35 | + |
| 36 | +// DesignPrimersWithOverhangs designs two primers to amplify a target sequence, |
| 37 | +// adding on an overhang to the forward and reverse strand. This overhang can |
| 38 | +// contain additional DNA needed for assembly, like Gibson assembly overhangs |
| 39 | +// or GoldenGate restriction enzyme sites. |
| 40 | +func DesignPrimersWithOverhangs(sequence, forwardOverhang, reverseOverhang string, targetTm float64) (string, string) { |
| 41 | + sequence = strings.ToUpper(sequence) |
| 42 | + forwardPrimer := sequence[0:minimalPrimerLength] |
| 43 | + for additionalNucleotides := 0; primers.MeltingTemp(forwardPrimer) < targetTm; additionalNucleotides++ { |
| 44 | + forwardPrimer = sequence[0 : minimalPrimerLength+additionalNucleotides] |
| 45 | + } |
| 46 | + reversePrimer := transform.ReverseComplement(sequence[len(sequence)-minimalPrimerLength:]) |
| 47 | + for additionalNucleotides := 0; primers.MeltingTemp(reversePrimer) < targetTm; additionalNucleotides++ { |
| 48 | + reversePrimer = transform.ReverseComplement(sequence[len(sequence)-(minimalPrimerLength+additionalNucleotides):]) |
| 49 | + } |
| 50 | + |
| 51 | + // Add overhangs to primer |
| 52 | + forwardPrimer = forwardOverhang + forwardPrimer |
| 53 | + reversePrimer = transform.ReverseComplement(reverseOverhang) + reversePrimer |
| 54 | + |
| 55 | + return forwardPrimer, reversePrimer |
| 56 | +} |
| 57 | + |
| 58 | +// DesignPrimers designs two primers to amplify a target sequence and only that |
| 59 | +// target sequence (no overhangs). |
| 60 | +func DesignPrimers(sequence string, targetTm float64) (string, string) { |
| 61 | + return DesignPrimersWithOverhangs(sequence, "", "", targetTm) |
| 62 | +} |
| 63 | + |
| 64 | +// SimulateSimple simulates a PCR reaction. It takes in a list of sequences and |
| 65 | +// a list of primers, with support for complex multiplex reactions, produces |
| 66 | +// a list of all possible PCR fragments from such a reaction. It does not |
| 67 | +// detect concatemerization, which could be useful or very detrimental to |
| 68 | +// your reactions. The variable `circular` is for if the target template is |
| 69 | +// circular, like a plasmid. |
| 70 | +func SimulateSimple(sequences []string, targetTm float64, circular bool, primerList []string) []string { |
| 71 | + // Set all primers to uppercase. |
| 72 | + for primerIndex := range primerList { |
| 73 | + primerList[primerIndex] = strings.ToUpper(primerList[primerIndex]) |
| 74 | + } |
| 75 | + |
| 76 | + var pcrFragments []string |
| 77 | + for _, sequence := range sequences { |
| 78 | + sequence = strings.ToUpper(sequence) |
| 79 | + // Suffix array construction allows function to operate on |
| 80 | + // very large sequences without being worried about exeuction |
| 81 | + // time. For small sequences, it doesn't really matter. |
| 82 | + // https://eli.thegreenplace.net/2016/suffix-arrays-in-the-go-standard-library/ |
| 83 | + sequenceIndex := suffixarray.New([]byte(sequence)) |
| 84 | + |
| 85 | + primerLength := len(primerList) |
| 86 | + |
| 87 | + forwardLocations := make(map[int][]int) |
| 88 | + reverseLocations := make(map[int][]int) |
| 89 | + minimalPrimers := make([]string, primerLength) |
| 90 | + for primerIndex, primer := range primerList { |
| 91 | + var minimalLength int |
| 92 | + for index := minimalPrimerLength; primers.MeltingTemp(primer[len(primer)-index:]) < targetTm; index++ { |
| 93 | + minimalLength = index |
| 94 | + if primer[len(primer)-index:] == primer { |
| 95 | + break |
| 96 | + } |
| 97 | + } |
| 98 | + // Use the minimal binding sites of the primer to find positions in the template |
| 99 | + minimalPrimer := primer[len(primer)-minimalLength:] |
| 100 | + if minimalPrimer != primer { |
| 101 | + minimalPrimers[primerIndex] = minimalPrimer |
| 102 | + // For each primer, we want to look for all possible binding sites in our gene. |
| 103 | + // We then append this to a list of binding sites for that primer. |
| 104 | + for _, forwardLocation := range sequenceIndex.Lookup([]byte(minimalPrimer), -1) { |
| 105 | + forwardLocations[forwardLocation] = append(forwardLocations[forwardLocation], primerIndex) |
| 106 | + } |
| 107 | + for _, reverseLocation := range sequenceIndex.Lookup([]byte(transform.ReverseComplement(minimalPrimer)), -1) { |
| 108 | + reverseLocations[reverseLocation] = append(reverseLocations[reverseLocation], primerIndex) |
| 109 | + } |
| 110 | + } |
| 111 | + } |
| 112 | + |
| 113 | + var forwardLocationInts []int |
| 114 | + var reverseLocationInts []int |
| 115 | + for key := range forwardLocations { |
| 116 | + forwardLocationInts = append(forwardLocationInts, key) |
| 117 | + } |
| 118 | + for key := range reverseLocations { |
| 119 | + reverseLocationInts = append(reverseLocationInts, key) |
| 120 | + } |
| 121 | + sort.Ints(forwardLocationInts) |
| 122 | + sort.Ints(reverseLocationInts) |
| 123 | + |
| 124 | + // Next, iterate through the forwardLocations list |
| 125 | + for index, forwardLocation := range forwardLocationInts { |
| 126 | + // First, make sure that this isn't the last element in forwardLocations |
| 127 | + if index+1 != len(forwardLocationInts) { |
| 128 | + // If this isn't the last element in forwardLocations, then we can select the first reverseLocation that is less than the next forwardLocation |
| 129 | + for _, reverseLocation := range reverseLocationInts { |
| 130 | + if (forwardLocation < reverseLocation) && (reverseLocation < forwardLocationInts[index+1]) { |
| 131 | + // If both are true, we have found the sequence we are aiming to PCR! Now, we get all primers from that forwardLocation and then |
| 132 | + // build PCR fragments with each one. |
| 133 | + pcrFragments = append(pcrFragments, generatePcrFragments(sequence, forwardLocation, reverseLocation, forwardLocations[forwardLocation], reverseLocations[reverseLocation], minimalPrimers, primerList)...) |
| 134 | + break |
| 135 | + } |
| 136 | + } |
| 137 | + } else { |
| 138 | + foundFragment := false |
| 139 | + for _, reverseLocation := range reverseLocationInts { |
| 140 | + if forwardLocation < reverseLocation { |
| 141 | + pcrFragments = append(pcrFragments, generatePcrFragments(sequence, forwardLocation, reverseLocation, forwardLocations[forwardLocation], reverseLocations[reverseLocation], minimalPrimers, primerList)...) |
| 142 | + foundFragment = true |
| 143 | + } |
| 144 | + } |
| 145 | + // If the sequence is circular and we haven't found a fragment yet, check the other side of the origin |
| 146 | + if circular && !foundFragment { |
| 147 | + for _, reverseLocation := range reverseLocationInts { |
| 148 | + if forwardLocationInts[0] > reverseLocation { |
| 149 | + // If either one of these are true, create a new pcrFragment and append to pcrFragments |
| 150 | + rotatedSequence := sequence[forwardLocation:] + sequence[:forwardLocation] |
| 151 | + rotatedForwardLocation := 0 |
| 152 | + rotatedReverseLocation := len(sequence[forwardLocation:]) + reverseLocation |
| 153 | + pcrFragments = append(pcrFragments, generatePcrFragments(rotatedSequence, rotatedForwardLocation, rotatedReverseLocation, forwardLocations[forwardLocation], reverseLocations[reverseLocation], minimalPrimers, primerList)...) |
| 154 | + |
| 155 | + } |
| 156 | + } |
| 157 | + } |
| 158 | + } |
| 159 | + } |
| 160 | + } |
| 161 | + return pcrFragments |
| 162 | +} |
| 163 | + |
| 164 | +// Simulate simulates a PCR reaction, including concatemerization analysis. It |
| 165 | +// takes in a list of sequences and list of primers, produces all possible PCR |
| 166 | +// fragments in a given reaction, and then attempts to see if the output |
| 167 | +// fragments can amplify themselves. If they can, concatemerization is occuring |
| 168 | +// in your reaction, which can lead to confusing results. The variable |
| 169 | +// `circular` is for if the target template is circular, like a plasmid. |
| 170 | +func Simulate(sequences []string, targetTm float64, circular bool, primerList []string) ([]string, error) { |
| 171 | + initialAmplification := SimulateSimple(sequences, targetTm, circular, primerList) |
| 172 | + subsequentAmplification := SimulateSimple(sequences, targetTm, circular, append(primerList, initialAmplification...)) |
| 173 | + if len(initialAmplification) != len(subsequentAmplification) { |
| 174 | + return initialAmplification, errors.New("Concatemerization detected in PCR.") |
| 175 | + } |
| 176 | + return initialAmplification, nil |
| 177 | +} |
| 178 | + |
| 179 | +func generatePcrFragments(sequence string, forwardLocation int, reverseLocation int, forwardPrimerIndxs []int, reversePrimerIndxs []int, minimalPrimers []string, primerList []string) []string { |
| 180 | + var pcrFragments []string |
| 181 | + for forwardPrimerIndex := range forwardPrimerIndxs { |
| 182 | + minimalPrimer := minimalPrimers[forwardPrimerIndex] |
| 183 | + fullPrimerForward := primerList[forwardPrimerIndex] |
| 184 | + for _, reversePrimerIndex := range reversePrimerIndxs { |
| 185 | + fullPrimerReverse := transform.ReverseComplement(primerList[reversePrimerIndex]) |
| 186 | + pcrFragment := fullPrimerForward[:len(fullPrimerForward)-len(minimalPrimer)] + sequence[forwardLocation:reverseLocation] + fullPrimerReverse |
| 187 | + pcrFragments = append(pcrFragments, pcrFragment) |
| 188 | + } |
| 189 | + } |
| 190 | + return pcrFragments |
| 191 | +} |
0 commit comments