diff --git a/src/main/java/com/thealgorithms/dynamicprogramming/Nussinov.java b/src/main/java/com/thealgorithms/dynamicprogramming/Nussinov.java new file mode 100644 index 000000000000..0cb808705091 --- /dev/null +++ b/src/main/java/com/thealgorithms/dynamicprogramming/Nussinov.java @@ -0,0 +1,166 @@ +import es.uma.eda.problem.combinatorial.sequence.folding.RNASecondaryStructurePredictor; +/** Download the required library here: https://drive.google.com/file/d/1L9ZU3_D3FODd2B22cr0eGBKeyDmsKog3/view?usp=sharing **/ +/** + * Implementation of the Nussinov algorithm for RNA secondary structure prediction by + * base-pair maximization using dynamic programming. + */ +public class Nussinov extends RNASecondaryStructurePredictor { + /** + * Strings describing compatible base pairs: basepairs1(i) is compatible with basepairs2(i) + */ + private final static String basepairs1 = "AUUCGG"; + private final static String basepairs2 = "UAGGCU"; + + /** + * Default constructor + */ + public Nussinov() { + } + + /** + * Returns true iff nucleotides b1 and b2 are compatible for pairing + * @param b1 a nucleotide (A, C, G, U) + * @param b2 a nucleotide (A, C, G, U) + * @return true iff nucleotides b1 and b2 are compatible for pairing + */ + protected boolean isCompatible (char b1, char b2) { + b1 = Character.toUpperCase(b1); + b2 = Character.toUpperCase(b2); + for (int i=0; i unpaired + } + } + + // General case: M(i,j) = max(M(i,j-1), max (M(i, k-1) + M(k+1,j-1) + 1 | i <= k < j-minLoopSize, s_k and s_j are complementary)) if j > i+minLoopSize + for (int l = minLoopSize + 1; l < n; l++) { // iterate possible lengths (j - i) + for (int i = 0; i + l < n; i++) { // iterate through sequence + int j = i + l; + M[i][j + 1] = M[i][j]; // option 1: do not pair s_j + D[i][j] = -1; // initially, unpaired + + // Option 2: find max possible pairing with previous nucleotides + for (int k = i; k < j - minLoopSize; k++) { + if (B[k][j]) { // if nucleotides are compatible + int pairs = M[i][k] + M[k + 1][j] + 1; + if (pairs > M[i][j + 1]) { + M[i][j + 1] = pairs; + D[i][j] = k; // pair s_k with s_j + } + } + } + } + } + + if (verbosity) { + System.out.println("\nScore matrix:\n"); + + System.out.print(" \t|"); + for (int j=0; j<=n; j++) + System.out.print("\t"+(j-1)); + System.out.print("\n \t|\t "); + for (int j=1; j<=n; j++) + System.out.print("\t"+rnaSeq.charAt(j-1)); + System.out.print("\n---- \t+"); + for (int j=0; j<=n; j++) + System.out.print("\t----"); + for (int i=0; i= j) { + return ".".repeat(Math.max(0, j - i + 1)); // no possible pairs + } + + if (D[i][j] == -1) { + return reconstructSolution(D, i, j - 1) + "."; // no pairing at j + } else { + int k = D[i][j]; + return reconstructSolution(D, i, k - 1) + "(" + reconstructSolution(D, k + 1, j - 1) + ")"; // pair s_k with s_j + } + } + + @Override + public double evaluate(String rnaSeq, String folding) { + if (rnaSeq.length() != folding.length()) // error: the folding string does not match the RNA sequence + return -1.0; + double contacts = 0.0; + int n = folding.length(); + int open = 0; + + for (int i=0; i 0) // error: parentheses are not balanced + contacts = -1.0; + return contacts; + } + +} diff --git a/src/test/java/com/thealgorithms/dynamicprogramming/TestRNAFolding.java b/src/test/java/com/thealgorithms/dynamicprogramming/TestRNAFolding.java new file mode 100644 index 000000000000..017611534987 --- /dev/null +++ b/src/test/java/com/thealgorithms/dynamicprogramming/TestRNAFolding.java @@ -0,0 +1,124 @@ +package com.thealgorithms.dynamicprogramming; + +import java.io.IOException; +import java.util.List; + +import es.uma.eda.AlgorithmUtil; +import es.uma.eda.problem.combinatorial.sequence.SequenceUtil; +import es.uma.eda.problem.combinatorial.sequence.folding.RNASecondaryStructurePredictor; + +/** + * Main class for testing RNA folding algorithms + * @author ccottap + * + */ +public class TestRNAFolding { + + /** + * Filename for storing statistics + */ + private final static String outputFilename = "folding.txt"; + /** + * how much the size of sequences is scaled-up on each step + */ + private final static double scaleFactor = 1.5; + + /** + * Finds the best folding of an RNA sequence + * @param args command-line arguments (to read data from file or generate at random) + * @throws IOException if data cannot be read from file or statistics cannot be written to file + */ + public static void main(String[] args) throws IOException { + if (args.length<1) { + System.out.println("java TestRNAFolding (-s|-f|-r) "); + System.out.println("\t-s: folds a string given as parameter"); + System.out.println("\t-f: performs a batch test from a file"); + System.out.println("\t-r: measures time"); + } + else { + List sequences = null; + RNASecondaryStructurePredictor predictor = new Nussinov(); + + switch (args[0]) { + case "-s": + if (args.length < 3) { + System.out.println("java TestRNAFolding -s "); + System.exit(-1); + } + else { + String f = predictor.run(args[1], Integer.parseInt(args[2])); + System.out.println("Folding " + args[1] + "..."); + System.out.println("Result:\n\t" + (int)predictor.evaluate(args[1],f) + " base pairs\n\t" + args[1] + "\n\t" + f); + } + break; + + case "-f": + if (args.length < 3) { + System.out.println("java TestRNAFolding -f "); + System.exit(-1); + } + else { + sequences = SequenceUtil.readSequencesFromFile(args[1]); + predictor.setVerbosity(false); + for (String s: sequences) { + String f = predictor.run(s, Integer.parseInt(args[2])); + System.out.println((int)predictor.evaluate(s, f)); + } + } + break; + + case "-r": + if (args.length < 5) { + System.out.println("java TestRNAFolding -r "); + System.exit(-1); + } + else { + int initialLength = Integer.parseUnsignedInt(args[1]); + int doublings = Integer.parseUnsignedInt(args[2]); + int testsPerLength = Integer.parseUnsignedInt(args[3]); + int minLoopSize = Integer.parseUnsignedInt(args[4]); + double[][] statistics = runTimer(predictor, initialLength, doublings, testsPerLength, minLoopSize); + AlgorithmUtil.writeStats(outputFilename, statistics); + } + break; + + default: + System.out.println("Wrong argument: " + args[0]); + System.exit(-1); + } + } + + } + + /** + * Performs a series of timed experiments with sequences of different sizes + * @param predictor the structure prediction algorithm + * @param initialLength initial length of sequences + * @param doublings number of times the sequence length is doubled + * @param testsPerLength number of tests per sequence length + * @param minLoopSize minimum number of nucleotides enclosed in loops + * @return a matrix with computational times (one row per size, one column per test). + */ + private static double[][] runTimer(RNASecondaryStructurePredictor predictor , int initialLength, int doublings, int testsPerLength, int minLoopSize) { + final String alphabet = "ACGU"; + double[][] statistics = new double[doublings][testsPerLength+1]; + double interval; + + predictor.setVerbosity(false); + for (int l = initialLength, dup = 0; dup < doublings; l *= scaleFactor, dup++) { + statistics[dup][0] = l; + System.out.println("Trying sequences of length " + l); + for (int j=0; j