diff --git a/src/main/java/com/thealgorithms/compression/BurrowsWheelerTransform.java b/src/main/java/com/thealgorithms/compression/BurrowsWheelerTransform.java new file mode 100644 index 000000000000..a148517e5b55 --- /dev/null +++ b/src/main/java/com/thealgorithms/compression/BurrowsWheelerTransform.java @@ -0,0 +1,220 @@ +package com.thealgorithms.compression; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; + +/** + * Implementation of the Burrows-Wheeler Transform (BWT) and its inverse. + *

/**
 * Implementation of the Burrows-Wheeler Transform (BWT) and its inverse.
 * <p>
 * BWT is a reversible data transformation algorithm that rearranges a string into runs of
 * similar characters. While not a compression algorithm itself, it significantly improves
 * the compressibility of data for subsequent algorithms like Move-to-Front encoding and
 * Run-Length Encoding.
 * </p>
 *
 * <p>The transform works by:
 * <ol>
 * <li>Generating all rotations of the input string</li>
 * <li>Sorting these rotations lexicographically</li>
 * <li>Taking the last column of the sorted matrix as output</li>
 * <li>Recording the index of the original string in the sorted matrix</li>
 * </ol>
 * </p>
 *
 * <p><b>Important:</b> By convention the input string should end with a unique
 * end-of-string marker (typically '$') that:
 * <ul>
 * <li>appears nowhere else in the text, and</li>
 * <li>is lexicographically smaller than every other character in the text.</li>
 * </ul>
 * The marker makes every rotation distinct. This implementation additionally returns the
 * row index of the original string, and {@link #inverseTransform(String, int)} starts its
 * reconstruction from that row.
 * </p>
 *
 * <p><b>Time Complexity:</b>
 * <ul>
 * <li>Forward transform: builds {@code n} rotation strings of length {@code n} and sorts
 * them, i.e. O(n<sup>2</sup> log n) character comparisons in the worst case and
 * O(n<sup>2</sup>) memory.</li>
 * <li>Inverse transform: O(n log n) for sorting the characters plus linear passes over the
 * input; O(n) memory.</li>
 * </ul>
 * </p>
 *
 * <p><b>Example:</b></p>
 * <pre>
 * Input:  "banana$"
 * Output: BWTResult("annb$aa", 4)
 *         - "annb$aa" is the transformed string (groups similar characters)
 *         - 4 is the index of the original string in the sorted rotations
 * </pre>
 *
 * @see <a href="https://en.wikipedia.org/wiki/Burrows%E2%80%93Wheeler_transform">Burrows–Wheeler transform (Wikipedia)</a>
 */
public final class BurrowsWheelerTransform {

    private BurrowsWheelerTransform() {
    }

    /**
     * A container for the result of the forward BWT.
     * <p>
     * Contains the transformed string and the index of the original string
     * in the sorted rotations matrix, both of which are required for the
     * inverse transformation.
     * </p>
     */
    public static class BWTResult {
        /** The transformed string (last column of the sorted rotation matrix). */
        public final String transformed;

        /** The index of the original string in the sorted rotations matrix. */
        public final int originalIndex;

        /**
         * Constructs a BWTResult with the transformed string and original index.
         *
         * @param transformed the transformed string (L-column)
         * @param originalIndex the index of the original string in sorted rotations
         */
        public BWTResult(String transformed, int originalIndex) {
            this.transformed = transformed;
            this.originalIndex = originalIndex;
        }

        /**
         * Two results are equal when both the transformed string and the index match.
         * A {@code null} transformed string is only equal to another {@code null}.
         */
        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (obj == null || getClass() != obj.getClass()) {
                return false;
            }
            BWTResult other = (BWTResult) obj;
            if (originalIndex != other.originalIndex) {
                return false;
            }
            // Null-safe comparison: the constructor does not reject a null string.
            return transformed == null ? other.transformed == null : transformed.equals(other.transformed);
        }

        @Override
        public int hashCode() {
            int stringHash = transformed == null ? 0 : transformed.hashCode();
            return 31 * stringHash + originalIndex;
        }

        @Override
        public String toString() {
            return "BWTResult[transformed=" + transformed + ", originalIndex=" + originalIndex + "]";
        }
    }

    /**
     * Performs the forward Burrows-Wheeler Transform on the input string.
     * <p>
     * The algorithm generates all cyclic rotations of the input, sorts them
     * lexicographically, and returns the last column of this sorted matrix
     * along with the position of the original string.
     * </p>
     *
     * <p><b>Note:</b> It is recommended that the input string ends with
     * a unique end-of-string marker (e.g., '$') that is lexicographically smaller
     * than any other character in the string.</p>
     *
     * @param text the input string to transform; {@code null} is treated like the empty string
     * @return a {@link BWTResult} object containing the transformed string (L-column)
     *         and the index of the original string in the sorted rotations matrix;
     *         returns {@code BWTResult("", -1)} for {@code null} or empty input
     */
    public static BWTResult transform(String text) {
        if (text == null || text.isEmpty()) {
            return new BWTResult("", -1);
        }

        int n = text.length();

        // Materialise every cyclic rotation: rotation i starts at character i.
        String[] rotations = new String[n];
        for (int i = 0; i < n; i++) {
            rotations[i] = text.substring(i) + text.substring(0, i);
        }

        Arrays.sort(rotations);

        // The unrotated text is always one of the rows, so the search cannot miss.
        // If the text is periodic several rows are identical and any of them is a valid index.
        int originalIndex = Arrays.binarySearch(rotations, text);

        StringBuilder lastColumn = new StringBuilder(n);
        for (String rotation : rotations) {
            lastColumn.append(rotation.charAt(n - 1));
        }

        return new BWTResult(lastColumn.toString(), originalIndex);
    }

    /**
     * Performs the inverse Burrows-Wheeler Transform using the LF-mapping technique.
     * <p>
     * The LF-mapping (Last-First mapping) reconstructs the original string from the
     * BWT output without explicitly rebuilding the sorted rotations matrix.
     * </p>
     *
     * <p>The algorithm works by:
     * <ol>
     * <li>Creating the first column by sorting the BWT string</li>
     * <li>Building a mapping from first column indices to last column indices</li>
     * <li>Following this mapping starting from the original index to reconstruct the string</li>
     * </ol>
     * </p>
     *
     * @param bwtString the transformed string (L-column) from the forward transform;
     *                  {@code null} is treated like the empty string
     * @param originalIndex the index of the original string row from the forward transform;
     *                      use -1 for empty strings
     * @return the original, untransformed string; returns the empty string if
     *         {@code bwtString} is {@code null} or empty, or if {@code originalIndex} is -1
     * @throws IllegalArgumentException if {@code originalIndex} is out of valid range (except -1)
     */
    public static String inverseTransform(String bwtString, int originalIndex) {
        if (bwtString == null || bwtString.isEmpty() || originalIndex == -1) {
            return "";
        }

        int n = bwtString.length();
        if (originalIndex < 0 || originalIndex >= n) {
            throw new IllegalArgumentException("Original index must be between 0 and " + (n - 1) + ", got: " + originalIndex);
        }

        char[] lastColumn = bwtString.toCharArray();
        char[] firstColumn = lastColumn.clone();
        Arrays.sort(firstColumn);

        // For every character, the first-column row that its next unmatched occurrence
        // in the last column will be paired with. Starts at the character's first row
        // in the (sorted) first column.
        Map<Character, Integer> nextFreeRow = new HashMap<>();
        for (int row = 0; row < n; row++) {
            nextFreeRow.putIfAbsent(firstColumn[row], row);
        }

        // next[f] is the last-column row holding the same character occurrence as
        // first-column row f: the k-th 'c' of the first column pairs with the k-th 'c'
        // of the last column.
        int[] next = new int[n];
        for (int row = 0; row < n; row++) {
            char c = lastColumn[row];
            int target = nextFreeRow.get(c);
            next[target] = row;
            nextFreeRow.put(c, target + 1);
        }

        // Walk the mapping from the original row, emitting one character per step.
        StringBuilder originalString = new StringBuilder(n);
        int currentRow = originalIndex;
        for (int i = 0; i < n; i++) {
            originalString.append(firstColumn[currentRow]);
            currentRow = next[currentRow];
        }

        return originalString.toString();
    }
}

/**
 * Implementation of the Move-to-Front (MTF) transform and its inverse.
 * <p>
 * MTF is a data transformation algorithm that encodes each symbol in the input
 * as its current position in a dynamically-maintained list, then moves that symbol
 * to the front of the list. This transformation is particularly effective when used
 * after the Burrows-Wheeler Transform (BWT), as BWT groups similar characters together.
 * </p>
 *
 * <p>The transform converts runs of repeated characters into sequences of small integers
 * (often zeros), which are highly compressible by subsequent entropy encoding algorithms
 * like Run-Length Encoding (RLE) or Huffman coding. This technique is used in the
 * bzip2 compression algorithm.
 * </p>
 *
 * <p><b>How it works:</b>
 * <ol>
 * <li>Maintain a list of symbols (the alphabet), initially in a fixed order</li>
 * <li>For each input symbol:
 *   <ul>
 *   <li>Output its current index in the list</li>
 *   <li>Move that symbol to the front of the list</li>
 *   </ul>
 * </li>
 * </ol>
 * This means frequently occurring symbols quickly move to the front and are encoded
 * with small indices (often 0), while rare symbols remain near the back.
 * </p>
 *
 * <p><b>Time Complexity:</b>
 * <ul>
 * <li>Forward transform: O(n * m), where n is the input length and m the alphabet size
 * (one linear search of the alphabet list per input symbol)</li>
 * <li>Inverse transform: O(n * m) (one positional removal from the alphabet list per index)</li>
 * </ul>
 * Note: Using {@link LinkedList} for O(1) insertions and O(m) search operations.
 * </p>
 *
 * <p><b>Example:</b></p>
 * <pre>
 * Input:    "annb$aa"
 * Alphabet: "$abn" (initial order)
 * Output:   [1, 3, 0, 3, 3, 3, 0]
 *
 * Step-by-step:
 * - 'a': index 1 in [$,a,b,n] → output 1, list becomes [a,$,b,n]
 * - 'n': index 3 in [a,$,b,n] → output 3, list becomes [n,a,$,b]
 * - 'n': index 0 in [n,a,$,b] → output 0, list stays [n,a,$,b]
 * - 'b': index 3 in [n,a,$,b] → output 3, list becomes [b,n,a,$]
 * - etc.
 *
 * Notice how repeated 'n' characters produce zeros after the first occurrence!
 * </pre>
 *
 * @see <a href="https://en.wikipedia.org/wiki/Move-to-front_transform">Move-to-front transform (Wikipedia)</a>
 */
public final class MoveToFront {

    private MoveToFront() {
    }

    /**
     * Performs the forward Move-to-Front transform.
     * <p>
     * Converts the input string into a list of integers, where each integer represents
     * the position of the corresponding character in a dynamically-maintained alphabet list.
     * </p>
     *
     * <p><b>Note:</b> All characters in the input text must exist in the provided alphabet,
     * otherwise an {@link IllegalArgumentException} is thrown. The alphabet should contain
     * all unique characters that may appear in the input.</p>
     *
     * @param text the input string to transform; if {@code null} or empty, returns an empty list
     * @param initialAlphabet a string containing the initial ordered set of symbols
     *                        (e.g., "$abn" or the full ASCII set); must not be empty
     *                        when {@code text} is non-empty
     * @return a new mutable list of integers representing the transformed data, where each integer
     *         is the index of the corresponding input character in the current alphabet state
     * @throws IllegalArgumentException if {@code text} is non-empty and {@code initialAlphabet}
     *                                  is {@code null} or empty
     * @throws IllegalArgumentException if any character in {@code text} is not found in
     *                                  {@code initialAlphabet}
     */
    public static List<Integer> transform(String text, String initialAlphabet) {
        if (text == null || text.isEmpty()) {
            return new ArrayList<>();
        }
        if (initialAlphabet == null || initialAlphabet.isEmpty()) {
            throw new IllegalArgumentException("Alphabet cannot be null or empty when text is not empty.");
        }

        List<Integer> output = new ArrayList<>(text.length());
        LinkedList<Character> alphabet = toSymbolList(initialAlphabet);

        for (char c : text.toCharArray()) {
            int index = alphabet.indexOf(c);
            if (index == -1) {
                throw new IllegalArgumentException("Symbol '" + c + "' not found in the initial alphabet.");
            }

            output.add(index);

            // Move the symbol to the front; a symbol already at the front stays put.
            if (index != 0) {
                alphabet.addFirst(alphabet.remove(index));
            }
        }
        return output;
    }

    /**
     * Performs the inverse Move-to-Front transform.
     * <p>
     * Reconstructs the original string from the list of indices produced by the
     * forward transform. This requires the exact same initial alphabet that was
     * used in the forward transform.
     * </p>
     *
     * <p><b>Important:</b> The {@code initialAlphabet} parameter must be identical
     * to the one used in the forward transform, including character order, or the
     * output will be incorrect.</p>
     *
     * @param indices the integers from the forward transform, in order;
     *                if {@code null} or empty, returns an empty string
     * @param initialAlphabet the exact same initial alphabet string used for the forward transform;
     *                        if {@code null} or empty, returns an empty string
     * @return the original, untransformed string
     * @throws IllegalArgumentException if any element of {@code indices} is {@code null},
     *                                  negative, or not smaller than the alphabet size
     */
    public static String inverseTransform(Collection<Integer> indices, String initialAlphabet) {
        if (indices == null || indices.isEmpty() || initialAlphabet == null || initialAlphabet.isEmpty()) {
            return "";
        }

        StringBuilder output = new StringBuilder(indices.size());
        LinkedList<Character> alphabet = toSymbolList(initialAlphabet);

        for (Integer boxedIndex : indices) {
            // Reject null explicitly instead of failing with an unboxing NullPointerException.
            if (boxedIndex == null) {
                throw new IllegalArgumentException("Index must not be null.");
            }
            int index = boxedIndex;
            if (index < 0 || index >= alphabet.size()) {
                throw new IllegalArgumentException("Index " + index + " is out of bounds for the current alphabet of size " + alphabet.size() + ".");
            }

            // remove(int) returns the removed symbol, so one list traversal both
            // reads the symbol and detaches it; it is then re-inserted at the front,
            // mirroring the forward transform.
            char symbol = alphabet.remove(index);
            output.append(symbol);
            alphabet.addFirst(symbol);
        }
        return output.toString();
    }

    /** Copies the characters of the alphabet string, in order, into a fresh linked list. */
    private static LinkedList<Character> toSymbolList(String initialAlphabet) {
        return initialAlphabet.chars().mapToObj(c -> (char) c).collect(Collectors.toCollection(LinkedList::new));
    }
}
/**
 * Unit tests for {@link BurrowsWheelerTransform}: known forward outputs, round trips
 * through the inverse, handling of null/empty input, index validation, and the
 * {@code equals}/{@code hashCode}/{@code toString} helpers of the result type.
 */
public class BurrowsWheelerTransformTest {

    @Test
    public void testTransformAndInverseBanana() {
        String original = "banana$";
        BurrowsWheelerTransform.BWTResult expectedTransform = new BurrowsWheelerTransform.BWTResult("annb$aa", 4);

        // Test forward transform
        BurrowsWheelerTransform.BWTResult actualTransform = BurrowsWheelerTransform.transform(original);
        assertEquals(expectedTransform, actualTransform);

        // Test inverse transform
        String reconstructed = BurrowsWheelerTransform.inverseTransform(actualTransform.transformed, actualTransform.originalIndex);
        assertEquals(original, reconstructed);
    }

    @Test
    public void testTransformAndInverseAbracadabra() {
        String original = "abracadabra$";
        BurrowsWheelerTransform.BWTResult expectedTransform = new BurrowsWheelerTransform.BWTResult("ard$rcaaaabb", 3);

        // Test forward transform
        BurrowsWheelerTransform.BWTResult actualTransform = BurrowsWheelerTransform.transform(original);
        assertEquals(expectedTransform, actualTransform);

        // Test inverse transform
        String reconstructed = BurrowsWheelerTransform.inverseTransform(actualTransform.transformed, actualTransform.originalIndex);
        assertEquals(original, reconstructed);
    }

    @Test
    public void testTransformAndInverseSixMixPixFix() {
        String original = "SIX.MIX.PIX.FIX$";
        BurrowsWheelerTransform.BWTResult expectedTransform = new BurrowsWheelerTransform.BWTResult("XXXX.FPSM..$IIII", 11);

        // Test forward transform
        BurrowsWheelerTransform.BWTResult actualTransform = BurrowsWheelerTransform.transform(original);
        assertEquals(expectedTransform, actualTransform);

        // Test inverse transform
        String reconstructed = BurrowsWheelerTransform.inverseTransform(actualTransform.transformed, actualTransform.originalIndex);
        assertEquals(original, reconstructed);
    }

    @Test
    public void testEmptyString() {
        String original = "";
        BurrowsWheelerTransform.BWTResult expectedTransform = new BurrowsWheelerTransform.BWTResult("", -1);

        BurrowsWheelerTransform.BWTResult actualTransform = BurrowsWheelerTransform.transform(original);
        assertEquals(expectedTransform, actualTransform);

        String reconstructed = BurrowsWheelerTransform.inverseTransform(actualTransform.transformed, actualTransform.originalIndex);
        assertEquals(original, reconstructed);
    }

    @Test
    public void testSingleCharacter() {
        String original = "a";
        BurrowsWheelerTransform.BWTResult expectedTransform = new BurrowsWheelerTransform.BWTResult("a", 0);

        BurrowsWheelerTransform.BWTResult actualTransform = BurrowsWheelerTransform.transform(original);
        assertEquals(expectedTransform, actualTransform);

        String reconstructed = BurrowsWheelerTransform.inverseTransform(actualTransform.transformed, actualTransform.originalIndex);
        assertEquals(original, reconstructed);
    }

    @Test
    public void testTransformNull() {
        // A null text is treated like the empty string rather than raising an exception.
        assertEquals(new BurrowsWheelerTransform.BWTResult("", -1), BurrowsWheelerTransform.transform(null));
    }

    @Test
    public void testInverseTransformNullString() {
        // bwtString == null
        assertEquals("", BurrowsWheelerTransform.inverseTransform(null, 1));
        // bwtString.isEmpty()
        assertEquals("", BurrowsWheelerTransform.inverseTransform("", 0));
    }

    @Test
    public void testInverseTransformIndexOutOfBounds() {
        String bwt = "annb$aa";
        int n = bwt.length(); // n = 7

        // originalIndex >= n
        assertThrows(IllegalArgumentException.class, () -> BurrowsWheelerTransform.inverseTransform(bwt, n));
        assertThrows(IllegalArgumentException.class, () -> BurrowsWheelerTransform.inverseTransform(bwt, 8));

        // originalIndex < 0
        assertThrows(IllegalArgumentException.class, () -> BurrowsWheelerTransform.inverseTransform(bwt, -2));
    }

    @Test
    public void testBWTResultHelpers() {
        BurrowsWheelerTransform.BWTResult res1 = new BurrowsWheelerTransform.BWTResult("annb$aa", 4);
        BurrowsWheelerTransform.BWTResult res2 = new BurrowsWheelerTransform.BWTResult("annb$aa", 4);
        BurrowsWheelerTransform.BWTResult res3 = new BurrowsWheelerTransform.BWTResult("other", 4);
        BurrowsWheelerTransform.BWTResult res4 = new BurrowsWheelerTransform.BWTResult("annb$aa", 1);

        assertEquals(res1, res1);
        assertEquals(res1, res2);
        assertNotEquals(res1, null); // obj == null
        assertNotEquals(res1, new Object()); // different class
        assertNotEquals(res1, res3); // different transformed
        assertNotEquals(res1, res4); // different originalIndex

        assertEquals(res1.hashCode(), res2.hashCode());
        assertNotEquals(res1.hashCode(), res3.hashCode());

        assertTrue(res1.toString().contains("annb$aa"));
        assertTrue(res1.toString().contains("originalIndex=4"));
    }
}

// ---------------------------------------------------------------------------
// New file: src/test/java/com/thealgorithms/compression/MoveToFrontTest.java
// ---------------------------------------------------------------------------
package com.thealgorithms.compression;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;

import java.util.List;
import org.junit.jupiter.api.Test;

/**
 * Unit tests for {@link MoveToFront}: known forward outputs, round trips through the
 * inverse, and the handling of null/empty arguments and invalid symbols or indices.
 */
public class MoveToFrontTest {

    @Test
    public void testTransformAndInverseBananaExample() {
        String original = "annb$aa";
        String alphabet = "$abn";
        List<Integer> expectedTransform = List.of(1, 3, 0, 3, 3, 3, 0);

        // Test forward transform
        List<Integer> actualTransform = MoveToFront.transform(original, alphabet);
        assertEquals(expectedTransform, actualTransform);

        // Test inverse transform
        String reconstructed = MoveToFront.inverseTransform(actualTransform, alphabet);
        assertEquals(original, reconstructed);
    }

    @Test
    public void testTransformAndInverseCabaaExample() {
        String original = "cabaa";
        String alphabet = "abcdef";
        List<Integer> expectedTransform = List.of(2, 1, 2, 1, 0);

        // Test forward transform
        List<Integer> actualTransform = MoveToFront.transform(original, alphabet);
        assertEquals(expectedTransform, actualTransform);

        // Test inverse transform
        String reconstructed = MoveToFront.inverseTransform(actualTransform, alphabet);
        assertEquals(original, reconstructed);
    }

    @Test
    public void testEmptyInput() {
        String original = "";
        String alphabet = "abc";
        List<Integer> expectedTransform = List.of();

        List<Integer> actualTransform = MoveToFront.transform(original, alphabet);
        assertEquals(expectedTransform, actualTransform);

        String reconstructed = MoveToFront.inverseTransform(actualTransform, alphabet);
        assertEquals(original, reconstructed);
    }

    @Test
    public void testEmptyAlphabet() {
        // The forward transform rejects an empty alphabet for non-empty text ...
        assertThrows(IllegalArgumentException.class, () -> MoveToFront.transform("abc", ""));

        // ... while the inverse transform returns an empty string for it.
        assertEquals("", MoveToFront.inverseTransform(List.of(1, 2), ""));
    }

    @Test
    public void testSymbolNotInAlphabet() {
        // 'd' is not in "abc"
        assertThrows(IllegalArgumentException.class, () -> MoveToFront.transform("abd", "abc"));
    }

    @Test
    public void testIndexOutOfBounds() {
        // Index 5 is out of bounds for alphabet "abc"
        // 1. test index >= alphabet.size()
        assertThrows(IllegalArgumentException.class, () -> MoveToFront.inverseTransform(List.of(1, 2, 5), "abc"));

        // 2. test index < 0
        assertThrows(IllegalArgumentException.class, () -> MoveToFront.inverseTransform(List.of(1, -1, 2), "abc"));
    }

    @Test
    public void testTransformNull() {
        List<Integer> expected = List.of();
        assertEquals(expected, MoveToFront.transform(null, "abc"));
        assertThrows(IllegalArgumentException.class, () -> MoveToFront.transform("abc", null));
    }

    @Test
    public void testInverseTransformNulls() {
        // 1. test indices == null
        assertEquals("", MoveToFront.inverseTransform(null, "abc"));

        // 2. test initialAlphabet == null
        assertEquals("", MoveToFront.inverseTransform(List.of(1, 2), null));
    }
}