diff --git a/src/main/java/com/thealgorithms/compression/BurrowsWheelerTransform.java b/src/main/java/com/thealgorithms/compression/BurrowsWheelerTransform.java new file mode 100644 index 000000000000..a148517e5b55 --- /dev/null +++ b/src/main/java/com/thealgorithms/compression/BurrowsWheelerTransform.java @@ -0,0 +1,220 @@ +package com.thealgorithms.compression; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; + +/** + * Implementation of the Burrows-Wheeler Transform (BWT) and its inverse. + *
+ * BWT is a reversible data transformation algorithm that rearranges a string into runs of + * similar characters. While not a compression algorithm itself, it significantly improves + * the compressibility of data for subsequent algorithms like Move-to-Front encoding and + * Run-Length Encoding. + *
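+ *
+ * <p>The transform works by:
+ * <ol>
+ *   <li>Generating all cyclic rotations of the input string</li>
+ *   <li>Sorting these rotations lexicographically</li>
+ *   <li>Taking the last column of the sorted rotation matrix as the output</li>
+ *   <li>Recording the row index of the original string in the sorted matrix</li>
+ * </ol>
+ *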
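+ * <p><b>Important:</b> The input string should end with a unique end-of-string marker
+ * (typically '$') that:
+ * <ul>
+ *   <li>Does not appear anywhere else in the input</li>
+ *   <li>Is lexicographically smaller than every other character in the input</li>
+ * </ul>
+ *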
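+ * <p><b>Time Complexity:</b>
+ * <ul>
+ *   <li>Forward transform: O(n^2 log n) character comparisons in the worst case and
+ *       O(n^2) space, where n is the input length (all n rotations are built and sorted)</li>
+ * </ul>
+ *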
Example:
+ *
+ * Input: "banana$"
+ * Output: BWTResult("annb$aa", 4)
+ * - "annb$aa" is the transformed string (groups similar characters)
+ * - 4 is the index of the original string in the sorted rotations
+ *
+ *
+ * @see <a href="https://en.wikipedia.org/wiki/Burrows%E2%80%93Wheeler_transform">Burrows–Wheeler transform (Wikipedia)</a>
+ */
+public final class BurrowsWheelerTransform {
+
+ private BurrowsWheelerTransform() { // private and empty: blocks instantiation from outside this final class
+ }
+
+ /**
+ * A container for the result of the forward BWT.
+ * + * Contains the transformed string and the index of the original string + * in the sorted rotations matrix, both of which are required for the + * inverse transformation. + *
+ */ + public static class BWTResult { + /** The transformed string (last column of the sorted rotation matrix) */ + public final String transformed; + + /** The index of the original string in the sorted rotations matrix */ + public final int originalIndex; + + /** + * Constructs a BWTResult with the transformed string and original index. + * + * @param transformed the transformed string (L-column) + * @param originalIndex the index of the original string in sorted rotations + */ + public BWTResult(String transformed, int originalIndex) { + this.transformed = transformed; + this.originalIndex = originalIndex; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + if (obj == null || getClass() != obj.getClass()) { + return false; + } + BWTResult bwtResult = (BWTResult) obj; + return originalIndex == bwtResult.originalIndex && transformed.equals(bwtResult.transformed); + } + + @Override + public int hashCode() { + return 31 * transformed.hashCode() + originalIndex; + } + + @Override + public String toString() { + return "BWTResult[transformed=" + transformed + ", originalIndex=" + originalIndex + "]"; + } + } + + /** + * Performs the forward Burrows-Wheeler Transform on the input string. + *+ * The algorithm generates all cyclic rotations of the input, sorts them + * lexicographically, and returns the last column of this sorted matrix + * along with the position of the original string. + *
+ * + *Note: It is strongly recommended that the input string ends with + * a unique end-of-string marker (e.g., '$') that is lexicographically smaller + * than any other character in the string. This ensures correct inversion.
+ * + * @param text the input string to transform; must not be {@code null} + * @return a {@link BWTResult} object containing the transformed string (L-column) + * and the index of the original string in the sorted rotations matrix; + * returns {@code BWTResult("", -1)} for empty input + * @throws NullPointerException if {@code text} is {@code null} + */ + public static BWTResult transform(String text) { + if (text == null || text.isEmpty()) { + return new BWTResult("", -1); + } + + int n = text.length(); + + // Generate all rotations of the input string + String[] rotations = new String[n]; + for (int i = 0; i < n; i++) { + rotations[i] = text.substring(i) + text.substring(0, i); + } + + // Sort rotations lexicographically + Arrays.sort(rotations); + int originalIndex = Arrays.binarySearch(rotations, text); + StringBuilder lastColumn = new StringBuilder(n); + for (int i = 0; i < n; i++) { + lastColumn.append(rotations[i].charAt(n - 1)); + } + + return new BWTResult(lastColumn.toString(), originalIndex); + } + + /** + * Performs the inverse Burrows-Wheeler Transform using the LF-mapping technique. + *+ * The LF-mapping (Last-First mapping) is an efficient method to reconstruct + * the original string from the BWT output without explicitly reconstructing + * the entire sorted rotations matrix. + *
+ * + *The algorithm works by: + *
+ * MTF is a data transformation algorithm that encodes each symbol in the input + * as its current position in a dynamically-maintained list, then moves that symbol + * to the front of the list. This transformation is particularly effective when used + * after the Burrows-Wheeler Transform (BWT), as BWT groups similar characters together. + *
+ * + *The transform converts runs of repeated characters into sequences of small integers + * (often zeros), which are highly compressible by subsequent entropy encoding algorithms + * like Run-Length Encoding (RLE) or Huffman coding. This technique is used in the + * bzip2 compression algorithm. + *
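+ *
+ * <p>How it works:
+ * <ol>
+ *   <li>Start with the initial ordered list of symbols (the alphabet)</li>
+ *   <li>For each input symbol, output its current index in the list</li>
+ *   <li>Move that symbol to the front of the list before processing the next symbol</li>
+ * </ol>
+ *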
Time Complexity: + *
Example:
+ *+ * Input: "annb$aa" + * Alphabet: "$abn" (initial order) + * Output: [1, 3, 0, 3, 3, 3, 0] + * + * Step-by-step: + * - 'a': index 1 in [$,a,b,n] → output 1, list becomes [a,$,b,n] + * - 'n': index 3 in [a,$,b,n] → output 3, list becomes [n,a,$,b] + * - 'n': index 0 in [n,a,$,b] → output 0, list stays [n,a,$,b] + * - 'b': index 3 in [n,a,$,b] → output 3, list becomes [b,n,a,$] + * - etc. + * + * Notice how repeated 'n' characters produce zeros after the first occurrence! + *+ * + * @see Move-to-front transform (Wikipedia) + */ +public final class MoveToFront { + + private MoveToFront() { + } + + /** + * Performs the forward Move-to-Front transform. + *
+ * Converts the input string into a list of integers, where each integer represents + * the position of the corresponding character in a dynamically-maintained alphabet list. + *
+ * + *Note: All characters in the input text must exist in the provided alphabet, + * otherwise an {@link IllegalArgumentException} is thrown. The alphabet should contain + * all unique characters that may appear in the input.
+ * + * @param text the input string to transform; if empty, returns an empty list + * @param initialAlphabet a string containing the initial ordered set of symbols + * (e.g., "$abn" or the full ASCII set); must not be empty + * when {@code text} is non-empty + * @return a list of integers representing the transformed data, where each integer + * is the index of the corresponding input character in the current alphabet state + * @throws IllegalArgumentException if {@code text} is non-empty and {@code initialAlphabet} + * is {@code null} or empty + * @throws IllegalArgumentException if any character in {@code text} is not found in + * {@code initialAlphabet} + */ + public static List+ * Reconstructs the original string from the list of indices produced by the + * forward transform. This requires the exact same initial alphabet that was + * used in the forward transform. + *
+ * + *Important: The {@code initialAlphabet} parameter must be identical + * to the one used in the forward transform, including character order, or the + * output will be incorrect.
+ * + * @param indices The list of integers from the forward transform. + * @param initialAlphabet the exact same initial alphabet string used for the forward transform; + * if {@code null} or empty, returns an empty string + * @return the original, untransformed string + * @throws IllegalArgumentException if any index in {@code indices} is negative or + * exceeds the current alphabet size + */ + public static String inverseTransform(Collection