TheAlgorithms
diff --git a/‎DIRECTORY.md‎
Lines changed: 99 additions & 4 deletions b/‎DIRECTORY.md‎
Lines changed: 99 additions & 4 deletions
diff --git a/‎pom.xml‎
Lines changed: 3 additions & 3 deletions b/‎pom.xml‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎src/main/java/com/thealgorithms/compression/ArithmeticCoding.java‎
Lines changed: 157 additions & 0 deletions b/‎src/main/java/com/thealgorithms/compression/ArithmeticCoding.java‎
Lines changed: 157 additions & 0 deletions
diff --git a/‎src/main/java/com/thealgorithms/compression/LZ77.java‎
Lines changed: 168 additions & 0 deletions b/‎src/main/java/com/thealgorithms/compression/LZ77.java‎
Lines changed: 168 additions & 0 deletions
@@ -82,7 +82,7 @@
             <plugin>
                 <groupId>org.jacoco</groupId>
                 <artifactId>jacoco-maven-plugin</artifactId>
-                <version>0.8.13</version>
+                <version>0.8.14</version>
                 <executions>
                     <execution>
                         <goals>
@@ -112,14 +112,14 @@
                     <dependency>
                     <groupId>com.puppycrawl.tools</groupId>
                     <artifactId>checkstyle</artifactId>
-                    <version>12.0.0</version>
+                    <version>12.1.0</version>
                     </dependency>
                 </dependencies>
             </plugin>
             <plugin>
                 <groupId>com.github.spotbugs</groupId>
                 <artifactId>spotbugs-maven-plugin</artifactId>
-                <version>4.9.6.0</version>
+                <version>4.9.8.1</version>
                 <configuration>
                     <excludeFilterFile>spotbugs-exclude.xml</excludeFilterFile>
                     <includeTests>true</includeTests>
 
@@ -0,0 +1,157 @@
+package com.thealgorithms.compression;
+
+import java.math.BigDecimal;
+import java.math.MathContext;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Arithmetic coding: an entropy coder that maps a whole message to one number.
+ *
+ * <p>
+ * Where Huffman coding gives every symbol its own bit pattern, arithmetic
+ * coding starts from the interval [0, 1) and, for each symbol of the message,
+ * shrinks the current interval to the slice reserved for that symbol. A number
+ * n with 0.0 &lt;= n &lt; 1.0 taken from the final interval identifies the
+ * message; this class returns the lower bound of that interval.
+ * </p>
+ *
+ * <p>
+ * Interval bounds are held as {@link BigDecimal} values so that the shrinking
+ * interval does not run out of precision; symbol probabilities themselves are
+ * rounded to {@link MathContext#DECIMAL128}. The implementation favours
+ * readability and is intended for educational use.
+ * </p>
+ *
+ * <p>
+ * Cost: compression performs one interval update per input character, while
+ * decompression scans the symbol table for every output character, i.e.
+ * O(n*m) range checks for n characters and m distinct symbols.
+ * </p>
+ *
+ * <p>
+ * Reference:
+ * <a href="https://en.wikipedia.org/wiki/Arithmetic_coding">Wikipedia:
+ * Arithmetic coding</a>
+ * </p>
+ */
+public final class ArithmeticCoding {
+
+    private ArithmeticCoding() {
+    }
+
+    /**
+     * Encodes a string as a single number using arithmetic coding.
+     *
+     * <p>
+     * The probability table is derived from the input itself via
+     * {@link #calculateProbabilities(String)}; the same table (and the input
+     * length) is needed to decode the result.
+     * </p>
+     *
+     * @param uncompressed The string to be compressed.
+     * @return The lower bound of the final interval, as a BigDecimal.
+     * @throws IllegalArgumentException if the input string is null or empty.
+     */
+    public static BigDecimal compress(String uncompressed) {
+        final boolean nothingToEncode = uncompressed == null || uncompressed.isEmpty();
+        if (nothingToEncode) {
+            throw new IllegalArgumentException("Input string cannot be null or empty.");
+        }
+
+        final Map<Character, Symbol> table = calculateProbabilities(uncompressed);
+
+        BigDecimal lower = BigDecimal.ZERO;
+        BigDecimal upper = BigDecimal.ONE;
+
+        for (int idx = 0; idx < uncompressed.length(); idx++) {
+            final Symbol slice = table.get(uncompressed.charAt(idx));
+            final BigDecimal width = upper.subtract(lower);
+
+            // Both new bounds are measured from the *old* lower bound.
+            final BigDecimal nextLower = project(lower, width, slice.low());
+            final BigDecimal nextUpper = project(lower, width, slice.high());
+            lower = nextLower;
+            upper = nextUpper;
+        }
+
+        return lower;
+    }
+
+    /**
+     * Decodes a number produced by {@link #compress(String)} back into text.
+     *
+     * <p>
+     * Symbols are tried in ascending character order. If, at some step, the
+     * value lies in no symbol's slice, nothing is appended for that step, so
+     * the result can be shorter than {@code length}.
+     * </p>
+     *
+     * @param compressed       The compressed BigDecimal number.
+     * @param length           The number of characters to decode.
+     * @param probabilityTable The probability table used during compression.
+     * @return The decoded string.
+     */
+    public static String decompress(BigDecimal compressed, int length, Map<Character, Symbol> probabilityTable) {
+        final List<Map.Entry<Character, Symbol>> ordered = probabilityTable.entrySet().stream().sorted(Map.Entry.comparingByKey()).toList();
+
+        final StringBuilder decoded = new StringBuilder();
+        BigDecimal lower = BigDecimal.ZERO;
+        BigDecimal upper = BigDecimal.ONE;
+
+        for (int remaining = length; remaining > 0; remaining--) {
+            final BigDecimal width = upper.subtract(lower);
+
+            for (Map.Entry<Character, Symbol> candidate : ordered) {
+                final Symbol slice = candidate.getValue();
+                final BigDecimal sliceLower = project(lower, width, slice.low());
+                final BigDecimal sliceUpper = project(lower, width, slice.high());
+
+                // Skip slices [sliceLower, sliceUpper) that do not contain the value.
+                if (compressed.compareTo(sliceLower) < 0 || compressed.compareTo(sliceUpper) >= 0) {
+                    continue;
+                }
+
+                decoded.append(candidate.getKey());
+                lower = sliceLower;
+                upper = sliceUpper;
+                break;
+            }
+        }
+
+        return decoded.toString();
+    }
+
+    /**
+     * Builds the probability table for a text: every distinct character gets a
+     * slice of [0, 1) whose width is its relative frequency. Slices are laid
+     * out back to back in ascending character order, which makes the table
+     * deterministic for a given text.
+     *
+     * @param text The input string.
+     * @return A map from each character to the {@link Symbol} holding its
+     * slice.
+     */
+    public static Map<Character, Symbol> calculateProbabilities(String text) {
+        final Map<Character, Integer> counts = new HashMap<>();
+        for (int idx = 0; idx < text.length(); idx++) {
+            counts.merge(text.charAt(idx), 1, Integer::sum);
+        }
+
+        final List<Character> ordered = counts.keySet().stream().sorted().toList();
+        final BigDecimal denominator = BigDecimal.valueOf(text.length());
+
+        final Map<Character, Symbol> table = new HashMap<>();
+        BigDecimal cursor = BigDecimal.ZERO;
+
+        for (Character ch : ordered) {
+            final int occurrences = counts.get(ch);
+            final BigDecimal share = BigDecimal.valueOf(occurrences).divide(denominator, MathContext.DECIMAL128);
+            final BigDecimal sliceEnd = cursor.add(share);
+            table.put(ch, new Symbol(cursor, sliceEnd));
+            cursor = sliceEnd;
+        }
+
+        return table;
+    }
+
+    /**
+     * Maps a fraction of the current interval to an absolute position:
+     * {@code base + width * fraction}.
+     */
+    private static BigDecimal project(BigDecimal base, BigDecimal width, BigDecimal fraction) {
+        return base.add(width.multiply(fraction));
+    }
+
+    /**
+     * The slice [low, high) of the unit interval assigned to one symbol.
+     */
+    public record Symbol(BigDecimal low, BigDecimal high) {
+    }
+}
@@ -0,0 +1,168 @@
+package com.thealgorithms.compression;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * An implementation of the Lempel-Ziv 77 (LZ77) compression algorithm.
+ * <p>
+ * LZ77 is a lossless data compression algorithm that works by finding repeated
+ * occurrences of data in a sliding window. It replaces subsequent occurrences
+ * with references (offset, length) to an earlier occurrence within the window.
+ * </p>
+ * <p>
+ * This implementation uses a simple sliding window and lookahead buffer approach.
+ * Output format is a sequence of tuples (offset, length, next_character).
+ * The NUL character (U+0000) is reserved as the "no next character" marker of
+ * the final token, so input containing it cannot be represented and is rejected.
+ * </p>
+ * <p>
+ * Time Complexity: this naive implementation tries every window position for
+ * every token and compares up to L characters at each, i.e. O(n*W*L) character
+ * comparisons in the worst case, where n is the input length, W the window size
+ * and L the lookahead size. More advanced data structures (like suffix trees)
+ * can improve this.
+ * </p>
+ * <p>
+ * Reference:
+ * <a href="https://en.wikipedia.org/wiki/LZ77_and_LZ78#LZ77">Wikipedia: LZ77</a>
+ * </p>
+ */
+public final class LZ77 {
+
+    private static final int DEFAULT_WINDOW_SIZE = 4096;
+    private static final int DEFAULT_LOOKAHEAD_BUFFER_SIZE = 16;
+
+    /** Marker stored as {@code nextChar} when a match runs to the end of the input. */
+    private static final char END_OF_STREAM = '\u0000';
+
+    private LZ77() {
+    }
+
+    /**
+     * Represents a token in the LZ77 compressed output.
+     * Stores the offset back into the window, the length of the match,
+     * and the next character after the match (or the NUL character, U+0000,
+     * if the match ends exactly at the end of the input).
+     */
+    public record Token(int offset, int length, char nextChar) {
+    }
+
+    /**
+     * Compresses the input text using the LZ77 algorithm.
+     *
+     * @param text The input string to compress. {@code null} yields an empty token list.
+     * @param windowSize The size of the sliding window (search buffer). Must be positive.
+     * @param lookaheadBufferSize The size of the lookahead buffer. Must be positive.
+     * @return A list of {@link Token} objects representing the compressed data.
+     * @throws IllegalArgumentException if windowSize or lookaheadBufferSize are not positive,
+     *         or if the text contains the NUL character (U+0000), which the token format
+     *         reserves as its end-of-stream marker.
+     */
+    public static List<Token> compress(String text, int windowSize, int lookaheadBufferSize) {
+        if (text == null) {
+            return new ArrayList<>();
+        }
+        if (windowSize <= 0 || lookaheadBufferSize <= 0) {
+            throw new IllegalArgumentException("Window size and lookahead buffer size must be positive.");
+        }
+        // A literal NUL would be mistaken for END_OF_STREAM: the encoder would stop
+        // advancing (looping forever) and the decoder would drop the character.
+        if (text.indexOf(END_OF_STREAM) >= 0) {
+            throw new IllegalArgumentException("Input text must not contain the NUL character (U+0000); it is reserved as the end-of-stream marker.");
+        }
+
+        final int textLength = text.length();
+        List<Token> compressedOutput = new ArrayList<>();
+        int currentPosition = 0;
+
+        while (currentPosition < textLength) {
+            // Start of the search window (never before the beginning of the text).
+            int searchBufferStart = Math.max(0, currentPosition - windowSize);
+            // End of the lookahead buffer, clamped to the text length. Written as
+            // position + min(...) so that a huge lookahead size cannot overflow int.
+            int lookaheadEnd = currentPosition + Math.min(lookaheadBufferSize, textLength - currentPosition);
+
+            int bestMatchDistance = 0;
+            int bestMatchLength = 0;
+
+            // Scan the window from its oldest position; only a strictly longer match
+            // replaces the current best, so ties keep the largest distance.
+            for (int candidateStart = searchBufferStart; candidateStart < currentPosition; candidateStart++) {
+                int candidateLength = matchLength(text, candidateStart, currentPosition, lookaheadEnd);
+                if (candidateLength > bestMatchLength) {
+                    bestMatchLength = candidateLength;
+                    bestMatchDistance = currentPosition - candidateStart;
+                }
+            }
+
+            int afterMatch = currentPosition + bestMatchLength;
+            if (afterMatch < textLength) {
+                // The token carries the literal following the match; skip past it.
+                compressedOutput.add(new Token(bestMatchDistance, bestMatchLength, text.charAt(afterMatch)));
+                currentPosition = afterMatch + 1;
+            } else {
+                // The match consumed the rest of the text (its length is > 0 here,
+                // because currentPosition < textLength), so there is no literal.
+                compressedOutput.add(new Token(bestMatchDistance, bestMatchLength, END_OF_STREAM));
+                currentPosition = afterMatch;
+            }
+        }
+
+        return compressedOutput;
+    }
+
+    /**
+     * Compresses the input text using the LZ77 algorithm with default buffer sizes.
+     *
+     * @param text The input string to compress. {@code null} yields an empty token list.
+     * @return A list of {@link Token} objects representing the compressed data.
+     * @throws IllegalArgumentException if the text contains the NUL character (U+0000).
+     */
+    public static List<Token> compress(String text) {
+        return compress(text, DEFAULT_WINDOW_SIZE, DEFAULT_LOOKAHEAD_BUFFER_SIZE);
+    }
+
+    /**
+     * Decompresses a list of LZ77 tokens back into the original string.
+     *
+     * @param compressedData The list of {@link Token} objects. {@code null} yields an empty string.
+     * @return The original, uncompressed string.
+     * @throws IndexOutOfBoundsException if a token with a non-zero length has an offset that
+     *         does not point into the text decoded so far.
+     */
+    public static String decompress(List<Token> compressedData) {
+        if (compressedData == null) {
+            return "";
+        }
+
+        StringBuilder decompressedText = new StringBuilder();
+
+        for (Token token : compressedData) {
+            int startIndex = decompressedText.length() - token.offset();
+
+            // Copy one character at a time: when length > offset the source range
+            // overlaps the characters being appended, which reproduces runs.
+            for (int i = 0; i < token.length(); i++) {
+                decompressedText.append(decompressedText.charAt(startIndex + i));
+            }
+
+            // END_OF_STREAM means "no literal follows this match".
+            if (token.nextChar() != END_OF_STREAM) {
+                decompressedText.append(token.nextChar());
+            }
+        }
+
+        return decompressedText.toString();
+    }
+
+    /**
+     * Returns how many characters starting at {@code currentPosition} (and before
+     * {@code lookaheadEnd}) equal the text starting at {@code candidateStart}.
+     * The source index wraps around every {@code currentPosition - candidateStart}
+     * characters, which allows a match to overlap the position being encoded.
+     */
+    private static int matchLength(String text, int candidateStart, int currentPosition, int lookaheadEnd) {
+        int distance = currentPosition - candidateStart;
+        int length = 0;
+        while (currentPosition + length < lookaheadEnd && text.charAt(candidateStart + (length % distance)) == text.charAt(currentPosition + length)) {
+            length++;
+        }
+        return length;
+    }
+}