diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/CrcComposer.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/CrcComposer.java index a35fe23bf4eb1..60549123ff3f1 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/CrcComposer.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/CrcComposer.java @@ -26,6 +26,7 @@ import java.io.ByteArrayOutputStream; import java.io.DataInputStream; import java.io.IOException; +import java.util.function.ToIntFunction; /** * Encapsulates logic for composing multiple CRCs into one or more combined CRCs @@ -35,11 +36,11 @@ */ @InterfaceAudience.LimitedPrivate({"Common", "HDFS", "MapReduce", "Yarn"}) @InterfaceStability.Unstable -public class CrcComposer { +public final class CrcComposer { private static final int CRC_SIZE_BYTES = 4; private static final Logger LOG = LoggerFactory.getLogger(CrcComposer.class); - private final int crcPolynomial; + private final ToIntFunction mod; private final int precomputedMonomialForHint; private final long bytesPerCrcHint; private final long stripeLength; @@ -79,28 +80,14 @@ public static CrcComposer newCrcComposer( */ public static CrcComposer newStripedCrcComposer( DataChecksum.Type type, long bytesPerCrcHint, long stripeLength) { - int polynomial = DataChecksum.getCrcPolynomialForType(type); - return new CrcComposer( - polynomial, - CrcUtil.getMonomial(bytesPerCrcHint, polynomial), - bytesPerCrcHint, - stripeLength); + return new CrcComposer(type, bytesPerCrcHint, stripeLength); } - CrcComposer( - int crcPolynomial, - int precomputedMonomialForHint, - long bytesPerCrcHint, - long stripeLength) { - LOG.debug( - "crcPolynomial=0x{}, precomputedMonomialForHint=0x{}, " - + "bytesPerCrcHint={}, stripeLength={}", - Integer.toString(crcPolynomial, 16), - Integer.toString(precomputedMonomialForHint, 16), - bytesPerCrcHint, - stripeLength); - this.crcPolynomial = crcPolynomial; - this.precomputedMonomialForHint = precomputedMonomialForHint; + private CrcComposer(DataChecksum.Type type, long bytesPerCrcHint, long stripeLength) { + LOG.debug("type={}, bytesPerCrcHint={}, stripeLength={}", + type, bytesPerCrcHint, stripeLength); + this.mod = DataChecksum.getModFunction(type); + this.precomputedMonomialForHint = CrcUtil.getMonomial(bytesPerCrcHint, mod); this.bytesPerCrcHint = bytesPerCrcHint; this.stripeLength = stripeLength; } @@ -161,10 +148,10 @@ public void update(int crcB, long bytesPerCrc) { curCompositeCrc = crcB; } else if (bytesPerCrc == bytesPerCrcHint) { curCompositeCrc = CrcUtil.composeWithMonomial( - curCompositeCrc, crcB, precomputedMonomialForHint, crcPolynomial); + curCompositeCrc, crcB, precomputedMonomialForHint, mod); } else { curCompositeCrc = CrcUtil.compose( - curCompositeCrc, crcB, bytesPerCrc, crcPolynomial); + curCompositeCrc, crcB, bytesPerCrc, mod); } curPositionInStripe += bytesPerCrc; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/CrcUtil.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/CrcUtil.java index 973363bf8d6c8..71b9e33b1fa13 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/CrcUtil.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/CrcUtil.java @@ -22,6 +22,7 @@ import org.apache.hadoop.classification.InterfaceStability; import java.util.Arrays; +import java.util.function.ToIntFunction; /** * This class provides utilities for working with CRCs. @@ -32,6 +33,55 @@ public final class CrcUtil { public static final int MULTIPLICATIVE_IDENTITY = 0x80000000; public static final int GZIP_POLYNOMIAL = 0xEDB88320; public static final int CASTAGNOLI_POLYNOMIAL = 0x82F63B78; + private static final long UNIT = 0x8000_0000_0000_0000L; + + /** + * @return a * b (mod p), + * where mod p is computed by the given mod function. + */ + static int multiplyMod(int a, int b, ToIntFunction mod) { + final long left = ((long)a) << 32; + final long right = ((long)b) << 32; + + final long product + = ((((((left & (UNIT /* */)) == 0L? 0L : right) + ^ ((left & (UNIT >>> 1)) == 0L? 0L : right >>> 1)) + ^ (((left & (UNIT >>> 2)) == 0L? 0L : right >>> 2) + ^ ((left & (UNIT >>> 3)) == 0L? 0L : right >>> 3))) + ^ ((((left & (UNIT >>> 4)) == 0L? 0L : right >>> 4) + ^ ((left & (UNIT >>> 5)) == 0L? 0L : right >>> 5)) + ^ (((left & (UNIT >>> 6)) == 0L? 0L : right >>> 6) + ^ ((left & (UNIT >>> 7)) == 0L? 0L : right >>> 7)))) + + ^ (((((left & (UNIT >>> 8)) == 0L? 0L : right >>> 8) + ^ ((left & (UNIT >>> 9)) == 0L? 0L : right >>> 9)) + ^ (((left & (UNIT >>> 10)) == 0L? 0L : right >>> 10) + ^ ((left & (UNIT >>> 11)) == 0L? 0L : right >>> 11))) + ^ ((((left & (UNIT >>> 12)) == 0L? 0L : right >>> 12) + ^ ((left & (UNIT >>> 13)) == 0L? 0L : right >>> 13)) + ^ (((left & (UNIT >>> 14)) == 0L? 0L : right >>> 14) + ^ ((left & (UNIT >>> 15)) == 0L? 0L : right >>> 15))))) + + ^ ((((((left & (UNIT >>> 16)) == 0L? 0L : right >>> 16) + ^ ((left & (UNIT >>> 17)) == 0L? 0L : right >>> 17)) + ^ (((left & (UNIT >>> 18)) == 0L? 0L : right >>> 18) + ^ ((left & (UNIT >>> 19)) == 0L? 0L : right >>> 19))) + ^ ((((left & (UNIT >>> 20)) == 0L? 0L : right >>> 20) + ^ ((left & (UNIT >>> 21)) == 0L? 0L : right >>> 21)) + ^ (((left & (UNIT >>> 22)) == 0L? 0L : right >>> 22) + ^ ((left & (UNIT >>> 23)) == 0L? 0L : right >>> 23)))) + + ^ (((((left & (UNIT >>> 24)) == 0L? 0L : right >>> 24) + ^ ((left & (UNIT >>> 25)) == 0L? 0L : right >>> 25)) + ^ (((left & (UNIT >>> 26)) == 0L? 0L : right >>> 26) + ^ ((left & (UNIT >>> 27)) == 0L? 0L : right >>> 27))) + ^ ((((left & (UNIT >>> 28)) == 0L? 0L : right >>> 28) + ^ ((left & (UNIT >>> 29)) == 0L? 0L : right >>> 29)) + ^ (((left & (UNIT >>> 30)) == 0L? 0L : right >>> 30) + ^ ((left & (UNIT >>> 31)) == 0L? 0L : right >>> 31))))); + + return mod.applyAsInt(product); + } /** * Hide default constructor for a static utils class. @@ -48,7 +98,7 @@ private CrcUtil() { * @param mod mod. * @return monomial. */ - public static int getMonomial(long lengthBytes, int mod) { + public static int getMonomial(long lengthBytes, ToIntFunction mod) { if (lengthBytes == 0) { return MULTIPLICATIVE_IDENTITY; } else if (lengthBytes < 0) { @@ -67,9 +117,9 @@ public static int getMonomial(long lengthBytes, int mod) { while (degree > 0) { if ((degree & 1) != 0) { product = (product == MULTIPLICATIVE_IDENTITY) ? multiplier : - galoisFieldMultiply(product, multiplier, mod); + multiplyMod(product, multiplier, mod); } - multiplier = galoisFieldMultiply(multiplier, multiplier, mod); + multiplier = multiplyMod(multiplier, multiplier, mod); degree >>= 1; } return product; @@ -85,8 +135,8 @@ public static int getMonomial(long lengthBytes, int mod) { * @return compose with monomial. */ public static int composeWithMonomial( - int crcA, int crcB, int monomial, int mod) { - return galoisFieldMultiply(crcA, monomial, mod) ^ crcB; + int crcA, int crcB, int monomial, ToIntFunction mod) { + return multiplyMod(crcA, monomial, mod) ^ crcB; } /** @@ -98,7 +148,7 @@ public static int composeWithMonomial( * @param mod mod. * @return compose result. */ - public static int compose(int crcA, int crcB, long lengthB, int mod) { + public static int compose(int crcA, int crcB, long lengthB, ToIntFunction mod) { int monomial = getMonomial(lengthB, mod); return composeWithMonomial(crcA, crcB, monomial, mod); } @@ -199,40 +249,5 @@ public static String toMultiCrcString(final byte[] bytes) { return sb.toString(); } - /** - * Galois field multiplication of {@code p} and {@code q} with the - * generator polynomial {@code m} as the modulus. - * - * @param m The little-endian polynomial to use as the modulus when - * multiplying p and q, with implicit "1" bit beyond the bottom bit. - */ - private static int galoisFieldMultiply(int p, int q, int m) { - int summation = 0; - - // Top bit is the x^0 place; each right-shift increments the degree of the - // current term. - int curTerm = MULTIPLICATIVE_IDENTITY; - - // Iteratively multiply p by x mod m as we go to represent the q[i] term - // (of degree x^i) times p. - int px = p; - - while (curTerm != 0) { - if ((q & curTerm) != 0) { - summation ^= px; - } - // Bottom bit represents highest degree since we're little-endian; before - // we multiply by "x" for the next term, check bottom bit to know whether - // the resulting px will thus have a term matching the implicit "1" term - // of "m" and thus will need to subtract "m" after mutiplying by "x". - boolean hasMaxDegree = ((px & 1) != 0); - px >>>= 1; - if (hasMaxDegree) { - px ^= m; - } - curTerm >>>= 1; - } - return summation; - } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/DataChecksum.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/DataChecksum.java index aac0936a66451..4b79eb0a20e81 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/DataChecksum.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/DataChecksum.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -22,6 +22,7 @@ import java.io.DataOutputStream; import java.io.IOException; import java.nio.ByteBuffer; +import java.util.function.ToIntFunction; import java.util.zip.CRC32; import java.util.zip.Checksum; @@ -118,15 +119,14 @@ static Checksum newCrc32C() { * @return the int representation of the polynomial associated with the * CRC {@code type}, suitable for use with further CRC arithmetic. */ - public static int getCrcPolynomialForType(Type type) { + static ToIntFunction getModFunction(Type type) { switch (type) { case CRC32: - return CrcUtil.GZIP_POLYNOMIAL; + return PureJavaCrc32::mod; case CRC32C: - return CrcUtil.CASTAGNOLI_POLYNOMIAL; + return PureJavaCrc32C::mod; default: - throw new IllegalArgumentException( - "No CRC polynomial could be associated with type: " + type); + throw new IllegalArgumentException("Unexpected type: " + type); } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/PureJavaCrc32.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/PureJavaCrc32.java index 87561a022cd7f..0ea0e597b5f6c 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/PureJavaCrc32.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/PureJavaCrc32.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -90,6 +90,18 @@ public void update(final byte[] b, final int offset, final int len) { crc = localCrc; } + /** + * Compute x mod p, where p is the CRC32 polynomial. + * @param x the input value + * @return x mod p + */ + public static int mod(long x) { + final int y = (int)(x); + return (int)(x >> 32) + ^ ((T[((y << 24) >>> 24) + 0x300] ^ T[((y << 16) >>> 24) + 0x200]) + ^ (T[((y << 8) >>> 24) + 0x100] ^ T[((y /* */) >>> 24) /* */])); + } + @Override final public void update(int b) { crc = (crc >>> 8) ^ T[(((crc ^ b) << 24) >>> 24)]; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/PureJavaCrc32C.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/PureJavaCrc32C.java index 11388f0f1cba9..58222a3425318 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/PureJavaCrc32C.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/PureJavaCrc32C.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -100,6 +100,18 @@ public void update(byte[] b, int off, int len) { crc = localCrc; } + /** + * Compute x mod p, where p is the CRC32C polynomial. + * @param x the input value + * @return x mod p + */ + public static int mod(long x) { + final int y = (int)(x); + return (int)(x >> 32) + ^ ((T[((y << 24) >>> 24) + 0x300] ^ T[((y << 16) >>> 24) + 0x200]) + ^ (T[((y << 8) >>> 24) + 0x100] ^ T[((y /* */) >>> 24) /* */])); + } + @Override final public void update(int b) { crc = (crc >>> 8) ^ T[T8_0_start + ((crc ^ b) & 0xff)]; diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestCrcUtil.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestCrcUtil.java index 85ccb9b59aa1d..94a6b724a5b92 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestCrcUtil.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestCrcUtil.java @@ -19,7 +19,7 @@ import java.util.Objects; import java.util.Random; - +import java.util.function.ToIntFunction; import org.apache.hadoop.test.LambdaTestUtils; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Timeout; @@ -32,7 +32,7 @@ @Timeout(10) public class TestCrcUtil { - private static final Random RANDOM = new Random(1234); + private static final Random RANDOM = new Random(); @Test public void testComposeCrc32() { @@ -91,7 +91,7 @@ public void testComposeCrc32CZeroLength() { */ private static void doTestComposeCrc( byte[] data, DataChecksum.Type type, int chunkSize, boolean useMonomial) { - int crcPolynomial = DataChecksum.getCrcPolynomialForType(type); + final ToIntFunction mod = DataChecksum.getModFunction(type); // Get full end-to-end CRC in a single shot first. DataChecksum checksum = DataChecksum.newDataChecksum( @@ -104,7 +104,7 @@ private static void doTestComposeCrc( // second pass to compare to the end-to-end CRC. int compositeCrc = 0; int crcMonomial = - useMonomial ? CrcUtil.getMonomial(chunkSize, crcPolynomial) : 0; + useMonomial ? CrcUtil.getMonomial(chunkSize, mod) : 0; for (int offset = 0; offset + chunkSize <= data.length; offset += chunkSize) { @@ -113,10 +113,10 @@ private static void doTestComposeCrc( int partialCrc = (int) checksum.getValue(); if (useMonomial) { compositeCrc = CrcUtil.composeWithMonomial( - compositeCrc, partialCrc, crcMonomial, crcPolynomial); + compositeCrc, partialCrc, crcMonomial, mod); } else { compositeCrc = CrcUtil.compose( - compositeCrc, partialCrc, chunkSize, crcPolynomial); + compositeCrc, partialCrc, chunkSize, mod); } } @@ -127,12 +127,11 @@ private static void doTestComposeCrc( checksum.update(data, data.length - partialChunkSize, partialChunkSize); int partialCrc = (int) checksum.getValue(); compositeCrc = CrcUtil.compose( - compositeCrc, partialCrc, partialChunkSize, crcPolynomial); + compositeCrc, partialCrc, partialChunkSize, mod); } assertEquals(fullCrc, compositeCrc, String.format( - "Using CRC type '%s' with crcPolynomial '0x%08x' and chunkSize '%d'" + - ", expected '0x%08x', got '0x%08x'", - type, crcPolynomial, chunkSize, fullCrc, compositeCrc)); + "Using CRC type '%s' and chunkSize '%d', expected '0x%08x', got '0x%08x'", + type, chunkSize, fullCrc, compositeCrc)); } /** @@ -143,16 +142,16 @@ private static void doTestComposeCrcZerolength(DataChecksum.Type type) { // Without loss of generality, we can pick any integer as our fake crcA // even if we don't happen to know the preimage. int crcA = 0xCAFEBEEF; - int crcPolynomial = DataChecksum.getCrcPolynomialForType(type); + final ToIntFunction mod = DataChecksum.getModFunction(type); DataChecksum checksum = DataChecksum.newDataChecksum( type, Integer.MAX_VALUE); Objects.requireNonNull(checksum, "checksum"); int crcB = (int) checksum.getValue(); - assertEquals(crcA, CrcUtil.compose(crcA, crcB, 0, crcPolynomial)); + assertEquals(crcA, CrcUtil.compose(crcA, crcB, 0, mod)); - int monomial = CrcUtil.getMonomial(0, crcPolynomial); + int monomial = CrcUtil.getMonomial(0, mod); assertEquals( - crcA, CrcUtil.composeWithMonomial(crcA, crcB, monomial, crcPolynomial)); + crcA, CrcUtil.composeWithMonomial(crcA, crcB, monomial, mod)); } @Test @@ -221,4 +220,145 @@ public void testToMultiCrcStringNoElements() { "[]", CrcUtil.toMultiCrcString(new byte[0])); } + + @Test + public void testMultiplyMod() { + runTestMultiplyMod(10_000_000, DataChecksum.Type.CRC32); + runTestMultiplyMod(10_000_000, DataChecksum.Type.CRC32C); + } + + private static long[] runTestMultiplyMod(int n, DataChecksum.Type type) { + System.out.printf("Run %s with %d computations%n", type, n); + final int polynomial = getCrcPolynomialForType(type); + final ToIntFunction mod = DataChecksum.getModFunction(type); + + final int[] p = new int[n]; + final int[] q = new int[n]; + for (int i = 0; i < n; i++) { + p[i] = RANDOM.nextInt(); + q[i] = RANDOM.nextInt(); + } + + final int[] expected = new int[n]; + final long[] times = new long[2]; + final long t0 = System.currentTimeMillis(); + for (int i = 0; i < n; i++) { + expected[i] = galoisFieldMultiply(p[i], q[i], polynomial); + } + times[0] = System.currentTimeMillis() - t0; + final double ops0 = n * 1000.0 / times[0]; + System.out.printf("galoisFieldMultiply: %.3fs (%.2f ops)%n", times[0] / 1000.0, ops0); + + final int[] computed = new int[n]; + final long t1 = System.currentTimeMillis(); + for (int i = 0; i < n; i++) { + computed[i] = CrcUtil.multiplyMod(p[i], q[i], mod); + } + times[1] = System.currentTimeMillis() - t1; + final double ops1 = n * 1000.0 / times[1]; + System.out.printf("multiplyCrc32 : %.3fs (%.2f ops)%n", times[1] / 1000.0, ops1); + System.out.printf("multiplyCrc32 is %.2f%% faster%n", (ops1 - ops0) * 100.0 / ops0); + + for (int i = 0; i < n; i++) { + if (expected[i] != computed[i]) { + System.out.printf("expected %08X%n", expected[i]); + System.out.printf("computed %08X%n", computed[i]); + throw new IllegalStateException(); + } + } + return times; + } + + /** + * getCrcPolynomialForType. + * + * @param type type. + * @return the int representation of the polynomial associated with the + * CRC {@code type}, suitable for use with further CRC arithmetic. + */ + private static int getCrcPolynomialForType(DataChecksum.Type type) { + switch (type) { + case CRC32: + return CrcUtil.GZIP_POLYNOMIAL; + case CRC32C: + return CrcUtil.CASTAGNOLI_POLYNOMIAL; + default: + throw new IllegalArgumentException("Unexpected type: " + type); + } + } + + /** + * Galois field multiplication of {@code p} and {@code q} with the + * generator polynomial {@code m} as the modulus. + * + * @param m The little-endian polynomial to use as the modulus when + * multiplying p and q, with implicit "1" bit beyond the bottom bit. + */ + private static int galoisFieldMultiply(int p, int q, int m) { + int summation = 0; + + // Top bit is the x^0 place; each right-shift increments the degree of the + // current term. + int curTerm = CrcUtil.MULTIPLICATIVE_IDENTITY; + + // Iteratively multiply p by x mod m as we go to represent the q[i] term + // (of degree x^i) times p. + int px = p; + + while (curTerm != 0) { + if ((q & curTerm) != 0) { + summation ^= px; + } + + // Bottom bit represents highest degree since we're little-endian; before + // we multiply by "x" for the next term, check bottom bit to know whether + // the resulting px will thus have a term matching the implicit "1" term + // of "m" and thus will need to subtract "m" after mutiplying by "x". + boolean hasMaxDegree = ((px & 1) != 0); + px >>>= 1; + if (hasMaxDegree) { + px ^= m; + } + curTerm >>>= 1; + } + return summation; + } + + /** For running benchmarks. */ + public static class Benchmark { + /** + * Usages: java {@link Benchmark} [m] [n] [type] + * m: the number of iterations + * n: the number of multiplication + * type: the CRC type, either CRC32 or CRC32C. + */ + public static void main(String[] args) throws Exception { + final int m = args.length >= 1? Integer.parseInt(args[0]) : 10; + final int n = args.length >= 2? Integer.parseInt(args[1]) : 100_000_000; + final DataChecksum.Type type = args.length >= 3? DataChecksum.Type.valueOf(args[2]) + : DataChecksum.Type.CRC32; + + final int warmUpIterations = 2; + System.out.printf("%nStart warming up with %d iterations ...%n", warmUpIterations); + for (int i = 0; i < 2; i++) { + runTestMultiplyMod(n, type); + } + + System.out.printf("%nStart benchmark with %d iterations ...%n", m); + final long[] times = new long[2]; + for (int i = 0; i < m; i++) { + System.out.printf("%d) ", i); + final long[] t = runTestMultiplyMod(n, type); + times[0] += t[0]; + times[1] += t[1]; + } + + System.out.printf("%nResult) %d x %d computations:%n", m, n); + final double ops0 = n * 1000.0 / times[0]; + System.out.printf("galoisFieldMultiply: %.3fs (%.2f ops)%n", times[0] / 1000.0, ops0); + final double ops1 = n * 1000.0 / times[1]; + System.out.printf("multiplyCrc32 : %.3fs (%.2f ops)%n", times[1] / 1000.0, ops1); + System.out.printf("multiplyCrc32 is %.2f%% faster%n", (ops1 - ops0) * 100.0 / ops0); + } + } }