From 46494b025dd7f1677653758c84b9e66a70420a6a Mon Sep 17 00:00:00 2001
From: xtonik
Date: Sat, 3 Jun 2023 10:46:24 +0200
Subject: [PATCH 1/2] A missing utf8 sibling method for parseEightDigitsUtf16

---
 .../fastdoubleparser/FastDoubleSwar.java      | 25 +++++++++++++------
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/fastdoubleparser-dev/src/main/java/ch.randelshofer.fastdoubleparser/ch/randelshofer/fastdoubleparser/FastDoubleSwar.java b/fastdoubleparser-dev/src/main/java/ch.randelshofer.fastdoubleparser/ch/randelshofer/fastdoubleparser/FastDoubleSwar.java
index fbbd1dbe..7354a8b1 100644
--- a/fastdoubleparser-dev/src/main/java/ch.randelshofer.fastdoubleparser/ch/randelshofer/fastdoubleparser/FastDoubleSwar.java
+++ b/fastdoubleparser-dev/src/main/java/ch.randelshofer.fastdoubleparser/ch/randelshofer/fastdoubleparser/FastDoubleSwar.java
@@ -163,6 +163,23 @@ public static boolean isEightZeroesUtf8(long chunk) {
         return chunk == 0x3030303030303030L;
     }
 
+    public static int parseEightDigitsUtf8(long chunk) {
+        // Subtract the character '0' from all characters.
+        long val = chunk - 0x3030303030303030L;
+
+        return parseBcd(val);
+    }
+
+    private static int parseBcd(long val) {
+        // The last 2 multiplications are independent of each other.
+        long mask = 0xff_000000ffL;
+        long mul1 = 100 + (100_0000L << 32);
+        long mul2 = 1 + (1_0000L << 32);
+        val = val * 10 + (val >>> 8);// same as: val = val * (1 + (10 << 8)) >>> 8;
+        val = (val & mask) * mul1 + (val >>> 16 & mask) * mul2 >>> 32;
+        return (int) val;
+    }
+
     public static int parseEightDigitsUtf16(long first, long second) {
         long fval = first - 0x0030_0030_0030_0030L;
         long sval = second - 0x0030_0030_0030_0030L;
@@ -299,13 +316,7 @@ public static int tryToParseEightDigitsUtf8(long chunk) {
             return -1;
         }
 
-        // The last 2 multiplications are independent of each other.
-        long mask = 0xff_000000ffL;
-        long mul1 = 100 + (100_0000L << 32);
-        long mul2 = 1 + (1_0000L << 32);
-        val = val * 10 + (val >>> 8);// same as: val = val * (1 + (10 << 8)) >>> 8;
-        val = (val & mask) * mul1 + (val >>> 16 & mask) * mul2 >>> 32;
-        return (int) val;
+        return parseBcd(val);
     }
 
     /**

From 57a690d94b40240dca953ca9e4b310e43a74d007 Mon Sep 17 00:00:00 2001
From: xtonik
Date: Thu, 8 Jun 2023 23:42:59 +0200
Subject: [PATCH 2/2] missing sibling method for counting the number of first
 valid decimal digits in UTF-16

---
Review notes (ignored by git am):
 * countUpToEightDigitsUtf8 keeps Long.numberOfTrailingZeros. parseBcd
   treats the least significant byte as the first digit, so the first
   character of a chunk lives in the lowest lane. Scanning from the high
   end would report the last non-digit and, because the SWAR subtraction
   borrows upwards, would mis-flag a '0' that precedes a character below
   '0' (for example "1230.567" would count 3 digits instead of 4).
 * countUpToFourDigitsUtf16 uses the same low-to-high scan.
 * The tests now build their chunks in that little-endian layout.
 * The post-image blob ids on the "index" lines are carried over from the
   original submission; regenerate the series with git format-patch
   after applying.

 .../fastdoubleparser/FastDoubleSwar.java      | 33 +++++++
 .../fastdoubleparser/FastDoubleSwarTest.java  | 96 +++++++++++++++++++
 2 files changed, 129 insertions(+)
 create mode 100644 fastdoubleparser-dev/src/test/java/ch/randelshofer/fastdoubleparser/FastDoubleSwarTest.java

diff --git a/fastdoubleparser-dev/src/main/java/ch.randelshofer.fastdoubleparser/ch/randelshofer/fastdoubleparser/FastDoubleSwar.java b/fastdoubleparser-dev/src/main/java/ch.randelshofer.fastdoubleparser/ch/randelshofer/fastdoubleparser/FastDoubleSwar.java
index 7354a8b1..43d1da8b 100644
--- a/fastdoubleparser-dev/src/main/java/ch.randelshofer.fastdoubleparser/ch/randelshofer/fastdoubleparser/FastDoubleSwar.java
+++ b/fastdoubleparser-dev/src/main/java/ch.randelshofer.fastdoubleparser/ch/randelshofer/fastdoubleparser/FastDoubleSwar.java
@@ -50,7 +50,40 @@ class FastDoubleSwar {
     public static int countUpToEightDigitsUtf8(long chunk) {
         long val = chunk - 0x3030303030303030L;
         long predicate = ((chunk + 0x4646464646464646L) | val) & 0x8080808080808080L;
         return predicate == 0L ? 8 : Long.numberOfTrailingZeros(predicate) >> 3;
+    }
+
+    /**
+     * Counts how many of the four UTF-16 characters in {@code chunk} are
+     * decimal digits, starting at the first character and stopping at the
+     * first non-digit.
+     * <p>
+     * The first character must be in the least significant 16 bits, the same
+     * little-endian layout that {@link #countUpToEightDigitsUtf8(long)} uses
+     * for bytes. Scanning from the low end matters: the SWAR subtraction
+     * borrows into higher lanes only, so a non-digit can never corrupt the
+     * verdict for a character that precedes it.
+     *
+     * @param chunk four UTF-16 characters, first character in the low 16 bits
+     * @return the number of leading digits, in the range {@code [0, 4]}
+     */
+    public static int countUpToFourDigitsUtf16(long chunk) {
+        long val = chunk - 0x0030_0030_0030_0030L;
+        long predicate = ((chunk + 0x0046_0046_0046_0046L) | val) & 0xff80_ff80_ff80_ff80L;
+        return predicate == 0L ? 4 : Long.numberOfTrailingZeros(predicate) >> 4;
+    }
+
+    /**
+     * Counts the leading decimal digits in eight UTF-16 characters that are
+     * split across two {@code long} values.
+     *
+     * @param first  characters 0 to 3, first character in the low 16 bits
+     * @param second characters 4 to 7, in the same layout as {@code first}
+     * @return the number of leading digits, in the range {@code [0, 8]}
+     */
+    public static int countUpToEightDigitsUtf16(long first, long second) {
+        int digits = countUpToFourDigitsUtf16(first);
+        return digits < 4 ? digits : 4 + countUpToFourDigitsUtf16(second);
     }
 
     /**
diff --git a/fastdoubleparser-dev/src/test/java/ch/randelshofer/fastdoubleparser/FastDoubleSwarTest.java b/fastdoubleparser-dev/src/test/java/ch/randelshofer/fastdoubleparser/FastDoubleSwarTest.java
new file mode 100644
index 00000000..830b58e4
--- /dev/null
+++ b/fastdoubleparser-dev/src/test/java/ch/randelshofer/fastdoubleparser/FastDoubleSwarTest.java
@@ -0,0 +1,96 @@
+/*
+ * @(#)FastDoubleSwarTest.java
+ * Copyright © 2023 Werner Randelshofer, Switzerland. MIT License.
+ */
+package ch.randelshofer.fastdoubleparser;
+
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.MethodSource;
+
+import java.util.stream.IntStream;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+/**
+ * Tests the digit-counting methods of {@link FastDoubleSwar}.
+ * <p>
+ * All chunks are laid out little-endian: the first character of the input
+ * occupies the least significant byte (UTF-8) or 16 bits (UTF-16).
+ */
+class FastDoubleSwarTest {
+    private static final long EIGHT_ZERO_DIGITS_UTF8 = 0x30_30_30_30_30_30_30_30L;
+    private static final long FOUR_ZERO_DIGITS_UTF16 = 0x0030_0030_0030_0030L;
+    private static final long ONE_INVALID_DIGIT = 0x40;// '@'; OR-ed onto '0' (0x30) it yields 'p' (0x70)
+    private static final long INTERFERENCE_DIGITS = 0x4030300040303080L;
+
+    private static IntStream utf8charPositionInLong() {
+        return IntStream.range(1, 9);
+    }
+
+    private static long invalidUtf8char(int position) {
+        return EIGHT_ZERO_DIGITS_UTF8 | ONE_INVALID_DIGIT << 8 * (position - 1);
+    }
+
+    @ParameterizedTest(name = "invalid UTF-8 character at position {0}")
+    @MethodSource("utf8charPositionInLong")
+    public void countUpToEightDigitsUtf8_invalid(int invalidCharacterPosition) {
+        long chunk = invalidUtf8char(invalidCharacterPosition);
+
+        int expected = invalidCharacterPosition - 1;
+        int actual = FastDoubleSwar.countUpToEightDigitsUtf8(chunk);
+
+        assertEquals(expected, actual, Long.toString(chunk, 16));
+    }
+
+    @Test
+    public void countUpToEightDigitsUtf8_valid() {
+        assertEquals(8, FastDoubleSwar.countUpToEightDigitsUtf8(EIGHT_ZERO_DIGITS_UTF8));
+    }
+
+    @Test
+    public void countUpToEightDigitsUtf8_decimalPointAfterZero() {
+        // "1230.567": '.' is below '0', so the SWAR subtraction borrows out of
+        // its byte. The borrow must not disturb the digits that precede it.
+        long chunk = 0x37_36_35_2e_30_33_32_31L;
+
+        assertEquals(4, FastDoubleSwar.countUpToEightDigitsUtf8(chunk), Long.toString(chunk, 16));
+    }
+
+    private static IntStream utf16charPositionInLong() {
+        return IntStream.range(1, 5);
+    }
+
+    private static long invalidUtf16char(int position) {
+        return FOUR_ZERO_DIGITS_UTF16 | ONE_INVALID_DIGIT << 16 * (position - 1);
+    }
+
+    @ParameterizedTest(name = "invalid UTF-16 character in first chunk at position {0}")
+    @MethodSource("utf16charPositionInLong")
+    public void countUpToEightDigitsUtf16_firstInvalid(int invalidCharacterPosition) {
+        long first = invalidUtf16char(invalidCharacterPosition);
+        long second = INTERFERENCE_DIGITS;
+
+        int expected = invalidCharacterPosition - 1;
+        int actual = FastDoubleSwar.countUpToEightDigitsUtf16(first, second);
+
+        assertEquals(expected, actual, Long.toString(first, 16));
+    }
+
+    @ParameterizedTest(name = "invalid UTF-16 character in second chunk at position {0}")
+    @MethodSource("utf16charPositionInLong")
+    public void countUpToEightDigitsUtf16_secondInvalid(int invalidCharacterPosition) {
+        long first = FOUR_ZERO_DIGITS_UTF16;
+        long second = invalidUtf16char(invalidCharacterPosition);
+
+        int expected = 4 + invalidCharacterPosition - 1;
+        int actual = FastDoubleSwar.countUpToEightDigitsUtf16(first, second);
+
+        assertEquals(expected, actual, Long.toString(second, 16));
+    }
+
+    @Test
+    public void countUpToEightDigitsUtf16_bothValid() {
+        assertEquals(8, FastDoubleSwar.countUpToEightDigitsUtf16(FOUR_ZERO_DIGITS_UTF16, FOUR_ZERO_DIGITS_UTF16));
+    }
+}