diff --git a/fluss-common/src/test/java/org/apache/fluss/row/encode/CompactedKeyEncoderTest.java b/fluss-common/src/test/java/org/apache/fluss/row/encode/CompactedKeyEncoderTest.java index d7691886b3..48f93e0c84 100644 --- a/fluss-common/src/test/java/org/apache/fluss/row/encode/CompactedKeyEncoderTest.java +++ b/fluss-common/src/test/java/org/apache/fluss/row/encode/CompactedKeyEncoderTest.java @@ -26,14 +26,20 @@ import org.apache.fluss.row.indexed.IndexedRow; import org.apache.fluss.row.indexed.IndexedRowTest; import org.apache.fluss.row.indexed.IndexedRowWriter; +import org.apache.fluss.shaded.guava32.com.google.common.io.BaseEncoding; import org.apache.fluss.types.DataType; import org.apache.fluss.types.DataTypes; import org.apache.fluss.types.RowType; import org.junit.jupiter.api.Test; +import java.net.URL; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; import java.util.Collections; import java.util.List; +import java.util.regex.Pattern; import java.util.stream.IntStream; import static org.apache.fluss.row.TestInternalRowGenerator.createAllRowType; @@ -44,6 +50,11 @@ /** Test for {@link CompactedKeyEncoder}. 
*/ class CompactedKeyEncoderTest { + private static final Pattern COMMENT_PATTERN = Pattern.compile("(#.*$)", Pattern.MULTILINE); + private static final Pattern NON_HEX_PATTERN = + Pattern.compile("[^0-9A-Fa-f]+", Pattern.MULTILINE); + private static final String ENCODED_KEY_HEX_FILE_PATH = "encoding/encoded_key.hex"; + @Test void testEncodeKey() { // test int, long as primary key @@ -72,6 +83,9 @@ void testEncodeKeyWithKeyNames() { CompactedKeyEncoder keyEncoder = CompactedKeyEncoder.createKeyEncoder(rowType, pk); byte[] encodedBytes = keyEncoder.encodeKey(row); + // 2 (length prefix of the string), 97 (the letter a), 50 (the digit 2) + assertThat(encodedBytes).isEqualTo(new byte[] {2, 97, 50}); + // decode it, should only get "a2" InternalRow encodedKey = decodeRow( @@ -124,6 +138,9 @@ void testGetKey() { BinaryString.fromString("a3")); keyBytes = keyEncoder1.encodeKey(row); + // 1, 2 (length prefix of the string), 97 (the letter a), 50 (the digit 2) + assertThat(keyBytes).isEqualTo(new byte[] {1, 2, 97, 50}); + InternalRow keyRow = decodeRow( new DataType[] { @@ -154,6 +171,22 @@ void testGetKeyForAllTypes() throws Exception { InternalRow keyRow = decodeRow(keyDataTypes, keyBytes); + URL url = getClass().getClassLoader().getResource(ENCODED_KEY_HEX_FILE_PATH); + + if (url == null) { + throw new RuntimeException( + "Missing hex file for encoding test: " + ENCODED_KEY_HEX_FILE_PATH); + } + + Path encodedKeyFilePath = Paths.get(url.toURI()); + + String encodedKeyString = new String(Files.readAllBytes(encodedKeyFilePath)); + encodedKeyString = COMMENT_PATTERN.matcher(encodedKeyString).replaceAll(""); + encodedKeyString = NON_HEX_PATTERN.matcher(encodedKeyString).replaceAll(""); + byte[] expected = BaseEncoding.base16().decode(encodedKeyString); + + assertThat(keyBytes).isEqualTo(expected); + // get the field getter for the key field InternalRow.FieldGetter[] fieldGetters = new InternalRow.FieldGetter[keyDataTypes.length]; diff --git a/fluss-common/src/test/resources/encoding/encoded_key.hex 
b/fluss-common/src/test/resources/encoding/encoded_key.hex new file mode 100644 index 0000000000..82338c2315 --- /dev/null +++ b/fluss-common/src/test/resources/encoding/encoded_key.hex @@ -0,0 +1,109 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# The purpose of this test file is to ensure that encoding (and consequently, hashing) is consistent across the Java and Rust clients + +# BOOLEAN: true +01 + +# TINYINT: 2 +02 + +# SMALLINT: 10 +0A + +# INT: 100 +00 64 + +# BIGINT: -6101065172474983726 +D2 95 FC D8 CE B1 AA AA AB 01 + +# FLOAT: 13.2 +33 33 53 41 + +# DOUBLE: 15.21 +EC 51 B8 1E 85 6B 2E 40 + +# DATE: TypeUtils.castFromString("2023-10-25", DataTypes.DATE()) +C7 99 01 + +# TIME(0): TypeUtils.castFromString("09:30:00.0", DataTypes.TIME()) +C0 B3 A7 10 + +# BINARY(20): "1234567890".getBytes() +0A 31 32 33 34 35 36 37 38 39 30 + +# BYTES: byte[]: "20".getBytes() +02 32 30 + +# CHAR(2): String: "1" +01 31 + +# STRING: String: "hello" +05 68 65 6C 6C 6F + +# DECIMAL(5, 2): Decimal.fromUnscaledLong(9, 5, 2) +09 + +# DECIMAL(20, 0): Decimal.fromBigDecimal(new BigDecimal(10), 20, 0) +01 0A + +# TIMESTAMP(1): TimestampNtz.fromMillis(1698235273182L) +DE 9F D7 B5 B6 31 + +# TIMESTAMP(5): TimestampNtz.fromMillis(1698235273182L) +DE 9F 
D7 B5 B6 31 00 + +# TIMESTAMP_LTZ(1): TimestampLtz.fromEpochMillis(1698235273182L) +DE 9F D7 B5 B6 31 + +# TIMESTAMP_LTZ(5): TimestampLtz.fromEpochMillis(1698235273182L) +DE 9F D7 B5 B6 31 00 + +# ARRAY(INT): GenericArray.of(1, 2, 3, 4, 5, -11, null, 444, 102234) +30 09 00 00 00 40 00 00 00 01 +00 00 00 02 00 00 00 03 00 00 +00 04 00 00 00 05 00 00 00 F5 +FF FF FF 00 00 00 00 BC 01 00 +00 5A 8F 01 00 00 00 00 00 + +# ARRAY<FLOAT>: GenericArray.of(0.1f, 1.1f, -0.5f, 6.6f, Float.MAX_VALUE, Float.MIN_VALUE) +20 06 00 00 00 00 00 00 00 CD +CC CC 3D CD CC 8C 3F 00 00 00 +BF 33 33 D3 40 FF FF 7F 7F 01 +00 00 00 + +# ARRAY<ARRAY<STRING>>: GenericArray.of(GenericArray.of(fromString("a"), null, fromString("c")), null, GenericArray.of(fromString("hello"), fromString("world"))) +58 03 00 00 00 02 00 00 00 20 +00 00 00 20 00 00 00 00 00 00 +00 00 00 00 00 18 00 00 00 40 +00 00 00 03 00 00 00 02 00 00 +00 61 00 00 00 00 00 00 81 00 +00 00 00 00 00 00 00 63 00 00 +00 00 00 00 81 02 00 00 00 00 +00 00 00 68 65 6C 6C 6F 00 00 +85 77 6F 72 6C 64 00 00 85 + +# MAP<INT, STRING>: GenericMap.of(0, null, 1, fromString("1"), 2, fromString("2")) +3C 18 00 00 00 03 00 00 00 00 +00 00 00 00 00 00 00 01 00 00 +00 02 00 00 00 00 00 00 00 03 +00 00 00 01 00 00 00 00 00 00 +00 00 00 00 00 31 00 00 00 00 +00 00 81 32 00 00 00 00 00 00 +81 \ No newline at end of file