From 8abeba5fd0055ea68b9ba7022560e3e3c109abb1 Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Mon, 5 Jan 2026 14:01:56 +0800 Subject: [PATCH 01/44] update spec doc --- .../specification/xlang_serialization_spec.md | 11 +- docs/specification/xlang_type_mapping.md | 104 +++++++++--------- 2 files changed, 61 insertions(+), 54 deletions(-) diff --git a/docs/specification/xlang_serialization_spec.md b/docs/specification/xlang_serialization_spec.md index d653c5180f..6b09b1042a 100644 --- a/docs/specification/xlang_serialization_spec.md +++ b/docs/specification/xlang_serialization_spec.md @@ -43,7 +43,14 @@ This specification defines the Fory xlang binary format. The format is dynamic r - var32: a 32-bit signed integer which use fory variable-length encoding. - int64: a 64-bit signed integer. - var64: a 64-bit signed integer which use fory PVL encoding. -- h64: a 64-bit signed integer which use fory Hybrid encoding. +- hybrid64: a 64-bit signed integer which use fory Hybrid encoding. +- uint8: an 8-bit unsigned integer. +- uint16: a 16-bit unsigned integer. +- uint32: a 32-bit unsigned integer. +- varu32: a 32-bit unsigned integer which use fory variable-length encoding. +- uint64: a 64-bit unsigned integer. +- varu64: a 64-bit unsigned integer which use fory PVL encoding. +- hybridu64: a 64-bit unsigned integer which use fory Hybrid encoding. - float16: a 16-bit floating point number. - float32: a 32-bit floating point number. - float64: a 64-bit floating point number including NaN and Infinity. @@ -82,7 +89,7 @@ This specification defines the Fory xlang binary format. The format is dynamic r Note: -- Unsigned int/long are not added here, since not every language support those types. +- Unsigned integer types use the same byte sizes as their signed counterparts; the difference is in value interpretation. See [Type mapping](xlang_type_mapping.md) for language-specific type mappings. 
### Polymorphisms diff --git a/docs/specification/xlang_type_mapping.md b/docs/specification/xlang_type_mapping.md index 50dceb7bb3..9eedbf259d 100644 --- a/docs/specification/xlang_type_mapping.md +++ b/docs/specification/xlang_type_mapping.md @@ -27,58 +27,58 @@ Note: ## Type Mapping -| Fory Type | Fory Type ID | Java | Python | Javascript | C++ | Golang | Rust | -| ----------------------- | ------------ | --------------- | -------------------- | -------------- | ------------------------------ | ---------------- | ----------------- | -| bool | 1 | bool/Boolean | bool | Boolean | bool | bool | bool | -| int8 | 2 | byte/Byte | int/pyfory.int8 | Type.int8() | int8_t | int8 | i8 | -| int16 | 3 | short/Short | int/pyfory.int16 | Type.int16() | int16_t | int16 | i16 | -| int32 | 4 | int/Integer | int/pyfory.int32 | Type.int32() | int32_t | int32 | i32 | -| var32 | 5 | int/Integer | int/pyfory.var32 | Type.var32() | int32_t | int32 | i32 | -| int64 | 6 | long/Long | int/pyfory.int64 | Type.int64() | int64_t | int64 | i64 | -| var64 | 7 | long/Long | int/pyfory.var64 | Type.var64() | int64_t | int64 | i64 | -| h64 | 8 | long/Long | int/pyfory.h64 | Type.h64() | int64_t | int64 | i64 | -| uint8 | 9 | short/Short | int/pyfory.uint8 | Type.uint8() | uint8_t | uint8 | u8 | -| uint16 | 10 | int/Integer | int/pyfory.uint16 | Type.uint16() | uint16_t | uint16 | u16 | -| uint32 | 11 | long/Long | int/pyfory.uint32 | Type.uint32() | uint32_t | uint32 | u32 | -| varu32 | 12 | long/Long | int/pyfory.varu32 | Type.varu32() | uint32_t | uint32 | u32 | -| uint64 | 13 | long/Long | int/pyfory.uint64 | Type.uint64() | uint64_t | uint64 | u64 | -| varu64 | 14 | long/Long | int/pyfory.varu64 | Type.varu64() | uint64_t | uint64 | u64 | -| hu64 | 15 | long/Long | int/pyfory.hu64 | Type.hu64() | uint64_t | uint64 | u64 | -| float16 | 16 | float/Float | float/pyfory.float16 | Type.float16() | fory::float16_t | fory.float16 | fory::f16 | -| float32 | 17 | float/Float | float/pyfory.float32 | 
Type.float32() | float | float32 | f32 | -| float64 | 18 | double/Double | float/pyfory.float64 | Type.float64() | double | float64 | f64 | -| string | 19 | String | str | String | string | string | String/str | -| list | 20 | List/Collection | list/tuple | array | vector | slice | Vec | -| set | 21 | Set | set | / | set | fory.Set | Set | -| map | 22 | Map | dict | Map | unordered_map | map | HashMap | -| enum | 23 | Enum subclasses | enum subclasses | / | enum | / | enum | -| named_enum | 24 | Enum subclasses | enum subclasses | / | enum | / | enum | -| struct | 25 | pojo/record | data class | object | struct/class | struct | struct | -| compatible_struct | 26 | pojo/record | data class | object | struct/class | struct | struct | -| named_struct | 27 | pojo/record | data class | object | struct/class | struct | struct | -| named_compatible_struct | 28 | pojo/record | data class | object | struct/class | struct | struct | -| ext | 29 | pojo/record | data class | object | struct/class | struct | struct | -| named_ext | 30 | pojo/record | data class | object | struct/class | struct | struct | -| union | 31 | Union | typing.Union | / | `std::variant` | / | tagged union enum | -| none | 32 | null | None | null | `std::monostate` | nil | `()` | -| duration | 33 | Duration | timedelta | Number | duration | Duration | Duration | -| timestamp | 34 | Instant | datetime | Number | std::chrono::nanoseconds | Time | DateTime | -| local_date | 35 | Date | datetime | Number | std::chrono::nanoseconds | Time | DateTime | -| decimal | 36 | BigDecimal | Decimal | bigint | / | / | / | -| binary | 37 | byte[] | bytes | / | `uint8_t[n]/vector` | `[n]uint8/[]T` | `Vec` | -| array | 38 | array | np.ndarray | / | / | array/slice | Vec | -| bool_array | 39 | bool[] | ndarray(np.bool\_) | / | `bool[n]` | `[n]bool/[]T` | `Vec` | -| int8_array | 40 | byte[] | ndarray(int8) | / | `int8_t[n]/vector` | `[n]int8/[]T` | `Vec` | -| int16_array | 41 | short[] | ndarray(int16) | / | 
`int16_t[n]/vector` | `[n]int16/[]T` | `Vec` | -| int32_array | 42 | int[] | ndarray(int32) | / | `int32_t[n]/vector` | `[n]int32/[]T` | `Vec` | -| int64_array | 43 | long[] | ndarray(int64) | / | `int64_t[n]/vector` | `[n]int64/[]T` | `Vec` | -| uint8_array | 44 | short[] | ndarray(uint8) | / | `uint8_t[n]/vector` | `[n]uint8/[]T` | `Vec` | -| uint16_array | 45 | int[] | ndarray(uint16) | / | `uint16_t[n]/vector` | `[n]uint16/[]T` | `Vec` | -| uint32_array | 46 | long[] | ndarray(uint32) | / | `uint32_t[n]/vector` | `[n]uint32/[]T` | `Vec` | -| uint64_array | 47 | long[] | ndarray(uint64) | / | `uint64_t[n]/vector` | `[n]uint64/[]T` | `Vec` | -| float16_array | 48 | float[] | ndarray(float16) | / | `fory::float16_t[n]/vector` | `[n]float16/[]T` | `Vec` | -| float32_array | 49 | float[] | ndarray(float32) | / | `float[n]/vector` | `[n]float32/[]T` | `Vec` | -| float64_array | 50 | double[] | ndarray(float64) | / | `double[n]/vector` | `[n]float64/[]T` | `Vec` | +| Fory Type | Fory Type ID | Java | Python | Javascript | C++ | Golang | Rust | +| ----------------------- | ------------ | --------------- | -------------------- | ---------------- | ------------------------------ | ---------------- | ----------------- | +| bool | 1 | bool/Boolean | bool | Boolean | bool | bool | bool | +| int8 | 2 | byte/Byte | int/pyfory.int8 | Type.int8() | int8_t | int8 | i8 | +| int16 | 3 | short/Short | int/pyfory.int16 | Type.int16() | int16_t | int16 | i16 | +| int32 | 4 | int/Integer | int/pyfory.int32 | Type.int32() | int32_t | int32 | i32 | +| var32 | 5 | int/Integer | int/pyfory.var32 | Type.var32() | int32_t | int32 | i32 | +| int64 | 6 | long/Long | int/pyfory.int64 | Type.int64() | int64_t | int64 | i64 | +| var64 | 7 | long/Long | int/pyfory.var64 | Type.var64() | int64_t | int64 | i64 | +| hybrid64 | 8 | long/Long | int/pyfory.hybrid64 | Type.hybrid64() | int64_t | int64 | i64 | +| uint8 | 9 | short/Short | int/pyfory.uint8 | Type.uint8() | uint8_t | uint8 | u8 | +| uint16 
| 10 | int/Integer | int/pyfory.uint16 | Type.uint16() | uint16_t | uint16 | u16 | +| uint32 | 11 | long/Long | int/pyfory.uint32 | Type.uint32() | uint32_t | uint32 | u32 | +| varu32 | 12 | long/Long | int/pyfory.varu32 | Type.varu32() | uint32_t | uint32 | u32 | +| uint64 | 13 | long/Long | int/pyfory.uint64 | Type.uint64() | uint64_t | uint64 | u64 | +| varu64 | 14 | long/Long | int/pyfory.varu64 | Type.varu64() | uint64_t | uint64 | u64 | +| hybridu64 | 15 | long/Long | int/pyfory.hybridu64 | Type.hybridu64() | uint64_t | uint64 | u64 | +| float16 | 16 | float/Float | float/pyfory.float16 | Type.float16() | fory::float16_t | fory.float16 | fory::f16 | +| float32 | 17 | float/Float | float/pyfory.float32 | Type.float32() | float | float32 | f32 | +| float64 | 18 | double/Double | float/pyfory.float64 | Type.float64() | double | float64 | f64 | +| string | 19 | String | str | String | string | string | String/str | +| list | 20 | List/Collection | list/tuple | array | vector | slice | Vec | +| set | 21 | Set | set | / | set | fory.Set | Set | +| map | 22 | Map | dict | Map | unordered_map | map | HashMap | +| enum | 23 | Enum subclasses | enum subclasses | / | enum | / | enum | +| named_enum | 24 | Enum subclasses | enum subclasses | / | enum | / | enum | +| struct | 25 | pojo/record | data class | object | struct/class | struct | struct | +| compatible_struct | 26 | pojo/record | data class | object | struct/class | struct | struct | +| named_struct | 27 | pojo/record | data class | object | struct/class | struct | struct | +| named_compatible_struct | 28 | pojo/record | data class | object | struct/class | struct | struct | +| ext | 29 | pojo/record | data class | object | struct/class | struct | struct | +| named_ext | 30 | pojo/record | data class | object | struct/class | struct | struct | +| union | 31 | Union | typing.Union | / | `std::variant` | / | tagged union enum | +| none | 32 | null | None | null | `std::monostate` | nil | `()` | +| duration | 33 |
Duration | timedelta | Number | duration | Duration | Duration | +| timestamp | 34 | Instant | datetime | Number | std::chrono::nanoseconds | Time | DateTime | +| local_date | 35 | Date | datetime | Number | std::chrono::nanoseconds | Time | DateTime | +| decimal | 36 | BigDecimal | Decimal | bigint | / | / | / | +| binary | 37 | byte[] | bytes | / | `uint8_t[n]/vector` | `[n]uint8/[]T` | `Vec` | +| array | 38 | array | np.ndarray | / | / | array/slice | Vec | +| bool_array | 39 | bool[] | ndarray(np.bool\_) | / | `bool[n]` | `[n]bool/[]T` | `Vec` | +| int8_array | 40 | byte[] | ndarray(int8) | / | `int8_t[n]/vector` | `[n]int8/[]T` | `Vec` | +| int16_array | 41 | short[] | ndarray(int16) | / | `int16_t[n]/vector` | `[n]int16/[]T` | `Vec` | +| int32_array | 42 | int[] | ndarray(int32) | / | `int32_t[n]/vector` | `[n]int32/[]T` | `Vec` | +| int64_array | 43 | long[] | ndarray(int64) | / | `int64_t[n]/vector` | `[n]int64/[]T` | `Vec` | +| uint8_array | 44 | short[] | ndarray(uint8) | / | `uint8_t[n]/vector` | `[n]uint8/[]T` | `Vec` | +| uint16_array | 45 | int[] | ndarray(uint16) | / | `uint16_t[n]/vector` | `[n]uint16/[]T` | `Vec` | +| uint32_array | 46 | long[] | ndarray(uint32) | / | `uint32_t[n]/vector` | `[n]uint32/[]T` | `Vec` | +| uint64_array | 47 | long[] | ndarray(uint64) | / | `uint64_t[n]/vector` | `[n]uint64/[]T` | `Vec` | +| float16_array | 48 | float[] | ndarray(float16) | / | `fory::float16_t[n]/vector` | `[n]float16/[]T` | `Vec` | +| float32_array | 49 | float[] | ndarray(float32) | / | `float[n]/vector` | `[n]float32/[]T` | `Vec` | +| float64_array | 50 | double[] | ndarray(float64) | / | `double[n]/vector` | `[n]float64/[]T` | `Vec` | ## Type info(not implemented currently) From 0f08ce4c2984c4fe7c97e85fdad414f578bf8ade Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Mon, 5 Jan 2026 14:02:20 +0800 Subject: [PATCH 02/44] update buffer read/write API --- .../src/main/java/org/apache/fory/Fory.java | 2 +- .../org/apache/fory/config/ForyBuilder.java | 
8 +- .../org/apache/fory/config/LongEncoding.java | 10 +- .../org/apache/fory/memory/MemoryBuffer.java | 121 +++++++++++++++--- .../fory/serializer/PrimitiveSerializers.java | 20 +-- .../test/java/org/apache/fory/StreamTest.java | 12 +- .../apache/fory/memory/MemoryBufferTest.java | 116 +++++++++++------ .../serializer/PrimitiveSerializersTest.java | 4 +- 8 files changed, 212 insertions(+), 81 deletions(-) diff --git a/java/fory-core/src/main/java/org/apache/fory/Fory.java b/java/fory-core/src/main/java/org/apache/fory/Fory.java index 98413ca2a1..5f9dea9fd4 100644 --- a/java/fory-core/src/main/java/org/apache/fory/Fory.java +++ b/java/fory-core/src/main/java/org/apache/fory/Fory.java @@ -692,7 +692,7 @@ public MemoryBuffer readBufferObject(MemoryBuffer buffer) { int size; // TODO(chaokunyang) Remove branch when other languages support aligned varint. if (!crossLanguage) { - size = buffer.readAlignedVarUint(); + size = buffer.readAlignedVarUint32(); } else { size = buffer.readVarUint32(); } diff --git a/java/fory-core/src/main/java/org/apache/fory/config/ForyBuilder.java b/java/fory-core/src/main/java/org/apache/fory/config/ForyBuilder.java index 9d4cce82c3..4f5687cdd9 100644 --- a/java/fory-core/src/main/java/org/apache/fory/config/ForyBuilder.java +++ b/java/fory-core/src/main/java/org/apache/fory/config/ForyBuilder.java @@ -67,7 +67,7 @@ public final class ForyBuilder { boolean timeRefIgnored = true; ClassLoader classLoader; boolean compressInt = true; - public LongEncoding longEncoding = LongEncoding.SLI; + public LongEncoding longEncoding = LongEncoding.HYBRID; boolean compressIntArray = false; boolean compressLongArray = false; boolean compressString = false; @@ -183,11 +183,11 @@ public ForyBuilder withIntCompressed(boolean intCompressed) { } /** - * Use variable length encoding for long. Enabled by default, use {@link LongEncoding#SLI} (Small + * Use variable length encoding for long. 
Enabled by default, use {@link LongEncoding#HYBRID} (Small * long as int) for long encoding. */ public ForyBuilder withLongCompressed(boolean longCompressed) { - return withLongCompressed(longCompressed ? LongEncoding.SLI : LongEncoding.LE_RAW_BYTES); + return withLongCompressed(longCompressed ? LongEncoding.HYBRID : LongEncoding.LE_RAW_BYTES); } /** Use variable length encoding for long. */ @@ -429,7 +429,7 @@ private void finish() { } if (language != Language.JAVA) { stringRefIgnored = true; - longEncoding = LongEncoding.PVL; + longEncoding = LongEncoding.VARINT64; compressInt = true; compressString = true; } diff --git a/java/fory-core/src/main/java/org/apache/fory/config/LongEncoding.java b/java/fory-core/src/main/java/org/apache/fory/config/LongEncoding.java index 128f7831ba..5474a15006 100644 --- a/java/fory-core/src/main/java/org/apache/fory/config/LongEncoding.java +++ b/java/fory-core/src/main/java/org/apache/fory/config/LongEncoding.java @@ -20,19 +20,19 @@ package org.apache.fory.config; /** - * Encoding option for long. Default encoding is fory SLI(Small long as int) encoding: {@link #SLI}. + * Encoding option for long. Default encoding is fory HYBRID(Small long as int) encoding: {@link #HYBRID}. */ public enum LongEncoding { /** - * Fory SLI(Small long as int) Encoding: + * Fory HYBRID(Small long as int) Encoding: *
  • If long is in [0xc0000000, 0x3fffffff], encode as 4 bytes int: `| little-endian: ((int) * value) << 1 |` *
  • Otherwise write as 9 bytes: `| 0b1 | little-endian 8bytes long |`. * - *

    Faster than {@link #PVL}, but compression is not good as {@link #PVL} such as for ints + *

    Faster than {@link #VARINT64}, but compression is not good as {@link #VARINT64} such as for ints * in short range. */ - SLI, + HYBRID, /** * Fory Progressive Variable-length Long Encoding: *

  • positive long format: first bit in every byte indicate whether has next byte, then next @@ -40,7 +40,7 @@ public enum LongEncoding { *
  • Negative number will be converted to positive number by ` (v << 1) ^ (v >> 63)` to reduce * cost of small negative numbers. */ - PVL, + VARINT64, /** Write long as little endian 8bytes, no compression. */ LE_RAW_BYTES, } diff --git a/java/fory-core/src/main/java/org/apache/fory/memory/MemoryBuffer.java b/java/fory-core/src/main/java/org/apache/fory/memory/MemoryBuffer.java index 29331f2691..c28a3e9525 100644 --- a/java/fory-core/src/main/java/org/apache/fory/memory/MemoryBuffer.java +++ b/java/fory-core/src/main/java/org/apache/fory/memory/MemoryBuffer.java @@ -1137,22 +1137,58 @@ public int _unsafeWriteVarUint64(long value) { } /** - * Write long using fory SLI(Small long as int) encoding. If long is in [0xc0000000, 0x3fffffff], + * Write signed long using fory Hybrid(Small long as int) encoding. If long is in [0xc0000000, 0x3fffffff], * encode as 4 bytes int: | little-endian: ((int) value) << 1 |; Otherwise write as 9 bytes: | 0b1 * | little-endian 8bytes long | */ - public int writeSliInt64(long value) { + public int writeHybridInt64(long value) { ensure(writerIndex + 9); - return _unsafeWriteSliInt64(value); + return _unsafeWriteHybridInt64(value); + } + + /** + * Write unsigned long using fory Hybrid(Small long as int) encoding. If long is in [0, + * 0x7fffffff], encode as 4 bytes int: | little-endian: ((int) value) << 1 |; Otherwise write as 9 + * bytes: | 0b1 | little-endian 8bytes long | + */ + public int writeHybridUint64(long value) { + ensure(writerIndex + 9); + return _unsafeWriteHybridUint64(value); + } + + /** Write unsigned long using fory Hybrid(Small Long as Int) encoding. */ + // CHECKSTYLE.OFF:MethodName + public int _unsafeWriteHybridUint64(long value) { + // CHECKSTYLE.ON:MethodName + final int writerIndex = this.writerIndex; + final long pos = address + writerIndex; + final byte[] heapMemory = this.heapMemory; + if (value >= 0 && value <= Integer.MAX_VALUE) { + int v = ((int) value) << 1; // bit 0 unset, means int. 
+ if (!LITTLE_ENDIAN) { + v = Integer.reverseBytes(v); + } + UNSAFE.putInt(heapMemory, pos, v); + this.writerIndex = writerIndex + 4; + return 4; + } else { + UNSAFE.putByte(heapMemory, pos, BIG_LONG_FLAG); + if (!LITTLE_ENDIAN) { + value = Long.reverseBytes(value); + } + UNSAFE.putLong(heapMemory, pos + 1, value); + this.writerIndex = writerIndex + 9; + return 9; + } } private static final long HALF_MAX_INT_VALUE = Integer.MAX_VALUE / 2; private static final long HALF_MIN_INT_VALUE = Integer.MIN_VALUE / 2; private static final byte BIG_LONG_FLAG = 0b1; // bit 0 set, means big long. - /** Write long using fory SLI(Small Long as Int) encoding. */ + /** Write long using fory Hybrid(Small Long as Int) encoding. */ // CHECKSTYLE.OFF:MethodName - public int _unsafeWriteSliInt64(long value) { + public int _unsafeWriteHybridInt64(long value) { // CHECKSTYLE.ON:MethodName final int writerIndex = this.writerIndex; final long pos = address + writerIndex; @@ -1487,18 +1523,71 @@ public long _readInt64OnBE() { return Long.reverseBytes(UNSAFE.getLong(heapMemory, address + readerIdx)); } - /** Read fory SLI(Small Long as Int) encoded long. */ - public long readSliInt64() { + /** Read signed fory Hybrid(Small Long as Int) encoded long. */ + public long readHybridInt64() { if (LITTLE_ENDIAN) { - return _readSliInt64OnLE(); + return _readHybridInt64OnLE(); } else { - return _readSliInt64OnBE(); + return _readHybridInt64OnBE(); } } + /** Read unsigned fory Hybrid(Small Long as Int) encoded long. 
*/ + public long readHybridUint64() { + if (LITTLE_ENDIAN) { + return _readHybridUint64OnLE(); + } else { + return _readHybridUint64OnBE(); + } + } + + @CodegenInvoke + // CHECKSTYLE.OFF:MethodName + public long _readHybridUint64OnLE() { + // CHECKSTYLE.ON:MethodName + final int readIdx = readerIndex; + int diff = size - readIdx; + if (diff < 4) { + streamReader.fillBuffer(4 - diff); + } + int i = UNSAFE.getInt(heapMemory, address + readIdx); + if ((i & 0b1) != 0b1) { + readerIndex = readIdx + 4; + return i >>> 1; // unsigned right shift + } + diff = size - readIdx; + if (diff < 9) { + streamReader.fillBuffer(9 - diff); + } + readerIndex = readIdx + 9; + return UNSAFE.getLong(heapMemory, address + readIdx + 1); + } + + @CodegenInvoke + // CHECKSTYLE.OFF:MethodName + public long _readHybridUint64OnBE() { + // CHECKSTYLE.ON:MethodName + final int readIdx = readerIndex; + int diff = size - readIdx; + if (diff < 4) { + streamReader.fillBuffer(4 - diff); + } + int i = Integer.reverseBytes(UNSAFE.getInt(heapMemory, address + readIdx)); + if ((i & 0b1) != 0b1) { + readerIndex = readIdx + 4; + return i >>> 1; // unsigned right shift + } + diff = size - readIdx; + if (diff < 9) { + streamReader.fillBuffer(9 - diff); + } + readerIndex = readIdx + 9; + return Long.reverseBytes(UNSAFE.getLong(heapMemory, address + readIdx + 1)); + } + @CodegenInvoke // CHECKSTYLE.OFF:MethodName - public long _readSliInt64OnLE() { + public long _readHybridInt64OnLE() { // CHECKSTYLE.ON:MethodName // Duplicate and manual inline for performance. // noinspection Duplicates @@ -1522,7 +1611,7 @@ public long _readSliInt64OnLE() { @CodegenInvoke // CHECKSTYLE.OFF:MethodName - public long _readSliInt64OnBE() { + public long _readHybridInt64OnBE() { // CHECKSTYLE.ON:MethodName // noinspection Duplicates final int readIdx = readerIndex; @@ -2065,11 +2154,11 @@ private long readVarUint64Slow() { } /** Reads the 1-9 byte int part of an aligned varint. 
*/ - public int readAlignedVarUint() { + public int readAlignedVarUint32() { int readerIdx = readerIndex; // use subtract to avoid overflow if (readerIdx < size - 10) { - return slowReadAlignedVarUint(); + return slowReadAlignedVarUint32(); } long pos = address + readerIdx; long startPos = pos; @@ -2105,7 +2194,7 @@ public int readAlignedVarUint() { return result; } - public int slowReadAlignedVarUint() { + public int slowReadAlignedVarUint32() { int b = readByte(); // Mask first 6 bits, // bit 8 `set` indicates have next data bytes. @@ -2335,7 +2424,7 @@ public byte[] readBytesAndSize() { } public byte[] readBytesWithAlignedSize() { - final int numBytes = readAlignedVarUint(); + final int numBytes = readAlignedVarUint32(); int readerIdx = readerIndex; final byte[] arr = new byte[numBytes]; // use subtract to avoid overflow @@ -2392,7 +2481,7 @@ public char[] readCharsAndSize() { } public char[] readCharsWithAlignedSize() { - final int numBytes = readAlignedVarUint(); + final int numBytes = readAlignedVarUint32(); return readChars(numBytes); } diff --git a/java/fory-core/src/main/java/org/apache/fory/serializer/PrimitiveSerializers.java b/java/fory-core/src/main/java/org/apache/fory/serializer/PrimitiveSerializers.java index aecc0b56b8..1ef0ceb5ce 100644 --- a/java/fory-core/src/main/java/org/apache/fory/serializer/PrimitiveSerializers.java +++ b/java/fory-core/src/main/java/org/apache/fory/serializer/PrimitiveSerializers.java @@ -199,9 +199,9 @@ public static Expression writeInt64( switch (longEncoding) { case LE_RAW_BYTES: return new Invoke(buffer, "writeInt64", v); - case SLI: - return new Invoke(buffer, ensureBounds ? "writeSliInt64" : "_unsafeWriteSliInt64", v); - case PVL: + case HYBRID: + return new Invoke(buffer, ensureBounds ? "writeHybridInt64" : "_unsafeWriteHybridInt64", v); + case VARINT64: return new Invoke(buffer, ensureBounds ? 
"writeVarInt64" : "_unsafeWriteVarInt64", v); default: throw new UnsupportedOperationException("Unsupported long encoding " + longEncoding); @@ -209,8 +209,8 @@ public static Expression writeInt64( } public static void writeInt64(MemoryBuffer buffer, long value, LongEncoding longEncoding) { - if (longEncoding == LongEncoding.SLI) { - buffer.writeSliInt64(value); + if (longEncoding == LongEncoding.HYBRID) { + buffer.writeHybridInt64(value); } else if (longEncoding == LongEncoding.LE_RAW_BYTES) { buffer.writeInt64(value); } else { @@ -219,8 +219,8 @@ public static void writeInt64(MemoryBuffer buffer, long value, LongEncoding long } public static long readInt64(MemoryBuffer buffer, LongEncoding longEncoding) { - if (longEncoding == LongEncoding.SLI) { - return buffer.readSliInt64(); + if (longEncoding == LongEncoding.HYBRID) { + return buffer.readHybridInt64(); } else if (longEncoding == LongEncoding.LE_RAW_BYTES) { return buffer.readInt64(); } else { @@ -236,9 +236,9 @@ public static String readLongFunc(LongEncoding longEncoding) { switch (longEncoding) { case LE_RAW_BYTES: return Platform.IS_LITTLE_ENDIAN ? "_readInt64OnLE" : "_readInt64OnBE"; - case SLI: - return Platform.IS_LITTLE_ENDIAN ? "_readSliInt64OnLE" : "_readSliInt64OnBE"; - case PVL: + case HYBRID: + return Platform.IS_LITTLE_ENDIAN ? "_readHybridInt64OnLE" : "_readHybridInt64OnBE"; + case VARINT64: return Platform.IS_LITTLE_ENDIAN ? 
"_readVarInt64OnLE" : "_readVarInt64OnBE"; default: throw new UnsupportedOperationException("Unsupported long encoding " + longEncoding); diff --git a/java/fory-core/src/test/java/org/apache/fory/StreamTest.java b/java/fory-core/src/test/java/org/apache/fory/StreamTest.java index b57ad362f2..e86c258186 100644 --- a/java/fory-core/src/test/java/org/apache/fory/StreamTest.java +++ b/java/fory-core/src/test/java/org/apache/fory/StreamTest.java @@ -70,9 +70,9 @@ public void testBufferStream() { buffer0.writeVarUint64(i); buffer0.writeVarUint64(Long.MIN_VALUE); buffer0.writeVarUint64(Long.MAX_VALUE); - buffer0.writeSliInt64(i); - buffer0.writeSliInt64(Long.MIN_VALUE); - buffer0.writeSliInt64(Long.MAX_VALUE); + buffer0.writeHybridInt64(i); + buffer0.writeHybridInt64(Long.MIN_VALUE); + buffer0.writeHybridInt64(Long.MAX_VALUE); } byte[] bytes = buffer0.getBytes(0, buffer0.writerIndex()); ForyInputStream stream = @@ -105,9 +105,9 @@ public synchronized int read(byte[] b, int off, int len) { assertEquals(buffer.readVarUint64(), i); assertEquals(buffer.readVarUint64(), Long.MIN_VALUE); assertEquals(buffer.readVarUint64(), Long.MAX_VALUE); - assertEquals(buffer.readSliInt64(), i); - assertEquals(buffer.readSliInt64(), Long.MIN_VALUE); - assertEquals(buffer.readSliInt64(), Long.MAX_VALUE); + assertEquals(buffer.readHybridInt64(), i); + assertEquals(buffer.readHybridInt64(), Long.MIN_VALUE); + assertEquals(buffer.readHybridInt64(), Long.MAX_VALUE); } } diff --git a/java/fory-core/src/test/java/org/apache/fory/memory/MemoryBufferTest.java b/java/fory-core/src/test/java/org/apache/fory/memory/MemoryBufferTest.java index cc3fa8c50f..59d6a94f74 100644 --- a/java/fory-core/src/test/java/org/apache/fory/memory/MemoryBufferTest.java +++ b/java/fory-core/src/test/java/org/apache/fory/memory/MemoryBufferTest.java @@ -499,27 +499,27 @@ private void checkVarUint64(MemoryBuffer buf, long value, int bytesWritten) { public void testWriteVarUint32Aligned() { MemoryBuffer buf = 
MemoryUtils.buffer(16); assertEquals(buf.writeVarUint32Aligned(1), 4); - assertEquals(buf.readAlignedVarUint(), 1); + assertEquals(buf.readAlignedVarUint32(), 1); assertEquals(buf.writeVarUint32Aligned(1 << 5), 4); - assertEquals(buf.readAlignedVarUint(), 1 << 5); + assertEquals(buf.readAlignedVarUint32(), 1 << 5); assertEquals(buf.writeVarUint32Aligned(1 << 10), 4); - assertEquals(buf.readAlignedVarUint(), 1 << 10); + assertEquals(buf.readAlignedVarUint32(), 1 << 10); assertEquals(buf.writeVarUint32Aligned(1 << 15), 4); - assertEquals(buf.readAlignedVarUint(), 1 << 15); + assertEquals(buf.readAlignedVarUint32(), 1 << 15); assertEquals(buf.writeVarUint32Aligned(1 << 20), 4); - assertEquals(buf.readAlignedVarUint(), 1 << 20); + assertEquals(buf.readAlignedVarUint32(), 1 << 20); assertEquals(buf.writeVarUint32Aligned(1 << 25), 8); - assertEquals(buf.readAlignedVarUint(), 1 << 25); + assertEquals(buf.readAlignedVarUint32(), 1 << 25); assertEquals(buf.writeVarUint32Aligned(1 << 30), 8); - assertEquals(buf.readAlignedVarUint(), 1 << 30); + assertEquals(buf.readAlignedVarUint32(), 1 << 30); assertEquals(buf.writeVarUint32Aligned(Integer.MAX_VALUE), 8); - assertEquals(buf.readAlignedVarUint(), Integer.MAX_VALUE); + assertEquals(buf.readAlignedVarUint32(), Integer.MAX_VALUE); buf.writeByte((byte) 1); // make address unaligned. buf.writeInt16((short) 1); // make address unaligned. 
assertEquals(buf.writeVarUint32Aligned(Integer.MAX_VALUE), 9); buf.readByte(); buf.readInt16(); - assertEquals(buf.readAlignedVarUint(), Integer.MAX_VALUE); + assertEquals(buf.readAlignedVarUint32(), Integer.MAX_VALUE); for (int i = 0; i < 32; i++) { MemoryBuffer buf1 = MemoryUtils.buffer(16); assertAligned(i, buf1); @@ -536,20 +536,20 @@ private void assertAligned(int i, MemoryBuffer buffer) { buffer.writeVarUint32Aligned(1 << j); assertEquals(buffer.writerIndex() % 4, 0); buffer.readByte(); - assertEquals(buffer.readAlignedVarUint(), 1 << j); + assertEquals(buffer.readAlignedVarUint32(), 1 << j); for (int k = 0; k < i % 4; k++) { buffer.writeByte((byte) i); // make address unaligned. buffer.writeVarUint32Aligned(1 << j); assertEquals(buffer.writerIndex() % 4, 0); buffer.readByte(); - assertEquals(buffer.readAlignedVarUint(), 1 << j); + assertEquals(buffer.readAlignedVarUint32(), 1 << j); } } buffer.writeByte((byte) i); // make address unaligned. buffer.writeVarUint32Aligned(Integer.MAX_VALUE); assertEquals(buffer.writerIndex() % 4, 0); buffer.readByte(); - assertEquals(buffer.readAlignedVarUint(), Integer.MAX_VALUE); + assertEquals(buffer.readAlignedVarUint32(), Integer.MAX_VALUE); } @Test @@ -561,43 +561,85 @@ public void testGetShortB() { } @Test - public void testWriteSliInt64() { + public void testWriteHybridInt64() { MemoryBuffer buf = MemoryUtils.buffer(8); - checkSliInt64(buf, -1, 4); + checkHybridInt64(buf, -1, 4); for (int i = 0; i < 10; i++) { for (int j = 0; j < i; j++) { - checkSliInt64(buf(i), -1, 4); - checkSliInt64(buf(i), 1, 4); - checkSliInt64(buf(i), 1L << 6, 4); - checkSliInt64(buf(i), 1L << 7, 4); - checkSliInt64(buf(i), -(2 << 5), 4); - checkSliInt64(buf(i), -(2 << 6), 4); - checkSliInt64(buf(i), 1L << 28, 4); - checkSliInt64(buf(i), Integer.MAX_VALUE / 2, 4); - checkSliInt64(buf(i), Integer.MIN_VALUE / 2, 4); - checkSliInt64(buf(i), -1L << 30, 4); - checkSliInt64(buf(i), 1L << 30, 9); - checkSliInt64(buf(i), Integer.MAX_VALUE, 9); - 
checkSliInt64(buf(i), Integer.MIN_VALUE, 9); - checkSliInt64(buf(i), -1L << 31, 9); - checkSliInt64(buf(i), 1L << 31, 9); - checkSliInt64(buf(i), -1L << 32, 9); - checkSliInt64(buf(i), 1L << 32, 9); - checkSliInt64(buf(i), Long.MAX_VALUE, 9); - checkSliInt64(buf(i), Long.MIN_VALUE, 9); + checkHybridInt64(buf(i), -1, 4); + checkHybridInt64(buf(i), 1, 4); + checkHybridInt64(buf(i), 1L << 6, 4); + checkHybridInt64(buf(i), 1L << 7, 4); + checkHybridInt64(buf(i), -(2 << 5), 4); + checkHybridInt64(buf(i), -(2 << 6), 4); + checkHybridInt64(buf(i), 1L << 28, 4); + checkHybridInt64(buf(i), Integer.MAX_VALUE / 2, 4); + checkHybridInt64(buf(i), Integer.MIN_VALUE / 2, 4); + checkHybridInt64(buf(i), -1L << 30, 4); + checkHybridInt64(buf(i), 1L << 30, 9); + checkHybridInt64(buf(i), Integer.MAX_VALUE, 9); + checkHybridInt64(buf(i), Integer.MIN_VALUE, 9); + checkHybridInt64(buf(i), -1L << 31, 9); + checkHybridInt64(buf(i), 1L << 31, 9); + checkHybridInt64(buf(i), -1L << 32, 9); + checkHybridInt64(buf(i), 1L << 32, 9); + checkHybridInt64(buf(i), Long.MAX_VALUE, 9); + checkHybridInt64(buf(i), Long.MIN_VALUE, 9); } } } - private void checkSliInt64(MemoryBuffer buf, long value, int bytesWritten) { + private void checkHybridInt64(MemoryBuffer buf, long value, int bytesWritten) { int readerIndex = buf.readerIndex(); assertEquals(buf.writerIndex(), readerIndex); - int actualBytesWritten = buf.writeSliInt64(value); + int actualBytesWritten = buf.writeHybridInt64(value); assertEquals(actualBytesWritten, bytesWritten); - long varLong = buf.readSliInt64(); + long varLong = buf.readHybridInt64(); assertEquals(buf.writerIndex(), buf.readerIndex()); assertEquals(value, varLong); - assertEquals(buf.slice(readerIndex, buf.readerIndex() - readerIndex).readSliInt64(), value); + assertEquals(buf.slice(readerIndex, buf.readerIndex() - readerIndex).readHybridInt64(), value); + } + + @Test + public void testWriteHybridUint64() { + MemoryBuffer buf = MemoryUtils.buffer(8); + checkHybridUint64(buf, 0, 
4); + checkHybridUint64(buf, 1, 4); + for (int i = 0; i < 10; i++) { + for (int j = 0; j < i; j++) { + // Values in [0, Integer.MAX_VALUE] should use 4 bytes + checkHybridUint64(buf(i), 0, 4); + checkHybridUint64(buf(i), 1, 4); + checkHybridUint64(buf(i), 1L << 6, 4); + checkHybridUint64(buf(i), 1L << 7, 4); + checkHybridUint64(buf(i), 1L << 28, 4); + checkHybridUint64(buf(i), 1L << 30, 4); + checkHybridUint64(buf(i), Integer.MAX_VALUE, 4); + // Values > Integer.MAX_VALUE should use 9 bytes + checkHybridUint64(buf(i), (long) Integer.MAX_VALUE + 1, 9); + checkHybridUint64(buf(i), 1L << 31, 9); + checkHybridUint64(buf(i), 1L << 32, 9); + checkHybridUint64(buf(i), 1L << 62, 9); + checkHybridUint64(buf(i), Long.MAX_VALUE, 9); + // Negative values (large unsigned) should use 9 bytes + checkHybridUint64(buf(i), -1, 9); + checkHybridUint64(buf(i), -1L << 30, 9); + checkHybridUint64(buf(i), Integer.MIN_VALUE, 9); + checkHybridUint64(buf(i), Long.MIN_VALUE, 9); + } + } + } + + private void checkHybridUint64(MemoryBuffer buf, long value, int bytesWritten) { + int readerIndex = buf.readerIndex(); + assertEquals(buf.writerIndex(), readerIndex); + int actualBytesWritten = buf.writeHybridUint64(value); + assertEquals(actualBytesWritten, bytesWritten); + long varLong = buf.readHybridUint64(); + assertEquals(buf.writerIndex(), buf.readerIndex()); + assertEquals(value, varLong); + assertEquals( + buf.slice(readerIndex, buf.readerIndex() - readerIndex).readHybridUint64(), value); } @Test diff --git a/java/fory-core/src/test/java/org/apache/fory/serializer/PrimitiveSerializersTest.java b/java/fory-core/src/test/java/org/apache/fory/serializer/PrimitiveSerializersTest.java index 7304783d04..1371358411 100644 --- a/java/fory-core/src/test/java/org/apache/fory/serializer/PrimitiveSerializersTest.java +++ b/java/fory-core/src/test/java/org/apache/fory/serializer/PrimitiveSerializersTest.java @@ -106,9 +106,9 @@ public void testPrimitiveStruct(boolean compressNumber, boolean codegen) { 
.withCodegen(codegen) .requireClassRegistration(false); serDeCheck( - builder.withNumberCompressed(true).withLongCompressed(LongEncoding.PVL).build(), struct); + builder.withNumberCompressed(true).withLongCompressed(LongEncoding.VARINT64).build(), struct); serDeCheck( - builder.withNumberCompressed(true).withLongCompressed(LongEncoding.SLI).build(), struct); + builder.withNumberCompressed(true).withLongCompressed(LongEncoding.HYBRID).build(), struct); } else { Fory fory = Fory.builder() From d0eed1337c6eb414d41b8ce37c1988c4b0236f00 Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Mon, 5 Jan 2026 14:27:14 +0800 Subject: [PATCH 03/44] rename _util to buffer --- AGENTS.md | 2 +- BUILD | 18 +++++++++--------- python/pyfory/__init__.py | 2 +- python/pyfory/{_util.pxd => buffer.pxd} | 0 python/pyfory/buffer.py | 18 ------------------ python/pyfory/{_util.pyx => buffer.pyx} | 0 python/pyfory/format/row.pxi | 2 +- python/pyfory/meta/typedef.py | 2 +- python/pyfory/meta/typedef_decoder.py | 2 +- python/pyfory/meta/typedef_encoder.py | 2 +- python/pyfory/serialization.pyx | 4 ++-- python/pyfory/tests/test_typedef_encoding.py | 2 +- python/pyfory/type_util.py | 2 +- python/pyfory/utils.py | 2 +- rust/tests/tests/test_cross_language.rs | 4 ++-- 15 files changed, 22 insertions(+), 40 deletions(-) rename python/pyfory/{_util.pxd => buffer.pxd} (100%) delete mode 100644 python/pyfory/buffer.py rename python/pyfory/{_util.pyx => buffer.pyx} (100%) diff --git a/AGENTS.md b/AGENTS.md index 9202efd6a8..ae5a7d412f 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -450,7 +450,7 @@ Code structure: - `python/pyfory/includes`: Cython headers for `c++` functions and classes. - `python/pyfory/resolver.py`: resolving shared/circular references when ref tracking is enabled in pure python mode - `python/pyfory/format`: Fory row format encoding and decoding, arrow columnar format interoperation -- `python/pyfory/_util.pyx`: Buffer for reading/writing data, string utilities. 
Used by `serialization.pyx` and `python/pyfory/format` at the same time. +- `python/pyfory/buffer.pyx`: Buffer for reading/writing data, string utilities. Used by `serialization.pyx` and `python/pyfory/format` at the same time. #### Go diff --git a/BUILD b/BUILD index abdfdc250a..809727199e 100644 --- a/BUILD +++ b/BUILD @@ -20,11 +20,11 @@ load("@hedron_compile_commands//:refresh_compile_commands.bzl", "refresh_compile pyx_library( - name = "_util", + name = "buffer", srcs = glob([ "python/pyfory/includes/*.pxd", - "python/pyfory/_util.pxd", - "python/pyfory/_util.pyx", + "python/pyfory/buffer.pxd", + "python/pyfory/buffer.pyx", "python/pyfory/__init__.py", ]), cc_kwargs = dict( @@ -54,7 +54,7 @@ pyx_library( name = "serialization", srcs = glob([ "python/pyfory/includes/*.pxd", - "python/pyfory/_util.pxd", + "python/pyfory/buffer.pxd", "python/pyfory/serialization.pyx", "python/pyfory/*.pxi", "python/pyfory/__init__.py", @@ -76,7 +76,7 @@ pyx_library( [ "python/pyfory/__init__.py", "python/pyfory/includes/*.pxd", - "python/pyfory/_util.pxd", + "python/pyfory/buffer.pxd", "python/pyfory/*.pxi", "python/pyfory/format/_format.pyx", "python/pyfory/format/__init__.py", @@ -95,7 +95,7 @@ pyx_library( genrule( name = "cp_fory_so", srcs = [ - ":python/pyfory/_util.so", + ":python/pyfory/buffer.so", ":python/pyfory/lib/mmh3/mmh3.so", ":python/pyfory/format/_format.so", ":python/pyfory/serialization.so", @@ -110,12 +110,12 @@ genrule( u_name=`uname -s` if [ "$${u_name: 0: 4}" == "MING" ] || [ "$${u_name: 0: 4}" == "MSYS" ] then - cp -f $(location python/pyfory/_util.so) "$$WORK_DIR/python/pyfory/_util.pyd" + cp -f $(location python/pyfory/buffer.so) "$$WORK_DIR/python/pyfory/buffer.pyd" cp -f $(location python/pyfory/lib/mmh3/mmh3.so) "$$WORK_DIR/python/pyfory/lib/mmh3/mmh3.pyd" cp -f $(location python/pyfory/format/_format.so) "$$WORK_DIR/python/pyfory/format/_format.pyd" cp -f $(location python/pyfory/serialization.so) "$$WORK_DIR/python/pyfory/serialization.pyd" else - 
cp -f $(location python/pyfory/_util.so) "$$WORK_DIR/python/pyfory" + cp -f $(location python/pyfory/buffer.so) "$$WORK_DIR/python/pyfory" cp -f $(location python/pyfory/lib/mmh3/mmh3.so) "$$WORK_DIR/python/pyfory/lib/mmh3" cp -f $(location python/pyfory/format/_format.so) "$$WORK_DIR/python/pyfory/format" cp -f $(location python/pyfory/serialization.so) "$$WORK_DIR/python/pyfory" @@ -131,4 +131,4 @@ refresh_compile_commands( name = "refresh_compile_commands", exclude_headers = "all", exclude_external_sources = True, -) \ No newline at end of file +) diff --git a/python/pyfory/__init__.py b/python/pyfory/__init__.py index 3a45f38abc..ccf12f2cad 100644 --- a/python/pyfory/__init__.py +++ b/python/pyfory/__init__.py @@ -85,7 +85,7 @@ dataslots, ) from pyfory.policy import DeserializationPolicy # noqa: F401 # pylint: disable=unused-import -from pyfory._util import Buffer # noqa: F401 # pylint: disable=unused-import +from pyfory.buffer import Buffer # noqa: F401 # pylint: disable=unused-import __version__ = "0.14.1.dev" diff --git a/python/pyfory/_util.pxd b/python/pyfory/buffer.pxd similarity index 100% rename from python/pyfory/_util.pxd rename to python/pyfory/buffer.pxd diff --git a/python/pyfory/buffer.py b/python/pyfory/buffer.py deleted file mode 100644 index 921e8a9dc7..0000000000 --- a/python/pyfory/buffer.py +++ /dev/null @@ -1,18 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from pyfory._util import Buffer # noqa: F401 # pylint: disable=unused-import diff --git a/python/pyfory/_util.pyx b/python/pyfory/buffer.pyx similarity index 100% rename from python/pyfory/_util.pyx rename to python/pyfory/buffer.pyx diff --git a/python/pyfory/format/row.pxi b/python/pyfory/format/row.pxi index ca3ed5692d..ec19a65a8c 100644 --- a/python/pyfory/format/row.pxi +++ b/python/pyfory/format/row.pxi @@ -23,7 +23,7 @@ from pyfory.includes.libformat cimport ( CGetter, CArrayData, CMapData, CRow, CTypeId, CSchema, CListType, CMapType, fory_schema ) -from pyfory._util cimport Buffer +from pyfory.buffer cimport Buffer from libcpp.memory cimport shared_ptr from libcpp.vector cimport vector from datetime import datetime, date diff --git a/python/pyfory/meta/typedef.py b/python/pyfory/meta/typedef.py index e5853b241f..1218a039d2 100644 --- a/python/pyfory/meta/typedef.py +++ b/python/pyfory/meta/typedef.py @@ -19,7 +19,7 @@ import typing from typing import List from pyfory.types import TypeId, is_primitive_type, is_polymorphic_type -from pyfory._util import Buffer +from pyfory.buffer import Buffer from pyfory.type_util import infer_field from pyfory.meta.metastring import Encoding from pyfory.type_util import infer_field_types diff --git a/python/pyfory/meta/typedef_decoder.py b/python/pyfory/meta/typedef_decoder.py index 3c84e72fc0..9838cbfd33 100644 --- a/python/pyfory/meta/typedef_decoder.py +++ b/python/pyfory/meta/typedef_decoder.py @@ -23,7 +23,7 @@ from dataclasses import make_dataclass from typing import List, Any -from pyfory._util import 
Buffer +from pyfory.buffer import Buffer from pyfory.meta.typedef import TypeDef, FieldInfo, FieldType from pyfory.meta.typedef import ( SMALL_NUM_FIELDS_THRESHOLD, diff --git a/python/pyfory/meta/typedef_encoder.py b/python/pyfory/meta/typedef_encoder.py index ae0e56bb65..7d09756224 100644 --- a/python/pyfory/meta/typedef_encoder.py +++ b/python/pyfory/meta/typedef_encoder.py @@ -37,7 +37,7 @@ ) from pyfory.meta.metastring import MetaStringEncoder -from pyfory._util import Buffer +from pyfory.buffer import Buffer from pyfory.lib.mmh3 import hash_buffer diff --git a/python/pyfory/serialization.pyx b/python/pyfory/serialization.pyx index 6474a5bbb0..ec689b8d47 100644 --- a/python/pyfory/serialization.pyx +++ b/python/pyfory/serialization.pyx @@ -27,7 +27,7 @@ import time import warnings from typing import TypeVar, Union, Iterable -from pyfory._util import get_bit, set_bit, clear_bit +from pyfory.buffer import get_bit, set_bit, clear_bit from pyfory import _fory as fmod from pyfory._fory import Language from pyfory._fory import _ENABLE_TYPE_REGISTRATION_FORCIBLY @@ -50,7 +50,7 @@ from cpython.tuple cimport PyTuple_New, PyTuple_SET_ITEM from libcpp cimport bool as c_bool from libcpp.utility cimport pair from cython.operator cimport dereference as deref -from pyfory._util cimport Buffer +from pyfory.buffer cimport Buffer from pyfory.includes.libabsl cimport flat_hash_map from pyfory.meta.metastring import MetaStringDecoder diff --git a/python/pyfory/tests/test_typedef_encoding.py b/python/pyfory/tests/test_typedef_encoding.py index 1aea0d197f..7a44b43523 100644 --- a/python/pyfory/tests/test_typedef_encoding.py +++ b/python/pyfory/tests/test_typedef_encoding.py @@ -21,7 +21,7 @@ from dataclasses import dataclass from typing import List, Dict -from pyfory._util import Buffer +from pyfory.buffer import Buffer from pyfory.meta.typedef import ( TypeDef, FieldInfo, diff --git a/python/pyfory/type_util.py b/python/pyfory/type_util.py index 93983a1bf7..da8d6472c1 100644 --- 
a/python/pyfory/type_util.py +++ b/python/pyfory/type_util.py @@ -42,7 +42,7 @@ def record_class_factory(cls_name, field_names): >>> rex Dog(name='Rex', weight=32, owner='Bob') >>> Dog.__mro__ - (, ) + (, ) The factory also accepts a list or tuple of identifiers: diff --git a/python/pyfory/utils.py b/python/pyfory/utils.py index ea0865c9b7..fde19c26f0 100644 --- a/python/pyfory/utils.py +++ b/python/pyfory/utils.py @@ -20,7 +20,7 @@ import sys from typing import Dict, Callable -from pyfory._util import get_bit, set_bit, clear_bit, set_bit_to +from pyfory.buffer import get_bit, set_bit, clear_bit, set_bit_to # This method is derived from https://github.com/mars-project/mars/blob/c36c53fa22e10ef9477d9c454401a2f281375f31/mars/utils.py. diff --git a/rust/tests/tests/test_cross_language.rs b/rust/tests/tests/test_cross_language.rs index 54fb38637e..5f99cc75a9 100644 --- a/rust/tests/tests/test_cross_language.rs +++ b/rust/tests/tests/test_cross_language.rs @@ -74,7 +74,7 @@ fn test_buffer() { let data_file_path = get_data_file(); let bytes = fs::read(&data_file_path).unwrap(); let mut reader = Reader::new(bytes.as_slice()); - assert_eq!(reader.read_u8().unwrap(), 1); + assert!(reader.read_bool().unwrap()); assert_eq!(reader.read_i8().unwrap(), i8::MAX); assert_eq!(reader.read_i16().unwrap(), i16::MAX); assert_eq!(reader.read_i32().unwrap(), i32::MAX); @@ -88,7 +88,7 @@ fn test_buffer() { let mut buffer = vec![]; let mut writer = Writer::from_buffer(&mut buffer); - writer.write_u8(1); + writer.write_bool(true); writer.write_i8(i8::MAX); writer.write_i16(i16::MAX); writer.write_i32(i32::MAX); From a9701f7b880bf89c5df71134b0e9633c59d47aa8 Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Mon, 5 Jan 2026 14:28:11 +0800 Subject: [PATCH 04/44] rename _registry to registry --- AGENTS.md | 2 +- python/pyfory/__init__.py | 2 +- python/pyfory/_fory.py | 2 +- python/pyfory/{_registry.py => registry.py} | 0 python/pyfory/serialization.pyx | 2 +- 5 files changed, 4 insertions(+), 4 
deletions(-) rename python/pyfory/{_registry.py => registry.py} (100%) diff --git a/AGENTS.md b/AGENTS.md index ae5a7d412f..f1bce55b11 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -445,7 +445,7 @@ Code structure: - `python/pyfory/serialization.pyx`: Core serialization logic and entry point for cython mode based on `xlang serialization format` - `python/pyfory/_fory.py`: Serialization entry point for pure python mode based on `xlang serialization format` -- `python/pyfory/_registry.py`: Type registry, resolution and serializer dispatch for pure python mode, which is also used by cython mode. Cython mode use a cache to reduce invocations to this module. +- `python/pyfory/registry.py`: Type registry, resolution and serializer dispatch for pure python mode, which is also used by cython mode. Cython mode use a cache to reduce invocations to this module. - `python/pyfory/serializer.py`: Serializers for non-internal types - `python/pyfory/includes`: Cython headers for `c++` functions and classes. 
- `python/pyfory/resolver.py`: resolving shared/circular references when ref tracking is enabled in pure python mode diff --git a/python/pyfory/__init__.py b/python/pyfory/__init__.py index ccf12f2cad..a3d63b245a 100644 --- a/python/pyfory/__init__.py +++ b/python/pyfory/__init__.py @@ -30,7 +30,7 @@ except ImportError: ENABLE_FORY_CYTHON_SERIALIZATION = False -from pyfory._registry import TypeInfo +from pyfory.registry import TypeInfo if ENABLE_FORY_CYTHON_SERIALIZATION: from pyfory.serialization import Fory, TypeInfo # noqa: F401,F811 diff --git a/python/pyfory/_fory.py b/python/pyfory/_fory.py index 98bfae1119..cfc579ed61 100644 --- a/python/pyfory/_fory.py +++ b/python/pyfory/_fory.py @@ -242,7 +242,7 @@ def __init__( self.compatible = compatible self.field_nullable = field_nullable if self.is_py else False from pyfory.serialization import MetaStringResolver, SerializationContext - from pyfory._registry import TypeResolver + from pyfory.registry import TypeResolver self.metastring_resolver = MetaStringResolver() self.type_resolver = TypeResolver(self, meta_share=compatible, meta_compressor=meta_compressor) diff --git a/python/pyfory/_registry.py b/python/pyfory/registry.py similarity index 100% rename from python/pyfory/_registry.py rename to python/pyfory/registry.py diff --git a/python/pyfory/serialization.pyx b/python/pyfory/serialization.pyx index ec689b8d47..76ff98aef2 100644 --- a/python/pyfory/serialization.pyx +++ b/python/pyfory/serialization.pyx @@ -514,7 +514,7 @@ cdef class TypeResolver: self.fory = fory self.metastring_resolver = fory.metastring_resolver self.meta_share = meta_share - from pyfory._registry import TypeResolver + from pyfory.registry import TypeResolver self._resolver = TypeResolver(fory, meta_share=meta_share, meta_compressor=meta_compressor) def initialize(self): From ea6401e56941e6eca3830610d04318c965abf199 Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Tue, 6 Jan 2026 23:10:44 +0800 Subject: [PATCH 05/44] support unsigned types 
and refactor java type system --- .../src/main/java/org/apache/fory/Fory.java | 130 +-- .../org/apache/fory/annotation/Int32Type.java | 5 + .../org/apache/fory/annotation/Int64Type.java | 7 + .../apache/fory/annotation/Uint16Type.java | 3 + .../apache/fory/annotation/Uint32Type.java | 5 + .../apache/fory/annotation/Uint64Type.java | 7 + .../org/apache/fory/annotation/Uint8Type.java | 3 + .../fory/builder/BaseObjectCodecBuilder.java | 179 ++- .../org/apache/fory/builder/CodecBuilder.java | 32 +- .../fory/builder/ObjectCodecBuilder.java | 271 +++-- .../java/org/apache/fory/config/Config.java | 2 +- .../org/apache/fory/config/ForyBuilder.java | 9 +- .../org/apache/fory/config/LongEncoding.java | 13 +- .../org/apache/fory/memory/MemoryBuffer.java | 48 +- .../java/org/apache/fory/meta/ClassDef.java | 3 + .../org/apache/fory/meta/ClassDefDecoder.java | 10 +- .../org/apache/fory/meta/ClassDefEncoder.java | 12 +- .../java/org/apache/fory/meta/FieldTypes.java | 182 ++- .../org/apache/fory/meta/TypeExtMeta.java | 17 +- .../java/org/apache/fory/reflect/TypeRef.java | 23 +- .../apache/fory/resolver/ClassResolver.java | 146 ++- .../apache/fory/resolver/TypeResolver.java | 7 +- .../serializer/AbstractObjectSerializer.java | 1017 +++++++++-------- .../apache/fory/serializer/FieldGroups.java | 45 +- .../serializer/MetaSharedLayerSerializer.java | 86 +- .../fory/serializer/MetaSharedSerializer.java | 87 +- .../NonexistentClassSerializers.java | 5 +- .../fory/serializer/ObjectSerializer.java | 25 +- .../fory/serializer/PrimitiveSerializers.java | 23 +- .../apache/fory/serializer/Serializers.java | 51 +- .../java/org/apache/fory/type/Descriptor.java | 66 +- .../java/org/apache/fory/type/DispatchId.java | 134 +++ .../apache/fory/type/FinalObjectTypeStub.java | 32 - .../apache/fory/type/TypeAnnotationUtils.java | 70 ++ .../java/org/apache/fory/type/TypeUtils.java | 15 +- .../main/java/org/apache/fory/type/Types.java | 155 ++- .../apache/fory/util/DefaultValueUtils.java | 56 +- 
.../test/java/org/apache/fory/StreamTest.java | 12 +- .../apache/fory/memory/MemoryBufferTest.java | 15 +- .../org/apache/fory/meta/ClassDefTest.java | 9 +- .../serializer/PrimitiveSerializersTest.java | 6 +- 41 files changed, 1878 insertions(+), 1145 deletions(-) create mode 100644 java/fory-core/src/main/java/org/apache/fory/annotation/Int32Type.java create mode 100644 java/fory-core/src/main/java/org/apache/fory/annotation/Int64Type.java create mode 100644 java/fory-core/src/main/java/org/apache/fory/annotation/Uint16Type.java create mode 100644 java/fory-core/src/main/java/org/apache/fory/annotation/Uint32Type.java create mode 100644 java/fory-core/src/main/java/org/apache/fory/annotation/Uint64Type.java create mode 100644 java/fory-core/src/main/java/org/apache/fory/annotation/Uint8Type.java create mode 100644 java/fory-core/src/main/java/org/apache/fory/type/DispatchId.java delete mode 100644 java/fory-core/src/main/java/org/apache/fory/type/FinalObjectTypeStub.java create mode 100644 java/fory-core/src/main/java/org/apache/fory/type/TypeAnnotationUtils.java diff --git a/java/fory-core/src/main/java/org/apache/fory/Fory.java b/java/fory-core/src/main/java/org/apache/fory/Fory.java index 5f9dea9fd4..7e7c208407 100644 --- a/java/fory-core/src/main/java/org/apache/fory/Fory.java +++ b/java/fory-core/src/main/java/org/apache/fory/Fory.java @@ -580,12 +580,12 @@ public void xwriteData(MemoryBuffer buffer, ClassInfo classInfo, Object obj) { buffer.writeInt16((Short) obj); break; case Types.INT32: - case Types.VAR32: + case Types.VARINT32: buffer.writeVarInt32((Integer) obj); break; case Types.INT64: - case Types.VAR64: - case Types.H64: + case Types.VARINT64: + case Types.TAGGED_INT64: buffer.writeVarInt64((Long) obj); break; case Types.FLOAT32: @@ -605,35 +605,35 @@ public void xwriteData(MemoryBuffer buffer, ClassInfo classInfo, Object obj) { /** Write not null data to buffer. 
*/ private void writeData(MemoryBuffer buffer, ClassInfo classInfo, Object obj) { switch (classInfo.getClassId()) { - case ClassResolver.BOOLEAN_CLASS_ID: + case Types.BOOL: buffer.writeBoolean((Boolean) obj); break; - case ClassResolver.BYTE_CLASS_ID: + case Types.INT8: buffer.writeByte((Byte) obj); break; - case ClassResolver.CHAR_CLASS_ID: + case ClassResolver.CHAR_ID: buffer.writeChar((Character) obj); break; - case ClassResolver.SHORT_CLASS_ID: + case Types.INT16: buffer.writeInt16((Short) obj); break; - case ClassResolver.INTEGER_CLASS_ID: + case Types.INT32: if (compressInt) { buffer.writeVarInt32((Integer) obj); } else { buffer.writeInt32((Integer) obj); } break; - case ClassResolver.FLOAT_CLASS_ID: + case Types.FLOAT32: buffer.writeFloat32((Float) obj); break; - case ClassResolver.LONG_CLASS_ID: + case Types.INT64: LongSerializer.writeInt64(buffer, (Long) obj, longEncoding); break; - case ClassResolver.DOUBLE_CLASS_ID: + case Types.FLOAT64: buffer.writeFloat64((Double) obj); break; - case ClassResolver.STRING_CLASS_ID: + case Types.STRING: stringSerializer.writeJavaString(buffer, (String) obj); break; default: @@ -1013,27 +1013,27 @@ public Object readData(MemoryBuffer buffer, ClassInfo classInfo) { private Object readDataInternal(MemoryBuffer buffer, ClassInfo classInfo) { switch (classInfo.getClassId()) { - case ClassResolver.BOOLEAN_CLASS_ID: + case Types.BOOL: return buffer.readBoolean(); - case ClassResolver.BYTE_CLASS_ID: + case Types.INT8: return buffer.readByte(); - case ClassResolver.CHAR_CLASS_ID: + case ClassResolver.CHAR_ID: return buffer.readChar(); - case ClassResolver.SHORT_CLASS_ID: + case Types.INT16: return buffer.readInt16(); - case ClassResolver.INTEGER_CLASS_ID: + case Types.INT32: if (compressInt) { return buffer.readVarInt32(); } else { return buffer.readInt32(); } - case ClassResolver.FLOAT_CLASS_ID: + case Types.FLOAT32: return buffer.readFloat32(); - case ClassResolver.LONG_CLASS_ID: + case Types.INT64: return 
LongSerializer.readInt64(buffer, longEncoding); - case ClassResolver.DOUBLE_CLASS_ID: + case Types.FLOAT64: return buffer.readFloat64(); - case ClassResolver.STRING_CLASS_ID: + case Types.STRING: return stringSerializer.readJavaString(buffer); default: incReadDepth(); @@ -1110,13 +1110,13 @@ public Object xreadNonRef(MemoryBuffer buffer, ClassInfo classInfo) { case Types.INT16: return buffer.readInt16(); case Types.INT32: - case Types.VAR32: + case Types.VARINT32: // TODO(chaokunyang) support other encoding return buffer.readVarInt32(); case Types.INT64: - case Types.VAR64: + case Types.VARINT64: // TODO(chaokunyang) support other encoding - case Types.H64: + case Types.TAGGED_INT64: return buffer.readVarInt64(); case Types.FLOAT32: return buffer.readFloat32(); @@ -1399,55 +1399,47 @@ public T copyObject(T obj) { Object copy; ClassInfo classInfo = classResolver.getOrUpdateClassInfo(obj.getClass()); switch (classInfo.getClassId()) { - case ClassResolver.PRIMITIVE_BOOLEAN_CLASS_ID: - case ClassResolver.PRIMITIVE_BYTE_CLASS_ID: - case ClassResolver.PRIMITIVE_CHAR_CLASS_ID: - case ClassResolver.PRIMITIVE_SHORT_CLASS_ID: - case ClassResolver.PRIMITIVE_INT_CLASS_ID: - case ClassResolver.PRIMITIVE_FLOAT_CLASS_ID: - case ClassResolver.PRIMITIVE_LONG_CLASS_ID: - case ClassResolver.PRIMITIVE_DOUBLE_CLASS_ID: - case ClassResolver.BOOLEAN_CLASS_ID: - case ClassResolver.BYTE_CLASS_ID: - case ClassResolver.CHAR_CLASS_ID: - case ClassResolver.SHORT_CLASS_ID: - case ClassResolver.INTEGER_CLASS_ID: - case ClassResolver.FLOAT_CLASS_ID: - case ClassResolver.LONG_CLASS_ID: - case ClassResolver.DOUBLE_CLASS_ID: - case ClassResolver.STRING_CLASS_ID: + case Types.BOOL: + case Types.INT8: + case ClassResolver.CHAR_ID: + case Types.INT16: + case Types.INT32: + case Types.FLOAT32: + case Types.INT64: + case Types.FLOAT64: + case Types.STRING: return obj; - case ClassResolver.PRIMITIVE_BOOLEAN_ARRAY_CLASS_ID: + case ClassResolver.PRIMITIVE_BOOLEAN_ARRAY_ID: boolean[] boolArr = (boolean[]) 
obj; return (T) Arrays.copyOf(boolArr, boolArr.length); - case ClassResolver.PRIMITIVE_BYTE_ARRAY_CLASS_ID: + case ClassResolver.PRIMITIVE_BYTE_ARRAY_ID: byte[] byteArr = (byte[]) obj; return (T) Arrays.copyOf(byteArr, byteArr.length); - case ClassResolver.PRIMITIVE_CHAR_ARRAY_CLASS_ID: + case ClassResolver.PRIMITIVE_CHAR_ARRAY_ID: char[] charArr = (char[]) obj; return (T) Arrays.copyOf(charArr, charArr.length); - case ClassResolver.PRIMITIVE_SHORT_ARRAY_CLASS_ID: + case ClassResolver.PRIMITIVE_SHORT_ARRAY_ID: short[] shortArr = (short[]) obj; return (T) Arrays.copyOf(shortArr, shortArr.length); - case ClassResolver.PRIMITIVE_INT_ARRAY_CLASS_ID: + case ClassResolver.PRIMITIVE_INT_ARRAY_ID: int[] intArr = (int[]) obj; return (T) Arrays.copyOf(intArr, intArr.length); - case ClassResolver.PRIMITIVE_FLOAT_ARRAY_CLASS_ID: + case ClassResolver.PRIMITIVE_FLOAT_ARRAY_ID: float[] floatArr = (float[]) obj; return (T) Arrays.copyOf(floatArr, floatArr.length); - case ClassResolver.PRIMITIVE_LONG_ARRAY_CLASS_ID: + case ClassResolver.PRIMITIVE_LONG_ARRAY_ID: long[] longArr = (long[]) obj; return (T) Arrays.copyOf(longArr, longArr.length); - case ClassResolver.PRIMITIVE_DOUBLE_ARRAY_CLASS_ID: + case ClassResolver.PRIMITIVE_DOUBLE_ARRAY_ID: double[] doubleArr = (double[]) obj; return (T) Arrays.copyOf(doubleArr, doubleArr.length); - case ClassResolver.STRING_ARRAY_CLASS_ID: + case ClassResolver.STRING_ARRAY_ID: String[] stringArr = (String[]) obj; return (T) Arrays.copyOf(stringArr, stringArr.length); - case ClassResolver.ARRAYLIST_CLASS_ID: + case ClassResolver.ARRAYLIST_ID: copy = arrayListSerializer.copy((ArrayList) obj); break; - case ClassResolver.HASHMAP_CLASS_ID: + case ClassResolver.HASHMAP_ID: copy = hashMapSerializer.copy((HashMap) obj); break; // todo: add fastpath for other types. @@ -1463,23 +1455,23 @@ public T copyObject(T obj, int classId) { } // Fast path to avoid cost of query class map. 
switch (classId) { - case ClassResolver.PRIMITIVE_BOOLEAN_CLASS_ID: - case ClassResolver.PRIMITIVE_BYTE_CLASS_ID: - case ClassResolver.PRIMITIVE_CHAR_CLASS_ID: - case ClassResolver.PRIMITIVE_SHORT_CLASS_ID: - case ClassResolver.PRIMITIVE_INT_CLASS_ID: - case ClassResolver.PRIMITIVE_FLOAT_CLASS_ID: - case ClassResolver.PRIMITIVE_LONG_CLASS_ID: - case ClassResolver.PRIMITIVE_DOUBLE_CLASS_ID: - case ClassResolver.BOOLEAN_CLASS_ID: - case ClassResolver.BYTE_CLASS_ID: - case ClassResolver.CHAR_CLASS_ID: - case ClassResolver.SHORT_CLASS_ID: - case ClassResolver.INTEGER_CLASS_ID: - case ClassResolver.FLOAT_CLASS_ID: - case ClassResolver.LONG_CLASS_ID: - case ClassResolver.DOUBLE_CLASS_ID: - case ClassResolver.STRING_CLASS_ID: + case ClassResolver.PRIMITIVE_BOOL_ID: + case ClassResolver.PRIMITIVE_INT8_ID: + case ClassResolver.PRIMITIVE_CHAR_ID: + case ClassResolver.PRIMITIVE_INT16_ID: + case ClassResolver.PRIMITIVE_INT32_ID: + case ClassResolver.PRIMITIVE_FLOAT32_ID: + case ClassResolver.PRIMITIVE_INT64_ID: + case ClassResolver.PRIMITIVE_FLOAT64_ID: + case Types.BOOL: + case Types.INT8: + case ClassResolver.CHAR_ID: + case Types.INT16: + case Types.INT32: + case Types.FLOAT32: + case Types.INT64: + case Types.FLOAT64: + case Types.STRING: return obj; default: return copyObject(obj, classResolver.getOrUpdateClassInfo(obj.getClass()).getSerializer()); diff --git a/java/fory-core/src/main/java/org/apache/fory/annotation/Int32Type.java b/java/fory-core/src/main/java/org/apache/fory/annotation/Int32Type.java new file mode 100644 index 0000000000..e0adcfc14a --- /dev/null +++ b/java/fory-core/src/main/java/org/apache/fory/annotation/Int32Type.java @@ -0,0 +1,5 @@ +package org.apache.fory.annotation; + +public @interface Int32Type { + boolean compress() default true; +} diff --git a/java/fory-core/src/main/java/org/apache/fory/annotation/Int64Type.java b/java/fory-core/src/main/java/org/apache/fory/annotation/Int64Type.java new file mode 100644 index 0000000000..60772286d8 --- 
/dev/null +++ b/java/fory-core/src/main/java/org/apache/fory/annotation/Int64Type.java @@ -0,0 +1,7 @@ +package org.apache.fory.annotation; + +import org.apache.fory.config.LongEncoding; + +public @interface Int64Type { + LongEncoding encoding() default LongEncoding.VARINT64; +} diff --git a/java/fory-core/src/main/java/org/apache/fory/annotation/Uint16Type.java b/java/fory-core/src/main/java/org/apache/fory/annotation/Uint16Type.java new file mode 100644 index 0000000000..1e6aacc074 --- /dev/null +++ b/java/fory-core/src/main/java/org/apache/fory/annotation/Uint16Type.java @@ -0,0 +1,3 @@ +package org.apache.fory.annotation; + +public @interface Uint16Type {} diff --git a/java/fory-core/src/main/java/org/apache/fory/annotation/Uint32Type.java b/java/fory-core/src/main/java/org/apache/fory/annotation/Uint32Type.java new file mode 100644 index 0000000000..47953f2926 --- /dev/null +++ b/java/fory-core/src/main/java/org/apache/fory/annotation/Uint32Type.java @@ -0,0 +1,5 @@ +package org.apache.fory.annotation; + +public @interface Uint32Type { + boolean compress() default true; +} diff --git a/java/fory-core/src/main/java/org/apache/fory/annotation/Uint64Type.java b/java/fory-core/src/main/java/org/apache/fory/annotation/Uint64Type.java new file mode 100644 index 0000000000..12ed5482eb --- /dev/null +++ b/java/fory-core/src/main/java/org/apache/fory/annotation/Uint64Type.java @@ -0,0 +1,7 @@ +package org.apache.fory.annotation; + +import org.apache.fory.config.LongEncoding; + +public @interface Uint64Type { + LongEncoding encoding() default LongEncoding.VARINT64; +} diff --git a/java/fory-core/src/main/java/org/apache/fory/annotation/Uint8Type.java b/java/fory-core/src/main/java/org/apache/fory/annotation/Uint8Type.java new file mode 100644 index 0000000000..d22bccd585 --- /dev/null +++ b/java/fory-core/src/main/java/org/apache/fory/annotation/Uint8Type.java @@ -0,0 +1,3 @@ +package org.apache.fory.annotation; + +public @interface Uint8Type {} diff --git 
a/java/fory-core/src/main/java/org/apache/fory/builder/BaseObjectCodecBuilder.java b/java/fory-core/src/main/java/org/apache/fory/builder/BaseObjectCodecBuilder.java index 7b7a04dd4e..1bca6a7681 100644 --- a/java/fory-core/src/main/java/org/apache/fory/builder/BaseObjectCodecBuilder.java +++ b/java/fory-core/src/main/java/org/apache/fory/builder/BaseObjectCodecBuilder.java @@ -19,6 +19,12 @@ package org.apache.fory.builder; +import static org.apache.fory.builder.CodecBuilder.readFloat32Func; +import static org.apache.fory.builder.CodecBuilder.readFloat64Func; +import static org.apache.fory.builder.CodecBuilder.readInt16Func; +import static org.apache.fory.builder.CodecBuilder.readIntFunc; +import static org.apache.fory.builder.CodecBuilder.readLongFunc; +import static org.apache.fory.builder.CodecBuilder.readVarInt32Func; import static org.apache.fory.codegen.CodeGenerator.getPackage; import static org.apache.fory.codegen.Expression.Invoke.inlineInvoke; import static org.apache.fory.codegen.Expression.Literal.ofInt; @@ -52,10 +58,17 @@ import static org.apache.fory.serializer.collection.MapFlags.TRACKING_VALUE_REF; import static org.apache.fory.serializer.collection.MapFlags.VALUE_DECL_TYPE; import static org.apache.fory.serializer.collection.MapLikeSerializer.MAX_CHUNK_SIZE; +import static org.apache.fory.type.TypeUtils.BOOLEAN_TYPE; +import static org.apache.fory.type.TypeUtils.BYTE_TYPE; +import static org.apache.fory.type.TypeUtils.CHAR_TYPE; import static org.apache.fory.type.TypeUtils.CLASS_TYPE; import static org.apache.fory.type.TypeUtils.COLLECTION_TYPE; +import static org.apache.fory.type.TypeUtils.DOUBLE_TYPE; +import static org.apache.fory.type.TypeUtils.FLOAT_TYPE; +import static org.apache.fory.type.TypeUtils.INT_TYPE; import static org.apache.fory.type.TypeUtils.ITERATOR_TYPE; import static org.apache.fory.type.TypeUtils.LIST_TYPE; +import static org.apache.fory.type.TypeUtils.LONG_TYPE; import static org.apache.fory.type.TypeUtils.MAP_ENTRY_TYPE; 
import static org.apache.fory.type.TypeUtils.MAP_TYPE; import static org.apache.fory.type.TypeUtils.OBJECT_TYPE; @@ -65,6 +78,7 @@ import static org.apache.fory.type.TypeUtils.PRIMITIVE_LONG_TYPE; import static org.apache.fory.type.TypeUtils.PRIMITIVE_VOID_TYPE; import static org.apache.fory.type.TypeUtils.SET_TYPE; +import static org.apache.fory.type.TypeUtils.SHORT_TYPE; import static org.apache.fory.type.TypeUtils.getRawType; import static org.apache.fory.type.TypeUtils.isBoxed; import static org.apache.fory.type.TypeUtils.isPrimitive; @@ -125,6 +139,7 @@ import org.apache.fory.serializer.collection.CollectionLikeSerializer; import org.apache.fory.serializer.collection.MapLikeSerializer; import org.apache.fory.type.Descriptor; +import org.apache.fory.type.DispatchId; import org.apache.fory.type.GenericType; import org.apache.fory.type.TypeUtils; import org.apache.fory.util.GraalvmSupport; @@ -164,6 +179,7 @@ public abstract class BaseObjectCodecBuilder extends CodecBuilder { protected LinkedList walkPath = new LinkedList<>(); protected final String writeMethodName; protected final String readMethodName; + private final Map descriptorDispatchId; public BaseObjectCodecBuilder(TypeRef beanType, Fory fory, Class parentSerializerClass) { super(new CodegenContext(), beanType); @@ -202,6 +218,7 @@ public BaseObjectCodecBuilder(TypeRef beanType, Fory fory, Class parentSer STRING_SERIALIZER_NAME, inlineInvoke(foryRef, "getStringSerializer", typeResolverType)); jitCallbackUpdateFields = new HashMap<>(); + descriptorDispatchId = new HashMap<>(); } // Must be static to be shared across the whole process life. @@ -378,8 +395,6 @@ protected Expression serializeFor( TypeRef typeRef, Expression serializer, boolean generateNewMethod) { - // access rawType without jit lock to reduce lock competition. 
- Class rawType = getRawType(typeRef); if (needWriteRef(typeRef)) { return new If( not(writeRefOrNull(buffer, inputObject)), @@ -411,47 +426,42 @@ protected Expression serializeField( if (useRefTracking) { return new If( not(writeRefOrNull(buffer, fieldValue)), - serializeForNotNullForField(fieldValue, buffer, descriptor, null, false)); + serializeForNotNullForField(fieldValue, buffer, descriptor, null)); } else { // if typeToken is not final, ref tracking of subclass will be ignored too. if (typeRef.isPrimitive()) { - return serializeForNotNullForField(fieldValue, buffer, descriptor, null, false); + return serializeForNotNullForField(fieldValue, buffer, descriptor, null); } if (nullable) { Expression action = new ListExpression( new Invoke(buffer, "writeByte", Literal.ofByte(Fory.NOT_NULL_VALUE_FLAG)), - serializeForNotNullForField(fieldValue, buffer, descriptor, null, false)); + serializeForNotNullForField(fieldValue, buffer, descriptor, null)); return new If( eqNull(fieldValue), new Invoke(buffer, "writeByte", Literal.ofByte(Fory.NULL_FLAG)), action); } else { - return serializeForNotNullForField(fieldValue, buffer, descriptor, null, false); + return serializeForNotNullForField(fieldValue, buffer, descriptor, null); } } } private Expression serializeForNotNullForField( - Expression inputObject, - Expression buffer, - Descriptor descriptor, - Expression serializer, - boolean generateNewMethod) { + Expression inputObject, Expression buffer, Descriptor descriptor, Expression serializer) { TypeRef typeRef = descriptor.getTypeRef(); Class clz = getRawType(typeRef); if (isPrimitive(clz) || isBoxed(clz)) { - return serializePrimitive(inputObject, buffer, clz); + return serializePrimitiveField(inputObject, buffer, descriptor); } else { if (clz == String.class) { return fory.getStringSerializer().writeStringExpr(stringSerializerRef, buffer, inputObject); } Expression action; if (useCollectionSerialization(typeRef)) { - action = - serializeForCollection(buffer, 
inputObject, typeRef, serializer, generateNewMethod); + action = serializeForCollection(buffer, inputObject, typeRef, serializer, false); } else if (useMapSerialization(typeRef)) { - action = serializeForMap(buffer, inputObject, typeRef, serializer, generateNewMethod); + action = serializeForMap(buffer, inputObject, typeRef, serializer, false); } else { action = serializeForNotNullObjectForField(inputObject, buffer, descriptor, serializer); } @@ -459,6 +469,65 @@ private Expression serializeForNotNullForField( } } + private Expression serializePrimitiveField( + Expression inputObject, Expression buffer, Descriptor descriptor) { + int dispatchId = getNumericDescriptorDispatchId(descriptor); + switch (dispatchId) { + case DispatchId.PRIMITIVE_BOOL: + case DispatchId.BOOL: + return new Invoke(buffer, "writeBoolean", inputObject); + case DispatchId.PRIMITIVE_INT8: + case DispatchId.PRIMITIVE_UINT8: + case DispatchId.INT8: + case DispatchId.UINT8: + return new Invoke(buffer, "writeByte", inputObject); + case DispatchId.PRIMITIVE_CHAR: + case DispatchId.CHAR: + return new Invoke(buffer, "writeChar", inputObject); + case DispatchId.PRIMITIVE_INT16: + case DispatchId.PRIMITIVE_UINT16: + case DispatchId.INT16: + case DispatchId.UINT16: + return new Invoke(buffer, "writeInt16", inputObject); + case DispatchId.PRIMITIVE_INT32: + case DispatchId.PRIMITIVE_UINT32: + case DispatchId.INT32: + case DispatchId.UINT32: + return new Invoke(buffer, "writeInt32", inputObject); + case DispatchId.PRIMITIVE_VARINT32: + case DispatchId.VARINT32: + return new Invoke(buffer, "writeVarInt32", inputObject); + case DispatchId.PRIMITIVE_VAR_UINT32: + case DispatchId.VAR_UINT32: + return new Invoke(buffer, "writeVarUint32", inputObject); + case DispatchId.PRIMITIVE_INT64: + case DispatchId.PRIMITIVE_UINT64: + case DispatchId.INT64: + case DispatchId.UINT64: + return new Invoke(buffer, "writeInt64", inputObject); + case DispatchId.PRIMITIVE_VARINT64: + case DispatchId.VARINT64: + return new 
Invoke(buffer, "writeVarInt64", inputObject); + case DispatchId.PRIMITIVE_TAGGED_INT64: + case DispatchId.TAGGED_INT64: + return new Invoke(buffer, "writeTaggedInt64", inputObject); + case DispatchId.PRIMITIVE_VAR_UINT64: + case DispatchId.VAR_UINT64: + return new Invoke(buffer, "writeVarUint64", inputObject); + case DispatchId.PRIMITIVE_TAGGED_UINT64: + case DispatchId.TAGGED_UINT64: + return new Invoke(buffer, "writeTaggedUint64", inputObject); + case DispatchId.PRIMITIVE_FLOAT32: + case DispatchId.FLOAT32: + return new Invoke(buffer, "writeFloat32", inputObject); + case DispatchId.PRIMITIVE_FLOAT64: + case DispatchId.FLOAT64: + return new Invoke(buffer, "writeFloat64", inputObject); + default: + throw new IllegalStateException("Unsupported dispatchId: " + dispatchId); + } + } + private Expression serializePrimitive(Expression inputObject, Expression buffer, Class clz) { // for primitive, inline call here to avoid java boxing, rather call corresponding serializer. if (clz == byte.class || clz == Byte.class) { @@ -610,6 +679,12 @@ protected boolean useMapSerialization(Class type) { return typeResolver(r -> r.isMap(type)); } + protected int getNumericDescriptorDispatchId(Descriptor descriptor) { + Class rawType = descriptor.getRawType(); + Preconditions.checkArgument(TypeUtils.unwrap(rawType).isPrimitive()); + return descriptorDispatchId.computeIfAbsent(descriptor, d -> DispatchId.getDispatchId(fory, d)); + } + /** * Whether the provided type should be taken as final. Although the clz can be final, * the method can still return false. 
For example, we return false in meta share mode to write @@ -1855,7 +1930,7 @@ private Expression deserializeForNotNullForField( TypeRef typeRef = descriptor.getTypeRef(); Class cls = getRawType(typeRef); if (isPrimitive(cls) || isBoxed(cls)) { - return deserializePrimitive(buffer, cls); + return deserializePrimitiveField(buffer, descriptor); } else { if (cls == String.class) { return fory.getStringSerializer().readStringExpr(stringSerializerRef, buffer); @@ -1883,6 +1958,78 @@ private Expression deserializeForNotNullForField( } } + private Expression deserializePrimitiveField(Expression buffer, Descriptor descriptor) { + int dispatchId = getNumericDescriptorDispatchId(descriptor); + switch (dispatchId) { + case DispatchId.PRIMITIVE_BOOL: + return new Invoke(buffer, "readBoolean", PRIMITIVE_BOOLEAN_TYPE); + case DispatchId.BOOL: + return new Invoke(buffer, "readBoolean", BOOLEAN_TYPE); + case DispatchId.PRIMITIVE_INT8: + case DispatchId.PRIMITIVE_UINT8: + return new Invoke(buffer, "readByte", PRIMITIVE_BYTE_TYPE); + case DispatchId.INT8: + case DispatchId.UINT8: + return new Invoke(buffer, "readByte", BYTE_TYPE); + case DispatchId.PRIMITIVE_CHAR: + return readChar(buffer); + case DispatchId.CHAR: + return new Invoke(buffer, "readChar", CHAR_TYPE); + case DispatchId.PRIMITIVE_INT16: + case DispatchId.PRIMITIVE_UINT16: + return readInt16(buffer); + case DispatchId.INT16: + case DispatchId.UINT16: + return new Invoke(buffer, readInt16Func(), SHORT_TYPE); + case DispatchId.PRIMITIVE_INT32: + case DispatchId.PRIMITIVE_UINT32: + return readInt32(buffer); + case DispatchId.INT32: + case DispatchId.UINT32: + return new Invoke(buffer, readIntFunc(), INT_TYPE); + case DispatchId.PRIMITIVE_VARINT32: + return readVarInt32(buffer); + case DispatchId.VARINT32: + return new Invoke(buffer, readVarInt32Func(), INT_TYPE); + case DispatchId.PRIMITIVE_VAR_UINT32: + return new Invoke(buffer, "readVarUint32", PRIMITIVE_INT_TYPE); + case DispatchId.VAR_UINT32: + return new Invoke(buffer, 
"readVarUint32", INT_TYPE); + case DispatchId.PRIMITIVE_INT64: + case DispatchId.PRIMITIVE_UINT64: + return readInt64(buffer); + case DispatchId.INT64: + case DispatchId.UINT64: + return new Invoke(buffer, readLongFunc(), LONG_TYPE); + case DispatchId.PRIMITIVE_VARINT64: + return new Invoke(buffer, "readVarInt64", PRIMITIVE_LONG_TYPE); + case DispatchId.VARINT64: + return new Invoke(buffer, "readVarInt64", LONG_TYPE); + case DispatchId.PRIMITIVE_TAGGED_INT64: + return new Invoke(buffer, "readTaggedInt64", PRIMITIVE_LONG_TYPE); + case DispatchId.TAGGED_INT64: + return new Invoke(buffer, "readTaggedInt64", LONG_TYPE); + case DispatchId.PRIMITIVE_VAR_UINT64: + return new Invoke(buffer, "readVarUint64", PRIMITIVE_LONG_TYPE); + case DispatchId.VAR_UINT64: + return new Invoke(buffer, "readVarUint64", LONG_TYPE); + case DispatchId.PRIMITIVE_TAGGED_UINT64: + return new Invoke(buffer, "readTaggedUint64", PRIMITIVE_LONG_TYPE); + case DispatchId.TAGGED_UINT64: + return new Invoke(buffer, "readTaggedUint64", LONG_TYPE); + case DispatchId.PRIMITIVE_FLOAT32: + return readFloat32(buffer); + case DispatchId.FLOAT32: + return new Invoke(buffer, readFloat32Func(), FLOAT_TYPE); + case DispatchId.PRIMITIVE_FLOAT64: + return readFloat64(buffer); + case DispatchId.FLOAT64: + return new Invoke(buffer, readFloat64Func(), DOUBLE_TYPE); + default: + throw new IllegalStateException("Unsupported dispatchId: " + dispatchId); + } + } + private Expression deserializePrimitive(Expression buffer, Class cls) { // for primitive, inline call here to avoid java boxing if (cls == byte.class || cls == Byte.class) { diff --git a/java/fory-core/src/main/java/org/apache/fory/builder/CodecBuilder.java b/java/fory-core/src/main/java/org/apache/fory/builder/CodecBuilder.java index 51c8b93703..1faedf678d 100644 --- a/java/fory-core/src/main/java/org/apache/fory/builder/CodecBuilder.java +++ b/java/fory-core/src/main/java/org/apache/fory/builder/CodecBuilder.java @@ -63,7 +63,6 @@ import 
org.apache.fory.resolver.ClassInfo; import org.apache.fory.resolver.ClassInfoHolder; import org.apache.fory.type.Descriptor; -import org.apache.fory.type.FinalObjectTypeStub; import org.apache.fory.util.GraalvmSupport; import org.apache.fory.util.Preconditions; import org.apache.fory.util.StringUtils; @@ -154,10 +153,6 @@ protected Expression tryCastIfPublic(Expression expression, TypeRef targetTyp protected Expression tryCastIfPublic( Expression expression, TypeRef targetType, boolean inline) { Class rawType = getRawType(targetType); - if (rawType == FinalObjectTypeStub.class) { - // final field doesn't exist in this class, skip cast. - return expression; - } if (inline) { if (sourcePublicAccessible(rawType)) { return new Cast(expression, targetType); @@ -309,7 +304,7 @@ private Expression reflectAccessField( return new Cast(getObj, descriptor.getTypeRef(), descriptor.getName()); } - /** Returns an expression that get field value> from bean using {@link Unsafe}. */ + /** Returns an expression that gets the field value from bean using `Unsafe`. */ private Expression unsafeAccessField( Expression inputObject, Class cls, Descriptor descriptor) { String fieldName = descriptor.getName(); @@ -418,8 +413,7 @@ private Expression reflectSetField(Expression bean, Field field, Expression valu } /** - * Returns an expression that set field value to bean using {@link - * Unsafe}. + * Returns an expression that sets the field value on bean using `Unsafe`. */ private Expression unsafeSetField(Expression bean, Descriptor descriptor, Expression value) { TypeRef fieldType = descriptor.getTypeRef(); @@ -709,13 +703,27 @@ public static String readLongFunc() { return Platform.IS_LITTLE_ENDIAN ? "_readInt64OnLE" : "_readInt64OnBE"; } + public static String readInt16Func() { + return Platform.IS_LITTLE_ENDIAN ? "_readInt16OnLE" : "_readInt16OnBE"; + } + + public static String readVarInt32Func() { + return Platform.IS_LITTLE_ENDIAN ?
"_readVarInt32OnLE" : "_readVarInt32OnBE"; + } + + public static String readFloat32Func() { + return Platform.IS_LITTLE_ENDIAN ? "_readFloat32OnLE" : "_readFloat32OnBE"; + } + + public static String readFloat64Func() { + return Platform.IS_LITTLE_ENDIAN ? "_readFloat64OnLE" : "_readFloat64OnBE"; + } + protected Expression readFloat32(Expression buffer) { - String func = Platform.IS_LITTLE_ENDIAN ? "_readFloat32OnLE" : "_readFloat32OnBE"; - return new Invoke(buffer, func, PRIMITIVE_FLOAT_TYPE); + return new Invoke(buffer, readFloat32Func(), PRIMITIVE_FLOAT_TYPE); } protected Expression readFloat64(Expression buffer) { - String func = Platform.IS_LITTLE_ENDIAN ? "_readFloat64OnLE" : "_readFloat64OnBE"; - return new Invoke(buffer, func, PRIMITIVE_DOUBLE_TYPE); + return new Invoke(buffer, readFloat64Func(), PRIMITIVE_DOUBLE_TYPE); } } diff --git a/java/fory-core/src/main/java/org/apache/fory/builder/ObjectCodecBuilder.java b/java/fory-core/src/main/java/org/apache/fory/builder/ObjectCodecBuilder.java index 1e528094b4..b1f2b3ebe9 100644 --- a/java/fory-core/src/main/java/org/apache/fory/builder/ObjectCodecBuilder.java +++ b/java/fory-core/src/main/java/org/apache/fory/builder/ObjectCodecBuilder.java @@ -31,7 +31,6 @@ import static org.apache.fory.type.TypeUtils.PRIMITIVE_VOID_TYPE; import static org.apache.fory.type.TypeUtils.getRawType; import static org.apache.fory.type.TypeUtils.getSizeOfPrimitiveType; -import static org.apache.fory.type.TypeUtils.isPrimitive; import java.util.ArrayList; import java.util.Collection; @@ -60,12 +59,11 @@ import org.apache.fory.reflect.ObjectCreators; import org.apache.fory.reflect.TypeRef; import org.apache.fory.serializer.ObjectSerializer; -import org.apache.fory.serializer.PrimitiveSerializers.LongSerializer; import org.apache.fory.serializer.SerializationUtils; import org.apache.fory.type.Descriptor; import org.apache.fory.type.DescriptorGrouper; +import org.apache.fory.type.DispatchId; import org.apache.fory.type.TypeUtils; -import 
org.apache.fory.util.Preconditions; import org.apache.fory.util.function.SerializableSupplier; import org.apache.fory.util.record.RecordUtils; @@ -269,39 +267,42 @@ private List serializePrimitivesUnCompressed( ListExpression groupExpressions = new ListExpression(); // use Reference to cut-off expr dependency. for (Descriptor descriptor : group) { - Class clz = descriptor.getRawType(); - Preconditions.checkArgument(isPrimitive(clz)); + int dispatchId = getNumericDescriptorDispatchId(descriptor); // `bean` will be replaced by `Reference` to cut-off expr dependency. Expression fieldValue = getFieldValue(bean, descriptor); if (fieldValue instanceof Inlineable) { ((Inlineable) fieldValue).inline(); } - if (clz == byte.class) { - groupExpressions.add(unsafePut(base, getWriterPos(writerAddr, acc), fieldValue)); - acc += 1; - } else if (clz == boolean.class) { + if (dispatchId == DispatchId.PRIMITIVE_BOOL) { groupExpressions.add(unsafePutBoolean(base, getWriterPos(writerAddr, acc), fieldValue)); acc += 1; - } else if (clz == char.class) { + } else if (dispatchId == DispatchId.PRIMITIVE_INT8 + || dispatchId == DispatchId.PRIMITIVE_UINT8) { + groupExpressions.add(unsafePut(base, getWriterPos(writerAddr, acc), fieldValue)); + acc += 1; + } else if (dispatchId == DispatchId.PRIMITIVE_CHAR) { groupExpressions.add(unsafePutChar(base, getWriterPos(writerAddr, acc), fieldValue)); acc += 2; - } else if (clz == short.class) { + } else if (dispatchId == DispatchId.PRIMITIVE_INT16 + || dispatchId == DispatchId.PRIMITIVE_UINT16) { groupExpressions.add(unsafePutShort(base, getWriterPos(writerAddr, acc), fieldValue)); acc += 2; - } else if (clz == int.class) { + } else if (dispatchId == DispatchId.PRIMITIVE_INT32 + || dispatchId == DispatchId.PRIMITIVE_UINT32) { groupExpressions.add(unsafePutInt(base, getWriterPos(writerAddr, acc), fieldValue)); acc += 4; - } else if (clz == long.class) { + } else if (dispatchId == DispatchId.PRIMITIVE_INT64 + || dispatchId == 
DispatchId.PRIMITIVE_UINT64) { groupExpressions.add(unsafePutLong(base, getWriterPos(writerAddr, acc), fieldValue)); acc += 8; - } else if (clz == float.class) { + } else if (dispatchId == DispatchId.PRIMITIVE_FLOAT32) { groupExpressions.add(unsafePutFloat(base, getWriterPos(writerAddr, acc), fieldValue)); acc += 4; - } else if (clz == double.class) { + } else if (dispatchId == DispatchId.PRIMITIVE_FLOAT64) { groupExpressions.add(unsafePutDouble(base, getWriterPos(writerAddr, acc), fieldValue)); acc += 8; } else { - throw new IllegalStateException("impossible"); + throw new IllegalStateException("Unsupported dispatchId: " + dispatchId); } } if (numPrimitiveFields < 4) { @@ -328,10 +329,17 @@ private List serializePrimitivesCompressed( int extraSize = 0; for (List group : primitiveGroups) { for (Descriptor d : group) { - if (d.getRawType() == int.class) { + int id = getNumericDescriptorDispatchId(d); + if (id == DispatchId.PRIMITIVE_INT32 + || id == DispatchId.PRIMITIVE_VARINT32 + || id == DispatchId.PRIMITIVE_VAR_UINT32) { // varint may be written as 5bytes, use 8bytes for written as long to reduce cost. extraSize += 4; - } else if (d.getRawType() == long.class) { + } else if (id == DispatchId.PRIMITIVE_INT64 + || id == DispatchId.PRIMITIVE_VARINT64 + || id == DispatchId.PRIMITIVE_TAGGED_INT64 + || id == DispatchId.PRIMITIVE_VAR_UINT64 + || id == DispatchId.PRIMITIVE_TAGGED_UINT64) { extraSize += 1; // long use 1~9 bytes. } } @@ -351,59 +359,78 @@ private List serializePrimitivesCompressed( int acc = 0; boolean compressStarted = false; for (Descriptor descriptor : group) { - Class clz = TypeUtils.unwrap(descriptor.getRawType()); - Preconditions.checkArgument(isPrimitive(clz)); + int dispatchId = getNumericDescriptorDispatchId(descriptor); // `bean` will be replaced by `Reference` to cut-off expr dependency. 
Expression fieldValue = getFieldValue(bean, descriptor); if (fieldValue instanceof Inlineable) { ((Inlineable) fieldValue).inline(); } - if (clz == byte.class) { - groupExpressions.add(unsafePut(base, getWriterPos(writerAddr, acc), fieldValue)); - acc += 1; - } else if (clz == boolean.class) { + if (dispatchId == DispatchId.PRIMITIVE_BOOL) { groupExpressions.add(unsafePutBoolean(base, getWriterPos(writerAddr, acc), fieldValue)); acc += 1; - } else if (clz == char.class) { + } else if (dispatchId == DispatchId.PRIMITIVE_INT8 + || dispatchId == DispatchId.PRIMITIVE_UINT8) { + groupExpressions.add(unsafePut(base, getWriterPos(writerAddr, acc), fieldValue)); + acc += 1; + } else if (dispatchId == DispatchId.PRIMITIVE_CHAR) { groupExpressions.add(unsafePutChar(base, getWriterPos(writerAddr, acc), fieldValue)); acc += 2; - } else if (clz == short.class) { + } else if (dispatchId == DispatchId.PRIMITIVE_INT16 + || dispatchId == DispatchId.PRIMITIVE_UINT16) { groupExpressions.add(unsafePutShort(base, getWriterPos(writerAddr, acc), fieldValue)); acc += 2; - } else if (clz == float.class) { + } else if (dispatchId == DispatchId.PRIMITIVE_FLOAT32) { groupExpressions.add(unsafePutFloat(base, getWriterPos(writerAddr, acc), fieldValue)); acc += 4; - } else if (clz == double.class) { + } else if (dispatchId == DispatchId.PRIMITIVE_FLOAT64) { groupExpressions.add(unsafePutDouble(base, getWriterPos(writerAddr, acc), fieldValue)); acc += 8; - } else if (clz == int.class) { - if (!fory.compressInt()) { - groupExpressions.add(unsafePutInt(base, getWriterPos(writerAddr, acc), fieldValue)); - acc += 4; - } else { - if (!compressStarted) { - // int/long are sorted in the last. 
- addIncWriterIndexExpr(groupExpressions, buffer, acc); - compressStarted = true; - } - groupExpressions.add(new Invoke(buffer, "_unsafeWriteVarInt32", fieldValue)); - acc += 0; + } else if (dispatchId == DispatchId.PRIMITIVE_INT32 + || dispatchId == DispatchId.PRIMITIVE_UINT32) { + groupExpressions.add(unsafePutInt(base, getWriterPos(writerAddr, acc), fieldValue)); + acc += 4; + } else if (dispatchId == DispatchId.PRIMITIVE_INT64 + || dispatchId == DispatchId.PRIMITIVE_UINT64) { + groupExpressions.add(unsafePutLong(base, getWriterPos(writerAddr, acc), fieldValue)); + acc += 8; + } else if (dispatchId == DispatchId.PRIMITIVE_VARINT32) { + if (!compressStarted) { + addIncWriterIndexExpr(groupExpressions, buffer, acc); + compressStarted = true; + } + groupExpressions.add(new Invoke(buffer, "_unsafeWriteVarInt32", fieldValue)); + } else if (dispatchId == DispatchId.PRIMITIVE_VAR_UINT32) { + if (!compressStarted) { + addIncWriterIndexExpr(groupExpressions, buffer, acc); + compressStarted = true; + } + groupExpressions.add(new Invoke(buffer, "_unsafeWriteVarUint32", fieldValue)); + } else if (dispatchId == DispatchId.PRIMITIVE_VARINT64) { + if (!compressStarted) { + addIncWriterIndexExpr(groupExpressions, buffer, acc); + compressStarted = true; + } + groupExpressions.add(new Invoke(buffer, "writeVarInt64", fieldValue)); + } else if (dispatchId == DispatchId.PRIMITIVE_TAGGED_INT64) { + if (!compressStarted) { + addIncWriterIndexExpr(groupExpressions, buffer, acc); + compressStarted = true; + } + groupExpressions.add(new Invoke(buffer, "writeTaggedInt64", fieldValue)); + } else if (dispatchId == DispatchId.PRIMITIVE_VAR_UINT64) { + if (!compressStarted) { + addIncWriterIndexExpr(groupExpressions, buffer, acc); + compressStarted = true; } - } else if (clz == long.class) { - if (!fory.compressLong()) { - groupExpressions.add(unsafePutLong(base, getWriterPos(writerAddr, acc), fieldValue)); - acc += 8; - } else { - if (!compressStarted) { - // int/long are sorted in the last. 
- addIncWriterIndexExpr(groupExpressions, buffer, acc); - compressStarted = true; - } - groupExpressions.add( - LongSerializer.writeInt64(buffer, fieldValue, fory.longEncoding(), false)); + groupExpressions.add(new Invoke(buffer, "writeVarUint64", fieldValue)); + } else if (dispatchId == DispatchId.PRIMITIVE_TAGGED_UINT64) { + if (!compressStarted) { + addIncWriterIndexExpr(groupExpressions, buffer, acc); + compressStarted = true; } + groupExpressions.add(new Invoke(buffer, "writeTaggedUint64", fieldValue)); } else { - throw new IllegalStateException("impossible"); + throw new IllegalStateException("Unsupported dispatchId: " + dispatchId); } } if (!compressStarted) { @@ -643,36 +670,46 @@ private List deserializeUnCompressedPrimitives( for (List group : primitiveGroups) { ListExpression groupExpressions = new ListExpression(); for (Descriptor descriptor : group) { - TypeRef type = descriptor.getTypeRef(); - Class clz = getRawType(type); - Preconditions.checkArgument(isPrimitive(clz)); + int dispatchId = getNumericDescriptorDispatchId(descriptor); Expression fieldValue; - if (clz == byte.class) { - fieldValue = unsafeGet(heapBuffer, getReaderAddress(readerAddr, acc)); - acc += 1; - } else if (clz == boolean.class) { + if (dispatchId == DispatchId.PRIMITIVE_BOOL || dispatchId == DispatchId.BOOL) { fieldValue = unsafeGetBoolean(heapBuffer, getReaderAddress(readerAddr, acc)); acc += 1; - } else if (clz == char.class) { + } else if (dispatchId == DispatchId.PRIMITIVE_INT8 + || dispatchId == DispatchId.PRIMITIVE_UINT8 + || dispatchId == DispatchId.INT8 + || dispatchId == DispatchId.UINT8) { + fieldValue = unsafeGet(heapBuffer, getReaderAddress(readerAddr, acc)); + acc += 1; + } else if (dispatchId == DispatchId.PRIMITIVE_CHAR || dispatchId == DispatchId.CHAR) { fieldValue = unsafeGetChar(heapBuffer, getReaderAddress(readerAddr, acc)); acc += 2; - } else if (clz == short.class) { + } else if (dispatchId == DispatchId.PRIMITIVE_INT16 + || dispatchId == 
DispatchId.PRIMITIVE_UINT16 + || dispatchId == DispatchId.INT16 + || dispatchId == DispatchId.UINT16) { fieldValue = unsafeGetShort(heapBuffer, getReaderAddress(readerAddr, acc)); acc += 2; - } else if (clz == int.class) { + } else if (dispatchId == DispatchId.PRIMITIVE_INT32 + || dispatchId == DispatchId.PRIMITIVE_UINT32 + || dispatchId == DispatchId.INT32 + || dispatchId == DispatchId.UINT32) { fieldValue = unsafeGetInt(heapBuffer, getReaderAddress(readerAddr, acc)); acc += 4; - } else if (clz == long.class) { + } else if (dispatchId == DispatchId.PRIMITIVE_INT64 + || dispatchId == DispatchId.PRIMITIVE_UINT64 + || dispatchId == DispatchId.INT64 + || dispatchId == DispatchId.UINT64) { fieldValue = unsafeGetLong(heapBuffer, getReaderAddress(readerAddr, acc)); acc += 8; - } else if (clz == float.class) { + } else if (dispatchId == DispatchId.PRIMITIVE_FLOAT32 || dispatchId == DispatchId.FLOAT32) { fieldValue = unsafeGetFloat(heapBuffer, getReaderAddress(readerAddr, acc)); acc += 4; - } else if (clz == double.class) { + } else if (dispatchId == DispatchId.PRIMITIVE_FLOAT64 || dispatchId == DispatchId.FLOAT64) { fieldValue = unsafeGetDouble(heapBuffer, getReaderAddress(readerAddr, acc)); acc += 8; } else { - throw new IllegalStateException("impossible"); + throw new IllegalStateException("Unsupported dispatchId: " + dispatchId); } // `bean` will be replaced by `Reference` to cut-off expr dependency. 
groupExpressions.add(setFieldValue(bean, descriptor, fieldValue)); @@ -710,52 +747,88 @@ private List deserializeCompressedPrimitives( int acc = 0; boolean compressStarted = false; for (Descriptor descriptor : group) { - TypeRef type = descriptor.getTypeRef(); - Class clz = TypeUtils.unwrap(getRawType(type)); - Preconditions.checkArgument(isPrimitive(clz)); + int dispatchId = getNumericDescriptorDispatchId(descriptor); Expression fieldValue; - if (clz == byte.class) { - fieldValue = unsafeGet(heapBuffer, getReaderAddress(readerAddr, acc)); - acc += 1; - } else if (clz == boolean.class) { + if (dispatchId == DispatchId.PRIMITIVE_BOOL || dispatchId == DispatchId.BOOL) { fieldValue = unsafeGetBoolean(heapBuffer, getReaderAddress(readerAddr, acc)); acc += 1; - } else if (clz == char.class) { + } else if (dispatchId == DispatchId.PRIMITIVE_INT8 + || dispatchId == DispatchId.PRIMITIVE_UINT8 + || dispatchId == DispatchId.INT8 + || dispatchId == DispatchId.UINT8) { + fieldValue = unsafeGet(heapBuffer, getReaderAddress(readerAddr, acc)); + acc += 1; + } else if (dispatchId == DispatchId.PRIMITIVE_CHAR || dispatchId == DispatchId.CHAR) { fieldValue = unsafeGetChar(heapBuffer, getReaderAddress(readerAddr, acc)); acc += 2; - } else if (clz == short.class) { + } else if (dispatchId == DispatchId.PRIMITIVE_INT16 + || dispatchId == DispatchId.PRIMITIVE_UINT16 + || dispatchId == DispatchId.INT16 + || dispatchId == DispatchId.UINT16) { fieldValue = unsafeGetShort(heapBuffer, getReaderAddress(readerAddr, acc)); acc += 2; - } else if (clz == float.class) { + } else if (dispatchId == DispatchId.PRIMITIVE_FLOAT32 || dispatchId == DispatchId.FLOAT32) { fieldValue = unsafeGetFloat(heapBuffer, getReaderAddress(readerAddr, acc)); acc += 4; - } else if (clz == double.class) { + } else if (dispatchId == DispatchId.PRIMITIVE_FLOAT64 || dispatchId == DispatchId.FLOAT64) { fieldValue = unsafeGetDouble(heapBuffer, getReaderAddress(readerAddr, acc)); acc += 8; - } else if (clz == int.class) { - 
if (!fory.compressInt()) { - fieldValue = unsafeGetInt(heapBuffer, getReaderAddress(readerAddr, acc)); - acc += 4; - } else { - if (!compressStarted) { - compressStarted = true; - addIncReaderIndexExpr(groupExpressions, buffer, acc); - } - fieldValue = readVarInt32(buffer); + } else if (dispatchId == DispatchId.PRIMITIVE_INT32 + || dispatchId == DispatchId.PRIMITIVE_UINT32 + || dispatchId == DispatchId.INT32 + || dispatchId == DispatchId.UINT32) { + fieldValue = unsafeGetInt(heapBuffer, getReaderAddress(readerAddr, acc)); + acc += 4; + } else if (dispatchId == DispatchId.PRIMITIVE_INT64 + || dispatchId == DispatchId.PRIMITIVE_UINT64 + || dispatchId == DispatchId.INT64 + || dispatchId == DispatchId.UINT64) { + fieldValue = unsafeGetLong(heapBuffer, getReaderAddress(readerAddr, acc)); + acc += 8; + } else if (dispatchId == DispatchId.PRIMITIVE_VARINT32 + || dispatchId == DispatchId.VARINT32) { + if (!compressStarted) { + compressStarted = true; + addIncReaderIndexExpr(groupExpressions, buffer, acc); + } + fieldValue = readVarInt32(buffer); + } else if (dispatchId == DispatchId.PRIMITIVE_VAR_UINT32 + || dispatchId == DispatchId.VAR_UINT32) { + if (!compressStarted) { + compressStarted = true; + addIncReaderIndexExpr(groupExpressions, buffer, acc); + } + fieldValue = new Invoke(buffer, "readVarUint32", PRIMITIVE_INT_TYPE); + } else if (dispatchId == DispatchId.PRIMITIVE_VARINT64 + || dispatchId == DispatchId.VARINT64) { + if (!compressStarted) { + compressStarted = true; + addIncReaderIndexExpr(groupExpressions, buffer, acc); + } + fieldValue = new Invoke(buffer, "readVarInt64", PRIMITIVE_LONG_TYPE); + } else if (dispatchId == DispatchId.PRIMITIVE_TAGGED_INT64 + || dispatchId == DispatchId.TAGGED_INT64) { + if (!compressStarted) { + compressStarted = true; + addIncReaderIndexExpr(groupExpressions, buffer, acc); + } + fieldValue = new Invoke(buffer, "readTaggedInt64", PRIMITIVE_LONG_TYPE); + } else if (dispatchId == DispatchId.PRIMITIVE_VAR_UINT64 + || dispatchId == 
DispatchId.VAR_UINT64) { + if (!compressStarted) { + compressStarted = true; + addIncReaderIndexExpr(groupExpressions, buffer, acc); } - } else if (clz == long.class) { - if (!fory.compressLong()) { - fieldValue = unsafeGetLong(heapBuffer, getReaderAddress(readerAddr, acc)); - acc += 8; - } else { - if (!compressStarted) { - compressStarted = true; - addIncReaderIndexExpr(groupExpressions, buffer, acc); - } - fieldValue = LongSerializer.readInt64(buffer, fory.longEncoding()); + fieldValue = new Invoke(buffer, "readVarUint64", PRIMITIVE_LONG_TYPE); + } else if (dispatchId == DispatchId.PRIMITIVE_TAGGED_UINT64 + || dispatchId == DispatchId.TAGGED_UINT64) { + if (!compressStarted) { + compressStarted = true; + addIncReaderIndexExpr(groupExpressions, buffer, acc); } + fieldValue = new Invoke(buffer, "readTaggedUint64", PRIMITIVE_LONG_TYPE); } else { - throw new IllegalStateException("impossible"); + throw new IllegalStateException("Unsupported dispatchId: " + dispatchId); } // `bean` will be replaced by `Reference` to cut-off expr dependency. 
groupExpressions.add(setFieldValue(bean, descriptor, fieldValue)); diff --git a/java/fory-core/src/main/java/org/apache/fory/config/Config.java b/java/fory-core/src/main/java/org/apache/fory/config/Config.java index 0488cbfb59..b49f4ad1dc 100644 --- a/java/fory-core/src/main/java/org/apache/fory/config/Config.java +++ b/java/fory-core/src/main/java/org/apache/fory/config/Config.java @@ -83,7 +83,7 @@ public Config(ForyBuilder builder) { writeNumUtf16BytesForUtf8Encoding = builder.writeNumUtf16BytesForUtf8Encoding; compressInt = builder.compressInt; longEncoding = builder.longEncoding; - compressLong = longEncoding != LongEncoding.LE_RAW_BYTES; + compressLong = longEncoding != LongEncoding.FIXED_INT64; compressIntArray = builder.compressIntArray; compressLongArray = builder.compressLongArray; requireClassRegistration = builder.requireClassRegistration; diff --git a/java/fory-core/src/main/java/org/apache/fory/config/ForyBuilder.java b/java/fory-core/src/main/java/org/apache/fory/config/ForyBuilder.java index 4f5687cdd9..55c98ab114 100644 --- a/java/fory-core/src/main/java/org/apache/fory/config/ForyBuilder.java +++ b/java/fory-core/src/main/java/org/apache/fory/config/ForyBuilder.java @@ -67,7 +67,7 @@ public final class ForyBuilder { boolean timeRefIgnored = true; ClassLoader classLoader; boolean compressInt = true; - public LongEncoding longEncoding = LongEncoding.HYBRID; + public LongEncoding longEncoding = LongEncoding.TAGGED_INT64; boolean compressIntArray = false; boolean compressLongArray = false; boolean compressString = false; @@ -183,11 +183,12 @@ public ForyBuilder withIntCompressed(boolean intCompressed) { } /** - * Use variable length encoding for long. Enabled by default, use {@link LongEncoding#HYBRID} (Small - * long as int) for long encoding. + * Use variable length encoding for long. Enabled by default, use {@link + * LongEncoding#TAGGED_INT64} (Small long as int) for long encoding. 
*/ public ForyBuilder withLongCompressed(boolean longCompressed) { - return withLongCompressed(longCompressed ? LongEncoding.HYBRID : LongEncoding.LE_RAW_BYTES); + return withLongCompressed( + longCompressed ? LongEncoding.TAGGED_INT64 : LongEncoding.FIXED_INT64); } /** Use variable length encoding for long. */ diff --git a/java/fory-core/src/main/java/org/apache/fory/config/LongEncoding.java b/java/fory-core/src/main/java/org/apache/fory/config/LongEncoding.java index 5474a15006..d937e5f7fa 100644 --- a/java/fory-core/src/main/java/org/apache/fory/config/LongEncoding.java +++ b/java/fory-core/src/main/java/org/apache/fory/config/LongEncoding.java @@ -20,19 +20,20 @@ package org.apache.fory.config; /** - * Encoding option for long. Default encoding is fory SLI(Small long as int) encoding: {@link #HYBRID}. + * Encoding option for long. Default encoding is fory SLI(Small long as int) encoding: {@link + * #TAGGED_INT64}. */ public enum LongEncoding { /** - * Fory HYBRID(Small long as int) Encoding: + * Fory Tagged int64 Encoding: *
  • If long is in [0xc0000000, 0x3fffffff], encode as 4 bytes int: `| little-endian: ((int) * value) << 1 |` *
  • Otherwise write as 9 bytes: `| 0b1 | little-endian 8bytes long |`. * - *

    Faster than {@link #VARINT64}, but compression is not good as {@link #VARINT64} such as for ints - * in short range. + *

    Faster than {@link #VARINT64}, but compression is not good as {@link #VARINT64} such as + * for ints in short range. */ - HYBRID, + TAGGED_INT64, /** * Fory Progressive Variable-length Long Encoding: *

  • positive long format: first bit in every byte indicate whether has next byte, then next @@ -42,5 +43,5 @@ public enum LongEncoding { */ VARINT64, /** Write long as little endian 8bytes, no compression. */ - LE_RAW_BYTES, + FIXED_INT64, } diff --git a/java/fory-core/src/main/java/org/apache/fory/memory/MemoryBuffer.java b/java/fory-core/src/main/java/org/apache/fory/memory/MemoryBuffer.java index c28a3e9525..3292eebf0a 100644 --- a/java/fory-core/src/main/java/org/apache/fory/memory/MemoryBuffer.java +++ b/java/fory-core/src/main/java/org/apache/fory/memory/MemoryBuffer.java @@ -1137,28 +1137,28 @@ public int _unsafeWriteVarUint64(long value) { } /** - * Write signed long using fory Hybrid(Small long as int) encoding. If long is in [0xc0000000, 0x3fffffff], - * encode as 4 bytes int: | little-endian: ((int) value) << 1 |; Otherwise write as 9 bytes: | 0b1 - * | little-endian 8bytes long | + * Write signed long using fory Tagged(Small long as int) encoding. If long is in [0xc0000000, + * 0x3fffffff], encode as 4 bytes int: | little-endian: ((int) value) << 1 |; Otherwise write as 9 + * bytes: | 0b1 | little-endian 8bytes long | */ - public int writeHybridInt64(long value) { + public int writeTaggedInt64(long value) { ensure(writerIndex + 9); - return _unsafeWriteHybridInt64(value); + return _unsafeWriteTaggedInt64(value); } /** - * Write unsigned long using fory Hybrid(Small long as int) encoding. If long is in [0, + * Write unsigned long using fory Tagged(Small long as int) encoding. If long is in [0, * 0x7fffffff], encode as 4 bytes int: | little-endian: ((int) value) << 1 |; Otherwise write as 9 * bytes: | 0b1 | little-endian 8bytes long | */ - public int writeHybridUint64(long value) { + public int writeTaggedUint64(long value) { ensure(writerIndex + 9); - return _unsafeWriteHybridUint64(value); + return _unsafeWriteTaggedUint64(value); } - /** Write unsigned long using fory Hybrid(Small Long as Int) encoding. 
*/ + /** Write unsigned long using fory Tagged(Small Long as Int) encoding. */ // CHECKSTYLE.OFF:MethodName - public int _unsafeWriteHybridUint64(long value) { + public int _unsafeWriteTaggedUint64(long value) { // CHECKSTYLE.ON:MethodName final int writerIndex = this.writerIndex; final long pos = address + writerIndex; @@ -1186,9 +1186,9 @@ public int _unsafeWriteHybridUint64(long value) { private static final long HALF_MIN_INT_VALUE = Integer.MIN_VALUE / 2; private static final byte BIG_LONG_FLAG = 0b1; // bit 0 set, means big long. - /** Write long using fory Hybrid(Small Long as Int) encoding. */ + /** Write long using fory Tagged(Small Long as Int) encoding. */ // CHECKSTYLE.OFF:MethodName - public int _unsafeWriteHybridInt64(long value) { + public int _unsafeWriteTaggedInt64(long value) { // CHECKSTYLE.ON:MethodName final int writerIndex = this.writerIndex; final long pos = address + writerIndex; @@ -1523,27 +1523,27 @@ public long _readInt64OnBE() { return Long.reverseBytes(UNSAFE.getLong(heapMemory, address + readerIdx)); } - /** Read signed fory Hybrid(Small Long as Int) encoded long. */ - public long readHybridInt64() { + /** Read signed fory Tagged(Small Long as Int) encoded long. */ + public long readTaggedInt64() { if (LITTLE_ENDIAN) { - return _readHybridInt64OnLE(); + return _readTaggedInt64OnLE(); } else { - return _readHybridInt64OnBE(); + return _readTaggedInt64OnBE(); } } - /** Read unsigned fory Hybrid(Small Long as Int) encoded long. */ - public long readHybridUint64() { + /** Read unsigned fory Tagged(Small Long as Int) encoded long. 
*/ + public long readTaggedUint64() { if (LITTLE_ENDIAN) { - return _readHybridUint64OnLE(); + return _readTaggedUint64OnLE(); } else { - return _readHybridUint64OnBE(); + return _readTaggedUint64OnBE(); } } @CodegenInvoke // CHECKSTYLE.OFF:MethodName - public long _readHybridUint64OnLE() { + public long _readTaggedUint64OnLE() { // CHECKSTYLE.ON:MethodName final int readIdx = readerIndex; int diff = size - readIdx; @@ -1565,7 +1565,7 @@ public long _readHybridUint64OnLE() { @CodegenInvoke // CHECKSTYLE.OFF:MethodName - public long _readHybridUint64OnBE() { + public long _readTaggedUint64OnBE() { // CHECKSTYLE.ON:MethodName final int readIdx = readerIndex; int diff = size - readIdx; @@ -1587,7 +1587,7 @@ public long _readHybridUint64OnBE() { @CodegenInvoke // CHECKSTYLE.OFF:MethodName - public long _readHybridInt64OnLE() { + public long _readTaggedInt64OnLE() { // CHECKSTYLE.ON:MethodName // Duplicate and manual inline for performance. // noinspection Duplicates @@ -1611,7 +1611,7 @@ public long _readHybridInt64OnLE() { @CodegenInvoke // CHECKSTYLE.OFF:MethodName - public long _readHybridInt64OnBE() { + public long _readTaggedInt64OnBE() { // CHECKSTYLE.ON:MethodName // noinspection Duplicates final int readIdx = readerIndex; diff --git a/java/fory-core/src/main/java/org/apache/fory/meta/ClassDef.java b/java/fory-core/src/main/java/org/apache/fory/meta/ClassDef.java index aaa2444c6f..ae8e6cb408 100644 --- a/java/fory-core/src/main/java/org/apache/fory/meta/ClassDef.java +++ b/java/fory-core/src/main/java/org/apache/fory/meta/ClassDef.java @@ -374,6 +374,9 @@ public List getDescriptors(TypeResolver resolver, Class cls) { } } Descriptor newDesc = fieldInfo.toDescriptor(resolver, descriptor); + if (newDesc.getField() == null) { + System.out.println(); + } descriptors.add(newDesc); } } diff --git a/java/fory-core/src/main/java/org/apache/fory/meta/ClassDefDecoder.java b/java/fory-core/src/main/java/org/apache/fory/meta/ClassDefDecoder.java index 46657a4fd6..f912017f1d 
100644 --- a/java/fory-core/src/main/java/org/apache/fory/meta/ClassDefDecoder.java +++ b/java/fory-core/src/main/java/org/apache/fory/meta/ClassDefDecoder.java @@ -115,11 +115,10 @@ private static List readFieldsInfo( List fieldInfos = new ArrayList<>(numFields); for (int i = 0; i < numFields; i++) { int header = buffer.readByte() & 0xff; - // `3 bits size + 2 bits field name encoding + polymorphism flag + nullability flag + ref - // tracking flag` - int encodingFlags = (header >>> 3) & 0b11; + // `3 bits size + 2 bits field name encoding + nullability flag + ref tracking flag` + int encodingFlags = (header >>> 2) & 0b11; boolean useTagID = encodingFlags == 3; - int size = header >>> 5; + int size = header >>> 4; if (size == 7) { size += buffer.readVarUint32Small7(); } @@ -138,12 +137,11 @@ private static List readFieldsInfo( fieldName = Encoders.FIELD_NAME_DECODER.decode(buffer.readBytes(size), encoding); } - boolean isMonomorphic = (header & 0b100) != 0; boolean nullable = (header & 0b010) != 0; boolean trackingRef = (header & 0b001) != 0; int typeId = buffer.readVarUint32Small14(); FieldType fieldType = - FieldTypes.FieldType.read(buffer, resolver, isMonomorphic, nullable, trackingRef, typeId); + FieldTypes.FieldType.read(buffer, resolver, nullable, trackingRef, typeId); if (useTagID) { fieldInfos.add(new FieldInfo(className, fieldName, fieldType, tagId)); diff --git a/java/fory-core/src/main/java/org/apache/fory/meta/ClassDefEncoder.java b/java/fory-core/src/main/java/org/apache/fory/meta/ClassDefEncoder.java index 8b057c5e3f..62eb5b7a7e 100644 --- a/java/fory-core/src/main/java/org/apache/fory/meta/ClassDefEncoder.java +++ b/java/fory-core/src/main/java/org/apache/fory/meta/ClassDefEncoder.java @@ -280,10 +280,8 @@ static Map> groupClassFields(List fieldsInfo) static void writeFieldsInfo(MemoryBuffer buffer, List fields) { for (FieldInfo fieldInfo : fields) { FieldType fieldType = fieldInfo.getFieldType(); - // `3 bits size + 2 bits field name encoding + 
polymorphism flag + nullability flag + ref - // tracking flag` - int header = ((fieldType.isMonomorphic() ? 1 : 0) << 2); - header |= ((fieldType.nullable() ? 1 : 0) << 1); + // `3 bits size + 2 bits field name encoding + nullability flag + ref tracking flag` + int header = ((fieldType.nullable() ? 1 : 0) << 1); header |= ((fieldType.trackingRef() ? 1 : 0)); // Encoding `UTF8/ALL_TO_LOWER_SPECIAL/LOWER_UPPER_DIGIT_SPECIAL/TAG_ID` MetaString metaString = Encoders.encodeFieldName(fieldInfo.getFieldName()); @@ -294,14 +292,14 @@ static void writeFieldsInfo(MemoryBuffer buffer, List fields) { size = fieldInfo.getFieldId(); encodingFlags = 3; } - header |= (byte) (encodingFlags << 3); + header |= (byte) (encodingFlags << 2); boolean bigSize = size >= 7; if (bigSize) { - header |= 0b11100000; + header |= 0b01110000; buffer.writeByte(header); buffer.writeVarUint32Small7(size - 7); } else { - header |= (size << 5); + header |= (size << 4); buffer.writeByte(header); } if (!fieldInfo.hasFieldId()) { diff --git a/java/fory-core/src/main/java/org/apache/fory/meta/FieldTypes.java b/java/fory-core/src/main/java/org/apache/fory/meta/FieldTypes.java index 1923707831..86370244d8 100644 --- a/java/fory-core/src/main/java/org/apache/fory/meta/FieldTypes.java +++ b/java/fory-core/src/main/java/org/apache/fory/meta/FieldTypes.java @@ -47,7 +47,6 @@ import org.apache.fory.resolver.XtypeResolver; import org.apache.fory.serializer.NonexistentClass; import org.apache.fory.type.Descriptor; -import org.apache.fory.type.FinalObjectTypeStub; import org.apache.fory.type.GenericType; import org.apache.fory.type.TypeUtils; import org.apache.fory.type.Types; @@ -58,9 +57,7 @@ public class FieldTypes { /** Returns true if can use current field type. 
*/ static boolean useFieldType(Class parsedType, Descriptor descriptor) { - if (parsedType.isEnum() - || parsedType.isAssignableFrom(descriptor.getRawType()) - || parsedType == FinalObjectTypeStub.class) { + if (parsedType.isEnum() || parsedType.isAssignableFrom(descriptor.getRawType())) { return true; } if (parsedType.isArray()) { @@ -69,7 +66,7 @@ static boolean useFieldType(Class parsedType, Descriptor descriptor) { if (!field.getType().isArray() || getArrayDimensions(field.getType()) != info.f1) { return false; } - return info.f0 == FinalObjectTypeStub.class || info.f0.isEnum(); + return info.f0.isEnum(); } return false; } @@ -87,8 +84,12 @@ private static FieldType buildFieldType( Preconditions.checkNotNull(genericType); Class rawType = genericType.getCls(); boolean isXlang = resolver.getFory().isCrossLanguage(); - int xtypeId = -1; - if (isXlang) { + // Get type ID for both xlang and native mode + // This supports unsigned types and field-configurable compression in both modes + int xtypeId; + if (field != null && TypeUtils.unwrap(rawType).isPrimitive()) { + xtypeId = Types.getDescriptorTypeId(resolver.getFory(), field); + } else { ClassInfo info = resolver.getClassInfo(genericType.getCls(), false); if (info != null) { xtypeId = info.getXtypeId(); @@ -96,10 +97,9 @@ private static FieldType buildFieldType( xtypeId = Types.UNKNOWN; } } - boolean isMonomorphic = genericType.isMonomorphic(); // For xlang: ref tracking is false by default (no shared ownership like Rust's Rc/Arc) // For native: use the type's default tracking behavior - boolean trackingRef = isXlang ? 
false : genericType.trackingRef(resolver); + boolean trackingRef = !isXlang && genericType.trackingRef(resolver); // For xlang: nullable is false by default (aligned with all languages) // Exception: Optional types are nullable (like Rust's Option) // For native: non-primitive types are nullable by default @@ -125,7 +125,6 @@ private static FieldType buildFieldType( if (COLLECTION_TYPE.isSupertypeOf(genericType.getTypeRef())) { return new CollectionFieldType( xtypeId, - isMonomorphic, nullable, trackingRef, buildFieldType( @@ -137,7 +136,6 @@ private static FieldType buildFieldType( } else if (MAP_TYPE.isSupertypeOf(genericType.getTypeRef())) { return new MapFieldType( xtypeId, - isMonomorphic, nullable, trackingRef, buildFieldType( @@ -152,14 +150,17 @@ private static FieldType buildFieldType( genericType.getTypeParameter1() == null ? GenericType.build(Object.class) : genericType.getTypeParameter1())); + } else if (TypeUtils.unwrap(rawType).isPrimitive()) { + // unified basic types for xlang and native mode + return new RegisteredFieldType(nullable, trackingRef, xtypeId); } else { if (isXlang && !Types.isUserDefinedType((byte) xtypeId) && resolver.isRegisteredById(rawType)) { - return new RegisteredFieldType(isMonomorphic, nullable, trackingRef, xtypeId); + return new RegisteredFieldType(nullable, trackingRef, xtypeId); } else if (!isXlang && resolver.isRegisteredById(rawType)) { Short classId = ((ClassResolver) resolver).getRegisteredClassId(rawType); - return new RegisteredFieldType(isMonomorphic, nullable, trackingRef, classId); + return new RegisteredFieldType(nullable, trackingRef, classId); } else { if (rawType.isEnum()) { return new EnumFieldType(nullable, xtypeId); @@ -172,42 +173,34 @@ private static FieldType buildFieldType( if (isXlang && !elemType.isPrimitive()) { return new CollectionFieldType( xtypeId, - isMonomorphic, nullable, trackingRef, buildFieldType(resolver, null, GenericType.build(elemType))); } - Tuple2, Integer> info = 
getArrayComponentInfo(rawType); + Tuple2, Integer> arrayComponentInfo = getArrayComponentInfo(rawType); return new ArrayFieldType( xtypeId, - isMonomorphic, nullable, trackingRef, - buildFieldType(resolver, null, GenericType.build(info.f0)), - info.f1); + buildFieldType(resolver, null, GenericType.build(arrayComponentInfo.f0)), + arrayComponentInfo.f1); } - return new ObjectFieldType(xtypeId, isMonomorphic, nullable, trackingRef); + return new ObjectFieldType(xtypeId, nullable, trackingRef); } } } public abstract static class FieldType implements Serializable { protected final int xtypeId; - protected final boolean isMonomorphic; protected final boolean nullable; protected final boolean trackingRef; - public FieldType(int xtypeId, boolean isMonomorphic, boolean nullable, boolean trackingRef) { - this.isMonomorphic = isMonomorphic; + public FieldType(int xtypeId, boolean nullable, boolean trackingRef) { this.trackingRef = trackingRef; this.nullable = nullable; this.xtypeId = xtypeId; } - public boolean isMonomorphic() { - return isMonomorphic; - } - public boolean trackingRef() { return trackingRef; } @@ -220,8 +213,6 @@ public boolean nullable() { * Convert a serializable field type to type token. If field type is a generic type with * generics, the generics will be built up recursively. The final leaf object type will be built * from class id or class stub. - * - * @see FinalObjectTypeStub */ public abstract TypeRef toTypeToken(TypeResolver classResolver, TypeRef declared); @@ -234,37 +225,34 @@ public boolean equals(Object o) { return false; } FieldType fieldType = (FieldType) o; - return isMonomorphic == fieldType.isMonomorphic - && trackingRef == fieldType.trackingRef - && nullable == fieldType.nullable; + return trackingRef == fieldType.trackingRef && nullable == fieldType.nullable; } @Override public int hashCode() { - return Objects.hash(isMonomorphic, nullable, trackingRef); + return Objects.hash(nullable, trackingRef); } /** Write field type info. 
*/ public void write(MemoryBuffer buffer, boolean writeHeader) { - byte header = (byte) ((isMonomorphic ? 1 : 0) << 1); // header of nested generic fields in collection/map will be written independently - header |= (byte) (trackingRef ? 1 : 0); + byte header = (byte) (trackingRef ? 1 : 0); if (this instanceof RegisteredFieldType) { short classId = ((RegisteredFieldType) this).getClassId(); - buffer.writeVarUint32Small7(writeHeader ? ((5 + classId) << 2) | header : 5 + classId); + buffer.writeVarUint32Small7(writeHeader ? ((5 + classId) << 1) | header : 5 + classId); } else if (this instanceof EnumFieldType) { - buffer.writeVarUint32Small7(writeHeader ? ((4) << 2) | header : 4); + buffer.writeVarUint32Small7(writeHeader ? ((4) << 1) | header : 4); } else if (this instanceof ArrayFieldType) { ArrayFieldType arrayFieldType = (ArrayFieldType) this; - buffer.writeVarUint32Small7(writeHeader ? ((3) << 2) | header : 3); + buffer.writeVarUint32Small7(writeHeader ? ((3) << 1) | header : 3); buffer.writeVarUint32Small7(arrayFieldType.getDimensions()); (arrayFieldType).getComponentType().write(buffer); } else if (this instanceof CollectionFieldType) { - buffer.writeVarUint32Small7(writeHeader ? ((2) << 2) | header : 2); + buffer.writeVarUint32Small7(writeHeader ? ((2) << 1) | header : 2); // TODO remove it when new collection deserialization jit finished. ((CollectionFieldType) this).getElementType().write(buffer); } else if (this instanceof MapFieldType) { - buffer.writeVarUint32Small7(writeHeader ? ((1) << 2) | header : 1); + buffer.writeVarUint32Small7(writeHeader ? ((1) << 1) | header : 1); // TODO remove it when new map deserialization jit finished. 
MapFieldType mapFieldType = (MapFieldType) this; mapFieldType.getKeyType().write(buffer); @@ -281,34 +269,32 @@ public void write(MemoryBuffer buffer) { public static FieldType read(MemoryBuffer buffer, TypeResolver resolver) { int header = buffer.readVarUint32Small7(); - boolean isMonomorphic = (header & 0b10) != 0; boolean trackingRef = (header & 0b1) != 0; // For nested types (in collections/maps), nullable defaults to true - return read(buffer, resolver, isMonomorphic, true, trackingRef, header >>> 2); + return read(buffer, resolver, true, trackingRef, header >>> 1); } /** Read field type info. */ public static FieldType read( MemoryBuffer buffer, TypeResolver resolver, - boolean isFinal, boolean nullable, boolean trackingRef, int typeId) { if (typeId == 0) { - return new ObjectFieldType(-1, isFinal, nullable, trackingRef); + return new ObjectFieldType(-1, nullable, trackingRef); } else if (typeId == 1) { return new MapFieldType( - -1, isFinal, nullable, trackingRef, read(buffer, resolver), read(buffer, resolver)); + -1, nullable, trackingRef, read(buffer, resolver), read(buffer, resolver)); } else if (typeId == 2) { - return new CollectionFieldType(-1, isFinal, nullable, trackingRef, read(buffer, resolver)); + return new CollectionFieldType(-1, nullable, trackingRef, read(buffer, resolver)); } else if (typeId == 3) { int dims = buffer.readVarUint32Small7(); - return new ArrayFieldType(-1, isFinal, nullable, trackingRef, read(buffer, resolver), dims); + return new ArrayFieldType(-1, nullable, trackingRef, read(buffer, resolver), dims); } else if (typeId == 4) { return new EnumFieldType(nullable, -1); } else { - return new RegisteredFieldType(isFinal, nullable, trackingRef, (typeId - 5)); + return new RegisteredFieldType(nullable, trackingRef, (typeId - 5)); } } @@ -358,23 +344,17 @@ public static FieldType xread( switch (xtypeId & 0xff) { case Types.LIST: case Types.SET: - return new CollectionFieldType( - xtypeId, true, nullable, trackingRef, xread(buffer, 
resolver)); + return new CollectionFieldType(xtypeId, nullable, trackingRef, xread(buffer, resolver)); case Types.MAP: return new MapFieldType( - xtypeId, - true, - nullable, - trackingRef, - xread(buffer, resolver), - xread(buffer, resolver)); + xtypeId, nullable, trackingRef, xread(buffer, resolver), xread(buffer, resolver)); case Types.ENUM: case Types.NAMED_ENUM: return new EnumFieldType(nullable, xtypeId); case Types.UNION: return new UnionFieldType(nullable, trackingRef); case Types.UNKNOWN: - return new ObjectFieldType(xtypeId, false, nullable, trackingRef); + return new ObjectFieldType(xtypeId, nullable, trackingRef); default: { if (!Types.isUserDefinedType((byte) xtypeId)) { @@ -384,13 +364,11 @@ public static FieldType xread( // when remote sends a type ID that's not registered here. // Fall back to ObjectFieldType to handle gracefully. LOG.warn("Type {} not registered locally, treating as ObjectFieldType", xtypeId); - return new ObjectFieldType(xtypeId, false, nullable, trackingRef); + return new ObjectFieldType(xtypeId, nullable, trackingRef); } - Class cls = classInfo.getCls(); - return new RegisteredFieldType( - resolver.isMonomorphic(cls), nullable, trackingRef, xtypeId); + return new RegisteredFieldType(nullable, trackingRef, xtypeId); } else { - return new ObjectFieldType(xtypeId, false, nullable, trackingRef); + return new ObjectFieldType(xtypeId, nullable, trackingRef); } } } @@ -401,9 +379,8 @@ public static FieldType xread( public static class RegisteredFieldType extends FieldType { private final short classId; - public RegisteredFieldType( - boolean isFinal, boolean nullable, boolean trackingRef, int classId) { - super(classId, isFinal, nullable, trackingRef); + public RegisteredFieldType(boolean nullable, boolean trackingRef, int classId) { + super(classId, nullable, trackingRef); this.classId = (short) classId; } @@ -436,13 +413,23 @@ public TypeRef toTypeToken(TypeResolver resolver, TypeRef declared) { } } } else { - cls = ((ClassResolver) 
resolver).getRegisteredClass(classId); + if (Types.isPrimitiveType(classId)) { + cls = Types.getClassForTypeId(classId); + if (declared != null + && TypeUtils.unwrap(declared.getRawType()) == TypeUtils.unwrap(cls)) { + // we still need correct type, the `read/write` should use `nullable` of `Descriptor` + // for serialization + return declared; + } + } else { + cls = ((ClassResolver) resolver).getRegisteredClass(classId); + } } if (cls == null) { LOG.warn("Class {} not registered, take it as Struct type for deserialization.", classId); cls = NonexistentClass.NonexistentMetaShared.class; } - return TypeRef.of(cls, new TypeExtMeta(nullable, trackingRef)); + return TypeRef.of(cls, new TypeExtMeta(classId, nullable, trackingRef)); } @Override @@ -468,9 +455,7 @@ public int hashCode() { @Override public String toString() { return "RegisteredFieldType{" - + "isMonomorphic=" - + isMonomorphic() - + ", nullable=" + + "nullable=" + nullable() + ", trackingRef=" + trackingRef() @@ -492,12 +477,8 @@ public static class CollectionFieldType extends FieldType { private final FieldType elementType; public CollectionFieldType( - int xtypeId, - boolean isFinal, - boolean nullable, - boolean trackingRef, - FieldType elementType) { - super(xtypeId, isFinal, nullable, trackingRef); + int xtypeId, boolean nullable, boolean trackingRef, FieldType elementType) { + super(xtypeId, nullable, trackingRef); this.elementType = elementType; } @@ -527,10 +508,10 @@ public TypeRef toTypeToken(TypeResolver classResolver, TypeRef declared) { } TypeRef elementType = this.elementType.toTypeToken(classResolver, declElementType); if (declared == null) { - return collectionOf(elementType, new TypeExtMeta(nullable, trackingRef)); + return collectionOf(elementType, new TypeExtMeta(xtypeId, nullable, trackingRef)); } TypeRef> collectionTypeRef = - collectionOf(declaredClass, elementType, new TypeExtMeta(nullable, trackingRef)); + collectionOf(declaredClass, elementType, new TypeExtMeta(xtypeId, nullable, 
trackingRef)); if (!declaredClass.isArray()) { if (declElementType.equals(elementType)) { return declared; @@ -549,7 +530,7 @@ public TypeRef toTypeToken(TypeResolver classResolver, TypeRef declared) { TypeRef typeRef = TypeRef.of( Array.newInstance(arrayType.getRawType(), 1).getClass(), - typeRefs.get(i).getExtInfo()); + typeRefs.get(i).getTypeExtMeta()); typeRefs.set(i, typeRef); } return typeRefs.get(typeRefs.size() - 1); @@ -580,8 +561,6 @@ public String toString() { return "CollectionFieldType{" + "elementType=" + elementType - + ", isFinal=" - + isMonomorphic() + ", nullable=" + nullable() + ", trackingRef=" @@ -604,12 +583,11 @@ public static class MapFieldType extends FieldType { public MapFieldType( int xtypeId, - boolean isFinal, boolean nullable, boolean trackingRef, FieldType keyType, FieldType valueType) { - super(xtypeId, isFinal, nullable, trackingRef); + super(xtypeId, nullable, trackingRef); this.keyType = keyType; this.valueType = valueType; } @@ -643,12 +621,12 @@ public TypeRef toTypeToken(TypeResolver classResolver, TypeRef declared) { declared.getRawType(), keyType.toTypeToken(classResolver, keyDecl), valueType.toTypeToken(classResolver, valueDecl), - new TypeExtMeta(nullable, trackingRef)); + new TypeExtMeta(xtypeId, nullable, trackingRef)); } return mapOf( keyType.toTypeToken(classResolver, keyDecl), valueType.toTypeToken(classResolver, valueDecl), - new TypeExtMeta(nullable, trackingRef)); + new TypeExtMeta(xtypeId, nullable, trackingRef)); } @Override @@ -678,8 +656,6 @@ public String toString() { + keyType + ", valueType=" + valueType - + ", isFinal=" - + isMonomorphic() + ", nullable=" + nullable() + ", trackingRef=" @@ -690,7 +666,7 @@ public String toString() { public static class EnumFieldType extends FieldType { private EnumFieldType(boolean nullable, int xtypeId) { - super(xtypeId, true, nullable, false); + super(xtypeId, nullable, false); } @Override @@ -711,19 +687,17 @@ public static class ArrayFieldType extends FieldType { 
private final FieldType componentType; private final int dimensions; - public ArrayFieldType( - boolean isMonomorphic, boolean trackingRef, FieldType componentType, int dimensions) { - this(-1, isMonomorphic, true, trackingRef, componentType, dimensions); + public ArrayFieldType(boolean trackingRef, FieldType componentType, int dimensions) { + this(-1, true, trackingRef, componentType, dimensions); } public ArrayFieldType( int xtypeId, - boolean isMonomorphic, boolean nullable, boolean trackingRef, FieldType componentType, int dimensions) { - super(xtypeId, isMonomorphic, nullable, trackingRef); + super(xtypeId, nullable, trackingRef); this.componentType = componentType; this.dimensions = dimensions; } @@ -737,15 +711,13 @@ public TypeRef toTypeToken(TypeResolver classResolver, TypeRef declared) { Class componentRawType = componentTypeRef.getRawType(); if (NonexistentClass.class.isAssignableFrom(componentRawType)) { return TypeRef.of( - // We embed `isMonomorphic` flag in ObjectArraySerializer, so this flag can be ignored - // here. NonexistentClass.getNonexistentClass( componentType instanceof EnumFieldType, dimensions, true), - new TypeExtMeta(nullable, trackingRef)); + new TypeExtMeta(xtypeId, nullable, trackingRef)); } else { return TypeRef.of( Array.newInstance(componentRawType, new int[dimensions]).getClass(), - new TypeExtMeta(nullable, trackingRef)); + new TypeExtMeta(xtypeId, nullable, trackingRef)); } } @@ -784,8 +756,6 @@ public String toString() { + componentType + ", dimensions=" + dimensions - + ", isMonomorphic=" - + isMonomorphic + ", nullable=" + nullable + ", trackingRef=" @@ -797,15 +767,13 @@ public String toString() { /** Class for field type which isn't registered and not collection/map type too. 
*/ public static class ObjectFieldType extends FieldType { - public ObjectFieldType(int xtypeId, boolean isFinal, boolean nullable, boolean trackingRef) { - super(xtypeId, isFinal, nullable, trackingRef); + public ObjectFieldType(int xtypeId, boolean nullable, boolean trackingRef) { + super(xtypeId, nullable, trackingRef); } @Override public TypeRef toTypeToken(TypeResolver classResolver, TypeRef declared) { - return isMonomorphic() - ? TypeRef.of(FinalObjectTypeStub.class, new TypeExtMeta(nullable, trackingRef)) - : TypeRef.of(Object.class, new TypeExtMeta(nullable, trackingRef)); + return TypeRef.of(Object.class, new TypeExtMeta(xtypeId, nullable, trackingRef)); } @Override @@ -823,8 +791,6 @@ public String toString() { return "ObjectFieldType{" + "xtypeId=" + xtypeId - + ", isMonomorphic=" - + isMonomorphic + ", nullable=" + nullable + ", trackingRef=" @@ -833,11 +799,11 @@ public String toString() { } } - /** Class for Union field type. Union types are always monomorphic and use declared type. */ + /** Class for Union field type. Union types use declared type. 
*/ public static class UnionFieldType extends FieldType { public UnionFieldType(boolean nullable, boolean trackingRef) { - super(Types.UNION, true, nullable, trackingRef); + super(Types.UNION, nullable, trackingRef); } @Override @@ -848,7 +814,7 @@ public TypeRef toTypeToken(TypeResolver classResolver, TypeRef declared) { } // Fallback to base Union class if no declared type return TypeRef.of( - org.apache.fory.type.union.Union.class, new TypeExtMeta(nullable, trackingRef)); + org.apache.fory.type.union.Union.class, new TypeExtMeta(xtypeId, nullable, trackingRef)); } @Override diff --git a/java/fory-core/src/main/java/org/apache/fory/meta/TypeExtMeta.java b/java/fory-core/src/main/java/org/apache/fory/meta/TypeExtMeta.java index 90b17daa7f..19e37bed70 100644 --- a/java/fory-core/src/main/java/org/apache/fory/meta/TypeExtMeta.java +++ b/java/fory-core/src/main/java/org/apache/fory/meta/TypeExtMeta.java @@ -20,14 +20,20 @@ package org.apache.fory.meta; public class TypeExtMeta { + private final int typeId; private final boolean nullable; private final boolean trackingRef; - TypeExtMeta(boolean nullable, boolean trackingRef) { + TypeExtMeta(int typeId, boolean nullable, boolean trackingRef) { + this.typeId = typeId; this.nullable = nullable; this.trackingRef = trackingRef; } + public int typeId() { + return typeId; + } + public boolean nullable() { return nullable; } @@ -38,6 +44,13 @@ public boolean trackingRef() { @Override public String toString() { - return "TypeExtMeta{" + "nullable=" + nullable + ", trackingRef=" + trackingRef + '}'; + return "TypeExtMeta{" + + "typeId=" + + typeId + + ", nullable=" + + nullable + + ", trackingRef=" + + trackingRef + + '}'; } } diff --git a/java/fory-core/src/main/java/org/apache/fory/reflect/TypeRef.java b/java/fory-core/src/main/java/org/apache/fory/reflect/TypeRef.java index 321b01a148..dbe5c73fbf 100644 --- a/java/fory-core/src/main/java/org/apache/fory/reflect/TypeRef.java +++ 
b/java/fory-core/src/main/java/org/apache/fory/reflect/TypeRef.java @@ -34,13 +34,14 @@ import java.util.stream.Collectors; import java.util.stream.Stream; import javax.annotation.CheckForNull; +import org.apache.fory.meta.TypeExtMeta; import org.apache.fory.type.TypeUtils; // Mostly derived from Guava 32.1.2 com.google.common.reflect.TypeToken // https://github.com/google/guava/blob/9f6a3840/guava/src/com/google/common/reflect/TypeToken.java public class TypeRef { private final Type type; - private final Object extInfo; + private final TypeExtMeta typeExtMeta; private transient Class rawType; private transient Map typeMappings; @@ -58,27 +59,27 @@ public class TypeRef { */ protected TypeRef() { this.type = capture(); - this.extInfo = null; + this.typeExtMeta = null; } - protected TypeRef(Object extInfo) { + protected TypeRef(TypeExtMeta typeExtMeta) { this.type = capture(); - this.extInfo = extInfo; + this.typeExtMeta = typeExtMeta; } private TypeRef(Class declaringClass) { this.type = declaringClass; - this.extInfo = null; + this.typeExtMeta = null; } - private TypeRef(Class declaringClass, Object extInfo) { + private TypeRef(Class declaringClass, TypeExtMeta typeExtMeta) { this.type = declaringClass; - this.extInfo = extInfo; + this.typeExtMeta = typeExtMeta; } private TypeRef(Type type) { this.type = type; - this.extInfo = null; + this.typeExtMeta = null; } /** Returns an instance of type token that wraps {@code type}. */ @@ -86,7 +87,7 @@ public static TypeRef of(Class clazz) { return new TypeRef<>(clazz); } - public static TypeRef of(Class clazz, Object extInfo) { + public static TypeRef of(Class clazz, TypeExtMeta extInfo) { return new TypeRef<>(clazz, extInfo); } @@ -158,8 +159,8 @@ private static Stream> getRawTypes(Type... types) { }); } - public Object getExtInfo() { - return extInfo; + public TypeExtMeta getTypeExtMeta() { + return typeExtMeta; } /** Returns true if this type is one of the primitive types (including {@code void}). 
*/ diff --git a/java/fory-core/src/main/java/org/apache/fory/resolver/ClassResolver.java b/java/fory-core/src/main/java/org/apache/fory/resolver/ClassResolver.java index c026bdeaa7..85b04eb6e8 100644 --- a/java/fory-core/src/main/java/org/apache/fory/resolver/ClassResolver.java +++ b/java/fory-core/src/main/java/org/apache/fory/resolver/ClassResolver.java @@ -147,6 +147,7 @@ import org.apache.fory.type.DescriptorGrouper; import org.apache.fory.type.GenericType; import org.apache.fory.type.TypeUtils; +import org.apache.fory.type.Types; import org.apache.fory.type.union.Union; import org.apache.fory.util.GraalvmSupport; import org.apache.fory.util.Preconditions; @@ -192,52 +193,43 @@ public class ClassResolver extends TypeResolver { /** Flag value indicating no class ID has been assigned. */ public static final short NO_CLASS_ID = TypeResolver.NO_CLASS_ID; - public static final short LAMBDA_STUB_ID = 1; - public static final short JDK_PROXY_STUB_ID = 2; - public static final short REPLACE_STUB_ID = 3; - /** - * Base offset for user-registered class IDs. User IDs are internally stored as (userId + {@value - * #USER_ID_BASE}). The first {@value #USER_ID_BASE} IDs (0 to {@value #USER_ID_BASE} - 1) are - * reserved for Fory's internal types. + * Base offset for user-registered class IDs. User IDs are internally stored as `userId + + * USER_ID_BASE`. 0 to `USER_ID_BASE` are reserved for Fory's internal types. */ public static final short USER_ID_BASE = 256; + public static final int NATIVE_START_ID = Types.STRING + 1; + public static final int VOID_ID = NATIVE_START_ID; + public static final int CHAR_ID = NATIVE_START_ID + 1; // Note: following pre-defined class id should be continuous, since they may be used based range. 
- public static final short PRIMITIVE_VOID_CLASS_ID = (short) (REPLACE_STUB_ID + 1); - public static final short PRIMITIVE_BOOLEAN_CLASS_ID = (short) (PRIMITIVE_VOID_CLASS_ID + 1); - public static final short PRIMITIVE_BYTE_CLASS_ID = (short) (PRIMITIVE_VOID_CLASS_ID + 2); - public static final short PRIMITIVE_CHAR_CLASS_ID = (short) (PRIMITIVE_VOID_CLASS_ID + 3); - public static final short PRIMITIVE_SHORT_CLASS_ID = (short) (PRIMITIVE_VOID_CLASS_ID + 4); - public static final short PRIMITIVE_INT_CLASS_ID = (short) (PRIMITIVE_VOID_CLASS_ID + 5); - public static final short PRIMITIVE_FLOAT_CLASS_ID = (short) (PRIMITIVE_VOID_CLASS_ID + 6); - public static final short PRIMITIVE_LONG_CLASS_ID = (short) (PRIMITIVE_VOID_CLASS_ID + 7); - public static final short PRIMITIVE_DOUBLE_CLASS_ID = (short) (PRIMITIVE_VOID_CLASS_ID + 8); - public static final short VOID_CLASS_ID = (short) (PRIMITIVE_DOUBLE_CLASS_ID + 1); - public static final short BOOLEAN_CLASS_ID = (short) (VOID_CLASS_ID + 1); - public static final short BYTE_CLASS_ID = (short) (VOID_CLASS_ID + 2); - public static final short CHAR_CLASS_ID = (short) (VOID_CLASS_ID + 3); - public static final short SHORT_CLASS_ID = (short) (VOID_CLASS_ID + 4); - public static final short INTEGER_CLASS_ID = (short) (VOID_CLASS_ID + 5); - public static final short FLOAT_CLASS_ID = (short) (VOID_CLASS_ID + 6); - public static final short LONG_CLASS_ID = (short) (VOID_CLASS_ID + 7); - public static final short DOUBLE_CLASS_ID = (short) (VOID_CLASS_ID + 8); - public static final short STRING_CLASS_ID = (short) (VOID_CLASS_ID + 9); - public static final short PRIMITIVE_BOOLEAN_ARRAY_CLASS_ID = (short) (STRING_CLASS_ID + 1); - public static final short PRIMITIVE_BYTE_ARRAY_CLASS_ID = (short) (STRING_CLASS_ID + 2); - public static final short PRIMITIVE_CHAR_ARRAY_CLASS_ID = (short) (STRING_CLASS_ID + 3); - public static final short PRIMITIVE_SHORT_ARRAY_CLASS_ID = (short) (STRING_CLASS_ID + 4); - public static final short 
PRIMITIVE_INT_ARRAY_CLASS_ID = (short) (STRING_CLASS_ID + 5); - public static final short PRIMITIVE_FLOAT_ARRAY_CLASS_ID = (short) (STRING_CLASS_ID + 6); - public static final short PRIMITIVE_LONG_ARRAY_CLASS_ID = (short) (STRING_CLASS_ID + 7); - public static final short PRIMITIVE_DOUBLE_ARRAY_CLASS_ID = (short) (STRING_CLASS_ID + 8); - public static final short STRING_ARRAY_CLASS_ID = (short) (PRIMITIVE_DOUBLE_ARRAY_CLASS_ID + 1); - public static final short OBJECT_ARRAY_CLASS_ID = (short) (PRIMITIVE_DOUBLE_ARRAY_CLASS_ID + 2); - public static final short ARRAYLIST_CLASS_ID = (short) (PRIMITIVE_DOUBLE_ARRAY_CLASS_ID + 3); - public static final short HASHMAP_CLASS_ID = (short) (PRIMITIVE_DOUBLE_ARRAY_CLASS_ID + 4); - public static final short HASHSET_CLASS_ID = (short) (PRIMITIVE_DOUBLE_ARRAY_CLASS_ID + 5); - public static final short CLASS_CLASS_ID = (short) (PRIMITIVE_DOUBLE_ARRAY_CLASS_ID + 6); - public static final short EMPTY_OBJECT_ID = (short) (PRIMITIVE_DOUBLE_ARRAY_CLASS_ID + 7); + public static final int PRIMITIVE_VOID_ID = NATIVE_START_ID + 2; + public static final int PRIMITIVE_BOOL_ID = NATIVE_START_ID + 3; + public static final int PRIMITIVE_INT8_ID = NATIVE_START_ID + 4; + public static final int PRIMITIVE_CHAR_ID = NATIVE_START_ID + 5; + public static final int PRIMITIVE_INT16_ID = NATIVE_START_ID + 6; + public static final int PRIMITIVE_INT32_ID = NATIVE_START_ID + 7; + public static final int PRIMITIVE_FLOAT32_ID = NATIVE_START_ID + 8; + public static final int PRIMITIVE_INT64_ID = NATIVE_START_ID + 9; + public static final int PRIMITIVE_FLOAT64_ID = NATIVE_START_ID + 10; + public static final int PRIMITIVE_BOOLEAN_ARRAY_ID = NATIVE_START_ID + 11; + public static final int PRIMITIVE_BYTE_ARRAY_ID = NATIVE_START_ID + 12; + public static final int PRIMITIVE_CHAR_ARRAY_ID = NATIVE_START_ID + 13; + public static final int PRIMITIVE_SHORT_ARRAY_ID = NATIVE_START_ID + 14; + public static final int PRIMITIVE_INT_ARRAY_ID = NATIVE_START_ID + 15; + public 
static final int PRIMITIVE_FLOAT_ARRAY_ID = NATIVE_START_ID + 16; + public static final int PRIMITIVE_LONG_ARRAY_ID = NATIVE_START_ID + 17; + public static final int PRIMITIVE_DOUBLE_ARRAY_ID = NATIVE_START_ID + 18; + public static final int STRING_ARRAY_ID = NATIVE_START_ID + 19; + public static final int OBJECT_ARRAY_ID = NATIVE_START_ID + 20; + public static final int ARRAYLIST_ID = NATIVE_START_ID + 21; + public static final int HASHMAP_ID = NATIVE_START_ID + 22; + public static final int HASHSET_ID = NATIVE_START_ID + 23; + public static final int CLASS_ID = NATIVE_START_ID + 24; + public static final int EMPTY_OBJECT_ID = NATIVE_START_ID + 25; + public static final short LAMBDA_STUB_ID = NATIVE_START_ID + 26; + public static final short JDK_PROXY_STUB_ID = NATIVE_START_ID + 27; + public static final short REPLACE_STUB_ID = NATIVE_START_ID + 28; private final Fory fory; XtypeResolver xtypeResolver; @@ -266,39 +258,39 @@ public void initialize() { registerInternal(LambdaSerializer.ReplaceStub.class, LAMBDA_STUB_ID); registerInternal(JdkProxySerializer.ReplaceStub.class, JDK_PROXY_STUB_ID); registerInternal(ReplaceResolveSerializer.ReplaceStub.class, REPLACE_STUB_ID); - registerInternal(void.class, PRIMITIVE_VOID_CLASS_ID); - registerInternal(boolean.class, PRIMITIVE_BOOLEAN_CLASS_ID); - registerInternal(byte.class, PRIMITIVE_BYTE_CLASS_ID); - registerInternal(char.class, PRIMITIVE_CHAR_CLASS_ID); - registerInternal(short.class, PRIMITIVE_SHORT_CLASS_ID); - registerInternal(int.class, PRIMITIVE_INT_CLASS_ID); - registerInternal(float.class, PRIMITIVE_FLOAT_CLASS_ID); - registerInternal(long.class, PRIMITIVE_LONG_CLASS_ID); - registerInternal(double.class, PRIMITIVE_DOUBLE_CLASS_ID); - registerInternal(Void.class, VOID_CLASS_ID); - registerInternal(Boolean.class, BOOLEAN_CLASS_ID); - registerInternal(Byte.class, BYTE_CLASS_ID); - registerInternal(Character.class, CHAR_CLASS_ID); - registerInternal(Short.class, SHORT_CLASS_ID); - registerInternal(Integer.class, 
INTEGER_CLASS_ID); - registerInternal(Float.class, FLOAT_CLASS_ID); - registerInternal(Long.class, LONG_CLASS_ID); - registerInternal(Double.class, DOUBLE_CLASS_ID); - registerInternal(String.class, STRING_CLASS_ID); - registerInternal(boolean[].class, PRIMITIVE_BOOLEAN_ARRAY_CLASS_ID); - registerInternal(byte[].class, PRIMITIVE_BYTE_ARRAY_CLASS_ID); - registerInternal(char[].class, PRIMITIVE_CHAR_ARRAY_CLASS_ID); - registerInternal(short[].class, PRIMITIVE_SHORT_ARRAY_CLASS_ID); - registerInternal(int[].class, PRIMITIVE_INT_ARRAY_CLASS_ID); - registerInternal(float[].class, PRIMITIVE_FLOAT_ARRAY_CLASS_ID); - registerInternal(long[].class, PRIMITIVE_LONG_ARRAY_CLASS_ID); - registerInternal(double[].class, PRIMITIVE_DOUBLE_ARRAY_CLASS_ID); - registerInternal(String[].class, STRING_ARRAY_CLASS_ID); - registerInternal(Object[].class, OBJECT_ARRAY_CLASS_ID); - registerInternal(ArrayList.class, ARRAYLIST_CLASS_ID); - registerInternal(HashMap.class, HASHMAP_CLASS_ID); - registerInternal(HashSet.class, HASHSET_CLASS_ID); - registerInternal(Class.class, CLASS_CLASS_ID); + registerInternal(void.class, PRIMITIVE_VOID_ID); + registerInternal(boolean.class, PRIMITIVE_BOOL_ID); + registerInternal(byte.class, PRIMITIVE_INT8_ID); + registerInternal(char.class, PRIMITIVE_CHAR_ID); + registerInternal(short.class, PRIMITIVE_INT16_ID); + registerInternal(int.class, PRIMITIVE_INT32_ID); + registerInternal(float.class, PRIMITIVE_FLOAT32_ID); + registerInternal(long.class, PRIMITIVE_INT64_ID); + registerInternal(double.class, PRIMITIVE_FLOAT64_ID); + registerInternal(Void.class, VOID_ID); + registerInternal(Boolean.class, Types.BOOL); + registerInternal(Byte.class, Types.INT8); + registerInternal(Character.class, CHAR_ID); + registerInternal(Short.class, Types.INT16); + registerInternal(Integer.class, Types.INT32); + registerInternal(Float.class, Types.FLOAT32); + registerInternal(Long.class, Types.INT64); + registerInternal(Double.class, Types.FLOAT64); + registerInternal(String.class, 
Types.STRING); + registerInternal(boolean[].class, PRIMITIVE_BOOLEAN_ARRAY_ID); + registerInternal(byte[].class, PRIMITIVE_BYTE_ARRAY_ID); + registerInternal(char[].class, PRIMITIVE_CHAR_ARRAY_ID); + registerInternal(short[].class, PRIMITIVE_SHORT_ARRAY_ID); + registerInternal(int[].class, PRIMITIVE_INT_ARRAY_ID); + registerInternal(float[].class, PRIMITIVE_FLOAT_ARRAY_ID); + registerInternal(long[].class, PRIMITIVE_LONG_ARRAY_ID); + registerInternal(double[].class, PRIMITIVE_DOUBLE_ARRAY_ID); + registerInternal(String[].class, STRING_ARRAY_ID); + registerInternal(Object[].class, OBJECT_ARRAY_ID); + registerInternal(ArrayList.class, ARRAYLIST_ID); + registerInternal(HashMap.class, HASHMAP_ID); + registerInternal(HashSet.class, HASHSET_ID); + registerInternal(Class.class, CLASS_ID); registerInternal(Object.class, EMPTY_OBJECT_ID); registerCommonUsedClasses(); registerDefaultClasses(); @@ -1476,9 +1468,9 @@ private boolean isSecure(Class cls) { public void writeClassAndUpdateCache(MemoryBuffer buffer, Class cls) { // fast path for common type if (cls == Integer.class) { - buffer.writeVarUint32Small7(INTEGER_CLASS_ID << 1); + buffer.writeVarUint32Small7(Types.INT32 << 1); } else if (cls == Long.class) { - buffer.writeVarUint32Small7(LONG_CLASS_ID << 1); + buffer.writeVarUint32Small7(Types.INT64 << 1); } else { writeClassInfo(buffer, getOrUpdateClassInfo(cls)); } @@ -1872,7 +1864,7 @@ public ClassInfoHolder nilClassInfoHolder() { } public boolean isPrimitive(short classId) { - return classId >= PRIMITIVE_VOID_CLASS_ID && classId <= PRIMITIVE_DOUBLE_CLASS_ID; + return classId >= PRIMITIVE_VOID_ID && classId <= PRIMITIVE_FLOAT64_ID; } public CodeGenerator getCodeGenerator(ClassLoader... 
loaders) { diff --git a/java/fory-core/src/main/java/org/apache/fory/resolver/TypeResolver.java b/java/fory-core/src/main/java/org/apache/fory/resolver/TypeResolver.java index 73bd5f7ec9..25d8743f7b 100644 --- a/java/fory-core/src/main/java/org/apache/fory/resolver/TypeResolver.java +++ b/java/fory-core/src/main/java/org/apache/fory/resolver/TypeResolver.java @@ -83,7 +83,7 @@ import org.apache.fory.util.function.Functions; // Internal type dispatcher. -// Do not use this interface outside of fory package +// Do not use this interface outside fory package @Internal @SuppressWarnings({"rawtypes", "unchecked"}) public abstract class TypeResolver { @@ -183,9 +183,8 @@ public abstract void registerSerializer( * ignored too. */ public final boolean needToWriteRef(TypeRef typeRef) { - Object extInfo = typeRef.getExtInfo(); - if (extInfo instanceof TypeExtMeta) { - TypeExtMeta meta = (TypeExtMeta) extInfo; + TypeExtMeta meta = typeRef.getTypeExtMeta(); + if (meta != null) { return meta.trackingRef(); } Class cls = typeRef.getRawType(); diff --git a/java/fory-core/src/main/java/org/apache/fory/serializer/AbstractObjectSerializer.java b/java/fory-core/src/main/java/org/apache/fory/serializer/AbstractObjectSerializer.java index 6c0a0ee193..2ab84d1c26 100644 --- a/java/fory-core/src/main/java/org/apache/fory/serializer/AbstractObjectSerializer.java +++ b/java/fory-core/src/main/java/org/apache/fory/serializer/AbstractObjectSerializer.java @@ -37,8 +37,10 @@ import org.apache.fory.resolver.ClassResolver; import org.apache.fory.resolver.RefResolver; import org.apache.fory.resolver.TypeResolver; +import org.apache.fory.serializer.FieldGroups.SerializationFieldInfo; import org.apache.fory.type.Descriptor; import org.apache.fory.type.DescriptorGrouper; +import org.apache.fory.type.DispatchId; import org.apache.fory.type.Generics; import org.apache.fory.util.record.RecordComponent; import org.apache.fory.util.record.RecordInfo; @@ -50,7 +52,7 @@ public abstract class 
AbstractObjectSerializer extends Serializer { protected final TypeResolver typeResolver; protected final boolean isRecord; protected final ObjectCreator objectCreator; - private FieldGroups.SerializationFieldInfo[] fieldInfos; + private SerializationFieldInfo[] fieldInfos; private RecordInfo copyRecordInfo; public AbstractObjectSerializer(Fory fory, Class type) { @@ -69,7 +71,7 @@ public AbstractObjectSerializer(Fory fory, Class type, ObjectCreator objec static void writeOtherFieldValue( SerializationBinding binding, MemoryBuffer buffer, - FieldGroups.SerializationFieldInfo fieldInfo, + SerializationFieldInfo fieldInfo, Object fieldValue) { if (fieldInfo.useDeclaredTypeInfo) { switch (fieldInfo.refMode) { @@ -117,7 +119,7 @@ static void writeContainerFieldValue( RefResolver refResolver, TypeResolver typeResolver, Generics generics, - FieldGroups.SerializationFieldInfo fieldInfo, + SerializationFieldInfo fieldInfo, MemoryBuffer buffer, Object fieldValue) { switch (fieldInfo.refMode) { @@ -159,48 +161,59 @@ static void writeContainerFieldValue( /** * Write a primitive field value to buffer using direct memory offset access. 
* - * @param fory the fory instance for compression settings * @param buffer the buffer to write to * @param targetObject the object containing the field * @param fieldOffset the memory offset of the field - * @param classId the class ID of the primitive type - * @return true if classId is not a primitive type and needs further write handling + * @param dispatchId the class ID of the primitive type + * @return true if dispatchId is not a primitive type and needs further write handling */ static boolean writePrimitiveFieldValue( - Fory fory, MemoryBuffer buffer, Object targetObject, long fieldOffset, short classId) { - switch (classId) { - case ClassResolver.PRIMITIVE_BOOLEAN_CLASS_ID: + MemoryBuffer buffer, Object targetObject, long fieldOffset, int dispatchId) { + switch (dispatchId) { + case DispatchId.PRIMITIVE_BOOL: buffer.writeBoolean(Platform.getBoolean(targetObject, fieldOffset)); return false; - case ClassResolver.PRIMITIVE_BYTE_CLASS_ID: + case DispatchId.PRIMITIVE_INT8: + case DispatchId.PRIMITIVE_UINT8: buffer.writeByte(Platform.getByte(targetObject, fieldOffset)); return false; - case ClassResolver.PRIMITIVE_CHAR_CLASS_ID: + case DispatchId.PRIMITIVE_CHAR: buffer.writeChar(Platform.getChar(targetObject, fieldOffset)); return false; - case ClassResolver.PRIMITIVE_SHORT_CLASS_ID: + case DispatchId.PRIMITIVE_INT16: + case DispatchId.PRIMITIVE_UINT16: buffer.writeInt16(Platform.getShort(targetObject, fieldOffset)); return false; - case ClassResolver.PRIMITIVE_INT_CLASS_ID: - { - int fieldValue = Platform.getInt(targetObject, fieldOffset); - if (fory.compressInt()) { - buffer.writeVarInt32(fieldValue); - } else { - buffer.writeInt32(fieldValue); - } - return false; - } - case ClassResolver.PRIMITIVE_FLOAT_CLASS_ID: + case DispatchId.PRIMITIVE_INT32: + case DispatchId.PRIMITIVE_UINT32: + buffer.writeInt32(Platform.getInt(targetObject, fieldOffset)); + return false; + case DispatchId.PRIMITIVE_VARINT32: + buffer.writeVarInt32(Platform.getInt(targetObject, 
fieldOffset)); + return false; + case DispatchId.PRIMITIVE_VAR_UINT32: + buffer.writeVarUint32(Platform.getInt(targetObject, fieldOffset)); + return false; + case DispatchId.PRIMITIVE_FLOAT32: buffer.writeFloat32(Platform.getFloat(targetObject, fieldOffset)); return false; - case ClassResolver.PRIMITIVE_LONG_CLASS_ID: - { - long fieldValue = Platform.getLong(targetObject, fieldOffset); - fory.writeInt64(buffer, fieldValue); - return false; - } - case ClassResolver.PRIMITIVE_DOUBLE_CLASS_ID: + case DispatchId.PRIMITIVE_INT64: + case DispatchId.PRIMITIVE_UINT64: + buffer.writeInt64(Platform.getLong(targetObject, fieldOffset)); + return false; + case DispatchId.PRIMITIVE_VARINT64: + buffer.writeVarInt64(Platform.getLong(targetObject, fieldOffset)); + return false; + case DispatchId.PRIMITIVE_TAGGED_INT64: + buffer.writeTaggedInt64(Platform.getLong(targetObject, fieldOffset)); + return false; + case DispatchId.PRIMITIVE_VAR_UINT64: + buffer.writeVarUint64(Platform.getLong(targetObject, fieldOffset)); + return false; + case DispatchId.PRIMITIVE_TAGGED_UINT64: + buffer.writeTaggedUint64(Platform.getLong(targetObject, fieldOffset)); + return false; + case DispatchId.PRIMITIVE_FLOAT64: buffer.writeFloat64(Platform.getDouble(targetObject, fieldOffset)); return false; default: @@ -211,56 +224,63 @@ static boolean writePrimitiveFieldValue( /** * Write a primitive field value to buffer using the field accessor. 
* - * @param fory the fory instance for compression settings * @param buffer the buffer to write to * @param targetObject the object containing the field * @param fieldAccessor the accessor to get the field value - * @param classId the class ID of the primitive type - * @return true if classId is not a primitive type and needs further write handling + * @param dispatchId the class ID of the primitive type + * @return true if dispatchId is not a primitive type and needs further write handling */ static boolean writePrimitiveFieldValue( - Fory fory, - MemoryBuffer buffer, - Object targetObject, - FieldAccessor fieldAccessor, - short classId) { + MemoryBuffer buffer, Object targetObject, FieldAccessor fieldAccessor, int dispatchId) { long fieldOffset = fieldAccessor.getFieldOffset(); if (fieldOffset != -1) { - return writePrimitiveFieldValue(fory, buffer, targetObject, fieldOffset, classId); + return writePrimitiveFieldValue(buffer, targetObject, fieldOffset, dispatchId); } - switch (classId) { - case ClassResolver.PRIMITIVE_BOOLEAN_CLASS_ID: + switch (dispatchId) { + case DispatchId.PRIMITIVE_BOOL: buffer.writeBoolean((Boolean) fieldAccessor.get(targetObject)); return false; - case ClassResolver.PRIMITIVE_BYTE_CLASS_ID: + case DispatchId.PRIMITIVE_INT8: + case DispatchId.PRIMITIVE_UINT8: buffer.writeByte((Byte) fieldAccessor.get(targetObject)); return false; - case ClassResolver.PRIMITIVE_CHAR_CLASS_ID: + case DispatchId.PRIMITIVE_CHAR: buffer.writeChar((Character) fieldAccessor.get(targetObject)); return false; - case ClassResolver.PRIMITIVE_SHORT_CLASS_ID: + case DispatchId.PRIMITIVE_INT16: + case DispatchId.PRIMITIVE_UINT16: buffer.writeInt16((Short) fieldAccessor.get(targetObject)); return false; - case ClassResolver.PRIMITIVE_INT_CLASS_ID: - { - int fieldValue = (Integer) fieldAccessor.get(targetObject); - if (fory.compressInt()) { - buffer.writeVarInt32(fieldValue); - } else { - buffer.writeInt32(fieldValue); - } - return false; - } - case 
ClassResolver.PRIMITIVE_FLOAT_CLASS_ID: + case DispatchId.PRIMITIVE_INT32: + case DispatchId.PRIMITIVE_UINT32: + buffer.writeInt32((Integer) fieldAccessor.get(targetObject)); + return false; + case DispatchId.PRIMITIVE_VARINT32: + buffer.writeVarInt32((Integer) fieldAccessor.get(targetObject)); + return false; + case DispatchId.PRIMITIVE_VAR_UINT32: + buffer.writeVarUint32((Integer) fieldAccessor.get(targetObject)); + return false; + case DispatchId.PRIMITIVE_FLOAT32: buffer.writeFloat32((Float) fieldAccessor.get(targetObject)); return false; - case ClassResolver.PRIMITIVE_LONG_CLASS_ID: - { - long fieldValue = (long) fieldAccessor.get(targetObject); - fory.writeInt64(buffer, fieldValue); - return false; - } - case ClassResolver.PRIMITIVE_DOUBLE_CLASS_ID: + case DispatchId.PRIMITIVE_INT64: + case DispatchId.PRIMITIVE_UINT64: + buffer.writeInt64((Long) fieldAccessor.get(targetObject)); + return false; + case DispatchId.PRIMITIVE_VARINT64: + buffer.writeVarInt64((Long) fieldAccessor.get(targetObject)); + return false; + case DispatchId.PRIMITIVE_TAGGED_INT64: + buffer.writeTaggedInt64((Long) fieldAccessor.get(targetObject)); + return false; + case DispatchId.PRIMITIVE_VAR_UINT64: + buffer.writeVarUint64((Long) fieldAccessor.get(targetObject)); + return false; + case DispatchId.PRIMITIVE_TAGGED_UINT64: + buffer.writeTaggedUint64((Long) fieldAccessor.get(targetObject)); + return false; + case DispatchId.PRIMITIVE_FLOAT64: buffer.writeFloat64((Double) fieldAccessor.get(targetObject)); return false; default: @@ -274,7 +294,7 @@ static boolean writePrimitiveFieldValue( * @return true if field value isn't written by this function. */ static boolean writeBasicObjectFieldValue( - Fory fory, MemoryBuffer buffer, Object fieldValue, short classId) { + Fory fory, MemoryBuffer buffer, Object fieldValue, int dispatchId) { if (fieldValue == null) { throw new IllegalArgumentException( "Non-nullable field has null value. In xlang mode, fields are non-nullable by default. 
" @@ -284,8 +304,8 @@ static boolean writeBasicObjectFieldValue( return true; // let common path handle this. } // add time types serialization here. - switch (classId) { - case ClassResolver.STRING_CLASS_ID: // fastpath for string. + switch (dispatchId) { + case DispatchId.STRING: // fastpath for string. String stringValue = (String) (fieldValue); if (fory.getStringSerializer().needToWriteRef()) { fory.writeJavaStringRef(buffer, stringValue); @@ -293,50 +313,52 @@ static boolean writeBasicObjectFieldValue( fory.writeString(buffer, stringValue); } return false; - case ClassResolver.BOOLEAN_CLASS_ID: - { - buffer.writeBoolean((Boolean) fieldValue); - return false; - } - case ClassResolver.BYTE_CLASS_ID: - { - buffer.writeByte((Byte) fieldValue); - return false; - } - case ClassResolver.CHAR_CLASS_ID: - { - buffer.writeChar((Character) fieldValue); - return false; - } - case ClassResolver.SHORT_CLASS_ID: - { - buffer.writeInt16((Short) fieldValue); - return false; - } - case ClassResolver.INTEGER_CLASS_ID: - { - if (fory.compressInt()) { - buffer.writeVarInt32((Integer) fieldValue); - } else { - buffer.writeInt32((Integer) fieldValue); - } - return false; - } - case ClassResolver.FLOAT_CLASS_ID: - { - buffer.writeFloat32((Float) fieldValue); - return false; - } - case ClassResolver.LONG_CLASS_ID: - { - fory.writeInt64(buffer, (Long) fieldValue); - return false; - } - case ClassResolver.DOUBLE_CLASS_ID: - { - buffer.writeFloat64((Double) fieldValue); - return false; - } + case DispatchId.BOOL: + buffer.writeBoolean((Boolean) fieldValue); + return false; + case DispatchId.INT8: + case DispatchId.UINT8: + buffer.writeByte((Byte) fieldValue); + return false; + case DispatchId.CHAR: + buffer.writeChar((Character) fieldValue); + return false; + case DispatchId.INT16: + case DispatchId.UINT16: + buffer.writeInt16((Short) fieldValue); + return false; + case DispatchId.INT32: + case DispatchId.UINT32: + buffer.writeInt32((Integer) fieldValue); + return false; + case 
DispatchId.VARINT32: + buffer.writeVarInt32((Integer) fieldValue); + return false; + case DispatchId.VAR_UINT32: + buffer.writeVarUint32((Integer) fieldValue); + return false; + case DispatchId.INT64: + case DispatchId.UINT64: + buffer.writeInt64((Long) fieldValue); + return false; + case DispatchId.VARINT64: + buffer.writeVarInt64((Long) fieldValue); + return false; + case DispatchId.TAGGED_INT64: + buffer.writeTaggedInt64((Long) fieldValue); + return false; + case DispatchId.VAR_UINT64: + buffer.writeVarUint64((Long) fieldValue); + return false; + case DispatchId.TAGGED_UINT64: + buffer.writeTaggedUint64((Long) fieldValue); + return false; + case DispatchId.FLOAT32: + buffer.writeFloat32((Float) fieldValue); + return false; + case DispatchId.FLOAT64: + buffer.writeFloat64((Double) fieldValue); + return false; default: return true; } @@ -349,104 +371,136 @@ static boolean writeBasicObjectFieldValue( * @param fory the fory instance for compression and ref tracking settings * @param buffer the buffer to write to * @param fieldValue the field value to write (may be null) - * @param classId the class ID of the boxed type - * @return true if classId is not a basic type or ref tracking is enabled, needing further write - * handling + * @param dispatchId the class ID of the boxed type + * @return true if dispatchId is not a basic type or ref tracking is enabled, needing further + * write handling */ static boolean writeBasicNullableObjectFieldValue( - Fory fory, MemoryBuffer buffer, Object fieldValue, short classId) { + Fory fory, MemoryBuffer buffer, Object fieldValue, int dispatchId) { if (!fory.isBasicTypesRefIgnored()) { return true; // let common path handle this. } // add time types serialization here. - switch (classId) { - case ClassResolver.STRING_CLASS_ID: // fastpath for string. + switch (dispatchId) { + case DispatchId.STRING: // fastpath for string. 
fory.writeJavaStringRef(buffer, (String) (fieldValue)); return false; - case ClassResolver.BOOLEAN_CLASS_ID: - { - if (fieldValue == null) { - buffer.writeByte(Fory.NULL_FLAG); - } else { - buffer.writeByte(Fory.NOT_NULL_VALUE_FLAG); - buffer.writeBoolean((Boolean) (fieldValue)); - } - return false; + case DispatchId.BOOL: + if (fieldValue == null) { + buffer.writeByte(Fory.NULL_FLAG); + } else { + buffer.writeByte(Fory.NOT_NULL_VALUE_FLAG); + buffer.writeBoolean((Boolean) (fieldValue)); } - case ClassResolver.BYTE_CLASS_ID: - { - if (fieldValue == null) { - buffer.writeByte(Fory.NULL_FLAG); - } else { - buffer.writeByte(Fory.NOT_NULL_VALUE_FLAG); - buffer.writeByte((Byte) (fieldValue)); - } - return false; + return false; + case DispatchId.INT8: + case DispatchId.UINT8: + if (fieldValue == null) { + buffer.writeByte(Fory.NULL_FLAG); + } else { + buffer.writeByte(Fory.NOT_NULL_VALUE_FLAG); + buffer.writeByte((Byte) (fieldValue)); } - case ClassResolver.CHAR_CLASS_ID: - { - if (fieldValue == null) { - buffer.writeByte(Fory.NULL_FLAG); - } else { - buffer.writeByte(Fory.NOT_NULL_VALUE_FLAG); - buffer.writeChar((Character) (fieldValue)); - } - return false; + return false; + case DispatchId.CHAR: + if (fieldValue == null) { + buffer.writeByte(Fory.NULL_FLAG); + } else { + buffer.writeByte(Fory.NOT_NULL_VALUE_FLAG); + buffer.writeChar((Character) (fieldValue)); } - case ClassResolver.SHORT_CLASS_ID: - { - if (fieldValue == null) { - buffer.writeByte(Fory.NULL_FLAG); - } else { - buffer.writeByte(Fory.NOT_NULL_VALUE_FLAG); - buffer.writeInt16((Short) (fieldValue)); - } - return false; + return false; + case DispatchId.INT16: + case DispatchId.UINT16: + if (fieldValue == null) { + buffer.writeByte(Fory.NULL_FLAG); + } else { + buffer.writeByte(Fory.NOT_NULL_VALUE_FLAG); + buffer.writeInt16((Short) (fieldValue)); } - case ClassResolver.INTEGER_CLASS_ID: - { - if (fieldValue == null) { - buffer.writeByte(Fory.NULL_FLAG); - } else { - 
buffer.writeByte(Fory.NOT_NULL_VALUE_FLAG); - if (fory.compressInt()) { - buffer.writeVarInt32((Integer) (fieldValue)); - } else { - buffer.writeInt32((Integer) (fieldValue)); - } - } - return false; + return false; + case DispatchId.INT32: + case DispatchId.UINT32: + if (fieldValue == null) { + buffer.writeByte(Fory.NULL_FLAG); + } else { + buffer.writeByte(Fory.NOT_NULL_VALUE_FLAG); + buffer.writeInt32((Integer) (fieldValue)); } - case ClassResolver.FLOAT_CLASS_ID: - { - if (fieldValue == null) { - buffer.writeByte(Fory.NULL_FLAG); - } else { - buffer.writeByte(Fory.NOT_NULL_VALUE_FLAG); - buffer.writeFloat32((Float) (fieldValue)); - } - return false; + return false; + case DispatchId.VARINT32: + if (fieldValue == null) { + buffer.writeByte(Fory.NULL_FLAG); + } else { + buffer.writeByte(Fory.NOT_NULL_VALUE_FLAG); + buffer.writeVarInt32((Integer) (fieldValue)); } - case ClassResolver.LONG_CLASS_ID: - { - if (fieldValue == null) { - buffer.writeByte(Fory.NULL_FLAG); - } else { - buffer.writeByte(Fory.NOT_NULL_VALUE_FLAG); - fory.writeInt64(buffer, (Long) fieldValue); - } - return false; + return false; + case DispatchId.VAR_UINT32: + if (fieldValue == null) { + buffer.writeByte(Fory.NULL_FLAG); + } else { + buffer.writeByte(Fory.NOT_NULL_VALUE_FLAG); + buffer.writeVarUint32((Integer) (fieldValue)); } - case ClassResolver.DOUBLE_CLASS_ID: - { - if (fieldValue == null) { - buffer.writeByte(Fory.NULL_FLAG); - } else { - buffer.writeByte(Fory.NOT_NULL_VALUE_FLAG); - buffer.writeFloat64((Double) (fieldValue)); - } - return false; + return false; + case DispatchId.FLOAT32: + if (fieldValue == null) { + buffer.writeByte(Fory.NULL_FLAG); + } else { + buffer.writeByte(Fory.NOT_NULL_VALUE_FLAG); + buffer.writeFloat32((Float) (fieldValue)); + } + return false; + case DispatchId.INT64: + case DispatchId.UINT64: + if (fieldValue == null) { + buffer.writeByte(Fory.NULL_FLAG); + } else { + buffer.writeByte(Fory.NOT_NULL_VALUE_FLAG); + buffer.writeInt64((Long) fieldValue); + } + 
return false; + case DispatchId.VARINT64: + if (fieldValue == null) { + buffer.writeByte(Fory.NULL_FLAG); + } else { + buffer.writeByte(Fory.NOT_NULL_VALUE_FLAG); + buffer.writeVarInt64((Long) fieldValue); + } + return false; + case DispatchId.TAGGED_INT64: + if (fieldValue == null) { + buffer.writeByte(Fory.NULL_FLAG); + } else { + buffer.writeByte(Fory.NOT_NULL_VALUE_FLAG); + buffer.writeTaggedInt64((Long) fieldValue); + } + return false; + case DispatchId.VAR_UINT64: + if (fieldValue == null) { + buffer.writeByte(Fory.NULL_FLAG); + } else { + buffer.writeByte(Fory.NOT_NULL_VALUE_FLAG); + buffer.writeVarUint64((Long) fieldValue); + } + return false; + case DispatchId.TAGGED_UINT64: + if (fieldValue == null) { + buffer.writeByte(Fory.NULL_FLAG); + } else { + buffer.writeByte(Fory.NOT_NULL_VALUE_FLAG); + buffer.writeTaggedUint64((Long) fieldValue); + } + return false; + case DispatchId.FLOAT64: + if (fieldValue == null) { + buffer.writeByte(Fory.NULL_FLAG); + } else { + buffer.writeByte(Fory.NOT_NULL_VALUE_FLAG); + buffer.writeFloat64((Double) (fieldValue)); } + return false; default: return true; } @@ -460,7 +514,7 @@ static Object readFinalObjectFieldValue( SerializationBinding binding, RefResolver refResolver, TypeResolver typeResolver, - FieldGroups.SerializationFieldInfo fieldInfo, + SerializationFieldInfo fieldInfo, MemoryBuffer buffer) { Serializer serializer = fieldInfo.classInfo.getSerializer(); binding.incReadDepth(); @@ -528,9 +582,7 @@ static Object readFinalObjectFieldValue( * @return the deserialized field value, or null if the field is nullable and was null */ static Object readOtherFieldValue( - SerializationBinding binding, - FieldGroups.SerializationFieldInfo fieldInfo, - MemoryBuffer buffer) { + SerializationBinding binding, SerializationFieldInfo fieldInfo, MemoryBuffer buffer) { // Note: Enum has special handling for xlang compatibility - no type info for enum fields if (fieldInfo.genericType.getCls().isEnum()) { // Only read null flag when the 
field is nullable (for xlang compatibility) @@ -577,7 +629,7 @@ static Object readOtherFieldValue( static Object readContainerFieldValue( SerializationBinding binding, Generics generics, - FieldGroups.SerializationFieldInfo fieldInfo, + SerializationFieldInfo fieldInfo, MemoryBuffer buffer) { Object fieldValue; switch (fieldInfo.refMode) { @@ -617,42 +669,56 @@ static Object readContainerFieldValue( * @return true if classId is not a primitive type id. */ static boolean readPrimitiveFieldValue( - Fory fory, - MemoryBuffer buffer, - Object targetObject, - FieldAccessor fieldAccessor, - short classId) { + MemoryBuffer buffer, Object targetObject, FieldAccessor fieldAccessor, int dispatchId) { long fieldOffset = fieldAccessor.getFieldOffset(); if (fieldOffset != -1) { - return readPrimitiveFieldValue(fory, buffer, targetObject, fieldOffset, classId); + return readPrimitiveFieldValue(buffer, targetObject, fieldOffset, dispatchId); } - switch (classId) { - case ClassResolver.PRIMITIVE_BOOLEAN_CLASS_ID: + switch (dispatchId) { + case DispatchId.PRIMITIVE_BOOL: fieldAccessor.set(targetObject, buffer.readBoolean()); return false; - case ClassResolver.PRIMITIVE_BYTE_CLASS_ID: + case DispatchId.PRIMITIVE_INT8: + case DispatchId.PRIMITIVE_UINT8: fieldAccessor.set(targetObject, buffer.readByte()); return false; - case ClassResolver.PRIMITIVE_CHAR_CLASS_ID: + case DispatchId.PRIMITIVE_CHAR: fieldAccessor.set(targetObject, buffer.readChar()); return false; - case ClassResolver.PRIMITIVE_SHORT_CLASS_ID: + case DispatchId.PRIMITIVE_INT16: + case DispatchId.PRIMITIVE_UINT16: fieldAccessor.set(targetObject, buffer.readInt16()); return false; - case ClassResolver.PRIMITIVE_INT_CLASS_ID: - if (fory.compressInt()) { - fieldAccessor.set(targetObject, buffer.readVarInt32()); - } else { - fieldAccessor.set(targetObject, buffer.readInt32()); - } + case DispatchId.PRIMITIVE_INT32: + case DispatchId.PRIMITIVE_UINT32: + fieldAccessor.set(targetObject, buffer.readInt32()); + return false; + 
case DispatchId.PRIMITIVE_VARINT32: + fieldAccessor.set(targetObject, buffer.readVarInt32()); + return false; + case DispatchId.PRIMITIVE_VAR_UINT32: + fieldAccessor.set(targetObject, buffer.readVarUint32()); return false; - case ClassResolver.PRIMITIVE_FLOAT_CLASS_ID: + case DispatchId.PRIMITIVE_FLOAT32: fieldAccessor.set(targetObject, buffer.readFloat32()); return false; - case ClassResolver.PRIMITIVE_LONG_CLASS_ID: - fieldAccessor.set(targetObject, fory.readInt64(buffer)); + case DispatchId.PRIMITIVE_INT64: + case DispatchId.PRIMITIVE_UINT64: + fieldAccessor.set(targetObject, buffer.readInt64()); + return false; + case DispatchId.PRIMITIVE_VARINT64: + fieldAccessor.set(targetObject, buffer.readVarInt64()); + return false; + case DispatchId.PRIMITIVE_TAGGED_INT64: + fieldAccessor.set(targetObject, buffer.readTaggedInt64()); + return false; + case DispatchId.PRIMITIVE_VAR_UINT64: + fieldAccessor.set(targetObject, buffer.readVarUint64()); return false; - case ClassResolver.PRIMITIVE_DOUBLE_CLASS_ID: + case DispatchId.PRIMITIVE_TAGGED_UINT64: + fieldAccessor.set(targetObject, buffer.readTaggedUint64()); + return false; + case DispatchId.PRIMITIVE_FLOAT64: fieldAccessor.set(targetObject, buffer.readFloat64()); return false; default: @@ -663,42 +729,59 @@ static boolean readPrimitiveFieldValue( /** * Read a primitive field value from buffer and set it using direct memory offset access. 
* - * @param fory the fory instance for compression settings * @param buffer the buffer to read from * @param targetObject the object to set the field value on * @param fieldOffset the memory offset of the field - * @param classId the class ID of the primitive type + * @param dispatchId the dispatch ID of the primitive type * @return true if classId is not a primitive type and needs further read handling */ private static boolean readPrimitiveFieldValue( - Fory fory, MemoryBuffer buffer, Object targetObject, long fieldOffset, short classId) { - switch (classId) { - case ClassResolver.PRIMITIVE_BOOLEAN_CLASS_ID: + MemoryBuffer buffer, Object targetObject, long fieldOffset, int dispatchId) { + switch (dispatchId) { + case DispatchId.PRIMITIVE_BOOL: Platform.putBoolean(targetObject, fieldOffset, buffer.readBoolean()); return false; - case ClassResolver.PRIMITIVE_BYTE_CLASS_ID: + case DispatchId.PRIMITIVE_INT8: + case DispatchId.PRIMITIVE_UINT8: Platform.putByte(targetObject, fieldOffset, buffer.readByte()); return false; - case ClassResolver.PRIMITIVE_CHAR_CLASS_ID: + case DispatchId.PRIMITIVE_CHAR: Platform.putChar(targetObject, fieldOffset, buffer.readChar()); return false; - case ClassResolver.PRIMITIVE_SHORT_CLASS_ID: + case DispatchId.PRIMITIVE_INT16: + case DispatchId.PRIMITIVE_UINT16: Platform.putShort(targetObject, fieldOffset, buffer.readInt16()); return false; - case ClassResolver.PRIMITIVE_INT_CLASS_ID: - if (fory.compressInt()) { - Platform.putInt(targetObject, fieldOffset, buffer.readVarInt32()); - } else { - Platform.putInt(targetObject, fieldOffset, buffer.readInt32()); - } + case DispatchId.PRIMITIVE_INT32: + case DispatchId.PRIMITIVE_UINT32: + Platform.putInt(targetObject, fieldOffset, buffer.readInt32()); + return false; + case DispatchId.PRIMITIVE_VARINT32: + Platform.putInt(targetObject, fieldOffset, buffer.readVarInt32()); + return false; + case DispatchId.PRIMITIVE_VAR_UINT32: + Platform.putInt(targetObject, fieldOffset, buffer.readVarUint32()); 
return false; - case ClassResolver.PRIMITIVE_FLOAT_CLASS_ID: + case DispatchId.PRIMITIVE_FLOAT32: Platform.putFloat(targetObject, fieldOffset, buffer.readFloat32()); return false; - case ClassResolver.PRIMITIVE_LONG_CLASS_ID: - Platform.putLong(targetObject, fieldOffset, fory.readInt64(buffer)); + case DispatchId.PRIMITIVE_INT64: + case DispatchId.PRIMITIVE_UINT64: + Platform.putLong(targetObject, fieldOffset, buffer.readInt64()); return false; - case ClassResolver.PRIMITIVE_DOUBLE_CLASS_ID: + case DispatchId.PRIMITIVE_VARINT64: + Platform.putLong(targetObject, fieldOffset, buffer.readVarInt64()); + return false; + case DispatchId.PRIMITIVE_TAGGED_INT64: + Platform.putLong(targetObject, fieldOffset, buffer.readTaggedInt64()); + return false; + case DispatchId.PRIMITIVE_VAR_UINT64: + Platform.putLong(targetObject, fieldOffset, buffer.readVarUint64()); + return false; + case DispatchId.PRIMITIVE_TAGGED_UINT64: + Platform.putLong(targetObject, fieldOffset, buffer.readTaggedUint64()); + return false; + case DispatchId.PRIMITIVE_FLOAT64: Platform.putDouble(targetObject, fieldOffset, buffer.readFloat64()); return false; default: @@ -710,24 +793,19 @@ private static boolean readPrimitiveFieldValue( * Read a nullable primitive field value from buffer. Reads the null flag first and returns early * if null. 
* - * @param fory the fory instance for compression settings * @param buffer the buffer to read from * @param targetObject the object to set the field value on * @param fieldAccessor the accessor to set the field value - * @param classId the class ID of the primitive type - * @return true if classId is not a primitive type and needs further read handling; false if value - * was null or successfully read + * @param dispatchId the class ID of the primitive type + * @return true if dispatchId is not a primitive type and needs further read handling; false if + * value was null or successfully read */ static boolean readPrimitiveNullableFieldValue( - Fory fory, - MemoryBuffer buffer, - Object targetObject, - FieldAccessor fieldAccessor, - short classId) { + MemoryBuffer buffer, Object targetObject, FieldAccessor fieldAccessor, int dispatchId) { if (buffer.readByte() == Fory.NULL_FLAG) { return false; } - return readPrimitiveFieldValue(fory, buffer, targetObject, fieldAccessor, classId); + return readPrimitiveFieldValue(buffer, targetObject, fieldAccessor, dispatchId); } /** @@ -740,63 +818,65 @@ static boolean readBasicObjectFieldValue( MemoryBuffer buffer, Object targetObject, FieldAccessor fieldAccessor, - short classId) { + int dispatchId) { if (!fory.isBasicTypesRefIgnored()) { return true; // let common path handle this. } // add time types serialization here. - switch (classId) { - case ClassResolver.STRING_CLASS_ID: // fastpath for string. + switch (dispatchId) { + case DispatchId.STRING: // fastpath for string. 
if (fory.getStringSerializer().needToWriteRef()) { fieldAccessor.putObject(targetObject, fory.readJavaStringRef(buffer)); } else { fieldAccessor.putObject(targetObject, fory.readString(buffer)); } return false; - case ClassResolver.BOOLEAN_CLASS_ID: - { - fieldAccessor.putObject(targetObject, buffer.readBoolean()); - return false; - } - case ClassResolver.BYTE_CLASS_ID: - { - fieldAccessor.putObject(targetObject, buffer.readByte()); - return false; - } - case ClassResolver.CHAR_CLASS_ID: - { - fieldAccessor.putObject(targetObject, buffer.readChar()); - return false; - } - case ClassResolver.SHORT_CLASS_ID: - { - fieldAccessor.putObject(targetObject, buffer.readInt16()); - return false; - } - case ClassResolver.INTEGER_CLASS_ID: - { - if (fory.compressInt()) { - fieldAccessor.putObject(targetObject, buffer.readVarInt32()); - } else { - fieldAccessor.putObject(targetObject, buffer.readInt32()); - } - return false; - } - case ClassResolver.FLOAT_CLASS_ID: - { - fieldAccessor.putObject(targetObject, buffer.readFloat32()); - return false; - } - case ClassResolver.LONG_CLASS_ID: - { - fieldAccessor.putObject(targetObject, fory.readInt64(buffer)); - return false; - } - case ClassResolver.DOUBLE_CLASS_ID: - { - fieldAccessor.putObject(targetObject, buffer.readFloat64()); - return false; - } + case DispatchId.BOOL: + fieldAccessor.putObject(targetObject, buffer.readBoolean()); + return false; + case DispatchId.INT8: + case DispatchId.UINT8: + fieldAccessor.putObject(targetObject, buffer.readByte()); + return false; + case DispatchId.CHAR: + fieldAccessor.putObject(targetObject, buffer.readChar()); + return false; + case DispatchId.INT16: + case DispatchId.UINT16: + fieldAccessor.putObject(targetObject, buffer.readInt16()); + return false; + case DispatchId.INT32: + case DispatchId.UINT32: + fieldAccessor.putObject(targetObject, buffer.readInt32()); + return false; + case DispatchId.VARINT32: + fieldAccessor.putObject(targetObject, buffer.readVarInt32()); + return false; + 
case DispatchId.VAR_UINT32: + fieldAccessor.putObject(targetObject, buffer.readVarUint32()); + return false; + case DispatchId.INT64: + case DispatchId.UINT64: + fieldAccessor.putObject(targetObject, buffer.readInt64()); + return false; + case DispatchId.VARINT64: + fieldAccessor.putObject(targetObject, buffer.readVarInt64()); + return false; + case DispatchId.TAGGED_INT64: + fieldAccessor.putObject(targetObject, buffer.readTaggedInt64()); + return false; + case DispatchId.VAR_UINT64: + fieldAccessor.putObject(targetObject, buffer.readVarUint64()); + return false; + case DispatchId.TAGGED_UINT64: + fieldAccessor.putObject(targetObject, buffer.readTaggedUint64()); + return false; + case DispatchId.FLOAT32: + fieldAccessor.putObject(targetObject, buffer.readFloat32()); + return false; + case DispatchId.FLOAT64: + fieldAccessor.putObject(targetObject, buffer.readFloat64()); + return false; default: return true; } @@ -810,8 +890,8 @@ static boolean readBasicObjectFieldValue( * @param buffer the buffer to read from * @param targetObject the object to set the field value on * @param fieldAccessor the accessor to set the field value - * @param classId the class ID of the boxed type - * @return true if classId is not a basic type or ref tracking is enabled, needing further read + * @param dispatchId the class ID of the boxed type + * @return true if dispatchId is not a basic type or ref tracking is enabled, needing further read * handling */ static boolean readBasicNullableObjectFieldValue( @@ -819,91 +899,117 @@ static boolean readBasicNullableObjectFieldValue( MemoryBuffer buffer, Object targetObject, FieldAccessor fieldAccessor, - short classId) { + int dispatchId) { if (!fory.isBasicTypesRefIgnored()) { return true; // let common path handle this. } // add time types serialization here. - switch (classId) { - case ClassResolver.STRING_CLASS_ID: // fastpath for string. + switch (dispatchId) { + case DispatchId.STRING: // fastpath for string. 
fieldAccessor.putObject(targetObject, fory.readJavaStringRef(buffer)); return false; - case ClassResolver.BOOLEAN_CLASS_ID: - { - if (buffer.readByte() == Fory.NULL_FLAG) { - fieldAccessor.putObject(targetObject, null); - } else { - fieldAccessor.putObject(targetObject, buffer.readBoolean()); - } - return false; + case DispatchId.BOOL: + if (buffer.readByte() == Fory.NULL_FLAG) { + fieldAccessor.putObject(targetObject, null); + } else { + fieldAccessor.putObject(targetObject, buffer.readBoolean()); } - case ClassResolver.BYTE_CLASS_ID: - { - if (buffer.readByte() == Fory.NULL_FLAG) { - fieldAccessor.putObject(targetObject, null); - } else { - fieldAccessor.putObject(targetObject, buffer.readByte()); - } - return false; + return false; + case DispatchId.INT8: + case DispatchId.UINT8: + if (buffer.readByte() == Fory.NULL_FLAG) { + fieldAccessor.putObject(targetObject, null); + } else { + fieldAccessor.putObject(targetObject, buffer.readByte()); } - case ClassResolver.CHAR_CLASS_ID: - { - if (buffer.readByte() == Fory.NULL_FLAG) { - fieldAccessor.putObject(targetObject, null); - } else { - fieldAccessor.putObject(targetObject, buffer.readChar()); - } - return false; + return false; + case DispatchId.CHAR: + if (buffer.readByte() == Fory.NULL_FLAG) { + fieldAccessor.putObject(targetObject, null); + } else { + fieldAccessor.putObject(targetObject, buffer.readChar()); } - case ClassResolver.SHORT_CLASS_ID: - { - if (buffer.readByte() == Fory.NULL_FLAG) { - fieldAccessor.putObject(targetObject, null); - } else { - fieldAccessor.putObject(targetObject, buffer.readInt16()); - } - return false; + return false; + case DispatchId.INT16: + case DispatchId.UINT16: + if (buffer.readByte() == Fory.NULL_FLAG) { + fieldAccessor.putObject(targetObject, null); + } else { + fieldAccessor.putObject(targetObject, buffer.readInt16()); } - case ClassResolver.INTEGER_CLASS_ID: - { - if (buffer.readByte() == Fory.NULL_FLAG) { - fieldAccessor.putObject(targetObject, null); - } else { - if 
(fory.compressInt()) { - fieldAccessor.putObject(targetObject, buffer.readVarInt32()); - } else { - fieldAccessor.putObject(targetObject, buffer.readInt32()); - } - } - return false; + return false; + case DispatchId.INT32: + case DispatchId.UINT32: + if (buffer.readByte() == Fory.NULL_FLAG) { + fieldAccessor.putObject(targetObject, null); + } else { + fieldAccessor.putObject(targetObject, buffer.readInt32()); } - case ClassResolver.FLOAT_CLASS_ID: - { - if (buffer.readByte() == Fory.NULL_FLAG) { - fieldAccessor.putObject(targetObject, null); - } else { - fieldAccessor.putObject(targetObject, buffer.readFloat32()); - } - return false; + return false; + case DispatchId.VARINT32: + if (buffer.readByte() == Fory.NULL_FLAG) { + fieldAccessor.putObject(targetObject, null); + } else { + fieldAccessor.putObject(targetObject, buffer.readVarInt32()); } - case ClassResolver.LONG_CLASS_ID: - { - if (buffer.readByte() == Fory.NULL_FLAG) { - fieldAccessor.putObject(targetObject, null); - } else { - fieldAccessor.putObject(targetObject, fory.readInt64(buffer)); - } - return false; + return false; + case DispatchId.VAR_UINT32: + if (buffer.readByte() == Fory.NULL_FLAG) { + fieldAccessor.putObject(targetObject, null); + } else { + fieldAccessor.putObject(targetObject, buffer.readVarUint32()); } - case ClassResolver.DOUBLE_CLASS_ID: - { - if (buffer.readByte() == Fory.NULL_FLAG) { - fieldAccessor.putObject(targetObject, null); - } else { - fieldAccessor.putObject(targetObject, buffer.readFloat64()); - } - return false; + return false; + case DispatchId.INT64: + case DispatchId.UINT64: + if (buffer.readByte() == Fory.NULL_FLAG) { + fieldAccessor.putObject(targetObject, null); + } else { + fieldAccessor.putObject(targetObject, buffer.readInt64()); + } + return false; + case DispatchId.VARINT64: + if (buffer.readByte() == Fory.NULL_FLAG) { + fieldAccessor.putObject(targetObject, null); + } else { + fieldAccessor.putObject(targetObject, buffer.readVarInt64()); + } + return false; + 
case DispatchId.TAGGED_INT64: + if (buffer.readByte() == Fory.NULL_FLAG) { + fieldAccessor.putObject(targetObject, null); + } else { + fieldAccessor.putObject(targetObject, buffer.readTaggedInt64()); + } + return false; + case DispatchId.VAR_UINT64: + if (buffer.readByte() == Fory.NULL_FLAG) { + fieldAccessor.putObject(targetObject, null); + } else { + fieldAccessor.putObject(targetObject, buffer.readVarUint64()); + } + return false; + case DispatchId.TAGGED_UINT64: + if (buffer.readByte() == Fory.NULL_FLAG) { + fieldAccessor.putObject(targetObject, null); + } else { + fieldAccessor.putObject(targetObject, buffer.readTaggedUint64()); + } + return false; + case DispatchId.FLOAT32: + if (buffer.readByte() == Fory.NULL_FLAG) { + fieldAccessor.putObject(targetObject, null); + } else { + fieldAccessor.putObject(targetObject, buffer.readFloat32()); + } + return false; + case DispatchId.FLOAT64: + if (buffer.readByte() == Fory.NULL_FLAG) { + fieldAccessor.putObject(targetObject, null); + } else { + fieldAccessor.putObject(targetObject, buffer.readFloat64()); } + return false; default: return true; } @@ -939,120 +1045,95 @@ private T copyRecord(T originObj) { } private Object[] copyFields(T originObj) { - FieldGroups.SerializationFieldInfo[] fieldInfos = this.fieldInfos; + SerializationFieldInfo[] fieldInfos = this.fieldInfos; if (fieldInfos == null) { fieldInfos = buildFieldsInfo(); } Object[] fieldValues = new Object[fieldInfos.length]; for (int i = 0; i < fieldInfos.length; i++) { - FieldGroups.SerializationFieldInfo fieldInfo = fieldInfos[i]; + SerializationFieldInfo fieldInfo = fieldInfos[i]; FieldAccessor fieldAccessor = fieldInfo.fieldAccessor; long fieldOffset = fieldAccessor.getFieldOffset(); if (fieldOffset != -1) { - fieldValues[i] = copyField(originObj, fieldOffset, fieldInfo.classId); + fieldValues[i] = copyField(originObj, fieldOffset, fieldInfo.dispatchId); } else { // field in record class has offset -1 Object fieldValue = fieldAccessor.get(originObj); - 
fieldValues[i] = fory.copyObject(fieldValue, fieldInfo.classId); + fieldValues[i] = fory.copyObject(fieldValue, fieldInfo.dispatchId); } } return RecordUtils.remapping(copyRecordInfo, fieldValues); } private void copyFields(T originObj, T newObj) { - FieldGroups.SerializationFieldInfo[] fieldInfos = this.fieldInfos; + SerializationFieldInfo[] fieldInfos = this.fieldInfos; if (fieldInfos == null) { fieldInfos = buildFieldsInfo(); } - for (FieldGroups.SerializationFieldInfo fieldInfo : fieldInfos) { - FieldAccessor fieldAccessor = fieldInfo.fieldAccessor; - long fieldOffset = fieldAccessor.getFieldOffset(); - // record class won't go to this path; - assert fieldOffset != -1; - switch (fieldInfo.classId) { - case ClassResolver.PRIMITIVE_BYTE_CLASS_ID: - Platform.putByte(newObj, fieldOffset, Platform.getByte(originObj, fieldOffset)); - break; - case ClassResolver.PRIMITIVE_CHAR_CLASS_ID: - Platform.putChar(newObj, fieldOffset, Platform.getChar(originObj, fieldOffset)); - break; - case ClassResolver.PRIMITIVE_SHORT_CLASS_ID: - Platform.putShort(newObj, fieldOffset, Platform.getShort(originObj, fieldOffset)); - break; - case ClassResolver.PRIMITIVE_INT_CLASS_ID: - Platform.putInt(newObj, fieldOffset, Platform.getInt(originObj, fieldOffset)); - break; - case ClassResolver.PRIMITIVE_LONG_CLASS_ID: - Platform.putLong(newObj, fieldOffset, Platform.getLong(originObj, fieldOffset)); - break; - case ClassResolver.PRIMITIVE_FLOAT_CLASS_ID: - Platform.putFloat(newObj, fieldOffset, Platform.getFloat(originObj, fieldOffset)); - break; - case ClassResolver.PRIMITIVE_DOUBLE_CLASS_ID: - Platform.putDouble(newObj, fieldOffset, Platform.getDouble(originObj, fieldOffset)); - break; - case ClassResolver.PRIMITIVE_BOOLEAN_CLASS_ID: - Platform.putBoolean(newObj, fieldOffset, Platform.getBoolean(originObj, fieldOffset)); - break; - case ClassResolver.BOOLEAN_CLASS_ID: - case ClassResolver.BYTE_CLASS_ID: - case ClassResolver.CHAR_CLASS_ID: - case ClassResolver.SHORT_CLASS_ID: - case 
ClassResolver.INTEGER_CLASS_ID: - case ClassResolver.FLOAT_CLASS_ID: - case ClassResolver.LONG_CLASS_ID: - case ClassResolver.DOUBLE_CLASS_ID: - case ClassResolver.STRING_CLASS_ID: - Platform.putObject(newObj, fieldOffset, Platform.getObject(originObj, fieldOffset)); - break; - default: - Platform.putObject( - newObj, fieldOffset, fory.copyObject(Platform.getObject(originObj, fieldOffset))); - } - } + copyFields(fory, fieldInfos, originObj, newObj); } public static void copyFields( - Fory fory, FieldGroups.SerializationFieldInfo[] fieldInfos, Object originObj, Object newObj) { - for (FieldGroups.SerializationFieldInfo fieldInfo : fieldInfos) { + Fory fory, SerializationFieldInfo[] fieldInfos, Object originObj, Object newObj) { + for (SerializationFieldInfo fieldInfo : fieldInfos) { FieldAccessor fieldAccessor = fieldInfo.fieldAccessor; long fieldOffset = fieldAccessor.getFieldOffset(); // record class won't go to this path; assert fieldOffset != -1; - switch (fieldInfo.classId) { - case ClassResolver.PRIMITIVE_BYTE_CLASS_ID: + switch (fieldInfo.dispatchId) { + case DispatchId.PRIMITIVE_BOOL: + Platform.putBoolean(newObj, fieldOffset, Platform.getBoolean(originObj, fieldOffset)); + break; + case DispatchId.PRIMITIVE_INT8: + case DispatchId.PRIMITIVE_UINT8: Platform.putByte(newObj, fieldOffset, Platform.getByte(originObj, fieldOffset)); break; - case ClassResolver.PRIMITIVE_CHAR_CLASS_ID: + case DispatchId.PRIMITIVE_CHAR: Platform.putChar(newObj, fieldOffset, Platform.getChar(originObj, fieldOffset)); break; - case ClassResolver.PRIMITIVE_SHORT_CLASS_ID: + case DispatchId.PRIMITIVE_INT16: + case DispatchId.PRIMITIVE_UINT16: Platform.putShort(newObj, fieldOffset, Platform.getShort(originObj, fieldOffset)); break; - case ClassResolver.PRIMITIVE_INT_CLASS_ID: + case DispatchId.PRIMITIVE_INT32: + case DispatchId.PRIMITIVE_VARINT32: + case DispatchId.PRIMITIVE_UINT32: + case DispatchId.PRIMITIVE_VAR_UINT32: Platform.putInt(newObj, fieldOffset, Platform.getInt(originObj, 
fieldOffset)); break; - case ClassResolver.PRIMITIVE_LONG_CLASS_ID: + case DispatchId.PRIMITIVE_INT64: + case DispatchId.PRIMITIVE_VARINT64: + case DispatchId.PRIMITIVE_TAGGED_INT64: + case DispatchId.PRIMITIVE_UINT64: + case DispatchId.PRIMITIVE_VAR_UINT64: + case DispatchId.PRIMITIVE_TAGGED_UINT64: Platform.putLong(newObj, fieldOffset, Platform.getLong(originObj, fieldOffset)); break; - case ClassResolver.PRIMITIVE_FLOAT_CLASS_ID: + case DispatchId.PRIMITIVE_FLOAT32: Platform.putFloat(newObj, fieldOffset, Platform.getFloat(originObj, fieldOffset)); break; - case ClassResolver.PRIMITIVE_DOUBLE_CLASS_ID: + case DispatchId.PRIMITIVE_FLOAT64: Platform.putDouble(newObj, fieldOffset, Platform.getDouble(originObj, fieldOffset)); break; - case ClassResolver.PRIMITIVE_BOOLEAN_CLASS_ID: - Platform.putBoolean(newObj, fieldOffset, Platform.getBoolean(originObj, fieldOffset)); - break; - case ClassResolver.BOOLEAN_CLASS_ID: - case ClassResolver.BYTE_CLASS_ID: - case ClassResolver.CHAR_CLASS_ID: - case ClassResolver.SHORT_CLASS_ID: - case ClassResolver.INTEGER_CLASS_ID: - case ClassResolver.FLOAT_CLASS_ID: - case ClassResolver.LONG_CLASS_ID: - case ClassResolver.DOUBLE_CLASS_ID: - case ClassResolver.STRING_CLASS_ID: + case DispatchId.BOOL: + case DispatchId.INT8: + case DispatchId.UINT8: + case DispatchId.CHAR: + case DispatchId.INT16: + case DispatchId.UINT16: + case DispatchId.INT32: + case DispatchId.VARINT32: + case DispatchId.UINT32: + case DispatchId.VAR_UINT32: + case DispatchId.INT64: + case DispatchId.VARINT64: + case DispatchId.TAGGED_INT64: + case DispatchId.UINT64: + case DispatchId.VAR_UINT64: + case DispatchId.TAGGED_UINT64: + case DispatchId.FLOAT32: + case DispatchId.FLOAT64: + case DispatchId.STRING: Platform.putObject(newObj, fieldOffset, Platform.getObject(originObj, fieldOffset)); break; default: @@ -1062,40 +1143,60 @@ public static void copyFields( } } - private Object copyField(Object targetObject, long fieldOffset, short classId) { - switch (classId) { 
- case ClassResolver.PRIMITIVE_BOOLEAN_CLASS_ID: + private Object copyField(Object targetObject, long fieldOffset, int typeId) { + switch (typeId) { + case DispatchId.PRIMITIVE_BOOL: return Platform.getBoolean(targetObject, fieldOffset); - case ClassResolver.PRIMITIVE_BYTE_CLASS_ID: + case DispatchId.PRIMITIVE_INT8: + case DispatchId.PRIMITIVE_UINT8: return Platform.getByte(targetObject, fieldOffset); - case ClassResolver.PRIMITIVE_CHAR_CLASS_ID: + case DispatchId.PRIMITIVE_CHAR: return Platform.getChar(targetObject, fieldOffset); - case ClassResolver.PRIMITIVE_SHORT_CLASS_ID: + case DispatchId.PRIMITIVE_INT16: + case DispatchId.PRIMITIVE_UINT16: return Platform.getShort(targetObject, fieldOffset); - case ClassResolver.PRIMITIVE_INT_CLASS_ID: + case DispatchId.PRIMITIVE_INT32: + case DispatchId.PRIMITIVE_VARINT32: + case DispatchId.PRIMITIVE_UINT32: + case DispatchId.PRIMITIVE_VAR_UINT32: return Platform.getInt(targetObject, fieldOffset); - case ClassResolver.PRIMITIVE_FLOAT_CLASS_ID: + case DispatchId.PRIMITIVE_FLOAT32: return Platform.getFloat(targetObject, fieldOffset); - case ClassResolver.PRIMITIVE_LONG_CLASS_ID: + case DispatchId.PRIMITIVE_INT64: + case DispatchId.PRIMITIVE_VARINT64: + case DispatchId.PRIMITIVE_TAGGED_INT64: + case DispatchId.PRIMITIVE_UINT64: + case DispatchId.PRIMITIVE_VAR_UINT64: + case DispatchId.PRIMITIVE_TAGGED_UINT64: return Platform.getLong(targetObject, fieldOffset); - case ClassResolver.PRIMITIVE_DOUBLE_CLASS_ID: + case DispatchId.PRIMITIVE_FLOAT64: return Platform.getDouble(targetObject, fieldOffset); - case ClassResolver.BOOLEAN_CLASS_ID: - case ClassResolver.BYTE_CLASS_ID: - case ClassResolver.CHAR_CLASS_ID: - case ClassResolver.SHORT_CLASS_ID: - case ClassResolver.INTEGER_CLASS_ID: - case ClassResolver.FLOAT_CLASS_ID: - case ClassResolver.LONG_CLASS_ID: - case ClassResolver.DOUBLE_CLASS_ID: - case ClassResolver.STRING_CLASS_ID: + case DispatchId.BOOL: + case DispatchId.INT8: + case DispatchId.UINT8: + case DispatchId.CHAR: + 
case DispatchId.INT16: + case DispatchId.UINT16: + case DispatchId.INT32: + case DispatchId.VARINT32: + case DispatchId.UINT32: + case DispatchId.VAR_UINT32: + case DispatchId.FLOAT32: + case DispatchId.INT64: + case DispatchId.VARINT64: + case DispatchId.TAGGED_INT64: + case DispatchId.UINT64: + case DispatchId.VAR_UINT64: + case DispatchId.TAGGED_UINT64: + case DispatchId.FLOAT64: + case DispatchId.STRING: return Platform.getObject(targetObject, fieldOffset); default: return fory.copyObject(Platform.getObject(targetObject, fieldOffset)); } } - private FieldGroups.SerializationFieldInfo[] buildFieldsInfo() { + private SerializationFieldInfo[] buildFieldsInfo() { List descriptors = new ArrayList<>(); if (RecordUtils.isRecord(type)) { RecordComponent[] components = RecordUtils.getRecordComponents(type); diff --git a/java/fory-core/src/main/java/org/apache/fory/serializer/FieldGroups.java b/java/fory-core/src/main/java/org/apache/fory/serializer/FieldGroups.java index dbcc3362d6..9eb8cc74a9 100644 --- a/java/fory-core/src/main/java/org/apache/fory/serializer/FieldGroups.java +++ b/java/fory-core/src/main/java/org/apache/fory/serializer/FieldGroups.java @@ -30,15 +30,13 @@ import org.apache.fory.reflect.TypeRef; import org.apache.fory.resolver.ClassInfo; import org.apache.fory.resolver.ClassInfoHolder; -import org.apache.fory.resolver.ClassResolver; import org.apache.fory.resolver.RefMode; import org.apache.fory.resolver.TypeResolver; import org.apache.fory.serializer.converter.FieldConverter; import org.apache.fory.type.Descriptor; import org.apache.fory.type.DescriptorGrouper; -import org.apache.fory.type.FinalObjectTypeStub; +import org.apache.fory.type.DispatchId; import org.apache.fory.type.GenericType; -import org.apache.fory.type.TypeUtils; import org.apache.fory.util.StringUtils; public class FieldGroups { @@ -118,20 +116,10 @@ public static FieldGroups buildFieldInfos(Fory fory, DescriptorGrouper grouper) return new FieldGroups(allBuildIn, containerFields, 
otherFields); } - static short getRegisteredClassId(Fory fory, Descriptor d) { - Field field = d.getField(); - Class cls = d.getTypeRef().getRawType(); - if (TypeUtils.unwrap(cls).isPrimitive() && field != null) { - return fory.getClassResolver().getRegisteredClassId(field.getType()); - } - Short classId = fory.getClassResolver().getRegisteredClassId(cls); - return classId == null ? ClassResolver.NO_CLASS_ID : classId; - } - public static final class SerializationFieldInfo { public final Descriptor descriptor; public final TypeRef typeRef; - public final short classId; + public final int dispatchId; public final ClassInfo classInfo; public final Serializer serializer; public final String qualifiedFieldName; @@ -152,26 +140,19 @@ public static final class SerializationFieldInfo { SerializationFieldInfo(Fory fory, Descriptor d) { this.descriptor = d; this.typeRef = d.getTypeRef(); - this.classId = getRegisteredClassId(fory, d); + this.dispatchId = DispatchId.getDispatchId(fory, d); TypeResolver resolver = fory._getTypeResolver(); // invoke `copy` to avoid ObjectSerializer construct clear serializer by `clearSerializer`. - if (typeRef.getRawType() == FinalObjectTypeStub.class) { - // `FinalObjectTypeStub` has no fields, using its `classInfo` - // will make deserialization failed. 
- classInfo = null; - } else { - if (resolver.isMonomorphic(descriptor)) { - classInfo = SerializationUtils.getClassInfo(fory, typeRef.getRawType()); - if (!fory.isShareMeta() - && !fory.isCompatible() - && classInfo.getSerializer() instanceof ReplaceResolveSerializer) { - // overwrite replace resolve serializer for final field - classInfo.setSerializer( - new FinalFieldReplaceResolveSerializer(fory, classInfo.getCls())); - } - } else { - classInfo = null; + if (resolver.isMonomorphic(descriptor)) { + classInfo = SerializationUtils.getClassInfo(fory, typeRef.getRawType()); + if (!fory.isShareMeta() + && !fory.isCompatible() + && classInfo.getSerializer() instanceof ReplaceResolveSerializer) { + // overwrite replace resolve serializer for final field + classInfo.setSerializer(new FinalFieldReplaceResolveSerializer(fory, classInfo.getCls())); } + } else { + classInfo = null; } useDeclaredTypeInfo = classInfo != null && resolver.isMonomorphic(descriptor); if (classInfo != null) { @@ -226,7 +207,7 @@ public String toString() { + ", typeRef=" + typeRef + ", classId=" - + classId + + dispatchId + ", fieldAccessor=" + fieldAccessor + ", nullable=" diff --git a/java/fory-core/src/main/java/org/apache/fory/serializer/MetaSharedLayerSerializer.java b/java/fory-core/src/main/java/org/apache/fory/serializer/MetaSharedLayerSerializer.java index af0da89a9d..8f01b6d694 100644 --- a/java/fory-core/src/main/java/org/apache/fory/serializer/MetaSharedLayerSerializer.java +++ b/java/fory-core/src/main/java/org/apache/fory/serializer/MetaSharedLayerSerializer.java @@ -36,6 +36,7 @@ import org.apache.fory.serializer.FieldGroups.SerializationFieldInfo; import org.apache.fory.type.Descriptor; import org.apache.fory.type.DescriptorGrouper; +import org.apache.fory.type.DispatchId; import org.apache.fory.type.Generics; /** @@ -125,16 +126,16 @@ private void writeFinalFields(MemoryBuffer buffer, T value) { for (SerializationFieldInfo fieldInfo : buildInFields) { FieldAccessor fieldAccessor = 
fieldInfo.fieldAccessor; boolean nullable = fieldInfo.nullable; - short classId = fieldInfo.classId; + int dispatchId = fieldInfo.dispatchId; if (AbstractObjectSerializer.writePrimitiveFieldValue( - fory, buffer, value, fieldAccessor, classId)) { + buffer, value, fieldAccessor, dispatchId)) { Object fieldValue = fieldAccessor.getObject(value); boolean writeBasicObjectResult = nullable ? AbstractObjectSerializer.writeBasicNullableObjectFieldValue( - fory, buffer, fieldValue, classId) + fory, buffer, fieldValue, dispatchId) : AbstractObjectSerializer.writeBasicObjectFieldValue( - fory, buffer, fieldValue, classId); + fory, buffer, fieldValue, dispatchId); if (writeBasicObjectResult) { Serializer serializer = fieldInfo.classInfo.getSerializer(); if (!metaShareEnabled || fieldInfo.useDeclaredTypeInfo) { @@ -232,14 +233,13 @@ private void readFinalFields(MemoryBuffer buffer, T obj) { FieldAccessor fieldAccessor = fieldInfo.fieldAccessor; if (fieldAccessor != null) { boolean nullable = fieldInfo.nullable; - short classId = fieldInfo.classId; - if (AbstractObjectSerializer.readPrimitiveFieldValue( - fory, buffer, obj, fieldAccessor, classId) + int dispatchId = fieldInfo.dispatchId; + if (AbstractObjectSerializer.readPrimitiveFieldValue(buffer, obj, fieldAccessor, dispatchId) && (nullable ? 
AbstractObjectSerializer.readBasicNullableObjectFieldValue( - fory, buffer, obj, fieldAccessor, classId) + fory, buffer, obj, fieldAccessor, dispatchId) : AbstractObjectSerializer.readBasicObjectFieldValue( - fory, buffer, obj, fieldAccessor, classId))) { + fory, buffer, obj, fieldAccessor, dispatchId))) { Object fieldValue = AbstractObjectSerializer.readFinalObjectFieldValue( binding, refResolver, classResolver, fieldInfo, buffer); @@ -247,7 +247,8 @@ private void readFinalFields(MemoryBuffer buffer, T obj) { } } else { // Field doesn't exist in current class - skip the value - if (MetaSharedSerializer.skipPrimitiveFieldValueFailed(fory, fieldInfo.classId, buffer)) { + if (MetaSharedSerializer.skipPrimitiveFieldValueFailed( + fory, fieldInfo.dispatchId, buffer)) { if (fieldInfo.classInfo == null) { fory.readRef(buffer, classInfoHolder); } else { @@ -340,37 +341,59 @@ public void writeFieldsValues(MemoryBuffer buffer, Object[] vals) { private void writeFieldValueFromArray( MemoryBuffer buffer, SerializationFieldInfo fieldInfo, Object fieldValue) { - short classId = fieldInfo.classId; + int dispatchId = fieldInfo.dispatchId; boolean nullable = fieldInfo.nullable; // Handle primitives first - switch (classId) { - case ClassResolver.PRIMITIVE_BOOLEAN_CLASS_ID: + switch (dispatchId) { + case DispatchId.PRIMITIVE_BOOL: buffer.writeBoolean((Boolean) fieldValue); return; - case ClassResolver.PRIMITIVE_BYTE_CLASS_ID: + case DispatchId.PRIMITIVE_INT8: + case DispatchId.PRIMITIVE_UINT8: buffer.writeByte((Byte) fieldValue); return; - case ClassResolver.PRIMITIVE_CHAR_CLASS_ID: + case DispatchId.PRIMITIVE_CHAR: buffer.writeChar((Character) fieldValue); return; - case ClassResolver.PRIMITIVE_SHORT_CLASS_ID: + case DispatchId.PRIMITIVE_INT16: + case DispatchId.PRIMITIVE_UINT16: buffer.writeInt16((Short) fieldValue); return; - case ClassResolver.PRIMITIVE_INT_CLASS_ID: - if (fory.compressInt()) { - buffer.writeVarInt32((Integer) fieldValue); - } else { - 
buffer.writeInt32((Integer) fieldValue); - } + case DispatchId.PRIMITIVE_INT32: + buffer.writeInt32((Integer) fieldValue); return; - case ClassResolver.PRIMITIVE_FLOAT_CLASS_ID: - buffer.writeFloat32((Float) fieldValue); + case DispatchId.PRIMITIVE_VARINT32: + buffer.writeVarInt32((Integer) fieldValue); + return; + case DispatchId.PRIMITIVE_UINT32: + buffer.writeInt32((Integer) fieldValue); + return; + case DispatchId.PRIMITIVE_VAR_UINT32: + buffer.writeVarUint32((Integer) fieldValue); + return; + case DispatchId.PRIMITIVE_INT64: + buffer.writeInt64((Long) fieldValue); + return; + case DispatchId.PRIMITIVE_VARINT64: + buffer.writeVarInt64((Long) fieldValue); + return; + case DispatchId.PRIMITIVE_TAGGED_INT64: + buffer.writeTaggedInt64((Long) fieldValue); return; - case ClassResolver.PRIMITIVE_LONG_CLASS_ID: - fory.writeInt64(buffer, (Long) fieldValue); + case DispatchId.PRIMITIVE_UINT64: + buffer.writeInt64((Long) fieldValue); return; - case ClassResolver.PRIMITIVE_DOUBLE_CLASS_ID: + case DispatchId.PRIMITIVE_VAR_UINT64: + buffer.writeVarUint64((Long) fieldValue); + return; + case DispatchId.PRIMITIVE_TAGGED_UINT64: + buffer.writeTaggedUint64((Long) fieldValue); + return; + case DispatchId.PRIMITIVE_FLOAT32: + buffer.writeFloat32((Float) fieldValue); + return; + case DispatchId.PRIMITIVE_FLOAT64: buffer.writeFloat64((Double) fieldValue); return; default: @@ -428,14 +451,11 @@ public void readFields(MemoryBuffer buffer, Object[] vals) { } private Object readFieldValueToArray(MemoryBuffer buffer, SerializationFieldInfo fieldInfo) { - short classId = fieldInfo.classId; - + int dispatchId = fieldInfo.dispatchId; // Handle primitives - if (classId >= ClassResolver.PRIMITIVE_BOOLEAN_CLASS_ID - && classId <= ClassResolver.PRIMITIVE_DOUBLE_CLASS_ID) { - return Serializers.readPrimitiveValue(fory, buffer, classId); + if (DispatchId.isPrimitive(dispatchId)) { + return Serializers.readPrimitiveValue(fory, buffer, dispatchId); } - // Handle objects return 
AbstractObjectSerializer.readFinalObjectFieldValue( binding, refResolver, classResolver, fieldInfo, buffer); diff --git a/java/fory-core/src/main/java/org/apache/fory/serializer/MetaSharedSerializer.java b/java/fory-core/src/main/java/org/apache/fory/serializer/MetaSharedSerializer.java index 60c4044572..44b34889aa 100644 --- a/java/fory-core/src/main/java/org/apache/fory/serializer/MetaSharedSerializer.java +++ b/java/fory-core/src/main/java/org/apache/fory/serializer/MetaSharedSerializer.java @@ -40,6 +40,7 @@ import org.apache.fory.serializer.FieldGroups.SerializationFieldInfo; import org.apache.fory.type.Descriptor; import org.apache.fory.type.DescriptorGrouper; +import org.apache.fory.type.DispatchId; import org.apache.fory.type.Generics; import org.apache.fory.util.DefaultValueUtils; import org.apache.fory.util.GraalvmSupport; @@ -184,21 +185,21 @@ public T read(MemoryBuffer buffer) { FieldAccessor fieldAccessor = fieldInfo.fieldAccessor; boolean nullable = fieldInfo.nullable; if (fieldAccessor != null) { - short classId = fieldInfo.classId; + int dispatchId = fieldInfo.dispatchId; boolean needRead = true; if (fieldInfo.isPrimitive) { if (nullable) { - needRead = readPrimitiveNullableFieldValue(fory, buffer, obj, fieldAccessor, classId); + needRead = readPrimitiveNullableFieldValue(buffer, obj, fieldAccessor, dispatchId); } else { - needRead = readPrimitiveFieldValue(fory, buffer, obj, fieldAccessor, classId); + needRead = readPrimitiveFieldValue(buffer, obj, fieldAccessor, dispatchId); } } if (needRead && (nullable ? 
AbstractObjectSerializer.readBasicNullableObjectFieldValue( - fory, buffer, obj, fieldAccessor, classId) + fory, buffer, obj, fieldAccessor, dispatchId) : AbstractObjectSerializer.readBasicObjectFieldValue( - fory, buffer, obj, fieldAccessor, classId))) { + fory, buffer, obj, fieldAccessor, dispatchId))) { assert fieldInfo.classInfo != null; Object fieldValue = AbstractObjectSerializer.readFinalObjectFieldValue( @@ -208,7 +209,7 @@ public T read(MemoryBuffer buffer) { } else { if (fieldInfo.fieldConverter == null) { // Skip the field value from buffer since it doesn't exist in current class - if (skipPrimitiveFieldValueFailed(fory, fieldInfo.classId, buffer)) { + if (skipPrimitiveFieldValueFailed(fory, fieldInfo.dispatchId, buffer)) { if (fieldInfo.classInfo == null) { // TODO(chaokunyang) support registered serializer in peer with ref tracking disabled. binding.readRef(buffer, classInfoHolder); @@ -243,10 +244,9 @@ public T read(MemoryBuffer buffer) { private void compatibleRead(MemoryBuffer buffer, SerializationFieldInfo fieldInfo, Object obj) { Object fieldValue; - short classId = fieldInfo.classId; - if (classId >= ClassResolver.PRIMITIVE_BOOLEAN_CLASS_ID - && classId <= ClassResolver.PRIMITIVE_DOUBLE_CLASS_ID) { - fieldValue = Serializers.readPrimitiveValue(fory, buffer, classId); + int dispatchId = fieldInfo.dispatchId; + if (DispatchId.isPrimitive(dispatchId)) { + fieldValue = Serializers.readPrimitiveValue(fory, buffer, dispatchId); } else { fieldValue = AbstractObjectSerializer.readFinalObjectFieldValue( @@ -280,11 +280,10 @@ private void readFields(MemoryBuffer buffer, Object[] fields) { for (SerializationFieldInfo fieldInfo : this.buildInFields) { if (fieldInfo.fieldAccessor != null) { assert fieldInfo.classInfo != null; - short classId = fieldInfo.classId; + int dispatchId = fieldInfo.dispatchId; // primitive field won't write null flag. 
- if (classId >= ClassResolver.PRIMITIVE_BOOLEAN_CLASS_ID - && classId <= ClassResolver.PRIMITIVE_DOUBLE_CLASS_ID) { - fields[counter++] = Serializers.readPrimitiveValue(fory, buffer, classId); + if (DispatchId.isPrimitive(dispatchId)) { + fields[counter++] = Serializers.readPrimitiveValue(fory, buffer, dispatchId); } else { Object fieldValue = AbstractObjectSerializer.readFinalObjectFieldValue( @@ -293,7 +292,7 @@ private void readFields(MemoryBuffer buffer, Object[] fields) { } } else { // Skip the field value from buffer since it doesn't exist in current class - if (skipPrimitiveFieldValueFailed(fory, fieldInfo.classId, buffer)) { + if (skipPrimitiveFieldValueFailed(fory, fieldInfo.dispatchId, buffer)) { if (fieldInfo.classInfo == null) { // TODO(chaokunyang) support registered serializer in peer with ref tracking disabled. fory.readRef(buffer, classInfoHolder); @@ -319,40 +318,60 @@ private void readFields(MemoryBuffer buffer, Object[] fields) { } /** Skip primitive primitive field value since it doesn't write null flag. 
*/ - static boolean skipPrimitiveFieldValueFailed(Fory fory, short classId, MemoryBuffer buffer) { - switch (classId) { - case ClassResolver.PRIMITIVE_BOOLEAN_CLASS_ID: + static boolean skipPrimitiveFieldValueFailed(Fory fory, int dispatchId, MemoryBuffer buffer) { + switch (dispatchId) { + case DispatchId.PRIMITIVE_BOOL: buffer.increaseReaderIndex(1); return false; - case ClassResolver.PRIMITIVE_BYTE_CLASS_ID: + case DispatchId.PRIMITIVE_INT8: + case DispatchId.PRIMITIVE_UINT8: buffer.increaseReaderIndex(1); return false; - case ClassResolver.PRIMITIVE_CHAR_CLASS_ID: + case DispatchId.PRIMITIVE_CHAR: buffer.increaseReaderIndex(2); return false; - case ClassResolver.PRIMITIVE_SHORT_CLASS_ID: + case DispatchId.PRIMITIVE_INT16: + case DispatchId.PRIMITIVE_UINT16: buffer.increaseReaderIndex(2); return false; - case ClassResolver.PRIMITIVE_INT_CLASS_ID: - if (fory.compressInt()) { - buffer.readVarInt32(); - } else { - buffer.increaseReaderIndex(4); - } + case DispatchId.PRIMITIVE_INT32: + buffer.increaseReaderIndex(4); + return false; + case DispatchId.PRIMITIVE_VARINT32: + buffer.readVarInt32(); return false; - case ClassResolver.PRIMITIVE_FLOAT_CLASS_ID: + case DispatchId.PRIMITIVE_UINT32: buffer.increaseReaderIndex(4); return false; - case ClassResolver.PRIMITIVE_LONG_CLASS_ID: - fory.readInt64(buffer); + case DispatchId.PRIMITIVE_VAR_UINT32: + buffer.readVarUint32(); + return false; + case DispatchId.PRIMITIVE_INT64: + buffer.increaseReaderIndex(8); + return false; + case DispatchId.PRIMITIVE_VARINT64: + buffer.readVarInt64(); + return false; + case DispatchId.PRIMITIVE_TAGGED_INT64: + buffer.readTaggedInt64(); return false; - case ClassResolver.PRIMITIVE_DOUBLE_CLASS_ID: + case DispatchId.PRIMITIVE_UINT64: + buffer.increaseReaderIndex(8); + return false; + case DispatchId.PRIMITIVE_VAR_UINT64: + buffer.readVarUint64(); + return false; + case DispatchId.PRIMITIVE_TAGGED_UINT64: + buffer.readTaggedUint64(); + return false; + case DispatchId.PRIMITIVE_FLOAT32: + 
buffer.increaseReaderIndex(4); + return false; + case DispatchId.PRIMITIVE_FLOAT64: buffer.increaseReaderIndex(8); return false; default: - { - return true; - } + return true; } } diff --git a/java/fory-core/src/main/java/org/apache/fory/serializer/NonexistentClassSerializers.java b/java/fory-core/src/main/java/org/apache/fory/serializer/NonexistentClassSerializers.java index 83366f1286..94f94ac955 100644 --- a/java/fory-core/src/main/java/org/apache/fory/serializer/NonexistentClassSerializers.java +++ b/java/fory-core/src/main/java/org/apache/fory/serializer/NonexistentClassSerializers.java @@ -43,6 +43,7 @@ import org.apache.fory.serializer.Serializers.CrossLanguageCompatibleSerializer; import org.apache.fory.type.Descriptor; import org.apache.fory.type.DescriptorGrouper; +import org.apache.fory.type.DispatchId; import org.apache.fory.type.Generics; import org.apache.fory.util.Preconditions; @@ -117,7 +118,7 @@ public void write(MemoryBuffer buffer, Object v) { for (SerializationFieldInfo fieldInfo : fieldsInfo.buildInFields) { Object fieldValue = value.get(fieldInfo.qualifiedFieldName); ClassInfo classInfo = fieldInfo.classInfo; - if (classResolver.isPrimitive(fieldInfo.classId)) { + if (DispatchId.isPrimitive(fieldInfo.dispatchId)) { classInfo.getSerializer().write(buffer, fieldValue); } else { if (fieldInfo.useDeclaredTypeInfo) { @@ -180,7 +181,7 @@ public Object read(MemoryBuffer buffer) { // TODO(chaokunyang) support registered serializer in peer with ref tracking disabled. 
fieldValue = fory.readRef(buffer, classInfoHolder); } else { - if (classResolver.isPrimitive(fieldInfo.classId)) { + if (DispatchId.isPrimitive(fieldInfo.dispatchId)) { fieldValue = fieldInfo.classInfo.getSerializer().read(buffer); } else { fieldValue = diff --git a/java/fory-core/src/main/java/org/apache/fory/serializer/ObjectSerializer.java b/java/fory-core/src/main/java/org/apache/fory/serializer/ObjectSerializer.java index 376ed5c7d9..7a858deebc 100644 --- a/java/fory-core/src/main/java/org/apache/fory/serializer/ObjectSerializer.java +++ b/java/fory-core/src/main/java/org/apache/fory/serializer/ObjectSerializer.java @@ -31,13 +31,13 @@ import org.apache.fory.memory.MemoryBuffer; import org.apache.fory.meta.ClassDef; import org.apache.fory.reflect.FieldAccessor; -import org.apache.fory.resolver.ClassResolver; import org.apache.fory.resolver.RefResolver; import org.apache.fory.resolver.TypeResolver; import org.apache.fory.serializer.FieldGroups.SerializationFieldInfo; import org.apache.fory.serializer.struct.Fingerprint; import org.apache.fory.type.Descriptor; import org.apache.fory.type.DescriptorGrouper; +import org.apache.fory.type.DispatchId; import org.apache.fory.type.Generics; import org.apache.fory.util.MurmurHash3; import org.apache.fory.util.Utils; @@ -169,13 +169,13 @@ private void writeBuildInFields( for (SerializationFieldInfo fieldInfo : this.buildInFields) { FieldAccessor fieldAccessor = fieldInfo.fieldAccessor; boolean nullable = fieldInfo.nullable; - short classId = fieldInfo.classId; - if (writePrimitiveFieldValue(fory, buffer, value, fieldAccessor, classId)) { + int dispatchId = fieldInfo.dispatchId; + if (writePrimitiveFieldValue(buffer, value, fieldAccessor, dispatchId)) { Object fieldValue = fieldAccessor.getObject(value); boolean needWrite = nullable - ? writeBasicNullableObjectFieldValue(fory, buffer, fieldValue, classId) - : writeBasicObjectFieldValue(fory, buffer, fieldValue, classId); + ? 
writeBasicNullableObjectFieldValue(fory, buffer, fieldValue, dispatchId) + : writeBasicObjectFieldValue(fory, buffer, fieldValue, dispatchId); if (needWrite) { Serializer serializer = fieldInfo.classInfo.getSerializer(); if (!metaShareEnabled || fieldInfo.useDeclaredTypeInfo) { @@ -268,10 +268,9 @@ public Object[] readFields(MemoryBuffer buffer) { int counter = 0; // read order: primitive,boxed,final,other,collection,map for (SerializationFieldInfo fieldInfo : this.buildInFields) { - short classId = fieldInfo.classId; - if (classId >= ClassResolver.PRIMITIVE_BOOLEAN_CLASS_ID - && classId <= ClassResolver.PRIMITIVE_DOUBLE_CLASS_ID) { - fieldValues[counter++] = Serializers.readPrimitiveValue(fory, buffer, classId); + int dispatchId = fieldInfo.dispatchId; + if (DispatchId.isPrimitive(dispatchId)) { + fieldValues[counter++] = Serializers.readPrimitiveValue(fory, buffer, dispatchId); } else { Object fieldValue = readFinalObjectFieldValue(binding, refResolver, typeResolver, fieldInfo, buffer); @@ -302,11 +301,11 @@ public T readAndSetFields(MemoryBuffer buffer, T obj) { for (SerializationFieldInfo fieldInfo : this.buildInFields) { FieldAccessor fieldAccessor = fieldInfo.fieldAccessor; boolean nullable = fieldInfo.nullable; - short classId = fieldInfo.classId; - if (readPrimitiveFieldValue(fory, buffer, obj, fieldAccessor, classId) + int dispatchId = fieldInfo.dispatchId; + if (readPrimitiveFieldValue(buffer, obj, fieldAccessor, dispatchId) && (nullable - ? readBasicNullableObjectFieldValue(fory, buffer, obj, fieldAccessor, classId) - : readBasicObjectFieldValue(fory, buffer, obj, fieldAccessor, classId))) { + ? 
readBasicNullableObjectFieldValue(fory, buffer, obj, fieldAccessor, dispatchId) + : readBasicObjectFieldValue(fory, buffer, obj, fieldAccessor, dispatchId))) { Object fieldValue = readFinalObjectFieldValue(binding, refResolver, typeResolver, fieldInfo, buffer); fieldAccessor.putObject(obj, fieldValue); diff --git a/java/fory-core/src/main/java/org/apache/fory/serializer/PrimitiveSerializers.java b/java/fory-core/src/main/java/org/apache/fory/serializer/PrimitiveSerializers.java index 1ef0ceb5ce..f0384472ad 100644 --- a/java/fory-core/src/main/java/org/apache/fory/serializer/PrimitiveSerializers.java +++ b/java/fory-core/src/main/java/org/apache/fory/serializer/PrimitiveSerializers.java @@ -197,10 +197,11 @@ public Long read(MemoryBuffer buffer) { public static Expression writeInt64( Expression buffer, Expression v, LongEncoding longEncoding, boolean ensureBounds) { switch (longEncoding) { - case LE_RAW_BYTES: + case FIXED_INT64: return new Invoke(buffer, "writeInt64", v); - case HYBRID: - return new Invoke(buffer, ensureBounds ? "writeHybridInt64" : "_unsafeWriteHybridInt64", v); + case TAGGED_INT64: + return new Invoke( + buffer, ensureBounds ? "writeHybridInt64" : "_unsafeWriteHybridInt64", v); case VARINT64: return new Invoke(buffer, ensureBounds ? 
"writeVarInt64" : "_unsafeWriteVarInt64", v); default: @@ -209,9 +210,9 @@ public static Expression writeInt64( } public static void writeInt64(MemoryBuffer buffer, long value, LongEncoding longEncoding) { - if (longEncoding == LongEncoding.HYBRID) { - buffer.writeHybridInt64(value); - } else if (longEncoding == LongEncoding.LE_RAW_BYTES) { + if (longEncoding == LongEncoding.TAGGED_INT64) { + buffer.writeTaggedInt64(value); + } else if (longEncoding == LongEncoding.FIXED_INT64) { buffer.writeInt64(value); } else { buffer.writeVarInt64(value); @@ -219,9 +220,9 @@ public static void writeInt64(MemoryBuffer buffer, long value, LongEncoding long } public static long readInt64(MemoryBuffer buffer, LongEncoding longEncoding) { - if (longEncoding == LongEncoding.HYBRID) { - return buffer.readHybridInt64(); - } else if (longEncoding == LongEncoding.LE_RAW_BYTES) { + if (longEncoding == LongEncoding.TAGGED_INT64) { + return buffer.readTaggedInt64(); + } else if (longEncoding == LongEncoding.FIXED_INT64) { return buffer.readInt64(); } else { return buffer.readVarInt64(); @@ -234,9 +235,9 @@ public static Expression readInt64(Expression buffer, LongEncoding longEncoding) public static String readLongFunc(LongEncoding longEncoding) { switch (longEncoding) { - case LE_RAW_BYTES: + case FIXED_INT64: return Platform.IS_LITTLE_ENDIAN ? "_readInt64OnLE" : "_readInt64OnBE"; - case HYBRID: + case TAGGED_INT64: return Platform.IS_LITTLE_ENDIAN ? "_readHybridInt64OnLE" : "_readHybridInt64OnBE"; case VARINT64: return Platform.IS_LITTLE_ENDIAN ? 
"_readVarInt64OnLE" : "_readVarInt64OnBE"; diff --git a/java/fory-core/src/main/java/org/apache/fory/serializer/Serializers.java b/java/fory-core/src/main/java/org/apache/fory/serializer/Serializers.java index a75da3ded4..dbdfc065cd 100644 --- a/java/fory-core/src/main/java/org/apache/fory/serializer/Serializers.java +++ b/java/fory-core/src/main/java/org/apache/fory/serializer/Serializers.java @@ -50,6 +50,7 @@ import org.apache.fory.meta.ClassDef; import org.apache.fory.reflect.ReflectionUtils; import org.apache.fory.resolver.ClassResolver; +import org.apache.fory.type.DispatchId; import org.apache.fory.util.ExceptionUtils; import org.apache.fory.util.GraalvmSupport; import org.apache.fory.util.GraalvmSupport.GraalvmSerializerHolder; @@ -176,32 +177,44 @@ private static Serializer createSerializer( } } - public static Object readPrimitiveValue(Fory fory, MemoryBuffer buffer, short classId) { - switch (classId) { - case ClassResolver.PRIMITIVE_BOOLEAN_CLASS_ID: + public static Object readPrimitiveValue(Fory fory, MemoryBuffer buffer, int dispatchId) { + switch (dispatchId) { + case DispatchId.PRIMITIVE_BOOL: return buffer.readBoolean(); - case ClassResolver.PRIMITIVE_BYTE_CLASS_ID: + case DispatchId.PRIMITIVE_INT8: + case DispatchId.PRIMITIVE_UINT8: return buffer.readByte(); - case ClassResolver.PRIMITIVE_CHAR_CLASS_ID: + case DispatchId.PRIMITIVE_CHAR: return buffer.readChar(); - case ClassResolver.PRIMITIVE_SHORT_CLASS_ID: + case DispatchId.PRIMITIVE_INT16: + case DispatchId.PRIMITIVE_UINT16: return buffer.readInt16(); - case ClassResolver.PRIMITIVE_INT_CLASS_ID: - if (fory.compressInt()) { - return buffer.readVarInt32(); - } else { - return buffer.readInt32(); - } - case ClassResolver.PRIMITIVE_FLOAT_CLASS_ID: + case DispatchId.PRIMITIVE_INT32: + return buffer.readInt32(); + case DispatchId.PRIMITIVE_VARINT32: + return buffer.readVarInt32(); + case DispatchId.PRIMITIVE_UINT32: + return buffer.readInt32(); + case DispatchId.PRIMITIVE_VAR_UINT32: + return 
buffer.readVarUint32(); + case DispatchId.PRIMITIVE_INT64: + return buffer.readInt64(); + case DispatchId.PRIMITIVE_VARINT64: + return buffer.readVarInt64(); + case DispatchId.PRIMITIVE_TAGGED_INT64: + return buffer.readTaggedInt64(); + case DispatchId.PRIMITIVE_UINT64: + return buffer.readInt64(); + case DispatchId.PRIMITIVE_VAR_UINT64: + return buffer.readVarUint64(); + case DispatchId.PRIMITIVE_TAGGED_UINT64: + return buffer.readTaggedUint64(); + case DispatchId.PRIMITIVE_FLOAT32: return buffer.readFloat32(); - case ClassResolver.PRIMITIVE_LONG_CLASS_ID: - return fory.readInt64(buffer); - case ClassResolver.PRIMITIVE_DOUBLE_CLASS_ID: + case DispatchId.PRIMITIVE_FLOAT64: return buffer.readFloat64(); default: - { - throw new IllegalStateException("unreachable"); - } + throw new IllegalStateException("unreachable"); } } diff --git a/java/fory-core/src/main/java/org/apache/fory/type/Descriptor.java b/java/fory-core/src/main/java/org/apache/fory/type/Descriptor.java index 158c02968f..94fee5eeef 100644 --- a/java/fory-core/src/main/java/org/apache/fory/type/Descriptor.java +++ b/java/fory-core/src/main/java/org/apache/fory/type/Descriptor.java @@ -23,6 +23,7 @@ import com.google.common.cache.Cache; import com.google.common.cache.CacheBuilder; +import java.lang.annotation.Annotation; import java.lang.reflect.Field; import java.lang.reflect.Member; import java.lang.reflect.Method; @@ -31,6 +32,7 @@ import java.util.Arrays; import java.util.Comparator; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Objects; @@ -45,13 +47,18 @@ import org.apache.fory.annotation.Expose; import org.apache.fory.annotation.ForyField; import org.apache.fory.annotation.Ignore; +import org.apache.fory.annotation.Int32Type; +import org.apache.fory.annotation.Int64Type; import org.apache.fory.annotation.Internal; +import org.apache.fory.annotation.Uint16Type; +import org.apache.fory.annotation.Uint32Type; +import 
org.apache.fory.annotation.Uint64Type; +import org.apache.fory.annotation.Uint8Type; import org.apache.fory.collection.Collections; import org.apache.fory.collection.Tuple2; import org.apache.fory.memory.Platform; import org.apache.fory.reflect.TypeRef; import org.apache.fory.serializer.converter.FieldConverter; -import org.apache.fory.util.Preconditions; import org.apache.fory.util.StringUtils; import org.apache.fory.util.record.RecordComponent; import org.apache.fory.util.record.RecordUtils; @@ -88,6 +95,7 @@ public static void clearDescriptorCache() { private final Method readMethod; private final Method writeMethod; private final ForyField foryField; + private final Annotation typeAnnotation; private boolean nullable; // trackingRef should only be true if explicitly set to true via @ForyField(ref=true) // If no annotation or ref not specified, trackingRef stays false and type-based tracking applies @@ -107,6 +115,7 @@ public Descriptor(Field field, TypeRef typeRef, Method readMethod, Method wri this.writeMethod = writeMethod; this.typeRef = typeRef; this.foryField = this.field.getAnnotation(ForyField.class); + typeAnnotation = getTypeAnnotation(field); if (!typeRef.isPrimitive()) { this.nullable = foryField == null || foryField.nullable(); } @@ -130,6 +139,7 @@ public Descriptor( this.readMethod = null; this.writeMethod = null; this.foryField = null; + typeAnnotation = null; this.nullable = nullable; this.trackingRef = trackingRef; } @@ -147,6 +157,7 @@ private Descriptor(Field field, Method readMethod) { this.readMethod = readMethod; this.writeMethod = null; this.foryField = this.field.getAnnotation(ForyField.class); + typeAnnotation = getTypeAnnotation(field); if (!field.getType().isPrimitive()) { this.nullable = foryField == null || foryField.nullable(); } @@ -165,6 +176,7 @@ private Descriptor(Method readMethod) { this.readMethod = readMethod; this.writeMethod = null; this.foryField = readMethod.getAnnotation(ForyField.class); + typeAnnotation = 
getTypeAnnotation(readMethod); if (!readMethod.getReturnType().isPrimitive()) { this.nullable = foryField == null || foryField.nullable(); } @@ -182,6 +194,7 @@ public Descriptor(DescriptorBuilder builder) { this.writeMethod = builder.writeMethod; this.trackingRef = builder.trackingRef; this.foryField = this.field == null ? null : this.field.getAnnotation(ForyField.class); + typeAnnotation = getTypeAnnotation(field); // Use builder.nullable directly - this is set by DescriptorBuilder.nullable() // and should be respected, especially for xlang compatible mode where remote // TypeDef's nullable flag may differ from local field's nullable @@ -273,6 +286,10 @@ public ForyField.Morphic getMorphic() { return ForyField.Morphic.AUTO; } + public Annotation getTypeAnnotation() { + return typeAnnotation; + } + /** Try not use {@link TypeRef#getRawType()} since it's expensive. */ public Class getRawType() { Class type = this.type; @@ -340,7 +357,7 @@ public static List getDescriptors(Class clz) { public static SortedMap getDescriptorsMap(Class clz) { SortedMap allDescriptorsMap = getAllDescriptorsMap(clz); Map> duplicateNameFields = getDuplicateNames(allDescriptorsMap); - Preconditions.checkArgument( + checkArgument( duplicateNameFields.isEmpty(), "%s has duplicate fields %s", clz, duplicateNameFields); TreeMap map = new TreeMap<>(); allDescriptorsMap.forEach((k, v) -> map.put(k.getName(), v)); @@ -656,4 +673,49 @@ static SortedMap buildBeanedDescriptorsMap( // otherwise classes can't be gc. 
return descriptorMap; } + + private static final Set> typeAnnotationsTypes = new HashSet<>(); + + static { + typeAnnotationsTypes.add(Int32Type.class); + typeAnnotationsTypes.add(Int64Type.class); + typeAnnotationsTypes.add(Uint8Type.class); + typeAnnotationsTypes.add(Uint16Type.class); + typeAnnotationsTypes.add(Uint32Type.class); + typeAnnotationsTypes.add(Uint64Type.class); + } + + private static Annotation getTypeAnnotation(Field field) { + if (field == null) { + return null; + } + return getAnnotation(field); + } + + private static Annotation getTypeAnnotation(Method method) { + if (method == null) { + return null; + } + return getAnnotation(method.getDeclaredAnnotations(), method.getName()); + } + + public static Annotation getAnnotation(Field field) { + return getAnnotation(field.getDeclaredAnnotations(), field.getName()); + } + + public static Annotation getAnnotation(Annotation[] declaredAnnotations, String name) { + Annotation typeAnnotation = null; + for (Annotation annotation : declaredAnnotations) { + if (typeAnnotationsTypes.contains(annotation.annotationType())) { + if (typeAnnotation != null) { + throw new IllegalStateException( + String.format( + "Multiple type annotation %s and %s found for %s!", + typeAnnotation, annotation.annotationType(), name)); + } + typeAnnotation = annotation; + } + } + return typeAnnotation; + } } diff --git a/java/fory-core/src/main/java/org/apache/fory/type/DispatchId.java b/java/fory-core/src/main/java/org/apache/fory/type/DispatchId.java new file mode 100644 index 0000000000..d222dbdfca --- /dev/null +++ b/java/fory-core/src/main/java/org/apache/fory/type/DispatchId.java @@ -0,0 +1,134 @@ +package org.apache.fory.type; + +import org.apache.fory.Fory; +import org.apache.fory.meta.TypeExtMeta; +import org.apache.fory.reflect.TypeRef; +import org.apache.fory.resolver.ClassResolver; + +public class DispatchId { + public static final int UNKNOWN = 0; + public static final int PRIMITIVE_BOOL = 1; + public static final int 
PRIMITIVE_INT8 = 2; + public static final int PRIMITIVE_INT16 = 3; + public static final int PRIMITIVE_CHAR = 4; + public static final int PRIMITIVE_INT32 = 5; + public static final int PRIMITIVE_VARINT32 = 6; + public static final int PRIMITIVE_INT64 = 7; + public static final int PRIMITIVE_VARINT64 = 8; + public static final int PRIMITIVE_TAGGED_INT64 = 9; + public static final int PRIMITIVE_FLOAT32 = 10; + public static final int PRIMITIVE_FLOAT64 = 11; + public static final int PRIMITIVE_UINT8 = 12; + public static final int PRIMITIVE_UINT16 = 13; + public static final int PRIMITIVE_UINT32 = 14; + public static final int PRIMITIVE_VAR_UINT32 = 15; + public static final int PRIMITIVE_UINT64 = 16; + public static final int PRIMITIVE_VAR_UINT64 = 17; + public static final int PRIMITIVE_TAGGED_UINT64 = 18; + + public static final int BOOL = 19; + public static final int INT8 = 20; + public static final int CHAR = 21; + public static final int INT16 = 22; + public static final int INT32 = 23; + public static final int VARINT32 = 24; + public static final int INT64 = 25; + public static final int VARINT64 = 26; + public static final int TAGGED_INT64 = 27; + public static final int FLOAT32 = 28; + public static final int FLOAT64 = 29; + public static final int UINT8 = 30; + public static final int UINT16 = 31; + public static final int UINT32 = 32; + public static final int VAR_UINT32 = 33; + public static final int UINT64 = 34; + public static final int VAR_UINT64 = 35; + public static final int TAGGED_UINT64 = 36; + public static final int STRING = 37; + + public static int getDispatchId(Fory fory, Descriptor d) { + int typeId = Types.getDescriptorTypeId(fory, d); + TypeRef typeRef = d.getTypeRef(); + Class rawType = typeRef.getRawType(); + TypeExtMeta typeExtMeta = typeRef.getTypeExtMeta(); + boolean isPrimitive = + typeRef.isPrimitive() + || (TypeUtils.unwrap(rawType).isPrimitive() + && typeExtMeta != null + && !typeExtMeta.nullable()); + if 
(fory.isCrossLanguage()) { + + return xlangTypeIdToDispatchId(typeId, isPrimitive); + } else { + return nativeIdToDispatchId(typeId, d, isPrimitive); + } + } + + private static int xlangTypeIdToDispatchId(int typeId, boolean isPrimitive) { + switch (typeId) { + case Types.BOOL: + return isPrimitive ? PRIMITIVE_BOOL : BOOL; + case Types.INT8: + return isPrimitive ? PRIMITIVE_INT8 : INT8; + case Types.INT16: + return isPrimitive ? PRIMITIVE_INT16 : INT16; + case Types.INT32: + return isPrimitive ? PRIMITIVE_INT32 : INT32; + case Types.VARINT32: + return isPrimitive ? PRIMITIVE_VARINT32 : VARINT32; + case Types.INT64: + return isPrimitive ? PRIMITIVE_INT64 : INT64; + case Types.VARINT64: + return isPrimitive ? PRIMITIVE_VARINT64 : VARINT64; + case Types.TAGGED_INT64: + return isPrimitive ? PRIMITIVE_TAGGED_INT64 : TAGGED_INT64; + case Types.UINT8: + return isPrimitive ? PRIMITIVE_UINT8 : UINT8; + case Types.UINT16: + return isPrimitive ? PRIMITIVE_UINT16 : UINT16; + case Types.UINT32: + return isPrimitive ? PRIMITIVE_UINT32 : UINT32; + case Types.VAR_UINT32: + return isPrimitive ? PRIMITIVE_VAR_UINT32 : VAR_UINT32; + case Types.UINT64: + return isPrimitive ? PRIMITIVE_UINT64 : UINT64; + case Types.VAR_UINT64: + return isPrimitive ? PRIMITIVE_VAR_UINT64 : VAR_UINT64; + case Types.TAGGED_UINT64: + return isPrimitive ? PRIMITIVE_TAGGED_UINT64 : TAGGED_UINT64; + case Types.FLOAT32: + return isPrimitive ? PRIMITIVE_FLOAT32 : FLOAT32; + case Types.FLOAT64: + return isPrimitive ? PRIMITIVE_FLOAT64 : FLOAT64; + case Types.STRING: + return STRING; + default: + return UNKNOWN; + } + } + + private static int nativeIdToDispatchId( + int nativeId, Descriptor descriptor, boolean isPrimitive) { + if (nativeId >= Types.BOOL && nativeId <= ClassResolver.NATIVE_START_ID) { + return xlangTypeIdToDispatchId(nativeId, isPrimitive); + } + if (nativeId == ClassResolver.CHAR_ID) { + return isPrimitive ? 
PRIMITIVE_CHAR : CHAR; + } + if (nativeId == ClassResolver.PRIMITIVE_CHAR_ID) { + return PRIMITIVE_CHAR; + } + if (nativeId >= ClassResolver.PRIMITIVE_VOID_ID + && nativeId <= ClassResolver.PRIMITIVE_FLOAT64_ID) { + throw new IllegalArgumentException( + String.format( + "%s should use `Types.BOOL~Types.FLOAT64` with nullable meta instead, but got %s", + descriptor.getField(), nativeId)); + } + return xlangTypeIdToDispatchId(nativeId, isPrimitive); + } + + public static boolean isPrimitive(int dispatchId) { + return dispatchId >= PRIMITIVE_BOOL && dispatchId <= PRIMITIVE_TAGGED_UINT64; + } +} diff --git a/java/fory-core/src/main/java/org/apache/fory/type/FinalObjectTypeStub.java b/java/fory-core/src/main/java/org/apache/fory/type/FinalObjectTypeStub.java deleted file mode 100644 index 6e5b09bfd1..0000000000 --- a/java/fory-core/src/main/java/org/apache/fory/type/FinalObjectTypeStub.java +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.fory.type; - -import org.apache.fory.annotation.Internal; - -/** - * Stub class for object type which is final. - * - *

    {@link Object} class will be used if isn't final. No {@link - * org.apache.fory.resolver.ClassInfo} should be created for this class since it has no fields, and - * doesn't have consistent class structure as real class. - */ -@Internal -public final class FinalObjectTypeStub {} diff --git a/java/fory-core/src/main/java/org/apache/fory/type/TypeAnnotationUtils.java b/java/fory-core/src/main/java/org/apache/fory/type/TypeAnnotationUtils.java new file mode 100644 index 0000000000..50f4b09248 --- /dev/null +++ b/java/fory-core/src/main/java/org/apache/fory/type/TypeAnnotationUtils.java @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.fory.type; + +import java.lang.annotation.Annotation; +import org.apache.fory.annotation.Int32Type; +import org.apache.fory.annotation.Int64Type; +import org.apache.fory.annotation.Uint16Type; +import org.apache.fory.annotation.Uint32Type; +import org.apache.fory.annotation.Uint64Type; +import org.apache.fory.annotation.Uint8Type; + +public class TypeAnnotationUtils { + public static int getTypeId(Annotation typeAnnotation) { + if (typeAnnotation == null) return Types.UNKNOWN; + if (typeAnnotation instanceof Uint8Type) { + return Types.UINT8; + } else if (typeAnnotation instanceof Uint16Type) { + return Types.UINT16; + } else if (typeAnnotation instanceof Uint32Type) { + Uint32Type uint32Type = (Uint32Type) typeAnnotation; + return uint32Type.compress() ? Types.VAR_UINT32 : Types.UINT32; + } else if (typeAnnotation instanceof Uint64Type) { + Uint64Type uint64Type = (Uint64Type) typeAnnotation; + switch (uint64Type.encoding()) { + case VARINT64: + return Types.VAR_UINT64; + case FIXED_INT64: + return Types.UINT64; + case TAGGED_INT64: + return Types.TAGGED_UINT64; + default: + throw new IllegalArgumentException("Unsupported encoding: " + uint64Type.encoding()); + } + } else if (typeAnnotation instanceof Int32Type) { + Int32Type int32Type = (Int32Type) typeAnnotation; + return int32Type.compress() ? 
Types.VARINT32 : Types.INT32; + } else if (typeAnnotation instanceof Int64Type) { + Int64Type int64Type = (Int64Type) typeAnnotation; + switch (int64Type.encoding()) { + case VARINT64: + return Types.VARINT64; + case FIXED_INT64: + return Types.INT64; + case TAGGED_INT64: + return Types.TAGGED_INT64; + default: + throw new IllegalArgumentException("Unsupported encoding: " + int64Type.encoding()); + } + } + throw new IllegalArgumentException("Unsupported type: " + typeAnnotation.getClass()); + } +} diff --git a/java/fory-core/src/main/java/org/apache/fory/type/TypeUtils.java b/java/fory-core/src/main/java/org/apache/fory/type/TypeUtils.java index ad5ec87a4f..10039842d5 100644 --- a/java/fory-core/src/main/java/org/apache/fory/type/TypeUtils.java +++ b/java/fory-core/src/main/java/org/apache/fory/type/TypeUtils.java @@ -69,6 +69,7 @@ import java.util.stream.Collectors; import org.apache.fory.collection.IdentityMap; import org.apache.fory.collection.Tuple2; +import org.apache.fory.meta.TypeExtMeta; import org.apache.fory.reflect.ReflectionUtils; import org.apache.fory.reflect.TypeParameter; import org.apache.fory.reflect.TypeRef; @@ -264,6 +265,9 @@ public static Class wrap(Class clz) { } public static Class unwrap(Class clz) { + if (clz == null) { + return null; + } if (clz.isPrimitive()) { return clz; } @@ -271,6 +275,9 @@ public static Class unwrap(Class clz) { } public static Class boxedType(Class clz) { + if (clz == null) { + return null; + } if (!clz.isPrimitive()) { return clz; } @@ -552,12 +559,12 @@ public static TypeRef> collectionOf(TypeRef elemType) { return new TypeRef>() {}.where(new TypeParameter() {}, elemType); } - public static TypeRef> collectionOf(TypeRef elemType, Object extMeta) { + public static TypeRef> collectionOf(TypeRef elemType, TypeExtMeta extMeta) { return new TypeRef>(extMeta) {}.where(new TypeParameter() {}, elemType); } public static TypeRef> collectionOf( - Class collectionType, TypeRef elemType, Object extMeta) { + Class 
collectionType, TypeRef elemType, TypeExtMeta extMeta) { return new TypeRef>(extMeta) {}.where(new TypeParameter() {}, elemType) .getSubtype(collectionType); } @@ -572,13 +579,13 @@ public static TypeRef> mapOf(TypeRef keyType, TypeRef val } public static TypeRef> mapOf( - TypeRef keyType, TypeRef valueType, Object extMeta) { + TypeRef keyType, TypeRef valueType, TypeExtMeta extMeta) { return new TypeRef>(extMeta) {}.where(new TypeParameter() {}, keyType) .where(new TypeParameter() {}, valueType); } public static TypeRef> mapOf( - Class mapType, TypeRef keyType, TypeRef valueType, Object extMeta) { + Class mapType, TypeRef keyType, TypeRef valueType, TypeExtMeta extMeta) { TypeRef> mapTypeRef = new TypeRef>(extMeta) {}.where(new TypeParameter() {}, keyType) .where(new TypeParameter() {}, valueType); diff --git a/java/fory-core/src/main/java/org/apache/fory/type/Types.java b/java/fory-core/src/main/java/org/apache/fory/type/Types.java index 0048a8975a..9042ae62f8 100644 --- a/java/fory-core/src/main/java/org/apache/fory/type/Types.java +++ b/java/fory-core/src/main/java/org/apache/fory/type/Types.java @@ -21,7 +21,14 @@ import static org.apache.fory.collection.Collections.ofHashMap; +import java.lang.annotation.Annotation; +import java.lang.reflect.Field; import java.util.Map; +import org.apache.fory.Fory; +import org.apache.fory.meta.TypeExtMeta; +import org.apache.fory.reflect.TypeRef; +import org.apache.fory.resolver.ClassInfo; +import org.apache.fory.resolver.ClassResolver; import org.apache.fory.util.Preconditions; public class Types { @@ -42,16 +49,16 @@ public class Types { public static final int INT32 = 4; /** var32: a 32-bit signed integer which uses fory var_int32 encoding. */ - public static final int VAR32 = 5; + public static final int VARINT32 = 5; /** int64: a 64-bit signed integer. */ public static final int INT64 = 6; /** var64: a 64-bit signed integer which uses fory PVL encoding. 
*/ - public static final int VAR64 = 7; + public static final int VARINT64 = 7; - /** h64: a 64-bit signed integer which uses fory hybrid encoding. */ - public static final int H64 = 8; + /** tagged_int64: a 64-bit signed integer which uses fory hybrid encoding. */ + public static final int TAGGED_INT64 = 8; /** uint8: an 8-bit unsigned integer. */ public static final int UINT8 = 9; @@ -62,17 +69,17 @@ public class Types { /** uint32: a 32-bit unsigned integer. */ public static final int UINT32 = 11; - /** varu32: a 32-bit unsigned integer which uses fory var_uint32 encoding. */ - public static final int VARU32 = 12; + /** var_uint32: a 32-bit unsigned integer which uses fory var_uint32 encoding. */ + public static final int VAR_UINT32 = 12; /** uint64: a 64-bit unsigned integer. */ public static final int UINT64 = 13; - /** varu64: a 64-bit unsigned integer which uses fory var_uint64 encoding. */ - public static final int VARU64 = 14; + /** var_uint64: a 64-bit unsigned integer which uses fory var_uint64 encoding. */ + public static final int VAR_UINT64 = 14; - /** hu64: a 64-bit unsigned integer which uses fory hybrid encoding. */ - public static final int HU64 = 15; + /** tagged_uint64: a 64-bit unsigned integer which uses fory tagged int64 encoding. */ + public static final int TAGGED_UINT64 = 15; /** float16: a 16-bit floating point number. 
*/ public static final int FLOAT16 = 16; @@ -253,19 +260,7 @@ public static int getPrimitiveTypeId(Class cls) { } public static boolean isPrimitiveType(int typeId) { - // noinspection Duplicates - switch (typeId) { - case BOOL: - case INT8: - case INT16: - case INT32: - case INT64: - case FLOAT32: - case FLOAT64: - return true; - default: - return false; - } + return typeId >= BOOL && typeId <= FLOAT64; } public static boolean isPrimitiveArray(int typeId) { @@ -320,4 +315,118 @@ public static int getPrimitiveArrayTypeId(int typeId) { String.format("Type id %d is not a primitive id", typeId)); } } + + public static int getPrimitiveTypeId(Fory fory, Class rawType) { + Class unwrapped = TypeUtils.unwrap(rawType); + if (unwrapped == char.class) { + Preconditions.checkArgument(!fory.isCrossLanguage(), "Char is not support for xlang"); + return rawType.isPrimitive() ? ClassResolver.PRIMITIVE_CHAR_ID : ClassResolver.CHAR_ID; + } + if (unwrapped == boolean.class) { + return Types.BOOL; + } else if (unwrapped == byte.class) { + return Types.INT8; + } else if (unwrapped == short.class) { + return Types.INT16; + } else if (unwrapped == int.class) { + return fory.compressInt() ? Types.VARINT32 : Types.INT32; + } else if (unwrapped == long.class) { + return fory.compressLong() ? 
Types.VARINT64 : Types.INT64; + } else if (unwrapped == float.class) { + return Types.FLOAT32; + } else if (unwrapped == double.class) { + return Types.FLOAT64; + } + return Types.UNKNOWN; + } + + public static int getDescriptorTypeId(Fory fory, Field field) { + Annotation annotation = Descriptor.getAnnotation(field); + if (annotation != null) { + return TypeAnnotationUtils.getTypeId(annotation); + } else { + Class rawType = field.getType(); + return getTypeId(fory, rawType); + } + } + + public static int getDescriptorTypeId(Fory fory, Descriptor d) { + TypeRef typeRef = d.getTypeRef(); + TypeExtMeta extMeta = typeRef.getTypeExtMeta(); + if (extMeta != null) { + return extMeta.typeId(); + } else { + Annotation typeAnnotation = d.getTypeAnnotation(); + if (typeAnnotation != null) { + return TypeAnnotationUtils.getTypeId(typeAnnotation); + } else { + Class rawType = typeRef.getRawType(); + return getTypeId(fory, rawType); + } + } + } + + public static int getTypeId(Fory fory, Class clz) { + Class unwrapped = TypeUtils.unwrap(clz); + if (unwrapped == char.class) { + Preconditions.checkArgument(!fory.isCrossLanguage(), "Char is not support for xlang"); + return clz.isPrimitive() ? ClassResolver.PRIMITIVE_CHAR_ID : ClassResolver.CHAR_ID; + } + if (unwrapped.isPrimitive()) { + if (unwrapped == boolean.class) { + return Types.BOOL; + } else if (unwrapped == byte.class) { + return Types.INT8; + } else if (unwrapped == short.class) { + return Types.INT16; + } else if (unwrapped == int.class) { + return fory.compressInt() ? Types.VARINT32 : Types.INT32; + } else if (unwrapped == long.class) { + return fory.compressLong() ? Types.VARINT64 : Types.INT64; + } else if (unwrapped == float.class) { + return Types.FLOAT32; + } else if (unwrapped == double.class) { + return Types.FLOAT64; + } + } + ClassInfo classInfo = fory._getTypeResolver().getClassInfo(clz, false); + if (classInfo != null) { + return fory.isCrossLanguage() ? 
classInfo.getXtypeId() : classInfo.getClassId(); + } + return Types.UNKNOWN; + } + + public static Class getClassForTypeId(int typeId) { + switch (typeId) { + case BOOL: + return Boolean.class; + case INT8: + case UINT8: + return Byte.class; + case INT16: + case UINT16: + return Short.class; + case INT32: + case VARINT32: + case UINT32: + case VAR_UINT32: + return Integer.class; + case INT64: + case VARINT64: + case TAGGED_INT64: + case UINT64: + case VAR_UINT64: + case TAGGED_UINT64: + return Long.class; + case FLOAT16: + case FLOAT32: + return Float.class; + case FLOAT64: + return Double.class; + case STRING: + return String.class; + default: + return null; + } + } } diff --git a/java/fory-core/src/main/java/org/apache/fory/util/DefaultValueUtils.java b/java/fory-core/src/main/java/org/apache/fory/util/DefaultValueUtils.java index 892f78737a..0dd4e4a50e 100644 --- a/java/fory-core/src/main/java/org/apache/fory/util/DefaultValueUtils.java +++ b/java/fory-core/src/main/java/org/apache/fory/util/DefaultValueUtils.java @@ -34,7 +34,7 @@ import org.apache.fory.logging.LoggerFactory; import org.apache.fory.memory.Platform; import org.apache.fory.reflect.FieldAccessor; -import org.apache.fory.resolver.ClassResolver; +import org.apache.fory.type.DispatchId; import org.apache.fory.type.ScalaTypes; import org.apache.fory.type.TypeUtils; import org.apache.fory.util.unsafe._JDKAccess; @@ -137,7 +137,7 @@ public final DefaultValueField[] buildDefaultValueFields( fieldName, defaultValue, fieldAccessor, - classId != null ? classId : ClassResolver.NO_CLASS_ID)); + classId != null ? 
classId : DispatchId.UNKNOWN)); } } } @@ -373,36 +373,56 @@ public static void setDefaultValues(Object obj, DefaultValueField[] defaultValue short classId = defaultField.getClassId(); long fieldOffset = fieldAccessor.getFieldOffset(); switch (classId) { - case ClassResolver.PRIMITIVE_BOOLEAN_CLASS_ID: - case ClassResolver.BOOLEAN_CLASS_ID: + case DispatchId.PRIMITIVE_BOOL: + case DispatchId.BOOL: Platform.putBoolean(obj, fieldOffset, (Boolean) defaultValue); break; - case ClassResolver.PRIMITIVE_BYTE_CLASS_ID: - case ClassResolver.BYTE_CLASS_ID: + case DispatchId.PRIMITIVE_INT8: + case DispatchId.INT8: + case DispatchId.PRIMITIVE_UINT8: + case DispatchId.UINT8: Platform.putByte(obj, fieldOffset, (Byte) defaultValue); break; - case ClassResolver.PRIMITIVE_CHAR_CLASS_ID: - case ClassResolver.CHAR_CLASS_ID: + case DispatchId.PRIMITIVE_CHAR: + case DispatchId.CHAR: Platform.putChar(obj, fieldOffset, (Character) defaultValue); break; - case ClassResolver.PRIMITIVE_SHORT_CLASS_ID: - case ClassResolver.SHORT_CLASS_ID: + case DispatchId.PRIMITIVE_INT16: + case DispatchId.INT16: + case DispatchId.PRIMITIVE_UINT16: + case DispatchId.UINT16: Platform.putShort(obj, fieldOffset, (Short) defaultValue); break; - case ClassResolver.PRIMITIVE_INT_CLASS_ID: - case ClassResolver.INTEGER_CLASS_ID: + case DispatchId.PRIMITIVE_INT32: + case DispatchId.INT32: + case DispatchId.PRIMITIVE_VARINT32: + case DispatchId.VARINT32: + case DispatchId.PRIMITIVE_UINT32: + case DispatchId.UINT32: + case DispatchId.PRIMITIVE_VAR_UINT32: + case DispatchId.VAR_UINT32: Platform.putInt(obj, fieldOffset, (Integer) defaultValue); break; - case ClassResolver.PRIMITIVE_LONG_CLASS_ID: - case ClassResolver.LONG_CLASS_ID: + case DispatchId.PRIMITIVE_INT64: + case DispatchId.INT64: + case DispatchId.PRIMITIVE_VARINT64: + case DispatchId.VARINT64: + case DispatchId.PRIMITIVE_TAGGED_INT64: + case DispatchId.TAGGED_INT64: + case DispatchId.PRIMITIVE_UINT64: + case DispatchId.UINT64: + case 
DispatchId.PRIMITIVE_VAR_UINT64: + case DispatchId.VAR_UINT64: + case DispatchId.PRIMITIVE_TAGGED_UINT64: + case DispatchId.TAGGED_UINT64: Platform.putLong(obj, fieldOffset, (Long) defaultValue); break; - case ClassResolver.PRIMITIVE_FLOAT_CLASS_ID: - case ClassResolver.FLOAT_CLASS_ID: + case DispatchId.PRIMITIVE_FLOAT32: + case DispatchId.FLOAT32: Platform.putFloat(obj, fieldOffset, (Float) defaultValue); break; - case ClassResolver.PRIMITIVE_DOUBLE_CLASS_ID: - case ClassResolver.DOUBLE_CLASS_ID: + case DispatchId.PRIMITIVE_FLOAT64: + case DispatchId.FLOAT64: Platform.putDouble(obj, fieldOffset, (Double) defaultValue); break; default: diff --git a/java/fory-core/src/test/java/org/apache/fory/StreamTest.java b/java/fory-core/src/test/java/org/apache/fory/StreamTest.java index e86c258186..52dcc04fd6 100644 --- a/java/fory-core/src/test/java/org/apache/fory/StreamTest.java +++ b/java/fory-core/src/test/java/org/apache/fory/StreamTest.java @@ -70,9 +70,9 @@ public void testBufferStream() { buffer0.writeVarUint64(i); buffer0.writeVarUint64(Long.MIN_VALUE); buffer0.writeVarUint64(Long.MAX_VALUE); - buffer0.writeHybridInt64(i); - buffer0.writeHybridInt64(Long.MIN_VALUE); - buffer0.writeHybridInt64(Long.MAX_VALUE); + buffer0.writeTaggedInt64(i); + buffer0.writeTaggedInt64(Long.MIN_VALUE); + buffer0.writeTaggedInt64(Long.MAX_VALUE); } byte[] bytes = buffer0.getBytes(0, buffer0.writerIndex()); ForyInputStream stream = @@ -105,9 +105,9 @@ public synchronized int read(byte[] b, int off, int len) { assertEquals(buffer.readVarUint64(), i); assertEquals(buffer.readVarUint64(), Long.MIN_VALUE); assertEquals(buffer.readVarUint64(), Long.MAX_VALUE); - assertEquals(buffer.readHybridInt64(), i); - assertEquals(buffer.readHybridInt64(), Long.MIN_VALUE); - assertEquals(buffer.readHybridInt64(), Long.MAX_VALUE); + assertEquals(buffer.readTaggedInt64(), i); + assertEquals(buffer.readTaggedInt64(), Long.MIN_VALUE); + assertEquals(buffer.readTaggedInt64(), Long.MAX_VALUE); } } diff --git 
a/java/fory-core/src/test/java/org/apache/fory/memory/MemoryBufferTest.java b/java/fory-core/src/test/java/org/apache/fory/memory/MemoryBufferTest.java index 59d6a94f74..f36fe9eb94 100644 --- a/java/fory-core/src/test/java/org/apache/fory/memory/MemoryBufferTest.java +++ b/java/fory-core/src/test/java/org/apache/fory/memory/MemoryBufferTest.java @@ -561,7 +561,7 @@ public void testGetShortB() { } @Test - public void testWriteHybridInt64() { + public void testWriteTaggedInt64() { MemoryBuffer buf = MemoryUtils.buffer(8); checkHybridInt64(buf, -1, 4); for (int i = 0; i < 10; i++) { @@ -592,12 +592,12 @@ public void testWriteHybridInt64() { private void checkHybridInt64(MemoryBuffer buf, long value, int bytesWritten) { int readerIndex = buf.readerIndex(); assertEquals(buf.writerIndex(), readerIndex); - int actualBytesWritten = buf.writeHybridInt64(value); + int actualBytesWritten = buf.writeTaggedInt64(value); assertEquals(actualBytesWritten, bytesWritten); - long varLong = buf.readHybridInt64(); + long varLong = buf.readTaggedInt64(); assertEquals(buf.writerIndex(), buf.readerIndex()); assertEquals(value, varLong); - assertEquals(buf.slice(readerIndex, buf.readerIndex() - readerIndex).readHybridInt64(), value); + assertEquals(buf.slice(readerIndex, buf.readerIndex() - readerIndex).readTaggedInt64(), value); } @Test @@ -633,13 +633,12 @@ public void testWriteHybridUint64() { private void checkHybridUint64(MemoryBuffer buf, long value, int bytesWritten) { int readerIndex = buf.readerIndex(); assertEquals(buf.writerIndex(), readerIndex); - int actualBytesWritten = buf.writeHybridUint64(value); + int actualBytesWritten = buf.writeTaggedUint64(value); assertEquals(actualBytesWritten, bytesWritten); - long varLong = buf.readHybridUint64(); + long varLong = buf.readTaggedUint64(); assertEquals(buf.writerIndex(), buf.readerIndex()); assertEquals(value, varLong); - assertEquals( - buf.slice(readerIndex, buf.readerIndex() - readerIndex).readHybridUint64(), value); + 
assertEquals(buf.slice(readerIndex, buf.readerIndex() - readerIndex).readTaggedUint64(), value); } @Test diff --git a/java/fory-core/src/test/java/org/apache/fory/meta/ClassDefTest.java b/java/fory-core/src/test/java/org/apache/fory/meta/ClassDefTest.java index 25a57d0f5d..c6b2048f40 100644 --- a/java/fory-core/src/test/java/org/apache/fory/meta/ClassDefTest.java +++ b/java/fory-core/src/test/java/org/apache/fory/meta/ClassDefTest.java @@ -40,6 +40,7 @@ import org.apache.fory.resolver.ClassResolver; import org.apache.fory.test.bean.Foo; import org.apache.fory.type.Descriptor; +import org.apache.fory.type.Types; import org.testng.Assert; import org.testng.annotations.Test; @@ -188,8 +189,12 @@ public void testInterface() { public void testTypeExtInfo() { Fory fory = Fory.builder().withMetaShare(true).build(); ClassResolver classResolver = fory.getClassResolver(); - assertTrue(classResolver.needToWriteRef(TypeRef.of(Foo.class, new TypeExtMeta(true, true)))); - assertFalse(classResolver.needToWriteRef(TypeRef.of(Foo.class, new TypeExtMeta(true, false)))); + assertTrue( + classResolver.needToWriteRef( + TypeRef.of(Foo.class, new TypeExtMeta(Types.STRUCT, true, true)))); + assertFalse( + classResolver.needToWriteRef( + TypeRef.of(Foo.class, new TypeExtMeta(Types.STRUCT, true, false)))); } // Test classes for duplicate tag ID validation diff --git a/java/fory-core/src/test/java/org/apache/fory/serializer/PrimitiveSerializersTest.java b/java/fory-core/src/test/java/org/apache/fory/serializer/PrimitiveSerializersTest.java index 1371358411..c91aa26f9c 100644 --- a/java/fory-core/src/test/java/org/apache/fory/serializer/PrimitiveSerializersTest.java +++ b/java/fory-core/src/test/java/org/apache/fory/serializer/PrimitiveSerializersTest.java @@ -106,9 +106,11 @@ public void testPrimitiveStruct(boolean compressNumber, boolean codegen) { .withCodegen(codegen) .requireClassRegistration(false); serDeCheck( - 
builder.withNumberCompressed(true).withLongCompressed(LongEncoding.VARINT64).build(), struct); + builder.withNumberCompressed(true).withLongCompressed(LongEncoding.VARINT64).build(), + struct); serDeCheck( - builder.withNumberCompressed(true).withLongCompressed(LongEncoding.HYBRID).build(), struct); + builder.withNumberCompressed(true).withLongCompressed(LongEncoding.TAGGED_INT64).build(), + struct); } else { Fory fory = Fory.builder() From d59b477ac8abaf5d13d47cdde74844d55b7fcc71 Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Wed, 7 Jan 2026 11:39:00 +0800 Subject: [PATCH 06/44] fix errors --- AGENTS.md | 1 + .../java/org/apache/fory/meta/ClassDef.java | 4 +- .../java/org/apache/fory/meta/FieldInfo.java | 1 + .../java/org/apache/fory/meta/FieldTypes.java | 100 ++++++----- .../apache/fory/resolver/ClassResolver.java | 1 + .../NonexistentClassSerializers.java | 18 +- .../fory/serializer/PrimitiveSerializers.java | 4 +- .../apache/fory/serializer/Serializers.java | 53 ++++++ .../java/org/apache/fory/type/DispatchId.java | 1 - .../java/org/apache/fory/ForyTestBase.java | 2 +- .../apache/fory/memory/MemoryBufferTest.java | 82 ++++----- .../CodegenCompatibleSerializerTest.java | 4 +- .../CompatibleFieldConvertTest.java | 163 ++++++++++++++++++ .../serializer/MetaSharedCompatibleTest.java | 156 ----------------- 14 files changed, 337 insertions(+), 253 deletions(-) create mode 100644 java/fory-core/src/test/java/org/apache/fory/serializer/CompatibleFieldConvertTest.java diff --git a/AGENTS.md b/AGENTS.md index f1bce55b11..5e7801858d 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -13,6 +13,7 @@ While working on Fory, please remember: - **Git-Tracked Files**: When reading code, skip all files not tracked by git by default unless generated by yourself. - **Cross-Language Consistency**: Maintain consistency across language implementations while respecting language-specific idioms. 
- **Graalvm Support using fory codegen**: For graalvm, please use `fory codegen` to generate the serializer when building graalvm native image, do not use graallvm reflect-related configuration unless for JDK `proxy`. +- **Xlang Type System**: Java `native mode(xlang=false)` shares same type systems between type id from `Types.BOOL~Types.STRING` with `xlang mode(xlang=true)`, but for other types, java `native mode` has different type ids. ## Build and Development Commands diff --git a/java/fory-core/src/main/java/org/apache/fory/meta/ClassDef.java b/java/fory-core/src/main/java/org/apache/fory/meta/ClassDef.java index ae8e6cb408..a2329daf96 100644 --- a/java/fory-core/src/main/java/org/apache/fory/meta/ClassDef.java +++ b/java/fory-core/src/main/java/org/apache/fory/meta/ClassDef.java @@ -24,6 +24,7 @@ import java.io.ObjectStreamClass; import java.io.Serializable; import java.lang.reflect.Field; +import java.math.BigDecimal; import java.util.ArrayList; import java.util.Collection; import java.util.Comparator; @@ -374,9 +375,6 @@ public List getDescriptors(TypeResolver resolver, Class cls) { } } Descriptor newDesc = fieldInfo.toDescriptor(resolver, descriptor); - if (newDesc.getField() == null) { - System.out.println(); - } descriptors.add(newDesc); } } diff --git a/java/fory-core/src/main/java/org/apache/fory/meta/FieldInfo.java b/java/fory-core/src/main/java/org/apache/fory/meta/FieldInfo.java index 0fe83f279b..1404099179 100644 --- a/java/fory-core/src/main/java/org/apache/fory/meta/FieldInfo.java +++ b/java/fory-core/src/main/java/org/apache/fory/meta/FieldInfo.java @@ -22,6 +22,7 @@ import java.io.Serializable; import java.lang.reflect.Field; import java.lang.reflect.Modifier; +import java.math.BigDecimal; import java.util.Objects; import org.apache.fory.reflect.TypeRef; import org.apache.fory.resolver.TypeResolver; diff --git a/java/fory-core/src/main/java/org/apache/fory/meta/FieldTypes.java b/java/fory-core/src/main/java/org/apache/fory/meta/FieldTypes.java 
index 86370244d8..f5f301d92a 100644 --- a/java/fory-core/src/main/java/org/apache/fory/meta/FieldTypes.java +++ b/java/fory-core/src/main/java/org/apache/fory/meta/FieldTypes.java @@ -87,8 +87,12 @@ private static FieldType buildFieldType( // Get type ID for both xlang and native mode // This supports unsigned types and field-configurable compression in both modes int xtypeId; - if (field != null && TypeUtils.unwrap(rawType).isPrimitive()) { - xtypeId = Types.getDescriptorTypeId(resolver.getFory(), field); + if (TypeUtils.unwrap(rawType).isPrimitive()) { + if (field != null) { + xtypeId = Types.getDescriptorTypeId(resolver.getFory(), field); + } else { + xtypeId = Types.getTypeId(resolver.getFory(), rawType); + } } else { ClassInfo info = resolver.getClassInfo(genericType.getCls(), false); if (info != null) { @@ -108,9 +112,9 @@ private static FieldType buildFieldType( // Only Optional types and boxed types are nullable by default in xlang mode nullable = isOptionalType(rawType) || TypeUtils.isBoxed(rawType); } else { - // For nested types (field=null), nullable defaults to true to match decoding behavior - // since the encoding doesn't persist nullable for nested types (see FieldType.read()) - nullable = field == null || !genericType.getCls().isPrimitive(); + // Primitives are never nullable, non-primitives are nullable by default + // This applies to both top-level fields and nested types (in arrays, collections, maps) + nullable = !genericType.getCls().isPrimitive(); } // Apply @ForyField annotation if present @@ -154,7 +158,9 @@ private static FieldType buildFieldType( // unified basic types for xlang and native mode return new RegisteredFieldType(nullable, trackingRef, xtypeId); } else { - if (isXlang + if (rawType.isEnum()) { + return new EnumFieldType(nullable, xtypeId); + } else if (isXlang && !Types.isUserDefinedType((byte) xtypeId) && resolver.isRegisteredById(rawType)) { return new RegisteredFieldType(nullable, trackingRef, xtypeId); @@ -162,9 +168,6 @@ 
private static FieldType buildFieldType( Short classId = ((ClassResolver) resolver).getRegisteredClassId(rawType); return new RegisteredFieldType(nullable, trackingRef, classId); } else { - if (rawType.isEnum()) { - return new EnumFieldType(nullable, xtypeId); - } if (rawType.isArray()) { Class elemType = rawType.getComponentType(); while (elemType.isArray()) { @@ -235,24 +238,27 @@ public int hashCode() { /** Write field type info. */ public void write(MemoryBuffer buffer, boolean writeHeader) { - // header of nested generic fields in collection/map will be written independently - byte header = (byte) (trackingRef ? 1 : 0); + // Header format for nested types (writeHeader=true): + // - bit 0: trackingRef + // - bit 1: nullable + // - bits 2+: typeId + byte header = (byte) ((nullable ? 0b10 : 0) | (trackingRef ? 0b1 : 0)); if (this instanceof RegisteredFieldType) { short classId = ((RegisteredFieldType) this).getClassId(); - buffer.writeVarUint32Small7(writeHeader ? ((5 + classId) << 1) | header : 5 + classId); + buffer.writeVarUint32Small7(writeHeader ? ((5 + classId) << 2) | header : 5 + classId); } else if (this instanceof EnumFieldType) { - buffer.writeVarUint32Small7(writeHeader ? ((4) << 1) | header : 4); + buffer.writeVarUint32Small7(writeHeader ? ((4) << 2) | header : 4); } else if (this instanceof ArrayFieldType) { ArrayFieldType arrayFieldType = (ArrayFieldType) this; - buffer.writeVarUint32Small7(writeHeader ? ((3) << 1) | header : 3); + buffer.writeVarUint32Small7(writeHeader ? ((3) << 2) | header : 3); buffer.writeVarUint32Small7(arrayFieldType.getDimensions()); (arrayFieldType).getComponentType().write(buffer); } else if (this instanceof CollectionFieldType) { - buffer.writeVarUint32Small7(writeHeader ? ((2) << 1) | header : 2); + buffer.writeVarUint32Small7(writeHeader ? ((2) << 2) | header : 2); // TODO remove it when new collection deserialization jit finished. 
((CollectionFieldType) this).getElementType().write(buffer); } else if (this instanceof MapFieldType) { - buffer.writeVarUint32Small7(writeHeader ? ((1) << 1) | header : 1); + buffer.writeVarUint32Small7(writeHeader ? ((1) << 2) | header : 1); // TODO remove it when new map deserialization jit finished. MapFieldType mapFieldType = (MapFieldType) this; mapFieldType.getKeyType().write(buffer); @@ -268,10 +274,15 @@ public void write(MemoryBuffer buffer) { } public static FieldType read(MemoryBuffer buffer, TypeResolver resolver) { + // Header format for nested types: + // - bit 0: trackingRef + // - bit 1: nullable + // - bits 2+: typeId int header = buffer.readVarUint32Small7(); boolean trackingRef = (header & 0b1) != 0; - // For nested types (in collections/maps), nullable defaults to true - return read(buffer, resolver, true, trackingRef, header >>> 1); + boolean nullable = (header & 0b10) != 0; + int typeId = header >>> 2; + return read(buffer, resolver, nullable, trackingRef, typeId); } /** Read field type info. */ @@ -357,6 +368,10 @@ public static FieldType xread( return new ObjectFieldType(xtypeId, nullable, trackingRef); default: { + if (Types.isPrimitiveType(xtypeId)) { + // unsigned types share same class with signed numeric types, so unsigned types are not registered. 
+ return new RegisteredFieldType(nullable, trackingRef, xtypeId); + } if (!Types.isUserDefinedType((byte) xtypeId)) { ClassInfo classInfo = resolver.getXtypeInfo(xtypeId); if (classInfo == null) { @@ -381,6 +396,7 @@ public static class RegisteredFieldType extends FieldType { public RegisteredFieldType(boolean nullable, boolean trackingRef, int classId) { super(classId, nullable, trackingRef); + Preconditions.checkArgument(classId > 0); this.classId = (short) classId; } @@ -391,39 +407,33 @@ public short getClassId() { @Override public TypeRef toTypeToken(TypeResolver resolver, TypeRef declared) { Class cls; - if (resolver instanceof XtypeResolver) { - cls = ((XtypeResolver) resolver).getXtypeInfo(classId).getCls(); - if (Types.isPrimitiveType(classId)) { - if (declared == null) { - // For primitive types, ensure we use the correct primitive/boxed form - // based on the nullable flag, not the declared type - if (!nullable) { - // nullable=false means the source was primitive, use primitive type - cls = TypeUtils.unwrap(cls); - } else { - // nullable=true means the source was boxed, use boxed type - cls = TypeUtils.wrap(cls); - } + if (Types.isPrimitiveType(classId)) { + cls = Types.getClassForTypeId(classId); + if (declared == null) { + // For primitive types, ensure we use the correct primitive/boxed form + // based on the nullable flag, not the declared type + if (!nullable) { + // nullable=false means the source was primitive, use primitive type + cls = TypeUtils.unwrap(cls); } else { - if (TypeUtils.unwrap(declared.getRawType()) == TypeUtils.unwrap(cls)) { - // we still need correct type, the `read/write` should use `nullable` of `Descriptor` - // for serialization - return declared; - } + // nullable=true means the source was boxed, use boxed type + cls = TypeUtils.wrap(cls); } - } - } else { - if (Types.isPrimitiveType(classId)) { - cls = Types.getClassForTypeId(classId); - if (declared != null - && TypeUtils.unwrap(declared.getRawType()) == 
TypeUtils.unwrap(cls)) { + } else { + if (TypeUtils.unwrap(declared.getRawType()) == TypeUtils.unwrap(cls)) { // we still need correct type, the `read/write` should use `nullable` of `Descriptor` // for serialization - return declared; + cls = declared.getRawType(); } - } else { - cls = ((ClassResolver) resolver).getRegisteredClass(classId); } + return TypeRef.of(cls, new TypeExtMeta(classId, nullable, trackingRef)); + } + if (resolver instanceof XtypeResolver) { + ClassInfo xtypeInfo = ((XtypeResolver) resolver).getXtypeInfo(classId); + Preconditions.checkNotNull(xtypeInfo); + cls = xtypeInfo.getCls(); + } else { + cls = ((ClassResolver) resolver).getRegisteredClass(classId); } if (cls == null) { LOG.warn("Class {} not registered, take it as Struct type for deserialization.", classId); diff --git a/java/fory-core/src/main/java/org/apache/fory/resolver/ClassResolver.java b/java/fory-core/src/main/java/org/apache/fory/resolver/ClassResolver.java index 85b04eb6e8..39bc2af603 100644 --- a/java/fory-core/src/main/java/org/apache/fory/resolver/ClassResolver.java +++ b/java/fory-core/src/main/java/org/apache/fory/resolver/ClassResolver.java @@ -248,6 +248,7 @@ public ClassResolver(Fory fory) { super(fory); this.fory = fory; classInfoCache = NIL_CLASS_INFO; + extRegistry.classIdGenerator = REPLACE_STUB_ID + 1; shimDispatcher = new ShimDispatcher(fory); _addGraalvmClassRegistry(fory.getConfig().getConfigHash(), this); } diff --git a/java/fory-core/src/main/java/org/apache/fory/serializer/NonexistentClassSerializers.java b/java/fory-core/src/main/java/org/apache/fory/serializer/NonexistentClassSerializers.java index 94f94ac955..b9b2741010 100644 --- a/java/fory-core/src/main/java/org/apache/fory/serializer/NonexistentClassSerializers.java +++ b/java/fory-core/src/main/java/org/apache/fory/serializer/NonexistentClassSerializers.java @@ -45,10 +45,13 @@ import org.apache.fory.type.DescriptorGrouper; import org.apache.fory.type.DispatchId; import org.apache.fory.type.Generics; 
+import org.apache.fory.logging.Logger; +import org.apache.fory.logging.LoggerFactory; import org.apache.fory.util.Preconditions; @SuppressWarnings({"rawtypes", "unchecked"}) public final class NonexistentClassSerializers { + private static final Logger LOG = LoggerFactory.getLogger(NonexistentClassSerializers.class); private static final class ClassFieldsInfo { private final SerializationFieldInfo[] buildInFields; @@ -118,8 +121,18 @@ public void write(MemoryBuffer buffer, Object v) { for (SerializationFieldInfo fieldInfo : fieldsInfo.buildInFields) { Object fieldValue = value.get(fieldInfo.qualifiedFieldName); ClassInfo classInfo = fieldInfo.classInfo; + if (fory.getConfig().isForyDebugOutputEnabled()) { + LOG.info( + "NonexistentClassSerializer.write: field={}, dispatchId={}, isPrimitive={}, value={}, serializer={}", + fieldInfo.qualifiedFieldName, + fieldInfo.dispatchId, + DispatchId.isPrimitive(fieldInfo.dispatchId), + fieldValue, + classInfo != null ? classInfo.getSerializer() : null); + } if (DispatchId.isPrimitive(fieldInfo.dispatchId)) { - classInfo.getSerializer().write(buffer, fieldValue); + // Use dispatch-based write to ensure correct encoding (e.g., VARINT64 vs FIXED_INT64) + Serializers.writePrimitiveValue(buffer, fieldValue, fieldInfo.dispatchId); } else { if (fieldInfo.useDeclaredTypeInfo) { // whether tracking ref is recorded in `fieldInfo.serializer`, so it's still @@ -182,7 +195,8 @@ public Object read(MemoryBuffer buffer) { fieldValue = fory.readRef(buffer, classInfoHolder); } else { if (DispatchId.isPrimitive(fieldInfo.dispatchId)) { - fieldValue = fieldInfo.classInfo.getSerializer().read(buffer); + // Use dispatch-based read to ensure correct encoding (e.g., VARINT64 vs FIXED_INT64) + fieldValue = Serializers.readPrimitiveValue(fory, buffer, fieldInfo.dispatchId); } else { fieldValue = AbstractObjectSerializer.readFinalObjectFieldValue( diff --git a/java/fory-core/src/main/java/org/apache/fory/serializer/PrimitiveSerializers.java 
b/java/fory-core/src/main/java/org/apache/fory/serializer/PrimitiveSerializers.java index f0384472ad..d815f2b7e7 100644 --- a/java/fory-core/src/main/java/org/apache/fory/serializer/PrimitiveSerializers.java +++ b/java/fory-core/src/main/java/org/apache/fory/serializer/PrimitiveSerializers.java @@ -201,7 +201,7 @@ public static Expression writeInt64( return new Invoke(buffer, "writeInt64", v); case TAGGED_INT64: return new Invoke( - buffer, ensureBounds ? "writeHybridInt64" : "_unsafeWriteHybridInt64", v); + buffer, ensureBounds ? "writeTaggedInt64" : "_unsafeWriteTaggedInt64", v); case VARINT64: return new Invoke(buffer, ensureBounds ? "writeVarInt64" : "_unsafeWriteVarInt64", v); default: @@ -238,7 +238,7 @@ public static String readLongFunc(LongEncoding longEncoding) { case FIXED_INT64: return Platform.IS_LITTLE_ENDIAN ? "_readInt64OnLE" : "_readInt64OnBE"; case TAGGED_INT64: - return Platform.IS_LITTLE_ENDIAN ? "_readHybridInt64OnLE" : "_readHybridInt64OnBE"; + return Platform.IS_LITTLE_ENDIAN ? "_readTaggedInt64OnLE" : "_readTaggedInt64OnBE"; case VARINT64: return Platform.IS_LITTLE_ENDIAN ? 
"_readVarInt64OnLE" : "_readVarInt64OnBE"; default: diff --git a/java/fory-core/src/main/java/org/apache/fory/serializer/Serializers.java b/java/fory-core/src/main/java/org/apache/fory/serializer/Serializers.java index dbdfc065cd..46516854da 100644 --- a/java/fory-core/src/main/java/org/apache/fory/serializer/Serializers.java +++ b/java/fory-core/src/main/java/org/apache/fory/serializer/Serializers.java @@ -218,6 +218,59 @@ public static Object readPrimitiveValue(Fory fory, MemoryBuffer buffer, int disp } } + public static void writePrimitiveValue(MemoryBuffer buffer, Object value, int dispatchId) { + switch (dispatchId) { + case DispatchId.PRIMITIVE_BOOL: + buffer.writeBoolean((Boolean) value); + break; + case DispatchId.PRIMITIVE_INT8: + case DispatchId.PRIMITIVE_UINT8: + buffer.writeByte((Byte) value); + break; + case DispatchId.PRIMITIVE_CHAR: + buffer.writeChar((Character) value); + break; + case DispatchId.PRIMITIVE_INT16: + case DispatchId.PRIMITIVE_UINT16: + buffer.writeInt16((Short) value); + break; + case DispatchId.PRIMITIVE_INT32: + case DispatchId.PRIMITIVE_UINT32: + buffer.writeInt32((Integer) value); + break; + case DispatchId.PRIMITIVE_VARINT32: + buffer.writeVarInt32((Integer) value); + break; + case DispatchId.PRIMITIVE_VAR_UINT32: + buffer.writeVarUint32((Integer) value); + break; + case DispatchId.PRIMITIVE_INT64: + case DispatchId.PRIMITIVE_UINT64: + buffer.writeInt64((Long) value); + break; + case DispatchId.PRIMITIVE_VARINT64: + buffer.writeVarInt64((Long) value); + break; + case DispatchId.PRIMITIVE_TAGGED_INT64: + buffer.writeTaggedInt64((Long) value); + break; + case DispatchId.PRIMITIVE_VAR_UINT64: + buffer.writeVarUint64((Long) value); + break; + case DispatchId.PRIMITIVE_TAGGED_UINT64: + buffer.writeTaggedUint64((Long) value); + break; + case DispatchId.PRIMITIVE_FLOAT32: + buffer.writeFloat32((Float) value); + break; + case DispatchId.PRIMITIVE_FLOAT64: + buffer.writeFloat64((Double) value); + break; + default: + throw new 
IllegalStateException("unreachable dispatchId: " + dispatchId); + } + } + public abstract static class CrossLanguageCompatibleSerializer extends Serializer { public CrossLanguageCompatibleSerializer(Fory fory, Class cls) { diff --git a/java/fory-core/src/main/java/org/apache/fory/type/DispatchId.java b/java/fory-core/src/main/java/org/apache/fory/type/DispatchId.java index d222dbdfca..2429a9a05a 100644 --- a/java/fory-core/src/main/java/org/apache/fory/type/DispatchId.java +++ b/java/fory-core/src/main/java/org/apache/fory/type/DispatchId.java @@ -57,7 +57,6 @@ public static int getDispatchId(Fory fory, Descriptor d) { && typeExtMeta != null && !typeExtMeta.nullable()); if (fory.isCrossLanguage()) { - return xlangTypeIdToDispatchId(typeId, isPrimitive); } else { return nativeIdToDispatchId(typeId, d, isPrimitive); diff --git a/java/fory-core/src/test/java/org/apache/fory/ForyTestBase.java b/java/fory-core/src/test/java/org/apache/fory/ForyTestBase.java index 7c1814ec3d..4eda591eb4 100644 --- a/java/fory-core/src/test/java/org/apache/fory/ForyTestBase.java +++ b/java/fory-core/src/test/java/org/apache/fory/ForyTestBase.java @@ -183,7 +183,7 @@ public static Object[][] crossLanguageReferenceTrackingConfig() { @DataProvider public static Object[][] language() { - return new Object[][] {{Language.JAVA}, {Language.XLANG}}; + return new Object[][] { {Language.XLANG}}; } @DataProvider(name = "javaFory") diff --git a/java/fory-core/src/test/java/org/apache/fory/memory/MemoryBufferTest.java b/java/fory-core/src/test/java/org/apache/fory/memory/MemoryBufferTest.java index f36fe9eb94..46248b20fb 100644 --- a/java/fory-core/src/test/java/org/apache/fory/memory/MemoryBufferTest.java +++ b/java/fory-core/src/test/java/org/apache/fory/memory/MemoryBufferTest.java @@ -563,33 +563,33 @@ public void testGetShortB() { @Test public void testWriteTaggedInt64() { MemoryBuffer buf = MemoryUtils.buffer(8); - checkHybridInt64(buf, -1, 4); + checkTaggedInt64(buf, -1, 4); for (int i = 0; i < 
10; i++) { for (int j = 0; j < i; j++) { - checkHybridInt64(buf(i), -1, 4); - checkHybridInt64(buf(i), 1, 4); - checkHybridInt64(buf(i), 1L << 6, 4); - checkHybridInt64(buf(i), 1L << 7, 4); - checkHybridInt64(buf(i), -(2 << 5), 4); - checkHybridInt64(buf(i), -(2 << 6), 4); - checkHybridInt64(buf(i), 1L << 28, 4); - checkHybridInt64(buf(i), Integer.MAX_VALUE / 2, 4); - checkHybridInt64(buf(i), Integer.MIN_VALUE / 2, 4); - checkHybridInt64(buf(i), -1L << 30, 4); - checkHybridInt64(buf(i), 1L << 30, 9); - checkHybridInt64(buf(i), Integer.MAX_VALUE, 9); - checkHybridInt64(buf(i), Integer.MIN_VALUE, 9); - checkHybridInt64(buf(i), -1L << 31, 9); - checkHybridInt64(buf(i), 1L << 31, 9); - checkHybridInt64(buf(i), -1L << 32, 9); - checkHybridInt64(buf(i), 1L << 32, 9); - checkHybridInt64(buf(i), Long.MAX_VALUE, 9); - checkHybridInt64(buf(i), Long.MIN_VALUE, 9); + checkTaggedInt64(buf(i), -1, 4); + checkTaggedInt64(buf(i), 1, 4); + checkTaggedInt64(buf(i), 1L << 6, 4); + checkTaggedInt64(buf(i), 1L << 7, 4); + checkTaggedInt64(buf(i), -(2 << 5), 4); + checkTaggedInt64(buf(i), -(2 << 6), 4); + checkTaggedInt64(buf(i), 1L << 28, 4); + checkTaggedInt64(buf(i), Integer.MAX_VALUE / 2, 4); + checkTaggedInt64(buf(i), Integer.MIN_VALUE / 2, 4); + checkTaggedInt64(buf(i), -1L << 30, 4); + checkTaggedInt64(buf(i), 1L << 30, 9); + checkTaggedInt64(buf(i), Integer.MAX_VALUE, 9); + checkTaggedInt64(buf(i), Integer.MIN_VALUE, 9); + checkTaggedInt64(buf(i), -1L << 31, 9); + checkTaggedInt64(buf(i), 1L << 31, 9); + checkTaggedInt64(buf(i), -1L << 32, 9); + checkTaggedInt64(buf(i), 1L << 32, 9); + checkTaggedInt64(buf(i), Long.MAX_VALUE, 9); + checkTaggedInt64(buf(i), Long.MIN_VALUE, 9); } } } - private void checkHybridInt64(MemoryBuffer buf, long value, int bytesWritten) { + private void checkTaggedInt64(MemoryBuffer buf, long value, int bytesWritten) { int readerIndex = buf.readerIndex(); assertEquals(buf.writerIndex(), readerIndex); int actualBytesWritten = buf.writeTaggedInt64(value); 
@@ -601,36 +601,36 @@ private void checkHybridInt64(MemoryBuffer buf, long value, int bytesWritten) { } @Test - public void testWriteHybridUint64() { + public void testWriteTaggedUint64() { MemoryBuffer buf = MemoryUtils.buffer(8); - checkHybridUint64(buf, 0, 4); - checkHybridUint64(buf, 1, 4); + checkTaggedUint64(buf, 0, 4); + checkTaggedUint64(buf, 1, 4); for (int i = 0; i < 10; i++) { for (int j = 0; j < i; j++) { // Values in [0, Integer.MAX_VALUE] should use 4 bytes - checkHybridUint64(buf(i), 0, 4); - checkHybridUint64(buf(i), 1, 4); - checkHybridUint64(buf(i), 1L << 6, 4); - checkHybridUint64(buf(i), 1L << 7, 4); - checkHybridUint64(buf(i), 1L << 28, 4); - checkHybridUint64(buf(i), 1L << 30, 4); - checkHybridUint64(buf(i), Integer.MAX_VALUE, 4); + checkTaggedUint64(buf(i), 0, 4); + checkTaggedUint64(buf(i), 1, 4); + checkTaggedUint64(buf(i), 1L << 6, 4); + checkTaggedUint64(buf(i), 1L << 7, 4); + checkTaggedUint64(buf(i), 1L << 28, 4); + checkTaggedUint64(buf(i), 1L << 30, 4); + checkTaggedUint64(buf(i), Integer.MAX_VALUE, 4); // Values > Integer.MAX_VALUE should use 9 bytes - checkHybridUint64(buf(i), (long) Integer.MAX_VALUE + 1, 9); - checkHybridUint64(buf(i), 1L << 31, 9); - checkHybridUint64(buf(i), 1L << 32, 9); - checkHybridUint64(buf(i), 1L << 62, 9); - checkHybridUint64(buf(i), Long.MAX_VALUE, 9); + checkTaggedUint64(buf(i), (long) Integer.MAX_VALUE + 1, 9); + checkTaggedUint64(buf(i), 1L << 31, 9); + checkTaggedUint64(buf(i), 1L << 32, 9); + checkTaggedUint64(buf(i), 1L << 62, 9); + checkTaggedUint64(buf(i), Long.MAX_VALUE, 9); // Negative values (large unsigned) should use 9 bytes - checkHybridUint64(buf(i), -1, 9); - checkHybridUint64(buf(i), -1L << 30, 9); - checkHybridUint64(buf(i), Integer.MIN_VALUE, 9); - checkHybridUint64(buf(i), Long.MIN_VALUE, 9); + checkTaggedUint64(buf(i), -1, 9); + checkTaggedUint64(buf(i), -1L << 30, 9); + checkTaggedUint64(buf(i), Integer.MIN_VALUE, 9); + checkTaggedUint64(buf(i), Long.MIN_VALUE, 9); } } } - private 
void checkHybridUint64(MemoryBuffer buf, long value, int bytesWritten) { + private void checkTaggedUint64(MemoryBuffer buf, long value, int bytesWritten) { int readerIndex = buf.readerIndex(); assertEquals(buf.writerIndex(), readerIndex); int actualBytesWritten = buf.writeTaggedUint64(value); diff --git a/java/fory-core/src/test/java/org/apache/fory/serializer/CodegenCompatibleSerializerTest.java b/java/fory-core/src/test/java/org/apache/fory/serializer/CodegenCompatibleSerializerTest.java index d360852777..259a7fefa2 100644 --- a/java/fory-core/src/test/java/org/apache/fory/serializer/CodegenCompatibleSerializerTest.java +++ b/java/fory-core/src/test/java/org/apache/fory/serializer/CodegenCompatibleSerializerTest.java @@ -54,8 +54,8 @@ public class CodegenCompatibleSerializerTest extends ForyTestBase { @DataProvider(name = "config") public static Object[][] config() { return Sets.cartesianProduct( - ImmutableSet.of(true, false), // referenceTracking - ImmutableSet.of(true, false)) // enable codegen + ImmutableSet.of(true), // referenceTracking + ImmutableSet.of(true)) // enable codegen .stream() .map(List::toArray) .toArray(Object[][]::new); diff --git a/java/fory-core/src/test/java/org/apache/fory/serializer/CompatibleFieldConvertTest.java b/java/fory-core/src/test/java/org/apache/fory/serializer/CompatibleFieldConvertTest.java new file mode 100644 index 0000000000..aef9894891 --- /dev/null +++ b/java/fory-core/src/test/java/org/apache/fory/serializer/CompatibleFieldConvertTest.java @@ -0,0 +1,163 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.fory.serializer; + +import com.google.common.collect.ImmutableSet; +import org.apache.fory.Fory; +import org.apache.fory.ForyTestBase; +import org.apache.fory.config.CompatibleMode; +import org.apache.fory.config.Language; +import org.apache.fory.reflect.ReflectionUtils; +import org.apache.fory.serializer.converter.FieldConverter; +import org.apache.fory.serializer.converter.FieldConverters; +import org.testng.Assert; +import org.testng.annotations.Test; + +import java.lang.reflect.Field; +import java.util.List; + +public class CompatibleFieldConvertTest extends ForyTestBase { + public static final class CompatibleFieldConvert1 { + public boolean ftrue; + public Boolean ffalse; + public byte f3; + public Byte f4; + public short f5; + public Short f6; + public int f7; + public Integer f8; + public long f9; + public Long f10; + public float f11; + public Float f12; + public double f13; + public Double f14; + public String toString() {return "" + ftrue + ffalse + f3 + f4 + f5 + f6 + f7 + f8 + f9 + f10 + f11 + f12 + f13 + f14;} + } + + public static final class CompatibleFieldConvert2 { + public Boolean ftrue; + public boolean ffalse; + public Byte f3; + public byte f4; + public Short f5; + public short f6; + public Integer f7; + public int f8; + public Long f9; + public long f10; + public Float f11; + public float f12; + public Double f13; + public double f14; + public String toString() {return "" + ftrue + ffalse + f3 + f4 + f5 + f6 + f7 + f8 + f9 + f10 + f11 + f12 + f13 + f14;} + } + + public static final class 
CompatibleFieldConvert3 { + public String ftrue; + public String ffalse; + public String f3; + public String f4; + public String f5; + public String f6; + public String f7; + public String f8; + public String f9; + public String f10; + public String f11; + public String f12; + public String f13; + public String f14; + public String toString() {return ftrue + ffalse + f3 + f4 + f5 + f6 + f7 + f8 + f9 + f10 + f11 + f12 + f13 + f14;} + } + + @Test(dataProvider = "language") + public void testCompatibleFieldConvert(Language language) throws Exception { + byte[] bytes; + Object o1; + ImmutableSet floatFields = ImmutableSet.of("f11", "f12", "f13", "f14"); + { + Class cls = CompatibleFieldConvert1.class; + o1 = cls.newInstance(); + for (Field field : ReflectionUtils.getSortedFields(cls, false)) { + String name = field.getName(); + field.setAccessible(true); + FieldConverter converter = FieldConverters.getConverter(String.class, field); + Assert.assertNotNull(converter); + Object converted = converter.convert(name.substring(1)); + field.set(o1, converted); + } + Fory fory = + builder() + .withLanguage(language) + .withCompatibleMode(CompatibleMode.COMPATIBLE) + .build(); + fory.register(cls); + bytes = fory.serialize(o1); + } + { + Class cls = CompatibleFieldConvert2.class; + Assert.assertNotEquals(o1.getClass(), cls); + Fory fory = + builder() + .withLanguage(language) + .withCompatibleMode(CompatibleMode.COMPATIBLE) + .build(); + fory.register(cls); + Object o = fory.deserialize(bytes); + Assert.assertEquals(o.getClass(), cls); + List fields = ReflectionUtils.getSortedFields(cls, false); + for (Field field : fields) { + field.setAccessible(true); + Object fieldValue = field.get(o); + if (fieldValue instanceof Float || fieldValue instanceof Double) { + Assert.assertEquals(fieldValue.toString(), field.getName().substring(1) + ".0"); + } else { + Assert.assertEquals( + fieldValue.toString(), field.getName().substring(1), field.getName() + " not equal"); + } + } + 
Assert.assertEquals(o.toString(), o1.toString()); + } + { + Fory fory = + builder() + .withLanguage(language) + .withCompatibleMode(CompatibleMode.COMPATIBLE) + .build(); + Class cls = CompatibleFieldConvert3.class; + Assert.assertNotEquals(o1.getClass(), cls); + fory.register(cls); + Object o = fory.deserialize(bytes); + Assert.assertEquals(o.getClass(), cls); + List fields = ReflectionUtils.getSortedFields(cls, false); + for (Field field : fields) { + field.setAccessible(true); + Object fieldValue = field.get(o); + if (floatFields.contains(field.getName())) { + Assert.assertEquals(fieldValue.toString(), field.getName().substring(1) + ".0"); + } else { + Assert.assertEquals(fieldValue.toString(), field.getName().substring(1)); + } + } + Assert.assertEquals(o.toString(), o1.toString()); + } + } +} diff --git a/java/fory-core/src/test/java/org/apache/fory/serializer/MetaSharedCompatibleTest.java b/java/fory-core/src/test/java/org/apache/fory/serializer/MetaSharedCompatibleTest.java index f3bf8ce0d6..4c8d6256a2 100644 --- a/java/fory-core/src/test/java/org/apache/fory/serializer/MetaSharedCompatibleTest.java +++ b/java/fory-core/src/test/java/org/apache/fory/serializer/MetaSharedCompatibleTest.java @@ -940,160 +940,4 @@ public void testInheritance() throws Exception { Assert.assertEquals(ReflectionUtils.getObjectFieldValue(o1, "f2"), 20); Assert.assertEquals(ReflectionUtils.getObjectFieldValue(o1, "f3"), 30); } - - @Test(dataProvider = "language") - public void testCompatibleFieldConvert(Language language) throws Exception { - byte[] bytes; - Object o1; - ImmutableSet floatFields = ImmutableSet.of("f11", "f12", "f13", "f14"); - { - CompileUnit compileUnit = - new CompileUnit( - "", - "CompatibleFieldConvert", - ("public final class CompatibleFieldConvert {\n" - + " public boolean ftrue;\n" - + " public Boolean ffalse;\n" - + " public byte f3;\n" - + " public Byte f4;\n" - + " public short f5;\n" - + " public Short f6;\n" - + " public int f7;\n" - + " public Integer 
f8;\n" - + " public long f9;\n" - + " public Long f10;\n" - + " public float f11;\n" - + " public Float f12;\n" - + " public double f13;\n" - + " public Double f14;\n" - + " public String toString() {return \"\" + ftrue + ffalse + " - + "f3 + f4 + f5 + f6 + f7 + f8 + f9 + f10 + f11 + f12 + f13 + f14;}\n" - + "}")); - - ClassLoader classLoader = - JaninoUtils.compile(Thread.currentThread().getContextClassLoader(), compileUnit); - Class cls = classLoader.loadClass(compileUnit.getQualifiedClassName()); - o1 = cls.newInstance(); - for (Field field : ReflectionUtils.getSortedFields(cls, false)) { - String name = field.getName(); - field.setAccessible(true); - FieldConverter converter = FieldConverters.getConverter(String.class, field); - Assert.assertNotNull(converter); - Object converted = converter.convert(name.substring(1)); - field.set(o1, converted); - } - Fory fory = - builder() - .withLanguage(language) - .withCompatibleMode(CompatibleMode.COMPATIBLE) - .withClassLoader(classLoader) - .build(); - if (language == Language.XLANG) { - fory.register(cls); - } - bytes = fory.serialize(o1); - } - { - CompileUnit compileUnit = - new CompileUnit( - "", - "CompatibleFieldConvert", - ("public final class CompatibleFieldConvert {\n" - + " public Boolean ftrue;\n" - + " public boolean ffalse;\n" - + " public Byte f3;\n" - + " public byte f4;\n" - + " public Short f5;\n" - + " public short f6;\n" - + " public Integer f7;\n" - + " public int f8;\n" - + " public Long f9;\n" - + " public long f10;\n" - + " public Float f11;\n" - + " public float f12;\n" - + " public Double f13;\n" - + " public double f14;\n" - + " public String toString() {return \"\" + ftrue + ffalse + " - + "f3 + f4 + f5 + f6 + f7 + f8 + f9 + f10 + f11 + f12 + f13 + f14;}\n" - + "}")); - ClassLoader classLoader = - JaninoUtils.compile(Thread.currentThread().getContextClassLoader(), compileUnit); - Class cls = classLoader.loadClass(compileUnit.getQualifiedClassName()); - Assert.assertNotEquals(cls, 
o1.getClass()); - Fory fory = - builder() - .withLanguage(language) - .withCompatibleMode(CompatibleMode.COMPATIBLE) - .withClassLoader(classLoader) - .build(); - if (language == Language.XLANG) { - fory.register(cls); - } - Object o = fory.deserialize(bytes); - Assert.assertEquals(o.getClass(), cls); - List fields = ReflectionUtils.getSortedFields(cls, false); - for (Field field : fields) { - field.setAccessible(true); - Object fieldValue = field.get(o); - if (fieldValue instanceof Float || fieldValue instanceof Double) { - Assert.assertEquals(fieldValue.toString(), field.getName().substring(1) + ".0"); - } else { - Assert.assertEquals( - fieldValue.toString(), field.getName().substring(1), field.getName() + " not equal"); - } - } - Assert.assertEquals(o.toString(), o1.toString()); - } - { - CompileUnit compileUnit = - new CompileUnit( - "", - "CompatibleFieldConvert", - ("public final class CompatibleFieldConvert {\n" - + " public String ftrue;\n" - + " public String ffalse;\n" - + " public String f3;\n" - + " public String f4;\n" - + " public String f5;\n" - + " public String f6;\n" - + " public String f7;\n" - + " public String f8;\n" - + " public String f9;\n" - + " public String f10;\n" - + " public String f11;\n" - + " public String f12;\n" - + " public String f13;\n" - + " public String f14;\n" - + " public String toString() {return \"\" + ftrue + ffalse + " - + "f3 + f4 + f5 + f6 + f7 + f8 + f9 + f10 + f11 + f12 + f13 + f14;}\n" - + "}")); - - ClassLoader classLoader = - JaninoUtils.compile(Thread.currentThread().getContextClassLoader(), compileUnit); - Fory fory = - builder() - .withLanguage(language) - .withCompatibleMode(CompatibleMode.COMPATIBLE) - .withClassLoader(classLoader) - .build(); - Class cls = classLoader.loadClass(compileUnit.getQualifiedClassName()); - Assert.assertNotEquals(cls, o1.getClass()); - if (language == Language.XLANG) { - fory.register(cls); - } - Object o = fory.deserialize(bytes); - Assert.assertEquals(o.getClass(), cls); - 
List fields = ReflectionUtils.getSortedFields(cls, false); - for (Field field : fields) { - field.setAccessible(true); - Object fieldValue = field.get(o); - if (floatFields.contains(field.getName())) { - Assert.assertEquals(fieldValue.toString(), field.getName().substring(1) + ".0"); - } else { - Assert.assertEquals(fieldValue.toString(), field.getName().substring(1)); - } - } - Assert.assertEquals(o.toString(), o1.toString()); - } - } } From f95614f356d79be5ffca3a4ce8818de8b8ad1594 Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Wed, 7 Jan 2026 14:50:36 +0800 Subject: [PATCH 07/44] refactor xlang numeric read/write --- ci/build_linux_wheels.py | 10 +- cpp/fory/serialization/skip.cc | 4 +- cpp/fory/serialization/struct_serializer.h | 38 +- cpp/fory/serialization/type_resolver.cc | 8 +- cpp/fory/type/type.h | 12 +- .../specification/xlang_serialization_spec.md | 22 +- go/fory/codegen/utils.go | 6 +- go/fory/map_primitive.go | 18 +- go/fory/skip.go | 4 +- go/fory/struct.go | 4 +- go/fory/type_resolver.go | 8 +- go/fory/types.go | 30 +- .../apache/fory/resolver/XtypeResolver.java | 13 +- javascript/packages/fory/lib/gen/number.ts | 4 +- javascript/packages/fory/lib/type.ts | 18 +- javascript/packages/fory/lib/typeInfo.ts | 18 +- python/pyfory/collection.pxi | 14 +- python/pyfory/includes/libformat.pxd | 12 +- python/pyfory/includes/libserialization.pxd | 12 +- python/pyfory/struct.py | 4 +- python/pyfory/types.py | 16 +- rust/fory-core/src/buffer.rs | 539 +++++++++++------- rust/fory-core/src/meta/type_meta.rs | 18 +- rust/fory-core/src/resolver/type_resolver.rs | 8 +- rust/fory-core/src/serializer/number.rs | 4 +- rust/fory-core/src/serializer/skip.rs | 261 +++++++-- .../src/serializer/unsigned_number.rs | 14 +- rust/fory-core/src/types.rs | 36 +- rust/fory-derive/src/object/util.rs | 18 +- 29 files changed, 753 insertions(+), 420 deletions(-) diff --git a/ci/build_linux_wheels.py b/ci/build_linux_wheels.py index 54fdc2ed36..921de626c9 100755 --- 
a/ci/build_linux_wheels.py +++ b/ci/build_linux_wheels.py @@ -22,7 +22,7 @@ Usage: ./build_linux_wheels.py --arch X86 --python cp38-cp38 - ./build_linux_wheels.py --arch AARCH64 --python cp313-cp313 --release + ./build_linux_wheels.py --arch AARCH64 --python cp313-cp313 --release Environment: - GITHUB_WORKSPACE (optional; defaults to cwd) @@ -44,7 +44,7 @@ "quay.io/pypa/manylinux2014_x86_64:latest", ] -DEFAULT_AARCH64_IMAGES = [ +DEFAULT_AARCH64_IMAGES = [ "quay.io/pypa/manylinux2014_aarch64:latest", ] @@ -55,14 +55,14 @@ "AMD64": "x86", "ARM": "arm64", "ARM64": "arm64", - "AARCH64": "arm64", + "AARCH64": "arm64", } def parse_args(): p = argparse.ArgumentParser() p.add_argument( - "--arch", required=True, help="Architecture (e.g. X86, X64, AARCH64)" + "--arch", required=True, help="Architecture (e.g. X86, X64, AARCH64)" ) p.add_argument( "--python", required=True, help="Python version (e.g. cp38-cp38, cp313-cp313)" @@ -83,7 +83,7 @@ def get_image_for_arch(arch_normalized: str) -> str: if arch_normalized == "x86": return DEFAULT_X86_IMAGES[0] elif arch_normalized == "arm64": - return DEFAULT_AARCH64_IMAGES[0] + return DEFAULT_AARCH64_IMAGES[0] else: raise SystemExit(f"Unsupported arch: {arch_normalized!r}") diff --git a/cpp/fory/serialization/skip.cc b/cpp/fory/serialization/skip.cc index 7df3d8545d..0278816422 100644 --- a/cpp/fory/serialization/skip.cc +++ b/cpp/fory/serialization/skip.cc @@ -489,8 +489,8 @@ void skip_field_value(ReadContext &ctx, const FieldType &field_type, ctx.buffer().IncreaseReaderIndex(8); return; - case TypeId::VAR32: - case TypeId::VAR64: + case TypeId::VARINT32: + case TypeId::VARINT64: skip_varint(ctx); return; diff --git a/cpp/fory/serialization/struct_serializer.h b/cpp/fory/serialization/struct_serializer.h index 4148980d9d..d530046f57 100644 --- a/cpp/fory/serialization/struct_serializer.h +++ b/cpp/fory/serialization/struct_serializer.h @@ -118,8 +118,8 @@ namespace detail { inline
constexpr bool is_primitive_type_id(TypeId type_id) { return type_id == TypeId::BOOL || type_id == TypeId::INT8 || type_id == TypeId::INT16 || type_id == TypeId::INT32 || - type_id == TypeId::VAR32 || type_id == TypeId::INT64 || - type_id == TypeId::VAR64 || type_id == TypeId::H64 || + type_id == TypeId::VARINT32 || type_id == TypeId::INT64 || + type_id == TypeId::VARINT64 || type_id == TypeId::TAGGED_INT64 || type_id == TypeId::FLOAT16 || type_id == TypeId::FLOAT32 || type_id == TypeId::FLOAT64 || // Unsigned types for native mode (xlang=false) @@ -658,11 +658,11 @@ template struct CompileTimeFieldHelpers { case TypeId::FLOAT16: return 2; case TypeId::INT32: - case TypeId::VAR32: + case TypeId::VARINT32: case TypeId::FLOAT32: return 4; case TypeId::INT64: - case TypeId::VAR64: + case TypeId::VARINT64: case TypeId::FLOAT64: return 8; default: @@ -673,8 +673,8 @@ template struct CompileTimeFieldHelpers { static constexpr bool is_compress_id(uint32_t tid) { return tid == static_cast(TypeId::INT32) || tid == static_cast(TypeId::INT64) || - tid == static_cast(TypeId::VAR32) || - tid == static_cast(TypeId::VAR64); + tid == static_cast(TypeId::VARINT32) || + tid == static_cast(TypeId::VARINT64); } /// Check if a type ID is an internal (built-in, final) type for group 2. 
@@ -828,15 +828,15 @@ template struct CompileTimeFieldHelpers { total += 2; break; case TypeId::INT32: - case TypeId::VAR32: + case TypeId::VARINT32: total += 8; // varint max, but bulk write may write up to 8 bytes break; case TypeId::FLOAT32: total += 4; break; case TypeId::INT64: - case TypeId::VAR64: - case TypeId::H64: + case TypeId::VARINT64: + case TypeId::TAGGED_INT64: total += 10; // varint max break; case TypeId::FLOAT64: @@ -899,14 +899,14 @@ template struct CompileTimeFieldHelpers { /// Check if a type_id represents a varint primitive (int32/int64 types) /// Per basic_serializer.h, INT32/INT64 use zigzag varint encoding - /// VAR32/VAR64/H64 also use varint encoding + /// VARINT32/VARINT64/TAGGED_INT64 also use varint encoding static constexpr bool is_varint_primitive(uint32_t tid) { switch (static_cast(tid)) { case TypeId::INT32: // int32_t uses zigzag varint per basic_serializer.h case TypeId::INT64: // int64_t uses zigzag varint per basic_serializer.h - case TypeId::VAR32: // explicit varint type - case TypeId::VAR64: // explicit varint type - case TypeId::H64: // hybrid int64 encoding + case TypeId::VARINT32: // explicit varint type + case TypeId::VARINT64: // explicit varint type + case TypeId::TAGGED_INT64: // hybrid int64 encoding return true; default: return false; @@ -917,11 +917,11 @@ template struct CompileTimeFieldHelpers { static constexpr size_t max_varint_bytes(uint32_t tid) { switch (static_cast(tid)) { case TypeId::INT32: // int32_t uses zigzag varint - case TypeId::VAR32: // explicit varint + case TypeId::VARINT32: // explicit varint return 5; // int32 varint max case TypeId::INT64: // int64_t uses zigzag varint - case TypeId::VAR64: // explicit varint - case TypeId::H64: + case TypeId::VARINT64: // explicit varint + case TypeId::TAGGED_INT64: return 10; // int64 varint max default: return 0; @@ -1055,15 +1055,15 @@ template struct CompileTimeFieldHelpers { total += 2; break; case TypeId::INT32: - case TypeId::VAR32: + case 
TypeId::VARINT32: total += 5; // varint max break; case TypeId::FLOAT32: total += 4; break; case TypeId::INT64: - case TypeId::VAR64: - case TypeId::H64: + case TypeId::VARINT64: + case TypeId::TAGGED_INT64: total += 10; // varint max break; case TypeId::FLOAT64: diff --git a/cpp/fory/serialization/type_resolver.cc b/cpp/fory/serialization/type_resolver.cc index 7e1e2ed7b0..afea138b7e 100644 --- a/cpp/fory/serialization/type_resolver.cc +++ b/cpp/fory/serialization/type_resolver.cc @@ -611,11 +611,11 @@ int32_t get_primitive_type_size(uint32_t type_id) { case TypeId::FLOAT16: return 2; case TypeId::INT32: - case TypeId::VAR32: + case TypeId::VARINT32: case TypeId::FLOAT32: return 4; case TypeId::INT64: - case TypeId::VAR64: + case TypeId::VARINT64: case TypeId::FLOAT64: return 8; default: @@ -626,8 +626,8 @@ int32_t get_primitive_type_size(uint32_t type_id) { bool is_compress(uint32_t type_id) { return type_id == static_cast(TypeId::INT32) || type_id == static_cast(TypeId::INT64) || - type_id == static_cast(TypeId::VAR32) || - type_id == static_cast(TypeId::VAR64); + type_id == static_cast(TypeId::VARINT32) || + type_id == static_cast(TypeId::VARINT64); } // Numeric field sorter (for primitive fields) diff --git a/cpp/fory/type/type.h b/cpp/fory/type/type.h index 1240fabe3f..2900f46c6d 100644 --- a/cpp/fory/type/type.h +++ b/cpp/fory/type/type.h @@ -34,13 +34,13 @@ enum class TypeId : int32_t { // a 32-bit signed integer. INT32 = 4, // a 32-bit signed integer which uses fory var_int32 encoding. - VAR32 = 5, + VARINT32 = 5, // a 64-bit signed integer. INT64 = 6, // a 64-bit signed integer which uses fory PVL encoding. - VAR64 = 7, + VARINT64 = 7, // a 64-bit signed integer which uses fory hybrid encoding. - H64 = 8, + TAGGED_INT64 = 8, // an 8-bit unsigned integer. UINT8 = 9, // a 16-bit unsigned integer. @@ -48,13 +48,13 @@ enum class TypeId : int32_t { // a 32-bit unsigned integer. UINT32 = 11, // a 32-bit unsigned integer which uses fory var_uint32 encoding. 
- VARU32 = 12, + VAR_UINT32 = 12, // a 64-bit unsigned integer. UINT64 = 13, // a 64-bit unsigned integer which uses fory var_uint64 encoding. - VARU64 = 14, + VAR_UINT64 = 14, // a 64-bit unsigned integer which uses fory hybrid encoding. - HU64 = 15, + TAGGED_UINT64 = 15, // a 16-bit floating point number. FLOAT16 = 16, // a 32-bit floating point number. diff --git a/docs/specification/xlang_serialization_spec.md b/docs/specification/xlang_serialization_spec.md index 6b09b1042a..acd2de9572 100644 --- a/docs/specification/xlang_serialization_spec.md +++ b/docs/specification/xlang_serialization_spec.md @@ -161,17 +161,17 @@ custom types (struct/ext/enum). User type IDs are in a separate namespace and co | 2 | INT8 | 8-bit signed integer | | 3 | INT16 | 16-bit signed integer | | 4 | INT32 | 32-bit signed integer | -| 5 | VAR32 | Variable-length encoded 32-bit signed integer | +| 5 | VARINT32 | Variable-length encoded 32-bit signed integer | | 6 | INT64 | 64-bit signed integer | -| 7 | VAR64 | Variable-length encoded 64-bit signed integer | -| 8 | H64 | Hybrid encoded 64-bit signed integer | +| 7 | VARINT64 | Variable-length encoded 64-bit signed integer | +| 8 | TAGGED_INT64 | Hybrid encoded 64-bit signed integer | | 9 | UINT8 | 8-bit unsigned integer | | 10 | UINT16 | 16-bit unsigned integer | | 11 | UINT32 | 32-bit unsigned integer | -| 12 | VARU32 | Variable-length encoded 32-bit unsigned integer | +| 12 | VAR_UINT32 | Variable-length encoded 32-bit unsigned integer | | 13 | UINT64 | 64-bit unsigned integer | -| 14 | VARU64 | Variable-length encoded 64-bit unsigned integer | -| 15 | HU64 | Hybrid encoded 64-bit unsigned integer | +| 14 | VAR_UINT64 | Variable-length encoded 64-bit unsigned integer | +| 15 | TAGGED_UINT64 | Hybrid encoded 64-bit unsigned integer | | 16 | FLOAT16 | 16-bit floating point (half precision) | | 17 | FLOAT32 | 32-bit floating point (single precision) | | 18 | FLOAT64 | 64-bit floating point (double precision) | @@ -939,7 +939,7 @@ 
function write_varuint64(value): | ... | ... | | 2^56 ~ 2^63-1 | 9 | -#### unsigned hybrid int64 (HU64) +#### unsigned hybrid int64 (TAGGED_UINT64) - size: 4 or 9 bytes @@ -963,7 +963,7 @@ else: return read_uint64_le() // read remaining 8 bytes ``` -Note: HU64 uses the full 31 bits for positive values [0, 2^31-1], compared to H64 which splits the range for signed values [-2^30, 2^30-1]. +Note: TAGGED_UINT64 uses the full 31 bits for positive values [0, 2^31-1], compared to TAGGED_INT64 which splits the range for signed values [-2^30, 2^30-1]. #### VarUint36Small @@ -1002,7 +1002,7 @@ zigzag_value = read_varuint64() value = (zigzag_value >> 1) ^ (-(zigzag_value & 1)) ``` -#### signed hybrid int64 (H64) +#### signed hybrid int64 (TAGGED_INT64) - size: 4 or 9 bytes @@ -1026,7 +1026,7 @@ else: return read_int64_le() // read remaining 8 bytes ``` -Note: H64 uses 30 bits + sign for values [-2^30, 2^30-1], while HU64 uses full 31 bits for unsigned values [0, 2^31-1]. +Note: TAGGED_INT64 uses 30 bits + sign for values [-2^30, 2^30-1], while TAGGED_UINT64 uses full 31 bits for unsigned values [0, 2^31-1]. #### float32 @@ -1525,7 +1525,7 @@ This section provides a step-by-step guide for implementing Fory xlang serializa - [ ] Implement `write_varuint64` / `read_varuint64` - [ ] Implement `write_varint64` / `read_varint64` (with ZigZag) - [ ] Implement `write_varuint36_small` / `read_varuint36_small` (for strings) - - [ ] Optionally implement Hybrid encoding (H64/HU64) for int64 + - [ ] Optionally implement Hybrid encoding (TAGGED_INT64/TAGGED_UINT64) for int64 3. 
**Header Handling** - [ ] Write magic number `0x62d4` diff --git a/go/fory/codegen/utils.go b/go/fory/codegen/utils.go index c833ee841e..776f8b5dee 100644 --- a/go/fory/codegen/utils.go +++ b/go/fory/codegen/utils.go @@ -293,11 +293,11 @@ func sortFields(fields []*FieldInfo) { // When same size, sort by type id // When same size and type id, sort by snake case field name - // Handle compression types (INT32/INT64/VAR32/VAR64) + // Handle compression types (INT32/INT64/VARINT32/VARINT64) compressI := f1.TypeID == "INT32" || f1.TypeID == "INT64" || - f1.TypeID == "VAR32" || f1.TypeID == "VAR64" + f1.TypeID == "VARINT32" || f1.TypeID == "VARINT64" compressJ := f2.TypeID == "INT32" || f2.TypeID == "INT64" || - f2.TypeID == "VAR32" || f2.TypeID == "VAR64" + f2.TypeID == "VARINT32" || f2.TypeID == "VARINT64" if compressI != compressJ { return !compressI && compressJ // non-compress comes first diff --git a/go/fory/map_primitive.go b/go/fory/map_primitive.go index 926744b8a7..53c04cb428 100644 --- a/go/fory/map_primitive.go +++ b/go/fory/map_primitive.go @@ -155,7 +155,7 @@ func writeMapStringInt64(buf *ByteBuffer, m map[string]int64, hasGenerics bool) buf.WriteUint8(0) buf.WriteUint8(uint8(chunkSize)) buf.WriteVaruint32Small7(uint32(STRING)) // key type - buf.WriteVaruint32Small7(uint32(VAR64)) // value type + buf.WriteVaruint32Small7(uint32(VARINT64)) // value type } count := 0 @@ -229,7 +229,7 @@ func writeMapStringInt32(buf *ByteBuffer, m map[string]int32, hasGenerics bool) buf.WriteUint8(0) buf.WriteUint8(uint8(chunkSize)) buf.WriteVaruint32Small7(uint32(STRING)) // key type - buf.WriteVaruint32Small7(uint32(VAR32)) // value type + buf.WriteVaruint32Small7(uint32(VARINT32)) // value type } count := 0 @@ -303,7 +303,7 @@ func writeMapStringInt(buf *ByteBuffer, m map[string]int, hasGenerics bool) { buf.WriteUint8(0) buf.WriteUint8(uint8(chunkSize)) buf.WriteVaruint32Small7(uint32(STRING)) // key type - buf.WriteVaruint32Small7(uint32(VAR64)) // value type (int 
serialized as varint64) + buf.WriteVaruint32Small7(uint32(VARINT64)) // value type (int serialized as varint64) } count := 0 @@ -529,8 +529,8 @@ func writeMapInt32Int32(buf *ByteBuffer, m map[int32]int32, hasGenerics bool) { } else { buf.WriteUint8(0) buf.WriteUint8(uint8(chunkSize)) - buf.WriteVaruint32Small7(uint32(VAR32)) // key type - buf.WriteVaruint32Small7(uint32(VAR32)) // value type + buf.WriteVaruint32Small7(uint32(VARINT32)) // key type + buf.WriteVaruint32Small7(uint32(VARINT32)) // value type } count := 0 @@ -603,8 +603,8 @@ func writeMapInt64Int64(buf *ByteBuffer, m map[int64]int64, hasGenerics bool) { } else { buf.WriteUint8(0) buf.WriteUint8(uint8(chunkSize)) - buf.WriteVaruint32Small7(uint32(VAR64)) // key type - buf.WriteVaruint32Small7(uint32(VAR64)) // value type + buf.WriteVaruint32Small7(uint32(VARINT64)) // key type + buf.WriteVaruint32Small7(uint32(VARINT64)) // value type } count := 0 @@ -677,8 +677,8 @@ func writeMapIntInt(buf *ByteBuffer, m map[int]int, hasGenerics bool) { } else { buf.WriteUint8(0) buf.WriteUint8(uint8(chunkSize)) - buf.WriteVaruint32Small7(uint32(VAR64)) // key type (int serialized as varint64) - buf.WriteVaruint32Small7(uint32(VAR64)) // value type + buf.WriteVaruint32Small7(uint32(VARINT64)) // key type (int serialized as varint64) + buf.WriteVaruint32Small7(uint32(VARINT64)) // value type } count := 0 diff --git a/go/fory/skip.go b/go/fory/skip.go index 91dd2b7ccf..016e1acc7d 100644 --- a/go/fory/skip.go +++ b/go/fory/skip.go @@ -582,9 +582,9 @@ func skipValue(ctx *ReadContext, fieldDef FieldDef, readRefFlag bool, isField bo _ = ctx.buffer.ReadInt16(err) case INT32: _ = ctx.buffer.ReadVaruint32Small7(err) - case VAR32: + case VARINT32: _ = ctx.buffer.ReadVaruint32Small7(err) - case INT64, VAR64, H64: + case INT64, VARINT64, TAGGED_INT64: _ = ctx.buffer.ReadVarint64(err) // Floating point types diff --git a/go/fory/struct.go b/go/fory/struct.go index 6fcf126e79..e6b7e99f13 100644 --- a/go/fory/struct.go +++ 
b/go/fory/struct.go @@ -1935,9 +1935,9 @@ func sortFields( sort.Slice(s, func(i, j int) bool { ai, aj := s[i], s[j] compressI := ai.typeID == INT32 || ai.typeID == INT64 || - ai.typeID == VAR32 || ai.typeID == VAR64 + ai.typeID == VARINT32 || ai.typeID == VARINT64 compressJ := aj.typeID == INT32 || aj.typeID == INT64 || - aj.typeID == VAR32 || aj.typeID == VAR64 + aj.typeID == VARINT32 || aj.typeID == VARINT64 if compressI != compressJ { return !compressI && compressJ } diff --git a/go/fory/type_resolver.go b/go/fory/type_resolver.go index 47286e8b70..951486c979 100644 --- a/go/fory/type_resolver.go +++ b/go/fory/type_resolver.go @@ -1810,7 +1810,7 @@ func (r *TypeResolver) ReadTypeInfo(buffer *ByteBuffer, err *Error) *TypeInfo { Serializer: r.typeToSerializers[reflect.TypeOf(uint16(0))], StaticId: ConcreteTypeInt16, // Use Int16 static ID for uint16 } - case INT32, VAR32: + case INT32, VARINT32: return &TypeInfo{ Type: reflect.TypeOf(int32(0)), TypeID: typeID, @@ -1824,7 +1824,7 @@ func (r *TypeResolver) ReadTypeInfo(buffer *ByteBuffer, err *Error) *TypeInfo { Serializer: r.typeToSerializers[reflect.TypeOf(uint32(0))], StaticId: ConcreteTypeInt32, // Use Int32 static ID for uint32 } - case INT64, VAR64, H64: + case INT64, VARINT64, TAGGED_INT64: return &TypeInfo{ Type: reflect.TypeOf(int64(0)), TypeID: typeID, @@ -1953,9 +1953,9 @@ func (r *TypeResolver) readTypeInfoWithTypeID(buffer *ByteBuffer, typeID uint32, return &TypeInfo{Type: int8Type, TypeID: typeID, Serializer: r.typeToSerializers[int8Type], StaticId: ConcreteTypeInt8} case INT16: return &TypeInfo{Type: int16Type, TypeID: typeID, Serializer: r.typeToSerializers[int16Type], StaticId: ConcreteTypeInt16} - case INT32, VAR32: + case INT32, VARINT32: return &TypeInfo{Type: int32Type, TypeID: typeID, Serializer: r.typeToSerializers[int32Type], StaticId: ConcreteTypeInt32} - case INT64, VAR64, H64: + case INT64, VARINT64, TAGGED_INT64: return &TypeInfo{Type: int64Type, TypeID: typeID, Serializer: 
r.typeToSerializers[int64Type], StaticId: ConcreteTypeInt64} case FLOAT32: return &TypeInfo{Type: float32Type, TypeID: typeID, Serializer: r.typeToSerializers[float32Type], StaticId: ConcreteTypeFloat32} diff --git a/go/fory/types.go b/go/fory/types.go index 8a7b09cadd..1d548f511c 100644 --- a/go/fory/types.go +++ b/go/fory/types.go @@ -32,28 +32,28 @@ const ( INT16 = 3 // INT32 Signed 32-bit little-endian integer INT32 = 4 - // VAR32 a 32-bit signed integer which uses fory var_int32 encoding - VAR32 = 5 + // VARINT32 a 32-bit signed integer which uses fory var_int32 encoding + VARINT32 = 5 // INT64 Signed 64-bit little-endian integer INT64 = 6 - // VAR64 a 64-bit signed integer which uses fory PVL encoding - VAR64 = 7 - // H64 a 64-bit signed integer which uses fory hybrid encoding - H64 = 8 + // VARINT64 a 64-bit signed integer which uses fory PVL encoding + VARINT64 = 7 + // TAGGED_INT64 a 64-bit signed integer which uses fory hybrid encoding + TAGGED_INT64 = 8 // UINT8 Unsigned 8-bit little-endian integer UINT8 = 9 // UINT16 Unsigned 16-bit little-endian integer UINT16 = 10 // UINT32 Unsigned 32-bit little-endian integer UINT32 = 11 - // VARU32 a 32-bit unsigned integer which uses fory var_uint32 encoding - VARU32 = 12 + // VAR_UINT32 a 32-bit unsigned integer which uses fory var_uint32 encoding + VAR_UINT32 = 12 // UINT64 Unsigned 64-bit little-endian integer UINT64 = 13 - // VARU64 a 64-bit unsigned integer which uses fory var_uint64 encoding - VARU64 = 14 - // HU64 a 64-bit unsigned integer which uses fory hybrid encoding - HU64 = 15 + // VAR_UINT64 a 64-bit unsigned integer which uses fory var_uint64 encoding + VAR_UINT64 = 14 + // TAGGED_UINT64 a 64-bit unsigned integer which uses fory hybrid encoding + TAGGED_UINT64 = 15 // FLOAT16 2-byte floating point value FLOAT16 = 16 // FLOAT32 4-byte floating point value @@ -168,7 +168,7 @@ func isPrimitiveType(typeID int16) bool { // Collections, structs, and other complex types need reference tracking. 
func NeedWriteRef(typeID TypeId) bool { switch typeID { - case BOOL, INT8, INT16, INT32, INT64, VAR32, VAR64, H64, + case BOOL, INT8, INT16, INT32, INT64, VARINT32, VARINT64, TAGGED_INT64, FLOAT32, FLOAT64, FLOAT16, STRING, TIMESTAMP, LOCAL_DATE, DURATION: return false @@ -213,9 +213,9 @@ var primitiveTypeSizes = map[int16]int{ INT8: 1, INT16: 2, INT32: 4, - VAR32: 4, + VARINT32: 4, INT64: 8, - VAR64: 8, + VARINT64: 8, FLOAT32: 4, FLOAT64: 8, } diff --git a/java/fory-core/src/main/java/org/apache/fory/resolver/XtypeResolver.java b/java/fory-core/src/main/java/org/apache/fory/resolver/XtypeResolver.java index a4fc4079e7..c482436d69 100644 --- a/java/fory-core/src/main/java/org/apache/fory/resolver/XtypeResolver.java +++ b/java/fory-core/src/main/java/org/apache/fory/resolver/XtypeResolver.java @@ -618,10 +618,19 @@ private Serializer getCollectionSerializer(Class cls) { private void registerDefaultTypes() { registerDefaultTypes(Types.BOOL, Boolean.class, boolean.class, AtomicBoolean.class); - registerDefaultTypes(Types.INT8, Byte.class, byte.class); - registerDefaultTypes(Types.INT16, Short.class, short.class); + registerDefaultTypes(Types.UINT8, Byte.class, byte.class); + registerDefaultTypes(Types.UINT16, Short.class, short.class); + registerDefaultTypes(Types.UINT32, Integer.class, int.class, AtomicInteger.class); + registerDefaultTypes(Types.UINT64, Long.class, long.class, AtomicLong.class); + registerDefaultTypes(Types.TAGGED_UINT64, Long.class, long.class, AtomicLong.class); registerDefaultTypes(Types.INT32, Integer.class, int.class, AtomicInteger.class); registerDefaultTypes(Types.INT64, Long.class, long.class, AtomicLong.class); + registerDefaultTypes(Types.TAGGED_INT64, Long.class, long.class, AtomicLong.class); + + registerDefaultTypes(Types.INT8, Byte.class, byte.class); + registerDefaultTypes(Types.INT16, Short.class, short.class); + registerDefaultTypes(Types.VARINT32, Integer.class, int.class, AtomicInteger.class); + 
registerDefaultTypes(Types.VARINT64, Long.class, long.class, AtomicLong.class); registerDefaultTypes(Types.FLOAT32, Float.class, float.class); registerDefaultTypes(Types.FLOAT64, Double.class, double.class); registerDefaultTypes(Types.STRING, String.class, StringBuilder.class, StringBuffer.class); diff --git a/javascript/packages/fory/lib/gen/number.ts b/javascript/packages/fory/lib/gen/number.ts index 758c7d8929..23297b1414 100644 --- a/javascript/packages/fory/lib/gen/number.ts +++ b/javascript/packages/fory/lib/gen/number.ts @@ -72,7 +72,7 @@ CodegenRegistry.register(InternalSerializerType.INT32, ) ); -CodegenRegistry.register(InternalSerializerType.VAR32, +CodegenRegistry.register(InternalSerializerType.VARINT32, buildNumberSerializer( (builder, accessor) => builder.writer.varInt32(accessor), builder => builder.reader.varInt32() @@ -86,7 +86,7 @@ CodegenRegistry.register(InternalSerializerType.INT64, ) ); -CodegenRegistry.register(InternalSerializerType.H64, +CodegenRegistry.register(InternalSerializerType.TAGGED_INT64, buildNumberSerializer( (builder, accessor) => builder.writer.sliInt64(accessor), builder => builder.reader.sliInt64() diff --git a/javascript/packages/fory/lib/type.ts b/javascript/packages/fory/lib/type.ts index 20c61668e3..b4583e49a9 100644 --- a/javascript/packages/fory/lib/type.ts +++ b/javascript/packages/fory/lib/type.ts @@ -31,13 +31,13 @@ export const TypeId = { // a 32-bit signed integer. INT32: 4, // a 32-bit signed integer which uses fory var_int32 encoding. - VAR32: 5, + VARINT32: 5, // a 64-bit signed integer. INT64: 6, // a 64-bit signed integer which uses fory PVL encoding. - VAR64: 7, + VARINT64: 7, // a 64-bit signed integer which uses fory hybrid encoding. - H64: 8, + TAGGED_INT64: 8, // an 8-bit unsigned integer. UINT8: 9, // a 16-bit unsigned integer. @@ -45,13 +45,13 @@ export const TypeId = { // a 32-bit unsigned integer. UINT32: 11, // a 32-bit unsigned integer which uses fory var_uint32 encoding. 
- VARU32: 12, + VAR_UINT32: 12, // a 64-bit unsigned integer. UINT64: 13, // a 64-bit unsigned integer which uses fory var_uint64 encoding. - VARU64: 14, + VAR_UINT64: 14, // a 64-bit unsigned integer which uses fory hybrid encoding. - HU64: 15, + TAGGED_UINT64: 15, // a 16-bit floating point number. FLOAT16: 16, // a 32-bit floating point number. @@ -138,10 +138,10 @@ export enum InternalSerializerType { INT8, INT16, INT32, - VAR32, + VARINT32, INT64, - VAR64, - H64, + VARINT64, + TAGGED_INT64, FLOAT16, FLOAT32, FLOAT64, diff --git a/javascript/packages/fory/lib/typeInfo.ts b/javascript/packages/fory/lib/typeInfo.ts index c6035e6c03..e98c7da023 100644 --- a/javascript/packages/fory/lib/typeInfo.ts +++ b/javascript/packages/fory/lib/typeInfo.ts @@ -337,7 +337,7 @@ export type HintInput = T extends unknown ? any : T extends { | InternalSerializerType.INT8 | InternalSerializerType.INT16 | InternalSerializerType.INT32 - | InternalSerializerType.VAR32 + | InternalSerializerType.VARINT32 | InternalSerializerType.FLOAT16 | InternalSerializerType.FLOAT32 | InternalSerializerType.FLOAT64; @@ -345,8 +345,8 @@ export type HintInput = T extends unknown ? any : T extends { ? number : T extends { - type: InternalSerializerType.VAR64 - | InternalSerializerType.H64 + type: InternalSerializerType.VARINT64 + | InternalSerializerType.TAGGED_INT64 | InternalSerializerType.INT64; } ? bigint @@ -407,7 +407,7 @@ export type HintResult = T extends never ? any : T extends { | InternalSerializerType.INT8 | InternalSerializerType.INT16 | InternalSerializerType.INT32 - | InternalSerializerType.VAR32 + | InternalSerializerType.VARINT32 | InternalSerializerType.FLOAT16 | InternalSerializerType.FLOAT32 | InternalSerializerType.FLOAT64; @@ -415,7 +415,7 @@ export type HintResult = T extends never ? any : T extends { ? number : T extends { - type: InternalSerializerType.H64 + type: InternalSerializerType.TAGGED_INT64 | InternalSerializerType.INT64; } ? 
bigint @@ -553,8 +553,8 @@ export const Type = { }, varInt32() { return TypeInfo.fromNonParam( - InternalSerializerType.VAR32 as const, - (TypeId.VAR32), + InternalSerializerType.VARINT32 as const, + (TypeId.VARINT32), ); }, @@ -567,8 +567,8 @@ export const Type = { }, sliInt64() { return TypeInfo.fromNonParam( - InternalSerializerType.H64 as const, - (TypeId.H64), + InternalSerializerType.TAGGED_INT64 as const, + (TypeId.TAGGED_INT64), ); }, diff --git a/python/pyfory/collection.pxi b/python/pyfory/collection.pxi index 603da240e2..7c335c6e74 100644 --- a/python/pyfory/collection.pxi +++ b/python/pyfory/collection.pxi @@ -310,7 +310,7 @@ cdef class ListSerializer(CollectionSerializer): if type_id == TypeId.STRING: self._read_string(buffer, len_, list_) return list_ - elif type_id == TypeId.VAR64: + elif type_id == TypeId.VARINT64: self._read_int(buffer, len_, list_) return list_ elif type_id == TypeId.BOOL: @@ -387,7 +387,7 @@ cdef inline get_next_element( # error. if type_id == TypeId.STRING: return buffer.read_string() - elif type_id == TypeId.VAR32: + elif type_id == TypeId.VARINT32: return buffer.read_varint64() elif type_id == TypeId.BOOL: return buffer.read_bool() @@ -428,7 +428,7 @@ cdef class TupleSerializer(CollectionSerializer): if type_id == TypeId.STRING: self._read_string(buffer, len_, tuple_) return tuple_ - if type_id == TypeId.VAR64: + if type_id == TypeId.VARINT64: self._read_int(buffer, len_, tuple_) return tuple_ if type_id == TypeId.BOOL: @@ -521,7 +521,7 @@ cdef class SetSerializer(CollectionSerializer): if type_id == TypeId.STRING: self._read_string(buffer, len_, instance) return instance - if type_id == TypeId.VAR64: + if type_id == TypeId.VARINT64: self._read_int(buffer, len_, instance) return instance if type_id == TypeId.BOOL: @@ -551,7 +551,7 @@ cdef class SetSerializer(CollectionSerializer): type_id = typeinfo.type_id if type_id == TypeId.STRING: instance.add(buffer.read_string()) - elif type_id == TypeId.VAR64: + elif type_id == 
TypeId.VARINT64: instance.add(buffer.read_varint64()) elif type_id == TypeId.BOOL: instance.add(buffer.read_bool()) @@ -571,7 +571,7 @@ cdef class SetSerializer(CollectionSerializer): type_id = typeinfo.type_id if type_id == TypeId.STRING: instance.add(buffer.read_string()) - elif type_id == TypeId.VAR64: + elif type_id == TypeId.VARINT64: instance.add(buffer.read_varint64()) elif type_id == TypeId.BOOL: instance.add(buffer.read_bool()) @@ -593,7 +593,7 @@ cdef class SetSerializer(CollectionSerializer): type_id = typeinfo.type_id if type_id == TypeId.STRING: instance.add(buffer.read_string()) - elif type_id == TypeId.VAR64: + elif type_id == TypeId.VARINT64: instance.add(buffer.read_varint64()) elif type_id == TypeId.BOOL: instance.add(buffer.read_bool()) diff --git a/python/pyfory/includes/libformat.pxd b/python/pyfory/includes/libformat.pxd index 8e1ffd4060..372effeabe 100755 --- a/python/pyfory/includes/libformat.pxd +++ b/python/pyfory/includes/libformat.pxd @@ -45,17 +45,17 @@ cdef extern from "fory/type/type.h" namespace "fory" nogil: INT8 = 2 INT16 = 3 INT32 = 4 - VAR32 = 5 + VARINT32 = 5 INT64 = 6 - VAR64 = 7 - H64 = 8 + VARINT64 = 7 + TAGGED_INT64 = 8 UINT8 = 9 UINT16 = 10 UINT32 = 11 - VARU32 = 12 + VAR_UINT32 = 12 UINT64 = 13 - VARU64 = 14 - HU64 = 15 + VAR_UINT64 = 14 + TAGGED_UINT64 = 15 FLOAT16 = 16 FLOAT32 = 17 FLOAT64 = 18 diff --git a/python/pyfory/includes/libserialization.pxd b/python/pyfory/includes/libserialization.pxd index edabe1abae..d1926911c5 100644 --- a/python/pyfory/includes/libserialization.pxd +++ b/python/pyfory/includes/libserialization.pxd @@ -28,17 +28,17 @@ cdef extern from "fory/type/type.h" namespace "fory" nogil: INT8 = 2 INT16 = 3 INT32 = 4 - VAR32 = 5 + VARINT32 = 5 INT64 = 6 - VAR64 = 7 - H64 = 8 + VARINT64 = 7 + TAGGED_INT64 = 8 UINT8 = 9 UINT16 = 10 UINT32 = 11 - VARU32 = 12 + VAR_UINT32 = 12 UINT64 = 13 - VARU64 = 14 - HU64 = 15 + VAR_UINT64 = 14 + TAGGED_UINT64 = 15 FLOAT16 = 16 FLOAT32 = 17 FLOAT64 = 18 diff --git 
a/python/pyfory/struct.py b/python/pyfory/struct.py index 704fdd131f..86ba1c965c 100644 --- a/python/pyfory/struct.py +++ b/python/pyfory/struct.py @@ -1132,8 +1132,8 @@ def numeric_sorter(item): compress = id_ in { TypeId.INT32, TypeId.INT64, - TypeId.VAR32, - TypeId.VAR64, + TypeId.VARINT32, + TypeId.VARINT64, } # Sort by: compress flag, -size (largest first), -type_id (higher type ID first), field_name # Java sorts by size (largest first), then by primitive type ID (descending) diff --git a/python/pyfory/types.py b/python/pyfory/types.py index 7c18dafc4d..0fb14c1549 100644 --- a/python/pyfory/types.py +++ b/python/pyfory/types.py @@ -45,13 +45,13 @@ class TypeId: # a 32-bit signed integer. INT32 = 4 # a 32-bit signed integer which uses fory var_int32 encoding. - VAR32 = 5 + VARINT32 = 5 # a 64-bit signed integer. INT64 = 6 # a 64-bit signed integer which uses fory PVL encoding. - VAR64 = 7 + VARINT64 = 7 # a 64-bit signed integer which uses fory hybrid encoding. - H64 = 8 + TAGGED_INT64 = 8 # an 8-bit unsigned integer. UINT8 = 9 # a 16-bit unsigned integer. @@ -59,13 +59,13 @@ class TypeId: # a 32-bit unsigned integer. UINT32 = 11 # a 32-bit unsigned integer which uses fory var_uint32 encoding. - VARU32 = 12 + VAR_UINT32 = 12 # a 64-bit unsigned integer. UINT64 = 13 # a 64-bit unsigned integer which uses fory var_uint64 encoding. - VARU64 = 14 + VAR_UINT64 = 14 # a 64-bit unsigned integer which uses fory hybrid encoding. - HU64 = 15 + TAGGED_UINT64 = 15 # a 16-bit floating point number. FLOAT16 = 16 # a 32-bit floating point number. 
@@ -212,9 +212,9 @@ def is_primitive_type(type_) -> bool: TypeId.INT8: 1, TypeId.INT16: 2, TypeId.INT32: 4, - TypeId.VAR32: 4, + TypeId.VARINT32: 4, TypeId.INT64: 8, - TypeId.VAR64: 8, + TypeId.VARINT64: 8, TypeId.FLOAT16: 2, TypeId.FLOAT32: 4, TypeId.FLOAT64: 8, diff --git a/rust/fory-core/src/buffer.rs b/rust/fory-core/src/buffer.rs index cf28e7112f..185566889d 100644 --- a/rust/fory-core/src/buffer.rs +++ b/rust/fory-core/src/buffer.rs @@ -28,6 +28,8 @@ pub struct Writer<'a> { pub(crate) bf: &'a mut Vec, } impl<'a> Writer<'a> { + // ============ Utility methods ============ + #[inline(always)] pub fn from_buffer(bf: &'a mut Vec) -> Writer<'a> { Writer { bf } @@ -79,93 +81,92 @@ impl<'a> Writer<'a> { v.len() } + // ============ BOOL (TypeId = 1) ============ + #[inline(always)] pub fn write_bool(&mut self, value: bool) { self.bf.push(if value { 1 } else { 0 }); } - #[inline(always)] - pub fn write_u8(&mut self, value: u8) { - self.bf.push(value); - } + // ============ INT8 (TypeId = 2) ============ #[inline(always)] pub fn write_i8(&mut self, value: i8) { self.bf.push(value as u8); } - #[inline(always)] - pub fn write_u16(&mut self, value: u16) { - #[cfg(target_endian = "little")] - { - let bytes = unsafe { &*(&value as *const u16 as *const [u8; 2]) }; - self.bf.extend_from_slice(bytes); - } - #[cfg(target_endian = "big")] - { - self.bf.extend_from_slice(&value.to_le_bytes()); - } - } + // ============ INT16 (TypeId = 3) ============ #[inline(always)] pub fn write_i16(&mut self, value: i16) { self.write_u16(value as u16); } - #[inline(always)] - pub fn write_u32(&mut self, value: u32) { - #[cfg(target_endian = "little")] - { - let bytes = unsafe { &*(&value as *const u32 as *const [u8; 4]) }; - self.bf.extend_from_slice(bytes); - } - #[cfg(target_endian = "big")] - { - self.bf.extend_from_slice(&value.to_le_bytes()); - } - } + // ============ INT32 (TypeId = 4) ============ #[inline(always)] pub fn write_i32(&mut self, value: i32) { self.write_u32(value as u32); 
} + // ============ VARINT32 (TypeId = 5) ============ + #[inline(always)] - pub fn write_f32(&mut self, value: f32) { - #[cfg(target_endian = "little")] - { - let bytes = unsafe { &*(&value as *const f32 as *const [u8; 4]) }; - self.bf.extend_from_slice(bytes); - } - #[cfg(target_endian = "big")] - { - self.bf.extend_from_slice(&value.to_bits().to_le_bytes()); - } + pub fn write_varint32(&mut self, value: i32) { + let zigzag = ((value as i64) << 1) ^ ((value as i64) >> 31); + self._write_varuint32(zigzag as u32) } + // ============ INT64 (TypeId = 6) ============ + #[inline(always)] pub fn write_i64(&mut self, value: i64) { self.write_u64(value as u64); } + // ============ VARINT64 (TypeId = 7) ============ + #[inline(always)] - pub fn write_f64(&mut self, value: f64) { - #[cfg(target_endian = "little")] - { - let bytes = unsafe { &*(&value as *const f64 as *const [u8; 8]) }; - self.bf.extend_from_slice(bytes); - } - #[cfg(target_endian = "big")] - { - self.bf.extend_from_slice(&value.to_bits().to_le_bytes()); + pub fn write_varint64(&mut self, value: i64) { + let zigzag = ((value << 1) ^ (value >> 63)) as u64; + self._write_varuint64(zigzag); + } + + // ============ TAGGED_INT64 (TypeId = 8) ============ + + /// Write signed long using fory Tagged(Small long as int) encoding. + /// If value is in [0xc0000000, 0x3fffffff] (i.e., [-1073741824, 1073741823]), + /// encode as 4 bytes: `((value as i32) << 1)`. + /// Otherwise write as 9 bytes: `0b1 | little-endian 8 bytes i64`. 
+ #[inline(always)] + pub fn write_tagged_i64(&mut self, value: i64) { + const HALF_MIN_INT_VALUE: i64 = i32::MIN as i64 / 2; // -1073741824 + const HALF_MAX_INT_VALUE: i64 = i32::MAX as i64 / 2; // 1073741823 + if value >= HALF_MIN_INT_VALUE && value <= HALF_MAX_INT_VALUE { + // Fits in 31 bits (with sign), encode as 4 bytes with bit 0 = 0 + let v = (value as i32) << 1; + self.write_i32(v); + } else { + // Write flag byte (0b1) followed by 8-byte i64 + self.bf.push(0b1); + self.write_i64(value); } } + // ============ UINT8 (TypeId = 9) ============ + #[inline(always)] - pub fn write_u64(&mut self, value: u64) { + pub fn write_u8(&mut self, value: u8) { + self.bf.push(value); + } + + // ============ UINT16 (TypeId = 10) ============ + + #[inline(always)] + pub fn write_u16(&mut self, value: u16) { #[cfg(target_endian = "little")] { - let bytes = unsafe { &*(&value as *const u64 as *const [u8; 8]) }; + let bytes = unsafe { &*(&value as *const u16 as *const [u8; 2]) }; self.bf.extend_from_slice(bytes); } #[cfg(target_endian = "big")] @@ -174,22 +175,13 @@ impl<'a> Writer<'a> { } } - #[inline(always)] - pub fn write_usize(&mut self, value: usize) { - const SIZE: usize = std::mem::size_of::(); - match SIZE { - 2 => self.write_u16(value as u16), - 4 => self.write_varuint32(value as u32), - 8 => self.write_varuint64(value as u64), - _ => unreachable!("unsupported usize size"), - } - } + // ============ UINT32 (TypeId = 11) ============ #[inline(always)] - pub fn write_u128(&mut self, value: u128) { + pub fn write_u32(&mut self, value: u32) { #[cfg(target_endian = "little")] { - let bytes = unsafe { &*(&value as *const u128 as *const [u8; 16]) }; + let bytes = unsafe { &*(&value as *const u32 as *const [u8; 4]) }; self.bf.extend_from_slice(bytes); } #[cfg(target_endian = "big")] @@ -198,27 +190,7 @@ impl<'a> Writer<'a> { } } - #[inline(always)] - pub fn write_i128(&mut self, value: i128) { - self.write_u128(value as u128); - } - - #[inline(always)] - pub fn 
write_isize(&mut self, value: isize) { - const SIZE: usize = std::mem::size_of::(); - match SIZE { - 2 => self.write_i16(value as i16), - 4 => self.write_varint32(value as i32), - 8 => self.write_varint64(value as i64), - _ => unreachable!("unsupported isize size"), - } - } - - #[inline(always)] - pub fn write_varint32(&mut self, value: i32) { - let zigzag = ((value as i64) << 1) ^ ((value as i64) >> 31); - self._write_varuint32(zigzag as u32) - } + // ============ VAR_UINT32 (TypeId = 12) ============ #[inline(always)] pub fn write_varuint32(&mut self, value: u32) { @@ -264,12 +236,23 @@ impl<'a> Writer<'a> { } } + // ============ UINT64 (TypeId = 13) ============ + #[inline(always)] - pub fn write_varint64(&mut self, value: i64) { - let zigzag = ((value << 1) ^ (value >> 63)) as u64; - self._write_varuint64(zigzag); + pub fn write_u64(&mut self, value: u64) { + #[cfg(target_endian = "little")] + { + let bytes = unsafe { &*(&value as *const u64 as *const [u8; 8]) }; + self.bf.extend_from_slice(bytes); + } + #[cfg(target_endian = "big")] + { + self.bf.extend_from_slice(&value.to_le_bytes()); + } } + // ============ VAR_UINT64 (TypeId = 14) ============ + #[inline(always)] pub fn write_varuint64(&mut self, value: u64) { self._write_varuint64(value); @@ -374,6 +357,108 @@ impl<'a> Writer<'a> { } } + // ============ TAGGED_UINT64 (TypeId = 15) ============ + + /// Write unsigned long using fory Tagged(Small long as int) encoding. + /// If value is in [0, 0x7fffffff], encode as 4 bytes: `((value as u32) << 1)`. + /// Otherwise write as 9 bytes: `0b1 | little-endian 8 bytes u64`. 
+ #[inline(always)] + pub fn write_tagged_u64(&mut self, value: u64) { + if value <= i32::MAX as u64 { + // Fits in 31 bits, encode as 4 bytes with bit 0 = 0 + let v = (value as u32) << 1; + self.write_u32(v); + } else { + // Write flag byte (0b1) followed by 8-byte u64 + self.bf.push(0b1); + self.write_u64(value); + } + } + + // ============ FLOAT32 (TypeId = 17) ============ + + #[inline(always)] + pub fn write_f32(&mut self, value: f32) { + #[cfg(target_endian = "little")] + { + let bytes = unsafe { &*(&value as *const f32 as *const [u8; 4]) }; + self.bf.extend_from_slice(bytes); + } + #[cfg(target_endian = "big")] + { + self.bf.extend_from_slice(&value.to_bits().to_le_bytes()); + } + } + + // ============ FLOAT64 (TypeId = 18) ============ + + #[inline(always)] + pub fn write_f64(&mut self, value: f64) { + #[cfg(target_endian = "little")] + { + let bytes = unsafe { &*(&value as *const f64 as *const [u8; 8]) }; + self.bf.extend_from_slice(bytes); + } + #[cfg(target_endian = "big")] + { + self.bf.extend_from_slice(&value.to_bits().to_le_bytes()); + } + } + + // ============ STRING (TypeId = 19) ============ + + #[inline(always)] + pub fn write_utf8_string(&mut self, s: &str) { + let bytes = s.as_bytes(); + let len = bytes.len(); + self.bf.reserve(len); + self.bf.extend_from_slice(bytes); + } + + // ============ Rust-specific types (i128, u128, isize, usize) ============ + + #[inline(always)] + pub fn write_i128(&mut self, value: i128) { + self.write_u128(value as u128); + } + + #[inline(always)] + pub fn write_u128(&mut self, value: u128) { + #[cfg(target_endian = "little")] + { + let bytes = unsafe { &*(&value as *const u128 as *const [u8; 16]) }; + self.bf.extend_from_slice(bytes); + } + #[cfg(target_endian = "big")] + { + self.bf.extend_from_slice(&value.to_le_bytes()); + } + } + + #[inline(always)] + pub fn write_isize(&mut self, value: isize) { + const SIZE: usize = std::mem::size_of::(); + match SIZE { + 2 => self.write_i16(value as i16), + 4 => 
self.write_varint32(value as i32), + 8 => self.write_varint64(value as i64), + _ => unreachable!("unsupported isize size"), + } + } + + #[inline(always)] + pub fn write_usize(&mut self, value: usize) { + const SIZE: usize = std::mem::size_of::(); + match SIZE { + 2 => self.write_u16(value as u16), + 4 => self.write_varuint32(value as u32), + 8 => self.write_varuint64(value as u64), + _ => unreachable!("unsupported usize size"), + } + } + + // ============ Other helper methods ============ + #[inline(always)] pub fn write_varuint36_small(&mut self, value: u64) { assert!(value < (1u64 << 36), "value too large for 36-bit varint"); @@ -407,14 +492,6 @@ impl<'a> Writer<'a> { self.write_u64(combined); } } - - #[inline(always)] - pub fn write_utf8_string(&mut self, s: &str) { - let bytes = s.as_bytes(); - let len = bytes.len(); - self.bf.reserve(len); - self.bf.extend_from_slice(bytes); - } } #[derive(Default)] @@ -426,6 +503,8 @@ pub struct Reader<'a> { #[allow(clippy::needless_lifetimes)] impl<'a> Reader<'a> { + // ============ Utility methods ============ + #[inline(always)] pub fn new(bf: &[u8]) -> Reader<'_> { Reader { bf, cursor: 0 } @@ -478,10 +557,6 @@ impl<'a> Reader<'a> { #[inline(always)] fn check_bound(&self, n: usize) -> Result<(), Error> { - // The upper layer guarantees it is non-null - // if self.bf.is_null() { - // return Err(Error::invalid_data("buffer pointer is null")); - // } if self.cursor + n > self.bf.len() { Err(Error::buffer_out_of_bound(self.cursor, n, self.bf.len())) } else { @@ -489,11 +564,6 @@ impl<'a> Reader<'a> { } } - #[inline(always)] - pub fn read_bool(&mut self) -> Result { - Ok(self.read_u8()? 
!= 0) - } - #[inline(always)] fn read_u8_uncheck(&mut self) -> u8 { let result = unsafe { self.bf.get_unchecked(self.cursor) }; @@ -502,113 +572,143 @@ impl<'a> Reader<'a> { } #[inline(always)] - pub fn peek_u8(&mut self) -> Result { - let result = self.value_at(self.cursor)?; - Ok(result) + pub fn skip(&mut self, len: usize) -> Result<(), Error> { + self.check_bound(len)?; + self.move_next(len); + Ok(()) } #[inline(always)] - pub fn read_u8(&mut self) -> Result { - let result = self.value_at(self.cursor)?; - self.move_next(1); + pub fn read_bytes(&mut self, len: usize) -> Result<&[u8], Error> { + self.check_bound(len)?; + let result = &self.bf[self.cursor..self.cursor + len]; + self.move_next(len); Ok(result) } #[inline(always)] - pub fn read_i8(&mut self) -> Result { - Ok(self.read_u8()? as i8) + pub fn reset_cursor_to_here(&self) -> impl FnOnce(&mut Self) { + let raw_cursor = self.cursor; + move |this: &mut Self| { + this.cursor = raw_cursor; + } } + pub fn set_cursor(&mut self, cursor: usize) { + self.cursor = cursor; + } + + // ============ BOOL (TypeId = 1) ============ + #[inline(always)] - pub fn read_u16(&mut self) -> Result { - let slice = self.slice_after_cursor(); - let result = LittleEndian::read_u16(slice); - self.cursor += 2; - Ok(result) + pub fn read_bool(&mut self) -> Result { + Ok(self.read_u8()? != 0) } + // ============ INT8 (TypeId = 2) ============ + #[inline(always)] - pub fn read_i16(&mut self) -> Result { - Ok(self.read_u16()? as i16) + pub fn read_i8(&mut self) -> Result { + Ok(self.read_u8()? as i8) } + // ============ INT16 (TypeId = 3) ============ + #[inline(always)] - pub fn read_u32(&mut self) -> Result { - let slice = self.slice_after_cursor(); - let result = LittleEndian::read_u32(slice); - self.cursor += 4; - Ok(result) + pub fn read_i16(&mut self) -> Result { + Ok(self.read_u16()? as i16) } + // ============ INT32 (TypeId = 4) ============ + #[inline(always)] pub fn read_i32(&mut self) -> Result { Ok(self.read_u32()? 
as i32) } + // ============ VARINT32 (TypeId = 5) ============ + #[inline(always)] - pub fn read_u64(&mut self) -> Result { - let slice = self.slice_after_cursor(); - let result = LittleEndian::read_u64(slice); - self.cursor += 8; - Ok(result) + pub fn read_varint32(&mut self) -> Result { + let encoded = self.read_varuint32()?; + Ok(((encoded >> 1) as i32) ^ -((encoded & 1) as i32)) } + // ============ INT64 (TypeId = 6) ============ + #[inline(always)] - pub fn read_usize(&mut self) -> Result { - const SIZE: usize = std::mem::size_of::(); - match SIZE { - 2 => Ok(self.read_u16()? as usize), - 4 => Ok(self.read_varuint32()? as usize), - 8 => Ok(self.read_varuint64()? as usize), - _ => unreachable!("unsupported usize size"), - } + pub fn read_i64(&mut self) -> Result { + Ok(self.read_u64()? as i64) } + // ============ VARINT64 (TypeId = 7) ============ + #[inline(always)] - pub fn read_u128(&mut self) -> Result { - let slice = self.slice_after_cursor(); - let result = LittleEndian::read_u128(slice); - self.cursor += 16; - Ok(result) + pub fn read_varint64(&mut self) -> Result { + let encoded = self.read_varuint64()?; + Ok(((encoded >> 1) as i64) ^ -((encoded & 1) as i64)) } + // ============ TAGGED_INT64 (TypeId = 8) ============ + + /// Read signed fory Tagged(Small long as int) encoded i64. + /// If bit 0 of the first 4 bytes is 0, return the value >> 1 (arithmetic shift). + /// Otherwise, skip the flag byte and read 8 bytes as i64. #[inline(always)] - pub fn read_i128(&mut self) -> Result { - Ok(self.read_u128()? 
as i128) + pub fn read_tagged_i64(&mut self) -> Result { + self.check_bound(4)?; + let i = LittleEndian::read_i32(&self.bf[self.cursor..]); + if (i & 0b1) != 0b1 { + // Bit 0 is 0, small value encoded in 4 bytes + self.cursor += 4; + Ok((i >> 1) as i64) // arithmetic right shift preserves sign + } else { + // Bit 0 is 1, big value: skip flag byte and read 8 bytes + self.check_bound(9)?; + self.cursor += 1; + let value = LittleEndian::read_i64(&self.bf[self.cursor..]); + self.cursor += 8; + Ok(value) + } } + // ============ UINT8 (TypeId = 9) ============ + #[inline(always)] - pub fn read_isize(&mut self) -> Result { - const SIZE: usize = std::mem::size_of::(); - match SIZE { - 2 => Ok(self.read_i16()? as isize), - 4 => Ok(self.read_varint32()? as isize), - 8 => Ok(self.read_varint64()? as isize), - _ => unreachable!("unsupported isize size"), - } + pub fn peek_u8(&mut self) -> Result { + let result = self.value_at(self.cursor)?; + Ok(result) } #[inline(always)] - pub fn read_i64(&mut self) -> Result { - Ok(self.read_u64()? 
as i64) + pub fn read_u8(&mut self) -> Result { + let result = self.value_at(self.cursor)?; + self.move_next(1); + Ok(result) } + // ============ UINT16 (TypeId = 10) ============ + #[inline(always)] - pub fn read_f32(&mut self) -> Result { + pub fn read_u16(&mut self) -> Result { let slice = self.slice_after_cursor(); - let result = LittleEndian::read_f32(slice); - self.cursor += 4; + let result = LittleEndian::read_u16(slice); + self.cursor += 2; Ok(result) } + // ============ UINT32 (TypeId = 11) ============ + #[inline(always)] - pub fn read_f64(&mut self) -> Result { + pub fn read_u32(&mut self) -> Result { let slice = self.slice_after_cursor(); - let result = LittleEndian::read_f64(slice); - self.cursor += 8; + let result = LittleEndian::read_u32(slice); + self.cursor += 4; Ok(result) } + // ============ VAR_UINT32 (TypeId = 12) ============ + #[inline(always)] pub fn read_varuint32(&mut self) -> Result { let b0 = self.value_at(self.cursor)? as u32; @@ -644,12 +744,18 @@ impl<'a> Reader<'a> { Ok(encoded) } + // ============ UINT64 (TypeId = 13) ============ + #[inline(always)] - pub fn read_varint32(&mut self) -> Result { - let encoded = self.read_varuint32()?; - Ok(((encoded >> 1) as i32) ^ -((encoded & 1) as i32)) + pub fn read_u64(&mut self) -> Result { + let slice = self.slice_after_cursor(); + let result = LittleEndian::read_u64(slice); + self.cursor += 8; + Ok(result) } + // ============ VAR_UINT64 (TypeId = 14) ============ + #[inline(always)] pub fn read_varuint64(&mut self) -> Result { let b0 = self.value_at(self.cursor)? as u64; @@ -713,12 +819,51 @@ impl<'a> Reader<'a> { Ok(var64) } + // ============ TAGGED_UINT64 (TypeId = 15) ============ + + /// Read unsigned fory Tagged(Small long as int) encoded u64. + /// If bit 0 of the first 4 bytes is 0, return the value >> 1. + /// Otherwise, skip the flag byte and read 8 bytes as u64. 
#[inline(always)] - pub fn read_varint64(&mut self) -> Result { - let encoded = self.read_varuint64()?; - Ok(((encoded >> 1) as i64) ^ -((encoded & 1) as i64)) + pub fn read_tagged_u64(&mut self) -> Result { + self.check_bound(4)?; + let i = LittleEndian::read_u32(&self.bf[self.cursor..]); + if (i & 0b1) != 0b1 { + // Bit 0 is 0, small value encoded in 4 bytes + self.cursor += 4; + Ok((i >> 1) as u64) + } else { + // Bit 0 is 1, big value: skip flag byte and read 8 bytes + self.check_bound(9)?; + self.cursor += 1; + let value = LittleEndian::read_u64(&self.bf[self.cursor..]); + self.cursor += 8; + Ok(value) + } } + // ============ FLOAT32 (TypeId = 17) ============ + + #[inline(always)] + pub fn read_f32(&mut self) -> Result { + let slice = self.slice_after_cursor(); + let result = LittleEndian::read_f32(slice); + self.cursor += 4; + Ok(result) + } + + // ============ FLOAT64 (TypeId = 18) ============ + + #[inline(always)] + pub fn read_f64(&mut self) -> Result { + let slice = self.slice_after_cursor(); + let result = LittleEndian::read_f64(slice); + self.cursor += 8; + Ok(result) + } + + // ============ STRING (TypeId = 19) ============ + #[inline(always)] pub fn read_latin1_string(&mut self, len: usize) -> Result { self.check_bound(len)?; @@ -796,6 +941,45 @@ impl<'a> Reader<'a> { Ok(String::from_utf16_lossy(&units)) } + // ============ Rust-specific types (i128, u128, isize, usize) ============ + + #[inline(always)] + pub fn read_i128(&mut self) -> Result { + Ok(self.read_u128()? as i128) + } + + #[inline(always)] + pub fn read_u128(&mut self) -> Result { + let slice = self.slice_after_cursor(); + let result = LittleEndian::read_u128(slice); + self.cursor += 16; + Ok(result) + } + + #[inline(always)] + pub fn read_isize(&mut self) -> Result { + const SIZE: usize = std::mem::size_of::(); + match SIZE { + 2 => Ok(self.read_i16()? as isize), + 4 => Ok(self.read_varint32()? as isize), + 8 => Ok(self.read_varint64()? 
as isize), + _ => unreachable!("unsupported isize size"), + } + } + + #[inline(always)] + pub fn read_usize(&mut self) -> Result { + const SIZE: usize = std::mem::size_of::(); + match SIZE { + 2 => Ok(self.read_u16()? as usize), + 4 => Ok(self.read_varuint32()? as usize), + 8 => Ok(self.read_varuint64()? as usize), + _ => unreachable!("unsupported usize size"), + } + } + + // ============ Other helper methods ============ + #[inline(always)] pub fn read_varuint36small(&mut self) -> Result { let start = self.cursor; @@ -842,33 +1026,6 @@ impl<'a> Reader<'a> { } Ok(result) } - - #[inline(always)] - pub fn skip(&mut self, len: usize) -> Result<(), Error> { - self.check_bound(len)?; - self.move_next(len); - Ok(()) - } - - #[inline(always)] - pub fn read_bytes(&mut self, len: usize) -> Result<&[u8], Error> { - self.check_bound(len)?; - let result = &self.bf[self.cursor..self.cursor + len]; - self.move_next(len); - Ok(result) - } - - #[inline(always)] - pub fn reset_cursor_to_here(&self) -> impl FnOnce(&mut Self) { - let raw_cursor = self.cursor; - move |this: &mut Self| { - this.cursor = raw_cursor; - } - } - - pub fn set_cursor(&mut self, cursor: usize) { - self.cursor = cursor; - } } #[allow(clippy::needless_lifetimes)] diff --git a/rust/fory-core/src/meta/type_meta.rs b/rust/fory-core/src/meta/type_meta.rs index da253eac2d..9d545c32bb 100644 --- a/rust/fory-core/src/meta/type_meta.rs +++ b/rust/fory-core/src/meta/type_meta.rs @@ -622,17 +622,17 @@ impl TypeMeta { TypeId::INT8 => 1, TypeId::INT16 => 2, TypeId::INT32 => 4, - TypeId::VAR32 => 4, + TypeId::VARINT32 => 4, TypeId::INT64 => 8, - TypeId::VAR64 => 8, - TypeId::H64 => 8, + TypeId::VARINT64 => 8, + TypeId::TAGGED_INT64 => 8, TypeId::UINT8 => 1, TypeId::UINT16 => 2, TypeId::UINT32 => 4, - TypeId::VARU32 => 4, + TypeId::VAR_UINT32 => 4, TypeId::UINT64 => 8, - TypeId::VARU64 => 8, - TypeId::HU64 => 8, + TypeId::VAR_UINT64 => 8, + TypeId::TAGGED_UINT64 => 8, TypeId::FLOAT16 => 2, TypeId::FLOAT32 => 4, 
TypeId::FLOAT64 => 8, @@ -649,9 +649,9 @@ impl TypeMeta { [ TypeId::INT32 as u32, TypeId::INT64 as u32, - TypeId::VAR32 as u32, - TypeId::VAR64 as u32, - TypeId::H64 as u32, + TypeId::VARINT32 as u32, + TypeId::VARINT64 as u32, + TypeId::TAGGED_INT64 as u32, ] .contains(&type_id) } diff --git a/rust/fory-core/src/resolver/type_resolver.rs b/rust/fory-core/src/resolver/type_resolver.rs index a6d2aa487a..fc5d4c0fd8 100644 --- a/rust/fory-core/src/resolver/type_resolver.rs +++ b/rust/fory-core/src/resolver/type_resolver.rs @@ -590,16 +590,16 @@ impl TypeResolver { self.register_internal_serializer::(TypeId::BOOL)?; self.register_internal_serializer::(TypeId::INT8)?; self.register_internal_serializer::(TypeId::INT16)?; - self.register_internal_serializer::(TypeId::INT32)?; - self.register_internal_serializer::(TypeId::INT64)?; + self.register_internal_serializer::(TypeId::VARINT32)?; + self.register_internal_serializer::(TypeId::VARINT64)?; self.register_internal_serializer::(TypeId::ISIZE)?; self.register_internal_serializer::(TypeId::INT128)?; self.register_internal_serializer::(TypeId::FLOAT32)?; self.register_internal_serializer::(TypeId::FLOAT64)?; self.register_internal_serializer::(TypeId::UINT8)?; self.register_internal_serializer::(TypeId::UINT16)?; - self.register_internal_serializer::(TypeId::UINT32)?; - self.register_internal_serializer::(TypeId::UINT64)?; + self.register_internal_serializer::(TypeId::VAR_UINT32)?; + self.register_internal_serializer::(TypeId::VAR_UINT64)?; self.register_internal_serializer::(TypeId::USIZE)?; self.register_internal_serializer::(TypeId::U128)?; self.register_internal_serializer::(TypeId::STRING)?; diff --git a/rust/fory-core/src/serializer/number.rs b/rust/fory-core/src/serializer/number.rs index 1e8aa66de0..7e62aecac9 100644 --- a/rust/fory-core/src/serializer/number.rs +++ b/rust/fory-core/src/serializer/number.rs @@ -89,13 +89,13 @@ impl_num_serializer!( i32, Writer::write_varint32, Reader::read_varint32, - TypeId::INT32 
+ TypeId::VARINT32 ); impl_num_serializer!( i64, Writer::write_varint64, Reader::read_varint64, - TypeId::INT64 + TypeId::VARINT64 ); impl_num_serializer!(f32, Writer::write_f32, Reader::read_f32, TypeId::FLOAT32); impl_num_serializer!(f64, Writer::write_f64, Reader::read_f64, TypeId::FLOAT64); diff --git a/rust/fory-core/src/serializer/skip.rs b/rust/fory-core/src/serializer/skip.rs index b17d9e0b8b..5c839fa173 100644 --- a/rust/fory-core/src/serializer/skip.rs +++ b/rust/fory-core/src/serializer/skip.rs @@ -465,126 +465,283 @@ fn skip_value( } } - // Match on built-in types + // Match on built-in types (ordered by TypeId enum values) match type_id_num { - // Basic types + // ============ UNKNOWN (TypeId = 0) ============ + types::UNKNOWN => { + // UNKNOWN is used for polymorphic types in cross-language serialization + return skip_any_value(context, false); + } + + // ============ BOOL (TypeId = 1) ============ types::BOOL => { ::fory_read_data(context)?; } + + // ============ INT8 (TypeId = 2) ============ types::INT8 => { ::fory_read_data(context)?; } + + // ============ INT16 (TypeId = 3) ============ types::INT16 => { ::fory_read_data(context)?; } + + // ============ INT32 (TypeId = 4) ============ types::INT32 => { + context.reader.read_i32()?; + } + + // ============ VARINT32 (TypeId = 5) ============ + types::VARINT32 => { ::fory_read_data(context)?; } + + // ============ INT64 (TypeId = 6) ============ types::INT64 => { + context.reader.read_i64()?; + } + + // ============ VARINT64 (TypeId = 7) ============ + types::VARINT64 => { ::fory_read_data(context)?; } + + // ============ TAGGED_INT64 (TypeId = 8) ============ + types::TAGGED_INT64 => { + context.reader.read_tagged_i64()?; + } + + // ============ UINT8 (TypeId = 9) ============ + types::UINT8 => { + ::fory_read_data(context)?; + } + + // ============ UINT16 (TypeId = 10) ============ + types::UINT16 => { + ::fory_read_data(context)?; + } + + // ============ UINT32 (TypeId = 11) ============ + 
types::UINT32 => { + context.reader.read_u32()?; + } + + // ============ VAR_UINT32 (TypeId = 12) ============ + types::VAR_UINT32 => { + ::fory_read_data(context)?; + } + + // ============ UINT64 (TypeId = 13) ============ + types::UINT64 => { + context.reader.read_u64()?; + } + + // ============ VAR_UINT64 (TypeId = 14) ============ + types::VAR_UINT64 => { + ::fory_read_data(context)?; + } + + // ============ TAGGED_UINT64 (TypeId = 15) ============ + types::TAGGED_UINT64 => { + context.reader.read_tagged_u64()?; + } + + // ============ FLOAT32 (TypeId = 17) ============ types::FLOAT32 => { ::fory_read_data(context)?; } + + // ============ FLOAT64 (TypeId = 18) ============ types::FLOAT64 => { ::fory_read_data(context)?; } + + // ============ STRING (TypeId = 19) ============ types::STRING => { ::fory_read_data(context)?; } - types::LOCAL_DATE => { - ::fory_read_data(context)?; + + // ============ LIST (TypeId = 20) ============ + // ============ SET (TypeId = 21) ============ + types::LIST | types::SET => { + return skip_collection(context, field_type); } - types::TIMESTAMP => { - ::fory_read_data(context)?; + + // ============ MAP (TypeId = 22) ============ + types::MAP => { + return skip_map(context, field_type); + } + + // ============ ENUM (TypeId = 23) ============ + types::ENUM => { + let _ordinal = context.reader.read_varuint32()?; + } + + // ============ NAMED_ENUM (TypeId = 24) ============ + types::NAMED_ENUM => { + let _ordinal = context.reader.read_varuint32()?; + } + + // ============ STRUCT (TypeId = 25) ============ + types::STRUCT => { + return skip_struct(context, type_id_num, type_info); + } + + // ============ COMPATIBLE_STRUCT (TypeId = 26) ============ + types::COMPATIBLE_STRUCT => { + return skip_struct(context, type_id_num, type_info); + } + + // ============ NAMED_STRUCT (TypeId = 27) ============ + types::NAMED_STRUCT => { + return skip_struct(context, type_id_num, type_info); + } + + // ============ NAMED_COMPATIBLE_STRUCT (TypeId = 
28) ============ + types::NAMED_COMPATIBLE_STRUCT => { + return skip_struct(context, type_id_num, type_info); + } + + // ============ EXT (TypeId = 29) ============ + types::EXT => { + return skip_ext(context, type_id_num, type_info); + } + + // ============ NAMED_EXT (TypeId = 30) ============ + types::NAMED_EXT => { + return skip_ext(context, type_id_num, type_info); + } + + // ============ UNION (TypeId = 31) ============ + types::UNION => { + // UNION format: index (varuint32) + value (xreadRef) + let _ = context.reader.read_varuint32()?; + return skip_any_value(context, true); + } + + // ============ NONE (TypeId = 32) ============ + types::NONE => { + // NONE represents an empty/unit value with no data - nothing to skip + return Ok(()); } + + // ============ DURATION (TypeId = 33) ============ types::DURATION => { ::fory_read_data(context)?; } + + // ============ TIMESTAMP (TypeId = 34) ============ + types::TIMESTAMP => { + ::fory_read_data(context)?; + } + + // ============ LOCAL_DATE (TypeId = 35) ============ + types::LOCAL_DATE => { + ::fory_read_data(context)?; + } + + // ============ BINARY (TypeId = 37) ============ types::BINARY => { as Serializer>::fory_read_data(context)?; } + + // ============ BOOL_ARRAY (TypeId = 39) ============ types::BOOL_ARRAY => { as Serializer>::fory_read_data(context)?; } + + // ============ INT8_ARRAY (TypeId = 40) ============ types::INT8_ARRAY => { as Serializer>::fory_read_data(context)?; } + + // ============ INT16_ARRAY (TypeId = 41) ============ types::INT16_ARRAY => { as Serializer>::fory_read_data(context)?; } + + // ============ INT32_ARRAY (TypeId = 42) ============ types::INT32_ARRAY => { as Serializer>::fory_read_data(context)?; } + + // ============ INT64_ARRAY (TypeId = 43) ============ types::INT64_ARRAY => { as Serializer>::fory_read_data(context)?; } - types::FLOAT32_ARRAY => { - as Serializer>::fory_read_data(context)?; - } - types::FLOAT64_ARRAY => { - as Serializer>::fory_read_data(context)?; - } - 
types::UINT8 => { - ::fory_read_data(context)?; - } - types::UINT16 => { - ::fory_read_data(context)?; - } - types::UINT32 => { - ::fory_read_data(context)?; - } - types::UINT64 => { - ::fory_read_data(context)?; - } - types::U128 => { - ::fory_read_data(context)?; + + // ============ UINT8_ARRAY (TypeId = 44) ============ + types::UINT8_ARRAY => { + as Serializer>::fory_read_data(context)?; } + + // ============ UINT16_ARRAY (TypeId = 45) ============ types::UINT16_ARRAY => { as Serializer>::fory_read_data(context)?; } + + // ============ UINT32_ARRAY (TypeId = 46) ============ types::UINT32_ARRAY => { as Serializer>::fory_read_data(context)?; } + + // ============ UINT64_ARRAY (TypeId = 47) ============ types::UINT64_ARRAY => { as Serializer>::fory_read_data(context)?; } - types::U128_ARRAY => { - as Serializer>::fory_read_data(context)?; + + // ============ FLOAT32_ARRAY (TypeId = 49) ============ + types::FLOAT32_ARRAY => { + as Serializer>::fory_read_data(context)?; } - // Container types - types::LIST | types::SET => { - return skip_collection(context, field_type); + // ============ FLOAT64_ARRAY (TypeId = 50) ============ + types::FLOAT64_ARRAY => { + as Serializer>::fory_read_data(context)?; } - types::MAP => { - return skip_map(context, field_type); + + // ============ Rust-specific types ============ + + // ============ U128 (TypeId = 64) ============ + types::U128 => { + ::fory_read_data(context)?; } - // Named types - types::NAMED_ENUM => { - let _ordinal = context.reader.read_varuint32()?; + // ============ INT128 (TypeId = 65) ============ + types::INT128 => { + ::fory_read_data(context)?; } - types::NAMED_COMPATIBLE_STRUCT => { - return skip_struct(context, type_id_num, type_info); + + // ============ USIZE (TypeId = 66) ============ + types::USIZE => { + ::fory_read_data(context)?; } - types::NAMED_EXT => { - return skip_ext(context, type_id_num, type_info); + + // ============ ISIZE (TypeId = 67) ============ + types::ISIZE => { + 
::fory_read_data(context)?; } - types::UNKNOWN => { - // UNKNOWN (0) is used for polymorphic types in cross-language serialization - return skip_any_value(context, false); + + // ============ U128_ARRAY (TypeId = 68) ============ + types::U128_ARRAY => { + as Serializer>::fory_read_data(context)?; } - types::NONE => { - // NONE represents an empty/unit value with no data - nothing to skip - return Ok(()); + + // ============ INT128_ARRAY (TypeId = 69) ============ + types::INT128_ARRAY => { + as Serializer>::fory_read_data(context)?; } - types::UNION => { - // UNION format: index (varuint32) + value (xreadRef) - // Skip the index - let _ = context.reader.read_varuint32()?; - // Skip the value (which is written via xwriteRef) - return skip_any_value(context, true); + + // ============ USIZE_ARRAY (TypeId = 70) ============ + types::USIZE_ARRAY => { + as Serializer>::fory_read_data(context)?; + } + + // ============ ISIZE_ARRAY (TypeId = 71) ============ + types::ISIZE_ARRAY => { + as Serializer>::fory_read_data(context)?; } _ => { diff --git a/rust/fory-core/src/serializer/unsigned_number.rs b/rust/fory-core/src/serializer/unsigned_number.rs index bff0c759d9..2c859d9fb7 100644 --- a/rust/fory-core/src/serializer/unsigned_number.rs +++ b/rust/fory-core/src/serializer/unsigned_number.rs @@ -153,8 +153,18 @@ macro_rules! 
impl_rust_unsigned_num_serializer { // xlang-compatible unsigned types impl_xlang_unsigned_num_serializer!(u8, Writer::write_u8, Reader::read_u8, TypeId::UINT8); impl_xlang_unsigned_num_serializer!(u16, Writer::write_u16, Reader::read_u16, TypeId::UINT16); -impl_xlang_unsigned_num_serializer!(u32, Writer::write_u32, Reader::read_u32, TypeId::UINT32); -impl_xlang_unsigned_num_serializer!(u64, Writer::write_u64, Reader::read_u64, TypeId::UINT64); +impl_xlang_unsigned_num_serializer!( + u32, + Writer::write_varuint32, + Reader::read_varuint32, + TypeId::VAR_UINT32 +); +impl_xlang_unsigned_num_serializer!( + u64, + Writer::write_varuint64, + Reader::read_varuint64, + TypeId::VAR_UINT64 +); // Rust-specific unsigned types (not supported in xlang mode) impl_rust_unsigned_num_serializer!(u128, Writer::write_u128, Reader::read_u128, TypeId::U128); diff --git a/rust/fory-core/src/types.rs b/rust/fory-core/src/types.rs index e64f0ce376..879998eaa3 100644 --- a/rust/fory-core/src/types.rs +++ b/rust/fory-core/src/types.rs @@ -104,17 +104,17 @@ pub enum TypeId { INT8 = 2, INT16 = 3, INT32 = 4, - VAR32 = 5, + VARINT32 = 5, INT64 = 6, - VAR64 = 7, - H64 = 8, + VARINT64 = 7, + TAGGED_INT64 = 8, UINT8 = 9, UINT16 = 10, UINT32 = 11, - VARU32 = 12, + VAR_UINT32 = 12, UINT64 = 13, - VARU64 = 14, - HU64 = 15, + VAR_UINT64 = 14, + TAGGED_UINT64 = 15, FLOAT16 = 16, FLOAT32 = 17, FLOAT64 = 18, @@ -175,17 +175,17 @@ pub const BOOL: u32 = TypeId::BOOL as u32; pub const INT8: u32 = TypeId::INT8 as u32; pub const INT16: u32 = TypeId::INT16 as u32; pub const INT32: u32 = TypeId::INT32 as u32; -pub const VAR32: u32 = TypeId::VAR32 as u32; +pub const VARINT32: u32 = TypeId::VARINT32 as u32; pub const INT64: u32 = TypeId::INT64 as u32; -pub const VAR64: u32 = TypeId::VAR64 as u32; -pub const H64: u32 = TypeId::H64 as u32; +pub const VARINT64: u32 = TypeId::VARINT64 as u32; +pub const TAGGED_INT64: u32 = TypeId::TAGGED_INT64 as u32; pub const UINT8: u32 = TypeId::UINT8 as u32; pub const UINT16: 
u32 = TypeId::UINT16 as u32; pub const UINT32: u32 = TypeId::UINT32 as u32; -pub const VARU32: u32 = TypeId::VARU32 as u32; +pub const VAR_UINT32: u32 = TypeId::VAR_UINT32 as u32; pub const UINT64: u32 = TypeId::UINT64 as u32; -pub const VARU64: u32 = TypeId::VARU64 as u32; -pub const HU64: u32 = TypeId::HU64 as u32; +pub const VAR_UINT64: u32 = TypeId::VAR_UINT64 as u32; +pub const TAGGED_UINT64: u32 = TypeId::TAGGED_UINT64 as u32; pub const FLOAT16: u32 = TypeId::FLOAT16 as u32; pub const FLOAT32: u32 = TypeId::FLOAT32 as u32; pub const FLOAT64: u32 = TypeId::FLOAT64 as u32; @@ -544,17 +544,17 @@ pub fn format_type_id(type_id: u32) -> String { 2 => "INT8", 3 => "INT16", 4 => "INT32", - 5 => "VAR32", + 5 => "VARINT32", 6 => "INT64", - 7 => "VAR64", - 8 => "H64", + 7 => "VARINT64", + 8 => "TAGGED_INT64", 9 => "UINT8", 10 => "UINT16", 11 => "UINT32", - 12 => "VARU32", + 12 => "VAR_UINT32", 13 => "UINT64", - 14 => "VARU64", - 15 => "HU64", + 14 => "VAR_UINT64", + 15 => "TAGGED_UINT64", 16 => "FLOAT16", 17 => "FLOAT32", 18 => "FLOAT64", diff --git a/rust/fory-derive/src/object/util.rs b/rust/fory-derive/src/object/util.rs index 669c751366..5b807145a1 100644 --- a/rust/fory-derive/src/object/util.rs +++ b/rust/fory-derive/src/object/util.rs @@ -877,10 +877,10 @@ fn get_primitive_type_size(type_id_num: u32) -> i32 { TypeId::INT8 => 1, TypeId::INT16 => 2, TypeId::INT32 => 4, - TypeId::VAR32 => 4, + TypeId::VARINT32 => 4, TypeId::INT64 => 8, - TypeId::VAR64 => 8, - TypeId::H64 => 8, + TypeId::VARINT64 => 8, + TypeId::TAGGED_INT64 => 8, TypeId::FLOAT16 => 2, TypeId::FLOAT32 => 4, TypeId::FLOAT64 => 8, @@ -888,10 +888,10 @@ fn get_primitive_type_size(type_id_num: u32) -> i32 { TypeId::UINT8 => 1, TypeId::UINT16 => 2, TypeId::UINT32 => 4, - TypeId::VARU32 => 4, + TypeId::VAR_UINT32 => 4, TypeId::UINT64 => 8, - TypeId::VARU64 => 8, - TypeId::HU64 => 8, + TypeId::VAR_UINT64 => 8, + TypeId::TAGGED_UINT64 => 8, TypeId::U128 => 16, TypeId::USIZE => std::mem::size_of::() as i32, 
TypeId::ISIZE => std::mem::size_of::() as i32, @@ -905,9 +905,9 @@ fn is_compress(type_id: u32) -> bool { [ TypeId::INT32 as u32, TypeId::INT64 as u32, - TypeId::VAR32 as u32, - TypeId::VAR64 as u32, - TypeId::H64 as u32, + TypeId::VARINT32 as u32, + TypeId::VARINT64 as u32, + TypeId::TAGGED_INT64 as u32, ] .contains(&type_id) } From 68512fca123cc51cf9fb53574cd45d7639bcff53 Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Wed, 7 Jan 2026 15:49:41 +0800 Subject: [PATCH 08/44] fix rust error --- AGENTS.md | 2 +- rust/fory-core/src/meta/type_meta.rs | 29 +++++++++++++++++++++++++++ rust/fory-core/src/serializer/list.rs | 18 +++++++++++++---- rust/fory-core/src/types.rs | 10 ++++++++- rust/fory-derive/src/object/util.rs | 14 ++++++++----- 5 files changed, 62 insertions(+), 11 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 5e7801858d..928c8da619 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -216,7 +216,7 @@ Run Rust xlang tests: cd java mvn -T16 install -DskipTests cd fory-core -FORY_RUST_JAVA_CI=1 ENABLE_FORY_DEBUG_OUTPUT=1 mvn test -Dtest=org.apache.fory.xlang.RustXlangTest +RUST_BACKTRACE=1 FORY_PANIC_ON_ERROR=1 FORY_RUST_JAVA_CI=1 ENABLE_FORY_DEBUG_OUTPUT=1 mvn test -Dtest=org.apache.fory.xlang.RustXlangTest ``` ### JavaScript/TypeScript Development diff --git a/rust/fory-core/src/meta/type_meta.rs b/rust/fory-core/src/meta/type_meta.rs index 9d545c32bb..6823d18927 100644 --- a/rust/fory-core/src/meta/type_meta.rs +++ b/rust/fory-core/src/meta/type_meta.rs @@ -749,8 +749,21 @@ impl TypeMeta { } fn assign_field_ids(type_info_current: &TypeInfo, field_infos: &mut [FieldInfo]) { + if crate::util::ENABLE_FORY_DEBUG_OUTPUT { + eprintln!("[fory-debug] assign_field_ids called for type: {:?}", type_info_current.get_type_name()); + for f in field_infos.iter() { + eprintln!("[fory-debug] remote field before assign: name={}, field_id={}, type={:?}", + f.field_name, f.field_id, f.field_type); + } + } let type_meta = type_info_current.get_type_meta(); let local_field_infos = 
type_meta.get_field_infos(); + if crate::util::ENABLE_FORY_DEBUG_OUTPUT { + for f in local_field_infos.iter() { + eprintln!("[fory-debug] local field: name={}, field_id={}, type={:?}", + f.field_name, f.field_id, f.field_type); + } + } // Build maps for both name-based and ID-based lookup. // The value is the SORTED INDEX (position in local_field_infos), not the field's ID attribute. @@ -792,13 +805,29 @@ impl TypeMeta { // Use FieldType comparison which normalizes type IDs for cross-language // schema evolution (e.g., UNKNOWN=0 matches STRUCT variants) if field.field_type != local_info.field_type { + if crate::util::ENABLE_FORY_DEBUG_OUTPUT { + eprintln!( + "[fory-debug] field type mismatch: name={}, remote_type={:?}, local_type={:?}", + field.field_name, field.field_type, local_info.field_type + ); + } field.field_id = -1; // Type mismatch, skip } else { // Assign SORTED INDEX for matching in generated code field.field_id = sorted_index as i16; + if crate::util::ENABLE_FORY_DEBUG_OUTPUT { + eprintln!("[fory-debug] matched field: name={}, assigned_field_id={}", + field.field_name, field.field_id); + } } } None => { + if crate::util::ENABLE_FORY_DEBUG_OUTPUT { + eprintln!( + "[fory-debug] no local match for field: name={}", + field.field_name + ); + } field.field_id = -1; // No match, skip } } diff --git a/rust/fory-core/src/serializer/list.rs b/rust/fory-core/src/serializer/list.rs index ed9a29b945..7bb84cd674 100644 --- a/rust/fory-core/src/serializer/list.rs +++ b/rust/fory-core/src/serializer/list.rs @@ -39,14 +39,18 @@ pub(super) fn get_primitive_type_id() -> TypeId { TypeId::BOOL => TypeId::BOOL_ARRAY, TypeId::INT8 => TypeId::INT8_ARRAY, TypeId::INT16 => TypeId::INT16_ARRAY, - TypeId::INT32 => TypeId::INT32_ARRAY, - TypeId::INT64 => TypeId::INT64_ARRAY, + // Handle both INT32 and VARINT32 (i32 uses VARINT32 in xlang mode) + TypeId::INT32 | TypeId::VARINT32 => TypeId::INT32_ARRAY, + // Handle INT64, VARINT64, and TAGGED_INT64 (i64 uses VARINT64 in xlang mode) 
+ TypeId::INT64 | TypeId::VARINT64 | TypeId::TAGGED_INT64 => TypeId::INT64_ARRAY, TypeId::FLOAT32 => TypeId::FLOAT32_ARRAY, TypeId::FLOAT64 => TypeId::FLOAT64_ARRAY, TypeId::UINT8 => TypeId::BINARY, TypeId::UINT16 => TypeId::UINT16_ARRAY, - TypeId::UINT32 => TypeId::UINT32_ARRAY, - TypeId::UINT64 => TypeId::UINT64_ARRAY, + // Handle both UINT32 and VAR_UINT32 (u32 uses VAR_UINT32 in xlang mode) + TypeId::UINT32 | TypeId::VAR_UINT32 => TypeId::UINT32_ARRAY, + // Handle UINT64, VAR_UINT64, and TAGGED_UINT64 (u64 uses VAR_UINT64 in xlang mode) + TypeId::UINT64 | TypeId::VAR_UINT64 | TypeId::TAGGED_UINT64 => TypeId::UINT64_ARRAY, TypeId::U128 => TypeId::U128_ARRAY, TypeId::INT128 => TypeId::INT128_ARRAY, TypeId::USIZE => TypeId::USIZE_ARRAY, @@ -66,14 +70,20 @@ pub(super) fn is_primitive_type() -> bool { | TypeId::INT8 | TypeId::INT16 | TypeId::INT32 + | TypeId::VARINT32 | TypeId::INT64 + | TypeId::VARINT64 + | TypeId::TAGGED_INT64 | TypeId::INT128 | TypeId::FLOAT32 | TypeId::FLOAT64 | TypeId::UINT8 | TypeId::UINT16 | TypeId::UINT32 + | TypeId::VAR_UINT32 | TypeId::UINT64 + | TypeId::VAR_UINT64 + | TypeId::TAGGED_UINT64 | TypeId::U128, ) } diff --git a/rust/fory-core/src/types.rs b/rust/fory-core/src/types.rs index 879998eaa3..aae3443311 100644 --- a/rust/fory-core/src/types.rs +++ b/rust/fory-core/src/types.rs @@ -298,22 +298,30 @@ pub static BASIC_TYPES: [TypeId; 33] = [ TypeId::USIZE_ARRAY, ]; -pub static PRIMITIVE_TYPES: [u32; 14] = [ +pub static PRIMITIVE_TYPES: [u32; 22] = [ TypeId::BOOL as u32, TypeId::INT8 as u32, TypeId::INT16 as u32, TypeId::INT32 as u32, + TypeId::VARINT32 as u32, TypeId::INT64 as u32, + TypeId::VARINT64 as u32, + TypeId::TAGGED_INT64 as u32, TypeId::UINT8 as u32, TypeId::UINT16 as u32, TypeId::UINT32 as u32, + TypeId::VAR_UINT32 as u32, TypeId::UINT64 as u32, + TypeId::VAR_UINT64 as u32, + TypeId::TAGGED_UINT64 as u32, + TypeId::FLOAT16 as u32, TypeId::FLOAT32 as u32, TypeId::FLOAT64 as u32, // Rust-specific TypeId::U128 as u32, 
TypeId::INT128 as u32, TypeId::USIZE as u32, + TypeId::ISIZE as u32, ]; pub static PRIMITIVE_ARRAY_TYPES: [u32; 15] = [ diff --git a/rust/fory-derive/src/object/util.rs b/rust/fory-derive/src/object/util.rs index 5b807145a1..3b7746e745 100644 --- a/rust/fory-derive/src/object/util.rs +++ b/rust/fory-derive/src/object/util.rs @@ -689,14 +689,18 @@ fn get_primitive_type_id(ty: &str) -> u32 { "bool" => TypeId::BOOL as u32, "i8" => TypeId::INT8 as u32, "i16" => TypeId::INT16 as u32, - "i32" => TypeId::INT32 as u32, - "i64" => TypeId::INT64 as u32, + // Use VARINT32 for i32 to match Java xlang mode and Rust type resolver registration + "i32" => TypeId::VARINT32 as u32, + // Use VARINT64 for i64 to match Java xlang mode and Rust type resolver registration + "i64" => TypeId::VARINT64 as u32, "f32" => TypeId::FLOAT32 as u32, "f64" => TypeId::FLOAT64 as u32, "u8" => TypeId::UINT8 as u32, "u16" => TypeId::UINT16 as u32, - "u32" => TypeId::UINT32 as u32, - "u64" => TypeId::UINT64 as u32, + // Use VAR_UINT32 for u32 to match Rust type resolver registration + "u32" => TypeId::VAR_UINT32 as u32, + // Use VAR_UINT64 for u64 to match Rust type resolver registration + "u64" => TypeId::VAR_UINT64 as u32, "u128" => TypeId::U128 as u32, "i128" => TypeId::INT128 as u32, _ => unreachable!("Unknown primitive type: {}", ty), @@ -1214,7 +1218,7 @@ pub(crate) fn gen_struct_version_hash_ts(fields: &[&Field]) -> TokenStream { const VERSION_HASH: i32 = #version_hash; if fory_core::util::ENABLE_FORY_DEBUG_OUTPUT { println!( - "[fory-debug] struct {} version fingerprint=\"{}\" hash={}", + "[rust][fory-debug] struct {} version fingerprint=\"{}\" hash={}", std::any::type_name::(), #fingerprint, VERSION_HASH From cd34fac699ff000e8070d7b109c1cee74d25d412 Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Wed, 7 Jan 2026 16:14:27 +0800 Subject: [PATCH 09/44] fix go unsigned support --- cpp/fory/serialization/type_resolver.cc | 16 +- go/fory/map_primitive.go | 8 +- go/fory/reader.go | 22 +- 
go/fory/struct.go | 195 +++++++++--------- go/fory/type_resolver.go | 72 +++---- go/fory/types.go | 254 +++++++++++++----------- go/fory/writer.go | 22 +- 7 files changed, 310 insertions(+), 279 deletions(-) diff --git a/cpp/fory/serialization/type_resolver.cc b/cpp/fory/serialization/type_resolver.cc index afea138b7e..2a328be46d 100644 --- a/cpp/fory/serialization/type_resolver.cc +++ b/cpp/fory/serialization/type_resolver.cc @@ -606,16 +606,24 @@ int32_t get_primitive_type_size(uint32_t type_id) { switch (static_cast(type_id)) { case TypeId::BOOL: case TypeId::INT8: + case TypeId::UINT8: return 1; case TypeId::INT16: + case TypeId::UINT16: case TypeId::FLOAT16: return 2; case TypeId::INT32: case TypeId::VARINT32: + case TypeId::UINT32: + case TypeId::VAR_UINT32: case TypeId::FLOAT32: return 4; case TypeId::INT64: case TypeId::VARINT64: + case TypeId::TAGGED_INT64: + case TypeId::UINT64: + case TypeId::VAR_UINT64: + case TypeId::TAGGED_UINT64: case TypeId::FLOAT64: return 8; default: @@ -627,7 +635,13 @@ bool is_compress(uint32_t type_id) { return type_id == static_cast(TypeId::INT32) || type_id == static_cast(TypeId::INT64) || type_id == static_cast(TypeId::VARINT32) || - type_id == static_cast(TypeId::VARINT64); + type_id == static_cast(TypeId::VARINT64) || + type_id == static_cast(TypeId::TAGGED_INT64) || + type_id == static_cast(TypeId::UINT32) || + type_id == static_cast(TypeId::UINT64) || + type_id == static_cast(TypeId::VAR_UINT32) || + type_id == static_cast(TypeId::VAR_UINT64) || + type_id == static_cast(TypeId::TAGGED_UINT64); } // Numeric field sorter (for primitive fields) diff --git a/go/fory/map_primitive.go b/go/fory/map_primitive.go index 53c04cb428..cb7c2f9d8e 100644 --- a/go/fory/map_primitive.go +++ b/go/fory/map_primitive.go @@ -154,7 +154,7 @@ func writeMapStringInt64(buf *ByteBuffer, m map[string]int64, hasGenerics bool) } else { buf.WriteUint8(0) buf.WriteUint8(uint8(chunkSize)) - buf.WriteVaruint32Small7(uint32(STRING)) // key type + 
buf.WriteVaruint32Small7(uint32(STRING)) // key type buf.WriteVaruint32Small7(uint32(VARINT64)) // value type } @@ -228,7 +228,7 @@ func writeMapStringInt32(buf *ByteBuffer, m map[string]int32, hasGenerics bool) } else { buf.WriteUint8(0) buf.WriteUint8(uint8(chunkSize)) - buf.WriteVaruint32Small7(uint32(STRING)) // key type + buf.WriteVaruint32Small7(uint32(STRING)) // key type buf.WriteVaruint32Small7(uint32(VARINT32)) // value type } @@ -302,7 +302,7 @@ func writeMapStringInt(buf *ByteBuffer, m map[string]int, hasGenerics bool) { } else { buf.WriteUint8(0) buf.WriteUint8(uint8(chunkSize)) - buf.WriteVaruint32Small7(uint32(STRING)) // key type + buf.WriteVaruint32Small7(uint32(STRING)) // key type buf.WriteVaruint32Small7(uint32(VARINT64)) // value type (int serialized as varint64) } @@ -376,7 +376,7 @@ func writeMapStringFloat64(buf *ByteBuffer, m map[string]float64, hasGenerics bo } else { buf.WriteUint8(0) buf.WriteUint8(uint8(chunkSize)) - buf.WriteVaruint32Small7(uint32(STRING)) // key type + buf.WriteVaruint32Small7(uint32(STRING)) // key type buf.WriteVaruint32Small7(uint32(FLOAT64)) // value type } diff --git a/go/fory/reader.go b/go/fory/reader.go index 2248947510..1b65fc19e5 100644 --- a/go/fory/reader.go +++ b/go/fory/reader.go @@ -180,31 +180,31 @@ func (c *ReadContext) ReadTypeId() TypeId { return TypeId(c.buffer.ReadVaruint32Small7(c.Err())) } -// readFast reads a value using fast path based on StaticTypeId -func (c *ReadContext) readFast(ptr unsafe.Pointer, ct StaticTypeId) { +// readFast reads a value using fast path based on DispatchId +func (c *ReadContext) readFast(ptr unsafe.Pointer, ct DispatchId) { err := c.Err() switch ct { - case ConcreteTypeBool: + case BoolDispatchId: *(*bool)(ptr) = c.buffer.ReadBool(err) - case ConcreteTypeInt8: + case Int8DispatchId: *(*int8)(ptr) = int8(c.buffer.ReadByte(err)) - case ConcreteTypeInt16: + case Int16DispatchId: *(*int16)(ptr) = c.buffer.ReadInt16(err) - case ConcreteTypeInt32: + case Int32DispatchId: 
*(*int32)(ptr) = c.buffer.ReadVarint32(err) - case ConcreteTypeInt: + case IntDispatchId: if strconv.IntSize == 64 { *(*int)(ptr) = int(c.buffer.ReadVarint64(err)) } else { *(*int)(ptr) = int(c.buffer.ReadVarint32(err)) } - case ConcreteTypeInt64: + case Int64DispatchId: *(*int64)(ptr) = c.buffer.ReadVarint64(err) - case ConcreteTypeFloat32: + case Float32DispatchId: *(*float32)(ptr) = c.buffer.ReadFloat32(err) - case ConcreteTypeFloat64: + case Float64DispatchId: *(*float64)(ptr) = c.buffer.ReadFloat64(err) - case ConcreteTypeString: + case StringDispatchId: *(*string)(ptr) = readString(c.buffer, err) } } diff --git a/go/fory/struct.go b/go/fory/struct.go index e6b7e99f13..b81e1857b7 100644 --- a/go/fory/struct.go +++ b/go/fory/struct.go @@ -38,7 +38,7 @@ type FieldInfo struct { Name string Offset uintptr Type reflect.Type - StaticId StaticTypeId + StaticId DispatchId TypeId TypeId // Fory type ID for the serializer Serializer Serializer Referencable bool @@ -332,27 +332,27 @@ func (s *structSerializer) WriteData(ctx *WriteContext, value reflect.Value) { fieldPtr := unsafe.Add(ptr, field.Offset) bufOffset := baseOffset + field.WriteOffset switch field.StaticId { - case ConcreteTypeBool: + case BoolDispatchId: if *(*bool)(fieldPtr) { data[bufOffset] = 1 } else { data[bufOffset] = 0 } - case ConcreteTypeInt8: + case Int8DispatchId: data[bufOffset] = *(*byte)(fieldPtr) - case ConcreteTypeInt16: + case Int16DispatchId: if isLittleEndian { *(*int16)(unsafe.Pointer(&data[bufOffset])) = *(*int16)(fieldPtr) } else { binary.LittleEndian.PutUint16(data[bufOffset:], uint16(*(*int16)(fieldPtr))) } - case ConcreteTypeFloat32: + case Float32DispatchId: if isLittleEndian { *(*float32)(unsafe.Pointer(&data[bufOffset])) = *(*float32)(fieldPtr) } else { binary.LittleEndian.PutUint32(data[bufOffset:], math.Float32bits(*(*float32)(fieldPtr))) } - case ConcreteTypeFloat64: + case Float64DispatchId: if isLittleEndian { *(*float64)(unsafe.Pointer(&data[bufOffset])) = 
*(*float64)(fieldPtr) } else { @@ -367,15 +367,15 @@ func (s *structSerializer) WriteData(ctx *WriteContext, value reflect.Value) { for _, field := range s.fixedFields { fieldValue := value.Field(field.FieldIndex) switch field.StaticId { - case ConcreteTypeBool: + case BoolDispatchId: buf.WriteBool(fieldValue.Bool()) - case ConcreteTypeInt8: + case Int8DispatchId: buf.WriteByte_(byte(fieldValue.Int())) - case ConcreteTypeInt16: + case Int16DispatchId: buf.WriteInt16(int16(fieldValue.Int())) - case ConcreteTypeFloat32: + case Float32DispatchId: buf.WriteFloat32(float32(fieldValue.Float())) - case ConcreteTypeFloat64: + case Float64DispatchId: buf.WriteFloat64(fieldValue.Float()) } } @@ -392,11 +392,11 @@ func (s *structSerializer) WriteData(ctx *WriteContext, value reflect.Value) { for _, field := range s.varintFields { fieldPtr := unsafe.Add(ptr, field.Offset) switch field.StaticId { - case ConcreteTypeInt32: + case Int32DispatchId: offset += buf.UnsafePutVarInt32(offset, *(*int32)(fieldPtr)) - case ConcreteTypeInt64: + case Int64DispatchId: offset += buf.UnsafePutVarInt64(offset, *(*int64)(fieldPtr)) - case ConcreteTypeInt: + case IntDispatchId: offset += buf.UnsafePutVarInt64(offset, int64(*(*int)(fieldPtr))) } } @@ -407,9 +407,9 @@ func (s *structSerializer) WriteData(ctx *WriteContext, value reflect.Value) { for _, field := range s.varintFields { fieldValue := value.Field(field.FieldIndex) switch field.StaticId { - case ConcreteTypeInt32: + case Int32DispatchId: buf.WriteVarint32(int32(fieldValue.Int())) - case ConcreteTypeInt64, ConcreteTypeInt: + case Int64DispatchId, IntDispatchId: buf.WriteVarint64(fieldValue.Int()) } } @@ -434,7 +434,7 @@ func (s *structSerializer) writeRemainingField(ctx *WriteContext, ptr unsafe.Poi if ptr != nil { fieldPtr := unsafe.Add(ptr, field.Offset) switch field.StaticId { - case ConcreteTypeString: + case StringDispatchId: if field.RefMode == RefModeTracking { break // Fall through to slow path } @@ -444,113 +444,113 @@ func (s 
*structSerializer) writeRemainingField(ctx *WriteContext, ptr unsafe.Poi } ctx.WriteString(*(*string)(fieldPtr)) return - case ConcreteTypeEnum: + case EnumDispatchId: // Enums don't track refs - always use fast path writeEnumField(ctx, field, value.Field(field.FieldIndex)) return - case ConcreteTypeStringSlice: + case StringSliceDispatchId: if field.RefMode == RefModeTracking { break } ctx.WriteStringSlice(*(*[]string)(fieldPtr), field.RefMode, false, true) return - case ConcreteTypeBoolSlice: + case BoolSliceDispatchId: if field.RefMode == RefModeTracking { break } ctx.WriteBoolSlice(*(*[]bool)(fieldPtr), field.RefMode, false) return - case ConcreteTypeInt8Slice: + case Int8SliceDispatchId: if field.RefMode == RefModeTracking { break } ctx.WriteInt8Slice(*(*[]int8)(fieldPtr), field.RefMode, false) return - case ConcreteTypeByteSlice: + case ByteSliceDispatchId: if field.RefMode == RefModeTracking { break } ctx.WriteByteSlice(*(*[]byte)(fieldPtr), field.RefMode, false) return - case ConcreteTypeInt16Slice: + case Int16SliceDispatchId: if field.RefMode == RefModeTracking { break } ctx.WriteInt16Slice(*(*[]int16)(fieldPtr), field.RefMode, false) return - case ConcreteTypeInt32Slice: + case Int32SliceDispatchId: if field.RefMode == RefModeTracking { break } ctx.WriteInt32Slice(*(*[]int32)(fieldPtr), field.RefMode, false) return - case ConcreteTypeInt64Slice: + case Int64SliceDispatchId: if field.RefMode == RefModeTracking { break } ctx.WriteInt64Slice(*(*[]int64)(fieldPtr), field.RefMode, false) return - case ConcreteTypeIntSlice: + case IntSliceDispatchId: if field.RefMode == RefModeTracking { break } ctx.WriteIntSlice(*(*[]int)(fieldPtr), field.RefMode, false) return - case ConcreteTypeUintSlice: + case UintSliceDispatchId: if field.RefMode == RefModeTracking { break } ctx.WriteUintSlice(*(*[]uint)(fieldPtr), field.RefMode, false) return - case ConcreteTypeFloat32Slice: + case Float32SliceDispatchId: if field.RefMode == RefModeTracking { break } 
ctx.WriteFloat32Slice(*(*[]float32)(fieldPtr), field.RefMode, false) return - case ConcreteTypeFloat64Slice: + case Float64SliceDispatchId: if field.RefMode == RefModeTracking { break } ctx.WriteFloat64Slice(*(*[]float64)(fieldPtr), field.RefMode, false) return - case ConcreteTypeStringStringMap: + case StringStringMapDispatchId: if field.RefMode == RefModeTracking { break } ctx.WriteStringStringMap(*(*map[string]string)(fieldPtr), field.RefMode, false) return - case ConcreteTypeStringInt64Map: + case StringInt64MapDispatchId: if field.RefMode == RefModeTracking { break } ctx.WriteStringInt64Map(*(*map[string]int64)(fieldPtr), field.RefMode, false) return - case ConcreteTypeStringInt32Map: + case StringInt32MapDispatchId: if field.RefMode == RefModeTracking { break } ctx.WriteStringInt32Map(*(*map[string]int32)(fieldPtr), field.RefMode, false) return - case ConcreteTypeStringIntMap: + case StringIntMapDispatchId: if field.RefMode == RefModeTracking { break } ctx.WriteStringIntMap(*(*map[string]int)(fieldPtr), field.RefMode, false) return - case ConcreteTypeStringFloat64Map: + case StringFloat64MapDispatchId: if field.RefMode == RefModeTracking { break } ctx.WriteStringFloat64Map(*(*map[string]float64)(fieldPtr), field.RefMode, false) return - case ConcreteTypeStringBoolMap: + case StringBoolMapDispatchId: // NOTE: map[string]bool is used to represent SETs in Go xlang mode. // We CANNOT use the fast path here because it writes MAP format, // but the data should be written in SET format. Fall through to slow path // which uses setSerializer to correctly write the SET format. 
break - case ConcreteTypeIntIntMap: + case IntIntMapDispatchId: if field.RefMode == RefModeTracking { break } @@ -672,23 +672,23 @@ func (s *structSerializer) ReadData(ctx *ReadContext, type_ reflect.Type, value fieldPtr := unsafe.Add(ptr, field.Offset) bufOffset := baseOffset + field.WriteOffset switch field.StaticId { - case ConcreteTypeBool: + case BoolDispatchId: *(*bool)(fieldPtr) = data[bufOffset] != 0 - case ConcreteTypeInt8: + case Int8DispatchId: *(*int8)(fieldPtr) = int8(data[bufOffset]) - case ConcreteTypeInt16: + case Int16DispatchId: if isLittleEndian { *(*int16)(fieldPtr) = *(*int16)(unsafe.Pointer(&data[bufOffset])) } else { *(*int16)(fieldPtr) = int16(binary.LittleEndian.Uint16(data[bufOffset:])) } - case ConcreteTypeFloat32: + case Float32DispatchId: if isLittleEndian { *(*float32)(fieldPtr) = *(*float32)(unsafe.Pointer(&data[bufOffset])) } else { *(*float32)(fieldPtr) = math.Float32frombits(binary.LittleEndian.Uint32(data[bufOffset:])) } - case ConcreteTypeFloat64: + case Float64DispatchId: if isLittleEndian { *(*float64)(fieldPtr) = *(*float64)(unsafe.Pointer(&data[bufOffset])) } else { @@ -706,11 +706,11 @@ func (s *structSerializer) ReadData(ctx *ReadContext, type_ reflect.Type, value for _, field := range s.varintFields { fieldPtr := unsafe.Add(ptr, field.Offset) switch field.StaticId { - case ConcreteTypeInt32: + case Int32DispatchId: *(*int32)(fieldPtr) = buf.UnsafeReadVarint32() - case ConcreteTypeInt64: + case Int64DispatchId: *(*int64)(fieldPtr) = buf.UnsafeReadVarint64() - case ConcreteTypeInt: + case IntDispatchId: *(*int)(fieldPtr) = int(buf.UnsafeReadVarint64()) } } @@ -720,11 +720,11 @@ func (s *structSerializer) ReadData(ctx *ReadContext, type_ reflect.Type, value for _, field := range s.varintFields { fieldPtr := unsafe.Add(ptr, field.Offset) switch field.StaticId { - case ConcreteTypeInt32: + case Int32DispatchId: *(*int32)(fieldPtr) = buf.ReadVarint32(err) - case ConcreteTypeInt64: + case Int64DispatchId: *(*int64)(fieldPtr) = 
buf.ReadVarint64(err) - case ConcreteTypeInt: + case IntDispatchId: *(*int)(fieldPtr) = int(buf.ReadVarint64(err)) } } @@ -747,7 +747,7 @@ func (s *structSerializer) readRemainingField(ctx *ReadContext, ptr unsafe.Point if ptr != nil { fieldPtr := unsafe.Add(ptr, field.Offset) switch field.StaticId { - case ConcreteTypeString: + case StringDispatchId: if field.RefMode == RefModeTracking { break // Fall through to slow path for ref tracking } @@ -761,114 +761,114 @@ func (s *structSerializer) readRemainingField(ctx *ReadContext, ptr unsafe.Point } *(*string)(fieldPtr) = ctx.ReadString() return - case ConcreteTypeEnum: + case EnumDispatchId: // Enums don't track refs - always use fast path fieldValue := value.Field(field.FieldIndex) readEnumField(ctx, field, fieldValue) return - case ConcreteTypeStringSlice: + case StringSliceDispatchId: if field.RefMode == RefModeTracking { break } *(*[]string)(fieldPtr) = ctx.ReadStringSlice(field.RefMode, false) return - case ConcreteTypeBoolSlice: + case BoolSliceDispatchId: if field.RefMode == RefModeTracking { break } *(*[]bool)(fieldPtr) = ctx.ReadBoolSlice(field.RefMode, false) return - case ConcreteTypeInt8Slice: + case Int8SliceDispatchId: if field.RefMode == RefModeTracking { break } *(*[]int8)(fieldPtr) = ctx.ReadInt8Slice(field.RefMode, false) return - case ConcreteTypeByteSlice: + case ByteSliceDispatchId: if field.RefMode == RefModeTracking { break } *(*[]byte)(fieldPtr) = ctx.ReadByteSlice(field.RefMode, false) return - case ConcreteTypeInt16Slice: + case Int16SliceDispatchId: if field.RefMode == RefModeTracking { break } *(*[]int16)(fieldPtr) = ctx.ReadInt16Slice(field.RefMode, false) return - case ConcreteTypeInt32Slice: + case Int32SliceDispatchId: if field.RefMode == RefModeTracking { break } *(*[]int32)(fieldPtr) = ctx.ReadInt32Slice(field.RefMode, false) return - case ConcreteTypeInt64Slice: + case Int64SliceDispatchId: if field.RefMode == RefModeTracking { break } *(*[]int64)(fieldPtr) = 
ctx.ReadInt64Slice(field.RefMode, false) return - case ConcreteTypeIntSlice: + case IntSliceDispatchId: if field.RefMode == RefModeTracking { break } *(*[]int)(fieldPtr) = ctx.ReadIntSlice(field.RefMode, false) return - case ConcreteTypeUintSlice: + case UintSliceDispatchId: if field.RefMode == RefModeTracking { break } *(*[]uint)(fieldPtr) = ctx.ReadUintSlice(field.RefMode, false) return - case ConcreteTypeFloat32Slice: + case Float32SliceDispatchId: if field.RefMode == RefModeTracking { break } *(*[]float32)(fieldPtr) = ctx.ReadFloat32Slice(field.RefMode, false) return - case ConcreteTypeFloat64Slice: + case Float64SliceDispatchId: if field.RefMode == RefModeTracking { break } *(*[]float64)(fieldPtr) = ctx.ReadFloat64Slice(field.RefMode, false) return - case ConcreteTypeStringStringMap: + case StringStringMapDispatchId: if field.RefMode == RefModeTracking { break } *(*map[string]string)(fieldPtr) = ctx.ReadStringStringMap(field.RefMode, false) return - case ConcreteTypeStringInt64Map: + case StringInt64MapDispatchId: if field.RefMode == RefModeTracking { break } *(*map[string]int64)(fieldPtr) = ctx.ReadStringInt64Map(field.RefMode, false) return - case ConcreteTypeStringInt32Map: + case StringInt32MapDispatchId: if field.RefMode == RefModeTracking { break } *(*map[string]int32)(fieldPtr) = ctx.ReadStringInt32Map(field.RefMode, false) return - case ConcreteTypeStringIntMap: + case StringIntMapDispatchId: if field.RefMode == RefModeTracking { break } *(*map[string]int)(fieldPtr) = ctx.ReadStringIntMap(field.RefMode, false) return - case ConcreteTypeStringFloat64Map: + case StringFloat64MapDispatchId: if field.RefMode == RefModeTracking { break } *(*map[string]float64)(fieldPtr) = ctx.ReadStringFloat64Map(field.RefMode, false) return - case ConcreteTypeStringBoolMap: + case StringBoolMapDispatchId: // NOTE: map[string]bool is used to represent SETs in Go xlang mode. 
// We CANNOT use the fast path here because it reads MAP format, // but the data is actually in SET format. Fall through to slow path // which uses setSerializer to correctly read the SET format. break - case ConcreteTypeIntIntMap: + case IntIntMapDispatchId: if field.RefMode == RefModeTracking { break } @@ -912,15 +912,15 @@ func (s *structSerializer) readFieldsInOrder(ctx *ReadContext, value reflect.Val if canUseUnsafe && isFixedSizePrimitive(field.StaticId, field.Referencable) { fieldPtr := unsafe.Add(ptr, field.Offset) switch field.StaticId { - case ConcreteTypeBool: + case BoolDispatchId: *(*bool)(fieldPtr) = buf.ReadBool(err) - case ConcreteTypeInt8: + case Int8DispatchId: *(*int8)(fieldPtr) = buf.ReadInt8(err) - case ConcreteTypeInt16: + case Int16DispatchId: *(*int16)(fieldPtr) = buf.ReadInt16(err) - case ConcreteTypeFloat32: + case Float32DispatchId: *(*float32)(fieldPtr) = buf.ReadFloat32(err) - case ConcreteTypeFloat64: + case Float64DispatchId: *(*float64)(fieldPtr) = buf.ReadFloat64(err) } continue @@ -931,11 +931,11 @@ func (s *structSerializer) readFieldsInOrder(ctx *ReadContext, value reflect.Val if canUseUnsafe && isVarintPrimitive(field.StaticId, field.Referencable) && !fieldHasNonPrimitiveSerializer(field) { fieldPtr := unsafe.Add(ptr, field.Offset) switch field.StaticId { - case ConcreteTypeInt32: + case Int32DispatchId: *(*int32)(fieldPtr) = buf.ReadVarint32(err) - case ConcreteTypeInt64: + case Int64DispatchId: *(*int64)(fieldPtr) = buf.ReadVarint64(err) - case ConcreteTypeInt: + case IntDispatchId: *(*int)(fieldPtr) = int(buf.ReadVarint64(err)) } continue @@ -947,15 +947,15 @@ func (s *structSerializer) readFieldsInOrder(ctx *ReadContext, value reflect.Val // Slow path for primitives when not addressable if !canUseUnsafe && isFixedSizePrimitive(field.StaticId, field.Referencable) { switch field.StaticId { - case ConcreteTypeBool: + case BoolDispatchId: fieldValue.SetBool(buf.ReadBool(err)) - case ConcreteTypeInt8: + case Int8DispatchId: 
fieldValue.SetInt(int64(buf.ReadInt8(err))) - case ConcreteTypeInt16: + case Int16DispatchId: fieldValue.SetInt(int64(buf.ReadInt16(err))) - case ConcreteTypeFloat32: + case Float32DispatchId: fieldValue.SetFloat(float64(buf.ReadFloat32(err))) - case ConcreteTypeFloat64: + case Float64DispatchId: fieldValue.SetFloat(buf.ReadFloat64(err)) } continue @@ -963,9 +963,9 @@ func (s *structSerializer) readFieldsInOrder(ctx *ReadContext, value reflect.Val if !canUseUnsafe && isVarintPrimitive(field.StaticId, field.Referencable) && !fieldHasNonPrimitiveSerializer(field) { switch field.StaticId { - case ConcreteTypeInt32: + case Int32DispatchId: fieldValue.SetInt(int64(buf.ReadVarint32(err))) - case ConcreteTypeInt64, ConcreteTypeInt: + case Int64DispatchId, IntDispatchId: fieldValue.SetInt(buf.ReadVarint64(err)) } continue @@ -1125,13 +1125,13 @@ func (s *structSerializer) initFieldsFromTypeResolver(typeResolver *TypeResolver writeType := typeResolver.Compatible() && isStructField(fieldType) // Pre-compute StaticId, with special handling for enum fields - staticId := GetStaticTypeId(fieldType) + staticId := GetDispatchId(fieldType) if fieldSerializer != nil { if _, ok := fieldSerializer.(*enumSerializer); ok { - staticId = ConcreteTypeEnum + staticId = EnumDispatchId } else if ptrSer, ok := fieldSerializer.(*ptrToValueSerializer); ok { if _, ok := ptrSer.valueSerializer.(*enumSerializer); ok { - staticId = ConcreteTypeEnum + staticId = EnumDispatchId } } } @@ -1215,12 +1215,12 @@ func (s *structSerializer) groupFields() { s.remainingFields = append(s.remainingFields, field) } else if isFixedSizePrimitive(field.StaticId, field.Referencable) { // Compute FixedSize and WriteOffset for this field - field.FixedSize = getFixedSizeByStaticId(field.StaticId) + field.FixedSize = getFixedSizeByDispatchId(field.StaticId) field.WriteOffset = s.fixedSize s.fixedSize += field.FixedSize s.fixedFields = append(s.fixedFields, field) } else if isVarintPrimitive(field.StaticId, 
field.Referencable) { - s.maxVarintSize += getVarintMaxSizeByStaticId(field.StaticId) + s.maxVarintSize += getVarintMaxSizeByDispatchId(field.StaticId) s.varintFields = append(s.varintFields, field) } else { s.remainingFields = append(s.remainingFields, field) @@ -1256,13 +1256,13 @@ func (s *structSerializer) initFieldsFromDefsWithResolver(typeResolver *TypeReso writeType := typeResolver.Compatible() && isStructField(remoteType) // Pre-compute StaticId, with special handling for enum fields - staticId := GetStaticTypeId(remoteType) + staticId := GetDispatchId(remoteType) if fieldSerializer != nil { if _, ok := fieldSerializer.(*enumSerializer); ok { - staticId = ConcreteTypeEnum + staticId = EnumDispatchId } else if ptrSer, ok := fieldSerializer.(*ptrToValueSerializer); ok { if _, ok := ptrSer.valueSerializer.(*enumSerializer); ok { - staticId = ConcreteTypeEnum + staticId = EnumDispatchId } } } @@ -1530,13 +1530,13 @@ func (s *structSerializer) initFieldsFromDefsWithResolver(typeResolver *TypeReso writeType := typeResolver.Compatible() && isStructField(fieldType) // Pre-compute StaticId, with special handling for enum fields - staticId := GetStaticTypeId(fieldType) + staticId := GetDispatchId(fieldType) if fieldSerializer != nil { if _, ok := fieldSerializer.(*enumSerializer); ok { - staticId = ConcreteTypeEnum + staticId = EnumDispatchId } else if ptrSer, ok := fieldSerializer.(*ptrToValueSerializer); ok { if _, ok := ptrSer.valueSerializer.(*enumSerializer); ok { - staticId = ConcreteTypeEnum + staticId = EnumDispatchId } } } @@ -2046,6 +2046,7 @@ func elementTypesCompatible(actual, expected reflect.Type) bool { // typeIdFromKind derives a TypeId from a reflect.Type's kind // This is used when the type is not registered in typesInfo +// Note: Uses VARINT32/VARINT64/VAR_UINT32/VAR_UINT64 to match Java xlang mode and Rust func typeIdFromKind(type_ reflect.Type) TypeId { switch type_.Kind() { case reflect.Bool: @@ -2055,17 +2056,17 @@ func typeIdFromKind(type_ 
reflect.Type) TypeId { case reflect.Int16: return INT16 case reflect.Int32: - return INT32 + return VARINT32 case reflect.Int64, reflect.Int: - return INT64 + return VARINT64 case reflect.Uint8: return UINT8 case reflect.Uint16: return UINT16 case reflect.Uint32: - return UINT32 + return VAR_UINT32 case reflect.Uint64, reflect.Uint: - return UINT64 + return VAR_UINT64 case reflect.Float32: return FLOAT32 case reflect.Float64: diff --git a/go/fory/type_resolver.go b/go/fory/type_resolver.go index 951486c979..51c3967f66 100644 --- a/go/fory/type_resolver.go +++ b/go/fory/type_resolver.go @@ -120,7 +120,7 @@ type TypeInfo struct { NameBytes *MetaStringBytes IsDynamic bool TypeID uint32 - StaticId StaticTypeId + StaticId DispatchId Serializer Serializer NeedWriteDef bool NeedWriteRef bool // Whether this type needs reference tracking @@ -344,9 +344,9 @@ func (r *TypeResolver) initialize() { {byteType, UINT8, byteSerializer{}}, {int8Type, INT8, int8Serializer{}}, {int16Type, INT16, int16Serializer{}}, - {int32Type, INT32, int32Serializer{}}, - {int64Type, INT64, int64Serializer{}}, - {intType, INT64, intSerializer{}}, // int maps to int64 for xlang + {int32Type, VARINT32, int32Serializer{}}, + {int64Type, VARINT64, int64Serializer{}}, + {intType, VARINT64, intSerializer{}}, // int maps to int64 for xlang {float32Type, FLOAT32, float32Serializer{}}, {float64Type, FLOAT64, float64Serializer{}}, {dateType, LOCAL_DATE, dateSerializer{}}, @@ -461,7 +461,7 @@ func (r *TypeResolver) RegisterEnumByID(type_ reflect.Type, fullTypeID uint32) e TypeID: fullTypeID, Serializer: serializer, IsDynamic: isDynamicType(type_), - StaticId: GetStaticTypeId(type_), + StaticId: GetDispatchId(type_), hashValue: calcTypeHash(type_), } r.typeIDToTypeInfo[fullTypeID] = typeInfo @@ -1066,8 +1066,8 @@ func (r *TypeResolver) registerType( PkgPathBytes: nsBytes, // Encoded namespace bytes NameBytes: typeBytes, // Encoded type name bytes IsDynamic: isDynamicType(type_), - StaticId: 
GetStaticTypeId(type_), // Static type ID for fast path - hashValue: calcTypeHash(type_), // Precomputed hash for fast lookups + StaticId: GetDispatchId(type_), // Static type ID for fast path + hashValue: calcTypeHash(type_), // Precomputed hash for fast lookups NeedWriteRef: NeedWriteRef(TypeId(typeID)), } // Update resolver caches: @@ -1759,112 +1759,112 @@ func (r *TypeResolver) ReadTypeInfo(buffer *ByteBuffer, err *Error) *TypeInfo { Type: interfaceSliceType, TypeID: typeID, Serializer: r.typeToSerializers[interfaceSliceType], - StaticId: ConcreteTypeOther, + StaticId: UnknowDispatchId, } case SET, -SET: return &TypeInfo{ Type: genericSetType, TypeID: typeID, Serializer: r.typeToSerializers[genericSetType], - StaticId: ConcreteTypeOther, + StaticId: UnknowDispatchId, } case MAP, -MAP: return &TypeInfo{ Type: interfaceMapType, TypeID: typeID, Serializer: r.typeToSerializers[interfaceMapType], - StaticId: ConcreteTypeOther, + StaticId: UnknowDispatchId, } case BOOL: return &TypeInfo{ Type: reflect.TypeOf(false), TypeID: typeID, Serializer: r.typeToSerializers[reflect.TypeOf(false)], - StaticId: ConcreteTypeBool, + StaticId: BoolDispatchId, } case INT8: return &TypeInfo{ Type: reflect.TypeOf(int8(0)), TypeID: typeID, Serializer: r.typeToSerializers[reflect.TypeOf(int8(0))], - StaticId: ConcreteTypeInt8, + StaticId: Int8DispatchId, } case UINT8: return &TypeInfo{ Type: reflect.TypeOf(uint8(0)), TypeID: typeID, Serializer: r.typeToSerializers[reflect.TypeOf(uint8(0))], - StaticId: ConcreteTypeInt8, // Use Int8 static ID for uint8 + StaticId: Int8DispatchId, // Use Int8 static ID for uint8 } case INT16: return &TypeInfo{ Type: reflect.TypeOf(int16(0)), TypeID: typeID, Serializer: r.typeToSerializers[reflect.TypeOf(int16(0))], - StaticId: ConcreteTypeInt16, + StaticId: Int16DispatchId, } case UINT16: return &TypeInfo{ Type: reflect.TypeOf(uint16(0)), TypeID: typeID, Serializer: r.typeToSerializers[reflect.TypeOf(uint16(0))], - StaticId: ConcreteTypeInt16, // Use 
Int16 static ID for uint16 + StaticId: Int16DispatchId, // Use Int16 static ID for uint16 } case INT32, VARINT32: return &TypeInfo{ Type: reflect.TypeOf(int32(0)), TypeID: typeID, Serializer: r.typeToSerializers[reflect.TypeOf(int32(0))], - StaticId: ConcreteTypeInt32, + StaticId: Int32DispatchId, } case UINT32: return &TypeInfo{ Type: reflect.TypeOf(uint32(0)), TypeID: typeID, Serializer: r.typeToSerializers[reflect.TypeOf(uint32(0))], - StaticId: ConcreteTypeInt32, // Use Int32 static ID for uint32 + StaticId: Int32DispatchId, // Use Int32 static ID for uint32 } case INT64, VARINT64, TAGGED_INT64: return &TypeInfo{ Type: reflect.TypeOf(int64(0)), TypeID: typeID, Serializer: r.typeToSerializers[reflect.TypeOf(int64(0))], - StaticId: ConcreteTypeInt64, + StaticId: Int64DispatchId, } case UINT64: return &TypeInfo{ Type: reflect.TypeOf(uint64(0)), TypeID: typeID, Serializer: r.typeToSerializers[reflect.TypeOf(uint64(0))], - StaticId: ConcreteTypeInt64, // Use Int64 static ID for uint64 + StaticId: Int64DispatchId, // Use Int64 static ID for uint64 } case FLOAT32: return &TypeInfo{ Type: reflect.TypeOf(float32(0)), TypeID: typeID, Serializer: r.typeToSerializers[reflect.TypeOf(float32(0))], - StaticId: ConcreteTypeFloat32, + StaticId: Float32DispatchId, } case FLOAT64: return &TypeInfo{ Type: reflect.TypeOf(float64(0)), TypeID: typeID, Serializer: r.typeToSerializers[reflect.TypeOf(float64(0))], - StaticId: ConcreteTypeFloat64, + StaticId: Float64DispatchId, } case STRING: return &TypeInfo{ Type: reflect.TypeOf(""), TypeID: typeID, Serializer: r.typeToSerializers[reflect.TypeOf("")], - StaticId: ConcreteTypeString, + StaticId: StringDispatchId, } case BINARY: return &TypeInfo{ Type: reflect.TypeOf([]byte(nil)), TypeID: typeID, Serializer: r.typeToSerializers[reflect.TypeOf([]byte(nil))], - StaticId: ConcreteTypeOther, + StaticId: UnknowDispatchId, } } @@ -1930,41 +1930,41 @@ func (r *TypeResolver) readTypeInfoWithTypeID(buffer *ByteBuffer, typeID uint32, Type: 
interfaceSliceType, TypeID: typeID, Serializer: r.typeToSerializers[interfaceSliceType], - StaticId: ConcreteTypeOther, + StaticId: UnknowDispatchId, } case SET: return &TypeInfo{ Type: genericSetType, TypeID: typeID, Serializer: r.typeToSerializers[genericSetType], - StaticId: ConcreteTypeOther, + StaticId: UnknowDispatchId, } case MAP: return &TypeInfo{ Type: interfaceMapType, TypeID: typeID, Serializer: r.typeToSerializers[interfaceMapType], - StaticId: ConcreteTypeOther, + StaticId: UnknowDispatchId, } // Handle primitive types that may not be explicitly registered case BOOL: - return &TypeInfo{Type: boolType, TypeID: typeID, Serializer: r.typeToSerializers[boolType], StaticId: ConcreteTypeBool} + return &TypeInfo{Type: boolType, TypeID: typeID, Serializer: r.typeToSerializers[boolType], StaticId: BoolDispatchId} case INT8: - return &TypeInfo{Type: int8Type, TypeID: typeID, Serializer: r.typeToSerializers[int8Type], StaticId: ConcreteTypeInt8} + return &TypeInfo{Type: int8Type, TypeID: typeID, Serializer: r.typeToSerializers[int8Type], StaticId: Int8DispatchId} case INT16: - return &TypeInfo{Type: int16Type, TypeID: typeID, Serializer: r.typeToSerializers[int16Type], StaticId: ConcreteTypeInt16} + return &TypeInfo{Type: int16Type, TypeID: typeID, Serializer: r.typeToSerializers[int16Type], StaticId: Int16DispatchId} case INT32, VARINT32: - return &TypeInfo{Type: int32Type, TypeID: typeID, Serializer: r.typeToSerializers[int32Type], StaticId: ConcreteTypeInt32} + return &TypeInfo{Type: int32Type, TypeID: typeID, Serializer: r.typeToSerializers[int32Type], StaticId: Int32DispatchId} case INT64, VARINT64, TAGGED_INT64: - return &TypeInfo{Type: int64Type, TypeID: typeID, Serializer: r.typeToSerializers[int64Type], StaticId: ConcreteTypeInt64} + return &TypeInfo{Type: int64Type, TypeID: typeID, Serializer: r.typeToSerializers[int64Type], StaticId: Int64DispatchId} case FLOAT32: - return &TypeInfo{Type: float32Type, TypeID: typeID, Serializer: 
r.typeToSerializers[float32Type], StaticId: ConcreteTypeFloat32} + return &TypeInfo{Type: float32Type, TypeID: typeID, Serializer: r.typeToSerializers[float32Type], StaticId: Float32DispatchId} case FLOAT64: - return &TypeInfo{Type: float64Type, TypeID: typeID, Serializer: r.typeToSerializers[float64Type], StaticId: ConcreteTypeFloat64} + return &TypeInfo{Type: float64Type, TypeID: typeID, Serializer: r.typeToSerializers[float64Type], StaticId: Float64DispatchId} case STRING: - return &TypeInfo{Type: stringType, TypeID: typeID, Serializer: r.typeToSerializers[stringType], StaticId: ConcreteTypeString} + return &TypeInfo{Type: stringType, TypeID: typeID, Serializer: r.typeToSerializers[stringType], StaticId: StringDispatchId} case BINARY: - return &TypeInfo{Type: byteSliceType, TypeID: typeID, Serializer: r.typeToSerializers[byteSliceType], StaticId: ConcreteTypeByteSlice} + return &TypeInfo{Type: byteSliceType, TypeID: typeID, Serializer: r.typeToSerializers[byteSliceType], StaticId: ByteSliceDispatchId} } // Handle UNKNOWN type (0) - used for polymorphic types @@ -1972,7 +1972,7 @@ func (r *TypeResolver) readTypeInfoWithTypeID(buffer *ByteBuffer, typeID uint32, return &TypeInfo{ Type: interfaceType, TypeID: typeID, - StaticId: ConcreteTypeOther, + StaticId: UnknowDispatchId, } } diff --git a/go/fory/types.go b/go/fory/types.go index 1d548f511c..4a7290074b 100644 --- a/go/fory/types.go +++ b/go/fory/types.go @@ -154,7 +154,18 @@ func isPrimitiveType(typeID int16) bool { INT8, INT16, INT32, + VARINT32, INT64, + VARINT64, + TAGGED_INT64, + UINT8, + UINT16, + UINT32, + VAR_UINT32, + UINT64, + VAR_UINT64, + TAGGED_UINT64, + FLOAT16, FLOAT32, FLOAT64: return true @@ -209,15 +220,24 @@ func isPrimitiveArrayType(typeID int16) bool { } var primitiveTypeSizes = map[int16]int{ - BOOL: 1, - INT8: 1, - INT16: 2, - INT32: 4, - VARINT32: 4, - INT64: 8, - VARINT64: 8, - FLOAT32: 4, - FLOAT64: 8, + BOOL: 1, + INT8: 1, + UINT8: 1, + INT16: 2, + UINT16: 2, + FLOAT16: 2, + INT32: 4, 
+ VARINT32: 4, + UINT32: 4, + VAR_UINT32: 4, + FLOAT32: 4, + INT64: 8, + VARINT64: 8, + TAGGED_INT64: 8, + UINT64: 8, + VAR_UINT64: 8, + TAGGED_UINT64: 8, + FLOAT64: 8, } func getPrimitiveTypeSize(typeID int16) int { @@ -240,150 +260,143 @@ func isUserDefinedType(typeID int16) bool { } // ============================================================================ -// StaticTypeId for switch-based fast path (avoids interface virtual method cost) +// DispatchId for switch-based fast path (avoids interface virtual method cost) // ============================================================================ -// StaticTypeId identifies concrete Go types for optimized serialization dispatch -type StaticTypeId uint8 +// DispatchId identifies concrete Go types for optimized serialization dispatch +type DispatchId uint8 const ( - ConcreteTypeOther StaticTypeId = iota - ConcreteTypeBool - ConcreteTypeInt8 - ConcreteTypeInt16 - ConcreteTypeInt32 - ConcreteTypeInt64 - ConcreteTypeInt - ConcreteTypeFloat32 - ConcreteTypeFloat64 - ConcreteTypeString - ConcreteTypeByteSlice - ConcreteTypeInt8Slice - ConcreteTypeInt16Slice - ConcreteTypeInt32Slice - ConcreteTypeInt64Slice - ConcreteTypeIntSlice - ConcreteTypeUintSlice - ConcreteTypeFloat32Slice - ConcreteTypeFloat64Slice - ConcreteTypeBoolSlice - ConcreteTypeStringSlice - ConcreteTypeStringStringMap - ConcreteTypeStringInt32Map - ConcreteTypeStringInt64Map - ConcreteTypeStringIntMap - ConcreteTypeStringFloat64Map - ConcreteTypeStringBoolMap - ConcreteTypeInt32Int32Map - ConcreteTypeInt64Int64Map - ConcreteTypeIntIntMap - ConcreteTypeEnum // Enum types (both ENUM and NAMED_ENUM) + UnknowDispatchId DispatchId = iota + BoolDispatchId + Int8DispatchId + Int16DispatchId + Int32DispatchId + Int64DispatchId + IntDispatchId + Uint8DispatchId + Uint16DispatchId + Uint32DispatchId + Uint64DispatchId + UintDispatchId + Float32DispatchId + Float64DispatchId + StringDispatchId + ByteSliceDispatchId + Int8SliceDispatchId + Int16SliceDispatchId 
+ Int32SliceDispatchId + Int64SliceDispatchId + IntSliceDispatchId + UintSliceDispatchId + Float32SliceDispatchId + Float64SliceDispatchId + BoolSliceDispatchId + StringSliceDispatchId + StringStringMapDispatchId + StringInt32MapDispatchId + StringInt64MapDispatchId + StringIntMapDispatchId + StringFloat64MapDispatchId + StringBoolMapDispatchId + Int32Int32MapDispatchId + Int64Int64MapDispatchId + IntIntMapDispatchId + EnumDispatchId // Enum types (both ENUM and NAMED_ENUM) ) -// GetStaticTypeId returns the StaticTypeId for a reflect.Type -func GetStaticTypeId(t reflect.Type) StaticTypeId { +// GetDispatchId returns the DispatchId for a reflect.Type +func GetDispatchId(t reflect.Type) DispatchId { switch t.Kind() { case reflect.Bool: - return ConcreteTypeBool + return BoolDispatchId case reflect.Int8: - return ConcreteTypeInt8 + return Int8DispatchId case reflect.Int16: - return ConcreteTypeInt16 + return Int16DispatchId case reflect.Int32: - return ConcreteTypeInt32 + return Int32DispatchId case reflect.Int64: - return ConcreteTypeInt64 + return Int64DispatchId case reflect.Int: - return ConcreteTypeInt + return IntDispatchId + case reflect.Uint8: + return Uint8DispatchId + case reflect.Uint16: + return Uint16DispatchId + case reflect.Uint32: + return Uint32DispatchId + case reflect.Uint64: + return Uint64DispatchId + case reflect.Uint: + return UintDispatchId case reflect.Float32: - return ConcreteTypeFloat32 + return Float32DispatchId case reflect.Float64: - return ConcreteTypeFloat64 + return Float64DispatchId case reflect.String: - return ConcreteTypeString + return StringDispatchId case reflect.Slice: // Check for specific slice types switch t.Elem().Kind() { case reflect.Uint8: - return ConcreteTypeByteSlice + return ByteSliceDispatchId case reflect.Int8: - return ConcreteTypeInt8Slice + return Int8SliceDispatchId case reflect.Int16: - return ConcreteTypeInt16Slice + return Int16SliceDispatchId case reflect.Int32: - return ConcreteTypeInt32Slice + return 
Int32SliceDispatchId case reflect.Int64: - return ConcreteTypeInt64Slice + return Int64SliceDispatchId case reflect.Int: - return ConcreteTypeIntSlice + return IntSliceDispatchId case reflect.Uint: - return ConcreteTypeUintSlice + return UintSliceDispatchId case reflect.Float32: - return ConcreteTypeFloat32Slice + return Float32SliceDispatchId case reflect.Float64: - return ConcreteTypeFloat64Slice + return Float64SliceDispatchId case reflect.Bool: - return ConcreteTypeBoolSlice + return BoolSliceDispatchId case reflect.String: - return ConcreteTypeStringSlice + return StringSliceDispatchId } - return ConcreteTypeOther + return UnknowDispatchId case reflect.Map: // Check for specific common map types if t.Key().Kind() == reflect.String { switch t.Elem().Kind() { case reflect.String: - return ConcreteTypeStringStringMap + return StringStringMapDispatchId case reflect.Int64: - return ConcreteTypeStringInt64Map + return StringInt64MapDispatchId case reflect.Int: - return ConcreteTypeStringIntMap + return StringIntMapDispatchId case reflect.Float64: - return ConcreteTypeStringFloat64Map + return StringFloat64MapDispatchId case reflect.Bool: - return ConcreteTypeStringBoolMap + return StringBoolMapDispatchId } } else if t.Key().Kind() == reflect.Int32 && t.Elem().Kind() == reflect.Int32 { - return ConcreteTypeInt32Int32Map + return Int32Int32MapDispatchId } else if t.Key().Kind() == reflect.Int64 && t.Elem().Kind() == reflect.Int64 { - return ConcreteTypeInt64Int64Map + return Int64Int64MapDispatchId } else if t.Key().Kind() == reflect.Int && t.Elem().Kind() == reflect.Int { - return ConcreteTypeIntIntMap + return IntIntMapDispatchId } - return ConcreteTypeOther + return UnknowDispatchId default: - return ConcreteTypeOther - } -} - -// GetConcreteTypeIdAndTypeId returns both StaticTypeId and TypeId for a reflect.Type -func GetConcreteTypeIdAndTypeId(t reflect.Type) (StaticTypeId, TypeId) { - switch t.Kind() { - case reflect.Bool: - return ConcreteTypeBool, BOOL - case 
reflect.Int8: - return ConcreteTypeInt8, INT8 - case reflect.Int16: - return ConcreteTypeInt16, INT16 - case reflect.Int32: - return ConcreteTypeInt32, INT32 - case reflect.Int64: - return ConcreteTypeInt64, INT64 - case reflect.Float32: - return ConcreteTypeFloat32, FLOAT32 - case reflect.Float64: - return ConcreteTypeFloat64, FLOAT64 - case reflect.String: - return ConcreteTypeString, STRING - default: - return ConcreteTypeOther, 0 + return UnknowDispatchId } } // IsPrimitiveTypeId checks if a type ID is a primitive type func IsPrimitiveTypeId(typeId TypeId) bool { switch typeId { - case BOOL, INT8, INT16, INT32, INT64, FLOAT32, FLOAT64, STRING: + case BOOL, INT8, INT16, INT32, VARINT32, INT64, VARINT64, TAGGED_INT64, + UINT8, UINT16, UINT32, VAR_UINT32, UINT64, VAR_UINT64, TAGGED_UINT64, + FLOAT16, FLOAT32, FLOAT64, STRING: return true default: return false @@ -391,13 +404,13 @@ func IsPrimitiveTypeId(typeId TypeId) bool { } // isFixedSizePrimitive returns true for non-nullable fixed-size primitives -func isFixedSizePrimitive(staticId StaticTypeId, referencable bool) bool { +func isFixedSizePrimitive(staticId DispatchId, referencable bool) bool { if referencable { return false } switch staticId { - case ConcreteTypeBool, ConcreteTypeInt8, ConcreteTypeInt16, - ConcreteTypeFloat32, ConcreteTypeFloat64: + case BoolDispatchId, Int8DispatchId, Uint8DispatchId, Int16DispatchId, Uint16DispatchId, + Float32DispatchId, Float64DispatchId: return true default: return false @@ -405,12 +418,13 @@ func isFixedSizePrimitive(staticId StaticTypeId, referencable bool) bool { } // isVarintPrimitive returns true for non-nullable varint primitives -func isVarintPrimitive(staticId StaticTypeId, referencable bool) bool { +func isVarintPrimitive(staticId DispatchId, referencable bool) bool { if referencable { return false } switch staticId { - case ConcreteTypeInt32, ConcreteTypeInt64, ConcreteTypeInt: + case Int32DispatchId, Int64DispatchId, IntDispatchId, + Uint32DispatchId, 
Uint64DispatchId, UintDispatchId: return true default: return false @@ -418,10 +432,12 @@ func isVarintPrimitive(staticId StaticTypeId, referencable bool) bool { } // isPrimitiveStaticId returns true if the staticId represents a primitive type -func isPrimitiveStaticId(staticId StaticTypeId) bool { +func isPrimitiveStaticId(staticId DispatchId) bool { switch staticId { - case ConcreteTypeBool, ConcreteTypeInt8, ConcreteTypeInt16, ConcreteTypeInt32, - ConcreteTypeInt64, ConcreteTypeInt, ConcreteTypeFloat32, ConcreteTypeFloat64: + case BoolDispatchId, Int8DispatchId, Int16DispatchId, Int32DispatchId, + Int64DispatchId, IntDispatchId, Uint8DispatchId, Uint16DispatchId, + Uint32DispatchId, Uint64DispatchId, UintDispatchId, + Float32DispatchId, Float64DispatchId: return true default: return false @@ -439,28 +455,28 @@ func isNumericKind(kind reflect.Kind) bool { } } -// getFixedSizeByStaticId returns byte size for fixed primitives (0 if not fixed) -func getFixedSizeByStaticId(staticId StaticTypeId) int { +// getFixedSizeByDispatchId returns byte size for fixed primitives (0 if not fixed) +func getFixedSizeByDispatchId(staticId DispatchId) int { switch staticId { - case ConcreteTypeBool, ConcreteTypeInt8: + case BoolDispatchId, Int8DispatchId, Uint8DispatchId: return 1 - case ConcreteTypeInt16: + case Int16DispatchId, Uint16DispatchId: return 2 - case ConcreteTypeFloat32: + case Float32DispatchId: return 4 - case ConcreteTypeFloat64: + case Float64DispatchId: return 8 default: return 0 } } -// getVarintMaxSizeByStaticId returns max byte size for varint primitives (0 if not varint) -func getVarintMaxSizeByStaticId(staticId StaticTypeId) int { +// getVarintMaxSizeByDispatchId returns max byte size for varint primitives (0 if not varint) +func getVarintMaxSizeByDispatchId(staticId DispatchId) int { switch staticId { - case ConcreteTypeInt32: + case Int32DispatchId, Uint32DispatchId: return 5 - case ConcreteTypeInt64, ConcreteTypeInt: + case Int64DispatchId, IntDispatchId, 
Uint64DispatchId, UintDispatchId: return 10 default: return 0 diff --git a/go/fory/writer.go b/go/fory/writer.go index 510f4351ad..f11c74cb16 100644 --- a/go/fory/writer.go +++ b/go/fory/writer.go @@ -180,30 +180,30 @@ func (c *WriteContext) WriteTypeId(id TypeId) { c.buffer.WriteVaruint32Small7(uint32(id)) } -// writeFast writes a value using fast path based on StaticTypeId -func (c *WriteContext) writeFast(ptr unsafe.Pointer, ct StaticTypeId) { +// writeFast writes a value using fast path based on DispatchId +func (c *WriteContext) writeFast(ptr unsafe.Pointer, ct DispatchId) { switch ct { - case ConcreteTypeBool: + case BoolDispatchId: c.buffer.WriteBool(*(*bool)(ptr)) - case ConcreteTypeInt8: + case Int8DispatchId: c.buffer.WriteByte_(*(*byte)(ptr)) - case ConcreteTypeInt16: + case Int16DispatchId: c.buffer.WriteInt16(*(*int16)(ptr)) - case ConcreteTypeInt32: + case Int32DispatchId: c.buffer.WriteVarint32(*(*int32)(ptr)) - case ConcreteTypeInt: + case IntDispatchId: if strconv.IntSize == 64 { c.buffer.WriteVarint64(int64(*(*int)(ptr))) } else { c.buffer.WriteVarint32(int32(*(*int)(ptr))) } - case ConcreteTypeInt64: + case Int64DispatchId: c.buffer.WriteVarint64(*(*int64)(ptr)) - case ConcreteTypeFloat32: + case Float32DispatchId: c.buffer.WriteFloat32(*(*float32)(ptr)) - case ConcreteTypeFloat64: + case Float64DispatchId: c.buffer.WriteFloat64(*(*float64)(ptr)) - case ConcreteTypeString: + case StringDispatchId: writeString(c.buffer, *(*string)(ptr)) } } From b870947cf209896990f56b3cde453340e64842f7 Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Wed, 7 Jan 2026 16:19:23 +0800 Subject: [PATCH 10/44] fix go codegen --- go/fory/codegen/decoder.go | 8 ++++---- go/fory/codegen/encoder.go | 16 ++++++++-------- go/fory/codegen/utils.go | 34 +++++++++++++++++++++++----------- 3 files changed, 35 insertions(+), 23 deletions(-) diff --git a/go/fory/codegen/decoder.go b/go/fory/codegen/decoder.go index 239cabb0e9..a6adbfd9b2 100644 --- a/go/fory/codegen/decoder.go +++ 
b/go/fory/codegen/decoder.go @@ -541,17 +541,17 @@ func generateElementTypeIDReadInline(buf *bytes.Buffer, elemType types.Type) err case types.Int16: expectedTypeID = int(fory.INT16) case types.Int32: - expectedTypeID = int(fory.INT32) + expectedTypeID = int(fory.VARINT32) case types.Int, types.Int64: - expectedTypeID = int(fory.INT64) + expectedTypeID = int(fory.VARINT64) case types.Uint8: expectedTypeID = int(fory.UINT8) case types.Uint16: expectedTypeID = int(fory.UINT16) case types.Uint32: - expectedTypeID = int(fory.UINT32) + expectedTypeID = int(fory.VAR_UINT32) case types.Uint, types.Uint64: - expectedTypeID = int(fory.UINT64) + expectedTypeID = int(fory.VAR_UINT64) case types.Float32: expectedTypeID = int(fory.FLOAT32) case types.Float64: diff --git a/go/fory/codegen/encoder.go b/go/fory/codegen/encoder.go index dcc90ba10b..49c3db3f22 100644 --- a/go/fory/codegen/encoder.go +++ b/go/fory/codegen/encoder.go @@ -233,17 +233,17 @@ func generateElementTypeIDWrite(buf *bytes.Buffer, elemType types.Type) error { case types.Int16: fmt.Fprintf(buf, "\t\tbuf.WriteVaruint32(%d) // INT16\n", fory.INT16) case types.Int32: - fmt.Fprintf(buf, "\t\tbuf.WriteVaruint32(%d) // INT32\n", fory.INT32) + fmt.Fprintf(buf, "\t\tbuf.WriteVaruint32(%d) // VARINT32\n", fory.VARINT32) case types.Int, types.Int64: - fmt.Fprintf(buf, "\t\tbuf.WriteVaruint32(%d) // INT64\n", fory.INT64) + fmt.Fprintf(buf, "\t\tbuf.WriteVaruint32(%d) // VARINT64\n", fory.VARINT64) case types.Uint8: fmt.Fprintf(buf, "\t\tbuf.WriteVaruint32(%d) // UINT8\n", fory.UINT8) case types.Uint16: fmt.Fprintf(buf, "\t\tbuf.WriteVaruint32(%d) // UINT16\n", fory.UINT16) case types.Uint32: - fmt.Fprintf(buf, "\t\tbuf.WriteVaruint32(%d) // UINT32\n", fory.UINT32) + fmt.Fprintf(buf, "\t\tbuf.WriteVaruint32(%d) // VAR_UINT32\n", fory.VAR_UINT32) case types.Uint, types.Uint64: - fmt.Fprintf(buf, "\t\tbuf.WriteVaruint32(%d) // UINT64\n", fory.UINT64) + fmt.Fprintf(buf, "\t\tbuf.WriteVaruint32(%d) // VAR_UINT64\n", 
fory.VAR_UINT64) case types.Float32: fmt.Fprintf(buf, "\t\tbuf.WriteVaruint32(%d) // FLOAT32\n", fory.FLOAT32) case types.Float64: @@ -723,17 +723,17 @@ func generateElementTypeIDWriteInline(buf *bytes.Buffer, elemType types.Type) er case types.Int16: fmt.Fprintf(buf, "\t\t\t\tbuf.WriteVaruint32(%d) // INT16\n", fory.INT16) case types.Int32: - fmt.Fprintf(buf, "\t\t\t\tbuf.WriteVaruint32(%d) // INT32\n", fory.INT32) + fmt.Fprintf(buf, "\t\t\t\tbuf.WriteVaruint32(%d) // VARINT32\n", fory.VARINT32) case types.Int, types.Int64: - fmt.Fprintf(buf, "\t\t\t\tbuf.WriteVaruint32(%d) // INT64\n", fory.INT64) + fmt.Fprintf(buf, "\t\t\t\tbuf.WriteVaruint32(%d) // VARINT64\n", fory.VARINT64) case types.Uint8: fmt.Fprintf(buf, "\t\t\t\tbuf.WriteVaruint32(%d) // UINT8\n", fory.UINT8) case types.Uint16: fmt.Fprintf(buf, "\t\t\t\tbuf.WriteVaruint32(%d) // UINT16\n", fory.UINT16) case types.Uint32: - fmt.Fprintf(buf, "\t\t\t\tbuf.WriteVaruint32(%d) // UINT32\n", fory.UINT32) + fmt.Fprintf(buf, "\t\t\t\tbuf.WriteVaruint32(%d) // VAR_UINT32\n", fory.VAR_UINT32) case types.Uint, types.Uint64: - fmt.Fprintf(buf, "\t\t\t\tbuf.WriteVaruint32(%d) // UINT64\n", fory.UINT64) + fmt.Fprintf(buf, "\t\t\t\tbuf.WriteVaruint32(%d) // VAR_UINT64\n", fory.VAR_UINT64) case types.Float32: fmt.Fprintf(buf, "\t\t\t\tbuf.WriteVaruint32(%d) // FLOAT32\n", fory.FLOAT32) case types.Float64: diff --git a/go/fory/codegen/utils.go b/go/fory/codegen/utils.go index 776f8b5dee..a66a8e5fda 100644 --- a/go/fory/codegen/utils.go +++ b/go/fory/codegen/utils.go @@ -180,17 +180,17 @@ func getTypeID(t types.Type) string { case types.Int16: return "INT16" case types.Int32: - return "INT32" + return "VARINT32" case types.Int, types.Int64: - return "INT64" + return "VARINT64" case types.Uint8: return "UINT8" case types.Uint16: return "UINT16" case types.Uint32: - return "UINT32" + return "VAR_UINT32" case types.Uint, types.Uint64: - return "UINT64" + return "VAR_UINT64" case types.Float32: return "FLOAT32" case 
types.Float64: @@ -240,16 +240,24 @@ func getTypeIDValue(typeID string) int { return int(fory.INT16) // 3 case "INT32": return int(fory.INT32) // 4 + case "VARINT32": + return int(fory.VARINT32) // 5 case "INT64": return int(fory.INT64) // 6 + case "VARINT64": + return int(fory.VARINT64) // 7 case "UINT8": - return int(fory.UINT8) // 100 + return int(fory.UINT8) // 9 case "UINT16": - return int(fory.UINT16) // 101 + return int(fory.UINT16) // 10 case "UINT32": - return int(fory.UINT32) // 102 + return int(fory.UINT32) // 11 + case "VAR_UINT32": + return int(fory.VAR_UINT32) // 12 case "UINT64": - return int(fory.UINT64) // 103 + return int(fory.UINT64) // 13 + case "VAR_UINT64": + return int(fory.VAR_UINT64) // 14 case "FLOAT32": return int(fory.FLOAT32) case "FLOAT64": @@ -293,11 +301,15 @@ func sortFields(fields []*FieldInfo) { // When same size, sort by type id // When same size and type id, sort by snake case field name - // Handle compression types (INT32/INT64/VARINT32/VARINT64) + // Handle compression types (INT32/INT64/VARINT32/VARINT64 and unsigned variants) compressI := f1.TypeID == "INT32" || f1.TypeID == "INT64" || - f1.TypeID == "VARINT32" || f1.TypeID == "VARINT64" + f1.TypeID == "VARINT32" || f1.TypeID == "VARINT64" || + f1.TypeID == "UINT32" || f1.TypeID == "UINT64" || + f1.TypeID == "VAR_UINT32" || f1.TypeID == "VAR_UINT64" compressJ := f2.TypeID == "INT32" || f2.TypeID == "INT64" || - f2.TypeID == "VARINT32" || f2.TypeID == "VARINT64" + f2.TypeID == "VARINT32" || f2.TypeID == "VARINT64" || + f2.TypeID == "UINT32" || f2.TypeID == "UINT64" || + f2.TypeID == "VAR_UINT32" || f2.TypeID == "VAR_UINT64" if compressI != compressJ { return !compressI && compressJ // non-compress comes first From cfc927bb411468e44bdf2f449c8e456783e6fa38 Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Wed, 7 Jan 2026 16:30:13 +0800 Subject: [PATCH 11/44] update c++ unsigned and compressed int support --- cpp/fory/serialization/basic_serializer.h | 8 ++--- 
cpp/fory/serialization/serializer_traits.h | 4 +-- cpp/fory/serialization/struct_serializer.h | 34 ++++++++++++++------ cpp/fory/serialization/unsigned_serializer.h | 8 ++--- 4 files changed, 35 insertions(+), 19 deletions(-) diff --git a/cpp/fory/serialization/basic_serializer.h b/cpp/fory/serialization/basic_serializer.h index de8f99b1bf..6aa3c45c7b 100644 --- a/cpp/fory/serialization/basic_serializer.h +++ b/cpp/fory/serialization/basic_serializer.h @@ -252,9 +252,9 @@ template <> struct Serializer { } }; -/// int32_t serializer +/// int32_t serializer - uses VARINT32 to match Java xlang mode and Rust template <> struct Serializer { - static constexpr TypeId type_id = TypeId::INT32; + static constexpr TypeId type_id = TypeId::VARINT32; static inline void write_type_info(WriteContext &ctx) { ctx.write_varuint32(static_cast(type_id)); @@ -323,9 +323,9 @@ template <> struct Serializer { } }; -/// int64_t serializer +/// int64_t serializer - uses VARINT64 to match Java xlang mode and Rust template <> struct Serializer { - static constexpr TypeId type_id = TypeId::INT64; + static constexpr TypeId type_id = TypeId::VARINT64; static inline void write_type_info(WriteContext &ctx) { ctx.write_varuint32(static_cast(type_id)); diff --git a/cpp/fory/serialization/serializer_traits.h b/cpp/fory/serialization/serializer_traits.h index 7f3ad3d714..1b378ad8c4 100644 --- a/cpp/fory/serialization/serializer_traits.h +++ b/cpp/fory/serialization/serializer_traits.h @@ -444,11 +444,11 @@ template <> struct TypeIndex { }; template <> struct TypeIndex { - static constexpr uint64_t value = static_cast(TypeId::INT32); + static constexpr uint64_t value = static_cast(TypeId::VARINT32); }; template <> struct TypeIndex { - static constexpr uint64_t value = static_cast(TypeId::INT64); + static constexpr uint64_t value = static_cast(TypeId::VARINT64); }; // Note: Unsigned types (uint8_t, uint16_t, uint32_t, uint64_t) use the fallback diff --git a/cpp/fory/serialization/struct_serializer.h 
b/cpp/fory/serialization/struct_serializer.h index d530046f57..b84610d302 100644 --- a/cpp/fory/serialization/struct_serializer.h +++ b/cpp/fory/serialization/struct_serializer.h @@ -122,9 +122,11 @@ inline constexpr bool is_primitive_type_id(TypeId type_id) { type_id == TypeId::VARINT64 || type_id == TypeId::TAGGED_INT64 || type_id == TypeId::FLOAT16 || type_id == TypeId::FLOAT32 || type_id == TypeId::FLOAT64 || - // Unsigned types for native mode (xlang=false) + // Unsigned types type_id == TypeId::UINT8 || type_id == TypeId::UINT16 || - type_id == TypeId::UINT32 || type_id == TypeId::UINT64; + type_id == TypeId::UINT32 || type_id == TypeId::VAR_UINT32 || + type_id == TypeId::UINT64 || type_id == TypeId::VAR_UINT64 || + type_id == TypeId::TAGGED_UINT64; } /// Write a primitive value to buffer at given offset WITHOUT updating @@ -653,16 +655,24 @@ template struct CompileTimeFieldHelpers { switch (static_cast(tid)) { case TypeId::BOOL: case TypeId::INT8: + case TypeId::UINT8: return 1; case TypeId::INT16: + case TypeId::UINT16: case TypeId::FLOAT16: return 2; case TypeId::INT32: case TypeId::VARINT32: + case TypeId::UINT32: + case TypeId::VAR_UINT32: case TypeId::FLOAT32: return 4; case TypeId::INT64: case TypeId::VARINT64: + case TypeId::TAGGED_INT64: + case TypeId::UINT64: + case TypeId::VAR_UINT64: + case TypeId::TAGGED_UINT64: case TypeId::FLOAT64: return 8; default: @@ -674,7 +684,13 @@ template struct CompileTimeFieldHelpers { return tid == static_cast(TypeId::INT32) || tid == static_cast(TypeId::INT64) || tid == static_cast(TypeId::VARINT32) || - tid == static_cast(TypeId::VARINT64); + tid == static_cast(TypeId::VARINT64) || + tid == static_cast(TypeId::TAGGED_INT64) || + tid == static_cast(TypeId::UINT32) || + tid == static_cast(TypeId::UINT64) || + tid == static_cast(TypeId::VAR_UINT32) || + tid == static_cast(TypeId::VAR_UINT64) || + tid == static_cast(TypeId::TAGGED_UINT64); } /// Check if a type ID is an internal (built-in, final) type for group 2. 
@@ -902,11 +918,11 @@ template struct CompileTimeFieldHelpers { /// VARINT32/VARINT64/TAGGED_INT64 also use varint encoding static constexpr bool is_varint_primitive(uint32_t tid) { switch (static_cast(tid)) { - case TypeId::INT32: // int32_t uses zigzag varint per basic_serializer.h - case TypeId::INT64: // int64_t uses zigzag varint per basic_serializer.h + case TypeId::INT32: // int32_t uses zigzag varint per basic_serializer.h + case TypeId::INT64: // int64_t uses zigzag varint per basic_serializer.h case TypeId::VARINT32: // explicit varint type case TypeId::VARINT64: // explicit varint type - case TypeId::TAGGED_INT64: // hybrid int64 encoding + case TypeId::TAGGED_INT64: // hybrid int64 encoding return true; default: return false; @@ -916,10 +932,10 @@ template struct CompileTimeFieldHelpers { /// Get the max varint size in bytes for a type_id (0 if not varint) static constexpr size_t max_varint_bytes(uint32_t tid) { switch (static_cast(tid)) { - case TypeId::INT32: // int32_t uses zigzag varint + case TypeId::INT32: // int32_t uses zigzag varint case TypeId::VARINT32: // explicit varint - return 5; // int32 varint max - case TypeId::INT64: // int64_t uses zigzag varint + return 5; // int32 varint max + case TypeId::INT64: // int64_t uses zigzag varint case TypeId::VARINT64: // explicit varint case TypeId::TAGGED_INT64: return 10; // int64 varint max diff --git a/cpp/fory/serialization/unsigned_serializer.h b/cpp/fory/serialization/unsigned_serializer.h index fd1ca11b0b..1079afc689 100644 --- a/cpp/fory/serialization/unsigned_serializer.h +++ b/cpp/fory/serialization/unsigned_serializer.h @@ -165,9 +165,9 @@ template <> struct Serializer { } }; -/// uint32_t serializer (native mode only) +/// uint32_t serializer - uses VAR_UINT32 to match Rust xlang mode template <> struct Serializer { - static constexpr TypeId type_id = TypeId::UINT32; + static constexpr TypeId type_id = TypeId::VAR_UINT32; static inline void write_type_info(WriteContext &ctx) { 
ctx.write_varuint32(static_cast(type_id)); @@ -230,9 +230,9 @@ template <> struct Serializer { } }; -/// uint64_t serializer (native mode only) +/// uint64_t serializer - uses VAR_UINT64 to match Rust xlang mode template <> struct Serializer { - static constexpr TypeId type_id = TypeId::UINT64; + static constexpr TypeId type_id = TypeId::VAR_UINT64; static inline void write_type_info(WriteContext &ctx) { ctx.write_varuint32(static_cast(type_id)); From e663c1bb254c2f0f491d3009c0d6cd69fafc18b4 Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Wed, 7 Jan 2026 17:13:31 +0800 Subject: [PATCH 12/44] support unsigned and configurable compress types for field --- AGENTS.md | 2 +- python/pyfory/__init__.py | 40 +++++++++++ python/pyfory/_serializer.py | 124 +++++++++++++++++++++++++++++++++++ python/pyfory/buffer.pxd | 20 ++++++ python/pyfory/buffer.pyx | 117 +++++++++++++++++++++++++++++++++ python/pyfory/primitive.pxi | 120 +++++++++++++++++++++++++++++++++ python/pyfory/registry.py | 41 +++++++++++- python/pyfory/serializer.py | 36 ++++++++++ python/pyfory/struct.py | 30 +++++++++ python/pyfory/types.py | 34 ++++++++++ 10 files changed, 560 insertions(+), 4 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 928c8da619..334c581227 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -126,7 +126,7 @@ cd java mvn -T16 install -DskipTests cd fory-core # disable fory cython for faster debugging -FORY_PYTHON_JAVA_CI=1 ENABLE_FORY_CYTHON_SERIALIZATION=0 mvn -T16 test -Dtest=org.apache.fory.xlang.PythonXlangTest +FORY_PYTHON_JAVA_CI=1 ENABLE_FORY_CYTHON_SERIALIZATION=0 ENABLE_FORY_DEBUG_OUTPUT=1 mvn -T16 test -Dtest=org.apache.fory.xlang.PythonXlangTest # enable fory cython FORY_PYTHON_JAVA_CI=1 ENABLE_FORY_CYTHON_SERIALIZATION=1 ENABLE_FORY_DEBUG_OUTPUT=1 mvn -T16 test -Dtest=org.apache.fory.xlang.PythonXlangTest ``` diff --git a/python/pyfory/__init__.py b/python/pyfory/__init__.py index a3d63b245a..79523aed9c 100644 --- a/python/pyfory/__init__.py +++ b/python/pyfory/__init__.py @@ 
-43,6 +43,16 @@ Int16Serializer, Int32Serializer, Int64Serializer, + Varint32Serializer, + Varint64Serializer, + TaggedInt64Serializer, + Uint8Serializer, + Uint16Serializer, + Uint32Serializer, + VarUint32Serializer, + Uint64Serializer, + VarUint64Serializer, + TaggedUint64Serializer, Float32Serializer, Float64Serializer, StringSerializer, @@ -70,6 +80,16 @@ int16, int32, int64, + fixed_int32, + fixed_int64, + tagged_int64, + uint8, + uint16, + uint32, + fixed_uint32, + uint64, + fixed_uint64, + tagged_uint64, float32, float64, # Int8ArrayType, @@ -110,6 +130,16 @@ "int16", "int32", "int64", + "fixed_int32", + "fixed_int64", + "tagged_int64", + "uint8", + "uint16", + "uint32", + "fixed_uint32", + "uint64", + "fixed_uint64", + "tagged_uint64", "float32", "float64", "int16_array", @@ -126,6 +156,16 @@ "Int16Serializer", "Int32Serializer", "Int64Serializer", + "Varint32Serializer", + "Varint64Serializer", + "TaggedInt64Serializer", + "Uint8Serializer", + "Uint16Serializer", + "Uint32Serializer", + "VarUint32Serializer", + "Uint64Serializer", + "VarUint64Serializer", + "TaggedUint64Serializer", "Float32Serializer", "Float64Serializer", "StringSerializer", diff --git a/python/pyfory/_serializer.py b/python/pyfory/_serializer.py index ba0b3d7a89..e9cac7b527 100644 --- a/python/pyfory/_serializer.py +++ b/python/pyfory/_serializer.py @@ -94,6 +94,8 @@ def read(self, buffer): class Int32Serializer(XlangCompatibleSerializer): + """Serializer for INT32/VARINT32 type - uses variable-length encoding for xlang compatibility.""" + def write(self, buffer, value): buffer.write_varint32(value) @@ -101,7 +103,19 @@ def read(self, buffer): return buffer.read_varint32() +class FixedInt32Serializer(XlangCompatibleSerializer): + """Serializer for fixed-width 32-bit signed integer (INT32 type_id=4).""" + + def write(self, buffer, value): + buffer.write_int32(value) + + def read(self, buffer): + return buffer.read_int32() + + class Int64Serializer(Serializer): + """Serializer for 
INT64/VARINT64 type - uses variable-length encoding for xlang compatibility.""" + def xwrite(self, buffer, value): buffer.write_varint64(value) @@ -115,6 +129,116 @@ def read(self, buffer): return buffer.read_varint64() +class FixedInt64Serializer(XlangCompatibleSerializer): + """Serializer for fixed-width 64-bit signed integer (INT64 type_id=6).""" + + def write(self, buffer, value): + buffer.write_int64(value) + + def read(self, buffer): + return buffer.read_int64() + + +class Varint32Serializer(XlangCompatibleSerializer): + """Serializer for VARINT32 type - variable-length encoded signed 32-bit integer.""" + + def write(self, buffer, value): + buffer.write_varint32(value) + + def read(self, buffer): + return buffer.read_varint32() + + +class Varint64Serializer(XlangCompatibleSerializer): + """Serializer for VARINT64 type - variable-length encoded signed 64-bit integer.""" + + def write(self, buffer, value): + buffer.write_varint64(value) + + def read(self, buffer): + return buffer.read_varint64() + + +class TaggedInt64Serializer(XlangCompatibleSerializer): + """Serializer for TAGGED_INT64 type - tagged encoding for signed 64-bit integer.""" + + def write(self, buffer, value): + buffer.write_tagged_int64(value) + + def read(self, buffer): + return buffer.read_tagged_int64() + + +class Uint8Serializer(XlangCompatibleSerializer): + """Serializer for UINT8 type - unsigned 8-bit integer.""" + + def write(self, buffer, value): + buffer.write_uint8(value) + + def read(self, buffer): + return buffer.read_uint8() + + +class Uint16Serializer(XlangCompatibleSerializer): + """Serializer for UINT16 type - unsigned 16-bit integer.""" + + def write(self, buffer, value): + buffer.write_uint16(value) + + def read(self, buffer): + return buffer.read_uint16() + + +class Uint32Serializer(XlangCompatibleSerializer): + """Serializer for UINT32 type - fixed-size unsigned 32-bit integer.""" + + def write(self, buffer, value): + buffer.write_uint32(value) + + def read(self, buffer): + 
return buffer.read_uint32() + + +class VarUint32Serializer(XlangCompatibleSerializer): + """Serializer for VAR_UINT32 type - variable-length encoded unsigned 32-bit integer.""" + + def write(self, buffer, value): + buffer.write_varuint32(value) + + def read(self, buffer): + return buffer.read_varuint32() + + +class Uint64Serializer(XlangCompatibleSerializer): + """Serializer for UINT64 type - fixed-size unsigned 64-bit integer.""" + + def write(self, buffer, value): + buffer.write_uint64(value) + + def read(self, buffer): + return buffer.read_uint64() + + +class VarUint64Serializer(XlangCompatibleSerializer): + """Serializer for VAR_UINT64 type - variable-length encoded unsigned 64-bit integer.""" + + def write(self, buffer, value): + buffer.write_varuint64(value) + + def read(self, buffer): + return buffer.read_varuint64() + + +class TaggedUint64Serializer(XlangCompatibleSerializer): + """Serializer for TAGGED_UINT64 type - tagged encoding for unsigned 64-bit integer.""" + + def write(self, buffer, value): + buffer.write_tagged_uint64(value) + + def read(self, buffer): + return buffer.read_tagged_uint64() + + class Float32Serializer(XlangCompatibleSerializer): def write(self, buffer, value): buffer.write_float(value) diff --git a/python/pyfory/buffer.pxd b/python/pyfory/buffer.pxd index 6938e755ca..1b23ef9895 100644 --- a/python/pyfory/buffer.pxd +++ b/python/pyfory/buffer.pxd @@ -107,6 +107,12 @@ cdef class Buffer: cpdef inline write_int64(self, int64_t value) + cpdef inline write_uint16(self, uint16_t value) + + cpdef inline write_uint32(self, uint32_t value) + + cpdef inline write_uint64(self, uint64_t value) + cpdef inline write_float(self, float value) cpdef inline write_float32(self, float value) @@ -131,6 +137,12 @@ cdef class Buffer: cpdef inline int64_t read_int64(self) + cpdef inline uint16_t read_uint16(self) + + cpdef inline uint32_t read_uint32(self) + + cpdef inline uint64_t read_uint64(self) + cpdef inline float read_float(self) cpdef inline float 
read_float32(self) @@ -155,6 +167,14 @@ cdef class Buffer: cpdef inline int32_t read_varuint32(self) + cpdef inline write_tagged_int64(self, int64_t value) + + cpdef inline int64_t read_tagged_int64(self) + + cpdef inline write_tagged_uint64(self, uint64_t value) + + cpdef inline uint64_t read_tagged_uint64(self) + cpdef put_buffer(self, uint32_t offset, v, int32_t src_index, int32_t length) cdef inline write_c_buffer(self, const uint8_t* value, int32_t length) diff --git a/python/pyfory/buffer.pyx b/python/pyfory/buffer.pyx index a8ad213786..fefb6e810d 100644 --- a/python/pyfory/buffer.pyx +++ b/python/pyfory/buffer.pyx @@ -205,6 +205,21 @@ cdef class Buffer: self.c_buffer_ptr.UnsafePut(self.writer_index, value) self.writer_index += 8 + cpdef inline write_uint16(self, uint16_t value): + self.grow(2) + self.c_buffer_ptr.UnsafePut(self.writer_index, value) + self.writer_index += 2 + + cpdef inline write_uint32(self, uint32_t value): + self.grow(4) + self.c_buffer_ptr.UnsafePut(self.writer_index, value) + self.writer_index += 4 + + cpdef inline write_uint64(self, uint64_t value): + self.grow(8) + self.c_buffer_ptr.UnsafePut(self.writer_index, value) + self.writer_index += 8 + cpdef inline write_float(self, float value): self.grow(4) self.c_buffer_ptr.UnsafePut(self.writer_index, value) @@ -360,6 +375,24 @@ cdef class Buffer: self.reader_index += 8 return value + cpdef inline uint16_t read_uint16(self): + cdef int32_t offset = self.reader_index + self.check_bound(offset, 2) + self.reader_index = offset + 2 + return self.c_buffer_ptr.GetInt16(offset) + + cpdef inline uint32_t read_uint32(self): + cdef int32_t offset = self.reader_index + self.check_bound(offset, 4) + self.reader_index = offset + 4 + return self.c_buffer_ptr.GetInt32(offset) + + cpdef inline uint64_t read_uint64(self): + cdef int32_t offset = self.reader_index + self.check_bound(offset, 8) + self.reader_index = offset + 8 + return self.c_buffer_ptr.GetInt64(offset) + cpdef inline float read_float(self): 
value = self.get_float(self.reader_index) self.reader_index += 4 @@ -568,6 +601,90 @@ cdef class Buffer: result |= b << 56 return result + cpdef inline write_tagged_int64(self, int64_t value): + """Write signed int64 using fory Tagged(Small long as int) encoding. + + If value is in [-1073741824, 1073741823] (fits in 31 bits with sign), + encode as 4 bytes: ((value as i32) << 1). + Otherwise write as 9 bytes: 0b1 | little-endian 8 bytes i64. + """ + cdef int64_t HALF_MIN_INT_VALUE = -1073741824 # i32::MIN / 2 + cdef int64_t HALF_MAX_INT_VALUE = 1073741823 # i32::MAX / 2 + if HALF_MIN_INT_VALUE <= value <= HALF_MAX_INT_VALUE: + # Fits in 31 bits (with sign), encode as 4 bytes with bit 0 = 0 + self.write_int32((value) << 1) + else: + # Write flag byte (0b1) followed by 8-byte i64 + self.grow(9) + ((self._c_address + self.writer_index))[0] = 0b1 + self.writer_index += 1 + self.c_buffer_ptr.UnsafePut(self.writer_index, value) + self.writer_index += 8 + + cpdef inline int64_t read_tagged_int64(self): + """Read signed fory Tagged(Small long as int) encoded int64. + + If bit 0 of the first 4 bytes is 0, return the value >> 1 (arithmetic shift). + Otherwise, skip the flag byte and read 8 bytes as int64. + """ + cdef int32_t offset = self.reader_index + cdef int32_t i + cdef int64_t value + self.check_bound(offset, 4) + i = self.c_buffer_ptr.GetInt32(offset) + if (i & 0b1) != 0b1: + # Bit 0 is 0, small value encoded in 4 bytes + self.reader_index = offset + 4 + return (i >> 1) # arithmetic right shift preserves sign + else: + # Bit 0 is 1, big value: skip flag byte and read 8 bytes + self.check_bound(offset, 9) + self.reader_index = offset + 1 + value = self.c_buffer_ptr.GetInt64(self.reader_index) + self.reader_index += 8 + return value + + cpdef inline write_tagged_uint64(self, uint64_t value): + """Write unsigned uint64 using fory Tagged(Small long as int) encoding. + + If value is in [0, 0x7fffffff], encode as 4 bytes: ((value as u32) << 1). 
+ Otherwise write as 9 bytes: 0b1 | little-endian 8 bytes u64. + """ + cdef uint64_t MAX_SMALL_VALUE = 0x7fffffff # i32::MAX as u64 + if value <= MAX_SMALL_VALUE: + # Fits in 31 bits, encode as 4 bytes with bit 0 = 0 + self.write_int32((value) << 1) + else: + # Write flag byte (0b1) followed by 8-byte u64 + self.grow(9) + ((self._c_address + self.writer_index))[0] = 0b1 + self.writer_index += 1 + self.c_buffer_ptr.UnsafePut(self.writer_index, value) + self.writer_index += 8 + + cpdef inline uint64_t read_tagged_uint64(self): + """Read unsigned fory Tagged(Small long as int) encoded uint64. + + If bit 0 of the first 4 bytes is 0, return the value >> 1. + Otherwise, skip the flag byte and read 8 bytes as uint64. + """ + cdef int32_t offset = self.reader_index + cdef uint32_t i + cdef uint64_t value + self.check_bound(offset, 4) + i = self.c_buffer_ptr.GetInt32(offset) + if (i & 0b1) != 0b1: + # Bit 0 is 0, small value encoded in 4 bytes + self.reader_index = offset + 4 + return (i >> 1) + else: + # Bit 0 is 1, big value: skip flag byte and read 8 bytes + self.check_bound(offset, 9) + self.reader_index = offset + 1 + value = self.c_buffer_ptr.GetInt64(self.reader_index) + self.reader_index += 8 + return value + cdef inline write_c_buffer(self, const uint8_t* value, int32_t length): self.write_varuint32(length) if length <= 0: # access an emtpy buffer may raise out-of-bound exception. 
diff --git a/python/pyfory/primitive.pxi b/python/pyfory/primitive.pxi index 92e85cd71d..ed25317779 100644 --- a/python/pyfory/primitive.pxi +++ b/python/pyfory/primitive.pxi @@ -66,6 +66,126 @@ cdef class Int64Serializer(XlangCompatibleSerializer): return buffer.read_varint64() +@cython.final +cdef class FixedInt32Serializer(XlangCompatibleSerializer): + """Serializer for fixed-width 32-bit signed integer (INT32 type_id=4).""" + cpdef inline write(self, Buffer buffer, value): + buffer.write_int32(value) + + cpdef inline read(self, Buffer buffer): + return buffer.read_int32() + + +@cython.final +cdef class FixedInt64Serializer(XlangCompatibleSerializer): + """Serializer for fixed-width 64-bit signed integer (INT64 type_id=6).""" + cpdef inline write(self, Buffer buffer, value): + buffer.write_int64(value) + + cpdef inline read(self, Buffer buffer): + return buffer.read_int64() + + +@cython.final +cdef class Varint32Serializer(XlangCompatibleSerializer): + """Serializer for VARINT32 type - variable-length encoded signed 32-bit integer.""" + cpdef inline write(self, Buffer buffer, value): + buffer.write_varint32(value) + + cpdef inline read(self, Buffer buffer): + return buffer.read_varint32() + + +@cython.final +cdef class Varint64Serializer(XlangCompatibleSerializer): + """Serializer for VARINT64 type - variable-length encoded signed 64-bit integer.""" + cpdef inline write(self, Buffer buffer, value): + buffer.write_varint64(value) + + cpdef inline read(self, Buffer buffer): + return buffer.read_varint64() + + +@cython.final +cdef class TaggedInt64Serializer(XlangCompatibleSerializer): + """Serializer for TAGGED_INT64 type - tagged encoding for signed 64-bit integer.""" + cpdef inline write(self, Buffer buffer, value): + buffer.write_tagged_int64(value) + + cpdef inline read(self, Buffer buffer): + return buffer.read_tagged_int64() + + +@cython.final +cdef class Uint8Serializer(XlangCompatibleSerializer): + """Serializer for UINT8 type - unsigned 8-bit integer.""" 
+ cpdef inline write(self, Buffer buffer, value): + buffer.write_uint8(value) + + cpdef inline read(self, Buffer buffer): + return buffer.read_uint8() + + +@cython.final +cdef class Uint16Serializer(XlangCompatibleSerializer): + """Serializer for UINT16 type - unsigned 16-bit integer.""" + cpdef inline write(self, Buffer buffer, value): + buffer.write_uint16(value) + + cpdef inline read(self, Buffer buffer): + return buffer.read_uint16() + + +@cython.final +cdef class Uint32Serializer(XlangCompatibleSerializer): + """Serializer for UINT32 type - fixed-size unsigned 32-bit integer.""" + cpdef inline write(self, Buffer buffer, value): + buffer.write_uint32(value) + + cpdef inline read(self, Buffer buffer): + return buffer.read_uint32() + + +@cython.final +cdef class VarUint32Serializer(XlangCompatibleSerializer): + """Serializer for VAR_UINT32 type - variable-length encoded unsigned 32-bit integer.""" + cpdef inline write(self, Buffer buffer, value): + buffer.write_varuint32(value) + + cpdef inline read(self, Buffer buffer): + return buffer.read_varuint32() + + +@cython.final +cdef class Uint64Serializer(XlangCompatibleSerializer): + """Serializer for UINT64 type - fixed-size unsigned 64-bit integer.""" + cpdef inline write(self, Buffer buffer, value): + buffer.write_uint64(value) + + cpdef inline read(self, Buffer buffer): + return buffer.read_uint64() + + +@cython.final +cdef class VarUint64Serializer(XlangCompatibleSerializer): + """Serializer for VAR_UINT64 type - variable-length encoded unsigned 64-bit integer.""" + cpdef inline write(self, Buffer buffer, value): + buffer.write_varuint64(value) + + cpdef inline read(self, Buffer buffer): + return buffer.read_varuint64() + + +@cython.final +cdef class TaggedUint64Serializer(XlangCompatibleSerializer): + """Serializer for TAGGED_UINT64 type - tagged encoding for unsigned 64-bit integer.""" + cpdef inline write(self, Buffer buffer, value): + buffer.write_tagged_uint64(value) + + cpdef inline read(self, Buffer 
buffer): + return buffer.read_tagged_uint64() + + @cython.final cdef class Float32Serializer(XlangCompatibleSerializer): cpdef inline write(self, Buffer buffer, value): diff --git a/python/pyfory/registry.py b/python/pyfory/registry.py index 366b244f1b..daec5fc9b9 100644 --- a/python/pyfory/registry.py +++ b/python/pyfory/registry.py @@ -43,6 +43,16 @@ Int16Serializer, Int32Serializer, Int64Serializer, + FixedInt32Serializer, + FixedInt64Serializer, + TaggedInt64Serializer, + Uint8Serializer, + Uint16Serializer, + Uint32Serializer, + VarUint32Serializer, + Uint64Serializer, + VarUint64Serializer, + TaggedUint64Serializer, Float32Serializer, Float64Serializer, StringSerializer, @@ -77,6 +87,16 @@ int16, int32, int64, + fixed_int32, + fixed_int64, + tagged_int64, + uint8, + uint16, + uint32, + fixed_uint32, + uint64, + fixed_uint64, + tagged_uint64, float32, float64, is_struct_type, @@ -261,11 +281,26 @@ def _initialize_common(self): register = functools.partial(self._register_type, internal=True) register(None, type_id=TypeId.UNKNOWN, serializer=NoneSerializer) register(bool, type_id=TypeId.BOOL, serializer=BooleanSerializer) + # Signed integers + # Note: int32/int64 use VARINT32/VARINT64 for xlang compatibility (matches Java/Rust) + # fixed_int32/fixed_int64 use INT32/INT64 for fixed-width encoding register(int8, type_id=TypeId.INT8, serializer=ByteSerializer) register(int16, type_id=TypeId.INT16, serializer=Int16Serializer) - register(int32, type_id=TypeId.INT32, serializer=Int32Serializer) - register(int64, type_id=TypeId.INT64, serializer=Int64Serializer) - register(int, type_id=TypeId.INT64, serializer=Int64Serializer) + register(int32, type_id=TypeId.VARINT32, serializer=Int32Serializer) + register(fixed_int32, type_id=TypeId.INT32, serializer=FixedInt32Serializer) + register(int64, type_id=TypeId.VARINT64, serializer=Int64Serializer) + register(int, type_id=TypeId.VARINT64, serializer=Int64Serializer) + register(fixed_int64, type_id=TypeId.INT64, 
serializer=FixedInt64Serializer) + register(tagged_int64, type_id=TypeId.TAGGED_INT64, serializer=TaggedInt64Serializer) + # Unsigned integers + register(uint8, type_id=TypeId.UINT8, serializer=Uint8Serializer) + register(uint16, type_id=TypeId.UINT16, serializer=Uint16Serializer) + register(uint32, type_id=TypeId.VAR_UINT32, serializer=VarUint32Serializer) + register(fixed_uint32, type_id=TypeId.UINT32, serializer=Uint32Serializer) + register(uint64, type_id=TypeId.VAR_UINT64, serializer=VarUint64Serializer) + register(fixed_uint64, type_id=TypeId.UINT64, serializer=Uint64Serializer) + register(tagged_uint64, type_id=TypeId.TAGGED_UINT64, serializer=TaggedUint64Serializer) + # Floats register( float32, type_id=TypeId.FLOAT32, diff --git a/python/pyfory/serializer.py b/python/pyfory/serializer.py index 6886b5fa54..d68c5736f3 100644 --- a/python/pyfory/serializer.py +++ b/python/pyfory/serializer.py @@ -51,6 +51,18 @@ Int16Serializer, Int32Serializer, Int64Serializer, + FixedInt32Serializer, + FixedInt64Serializer, + Varint32Serializer, + Varint64Serializer, + TaggedInt64Serializer, + Uint8Serializer, + Uint16Serializer, + Uint32Serializer, + VarUint32Serializer, + Uint64Serializer, + VarUint64Serializer, + TaggedUint64Serializer, Float32Serializer, Float64Serializer, StringSerializer, @@ -75,6 +87,18 @@ Int16Serializer, Int32Serializer, Int64Serializer, + FixedInt32Serializer, + FixedInt64Serializer, + Varint32Serializer, + Varint64Serializer, + TaggedInt64Serializer, + Uint8Serializer, + Uint16Serializer, + Uint32Serializer, + VarUint32Serializer, + Uint64Serializer, + VarUint64Serializer, + TaggedUint64Serializer, Float32Serializer, Float64Serializer, StringSerializer, @@ -1263,6 +1287,18 @@ def xread(self, buffer): "Int16Serializer", "Int32Serializer", "Int64Serializer", + "FixedInt32Serializer", + "FixedInt64Serializer", + "Varint32Serializer", + "Varint64Serializer", + "TaggedInt64Serializer", + "Uint8Serializer", + "Uint16Serializer", + "Uint32Serializer", + 
"VarUint32Serializer", + "Uint64Serializer", + "VarUint64Serializer", + "TaggedUint64Serializer", "Float32Serializer", "Float64Serializer", "StringSerializer", diff --git a/python/pyfory/struct.py b/python/pyfory/struct.py index 86ba1c965c..af8f45fd8a 100644 --- a/python/pyfory/struct.py +++ b/python/pyfory/struct.py @@ -33,6 +33,16 @@ int16, int32, int64, + fixed_int32, + fixed_int64, + tagged_int64, + uint8, + uint16, + uint32, + fixed_uint32, + uint64, + fixed_uint64, + tagged_uint64, float32, float64, is_py_array_type, @@ -1007,12 +1017,26 @@ def _replace(self): basic_types = { bool, + # Signed integers int8, int16, int32, + fixed_int32, int64, + fixed_int64, + tagged_int64, + # Unsigned integers + uint8, + uint16, + uint32, + fixed_uint32, + uint64, + fixed_uint64, + tagged_uint64, + # Floats float32, float64, + # Python native types int, float, str, @@ -1130,10 +1154,16 @@ def sorter(item): def numeric_sorter(item): id_ = item[0] compress = id_ in { + # Signed compressed types TypeId.INT32, TypeId.INT64, TypeId.VARINT32, TypeId.VARINT64, + TypeId.TAGGED_INT64, + # Unsigned compressed types + TypeId.VAR_UINT32, + TypeId.VAR_UINT64, + TypeId.TAGGED_UINT64, } # Sort by: compress flag, -size (largest first), -type_id (higher type ID first), field_name # Java sorts by size (largest first), then by primitive type ID (descending) diff --git a/python/pyfory/types.py b/python/pyfory/types.py index 0fb14c1549..7d97d91f3e 100644 --- a/python/pyfory/types.py +++ b/python/pyfory/types.py @@ -169,9 +169,19 @@ def is_type_share_meta(type_id: int) -> bool: TypeId.NAMED_COMPATIBLE_STRUCT, } int8 = TypeVar("int8", bound=int) +uint8 = TypeVar("uint8", bound=int) int16 = TypeVar("int16", bound=int) +uint16 = TypeVar("uint16", bound=int) int32 = TypeVar("int32", bound=int) +uint32 = TypeVar("uint32", bound=int) +fixed_int32 = TypeVar("fixed_int32", bound=int) +fixed_uint32 = TypeVar("fixed_uint32", bound=int) int64 = TypeVar("int64", bound=int) +uint64 = TypeVar("uint64", 
bound=int) +fixed_int64 = TypeVar("fixed_int64", bound=int) +tagged_int64 = TypeVar("tagged_int64", bound=int) +fixed_uint64 = TypeVar("fixed_uint64", bound=int) +tagged_uint64 = TypeVar("tagged_uint64", bound=int) float32 = TypeVar("float32", bound=float) float64 = TypeVar("float64", bound=float) @@ -188,10 +198,23 @@ def is_type_share_meta(type_id: int) -> bool: _primitive_types_ids = { TypeId.BOOL, + # Signed integers TypeId.INT8, TypeId.INT16, TypeId.INT32, + TypeId.VARINT32, TypeId.INT64, + TypeId.VARINT64, + TypeId.TAGGED_INT64, + # Unsigned integers + TypeId.UINT8, + TypeId.UINT16, + TypeId.UINT32, + TypeId.VAR_UINT32, + TypeId.UINT64, + TypeId.VAR_UINT64, + TypeId.TAGGED_UINT64, + # Floats TypeId.FLOAT16, TypeId.FLOAT32, TypeId.FLOAT64, @@ -209,12 +232,23 @@ def is_primitive_type(type_) -> bool: _primitive_type_sizes = { TypeId.BOOL: 1, + # Signed integers TypeId.INT8: 1, TypeId.INT16: 2, TypeId.INT32: 4, TypeId.VARINT32: 4, TypeId.INT64: 8, TypeId.VARINT64: 8, + TypeId.TAGGED_INT64: 8, + # Unsigned integers + TypeId.UINT8: 1, + TypeId.UINT16: 2, + TypeId.UINT32: 4, + TypeId.VAR_UINT32: 4, + TypeId.UINT64: 8, + TypeId.VAR_UINT64: 8, + TypeId.TAGGED_UINT64: 8, + # Floats TypeId.FLOAT16: 2, TypeId.FLOAT32: 4, TypeId.FLOAT64: 8, From 852d92c2ff95b3034fe629f2cb3118155f089ed4 Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Wed, 7 Jan 2026 17:22:46 +0800 Subject: [PATCH 13/44] add javadoc to annotation --- .../org/apache/fory/annotation/Int32Type.java | 57 +++++++++++++++ .../org/apache/fory/annotation/Int64Type.java | 59 +++++++++++++++ .../apache/fory/annotation/Uint16Type.java | 45 ++++++++++++ .../apache/fory/annotation/Uint32Type.java | 66 +++++++++++++++++ .../apache/fory/annotation/Uint64Type.java | 71 +++++++++++++++++++ .../org/apache/fory/annotation/Uint8Type.java | 45 ++++++++++++ 6 files changed, 343 insertions(+) diff --git a/java/fory-core/src/main/java/org/apache/fory/annotation/Int32Type.java 
b/java/fory-core/src/main/java/org/apache/fory/annotation/Int32Type.java index e0adcfc14a..7d8048feb6 100644 --- a/java/fory-core/src/main/java/org/apache/fory/annotation/Int32Type.java +++ b/java/fory-core/src/main/java/org/apache/fory/annotation/Int32Type.java @@ -1,5 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + package org.apache.fory.annotation; +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +/** + * Annotation to specify encoding options for 32-bit signed integer fields. + * + *

    When applied to a field of type {@code int} or {@code Integer}, this annotation controls how + * the value is serialized: + * + *

      + *
    • {@code compress=true} (default): Uses variable-length encoding (VARINT32, type_id=5) which + * is more compact for small values + *
    • {@code compress=false}: Uses fixed 4-byte encoding (INT32, type_id=4) which has consistent + * size + *
    + * + *

    Example usage: + * + *

    {@code
    + * public class MyStruct {
    + *   @Int32Type(compress = true)  // Uses varint encoding (default)
    + *   int compactId;
    + *
    + *   @Int32Type(compress = false) // Uses fixed 4-byte encoding
    + *   int fixedId;
    + * }
    + * }
    + */ +@Retention(RetentionPolicy.RUNTIME) +@Target({ElementType.FIELD, ElementType.METHOD}) public @interface Int32Type { + /** + * Whether to use variable-length compression for this int32 field. + * + * @return true to use VARINT32 encoding (compact for small values), false to use fixed INT32 + * encoding (4 bytes) + */ boolean compress() default true; } diff --git a/java/fory-core/src/main/java/org/apache/fory/annotation/Int64Type.java b/java/fory-core/src/main/java/org/apache/fory/annotation/Int64Type.java index 60772286d8..1c793bc5be 100644 --- a/java/fory-core/src/main/java/org/apache/fory/annotation/Int64Type.java +++ b/java/fory-core/src/main/java/org/apache/fory/annotation/Int64Type.java @@ -1,7 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + package org.apache.fory.annotation; +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; import org.apache.fory.config.LongEncoding; +/** + * Annotation to specify encoding options for 64-bit signed integer fields. + * + *

    When applied to a field of type {@code long} or {@code Long}, this annotation controls how the + * value is serialized using different encoding strategies: + * + *

      + *
    • {@link LongEncoding#VARINT64} (default): Variable-length encoding, compact for small values + * (type_id=7) + *
    • {@link LongEncoding#INT64}: Fixed 8-byte encoding, consistent size (type_id=6) + *
    • {@link LongEncoding#TAGGED_INT64}: Tagged encoding that uses 4 bytes for values in range + * [-1073741824, 1073741823], otherwise 9 bytes (type_id=8) + *
    + * + *

    Example usage: + * + *

    {@code
    + * public class MyStruct {
    + *   @Int64Type(encoding = LongEncoding.VARINT64)  // Variable-length (default)
    + *   long compactId;
    + *
    + *   @Int64Type(encoding = LongEncoding.INT64)     // Fixed 8-byte
    + *   long fixedTimestamp;
    + *
    + *   @Int64Type(encoding = LongEncoding.TAGGED_INT64) // Tagged encoding
    + *   long taggedValue;
    + * }
    + * }
    + */ +@Retention(RetentionPolicy.RUNTIME) +@Target({ElementType.FIELD, ElementType.METHOD}) public @interface Int64Type { + /** + * The encoding strategy to use for this int64 field. + * + * @return the encoding type for serialization + */ LongEncoding encoding() default LongEncoding.VARINT64; } diff --git a/java/fory-core/src/main/java/org/apache/fory/annotation/Uint16Type.java b/java/fory-core/src/main/java/org/apache/fory/annotation/Uint16Type.java index 1e6aacc074..f93d89d660 100644 --- a/java/fory-core/src/main/java/org/apache/fory/annotation/Uint16Type.java +++ b/java/fory-core/src/main/java/org/apache/fory/annotation/Uint16Type.java @@ -1,3 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + package org.apache.fory.annotation; +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +/** + * Annotation to mark a field as an unsigned 16-bit integer. + * + *

    When applied to a field of type {@code short}, {@code int}, or {@code char}, this annotation + * indicates that the value should be serialized as an unsigned 16-bit integer (UINT16, type_id=10) + * with a valid range of [0, 65535]. + * + *

    This is useful for compatibility with languages that have native unsigned integer types (e.g., + * Rust's u16, Go's uint16, C++'s uint16_t). + * + *

    Example usage: + * + *

    {@code
    + * public class MyStruct {
    + *   @Uint16Type
    + *   int port;  // Will be serialized as unsigned 16-bit [0, 65535]
    + * }
    + * }
    + */ +@Retention(RetentionPolicy.RUNTIME) +@Target({ElementType.FIELD, ElementType.METHOD}) public @interface Uint16Type {} diff --git a/java/fory-core/src/main/java/org/apache/fory/annotation/Uint32Type.java b/java/fory-core/src/main/java/org/apache/fory/annotation/Uint32Type.java index 47953f2926..2470567e33 100644 --- a/java/fory-core/src/main/java/org/apache/fory/annotation/Uint32Type.java +++ b/java/fory-core/src/main/java/org/apache/fory/annotation/Uint32Type.java @@ -1,5 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + package org.apache.fory.annotation; +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +/** + * Annotation to mark a field as an unsigned 32-bit integer. + * + *

    When applied to a field of type {@code int} or {@code long}, this annotation indicates that + * the value should be serialized as an unsigned 32-bit integer with a valid range of [0, + * 4294967295]. + * + *

      + *
    • {@code compress=true} (default): Uses variable-length encoding (VAR_UINT32, type_id=12) + * which is more compact for small values + *
    • {@code compress=false}: Uses fixed 4-byte encoding (UINT32, type_id=11) which has + * consistent size + *
    + * + *

    Benefits: + * + *

      + *
    • With {@code compress=true}: skips zigzag encoding overhead for non-negative values + *
    • Compatible with languages that have native unsigned integer types (e.g., Rust's u32, Go's + * uint32, C++'s uint32_t) + *
    + * + *

    Example usage: + * + *

    {@code
    + * public class MyStruct {
    + *   @Uint32Type(compress = true)  // Uses varuint encoding (default)
    + *   long compactCount;
    + *
    + *   @Uint32Type(compress = false) // Uses fixed 4-byte encoding
    + *   long fixedCount;
    + * }
    + * }
    + */ +@Retention(RetentionPolicy.RUNTIME) +@Target({ElementType.FIELD, ElementType.METHOD}) public @interface Uint32Type { + /** + * Whether to use variable-length compression for this uint32 field. + * + * @return true to use VAR_UINT32 encoding (compact for small values), false to use fixed UINT32 + * encoding (4 bytes) + */ boolean compress() default true; } diff --git a/java/fory-core/src/main/java/org/apache/fory/annotation/Uint64Type.java b/java/fory-core/src/main/java/org/apache/fory/annotation/Uint64Type.java index 12ed5482eb..1faac580a2 100644 --- a/java/fory-core/src/main/java/org/apache/fory/annotation/Uint64Type.java +++ b/java/fory-core/src/main/java/org/apache/fory/annotation/Uint64Type.java @@ -1,7 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + package org.apache.fory.annotation; +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; import org.apache.fory.config.LongEncoding; +/** + * Annotation to mark a field as an unsigned 64-bit integer. + * + *

    When applied to a field of type {@code long}, this annotation indicates that the value should + * be serialized as an unsigned 64-bit integer with a valid range of [0, 18446744073709551615]. + * + *

    Different encoding strategies are available: + * + *

      + *
    • {@link LongEncoding#VARINT64} (default): Variable-length encoding (VAR_UINT64, type_id=14), + * compact for small values + *
    • {@link LongEncoding#INT64}: Fixed 8-byte encoding (UINT64, type_id=13), consistent size + *
    • {@link LongEncoding#TAGGED_INT64}: Tagged encoding (TAGGED_UINT64, type_id=15) that uses 4 + * bytes for values in range [0, 2147483647], otherwise 9 bytes + *
    + * + *

    Benefits: + * + *

      + *
    • With {@link LongEncoding#VARINT64}: skips zigzag encoding overhead for non-negative values + *
    • With {@link LongEncoding#TAGGED_INT64}: uses unsigned range [0, 2147483647] for 4-byte + * encoding instead of signed range [-1073741824, 1073741823] + *
    • Compatible with languages that have native unsigned integer types (e.g., Rust's u64, Go's + * uint64, C++'s uint64_t) + *
    + * + *

    Example usage: + * + *

    {@code
    + * public class MyStruct {
    + *   @Uint64Type(encoding = LongEncoding.VARINT64)  // Variable-length (default)
    + *   long compactId;
    + *
    + *   @Uint64Type(encoding = LongEncoding.INT64)     // Fixed 8-byte
    + *   long fixedTimestamp;
    + *
    + *   @Uint64Type(encoding = LongEncoding.TAGGED_INT64) // Tagged encoding
    + *   long taggedValue;
    + * }
    + * }
    + */ +@Retention(RetentionPolicy.RUNTIME) +@Target({ElementType.FIELD, ElementType.METHOD}) public @interface Uint64Type { + /** + * The encoding strategy to use for this uint64 field. + * + * @return the encoding type for serialization + */ LongEncoding encoding() default LongEncoding.VARINT64; } diff --git a/java/fory-core/src/main/java/org/apache/fory/annotation/Uint8Type.java b/java/fory-core/src/main/java/org/apache/fory/annotation/Uint8Type.java index d22bccd585..2393278a2b 100644 --- a/java/fory-core/src/main/java/org/apache/fory/annotation/Uint8Type.java +++ b/java/fory-core/src/main/java/org/apache/fory/annotation/Uint8Type.java @@ -1,3 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + package org.apache.fory.annotation; +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +/** + * Annotation to mark a field as an unsigned 8-bit integer. + * + *

    When applied to a field of type {@code byte}, {@code short}, or {@code int}, this annotation + * indicates that the value should be serialized as an unsigned 8-bit integer (UINT8, type_id=9) + * with a valid range of [0, 255]. + * + *

    This is useful for compatibility with languages that have native unsigned integer types (e.g., + * Rust's u8, Go's uint8, C++'s uint8_t). + * + *

    Example usage: + * + *

    {@code
    + * public class MyStruct {
    + *   @Uint8Type
    + *   short flags;  // Will be serialized as unsigned 8-bit [0, 255]
    + * }
    + * }
    + */ +@Retention(RetentionPolicy.RUNTIME) +@Target({ElementType.FIELD, ElementType.METHOD}) public @interface Uint8Type {} From 51688b0bb191db3fd105df03f0870b95666c625e Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Wed, 7 Jan 2026 17:46:02 +0800 Subject: [PATCH 14/44] add unsgined fields xlang tests --- cpp/fory/serialization/xlang_test_main.cc | 250 ++++++++++++++++++ cpp/fory/util/buffer.h | 79 ++++++ go/fory/buffer.go | 103 ++++++++ go/fory/tests/xlang/xlang_test_main.go | 195 ++++++++++++++ .../org/apache/fory/annotation/Int64Type.java | 4 +- .../apache/fory/annotation/Uint64Type.java | 5 +- .../org/apache/fory/xlang/XlangTestBase.java | 209 +++++++++++++++ python/pyfory/tests/xlang_test_main.py | 175 ++++++++++++ rust/tests/tests/test_cross_language.rs | 145 ++++++++++ 9 files changed, 1161 insertions(+), 4 deletions(-) diff --git a/cpp/fory/serialization/xlang_test_main.cc b/cpp/fory/serialization/xlang_test_main.cc index d0558f0722..bd15fe4cba 100644 --- a/cpp/fory/serialization/xlang_test_main.cc +++ b/cpp/fory/serialization/xlang_test_main.cc @@ -580,6 +580,103 @@ struct CircularRefStruct { FORY_STRUCT(CircularRefStruct, name, selfRef); FORY_FIELD_TAGS(CircularRefStruct, (name, 0), (selfRef, 1, nullable, ref)); +// ============================================================================ +// Unsigned Number Test Types +// ============================================================================ + +// UnsignedSchemaConsistent (type id 501) +// Test struct for unsigned numbers in SCHEMA_CONSISTENT mode. +// All fields use the same nullability as Java. +// Note: C++ uses std::optional for nullable fields. 
+struct UnsignedSchemaConsistent { + // Primitive unsigned fields (non-nullable) + uint8_t u8; + uint16_t u16; + uint32_t u32Var; // VAR_UINT32 - variable-length + uint32_t u32Fixed; // UINT32 - fixed 4-byte + uint64_t u64Var; // VAR_UINT64 - variable-length + uint64_t u64Fixed; // UINT64 - fixed 8-byte + uint64_t u64Tagged; // TAGGED_UINT64 + + // Nullable unsigned fields (using std::optional) + std::optional u8Nullable; + std::optional u16Nullable; + std::optional u32VarNullable; + std::optional u32FixedNullable; + std::optional u64VarNullable; + std::optional u64FixedNullable; + std::optional u64TaggedNullable; + + bool operator==(const UnsignedSchemaConsistent &other) const { + return u8 == other.u8 && u16 == other.u16 && u32Var == other.u32Var && + u32Fixed == other.u32Fixed && u64Var == other.u64Var && + u64Fixed == other.u64Fixed && u64Tagged == other.u64Tagged && + u8Nullable == other.u8Nullable && u16Nullable == other.u16Nullable && + u32VarNullable == other.u32VarNullable && + u32FixedNullable == other.u32FixedNullable && + u64VarNullable == other.u64VarNullable && + u64FixedNullable == other.u64FixedNullable && + u64TaggedNullable == other.u64TaggedNullable; + } +}; +FORY_STRUCT(UnsignedSchemaConsistent, u8, u16, u32Var, u32Fixed, u64Var, + u64Fixed, u64Tagged, u8Nullable, u16Nullable, u32VarNullable, + u32FixedNullable, u64VarNullable, u64FixedNullable, + u64TaggedNullable); +FORY_FIELD_TAGS(UnsignedSchemaConsistent, (u8, 0), (u16, 1), (u32Var, 2), + (u32Fixed, 3), (u64Var, 4), (u64Fixed, 5), (u64Tagged, 6), + (u8Nullable, 7, nullable), (u16Nullable, 8, nullable), + (u32VarNullable, 9, nullable), (u32FixedNullable, 10, nullable), + (u64VarNullable, 11, nullable), + (u64FixedNullable, 12, nullable), + (u64TaggedNullable, 13, nullable)); + +// UnsignedSchemaCompatible (type id 502) +// Test struct for unsigned numbers in COMPATIBLE mode. 
+// Group 1: std::optional types (nullable in C++, non-nullable in Java) +// Group 2: Non-optional types with Field2 suffix (non-nullable in C++, nullable +// in Java) +struct UnsignedSchemaCompatible { + // Group 1: Nullable in C++ (std::optional), non-nullable in Java + std::optional u8; + std::optional u16; + std::optional u32Var; + std::optional u32Fixed; + std::optional u64Var; + std::optional u64Fixed; + std::optional u64Tagged; + + // Group 2: Non-nullable in C++, nullable in Java + uint8_t u8Field2; + uint16_t u16Field2; + uint32_t u32VarField2; + uint32_t u32FixedField2; + uint64_t u64VarField2; + uint64_t u64FixedField2; + uint64_t u64TaggedField2; + + bool operator==(const UnsignedSchemaCompatible &other) const { + return u8 == other.u8 && u16 == other.u16 && u32Var == other.u32Var && + u32Fixed == other.u32Fixed && u64Var == other.u64Var && + u64Fixed == other.u64Fixed && u64Tagged == other.u64Tagged && + u8Field2 == other.u8Field2 && u16Field2 == other.u16Field2 && + u32VarField2 == other.u32VarField2 && + u32FixedField2 == other.u32FixedField2 && + u64VarField2 == other.u64VarField2 && + u64FixedField2 == other.u64FixedField2 && + u64TaggedField2 == other.u64TaggedField2; + } +}; +FORY_STRUCT(UnsignedSchemaCompatible, u8, u16, u32Var, u32Fixed, u64Var, + u64Fixed, u64Tagged, u8Field2, u16Field2, u32VarField2, + u32FixedField2, u64VarField2, u64FixedField2, u64TaggedField2); +FORY_FIELD_TAGS(UnsignedSchemaCompatible, (u8, 0, nullable), (u16, 1, nullable), + (u32Var, 2, nullable), (u32Fixed, 3, nullable), + (u64Var, 4, nullable), (u64Fixed, 5, nullable), + (u64Tagged, 6, nullable), (u8Field2, 7), (u16Field2, 8), + (u32VarField2, 9), (u32FixedField2, 10), (u64VarField2, 11), + (u64FixedField2, 12), (u64TaggedField2, 13)); + namespace fory { namespace serialization { @@ -760,6 +857,8 @@ void RunTestRefSchemaConsistent(const std::string &data_file); void RunTestRefCompatible(const std::string &data_file); void RunTestCircularRefSchemaConsistent(const 
std::string &data_file); void RunTestCircularRefCompatible(const std::string &data_file); +void RunTestUnsignedSchemaConsistent(const std::string &data_file); +void RunTestUnsignedSchemaCompatible(const std::string &data_file); } // namespace int main(int argc, char **argv) { @@ -859,6 +958,10 @@ int main(int argc, char **argv) { RunTestCircularRefSchemaConsistent(data_file); } else if (case_name == "test_circular_ref_compatible") { RunTestCircularRefCompatible(data_file); + } else if (case_name == "test_unsigned_schema_consistent") { + RunTestUnsignedSchemaConsistent(data_file); + } else if (case_name == "test_unsigned_schema_compatible") { + RunTestUnsignedSchemaCompatible(data_file); } else { Fail("Unknown test case: " + case_name); } @@ -2453,4 +2556,151 @@ void RunTestCircularRefCompatible(const std::string &data_file) { WriteFile(data_file, out); } +// ============================================================================ +// Unsigned Number Tests +// ============================================================================ + +void RunTestUnsignedSchemaConsistent(const std::string &data_file) { + auto bytes = ReadFile(data_file); + // SCHEMA_CONSISTENT mode: compatible=false, xlang=true + auto fory = BuildFory(false, true, false, false); + EnsureOk(fory.register_struct(501), + "register UnsignedSchemaConsistent"); + + Buffer buffer = MakeBuffer(bytes); + auto obj = ReadNext(fory, buffer); + + // Verify primitive unsigned fields + if (obj.u8 != 200) { + Fail("UnsignedSchemaConsistent: u8 should be 200, got " + + std::to_string(obj.u8)); + } + if (obj.u16 != 60000) { + Fail("UnsignedSchemaConsistent: u16 should be 60000, got " + + std::to_string(obj.u16)); + } + if (obj.u32Var != 3000000000) { + Fail("UnsignedSchemaConsistent: u32Var should be 3000000000, got " + + std::to_string(obj.u32Var)); + } + if (obj.u32Fixed != 4000000000) { + Fail("UnsignedSchemaConsistent: u32Fixed should be 4000000000, got " + + std::to_string(obj.u32Fixed)); + } + if 
(obj.u64Var != 10000000000) { + Fail("UnsignedSchemaConsistent: u64Var should be 10000000000, got " + + std::to_string(obj.u64Var)); + } + if (obj.u64Fixed != 15000000000) { + Fail("UnsignedSchemaConsistent: u64Fixed should be 15000000000, got " + + std::to_string(obj.u64Fixed)); + } + if (obj.u64Tagged != 1000000000) { + Fail("UnsignedSchemaConsistent: u64Tagged should be 1000000000, got " + + std::to_string(obj.u64Tagged)); + } + + // Verify nullable unsigned fields + if (!obj.u8Nullable.has_value() || obj.u8Nullable.value() != 128) { + Fail("UnsignedSchemaConsistent: u8Nullable should be 128"); + } + if (!obj.u16Nullable.has_value() || obj.u16Nullable.value() != 40000) { + Fail("UnsignedSchemaConsistent: u16Nullable should be 40000"); + } + if (!obj.u32VarNullable.has_value() || + obj.u32VarNullable.value() != 2500000000) { + Fail("UnsignedSchemaConsistent: u32VarNullable should be 2500000000"); + } + if (!obj.u32FixedNullable.has_value() || + obj.u32FixedNullable.value() != 3500000000) { + Fail("UnsignedSchemaConsistent: u32FixedNullable should be 3500000000"); + } + if (!obj.u64VarNullable.has_value() || + obj.u64VarNullable.value() != 8000000000) { + Fail("UnsignedSchemaConsistent: u64VarNullable should be 8000000000"); + } + if (!obj.u64FixedNullable.has_value() || + obj.u64FixedNullable.value() != 12000000000) { + Fail("UnsignedSchemaConsistent: u64FixedNullable should be 12000000000"); + } + if (!obj.u64TaggedNullable.has_value() || + obj.u64TaggedNullable.value() != 500000000) { + Fail("UnsignedSchemaConsistent: u64TaggedNullable should be 500000000"); + } + + // Re-serialize and write back + std::vector out; + AppendSerialized(fory, obj, out); + WriteFile(data_file, out); +} + +void RunTestUnsignedSchemaCompatible(const std::string &data_file) { + auto bytes = ReadFile(data_file); + // COMPATIBLE mode: compatible=true, xlang=true + auto fory = BuildFory(true, true, false, false); + EnsureOk(fory.register_struct(502), + "register 
UnsignedSchemaCompatible"); + + Buffer buffer = MakeBuffer(bytes); + auto obj = ReadNext(fory, buffer); + + // Verify Group 1: Nullable fields (values from Java's non-nullable fields) + if (!obj.u8.has_value() || obj.u8.value() != 200) { + Fail("UnsignedSchemaCompatible: u8 should be 200"); + } + if (!obj.u16.has_value() || obj.u16.value() != 60000) { + Fail("UnsignedSchemaCompatible: u16 should be 60000"); + } + if (!obj.u32Var.has_value() || obj.u32Var.value() != 3000000000) { + Fail("UnsignedSchemaCompatible: u32Var should be 3000000000"); + } + if (!obj.u32Fixed.has_value() || obj.u32Fixed.value() != 4000000000) { + Fail("UnsignedSchemaCompatible: u32Fixed should be 4000000000"); + } + if (!obj.u64Var.has_value() || obj.u64Var.value() != 10000000000) { + Fail("UnsignedSchemaCompatible: u64Var should be 10000000000"); + } + if (!obj.u64Fixed.has_value() || obj.u64Fixed.value() != 15000000000) { + Fail("UnsignedSchemaCompatible: u64Fixed should be 15000000000"); + } + if (!obj.u64Tagged.has_value() || obj.u64Tagged.value() != 1000000000) { + Fail("UnsignedSchemaCompatible: u64Tagged should be 1000000000"); + } + + // Verify Group 2: Non-nullable fields (values from Java's nullable fields) + if (obj.u8Field2 != 128) { + Fail("UnsignedSchemaCompatible: u8Field2 should be 128, got " + + std::to_string(obj.u8Field2)); + } + if (obj.u16Field2 != 40000) { + Fail("UnsignedSchemaCompatible: u16Field2 should be 40000, got " + + std::to_string(obj.u16Field2)); + } + if (obj.u32VarField2 != 2500000000) { + Fail("UnsignedSchemaCompatible: u32VarField2 should be 2500000000, got " + + std::to_string(obj.u32VarField2)); + } + if (obj.u32FixedField2 != 3500000000) { + Fail("UnsignedSchemaCompatible: u32FixedField2 should be 3500000000, got " + + std::to_string(obj.u32FixedField2)); + } + if (obj.u64VarField2 != 8000000000) { + Fail("UnsignedSchemaCompatible: u64VarField2 should be 8000000000, got " + + std::to_string(obj.u64VarField2)); + } + if (obj.u64FixedField2 != 
12000000000) { + Fail("UnsignedSchemaCompatible: u64FixedField2 should be 12000000000, got " + + std::to_string(obj.u64FixedField2)); + } + if (obj.u64TaggedField2 != 500000000) { + Fail("UnsignedSchemaCompatible: u64TaggedField2 should be 500000000, got " + + std::to_string(obj.u64TaggedField2)); + } + + // Re-serialize and write back + std::vector out; + AppendSerialized(fory, obj, out); + WriteFile(data_file, out); +} + } // namespace diff --git a/cpp/fory/util/buffer.h b/cpp/fory/util/buffer.h index 9ff5f64e30..76350a1864 100644 --- a/cpp/fory/util/buffer.h +++ b/cpp/fory/util/buffer.h @@ -753,6 +753,85 @@ class Buffer { return static_cast((raw >> 1) ^ (~(raw & 1) + 1)); } + /// Write int64_t value using tagged encoding. + /// If value is in [-1073741824, 1073741823], encode as 4 bytes: ((value as + /// i32) << 1). Otherwise write as 9 bytes: 0b1 | little-endian 8 bytes i64. + FORY_ALWAYS_INLINE void WriteTaggedInt64(int64_t value) { + constexpr int64_t HALF_MIN_INT_VALUE = -1073741824; // INT32_MIN / 2 + constexpr int64_t HALF_MAX_INT_VALUE = 1073741823; // INT32_MAX / 2 + if (value >= HALF_MIN_INT_VALUE && value <= HALF_MAX_INT_VALUE) { + WriteInt32(static_cast(value) << 1); + } else { + Grow(9); + data_[writer_index_] = 0b1; + UnsafePut(writer_index_ + 1, value); + IncreaseWriterIndex(9); + } + } + + /// Read int64_t value using tagged encoding. Sets error on bounds violation. + /// If bit 0 is 0, return value >> 1 (arithmetic shift). + /// Otherwise, skip flag byte and read 8 bytes as int64. 
+ FORY_ALWAYS_INLINE int64_t ReadTaggedInt64(Error &error) { + if (FORY_PREDICT_FALSE(reader_index_ + 4 > size_)) { + error.set_buffer_out_of_bound(reader_index_, 4, size_); + return 0; + } + int32_t i = reinterpret_cast(data_ + reader_index_)[0]; + if ((i & 0b1) != 0b1) { + reader_index_ += 4; + return static_cast(i >> 1); // arithmetic right shift + } else { + if (FORY_PREDICT_FALSE(reader_index_ + 9 > size_)) { + error.set_buffer_out_of_bound(reader_index_, 9, size_); + return 0; + } + int64_t value = + reinterpret_cast(data_ + reader_index_ + 1)[0]; + reader_index_ += 9; + return value; + } + } + + /// Write uint64_t value using tagged encoding. + /// If value is in [0, 0x7fffffff], encode as 4 bytes: ((value as u32) << 1). + /// Otherwise write as 9 bytes: 0b1 | little-endian 8 bytes u64. + FORY_ALWAYS_INLINE void WriteTaggedUint64(uint64_t value) { + constexpr uint64_t MAX_SMALL_VALUE = 0x7fffffff; // INT32_MAX as u64 + if (value <= MAX_SMALL_VALUE) { + WriteInt32(static_cast(value) << 1); + } else { + Grow(9); + data_[writer_index_] = 0b1; + UnsafePut(writer_index_ + 1, value); + IncreaseWriterIndex(9); + } + } + + /// Read uint64_t value using tagged encoding. Sets error on bounds violation. + /// If bit 0 is 0, return value >> 1. + /// Otherwise, skip flag byte and read 8 bytes as uint64. + FORY_ALWAYS_INLINE uint64_t ReadTaggedUint64(Error &error) { + if (FORY_PREDICT_FALSE(reader_index_ + 4 > size_)) { + error.set_buffer_out_of_bound(reader_index_, 4, size_); + return 0; + } + uint32_t i = reinterpret_cast(data_ + reader_index_)[0]; + if ((i & 0b1) != 0b1) { + reader_index_ += 4; + return static_cast(i >> 1); + } else { + if (FORY_PREDICT_FALSE(reader_index_ + 9 > size_)) { + error.set_buffer_out_of_bound(reader_index_, 9, size_); + return 0; + } + uint64_t value = + reinterpret_cast(data_ + reader_index_ + 1)[0]; + reader_index_ += 9; + return value; + } + } + /// Read uint64_t value as varuint36small. Sets error on bounds violation. 
FORY_ALWAYS_INLINE uint64_t ReadVarUint36Small(Error &error) { if (FORY_PREDICT_FALSE(reader_index_ + 1 > size_)) { diff --git a/go/fory/buffer.go b/go/fory/buffer.go index 8db021c990..f6aa84f3b4 100644 --- a/go/fory/buffer.go +++ b/go/fory/buffer.go @@ -1037,6 +1037,109 @@ func (b *ByteBuffer) ReadVarint64(err *Error) int64 { return v } +// WriteTaggedInt64 writes int64 using tagged encoding. +// If value is in [-1073741824, 1073741823], encode as 4 bytes: ((value as i32) << 1). +// Otherwise write as 9 bytes: 0b1 | little-endian 8 bytes i64. +func (b *ByteBuffer) WriteTaggedInt64(value int64) { + const halfMinIntValue int64 = -1073741824 // INT32_MIN / 2 + const halfMaxIntValue int64 = 1073741823 // INT32_MAX / 2 + if value >= halfMinIntValue && value <= halfMaxIntValue { + b.WriteInt32(int32(value) << 1) + } else { + b.grow(9) + b.data[b.writerIndex] = 0b1 + if isLittleEndian { + *(*int64)(unsafe.Pointer(&b.data[b.writerIndex+1])) = value + } else { + binary.LittleEndian.PutUint64(b.data[b.writerIndex+1:], uint64(value)) + } + b.writerIndex += 9 + } +} + +// ReadTaggedInt64 reads int64 using tagged encoding. +// If bit 0 is 0, return value >> 1 (arithmetic shift). +// Otherwise, skip flag byte and read 8 bytes as int64. 
+func (b *ByteBuffer) ReadTaggedInt64(err *Error) int64 { + if b.readerIndex+4 > len(b.data) { + *err = BufferOutOfBoundError(b.readerIndex, 4, len(b.data)) + return 0 + } + var i int32 + if isLittleEndian { + i = *(*int32)(unsafe.Pointer(&b.data[b.readerIndex])) + } else { + i = int32(binary.LittleEndian.Uint32(b.data[b.readerIndex:])) + } + if (i & 0b1) != 0b1 { + b.readerIndex += 4 + return int64(i >> 1) // arithmetic right shift + } + if b.readerIndex+9 > len(b.data) { + *err = BufferOutOfBoundError(b.readerIndex, 9, len(b.data)) + return 0 + } + var value int64 + if isLittleEndian { + value = *(*int64)(unsafe.Pointer(&b.data[b.readerIndex+1])) + } else { + value = int64(binary.LittleEndian.Uint64(b.data[b.readerIndex+1:])) + } + b.readerIndex += 9 + return value +} + +// WriteTaggedUint64 writes uint64 using tagged encoding. +// If value is in [0, 0x7fffffff], encode as 4 bytes: ((value as u32) << 1). +// Otherwise write as 9 bytes: 0b1 | little-endian 8 bytes u64. +func (b *ByteBuffer) WriteTaggedUint64(value uint64) { + const maxSmallValue uint64 = 0x7fffffff // INT32_MAX as u64 + if value <= maxSmallValue { + b.WriteInt32(int32(value) << 1) + } else { + b.grow(9) + b.data[b.writerIndex] = 0b1 + if isLittleEndian { + *(*uint64)(unsafe.Pointer(&b.data[b.writerIndex+1])) = value + } else { + binary.LittleEndian.PutUint64(b.data[b.writerIndex+1:], value) + } + b.writerIndex += 9 + } +} + +// ReadTaggedUint64 reads uint64 using tagged encoding. +// If bit 0 is 0, return value >> 1. +// Otherwise, skip flag byte and read 8 bytes as uint64. 
+func (b *ByteBuffer) ReadTaggedUint64(err *Error) uint64 { + if b.readerIndex+4 > len(b.data) { + *err = BufferOutOfBoundError(b.readerIndex, 4, len(b.data)) + return 0 + } + var i uint32 + if isLittleEndian { + i = *(*uint32)(unsafe.Pointer(&b.data[b.readerIndex])) + } else { + i = binary.LittleEndian.Uint32(b.data[b.readerIndex:]) + } + if (i & 0b1) != 0b1 { + b.readerIndex += 4 + return uint64(i >> 1) + } + if b.readerIndex+9 > len(b.data) { + *err = BufferOutOfBoundError(b.readerIndex, 9, len(b.data)) + return 0 + } + var value uint64 + if isLittleEndian { + value = *(*uint64)(unsafe.Pointer(&b.data[b.readerIndex+1])) + } else { + value = binary.LittleEndian.Uint64(b.data[b.readerIndex+1:]) + } + b.readerIndex += 9 + return value +} + // ReadVaruint64 reads unsigned varint // //go:inline diff --git a/go/fory/tests/xlang/xlang_test_main.go b/go/fory/tests/xlang/xlang_test_main.go index 503d75a01c..d7a2d60015 100644 --- a/go/fory/tests/xlang/xlang_test_main.go +++ b/go/fory/tests/xlang/xlang_test_main.go @@ -2120,6 +2120,197 @@ func testCircularRefCompatible() { writeFile(dataFile, serialized) } +// ============================================================================ +// Unsigned Number Test Types +// ============================================================================ + +// UnsignedSchemaConsistent - Test struct for unsigned numbers in SCHEMA_CONSISTENT mode. +// All fields use the same nullability as Java. +// Note: Go currently only supports uint8, uint16, uint32 (VAR_UINT32), uint64 (VAR_UINT64). +// Fixed and tagged encodings require fory encoding tags (TODO). 
+// Matches Java's UnsignedSchemaConsistent (type id 501) +type UnsignedSchemaConsistent struct { + // Primitive unsigned fields (non-nullable) + U8 uint8 // UINT8 - fixed 8-bit + U16 uint16 // UINT16 - fixed 16-bit + U32Var uint32 // VAR_UINT32 - variable-length + U32Fixed uint32 // Should be UINT32 (fixed) - TODO: add encoding tag + U64Var uint64 // VAR_UINT64 - variable-length + U64Fixed uint64 // Should be UINT64 (fixed) - TODO: add encoding tag + U64Tagged uint64 // Should be TAGGED_UINT64 - TODO: add encoding tag + + // Nullable unsigned fields (pointers) + U8Nullable *uint8 `fory:"nullable"` + U16Nullable *uint16 `fory:"nullable"` + U32VarNullable *uint32 `fory:"nullable"` + U32FixedNullable *uint32 `fory:"nullable"` + U64VarNullable *uint64 `fory:"nullable"` + U64FixedNullable *uint64 `fory:"nullable"` + U64TaggedNullable *uint64 `fory:"nullable"` +} + +// UnsignedSchemaCompatible - Test struct for unsigned numbers in COMPATIBLE mode. +// Group 1: Pointer types (nullable in Go, non-nullable in Java) +// Group 2: Non-pointer types with Field2 suffix (non-nullable in Go, nullable in Java) +// Matches Java's UnsignedSchemaCompatible (type id 502) +type UnsignedSchemaCompatible struct { + // Group 1: Nullable in Go (pointers), non-nullable in Java + U8 *uint8 `fory:"nullable"` + U16 *uint16 `fory:"nullable"` + U32Var *uint32 `fory:"nullable"` + U32Fixed *uint32 `fory:"nullable"` + U64Var *uint64 `fory:"nullable"` + U64Fixed *uint64 `fory:"nullable"` + U64Tagged *uint64 `fory:"nullable"` + + // Group 2: Non-nullable in Go, nullable in Java + U8Field2 uint8 + U16Field2 uint16 + U32VarField2 uint32 + U32FixedField2 uint32 + U64VarField2 uint64 + U64FixedField2 uint64 + U64TaggedField2 uint64 +} + +func getUnsignedSchemaConsistent(obj interface{}) UnsignedSchemaConsistent { + switch v := obj.(type) { + case UnsignedSchemaConsistent: + return v + case *UnsignedSchemaConsistent: + return *v + default: + panic(fmt.Sprintf("expected UnsignedSchemaConsistent, got %T", 
obj)) + } +} + +func getUnsignedSchemaCompatible(obj interface{}) UnsignedSchemaCompatible { + switch v := obj.(type) { + case UnsignedSchemaCompatible: + return v + case *UnsignedSchemaCompatible: + return *v + default: + panic(fmt.Sprintf("expected UnsignedSchemaCompatible, got %T", obj)) + } +} + +// ============================================================================ +// Unsigned Number Tests +// ============================================================================ + +func testUnsignedSchemaConsistent() { + dataFile := getDataFile() + data := readFile(dataFile) + + f := fory.New(fory.WithXlang(true), fory.WithCompatible(false)) + f.Register(UnsignedSchemaConsistent{}, 501) + + buf := fory.NewByteBuffer(data) + var obj interface{} + err := f.DeserializeWithCallbackBuffers(buf, &obj, nil) + if err != nil { + panic(fmt.Sprintf("Failed to deserialize: %v", err)) + } + + result := getUnsignedSchemaConsistent(obj) + + // Verify primitive unsigned fields + assertEqual(uint8(200), result.U8, "U8") + assertEqual(uint16(60000), result.U16, "U16") + assertEqual(uint32(3000000000), result.U32Var, "U32Var") + assertEqual(uint32(4000000000), result.U32Fixed, "U32Fixed") + assertEqual(uint64(10000000000), result.U64Var, "U64Var") + assertEqual(uint64(15000000000), result.U64Fixed, "U64Fixed") + assertEqual(uint64(1000000000), result.U64Tagged, "U64Tagged") + + // Verify nullable unsigned fields + if result.U8Nullable == nil || *result.U8Nullable != 128 { + panic(fmt.Sprintf("U8Nullable mismatch: expected 128, got %v", result.U8Nullable)) + } + if result.U16Nullable == nil || *result.U16Nullable != 40000 { + panic(fmt.Sprintf("U16Nullable mismatch: expected 40000, got %v", result.U16Nullable)) + } + if result.U32VarNullable == nil || *result.U32VarNullable != 2500000000 { + panic(fmt.Sprintf("U32VarNullable mismatch: expected 2500000000, got %v", result.U32VarNullable)) + } + if result.U32FixedNullable == nil || *result.U32FixedNullable != 3500000000 { + 
panic(fmt.Sprintf("U32FixedNullable mismatch: expected 3500000000, got %v", result.U32FixedNullable)) + } + if result.U64VarNullable == nil || *result.U64VarNullable != 8000000000 { + panic(fmt.Sprintf("U64VarNullable mismatch: expected 8000000000, got %v", result.U64VarNullable)) + } + if result.U64FixedNullable == nil || *result.U64FixedNullable != 12000000000 { + panic(fmt.Sprintf("U64FixedNullable mismatch: expected 12000000000, got %v", result.U64FixedNullable)) + } + if result.U64TaggedNullable == nil || *result.U64TaggedNullable != 500000000 { + panic(fmt.Sprintf("U64TaggedNullable mismatch: expected 500000000, got %v", result.U64TaggedNullable)) + } + + serialized, err := f.Serialize(result) + if err != nil { + panic(fmt.Sprintf("Failed to serialize: %v", err)) + } + + writeFile(dataFile, serialized) +} + +func testUnsignedSchemaCompatible() { + dataFile := getDataFile() + data := readFile(dataFile) + + f := fory.New(fory.WithXlang(true), fory.WithCompatible(true)) + f.Register(UnsignedSchemaCompatible{}, 502) + + buf := fory.NewByteBuffer(data) + var obj interface{} + err := f.DeserializeWithCallbackBuffers(buf, &obj, nil) + if err != nil { + panic(fmt.Sprintf("Failed to deserialize: %v", err)) + } + + result := getUnsignedSchemaCompatible(obj) + + // Verify Group 1: Nullable fields (values from Java's non-nullable fields) + if result.U8 == nil || *result.U8 != 200 { + panic(fmt.Sprintf("U8 mismatch: expected 200, got %v", result.U8)) + } + if result.U16 == nil || *result.U16 != 60000 { + panic(fmt.Sprintf("U16 mismatch: expected 60000, got %v", result.U16)) + } + if result.U32Var == nil || *result.U32Var != 3000000000 { + panic(fmt.Sprintf("U32Var mismatch: expected 3000000000, got %v", result.U32Var)) + } + if result.U32Fixed == nil || *result.U32Fixed != 4000000000 { + panic(fmt.Sprintf("U32Fixed mismatch: expected 4000000000, got %v", result.U32Fixed)) + } + if result.U64Var == nil || *result.U64Var != 10000000000 { + panic(fmt.Sprintf("U64Var 
mismatch: expected 10000000000, got %v", result.U64Var)) + } + if result.U64Fixed == nil || *result.U64Fixed != 15000000000 { + panic(fmt.Sprintf("U64Fixed mismatch: expected 15000000000, got %v", result.U64Fixed)) + } + if result.U64Tagged == nil || *result.U64Tagged != 1000000000 { + panic(fmt.Sprintf("U64Tagged mismatch: expected 1000000000, got %v", result.U64Tagged)) + } + + // Verify Group 2: Non-nullable fields (values from Java's nullable fields) + assertEqual(uint8(128), result.U8Field2, "U8Field2") + assertEqual(uint16(40000), result.U16Field2, "U16Field2") + assertEqual(uint32(2500000000), result.U32VarField2, "U32VarField2") + assertEqual(uint32(3500000000), result.U32FixedField2, "U32FixedField2") + assertEqual(uint64(8000000000), result.U64VarField2, "U64VarField2") + assertEqual(uint64(12000000000), result.U64FixedField2, "U64FixedField2") + assertEqual(uint64(500000000), result.U64TaggedField2, "U64TaggedField2") + + serialized, err := f.Serialize(result) + if err != nil { + panic(fmt.Sprintf("Failed to serialize: %v", err)) + } + + writeFile(dataFile, serialized) +} + // ============================================================================ // Main // ============================================================================ @@ -2223,6 +2414,10 @@ func main() { testCircularRefSchemaConsistent() case "test_circular_ref_compatible": testCircularRefCompatible() + case "test_unsigned_schema_consistent": + testUnsignedSchemaConsistent() + case "test_unsigned_schema_compatible": + testUnsignedSchemaCompatible() default: panic(fmt.Sprintf("Unknown test case: %s", *caseName)) } diff --git a/java/fory-core/src/main/java/org/apache/fory/annotation/Int64Type.java b/java/fory-core/src/main/java/org/apache/fory/annotation/Int64Type.java index 1c793bc5be..e853f236d2 100644 --- a/java/fory-core/src/main/java/org/apache/fory/annotation/Int64Type.java +++ b/java/fory-core/src/main/java/org/apache/fory/annotation/Int64Type.java @@ -34,7 +34,7 @@ *
      <ul> *
    <li>{@link LongEncoding#VARINT64} (default): Variable-length encoding, compact for small values * (type_id=7) - *
    <li>{@link LongEncoding#INT64}: Fixed 8-byte encoding, consistent size (type_id=6) + *
    <li>{@link LongEncoding#FIXED_INT64}: Fixed 8-byte encoding, consistent size (type_id=6) *
    <li>{@link LongEncoding#TAGGED_INT64}: Tagged encoding that uses 4 bytes for values in range * [-1073741824, 1073741823], otherwise 9 bytes (type_id=8) *
    </ul> @@ -46,7 +46,7 @@ * @Int64Type(encoding = LongEncoding.VARINT64) // Variable-length (default) * long compactId; * - * @Int64Type(encoding = LongEncoding.INT64) // Fixed 8-byte + * @Int64Type(encoding = LongEncoding.FIXED_INT64) // Fixed 8-byte * long fixedTimestamp; * * @Int64Type(encoding = LongEncoding.TAGGED_INT64) // Tagged encoding diff --git a/java/fory-core/src/main/java/org/apache/fory/annotation/Uint64Type.java b/java/fory-core/src/main/java/org/apache/fory/annotation/Uint64Type.java index 1faac580a2..2ac792d052 100644 --- a/java/fory-core/src/main/java/org/apache/fory/annotation/Uint64Type.java +++ b/java/fory-core/src/main/java/org/apache/fory/annotation/Uint64Type.java @@ -36,7 +36,8 @@ *
      <ul> *
    <li>{@link LongEncoding#VARINT64} (default): Variable-length encoding (VAR_UINT64, type_id=14), * compact for small values - *
    <li>{@link LongEncoding#INT64}: Fixed 8-byte encoding (UINT64, type_id=13), consistent size + *
    <li>{@link LongEncoding#FIXED_INT64}: Fixed 8-byte encoding (UINT64, type_id=13), consistent + * size *
    <li>{@link LongEncoding#TAGGED_INT64}: Tagged encoding (TAGGED_UINT64, type_id=15) that uses 4 * bytes for values in range [0, 2147483647], otherwise 9 bytes *
    </ul> @@ -58,7 +59,7 @@ * @Uint64Type(encoding = LongEncoding.VARINT64) // Variable-length (default) * long compactId; * - * @Uint64Type(encoding = LongEncoding.INT64) // Fixed 8-byte + * @Uint64Type(encoding = LongEncoding.FIXED_INT64) // Fixed 8-byte * long fixedTimestamp; * * @Uint64Type(encoding = LongEncoding.TAGGED_INT64) // Tagged encoding diff --git a/java/fory-core/src/test/java/org/apache/fory/xlang/XlangTestBase.java b/java/fory-core/src/test/java/org/apache/fory/xlang/XlangTestBase.java index a6f0d450e9..7202868bfd 100644 --- a/java/fory-core/src/test/java/org/apache/fory/xlang/XlangTestBase.java +++ b/java/fory-core/src/test/java/org/apache/fory/xlang/XlangTestBase.java @@ -36,8 +36,13 @@ import org.apache.fory.Fory; import org.apache.fory.ForyTestBase; import org.apache.fory.annotation.ForyField; +import org.apache.fory.annotation.Uint16Type; +import org.apache.fory.annotation.Uint32Type; +import org.apache.fory.annotation.Uint64Type; +import org.apache.fory.annotation.Uint8Type; import org.apache.fory.config.CompatibleMode; import org.apache.fory.config.Language; +import org.apache.fory.config.LongEncoding; import org.apache.fory.memory.MemoryBuffer; import org.apache.fory.memory.MemoryUtils; import org.apache.fory.meta.MetaCompressor; @@ -2465,4 +2470,208 @@ private Object normalizeNulls(Object obj) { // For other objects, return as-is return obj; } + + // ==================== Unsigned Number Tests ==================== + + /** + * Test struct for unsigned number schema consistent tests. Contains all unsigned numeric types + * with different encoding options.
+ */ + @Data + static class UnsignedSchemaConsistent { + // Primitive unsigned fields + @Uint8Type short u8; + + @Uint16Type int u16; + + @Uint32Type(compress = true) + long u32Var; + + @Uint32Type(compress = false) + long u32Fixed; + + @Uint64Type(encoding = LongEncoding.VARINT64) + long u64Var; + + @Uint64Type(encoding = LongEncoding.FIXED_INT64) + long u64Fixed; + + @Uint64Type(encoding = LongEncoding.TAGGED_INT64) + long u64Tagged; + + // Boxed nullable unsigned fields + @ForyField(nullable = true) + @Uint8Type + Short u8Nullable; + + @ForyField(nullable = true) + @Uint16Type + Integer u16Nullable; + + @ForyField(nullable = true) + @Uint32Type(compress = true) + Long u32VarNullable; + + @ForyField(nullable = true) + @Uint32Type(compress = false) + Long u32FixedNullable; + + @ForyField(nullable = true) + @Uint64Type(encoding = LongEncoding.VARINT64) + Long u64VarNullable; + + @ForyField(nullable = true) + @Uint64Type(encoding = LongEncoding.FIXED_INT64) + Long u64FixedNullable; + + @ForyField(nullable = true) + @Uint64Type(encoding = LongEncoding.TAGGED_INT64) + Long u64TaggedNullable; + } + + @Test(dataProvider = "enableCodegen") + public void testUnsignedSchemaConsistent(boolean enableCodegen) throws java.io.IOException { + String caseName = "test_unsigned_schema_consistent"; + Fory fory = + Fory.builder() + .withLanguage(Language.XLANG) + .withCompatibleMode(CompatibleMode.SCHEMA_CONSISTENT) + .withCodegen(enableCodegen) + .build(); + fory.register(UnsignedSchemaConsistent.class, 501); + + UnsignedSchemaConsistent obj = new UnsignedSchemaConsistent(); + // Primitive fields + obj.u8 = 200; // Max uint8 range testing + obj.u16 = 60000; // Max uint16 range testing + obj.u32Var = 3000000000L; // > INT_MAX to test unsigned + obj.u32Fixed = 4000000000L; + obj.u64Var = 10000000000L; + obj.u64Fixed = 15000000000L; + obj.u64Tagged = 1000000000L; // Within tagged range + + // Nullable boxed fields with values + obj.u8Nullable = (short) 128; + obj.u16Nullable = 40000; + 
obj.u32VarNullable = 2500000000L; + obj.u32FixedNullable = 3500000000L; + obj.u64VarNullable = 8000000000L; + obj.u64FixedNullable = 12000000000L; + obj.u64TaggedNullable = 500000000L; + + // First verify Java serialization works + Assert.assertEquals(xserDe(fory, obj), obj); + + MemoryBuffer buffer = MemoryBuffer.newHeapBuffer(512); + fory.serialize(buffer, obj); + + ExecutionContext ctx = prepareExecution(caseName, buffer.getBytes(0, buffer.writerIndex())); + runPeer(ctx); + + MemoryBuffer buffer2 = readBuffer(ctx.dataFile()); + UnsignedSchemaConsistent result = (UnsignedSchemaConsistent) fory.deserialize(buffer2); + Assert.assertEquals(result, obj); + } + + /** + * Test struct for unsigned number schema compatible tests (Java side). Group 1: non-nullable + * primitive fields. Group 2: nullable boxed fields with "2" suffix. Other languages flip + * nullability: Group 1 is Optional, Group 2 is non-Optional. + */ + @Data + static class UnsignedSchemaCompatible { + // Group 1: Primitive unsigned fields (non-nullable in Java, Optional in other languages) + @Uint8Type short u8; + + @Uint16Type int u16; + + @Uint32Type(compress = true) + long u32Var; + + @Uint32Type(compress = false) + long u32Fixed; + + @Uint64Type(encoding = LongEncoding.VARINT64) + long u64Var; + + @Uint64Type(encoding = LongEncoding.FIXED_INT64) + long u64Fixed; + + @Uint64Type(encoding = LongEncoding.TAGGED_INT64) + long u64Tagged; + + // Group 2: Nullable boxed fields (nullable in Java, non-Optional in other languages) + @ForyField(nullable = true) + @Uint8Type + Short u8Field2; + + @ForyField(nullable = true) + @Uint16Type + Integer u16Field2; + + @ForyField(nullable = true) + @Uint32Type(compress = true) + Long u32VarField2; + + @ForyField(nullable = true) + @Uint32Type(compress = false) + Long u32FixedField2; + + @ForyField(nullable = true) + @Uint64Type(encoding = LongEncoding.VARINT64) + Long u64VarField2; + + @ForyField(nullable = true) + @Uint64Type(encoding = LongEncoding.FIXED_INT64) + 
Long u64FixedField2; + + @ForyField(nullable = true) + @Uint64Type(encoding = LongEncoding.TAGGED_INT64) + Long u64TaggedField2; + } + + @Test(dataProvider = "enableCodegen") + public void testUnsignedSchemaCompatible(boolean enableCodegen) throws java.io.IOException { + String caseName = "test_unsigned_schema_compatible"; + Fory fory = + Fory.builder() + .withLanguage(Language.XLANG) + .withCompatibleMode(CompatibleMode.COMPATIBLE) + .withCodegen(enableCodegen) + .withMetaCompressor(new NoOpMetaCompressor()) + .build(); + fory.register(UnsignedSchemaCompatible.class, 502); + + UnsignedSchemaCompatible obj = new UnsignedSchemaCompatible(); + // Primitive fields + obj.u8 = 200; + obj.u16 = 60000; + obj.u32Var = 3000000000L; + obj.u32Fixed = 4000000000L; + obj.u64Var = 10000000000L; + obj.u64Fixed = 15000000000L; + obj.u64Tagged = 1000000000L; + + // Group 2 fields with values + obj.u8Field2 = (short) 128; + obj.u16Field2 = 40000; + obj.u32VarField2 = 2500000000L; + obj.u32FixedField2 = 3500000000L; + obj.u64VarField2 = 8000000000L; + obj.u64FixedField2 = 12000000000L; + obj.u64TaggedField2 = 500000000L; + + // First verify Java serialization works + Assert.assertEquals(xserDe(fory, obj), obj); + + MemoryBuffer buffer = MemoryBuffer.newHeapBuffer(1024); + fory.serialize(buffer, obj); + + ExecutionContext ctx = prepareExecution(caseName, buffer.getBytes(0, buffer.writerIndex())); + runPeer(ctx); + + MemoryBuffer buffer2 = readBuffer(ctx.dataFile()); + UnsignedSchemaCompatible result = (UnsignedSchemaCompatible) fory.deserialize(buffer2); + Assert.assertEquals(result, obj); + } } diff --git a/python/pyfory/tests/xlang_test_main.py b/python/pyfory/tests/xlang_test_main.py index b1b49a4bd2..c9ea377b58 100644 --- a/python/pyfory/tests/xlang_test_main.py +++ b/python/pyfory/tests/xlang_test_main.py @@ -1308,6 +1308,181 @@ def test_circular_ref_compatible(): f.write(new_bytes) +# ============================================================================ +# Unsigned Number 
Test Types +# ============================================================================ + + +@dataclass +class UnsignedSchemaConsistent: + """ + Test struct for unsigned number schema consistent tests (Python side). + Primitive fields first, then nullable boxed fields (using Optional). + + Must match Java UnsignedSchemaConsistent (type id 501). + """ + + # Primitive unsigned fields (non-nullable) + u8: pyfory.uint8 = 0 + u16: pyfory.uint16 = 0 + u32_var: pyfory.uint32 = 0 # VAR_UINT32 encoding + u32_fixed: pyfory.fixed_uint32 = 0 # Fixed 4-byte encoding + u64_var: pyfory.uint64 = 0 # VAR_UINT64 encoding + u64_fixed: pyfory.fixed_uint64 = 0 # Fixed 8-byte encoding + u64_tagged: pyfory.tagged_uint64 = 0 # Tagged encoding + + # Boxed nullable unsigned fields (using Optional) + u8_nullable: Optional[pyfory.uint8] = None + u16_nullable: Optional[pyfory.uint16] = None + u32_var_nullable: Optional[pyfory.uint32] = None + u32_fixed_nullable: Optional[pyfory.fixed_uint32] = None + u64_var_nullable: Optional[pyfory.uint64] = None + u64_fixed_nullable: Optional[pyfory.fixed_uint64] = None + u64_tagged_nullable: Optional[pyfory.tagged_uint64] = None + + +@dataclass +class UnsignedSchemaCompatible: + """ + Test struct for unsigned number schema compatible tests (Python side). + Group 1: Optional fields (nullable in Python, non-nullable in Java). + Group 2: Non-Optional fields with field2 suffix (non-nullable in Python, nullable in Java). + + Must match Java UnsignedSchemaCompatible (type id 502). 
+ """ + + # Group 1: Optional unsigned fields (nullable in Python, non-nullable in Java) + u8: Optional[pyfory.uint8] = None + u16: Optional[pyfory.uint16] = None + u32_var: Optional[pyfory.uint32] = None # VAR_UINT32 encoding + u32_fixed: Optional[pyfory.fixed_uint32] = None # Fixed 4-byte encoding + u64_var: Optional[pyfory.uint64] = None # VAR_UINT64 encoding + u64_fixed: Optional[pyfory.fixed_uint64] = None # Fixed 8-byte encoding + u64_tagged: Optional[pyfory.tagged_uint64] = None # Tagged encoding + + # Group 2: Non-Optional unsigned fields (non-nullable in Python, nullable in Java) + u8_field2: pyfory.uint8 = 0 + u16_field2: pyfory.uint16 = 0 + u32_var_field2: pyfory.uint32 = 0 + u32_fixed_field2: pyfory.fixed_uint32 = 0 + u64_var_field2: pyfory.uint64 = 0 + u64_fixed_field2: pyfory.fixed_uint64 = 0 + u64_tagged_field2: pyfory.tagged_uint64 = 0 + + +# ============================================================================ +# Unsigned Number Tests +# ============================================================================ + + +def test_unsigned_schema_consistent(): + """Test unsigned number types with schema consistent mode.""" + data_file = get_data_file() + with open(data_file, "rb") as f: + data_bytes = f.read() + + fory = pyfory.Fory(xlang=True, compatible=False) + fory.register_type(UnsignedSchemaConsistent, type_id=501) + + expected = UnsignedSchemaConsistent( + # Primitive fields + u8=200, + u16=60000, + u32_var=3000000000, + u32_fixed=4000000000, + u64_var=10000000000, + u64_fixed=15000000000, + u64_tagged=1000000000, + # Nullable boxed fields with values + u8_nullable=128, + u16_nullable=40000, + u32_var_nullable=2500000000, + u32_fixed_nullable=3500000000, + u64_var_nullable=8000000000, + u64_fixed_nullable=12000000000, + u64_tagged_nullable=500000000, + ) + + obj = fory.deserialize(data_bytes) + debug_print(f"Deserialized: {obj}") + + # Verify primitive unsigned fields + assert obj.u8 == expected.u8, f"u8: {obj.u8} != {expected.u8}" + 
assert obj.u16 == expected.u16, f"u16: {obj.u16} != {expected.u16}" + assert obj.u32_var == expected.u32_var, f"u32_var: {obj.u32_var} != {expected.u32_var}" + assert obj.u32_fixed == expected.u32_fixed, f"u32_fixed: {obj.u32_fixed} != {expected.u32_fixed}" + assert obj.u64_var == expected.u64_var, f"u64_var: {obj.u64_var} != {expected.u64_var}" + assert obj.u64_fixed == expected.u64_fixed, f"u64_fixed: {obj.u64_fixed} != {expected.u64_fixed}" + assert obj.u64_tagged == expected.u64_tagged, f"u64_tagged: {obj.u64_tagged} != {expected.u64_tagged}" + + # Verify nullable boxed fields + assert obj.u8_nullable == expected.u8_nullable, f"u8_nullable: {obj.u8_nullable} != {expected.u8_nullable}" + assert obj.u16_nullable == expected.u16_nullable, f"u16_nullable: {obj.u16_nullable} != {expected.u16_nullable}" + assert obj.u32_var_nullable == expected.u32_var_nullable, f"u32_var_nullable: {obj.u32_var_nullable} != {expected.u32_var_nullable}" + assert obj.u32_fixed_nullable == expected.u32_fixed_nullable, f"u32_fixed_nullable: {obj.u32_fixed_nullable} != {expected.u32_fixed_nullable}" + assert obj.u64_var_nullable == expected.u64_var_nullable, f"u64_var_nullable: {obj.u64_var_nullable} != {expected.u64_var_nullable}" + assert obj.u64_fixed_nullable == expected.u64_fixed_nullable, f"u64_fixed_nullable: {obj.u64_fixed_nullable} != {expected.u64_fixed_nullable}" + assert obj.u64_tagged_nullable == expected.u64_tagged_nullable, f"u64_tagged_nullable: {obj.u64_tagged_nullable} != {expected.u64_tagged_nullable}" + + new_bytes = fory.serialize(obj) + with open(data_file, "wb") as f: + f.write(new_bytes) + + +def test_unsigned_schema_compatible(): + """Test unsigned number types with schema compatible mode.""" + data_file = get_data_file() + with open(data_file, "rb") as f: + data_bytes = f.read() + + fory = pyfory.Fory(xlang=True, compatible=True, meta_compressor=NoOpMetaCompressor()) + fory.register_type(UnsignedSchemaCompatible, type_id=502) + + expected = 
UnsignedSchemaCompatible( + # Group 1: Optional fields (values from Java's non-nullable fields) + u8=200, + u16=60000, + u32_var=3000000000, + u32_fixed=4000000000, + u64_var=10000000000, + u64_fixed=15000000000, + u64_tagged=1000000000, + # Group 2: Non-Optional fields (values from Java's nullable fields) + u8_field2=128, + u16_field2=40000, + u32_var_field2=2500000000, + u32_fixed_field2=3500000000, + u64_var_field2=8000000000, + u64_fixed_field2=12000000000, + u64_tagged_field2=500000000, + ) + + obj = fory.deserialize(data_bytes) + debug_print(f"Deserialized: {obj}") + + # Verify Group 1: Optional unsigned fields + assert obj.u8 == expected.u8, f"u8: {obj.u8} != {expected.u8}" + assert obj.u16 == expected.u16, f"u16: {obj.u16} != {expected.u16}" + assert obj.u32_var == expected.u32_var, f"u32_var: {obj.u32_var} != {expected.u32_var}" + assert obj.u32_fixed == expected.u32_fixed, f"u32_fixed: {obj.u32_fixed} != {expected.u32_fixed}" + assert obj.u64_var == expected.u64_var, f"u64_var: {obj.u64_var} != {expected.u64_var}" + assert obj.u64_fixed == expected.u64_fixed, f"u64_fixed: {obj.u64_fixed} != {expected.u64_fixed}" + assert obj.u64_tagged == expected.u64_tagged, f"u64_tagged: {obj.u64_tagged} != {expected.u64_tagged}" + + # Verify Group 2: Non-Optional fields + assert obj.u8_field2 == expected.u8_field2, f"u8_field2: {obj.u8_field2} != {expected.u8_field2}" + assert obj.u16_field2 == expected.u16_field2, f"u16_field2: {obj.u16_field2} != {expected.u16_field2}" + assert obj.u32_var_field2 == expected.u32_var_field2, f"u32_var_field2: {obj.u32_var_field2} != {expected.u32_var_field2}" + assert obj.u32_fixed_field2 == expected.u32_fixed_field2, f"u32_fixed_field2: {obj.u32_fixed_field2} != {expected.u32_fixed_field2}" + assert obj.u64_var_field2 == expected.u64_var_field2, f"u64_var_field2: {obj.u64_var_field2} != {expected.u64_var_field2}" + assert obj.u64_fixed_field2 == expected.u64_fixed_field2, f"u64_fixed_field2: {obj.u64_fixed_field2} != 
{expected.u64_fixed_field2}" + assert obj.u64_tagged_field2 == expected.u64_tagged_field2, f"u64_tagged_field2: {obj.u64_tagged_field2} != {expected.u64_tagged_field2}" + + new_bytes = fory.serialize(obj) + with open(data_file, "wb") as f: + f.write(new_bytes) + + if __name__ == "__main__": """ This file is executed by PythonXlangTest.java and other cross-language tests. diff --git a/rust/tests/tests/test_cross_language.rs b/rust/tests/tests/test_cross_language.rs index 5f99cc75a9..4f0c3608f0 100644 --- a/rust/tests/tests/test_cross_language.rs +++ b/rust/tests/tests/test_cross_language.rs @@ -1883,3 +1883,148 @@ fn test_circular_ref_compatible() { let new_bytes = fory.serialize(&obj).unwrap(); fs::write(&data_file_path, new_bytes).unwrap(); } + +// ============================================================================ +// Unsigned Number Tests - Test unsigned integer serialization across languages +// ============================================================================ + +/// Test struct for unsigned numbers in SCHEMA_CONSISTENT mode. +/// All fields use the same nullability as Java. +/// Note: Rust supports u8, u16, u32, u64 natively. Different encodings (fixed, var, tagged) +/// are handled via field attributes. 
+/// Matches Java's UnsignedSchemaConsistent (type id 501) +#[derive(ForyObject, Debug, PartialEq)] +#[fory(debug)] +struct UnsignedSchemaConsistent { + // Primitive unsigned fields (non-nullable) + u8_field: u8, // UINT8 - fixed 8-bit + u16_field: u16, // UINT16 - fixed 16-bit + u32_var: u32, // VAR_UINT32 - variable-length + u32_fixed: u32, // UINT32 - fixed 4-byte (TODO: add encoding tag) + u64_var: u64, // VAR_UINT64 - variable-length + u64_fixed: u64, // UINT64 - fixed 8-byte (TODO: add encoding tag) + u64_tagged: u64, // TAGGED_UINT64 (TODO: add encoding tag) + + // Nullable unsigned fields (using Option) + #[fory(nullable = true)] + u8_nullable: Option<u8>, + #[fory(nullable = true)] + u16_nullable: Option<u16>, + #[fory(nullable = true)] + u32_var_nullable: Option<u32>, + #[fory(nullable = true)] + u32_fixed_nullable: Option<u32>, + #[fory(nullable = true)] + u64_var_nullable: Option<u64>, + #[fory(nullable = true)] + u64_fixed_nullable: Option<u64>, + #[fory(nullable = true)] + u64_tagged_nullable: Option<u64>, +} + +/// Test struct for unsigned numbers in COMPATIBLE mode.
+/// Group 1: Option types (nullable in Rust, non-nullable in Java) +/// Group 2: Non-Option types with Field2 suffix (non-nullable in Rust, nullable in Java) +/// Matches Java's UnsignedSchemaCompatible (type id 502) +#[derive(ForyObject, Debug, PartialEq)] +#[fory(debug)] +struct UnsignedSchemaCompatible { + // Group 1: Nullable in Rust (Option), non-nullable in Java + #[fory(nullable = true)] + u8_field: Option<u8>, + #[fory(nullable = true)] + u16_field: Option<u16>, + #[fory(nullable = true)] + u32_var: Option<u32>, + #[fory(nullable = true)] + u32_fixed: Option<u32>, + #[fory(nullable = true)] + u64_var: Option<u64>, + #[fory(nullable = true)] + u64_fixed: Option<u64>, + #[fory(nullable = true)] + u64_tagged: Option<u64>, + + // Group 2: Non-nullable in Rust, nullable in Java + u8_field2: u8, + u16_field2: u16, + u32_var_field2: u32, + u32_fixed_field2: u32, + u64_var_field2: u64, + u64_fixed_field2: u64, + u64_tagged_field2: u64, +} + +/// Test unsigned numbers in SCHEMA_CONSISTENT mode. +#[test] +#[ignore] +fn test_unsigned_schema_consistent() { + let data_file_path = get_data_file(); + let bytes = fs::read(&data_file_path).unwrap(); + + let mut fory = Fory::default().compatible(false).xlang(true); + fory.register::<UnsignedSchemaConsistent>(501).unwrap(); + + let local_obj = UnsignedSchemaConsistent { + // Primitive unsigned fields + u8_field: 200, + u16_field: 60000, + u32_var: 3000000000, + u32_fixed: 4000000000, + u64_var: 10000000000, + u64_fixed: 15000000000, + u64_tagged: 1000000000, + + // Nullable unsigned fields with values + u8_nullable: Some(128), + u16_nullable: Some(40000), + u32_var_nullable: Some(2500000000), + u32_fixed_nullable: Some(3500000000), + u64_var_nullable: Some(8000000000), + u64_fixed_nullable: Some(12000000000), + u64_tagged_nullable: Some(500000000), + }; + + let remote_obj: UnsignedSchemaConsistent = fory.deserialize(&bytes).unwrap(); + assert_eq!(remote_obj, local_obj); + + let new_bytes = fory.serialize(&remote_obj).unwrap(); + fs::write(&data_file_path, new_bytes).unwrap(); +} +
+/// Test unsigned numbers in COMPATIBLE mode with inverted nullability. +#[test] +#[ignore] +fn test_unsigned_schema_compatible() { + let data_file_path = get_data_file(); + let bytes = fs::read(&data_file_path).unwrap(); + + let mut fory = Fory::default().compatible(true).xlang(true); + fory.register::<UnsignedSchemaCompatible>(502).unwrap(); + + let local_obj = UnsignedSchemaCompatible { + // Group 1: Option fields (values from Java's non-nullable fields) + u8_field: Some(200), + u16_field: Some(60000), + u32_var: Some(3000000000), + u32_fixed: Some(4000000000), + u64_var: Some(10000000000), + u64_fixed: Some(15000000000), + u64_tagged: Some(1000000000), + + // Group 2: Non-nullable fields (values from Java's nullable fields) + u8_field2: 128, + u16_field2: 40000, + u32_var_field2: 2500000000, + u32_fixed_field2: 3500000000, + u64_var_field2: 8000000000, + u64_fixed_field2: 12000000000, + u64_tagged_field2: 500000000, + }; + + let remote_obj: UnsignedSchemaCompatible = fory.deserialize(&bytes).unwrap(); + assert_eq!(remote_obj, local_obj); + + let new_bytes = fory.serialize(&remote_obj).unwrap(); + fs::write(&data_file_path, new_bytes).unwrap(); +} From f2f5059bf79334847b92741909abee9271eeb09f Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Wed, 7 Jan 2026 17:52:39 +0800 Subject: [PATCH 15/44] revert build_linux_wheels.py --- ci/build_linux_wheels.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ci/build_linux_wheels.py b/ci/build_linux_wheels.py index 921de626c9..54fdc2ed36 100755 --- a/ci/build_linux_wheels.py +++ b/ci/build_linux_wheels.py @@ -22,7 +22,7 @@ Usage: ./build_linux_wheels.py --arch X86 --python cp38-cp38 - ./build_linux_wheels.py --arch AARCTAGGED_INT64 --python cp313-cp313 --release + ./build_linux_wheels.py --arch AARCH64 --python cp313-cp313 --release Environment: - GITHUB_WORKSPACE (optional; defaults to cwd) @@ -44,7 +44,7 @@ "quay.io/pypa/manylinux2014_x86_64:latest", ] -DEFAULT_AARCTAGGED_INT64_IMAGES = [ +DEFAULT_AARCH64_IMAGES = [
"quay.io/pypa/manylinux2014_aarch64:latest", ] @@ -55,14 +55,14 @@ "AMD64": "x86", "ARM": "arm64", "ARM64": "arm64", - "AARCTAGGED_INT64": "arm64", + "AARCH64": "arm64", } def parse_args(): p = argparse.ArgumentParser() p.add_argument( - "--arch", required=True, help="Architecture (e.g. X86, X64, AARCTAGGED_INT64)" + "--arch", required=True, help="Architecture (e.g. X86, X64, AARCH64)" ) p.add_argument( "--python", required=True, help="Python version (e.g. cp38-cp38, cp313-cp313)" @@ -83,7 +83,7 @@ def get_image_for_arch(arch_normalized: str) -> str: if arch_normalized == "x86": return DEFAULT_X86_IMAGES[0] elif arch_normalized == "arm64": - return DEFAULT_AARCTAGGED_INT64_IMAGES[0] + return DEFAULT_AARCH64_IMAGES[0] else: raise SystemExit(f"Unsupported arch: {arch_normalized!r}") From 43b77838c486b816471685172795c69e5475c8e6 Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Wed, 7 Jan 2026 18:07:24 +0800 Subject: [PATCH 16/44] add unsigned java tests --- .../org/apache/fory/config/ForyBuilder.java | 10 + .../java/org/apache/fory/meta/ClassDef.java | 1 - .../java/org/apache/fory/meta/FieldInfo.java | 1 - .../java/org/apache/fory/meta/FieldTypes.java | 5 +- .../NonexistentClassSerializers.java | 4 +- .../java/org/apache/fory/ForyTestBase.java | 31 +- .../CompatibleFieldConvertTest.java | 35 +- .../serializer/MetaSharedCompatibleTest.java | 2 - .../apache/fory/serializer/UnsignedTest.java | 814 ++++++++++++++++++ 9 files changed, 875 insertions(+), 28 deletions(-) create mode 100644 java/fory-core/src/test/java/org/apache/fory/serializer/UnsignedTest.java diff --git a/java/fory-core/src/main/java/org/apache/fory/config/ForyBuilder.java b/java/fory-core/src/main/java/org/apache/fory/config/ForyBuilder.java index 55c98ab114..51faffd767 100644 --- a/java/fory-core/src/main/java/org/apache/fory/config/ForyBuilder.java +++ b/java/fory-core/src/main/java/org/apache/fory/config/ForyBuilder.java @@ -102,6 +102,11 @@ public ForyBuilder withLanguage(Language language) { return 
this; } + public ForyBuilder withXlang(boolean xlang) { + this.language = xlang ? Language.XLANG : Language.JAVA; + return this; + } + /** Whether track shared or circular references. */ public ForyBuilder withRefTracking(boolean trackingRef) { this.trackingRef = trackingRef; @@ -260,6 +265,11 @@ public ForyBuilder withCompatibleMode(CompatibleMode compatibleMode) { return this; } + public ForyBuilder withCompatible(boolean compatible) { + return withCompatibleMode( + compatible ? CompatibleMode.COMPATIBLE : CompatibleMode.SCHEMA_CONSISTENT); + } + /** * Whether check class schema consistency, will be disabled automatically when {@link * CompatibleMode#COMPATIBLE} is enabled. Do not disable this option unless you can ensure the diff --git a/java/fory-core/src/main/java/org/apache/fory/meta/ClassDef.java b/java/fory-core/src/main/java/org/apache/fory/meta/ClassDef.java index a2329daf96..aaa2444c6f 100644 --- a/java/fory-core/src/main/java/org/apache/fory/meta/ClassDef.java +++ b/java/fory-core/src/main/java/org/apache/fory/meta/ClassDef.java @@ -24,7 +24,6 @@ import java.io.ObjectStreamClass; import java.io.Serializable; import java.lang.reflect.Field; -import java.math.BigDecimal; import java.util.ArrayList; import java.util.Collection; import java.util.Comparator; diff --git a/java/fory-core/src/main/java/org/apache/fory/meta/FieldInfo.java b/java/fory-core/src/main/java/org/apache/fory/meta/FieldInfo.java index 1404099179..0fe83f279b 100644 --- a/java/fory-core/src/main/java/org/apache/fory/meta/FieldInfo.java +++ b/java/fory-core/src/main/java/org/apache/fory/meta/FieldInfo.java @@ -22,7 +22,6 @@ import java.io.Serializable; import java.lang.reflect.Field; import java.lang.reflect.Modifier; -import java.math.BigDecimal; import java.util.Objects; import org.apache.fory.reflect.TypeRef; import org.apache.fory.resolver.TypeResolver; diff --git a/java/fory-core/src/main/java/org/apache/fory/meta/FieldTypes.java 
b/java/fory-core/src/main/java/org/apache/fory/meta/FieldTypes.java index f5f301d92a..3dde5484e6 100644 --- a/java/fory-core/src/main/java/org/apache/fory/meta/FieldTypes.java +++ b/java/fory-core/src/main/java/org/apache/fory/meta/FieldTypes.java @@ -369,7 +369,8 @@ public static FieldType xread( default: { if (Types.isPrimitiveType(xtypeId)) { - // unsigned types share same class with signed numeric types, so unsigned types are not registered. + // unsigned types share same class with signed numeric types, so unsigned types are + // not registered. return new RegisteredFieldType(nullable, trackingRef, xtypeId); } if (!Types.isUserDefinedType((byte) xtypeId)) { @@ -423,7 +424,7 @@ public TypeRef toTypeToken(TypeResolver resolver, TypeRef declared) { if (TypeUtils.unwrap(declared.getRawType()) == TypeUtils.unwrap(cls)) { // we still need correct type, the `read/write` should use `nullable` of `Descriptor` // for serialization - cls = declared.getRawType(); + cls = declared.getRawType(); } } return TypeRef.of(cls, new TypeExtMeta(classId, nullable, trackingRef)); diff --git a/java/fory-core/src/main/java/org/apache/fory/serializer/NonexistentClassSerializers.java b/java/fory-core/src/main/java/org/apache/fory/serializer/NonexistentClassSerializers.java index b9b2741010..f6ae69474d 100644 --- a/java/fory-core/src/main/java/org/apache/fory/serializer/NonexistentClassSerializers.java +++ b/java/fory-core/src/main/java/org/apache/fory/serializer/NonexistentClassSerializers.java @@ -29,6 +29,8 @@ import org.apache.fory.collection.IdentityObjectIntMap; import org.apache.fory.collection.LongMap; import org.apache.fory.collection.MapEntry; +import org.apache.fory.logging.Logger; +import org.apache.fory.logging.LoggerFactory; import org.apache.fory.memory.MemoryBuffer; import org.apache.fory.meta.ClassDef; import org.apache.fory.resolver.ClassInfo; @@ -45,8 +47,6 @@ import org.apache.fory.type.DescriptorGrouper; import org.apache.fory.type.DispatchId; import 
org.apache.fory.type.Generics; -import org.apache.fory.logging.Logger; -import org.apache.fory.logging.LoggerFactory; import org.apache.fory.util.Preconditions; @SuppressWarnings({"rawtypes", "unchecked"}) diff --git a/java/fory-core/src/test/java/org/apache/fory/ForyTestBase.java b/java/fory-core/src/test/java/org/apache/fory/ForyTestBase.java index 4eda591eb4..6e7cb05d7d 100644 --- a/java/fory-core/src/test/java/org/apache/fory/ForyTestBase.java +++ b/java/fory-core/src/test/java/org/apache/fory/ForyTestBase.java @@ -183,7 +183,7 @@ public static Object[][] crossLanguageReferenceTrackingConfig() { @DataProvider public static Object[][] language() { - return new Object[][] { {Language.XLANG}}; + return new Object[][] {{Language.JAVA}, {Language.XLANG}}; } @DataProvider(name = "javaFory") @@ -197,7 +197,34 @@ public static Object[][] javaForyConfig() { .requireClassRegistration(false) .suppressClassRegistrationWarnings(true) .build() - } + }, + { + Fory.builder() + .withLanguage(Language.JAVA) + .withRefTracking(false) + .withCodegen(false) + .requireClassRegistration(false) + .suppressClassRegistrationWarnings(true) + .build() + }, + { + Fory.builder() + .withLanguage(Language.JAVA) + .withRefTracking(true) + .withCodegen(true) + .requireClassRegistration(false) + .suppressClassRegistrationWarnings(true) + .build() + }, + { + Fory.builder() + .withLanguage(Language.JAVA) + .withRefTracking(false) + .withCodegen(true) + .requireClassRegistration(false) + .suppressClassRegistrationWarnings(true) + .build() + }, }; } diff --git a/java/fory-core/src/test/java/org/apache/fory/serializer/CompatibleFieldConvertTest.java b/java/fory-core/src/test/java/org/apache/fory/serializer/CompatibleFieldConvertTest.java index aef9894891..afe294274e 100644 --- a/java/fory-core/src/test/java/org/apache/fory/serializer/CompatibleFieldConvertTest.java +++ b/java/fory-core/src/test/java/org/apache/fory/serializer/CompatibleFieldConvertTest.java @@ -20,6 +20,8 @@ package 
org.apache.fory.serializer; import com.google.common.collect.ImmutableSet; +import java.lang.reflect.Field; +import java.util.List; import org.apache.fory.Fory; import org.apache.fory.ForyTestBase; import org.apache.fory.config.CompatibleMode; @@ -30,9 +32,6 @@ import org.testng.Assert; import org.testng.annotations.Test; -import java.lang.reflect.Field; -import java.util.List; - public class CompatibleFieldConvertTest extends ForyTestBase { public static final class CompatibleFieldConvert1 { public boolean ftrue; @@ -49,7 +48,10 @@ public static final class CompatibleFieldConvert1 { public Float f12; public double f13; public Double f14; - public String toString() {return "" + ftrue + ffalse + f3 + f4 + f5 + f6 + f7 + f8 + f9 + f10 + f11 + f12 + f13 + f14;} + + public String toString() { + return "" + ftrue + ffalse + f3 + f4 + f5 + f6 + f7 + f8 + f9 + f10 + f11 + f12 + f13 + f14; + } } public static final class CompatibleFieldConvert2 { @@ -67,7 +69,10 @@ public static final class CompatibleFieldConvert2 { public float f12; public Double f13; public double f14; - public String toString() {return "" + ftrue + ffalse + f3 + f4 + f5 + f6 + f7 + f8 + f9 + f10 + f11 + f12 + f13 + f14;} + + public String toString() { + return "" + ftrue + ffalse + f3 + f4 + f5 + f6 + f7 + f8 + f9 + f10 + f11 + f12 + f13 + f14; + } } public static final class CompatibleFieldConvert3 { @@ -85,7 +90,10 @@ public static final class CompatibleFieldConvert3 { public String f12; public String f13; public String f14; - public String toString() {return ftrue + ffalse + f3 + f4 + f5 + f6 + f7 + f8 + f9 + f10 + f11 + f12 + f13 + f14;} + + public String toString() { + return ftrue + ffalse + f3 + f4 + f5 + f6 + f7 + f8 + f9 + f10 + f11 + f12 + f13 + f14; + } } @Test(dataProvider = "language") @@ -105,10 +113,7 @@ public void testCompatibleFieldConvert(Language language) throws Exception { field.set(o1, converted); } Fory fory = - builder() - .withLanguage(language) - 
.withCompatibleMode(CompatibleMode.COMPATIBLE) - .build(); + builder().withLanguage(language).withCompatibleMode(CompatibleMode.COMPATIBLE).build(); fory.register(cls); bytes = fory.serialize(o1); } @@ -116,10 +121,7 @@ public void testCompatibleFieldConvert(Language language) throws Exception { Class cls = CompatibleFieldConvert2.class; Assert.assertNotEquals(o1.getClass(), cls); Fory fory = - builder() - .withLanguage(language) - .withCompatibleMode(CompatibleMode.COMPATIBLE) - .build(); + builder().withLanguage(language).withCompatibleMode(CompatibleMode.COMPATIBLE).build(); fory.register(cls); Object o = fory.deserialize(bytes); Assert.assertEquals(o.getClass(), cls); @@ -138,10 +140,7 @@ public void testCompatibleFieldConvert(Language language) throws Exception { } { Fory fory = - builder() - .withLanguage(language) - .withCompatibleMode(CompatibleMode.COMPATIBLE) - .build(); + builder().withLanguage(language).withCompatibleMode(CompatibleMode.COMPATIBLE).build(); Class cls = CompatibleFieldConvert3.class; Assert.assertNotEquals(o1.getClass(), cls); fory.register(cls); diff --git a/java/fory-core/src/test/java/org/apache/fory/serializer/MetaSharedCompatibleTest.java b/java/fory-core/src/test/java/org/apache/fory/serializer/MetaSharedCompatibleTest.java index 4c8d6256a2..94715db221 100644 --- a/java/fory-core/src/test/java/org/apache/fory/serializer/MetaSharedCompatibleTest.java +++ b/java/fory-core/src/test/java/org/apache/fory/serializer/MetaSharedCompatibleTest.java @@ -42,8 +42,6 @@ import org.apache.fory.reflect.ReflectionUtils; import org.apache.fory.resolver.MetaContext; import org.apache.fory.serializer.collection.UnmodifiableSerializersTest; -import org.apache.fory.serializer.converter.FieldConverter; -import org.apache.fory.serializer.converter.FieldConverters; import org.apache.fory.test.bean.BeanA; import org.apache.fory.test.bean.BeanB; import org.apache.fory.test.bean.CollectionFields; diff --git 
a/java/fory-core/src/test/java/org/apache/fory/serializer/UnsignedTest.java b/java/fory-core/src/test/java/org/apache/fory/serializer/UnsignedTest.java new file mode 100644 index 0000000000..d3bbf00f4e --- /dev/null +++ b/java/fory-core/src/test/java/org/apache/fory/serializer/UnsignedTest.java @@ -0,0 +1,814 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.fory.serializer; + +import java.util.Objects; +import lombok.Data; +import org.apache.fory.Fory; +import org.apache.fory.ForyTestBase; +import org.apache.fory.annotation.ForyField; +import org.apache.fory.annotation.Uint16Type; +import org.apache.fory.annotation.Uint32Type; +import org.apache.fory.annotation.Uint64Type; +import org.apache.fory.annotation.Uint8Type; +import org.apache.fory.config.ForyBuilder; +import org.apache.fory.config.LongEncoding; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +public class UnsignedTest extends ForyTestBase { + + // Max values for unsigned types + public static final short UINT8_MAX = 255; + public static final int UINT16_MAX = 65535; + public static final long UINT32_MAX = 4294967295L; + public static final long UINT64_MAX = -1L; // 0xFFFFFFFFFFFFFFFF as signed long + + // Common test values + public static final short UINT8_MID = 128; + public static final int UINT16_MID = 32768; + public static final long UINT32_MID = 2147483648L; + public static final long UINT64_MID = Long.MIN_VALUE; // 0x8000000000000000 + + @Data + public static class UnsignedSchemaConsistent { + @Uint8Type short u8; + + @Uint16Type int u16; + + @Uint32Type(compress = true) + long u32Var; + + @Uint32Type(compress = false) + long u32Fixed; + + @Uint64Type(encoding = LongEncoding.VARINT64) + long u64Var; + + @Uint64Type(encoding = LongEncoding.FIXED_INT64) + long u64Fixed; + + @Uint64Type(encoding = LongEncoding.TAGGED_INT64) + long u64Tagged; + + @ForyField(nullable = true) + @Uint8Type + Short u8Nullable; + + @ForyField(nullable = true) + @Uint16Type + Integer u16Nullable; + + @ForyField(nullable = true) + @Uint32Type(compress = true) + Long u32VarNullable; + + @ForyField(nullable = true) + @Uint32Type(compress = false) + Long u32FixedNullable; + + @ForyField(nullable = true) + @Uint64Type(encoding = LongEncoding.VARINT64) + Long u64VarNullable; + + @ForyField(nullable = true) + 
@Uint64Type(encoding = LongEncoding.FIXED_INT64) + Long u64FixedNullable; + + @ForyField(nullable = true) + @Uint64Type(encoding = LongEncoding.TAGGED_INT64) + Long u64TaggedNullable; + } + + public static class UnsignedSchemaCompatible { + @Uint8Type short u8; + + @Uint16Type int u16; + + @Uint32Type(compress = true) + long u32Var; + + @Uint32Type(compress = false) + long u32Fixed; + + @Uint64Type(encoding = LongEncoding.VARINT64) + long u64Var; + + @Uint64Type(encoding = LongEncoding.FIXED_INT64) + long u64Fixed; + + @Uint64Type(encoding = LongEncoding.TAGGED_INT64) + long u64Tagged; + + @ForyField(nullable = true) + @Uint8Type + Short u8Field2; + + @ForyField(nullable = true) + @Uint16Type + Integer u16Field2; + + @ForyField(nullable = true) + @Uint32Type(compress = true) + Long u32VarField2; + + @ForyField(nullable = true) + @Uint32Type(compress = false) + Long u32FixedField2; + + @ForyField(nullable = true) + @Uint64Type(encoding = LongEncoding.VARINT64) + Long u64VarField2; + + @ForyField(nullable = true) + @Uint64Type(encoding = LongEncoding.FIXED_INT64) + Long u64FixedField2; + + @ForyField(nullable = true) + @Uint64Type(encoding = LongEncoding.TAGGED_INT64) + Long u64TaggedField2; + } + + private static UnsignedSchemaConsistent createConsistentWithNormalValues() { + UnsignedSchemaConsistent obj = new UnsignedSchemaConsistent(); + obj.u8 = 200; + obj.u16 = 60000; + obj.u32Var = 3000000000L; + obj.u32Fixed = 4000000000L; + obj.u64Var = 10000000000L; + obj.u64Fixed = 15000000000L; + obj.u64Tagged = 1000000000L; + obj.u8Nullable = (short) 128; + obj.u16Nullable = 40000; + obj.u32VarNullable = 2500000000L; + obj.u32FixedNullable = 3500000000L; + obj.u64VarNullable = 8000000000L; + obj.u64FixedNullable = 12000000000L; + obj.u64TaggedNullable = 500000000L; + return obj; + } + + private static UnsignedSchemaConsistent createConsistentWithZeroValues() { + UnsignedSchemaConsistent obj = new UnsignedSchemaConsistent(); + obj.u8 = 0; + obj.u16 = 0; + obj.u32Var = 0; + 
obj.u32Fixed = 0; + obj.u64Var = 0; + obj.u64Fixed = 0; + obj.u64Tagged = 0; + obj.u8Nullable = 0; + obj.u16Nullable = 0; + obj.u32VarNullable = 0L; + obj.u32FixedNullable = 0L; + obj.u64VarNullable = 0L; + obj.u64FixedNullable = 0L; + obj.u64TaggedNullable = 0L; + return obj; + } + + private static UnsignedSchemaConsistent createConsistentWithMaxValues() { + UnsignedSchemaConsistent obj = new UnsignedSchemaConsistent(); + obj.u8 = UINT8_MAX; + obj.u16 = UINT16_MAX; + obj.u32Var = UINT32_MAX; + obj.u32Fixed = UINT32_MAX; + obj.u64Var = UINT64_MAX; + obj.u64Fixed = UINT64_MAX; + obj.u64Tagged = UINT64_MAX; + obj.u8Nullable = UINT8_MAX; + obj.u16Nullable = UINT16_MAX; + obj.u32VarNullable = UINT32_MAX; + obj.u32FixedNullable = UINT32_MAX; + obj.u64VarNullable = UINT64_MAX; + obj.u64FixedNullable = UINT64_MAX; + obj.u64TaggedNullable = UINT64_MAX; + return obj; + } + + private static UnsignedSchemaConsistent createConsistentWithMidValues() { + UnsignedSchemaConsistent obj = new UnsignedSchemaConsistent(); + obj.u8 = UINT8_MID; + obj.u16 = UINT16_MID; + obj.u32Var = UINT32_MID; + obj.u32Fixed = UINT32_MID; + obj.u64Var = UINT64_MID; + obj.u64Fixed = UINT64_MID; + obj.u64Tagged = UINT64_MID; + obj.u8Nullable = UINT8_MID; + obj.u16Nullable = UINT16_MID; + obj.u32VarNullable = UINT32_MID; + obj.u32FixedNullable = UINT32_MID; + obj.u64VarNullable = UINT64_MID; + obj.u64FixedNullable = UINT64_MID; + obj.u64TaggedNullable = UINT64_MID; + return obj; + } + + private static UnsignedSchemaConsistent createConsistentWithNullValues() { + UnsignedSchemaConsistent obj = new UnsignedSchemaConsistent(); + obj.u8 = 100; + obj.u16 = 30000; + obj.u32Var = 1500000000L; + obj.u32Fixed = 2000000000L; + obj.u64Var = 5000000000L; + obj.u64Fixed = 7500000000L; + obj.u64Tagged = 250000000L; + obj.u8Nullable = null; + obj.u16Nullable = null; + obj.u32VarNullable = null; + obj.u32FixedNullable = null; + obj.u64VarNullable = null; + obj.u64FixedNullable = null; + obj.u64TaggedNullable = null; + 
return obj; + } + + private static UnsignedSchemaCompatible createCompatibleWithNormalValues() { + UnsignedSchemaCompatible obj = new UnsignedSchemaCompatible(); + obj.u8 = 200; + obj.u16 = 60000; + obj.u32Var = 3000000000L; + obj.u32Fixed = 4000000000L; + obj.u64Var = 10000000000L; + obj.u64Fixed = 15000000000L; + obj.u64Tagged = 1000000000L; + obj.u8Field2 = (short) 128; + obj.u16Field2 = 40000; + obj.u32VarField2 = 2500000000L; + obj.u32FixedField2 = 3500000000L; + obj.u64VarField2 = 8000000000L; + obj.u64FixedField2 = 12000000000L; + obj.u64TaggedField2 = 500000000L; + return obj; + } + + private static UnsignedSchemaCompatible createCompatibleWithZeroValues() { + UnsignedSchemaCompatible obj = new UnsignedSchemaCompatible(); + obj.u8 = 0; + obj.u16 = 0; + obj.u32Var = 0; + obj.u32Fixed = 0; + obj.u64Var = 0; + obj.u64Fixed = 0; + obj.u64Tagged = 0; + obj.u8Field2 = 0; + obj.u16Field2 = 0; + obj.u32VarField2 = 0L; + obj.u32FixedField2 = 0L; + obj.u64VarField2 = 0L; + obj.u64FixedField2 = 0L; + obj.u64TaggedField2 = 0L; + return obj; + } + + private static UnsignedSchemaCompatible createCompatibleWithMaxValues() { + UnsignedSchemaCompatible obj = new UnsignedSchemaCompatible(); + obj.u8 = UINT8_MAX; + obj.u16 = UINT16_MAX; + obj.u32Var = UINT32_MAX; + obj.u32Fixed = UINT32_MAX; + obj.u64Var = UINT64_MAX; + obj.u64Fixed = UINT64_MAX; + obj.u64Tagged = UINT64_MAX; + obj.u8Field2 = UINT8_MAX; + obj.u16Field2 = UINT16_MAX; + obj.u32VarField2 = UINT32_MAX; + obj.u32FixedField2 = UINT32_MAX; + obj.u64VarField2 = UINT64_MAX; + obj.u64FixedField2 = UINT64_MAX; + obj.u64TaggedField2 = UINT64_MAX; + return obj; + } + + private static UnsignedSchemaCompatible createCompatibleWithMidValues() { + UnsignedSchemaCompatible obj = new UnsignedSchemaCompatible(); + obj.u8 = UINT8_MID; + obj.u16 = UINT16_MID; + obj.u32Var = UINT32_MID; + obj.u32Fixed = UINT32_MID; + obj.u64Var = UINT64_MID; + obj.u64Fixed = UINT64_MID; + obj.u64Tagged = UINT64_MID; + obj.u8Field2 = UINT8_MID; + 
obj.u16Field2 = UINT16_MID; + obj.u32VarField2 = UINT32_MID; + obj.u32FixedField2 = UINT32_MID; + obj.u64VarField2 = UINT64_MID; + obj.u64FixedField2 = UINT64_MID; + obj.u64TaggedField2 = UINT64_MID; + return obj; + } + + private static UnsignedSchemaCompatible createCompatibleWithNullValues() { + UnsignedSchemaCompatible obj = new UnsignedSchemaCompatible(); + obj.u8 = 100; + obj.u16 = 30000; + obj.u32Var = 1500000000L; + obj.u32Fixed = 2000000000L; + obj.u64Var = 5000000000L; + obj.u64Fixed = 7500000000L; + obj.u64Tagged = 250000000L; + obj.u8Field2 = null; + obj.u16Field2 = null; + obj.u32VarField2 = null; + obj.u32FixedField2 = null; + obj.u64VarField2 = null; + obj.u64FixedField2 = null; + obj.u64TaggedField2 = null; + return obj; + } + + @DataProvider(name = "fory") + public static Object[][] javaForyConfig() { + ForyBuilder builder = Fory.builder().withXlang(false).requireClassRegistration(false); + return new Object[][] { + {builder.withCompatible(true).withCodegen(false).build()}, + {builder.withCompatible(true).withCodegen(true).build()}, + {builder.withCompatible(false).withCodegen(false).build()}, + {builder.withCompatible(false).withCodegen(true).build()} + }; + } + + @Test(dataProvider = "fory") + public void testUnsignedSchemaConsistentNormalValues(Fory fory) { + serDeCheck(fory, createConsistentWithNormalValues()); + } + + @Test(dataProvider = "fory") + public void testUnsignedSchemaConsistentZeroValues(Fory fory) { + serDeCheck(fory, createConsistentWithZeroValues()); + } + + @Test(dataProvider = "fory") + public void testUnsignedSchemaConsistentMaxValues(Fory fory) { + serDeCheck(fory, createConsistentWithMaxValues()); + } + + @Test(dataProvider = "fory") + public void testUnsignedSchemaConsistentMidValues(Fory fory) { + serDeCheck(fory, createConsistentWithMidValues()); + } + + @Test(dataProvider = "fory") + public void testUnsignedSchemaConsistentNullValues(Fory fory) { + serDeCheck(fory, createConsistentWithNullValues()); + } + + 
@Test(dataProvider = "fory") + public void testUnsignedSchemaCompatibleNormalValues(Fory fory) { + serDeCheck(fory, createCompatibleWithNormalValues()); + } + + @Test(dataProvider = "fory") + public void testUnsignedSchemaCompatibleZeroValues(Fory fory) { + serDeCheck(fory, createCompatibleWithZeroValues()); + } + + @Test(dataProvider = "fory") + public void testUnsignedSchemaCompatibleMaxValues(Fory fory) { + serDeCheck(fory, createCompatibleWithMaxValues()); + } + + @Test(dataProvider = "fory") + public void testUnsignedSchemaCompatibleMidValues(Fory fory) { + serDeCheck(fory, createCompatibleWithMidValues()); + } + + @Test(dataProvider = "fory") + public void testUnsignedSchemaCompatibleNullValues(Fory fory) { + serDeCheck(fory, createCompatibleWithNullValues()); + } + + // Test specific edge cases for each unsigned type + public static class Uint8OnlyStruct { + @Uint8Type short value; + + @ForyField(nullable = true) + @Uint8Type + Short nullableValue; + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + Uint8OnlyStruct that = (Uint8OnlyStruct) o; + return value == that.value && Objects.equals(nullableValue, that.nullableValue); + } + + @Override + public int hashCode() { + return Objects.hash(value, nullableValue); + } + } + + @Test(dataProvider = "fory") + public void testUint8EdgeCases(Fory fory) { + // Test 0 + Uint8OnlyStruct zero = new Uint8OnlyStruct(); + zero.value = 0; + zero.nullableValue = 0; + serDeCheck(fory, zero); + + // Test 1 + Uint8OnlyStruct one = new Uint8OnlyStruct(); + one.value = 1; + one.nullableValue = 1; + serDeCheck(fory, one); + + // Test 127 (max signed byte) + Uint8OnlyStruct maxSignedByte = new Uint8OnlyStruct(); + maxSignedByte.value = 127; + maxSignedByte.nullableValue = 127; + serDeCheck(fory, maxSignedByte); + + // Test 128 (min negative as unsigned) + Uint8OnlyStruct minUnsignedOver127 = new Uint8OnlyStruct(); + minUnsignedOver127.value = 
128; + minUnsignedOver127.nullableValue = 128; + serDeCheck(fory, minUnsignedOver127); + + // Test 255 (max uint8) + Uint8OnlyStruct maxUint8 = new Uint8OnlyStruct(); + maxUint8.value = 255; + maxUint8.nullableValue = 255; + serDeCheck(fory, maxUint8); + + // Test null + Uint8OnlyStruct withNull = new Uint8OnlyStruct(); + withNull.value = 200; + withNull.nullableValue = null; + serDeCheck(fory, withNull); + } + + public static class Uint16OnlyStruct { + @Uint16Type int value; + + @ForyField(nullable = true) + @Uint16Type + Integer nullableValue; + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + Uint16OnlyStruct that = (Uint16OnlyStruct) o; + return value == that.value && Objects.equals(nullableValue, that.nullableValue); + } + + @Override + public int hashCode() { + return Objects.hash(value, nullableValue); + } + } + + @Test(dataProvider = "fory") + public void testUint16EdgeCases(Fory fory) { + // Test 0 + Uint16OnlyStruct zero = new Uint16OnlyStruct(); + zero.value = 0; + zero.nullableValue = 0; + serDeCheck(fory, zero); + + // Test 1 + Uint16OnlyStruct one = new Uint16OnlyStruct(); + one.value = 1; + one.nullableValue = 1; + serDeCheck(fory, one); + + // Test 32767 (max signed short) + Uint16OnlyStruct maxSignedShort = new Uint16OnlyStruct(); + maxSignedShort.value = 32767; + maxSignedShort.nullableValue = 32767; + serDeCheck(fory, maxSignedShort); + + // Test 32768 (min unsigned over signed max) + Uint16OnlyStruct minUnsignedOver32767 = new Uint16OnlyStruct(); + minUnsignedOver32767.value = 32768; + minUnsignedOver32767.nullableValue = 32768; + serDeCheck(fory, minUnsignedOver32767); + + // Test 65535 (max uint16) + Uint16OnlyStruct maxUint16 = new Uint16OnlyStruct(); + maxUint16.value = 65535; + maxUint16.nullableValue = 65535; + serDeCheck(fory, maxUint16); + + // Test null + Uint16OnlyStruct withNull = new Uint16OnlyStruct(); + withNull.value = 50000; + 
withNull.nullableValue = null; + serDeCheck(fory, withNull); + } + + public static class Uint32OnlyStruct { + @Uint32Type(compress = true) + long varValue; + + @Uint32Type(compress = false) + long fixedValue; + + @ForyField(nullable = true) + @Uint32Type(compress = true) + Long varNullableValue; + + @ForyField(nullable = true) + @Uint32Type(compress = false) + Long fixedNullableValue; + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + Uint32OnlyStruct that = (Uint32OnlyStruct) o; + return varValue == that.varValue + && fixedValue == that.fixedValue + && Objects.equals(varNullableValue, that.varNullableValue) + && Objects.equals(fixedNullableValue, that.fixedNullableValue); + } + + @Override + public int hashCode() { + return Objects.hash(varValue, fixedValue, varNullableValue, fixedNullableValue); + } + } + + @Test(dataProvider = "fory") + public void testUint32EdgeCases(Fory fory) { + // Test 0 + Uint32OnlyStruct zero = new Uint32OnlyStruct(); + zero.varValue = 0; + zero.fixedValue = 0; + zero.varNullableValue = 0L; + zero.fixedNullableValue = 0L; + serDeCheck(fory, zero); + + // Test 1 + Uint32OnlyStruct one = new Uint32OnlyStruct(); + one.varValue = 1; + one.fixedValue = 1; + one.varNullableValue = 1L; + one.fixedNullableValue = 1L; + serDeCheck(fory, one); + + // Test 2147483647 (max signed int) + Uint32OnlyStruct maxSignedInt = new Uint32OnlyStruct(); + maxSignedInt.varValue = 2147483647L; + maxSignedInt.fixedValue = 2147483647L; + maxSignedInt.varNullableValue = 2147483647L; + maxSignedInt.fixedNullableValue = 2147483647L; + serDeCheck(fory, maxSignedInt); + + // Test 2147483648 (min unsigned over signed max) + Uint32OnlyStruct minUnsignedOver = new Uint32OnlyStruct(); + minUnsignedOver.varValue = 2147483648L; + minUnsignedOver.fixedValue = 2147483648L; + minUnsignedOver.varNullableValue = 2147483648L; + minUnsignedOver.fixedNullableValue = 2147483648L; + 
serDeCheck(fory, minUnsignedOver); + + // Test 4294967295 (max uint32) + Uint32OnlyStruct maxUint32 = new Uint32OnlyStruct(); + maxUint32.varValue = 4294967295L; + maxUint32.fixedValue = 4294967295L; + maxUint32.varNullableValue = 4294967295L; + maxUint32.fixedNullableValue = 4294967295L; + serDeCheck(fory, maxUint32); + + // Test null + Uint32OnlyStruct withNull = new Uint32OnlyStruct(); + withNull.varValue = 3000000000L; + withNull.fixedValue = 3000000000L; + withNull.varNullableValue = null; + withNull.fixedNullableValue = null; + serDeCheck(fory, withNull); + } + + public static class Uint64OnlyStruct { + @Uint64Type(encoding = LongEncoding.VARINT64) + long varValue; + + @Uint64Type(encoding = LongEncoding.FIXED_INT64) + long fixedValue; + + @Uint64Type(encoding = LongEncoding.TAGGED_INT64) + long taggedValue; + + @ForyField(nullable = true) + @Uint64Type(encoding = LongEncoding.VARINT64) + Long varNullableValue; + + @ForyField(nullable = true) + @Uint64Type(encoding = LongEncoding.FIXED_INT64) + Long fixedNullableValue; + + @ForyField(nullable = true) + @Uint64Type(encoding = LongEncoding.TAGGED_INT64) + Long taggedNullableValue; + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + Uint64OnlyStruct that = (Uint64OnlyStruct) o; + return varValue == that.varValue + && fixedValue == that.fixedValue + && taggedValue == that.taggedValue + && Objects.equals(varNullableValue, that.varNullableValue) + && Objects.equals(fixedNullableValue, that.fixedNullableValue) + && Objects.equals(taggedNullableValue, that.taggedNullableValue); + } + + @Override + public int hashCode() { + return Objects.hash( + varValue, + fixedValue, + taggedValue, + varNullableValue, + fixedNullableValue, + taggedNullableValue); + } + } + + @Test(dataProvider = "fory") + public void testUint64EdgeCases(Fory fory) { + // Test 0 + Uint64OnlyStruct zero = new Uint64OnlyStruct(); + zero.varValue = 0; + 
zero.fixedValue = 0; + zero.taggedValue = 0; + zero.varNullableValue = 0L; + zero.fixedNullableValue = 0L; + zero.taggedNullableValue = 0L; + serDeCheck(fory, zero); + + // Test 1 + Uint64OnlyStruct one = new Uint64OnlyStruct(); + one.varValue = 1; + one.fixedValue = 1; + one.taggedValue = 1; + one.varNullableValue = 1L; + one.fixedNullableValue = 1L; + one.taggedNullableValue = 1L; + serDeCheck(fory, one); + + // Test Long.MAX_VALUE (max signed long) + Uint64OnlyStruct maxSignedLong = new Uint64OnlyStruct(); + maxSignedLong.varValue = Long.MAX_VALUE; + maxSignedLong.fixedValue = Long.MAX_VALUE; + maxSignedLong.taggedValue = Long.MAX_VALUE; + maxSignedLong.varNullableValue = Long.MAX_VALUE; + maxSignedLong.fixedNullableValue = Long.MAX_VALUE; + maxSignedLong.taggedNullableValue = Long.MAX_VALUE; + serDeCheck(fory, maxSignedLong); + + // Test Long.MIN_VALUE (this represents 2^63 as unsigned) + Uint64OnlyStruct minValue = new Uint64OnlyStruct(); + minValue.varValue = Long.MIN_VALUE; + minValue.fixedValue = Long.MIN_VALUE; + minValue.taggedValue = Long.MIN_VALUE; + minValue.varNullableValue = Long.MIN_VALUE; + minValue.fixedNullableValue = Long.MIN_VALUE; + minValue.taggedNullableValue = Long.MIN_VALUE; + serDeCheck(fory, minValue); + + // Test -1 (this represents max uint64: 0xFFFFFFFFFFFFFFFF) + Uint64OnlyStruct maxUint64 = new Uint64OnlyStruct(); + maxUint64.varValue = -1L; + maxUint64.fixedValue = -1L; + maxUint64.taggedValue = -1L; + maxUint64.varNullableValue = -1L; + maxUint64.fixedNullableValue = -1L; + maxUint64.taggedNullableValue = -1L; + serDeCheck(fory, maxUint64); + + // Test null + Uint64OnlyStruct withNull = new Uint64OnlyStruct(); + withNull.varValue = 10000000000L; + withNull.fixedValue = 10000000000L; + withNull.taggedValue = 10000000000L; + withNull.varNullableValue = null; + withNull.fixedNullableValue = null; + withNull.taggedNullableValue = null; + serDeCheck(fory, withNull); + } + + // Test tagged encoding boundary values + @Test(dataProvider = 
"fory") + public void testTaggedEncodingBoundaryValues(Fory fory) { + Uint64OnlyStruct obj = new Uint64OnlyStruct(); + + // Test value at tagged 4-byte boundary: -1073741824 (HALF_MIN_INT_VALUE) + obj.varValue = -1073741824L; + obj.fixedValue = -1073741824L; + obj.taggedValue = -1073741824L; + obj.varNullableValue = -1073741824L; + obj.fixedNullableValue = -1073741824L; + obj.taggedNullableValue = -1073741824L; + serDeCheck(fory, obj); + + // Test value at tagged 4-byte boundary: 1073741823 (HALF_MAX_INT_VALUE) + obj.varValue = 1073741823L; + obj.fixedValue = 1073741823L; + obj.taggedValue = 1073741823L; + obj.varNullableValue = 1073741823L; + obj.fixedNullableValue = 1073741823L; + obj.taggedNullableValue = 1073741823L; + serDeCheck(fory, obj); + + // Test value just below tagged 4-byte boundary + obj.varValue = -1073741825L; + obj.fixedValue = -1073741825L; + obj.taggedValue = -1073741825L; + obj.varNullableValue = -1073741825L; + obj.fixedNullableValue = -1073741825L; + obj.taggedNullableValue = -1073741825L; + serDeCheck(fory, obj); + + // Test value just above tagged 4-byte boundary + obj.varValue = 1073741824L; + obj.fixedValue = 1073741824L; + obj.taggedValue = 1073741824L; + obj.varNullableValue = 1073741824L; + obj.fixedNullableValue = 1073741824L; + obj.taggedNullableValue = 1073741824L; + serDeCheck(fory, obj); + } + + // Test varint encoding boundary values + @Test(dataProvider = "fory") + public void testVarintEncodingBoundaryValues(Fory fory) { + Uint32OnlyStruct obj32 = new Uint32OnlyStruct(); + + // 1-byte varint boundary (0-127) + obj32.varValue = 127; + obj32.fixedValue = 127; + obj32.varNullableValue = 127L; + obj32.fixedNullableValue = 127L; + serDeCheck(fory, obj32); + + // 2-byte varint boundary (128-16383) + obj32.varValue = 128; + obj32.fixedValue = 128; + obj32.varNullableValue = 128L; + obj32.fixedNullableValue = 128L; + serDeCheck(fory, obj32); + + obj32.varValue = 16383; + obj32.fixedValue = 16383; + obj32.varNullableValue = 16383L; + 
obj32.fixedNullableValue = 16383L; + serDeCheck(fory, obj32); + + // 3-byte varint boundary (16384-2097151) + obj32.varValue = 16384; + obj32.fixedValue = 16384; + obj32.varNullableValue = 16384L; + obj32.fixedNullableValue = 16384L; + serDeCheck(fory, obj32); + + obj32.varValue = 2097151; + obj32.fixedValue = 2097151; + obj32.varNullableValue = 2097151L; + obj32.fixedNullableValue = 2097151L; + serDeCheck(fory, obj32); + + // 4-byte varint boundary (2097152-268435455) + obj32.varValue = 2097152; + obj32.fixedValue = 2097152; + obj32.varNullableValue = 2097152L; + obj32.fixedNullableValue = 2097152L; + serDeCheck(fory, obj32); + + obj32.varValue = 268435455; + obj32.fixedValue = 268435455; + obj32.varNullableValue = 268435455L; + obj32.fixedNullableValue = 268435455L; + serDeCheck(fory, obj32); + + // 5-byte varint boundary (268435456+) + obj32.varValue = 268435456; + obj32.fixedValue = 268435456; + obj32.varNullableValue = 268435456L; + obj32.fixedNullableValue = 268435456L; + serDeCheck(fory, obj32); + } +} From baf70d7c278eb38002a554b16fcac435f64e7713 Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Wed, 7 Jan 2026 22:18:04 +0800 Subject: [PATCH 17/44] fix descriptor sort comparator --- .../java/org/apache/fory/meta/FieldInfo.java | 1 + .../apache/fory/resolver/ClassResolver.java | 6 +- .../apache/fory/resolver/TypeResolver.java | 65 ++++ .../apache/fory/resolver/XtypeResolver.java | 8 +- .../apache/fory/type/DescriptorGrouper.java | 119 +------ .../apache/fory/type/TypeAnnotationUtils.java | 41 ++- .../main/java/org/apache/fory/type/Types.java | 21 +- .../main/java/org/apache/fory/util/Utils.java | 2 +- .../apache/fory/serializer/UnsignedTest.java | 325 +++++++++++------- .../fory/type/DescriptorGrouperTest.java | 71 ++-- 10 files changed, 380 insertions(+), 279 deletions(-) diff --git a/java/fory-core/src/main/java/org/apache/fory/meta/FieldInfo.java b/java/fory-core/src/main/java/org/apache/fory/meta/FieldInfo.java index 0fe83f279b..1b011caa46 100644 --- 
a/java/fory-core/src/main/java/org/apache/fory/meta/FieldInfo.java +++ b/java/fory-core/src/main/java/org/apache/fory/meta/FieldInfo.java @@ -112,6 +112,7 @@ Descriptor toDescriptor(TypeResolver resolver, Descriptor descriptor) { .typeName(typeName) .trackingRef(remoteTrackingRef) .nullable(remoteNullable) + .typeRef(typeRef) .build(); } DescriptorBuilder builder = diff --git a/java/fory-core/src/main/java/org/apache/fory/resolver/ClassResolver.java b/java/fory-core/src/main/java/org/apache/fory/resolver/ClassResolver.java index 39bc2af603..4d7cc17112 100644 --- a/java/fory-core/src/main/java/org/apache/fory/resolver/ClassResolver.java +++ b/java/fory-core/src/main/java/org/apache/fory/resolver/ClassResolver.java @@ -1913,8 +1913,9 @@ public Comparator createTypeAndNameComparator() { // Use normalized type name so that Collection/Map subtypes have consistent order // between processes even if the field doesn't exist in peer (e.g., List vs Collection). int c = getNormalizedTypeName(d1).compareTo(getNormalizedTypeName(d2)); + // noinspection Duplicates if (c == 0) { - c = DescriptorGrouper.getFieldSortKey(d1).compareTo(DescriptorGrouper.getFieldSortKey(d2)); + c = getFieldSortKey(d1).compareTo(getFieldSortKey(d2)); if (c == 0) { // Field name duplicate in super/child classes. 
c = d1.getDeclaringClass().compareTo(d2.getDeclaringClass()); @@ -1939,8 +1940,7 @@ public DescriptorGrouper createDescriptorGrouper( descriptors, descriptorsGroupedOrdered, descriptorUpdator, - fory.compressInt(), - fory.compressLong(), + getPrimitiveComparator(), createTypeAndNameComparator()) .sort(); } diff --git a/java/fory-core/src/main/java/org/apache/fory/resolver/TypeResolver.java b/java/fory-core/src/main/java/org/apache/fory/resolver/TypeResolver.java index 25d8743f7b..90600bec1a 100644 --- a/java/fory-core/src/main/java/org/apache/fory/resolver/TypeResolver.java +++ b/java/fory-core/src/main/java/org/apache/fory/resolver/TypeResolver.java @@ -20,6 +20,7 @@ package org.apache.fory.resolver; import static org.apache.fory.Fory.NOT_SUPPORT_XLANG; +import static org.apache.fory.type.TypeUtils.getSizeOfPrimitiveType; import com.google.common.collect.BiMap; import com.google.common.collect.HashBiMap; @@ -28,6 +29,7 @@ import java.lang.reflect.Type; import java.util.ArrayList; import java.util.Collection; +import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -77,6 +79,7 @@ import org.apache.fory.type.GenericType; import org.apache.fory.type.ScalaTypes; import org.apache.fory.type.TypeUtils; +import org.apache.fory.type.Types; import org.apache.fory.util.GraalvmSupport; import org.apache.fory.util.GraalvmSupport.GraalvmSerializerHolder; import org.apache.fory.util.Preconditions; @@ -622,6 +625,68 @@ public List getFieldDescriptors(Class clz, boolean searchParent) return result; } + /** + * Gets the sort key for a field descriptor. + * + *

    If the field has a {@link ForyField} annotation with id >= 0, returns the id as a string. + * Otherwise, returns the snake_case field name. This ensures fields are sorted by tag ID when + * configured, matching the fingerprint computation order. + * + * @param descriptor the field descriptor + * @return the sort key (tag ID as string or snake_case name) + */ + protected static String getFieldSortKey(Descriptor descriptor) { + ForyField foryField = descriptor.getForyField(); + if (foryField != null && foryField.id() >= 0) { + return String.valueOf(foryField.id()); + } + return descriptor.getSnakeCaseName(); + } + + /** + * When compress disabled, sort primitive descriptors from largest to smallest, if size is the + * same, sort by field name to fix order. + * + *

    When compress enabled, sort primitive descriptors from largest to smallest but let compress + * fields ends in tail. if size is the same, sort by field name to fix order. + */ + public Comparator getPrimitiveComparator() { + return (d1, d2) -> { + Class t1 = TypeUtils.unwrap(d1.getRawType()); + Class t2 = TypeUtils.unwrap(d2.getRawType()); + int typeId1 = Types.getDescriptorTypeId(fory, d1); + int typeId2 = Types.getDescriptorTypeId(fory, d2); + boolean t1Compress = Types.isCompressedType(typeId1); + boolean t2Compress = Types.isCompressedType(typeId2); + if ((t1Compress && t2Compress) || (!t1Compress && !t2Compress)) { + int c = getSizeOfPrimitiveType(t2) - getSizeOfPrimitiveType(t1); + if (c == 0) { + c = typeId2 - typeId1; + // noinspection Duplicates + if (c == 0) { + c = getFieldSortKey(d1).compareTo(getFieldSortKey(d2)); + if (c == 0) { + // Field name duplicate in super/child classes. + c = d1.getDeclaringClass().compareTo(d2.getDeclaringClass()); + if (c == 0) { + // Final tie-breaker: use actual field name to distinguish fields with same tag ID. + // This ensures Comparator contract is satisfied (returns 0 only for same object). + c = d1.getName().compareTo(d2.getName()); + } + } + } + return c; + } + return c; + } + if (t1Compress) { + return 1; + } + // t2 compress + return -1; + }; + } + /** * Get the nullable flag for a field, respecting xlang mode. 
* diff --git a/java/fory-core/src/main/java/org/apache/fory/resolver/XtypeResolver.java b/java/fory-core/src/main/java/org/apache/fory/resolver/XtypeResolver.java index c482436d69..eeac4aec12 100644 --- a/java/fory-core/src/main/java/org/apache/fory/resolver/XtypeResolver.java +++ b/java/fory-core/src/main/java/org/apache/fory/resolver/XtypeResolver.java @@ -962,19 +962,17 @@ public DescriptorGrouper createDescriptorGrouper( descriptors, descriptorsGroupedOrdered, descriptorUpdator, - fory.compressInt(), - fory.compressLong(), + getPrimitiveComparator(), (o1, o2) -> { int xtypeId = getXtypeId(o1.getRawType()); int xtypeId2 = getXtypeId(o2.getRawType()); if (xtypeId == xtypeId2) { - return DescriptorGrouper.getFieldSortKey(o1) - .compareTo(DescriptorGrouper.getFieldSortKey(o2)); + return getFieldSortKey(o1).compareTo(getFieldSortKey(o2)); } else { return xtypeId - xtypeId2; } }) - .setOtherDescriptorComparator(Comparator.comparing(DescriptorGrouper::getFieldSortKey)) + .setOtherDescriptorComparator(Comparator.comparing(TypeResolver::getFieldSortKey)) .sort(); } diff --git a/java/fory-core/src/main/java/org/apache/fory/type/DescriptorGrouper.java b/java/fory-core/src/main/java/org/apache/fory/type/DescriptorGrouper.java index da25315d9b..2d232c784c 100644 --- a/java/fory-core/src/main/java/org/apache/fory/type/DescriptorGrouper.java +++ b/java/fory-core/src/main/java/org/apache/fory/type/DescriptorGrouper.java @@ -19,8 +19,6 @@ package org.apache.fory.type; -import static org.apache.fory.type.TypeUtils.getSizeOfPrimitiveType; - import java.lang.reflect.Method; import java.util.ArrayList; import java.util.Collection; @@ -29,7 +27,6 @@ import java.util.TreeSet; import java.util.function.Function; import java.util.function.Predicate; -import org.apache.fory.annotation.ForyField; import org.apache.fory.util.Preconditions; import org.apache.fory.util.record.RecordUtils; @@ -44,123 +41,12 @@ */ public class DescriptorGrouper { - /** - * Gets the sort key for a field 
descriptor. - * - *

    If the field has a {@link ForyField} annotation with id >= 0, returns the id as a string. - * Otherwise, returns the snake_case field name. This ensures fields are sorted by tag ID when - * configured, matching the fingerprint computation order. - * - * @param descriptor the field descriptor - * @return the sort key (tag ID as string or snake_case name) - */ - public static String getFieldSortKey(Descriptor descriptor) { - ForyField foryField = descriptor.getForyField(); - if (foryField != null && foryField.id() >= 0) { - return String.valueOf(foryField.id()); - } - return descriptor.getSnakeCaseName(); - } - - static final Comparator COMPARATOR_BY_PRIMITIVE_TYPE_ID = - (d1, d2) -> { - int c = - Types.getPrimitiveTypeId(TypeUtils.unwrap(d2.getRawType())) - - Types.getPrimitiveTypeId(TypeUtils.unwrap(d1.getRawType())); - if (c == 0) { - c = getFieldSortKey(d1).compareTo(getFieldSortKey(d2)); - if (c == 0) { - // Field name duplicate in super/child classes. - c = d1.getDeclaringClass().compareTo(d2.getDeclaringClass()); - if (c == 0) { - // Final tie-breaker: use actual field name to distinguish fields with same tag ID. - // This ensures TreeSet never treats different fields as duplicates. - c = d1.getName().compareTo(d2.getName()); - } - } - } - return c; - }; private final Collection descriptors; private final Predicate isBuildIn; private final Function descriptorUpdater; private final boolean descriptorsGroupedOrdered; private boolean sorted = false; - /** - * When compress disabled, sort primitive descriptors from largest to smallest, if size is the - * same, sort by field name to fix order. - * - *

    When compress enabled, sort primitive descriptors from largest to smallest but let compress - * fields ends in tail. if size is the same, sort by field name to fix order. - */ - public static Comparator getPrimitiveComparator( - boolean compressInt, boolean compressLong) { - if (!compressInt && !compressLong) { - // sort primitive descriptors from largest to smallest, if size is the same, - // sort by field name to fix order. - return (d1, d2) -> { - int c = - getSizeOfPrimitiveType(TypeUtils.unwrap(d2.getRawType())) - - getSizeOfPrimitiveType(TypeUtils.unwrap(d1.getRawType())); - if (c == 0) { - c = COMPARATOR_BY_PRIMITIVE_TYPE_ID.compare(d1, d2); - } - return c; - }; - } - return (d1, d2) -> { - Class t1 = TypeUtils.unwrap(d1.getRawType()); - Class t2 = TypeUtils.unwrap(d2.getRawType()); - boolean t1Compress = isCompressedType(t1, compressInt, compressLong); - boolean t2Compress = isCompressedType(t2, compressInt, compressLong); - if ((t1Compress && t2Compress) || (!t1Compress && !t2Compress)) { - int c = getSizeOfPrimitiveType(t2) - getSizeOfPrimitiveType(t1); - if (c == 0) { - c = COMPARATOR_BY_PRIMITIVE_TYPE_ID.compare(d1, d2); - } - return c; - } - if (t1Compress) { - return 1; - } - // t2 compress - return -1; - }; - } - - private static boolean isCompressedType(Class cls, boolean compressInt, boolean compressLong) { - cls = TypeUtils.unwrap(cls); - if (cls == int.class) { - return compressInt; - } - if (cls == long.class) { - return compressLong; - } - return false; - } - - /** Comparator based on field type, name/id and declaring class. */ - public static final Comparator COMPARATOR_BY_TYPE_AND_NAME = - (d1, d2) -> { - // sort by type so that we can hit class info cache more possibly. - // sort by field id/name to fix order if type is same. - int c = d1.getTypeName().compareTo(d2.getTypeName()); - if (c == 0) { - c = getFieldSortKey(d1).compareTo(getFieldSortKey(d2)); - if (c == 0) { - // Field name duplicate in super/child classes. 
- c = d1.getDeclaringClass().compareTo(d2.getDeclaringClass()); - if (c == 0) { - // Final tie-breaker: use actual field name to distinguish fields with same tag ID. - // This ensures TreeSet never treats different fields as duplicates. - c = d1.getName().compareTo(d2.getName()); - } - } - } - return c; - }; - private final Collection primitiveDescriptors; private final Collection boxedDescriptors; // The element type should be final. @@ -301,15 +187,14 @@ public static DescriptorGrouper createDescriptorGrouper( Collection descriptors, boolean descriptorsGroupedOrdered, Function descriptorUpdator, - boolean compressInt, - boolean compressLong, + Comparator primitiveComparator, Comparator comparator) { return new DescriptorGrouper( isBuildIn, descriptors, descriptorsGroupedOrdered, descriptorUpdator == null ? DescriptorGrouper::createDescriptor : descriptorUpdator, - getPrimitiveComparator(compressInt, compressLong), + primitiveComparator, comparator); } diff --git a/java/fory-core/src/main/java/org/apache/fory/type/TypeAnnotationUtils.java b/java/fory-core/src/main/java/org/apache/fory/type/TypeAnnotationUtils.java index 50f4b09248..6a64ffc82e 100644 --- a/java/fory-core/src/main/java/org/apache/fory/type/TypeAnnotationUtils.java +++ b/java/fory-core/src/main/java/org/apache/fory/type/TypeAnnotationUtils.java @@ -28,16 +28,31 @@ import org.apache.fory.annotation.Uint8Type; public class TypeAnnotationUtils { - public static int getTypeId(Annotation typeAnnotation) { - if (typeAnnotation == null) return Types.UNKNOWN; + + /** + * Get the type id for the given type annotation and validate it against the field type. 
+ * + * @param typeAnnotation the type annotation + * @param fieldType the field type class + * @return the type id + * @throws IllegalArgumentException if the annotation is not compatible with the field type + */ + public static int getTypeId(Annotation typeAnnotation, Class fieldType) { + if (typeAnnotation == null) { + return Types.UNKNOWN; + } if (typeAnnotation instanceof Uint8Type) { + checkFieldType(fieldType, "@Uint8Type", byte.class, Byte.class); return Types.UINT8; } else if (typeAnnotation instanceof Uint16Type) { + checkFieldType(fieldType, "@Uint16Type", short.class, Short.class); return Types.UINT16; } else if (typeAnnotation instanceof Uint32Type) { + checkFieldType(fieldType, "@Uint32Type", int.class, Integer.class); Uint32Type uint32Type = (Uint32Type) typeAnnotation; return uint32Type.compress() ? Types.VAR_UINT32 : Types.UINT32; } else if (typeAnnotation instanceof Uint64Type) { + checkFieldType(fieldType, "@Uint64Type", long.class, Long.class); Uint64Type uint64Type = (Uint64Type) typeAnnotation; switch (uint64Type.encoding()) { case VARINT64: @@ -50,9 +65,11 @@ public static int getTypeId(Annotation typeAnnotation) { throw new IllegalArgumentException("Unsupported encoding: " + uint64Type.encoding()); } } else if (typeAnnotation instanceof Int32Type) { + checkFieldType(fieldType, "@Int32Type", int.class, Integer.class); Int32Type int32Type = (Int32Type) typeAnnotation; return int32Type.compress() ? 
Types.VARINT32 : Types.INT32; } else if (typeAnnotation instanceof Int64Type) { + checkFieldType(fieldType, "@Int64Type", long.class, Long.class); Int64Type int64Type = (Int64Type) typeAnnotation; switch (int64Type.encoding()) { case VARINT64: @@ -65,6 +82,24 @@ public static int getTypeId(Annotation typeAnnotation) { throw new IllegalArgumentException("Unsupported encoding: " + int64Type.encoding()); } } - throw new IllegalArgumentException("Unsupported type: " + typeAnnotation.getClass()); + throw new IllegalArgumentException("Unsupported type annotation: " + typeAnnotation.getClass()); + } + + private static void checkFieldType( + Class fieldType, String annotationName, Class... allowedTypes) { + for (Class allowedType : allowedTypes) { + if (fieldType == allowedType) { + return; + } + } + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < allowedTypes.length; i++) { + if (i > 0) { + sb.append(" or "); + } + sb.append(allowedTypes[i].getSimpleName()); + } + throw new IllegalArgumentException( + annotationName + " can only be applied to " + sb + " fields, but got " + fieldType); } } diff --git a/java/fory-core/src/main/java/org/apache/fory/type/Types.java b/java/fory-core/src/main/java/org/apache/fory/type/Types.java index 9042ae62f8..b89df739cc 100644 --- a/java/fory-core/src/main/java/org/apache/fory/type/Types.java +++ b/java/fory-core/src/main/java/org/apache/fory/type/Types.java @@ -342,10 +342,10 @@ public static int getPrimitiveTypeId(Fory fory, Class rawType) { public static int getDescriptorTypeId(Fory fory, Field field) { Annotation annotation = Descriptor.getAnnotation(field); + Class rawType = field.getType(); if (annotation != null) { - return TypeAnnotationUtils.getTypeId(annotation); + return TypeAnnotationUtils.getTypeId(annotation, rawType); } else { - Class rawType = field.getType(); return getTypeId(fory, rawType); } } @@ -356,11 +356,11 @@ public static int getDescriptorTypeId(Fory fory, Descriptor d) { if (extMeta != null) { return 
extMeta.typeId(); } else { + Class rawType = typeRef.getRawType(); Annotation typeAnnotation = d.getTypeAnnotation(); if (typeAnnotation != null) { - return TypeAnnotationUtils.getTypeId(typeAnnotation); + return TypeAnnotationUtils.getTypeId(typeAnnotation, rawType); } else { - Class rawType = typeRef.getRawType(); return getTypeId(fory, rawType); } } @@ -429,4 +429,17 @@ public static Class getClassForTypeId(int typeId) { return null; } } + + public static boolean isCompressedType(int typeId) { + switch (typeId) { + case VARINT32: + case VAR_UINT32: + case VARINT64: + case VAR_UINT64: + case TAGGED_INT64: + case TAGGED_UINT64: + return true; + } + return false; + } } diff --git a/java/fory-core/src/main/java/org/apache/fory/util/Utils.java b/java/fory-core/src/main/java/org/apache/fory/util/Utils.java index 3592e3ff71..890b72662b 100644 --- a/java/fory-core/src/main/java/org/apache/fory/util/Utils.java +++ b/java/fory-core/src/main/java/org/apache/fory/util/Utils.java @@ -24,7 +24,7 @@ public class Utils { private static final boolean DEBUG_OUTPUT_ENABLED; static { - DEBUG_OUTPUT_ENABLED = "1".equals(System.getenv("ENABLE_FORY_DEBUG_OUTPUT")); + DEBUG_OUTPUT_ENABLED = true; } /** Checks if ENABLE_FORY_DEBUG_OUTPUT env var is set to "1". */ diff --git a/java/fory-core/src/test/java/org/apache/fory/serializer/UnsignedTest.java b/java/fory-core/src/test/java/org/apache/fory/serializer/UnsignedTest.java index d3bbf00f4e..3108f1e76e 100644 --- a/java/fory-core/src/test/java/org/apache/fory/serializer/UnsignedTest.java +++ b/java/fory-core/src/test/java/org/apache/fory/serializer/UnsignedTest.java @@ -33,31 +33,46 @@ import org.testng.annotations.DataProvider; import org.testng.annotations.Test; +/** + * Unsigned fields serialization tests for java native mode(xlang=false). + * + *

    Type annotation constraints: + * + *

      + *
    • {@code @Uint8Type} can only be applied to {@code byte} or {@code Byte} fields + *
    • {@code @Uint16Type} can only be applied to {@code short} or {@code Short} fields + *
    • {@code @Uint32Type} can only be applied to {@code int} or {@code Integer} fields + *
    • {@code @Uint64Type} can only be applied to {@code long} or {@code Long} fields + *
    + * + *

    The unsigned annotations indicate that the field should be treated as unsigned during + * serialization, allowing the full unsigned range of the type to be used. + */ public class UnsignedTest extends ForyTestBase { - // Max values for unsigned types - public static final short UINT8_MAX = 255; - public static final int UINT16_MAX = 65535; - public static final long UINT32_MAX = 4294967295L; + // Max values for unsigned types (represented in their signed Java equivalents) + public static final byte UINT8_MAX = (byte) 255; // -1 as signed byte + public static final short UINT16_MAX = (short) 65535; // -1 as signed short + public static final int UINT32_MAX = (int) 4294967295L; // -1 as signed int public static final long UINT64_MAX = -1L; // 0xFFFFFFFFFFFFFFFF as signed long - // Common test values - public static final short UINT8_MID = 128; - public static final int UINT16_MID = 32768; - public static final long UINT32_MID = 2147483648L; + // Mid-point values (at the signed/unsigned boundary) + public static final byte UINT8_MID = (byte) 128; // -128 as signed byte + public static final short UINT16_MID = (short) 32768; // -32768 as signed short + public static final int UINT32_MID = (int) 2147483648L; // Integer.MIN_VALUE as signed int public static final long UINT64_MID = Long.MIN_VALUE; // 0x8000000000000000 @Data public static class UnsignedSchemaConsistent { - @Uint8Type short u8; + @Uint8Type byte u8; - @Uint16Type int u16; + @Uint16Type short u16; @Uint32Type(compress = true) - long u32Var; + int u32Var; @Uint32Type(compress = false) - long u32Fixed; + int u32Fixed; @Uint64Type(encoding = LongEncoding.VARINT64) long u64Var; @@ -70,19 +85,19 @@ public static class UnsignedSchemaConsistent { @ForyField(nullable = true) @Uint8Type - Short u8Nullable; + Byte u8Nullable; @ForyField(nullable = true) @Uint16Type - Integer u16Nullable; + Short u16Nullable; @ForyField(nullable = true) @Uint32Type(compress = true) - Long u32VarNullable; + Integer u32VarNullable; 
@ForyField(nullable = true) @Uint32Type(compress = false) - Long u32FixedNullable; + Integer u32FixedNullable; @ForyField(nullable = true) @Uint64Type(encoding = LongEncoding.VARINT64) @@ -98,15 +113,15 @@ public static class UnsignedSchemaConsistent { } public static class UnsignedSchemaCompatible { - @Uint8Type short u8; + @Uint8Type byte u8; - @Uint16Type int u16; + @Uint16Type short u16; @Uint32Type(compress = true) - long u32Var; + int u32Var; @Uint32Type(compress = false) - long u32Fixed; + int u32Fixed; @Uint64Type(encoding = LongEncoding.VARINT64) long u64Var; @@ -119,19 +134,19 @@ public static class UnsignedSchemaCompatible { @ForyField(nullable = true) @Uint8Type - Short u8Field2; + Byte u8Field2; @ForyField(nullable = true) @Uint16Type - Integer u16Field2; + Short u16Field2; @ForyField(nullable = true) @Uint32Type(compress = true) - Long u32VarField2; + Integer u32VarField2; @ForyField(nullable = true) @Uint32Type(compress = false) - Long u32FixedField2; + Integer u32FixedField2; @ForyField(nullable = true) @Uint64Type(encoding = LongEncoding.VARINT64) @@ -144,21 +159,61 @@ public static class UnsignedSchemaCompatible { @ForyField(nullable = true) @Uint64Type(encoding = LongEncoding.TAGGED_INT64) Long u64TaggedField2; + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + UnsignedSchemaCompatible that = (UnsignedSchemaCompatible) o; + return u8 == that.u8 + && u16 == that.u16 + && u32Var == that.u32Var + && u32Fixed == that.u32Fixed + && u64Var == that.u64Var + && u64Fixed == that.u64Fixed + && u64Tagged == that.u64Tagged + && Objects.equals(u8Field2, that.u8Field2) + && Objects.equals(u16Field2, that.u16Field2) + && Objects.equals(u32VarField2, that.u32VarField2) + && Objects.equals(u32FixedField2, that.u32FixedField2) + && Objects.equals(u64VarField2, that.u64VarField2) + && Objects.equals(u64FixedField2, that.u64FixedField2) + && Objects.equals(u64TaggedField2, 
that.u64TaggedField2); + } + + @Override + public int hashCode() { + return Objects.hash( + u8, + u16, + u32Var, + u32Fixed, + u64Var, + u64Fixed, + u64Tagged, + u8Field2, + u16Field2, + u32VarField2, + u32FixedField2, + u64VarField2, + u64FixedField2, + u64TaggedField2); + } } private static UnsignedSchemaConsistent createConsistentWithNormalValues() { UnsignedSchemaConsistent obj = new UnsignedSchemaConsistent(); - obj.u8 = 200; - obj.u16 = 60000; - obj.u32Var = 3000000000L; - obj.u32Fixed = 4000000000L; + obj.u8 = (byte) 200; // Unsigned 200 + obj.u16 = (short) 60000; // Unsigned 60000 + obj.u32Var = 2000000000; // Within signed int range + obj.u32Fixed = 2100000000; // Within signed int range obj.u64Var = 10000000000L; obj.u64Fixed = 15000000000L; obj.u64Tagged = 1000000000L; - obj.u8Nullable = (short) 128; - obj.u16Nullable = 40000; - obj.u32VarNullable = 2500000000L; - obj.u32FixedNullable = 3500000000L; + obj.u8Nullable = (byte) 128; // Unsigned 128 + obj.u16Nullable = (short) 40000; // Unsigned 40000 + obj.u32VarNullable = 1500000000; + obj.u32FixedNullable = 1800000000; obj.u64VarNullable = 8000000000L; obj.u64FixedNullable = 12000000000L; obj.u64TaggedNullable = 500000000L; @@ -176,8 +231,8 @@ private static UnsignedSchemaConsistent createConsistentWithZeroValues() { obj.u64Tagged = 0; obj.u8Nullable = 0; obj.u16Nullable = 0; - obj.u32VarNullable = 0L; - obj.u32FixedNullable = 0L; + obj.u32VarNullable = 0; + obj.u32FixedNullable = 0; obj.u64VarNullable = 0L; obj.u64FixedNullable = 0L; obj.u64TaggedNullable = 0L; @@ -226,8 +281,8 @@ private static UnsignedSchemaConsistent createConsistentWithNullValues() { UnsignedSchemaConsistent obj = new UnsignedSchemaConsistent(); obj.u8 = 100; obj.u16 = 30000; - obj.u32Var = 1500000000L; - obj.u32Fixed = 2000000000L; + obj.u32Var = 1500000000; + obj.u32Fixed = 2000000000; obj.u64Var = 5000000000L; obj.u64Fixed = 7500000000L; obj.u64Tagged = 250000000L; @@ -243,17 +298,17 @@ private static UnsignedSchemaConsistent 
createConsistentWithNullValues() { private static UnsignedSchemaCompatible createCompatibleWithNormalValues() { UnsignedSchemaCompatible obj = new UnsignedSchemaCompatible(); - obj.u8 = 200; - obj.u16 = 60000; - obj.u32Var = 3000000000L; - obj.u32Fixed = 4000000000L; + obj.u8 = (byte) 200; + obj.u16 = (short) 60000; + obj.u32Var = 2000000000; + obj.u32Fixed = 2100000000; obj.u64Var = 10000000000L; obj.u64Fixed = 15000000000L; obj.u64Tagged = 1000000000L; - obj.u8Field2 = (short) 128; - obj.u16Field2 = 40000; - obj.u32VarField2 = 2500000000L; - obj.u32FixedField2 = 3500000000L; + obj.u8Field2 = (byte) 128; + obj.u16Field2 = (short) 40000; + obj.u32VarField2 = 1500000000; + obj.u32FixedField2 = 1800000000; obj.u64VarField2 = 8000000000L; obj.u64FixedField2 = 12000000000L; obj.u64TaggedField2 = 500000000L; @@ -271,8 +326,8 @@ private static UnsignedSchemaCompatible createCompatibleWithZeroValues() { obj.u64Tagged = 0; obj.u8Field2 = 0; obj.u16Field2 = 0; - obj.u32VarField2 = 0L; - obj.u32FixedField2 = 0L; + obj.u32VarField2 = 0; + obj.u32FixedField2 = 0; obj.u64VarField2 = 0L; obj.u64FixedField2 = 0L; obj.u64TaggedField2 = 0L; @@ -321,8 +376,8 @@ private static UnsignedSchemaCompatible createCompatibleWithNullValues() { UnsignedSchemaCompatible obj = new UnsignedSchemaCompatible(); obj.u8 = 100; obj.u16 = 30000; - obj.u32Var = 1500000000L; - obj.u32Fixed = 2000000000L; + obj.u32Var = 1500000000; + obj.u32Fixed = 2000000000; obj.u64Var = 5000000000L; obj.u64Fixed = 7500000000L; obj.u64Tagged = 250000000L; @@ -336,17 +391,50 @@ private static UnsignedSchemaCompatible createCompatibleWithNullValues() { return obj; } - @DataProvider(name = "fory") + @DataProvider public static Object[][] javaForyConfig() { - ForyBuilder builder = Fory.builder().withXlang(false).requireClassRegistration(false); return new Object[][] { - {builder.withCompatible(true).withCodegen(false).build()}, - {builder.withCompatible(true).withCodegen(true).build()}, - 
{builder.withCompatible(false).withCodegen(false).build()}, - {builder.withCompatible(false).withCodegen(true).build()} + { + new ForyBuilder() + .withXlang(false) + .withCompatible(false) + .withCodegen(false) + .requireClassRegistration(false) + .build() + }, + { + new ForyBuilder() + .withXlang(false) + .withCompatible(false) + .withCodegen(true) + .requireClassRegistration(false) + .build() + }, + { + new ForyBuilder() + .withXlang(false) + .withCompatible(true) + .withCodegen(false) + .requireClassRegistration(false) + .build() + }, + { + new ForyBuilder() + .withXlang(false) + .withCompatible(true) + .withCodegen(true) + .requireClassRegistration(false) + .build() + } }; } + @DataProvider(name = "fory") + public static Object[][] foryProvider() { + return javaForyConfig(); + } + + // Schema consistent tests @Test(dataProvider = "fory") public void testUnsignedSchemaConsistentNormalValues(Fory fory) { serDeCheck(fory, createConsistentWithNormalValues()); @@ -372,6 +460,7 @@ public void testUnsignedSchemaConsistentNullValues(Fory fory) { serDeCheck(fory, createConsistentWithNullValues()); } + // Schema compatible tests @Test(dataProvider = "fory") public void testUnsignedSchemaCompatibleNormalValues(Fory fory) { serDeCheck(fory, createCompatibleWithNormalValues()); @@ -399,11 +488,11 @@ public void testUnsignedSchemaCompatibleNullValues(Fory fory) { // Test specific edge cases for each unsigned type public static class Uint8OnlyStruct { - @Uint8Type short value; + @Uint8Type byte value; @ForyField(nullable = true) @Uint8Type - Short nullableValue; + Byte nullableValue; @Override public boolean equals(Object o) { @@ -439,31 +528,31 @@ public void testUint8EdgeCases(Fory fory) { maxSignedByte.nullableValue = 127; serDeCheck(fory, maxSignedByte); - // Test 128 (min negative as unsigned) - Uint8OnlyStruct minUnsignedOver127 = new Uint8OnlyStruct(); - minUnsignedOver127.value = 128; - minUnsignedOver127.nullableValue = 128; - serDeCheck(fory, minUnsignedOver127); + 
// Test 128 (unsigned, appears as -128 in signed byte) + Uint8OnlyStruct val128 = new Uint8OnlyStruct(); + val128.value = (byte) 128; + val128.nullableValue = (byte) 128; + serDeCheck(fory, val128); - // Test 255 (max uint8) + // Test 255 (max uint8, appears as -1 in signed byte) Uint8OnlyStruct maxUint8 = new Uint8OnlyStruct(); - maxUint8.value = 255; - maxUint8.nullableValue = 255; + maxUint8.value = (byte) 255; + maxUint8.nullableValue = (byte) 255; serDeCheck(fory, maxUint8); // Test null Uint8OnlyStruct withNull = new Uint8OnlyStruct(); - withNull.value = 200; + withNull.value = (byte) 200; withNull.nullableValue = null; serDeCheck(fory, withNull); } public static class Uint16OnlyStruct { - @Uint16Type int value; + @Uint16Type short value; @ForyField(nullable = true) @Uint16Type - Integer nullableValue; + Short nullableValue; @Override public boolean equals(Object o) { @@ -499,39 +588,39 @@ public void testUint16EdgeCases(Fory fory) { maxSignedShort.nullableValue = 32767; serDeCheck(fory, maxSignedShort); - // Test 32768 (min unsigned over signed max) - Uint16OnlyStruct minUnsignedOver32767 = new Uint16OnlyStruct(); - minUnsignedOver32767.value = 32768; - minUnsignedOver32767.nullableValue = 32768; - serDeCheck(fory, minUnsignedOver32767); + // Test 32768 (unsigned, appears as -32768 in signed short) + Uint16OnlyStruct val32768 = new Uint16OnlyStruct(); + val32768.value = (short) 32768; + val32768.nullableValue = (short) 32768; + serDeCheck(fory, val32768); - // Test 65535 (max uint16) + // Test 65535 (max uint16, appears as -1 in signed short) Uint16OnlyStruct maxUint16 = new Uint16OnlyStruct(); - maxUint16.value = 65535; - maxUint16.nullableValue = 65535; + maxUint16.value = (short) 65535; + maxUint16.nullableValue = (short) 65535; serDeCheck(fory, maxUint16); // Test null Uint16OnlyStruct withNull = new Uint16OnlyStruct(); - withNull.value = 50000; + withNull.value = (short) 50000; withNull.nullableValue = null; serDeCheck(fory, withNull); } public static 
class Uint32OnlyStruct { @Uint32Type(compress = true) - long varValue; + int varValue; @Uint32Type(compress = false) - long fixedValue; + int fixedValue; @ForyField(nullable = true) @Uint32Type(compress = true) - Long varNullableValue; + Integer varNullableValue; @ForyField(nullable = true) @Uint32Type(compress = false) - Long fixedNullableValue; + Integer fixedNullableValue; @Override public boolean equals(Object o) { @@ -556,46 +645,46 @@ public void testUint32EdgeCases(Fory fory) { Uint32OnlyStruct zero = new Uint32OnlyStruct(); zero.varValue = 0; zero.fixedValue = 0; - zero.varNullableValue = 0L; - zero.fixedNullableValue = 0L; + zero.varNullableValue = 0; + zero.fixedNullableValue = 0; serDeCheck(fory, zero); // Test 1 Uint32OnlyStruct one = new Uint32OnlyStruct(); one.varValue = 1; one.fixedValue = 1; - one.varNullableValue = 1L; - one.fixedNullableValue = 1L; + one.varNullableValue = 1; + one.fixedNullableValue = 1; serDeCheck(fory, one); // Test 2147483647 (max signed int) Uint32OnlyStruct maxSignedInt = new Uint32OnlyStruct(); - maxSignedInt.varValue = 2147483647L; - maxSignedInt.fixedValue = 2147483647L; - maxSignedInt.varNullableValue = 2147483647L; - maxSignedInt.fixedNullableValue = 2147483647L; + maxSignedInt.varValue = 2147483647; + maxSignedInt.fixedValue = 2147483647; + maxSignedInt.varNullableValue = 2147483647; + maxSignedInt.fixedNullableValue = 2147483647; serDeCheck(fory, maxSignedInt); - // Test 2147483648 (min unsigned over signed max) - Uint32OnlyStruct minUnsignedOver = new Uint32OnlyStruct(); - minUnsignedOver.varValue = 2147483648L; - minUnsignedOver.fixedValue = 2147483648L; - minUnsignedOver.varNullableValue = 2147483648L; - minUnsignedOver.fixedNullableValue = 2147483648L; - serDeCheck(fory, minUnsignedOver); + // Test 2147483648 (unsigned, appears as Integer.MIN_VALUE in signed int) + Uint32OnlyStruct val2147483648 = new Uint32OnlyStruct(); + val2147483648.varValue = (int) 2147483648L; + val2147483648.fixedValue = (int) 2147483648L; 
+ val2147483648.varNullableValue = (int) 2147483648L; + val2147483648.fixedNullableValue = (int) 2147483648L; + serDeCheck(fory, val2147483648); - // Test 4294967295 (max uint32) + // Test 4294967295 (max uint32, appears as -1 in signed int) Uint32OnlyStruct maxUint32 = new Uint32OnlyStruct(); - maxUint32.varValue = 4294967295L; - maxUint32.fixedValue = 4294967295L; - maxUint32.varNullableValue = 4294967295L; - maxUint32.fixedNullableValue = 4294967295L; + maxUint32.varValue = (int) 4294967295L; + maxUint32.fixedValue = (int) 4294967295L; + maxUint32.varNullableValue = (int) 4294967295L; + maxUint32.fixedNullableValue = (int) 4294967295L; serDeCheck(fory, maxUint32); // Test null Uint32OnlyStruct withNull = new Uint32OnlyStruct(); - withNull.varValue = 3000000000L; - withNull.fixedValue = 3000000000L; + withNull.varValue = 1000000000; + withNull.fixedValue = 1000000000; withNull.varNullableValue = null; withNull.fixedNullableValue = null; serDeCheck(fory, withNull); @@ -761,54 +850,54 @@ public void testVarintEncodingBoundaryValues(Fory fory) { // 1-byte varint boundary (0-127) obj32.varValue = 127; obj32.fixedValue = 127; - obj32.varNullableValue = 127L; - obj32.fixedNullableValue = 127L; + obj32.varNullableValue = 127; + obj32.fixedNullableValue = 127; serDeCheck(fory, obj32); // 2-byte varint boundary (128-16383) obj32.varValue = 128; obj32.fixedValue = 128; - obj32.varNullableValue = 128L; - obj32.fixedNullableValue = 128L; + obj32.varNullableValue = 128; + obj32.fixedNullableValue = 128; serDeCheck(fory, obj32); obj32.varValue = 16383; obj32.fixedValue = 16383; - obj32.varNullableValue = 16383L; - obj32.fixedNullableValue = 16383L; + obj32.varNullableValue = 16383; + obj32.fixedNullableValue = 16383; serDeCheck(fory, obj32); // 3-byte varint boundary (16384-2097151) obj32.varValue = 16384; obj32.fixedValue = 16384; - obj32.varNullableValue = 16384L; - obj32.fixedNullableValue = 16384L; + obj32.varNullableValue = 16384; + obj32.fixedNullableValue = 16384; 
serDeCheck(fory, obj32); obj32.varValue = 2097151; obj32.fixedValue = 2097151; - obj32.varNullableValue = 2097151L; - obj32.fixedNullableValue = 2097151L; + obj32.varNullableValue = 2097151; + obj32.fixedNullableValue = 2097151; serDeCheck(fory, obj32); // 4-byte varint boundary (2097152-268435455) obj32.varValue = 2097152; obj32.fixedValue = 2097152; - obj32.varNullableValue = 2097152L; - obj32.fixedNullableValue = 2097152L; + obj32.varNullableValue = 2097152; + obj32.fixedNullableValue = 2097152; serDeCheck(fory, obj32); obj32.varValue = 268435455; obj32.fixedValue = 268435455; - obj32.varNullableValue = 268435455L; - obj32.fixedNullableValue = 268435455L; + obj32.varNullableValue = 268435455; + obj32.fixedNullableValue = 268435455; serDeCheck(fory, obj32); // 5-byte varint boundary (268435456+) obj32.varValue = 268435456; obj32.fixedValue = 268435456; - obj32.varNullableValue = 268435456L; - obj32.fixedNullableValue = 268435456L; + obj32.varNullableValue = 268435456; + obj32.fixedNullableValue = 268435456; serDeCheck(fory, obj32); } } diff --git a/java/fory-core/src/test/java/org/apache/fory/type/DescriptorGrouperTest.java b/java/fory-core/src/test/java/org/apache/fory/type/DescriptorGrouperTest.java index 1f96d17869..f5bd079518 100644 --- a/java/fory-core/src/test/java/org/apache/fory/type/DescriptorGrouperTest.java +++ b/java/fory-core/src/test/java/org/apache/fory/type/DescriptorGrouperTest.java @@ -75,8 +75,9 @@ private List createDescriptors() { @Test public void testComparatorByTypeAndName() { + Fory fory = Fory.builder().build(); List descriptors = createDescriptors(); - descriptors.sort(DescriptorGrouper.COMPARATOR_BY_TYPE_AND_NAME); + descriptors.sort(fory.getClassResolver().createTypeAndNameComparator()); List> classes = descriptors.stream().map(Descriptor::getRawType).collect(Collectors.toList()); List> expected = @@ -106,26 +107,30 @@ public void testComparatorByTypeAndName() { @Test public void testPrimitiveComparator() { + Fory fory = 
Fory.builder().build(); List descriptors = new ArrayList<>(); int index = 0; for (Class aClass : Primitives.allPrimitiveTypes()) { descriptors.add(createDescriptor(TypeRef.of(aClass), "f" + index++, -1, "TestClass", false)); } Collections.shuffle(descriptors, new Random(7)); - descriptors.sort(DescriptorGrouper.getPrimitiveComparator(false, false)); + descriptors.sort(fory.getClassResolver().getPrimitiveComparator()); List> classes = descriptors.stream().map(Descriptor::getRawType).collect(Collectors.toList()); + // With compression enabled (default): int/long are compressed and go to the end + // Non-compressed sorted by size (desc), then typeId (desc): char(25) > short(3), byte(2) > + // boolean(1) List> expected = Arrays.asList( double.class, - long.class, float.class, - int.class, - short.class, char.class, + short.class, byte.class, boolean.class, - void.class); + void.class, + long.class, + int.class); assertEquals(classes, expected); } @@ -137,15 +142,19 @@ public void testPrimitiveCompressedComparator() { descriptors.add(createDescriptor(TypeRef.of(aClass), "f" + index++, -1, "TestClass", false)); } Collections.shuffle(descriptors, new Random(7)); - descriptors.sort(DescriptorGrouper.getPrimitiveComparator(true, true)); + Fory fory = Fory.builder().build(); + descriptors.sort(fory.getClassResolver().getPrimitiveComparator()); List> classes = descriptors.stream().map(Descriptor::getRawType).collect(Collectors.toList()); + // With compression enabled (default): int/long are compressed and go to the end + // Non-compressed sorted by size (desc), then typeId (desc): char(25) > short(3), byte(2) > + // boolean(1) List> expected = Arrays.asList( double.class, float.class, - short.class, char.class, + short.class, byte.class, boolean.class, void.class, @@ -156,6 +165,7 @@ public void testPrimitiveCompressedComparator() { @Test public void testGrouper() { + Fory fory = Fory.builder().build(); List descriptors = createDescriptors(); int index = 0; descriptors.add( 
@@ -182,26 +192,26 @@ public void testGrouper() { descriptors, false, null, - false, - false, - DescriptorGrouper.COMPARATOR_BY_TYPE_AND_NAME) + fory.getClassResolver().getPrimitiveComparator(), + fory.getClassResolver().createTypeAndNameComparator()) .sort(); { List> classes = grouper.getPrimitiveDescriptors().stream() .map(Descriptor::getRawType) .collect(Collectors.toList()); + // With compression enabled: int/long go to end, sorted by size then typeId (desc) List> expected = Arrays.asList( double.class, - long.class, float.class, - int.class, - short.class, char.class, + short.class, byte.class, boolean.class, - void.class); + void.class, + long.class, + int.class); assertEquals(classes, expected); } { @@ -209,17 +219,18 @@ public void testGrouper() { grouper.getBoxedDescriptors().stream() .map(Descriptor::getRawType) .collect(Collectors.toList()); + // With compression enabled: Integer/Long go to end, sorted by size then typeId (desc) List> expected = Arrays.asList( Double.class, - Long.class, Float.class, - Integer.class, - Short.class, Character.class, + Short.class, Byte.class, Boolean.class, - Void.class); + Void.class, + Long.class, + Integer.class); assertEquals(classes, expected); } { @@ -227,9 +238,9 @@ public void testGrouper() { grouper.getCollectionDescriptors().stream() .map(Descriptor::getTypeRef) .collect(Collectors.toList()); - // Sorted by type name: List < List (alphabetically) + // Normalized type name is the same (Collection), fallback to field name order (c4 then c5) List> expected = - Arrays.asList(new TypeRef>() {}, new TypeRef>() {}); + Arrays.asList(new TypeRef>() {}, new TypeRef>() {}); assertEquals(types, expected); } { @@ -260,27 +271,29 @@ public void testGrouper() { @Test public void testCompressedPrimitiveGrouper() { + Fory fory = Fory.builder().build(); DescriptorGrouper grouper = DescriptorGrouper.createDescriptorGrouper( d -> ReflectionUtils.isMonomorphic(d.getRawType()), createDescriptors(), false, null, - true, - true, - 
DescriptorGrouper.COMPARATOR_BY_TYPE_AND_NAME) + fory.getClassResolver().getPrimitiveComparator(), + fory.getClassResolver().createTypeAndNameComparator()) .sort(); { List> classes = grouper.getPrimitiveDescriptors().stream() .map(Descriptor::getRawType) .collect(Collectors.toList()); + // With compression enabled: int/long go to end, sorted by size then typeId (desc) + // char has higher typeId (25) than short (3) List> expected = Arrays.asList( double.class, float.class, - short.class, char.class, + short.class, byte.class, boolean.class, void.class, @@ -293,12 +306,14 @@ public void testCompressedPrimitiveGrouper() { grouper.getBoxedDescriptors().stream() .map(Descriptor::getRawType) .collect(Collectors.toList()); + // With compression enabled: Integer/Long go to end, sorted by size then typeId (desc) + // Character has higher typeId than Short List> expected = Arrays.asList( Double.class, Float.class, - Short.class, Character.class, + Short.class, Byte.class, Boolean.class, Void.class, @@ -383,9 +398,9 @@ public void testStaticComparatorDoesNotNormalize() { descriptors.add( createDescriptor( new TypeRef>() {}, "arrayListField", -1, "TestClass", false)); - + Fory fory = Fory.builder().build(); // Sort with the static comparator - descriptors.sort(DescriptorGrouper.COMPARATOR_BY_TYPE_AND_NAME); + descriptors.sort(fory.getClassResolver().createTypeAndNameComparator()); // Get type names after sorting List typeNames = From 4d16d37ce59b1acbac74418522340442163c554b Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Fri, 9 Jan 2026 00:11:26 +0800 Subject: [PATCH 18/44] fix go/java xlang struct fields serde --- go/fory/buffer.go | 118 + go/fory/primitive.go | 138 + go/fory/reader.go | 16 +- go/fory/skip.go | 13 +- go/fory/struct.go | 2275 +++++++++++++++-- go/fory/tag.go | 69 +- go/fory/tests/xlang/xlang_test_main.go | 146 +- go/fory/type_def.go | 61 + go/fory/type_resolver.go | 101 +- go/fory/types.go | 474 +++- go/fory/writer.go | 16 +- 
.../org/apache/fory/builder/CodecBuilder.java | 15 +- .../org/apache/fory/memory/MemoryBuffer.java | 9 +- .../java/org/apache/fory/meta/FieldInfo.java | 7 +- .../java/org/apache/fory/meta/FieldTypes.java | 52 +- .../apache/fory/resolver/ClassResolver.java | 9 +- .../serializer/AbstractObjectSerializer.java | 20 + .../fory/serializer/MetaSharedSerializer.java | 37 +- .../fory/serializer/struct/Fingerprint.java | 16 +- .../serializer/CompatibleSerializerTest.java | 5 +- .../org/apache/fory/xlang/GoXlangTest.java | 17 +- .../org/apache/fory/xlang/XlangTestBase.java | 116 +- 22 files changed, 3344 insertions(+), 386 deletions(-) diff --git a/go/fory/buffer.go b/go/fory/buffer.go index f6aa84f3b4..13ec17f4af 100644 --- a/go/fory/buffer.go +++ b/go/fory/buffer.go @@ -96,6 +96,13 @@ func (b *ByteBuffer) WriteUint8(value uint8) { b.writerIndex++ } +//go:inline +func (b *ByteBuffer) WriteUint16(value uint16) { + b.grow(2) + binary.LittleEndian.PutUint16(b.data[b.writerIndex:], value) + b.writerIndex += 2 +} + //go:inline func (b *ByteBuffer) WriteInt16(value int16) { b.grow(2) @@ -103,6 +110,13 @@ func (b *ByteBuffer) WriteInt16(value int16) { b.writerIndex += 2 } +//go:inline +func (b *ByteBuffer) WriteUint32(value uint32) { + b.grow(4) + binary.LittleEndian.PutUint32(b.data[b.writerIndex:], value) + b.writerIndex += 4 +} + //go:inline func (b *ByteBuffer) WriteInt32(value int32) { b.grow(4) @@ -122,6 +136,13 @@ func (b *ByteBuffer) ReadLength(err *Error) int { return int(b.ReadVaruint32(err)) } +//go:inline +func (b *ByteBuffer) WriteUint64(value uint64) { + b.grow(8) + binary.LittleEndian.PutUint64(b.data[b.writerIndex:], value) + b.writerIndex += 8 +} + //go:inline func (b *ByteBuffer) WriteInt64(value int64) { b.grow(8) @@ -211,6 +232,19 @@ func (b *ByteBuffer) ReadInt16(err *Error) int16 { return v } +// ReadUint16 reads a uint16 and sets error on bounds violation +// +//go:inline +func (b *ByteBuffer) ReadUint16(err *Error) uint16 { + if b.readerIndex+2 > len(b.data) 
{ + *err = BufferOutOfBoundError(b.readerIndex, 2, len(b.data)) + return 0 + } + v := binary.LittleEndian.Uint16(b.data[b.readerIndex:]) + b.readerIndex += 2 + return v +} + // ReadUint32 reads a uint32 and sets error on bounds violation // //go:inline @@ -498,6 +532,34 @@ func (b *ByteBuffer) UnsafeReadInt64() int64 { return v } +// UnsafeReadUint32 reads a uint32 without bounds check. +// +//go:inline +func (b *ByteBuffer) UnsafeReadUint32() uint32 { + var v uint32 + if isLittleEndian { + v = *(*uint32)(unsafe.Pointer(&b.data[b.readerIndex])) + } else { + v = binary.LittleEndian.Uint32(b.data[b.readerIndex:]) + } + b.readerIndex += 4 + return v +} + +// UnsafeReadUint64 reads a uint64 without bounds check. +// +//go:inline +func (b *ByteBuffer) UnsafeReadUint64() uint64 { + var v uint64 + if isLittleEndian { + v = *(*uint64)(unsafe.Pointer(&b.data[b.readerIndex])) + } else { + v = binary.LittleEndian.Uint64(b.data[b.readerIndex:]) + } + b.readerIndex += 8 + return v +} + // UnsafeWriteFloat32 writes a float32 without grow check. // //go:inline @@ -1292,6 +1354,22 @@ func (b *ByteBuffer) UnsafeReadVarint64() int64 { return v } +// UnsafeReadVaruint32 reads a varuint32 without bounds checking. +// Caller must ensure remaining() >= 5 before calling. +// +//go:inline +func (b *ByteBuffer) UnsafeReadVaruint32() uint32 { + return b.readVaruint32Fast() +} + +// UnsafeReadVaruint64 reads a varuint64 without bounds checking. +// Caller must ensure remaining() >= 10 before calling. +// +//go:inline +func (b *ByteBuffer) UnsafeReadVaruint64() uint64 { + return b.readVaruint64Fast() +} + // ReadVaruint32 reads a varuint32 and sets error on bounds violation // //go:inline @@ -1421,6 +1499,46 @@ func (b *ByteBuffer) unsafePutInt64(index int, v uint64) { binary.LittleEndian.PutUint64(b.data[index:], v) } +// UnsafePutUint32 writes a uint32 at the given offset without advancing writerIndex. +// Caller must have called Reserve() to ensure capacity. 
+// Returns the number of bytes written (4). +// +//go:inline +func (b *ByteBuffer) UnsafePutUint32(offset int, value uint32) int { + binary.LittleEndian.PutUint32(b.data[offset:], value) + return 4 +} + +// UnsafePutUint64 writes a uint64 at the given offset without advancing writerIndex. +// Caller must have called Reserve() to ensure capacity. +// Returns the number of bytes written (8). +// +//go:inline +func (b *ByteBuffer) UnsafePutUint64(offset int, value uint64) int { + binary.LittleEndian.PutUint64(b.data[offset:], value) + return 8 +} + +// UnsafePutInt8 writes 1 byte at the given offset without bound checking. +// Caller must have ensured capacity. +// Returns the number of bytes written (1). +// +//go:inline +func (b *ByteBuffer) UnsafePutInt8(offset int, value int8) int { + b.data[offset] = byte(value) + return 1 +} + +// UnsafePutInt64 writes an int64 in little-endian format at the given offset without bound checking. +// Caller must have ensured capacity. +// Returns the number of bytes written (8). 
+// +//go:inline +func (b *ByteBuffer) UnsafePutInt64(offset int, value int64) int { + binary.LittleEndian.PutUint64(b.data[offset:], uint64(value)) + return 8 +} + // ReadVaruint32Small7 reads a varuint32 in small-7 format with error checking func (b *ByteBuffer) ReadVaruint32Small7(err *Error) uint32 { if b.readerIndex >= len(b.data) { diff --git a/go/fory/primitive.go b/go/fory/primitive.go index 534d6924a5..b0bf967677 100644 --- a/go/fory/primitive.go +++ b/go/fory/primitive.go @@ -164,6 +164,144 @@ func (s byteSerializer) ReadWithTypeInfo(ctx *ReadContext, refMode RefMode, type s.Read(ctx, refMode, false, false, value) } +// uint16Serializer handles uint16 type +type uint16Serializer struct{} + +var globalUint16Serializer = uint16Serializer{} + +func (s uint16Serializer) WriteData(ctx *WriteContext, value reflect.Value) { + ctx.buffer.WriteUint16(uint16(value.Uint())) +} + +func (s uint16Serializer) Write(ctx *WriteContext, refMode RefMode, writeType bool, hasGenerics bool, value reflect.Value) { + _ = hasGenerics + if refMode != RefModeNone { + ctx.buffer.WriteInt8(NotNullValueFlag) + } + if writeType { + ctx.buffer.WriteVaruint32Small7(uint32(UINT16)) + } + s.WriteData(ctx, value) +} + +func (s uint16Serializer) ReadData(ctx *ReadContext, type_ reflect.Type, value reflect.Value) { + err := ctx.Err() + value.SetUint(uint64(ctx.buffer.ReadUint16(err))) +} + +func (s uint16Serializer) Read(ctx *ReadContext, refMode RefMode, readType bool, hasGenerics bool, value reflect.Value) { + _ = hasGenerics + err := ctx.Err() + if refMode != RefModeNone { + if ctx.buffer.ReadInt8(err) == NullFlag { + return + } + } + if readType { + _ = ctx.buffer.ReadVaruint32Small7(err) + } + if ctx.HasError() { + return + } + s.ReadData(ctx, value.Type(), value) +} + +func (s uint16Serializer) ReadWithTypeInfo(ctx *ReadContext, refMode RefMode, typeInfo *TypeInfo, value reflect.Value) { + s.Read(ctx, refMode, false, false, value) +} + +// uint32Serializer handles uint32 type with 
variable-length encoding (VAR_UINT32) +type uint32Serializer struct{} + +var globalUint32Serializer = uint32Serializer{} + +func (s uint32Serializer) WriteData(ctx *WriteContext, value reflect.Value) { + ctx.buffer.WriteVaruint32(uint32(value.Uint())) +} + +func (s uint32Serializer) Write(ctx *WriteContext, refMode RefMode, writeType bool, hasGenerics bool, value reflect.Value) { + _ = hasGenerics + if refMode != RefModeNone { + ctx.buffer.WriteInt8(NotNullValueFlag) + } + if writeType { + ctx.buffer.WriteVaruint32Small7(uint32(VAR_UINT32)) + } + s.WriteData(ctx, value) +} + +func (s uint32Serializer) ReadData(ctx *ReadContext, type_ reflect.Type, value reflect.Value) { + err := ctx.Err() + value.SetUint(uint64(ctx.buffer.ReadVaruint32(err))) +} + +func (s uint32Serializer) Read(ctx *ReadContext, refMode RefMode, readType bool, hasGenerics bool, value reflect.Value) { + _ = hasGenerics + err := ctx.Err() + if refMode != RefModeNone { + if ctx.buffer.ReadInt8(err) == NullFlag { + return + } + } + if readType { + _ = ctx.buffer.ReadVaruint32Small7(err) + } + if ctx.HasError() { + return + } + s.ReadData(ctx, value.Type(), value) +} + +func (s uint32Serializer) ReadWithTypeInfo(ctx *ReadContext, refMode RefMode, typeInfo *TypeInfo, value reflect.Value) { + s.Read(ctx, refMode, false, false, value) +} + +// uint64Serializer handles uint64 type with variable-length encoding (VAR_UINT64) +type uint64Serializer struct{} + +var globalUint64Serializer = uint64Serializer{} + +func (s uint64Serializer) WriteData(ctx *WriteContext, value reflect.Value) { + ctx.buffer.WriteVaruint64(value.Uint()) +} + +func (s uint64Serializer) Write(ctx *WriteContext, refMode RefMode, writeType bool, hasGenerics bool, value reflect.Value) { + _ = hasGenerics + if refMode != RefModeNone { + ctx.buffer.WriteInt8(NotNullValueFlag) + } + if writeType { + ctx.buffer.WriteVaruint32Small7(uint32(VAR_UINT64)) + } + s.WriteData(ctx, value) +} + +func (s uint64Serializer) ReadData(ctx *ReadContext, 
type_ reflect.Type, value reflect.Value) { + err := ctx.Err() + value.SetUint(ctx.buffer.ReadVaruint64(err)) +} + +func (s uint64Serializer) Read(ctx *ReadContext, refMode RefMode, readType bool, hasGenerics bool, value reflect.Value) { + _ = hasGenerics + err := ctx.Err() + if refMode != RefModeNone { + if ctx.buffer.ReadInt8(err) == NullFlag { + return + } + } + if readType { + _ = ctx.buffer.ReadVaruint32Small7(err) + } + if ctx.HasError() { + return + } + s.ReadData(ctx, value.Type(), value) +} + +func (s uint64Serializer) ReadWithTypeInfo(ctx *ReadContext, refMode RefMode, typeInfo *TypeInfo, value reflect.Value) { + s.Read(ctx, refMode, false, false, value) +} + // int16Serializer handles int16 type type int16Serializer struct{} diff --git a/go/fory/reader.go b/go/fory/reader.go index 1b65fc19e5..67f680fc05 100644 --- a/go/fory/reader.go +++ b/go/fory/reader.go @@ -184,25 +184,25 @@ func (c *ReadContext) ReadTypeId() TypeId { func (c *ReadContext) readFast(ptr unsafe.Pointer, ct DispatchId) { err := c.Err() switch ct { - case BoolDispatchId: + case PrimitiveBoolDispatchId: *(*bool)(ptr) = c.buffer.ReadBool(err) - case Int8DispatchId: + case PrimitiveInt8DispatchId: *(*int8)(ptr) = int8(c.buffer.ReadByte(err)) - case Int16DispatchId: + case PrimitiveInt16DispatchId: *(*int16)(ptr) = c.buffer.ReadInt16(err) - case Int32DispatchId: + case PrimitiveInt32DispatchId: *(*int32)(ptr) = c.buffer.ReadVarint32(err) - case IntDispatchId: + case PrimitiveIntDispatchId: if strconv.IntSize == 64 { *(*int)(ptr) = int(c.buffer.ReadVarint64(err)) } else { *(*int)(ptr) = int(c.buffer.ReadVarint32(err)) } - case Int64DispatchId: + case PrimitiveInt64DispatchId: *(*int64)(ptr) = c.buffer.ReadVarint64(err) - case Float32DispatchId: + case PrimitiveFloat32DispatchId: *(*float32)(ptr) = c.buffer.ReadFloat32(err) - case Float64DispatchId: + case PrimitiveFloat64DispatchId: *(*float64)(ptr) = c.buffer.ReadFloat64(err) case StringDispatchId: *(*string)(ptr) = readString(c.buffer, err) 
diff --git a/go/fory/skip.go b/go/fory/skip.go index 016e1acc7d..9aa1a14d59 100644 --- a/go/fory/skip.go +++ b/go/fory/skip.go @@ -650,11 +650,22 @@ func skipValue(ctx *ReadContext, fieldDef FieldDef, readRefFlag bool, isField bo case UINT8: _ = ctx.buffer.ReadByte(err) case UINT16: - _ = ctx.buffer.ReadInt16(err) // No ReadUint16, but same binary representation + _ = ctx.buffer.ReadUint16(err) case UINT32: + _ = ctx.buffer.ReadUint32(err) + case VAR_UINT32: _ = ctx.buffer.ReadVaruint32(err) case UINT64: + _ = ctx.buffer.ReadUint64(err) + case VAR_UINT64: _ = ctx.buffer.ReadVaruint64(err) + case TAGGED_UINT64: + firstInt32 := ctx.buffer.ReadInt32(err) + if (firstInt32 & 1) != 0 { + // 9-byte encoding + _ = ctx.buffer.ReadUint64(err) + } + // Otherwise it's 4-byte encoding, already read // Unknown (polymorphic) type - read type info and skip dynamically case UNKNOWN: diff --git a/go/fory/struct.go b/go/fory/struct.go index b81e1857b7..ab7f2618ea 100644 --- a/go/fory/struct.go +++ b/go/fory/struct.go @@ -38,7 +38,7 @@ type FieldInfo struct { Name string Offset uintptr Type reflect.Type - StaticId DispatchId + DispatchId DispatchId TypeId TypeId // Fory type ID for the serializer Serializer Serializer Referencable bool @@ -64,7 +64,7 @@ type FieldInfo struct { } // fieldHasNonPrimitiveSerializer returns true if the field has a serializer with a non-primitive type ID. -// This is used to skip the fast path for fields like enums where StaticId is int32 but the serializer +// This is used to skip the fast path for fields like enums where DispatchId is int32 but the serializer // writes a different format (e.g., unsigned varint for enum ordinals vs signed zigzag for int32). 
func fieldHasNonPrimitiveSerializer(field *FieldInfo) bool { if field.Serializer == nil { @@ -226,6 +226,10 @@ func (s *structSerializer) initialize(typeResolver *TypeResolver) error { s.type_ = s.type_.Elem() } + // Set compatible mode flag BEFORE field initialization + // This is needed for groupFields to apply correct sorting + s.isCompatibleMode = typeResolver.Compatible() + // Build fields from type or fieldDefs if s.fieldDefs != nil { if err := s.initFieldsFromDefsWithResolver(typeResolver); err != nil { @@ -240,9 +244,6 @@ func (s *structSerializer) initialize(typeResolver *TypeResolver) error { // Compute struct hash s.structHash = s.computeHash() - // Set compatible mode flag - s.isCompatibleMode = typeResolver.Compatible() - s.initialized = true return nil } @@ -295,6 +296,23 @@ func (s *structSerializer) WriteData(ctx *WriteContext, value reflect.Value) { } } + // Debug output for field order + if DebugOutputEnabled() { + fmt.Printf("[Go] WriteData for type %s:\n", s.type_.Name()) + fmt.Printf("[Go] fixedFields (%d):\n", len(s.fixedFields)) + for i, field := range s.fixedFields { + fmt.Printf("[Go] [%d] %s -> dispatchId=%d\n", i, field.Name, field.DispatchId) + } + fmt.Printf("[Go] varintFields (%d):\n", len(s.varintFields)) + for i, field := range s.varintFields { + fmt.Printf("[Go] [%d] %s -> dispatchId=%d\n", i, field.Name, field.DispatchId) + } + fmt.Printf("[Go] remainingFields (%d):\n", len(s.remainingFields)) + for i, field := range s.remainingFields { + fmt.Printf("[Go] [%d] %s -> dispatchId=%d\n", i, field.Name, field.DispatchId) + } + } + buf := ctx.Buffer() // Dereference pointer if needed @@ -323,6 +341,10 @@ func (s *structSerializer) WriteData(ctx *WriteContext, value reflect.Value) { // - Reserve once, inline unsafe writes with endian handling, update index once // - field.WriteOffset computed at init time // ========================================================================== + if DebugOutputEnabled() { + fmt.Printf("[Go] WriteData 
Phase 1: canUseUnsafe=%v, fixedSize=%d, len(fixedFields)=%d\n", + canUseUnsafe, s.fixedSize, len(s.fixedFields)) + } if canUseUnsafe && s.fixedSize > 0 { buf.Reserve(s.fixedSize) baseOffset := buf.WriterIndex() @@ -331,86 +353,290 @@ func (s *structSerializer) WriteData(ctx *WriteContext, value reflect.Value) { for _, field := range s.fixedFields { fieldPtr := unsafe.Add(ptr, field.Offset) bufOffset := baseOffset + field.WriteOffset - switch field.StaticId { - case BoolDispatchId: + // Debug output for values being written + if DebugOutputEnabled() { + switch field.DispatchId { + case PrimitiveUint64DispatchId: + fmt.Printf("[Go] Writing fixed field %s: value=%d, bufOffset=%d\n", field.Name, *(*uint64)(fieldPtr), bufOffset) + case PrimitiveUint32DispatchId: + fmt.Printf("[Go] Writing fixed field %s: value=%d, bufOffset=%d\n", field.Name, *(*uint32)(fieldPtr), bufOffset) + case PrimitiveUint16DispatchId: + fmt.Printf("[Go] Writing fixed field %s: value=%d, bufOffset=%d\n", field.Name, *(*uint16)(fieldPtr), bufOffset) + case PrimitiveUint8DispatchId: + fmt.Printf("[Go] Writing fixed field %s: value=%d, bufOffset=%d\n", field.Name, *(*uint8)(fieldPtr), bufOffset) + } + } + switch field.DispatchId { + case PrimitiveBoolDispatchId: if *(*bool)(fieldPtr) { data[bufOffset] = 1 } else { data[bufOffset] = 0 } - case Int8DispatchId: + case NotnullBoolPtrDispatchId: + if **(**bool)(fieldPtr) { + data[bufOffset] = 1 + } else { + data[bufOffset] = 0 + } + case PrimitiveInt8DispatchId: data[bufOffset] = *(*byte)(fieldPtr) - case Int16DispatchId: + case NotnullInt8PtrDispatchId: + data[bufOffset] = byte(**(**int8)(fieldPtr)) + case PrimitiveUint8DispatchId: + data[bufOffset] = *(*uint8)(fieldPtr) + case NotnullUint8PtrDispatchId: + data[bufOffset] = **(**uint8)(fieldPtr) + case PrimitiveInt16DispatchId: if isLittleEndian { *(*int16)(unsafe.Pointer(&data[bufOffset])) = *(*int16)(fieldPtr) } else { binary.LittleEndian.PutUint16(data[bufOffset:], uint16(*(*int16)(fieldPtr))) } - 
case Float32DispatchId: + case NotnullInt16PtrDispatchId: + if isLittleEndian { + *(*int16)(unsafe.Pointer(&data[bufOffset])) = **(**int16)(fieldPtr) + } else { + binary.LittleEndian.PutUint16(data[bufOffset:], uint16(**(**int16)(fieldPtr))) + } + case PrimitiveUint16DispatchId: + if isLittleEndian { + *(*uint16)(unsafe.Pointer(&data[bufOffset])) = *(*uint16)(fieldPtr) + } else { + binary.LittleEndian.PutUint16(data[bufOffset:], *(*uint16)(fieldPtr)) + } + case NotnullUint16PtrDispatchId: + if isLittleEndian { + *(*uint16)(unsafe.Pointer(&data[bufOffset])) = **(**uint16)(fieldPtr) + } else { + binary.LittleEndian.PutUint16(data[bufOffset:], **(**uint16)(fieldPtr)) + } + case PrimitiveInt32DispatchId: + if isLittleEndian { + *(*int32)(unsafe.Pointer(&data[bufOffset])) = *(*int32)(fieldPtr) + } else { + binary.LittleEndian.PutUint32(data[bufOffset:], uint32(*(*int32)(fieldPtr))) + } + case NotnullInt32PtrDispatchId: + if isLittleEndian { + *(*int32)(unsafe.Pointer(&data[bufOffset])) = **(**int32)(fieldPtr) + } else { + binary.LittleEndian.PutUint32(data[bufOffset:], uint32(**(**int32)(fieldPtr))) + } + case PrimitiveUint32DispatchId: + if isLittleEndian { + *(*uint32)(unsafe.Pointer(&data[bufOffset])) = *(*uint32)(fieldPtr) + } else { + binary.LittleEndian.PutUint32(data[bufOffset:], *(*uint32)(fieldPtr)) + } + case NotnullUint32PtrDispatchId: + if isLittleEndian { + *(*uint32)(unsafe.Pointer(&data[bufOffset])) = **(**uint32)(fieldPtr) + } else { + binary.LittleEndian.PutUint32(data[bufOffset:], **(**uint32)(fieldPtr)) + } + case PrimitiveInt64DispatchId: + if isLittleEndian { + *(*int64)(unsafe.Pointer(&data[bufOffset])) = *(*int64)(fieldPtr) + } else { + binary.LittleEndian.PutUint64(data[bufOffset:], uint64(*(*int64)(fieldPtr))) + } + case NotnullInt64PtrDispatchId: + if isLittleEndian { + *(*int64)(unsafe.Pointer(&data[bufOffset])) = **(**int64)(fieldPtr) + } else { + binary.LittleEndian.PutUint64(data[bufOffset:], uint64(**(**int64)(fieldPtr))) + } + case 
PrimitiveUint64DispatchId: + if isLittleEndian { + *(*uint64)(unsafe.Pointer(&data[bufOffset])) = *(*uint64)(fieldPtr) + } else { + binary.LittleEndian.PutUint64(data[bufOffset:], *(*uint64)(fieldPtr)) + } + case NotnullUint64PtrDispatchId: + if isLittleEndian { + *(*uint64)(unsafe.Pointer(&data[bufOffset])) = **(**uint64)(fieldPtr) + } else { + binary.LittleEndian.PutUint64(data[bufOffset:], **(**uint64)(fieldPtr)) + } + case PrimitiveFloat32DispatchId: if isLittleEndian { *(*float32)(unsafe.Pointer(&data[bufOffset])) = *(*float32)(fieldPtr) } else { binary.LittleEndian.PutUint32(data[bufOffset:], math.Float32bits(*(*float32)(fieldPtr))) } - case Float64DispatchId: + case NotnullFloat32PtrDispatchId: + if isLittleEndian { + *(*float32)(unsafe.Pointer(&data[bufOffset])) = **(**float32)(fieldPtr) + } else { + binary.LittleEndian.PutUint32(data[bufOffset:], math.Float32bits(**(**float32)(fieldPtr))) + } + case PrimitiveFloat64DispatchId: if isLittleEndian { *(*float64)(unsafe.Pointer(&data[bufOffset])) = *(*float64)(fieldPtr) } else { binary.LittleEndian.PutUint64(data[bufOffset:], math.Float64bits(*(*float64)(fieldPtr))) } + case NotnullFloat64PtrDispatchId: + if isLittleEndian { + *(*float64)(unsafe.Pointer(&data[bufOffset])) = **(**float64)(fieldPtr) + } else { + binary.LittleEndian.PutUint64(data[bufOffset:], math.Float64bits(**(**float64)(fieldPtr))) + } } } // Update writer index ONCE after all fixed fields buf.SetWriterIndex(baseOffset + s.fixedSize) } else if len(s.fixedFields) > 0 { // Fallback to reflect-based access for unaddressable values + if DebugOutputEnabled() { + fmt.Printf("[Go] Using reflect-based fallback for fixedFields\n") + } for _, field := range s.fixedFields { fieldValue := value.Field(field.FieldIndex) - switch field.StaticId { - case BoolDispatchId: + startPos := buf.WriterIndex() + if DebugOutputEnabled() { + fmt.Printf("[Go] Fallback writing field %s: FieldIndex=%d, value=%v, dispatchId=%d, bufPos=%d\n", + field.Name, field.FieldIndex, 
fieldValue.Interface(), field.DispatchId, startPos) + } + switch field.DispatchId { + // Primitive types (non-pointer) + case PrimitiveBoolDispatchId: buf.WriteBool(fieldValue.Bool()) - case Int8DispatchId: + case PrimitiveInt8DispatchId: buf.WriteByte_(byte(fieldValue.Int())) - case Int16DispatchId: + case PrimitiveUint8DispatchId: + buf.WriteByte_(byte(fieldValue.Uint())) + case PrimitiveInt16DispatchId: buf.WriteInt16(int16(fieldValue.Int())) - case Float32DispatchId: + case PrimitiveUint16DispatchId: + buf.WriteInt16(int16(fieldValue.Uint())) + case PrimitiveInt32DispatchId: + buf.WriteInt32(int32(fieldValue.Int())) + case PrimitiveUint32DispatchId: + buf.WriteInt32(int32(fieldValue.Uint())) + case PrimitiveInt64DispatchId: + buf.WriteInt64(fieldValue.Int()) + case PrimitiveUint64DispatchId: + buf.WriteInt64(int64(fieldValue.Uint())) + case PrimitiveFloat32DispatchId: buf.WriteFloat32(float32(fieldValue.Float())) - case Float64DispatchId: + case PrimitiveFloat64DispatchId: buf.WriteFloat64(fieldValue.Float()) + // Notnull pointer types - dereference and write + case NotnullBoolPtrDispatchId: + buf.WriteBool(fieldValue.Elem().Bool()) + case NotnullInt8PtrDispatchId: + buf.WriteByte_(byte(fieldValue.Elem().Int())) + case NotnullUint8PtrDispatchId: + buf.WriteByte_(byte(fieldValue.Elem().Uint())) + case NotnullInt16PtrDispatchId: + buf.WriteInt16(int16(fieldValue.Elem().Int())) + case NotnullUint16PtrDispatchId: + buf.WriteInt16(int16(fieldValue.Elem().Uint())) + case NotnullInt32PtrDispatchId: + buf.WriteInt32(int32(fieldValue.Elem().Int())) + case NotnullUint32PtrDispatchId: + buf.WriteInt32(int32(fieldValue.Elem().Uint())) + case NotnullInt64PtrDispatchId: + buf.WriteInt64(fieldValue.Elem().Int()) + case NotnullUint64PtrDispatchId: + buf.WriteInt64(int64(fieldValue.Elem().Uint())) + case NotnullFloat32PtrDispatchId: + buf.WriteFloat32(float32(fieldValue.Elem().Float())) + case NotnullFloat64PtrDispatchId: + buf.WriteFloat64(fieldValue.Elem().Float()) + } + if 
DebugOutputEnabled() { + endPos := buf.WriterIndex() + bytesWritten := endPos - startPos + fmt.Printf("[Go] Fallback wrote %d bytes for %s, endPos=%d, bytes=%x\n", + bytesWritten, field.Name, endPos, buf.GetByteSlice(startPos, endPos)) } } } // ========================================================================== - // Phase 2: Varint primitives (int32, int64, int) - // - Reserve max size, track offset locally, update index once at end + // Phase 2: Varint primitives (int32, int64, int, uint32, uint64, uint, tagged int64/uint64) + // - These are variable-length encodings that must be written sequentially // ========================================================================== - if canUseUnsafe && s.maxVarintSize > 0 { - buf.Reserve(s.maxVarintSize) - offset := buf.WriterIndex() - + if canUseUnsafe && len(s.varintFields) > 0 { for _, field := range s.varintFields { fieldPtr := unsafe.Add(ptr, field.Offset) - switch field.StaticId { - case Int32DispatchId: - offset += buf.UnsafePutVarInt32(offset, *(*int32)(fieldPtr)) - case Int64DispatchId: - offset += buf.UnsafePutVarInt64(offset, *(*int64)(fieldPtr)) - case IntDispatchId: - offset += buf.UnsafePutVarInt64(offset, int64(*(*int)(fieldPtr))) + switch field.DispatchId { + case PrimitiveVarint32DispatchId: + buf.WriteVarint32(*(*int32)(fieldPtr)) + case NotnullVarint32PtrDispatchId: + buf.WriteVarint32(**(**int32)(fieldPtr)) + case PrimitiveVarint64DispatchId: + buf.WriteVarint64(*(*int64)(fieldPtr)) + case NotnullVarint64PtrDispatchId: + buf.WriteVarint64(**(**int64)(fieldPtr)) + case PrimitiveIntDispatchId: + buf.WriteVarint64(int64(*(*int)(fieldPtr))) + case NotnullIntPtrDispatchId: + buf.WriteVarint64(int64(**(**int)(fieldPtr))) + case PrimitiveVarUint32DispatchId: + buf.WriteVaruint32(*(*uint32)(fieldPtr)) + case NotnullVarUint32PtrDispatchId: + buf.WriteVaruint32(**(**uint32)(fieldPtr)) + case PrimitiveVarUint64DispatchId: + buf.WriteVaruint64(*(*uint64)(fieldPtr)) + case NotnullVarUint64PtrDispatchId: + 
buf.WriteVaruint64(**(**uint64)(fieldPtr)) + case PrimitiveUintDispatchId: + buf.WriteVaruint64(uint64(*(*uint)(fieldPtr))) + case NotnullUintPtrDispatchId: + buf.WriteVaruint64(uint64(**(**uint)(fieldPtr))) + case PrimitiveTaggedInt64DispatchId: + buf.WriteTaggedInt64(*(*int64)(fieldPtr)) + case NotnullTaggedInt64PtrDispatchId: + buf.WriteTaggedInt64(**(**int64)(fieldPtr)) + case PrimitiveTaggedUint64DispatchId: + buf.WriteTaggedUint64(*(*uint64)(fieldPtr)) + case NotnullTaggedUint64PtrDispatchId: + buf.WriteTaggedUint64(**(**uint64)(fieldPtr)) } } - // Update writer index ONCE after all varint fields - buf.SetWriterIndex(offset) } else if len(s.varintFields) > 0 { - // Fallback to reflect-based access for unaddressable values + // Slow path for non-addressable values: use reflection for _, field := range s.varintFields { fieldValue := value.Field(field.FieldIndex) - switch field.StaticId { - case Int32DispatchId: + switch field.DispatchId { + // Primitive types (non-pointer) + case PrimitiveVarint32DispatchId: buf.WriteVarint32(int32(fieldValue.Int())) - case Int64DispatchId, IntDispatchId: + case PrimitiveVarint64DispatchId: + buf.WriteVarint64(fieldValue.Int()) + case PrimitiveIntDispatchId: buf.WriteVarint64(fieldValue.Int()) + case PrimitiveVarUint32DispatchId: + buf.WriteVaruint32(uint32(fieldValue.Uint())) + case PrimitiveVarUint64DispatchId: + buf.WriteVaruint64(fieldValue.Uint()) + case PrimitiveUintDispatchId: + buf.WriteVaruint64(fieldValue.Uint()) + case PrimitiveTaggedInt64DispatchId: + buf.WriteTaggedInt64(fieldValue.Int()) + case PrimitiveTaggedUint64DispatchId: + buf.WriteTaggedUint64(fieldValue.Uint()) + // Notnull pointer types - dereference and write + case NotnullVarint32PtrDispatchId: + buf.WriteVarint32(int32(fieldValue.Elem().Int())) + case NotnullVarint64PtrDispatchId: + buf.WriteVarint64(fieldValue.Elem().Int()) + case NotnullIntPtrDispatchId: + buf.WriteVarint64(fieldValue.Elem().Int()) + case NotnullVarUint32PtrDispatchId: + 
buf.WriteVaruint32(uint32(fieldValue.Elem().Uint())) + case NotnullVarUint64PtrDispatchId: + buf.WriteVaruint64(fieldValue.Elem().Uint()) + case NotnullUintPtrDispatchId: + buf.WriteVaruint64(fieldValue.Elem().Uint()) + case NotnullTaggedInt64PtrDispatchId: + buf.WriteTaggedInt64(fieldValue.Elem().Int()) + case NotnullTaggedUint64PtrDispatchId: + buf.WriteTaggedUint64(fieldValue.Elem().Uint()) } } } @@ -429,20 +655,50 @@ func (s *structSerializer) WriteData(ctx *WriteContext, value reflect.Value) { func (s *structSerializer) writeRemainingField(ctx *WriteContext, ptr unsafe.Pointer, field *FieldInfo, value reflect.Value) { buf := ctx.Buffer() - // Fast path dispatch using pre-computed StaticId + if DebugOutputEnabled() { + fieldValue := value.Field(field.FieldIndex) + fmt.Printf("[Go] WriteRemainingField: %s, dispatchId=%d, value=%v\n", + field.Name, field.DispatchId, fieldValue.Interface()) + } + + // Fast path dispatch using pre-computed DispatchId // ptr must be valid (addressable value) if ptr != nil { fieldPtr := unsafe.Add(ptr, field.Offset) - switch field.StaticId { + switch field.DispatchId { case StringDispatchId: if field.RefMode == RefModeTracking { break // Fall through to slow path } + // Check if local field is a pointer type (schema evolution: remote non-nullable, local nullable) + localIsPtr := field.Type.Kind() == reflect.Ptr // Only write null flag if RefMode requires it (nullable field) if field.RefMode == RefModeNullOnly { - buf.WriteInt8(NotNullValueFlag) + if localIsPtr { + strPtr := *(**string)(fieldPtr) + if strPtr == nil { + buf.WriteInt8(NullFlag) + return + } + buf.WriteInt8(NotNullValueFlag) + ctx.WriteString(*strPtr) + } else { + buf.WriteInt8(NotNullValueFlag) + ctx.WriteString(*(*string)(fieldPtr)) + } + return + } + // RefModeNone: no null flag, write value directly + if localIsPtr { + strPtr := *(**string)(fieldPtr) + if strPtr == nil { + ctx.WriteString("") // Write empty string for nil pointer when non-nullable + } else { + 
ctx.WriteString(*strPtr) + } + } else { + ctx.WriteString(*(*string)(fieldPtr)) } - ctx.WriteString(*(*string)(fieldPtr)) return case EnumDispatchId: // Enums don't track refs - always use fast path @@ -556,11 +812,310 @@ func (s *structSerializer) writeRemainingField(ctx *WriteContext, ptr unsafe.Poi } ctx.WriteIntIntMap(*(*map[int]int)(fieldPtr), field.RefMode, false) return + case NullableTaggedInt64DispatchId: + // Nullable tagged INT64: write ref flag, then tagged encoding + ptr := *(**int64)(fieldPtr) + if ptr == nil { + buf.WriteInt8(NullFlag) + return + } + buf.WriteInt8(NotNullValueFlag) + buf.WriteTaggedInt64(*ptr) + return + case NullableTaggedUint64DispatchId: + // Nullable tagged UINT64: write ref flag, then tagged encoding + ptr := *(**uint64)(fieldPtr) + if ptr == nil { + buf.WriteInt8(NullFlag) + return + } + buf.WriteInt8(NotNullValueFlag) + buf.WriteTaggedUint64(*ptr) + return + // Nullable fixed-size types + case NullableBoolDispatchId: + ptr := *(**bool)(fieldPtr) + if ptr == nil { + buf.WriteInt8(NullFlag) + return + } + buf.WriteInt8(NotNullValueFlag) + buf.WriteBool(*ptr) + return + case NullableInt8DispatchId: + ptr := *(**int8)(fieldPtr) + if ptr == nil { + buf.WriteInt8(NullFlag) + return + } + buf.WriteInt8(NotNullValueFlag) + buf.WriteInt8(*ptr) + return + case NullableUint8DispatchId: + ptr := *(**uint8)(fieldPtr) + if ptr == nil { + buf.WriteInt8(NullFlag) + return + } + buf.WriteInt8(NotNullValueFlag) + buf.WriteUint8(*ptr) + return + case NullableInt16DispatchId: + ptr := *(**int16)(fieldPtr) + if ptr == nil { + buf.WriteInt8(NullFlag) + return + } + buf.WriteInt8(NotNullValueFlag) + buf.WriteInt16(*ptr) + return + case NullableUint16DispatchId: + ptr := *(**uint16)(fieldPtr) + if ptr == nil { + buf.WriteInt8(NullFlag) + return + } + buf.WriteInt8(NotNullValueFlag) + buf.WriteUint16(*ptr) + return + case NullableInt32DispatchId: + ptr := *(**int32)(fieldPtr) + if ptr == nil { + buf.WriteInt8(NullFlag) + return + } + 
buf.WriteInt8(NotNullValueFlag) + buf.WriteInt32(*ptr) + return + case NullableUint32DispatchId: + ptr := *(**uint32)(fieldPtr) + if ptr == nil { + buf.WriteInt8(NullFlag) + return + } + buf.WriteInt8(NotNullValueFlag) + buf.WriteUint32(*ptr) + return + case NullableInt64DispatchId: + ptr := *(**int64)(fieldPtr) + if ptr == nil { + buf.WriteInt8(NullFlag) + return + } + buf.WriteInt8(NotNullValueFlag) + buf.WriteInt64(*ptr) + return + case NullableUint64DispatchId: + ptr := *(**uint64)(fieldPtr) + if ptr == nil { + buf.WriteInt8(NullFlag) + return + } + buf.WriteInt8(NotNullValueFlag) + buf.WriteUint64(*ptr) + return + case NullableFloat32DispatchId: + ptr := *(**float32)(fieldPtr) + if ptr == nil { + buf.WriteInt8(NullFlag) + return + } + buf.WriteInt8(NotNullValueFlag) + buf.WriteFloat32(*ptr) + return + case NullableFloat64DispatchId: + ptr := *(**float64)(fieldPtr) + if ptr == nil { + buf.WriteInt8(NullFlag) + return + } + buf.WriteInt8(NotNullValueFlag) + buf.WriteFloat64(*ptr) + return + // Nullable varint types + case NullableVarint32DispatchId: + ptr := *(**int32)(fieldPtr) + if ptr == nil { + buf.WriteInt8(NullFlag) + return + } + buf.WriteInt8(NotNullValueFlag) + buf.WriteVarint32(*ptr) + return + case NullableVarUint32DispatchId: + ptr := *(**uint32)(fieldPtr) + if ptr == nil { + buf.WriteInt8(NullFlag) + return + } + buf.WriteInt8(NotNullValueFlag) + buf.WriteVaruint32(*ptr) + return + case NullableVarint64DispatchId: + ptr := *(**int64)(fieldPtr) + if ptr == nil { + buf.WriteInt8(NullFlag) + return + } + buf.WriteInt8(NotNullValueFlag) + buf.WriteVarint64(*ptr) + return + case NullableVarUint64DispatchId: + ptr := *(**uint64)(fieldPtr) + if ptr == nil { + buf.WriteInt8(NullFlag) + return + } + buf.WriteInt8(NotNullValueFlag) + buf.WriteVaruint64(*ptr) + return } } - // Slow path: use full serializer + // Slow path: use reflection for non-addressable values fieldValue := value.Field(field.FieldIndex) + + // Handle nullable types via reflection when ptr 
is nil (non-addressable) + switch field.DispatchId { + case NullableTaggedInt64DispatchId: + if fieldValue.IsNil() { + buf.WriteInt8(NullFlag) + return + } + buf.WriteInt8(NotNullValueFlag) + buf.WriteTaggedInt64(fieldValue.Elem().Int()) + return + case NullableTaggedUint64DispatchId: + if fieldValue.IsNil() { + buf.WriteInt8(NullFlag) + return + } + buf.WriteInt8(NotNullValueFlag) + buf.WriteTaggedUint64(fieldValue.Elem().Uint()) + return + case NullableBoolDispatchId: + if fieldValue.IsNil() { + buf.WriteInt8(NullFlag) + return + } + buf.WriteInt8(NotNullValueFlag) + buf.WriteBool(fieldValue.Elem().Bool()) + return + case NullableInt8DispatchId: + if fieldValue.IsNil() { + buf.WriteInt8(NullFlag) + return + } + buf.WriteInt8(NotNullValueFlag) + buf.WriteInt8(int8(fieldValue.Elem().Int())) + return + case NullableUint8DispatchId: + if fieldValue.IsNil() { + buf.WriteInt8(NullFlag) + return + } + buf.WriteInt8(NotNullValueFlag) + buf.WriteUint8(uint8(fieldValue.Elem().Uint())) + return + case NullableInt16DispatchId: + if fieldValue.IsNil() { + buf.WriteInt8(NullFlag) + return + } + buf.WriteInt8(NotNullValueFlag) + buf.WriteInt16(int16(fieldValue.Elem().Int())) + return + case NullableUint16DispatchId: + if fieldValue.IsNil() { + buf.WriteInt8(NullFlag) + return + } + buf.WriteInt8(NotNullValueFlag) + buf.WriteUint16(uint16(fieldValue.Elem().Uint())) + return + case NullableInt32DispatchId: + if fieldValue.IsNil() { + buf.WriteInt8(NullFlag) + return + } + buf.WriteInt8(NotNullValueFlag) + buf.WriteInt32(int32(fieldValue.Elem().Int())) + return + case NullableUint32DispatchId: + if fieldValue.IsNil() { + buf.WriteInt8(NullFlag) + return + } + buf.WriteInt8(NotNullValueFlag) + buf.WriteUint32(uint32(fieldValue.Elem().Uint())) + return + case NullableInt64DispatchId: + if fieldValue.IsNil() { + buf.WriteInt8(NullFlag) + return + } + buf.WriteInt8(NotNullValueFlag) + buf.WriteInt64(fieldValue.Elem().Int()) + return + case NullableUint64DispatchId: + if 
fieldValue.IsNil() { + buf.WriteInt8(NullFlag) + return + } + buf.WriteInt8(NotNullValueFlag) + buf.WriteUint64(fieldValue.Elem().Uint()) + return + case NullableFloat32DispatchId: + if fieldValue.IsNil() { + buf.WriteInt8(NullFlag) + return + } + buf.WriteInt8(NotNullValueFlag) + buf.WriteFloat32(float32(fieldValue.Elem().Float())) + return + case NullableFloat64DispatchId: + if fieldValue.IsNil() { + buf.WriteInt8(NullFlag) + return + } + buf.WriteInt8(NotNullValueFlag) + buf.WriteFloat64(fieldValue.Elem().Float()) + return + case NullableVarint32DispatchId: + if fieldValue.IsNil() { + buf.WriteInt8(NullFlag) + return + } + buf.WriteInt8(NotNullValueFlag) + buf.WriteVarint32(int32(fieldValue.Elem().Int())) + return + case NullableVarUint32DispatchId: + if fieldValue.IsNil() { + buf.WriteInt8(NullFlag) + return + } + buf.WriteInt8(NotNullValueFlag) + buf.WriteVaruint32(uint32(fieldValue.Elem().Uint())) + return + case NullableVarint64DispatchId: + if fieldValue.IsNil() { + buf.WriteInt8(NullFlag) + return + } + buf.WriteInt8(NotNullValueFlag) + buf.WriteVarint64(fieldValue.Elem().Int()) + return + case NullableVarUint64DispatchId: + if fieldValue.IsNil() { + buf.WriteInt8(NullFlag) + return + } + buf.WriteInt8(NotNullValueFlag) + buf.WriteVaruint64(fieldValue.Elem().Uint()) + return + } + + // Fall back to serializer for other types if field.Serializer != nil { field.Serializer.Write(ctx, field.RefMode, field.WriteType, field.HasGenerics, fieldValue) } else { @@ -643,15 +1198,10 @@ func (s *structSerializer) ReadData(ctx *ReadContext, type_ reflect.Type, value } } - // Use ordered reading only when TypeDef differs from local type (schema evolution) - // When types match (typeDefDiffers=false), use grouped reading for better performance - if s.typeDefDiffers { - s.readFieldsInOrder(ctx, value) - return - } - - // Check if value is addressable for unsafe access - if !value.CanAddr() { + // Use ordered reading when: + // 1. 
TypeDef differs from local type (schema evolution) + // 2. Value is not addressable + if s.typeDefDiffers || !value.CanAddr() { s.readFieldsInOrder(ctx, value) return } @@ -671,29 +1221,138 @@ func (s *structSerializer) ReadData(ctx *ReadContext, type_ reflect.Type, value for _, field := range s.fixedFields { fieldPtr := unsafe.Add(ptr, field.Offset) bufOffset := baseOffset + field.WriteOffset - switch field.StaticId { - case BoolDispatchId: + switch field.DispatchId { + case PrimitiveBoolDispatchId: *(*bool)(fieldPtr) = data[bufOffset] != 0 - case Int8DispatchId: + case PrimitiveInt8DispatchId: *(*int8)(fieldPtr) = int8(data[bufOffset]) - case Int16DispatchId: + case PrimitiveUint8DispatchId: + *(*uint8)(fieldPtr) = data[bufOffset] + case PrimitiveInt16DispatchId: if isLittleEndian { *(*int16)(fieldPtr) = *(*int16)(unsafe.Pointer(&data[bufOffset])) } else { *(*int16)(fieldPtr) = int16(binary.LittleEndian.Uint16(data[bufOffset:])) } - case Float32DispatchId: + case PrimitiveUint16DispatchId: + if isLittleEndian { + *(*uint16)(fieldPtr) = *(*uint16)(unsafe.Pointer(&data[bufOffset])) + } else { + *(*uint16)(fieldPtr) = binary.LittleEndian.Uint16(data[bufOffset:]) + } + case PrimitiveInt32DispatchId: + if isLittleEndian { + *(*int32)(fieldPtr) = *(*int32)(unsafe.Pointer(&data[bufOffset])) + } else { + *(*int32)(fieldPtr) = int32(binary.LittleEndian.Uint32(data[bufOffset:])) + } + case PrimitiveUint32DispatchId: + if isLittleEndian { + *(*uint32)(fieldPtr) = *(*uint32)(unsafe.Pointer(&data[bufOffset])) + } else { + *(*uint32)(fieldPtr) = binary.LittleEndian.Uint32(data[bufOffset:]) + } + case PrimitiveInt64DispatchId: + if isLittleEndian { + *(*int64)(fieldPtr) = *(*int64)(unsafe.Pointer(&data[bufOffset])) + } else { + *(*int64)(fieldPtr) = int64(binary.LittleEndian.Uint64(data[bufOffset:])) + } + case PrimitiveUint64DispatchId: + if isLittleEndian { + *(*uint64)(fieldPtr) = *(*uint64)(unsafe.Pointer(&data[bufOffset])) + } else { + *(*uint64)(fieldPtr) = 
binary.LittleEndian.Uint64(data[bufOffset:]) + } + case PrimitiveFloat32DispatchId: if isLittleEndian { *(*float32)(fieldPtr) = *(*float32)(unsafe.Pointer(&data[bufOffset])) } else { *(*float32)(fieldPtr) = math.Float32frombits(binary.LittleEndian.Uint32(data[bufOffset:])) } - case Float64DispatchId: + case PrimitiveFloat64DispatchId: if isLittleEndian { *(*float64)(fieldPtr) = *(*float64)(unsafe.Pointer(&data[bufOffset])) } else { *(*float64)(fieldPtr) = math.Float64frombits(binary.LittleEndian.Uint64(data[bufOffset:])) } + // Notnull pointer types - allocate and set pointer + case NotnullBoolPtrDispatchId: + v := new(bool) + *v = data[bufOffset] != 0 + *(**bool)(fieldPtr) = v + case NotnullInt8PtrDispatchId: + v := new(int8) + *v = int8(data[bufOffset]) + *(**int8)(fieldPtr) = v + case NotnullUint8PtrDispatchId: + v := new(uint8) + *v = data[bufOffset] + *(**uint8)(fieldPtr) = v + case NotnullInt16PtrDispatchId: + v := new(int16) + if isLittleEndian { + *v = *(*int16)(unsafe.Pointer(&data[bufOffset])) + } else { + *v = int16(binary.LittleEndian.Uint16(data[bufOffset:])) + } + *(**int16)(fieldPtr) = v + case NotnullUint16PtrDispatchId: + v := new(uint16) + if isLittleEndian { + *v = *(*uint16)(unsafe.Pointer(&data[bufOffset])) + } else { + *v = binary.LittleEndian.Uint16(data[bufOffset:]) + } + *(**uint16)(fieldPtr) = v + case NotnullInt32PtrDispatchId: + v := new(int32) + if isLittleEndian { + *v = *(*int32)(unsafe.Pointer(&data[bufOffset])) + } else { + *v = int32(binary.LittleEndian.Uint32(data[bufOffset:])) + } + *(**int32)(fieldPtr) = v + case NotnullUint32PtrDispatchId: + v := new(uint32) + if isLittleEndian { + *v = *(*uint32)(unsafe.Pointer(&data[bufOffset])) + } else { + *v = binary.LittleEndian.Uint32(data[bufOffset:]) + } + *(**uint32)(fieldPtr) = v + case NotnullInt64PtrDispatchId: + v := new(int64) + if isLittleEndian { + *v = *(*int64)(unsafe.Pointer(&data[bufOffset])) + } else { + *v = int64(binary.LittleEndian.Uint64(data[bufOffset:])) + } + 
*(**int64)(fieldPtr) = v + case NotnullUint64PtrDispatchId: + v := new(uint64) + if isLittleEndian { + *v = *(*uint64)(unsafe.Pointer(&data[bufOffset])) + } else { + *v = binary.LittleEndian.Uint64(data[bufOffset:]) + } + *(**uint64)(fieldPtr) = v + case NotnullFloat32PtrDispatchId: + v := new(float32) + if isLittleEndian { + *v = *(*float32)(unsafe.Pointer(&data[bufOffset])) + } else { + *v = math.Float32frombits(binary.LittleEndian.Uint32(data[bufOffset:])) + } + *(**float32)(fieldPtr) = v + case NotnullFloat64PtrDispatchId: + v := new(float64) + if isLittleEndian { + *v = *(*float64)(unsafe.Pointer(&data[bufOffset])) + } else { + *v = math.Float64frombits(binary.LittleEndian.Uint64(data[bufOffset:])) + } + *(**float64)(fieldPtr) = v } } // Update reader index ONCE after all fixed fields @@ -701,31 +1360,63 @@ func (s *structSerializer) ReadData(ctx *ReadContext, type_ reflect.Type, value } // Phase 2: Varint primitives (must read sequentially - variable length) - // Use unsafe reads when we have enough buffer remaining - if s.maxVarintSize > 0 && buf.remaining() >= s.maxVarintSize { - for _, field := range s.varintFields { - fieldPtr := unsafe.Add(ptr, field.Offset) - switch field.StaticId { - case Int32DispatchId: - *(*int32)(fieldPtr) = buf.UnsafeReadVarint32() - case Int64DispatchId: - *(*int64)(fieldPtr) = buf.UnsafeReadVarint64() - case IntDispatchId: - *(*int)(fieldPtr) = int(buf.UnsafeReadVarint64()) - } - } - } else if len(s.varintFields) > 0 { - // Slow path with bounds checking + // Note: For tagged int64/uint64, we can't use unsafe reads because they need bounds checking + if len(s.varintFields) > 0 { err := ctx.Err() for _, field := range s.varintFields { fieldPtr := unsafe.Add(ptr, field.Offset) - switch field.StaticId { - case Int32DispatchId: + switch field.DispatchId { + case PrimitiveVarint32DispatchId: *(*int32)(fieldPtr) = buf.ReadVarint32(err) - case Int64DispatchId: + case PrimitiveVarint64DispatchId: *(*int64)(fieldPtr) = 
buf.ReadVarint64(err) - case IntDispatchId: + case PrimitiveIntDispatchId: *(*int)(fieldPtr) = int(buf.ReadVarint64(err)) + case PrimitiveVarUint32DispatchId: + *(*uint32)(fieldPtr) = buf.ReadVaruint32(err) + case PrimitiveVarUint64DispatchId: + *(*uint64)(fieldPtr) = buf.ReadVaruint64(err) + case PrimitiveUintDispatchId: + *(*uint)(fieldPtr) = uint(buf.ReadVaruint64(err)) + case PrimitiveTaggedInt64DispatchId: + // Tagged INT64: use buffer's tagged decoding (4 bytes for small, 9 for large) + *(*int64)(fieldPtr) = buf.ReadTaggedInt64(err) + case PrimitiveTaggedUint64DispatchId: + // Tagged UINT64: use buffer's tagged decoding (4 bytes for small, 9 for large) + *(*uint64)(fieldPtr) = buf.ReadTaggedUint64(err) + // Notnull pointer types - allocate and set pointer + case NotnullVarint32PtrDispatchId: + v := new(int32) + *v = buf.ReadVarint32(err) + *(**int32)(fieldPtr) = v + case NotnullVarint64PtrDispatchId: + v := new(int64) + *v = buf.ReadVarint64(err) + *(**int64)(fieldPtr) = v + case NotnullIntPtrDispatchId: + v := new(int) + *v = int(buf.ReadVarint64(err)) + *(**int)(fieldPtr) = v + case NotnullVarUint32PtrDispatchId: + v := new(uint32) + *v = buf.ReadVaruint32(err) + *(**uint32)(fieldPtr) = v + case NotnullVarUint64PtrDispatchId: + v := new(uint64) + *v = buf.ReadVaruint64(err) + *(**uint64)(fieldPtr) = v + case NotnullUintPtrDispatchId: + v := new(uint) + *v = uint(buf.ReadVaruint64(err)) + *(**uint)(fieldPtr) = v + case NotnullTaggedInt64PtrDispatchId: + v := new(int64) + *v = buf.ReadTaggedInt64(err) + *(**int64)(fieldPtr) = v + case NotnullTaggedUint64PtrDispatchId: + v := new(uint64) + *v = buf.ReadTaggedUint64(err) + *(**uint64)(fieldPtr) = v } } } @@ -742,24 +1433,43 @@ func (s *structSerializer) readRemainingField(ctx *ReadContext, ptr unsafe.Point buf := ctx.Buffer() ctxErr := ctx.Err() - // Fast path dispatch using pre-computed StaticId + if DebugOutputEnabled() { + fmt.Printf("[fory-debug] readRemainingField: field=%s dispatchId=%d pos=%d ptr=%v\n", 
+ field.Name, field.DispatchId, buf.ReaderIndex(), ptr != nil) + } + + // Fast path dispatch using pre-computed DispatchId // ptr must be valid (addressable value) if ptr != nil { fieldPtr := unsafe.Add(ptr, field.Offset) - switch field.StaticId { + switch field.DispatchId { case StringDispatchId: if field.RefMode == RefModeTracking { break // Fall through to slow path for ref tracking } + // Check if local field is a pointer type (schema evolution: remote non-nullable, local nullable) + localIsPtr := field.Type.Kind() == reflect.Ptr // Only read null flag if RefMode requires it (nullable field) if field.RefMode == RefModeNullOnly { refFlag := buf.ReadInt8(ctxErr) if refFlag == NullFlag { - *(*string)(fieldPtr) = "" + if localIsPtr { + // Leave as nil + } else { + *(*string)(fieldPtr) = "" + } return } } - *(*string)(fieldPtr) = ctx.ReadString() + str := ctx.ReadString() + if localIsPtr { + // Allocate new string and store pointer + sp := new(string) + *sp = str + *(**string)(fieldPtr) = sp + } else { + *(*string)(fieldPtr) = str + } return case EnumDispatchId: // Enums don't track refs - always use fast path @@ -850,36 +1560,361 @@ func (s *structSerializer) readRemainingField(ctx *ReadContext, ptr unsafe.Point } *(*map[string]int32)(fieldPtr) = ctx.ReadStringInt32Map(field.RefMode, false) return - case StringIntMapDispatchId: - if field.RefMode == RefModeTracking { - break - } - *(*map[string]int)(fieldPtr) = ctx.ReadStringIntMap(field.RefMode, false) + case StringIntMapDispatchId: + if field.RefMode == RefModeTracking { + break + } + *(*map[string]int)(fieldPtr) = ctx.ReadStringIntMap(field.RefMode, false) + return + case StringFloat64MapDispatchId: + if field.RefMode == RefModeTracking { + break + } + *(*map[string]float64)(fieldPtr) = ctx.ReadStringFloat64Map(field.RefMode, false) + return + case StringBoolMapDispatchId: + // NOTE: map[string]bool is used to represent SETs in Go xlang mode. 
+ // We CANNOT use the fast path here because it reads MAP format, + // but the data is actually in SET format. Fall through to slow path + // which uses setSerializer to correctly read the SET format. + break + case IntIntMapDispatchId: + if field.RefMode == RefModeTracking { + break + } + *(*map[int]int)(fieldPtr) = ctx.ReadIntIntMap(field.RefMode, false) + return + case NullableTaggedInt64DispatchId: + // Nullable tagged INT64: read ref flag, then tagged encoding + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + // Leave pointer as nil + return + } + // Allocate new int64 and store pointer + v := new(int64) + *v = buf.ReadTaggedInt64(ctxErr) + *(**int64)(fieldPtr) = v + return + case NullableTaggedUint64DispatchId: + // Nullable tagged UINT64: read ref flag, then tagged encoding + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + // Leave pointer as nil + return + } + // Allocate new uint64 and store pointer + v := new(uint64) + *v = buf.ReadTaggedUint64(ctxErr) + *(**uint64)(fieldPtr) = v + return + // Nullable fixed-size types + case NullableBoolDispatchId: + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + return + } + v := new(bool) + *v = buf.ReadBool(ctxErr) + *(**bool)(fieldPtr) = v + return + case NullableInt8DispatchId: + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + return + } + v := new(int8) + *v = buf.ReadInt8(ctxErr) + *(**int8)(fieldPtr) = v + return + case NullableUint8DispatchId: + refFlag := buf.ReadInt8(ctxErr) + if DebugOutputEnabled() { + fmt.Printf("[fory-debug] readRemainingField: NullableUint8 refFlag=%d\n", refFlag) + } + if refFlag == NullFlag { + return + } + v := new(uint8) + *v = buf.ReadUint8(ctxErr) + if DebugOutputEnabled() { + fmt.Printf("[fory-debug] readRemainingField: NullableUint8 value=%d\n", *v) + } + *(**uint8)(fieldPtr) = v + return + case NullableInt16DispatchId: + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + return + } + v := new(int16) + *v = 
buf.ReadInt16(ctxErr) + *(**int16)(fieldPtr) = v + return + case NullableUint16DispatchId: + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + return + } + v := new(uint16) + *v = buf.ReadUint16(ctxErr) + *(**uint16)(fieldPtr) = v + return + case NullableInt32DispatchId: + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + return + } + v := new(int32) + *v = buf.ReadInt32(ctxErr) + *(**int32)(fieldPtr) = v + return + case NullableUint32DispatchId: + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + return + } + v := new(uint32) + *v = buf.ReadUint32(ctxErr) + *(**uint32)(fieldPtr) = v + return + case NullableInt64DispatchId: + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + return + } + v := new(int64) + *v = buf.ReadInt64(ctxErr) + *(**int64)(fieldPtr) = v + return + case NullableUint64DispatchId: + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + return + } + v := new(uint64) + *v = buf.ReadUint64(ctxErr) + *(**uint64)(fieldPtr) = v + return + case NullableFloat32DispatchId: + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + return + } + v := new(float32) + *v = buf.ReadFloat32(ctxErr) + *(**float32)(fieldPtr) = v + return + case NullableFloat64DispatchId: + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + return + } + v := new(float64) + *v = buf.ReadFloat64(ctxErr) + *(**float64)(fieldPtr) = v + return + // Nullable varint types + case NullableVarint32DispatchId: + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + return + } + v := new(int32) + *v = buf.ReadVarint32(ctxErr) + *(**int32)(fieldPtr) = v + return + case NullableVarUint32DispatchId: + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + return + } + v := new(uint32) + *v = buf.ReadVaruint32(ctxErr) + *(**uint32)(fieldPtr) = v + return + case NullableVarint64DispatchId: + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + return + } + v := new(int64) + *v = buf.ReadVarint64(ctxErr) + 
*(**int64)(fieldPtr) = v + return + case NullableVarUint64DispatchId: + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + return + } + v := new(uint64) + *v = buf.ReadVaruint64(ctxErr) + *(**uint64)(fieldPtr) = v + return + } + } + + // Slow path: use reflection for non-addressable values + fieldValue := value.Field(field.FieldIndex) + + // Handle nullable types via reflection when ptr is nil (non-addressable) + switch field.DispatchId { + case NullableTaggedInt64DispatchId: + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + return + } + v := reflect.New(fieldValue.Type().Elem()) + v.Elem().SetInt(buf.ReadTaggedInt64(ctxErr)) + fieldValue.Set(v) + return + case NullableTaggedUint64DispatchId: + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + return + } + v := reflect.New(fieldValue.Type().Elem()) + v.Elem().SetUint(buf.ReadTaggedUint64(ctxErr)) + fieldValue.Set(v) + return + case NullableBoolDispatchId: + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + return + } + v := reflect.New(fieldValue.Type().Elem()) + v.Elem().SetBool(buf.ReadBool(ctxErr)) + fieldValue.Set(v) + return + case NullableInt8DispatchId: + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + return + } + v := reflect.New(fieldValue.Type().Elem()) + v.Elem().SetInt(int64(buf.ReadInt8(ctxErr))) + fieldValue.Set(v) + return + case NullableUint8DispatchId: + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + return + } + v := reflect.New(fieldValue.Type().Elem()) + v.Elem().SetUint(uint64(buf.ReadUint8(ctxErr))) + fieldValue.Set(v) + return + case NullableInt16DispatchId: + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + return + } + v := reflect.New(fieldValue.Type().Elem()) + v.Elem().SetInt(int64(buf.ReadInt16(ctxErr))) + fieldValue.Set(v) + return + case NullableUint16DispatchId: + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + return + } + v := reflect.New(fieldValue.Type().Elem()) + 
v.Elem().SetUint(uint64(buf.ReadUint16(ctxErr))) + fieldValue.Set(v) + return + case NullableInt32DispatchId: + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + return + } + v := reflect.New(fieldValue.Type().Elem()) + v.Elem().SetInt(int64(buf.ReadInt32(ctxErr))) + fieldValue.Set(v) + return + case NullableUint32DispatchId: + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + return + } + v := reflect.New(fieldValue.Type().Elem()) + v.Elem().SetUint(uint64(buf.ReadUint32(ctxErr))) + fieldValue.Set(v) + return + case NullableInt64DispatchId: + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + return + } + v := reflect.New(fieldValue.Type().Elem()) + v.Elem().SetInt(buf.ReadInt64(ctxErr)) + fieldValue.Set(v) + return + case NullableUint64DispatchId: + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + return + } + v := reflect.New(fieldValue.Type().Elem()) + v.Elem().SetUint(buf.ReadUint64(ctxErr)) + fieldValue.Set(v) + return + case NullableFloat32DispatchId: + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + return + } + v := reflect.New(fieldValue.Type().Elem()) + v.Elem().SetFloat(float64(buf.ReadFloat32(ctxErr))) + fieldValue.Set(v) + return + case NullableFloat64DispatchId: + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + return + } + v := reflect.New(fieldValue.Type().Elem()) + v.Elem().SetFloat(buf.ReadFloat64(ctxErr)) + fieldValue.Set(v) + return + case NullableVarint32DispatchId: + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { return - case StringFloat64MapDispatchId: - if field.RefMode == RefModeTracking { - break - } - *(*map[string]float64)(fieldPtr) = ctx.ReadStringFloat64Map(field.RefMode, false) + } + v := reflect.New(fieldValue.Type().Elem()) + v.Elem().SetInt(int64(buf.ReadVarint32(ctxErr))) + fieldValue.Set(v) + return + case NullableVarUint32DispatchId: + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { return - case StringBoolMapDispatchId: - // NOTE: 
map[string]bool is used to represent SETs in Go xlang mode. - // We CANNOT use the fast path here because it reads MAP format, - // but the data is actually in SET format. Fall through to slow path - // which uses setSerializer to correctly read the SET format. - break - case IntIntMapDispatchId: - if field.RefMode == RefModeTracking { - break - } - *(*map[int]int)(fieldPtr) = ctx.ReadIntIntMap(field.RefMode, false) + } + v := reflect.New(fieldValue.Type().Elem()) + v.Elem().SetUint(uint64(buf.ReadVaruint32(ctxErr))) + fieldValue.Set(v) + return + case NullableVarint64DispatchId: + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { return } + v := reflect.New(fieldValue.Type().Elem()) + v.Elem().SetInt(buf.ReadVarint64(ctxErr)) + fieldValue.Set(v) + return + case NullableVarUint64DispatchId: + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + return + } + v := reflect.New(fieldValue.Type().Elem()) + v.Elem().SetUint(buf.ReadVaruint64(ctxErr)) + fieldValue.Set(v) + return } - // Slow path: use full serializer - fieldValue := value.Field(field.FieldIndex) - + // Fall back to serializer for other types if field.Serializer != nil { field.Serializer.Read(ctx, field.RefMode, field.WriteType, field.HasGenerics, fieldValue) } else { @@ -898,45 +1933,162 @@ func (s *structSerializer) readFieldsInOrder(ctx *ReadContext, value reflect.Val } err := ctx.Err() + if DebugOutputEnabled() { + fmt.Printf("[fory-debug] readFieldsInOrder: starting at pos=%d, field count=%d\n", buf.ReaderIndex(), len(s.fields)) + for i, f := range s.fields { + fmt.Printf("[fory-debug] readFieldsInOrder: field[%d]=%s dispatchId=%d referencable=%v\n", i, f.Name, f.DispatchId, f.Referencable) + } + } + for _, field := range s.fields { + startPos := buf.ReaderIndex() if field.FieldIndex < 0 { s.skipField(ctx, field) if ctx.HasError() { return } + if DebugOutputEnabled() { + fmt.Printf("[fory-debug] readFieldsInOrder: skipped field=%s pos=%d->%d\n", field.Name, startPos, 
buf.ReaderIndex()) + } continue } - // Fast path for fixed-size primitive types (no ref flag) - // Use error-aware methods with deferred checking - if canUseUnsafe && isFixedSizePrimitive(field.StaticId, field.Referencable) { + // Fast path for fixed-size primitive types (no ref flag from remote schema) + if canUseUnsafe && isFixedSizePrimitive(field.DispatchId, field.Referencable) { fieldPtr := unsafe.Add(ptr, field.Offset) - switch field.StaticId { - case BoolDispatchId: + switch field.DispatchId { + // PrimitiveXxxDispatchId: local field is non-pointer type + case PrimitiveBoolDispatchId: *(*bool)(fieldPtr) = buf.ReadBool(err) - case Int8DispatchId: + case PrimitiveInt8DispatchId: *(*int8)(fieldPtr) = buf.ReadInt8(err) - case Int16DispatchId: + case PrimitiveUint8DispatchId: + *(*uint8)(fieldPtr) = uint8(buf.ReadInt8(err)) + case PrimitiveInt16DispatchId: *(*int16)(fieldPtr) = buf.ReadInt16(err) - case Float32DispatchId: + case PrimitiveUint16DispatchId: + *(*uint16)(fieldPtr) = buf.ReadUint16(err) + case PrimitiveInt32DispatchId: + *(*int32)(fieldPtr) = buf.ReadInt32(err) + case PrimitiveUint32DispatchId: + *(*uint32)(fieldPtr) = buf.ReadUint32(err) + case PrimitiveInt64DispatchId: + *(*int64)(fieldPtr) = buf.ReadInt64(err) + case PrimitiveUint64DispatchId: + *(*uint64)(fieldPtr) = buf.ReadUint64(err) + case PrimitiveFloat32DispatchId: *(*float32)(fieldPtr) = buf.ReadFloat32(err) - case Float64DispatchId: + case PrimitiveFloat64DispatchId: *(*float64)(fieldPtr) = buf.ReadFloat64(err) + // NotnullXxxPtrDispatchId: local field is *T with nullable=false + case NotnullBoolPtrDispatchId: + v := new(bool) + *v = buf.ReadBool(err) + *(**bool)(fieldPtr) = v + case NotnullInt8PtrDispatchId: + v := new(int8) + *v = buf.ReadInt8(err) + *(**int8)(fieldPtr) = v + case NotnullUint8PtrDispatchId: + v := new(uint8) + *v = uint8(buf.ReadInt8(err)) + *(**uint8)(fieldPtr) = v + case NotnullInt16PtrDispatchId: + v := new(int16) + *v = buf.ReadInt16(err) + *(**int16)(fieldPtr) = v 
+ case NotnullUint16PtrDispatchId: + v := new(uint16) + *v = buf.ReadUint16(err) + *(**uint16)(fieldPtr) = v + case NotnullInt32PtrDispatchId: + v := new(int32) + *v = buf.ReadInt32(err) + *(**int32)(fieldPtr) = v + case NotnullUint32PtrDispatchId: + v := new(uint32) + *v = buf.ReadUint32(err) + *(**uint32)(fieldPtr) = v + case NotnullInt64PtrDispatchId: + v := new(int64) + *v = buf.ReadInt64(err) + *(**int64)(fieldPtr) = v + case NotnullUint64PtrDispatchId: + v := new(uint64) + *v = buf.ReadUint64(err) + *(**uint64)(fieldPtr) = v + case NotnullFloat32PtrDispatchId: + v := new(float32) + *v = buf.ReadFloat32(err) + *(**float32)(fieldPtr) = v + case NotnullFloat64PtrDispatchId: + v := new(float64) + *v = buf.ReadFloat64(err) + *(**float64)(fieldPtr) = v + } + if DebugOutputEnabled() { + fmt.Printf("[fory-debug] readFieldsInOrder: fixed field=%s pos=%d->%d\n", field.Name, startPos, buf.ReaderIndex()) } continue } - // Fast path for varint primitive types (no ref flag) - // Skip fast path if field has a serializer with a non-primitive type (e.g., NAMED_ENUM) - if canUseUnsafe && isVarintPrimitive(field.StaticId, field.Referencable) && !fieldHasNonPrimitiveSerializer(field) { + // Fast path for varint primitive types (no ref flag from remote schema) + if canUseUnsafe && isVarintPrimitive(field.DispatchId, field.Referencable) && !fieldHasNonPrimitiveSerializer(field) { fieldPtr := unsafe.Add(ptr, field.Offset) - switch field.StaticId { - case Int32DispatchId: + switch field.DispatchId { + // PrimitiveXxxDispatchId: local field is non-pointer type + case PrimitiveVarint32DispatchId: *(*int32)(fieldPtr) = buf.ReadVarint32(err) - case Int64DispatchId: + case PrimitiveVarint64DispatchId: *(*int64)(fieldPtr) = buf.ReadVarint64(err) - case IntDispatchId: + case PrimitiveVarUint32DispatchId: + *(*uint32)(fieldPtr) = buf.ReadVaruint32(err) + case PrimitiveVarUint64DispatchId: + *(*uint64)(fieldPtr) = buf.ReadVaruint64(err) + case PrimitiveTaggedInt64DispatchId: + 
*(*int64)(fieldPtr) = buf.ReadTaggedInt64(err) + case PrimitiveTaggedUint64DispatchId: + *(*uint64)(fieldPtr) = buf.ReadTaggedUint64(err) + case PrimitiveIntDispatchId: *(*int)(fieldPtr) = int(buf.ReadVarint64(err)) + case PrimitiveUintDispatchId: + *(*uint)(fieldPtr) = uint(buf.ReadVaruint64(err)) + // NotnullXxxPtrDispatchId: local field is *T with nullable=false + case NotnullVarint32PtrDispatchId: + v := new(int32) + *v = buf.ReadVarint32(err) + *(**int32)(fieldPtr) = v + case NotnullVarint64PtrDispatchId: + v := new(int64) + *v = buf.ReadVarint64(err) + *(**int64)(fieldPtr) = v + case NotnullVarUint32PtrDispatchId: + v := new(uint32) + *v = buf.ReadVaruint32(err) + *(**uint32)(fieldPtr) = v + case NotnullVarUint64PtrDispatchId: + v := new(uint64) + *v = buf.ReadVaruint64(err) + *(**uint64)(fieldPtr) = v + case NotnullTaggedInt64PtrDispatchId: + v := new(int64) + *v = buf.ReadTaggedInt64(err) + *(**int64)(fieldPtr) = v + case NotnullTaggedUint64PtrDispatchId: + v := new(uint64) + *v = buf.ReadTaggedUint64(err) + *(**uint64)(fieldPtr) = v + case NotnullIntPtrDispatchId: + v := new(int) + *v = int(buf.ReadVarint64(err)) + *(**int)(fieldPtr) = v + case NotnullUintPtrDispatchId: + v := new(uint) + *v = uint(buf.ReadVaruint64(err)) + *(**uint)(fieldPtr) = v + } + if DebugOutputEnabled() { + fmt.Printf("[fory-debug] readFieldsInOrder: varint field=%s pos=%d->%d\n", field.Name, startPos, buf.ReaderIndex()) } continue } @@ -945,34 +2097,233 @@ func (s *structSerializer) readFieldsInOrder(ctx *ReadContext, value reflect.Val fieldValue := value.Field(field.FieldIndex) // Slow path for primitives when not addressable - if !canUseUnsafe && isFixedSizePrimitive(field.StaticId, field.Referencable) { - switch field.StaticId { - case BoolDispatchId: + if !canUseUnsafe && isFixedSizePrimitive(field.DispatchId, field.Referencable) { + switch field.DispatchId { + case PrimitiveBoolDispatchId: fieldValue.SetBool(buf.ReadBool(err)) - case Int8DispatchId: + case 
PrimitiveInt8DispatchId: fieldValue.SetInt(int64(buf.ReadInt8(err))) - case Int16DispatchId: + case PrimitiveUint8DispatchId: + fieldValue.SetUint(uint64(buf.ReadInt8(err))) + case PrimitiveInt16DispatchId: fieldValue.SetInt(int64(buf.ReadInt16(err))) - case Float32DispatchId: + case PrimitiveUint16DispatchId: + fieldValue.SetUint(uint64(buf.ReadUint16(err))) + case PrimitiveInt32DispatchId: + fieldValue.SetInt(int64(buf.ReadInt32(err))) + case PrimitiveUint32DispatchId: + fieldValue.SetUint(uint64(buf.ReadUint32(err))) + case PrimitiveInt64DispatchId: + fieldValue.SetInt(buf.ReadInt64(err)) + case PrimitiveUint64DispatchId: + fieldValue.SetUint(buf.ReadUint64(err)) + case PrimitiveFloat32DispatchId: fieldValue.SetFloat(float64(buf.ReadFloat32(err))) - case Float64DispatchId: + case PrimitiveFloat64DispatchId: fieldValue.SetFloat(buf.ReadFloat64(err)) } continue } - if !canUseUnsafe && isVarintPrimitive(field.StaticId, field.Referencable) && !fieldHasNonPrimitiveSerializer(field) { - switch field.StaticId { - case Int32DispatchId: + if !canUseUnsafe && isVarintPrimitive(field.DispatchId, field.Referencable) && !fieldHasNonPrimitiveSerializer(field) { + switch field.DispatchId { + case PrimitiveVarint32DispatchId: fieldValue.SetInt(int64(buf.ReadVarint32(err))) - case Int64DispatchId, IntDispatchId: + case PrimitiveVarint64DispatchId, PrimitiveIntDispatchId: fieldValue.SetInt(buf.ReadVarint64(err)) + case PrimitiveVarUint32DispatchId: + fieldValue.SetUint(uint64(buf.ReadVaruint32(err))) + case PrimitiveVarUint64DispatchId, PrimitiveUintDispatchId: + fieldValue.SetUint(buf.ReadVaruint64(err)) + case PrimitiveTaggedInt64DispatchId: + fieldValue.SetInt(buf.ReadTaggedInt64(err)) + case PrimitiveTaggedUint64DispatchId: + fieldValue.SetUint(buf.ReadTaggedUint64(err)) + } + continue + } + + // Fast path for nullable fixed-size primitives (read ref flag + fixed bytes) + // These have Referencable=true but use fixed encoding, not varint + if 
isNullableFixedSizePrimitive(field.DispatchId) { + refFlag := buf.ReadInt8(err) + if refFlag == NullFlag { + // Leave pointer as nil (or zero for non-pointer local types) + if DebugOutputEnabled() { + fmt.Printf("[fory-debug] readFieldsInOrder: nullable fixed field=%s is null pos=%d->%d\n", field.Name, startPos, buf.ReaderIndex()) + } + continue + } + // Read fixed-size value based on dispatch ID + // Handle both pointer and non-pointer local field types (schema evolution) + localIsPtr := fieldValue.Kind() == reflect.Ptr + switch field.DispatchId { + case NullableBoolDispatchId: + v := buf.ReadBool(err) + if localIsPtr { + fieldValue.Set(reflect.ValueOf(&v)) + } else { + fieldValue.SetBool(v) + } + case NullableInt8DispatchId: + v := buf.ReadInt8(err) + if localIsPtr { + fieldValue.Set(reflect.ValueOf(&v)) + } else { + fieldValue.SetInt(int64(v)) + } + case NullableUint8DispatchId: + v := uint8(buf.ReadInt8(err)) + if localIsPtr { + fieldValue.Set(reflect.ValueOf(&v)) + } else { + fieldValue.SetUint(uint64(v)) + } + case NullableInt16DispatchId: + v := buf.ReadInt16(err) + if localIsPtr { + fieldValue.Set(reflect.ValueOf(&v)) + } else { + fieldValue.SetInt(int64(v)) + } + case NullableUint16DispatchId: + v := buf.ReadUint16(err) + if localIsPtr { + fieldValue.Set(reflect.ValueOf(&v)) + } else { + fieldValue.SetUint(uint64(v)) + } + case NullableInt32DispatchId: + v := buf.ReadInt32(err) + if localIsPtr { + fieldValue.Set(reflect.ValueOf(&v)) + } else { + fieldValue.SetInt(int64(v)) + } + case NullableUint32DispatchId: + v := buf.ReadUint32(err) + if localIsPtr { + fieldValue.Set(reflect.ValueOf(&v)) + } else { + fieldValue.SetUint(uint64(v)) + } + case NullableInt64DispatchId: + v := buf.ReadInt64(err) + if localIsPtr { + fieldValue.Set(reflect.ValueOf(&v)) + } else { + fieldValue.SetInt(v) + } + case NullableUint64DispatchId: + v := buf.ReadUint64(err) + if localIsPtr { + fieldValue.Set(reflect.ValueOf(&v)) + } else { + fieldValue.SetUint(v) + } + case 
NullableFloat32DispatchId: + v := buf.ReadFloat32(err) + if localIsPtr { + fieldValue.Set(reflect.ValueOf(&v)) + } else { + fieldValue.SetFloat(float64(v)) + } + case NullableFloat64DispatchId: + v := buf.ReadFloat64(err) + if localIsPtr { + fieldValue.Set(reflect.ValueOf(&v)) + } else { + fieldValue.SetFloat(v) + } + } + if DebugOutputEnabled() { + fmt.Printf("[fory-debug] readFieldsInOrder: nullable fixed field=%s pos=%d->%d\n", field.Name, startPos, buf.ReaderIndex()) + } + continue + } + + // Fast path for nullable varint primitives (read ref flag + varint) + if isNullableVarintPrimitive(field.DispatchId) { + refFlag := buf.ReadInt8(err) + if refFlag == NullFlag { + // Leave pointer as nil (or zero for non-pointer local types) + if DebugOutputEnabled() { + fmt.Printf("[fory-debug] readFieldsInOrder: nullable varint field=%s is null pos=%d->%d\n", field.Name, startPos, buf.ReaderIndex()) + } + continue + } + // Read varint value based on dispatch ID + // Handle both pointer and non-pointer local field types (schema evolution) + localIsPtr := fieldValue.Kind() == reflect.Ptr + switch field.DispatchId { + case NullableVarint32DispatchId: + v := buf.ReadVarint32(err) + if localIsPtr { + fieldValue.Set(reflect.ValueOf(&v)) + } else { + fieldValue.SetInt(int64(v)) + } + case NullableVarint64DispatchId: + v := buf.ReadVarint64(err) + if localIsPtr { + fieldValue.Set(reflect.ValueOf(&v)) + } else { + fieldValue.SetInt(v) + } + case NullableVarUint32DispatchId: + v := buf.ReadVaruint32(err) + if localIsPtr { + fieldValue.Set(reflect.ValueOf(&v)) + } else { + fieldValue.SetUint(uint64(v)) + } + case NullableVarUint64DispatchId: + v := buf.ReadVaruint64(err) + if localIsPtr { + fieldValue.Set(reflect.ValueOf(&v)) + } else { + fieldValue.SetUint(v) + } + case NullableTaggedInt64DispatchId: + v := buf.ReadTaggedInt64(err) + if localIsPtr { + fieldValue.Set(reflect.ValueOf(&v)) + } else { + fieldValue.SetInt(v) + } + case NullableTaggedUint64DispatchId: + v := 
buf.ReadTaggedUint64(err) + if localIsPtr { + fieldValue.Set(reflect.ValueOf(&v)) + } else { + fieldValue.SetUint(v) + } + case NullableIntDispatchId: + v := int(buf.ReadVarint64(err)) + if localIsPtr { + fieldValue.Set(reflect.ValueOf(&v)) + } else { + fieldValue.SetInt(int64(v)) + } + case NullableUintDispatchId: + v := uint(buf.ReadVaruint64(err)) + if localIsPtr { + fieldValue.Set(reflect.ValueOf(&v)) + } else { + fieldValue.SetUint(uint64(v)) + } + } + if DebugOutputEnabled() { + fmt.Printf("[fory-debug] readFieldsInOrder: nullable varint field=%s pos=%d->%d\n", field.Name, startPos, buf.ReaderIndex()) } continue } if isEnumField(field) { readEnumField(ctx, field, fieldValue) + if DebugOutputEnabled() { + fmt.Printf("[fory-debug] readFieldsInOrder: enum field=%s pos=%d->%d\n", field.Name, startPos, buf.ReaderIndex()) + } continue } @@ -983,6 +2334,288 @@ func (s *structSerializer) readFieldsInOrder(ctx *ReadContext, value reflect.Val } else { ctx.ReadValue(fieldValue, RefModeTracking, true) } + if DebugOutputEnabled() { + fmt.Printf("[fory-debug] readFieldsInOrder: slow path field=%s pos=%d->%d\n", field.Name, startPos, buf.ReaderIndex()) + } + } +} + +// writeFieldsInOrder writes fields in the order they appear in s.fields (fingerprint order) +// This is used in non-compatible mode where Java writes fields in fingerprint order +func (s *structSerializer) writeFieldsInOrder(ctx *WriteContext, value reflect.Value) { + buf := ctx.Buffer() + canUseUnsafe := value.CanAddr() + var ptr unsafe.Pointer + if canUseUnsafe { + ptr = unsafe.Pointer(value.UnsafeAddr()) + } + + for _, field := range s.fields { + // Fast path for fixed-size primitive types + if canUseUnsafe && isFixedSizePrimitive(field.DispatchId, field.Referencable) { + fieldPtr := unsafe.Add(ptr, field.Offset) + switch field.DispatchId { + case PrimitiveBoolDispatchId: + buf.WriteBool(*(*bool)(fieldPtr)) + case PrimitiveInt8DispatchId: + buf.WriteInt8(*(*int8)(fieldPtr)) + case PrimitiveUint8DispatchId: + 
buf.WriteUint8(*(*uint8)(fieldPtr)) + case PrimitiveInt16DispatchId: + buf.WriteInt16(*(*int16)(fieldPtr)) + case PrimitiveUint16DispatchId: + buf.WriteUint16(*(*uint16)(fieldPtr)) + case PrimitiveInt32DispatchId: + buf.WriteInt32(*(*int32)(fieldPtr)) + case PrimitiveUint32DispatchId: + buf.WriteUint32(*(*uint32)(fieldPtr)) + case PrimitiveInt64DispatchId: + buf.WriteInt64(*(*int64)(fieldPtr)) + case PrimitiveUint64DispatchId: + buf.WriteUint64(*(*uint64)(fieldPtr)) + case PrimitiveFloat32DispatchId: + buf.WriteFloat32(*(*float32)(fieldPtr)) + case PrimitiveFloat64DispatchId: + buf.WriteFloat64(*(*float64)(fieldPtr)) + // NotnullXxxPtrDispatchId: local field is *T with nullable=false + case NotnullBoolPtrDispatchId: + buf.WriteBool(**(**bool)(fieldPtr)) + case NotnullInt8PtrDispatchId: + buf.WriteInt8(**(**int8)(fieldPtr)) + case NotnullUint8PtrDispatchId: + buf.WriteUint8(**(**uint8)(fieldPtr)) + case NotnullInt16PtrDispatchId: + buf.WriteInt16(**(**int16)(fieldPtr)) + case NotnullUint16PtrDispatchId: + buf.WriteUint16(**(**uint16)(fieldPtr)) + case NotnullInt32PtrDispatchId: + buf.WriteInt32(**(**int32)(fieldPtr)) + case NotnullUint32PtrDispatchId: + buf.WriteUint32(**(**uint32)(fieldPtr)) + case NotnullInt64PtrDispatchId: + buf.WriteInt64(**(**int64)(fieldPtr)) + case NotnullUint64PtrDispatchId: + buf.WriteUint64(**(**uint64)(fieldPtr)) + case NotnullFloat32PtrDispatchId: + buf.WriteFloat32(**(**float32)(fieldPtr)) + case NotnullFloat64PtrDispatchId: + buf.WriteFloat64(**(**float64)(fieldPtr)) + } + continue + } + + // Fast path for varint primitive types + if canUseUnsafe && isVarintPrimitive(field.DispatchId, field.Referencable) && !fieldHasNonPrimitiveSerializer(field) { + fieldPtr := unsafe.Add(ptr, field.Offset) + switch field.DispatchId { + case PrimitiveVarint32DispatchId: + buf.WriteVarint32(*(*int32)(fieldPtr)) + case PrimitiveVarUint32DispatchId: + buf.WriteVaruint32(*(*uint32)(fieldPtr)) + case PrimitiveVarint64DispatchId: + 
buf.WriteVarint64(*(*int64)(fieldPtr)) + case PrimitiveVarUint64DispatchId: + buf.WriteVaruint64(*(*uint64)(fieldPtr)) + case PrimitiveTaggedInt64DispatchId: + buf.WriteTaggedInt64(*(*int64)(fieldPtr)) + case PrimitiveTaggedUint64DispatchId: + buf.WriteTaggedUint64(*(*uint64)(fieldPtr)) + case PrimitiveIntDispatchId: + buf.WriteVarint64(int64(*(*int)(fieldPtr))) + case PrimitiveUintDispatchId: + buf.WriteVaruint64(uint64(*(*uint)(fieldPtr))) + // NotnullXxxPtrDispatchId: local field is *T with nullable=false + case NotnullVarint32PtrDispatchId: + buf.WriteVarint32(**(**int32)(fieldPtr)) + case NotnullVarUint32PtrDispatchId: + buf.WriteVaruint32(**(**uint32)(fieldPtr)) + case NotnullVarint64PtrDispatchId: + buf.WriteVarint64(**(**int64)(fieldPtr)) + case NotnullVarUint64PtrDispatchId: + buf.WriteVaruint64(**(**uint64)(fieldPtr)) + case NotnullTaggedInt64PtrDispatchId: + buf.WriteTaggedInt64(**(**int64)(fieldPtr)) + case NotnullTaggedUint64PtrDispatchId: + buf.WriteTaggedUint64(**(**uint64)(fieldPtr)) + case NotnullIntPtrDispatchId: + buf.WriteVarint64(int64(**(**int)(fieldPtr))) + case NotnullUintPtrDispatchId: + buf.WriteVaruint64(uint64(**(**uint)(fieldPtr))) + } + continue + } + + // Fast path for nullable fixed-size primitives (write ref flag + fixed bytes) + if canUseUnsafe && isNullableFixedSizePrimitive(field.DispatchId) { + fieldPtr := unsafe.Add(ptr, field.Offset) + // Get the pointer value and check for nil + switch field.DispatchId { + case NullableBoolDispatchId: + p := *(**bool)(fieldPtr) + if p == nil { + buf.WriteInt8(NullFlag) + } else { + buf.WriteInt8(NotNullValueFlag) + buf.WriteBool(*p) + } + case NullableInt8DispatchId: + p := *(**int8)(fieldPtr) + if p == nil { + buf.WriteInt8(NullFlag) + } else { + buf.WriteInt8(NotNullValueFlag) + buf.WriteInt8(*p) + } + case NullableUint8DispatchId: + p := *(**uint8)(fieldPtr) + if p == nil { + buf.WriteInt8(NullFlag) + } else { + buf.WriteInt8(NotNullValueFlag) + buf.WriteUint8(*p) + } + case 
NullableInt16DispatchId: + p := *(**int16)(fieldPtr) + if p == nil { + buf.WriteInt8(NullFlag) + } else { + buf.WriteInt8(NotNullValueFlag) + buf.WriteInt16(*p) + } + case NullableUint16DispatchId: + p := *(**uint16)(fieldPtr) + if p == nil { + buf.WriteInt8(NullFlag) + } else { + buf.WriteInt8(NotNullValueFlag) + buf.WriteUint16(*p) + } + case NullableInt32DispatchId: + p := *(**int32)(fieldPtr) + if p == nil { + buf.WriteInt8(NullFlag) + } else { + buf.WriteInt8(NotNullValueFlag) + buf.WriteInt32(*p) + } + case NullableUint32DispatchId: + p := *(**uint32)(fieldPtr) + if p == nil { + buf.WriteInt8(NullFlag) + } else { + buf.WriteInt8(NotNullValueFlag) + buf.WriteUint32(*p) + } + case NullableInt64DispatchId: + p := *(**int64)(fieldPtr) + if p == nil { + buf.WriteInt8(NullFlag) + } else { + buf.WriteInt8(NotNullValueFlag) + buf.WriteInt64(*p) + } + case NullableUint64DispatchId: + p := *(**uint64)(fieldPtr) + if p == nil { + buf.WriteInt8(NullFlag) + } else { + buf.WriteInt8(NotNullValueFlag) + buf.WriteUint64(*p) + } + case NullableFloat32DispatchId: + p := *(**float32)(fieldPtr) + if p == nil { + buf.WriteInt8(NullFlag) + } else { + buf.WriteInt8(NotNullValueFlag) + buf.WriteFloat32(*p) + } + case NullableFloat64DispatchId: + p := *(**float64)(fieldPtr) + if p == nil { + buf.WriteInt8(NullFlag) + } else { + buf.WriteInt8(NotNullValueFlag) + buf.WriteFloat64(*p) + } + } + continue + } + + // Fast path for nullable varint primitives (write ref flag + varint) + if canUseUnsafe && isNullableVarintPrimitive(field.DispatchId) { + fieldPtr := unsafe.Add(ptr, field.Offset) + switch field.DispatchId { + case NullableVarint32DispatchId: + p := *(**int32)(fieldPtr) + if p == nil { + buf.WriteInt8(NullFlag) + } else { + buf.WriteInt8(NotNullValueFlag) + buf.WriteVarint32(*p) + } + case NullableVarint64DispatchId: + p := *(**int64)(fieldPtr) + if p == nil { + buf.WriteInt8(NullFlag) + } else { + buf.WriteInt8(NotNullValueFlag) + buf.WriteVarint64(*p) + } + case 
NullableVarUint32DispatchId: + p := *(**uint32)(fieldPtr) + if p == nil { + buf.WriteInt8(NullFlag) + } else { + buf.WriteInt8(NotNullValueFlag) + buf.WriteVaruint32(*p) + } + case NullableVarUint64DispatchId: + p := *(**uint64)(fieldPtr) + if p == nil { + buf.WriteInt8(NullFlag) + } else { + buf.WriteInt8(NotNullValueFlag) + buf.WriteVaruint64(*p) + } + case NullableTaggedInt64DispatchId: + p := *(**int64)(fieldPtr) + if p == nil { + buf.WriteInt8(NullFlag) + } else { + buf.WriteInt8(NotNullValueFlag) + buf.WriteTaggedInt64(*p) + } + case NullableTaggedUint64DispatchId: + p := *(**uint64)(fieldPtr) + if p == nil { + buf.WriteInt8(NullFlag) + } else { + buf.WriteInt8(NotNullValueFlag) + buf.WriteTaggedUint64(*p) + } + case NullableIntDispatchId: + p := *(**int)(fieldPtr) + if p == nil { + buf.WriteInt8(NullFlag) + } else { + buf.WriteInt8(NotNullValueFlag) + buf.WriteVarint64(int64(*p)) + } + case NullableUintDispatchId: + p := *(**uint)(fieldPtr) + if p == nil { + buf.WriteInt8(NullFlag) + } else { + buf.WriteInt8(NotNullValueFlag) + buf.WriteVaruint64(uint64(*p)) + } + } + continue + } + + // Handle remaining field types (strings, slices, maps, structs, nullable primitives) + s.writeRemainingField(ctx, ptr, field, value) } } @@ -1071,6 +2704,54 @@ func (s *structSerializer) initFieldsFromTypeResolver(typeResolver *TypeResolver if fieldTypeId == 0 { fieldTypeId = typeIdFromKind(fieldType) } + + // Override TypeId based on compress/encoding tags for integer types + // This matches the logic in type_def.go:buildFieldDefs + baseKind := fieldType.Kind() + if baseKind == reflect.Ptr { + baseKind = fieldType.Elem().Kind() + } + switch baseKind { + case reflect.Uint32: + if foryTag.CompressSet { + if foryTag.Compress { + fieldTypeId = VAR_UINT32 + } else { + fieldTypeId = UINT32 + } + } + case reflect.Int32: + if foryTag.CompressSet { + if foryTag.Compress { + fieldTypeId = VARINT32 + } else { + fieldTypeId = INT32 + } + } + case reflect.Uint64: + if foryTag.EncodingSet 
{ + switch foryTag.Encoding { + case "fixed": + fieldTypeId = UINT64 + case "varint": + fieldTypeId = VAR_UINT64 + case "tagged": + fieldTypeId = TAGGED_UINT64 + } + } + case reflect.Int64: + if foryTag.EncodingSet { + switch foryTag.Encoding { + case "fixed": + fieldTypeId = INT64 + case "varint": + fieldTypeId = VARINT64 + case "tagged": + fieldTypeId = TAGGED_INT64 + } + } + } + // Calculate nullable flag for serialization (wire format): // - In xlang mode: Per xlang spec, fields are NON-NULLABLE by default. // Only pointer types are nullable by default. @@ -1124,8 +2805,20 @@ func (s *structSerializer) initFieldsFromTypeResolver(typeResolver *TypeResolver // Pre-compute WriteType: true for struct fields in compatible mode writeType := typeResolver.Compatible() && isStructField(fieldType) - // Pre-compute StaticId, with special handling for enum fields - staticId := GetDispatchId(fieldType) + // Pre-compute DispatchId, with special handling for enum fields and pointer-to-numeric + var staticId DispatchId + if fieldType.Kind() == reflect.Ptr && isNumericKind(fieldType.Elem().Kind()) { + if nullableFlag { + staticId = GetDispatchIdFromTypeId(fieldTypeId, true) + } else { + staticId = GetNotnullPtrDispatchId(fieldType.Elem().Kind(), foryTag.Encoding) + } + } else { + staticId = GetDispatchIdFromTypeId(fieldTypeId, nullableFlag) + if staticId == UnknownDispatchId { + staticId = GetDispatchId(fieldType) + } + } if fieldSerializer != nil { if _, ok := fieldSerializer.(*enumSerializer); ok { staticId = EnumDispatchId @@ -1144,7 +2837,7 @@ func (s *structSerializer) initFieldsFromTypeResolver(typeResolver *TypeResolver Name: SnakeCase(field.Name), Offset: field.Offset, Type: fieldType, - StaticId: staticId, + DispatchId: staticId, TypeId: fieldTypeId, Serializer: fieldSerializer, Referencable: nullableFlag, // Use same logic as TypeDef's nullable flag for consistent ref handling @@ -1191,11 +2884,31 @@ func (s *structSerializer) initFieldsFromTypeResolver(typeResolver 
*TypeResolver s.fields = fields s.groupFields() + + // Debug output for field order comparison with Java + if DebugOutputEnabled() && s.type_ != nil { + fmt.Printf("[Go] ========== Local sorted fields for %s ==========\n", s.type_.Name()) + fmt.Printf("[Go] Go sorted fixedFields (%d):\n", len(s.fixedFields)) + for i, f := range s.fixedFields { + fmt.Printf("[Go] [%d] %s -> dispatchId=%d, typeId=%d, size=%d, referencable=%v\n", i, f.Name, f.DispatchId, f.TypeId, f.FixedSize, f.Referencable) + } + fmt.Printf("[Go] Go sorted varintFields (%d):\n", len(s.varintFields)) + for i, f := range s.varintFields { + fmt.Printf("[Go] [%d] %s -> dispatchId=%d, typeId=%d, referencable=%v\n", i, f.Name, f.DispatchId, f.TypeId, f.Referencable) + } + fmt.Printf("[Go] Go sorted remainingFields (%d):\n", len(s.remainingFields)) + for i, f := range s.remainingFields { + fmt.Printf("[Go] [%d] %s -> dispatchId=%d, typeId=%d, referencable=%v\n", i, f.Name, f.DispatchId, f.TypeId, f.Referencable) + } + fmt.Printf("[Go] ===========================================\n") + } + return nil } // groupFields categorizes fields into fixedFields, varintFields, and remainingFields. // Also computes pre-computed sizes and WriteOffset for batch buffer reservation. +// Fields are sorted within each group to match Java's wire format order. func (s *structSerializer) groupFields() { s.fixedFields = nil s.varintFields = nil @@ -1204,28 +2917,196 @@ func (s *structSerializer) groupFields() { s.maxVarintSize = 0 for _, field := range s.fields { - // Fields with non-primitive serializers (NAMED_ENUM, NAMED_STRUCT, etc.) 
- // must go to remainingFields to use their serializer's type info writing - hasNonPrimitive := fieldHasNonPrimitiveSerializer(field) - if DebugOutputEnabled() { - fmt.Printf("[fory-debug] groupFields: field=%s TypeId=%d internalId=%d hasNonPrimitive=%v\n", - field.Name, field.TypeId, field.TypeId&0xFF, hasNonPrimitive) - } - if hasNonPrimitive { - s.remainingFields = append(s.remainingFields, field) - } else if isFixedSizePrimitive(field.StaticId, field.Referencable) { - // Compute FixedSize and WriteOffset for this field - field.FixedSize = getFixedSizeByDispatchId(field.StaticId) - field.WriteOffset = s.fixedSize - s.fixedSize += field.FixedSize + if isFixedSizePrimitive(field.DispatchId, field.Referencable) { + // Non-nullable fixed-size primitives only + field.FixedSize = getFixedSizeByDispatchId(field.DispatchId) s.fixedFields = append(s.fixedFields, field) - } else if isVarintPrimitive(field.StaticId, field.Referencable) { - s.maxVarintSize += getVarintMaxSizeByDispatchId(field.StaticId) + } else if isVarintPrimitive(field.DispatchId, field.Referencable) { + // Non-nullable varint primitives only s.varintFields = append(s.varintFields, field) } else { + // All other fields including nullable primitives s.remainingFields = append(s.remainingFields, field) } } + + // Sort fixedFields: size desc, typeId desc, name asc + sort.SliceStable(s.fixedFields, func(i, j int) bool { + fi, fj := s.fixedFields[i], s.fixedFields[j] + if fi.FixedSize != fj.FixedSize { + return fi.FixedSize > fj.FixedSize // size descending + } + if fi.TypeId != fj.TypeId { + return fi.TypeId > fj.TypeId // typeId descending + } + return fi.Name < fj.Name // name ascending + }) + + // Recompute WriteOffset after sorting + for _, field := range s.fixedFields { + field.WriteOffset = s.fixedSize + s.fixedSize += field.FixedSize + } + + // Sort varintFields: underlying type size desc, typeId desc, name asc + // Note: Java uses primitive type size (8 for long, 4 for int), not encoding max size + 
sort.SliceStable(s.varintFields, func(i, j int) bool { + fi, fj := s.varintFields[i], s.varintFields[j] + sizeI := getUnderlyingTypeSize(fi.DispatchId) + sizeJ := getUnderlyingTypeSize(fj.DispatchId) + if sizeI != sizeJ { + return sizeI > sizeJ // size descending + } + if fi.TypeId != fj.TypeId { + return fi.TypeId > fj.TypeId // typeId descending + } + return fi.Name < fj.Name // name ascending + }) + + // Recompute maxVarintSize + for _, field := range s.varintFields { + s.maxVarintSize += getVarintMaxSizeByDispatchId(field.DispatchId) + } + + // Sort remainingFields: nullable primitives first (by primitiveComparator), + // then other internal types (typeId, name), then lists, sets, maps, other (by name) + // This sorting is ALWAYS applied - same algorithm for both local and remote types + sort.SliceStable(s.remainingFields, func(i, j int) bool { + fi, fj := s.remainingFields[i], s.remainingFields[j] + catI, catJ := getFieldCategory(fi), getFieldCategory(fj) + if catI != catJ { + return catI < catJ + } + // Within nullable primitives category, use primitiveComparator logic + if catI == 0 { + return comparePrimitiveFields(fi, fj) + } + // Within other internal types category, sort by typeId then name + if catI == 1 { + if fi.TypeId != fj.TypeId { + return fi.TypeId < fj.TypeId + } + return fi.Name < fj.Name + } + // List, set, map, and other categories: sort by name only + return fi.Name < fj.Name + }) +} + +// getFieldCategory returns the category for sorting remainingFields: +// 0: nullable primitives (sorted by primitiveComparator) +// 1: internal types STRING, BINARY, LIST, SET, MAP (sorted by typeId, then name) +// 2: struct, enum, and all other types (sorted by name only) +func getFieldCategory(field *FieldInfo) int { + if isNullableFixedSizePrimitive(field.DispatchId) || isNullableVarintPrimitive(field.DispatchId) { + return 0 + } + internalId := field.TypeId & 0xFF + switch TypeId(internalId) { + case STRING, BINARY, LIST, SET, MAP: + // Internal types: 
sorted by typeId, then name + return 1 + default: + // struct, enum, and all other types: sorted by name + return 2 + } +} + +// comparePrimitiveFields compares two nullable primitive fields using Java's primitiveComparator logic: +// fixed before varint, then underlying type size desc, typeId desc, name asc +func comparePrimitiveFields(fi, fj *FieldInfo) bool { + iFixed := isNullableFixedSizePrimitive(fi.DispatchId) + jFixed := isNullableFixedSizePrimitive(fj.DispatchId) + if iFixed != jFixed { + return iFixed // fixed before varint + } + // Same category: compare by underlying type size desc, typeId desc, name asc + // Note: Java uses primitive type size (8, 4, 2, 1), not encoding size + sizeI := getUnderlyingTypeSize(fi.DispatchId) + sizeJ := getUnderlyingTypeSize(fj.DispatchId) + if sizeI != sizeJ { + return sizeI > sizeJ // size descending + } + if fi.TypeId != fj.TypeId { + return fi.TypeId > fj.TypeId // typeId descending + } + return fi.Name < fj.Name // name ascending +} + +// getNullableFixedSize returns the fixed size for nullable fixed primitives +func getNullableFixedSize(dispatchId DispatchId) int { + switch dispatchId { + case NullableBoolDispatchId, NullableInt8DispatchId, NullableUint8DispatchId: + return 1 + case NullableInt16DispatchId, NullableUint16DispatchId: + return 2 + case NullableInt32DispatchId, NullableUint32DispatchId, NullableFloat32DispatchId: + return 4 + case NullableInt64DispatchId, NullableUint64DispatchId, NullableFloat64DispatchId: + return 8 + default: + return 0 + } +} + +// getNullableVarintMaxSize returns the max size for nullable varint primitives +func getNullableVarintMaxSize(dispatchId DispatchId) int { + switch dispatchId { + case NullableVarint32DispatchId, NullableVarUint32DispatchId: + return 5 + case NullableVarint64DispatchId, NullableVarUint64DispatchId, NullableIntDispatchId, NullableUintDispatchId: + return 10 + case NullableTaggedInt64DispatchId, NullableTaggedUint64DispatchId: + return 9 + default: + return 0 
+ } +} + +// getUnderlyingTypeSize returns the size of the underlying primitive type (8 for 64-bit, 4 for 32-bit, etc.) +// This matches Java's getSizeOfPrimitiveType() which uses the type size, not encoding size +func getUnderlyingTypeSize(dispatchId DispatchId) int { + switch dispatchId { + // 64-bit types + case PrimitiveInt64DispatchId, PrimitiveUint64DispatchId, PrimitiveFloat64DispatchId, + NotnullInt64PtrDispatchId, NotnullUint64PtrDispatchId, NotnullFloat64PtrDispatchId, + PrimitiveVarint64DispatchId, PrimitiveVarUint64DispatchId, + NotnullVarint64PtrDispatchId, NotnullVarUint64PtrDispatchId, + PrimitiveTaggedInt64DispatchId, PrimitiveTaggedUint64DispatchId, + NotnullTaggedInt64PtrDispatchId, NotnullTaggedUint64PtrDispatchId, + PrimitiveIntDispatchId, PrimitiveUintDispatchId, + NotnullIntPtrDispatchId, NotnullUintPtrDispatchId: + return 8 + // 32-bit types + case PrimitiveInt32DispatchId, PrimitiveUint32DispatchId, PrimitiveFloat32DispatchId, + NotnullInt32PtrDispatchId, NotnullUint32PtrDispatchId, NotnullFloat32PtrDispatchId, + PrimitiveVarint32DispatchId, PrimitiveVarUint32DispatchId, + NotnullVarint32PtrDispatchId, NotnullVarUint32PtrDispatchId: + return 4 + // 16-bit types + case PrimitiveInt16DispatchId, PrimitiveUint16DispatchId, + NotnullInt16PtrDispatchId, NotnullUint16PtrDispatchId: + return 2 + // 8-bit types + case PrimitiveBoolDispatchId, PrimitiveInt8DispatchId, PrimitiveUint8DispatchId, + NotnullBoolPtrDispatchId, NotnullInt8PtrDispatchId, NotnullUint8PtrDispatchId: + return 1 + // Nullable types + case NullableInt64DispatchId, NullableUint64DispatchId, NullableFloat64DispatchId, + NullableVarint64DispatchId, NullableVarUint64DispatchId, + NullableTaggedInt64DispatchId, NullableTaggedUint64DispatchId, + NullableIntDispatchId, NullableUintDispatchId: + return 8 + case NullableInt32DispatchId, NullableUint32DispatchId, NullableFloat32DispatchId, + NullableVarint32DispatchId, NullableVarUint32DispatchId: + return 4 + case NullableInt16DispatchId, 
NullableUint16DispatchId: + return 2 + case NullableBoolDispatchId, NullableInt8DispatchId, NullableUint8DispatchId: + return 1 + default: + return 0 + } } // initFieldsFromDefsWithResolver initializes fields from remote fieldDefs using typeResolver @@ -1255,7 +3136,7 @@ func (s *structSerializer) initFieldsFromDefsWithResolver(typeResolver *TypeReso // Pre-compute WriteType: true for struct fields in compatible mode writeType := typeResolver.Compatible() && isStructField(remoteType) - // Pre-compute StaticId, with special handling for enum fields + // Pre-compute DispatchId, with special handling for enum fields staticId := GetDispatchId(remoteType) if fieldSerializer != nil { if _, ok := fieldSerializer.(*enumSerializer); ok { @@ -1271,7 +3152,7 @@ func (s *structSerializer) initFieldsFromDefsWithResolver(typeResolver *TypeReso Name: def.name, Offset: 0, Type: remoteType, - StaticId: staticId, + DispatchId: staticId, TypeId: fieldTypeId, Serializer: fieldSerializer, Referencable: def.nullable, // Use remote nullable flag @@ -1529,8 +3410,36 @@ func (s *structSerializer) initFieldsFromDefsWithResolver(typeResolver *TypeReso // Pre-compute WriteType: true for struct fields in compatible mode writeType := typeResolver.Compatible() && isStructField(fieldType) - // Pre-compute StaticId, with special handling for enum fields - staticId := GetDispatchId(fieldType) + // Pre-compute DispatchId, with special handling for pointer-to-numeric and enum fields + // IMPORTANT: For compatible mode reading, we must use the REMOTE nullable flag + // to determine DispatchId, because Java wrote data with its nullable semantics. 
+ var staticId DispatchId + localKind := fieldType.Kind() + localIsPtr := localKind == reflect.Ptr + localIsNumeric := isNumericKind(localKind) || (localIsPtr && isNumericKind(fieldType.Elem().Kind())) + + if localIsNumeric { + if localIsPtr { + if def.nullable { + // Local is *T, remote is nullable - use nullable DispatchId + staticId = GetDispatchIdFromTypeId(fieldTypeId, true) + } else { + // Local is *T, remote is NOT nullable - use notnull pointer DispatchId + encoding := getEncodingFromTypeId(fieldTypeId) + staticId = GetNotnullPtrDispatchId(fieldType.Elem().Kind(), encoding) + } + } else { + if def.nullable { + // Local is T (non-pointer), remote is nullable - use nullable DispatchId + staticId = GetDispatchIdFromTypeId(fieldTypeId, true) + } else { + // Local is T, remote is NOT nullable - use primitive DispatchId + staticId = GetDispatchId(fieldType) + } + } + } else { + staticId = GetDispatchId(fieldType) + } if fieldSerializer != nil { if _, ok := fieldSerializer.(*enumSerializer); ok { staticId = EnumDispatchId @@ -1551,7 +3460,7 @@ func (s *structSerializer) initFieldsFromDefsWithResolver(typeResolver *TypeReso Name: fieldName, Offset: offset, Type: fieldType, - StaticId: staticId, + DispatchId: staticId, TypeId: fieldTypeId, Serializer: fieldSerializer, Referencable: def.nullable, // Use remote nullable flag @@ -1569,15 +3478,54 @@ func (s *structSerializer) initFieldsFromDefsWithResolver(typeResolver *TypeReso s.fields = fields s.groupFields() - // Compute typeDefDiffers: true if any field doesn't exist locally or has type mismatch + // Debug output for field order comparison with Java MetaSharedSerializer + if DebugOutputEnabled() && s.type_ != nil { + fmt.Printf("[Go] ========== Sorted fields for %s ==========\n", s.type_.Name()) + fmt.Printf("[Go] Remote TypeDef order (%d fields):\n", len(s.fieldDefs)) + for i, def := range s.fieldDefs { + fmt.Printf("[Go] [%d] %s -> typeId=%d, nullable=%v\n", i, def.name, def.fieldType.TypeId(), def.nullable) + } 
+ fmt.Printf("[Go] Go sorted fixedFields (%d):\n", len(s.fixedFields)) + for i, f := range s.fixedFields { + fmt.Printf("[Go] [%d] %s -> dispatchId=%d, typeId=%d, size=%d, referencable=%v\n", i, f.Name, f.DispatchId, f.TypeId, f.FixedSize, f.Referencable) + } + fmt.Printf("[Go] Go sorted varintFields (%d):\n", len(s.varintFields)) + for i, f := range s.varintFields { + fmt.Printf("[Go] [%d] %s -> dispatchId=%d, typeId=%d, referencable=%v\n", i, f.Name, f.DispatchId, f.TypeId, f.Referencable) + } + fmt.Printf("[Go] Go sorted remainingFields (%d):\n", len(s.remainingFields)) + for i, f := range s.remainingFields { + fmt.Printf("[Go] [%d] %s -> dispatchId=%d, typeId=%d, referencable=%v\n", i, f.Name, f.DispatchId, f.TypeId, f.Referencable) + } + fmt.Printf("[Go] ===========================================\n") + } + + // Compute typeDefDiffers: true if any field doesn't exist locally, has type mismatch, + // or has nullable mismatch (which affects field ordering) // When typeDefDiffers is false, we can use grouped reading for better performance s.typeDefDiffers = false - for _, field := range fields { + for i, field := range fields { if field.FieldIndex < 0 { // Field exists in remote TypeDef but not locally s.typeDefDiffers = true break } + // Check if nullable flag differs between remote and local + // Remote nullable is stored in fieldDefs[i].nullable + // Local nullable is determined by whether the Go field is a pointer type + if i < len(s.fieldDefs) && field.FieldIndex >= 0 { + remoteNullable := s.fieldDefs[i].nullable + // Check if local Go field is a pointer type (can be nil = nullable) + localNullable := field.Type.Kind() == reflect.Ptr + if remoteNullable != localNullable { + s.typeDefDiffers = true + break + } + } + } + + if DebugOutputEnabled() && s.type_ != nil { + fmt.Printf("[Go] typeDefDiffers=%v for %s\n", s.typeDefDiffers, s.type_.Name()) } return nil @@ -1930,14 +3878,19 @@ func sortFields( } } // Sort primitives (non-nullable) - same logic as boxed - 
// Java sorts by: compressed types last, then by size (largest first), then by type ID (descending) + // Java sorts by: compressed (varint) types last, then by size (largest first), then by type ID (descending) + // Fixed types: BOOL, INT8, UINT8, INT16, UINT16, INT32, UINT32, INT64, UINT64, FLOAT32, FLOAT64 + // Varint types: VARINT32, VARINT64, VAR_UINT32, VAR_UINT64, TAGGED_INT64, TAGGED_UINT64 + isVarintTypeId := func(typeID int16) bool { + return typeID == VARINT32 || typeID == VARINT64 || + typeID == VAR_UINT32 || typeID == VAR_UINT64 || + typeID == TAGGED_INT64 || typeID == TAGGED_UINT64 + } sortPrimitiveSlice := func(s []triple) { sort.Slice(s, func(i, j int) bool { ai, aj := s[i], s[j] - compressI := ai.typeID == INT32 || ai.typeID == INT64 || - ai.typeID == VARINT32 || ai.typeID == VARINT64 - compressJ := aj.typeID == INT32 || aj.typeID == INT64 || - aj.typeID == VARINT32 || aj.typeID == VARINT64 + compressI := isVarintTypeId(ai.typeID) + compressJ := isVarintTypeId(aj.typeID) if compressI != compressJ { return !compressI && compressJ } diff --git a/go/fory/tag.go b/go/fory/tag.go index d07154374f..63a0ac530c 100644 --- a/go/fory/tag.go +++ b/go/fory/tag.go @@ -30,43 +30,54 @@ const ( // ForyTag represents parsed fory struct tag options. // -// Tag format: `fory:"id=N,nullable=bool,ref=bool,ignore=bool"` or `fory:"-"` +// Tag format: `fory:"id=N,nullable=bool,ref=bool,ignore=bool,type=TypeID"` or `fory:"-"` // // Options: // - id: Field tag ID. -1 (default) uses field name, >=0 uses numeric tag ID for compact encoding // - nullable: Whether to write null flag. Default false (skip null flag for non-nullable fields) // - ref: Whether to enable reference tracking. Default false (skip ref tracking overhead) // - ignore: Whether to skip this field during serialization. Default false +// - type: Override type ID for unsigned types. Allows specifying encoding for uint32/uint64. 
+// Valid values: UINT16, UINT32, VAR_UINT32, UINT64, VAR_UINT64, TAGGED_UINT64 // // Examples: // // type Example struct { -// Name string `fory:"id=0"` // Use tag ID 0 -// Age int `fory:"id=1,nullable=false"` // Explicit nullable=false -// Email *string `fory:"id=2,nullable=true,ref=false"` // Nullable pointer, no ref tracking -// Parent *Node `fory:"id=3,ref=true,nullable=true"` // With reference tracking -// Secret string `fory:"ignore"` // Skip this field -// Hidden string `fory:"-"` // Skip this field (shorthand) +// Name string `fory:"id=0"` // Use tag ID 0 +// Age int `fory:"id=1,nullable=false"` // Explicit nullable=false +// Email *string `fory:"id=2,nullable=true,ref=false"` // Nullable pointer, no ref tracking +// Parent *Node `fory:"id=3,ref=true,nullable=true"` // With reference tracking +// U32Fixed uint32 `fory:"type=UINT32"` // Use fixed 4-byte encoding +// U32Var uint32 `fory:"type=VAR_UINT32"` // Use variable-length encoding +// U64Tagged uint64 `fory:"type=TAGGED_UINT64"` // Use tagged encoding +// Secret string `fory:"ignore"` // Skip this field +// Hidden string `fory:"-"` // Skip this field (shorthand) // } type ForyTag struct { - ID int // Field tag ID (-1 = use field name, >=0 = use tag ID) - Nullable bool // Whether to write null flag (default: false) - Ref bool // Whether to enable reference tracking (default: false) - Ignore bool // Whether to ignore this field during serialization (default: false) - HasTag bool // Whether field has fory tag at all + ID int // Field tag ID (-1 = use field name, >=0 = use tag ID) + Nullable bool // Whether to write null flag (default: false) + Ref bool // Whether to enable reference tracking (default: false) + Ignore bool // Whether to ignore this field during serialization (default: false) + HasTag bool // Whether field has fory tag at all + Compress bool // For int32/uint32: true=varint, false=fixed (default: true) + Encoding string // For int64/uint64: "fixed", "varint", "tagged" (default: "varint") // 
Track which options were explicitly set (for override logic) NullableSet bool RefSet bool IgnoreSet bool + CompressSet bool + EncodingSet bool } // ParseForyTag parses a fory struct tag from reflect.StructField.Tag. // -// Tag format: `fory:"id=N,nullable=bool,ref=bool,ignore=bool"` or `fory:"-"` +// Tag format: `fory:"id=N,nullable=bool,ref=bool,ignore=bool,compress=bool,encoding=value"` or `fory:"-"` // // Supported syntaxes: // - Key-value: `nullable=true`, `ref=false`, `ignore=true`, `id=0` +// - For int32/uint32: `compress=true` (varint) or `compress=false` (fixed), default is true +// - For int64/uint64: `encoding=fixed`, `encoding=varint`, `encoding=tagged`, default is varint // - Standalone flags: `nullable`, `ref`, `ignore` (equivalent to =true) // - Shorthand: `-` (equivalent to `ignore=true`) func ParseForyTag(field reflect.StructField) ForyTag { @@ -76,6 +87,8 @@ func ParseForyTag(field reflect.StructField) ForyTag { Ref: false, Ignore: false, HasTag: false, + Compress: true, // default: varint encoding + Encoding: "varint", // default: varint encoding } tagValue, ok := field.Tag.Lookup("fory") @@ -119,6 +132,12 @@ func ParseForyTag(field reflect.StructField) ForyTag { case "ignore": tag.Ignore = parseBool(value) tag.IgnoreSet = true + case "compress": + tag.Compress = parseBool(value) + tag.CompressSet = true + case "encoding": + tag.Encoding = strings.ToLower(strings.TrimSpace(value)) + tag.EncodingSet = true } } else { // Handle standalone flags (presence means true) @@ -146,6 +165,30 @@ func parseBool(s string) bool { return s == "true" || s == "1" || s == "yes" } +// parseTypeID parses a TypeId from string name. +// Returns 0 if the type name is not recognized. 
+func parseTypeID(s string) TypeId { + s = strings.ToUpper(strings.TrimSpace(s)) + switch s { + case "UINT8": + return UINT8 + case "UINT16": + return UINT16 + case "UINT32": + return UINT32 + case "VAR_UINT32": + return VAR_UINT32 + case "UINT64": + return UINT64 + case "VAR_UINT64": + return VAR_UINT64 + case "TAGGED_UINT64": + return TAGGED_UINT64 + default: + return 0 + } +} + // ValidateForyTags validates all fory tags in a struct type. // Returns an error if validation fails. // diff --git a/go/fory/tests/xlang/xlang_test_main.go b/go/fory/tests/xlang/xlang_test_main.go index d7a2d60015..282b58b1de 100644 --- a/go/fory/tests/xlang/xlang_test_main.go +++ b/go/fory/tests/xlang/xlang_test_main.go @@ -129,6 +129,39 @@ func getNullableComprehensiveCompatible(obj interface{}) NullableComprehensiveCo } } +func getUnsignedSchemaConsistent(obj interface{}) UnsignedSchemaConsistent { + switch v := obj.(type) { + case UnsignedSchemaConsistent: + return v + case *UnsignedSchemaConsistent: + return *v + default: + panic(fmt.Sprintf("expected UnsignedSchemaConsistent, got %T", obj)) + } +} + +func getUnsignedSchemaCompatible(obj interface{}) UnsignedSchemaCompatible { + switch v := obj.(type) { + case UnsignedSchemaCompatible: + return v + case *UnsignedSchemaCompatible: + return *v + default: + panic(fmt.Sprintf("expected UnsignedSchemaCompatible, got %T", obj)) + } +} + +func getUnsignedSchemaConsistentSimple(obj interface{}) UnsignedSchemaConsistentSimple { + switch v := obj.(type) { + case UnsignedSchemaConsistentSimple: + return v + case *UnsignedSchemaConsistentSimple: + return *v + default: + panic(fmt.Sprintf("expected UnsignedSchemaConsistentSimple, got %T", obj)) + } +} + func assertEqualFloat32(expected, actual float32, name string) { diff := expected - actual if diff < 0 { @@ -2129,24 +2162,31 @@ func testCircularRefCompatible() { // Note: Go currently only supports uint8, uint16, uint32 (VAR_UINT32), uint64 (VAR_UINT64). 
// Fixed and tagged encodings require fory encoding tags (TODO). // Matches Java's UnsignedSchemaConsistent (type id 501) +// UnsignedSchemaConsistentSimple - Simple test struct for unsigned numbers. +// Matches Java's UnsignedSchemaConsistentSimple (type id 1) +type UnsignedSchemaConsistentSimple struct { + U64Tagged uint64 `fory:"encoding=tagged"` // TAGGED_UINT64 - tagged encoding + U64TaggedNullable *uint64 `fory:"nullable,encoding=tagged"` // Nullable TAGGED_UINT64 +} + type UnsignedSchemaConsistent struct { // Primitive unsigned fields (non-nullable) U8 uint8 // UINT8 - fixed 8-bit U16 uint16 // UINT16 - fixed 16-bit - U32Var uint32 // VAR_UINT32 - variable-length - U32Fixed uint32 // Should be UINT32 (fixed) - TODO: add encoding tag - U64Var uint64 // VAR_UINT64 - variable-length - U64Fixed uint64 // Should be UINT64 (fixed) - TODO: add encoding tag - U64Tagged uint64 // Should be TAGGED_UINT64 - TODO: add encoding tag + U32Var uint32 `fory:"compress=true"` // VAR_UINT32 - variable-length + U32Fixed uint32 `fory:"compress=false"` // UINT32 - fixed 4-byte + U64Var uint64 `fory:"encoding=varint"` // VAR_UINT64 - variable-length + U64Fixed uint64 `fory:"encoding=fixed"` // UINT64 - fixed 8-byte + U64Tagged uint64 `fory:"encoding=tagged"` // TAGGED_UINT64 - tagged encoding // Nullable unsigned fields (pointers) U8Nullable *uint8 `fory:"nullable"` U16Nullable *uint16 `fory:"nullable"` - U32VarNullable *uint32 `fory:"nullable"` - U32FixedNullable *uint32 `fory:"nullable"` - U64VarNullable *uint64 `fory:"nullable"` - U64FixedNullable *uint64 `fory:"nullable"` - U64TaggedNullable *uint64 `fory:"nullable"` + U32VarNullable *uint32 `fory:"nullable,compress=true"` + U32FixedNullable *uint32 `fory:"nullable,compress=false"` + U64VarNullable *uint64 `fory:"nullable,encoding=varint"` + U64FixedNullable *uint64 `fory:"nullable,encoding=fixed"` + U64TaggedNullable *uint64 `fory:"nullable,encoding=tagged"` } // UnsignedSchemaCompatible - Test struct for unsigned numbers in 
COMPATIBLE mode. @@ -2157,58 +2197,67 @@ type UnsignedSchemaCompatible struct { // Group 1: Nullable in Go (pointers), non-nullable in Java U8 *uint8 `fory:"nullable"` U16 *uint16 `fory:"nullable"` - U32Var *uint32 `fory:"nullable"` - U32Fixed *uint32 `fory:"nullable"` - U64Var *uint64 `fory:"nullable"` - U64Fixed *uint64 `fory:"nullable"` - U64Tagged *uint64 `fory:"nullable"` + U32Var *uint32 `fory:"nullable,compress=true"` + U32Fixed *uint32 `fory:"nullable,compress=false"` + U64Var *uint64 `fory:"nullable,encoding=varint"` + U64Fixed *uint64 `fory:"nullable,encoding=fixed"` + U64Tagged *uint64 `fory:"nullable,encoding=tagged"` // Group 2: Non-nullable in Go, nullable in Java U8Field2 uint8 U16Field2 uint16 - U32VarField2 uint32 - U32FixedField2 uint32 - U64VarField2 uint64 - U64FixedField2 uint64 - U64TaggedField2 uint64 + U32VarField2 uint32 `fory:"compress=true"` + U32FixedField2 uint32 `fory:"compress=false"` + U64VarField2 uint64 `fory:"encoding=varint"` + U64FixedField2 uint64 `fory:"encoding=fixed"` + U64TaggedField2 uint64 `fory:"encoding=tagged"` } -func getUnsignedSchemaConsistent(obj interface{}) UnsignedSchemaConsistent { - switch v := obj.(type) { - case UnsignedSchemaConsistent: - return v - case *UnsignedSchemaConsistent: - return *v - default: - panic(fmt.Sprintf("expected UnsignedSchemaConsistent, got %T", obj)) +// ============================================================================ +// Unsigned Number Tests +// ============================================================================ + +func testUnsignedSchemaConsistentSimple() { + dataFile := getDataFile() + data := readFile(dataFile) + + f := fory.New(fory.WithXlang(true), fory.WithCompatible(false)) + f.Register(UnsignedSchemaConsistentSimple{}, 1) + + var obj interface{} + err := f.Deserialize(data, &obj) + if err != nil { + panic(fmt.Sprintf("Failed to deserialize: %v", err)) } -} -func getUnsignedSchemaCompatible(obj interface{}) UnsignedSchemaCompatible { - switch v := 
obj.(type) { - case UnsignedSchemaCompatible: - return v - case *UnsignedSchemaCompatible: - return *v - default: - panic(fmt.Sprintf("expected UnsignedSchemaCompatible, got %T", obj)) + result := getUnsignedSchemaConsistentSimple(obj) + + // Verify fields + assertEqual(uint64(1000000000), result.U64Tagged, "U64Tagged") + if result.U64TaggedNullable == nil || *result.U64TaggedNullable != 500000000 { + panic(fmt.Sprintf("U64TaggedNullable mismatch: expected 500000000, got %v", result.U64TaggedNullable)) } -} -// ============================================================================ -// Unsigned Number Tests -// ============================================================================ + serialized, err := f.Serialize(result) + if err != nil { + panic(fmt.Sprintf("Failed to serialize: %v", err)) + } + + writeFile(dataFile, serialized) +} func testUnsignedSchemaConsistent() { dataFile := getDataFile() data := readFile(dataFile) + fmt.Printf("Input size: %d bytes\n", len(data)) + fmt.Printf("Input hex: %x\n", data) + f := fory.New(fory.WithXlang(true), fory.WithCompatible(false)) f.Register(UnsignedSchemaConsistent{}, 501) - buf := fory.NewByteBuffer(data) var obj interface{} - err := f.DeserializeWithCallbackBuffers(buf, &obj, nil) + err := f.Deserialize(data, &obj) if err != nil { panic(fmt.Sprintf("Failed to deserialize: %v", err)) } @@ -2252,6 +2301,9 @@ func testUnsignedSchemaConsistent() { panic(fmt.Sprintf("Failed to serialize: %v", err)) } + fmt.Printf("Output size: %d bytes\n", len(serialized)) + fmt.Printf("Output hex: %x\n", serialized) + writeFile(dataFile, serialized) } @@ -2262,9 +2314,8 @@ func testUnsignedSchemaCompatible() { f := fory.New(fory.WithXlang(true), fory.WithCompatible(true)) f.Register(UnsignedSchemaCompatible{}, 502) - buf := fory.NewByteBuffer(data) var obj interface{} - err := f.DeserializeWithCallbackBuffers(buf, &obj, nil) + err := f.Deserialize(data, &obj) if err != nil { panic(fmt.Sprintf("Failed to deserialize: %v", err)) } 
@@ -2308,6 +2359,9 @@ func testUnsignedSchemaCompatible() { panic(fmt.Sprintf("Failed to serialize: %v", err)) } + fmt.Printf("[Go] Serialized output size: %d bytes\n", len(serialized)) + fmt.Printf("[Go] Serialized output hex: %x\n", serialized) + writeFile(dataFile, serialized) } @@ -2414,6 +2468,8 @@ func main() { testCircularRefSchemaConsistent() case "test_circular_ref_compatible": testCircularRefCompatible() + case "test_unsigned_schema_consistent_simple": + testUnsignedSchemaConsistentSimple() case "test_unsigned_schema_consistent": testUnsignedSchemaConsistent() case "test_unsigned_schema_compatible": diff --git a/go/fory/type_def.go b/go/fory/type_def.go index 78895f9c9a..a6d41f87c0 100644 --- a/go/fory/type_def.go +++ b/go/fory/type_def.go @@ -431,6 +431,67 @@ func buildFieldDefs(fory *Fory, value reflect.Value) ([]FieldDef, error) { if err != nil { return nil, fmt.Errorf("failed to build field type for field %s: %w", fieldName, err) } + + // Apply encoding override from struct tags if set + // This works for both direct types and pointer-wrapped types + baseKind := field.Type.Kind() + // Handle pointer types - get the element kind + if baseKind == reflect.Ptr { + baseKind = field.Type.Elem().Kind() + } + + // Check if we need to override the TypeID based on compress/encoding tags + var overrideTypeId TypeId = 0 + switch baseKind { + case reflect.Uint32: + if foryTag.CompressSet { + if foryTag.Compress { + overrideTypeId = VAR_UINT32 + } else { + overrideTypeId = UINT32 + } + } + case reflect.Int32: + if foryTag.CompressSet { + if foryTag.Compress { + overrideTypeId = VARINT32 + } else { + overrideTypeId = INT32 + } + } + case reflect.Uint64: + if foryTag.EncodingSet { + switch foryTag.Encoding { + case "fixed": + overrideTypeId = UINT64 + case "varint": + overrideTypeId = VAR_UINT64 + case "tagged": + overrideTypeId = TAGGED_UINT64 + default: + return nil, fmt.Errorf("field %s: invalid encoding value %q for uint64, must be 'fixed', 'varint', or 
'tagged'", fieldName, foryTag.Encoding) + } + } + case reflect.Int64: + if foryTag.EncodingSet { + switch foryTag.Encoding { + case "fixed": + overrideTypeId = INT64 + case "varint": + overrideTypeId = VARINT64 + case "tagged": + overrideTypeId = TAGGED_INT64 + default: + return nil, fmt.Errorf("field %s: invalid encoding value %q for int64, must be 'fixed', 'varint', or 'tagged'", fieldName, foryTag.Encoding) + } + } + } + + // Apply the override if one was determined + if overrideTypeId != 0 { + ft = NewSimpleFieldType(overrideTypeId) + } + // Determine nullable based on mode: // - In xlang mode: Per xlang spec, fields are NON-NULLABLE by default. // Only pointer types are nullable by default. diff --git a/go/fory/type_resolver.go b/go/fory/type_resolver.go index 51c3967f66..e3b9e18c52 100644 --- a/go/fory/type_resolver.go +++ b/go/fory/type_resolver.go @@ -81,6 +81,9 @@ var ( boolType = reflect.TypeOf((*bool)(nil)).Elem() byteType = reflect.TypeOf((*byte)(nil)).Elem() uint8Type = reflect.TypeOf((*uint8)(nil)).Elem() + uint16Type = reflect.TypeOf((*uint16)(nil)).Elem() + uint32Type = reflect.TypeOf((*uint32)(nil)).Elem() + uint64Type = reflect.TypeOf((*uint64)(nil)).Elem() int8Type = reflect.TypeOf((*int8)(nil)).Elem() int16Type = reflect.TypeOf((*int16)(nil)).Elem() int32Type = reflect.TypeOf((*int32)(nil)).Elem() @@ -120,7 +123,7 @@ type TypeInfo struct { NameBytes *MetaStringBytes IsDynamic bool TypeID uint32 - StaticId DispatchId + DispatchId DispatchId Serializer Serializer NeedWriteDef bool NeedWriteRef bool // Whether this type needs reference tracking @@ -342,6 +345,9 @@ func (r *TypeResolver) initialize() { // Register primitive types {boolType, BOOL, boolSerializer{}}, {byteType, UINT8, byteSerializer{}}, + {uint16Type, UINT16, uint16Serializer{}}, + {uint32Type, VAR_UINT32, uint32Serializer{}}, + {uint64Type, VAR_UINT64, uint64Serializer{}}, {int8Type, INT8, int8Serializer{}}, {int16Type, INT16, int16Serializer{}}, {int32Type, VARINT32, 
int32Serializer{}}, @@ -359,6 +365,31 @@ func (r *TypeResolver) initialize() { fmt.Errorf("init type error: %v", err) } } + + // Register additional TypeIds for types that support multiple encodings. + // This allows Go to deserialize data from Java that uses different encoding variants. + // For example, Java may send UINT32 (fixed) but Go only registered VAR_UINT32 by default. + // We need to map all encoding variants to the same Go type. + additionalTypeIds := []struct { + typeId TypeId + goType reflect.Type + }{ + // Fixed-size integer encodings (in addition to varint defaults) + {UINT32, uint32Type}, // Fixed UINT32 (11) → uint32 + {UINT64, uint64Type}, // Fixed UINT64 (13) → uint64 + {TAGGED_UINT64, uint64Type}, // Tagged UINT64 (15) → uint64 + {INT32, int32Type}, // Fixed INT32 (3) → int32 + {INT64, int64Type}, // Fixed INT64 (5) → int64 + {TAGGED_INT64, int64Type}, // Tagged INT64 (7) → int64 + } + for _, entry := range additionalTypeIds { + if _, exists := r.typeIDToTypeInfo[uint32(entry.typeId)]; !exists { + // Get the existing TypeInfo for this Go type and create a reference to it + if existingInfo, ok := r.typesInfo[entry.goType]; ok { + r.typeIDToTypeInfo[uint32(entry.typeId)] = existingInfo + } + } + } } func (r *TypeResolver) registerSerializer(type_ reflect.Type, typeId TypeId, s Serializer) error { @@ -461,7 +492,7 @@ func (r *TypeResolver) RegisterEnumByID(type_ reflect.Type, fullTypeID uint32) e TypeID: fullTypeID, Serializer: serializer, IsDynamic: isDynamicType(type_), - StaticId: GetDispatchId(type_), + DispatchId: GetDispatchId(type_), hashValue: calcTypeHash(type_), } r.typeIDToTypeInfo[fullTypeID] = typeInfo @@ -832,7 +863,7 @@ func (r *TypeResolver) getTypeInfo(value reflect.Value, create bool) (*TypeInfo, NameBytes: elemInfo.NameBytes, IsDynamic: elemInfo.IsDynamic, TypeID: elemInfo.TypeID, - StaticId: elemInfo.StaticId, + DispatchId: elemInfo.DispatchId, Serializer: ptrSerializer, NeedWriteDef: elemInfo.NeedWriteDef, hashValue: 
elemInfo.hashValue, @@ -1066,7 +1097,7 @@ func (r *TypeResolver) registerType( PkgPathBytes: nsBytes, // Encoded namespace bytes NameBytes: typeBytes, // Encoded type name bytes IsDynamic: isDynamicType(type_), - StaticId: GetDispatchId(type_), // Static type ID for fast path + DispatchId: GetDispatchId(type_), // Static type ID for fast path hashValue: calcTypeHash(type_), // Precomputed hash for fast lookups NeedWriteRef: NeedWriteRef(TypeId(typeID)), } @@ -1759,112 +1790,112 @@ func (r *TypeResolver) ReadTypeInfo(buffer *ByteBuffer, err *Error) *TypeInfo { Type: interfaceSliceType, TypeID: typeID, Serializer: r.typeToSerializers[interfaceSliceType], - StaticId: UnknowDispatchId, + DispatchId: UnknownDispatchId, } case SET, -SET: return &TypeInfo{ Type: genericSetType, TypeID: typeID, Serializer: r.typeToSerializers[genericSetType], - StaticId: UnknowDispatchId, + DispatchId: UnknownDispatchId, } case MAP, -MAP: return &TypeInfo{ Type: interfaceMapType, TypeID: typeID, Serializer: r.typeToSerializers[interfaceMapType], - StaticId: UnknowDispatchId, + DispatchId: UnknownDispatchId, } case BOOL: return &TypeInfo{ Type: reflect.TypeOf(false), TypeID: typeID, Serializer: r.typeToSerializers[reflect.TypeOf(false)], - StaticId: BoolDispatchId, + DispatchId: PrimitiveBoolDispatchId, } case INT8: return &TypeInfo{ Type: reflect.TypeOf(int8(0)), TypeID: typeID, Serializer: r.typeToSerializers[reflect.TypeOf(int8(0))], - StaticId: Int8DispatchId, + DispatchId: PrimitiveInt8DispatchId, } case UINT8: return &TypeInfo{ Type: reflect.TypeOf(uint8(0)), TypeID: typeID, Serializer: r.typeToSerializers[reflect.TypeOf(uint8(0))], - StaticId: Int8DispatchId, // Use Int8 static ID for uint8 + DispatchId: PrimitiveInt8DispatchId, // Use Int8 static ID for uint8 } case INT16: return &TypeInfo{ Type: reflect.TypeOf(int16(0)), TypeID: typeID, Serializer: r.typeToSerializers[reflect.TypeOf(int16(0))], - StaticId: Int16DispatchId, + DispatchId: PrimitiveInt16DispatchId, } case UINT16: 
return &TypeInfo{ Type: reflect.TypeOf(uint16(0)), TypeID: typeID, Serializer: r.typeToSerializers[reflect.TypeOf(uint16(0))], - StaticId: Int16DispatchId, // Use Int16 static ID for uint16 + DispatchId: PrimitiveInt16DispatchId, // Use Int16 static ID for uint16 } case INT32, VARINT32: return &TypeInfo{ Type: reflect.TypeOf(int32(0)), TypeID: typeID, Serializer: r.typeToSerializers[reflect.TypeOf(int32(0))], - StaticId: Int32DispatchId, + DispatchId: PrimitiveInt32DispatchId, } case UINT32: return &TypeInfo{ Type: reflect.TypeOf(uint32(0)), TypeID: typeID, Serializer: r.typeToSerializers[reflect.TypeOf(uint32(0))], - StaticId: Int32DispatchId, // Use Int32 static ID for uint32 + DispatchId: PrimitiveInt32DispatchId, // Use Int32 static ID for uint32 } case INT64, VARINT64, TAGGED_INT64: return &TypeInfo{ Type: reflect.TypeOf(int64(0)), TypeID: typeID, Serializer: r.typeToSerializers[reflect.TypeOf(int64(0))], - StaticId: Int64DispatchId, + DispatchId: PrimitiveInt64DispatchId, } case UINT64: return &TypeInfo{ Type: reflect.TypeOf(uint64(0)), TypeID: typeID, Serializer: r.typeToSerializers[reflect.TypeOf(uint64(0))], - StaticId: Int64DispatchId, // Use Int64 static ID for uint64 + DispatchId: PrimitiveInt64DispatchId, // Use Int64 static ID for uint64 } case FLOAT32: return &TypeInfo{ Type: reflect.TypeOf(float32(0)), TypeID: typeID, Serializer: r.typeToSerializers[reflect.TypeOf(float32(0))], - StaticId: Float32DispatchId, + DispatchId: PrimitiveFloat32DispatchId, } case FLOAT64: return &TypeInfo{ Type: reflect.TypeOf(float64(0)), TypeID: typeID, Serializer: r.typeToSerializers[reflect.TypeOf(float64(0))], - StaticId: Float64DispatchId, + DispatchId: PrimitiveFloat64DispatchId, } case STRING: return &TypeInfo{ Type: reflect.TypeOf(""), TypeID: typeID, Serializer: r.typeToSerializers[reflect.TypeOf("")], - StaticId: StringDispatchId, + DispatchId: StringDispatchId, } case BINARY: return &TypeInfo{ Type: reflect.TypeOf([]byte(nil)), TypeID: typeID, Serializer: 
r.typeToSerializers[reflect.TypeOf([]byte(nil))], - StaticId: UnknowDispatchId, + DispatchId: UnknownDispatchId, } } @@ -1930,49 +1961,49 @@ func (r *TypeResolver) readTypeInfoWithTypeID(buffer *ByteBuffer, typeID uint32, Type: interfaceSliceType, TypeID: typeID, Serializer: r.typeToSerializers[interfaceSliceType], - StaticId: UnknowDispatchId, + DispatchId: UnknownDispatchId, } case SET: return &TypeInfo{ Type: genericSetType, TypeID: typeID, Serializer: r.typeToSerializers[genericSetType], - StaticId: UnknowDispatchId, + DispatchId: UnknownDispatchId, } case MAP: return &TypeInfo{ Type: interfaceMapType, TypeID: typeID, Serializer: r.typeToSerializers[interfaceMapType], - StaticId: UnknowDispatchId, + DispatchId: UnknownDispatchId, } // Handle primitive types that may not be explicitly registered case BOOL: - return &TypeInfo{Type: boolType, TypeID: typeID, Serializer: r.typeToSerializers[boolType], StaticId: BoolDispatchId} + return &TypeInfo{Type: boolType, TypeID: typeID, Serializer: r.typeToSerializers[boolType], DispatchId: PrimitiveBoolDispatchId} case INT8: - return &TypeInfo{Type: int8Type, TypeID: typeID, Serializer: r.typeToSerializers[int8Type], StaticId: Int8DispatchId} + return &TypeInfo{Type: int8Type, TypeID: typeID, Serializer: r.typeToSerializers[int8Type], DispatchId: PrimitiveInt8DispatchId} case INT16: - return &TypeInfo{Type: int16Type, TypeID: typeID, Serializer: r.typeToSerializers[int16Type], StaticId: Int16DispatchId} + return &TypeInfo{Type: int16Type, TypeID: typeID, Serializer: r.typeToSerializers[int16Type], DispatchId: PrimitiveInt16DispatchId} case INT32, VARINT32: - return &TypeInfo{Type: int32Type, TypeID: typeID, Serializer: r.typeToSerializers[int32Type], StaticId: Int32DispatchId} + return &TypeInfo{Type: int32Type, TypeID: typeID, Serializer: r.typeToSerializers[int32Type], DispatchId: PrimitiveInt32DispatchId} case INT64, VARINT64, TAGGED_INT64: - return &TypeInfo{Type: int64Type, TypeID: typeID, Serializer: 
r.typeToSerializers[int64Type], StaticId: Int64DispatchId} + return &TypeInfo{Type: int64Type, TypeID: typeID, Serializer: r.typeToSerializers[int64Type], DispatchId: PrimitiveInt64DispatchId} case FLOAT32: - return &TypeInfo{Type: float32Type, TypeID: typeID, Serializer: r.typeToSerializers[float32Type], StaticId: Float32DispatchId} + return &TypeInfo{Type: float32Type, TypeID: typeID, Serializer: r.typeToSerializers[float32Type], DispatchId: PrimitiveFloat32DispatchId} case FLOAT64: - return &TypeInfo{Type: float64Type, TypeID: typeID, Serializer: r.typeToSerializers[float64Type], StaticId: Float64DispatchId} + return &TypeInfo{Type: float64Type, TypeID: typeID, Serializer: r.typeToSerializers[float64Type], DispatchId: PrimitiveFloat64DispatchId} case STRING: - return &TypeInfo{Type: stringType, TypeID: typeID, Serializer: r.typeToSerializers[stringType], StaticId: StringDispatchId} + return &TypeInfo{Type: stringType, TypeID: typeID, Serializer: r.typeToSerializers[stringType], DispatchId: StringDispatchId} case BINARY: - return &TypeInfo{Type: byteSliceType, TypeID: typeID, Serializer: r.typeToSerializers[byteSliceType], StaticId: ByteSliceDispatchId} + return &TypeInfo{Type: byteSliceType, TypeID: typeID, Serializer: r.typeToSerializers[byteSliceType], DispatchId: ByteSliceDispatchId} } // Handle UNKNOWN type (0) - used for polymorphic types if typeID == 0 { return &TypeInfo{ - Type: interfaceType, - TypeID: typeID, - StaticId: UnknowDispatchId, + Type: interfaceType, + TypeID: typeID, + DispatchId: UnknownDispatchId, } } diff --git a/go/fory/types.go b/go/fory/types.go index 4a7290074b..f8da0c2716 100644 --- a/go/fory/types.go +++ b/go/fory/types.go @@ -240,6 +240,15 @@ var primitiveTypeSizes = map[int16]int{ FLOAT64: 8, } +// MaxInt31 is the maximum value that fits in 31 bits (used for TAGGED_UINT64 encoding) +const MaxInt31 uint64 = 0x7FFFFFFF // 2^31 - 1 + +// MinInt31 is the minimum value that fits in 31 bits (used for TAGGED_INT64 encoding) +const 
MinInt31 int64 = -0x40000000 // -2^30 + +// MaxInt31Signed is MaxInt31 as a signed int64 for TAGGED_INT64 encoding +const MaxInt31Signed int64 = 0x3FFFFFFF // 2^30 - 1 + func getPrimitiveTypeSize(typeID int16) int { if sz, ok := primitiveTypeSizes[typeID]; ok { return sz @@ -263,25 +272,81 @@ func isUserDefinedType(typeID int16) bool { // DispatchId for switch-based fast path (avoids interface virtual method cost) // ============================================================================ -// DispatchId identifies concrete Go types for optimized serialization dispatch +// DispatchId identifies concrete Go types for optimized serialization dispatch. +// Following Java's pattern with separate IDs for primitive (non-nullable) and boxed (nullable) types. type DispatchId uint8 const ( - UnknowDispatchId DispatchId = iota - BoolDispatchId - Int8DispatchId - Int16DispatchId - Int32DispatchId - Int64DispatchId - IntDispatchId - Uint8DispatchId - Uint16DispatchId - Uint32DispatchId - Uint64DispatchId - UintDispatchId - Float32DispatchId - Float64DispatchId + UnknownDispatchId DispatchId = iota + + // Primitive (non-nullable) dispatch IDs - match Java's PRIMITIVE_* constants + PrimitiveBoolDispatchId + PrimitiveInt8DispatchId + PrimitiveInt16DispatchId + PrimitiveInt32DispatchId + PrimitiveVarint32DispatchId + PrimitiveInt64DispatchId + PrimitiveVarint64DispatchId + PrimitiveTaggedInt64DispatchId + PrimitiveFloat32DispatchId + PrimitiveFloat64DispatchId + PrimitiveUint8DispatchId + PrimitiveUint16DispatchId + PrimitiveUint32DispatchId + PrimitiveVarUint32DispatchId + PrimitiveUint64DispatchId + PrimitiveVarUint64DispatchId + PrimitiveTaggedUint64DispatchId + PrimitiveIntDispatchId // Go-specific: native int + PrimitiveUintDispatchId // Go-specific: native uint + + // Nullable dispatch IDs - match Java's non-PRIMITIVE_* constants + NullableBoolDispatchId + NullableInt8DispatchId + NullableInt16DispatchId + NullableInt32DispatchId + NullableVarint32DispatchId + 
NullableInt64DispatchId + NullableVarint64DispatchId + NullableTaggedInt64DispatchId + NullableFloat32DispatchId + NullableFloat64DispatchId + NullableUint8DispatchId + NullableUint16DispatchId + NullableUint32DispatchId + NullableVarUint32DispatchId + NullableUint64DispatchId + NullableVarUint64DispatchId + NullableTaggedUint64DispatchId + NullableIntDispatchId // Go-specific: *int + NullableUintDispatchId // Go-specific: *uint + + // Notnull pointer dispatch IDs - pointer types with nullable=false + // Write without null flag; on read, create default value if remote sends null + NotnullBoolPtrDispatchId + NotnullInt8PtrDispatchId + NotnullInt16PtrDispatchId + NotnullInt32PtrDispatchId + NotnullVarint32PtrDispatchId + NotnullInt64PtrDispatchId + NotnullVarint64PtrDispatchId + NotnullTaggedInt64PtrDispatchId + NotnullFloat32PtrDispatchId + NotnullFloat64PtrDispatchId + NotnullUint8PtrDispatchId + NotnullUint16PtrDispatchId + NotnullUint32PtrDispatchId + NotnullVarUint32PtrDispatchId + NotnullUint64PtrDispatchId + NotnullVarUint64PtrDispatchId + NotnullTaggedUint64PtrDispatchId + NotnullIntPtrDispatchId + NotnullUintPtrDispatchId + + // String dispatch ID StringDispatchId + + // Slice dispatch IDs ByteSliceDispatchId Int8SliceDispatchId Int16SliceDispatchId @@ -293,6 +358,8 @@ const ( Float64SliceDispatchId BoolSliceDispatchId StringSliceDispatchId + + // Map dispatch IDs StringStringMapDispatchId StringInt32MapDispatchId StringInt64MapDispatchId @@ -302,38 +369,46 @@ const ( Int32Int32MapDispatchId Int64Int64MapDispatchId IntIntMapDispatchId + + // Enum dispatch ID EnumDispatchId // Enum types (both ENUM and NAMED_ENUM) ) -// GetDispatchId returns the DispatchId for a reflect.Type +// GetDispatchId returns the DispatchId for a reflect.Type. +// For int32/int64/uint32/uint64, returns varint dispatch IDs by default since that's +// the default encoding in xlang serialization (VARINT32, VARINT64, VAR_UINT32, VAR_UINT64). 
func GetDispatchId(t reflect.Type) DispatchId { switch t.Kind() { case reflect.Bool: - return BoolDispatchId + return PrimitiveBoolDispatchId case reflect.Int8: - return Int8DispatchId + return PrimitiveInt8DispatchId case reflect.Int16: - return Int16DispatchId + return PrimitiveInt16DispatchId case reflect.Int32: - return Int32DispatchId + // Default to varint encoding (VARINT32) for xlang compatibility + return PrimitiveVarint32DispatchId case reflect.Int64: - return Int64DispatchId + // Default to varint encoding (VARINT64) for xlang compatibility + return PrimitiveVarint64DispatchId case reflect.Int: - return IntDispatchId + return PrimitiveIntDispatchId case reflect.Uint8: - return Uint8DispatchId + return PrimitiveUint8DispatchId case reflect.Uint16: - return Uint16DispatchId + return PrimitiveUint16DispatchId case reflect.Uint32: - return Uint32DispatchId + // Default to varint encoding (VAR_UINT32) for xlang compatibility + return PrimitiveVarUint32DispatchId case reflect.Uint64: - return Uint64DispatchId + // Default to varint encoding (VAR_UINT64) for xlang compatibility + return PrimitiveVarUint64DispatchId case reflect.Uint: - return UintDispatchId + return PrimitiveUintDispatchId case reflect.Float32: - return Float32DispatchId + return PrimitiveFloat32DispatchId case reflect.Float64: - return Float64DispatchId + return PrimitiveFloat64DispatchId case reflect.String: return StringDispatchId case reflect.Slice: @@ -362,7 +437,7 @@ func GetDispatchId(t reflect.Type) DispatchId { case reflect.String: return StringSliceDispatchId } - return UnknowDispatchId + return UnknownDispatchId case reflect.Map: // Check for specific common map types if t.Key().Kind() == reflect.String { @@ -385,9 +460,9 @@ func GetDispatchId(t reflect.Type) DispatchId { } else if t.Key().Kind() == reflect.Int && t.Elem().Kind() == reflect.Int { return IntIntMapDispatchId } - return UnknowDispatchId + return UnknownDispatchId default: - return UnknowDispatchId + return 
UnknownDispatchId } } @@ -403,41 +478,100 @@ func IsPrimitiveTypeId(typeId TypeId) bool { } } -// isFixedSizePrimitive returns true for non-nullable fixed-size primitives +// isFixedSizePrimitive returns true for fixed-size primitives and notnull pointer types. +// Includes INT32/UINT32/INT64/UINT64 (fixed encoding), NOT VARINT32/VAR_UINT32 etc. func isFixedSizePrimitive(staticId DispatchId, referencable bool) bool { - if referencable { + switch staticId { + case PrimitiveBoolDispatchId, PrimitiveInt8DispatchId, PrimitiveUint8DispatchId, + PrimitiveInt16DispatchId, PrimitiveUint16DispatchId, + PrimitiveInt32DispatchId, PrimitiveUint32DispatchId, + PrimitiveInt64DispatchId, PrimitiveUint64DispatchId, + PrimitiveFloat32DispatchId, PrimitiveFloat64DispatchId: + return !referencable + case NotnullBoolPtrDispatchId, NotnullInt8PtrDispatchId, NotnullUint8PtrDispatchId, + NotnullInt16PtrDispatchId, NotnullUint16PtrDispatchId, + NotnullInt32PtrDispatchId, NotnullUint32PtrDispatchId, + NotnullInt64PtrDispatchId, NotnullUint64PtrDispatchId, + NotnullFloat32PtrDispatchId, NotnullFloat64PtrDispatchId: + return true + default: return false } +} + +// isNullableFixedSizePrimitive returns true for nullable fixed-size primitive dispatch IDs. +// These are pointer types that use fixed encoding and have a ref flag. 
+func isNullableFixedSizePrimitive(staticId DispatchId) bool { switch staticId { - case BoolDispatchId, Int8DispatchId, Uint8DispatchId, Int16DispatchId, Uint16DispatchId, - Float32DispatchId, Float64DispatchId: + case NullableBoolDispatchId, NullableInt8DispatchId, NullableUint8DispatchId, + NullableInt16DispatchId, NullableUint16DispatchId, + NullableInt32DispatchId, NullableUint32DispatchId, + NullableInt64DispatchId, NullableUint64DispatchId, + NullableFloat32DispatchId, NullableFloat64DispatchId: return true default: return false } } -// isVarintPrimitive returns true for non-nullable varint primitives +// isNullableVarintPrimitive returns true for nullable varint primitive dispatch IDs. +// These are pointer types that use varint encoding and have a ref flag. +func isNullableVarintPrimitive(staticId DispatchId) bool { + switch staticId { + case NullableVarint32DispatchId, NullableVarint64DispatchId, + NullableVarUint32DispatchId, NullableVarUint64DispatchId, + NullableTaggedInt64DispatchId, NullableTaggedUint64DispatchId, + NullableIntDispatchId, NullableUintDispatchId: + return true + default: + return false + } +} + +// isVarintPrimitive returns true for varint primitives and notnull pointer types. +// Includes VARINT32/VAR_UINT32/VARINT64/VAR_UINT64 (variable encoding), NOT INT32/UINT32 etc. 
func isVarintPrimitive(staticId DispatchId, referencable bool) bool { - if referencable { + switch staticId { + case PrimitiveVarint32DispatchId, PrimitiveVarint64DispatchId, + PrimitiveVarUint32DispatchId, PrimitiveVarUint64DispatchId, + PrimitiveTaggedInt64DispatchId, PrimitiveTaggedUint64DispatchId, + PrimitiveIntDispatchId, PrimitiveUintDispatchId: + return !referencable + case NotnullVarint32PtrDispatchId, NotnullVarint64PtrDispatchId, + NotnullVarUint32PtrDispatchId, NotnullVarUint64PtrDispatchId, + NotnullTaggedInt64PtrDispatchId, NotnullTaggedUint64PtrDispatchId, + NotnullIntPtrDispatchId, NotnullUintPtrDispatchId: + return true + default: return false } +} + +// isPrimitiveDispatchId returns true if the staticId represents a primitive type +func isPrimitiveDispatchId(staticId DispatchId) bool { switch staticId { - case Int32DispatchId, Int64DispatchId, IntDispatchId, - Uint32DispatchId, Uint64DispatchId, UintDispatchId: + case PrimitiveBoolDispatchId, PrimitiveInt8DispatchId, PrimitiveInt16DispatchId, PrimitiveInt32DispatchId, + PrimitiveInt64DispatchId, PrimitiveIntDispatchId, PrimitiveUint8DispatchId, PrimitiveUint16DispatchId, + PrimitiveUint32DispatchId, PrimitiveUint64DispatchId, PrimitiveUintDispatchId, + PrimitiveFloat32DispatchId, PrimitiveFloat64DispatchId: return true default: return false } } -// isPrimitiveStaticId returns true if the staticId represents a primitive type -func isPrimitiveStaticId(staticId DispatchId) bool { +// isNotnullPtrDispatchId returns true if the staticId represents a notnull pointer type +func isNotnullPtrDispatchId(staticId DispatchId) bool { switch staticId { - case BoolDispatchId, Int8DispatchId, Int16DispatchId, Int32DispatchId, - Int64DispatchId, IntDispatchId, Uint8DispatchId, Uint16DispatchId, - Uint32DispatchId, Uint64DispatchId, UintDispatchId, - Float32DispatchId, Float64DispatchId: + case NotnullBoolPtrDispatchId, NotnullInt8PtrDispatchId, NotnullUint8PtrDispatchId, + NotnullInt16PtrDispatchId, 
NotnullUint16PtrDispatchId, + NotnullInt32PtrDispatchId, NotnullUint32PtrDispatchId, + NotnullInt64PtrDispatchId, NotnullUint64PtrDispatchId, + NotnullFloat32PtrDispatchId, NotnullFloat64PtrDispatchId, + NotnullVarint32PtrDispatchId, NotnullVarint64PtrDispatchId, + NotnullVarUint32PtrDispatchId, NotnullVarUint64PtrDispatchId, + NotnullTaggedInt64PtrDispatchId, NotnullTaggedUint64PtrDispatchId, + NotnullIntPtrDispatchId, NotnullUintPtrDispatchId: return true default: return false @@ -455,16 +589,120 @@ func isNumericKind(kind reflect.Kind) bool { } } +// GetDispatchIdFromTypeId converts a TypeId to a DispatchId based on nullability. +// This follows Java's DispatchId.xlangTypeIdToDispatchId pattern. +func GetDispatchIdFromTypeId(typeId TypeId, nullable bool) DispatchId { + if nullable { + // Nullable (nullable) types + switch typeId { + case BOOL: + return NullableBoolDispatchId + case INT8: + return NullableInt8DispatchId + case INT16: + return NullableInt16DispatchId + case INT32: + return NullableInt32DispatchId + case VARINT32: + return NullableVarint32DispatchId + case INT64: + return NullableInt64DispatchId + case VARINT64: + return NullableVarint64DispatchId + case TAGGED_INT64: + return NullableTaggedInt64DispatchId + case FLOAT32: + return NullableFloat32DispatchId + case FLOAT64: + return NullableFloat64DispatchId + case UINT8: + return NullableUint8DispatchId + case UINT16: + return NullableUint16DispatchId + case UINT32: + return NullableUint32DispatchId + case VAR_UINT32: + return NullableVarUint32DispatchId + case UINT64: + return NullableUint64DispatchId + case VAR_UINT64: + return NullableVarUint64DispatchId + case TAGGED_UINT64: + return NullableTaggedUint64DispatchId + case STRING: + return StringDispatchId + default: + return UnknownDispatchId + } + } else { + // Primitive (non-nullable) types + switch typeId { + case BOOL: + return PrimitiveBoolDispatchId + case INT8: + return PrimitiveInt8DispatchId + case INT16: + return 
PrimitiveInt16DispatchId + case INT32: + return PrimitiveInt32DispatchId + case VARINT32: + return PrimitiveVarint32DispatchId + case INT64: + return PrimitiveInt64DispatchId + case VARINT64: + return PrimitiveVarint64DispatchId + case TAGGED_INT64: + return PrimitiveTaggedInt64DispatchId + case FLOAT32: + return PrimitiveFloat32DispatchId + case FLOAT64: + return PrimitiveFloat64DispatchId + case UINT8: + return PrimitiveUint8DispatchId + case UINT16: + return PrimitiveUint16DispatchId + case UINT32: + return PrimitiveUint32DispatchId + case VAR_UINT32: + return PrimitiveVarUint32DispatchId + case UINT64: + return PrimitiveUint64DispatchId + case VAR_UINT64: + return PrimitiveVarUint64DispatchId + case TAGGED_UINT64: + return PrimitiveTaggedUint64DispatchId + case STRING: + return StringDispatchId + default: + return UnknownDispatchId + } + } +} + +// IsPrimitiveDispatchId returns true if the dispatch ID is for a primitive (non-nullable) type +func IsPrimitiveDispatchId(id DispatchId) bool { + return id >= PrimitiveBoolDispatchId && id <= PrimitiveUintDispatchId +} + +// IsNullablePrimitiveDispatchId returns true if the dispatch ID is for a nullable primitive type +func IsNullablePrimitiveDispatchId(id DispatchId) bool { + return id >= NullableBoolDispatchId && id <= NullableUintDispatchId +} + // getFixedSizeByDispatchId returns byte size for fixed primitives (0 if not fixed) func getFixedSizeByDispatchId(staticId DispatchId) int { switch staticId { - case BoolDispatchId, Int8DispatchId, Uint8DispatchId: + case PrimitiveBoolDispatchId, PrimitiveInt8DispatchId, PrimitiveUint8DispatchId, + NotnullBoolPtrDispatchId, NotnullInt8PtrDispatchId, NotnullUint8PtrDispatchId: return 1 - case Int16DispatchId, Uint16DispatchId: + case PrimitiveInt16DispatchId, PrimitiveUint16DispatchId, + NotnullInt16PtrDispatchId, NotnullUint16PtrDispatchId: return 2 - case Float32DispatchId: + case PrimitiveInt32DispatchId, PrimitiveUint32DispatchId, PrimitiveFloat32DispatchId, + 
NotnullInt32PtrDispatchId, NotnullUint32PtrDispatchId, NotnullFloat32PtrDispatchId: return 4 - case Float64DispatchId: + case PrimitiveInt64DispatchId, PrimitiveUint64DispatchId, PrimitiveFloat64DispatchId, + NotnullInt64PtrDispatchId, NotnullUint64PtrDispatchId, NotnullFloat64PtrDispatchId: return 8 default: return 0 @@ -474,10 +712,142 @@ func getFixedSizeByDispatchId(staticId DispatchId) int { // getVarintMaxSizeByDispatchId returns max byte size for varint primitives (0 if not varint) func getVarintMaxSizeByDispatchId(staticId DispatchId) int { switch staticId { - case Int32DispatchId, Uint32DispatchId: + case PrimitiveVarint32DispatchId, PrimitiveVarUint32DispatchId, + NotnullVarint32PtrDispatchId, NotnullVarUint32PtrDispatchId: + return 5 + case PrimitiveVarint64DispatchId, PrimitiveVarUint64DispatchId, PrimitiveIntDispatchId, PrimitiveUintDispatchId, + NotnullVarint64PtrDispatchId, NotnullVarUint64PtrDispatchId, NotnullIntPtrDispatchId, NotnullUintPtrDispatchId: + return 10 + case PrimitiveTaggedInt64DispatchId, PrimitiveTaggedUint64DispatchId, + NotnullTaggedInt64PtrDispatchId, NotnullTaggedUint64PtrDispatchId: + return 9 + default: + return 0 + } +} + +// getEncodingFromTypeId returns the encoding string ("fixed", "varint", "tagged") from a TypeId. +func getEncodingFromTypeId(typeId TypeId) string { + internalId := typeId & 0xFF + switch TypeId(internalId) { + case INT32, INT64, UINT32, UINT64: + return "fixed" + case VARINT32, VARINT64, VAR_UINT32, VAR_UINT64: + return "varint" + case TAGGED_INT64, TAGGED_UINT64: + return "tagged" + default: + return "varint" // default encoding + } +} + +// GetNotnullPtrDispatchId returns the NotnullXxxPtrDispatchId for a pointer-to-numeric type. +// elemKind is the kind of the element type (e.g., reflect.Uint8 for *uint8). +// encoding specifies the encoding type (fixed, varint, tagged) for int32/int64/uint32/uint64. 
+func GetNotnullPtrDispatchId(elemKind reflect.Kind, encoding string) DispatchId { + switch elemKind { + case reflect.Bool: + return NotnullBoolPtrDispatchId + case reflect.Int8: + return NotnullInt8PtrDispatchId + case reflect.Int16: + return NotnullInt16PtrDispatchId + case reflect.Int32: + if encoding == "fixed" { + return NotnullInt32PtrDispatchId + } + return NotnullVarint32PtrDispatchId + case reflect.Int64: + if encoding == "fixed" { + return NotnullInt64PtrDispatchId + } else if encoding == "tagged" { + return NotnullTaggedInt64PtrDispatchId + } + return NotnullVarint64PtrDispatchId + case reflect.Int: + return NotnullIntPtrDispatchId + case reflect.Uint8: + return NotnullUint8PtrDispatchId + case reflect.Uint16: + return NotnullUint16PtrDispatchId + case reflect.Uint32: + if encoding == "fixed" { + return NotnullUint32PtrDispatchId + } + return NotnullVarUint32PtrDispatchId + case reflect.Uint64: + if encoding == "fixed" { + return NotnullUint64PtrDispatchId + } else if encoding == "tagged" { + return NotnullTaggedUint64PtrDispatchId + } + return NotnullVarUint64PtrDispatchId + case reflect.Uint: + return NotnullUintPtrDispatchId + case reflect.Float32: + return NotnullFloat32PtrDispatchId + case reflect.Float64: + return NotnullFloat64PtrDispatchId + default: + return UnknownDispatchId + } +} + +// isPrimitiveFixedDispatchId returns true if the dispatch ID is for a non-nullable fixed-size primitive. +// Note: int32/int64/uint32/uint64 are NOT included here because they default to varint encoding. +// Only types that are always fixed-size are included (bool, int8/uint8, int16/uint16, float32/float64). +// Fixed int32/int64/uint32/uint64 encodings (INT32, INT64, UINT32, UINT64) use their specific dispatch IDs. 
+func isPrimitiveFixedDispatchId(id DispatchId) bool { + switch id { + case PrimitiveBoolDispatchId, PrimitiveInt8DispatchId, PrimitiveUint8DispatchId, + PrimitiveInt16DispatchId, PrimitiveUint16DispatchId, + // Fixed-size int32/int64/uint32/uint64 - only when explicitly specified via TypeId + PrimitiveInt32DispatchId, PrimitiveUint32DispatchId, + PrimitiveInt64DispatchId, PrimitiveUint64DispatchId, + PrimitiveFloat32DispatchId, PrimitiveFloat64DispatchId: + return true + default: + return false + } +} + +// getFixedSizeByPrimitiveDispatchId returns byte size for fixed primitives based on dispatch ID +func getFixedSizeByPrimitiveDispatchId(id DispatchId) int { + switch id { + case PrimitiveBoolDispatchId, PrimitiveInt8DispatchId, PrimitiveUint8DispatchId: + return 1 + case PrimitiveInt16DispatchId, PrimitiveUint16DispatchId: + return 2 + case PrimitiveInt32DispatchId, PrimitiveUint32DispatchId, PrimitiveFloat32DispatchId: + return 4 + case PrimitiveInt64DispatchId, PrimitiveUint64DispatchId, PrimitiveFloat64DispatchId: + return 8 + default: + return 0 + } +} + +// isPrimitiveVarintDispatchId returns true if the dispatch ID is for a non-nullable varint primitive +func isPrimitiveVarintDispatchId(id DispatchId) bool { + switch id { + case PrimitiveVarint32DispatchId, PrimitiveVarint64DispatchId, PrimitiveTaggedInt64DispatchId, + PrimitiveVarUint32DispatchId, PrimitiveVarUint64DispatchId, PrimitiveTaggedUint64DispatchId, + PrimitiveIntDispatchId, PrimitiveUintDispatchId: + return true + default: + return false + } +} + +// getVarintMaxSizeByPrimitiveDispatchId returns max byte size for varint primitives based on dispatch ID +func getVarintMaxSizeByPrimitiveDispatchId(id DispatchId) int { + switch id { + case PrimitiveVarint32DispatchId, PrimitiveVarUint32DispatchId: return 5 - case Int64DispatchId, IntDispatchId, Uint64DispatchId, UintDispatchId: + case PrimitiveVarint64DispatchId, PrimitiveVarUint64DispatchId, PrimitiveIntDispatchId, PrimitiveUintDispatchId: return 
10 + case PrimitiveTaggedInt64DispatchId, PrimitiveTaggedUint64DispatchId: + return 12 // 4 byte tag + 8 byte value default: return 0 } diff --git a/go/fory/writer.go b/go/fory/writer.go index f11c74cb16..9449a47d47 100644 --- a/go/fory/writer.go +++ b/go/fory/writer.go @@ -183,25 +183,25 @@ func (c *WriteContext) WriteTypeId(id TypeId) { // writeFast writes a value using fast path based on DispatchId func (c *WriteContext) writeFast(ptr unsafe.Pointer, ct DispatchId) { switch ct { - case BoolDispatchId: + case PrimitiveBoolDispatchId: c.buffer.WriteBool(*(*bool)(ptr)) - case Int8DispatchId: + case PrimitiveInt8DispatchId: c.buffer.WriteByte_(*(*byte)(ptr)) - case Int16DispatchId: + case PrimitiveInt16DispatchId: c.buffer.WriteInt16(*(*int16)(ptr)) - case Int32DispatchId: + case PrimitiveInt32DispatchId: c.buffer.WriteVarint32(*(*int32)(ptr)) - case IntDispatchId: + case PrimitiveIntDispatchId: if strconv.IntSize == 64 { c.buffer.WriteVarint64(int64(*(*int)(ptr))) } else { c.buffer.WriteVarint32(int32(*(*int)(ptr))) } - case Int64DispatchId: + case PrimitiveInt64DispatchId: c.buffer.WriteVarint64(*(*int64)(ptr)) - case Float32DispatchId: + case PrimitiveFloat32DispatchId: c.buffer.WriteFloat32(*(*float32)(ptr)) - case Float64DispatchId: + case PrimitiveFloat64DispatchId: c.buffer.WriteFloat64(*(*float64)(ptr)) case StringDispatchId: writeString(c.buffer, *(*string)(ptr)) diff --git a/java/fory-core/src/main/java/org/apache/fory/builder/CodecBuilder.java b/java/fory-core/src/main/java/org/apache/fory/builder/CodecBuilder.java index 1faedf678d..cce3953c5e 100644 --- a/java/fory-core/src/main/java/org/apache/fory/builder/CodecBuilder.java +++ b/java/fory-core/src/main/java/org/apache/fory/builder/CodecBuilder.java @@ -373,15 +373,21 @@ protected Expression setFieldValue(Expression bean, Descriptor d, Expression val if (!d.isFinalField() && Modifier.isPublic(d.getModifiers()) && Modifier.isPublic(d.getRawType().getModifiers())) { + if 
(!d.getRawType().isAssignableFrom(value.type().getRawType())) { + value = tryInlineCast(value, d.getTypeRef()); + } return new Expression.SetField(bean, fieldName, value); } else if (d.getWriteMethod() != null && Modifier.isPublic(d.getWriteMethod().getModifiers())) { + if (!d.getRawType().isAssignableFrom(value.type().getRawType())) { + value = tryInlineCast(value, d.getTypeRef()); + } return new Invoke(bean, d.getWriteMethod().getName(), value); } else { if (!d.isFinalField() && !Modifier.isPrivate(d.getModifiers())) { if (AccessorHelper.defineSetter(d.getField())) { Class accessorClass = AccessorHelper.getAccessorClass(d.getField()); - if (!value.type().equals(d.getTypeRef())) { - value = new Cast(value, d.getTypeRef()); + if (!d.getRawType().isAssignableFrom(value.type().getRawType())) { + value = tryInlineCast(value, d.getTypeRef()); } return new StaticInvoke( accessorClass, d.getName(), PRIMITIVE_VOID_TYPE, false, bean, value); @@ -390,8 +396,8 @@ protected Expression setFieldValue(Expression bean, Descriptor d, Expression val if (d.getWriteMethod() != null && !Modifier.isPrivate(d.getWriteMethod().getModifiers())) { if (AccessorHelper.defineSetter(d.getWriteMethod())) { Class accessorClass = AccessorHelper.getAccessorClass(d.getWriteMethod()); - if (!value.type().equals(d.getTypeRef())) { - value = new Cast(value, d.getTypeRef()); + if (!d.getRawType().isAssignableFrom(value.type().getRawType())) { + value = tryInlineCast(value, d.getTypeRef()); } return new StaticInvoke( accessorClass, d.getWriteMethod().getName(), PRIMITIVE_VOID_TYPE, false, bean, value); @@ -473,6 +479,7 @@ protected Reference getOrCreateField( boolean isStatic, Class type, String fieldName, Supplier value) { Reference fieldRef = fieldMap.get(fieldName); if (fieldRef == null) { + fieldName = ctx.newName(fieldName); ctx.addField(isStatic, true, ctx.type(type), fieldName, value.get()); fieldRef = new Reference(fieldName, TypeRef.of(type)); fieldMap.put(fieldName, fieldRef); diff --git 
a/java/fory-core/src/main/java/org/apache/fory/memory/MemoryBuffer.java b/java/fory-core/src/main/java/org/apache/fory/memory/MemoryBuffer.java index 3292eebf0a..9ce16158e2 100644 --- a/java/fory-core/src/main/java/org/apache/fory/memory/MemoryBuffer.java +++ b/java/fory-core/src/main/java/org/apache/fory/memory/MemoryBuffer.java @@ -774,7 +774,10 @@ public int writeVarUint32(int v) { // generated code is smaller. Otherwise, `MapRefResolver.writeRefOrNull` // may be `callee is too large`/`already compiled into a big method` ensure(writerIndex + 8); - int varintBytes = _unsafePutVarUint36Small(writerIndex, v); + // Use Integer.toUnsignedLong to handle values > INT32_MAX correctly + // Without this, negative int values would be sign-extended to long, + // causing incorrect varint encoding (9+ bytes instead of 5) + int varintBytes = _unsafePutVarUint36Small(writerIndex, Integer.toUnsignedLong(v)); writerIndex += varintBytes; return varintBytes; } @@ -786,7 +789,8 @@ public int writeVarUint32(int v) { // CHECKSTYLE.OFF:MethodName public int _unsafeWriteVarUint32(int v) { // CHECKSTYLE.ON:MethodName - int varintBytes = _unsafePutVarUint36Small(writerIndex, v); + // Use Integer.toUnsignedLong to handle values > INT32_MAX correctly + int varintBytes = _unsafePutVarUint36Small(writerIndex, Integer.toUnsignedLong(v)); writerIndex += varintBytes; return varintBytes; } @@ -820,6 +824,7 @@ private int continueWriteVarUint32Small7(int value) { /** * Caller must ensure there must be at least 8 bytes for writing, otherwise the crash may occur. + * Don't pass int value to avoid sign extension. 
*/ // CHECKSTYLE.OFF:MethodName public int _unsafePutVarUint36Small(int index, long value) { diff --git a/java/fory-core/src/main/java/org/apache/fory/meta/FieldInfo.java b/java/fory-core/src/main/java/org/apache/fory/meta/FieldInfo.java index 1b011caa46..076b9d7cf6 100644 --- a/java/fory-core/src/main/java/org/apache/fory/meta/FieldInfo.java +++ b/java/fory-core/src/main/java/org/apache/fory/meta/FieldInfo.java @@ -29,6 +29,7 @@ import org.apache.fory.serializer.converter.FieldConverters; import org.apache.fory.type.Descriptor; import org.apache.fory.type.DescriptorBuilder; +import org.apache.fory.type.Types; /** * FieldInfo contains all necessary info of a field to execute serialization/deserialization logic. @@ -89,9 +90,11 @@ public FieldTypes.FieldType getFieldType() { Descriptor toDescriptor(TypeResolver resolver, Descriptor descriptor) { TypeRef declared = descriptor != null ? descriptor.getTypeRef() : null; TypeRef typeRef = fieldType.toTypeToken(resolver, declared); - String typeName = typeRef.getType().getTypeName(); + String typeName = fieldType.getTypeName(resolver, typeRef); if (fieldType instanceof FieldTypes.RegisteredFieldType) { - typeName = String.valueOf(((FieldTypes.RegisteredFieldType) fieldType).getClassId()); + if (!Types.isPrimitiveType(fieldType.xtypeId)) { + typeName = String.valueOf(((FieldTypes.RegisteredFieldType) fieldType).getClassId()); + } } // Get nullable and trackingRef from remote FieldType - these are what the remote peer // used when serializing, so we must respect them when deserializing diff --git a/java/fory-core/src/main/java/org/apache/fory/meta/FieldTypes.java b/java/fory-core/src/main/java/org/apache/fory/meta/FieldTypes.java index 3dde5484e6..8753a3fd22 100644 --- a/java/fory-core/src/main/java/org/apache/fory/meta/FieldTypes.java +++ b/java/fory-core/src/main/java/org/apache/fory/meta/FieldTypes.java @@ -50,6 +50,7 @@ import org.apache.fory.type.GenericType; import org.apache.fory.type.TypeUtils; import 
org.apache.fory.type.Types; +import org.apache.fory.type.union.Union; import org.apache.fory.util.Preconditions; public class FieldTypes { @@ -154,6 +155,8 @@ private static FieldType buildFieldType( genericType.getTypeParameter1() == null ? GenericType.build(Object.class) : genericType.getTypeParameter1())); + } else if (Union.class.isAssignableFrom(rawType)) { + return new UnionFieldType(nullable, trackingRef); } else if (TypeUtils.unwrap(rawType).isPrimitive()) { // unified basic types for xlang and native mode return new RegisteredFieldType(nullable, trackingRef, xtypeId); @@ -219,6 +222,10 @@ public boolean nullable() { */ public abstract TypeRef toTypeToken(TypeResolver classResolver, TypeRef declared); + public String getTypeName(TypeResolver resolver, TypeRef typeRef) { + return typeRef.getType().getTypeName(); + } + @Override public boolean equals(Object o) { if (this == o) { @@ -443,6 +450,24 @@ public TypeRef toTypeToken(TypeResolver resolver, TypeRef declared) { return TypeRef.of(cls, new TypeExtMeta(classId, nullable, trackingRef)); } + @Override + public String getTypeName(TypeResolver resolver, TypeRef typeRef) { + // Some registered class may not be registered on peer class, we always use + // registered id to keep consistent order. + // Note that this is only used for fields sort in native mode. 
+ // For xlang mode, we always sort fields by type id in + if (resolver instanceof ClassResolver) { + ClassResolver classResolver = (ClassResolver) resolver; + // Peer class may not register this class id, which will introduce inconsistent field order + if (classResolver.isInternalRegistered(classId)) { + return String.valueOf(classId); + } else { + return "Registered"; + } + } + return String.valueOf(classId); + } + @Override public boolean equals(Object o) { if (this == o) { @@ -688,6 +713,11 @@ public TypeRef toTypeToken(TypeResolver classResolver, TypeRef declared) { return TypeRef.of(NonexistentClass.NonexistentEnum.class); } + @Override + public String getTypeName(TypeResolver resolver, TypeRef typeRef) { + return "Enum"; + } + @Override public String toString() { return "EnumFieldType{" + "xtypeId=" + xtypeId + ", nullable=" + nullable + '}'; @@ -698,10 +728,6 @@ public static class ArrayFieldType extends FieldType { private final FieldType componentType; private final int dimensions; - public ArrayFieldType(boolean trackingRef, FieldType componentType, int dimensions) { - this(-1, true, trackingRef, componentType, dimensions); - } - public ArrayFieldType( int xtypeId, boolean nullable, @@ -732,6 +758,14 @@ public TypeRef toTypeToken(TypeResolver classResolver, TypeRef declared) { } } + @Override + public String getTypeName(TypeResolver resolver, TypeRef typeRef) { + // For native mode, this return same `Array` type to ensure consistent order even some array type + // is not exist on current deserialization process. + // For primitive/registered array, it goes to RegisteredFieldType. + return "Array"; + } + public int getDimensions() { return dimensions; } @@ -784,7 +818,15 @@ public ObjectFieldType(int xtypeId, boolean nullable, boolean trackingRef) { @Override public TypeRef toTypeToken(TypeResolver classResolver, TypeRef declared) { - return TypeRef.of(Object.class, new TypeExtMeta(xtypeId, nullable, trackingRef)); + Class clz = declared == null ? 
Object.class : declared.getRawType(); + return TypeRef.of(clz, new TypeExtMeta(xtypeId, nullable, trackingRef)); + } + + @Override + public String getTypeName(TypeResolver resolver, TypeRef typeRef) { + // When fields not exist on deserializing struct, we can't know its actual field type, + // sort based on actual type name will incur inconsistent fields order + return "Object"; } @Override diff --git a/java/fory-core/src/main/java/org/apache/fory/resolver/ClassResolver.java b/java/fory-core/src/main/java/org/apache/fory/resolver/ClassResolver.java index 4d7cc17112..bba1fed804 100644 --- a/java/fory-core/src/main/java/org/apache/fory/resolver/ClassResolver.java +++ b/java/fory-core/src/main/java/org/apache/fory/resolver/ClassResolver.java @@ -722,7 +722,7 @@ public boolean isMonomorphic(Class clz) { if (Union.class.isAssignableFrom(clz)) { return true; } - return (isInnerClass(clz) || clz.isEnum()); + return (isInternalRegistered(clz) || clz.isEnum()); } return ReflectionUtils.isMonomorphic(clz); } @@ -731,8 +731,13 @@ public boolean isBuildIn(Descriptor descriptor) { return isMonomorphic(descriptor); } + public boolean isInternalRegistered(int classId) { + return classId != NO_CLASS_ID && classId < innerEndClassId; + } + + /** Returns true if cls is fory inner registered class. 
*/ - boolean isInnerClass(Class cls) { + public boolean isInternalRegistered(Class cls) { Short classId = extRegistry.registeredClassIdMap.get(cls); if (classId == null) { ClassInfo classInfo = getClassInfo(cls, false); diff --git a/java/fory-core/src/main/java/org/apache/fory/serializer/AbstractObjectSerializer.java b/java/fory-core/src/main/java/org/apache/fory/serializer/AbstractObjectSerializer.java index 2ab84d1c26..6f7dd454e6 100644 --- a/java/fory-core/src/main/java/org/apache/fory/serializer/AbstractObjectSerializer.java +++ b/java/fory-core/src/main/java/org/apache/fory/serializer/AbstractObjectSerializer.java @@ -823,6 +823,8 @@ static boolean readBasicObjectFieldValue( return true; // let common path handle this. } // add time types serialization here. + // Handle both primitive and nullable dispatchIds for schema compatible mode + // where Java field is boxed but ClassDef says non-nullable (primitive encoding) switch (dispatchId) { case DispatchId.STRING: // fastpath for string. 
if (fory.getStringSerializer().needToWriteRef()) { @@ -831,49 +833,67 @@ static boolean readBasicObjectFieldValue( fieldAccessor.putObject(targetObject, fory.readString(buffer)); } return false; + case DispatchId.PRIMITIVE_BOOL: case DispatchId.BOOL: fieldAccessor.putObject(targetObject, buffer.readBoolean()); return false; + case DispatchId.PRIMITIVE_INT8: + case DispatchId.PRIMITIVE_UINT8: case DispatchId.INT8: case DispatchId.UINT8: fieldAccessor.putObject(targetObject, buffer.readByte()); return false; + case DispatchId.PRIMITIVE_CHAR: case DispatchId.CHAR: fieldAccessor.putObject(targetObject, buffer.readChar()); return false; + case DispatchId.PRIMITIVE_INT16: + case DispatchId.PRIMITIVE_UINT16: case DispatchId.INT16: case DispatchId.UINT16: fieldAccessor.putObject(targetObject, buffer.readInt16()); return false; + case DispatchId.PRIMITIVE_INT32: + case DispatchId.PRIMITIVE_UINT32: case DispatchId.INT32: case DispatchId.UINT32: fieldAccessor.putObject(targetObject, buffer.readInt32()); return false; + case DispatchId.PRIMITIVE_VARINT32: case DispatchId.VARINT32: fieldAccessor.putObject(targetObject, buffer.readVarInt32()); return false; + case DispatchId.PRIMITIVE_VAR_UINT32: case DispatchId.VAR_UINT32: fieldAccessor.putObject(targetObject, buffer.readVarUint32()); return false; + case DispatchId.PRIMITIVE_INT64: + case DispatchId.PRIMITIVE_UINT64: case DispatchId.INT64: case DispatchId.UINT64: fieldAccessor.putObject(targetObject, buffer.readInt64()); return false; + case DispatchId.PRIMITIVE_VARINT64: case DispatchId.VARINT64: fieldAccessor.putObject(targetObject, buffer.readVarInt64()); return false; + case DispatchId.PRIMITIVE_TAGGED_INT64: case DispatchId.TAGGED_INT64: fieldAccessor.putObject(targetObject, buffer.readTaggedInt64()); return false; + case DispatchId.PRIMITIVE_VAR_UINT64: case DispatchId.VAR_UINT64: fieldAccessor.putObject(targetObject, buffer.readVarUint64()); return false; + case DispatchId.PRIMITIVE_TAGGED_UINT64: case 
DispatchId.TAGGED_UINT64: fieldAccessor.putObject(targetObject, buffer.readTaggedUint64()); return false; + case DispatchId.PRIMITIVE_FLOAT32: case DispatchId.FLOAT32: fieldAccessor.putObject(targetObject, buffer.readFloat32()); return false; + case DispatchId.PRIMITIVE_FLOAT64: case DispatchId.FLOAT64: fieldAccessor.putObject(targetObject, buffer.readFloat64()); return false; diff --git a/java/fory-core/src/main/java/org/apache/fory/serializer/MetaSharedSerializer.java b/java/fory-core/src/main/java/org/apache/fory/serializer/MetaSharedSerializer.java index 44b34889aa..4ebf038957 100644 --- a/java/fory-core/src/main/java/org/apache/fory/serializer/MetaSharedSerializer.java +++ b/java/fory-core/src/main/java/org/apache/fory/serializer/MetaSharedSerializer.java @@ -42,6 +42,7 @@ import org.apache.fory.type.DescriptorGrouper; import org.apache.fory.type.DispatchId; import org.apache.fory.type.Generics; +import org.apache.fory.type.Types; import org.apache.fory.util.DefaultValueUtils; import org.apache.fory.util.GraalvmSupport; import org.apache.fory.util.Preconditions; @@ -100,11 +101,12 @@ public MetaSharedSerializer(Fory fory, Class type, ClassDef classDef) { "========== MetaSharedSerializer sorted descriptors for {} ==========", type.getName()); for (Descriptor d : descriptorGrouper.getSortedDescriptors()) { LOG.info( - " {} -> {}, ref {}, nullable {}", + " {} -> {}, ref {}, nullable {}, type id {}", d.getName(), d.getTypeName(), d.isTrackingRef(), - d.isNullable()); + d.isNullable(), + Types.getDescriptorTypeId(fory, d)); } } // d.getField() may be null if not exists in this class when meta share enabled. 
@@ -165,6 +167,18 @@ public void xwrite(MemoryBuffer buffer, T value) { @Override public T read(MemoryBuffer buffer) { + if (Utils.debugOutputEnabled()) { + LOG.info( + "========== MetaSharedSerializer.read() for {} ==========", type.getName()); + LOG.info("Buffer readerIndex at start: {}", buffer.readerIndex()); + LOG.info("buildInFields count: {}", buildInFields.length); + for (int i = 0; i < buildInFields.length; i++) { + SerializationFieldInfo fi = buildInFields[i]; + LOG.info( + " buildInField[{}]: name={}, dispatchId={}, nullable={}, isPrimitive={}, hasAccessor={}", + i, fi.qualifiedFieldName, fi.dispatchId, fi.nullable, fi.isPrimitive, fi.fieldAccessor != null); + } + } if (isRecord) { Object[] fieldValues = new Object[buildInFields.length + otherFields.length + containerFields.length]; @@ -184,6 +198,25 @@ public T read(MemoryBuffer buffer) { for (SerializationFieldInfo fieldInfo : this.buildInFields) { FieldAccessor fieldAccessor = fieldInfo.fieldAccessor; boolean nullable = fieldInfo.nullable; + if (Utils.debugOutputEnabled()) { + LOG.info( + "[Java] About to read field: name={}, dispatchId={}, nullable={}, isPrimitive={}, bufferPos={}", + fieldInfo.qualifiedFieldName, fieldInfo.dispatchId, nullable, fieldInfo.isPrimitive, buffer.readerIndex()); + // Print next 16 bytes from buffer for debugging + int pos = buffer.readerIndex(); + int remaining = Math.min(16, buffer.size() - pos); + if (remaining > 0) { + byte[] peek = new byte[remaining]; + for (int i = 0; i < remaining; i++) { + peek[i] = buffer.getByte(pos + i); + } + StringBuilder hex = new StringBuilder(); + for (byte b : peek) { + hex.append(String.format("%02x", b)); + } + LOG.info("[Java] Next {} bytes at pos {}: {}", remaining, pos, hex.toString()); + } + } if (fieldAccessor != null) { int dispatchId = fieldInfo.dispatchId; boolean needRead = true; diff --git a/java/fory-core/src/main/java/org/apache/fory/serializer/struct/Fingerprint.java 
b/java/fory-core/src/main/java/org/apache/fory/serializer/struct/Fingerprint.java index 4da42be9ee..d852323cd1 100644 --- a/java/fory-core/src/main/java/org/apache/fory/serializer/struct/Fingerprint.java +++ b/java/fory-core/src/main/java/org/apache/fory/serializer/struct/Fingerprint.java @@ -70,7 +70,7 @@ public static String computeStructFingerprint(Fory fory, List descri List fieldInfos = new ArrayList<>(descriptors.size()); for (Descriptor descriptor : descriptors) { Class rawType = descriptor.getTypeRef().getRawType(); - int typeId = getTypeId(fory, rawType); + int typeId = getTypeId(fory, descriptor); // Get field identifier: tag ID if configured, otherwise snake_case name String fieldIdentifier; @@ -132,7 +132,8 @@ public static String computeStructFingerprint(Fory fory, List descri return builder.toString(); } - private static int getTypeId(Fory fory, Class cls) { + private static int getTypeId(Fory fory, Descriptor descriptor) { + Class cls = descriptor.getTypeRef().getRawType(); TypeResolver resolver = fory._getTypeResolver(); if (resolver.isSet(cls)) { return Types.SET; @@ -148,14 +149,9 @@ private static int getTypeId(Fory fory, Class cls) { if (classInfo == null) { return Types.UNKNOWN; } - int typeId; - if (fory.isCrossLanguage()) { - typeId = classInfo.getXtypeId(); - if (Types.isUserDefinedType((byte) typeId)) { - return Types.UNKNOWN; - } - } else { - typeId = classInfo.getClassId(); + int typeId = Types.getDescriptorTypeId(fory, descriptor); + if (Types.isUserDefinedType((byte) (typeId & 0xff))) { + return Types.UNKNOWN; } return typeId; } diff --git a/java/fory-core/src/test/java/org/apache/fory/serializer/CompatibleSerializerTest.java b/java/fory-core/src/test/java/org/apache/fory/serializer/CompatibleSerializerTest.java index a16160ead5..39c305019f 100644 --- a/java/fory-core/src/test/java/org/apache/fory/serializer/CompatibleSerializerTest.java +++ b/java/fory-core/src/test/java/org/apache/fory/serializer/CompatibleSerializerTest.java @@ 
-160,14 +160,15 @@ public void testWriteNestedMap() throws Exception { Assert.assertEquals(o1, o2); } - @Test - public void testWriteCompatibleContainer() throws Exception { + @Test(dataProvider = "enableCodegen") + public void testWriteCompatibleContainer(boolean enableCodegen) throws Exception { Fory fory = Fory.builder() .withLanguage(Language.JAVA) .withRefTracking(true) .withCompatibleMode(CompatibleMode.COMPATIBLE) .requireClassRegistration(false) + .withCodegen(enableCodegen) .build(); BeanA beanA = BeanA.createBeanA(2); Class cls = ClassUtils.createCompatibleClass1(); diff --git a/java/fory-core/src/test/java/org/apache/fory/xlang/GoXlangTest.java b/java/fory-core/src/test/java/org/apache/fory/xlang/GoXlangTest.java index bd351ddd7b..5a4a51e780 100644 --- a/java/fory-core/src/test/java/org/apache/fory/xlang/GoXlangTest.java +++ b/java/fory-core/src/test/java/org/apache/fory/xlang/GoXlangTest.java @@ -49,7 +49,7 @@ public class GoXlangTest extends XlangTestBase { protected void ensurePeerReady() { String enabled = System.getenv("FORY_GO_JAVA_CI"); if (!"1".equals(enabled)) { - throw new SkipException("Skipping GoXlangTest: FORY_GO_JAVA_CI not set to 1"); +// throw new SkipException("Skipping GoXlangTest: FORY_GO_JAVA_CI not set to 1"); } boolean goInstalled = true; try { @@ -458,4 +458,19 @@ public void testCircularRefSchemaConsistent(boolean enableCodegen) throws java.i public void testCircularRefCompatible(boolean enableCodegen) throws java.io.IOException { super.testCircularRefCompatible(enableCodegen); } + + @Test(dataProvider = "enableCodegen") + public void testUnsignedSchemaConsistent(boolean enableCodegen) throws java.io.IOException { + super.testUnsignedSchemaConsistent(enableCodegen); + } + + @Test(dataProvider = "enableCodegen") + public void testUnsignedSchemaConsistentSimple(boolean enableCodegen) throws java.io.IOException { + super.testUnsignedSchemaConsistentSimple(enableCodegen); + } + + @Test(dataProvider = "enableCodegen") + public void 
testUnsignedSchemaCompatible(boolean enableCodegen) throws java.io.IOException { + super.testUnsignedSchemaCompatible(enableCodegen); + } } diff --git a/java/fory-core/src/test/java/org/apache/fory/xlang/XlangTestBase.java b/java/fory-core/src/test/java/org/apache/fory/xlang/XlangTestBase.java index 7202868bfd..a5a4ddc684 100644 --- a/java/fory-core/src/test/java/org/apache/fory/xlang/XlangTestBase.java +++ b/java/fory-core/src/test/java/org/apache/fory/xlang/XlangTestBase.java @@ -2480,15 +2480,15 @@ private Object normalizeNulls(Object obj) { @Data static class UnsignedSchemaConsistent { // Primitive unsigned fields - @Uint8Type short u8; + @Uint8Type byte u8; - @Uint16Type int u16; + @Uint16Type short u16; @Uint32Type(compress = true) - long u32Var; + int u32Var; @Uint32Type(compress = false) - long u32Fixed; + int u32Fixed; @Uint64Type(encoding = LongEncoding.VARINT64) long u64Var; @@ -2502,19 +2502,19 @@ static class UnsignedSchemaConsistent { // Boxed nullable unsigned fields @ForyField(nullable = true) @Uint8Type - Short u8Nullable; + Byte u8Nullable; @ForyField(nullable = true) @Uint16Type - Integer u16Nullable; + Short u16Nullable; @ForyField(nullable = true) @Uint32Type(compress = true) - Long u32VarNullable; + Integer u32VarNullable; @ForyField(nullable = true) @Uint32Type(compress = false) - Long u32FixedNullable; + Integer u32FixedNullable; @ForyField(nullable = true) @Uint64Type(encoding = LongEncoding.VARINT64) @@ -2529,6 +2529,40 @@ static class UnsignedSchemaConsistent { Long u64TaggedNullable; } + @Data + static class UnsignedSchemaConsistentSimple { + @Uint64Type(encoding = LongEncoding.TAGGED_INT64) + long u64Tagged; + @ForyField(nullable = true) + @Uint64Type(encoding = LongEncoding.TAGGED_INT64) + Long u64TaggedNullable; + } + + @Test(dataProvider = "enableCodegen") + public void testUnsignedSchemaConsistentSimple(boolean enableCodegen) throws java.io.IOException { + String caseName = "test_unsigned_schema_consistent_simple"; + Fory fory = + 
Fory.builder() + .withLanguage(Language.XLANG) + .withCompatibleMode(CompatibleMode.SCHEMA_CONSISTENT) + .withCodegen(enableCodegen) + .build(); + fory.register(UnsignedSchemaConsistentSimple.class, 1); + UnsignedSchemaConsistentSimple obj = new UnsignedSchemaConsistentSimple(); + obj.u64Tagged = 1000000000L; // Within tagged range + obj.u64TaggedNullable = 500000000L; // Within tagged range + // First verify Java serialization works + Assert.assertEquals(xserDe(fory, obj), obj); + + MemoryBuffer buffer = MemoryBuffer.newHeapBuffer(512); + fory.serialize(buffer, obj); + ExecutionContext ctx = prepareExecution(caseName, buffer.getBytes(0, buffer.writerIndex())); + runPeer(ctx); + MemoryBuffer buffer2 = readBuffer(ctx.dataFile()); + UnsignedSchemaConsistentSimple result = (UnsignedSchemaConsistentSimple) fory.deserialize(buffer2); + Assert.assertEquals(result, obj); + } + @Test(dataProvider = "enableCodegen") public void testUnsignedSchemaConsistent(boolean enableCodegen) throws java.io.IOException { String caseName = "test_unsigned_schema_consistent"; @@ -2542,19 +2576,19 @@ public void testUnsignedSchemaConsistent(boolean enableCodegen) throws java.io.I UnsignedSchemaConsistent obj = new UnsignedSchemaConsistent(); // Primitive fields - obj.u8 = 200; // Max uint8 range testing - obj.u16 = 60000; // Max uint16 range testing - obj.u32Var = 3000000000L; // > INT_MAX to test unsigned - obj.u32Fixed = 4000000000L; + obj.u8 = (byte) 200; // Max uint8 range testing + obj.u16 = (short) 60000; // Max uint16 range testing + obj.u32Var = (int) 3000000000L; // > INT_MAX to test unsigned + obj.u32Fixed = (int) 4000000000L; obj.u64Var = 10000000000L; obj.u64Fixed = 15000000000L; obj.u64Tagged = 1000000000L; // Within tagged range // Nullable boxed fields with values - obj.u8Nullable = (short) 128; - obj.u16Nullable = 40000; - obj.u32VarNullable = 2500000000L; - obj.u32FixedNullable = 3500000000L; + obj.u8Nullable = (byte) 128; + obj.u16Nullable = (short) 40000; + 
obj.u32VarNullable = (int) 2500000000L; + obj.u32FixedNullable = (int) 3500000000L; obj.u64VarNullable = 8000000000L; obj.u64FixedNullable = 12000000000L; obj.u64TaggedNullable = 500000000L; @@ -2565,14 +2599,30 @@ public void testUnsignedSchemaConsistent(boolean enableCodegen) throws java.io.I MemoryBuffer buffer = MemoryBuffer.newHeapBuffer(512); fory.serialize(buffer, obj); - ExecutionContext ctx = prepareExecution(caseName, buffer.getBytes(0, buffer.writerIndex())); + byte[] javaBytes = buffer.getBytes(0, buffer.writerIndex()); + System.out.printf("Java output size: %d bytes%n", javaBytes.length); + System.out.printf("Java output hex: %s%n", bytesToHex(javaBytes)); + + ExecutionContext ctx = prepareExecution(caseName, javaBytes); runPeer(ctx); MemoryBuffer buffer2 = readBuffer(ctx.dataFile()); + byte[] goBytes = buffer2.getBytes(0, buffer2.size()); + System.out.printf("Go output size: %d bytes%n", goBytes.length); + System.out.printf("Go output hex: %s%n", bytesToHex(goBytes)); + UnsignedSchemaConsistent result = (UnsignedSchemaConsistent) fory.deserialize(buffer2); Assert.assertEquals(result, obj); } + private static String bytesToHex(byte[] bytes) { + StringBuilder sb = new StringBuilder(); + for (byte b : bytes) { + sb.append(String.format("%02x", b)); + } + return sb.toString(); + } + /** * Test struct for unsigned number schema compatible tests (Java side). Group 1: non-nullable * primitive fields. Group 2: nullable boxed fields with "2" suffix. 
Other languages flip @@ -2581,15 +2631,15 @@ public void testUnsignedSchemaConsistent(boolean enableCodegen) throws java.io.I @Data static class UnsignedSchemaCompatible { // Group 1: Primitive unsigned fields (non-nullable in Java, Optional in other languages) - @Uint8Type short u8; + @Uint8Type byte u8; - @Uint16Type int u16; + @Uint16Type short u16; @Uint32Type(compress = true) - long u32Var; + int u32Var; @Uint32Type(compress = false) - long u32Fixed; + int u32Fixed; @Uint64Type(encoding = LongEncoding.VARINT64) long u64Var; @@ -2603,19 +2653,19 @@ static class UnsignedSchemaCompatible { // Group 2: Nullable boxed fields (nullable in Java, non-Optional in other languages) @ForyField(nullable = true) @Uint8Type - Short u8Field2; + Byte u8Field2; @ForyField(nullable = true) @Uint16Type - Integer u16Field2; + Short u16Field2; @ForyField(nullable = true) @Uint32Type(compress = true) - Long u32VarField2; + Integer u32VarField2; @ForyField(nullable = true) @Uint32Type(compress = false) - Long u32FixedField2; + Integer u32FixedField2; @ForyField(nullable = true) @Uint64Type(encoding = LongEncoding.VARINT64) @@ -2644,19 +2694,19 @@ public void testUnsignedSchemaCompatible(boolean enableCodegen) throws java.io.I UnsignedSchemaCompatible obj = new UnsignedSchemaCompatible(); // Primitive fields - obj.u8 = 200; - obj.u16 = 60000; - obj.u32Var = 3000000000L; - obj.u32Fixed = 4000000000L; + obj.u8 = (byte) 200; + obj.u16 = (short) 60000; + obj.u32Var = (int) 3000000000L; + obj.u32Fixed = (int) 4000000000L; obj.u64Var = 10000000000L; obj.u64Fixed = 15000000000L; obj.u64Tagged = 1000000000L; // Group 2 fields with values - obj.u8Field2 = (short) 128; - obj.u16Field2 = 40000; - obj.u32VarField2 = 2500000000L; - obj.u32FixedField2 = 3500000000L; + obj.u8Field2 = (byte) 128; + obj.u16Field2 = (short) 40000; + obj.u32VarField2 = (int) 2500000000L; + obj.u32FixedField2 = (int) 3500000000L; obj.u64VarField2 = 8000000000L; obj.u64FixedField2 = 12000000000L; obj.u64TaggedField2 = 
500000000L; From 4b225ba80eca1e7f8d9f49be90e274c6d0946e84 Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Fri, 9 Jan 2026 16:53:55 +0800 Subject: [PATCH 19/44] refactor go struct serializer --- go/fory/codegen/utils.go | 122 +- go/fory/field_info.go | 796 +++ go/fory/struct.go | 5520 +++++++---------- go/fory/struct_test.go | 71 + go/fory/tag.go | 328 +- go/fory/tag_test.go | 120 +- go/fory/tests/structs_fory_gen.go | 194 +- go/fory/type_def.go | 43 +- go/fory/type_resolver.go | 4 +- go/fory/types.go | 44 +- .../org/apache/fory/annotation/Int64Type.java | 8 +- .../apache/fory/annotation/Uint64Type.java | 12 +- .../java/org/apache/fory/config/Config.java | 2 +- .../org/apache/fory/config/ForyBuilder.java | 8 +- .../org/apache/fory/config/LongEncoding.java | 10 +- .../fory/serializer/PrimitiveSerializers.java | 20 +- .../apache/fory/type/TypeAnnotationUtils.java | 12 +- .../serializer/PrimitiveSerializersTest.java | 4 +- .../apache/fory/serializer/UnsignedTest.java | 36 +- .../org/apache/fory/xlang/XlangTestBase.java | 28 +- 20 files changed, 3514 insertions(+), 3868 deletions(-) create mode 100644 go/fory/field_info.go create mode 100644 go/fory/struct_test.go diff --git a/go/fory/codegen/utils.go b/go/fory/codegen/utils.go index a66a8e5fda..f562ccf1d7 100644 --- a/go/fory/codegen/utils.go +++ b/go/fory/codegen/utils.go @@ -33,6 +33,7 @@ type FieldInfo struct { Index int // Original field index in struct IsPrimitive bool // Whether it's a Fory primitive type IsPointer bool // Whether it's a pointer type + Nullable bool // Whether the field can be null (pointer types) TypeID string // Fory TypeID for sorting PrimitiveSize int // Size for primitive type sorting } @@ -138,8 +139,33 @@ func getTypeID(t types.Type) string { t = ptr.Elem() } - // Check slice types - if _, ok := t.(*types.Slice); ok { + // Check slice types - distinguish primitive arrays from generic lists + if slice, ok := t.(*types.Slice); ok { + elemType := slice.Elem() + // For pointer to primitive, 
unwrap the pointer + if ptr, ok := elemType.(*types.Pointer); ok { + elemType = ptr.Elem() + } + // Check if element is a primitive type (primitive arrays use specific typeIDs) + if basic, ok := elemType.Underlying().(*types.Basic); ok { + switch basic.Kind() { + case types.Bool: + return "BOOL_ARRAY" + case types.Int8: + return "INT8_ARRAY" + case types.Int16: + return "INT16_ARRAY" + case types.Int32: + return "INT32_ARRAY" + case types.Int, types.Int64: + return "INT64_ARRAY" + case types.Float32: + return "FLOAT32_ARRAY" + case types.Float64: + return "FLOAT64_ARRAY" + } + } + // Non-primitive slices use LIST return "LIST" } @@ -263,17 +289,36 @@ func getTypeIDValue(typeID string) int { case "FLOAT64": return int(fory.FLOAT64) case "STRING": - return int(fory.STRING) // 12 + return int(fory.STRING) // 9 + case "BINARY": + return int(fory.BINARY) // 10 + case "LIST": + return int(fory.LIST) // 20 + case "SET": + return int(fory.SET) // 21 + case "MAP": + return int(fory.MAP) // 22 case "TIMESTAMP": return int(fory.TIMESTAMP) // 25 case "LOCAL_DATE": return int(fory.LOCAL_DATE) // 26 case "NAMED_STRUCT": return int(fory.NAMED_STRUCT) // 17 - case "LIST": - return int(fory.LIST) // 21 - case "MAP": - return int(fory.MAP) // 23 + // Primitive array types + case "BOOL_ARRAY": + return int(fory.BOOL_ARRAY) // 39 + case "INT8_ARRAY": + return int(fory.INT8_ARRAY) // 40 + case "INT16_ARRAY": + return int(fory.INT16_ARRAY) // 41 + case "INT32_ARRAY": + return int(fory.INT32_ARRAY) // 42 + case "INT64_ARRAY": + return int(fory.INT64_ARRAY) // 43 + case "FLOAT32_ARRAY": + return int(fory.FLOAT32_ARRAY) // 49 + case "FLOAT64_ARRAY": + return int(fory.FLOAT64_ARRAY) // 50 default: return 999 // Unknown types sort last } @@ -329,14 +374,15 @@ func sortFields(fields []*FieldInfo) { return f1.SnakeName < f2.SnakeName case groupOtherInternalType: - // Other internal type fields: sort by type id then snake case field name + // Internal type fields (STRING, BINARY, LIST, SET, 
MAP): sort by type id then name only. + // Java does NOT sort by nullable flag for these types. if f1.TypeID != f2.TypeID { return getTypeIDValue(f1.TypeID) < getTypeIDValue(f2.TypeID) } return f1.SnakeName < f2.SnakeName - case groupList, groupSet, groupMap, groupOther: - // List/Set/Map/Other fields: sort by snake case field name only + case groupPrimitiveArray, groupOther: + // Primitive arrays and other fields: sort by snake case field name only return f1.SnakeName < f2.SnakeName default: @@ -347,13 +393,13 @@ func sortFields(fields []*FieldInfo) { } // Field group constants for sorting +// This matches reflection's field ordering in field_info.go: +// primitives → boxed → otherInternalType (STRING/BINARY/LIST/SET/MAP) → primitiveArray → other const ( groupPrimitive = 0 // primitive and nullable primitive fields - groupOtherInternalType = 1 // other internal type fields (string, timestamp, etc.) - groupList = 2 // list fields - groupSet = 3 // set fields - groupMap = 4 // map fields - groupOther = 5 // other fields + groupOtherInternalType = 1 // STRING, BINARY, LIST, SET, MAP (sorted by typeId, name) + groupPrimitiveArray = 2 // primitive arrays (BOOL_ARRAY, INT32_ARRAY, etc.) 
- sorted by name + groupOther = 3 // structs, enums, and unknown types - sorted by name ) // getFieldGroup categorizes a field into its sorting group @@ -366,38 +412,29 @@ func getFieldGroup(field *FieldInfo) int { return groupPrimitive } - // List fields - if typeID == "LIST" { - return groupList - } - - // Set fields - if typeID == "SET" { - return groupSet + // Primitive array fields - sorted by name only + primitiveArrayTypes := map[string]bool{ + "BOOL_ARRAY": true, + "INT8_ARRAY": true, + "INT16_ARRAY": true, + "INT32_ARRAY": true, + "INT64_ARRAY": true, + "FLOAT32_ARRAY": true, + "FLOAT64_ARRAY": true, } - - // Map fields - if typeID == "MAP" { - return groupMap + if primitiveArrayTypes[typeID] { + return groupPrimitiveArray } - // Other internal type fields - // These are fory internal types that are not primitives/lists/sets/maps - // Examples: STRING, TIMESTAMP, LOCAL_DATE, NAMED_STRUCT, etc. + // Internal types (STRING, BINARY, LIST, SET, MAP) - sorted by typeId, nullable, name + // These match reflection's category 1 in getFieldCategory internalTypes := map[string]bool{ - "STRING": true, - "TIMESTAMP": true, - "LOCAL_DATE": true, - "NAMED_STRUCT": true, - "STRUCT": true, - "BINARY": true, - "ENUM": true, - "NAMED_ENUM": true, - "EXT": true, - "NAMED_EXT": true, - "INTERFACE": true, // for interface{} types + "STRING": true, + "BINARY": true, + "LIST": true, + "SET": true, + "MAP": true, } - if internalTypes[typeID] { return groupOtherInternalType } @@ -448,6 +485,7 @@ func analyzeField(field *types.Var, index int) (*FieldInfo, error) { Index: index, IsPrimitive: isPrimitive, IsPointer: isPointer, + Nullable: isPointer, // Pointer types are nullable, slices/maps are non-nullable in xlang mode TypeID: typeID, PrimitiveSize: primitiveSize, }, nil diff --git a/go/fory/field_info.go b/go/fory/field_info.go new file mode 100644 index 0000000000..55da217f51 --- /dev/null +++ b/go/fory/field_info.go @@ -0,0 +1,796 @@ +// Licensed to the Apache Software 
Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package fory + +import ( + "fmt" + "reflect" + "sort" + "strings" +) + +// FieldInfo stores field metadata computed ENTIRELY at init time. +// All flags and decisions are pre-computed to eliminate runtime checks. +type FieldInfo struct { + Name string + Offset uintptr + Type reflect.Type + DispatchId DispatchId + TypeId TypeId // Fory type ID for the serializer + Serializer Serializer + Nullable bool + FieldIndex int // -1 if field doesn't exist in current struct (for compatible mode) + FieldDef FieldDef // original FieldDef from remote TypeDef (for compatible mode skip) + + // Pre-computed sizes and offsets (for fixed primitives) + FixedSize int // 0 if not fixed-size, else 1/2/4/8 + WriteOffset int // Offset within fixed-fields buffer region (sum of preceding field sizes) + + // Pre-computed flags for serialization (computed at init time) + RefMode RefMode // ref mode for serializer.Write/Read + WriteType bool // whether to write type info (true for struct fields in compatible mode) + HasGenerics bool // whether element types are known from TypeDef (for container fields) + + // Tag-based configuration (from fory struct tags) + TagID int // -1 = use field name, >=0 = use tag ID + HasForyTag bool // 
Whether field has explicit fory tag + TagRefSet bool // Whether ref was explicitly set via fory tag + TagRef bool // The ref value from fory tag (only valid if TagRefSet is true) + TagNullableSet bool // Whether nullable was explicitly set via fory tag + TagNullable bool // The nullable value from fory tag (only valid if TagNullableSet is true) + + // Pre-computed type flags (computed at init time to avoid runtime reflection) + IsPtr bool // True if field.Type.Kind() == reflect.Ptr +} + +// FieldGroup holds categorized and sorted fields for optimized serialization. +// Fields are stored as values (not pointers) for better cache locality. +// Each field belongs to exactly one category: +// - FixedFields: non-nullable fixed-size primitives (bool, int8-64, uint8-64, float32/64) +// - VarintFields: non-nullable varint primitives (varint32/64, var_uint32/64, tagged_int64/uint64) +// - RemainingFields: all other fields (nullable primitives, strings, collections, structs, etc.) +type FieldGroup struct { + FixedFields []FieldInfo // Non-nullable fixed-size primitives + VarintFields []FieldInfo // Non-nullable varint primitives + RemainingFields []FieldInfo // All other fields + FixedSize int // Total bytes for fixed-size fields + MaxVarintSize int // Maximum bytes for varint fields +} + +// FieldCount returns the total number of fields across all categories. +func (g *FieldGroup) FieldCount() int { + return len(g.FixedFields) + len(g.VarintFields) + len(g.RemainingFields) +} + +// ForEachField iterates over all fields in serialization order (fixed, varint, remaining). +func (g *FieldGroup) ForEachField(fn func(*FieldInfo)) { + for i := range g.FixedFields { + fn(&g.FixedFields[i]) + } + for i := range g.VarintFields { + fn(&g.VarintFields[i]) + } + for i := range g.RemainingFields { + fn(&g.RemainingFields[i]) + } +} + +// DebugPrint prints field group information for debugging. 
+func (g *FieldGroup) DebugPrint(typeName string) { + if !DebugOutputEnabled() { + return + } + fmt.Printf("[Go] ========== Sorted fields for %s ==========\n", typeName) + fmt.Printf("[Go] Go sorted fixedFields (%d):\n", len(g.FixedFields)) + for i := range g.FixedFields { + f := &g.FixedFields[i] + fmt.Printf("[Go] [%d] %s -> dispatchId=%d, typeId=%d, size=%d, nullable=%v\n", + i, f.Name, f.DispatchId, f.TypeId, f.FixedSize, f.Nullable) + } + fmt.Printf("[Go] Go sorted varintFields (%d):\n", len(g.VarintFields)) + for i := range g.VarintFields { + f := &g.VarintFields[i] + fmt.Printf("[Go] [%d] %s -> dispatchId=%d, typeId=%d, nullable=%v\n", + i, f.Name, f.DispatchId, f.TypeId, f.Nullable) + } + fmt.Printf("[Go] Go sorted remainingFields (%d):\n", len(g.RemainingFields)) + for i := range g.RemainingFields { + f := &g.RemainingFields[i] + fmt.Printf("[Go] [%d] %s -> dispatchId=%d, typeId=%d, nullable=%v\n", + i, f.Name, f.DispatchId, f.TypeId, f.Nullable) + } + fmt.Printf("[Go] ===========================================\n") +} + +// GroupFields categorizes and sorts fields into FixedFields, VarintFields, and RemainingFields. +// It computes pre-computed sizes and WriteOffset for batch buffer reservation. +// Fields are sorted within each group to match Java's wire format order. 
+func GroupFields(fields []FieldInfo) FieldGroup { + var g FieldGroup + + // Categorize fields + for i := range fields { + field := &fields[i] + if isFixedSizePrimitive(field.DispatchId, field.Nullable) { + // Non-nullable fixed-size primitives only + field.FixedSize = getFixedSizeByDispatchId(field.DispatchId) + g.FixedFields = append(g.FixedFields, *field) + } else if isVarintPrimitive(field.DispatchId, field.Nullable) { + // Non-nullable varint primitives only + g.VarintFields = append(g.VarintFields, *field) + } else { + // All other fields including nullable primitives + g.RemainingFields = append(g.RemainingFields, *field) + } + } + + // Sort fixedFields: size desc, typeId desc, name asc + sort.SliceStable(g.FixedFields, func(i, j int) bool { + fi, fj := &g.FixedFields[i], &g.FixedFields[j] + if fi.FixedSize != fj.FixedSize { + return fi.FixedSize > fj.FixedSize // size descending + } + if fi.TypeId != fj.TypeId { + return fi.TypeId > fj.TypeId // typeId descending + } + return fi.Name < fj.Name // name ascending + }) + + // Compute WriteOffset after sorting + for i := range g.FixedFields { + g.FixedFields[i].WriteOffset = g.FixedSize + g.FixedSize += g.FixedFields[i].FixedSize + } + + // Sort varintFields: underlying type size desc, typeId desc, name asc + // Note: Java uses primitive type size (8 for long, 4 for int), not encoding max size + sort.SliceStable(g.VarintFields, func(i, j int) bool { + fi, fj := &g.VarintFields[i], &g.VarintFields[j] + sizeI := getUnderlyingTypeSize(fi.DispatchId) + sizeJ := getUnderlyingTypeSize(fj.DispatchId) + if sizeI != sizeJ { + return sizeI > sizeJ // size descending + } + if fi.TypeId != fj.TypeId { + return fi.TypeId > fj.TypeId // typeId descending + } + return fi.Name < fj.Name // name ascending + }) + + // Compute maxVarintSize + for i := range g.VarintFields { + g.MaxVarintSize += getVarintMaxSizeByDispatchId(g.VarintFields[i].DispatchId) + } + + // Sort remainingFields: nullable primitives first (by 
primitiveComparator), + // then other internal types (typeId, name), then lists, sets, maps, other (by name) + sort.SliceStable(g.RemainingFields, func(i, j int) bool { + fi, fj := &g.RemainingFields[i], &g.RemainingFields[j] + catI, catJ := getFieldCategory(fi), getFieldCategory(fj) + if catI != catJ { + return catI < catJ + } + // Within nullable primitives category, use primitiveComparator logic + if catI == 0 { + return comparePrimitiveFields(fi, fj) + } + // Within other internal types category (STRING, BINARY, LIST, SET, MAP), + // sort by typeId then by name only. Java does NOT sort by nullable flag here. + if catI == 1 { + if fi.TypeId != fj.TypeId { + return fi.TypeId < fj.TypeId + } + return fi.Name < fj.Name + } + // Other categories (struct, enum, etc.): sort by name only + return fi.Name < fj.Name + }) + + return g +} + +// fieldHasNonPrimitiveSerializer returns true if the field has a serializer with a non-primitive type ID. +// This is used to skip the fast path for fields like enums where DispatchId is int32 but the serializer +// writes a different format (e.g., unsigned varint for enum ordinals vs signed zigzag for int32). 
+func fieldHasNonPrimitiveSerializer(field *FieldInfo) bool { + if field.Serializer == nil { + return false + } + // ENUM (numeric ID), NAMED_ENUM (namespace/typename), NAMED_STRUCT, NAMED_COMPATIBLE_STRUCT, NAMED_EXT + // all require special serialization and should not use the primitive fast path + // Note: ENUM uses unsigned Varuint32Small7 for ordinals, not signed zigzag varint + // Use internal type ID (low 8 bits) since registered types have composite TypeIds like (userID << 8) | internalID + internalTypeId := TypeId(field.TypeId & 0xFF) + switch internalTypeId { + case ENUM, NAMED_ENUM, NAMED_STRUCT, NAMED_COMPATIBLE_STRUCT, NAMED_EXT: + return true + default: + return false + } +} + +// isEnumField checks if a field is an enum type based on its TypeId +func isEnumField(field *FieldInfo) bool { + if field.Serializer == nil { + return false + } + internalTypeId := field.TypeId & 0xFF + return internalTypeId == ENUM || internalTypeId == NAMED_ENUM +} + +// getFieldCategory returns the category for sorting remainingFields: +// 0: nullable primitives (sorted by primitiveComparator) +// 1: internal types STRING, BINARY, LIST, SET, MAP (sorted by typeId, then name) +// 2: struct, enum, and all other types (sorted by name only) +func getFieldCategory(field *FieldInfo) int { + if isNullableFixedSizePrimitive(field.DispatchId) || isNullableVarintPrimitive(field.DispatchId) { + return 0 + } + internalId := field.TypeId & 0xFF + switch TypeId(internalId) { + case STRING, BINARY, LIST, SET, MAP: + // Internal types: sorted by typeId, then name + return 1 + default: + // struct, enum, and all other types: sorted by name + return 2 + } +} + +// comparePrimitiveFields compares two nullable primitive fields using Java's primitiveComparator logic: +// fixed before varint, then underlying type size desc, typeId desc, name asc +func comparePrimitiveFields(fi, fj *FieldInfo) bool { + iFixed := isNullableFixedSizePrimitive(fi.DispatchId) + jFixed := 
isNullableFixedSizePrimitive(fj.DispatchId) + if iFixed != jFixed { + return iFixed // fixed before varint + } + // Same category: compare by underlying type size desc, typeId desc, name asc + // Note: Java uses primitive type size (8, 4, 2, 1), not encoding size + sizeI := getUnderlyingTypeSize(fi.DispatchId) + sizeJ := getUnderlyingTypeSize(fj.DispatchId) + if sizeI != sizeJ { + return sizeI > sizeJ // size descending + } + if fi.TypeId != fj.TypeId { + return fi.TypeId > fj.TypeId // typeId descending + } + return fi.Name < fj.Name // name ascending +} + +// getNullableFixedSize returns the fixed size for nullable fixed primitives +func getNullableFixedSize(dispatchId DispatchId) int { + switch dispatchId { + case NullableBoolDispatchId, NullableInt8DispatchId, NullableUint8DispatchId: + return 1 + case NullableInt16DispatchId, NullableUint16DispatchId: + return 2 + case NullableInt32DispatchId, NullableUint32DispatchId, NullableFloat32DispatchId: + return 4 + case NullableInt64DispatchId, NullableUint64DispatchId, NullableFloat64DispatchId: + return 8 + default: + return 0 + } +} + +// getNullableVarintMaxSize returns the max size for nullable varint primitives +func getNullableVarintMaxSize(dispatchId DispatchId) int { + switch dispatchId { + case NullableVarint32DispatchId, NullableVarUint32DispatchId: + return 5 + case NullableVarint64DispatchId, NullableVarUint64DispatchId, NullableIntDispatchId, NullableUintDispatchId: + return 10 + case NullableTaggedInt64DispatchId, NullableTaggedUint64DispatchId: + return 9 + default: + return 0 + } +} + +// getUnderlyingTypeSize returns the size of the underlying primitive type (8 for 64-bit, 4 for 32-bit, etc.) 
+// This matches Java's getSizeOfPrimitiveType() which uses the type size, not encoding size +func getUnderlyingTypeSize(dispatchId DispatchId) int { + switch dispatchId { + // 64-bit types + case PrimitiveInt64DispatchId, PrimitiveUint64DispatchId, PrimitiveFloat64DispatchId, + NotnullInt64PtrDispatchId, NotnullUint64PtrDispatchId, NotnullFloat64PtrDispatchId, + PrimitiveVarint64DispatchId, PrimitiveVarUint64DispatchId, + NotnullVarint64PtrDispatchId, NotnullVarUint64PtrDispatchId, + PrimitiveTaggedInt64DispatchId, PrimitiveTaggedUint64DispatchId, + NotnullTaggedInt64PtrDispatchId, NotnullTaggedUint64PtrDispatchId, + PrimitiveIntDispatchId, PrimitiveUintDispatchId, + NotnullIntPtrDispatchId, NotnullUintPtrDispatchId: + return 8 + // 32-bit types + case PrimitiveInt32DispatchId, PrimitiveUint32DispatchId, PrimitiveFloat32DispatchId, + NotnullInt32PtrDispatchId, NotnullUint32PtrDispatchId, NotnullFloat32PtrDispatchId, + PrimitiveVarint32DispatchId, PrimitiveVarUint32DispatchId, + NotnullVarint32PtrDispatchId, NotnullVarUint32PtrDispatchId: + return 4 + // 16-bit types + case PrimitiveInt16DispatchId, PrimitiveUint16DispatchId, + NotnullInt16PtrDispatchId, NotnullUint16PtrDispatchId: + return 2 + // 8-bit types + case PrimitiveBoolDispatchId, PrimitiveInt8DispatchId, PrimitiveUint8DispatchId, + NotnullBoolPtrDispatchId, NotnullInt8PtrDispatchId, NotnullUint8PtrDispatchId: + return 1 + // Nullable types + case NullableInt64DispatchId, NullableUint64DispatchId, NullableFloat64DispatchId, + NullableVarint64DispatchId, NullableVarUint64DispatchId, + NullableTaggedInt64DispatchId, NullableTaggedUint64DispatchId, + NullableIntDispatchId, NullableUintDispatchId: + return 8 + case NullableInt32DispatchId, NullableUint32DispatchId, NullableFloat32DispatchId, + NullableVarint32DispatchId, NullableVarUint32DispatchId: + return 4 + case NullableInt16DispatchId, NullableUint16DispatchId: + return 2 + case NullableBoolDispatchId, NullableInt8DispatchId, NullableUint8DispatchId: + 
return 1 + default: + return 0 + } +} + +func isNonNullablePrimitiveKind(kind reflect.Kind) bool { + switch kind { + case reflect.Bool, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, + reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, + reflect.Float32, reflect.Float64, reflect.Int, reflect.Uint: + return true + default: + return false + } +} + +// isInternalTypeWithoutTypeMeta checks if a type is serialized without type meta per xlang spec. +// Per the spec (struct field serialization), these types use format: | ref/null flag | value data | (NO type meta) +// - Nullable primitives (*int32, *float64, etc.): | null flag | field value | +// - Strings (string): | null flag | value data | +// - Binary ([]byte): | null flag | value data | +// - List/Slice: | ref meta | value data | +// - Set: | ref meta | value data | +// - Map: | ref meta | value data | +// Only struct/enum/ext types need type meta: | ref flag | type meta | value data | +func isInternalTypeWithoutTypeMeta(t reflect.Type) bool { + kind := t.Kind() + // String type - no type meta needed + if kind == reflect.String { + return true + } + // Slice (list or byte slice) - no type meta needed + if kind == reflect.Slice { + return true + } + // Map type - no type meta needed + if kind == reflect.Map { + return true + } + // Pointer to primitive - no type meta needed + if kind == reflect.Ptr { + elemKind := t.Elem().Kind() + switch elemKind { + case reflect.Bool, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, + reflect.Int, reflect.Float32, reflect.Float64, reflect.String: + return true + } + } + return false +} + +// isStructField checks if a type is a struct type (directly or via pointer) +func isStructField(t reflect.Type) bool { + if t.Kind() == reflect.Struct { + return true + } + if t.Kind() == reflect.Ptr && t.Elem().Kind() == reflect.Struct { + return true + } + return false +} + +// isStructFieldType checks if a FieldType represents a type that needs type info 
written +// This is used to determine if type info was written for the field in compatible mode +// In compatible mode, Java writes type info for struct and ext types, but NOT for enum types +// Enum fields only have null flag + ordinal, no type ID +func isStructFieldType(ft FieldType) bool { + if ft == nil { + return false + } + typeId := ft.TypeId() + // Check base type IDs that need type info (struct and ext, NOT enum) + // Always check the internal type ID (low byte) to handle composite type IDs + // which may be negative when stored as int32 (e.g., -2288 = (short)128784) + internalTypeId := TypeId(typeId & 0xFF) + switch internalTypeId { + case STRUCT, NAMED_STRUCT, COMPATIBLE_STRUCT, NAMED_COMPATIBLE_STRUCT, + EXT, NAMED_EXT: + return true + } + return false +} + +// FieldFingerprintInfo contains the information needed to compute a field's fingerprint. +type FieldFingerprintInfo struct { + // FieldID is the tag ID if configured (>= 0), or -1 to use field name + FieldID int + // FieldName is the snake_case field name (used when FieldID < 0) + FieldName string + // TypeID is the Fory type ID for the field + TypeID TypeId + // Ref is true if reference tracking is enabled for this field + Ref bool + // Nullable is true if null flag is written for this field + Nullable bool +} + +// ComputeStructFingerprint computes the fingerprint string for a struct type. 
+// +// Fingerprint Format: +// +// Each field contributes: ",,,;" +// Fields are sorted by field_id_or_name (lexicographically as strings) +// +// Field Components: +// - field_id_or_name: Tag ID as string if configured (e.g., "0", "1"), otherwise snake_case field name +// - type_id: Fory TypeId as decimal string (e.g., "4" for INT32) +// - ref: "1" if reference tracking enabled, "0" otherwise +// - nullable: "1" if null flag is written, "0" otherwise +// +// Example fingerprints: +// - With tag IDs: "0,4,0,0;1,4,0,1;2,9,0,1;" +// - With field names: "age,4,0,0;name,9,0,1;" +// +// The fingerprint is used to compute a hash for struct schema versioning. +// Different nullable/ref settings will produce different fingerprints, +// ensuring schema compatibility is properly validated. +func ComputeStructFingerprint(fields []FieldFingerprintInfo) string { + // Sort fields by their identifier (field ID or name) + type fieldWithKey struct { + field FieldFingerprintInfo + sortKey string + } + fieldsWithKeys := make([]fieldWithKey, 0, len(fields)) + for _, field := range fields { + var sortKey string + if field.FieldID >= 0 { + sortKey = fmt.Sprintf("%d", field.FieldID) + } else { + sortKey = field.FieldName + } + fieldsWithKeys = append(fieldsWithKeys, fieldWithKey{field: field, sortKey: sortKey}) + } + + sort.Slice(fieldsWithKeys, func(i, j int) bool { + return fieldsWithKeys[i].sortKey < fieldsWithKeys[j].sortKey + }) + + var sb strings.Builder + for _, fw := range fieldsWithKeys { + // Field identifier + sb.WriteString(fw.sortKey) + sb.WriteString(",") + // Type ID + sb.WriteString(fmt.Sprintf("%d", fw.field.TypeID)) + sb.WriteString(",") + // Ref flag + if fw.field.Ref { + sb.WriteString("1") + } else { + sb.WriteString("0") + } + sb.WriteString(",") + // Nullable flag + if fw.field.Nullable { + sb.WriteString("1") + } else { + sb.WriteString("0") + } + sb.WriteString(";") + } + return sb.String() +} + +// Field sorting helpers + +type triple struct { + typeID int16 + 
serializer Serializer + name string + nullable bool + tagID int // -1 = use field name, >=0 = use tag ID for sorting +} + +// getFieldSortKey returns the sort key for a field. +// If tagID >= 0, returns the tag ID as string (for tag-based sorting). +// Otherwise returns the snake_case field name. +func (t triple) getSortKey() string { + if t.tagID >= 0 { + return fmt.Sprintf("%d", t.tagID) + } + return SnakeCase(t.name) +} + +// sortFields sorts fields with nullable information to match Java's field ordering. +// Java separates primitive types (int, long) from boxed types (Integer, Long). +// In Go, this corresponds to non-pointer primitives vs pointer-to-primitive. +// When tagIDs are provided (>= 0), fields are sorted by tag ID instead of field name. +func sortFields( + typeResolver *TypeResolver, + fieldNames []string, + serializers []Serializer, + typeIds []TypeId, + nullables []bool, + tagIDs []int, +) ([]Serializer, []string) { + var ( + typeTriples []triple + others []triple + userDefined []triple + ) + + for i, name := range fieldNames { + ser := serializers[i] + tagID := TagIDUseFieldName // default: use field name + if tagIDs != nil && i < len(tagIDs) { + tagID = tagIDs[i] + } + if ser == nil { + others = append(others, triple{UNKNOWN, nil, name, nullables[i], tagID}) + continue + } + typeTriples = append(typeTriples, triple{typeIds[i], ser, name, nullables[i], tagID}) + } + // Java orders: primitives, boxed, finals, others, collections, maps + // primitives = non-nullable primitive types (int, long, etc.) + // boxed = nullable boxed types (Integer, Long, etc. 
which are pointers in Go) + var primitives, boxed, collection, otherInternalTypeFields []triple + + for _, t := range typeTriples { + switch { + case isPrimitiveType(t.typeID): + // Separate non-nullable primitives from nullable (boxed) primitives + if t.nullable { + boxed = append(boxed, t) + } else { + primitives = append(primitives, t) + } + case isPrimitiveArrayType(t.typeID): + // Primitive arrays: sorted by name only (category 2 in reflection) + collection = append(collection, t) + case isListType(t.typeID), isSetType(t.typeID), isMapType(t.typeID): + // LIST, SET, MAP: sorted by typeId, name (category 1 in reflection) + otherInternalTypeFields = append(otherInternalTypeFields, t) + case isUserDefinedType(t.typeID): + userDefined = append(userDefined, t) + case t.typeID == UNKNOWN: + others = append(others, t) + default: + // STRING, BINARY, and other internal types (category 1 in reflection) + otherInternalTypeFields = append(otherInternalTypeFields, t) + } + } + // Sort primitives (non-nullable) - same logic as boxed + // Java sorts by: compressed (varint) types last, then by size (largest first), then by type ID (descending) + // Fixed types: BOOL, INT8, UINT8, INT16, UINT16, INT32, UINT32, INT64, UINT64, FLOAT32, FLOAT64 + // Varint types: VARINT32, VARINT64, VAR_UINT32, VAR_UINT64, TAGGED_INT64, TAGGED_UINT64 + isVarintTypeId := func(typeID int16) bool { + return typeID == VARINT32 || typeID == VARINT64 || + typeID == VAR_UINT32 || typeID == VAR_UINT64 || + typeID == TAGGED_INT64 || typeID == TAGGED_UINT64 + } + sortPrimitiveSlice := func(s []triple) { + sort.Slice(s, func(i, j int) bool { + ai, aj := s[i], s[j] + compressI := isVarintTypeId(ai.typeID) + compressJ := isVarintTypeId(aj.typeID) + if compressI != compressJ { + return !compressI && compressJ + } + szI, szJ := getPrimitiveTypeSize(ai.typeID), getPrimitiveTypeSize(aj.typeID) + if szI != szJ { + return szI > szJ + } + // Tie-breaker: type ID descending (higher type ID first), then field name + 
if ai.typeID != aj.typeID { + return ai.typeID > aj.typeID + } + return ai.getSortKey() < aj.getSortKey() + }) + } + sortPrimitiveSlice(primitives) + sortPrimitiveSlice(boxed) + // Sort internal types (STRING, BINARY, LIST, SET, MAP) by typeId then name only. + // Java does NOT sort by nullable flag for these types. + sortByTypeIDThenName := func(s []triple) { + sort.Slice(s, func(i, j int) bool { + if s[i].typeID != s[j].typeID { + return s[i].typeID < s[j].typeID + } + return s[i].getSortKey() < s[j].getSortKey() + }) + } + sortTuple := func(s []triple) { + sort.Slice(s, func(i, j int) bool { + return s[i].getSortKey() < s[j].getSortKey() + }) + } + sortByTypeIDThenName(otherInternalTypeFields) + // Merge all category 2 fields (primitive arrays, userDefined, others) and sort by name + // This matches GroupFields' getFieldCategory which sorts all category 2 fields together + category2 := make([]triple, 0, len(collection)+len(userDefined)+len(others)) + category2 = append(category2, collection...) // primitive arrays + category2 = append(category2, userDefined...) // structs, enums + category2 = append(category2, others...) // unknown types + sortTuple(category2) + + // Order: primitives, boxed, internal types (STRING/BINARY/LIST/SET/MAP), category 2 (by name) + // This aligns with GroupFields' getFieldCategory sorting + all := make([]triple, 0, len(fieldNames)) + all = append(all, primitives...) + all = append(all, boxed...) + all = append(all, otherInternalTypeFields...) // STRING, BINARY, LIST, SET, MAP (category 1) + all = append(all, category2...) 
// all category 2 fields sorted by name + + outSer := make([]Serializer, len(all)) + outNam := make([]string, len(all)) + for i, t := range all { + outSer[i] = t.serializer + outNam[i] = t.name + } + return outSer, outNam +} + +func typesCompatible(actual, expected reflect.Type) bool { + if actual == nil || expected == nil { + return false + } + if actual == expected { + return true + } + // interface{} can accept any value + if actual.Kind() == reflect.Interface && actual.NumMethod() == 0 { + return true + } + if actual.AssignableTo(expected) || expected.AssignableTo(actual) { + return true + } + if actual.Kind() == reflect.Ptr && actual.Elem() == expected { + return true + } + if expected.Kind() == reflect.Ptr && expected.Elem() == actual { + return true + } + if actual.Kind() == expected.Kind() { + switch actual.Kind() { + case reflect.Slice, reflect.Array: + return elementTypesCompatible(actual.Elem(), expected.Elem()) + case reflect.Map: + return elementTypesCompatible(actual.Key(), expected.Key()) && elementTypesCompatible(actual.Elem(), expected.Elem()) + } + } + if (actual.Kind() == reflect.Array && expected.Kind() == reflect.Slice) || + (actual.Kind() == reflect.Slice && expected.Kind() == reflect.Array) { + return true + } + return false +} + +func elementTypesCompatible(actual, expected reflect.Type) bool { + if actual == nil || expected == nil { + return false + } + if actual == expected || actual.AssignableTo(expected) || expected.AssignableTo(actual) { + return true + } + if actual.Kind() == reflect.Ptr { + return elementTypesCompatible(actual, expected.Elem()) + } + return false +} + +// typeIdFromKind derives a TypeId from a reflect.Type's kind +// This is used when the type is not registered in typesInfo +// Note: Uses VARINT32/VARINT64/VAR_UINT32/VAR_UINT64 to match Java xlang mode and Rust +func typeIdFromKind(type_ reflect.Type) TypeId { + switch type_.Kind() { + case reflect.Bool: + return BOOL + case reflect.Int8: + return INT8 + case 
reflect.Int16: + return INT16 + case reflect.Int32: + return VARINT32 + case reflect.Int64, reflect.Int: + return VARINT64 + case reflect.Uint8: + return UINT8 + case reflect.Uint16: + return UINT16 + case reflect.Uint32: + return VAR_UINT32 + case reflect.Uint64, reflect.Uint: + return VAR_UINT64 + case reflect.Float32: + return FLOAT32 + case reflect.Float64: + return FLOAT64 + case reflect.String: + return STRING + case reflect.Slice: + // For slices, return the appropriate primitive array type ID based on element type + elemKind := type_.Elem().Kind() + switch elemKind { + case reflect.Bool: + return BOOL_ARRAY + case reflect.Int8: + return INT8_ARRAY + case reflect.Int16: + return INT16_ARRAY + case reflect.Int32: + return INT32_ARRAY + case reflect.Int64, reflect.Int: + return INT64_ARRAY + case reflect.Float32: + return FLOAT32_ARRAY + case reflect.Float64: + return FLOAT64_ARRAY + default: + // Non-primitive slices use LIST + return LIST + } + case reflect.Array: + // For arrays, return the appropriate primitive array type ID based on element type + elemKind := type_.Elem().Kind() + switch elemKind { + case reflect.Bool: + return BOOL_ARRAY + case reflect.Int8: + return INT8_ARRAY + case reflect.Int16: + return INT16_ARRAY + case reflect.Int32: + return INT32_ARRAY + case reflect.Int64, reflect.Int: + return INT64_ARRAY + case reflect.Float32: + return FLOAT32_ARRAY + case reflect.Float64: + return FLOAT64_ARRAY + default: + // Non-primitive arrays use LIST + return LIST + } + case reflect.Map: + // map[T]bool is used to represent a Set in Go + if type_.Elem().Kind() == reflect.Bool { + return SET + } + return MAP + case reflect.Struct: + return NAMED_STRUCT + case reflect.Ptr: + // For pointer types, get the type ID of the element type + return typeIdFromKind(type_.Elem()) + default: + return UNKNOWN + } +} diff --git a/go/fory/struct.go b/go/fory/struct.go index ab7f2618ea..b8f3c77658 100644 --- a/go/fory/struct.go +++ b/go/fory/struct.go @@ -24,7 +24,6 
@@ import ( "math" "reflect" "sort" - "strings" "unicode" "unicode/utf8" "unsafe" @@ -32,160 +31,28 @@ import ( "github.com/spaolacci/murmur3" ) -// FieldInfo stores field metadata computed ENTIRELY at init time. -// All flags and decisions are pre-computed to eliminate runtime checks. -type FieldInfo struct { - Name string - Offset uintptr - Type reflect.Type - DispatchId DispatchId - TypeId TypeId // Fory type ID for the serializer - Serializer Serializer - Referencable bool - FieldIndex int // -1 if field doesn't exist in current struct (for compatible mode) - FieldDef FieldDef // original FieldDef from remote TypeDef (for compatible mode skip) - - // Pre-computed sizes and offsets (for fixed primitives) - FixedSize int // 0 if not fixed-size, else 1/2/4/8 - WriteOffset int // Offset within fixed-fields buffer region (sum of preceding field sizes) - - // Pre-computed flags for serialization (computed at init time) - RefMode RefMode // ref mode for serializer.Write/Read - WriteType bool // whether to write type info (true for struct fields in compatible mode) - HasGenerics bool // whether element types are known from TypeDef (for container fields) - - // Tag-based configuration (from fory struct tags) - TagID int // -1 = use field name, >=0 = use tag ID - HasForyTag bool // Whether field has explicit fory tag - TagRefSet bool // Whether ref was explicitly set via fory tag - TagRef bool // The ref value from fory tag (only valid if TagRefSet is true) - TagNullableSet bool // Whether nullable was explicitly set via fory tag - TagNullable bool // The nullable value from fory tag (only valid if TagNullableSet is true) -} - -// fieldHasNonPrimitiveSerializer returns true if the field has a serializer with a non-primitive type ID. -// This is used to skip the fast path for fields like enums where DispatchId is int32 but the serializer -// writes a different format (e.g., unsigned varint for enum ordinals vs signed zigzag for int32). 
-func fieldHasNonPrimitiveSerializer(field *FieldInfo) bool { - if field.Serializer == nil { - return false - } - // ENUM (numeric ID), NAMED_ENUM (namespace/typename), NAMED_STRUCT, NAMED_COMPATIBLE_STRUCT, NAMED_EXT - // all require special serialization and should not use the primitive fast path - // Note: ENUM uses unsigned Varuint32Small7 for ordinals, not signed zigzag varint - // Use internal type ID (low 8 bits) since registered types have composite TypeIds like (userID << 8) | internalID - internalTypeId := TypeId(field.TypeId & 0xFF) - switch internalTypeId { - case ENUM, NAMED_ENUM, NAMED_STRUCT, NAMED_COMPATIBLE_STRUCT, NAMED_EXT: - return true - default: - return false - } -} - -// isEnumField checks if a field is an enum type based on its TypeId -func isEnumField(field *FieldInfo) bool { - if field.Serializer == nil { - return false - } - internalTypeId := field.TypeId & 0xFF - return internalTypeId == ENUM || internalTypeId == NAMED_ENUM -} - -// writeEnumField writes an enum field respecting the field's RefMode. -// Java writes enum ordinals as unsigned Varuint32Small7, not signed zigzag. -// RefMode determines whether null flag is written, regardless of whether the local type is a pointer. -// This is important for compatible mode where remote TypeDef's nullable flag controls the wire format. 
-func writeEnumField(ctx *WriteContext, field *FieldInfo, fieldValue reflect.Value) { - buf := ctx.Buffer() - isPointer := fieldValue.Kind() == reflect.Ptr - - // Write null flag based on RefMode only (not based on whether local type is pointer) - if field.RefMode != RefModeNone { - if isPointer && fieldValue.IsNil() { - buf.WriteInt8(NullFlag) - return - } - buf.WriteInt8(NotNullValueFlag) - } - - // Get the actual value to serialize - targetValue := fieldValue - if isPointer { - if fieldValue.IsNil() { - // RefModeNone but nil pointer - this is a protocol error in schema-consistent mode - // Write zero value as fallback - targetValue = reflect.Zero(field.Type.Elem()) - } else { - targetValue = fieldValue.Elem() - } - } - - // For pointer enum fields, the serializer is ptrToValueSerializer wrapping enumSerializer. - // We need to call the inner enumSerializer directly with the dereferenced value. - if ptrSer, ok := field.Serializer.(*ptrToValueSerializer); ok { - ptrSer.valueSerializer.WriteData(ctx, targetValue) - } else { - field.Serializer.WriteData(ctx, targetValue) - } -} - -// readEnumField reads an enum field respecting the field's RefMode. -// RefMode determines whether null flag is read, regardless of whether the local type is a pointer. -// This is important for compatible mode where remote TypeDef's nullable flag controls the wire format. -// Uses context error state for deferred error checking. 
-func readEnumField(ctx *ReadContext, field *FieldInfo, fieldValue reflect.Value) { - buf := ctx.Buffer() - isPointer := fieldValue.Kind() == reflect.Ptr - - // Read null flag based on RefMode only (not based on whether local type is pointer) - if field.RefMode != RefModeNone { - nullFlag := buf.ReadInt8(ctx.Err()) - if nullFlag == NullFlag { - // For pointer enum fields, leave as nil; for non-pointer, set to zero - if !isPointer { - fieldValue.SetInt(0) - } - return - } - } - - // For pointer enum fields, allocate a new value - targetValue := fieldValue - if isPointer { - newVal := reflect.New(field.Type.Elem()) - fieldValue.Set(newVal) - targetValue = newVal.Elem() - } - - // For pointer enum fields, the serializer is ptrToValueSerializer wrapping enumSerializer. - // We need to call the inner enumSerializer directly with the dereferenced value. - if ptrSer, ok := field.Serializer.(*ptrToValueSerializer); ok { - ptrSer.valueSerializer.ReadData(ctx, field.Type.Elem(), targetValue) - } else { - field.Serializer.ReadData(ctx, field.Type, targetValue) +// GetStructHash returns the struct hash for a given type using the provided TypeResolver. +// This is used by codegen serializers to get the hash at runtime. +func GetStructHash(type_ reflect.Type, resolver *TypeResolver) int32 { + ser := newStructSerializer(type_, "") + if err := ser.initialize(resolver); err != nil { + panic(fmt.Errorf("failed to initialize struct serializer for hash computation: %v", err)) } + return ser.structHash } type structSerializer struct { // Identity - typeTag string + name string type_ reflect.Type structHash int32 - // Pre-sorted field lists by category (computed at init) - fixedFields []*FieldInfo // fixed-size primitives (bool, int8, int16, float32, float64) - varintFields []*FieldInfo // varint primitives (int32, int64, int) - remainingFields []*FieldInfo // all other fields (string, slice, map, struct, etc.) 
- - // All fields in protocol order (for compatible mode) - fields []*FieldInfo // all fields in sorted order - fieldMap map[string]*FieldInfo // for compatible reading - fieldDefs []FieldDef // for type_def compatibility + // Pre-sorted and categorized fields (embedded for cache locality) + fieldGroup FieldGroup - // Pre-computed buffer sizes - fixedSize int // Total bytes for fixed-size primitives - maxVarintSize int // Max bytes for varints (5 per int32, 10 per int64) + // Original field list for hash computation and compatible mode + fields []FieldInfo // all fields in sorted order (before grouping) + fieldDefs []FieldDef // for type_def compatibility // Mode flags (set at init) isCompatibleMode bool // true when compatible=true @@ -195,1561 +62,1480 @@ type structSerializer struct { initialized bool } -// newStructSerializer creates a new structSerializer with the given parameters. -// typeTag can be empty and will be derived from type_.Name() if not provided. -// fieldDefs can be nil for local structs without remote schema. -func newStructSerializer(type_ reflect.Type, typeTag string, fieldDefs []FieldDef) *structSerializer { - if typeTag == "" && type_ != nil { - typeTag = type_.Name() +// newStructSerializerFromTypeDef creates a new structSerializer with the given parameters. +// name can be empty and will be derived from type_.Name() if not provided. +// fieldDefs is from remote schema. +func newStructSerializerFromTypeDef(type_ reflect.Type, name string, fieldDefs []FieldDef) *structSerializer { + if name == "" && type_ != nil { + name = type_.Name() } return &structSerializer{ type_: type_, - typeTag: typeTag, + name: name, fieldDefs: fieldDefs, } } +// newStructSerializer creates a new structSerializer with the given parameters. +// name can be empty and will be derived from type_.Name() if not provided. +// fieldDefs can be nil for local structs without remote schema. 
+func newStructSerializer(type_ reflect.Type, name string) *structSerializer { + if name == "" && type_ != nil { + name = type_.Name() + } + return &structSerializer{ + type_: type_, + name: name, + } +} + // initialize performs eager initialization of the struct serializer. // This should be called at registration time to pre-compute all field metadata. func (s *structSerializer) initialize(typeResolver *TypeResolver) error { if s.initialized { return nil } - // Ensure type is set if s.type_ == nil { return errors.New("struct type not set") } - // Normalize pointer types for s.type_.Kind() == reflect.Ptr { s.type_ = s.type_.Elem() } - // Set compatible mode flag BEFORE field initialization // This is needed for groupFields to apply correct sorting s.isCompatibleMode = typeResolver.Compatible() - // Build fields from type or fieldDefs if s.fieldDefs != nil { - if err := s.initFieldsFromDefsWithResolver(typeResolver); err != nil { + if err := s.initFieldsFromTypeDef(typeResolver); err != nil { return err } } else { - if err := s.initFieldsFromTypeResolver(typeResolver); err != nil { + if err := s.initFields(typeResolver); err != nil { return err } } - // Compute struct hash s.structHash = s.computeHash() - s.initialized = true return nil } -func (s *structSerializer) Write(ctx *WriteContext, refMode RefMode, writeType bool, hasGenerics bool, value reflect.Value) { - switch refMode { - case RefModeTracking: - if value.Kind() == reflect.Ptr && value.IsNil() { - ctx.buffer.WriteInt8(NullFlag) - return - } - refWritten, err := ctx.RefResolver().WriteRefOrNull(ctx.buffer, value) - if err != nil { - ctx.SetError(FromError(err)) - return - } - if refWritten { - return - } - case RefModeNullOnly: - if value.Kind() == reflect.Ptr && value.IsNil() { - ctx.buffer.WriteInt8(NullFlag) - return - } - ctx.buffer.WriteInt8(NotNullValueFlag) - } - if writeType { - // Structs have dynamic type IDs, need to look up from TypeResolver - typeInfo, err := 
ctx.TypeResolver().getTypeInfo(value, true) - if err != nil { - ctx.SetError(FromError(err)) - return - } - ctx.TypeResolver().WriteTypeInfo(ctx.buffer, typeInfo, ctx.Err()) +// initFields initializes fields from local struct type using TypeResolver +func (s *structSerializer) initFields(typeResolver *TypeResolver) error { + // If we have fieldDefs from type_def (remote meta), use them + if len(s.fieldDefs) > 0 { + return s.initFieldsFromTypeDef(typeResolver) } - s.WriteData(ctx, value) -} -func (s *structSerializer) WriteData(ctx *WriteContext, value reflect.Value) { - // Early error check - skip all intermediate checks for normal path performance - if ctx.HasError() { - return - } + // Otherwise initialize from local struct type + type_ := s.type_ + var fields []FieldInfo + var fieldNames []string + var serializers []Serializer + var typeIds []TypeId + var nullables []bool + var tagIDs []int - // Lazy initialization - if !s.initialized { - if err := s.initialize(ctx.TypeResolver()); err != nil { - ctx.SetError(FromError(err)) - return + for i := 0; i < type_.NumField(); i++ { + field := type_.Field(i) + firstRune, _ := utf8.DecodeRuneInString(field.Name) + if unicode.IsLower(firstRune) { + continue // skip unexported fields } - } - // Debug output for field order - if DebugOutputEnabled() { - fmt.Printf("[Go] WriteData for type %s:\n", s.type_.Name()) - fmt.Printf("[Go] fixedFields (%d):\n", len(s.fixedFields)) - for i, field := range s.fixedFields { - fmt.Printf("[Go] [%d] %s -> dispatchId=%d\n", i, field.Name, field.DispatchId) - } - fmt.Printf("[Go] varintFields (%d):\n", len(s.varintFields)) - for i, field := range s.varintFields { - fmt.Printf("[Go] [%d] %s -> dispatchId=%d\n", i, field.Name, field.DispatchId) - } - fmt.Printf("[Go] remainingFields (%d):\n", len(s.remainingFields)) - for i, field := range s.remainingFields { - fmt.Printf("[Go] [%d] %s -> dispatchId=%d\n", i, field.Name, field.DispatchId) + // Parse fory struct tag and check for ignore + 
foryTag := parseForyTag(field) + if foryTag.Ignore { + continue // skip ignored fields } - } - buf := ctx.Buffer() + fieldType := field.Type - // Dereference pointer if needed - if value.Kind() == reflect.Ptr { - if value.IsNil() { - ctx.SetError(SerializationError("cannot write nil pointer")) - return + var fieldSerializer Serializer + // For interface{} fields, don't get a serializer - use WriteValue/ReadValue instead + // which will handle polymorphic types dynamically + if fieldType.Kind() != reflect.Interface { + // Get serializer for all non-interface field types + fieldSerializer, _ = typeResolver.getSerializerByType(fieldType, true) } - value = value.Elem() - } - // In compatible mode with meta share, struct hash is not written - if !ctx.Compatible() { - buf.WriteInt32(s.structHash) - } - - // Check if value is addressable for unsafe access - canUseUnsafe := value.CanAddr() - var ptr unsafe.Pointer - if canUseUnsafe { - ptr = unsafe.Pointer(value.UnsafeAddr()) - } + // Use TypeResolver helper methods for arrays and slices + if fieldType.Kind() == reflect.Array && fieldType.Elem().Kind() != reflect.Interface { + fieldSerializer, _ = typeResolver.GetArraySerializer(fieldType) + } else if fieldType.Kind() == reflect.Slice && fieldType.Elem().Kind() != reflect.Interface { + fieldSerializer, _ = typeResolver.GetSliceSerializer(fieldType) + } else if fieldType.Kind() == reflect.Slice && fieldType.Elem().Kind() == reflect.Interface { + // For struct fields with interface element types, use sliceDynSerializer + fieldSerializer = mustNewSliceDynSerializer(fieldType.Elem()) + } - // ========================================================================== - // Phase 1: Fixed-size primitives (bool, int8, int16, float32, float64) - // - Reserve once, inline unsafe writes with endian handling, update index once - // - field.WriteOffset computed at init time - // ========================================================================== - if DebugOutputEnabled() { - 
fmt.Printf("[Go] WriteData Phase 1: canUseUnsafe=%v, fixedSize=%d, len(fixedFields)=%d\n", - canUseUnsafe, s.fixedSize, len(s.fixedFields)) - } - if canUseUnsafe && s.fixedSize > 0 { - buf.Reserve(s.fixedSize) - baseOffset := buf.WriterIndex() - data := buf.GetData() + // Get TypeId for the serializer, fallback to deriving from kind + fieldTypeId := typeResolver.getTypeIdByType(fieldType) + if fieldTypeId == 0 { + fieldTypeId = typeIdFromKind(fieldType) + } - for _, field := range s.fixedFields { - fieldPtr := unsafe.Add(ptr, field.Offset) - bufOffset := baseOffset + field.WriteOffset - // Debug output for values being written - if DebugOutputEnabled() { - switch field.DispatchId { - case PrimitiveUint64DispatchId: - fmt.Printf("[Go] Writing fixed field %s: value=%d, bufOffset=%d\n", field.Name, *(*uint64)(fieldPtr), bufOffset) - case PrimitiveUint32DispatchId: - fmt.Printf("[Go] Writing fixed field %s: value=%d, bufOffset=%d\n", field.Name, *(*uint32)(fieldPtr), bufOffset) - case PrimitiveUint16DispatchId: - fmt.Printf("[Go] Writing fixed field %s: value=%d, bufOffset=%d\n", field.Name, *(*uint16)(fieldPtr), bufOffset) - case PrimitiveUint8DispatchId: - fmt.Printf("[Go] Writing fixed field %s: value=%d, bufOffset=%d\n", field.Name, *(*uint8)(fieldPtr), bufOffset) - } - } - switch field.DispatchId { - case PrimitiveBoolDispatchId: - if *(*bool)(fieldPtr) { - data[bufOffset] = 1 + // Override TypeId based on compress/encoding tags for integer types + // This matches the logic in type_def.go:buildFieldDefs + baseKind := fieldType.Kind() + if baseKind == reflect.Ptr { + baseKind = fieldType.Elem().Kind() + } + switch baseKind { + case reflect.Uint32: + if foryTag.CompressSet { + if foryTag.Compress { + fieldTypeId = VAR_UINT32 } else { - data[bufOffset] = 0 + fieldTypeId = UINT32 } - case NotnullBoolPtrDispatchId: - if **(**bool)(fieldPtr) { - data[bufOffset] = 1 + } + case reflect.Int32: + if foryTag.CompressSet { + if foryTag.Compress { + fieldTypeId = VARINT32 } 
else { - data[bufOffset] = 0 + fieldTypeId = INT32 } - case PrimitiveInt8DispatchId: - data[bufOffset] = *(*byte)(fieldPtr) - case NotnullInt8PtrDispatchId: - data[bufOffset] = byte(**(**int8)(fieldPtr)) - case PrimitiveUint8DispatchId: - data[bufOffset] = *(*uint8)(fieldPtr) - case NotnullUint8PtrDispatchId: - data[bufOffset] = **(**uint8)(fieldPtr) - case PrimitiveInt16DispatchId: - if isLittleEndian { - *(*int16)(unsafe.Pointer(&data[bufOffset])) = *(*int16)(fieldPtr) - } else { - binary.LittleEndian.PutUint16(data[bufOffset:], uint16(*(*int16)(fieldPtr))) - } - case NotnullInt16PtrDispatchId: - if isLittleEndian { - *(*int16)(unsafe.Pointer(&data[bufOffset])) = **(**int16)(fieldPtr) - } else { - binary.LittleEndian.PutUint16(data[bufOffset:], uint16(**(**int16)(fieldPtr))) - } - case PrimitiveUint16DispatchId: - if isLittleEndian { - *(*uint16)(unsafe.Pointer(&data[bufOffset])) = *(*uint16)(fieldPtr) - } else { - binary.LittleEndian.PutUint16(data[bufOffset:], *(*uint16)(fieldPtr)) - } - case NotnullUint16PtrDispatchId: - if isLittleEndian { - *(*uint16)(unsafe.Pointer(&data[bufOffset])) = **(**uint16)(fieldPtr) - } else { - binary.LittleEndian.PutUint16(data[bufOffset:], **(**uint16)(fieldPtr)) - } - case PrimitiveInt32DispatchId: - if isLittleEndian { - *(*int32)(unsafe.Pointer(&data[bufOffset])) = *(*int32)(fieldPtr) - } else { - binary.LittleEndian.PutUint32(data[bufOffset:], uint32(*(*int32)(fieldPtr))) - } - case NotnullInt32PtrDispatchId: - if isLittleEndian { - *(*int32)(unsafe.Pointer(&data[bufOffset])) = **(**int32)(fieldPtr) - } else { - binary.LittleEndian.PutUint32(data[bufOffset:], uint32(**(**int32)(fieldPtr))) - } - case PrimitiveUint32DispatchId: - if isLittleEndian { - *(*uint32)(unsafe.Pointer(&data[bufOffset])) = *(*uint32)(fieldPtr) - } else { - binary.LittleEndian.PutUint32(data[bufOffset:], *(*uint32)(fieldPtr)) - } - case NotnullUint32PtrDispatchId: - if isLittleEndian { - *(*uint32)(unsafe.Pointer(&data[bufOffset])) = 
**(**uint32)(fieldPtr) - } else { - binary.LittleEndian.PutUint32(data[bufOffset:], **(**uint32)(fieldPtr)) - } - case PrimitiveInt64DispatchId: - if isLittleEndian { - *(*int64)(unsafe.Pointer(&data[bufOffset])) = *(*int64)(fieldPtr) - } else { - binary.LittleEndian.PutUint64(data[bufOffset:], uint64(*(*int64)(fieldPtr))) - } - case NotnullInt64PtrDispatchId: - if isLittleEndian { - *(*int64)(unsafe.Pointer(&data[bufOffset])) = **(**int64)(fieldPtr) - } else { - binary.LittleEndian.PutUint64(data[bufOffset:], uint64(**(**int64)(fieldPtr))) - } - case PrimitiveUint64DispatchId: - if isLittleEndian { - *(*uint64)(unsafe.Pointer(&data[bufOffset])) = *(*uint64)(fieldPtr) - } else { - binary.LittleEndian.PutUint64(data[bufOffset:], *(*uint64)(fieldPtr)) - } - case NotnullUint64PtrDispatchId: - if isLittleEndian { - *(*uint64)(unsafe.Pointer(&data[bufOffset])) = **(**uint64)(fieldPtr) - } else { - binary.LittleEndian.PutUint64(data[bufOffset:], **(**uint64)(fieldPtr)) - } - case PrimitiveFloat32DispatchId: - if isLittleEndian { - *(*float32)(unsafe.Pointer(&data[bufOffset])) = *(*float32)(fieldPtr) - } else { - binary.LittleEndian.PutUint32(data[bufOffset:], math.Float32bits(*(*float32)(fieldPtr))) - } - case NotnullFloat32PtrDispatchId: - if isLittleEndian { - *(*float32)(unsafe.Pointer(&data[bufOffset])) = **(**float32)(fieldPtr) - } else { - binary.LittleEndian.PutUint32(data[bufOffset:], math.Float32bits(**(**float32)(fieldPtr))) - } - case PrimitiveFloat64DispatchId: - if isLittleEndian { - *(*float64)(unsafe.Pointer(&data[bufOffset])) = *(*float64)(fieldPtr) - } else { - binary.LittleEndian.PutUint64(data[bufOffset:], math.Float64bits(*(*float64)(fieldPtr))) + } + case reflect.Uint64: + if foryTag.EncodingSet { + switch foryTag.Encoding { + case "fixed": + fieldTypeId = UINT64 + case "varint": + fieldTypeId = VAR_UINT64 + case "tagged": + fieldTypeId = TAGGED_UINT64 } - case NotnullFloat64PtrDispatchId: - if isLittleEndian { - 
*(*float64)(unsafe.Pointer(&data[bufOffset])) = **(**float64)(fieldPtr) - } else { - binary.LittleEndian.PutUint64(data[bufOffset:], math.Float64bits(**(**float64)(fieldPtr))) + } + case reflect.Int64: + if foryTag.EncodingSet { + switch foryTag.Encoding { + case "fixed": + fieldTypeId = INT64 + case "varint": + fieldTypeId = VARINT64 + case "tagged": + fieldTypeId = TAGGED_INT64 } } } - // Update writer index ONCE after all fixed fields - buf.SetWriterIndex(baseOffset + s.fixedSize) - } else if len(s.fixedFields) > 0 { - // Fallback to reflect-based access for unaddressable values - if DebugOutputEnabled() { - fmt.Printf("[Go] Using reflect-based fallback for fixedFields\n") + + // Calculate nullable flag for serialization (wire format): + // - In xlang mode: Per xlang spec, fields are NON-NULLABLE by default. + // Only pointer types are nullable by default. + // - In native mode: Go's natural semantics apply - slice/map/interface can be nil, + // so they are nullable by default. + // Can be overridden by explicit fory tag `fory:"nullable"`. + internalId := fieldTypeId & 0xFF + isEnum := internalId == ENUM || internalId == NAMED_ENUM + + // Determine nullable based on mode + // In xlang mode: only pointer types are nullable by default (per xlang spec) + // In native mode: Go's natural semantics - all nil-able types are nullable + // This ensures proper interoperability with Java/other languages in xlang mode. 
+ var nullableFlag bool + if typeResolver.fory.config.IsXlang { + // xlang mode: only pointer types are nullable by default per xlang spec + // Slices and maps are NOT nullable - they serialize as empty when nil + nullableFlag = fieldType.Kind() == reflect.Ptr + } else { + // Native mode: Go's natural semantics - all nil-able types are nullable + nullableFlag = fieldType.Kind() == reflect.Ptr || + fieldType.Kind() == reflect.Slice || + fieldType.Kind() == reflect.Map || + fieldType.Kind() == reflect.Interface } - for _, field := range s.fixedFields { - fieldValue := value.Field(field.FieldIndex) - startPos := buf.WriterIndex() - if DebugOutputEnabled() { - fmt.Printf("[Go] Fallback writing field %s: FieldIndex=%d, value=%v, dispatchId=%d, bufPos=%d\n", - field.Name, field.FieldIndex, fieldValue.Interface(), field.DispatchId, startPos) + if foryTag.NullableSet { + // Override nullable flag if explicitly set in fory tag + nullableFlag = foryTag.Nullable + } + // Primitives are never nullable, regardless of tag + if isNonNullablePrimitiveKind(fieldType.Kind()) && !isEnum { + nullableFlag = false + } + + // Calculate ref tracking - use tag override if explicitly set + trackRef := typeResolver.TrackRef() + if foryTag.RefSet { + trackRef = foryTag.Ref + } + + // Pre-compute RefMode based on (possibly overridden) trackRef and nullable + // For pointer-to-struct fields, enable ref tracking when trackRef is enabled, + // regardless of nullable flag. This is necessary to detect circular references. 
+ refMode := RefModeNone + isStructPointer := fieldType.Kind() == reflect.Ptr && fieldType.Elem().Kind() == reflect.Struct + if trackRef && (nullableFlag || isStructPointer) { + refMode = RefModeTracking + } else if nullableFlag { + refMode = RefModeNullOnly + } + // Pre-compute WriteType: true for struct fields in compatible mode + writeType := typeResolver.Compatible() && isStructField(fieldType) + + // Pre-compute DispatchId, with special handling for enum fields and pointer-to-numeric + var dispatchId DispatchId + if fieldType.Kind() == reflect.Ptr && isNumericKind(fieldType.Elem().Kind()) { + if nullableFlag { + dispatchId = getDispatchIdFromTypeId(fieldTypeId, true) + } else { + dispatchId = getNotnullPtrDispatchId(fieldType.Elem().Kind(), foryTag.Encoding) } - switch field.DispatchId { - // Primitive types (non-pointer) - case PrimitiveBoolDispatchId: - buf.WriteBool(fieldValue.Bool()) - case PrimitiveInt8DispatchId: - buf.WriteByte_(byte(fieldValue.Int())) - case PrimitiveUint8DispatchId: - buf.WriteByte_(byte(fieldValue.Uint())) - case PrimitiveInt16DispatchId: - buf.WriteInt16(int16(fieldValue.Int())) - case PrimitiveUint16DispatchId: - buf.WriteInt16(int16(fieldValue.Uint())) - case PrimitiveInt32DispatchId: - buf.WriteInt32(int32(fieldValue.Int())) - case PrimitiveUint32DispatchId: - buf.WriteInt32(int32(fieldValue.Uint())) - case PrimitiveInt64DispatchId: - buf.WriteInt64(fieldValue.Int()) - case PrimitiveUint64DispatchId: - buf.WriteInt64(int64(fieldValue.Uint())) - case PrimitiveFloat32DispatchId: - buf.WriteFloat32(float32(fieldValue.Float())) - case PrimitiveFloat64DispatchId: - buf.WriteFloat64(fieldValue.Float()) - // Notnull pointer types - dereference and write - case NotnullBoolPtrDispatchId: - buf.WriteBool(fieldValue.Elem().Bool()) - case NotnullInt8PtrDispatchId: - buf.WriteByte_(byte(fieldValue.Elem().Int())) - case NotnullUint8PtrDispatchId: - buf.WriteByte_(byte(fieldValue.Elem().Uint())) - case NotnullInt16PtrDispatchId: - 
buf.WriteInt16(int16(fieldValue.Elem().Int())) - case NotnullUint16PtrDispatchId: - buf.WriteInt16(int16(fieldValue.Elem().Uint())) - case NotnullInt32PtrDispatchId: - buf.WriteInt32(int32(fieldValue.Elem().Int())) - case NotnullUint32PtrDispatchId: - buf.WriteInt32(int32(fieldValue.Elem().Uint())) - case NotnullInt64PtrDispatchId: - buf.WriteInt64(fieldValue.Elem().Int()) - case NotnullUint64PtrDispatchId: - buf.WriteInt64(int64(fieldValue.Elem().Uint())) - case NotnullFloat32PtrDispatchId: - buf.WriteFloat32(float32(fieldValue.Elem().Float())) - case NotnullFloat64PtrDispatchId: - buf.WriteFloat64(fieldValue.Elem().Float()) + } else { + dispatchId = getDispatchIdFromTypeId(fieldTypeId, nullableFlag) + if dispatchId == UnknownDispatchId { + dispatchId = GetDispatchId(fieldType) } - if DebugOutputEnabled() { - endPos := buf.WriterIndex() - bytesWritten := endPos - startPos - fmt.Printf("[Go] Fallback wrote %d bytes for %s, endPos=%d, bytes=%x\n", - bytesWritten, field.Name, endPos, buf.GetByteSlice(startPos, endPos)) + } + if fieldSerializer != nil { + if _, ok := fieldSerializer.(*enumSerializer); ok { + dispatchId = EnumDispatchId + } else if ptrSer, ok := fieldSerializer.(*ptrToValueSerializer); ok { + if _, ok := ptrSer.valueSerializer.(*enumSerializer); ok { + dispatchId = EnumDispatchId + } } } + if DebugOutputEnabled() { + fmt.Printf("[Go][fory-debug] initFields: field=%s type=%v dispatchId=%d refMode=%v nullableFlag=%v serializer=%T\n", + SnakeCase(field.Name), fieldType, dispatchId, refMode, nullableFlag, fieldSerializer) + } + + fieldInfo := FieldInfo{ + Name: SnakeCase(field.Name), + Offset: field.Offset, + Type: fieldType, + DispatchId: dispatchId, + TypeId: fieldTypeId, + Serializer: fieldSerializer, + Nullable: nullableFlag, // Use same logic as TypeDef's nullable flag for consistent ref handling + FieldIndex: i, + RefMode: refMode, + WriteType: writeType, + HasGenerics: isCollectionType(fieldTypeId), // Container fields have declared element types + 
TagID: foryTag.ID, + HasForyTag: foryTag.HasTag, + TagRefSet: foryTag.RefSet, + TagRef: foryTag.Ref, + TagNullableSet: foryTag.NullableSet, + TagNullable: foryTag.Nullable, + IsPtr: fieldType.Kind() == reflect.Ptr, + } + fields = append(fields, fieldInfo) + fieldNames = append(fieldNames, fieldInfo.Name) + serializers = append(serializers, fieldSerializer) + typeIds = append(typeIds, fieldTypeId) + nullables = append(nullables, nullableFlag) + tagIDs = append(tagIDs, foryTag.ID) } - // ========================================================================== - // Phase 2: Varint primitives (int32, int64, int, uint32, uint64, uint, tagged int64/uint64) - // - These are variable-length encodings that must be written sequentially - // ========================================================================== - if canUseUnsafe && len(s.varintFields) > 0 { - for _, field := range s.varintFields { - fieldPtr := unsafe.Add(ptr, field.Offset) - switch field.DispatchId { - case PrimitiveVarint32DispatchId: - buf.WriteVarint32(*(*int32)(fieldPtr)) - case NotnullVarint32PtrDispatchId: - buf.WriteVarint32(**(**int32)(fieldPtr)) - case PrimitiveVarint64DispatchId: - buf.WriteVarint64(*(*int64)(fieldPtr)) - case NotnullVarint64PtrDispatchId: - buf.WriteVarint64(**(**int64)(fieldPtr)) - case PrimitiveIntDispatchId: - buf.WriteVarint64(int64(*(*int)(fieldPtr))) - case NotnullIntPtrDispatchId: - buf.WriteVarint64(int64(**(**int)(fieldPtr))) - case PrimitiveVarUint32DispatchId: - buf.WriteVaruint32(*(*uint32)(fieldPtr)) - case NotnullVarUint32PtrDispatchId: - buf.WriteVaruint32(**(**uint32)(fieldPtr)) - case PrimitiveVarUint64DispatchId: - buf.WriteVaruint64(*(*uint64)(fieldPtr)) - case NotnullVarUint64PtrDispatchId: - buf.WriteVaruint64(**(**uint64)(fieldPtr)) - case PrimitiveUintDispatchId: - buf.WriteVaruint64(uint64(*(*uint)(fieldPtr))) - case NotnullUintPtrDispatchId: - buf.WriteVaruint64(uint64(**(**uint)(fieldPtr))) - case PrimitiveTaggedInt64DispatchId: - 
buf.WriteTaggedInt64(*(*int64)(fieldPtr)) - case NotnullTaggedInt64PtrDispatchId: - buf.WriteTaggedInt64(**(**int64)(fieldPtr)) - case PrimitiveTaggedUint64DispatchId: - buf.WriteTaggedUint64(*(*uint64)(fieldPtr)) - case NotnullTaggedUint64PtrDispatchId: - buf.WriteTaggedUint64(**(**uint64)(fieldPtr)) - } - } - } else if len(s.varintFields) > 0 { - // Slow path for non-addressable values: use reflection - for _, field := range s.varintFields { - fieldValue := value.Field(field.FieldIndex) - switch field.DispatchId { - // Primitive types (non-pointer) - case PrimitiveVarint32DispatchId: - buf.WriteVarint32(int32(fieldValue.Int())) - case PrimitiveVarint64DispatchId: - buf.WriteVarint64(fieldValue.Int()) - case PrimitiveIntDispatchId: - buf.WriteVarint64(fieldValue.Int()) - case PrimitiveVarUint32DispatchId: - buf.WriteVaruint32(uint32(fieldValue.Uint())) - case PrimitiveVarUint64DispatchId: - buf.WriteVaruint64(fieldValue.Uint()) - case PrimitiveUintDispatchId: - buf.WriteVaruint64(fieldValue.Uint()) - case PrimitiveTaggedInt64DispatchId: - buf.WriteTaggedInt64(fieldValue.Int()) - case PrimitiveTaggedUint64DispatchId: - buf.WriteTaggedUint64(fieldValue.Uint()) - // Notnull pointer types - dereference and write - case NotnullVarint32PtrDispatchId: - buf.WriteVarint32(int32(fieldValue.Elem().Int())) - case NotnullVarint64PtrDispatchId: - buf.WriteVarint64(fieldValue.Elem().Int()) - case NotnullIntPtrDispatchId: - buf.WriteVarint64(fieldValue.Elem().Int()) - case NotnullVarUint32PtrDispatchId: - buf.WriteVaruint32(uint32(fieldValue.Elem().Uint())) - case NotnullVarUint64PtrDispatchId: - buf.WriteVaruint64(fieldValue.Elem().Uint()) - case NotnullUintPtrDispatchId: - buf.WriteVaruint64(fieldValue.Elem().Uint()) - case NotnullTaggedInt64PtrDispatchId: - buf.WriteTaggedInt64(fieldValue.Elem().Int()) - case NotnullTaggedUint64PtrDispatchId: - buf.WriteTaggedUint64(fieldValue.Elem().Uint()) - } - } + // Sort fields according to specification using nullable info and tag IDs 
for consistent ordering + serializers, fieldNames = sortFields(typeResolver, fieldNames, serializers, typeIds, nullables, tagIDs) + order := make(map[string]int, len(fieldNames)) + for idx, name := range fieldNames { + order[name] = idx } - // ========================================================================== - // Phase 3: Remaining fields (strings, slices, maps, structs, enums) - // - These require per-field handling (ref flags, type info, serializers) - // - No intermediate error checks - trade error path performance for normal path - // ========================================================================== - for _, field := range s.remainingFields { - s.writeRemainingField(ctx, ptr, field, value) - } -} + sort.SliceStable(fields, func(i, j int) bool { + oi, okI := order[fields[i].Name] + oj, okJ := order[fields[j].Name] + switch { + case okI && okJ: + return oi < oj + case okI: + return true + case okJ: + return false + default: + return false + } + }) -// writeRemainingField writes a non-primitive field (string, slice, map, struct, enum) -func (s *structSerializer) writeRemainingField(ctx *WriteContext, ptr unsafe.Pointer, field *FieldInfo, value reflect.Value) { - buf := ctx.Buffer() + s.fields = fields + s.fieldGroup = GroupFields(s.fields) - if DebugOutputEnabled() { - fieldValue := value.Field(field.FieldIndex) - fmt.Printf("[Go] WriteRemainingField: %s, dispatchId=%d, value=%v\n", - field.Name, field.DispatchId, fieldValue.Interface()) + // Debug output for field order comparison with Java + if s.type_ != nil { + s.fieldGroup.DebugPrint(s.type_.Name()) } - // Fast path dispatch using pre-computed DispatchId - // ptr must be valid (addressable value) - if ptr != nil { - fieldPtr := unsafe.Add(ptr, field.Offset) - switch field.DispatchId { - case StringDispatchId: - if field.RefMode == RefModeTracking { - break // Fall through to slow path + return nil +} + +// initFieldsFromTypeDef initializes fields from remote fieldDefs using typeResolver 
+func (s *structSerializer) initFieldsFromTypeDef(typeResolver *TypeResolver) error { + type_ := s.type_ + if type_ == nil { + // Type is not known - we'll create an interface{} placeholder + // This happens when deserializing unknown types in compatible mode + // For now, we'll create fields that discard all data + var fields []FieldInfo + for _, def := range s.fieldDefs { + fieldSerializer, _ := getFieldTypeSerializerWithResolver(typeResolver, def.fieldType) + remoteTypeInfo, _ := def.fieldType.getTypeInfoWithResolver(typeResolver) + remoteType := remoteTypeInfo.Type + if remoteType == nil { + remoteType = reflect.TypeOf((*interface{})(nil)).Elem() } - // Check if local field is a pointer type (schema evolution: remote non-nullable, local nullable) - localIsPtr := field.Type.Kind() == reflect.Ptr - // Only write null flag if RefMode requires it (nullable field) - if field.RefMode == RefModeNullOnly { - if localIsPtr { - strPtr := *(**string)(fieldPtr) - if strPtr == nil { - buf.WriteInt8(NullFlag) - return + // Get TypeId from FieldType's TypeId method + fieldTypeId := def.fieldType.TypeId() + // Pre-compute RefMode based on trackRef and FieldDef flags + refMode := RefModeNone + if def.trackingRef { + refMode = RefModeTracking + } else if def.nullable { + refMode = RefModeNullOnly + } + // Pre-compute WriteType: true for struct fields in compatible mode + writeType := typeResolver.Compatible() && isStructField(remoteType) + + // Pre-compute DispatchId, with special handling for enum fields + dispatchId := GetDispatchId(remoteType) + if fieldSerializer != nil { + if _, ok := fieldSerializer.(*enumSerializer); ok { + dispatchId = EnumDispatchId + } else if ptrSer, ok := fieldSerializer.(*ptrToValueSerializer); ok { + if _, ok := ptrSer.valueSerializer.(*enumSerializer); ok { + dispatchId = EnumDispatchId } - buf.WriteInt8(NotNullValueFlag) - ctx.WriteString(*strPtr) - } else { - buf.WriteInt8(NotNullValueFlag) - ctx.WriteString(*(*string)(fieldPtr)) } - return } - 
// RefModeNone: no null flag, write value directly - if localIsPtr { - strPtr := *(**string)(fieldPtr) - if strPtr == nil { - ctx.WriteString("") // Write empty string for nil pointer when non-nullable - } else { - ctx.WriteString(*strPtr) - } - } else { - ctx.WriteString(*(*string)(fieldPtr)) + + fieldInfo := FieldInfo{ + Name: def.name, + Offset: 0, + Type: remoteType, + DispatchId: dispatchId, + TypeId: fieldTypeId, + Serializer: fieldSerializer, + Nullable: def.nullable, // Use remote nullable flag + FieldIndex: -1, // Mark as non-existent field to discard data + FieldDef: def, // Save original FieldDef for skipping + RefMode: refMode, + WriteType: writeType, + HasGenerics: isCollectionType(fieldTypeId), // Container fields have declared element types + IsPtr: remoteType != nil && remoteType.Kind() == reflect.Ptr, } - return - case EnumDispatchId: - // Enums don't track refs - always use fast path - writeEnumField(ctx, field, value.Field(field.FieldIndex)) - return - case StringSliceDispatchId: - if field.RefMode == RefModeTracking { - break + fields = append(fields, fieldInfo) + } + s.fields = fields + s.fieldGroup = GroupFields(s.fields) + s.typeDefDiffers = true // Unknown type, must use ordered reading + return nil + } + + // Build maps from field names and tag IDs to struct field indices + fieldNameToIndex := make(map[string]int) + fieldNameToOffset := make(map[string]uintptr) + fieldNameToType := make(map[string]reflect.Type) + fieldTagIDToIndex := make(map[int]int) // tag ID -> struct field index + fieldTagIDToOffset := make(map[int]uintptr) // tag ID -> field offset + fieldTagIDToType := make(map[int]reflect.Type) // tag ID -> field type + fieldTagIDToName := make(map[int]string) // tag ID -> snake_case field name + for i := 0; i < type_.NumField(); i++ { + field := type_.Field(i) + + // Parse fory tag and skip ignored fields + foryTag := parseForyTag(field) + if foryTag.Ignore { + continue + } + + name := SnakeCase(field.Name) + fieldNameToIndex[name] 
= i + fieldNameToOffset[name] = field.Offset + fieldNameToType[name] = field.Type + + // Also index by tag ID if present + if foryTag.ID >= 0 { + fieldTagIDToIndex[foryTag.ID] = i + fieldTagIDToOffset[foryTag.ID] = field.Offset + fieldTagIDToType[foryTag.ID] = field.Type + fieldTagIDToName[foryTag.ID] = name + } + } + + var fields []FieldInfo + + for _, def := range s.fieldDefs { + fieldSerializer, err := getFieldTypeSerializerWithResolver(typeResolver, def.fieldType) + if err != nil || fieldSerializer == nil { + // If we can't get serializer from typeID, try to get it from the Go type + // This can happen when the type isn't registered in typeIDToTypeInfo + remoteTypeInfo, _ := def.fieldType.getTypeInfoWithResolver(typeResolver) + if remoteTypeInfo.Type != nil { + fieldSerializer, _ = typeResolver.getSerializerByType(remoteTypeInfo.Type, true) } - ctx.WriteStringSlice(*(*[]string)(fieldPtr), field.RefMode, false, true) - return - case BoolSliceDispatchId: - if field.RefMode == RefModeTracking { - break + } + + // Get the remote type from fieldDef + remoteTypeInfo, _ := def.fieldType.getTypeInfoWithResolver(typeResolver) + remoteType := remoteTypeInfo.Type + // Track if type lookup failed - we'll need to skip such fields + // Note: DynamicFieldType.getTypeInfoWithResolver returns interface{} (not nil) when lookup fails + emptyInterfaceType := reflect.TypeOf((*interface{})(nil)).Elem() + typeLookupFailed := remoteType == nil || remoteType == emptyInterfaceType + if remoteType == nil { + remoteType = emptyInterfaceType + } + + // For struct-like fields, even if TypeDef lookup fails, we can try to read + // the field because type resolution happens at read time from the buffer. + // The type name might map to a different local type. 
+ isStructLikeField := isStructFieldType(def.fieldType) + + // Try to find corresponding local field + // First try to match by tag ID (if remote def uses tag ID) + // Then fall back to matching by field name + fieldIndex := -1 + var offset uintptr + var fieldType reflect.Type + var localFieldName string + var localType reflect.Type + var exists bool + + if def.tagID >= 0 { + // Try to match by tag ID + if idx, ok := fieldTagIDToIndex[def.tagID]; ok { + exists = true + fieldIndex = idx // Will be overwritten if types are compatible + localType = fieldTagIDToType[def.tagID] + offset = fieldTagIDToOffset[def.tagID] + localFieldName = fieldTagIDToName[def.tagID] } - ctx.WriteBoolSlice(*(*[]bool)(fieldPtr), field.RefMode, false) - return - case Int8SliceDispatchId: - if field.RefMode == RefModeTracking { - break + } + + // Fall back to name-based matching if tag ID match failed + if !exists && def.name != "" { + if _, ok := fieldNameToIndex[def.name]; ok { + exists = true + localType = fieldNameToType[def.name] + offset = fieldNameToOffset[def.name] + localFieldName = def.name } - ctx.WriteInt8Slice(*(*[]int8)(fieldPtr), field.RefMode, false) - return - case ByteSliceDispatchId: - if field.RefMode == RefModeTracking { - break + } + + if exists { + idx := fieldNameToIndex[localFieldName] + if def.tagID >= 0 { + idx = fieldTagIDToIndex[def.tagID] } - ctx.WriteByteSlice(*(*[]byte)(fieldPtr), field.RefMode, false) - return - case Int16SliceDispatchId: - if field.RefMode == RefModeTracking { - break + // Check if types are compatible + // For primitive types: skip if types don't match + // For struct-like types: allow read even if TypeDef lookup failed, + // because runtime type resolution by name might work + shouldRead := false + isPolymorphicField := def.fieldType.TypeId() == UNKNOWN + defTypeId := def.fieldType.TypeId() + // Check if field is an enum - either by type ID or by serializer type + // The type ID may be a composite value with namespace bits, so check the low 
8 bits + internalDefTypeId := defTypeId & 0xFF + isEnumField := internalDefTypeId == NAMED_ENUM || internalDefTypeId == ENUM + if !isEnumField && fieldSerializer != nil { + _, isEnumField = fieldSerializer.(*enumSerializer) } - ctx.WriteInt16Slice(*(*[]int16)(fieldPtr), field.RefMode, false) - return - case Int32SliceDispatchId: - if field.RefMode == RefModeTracking { - break + if isPolymorphicField && localType.Kind() == reflect.Interface { + // For polymorphic (UNKNOWN) fields with interface{} local type, + // allow reading - the actual type will be determined at runtime + shouldRead = true + fieldType = localType + } else if typeLookupFailed && isEnumField { + // For enum fields with failed TypeDef lookup (NAMED_ENUM stores by namespace/typename, not typeId), + // check if local field is a numeric type (Go enums are int-based) + // Also handle pointer enum fields (*EnumType) + localKind := localType.Kind() + elemKind := localKind + if localKind == reflect.Ptr { + elemKind = localType.Elem().Kind() + } + if isNumericKind(elemKind) { + shouldRead = true + fieldType = localType + // Get the serializer for the base type (the enum type, not the pointer) + baseType := localType + if localKind == reflect.Ptr { + baseType = localType.Elem() + } + fieldSerializer, _ = typeResolver.getSerializerByType(baseType, true) + } + } else if typeLookupFailed && isStructLikeField { + // For struct fields with failed TypeDef lookup, check if local field can hold a struct + localKind := localType.Kind() + if localKind == reflect.Ptr { + localKind = localType.Elem().Kind() + } + if localKind == reflect.Struct || localKind == reflect.Interface { + shouldRead = true + fieldType = localType // Use local type for struct fields + } + } else if typeLookupFailed && (defTypeId == LIST || defTypeId == SET) { + // For collection fields with failed type lookup (e.g., List with interface element type), + // check if local type is a slice with interface element type (e.g., []Animal) + // The type 
lookup fails because sliceSerializer doesn't support interface elements + if localType.Kind() == reflect.Slice && localType.Elem().Kind() == reflect.Interface { + shouldRead = true + fieldType = localType + } + } else if !typeLookupFailed && typesCompatible(localType, remoteType) { + shouldRead = true + fieldType = localType } - ctx.WriteInt32Slice(*(*[]int32)(fieldPtr), field.RefMode, false) - return - case Int64SliceDispatchId: - if field.RefMode == RefModeTracking { - break + + if shouldRead { + fieldIndex = idx + // offset was already set above when matching by tag ID or field name + // For struct-like fields with failed type lookup, get the serializer for the local type + if typeLookupFailed && isStructLikeField && fieldSerializer == nil { + fieldSerializer, _ = typeResolver.getSerializerByType(localType, true) + } + // For collection fields with interface element types, use sliceDynSerializer + if typeLookupFailed && (defTypeId == LIST || defTypeId == SET) && fieldSerializer == nil { + if localType.Kind() == reflect.Slice && localType.Elem().Kind() == reflect.Interface { + fieldSerializer = mustNewSliceDynSerializer(localType.Elem()) + } + } + // If local type is *T and remote type is T, we need the serializer for *T + // This handles Java's Integer/Long (nullable boxed types) mapping to Go's *int32/*int64 + if localType.Kind() == reflect.Ptr && localType.Elem() == remoteType { + fieldSerializer, _ = typeResolver.getSerializerByType(localType, true) + } + // For pointer enum fields (*EnumType), get the serializer for the base enum type + // The struct read/write code will handle pointer dereferencing + if isEnumField && localType.Kind() == reflect.Ptr { + baseType := localType.Elem() + fieldSerializer, _ = typeResolver.getSerializerByType(baseType, true) + if DebugOutputEnabled() { + fmt.Printf("[fory-debug] pointer enum field %s: localType=%v baseType=%v serializer=%T\n", + def.name, localType, baseType, fieldSerializer) + } + } + // For array fields, use 
array serializers (not slice serializers) even if typeID maps to slice serializer + // The typeID (INT16_ARRAY, etc.) is shared between arrays and slices, but we need the correct + // serializer based on the actual Go type + if localType.Kind() == reflect.Array { + elemType := localType.Elem() + switch elemType.Kind() { + case reflect.Bool: + fieldSerializer = boolArraySerializer{arrayType: localType} + case reflect.Int8: + fieldSerializer = int8ArraySerializer{arrayType: localType} + case reflect.Int16: + fieldSerializer = int16ArraySerializer{arrayType: localType} + case reflect.Int32: + fieldSerializer = int32ArraySerializer{arrayType: localType} + case reflect.Int64: + fieldSerializer = int64ArraySerializer{arrayType: localType} + case reflect.Uint8: + fieldSerializer = uint8ArraySerializer{arrayType: localType} + case reflect.Float32: + fieldSerializer = float32ArraySerializer{arrayType: localType} + case reflect.Float64: + fieldSerializer = float64ArraySerializer{arrayType: localType} + case reflect.Int: + if reflect.TypeOf(int(0)).Size() == 8 { + fieldSerializer = int64ArraySerializer{arrayType: localType} + } else { + fieldSerializer = int32ArraySerializer{arrayType: localType} + } + } + } + } else { + // Types are incompatible or unknown - use remote type but mark field as not settable + fieldType = remoteType + fieldIndex = -1 + offset = 0 // Don't set offset for incompatible fields } - ctx.WriteInt64Slice(*(*[]int64)(fieldPtr), field.RefMode, false) - return - case IntSliceDispatchId: - if field.RefMode == RefModeTracking { - break + } else { + // Field doesn't exist locally, use type from fieldDef + fieldType = remoteType + } + + // Get TypeId from FieldType's TypeId method + fieldTypeId := def.fieldType.TypeId() + // Pre-compute RefMode based on FieldDef flags (trackingRef and nullable) + refMode := RefModeNone + if def.trackingRef { + refMode = RefModeTracking + } else if def.nullable { + refMode = RefModeNullOnly + } + // Pre-compute WriteType: true 
for struct fields in compatible mode + writeType := typeResolver.Compatible() && isStructField(fieldType) + + // Pre-compute DispatchId, with special handling for pointer-to-numeric and enum fields + // IMPORTANT: For compatible mode reading, we must use the REMOTE nullable flag + // to determine DispatchId, because Java wrote data with its nullable semantics. + var dispatchId DispatchId + localKind := fieldType.Kind() + localIsPtr := localKind == reflect.Ptr + localIsNumeric := isNumericKind(localKind) || (localIsPtr && isNumericKind(fieldType.Elem().Kind())) + + if localIsNumeric { + if localIsPtr { + if def.nullable { + // Local is *T, remote is nullable - use nullable DispatchId + dispatchId = getDispatchIdFromTypeId(fieldTypeId, true) + } else { + // Local is *T, remote is NOT nullable - use notnull pointer DispatchId + encoding := getEncodingFromTypeId(fieldTypeId) + dispatchId = getNotnullPtrDispatchId(fieldType.Elem().Kind(), encoding) + } + } else { + if def.nullable { + // Local is T (non-pointer), remote is nullable - use nullable DispatchId + dispatchId = getDispatchIdFromTypeId(fieldTypeId, true) + } else { + // Local is T, remote is NOT nullable - use primitive DispatchId + dispatchId = GetDispatchId(fieldType) + } } - ctx.WriteIntSlice(*(*[]int)(fieldPtr), field.RefMode, false) - return - case UintSliceDispatchId: - if field.RefMode == RefModeTracking { + } else { + dispatchId = GetDispatchId(fieldType) + } + if fieldSerializer != nil { + if _, ok := fieldSerializer.(*enumSerializer); ok { + dispatchId = EnumDispatchId + } else if ptrSer, ok := fieldSerializer.(*ptrToValueSerializer); ok { + if _, ok := ptrSer.valueSerializer.(*enumSerializer); ok { + dispatchId = EnumDispatchId + } + } + } + + // Determine field name: use local field name if matched, otherwise use def.name + fieldName := def.name + if localFieldName != "" { + fieldName = localFieldName + } + + fieldInfo := FieldInfo{ + Name: fieldName, + Offset: offset, + Type: fieldType, + 
DispatchId: dispatchId, + TypeId: fieldTypeId, + Serializer: fieldSerializer, + Nullable: def.nullable, // Use remote nullable flag + FieldIndex: fieldIndex, + FieldDef: def, // Save original FieldDef for skipping + RefMode: refMode, + WriteType: writeType, + HasGenerics: isCollectionType(fieldTypeId), // Container fields have declared element types + TagID: def.tagID, + HasForyTag: def.tagID >= 0, + IsPtr: fieldType != nil && fieldType.Kind() == reflect.Ptr, + } + fields = append(fields, fieldInfo) + } + + s.fields = fields + s.fieldGroup = GroupFields(s.fields) + + // Debug output for field order comparison with Java MetaSharedSerializer + if DebugOutputEnabled() && s.type_ != nil { + fmt.Printf("[Go] Remote TypeDef order (%d fields):\n", len(s.fieldDefs)) + for i, def := range s.fieldDefs { + fmt.Printf("[Go] [%d] %s -> typeId=%d, nullable=%v\n", i, def.name, def.fieldType.TypeId(), def.nullable) + } + s.fieldGroup.DebugPrint(s.type_.Name()) + } + + // Compute typeDefDiffers: true if any field doesn't exist locally, has type mismatch, + // or has nullable mismatch (which affects field ordering) + // When typeDefDiffers is false, we can use grouped reading for better performance + s.typeDefDiffers = false + for i, field := range fields { + if field.FieldIndex < 0 { + // Field exists in remote TypeDef but not locally + s.typeDefDiffers = true + break + } + // Check if nullable flag differs between remote and local + // Remote nullable is stored in fieldDefs[i].nullable + // Local nullable is determined by whether the Go field is a pointer type + if i < len(s.fieldDefs) && field.FieldIndex >= 0 { + remoteNullable := s.fieldDefs[i].nullable + // Check if local Go field is a pointer type (can be nil = nullable) + localNullable := field.IsPtr + if remoteNullable != localNullable { + s.typeDefDiffers = true break } - ctx.WriteUintSlice(*(*[]uint)(fieldPtr), field.RefMode, false) - return - case Float32SliceDispatchId: - if field.RefMode == RefModeTracking { - break - } 
- ctx.WriteFloat32Slice(*(*[]float32)(fieldPtr), field.RefMode, false) - return - case Float64SliceDispatchId: - if field.RefMode == RefModeTracking { - break - } - ctx.WriteFloat64Slice(*(*[]float64)(fieldPtr), field.RefMode, false) - return - case StringStringMapDispatchId: - if field.RefMode == RefModeTracking { - break - } - ctx.WriteStringStringMap(*(*map[string]string)(fieldPtr), field.RefMode, false) - return - case StringInt64MapDispatchId: - if field.RefMode == RefModeTracking { - break - } - ctx.WriteStringInt64Map(*(*map[string]int64)(fieldPtr), field.RefMode, false) - return - case StringInt32MapDispatchId: - if field.RefMode == RefModeTracking { - break - } - ctx.WriteStringInt32Map(*(*map[string]int32)(fieldPtr), field.RefMode, false) - return - case StringIntMapDispatchId: - if field.RefMode == RefModeTracking { - break + } + } + + if DebugOutputEnabled() && s.type_ != nil { + fmt.Printf("[Go] typeDefDiffers=%v for %s\n", s.typeDefDiffers, s.type_.Name()) + } + + return nil +} + +func (s *structSerializer) computeHash() int32 { + // Build FieldFingerprintInfo for each field + fields := make([]FieldFingerprintInfo, 0, len(s.fields)) + for _, field := range s.fields { + var typeId TypeId + isEnumField := false + if field.Serializer == nil { + typeId = UNKNOWN + } else { + typeId = field.TypeId + // Check if this is an enum serializer (directly or wrapped in ptrToValueSerializer) + if _, ok := field.Serializer.(*enumSerializer); ok { + isEnumField = true + typeId = UNKNOWN + } else if ptrSer, ok := field.Serializer.(*ptrToValueSerializer); ok { + if _, ok := ptrSer.valueSerializer.(*enumSerializer); ok { + isEnumField = true + typeId = UNKNOWN + } } - ctx.WriteStringIntMap(*(*map[string]int)(fieldPtr), field.RefMode, false) - return - case StringFloat64MapDispatchId: - if field.RefMode == RefModeTracking { - break + // For user-defined types (struct, ext types), use UNKNOWN in fingerprint + // This matches Java's behavior where user-defined types return 
UNKNOWN + // to ensure consistent fingerprint computation across languages + if isUserDefinedType(int16(typeId)) { + typeId = UNKNOWN } - ctx.WriteStringFloat64Map(*(*map[string]float64)(fieldPtr), field.RefMode, false) - return - case StringBoolMapDispatchId: - // NOTE: map[string]bool is used to represent SETs in Go xlang mode. - // We CANNOT use the fast path here because it writes MAP format, - // but the data should be written in SET format. Fall through to slow path - // which uses setSerializer to correctly write the SET format. - break - case IntIntMapDispatchId: - if field.RefMode == RefModeTracking { - break + // For fixed-size arrays with primitive elements, use primitive array type IDs + if field.Type.Kind() == reflect.Array { + elemKind := field.Type.Elem().Kind() + switch elemKind { + case reflect.Int8: + typeId = INT8_ARRAY + case reflect.Int16: + typeId = INT16_ARRAY + case reflect.Int32: + typeId = INT32_ARRAY + case reflect.Int64: + typeId = INT64_ARRAY + case reflect.Float32: + typeId = FLOAT32_ARRAY + case reflect.Float64: + typeId = FLOAT64_ARRAY + default: + typeId = LIST + } + } else if field.Type.Kind() == reflect.Slice { + typeId = LIST + } else if field.Type.Kind() == reflect.Map { + // map[T]bool is used to represent a Set in Go + if field.Type.Elem().Kind() == reflect.Bool { + typeId = SET + } else { + typeId = MAP + } } - ctx.WriteIntIntMap(*(*map[int]int)(fieldPtr), field.RefMode, false) + } + + // Determine nullable flag for xlang compatibility: + // - Default: false for ALL fields (xlang default - aligned with all languages) + // - Primitives are always non-nullable + // - Can be overridden by explicit fory tag + nullable := false // Default to nullable=false for xlang mode + if field.TagNullableSet { + // Use explicit tag value if set + nullable = field.TagNullable + } + // Primitives are never nullable, regardless of tag + if isNonNullablePrimitiveKind(field.Type.Kind()) && !isEnumField { + nullable = false + } + + fields = 
append(fields, FieldFingerprintInfo{ + FieldID: field.TagID, + FieldName: SnakeCase(field.Name), + TypeID: typeId, + // Ref is based on explicit tag annotation only, NOT runtime ref_tracking config + // This allows fingerprint to be computed at compile time for C++/Rust + Ref: field.TagRefSet && field.TagRef, + Nullable: nullable, + }) + } + + hashString := ComputeStructFingerprint(fields) + data := []byte(hashString) + h1, _ := murmur3.Sum128WithSeed(data, 47) + hash := int32(h1 & 0xFFFFFFFF) + + if DebugOutputEnabled() { + fmt.Printf("[Go][fory-debug] struct %v version fingerprint=\"%s\" version hash=%d\n", s.type_, hashString, hash) + } + + if hash == 0 { + panic(fmt.Errorf("hash for type %v is 0", s.type_)) + } + return hash +} + +func (s *structSerializer) Write(ctx *WriteContext, refMode RefMode, writeType bool, hasGenerics bool, value reflect.Value) { + switch refMode { + case RefModeTracking: + if value.Kind() == reflect.Ptr && value.IsNil() { + ctx.buffer.WriteInt8(NullFlag) return - case NullableTaggedInt64DispatchId: - // Nullable tagged INT64: write ref flag, then tagged encoding - ptr := *(**int64)(fieldPtr) - if ptr == nil { - buf.WriteInt8(NullFlag) - return - } - buf.WriteInt8(NotNullValueFlag) - buf.WriteTaggedInt64(*ptr) + } + refWritten, err := ctx.RefResolver().WriteRefOrNull(ctx.buffer, value) + if err != nil { + ctx.SetError(FromError(err)) return - case NullableTaggedUint64DispatchId: - // Nullable tagged UINT64: write ref flag, then tagged encoding - ptr := *(**uint64)(fieldPtr) - if ptr == nil { - buf.WriteInt8(NullFlag) - return - } - buf.WriteInt8(NotNullValueFlag) - buf.WriteTaggedUint64(*ptr) + } + if refWritten { return - // Nullable fixed-size types - case NullableBoolDispatchId: - ptr := *(**bool)(fieldPtr) - if ptr == nil { - buf.WriteInt8(NullFlag) - return - } - buf.WriteInt8(NotNullValueFlag) - buf.WriteBool(*ptr) + } + case RefModeNullOnly: + if value.Kind() == reflect.Ptr && value.IsNil() { + ctx.buffer.WriteInt8(NullFlag) 
return - case NullableInt8DispatchId: - ptr := *(**int8)(fieldPtr) - if ptr == nil { - buf.WriteInt8(NullFlag) - return - } - buf.WriteInt8(NotNullValueFlag) - buf.WriteInt8(*ptr) + } + ctx.buffer.WriteInt8(NotNullValueFlag) + } + if writeType { + // Structs have dynamic type IDs, need to look up from TypeResolver + typeInfo, err := ctx.TypeResolver().getTypeInfo(value, true) + if err != nil { + ctx.SetError(FromError(err)) return - case NullableUint8DispatchId: - ptr := *(**uint8)(fieldPtr) - if ptr == nil { - buf.WriteInt8(NullFlag) - return - } - buf.WriteInt8(NotNullValueFlag) - buf.WriteUint8(*ptr) + } + ctx.TypeResolver().WriteTypeInfo(ctx.buffer, typeInfo, ctx.Err()) + } + s.WriteData(ctx, value) +} + +func (s *structSerializer) WriteData(ctx *WriteContext, value reflect.Value) { + // Early error check - skip all intermediate checks for normal path performance + if ctx.HasError() { + return + } + + // Lazy initialization + if !s.initialized { + if err := s.initialize(ctx.TypeResolver()); err != nil { + ctx.SetError(FromError(err)) return - case NullableInt16DispatchId: - ptr := *(**int16)(fieldPtr) - if ptr == nil { - buf.WriteInt8(NullFlag) - return - } - buf.WriteInt8(NotNullValueFlag) - buf.WriteInt16(*ptr) - return - case NullableUint16DispatchId: - ptr := *(**uint16)(fieldPtr) - if ptr == nil { - buf.WriteInt8(NullFlag) - return - } - buf.WriteInt8(NotNullValueFlag) - buf.WriteUint16(*ptr) - return - case NullableInt32DispatchId: - ptr := *(**int32)(fieldPtr) - if ptr == nil { - buf.WriteInt8(NullFlag) - return - } - buf.WriteInt8(NotNullValueFlag) - buf.WriteInt32(*ptr) - return - case NullableUint32DispatchId: - ptr := *(**uint32)(fieldPtr) - if ptr == nil { - buf.WriteInt8(NullFlag) - return - } - buf.WriteInt8(NotNullValueFlag) - buf.WriteUint32(*ptr) - return - case NullableInt64DispatchId: - ptr := *(**int64)(fieldPtr) - if ptr == nil { - buf.WriteInt8(NullFlag) - return - } - buf.WriteInt8(NotNullValueFlag) - buf.WriteInt64(*ptr) - return - case 
NullableUint64DispatchId: - ptr := *(**uint64)(fieldPtr) - if ptr == nil { - buf.WriteInt8(NullFlag) - return - } - buf.WriteInt8(NotNullValueFlag) - buf.WriteUint64(*ptr) - return - case NullableFloat32DispatchId: - ptr := *(**float32)(fieldPtr) - if ptr == nil { - buf.WriteInt8(NullFlag) - return - } - buf.WriteInt8(NotNullValueFlag) - buf.WriteFloat32(*ptr) - return - case NullableFloat64DispatchId: - ptr := *(**float64)(fieldPtr) - if ptr == nil { - buf.WriteInt8(NullFlag) - return - } - buf.WriteInt8(NotNullValueFlag) - buf.WriteFloat64(*ptr) - return - // Nullable varint types - case NullableVarint32DispatchId: - ptr := *(**int32)(fieldPtr) - if ptr == nil { - buf.WriteInt8(NullFlag) - return - } - buf.WriteInt8(NotNullValueFlag) - buf.WriteVarint32(*ptr) - return - case NullableVarUint32DispatchId: - ptr := *(**uint32)(fieldPtr) - if ptr == nil { - buf.WriteInt8(NullFlag) - return - } - buf.WriteInt8(NotNullValueFlag) - buf.WriteVaruint32(*ptr) - return - case NullableVarint64DispatchId: - ptr := *(**int64)(fieldPtr) - if ptr == nil { - buf.WriteInt8(NullFlag) - return - } - buf.WriteInt8(NotNullValueFlag) - buf.WriteVarint64(*ptr) - return - case NullableVarUint64DispatchId: - ptr := *(**uint64)(fieldPtr) - if ptr == nil { - buf.WriteInt8(NullFlag) - return - } - buf.WriteInt8(NotNullValueFlag) - buf.WriteVaruint64(*ptr) + } + } + + buf := ctx.Buffer() + + // Dereference pointer if needed + if value.Kind() == reflect.Ptr { + if value.IsNil() { + ctx.SetError(SerializationError("cannot write nil pointer")) return } + value = value.Elem() } - // Slow path: use reflection for non-addressable values - fieldValue := value.Field(field.FieldIndex) - - // Handle nullable types via reflection when ptr is nil (non-addressable) - switch field.DispatchId { - case NullableTaggedInt64DispatchId: - if fieldValue.IsNil() { - buf.WriteInt8(NullFlag) - return - } - buf.WriteInt8(NotNullValueFlag) - buf.WriteTaggedInt64(fieldValue.Elem().Int()) - return - case 
NullableTaggedUint64DispatchId: - if fieldValue.IsNil() { - buf.WriteInt8(NullFlag) - return - } - buf.WriteInt8(NotNullValueFlag) - buf.WriteTaggedUint64(fieldValue.Elem().Uint()) - return - case NullableBoolDispatchId: - if fieldValue.IsNil() { - buf.WriteInt8(NullFlag) - return - } - buf.WriteInt8(NotNullValueFlag) - buf.WriteBool(fieldValue.Elem().Bool()) - return - case NullableInt8DispatchId: - if fieldValue.IsNil() { - buf.WriteInt8(NullFlag) - return - } - buf.WriteInt8(NotNullValueFlag) - buf.WriteInt8(int8(fieldValue.Elem().Int())) - return - case NullableUint8DispatchId: - if fieldValue.IsNil() { - buf.WriteInt8(NullFlag) - return - } - buf.WriteInt8(NotNullValueFlag) - buf.WriteUint8(uint8(fieldValue.Elem().Uint())) - return - case NullableInt16DispatchId: - if fieldValue.IsNil() { - buf.WriteInt8(NullFlag) - return - } - buf.WriteInt8(NotNullValueFlag) - buf.WriteInt16(int16(fieldValue.Elem().Int())) - return - case NullableUint16DispatchId: - if fieldValue.IsNil() { - buf.WriteInt8(NullFlag) - return - } - buf.WriteInt8(NotNullValueFlag) - buf.WriteUint16(uint16(fieldValue.Elem().Uint())) - return - case NullableInt32DispatchId: - if fieldValue.IsNil() { - buf.WriteInt8(NullFlag) - return - } - buf.WriteInt8(NotNullValueFlag) - buf.WriteInt32(int32(fieldValue.Elem().Int())) - return - case NullableUint32DispatchId: - if fieldValue.IsNil() { - buf.WriteInt8(NullFlag) - return - } - buf.WriteInt8(NotNullValueFlag) - buf.WriteUint32(uint32(fieldValue.Elem().Uint())) - return - case NullableInt64DispatchId: - if fieldValue.IsNil() { - buf.WriteInt8(NullFlag) - return - } - buf.WriteInt8(NotNullValueFlag) - buf.WriteInt64(fieldValue.Elem().Int()) - return - case NullableUint64DispatchId: - if fieldValue.IsNil() { - buf.WriteInt8(NullFlag) - return - } - buf.WriteInt8(NotNullValueFlag) - buf.WriteUint64(fieldValue.Elem().Uint()) - return - case NullableFloat32DispatchId: - if fieldValue.IsNil() { - buf.WriteInt8(NullFlag) - return - } - 
buf.WriteInt8(NotNullValueFlag) - buf.WriteFloat32(float32(fieldValue.Elem().Float())) - return - case NullableFloat64DispatchId: - if fieldValue.IsNil() { - buf.WriteInt8(NullFlag) - return - } - buf.WriteInt8(NotNullValueFlag) - buf.WriteFloat64(fieldValue.Elem().Float()) - return - case NullableVarint32DispatchId: - if fieldValue.IsNil() { - buf.WriteInt8(NullFlag) - return - } - buf.WriteInt8(NotNullValueFlag) - buf.WriteVarint32(int32(fieldValue.Elem().Int())) - return - case NullableVarUint32DispatchId: - if fieldValue.IsNil() { - buf.WriteInt8(NullFlag) - return - } - buf.WriteInt8(NotNullValueFlag) - buf.WriteVaruint32(uint32(fieldValue.Elem().Uint())) - return - case NullableVarint64DispatchId: - if fieldValue.IsNil() { - buf.WriteInt8(NullFlag) - return - } - buf.WriteInt8(NotNullValueFlag) - buf.WriteVarint64(fieldValue.Elem().Int()) - return - case NullableVarUint64DispatchId: - if fieldValue.IsNil() { - buf.WriteInt8(NullFlag) - return - } - buf.WriteInt8(NotNullValueFlag) - buf.WriteVaruint64(fieldValue.Elem().Uint()) - return + // In compatible mode with meta share, struct hash is not written + if !ctx.Compatible() { + buf.WriteInt32(s.structHash) } - // Fall back to serializer for other types - if field.Serializer != nil { - field.Serializer.Write(ctx, field.RefMode, field.WriteType, field.HasGenerics, fieldValue) - } else { - ctx.WriteValue(fieldValue, RefModeTracking, true) - } -} - -func (s *structSerializer) Read(ctx *ReadContext, refMode RefMode, readType bool, hasGenerics bool, value reflect.Value) { - buf := ctx.Buffer() - ctxErr := ctx.Err() - switch refMode { - case RefModeTracking: - refID, refErr := ctx.RefResolver().TryPreserveRefId(buf) - if refErr != nil { - ctx.SetError(FromError(refErr)) - return - } - if refID < int32(NotNullValueFlag) { - // Reference found - obj := ctx.RefResolver().GetReadObject(refID) - if obj.IsValid() { - value.Set(obj) - } - return - } - case RefModeNullOnly: - flag := buf.ReadInt8(ctxErr) - if flag == 
NullFlag { - return - } - } - if readType { - // Read type info - in compatible mode this returns the serializer with remote fieldDefs - typeID := buf.ReadVaruint32Small7(ctxErr) - internalTypeID := TypeId(typeID & 0xFF) - // Check if this is a struct type that needs type meta reading - if IsNamespacedType(TypeId(typeID)) || internalTypeID == COMPATIBLE_STRUCT || internalTypeID == STRUCT { - // For struct types in compatible mode, use the serializer from TypeInfo - typeInfo := ctx.TypeResolver().readTypeInfoWithTypeID(buf, typeID, ctxErr) - // Use the serializer from TypeInfo which has the remote field definitions - if structSer, ok := typeInfo.Serializer.(*structSerializer); ok && len(structSer.fieldDefs) > 0 { - structSer.ReadData(ctx, value.Type(), value) - return - } - } - } - s.ReadData(ctx, value.Type(), value) -} - -func (s *structSerializer) ReadData(ctx *ReadContext, type_ reflect.Type, value reflect.Value) { - // Early error check - skip all intermediate checks for normal path performance - if ctx.HasError() { - return - } - - // Lazy initialization - if !s.initialized { - if err := s.initialize(ctx.TypeResolver()); err != nil { - ctx.SetError(FromError(err)) - return - } - } - - buf := ctx.Buffer() - if value.Kind() == reflect.Ptr { - if value.IsNil() { - value.Set(reflect.New(type_.Elem())) - } - value = value.Elem() - type_ = type_.Elem() - } - - // In compatible mode with meta share, struct hash is not written - if !ctx.Compatible() { - err := ctx.Err() - structHash := buf.ReadInt32(err) - if structHash != s.structHash { - ctx.SetError(HashMismatchError(structHash, s.structHash, s.type_.String())) - return - } - } - - // Use ordered reading when: - // 1. TypeDef differs from local type (schema evolution) - // 2. 
Value is not addressable - if s.typeDefDiffers || !value.CanAddr() { - s.readFieldsInOrder(ctx, value) - return + // Check if value is addressable for unsafe access + canUseUnsafe := value.CanAddr() + var ptr unsafe.Pointer + if canUseUnsafe { + ptr = unsafe.Pointer(value.UnsafeAddr()) } // ========================================================================== - // Grouped reading for matching types (optimized path) - // - Types match, so all fields exist locally (no FieldIndex < 0 checks) - // - Use UnsafeGet at pre-computed offsets, update reader index once per phase + // Phase 1: Fixed-size primitives (bool, int8, int16, float32, float64) + // - Reserve once, inline unsafe writes with endian handling, update index once + // - field.WriteOffset computed at init time // ========================================================================== - ptr := unsafe.Pointer(value.UnsafeAddr()) - - // Phase 1: Fixed-size primitives (inline unsafe reads with endian handling) - if s.fixedSize > 0 { - baseOffset := buf.ReaderIndex() + if canUseUnsafe && s.fieldGroup.FixedSize > 0 { + buf.Reserve(s.fieldGroup.FixedSize) + baseOffset := buf.WriterIndex() data := buf.GetData() - for _, field := range s.fixedFields { + for _, field := range s.fieldGroup.FixedFields { fieldPtr := unsafe.Add(ptr, field.Offset) bufOffset := baseOffset + field.WriteOffset switch field.DispatchId { case PrimitiveBoolDispatchId: - *(*bool)(fieldPtr) = data[bufOffset] != 0 + if *(*bool)(fieldPtr) { + data[bufOffset] = 1 + } else { + data[bufOffset] = 0 + } + case NotnullBoolPtrDispatchId: + if **(**bool)(fieldPtr) { + data[bufOffset] = 1 + } else { + data[bufOffset] = 0 + } case PrimitiveInt8DispatchId: - *(*int8)(fieldPtr) = int8(data[bufOffset]) + data[bufOffset] = *(*byte)(fieldPtr) + case NotnullInt8PtrDispatchId: + data[bufOffset] = byte(**(**int8)(fieldPtr)) case PrimitiveUint8DispatchId: - *(*uint8)(fieldPtr) = data[bufOffset] + data[bufOffset] = *(*uint8)(fieldPtr) + case 
NotnullUint8PtrDispatchId: + data[bufOffset] = **(**uint8)(fieldPtr) case PrimitiveInt16DispatchId: if isLittleEndian { - *(*int16)(fieldPtr) = *(*int16)(unsafe.Pointer(&data[bufOffset])) + *(*int16)(unsafe.Pointer(&data[bufOffset])) = *(*int16)(fieldPtr) } else { - *(*int16)(fieldPtr) = int16(binary.LittleEndian.Uint16(data[bufOffset:])) + binary.LittleEndian.PutUint16(data[bufOffset:], uint16(*(*int16)(fieldPtr))) } - case PrimitiveUint16DispatchId: + case NotnullInt16PtrDispatchId: if isLittleEndian { - *(*uint16)(fieldPtr) = *(*uint16)(unsafe.Pointer(&data[bufOffset])) + *(*int16)(unsafe.Pointer(&data[bufOffset])) = **(**int16)(fieldPtr) } else { - *(*uint16)(fieldPtr) = binary.LittleEndian.Uint16(data[bufOffset:]) + binary.LittleEndian.PutUint16(data[bufOffset:], uint16(**(**int16)(fieldPtr))) } - case PrimitiveInt32DispatchId: + case PrimitiveUint16DispatchId: if isLittleEndian { - *(*int32)(fieldPtr) = *(*int32)(unsafe.Pointer(&data[bufOffset])) + *(*uint16)(unsafe.Pointer(&data[bufOffset])) = *(*uint16)(fieldPtr) } else { - *(*int32)(fieldPtr) = int32(binary.LittleEndian.Uint32(data[bufOffset:])) + binary.LittleEndian.PutUint16(data[bufOffset:], *(*uint16)(fieldPtr)) } - case PrimitiveUint32DispatchId: + case NotnullUint16PtrDispatchId: if isLittleEndian { - *(*uint32)(fieldPtr) = *(*uint32)(unsafe.Pointer(&data[bufOffset])) + *(*uint16)(unsafe.Pointer(&data[bufOffset])) = **(**uint16)(fieldPtr) } else { - *(*uint32)(fieldPtr) = binary.LittleEndian.Uint32(data[bufOffset:]) + binary.LittleEndian.PutUint16(data[bufOffset:], **(**uint16)(fieldPtr)) } - case PrimitiveInt64DispatchId: + case PrimitiveInt32DispatchId: if isLittleEndian { - *(*int64)(fieldPtr) = *(*int64)(unsafe.Pointer(&data[bufOffset])) + *(*int32)(unsafe.Pointer(&data[bufOffset])) = *(*int32)(fieldPtr) } else { - *(*int64)(fieldPtr) = int64(binary.LittleEndian.Uint64(data[bufOffset:])) + binary.LittleEndian.PutUint32(data[bufOffset:], uint32(*(*int32)(fieldPtr))) } - case 
PrimitiveUint64DispatchId: + case NotnullInt32PtrDispatchId: if isLittleEndian { - *(*uint64)(fieldPtr) = *(*uint64)(unsafe.Pointer(&data[bufOffset])) + *(*int32)(unsafe.Pointer(&data[bufOffset])) = **(**int32)(fieldPtr) } else { - *(*uint64)(fieldPtr) = binary.LittleEndian.Uint64(data[bufOffset:]) + binary.LittleEndian.PutUint32(data[bufOffset:], uint32(**(**int32)(fieldPtr))) } - case PrimitiveFloat32DispatchId: + case PrimitiveUint32DispatchId: if isLittleEndian { - *(*float32)(fieldPtr) = *(*float32)(unsafe.Pointer(&data[bufOffset])) + *(*uint32)(unsafe.Pointer(&data[bufOffset])) = *(*uint32)(fieldPtr) } else { - *(*float32)(fieldPtr) = math.Float32frombits(binary.LittleEndian.Uint32(data[bufOffset:])) + binary.LittleEndian.PutUint32(data[bufOffset:], *(*uint32)(fieldPtr)) } - case PrimitiveFloat64DispatchId: + case NotnullUint32PtrDispatchId: if isLittleEndian { - *(*float64)(fieldPtr) = *(*float64)(unsafe.Pointer(&data[bufOffset])) + *(*uint32)(unsafe.Pointer(&data[bufOffset])) = **(**uint32)(fieldPtr) } else { - *(*float64)(fieldPtr) = math.Float64frombits(binary.LittleEndian.Uint64(data[bufOffset:])) + binary.LittleEndian.PutUint32(data[bufOffset:], **(**uint32)(fieldPtr)) } - // Notnull pointer types - allocate and set pointer - case NotnullBoolPtrDispatchId: - v := new(bool) - *v = data[bufOffset] != 0 - *(**bool)(fieldPtr) = v - case NotnullInt8PtrDispatchId: - v := new(int8) - *v = int8(data[bufOffset]) - *(**int8)(fieldPtr) = v - case NotnullUint8PtrDispatchId: - v := new(uint8) - *v = data[bufOffset] - *(**uint8)(fieldPtr) = v - case NotnullInt16PtrDispatchId: - v := new(int16) + case PrimitiveInt64DispatchId: if isLittleEndian { - *v = *(*int16)(unsafe.Pointer(&data[bufOffset])) + *(*int64)(unsafe.Pointer(&data[bufOffset])) = *(*int64)(fieldPtr) } else { - *v = int16(binary.LittleEndian.Uint16(data[bufOffset:])) + binary.LittleEndian.PutUint64(data[bufOffset:], uint64(*(*int64)(fieldPtr))) } - *(**int16)(fieldPtr) = v - case 
NotnullUint16PtrDispatchId: - v := new(uint16) + case NotnullInt64PtrDispatchId: if isLittleEndian { - *v = *(*uint16)(unsafe.Pointer(&data[bufOffset])) + *(*int64)(unsafe.Pointer(&data[bufOffset])) = **(**int64)(fieldPtr) } else { - *v = binary.LittleEndian.Uint16(data[bufOffset:]) + binary.LittleEndian.PutUint64(data[bufOffset:], uint64(**(**int64)(fieldPtr))) } - *(**uint16)(fieldPtr) = v - case NotnullInt32PtrDispatchId: - v := new(int32) + case PrimitiveUint64DispatchId: if isLittleEndian { - *v = *(*int32)(unsafe.Pointer(&data[bufOffset])) + *(*uint64)(unsafe.Pointer(&data[bufOffset])) = *(*uint64)(fieldPtr) } else { - *v = int32(binary.LittleEndian.Uint32(data[bufOffset:])) + binary.LittleEndian.PutUint64(data[bufOffset:], *(*uint64)(fieldPtr)) } - *(**int32)(fieldPtr) = v - case NotnullUint32PtrDispatchId: - v := new(uint32) + case NotnullUint64PtrDispatchId: if isLittleEndian { - *v = *(*uint32)(unsafe.Pointer(&data[bufOffset])) + *(*uint64)(unsafe.Pointer(&data[bufOffset])) = **(**uint64)(fieldPtr) } else { - *v = binary.LittleEndian.Uint32(data[bufOffset:]) + binary.LittleEndian.PutUint64(data[bufOffset:], **(**uint64)(fieldPtr)) } - *(**uint32)(fieldPtr) = v - case NotnullInt64PtrDispatchId: - v := new(int64) + case PrimitiveFloat32DispatchId: if isLittleEndian { - *v = *(*int64)(unsafe.Pointer(&data[bufOffset])) + *(*float32)(unsafe.Pointer(&data[bufOffset])) = *(*float32)(fieldPtr) } else { - *v = int64(binary.LittleEndian.Uint64(data[bufOffset:])) + binary.LittleEndian.PutUint32(data[bufOffset:], math.Float32bits(*(*float32)(fieldPtr))) } - *(**int64)(fieldPtr) = v - case NotnullUint64PtrDispatchId: - v := new(uint64) + case NotnullFloat32PtrDispatchId: if isLittleEndian { - *v = *(*uint64)(unsafe.Pointer(&data[bufOffset])) + *(*float32)(unsafe.Pointer(&data[bufOffset])) = **(**float32)(fieldPtr) } else { - *v = binary.LittleEndian.Uint64(data[bufOffset:]) + binary.LittleEndian.PutUint32(data[bufOffset:], math.Float32bits(**(**float32)(fieldPtr))) } 
- *(**uint64)(fieldPtr) = v - case NotnullFloat32PtrDispatchId: - v := new(float32) + case PrimitiveFloat64DispatchId: if isLittleEndian { - *v = *(*float32)(unsafe.Pointer(&data[bufOffset])) + *(*float64)(unsafe.Pointer(&data[bufOffset])) = *(*float64)(fieldPtr) } else { - *v = math.Float32frombits(binary.LittleEndian.Uint32(data[bufOffset:])) + binary.LittleEndian.PutUint64(data[bufOffset:], math.Float64bits(*(*float64)(fieldPtr))) } - *(**float32)(fieldPtr) = v case NotnullFloat64PtrDispatchId: - v := new(float64) if isLittleEndian { - *v = *(*float64)(unsafe.Pointer(&data[bufOffset])) + *(*float64)(unsafe.Pointer(&data[bufOffset])) = **(**float64)(fieldPtr) } else { - *v = math.Float64frombits(binary.LittleEndian.Uint64(data[bufOffset:])) + binary.LittleEndian.PutUint64(data[bufOffset:], math.Float64bits(**(**float64)(fieldPtr))) } - *(**float64)(fieldPtr) = v } } - // Update reader index ONCE after all fixed fields - buf.SetReaderIndex(baseOffset + s.fixedSize) - } - - // Phase 2: Varint primitives (must read sequentially - variable length) - // Note: For tagged int64/uint64, we can't use unsafe reads because they need bounds checking - if len(s.varintFields) > 0 { - err := ctx.Err() - for _, field := range s.varintFields { - fieldPtr := unsafe.Add(ptr, field.Offset) + // Update writer index ONCE after all fixed fields + buf.SetWriterIndex(baseOffset + s.fieldGroup.FixedSize) + } else if len(s.fieldGroup.FixedFields) > 0 { + // Fallback to reflect-based access for unaddressable values + for _, field := range s.fieldGroup.FixedFields { + fieldValue := value.Field(field.FieldIndex) switch field.DispatchId { - case PrimitiveVarint32DispatchId: - *(*int32)(fieldPtr) = buf.ReadVarint32(err) - case PrimitiveVarint64DispatchId: - *(*int64)(fieldPtr) = buf.ReadVarint64(err) - case PrimitiveIntDispatchId: - *(*int)(fieldPtr) = int(buf.ReadVarint64(err)) - case PrimitiveVarUint32DispatchId: - *(*uint32)(fieldPtr) = buf.ReadVaruint32(err) - case 
PrimitiveVarUint64DispatchId: - *(*uint64)(fieldPtr) = buf.ReadVaruint64(err) - case PrimitiveUintDispatchId: - *(*uint)(fieldPtr) = uint(buf.ReadVaruint64(err)) - case PrimitiveTaggedInt64DispatchId: - // Tagged INT64: use buffer's tagged decoding (4 bytes for small, 9 for large) - *(*int64)(fieldPtr) = buf.ReadTaggedInt64(err) - case PrimitiveTaggedUint64DispatchId: - // Tagged UINT64: use buffer's tagged decoding (4 bytes for small, 9 for large) - *(*uint64)(fieldPtr) = buf.ReadTaggedUint64(err) - // Notnull pointer types - allocate and set pointer + // Primitive types (non-pointer) + case PrimitiveBoolDispatchId: + buf.WriteBool(fieldValue.Bool()) + case PrimitiveInt8DispatchId: + buf.WriteByte_(byte(fieldValue.Int())) + case PrimitiveUint8DispatchId: + buf.WriteByte_(byte(fieldValue.Uint())) + case PrimitiveInt16DispatchId: + buf.WriteInt16(int16(fieldValue.Int())) + case PrimitiveUint16DispatchId: + buf.WriteInt16(int16(fieldValue.Uint())) + case PrimitiveInt32DispatchId: + buf.WriteInt32(int32(fieldValue.Int())) + case PrimitiveUint32DispatchId: + buf.WriteInt32(int32(fieldValue.Uint())) + case PrimitiveInt64DispatchId: + buf.WriteInt64(fieldValue.Int()) + case PrimitiveUint64DispatchId: + buf.WriteInt64(int64(fieldValue.Uint())) + case PrimitiveFloat32DispatchId: + buf.WriteFloat32(float32(fieldValue.Float())) + case PrimitiveFloat64DispatchId: + buf.WriteFloat64(fieldValue.Float()) + // Notnull pointer types - dereference and write + case NotnullBoolPtrDispatchId: + buf.WriteBool(fieldValue.Elem().Bool()) + case NotnullInt8PtrDispatchId: + buf.WriteByte_(byte(fieldValue.Elem().Int())) + case NotnullUint8PtrDispatchId: + buf.WriteByte_(byte(fieldValue.Elem().Uint())) + case NotnullInt16PtrDispatchId: + buf.WriteInt16(int16(fieldValue.Elem().Int())) + case NotnullUint16PtrDispatchId: + buf.WriteInt16(int16(fieldValue.Elem().Uint())) + case NotnullInt32PtrDispatchId: + buf.WriteInt32(int32(fieldValue.Elem().Int())) + case NotnullUint32PtrDispatchId: + 
buf.WriteInt32(int32(fieldValue.Elem().Uint())) + case NotnullInt64PtrDispatchId: + buf.WriteInt64(fieldValue.Elem().Int()) + case NotnullUint64PtrDispatchId: + buf.WriteInt64(int64(fieldValue.Elem().Uint())) + case NotnullFloat32PtrDispatchId: + buf.WriteFloat32(float32(fieldValue.Elem().Float())) + case NotnullFloat64PtrDispatchId: + buf.WriteFloat64(fieldValue.Elem().Float()) + } + } + } + + // ========================================================================== + // Phase 2: Varint primitives (int32, int64, int, uint32, uint64, uint, tagged int64/uint64) + // - These are variable-length encodings that must be written sequentially + // ========================================================================== + if canUseUnsafe && len(s.fieldGroup.VarintFields) > 0 { + for _, field := range s.fieldGroup.VarintFields { + fieldPtr := unsafe.Add(ptr, field.Offset) + switch field.DispatchId { + case PrimitiveVarint32DispatchId: + buf.WriteVarint32(*(*int32)(fieldPtr)) case NotnullVarint32PtrDispatchId: - v := new(int32) - *v = buf.ReadVarint32(err) - *(**int32)(fieldPtr) = v + buf.WriteVarint32(**(**int32)(fieldPtr)) + case PrimitiveVarint64DispatchId: + buf.WriteVarint64(*(*int64)(fieldPtr)) case NotnullVarint64PtrDispatchId: - v := new(int64) - *v = buf.ReadVarint64(err) - *(**int64)(fieldPtr) = v + buf.WriteVarint64(**(**int64)(fieldPtr)) + case PrimitiveIntDispatchId: + buf.WriteVarint64(int64(*(*int)(fieldPtr))) case NotnullIntPtrDispatchId: - v := new(int) - *v = int(buf.ReadVarint64(err)) - *(**int)(fieldPtr) = v + buf.WriteVarint64(int64(**(**int)(fieldPtr))) + case PrimitiveVarUint32DispatchId: + buf.WriteVaruint32(*(*uint32)(fieldPtr)) case NotnullVarUint32PtrDispatchId: - v := new(uint32) - *v = buf.ReadVaruint32(err) - *(**uint32)(fieldPtr) = v + buf.WriteVaruint32(**(**uint32)(fieldPtr)) + case PrimitiveVarUint64DispatchId: + buf.WriteVaruint64(*(*uint64)(fieldPtr)) case NotnullVarUint64PtrDispatchId: - v := new(uint64) - *v = 
buf.ReadVaruint64(err) - *(**uint64)(fieldPtr) = v + buf.WriteVaruint64(**(**uint64)(fieldPtr)) + case PrimitiveUintDispatchId: + buf.WriteVaruint64(uint64(*(*uint)(fieldPtr))) case NotnullUintPtrDispatchId: - v := new(uint) - *v = uint(buf.ReadVaruint64(err)) - *(**uint)(fieldPtr) = v + buf.WriteVaruint64(uint64(**(**uint)(fieldPtr))) + case PrimitiveTaggedInt64DispatchId: + buf.WriteTaggedInt64(*(*int64)(fieldPtr)) case NotnullTaggedInt64PtrDispatchId: - v := new(int64) - *v = buf.ReadTaggedInt64(err) - *(**int64)(fieldPtr) = v + buf.WriteTaggedInt64(**(**int64)(fieldPtr)) + case PrimitiveTaggedUint64DispatchId: + buf.WriteTaggedUint64(*(*uint64)(fieldPtr)) case NotnullTaggedUint64PtrDispatchId: - v := new(uint64) - *v = buf.ReadTaggedUint64(err) - *(**uint64)(fieldPtr) = v + buf.WriteTaggedUint64(**(**uint64)(fieldPtr)) + } + } + } else if len(s.fieldGroup.VarintFields) > 0 { + // Slow path for non-addressable values: use reflection + for _, field := range s.fieldGroup.VarintFields { + fieldValue := value.Field(field.FieldIndex) + switch field.DispatchId { + // Primitive types (non-pointer) + case PrimitiveVarint32DispatchId: + buf.WriteVarint32(int32(fieldValue.Int())) + case PrimitiveVarint64DispatchId: + buf.WriteVarint64(fieldValue.Int()) + case PrimitiveIntDispatchId: + buf.WriteVarint64(fieldValue.Int()) + case PrimitiveVarUint32DispatchId: + buf.WriteVaruint32(uint32(fieldValue.Uint())) + case PrimitiveVarUint64DispatchId: + buf.WriteVaruint64(fieldValue.Uint()) + case PrimitiveUintDispatchId: + buf.WriteVaruint64(fieldValue.Uint()) + case PrimitiveTaggedInt64DispatchId: + buf.WriteTaggedInt64(fieldValue.Int()) + case PrimitiveTaggedUint64DispatchId: + buf.WriteTaggedUint64(fieldValue.Uint()) + // Notnull pointer types - dereference and write + case NotnullVarint32PtrDispatchId: + buf.WriteVarint32(int32(fieldValue.Elem().Int())) + case NotnullVarint64PtrDispatchId: + buf.WriteVarint64(fieldValue.Elem().Int()) + case NotnullIntPtrDispatchId: + 
buf.WriteVarint64(fieldValue.Elem().Int()) + case NotnullVarUint32PtrDispatchId: + buf.WriteVaruint32(uint32(fieldValue.Elem().Uint())) + case NotnullVarUint64PtrDispatchId: + buf.WriteVaruint64(fieldValue.Elem().Uint()) + case NotnullUintPtrDispatchId: + buf.WriteVaruint64(fieldValue.Elem().Uint()) + case NotnullTaggedInt64PtrDispatchId: + buf.WriteTaggedInt64(fieldValue.Elem().Int()) + case NotnullTaggedUint64PtrDispatchId: + buf.WriteTaggedUint64(fieldValue.Elem().Uint()) } } } + // ========================================================================== // Phase 3: Remaining fields (strings, slices, maps, structs, enums) - // No intermediate error checks - trade error path performance for normal path - for _, field := range s.remainingFields { - s.readRemainingField(ctx, ptr, field, value) + // - These require per-field handling (ref flags, type info, serializers) + // - No intermediate error checks - trade error path performance for normal path + // ========================================================================== + for i := range s.fieldGroup.RemainingFields { + s.writeRemainingField(ctx, ptr, &s.fieldGroup.RemainingFields[i], value) } } -// readRemainingField reads a non-primitive field (string, slice, map, struct, enum) -func (s *structSerializer) readRemainingField(ctx *ReadContext, ptr unsafe.Pointer, field *FieldInfo, value reflect.Value) { +// writeRemainingField writes a non-primitive field (string, slice, map, struct, enum) +func (s *structSerializer) writeRemainingField(ctx *WriteContext, ptr unsafe.Pointer, field *FieldInfo, value reflect.Value) { buf := ctx.Buffer() - ctxErr := ctx.Err() - - if DebugOutputEnabled() { - fmt.Printf("[fory-debug] readRemainingField: field=%s dispatchId=%d pos=%d ptr=%v\n", - field.Name, field.DispatchId, buf.ReaderIndex(), ptr != nil) - } - // Fast path dispatch using pre-computed DispatchId // ptr must be valid (addressable value) if ptr != nil { fieldPtr := unsafe.Add(ptr, field.Offset) switch 
field.DispatchId { case StringDispatchId: + // Check isPtr first for better branch prediction + if !field.IsPtr { + // Non-pointer string: always non-null, no ref tracking needed in fast path + if field.RefMode == RefModeNone { + ctx.WriteString(*(*string)(fieldPtr)) + } else { + // RefModeNullOnly or RefModeTracking: write NotNull flag then string + buf.WriteInt8(NotNullValueFlag) + ctx.WriteString(*(*string)(fieldPtr)) + } + return + } + // Pointer to string: can be nil, may need ref tracking if field.RefMode == RefModeTracking { break // Fall through to slow path for ref tracking } - // Check if local field is a pointer type (schema evolution: remote non-nullable, local nullable) - localIsPtr := field.Type.Kind() == reflect.Ptr - // Only read null flag if RefMode requires it (nullable field) - if field.RefMode == RefModeNullOnly { - refFlag := buf.ReadInt8(ctxErr) - if refFlag == NullFlag { - if localIsPtr { - // Leave as nil - } else { - *(*string)(fieldPtr) = "" - } - return + strPtr := *(**string)(fieldPtr) + if strPtr == nil { + if field.RefMode == RefModeNullOnly { + buf.WriteInt8(NullFlag) + } else { + // RefModeNone: write empty string for nil pointer + ctx.WriteString("") } + return } - str := ctx.ReadString() - if localIsPtr { - // Allocate new string and store pointer - sp := new(string) - *sp = str - *(**string)(fieldPtr) = sp - } else { - *(*string)(fieldPtr) = str + // Non-nil pointer + if field.RefMode == RefModeNullOnly { + buf.WriteInt8(NotNullValueFlag) } + ctx.WriteString(*strPtr) return case EnumDispatchId: // Enums don't track refs - always use fast path - fieldValue := value.Field(field.FieldIndex) - readEnumField(ctx, field, fieldValue) + writeEnumField(ctx, field, value.Field(field.FieldIndex)) return case StringSliceDispatchId: if field.RefMode == RefModeTracking { break } - *(*[]string)(fieldPtr) = ctx.ReadStringSlice(field.RefMode, false) + ctx.WriteStringSlice(*(*[]string)(fieldPtr), field.RefMode, false, true) return case 
BoolSliceDispatchId: if field.RefMode == RefModeTracking { break } - *(*[]bool)(fieldPtr) = ctx.ReadBoolSlice(field.RefMode, false) + ctx.WriteBoolSlice(*(*[]bool)(fieldPtr), field.RefMode, false) return case Int8SliceDispatchId: if field.RefMode == RefModeTracking { break } - *(*[]int8)(fieldPtr) = ctx.ReadInt8Slice(field.RefMode, false) + ctx.WriteInt8Slice(*(*[]int8)(fieldPtr), field.RefMode, false) return case ByteSliceDispatchId: if field.RefMode == RefModeTracking { break } - *(*[]byte)(fieldPtr) = ctx.ReadByteSlice(field.RefMode, false) + ctx.WriteByteSlice(*(*[]byte)(fieldPtr), field.RefMode, false) return case Int16SliceDispatchId: if field.RefMode == RefModeTracking { break } - *(*[]int16)(fieldPtr) = ctx.ReadInt16Slice(field.RefMode, false) + ctx.WriteInt16Slice(*(*[]int16)(fieldPtr), field.RefMode, false) return case Int32SliceDispatchId: if field.RefMode == RefModeTracking { break } - *(*[]int32)(fieldPtr) = ctx.ReadInt32Slice(field.RefMode, false) + ctx.WriteInt32Slice(*(*[]int32)(fieldPtr), field.RefMode, false) return case Int64SliceDispatchId: if field.RefMode == RefModeTracking { break } - *(*[]int64)(fieldPtr) = ctx.ReadInt64Slice(field.RefMode, false) + ctx.WriteInt64Slice(*(*[]int64)(fieldPtr), field.RefMode, false) return case IntSliceDispatchId: if field.RefMode == RefModeTracking { break } - *(*[]int)(fieldPtr) = ctx.ReadIntSlice(field.RefMode, false) + ctx.WriteIntSlice(*(*[]int)(fieldPtr), field.RefMode, false) return case UintSliceDispatchId: if field.RefMode == RefModeTracking { break } - *(*[]uint)(fieldPtr) = ctx.ReadUintSlice(field.RefMode, false) + ctx.WriteUintSlice(*(*[]uint)(fieldPtr), field.RefMode, false) return case Float32SliceDispatchId: if field.RefMode == RefModeTracking { break } - *(*[]float32)(fieldPtr) = ctx.ReadFloat32Slice(field.RefMode, false) + ctx.WriteFloat32Slice(*(*[]float32)(fieldPtr), field.RefMode, false) return case Float64SliceDispatchId: if field.RefMode == RefModeTracking { break } - 
*(*[]float64)(fieldPtr) = ctx.ReadFloat64Slice(field.RefMode, false) + ctx.WriteFloat64Slice(*(*[]float64)(fieldPtr), field.RefMode, false) return case StringStringMapDispatchId: if field.RefMode == RefModeTracking { break } - *(*map[string]string)(fieldPtr) = ctx.ReadStringStringMap(field.RefMode, false) + ctx.WriteStringStringMap(*(*map[string]string)(fieldPtr), field.RefMode, false) return case StringInt64MapDispatchId: if field.RefMode == RefModeTracking { break } - *(*map[string]int64)(fieldPtr) = ctx.ReadStringInt64Map(field.RefMode, false) + ctx.WriteStringInt64Map(*(*map[string]int64)(fieldPtr), field.RefMode, false) return case StringInt32MapDispatchId: if field.RefMode == RefModeTracking { break } - *(*map[string]int32)(fieldPtr) = ctx.ReadStringInt32Map(field.RefMode, false) + ctx.WriteStringInt32Map(*(*map[string]int32)(fieldPtr), field.RefMode, false) return case StringIntMapDispatchId: if field.RefMode == RefModeTracking { break } - *(*map[string]int)(fieldPtr) = ctx.ReadStringIntMap(field.RefMode, false) + ctx.WriteStringIntMap(*(*map[string]int)(fieldPtr), field.RefMode, false) return case StringFloat64MapDispatchId: if field.RefMode == RefModeTracking { break } - *(*map[string]float64)(fieldPtr) = ctx.ReadStringFloat64Map(field.RefMode, false) + ctx.WriteStringFloat64Map(*(*map[string]float64)(fieldPtr), field.RefMode, false) return case StringBoolMapDispatchId: // NOTE: map[string]bool is used to represent SETs in Go xlang mode. - // We CANNOT use the fast path here because it reads MAP format, - // but the data is actually in SET format. Fall through to slow path - // which uses setSerializer to correctly read the SET format. + // We CANNOT use the fast path here because it writes MAP format, + // but the data should be written in SET format. Fall through to slow path + // which uses setSerializer to correctly write the SET format. 
break case IntIntMapDispatchId: if field.RefMode == RefModeTracking { break } - *(*map[int]int)(fieldPtr) = ctx.ReadIntIntMap(field.RefMode, false) + ctx.WriteIntIntMap(*(*map[int]int)(fieldPtr), field.RefMode, false) return case NullableTaggedInt64DispatchId: - // Nullable tagged INT64: read ref flag, then tagged encoding - refFlag := buf.ReadInt8(ctxErr) - if refFlag == NullFlag { - // Leave pointer as nil + // Nullable tagged INT64: write ref flag, then tagged encoding + ptr := *(**int64)(fieldPtr) + if ptr == nil { + buf.WriteInt8(NullFlag) return } - // Allocate new int64 and store pointer - v := new(int64) - *v = buf.ReadTaggedInt64(ctxErr) - *(**int64)(fieldPtr) = v + buf.WriteInt8(NotNullValueFlag) + buf.WriteTaggedInt64(*ptr) return case NullableTaggedUint64DispatchId: - // Nullable tagged UINT64: read ref flag, then tagged encoding - refFlag := buf.ReadInt8(ctxErr) - if refFlag == NullFlag { - // Leave pointer as nil + // Nullable tagged UINT64: write ref flag, then tagged encoding + ptr := *(**uint64)(fieldPtr) + if ptr == nil { + buf.WriteInt8(NullFlag) return } - // Allocate new uint64 and store pointer - v := new(uint64) - *v = buf.ReadTaggedUint64(ctxErr) - *(**uint64)(fieldPtr) = v + buf.WriteInt8(NotNullValueFlag) + buf.WriteTaggedUint64(*ptr) return // Nullable fixed-size types case NullableBoolDispatchId: - refFlag := buf.ReadInt8(ctxErr) - if refFlag == NullFlag { + ptr := *(**bool)(fieldPtr) + if ptr == nil { + buf.WriteInt8(NullFlag) return } - v := new(bool) - *v = buf.ReadBool(ctxErr) - *(**bool)(fieldPtr) = v + buf.WriteInt8(NotNullValueFlag) + buf.WriteBool(*ptr) return case NullableInt8DispatchId: - refFlag := buf.ReadInt8(ctxErr) - if refFlag == NullFlag { + ptr := *(**int8)(fieldPtr) + if ptr == nil { + buf.WriteInt8(NullFlag) return } - v := new(int8) - *v = buf.ReadInt8(ctxErr) - *(**int8)(fieldPtr) = v + buf.WriteInt8(NotNullValueFlag) + buf.WriteInt8(*ptr) return case NullableUint8DispatchId: - refFlag := buf.ReadInt8(ctxErr) - if 
DebugOutputEnabled() { - fmt.Printf("[fory-debug] readRemainingField: NullableUint8 refFlag=%d\n", refFlag) - } - if refFlag == NullFlag { + ptr := *(**uint8)(fieldPtr) + if ptr == nil { + buf.WriteInt8(NullFlag) return } - v := new(uint8) - *v = buf.ReadUint8(ctxErr) - if DebugOutputEnabled() { - fmt.Printf("[fory-debug] readRemainingField: NullableUint8 value=%d\n", *v) - } - *(**uint8)(fieldPtr) = v + buf.WriteInt8(NotNullValueFlag) + buf.WriteUint8(*ptr) return case NullableInt16DispatchId: - refFlag := buf.ReadInt8(ctxErr) - if refFlag == NullFlag { + ptr := *(**int16)(fieldPtr) + if ptr == nil { + buf.WriteInt8(NullFlag) return } - v := new(int16) - *v = buf.ReadInt16(ctxErr) - *(**int16)(fieldPtr) = v + buf.WriteInt8(NotNullValueFlag) + buf.WriteInt16(*ptr) return case NullableUint16DispatchId: - refFlag := buf.ReadInt8(ctxErr) - if refFlag == NullFlag { + ptr := *(**uint16)(fieldPtr) + if ptr == nil { + buf.WriteInt8(NullFlag) return } - v := new(uint16) - *v = buf.ReadUint16(ctxErr) - *(**uint16)(fieldPtr) = v + buf.WriteInt8(NotNullValueFlag) + buf.WriteUint16(*ptr) return case NullableInt32DispatchId: - refFlag := buf.ReadInt8(ctxErr) - if refFlag == NullFlag { + ptr := *(**int32)(fieldPtr) + if ptr == nil { + buf.WriteInt8(NullFlag) return } - v := new(int32) - *v = buf.ReadInt32(ctxErr) - *(**int32)(fieldPtr) = v + buf.WriteInt8(NotNullValueFlag) + buf.WriteInt32(*ptr) return case NullableUint32DispatchId: - refFlag := buf.ReadInt8(ctxErr) - if refFlag == NullFlag { + ptr := *(**uint32)(fieldPtr) + if ptr == nil { + buf.WriteInt8(NullFlag) return } - v := new(uint32) - *v = buf.ReadUint32(ctxErr) - *(**uint32)(fieldPtr) = v + buf.WriteInt8(NotNullValueFlag) + buf.WriteUint32(*ptr) return case NullableInt64DispatchId: - refFlag := buf.ReadInt8(ctxErr) - if refFlag == NullFlag { + ptr := *(**int64)(fieldPtr) + if ptr == nil { + buf.WriteInt8(NullFlag) return } - v := new(int64) - *v = buf.ReadInt64(ctxErr) - *(**int64)(fieldPtr) = v + 
buf.WriteInt8(NotNullValueFlag) + buf.WriteInt64(*ptr) return case NullableUint64DispatchId: - refFlag := buf.ReadInt8(ctxErr) - if refFlag == NullFlag { + ptr := *(**uint64)(fieldPtr) + if ptr == nil { + buf.WriteInt8(NullFlag) return } - v := new(uint64) - *v = buf.ReadUint64(ctxErr) - *(**uint64)(fieldPtr) = v + buf.WriteInt8(NotNullValueFlag) + buf.WriteUint64(*ptr) return case NullableFloat32DispatchId: - refFlag := buf.ReadInt8(ctxErr) - if refFlag == NullFlag { + ptr := *(**float32)(fieldPtr) + if ptr == nil { + buf.WriteInt8(NullFlag) return } - v := new(float32) - *v = buf.ReadFloat32(ctxErr) - *(**float32)(fieldPtr) = v + buf.WriteInt8(NotNullValueFlag) + buf.WriteFloat32(*ptr) return case NullableFloat64DispatchId: - refFlag := buf.ReadInt8(ctxErr) - if refFlag == NullFlag { + ptr := *(**float64)(fieldPtr) + if ptr == nil { + buf.WriteInt8(NullFlag) return } - v := new(float64) - *v = buf.ReadFloat64(ctxErr) - *(**float64)(fieldPtr) = v + buf.WriteInt8(NotNullValueFlag) + buf.WriteFloat64(*ptr) return // Nullable varint types case NullableVarint32DispatchId: - refFlag := buf.ReadInt8(ctxErr) - if refFlag == NullFlag { + ptr := *(**int32)(fieldPtr) + if ptr == nil { + buf.WriteInt8(NullFlag) return } - v := new(int32) - *v = buf.ReadVarint32(ctxErr) - *(**int32)(fieldPtr) = v + buf.WriteInt8(NotNullValueFlag) + buf.WriteVarint32(*ptr) return case NullableVarUint32DispatchId: - refFlag := buf.ReadInt8(ctxErr) - if refFlag == NullFlag { + ptr := *(**uint32)(fieldPtr) + if ptr == nil { + buf.WriteInt8(NullFlag) return } - v := new(uint32) - *v = buf.ReadVaruint32(ctxErr) - *(**uint32)(fieldPtr) = v + buf.WriteInt8(NotNullValueFlag) + buf.WriteVaruint32(*ptr) return case NullableVarint64DispatchId: - refFlag := buf.ReadInt8(ctxErr) - if refFlag == NullFlag { + ptr := *(**int64)(fieldPtr) + if ptr == nil { + buf.WriteInt8(NullFlag) return } - v := new(int64) - *v = buf.ReadVarint64(ctxErr) - *(**int64)(fieldPtr) = v + buf.WriteInt8(NotNullValueFlag) + 
buf.WriteVarint64(*ptr) return case NullableVarUint64DispatchId: - refFlag := buf.ReadInt8(ctxErr) - if refFlag == NullFlag { + ptr := *(**uint64)(fieldPtr) + if ptr == nil { + buf.WriteInt8(NullFlag) return } - v := new(uint64) - *v = buf.ReadVaruint64(ctxErr) - *(**uint64)(fieldPtr) = v + buf.WriteInt8(NotNullValueFlag) + buf.WriteVaruint64(*ptr) return } } @@ -1760,300 +1546,422 @@ func (s *structSerializer) readRemainingField(ctx *ReadContext, ptr unsafe.Point // Handle nullable types via reflection when ptr is nil (non-addressable) switch field.DispatchId { case NullableTaggedInt64DispatchId: - refFlag := buf.ReadInt8(ctxErr) - if refFlag == NullFlag { + if fieldValue.IsNil() { + buf.WriteInt8(NullFlag) return } - v := reflect.New(fieldValue.Type().Elem()) - v.Elem().SetInt(buf.ReadTaggedInt64(ctxErr)) - fieldValue.Set(v) + buf.WriteInt8(NotNullValueFlag) + buf.WriteTaggedInt64(fieldValue.Elem().Int()) return case NullableTaggedUint64DispatchId: - refFlag := buf.ReadInt8(ctxErr) - if refFlag == NullFlag { + if fieldValue.IsNil() { + buf.WriteInt8(NullFlag) return } - v := reflect.New(fieldValue.Type().Elem()) - v.Elem().SetUint(buf.ReadTaggedUint64(ctxErr)) - fieldValue.Set(v) + buf.WriteInt8(NotNullValueFlag) + buf.WriteTaggedUint64(fieldValue.Elem().Uint()) return case NullableBoolDispatchId: - refFlag := buf.ReadInt8(ctxErr) - if refFlag == NullFlag { + if fieldValue.IsNil() { + buf.WriteInt8(NullFlag) return } - v := reflect.New(fieldValue.Type().Elem()) - v.Elem().SetBool(buf.ReadBool(ctxErr)) - fieldValue.Set(v) + buf.WriteInt8(NotNullValueFlag) + buf.WriteBool(fieldValue.Elem().Bool()) return case NullableInt8DispatchId: - refFlag := buf.ReadInt8(ctxErr) - if refFlag == NullFlag { + if fieldValue.IsNil() { + buf.WriteInt8(NullFlag) return } - v := reflect.New(fieldValue.Type().Elem()) - v.Elem().SetInt(int64(buf.ReadInt8(ctxErr))) - fieldValue.Set(v) + buf.WriteInt8(NotNullValueFlag) + buf.WriteInt8(int8(fieldValue.Elem().Int())) return case 
NullableUint8DispatchId: - refFlag := buf.ReadInt8(ctxErr) - if refFlag == NullFlag { + if fieldValue.IsNil() { + buf.WriteInt8(NullFlag) return } - v := reflect.New(fieldValue.Type().Elem()) - v.Elem().SetUint(uint64(buf.ReadUint8(ctxErr))) - fieldValue.Set(v) + buf.WriteInt8(NotNullValueFlag) + buf.WriteUint8(uint8(fieldValue.Elem().Uint())) return case NullableInt16DispatchId: - refFlag := buf.ReadInt8(ctxErr) - if refFlag == NullFlag { + if fieldValue.IsNil() { + buf.WriteInt8(NullFlag) return } - v := reflect.New(fieldValue.Type().Elem()) - v.Elem().SetInt(int64(buf.ReadInt16(ctxErr))) - fieldValue.Set(v) + buf.WriteInt8(NotNullValueFlag) + buf.WriteInt16(int16(fieldValue.Elem().Int())) return case NullableUint16DispatchId: - refFlag := buf.ReadInt8(ctxErr) - if refFlag == NullFlag { + if fieldValue.IsNil() { + buf.WriteInt8(NullFlag) return } - v := reflect.New(fieldValue.Type().Elem()) - v.Elem().SetUint(uint64(buf.ReadUint16(ctxErr))) - fieldValue.Set(v) + buf.WriteInt8(NotNullValueFlag) + buf.WriteUint16(uint16(fieldValue.Elem().Uint())) return case NullableInt32DispatchId: - refFlag := buf.ReadInt8(ctxErr) - if refFlag == NullFlag { + if fieldValue.IsNil() { + buf.WriteInt8(NullFlag) return } - v := reflect.New(fieldValue.Type().Elem()) - v.Elem().SetInt(int64(buf.ReadInt32(ctxErr))) - fieldValue.Set(v) + buf.WriteInt8(NotNullValueFlag) + buf.WriteInt32(int32(fieldValue.Elem().Int())) return case NullableUint32DispatchId: - refFlag := buf.ReadInt8(ctxErr) - if refFlag == NullFlag { + if fieldValue.IsNil() { + buf.WriteInt8(NullFlag) return } - v := reflect.New(fieldValue.Type().Elem()) - v.Elem().SetUint(uint64(buf.ReadUint32(ctxErr))) - fieldValue.Set(v) + buf.WriteInt8(NotNullValueFlag) + buf.WriteUint32(uint32(fieldValue.Elem().Uint())) return case NullableInt64DispatchId: - refFlag := buf.ReadInt8(ctxErr) - if refFlag == NullFlag { + if fieldValue.IsNil() { + buf.WriteInt8(NullFlag) return } - v := reflect.New(fieldValue.Type().Elem()) - 
v.Elem().SetInt(buf.ReadInt64(ctxErr)) - fieldValue.Set(v) + buf.WriteInt8(NotNullValueFlag) + buf.WriteInt64(fieldValue.Elem().Int()) return case NullableUint64DispatchId: - refFlag := buf.ReadInt8(ctxErr) - if refFlag == NullFlag { + if fieldValue.IsNil() { + buf.WriteInt8(NullFlag) return } - v := reflect.New(fieldValue.Type().Elem()) - v.Elem().SetUint(buf.ReadUint64(ctxErr)) - fieldValue.Set(v) + buf.WriteInt8(NotNullValueFlag) + buf.WriteUint64(fieldValue.Elem().Uint()) return case NullableFloat32DispatchId: - refFlag := buf.ReadInt8(ctxErr) - if refFlag == NullFlag { + if fieldValue.IsNil() { + buf.WriteInt8(NullFlag) return } - v := reflect.New(fieldValue.Type().Elem()) - v.Elem().SetFloat(float64(buf.ReadFloat32(ctxErr))) - fieldValue.Set(v) + buf.WriteInt8(NotNullValueFlag) + buf.WriteFloat32(float32(fieldValue.Elem().Float())) return case NullableFloat64DispatchId: - refFlag := buf.ReadInt8(ctxErr) - if refFlag == NullFlag { + if fieldValue.IsNil() { + buf.WriteInt8(NullFlag) return } - v := reflect.New(fieldValue.Type().Elem()) - v.Elem().SetFloat(buf.ReadFloat64(ctxErr)) - fieldValue.Set(v) + buf.WriteInt8(NotNullValueFlag) + buf.WriteFloat64(fieldValue.Elem().Float()) return case NullableVarint32DispatchId: - refFlag := buf.ReadInt8(ctxErr) - if refFlag == NullFlag { + if fieldValue.IsNil() { + buf.WriteInt8(NullFlag) return } - v := reflect.New(fieldValue.Type().Elem()) - v.Elem().SetInt(int64(buf.ReadVarint32(ctxErr))) - fieldValue.Set(v) + buf.WriteInt8(NotNullValueFlag) + buf.WriteVarint32(int32(fieldValue.Elem().Int())) return case NullableVarUint32DispatchId: - refFlag := buf.ReadInt8(ctxErr) - if refFlag == NullFlag { + if fieldValue.IsNil() { + buf.WriteInt8(NullFlag) return } - v := reflect.New(fieldValue.Type().Elem()) - v.Elem().SetUint(uint64(buf.ReadVaruint32(ctxErr))) - fieldValue.Set(v) + buf.WriteInt8(NotNullValueFlag) + buf.WriteVaruint32(uint32(fieldValue.Elem().Uint())) return case NullableVarint64DispatchId: - refFlag := 
buf.ReadInt8(ctxErr) - if refFlag == NullFlag { + if fieldValue.IsNil() { + buf.WriteInt8(NullFlag) return } - v := reflect.New(fieldValue.Type().Elem()) - v.Elem().SetInt(buf.ReadVarint64(ctxErr)) - fieldValue.Set(v) + buf.WriteInt8(NotNullValueFlag) + buf.WriteVarint64(fieldValue.Elem().Int()) return case NullableVarUint64DispatchId: - refFlag := buf.ReadInt8(ctxErr) - if refFlag == NullFlag { + if fieldValue.IsNil() { + buf.WriteInt8(NullFlag) return } - v := reflect.New(fieldValue.Type().Elem()) - v.Elem().SetUint(buf.ReadVaruint64(ctxErr)) - fieldValue.Set(v) + buf.WriteInt8(NotNullValueFlag) + buf.WriteVaruint64(fieldValue.Elem().Uint()) return } // Fall back to serializer for other types if field.Serializer != nil { - field.Serializer.Read(ctx, field.RefMode, field.WriteType, field.HasGenerics, fieldValue) + field.Serializer.Write(ctx, field.RefMode, field.WriteType, field.HasGenerics, fieldValue) } else { - ctx.ReadValue(fieldValue, RefModeTracking, true) + ctx.WriteValue(fieldValue, RefModeTracking, true) } } -// readFieldsInOrder reads fields in the order they appear in s.fields (TypeDef order) -// This is used in compatible mode where Java writes fields in TypeDef order -func (s *structSerializer) readFieldsInOrder(ctx *ReadContext, value reflect.Value) { +func (s *structSerializer) Read(ctx *ReadContext, refMode RefMode, readType bool, hasGenerics bool, value reflect.Value) { buf := ctx.Buffer() - canUseUnsafe := value.CanAddr() - var ptr unsafe.Pointer - if canUseUnsafe { - ptr = unsafe.Pointer(value.UnsafeAddr()) - } - err := ctx.Err() - - if DebugOutputEnabled() { - fmt.Printf("[fory-debug] readFieldsInOrder: starting at pos=%d, field count=%d\n", buf.ReaderIndex(), len(s.fields)) - for i, f := range s.fields { - fmt.Printf("[fory-debug] readFieldsInOrder: field[%d]=%s dispatchId=%d referencable=%v\n", i, f.Name, f.DispatchId, f.Referencable) + ctxErr := ctx.Err() + switch refMode { + case RefModeTracking: + refID, refErr := 
ctx.RefResolver().TryPreserveRefId(buf) + if refErr != nil { + ctx.SetError(FromError(refErr)) + return } - } - - for _, field := range s.fields { - startPos := buf.ReaderIndex() - if field.FieldIndex < 0 { - s.skipField(ctx, field) - if ctx.HasError() { - return - } - if DebugOutputEnabled() { - fmt.Printf("[fory-debug] readFieldsInOrder: skipped field=%s pos=%d->%d\n", field.Name, startPos, buf.ReaderIndex()) + if refID < int32(NotNullValueFlag) { + // Reference found + obj := ctx.RefResolver().GetReadObject(refID) + if obj.IsValid() { + value.Set(obj) } - continue + return } - - // Fast path for fixed-size primitive types (no ref flag from remote schema) - if canUseUnsafe && isFixedSizePrimitive(field.DispatchId, field.Referencable) { - fieldPtr := unsafe.Add(ptr, field.Offset) - switch field.DispatchId { - // PrimitiveXxxDispatchId: local field is non-pointer type + case RefModeNullOnly: + flag := buf.ReadInt8(ctxErr) + if flag == NullFlag { + return + } + } + if readType { + // Read type info - in compatible mode this returns the serializer with remote fieldDefs + typeID := buf.ReadVaruint32Small7(ctxErr) + internalTypeID := TypeId(typeID & 0xFF) + // Check if this is a struct type that needs type meta reading + if IsNamespacedType(TypeId(typeID)) || internalTypeID == COMPATIBLE_STRUCT || internalTypeID == STRUCT { + // For struct types in compatible mode, use the serializer from TypeInfo + typeInfo := ctx.TypeResolver().readTypeInfoWithTypeID(buf, typeID, ctxErr) + // Use the serializer from TypeInfo which has the remote field definitions + if structSer, ok := typeInfo.Serializer.(*structSerializer); ok && len(structSer.fieldDefs) > 0 { + structSer.ReadData(ctx, value.Type(), value) + return + } + } + } + s.ReadData(ctx, value.Type(), value) +} + +func (s *structSerializer) ReadWithTypeInfo(ctx *ReadContext, refMode RefMode, typeInfo *TypeInfo, value reflect.Value) { + // typeInfo is already read, don't read it again + s.Read(ctx, refMode, false, false, 
value) +} + +func (s *structSerializer) ReadData(ctx *ReadContext, type_ reflect.Type, value reflect.Value) { + // Early error check - skip all intermediate checks for normal path performance + if ctx.HasError() { + return + } + + // Lazy initialization + if !s.initialized { + if err := s.initialize(ctx.TypeResolver()); err != nil { + ctx.SetError(FromError(err)) + return + } + } + + buf := ctx.Buffer() + if value.Kind() == reflect.Ptr { + if value.IsNil() { + value.Set(reflect.New(type_.Elem())) + } + value = value.Elem() + type_ = type_.Elem() + } + + // In compatible mode with meta share, struct hash is not written + if !ctx.Compatible() { + err := ctx.Err() + structHash := buf.ReadInt32(err) + if structHash != s.structHash { + ctx.SetError(HashMismatchError(structHash, s.structHash, s.type_.String())) + return + } + } + + // Fail fast if value is not addressable - we require unsafe pointer access + if !value.CanAddr() { + ctx.SetError(SerializationError("cannot deserialize struct " + s.type_.Name() + " into non-addressable value")) + return + } + + // Use ordered reading when TypeDef differs from local type (schema evolution) + if s.typeDefDiffers { + s.readFieldsInOrder(ctx, value) + return + } + + // ========================================================================== + // Grouped reading for matching types (optimized path) + // - Types match, so all fields exist locally (no FieldIndex < 0 checks) + // - Use UnsafeGet at pre-computed offsets, update reader index once per phase + // ========================================================================== + ptr := unsafe.Pointer(value.UnsafeAddr()) + + // Phase 1: Fixed-size primitives (inline unsafe reads with endian handling) + if s.fieldGroup.FixedSize > 0 { + baseOffset := buf.ReaderIndex() + data := buf.GetData() + + for _, field := range s.fieldGroup.FixedFields { + fieldPtr := unsafe.Add(ptr, field.Offset) + bufOffset := baseOffset + field.WriteOffset + switch field.DispatchId { case 
PrimitiveBoolDispatchId: - *(*bool)(fieldPtr) = buf.ReadBool(err) + *(*bool)(fieldPtr) = data[bufOffset] != 0 case PrimitiveInt8DispatchId: - *(*int8)(fieldPtr) = buf.ReadInt8(err) + *(*int8)(fieldPtr) = int8(data[bufOffset]) case PrimitiveUint8DispatchId: - *(*uint8)(fieldPtr) = uint8(buf.ReadInt8(err)) + *(*uint8)(fieldPtr) = data[bufOffset] case PrimitiveInt16DispatchId: - *(*int16)(fieldPtr) = buf.ReadInt16(err) + if isLittleEndian { + *(*int16)(fieldPtr) = *(*int16)(unsafe.Pointer(&data[bufOffset])) + } else { + *(*int16)(fieldPtr) = int16(binary.LittleEndian.Uint16(data[bufOffset:])) + } case PrimitiveUint16DispatchId: - *(*uint16)(fieldPtr) = buf.ReadUint16(err) + if isLittleEndian { + *(*uint16)(fieldPtr) = *(*uint16)(unsafe.Pointer(&data[bufOffset])) + } else { + *(*uint16)(fieldPtr) = binary.LittleEndian.Uint16(data[bufOffset:]) + } case PrimitiveInt32DispatchId: - *(*int32)(fieldPtr) = buf.ReadInt32(err) + if isLittleEndian { + *(*int32)(fieldPtr) = *(*int32)(unsafe.Pointer(&data[bufOffset])) + } else { + *(*int32)(fieldPtr) = int32(binary.LittleEndian.Uint32(data[bufOffset:])) + } case PrimitiveUint32DispatchId: - *(*uint32)(fieldPtr) = buf.ReadUint32(err) + if isLittleEndian { + *(*uint32)(fieldPtr) = *(*uint32)(unsafe.Pointer(&data[bufOffset])) + } else { + *(*uint32)(fieldPtr) = binary.LittleEndian.Uint32(data[bufOffset:]) + } case PrimitiveInt64DispatchId: - *(*int64)(fieldPtr) = buf.ReadInt64(err) + if isLittleEndian { + *(*int64)(fieldPtr) = *(*int64)(unsafe.Pointer(&data[bufOffset])) + } else { + *(*int64)(fieldPtr) = int64(binary.LittleEndian.Uint64(data[bufOffset:])) + } case PrimitiveUint64DispatchId: - *(*uint64)(fieldPtr) = buf.ReadUint64(err) + if isLittleEndian { + *(*uint64)(fieldPtr) = *(*uint64)(unsafe.Pointer(&data[bufOffset])) + } else { + *(*uint64)(fieldPtr) = binary.LittleEndian.Uint64(data[bufOffset:]) + } case PrimitiveFloat32DispatchId: - *(*float32)(fieldPtr) = buf.ReadFloat32(err) + if isLittleEndian { + *(*float32)(fieldPtr) 
= *(*float32)(unsafe.Pointer(&data[bufOffset])) + } else { + *(*float32)(fieldPtr) = math.Float32frombits(binary.LittleEndian.Uint32(data[bufOffset:])) + } case PrimitiveFloat64DispatchId: - *(*float64)(fieldPtr) = buf.ReadFloat64(err) - // NotnullXxxPtrDispatchId: local field is *T with nullable=false + if isLittleEndian { + *(*float64)(fieldPtr) = *(*float64)(unsafe.Pointer(&data[bufOffset])) + } else { + *(*float64)(fieldPtr) = math.Float64frombits(binary.LittleEndian.Uint64(data[bufOffset:])) + } + // Notnull pointer types - allocate and set pointer case NotnullBoolPtrDispatchId: v := new(bool) - *v = buf.ReadBool(err) + *v = data[bufOffset] != 0 *(**bool)(fieldPtr) = v case NotnullInt8PtrDispatchId: v := new(int8) - *v = buf.ReadInt8(err) + *v = int8(data[bufOffset]) *(**int8)(fieldPtr) = v case NotnullUint8PtrDispatchId: v := new(uint8) - *v = uint8(buf.ReadInt8(err)) + *v = data[bufOffset] *(**uint8)(fieldPtr) = v case NotnullInt16PtrDispatchId: v := new(int16) - *v = buf.ReadInt16(err) + if isLittleEndian { + *v = *(*int16)(unsafe.Pointer(&data[bufOffset])) + } else { + *v = int16(binary.LittleEndian.Uint16(data[bufOffset:])) + } *(**int16)(fieldPtr) = v case NotnullUint16PtrDispatchId: v := new(uint16) - *v = buf.ReadUint16(err) + if isLittleEndian { + *v = *(*uint16)(unsafe.Pointer(&data[bufOffset])) + } else { + *v = binary.LittleEndian.Uint16(data[bufOffset:]) + } *(**uint16)(fieldPtr) = v case NotnullInt32PtrDispatchId: v := new(int32) - *v = buf.ReadInt32(err) + if isLittleEndian { + *v = *(*int32)(unsafe.Pointer(&data[bufOffset])) + } else { + *v = int32(binary.LittleEndian.Uint32(data[bufOffset:])) + } *(**int32)(fieldPtr) = v case NotnullUint32PtrDispatchId: v := new(uint32) - *v = buf.ReadUint32(err) + if isLittleEndian { + *v = *(*uint32)(unsafe.Pointer(&data[bufOffset])) + } else { + *v = binary.LittleEndian.Uint32(data[bufOffset:]) + } *(**uint32)(fieldPtr) = v case NotnullInt64PtrDispatchId: v := new(int64) - *v = buf.ReadInt64(err) + if 
isLittleEndian { + *v = *(*int64)(unsafe.Pointer(&data[bufOffset])) + } else { + *v = int64(binary.LittleEndian.Uint64(data[bufOffset:])) + } *(**int64)(fieldPtr) = v case NotnullUint64PtrDispatchId: v := new(uint64) - *v = buf.ReadUint64(err) + if isLittleEndian { + *v = *(*uint64)(unsafe.Pointer(&data[bufOffset])) + } else { + *v = binary.LittleEndian.Uint64(data[bufOffset:]) + } *(**uint64)(fieldPtr) = v case NotnullFloat32PtrDispatchId: v := new(float32) - *v = buf.ReadFloat32(err) + if isLittleEndian { + *v = *(*float32)(unsafe.Pointer(&data[bufOffset])) + } else { + *v = math.Float32frombits(binary.LittleEndian.Uint32(data[bufOffset:])) + } *(**float32)(fieldPtr) = v case NotnullFloat64PtrDispatchId: v := new(float64) - *v = buf.ReadFloat64(err) + if isLittleEndian { + *v = *(*float64)(unsafe.Pointer(&data[bufOffset])) + } else { + *v = math.Float64frombits(binary.LittleEndian.Uint64(data[bufOffset:])) + } *(**float64)(fieldPtr) = v } - if DebugOutputEnabled() { - fmt.Printf("[fory-debug] readFieldsInOrder: fixed field=%s pos=%d->%d\n", field.Name, startPos, buf.ReaderIndex()) - } - continue } + // Update reader index ONCE after all fixed fields + buf.SetReaderIndex(baseOffset + s.fieldGroup.FixedSize) + } - // Fast path for varint primitive types (no ref flag from remote schema) - if canUseUnsafe && isVarintPrimitive(field.DispatchId, field.Referencable) && !fieldHasNonPrimitiveSerializer(field) { + // Phase 2: Varint primitives (must read sequentially - variable length) + // Note: For tagged int64/uint64, we can't use unsafe reads because they need bounds checking + if len(s.fieldGroup.VarintFields) > 0 { + err := ctx.Err() + for _, field := range s.fieldGroup.VarintFields { fieldPtr := unsafe.Add(ptr, field.Offset) switch field.DispatchId { - // PrimitiveXxxDispatchId: local field is non-pointer type case PrimitiveVarint32DispatchId: *(*int32)(fieldPtr) = buf.ReadVarint32(err) case PrimitiveVarint64DispatchId: *(*int64)(fieldPtr) = buf.ReadVarint64(err) + 
case PrimitiveIntDispatchId: + *(*int)(fieldPtr) = int(buf.ReadVarint64(err)) case PrimitiveVarUint32DispatchId: *(*uint32)(fieldPtr) = buf.ReadVaruint32(err) case PrimitiveVarUint64DispatchId: *(*uint64)(fieldPtr) = buf.ReadVaruint64(err) + case PrimitiveUintDispatchId: + *(*uint)(fieldPtr) = uint(buf.ReadVaruint64(err)) case PrimitiveTaggedInt64DispatchId: + // Tagged INT64: use buffer's tagged decoding (4 bytes for small, 9 for large) *(*int64)(fieldPtr) = buf.ReadTaggedInt64(err) case PrimitiveTaggedUint64DispatchId: + // Tagged UINT64: use buffer's tagged decoding (4 bytes for small, 9 for large) *(*uint64)(fieldPtr) = buf.ReadTaggedUint64(err) - case PrimitiveIntDispatchId: - *(*int)(fieldPtr) = int(buf.ReadVarint64(err)) - case PrimitiveUintDispatchId: - *(*uint)(fieldPtr) = uint(buf.ReadVaruint64(err)) - // NotnullXxxPtrDispatchId: local field is *T with nullable=false + // Notnull pointer types - allocate and set pointer case NotnullVarint32PtrDispatchId: v := new(int32) *v = buf.ReadVarint32(err) @@ -2062,6 +1970,10 @@ func (s *structSerializer) readFieldsInOrder(ctx *ReadContext, value reflect.Val v := new(int64) *v = buf.ReadVarint64(err) *(**int64)(fieldPtr) = v + case NotnullIntPtrDispatchId: + v := new(int) + *v = int(buf.ReadVarint64(err)) + *(**int)(fieldPtr) = v case NotnullVarUint32PtrDispatchId: v := new(uint32) *v = buf.ReadVaruint32(err) @@ -2070,6 +1982,10 @@ func (s *structSerializer) readFieldsInOrder(ctx *ReadContext, value reflect.Val v := new(uint64) *v = buf.ReadVaruint64(err) *(**uint64)(fieldPtr) = v + case NotnullUintPtrDispatchId: + v := new(uint) + *v = uint(buf.ReadVaruint64(err)) + *(**uint)(fieldPtr) = v case NotnullTaggedInt64PtrDispatchId: v := new(int64) *v = buf.ReadTaggedInt64(err) @@ -2078,2011 +1994,773 @@ func (s *structSerializer) readFieldsInOrder(ctx *ReadContext, value reflect.Val v := new(uint64) *v = buf.ReadTaggedUint64(err) *(**uint64)(fieldPtr) = v - case NotnullIntPtrDispatchId: - v := new(int) - *v = 
int(buf.ReadVarint64(err)) - *(**int)(fieldPtr) = v - case NotnullUintPtrDispatchId: - v := new(uint) - *v = uint(buf.ReadVaruint64(err)) - *(**uint)(fieldPtr) = v - } - if DebugOutputEnabled() { - fmt.Printf("[fory-debug] readFieldsInOrder: varint field=%s pos=%d->%d\n", field.Name, startPos, buf.ReaderIndex()) } - continue } + } - // Get field value for slow paths - fieldValue := value.Field(field.FieldIndex) - - // Slow path for primitives when not addressable - if !canUseUnsafe && isFixedSizePrimitive(field.DispatchId, field.Referencable) { - switch field.DispatchId { - case PrimitiveBoolDispatchId: - fieldValue.SetBool(buf.ReadBool(err)) - case PrimitiveInt8DispatchId: - fieldValue.SetInt(int64(buf.ReadInt8(err))) - case PrimitiveUint8DispatchId: - fieldValue.SetUint(uint64(buf.ReadInt8(err))) - case PrimitiveInt16DispatchId: - fieldValue.SetInt(int64(buf.ReadInt16(err))) - case PrimitiveUint16DispatchId: - fieldValue.SetUint(uint64(buf.ReadUint16(err))) - case PrimitiveInt32DispatchId: - fieldValue.SetInt(int64(buf.ReadInt32(err))) - case PrimitiveUint32DispatchId: - fieldValue.SetUint(uint64(buf.ReadUint32(err))) - case PrimitiveInt64DispatchId: - fieldValue.SetInt(buf.ReadInt64(err)) - case PrimitiveUint64DispatchId: - fieldValue.SetUint(buf.ReadUint64(err)) - case PrimitiveFloat32DispatchId: - fieldValue.SetFloat(float64(buf.ReadFloat32(err))) - case PrimitiveFloat64DispatchId: - fieldValue.SetFloat(buf.ReadFloat64(err)) - } - continue - } + // Phase 3: Remaining fields (strings, slices, maps, structs, enums) + // No intermediate error checks - trade error path performance for normal path + for i := range s.fieldGroup.RemainingFields { + s.readRemainingField(ctx, ptr, &s.fieldGroup.RemainingFields[i], value) + } +} - if !canUseUnsafe && isVarintPrimitive(field.DispatchId, field.Referencable) && !fieldHasNonPrimitiveSerializer(field) { - switch field.DispatchId { - case PrimitiveVarint32DispatchId: - fieldValue.SetInt(int64(buf.ReadVarint32(err))) - case 
PrimitiveVarint64DispatchId, PrimitiveIntDispatchId: - fieldValue.SetInt(buf.ReadVarint64(err)) - case PrimitiveVarUint32DispatchId: - fieldValue.SetUint(uint64(buf.ReadVaruint32(err))) - case PrimitiveVarUint64DispatchId, PrimitiveUintDispatchId: - fieldValue.SetUint(buf.ReadVaruint64(err)) - case PrimitiveTaggedInt64DispatchId: - fieldValue.SetInt(buf.ReadTaggedInt64(err)) - case PrimitiveTaggedUint64DispatchId: - fieldValue.SetUint(buf.ReadTaggedUint64(err)) - } - continue - } - - // Fast path for nullable fixed-size primitives (read ref flag + fixed bytes) - // These have Referencable=true but use fixed encoding, not varint - if isNullableFixedSizePrimitive(field.DispatchId) { - refFlag := buf.ReadInt8(err) - if refFlag == NullFlag { - // Leave pointer as nil (or zero for non-pointer local types) - if DebugOutputEnabled() { - fmt.Printf("[fory-debug] readFieldsInOrder: nullable fixed field=%s is null pos=%d->%d\n", field.Name, startPos, buf.ReaderIndex()) +// readRemainingField reads a non-primitive field (string, slice, map, struct, enum) +func (s *structSerializer) readRemainingField(ctx *ReadContext, ptr unsafe.Pointer, field *FieldInfo, value reflect.Value) { + buf := ctx.Buffer() + ctxErr := ctx.Err() + // Fast path dispatch using pre-computed DispatchId + // ptr must be valid (addressable value) + if ptr != nil { + fieldPtr := unsafe.Add(ptr, field.Offset) + switch field.DispatchId { + case StringDispatchId: + // Check isPtr first for better branch prediction + if !field.IsPtr { + // Non-pointer string: no ref tracking needed in fast path + if field.RefMode == RefModeNone { + *(*string)(fieldPtr) = ctx.ReadString() + } else { + // RefModeNullOnly or RefModeTracking: read NotNull flag then string + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + *(*string)(fieldPtr) = "" + } else { + *(*string)(fieldPtr) = ctx.ReadString() + } } - continue + return } - // Read fixed-size value based on dispatch ID - // Handle both pointer and non-pointer local 
field types (schema evolution) - localIsPtr := fieldValue.Kind() == reflect.Ptr - switch field.DispatchId { - case NullableBoolDispatchId: - v := buf.ReadBool(err) - if localIsPtr { - fieldValue.Set(reflect.ValueOf(&v)) - } else { - fieldValue.SetBool(v) - } - case NullableInt8DispatchId: - v := buf.ReadInt8(err) - if localIsPtr { - fieldValue.Set(reflect.ValueOf(&v)) - } else { - fieldValue.SetInt(int64(v)) - } - case NullableUint8DispatchId: - v := uint8(buf.ReadInt8(err)) - if localIsPtr { - fieldValue.Set(reflect.ValueOf(&v)) - } else { - fieldValue.SetUint(uint64(v)) - } - case NullableInt16DispatchId: - v := buf.ReadInt16(err) - if localIsPtr { - fieldValue.Set(reflect.ValueOf(&v)) - } else { - fieldValue.SetInt(int64(v)) - } - case NullableUint16DispatchId: - v := buf.ReadUint16(err) - if localIsPtr { - fieldValue.Set(reflect.ValueOf(&v)) - } else { - fieldValue.SetUint(uint64(v)) - } - case NullableInt32DispatchId: - v := buf.ReadInt32(err) - if localIsPtr { - fieldValue.Set(reflect.ValueOf(&v)) - } else { - fieldValue.SetInt(int64(v)) - } - case NullableUint32DispatchId: - v := buf.ReadUint32(err) - if localIsPtr { - fieldValue.Set(reflect.ValueOf(&v)) - } else { - fieldValue.SetUint(uint64(v)) - } - case NullableInt64DispatchId: - v := buf.ReadInt64(err) - if localIsPtr { - fieldValue.Set(reflect.ValueOf(&v)) - } else { - fieldValue.SetInt(v) - } - case NullableUint64DispatchId: - v := buf.ReadUint64(err) - if localIsPtr { - fieldValue.Set(reflect.ValueOf(&v)) - } else { - fieldValue.SetUint(v) - } - case NullableFloat32DispatchId: - v := buf.ReadFloat32(err) - if localIsPtr { - fieldValue.Set(reflect.ValueOf(&v)) - } else { - fieldValue.SetFloat(float64(v)) - } - case NullableFloat64DispatchId: - v := buf.ReadFloat64(err) - if localIsPtr { - fieldValue.Set(reflect.ValueOf(&v)) - } else { - fieldValue.SetFloat(v) + // Pointer to string: can be nil, may need ref tracking + if field.RefMode == RefModeTracking { + break // Fall through to slow path for ref 
tracking + } + if field.RefMode == RefModeNullOnly { + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + // Leave as nil + return } } - if DebugOutputEnabled() { - fmt.Printf("[fory-debug] readFieldsInOrder: nullable fixed field=%s pos=%d->%d\n", field.Name, startPos, buf.ReaderIndex()) + // Allocate new string and store pointer + str := ctx.ReadString() + sp := new(string) + *sp = str + *(**string)(fieldPtr) = sp + return + case EnumDispatchId: + // Enums don't track refs - always use fast path + fieldValue := value.Field(field.FieldIndex) + readEnumField(ctx, field, fieldValue) + return + case StringSliceDispatchId: + if field.RefMode == RefModeTracking { + break } - continue - } - - // Fast path for nullable varint primitives (read ref flag + varint) - if isNullableVarintPrimitive(field.DispatchId) { - refFlag := buf.ReadInt8(err) - if refFlag == NullFlag { - // Leave pointer as nil (or zero for non-pointer local types) - if DebugOutputEnabled() { - fmt.Printf("[fory-debug] readFieldsInOrder: nullable varint field=%s is null pos=%d->%d\n", field.Name, startPos, buf.ReaderIndex()) - } - continue + *(*[]string)(fieldPtr) = ctx.ReadStringSlice(field.RefMode, false) + return + case BoolSliceDispatchId: + if field.RefMode == RefModeTracking { + break } - // Read varint value based on dispatch ID - // Handle both pointer and non-pointer local field types (schema evolution) - localIsPtr := fieldValue.Kind() == reflect.Ptr - switch field.DispatchId { - case NullableVarint32DispatchId: - v := buf.ReadVarint32(err) - if localIsPtr { - fieldValue.Set(reflect.ValueOf(&v)) - } else { - fieldValue.SetInt(int64(v)) - } - case NullableVarint64DispatchId: - v := buf.ReadVarint64(err) - if localIsPtr { - fieldValue.Set(reflect.ValueOf(&v)) - } else { - fieldValue.SetInt(v) - } - case NullableVarUint32DispatchId: - v := buf.ReadVaruint32(err) - if localIsPtr { - fieldValue.Set(reflect.ValueOf(&v)) - } else { - fieldValue.SetUint(uint64(v)) - } - case 
NullableVarUint64DispatchId: - v := buf.ReadVaruint64(err) - if localIsPtr { - fieldValue.Set(reflect.ValueOf(&v)) - } else { - fieldValue.SetUint(v) - } - case NullableTaggedInt64DispatchId: - v := buf.ReadTaggedInt64(err) - if localIsPtr { - fieldValue.Set(reflect.ValueOf(&v)) - } else { - fieldValue.SetInt(v) - } - case NullableTaggedUint64DispatchId: - v := buf.ReadTaggedUint64(err) - if localIsPtr { - fieldValue.Set(reflect.ValueOf(&v)) - } else { - fieldValue.SetUint(v) - } - case NullableIntDispatchId: - v := int(buf.ReadVarint64(err)) - if localIsPtr { - fieldValue.Set(reflect.ValueOf(&v)) - } else { - fieldValue.SetInt(int64(v)) - } - case NullableUintDispatchId: - v := uint(buf.ReadVaruint64(err)) - if localIsPtr { - fieldValue.Set(reflect.ValueOf(&v)) - } else { - fieldValue.SetUint(uint64(v)) - } + *(*[]bool)(fieldPtr) = ctx.ReadBoolSlice(field.RefMode, false) + return + case Int8SliceDispatchId: + if field.RefMode == RefModeTracking { + break } - if DebugOutputEnabled() { - fmt.Printf("[fory-debug] readFieldsInOrder: nullable varint field=%s pos=%d->%d\n", field.Name, startPos, buf.ReaderIndex()) + *(*[]int8)(fieldPtr) = ctx.ReadInt8Slice(field.RefMode, false) + return + case ByteSliceDispatchId: + if field.RefMode == RefModeTracking { + break } - continue - } - - if isEnumField(field) { - readEnumField(ctx, field, fieldValue) - if DebugOutputEnabled() { - fmt.Printf("[fory-debug] readFieldsInOrder: enum field=%s pos=%d->%d\n", field.Name, startPos, buf.ReaderIndex()) + *(*[]byte)(fieldPtr) = ctx.ReadByteSlice(field.RefMode, false) + return + case Int16SliceDispatchId: + if field.RefMode == RefModeTracking { + break } - continue - } - - // Slow path for non-primitives (all need ref flag per xlang spec) - if field.Serializer != nil { - // Use pre-computed RefMode and WriteType from field initialization - field.Serializer.Read(ctx, field.RefMode, field.WriteType, field.HasGenerics, fieldValue) - } else { - ctx.ReadValue(fieldValue, RefModeTracking, true) 
- } - if DebugOutputEnabled() { - fmt.Printf("[fory-debug] readFieldsInOrder: slow path field=%s pos=%d->%d\n", field.Name, startPos, buf.ReaderIndex()) - } - } -} - -// writeFieldsInOrder writes fields in the order they appear in s.fields (fingerprint order) -// This is used in non-compatible mode where Java writes fields in fingerprint order -func (s *structSerializer) writeFieldsInOrder(ctx *WriteContext, value reflect.Value) { - buf := ctx.Buffer() - canUseUnsafe := value.CanAddr() - var ptr unsafe.Pointer - if canUseUnsafe { - ptr = unsafe.Pointer(value.UnsafeAddr()) - } - - for _, field := range s.fields { - // Fast path for fixed-size primitive types - if canUseUnsafe && isFixedSizePrimitive(field.DispatchId, field.Referencable) { - fieldPtr := unsafe.Add(ptr, field.Offset) - switch field.DispatchId { - case PrimitiveBoolDispatchId: - buf.WriteBool(*(*bool)(fieldPtr)) - case PrimitiveInt8DispatchId: - buf.WriteInt8(*(*int8)(fieldPtr)) - case PrimitiveUint8DispatchId: - buf.WriteUint8(*(*uint8)(fieldPtr)) - case PrimitiveInt16DispatchId: - buf.WriteInt16(*(*int16)(fieldPtr)) - case PrimitiveUint16DispatchId: - buf.WriteUint16(*(*uint16)(fieldPtr)) - case PrimitiveInt32DispatchId: - buf.WriteInt32(*(*int32)(fieldPtr)) - case PrimitiveUint32DispatchId: - buf.WriteUint32(*(*uint32)(fieldPtr)) - case PrimitiveInt64DispatchId: - buf.WriteInt64(*(*int64)(fieldPtr)) - case PrimitiveUint64DispatchId: - buf.WriteUint64(*(*uint64)(fieldPtr)) - case PrimitiveFloat32DispatchId: - buf.WriteFloat32(*(*float32)(fieldPtr)) - case PrimitiveFloat64DispatchId: - buf.WriteFloat64(*(*float64)(fieldPtr)) - // NotnullXxxPtrDispatchId: local field is *T with nullable=false - case NotnullBoolPtrDispatchId: - buf.WriteBool(**(**bool)(fieldPtr)) - case NotnullInt8PtrDispatchId: - buf.WriteInt8(**(**int8)(fieldPtr)) - case NotnullUint8PtrDispatchId: - buf.WriteUint8(**(**uint8)(fieldPtr)) - case NotnullInt16PtrDispatchId: - buf.WriteInt16(**(**int16)(fieldPtr)) - case 
NotnullUint16PtrDispatchId: - buf.WriteUint16(**(**uint16)(fieldPtr)) - case NotnullInt32PtrDispatchId: - buf.WriteInt32(**(**int32)(fieldPtr)) - case NotnullUint32PtrDispatchId: - buf.WriteUint32(**(**uint32)(fieldPtr)) - case NotnullInt64PtrDispatchId: - buf.WriteInt64(**(**int64)(fieldPtr)) - case NotnullUint64PtrDispatchId: - buf.WriteUint64(**(**uint64)(fieldPtr)) - case NotnullFloat32PtrDispatchId: - buf.WriteFloat32(**(**float32)(fieldPtr)) - case NotnullFloat64PtrDispatchId: - buf.WriteFloat64(**(**float64)(fieldPtr)) + *(*[]int16)(fieldPtr) = ctx.ReadInt16Slice(field.RefMode, false) + return + case Int32SliceDispatchId: + if field.RefMode == RefModeTracking { + break } - continue - } - - // Fast path for varint primitive types - if canUseUnsafe && isVarintPrimitive(field.DispatchId, field.Referencable) && !fieldHasNonPrimitiveSerializer(field) { - fieldPtr := unsafe.Add(ptr, field.Offset) - switch field.DispatchId { - case PrimitiveVarint32DispatchId: - buf.WriteVarint32(*(*int32)(fieldPtr)) - case PrimitiveVarUint32DispatchId: - buf.WriteVaruint32(*(*uint32)(fieldPtr)) - case PrimitiveVarint64DispatchId: - buf.WriteVarint64(*(*int64)(fieldPtr)) - case PrimitiveVarUint64DispatchId: - buf.WriteVaruint64(*(*uint64)(fieldPtr)) - case PrimitiveTaggedInt64DispatchId: - buf.WriteTaggedInt64(*(*int64)(fieldPtr)) - case PrimitiveTaggedUint64DispatchId: - buf.WriteTaggedUint64(*(*uint64)(fieldPtr)) - case PrimitiveIntDispatchId: - buf.WriteVarint64(int64(*(*int)(fieldPtr))) - case PrimitiveUintDispatchId: - buf.WriteVaruint64(uint64(*(*uint)(fieldPtr))) - // NotnullXxxPtrDispatchId: local field is *T with nullable=false - case NotnullVarint32PtrDispatchId: - buf.WriteVarint32(**(**int32)(fieldPtr)) - case NotnullVarUint32PtrDispatchId: - buf.WriteVaruint32(**(**uint32)(fieldPtr)) - case NotnullVarint64PtrDispatchId: - buf.WriteVarint64(**(**int64)(fieldPtr)) - case NotnullVarUint64PtrDispatchId: - buf.WriteVaruint64(**(**uint64)(fieldPtr)) - case 
NotnullTaggedInt64PtrDispatchId: - buf.WriteTaggedInt64(**(**int64)(fieldPtr)) - case NotnullTaggedUint64PtrDispatchId: - buf.WriteTaggedUint64(**(**uint64)(fieldPtr)) - case NotnullIntPtrDispatchId: - buf.WriteVarint64(int64(**(**int)(fieldPtr))) - case NotnullUintPtrDispatchId: - buf.WriteVaruint64(uint64(**(**uint)(fieldPtr))) + *(*[]int32)(fieldPtr) = ctx.ReadInt32Slice(field.RefMode, false) + return + case Int64SliceDispatchId: + if field.RefMode == RefModeTracking { + break } - continue - } - - // Fast path for nullable fixed-size primitives (write ref flag + fixed bytes) - if canUseUnsafe && isNullableFixedSizePrimitive(field.DispatchId) { - fieldPtr := unsafe.Add(ptr, field.Offset) - // Get the pointer value and check for nil - switch field.DispatchId { - case NullableBoolDispatchId: - p := *(**bool)(fieldPtr) - if p == nil { - buf.WriteInt8(NullFlag) - } else { - buf.WriteInt8(NotNullValueFlag) - buf.WriteBool(*p) - } - case NullableInt8DispatchId: - p := *(**int8)(fieldPtr) - if p == nil { - buf.WriteInt8(NullFlag) - } else { - buf.WriteInt8(NotNullValueFlag) - buf.WriteInt8(*p) - } - case NullableUint8DispatchId: - p := *(**uint8)(fieldPtr) - if p == nil { - buf.WriteInt8(NullFlag) - } else { - buf.WriteInt8(NotNullValueFlag) - buf.WriteUint8(*p) - } - case NullableInt16DispatchId: - p := *(**int16)(fieldPtr) - if p == nil { - buf.WriteInt8(NullFlag) - } else { - buf.WriteInt8(NotNullValueFlag) - buf.WriteInt16(*p) - } - case NullableUint16DispatchId: - p := *(**uint16)(fieldPtr) - if p == nil { - buf.WriteInt8(NullFlag) - } else { - buf.WriteInt8(NotNullValueFlag) - buf.WriteUint16(*p) - } - case NullableInt32DispatchId: - p := *(**int32)(fieldPtr) - if p == nil { - buf.WriteInt8(NullFlag) - } else { - buf.WriteInt8(NotNullValueFlag) - buf.WriteInt32(*p) - } - case NullableUint32DispatchId: - p := *(**uint32)(fieldPtr) - if p == nil { - buf.WriteInt8(NullFlag) - } else { - buf.WriteInt8(NotNullValueFlag) - buf.WriteUint32(*p) - } - case 
NullableInt64DispatchId: - p := *(**int64)(fieldPtr) - if p == nil { - buf.WriteInt8(NullFlag) - } else { - buf.WriteInt8(NotNullValueFlag) - buf.WriteInt64(*p) - } - case NullableUint64DispatchId: - p := *(**uint64)(fieldPtr) - if p == nil { - buf.WriteInt8(NullFlag) - } else { - buf.WriteInt8(NotNullValueFlag) - buf.WriteUint64(*p) - } - case NullableFloat32DispatchId: - p := *(**float32)(fieldPtr) - if p == nil { - buf.WriteInt8(NullFlag) - } else { - buf.WriteInt8(NotNullValueFlag) - buf.WriteFloat32(*p) - } - case NullableFloat64DispatchId: - p := *(**float64)(fieldPtr) - if p == nil { - buf.WriteInt8(NullFlag) - } else { - buf.WriteInt8(NotNullValueFlag) - buf.WriteFloat64(*p) - } + *(*[]int64)(fieldPtr) = ctx.ReadInt64Slice(field.RefMode, false) + return + case IntSliceDispatchId: + if field.RefMode == RefModeTracking { + break } - continue - } - - // Fast path for nullable varint primitives (write ref flag + varint) - if canUseUnsafe && isNullableVarintPrimitive(field.DispatchId) { - fieldPtr := unsafe.Add(ptr, field.Offset) - switch field.DispatchId { - case NullableVarint32DispatchId: - p := *(**int32)(fieldPtr) - if p == nil { - buf.WriteInt8(NullFlag) - } else { - buf.WriteInt8(NotNullValueFlag) - buf.WriteVarint32(*p) - } - case NullableVarint64DispatchId: - p := *(**int64)(fieldPtr) - if p == nil { - buf.WriteInt8(NullFlag) - } else { - buf.WriteInt8(NotNullValueFlag) - buf.WriteVarint64(*p) - } - case NullableVarUint32DispatchId: - p := *(**uint32)(fieldPtr) - if p == nil { - buf.WriteInt8(NullFlag) - } else { - buf.WriteInt8(NotNullValueFlag) - buf.WriteVaruint32(*p) - } - case NullableVarUint64DispatchId: - p := *(**uint64)(fieldPtr) - if p == nil { - buf.WriteInt8(NullFlag) - } else { - buf.WriteInt8(NotNullValueFlag) - buf.WriteVaruint64(*p) - } - case NullableTaggedInt64DispatchId: - p := *(**int64)(fieldPtr) - if p == nil { - buf.WriteInt8(NullFlag) - } else { - buf.WriteInt8(NotNullValueFlag) - buf.WriteTaggedInt64(*p) - } - case 
NullableTaggedUint64DispatchId: - p := *(**uint64)(fieldPtr) - if p == nil { - buf.WriteInt8(NullFlag) - } else { - buf.WriteInt8(NotNullValueFlag) - buf.WriteTaggedUint64(*p) - } - case NullableIntDispatchId: - p := *(**int)(fieldPtr) - if p == nil { - buf.WriteInt8(NullFlag) - } else { - buf.WriteInt8(NotNullValueFlag) - buf.WriteVarint64(int64(*p)) - } - case NullableUintDispatchId: - p := *(**uint)(fieldPtr) - if p == nil { - buf.WriteInt8(NullFlag) - } else { - buf.WriteInt8(NotNullValueFlag) - buf.WriteVaruint64(uint64(*p)) - } + *(*[]int)(fieldPtr) = ctx.ReadIntSlice(field.RefMode, false) + return + case UintSliceDispatchId: + if field.RefMode == RefModeTracking { + break + } + *(*[]uint)(fieldPtr) = ctx.ReadUintSlice(field.RefMode, false) + return + case Float32SliceDispatchId: + if field.RefMode == RefModeTracking { + break + } + *(*[]float32)(fieldPtr) = ctx.ReadFloat32Slice(field.RefMode, false) + return + case Float64SliceDispatchId: + if field.RefMode == RefModeTracking { + break + } + *(*[]float64)(fieldPtr) = ctx.ReadFloat64Slice(field.RefMode, false) + return + case StringStringMapDispatchId: + if field.RefMode == RefModeTracking { + break + } + *(*map[string]string)(fieldPtr) = ctx.ReadStringStringMap(field.RefMode, false) + return + case StringInt64MapDispatchId: + if field.RefMode == RefModeTracking { + break + } + *(*map[string]int64)(fieldPtr) = ctx.ReadStringInt64Map(field.RefMode, false) + return + case StringInt32MapDispatchId: + if field.RefMode == RefModeTracking { + break + } + *(*map[string]int32)(fieldPtr) = ctx.ReadStringInt32Map(field.RefMode, false) + return + case StringIntMapDispatchId: + if field.RefMode == RefModeTracking { + break + } + *(*map[string]int)(fieldPtr) = ctx.ReadStringIntMap(field.RefMode, false) + return + case StringFloat64MapDispatchId: + if field.RefMode == RefModeTracking { + break + } + *(*map[string]float64)(fieldPtr) = ctx.ReadStringFloat64Map(field.RefMode, false) + return + case StringBoolMapDispatchId: + 
// NOTE: map[string]bool is used to represent SETs in Go xlang mode. + // We CANNOT use the fast path here because it reads MAP format, + // but the data is actually in SET format. Fall through to slow path + // which uses setSerializer to correctly read the SET format. + break + case IntIntMapDispatchId: + if field.RefMode == RefModeTracking { + break + } + *(*map[int]int)(fieldPtr) = ctx.ReadIntIntMap(field.RefMode, false) + return + case NullableTaggedInt64DispatchId: + // Nullable tagged INT64: read ref flag, then tagged encoding + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + // Leave pointer as nil + return + } + // Allocate new int64 and store pointer + v := new(int64) + *v = buf.ReadTaggedInt64(ctxErr) + *(**int64)(fieldPtr) = v + return + case NullableTaggedUint64DispatchId: + // Nullable tagged UINT64: read ref flag, then tagged encoding + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + // Leave pointer as nil + return + } + // Allocate new uint64 and store pointer + v := new(uint64) + *v = buf.ReadTaggedUint64(ctxErr) + *(**uint64)(fieldPtr) = v + return + // Nullable fixed-size types + case NullableBoolDispatchId: + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + return } - continue - } - - // Handle remaining field types (strings, slices, maps, structs, nullable primitives) - s.writeRemainingField(ctx, ptr, field, value) - } -} - -// skipField skips a field that doesn't exist or is incompatible -// Uses context error state for deferred error checking. 
-func (s *structSerializer) skipField(ctx *ReadContext, field *FieldInfo) { - if field.FieldDef.name != "" { - fieldDefIsStructType := isStructFieldType(field.FieldDef.fieldType) - // Use FieldDef's trackingRef and nullable to determine if ref flag was written by Java - // Java writes ref flag based on its FieldDef, not Go's field type - readRefFlag := field.FieldDef.trackingRef || field.FieldDef.nullable - SkipFieldValueWithTypeFlag(ctx, field.FieldDef, readRefFlag, ctx.Compatible() && fieldDefIsStructType) - return - } - // No FieldDef available, read into temp value - tempValue := reflect.New(field.Type).Elem() - if field.Serializer != nil { - readType := ctx.Compatible() && isStructField(field.Type) - refMode := RefModeNone - if field.Referencable { - refMode = RefModeTracking - } - field.Serializer.Read(ctx, refMode, readType, false, tempValue) - } else { - ctx.ReadValue(tempValue, RefModeTracking, true) - } -} - -func (s *structSerializer) ReadWithTypeInfo(ctx *ReadContext, refMode RefMode, typeInfo *TypeInfo, value reflect.Value) { - // typeInfo is already read, don't read it again - s.Read(ctx, refMode, false, false, value) -} - -// initFieldsFromContext initializes fields using context's type resolver (for WriteContext) -// initFieldsFromTypeResolver initializes fields from local struct type using TypeResolver -func (s *structSerializer) initFieldsFromTypeResolver(typeResolver *TypeResolver) error { - // If we have fieldDefs from type_def (remote meta), use them - if len(s.fieldDefs) > 0 { - return s.initFieldsFromDefsWithResolver(typeResolver) - } - - // Otherwise initialize from local struct type - type_ := s.type_ - var fields []*FieldInfo - var fieldNames []string - var serializers []Serializer - var typeIds []TypeId - var nullables []bool - var tagIDs []int - - for i := 0; i < type_.NumField(); i++ { - field := type_.Field(i) - firstRune, _ := utf8.DecodeRuneInString(field.Name) - if unicode.IsLower(firstRune) { - continue // skip unexported fields - 
} - - // Parse fory struct tag and check for ignore - foryTag := ParseForyTag(field) - if foryTag.Ignore { - continue // skip ignored fields - } - - fieldType := field.Type - - var fieldSerializer Serializer - // For interface{} fields, don't get a serializer - use WriteValue/ReadValue instead - // which will handle polymorphic types dynamically - if fieldType.Kind() != reflect.Interface { - // Get serializer for all non-interface field types - fieldSerializer, _ = typeResolver.getSerializerByType(fieldType, true) - } - - // Use TypeResolver helper methods for arrays and slices - if fieldType.Kind() == reflect.Array && fieldType.Elem().Kind() != reflect.Interface { - fieldSerializer, _ = typeResolver.GetArraySerializer(fieldType) - } else if fieldType.Kind() == reflect.Slice && fieldType.Elem().Kind() != reflect.Interface { - fieldSerializer, _ = typeResolver.GetSliceSerializer(fieldType) - } else if fieldType.Kind() == reflect.Slice && fieldType.Elem().Kind() == reflect.Interface { - // For struct fields with interface element types, use sliceDynSerializer - fieldSerializer = mustNewSliceDynSerializer(fieldType.Elem()) - } - - // Get TypeId for the serializer, fallback to deriving from kind - fieldTypeId := typeResolver.getTypeIdByType(fieldType) - if fieldTypeId == 0 { - fieldTypeId = typeIdFromKind(fieldType) - } - - // Override TypeId based on compress/encoding tags for integer types - // This matches the logic in type_def.go:buildFieldDefs - baseKind := fieldType.Kind() - if baseKind == reflect.Ptr { - baseKind = fieldType.Elem().Kind() - } - switch baseKind { - case reflect.Uint32: - if foryTag.CompressSet { - if foryTag.Compress { - fieldTypeId = VAR_UINT32 - } else { - fieldTypeId = UINT32 - } + v := new(bool) + *v = buf.ReadBool(ctxErr) + *(**bool)(fieldPtr) = v + return + case NullableInt8DispatchId: + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + return } - case reflect.Int32: - if foryTag.CompressSet { - if foryTag.Compress { - 
fieldTypeId = VARINT32 - } else { - fieldTypeId = INT32 - } + v := new(int8) + *v = buf.ReadInt8(ctxErr) + *(**int8)(fieldPtr) = v + return + case NullableUint8DispatchId: + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + return } - case reflect.Uint64: - if foryTag.EncodingSet { - switch foryTag.Encoding { - case "fixed": - fieldTypeId = UINT64 - case "varint": - fieldTypeId = VAR_UINT64 - case "tagged": - fieldTypeId = TAGGED_UINT64 - } + v := new(uint8) + *v = buf.ReadUint8(ctxErr) + *(**uint8)(fieldPtr) = v + return + case NullableInt16DispatchId: + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + return } - case reflect.Int64: - if foryTag.EncodingSet { - switch foryTag.Encoding { - case "fixed": - fieldTypeId = INT64 - case "varint": - fieldTypeId = VARINT64 - case "tagged": - fieldTypeId = TAGGED_INT64 - } + v := new(int16) + *v = buf.ReadInt16(ctxErr) + *(**int16)(fieldPtr) = v + return + case NullableUint16DispatchId: + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + return } - } - - // Calculate nullable flag for serialization (wire format): - // - In xlang mode: Per xlang spec, fields are NON-NULLABLE by default. - // Only pointer types are nullable by default. - // - In native mode: Go's natural semantics apply - slice/map/interface can be nil, - // so they are nullable by default. - // Can be overridden by explicit fory tag `fory:"nullable"`. - internalId := TypeId(fieldTypeId & 0xFF) - isEnum := internalId == ENUM || internalId == NAMED_ENUM - - // Determine nullable based on mode - // In xlang mode: only pointer types are nullable by default (per xlang spec) - // In native mode: Go's natural semantics - all nil-able types are nullable - // This ensures proper interoperability with Java/other languages in xlang mode. 
- var nullableFlag bool - if typeResolver.fory.config.IsXlang { - // xlang mode: only pointer types are nullable by default per xlang spec - // Slices and maps are NOT nullable - they serialize as empty when nil - nullableFlag = fieldType.Kind() == reflect.Ptr - } else { - // Native mode: Go's natural semantics - all nil-able types are nullable - nullableFlag = fieldType.Kind() == reflect.Ptr || - fieldType.Kind() == reflect.Slice || - fieldType.Kind() == reflect.Map || - fieldType.Kind() == reflect.Interface - } - if foryTag.NullableSet { - // Override nullable flag if explicitly set in fory tag - nullableFlag = foryTag.Nullable - } - // Primitives are never nullable, regardless of tag - if isNonNullablePrimitiveKind(fieldType.Kind()) && !isEnum { - nullableFlag = false - } - - // Calculate ref tracking - use tag override if explicitly set - trackRef := typeResolver.TrackRef() - if foryTag.RefSet { - trackRef = foryTag.Ref - } - - // Pre-compute RefMode based on (possibly overridden) trackRef and nullable - // For pointer-to-struct fields, enable ref tracking when trackRef is enabled, - // regardless of nullable flag. This is necessary to detect circular references. 
- refMode := RefModeNone - isStructPointer := fieldType.Kind() == reflect.Ptr && fieldType.Elem().Kind() == reflect.Struct - if trackRef && (nullableFlag || isStructPointer) { - refMode = RefModeTracking - } else if nullableFlag { - refMode = RefModeNullOnly - } - // Pre-compute WriteType: true for struct fields in compatible mode - writeType := typeResolver.Compatible() && isStructField(fieldType) - - // Pre-compute DispatchId, with special handling for enum fields and pointer-to-numeric - var staticId DispatchId - if fieldType.Kind() == reflect.Ptr && isNumericKind(fieldType.Elem().Kind()) { - if nullableFlag { - staticId = GetDispatchIdFromTypeId(fieldTypeId, true) - } else { - staticId = GetNotnullPtrDispatchId(fieldType.Elem().Kind(), foryTag.Encoding) + v := new(uint16) + *v = buf.ReadUint16(ctxErr) + *(**uint16)(fieldPtr) = v + return + case NullableInt32DispatchId: + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + return } - } else { - staticId = GetDispatchIdFromTypeId(fieldTypeId, nullableFlag) - if staticId == UnknownDispatchId { - staticId = GetDispatchId(fieldType) + v := new(int32) + *v = buf.ReadInt32(ctxErr) + *(**int32)(fieldPtr) = v + return + case NullableUint32DispatchId: + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + return } - } - if fieldSerializer != nil { - if _, ok := fieldSerializer.(*enumSerializer); ok { - staticId = EnumDispatchId - } else if ptrSer, ok := fieldSerializer.(*ptrToValueSerializer); ok { - if _, ok := ptrSer.valueSerializer.(*enumSerializer); ok { - staticId = EnumDispatchId - } + v := new(uint32) + *v = buf.ReadUint32(ctxErr) + *(**uint32)(fieldPtr) = v + return + case NullableInt64DispatchId: + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + return + } + v := new(int64) + *v = buf.ReadInt64(ctxErr) + *(**int64)(fieldPtr) = v + return + case NullableUint64DispatchId: + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + return } - } - if DebugOutputEnabled() { - 
fmt.Printf("[fory-debug] initFieldsFromTypeResolver: field=%s type=%v staticId=%d refMode=%v nullableFlag=%v serializer=%T\n", - SnakeCase(field.Name), fieldType, staticId, refMode, nullableFlag, fieldSerializer) - } - - fieldInfo := &FieldInfo{ - Name: SnakeCase(field.Name), - Offset: field.Offset, - Type: fieldType, - DispatchId: staticId, - TypeId: fieldTypeId, - Serializer: fieldSerializer, - Referencable: nullableFlag, // Use same logic as TypeDef's nullable flag for consistent ref handling - FieldIndex: i, - RefMode: refMode, - WriteType: writeType, - HasGenerics: isCollectionType(fieldTypeId), // Container fields have declared element types - TagID: foryTag.ID, - HasForyTag: foryTag.HasTag, - TagRefSet: foryTag.RefSet, - TagRef: foryTag.Ref, - TagNullableSet: foryTag.NullableSet, - TagNullable: foryTag.Nullable, - } - fields = append(fields, fieldInfo) - fieldNames = append(fieldNames, fieldInfo.Name) - serializers = append(serializers, fieldSerializer) - typeIds = append(typeIds, fieldTypeId) - nullables = append(nullables, nullableFlag) - tagIDs = append(tagIDs, foryTag.ID) - } - - // Sort fields according to specification using nullable info and tag IDs for consistent ordering - serializers, fieldNames = sortFields(typeResolver, fieldNames, serializers, typeIds, nullables, tagIDs) - order := make(map[string]int, len(fieldNames)) - for idx, name := range fieldNames { - order[name] = idx - } - - sort.SliceStable(fields, func(i, j int) bool { - oi, okI := order[fields[i].Name] - oj, okJ := order[fields[j].Name] - switch { - case okI && okJ: - return oi < oj - case okI: - return true - case okJ: - return false - default: - return false - } - }) - - s.fields = fields - s.groupFields() - - // Debug output for field order comparison with Java - if DebugOutputEnabled() && s.type_ != nil { - fmt.Printf("[Go] ========== Local sorted fields for %s ==========\n", s.type_.Name()) - fmt.Printf("[Go] Go sorted fixedFields (%d):\n", len(s.fixedFields)) - for i, f := 
range s.fixedFields { - fmt.Printf("[Go] [%d] %s -> dispatchId=%d, typeId=%d, size=%d, referencable=%v\n", i, f.Name, f.DispatchId, f.TypeId, f.FixedSize, f.Referencable) - } - fmt.Printf("[Go] Go sorted varintFields (%d):\n", len(s.varintFields)) - for i, f := range s.varintFields { - fmt.Printf("[Go] [%d] %s -> dispatchId=%d, typeId=%d, referencable=%v\n", i, f.Name, f.DispatchId, f.TypeId, f.Referencable) - } - fmt.Printf("[Go] Go sorted remainingFields (%d):\n", len(s.remainingFields)) - for i, f := range s.remainingFields { - fmt.Printf("[Go] [%d] %s -> dispatchId=%d, typeId=%d, referencable=%v\n", i, f.Name, f.DispatchId, f.TypeId, f.Referencable) - } - fmt.Printf("[Go] ===========================================\n") - } - - return nil -} - -// groupFields categorizes fields into fixedFields, varintFields, and remainingFields. -// Also computes pre-computed sizes and WriteOffset for batch buffer reservation. -// Fields are sorted within each group to match Java's wire format order. 
-func (s *structSerializer) groupFields() { - s.fixedFields = nil - s.varintFields = nil - s.remainingFields = nil - s.fixedSize = 0 - s.maxVarintSize = 0 - - for _, field := range s.fields { - if isFixedSizePrimitive(field.DispatchId, field.Referencable) { - // Non-nullable fixed-size primitives only - field.FixedSize = getFixedSizeByDispatchId(field.DispatchId) - s.fixedFields = append(s.fixedFields, field) - } else if isVarintPrimitive(field.DispatchId, field.Referencable) { - // Non-nullable varint primitives only - s.varintFields = append(s.varintFields, field) - } else { - // All other fields including nullable primitives - s.remainingFields = append(s.remainingFields, field) - } - } - - // Sort fixedFields: size desc, typeId desc, name asc - sort.SliceStable(s.fixedFields, func(i, j int) bool { - fi, fj := s.fixedFields[i], s.fixedFields[j] - if fi.FixedSize != fj.FixedSize { - return fi.FixedSize > fj.FixedSize // size descending - } - if fi.TypeId != fj.TypeId { - return fi.TypeId > fj.TypeId // typeId descending - } - return fi.Name < fj.Name // name ascending - }) - - // Recompute WriteOffset after sorting - for _, field := range s.fixedFields { - field.WriteOffset = s.fixedSize - s.fixedSize += field.FixedSize - } - - // Sort varintFields: underlying type size desc, typeId desc, name asc - // Note: Java uses primitive type size (8 for long, 4 for int), not encoding max size - sort.SliceStable(s.varintFields, func(i, j int) bool { - fi, fj := s.varintFields[i], s.varintFields[j] - sizeI := getUnderlyingTypeSize(fi.DispatchId) - sizeJ := getUnderlyingTypeSize(fj.DispatchId) - if sizeI != sizeJ { - return sizeI > sizeJ // size descending - } - if fi.TypeId != fj.TypeId { - return fi.TypeId > fj.TypeId // typeId descending - } - return fi.Name < fj.Name // name ascending - }) - - // Recompute maxVarintSize - for _, field := range s.varintFields { - s.maxVarintSize += getVarintMaxSizeByDispatchId(field.DispatchId) - } - - // Sort remainingFields: nullable 
primitives first (by primitiveComparator), - // then other internal types (typeId, name), then lists, sets, maps, other (by name) - // This sorting is ALWAYS applied - same algorithm for both local and remote types - sort.SliceStable(s.remainingFields, func(i, j int) bool { - fi, fj := s.remainingFields[i], s.remainingFields[j] - catI, catJ := getFieldCategory(fi), getFieldCategory(fj) - if catI != catJ { - return catI < catJ - } - // Within nullable primitives category, use primitiveComparator logic - if catI == 0 { - return comparePrimitiveFields(fi, fj) - } - // Within other internal types category, sort by typeId then name - if catI == 1 { - if fi.TypeId != fj.TypeId { - return fi.TypeId < fj.TypeId + v := new(uint64) + *v = buf.ReadUint64(ctxErr) + *(**uint64)(fieldPtr) = v + return + case NullableFloat32DispatchId: + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + return } - return fi.Name < fj.Name - } - // List, set, map, and other categories: sort by name only - return fi.Name < fj.Name - }) -} - -// getFieldCategory returns the category for sorting remainingFields: -// 0: nullable primitives (sorted by primitiveComparator) -// 1: internal types STRING, BINARY, LIST, SET, MAP (sorted by typeId, then name) -// 2: struct, enum, and all other types (sorted by name only) -func getFieldCategory(field *FieldInfo) int { - if isNullableFixedSizePrimitive(field.DispatchId) || isNullableVarintPrimitive(field.DispatchId) { - return 0 - } - internalId := field.TypeId & 0xFF - switch TypeId(internalId) { - case STRING, BINARY, LIST, SET, MAP: - // Internal types: sorted by typeId, then name - return 1 - default: - // struct, enum, and all other types: sorted by name - return 2 - } -} - -// comparePrimitiveFields compares two nullable primitive fields using Java's primitiveComparator logic: -// fixed before varint, then underlying type size desc, typeId desc, name asc -func comparePrimitiveFields(fi, fj *FieldInfo) bool { - iFixed := 
isNullableFixedSizePrimitive(fi.DispatchId) - jFixed := isNullableFixedSizePrimitive(fj.DispatchId) - if iFixed != jFixed { - return iFixed // fixed before varint - } - // Same category: compare by underlying type size desc, typeId desc, name asc - // Note: Java uses primitive type size (8, 4, 2, 1), not encoding size - sizeI := getUnderlyingTypeSize(fi.DispatchId) - sizeJ := getUnderlyingTypeSize(fj.DispatchId) - if sizeI != sizeJ { - return sizeI > sizeJ // size descending - } - if fi.TypeId != fj.TypeId { - return fi.TypeId > fj.TypeId // typeId descending - } - return fi.Name < fj.Name // name ascending -} - -// getNullableFixedSize returns the fixed size for nullable fixed primitives -func getNullableFixedSize(dispatchId DispatchId) int { - switch dispatchId { - case NullableBoolDispatchId, NullableInt8DispatchId, NullableUint8DispatchId: - return 1 - case NullableInt16DispatchId, NullableUint16DispatchId: - return 2 - case NullableInt32DispatchId, NullableUint32DispatchId, NullableFloat32DispatchId: - return 4 - case NullableInt64DispatchId, NullableUint64DispatchId, NullableFloat64DispatchId: - return 8 - default: - return 0 - } -} - -// getNullableVarintMaxSize returns the max size for nullable varint primitives -func getNullableVarintMaxSize(dispatchId DispatchId) int { - switch dispatchId { - case NullableVarint32DispatchId, NullableVarUint32DispatchId: - return 5 - case NullableVarint64DispatchId, NullableVarUint64DispatchId, NullableIntDispatchId, NullableUintDispatchId: - return 10 - case NullableTaggedInt64DispatchId, NullableTaggedUint64DispatchId: - return 9 - default: - return 0 - } -} - -// getUnderlyingTypeSize returns the size of the underlying primitive type (8 for 64-bit, 4 for 32-bit, etc.) 
-// This matches Java's getSizeOfPrimitiveType() which uses the type size, not encoding size -func getUnderlyingTypeSize(dispatchId DispatchId) int { - switch dispatchId { - // 64-bit types - case PrimitiveInt64DispatchId, PrimitiveUint64DispatchId, PrimitiveFloat64DispatchId, - NotnullInt64PtrDispatchId, NotnullUint64PtrDispatchId, NotnullFloat64PtrDispatchId, - PrimitiveVarint64DispatchId, PrimitiveVarUint64DispatchId, - NotnullVarint64PtrDispatchId, NotnullVarUint64PtrDispatchId, - PrimitiveTaggedInt64DispatchId, PrimitiveTaggedUint64DispatchId, - NotnullTaggedInt64PtrDispatchId, NotnullTaggedUint64PtrDispatchId, - PrimitiveIntDispatchId, PrimitiveUintDispatchId, - NotnullIntPtrDispatchId, NotnullUintPtrDispatchId: - return 8 - // 32-bit types - case PrimitiveInt32DispatchId, PrimitiveUint32DispatchId, PrimitiveFloat32DispatchId, - NotnullInt32PtrDispatchId, NotnullUint32PtrDispatchId, NotnullFloat32PtrDispatchId, - PrimitiveVarint32DispatchId, PrimitiveVarUint32DispatchId, - NotnullVarint32PtrDispatchId, NotnullVarUint32PtrDispatchId: - return 4 - // 16-bit types - case PrimitiveInt16DispatchId, PrimitiveUint16DispatchId, - NotnullInt16PtrDispatchId, NotnullUint16PtrDispatchId: - return 2 - // 8-bit types - case PrimitiveBoolDispatchId, PrimitiveInt8DispatchId, PrimitiveUint8DispatchId, - NotnullBoolPtrDispatchId, NotnullInt8PtrDispatchId, NotnullUint8PtrDispatchId: - return 1 - // Nullable types - case NullableInt64DispatchId, NullableUint64DispatchId, NullableFloat64DispatchId, - NullableVarint64DispatchId, NullableVarUint64DispatchId, - NullableTaggedInt64DispatchId, NullableTaggedUint64DispatchId, - NullableIntDispatchId, NullableUintDispatchId: - return 8 - case NullableInt32DispatchId, NullableUint32DispatchId, NullableFloat32DispatchId, - NullableVarint32DispatchId, NullableVarUint32DispatchId: - return 4 - case NullableInt16DispatchId, NullableUint16DispatchId: - return 2 - case NullableBoolDispatchId, NullableInt8DispatchId, NullableUint8DispatchId: - 
return 1 - default: - return 0 - } -} - -// initFieldsFromDefsWithResolver initializes fields from remote fieldDefs using typeResolver -func (s *structSerializer) initFieldsFromDefsWithResolver(typeResolver *TypeResolver) error { - type_ := s.type_ - if type_ == nil { - // Type is not known - we'll create an interface{} placeholder - // This happens when deserializing unknown types in compatible mode - // For now, we'll create fields that discard all data - var fields []*FieldInfo - for _, def := range s.fieldDefs { - fieldSerializer, _ := getFieldTypeSerializerWithResolver(typeResolver, def.fieldType) - remoteTypeInfo, _ := def.fieldType.getTypeInfoWithResolver(typeResolver) - remoteType := remoteTypeInfo.Type - if remoteType == nil { - remoteType = reflect.TypeOf((*interface{})(nil)).Elem() + v := new(float32) + *v = buf.ReadFloat32(ctxErr) + *(**float32)(fieldPtr) = v + return + case NullableFloat64DispatchId: + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + return } - // Get TypeId from FieldType's TypeId method - fieldTypeId := def.fieldType.TypeId() - // Pre-compute RefMode based on trackRef and FieldDef flags - refMode := RefModeNone - if def.trackingRef { - refMode = RefModeTracking - } else if def.nullable { - refMode = RefModeNullOnly + v := new(float64) + *v = buf.ReadFloat64(ctxErr) + *(**float64)(fieldPtr) = v + return + // Nullable varint types + case NullableVarint32DispatchId: + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + return } - // Pre-compute WriteType: true for struct fields in compatible mode - writeType := typeResolver.Compatible() && isStructField(remoteType) - - // Pre-compute DispatchId, with special handling for enum fields - staticId := GetDispatchId(remoteType) - if fieldSerializer != nil { - if _, ok := fieldSerializer.(*enumSerializer); ok { - staticId = EnumDispatchId - } else if ptrSer, ok := fieldSerializer.(*ptrToValueSerializer); ok { - if _, ok := ptrSer.valueSerializer.(*enumSerializer); ok { - 
staticId = EnumDispatchId - } - } + v := new(int32) + *v = buf.ReadVarint32(ctxErr) + *(**int32)(fieldPtr) = v + return + case NullableVarUint32DispatchId: + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + return } - - fieldInfo := &FieldInfo{ - Name: def.name, - Offset: 0, - Type: remoteType, - DispatchId: staticId, - TypeId: fieldTypeId, - Serializer: fieldSerializer, - Referencable: def.nullable, // Use remote nullable flag - FieldIndex: -1, // Mark as non-existent field to discard data - FieldDef: def, // Save original FieldDef for skipping - RefMode: refMode, - WriteType: writeType, - HasGenerics: isCollectionType(fieldTypeId), // Container fields have declared element types + v := new(uint32) + *v = buf.ReadVaruint32(ctxErr) + *(**uint32)(fieldPtr) = v + return + case NullableVarint64DispatchId: + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + return } - fields = append(fields, fieldInfo) - } - s.fields = fields - s.groupFields() - s.typeDefDiffers = true // Unknown type, must use ordered reading - return nil - } - - // Build maps from field names and tag IDs to struct field indices - fieldNameToIndex := make(map[string]int) - fieldNameToOffset := make(map[string]uintptr) - fieldNameToType := make(map[string]reflect.Type) - fieldTagIDToIndex := make(map[int]int) // tag ID -> struct field index - fieldTagIDToOffset := make(map[int]uintptr) // tag ID -> field offset - fieldTagIDToType := make(map[int]reflect.Type) // tag ID -> field type - fieldTagIDToName := make(map[int]string) // tag ID -> snake_case field name - for i := 0; i < type_.NumField(); i++ { - field := type_.Field(i) - - // Parse fory tag and skip ignored fields - foryTag := ParseForyTag(field) - if foryTag.Ignore { - continue - } - - name := SnakeCase(field.Name) - fieldNameToIndex[name] = i - fieldNameToOffset[name] = field.Offset - fieldNameToType[name] = field.Type - - // Also index by tag ID if present - if foryTag.ID >= 0 { - fieldTagIDToIndex[foryTag.ID] = i - 
fieldTagIDToOffset[foryTag.ID] = field.Offset - fieldTagIDToType[foryTag.ID] = field.Type - fieldTagIDToName[foryTag.ID] = name - } - } - - var fields []*FieldInfo - - for _, def := range s.fieldDefs { - fieldSerializer, err := getFieldTypeSerializerWithResolver(typeResolver, def.fieldType) - if err != nil || fieldSerializer == nil { - // If we can't get serializer from typeID, try to get it from the Go type - // This can happen when the type isn't registered in typeIDToTypeInfo - remoteTypeInfo, _ := def.fieldType.getTypeInfoWithResolver(typeResolver) - if remoteTypeInfo.Type != nil { - fieldSerializer, _ = typeResolver.getSerializerByType(remoteTypeInfo.Type, true) + v := new(int64) + *v = buf.ReadVarint64(ctxErr) + *(**int64)(fieldPtr) = v + return + case NullableVarUint64DispatchId: + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + return } - } - - // Get the remote type from fieldDef - remoteTypeInfo, _ := def.fieldType.getTypeInfoWithResolver(typeResolver) - remoteType := remoteTypeInfo.Type - // Track if type lookup failed - we'll need to skip such fields - // Note: DynamicFieldType.getTypeInfoWithResolver returns interface{} (not nil) when lookup fails - emptyInterfaceType := reflect.TypeOf((*interface{})(nil)).Elem() - typeLookupFailed := remoteType == nil || remoteType == emptyInterfaceType - if remoteType == nil { - remoteType = emptyInterfaceType - } - - // For struct-like fields, even if TypeDef lookup fails, we can try to read - // the field because type resolution happens at read time from the buffer. - // The type name might map to a different local type. 
- isStructLikeField := isStructFieldType(def.fieldType) + v := new(uint64) + *v = buf.ReadVaruint64(ctxErr) + *(**uint64)(fieldPtr) = v + return + } + } - // Try to find corresponding local field - // First try to match by tag ID (if remote def uses tag ID) - // Then fall back to matching by field name - fieldIndex := -1 - var offset uintptr - var fieldType reflect.Type - var localFieldName string - var localType reflect.Type - var exists bool + // Slow path for RefModeTracking cases that break from the switch above + fieldValue := value.Field(field.FieldIndex) + if field.Serializer != nil { + field.Serializer.Read(ctx, field.RefMode, field.WriteType, field.HasGenerics, fieldValue) + } else { + ctx.ReadValue(fieldValue, RefModeTracking, true) + } +} - if def.tagID >= 0 { - // Try to match by tag ID - if idx, ok := fieldTagIDToIndex[def.tagID]; ok { - exists = true - fieldIndex = idx // Will be overwritten if types are compatible - localType = fieldTagIDToType[def.tagID] - offset = fieldTagIDToOffset[def.tagID] - localFieldName = fieldTagIDToName[def.tagID] - _ = fieldIndex // Use to avoid compiler warning, will be set properly below +// readFieldsInOrder reads fields in the order they appear in s.fields (TypeDef order) +// This is used in compatible mode where Java writes fields in TypeDef order +// Precondition: value.CanAddr() must be true (checked by caller) +func (s *structSerializer) readFieldsInOrder(ctx *ReadContext, value reflect.Value) { + buf := ctx.Buffer() + ptr := unsafe.Pointer(value.UnsafeAddr()) + err := ctx.Err() + for i := range s.fields { + field := &s.fields[i] + if field.FieldIndex < 0 { + s.skipField(ctx, field) + if ctx.HasError() { + return } + continue } - // Fall back to name-based matching if tag ID match failed - if !exists && def.name != "" { - if idx, ok := fieldNameToIndex[def.name]; ok { - exists = true - localType = fieldNameToType[def.name] - offset = fieldNameToOffset[def.name] - localFieldName = def.name - _ = idx // Will be set 
properly below + // Fast path for fixed-size primitive types (no ref flag from remote schema) + if isFixedSizePrimitive(field.DispatchId, field.Nullable) { + fieldPtr := unsafe.Add(ptr, field.Offset) + switch field.DispatchId { + // PrimitiveXxxDispatchId: local field is non-pointer type + case PrimitiveBoolDispatchId: + *(*bool)(fieldPtr) = buf.ReadBool(err) + case PrimitiveInt8DispatchId: + *(*int8)(fieldPtr) = buf.ReadInt8(err) + case PrimitiveUint8DispatchId: + *(*uint8)(fieldPtr) = uint8(buf.ReadInt8(err)) + case PrimitiveInt16DispatchId: + *(*int16)(fieldPtr) = buf.ReadInt16(err) + case PrimitiveUint16DispatchId: + *(*uint16)(fieldPtr) = buf.ReadUint16(err) + case PrimitiveInt32DispatchId: + *(*int32)(fieldPtr) = buf.ReadInt32(err) + case PrimitiveUint32DispatchId: + *(*uint32)(fieldPtr) = buf.ReadUint32(err) + case PrimitiveInt64DispatchId: + *(*int64)(fieldPtr) = buf.ReadInt64(err) + case PrimitiveUint64DispatchId: + *(*uint64)(fieldPtr) = buf.ReadUint64(err) + case PrimitiveFloat32DispatchId: + *(*float32)(fieldPtr) = buf.ReadFloat32(err) + case PrimitiveFloat64DispatchId: + *(*float64)(fieldPtr) = buf.ReadFloat64(err) + // NotnullXxxPtrDispatchId: local field is *T with nullable=false + case NotnullBoolPtrDispatchId: + v := new(bool) + *v = buf.ReadBool(err) + *(**bool)(fieldPtr) = v + case NotnullInt8PtrDispatchId: + v := new(int8) + *v = buf.ReadInt8(err) + *(**int8)(fieldPtr) = v + case NotnullUint8PtrDispatchId: + v := new(uint8) + *v = uint8(buf.ReadInt8(err)) + *(**uint8)(fieldPtr) = v + case NotnullInt16PtrDispatchId: + v := new(int16) + *v = buf.ReadInt16(err) + *(**int16)(fieldPtr) = v + case NotnullUint16PtrDispatchId: + v := new(uint16) + *v = buf.ReadUint16(err) + *(**uint16)(fieldPtr) = v + case NotnullInt32PtrDispatchId: + v := new(int32) + *v = buf.ReadInt32(err) + *(**int32)(fieldPtr) = v + case NotnullUint32PtrDispatchId: + v := new(uint32) + *v = buf.ReadUint32(err) + *(**uint32)(fieldPtr) = v + case NotnullInt64PtrDispatchId: + v := 
new(int64) + *v = buf.ReadInt64(err) + *(**int64)(fieldPtr) = v + case NotnullUint64PtrDispatchId: + v := new(uint64) + *v = buf.ReadUint64(err) + *(**uint64)(fieldPtr) = v + case NotnullFloat32PtrDispatchId: + v := new(float32) + *v = buf.ReadFloat32(err) + *(**float32)(fieldPtr) = v + case NotnullFloat64PtrDispatchId: + v := new(float64) + *v = buf.ReadFloat64(err) + *(**float64)(fieldPtr) = v } + continue } - if exists { - idx := fieldNameToIndex[localFieldName] - if def.tagID >= 0 { - idx = fieldTagIDToIndex[def.tagID] + // Fast path for varint primitive types (no ref flag from remote schema) + if isVarintPrimitive(field.DispatchId, field.Nullable) && !fieldHasNonPrimitiveSerializer(field) { + fieldPtr := unsafe.Add(ptr, field.Offset) + switch field.DispatchId { + // PrimitiveXxxDispatchId: local field is non-pointer type + case PrimitiveVarint32DispatchId: + *(*int32)(fieldPtr) = buf.ReadVarint32(err) + case PrimitiveVarint64DispatchId: + *(*int64)(fieldPtr) = buf.ReadVarint64(err) + case PrimitiveVarUint32DispatchId: + *(*uint32)(fieldPtr) = buf.ReadVaruint32(err) + case PrimitiveVarUint64DispatchId: + *(*uint64)(fieldPtr) = buf.ReadVaruint64(err) + case PrimitiveTaggedInt64DispatchId: + *(*int64)(fieldPtr) = buf.ReadTaggedInt64(err) + case PrimitiveTaggedUint64DispatchId: + *(*uint64)(fieldPtr) = buf.ReadTaggedUint64(err) + case PrimitiveIntDispatchId: + *(*int)(fieldPtr) = int(buf.ReadVarint64(err)) + case PrimitiveUintDispatchId: + *(*uint)(fieldPtr) = uint(buf.ReadVaruint64(err)) + // NotnullXxxPtrDispatchId: local field is *T with nullable=false + case NotnullVarint32PtrDispatchId: + v := new(int32) + *v = buf.ReadVarint32(err) + *(**int32)(fieldPtr) = v + case NotnullVarint64PtrDispatchId: + v := new(int64) + *v = buf.ReadVarint64(err) + *(**int64)(fieldPtr) = v + case NotnullVarUint32PtrDispatchId: + v := new(uint32) + *v = buf.ReadVaruint32(err) + *(**uint32)(fieldPtr) = v + case NotnullVarUint64PtrDispatchId: + v := new(uint64) + *v = 
buf.ReadVaruint64(err) + *(**uint64)(fieldPtr) = v + case NotnullTaggedInt64PtrDispatchId: + v := new(int64) + *v = buf.ReadTaggedInt64(err) + *(**int64)(fieldPtr) = v + case NotnullTaggedUint64PtrDispatchId: + v := new(uint64) + *v = buf.ReadTaggedUint64(err) + *(**uint64)(fieldPtr) = v + case NotnullIntPtrDispatchId: + v := new(int) + *v = int(buf.ReadVarint64(err)) + *(**int)(fieldPtr) = v + case NotnullUintPtrDispatchId: + v := new(uint) + *v = uint(buf.ReadVaruint64(err)) + *(**uint)(fieldPtr) = v } - // Check if types are compatible - // For primitive types: skip if types don't match - // For struct-like types: allow read even if TypeDef lookup failed, - // because runtime type resolution by name might work - shouldRead := false - isPolymorphicField := def.fieldType.TypeId() == UNKNOWN - defTypeId := def.fieldType.TypeId() - // Check if field is an enum - either by type ID or by serializer type - // The type ID may be a composite value with namespace bits, so check the low 8 bits - internalDefTypeId := defTypeId & 0xFF - isEnumField := internalDefTypeId == NAMED_ENUM || internalDefTypeId == ENUM - if !isEnumField && fieldSerializer != nil { - _, isEnumField = fieldSerializer.(*enumSerializer) + continue + } + + // Get field value for nullable primitives and non-primitives + fieldValue := value.Field(field.FieldIndex) + + // Handle nullable fixed-size primitives (read ref flag + fixed bytes) + // These have Nullable=true but use fixed encoding, not varint + if isNullableFixedSizePrimitive(field.DispatchId) { + refFlag := buf.ReadInt8(err) + if refFlag == NullFlag { + // Leave pointer as nil (or zero for non-pointer local types) + continue } - if isPolymorphicField && localType.Kind() == reflect.Interface { - // For polymorphic (UNKNOWN) fields with interface{} local type, - // allow reading - the actual type will be determined at runtime - shouldRead = true - fieldType = localType - } else if typeLookupFailed && isEnumField { - // For enum fields with failed 
TypeDef lookup (NAMED_ENUM stores by namespace/typename, not typeId), - // check if local field is a numeric type (Go enums are int-based) - // Also handle pointer enum fields (*EnumType) - localKind := localType.Kind() - elemKind := localKind - if localKind == reflect.Ptr { - elemKind = localType.Elem().Kind() - } - if isNumericKind(elemKind) { - shouldRead = true - fieldType = localType - // Get the serializer for the base type (the enum type, not the pointer) - baseType := localType - if localKind == reflect.Ptr { - baseType = localType.Elem() - } - fieldSerializer, _ = typeResolver.getSerializerByType(baseType, true) - } - } else if typeLookupFailed && isStructLikeField { - // For struct fields with failed TypeDef lookup, check if local field can hold a struct - localKind := localType.Kind() - if localKind == reflect.Ptr { - localKind = localType.Elem().Kind() - } - if localKind == reflect.Struct || localKind == reflect.Interface { - shouldRead = true - fieldType = localType // Use local type for struct fields - } - } else if typeLookupFailed && (defTypeId == LIST || defTypeId == SET) { - // For collection fields with failed type lookup (e.g., List with interface element type), - // check if local type is a slice with interface element type (e.g., []Animal) - // The type lookup fails because sliceSerializer doesn't support interface elements - if localType.Kind() == reflect.Slice && localType.Elem().Kind() == reflect.Interface { - shouldRead = true - fieldType = localType + // Read fixed-size value based on dispatch ID + // Handle both pointer and non-pointer local field types (schema evolution) + switch field.DispatchId { + case NullableBoolDispatchId: + v := buf.ReadBool(err) + if field.IsPtr { + fieldValue.Set(reflect.ValueOf(&v)) + } else { + fieldValue.SetBool(v) } - } else if !typeLookupFailed && typesCompatible(localType, remoteType) { - shouldRead = true - fieldType = localType - } - - if shouldRead { - fieldIndex = idx - // offset was already set above 
when matching by tag ID or field name - // For struct-like fields with failed type lookup, get the serializer for the local type - if typeLookupFailed && isStructLikeField && fieldSerializer == nil { - fieldSerializer, _ = typeResolver.getSerializerByType(localType, true) + case NullableInt8DispatchId: + v := buf.ReadInt8(err) + if field.IsPtr { + fieldValue.Set(reflect.ValueOf(&v)) + } else { + fieldValue.SetInt(int64(v)) } - // For collection fields with interface element types, use sliceDynSerializer - if typeLookupFailed && (defTypeId == LIST || defTypeId == SET) && fieldSerializer == nil { - if localType.Kind() == reflect.Slice && localType.Elem().Kind() == reflect.Interface { - fieldSerializer = mustNewSliceDynSerializer(localType.Elem()) - } + case NullableUint8DispatchId: + v := uint8(buf.ReadInt8(err)) + if field.IsPtr { + fieldValue.Set(reflect.ValueOf(&v)) + } else { + fieldValue.SetUint(uint64(v)) } - // If local type is *T and remote type is T, we need the serializer for *T - // This handles Java's Integer/Long (nullable boxed types) mapping to Go's *int32/*int64 - if localType.Kind() == reflect.Ptr && localType.Elem() == remoteType { - fieldSerializer, _ = typeResolver.getSerializerByType(localType, true) + case NullableInt16DispatchId: + v := buf.ReadInt16(err) + if field.IsPtr { + fieldValue.Set(reflect.ValueOf(&v)) + } else { + fieldValue.SetInt(int64(v)) } - // For pointer enum fields (*EnumType), get the serializer for the base enum type - // The struct read/write code will handle pointer dereferencing - if isEnumField && localType.Kind() == reflect.Ptr { - baseType := localType.Elem() - fieldSerializer, _ = typeResolver.getSerializerByType(baseType, true) - if DebugOutputEnabled() { - fmt.Printf("[fory-debug] pointer enum field %s: localType=%v baseType=%v serializer=%T\n", - def.name, localType, baseType, fieldSerializer) - } + case NullableUint16DispatchId: + v := buf.ReadUint16(err) + if field.IsPtr { + fieldValue.Set(reflect.ValueOf(&v)) + } 
else { + fieldValue.SetUint(uint64(v)) } - // For array fields, use array serializers (not slice serializers) even if typeID maps to slice serializer - // The typeID (INT16_ARRAY, etc.) is shared between arrays and slices, but we need the correct - // serializer based on the actual Go type - if localType.Kind() == reflect.Array { - elemType := localType.Elem() - switch elemType.Kind() { - case reflect.Bool: - fieldSerializer = boolArraySerializer{arrayType: localType} - case reflect.Int8: - fieldSerializer = int8ArraySerializer{arrayType: localType} - case reflect.Int16: - fieldSerializer = int16ArraySerializer{arrayType: localType} - case reflect.Int32: - fieldSerializer = int32ArraySerializer{arrayType: localType} - case reflect.Int64: - fieldSerializer = int64ArraySerializer{arrayType: localType} - case reflect.Uint8: - fieldSerializer = uint8ArraySerializer{arrayType: localType} - case reflect.Float32: - fieldSerializer = float32ArraySerializer{arrayType: localType} - case reflect.Float64: - fieldSerializer = float64ArraySerializer{arrayType: localType} - case reflect.Int: - if reflect.TypeOf(int(0)).Size() == 8 { - fieldSerializer = int64ArraySerializer{arrayType: localType} - } else { - fieldSerializer = int32ArraySerializer{arrayType: localType} - } - } + case NullableInt32DispatchId: + v := buf.ReadInt32(err) + if field.IsPtr { + fieldValue.Set(reflect.ValueOf(&v)) + } else { + fieldValue.SetInt(int64(v)) } - } else { - // Types are incompatible or unknown - use remote type but mark field as not settable - fieldType = remoteType - fieldIndex = -1 - offset = 0 // Don't set offset for incompatible fields - } - } else { - // Field doesn't exist locally, use type from fieldDef - fieldType = remoteType - } - - // Get TypeId from FieldType's TypeId method - fieldTypeId := def.fieldType.TypeId() - // Pre-compute RefMode based on FieldDef flags (trackingRef and nullable) - refMode := RefModeNone - if def.trackingRef { - refMode = RefModeTracking - } else if 
def.nullable { - refMode = RefModeNullOnly - } - // Pre-compute WriteType: true for struct fields in compatible mode - writeType := typeResolver.Compatible() && isStructField(fieldType) - - // Pre-compute DispatchId, with special handling for pointer-to-numeric and enum fields - // IMPORTANT: For compatible mode reading, we must use the REMOTE nullable flag - // to determine DispatchId, because Java wrote data with its nullable semantics. - var staticId DispatchId - localKind := fieldType.Kind() - localIsPtr := localKind == reflect.Ptr - localIsNumeric := isNumericKind(localKind) || (localIsPtr && isNumericKind(fieldType.Elem().Kind())) - - if localIsNumeric { - if localIsPtr { - if def.nullable { - // Local is *T, remote is nullable - use nullable DispatchId - staticId = GetDispatchIdFromTypeId(fieldTypeId, true) + case NullableUint32DispatchId: + v := buf.ReadUint32(err) + if field.IsPtr { + fieldValue.Set(reflect.ValueOf(&v)) } else { - // Local is *T, remote is NOT nullable - use notnull pointer DispatchId - encoding := getEncodingFromTypeId(fieldTypeId) - staticId = GetNotnullPtrDispatchId(fieldType.Elem().Kind(), encoding) + fieldValue.SetUint(uint64(v)) } - } else { - if def.nullable { - // Local is T (non-pointer), remote is nullable - use nullable DispatchId - staticId = GetDispatchIdFromTypeId(fieldTypeId, true) + case NullableInt64DispatchId: + v := buf.ReadInt64(err) + if field.IsPtr { + fieldValue.Set(reflect.ValueOf(&v)) } else { - // Local is T, remote is NOT nullable - use primitive DispatchId - staticId = GetDispatchId(fieldType) + fieldValue.SetInt(v) } - } - } else { - staticId = GetDispatchId(fieldType) - } - if fieldSerializer != nil { - if _, ok := fieldSerializer.(*enumSerializer); ok { - staticId = EnumDispatchId - } else if ptrSer, ok := fieldSerializer.(*ptrToValueSerializer); ok { - if _, ok := ptrSer.valueSerializer.(*enumSerializer); ok { - staticId = EnumDispatchId + case NullableUint64DispatchId: + v := buf.ReadUint64(err) + if 
field.IsPtr { + fieldValue.Set(reflect.ValueOf(&v)) + } else { + fieldValue.SetUint(v) } - } - } - - // Determine field name: use local field name if matched, otherwise use def.name - fieldName := def.name - if localFieldName != "" { - fieldName = localFieldName - } - - fieldInfo := &FieldInfo{ - Name: fieldName, - Offset: offset, - Type: fieldType, - DispatchId: staticId, - TypeId: fieldTypeId, - Serializer: fieldSerializer, - Referencable: def.nullable, // Use remote nullable flag - FieldIndex: fieldIndex, - FieldDef: def, // Save original FieldDef for skipping - RefMode: refMode, - WriteType: writeType, - HasGenerics: isCollectionType(fieldTypeId), // Container fields have declared element types - TagID: def.tagID, - HasForyTag: def.tagID >= 0, - } - fields = append(fields, fieldInfo) - } - - s.fields = fields - s.groupFields() - - // Debug output for field order comparison with Java MetaSharedSerializer - if DebugOutputEnabled() && s.type_ != nil { - fmt.Printf("[Go] ========== Sorted fields for %s ==========\n", s.type_.Name()) - fmt.Printf("[Go] Remote TypeDef order (%d fields):\n", len(s.fieldDefs)) - for i, def := range s.fieldDefs { - fmt.Printf("[Go] [%d] %s -> typeId=%d, nullable=%v\n", i, def.name, def.fieldType.TypeId(), def.nullable) - } - fmt.Printf("[Go] Go sorted fixedFields (%d):\n", len(s.fixedFields)) - for i, f := range s.fixedFields { - fmt.Printf("[Go] [%d] %s -> dispatchId=%d, typeId=%d, size=%d, referencable=%v\n", i, f.Name, f.DispatchId, f.TypeId, f.FixedSize, f.Referencable) - } - fmt.Printf("[Go] Go sorted varintFields (%d):\n", len(s.varintFields)) - for i, f := range s.varintFields { - fmt.Printf("[Go] [%d] %s -> dispatchId=%d, typeId=%d, referencable=%v\n", i, f.Name, f.DispatchId, f.TypeId, f.Referencable) - } - fmt.Printf("[Go] Go sorted remainingFields (%d):\n", len(s.remainingFields)) - for i, f := range s.remainingFields { - fmt.Printf("[Go] [%d] %s -> dispatchId=%d, typeId=%d, referencable=%v\n", i, f.Name, f.DispatchId, 
f.TypeId, f.Referencable) - } - fmt.Printf("[Go] ===========================================\n") - } - - // Compute typeDefDiffers: true if any field doesn't exist locally, has type mismatch, - // or has nullable mismatch (which affects field ordering) - // When typeDefDiffers is false, we can use grouped reading for better performance - s.typeDefDiffers = false - for i, field := range fields { - if field.FieldIndex < 0 { - // Field exists in remote TypeDef but not locally - s.typeDefDiffers = true - break - } - // Check if nullable flag differs between remote and local - // Remote nullable is stored in fieldDefs[i].nullable - // Local nullable is determined by whether the Go field is a pointer type - if i < len(s.fieldDefs) && field.FieldIndex >= 0 { - remoteNullable := s.fieldDefs[i].nullable - // Check if local Go field is a pointer type (can be nil = nullable) - localNullable := field.Type.Kind() == reflect.Ptr - if remoteNullable != localNullable { - s.typeDefDiffers = true - break - } - } - } - - if DebugOutputEnabled() && s.type_ != nil { - fmt.Printf("[Go] typeDefDiffers=%v for %s\n", s.typeDefDiffers, s.type_.Name()) - } - - return nil -} - -// isNonNullablePrimitiveKind returns true for Go kinds that map to Java primitive types -// These are the types that cannot be null in Java and should have nullable=0 in hash computation -func isNonNullablePrimitiveKind(kind reflect.Kind) bool { - switch kind { - case reflect.Bool, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, - reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, - reflect.Float32, reflect.Float64, reflect.Int, reflect.Uint: - return true - default: - return false - } -} - -// isInternalTypeWithoutTypeMeta checks if a type is serialized without type meta per xlang spec. 
-// Per the spec (struct field serialization), these types use format: | ref/null flag | value data | (NO type meta) -// - Nullable primitives (*int32, *float64, etc.): | null flag | field value | -// - Strings (string): | null flag | value data | -// - Binary ([]byte): | null flag | value data | -// - List/Slice: | ref meta | value data | -// - Set: | ref meta | value data | -// - Map: | ref meta | value data | -// Only struct/enum/ext types need type meta: | ref flag | type meta | value data | -func isInternalTypeWithoutTypeMeta(t reflect.Type) bool { - kind := t.Kind() - // String type - no type meta needed - if kind == reflect.String { - return true - } - // Slice (list or byte slice) - no type meta needed - if kind == reflect.Slice { - return true - } - // Map type - no type meta needed - if kind == reflect.Map { - return true - } - // Pointer to primitive - no type meta needed - if kind == reflect.Ptr { - elemKind := t.Elem().Kind() - switch elemKind { - case reflect.Bool, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, - reflect.Int, reflect.Float32, reflect.Float64, reflect.String: - return true - } - } - return false -} - -// isStructField checks if a type is a struct type (directly or via pointer) -func isStructField(t reflect.Type) bool { - if t.Kind() == reflect.Struct { - return true - } - if t.Kind() == reflect.Ptr && t.Elem().Kind() == reflect.Struct { - return true - } - return false -} - -// isStructFieldType checks if a FieldType represents a type that needs type info written -// This is used to determine if type info was written for the field in compatible mode -// In compatible mode, Java writes type info for struct and ext types, but NOT for enum types -// Enum fields only have null flag + ordinal, no type ID -func isStructFieldType(ft FieldType) bool { - if ft == nil { - return false - } - typeId := ft.TypeId() - // Check base type IDs that need type info (struct and ext, NOT enum) - // Always check the internal type ID (low byte) 
to handle composite type IDs - // which may be negative when stored as int32 (e.g., -2288 = (short)128784) - internalTypeId := TypeId(typeId & 0xFF) - switch internalTypeId { - case STRUCT, NAMED_STRUCT, COMPATIBLE_STRUCT, NAMED_COMPATIBLE_STRUCT, - EXT, NAMED_EXT: - return true - } - return false -} - -// FieldFingerprintInfo contains the information needed to compute a field's fingerprint. -type FieldFingerprintInfo struct { - // FieldID is the tag ID if configured (>= 0), or -1 to use field name - FieldID int - // FieldName is the snake_case field name (used when FieldID < 0) - FieldName string - // TypeID is the Fory type ID for the field - TypeID TypeId - // Ref is true if reference tracking is enabled for this field - Ref bool - // Nullable is true if null flag is written for this field - Nullable bool -} - -// ComputeStructFingerprint computes the fingerprint string for a struct type. -// -// Fingerprint Format: -// -// Each field contributes: ",,,;" -// Fields are sorted by field_id_or_name (lexicographically as strings) -// -// Field Components: -// - field_id_or_name: Tag ID as string if configured (e.g., "0", "1"), otherwise snake_case field name -// - type_id: Fory TypeId as decimal string (e.g., "4" for INT32) -// - ref: "1" if reference tracking enabled, "0" otherwise -// - nullable: "1" if null flag is written, "0" otherwise -// -// Example fingerprints: -// - With tag IDs: "0,4,0,0;1,4,0,1;2,9,0,1;" -// - With field names: "age,4,0,0;name,9,0,1;" -// -// The fingerprint is used to compute a hash for struct schema versioning. -// Different nullable/ref settings will produce different fingerprints, -// ensuring schema compatibility is properly validated. 
-func ComputeStructFingerprint(fields []FieldFingerprintInfo) string { - // Sort fields by their identifier (field ID or name) - type fieldWithKey struct { - field FieldFingerprintInfo - sortKey string - } - fieldsWithKeys := make([]fieldWithKey, 0, len(fields)) - for _, field := range fields { - var sortKey string - if field.FieldID >= 0 { - sortKey = fmt.Sprintf("%d", field.FieldID) - } else { - sortKey = field.FieldName - } - fieldsWithKeys = append(fieldsWithKeys, fieldWithKey{field: field, sortKey: sortKey}) - } - - sort.Slice(fieldsWithKeys, func(i, j int) bool { - return fieldsWithKeys[i].sortKey < fieldsWithKeys[j].sortKey - }) - - var sb strings.Builder - for _, fw := range fieldsWithKeys { - // Field identifier - sb.WriteString(fw.sortKey) - sb.WriteString(",") - // Type ID - sb.WriteString(fmt.Sprintf("%d", fw.field.TypeID)) - sb.WriteString(",") - // Ref flag - if fw.field.Ref { - sb.WriteString("1") - } else { - sb.WriteString("0") - } - sb.WriteString(",") - // Nullable flag - if fw.field.Nullable { - sb.WriteString("1") - } else { - sb.WriteString("0") - } - sb.WriteString(";") - } - return sb.String() -} - -func (s *structSerializer) computeHash() int32 { - // Build FieldFingerprintInfo for each field - fields := make([]FieldFingerprintInfo, 0, len(s.fields)) - for _, field := range s.fields { - var typeId TypeId - isEnumField := false - if field.Serializer == nil { - typeId = UNKNOWN - } else { - typeId = field.TypeId - // Check if this is an enum serializer (directly or wrapped in ptrToValueSerializer) - if _, ok := field.Serializer.(*enumSerializer); ok { - isEnumField = true - typeId = UNKNOWN - } else if ptrSer, ok := field.Serializer.(*ptrToValueSerializer); ok { - if _, ok := ptrSer.valueSerializer.(*enumSerializer); ok { - isEnumField = true - typeId = UNKNOWN + case NullableFloat32DispatchId: + v := buf.ReadFloat32(err) + if field.IsPtr { + fieldValue.Set(reflect.ValueOf(&v)) + } else { + fieldValue.SetFloat(float64(v)) + } + case 
NullableFloat64DispatchId: + v := buf.ReadFloat64(err) + if field.IsPtr { + fieldValue.Set(reflect.ValueOf(&v)) + } else { + fieldValue.SetFloat(v) } } - // For user-defined types (struct, ext types), use UNKNOWN in fingerprint - // This matches Java's behavior where user-defined types return UNKNOWN - // to ensure consistent fingerprint computation across languages - if isUserDefinedType(int16(typeId)) { - typeId = UNKNOWN + continue + } + + // Handle nullable varint primitives (read ref flag + varint) + if isNullableVarintPrimitive(field.DispatchId) { + refFlag := buf.ReadInt8(err) + if refFlag == NullFlag { + // Leave pointer as nil (or zero for non-pointer local types) + continue } - // For fixed-size arrays with primitive elements, use primitive array type IDs - if field.Type.Kind() == reflect.Array { - elemKind := field.Type.Elem().Kind() - switch elemKind { - case reflect.Int8: - typeId = INT8_ARRAY - case reflect.Int16: - typeId = INT16_ARRAY - case reflect.Int32: - typeId = INT32_ARRAY - case reflect.Int64: - typeId = INT64_ARRAY - case reflect.Float32: - typeId = FLOAT32_ARRAY - case reflect.Float64: - typeId = FLOAT64_ARRAY - default: - typeId = LIST + // Read varint value based on dispatch ID + // Handle both pointer and non-pointer local field types (schema evolution) + switch field.DispatchId { + case NullableVarint32DispatchId: + v := buf.ReadVarint32(err) + if field.IsPtr { + fieldValue.Set(reflect.ValueOf(&v)) + } else { + fieldValue.SetInt(int64(v)) } - } else if field.Type.Kind() == reflect.Slice { - typeId = LIST - } else if field.Type.Kind() == reflect.Map { - // map[T]bool is used to represent a Set in Go - if field.Type.Elem().Kind() == reflect.Bool { - typeId = SET + case NullableVarint64DispatchId: + v := buf.ReadVarint64(err) + if field.IsPtr { + fieldValue.Set(reflect.ValueOf(&v)) } else { - typeId = MAP + fieldValue.SetInt(v) + } + case NullableVarUint32DispatchId: + v := buf.ReadVaruint32(err) + if field.IsPtr { + 
fieldValue.Set(reflect.ValueOf(&v)) + } else { + fieldValue.SetUint(uint64(v)) + } + case NullableVarUint64DispatchId: + v := buf.ReadVaruint64(err) + if field.IsPtr { + fieldValue.Set(reflect.ValueOf(&v)) + } else { + fieldValue.SetUint(v) + } + case NullableTaggedInt64DispatchId: + v := buf.ReadTaggedInt64(err) + if field.IsPtr { + fieldValue.Set(reflect.ValueOf(&v)) + } else { + fieldValue.SetInt(v) + } + case NullableTaggedUint64DispatchId: + v := buf.ReadTaggedUint64(err) + if field.IsPtr { + fieldValue.Set(reflect.ValueOf(&v)) + } else { + fieldValue.SetUint(v) + } + case NullableIntDispatchId: + v := int(buf.ReadVarint64(err)) + if field.IsPtr { + fieldValue.Set(reflect.ValueOf(&v)) + } else { + fieldValue.SetInt(int64(v)) + } + case NullableUintDispatchId: + v := uint(buf.ReadVaruint64(err)) + if field.IsPtr { + fieldValue.Set(reflect.ValueOf(&v)) + } else { + fieldValue.SetUint(uint64(v)) } } + continue } - - // Determine nullable flag for xlang compatibility: - // - Default: false for ALL fields (xlang default - aligned with all languages) - // - Primitives are always non-nullable - // - Can be overridden by explicit fory tag - nullable := false // Default to nullable=false for xlang mode - if field.TagNullableSet { - // Use explicit tag value if set - nullable = field.TagNullable - } - // Primitives are never nullable, regardless of tag - if isNonNullablePrimitiveKind(field.Type.Kind()) && !isEnumField { - nullable = false + if isEnumField(field) { + readEnumField(ctx, field, fieldValue) + continue } - fields = append(fields, FieldFingerprintInfo{ - FieldID: field.TagID, - FieldName: SnakeCase(field.Name), - TypeID: typeId, - // Ref is based on explicit tag annotation only, NOT runtime ref_tracking config - // This allows fingerprint to be computed at compile time for C++/Rust - Ref: field.TagRefSet && field.TagRef, - Nullable: nullable, - }) - } - - hashString := ComputeStructFingerprint(fields) - data := []byte(hashString) - h1, _ := 
murmur3.Sum128WithSeed(data, 47) - hash := int32(h1 & 0xFFFFFFFF) - - if DebugOutputEnabled() { - fmt.Printf("[Go][fory-debug] struct %v version fingerprint=\"%s\" version hash=%d\n", s.type_, hashString, hash) - } - - if hash == 0 { - panic(fmt.Errorf("hash for type %v is 0", s.type_)) + // Slow path for non-primitives (all need ref flag per xlang spec) + if field.Serializer != nil { + // Use pre-computed RefMode and WriteType from field initialization + field.Serializer.Read(ctx, field.RefMode, field.WriteType, field.HasGenerics, fieldValue) + } else { + ctx.ReadValue(fieldValue, RefModeTracking, true) + } } - return hash } -// GetStructHash returns the struct hash for a given type using the provided TypeResolver. -// This is used by codegen serializers to get the hash at runtime. -func GetStructHash(type_ reflect.Type, resolver *TypeResolver) int32 { - ser := newStructSerializer(type_, "", nil) - if err := ser.initialize(resolver); err != nil { - panic(fmt.Errorf("failed to initialize struct serializer for hash computation: %v", err)) +// skipField skips a field that doesn't exist or is incompatible +// Uses context error state for deferred error checking. +func (s *structSerializer) skipField(ctx *ReadContext, field *FieldInfo) { + if field.FieldDef.name != "" { + fieldDefIsStructType := isStructFieldType(field.FieldDef.fieldType) + // Use FieldDef's trackingRef and nullable to determine if ref flag was written by Java + // Java writes ref flag based on its FieldDef, not Go's field type + readRefFlag := field.FieldDef.trackingRef || field.FieldDef.nullable + SkipFieldValueWithTypeFlag(ctx, field.FieldDef, readRefFlag, ctx.Compatible() && fieldDefIsStructType) + return } - return ser.structHash -} - -// Field sorting helpers - -type triple struct { - typeID int16 - serializer Serializer - name string - nullable bool - tagID int // -1 = use field name, >=0 = use tag ID for sorting -} - -// getFieldSortKey returns the sort key for a field. 
-// If tagID >= 0, returns the tag ID as string (for tag-based sorting). -// Otherwise returns the snake_case field name. -func (t triple) getSortKey() string { - if t.tagID >= 0 { - return fmt.Sprintf("%d", t.tagID) + // No FieldDef available, read into temp value + tempValue := reflect.New(field.Type).Elem() + if field.Serializer != nil { + readType := ctx.Compatible() && isStructField(field.Type) + refMode := RefModeNone + if field.Nullable { + refMode = RefModeTracking + } + field.Serializer.Read(ctx, refMode, readType, false, tempValue) + } else { + ctx.ReadValue(tempValue, RefModeTracking, true) } - return SnakeCase(t.name) } -// sortFields sorts fields with nullable information to match Java's field ordering. -// Java separates primitive types (int, long) from boxed types (Integer, Long). -// In Go, this corresponds to non-pointer primitives vs pointer-to-primitive. -// When tagIDs are provided (>= 0), fields are sorted by tag ID instead of field name. -func sortFields( - typeResolver *TypeResolver, - fieldNames []string, - serializers []Serializer, - typeIds []TypeId, - nullables []bool, - tagIDs []int, -) ([]Serializer, []string) { - var ( - typeTriples []triple - others []triple - userDefined []triple - ) +// writeEnumField writes an enum field respecting the field's RefMode. +// Java writes enum ordinals as unsigned Varuint32Small7, not signed zigzag. +// RefMode determines whether null flag is written, regardless of whether the local type is a pointer. +// This is important for compatible mode where remote TypeDef's nullable flag controls the wire format. 
+func writeEnumField(ctx *WriteContext, field *FieldInfo, fieldValue reflect.Value) { + buf := ctx.Buffer() + isPointer := field.IsPtr - for i, name := range fieldNames { - ser := serializers[i] - tagID := TagIDUseFieldName // default: use field name - if tagIDs != nil && i < len(tagIDs) { - tagID = tagIDs[i] - } - if ser == nil { - others = append(others, triple{UNKNOWN, nil, name, nullables[i], tagID}) - continue + // Write null flag based on RefMode only (not based on whether local type is pointer) + if field.RefMode != RefModeNone { + if isPointer && fieldValue.IsNil() { + buf.WriteInt8(NullFlag) + return } - typeTriples = append(typeTriples, triple{typeIds[i], ser, name, nullables[i], tagID}) + buf.WriteInt8(NotNullValueFlag) } - // Java orders: primitives, boxed, finals, others, collections, maps - // primitives = non-nullable primitive types (int, long, etc.) - // boxed = nullable boxed types (Integer, Long, etc. which are pointers in Go) - var primitives, boxed, collection, setFields, maps, otherInternalTypeFields []triple - for _, t := range typeTriples { - switch { - case isPrimitiveType(t.typeID): - // Separate non-nullable primitives from nullable (boxed) primitives - if t.nullable { - boxed = append(boxed, t) - } else { - primitives = append(primitives, t) - } - case isListType(t.typeID), isPrimitiveArrayType(t.typeID): - collection = append(collection, t) - case isSetType(t.typeID): - setFields = append(setFields, t) - case isMapType(t.typeID): - maps = append(maps, t) - case isUserDefinedType(t.typeID): - userDefined = append(userDefined, t) - case t.typeID == UNKNOWN: - others = append(others, t) - default: - otherInternalTypeFields = append(otherInternalTypeFields, t) + // Get the actual value to serialize + targetValue := fieldValue + if isPointer { + if fieldValue.IsNil() { + // RefModeNone but nil pointer - this is a protocol error in schema-consistent mode + // Write zero value as fallback + targetValue = reflect.Zero(field.Type.Elem()) + } 
else { + targetValue = fieldValue.Elem() } } - // Sort primitives (non-nullable) - same logic as boxed - // Java sorts by: compressed (varint) types last, then by size (largest first), then by type ID (descending) - // Fixed types: BOOL, INT8, UINT8, INT16, UINT16, INT32, UINT32, INT64, UINT64, FLOAT32, FLOAT64 - // Varint types: VARINT32, VARINT64, VAR_UINT32, VAR_UINT64, TAGGED_INT64, TAGGED_UINT64 - isVarintTypeId := func(typeID int16) bool { - return typeID == VARINT32 || typeID == VARINT64 || - typeID == VAR_UINT32 || typeID == VAR_UINT64 || - typeID == TAGGED_INT64 || typeID == TAGGED_UINT64 - } - sortPrimitiveSlice := func(s []triple) { - sort.Slice(s, func(i, j int) bool { - ai, aj := s[i], s[j] - compressI := isVarintTypeId(ai.typeID) - compressJ := isVarintTypeId(aj.typeID) - if compressI != compressJ { - return !compressI && compressJ - } - szI, szJ := getPrimitiveTypeSize(ai.typeID), getPrimitiveTypeSize(aj.typeID) - if szI != szJ { - return szI > szJ - } - // Tie-breaker: type ID descending (higher type ID first), then field name - if ai.typeID != aj.typeID { - return ai.typeID > aj.typeID - } - return ai.getSortKey() < aj.getSortKey() - }) - } - sortPrimitiveSlice(primitives) - sortPrimitiveSlice(boxed) - sortByTypeIDThenName := func(s []triple) { - sort.Slice(s, func(i, j int) bool { - if s[i].typeID != s[j].typeID { - return s[i].typeID < s[j].typeID - } - return s[i].getSortKey() < s[j].getSortKey() - }) - } - sortTuple := func(s []triple) { - sort.Slice(s, func(i, j int) bool { - return s[i].getSortKey() < s[j].getSortKey() - }) - } - sortByTypeIDThenName(otherInternalTypeFields) - sortTuple(others) - sortTuple(collection) - sortTuple(setFields) - sortTuple(maps) - sortTuple(userDefined) - - // Java order: primitives, boxed, finals, collections, maps, others - // finals = String and other monomorphic types (otherInternalTypeFields) - // others = userDefined types (structs, enums) and unknown types - all := make([]triple, 0, len(fieldNames)) - all 
= append(all, primitives...) - all = append(all, boxed...) - all = append(all, otherInternalTypeFields...) // finals (String, etc.) - all = append(all, collection...) - all = append(all, setFields...) - all = append(all, maps...) - all = append(all, userDefined...) // others (structs, enums) - all = append(all, others...) // unknown types - outSer := make([]Serializer, len(all)) - outNam := make([]string, len(all)) - for i, t := range all { - outSer[i] = t.serializer - outNam[i] = t.name + // For pointer enum fields, the serializer is ptrToValueSerializer wrapping enumSerializer. + // We need to call the inner enumSerializer directly with the dereferenced value. + if ptrSer, ok := field.Serializer.(*ptrToValueSerializer); ok { + ptrSer.valueSerializer.WriteData(ctx, targetValue) + } else { + field.Serializer.WriteData(ctx, targetValue) } - return outSer, outNam } -func typesCompatible(actual, expected reflect.Type) bool { - if actual == nil || expected == nil { - return false - } - if actual == expected { - return true - } - // interface{} can accept any value - if actual.Kind() == reflect.Interface && actual.NumMethod() == 0 { - return true - } - if actual.AssignableTo(expected) || expected.AssignableTo(actual) { - return true - } - if actual.Kind() == reflect.Ptr && actual.Elem() == expected { - return true - } - if expected.Kind() == reflect.Ptr && expected.Elem() == actual { - return true - } - if actual.Kind() == expected.Kind() { - switch actual.Kind() { - case reflect.Slice, reflect.Array: - return elementTypesCompatible(actual.Elem(), expected.Elem()) - case reflect.Map: - return elementTypesCompatible(actual.Key(), expected.Key()) && elementTypesCompatible(actual.Elem(), expected.Elem()) +// readEnumField reads an enum field respecting the field's RefMode. +// RefMode determines whether null flag is read, regardless of whether the local type is a pointer. 
+// This is important for compatible mode where remote TypeDef's nullable flag controls the wire format. +// Uses context error state for deferred error checking. +func readEnumField(ctx *ReadContext, field *FieldInfo, fieldValue reflect.Value) { + buf := ctx.Buffer() + isPointer := field.IsPtr + + // Read null flag based on RefMode only (not based on whether local type is pointer) + if field.RefMode != RefModeNone { + nullFlag := buf.ReadInt8(ctx.Err()) + if nullFlag == NullFlag { + // For pointer enum fields, leave as nil; for non-pointer, set to zero + if !isPointer { + fieldValue.SetInt(0) + } + return } } - if (actual.Kind() == reflect.Array && expected.Kind() == reflect.Slice) || - (actual.Kind() == reflect.Slice && expected.Kind() == reflect.Array) { - return true - } - return false -} -func elementTypesCompatible(actual, expected reflect.Type) bool { - if actual == nil || expected == nil { - return false - } - if actual == expected || actual.AssignableTo(expected) || expected.AssignableTo(actual) { - return true - } - if actual.Kind() == reflect.Ptr { - return elementTypesCompatible(actual, expected.Elem()) + // For pointer enum fields, allocate a new value + targetValue := fieldValue + if isPointer { + newVal := reflect.New(field.Type.Elem()) + fieldValue.Set(newVal) + targetValue = newVal.Elem() } - return false -} -// typeIdFromKind derives a TypeId from a reflect.Type's kind -// This is used when the type is not registered in typesInfo -// Note: Uses VARINT32/VARINT64/VAR_UINT32/VAR_UINT64 to match Java xlang mode and Rust -func typeIdFromKind(type_ reflect.Type) TypeId { - switch type_.Kind() { - case reflect.Bool: - return BOOL - case reflect.Int8: - return INT8 - case reflect.Int16: - return INT16 - case reflect.Int32: - return VARINT32 - case reflect.Int64, reflect.Int: - return VARINT64 - case reflect.Uint8: - return UINT8 - case reflect.Uint16: - return UINT16 - case reflect.Uint32: - return VAR_UINT32 - case reflect.Uint64, reflect.Uint: - return 
VAR_UINT64 - case reflect.Float32: - return FLOAT32 - case reflect.Float64: - return FLOAT64 - case reflect.String: - return STRING - case reflect.Slice: - // For slices, return the appropriate primitive array type ID based on element type - elemKind := type_.Elem().Kind() - switch elemKind { - case reflect.Bool: - return BOOL_ARRAY - case reflect.Int8: - return INT8_ARRAY - case reflect.Int16: - return INT16_ARRAY - case reflect.Int32: - return INT32_ARRAY - case reflect.Int64, reflect.Int: - return INT64_ARRAY - case reflect.Float32: - return FLOAT32_ARRAY - case reflect.Float64: - return FLOAT64_ARRAY - default: - // Non-primitive slices use LIST - return LIST - } - case reflect.Array: - // For arrays, return the appropriate primitive array type ID based on element type - elemKind := type_.Elem().Kind() - switch elemKind { - case reflect.Bool: - return BOOL_ARRAY - case reflect.Int8: - return INT8_ARRAY - case reflect.Int16: - return INT16_ARRAY - case reflect.Int32: - return INT32_ARRAY - case reflect.Int64, reflect.Int: - return INT64_ARRAY - case reflect.Float32: - return FLOAT32_ARRAY - case reflect.Float64: - return FLOAT64_ARRAY - default: - // Non-primitive arrays use LIST - return LIST - } - case reflect.Map: - // map[T]bool is used to represent a Set in Go - if type_.Elem().Kind() == reflect.Bool { - return SET - } - return MAP - case reflect.Struct: - return NAMED_STRUCT - case reflect.Ptr: - // For pointer types, get the type ID of the element type - return typeIdFromKind(type_.Elem()) - default: - return UNKNOWN + // For pointer enum fields, the serializer is ptrToValueSerializer wrapping enumSerializer. + // We need to call the inner enumSerializer directly with the dereferenced value. 
+ if ptrSer, ok := field.Serializer.(*ptrToValueSerializer); ok { + ptrSer.valueSerializer.ReadData(ctx, field.Type.Elem(), targetValue) + } else { + field.Serializer.ReadData(ctx, field.Type, targetValue) } } diff --git a/go/fory/struct_test.go b/go/fory/struct_test.go new file mode 100644 index 0000000000..5c2fed3719 --- /dev/null +++ b/go/fory/struct_test.go @@ -0,0 +1,71 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package fory + +import ( + "testing" +) + +func TestUnsignedTypeSerialization(t *testing.T) { + type TestStruct struct { + U32Var uint32 `fory:"compress=true"` + U32Fixed uint32 `fory:"compress=false"` + U64Var uint64 `fory:"encoding=varint"` + U64Fixed uint64 `fory:"encoding=fixed"` + U64Tagged uint64 `fory:"encoding=tagged"` + } + + f := New(WithXlang(true), WithCompatible(false)) + f.Register(TestStruct{}, 9999) + + obj := TestStruct{ + U32Var: 3000000000, + U32Fixed: 4000000000, + U64Var: 10000000000, + U64Fixed: 15000000000, + U64Tagged: 1000000000, + } + + data, err := f.Serialize(obj) + if err != nil { + t.Fatalf("Serialize failed: %v", err) + } + + var result interface{} + err = f.Deserialize(data, &result) + if err != nil { + t.Fatalf("Deserialize failed: %v", err) + } + + resultObj := result.(*TestStruct) + if resultObj.U32Var != obj.U32Var { + t.Errorf("U32Var mismatch: expected %d, got %d", obj.U32Var, resultObj.U32Var) + } + if resultObj.U32Fixed != obj.U32Fixed { + t.Errorf("U32Fixed mismatch: expected %d, got %d", obj.U32Fixed, resultObj.U32Fixed) + } + if resultObj.U64Var != obj.U64Var { + t.Errorf("U64Var mismatch: expected %d, got %d", obj.U64Var, resultObj.U64Var) + } + if resultObj.U64Fixed != obj.U64Fixed { + t.Errorf("U64Fixed mismatch: expected %d, got %d", obj.U64Fixed, resultObj.U64Fixed) + } + if resultObj.U64Tagged != obj.U64Tagged { + t.Errorf("U64Tagged mismatch: expected %d, got %d", obj.U64Tagged, resultObj.U64Tagged) + } +} diff --git a/go/fory/tag.go b/go/fory/tag.go index 63a0ac530c..426157a3a7 100644 --- a/go/fory/tag.go +++ b/go/fory/tag.go @@ -18,14 +18,14 @@ package fory import ( - "reflect" - "strconv" - "strings" + "reflect" + "strconv" + "strings" ) const ( - // TagIDUseFieldName indicates field name should be used instead of tag ID - TagIDUseFieldName = -1 + // TagIDUseFieldName indicates field name should be used instead of tag ID + TagIDUseFieldName = -1 ) // ForyTag represents parsed fory struct tag options. 
@@ -54,23 +54,23 @@ const ( // Hidden string `fory:"-"` // Skip this field (shorthand) // } type ForyTag struct { - ID int // Field tag ID (-1 = use field name, >=0 = use tag ID) - Nullable bool // Whether to write null flag (default: false) - Ref bool // Whether to enable reference tracking (default: false) - Ignore bool // Whether to ignore this field during serialization (default: false) - HasTag bool // Whether field has fory tag at all - Compress bool // For int32/uint32: true=varint, false=fixed (default: true) - Encoding string // For int64/uint64: "fixed", "varint", "tagged" (default: "varint") - - // Track which options were explicitly set (for override logic) - NullableSet bool - RefSet bool - IgnoreSet bool - CompressSet bool - EncodingSet bool + ID int // Field tag ID (-1 = use field name, >=0 = use tag ID) + Nullable bool // Whether to write null flag (default: false) + Ref bool // Whether to enable reference tracking (default: false) + Ignore bool // Whether to ignore this field during serialization (default: false) + HasTag bool // Whether field has fory tag at all + Compress bool // For int32/uint32: true=varint, false=fixed (default: true) + Encoding string // For int64/uint64: "fixed", "varint", "tagged" (default: "varint") + + // Track which options were explicitly set (for override logic) + NullableSet bool + RefSet bool + IgnoreSet bool + CompressSet bool + EncodingSet bool } -// ParseForyTag parses a fory struct tag from reflect.StructField.Tag. +// parseForyTag parses a fory struct tag from reflect.StructField.Tag. 
// // Tag format: `fory:"id=N,nullable=bool,ref=bool,ignore=bool,compress=bool,encoding=value"` or `fory:"-"` // @@ -80,172 +80,172 @@ type ForyTag struct { // - For int64/uint64: `encoding=fixed`, `encoding=varint`, `encoding=tagged`, default is varint // - Standalone flags: `nullable`, `ref`, `ignore` (equivalent to =true) // - Shorthand: `-` (equivalent to `ignore=true`) -func ParseForyTag(field reflect.StructField) ForyTag { - tag := ForyTag{ - ID: TagIDUseFieldName, - Nullable: false, - Ref: false, - Ignore: false, - HasTag: false, - Compress: true, // default: varint encoding - Encoding: "varint", // default: varint encoding - } - - tagValue, ok := field.Tag.Lookup("fory") - if !ok { - return tag - } - - tag.HasTag = true - - // Handle "-" shorthand for ignore - if tagValue == "-" { - tag.Ignore = true - tag.IgnoreSet = true - return tag - } - - // Parse comma-separated options - parts := strings.Split(tagValue, ",") - for _, part := range parts { - part = strings.TrimSpace(part) - if part == "" { - continue - } - - // Handle key=value pairs and standalone flags - if idx := strings.Index(part, "="); idx >= 0 { - key := strings.TrimSpace(part[:idx]) - value := strings.TrimSpace(part[idx+1:]) - - switch key { - case "id": - if id, err := strconv.Atoi(value); err == nil { - tag.ID = id - } - case "nullable": - tag.Nullable = parseBool(value) - tag.NullableSet = true - case "ref": - tag.Ref = parseBool(value) - tag.RefSet = true - case "ignore": - tag.Ignore = parseBool(value) - tag.IgnoreSet = true - case "compress": - tag.Compress = parseBool(value) - tag.CompressSet = true - case "encoding": - tag.Encoding = strings.ToLower(strings.TrimSpace(value)) - tag.EncodingSet = true - } - } else { - // Handle standalone flags (presence means true) - switch part { - case "nullable": - tag.Nullable = true - tag.NullableSet = true - case "ref": - tag.Ref = true - tag.RefSet = true - case "ignore": - tag.Ignore = true - tag.IgnoreSet = true - } - } - } - - return tag +func 
parseForyTag(field reflect.StructField) ForyTag { + tag := ForyTag{ + ID: TagIDUseFieldName, + Nullable: false, + Ref: false, + Ignore: false, + HasTag: false, + Compress: true, // default: varint encoding + Encoding: "varint", // default: varint encoding + } + + tagValue, ok := field.Tag.Lookup("fory") + if !ok { + return tag + } + + tag.HasTag = true + + // Handle "-" shorthand for ignore + if tagValue == "-" { + tag.Ignore = true + tag.IgnoreSet = true + return tag + } + + // Parse comma-separated options + parts := strings.Split(tagValue, ",") + for _, part := range parts { + part = strings.TrimSpace(part) + if part == "" { + continue + } + + // Handle key=value pairs and standalone flags + if idx := strings.Index(part, "="); idx >= 0 { + key := strings.TrimSpace(part[:idx]) + value := strings.TrimSpace(part[idx+1:]) + + switch key { + case "id": + if id, err := strconv.Atoi(value); err == nil { + tag.ID = id + } + case "nullable": + tag.Nullable = parseBool(value) + tag.NullableSet = true + case "ref": + tag.Ref = parseBool(value) + tag.RefSet = true + case "ignore": + tag.Ignore = parseBool(value) + tag.IgnoreSet = true + case "compress": + tag.Compress = parseBool(value) + tag.CompressSet = true + case "encoding": + tag.Encoding = strings.ToLower(strings.TrimSpace(value)) + tag.EncodingSet = true + } + } else { + // Handle standalone flags (presence means true) + switch part { + case "nullable": + tag.Nullable = true + tag.NullableSet = true + case "ref": + tag.Ref = true + tag.RefSet = true + case "ignore": + tag.Ignore = true + tag.IgnoreSet = true + } + } + } + + return tag } // parseBool parses a boolean value from string. // Accepts: "true", "1", "yes" as true; everything else as false. func parseBool(s string) bool { - s = strings.ToLower(strings.TrimSpace(s)) - return s == "true" || s == "1" || s == "yes" + s = strings.ToLower(strings.TrimSpace(s)) + return s == "true" || s == "1" || s == "yes" } // parseTypeID parses a TypeId from string name. 
// Returns 0 if the type name is not recognized. func parseTypeID(s string) TypeId { - s = strings.ToUpper(strings.TrimSpace(s)) - switch s { - case "UINT8": - return UINT8 - case "UINT16": - return UINT16 - case "UINT32": - return UINT32 - case "VAR_UINT32": - return VAR_UINT32 - case "UINT64": - return UINT64 - case "VAR_UINT64": - return VAR_UINT64 - case "TAGGED_UINT64": - return TAGGED_UINT64 - default: - return 0 - } + s = strings.ToUpper(strings.TrimSpace(s)) + switch s { + case "UINT8": + return UINT8 + case "UINT16": + return UINT16 + case "UINT32": + return UINT32 + case "VAR_UINT32": + return VAR_UINT32 + case "UINT64": + return UINT64 + case "VAR_UINT64": + return VAR_UINT64 + case "TAGGED_UINT64": + return TAGGED_UINT64 + default: + return 0 + } } -// ValidateForyTags validates all fory tags in a struct type. +// validateForyTags validates all fory tags in a struct type. // Returns an error if validation fails. // // Validation rules: // - Tag ID must be >= -1 // - Tag IDs must be unique within a struct (except -1) // - Ignored fields are not validated for ID uniqueness -func ValidateForyTags(t reflect.Type) error { - if t.Kind() == reflect.Ptr { - t = t.Elem() - } - if t.Kind() != reflect.Struct { - return nil - } - - tagIDs := make(map[int]string) // id -> field name - - for i := 0; i < t.NumField(); i++ { - field := t.Field(i) - tag := ParseForyTag(field) - - // Skip ignored fields for ID uniqueness validation - if tag.Ignore { - continue - } - - // Validate tag ID range - if tag.ID < TagIDUseFieldName { - return InvalidTagErrorf("invalid fory tag id=%d on field %s: id must be >= -1", - tag.ID, field.Name) - } - - // Check for duplicate tag IDs (except -1 which means use field name) - if tag.ID >= 0 { - if existing, ok := tagIDs[tag.ID]; ok { - return InvalidTagErrorf("duplicate fory tag id=%d on fields %s and %s", - tag.ID, existing, field.Name) - } - tagIDs[tag.ID] = field.Name - } - } - - return nil +func validateForyTags(t reflect.Type) error { 
+ if t.Kind() == reflect.Ptr { + t = t.Elem() + } + if t.Kind() != reflect.Struct { + return nil + } + + tagIDs := make(map[int]string) // id -> field name + + for i := 0; i < t.NumField(); i++ { + field := t.Field(i) + tag := parseForyTag(field) + + // Skip ignored fields for ID uniqueness validation + if tag.Ignore { + continue + } + + // Validate tag ID range + if tag.ID < TagIDUseFieldName { + return InvalidTagErrorf("invalid fory tag id=%d on field %s: id must be >= -1", + tag.ID, field.Name) + } + + // Check for duplicate tag IDs (except -1 which means use field name) + if tag.ID >= 0 { + if existing, ok := tagIDs[tag.ID]; ok { + return InvalidTagErrorf("duplicate fory tag id=%d on fields %s and %s", + tag.ID, existing, field.Name) + } + tagIDs[tag.ID] = field.Name + } + } + + return nil } -// ShouldIncludeField returns true if the field should be serialized. +// shouldIncludeField returns true if the field should be serialized. // A field is excluded if: // - It's unexported (starts with lowercase) // - It has `fory:"-"` tag // - It has `fory:"ignore"` or `fory:"ignore=true"` tag -func ShouldIncludeField(field reflect.StructField) bool { - // Skip unexported fields - if field.PkgPath != "" { - return false - } - - // Check for ignore tag - tag := ParseForyTag(field) - return !tag.Ignore +func shouldIncludeField(field reflect.StructField) bool { + // Skip unexported fields + if field.PkgPath != "" { + return false + } + + // Check for ignore tag + tag := parseForyTag(field) + return !tag.Ignore } diff --git a/go/fory/tag_test.go b/go/fory/tag_test.go index 3c7f52e2b2..83c3670ea0 100644 --- a/go/fory/tag_test.go +++ b/go/fory/tag_test.go @@ -43,7 +43,7 @@ func TestParseForyTag(t *testing.T) { typ := reflect.TypeOf(TestStruct{}) // Test Field1: id=0 - tag1 := ParseForyTag(typ.Field(0)) + tag1 := parseForyTag(typ.Field(0)) require.True(t, tag1.HasTag) require.Equal(t, 0, tag1.ID) require.False(t, tag1.Nullable) @@ -54,7 +54,7 @@ func TestParseForyTag(t 
*testing.T) { require.False(t, tag1.IgnoreSet) // Test Field2: all explicit false values - tag2 := ParseForyTag(typ.Field(1)) + tag2 := parseForyTag(typ.Field(1)) require.Equal(t, 1, tag2.ID) require.False(t, tag2.Nullable) require.False(t, tag2.Ref) @@ -64,14 +64,14 @@ func TestParseForyTag(t *testing.T) { require.True(t, tag2.IgnoreSet) // Test Field3: explicit true values - tag3 := ParseForyTag(typ.Field(2)) + tag3 := parseForyTag(typ.Field(2)) require.Equal(t, 2, tag3.ID) require.True(t, tag3.Nullable) require.True(t, tag3.Ref) require.False(t, tag3.Ignore) // Test Field4: standalone flags (presence = true) - tag4 := ParseForyTag(typ.Field(3)) + tag4 := parseForyTag(typ.Field(3)) require.Equal(t, TagIDUseFieldName, tag4.ID) require.True(t, tag4.Nullable) require.True(t, tag4.Ref) @@ -79,44 +79,44 @@ func TestParseForyTag(t *testing.T) { require.True(t, tag4.RefSet) // Test Field5: standalone ignore - tag5 := ParseForyTag(typ.Field(4)) + tag5 := parseForyTag(typ.Field(4)) require.True(t, tag5.Ignore) require.True(t, tag5.IgnoreSet) // Test Field6: explicit ignore=true - tag6 := ParseForyTag(typ.Field(5)) + tag6 := parseForyTag(typ.Field(5)) require.True(t, tag6.Ignore) require.True(t, tag6.IgnoreSet) // Test Field7: explicit ignore=false - tag7 := ParseForyTag(typ.Field(6)) + tag7 := parseForyTag(typ.Field(6)) require.False(t, tag7.Ignore) require.True(t, tag7.IgnoreSet) // Test Field8: "-" shorthand - tag8 := ParseForyTag(typ.Field(7)) + tag8 := parseForyTag(typ.Field(7)) require.True(t, tag8.Ignore) require.True(t, tag8.IgnoreSet) // Test Field9: no tag - tag9 := ParseForyTag(typ.Field(8)) + tag9 := parseForyTag(typ.Field(8)) require.False(t, tag9.HasTag) require.False(t, tag9.Ignore) require.Equal(t, TagIDUseFieldName, tag9.ID) // Test Field10: has ID but not ignored - tag10 := ParseForyTag(typ.Field(9)) + tag10 := parseForyTag(typ.Field(9)) require.Equal(t, 3, tag10.ID) require.False(t, tag10.Ignore) require.True(t, tag10.IgnoreSet) // Test Field11: explicit 
id=-1 (use field name) - tag11 := ParseForyTag(typ.Field(10)) + tag11 := parseForyTag(typ.Field(10)) require.Equal(t, TagIDUseFieldName, tag11.ID) require.True(t, tag11.HasTag) // Test Field12: nullable=true,ref=false - tag12 := ParseForyTag(typ.Field(11)) + tag12 := parseForyTag(typ.Field(11)) require.True(t, tag12.Nullable) require.False(t, tag12.Ref) require.True(t, tag12.NullableSet) @@ -135,12 +135,12 @@ func TestShouldIncludeField(t *testing.T) { typ := reflect.TypeOf(TestStruct{}) - require.True(t, ShouldIncludeField(typ.Field(0))) // Included1 - require.True(t, ShouldIncludeField(typ.Field(1))) // Included2 (ignore=false) - require.False(t, ShouldIncludeField(typ.Field(2))) // Ignored1 - require.False(t, ShouldIncludeField(typ.Field(3))) // Ignored2 - require.False(t, ShouldIncludeField(typ.Field(4))) // Ignored3 - require.True(t, ShouldIncludeField(typ.Field(5))) // NoTag (default: include) + require.True(t, shouldIncludeField(typ.Field(0))) // Included1 + require.True(t, shouldIncludeField(typ.Field(1))) // Included2 (ignore=false) + require.False(t, shouldIncludeField(typ.Field(2))) // Ignored1 + require.False(t, shouldIncludeField(typ.Field(3))) // Ignored2 + require.False(t, shouldIncludeField(typ.Field(4))) // Ignored3 + require.True(t, shouldIncludeField(typ.Field(5))) // NoTag (default: include) } func TestValidateForyTags(t *testing.T) { @@ -151,7 +151,7 @@ func TestValidateForyTags(t *testing.T) { Field3 string `fory:"id=-1"` Field4 string // No tag } - err := ValidateForyTags(reflect.TypeOf(ValidStruct{})) + err := validateForyTags(reflect.TypeOf(ValidStruct{})) require.NoError(t, err) // Test duplicate tag IDs @@ -159,7 +159,7 @@ func TestValidateForyTags(t *testing.T) { Field1 string `fory:"id=0"` Field2 string `fory:"id=0"` } - err = ValidateForyTags(reflect.TypeOf(DuplicateIDs{})) + err = validateForyTags(reflect.TypeOf(DuplicateIDs{})) require.Error(t, err) require.Contains(t, err.Error(), "duplicate") foryErr, ok := err.(Error) @@ -170,7 
+170,7 @@ func TestValidateForyTags(t *testing.T) { type InvalidID struct { Field1 string `fory:"id=-2"` } - err = ValidateForyTags(reflect.TypeOf(InvalidID{})) + err = validateForyTags(reflect.TypeOf(InvalidID{})) require.Error(t, err) require.Contains(t, err.Error(), "invalid") foryErr, ok = err.(Error) @@ -183,7 +183,7 @@ func TestValidateForyTags(t *testing.T) { Field2 string `fory:"id=0,ignore"` // Same ID but ignored Field3 string `fory:"id=1"` } - err = ValidateForyTags(reflect.TypeOf(IgnoredFields{})) + err = validateForyTags(reflect.TypeOf(IgnoredFields{})) require.NoError(t, err) } @@ -193,7 +193,7 @@ func TestParseForyTagEdgeCases(t *testing.T) { Field1 string `fory:" id = 0 , nullable = true "` } typ := reflect.TypeOf(WhitespaceStruct{}) - tag := ParseForyTag(typ.Field(0)) + tag := parseForyTag(typ.Field(0)) require.Equal(t, 0, tag.ID) require.True(t, tag.Nullable) @@ -202,7 +202,7 @@ func TestParseForyTagEdgeCases(t *testing.T) { Field1 string `fory:""` } typ2 := reflect.TypeOf(EmptyTagStruct{}) - tag2 := ParseForyTag(typ2.Field(0)) + tag2 := parseForyTag(typ2.Field(0)) require.True(t, tag2.HasTag) require.Equal(t, TagIDUseFieldName, tag2.ID) @@ -215,16 +215,16 @@ func TestParseForyTagEdgeCases(t *testing.T) { } typ3 := reflect.TypeOf(BoolValuesStruct{}) - tag3 := ParseForyTag(typ3.Field(0)) + tag3 := parseForyTag(typ3.Field(0)) require.True(t, tag3.Nullable) // "1" -> true - tag4 := ParseForyTag(typ3.Field(1)) + tag4 := parseForyTag(typ3.Field(1)) require.True(t, tag4.Nullable) // "yes" -> true - tag5 := ParseForyTag(typ3.Field(2)) + tag5 := parseForyTag(typ3.Field(2)) require.True(t, tag5.Nullable) // "TRUE" -> true - tag6 := ParseForyTag(typ3.Field(3)) + tag6 := parseForyTag(typ3.Field(3)) require.False(t, tag6.Nullable) // "no" -> false } @@ -422,7 +422,7 @@ func TestNullableRefFlagsRespected(t *testing.T) { typ1 := reflect.TypeOf(TestStructNoNull{}) for i := 0; i < typ1.NumField(); i++ { field := typ1.Field(i) - tag := ParseForyTag(field) + tag := 
parseForyTag(field) t.Logf("Field %s: ID=%d, Nullable=%v (set=%v), Ref=%v (set=%v)", field.Name, tag.ID, tag.Nullable, tag.NullableSet, tag.Ref, tag.RefSet) } @@ -611,3 +611,65 @@ func TestNestedStructWithTags(t *testing.T) { require.Equal(t, obj.Inner.Count, result.Inner.Count) require.Equal(t, obj.Items, result.Items) } + +func TestParseTypeIDTag(t *testing.T) { + type TestStruct struct { + U32Var uint32 `fory:"compress=true"` + U32Fixed uint32 `fory:"compress=false"` + U64Var uint64 `fory:"encoding=varint"` + U64Fixed uint64 `fory:"encoding=fixed"` + U64Tagged uint64 `fory:"encoding=tagged"` + } + + typ := reflect.TypeOf(TestStruct{}) + + // Test U32Var + field := typ.Field(0) + tag := parseForyTag(field) + if !tag.CompressSet { + t.Errorf("U32Var: CompressSet should be true") + } + if !tag.Compress { + t.Errorf("U32Var: Compress should be true") + } + + // Test U32Fixed + field = typ.Field(1) + tag = parseForyTag(field) + if !tag.CompressSet { + t.Errorf("U32Fixed: CompressSet should be true") + } + if tag.Compress { + t.Errorf("U32Fixed: Compress should be false") + } + + // Test U64Var + field = typ.Field(2) + tag = parseForyTag(field) + if !tag.EncodingSet { + t.Errorf("U64Var: EncodingSet should be true") + } + if tag.Encoding != "varint" { + t.Errorf("U64Var: expected encoding 'varint', got %s", tag.Encoding) + } + + // Test U64Fixed + field = typ.Field(3) + tag = parseForyTag(field) + if !tag.EncodingSet { + t.Errorf("U64Fixed: EncodingSet should be true") + } + if tag.Encoding != "fixed" { + t.Errorf("U64Fixed: expected encoding 'fixed', got %s", tag.Encoding) + } + + // Test U64Tagged + field = typ.Field(4) + tag = parseForyTag(field) + if !tag.EncodingSet { + t.Errorf("U64Tagged: EncodingSet should be true") + } + if tag.Encoding != "tagged" { + t.Errorf("U64Tagged: expected encoding 'tagged', got %s", tag.Encoding) + } +} diff --git a/go/fory/tests/structs_fory_gen.go b/go/fory/tests/structs_fory_gen.go index e3538214d9..8a85508e1a 100644 --- 
a/go/fory/tests/structs_fory_gen.go +++ b/go/fory/tests/structs_fory_gen.go @@ -1,6 +1,6 @@ // Code generated by forygen. DO NOT EDIT. -// source: /Users/chaokunyang/Desktop/dev/fory/go/fory/tests/structs.go -// generated at: 2026-01-03T14:03:09+08:00 +// source: structs.go +// generated at: 2026-01-09T19:15:47+08:00 package fory @@ -924,98 +924,98 @@ func (g *SliceDemo_ForyGenSerializer) WriteTyped(ctx *fory.WriteContext, v *Slic buf.WriteInt32(g.structHash) // WriteData fields in sorted order - // Field: BoolSlice ([]bool) + // Field: StringSlice ([]string) { isXlang := ctx.TypeResolver().IsXlang() if isXlang { // xlang mode: slices are not nullable, write directly without null flag - fory.WriteBoolSlice(buf, v.BoolSlice) + sliceLen := 0 + if v.StringSlice != nil { + sliceLen = len(v.StringSlice) + } + buf.WriteVaruint32(uint32(sliceLen)) + if sliceLen > 0 { + collectFlag := 12 // CollectionIsSameType | CollectionIsDeclElementType + if ctx.TrackRef() { + collectFlag |= 1 // CollectionTrackingRef for referencable element type + } + buf.WriteInt8(int8(collectFlag)) + for _, elem := range v.StringSlice { + if ctx.TrackRef() { + buf.WriteInt8(-1) // NotNullValueFlag for element + } + ctx.WriteString(elem) + } + } } else { // Native Go mode: slices are nullable, write null flag - if v.BoolSlice == nil { + if v.StringSlice == nil { buf.WriteInt8(-3) // NullFlag } else { buf.WriteInt8(-1) // NotNullValueFlag - fory.WriteBoolSlice(buf, v.BoolSlice) + sliceLen := len(v.StringSlice) + buf.WriteVaruint32(uint32(sliceLen)) + if sliceLen > 0 { + collectFlag := 12 // CollectionIsSameType | CollectionIsDeclElementType + if ctx.TrackRef() { + collectFlag |= 1 // CollectionTrackingRef for referencable element type + } + buf.WriteInt8(int8(collectFlag)) + for _, elem := range v.StringSlice { + if ctx.TrackRef() { + buf.WriteInt8(-1) // NotNullValueFlag for element + } + ctx.WriteString(elem) + } + } } } } - // Field: FloatSlice ([]float64) + // Field: BoolSlice ([]bool) { isXlang 
:= ctx.TypeResolver().IsXlang() if isXlang { // xlang mode: slices are not nullable, write directly without null flag - fory.WriteFloat64Slice(buf, v.FloatSlice) + fory.WriteBoolSlice(buf, v.BoolSlice) } else { // Native Go mode: slices are nullable, write null flag - if v.FloatSlice == nil { + if v.BoolSlice == nil { buf.WriteInt8(-3) // NullFlag } else { buf.WriteInt8(-1) // NotNullValueFlag - fory.WriteFloat64Slice(buf, v.FloatSlice) + fory.WriteBoolSlice(buf, v.BoolSlice) } } } - // Field: IntSlice ([]int32) + // Field: FloatSlice ([]float64) { isXlang := ctx.TypeResolver().IsXlang() if isXlang { // xlang mode: slices are not nullable, write directly without null flag - fory.WriteInt32Slice(buf, v.IntSlice) + fory.WriteFloat64Slice(buf, v.FloatSlice) } else { // Native Go mode: slices are nullable, write null flag - if v.IntSlice == nil { + if v.FloatSlice == nil { buf.WriteInt8(-3) // NullFlag } else { buf.WriteInt8(-1) // NotNullValueFlag - fory.WriteInt32Slice(buf, v.IntSlice) + fory.WriteFloat64Slice(buf, v.FloatSlice) } } } - // Field: StringSlice ([]string) + // Field: IntSlice ([]int32) { isXlang := ctx.TypeResolver().IsXlang() if isXlang { // xlang mode: slices are not nullable, write directly without null flag - sliceLen := 0 - if v.StringSlice != nil { - sliceLen = len(v.StringSlice) - } - buf.WriteVaruint32(uint32(sliceLen)) - if sliceLen > 0 { - collectFlag := 12 // CollectionIsSameType | CollectionIsDeclElementType - if ctx.TrackRef() { - collectFlag |= 1 // CollectionTrackingRef for referencable element type - } - buf.WriteInt8(int8(collectFlag)) - for _, elem := range v.StringSlice { - if ctx.TrackRef() { - buf.WriteInt8(-1) // NotNullValueFlag for element - } - ctx.WriteString(elem) - } - } + fory.WriteInt32Slice(buf, v.IntSlice) } else { // Native Go mode: slices are nullable, write null flag - if v.StringSlice == nil { + if v.IntSlice == nil { buf.WriteInt8(-3) // NullFlag } else { buf.WriteInt8(-1) // NotNullValueFlag - sliceLen := 
len(v.StringSlice) - buf.WriteVaruint32(uint32(sliceLen)) - if sliceLen > 0 { - collectFlag := 12 // CollectionIsSameType | CollectionIsDeclElementType - if ctx.TrackRef() { - collectFlag |= 1 // CollectionTrackingRef for referencable element type - } - buf.WriteInt8(int8(collectFlag)) - for _, elem := range v.StringSlice { - if ctx.TrackRef() { - buf.WriteInt8(-1) // NotNullValueFlag for element - } - ctx.WriteString(elem) - } - } + fory.WriteInt32Slice(buf, v.IntSlice) } } } @@ -1085,54 +1085,6 @@ func (g *SliceDemo_ForyGenSerializer) ReadTyped(ctx *fory.ReadContext, v *SliceD } // ReadData fields in same order as write - // Field: BoolSlice ([]bool) - { - isXlang := ctx.TypeResolver().IsXlang() - if isXlang { - // xlang mode: slices are not nullable, read directly without null flag - v.BoolSlice = fory.ReadBoolSlice(buf, err) - } else { - // Native Go mode: slices are nullable, read null flag - nullFlag := buf.ReadInt8(err) - if nullFlag == -3 { - v.BoolSlice = nil - } else { - v.BoolSlice = fory.ReadBoolSlice(buf, err) - } - } - } - // Field: FloatSlice ([]float64) - { - isXlang := ctx.TypeResolver().IsXlang() - if isXlang { - // xlang mode: slices are not nullable, read directly without null flag - v.FloatSlice = fory.ReadFloat64Slice(buf, err) - } else { - // Native Go mode: slices are nullable, read null flag - nullFlag := buf.ReadInt8(err) - if nullFlag == -3 { - v.FloatSlice = nil - } else { - v.FloatSlice = fory.ReadFloat64Slice(buf, err) - } - } - } - // Field: IntSlice ([]int32) - { - isXlang := ctx.TypeResolver().IsXlang() - if isXlang { - // xlang mode: slices are not nullable, read directly without null flag - v.IntSlice = fory.ReadInt32Slice(buf, err) - } else { - // Native Go mode: slices are nullable, read null flag - nullFlag := buf.ReadInt8(err) - if nullFlag == -3 { - v.IntSlice = nil - } else { - v.IntSlice = fory.ReadInt32Slice(buf, err) - } - } - } // Field: StringSlice ([]string) { isXlang := ctx.TypeResolver().IsXlang() @@ -1211,6 +1163,54 
@@ func (g *SliceDemo_ForyGenSerializer) ReadTyped(ctx *fory.ReadContext, v *SliceD } } } + // Field: BoolSlice ([]bool) + { + isXlang := ctx.TypeResolver().IsXlang() + if isXlang { + // xlang mode: slices are not nullable, read directly without null flag + v.BoolSlice = fory.ReadBoolSlice(buf, err) + } else { + // Native Go mode: slices are nullable, read null flag + nullFlag := buf.ReadInt8(err) + if nullFlag == -3 { + v.BoolSlice = nil + } else { + v.BoolSlice = fory.ReadBoolSlice(buf, err) + } + } + } + // Field: FloatSlice ([]float64) + { + isXlang := ctx.TypeResolver().IsXlang() + if isXlang { + // xlang mode: slices are not nullable, read directly without null flag + v.FloatSlice = fory.ReadFloat64Slice(buf, err) + } else { + // Native Go mode: slices are nullable, read null flag + nullFlag := buf.ReadInt8(err) + if nullFlag == -3 { + v.FloatSlice = nil + } else { + v.FloatSlice = fory.ReadFloat64Slice(buf, err) + } + } + } + // Field: IntSlice ([]int32) + { + isXlang := ctx.TypeResolver().IsXlang() + if isXlang { + // xlang mode: slices are not nullable, read directly without null flag + v.IntSlice = fory.ReadInt32Slice(buf, err) + } else { + // Native Go mode: slices are nullable, read null flag + nullFlag := buf.ReadInt8(err) + if nullFlag == -3 { + v.IntSlice = nil + } else { + v.IntSlice = fory.ReadInt32Slice(buf, err) + } + } + } // Final deferred error check if ctx.HasError() { diff --git a/go/fory/type_def.go b/go/fory/type_def.go index a6d41f87c0..424b796454 100644 --- a/go/fory/type_def.go +++ b/go/fory/type_def.go @@ -218,7 +218,7 @@ func (td *TypeDef) buildTypeInfoWithResolver(resolver *TypeResolver) (TypeInfo, } } else { // Known struct type - use structSerializer with fieldDefs - structSer := newStructSerializer(type_, "", td.fieldDefs) + structSer := newStructSerializerFromTypeDef(type_, "", td.fieldDefs) // Eagerly initialize the struct serializer with pre-computed field metadata if resolver != nil { if err := structSer.initialize(resolver); 
err != nil { @@ -417,7 +417,7 @@ func buildFieldDefs(fory *Fory, value reflect.Value) ([]FieldDef, error) { } // Parse fory struct tag and check for ignore - foryTag := ParseForyTag(field) + foryTag := parseForyTag(field) if foryTag.Ignore { continue // skip ignored fields } @@ -971,31 +971,32 @@ func buildFieldType(fory *Fory, fieldValue reflect.Value) (FieldType, error) { } // Handle slice and array types BEFORE getTypeInfo to avoid anonymous type errors - // For fixed-size arrays with primitive elements, use primitive array type IDs (INT16_ARRAY, etc.) - // For slices and arrays with non-primitive elements, use collection format + // For primitive element types, use primitive array type IDs (INT16_ARRAY, etc.) + // For non-primitive elements, use collection format (LIST with element type) if fieldType.Kind() == reflect.Slice || fieldType.Kind() == reflect.Array { elemType := fieldType.Elem() // Check if element is a primitive type that maps to a primitive array type ID - // Only fixed-size arrays use primitive array format; slices always use LIST - if fieldType.Kind() == reflect.Array { - switch elemType.Kind() { - case reflect.Int8: - return NewSimpleFieldType(INT8_ARRAY), nil - case reflect.Int16: - return NewSimpleFieldType(INT16_ARRAY), nil - case reflect.Int32: - return NewSimpleFieldType(INT32_ARRAY), nil - case reflect.Int64: - return NewSimpleFieldType(INT64_ARRAY), nil - case reflect.Float32: - return NewSimpleFieldType(FLOAT32_ARRAY), nil - case reflect.Float64: - return NewSimpleFieldType(FLOAT64_ARRAY), nil - } + // Both slices and fixed-size arrays with primitive elements use primitive array format + // This matches typeIdFromKind in field_info.go for consistent field sorting + switch elemType.Kind() { + case reflect.Bool: + return NewSimpleFieldType(BOOL_ARRAY), nil + case reflect.Int8: + return NewSimpleFieldType(INT8_ARRAY), nil + case reflect.Int16: + return NewSimpleFieldType(INT16_ARRAY), nil + case reflect.Int32: + return 
NewSimpleFieldType(INT32_ARRAY), nil + case reflect.Int64, reflect.Int: + return NewSimpleFieldType(INT64_ARRAY), nil + case reflect.Float32: + return NewSimpleFieldType(FLOAT32_ARRAY), nil + case reflect.Float64: + return NewSimpleFieldType(FLOAT64_ARRAY), nil } - // For slices and non-primitive arrays, use collection format + // For non-primitive elements, use collection format (LIST with element type) elemValue := reflect.Zero(elemType) elementFieldType, err := buildFieldType(fory, elemValue) if err != nil { diff --git a/go/fory/type_resolver.go b/go/fory/type_resolver.go index e3b9e18c52..5e2de1bb16 100644 --- a/go/fory/type_resolver.go +++ b/go/fory/type_resolver.go @@ -428,7 +428,7 @@ func (r *TypeResolver) RegisterByID(type_ reflect.Type, fullTypeID uint32) error // Create struct serializer tag := type_.Name() - serializer := newStructSerializer(type_, tag, nil) + serializer := newStructSerializer(type_, tag) r.typeToSerializers[type_] = serializer r.typeToTypeInfo[type_] = "@" + tag r.typeInfoToType["@"+tag] = type_ @@ -579,7 +579,7 @@ func (r *TypeResolver) RegisterNamedType( } else { tag = namespace + "." + typeName } - serializer := newStructSerializer(type_, tag, nil) + serializer := newStructSerializer(type_, tag) r.typeToSerializers[type_] = serializer // multiple struct with same name defined inside function will have same `type_.String()`, but they are // different types. so we use tag to encode type info. diff --git a/go/fory/types.go b/go/fory/types.go index f8da0c2716..a8edfc6b94 100644 --- a/go/fory/types.go +++ b/go/fory/types.go @@ -480,8 +480,8 @@ func IsPrimitiveTypeId(typeId TypeId) bool { // isFixedSizePrimitive returns true for fixed-size primitives and notnull pointer types. // Includes INT32/UINT32/INT64/UINT64 (fixed encoding), NOT VARINT32/VAR_UINT32 etc. 
-func isFixedSizePrimitive(staticId DispatchId, referencable bool) bool { - switch staticId { +func isFixedSizePrimitive(dispatchId DispatchId, referencable bool) bool { + switch dispatchId { case PrimitiveBoolDispatchId, PrimitiveInt8DispatchId, PrimitiveUint8DispatchId, PrimitiveInt16DispatchId, PrimitiveUint16DispatchId, PrimitiveInt32DispatchId, PrimitiveUint32DispatchId, @@ -501,8 +501,8 @@ func isFixedSizePrimitive(staticId DispatchId, referencable bool) bool { // isNullableFixedSizePrimitive returns true for nullable fixed-size primitive dispatch IDs. // These are pointer types that use fixed encoding and have a ref flag. -func isNullableFixedSizePrimitive(staticId DispatchId) bool { - switch staticId { +func isNullableFixedSizePrimitive(dispatchId DispatchId) bool { + switch dispatchId { case NullableBoolDispatchId, NullableInt8DispatchId, NullableUint8DispatchId, NullableInt16DispatchId, NullableUint16DispatchId, NullableInt32DispatchId, NullableUint32DispatchId, @@ -516,8 +516,8 @@ func isNullableFixedSizePrimitive(staticId DispatchId) bool { // isNullableVarintPrimitive returns true for nullable varint primitive dispatch IDs. // These are pointer types that use varint encoding and have a ref flag. -func isNullableVarintPrimitive(staticId DispatchId) bool { - switch staticId { +func isNullableVarintPrimitive(dispatchId DispatchId) bool { + switch dispatchId { case NullableVarint32DispatchId, NullableVarint64DispatchId, NullableVarUint32DispatchId, NullableVarUint64DispatchId, NullableTaggedInt64DispatchId, NullableTaggedUint64DispatchId, @@ -530,8 +530,8 @@ func isNullableVarintPrimitive(staticId DispatchId) bool { // isVarintPrimitive returns true for varint primitives and notnull pointer types. // Includes VARINT32/VAR_UINT32/VARINT64/VAR_UINT64 (variable encoding), NOT INT32/UINT32 etc. 
-func isVarintPrimitive(staticId DispatchId, referencable bool) bool { - switch staticId { +func isVarintPrimitive(dispatchId DispatchId, referencable bool) bool { + switch dispatchId { case PrimitiveVarint32DispatchId, PrimitiveVarint64DispatchId, PrimitiveVarUint32DispatchId, PrimitiveVarUint64DispatchId, PrimitiveTaggedInt64DispatchId, PrimitiveTaggedUint64DispatchId, @@ -547,9 +547,9 @@ func isVarintPrimitive(staticId DispatchId, referencable bool) bool { } } -// isPrimitiveDispatchId returns true if the staticId represents a primitive type -func isPrimitiveDispatchId(staticId DispatchId) bool { - switch staticId { +// isPrimitiveDispatchId returns true if the dispatchId represents a primitive type +func isPrimitiveDispatchId(dispatchId DispatchId) bool { + switch dispatchId { case PrimitiveBoolDispatchId, PrimitiveInt8DispatchId, PrimitiveInt16DispatchId, PrimitiveInt32DispatchId, PrimitiveInt64DispatchId, PrimitiveIntDispatchId, PrimitiveUint8DispatchId, PrimitiveUint16DispatchId, PrimitiveUint32DispatchId, PrimitiveUint64DispatchId, PrimitiveUintDispatchId, @@ -560,9 +560,9 @@ func isPrimitiveDispatchId(staticId DispatchId) bool { } } -// isNotnullPtrDispatchId returns true if the staticId represents a notnull pointer type -func isNotnullPtrDispatchId(staticId DispatchId) bool { - switch staticId { +// isNotnullPtrDispatchId returns true if the dispatchId represents a notnull pointer type +func isNotnullPtrDispatchId(dispatchId DispatchId) bool { + switch dispatchId { case NotnullBoolPtrDispatchId, NotnullInt8PtrDispatchId, NotnullUint8PtrDispatchId, NotnullInt16PtrDispatchId, NotnullUint16PtrDispatchId, NotnullInt32PtrDispatchId, NotnullUint32PtrDispatchId, @@ -589,9 +589,9 @@ func isNumericKind(kind reflect.Kind) bool { } } -// GetDispatchIdFromTypeId converts a TypeId to a DispatchId based on nullability. +// getDispatchIdFromTypeId converts a TypeId to a DispatchId based on nullability. // This follows Java's DispatchId.xlangTypeIdToDispatchId pattern. 
-func GetDispatchIdFromTypeId(typeId TypeId, nullable bool) DispatchId { +func getDispatchIdFromTypeId(typeId TypeId, nullable bool) DispatchId { if nullable { // Nullable (nullable) types switch typeId { @@ -690,8 +690,8 @@ func IsNullablePrimitiveDispatchId(id DispatchId) bool { } // getFixedSizeByDispatchId returns byte size for fixed primitives (0 if not fixed) -func getFixedSizeByDispatchId(staticId DispatchId) int { - switch staticId { +func getFixedSizeByDispatchId(dispatchId DispatchId) int { + switch dispatchId { case PrimitiveBoolDispatchId, PrimitiveInt8DispatchId, PrimitiveUint8DispatchId, NotnullBoolPtrDispatchId, NotnullInt8PtrDispatchId, NotnullUint8PtrDispatchId: return 1 @@ -710,8 +710,8 @@ func getFixedSizeByDispatchId(staticId DispatchId) int { } // getVarintMaxSizeByDispatchId returns max byte size for varint primitives (0 if not varint) -func getVarintMaxSizeByDispatchId(staticId DispatchId) int { - switch staticId { +func getVarintMaxSizeByDispatchId(dispatchId DispatchId) int { + switch dispatchId { case PrimitiveVarint32DispatchId, PrimitiveVarUint32DispatchId, NotnullVarint32PtrDispatchId, NotnullVarUint32PtrDispatchId: return 5 @@ -741,10 +741,10 @@ func getEncodingFromTypeId(typeId TypeId) string { } } -// GetNotnullPtrDispatchId returns the NotnullXxxPtrDispatchId for a pointer-to-numeric type. +// getNotnullPtrDispatchId returns the NotnullXxxPtrDispatchId for a pointer-to-numeric type. // elemKind is the kind of the element type (e.g., reflect.Uint8 for *uint8). // encoding specifies the encoding type (fixed, varint, tagged) for int32/int64/uint32/uint64. 
-func GetNotnullPtrDispatchId(elemKind reflect.Kind, encoding string) DispatchId { +func getNotnullPtrDispatchId(elemKind reflect.Kind, encoding string) DispatchId { switch elemKind { case reflect.Bool: return NotnullBoolPtrDispatchId diff --git a/java/fory-core/src/main/java/org/apache/fory/annotation/Int64Type.java b/java/fory-core/src/main/java/org/apache/fory/annotation/Int64Type.java index e853f236d2..e03c266cdf 100644 --- a/java/fory-core/src/main/java/org/apache/fory/annotation/Int64Type.java +++ b/java/fory-core/src/main/java/org/apache/fory/annotation/Int64Type.java @@ -32,10 +32,10 @@ * value is serialized using different encoding strategies: * *

      - *
    • {@link LongEncoding#VARINT64} (default): Variable-length encoding, compact for small values + *
    • {@link LongEncoding#VARINT} (default): Variable-length encoding, compact for small values * (type_id=7) - *
    • {@link LongEncoding#FIXED_INT64}: Fixed 8-byte encoding, consistent size (type_id=6) - *
    • {@link LongEncoding#TAGGED_INT64}: Tagged encoding that uses 4 bytes for values in range + *
    • {@link LongEncoding#FIXED}: Fixed 8-byte encoding, consistent size (type_id=6) + *
    • {@link LongEncoding#TAGGED}: Tagged encoding that uses 4 bytes for values in range * [-1073741824, 1073741823], otherwise 9 bytes (type_id=8) *
    * @@ -62,5 +62,5 @@ * * @return the encoding type for serialization */ - LongEncoding encoding() default LongEncoding.VARINT64; + LongEncoding encoding() default LongEncoding.VARINT; } diff --git a/java/fory-core/src/main/java/org/apache/fory/annotation/Uint64Type.java b/java/fory-core/src/main/java/org/apache/fory/annotation/Uint64Type.java index 2ac792d052..2fd8c6af97 100644 --- a/java/fory-core/src/main/java/org/apache/fory/annotation/Uint64Type.java +++ b/java/fory-core/src/main/java/org/apache/fory/annotation/Uint64Type.java @@ -34,19 +34,19 @@ *

    Different encoding strategies are available: * *

      - *
    • {@link LongEncoding#VARINT64} (default): Variable-length encoding (VAR_UINT64, type_id=14), + *
    • {@link LongEncoding#VARINT} (default): Variable-length encoding (VAR_UINT64, type_id=14), * compact for small values - *
    • {@link LongEncoding#FIXED_INT64}: Fixed 8-byte encoding (UINT64, type_id=13), consistent + *
    • {@link LongEncoding#FIXED}: Fixed 8-byte encoding (UINT64, type_id=13), consistent * size - *
    • {@link LongEncoding#TAGGED_INT64}: Tagged encoding (TAGGED_UINT64, type_id=15) that uses 4 + *
    • {@link LongEncoding#TAGGED}: Tagged encoding (TAGGED_UINT64, type_id=15) that uses 4 * bytes for values in range [0, 2147483647], otherwise 9 bytes *
    * *

    Benefits: * *

      - *
    • With {@link LongEncoding#VARINT64}: skips zigzag encoding overhead for non-negative values - *
    • With {@link LongEncoding#TAGGED_INT64}: uses unsigned range [0, 2147483647] for 4-byte + *
    • With {@link LongEncoding#VARINT}: skips zigzag encoding overhead for non-negative values + *
    • With {@link LongEncoding#TAGGED}: uses unsigned range [0, 2147483647] for 4-byte * encoding instead of signed range [-1073741824, 1073741823] *
    • Compatible with languages that have native unsigned integer types (e.g., Rust's u64, Go's * uint64, C++'s uint64_t) @@ -75,5 +75,5 @@ * * @return the encoding type for serialization */ - LongEncoding encoding() default LongEncoding.VARINT64; + LongEncoding encoding() default LongEncoding.VARINT; } diff --git a/java/fory-core/src/main/java/org/apache/fory/config/Config.java b/java/fory-core/src/main/java/org/apache/fory/config/Config.java index b49f4ad1dc..e81ed4e5f5 100644 --- a/java/fory-core/src/main/java/org/apache/fory/config/Config.java +++ b/java/fory-core/src/main/java/org/apache/fory/config/Config.java @@ -83,7 +83,7 @@ public Config(ForyBuilder builder) { writeNumUtf16BytesForUtf8Encoding = builder.writeNumUtf16BytesForUtf8Encoding; compressInt = builder.compressInt; longEncoding = builder.longEncoding; - compressLong = longEncoding != LongEncoding.FIXED_INT64; + compressLong = longEncoding != LongEncoding.FIXED; compressIntArray = builder.compressIntArray; compressLongArray = builder.compressLongArray; requireClassRegistration = builder.requireClassRegistration; diff --git a/java/fory-core/src/main/java/org/apache/fory/config/ForyBuilder.java b/java/fory-core/src/main/java/org/apache/fory/config/ForyBuilder.java index 51faffd767..e1320ddb97 100644 --- a/java/fory-core/src/main/java/org/apache/fory/config/ForyBuilder.java +++ b/java/fory-core/src/main/java/org/apache/fory/config/ForyBuilder.java @@ -67,7 +67,7 @@ public final class ForyBuilder { boolean timeRefIgnored = true; ClassLoader classLoader; boolean compressInt = true; - public LongEncoding longEncoding = LongEncoding.TAGGED_INT64; + public LongEncoding longEncoding = LongEncoding.TAGGED; boolean compressIntArray = false; boolean compressLongArray = false; boolean compressString = false; @@ -189,11 +189,11 @@ public ForyBuilder withIntCompressed(boolean intCompressed) { /** * Use variable length encoding for long. 
Enabled by default, use {@link - * LongEncoding#TAGGED_INT64} (Small long as int) for long encoding. + * LongEncoding#TAGGED} (Small long as int) for long encoding. */ public ForyBuilder withLongCompressed(boolean longCompressed) { return withLongCompressed( - longCompressed ? LongEncoding.TAGGED_INT64 : LongEncoding.FIXED_INT64); + longCompressed ? LongEncoding.TAGGED : LongEncoding.FIXED); } /** Use variable length encoding for long. */ @@ -440,7 +440,7 @@ private void finish() { } if (language != Language.JAVA) { stringRefIgnored = true; - longEncoding = LongEncoding.VARINT64; + longEncoding = LongEncoding.VARINT; compressInt = true; compressString = true; } diff --git a/java/fory-core/src/main/java/org/apache/fory/config/LongEncoding.java b/java/fory-core/src/main/java/org/apache/fory/config/LongEncoding.java index d937e5f7fa..f7f1e56659 100644 --- a/java/fory-core/src/main/java/org/apache/fory/config/LongEncoding.java +++ b/java/fory-core/src/main/java/org/apache/fory/config/LongEncoding.java @@ -21,7 +21,7 @@ /** * Encoding option for long. Default encoding is fory SLI(Small long as int) encoding: {@link - * #TAGGED_INT64}. + * #TAGGED}. */ public enum LongEncoding { /** @@ -30,10 +30,10 @@ public enum LongEncoding { * value) << 1 |` *
    • Otherwise write as 9 bytes: `| 0b1 | little-endian 8bytes long |`. * - *

      Faster than {@link #VARINT64}, but compression is not good as {@link #VARINT64} such as + *

      Faster than {@link #VARINT}, but compression is not good as {@link #VARINT} such as * for ints in short range. */ - TAGGED_INT64, + TAGGED, /** * Fory Progressive Variable-length Long Encoding: *

    • positive long format: first bit in every byte indicate whether has next byte, then next @@ -41,7 +41,7 @@ public enum LongEncoding { *
    • Negative number will be converted to positive number by ` (v << 1) ^ (v >> 63)` to reduce * cost of small negative numbers. */ - VARINT64, + VARINT, /** Write long as little endian 8bytes, no compression. */ - FIXED_INT64, + FIXED, } diff --git a/java/fory-core/src/main/java/org/apache/fory/serializer/PrimitiveSerializers.java b/java/fory-core/src/main/java/org/apache/fory/serializer/PrimitiveSerializers.java index d815f2b7e7..0affca6517 100644 --- a/java/fory-core/src/main/java/org/apache/fory/serializer/PrimitiveSerializers.java +++ b/java/fory-core/src/main/java/org/apache/fory/serializer/PrimitiveSerializers.java @@ -197,12 +197,12 @@ public Long read(MemoryBuffer buffer) { public static Expression writeInt64( Expression buffer, Expression v, LongEncoding longEncoding, boolean ensureBounds) { switch (longEncoding) { - case FIXED_INT64: + case FIXED: return new Invoke(buffer, "writeInt64", v); - case TAGGED_INT64: + case TAGGED: return new Invoke( buffer, ensureBounds ? "writeTaggedInt64" : "_unsafeWriteTaggedInt64", v); - case VARINT64: + case VARINT: return new Invoke(buffer, ensureBounds ? 
"writeVarInt64" : "_unsafeWriteVarInt64", v); default: throw new UnsupportedOperationException("Unsupported long encoding " + longEncoding); @@ -210,9 +210,9 @@ public static Expression writeInt64( } public static void writeInt64(MemoryBuffer buffer, long value, LongEncoding longEncoding) { - if (longEncoding == LongEncoding.TAGGED_INT64) { + if (longEncoding == LongEncoding.TAGGED) { buffer.writeTaggedInt64(value); - } else if (longEncoding == LongEncoding.FIXED_INT64) { + } else if (longEncoding == LongEncoding.FIXED) { buffer.writeInt64(value); } else { buffer.writeVarInt64(value); @@ -220,9 +220,9 @@ public static void writeInt64(MemoryBuffer buffer, long value, LongEncoding long } public static long readInt64(MemoryBuffer buffer, LongEncoding longEncoding) { - if (longEncoding == LongEncoding.TAGGED_INT64) { + if (longEncoding == LongEncoding.TAGGED) { return buffer.readTaggedInt64(); - } else if (longEncoding == LongEncoding.FIXED_INT64) { + } else if (longEncoding == LongEncoding.FIXED) { return buffer.readInt64(); } else { return buffer.readVarInt64(); @@ -235,11 +235,11 @@ public static Expression readInt64(Expression buffer, LongEncoding longEncoding) public static String readLongFunc(LongEncoding longEncoding) { switch (longEncoding) { - case FIXED_INT64: + case FIXED: return Platform.IS_LITTLE_ENDIAN ? "_readInt64OnLE" : "_readInt64OnBE"; - case TAGGED_INT64: + case TAGGED: return Platform.IS_LITTLE_ENDIAN ? "_readTaggedInt64OnLE" : "_readTaggedInt64OnBE"; - case VARINT64: + case VARINT: return Platform.IS_LITTLE_ENDIAN ? 
"_readVarInt64OnLE" : "_readVarInt64OnBE"; default: throw new UnsupportedOperationException("Unsupported long encoding " + longEncoding); diff --git a/java/fory-core/src/main/java/org/apache/fory/type/TypeAnnotationUtils.java b/java/fory-core/src/main/java/org/apache/fory/type/TypeAnnotationUtils.java index 6a64ffc82e..3c521af989 100644 --- a/java/fory-core/src/main/java/org/apache/fory/type/TypeAnnotationUtils.java +++ b/java/fory-core/src/main/java/org/apache/fory/type/TypeAnnotationUtils.java @@ -55,11 +55,11 @@ public static int getTypeId(Annotation typeAnnotation, Class fieldType) { checkFieldType(fieldType, "@Uint64Type", long.class, Long.class); Uint64Type uint64Type = (Uint64Type) typeAnnotation; switch (uint64Type.encoding()) { - case VARINT64: + case VARINT: return Types.VAR_UINT64; - case FIXED_INT64: + case FIXED: return Types.UINT64; - case TAGGED_INT64: + case TAGGED: return Types.TAGGED_UINT64; default: throw new IllegalArgumentException("Unsupported encoding: " + uint64Type.encoding()); @@ -72,11 +72,11 @@ public static int getTypeId(Annotation typeAnnotation, Class fieldType) { checkFieldType(fieldType, "@Int64Type", long.class, Long.class); Int64Type int64Type = (Int64Type) typeAnnotation; switch (int64Type.encoding()) { - case VARINT64: + case VARINT: return Types.VARINT64; - case FIXED_INT64: + case FIXED: return Types.INT64; - case TAGGED_INT64: + case TAGGED: return Types.TAGGED_INT64; default: throw new IllegalArgumentException("Unsupported encoding: " + int64Type.encoding()); diff --git a/java/fory-core/src/test/java/org/apache/fory/serializer/PrimitiveSerializersTest.java b/java/fory-core/src/test/java/org/apache/fory/serializer/PrimitiveSerializersTest.java index c91aa26f9c..5ef68ba197 100644 --- a/java/fory-core/src/test/java/org/apache/fory/serializer/PrimitiveSerializersTest.java +++ b/java/fory-core/src/test/java/org/apache/fory/serializer/PrimitiveSerializersTest.java @@ -106,10 +106,10 @@ public void testPrimitiveStruct(boolean 
compressNumber, boolean codegen) { .withCodegen(codegen) .requireClassRegistration(false); serDeCheck( - builder.withNumberCompressed(true).withLongCompressed(LongEncoding.VARINT64).build(), + builder.withNumberCompressed(true).withLongCompressed(LongEncoding.VARINT).build(), struct); serDeCheck( - builder.withNumberCompressed(true).withLongCompressed(LongEncoding.TAGGED_INT64).build(), + builder.withNumberCompressed(true).withLongCompressed(LongEncoding.TAGGED).build(), struct); } else { Fory fory = diff --git a/java/fory-core/src/test/java/org/apache/fory/serializer/UnsignedTest.java b/java/fory-core/src/test/java/org/apache/fory/serializer/UnsignedTest.java index 3108f1e76e..676c3e4237 100644 --- a/java/fory-core/src/test/java/org/apache/fory/serializer/UnsignedTest.java +++ b/java/fory-core/src/test/java/org/apache/fory/serializer/UnsignedTest.java @@ -74,13 +74,13 @@ public static class UnsignedSchemaConsistent { @Uint32Type(compress = false) int u32Fixed; - @Uint64Type(encoding = LongEncoding.VARINT64) + @Uint64Type(encoding = LongEncoding.VARINT) long u64Var; - @Uint64Type(encoding = LongEncoding.FIXED_INT64) + @Uint64Type(encoding = LongEncoding.FIXED) long u64Fixed; - @Uint64Type(encoding = LongEncoding.TAGGED_INT64) + @Uint64Type(encoding = LongEncoding.TAGGED) long u64Tagged; @ForyField(nullable = true) @@ -100,15 +100,15 @@ public static class UnsignedSchemaConsistent { Integer u32FixedNullable; @ForyField(nullable = true) - @Uint64Type(encoding = LongEncoding.VARINT64) + @Uint64Type(encoding = LongEncoding.VARINT) Long u64VarNullable; @ForyField(nullable = true) - @Uint64Type(encoding = LongEncoding.FIXED_INT64) + @Uint64Type(encoding = LongEncoding.FIXED) Long u64FixedNullable; @ForyField(nullable = true) - @Uint64Type(encoding = LongEncoding.TAGGED_INT64) + @Uint64Type(encoding = LongEncoding.TAGGED) Long u64TaggedNullable; } @@ -123,13 +123,13 @@ public static class UnsignedSchemaCompatible { @Uint32Type(compress = false) int u32Fixed; - 
@Uint64Type(encoding = LongEncoding.VARINT64) + @Uint64Type(encoding = LongEncoding.VARINT) long u64Var; - @Uint64Type(encoding = LongEncoding.FIXED_INT64) + @Uint64Type(encoding = LongEncoding.FIXED) long u64Fixed; - @Uint64Type(encoding = LongEncoding.TAGGED_INT64) + @Uint64Type(encoding = LongEncoding.TAGGED) long u64Tagged; @ForyField(nullable = true) @@ -149,15 +149,15 @@ public static class UnsignedSchemaCompatible { Integer u32FixedField2; @ForyField(nullable = true) - @Uint64Type(encoding = LongEncoding.VARINT64) + @Uint64Type(encoding = LongEncoding.VARINT) Long u64VarField2; @ForyField(nullable = true) - @Uint64Type(encoding = LongEncoding.FIXED_INT64) + @Uint64Type(encoding = LongEncoding.FIXED) Long u64FixedField2; @ForyField(nullable = true) - @Uint64Type(encoding = LongEncoding.TAGGED_INT64) + @Uint64Type(encoding = LongEncoding.TAGGED) Long u64TaggedField2; @Override @@ -691,25 +691,25 @@ public void testUint32EdgeCases(Fory fory) { } public static class Uint64OnlyStruct { - @Uint64Type(encoding = LongEncoding.VARINT64) + @Uint64Type(encoding = LongEncoding.VARINT) long varValue; - @Uint64Type(encoding = LongEncoding.FIXED_INT64) + @Uint64Type(encoding = LongEncoding.FIXED) long fixedValue; - @Uint64Type(encoding = LongEncoding.TAGGED_INT64) + @Uint64Type(encoding = LongEncoding.TAGGED) long taggedValue; @ForyField(nullable = true) - @Uint64Type(encoding = LongEncoding.VARINT64) + @Uint64Type(encoding = LongEncoding.VARINT) Long varNullableValue; @ForyField(nullable = true) - @Uint64Type(encoding = LongEncoding.FIXED_INT64) + @Uint64Type(encoding = LongEncoding.FIXED) Long fixedNullableValue; @ForyField(nullable = true) - @Uint64Type(encoding = LongEncoding.TAGGED_INT64) + @Uint64Type(encoding = LongEncoding.TAGGED) Long taggedNullableValue; @Override diff --git a/java/fory-core/src/test/java/org/apache/fory/xlang/XlangTestBase.java b/java/fory-core/src/test/java/org/apache/fory/xlang/XlangTestBase.java index a5a4ddc684..52b928cd39 100644 --- 
a/java/fory-core/src/test/java/org/apache/fory/xlang/XlangTestBase.java +++ b/java/fory-core/src/test/java/org/apache/fory/xlang/XlangTestBase.java @@ -2490,13 +2490,13 @@ static class UnsignedSchemaConsistent { @Uint32Type(compress = false) int u32Fixed; - @Uint64Type(encoding = LongEncoding.VARINT64) + @Uint64Type(encoding = LongEncoding.VARINT) long u64Var; - @Uint64Type(encoding = LongEncoding.FIXED_INT64) + @Uint64Type(encoding = LongEncoding.FIXED) long u64Fixed; - @Uint64Type(encoding = LongEncoding.TAGGED_INT64) + @Uint64Type(encoding = LongEncoding.TAGGED) long u64Tagged; // Boxed nullable unsigned fields @@ -2517,24 +2517,24 @@ static class UnsignedSchemaConsistent { Integer u32FixedNullable; @ForyField(nullable = true) - @Uint64Type(encoding = LongEncoding.VARINT64) + @Uint64Type(encoding = LongEncoding.VARINT) Long u64VarNullable; @ForyField(nullable = true) - @Uint64Type(encoding = LongEncoding.FIXED_INT64) + @Uint64Type(encoding = LongEncoding.FIXED) Long u64FixedNullable; @ForyField(nullable = true) - @Uint64Type(encoding = LongEncoding.TAGGED_INT64) + @Uint64Type(encoding = LongEncoding.TAGGED) Long u64TaggedNullable; } @Data static class UnsignedSchemaConsistentSimple { - @Uint64Type(encoding = LongEncoding.TAGGED_INT64) + @Uint64Type(encoding = LongEncoding.TAGGED) long u64Tagged; @ForyField(nullable = true) - @Uint64Type(encoding = LongEncoding.TAGGED_INT64) + @Uint64Type(encoding = LongEncoding.TAGGED) Long u64TaggedNullable; } @@ -2641,13 +2641,13 @@ static class UnsignedSchemaCompatible { @Uint32Type(compress = false) int u32Fixed; - @Uint64Type(encoding = LongEncoding.VARINT64) + @Uint64Type(encoding = LongEncoding.VARINT) long u64Var; - @Uint64Type(encoding = LongEncoding.FIXED_INT64) + @Uint64Type(encoding = LongEncoding.FIXED) long u64Fixed; - @Uint64Type(encoding = LongEncoding.TAGGED_INT64) + @Uint64Type(encoding = LongEncoding.TAGGED) long u64Tagged; // Group 2: Nullable boxed fields (nullable in Java, non-Optional in other languages) 
@@ -2668,15 +2668,15 @@ static class UnsignedSchemaCompatible { Integer u32FixedField2; @ForyField(nullable = true) - @Uint64Type(encoding = LongEncoding.VARINT64) + @Uint64Type(encoding = LongEncoding.VARINT) Long u64VarField2; @ForyField(nullable = true) - @Uint64Type(encoding = LongEncoding.FIXED_INT64) + @Uint64Type(encoding = LongEncoding.FIXED) Long u64FixedField2; @ForyField(nullable = true) - @Uint64Type(encoding = LongEncoding.TAGGED_INT64) + @Uint64Type(encoding = LongEncoding.TAGGED) Long u64TaggedField2; } From 145adaa895d9c9286698d6da50b06b5342126f92 Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Sat, 10 Jan 2026 09:07:07 +0800 Subject: [PATCH 20/44] support unsigned in python --- python/pyfory/buffer.pxd | 4 +- python/pyfory/buffer.pyx | 6 +- python/pyfory/tests/xlang_test_main.py | 166 +++++++++++++++---------- 3 files changed, 108 insertions(+), 68 deletions(-) diff --git a/python/pyfory/buffer.pxd b/python/pyfory/buffer.pxd index 1b23ef9895..16c87705e4 100644 --- a/python/pyfory/buffer.pxd +++ b/python/pyfory/buffer.pxd @@ -159,13 +159,13 @@ cdef class Buffer: cpdef inline int64_t read_varuint64(self) - cpdef inline write_varuint32(self, int32_t value) + cpdef inline write_varuint32(self, uint32_t value) cpdef inline write_varint32(self, int32_t value) cpdef inline int32_t read_varint32(self) - cpdef inline int32_t read_varuint32(self) + cpdef inline uint32_t read_varuint32(self) cpdef inline write_tagged_int64(self, int64_t value) diff --git a/python/pyfory/buffer.pyx b/python/pyfory/buffer.pyx index fefb6e810d..b4e97a15d8 100644 --- a/python/pyfory/buffer.pyx +++ b/python/pyfory/buffer.pyx @@ -432,7 +432,7 @@ cdef class Buffer: cpdef inline write_varint32(self, int32_t value): return self.write_varuint32((value << 1) ^ (value >> 31)) - cpdef inline write_varuint32(self, int32_t value): + cpdef inline write_varuint32(self, uint32_t value): # Need 8 bytes for safe bulk write (PutVarUint32 writes uint64_t for 5-byte varints) self.grow(8) cdef 
int32_t actual_bytes_written = self.c_buffer_ptr.PutVarUint32(self.writer_index, value) @@ -443,11 +443,11 @@ cdef class Buffer: cdef uint32_t v = self.read_varuint32() return (v >> 1) ^ -(v & 1) - cpdef inline int32_t read_varuint32(self): + cpdef inline uint32_t read_varuint32(self): cdef: uint32_t read_length = 0 int8_t b - int32_t result + uint32_t result if self._c_size - self.reader_index > 5: result = self.c_buffer_ptr.GetVarUint32(self.reader_index, &read_length) self.reader_index += read_length diff --git a/python/pyfory/tests/xlang_test_main.py b/python/pyfory/tests/xlang_test_main.py index c9ea377b58..48ed5afce0 100644 --- a/python/pyfory/tests/xlang_test_main.py +++ b/python/pyfory/tests/xlang_test_main.py @@ -1323,22 +1323,22 @@ class UnsignedSchemaConsistent: """ # Primitive unsigned fields (non-nullable) - u8: pyfory.uint8 = 0 - u16: pyfory.uint16 = 0 - u32_var: pyfory.uint32 = 0 # VAR_UINT32 encoding - u32_fixed: pyfory.fixed_uint32 = 0 # Fixed 4-byte encoding - u64_var: pyfory.uint64 = 0 # VAR_UINT64 encoding - u64_fixed: pyfory.fixed_uint64 = 0 # Fixed 8-byte encoding - u64_tagged: pyfory.tagged_uint64 = 0 # Tagged encoding + u8_field: pyfory.uint8 = 0 + u16_field: pyfory.uint16 = 0 + u32_var_field: pyfory.uint32 = 0 # VAR_UINT32 encoding + u32_fixed_field: pyfory.fixed_uint32 = 0 # Fixed 4-byte encoding + u64_var_field: pyfory.uint64 = 0 # VAR_UINT64 encoding + u64_fixed_field: pyfory.fixed_uint64 = 0 # Fixed 8-byte encoding + u64_tagged_field: pyfory.tagged_uint64 = 0 # Tagged encoding # Boxed nullable unsigned fields (using Optional) - u8_nullable: Optional[pyfory.uint8] = None - u16_nullable: Optional[pyfory.uint16] = None - u32_var_nullable: Optional[pyfory.uint32] = None - u32_fixed_nullable: Optional[pyfory.fixed_uint32] = None - u64_var_nullable: Optional[pyfory.uint64] = None - u64_fixed_nullable: Optional[pyfory.fixed_uint64] = None - u64_tagged_nullable: Optional[pyfory.tagged_uint64] = None + u8_nullable_field: Optional[pyfory.uint8] = 
None + u16_nullable_field: Optional[pyfory.uint16] = None + u32_var_nullable_field: Optional[pyfory.uint32] = None + u32_fixed_nullable_field: Optional[pyfory.fixed_uint32] = None + u64_var_nullable_field: Optional[pyfory.uint64] = None + u64_fixed_nullable_field: Optional[pyfory.fixed_uint64] = None + u64_tagged_nullable_field: Optional[pyfory.tagged_uint64] = None @dataclass @@ -1352,13 +1352,13 @@ class UnsignedSchemaCompatible: """ # Group 1: Optional unsigned fields (nullable in Python, non-nullable in Java) - u8: Optional[pyfory.uint8] = None - u16: Optional[pyfory.uint16] = None - u32_var: Optional[pyfory.uint32] = None # VAR_UINT32 encoding - u32_fixed: Optional[pyfory.fixed_uint32] = None # Fixed 4-byte encoding - u64_var: Optional[pyfory.uint64] = None # VAR_UINT64 encoding - u64_fixed: Optional[pyfory.fixed_uint64] = None # Fixed 8-byte encoding - u64_tagged: Optional[pyfory.tagged_uint64] = None # Tagged encoding + u8_field1: Optional[pyfory.uint8] = None + u16_field1: Optional[pyfory.uint16] = None + u32_var_field1: Optional[pyfory.uint32] = None # VAR_UINT32 encoding + u32_fixed_field1: Optional[pyfory.fixed_uint32] = None # Fixed 4-byte encoding + u64_var_field1: Optional[pyfory.uint64] = None # VAR_UINT64 encoding + u64_fixed_field1: Optional[pyfory.fixed_uint64] = None # Fixed 8-byte encoding + u64_tagged_field1: Optional[pyfory.tagged_uint64] = None # Tagged encoding # Group 2: Non-Optional unsigned fields (non-nullable in Python, nullable in Java) u8_field2: pyfory.uint8 = 0 @@ -1370,11 +1370,51 @@ class UnsignedSchemaCompatible: u64_tagged_field2: pyfory.tagged_uint64 = 0 +@dataclass +class UnsignedSchemaConsistentSimple: + """ + Simple test struct for tagged uint64 in schema consistent mode. + Must match Java UnsignedSchemaConsistentSimple (type id 1). 
+ """ + + u64_tagged: pyfory.tagged_uint64 = 0 + u64_tagged_nullable: Optional[pyfory.tagged_uint64] = None + + # ============================================================================ # Unsigned Number Tests # ============================================================================ +def test_unsigned_schema_consistent_simple(): + """Test simple tagged uint64 in schema consistent mode.""" + data_file = get_data_file() + with open(data_file, "rb") as f: + data_bytes = f.read() + + fory = pyfory.Fory(xlang=True, compatible=False) + fory.register_type(UnsignedSchemaConsistentSimple, type_id=1) + + expected = UnsignedSchemaConsistentSimple( + u64_tagged=1000000000, + u64_tagged_nullable=500000000, + ) + + obj = fory.deserialize(data_bytes) + debug_print(f"Deserialized: {obj}") + + assert obj.u64_tagged == expected.u64_tagged, ( + f"u64_tagged: {obj.u64_tagged} != {expected.u64_tagged}" + ) + assert obj.u64_tagged_nullable == expected.u64_tagged_nullable, ( + f"u64_tagged_nullable: {obj.u64_tagged_nullable} != {expected.u64_tagged_nullable}" + ) + + new_bytes = fory.serialize(obj) + with open(data_file, "wb") as f: + f.write(new_bytes) + + def test_unsigned_schema_consistent(): """Test unsigned number types with schema consistent mode.""" data_file = get_data_file() @@ -1386,43 +1426,43 @@ def test_unsigned_schema_consistent(): expected = UnsignedSchemaConsistent( # Primitive fields - u8=200, - u16=60000, - u32_var=3000000000, - u32_fixed=4000000000, - u64_var=10000000000, - u64_fixed=15000000000, - u64_tagged=1000000000, + u8_field=200, + u16_field=60000, + u32_var_field=3000000000, + u32_fixed_field=4000000000, + u64_var_field=10000000000, + u64_fixed_field=15000000000, + u64_tagged_field=1000000000, # Nullable boxed fields with values - u8_nullable=128, - u16_nullable=40000, - u32_var_nullable=2500000000, - u32_fixed_nullable=3500000000, - u64_var_nullable=8000000000, - u64_fixed_nullable=12000000000, - u64_tagged_nullable=500000000, + 
u8_nullable_field=128, + u16_nullable_field=40000, + u32_var_nullable_field=2500000000, + u32_fixed_nullable_field=3500000000, + u64_var_nullable_field=8000000000, + u64_fixed_nullable_field=12000000000, + u64_tagged_nullable_field=500000000, ) obj = fory.deserialize(data_bytes) debug_print(f"Deserialized: {obj}") # Verify primitive unsigned fields - assert obj.u8 == expected.u8, f"u8: {obj.u8} != {expected.u8}" - assert obj.u16 == expected.u16, f"u16: {obj.u16} != {expected.u16}" - assert obj.u32_var == expected.u32_var, f"u32_var: {obj.u32_var} != {expected.u32_var}" - assert obj.u32_fixed == expected.u32_fixed, f"u32_fixed: {obj.u32_fixed} != {expected.u32_fixed}" - assert obj.u64_var == expected.u64_var, f"u64_var: {obj.u64_var} != {expected.u64_var}" - assert obj.u64_fixed == expected.u64_fixed, f"u64_fixed: {obj.u64_fixed} != {expected.u64_fixed}" - assert obj.u64_tagged == expected.u64_tagged, f"u64_tagged: {obj.u64_tagged} != {expected.u64_tagged}" + assert obj.u8_field == expected.u8_field, f"u8_field: {obj.u8_field} != {expected.u8_field}" + assert obj.u16_field == expected.u16_field, f"u16_field: {obj.u16_field} != {expected.u16_field}" + assert obj.u32_var_field == expected.u32_var_field, f"u32_var_field: {obj.u32_var_field} != {expected.u32_var_field}" + assert obj.u32_fixed_field == expected.u32_fixed_field, f"u32_fixed_field: {obj.u32_fixed_field} != {expected.u32_fixed_field}" + assert obj.u64_var_field == expected.u64_var_field, f"u64_var_field: {obj.u64_var_field} != {expected.u64_var_field}" + assert obj.u64_fixed_field == expected.u64_fixed_field, f"u64_fixed_field: {obj.u64_fixed_field} != {expected.u64_fixed_field}" + assert obj.u64_tagged_field == expected.u64_tagged_field, f"u64_tagged_field: {obj.u64_tagged_field} != {expected.u64_tagged_field}" # Verify nullable boxed fields - assert obj.u8_nullable == expected.u8_nullable, f"u8_nullable: {obj.u8_nullable} != {expected.u8_nullable}" - assert obj.u16_nullable == expected.u16_nullable, 
f"u16_nullable: {obj.u16_nullable} != {expected.u16_nullable}" - assert obj.u32_var_nullable == expected.u32_var_nullable, f"u32_var_nullable: {obj.u32_var_nullable} != {expected.u32_var_nullable}" - assert obj.u32_fixed_nullable == expected.u32_fixed_nullable, f"u32_fixed_nullable: {obj.u32_fixed_nullable} != {expected.u32_fixed_nullable}" - assert obj.u64_var_nullable == expected.u64_var_nullable, f"u64_var_nullable: {obj.u64_var_nullable} != {expected.u64_var_nullable}" - assert obj.u64_fixed_nullable == expected.u64_fixed_nullable, f"u64_fixed_nullable: {obj.u64_fixed_nullable} != {expected.u64_fixed_nullable}" - assert obj.u64_tagged_nullable == expected.u64_tagged_nullable, f"u64_tagged_nullable: {obj.u64_tagged_nullable} != {expected.u64_tagged_nullable}" + assert obj.u8_nullable_field == expected.u8_nullable_field, f"u8_nullable_field: {obj.u8_nullable_field} != {expected.u8_nullable_field}" + assert obj.u16_nullable_field == expected.u16_nullable_field, f"u16_nullable_field: {obj.u16_nullable_field} != {expected.u16_nullable_field}" + assert obj.u32_var_nullable_field == expected.u32_var_nullable_field, f"u32_var_nullable_field: {obj.u32_var_nullable_field} != {expected.u32_var_nullable_field}" + assert obj.u32_fixed_nullable_field == expected.u32_fixed_nullable_field, f"u32_fixed_nullable_field: {obj.u32_fixed_nullable_field} != {expected.u32_fixed_nullable_field}" + assert obj.u64_var_nullable_field == expected.u64_var_nullable_field, f"u64_var_nullable_field: {obj.u64_var_nullable_field} != {expected.u64_var_nullable_field}" + assert obj.u64_fixed_nullable_field == expected.u64_fixed_nullable_field, f"u64_fixed_nullable_field: {obj.u64_fixed_nullable_field} != {expected.u64_fixed_nullable_field}" + assert obj.u64_tagged_nullable_field == expected.u64_tagged_nullable_field, f"u64_tagged_nullable_field: {obj.u64_tagged_nullable_field} != {expected.u64_tagged_nullable_field}" new_bytes = fory.serialize(obj) with open(data_file, "wb") as f: @@ -1440,13 
+1480,13 @@ def test_unsigned_schema_compatible(): expected = UnsignedSchemaCompatible( # Group 1: Optional fields (values from Java's non-nullable fields) - u8=200, - u16=60000, - u32_var=3000000000, - u32_fixed=4000000000, - u64_var=10000000000, - u64_fixed=15000000000, - u64_tagged=1000000000, + u8_field1=200, + u16_field1=60000, + u32_var_field1=3000000000, + u32_fixed_field1=4000000000, + u64_var_field1=10000000000, + u64_fixed_field1=15000000000, + u64_tagged_field1=1000000000, # Group 2: Non-Optional fields (values from Java's nullable fields) u8_field2=128, u16_field2=40000, @@ -1461,13 +1501,13 @@ def test_unsigned_schema_compatible(): debug_print(f"Deserialized: {obj}") # Verify Group 1: Optional unsigned fields - assert obj.u8 == expected.u8, f"u8: {obj.u8} != {expected.u8}" - assert obj.u16 == expected.u16, f"u16: {obj.u16} != {expected.u16}" - assert obj.u32_var == expected.u32_var, f"u32_var: {obj.u32_var} != {expected.u32_var}" - assert obj.u32_fixed == expected.u32_fixed, f"u32_fixed: {obj.u32_fixed} != {expected.u32_fixed}" - assert obj.u64_var == expected.u64_var, f"u64_var: {obj.u64_var} != {expected.u64_var}" - assert obj.u64_fixed == expected.u64_fixed, f"u64_fixed: {obj.u64_fixed} != {expected.u64_fixed}" - assert obj.u64_tagged == expected.u64_tagged, f"u64_tagged: {obj.u64_tagged} != {expected.u64_tagged}" + assert obj.u8_field1 == expected.u8_field1, f"u8_field1: {obj.u8_field1} != {expected.u8_field1}" + assert obj.u16_field1 == expected.u16_field1, f"u16_field1: {obj.u16_field1} != {expected.u16_field1}" + assert obj.u32_var_field1 == expected.u32_var_field1, f"u32_var_field1: {obj.u32_var_field1} != {expected.u32_var_field1}" + assert obj.u32_fixed_field1 == expected.u32_fixed_field1, f"u32_fixed_field1: {obj.u32_fixed_field1} != {expected.u32_fixed_field1}" + assert obj.u64_var_field1 == expected.u64_var_field1, f"u64_var_field1: {obj.u64_var_field1} != {expected.u64_var_field1}" + assert obj.u64_fixed_field1 == 
expected.u64_fixed_field1, f"u64_fixed_field1: {obj.u64_fixed_field1} != {expected.u64_fixed_field1}" + assert obj.u64_tagged_field1 == expected.u64_tagged_field1, f"u64_tagged_field1: {obj.u64_tagged_field1} != {expected.u64_tagged_field1}" # Verify Group 2: Non-Optional fields assert obj.u8_field2 == expected.u8_field2, f"u8_field2: {obj.u8_field2} != {expected.u8_field2}" From 51e68480df424df125415068a2fe850d94b19fea Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Sat, 10 Jan 2026 14:18:04 +0800 Subject: [PATCH 21/44] add rust unsigned and compressed fields support --- rust/fory-core/src/meta/type_meta.rs | 11 +- rust/fory-derive/src/object/field_meta.rs | 110 +++++++++- rust/fory-derive/src/object/misc.rs | 73 +++++-- rust/fory-derive/src/object/read.rs | 215 ++++++++++++++++++- rust/fory-derive/src/object/util.rs | 241 +++++++++++++++++++--- rust/fory-derive/src/object/write.rs | 30 ++- rust/tests/tests/test_cross_language.rs | 100 ++++----- 7 files changed, 677 insertions(+), 103 deletions(-) diff --git a/rust/fory-core/src/meta/type_meta.rs b/rust/fory-core/src/meta/type_meta.rs index 6823d18927..de34dad5c8 100644 --- a/rust/fory-core/src/meta/type_meta.rs +++ b/rust/fory-core/src/meta/type_meta.rs @@ -644,14 +644,15 @@ impl TypeMeta { } } fn is_compress(type_id: u32) -> bool { - // Only signed integer types are marked as compressible - // to maintain backward compatibility with field ordering + // Variable-size integer types (both signed and unsigned) + // These are sorted after fixed-size types in field ordering [ - TypeId::INT32 as u32, - TypeId::INT64 as u32, TypeId::VARINT32 as u32, TypeId::VARINT64 as u32, TypeId::TAGGED_INT64 as u32, + TypeId::VAR_UINT32 as u32, + TypeId::VAR_UINT64 as u32, + TypeId::TAGGED_UINT64 as u32, ] .contains(&type_id) } @@ -669,7 +670,7 @@ impl TypeMeta { .cmp(&b_nullable) // non-nullable first .then_with(|| compress_a.cmp(&compress_b)) // fixed-size (false) first, then variable-size (true) last .then_with(|| 
size_b.cmp(&size_a)) // when same compress status: larger size first - .then_with(|| a_id.cmp(&b_id)) // when same size: smaller type id first + .then_with(|| b_id.cmp(&a_id)) // when same size: larger type id first .then_with(|| a_field_name.cmp(b_field_name)) // when same id: lexicographic name } fn type_then_name_sorter(a: &FieldInfo, b: &FieldInfo) -> std::cmp::Ordering { diff --git a/rust/fory-derive/src/object/field_meta.rs b/rust/fory-derive/src/object/field_meta.rs index ff1a68793d..d7ae18ec63 100644 --- a/rust/fory-derive/src/object/field_meta.rs +++ b/rust/fory-derive/src/object/field_meta.rs @@ -22,11 +22,25 @@ //! - `nullable`: Whether the field can be null (default: false, except Option/RcWeak/ArcWeak) //! - `ref`: Whether to enable reference tracking (default: false, except Rc/Arc/RcWeak/ArcWeak) //! - `skip`: Skip this field during serialization +//! - `compress`: For u32 fields: true (VAR_UINT32, default) or false (UINT32 fixed) +//! - `encoding`: For u64 fields: "varint" (default), "fixed", or "tagged" use quote::ToTokens; use std::collections::HashMap; use syn::{Field, GenericArgument, PathArguments, Type}; +/// Encoding type for u64 fields +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] +pub enum U64Encoding { + /// Variable-length encoding (VAR_UINT64, type id 14) - default + #[default] + Varint, + /// Fixed 8-byte encoding (UINT64, type id 13) + Fixed, + /// Tagged variable-length encoding (TAGGED_UINT64, type id 15) + Tagged, +} + /// Represents parsed `#[fory(...)]` field attributes #[derive(Debug, Clone, Default)] pub struct ForyFieldMeta { @@ -38,6 +52,10 @@ pub struct ForyFieldMeta { pub ref_tracking: Option, /// Whether to skip this field entirely pub skip: bool, + /// For u32 fields: true = VAR_UINT32 (default), false = UINT32 (fixed) + pub compress: Option, + /// For u64 fields: encoding type (varint/fixed/tagged) + pub encoding: Option, } /// Type classification for determining default nullable/ref behavior @@ -125,6 +143,24 @@ 
pub fn parse_field_meta(field: &Field) -> syn::Result { meta.ref_tracking = Some(value); } else if nested.path.is_ident("skip") { meta.skip = true; + } else if nested.path.is_ident("compress") { + let value = parse_bool_or_flag(&nested)?; + meta.compress = Some(value); + } else if nested.path.is_ident("encoding") { + let lit: syn::LitStr = nested.value()?.parse()?; + let encoding_str = lit.value(); + let encoding = match encoding_str.as_str() { + "varint" => U64Encoding::Varint, + "fixed" => U64Encoding::Fixed, + "tagged" => U64Encoding::Tagged, + _ => { + return Err(syn::Error::new( + lit.span(), + "encoding must be \"varint\", \"fixed\", or \"tagged\"", + )); + } + }; + meta.encoding = Some(encoding); } Ok(()) })?; @@ -188,7 +224,7 @@ fn extract_outer_type_name(ty: &Type) -> String { } /// Extract the inner type from `Option` -fn extract_option_inner_type(ty: &Type) -> Option { +pub fn extract_option_inner_type(ty: &Type) -> Option { if let Type::Path(type_path) = ty { if let Some(seg) = type_path.path.segments.last() { if seg.ident == "Option" { @@ -456,6 +492,8 @@ mod tests { nullable: Some(true), ref_tracking: None, skip: false, + compress: None, + encoding: None, }; assert!(meta.effective_nullable(FieldTypeClass::Primitive)); // Would be false by default @@ -465,7 +503,77 @@ mod tests { nullable: None, ref_tracking: Some(false), skip: false, + compress: None, + encoding: None, }; assert!(!meta.effective_ref_tracking(FieldTypeClass::Rc)); // Would be true by default } + + #[test] + fn test_parse_compress_attribute() { + let field: Field = parse_quote! { + #[fory(compress = false)] + value: u32 + }; + let meta = parse_field_meta(&field).unwrap(); + assert_eq!(meta.compress, Some(false)); + + let field: Field = parse_quote! { + #[fory(compress = true)] + value: u32 + }; + let meta = parse_field_meta(&field).unwrap(); + assert_eq!(meta.compress, Some(true)); + + // Standalone compress flag should be true + let field: Field = parse_quote! 
{ + #[fory(compress)] + value: u32 + }; + let meta = parse_field_meta(&field).unwrap(); + assert_eq!(meta.compress, Some(true)); + } + + #[test] + fn test_parse_encoding_attribute() { + let field: Field = parse_quote! { + #[fory(encoding = "varint")] + value: u64 + }; + let meta = parse_field_meta(&field).unwrap(); + assert_eq!(meta.encoding, Some(U64Encoding::Varint)); + + let field: Field = parse_quote! { + #[fory(encoding = "fixed")] + value: u64 + }; + let meta = parse_field_meta(&field).unwrap(); + assert_eq!(meta.encoding, Some(U64Encoding::Fixed)); + + let field: Field = parse_quote! { + #[fory(encoding = "tagged")] + value: u64 + }; + let meta = parse_field_meta(&field).unwrap(); + assert_eq!(meta.encoding, Some(U64Encoding::Tagged)); + } + + #[test] + fn test_parse_combined_attributes() { + let field: Field = parse_quote! { + #[fory(nullable, compress = false)] + value: Option + }; + let meta = parse_field_meta(&field).unwrap(); + assert_eq!(meta.nullable, Some(true)); + assert_eq!(meta.compress, Some(false)); + + let field: Field = parse_quote! 
{ + #[fory(nullable, encoding = "tagged")] + value: Option + }; + let meta = parse_field_meta(&field).unwrap(); + assert_eq!(meta.nullable, Some(true)); + assert_eq!(meta.encoding, Some(U64Encoding::Tagged)); + } } diff --git a/rust/fory-derive/src/object/misc.rs b/rust/fory-derive/src/object/misc.rs index 582f82e8a0..f3581a907e 100644 --- a/rust/fory-derive/src/object/misc.rs +++ b/rust/fory-derive/src/object/misc.rs @@ -20,7 +20,9 @@ use quote::quote; use std::sync::atomic::{AtomicU32, Ordering}; use syn::Field; -use super::field_meta::{classify_field_type, is_option_type, parse_field_meta}; +use super::field_meta::{ + classify_field_type, extract_option_inner_type, is_option_type, parse_field_meta, U64Encoding, +}; use super::util::{ classify_trait_object_field, generic_tree_to_tokens, get_filtered_source_fields_iter, get_sort_fields_ts, parse_generic_tree, StructField, @@ -98,19 +100,64 @@ pub fn gen_field_fields_info(source_fields: &[SourceField<'_>]) -> TokenStream { match classify_trait_object_field(ty) { StructField::None => { - let generic_tree = parse_generic_tree(ty); - let generic_token = generic_tree_to_tokens(&generic_tree); - quote! 
{ - fory_core::meta::FieldInfo::new_with_id( - #field_id, - #name, - { - let mut ft = #generic_token; - ft.nullable = #nullable; - ft.ref_tracking = #ref_tracking; - ft + // Check if this is a u32/u64 field (or Option/Option) with encoding attributes + // In this case, we need to generate the FieldType with the correct type ID directly + let inner_ty = extract_option_inner_type(ty).unwrap_or_else(|| ty.clone()); + let inner_ty_str = quote::ToTokens::to_token_stream(&inner_ty) + .to_string() + .replace(' ', ""); + + let is_u32_with_encoding = inner_ty_str == "u32" && meta.compress.is_some(); + let is_u64_with_encoding = inner_ty_str == "u64" && meta.encoding.is_some(); + + if is_u32_with_encoding || is_u64_with_encoding { + // Generate FieldType directly with the correct type ID + let type_id_ts = if is_u32_with_encoding { + if meta.compress == Some(false) { + quote! { fory_core::types::TypeId::UINT32 as u32 } + } else { + quote! { fory_core::types::TypeId::VAR_UINT32 as u32 } + } + } else { + // u64 with encoding attribute + match meta.encoding { + Some(U64Encoding::Fixed) => { + quote! { fory_core::types::TypeId::UINT64 as u32 } + } + Some(U64Encoding::Tagged) => { + quote! { fory_core::types::TypeId::TAGGED_UINT64 as u32 } + } + _ => quote! { fory_core::types::TypeId::VAR_UINT64 as u32 }, } - ) + }; + + quote! { + fory_core::meta::FieldInfo::new_with_id( + #field_id, + #name, + fory_core::meta::FieldType { + type_id: #type_id_ts, + nullable: #nullable, + ref_tracking: #ref_tracking, + generics: Vec::new() + } + ) + } + } else { + let generic_tree = parse_generic_tree(ty); + let generic_token = generic_tree_to_tokens(&generic_tree); + quote! 
{ + fory_core::meta::FieldInfo::new_with_id( + #field_id, + #name, + { + let mut ft = #generic_token; + ft.nullable = #nullable; + ft.ref_tracking = #ref_tracking; + ft + } + ) + } } } StructField::VecBox(_) | StructField::VecRc(_) | StructField::VecArc(_) => { diff --git a/rust/fory-derive/src/object/read.rs b/rust/fory-derive/src/object/read.rs index 2e7aa12f50..a98a0e9f51 100644 --- a/rust/fory-derive/src/object/read.rs +++ b/rust/fory-derive/src/object/read.rs @@ -19,14 +19,143 @@ use proc_macro2::{Ident, TokenStream}; use quote::{format_ident, quote}; use syn::Field; +use super::field_meta::{extract_option_inner_type, parse_field_meta}; use super::util::{ classify_trait_object_field, create_wrapper_types_arc, create_wrapper_types_rc, determine_field_ref_mode, extract_type_name, gen_struct_version_hash_ts, - get_primitive_reader_method, get_struct_name, is_debug_enabled, is_direct_primitive_type, - is_primitive_type, is_skip_field, should_skip_type_info_for_field, FieldRefMode, StructField, + get_option_inner_primitive_name, get_primitive_reader_method_with_encoding, get_struct_name, + is_debug_enabled, is_direct_primitive_type, is_option_encoding_primitive, is_primitive_type, + is_skip_field, should_skip_type_info_for_field, FieldRefMode, StructField, }; use crate::util::SourceField; +/// Check if a type is a primitive type that needs special compatible mode handling +/// Returns the type name if it's u8, u16, u32, or u64 (or Option) +fn is_compatible_primitive_type(ty: &syn::Type) -> Option<&'static str> { + let inner_ty = extract_option_inner_type(ty).unwrap_or_else(|| ty.clone()); + let inner_ty_str = quote::ToTokens::to_token_stream(&inner_ty) + .to_string() + .replace(' ', ""); + match inner_ty_str.as_str() { + "u8" => Some("u8"), + "u16" => Some("u16"), + "u32" => Some("u32"), + "u64" => Some("u64"), + _ => None, + } +} + +/// Check if a type is u32 or u64 (for encoding-aware reading) +fn is_unsigned_encoding_type(ty: &syn::Type) -> Option<&'static str> 
{ + let inner_ty = extract_option_inner_type(ty).unwrap_or_else(|| ty.clone()); + let inner_ty_str = quote::ToTokens::to_token_stream(&inner_ty) + .to_string() + .replace(' ', ""); + match inner_ty_str.as_str() { + "u32" => Some("u32"), + "u64" => Some("u64"), + _ => None, + } +} + +/// Generate compatible mode read code for u32/u64 fields based on remote type_id +fn gen_compatible_unsigned_read( + unsigned_type: &str, + var_name: &Ident, + is_option: bool, +) -> TokenStream { + let read_value = if unsigned_type == "u32" { + quote! { + // Read u32 based on remote type_id + match _field.field_type.type_id { + fory_core::types::UINT32 => context.reader.read_u32()?, + fory_core::types::VAR_UINT32 => context.reader.read_varuint32()?, + _ => context.reader.read_varuint32()?, // Default to varint + } + } + } else { + // u64 + quote! { + // Read u64 based on remote type_id + match _field.field_type.type_id { + fory_core::types::UINT64 => context.reader.read_u64()?, + fory_core::types::VAR_UINT64 => context.reader.read_varuint64()?, + fory_core::types::TAGGED_UINT64 => context.reader.read_tagged_u64()?, + _ => context.reader.read_varuint64()?, // Default to varint + } + } + }; + + if is_option { + quote! { + let read_ref_flag = fory_core::serializer::util::field_need_write_ref_into( + _field.field_type.type_id, + _field.field_type.nullable, + ); + if read_ref_flag { + let ref_flag = context.reader.read_i8()?; + if ref_flag == fory_core::RefFlag::Null as i8 { + #var_name = Some(None); + } else { + #var_name = Some(Some(#read_value)); + } + } else { + #var_name = Some(Some(#read_value)); + } + } + } else { + quote! 
{ + let read_ref_flag = fory_core::serializer::util::field_need_write_ref_into( + _field.field_type.type_id, + _field.field_type.nullable, + ); + if read_ref_flag { + let ref_flag = context.reader.read_i8()?; + if ref_flag == fory_core::RefFlag::Null as i8 { + // Remote sent null but local field is non-nullable, use default + #var_name = 0; + } else { + #var_name = #read_value; + } + } else { + #var_name = #read_value; + } + } + } +} + +/// Generate compatible mode read code for u8/u16 Option fields +/// These need special handling because when remote field is non-nullable, +/// Java sends just the raw bytes without a ref flag +fn gen_compatible_primitive_option_read( + prim_type: &str, + var_name: &Ident, +) -> TokenStream { + let read_value = match prim_type { + "u8" => quote! { context.reader.read_u8()? }, + "u16" => quote! { context.reader.read_u16()? }, + _ => unreachable!("Only u8/u16 should use this function"), + }; + + quote! { + let read_ref_flag = fory_core::serializer::util::field_need_write_ref_into( + _field.field_type.type_id, + _field.field_type.nullable, + ); + if read_ref_flag { + let ref_flag = context.reader.read_i8()?; + if ref_flag == fory_core::RefFlag::Null as i8 { + #var_name = Some(None); + } else { + #var_name = Some(Some(#read_value)); + } + } else { + // Remote field is non-nullable, read raw value directly + #var_name = Some(Some(#read_value)); + } + } +} + /// Create a private variable name for a field during deserialization. /// For named fields: `_field_name` /// For tuple struct fields: `_0`, `_1`, etc. @@ -219,10 +348,28 @@ pub fn gen_read_field(field: &Field, private_ident: &Ident, field_name: &str) -> } _ => { let skip_type_info = should_skip_type_info_for_field(ty); + let meta = parse_field_meta(field).unwrap_or_default(); + // Check if this is Option or Option with encoding attributes + // These need special inline handling because the generic Option serializer + // doesn't know about field-level encoding attributes. 
+ if is_option_encoding_primitive(ty, &meta) { + let inner_name = get_option_inner_primitive_name(ty).unwrap(); + let reader_method = get_primitive_reader_method_with_encoding(inner_name, &meta); + let reader_ident = syn::Ident::new(reader_method, proc_macro2::Span::call_site()); + // For Option, read null flag first, then value if not null + quote! { + let ref_flag = context.reader.read_i8()?; + let #private_ident = if ref_flag == fory_core::RefFlag::Null as i8 { + None + } else { + Some(context.reader.#reader_ident()?) + }; + } + } // Check if this is a direct primitive type that can use direct reader calls // Only apply when ref_mode is None (no ref tracking needed) - if ref_mode == FieldRefMode::None && is_direct_primitive_type(ty) { + else if ref_mode == FieldRefMode::None && is_direct_primitive_type(ty) { let type_name = extract_type_name(ty); if type_name == "String" { // String: call fory_read_data directly @@ -231,7 +378,8 @@ pub fn gen_read_field(field: &Field, private_ident: &Ident, field_name: &str) -> } } else { // Numeric primitives: use direct buffer methods - let reader_method = get_primitive_reader_method(&type_name); + // For u32/u64, consider encoding attributes + let reader_method = get_primitive_reader_method_with_encoding(&type_name, &meta); let reader_ident = syn::Ident::new(reader_method, proc_macro2::Span::call_site()); quote! 
{ @@ -540,7 +688,64 @@ pub(crate) fn gen_read_compatible_match_arm_body( StructField::None => { let skip_type_info = should_skip_type_info_for_field(ty); let dec_by_option = need_declared_by_option(field); - if skip_type_info { + let is_option_type = extract_option_inner_type(ty).is_some(); + + // Check if this is a u32/u64 field that needs encoding-aware reading + if let Some(unsigned_type) = is_unsigned_encoding_type(ty) { + gen_compatible_unsigned_read(unsigned_type, var_name, is_option_type || dec_by_option) + } else if is_option_type { + // Check if it's Option or Option which need special handling + if let Some(prim_type) = is_compatible_primitive_type(ty) { + if prim_type == "u8" || prim_type == "u16" { + gen_compatible_primitive_option_read(prim_type, var_name) + } else { + // u32/u64 handled above + unreachable!() + } + } else if skip_type_info { + // Non-primitive Option type with skip_type_info + quote! { + let read_ref_flag = fory_core::serializer::util::field_need_write_ref_into( + _field.field_type.type_id, + _field.field_type.nullable, + ); + // Use RefMode::Tracking if remote field has ref_tracking enabled + let ref_mode = if _field.field_type.ref_tracking { + fory_core::RefMode::Tracking + } else if read_ref_flag { + fory_core::RefMode::NullOnly + } else { + fory_core::RefMode::None + }; + if read_ref_flag || _field.field_type.ref_tracking { + #var_name = Some(<#ty as fory_core::Serializer>::fory_read(context, ref_mode, false)?); + } else { + #var_name = Some(<#ty as fory_core::Serializer>::fory_read_data(context)?); + } + } + } else { + // Non-primitive Option type without skip_type_info + quote! 
{ + let read_ref_flag = fory_core::serializer::util::field_need_write_ref_into( + _field.field_type.type_id, + _field.field_type.nullable, + ); + // Use RefMode::Tracking if remote field has ref_tracking enabled + let ref_mode = if _field.field_type.ref_tracking { + fory_core::RefMode::Tracking + } else if read_ref_flag { + fory_core::RefMode::NullOnly + } else { + fory_core::RefMode::None + }; + // For ref-tracked struct types, Java writes type info after RefValue flag + let read_type_info = fory_core::types::need_to_write_type_for_field( + <#ty as fory_core::Serializer>::fory_static_type_id() + ); + #var_name = Some(<#ty as fory_core::Serializer>::fory_read(context, ref_mode, read_type_info)?); + } + } + } else if skip_type_info { if dec_by_option { quote! { let read_ref_flag = fory_core::serializer::util::field_need_write_ref_into( diff --git a/rust/fory-derive/src/object/util.rs b/rust/fory-derive/src/object/util.rs index 3b7746e745..019c2f9c55 100644 --- a/rust/fory-derive/src/object/util.rs +++ b/rust/fory-derive/src/object/util.rs @@ -760,6 +760,46 @@ pub(super) fn get_primitive_writer_method(type_name: &str) -> &'static str { .unwrap_or_else(|| panic!("type_name '{}' must be a primitive type", type_name)) } +/// Get the writer method name for a primitive numeric type, considering encoding attributes. 
+/// +/// For u32 fields: +/// - compress=true (default): write_varuint32 +/// - compress=false: write_u32 (fixed 4-byte) +/// +/// For u64 fields: +/// - encoding="varint" (default): write_varuint64 +/// - encoding="fixed": write_u64 (fixed 8-byte) +/// - encoding="tagged": write_tagged_varuint64 +pub(super) fn get_primitive_writer_method_with_encoding( + type_name: &str, + meta: &super::field_meta::ForyFieldMeta, +) -> &'static str { + use super::field_meta::U64Encoding; + + // Handle u32 with compress attribute + if type_name == "u32" { + if let Some(false) = meta.compress { + return "write_u32"; // Fixed 4-byte encoding + } + return "write_varuint32"; // Variable-length (default) + } + + // Handle u64 with encoding attribute + if type_name == "u64" { + if let Some(encoding) = meta.encoding { + return match encoding { + U64Encoding::Varint => "write_varuint64", + U64Encoding::Fixed => "write_u64", + U64Encoding::Tagged => "write_tagged_u64", + }; + } + return "write_varuint64"; // Variable-length (default) + } + + // For other types, use the default method from PRIMITIVE_IO_METHODS + get_primitive_writer_method(type_name) +} + /// Get the reader method name for a primitive numeric type /// Panics if type_name is not a primitive type pub(super) fn get_primitive_reader_method(type_name: &str) -> &'static str { @@ -770,6 +810,92 @@ pub(super) fn get_primitive_reader_method(type_name: &str) -> &'static str { .unwrap_or_else(|| panic!("type_name '{}' must be a primitive type", type_name)) } +/// Get the reader method name for a primitive numeric type, considering encoding attributes. 
+/// +/// For u32 fields: +/// - compress=true (default): read_varuint32 +/// - compress=false: read_u32 (fixed 4-byte) +/// +/// For u64 fields: +/// - encoding="varint" (default): read_varuint64 +/// - encoding="fixed": read_u64 (fixed 8-byte) +/// - encoding="tagged": read_tagged_varuint64 +pub(super) fn get_primitive_reader_method_with_encoding( + type_name: &str, + meta: &super::field_meta::ForyFieldMeta, +) -> &'static str { + use super::field_meta::U64Encoding; + + // Handle u32 with compress attribute + if type_name == "u32" { + if let Some(false) = meta.compress { + return "read_u32"; // Fixed 4-byte encoding + } + return "read_varuint32"; // Variable-length (default) + } + + // Handle u64 with encoding attribute + if type_name == "u64" { + if let Some(encoding) = meta.encoding { + return match encoding { + U64Encoding::Varint => "read_varuint64", + U64Encoding::Fixed => "read_u64", + U64Encoding::Tagged => "read_tagged_u64", + }; + } + return "read_varuint64"; // Variable-length (default) + } + + // For other types, use the default method from PRIMITIVE_IO_METHODS + get_primitive_reader_method(type_name) +} + +/// Check if a type is Option or Option that needs encoding-aware handling +/// based on the field metadata (compress or encoding attributes). +pub(super) fn is_option_encoding_primitive( + ty: &Type, + meta: &super::field_meta::ForyFieldMeta, +) -> bool { + if let Some(inner_name) = get_option_inner_primitive_name(ty) { + // For u32, check compress attribute + if inner_name == "u32" && meta.compress.is_some() { + return true; + } + // For u64, check encoding attribute + if inner_name == "u64" && meta.encoding.is_some() { + return true; + } + } + false +} + +/// Get the inner primitive name if the type is Option +/// Returns Some("u32"), Some("u64"), etc. for Option, Option, etc. 
+pub(super) fn get_option_inner_primitive_name(ty: &Type) -> Option<&'static str> { + use syn::PathArguments; + if let Type::Path(type_path) = ty { + if let Some(seg) = type_path.path.segments.last() { + if seg.ident == "Option" { + if let PathArguments::AngleBracketed(args) = &seg.arguments { + if let Some(syn::GenericArgument::Type(Type::Path(inner_path))) = + args.args.first() + { + if let Some(inner_seg) = inner_path.path.segments.last() { + let inner_name = inner_seg.ident.to_string(); + // Return static string for known primitives + return PRIMITIVE_IO_METHODS + .iter() + .find(|(name, _, _)| *name == inner_name.as_str()) + .map(|(name, _, _)| *name); + } + } + } + } + } + } + None +} + pub(crate) fn get_type_id_by_type_ast(ty: &Type) -> u32 { let ty_str: String = ty .to_token_stream() @@ -904,14 +1030,17 @@ fn get_primitive_type_size(type_id_num: u32) -> i32 { } fn is_compress(type_id: u32) -> bool { - // Only signed integer types are marked as compressible - // to maintain backward compatibility with field ordering + // Variable-length and tagged types are marked as compressible + // This must match Java's Types.isCompressedType() for xlang compatibility [ - TypeId::INT32 as u32, - TypeId::INT64 as u32, + // Signed compressed types TypeId::VARINT32 as u32, TypeId::VARINT64 as u32, TypeId::TAGGED_INT64 as u32, + // Unsigned compressed types + TypeId::VAR_UINT32 as u32, + TypeId::VAR_UINT64 as u32, + TypeId::TAGGED_UINT64 as u32, ] .contains(&type_id) } @@ -944,6 +1073,8 @@ fn is_internal_type_id(type_id: u32) -> bool { /// Group fields into serialization categories while normalizing field names to snake_case. /// The returned groups preserve the ordering rules required by the serialization layout. 
fn group_fields_by_type(fields: &[&Field]) -> FieldGroups { + use super::field_meta::parse_field_meta; + let mut primitive_fields = Vec::new(); let mut nullable_primitive_fields = Vec::new(); let mut internal_type_fields = Vec::new(); @@ -961,25 +1092,6 @@ fn group_fields_by_type(fields: &[&Field]) -> FieldGroups { } } - let mut group_field = |ident: String, ty: &str| { - let type_id = get_type_id_by_name(ty); - // Categorize based on type_id - if PRIMITIVE_TYPE_NAMES.contains(&ty) { - primitive_fields.push((ident, ty.to_string(), type_id)); - } else if is_internal_type_id(type_id) { - internal_type_fields.push((ident, ty.to_string(), type_id)); - } else if type_id == TypeId::LIST as u32 { - list_fields.push((ident, ty.to_string(), type_id)); - } else if type_id == TypeId::SET as u32 { - set_fields.push((ident, ty.to_string(), type_id)); - } else if type_id == TypeId::MAP as u32 { - map_fields.push((ident, ty.to_string(), type_id)); - } else { - // User-defined type - other_fields.push((ident, ty.to_string(), type_id)); - } - }; - for (idx, field) in fields.iter().enumerate() { let raw_ident = get_field_name(field, idx); let ident = to_snake_case(&raw_ident); @@ -989,6 +1101,9 @@ fn group_fields_by_type(fields: &[&Field]) -> FieldGroups { continue; } + // Parse field metadata to get encoding attributes + let meta = parse_field_meta(field).unwrap_or_default(); + let ty: String = field .ty .to_token_stream() @@ -996,16 +1111,44 @@ fn group_fields_by_type(fields: &[&Field]) -> FieldGroups { .chars() .filter(|c| !c.is_whitespace()) .collect::(); + + // Closure to group non-option fields, considering encoding attributes + let mut group_field = |ident: String, ty_str: &str, is_primitive: bool| { + let base_type_id = get_type_id_by_name(ty_str); + // Adjust type ID based on encoding attributes for u32/u64 fields + let type_id = adjust_type_id_for_encoding(base_type_id, &meta); + + // Categorize based on type_id + if is_primitive { + primitive_fields.push((ident, 
ty_str.to_string(), type_id)); + } else if is_internal_type_id(type_id) { + internal_type_fields.push((ident, ty_str.to_string(), type_id)); + } else if type_id == TypeId::LIST as u32 { + list_fields.push((ident, ty_str.to_string(), type_id)); + } else if type_id == TypeId::SET as u32 { + set_fields.push((ident, ty_str.to_string(), type_id)); + } else if type_id == TypeId::MAP as u32 { + map_fields.push((ident, ty_str.to_string(), type_id)); + } else { + // User-defined type + other_fields.push((ident, ty_str.to_string(), type_id)); + } + }; + // handle Option specially if let Some(inner) = extract_option_inner(&ty) { if PRIMITIVE_TYPE_NAMES.contains(&inner) { - let type_id = get_primitive_type_id(inner); + // Get base type ID and adjust for encoding attributes + let base_type_id = get_primitive_type_id(inner); + let type_id = adjust_type_id_for_encoding(base_type_id, &meta); nullable_primitive_fields.push((ident, ty.to_string(), type_id)); } else { - group_field(ident, inner); + group_field(ident, inner, false); } + } else if PRIMITIVE_TYPE_NAMES.contains(&ty.as_str()) { + group_field(ident, &ty, true); } else { - group_field(ident, &ty); + group_field(ident, &ty, false); } } @@ -1124,6 +1267,47 @@ struct FieldFingerprintInfo { is_option_type: bool, } +/// Adjusts type ID based on encoding attributes for u32/u64 fields. 
+/// +/// For u32 fields: +/// - compress=true (default): VAR_UINT32 (12) +/// - compress=false: UINT32 (11, fixed) +/// +/// For u64 fields: +/// - encoding="varint" (default): VAR_UINT64 (14) +/// - encoding="fixed": UINT64 (13, fixed 8-byte) +/// - encoding="tagged": TAGGED_UINT64 (15) +fn adjust_type_id_for_encoding( + base_type_id: u32, + meta: &super::field_meta::ForyFieldMeta, +) -> u32 { + use super::field_meta::U64Encoding; + + // Handle u32 fields with compress attribute + if base_type_id == TypeId::VAR_UINT32 as u32 { + if let Some(compress) = meta.compress { + if !compress { + return TypeId::UINT32 as u32; // Fixed 4-byte encoding + } + } + return base_type_id; // VAR_UINT32 (default) + } + + // Handle u64 fields with encoding attribute + if base_type_id == TypeId::VAR_UINT64 as u32 { + if let Some(encoding) = meta.encoding { + return match encoding { + U64Encoding::Varint => TypeId::VAR_UINT64 as u32, + U64Encoding::Fixed => TypeId::UINT64 as u32, + U64Encoding::Tagged => TypeId::TAGGED_UINT64 as u32, + }; + } + return base_type_id; // VAR_UINT64 (default) + } + + base_type_id +} + /// Computes struct fingerprint string at compile time (during proc-macro execution). 
/// /// **Fingerprint Format:** `,,,;` @@ -1151,8 +1335,9 @@ fn compute_struct_fingerprint(fields: &[&Field]) -> String { let ref_tracking = meta.effective_ref_tracking(type_class); let explicit_nullable = meta.nullable; - // Get compile-time TypeId (UNKNOWN for user-defined types including enums/unions) - let type_id = get_type_id_by_type_ast(&field.ty); + // Get compile-time TypeId, considering encoding attributes for u32/u64 fields + let base_type_id = get_type_id_by_type_ast(&field.ty); + let type_id = adjust_type_id_for_encoding(base_type_id, &meta); // Check if field type is Option let ty_str: String = field diff --git a/rust/fory-derive/src/object/write.rs b/rust/fory-derive/src/object/write.rs index 3960860103..f6f47224f1 100644 --- a/rust/fory-derive/src/object/write.rs +++ b/rust/fory-derive/src/object/write.rs @@ -15,11 +15,14 @@ // specific language governing permissions and limitations // under the License. +use super::field_meta::parse_field_meta; use super::util::{ classify_trait_object_field, create_wrapper_types_arc, create_wrapper_types_rc, determine_field_ref_mode, extract_type_name, gen_struct_version_hash_ts, get_field_accessor, - get_field_name, get_filtered_source_fields_iter, get_primitive_writer_method, get_struct_name, - get_type_id_by_type_ast, is_debug_enabled, is_direct_primitive_type, FieldRefMode, StructField, + get_field_name, get_filtered_source_fields_iter, get_option_inner_primitive_name, + get_primitive_writer_method_with_encoding, get_struct_name, get_type_id_by_type_ast, + is_debug_enabled, is_direct_primitive_type, is_option_encoding_primitive, FieldRefMode, + StructField, }; use crate::util::SourceField; use fory_core::types::TypeId; @@ -248,10 +251,28 @@ fn gen_write_field_impl( } _ => { let type_id = get_type_id_by_type_ast(ty); + let meta = parse_field_meta(field).unwrap_or_default(); + // Check if this is Option or Option with encoding attributes + // These need special inline handling because the generic Option 
serializer + // doesn't know about field-level encoding attributes. + if is_option_encoding_primitive(ty, &meta) { + let inner_name = get_option_inner_primitive_name(ty).unwrap(); + let writer_method = get_primitive_writer_method_with_encoding(inner_name, &meta); + let writer_ident = syn::Ident::new(writer_method, proc_macro2::Span::call_site()); + // For Option, write null flag first, then value if Some + quote! { + if let Some(v) = &#value_ts { + context.writer.write_i8(fory_core::RefFlag::NotNullValue as i8); + context.writer.#writer_ident(*v); + } else { + context.writer.write_i8(fory_core::RefFlag::Null as i8); + } + } + } // Check if this is a direct primitive type that can use direct writer calls // Only apply when ref_mode is None (no ref tracking needed) - if ref_mode == FieldRefMode::None && is_direct_primitive_type(ty) { + else if ref_mode == FieldRefMode::None && is_direct_primitive_type(ty) { let type_name = extract_type_name(ty); if type_name == "String" { // String: call fory_write_data directly @@ -260,7 +281,8 @@ fn gen_write_field_impl( } } else { // Numeric primitives: use direct buffer methods - let writer_method = get_primitive_writer_method(&type_name); + // For u32/u64, consider encoding attributes + let writer_method = get_primitive_writer_method_with_encoding(&type_name, &meta); let writer_ident = syn::Ident::new(writer_method, proc_macro2::Span::call_site()); // For primitives: diff --git a/rust/tests/tests/test_cross_language.rs b/rust/tests/tests/test_cross_language.rs index 4f0c3608f0..c988508a8b 100644 --- a/rust/tests/tests/test_cross_language.rs +++ b/rust/tests/tests/test_cross_language.rs @@ -1896,30 +1896,33 @@ fn test_circular_ref_compatible() { #[derive(ForyObject, Debug, PartialEq)] #[fory(debug)] struct UnsignedSchemaConsistent { - // Primitive unsigned fields (non-nullable) - u8_field: u8, // UINT8 - fixed 8-bit - u16_field: u16, // UINT16 - fixed 16-bit - u32_var: u32, // VAR_UINT32 - variable-length - u32_fixed: u32, // 
UINT32 - fixed 4-byte (TODO: add encoding tag) - u64_var: u64, // VAR_UINT64 - variable-length - u64_fixed: u64, // UINT64 - fixed 8-byte (TODO: add encoding tag) - u64_tagged: u64, // TAGGED_UINT64 (TODO: add encoding tag) + // Primitive unsigned fields (non-nullable, use Field suffix to avoid reserved keywords) + u8_field: u8, // UINT8 - fixed 8-bit + u16_field: u16, // UINT16 - fixed 16-bit + u32_var_field: u32, // VAR_UINT32 - variable-length (default) + #[fory(compress = false)] + u32_fixed_field: u32, // UINT32 - fixed 4-byte + u64_var_field: u64, // VAR_UINT64 - variable-length (default) + #[fory(encoding = "fixed")] + u64_fixed_field: u64, // UINT64 - fixed 8-byte + #[fory(encoding = "tagged")] + u64_tagged_field: u64, // TAGGED_UINT64 // Nullable unsigned fields (using Option) #[fory(nullable = true)] - u8_nullable: Option, + u8_nullable_field: Option, #[fory(nullable = true)] - u16_nullable: Option, + u16_nullable_field: Option, #[fory(nullable = true)] - u32_var_nullable: Option, + u32_var_nullable_field: Option, + #[fory(nullable = true, compress = false)] + u32_fixed_nullable_field: Option, #[fory(nullable = true)] - u32_fixed_nullable: Option, - #[fory(nullable = true)] - u64_var_nullable: Option, - #[fory(nullable = true)] - u64_fixed_nullable: Option, - #[fory(nullable = true)] - u64_tagged_nullable: Option, + u64_var_nullable_field: Option, + #[fory(nullable = true, encoding = "fixed")] + u64_fixed_nullable_field: Option, + #[fory(nullable = true, encoding = "tagged")] + u64_tagged_nullable_field: Option, } /// Test struct for unsigned numbers in COMPATIBLE mode. 
@@ -1931,27 +1934,30 @@ struct UnsignedSchemaConsistent { struct UnsignedSchemaCompatible { // Group 1: Nullable in Rust (Option), non-nullable in Java #[fory(nullable = true)] - u8_field: Option, - #[fory(nullable = true)] - u16_field: Option, - #[fory(nullable = true)] - u32_var: Option, - #[fory(nullable = true)] - u32_fixed: Option, + u8_field1: Option, #[fory(nullable = true)] - u64_var: Option, + u16_field1: Option, #[fory(nullable = true)] - u64_fixed: Option, + u32_var_field1: Option, + #[fory(nullable = true, compress = false)] + u32_fixed_field1: Option, #[fory(nullable = true)] - u64_tagged: Option, + u64_var_field1: Option, + #[fory(nullable = true, encoding = "fixed")] + u64_fixed_field1: Option, + #[fory(nullable = true, encoding = "tagged")] + u64_tagged_field1: Option, // Group 2: Non-nullable in Rust, nullable in Java u8_field2: u8, u16_field2: u16, u32_var_field2: u32, + #[fory(compress = false)] u32_fixed_field2: u32, u64_var_field2: u64, + #[fory(encoding = "fixed")] u64_fixed_field2: u64, + #[fory(encoding = "tagged")] u64_tagged_field2: u64, } @@ -1969,20 +1975,20 @@ fn test_unsigned_schema_consistent() { // Primitive unsigned fields u8_field: 200, u16_field: 60000, - u32_var: 3000000000, - u32_fixed: 4000000000, - u64_var: 10000000000, - u64_fixed: 15000000000, - u64_tagged: 1000000000, + u32_var_field: 3000000000, + u32_fixed_field: 4000000000, + u64_var_field: 10000000000, + u64_fixed_field: 15000000000, + u64_tagged_field: 1000000000, // Nullable unsigned fields with values - u8_nullable: Some(128), - u16_nullable: Some(40000), - u32_var_nullable: Some(2500000000), - u32_fixed_nullable: Some(3500000000), - u64_var_nullable: Some(8000000000), - u64_fixed_nullable: Some(12000000000), - u64_tagged_nullable: Some(500000000), + u8_nullable_field: Some(128), + u16_nullable_field: Some(40000), + u32_var_nullable_field: Some(2500000000), + u32_fixed_nullable_field: Some(3500000000), + u64_var_nullable_field: Some(8000000000), + 
u64_fixed_nullable_field: Some(12000000000), + u64_tagged_nullable_field: Some(500000000), }; let remote_obj: UnsignedSchemaConsistent = fory.deserialize(&bytes).unwrap(); @@ -2004,13 +2010,13 @@ fn test_unsigned_schema_compatible() { let local_obj = UnsignedSchemaCompatible { // Group 1: Option fields (values from Java's non-nullable fields) - u8_field: Some(200), - u16_field: Some(60000), - u32_var: Some(3000000000), - u32_fixed: Some(4000000000), - u64_var: Some(10000000000), - u64_fixed: Some(15000000000), - u64_tagged: Some(1000000000), + u8_field1: Some(200), + u16_field1: Some(60000), + u32_var_field1: Some(3000000000), + u32_fixed_field1: Some(4000000000), + u64_var_field1: Some(10000000000), + u64_fixed_field1: Some(15000000000), + u64_tagged_field1: Some(1000000000), // Group 2: Non-nullable fields (values from Java's nullable fields) u8_field2: 128, From 098117b07b7f1c204517cd1d095390cdf9da28ea Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Sat, 10 Jan 2026 14:23:35 +0800 Subject: [PATCH 22/44] update xlang tests in java side --- .../org/apache/fory/xlang/CPPXlangTest.java | 15 +++ .../org/apache/fory/xlang/GoXlangTest.java | 2 +- .../apache/fory/xlang/PythonXlangTest.java | 15 +++ .../org/apache/fory/xlang/RustXlangTest.java | 15 +++ .../org/apache/fory/xlang/XlangTestBase.java | 92 +++++++++---------- 5 files changed, 92 insertions(+), 47 deletions(-) diff --git a/java/fory-core/src/test/java/org/apache/fory/xlang/CPPXlangTest.java b/java/fory-core/src/test/java/org/apache/fory/xlang/CPPXlangTest.java index 7cc0e9c2ea..d596601e5e 100644 --- a/java/fory-core/src/test/java/org/apache/fory/xlang/CPPXlangTest.java +++ b/java/fory-core/src/test/java/org/apache/fory/xlang/CPPXlangTest.java @@ -393,4 +393,19 @@ public void testCircularRefSchemaConsistent(boolean enableCodegen) throws java.i public void testCircularRefCompatible(boolean enableCodegen) throws java.io.IOException { super.testCircularRefCompatible(enableCodegen); } + + @Test(dataProvider = 
"enableCodegen") + public void testUnsignedSchemaConsistent(boolean enableCodegen) throws java.io.IOException { + super.testUnsignedSchemaConsistent(enableCodegen); + } + + @Test(dataProvider = "enableCodegen") + public void testUnsignedSchemaConsistentSimple(boolean enableCodegen) throws java.io.IOException { + super.testUnsignedSchemaConsistentSimple(enableCodegen); + } + + @Test(dataProvider = "enableCodegen") + public void testUnsignedSchemaCompatible(boolean enableCodegen) throws java.io.IOException { + super.testUnsignedSchemaCompatible(enableCodegen); + } } diff --git a/java/fory-core/src/test/java/org/apache/fory/xlang/GoXlangTest.java b/java/fory-core/src/test/java/org/apache/fory/xlang/GoXlangTest.java index 5a4a51e780..0b463c60d4 100644 --- a/java/fory-core/src/test/java/org/apache/fory/xlang/GoXlangTest.java +++ b/java/fory-core/src/test/java/org/apache/fory/xlang/GoXlangTest.java @@ -49,7 +49,7 @@ public class GoXlangTest extends XlangTestBase { protected void ensurePeerReady() { String enabled = System.getenv("FORY_GO_JAVA_CI"); if (!"1".equals(enabled)) { -// throw new SkipException("Skipping GoXlangTest: FORY_GO_JAVA_CI not set to 1"); + throw new SkipException("Skipping GoXlangTest: FORY_GO_JAVA_CI not set to 1"); } boolean goInstalled = true; try { diff --git a/java/fory-core/src/test/java/org/apache/fory/xlang/PythonXlangTest.java b/java/fory-core/src/test/java/org/apache/fory/xlang/PythonXlangTest.java index 3a03229a0a..c7433f240e 100644 --- a/java/fory-core/src/test/java/org/apache/fory/xlang/PythonXlangTest.java +++ b/java/fory-core/src/test/java/org/apache/fory/xlang/PythonXlangTest.java @@ -310,4 +310,19 @@ public void testCircularRefSchemaConsistent(boolean enableCodegen) throws IOExce public void testCircularRefCompatible(boolean enableCodegen) throws IOException { super.testCircularRefCompatible(enableCodegen); } + + @Test(dataProvider = "enableCodegen") + public void testUnsignedSchemaConsistent(boolean enableCodegen) throws 
java.io.IOException { + super.testUnsignedSchemaConsistent(enableCodegen); + } + + @Test(dataProvider = "enableCodegen") + public void testUnsignedSchemaConsistentSimple(boolean enableCodegen) throws java.io.IOException { + super.testUnsignedSchemaConsistentSimple(enableCodegen); + } + + @Test(dataProvider = "enableCodegen") + public void testUnsignedSchemaCompatible(boolean enableCodegen) throws java.io.IOException { + super.testUnsignedSchemaCompatible(enableCodegen); + } } diff --git a/java/fory-core/src/test/java/org/apache/fory/xlang/RustXlangTest.java b/java/fory-core/src/test/java/org/apache/fory/xlang/RustXlangTest.java index 2bf9490480..56f1061257 100644 --- a/java/fory-core/src/test/java/org/apache/fory/xlang/RustXlangTest.java +++ b/java/fory-core/src/test/java/org/apache/fory/xlang/RustXlangTest.java @@ -279,4 +279,19 @@ public void testCircularRefSchemaConsistent(boolean enableCodegen) throws java.i public void testCircularRefCompatible(boolean enableCodegen) throws java.io.IOException { super.testCircularRefCompatible(enableCodegen); } + + @Test(dataProvider = "enableCodegen") + public void testUnsignedSchemaConsistent(boolean enableCodegen) throws java.io.IOException { + super.testUnsignedSchemaConsistent(enableCodegen); + } + + @Test(dataProvider = "enableCodegen") + public void testUnsignedSchemaConsistentSimple(boolean enableCodegen) throws java.io.IOException { + super.testUnsignedSchemaConsistentSimple(enableCodegen); + } + + @Test(dataProvider = "enableCodegen") + public void testUnsignedSchemaCompatible(boolean enableCodegen) throws java.io.IOException { + super.testUnsignedSchemaCompatible(enableCodegen); + } } diff --git a/java/fory-core/src/test/java/org/apache/fory/xlang/XlangTestBase.java b/java/fory-core/src/test/java/org/apache/fory/xlang/XlangTestBase.java index 52b928cd39..e9e40c987e 100644 --- a/java/fory-core/src/test/java/org/apache/fory/xlang/XlangTestBase.java +++ 
b/java/fory-core/src/test/java/org/apache/fory/xlang/XlangTestBase.java @@ -2479,54 +2479,54 @@ private Object normalizeNulls(Object obj) { */ @Data static class UnsignedSchemaConsistent { - // Primitive unsigned fields - @Uint8Type byte u8; + // Primitive unsigned fields (use Field suffix to avoid reserved keywords in Rust/Go) + @Uint8Type byte u8Field; - @Uint16Type short u16; + @Uint16Type short u16Field; @Uint32Type(compress = true) - int u32Var; + int u32VarField; @Uint32Type(compress = false) - int u32Fixed; + int u32FixedField; @Uint64Type(encoding = LongEncoding.VARINT) - long u64Var; + long u64VarField; @Uint64Type(encoding = LongEncoding.FIXED) - long u64Fixed; + long u64FixedField; @Uint64Type(encoding = LongEncoding.TAGGED) - long u64Tagged; + long u64TaggedField; // Boxed nullable unsigned fields @ForyField(nullable = true) @Uint8Type - Byte u8Nullable; + Byte u8NullableField; @ForyField(nullable = true) @Uint16Type - Short u16Nullable; + Short u16NullableField; @ForyField(nullable = true) @Uint32Type(compress = true) - Integer u32VarNullable; + Integer u32VarNullableField; @ForyField(nullable = true) @Uint32Type(compress = false) - Integer u32FixedNullable; + Integer u32FixedNullableField; @ForyField(nullable = true) @Uint64Type(encoding = LongEncoding.VARINT) - Long u64VarNullable; + Long u64VarNullableField; @ForyField(nullable = true) @Uint64Type(encoding = LongEncoding.FIXED) - Long u64FixedNullable; + Long u64FixedNullableField; @ForyField(nullable = true) @Uint64Type(encoding = LongEncoding.TAGGED) - Long u64TaggedNullable; + Long u64TaggedNullableField; } @Data @@ -2576,22 +2576,22 @@ public void testUnsignedSchemaConsistent(boolean enableCodegen) throws java.io.I UnsignedSchemaConsistent obj = new UnsignedSchemaConsistent(); // Primitive fields - obj.u8 = (byte) 200; // Max uint8 range testing - obj.u16 = (short) 60000; // Max uint16 range testing - obj.u32Var = (int) 3000000000L; // > INT_MAX to test unsigned - obj.u32Fixed = (int) 
4000000000L; - obj.u64Var = 10000000000L; - obj.u64Fixed = 15000000000L; - obj.u64Tagged = 1000000000L; // Within tagged range + obj.u8Field = (byte) 200; // Max uint8 range testing + obj.u16Field = (short) 60000; // Max uint16 range testing + obj.u32VarField = (int) 3000000000L; // > INT_MAX to test unsigned + obj.u32FixedField = (int) 4000000000L; + obj.u64VarField = 10000000000L; + obj.u64FixedField = 15000000000L; + obj.u64TaggedField = 1000000000L; // Within tagged range // Nullable boxed fields with values - obj.u8Nullable = (byte) 128; - obj.u16Nullable = (short) 40000; - obj.u32VarNullable = (int) 2500000000L; - obj.u32FixedNullable = (int) 3500000000L; - obj.u64VarNullable = 8000000000L; - obj.u64FixedNullable = 12000000000L; - obj.u64TaggedNullable = 500000000L; + obj.u8NullableField = (byte) 128; + obj.u16NullableField = (short) 40000; + obj.u32VarNullableField = (int) 2500000000L; + obj.u32FixedNullableField = (int) 3500000000L; + obj.u64VarNullableField = 8000000000L; + obj.u64FixedNullableField = 12000000000L; + obj.u64TaggedNullableField = 500000000L; // First verify Java serialization works Assert.assertEquals(xserDe(fory, obj), obj); @@ -2631,24 +2631,24 @@ private static String bytesToHex(byte[] bytes) { @Data static class UnsignedSchemaCompatible { // Group 1: Primitive unsigned fields (non-nullable in Java, Optional in other languages) - @Uint8Type byte u8; + @Uint8Type byte u8Field1; - @Uint16Type short u16; + @Uint16Type short u16Field1; @Uint32Type(compress = true) - int u32Var; + int u32VarField1; @Uint32Type(compress = false) - int u32Fixed; + int u32FixedField1; @Uint64Type(encoding = LongEncoding.VARINT) - long u64Var; + long u64VarField1; @Uint64Type(encoding = LongEncoding.FIXED) - long u64Fixed; + long u64FixedField1; @Uint64Type(encoding = LongEncoding.TAGGED) - long u64Tagged; + long u64TaggedField1; // Group 2: Nullable boxed fields (nullable in Java, non-Optional in other languages) @ForyField(nullable = true) @@ -2693,16 +2693,16 
@@ public void testUnsignedSchemaCompatible(boolean enableCodegen) throws java.io.I fory.register(UnsignedSchemaCompatible.class, 502); UnsignedSchemaCompatible obj = new UnsignedSchemaCompatible(); - // Primitive fields - obj.u8 = (byte) 200; - obj.u16 = (short) 60000; - obj.u32Var = (int) 3000000000L; - obj.u32Fixed = (int) 4000000000L; - obj.u64Var = 10000000000L; - obj.u64Fixed = 15000000000L; - obj.u64Tagged = 1000000000L; - - // Group 2 fields with values + // Group 1: Primitive fields + obj.u8Field1 = (byte) 200; + obj.u16Field1 = (short) 60000; + obj.u32VarField1 = (int) 3000000000L; + obj.u32FixedField1 = (int) 4000000000L; + obj.u64VarField1 = 10000000000L; + obj.u64FixedField1 = 15000000000L; + obj.u64TaggedField1 = 1000000000L; + + // Group 2: Nullable boxed fields with values obj.u8Field2 = (byte) 128; obj.u16Field2 = (short) 40000; obj.u32VarField2 = (int) 2500000000L; From 1c5670b76ade6eff5e60f9b8ae4e66873368e12f Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Sat, 10 Jan 2026 17:11:18 +0800 Subject: [PATCH 23/44] make cpp support configure number encoding and sort fields for all numerics --- cpp/fory/meta/field.h | 274 +++++++++ cpp/fory/serialization/context.h | 22 + .../serialization/smart_ptr_serializers.h | 4 + cpp/fory/serialization/struct_serializer.h | 580 +++++++++++++++++- cpp/fory/serialization/type_resolver.cc | 20 +- cpp/fory/serialization/type_resolver.h | 81 ++- cpp/fory/serialization/xlang_test_main.cc | 362 +++++++---- cpp/fory/util/buffer.h | 64 ++ 8 files changed, 1232 insertions(+), 175 deletions(-) diff --git a/cpp/fory/meta/field.h b/cpp/fory/meta/field.h index 7ec8835208..d9ae03b537 100644 --- a/cpp/fory/meta/field.h +++ b/cpp/fory/meta/field.h @@ -120,6 +120,175 @@ inline constexpr bool has_field_tags_v = ForyFieldTagsImpl::has_tags; } // namespace detail +// ============================================================================ +// Field Encoding Types for Unsigned Integers +// 
============================================================================ + +/// Encoding strategies for integer fields +enum class Encoding { + Default = 0, // Use type's default encoding + Varint = 1, // Variable-length encoding (smaller values use fewer bytes) + Fixed = 2, // Fixed-size encoding (always uses full type width) + Tagged = 3 // Tagged encoding (uses tag byte + value) +}; + +// ============================================================================ +// FieldMeta - Compile-time Field Configuration with Builder Pattern +// ============================================================================ + +/// Compile-time field metadata with fluent builder API. +/// Supports both: +/// - Simple: F(0) - just field ID +/// - Full: F(0).nullable().varint().compress(false) +struct FieldMeta { + int16_t id_ = -1; + bool nullable_ = false; + bool ref_ = false; + bool monomorphic_ = false; + Encoding encoding_ = Encoding::Default; + bool compress_ = true; + + // Builder methods - each returns a modified copy + constexpr FieldMeta id(int16_t v) const { + auto c = *this; + c.id_ = v; + return c; + } + constexpr FieldMeta nullable(bool v = true) const { + auto c = *this; + c.nullable_ = v; + return c; + } + constexpr FieldMeta ref(bool v = true) const { + auto c = *this; + c.ref_ = v; + return c; + } + constexpr FieldMeta monomorphic(bool v = true) const { + auto c = *this; + c.monomorphic_ = v; + return c; + } + constexpr FieldMeta encoding(Encoding v) const { + auto c = *this; + c.encoding_ = v; + return c; + } + constexpr FieldMeta compress(bool v) const { + auto c = *this; + c.compress_ = v; + return c; + } + + // Convenience shortcuts for common encodings + constexpr FieldMeta varint() const { return encoding(Encoding::Varint); } + constexpr FieldMeta fixed() const { return encoding(Encoding::Fixed); } + constexpr FieldMeta tagged() const { return encoding(Encoding::Tagged); } +}; + +/// Short factory function for FieldMeta - use F(id) in macros for 
brevity +constexpr FieldMeta F(int16_t id) { return FieldMeta{}.id(id); } + +namespace detail { + +// ============================================================================ +// Config Normalization - Handle both integer IDs and FieldMeta +// ============================================================================ + +/// Normalize configuration: convert integer to FieldMeta, pass FieldMeta +/// through +template constexpr auto normalize_config(T &&v) { + if constexpr (std::is_integral_v>) { + // Old syntax: just an integer ID + return FieldMeta{}.id(static_cast(v)); + } else if constexpr (std::is_same_v, FieldMeta>) { + // New syntax: already a FieldMeta + return v; + } else { + static_assert( + std::is_integral_v> || + std::is_same_v, FieldMeta>, + "Field config must be an integer ID or FieldMeta (use F(id)...)"); + return FieldMeta{}; + } +} + +/// Apply old-style tag to FieldMeta (for backward compatibility) +constexpr FieldMeta apply_tag(FieldMeta m, nullable) { return m.nullable(); } +constexpr FieldMeta apply_tag(FieldMeta m, not_null) { + return m.nullable(false); +} +constexpr FieldMeta apply_tag(FieldMeta m, ref) { return m.ref(); } +constexpr FieldMeta apply_tag(FieldMeta m, monomorphic) { + return m.monomorphic(); +} + +/// Fold multiple tags onto a base config +template +constexpr FieldMeta apply_tags(FieldMeta base, Tags... 
tags) { + ((base = apply_tag(base, tags)), ...); + return base; +} + +// ============================================================================ +// FieldEntry - Binds Member Pointer to Config for Compile-Time Verification +// ============================================================================ + +/// Field entry that stores member pointer (for verification) + configuration +template struct FieldEntry { + M T::*ptr; // Member pointer - compile-time field verification + const char *name; // Field name for debugging + FieldMeta meta; // Field configuration + + constexpr FieldEntry(M T::*p, const char *n, FieldMeta m) + : ptr(p), name(n), meta(m) {} +}; + +/// Create a FieldEntry with automatic type deduction +template +constexpr auto make_field_entry(M T::*ptr, const char *name, FieldMeta meta) { + return FieldEntry{ptr, name, meta}; +} + +/// Default: no field config defined for type T +template struct ForyFieldConfigImpl { + static constexpr bool has_config = false; +}; + +template +inline constexpr bool has_field_config_v = ForyFieldConfigImpl::has_config; + +/// Helper to get field encoding from ForyFieldConfigImpl +template struct GetFieldConfigEntry { + static constexpr Encoding encoding = Encoding::Default; + static constexpr int16_t id = -1; + static constexpr bool nullable = false; + static constexpr bool ref = false; + static constexpr bool monomorphic = false; + static constexpr bool compress = true; +}; + +template +struct GetFieldConfigEntry< + T, Index, + std::enable_if_t::has_config && + (Index < ForyFieldConfigImpl::field_count)>> { +private: + static constexpr auto get_entry() { + return std::get(ForyFieldConfigImpl::entries); + } + +public: + static constexpr Encoding encoding = get_entry().meta.encoding_; + static constexpr int16_t id = get_entry().meta.id_; + static constexpr bool nullable = get_entry().meta.nullable_; + static constexpr bool ref = get_entry().meta.ref_; + static constexpr bool monomorphic = 
get_entry().meta.monomorphic_; + static constexpr bool compress = get_entry().meta.compress_; +}; + +} // namespace detail + // ============================================================================ // fory::field Template // ============================================================================ @@ -522,3 +691,108 @@ struct GetFieldTagEntry< FORY_FT_ENTRIES_15(T, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, \ _13, _14, _15), \ FORY_FT_MAKE_ENTRY(T, _16) + +// ============================================================================ +// FORY_FIELD_CONFIG Macro - New Syntax with Member Pointer Verification +// ============================================================================ +// +// Usage: +// FORY_FIELD_CONFIG(MyStruct, +// (field1, F(0)), // Simple: just ID +// (field2, F(1).nullable()), // With nullable +// (field3, F(2).varint()), // With encoding +// (field4, F(3).nullable().ref()), // Multiple options +// (field5, 4) // Backward compatible: integer +// ID +// ); +// +// This macro: +// 1. Verifies field names exist at compile time via member pointers +// 2. Supports both integer IDs (old) and F(id).xxx() builder (new) +// 3. Stores configuration in a constexpr tuple for efficient access + +// Helper to stringify field name +#define FORY_FC_STRINGIFY(x) FORY_FC_STRINGIFY_I(x) +#define FORY_FC_STRINGIFY_I(x) #x + +// Extract field name (first element of tuple) +#define FORY_FC_NAME(tuple) FORY_FC_NAME_IMPL tuple +#define FORY_FC_NAME_IMPL(name, ...) name + +// Extract config (second element of tuple) +#define FORY_FC_CONFIG(tuple) FORY_FC_CONFIG_IMPL tuple +#define FORY_FC_CONFIG_IMPL(name, config, ...) 
config + +// Create a FieldEntry with member pointer verification +#define FORY_FC_MAKE_ENTRY(Type, tuple) \ + ::fory::detail::make_field_entry( \ + &Type::FORY_FC_NAME(tuple), FORY_FC_STRINGIFY(FORY_FC_NAME(tuple)), \ + ::fory::detail::normalize_config(FORY_FC_CONFIG(tuple))) + +// Generate entries using indirect expansion +#define FORY_FC_ENTRIES(Type, ...) \ + FORY_FC_ENTRIES_I(Type, FORY_PP_NARG(__VA_ARGS__), __VA_ARGS__) +#define FORY_FC_ENTRIES_I(Type, N, ...) FORY_FC_ENTRIES_II(Type, N, __VA_ARGS__) +#define FORY_FC_ENTRIES_II(Type, N, ...) FORY_FC_ENTRIES_##N(Type, __VA_ARGS__) + +// Generate entries for 1-32 fields +#define FORY_FC_ENTRIES_1(T, _1) FORY_FC_MAKE_ENTRY(T, _1) +#define FORY_FC_ENTRIES_2(T, _1, _2) \ + FORY_FC_MAKE_ENTRY(T, _1), FORY_FC_MAKE_ENTRY(T, _2) +#define FORY_FC_ENTRIES_3(T, _1, _2, _3) \ + FORY_FC_ENTRIES_2(T, _1, _2), FORY_FC_MAKE_ENTRY(T, _3) +#define FORY_FC_ENTRIES_4(T, _1, _2, _3, _4) \ + FORY_FC_ENTRIES_3(T, _1, _2, _3), FORY_FC_MAKE_ENTRY(T, _4) +#define FORY_FC_ENTRIES_5(T, _1, _2, _3, _4, _5) \ + FORY_FC_ENTRIES_4(T, _1, _2, _3, _4), FORY_FC_MAKE_ENTRY(T, _5) +#define FORY_FC_ENTRIES_6(T, _1, _2, _3, _4, _5, _6) \ + FORY_FC_ENTRIES_5(T, _1, _2, _3, _4, _5), FORY_FC_MAKE_ENTRY(T, _6) +#define FORY_FC_ENTRIES_7(T, _1, _2, _3, _4, _5, _6, _7) \ + FORY_FC_ENTRIES_6(T, _1, _2, _3, _4, _5, _6), FORY_FC_MAKE_ENTRY(T, _7) +#define FORY_FC_ENTRIES_8(T, _1, _2, _3, _4, _5, _6, _7, _8) \ + FORY_FC_ENTRIES_7(T, _1, _2, _3, _4, _5, _6, _7), FORY_FC_MAKE_ENTRY(T, _8) +#define FORY_FC_ENTRIES_9(T, _1, _2, _3, _4, _5, _6, _7, _8, _9) \ + FORY_FC_ENTRIES_8(T, _1, _2, _3, _4, _5, _6, _7, _8), \ + FORY_FC_MAKE_ENTRY(T, _9) +#define FORY_FC_ENTRIES_10(T, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10) \ + FORY_FC_ENTRIES_9(T, _1, _2, _3, _4, _5, _6, _7, _8, _9), \ + FORY_FC_MAKE_ENTRY(T, _10) +#define FORY_FC_ENTRIES_11(T, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11) \ + FORY_FC_ENTRIES_10(T, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10), \ + 
FORY_FC_MAKE_ENTRY(T, _11) +#define FORY_FC_ENTRIES_12(T, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, \ + _12) \ + FORY_FC_ENTRIES_11(T, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11), \ + FORY_FC_MAKE_ENTRY(T, _12) +#define FORY_FC_ENTRIES_13(T, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, \ + _12, _13) \ + FORY_FC_ENTRIES_12(T, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12), \ + FORY_FC_MAKE_ENTRY(T, _13) +#define FORY_FC_ENTRIES_14(T, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, \ + _12, _13, _14) \ + FORY_FC_ENTRIES_13(T, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, \ + _13), \ + FORY_FC_MAKE_ENTRY(T, _14) +#define FORY_FC_ENTRIES_15(T, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, \ + _12, _13, _14, _15) \ + FORY_FC_ENTRIES_14(T, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, \ + _13, _14), \ + FORY_FC_MAKE_ENTRY(T, _15) +#define FORY_FC_ENTRIES_16(T, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, \ + _12, _13, _14, _15, _16) \ + FORY_FC_ENTRIES_15(T, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, \ + _13, _14, _15), \ + FORY_FC_MAKE_ENTRY(T, _16) + +// Main FORY_FIELD_CONFIG macro +// Creates a constexpr tuple of FieldEntry objects with member pointer +// verification +#define FORY_FIELD_CONFIG(Type, ...) \ + inline constexpr auto _fory_field_entries_##Type = \ + std::make_tuple(FORY_FC_ENTRIES(Type, __VA_ARGS__)); \ + template <> struct fory::detail::ForyFieldConfigImpl { \ + static constexpr bool has_config = true; \ + static constexpr auto &entries = _fory_field_entries_##Type; \ + static constexpr size_t field_count = \ + std::tuple_size_v>; \ + } diff --git a/cpp/fory/serialization/context.h b/cpp/fory/serialization/context.h index 88fdc2e30a..faf426a53f 100644 --- a/cpp/fory/serialization/context.h +++ b/cpp/fory/serialization/context.h @@ -200,6 +200,16 @@ class WriteContext { buffer().WriteVarInt64(value); } + /// Write uint64_t value using tagged encoding to buffer. 
+ FORY_ALWAYS_INLINE void write_tagged_uint64(uint64_t value) { + buffer().WriteTaggedUint64(value); + } + + /// Write int64_t value using tagged encoding to buffer. + FORY_ALWAYS_INLINE void write_tagged_int64(int64_t value) { + buffer().WriteTaggedInt64(value); + } + /// Write uint64_t value as varuint36small to buffer. /// This is the special variable-length encoding used for string headers. FORY_ALWAYS_INLINE void write_varuint36small(uint64_t value) { @@ -499,6 +509,18 @@ class ReadContext { return buffer().ReadVarInt64(error); } + /// Read uint64_t value using tagged encoding from buffer. Sets error on + /// failure. + FORY_ALWAYS_INLINE uint64_t read_tagged_uint64(Error &error) { + return buffer().ReadTaggedUint64(error); + } + + /// Read int64_t value using tagged encoding from buffer. Sets error on + /// failure. + FORY_ALWAYS_INLINE int64_t read_tagged_int64(Error &error) { + return buffer().ReadTaggedInt64(error); + } + /// Read uint64_t value as varuint36small from buffer. Sets error on failure. 
FORY_ALWAYS_INLINE uint64_t read_varuint36small(Error &error) { return buffer().ReadVarUint36Small(error); diff --git a/cpp/fory/serialization/smart_ptr_serializers.h b/cpp/fory/serialization/smart_ptr_serializers.h index 4e18cd31ed..b581382b3e 100644 --- a/cpp/fory/serialization/smart_ptr_serializers.h +++ b/cpp/fory/serialization/smart_ptr_serializers.h @@ -133,6 +133,10 @@ template struct Serializer> { bool read_type) { constexpr bool inner_requires_ref = requires_ref_metadata_v; + std::cerr << "[optional::read] T=" << typeid(T).name() + << ", ref_mode=" << static_cast(ref_mode) + << ", buffer_pos=" << ctx.buffer().reader_index() << std::endl; + if (ref_mode == RefMode::None) { T value = Serializer::read(ctx, RefMode::None, read_type); if (ctx.has_error()) { diff --git a/cpp/fory/serialization/struct_serializer.h b/cpp/fory/serialization/struct_serializer.h index b84610d302..e0ed8d8f69 100644 --- a/cpp/fory/serialization/struct_serializer.h +++ b/cpp/fory/serialization/struct_serializer.h @@ -40,7 +40,7 @@ #include #include -#ifdef FORY_DEBUG +#ifdef ENABLE_FORY_DEBUG_OUTPUT #include #endif @@ -225,6 +225,14 @@ FORY_ALWAYS_INLINE uint32_t put_varint_at(T value, Buffer &buffer, uint64_t zigzag = (static_cast(val) << 1) ^ static_cast(val >> 63); return buffer.PutVarUint64(offset, zigzag); + } else if constexpr (std::is_same_v || + std::is_same_v) { + // Unsigned 32-bit varint (no zigzag) + return buffer.PutVarUint32(offset, static_cast(value)); + } else if constexpr (std::is_same_v || + std::is_same_v) { + // Unsigned 64-bit varint (no zigzag) - used for VAR_UINT64 and TAGGED_UINT64 + return buffer.PutVarUint64(offset, static_cast(value)); } else { static_assert(sizeof(T) == 0, "Unsupported varint type"); return 0; @@ -277,6 +285,45 @@ template struct CompileTimeFieldHelpers { using RawFieldType = meta::RemoveMemberPointerCVRefT; // Unwrap fory::field<> to get the actual type for serialization using FieldType = unwrap_field_t; + + // Check for encoding override 
from FORY_FIELD_CONFIG + // This allows specifying varint/fixed/tagged encoding for unsigned types + if constexpr (::fory::detail::has_field_config_v) { + constexpr auto enc = + ::fory::detail::GetFieldConfigEntry::encoding; + // Apply encoding override for uint32_t (non-optional) + if constexpr (std::is_same_v) { + if constexpr (enc == Encoding::Varint) { + return static_cast(TypeId::VAR_UINT32); + } + return static_cast(TypeId::UINT32); + } + // Apply encoding override for uint64_t (non-optional) + else if constexpr (std::is_same_v) { + if constexpr (enc == Encoding::Varint) { + return static_cast(TypeId::VAR_UINT64); + } else if constexpr (enc == Encoding::Tagged) { + return static_cast(TypeId::TAGGED_UINT64); + } + return static_cast(TypeId::UINT64); + } + // Apply encoding override for std::optional + else if constexpr (std::is_same_v>) { + if constexpr (enc == Encoding::Varint) { + return static_cast(TypeId::VAR_UINT32); + } + return static_cast(TypeId::UINT32); + } + // Apply encoding override for std::optional + else if constexpr (std::is_same_v>) { + if constexpr (enc == Encoding::Varint) { + return static_cast(TypeId::VAR_UINT64); + } else if constexpr (enc == Encoding::Tagged) { + return static_cast(TypeId::TAGGED_UINT64); + } + return static_cast(TypeId::UINT64); + } + } return static_cast(Serializer::type_id); } } @@ -419,6 +466,19 @@ template struct CompileTimeFieldHelpers { using PtrT = std::tuple_element_t; using RawFieldType = meta::RemoveMemberPointerCVRefT; using FieldType = unwrap_field_t; + + // Check for encoding override from FORY_FIELD_CONFIG for unsigned types + // If encoding is Varint or Tagged, it's NOT a fixed-size primitive + if constexpr (::fory::detail::has_field_config_v && + (std::is_same_v || + std::is_same_v)) { + constexpr auto enc = + ::fory::detail::GetFieldConfigEntry::encoding; + if constexpr (enc == Encoding::Varint || enc == Encoding::Tagged) { + return false; // Not fixed-size, uses varint encoding + } + } + return 
std::is_same_v || std::is_same_v || std::is_same_v || @@ -442,6 +502,19 @@ template struct CompileTimeFieldHelpers { using PtrT = std::tuple_element_t; using RawFieldType = meta::RemoveMemberPointerCVRefT; using FieldType = unwrap_field_t; + + // Check for encoding override from FORY_FIELD_CONFIG for unsigned types + // If encoding is Varint or Tagged, treat as varint primitive + if constexpr (::fory::detail::has_field_config_v && + (std::is_same_v || + std::is_same_v)) { + constexpr auto enc = + ::fory::detail::GetFieldConfigEntry::encoding; + if constexpr (enc == Encoding::Varint || enc == Encoding::Tagged) { + return true; // Varint/Tagged encoding + } + } + return std::is_same_v || std::is_same_v || std::is_same_v || @@ -486,6 +559,36 @@ template struct CompileTimeFieldHelpers { using PtrT = std::tuple_element_t; using RawFieldType = meta::RemoveMemberPointerCVRefT; using FieldType = unwrap_field_t; + + // Check for encoding override from FORY_FIELD_CONFIG for unsigned types + if constexpr (::fory::detail::has_field_config_v && + (std::is_same_v || + std::is_same_v)) { + constexpr auto enc = + ::fory::detail::GetFieldConfigEntry::encoding; + if constexpr (enc == Encoding::Varint) { + if constexpr (std::is_same_v) { + return 5; // uint32 varint max + } else { + return 10; // uint64 varint max + } + } else if constexpr (enc == Encoding::Tagged) { + // Tagged encoding: 4 bytes for small, 9 bytes for large + return 9; + } + } + // Check for tagged encoding on signed int64 types + if constexpr (::fory::detail::has_field_config_v && + (std::is_same_v || + std::is_same_v)) { + constexpr auto enc = + ::fory::detail::GetFieldConfigEntry::encoding; + if constexpr (enc == Encoding::Tagged) { + // Tagged encoding: 4 bytes for small, 9 bytes for large + return 9; + } + } + if constexpr (std::is_same_v || std::is_same_v) { return 5; // int32 varint max @@ -680,14 +783,17 @@ template struct CompileTimeFieldHelpers { } } + /// Check if a type ID represents a compressed 
(varint/tagged) type. + /// This must match Java's Types.isCompressedType() exactly for consistent + /// field ordering. Java only considers VARINT32, VAR_UINT32, VARINT64, + /// VAR_UINT64, TAGGED_INT64, and TAGGED_UINT64 as compressed. + /// Note: INT32, INT64, UINT32, UINT64 are NOT compressed - they are fixed- + /// size types. Java xlang mode uses compressInt=true which maps int→VARINT32 + /// and long→VARINT64, but the actual INT32/INT64 type IDs are not compressed. static constexpr bool is_compress_id(uint32_t tid) { - return tid == static_cast(TypeId::INT32) || - tid == static_cast(TypeId::INT64) || - tid == static_cast(TypeId::VARINT32) || + return tid == static_cast(TypeId::VARINT32) || tid == static_cast(TypeId::VARINT64) || tid == static_cast(TypeId::TAGGED_INT64) || - tid == static_cast(TypeId::UINT32) || - tid == static_cast(TypeId::UINT64) || tid == static_cast(TypeId::VAR_UINT32) || tid == static_cast(TypeId::VAR_UINT64) || tid == static_cast(TypeId::TAGGED_UINT64); @@ -755,17 +861,20 @@ template struct CompileTimeFieldHelpers { return sa > sb; if (a_tid != b_tid) return a_tid > b_tid; // type_id descending to match Java - return snake_case_names[a] < snake_case_names[b]; + // Use original Names (not snake_case) to match runtime sorting and Java + return Names[a] < Names[b]; } if (ga == 2) { // Internal types (STRING, etc.): sort by type_id ascending, then name if (a_tid != b_tid) return a_tid < b_tid; - return snake_case_names[a] < snake_case_names[b]; + // Use original Names (not snake_case) to match runtime sorting and Java + return Names[a] < Names[b]; } - return snake_case_names[a] < snake_case_names[b]; + // Use original Names (not snake_case) to match runtime sorting and Java + return Names[a] < Names[b]; } } @@ -1064,15 +1173,23 @@ template struct CompileTimeFieldHelpers { switch (static_cast(tid)) { case TypeId::BOOL: case TypeId::INT8: + case TypeId::UINT8: total += 1; break; case TypeId::INT16: + case TypeId::UINT16: case TypeId::FLOAT16: 
total += 2; break; case TypeId::INT32: case TypeId::VARINT32: - total += 5; // varint max + total += 5; // varint max for 32-bit + break; + case TypeId::UINT32: + total += 4; // fixed 4 bytes + break; + case TypeId::VAR_UINT32: + total += 5; // varint max for 32-bit break; case TypeId::FLOAT32: total += 4; @@ -1080,12 +1197,20 @@ template struct CompileTimeFieldHelpers { case TypeId::INT64: case TypeId::VARINT64: case TypeId::TAGGED_INT64: - total += 10; // varint max + total += 10; // varint max for 64-bit + break; + case TypeId::UINT64: + total += 8; // fixed 8 bytes + break; + case TypeId::VAR_UINT64: + case TypeId::TAGGED_UINT64: + total += 10; // varint max for 64-bit break; case TypeId::FLOAT64: total += 8; break; default: + total += 10; // safe default for unknown types break; } } @@ -1174,7 +1299,33 @@ FORY_ALWAYS_INLINE void write_single_varint_field(const T &obj, Buffer &buffer, return obj.*field_ptr; } }(); - offset += put_varint_at(field_value, buffer, offset); + + // Check for tagged encoding on unsigned 64-bit types + if constexpr (::fory::detail::has_field_config_v && + (std::is_same_v || + std::is_same_v)) { + constexpr auto enc = + ::fory::detail::GetFieldConfigEntry::encoding; + if constexpr (enc == Encoding::Tagged) { + // Use tagged writing (not standard varint) + offset += buffer.PutTaggedUint64(offset, field_value); + } else { + offset += put_varint_at(field_value, buffer, offset); + } + } else if constexpr (::fory::detail::has_field_config_v && + (std::is_same_v || + std::is_same_v)) { + constexpr auto enc = + ::fory::detail::GetFieldConfigEntry::encoding; + if constexpr (enc == Encoding::Tagged) { + // Use tagged writing for signed int64 (not standard varint) + offset += buffer.PutTaggedInt64(offset, field_value); + } else { + offset += put_varint_at(field_value, buffer, offset); + } + } else { + offset += put_varint_at(field_value, buffer, offset); + } } /// Fast write consecutive varint primitive fields (int32, int64). 
@@ -1302,6 +1453,46 @@ void write_single_field(const T &obj, WriteContext &ctx, // For backwards compatibility, also check requires_ref_metadata_v constexpr bool field_requires_ref = requires_ref_metadata_v; + // Special handling for std::optional with encoding config + // This must come BEFORE the general primitive check because optional requires + // ref metadata but we want to use encoding-specific serialization. + constexpr bool is_encoded_optional_uint = + ::fory::detail::has_field_config_v && + (std::is_same_v> || + std::is_same_v>); + + if constexpr (is_encoded_optional_uint) { + constexpr auto enc = + ::fory::detail::GetFieldConfigEntry::encoding; + // Write nullable flag + if (!field_value.has_value()) { + ctx.write_int8(NULL_FLAG); + return; + } + ctx.write_int8(NOT_NULL_VALUE_FLAG); + + // Write the value with encoding-aware writing + using InnerType = typename std::remove_reference_t::value_type; + InnerType value = field_value.value(); + if constexpr (std::is_same_v) { + if constexpr (enc == Encoding::Varint) { + ctx.write_varuint32(value); + } else { + ctx.buffer().WriteInt32(static_cast(value)); + } + } else if constexpr (std::is_same_v) { + if constexpr (enc == Encoding::Varint) { + ctx.write_varuint64(value); + } else if constexpr (enc == Encoding::Tagged) { + ctx.write_tagged_uint64(value); + } else { + // For fixed encoding, cast to int64 since binary representation is same + ctx.buffer().WriteInt64(static_cast(value)); + } + } + return; + } + // Per Rust implementation: primitives are written directly without ref/type if constexpr (is_primitive_field && !field_requires_ref) { Serializer::write_data(field_value, ctx); @@ -1428,6 +1619,66 @@ template <> struct is_raw_primitive : std::true_type {}; template inline constexpr bool is_raw_primitive_v = is_raw_primitive::value; +/// Read a primitive value based on remote type_id (for compatible mode). +/// Returns the value as a uint64_t (or int64_t for signed types). 
+/// The caller must convert to the correct local type. +template +FORY_ALWAYS_INLINE TargetType read_primitive_by_type_id(ReadContext &ctx, + uint32_t type_id, + Error &error) { + // Read based on remote type_id encoding, then convert to TargetType + switch (static_cast(type_id)) { + case TypeId::BOOL: + return static_cast(ctx.read_uint8(error) != 0); + case TypeId::INT8: + return static_cast(ctx.read_int8(error)); + case TypeId::UINT8: + return static_cast(ctx.read_uint8(error)); + case TypeId::INT16: + return static_cast(ctx.read_int16(error)); + case TypeId::UINT16: + return static_cast(static_cast(ctx.read_int16(error))); + case TypeId::INT32: + // INT32 uses fixed encoding + return static_cast(ctx.read_int32(error)); + case TypeId::VARINT32: + // VARINT32 uses varint encoding + return static_cast(ctx.read_varint32(error)); + case TypeId::UINT32: + // UINT32 uses fixed 4-byte encoding + return static_cast(static_cast(ctx.read_int32(error))); + case TypeId::VAR_UINT32: + // VAR_UINT32 uses varint encoding + return static_cast(ctx.read_varuint32(error)); + case TypeId::INT64: + // INT64 uses fixed encoding + return static_cast(ctx.read_int64(error)); + case TypeId::VARINT64: + // VARINT64 uses varint encoding + return static_cast(ctx.read_varint64(error)); + case TypeId::TAGGED_INT64: + // TAGGED_INT64 uses tagged encoding (special hybrid encoding) + return static_cast(ctx.read_tagged_int64(error)); + case TypeId::UINT64: + // UINT64 uses fixed 8-byte encoding + return static_cast(static_cast(ctx.read_int64(error))); + case TypeId::VAR_UINT64: + // VAR_UINT64 uses varint encoding + return static_cast(ctx.read_varuint64(error)); + case TypeId::TAGGED_UINT64: + // TAGGED_UINT64 uses tagged encoding (special hybrid encoding) + return static_cast(ctx.read_tagged_uint64(error)); + case TypeId::FLOAT32: + return static_cast(ctx.read_float(error)); + case TypeId::FLOAT64: + return static_cast(ctx.read_double(error)); + default: + error = Error::type_error("Unsupported 
type_id for primitive read: " + + std::to_string(type_id)); + return TargetType{}; + } +} + /// Helper to read a primitive field directly using Error* pattern. /// This bypasses Serializer::read for better performance. /// Returns the read value; sets error on failure. @@ -1533,7 +1784,7 @@ void read_single_field_by_index(T &obj, ReadContext &ctx) { constexpr RefMode field_ref_mode = make_ref_mode(is_nullable || field_requires_ref, track_ref); -#ifdef FORY_DEBUG +#ifdef ENABLE_FORY_DEBUG_OUTPUT const auto debug_names = decltype(field_info)::Names; std::cerr << "[xlang][field] T=" << typeid(T).name() << ", index=" << Index << ", name=" << debug_names[Index] @@ -1549,30 +1800,133 @@ void read_single_field_by_index(T &obj, ReadContext &ctx) { // and use direct buffer reads with Error&. constexpr bool is_raw_prim = is_raw_primitive_v; if constexpr (is_raw_prim && is_primitive_field && !field_requires_ref) { + // Check for encoding override for unsigned types from FORY_FIELD_CONFIG + auto read_value = [&ctx]() -> FieldType { + if constexpr (::fory::detail::has_field_config_v && + (std::is_same_v || + std::is_same_v)) { + constexpr auto enc = + ::fory::detail::GetFieldConfigEntry::encoding; +#ifdef ENABLE_FORY_DEBUG_OUTPUT + std::cerr << "[xlang][encoding] T=" << typeid(T).name() + << ", Index=" << Index << ", enc=" << static_cast(enc) + << ", reader_index=" << ctx.buffer().reader_index() + << std::endl; +#endif + if constexpr (std::is_same_v) { + if constexpr (enc == Encoding::Varint) { + // VAR_UINT32: read as unsigned varint + return ctx.read_varuint32(ctx.error()); + } + // UINT32: fixed 4-byte + return static_cast(ctx.read_int32(ctx.error())); + } else if constexpr (std::is_same_v) { + if constexpr (enc == Encoding::Varint) { + // VAR_UINT64: read as unsigned varint + return ctx.read_varuint64(ctx.error()); + } else if constexpr (enc == Encoding::Tagged) { + // TAGGED_UINT64: read using tagged encoding + return ctx.read_tagged_uint64(ctx.error()); + } + // UINT64: 
fixed 8-byte + return ctx.read_uint64(ctx.error()); + } + } + // No encoding override, use default type-based reading + return read_primitive_field_direct(ctx, ctx.error()); + }; // Assign to field (handle fory::field<> wrapper if needed) if constexpr (is_fory_field_v) { - (obj.*field_ptr).value = - read_primitive_field_direct(ctx, ctx.error()); + (obj.*field_ptr).value = read_value(); } else { - obj.*field_ptr = read_primitive_field_direct(ctx, ctx.error()); + obj.*field_ptr = read_value(); } } else { - // Assign to field (handle fory::field<> wrapper if needed) - FieldType result = - Serializer::read(ctx, field_ref_mode, read_type); - if constexpr (is_fory_field_v) { - (obj.*field_ptr).value = std::move(result); + // Special handling for std::optional with encoding config + constexpr bool is_encoded_optional_uint = + ::fory::detail::has_field_config_v && + (std::is_same_v> || + std::is_same_v>); + + if constexpr (is_encoded_optional_uint) { + constexpr auto enc = + ::fory::detail::GetFieldConfigEntry::encoding; +#ifdef ENABLE_FORY_DEBUG_OUTPUT + std::cerr << "[DEBUG] is_encoded_optional_uint: Index=" << Index + << ", enc=" << static_cast(enc) + << ", reader_index=" << ctx.buffer().reader_index() << std::endl; +#endif + // Read nullable flag + int8_t flag = ctx.read_int8(ctx.error()); +#ifdef ENABLE_FORY_DEBUG_OUTPUT + std::cerr << "[DEBUG] After read flag: flag=" << static_cast(flag) + << ", reader_index=" << ctx.buffer().reader_index() << std::endl; +#endif + if (FORY_PREDICT_FALSE(ctx.has_error())) { + return; + } + if (flag == NULL_FLAG) { + if constexpr (is_fory_field_v) { + (obj.*field_ptr).value = std::nullopt; + } else { + obj.*field_ptr = std::nullopt; + } + return; + } + // Read the value with encoding-aware reading + using InnerType = + typename std::remove_reference_t::value_type; + InnerType value; + if constexpr (std::is_same_v) { + if constexpr (enc == Encoding::Varint) { + value = ctx.read_varuint32(ctx.error()); + } else { + value = 
static_cast(ctx.read_int32(ctx.error())); + } + } else if constexpr (std::is_same_v) { +#ifdef ENABLE_FORY_DEBUG_OUTPUT + std::cerr << "[DEBUG] Reading uint64 with enc=" << static_cast(enc) + << ", reader_index=" << ctx.buffer().reader_index() + << std::endl; +#endif + if constexpr (enc == Encoding::Varint) { + value = ctx.read_varuint64(ctx.error()); + } else if constexpr (enc == Encoding::Tagged) { + value = ctx.read_tagged_uint64(ctx.error()); + } else { + value = ctx.read_uint64(ctx.error()); + } +#ifdef ENABLE_FORY_DEBUG_OUTPUT + std::cerr << "[DEBUG] After read uint64: value=" << value + << ", reader_index=" << ctx.buffer().reader_index() + << ", has_error=" << ctx.has_error() << std::endl; +#endif + } + if constexpr (is_fory_field_v) { + (obj.*field_ptr).value = std::optional(value); + } else { + obj.*field_ptr = std::optional(value); + } } else { - obj.*field_ptr = std::move(result); + // Assign to field (handle fory::field<> wrapper if needed) + FieldType result = + Serializer::read(ctx, field_ref_mode, read_type); + if constexpr (is_fory_field_v) { + (obj.*field_ptr).value = std::move(result); + } else { + obj.*field_ptr = std::move(result); + } } } } /// Helper to read a single field by index in compatible mode using /// remote field metadata to decide reference flag presence. +/// @param remote_type_id The type_id from the remote schema (for encoding) template void read_single_field_by_index_compatible(T &obj, ReadContext &ctx, - RefMode remote_ref_mode) { + RefMode remote_ref_mode, + uint32_t remote_type_id) { using Helpers = CompileTimeFieldHelpers; const auto field_info = ForyFieldInfo(obj); const auto field_ptrs = decltype(field_info)::Ptrs; @@ -1608,24 +1962,113 @@ void read_single_field_by_index_compatible(T &obj, ReadContext &ctx, // In compatible mode, trust the remote field metadata (remote_ref_mode) // to tell us whether a ref/null flag was written before the value payload. 
- // OPTIMIZATION: For raw primitive fields (not wrappers) with no ref flag, - // bypass Serializer::read and use direct buffer reads with Error&. +#ifdef ENABLE_FORY_DEBUG_OUTPUT + const auto debug_names = decltype(field_info)::Names; + std::cerr << "[compatible][read_field] Index=" << Index + << ", name=" << debug_names[Index] + << ", FieldType=" << typeid(FieldType).name() + << ", remote_ref_mode=" << static_cast(remote_ref_mode) + << ", buffer pos=" << ctx.buffer().reader_index() << std::endl; +#endif + + // In compatible mode, handle primitive fields specially to use remote encoding. + // This is critical for schema evolution where encoding differs between sender/receiver. constexpr bool is_raw_prim = is_raw_primitive_v; + constexpr bool is_local_optional = is_optional_v; + + // Case 1: Local raw primitive, any remote ref mode + // For primitives, we must use remote_type_id encoding regardless of nullability if constexpr (is_raw_prim && is_primitive_field) { if (remote_ref_mode == RefMode::None) { - // Assign to field (handle fory::field<> wrapper if needed) + // Remote is non-nullable, no ref flag if constexpr (is_fory_field_v) { (obj.*field_ptr).value = - read_primitive_field_direct(ctx, ctx.error()); + read_primitive_by_type_id(ctx, remote_type_id, + ctx.error()); } else { obj.*field_ptr = - read_primitive_field_direct(ctx, ctx.error()); + read_primitive_by_type_id(ctx, remote_type_id, + ctx.error()); + } + return; + } else { + // Remote is nullable, has ref flag + int8_t flag = ctx.read_int8(ctx.error()); + if (FORY_PREDICT_FALSE(ctx.has_error())) { + return; + } + if (flag == NULL_FLAG) { + // Cannot assign null to non-nullable local field + ctx.set_error(Error::invalid( + "Cannot deserialize null value to non-nullable field")); + return; + } + // NOT_NULL_VALUE_FLAG or REF_VALUE_FLAG - read the value + if constexpr (is_fory_field_v) { + (obj.*field_ptr).value = + read_primitive_by_type_id(ctx, remote_type_id, + ctx.error()); + } else { + obj.*field_ptr = 
+ read_primitive_by_type_id(ctx, remote_type_id, + ctx.error()); } return; } } - // Assign to field (handle fory::field<> wrapper if needed) + // Case 2: Local std::optional

      where P is a primitive + // Use remote encoding for the inner primitive value + if constexpr (is_local_optional && is_primitive_field) { + using InnerType = typename FieldType::value_type; + constexpr bool inner_is_raw_prim = is_raw_primitive_v; + + if constexpr (inner_is_raw_prim) { + if (remote_ref_mode == RefMode::None) { + // Remote is non-nullable, no ref flag - read value and wrap in optional + InnerType value = read_primitive_by_type_id( + ctx, remote_type_id, ctx.error()); + if (FORY_PREDICT_FALSE(ctx.has_error())) { + return; + } + if constexpr (is_fory_field_v) { + (obj.*field_ptr).value = std::optional(value); + } else { + obj.*field_ptr = std::optional(value); + } + return; + } else { + // Remote is nullable, has ref flag + int8_t flag = ctx.read_int8(ctx.error()); + if (FORY_PREDICT_FALSE(ctx.has_error())) { + return; + } + if (flag == NULL_FLAG) { + // Null value - set optional to nullopt + if constexpr (is_fory_field_v) { + (obj.*field_ptr).value = std::nullopt; + } else { + obj.*field_ptr = std::nullopt; + } + return; + } + // NOT_NULL_VALUE_FLAG or REF_VALUE_FLAG - read the value + InnerType value = read_primitive_by_type_id( + ctx, remote_type_id, ctx.error()); + if (FORY_PREDICT_FALSE(ctx.has_error())) { + return; + } + if constexpr (is_fory_field_v) { + (obj.*field_ptr).value = std::optional(value); + } else { + obj.*field_ptr = std::optional(value); + } + return; + } + } + } + + // For non-primitive types, use the standard serializer path FieldType result = Serializer::read(ctx, remote_ref_mode, read_type); if constexpr (is_fory_field_v) { @@ -1638,10 +2081,12 @@ void read_single_field_by_index_compatible(T &obj, ReadContext &ctx, /// Helper to dispatch field reading by field_id in compatible mode. /// Uses fold expression with short-circuit to avoid lambda overhead. /// Sets handled=true if field was matched. 
+/// @param remote_type_id The type_id from the remote schema (for encoding) template FORY_ALWAYS_INLINE void dispatch_compatible_field_read_impl(T &obj, ReadContext &ctx, int16_t field_id, - RefMode remote_ref_mode, bool &handled, + RefMode remote_ref_mode, + uint32_t remote_type_id, bool &handled, std::index_sequence) { using Helpers = CompileTimeFieldHelpers; @@ -1650,7 +2095,8 @@ dispatch_compatible_field_read_impl(T &obj, ReadContext &ctx, int16_t field_id, ((static_cast(Indices) == field_id ? (handled = true, read_single_field_by_index_compatible< - Helpers::sorted_indices[Indices]>(obj, ctx, remote_ref_mode), + Helpers::sorted_indices[Indices]>(obj, ctx, remote_ref_mode, + remote_type_id), true) : false) || ...); @@ -1798,6 +2244,18 @@ FORY_ALWAYS_INLINE T read_varint_at(Buffer &buffer, uint32_t &offset) { offset += bytes_read; // Zigzag decode return static_cast((raw >> 1) ^ (~(raw & 1) + 1)); + } else if constexpr (std::is_same_v || + std::is_same_v) { + // Unsigned 32-bit varint (no zigzag) + uint32_t raw = buffer.GetVarUint32(offset, &bytes_read); + offset += bytes_read; + return raw; + } else if constexpr (std::is_same_v || + std::is_same_v) { + // Unsigned 64-bit varint (no zigzag) - used for VAR_UINT64 and TAGGED_UINT64 + uint64_t raw = buffer.GetVarUint64(offset, &bytes_read); + offset += bytes_read; + return raw; } else { static_assert(sizeof(T) == 0, "Unsupported varint type"); return T{}; @@ -1806,6 +2264,7 @@ FORY_ALWAYS_INLINE T read_varint_at(Buffer &buffer, uint32_t &offset) { /// Helper to read a single varint primitive field. /// No lambda overhead - direct function call that will be inlined. +/// Handles both standard varint and tagged encoding based on field config. 
template FORY_ALWAYS_INLINE void read_single_varint_field(T &obj, Buffer &buffer, uint32_t &offset) { @@ -1816,7 +2275,40 @@ FORY_ALWAYS_INLINE void read_single_varint_field(T &obj, Buffer &buffer, using RawFieldType = typename meta::RemoveMemberPointerCVRefT; using FieldType = unwrap_field_t; - FieldType result = read_varint_at(buffer, offset); + + FieldType result; + + // Check for tagged encoding on unsigned 64-bit types + if constexpr (::fory::detail::has_field_config_v && + (std::is_same_v || + std::is_same_v)) { + constexpr auto enc = + ::fory::detail::GetFieldConfigEntry::encoding; + if constexpr (enc == Encoding::Tagged) { + // Use tagged reading (not standard varint) + uint32_t bytes_read; + result = buffer.GetTaggedUint64(offset, &bytes_read); + offset += bytes_read; + } else { + result = read_varint_at(buffer, offset); + } + } else if constexpr (::fory::detail::has_field_config_v && + (std::is_same_v || + std::is_same_v)) { + constexpr auto enc = + ::fory::detail::GetFieldConfigEntry::encoding; + if constexpr (enc == Encoding::Tagged) { + // Use tagged reading for signed int64 (not standard varint) + uint32_t bytes_read; + result = buffer.GetTaggedInt64(offset, &bytes_read); + offset += bytes_read; + } else { + result = read_varint_at(buffer, offset); + } + } else { + result = read_varint_at(buffer, offset); + } + // Assign to field (handle fory::field<> wrapper if needed) if constexpr (is_fory_field_v) { (obj.*field_ptr).value = result; @@ -1918,6 +2410,10 @@ void read_struct_fields_compatible(T &obj, ReadContext &ctx, std::index_sequence) { const auto &remote_fields = remote_type_meta->get_field_infos(); + std::cerr << "[compatible] Starting to read " << remote_fields.size() + << " remote fields, buffer pos=" << ctx.buffer().reader_index() + << std::endl; + // Iterate through remote fields in their serialization order for (size_t remote_idx = 0; remote_idx < remote_fields.size(); ++remote_idx) { const auto &remote_field = remote_fields[remote_idx]; @@ 
-1928,6 +2424,14 @@ void read_struct_fields_compatible(T &obj, ReadContext &ctx, // field's header during FieldInfo::from_bytes. RefMode remote_ref_mode = remote_field.field_type.ref_mode; + std::cerr << "[compatible] remote_idx=" << remote_idx + << ", field=" << remote_field.field_name + << ", type_id=" << remote_field.field_type.type_id + << ", nullable=" << remote_field.field_type.nullable + << ", ref_mode=" << static_cast(remote_ref_mode) + << ", field_id=" << field_id + << ", buffer pos=" << ctx.buffer().reader_index() << std::endl; + if (field_id == -1) { // Field unknown locally — skip its value skip_field_value(ctx, remote_field.field_type, remote_ref_mode); @@ -1939,8 +2443,10 @@ void read_struct_fields_compatible(T &obj, ReadContext &ctx, // Dispatch to the correct local field by field_id // Uses fold expression with short-circuit - no lambda overhead + // Pass remote type_id for correct encoding in compatible mode bool handled = false; dispatch_compatible_field_read_impl(obj, ctx, field_id, remote_ref_mode, + remote_field.field_type.type_id, handled, std::index_sequence{}); @@ -2038,6 +2544,8 @@ struct Serializer>> { } static void write_data(const T &obj, WriteContext &ctx) { + // Only write struct version hash when check_struct_version is enabled, + // matching Java's behavior in ObjectSerializer.write(). if (ctx.check_struct_version()) { auto type_info_res = ctx.type_resolver().template get_type_info(); if (FORY_PREDICT_FALSE(!type_info_res.ok())) { @@ -2063,6 +2571,8 @@ struct Serializer>> { static void write_data_generic(const T &obj, WriteContext &ctx, bool has_generics) { + // Only write struct version hash when check_struct_version is enabled, + // matching Java's behavior in ObjectSerializer.write(). 
if (ctx.check_struct_version()) { auto type_info_res = ctx.type_resolver().template get_type_info(); if (FORY_PREDICT_FALSE(!type_info_res.ok())) { @@ -2094,7 +2604,7 @@ struct Serializer>> { if (FORY_PREDICT_FALSE(ctx.has_error())) { return T{}; } -#ifdef FORY_DEBUG +#ifdef ENABLE_FORY_DEBUG_OUTPUT std::cerr << "[xlang][struct] T=" << typeid(T).name() << ", read_ref_flag=" << static_cast(ref_flag) << ", reader_index=" << ctx.buffer().reader_index() @@ -2243,6 +2753,8 @@ struct Serializer>> { } static T read_compatible(ReadContext &ctx, const TypeInfo *remote_type_info) { + std::cerr << "[read_compatible] Entering for type " << typeid(T).name() + << ", buffer_pos=" << ctx.buffer().reader_index() << std::endl; // Read and verify struct version if enabled (matches write_data behavior) if (ctx.check_struct_version()) { int32_t read_version = ctx.buffer().ReadInt32(ctx.error()); @@ -2293,6 +2805,8 @@ struct Serializer>> { } static T read_data(ReadContext &ctx) { + // Only read struct version hash when check_struct_version is enabled, + // matching Java's behavior in ObjectSerializer.read(). if (ctx.check_struct_version()) { int32_t read_version = ctx.buffer().ReadInt32(ctx.error()); if (FORY_PREDICT_FALSE(ctx.has_error())) { diff --git a/cpp/fory/serialization/type_resolver.cc b/cpp/fory/serialization/type_resolver.cc index 2a328be46d..04e8219c6d 100644 --- a/cpp/fory/serialization/type_resolver.cc +++ b/cpp/fory/serialization/type_resolver.cc @@ -443,8 +443,9 @@ TypeMeta::from_bytes(Buffer &buffer, const TypeMeta *local_type_info) { field_infos.push_back(std::move(field)); } - // Sort fields according to xlang spec - field_infos = sort_field_infos(std::move(field_infos)); + // NOTE: Do NOT sort remote fields! They are already in the sender's sorted + // order, which matches the data order. Re-sorting would cause misalignment + // with the serialized data. 
// Assign field IDs by comparing with local type if (local_type_info != nullptr) { @@ -539,8 +540,8 @@ TypeMeta::from_bytes_with_header(Buffer &buffer, int64_t header) { field_infos.push_back(std::move(field)); } - // Sort fields according to xlang spec - field_infos = sort_field_infos(std::move(field_infos)); + // NOTE: Do NOT sort remote fields! They are already in the sender's sorted + // order, which matches the data order. // CRITICAL FIX: Ensure we consume exactly meta_size bytes size_t current_pos = buffer.reader_index(); @@ -631,14 +632,15 @@ int32_t get_primitive_type_size(uint32_t type_id) { } } +/// Check if a type ID represents a compressed (varint/tagged) type. +/// This must match Java's Types.isCompressedType() exactly for consistent +/// field ordering. Java only considers VARINT32, VAR_UINT32, VARINT64, +/// VAR_UINT64, TAGGED_INT64, and TAGGED_UINT64 as compressed. +/// Note: INT32, INT64, UINT32, UINT64 are NOT compressed - they are fixed-size. bool is_compress(uint32_t type_id) { - return type_id == static_cast(TypeId::INT32) || - type_id == static_cast(TypeId::INT64) || - type_id == static_cast(TypeId::VARINT32) || + return type_id == static_cast(TypeId::VARINT32) || type_id == static_cast(TypeId::VARINT64) || type_id == static_cast(TypeId::TAGGED_INT64) || - type_id == static_cast(TypeId::UINT32) || - type_id == static_cast(TypeId::UINT64) || type_id == static_cast(TypeId::VAR_UINT32) || type_id == static_cast(TypeId::VAR_UINT64) || type_id == static_cast(TypeId::TAGGED_UINT64); diff --git a/cpp/fory/serialization/type_resolver.h b/cpp/fory/serialization/type_resolver.h index 896074dd5f..403c7c2782 100644 --- a/cpp/fory/serialization/type_resolver.h +++ b/cpp/fory/serialization/type_resolver.h @@ -57,6 +57,7 @@ #include "fory/util/flat_int_map.h" #include "fory/util/logging.h" #include "fory/util/result.h" +#include "fory/util/string_util.h" namespace fory { namespace serialization { @@ -509,13 +510,72 @@ constexpr bool compute_track_ref() { } 
} +// Helper to check if a type is unsigned integer +template struct is_unsigned_integer : std::false_type {}; +template <> struct is_unsigned_integer : std::true_type {}; +template <> struct is_unsigned_integer : std::true_type {}; +template <> struct is_unsigned_integer : std::true_type {}; +template <> struct is_unsigned_integer : std::true_type {}; +template +inline constexpr bool is_unsigned_integer_v = is_unsigned_integer::value; + +// Helper to get inner type of optional, or the type itself +template struct unwrap_optional_inner { + using type = T; +}; +template +struct unwrap_optional_inner>>> { + using type = typename decay_t::value_type; +}; +template +using unwrap_optional_inner_t = typename unwrap_optional_inner::type; + +// Helper to compute the correct type_id for unsigned types based on encoding +template +constexpr uint32_t compute_unsigned_type_id() { + // For unsigned types, check if FORY_FIELD_CONFIG specifies an encoding + if constexpr (::fory::detail::has_field_config_v) { + constexpr auto enc = + ::fory::detail::GetFieldConfigEntry::encoding; + // Handle inner type for std::optional + using InnerType = unwrap_optional_inner_t; + if constexpr (std::is_same_v) { + return static_cast(TypeId::UINT8); + } else if constexpr (std::is_same_v) { + return static_cast(TypeId::UINT16); + } else if constexpr (std::is_same_v) { + if constexpr (enc == Encoding::Varint) { + return static_cast(TypeId::VAR_UINT32); + } else { + return static_cast(TypeId::UINT32); + } + } else if constexpr (std::is_same_v) { + if constexpr (enc == Encoding::Varint) { + return static_cast(TypeId::VAR_UINT64); + } else if constexpr (enc == Encoding::Tagged) { + return static_cast(TypeId::TAGGED_UINT64); + } else { + return static_cast(TypeId::UINT64); + } + } + } + // Not an unsigned type with field config, use default + return 0; +} + template struct FieldInfoBuilder { static FieldInfo build() { const auto meta = ForyFieldInfo(T{}); const auto field_names = decltype(meta)::Names; 
const auto field_ptrs = decltype(meta)::Ptrs; - std::string field_name(field_names[Index]); + // Convert camelCase field name to snake_case for cross-language compatibility + std::string_view original_name = field_names[Index]; + constexpr size_t max_snake_len = 128; // Reasonable max for field names + auto [snake_buffer, snake_len] = + ::fory::to_snake_case(original_name); + std::string field_name(snake_buffer.data(), snake_len); + const auto field_ptr = std::get(field_ptrs); using RawFieldType = typename meta::RemoveMemberPointerCVRefT; @@ -531,6 +591,15 @@ template struct FieldInfoBuilder { constexpr bool track_ref = compute_track_ref(); FieldType field_type = FieldTypeBuilder::build(false); + + // Override type_id for unsigned types based on encoding from FORY_FIELD_CONFIG + using InnerType = unwrap_optional_inner_t; + constexpr uint32_t unsigned_tid = + compute_unsigned_type_id(); + if constexpr (unsigned_tid != 0 && is_unsigned_integer_v) { + field_type.type_id = unsigned_tid; + } + // Override nullable and ref_tracking from field-level metadata field_type.nullable = is_nullable; field_type.ref_tracking = track_ref; @@ -538,7 +607,8 @@ template struct FieldInfoBuilder { #ifdef FORY_DEBUG // DEBUG: Print field info for debugging fingerprint mismatch std::cerr << "[xlang][debug] FieldInfoBuilder T=" << typeid(T).name() - << " Index=" << Index << " field=" << field_name << " has_tags=" + << " Index=" << Index << " field=" << field_name + << " type_id=" << field_type.type_id << " has_tags=" << ::fory::detail::has_field_tags_v << " is_nullable=" << is_nullable << " track_ref=" << track_ref << std::endl; #endif @@ -976,7 +1046,12 @@ TypeResolver::build_struct_type_info(uint32_t type_id, std::string ns, entry->name_to_index.reserve(field_count); for (size_t i = 0; i < field_count; ++i) { - entry->name_to_index.emplace(std::string(field_names[i]), i); + // Convert camelCase field name to snake_case for cross-language compatibility + constexpr size_t max_snake_len = 
128; + auto [snake_buffer, snake_len] = + ::fory::to_snake_case(field_names[i]); + entry->name_to_index.emplace(std::string(snake_buffer.data(), snake_len), + i); } auto field_infos = diff --git a/cpp/fory/serialization/xlang_test_main.cc b/cpp/fory/serialization/xlang_test_main.cc index bd15fe4cba..e8f77224fe 100644 --- a/cpp/fory/serialization/xlang_test_main.cc +++ b/cpp/fory/serialization/xlang_test_main.cc @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -584,52 +585,80 @@ FORY_FIELD_TAGS(CircularRefStruct, (name, 0), (selfRef, 1, nullable, ref)); // Unsigned Number Test Types // ============================================================================ +// UnsignedSchemaConsistentSimple (type id 1) +// A simple test struct for unsigned numbers with tagged encoding. +struct UnsignedSchemaConsistentSimple { + uint64_t u64Tagged; // TAGGED_UINT64 + std::optional u64TaggedNullable; // TAGGED_UINT64, nullable + + bool operator==(const UnsignedSchemaConsistentSimple &other) const { + return u64Tagged == other.u64Tagged && + u64TaggedNullable == other.u64TaggedNullable; + } +}; +FORY_STRUCT(UnsignedSchemaConsistentSimple, u64Tagged, u64TaggedNullable); +FORY_FIELD_CONFIG( + UnsignedSchemaConsistentSimple, + (u64Tagged, fory::F(0).tagged()), + (u64TaggedNullable, fory::F(1).nullable().tagged())); + // UnsignedSchemaConsistent (type id 501) // Test struct for unsigned numbers in SCHEMA_CONSISTENT mode. // All fields use the same nullability as Java. // Note: C++ uses std::optional for nullable fields. 
struct UnsignedSchemaConsistent { // Primitive unsigned fields (non-nullable) - uint8_t u8; - uint16_t u16; - uint32_t u32Var; // VAR_UINT32 - variable-length - uint32_t u32Fixed; // UINT32 - fixed 4-byte - uint64_t u64Var; // VAR_UINT64 - variable-length - uint64_t u64Fixed; // UINT64 - fixed 8-byte - uint64_t u64Tagged; // TAGGED_UINT64 + uint8_t u8Field; + uint16_t u16Field; + uint32_t u32VarField; // VAR_UINT32 - variable-length + uint32_t u32FixedField; // UINT32 - fixed 4-byte + uint64_t u64VarField; // VAR_UINT64 - variable-length + uint64_t u64FixedField; // UINT64 - fixed 8-byte + uint64_t u64TaggedField; // TAGGED_UINT64 // Nullable unsigned fields (using std::optional) - std::optional u8Nullable; - std::optional u16Nullable; - std::optional u32VarNullable; - std::optional u32FixedNullable; - std::optional u64VarNullable; - std::optional u64FixedNullable; - std::optional u64TaggedNullable; + std::optional u8NullableField; + std::optional u16NullableField; + std::optional u32VarNullableField; + std::optional u32FixedNullableField; + std::optional u64VarNullableField; + std::optional u64FixedNullableField; + std::optional u64TaggedNullableField; bool operator==(const UnsignedSchemaConsistent &other) const { - return u8 == other.u8 && u16 == other.u16 && u32Var == other.u32Var && - u32Fixed == other.u32Fixed && u64Var == other.u64Var && - u64Fixed == other.u64Fixed && u64Tagged == other.u64Tagged && - u8Nullable == other.u8Nullable && u16Nullable == other.u16Nullable && - u32VarNullable == other.u32VarNullable && - u32FixedNullable == other.u32FixedNullable && - u64VarNullable == other.u64VarNullable && - u64FixedNullable == other.u64FixedNullable && - u64TaggedNullable == other.u64TaggedNullable; + return u8Field == other.u8Field && u16Field == other.u16Field && + u32VarField == other.u32VarField && + u32FixedField == other.u32FixedField && + u64VarField == other.u64VarField && + u64FixedField == other.u64FixedField && + u64TaggedField == 
other.u64TaggedField && + u8NullableField == other.u8NullableField && + u16NullableField == other.u16NullableField && + u32VarNullableField == other.u32VarNullableField && + u32FixedNullableField == other.u32FixedNullableField && + u64VarNullableField == other.u64VarNullableField && + u64FixedNullableField == other.u64FixedNullableField && + u64TaggedNullableField == other.u64TaggedNullableField; } }; -FORY_STRUCT(UnsignedSchemaConsistent, u8, u16, u32Var, u32Fixed, u64Var, - u64Fixed, u64Tagged, u8Nullable, u16Nullable, u32VarNullable, - u32FixedNullable, u64VarNullable, u64FixedNullable, - u64TaggedNullable); -FORY_FIELD_TAGS(UnsignedSchemaConsistent, (u8, 0), (u16, 1), (u32Var, 2), - (u32Fixed, 3), (u64Var, 4), (u64Fixed, 5), (u64Tagged, 6), - (u8Nullable, 7, nullable), (u16Nullable, 8, nullable), - (u32VarNullable, 9, nullable), (u32FixedNullable, 10, nullable), - (u64VarNullable, 11, nullable), - (u64FixedNullable, 12, nullable), - (u64TaggedNullable, 13, nullable)); +FORY_STRUCT(UnsignedSchemaConsistent, u8Field, u16Field, u32VarField, + u32FixedField, u64VarField, u64FixedField, u64TaggedField, + u8NullableField, u16NullableField, u32VarNullableField, + u32FixedNullableField, u64VarNullableField, u64FixedNullableField, + u64TaggedNullableField); +// Use new FORY_FIELD_CONFIG with builder pattern for encoding specification +FORY_FIELD_CONFIG( + UnsignedSchemaConsistent, (u8Field, fory::F(0)), (u16Field, fory::F(1)), + (u32VarField, fory::F(2).varint()), (u32FixedField, fory::F(3).fixed()), + (u64VarField, fory::F(4).varint()), (u64FixedField, fory::F(5).fixed()), + (u64TaggedField, fory::F(6).tagged()), + (u8NullableField, fory::F(7).nullable()), + (u16NullableField, fory::F(8).nullable()), + (u32VarNullableField, fory::F(9).nullable().varint()), + (u32FixedNullableField, fory::F(10).nullable().fixed()), + (u64VarNullableField, fory::F(11).nullable().varint()), + (u64FixedNullableField, fory::F(12).nullable().fixed()), + (u64TaggedNullableField, 
fory::F(13).nullable().tagged())); // UnsignedSchemaCompatible (type id 502) // Test struct for unsigned numbers in COMPATIBLE mode. @@ -638,13 +667,13 @@ FORY_FIELD_TAGS(UnsignedSchemaConsistent, (u8, 0), (u16, 1), (u32Var, 2), // in Java) struct UnsignedSchemaCompatible { // Group 1: Nullable in C++ (std::optional), non-nullable in Java - std::optional u8; - std::optional u16; - std::optional u32Var; - std::optional u32Fixed; - std::optional u64Var; - std::optional u64Fixed; - std::optional u64Tagged; + std::optional u8Field1; + std::optional u16Field1; + std::optional u32VarField1; + std::optional u32FixedField1; + std::optional u64VarField1; + std::optional u64FixedField1; + std::optional u64TaggedField1; // Group 2: Non-nullable in C++, nullable in Java uint8_t u8Field2; @@ -656,9 +685,12 @@ struct UnsignedSchemaCompatible { uint64_t u64TaggedField2; bool operator==(const UnsignedSchemaCompatible &other) const { - return u8 == other.u8 && u16 == other.u16 && u32Var == other.u32Var && - u32Fixed == other.u32Fixed && u64Var == other.u64Var && - u64Fixed == other.u64Fixed && u64Tagged == other.u64Tagged && + return u8Field1 == other.u8Field1 && u16Field1 == other.u16Field1 && + u32VarField1 == other.u32VarField1 && + u32FixedField1 == other.u32FixedField1 && + u64VarField1 == other.u64VarField1 && + u64FixedField1 == other.u64FixedField1 && + u64TaggedField1 == other.u64TaggedField1 && u8Field2 == other.u8Field2 && u16Field2 == other.u16Field2 && u32VarField2 == other.u32VarField2 && u32FixedField2 == other.u32FixedField2 && @@ -667,15 +699,25 @@ struct UnsignedSchemaCompatible { u64TaggedField2 == other.u64TaggedField2; } }; -FORY_STRUCT(UnsignedSchemaCompatible, u8, u16, u32Var, u32Fixed, u64Var, - u64Fixed, u64Tagged, u8Field2, u16Field2, u32VarField2, - u32FixedField2, u64VarField2, u64FixedField2, u64TaggedField2); -FORY_FIELD_TAGS(UnsignedSchemaCompatible, (u8, 0, nullable), (u16, 1, nullable), - (u32Var, 2, nullable), (u32Fixed, 3, nullable), - (u64Var, 4, 
nullable), (u64Fixed, 5, nullable), - (u64Tagged, 6, nullable), (u8Field2, 7), (u16Field2, 8), - (u32VarField2, 9), (u32FixedField2, 10), (u64VarField2, 11), - (u64FixedField2, 12), (u64TaggedField2, 13)); +FORY_STRUCT(UnsignedSchemaCompatible, u8Field1, u16Field1, u32VarField1, + u32FixedField1, u64VarField1, u64FixedField1, u64TaggedField1, + u8Field2, u16Field2, u32VarField2, u32FixedField2, u64VarField2, + u64FixedField2, u64TaggedField2); +// Use new FORY_FIELD_CONFIG with builder pattern for encoding specification +// Group 1: nullable in C++ (std::optional), non-nullable in Java +// Group 2: non-nullable in C++, nullable in Java +FORY_FIELD_CONFIG( + UnsignedSchemaCompatible, (u8Field1, fory::F(0).nullable()), + (u16Field1, fory::F(1).nullable()), + (u32VarField1, fory::F(2).nullable().varint()), + (u32FixedField1, fory::F(3).nullable().fixed()), + (u64VarField1, fory::F(4).nullable().varint()), + (u64FixedField1, fory::F(5).nullable().fixed()), + (u64TaggedField1, fory::F(6).nullable().tagged()), (u8Field2, fory::F(7)), + (u16Field2, fory::F(8)), (u32VarField2, fory::F(9).varint()), + (u32FixedField2, fory::F(10).fixed()), (u64VarField2, fory::F(11).varint()), + (u64FixedField2, fory::F(12).fixed()), + (u64TaggedField2, fory::F(13).tagged())); namespace fory { namespace serialization { @@ -803,10 +845,16 @@ void AppendSerialized(Fory &fory, const T &value, std::vector &out) { Fory BuildFory(bool compatible = true, bool xlang = true, bool check_struct_version = false, bool track_ref = false) { + // In Java xlang mode, checkClassVersion is automatically set to true for + // SCHEMA_CONSISTENT mode (compatible=false). Match this behavior in C++. 
+ bool actual_check_version = check_struct_version; + if (xlang && !compatible) { + actual_check_version = true; + } return Fory::builder() .compatible(compatible) .xlang(xlang) - .check_struct_version(check_struct_version) + .check_struct_version(actual_check_version) .track_ref(track_ref) .build(); } @@ -857,6 +905,7 @@ void RunTestRefSchemaConsistent(const std::string &data_file); void RunTestRefCompatible(const std::string &data_file); void RunTestCircularRefSchemaConsistent(const std::string &data_file); void RunTestCircularRefCompatible(const std::string &data_file); +void RunTestUnsignedSchemaConsistentSimple(const std::string &data_file); void RunTestUnsignedSchemaConsistent(const std::string &data_file); void RunTestUnsignedSchemaCompatible(const std::string &data_file); } // namespace @@ -958,6 +1007,8 @@ int main(int argc, char **argv) { RunTestCircularRefSchemaConsistent(data_file); } else if (case_name == "test_circular_ref_compatible") { RunTestCircularRefCompatible(data_file); + } else if (case_name == "test_unsigned_schema_consistent_simple") { + RunTestUnsignedSchemaConsistentSimple(data_file); } else if (case_name == "test_unsigned_schema_consistent") { RunTestUnsignedSchemaConsistent(data_file); } else if (case_name == "test_unsigned_schema_compatible") { @@ -2160,26 +2211,6 @@ void RunTestNullableFieldSchemaConsistentNotNull(const std::string &data_file) { EnsureOk(fory.register_struct(401), "register NullableComprehensiveSchemaConsistent"); - // Debug: Print sorted field order - { - const char *debug_env = std::getenv("ENABLE_FORY_DEBUG_OUTPUT"); - if (debug_env && std::string(debug_env) == "1") { - using Helpers = fory::serialization::detail::CompileTimeFieldHelpers< - NullableComprehensiveSchemaConsistent>; - std::cerr << "[C++][fory-debug] NullableComprehensiveSchemaConsistent " - "sorted field order:\n"; - for (size_t i = 0; i < Helpers::FieldCount; ++i) { - size_t orig_idx = Helpers::sorted_indices[i]; - std::cerr << " [" << i << "] 
orig_idx=" << orig_idx - << " name=" << Helpers::sorted_field_names[i] - << " type_id=" << Helpers::type_ids[orig_idx] - << " nullable=" << Helpers::nullable_flags[orig_idx] - << " group=" << Helpers::group_rank(orig_idx) << "\n"; - } - std::cerr << std::endl; - } - } - NullableComprehensiveSchemaConsistent expected; // Base non-nullable primitive fields expected.byte_field = 1; @@ -2560,6 +2591,48 @@ void RunTestCircularRefCompatible(const std::string &data_file) { // Unsigned Number Tests // ============================================================================ +void RunTestUnsignedSchemaConsistentSimple(const std::string &data_file) { + auto bytes = ReadFile(data_file); + std::cerr << "[DEBUG] test_unsigned_schema_consistent_simple: read " + << bytes.size() << " bytes from " << data_file << std::endl; + // Print first 32 bytes as hex + std::cerr << "[DEBUG] First bytes: "; + for (size_t i = 0; i < std::min(bytes.size(), size_t(32)); ++i) { + std::cerr << std::hex << std::setw(2) << std::setfill('0') + << static_cast(bytes[i]) << " "; + } + std::cerr << std::dec << std::endl; + + // SCHEMA_CONSISTENT mode: compatible=false, xlang=true + auto fory = BuildFory(false, true, false, false); + EnsureOk(fory.register_struct(1), + "register UnsignedSchemaConsistentSimple"); + + Buffer buffer = MakeBuffer(bytes); + auto obj = ReadNext(fory, buffer); + std::cerr << "[DEBUG] Deserialized: u64Tagged=" << obj.u64Tagged + << ", u64TaggedNullable=" + << (obj.u64TaggedNullable.has_value() + ? 
std::to_string(obj.u64TaggedNullable.value()) + : "null") + << std::endl; + + // Verify fields + if (obj.u64Tagged != 1000000000) { + Fail("UnsignedSchemaConsistentSimple: u64Tagged should be 1000000000, got " + + std::to_string(obj.u64Tagged)); + } + if (!obj.u64TaggedNullable.has_value() || + obj.u64TaggedNullable.value() != 500000000) { + Fail("UnsignedSchemaConsistentSimple: u64TaggedNullable should be 500000000"); + } + + // Re-serialize and write back + std::vector out; + AppendSerialized(fory, obj, out); + WriteFile(data_file, out); +} + void RunTestUnsignedSchemaConsistent(const std::string &data_file) { auto bytes = ReadFile(data_file); // SCHEMA_CONSISTENT mode: compatible=false, xlang=true @@ -2571,66 +2644,90 @@ void RunTestUnsignedSchemaConsistent(const std::string &data_file) { auto obj = ReadNext(fory, buffer); // Verify primitive unsigned fields - if (obj.u8 != 200) { - Fail("UnsignedSchemaConsistent: u8 should be 200, got " + - std::to_string(obj.u8)); + if (obj.u8Field != 200) { + Fail("UnsignedSchemaConsistent: u8Field should be 200, got " + + std::to_string(obj.u8Field)); } - if (obj.u16 != 60000) { - Fail("UnsignedSchemaConsistent: u16 should be 60000, got " + - std::to_string(obj.u16)); + if (obj.u16Field != 60000) { + Fail("UnsignedSchemaConsistent: u16Field should be 60000, got " + + std::to_string(obj.u16Field)); } - if (obj.u32Var != 3000000000) { - Fail("UnsignedSchemaConsistent: u32Var should be 3000000000, got " + - std::to_string(obj.u32Var)); + if (obj.u32VarField != 3000000000) { + Fail("UnsignedSchemaConsistent: u32VarField should be 3000000000, got " + + std::to_string(obj.u32VarField)); } - if (obj.u32Fixed != 4000000000) { - Fail("UnsignedSchemaConsistent: u32Fixed should be 4000000000, got " + - std::to_string(obj.u32Fixed)); + if (obj.u32FixedField != 4000000000) { + Fail("UnsignedSchemaConsistent: u32FixedField should be 4000000000, got " + + std::to_string(obj.u32FixedField)); } - if (obj.u64Var != 10000000000) { - 
Fail("UnsignedSchemaConsistent: u64Var should be 10000000000, got " + - std::to_string(obj.u64Var)); + if (obj.u64VarField != 10000000000) { + Fail("UnsignedSchemaConsistent: u64VarField should be 10000000000, got " + + std::to_string(obj.u64VarField)); } - if (obj.u64Fixed != 15000000000) { - Fail("UnsignedSchemaConsistent: u64Fixed should be 15000000000, got " + - std::to_string(obj.u64Fixed)); + if (obj.u64FixedField != 15000000000) { + Fail("UnsignedSchemaConsistent: u64FixedField should be 15000000000, got " + + std::to_string(obj.u64FixedField)); } - if (obj.u64Tagged != 1000000000) { - Fail("UnsignedSchemaConsistent: u64Tagged should be 1000000000, got " + - std::to_string(obj.u64Tagged)); + if (obj.u64TaggedField != 1000000000) { + Fail("UnsignedSchemaConsistent: u64TaggedField should be 1000000000, got " + + std::to_string(obj.u64TaggedField)); } // Verify nullable unsigned fields - if (!obj.u8Nullable.has_value() || obj.u8Nullable.value() != 128) { - Fail("UnsignedSchemaConsistent: u8Nullable should be 128"); + if (!obj.u8NullableField.has_value() || obj.u8NullableField.value() != 128) { + Fail("UnsignedSchemaConsistent: u8NullableField should be 128"); } - if (!obj.u16Nullable.has_value() || obj.u16Nullable.value() != 40000) { - Fail("UnsignedSchemaConsistent: u16Nullable should be 40000"); + if (!obj.u16NullableField.has_value() || + obj.u16NullableField.value() != 40000) { + Fail("UnsignedSchemaConsistent: u16NullableField should be 40000"); } - if (!obj.u32VarNullable.has_value() || - obj.u32VarNullable.value() != 2500000000) { - Fail("UnsignedSchemaConsistent: u32VarNullable should be 2500000000"); + if (!obj.u32VarNullableField.has_value() || + obj.u32VarNullableField.value() != 2500000000) { + Fail("UnsignedSchemaConsistent: u32VarNullableField should be 2500000000"); } - if (!obj.u32FixedNullable.has_value() || - obj.u32FixedNullable.value() != 3500000000) { - Fail("UnsignedSchemaConsistent: u32FixedNullable should be 3500000000"); + if 
(!obj.u32FixedNullableField.has_value() || + obj.u32FixedNullableField.value() != 3500000000) { + Fail( + "UnsignedSchemaConsistent: u32FixedNullableField should be 3500000000"); } - if (!obj.u64VarNullable.has_value() || - obj.u64VarNullable.value() != 8000000000) { - Fail("UnsignedSchemaConsistent: u64VarNullable should be 8000000000"); + if (!obj.u64VarNullableField.has_value() || + obj.u64VarNullableField.value() != 8000000000) { + Fail("UnsignedSchemaConsistent: u64VarNullableField should be 8000000000"); } - if (!obj.u64FixedNullable.has_value() || - obj.u64FixedNullable.value() != 12000000000) { - Fail("UnsignedSchemaConsistent: u64FixedNullable should be 12000000000"); + if (!obj.u64FixedNullableField.has_value() || + obj.u64FixedNullableField.value() != 12000000000) { + Fail("UnsignedSchemaConsistent: u64FixedNullableField should be " + "12000000000"); } - if (!obj.u64TaggedNullable.has_value() || - obj.u64TaggedNullable.value() != 500000000) { - Fail("UnsignedSchemaConsistent: u64TaggedNullable should be 500000000"); + if (!obj.u64TaggedNullableField.has_value() || + obj.u64TaggedNullableField.value() != 500000000) { + Fail( + "UnsignedSchemaConsistent: u64TaggedNullableField should be 500000000"); } + // Debug: print field values before re-serialization + std::cerr << "[DEBUG] Before re-serialization:\n"; + std::cerr << " u8Field=" << static_cast(obj.u8Field) + << " u16Field=" << obj.u16Field + << " u32VarField=" << obj.u32VarField + << " u32FixedField=" << obj.u32FixedField << "\n"; + std::cerr << " u64VarField=" << obj.u64VarField + << " u64FixedField=" << obj.u64FixedField + << " u64TaggedField=" << obj.u64TaggedField << "\n"; + // Re-serialize and write back std::vector out; AppendSerialized(fory, obj, out); + + // Debug: print output bytes for inspection + std::cerr << "[DEBUG] Serialized " << out.size() << " bytes:\n"; + std::cerr << "[DEBUG] Hex: "; + for (size_t i = 0; i < std::min(out.size(), size_t(80)); ++i) { + std::cerr << std::hex << 
std::setw(2) << std::setfill('0') + << static_cast(out[i]); + } + std::cerr << std::dec << "\n"; + WriteFile(data_file, out); } @@ -2645,26 +2742,30 @@ void RunTestUnsignedSchemaCompatible(const std::string &data_file) { auto obj = ReadNext(fory, buffer); // Verify Group 1: Nullable fields (values from Java's non-nullable fields) - if (!obj.u8.has_value() || obj.u8.value() != 200) { - Fail("UnsignedSchemaCompatible: u8 should be 200"); + if (!obj.u8Field1.has_value() || obj.u8Field1.value() != 200) { + Fail("UnsignedSchemaCompatible: u8Field1 should be 200"); } - if (!obj.u16.has_value() || obj.u16.value() != 60000) { - Fail("UnsignedSchemaCompatible: u16 should be 60000"); + if (!obj.u16Field1.has_value() || obj.u16Field1.value() != 60000) { + Fail("UnsignedSchemaCompatible: u16Field1 should be 60000"); } - if (!obj.u32Var.has_value() || obj.u32Var.value() != 3000000000) { - Fail("UnsignedSchemaCompatible: u32Var should be 3000000000"); + if (!obj.u32VarField1.has_value() || obj.u32VarField1.value() != 3000000000) { + Fail("UnsignedSchemaCompatible: u32VarField1 should be 3000000000"); } - if (!obj.u32Fixed.has_value() || obj.u32Fixed.value() != 4000000000) { - Fail("UnsignedSchemaCompatible: u32Fixed should be 4000000000"); + if (!obj.u32FixedField1.has_value() || + obj.u32FixedField1.value() != 4000000000) { + Fail("UnsignedSchemaCompatible: u32FixedField1 should be 4000000000"); } - if (!obj.u64Var.has_value() || obj.u64Var.value() != 10000000000) { - Fail("UnsignedSchemaCompatible: u64Var should be 10000000000"); + if (!obj.u64VarField1.has_value() || + obj.u64VarField1.value() != 10000000000) { + Fail("UnsignedSchemaCompatible: u64VarField1 should be 10000000000"); } - if (!obj.u64Fixed.has_value() || obj.u64Fixed.value() != 15000000000) { - Fail("UnsignedSchemaCompatible: u64Fixed should be 15000000000"); + if (!obj.u64FixedField1.has_value() || + obj.u64FixedField1.value() != 15000000000) { + Fail("UnsignedSchemaCompatible: u64FixedField1 should be 
15000000000"); } - if (!obj.u64Tagged.has_value() || obj.u64Tagged.value() != 1000000000) { - Fail("UnsignedSchemaCompatible: u64Tagged should be 1000000000"); + if (!obj.u64TaggedField1.has_value() || + obj.u64TaggedField1.value() != 1000000000) { + Fail("UnsignedSchemaCompatible: u64TaggedField1 should be 1000000000"); } // Verify Group 2: Non-nullable fields (values from Java's nullable fields) @@ -2689,8 +2790,9 @@ void RunTestUnsignedSchemaCompatible(const std::string &data_file) { std::to_string(obj.u64VarField2)); } if (obj.u64FixedField2 != 12000000000) { - Fail("UnsignedSchemaCompatible: u64FixedField2 should be 12000000000, got " + - std::to_string(obj.u64FixedField2)); + Fail( + "UnsignedSchemaCompatible: u64FixedField2 should be 12000000000, got " + + std::to_string(obj.u64FixedField2)); } if (obj.u64TaggedField2 != 500000000) { Fail("UnsignedSchemaCompatible: u64TaggedField2 should be 500000000, got " + diff --git a/cpp/fory/util/buffer.h b/cpp/fory/util/buffer.h index 76350a1864..2be892ea6b 100644 --- a/cpp/fory/util/buffer.h +++ b/cpp/fory/util/buffer.h @@ -441,6 +441,70 @@ class Buffer { return result; } + /// Read uint64_t using tagged encoding at given offset. + /// Similar to GetVarUint64 but for tagged encoding: + /// - If bit 0 is 0: read 4 bytes, return value >> 1 + /// - If bit 0 is 1: read 1 byte flag + 8 bytes uint64 + FORY_ALWAYS_INLINE uint64_t GetTaggedUint64(uint32_t offset, + uint32_t *readBytesLength) { + uint32_t i = *reinterpret_cast(data_ + offset); + if ((i & 0b1) != 0b1) { + *readBytesLength = 4; + return static_cast(i >> 1); + } else { + *readBytesLength = 9; + return *reinterpret_cast(data_ + offset + 1); + } + } + + /// Read int64_t using tagged encoding at given offset. 
+ /// - If bit 0 is 0: read 4 bytes as signed int, return value >> 1 (arithmetic) + /// - If bit 0 is 1: read 1 byte flag + 8 bytes int64 + FORY_ALWAYS_INLINE int64_t GetTaggedInt64(uint32_t offset, + uint32_t *readBytesLength) { + int32_t i = *reinterpret_cast(data_ + offset); + if ((i & 0b1) != 0b1) { + *readBytesLength = 4; + return static_cast(i >> 1); // Arithmetic shift for signed + } else { + *readBytesLength = 9; + return *reinterpret_cast(data_ + offset + 1); + } + } + + /// Write uint64_t using tagged encoding at given offset. Returns bytes written. + /// - If value is in [0, 0x7fffffff]: write 4 bytes (value << 1), return 4 + /// - Otherwise: write 1 byte flag + 8 bytes uint64, return 9 + FORY_ALWAYS_INLINE uint32_t PutTaggedUint64(uint32_t offset, uint64_t value) { + constexpr uint64_t MAX_SMALL_VALUE = 0x7fffffff; // INT32_MAX as u64 + if (value <= MAX_SMALL_VALUE) { + *reinterpret_cast(data_ + offset) = + static_cast(value) << 1; + return 4; + } else { + data_[offset] = 0b1; + *reinterpret_cast(data_ + offset + 1) = value; + return 9; + } + } + + /// Write int64_t using tagged encoding at given offset. Returns bytes written. + /// - If value is in [-1073741824, 1073741823]: write 4 bytes (value << 1), return 4 + /// - Otherwise: write 1 byte flag + 8 bytes int64, return 9 + FORY_ALWAYS_INLINE uint32_t PutTaggedInt64(uint32_t offset, int64_t value) { + constexpr int64_t MIN_SMALL_VALUE = -1073741824; // -2^30 + constexpr int64_t MAX_SMALL_VALUE = 1073741823; // 2^30 - 1 + if (value >= MIN_SMALL_VALUE && value <= MAX_SMALL_VALUE) { + *reinterpret_cast(data_ + offset) = + static_cast(value) << 1; + return 4; + } else { + data_[offset] = 0b1; + *reinterpret_cast(data_ + offset + 1) = value; + return 9; + } + } + /// Write uint8_t value to buffer at current writer index. /// Automatically grows buffer and advances writer index. 
FORY_ALWAYS_INLINE void WriteUint8(uint8_t value) { From 67758cfdfca35a3440df3ef3b8ce6409a5e255a7 Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Sat, 10 Jan 2026 17:11:43 +0800 Subject: [PATCH 24/44] refactor rust field meta config parse --- rust/fory-core/src/meta/type_meta.rs | 23 +- rust/fory-derive/src/object/field_meta.rs | 258 +++++++++++++++++----- rust/fory-derive/src/object/misc.rs | 52 +++-- rust/fory-derive/src/object/read.rs | 14 +- rust/fory-derive/src/object/util.rs | 166 ++++++++------ rust/fory-derive/src/object/write.rs | 3 +- rust/tests/tests/test_cross_language.rs | 12 +- 7 files changed, 361 insertions(+), 167 deletions(-) diff --git a/rust/fory-core/src/meta/type_meta.rs b/rust/fory-core/src/meta/type_meta.rs index de34dad5c8..4ead56cbb3 100644 --- a/rust/fory-core/src/meta/type_meta.rs +++ b/rust/fory-core/src/meta/type_meta.rs @@ -751,18 +751,25 @@ impl TypeMeta { fn assign_field_ids(type_info_current: &TypeInfo, field_infos: &mut [FieldInfo]) { if crate::util::ENABLE_FORY_DEBUG_OUTPUT { - eprintln!("[fory-debug] assign_field_ids called for type: {:?}", type_info_current.get_type_name()); + eprintln!( + "[fory-debug] assign_field_ids called for type: {:?}", + type_info_current.get_type_name() + ); for f in field_infos.iter() { - eprintln!("[fory-debug] remote field before assign: name={}, field_id={}, type={:?}", - f.field_name, f.field_id, f.field_type); + eprintln!( + "[fory-debug] remote field before assign: name={}, field_id={}, type={:?}", + f.field_name, f.field_id, f.field_type + ); } } let type_meta = type_info_current.get_type_meta(); let local_field_infos = type_meta.get_field_infos(); if crate::util::ENABLE_FORY_DEBUG_OUTPUT { for f in local_field_infos.iter() { - eprintln!("[fory-debug] local field: name={}, field_id={}, type={:?}", - f.field_name, f.field_id, f.field_type); + eprintln!( + "[fory-debug] local field: name={}, field_id={}, type={:?}", + f.field_name, f.field_id, f.field_type + ); } } @@ -817,8 +824,10 @@ impl 
TypeMeta { // Assign SORTED INDEX for matching in generated code field.field_id = sorted_index as i16; if crate::util::ENABLE_FORY_DEBUG_OUTPUT { - eprintln!("[fory-debug] matched field: name={}, assigned_field_id={}", - field.field_name, field.field_id); + eprintln!( + "[fory-debug] matched field: name={}, assigned_field_id={}", + field.field_name, field.field_id + ); } } } diff --git a/rust/fory-derive/src/object/field_meta.rs b/rust/fory-derive/src/object/field_meta.rs index d7ae18ec63..4ccb5db065 100644 --- a/rust/fory-derive/src/object/field_meta.rs +++ b/rust/fory-derive/src/object/field_meta.rs @@ -22,25 +22,17 @@ //! - `nullable`: Whether the field can be null (default: false, except Option/RcWeak/ArcWeak) //! - `ref`: Whether to enable reference tracking (default: false, except Rc/Arc/RcWeak/ArcWeak) //! - `skip`: Skip this field during serialization -//! - `compress`: For u32 fields: true (VAR_UINT32, default) or false (UINT32 fixed) -//! - `encoding`: For u64 fields: "varint" (default), "fixed", or "tagged" +//! - `compress`: For i32/u32 fields: true (VARINT32/VAR_UINT32) or false (INT32/UINT32 fixed) +//! - `encoding`: For i32/u32: "varint", "fixed"; for u64: "varint", "fixed", "tagged" +//! +//! Both `compress` and `encoding` are converted to a `type_id` internally. If both are +//! specified, they must not conflict. 
+use fory_core::types::TypeId; use quote::ToTokens; use std::collections::HashMap; use syn::{Field, GenericArgument, PathArguments, Type}; -/// Encoding type for u64 fields -#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] -pub enum U64Encoding { - /// Variable-length encoding (VAR_UINT64, type id 14) - default - #[default] - Varint, - /// Fixed 8-byte encoding (UINT64, type id 13) - Fixed, - /// Tagged variable-length encoding (TAGGED_UINT64, type id 15) - Tagged, -} - /// Represents parsed `#[fory(...)]` field attributes #[derive(Debug, Clone, Default)] pub struct ForyFieldMeta { @@ -49,13 +41,12 @@ pub struct ForyFieldMeta { /// Whether the field can be null (None = use type-based default) pub nullable: Option, /// Whether to enable reference tracking (None = use type-based default) - pub ref_tracking: Option, + pub r#ref: Option, /// Whether to skip this field entirely pub skip: bool, - /// For u32 fields: true = VAR_UINT32 (default), false = UINT32 (fixed) - pub compress: Option, - /// For u64 fields: encoding type (varint/fixed/tagged) - pub encoding: Option, + /// Explicit type ID for encoding (e.g., INT32 vs VARINT32, UINT32 vs VAR_UINT32, etc.) + /// This is set by `compress` or `encoding` attributes. 
+ pub type_id: Option, } /// Type classification for determining default nullable/ref behavior @@ -97,8 +88,8 @@ impl ForyFieldMeta { /// Defaults: /// - `Rc`, `Arc`, `RcWeak`, `ArcWeak`: true (shared ownership types) /// - All other types: false - pub fn effective_ref_tracking(&self, type_class: FieldTypeClass) -> bool { - self.ref_tracking.unwrap_or(matches!( + pub fn effective_ref(&self, type_class: FieldTypeClass) -> bool { + self.r#ref.unwrap_or(matches!( type_class, FieldTypeClass::Rc | FieldTypeClass::Arc @@ -118,9 +109,26 @@ impl ForyFieldMeta { } } +/// Encoding specified via `compress` attribute +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum CompressEncoding { + Varint, + Fixed, +} + +/// Encoding specified via `encoding` attribute +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum ExplicitEncoding { + Varint, + Fixed, + Tagged, +} + /// Parse `#[fory(...)]` attributes from a field pub fn parse_field_meta(field: &Field) -> syn::Result { let mut meta = ForyFieldMeta::default(); + let mut compress_encoding: Option = None; + let mut explicit_encoding: Option = None; for attr in &field.attrs { if !attr.path().is_ident("fory") { @@ -140,32 +148,85 @@ pub fn parse_field_meta(field: &Field) -> syn::Result { meta.nullable = Some(value); } else if nested.path.is_ident("ref") { let value = parse_bool_or_flag(&nested)?; - meta.ref_tracking = Some(value); + meta.r#ref = Some(value); } else if nested.path.is_ident("skip") { meta.skip = true; } else if nested.path.is_ident("compress") { let value = parse_bool_or_flag(&nested)?; - meta.compress = Some(value); + compress_encoding = Some(if value { + CompressEncoding::Varint + } else { + CompressEncoding::Fixed + }); } else if nested.path.is_ident("encoding") { let lit: syn::LitStr = nested.value()?.parse()?; let encoding_str = lit.value(); - let encoding = match encoding_str.as_str() { - "varint" => U64Encoding::Varint, - "fixed" => U64Encoding::Fixed, - "tagged" => U64Encoding::Tagged, + explicit_encoding = 
Some(match encoding_str.as_str() { + "varint" => ExplicitEncoding::Varint, + "fixed" => ExplicitEncoding::Fixed, + "tagged" => ExplicitEncoding::Tagged, _ => { return Err(syn::Error::new( lit.span(), "encoding must be \"varint\", \"fixed\", or \"tagged\"", )); } - }; - meta.encoding = Some(encoding); + }); } Ok(()) })?; } + // Validate that compress and encoding don't conflict if both are specified + if let (Some(compress), Some(explicit)) = (compress_encoding, explicit_encoding) { + let compress_implies = match compress { + CompressEncoding::Varint => ExplicitEncoding::Varint, + CompressEncoding::Fixed => ExplicitEncoding::Fixed, + }; + // Only check conflict for varint/fixed (tagged is only for u64) + if explicit != ExplicitEncoding::Tagged && compress_implies != explicit { + let compress_str = match compress { + CompressEncoding::Varint => "true", + CompressEncoding::Fixed => "false", + }; + let encoding_str = match explicit { + ExplicitEncoding::Varint => "varint", + ExplicitEncoding::Fixed => "fixed", + ExplicitEncoding::Tagged => "tagged", + }; + return Err(syn::Error::new_spanned( + field, + format!( + "conflicting attributes: compress={} implies {} encoding, but encoding=\"{}\" was specified", + compress_str, + match compress { + CompressEncoding::Varint => "varint", + CompressEncoding::Fixed => "fixed", + }, + encoding_str + ), + )); + } + } + + // Convert encoding to type_id + // Priority: explicit_encoding > compress_encoding + // Note: The actual type_id depends on the field type (i32, u32, u64), but we store + // a "canonical" type_id here. The util.rs code will interpret it correctly. 
+ if let Some(explicit) = explicit_encoding { + meta.type_id = Some(match explicit { + // For varint, we use the signed variant as canonical; util.rs adjusts for unsigned + ExplicitEncoding::Varint => TypeId::VARINT32 as i16, + ExplicitEncoding::Fixed => TypeId::INT32 as i16, + ExplicitEncoding::Tagged => TypeId::TAGGED_UINT64 as i16, + }); + } else if let Some(compress) = compress_encoding { + meta.type_id = Some(match compress { + CompressEncoding::Varint => TypeId::VARINT32 as i16, + CompressEncoding::Fixed => TypeId::INT32 as i16, + }); + } + Ok(meta) } @@ -283,13 +344,13 @@ pub fn classify_field_type(ty: &Type) -> FieldTypeClass { } } -/// Get nullable and ref tracking flags for a field based on its type and metadata +/// Get nullable and ref flags for a field based on its type and metadata #[allow(dead_code)] pub fn get_field_flags(field: &Field, meta: &ForyFieldMeta) -> (bool, bool) { let type_class = classify_field_type(&field.ty); let nullable = meta.effective_nullable(type_class); - let ref_tracking = meta.effective_ref_tracking(type_class); - (nullable, ref_tracking) + let ref_flag = meta.effective_ref(type_class); + (nullable, ref_flag) } /// Parse field metadata for all fields and validate @@ -339,7 +400,7 @@ mod tests { let meta = parse_field_meta(&field).unwrap(); assert_eq!(meta.id, Some(0)); assert_eq!(meta.nullable, None); - assert_eq!(meta.ref_tracking, None); + assert_eq!(meta.r#ref, None); assert!(!meta.skip); } @@ -352,7 +413,7 @@ mod tests { let meta = parse_field_meta(&field).unwrap(); assert_eq!(meta.id, Some(1)); assert_eq!(meta.nullable, Some(true)); - assert_eq!(meta.ref_tracking, Some(false)); + assert_eq!(meta.r#ref, Some(false)); } #[test] @@ -364,7 +425,7 @@ mod tests { let meta = parse_field_meta(&field).unwrap(); assert_eq!(meta.id, Some(2)); assert_eq!(meta.nullable, Some(true)); - assert_eq!(meta.ref_tracking, Some(true)); + assert_eq!(meta.r#ref, Some(true)); } #[test] @@ -469,19 +530,19 @@ mod tests { } #[test] - fn 
test_effective_ref_tracking_defaults() { + fn test_effective_ref_defaults() { let meta = ForyFieldMeta::default(); // Rc, Arc, and RcWeak/ArcWeak have ref tracking by default - assert!(meta.effective_ref_tracking(FieldTypeClass::Rc)); - assert!(meta.effective_ref_tracking(FieldTypeClass::Arc)); - assert!(meta.effective_ref_tracking(FieldTypeClass::RcWeak)); - assert!(meta.effective_ref_tracking(FieldTypeClass::ArcWeak)); + assert!(meta.effective_ref(FieldTypeClass::Rc)); + assert!(meta.effective_ref(FieldTypeClass::Arc)); + assert!(meta.effective_ref(FieldTypeClass::RcWeak)); + assert!(meta.effective_ref(FieldTypeClass::ArcWeak)); // All others don't have ref tracking by default - assert!(!meta.effective_ref_tracking(FieldTypeClass::Primitive)); - assert!(!meta.effective_ref_tracking(FieldTypeClass::Option)); - assert!(!meta.effective_ref_tracking(FieldTypeClass::Other)); + assert!(!meta.effective_ref(FieldTypeClass::Primitive)); + assert!(!meta.effective_ref(FieldTypeClass::Option)); + assert!(!meta.effective_ref(FieldTypeClass::Other)); } #[test] @@ -490,10 +551,9 @@ mod tests { let meta = ForyFieldMeta { id: Some(0), nullable: Some(true), - ref_tracking: None, + r#ref: None, skip: false, - compress: None, - encoding: None, + type_id: None, }; assert!(meta.effective_nullable(FieldTypeClass::Primitive)); // Would be false by default @@ -501,79 +561,159 @@ mod tests { let meta = ForyFieldMeta { id: Some(0), nullable: None, - ref_tracking: Some(false), + r#ref: Some(false), skip: false, - compress: None, - encoding: None, + type_id: None, }; - assert!(!meta.effective_ref_tracking(FieldTypeClass::Rc)); // Would be true by default + assert!(!meta.effective_ref(FieldTypeClass::Rc)); // Would be true by default } #[test] fn test_parse_compress_attribute() { + // compress=false sets type_id to INT32 (fixed encoding) let field: Field = parse_quote! 
{ #[fory(compress = false)] value: u32 }; let meta = parse_field_meta(&field).unwrap(); - assert_eq!(meta.compress, Some(false)); + assert_eq!(meta.type_id, Some(TypeId::INT32 as i16)); + // compress=true sets type_id to VARINT32 (variable encoding) let field: Field = parse_quote! { #[fory(compress = true)] value: u32 }; let meta = parse_field_meta(&field).unwrap(); - assert_eq!(meta.compress, Some(true)); + assert_eq!(meta.type_id, Some(TypeId::VARINT32 as i16)); - // Standalone compress flag should be true + // Standalone compress flag should set to varint let field: Field = parse_quote! { #[fory(compress)] value: u32 }; let meta = parse_field_meta(&field).unwrap(); - assert_eq!(meta.compress, Some(true)); + assert_eq!(meta.type_id, Some(TypeId::VARINT32 as i16)); } #[test] fn test_parse_encoding_attribute() { + // encoding="varint" sets type_id to VARINT32 let field: Field = parse_quote! { #[fory(encoding = "varint")] value: u64 }; let meta = parse_field_meta(&field).unwrap(); - assert_eq!(meta.encoding, Some(U64Encoding::Varint)); + assert_eq!(meta.type_id, Some(TypeId::VARINT32 as i16)); + // encoding="fixed" sets type_id to INT32 let field: Field = parse_quote! { #[fory(encoding = "fixed")] value: u64 }; let meta = parse_field_meta(&field).unwrap(); - assert_eq!(meta.encoding, Some(U64Encoding::Fixed)); + assert_eq!(meta.type_id, Some(TypeId::INT32 as i16)); + // encoding="tagged" sets type_id to TAGGED_UINT64 let field: Field = parse_quote! { #[fory(encoding = "tagged")] value: u64 }; let meta = parse_field_meta(&field).unwrap(); - assert_eq!(meta.encoding, Some(U64Encoding::Tagged)); + assert_eq!(meta.type_id, Some(TypeId::TAGGED_UINT64 as i16)); + } + + #[test] + fn test_parse_encoding_for_i32_u32() { + // encoding="varint" for i32/u32 + let field: Field = parse_quote! 
{ + #[fory(encoding = "varint")] + value: i32 + }; + let meta = parse_field_meta(&field).unwrap(); + assert_eq!(meta.type_id, Some(TypeId::VARINT32 as i16)); + + // encoding="fixed" for i32/u32 + let field: Field = parse_quote! { + #[fory(encoding = "fixed")] + value: u32 + }; + let meta = parse_field_meta(&field).unwrap(); + assert_eq!(meta.type_id, Some(TypeId::INT32 as i16)); + } + + #[test] + fn test_compress_encoding_no_conflict() { + // compress=true with encoding="varint" - no conflict + let field: Field = parse_quote! { + #[fory(compress = true, encoding = "varint")] + value: i32 + }; + let meta = parse_field_meta(&field); + assert!(meta.is_ok()); + let meta = meta.unwrap(); + assert_eq!(meta.type_id, Some(TypeId::VARINT32 as i16)); + + // compress=false with encoding="fixed" - no conflict + let field: Field = parse_quote! { + #[fory(compress = false, encoding = "fixed")] + value: u32 + }; + let meta = parse_field_meta(&field); + assert!(meta.is_ok()); + let meta = meta.unwrap(); + assert_eq!(meta.type_id, Some(TypeId::INT32 as i16)); + } + + #[test] + fn test_compress_encoding_conflict() { + // compress=true with encoding="fixed" - conflict! + let field: Field = parse_quote! { + #[fory(compress = true, encoding = "fixed")] + value: i32 + }; + let result = parse_field_meta(&field); + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!(err.to_string().contains("conflicting")); + + // compress=false with encoding="varint" - conflict! + let field: Field = parse_quote! { + #[fory(compress = false, encoding = "varint")] + value: u32 + }; + let result = parse_field_meta(&field); + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!(err.to_string().contains("conflicting")); } #[test] fn test_parse_combined_attributes() { + // nullable with compress=false let field: Field = parse_quote! 
{ #[fory(nullable, compress = false)] value: Option }; let meta = parse_field_meta(&field).unwrap(); assert_eq!(meta.nullable, Some(true)); - assert_eq!(meta.compress, Some(false)); + assert_eq!(meta.type_id, Some(TypeId::INT32 as i16)); + // nullable with encoding="tagged" (for u64) let field: Field = parse_quote! { #[fory(nullable, encoding = "tagged")] value: Option }; let meta = parse_field_meta(&field).unwrap(); assert_eq!(meta.nullable, Some(true)); - assert_eq!(meta.encoding, Some(U64Encoding::Tagged)); + assert_eq!(meta.type_id, Some(TypeId::TAGGED_UINT64 as i16)); + + // encoding="fixed" for Option + let field: Field = parse_quote! { + #[fory(nullable, encoding = "fixed")] + value: Option + }; + let meta = parse_field_meta(&field).unwrap(); + assert_eq!(meta.nullable, Some(true)); + assert_eq!(meta.type_id, Some(TypeId::INT32 as i16)); } } diff --git a/rust/fory-derive/src/object/misc.rs b/rust/fory-derive/src/object/misc.rs index f3581a907e..34d84c3c16 100644 --- a/rust/fory-derive/src/object/misc.rs +++ b/rust/fory-derive/src/object/misc.rs @@ -21,7 +21,7 @@ use std::sync::atomic::{AtomicU32, Ordering}; use syn::Field; use super::field_meta::{ - classify_field_type, extract_option_inner_type, is_option_type, parse_field_meta, U64Encoding, + classify_field_type, extract_option_inner_type, is_option_type, parse_field_meta, }; use super::util::{ classify_trait_object_field, generic_tree_to_tokens, get_filtered_source_fields_iter, @@ -89,7 +89,7 @@ pub fn gen_field_fields_info(source_fields: &[SourceField<'_>]) -> TokenStream { // but we also need to detect that the outer wrapper is Option for nullable. 
let is_outer_option = is_option_type(ty); let nullable = meta.effective_nullable(type_class) || is_outer_option; - let ref_tracking = meta.effective_ref_tracking(type_class); + let ref_tracking = meta.effective_ref(type_class); // Only use explicit field ID when user sets #[fory(id = N)] // Otherwise use -1 to indicate field name encoding should be used let field_id = if meta.uses_tag_id() { @@ -100,35 +100,47 @@ pub fn gen_field_fields_info(source_fields: &[SourceField<'_>]) -> TokenStream { match classify_trait_object_field(ty) { StructField::None => { - // Check if this is a u32/u64 field (or Option/Option) with encoding attributes + // Check if this is an i32/u32/u64 field (or Option/Option/Option) with encoding attributes // In this case, we need to generate the FieldType with the correct type ID directly let inner_ty = extract_option_inner_type(ty).unwrap_or_else(|| ty.clone()); let inner_ty_str = quote::ToTokens::to_token_stream(&inner_ty) .to_string() .replace(' ', ""); - let is_u32_with_encoding = inner_ty_str == "u32" && meta.compress.is_some(); - let is_u64_with_encoding = inner_ty_str == "u64" && meta.encoding.is_some(); + let has_encoding = + (inner_ty_str == "i32" || inner_ty_str == "u32" || inner_ty_str == "u64") + && meta.type_id.is_some(); - if is_u32_with_encoding || is_u64_with_encoding { - // Generate FieldType directly with the correct type ID - let type_id_ts = if is_u32_with_encoding { - if meta.compress == Some(false) { + if has_encoding { + // Generate FieldType directly with the correct type ID based on meta.type_id + let type_id_ts = match (inner_ty_str.as_str(), meta.type_id) { + // i32: VARINT32 (default) or INT32 (fixed) + ("i32", Some(tid)) if tid == fory_core::types::TypeId::INT32 as i16 => { + quote! { fory_core::types::TypeId::INT32 as u32 } + } + ("i32", _) => { + quote! 
{ fory_core::types::TypeId::VARINT32 as u32 } + } + // u32: VAR_UINT32 (default) or UINT32 (fixed) + ("u32", Some(tid)) if tid == fory_core::types::TypeId::INT32 as i16 => { quote! { fory_core::types::TypeId::UINT32 as u32 } - } else { + } + ("u32", _) => { quote! { fory_core::types::TypeId::VAR_UINT32 as u32 } } - } else { - // u64 with encoding attribute - match meta.encoding { - Some(U64Encoding::Fixed) => { - quote! { fory_core::types::TypeId::UINT64 as u32 } - } - Some(U64Encoding::Tagged) => { - quote! { fory_core::types::TypeId::TAGGED_UINT64 as u32 } - } - _ => quote! { fory_core::types::TypeId::VAR_UINT64 as u32 }, + // u64: VAR_UINT64 (default), UINT64 (fixed), or TAGGED_UINT64 (tagged) + ("u64", Some(tid)) if tid == fory_core::types::TypeId::INT32 as i16 => { + quote! { fory_core::types::TypeId::UINT64 as u32 } + } + ("u64", Some(tid)) + if tid == fory_core::types::TypeId::TAGGED_UINT64 as i16 => + { + quote! { fory_core::types::TypeId::TAGGED_UINT64 as u32 } + } + ("u64", _) => { + quote! { fory_core::types::TypeId::VAR_UINT64 as u32 } } + _ => unreachable!(), }; quote! { diff --git a/rust/fory-derive/src/object/read.rs b/rust/fory-derive/src/object/read.rs index a98a0e9f51..a76156cd7b 100644 --- a/rust/fory-derive/src/object/read.rs +++ b/rust/fory-derive/src/object/read.rs @@ -127,10 +127,7 @@ fn gen_compatible_unsigned_read( /// Generate compatible mode read code for u8/u16 Option fields /// These need special handling because when remote field is non-nullable, /// Java sends just the raw bytes without a ref flag -fn gen_compatible_primitive_option_read( - prim_type: &str, - var_name: &Ident, -) -> TokenStream { +fn gen_compatible_primitive_option_read(prim_type: &str, var_name: &Ident) -> TokenStream { let read_value = match prim_type { "u8" => quote! { context.reader.read_u8()? }, "u16" => quote! { context.reader.read_u16()? 
}, @@ -379,7 +376,8 @@ pub fn gen_read_field(field: &Field, private_ident: &Ident, field_name: &str) -> } else { // Numeric primitives: use direct buffer methods // For u32/u64, consider encoding attributes - let reader_method = get_primitive_reader_method_with_encoding(&type_name, &meta); + let reader_method = + get_primitive_reader_method_with_encoding(&type_name, &meta); let reader_ident = syn::Ident::new(reader_method, proc_macro2::Span::call_site()); quote! { @@ -692,7 +690,11 @@ pub(crate) fn gen_read_compatible_match_arm_body( // Check if this is a u32/u64 field that needs encoding-aware reading if let Some(unsigned_type) = is_unsigned_encoding_type(ty) { - gen_compatible_unsigned_read(unsigned_type, var_name, is_option_type || dec_by_option) + gen_compatible_unsigned_read( + unsigned_type, + var_name, + is_option_type || dec_by_option, + ) } else if is_option_type { // Check if it's Option or Option which need special handling if let Some(prim_type) = is_compatible_primitive_type(ty) { diff --git a/rust/fory-derive/src/object/util.rs b/rust/fory-derive/src/object/util.rs index 019c2f9c55..33a3bc56fe 100644 --- a/rust/fory-derive/src/object/util.rs +++ b/rust/fory-derive/src/object/util.rs @@ -762,36 +762,52 @@ pub(super) fn get_primitive_writer_method(type_name: &str) -> &'static str { /// Get the writer method name for a primitive numeric type, considering encoding attributes. 
/// +/// For i32 fields: +/// - type_id=VARINT32 (default): write_varint32 +/// - type_id=INT32: write_i32 (fixed 4-byte) +/// /// For u32 fields: -/// - compress=true (default): write_varuint32 -/// - compress=false: write_u32 (fixed 4-byte) +/// - type_id=VARINT32/VAR_UINT32 (default): write_varuint32 +/// - type_id=INT32/UINT32: write_u32 (fixed 4-byte) /// /// For u64 fields: -/// - encoding="varint" (default): write_varuint64 -/// - encoding="fixed": write_u64 (fixed 8-byte) -/// - encoding="tagged": write_tagged_varuint64 +/// - type_id=VARINT32/VAR_UINT64 (default): write_varuint64 +/// - type_id=INT32/UINT64: write_u64 (fixed 8-byte) +/// - type_id=TAGGED_UINT64: write_tagged_u64 pub(super) fn get_primitive_writer_method_with_encoding( type_name: &str, meta: &super::field_meta::ForyFieldMeta, ) -> &'static str { - use super::field_meta::U64Encoding; + use fory_core::types::TypeId; - // Handle u32 with compress attribute + // Handle i32 with type_id + if type_name == "i32" { + if let Some(type_id) = meta.type_id { + if type_id == TypeId::INT32 as i16 { + return "write_i32"; // Fixed 4-byte encoding + } + } + return "write_varint32"; // Variable-length (default) + } + + // Handle u32 with type_id if type_name == "u32" { - if let Some(false) = meta.compress { - return "write_u32"; // Fixed 4-byte encoding + if let Some(type_id) = meta.type_id { + if type_id == TypeId::INT32 as i16 || type_id == TypeId::UINT32 as i16 { + return "write_u32"; // Fixed 4-byte encoding + } } return "write_varuint32"; // Variable-length (default) } - // Handle u64 with encoding attribute + // Handle u64 with type_id if type_name == "u64" { - if let Some(encoding) = meta.encoding { - return match encoding { - U64Encoding::Varint => "write_varuint64", - U64Encoding::Fixed => "write_u64", - U64Encoding::Tagged => "write_tagged_u64", - }; + if let Some(type_id) = meta.type_id { + if type_id == TypeId::INT32 as i16 || type_id == TypeId::UINT64 as i16 { + return "write_u64"; // Fixed 
8-byte encoding + } else if type_id == TypeId::TAGGED_UINT64 as i16 { + return "write_tagged_u64"; // Tagged variable-length + } } return "write_varuint64"; // Variable-length (default) } @@ -812,36 +828,52 @@ pub(super) fn get_primitive_reader_method(type_name: &str) -> &'static str { /// Get the reader method name for a primitive numeric type, considering encoding attributes. /// +/// For i32 fields: +/// - type_id=VARINT32 (default): read_varint32 +/// - type_id=INT32: read_i32 (fixed 4-byte) +/// /// For u32 fields: -/// - compress=true (default): read_varuint32 -/// - compress=false: read_u32 (fixed 4-byte) +/// - type_id=VARINT32/VAR_UINT32 (default): read_varuint32 +/// - type_id=INT32/UINT32: read_u32 (fixed 4-byte) /// /// For u64 fields: -/// - encoding="varint" (default): read_varuint64 -/// - encoding="fixed": read_u64 (fixed 8-byte) -/// - encoding="tagged": read_tagged_varuint64 +/// - type_id=VARINT32/VAR_UINT64 (default): read_varuint64 +/// - type_id=INT32/UINT64: read_u64 (fixed 8-byte) +/// - type_id=TAGGED_UINT64: read_tagged_u64 pub(super) fn get_primitive_reader_method_with_encoding( type_name: &str, meta: &super::field_meta::ForyFieldMeta, ) -> &'static str { - use super::field_meta::U64Encoding; + use fory_core::types::TypeId; + + // Handle i32 with type_id + if type_name == "i32" { + if let Some(type_id) = meta.type_id { + if type_id == TypeId::INT32 as i16 { + return "read_i32"; // Fixed 4-byte encoding + } + } + return "read_varint32"; // Variable-length (default) + } - // Handle u32 with compress attribute + // Handle u32 with type_id if type_name == "u32" { - if let Some(false) = meta.compress { - return "read_u32"; // Fixed 4-byte encoding + if let Some(type_id) = meta.type_id { + if type_id == TypeId::INT32 as i16 || type_id == TypeId::UINT32 as i16 { + return "read_u32"; // Fixed 4-byte encoding + } } return "read_varuint32"; // Variable-length (default) } - // Handle u64 with encoding attribute + // Handle u64 with type_id if 
type_name == "u64" { - if let Some(encoding) = meta.encoding { - return match encoding { - U64Encoding::Varint => "read_varuint64", - U64Encoding::Fixed => "read_u64", - U64Encoding::Tagged => "read_tagged_u64", - }; + if let Some(type_id) = meta.type_id { + if type_id == TypeId::INT32 as i16 || type_id == TypeId::UINT64 as i16 { + return "read_u64"; // Fixed 8-byte encoding + } else if type_id == TypeId::TAGGED_UINT64 as i16 { + return "read_tagged_u64"; // Tagged variable-length + } } return "read_varuint64"; // Variable-length (default) } @@ -850,19 +882,17 @@ pub(super) fn get_primitive_reader_method_with_encoding( get_primitive_reader_method(type_name) } -/// Check if a type is Option or Option that needs encoding-aware handling -/// based on the field metadata (compress or encoding attributes). +/// Check if a type is Option, Option, or Option that needs encoding-aware handling +/// based on the field metadata (type_id attribute). pub(super) fn is_option_encoding_primitive( ty: &Type, meta: &super::field_meta::ForyFieldMeta, ) -> bool { if let Some(inner_name) = get_option_inner_primitive_name(ty) { - // For u32, check compress attribute - if inner_name == "u32" && meta.compress.is_some() { - return true; - } - // For u64, check encoding attribute - if inner_name == "u64" && meta.encoding.is_some() { + // For i32/u32/u64, check if type_id is set + if (inner_name == "i32" || inner_name == "u32" || inner_name == "u64") + && meta.type_id.is_some() + { return true; } } @@ -1267,40 +1297,40 @@ struct FieldFingerprintInfo { is_option_type: bool, } -/// Adjusts type ID based on encoding attributes for u32/u64 fields. +/// Adjusts type ID based on encoding attributes for i32/u32/u64 fields. 
/// -/// For u32 fields: -/// - compress=true (default): VAR_UINT32 (12) -/// - compress=false: UINT32 (11, fixed) -/// -/// For u64 fields: -/// - encoding="varint" (default): VAR_UINT64 (14) -/// - encoding="fixed": UINT64 (13, fixed 8-byte) -/// - encoding="tagged": TAGGED_UINT64 (15) -fn adjust_type_id_for_encoding( - base_type_id: u32, - meta: &super::field_meta::ForyFieldMeta, -) -> u32 { - use super::field_meta::U64Encoding; +/// The type_id in meta represents the desired encoding: +/// - VARINT32: variable-length for i32/u32 +/// - INT32: fixed 4-byte for i32, u32 +/// - TAGGED_UINT64: tagged variable-length for u64 +fn adjust_type_id_for_encoding(base_type_id: u32, meta: &super::field_meta::ForyFieldMeta) -> u32 { + // If no explicit type_id is set, use the base type_id + let Some(explicit_type_id) = meta.type_id else { + return base_type_id; + }; + + // Handle i32 fields + if base_type_id == TypeId::VARINT32 as u32 { + if explicit_type_id == TypeId::INT32 as i16 { + return TypeId::INT32 as u32; // Fixed 4-byte encoding + } + return base_type_id; // VARINT32 (default) + } - // Handle u32 fields with compress attribute + // Handle u32 fields if base_type_id == TypeId::VAR_UINT32 as u32 { - if let Some(compress) = meta.compress { - if !compress { - return TypeId::UINT32 as u32; // Fixed 4-byte encoding - } + if explicit_type_id == TypeId::INT32 as i16 { + return TypeId::UINT32 as u32; // Fixed 4-byte encoding } return base_type_id; // VAR_UINT32 (default) } - // Handle u64 fields with encoding attribute + // Handle u64 fields if base_type_id == TypeId::VAR_UINT64 as u32 { - if let Some(encoding) = meta.encoding { - return match encoding { - U64Encoding::Varint => TypeId::VAR_UINT64 as u32, - U64Encoding::Fixed => TypeId::UINT64 as u32, - U64Encoding::Tagged => TypeId::TAGGED_UINT64 as u32, - }; + if explicit_type_id == TypeId::INT32 as i16 { + return TypeId::UINT64 as u32; // Fixed 8-byte encoding + } else if explicit_type_id == TypeId::TAGGED_UINT64 as i16 
{ + return TypeId::TAGGED_UINT64 as u32; // Tagged variable-length } return base_type_id; // VAR_UINT64 (default) } @@ -1332,7 +1362,7 @@ fn compute_struct_fingerprint(fields: &[&Field]) -> String { }; let type_class = classify_field_type(&field.ty); - let ref_tracking = meta.effective_ref_tracking(type_class); + let ref_tracking = meta.effective_ref(type_class); let explicit_nullable = meta.nullable; // Get compile-time TypeId, considering encoding attributes for u32/u64 fields @@ -1441,7 +1471,7 @@ pub(crate) fn determine_field_ref_mode(field: &syn::Field) -> FieldRefMode { let meta = parse_field_meta(field).unwrap_or_default(); let type_class = classify_field_type(&field.ty); let nullable = meta.effective_nullable(type_class); - let ref_tracking = meta.effective_ref_tracking(type_class); + let ref_tracking = meta.effective_ref(type_class); if ref_tracking { FieldRefMode::Tracking diff --git a/rust/fory-derive/src/object/write.rs b/rust/fory-derive/src/object/write.rs index f6f47224f1..8300e8784b 100644 --- a/rust/fory-derive/src/object/write.rs +++ b/rust/fory-derive/src/object/write.rs @@ -282,7 +282,8 @@ fn gen_write_field_impl( } else { // Numeric primitives: use direct buffer methods // For u32/u64, consider encoding attributes - let writer_method = get_primitive_writer_method_with_encoding(&type_name, &meta); + let writer_method = + get_primitive_writer_method_with_encoding(&type_name, &meta); let writer_ident = syn::Ident::new(writer_method, proc_macro2::Span::call_site()); // For primitives: diff --git a/rust/tests/tests/test_cross_language.rs b/rust/tests/tests/test_cross_language.rs index c988508a8b..6022673828 100644 --- a/rust/tests/tests/test_cross_language.rs +++ b/rust/tests/tests/test_cross_language.rs @@ -1897,14 +1897,14 @@ fn test_circular_ref_compatible() { #[fory(debug)] struct UnsignedSchemaConsistent { // Primitive unsigned fields (non-nullable, use Field suffix to avoid reserved keywords) - u8_field: u8, // UINT8 - fixed 8-bit - u16_field: 
u16, // UINT16 - fixed 16-bit - u32_var_field: u32, // VAR_UINT32 - variable-length (default) + u8_field: u8, // UINT8 - fixed 8-bit + u16_field: u16, // UINT16 - fixed 16-bit + u32_var_field: u32, // VAR_UINT32 - variable-length (default) #[fory(compress = false)] - u32_fixed_field: u32, // UINT32 - fixed 4-byte - u64_var_field: u64, // VAR_UINT64 - variable-length (default) + u32_fixed_field: u32, // UINT32 - fixed 4-byte + u64_var_field: u64, // VAR_UINT64 - variable-length (default) #[fory(encoding = "fixed")] - u64_fixed_field: u64, // UINT64 - fixed 8-byte + u64_fixed_field: u64, // UINT64 - fixed 8-byte #[fory(encoding = "tagged")] u64_tagged_field: u64, // TAGGED_UINT64 From aaa66e8d9e32c3052641c6a5430628e8a803a7a3 Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Sat, 10 Jan 2026 17:12:07 +0800 Subject: [PATCH 25/44] update go test --- go/fory/tests/xlang/xlang_test_main.go | 118 ++++++++++++------------- 1 file changed, 59 insertions(+), 59 deletions(-) diff --git a/go/fory/tests/xlang/xlang_test_main.go b/go/fory/tests/xlang/xlang_test_main.go index 282b58b1de..e2e31b69b6 100644 --- a/go/fory/tests/xlang/xlang_test_main.go +++ b/go/fory/tests/xlang/xlang_test_main.go @@ -2170,23 +2170,23 @@ type UnsignedSchemaConsistentSimple struct { } type UnsignedSchemaConsistent struct { - // Primitive unsigned fields (non-nullable) - U8 uint8 // UINT8 - fixed 8-bit - U16 uint16 // UINT16 - fixed 16-bit - U32Var uint32 `fory:"compress=true"` // VAR_UINT32 - variable-length - U32Fixed uint32 `fory:"compress=false"` // UINT32 - fixed 4-byte - U64Var uint64 `fory:"encoding=varint"` // VAR_UINT64 - variable-length - U64Fixed uint64 `fory:"encoding=fixed"` // UINT64 - fixed 8-byte - U64Tagged uint64 `fory:"encoding=tagged"` // TAGGED_UINT64 - tagged encoding + // Primitive unsigned fields (non-nullable, use Field suffix to avoid reserved keywords) + U8Field uint8 // UINT8 - fixed 8-bit + U16Field uint16 // UINT16 - fixed 16-bit + U32VarField uint32 `fory:"compress=true"` 
// VAR_UINT32 - variable-length + U32FixedField uint32 `fory:"compress=false"` // UINT32 - fixed 4-byte + U64VarField uint64 `fory:"encoding=varint"` // VAR_UINT64 - variable-length + U64FixedField uint64 `fory:"encoding=fixed"` // UINT64 - fixed 8-byte + U64TaggedField uint64 `fory:"encoding=tagged"` // TAGGED_UINT64 - tagged encoding // Nullable unsigned fields (pointers) - U8Nullable *uint8 `fory:"nullable"` - U16Nullable *uint16 `fory:"nullable"` - U32VarNullable *uint32 `fory:"nullable,compress=true"` - U32FixedNullable *uint32 `fory:"nullable,compress=false"` - U64VarNullable *uint64 `fory:"nullable,encoding=varint"` - U64FixedNullable *uint64 `fory:"nullable,encoding=fixed"` - U64TaggedNullable *uint64 `fory:"nullable,encoding=tagged"` + U8NullableField *uint8 `fory:"nullable"` + U16NullableField *uint16 `fory:"nullable"` + U32VarNullableField *uint32 `fory:"nullable,compress=true"` + U32FixedNullableField *uint32 `fory:"nullable,compress=false"` + U64VarNullableField *uint64 `fory:"nullable,encoding=varint"` + U64FixedNullableField *uint64 `fory:"nullable,encoding=fixed"` + U64TaggedNullableField *uint64 `fory:"nullable,encoding=tagged"` } // UnsignedSchemaCompatible - Test struct for unsigned numbers in COMPATIBLE mode. 
@@ -2195,17 +2195,17 @@ type UnsignedSchemaConsistent struct { // Matches Java's UnsignedSchemaCompatible (type id 502) type UnsignedSchemaCompatible struct { // Group 1: Nullable in Go (pointers), non-nullable in Java - U8 *uint8 `fory:"nullable"` - U16 *uint16 `fory:"nullable"` - U32Var *uint32 `fory:"nullable,compress=true"` - U32Fixed *uint32 `fory:"nullable,compress=false"` - U64Var *uint64 `fory:"nullable,encoding=varint"` - U64Fixed *uint64 `fory:"nullable,encoding=fixed"` - U64Tagged *uint64 `fory:"nullable,encoding=tagged"` + U8Field1 *uint8 `fory:"nullable"` + U16Field1 *uint16 `fory:"nullable"` + U32VarField1 *uint32 `fory:"nullable,compress=true"` + U32FixedField1 *uint32 `fory:"nullable,compress=false"` + U64VarField1 *uint64 `fory:"nullable,encoding=varint"` + U64FixedField1 *uint64 `fory:"nullable,encoding=fixed"` + U64TaggedField1 *uint64 `fory:"nullable,encoding=tagged"` // Group 2: Non-nullable in Go, nullable in Java - U8Field2 uint8 - U16Field2 uint16 + U8Field2 uint8 + U16Field2 uint16 U32VarField2 uint32 `fory:"compress=true"` U32FixedField2 uint32 `fory:"compress=false"` U64VarField2 uint64 `fory:"encoding=varint"` @@ -2265,35 +2265,35 @@ func testUnsignedSchemaConsistent() { result := getUnsignedSchemaConsistent(obj) // Verify primitive unsigned fields - assertEqual(uint8(200), result.U8, "U8") - assertEqual(uint16(60000), result.U16, "U16") - assertEqual(uint32(3000000000), result.U32Var, "U32Var") - assertEqual(uint32(4000000000), result.U32Fixed, "U32Fixed") - assertEqual(uint64(10000000000), result.U64Var, "U64Var") - assertEqual(uint64(15000000000), result.U64Fixed, "U64Fixed") - assertEqual(uint64(1000000000), result.U64Tagged, "U64Tagged") + assertEqual(uint8(200), result.U8Field, "U8Field") + assertEqual(uint16(60000), result.U16Field, "U16Field") + assertEqual(uint32(3000000000), result.U32VarField, "U32VarField") + assertEqual(uint32(4000000000), result.U32FixedField, "U32FixedField") + assertEqual(uint64(10000000000), 
result.U64VarField, "U64VarField") + assertEqual(uint64(15000000000), result.U64FixedField, "U64FixedField") + assertEqual(uint64(1000000000), result.U64TaggedField, "U64TaggedField") // Verify nullable unsigned fields - if result.U8Nullable == nil || *result.U8Nullable != 128 { - panic(fmt.Sprintf("U8Nullable mismatch: expected 128, got %v", result.U8Nullable)) + if result.U8NullableField == nil || *result.U8NullableField != 128 { + panic(fmt.Sprintf("U8NullableField mismatch: expected 128, got %v", result.U8NullableField)) } - if result.U16Nullable == nil || *result.U16Nullable != 40000 { - panic(fmt.Sprintf("U16Nullable mismatch: expected 40000, got %v", result.U16Nullable)) + if result.U16NullableField == nil || *result.U16NullableField != 40000 { + panic(fmt.Sprintf("U16NullableField mismatch: expected 40000, got %v", result.U16NullableField)) } - if result.U32VarNullable == nil || *result.U32VarNullable != 2500000000 { - panic(fmt.Sprintf("U32VarNullable mismatch: expected 2500000000, got %v", result.U32VarNullable)) + if result.U32VarNullableField == nil || *result.U32VarNullableField != 2500000000 { + panic(fmt.Sprintf("U32VarNullableField mismatch: expected 2500000000, got %v", result.U32VarNullableField)) } - if result.U32FixedNullable == nil || *result.U32FixedNullable != 3500000000 { - panic(fmt.Sprintf("U32FixedNullable mismatch: expected 3500000000, got %v", result.U32FixedNullable)) + if result.U32FixedNullableField == nil || *result.U32FixedNullableField != 3500000000 { + panic(fmt.Sprintf("U32FixedNullableField mismatch: expected 3500000000, got %v", result.U32FixedNullableField)) } - if result.U64VarNullable == nil || *result.U64VarNullable != 8000000000 { - panic(fmt.Sprintf("U64VarNullable mismatch: expected 8000000000, got %v", result.U64VarNullable)) + if result.U64VarNullableField == nil || *result.U64VarNullableField != 8000000000 { + panic(fmt.Sprintf("U64VarNullableField mismatch: expected 8000000000, got %v", result.U64VarNullableField)) 
} - if result.U64FixedNullable == nil || *result.U64FixedNullable != 12000000000 { - panic(fmt.Sprintf("U64FixedNullable mismatch: expected 12000000000, got %v", result.U64FixedNullable)) + if result.U64FixedNullableField == nil || *result.U64FixedNullableField != 12000000000 { + panic(fmt.Sprintf("U64FixedNullableField mismatch: expected 12000000000, got %v", result.U64FixedNullableField)) } - if result.U64TaggedNullable == nil || *result.U64TaggedNullable != 500000000 { - panic(fmt.Sprintf("U64TaggedNullable mismatch: expected 500000000, got %v", result.U64TaggedNullable)) + if result.U64TaggedNullableField == nil || *result.U64TaggedNullableField != 500000000 { + panic(fmt.Sprintf("U64TaggedNullableField mismatch: expected 500000000, got %v", result.U64TaggedNullableField)) } serialized, err := f.Serialize(result) @@ -2323,26 +2323,26 @@ func testUnsignedSchemaCompatible() { result := getUnsignedSchemaCompatible(obj) // Verify Group 1: Nullable fields (values from Java's non-nullable fields) - if result.U8 == nil || *result.U8 != 200 { - panic(fmt.Sprintf("U8 mismatch: expected 200, got %v", result.U8)) + if result.U8Field1 == nil || *result.U8Field1 != 200 { + panic(fmt.Sprintf("U8Field1 mismatch: expected 200, got %v", result.U8Field1)) } - if result.U16 == nil || *result.U16 != 60000 { - panic(fmt.Sprintf("U16 mismatch: expected 60000, got %v", result.U16)) + if result.U16Field1 == nil || *result.U16Field1 != 60000 { + panic(fmt.Sprintf("U16Field1 mismatch: expected 60000, got %v", result.U16Field1)) } - if result.U32Var == nil || *result.U32Var != 3000000000 { - panic(fmt.Sprintf("U32Var mismatch: expected 3000000000, got %v", result.U32Var)) + if result.U32VarField1 == nil || *result.U32VarField1 != 3000000000 { + panic(fmt.Sprintf("U32VarField1 mismatch: expected 3000000000, got %v", result.U32VarField1)) } - if result.U32Fixed == nil || *result.U32Fixed != 4000000000 { - panic(fmt.Sprintf("U32Fixed mismatch: expected 4000000000, got %v", result.U32Fixed)) 
+ if result.U32FixedField1 == nil || *result.U32FixedField1 != 4000000000 { + panic(fmt.Sprintf("U32FixedField1 mismatch: expected 4000000000, got %v", result.U32FixedField1)) } - if result.U64Var == nil || *result.U64Var != 10000000000 { - panic(fmt.Sprintf("U64Var mismatch: expected 10000000000, got %v", result.U64Var)) + if result.U64VarField1 == nil || *result.U64VarField1 != 10000000000 { + panic(fmt.Sprintf("U64VarField1 mismatch: expected 10000000000, got %v", result.U64VarField1)) } - if result.U64Fixed == nil || *result.U64Fixed != 15000000000 { - panic(fmt.Sprintf("U64Fixed mismatch: expected 15000000000, got %v", result.U64Fixed)) + if result.U64FixedField1 == nil || *result.U64FixedField1 != 15000000000 { + panic(fmt.Sprintf("U64FixedField1 mismatch: expected 15000000000, got %v", result.U64FixedField1)) } - if result.U64Tagged == nil || *result.U64Tagged != 1000000000 { - panic(fmt.Sprintf("U64Tagged mismatch: expected 1000000000, got %v", result.U64Tagged)) + if result.U64TaggedField1 == nil || *result.U64TaggedField1 != 1000000000 { + panic(fmt.Sprintf("U64TaggedField1 mismatch: expected 1000000000, got %v", result.U64TaggedField1)) } // Verify Group 2: Non-nullable fields (values from Java's nullable fields) From 9ed0723c6d5ce7ef1529eb40a2edab042dba9184 Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Sat, 10 Jan 2026 17:19:00 +0800 Subject: [PATCH 26/44] format code --- AGENTS.md | 2 +- cpp/fory/meta/field.h | 3 +- cpp/fory/serialization/struct_serializer.h | 74 ++++++++++--------- cpp/fory/serialization/type_resolver.h | 9 ++- cpp/fory/serialization/xlang_test_main.cc | 69 ++++++++--------- cpp/fory/util/buffer.h | 20 +++-- .../specification/xlang_serialization_spec.md | 12 +-- .../apache/fory/annotation/Uint64Type.java | 11 ++- .../org/apache/fory/config/ForyBuilder.java | 7 +- .../org/apache/fory/config/LongEncoding.java | 4 +- .../java/org/apache/fory/meta/FieldTypes.java | 5 +- .../apache/fory/resolver/ClassResolver.java | 1 - 
.../fory/serializer/MetaSharedSerializer.java | 16 +++- .../java/org/apache/fory/type/Descriptor.java | 22 +----- .../main/java/org/apache/fory/type/Types.java | 45 +---------- .../org/apache/fory/xlang/XlangTestBase.java | 4 +- python/pyfory/tests/xlang_test_main.py | 24 ++++-- 17 files changed, 151 insertions(+), 177 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 334c581227..7ae97b7173 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -13,7 +13,7 @@ While working on Fory, please remember: - **Git-Tracked Files**: When reading code, skip all files not tracked by git by default unless generated by yourself. - **Cross-Language Consistency**: Maintain consistency across language implementations while respecting language-specific idioms. - **Graalvm Support using fory codegen**: For graalvm, please use `fory codegen` to generate the serializer when building graalvm native image, do not use graallvm reflect-related configuration unless for JDK `proxy`. -- **Xlang Type System**: Java `native mode(xlang=false)` shares same type systems between type id from `Types.BOOL~Types.STRING` with `xlang mode(xlang=true)`, but for other types, java `native mode` has different type ids. +- **Xlang Type System**: Java `native mode(xlang=false)` shares same type systems between type id from `Types.BOOL~Types.STRING` with `xlang mode(xlang=true)`, but for other types, java `native mode` has different type ids. 
## Build and Development Commands diff --git a/cpp/fory/meta/field.h b/cpp/fory/meta/field.h index d9ae03b537..7bcdc4e049 100644 --- a/cpp/fory/meta/field.h +++ b/cpp/fory/meta/field.h @@ -259,7 +259,8 @@ template inline constexpr bool has_field_config_v = ForyFieldConfigImpl::has_config; /// Helper to get field encoding from ForyFieldConfigImpl -template struct GetFieldConfigEntry { +template +struct GetFieldConfigEntry { static constexpr Encoding encoding = Encoding::Default; static constexpr int16_t id = -1; static constexpr bool nullable = false; diff --git a/cpp/fory/serialization/struct_serializer.h b/cpp/fory/serialization/struct_serializer.h index e0ed8d8f69..29c1d31f6e 100644 --- a/cpp/fory/serialization/struct_serializer.h +++ b/cpp/fory/serialization/struct_serializer.h @@ -231,7 +231,8 @@ FORY_ALWAYS_INLINE uint32_t put_varint_at(T value, Buffer &buffer, return buffer.PutVarUint32(offset, static_cast(value)); } else if constexpr (std::is_same_v || std::is_same_v) { - // Unsigned 64-bit varint (no zigzag) - used for VAR_UINT64 and TAGGED_UINT64 + // Unsigned 64-bit varint (no zigzag) - used for VAR_UINT64 and + // TAGGED_UINT64 return buffer.PutVarUint64(offset, static_cast(value)); } else { static_assert(sizeof(T) == 0, "Unsupported varint type"); @@ -1624,8 +1625,8 @@ inline constexpr bool is_raw_primitive_v = is_raw_primitive::value; /// The caller must convert to the correct local type. 
template FORY_ALWAYS_INLINE TargetType read_primitive_by_type_id(ReadContext &ctx, - uint32_t type_id, - Error &error) { + uint32_t type_id, + Error &error) { // Read based on remote type_id encoding, then convert to TargetType switch (static_cast(type_id)) { case TypeId::BOOL: @@ -1637,7 +1638,8 @@ FORY_ALWAYS_INLINE TargetType read_primitive_by_type_id(ReadContext &ctx, case TypeId::INT16: return static_cast(ctx.read_int16(error)); case TypeId::UINT16: - return static_cast(static_cast(ctx.read_int16(error))); + return static_cast( + static_cast(ctx.read_int16(error))); case TypeId::INT32: // INT32 uses fixed encoding return static_cast(ctx.read_int32(error)); @@ -1646,7 +1648,8 @@ FORY_ALWAYS_INLINE TargetType read_primitive_by_type_id(ReadContext &ctx, return static_cast(ctx.read_varint32(error)); case TypeId::UINT32: // UINT32 uses fixed 4-byte encoding - return static_cast(static_cast(ctx.read_int32(error))); + return static_cast( + static_cast(ctx.read_int32(error))); case TypeId::VAR_UINT32: // VAR_UINT32 uses varint encoding return static_cast(ctx.read_varuint32(error)); @@ -1661,7 +1664,8 @@ FORY_ALWAYS_INLINE TargetType read_primitive_by_type_id(ReadContext &ctx, return static_cast(ctx.read_tagged_int64(error)); case TypeId::UINT64: // UINT64 uses fixed 8-byte encoding - return static_cast(static_cast(ctx.read_int64(error))); + return static_cast( + static_cast(ctx.read_int64(error))); case TypeId::VAR_UINT64: // VAR_UINT64 uses varint encoding return static_cast(ctx.read_varuint64(error)); @@ -1842,7 +1846,8 @@ void read_single_field_by_index(T &obj, ReadContext &ctx) { obj.*field_ptr = read_value(); } } else { - // Special handling for std::optional with encoding config + // Special handling for std::optional with encoding + // config constexpr bool is_encoded_optional_uint = ::fory::detail::has_field_config_v && (std::is_same_v> || @@ -1854,13 +1859,15 @@ void read_single_field_by_index(T &obj, ReadContext &ctx) { #ifdef ENABLE_FORY_DEBUG_OUTPUT 
std::cerr << "[DEBUG] is_encoded_optional_uint: Index=" << Index << ", enc=" << static_cast(enc) - << ", reader_index=" << ctx.buffer().reader_index() << std::endl; + << ", reader_index=" << ctx.buffer().reader_index() + << std::endl; #endif // Read nullable flag int8_t flag = ctx.read_int8(ctx.error()); #ifdef ENABLE_FORY_DEBUG_OUTPUT std::cerr << "[DEBUG] After read flag: flag=" << static_cast(flag) - << ", reader_index=" << ctx.buffer().reader_index() << std::endl; + << ", reader_index=" << ctx.buffer().reader_index() + << std::endl; #endif if (FORY_PREDICT_FALSE(ctx.has_error())) { return; @@ -1874,8 +1881,7 @@ void read_single_field_by_index(T &obj, ReadContext &ctx) { return; } // Read the value with encoding-aware reading - using InnerType = - typename std::remove_reference_t::value_type; + using InnerType = typename std::remove_reference_t::value_type; InnerType value; if constexpr (std::is_same_v) { if constexpr (enc == Encoding::Varint) { @@ -1971,24 +1977,24 @@ void read_single_field_by_index_compatible(T &obj, ReadContext &ctx, << ", buffer pos=" << ctx.buffer().reader_index() << std::endl; #endif - // In compatible mode, handle primitive fields specially to use remote encoding. - // This is critical for schema evolution where encoding differs between sender/receiver. + // In compatible mode, handle primitive fields specially to use remote + // encoding. This is critical for schema evolution where encoding differs + // between sender/receiver. 
constexpr bool is_raw_prim = is_raw_primitive_v; constexpr bool is_local_optional = is_optional_v; // Case 1: Local raw primitive, any remote ref mode - // For primitives, we must use remote_type_id encoding regardless of nullability + // For primitives, we must use remote_type_id encoding regardless of + // nullability if constexpr (is_raw_prim && is_primitive_field) { if (remote_ref_mode == RefMode::None) { // Remote is non-nullable, no ref flag if constexpr (is_fory_field_v) { - (obj.*field_ptr).value = - read_primitive_by_type_id(ctx, remote_type_id, - ctx.error()); + (obj.*field_ptr).value = read_primitive_by_type_id( + ctx, remote_type_id, ctx.error()); } else { - obj.*field_ptr = - read_primitive_by_type_id(ctx, remote_type_id, - ctx.error()); + obj.*field_ptr = read_primitive_by_type_id( + ctx, remote_type_id, ctx.error()); } return; } else { @@ -2005,13 +2011,11 @@ void read_single_field_by_index_compatible(T &obj, ReadContext &ctx, } // NOT_NULL_VALUE_FLAG or REF_VALUE_FLAG - read the value if constexpr (is_fory_field_v) { - (obj.*field_ptr).value = - read_primitive_by_type_id(ctx, remote_type_id, - ctx.error()); + (obj.*field_ptr).value = read_primitive_by_type_id( + ctx, remote_type_id, ctx.error()); } else { - obj.*field_ptr = - read_primitive_by_type_id(ctx, remote_type_id, - ctx.error()); + obj.*field_ptr = read_primitive_by_type_id( + ctx, remote_type_id, ctx.error()); } return; } @@ -2083,11 +2087,9 @@ void read_single_field_by_index_compatible(T &obj, ReadContext &ctx, /// Sets handled=true if field was matched. 
/// @param remote_type_id The type_id from the remote schema (for encoding) template -FORY_ALWAYS_INLINE void -dispatch_compatible_field_read_impl(T &obj, ReadContext &ctx, int16_t field_id, - RefMode remote_ref_mode, - uint32_t remote_type_id, bool &handled, - std::index_sequence) { +FORY_ALWAYS_INLINE void dispatch_compatible_field_read_impl( + T &obj, ReadContext &ctx, int16_t field_id, RefMode remote_ref_mode, + uint32_t remote_type_id, bool &handled, std::index_sequence) { using Helpers = CompileTimeFieldHelpers; // Short-circuit fold: stops at first match @@ -2252,7 +2254,8 @@ FORY_ALWAYS_INLINE T read_varint_at(Buffer &buffer, uint32_t &offset) { return raw; } else if constexpr (std::is_same_v || std::is_same_v) { - // Unsigned 64-bit varint (no zigzag) - used for VAR_UINT64 and TAGGED_UINT64 + // Unsigned 64-bit varint (no zigzag) - used for VAR_UINT64 and + // TAGGED_UINT64 uint64_t raw = buffer.GetVarUint64(offset, &bytes_read); offset += bytes_read; return raw; @@ -2445,10 +2448,9 @@ void read_struct_fields_compatible(T &obj, ReadContext &ctx, // Uses fold expression with short-circuit - no lambda overhead // Pass remote type_id for correct encoding in compatible mode bool handled = false; - dispatch_compatible_field_read_impl(obj, ctx, field_id, remote_ref_mode, - remote_field.field_type.type_id, - handled, - std::index_sequence{}); + dispatch_compatible_field_read_impl( + obj, ctx, field_id, remote_ref_mode, remote_field.field_type.type_id, + handled, std::index_sequence{}); if (!handled) { // Shouldn't happen if TypeMeta::assign_field_ids worked correctly diff --git a/cpp/fory/serialization/type_resolver.h b/cpp/fory/serialization/type_resolver.h index 403c7c2782..1ef0ffca07 100644 --- a/cpp/fory/serialization/type_resolver.h +++ b/cpp/fory/serialization/type_resolver.h @@ -569,7 +569,8 @@ template struct FieldInfoBuilder { const auto field_names = decltype(meta)::Names; const auto field_ptrs = decltype(meta)::Ptrs; - // Convert camelCase field name 
to snake_case for cross-language compatibility + // Convert camelCase field name to snake_case for cross-language + // compatibility std::string_view original_name = field_names[Index]; constexpr size_t max_snake_len = 128; // Reasonable max for field names auto [snake_buffer, snake_len] = @@ -592,7 +593,8 @@ template struct FieldInfoBuilder { FieldType field_type = FieldTypeBuilder::build(false); - // Override type_id for unsigned types based on encoding from FORY_FIELD_CONFIG + // Override type_id for unsigned types based on encoding from + // FORY_FIELD_CONFIG using InnerType = unwrap_optional_inner_t; constexpr uint32_t unsigned_tid = compute_unsigned_type_id(); @@ -1046,7 +1048,8 @@ TypeResolver::build_struct_type_info(uint32_t type_id, std::string ns, entry->name_to_index.reserve(field_count); for (size_t i = 0; i < field_count; ++i) { - // Convert camelCase field name to snake_case for cross-language compatibility + // Convert camelCase field name to snake_case for cross-language + // compatibility constexpr size_t max_snake_len = 128; auto [snake_buffer, snake_len] = ::fory::to_snake_case(field_names[i]); diff --git a/cpp/fory/serialization/xlang_test_main.cc b/cpp/fory/serialization/xlang_test_main.cc index e8f77224fe..73f42f9bae 100644 --- a/cpp/fory/serialization/xlang_test_main.cc +++ b/cpp/fory/serialization/xlang_test_main.cc @@ -588,8 +588,8 @@ FORY_FIELD_TAGS(CircularRefStruct, (name, 0), (selfRef, 1, nullable, ref)); // UnsignedSchemaConsistentSimple (type id 1) // A simple test struct for unsigned numbers with tagged encoding. 
struct UnsignedSchemaConsistentSimple { - uint64_t u64Tagged; // TAGGED_UINT64 - std::optional u64TaggedNullable; // TAGGED_UINT64, nullable + uint64_t u64Tagged; // TAGGED_UINT64 + std::optional u64TaggedNullable; // TAGGED_UINT64, nullable bool operator==(const UnsignedSchemaConsistentSimple &other) const { return u64Tagged == other.u64Tagged && @@ -597,10 +597,9 @@ struct UnsignedSchemaConsistentSimple { } }; FORY_STRUCT(UnsignedSchemaConsistentSimple, u64Tagged, u64TaggedNullable); -FORY_FIELD_CONFIG( - UnsignedSchemaConsistentSimple, - (u64Tagged, fory::F(0).tagged()), - (u64TaggedNullable, fory::F(1).nullable().tagged())); +FORY_FIELD_CONFIG(UnsignedSchemaConsistentSimple, + (u64Tagged, fory::F(0).tagged()), + (u64TaggedNullable, fory::F(1).nullable().tagged())); // UnsignedSchemaConsistent (type id 501) // Test struct for unsigned numbers in SCHEMA_CONSISTENT mode. @@ -647,18 +646,19 @@ FORY_STRUCT(UnsignedSchemaConsistent, u8Field, u16Field, u32VarField, u32FixedNullableField, u64VarNullableField, u64FixedNullableField, u64TaggedNullableField); // Use new FORY_FIELD_CONFIG with builder pattern for encoding specification -FORY_FIELD_CONFIG( - UnsignedSchemaConsistent, (u8Field, fory::F(0)), (u16Field, fory::F(1)), - (u32VarField, fory::F(2).varint()), (u32FixedField, fory::F(3).fixed()), - (u64VarField, fory::F(4).varint()), (u64FixedField, fory::F(5).fixed()), - (u64TaggedField, fory::F(6).tagged()), - (u8NullableField, fory::F(7).nullable()), - (u16NullableField, fory::F(8).nullable()), - (u32VarNullableField, fory::F(9).nullable().varint()), - (u32FixedNullableField, fory::F(10).nullable().fixed()), - (u64VarNullableField, fory::F(11).nullable().varint()), - (u64FixedNullableField, fory::F(12).nullable().fixed()), - (u64TaggedNullableField, fory::F(13).nullable().tagged())); +FORY_FIELD_CONFIG(UnsignedSchemaConsistent, (u8Field, fory::F(0)), + (u16Field, fory::F(1)), (u32VarField, fory::F(2).varint()), + (u32FixedField, fory::F(3).fixed()), + 
(u64VarField, fory::F(4).varint()), + (u64FixedField, fory::F(5).fixed()), + (u64TaggedField, fory::F(6).tagged()), + (u8NullableField, fory::F(7).nullable()), + (u16NullableField, fory::F(8).nullable()), + (u32VarNullableField, fory::F(9).nullable().varint()), + (u32FixedNullableField, fory::F(10).nullable().fixed()), + (u64VarNullableField, fory::F(11).nullable().varint()), + (u64FixedNullableField, fory::F(12).nullable().fixed()), + (u64TaggedNullableField, fory::F(13).nullable().tagged())); // UnsignedSchemaCompatible (type id 502) // Test struct for unsigned numbers in COMPATIBLE mode. @@ -706,18 +706,19 @@ FORY_STRUCT(UnsignedSchemaCompatible, u8Field1, u16Field1, u32VarField1, // Use new FORY_FIELD_CONFIG with builder pattern for encoding specification // Group 1: nullable in C++ (std::optional), non-nullable in Java // Group 2: non-nullable in C++, nullable in Java -FORY_FIELD_CONFIG( - UnsignedSchemaCompatible, (u8Field1, fory::F(0).nullable()), - (u16Field1, fory::F(1).nullable()), - (u32VarField1, fory::F(2).nullable().varint()), - (u32FixedField1, fory::F(3).nullable().fixed()), - (u64VarField1, fory::F(4).nullable().varint()), - (u64FixedField1, fory::F(5).nullable().fixed()), - (u64TaggedField1, fory::F(6).nullable().tagged()), (u8Field2, fory::F(7)), - (u16Field2, fory::F(8)), (u32VarField2, fory::F(9).varint()), - (u32FixedField2, fory::F(10).fixed()), (u64VarField2, fory::F(11).varint()), - (u64FixedField2, fory::F(12).fixed()), - (u64TaggedField2, fory::F(13).tagged())); +FORY_FIELD_CONFIG(UnsignedSchemaCompatible, (u8Field1, fory::F(0).nullable()), + (u16Field1, fory::F(1).nullable()), + (u32VarField1, fory::F(2).nullable().varint()), + (u32FixedField1, fory::F(3).nullable().fixed()), + (u64VarField1, fory::F(4).nullable().varint()), + (u64FixedField1, fory::F(5).nullable().fixed()), + (u64TaggedField1, fory::F(6).nullable().tagged()), + (u8Field2, fory::F(7)), (u16Field2, fory::F(8)), + (u32VarField2, fory::F(9).varint()), + (u32FixedField2, 
fory::F(10).fixed()), + (u64VarField2, fory::F(11).varint()), + (u64FixedField2, fory::F(12).fixed()), + (u64TaggedField2, fory::F(13).tagged())); namespace fory { namespace serialization { @@ -2619,12 +2620,14 @@ void RunTestUnsignedSchemaConsistentSimple(const std::string &data_file) { // Verify fields if (obj.u64Tagged != 1000000000) { - Fail("UnsignedSchemaConsistentSimple: u64Tagged should be 1000000000, got " + - std::to_string(obj.u64Tagged)); + Fail( + "UnsignedSchemaConsistentSimple: u64Tagged should be 1000000000, got " + + std::to_string(obj.u64Tagged)); } if (!obj.u64TaggedNullable.has_value() || obj.u64TaggedNullable.value() != 500000000) { - Fail("UnsignedSchemaConsistentSimple: u64TaggedNullable should be 500000000"); + Fail("UnsignedSchemaConsistentSimple: u64TaggedNullable should be " + "500000000"); } // Re-serialize and write back diff --git a/cpp/fory/util/buffer.h b/cpp/fory/util/buffer.h index 2be892ea6b..1c3581715e 100644 --- a/cpp/fory/util/buffer.h +++ b/cpp/fory/util/buffer.h @@ -458,7 +458,8 @@ class Buffer { } /// Read int64_t using tagged encoding at given offset. - /// - If bit 0 is 0: read 4 bytes as signed int, return value >> 1 (arithmetic) + /// - If bit 0 is 0: read 4 bytes as signed int, return value >> 1 + /// (arithmetic) /// - If bit 0 is 1: read 1 byte flag + 8 bytes int64 FORY_ALWAYS_INLINE int64_t GetTaggedInt64(uint32_t offset, uint32_t *readBytesLength) { @@ -472,14 +473,15 @@ class Buffer { } } - /// Write uint64_t using tagged encoding at given offset. Returns bytes written. + /// Write uint64_t using tagged encoding at given offset. Returns bytes + /// written. 
/// - If value is in [0, 0x7fffffff]: write 4 bytes (value << 1), return 4 /// - Otherwise: write 1 byte flag + 8 bytes uint64, return 9 FORY_ALWAYS_INLINE uint32_t PutTaggedUint64(uint32_t offset, uint64_t value) { constexpr uint64_t MAX_SMALL_VALUE = 0x7fffffff; // INT32_MAX as u64 if (value <= MAX_SMALL_VALUE) { - *reinterpret_cast(data_ + offset) = - static_cast(value) << 1; + *reinterpret_cast(data_ + offset) = static_cast(value) + << 1; return 4; } else { data_[offset] = 0b1; @@ -488,15 +490,17 @@ class Buffer { } } - /// Write int64_t using tagged encoding at given offset. Returns bytes written. - /// - If value is in [-1073741824, 1073741823]: write 4 bytes (value << 1), return 4 + /// Write int64_t using tagged encoding at given offset. Returns bytes + /// written. + /// - If value is in [-1073741824, 1073741823]: write 4 bytes (value << 1), + /// return 4 /// - Otherwise: write 1 byte flag + 8 bytes int64, return 9 FORY_ALWAYS_INLINE uint32_t PutTaggedInt64(uint32_t offset, int64_t value) { constexpr int64_t MIN_SMALL_VALUE = -1073741824; // -2^30 constexpr int64_t MAX_SMALL_VALUE = 1073741823; // 2^30 - 1 if (value >= MIN_SMALL_VALUE && value <= MAX_SMALL_VALUE) { - *reinterpret_cast(data_ + offset) = - static_cast(value) << 1; + *reinterpret_cast(data_ + offset) = static_cast(value) + << 1; return 4; } else { data_[offset] = 0b1; diff --git a/docs/specification/xlang_serialization_spec.md b/docs/specification/xlang_serialization_spec.md index acd2de9572..be6f27f35f 100644 --- a/docs/specification/xlang_serialization_spec.md +++ b/docs/specification/xlang_serialization_spec.md @@ -161,17 +161,17 @@ custom types (struct/ext/enum). 
User type IDs are in a separate namespace and co | 2 | INT8 | 8-bit signed integer | | 3 | INT16 | 16-bit signed integer | | 4 | INT32 | 32-bit signed integer | -| 5 | VARINT32 | Variable-length encoded 32-bit signed integer | +| 5 | VARINT32 | Variable-length encoded 32-bit signed integer | | 6 | INT64 | 64-bit signed integer | -| 7 | VARINT64 | Variable-length encoded 64-bit signed integer | -| 8 | TAGGED_INT64 | Hybrid encoded 64-bit signed integer | +| 7 | VARINT64 | Variable-length encoded 64-bit signed integer | +| 8 | TAGGED_INT64 | Hybrid encoded 64-bit signed integer | | 9 | UINT8 | 8-bit unsigned integer | | 10 | UINT16 | 16-bit unsigned integer | | 11 | UINT32 | 32-bit unsigned integer | -| 12 | VAR_UINT32 | Variable-length encoded 32-bit unsigned integer | +| 12 | VAR_UINT32 | Variable-length encoded 32-bit unsigned integer | | 13 | UINT64 | 64-bit unsigned integer | -| 14 | VAR_UINT64 | Variable-length encoded 64-bit unsigned integer | -| 15 | TAGGED_UINT64 | Hybrid encoded 64-bit unsigned integer | +| 14 | VAR_UINT64 | Variable-length encoded 64-bit unsigned integer | +| 15 | TAGGED_UINT64 | Hybrid encoded 64-bit unsigned integer | | 16 | FLOAT16 | 16-bit floating point (half precision) | | 17 | FLOAT32 | 32-bit floating point (single precision) | | 18 | FLOAT64 | 64-bit floating point (double precision) | diff --git a/java/fory-core/src/main/java/org/apache/fory/annotation/Uint64Type.java b/java/fory-core/src/main/java/org/apache/fory/annotation/Uint64Type.java index 2fd8c6af97..9f550d5c4f 100644 --- a/java/fory-core/src/main/java/org/apache/fory/annotation/Uint64Type.java +++ b/java/fory-core/src/main/java/org/apache/fory/annotation/Uint64Type.java @@ -36,18 +36,17 @@ *

        *
      • {@link LongEncoding#VARINT} (default): Variable-length encoding (VAR_UINT64, type_id=14), * compact for small values - *
      • {@link LongEncoding#FIXED}: Fixed 8-byte encoding (UINT64, type_id=13), consistent - * size - *
      • {@link LongEncoding#TAGGED}: Tagged encoding (TAGGED_UINT64, type_id=15) that uses 4 - * bytes for values in range [0, 2147483647], otherwise 9 bytes + *
      • {@link LongEncoding#FIXED}: Fixed 8-byte encoding (UINT64, type_id=13), consistent size + *
      • {@link LongEncoding#TAGGED}: Tagged encoding (TAGGED_UINT64, type_id=15) that uses 4 bytes + * for values in range [0, 2147483647], otherwise 9 bytes *
      * *

      Benefits: * *

        *
      • With {@link LongEncoding#VARINT}: skips zigzag encoding overhead for non-negative values - *
      • With {@link LongEncoding#TAGGED}: uses unsigned range [0, 2147483647] for 4-byte - * encoding instead of signed range [-1073741824, 1073741823] + *
      • With {@link LongEncoding#TAGGED}: uses unsigned range [0, 2147483647] for 4-byte encoding + * instead of signed range [-1073741824, 1073741823] *
      • Compatible with languages that have native unsigned integer types (e.g., Rust's u64, Go's * uint64, C++'s uint64_t) *
      diff --git a/java/fory-core/src/main/java/org/apache/fory/config/ForyBuilder.java b/java/fory-core/src/main/java/org/apache/fory/config/ForyBuilder.java index e1320ddb97..4a2b60b077 100644 --- a/java/fory-core/src/main/java/org/apache/fory/config/ForyBuilder.java +++ b/java/fory-core/src/main/java/org/apache/fory/config/ForyBuilder.java @@ -188,12 +188,11 @@ public ForyBuilder withIntCompressed(boolean intCompressed) { } /** - * Use variable length encoding for long. Enabled by default, use {@link - * LongEncoding#TAGGED} (Small long as int) for long encoding. + * Use variable length encoding for long. Enabled by default, use {@link LongEncoding#TAGGED} + * (Small long as int) for long encoding. */ public ForyBuilder withLongCompressed(boolean longCompressed) { - return withLongCompressed( - longCompressed ? LongEncoding.TAGGED : LongEncoding.FIXED); + return withLongCompressed(longCompressed ? LongEncoding.TAGGED : LongEncoding.FIXED); } /** Use variable length encoding for long. */ diff --git a/java/fory-core/src/main/java/org/apache/fory/config/LongEncoding.java b/java/fory-core/src/main/java/org/apache/fory/config/LongEncoding.java index f7f1e56659..f6c598e98c 100644 --- a/java/fory-core/src/main/java/org/apache/fory/config/LongEncoding.java +++ b/java/fory-core/src/main/java/org/apache/fory/config/LongEncoding.java @@ -30,8 +30,8 @@ public enum LongEncoding { * value) << 1 |` *
    • Otherwise write as 9 bytes: `| 0b1 | little-endian 8bytes long |`. * - *

      Faster than {@link #VARINT}, but compression is not good as {@link #VARINT} such as - * for ints in short range. + *

      Faster than {@link #VARINT}, but compression is not good as {@link #VARINT} such as for + * ints in short range. */ TAGGED, /** diff --git a/java/fory-core/src/main/java/org/apache/fory/meta/FieldTypes.java b/java/fory-core/src/main/java/org/apache/fory/meta/FieldTypes.java index 8753a3fd22..5f3e3c27f1 100644 --- a/java/fory-core/src/main/java/org/apache/fory/meta/FieldTypes.java +++ b/java/fory-core/src/main/java/org/apache/fory/meta/FieldTypes.java @@ -222,7 +222,7 @@ public boolean nullable() { */ public abstract TypeRef toTypeToken(TypeResolver classResolver, TypeRef declared); - public String getTypeName(TypeResolver resolver, TypeRef typeRef) { + public String getTypeName(TypeResolver resolver, TypeRef typeRef) { return typeRef.getType().getTypeName(); } @@ -760,7 +760,8 @@ public TypeRef toTypeToken(TypeResolver classResolver, TypeRef declared) { @Override public String getTypeName(TypeResolver resolver, TypeRef typeRef) { - // For native mode, this return same `Array` type to ensure consistent order even some array type + // For native mode, this return same `Array` type to ensure consistent order even some array + // type // is not exist on current deserialization process. // For primitive/registered array, it goes to RegisteredFieldType. return "Array"; diff --git a/java/fory-core/src/main/java/org/apache/fory/resolver/ClassResolver.java b/java/fory-core/src/main/java/org/apache/fory/resolver/ClassResolver.java index bba1fed804..4c5f37e951 100644 --- a/java/fory-core/src/main/java/org/apache/fory/resolver/ClassResolver.java +++ b/java/fory-core/src/main/java/org/apache/fory/resolver/ClassResolver.java @@ -735,7 +735,6 @@ public boolean isInternalRegistered(int classId) { return classId != NO_CLASS_ID && classId < innerEndClassId; } - /** Returns true if cls is fory inner registered class. 
*/ public boolean isInternalRegistered(Class cls) { Short classId = extRegistry.registeredClassIdMap.get(cls); diff --git a/java/fory-core/src/main/java/org/apache/fory/serializer/MetaSharedSerializer.java b/java/fory-core/src/main/java/org/apache/fory/serializer/MetaSharedSerializer.java index 4ebf038957..5191fa7af4 100644 --- a/java/fory-core/src/main/java/org/apache/fory/serializer/MetaSharedSerializer.java +++ b/java/fory-core/src/main/java/org/apache/fory/serializer/MetaSharedSerializer.java @@ -168,15 +168,19 @@ public void xwrite(MemoryBuffer buffer, T value) { @Override public T read(MemoryBuffer buffer) { if (Utils.debugOutputEnabled()) { - LOG.info( - "========== MetaSharedSerializer.read() for {} ==========", type.getName()); + LOG.info("========== MetaSharedSerializer.read() for {} ==========", type.getName()); LOG.info("Buffer readerIndex at start: {}", buffer.readerIndex()); LOG.info("buildInFields count: {}", buildInFields.length); for (int i = 0; i < buildInFields.length; i++) { SerializationFieldInfo fi = buildInFields[i]; LOG.info( " buildInField[{}]: name={}, dispatchId={}, nullable={}, isPrimitive={}, hasAccessor={}", - i, fi.qualifiedFieldName, fi.dispatchId, fi.nullable, fi.isPrimitive, fi.fieldAccessor != null); + i, + fi.qualifiedFieldName, + fi.dispatchId, + fi.nullable, + fi.isPrimitive, + fi.fieldAccessor != null); } } if (isRecord) { @@ -201,7 +205,11 @@ public T read(MemoryBuffer buffer) { if (Utils.debugOutputEnabled()) { LOG.info( "[Java] About to read field: name={}, dispatchId={}, nullable={}, isPrimitive={}, bufferPos={}", - fieldInfo.qualifiedFieldName, fieldInfo.dispatchId, nullable, fieldInfo.isPrimitive, buffer.readerIndex()); + fieldInfo.qualifiedFieldName, + fieldInfo.dispatchId, + nullable, + fieldInfo.isPrimitive, + buffer.readerIndex()); // Print next 16 bytes from buffer for debugging int pos = buffer.readerIndex(); int remaining = Math.min(16, buffer.size() - pos); diff --git 
a/java/fory-core/src/main/java/org/apache/fory/type/Descriptor.java b/java/fory-core/src/main/java/org/apache/fory/type/Descriptor.java index 94fee5eeef..f41852e7cd 100644 --- a/java/fory-core/src/main/java/org/apache/fory/type/Descriptor.java +++ b/java/fory-core/src/main/java/org/apache/fory/type/Descriptor.java @@ -115,7 +115,7 @@ public Descriptor(Field field, TypeRef typeRef, Method readMethod, Method wri this.writeMethod = writeMethod; this.typeRef = typeRef; this.foryField = this.field.getAnnotation(ForyField.class); - typeAnnotation = getTypeAnnotation(field); + typeAnnotation = getAnnotation(field); if (!typeRef.isPrimitive()) { this.nullable = foryField == null || foryField.nullable(); } @@ -157,7 +157,7 @@ private Descriptor(Field field, Method readMethod) { this.readMethod = readMethod; this.writeMethod = null; this.foryField = this.field.getAnnotation(ForyField.class); - typeAnnotation = getTypeAnnotation(field); + typeAnnotation = getAnnotation(field); if (!field.getType().isPrimitive()) { this.nullable = foryField == null || foryField.nullable(); } @@ -176,7 +176,7 @@ private Descriptor(Method readMethod) { this.readMethod = readMethod; this.writeMethod = null; this.foryField = readMethod.getAnnotation(ForyField.class); - typeAnnotation = getTypeAnnotation(readMethod); + typeAnnotation = getAnnotation(readMethod.getDeclaredAnnotations(), readMethod.getName()); if (!readMethod.getReturnType().isPrimitive()) { this.nullable = foryField == null || foryField.nullable(); } @@ -194,7 +194,7 @@ public Descriptor(DescriptorBuilder builder) { this.writeMethod = builder.writeMethod; this.trackingRef = builder.trackingRef; this.foryField = this.field == null ? null : this.field.getAnnotation(ForyField.class); - typeAnnotation = getTypeAnnotation(field); + typeAnnotation = field == null ? 
null : getAnnotation(field); // Use builder.nullable directly - this is set by DescriptorBuilder.nullable() // and should be respected, especially for xlang compatible mode where remote // TypeDef's nullable flag may differ from local field's nullable @@ -685,20 +685,6 @@ static SortedMap buildBeanedDescriptorsMap( typeAnnotationsTypes.add(Uint64Type.class); } - private static Annotation getTypeAnnotation(Field field) { - if (field == null) { - return null; - } - return getAnnotation(field); - } - - private static Annotation getTypeAnnotation(Method method) { - if (method == null) { - return null; - } - return getAnnotation(method.getDeclaredAnnotations(), method.getName()); - } - public static Annotation getAnnotation(Field field) { return getAnnotation(field.getDeclaredAnnotations(), field.getName()); } diff --git a/java/fory-core/src/main/java/org/apache/fory/type/Types.java b/java/fory-core/src/main/java/org/apache/fory/type/Types.java index b89df739cc..b20d03a3ed 100644 --- a/java/fory-core/src/main/java/org/apache/fory/type/Types.java +++ b/java/fory-core/src/main/java/org/apache/fory/type/Types.java @@ -19,11 +19,8 @@ package org.apache.fory.type; -import static org.apache.fory.collection.Collections.ofHashMap; - import java.lang.annotation.Annotation; import java.lang.reflect.Field; -import java.util.Map; import org.apache.fory.Fory; import org.apache.fory.meta.TypeExtMeta; import org.apache.fory.reflect.TypeRef; @@ -244,21 +241,6 @@ public static boolean isUserDefinedType(byte typeId) { return isStructType(typeId) || isExtType(typeId) || isEnumType(typeId); } - private static final Map PRIMITIVE_TYPE_ID_MAP = - ofHashMap( - boolean.class, BOOL, - byte.class, INT8, - short.class, INT16, - int.class, INT32, - long.class, INT64, - float.class, FLOAT32, - double.class, FLOAT64); - - public static int getPrimitiveTypeId(Class cls) { - Preconditions.checkArgument(cls.isPrimitive(), "Class %s is not primitive", cls); - return 
PRIMITIVE_TYPE_ID_MAP.getOrDefault(cls, -1); - } - public static boolean isPrimitiveType(int typeId) { return typeId >= BOOL && typeId <= FLOAT64; } @@ -316,30 +298,6 @@ public static int getPrimitiveArrayTypeId(int typeId) { } } - public static int getPrimitiveTypeId(Fory fory, Class rawType) { - Class unwrapped = TypeUtils.unwrap(rawType); - if (unwrapped == char.class) { - Preconditions.checkArgument(!fory.isCrossLanguage(), "Char is not support for xlang"); - return rawType.isPrimitive() ? ClassResolver.PRIMITIVE_CHAR_ID : ClassResolver.CHAR_ID; - } - if (unwrapped == boolean.class) { - return Types.BOOL; - } else if (unwrapped == byte.class) { - return Types.INT8; - } else if (unwrapped == short.class) { - return Types.INT16; - } else if (unwrapped == int.class) { - return fory.compressInt() ? Types.VARINT32 : Types.INT32; - } else if (unwrapped == long.class) { - return fory.compressLong() ? Types.VARINT64 : Types.INT64; - } else if (unwrapped == float.class) { - return Types.FLOAT32; - } else if (unwrapped == double.class) { - return Types.FLOAT64; - } - return Types.UNKNOWN; - } - public static int getDescriptorTypeId(Fory fory, Field field) { Annotation annotation = Descriptor.getAnnotation(field); Class rawType = field.getType(); @@ -439,7 +397,8 @@ public static boolean isCompressedType(int typeId) { case TAGGED_INT64: case TAGGED_UINT64: return true; + default: + return false; } - return false; } } diff --git a/java/fory-core/src/test/java/org/apache/fory/xlang/XlangTestBase.java b/java/fory-core/src/test/java/org/apache/fory/xlang/XlangTestBase.java index e9e40c987e..837ff11527 100644 --- a/java/fory-core/src/test/java/org/apache/fory/xlang/XlangTestBase.java +++ b/java/fory-core/src/test/java/org/apache/fory/xlang/XlangTestBase.java @@ -2533,6 +2533,7 @@ static class UnsignedSchemaConsistent { static class UnsignedSchemaConsistentSimple { @Uint64Type(encoding = LongEncoding.TAGGED) long u64Tagged; + @ForyField(nullable = true) @Uint64Type(encoding = 
LongEncoding.TAGGED) Long u64TaggedNullable; @@ -2559,7 +2560,8 @@ public void testUnsignedSchemaConsistentSimple(boolean enableCodegen) throws jav ExecutionContext ctx = prepareExecution(caseName, buffer.getBytes(0, buffer.writerIndex())); runPeer(ctx); MemoryBuffer buffer2 = readBuffer(ctx.dataFile()); - UnsignedSchemaConsistentSimple result = (UnsignedSchemaConsistentSimple) fory.deserialize(buffer2); + UnsignedSchemaConsistentSimple result = + (UnsignedSchemaConsistentSimple) fory.deserialize(buffer2); Assert.assertEquals(result, obj); } diff --git a/python/pyfory/tests/xlang_test_main.py b/python/pyfory/tests/xlang_test_main.py index 48ed5afce0..3b3160db87 100644 --- a/python/pyfory/tests/xlang_test_main.py +++ b/python/pyfory/tests/xlang_test_main.py @@ -1403,9 +1403,7 @@ def test_unsigned_schema_consistent_simple(): obj = fory.deserialize(data_bytes) debug_print(f"Deserialized: {obj}") - assert obj.u64_tagged == expected.u64_tagged, ( - f"u64_tagged: {obj.u64_tagged} != {expected.u64_tagged}" - ) + assert obj.u64_tagged == expected.u64_tagged, f"u64_tagged: {obj.u64_tagged} != {expected.u64_tagged}" assert obj.u64_tagged_nullable == expected.u64_tagged_nullable, ( f"u64_tagged_nullable: {obj.u64_tagged_nullable} != {expected.u64_tagged_nullable}" ) @@ -1458,11 +1456,21 @@ def test_unsigned_schema_consistent(): # Verify nullable boxed fields assert obj.u8_nullable_field == expected.u8_nullable_field, f"u8_nullable_field: {obj.u8_nullable_field} != {expected.u8_nullable_field}" assert obj.u16_nullable_field == expected.u16_nullable_field, f"u16_nullable_field: {obj.u16_nullable_field} != {expected.u16_nullable_field}" - assert obj.u32_var_nullable_field == expected.u32_var_nullable_field, f"u32_var_nullable_field: {obj.u32_var_nullable_field} != {expected.u32_var_nullable_field}" - assert obj.u32_fixed_nullable_field == expected.u32_fixed_nullable_field, f"u32_fixed_nullable_field: {obj.u32_fixed_nullable_field} != {expected.u32_fixed_nullable_field}" - assert 
obj.u64_var_nullable_field == expected.u64_var_nullable_field, f"u64_var_nullable_field: {obj.u64_var_nullable_field} != {expected.u64_var_nullable_field}" - assert obj.u64_fixed_nullable_field == expected.u64_fixed_nullable_field, f"u64_fixed_nullable_field: {obj.u64_fixed_nullable_field} != {expected.u64_fixed_nullable_field}" - assert obj.u64_tagged_nullable_field == expected.u64_tagged_nullable_field, f"u64_tagged_nullable_field: {obj.u64_tagged_nullable_field} != {expected.u64_tagged_nullable_field}" + assert obj.u32_var_nullable_field == expected.u32_var_nullable_field, ( + f"u32_var_nullable_field: {obj.u32_var_nullable_field} != {expected.u32_var_nullable_field}" + ) + assert obj.u32_fixed_nullable_field == expected.u32_fixed_nullable_field, ( + f"u32_fixed_nullable_field: {obj.u32_fixed_nullable_field} != {expected.u32_fixed_nullable_field}" + ) + assert obj.u64_var_nullable_field == expected.u64_var_nullable_field, ( + f"u64_var_nullable_field: {obj.u64_var_nullable_field} != {expected.u64_var_nullable_field}" + ) + assert obj.u64_fixed_nullable_field == expected.u64_fixed_nullable_field, ( + f"u64_fixed_nullable_field: {obj.u64_fixed_nullable_field} != {expected.u64_fixed_nullable_field}" + ) + assert obj.u64_tagged_nullable_field == expected.u64_tagged_nullable_field, ( + f"u64_tagged_nullable_field: {obj.u64_tagged_nullable_field} != {expected.u64_tagged_nullable_field}" + ) new_bytes = fory.serialize(obj) with open(data_file, "wb") as f: From 1bd9fe13689563537b2983358b87fc981144fb4a Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Sat, 10 Jan 2026 17:25:12 +0800 Subject: [PATCH 27/44] fix merge conflict --- .../fory/serializer/ArraySerializers.java | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/java/fory-core/src/main/java/org/apache/fory/serializer/ArraySerializers.java b/java/fory-core/src/main/java/org/apache/fory/serializer/ArraySerializers.java index 262798e939..6d0282020b 100644 --- 
a/java/fory-core/src/main/java/org/apache/fory/serializer/ArraySerializers.java +++ b/java/fory-core/src/main/java/org/apache/fory/serializer/ArraySerializers.java @@ -23,7 +23,6 @@ import java.util.Arrays; import org.apache.fory.Fory; import org.apache.fory.config.CompatibleMode; -import org.apache.fory.config.Config; import org.apache.fory.config.LongEncoding; import org.apache.fory.memory.MemoryBuffer; import org.apache.fory.memory.Platform; @@ -489,16 +488,20 @@ public int[] read(MemoryBuffer buffer) { } public static final class LongArraySerializer extends PrimitiveArraySerializer { + private final boolean compressLongArray; public LongArraySerializer(Fory fory) { super(fory, long[].class); + compressLongArray = + fory.getConfig().compressLongArray() + && fory.getConfig().longEncoding() != LongEncoding.FIXED; } @Override public void write(MemoryBuffer buffer, long[] value) { if (fory.getBufferCallback() == null) { - if (compressArray(fory.getConfig())) { - writeInt64s(buffer, value, fory.getConfig().longEncoding()); + if (compressLongArray) { + writeInt64Compressed(buffer, value, fory.getConfig().longEncoding()); return; } int size = Math.multiplyExact(value.length, 8); @@ -527,8 +530,8 @@ public long[] read(MemoryBuffer buffer) { } return values; } - if (compressArray(fory.getConfig())) { - return readInt64s(buffer, fory.getConfig().longEncoding()); + if (compressLongArray) { + return readInt64Compressed(buffer, fory.getConfig().longEncoding()); } int size = buffer.readVarUint32Small7(); int numElements = size / 8; @@ -539,17 +542,14 @@ public long[] read(MemoryBuffer buffer) { return values; } - private boolean compressArray(Config config) { - return config.compressLongArray() && config.longEncoding() != LongEncoding.LE_RAW_BYTES; - } - - private void writeInt64s(MemoryBuffer buffer, long[] value, LongEncoding longEncoding) { + private void writeInt64Compressed( + MemoryBuffer buffer, long[] value, LongEncoding longEncoding) { int length = value.length; 
buffer.writeVarUint32Small7(length); - if (longEncoding == LongEncoding.SLI) { + if (longEncoding == LongEncoding.TAGGED) { for (int i = 0; i < length; i++) { - buffer.writeSliInt64(value[i]); + buffer.writeTaggedInt64(value[i]); } return; } @@ -558,13 +558,13 @@ private void writeInt64s(MemoryBuffer buffer, long[] value, LongEncoding longEnc } } - public long[] readInt64s(MemoryBuffer buffer, LongEncoding longEncoding) { + public long[] readInt64Compressed(MemoryBuffer buffer, LongEncoding longEncoding) { int numElements = buffer.readVarUint32Small7(); long[] values = new long[numElements]; - if (longEncoding == LongEncoding.SLI) { + if (longEncoding == LongEncoding.TAGGED) { for (int i = 0; i < numElements; i++) { - values[i] = buffer.readSliInt64(); + values[i] = buffer.readTaggedInt64(); } } else { for (int i = 0; i < numElements; i++) { From 268fd95f42d3a6f4bc27736a7edc7988578f0ccc Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Sat, 10 Jan 2026 17:33:44 +0800 Subject: [PATCH 28/44] fix tests --- .../serialization/unsigned_serializer_test.cc | 10 ++++++---- go/fory/type_def.go | 15 ++++++++++----- .../fory/serializer/ArraySerializersTest.java | 4 ++-- rust/fory-core/src/buffer.rs | 2 +- 4 files changed, 19 insertions(+), 12 deletions(-) diff --git a/cpp/fory/serialization/unsigned_serializer_test.cc b/cpp/fory/serialization/unsigned_serializer_test.cc index 4680be0cf3..0bf7fe8c58 100644 --- a/cpp/fory/serialization/unsigned_serializer_test.cc +++ b/cpp/fory/serialization/unsigned_serializer_test.cc @@ -246,16 +246,18 @@ TEST(UnsignedSerializerTest, BoundaryValues) { // ============================================================================ TEST(UnsignedSerializerTest, UnsignedTypeIdsAreDistinct) { - // Verify that unsigned types use distinct TypeIds (UINT8, UINT16, UINT32, - // UINT64) + // Verify that unsigned types use distinct TypeIds + // uint8_t and uint16_t use fixed encoding (UINT8, UINT16) + // uint32_t and uint64_t use variable encoding 
(VAR_UINT32, VAR_UINT64) to + // match Rust xlang mode EXPECT_EQ(static_cast(Serializer::type_id), static_cast(TypeId::UINT8)); EXPECT_EQ(static_cast(Serializer::type_id), static_cast(TypeId::UINT16)); EXPECT_EQ(static_cast(Serializer::type_id), - static_cast(TypeId::UINT32)); + static_cast(TypeId::VAR_UINT32)); EXPECT_EQ(static_cast(Serializer::type_id), - static_cast(TypeId::UINT64)); + static_cast(TypeId::VAR_UINT64)); } TEST(UnsignedSerializerTest, UnsignedArrayTypeIdsAreDistinct) { diff --git a/go/fory/type_def.go b/go/fory/type_def.go index 424b796454..e45769ff20 100644 --- a/go/fory/type_def.go +++ b/go/fory/type_def.go @@ -578,17 +578,22 @@ func buildFieldDefs(fory *Fory, value reflect.Value) ([]FieldDef, error) { // Use sortFields to match Java's field ordering // (primitives before boxed/nullable primitives, sorted by tag ID if available) - _, sortedNames := sortFields(fory.typeResolver, fieldNames, serializers, typeIds, nullables, tagIDs) + _, sortedKeys := sortFields(fory.typeResolver, fieldNames, serializers, typeIds, nullables, tagIDs) // Rebuild fieldInfos in the sorted order - nameToFieldInfo := make(map[string]FieldDef) + // Key by sort key: tag ID as string if tagID >= 0, otherwise snake_case field name + keyToFieldInfo := make(map[string]FieldDef) for _, fieldInfo := range fieldDefs { - nameToFieldInfo[fieldInfo.name] = fieldInfo + key := fieldInfo.name + if fieldInfo.tagID >= 0 { + key = fmt.Sprintf("%d", fieldInfo.tagID) + } + keyToFieldInfo[key] = fieldInfo } sortedFieldInfos := make([]FieldDef, len(fieldDefs)) - for i, name := range sortedNames { - sortedFieldInfos[i] = nameToFieldInfo[name] + for i, key := range sortedKeys { + sortedFieldInfos[i] = keyToFieldInfo[key] } fieldDefs = sortedFieldInfos diff --git a/java/fory-core/src/test/java/org/apache/fory/serializer/ArraySerializersTest.java b/java/fory-core/src/test/java/org/apache/fory/serializer/ArraySerializersTest.java index 542e568294..fe817d916e 100644 --- 
a/java/fory-core/src/test/java/org/apache/fory/serializer/ArraySerializersTest.java +++ b/java/fory-core/src/test/java/org/apache/fory/serializer/ArraySerializersTest.java @@ -376,7 +376,7 @@ public void testVariableLengthLongArray() { Fory.builder() .requireClassRegistration(false) .withLongArrayCompressed(true) - .withLongCompressed(LongEncoding.PVL) + .withLongCompressed(LongEncoding.VARINT) .build(); // Test empty array @@ -429,7 +429,7 @@ public void testVariableLengthEncodingEfficiencyForSmallValues() { Fory.builder() .requireClassRegistration(false) .withLongArrayCompressed(true) - .withLongCompressed(LongEncoding.PVL) + .withLongCompressed(LongEncoding.VARINT) .build(); // Create an array with many small values (0-127, which can be encoded in 1-2 bytes with varint) diff --git a/rust/fory-core/src/buffer.rs b/rust/fory-core/src/buffer.rs index 185566889d..8d72f4f5e3 100644 --- a/rust/fory-core/src/buffer.rs +++ b/rust/fory-core/src/buffer.rs @@ -142,7 +142,7 @@ impl<'a> Writer<'a> { pub fn write_tagged_i64(&mut self, value: i64) { const HALF_MIN_INT_VALUE: i64 = i32::MIN as i64 / 2; // -1073741824 const HALF_MAX_INT_VALUE: i64 = i32::MAX as i64 / 2; // 1073741823 - if value >= HALF_MIN_INT_VALUE && value <= HALF_MAX_INT_VALUE { + if (HALF_MIN_INT_VALUE..=HALF_MAX_INT_VALUE).contains(&value) { // Fits in 31 bits (with sign), encode as 4 bytes with bit 0 = 0 let v = (value as i32) << 1; self.write_i32(v); From b3a972310c00d69edafb81b4bba0c6871fc5d3e4 Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Sat, 10 Jan 2026 21:39:52 +0800 Subject: [PATCH 29/44] fix python tests --- python/pyfory/_fory.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyfory/_fory.py b/python/pyfory/_fory.py index cfc579ed61..2d67d8d858 100644 --- a/python/pyfory/_fory.py +++ b/python/pyfory/_fory.py @@ -48,7 +48,7 @@ USE_TYPE_ID = 1 # preserve 0 as flag for type id not set in TypeInfo` NO_TYPE_ID = 0 -INT64_TYPE_ID = TypeId.INT64 +INT64_TYPE_ID = 
TypeId.VARINT64 FLOAT64_TYPE_ID = TypeId.FLOAT64 BOOL_TYPE_ID = TypeId.BOOL STRING_TYPE_ID = TypeId.STRING From ad0257ea7aaf3fbc88db5acf515853dc39c93bc6 Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Sat, 10 Jan 2026 21:40:18 +0800 Subject: [PATCH 30/44] fix c++ tests --- cpp/fory/serialization/type_resolver.cc | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/cpp/fory/serialization/type_resolver.cc b/cpp/fory/serialization/type_resolver.cc index 04e8219c6d..3ddd99b0bd 100644 --- a/cpp/fory/serialization/type_resolver.cc +++ b/cpp/fory/serialization/type_resolver.cc @@ -1266,7 +1266,18 @@ void TypeResolver::register_builtin_types() { register_type_id_only(TypeId::INT8); register_type_id_only(TypeId::INT16); register_type_id_only(TypeId::INT32); + register_type_id_only(TypeId::VARINT32); register_type_id_only(TypeId::INT64); + register_type_id_only(TypeId::VARINT64); + register_type_id_only(TypeId::TAGGED_INT64); + register_type_id_only(TypeId::UINT8); + register_type_id_only(TypeId::UINT16); + register_type_id_only(TypeId::UINT32); + register_type_id_only(TypeId::VAR_UINT32); + register_type_id_only(TypeId::UINT64); + register_type_id_only(TypeId::VAR_UINT64); + register_type_id_only(TypeId::TAGGED_UINT64); + register_type_id_only(TypeId::FLOAT16); register_type_id_only(TypeId::FLOAT32); register_type_id_only(TypeId::FLOAT64); register_type_id_only(TypeId::STRING); @@ -1277,6 +1288,10 @@ void TypeResolver::register_builtin_types() { register_type_id_only(TypeId::INT16_ARRAY); register_type_id_only(TypeId::INT32_ARRAY); register_type_id_only(TypeId::INT64_ARRAY); + register_type_id_only(TypeId::UINT8_ARRAY); + register_type_id_only(TypeId::UINT16_ARRAY); + register_type_id_only(TypeId::UINT32_ARRAY); + register_type_id_only(TypeId::UINT64_ARRAY); register_type_id_only(TypeId::FLOAT16_ARRAY); register_type_id_only(TypeId::FLOAT32_ARRAY); register_type_id_only(TypeId::FLOAT64_ARRAY); @@ -1293,6 +1308,8 @@ void 
TypeResolver::register_builtin_types() { register_type_id_only(TypeId::EXT); // Other internal types + register_type_id_only(TypeId::UNION); + register_type_id_only(TypeId::NONE); register_type_id_only(TypeId::DURATION); register_type_id_only(TypeId::TIMESTAMP); register_type_id_only(TypeId::LOCAL_DATE); From eb99c8cfb6374038b5c2a919490a76bc219bde28 Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Sat, 10 Jan 2026 21:40:39 +0800 Subject: [PATCH 31/44] fix go tests --- go/fory/type_def.go | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/go/fory/type_def.go b/go/fory/type_def.go index e45769ff20..424b796454 100644 --- a/go/fory/type_def.go +++ b/go/fory/type_def.go @@ -578,22 +578,17 @@ func buildFieldDefs(fory *Fory, value reflect.Value) ([]FieldDef, error) { // Use sortFields to match Java's field ordering // (primitives before boxed/nullable primitives, sorted by tag ID if available) - _, sortedKeys := sortFields(fory.typeResolver, fieldNames, serializers, typeIds, nullables, tagIDs) + _, sortedNames := sortFields(fory.typeResolver, fieldNames, serializers, typeIds, nullables, tagIDs) // Rebuild fieldInfos in the sorted order - // Key by sort key: tag ID as string if tagID >= 0, otherwise snake_case field name - keyToFieldInfo := make(map[string]FieldDef) + nameToFieldInfo := make(map[string]FieldDef) for _, fieldInfo := range fieldDefs { - key := fieldInfo.name - if fieldInfo.tagID >= 0 { - key = fmt.Sprintf("%d", fieldInfo.tagID) - } - keyToFieldInfo[key] = fieldInfo + nameToFieldInfo[fieldInfo.name] = fieldInfo } sortedFieldInfos := make([]FieldDef, len(fieldDefs)) - for i, key := range sortedKeys { - sortedFieldInfos[i] = keyToFieldInfo[key] + for i, name := range sortedNames { + sortedFieldInfos[i] = nameToFieldInfo[name] } fieldDefs = sortedFieldInfos From df96ffd35a2534701c982712bfabc17cd65d3290 Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Sat, 10 Jan 2026 21:42:36 +0800 Subject: [PATCH 32/44] revert 
DEBUG_OUTPUT_ENABLED flag --- java/fory-core/src/main/java/org/apache/fory/util/Utils.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/fory-core/src/main/java/org/apache/fory/util/Utils.java b/java/fory-core/src/main/java/org/apache/fory/util/Utils.java index 890b72662b..3592e3ff71 100644 --- a/java/fory-core/src/main/java/org/apache/fory/util/Utils.java +++ b/java/fory-core/src/main/java/org/apache/fory/util/Utils.java @@ -24,7 +24,7 @@ public class Utils { private static final boolean DEBUG_OUTPUT_ENABLED; static { - DEBUG_OUTPUT_ENABLED = true; + DEBUG_OUTPUT_ENABLED = "1".equals(System.getenv("ENABLE_FORY_DEBUG_OUTPUT")); } /** Checks if ENABLE_FORY_DEBUG_OUTPUT env var is set to "1". */ From b05ac57babf26855bde734d8797a4f6bb3a40931 Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Sat, 10 Jan 2026 21:58:46 +0800 Subject: [PATCH 33/44] fix tests --- go/fory/field_info.go | 20 +- go/fory/tag.go | 316 +++++++++--------- go/fory/tests/xlang/xlang_test_main.go | 16 +- .../fory/builder/ObjectCodecBuilder.java | 76 +++-- .../java/org/apache/fory/type/DispatchId.java | 5 +- .../fory-core/native-image.properties | 1 + .../fory/resolver/ClassResolverTest.java | 25 +- 7 files changed, 251 insertions(+), 208 deletions(-) diff --git a/go/fory/field_info.go b/go/fory/field_info.go index 55da217f51..cac8ae4465 100644 --- a/go/fory/field_info.go +++ b/go/fory/field_info.go @@ -190,15 +190,15 @@ func GroupFields(fields []FieldInfo) FieldGroup { return comparePrimitiveFields(fi, fj) } // Within other internal types category (STRING, BINARY, LIST, SET, MAP), - // sort by typeId then by name only. Java does NOT sort by nullable flag here. + // sort by typeId then by sort key (tagID if available, otherwise name). 
if catI == 1 { if fi.TypeId != fj.TypeId { return fi.TypeId < fj.TypeId } - return fi.Name < fj.Name + return getFieldSortKey(fi) < getFieldSortKey(fj) } - // Other categories (struct, enum, etc.): sort by name only - return fi.Name < fj.Name + // Other categories (struct, enum, etc.): sort by sort key (tagID if available, otherwise name) + return getFieldSortKey(fi) < getFieldSortKey(fj) }) return g @@ -518,7 +518,7 @@ type triple struct { tagID int // -1 = use field name, >=0 = use tag ID for sorting } -// getFieldSortKey returns the sort key for a field. +// getSortKey returns the sort key for a triple. // If tagID >= 0, returns the tag ID as string (for tag-based sorting). // Otherwise returns the snake_case field name. func (t triple) getSortKey() string { @@ -528,6 +528,16 @@ func (t triple) getSortKey() string { return SnakeCase(t.name) } +// getFieldSortKey returns the sort key for a FieldInfo. +// If TagID >= 0, returns the tag ID as string (for tag-based sorting). +// Otherwise returns the field name (which is already snake_case). +func getFieldSortKey(f *FieldInfo) string { + if f.TagID >= 0 { + return fmt.Sprintf("%d", f.TagID) + } + return f.Name +} + // sortFields sorts fields with nullable information to match Java's field ordering. // Java separates primitive types (int, long) from boxed types (Integer, Long). // In Go, this corresponds to non-pointer primitives vs pointer-to-primitive. diff --git a/go/fory/tag.go b/go/fory/tag.go index 426157a3a7..f78e8296c9 100644 --- a/go/fory/tag.go +++ b/go/fory/tag.go @@ -18,14 +18,14 @@ package fory import ( - "reflect" - "strconv" - "strings" + "reflect" + "strconv" + "strings" ) const ( - // TagIDUseFieldName indicates field name should be used instead of tag ID - TagIDUseFieldName = -1 + // TagIDUseFieldName indicates field name should be used instead of tag ID + TagIDUseFieldName = -1 ) // ForyTag represents parsed fory struct tag options. 
@@ -54,20 +54,20 @@ const ( // Hidden string `fory:"-"` // Skip this field (shorthand) // } type ForyTag struct { - ID int // Field tag ID (-1 = use field name, >=0 = use tag ID) - Nullable bool // Whether to write null flag (default: false) - Ref bool // Whether to enable reference tracking (default: false) - Ignore bool // Whether to ignore this field during serialization (default: false) - HasTag bool // Whether field has fory tag at all - Compress bool // For int32/uint32: true=varint, false=fixed (default: true) - Encoding string // For int64/uint64: "fixed", "varint", "tagged" (default: "varint") - - // Track which options were explicitly set (for override logic) - NullableSet bool - RefSet bool - IgnoreSet bool - CompressSet bool - EncodingSet bool + ID int // Field tag ID (-1 = use field name, >=0 = use tag ID) + Nullable bool // Whether to write null flag (default: false) + Ref bool // Whether to enable reference tracking (default: false) + Ignore bool // Whether to ignore this field during serialization (default: false) + HasTag bool // Whether field has fory tag at all + Compress bool // For int32/uint32: true=varint, false=fixed (default: true) + Encoding string // For int64/uint64: "fixed", "varint", "tagged" (default: "varint") + + // Track which options were explicitly set (for override logic) + NullableSet bool + RefSet bool + IgnoreSet bool + CompressSet bool + EncodingSet bool } // parseForyTag parses a fory struct tag from reflect.StructField.Tag. 
@@ -81,112 +81,112 @@ type ForyTag struct { // - Standalone flags: `nullable`, `ref`, `ignore` (equivalent to =true) // - Shorthand: `-` (equivalent to `ignore=true`) func parseForyTag(field reflect.StructField) ForyTag { - tag := ForyTag{ - ID: TagIDUseFieldName, - Nullable: false, - Ref: false, - Ignore: false, - HasTag: false, - Compress: true, // default: varint encoding - Encoding: "varint", // default: varint encoding - } - - tagValue, ok := field.Tag.Lookup("fory") - if !ok { - return tag - } - - tag.HasTag = true - - // Handle "-" shorthand for ignore - if tagValue == "-" { - tag.Ignore = true - tag.IgnoreSet = true - return tag - } - - // Parse comma-separated options - parts := strings.Split(tagValue, ",") - for _, part := range parts { - part = strings.TrimSpace(part) - if part == "" { - continue - } - - // Handle key=value pairs and standalone flags - if idx := strings.Index(part, "="); idx >= 0 { - key := strings.TrimSpace(part[:idx]) - value := strings.TrimSpace(part[idx+1:]) - - switch key { - case "id": - if id, err := strconv.Atoi(value); err == nil { - tag.ID = id - } - case "nullable": - tag.Nullable = parseBool(value) - tag.NullableSet = true - case "ref": - tag.Ref = parseBool(value) - tag.RefSet = true - case "ignore": - tag.Ignore = parseBool(value) - tag.IgnoreSet = true - case "compress": - tag.Compress = parseBool(value) - tag.CompressSet = true - case "encoding": - tag.Encoding = strings.ToLower(strings.TrimSpace(value)) - tag.EncodingSet = true - } - } else { - // Handle standalone flags (presence means true) - switch part { - case "nullable": - tag.Nullable = true - tag.NullableSet = true - case "ref": - tag.Ref = true - tag.RefSet = true - case "ignore": - tag.Ignore = true - tag.IgnoreSet = true - } - } - } - - return tag + tag := ForyTag{ + ID: TagIDUseFieldName, + Nullable: false, + Ref: false, + Ignore: false, + HasTag: false, + Compress: true, // default: varint encoding + Encoding: "varint", // default: varint encoding + } + + 
tagValue, ok := field.Tag.Lookup("fory") + if !ok { + return tag + } + + tag.HasTag = true + + // Handle "-" shorthand for ignore + if tagValue == "-" { + tag.Ignore = true + tag.IgnoreSet = true + return tag + } + + // Parse comma-separated options + parts := strings.Split(tagValue, ",") + for _, part := range parts { + part = strings.TrimSpace(part) + if part == "" { + continue + } + + // Handle key=value pairs and standalone flags + if idx := strings.Index(part, "="); idx >= 0 { + key := strings.TrimSpace(part[:idx]) + value := strings.TrimSpace(part[idx+1:]) + + switch key { + case "id": + if id, err := strconv.Atoi(value); err == nil { + tag.ID = id + } + case "nullable": + tag.Nullable = parseBool(value) + tag.NullableSet = true + case "ref": + tag.Ref = parseBool(value) + tag.RefSet = true + case "ignore": + tag.Ignore = parseBool(value) + tag.IgnoreSet = true + case "compress": + tag.Compress = parseBool(value) + tag.CompressSet = true + case "encoding": + tag.Encoding = strings.ToLower(strings.TrimSpace(value)) + tag.EncodingSet = true + } + } else { + // Handle standalone flags (presence means true) + switch part { + case "nullable": + tag.Nullable = true + tag.NullableSet = true + case "ref": + tag.Ref = true + tag.RefSet = true + case "ignore": + tag.Ignore = true + tag.IgnoreSet = true + } + } + } + + return tag } // parseBool parses a boolean value from string. // Accepts: "true", "1", "yes" as true; everything else as false. func parseBool(s string) bool { - s = strings.ToLower(strings.TrimSpace(s)) - return s == "true" || s == "1" || s == "yes" + s = strings.ToLower(strings.TrimSpace(s)) + return s == "true" || s == "1" || s == "yes" } // parseTypeID parses a TypeId from string name. // Returns 0 if the type name is not recognized. 
func parseTypeID(s string) TypeId { - s = strings.ToUpper(strings.TrimSpace(s)) - switch s { - case "UINT8": - return UINT8 - case "UINT16": - return UINT16 - case "UINT32": - return UINT32 - case "VAR_UINT32": - return VAR_UINT32 - case "UINT64": - return UINT64 - case "VAR_UINT64": - return VAR_UINT64 - case "TAGGED_UINT64": - return TAGGED_UINT64 - default: - return 0 - } + s = strings.ToUpper(strings.TrimSpace(s)) + switch s { + case "UINT8": + return UINT8 + case "UINT16": + return UINT16 + case "UINT32": + return UINT32 + case "VAR_UINT32": + return VAR_UINT32 + case "UINT64": + return UINT64 + case "VAR_UINT64": + return VAR_UINT64 + case "TAGGED_UINT64": + return TAGGED_UINT64 + default: + return 0 + } } // validateForyTags validates all fory tags in a struct type. @@ -197,41 +197,41 @@ func parseTypeID(s string) TypeId { // - Tag IDs must be unique within a struct (except -1) // - Ignored fields are not validated for ID uniqueness func validateForyTags(t reflect.Type) error { - if t.Kind() == reflect.Ptr { - t = t.Elem() - } - if t.Kind() != reflect.Struct { - return nil - } - - tagIDs := make(map[int]string) // id -> field name - - for i := 0; i < t.NumField(); i++ { - field := t.Field(i) - tag := parseForyTag(field) - - // Skip ignored fields for ID uniqueness validation - if tag.Ignore { - continue - } - - // Validate tag ID range - if tag.ID < TagIDUseFieldName { - return InvalidTagErrorf("invalid fory tag id=%d on field %s: id must be >= -1", - tag.ID, field.Name) - } - - // Check for duplicate tag IDs (except -1 which means use field name) - if tag.ID >= 0 { - if existing, ok := tagIDs[tag.ID]; ok { - return InvalidTagErrorf("duplicate fory tag id=%d on fields %s and %s", - tag.ID, existing, field.Name) - } - tagIDs[tag.ID] = field.Name - } - } - - return nil + if t.Kind() == reflect.Ptr { + t = t.Elem() + } + if t.Kind() != reflect.Struct { + return nil + } + + tagIDs := make(map[int]string) // id -> field name + + for i := 0; i < t.NumField(); i++ 
{ + field := t.Field(i) + tag := parseForyTag(field) + + // Skip ignored fields for ID uniqueness validation + if tag.Ignore { + continue + } + + // Validate tag ID range + if tag.ID < TagIDUseFieldName { + return InvalidTagErrorf("invalid fory tag id=%d on field %s: id must be >= -1", + tag.ID, field.Name) + } + + // Check for duplicate tag IDs (except -1 which means use field name) + if tag.ID >= 0 { + if existing, ok := tagIDs[tag.ID]; ok { + return InvalidTagErrorf("duplicate fory tag id=%d on fields %s and %s", + tag.ID, existing, field.Name) + } + tagIDs[tag.ID] = field.Name + } + } + + return nil } // shouldIncludeField returns true if the field should be serialized. @@ -240,12 +240,12 @@ func validateForyTags(t reflect.Type) error { // - It has `fory:"-"` tag // - It has `fory:"ignore"` or `fory:"ignore=true"` tag func shouldIncludeField(field reflect.StructField) bool { - // Skip unexported fields - if field.PkgPath != "" { - return false - } - - // Check for ignore tag - tag := parseForyTag(field) - return !tag.Ignore + // Skip unexported fields + if field.PkgPath != "" { + return false + } + + // Check for ignore tag + tag := parseForyTag(field) + return !tag.Ignore } diff --git a/go/fory/tests/xlang/xlang_test_main.go b/go/fory/tests/xlang/xlang_test_main.go index e2e31b69b6..1b95295e0a 100644 --- a/go/fory/tests/xlang/xlang_test_main.go +++ b/go/fory/tests/xlang/xlang_test_main.go @@ -2171,8 +2171,8 @@ type UnsignedSchemaConsistentSimple struct { type UnsignedSchemaConsistent struct { // Primitive unsigned fields (non-nullable, use Field suffix to avoid reserved keywords) - U8Field uint8 // UINT8 - fixed 8-bit - U16Field uint16 // UINT16 - fixed 16-bit + U8Field uint8 // UINT8 - fixed 8-bit + U16Field uint16 // UINT16 - fixed 16-bit U32VarField uint32 `fory:"compress=true"` // VAR_UINT32 - variable-length U32FixedField uint32 `fory:"compress=false"` // UINT32 - fixed 4-byte U64VarField uint64 `fory:"encoding=varint"` // VAR_UINT64 - variable-length @@ 
-2180,8 +2180,8 @@ type UnsignedSchemaConsistent struct { U64TaggedField uint64 `fory:"encoding=tagged"` // TAGGED_UINT64 - tagged encoding // Nullable unsigned fields (pointers) - U8NullableField *uint8 `fory:"nullable"` - U16NullableField *uint16 `fory:"nullable"` + U8NullableField *uint8 `fory:"nullable"` + U16NullableField *uint16 `fory:"nullable"` U32VarNullableField *uint32 `fory:"nullable,compress=true"` U32FixedNullableField *uint32 `fory:"nullable,compress=false"` U64VarNullableField *uint64 `fory:"nullable,encoding=varint"` @@ -2195,8 +2195,8 @@ type UnsignedSchemaConsistent struct { // Matches Java's UnsignedSchemaCompatible (type id 502) type UnsignedSchemaCompatible struct { // Group 1: Nullable in Go (pointers), non-nullable in Java - U8Field1 *uint8 `fory:"nullable"` - U16Field1 *uint16 `fory:"nullable"` + U8Field1 *uint8 `fory:"nullable"` + U16Field1 *uint16 `fory:"nullable"` U32VarField1 *uint32 `fory:"nullable,compress=true"` U32FixedField1 *uint32 `fory:"nullable,compress=false"` U64VarField1 *uint64 `fory:"nullable,encoding=varint"` @@ -2204,8 +2204,8 @@ type UnsignedSchemaCompatible struct { U64TaggedField1 *uint64 `fory:"nullable,encoding=tagged"` // Group 2: Non-nullable in Go, nullable in Java - U8Field2 uint8 - U16Field2 uint16 + U8Field2 uint8 + U16Field2 uint16 U32VarField2 uint32 `fory:"compress=true"` U32FixedField2 uint32 `fory:"compress=false"` U64VarField2 uint64 `fory:"encoding=varint"` diff --git a/java/fory-core/src/main/java/org/apache/fory/builder/ObjectCodecBuilder.java b/java/fory-core/src/main/java/org/apache/fory/builder/ObjectCodecBuilder.java index b1f2b3ebe9..98e745fc29 100644 --- a/java/fory-core/src/main/java/org/apache/fory/builder/ObjectCodecBuilder.java +++ b/java/fory-core/src/main/java/org/apache/fory/builder/ObjectCodecBuilder.java @@ -273,32 +273,40 @@ private List serializePrimitivesUnCompressed( if (fieldValue instanceof Inlineable) { ((Inlineable) fieldValue).inline(); } - if (dispatchId == 
DispatchId.PRIMITIVE_BOOL) { + if (dispatchId == DispatchId.PRIMITIVE_BOOL || dispatchId == DispatchId.BOOL) { groupExpressions.add(unsafePutBoolean(base, getWriterPos(writerAddr, acc), fieldValue)); acc += 1; } else if (dispatchId == DispatchId.PRIMITIVE_INT8 - || dispatchId == DispatchId.PRIMITIVE_UINT8) { + || dispatchId == DispatchId.PRIMITIVE_UINT8 + || dispatchId == DispatchId.INT8 + || dispatchId == DispatchId.UINT8) { groupExpressions.add(unsafePut(base, getWriterPos(writerAddr, acc), fieldValue)); acc += 1; - } else if (dispatchId == DispatchId.PRIMITIVE_CHAR) { + } else if (dispatchId == DispatchId.PRIMITIVE_CHAR || dispatchId == DispatchId.CHAR) { groupExpressions.add(unsafePutChar(base, getWriterPos(writerAddr, acc), fieldValue)); acc += 2; } else if (dispatchId == DispatchId.PRIMITIVE_INT16 - || dispatchId == DispatchId.PRIMITIVE_UINT16) { + || dispatchId == DispatchId.PRIMITIVE_UINT16 + || dispatchId == DispatchId.INT16 + || dispatchId == DispatchId.UINT16) { groupExpressions.add(unsafePutShort(base, getWriterPos(writerAddr, acc), fieldValue)); acc += 2; } else if (dispatchId == DispatchId.PRIMITIVE_INT32 - || dispatchId == DispatchId.PRIMITIVE_UINT32) { + || dispatchId == DispatchId.PRIMITIVE_UINT32 + || dispatchId == DispatchId.INT32 + || dispatchId == DispatchId.UINT32) { groupExpressions.add(unsafePutInt(base, getWriterPos(writerAddr, acc), fieldValue)); acc += 4; } else if (dispatchId == DispatchId.PRIMITIVE_INT64 - || dispatchId == DispatchId.PRIMITIVE_UINT64) { + || dispatchId == DispatchId.PRIMITIVE_UINT64 + || dispatchId == DispatchId.INT64 + || dispatchId == DispatchId.UINT64) { groupExpressions.add(unsafePutLong(base, getWriterPos(writerAddr, acc), fieldValue)); acc += 8; - } else if (dispatchId == DispatchId.PRIMITIVE_FLOAT32) { + } else if (dispatchId == DispatchId.PRIMITIVE_FLOAT32 || dispatchId == DispatchId.FLOAT32) { groupExpressions.add(unsafePutFloat(base, getWriterPos(writerAddr, acc), fieldValue)); acc += 4; - } else if 
(dispatchId == DispatchId.PRIMITIVE_FLOAT64) { + } else if (dispatchId == DispatchId.PRIMITIVE_FLOAT64 || dispatchId == DispatchId.FLOAT64) { groupExpressions.add(unsafePutDouble(base, getWriterPos(writerAddr, acc), fieldValue)); acc += 8; } else { @@ -332,14 +340,22 @@ private List serializePrimitivesCompressed( int id = getNumericDescriptorDispatchId(d); if (id == DispatchId.PRIMITIVE_INT32 || id == DispatchId.PRIMITIVE_VARINT32 - || id == DispatchId.PRIMITIVE_VAR_UINT32) { + || id == DispatchId.PRIMITIVE_VAR_UINT32 + || id == DispatchId.INT32 + || id == DispatchId.VARINT32 + || id == DispatchId.VAR_UINT32) { // varint may be written as 5bytes, use 8bytes for written as long to reduce cost. extraSize += 4; } else if (id == DispatchId.PRIMITIVE_INT64 || id == DispatchId.PRIMITIVE_VARINT64 || id == DispatchId.PRIMITIVE_TAGGED_INT64 || id == DispatchId.PRIMITIVE_VAR_UINT64 - || id == DispatchId.PRIMITIVE_TAGGED_UINT64) { + || id == DispatchId.PRIMITIVE_TAGGED_UINT64 + || id == DispatchId.INT64 + || id == DispatchId.VARINT64 + || id == DispatchId.TAGGED_INT64 + || id == DispatchId.VAR_UINT64 + || id == DispatchId.TAGGED_UINT64) { extraSize += 1; // long use 1~9 bytes. 
} } @@ -365,65 +381,77 @@ private List serializePrimitivesCompressed( if (fieldValue instanceof Inlineable) { ((Inlineable) fieldValue).inline(); } - if (dispatchId == DispatchId.PRIMITIVE_BOOL) { + if (dispatchId == DispatchId.PRIMITIVE_BOOL || dispatchId == DispatchId.BOOL) { groupExpressions.add(unsafePutBoolean(base, getWriterPos(writerAddr, acc), fieldValue)); acc += 1; } else if (dispatchId == DispatchId.PRIMITIVE_INT8 - || dispatchId == DispatchId.PRIMITIVE_UINT8) { + || dispatchId == DispatchId.PRIMITIVE_UINT8 + || dispatchId == DispatchId.INT8 + || dispatchId == DispatchId.UINT8) { groupExpressions.add(unsafePut(base, getWriterPos(writerAddr, acc), fieldValue)); acc += 1; - } else if (dispatchId == DispatchId.PRIMITIVE_CHAR) { + } else if (dispatchId == DispatchId.PRIMITIVE_CHAR || dispatchId == DispatchId.CHAR) { groupExpressions.add(unsafePutChar(base, getWriterPos(writerAddr, acc), fieldValue)); acc += 2; } else if (dispatchId == DispatchId.PRIMITIVE_INT16 - || dispatchId == DispatchId.PRIMITIVE_UINT16) { + || dispatchId == DispatchId.PRIMITIVE_UINT16 + || dispatchId == DispatchId.INT16 + || dispatchId == DispatchId.UINT16) { groupExpressions.add(unsafePutShort(base, getWriterPos(writerAddr, acc), fieldValue)); acc += 2; - } else if (dispatchId == DispatchId.PRIMITIVE_FLOAT32) { + } else if (dispatchId == DispatchId.PRIMITIVE_FLOAT32 || dispatchId == DispatchId.FLOAT32) { groupExpressions.add(unsafePutFloat(base, getWriterPos(writerAddr, acc), fieldValue)); acc += 4; - } else if (dispatchId == DispatchId.PRIMITIVE_FLOAT64) { + } else if (dispatchId == DispatchId.PRIMITIVE_FLOAT64 || dispatchId == DispatchId.FLOAT64) { groupExpressions.add(unsafePutDouble(base, getWriterPos(writerAddr, acc), fieldValue)); acc += 8; } else if (dispatchId == DispatchId.PRIMITIVE_INT32 - || dispatchId == DispatchId.PRIMITIVE_UINT32) { + || dispatchId == DispatchId.PRIMITIVE_UINT32 + || dispatchId == DispatchId.INT32 + || dispatchId == DispatchId.UINT32) { 
groupExpressions.add(unsafePutInt(base, getWriterPos(writerAddr, acc), fieldValue)); acc += 4; } else if (dispatchId == DispatchId.PRIMITIVE_INT64 - || dispatchId == DispatchId.PRIMITIVE_UINT64) { + || dispatchId == DispatchId.PRIMITIVE_UINT64 + || dispatchId == DispatchId.INT64 + || dispatchId == DispatchId.UINT64) { groupExpressions.add(unsafePutLong(base, getWriterPos(writerAddr, acc), fieldValue)); acc += 8; - } else if (dispatchId == DispatchId.PRIMITIVE_VARINT32) { + } else if (dispatchId == DispatchId.PRIMITIVE_VARINT32 || dispatchId == DispatchId.VARINT32) { if (!compressStarted) { addIncWriterIndexExpr(groupExpressions, buffer, acc); compressStarted = true; } groupExpressions.add(new Invoke(buffer, "_unsafeWriteVarInt32", fieldValue)); - } else if (dispatchId == DispatchId.PRIMITIVE_VAR_UINT32) { + } else if (dispatchId == DispatchId.PRIMITIVE_VAR_UINT32 + || dispatchId == DispatchId.VAR_UINT32) { if (!compressStarted) { addIncWriterIndexExpr(groupExpressions, buffer, acc); compressStarted = true; } groupExpressions.add(new Invoke(buffer, "_unsafeWriteVarUint32", fieldValue)); - } else if (dispatchId == DispatchId.PRIMITIVE_VARINT64) { + } else if (dispatchId == DispatchId.PRIMITIVE_VARINT64 || dispatchId == DispatchId.VARINT64) { if (!compressStarted) { addIncWriterIndexExpr(groupExpressions, buffer, acc); compressStarted = true; } groupExpressions.add(new Invoke(buffer, "writeVarInt64", fieldValue)); - } else if (dispatchId == DispatchId.PRIMITIVE_TAGGED_INT64) { + } else if (dispatchId == DispatchId.PRIMITIVE_TAGGED_INT64 + || dispatchId == DispatchId.TAGGED_INT64) { if (!compressStarted) { addIncWriterIndexExpr(groupExpressions, buffer, acc); compressStarted = true; } groupExpressions.add(new Invoke(buffer, "writeTaggedInt64", fieldValue)); - } else if (dispatchId == DispatchId.PRIMITIVE_VAR_UINT64) { + } else if (dispatchId == DispatchId.PRIMITIVE_VAR_UINT64 + || dispatchId == DispatchId.VAR_UINT64) { if (!compressStarted) { 
addIncWriterIndexExpr(groupExpressions, buffer, acc); compressStarted = true; } groupExpressions.add(new Invoke(buffer, "writeVarUint64", fieldValue)); - } else if (dispatchId == DispatchId.PRIMITIVE_TAGGED_UINT64) { + } else if (dispatchId == DispatchId.PRIMITIVE_TAGGED_UINT64 + || dispatchId == DispatchId.TAGGED_UINT64) { if (!compressStarted) { addIncWriterIndexExpr(groupExpressions, buffer, acc); compressStarted = true; diff --git a/java/fory-core/src/main/java/org/apache/fory/type/DispatchId.java b/java/fory-core/src/main/java/org/apache/fory/type/DispatchId.java index 2429a9a05a..4f3977a76b 100644 --- a/java/fory-core/src/main/java/org/apache/fory/type/DispatchId.java +++ b/java/fory-core/src/main/java/org/apache/fory/type/DispatchId.java @@ -51,9 +51,12 @@ public static int getDispatchId(Fory fory, Descriptor d) { TypeRef typeRef = d.getTypeRef(); Class rawType = typeRef.getRawType(); TypeExtMeta typeExtMeta = typeRef.getTypeExtMeta(); + // A field is treated as primitive for dispatch only if the Java type itself is primitive. + // Boxed types with nullable=false are still dispatched as boxed types, + // but serialized without null checks. 
boolean isPrimitive = typeRef.isPrimitive() - || (TypeUtils.unwrap(rawType).isPrimitive() + || (rawType.isPrimitive() && typeExtMeta != null && !typeExtMeta.nullable()); if (fory.isCrossLanguage()) { diff --git a/java/fory-core/src/main/resources/META-INF/native-image/org.apache.fory/fory-core/native-image.properties b/java/fory-core/src/main/resources/META-INF/native-image/org.apache.fory/fory-core/native-image.properties index 44df2f95cc..89c967274b 100644 --- a/java/fory-core/src/main/resources/META-INF/native-image/org.apache.fory/fory-core/native-image.properties +++ b/java/fory-core/src/main/resources/META-INF/native-image/org.apache.fory/fory-core/native-image.properties @@ -333,6 +333,7 @@ Args=--initialize-at-build-time=org.apache.fory.memory.MemoryBuffer,\ org.apache.fory.serializer.LazySerializer$LazyObjectSerializer,\ org.apache.fory.serializer.CodegenSerializer$LazyInitBeanSerializer,\ org.apache.fory.serializer.NoneSerializer,\ + org.apache.fory.serializer.NonexistentClassSerializers,\ org.apache.fory.serializer.NonexistentClassSerializers$ClassFieldsInfo,\ org.apache.fory.serializer.NonexistentClassSerializers$NonexistentClassSerializer,\ org.apache.fory.serializer.NonexistentClassSerializers$NonexistentEnumClassSerializer,\ diff --git a/java/fory-core/src/test/java/org/apache/fory/resolver/ClassResolverTest.java b/java/fory-core/src/test/java/org/apache/fory/resolver/ClassResolverTest.java index 6c088cfdd3..5fd8bd749f 100644 --- a/java/fory-core/src/test/java/org/apache/fory/resolver/ClassResolverTest.java +++ b/java/fory-core/src/test/java/org/apache/fory/resolver/ClassResolverTest.java @@ -74,19 +74,20 @@ public class ClassResolverTest extends ForyTestBase { public void testPrimitivesClassId() { Fory fory = Fory.builder().withLanguage(Language.JAVA).requireClassRegistration(false).build(); ClassResolver classResolver = fory.getClassResolver(); - for (List> classes : - ImmutableList.of( - TypeUtils.getSortedPrimitiveClasses(), 
TypeUtils.getSortedBoxedClasses())) { - for (int i = 0; i < classes.size() - 1; i++) { - assertEquals( - classResolver.getRegisteredClassId(classes.get(i)) + 1, - classResolver.getRegisteredClassId(classes.get(i + 1)).shortValue()); - assertTrue(classResolver.getRegisteredClassId(classes.get(i)) > 0); - } + // Test that primitive types have consecutive IDs + List> primitiveClasses = TypeUtils.getSortedPrimitiveClasses(); + for (int i = 0; i < primitiveClasses.size() - 1; i++) { assertEquals( - classResolver.getRegisteredClassId(classes.get(classes.size() - 2)) + 1, - classResolver.getRegisteredClassId(classes.get(classes.size() - 1)).shortValue()); - assertTrue(classResolver.getRegisteredClassId(classes.get(classes.size() - 1)) > 0); + classResolver.getRegisteredClassId(primitiveClasses.get(i)) + 1, + classResolver.getRegisteredClassId(primitiveClasses.get(i + 1)).shortValue()); + assertTrue(classResolver.getRegisteredClassId(primitiveClasses.get(i)) > 0); + } + assertTrue(classResolver.getRegisteredClassId(primitiveClasses.get(primitiveClasses.size() - 1)) > 0); + // Test that boxed types all have valid positive IDs + // Note: boxed types are no longer consecutive due to unsigned type IDs being added + List> boxedClasses = TypeUtils.getSortedBoxedClasses(); + for (Class boxedClass : boxedClasses) { + assertTrue(classResolver.getRegisteredClassId(boxedClass) > 0); } } From c012feeb04b6723acb8dd454963f86161ecbaad3 Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Sat, 10 Jan 2026 22:10:40 +0800 Subject: [PATCH 34/44] update cpp doc for fields --- docs/guide/cpp/field-configuration.md | 213 ++++++++++++++++++++++++++ 1 file changed, 213 insertions(+) diff --git a/docs/guide/cpp/field-configuration.md b/docs/guide/cpp/field-configuration.md index a36b832268..5391c1f3c7 100644 --- a/docs/guide/cpp/field-configuration.md +++ b/docs/guide/cpp/field-configuration.md @@ -284,8 +284,221 @@ FORY_FIELD_TAGS(Document, | **Header dependencies** | Required everywhere | Isolated 
to config | | **Migration effort** | High (change all fields) | Low (add one macro) | +## FORY_FIELD_CONFIG Macro + +The `FORY_FIELD_CONFIG` macro is the most powerful and flexible way to configure field-level serialization. It provides: + +- **Builder pattern API**: Fluent, chainable configuration with `F(id).option1().option2()` +- **Encoding control**: Specify how unsigned integers are encoded (varint, fixed, tagged) +- **Compile-time verification**: Field names are verified against member pointers +- **Cross-language compatibility**: Configure encoding to match other languages (Java, Rust, etc.) + +### Basic Syntax + +```cpp +FORY_FIELD_CONFIG(StructType, + (field1, fory::F(0)), // Simple: just ID + (field2, fory::F(1).nullable()), // With nullable + (field3, fory::F(2).varint()), // With encoding + (field4, fory::F(3).nullable().ref()), // Multiple options + (field5, 4) // Backward compatible: integer ID +); +``` + +### The F() Builder + +The `fory::F(id)` factory creates a `FieldMeta` object that supports method chaining: + +```cpp +fory::F(0) // Create with field ID 0 + .nullable() // Mark as nullable + .ref() // Enable reference tracking + .varint() // Use variable-length encoding + .fixed() // Use fixed-size encoding + .tagged() // Use tagged encoding + .monomorphic() // Mark as monomorphic type + .compress(false) // Disable compression +``` + +**Tip:** To use `F()` without the `fory::` prefix, add a using declaration: + +```cpp +using fory::F; + +FORY_FIELD_CONFIG(MyStruct, + (field1, F(0).varint()), // No prefix needed + (field2, F(1).nullable()) +); +``` + +### Encoding Options for Unsigned Integers + +For `uint32_t` and `uint64_t` fields, you can specify the wire encoding: + +| Method | Type ID | Description | Use Case | +| ----------- | ------------- | ---------------------------------------------- | ------------------------------------- | +| `.varint()` | VAR_UINT32/64 | Variable-length encoding (1-5 or 1-10 bytes) | Values typically small | +| 
`.fixed()` | UINT32/64 | Fixed-size encoding (always 4 or 8 bytes) | Values uniformly distributed | +| `.tagged()` | TAGGED_UINT64 | Tagged hybrid encoding with size hint (uint64) | Mixed small and large values (uint64) | + +**Note:** `uint8_t` and `uint16_t` always use fixed encoding (UINT8, UINT16). + +### Complete Example + +```cpp +#include "fory/serialization/fory.h" + +using namespace fory::serialization; + +// Define struct with unsigned integer fields +struct MetricsData { + // Counters - often small values, use varint for space efficiency + uint32_t requestCount; + uint64_t bytesSent; + + // IDs - uniformly distributed, use fixed for consistent performance + uint32_t userId; + uint64_t sessionId; + + // Timestamps - use tagged encoding for mixed value ranges + uint64_t createdAt; + + // Nullable fields + std::optional errorCount; + std::optional lastAccessTime; +}; + +FORY_STRUCT(MetricsData, requestCount, bytesSent, userId, sessionId, + createdAt, errorCount, lastAccessTime); + +// Configure field encoding +FORY_FIELD_CONFIG(MetricsData, + // Small counters - varint saves space + (requestCount, fory::F(0).varint()), + (bytesSent, fory::F(1).varint()), + + // IDs - fixed for consistent performance + (userId, fory::F(2).fixed()), + (sessionId, fory::F(3).fixed()), + + // Timestamp - tagged encoding + (createdAt, fory::F(4).tagged()), + + // Nullable fields + (errorCount, fory::F(5).nullable().varint()), + (lastAccessTime, fory::F(6).nullable().tagged()) +); + +int main() { + auto fory = Fory::builder().xlang(true).build(); + fory.register_struct(100); + + MetricsData data{ + .requestCount = 42, + .bytesSent = 1024, + .userId = 12345678, + .sessionId = 9876543210, + .createdAt = 1704067200000000000ULL, // 2024-01-01 in nanoseconds + .errorCount = 3, + .lastAccessTime = std::nullopt + }; + + auto bytes = fory.serialize(data).value(); + auto decoded = fory.deserialize(bytes).value(); +} +``` + +### Cross-Language Compatibility + +When serializing data to be 
read by other languages, use `FORY_FIELD_CONFIG` to match their encoding expectations: + +**Java Compatibility:** + +```cpp +// Java uses these type IDs for unsigned integers: +// - Byte (u8): UINT8 (fixed) +// - Short (u16): UINT16 (fixed) +// - Integer (u32): VAR_UINT32 (varint) or UINT32 (fixed) +// - Long (u64): VAR_UINT64 (varint), UINT64 (fixed), or TAGGED_UINT64 + +struct JavaCompatible { + uint8_t byteField; // Maps to Java Byte + uint16_t shortField; // Maps to Java Short + uint32_t intVarField; // Maps to Java Integer with varint + uint32_t intFixedField; // Maps to Java Integer with fixed + uint64_t longVarField; // Maps to Java Long with varint + uint64_t longTagged; // Maps to Java Long with tagged +}; + +FORY_STRUCT(JavaCompatible, byteField, shortField, intVarField, + intFixedField, longVarField, longTagged); + +FORY_FIELD_CONFIG(JavaCompatible, + (byteField, fory::F(0)), // UINT8 (auto) + (shortField, fory::F(1)), // UINT16 (auto) + (intVarField, fory::F(2).varint()), // VAR_UINT32 + (intFixedField, fory::F(3).fixed()), // UINT32 + (longVarField, fory::F(4).varint()), // VAR_UINT64 + (longTagged, fory::F(5).tagged()) // TAGGED_UINT64 +); +``` + +### Schema Evolution with FORY_FIELD_CONFIG + +In compatible mode, fields can have different nullability between sender and receiver: + +```cpp +// Version 1: All fields non-nullable +struct DataV1 { + uint32_t id; + uint64_t timestamp; +}; +FORY_STRUCT(DataV1, id, timestamp); +FORY_FIELD_CONFIG(DataV1, + (id, fory::F(0).varint()), + (timestamp, fory::F(1).tagged()) +); + +// Version 2: Added nullable fields +struct DataV2 { + uint32_t id; + uint64_t timestamp; + std::optional version; // New nullable field +}; +FORY_STRUCT(DataV2, id, timestamp, version); +FORY_FIELD_CONFIG(DataV2, + (id, fory::F(0).varint()), + (timestamp, fory::F(1).tagged()), + (version, fory::F(2).nullable().varint()) // New field with nullable +); +``` + +### FORY_FIELD_CONFIG Options Reference + +| Method | Description | Valid For | 
+| ---------------- | ------------------------------------------- | -------------------------- | +| `.nullable()` | Mark field as nullable | Smart pointers, primitives | +| `.ref()` | Enable reference tracking | `std::shared_ptr` only | +| `.monomorphic()` | Mark pointer as always pointing to one type | Smart pointers | +| `.varint()` | Use variable-length encoding | `uint32_t`, `uint64_t` | +| `.fixed()` | Use fixed-size encoding | `uint32_t`, `uint64_t` | +| `.tagged()` | Use tagged hybrid encoding | `uint64_t` only | +| `.compress(v)` | Enable/disable field compression | All types | + +### Comparing Field Configuration Macros + +| Feature | `fory::field<>` | `FORY_FIELD_TAGS` | `FORY_FIELD_CONFIG` | +| ----------------------- | --------------------- | ----------------- | ------------------------- | +| **Struct modification** | Required (wrap types) | None | None | +| **Encoding control** | No | No | Yes (varint/fixed/tagged) | +| **Builder pattern** | No | No | Yes | +| **Compile-time verify** | Yes | Limited | Yes (member pointers) | +| **Cross-lang compat** | Limited | Limited | Full | +| **Recommended for** | Simple structs | Third-party types | Complex/xlang structs | + ## Related Topics - [Type Registration](type-registration.md) - Registering types with FORY_STRUCT - [Schema Evolution](schema-evolution.md) - Using tag IDs for schema evolution - [Configuration](configuration.md) - Enabling reference tracking globally +- [Cross-Language](cross-language.md) - Interoperability with Java, Rust, Python From 4f7fbad1bdcecfaa0664846e920fc17d3ce01b76 Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Sat, 10 Jan 2026 22:10:50 +0800 Subject: [PATCH 35/44] update go tag --- go/fory/tag.go | 54 +++++++++++++++++--------------------------------- 1 file changed, 18 insertions(+), 36 deletions(-) diff --git a/go/fory/tag.go b/go/fory/tag.go index f78e8296c9..6220875be2 100644 --- a/go/fory/tag.go +++ b/go/fory/tag.go @@ -30,28 +30,34 @@ const ( // ForyTag represents parsed 
fory struct tag options. // -// Tag format: `fory:"id=N,nullable=bool,ref=bool,ignore=bool,type=TypeID"` or `fory:"-"` +// Tag format: `fory:"id=N,nullable=bool,ref=bool,ignore=bool,compress=bool,encoding=value"` or `fory:"-"` // // Options: // - id: Field tag ID. -1 (default) uses field name, >=0 uses numeric tag ID for compact encoding // - nullable: Whether to write null flag. Default false (skip null flag for non-nullable fields) // - ref: Whether to enable reference tracking. Default false (skip ref tracking overhead) // - ignore: Whether to skip this field during serialization. Default false -// - type: Override type ID for unsigned types. Allows specifying encoding for uint32/uint64. -// Valid values: UINT16, UINT32, VAR_UINT32, UINT64, VAR_UINT64, TAGGED_UINT64 +// - compress: For int32/uint32 fields: true=varint encoding (default), false=fixed encoding +// - encoding: For numeric fields: +// - int32/uint32: "varint" (default) or "fixed" +// - int64/uint64: "varint" (default), "fixed", or "tagged" +// +// Note: For int32/uint32, use either `compress` or `encoding`, not both. 
// // Examples: // // type Example struct { -// Name string `fory:"id=0"` // Use tag ID 0 -// Age int `fory:"id=1,nullable=false"` // Explicit nullable=false -// Email *string `fory:"id=2,nullable=true,ref=false"` // Nullable pointer, no ref tracking -// Parent *Node `fory:"id=3,ref=true,nullable=true"` // With reference tracking -// U32Fixed uint32 `fory:"type=UINT32"` // Use fixed 4-byte encoding -// U32Var uint32 `fory:"type=VAR_UINT32"` // Use variable-length encoding -// U64Tagged uint64 `fory:"type=TAGGED_UINT64"` // Use tagged encoding -// Secret string `fory:"ignore"` // Skip this field -// Hidden string `fory:"-"` // Skip this field (shorthand) +// Name string `fory:"id=0"` // Use tag ID 0 +// Age int `fory:"id=1,nullable=false"` // Explicit nullable=false +// Email *string `fory:"id=2,nullable=true,ref=false"` // Nullable pointer, no ref tracking +// Parent *Node `fory:"id=3,ref=true,nullable=true"` // With reference tracking +// FixedI32 int32 `fory:"compress=false"` // Use fixed 4-byte INT32 +// VarI32 int32 `fory:"encoding=varint"` // Use VARINT32 (default) +// FixedU32 uint32 `fory:"encoding=fixed"` // Use fixed 4-byte UINT32 +// TaggedI64 int64 `fory:"encoding=tagged"` // Use TAGGED_INT64 +// VarU64 uint64 `fory:"encoding=varint"` // Use VAR_UINT64 (default) +// Secret string `fory:"ignore"` // Skip this field +// Hidden string `fory:"-"` // Skip this field (shorthand) // } type ForyTag struct { ID int // Field tag ID (-1 = use field name, >=0 = use tag ID) @@ -165,30 +171,6 @@ func parseBool(s string) bool { return s == "true" || s == "1" || s == "yes" } -// parseTypeID parses a TypeId from string name. -// Returns 0 if the type name is not recognized. 
-func parseTypeID(s string) TypeId { - s = strings.ToUpper(strings.TrimSpace(s)) - switch s { - case "UINT8": - return UINT8 - case "UINT16": - return UINT16 - case "UINT32": - return UINT32 - case "VAR_UINT32": - return VAR_UINT32 - case "UINT64": - return UINT64 - case "VAR_UINT64": - return VAR_UINT64 - case "TAGGED_UINT64": - return TAGGED_UINT64 - default: - return 0 - } -} - // validateForyTags validates all fory tags in a struct type. // Returns an error if validation fails. // From 88a01940fc78a7a74e0588e0374fa9a64820a91d Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Sat, 10 Jan 2026 22:20:16 +0800 Subject: [PATCH 36/44] fix: add license header and fix type conversion in DefaultValueUtils - Add missing Apache license header to DispatchId.java - Fix ClassCastException in DefaultValueUtils.setDefaultValues by using Number interface for type conversion instead of direct casts --- .../java/org/apache/fory/type/DispatchId.java | 19 +++++++++++++++++++ .../apache/fory/util/DefaultValueUtils.java | 8 ++++---- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/java/fory-core/src/main/java/org/apache/fory/type/DispatchId.java b/java/fory-core/src/main/java/org/apache/fory/type/DispatchId.java index 4f3977a76b..fb82b82097 100644 --- a/java/fory-core/src/main/java/org/apache/fory/type/DispatchId.java +++ b/java/fory-core/src/main/java/org/apache/fory/type/DispatchId.java @@ -1,3 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + package org.apache.fory.type; import org.apache.fory.Fory; diff --git a/java/fory-core/src/main/java/org/apache/fory/util/DefaultValueUtils.java b/java/fory-core/src/main/java/org/apache/fory/util/DefaultValueUtils.java index 0dd4e4a50e..242bb645a3 100644 --- a/java/fory-core/src/main/java/org/apache/fory/util/DefaultValueUtils.java +++ b/java/fory-core/src/main/java/org/apache/fory/util/DefaultValueUtils.java @@ -381,7 +381,7 @@ public static void setDefaultValues(Object obj, DefaultValueField[] defaultValue case DispatchId.INT8: case DispatchId.PRIMITIVE_UINT8: case DispatchId.UINT8: - Platform.putByte(obj, fieldOffset, (Byte) defaultValue); + Platform.putByte(obj, fieldOffset, ((Number) defaultValue).byteValue()); break; case DispatchId.PRIMITIVE_CHAR: case DispatchId.CHAR: @@ -391,7 +391,7 @@ public static void setDefaultValues(Object obj, DefaultValueField[] defaultValue case DispatchId.INT16: case DispatchId.PRIMITIVE_UINT16: case DispatchId.UINT16: - Platform.putShort(obj, fieldOffset, (Short) defaultValue); + Platform.putShort(obj, fieldOffset, ((Number) defaultValue).shortValue()); break; case DispatchId.PRIMITIVE_INT32: case DispatchId.INT32: @@ -401,7 +401,7 @@ public static void setDefaultValues(Object obj, DefaultValueField[] defaultValue case DispatchId.UINT32: case DispatchId.PRIMITIVE_VAR_UINT32: case DispatchId.VAR_UINT32: - Platform.putInt(obj, fieldOffset, (Integer) defaultValue); + Platform.putInt(obj, fieldOffset, ((Number) defaultValue).intValue()); break; case DispatchId.PRIMITIVE_INT64: case DispatchId.INT64: @@ 
-415,7 +415,7 @@ public static void setDefaultValues(Object obj, DefaultValueField[] defaultValue case DispatchId.VAR_UINT64: case DispatchId.PRIMITIVE_TAGGED_UINT64: case DispatchId.TAGGED_UINT64: - Platform.putLong(obj, fieldOffset, (Long) defaultValue); + Platform.putLong(obj, fieldOffset, ((Number) defaultValue).longValue()); break; case DispatchId.PRIMITIVE_FLOAT32: case DispatchId.FLOAT32: From 32475105e8eb0fd07136b10cbff4cf821f7901eb Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Sat, 10 Jan 2026 22:49:45 +0800 Subject: [PATCH 37/44] fix: push stub ref id when global ref tracking is enabled but field trackingRef is false When global ref tracking is enabled, serializers call reference() at the end of deserialization. If a field has trackingRef=false (e.g., in xlang mode where all fields default to trackingRef=false), we need to push a stub -1 via preserveRefId() so that reference() can pop it and skip setReadObject. The fix checks if the TYPE normally needs ref tracking (ignoring field-level metadata) by using TypeRef.of(typeRef.getRawType()). This ensures the stub is pushed when needed, preventing ArrayIndexOutOfBoundsException when the serializer calls reference() on an empty readRefIds stack. 
--- .../fory/builder/BaseObjectCodecBuilder.java | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/java/fory-core/src/main/java/org/apache/fory/builder/BaseObjectCodecBuilder.java b/java/fory-core/src/main/java/org/apache/fory/builder/BaseObjectCodecBuilder.java index 1bca6a7681..b53e79802a 100644 --- a/java/fory-core/src/main/java/org/apache/fory/builder/BaseObjectCodecBuilder.java +++ b/java/fory-core/src/main/java/org/apache/fory/builder/BaseObjectCodecBuilder.java @@ -1882,8 +1882,12 @@ protected Expression deserializeField( boolean nullable = descriptor.isNullable(); // descriptor.isTrackingRef() already includes the needWriteRef check boolean useRefTracking = descriptor.isTrackingRef(); - // Check if type normally needs ref (for preserveRefId when ref tracking is disabled) - boolean typeNeedsRef = needWriteRef(typeRef); + // Check if the TYPE normally needs ref tracking, ignoring field-level metadata. + // When global ref tracking is enabled, serializers call reference() at the end. + // If field has trackingRef=false but the type's serializer calls reference(), + // we need to push a stub -1 so reference() can pop it and skip setReadObject. + // Use raw type without metadata to check type-level ref tracking. + boolean serializerCallsReference = needWriteRef(TypeRef.of(typeRef.getRawType())); if (useRefTracking) { return readRef( @@ -1892,10 +1896,10 @@ protected Expression deserializeField( if (!nullable) { Expression value = deserializeForNotNullForField(buffer, descriptor, null); - if (typeNeedsRef) { + if (serializerCallsReference) { // When a field explicitly disables ref tracking (@ForyField(trackingRef=false)) - // but the type normally needs ref tracking (e.g., collections), - // we need to preserve a -1 id so that when the deserializer calls reference(), + // but global ref tracking is enabled, the serializer will call reference(). 
+ // We need to preserve a -1 id so that when the deserializer calls reference(), // it will pop this -1 and skip the setReadObject call. Expression preserveStubRefId = new Invoke(refResolverRef, "preserveRefId", new Literal(-1, PRIMITIVE_INT_TYPE)); @@ -1916,7 +1920,7 @@ protected Expression deserializeField( true, localFieldType); - if (typeNeedsRef) { + if (serializerCallsReference) { Expression preserveStubRefId = new Invoke(refResolverRef, "preserveRefId", new Literal(-1, PRIMITIVE_INT_TYPE)); return new ListExpression(preserveStubRefId, readNullableExpr); From cca95fb6cd23abd0a619bd62b2000d6f3bd81926 Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Sat, 10 Jan 2026 22:50:42 +0800 Subject: [PATCH 38/44] fix ci --- .github/workflows/build-native-pr.yml | 4 ++-- .github/workflows/build-native-release.yml | 4 ++-- go/fory/tag.go | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build-native-pr.yml b/.github/workflows/build-native-pr.yml index bdb2fef7b5..ec4ce28930 100644 --- a/.github/workflows/build-native-pr.yml +++ b/.github/workflows/build-native-pr.yml @@ -168,7 +168,7 @@ jobs: X86_DIR=$(ls -d unpacked/x86_64/pyfory-*) UNIVERSAL_DIR="unpacked/pyfory-universal2" cp -R "$ARM_DIR" "$UNIVERSAL_DIR" - for so in pyfory/_util.so pyfory/serialization.so pyfory/format/_format.so pyfory/lib/mmh3/mmh3.so; do + for so in pyfory/buffer.go pyfory/serialization.so pyfory/format/_format.so pyfory/lib/mmh3/mmh3.so; do lipo -create "$ARM_DIR/$so" "$X86_DIR/$so" -output "$UNIVERSAL_DIR/$so" done WHEEL_FILE=$(ls "$UNIVERSAL_DIR"/pyfory-*.dist-info/WHEEL) @@ -187,7 +187,7 @@ jobs: VERIFY_DIR=$(ls -d verify/pyfory-*) WHEEL_FILE=$(ls "$VERIFY_DIR"/pyfory-*.dist-info/WHEEL) grep -q "macosx_11_0_universal2" "$WHEEL_FILE" - for so in pyfory/_util.so pyfory/serialization.so pyfory/format/_format.so pyfory/lib/mmh3/mmh3.so; do + for so in pyfory/buffer.go pyfory/serialization.so pyfory/format/_format.so pyfory/lib/mmh3/mmh3.so; do echo "$so: 
$(lipo -archs "$VERIFY_DIR/$so")" done - name: Upload universal2 wheel diff --git a/.github/workflows/build-native-release.yml b/.github/workflows/build-native-release.yml index a531f766cb..42aa6ee197 100644 --- a/.github/workflows/build-native-release.yml +++ b/.github/workflows/build-native-release.yml @@ -206,7 +206,7 @@ jobs: X86_DIR=$(ls -d unpacked/x86_64/pyfory-*) UNIVERSAL_DIR="unpacked/pyfory-universal2" cp -R "$ARM_DIR" "$UNIVERSAL_DIR" - for so in pyfory/_util.so pyfory/serialization.so pyfory/format/_format.so pyfory/lib/mmh3/mmh3.so; do + for so in pyfory/buffer.go pyfory/serialization.so pyfory/format/_format.so pyfory/lib/mmh3/mmh3.so; do lipo -create "$ARM_DIR/$so" "$X86_DIR/$so" -output "$UNIVERSAL_DIR/$so" done WHEEL_FILE=$(ls "$UNIVERSAL_DIR"/pyfory-*.dist-info/WHEEL) @@ -225,7 +225,7 @@ jobs: VERIFY_DIR=$(ls -d verify/pyfory-*) WHEEL_FILE=$(ls "$VERIFY_DIR"/pyfory-*.dist-info/WHEEL) grep -q "macosx_11_0_universal2" "$WHEEL_FILE" - for so in pyfory/_util.so pyfory/serialization.so pyfory/format/_format.so pyfory/lib/mmh3/mmh3.so; do + for so in pyfory/buffer.go pyfory/serialization.so pyfory/format/_format.so pyfory/lib/mmh3/mmh3.so; do echo "$so: $(lipo -archs "$VERIFY_DIR/$so")" done - name: Upload universal2 wheel diff --git a/go/fory/tag.go b/go/fory/tag.go index 6220875be2..3287a9c0d5 100644 --- a/go/fory/tag.go +++ b/go/fory/tag.go @@ -39,8 +39,8 @@ const ( // - ignore: Whether to skip this field during serialization. Default false // - compress: For int32/uint32 fields: true=varint encoding (default), false=fixed encoding // - encoding: For numeric fields: -// - int32/uint32: "varint" (default) or "fixed" -// - int64/uint64: "varint" (default), "fixed", or "tagged" +// - int32/uint32: "varint" (default) or "fixed" +// - int64/uint64: "varint" (default), "fixed", or "tagged" // // Note: For int32/uint32, use either `compress` or `encoding`, not both. 
// From fe0fb764d408854b26a74519805d6b6206253240 Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Sat, 10 Jan 2026 22:56:29 +0800 Subject: [PATCH 39/44] style: format code with spotless --- .../java/org/apache/fory/builder/ObjectCodecBuilder.java | 6 ++++-- .../src/main/java/org/apache/fory/type/DispatchId.java | 4 +--- .../java/org/apache/fory/resolver/ClassResolverTest.java | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/java/fory-core/src/main/java/org/apache/fory/builder/ObjectCodecBuilder.java b/java/fory-core/src/main/java/org/apache/fory/builder/ObjectCodecBuilder.java index 98e745fc29..818299e0ba 100644 --- a/java/fory-core/src/main/java/org/apache/fory/builder/ObjectCodecBuilder.java +++ b/java/fory-core/src/main/java/org/apache/fory/builder/ObjectCodecBuilder.java @@ -417,7 +417,8 @@ private List serializePrimitivesCompressed( || dispatchId == DispatchId.UINT64) { groupExpressions.add(unsafePutLong(base, getWriterPos(writerAddr, acc), fieldValue)); acc += 8; - } else if (dispatchId == DispatchId.PRIMITIVE_VARINT32 || dispatchId == DispatchId.VARINT32) { + } else if (dispatchId == DispatchId.PRIMITIVE_VARINT32 + || dispatchId == DispatchId.VARINT32) { if (!compressStarted) { addIncWriterIndexExpr(groupExpressions, buffer, acc); compressStarted = true; @@ -430,7 +431,8 @@ private List serializePrimitivesCompressed( compressStarted = true; } groupExpressions.add(new Invoke(buffer, "_unsafeWriteVarUint32", fieldValue)); - } else if (dispatchId == DispatchId.PRIMITIVE_VARINT64 || dispatchId == DispatchId.VARINT64) { + } else if (dispatchId == DispatchId.PRIMITIVE_VARINT64 + || dispatchId == DispatchId.VARINT64) { if (!compressStarted) { addIncWriterIndexExpr(groupExpressions, buffer, acc); compressStarted = true; diff --git a/java/fory-core/src/main/java/org/apache/fory/type/DispatchId.java b/java/fory-core/src/main/java/org/apache/fory/type/DispatchId.java index fb82b82097..0b12adeb2c 100644 --- 
a/java/fory-core/src/main/java/org/apache/fory/type/DispatchId.java +++ b/java/fory-core/src/main/java/org/apache/fory/type/DispatchId.java @@ -75,9 +75,7 @@ public static int getDispatchId(Fory fory, Descriptor d) { // but serialized without null checks. boolean isPrimitive = typeRef.isPrimitive() - || (rawType.isPrimitive() - && typeExtMeta != null - && !typeExtMeta.nullable()); + || (rawType.isPrimitive() && typeExtMeta != null && !typeExtMeta.nullable()); if (fory.isCrossLanguage()) { return xlangTypeIdToDispatchId(typeId, isPrimitive); } else { diff --git a/java/fory-core/src/test/java/org/apache/fory/resolver/ClassResolverTest.java b/java/fory-core/src/test/java/org/apache/fory/resolver/ClassResolverTest.java index 5fd8bd749f..66620152a7 100644 --- a/java/fory-core/src/test/java/org/apache/fory/resolver/ClassResolverTest.java +++ b/java/fory-core/src/test/java/org/apache/fory/resolver/ClassResolverTest.java @@ -24,7 +24,6 @@ import static org.testng.Assert.assertSame; import static org.testng.Assert.assertTrue; -import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.primitives.Primitives; import java.nio.charset.StandardCharsets; @@ -82,7 +81,8 @@ public void testPrimitivesClassId() { classResolver.getRegisteredClassId(primitiveClasses.get(i + 1)).shortValue()); assertTrue(classResolver.getRegisteredClassId(primitiveClasses.get(i)) > 0); } - assertTrue(classResolver.getRegisteredClassId(primitiveClasses.get(primitiveClasses.size() - 1)) > 0); + assertTrue( + classResolver.getRegisteredClassId(primitiveClasses.get(primitiveClasses.size() - 1)) > 0); // Test that boxed types all have valid positive IDs // Note: boxed types are no longer consecutive due to unsigned type IDs being added List> boxedClasses = TypeUtils.getSortedBoxedClasses(); From 72802acc4a6317a97f38c336546a4da145c973d2 Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Sat, 10 Jan 2026 23:14:28 +0800 Subject: [PATCH 40/44] fix: use 
Types.getTypeId for Scala/Kotlin default value type dispatch Use Types.getTypeId() instead of ClassResolver registered IDs for determining dispatch IDs in DefaultValueUtils. This ensures consistent type IDs between DispatchId constants and the values used in setDefaultValues. Also convert default values to correct types during initialization to avoid repeated type conversion at runtime. --- .../apache/fory/util/DefaultValueUtils.java | 108 +++++++++--------- .../util/ScalaDefaultValueUtilsTest.scala | 4 +- 2 files changed, 53 insertions(+), 59 deletions(-) diff --git a/java/fory-core/src/main/java/org/apache/fory/util/DefaultValueUtils.java b/java/fory-core/src/main/java/org/apache/fory/util/DefaultValueUtils.java index 242bb645a3..2ce6518ebe 100644 --- a/java/fory-core/src/main/java/org/apache/fory/util/DefaultValueUtils.java +++ b/java/fory-core/src/main/java/org/apache/fory/util/DefaultValueUtils.java @@ -34,9 +34,9 @@ import org.apache.fory.logging.LoggerFactory; import org.apache.fory.memory.Platform; import org.apache.fory.reflect.FieldAccessor; -import org.apache.fory.type.DispatchId; import org.apache.fory.type.ScalaTypes; import org.apache.fory.type.TypeUtils; +import org.apache.fory.type.Types; import org.apache.fory.util.unsafe._JDKAccess; /** @@ -61,14 +61,14 @@ public static final class DefaultValueField { private final Object defaultValue; private final String fieldName; private final FieldAccessor fieldAccessor; - private final short classId; + private final int dispatchId; private DefaultValueField( - String fieldName, Object defaultValue, FieldAccessor fieldAccessor, short classId) { + String fieldName, Object defaultValue, FieldAccessor fieldAccessor, int dispatchId) { this.fieldName = fieldName; this.defaultValue = defaultValue; this.fieldAccessor = fieldAccessor; - this.classId = classId; + this.dispatchId = dispatchId; } public Object getDefaultValue() { @@ -83,8 +83,8 @@ public FieldAccessor getFieldAccessor() { return fieldAccessor; } - 
public short getClassId() { - return classId; + public int getDispatchId() { + return dispatchId; } } @@ -131,13 +131,11 @@ public final DefaultValueField[] buildDefaultValueFields( if (defaultValue != null && TypeUtils.wrap(field.getType()).isAssignableFrom(defaultValue.getClass())) { FieldAccessor fieldAccessor = FieldAccessor.createAccessor(field); - Short classId = fory.getClassResolver().getRegisteredClassId(field.getType()); + int dispatchId = Types.getTypeId(fory, field.getType()); + // Convert value to correct type once during initialization + Object convertedValue = convertToType(defaultValue, dispatchId); defaultFields.add( - new DefaultValueField( - fieldName, - defaultValue, - fieldAccessor, - classId != null ? classId : DispatchId.UNKNOWN)); + new DefaultValueField(fieldName, convertedValue, fieldAccessor, dispatchId)); } } } @@ -359,6 +357,31 @@ private static Map getDefaultValuesForRegularScalaClass(Class= 0 + field.getDispatchId.toInt should be >= 0 } } @@ -380,7 +380,7 @@ class ScalaDefaultValueUtilsTest extends AnyWordSpec with Matchers { field.getFieldName should not be null field.getDefaultValue should not be null field.getFieldAccessor should not be null - field.getClassId.toInt should be >= 0 + field.getDispatchId.toInt should be >= 0 } } } From 47169d8c0272ab419a9ec31fd8ff33972f36a1c9 Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Sat, 10 Jan 2026 23:21:14 +0800 Subject: [PATCH 41/44] fix build ci --- .github/workflows/build-native-release.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-native-release.yml b/.github/workflows/build-native-release.yml index 42aa6ee197..7f8c5314aa 100644 --- a/.github/workflows/build-native-release.yml +++ b/.github/workflows/build-native-release.yml @@ -206,7 +206,7 @@ jobs: X86_DIR=$(ls -d unpacked/x86_64/pyfory-*) UNIVERSAL_DIR="unpacked/pyfory-universal2" cp -R "$ARM_DIR" "$UNIVERSAL_DIR" - for so in pyfory/buffer.go pyfory/serialization.so 
pyfory/format/_format.so pyfory/lib/mmh3/mmh3.so; do + for so in pyfory/buffer.so pyfory/serialization.so pyfory/format/_format.so pyfory/lib/mmh3/mmh3.so; do lipo -create "$ARM_DIR/$so" "$X86_DIR/$so" -output "$UNIVERSAL_DIR/$so" done WHEEL_FILE=$(ls "$UNIVERSAL_DIR"/pyfory-*.dist-info/WHEEL) @@ -225,7 +225,7 @@ jobs: VERIFY_DIR=$(ls -d verify/pyfory-*) WHEEL_FILE=$(ls "$VERIFY_DIR"/pyfory-*.dist-info/WHEEL) grep -q "macosx_11_0_universal2" "$WHEEL_FILE" - for so in pyfory/buffer.go pyfory/serialization.so pyfory/format/_format.so pyfory/lib/mmh3/mmh3.so; do + for so in pyfory/ pyfory/serialization.so pyfory/format/_format.so pyfory/lib/mmh3/mmh3.so; do echo "$so: $(lipo -archs "$VERIFY_DIR/$so")" done - name: Upload universal2 wheel From db73589a036a6357015d8b35d6ccccee7dae5f77 Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Sat, 10 Jan 2026 23:37:01 +0800 Subject: [PATCH 42/44] fix(ci): correct buffer.go typo to buffer.so in macos universal2 build --- .github/workflows/build-native-pr.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-native-pr.yml b/.github/workflows/build-native-pr.yml index ec4ce28930..49077afe5d 100644 --- a/.github/workflows/build-native-pr.yml +++ b/.github/workflows/build-native-pr.yml @@ -168,7 +168,7 @@ jobs: X86_DIR=$(ls -d unpacked/x86_64/pyfory-*) UNIVERSAL_DIR="unpacked/pyfory-universal2" cp -R "$ARM_DIR" "$UNIVERSAL_DIR" - for so in pyfory/buffer.go pyfory/serialization.so pyfory/format/_format.so pyfory/lib/mmh3/mmh3.so; do + for so in pyfory/buffer.so pyfory/serialization.so pyfory/format/_format.so pyfory/lib/mmh3/mmh3.so; do lipo -create "$ARM_DIR/$so" "$X86_DIR/$so" -output "$UNIVERSAL_DIR/$so" done WHEEL_FILE=$(ls "$UNIVERSAL_DIR"/pyfory-*.dist-info/WHEEL) @@ -187,7 +187,7 @@ jobs: VERIFY_DIR=$(ls -d verify/pyfory-*) WHEEL_FILE=$(ls "$VERIFY_DIR"/pyfory-*.dist-info/WHEEL) grep -q "macosx_11_0_universal2" "$WHEEL_FILE" - for so in pyfory/buffer.go pyfory/serialization.so 
pyfory/format/_format.so pyfory/lib/mmh3/mmh3.so; do + for so in pyfory/buffer.so pyfory/serialization.so pyfory/format/_format.so pyfory/lib/mmh3/mmh3.so; do echo "$so: $(lipo -archs "$VERIFY_DIR/$so")" done - name: Upload universal2 wheel From 585e545dfe82359cc30b16b7fa53bd57886f97f5 Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Sat, 10 Jan 2026 23:43:10 +0800 Subject: [PATCH 43/44] udpate benchmark code --- benchmarks/cpp_benchmark/README.md | 16 ++++++++-------- benchmarks/rust_benchmark/src/models/complex.rs | 4 ++-- benchmarks/rust_benchmark/src/models/medium.rs | 2 +- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/benchmarks/cpp_benchmark/README.md b/benchmarks/cpp_benchmark/README.md index 8f6250511e..0ea3618a27 100644 --- a/benchmarks/cpp_benchmark/README.md +++ b/benchmarks/cpp_benchmark/README.md @@ -29,14 +29,14 @@ Note: Protobuf is fetched automatically via CMake FetchContent, so no manual ins

      -| Datatype | Operation | Fory TPS | Protobuf TPS | Faster | -| ------------ | ----------- | ---------- | ------------ | ----------- | -| Mediacontent | Serialize | 2,312,522 | 501,867 | Fory (4.6x) | -| Mediacontent | Deserialize | 769,157 | 398,960 | Fory (1.9x) | -| Sample | Serialize | 5,046,250 | 3,182,176 | Fory (1.6x) | -| Sample | Deserialize | 941,637 | 721,614 | Fory (1.3x) | -| Struct | Serialize | 21,424,386 | 6,024,856 | Fory (3.6x) | -| Struct | Deserialize | 7,904,533 | 6,515,853 | Fory (1.2x) | +| Datatype | Operation | Fory TPS | Protobuf TPS | Faster | +|----------|-----------|----------|--------------|--------| +| Mediacontent | Serialize | 2,430,924 | 484,368 | Fory (5.0x) | +| Mediacontent | Deserialize | 740,074 | 387,522 | Fory (1.9x) | +| Sample | Serialize | 4,813,270 | 3,021,968 | Fory (1.6x) | +| Sample | Deserialize | 915,554 | 684,675 | Fory (1.3x) | +| Struct | Serialize | 18,105,957 | 5,788,186 | Fory (3.1x) | +| Struct | Deserialize | 7,495,726 | 5,932,982 | Fory (1.3x) | ## Quick Start diff --git a/benchmarks/rust_benchmark/src/models/complex.rs b/benchmarks/rust_benchmark/src/models/complex.rs index 8db31b4a2f..e70003b366 100644 --- a/benchmarks/rust_benchmark/src/models/complex.rs +++ b/benchmarks/rust_benchmark/src/models/complex.rs @@ -22,7 +22,7 @@ use serde::{Deserialize, Serialize}; use std::collections::HashMap; // Fory models -#[derive(ForyObject, Debug, Clone, PartialEq)] +#[derive(ForyObject, Debug, Clone, PartialEq, Default)] pub struct ForyProduct { pub id: String, pub name: String, @@ -39,7 +39,7 @@ pub struct ForyOrderItem { pub customizations: HashMap, } -#[derive(ForyObject, Debug, Clone, PartialEq)] +#[derive(ForyObject, Debug, Clone, PartialEq, Default)] pub struct ForyCustomer { pub id: String, pub name: String, diff --git a/benchmarks/rust_benchmark/src/models/medium.rs b/benchmarks/rust_benchmark/src/models/medium.rs index 727ff7cb39..1a718bc166 100644 --- a/benchmarks/rust_benchmark/src/models/medium.rs 
+++ b/benchmarks/rust_benchmark/src/models/medium.rs @@ -22,7 +22,7 @@ use serde::{Deserialize, Serialize}; use std::collections::HashMap; // Fory models -#[derive(ForyObject, Debug, Clone, PartialEq)] +#[derive(ForyObject, Debug, Clone, PartialEq, Default)] pub struct ForyAddress { pub street: String, pub city: String, From 64482b51ec9b6eea199529894ebee7b11b81406a Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Sun, 11 Jan 2026 00:01:03 +0800 Subject: [PATCH 44/44] fix code style --- benchmarks/cpp_benchmark/README.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/benchmarks/cpp_benchmark/README.md b/benchmarks/cpp_benchmark/README.md index 0ea3618a27..c4b536fbc6 100644 --- a/benchmarks/cpp_benchmark/README.md +++ b/benchmarks/cpp_benchmark/README.md @@ -29,14 +29,14 @@ Note: Protobuf is fetched automatically via CMake FetchContent, so no manual ins

      -| Datatype | Operation | Fory TPS | Protobuf TPS | Faster | -|----------|-----------|----------|--------------|--------| -| Mediacontent | Serialize | 2,430,924 | 484,368 | Fory (5.0x) | -| Mediacontent | Deserialize | 740,074 | 387,522 | Fory (1.9x) | -| Sample | Serialize | 4,813,270 | 3,021,968 | Fory (1.6x) | -| Sample | Deserialize | 915,554 | 684,675 | Fory (1.3x) | -| Struct | Serialize | 18,105,957 | 5,788,186 | Fory (3.1x) | -| Struct | Deserialize | 7,495,726 | 5,932,982 | Fory (1.3x) | +| Datatype | Operation | Fory TPS | Protobuf TPS | Faster | +| ------------ | ----------- | ---------- | ------------ | ----------- | +| Mediacontent | Serialize | 2,430,924 | 484,368 | Fory (5.0x) | +| Mediacontent | Deserialize | 740,074 | 387,522 | Fory (1.9x) | +| Sample | Serialize | 4,813,270 | 3,021,968 | Fory (1.6x) | +| Sample | Deserialize | 915,554 | 684,675 | Fory (1.3x) | +| Struct | Serialize | 18,105,957 | 5,788,186 | Fory (3.1x) | +| Struct | Deserialize | 7,495,726 | 5,932,982 | Fory (1.3x) | ## Quick Start