diff --git a/.github/workflows/build-native-pr.yml b/.github/workflows/build-native-pr.yml index bdb2fef7b5..49077afe5d 100644 --- a/.github/workflows/build-native-pr.yml +++ b/.github/workflows/build-native-pr.yml @@ -168,7 +168,7 @@ jobs: X86_DIR=$(ls -d unpacked/x86_64/pyfory-*) UNIVERSAL_DIR="unpacked/pyfory-universal2" cp -R "$ARM_DIR" "$UNIVERSAL_DIR" - for so in pyfory/_util.so pyfory/serialization.so pyfory/format/_format.so pyfory/lib/mmh3/mmh3.so; do + for so in pyfory/buffer.so pyfory/serialization.so pyfory/format/_format.so pyfory/lib/mmh3/mmh3.so; do lipo -create "$ARM_DIR/$so" "$X86_DIR/$so" -output "$UNIVERSAL_DIR/$so" done WHEEL_FILE=$(ls "$UNIVERSAL_DIR"/pyfory-*.dist-info/WHEEL) @@ -187,7 +187,7 @@ jobs: VERIFY_DIR=$(ls -d verify/pyfory-*) WHEEL_FILE=$(ls "$VERIFY_DIR"/pyfory-*.dist-info/WHEEL) grep -q "macosx_11_0_universal2" "$WHEEL_FILE" - for so in pyfory/_util.so pyfory/serialization.so pyfory/format/_format.so pyfory/lib/mmh3/mmh3.so; do + for so in pyfory/buffer.so pyfory/serialization.so pyfory/format/_format.so pyfory/lib/mmh3/mmh3.so; do echo "$so: $(lipo -archs "$VERIFY_DIR/$so")" done - name: Upload universal2 wheel diff --git a/.github/workflows/build-native-release.yml b/.github/workflows/build-native-release.yml index a531f766cb..7f8c5314aa 100644 --- a/.github/workflows/build-native-release.yml +++ b/.github/workflows/build-native-release.yml @@ -206,7 +206,7 @@ jobs: X86_DIR=$(ls -d unpacked/x86_64/pyfory-*) UNIVERSAL_DIR="unpacked/pyfory-universal2" cp -R "$ARM_DIR" "$UNIVERSAL_DIR" - for so in pyfory/_util.so pyfory/serialization.so pyfory/format/_format.so pyfory/lib/mmh3/mmh3.so; do + for so in pyfory/buffer.so pyfory/serialization.so pyfory/format/_format.so pyfory/lib/mmh3/mmh3.so; do lipo -create "$ARM_DIR/$so" "$X86_DIR/$so" -output "$UNIVERSAL_DIR/$so" done WHEEL_FILE=$(ls "$UNIVERSAL_DIR"/pyfory-*.dist-info/WHEEL) @@ -225,7 +225,7 @@ jobs: VERIFY_DIR=$(ls -d verify/pyfory-*) WHEEL_FILE=$(ls 
"$VERIFY_DIR"/pyfory-*.dist-info/WHEEL) grep -q "macosx_11_0_universal2" "$WHEEL_FILE" - for so in pyfory/_util.so pyfory/serialization.so pyfory/format/_format.so pyfory/lib/mmh3/mmh3.so; do + for so in pyfory/buffer.so pyfory/serialization.so pyfory/format/_format.so pyfory/lib/mmh3/mmh3.so; do echo "$so: $(lipo -archs "$VERIFY_DIR/$so")" done - name: Upload universal2 wheel diff --git a/AGENTS.md b/AGENTS.md index 9202efd6a8..7ae97b7173 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -13,6 +13,7 @@ While working on Fory, please remember: - **Git-Tracked Files**: When reading code, skip all files not tracked by git by default unless generated by yourself. - **Cross-Language Consistency**: Maintain consistency across language implementations while respecting language-specific idioms. - **Graalvm Support using fory codegen**: For graalvm, please use `fory codegen` to generate the serializer when building graalvm native image, do not use graallvm reflect-related configuration unless for JDK `proxy`. +- **Xlang Type System**: Java `native mode(xlang=false)` shares same type systems between type id from `Types.BOOL~Types.STRING` with `xlang mode(xlang=true)`, but for other types, java `native mode` has different type ids. 
## Build and Development Commands @@ -125,7 +126,7 @@ cd java mvn -T16 install -DskipTests cd fory-core # disable fory cython for faster debugging -FORY_PYTHON_JAVA_CI=1 ENABLE_FORY_CYTHON_SERIALIZATION=0 mvn -T16 test -Dtest=org.apache.fory.xlang.PythonXlangTest +FORY_PYTHON_JAVA_CI=1 ENABLE_FORY_CYTHON_SERIALIZATION=0 ENABLE_FORY_DEBUG_OUTPUT=1 mvn -T16 test -Dtest=org.apache.fory.xlang.PythonXlangTest # enable fory cython FORY_PYTHON_JAVA_CI=1 ENABLE_FORY_CYTHON_SERIALIZATION=1 ENABLE_FORY_DEBUG_OUTPUT=1 mvn -T16 test -Dtest=org.apache.fory.xlang.PythonXlangTest ``` @@ -215,7 +216,7 @@ Run Rust xlang tests: cd java mvn -T16 install -DskipTests cd fory-core -FORY_RUST_JAVA_CI=1 ENABLE_FORY_DEBUG_OUTPUT=1 mvn test -Dtest=org.apache.fory.xlang.RustXlangTest +RUST_BACKTRACE=1 FORY_PANIC_ON_ERROR=1 FORY_RUST_JAVA_CI=1 ENABLE_FORY_DEBUG_OUTPUT=1 mvn test -Dtest=org.apache.fory.xlang.RustXlangTest ``` ### JavaScript/TypeScript Development @@ -445,12 +446,12 @@ Code structure: - `python/pyfory/serialization.pyx`: Core serialization logic and entry point for cython mode based on `xlang serialization format` - `python/pyfory/_fory.py`: Serialization entry point for pure python mode based on `xlang serialization format` -- `python/pyfory/_registry.py`: Type registry, resolution and serializer dispatch for pure python mode, which is also used by cython mode. Cython mode use a cache to reduce invocations to this module. +- `python/pyfory/registry.py`: Type registry, resolution and serializer dispatch for pure python mode, which is also used by cython mode. Cython mode use a cache to reduce invocations to this module. - `python/pyfory/serializer.py`: Serializers for non-internal types - `python/pyfory/includes`: Cython headers for `c++` functions and classes. 
- `python/pyfory/resolver.py`: resolving shared/circular references when ref tracking is enabled in pure python mode - `python/pyfory/format`: Fory row format encoding and decoding, arrow columnar format interoperation -- `python/pyfory/_util.pyx`: Buffer for reading/writing data, string utilities. Used by `serialization.pyx` and `python/pyfory/format` at the same time. +- `python/pyfory/buffer.pyx`: Buffer for reading/writing data, string utilities. Used by `serialization.pyx` and `python/pyfory/format` at the same time. #### Go diff --git a/BUILD b/BUILD index abdfdc250a..809727199e 100644 --- a/BUILD +++ b/BUILD @@ -20,11 +20,11 @@ load("@hedron_compile_commands//:refresh_compile_commands.bzl", "refresh_compile pyx_library( - name = "_util", + name = "buffer", srcs = glob([ "python/pyfory/includes/*.pxd", - "python/pyfory/_util.pxd", - "python/pyfory/_util.pyx", + "python/pyfory/buffer.pxd", + "python/pyfory/buffer.pyx", "python/pyfory/__init__.py", ]), cc_kwargs = dict( @@ -54,7 +54,7 @@ pyx_library( name = "serialization", srcs = glob([ "python/pyfory/includes/*.pxd", - "python/pyfory/_util.pxd", + "python/pyfory/buffer.pxd", "python/pyfory/serialization.pyx", "python/pyfory/*.pxi", "python/pyfory/__init__.py", @@ -76,7 +76,7 @@ pyx_library( [ "python/pyfory/__init__.py", "python/pyfory/includes/*.pxd", - "python/pyfory/_util.pxd", + "python/pyfory/buffer.pxd", "python/pyfory/*.pxi", "python/pyfory/format/_format.pyx", "python/pyfory/format/__init__.py", @@ -95,7 +95,7 @@ pyx_library( genrule( name = "cp_fory_so", srcs = [ - ":python/pyfory/_util.so", + ":python/pyfory/buffer.so", ":python/pyfory/lib/mmh3/mmh3.so", ":python/pyfory/format/_format.so", ":python/pyfory/serialization.so", @@ -110,12 +110,12 @@ genrule( u_name=`uname -s` if [ "$${u_name: 0: 4}" == "MING" ] || [ "$${u_name: 0: 4}" == "MSYS" ] then - cp -f $(location python/pyfory/_util.so) "$$WORK_DIR/python/pyfory/_util.pyd" + cp -f $(location python/pyfory/buffer.so) 
"$$WORK_DIR/python/pyfory/buffer.pyd" cp -f $(location python/pyfory/lib/mmh3/mmh3.so) "$$WORK_DIR/python/pyfory/lib/mmh3/mmh3.pyd" cp -f $(location python/pyfory/format/_format.so) "$$WORK_DIR/python/pyfory/format/_format.pyd" cp -f $(location python/pyfory/serialization.so) "$$WORK_DIR/python/pyfory/serialization.pyd" else - cp -f $(location python/pyfory/_util.so) "$$WORK_DIR/python/pyfory" + cp -f $(location python/pyfory/buffer.so) "$$WORK_DIR/python/pyfory" cp -f $(location python/pyfory/lib/mmh3/mmh3.so) "$$WORK_DIR/python/pyfory/lib/mmh3" cp -f $(location python/pyfory/format/_format.so) "$$WORK_DIR/python/pyfory/format" cp -f $(location python/pyfory/serialization.so) "$$WORK_DIR/python/pyfory" @@ -131,4 +131,4 @@ refresh_compile_commands( name = "refresh_compile_commands", exclude_headers = "all", exclude_external_sources = True, -) \ No newline at end of file +) diff --git a/benchmarks/cpp_benchmark/README.md b/benchmarks/cpp_benchmark/README.md index 8f6250511e..c4b536fbc6 100644 --- a/benchmarks/cpp_benchmark/README.md +++ b/benchmarks/cpp_benchmark/README.md @@ -31,12 +31,12 @@ Note: Protobuf is fetched automatically via CMake FetchContent, so no manual ins | Datatype | Operation | Fory TPS | Protobuf TPS | Faster | | ------------ | ----------- | ---------- | ------------ | ----------- | -| Mediacontent | Serialize | 2,312,522 | 501,867 | Fory (4.6x) | -| Mediacontent | Deserialize | 769,157 | 398,960 | Fory (1.9x) | -| Sample | Serialize | 5,046,250 | 3,182,176 | Fory (1.6x) | -| Sample | Deserialize | 941,637 | 721,614 | Fory (1.3x) | -| Struct | Serialize | 21,424,386 | 6,024,856 | Fory (3.6x) | -| Struct | Deserialize | 7,904,533 | 6,515,853 | Fory (1.2x) | +| Mediacontent | Serialize | 2,430,924 | 484,368 | Fory (5.0x) | +| Mediacontent | Deserialize | 740,074 | 387,522 | Fory (1.9x) | +| Sample | Serialize | 4,813,270 | 3,021,968 | Fory (1.6x) | +| Sample | Deserialize | 915,554 | 684,675 | Fory (1.3x) | +| Struct | Serialize | 18,105,957 | 
5,788,186 | Fory (3.1x) | +| Struct | Deserialize | 7,495,726 | 5,932,982 | Fory (1.3x) | ## Quick Start diff --git a/benchmarks/rust_benchmark/src/models/complex.rs b/benchmarks/rust_benchmark/src/models/complex.rs index 8db31b4a2f..e70003b366 100644 --- a/benchmarks/rust_benchmark/src/models/complex.rs +++ b/benchmarks/rust_benchmark/src/models/complex.rs @@ -22,7 +22,7 @@ use serde::{Deserialize, Serialize}; use std::collections::HashMap; // Fory models -#[derive(ForyObject, Debug, Clone, PartialEq)] +#[derive(ForyObject, Debug, Clone, PartialEq, Default)] pub struct ForyProduct { pub id: String, pub name: String, @@ -39,7 +39,7 @@ pub struct ForyOrderItem { pub customizations: HashMap, } -#[derive(ForyObject, Debug, Clone, PartialEq)] +#[derive(ForyObject, Debug, Clone, PartialEq, Default)] pub struct ForyCustomer { pub id: String, pub name: String, diff --git a/benchmarks/rust_benchmark/src/models/medium.rs b/benchmarks/rust_benchmark/src/models/medium.rs index 727ff7cb39..1a718bc166 100644 --- a/benchmarks/rust_benchmark/src/models/medium.rs +++ b/benchmarks/rust_benchmark/src/models/medium.rs @@ -22,7 +22,7 @@ use serde::{Deserialize, Serialize}; use std::collections::HashMap; // Fory models -#[derive(ForyObject, Debug, Clone, PartialEq)] +#[derive(ForyObject, Debug, Clone, PartialEq, Default)] pub struct ForyAddress { pub street: String, pub city: String, diff --git a/cpp/fory/meta/field.h b/cpp/fory/meta/field.h index 7ec8835208..7bcdc4e049 100644 --- a/cpp/fory/meta/field.h +++ b/cpp/fory/meta/field.h @@ -120,6 +120,176 @@ inline constexpr bool has_field_tags_v = ForyFieldTagsImpl::has_tags; } // namespace detail +// ============================================================================ +// Field Encoding Types for Unsigned Integers +// ============================================================================ + +/// Encoding strategies for integer fields +enum class Encoding { + Default = 0, // Use type's default encoding + Varint = 1, // 
Variable-length encoding (smaller values use fewer bytes) + Fixed = 2, // Fixed-size encoding (always uses full type width) + Tagged = 3 // Tagged encoding (uses tag byte + value) +}; + +// ============================================================================ +// FieldMeta - Compile-time Field Configuration with Builder Pattern +// ============================================================================ + +/// Compile-time field metadata with fluent builder API. +/// Supports both: +/// - Simple: F(0) - just field ID +/// - Full: F(0).nullable().varint().compress(false) +struct FieldMeta { + int16_t id_ = -1; + bool nullable_ = false; + bool ref_ = false; + bool monomorphic_ = false; + Encoding encoding_ = Encoding::Default; + bool compress_ = true; + + // Builder methods - each returns a modified copy + constexpr FieldMeta id(int16_t v) const { + auto c = *this; + c.id_ = v; + return c; + } + constexpr FieldMeta nullable(bool v = true) const { + auto c = *this; + c.nullable_ = v; + return c; + } + constexpr FieldMeta ref(bool v = true) const { + auto c = *this; + c.ref_ = v; + return c; + } + constexpr FieldMeta monomorphic(bool v = true) const { + auto c = *this; + c.monomorphic_ = v; + return c; + } + constexpr FieldMeta encoding(Encoding v) const { + auto c = *this; + c.encoding_ = v; + return c; + } + constexpr FieldMeta compress(bool v) const { + auto c = *this; + c.compress_ = v; + return c; + } + + // Convenience shortcuts for common encodings + constexpr FieldMeta varint() const { return encoding(Encoding::Varint); } + constexpr FieldMeta fixed() const { return encoding(Encoding::Fixed); } + constexpr FieldMeta tagged() const { return encoding(Encoding::Tagged); } +}; + +/// Short factory function for FieldMeta - use F(id) in macros for brevity +constexpr FieldMeta F(int16_t id) { return FieldMeta{}.id(id); } + +namespace detail { + +// ============================================================================ +// Config Normalization - Handle 
both integer IDs and FieldMeta +// ============================================================================ + +/// Normalize configuration: convert integer to FieldMeta, pass FieldMeta +/// through +template constexpr auto normalize_config(T &&v) { + if constexpr (std::is_integral_v>) { + // Old syntax: just an integer ID + return FieldMeta{}.id(static_cast(v)); + } else if constexpr (std::is_same_v, FieldMeta>) { + // New syntax: already a FieldMeta + return v; + } else { + static_assert( + std::is_integral_v> || + std::is_same_v, FieldMeta>, + "Field config must be an integer ID or FieldMeta (use F(id)...)"); + return FieldMeta{}; + } +} + +/// Apply old-style tag to FieldMeta (for backward compatibility) +constexpr FieldMeta apply_tag(FieldMeta m, nullable) { return m.nullable(); } +constexpr FieldMeta apply_tag(FieldMeta m, not_null) { + return m.nullable(false); +} +constexpr FieldMeta apply_tag(FieldMeta m, ref) { return m.ref(); } +constexpr FieldMeta apply_tag(FieldMeta m, monomorphic) { + return m.monomorphic(); +} + +/// Fold multiple tags onto a base config +template +constexpr FieldMeta apply_tags(FieldMeta base, Tags... 
tags) { + ((base = apply_tag(base, tags)), ...); + return base; +} + +// ============================================================================ +// FieldEntry - Binds Member Pointer to Config for Compile-Time Verification +// ============================================================================ + +/// Field entry that stores member pointer (for verification) + configuration +template struct FieldEntry { + M T::*ptr; // Member pointer - compile-time field verification + const char *name; // Field name for debugging + FieldMeta meta; // Field configuration + + constexpr FieldEntry(M T::*p, const char *n, FieldMeta m) + : ptr(p), name(n), meta(m) {} +}; + +/// Create a FieldEntry with automatic type deduction +template +constexpr auto make_field_entry(M T::*ptr, const char *name, FieldMeta meta) { + return FieldEntry{ptr, name, meta}; +} + +/// Default: no field config defined for type T +template struct ForyFieldConfigImpl { + static constexpr bool has_config = false; +}; + +template +inline constexpr bool has_field_config_v = ForyFieldConfigImpl::has_config; + +/// Helper to get field encoding from ForyFieldConfigImpl +template +struct GetFieldConfigEntry { + static constexpr Encoding encoding = Encoding::Default; + static constexpr int16_t id = -1; + static constexpr bool nullable = false; + static constexpr bool ref = false; + static constexpr bool monomorphic = false; + static constexpr bool compress = true; +}; + +template +struct GetFieldConfigEntry< + T, Index, + std::enable_if_t::has_config && + (Index < ForyFieldConfigImpl::field_count)>> { +private: + static constexpr auto get_entry() { + return std::get(ForyFieldConfigImpl::entries); + } + +public: + static constexpr Encoding encoding = get_entry().meta.encoding_; + static constexpr int16_t id = get_entry().meta.id_; + static constexpr bool nullable = get_entry().meta.nullable_; + static constexpr bool ref = get_entry().meta.ref_; + static constexpr bool monomorphic = 
get_entry().meta.monomorphic_; + static constexpr bool compress = get_entry().meta.compress_; +}; + +} // namespace detail + // ============================================================================ // fory::field Template // ============================================================================ @@ -522,3 +692,108 @@ struct GetFieldTagEntry< FORY_FT_ENTRIES_15(T, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, \ _13, _14, _15), \ FORY_FT_MAKE_ENTRY(T, _16) + +// ============================================================================ +// FORY_FIELD_CONFIG Macro - New Syntax with Member Pointer Verification +// ============================================================================ +// +// Usage: +// FORY_FIELD_CONFIG(MyStruct, +// (field1, F(0)), // Simple: just ID +// (field2, F(1).nullable()), // With nullable +// (field3, F(2).varint()), // With encoding +// (field4, F(3).nullable().ref()), // Multiple options +// (field5, 4) // Backward compatible: integer +// ID +// ); +// +// This macro: +// 1. Verifies field names exist at compile time via member pointers +// 2. Supports both integer IDs (old) and F(id).xxx() builder (new) +// 3. Stores configuration in a constexpr tuple for efficient access + +// Helper to stringify field name +#define FORY_FC_STRINGIFY(x) FORY_FC_STRINGIFY_I(x) +#define FORY_FC_STRINGIFY_I(x) #x + +// Extract field name (first element of tuple) +#define FORY_FC_NAME(tuple) FORY_FC_NAME_IMPL tuple +#define FORY_FC_NAME_IMPL(name, ...) name + +// Extract config (second element of tuple) +#define FORY_FC_CONFIG(tuple) FORY_FC_CONFIG_IMPL tuple +#define FORY_FC_CONFIG_IMPL(name, config, ...) 
config + +// Create a FieldEntry with member pointer verification +#define FORY_FC_MAKE_ENTRY(Type, tuple) \ + ::fory::detail::make_field_entry( \ + &Type::FORY_FC_NAME(tuple), FORY_FC_STRINGIFY(FORY_FC_NAME(tuple)), \ + ::fory::detail::normalize_config(FORY_FC_CONFIG(tuple))) + +// Generate entries using indirect expansion +#define FORY_FC_ENTRIES(Type, ...) \ + FORY_FC_ENTRIES_I(Type, FORY_PP_NARG(__VA_ARGS__), __VA_ARGS__) +#define FORY_FC_ENTRIES_I(Type, N, ...) FORY_FC_ENTRIES_II(Type, N, __VA_ARGS__) +#define FORY_FC_ENTRIES_II(Type, N, ...) FORY_FC_ENTRIES_##N(Type, __VA_ARGS__) + +// Generate entries for 1-16 fields +#define FORY_FC_ENTRIES_1(T, _1) FORY_FC_MAKE_ENTRY(T, _1) +#define FORY_FC_ENTRIES_2(T, _1, _2) \ + FORY_FC_MAKE_ENTRY(T, _1), FORY_FC_MAKE_ENTRY(T, _2) +#define FORY_FC_ENTRIES_3(T, _1, _2, _3) \ + FORY_FC_ENTRIES_2(T, _1, _2), FORY_FC_MAKE_ENTRY(T, _3) +#define FORY_FC_ENTRIES_4(T, _1, _2, _3, _4) \ + FORY_FC_ENTRIES_3(T, _1, _2, _3), FORY_FC_MAKE_ENTRY(T, _4) +#define FORY_FC_ENTRIES_5(T, _1, _2, _3, _4, _5) \ + FORY_FC_ENTRIES_4(T, _1, _2, _3, _4), FORY_FC_MAKE_ENTRY(T, _5) +#define FORY_FC_ENTRIES_6(T, _1, _2, _3, _4, _5, _6) \ + FORY_FC_ENTRIES_5(T, _1, _2, _3, _4, _5), FORY_FC_MAKE_ENTRY(T, _6) +#define FORY_FC_ENTRIES_7(T, _1, _2, _3, _4, _5, _6, _7) \ + FORY_FC_ENTRIES_6(T, _1, _2, _3, _4, _5, _6), FORY_FC_MAKE_ENTRY(T, _7) +#define FORY_FC_ENTRIES_8(T, _1, _2, _3, _4, _5, _6, _7, _8) \ + FORY_FC_ENTRIES_7(T, _1, _2, _3, _4, _5, _6, _7), FORY_FC_MAKE_ENTRY(T, _8) +#define FORY_FC_ENTRIES_9(T, _1, _2, _3, _4, _5, _6, _7, _8, _9) \ + FORY_FC_ENTRIES_8(T, _1, _2, _3, _4, _5, _6, _7, _8), \ + FORY_FC_MAKE_ENTRY(T, _9) +#define FORY_FC_ENTRIES_10(T, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10) \ + FORY_FC_ENTRIES_9(T, _1, _2, _3, _4, _5, _6, _7, _8, _9), \ + FORY_FC_MAKE_ENTRY(T, _10) +#define FORY_FC_ENTRIES_11(T, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11) \ + FORY_FC_ENTRIES_10(T, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10), \ + 
FORY_FC_MAKE_ENTRY(T, _11) +#define FORY_FC_ENTRIES_12(T, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, \ + _12) \ + FORY_FC_ENTRIES_11(T, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11), \ + FORY_FC_MAKE_ENTRY(T, _12) +#define FORY_FC_ENTRIES_13(T, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, \ + _12, _13) \ + FORY_FC_ENTRIES_12(T, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12), \ + FORY_FC_MAKE_ENTRY(T, _13) +#define FORY_FC_ENTRIES_14(T, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, \ + _12, _13, _14) \ + FORY_FC_ENTRIES_13(T, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, \ + _13), \ + FORY_FC_MAKE_ENTRY(T, _14) +#define FORY_FC_ENTRIES_15(T, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, \ + _12, _13, _14, _15) \ + FORY_FC_ENTRIES_14(T, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, \ + _13, _14), \ + FORY_FC_MAKE_ENTRY(T, _15) +#define FORY_FC_ENTRIES_16(T, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, \ + _12, _13, _14, _15, _16) \ + FORY_FC_ENTRIES_15(T, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, \ + _13, _14, _15), \ + FORY_FC_MAKE_ENTRY(T, _16) + +// Main FORY_FIELD_CONFIG macro +// Creates a constexpr tuple of FieldEntry objects with member pointer +// verification +#define FORY_FIELD_CONFIG(Type, ...) 
\ + inline constexpr auto _fory_field_entries_##Type = \ + std::make_tuple(FORY_FC_ENTRIES(Type, __VA_ARGS__)); \ + template <> struct fory::detail::ForyFieldConfigImpl { \ + static constexpr bool has_config = true; \ + static constexpr auto &entries = _fory_field_entries_##Type; \ + static constexpr size_t field_count = \ + std::tuple_size_v>; \ + } diff --git a/cpp/fory/serialization/basic_serializer.h b/cpp/fory/serialization/basic_serializer.h index de8f99b1bf..6aa3c45c7b 100644 --- a/cpp/fory/serialization/basic_serializer.h +++ b/cpp/fory/serialization/basic_serializer.h @@ -252,9 +252,9 @@ template <> struct Serializer { } }; -/// int32_t serializer +/// int32_t serializer - uses VARINT32 to match Java xlang mode and Rust template <> struct Serializer { - static constexpr TypeId type_id = TypeId::INT32; + static constexpr TypeId type_id = TypeId::VARINT32; static inline void write_type_info(WriteContext &ctx) { ctx.write_varuint32(static_cast(type_id)); @@ -323,9 +323,9 @@ template <> struct Serializer { } }; -/// int64_t serializer +/// int64_t serializer - uses VARINT64 to match Java xlang mode and Rust template <> struct Serializer { - static constexpr TypeId type_id = TypeId::INT64; + static constexpr TypeId type_id = TypeId::VARINT64; static inline void write_type_info(WriteContext &ctx) { ctx.write_varuint32(static_cast(type_id)); diff --git a/cpp/fory/serialization/context.h b/cpp/fory/serialization/context.h index 88fdc2e30a..faf426a53f 100644 --- a/cpp/fory/serialization/context.h +++ b/cpp/fory/serialization/context.h @@ -200,6 +200,16 @@ class WriteContext { buffer().WriteVarInt64(value); } + /// Write uint64_t value using tagged encoding to buffer. + FORY_ALWAYS_INLINE void write_tagged_uint64(uint64_t value) { + buffer().WriteTaggedUint64(value); + } + + /// Write int64_t value using tagged encoding to buffer. 
+ FORY_ALWAYS_INLINE void write_tagged_int64(int64_t value) { + buffer().WriteTaggedInt64(value); + } + /// Write uint64_t value as varuint36small to buffer. /// This is the special variable-length encoding used for string headers. FORY_ALWAYS_INLINE void write_varuint36small(uint64_t value) { @@ -499,6 +509,18 @@ class ReadContext { return buffer().ReadVarInt64(error); } + /// Read uint64_t value using tagged encoding from buffer. Sets error on + /// failure. + FORY_ALWAYS_INLINE uint64_t read_tagged_uint64(Error &error) { + return buffer().ReadTaggedUint64(error); + } + + /// Read int64_t value using tagged encoding from buffer. Sets error on + /// failure. + FORY_ALWAYS_INLINE int64_t read_tagged_int64(Error &error) { + return buffer().ReadTaggedInt64(error); + } + /// Read uint64_t value as varuint36small from buffer. Sets error on failure. FORY_ALWAYS_INLINE uint64_t read_varuint36small(Error &error) { return buffer().ReadVarUint36Small(error); diff --git a/cpp/fory/serialization/serializer_traits.h b/cpp/fory/serialization/serializer_traits.h index 7f3ad3d714..1b378ad8c4 100644 --- a/cpp/fory/serialization/serializer_traits.h +++ b/cpp/fory/serialization/serializer_traits.h @@ -444,11 +444,11 @@ template <> struct TypeIndex { }; template <> struct TypeIndex { - static constexpr uint64_t value = static_cast(TypeId::INT32); + static constexpr uint64_t value = static_cast(TypeId::VARINT32); }; template <> struct TypeIndex { - static constexpr uint64_t value = static_cast(TypeId::INT64); + static constexpr uint64_t value = static_cast(TypeId::VARINT64); }; // Note: Unsigned types (uint8_t, uint16_t, uint32_t, uint64_t) use the fallback diff --git a/cpp/fory/serialization/skip.cc b/cpp/fory/serialization/skip.cc index 7df3d8545d..0278816422 100644 --- a/cpp/fory/serialization/skip.cc +++ b/cpp/fory/serialization/skip.cc @@ -489,8 +489,8 @@ void skip_field_value(ReadContext &ctx, const FieldType &field_type, ctx.buffer().IncreaseReaderIndex(8); return; - case 
TypeId::VAR32: - case TypeId::VAR64: + case TypeId::VARINT32: + case TypeId::VARINT64: skip_varint(ctx); return; diff --git a/cpp/fory/serialization/smart_ptr_serializers.h b/cpp/fory/serialization/smart_ptr_serializers.h index 4e18cd31ed..b581382b3e 100644 --- a/cpp/fory/serialization/smart_ptr_serializers.h +++ b/cpp/fory/serialization/smart_ptr_serializers.h @@ -133,6 +133,12 @@ template struct Serializer> { bool read_type) { constexpr bool inner_requires_ref = requires_ref_metadata_v; +#ifdef ENABLE_FORY_DEBUG_OUTPUT + std::cerr << "[optional::read] T=" << typeid(T).name() + << ", ref_mode=" << static_cast(ref_mode) + << ", buffer_pos=" << ctx.buffer().reader_index() << std::endl; +#endif + if (ref_mode == RefMode::None) { T value = Serializer::read(ctx, RefMode::None, read_type); if (ctx.has_error()) { diff --git a/cpp/fory/serialization/struct_serializer.h b/cpp/fory/serialization/struct_serializer.h index 4148980d9d..29c1d31f6e 100644 --- a/cpp/fory/serialization/struct_serializer.h +++ b/cpp/fory/serialization/struct_serializer.h @@ -40,7 +40,7 @@ #include #include -#ifdef FORY_DEBUG +#ifdef ENABLE_FORY_DEBUG_OUTPUT #include #endif @@ -118,13 +118,15 @@ namespace detail { inline constexpr bool is_primitive_type_id(TypeId type_id) { return type_id == TypeId::BOOL || type_id == TypeId::INT8 || type_id == TypeId::INT16 || type_id == TypeId::INT32 || - type_id == TypeId::VAR32 || type_id == TypeId::INT64 || - type_id == TypeId::VAR64 || type_id == TypeId::H64 || + type_id == TypeId::VARINT32 || type_id == TypeId::INT64 || + type_id == TypeId::VARINT64 || type_id == TypeId::TAGGED_INT64 || type_id == TypeId::FLOAT16 || type_id == TypeId::FLOAT32 || type_id == TypeId::FLOAT64 || - // Unsigned types for native mode (xlang=false) + // Unsigned types type_id == TypeId::UINT8 || type_id == TypeId::UINT16 || - type_id == TypeId::UINT32 || type_id == TypeId::UINT64; + type_id == TypeId::UINT32 || type_id == TypeId::VAR_UINT32 || + type_id == TypeId::UINT64 || type_id == TypeId::VAR_UINT64 || + type_id == 
TypeId::TAGGED_UINT64; } /// Write a primitive value to buffer at given offset WITHOUT updating @@ -223,6 +225,15 @@ FORY_ALWAYS_INLINE uint32_t put_varint_at(T value, Buffer &buffer, uint64_t zigzag = (static_cast(val) << 1) ^ static_cast(val >> 63); return buffer.PutVarUint64(offset, zigzag); + } else if constexpr (std::is_same_v || + std::is_same_v) { + // Unsigned 32-bit varint (no zigzag) + return buffer.PutVarUint32(offset, static_cast(value)); + } else if constexpr (std::is_same_v || + std::is_same_v) { + // Unsigned 64-bit varint (no zigzag) - used for VAR_UINT64 and + // TAGGED_UINT64 + return buffer.PutVarUint64(offset, static_cast(value)); } else { static_assert(sizeof(T) == 0, "Unsupported varint type"); return 0; @@ -275,6 +286,45 @@ template struct CompileTimeFieldHelpers { using RawFieldType = meta::RemoveMemberPointerCVRefT; // Unwrap fory::field<> to get the actual type for serialization using FieldType = unwrap_field_t; + + // Check for encoding override from FORY_FIELD_CONFIG + // This allows specifying varint/fixed/tagged encoding for unsigned types + if constexpr (::fory::detail::has_field_config_v) { + constexpr auto enc = + ::fory::detail::GetFieldConfigEntry::encoding; + // Apply encoding override for uint32_t (non-optional) + if constexpr (std::is_same_v) { + if constexpr (enc == Encoding::Varint) { + return static_cast(TypeId::VAR_UINT32); + } + return static_cast(TypeId::UINT32); + } + // Apply encoding override for uint64_t (non-optional) + else if constexpr (std::is_same_v) { + if constexpr (enc == Encoding::Varint) { + return static_cast(TypeId::VAR_UINT64); + } else if constexpr (enc == Encoding::Tagged) { + return static_cast(TypeId::TAGGED_UINT64); + } + return static_cast(TypeId::UINT64); + } + // Apply encoding override for std::optional + else if constexpr (std::is_same_v>) { + if constexpr (enc == Encoding::Varint) { + return static_cast(TypeId::VAR_UINT32); + } + return static_cast(TypeId::UINT32); + } + // Apply encoding 
override for std::optional + else if constexpr (std::is_same_v>) { + if constexpr (enc == Encoding::Varint) { + return static_cast(TypeId::VAR_UINT64); + } else if constexpr (enc == Encoding::Tagged) { + return static_cast(TypeId::TAGGED_UINT64); + } + return static_cast(TypeId::UINT64); + } + } return static_cast(Serializer::type_id); } } @@ -417,6 +467,19 @@ template struct CompileTimeFieldHelpers { using PtrT = std::tuple_element_t; using RawFieldType = meta::RemoveMemberPointerCVRefT; using FieldType = unwrap_field_t; + + // Check for encoding override from FORY_FIELD_CONFIG for unsigned types + // If encoding is Varint or Tagged, it's NOT a fixed-size primitive + if constexpr (::fory::detail::has_field_config_v && + (std::is_same_v || + std::is_same_v)) { + constexpr auto enc = + ::fory::detail::GetFieldConfigEntry::encoding; + if constexpr (enc == Encoding::Varint || enc == Encoding::Tagged) { + return false; // Not fixed-size, uses varint encoding + } + } + return std::is_same_v || std::is_same_v || std::is_same_v || @@ -440,6 +503,19 @@ template struct CompileTimeFieldHelpers { using PtrT = std::tuple_element_t; using RawFieldType = meta::RemoveMemberPointerCVRefT; using FieldType = unwrap_field_t; + + // Check for encoding override from FORY_FIELD_CONFIG for unsigned types + // If encoding is Varint or Tagged, treat as varint primitive + if constexpr (::fory::detail::has_field_config_v && + (std::is_same_v || + std::is_same_v)) { + constexpr auto enc = + ::fory::detail::GetFieldConfigEntry::encoding; + if constexpr (enc == Encoding::Varint || enc == Encoding::Tagged) { + return true; // Varint/Tagged encoding + } + } + return std::is_same_v || std::is_same_v || std::is_same_v || @@ -484,6 +560,36 @@ template struct CompileTimeFieldHelpers { using PtrT = std::tuple_element_t; using RawFieldType = meta::RemoveMemberPointerCVRefT; using FieldType = unwrap_field_t; + + // Check for encoding override from FORY_FIELD_CONFIG for unsigned types + if constexpr 
(::fory::detail::has_field_config_v && + (std::is_same_v || + std::is_same_v)) { + constexpr auto enc = + ::fory::detail::GetFieldConfigEntry::encoding; + if constexpr (enc == Encoding::Varint) { + if constexpr (std::is_same_v) { + return 5; // uint32 varint max + } else { + return 10; // uint64 varint max + } + } else if constexpr (enc == Encoding::Tagged) { + // Tagged encoding: 4 bytes for small, 9 bytes for large + return 9; + } + } + // Check for tagged encoding on signed int64 types + if constexpr (::fory::detail::has_field_config_v && + (std::is_same_v || + std::is_same_v)) { + constexpr auto enc = + ::fory::detail::GetFieldConfigEntry::encoding; + if constexpr (enc == Encoding::Tagged) { + // Tagged encoding: 4 bytes for small, 9 bytes for large + return 9; + } + } + if constexpr (std::is_same_v || std::is_same_v) { return 5; // int32 varint max @@ -653,16 +759,24 @@ template struct CompileTimeFieldHelpers { switch (static_cast(tid)) { case TypeId::BOOL: case TypeId::INT8: + case TypeId::UINT8: return 1; case TypeId::INT16: + case TypeId::UINT16: case TypeId::FLOAT16: return 2; case TypeId::INT32: - case TypeId::VAR32: + case TypeId::VARINT32: + case TypeId::UINT32: + case TypeId::VAR_UINT32: case TypeId::FLOAT32: return 4; case TypeId::INT64: - case TypeId::VAR64: + case TypeId::VARINT64: + case TypeId::TAGGED_INT64: + case TypeId::UINT64: + case TypeId::VAR_UINT64: + case TypeId::TAGGED_UINT64: case TypeId::FLOAT64: return 8; default: @@ -670,11 +784,20 @@ template struct CompileTimeFieldHelpers { } } + /// Check if a type ID represents a compressed (varint/tagged) type. + /// This must match Java's Types.isCompressedType() exactly for consistent + /// field ordering. Java only considers VARINT32, VAR_UINT32, VARINT64, + /// VAR_UINT64, TAGGED_INT64, and TAGGED_UINT64 as compressed. + /// Note: INT32, INT64, UINT32, UINT64 are NOT compressed - they are fixed- + /// size types. 
Java xlang mode uses compressInt=true which maps int→VARINT32 + /// and long→VARINT64, but the actual INT32/INT64 type IDs are not compressed. static constexpr bool is_compress_id(uint32_t tid) { - return tid == static_cast(TypeId::INT32) || - tid == static_cast(TypeId::INT64) || - tid == static_cast(TypeId::VAR32) || - tid == static_cast(TypeId::VAR64); + return tid == static_cast(TypeId::VARINT32) || + tid == static_cast(TypeId::VARINT64) || + tid == static_cast(TypeId::TAGGED_INT64) || + tid == static_cast(TypeId::VAR_UINT32) || + tid == static_cast(TypeId::VAR_UINT64) || + tid == static_cast(TypeId::TAGGED_UINT64); } /// Check if a type ID is an internal (built-in, final) type for group 2. @@ -739,17 +862,20 @@ template struct CompileTimeFieldHelpers { return sa > sb; if (a_tid != b_tid) return a_tid > b_tid; // type_id descending to match Java - return snake_case_names[a] < snake_case_names[b]; + // Use original Names (not snake_case) to match runtime sorting and Java + return Names[a] < Names[b]; } if (ga == 2) { // Internal types (STRING, etc.): sort by type_id ascending, then name if (a_tid != b_tid) return a_tid < b_tid; - return snake_case_names[a] < snake_case_names[b]; + // Use original Names (not snake_case) to match runtime sorting and Java + return Names[a] < Names[b]; } - return snake_case_names[a] < snake_case_names[b]; + // Use original Names (not snake_case) to match runtime sorting and Java + return Names[a] < Names[b]; } } @@ -828,15 +954,15 @@ template struct CompileTimeFieldHelpers { total += 2; break; case TypeId::INT32: - case TypeId::VAR32: + case TypeId::VARINT32: total += 8; // varint max, but bulk write may write up to 8 bytes break; case TypeId::FLOAT32: total += 4; break; case TypeId::INT64: - case TypeId::VAR64: - case TypeId::H64: + case TypeId::VARINT64: + case TypeId::TAGGED_INT64: total += 10; // varint max break; case TypeId::FLOAT64: @@ -899,14 +1025,14 @@ template struct CompileTimeFieldHelpers { /// Check if a type_id 
represents a varint primitive (int32/int64 types) /// Per basic_serializer.h, INT32/INT64 use zigzag varint encoding - /// VAR32/VAR64/H64 also use varint encoding + /// VARINT32/VARINT64/TAGGED_INT64 also use varint encoding static constexpr bool is_varint_primitive(uint32_t tid) { switch (static_cast(tid)) { - case TypeId::INT32: // int32_t uses zigzag varint per basic_serializer.h - case TypeId::INT64: // int64_t uses zigzag varint per basic_serializer.h - case TypeId::VAR32: // explicit varint type - case TypeId::VAR64: // explicit varint type - case TypeId::H64: // hybrid int64 encoding + case TypeId::INT32: // int32_t uses zigzag varint per basic_serializer.h + case TypeId::INT64: // int64_t uses zigzag varint per basic_serializer.h + case TypeId::VARINT32: // explicit varint type + case TypeId::VARINT64: // explicit varint type + case TypeId::TAGGED_INT64: // hybrid int64 encoding return true; default: return false; @@ -916,12 +1042,12 @@ template struct CompileTimeFieldHelpers { /// Get the max varint size in bytes for a type_id (0 if not varint) static constexpr size_t max_varint_bytes(uint32_t tid) { switch (static_cast(tid)) { - case TypeId::INT32: // int32_t uses zigzag varint - case TypeId::VAR32: // explicit varint - return 5; // int32 varint max - case TypeId::INT64: // int64_t uses zigzag varint - case TypeId::VAR64: // explicit varint - case TypeId::H64: + case TypeId::INT32: // int32_t uses zigzag varint + case TypeId::VARINT32: // explicit varint + return 5; // int32 varint max + case TypeId::INT64: // int64_t uses zigzag varint + case TypeId::VARINT64: // explicit varint + case TypeId::TAGGED_INT64: return 10; // int64 varint max default: return 0; @@ -1048,28 +1174,44 @@ template struct CompileTimeFieldHelpers { switch (static_cast(tid)) { case TypeId::BOOL: case TypeId::INT8: + case TypeId::UINT8: total += 1; break; case TypeId::INT16: + case TypeId::UINT16: case TypeId::FLOAT16: total += 2; break; case TypeId::INT32: - case TypeId::VAR32: - 
total += 5; // varint max + case TypeId::VARINT32: + total += 5; // varint max for 32-bit + break; + case TypeId::UINT32: + total += 4; // fixed 4 bytes + break; + case TypeId::VAR_UINT32: + total += 5; // varint max for 32-bit break; case TypeId::FLOAT32: total += 4; break; case TypeId::INT64: - case TypeId::VAR64: - case TypeId::H64: - total += 10; // varint max + case TypeId::VARINT64: + case TypeId::TAGGED_INT64: + total += 10; // varint max for 64-bit + break; + case TypeId::UINT64: + total += 8; // fixed 8 bytes + break; + case TypeId::VAR_UINT64: + case TypeId::TAGGED_UINT64: + total += 10; // varint max for 64-bit break; case TypeId::FLOAT64: total += 8; break; default: + total += 10; // safe default for unknown types break; } } @@ -1158,7 +1300,33 @@ FORY_ALWAYS_INLINE void write_single_varint_field(const T &obj, Buffer &buffer, return obj.*field_ptr; } }(); - offset += put_varint_at(field_value, buffer, offset); + + // Check for tagged encoding on unsigned 64-bit types + if constexpr (::fory::detail::has_field_config_v && + (std::is_same_v || + std::is_same_v)) { + constexpr auto enc = + ::fory::detail::GetFieldConfigEntry::encoding; + if constexpr (enc == Encoding::Tagged) { + // Use tagged writing (not standard varint) + offset += buffer.PutTaggedUint64(offset, field_value); + } else { + offset += put_varint_at(field_value, buffer, offset); + } + } else if constexpr (::fory::detail::has_field_config_v && + (std::is_same_v || + std::is_same_v)) { + constexpr auto enc = + ::fory::detail::GetFieldConfigEntry::encoding; + if constexpr (enc == Encoding::Tagged) { + // Use tagged writing for signed int64 (not standard varint) + offset += buffer.PutTaggedInt64(offset, field_value); + } else { + offset += put_varint_at(field_value, buffer, offset); + } + } else { + offset += put_varint_at(field_value, buffer, offset); + } } /// Fast write consecutive varint primitive fields (int32, int64). 
@@ -1286,6 +1454,46 @@ void write_single_field(const T &obj, WriteContext &ctx, // For backwards compatibility, also check requires_ref_metadata_v constexpr bool field_requires_ref = requires_ref_metadata_v; + // Special handling for std::optional with encoding config + // This must come BEFORE the general primitive check because optional requires + // ref metadata but we want to use encoding-specific serialization. + constexpr bool is_encoded_optional_uint = + ::fory::detail::has_field_config_v && + (std::is_same_v> || + std::is_same_v>); + + if constexpr (is_encoded_optional_uint) { + constexpr auto enc = + ::fory::detail::GetFieldConfigEntry::encoding; + // Write nullable flag + if (!field_value.has_value()) { + ctx.write_int8(NULL_FLAG); + return; + } + ctx.write_int8(NOT_NULL_VALUE_FLAG); + + // Write the value with encoding-aware writing + using InnerType = typename std::remove_reference_t::value_type; + InnerType value = field_value.value(); + if constexpr (std::is_same_v) { + if constexpr (enc == Encoding::Varint) { + ctx.write_varuint32(value); + } else { + ctx.buffer().WriteInt32(static_cast(value)); + } + } else if constexpr (std::is_same_v) { + if constexpr (enc == Encoding::Varint) { + ctx.write_varuint64(value); + } else if constexpr (enc == Encoding::Tagged) { + ctx.write_tagged_uint64(value); + } else { + // For fixed encoding, cast to int64 since binary representation is same + ctx.buffer().WriteInt64(static_cast(value)); + } + } + return; + } + // Per Rust implementation: primitives are written directly without ref/type if constexpr (is_primitive_field && !field_requires_ref) { Serializer::write_data(field_value, ctx); @@ -1412,6 +1620,69 @@ template <> struct is_raw_primitive : std::true_type {}; template inline constexpr bool is_raw_primitive_v = is_raw_primitive::value; +/// Read a primitive value based on remote type_id (for compatible mode). +/// Returns the value as a uint64_t (or int64_t for signed types). 
+/// The caller must convert to the correct local type. +template +FORY_ALWAYS_INLINE TargetType read_primitive_by_type_id(ReadContext &ctx, + uint32_t type_id, + Error &error) { + // Read based on remote type_id encoding, then convert to TargetType + switch (static_cast(type_id)) { + case TypeId::BOOL: + return static_cast(ctx.read_uint8(error) != 0); + case TypeId::INT8: + return static_cast(ctx.read_int8(error)); + case TypeId::UINT8: + return static_cast(ctx.read_uint8(error)); + case TypeId::INT16: + return static_cast(ctx.read_int16(error)); + case TypeId::UINT16: + return static_cast( + static_cast(ctx.read_int16(error))); + case TypeId::INT32: + // INT32 uses fixed encoding + return static_cast(ctx.read_int32(error)); + case TypeId::VARINT32: + // VARINT32 uses varint encoding + return static_cast(ctx.read_varint32(error)); + case TypeId::UINT32: + // UINT32 uses fixed 4-byte encoding + return static_cast( + static_cast(ctx.read_int32(error))); + case TypeId::VAR_UINT32: + // VAR_UINT32 uses varint encoding + return static_cast(ctx.read_varuint32(error)); + case TypeId::INT64: + // INT64 uses fixed encoding + return static_cast(ctx.read_int64(error)); + case TypeId::VARINT64: + // VARINT64 uses varint encoding + return static_cast(ctx.read_varint64(error)); + case TypeId::TAGGED_INT64: + // TAGGED_INT64 uses tagged encoding (special hybrid encoding) + return static_cast(ctx.read_tagged_int64(error)); + case TypeId::UINT64: + // UINT64 uses fixed 8-byte encoding + return static_cast( + static_cast(ctx.read_int64(error))); + case TypeId::VAR_UINT64: + // VAR_UINT64 uses varint encoding + return static_cast(ctx.read_varuint64(error)); + case TypeId::TAGGED_UINT64: + // TAGGED_UINT64 uses tagged encoding (special hybrid encoding) + return static_cast(ctx.read_tagged_uint64(error)); + case TypeId::FLOAT32: + return static_cast(ctx.read_float(error)); + case TypeId::FLOAT64: + return static_cast(ctx.read_double(error)); + default: + error = 
Error::type_error("Unsupported type_id for primitive read: " + + std::to_string(type_id)); + return TargetType{}; + } +} + /// Helper to read a primitive field directly using Error* pattern. /// This bypasses Serializer::read for better performance. /// Returns the read value; sets error on failure. @@ -1517,7 +1788,7 @@ void read_single_field_by_index(T &obj, ReadContext &ctx) { constexpr RefMode field_ref_mode = make_ref_mode(is_nullable || field_requires_ref, track_ref); -#ifdef FORY_DEBUG +#ifdef ENABLE_FORY_DEBUG_OUTPUT const auto debug_names = decltype(field_info)::Names; std::cerr << "[xlang][field] T=" << typeid(T).name() << ", index=" << Index << ", name=" << debug_names[Index] @@ -1533,30 +1804,135 @@ void read_single_field_by_index(T &obj, ReadContext &ctx) { // and use direct buffer reads with Error&. constexpr bool is_raw_prim = is_raw_primitive_v; if constexpr (is_raw_prim && is_primitive_field && !field_requires_ref) { + // Check for encoding override for unsigned types from FORY_FIELD_CONFIG + auto read_value = [&ctx]() -> FieldType { + if constexpr (::fory::detail::has_field_config_v && + (std::is_same_v || + std::is_same_v)) { + constexpr auto enc = + ::fory::detail::GetFieldConfigEntry::encoding; +#ifdef ENABLE_FORY_DEBUG_OUTPUT + std::cerr << "[xlang][encoding] T=" << typeid(T).name() + << ", Index=" << Index << ", enc=" << static_cast(enc) + << ", reader_index=" << ctx.buffer().reader_index() + << std::endl; +#endif + if constexpr (std::is_same_v) { + if constexpr (enc == Encoding::Varint) { + // VAR_UINT32: read as unsigned varint + return ctx.read_varuint32(ctx.error()); + } + // UINT32: fixed 4-byte + return static_cast(ctx.read_int32(ctx.error())); + } else if constexpr (std::is_same_v) { + if constexpr (enc == Encoding::Varint) { + // VAR_UINT64: read as unsigned varint + return ctx.read_varuint64(ctx.error()); + } else if constexpr (enc == Encoding::Tagged) { + // TAGGED_UINT64: read using tagged encoding + return 
ctx.read_tagged_uint64(ctx.error()); + } + // UINT64: fixed 8-byte + return ctx.read_uint64(ctx.error()); + } + } + // No encoding override, use default type-based reading + return read_primitive_field_direct(ctx, ctx.error()); + }; // Assign to field (handle fory::field<> wrapper if needed) if constexpr (is_fory_field_v) { - (obj.*field_ptr).value = - read_primitive_field_direct(ctx, ctx.error()); + (obj.*field_ptr).value = read_value(); } else { - obj.*field_ptr = read_primitive_field_direct(ctx, ctx.error()); + obj.*field_ptr = read_value(); } } else { - // Assign to field (handle fory::field<> wrapper if needed) - FieldType result = - Serializer::read(ctx, field_ref_mode, read_type); - if constexpr (is_fory_field_v) { - (obj.*field_ptr).value = std::move(result); + // Special handling for std::optional with encoding + // config + constexpr bool is_encoded_optional_uint = + ::fory::detail::has_field_config_v && + (std::is_same_v> || + std::is_same_v>); + + if constexpr (is_encoded_optional_uint) { + constexpr auto enc = + ::fory::detail::GetFieldConfigEntry::encoding; +#ifdef ENABLE_FORY_DEBUG_OUTPUT + std::cerr << "[DEBUG] is_encoded_optional_uint: Index=" << Index + << ", enc=" << static_cast(enc) + << ", reader_index=" << ctx.buffer().reader_index() + << std::endl; +#endif + // Read nullable flag + int8_t flag = ctx.read_int8(ctx.error()); +#ifdef ENABLE_FORY_DEBUG_OUTPUT + std::cerr << "[DEBUG] After read flag: flag=" << static_cast(flag) + << ", reader_index=" << ctx.buffer().reader_index() + << std::endl; +#endif + if (FORY_PREDICT_FALSE(ctx.has_error())) { + return; + } + if (flag == NULL_FLAG) { + if constexpr (is_fory_field_v) { + (obj.*field_ptr).value = std::nullopt; + } else { + obj.*field_ptr = std::nullopt; + } + return; + } + // Read the value with encoding-aware reading + using InnerType = typename std::remove_reference_t::value_type; + InnerType value; + if constexpr (std::is_same_v) { + if constexpr (enc == Encoding::Varint) { + value = 
ctx.read_varuint32(ctx.error()); + } else { + value = static_cast(ctx.read_int32(ctx.error())); + } + } else if constexpr (std::is_same_v) { +#ifdef ENABLE_FORY_DEBUG_OUTPUT + std::cerr << "[DEBUG] Reading uint64 with enc=" << static_cast(enc) + << ", reader_index=" << ctx.buffer().reader_index() + << std::endl; +#endif + if constexpr (enc == Encoding::Varint) { + value = ctx.read_varuint64(ctx.error()); + } else if constexpr (enc == Encoding::Tagged) { + value = ctx.read_tagged_uint64(ctx.error()); + } else { + value = ctx.read_uint64(ctx.error()); + } +#ifdef ENABLE_FORY_DEBUG_OUTPUT + std::cerr << "[DEBUG] After read uint64: value=" << value + << ", reader_index=" << ctx.buffer().reader_index() + << ", has_error=" << ctx.has_error() << std::endl; +#endif + } + if constexpr (is_fory_field_v) { + (obj.*field_ptr).value = std::optional(value); + } else { + obj.*field_ptr = std::optional(value); + } } else { - obj.*field_ptr = std::move(result); + // Assign to field (handle fory::field<> wrapper if needed) + FieldType result = + Serializer::read(ctx, field_ref_mode, read_type); + if constexpr (is_fory_field_v) { + (obj.*field_ptr).value = std::move(result); + } else { + obj.*field_ptr = std::move(result); + } } } } /// Helper to read a single field by index in compatible mode using /// remote field metadata to decide reference flag presence. +/// @param remote_type_id The type_id from the remote schema (for encoding) template void read_single_field_by_index_compatible(T &obj, ReadContext &ctx, - RefMode remote_ref_mode) { + RefMode remote_ref_mode, + uint32_t remote_type_id) { using Helpers = CompileTimeFieldHelpers; const auto field_info = ForyFieldInfo(obj); const auto field_ptrs = decltype(field_info)::Ptrs; @@ -1592,24 +1968,111 @@ void read_single_field_by_index_compatible(T &obj, ReadContext &ctx, // In compatible mode, trust the remote field metadata (remote_ref_mode) // to tell us whether a ref/null flag was written before the value payload. 
- // OPTIMIZATION: For raw primitive fields (not wrappers) with no ref flag, - // bypass Serializer::read and use direct buffer reads with Error&. +#ifdef ENABLE_FORY_DEBUG_OUTPUT + const auto debug_names = decltype(field_info)::Names; + std::cerr << "[compatible][read_field] Index=" << Index + << ", name=" << debug_names[Index] + << ", FieldType=" << typeid(FieldType).name() + << ", remote_ref_mode=" << static_cast(remote_ref_mode) + << ", buffer pos=" << ctx.buffer().reader_index() << std::endl; +#endif + + // In compatible mode, handle primitive fields specially to use remote + // encoding. This is critical for schema evolution where encoding differs + // between sender/receiver. constexpr bool is_raw_prim = is_raw_primitive_v; + constexpr bool is_local_optional = is_optional_v; + + // Case 1: Local raw primitive, any remote ref mode + // For primitives, we must use remote_type_id encoding regardless of + // nullability if constexpr (is_raw_prim && is_primitive_field) { if (remote_ref_mode == RefMode::None) { - // Assign to field (handle fory::field<> wrapper if needed) + // Remote is non-nullable, no ref flag + if constexpr (is_fory_field_v) { + (obj.*field_ptr).value = read_primitive_by_type_id( + ctx, remote_type_id, ctx.error()); + } else { + obj.*field_ptr = read_primitive_by_type_id( + ctx, remote_type_id, ctx.error()); + } + return; + } else { + // Remote is nullable, has ref flag + int8_t flag = ctx.read_int8(ctx.error()); + if (FORY_PREDICT_FALSE(ctx.has_error())) { + return; + } + if (flag == NULL_FLAG) { + // Cannot assign null to non-nullable local field + ctx.set_error(Error::invalid( + "Cannot deserialize null value to non-nullable field")); + return; + } + // NOT_NULL_VALUE_FLAG or REF_VALUE_FLAG - read the value if constexpr (is_fory_field_v) { - (obj.*field_ptr).value = - read_primitive_field_direct(ctx, ctx.error()); + (obj.*field_ptr).value = read_primitive_by_type_id( + ctx, remote_type_id, ctx.error()); } else { - obj.*field_ptr = - 
read_primitive_field_direct(ctx, ctx.error()); + obj.*field_ptr = read_primitive_by_type_id( + ctx, remote_type_id, ctx.error()); } return; } } - // Assign to field (handle fory::field<> wrapper if needed) + // Case 2: Local std::optional

where P is a primitive + // Use remote encoding for the inner primitive value + if constexpr (is_local_optional && is_primitive_field) { + using InnerType = typename FieldType::value_type; + constexpr bool inner_is_raw_prim = is_raw_primitive_v; + + if constexpr (inner_is_raw_prim) { + if (remote_ref_mode == RefMode::None) { + // Remote is non-nullable, no ref flag - read value and wrap in optional + InnerType value = read_primitive_by_type_id( + ctx, remote_type_id, ctx.error()); + if (FORY_PREDICT_FALSE(ctx.has_error())) { + return; + } + if constexpr (is_fory_field_v) { + (obj.*field_ptr).value = std::optional(value); + } else { + obj.*field_ptr = std::optional(value); + } + return; + } else { + // Remote is nullable, has ref flag + int8_t flag = ctx.read_int8(ctx.error()); + if (FORY_PREDICT_FALSE(ctx.has_error())) { + return; + } + if (flag == NULL_FLAG) { + // Null value - set optional to nullopt + if constexpr (is_fory_field_v) { + (obj.*field_ptr).value = std::nullopt; + } else { + obj.*field_ptr = std::nullopt; + } + return; + } + // NOT_NULL_VALUE_FLAG or REF_VALUE_FLAG - read the value + InnerType value = read_primitive_by_type_id( + ctx, remote_type_id, ctx.error()); + if (FORY_PREDICT_FALSE(ctx.has_error())) { + return; + } + if constexpr (is_fory_field_v) { + (obj.*field_ptr).value = std::optional(value); + } else { + obj.*field_ptr = std::optional(value); + } + return; + } + } + } + + // For non-primitive types, use the standard serializer path FieldType result = Serializer::read(ctx, remote_ref_mode, read_type); if constexpr (is_fory_field_v) { @@ -1622,11 +2085,11 @@ void read_single_field_by_index_compatible(T &obj, ReadContext &ctx, /// Helper to dispatch field reading by field_id in compatible mode. /// Uses fold expression with short-circuit to avoid lambda overhead. /// Sets handled=true if field was matched. 
+/// @param remote_type_id The type_id from the remote schema (for encoding) template -FORY_ALWAYS_INLINE void -dispatch_compatible_field_read_impl(T &obj, ReadContext &ctx, int16_t field_id, - RefMode remote_ref_mode, bool &handled, - std::index_sequence) { +FORY_ALWAYS_INLINE void dispatch_compatible_field_read_impl( + T &obj, ReadContext &ctx, int16_t field_id, RefMode remote_ref_mode, + uint32_t remote_type_id, bool &handled, std::index_sequence) { using Helpers = CompileTimeFieldHelpers; // Short-circuit fold: stops at first match @@ -1634,7 +2097,8 @@ dispatch_compatible_field_read_impl(T &obj, ReadContext &ctx, int16_t field_id, ((static_cast(Indices) == field_id ? (handled = true, read_single_field_by_index_compatible< - Helpers::sorted_indices[Indices]>(obj, ctx, remote_ref_mode), + Helpers::sorted_indices[Indices]>(obj, ctx, remote_ref_mode, + remote_type_id), true) : false) || ...); @@ -1782,6 +2246,19 @@ FORY_ALWAYS_INLINE T read_varint_at(Buffer &buffer, uint32_t &offset) { offset += bytes_read; // Zigzag decode return static_cast((raw >> 1) ^ (~(raw & 1) + 1)); + } else if constexpr (std::is_same_v || + std::is_same_v) { + // Unsigned 32-bit varint (no zigzag) + uint32_t raw = buffer.GetVarUint32(offset, &bytes_read); + offset += bytes_read; + return raw; + } else if constexpr (std::is_same_v || + std::is_same_v) { + // Unsigned 64-bit varint (no zigzag) - used for VAR_UINT64 and + // TAGGED_UINT64 + uint64_t raw = buffer.GetVarUint64(offset, &bytes_read); + offset += bytes_read; + return raw; } else { static_assert(sizeof(T) == 0, "Unsupported varint type"); return T{}; @@ -1790,6 +2267,7 @@ FORY_ALWAYS_INLINE T read_varint_at(Buffer &buffer, uint32_t &offset) { /// Helper to read a single varint primitive field. /// No lambda overhead - direct function call that will be inlined. +/// Handles both standard varint and tagged encoding based on field config. 
template FORY_ALWAYS_INLINE void read_single_varint_field(T &obj, Buffer &buffer, uint32_t &offset) { @@ -1800,7 +2278,40 @@ FORY_ALWAYS_INLINE void read_single_varint_field(T &obj, Buffer &buffer, using RawFieldType = typename meta::RemoveMemberPointerCVRefT; using FieldType = unwrap_field_t; - FieldType result = read_varint_at(buffer, offset); + + FieldType result; + + // Check for tagged encoding on unsigned 64-bit types + if constexpr (::fory::detail::has_field_config_v && + (std::is_same_v || + std::is_same_v)) { + constexpr auto enc = + ::fory::detail::GetFieldConfigEntry::encoding; + if constexpr (enc == Encoding::Tagged) { + // Use tagged reading (not standard varint) + uint32_t bytes_read; + result = buffer.GetTaggedUint64(offset, &bytes_read); + offset += bytes_read; + } else { + result = read_varint_at(buffer, offset); + } + } else if constexpr (::fory::detail::has_field_config_v && + (std::is_same_v || + std::is_same_v)) { + constexpr auto enc = + ::fory::detail::GetFieldConfigEntry::encoding; + if constexpr (enc == Encoding::Tagged) { + // Use tagged reading for signed int64 (not standard varint) + uint32_t bytes_read; + result = buffer.GetTaggedInt64(offset, &bytes_read); + offset += bytes_read; + } else { + result = read_varint_at(buffer, offset); + } + } else { + result = read_varint_at(buffer, offset); + } + // Assign to field (handle fory::field<> wrapper if needed) if constexpr (is_fory_field_v) { (obj.*field_ptr).value = result; @@ -1902,6 +2413,10 @@ void read_struct_fields_compatible(T &obj, ReadContext &ctx, std::index_sequence) { const auto &remote_fields = remote_type_meta->get_field_infos(); + std::cerr << "[compatible] Starting to read " << remote_fields.size() + << " remote fields, buffer pos=" << ctx.buffer().reader_index() + << std::endl; + // Iterate through remote fields in their serialization order for (size_t remote_idx = 0; remote_idx < remote_fields.size(); ++remote_idx) { const auto &remote_field = remote_fields[remote_idx]; @@ 
-1912,6 +2427,14 @@ void read_struct_fields_compatible(T &obj, ReadContext &ctx, // field's header during FieldInfo::from_bytes. RefMode remote_ref_mode = remote_field.field_type.ref_mode; + std::cerr << "[compatible] remote_idx=" << remote_idx + << ", field=" << remote_field.field_name + << ", type_id=" << remote_field.field_type.type_id + << ", nullable=" << remote_field.field_type.nullable + << ", ref_mode=" << static_cast(remote_ref_mode) + << ", field_id=" << field_id + << ", buffer pos=" << ctx.buffer().reader_index() << std::endl; + if (field_id == -1) { // Field unknown locally — skip its value skip_field_value(ctx, remote_field.field_type, remote_ref_mode); @@ -1923,10 +2446,11 @@ void read_struct_fields_compatible(T &obj, ReadContext &ctx, // Dispatch to the correct local field by field_id // Uses fold expression with short-circuit - no lambda overhead + // Pass remote type_id for correct encoding in compatible mode bool handled = false; - dispatch_compatible_field_read_impl(obj, ctx, field_id, remote_ref_mode, - handled, - std::index_sequence{}); + dispatch_compatible_field_read_impl( + obj, ctx, field_id, remote_ref_mode, remote_field.field_type.type_id, + handled, std::index_sequence{}); if (!handled) { // Shouldn't happen if TypeMeta::assign_field_ids worked correctly @@ -2022,6 +2546,8 @@ struct Serializer>> { } static void write_data(const T &obj, WriteContext &ctx) { + // Only write struct version hash when check_struct_version is enabled, + // matching Java's behavior in ObjectSerializer.write(). if (ctx.check_struct_version()) { auto type_info_res = ctx.type_resolver().template get_type_info(); if (FORY_PREDICT_FALSE(!type_info_res.ok())) { @@ -2047,6 +2573,8 @@ struct Serializer>> { static void write_data_generic(const T &obj, WriteContext &ctx, bool has_generics) { + // Only write struct version hash when check_struct_version is enabled, + // matching Java's behavior in ObjectSerializer.write(). 
if (ctx.check_struct_version()) { auto type_info_res = ctx.type_resolver().template get_type_info(); if (FORY_PREDICT_FALSE(!type_info_res.ok())) { @@ -2078,7 +2606,7 @@ struct Serializer>> { if (FORY_PREDICT_FALSE(ctx.has_error())) { return T{}; } -#ifdef FORY_DEBUG +#ifdef ENABLE_FORY_DEBUG_OUTPUT std::cerr << "[xlang][struct] T=" << typeid(T).name() << ", read_ref_flag=" << static_cast(ref_flag) << ", reader_index=" << ctx.buffer().reader_index() @@ -2227,6 +2755,8 @@ struct Serializer>> { } static T read_compatible(ReadContext &ctx, const TypeInfo *remote_type_info) { + std::cerr << "[read_compatible] Entering for type " << typeid(T).name() + << ", buffer_pos=" << ctx.buffer().reader_index() << std::endl; // Read and verify struct version if enabled (matches write_data behavior) if (ctx.check_struct_version()) { int32_t read_version = ctx.buffer().ReadInt32(ctx.error()); @@ -2277,6 +2807,8 @@ struct Serializer>> { } static T read_data(ReadContext &ctx) { + // Only read struct version hash when check_struct_version is enabled, + // matching Java's behavior in ObjectSerializer.read(). if (ctx.check_struct_version()) { int32_t read_version = ctx.buffer().ReadInt32(ctx.error()); if (FORY_PREDICT_FALSE(ctx.has_error())) { diff --git a/cpp/fory/serialization/type_resolver.cc b/cpp/fory/serialization/type_resolver.cc index 7e1e2ed7b0..3ddd99b0bd 100644 --- a/cpp/fory/serialization/type_resolver.cc +++ b/cpp/fory/serialization/type_resolver.cc @@ -443,8 +443,9 @@ TypeMeta::from_bytes(Buffer &buffer, const TypeMeta *local_type_info) { field_infos.push_back(std::move(field)); } - // Sort fields according to xlang spec - field_infos = sort_field_infos(std::move(field_infos)); + // NOTE: Do NOT sort remote fields! They are already in the sender's sorted + // order, which matches the data order. Re-sorting would cause misalignment + // with the serialized data. 
// Assign field IDs by comparing with local type if (local_type_info != nullptr) { @@ -539,8 +540,8 @@ TypeMeta::from_bytes_with_header(Buffer &buffer, int64_t header) { field_infos.push_back(std::move(field)); } - // Sort fields according to xlang spec - field_infos = sort_field_infos(std::move(field_infos)); + // NOTE: Do NOT sort remote fields! They are already in the sender's sorted + // order, which matches the data order. // CRITICAL FIX: Ensure we consume exactly meta_size bytes size_t current_pos = buffer.reader_index(); @@ -606,16 +607,24 @@ int32_t get_primitive_type_size(uint32_t type_id) { switch (static_cast(type_id)) { case TypeId::BOOL: case TypeId::INT8: + case TypeId::UINT8: return 1; case TypeId::INT16: + case TypeId::UINT16: case TypeId::FLOAT16: return 2; case TypeId::INT32: - case TypeId::VAR32: + case TypeId::VARINT32: + case TypeId::UINT32: + case TypeId::VAR_UINT32: case TypeId::FLOAT32: return 4; case TypeId::INT64: - case TypeId::VAR64: + case TypeId::VARINT64: + case TypeId::TAGGED_INT64: + case TypeId::UINT64: + case TypeId::VAR_UINT64: + case TypeId::TAGGED_UINT64: case TypeId::FLOAT64: return 8; default: @@ -623,11 +632,18 @@ int32_t get_primitive_type_size(uint32_t type_id) { } } +/// Check if a type ID represents a compressed (varint/tagged) type. +/// This must match Java's Types.isCompressedType() exactly for consistent +/// field ordering. Java only considers VARINT32, VAR_UINT32, VARINT64, +/// VAR_UINT64, TAGGED_INT64, and TAGGED_UINT64 as compressed. +/// Note: INT32, INT64, UINT32, UINT64 are NOT compressed - they are fixed-size. 
bool is_compress(uint32_t type_id) { - return type_id == static_cast(TypeId::INT32) || - type_id == static_cast(TypeId::INT64) || - type_id == static_cast(TypeId::VAR32) || - type_id == static_cast(TypeId::VAR64); + return type_id == static_cast(TypeId::VARINT32) || + type_id == static_cast(TypeId::VARINT64) || + type_id == static_cast(TypeId::TAGGED_INT64) || + type_id == static_cast(TypeId::VAR_UINT32) || + type_id == static_cast(TypeId::VAR_UINT64) || + type_id == static_cast(TypeId::TAGGED_UINT64); } // Numeric field sorter (for primitive fields) @@ -1250,7 +1266,18 @@ void TypeResolver::register_builtin_types() { register_type_id_only(TypeId::INT8); register_type_id_only(TypeId::INT16); register_type_id_only(TypeId::INT32); + register_type_id_only(TypeId::VARINT32); register_type_id_only(TypeId::INT64); + register_type_id_only(TypeId::VARINT64); + register_type_id_only(TypeId::TAGGED_INT64); + register_type_id_only(TypeId::UINT8); + register_type_id_only(TypeId::UINT16); + register_type_id_only(TypeId::UINT32); + register_type_id_only(TypeId::VAR_UINT32); + register_type_id_only(TypeId::UINT64); + register_type_id_only(TypeId::VAR_UINT64); + register_type_id_only(TypeId::TAGGED_UINT64); + register_type_id_only(TypeId::FLOAT16); register_type_id_only(TypeId::FLOAT32); register_type_id_only(TypeId::FLOAT64); register_type_id_only(TypeId::STRING); @@ -1261,6 +1288,10 @@ void TypeResolver::register_builtin_types() { register_type_id_only(TypeId::INT16_ARRAY); register_type_id_only(TypeId::INT32_ARRAY); register_type_id_only(TypeId::INT64_ARRAY); + register_type_id_only(TypeId::UINT8_ARRAY); + register_type_id_only(TypeId::UINT16_ARRAY); + register_type_id_only(TypeId::UINT32_ARRAY); + register_type_id_only(TypeId::UINT64_ARRAY); register_type_id_only(TypeId::FLOAT16_ARRAY); register_type_id_only(TypeId::FLOAT32_ARRAY); register_type_id_only(TypeId::FLOAT64_ARRAY); @@ -1277,6 +1308,8 @@ void TypeResolver::register_builtin_types() { 
register_type_id_only(TypeId::EXT); // Other internal types + register_type_id_only(TypeId::UNION); + register_type_id_only(TypeId::NONE); register_type_id_only(TypeId::DURATION); register_type_id_only(TypeId::TIMESTAMP); register_type_id_only(TypeId::LOCAL_DATE); diff --git a/cpp/fory/serialization/type_resolver.h b/cpp/fory/serialization/type_resolver.h index 896074dd5f..1ef0ffca07 100644 --- a/cpp/fory/serialization/type_resolver.h +++ b/cpp/fory/serialization/type_resolver.h @@ -57,6 +57,7 @@ #include "fory/util/flat_int_map.h" #include "fory/util/logging.h" #include "fory/util/result.h" +#include "fory/util/string_util.h" namespace fory { namespace serialization { @@ -509,13 +510,73 @@ constexpr bool compute_track_ref() { } } +// Helper to check if a type is unsigned integer +template struct is_unsigned_integer : std::false_type {}; +template <> struct is_unsigned_integer : std::true_type {}; +template <> struct is_unsigned_integer : std::true_type {}; +template <> struct is_unsigned_integer : std::true_type {}; +template <> struct is_unsigned_integer : std::true_type {}; +template +inline constexpr bool is_unsigned_integer_v = is_unsigned_integer::value; + +// Helper to get inner type of optional, or the type itself +template struct unwrap_optional_inner { + using type = T; +}; +template +struct unwrap_optional_inner>>> { + using type = typename decay_t::value_type; +}; +template +using unwrap_optional_inner_t = typename unwrap_optional_inner::type; + +// Helper to compute the correct type_id for unsigned types based on encoding +template +constexpr uint32_t compute_unsigned_type_id() { + // For unsigned types, check if FORY_FIELD_CONFIG specifies an encoding + if constexpr (::fory::detail::has_field_config_v) { + constexpr auto enc = + ::fory::detail::GetFieldConfigEntry::encoding; + // Handle inner type for std::optional + using InnerType = unwrap_optional_inner_t; + if constexpr (std::is_same_v) { + return static_cast(TypeId::UINT8); + } else if constexpr 
(std::is_same_v) { + return static_cast(TypeId::UINT16); + } else if constexpr (std::is_same_v) { + if constexpr (enc == Encoding::Varint) { + return static_cast(TypeId::VAR_UINT32); + } else { + return static_cast(TypeId::UINT32); + } + } else if constexpr (std::is_same_v) { + if constexpr (enc == Encoding::Varint) { + return static_cast(TypeId::VAR_UINT64); + } else if constexpr (enc == Encoding::Tagged) { + return static_cast(TypeId::TAGGED_UINT64); + } else { + return static_cast(TypeId::UINT64); + } + } + } + // Not an unsigned type with field config, use default + return 0; +} + template struct FieldInfoBuilder { static FieldInfo build() { const auto meta = ForyFieldInfo(T{}); const auto field_names = decltype(meta)::Names; const auto field_ptrs = decltype(meta)::Ptrs; - std::string field_name(field_names[Index]); + // Convert camelCase field name to snake_case for cross-language + // compatibility + std::string_view original_name = field_names[Index]; + constexpr size_t max_snake_len = 128; // Reasonable max for field names + auto [snake_buffer, snake_len] = + ::fory::to_snake_case(original_name); + std::string field_name(snake_buffer.data(), snake_len); + const auto field_ptr = std::get(field_ptrs); using RawFieldType = typename meta::RemoveMemberPointerCVRefT; @@ -531,6 +592,16 @@ template struct FieldInfoBuilder { constexpr bool track_ref = compute_track_ref(); FieldType field_type = FieldTypeBuilder::build(false); + + // Override type_id for unsigned types based on encoding from + // FORY_FIELD_CONFIG + using InnerType = unwrap_optional_inner_t; + constexpr uint32_t unsigned_tid = + compute_unsigned_type_id(); + if constexpr (unsigned_tid != 0 && is_unsigned_integer_v) { + field_type.type_id = unsigned_tid; + } + // Override nullable and ref_tracking from field-level metadata field_type.nullable = is_nullable; field_type.ref_tracking = track_ref; @@ -538,7 +609,8 @@ template struct FieldInfoBuilder { #ifdef FORY_DEBUG // DEBUG: Print field info for 
debugging fingerprint mismatch std::cerr << "[xlang][debug] FieldInfoBuilder T=" << typeid(T).name() - << " Index=" << Index << " field=" << field_name << " has_tags=" + << " Index=" << Index << " field=" << field_name + << " type_id=" << field_type.type_id << " has_tags=" << ::fory::detail::has_field_tags_v << " is_nullable=" << is_nullable << " track_ref=" << track_ref << std::endl; #endif @@ -976,7 +1048,13 @@ TypeResolver::build_struct_type_info(uint32_t type_id, std::string ns, entry->name_to_index.reserve(field_count); for (size_t i = 0; i < field_count; ++i) { - entry->name_to_index.emplace(std::string(field_names[i]), i); + // Convert camelCase field name to snake_case for cross-language + // compatibility + constexpr size_t max_snake_len = 128; + auto [snake_buffer, snake_len] = + ::fory::to_snake_case(field_names[i]); + entry->name_to_index.emplace(std::string(snake_buffer.data(), snake_len), + i); } auto field_infos = diff --git a/cpp/fory/serialization/unsigned_serializer.h b/cpp/fory/serialization/unsigned_serializer.h index fd1ca11b0b..1079afc689 100644 --- a/cpp/fory/serialization/unsigned_serializer.h +++ b/cpp/fory/serialization/unsigned_serializer.h @@ -165,9 +165,9 @@ template <> struct Serializer { } }; -/// uint32_t serializer (native mode only) +/// uint32_t serializer - uses VAR_UINT32 to match Rust xlang mode template <> struct Serializer { - static constexpr TypeId type_id = TypeId::UINT32; + static constexpr TypeId type_id = TypeId::VAR_UINT32; static inline void write_type_info(WriteContext &ctx) { ctx.write_varuint32(static_cast(type_id)); @@ -230,9 +230,9 @@ template <> struct Serializer { } }; -/// uint64_t serializer (native mode only) +/// uint64_t serializer - uses VAR_UINT64 to match Rust xlang mode template <> struct Serializer { - static constexpr TypeId type_id = TypeId::UINT64; + static constexpr TypeId type_id = TypeId::VAR_UINT64; static inline void write_type_info(WriteContext &ctx) { 
ctx.write_varuint32(static_cast(type_id)); diff --git a/cpp/fory/serialization/unsigned_serializer_test.cc b/cpp/fory/serialization/unsigned_serializer_test.cc index 4680be0cf3..0bf7fe8c58 100644 --- a/cpp/fory/serialization/unsigned_serializer_test.cc +++ b/cpp/fory/serialization/unsigned_serializer_test.cc @@ -246,16 +246,18 @@ TEST(UnsignedSerializerTest, BoundaryValues) { // ============================================================================ TEST(UnsignedSerializerTest, UnsignedTypeIdsAreDistinct) { - // Verify that unsigned types use distinct TypeIds (UINT8, UINT16, UINT32, - // UINT64) + // Verify that unsigned types use distinct TypeIds + // uint8_t and uint16_t use fixed encoding (UINT8, UINT16) + // uint32_t and uint64_t use variable encoding (VAR_UINT32, VAR_UINT64) to + // match Rust xlang mode EXPECT_EQ(static_cast(Serializer::type_id), static_cast(TypeId::UINT8)); EXPECT_EQ(static_cast(Serializer::type_id), static_cast(TypeId::UINT16)); EXPECT_EQ(static_cast(Serializer::type_id), - static_cast(TypeId::UINT32)); + static_cast(TypeId::VAR_UINT32)); EXPECT_EQ(static_cast(Serializer::type_id), - static_cast(TypeId::UINT64)); + static_cast(TypeId::VAR_UINT64)); } TEST(UnsignedSerializerTest, UnsignedArrayTypeIdsAreDistinct) { diff --git a/cpp/fory/serialization/xlang_test_main.cc b/cpp/fory/serialization/xlang_test_main.cc index d0558f0722..73f42f9bae 100644 --- a/cpp/fory/serialization/xlang_test_main.cc +++ b/cpp/fory/serialization/xlang_test_main.cc @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -580,6 +581,145 @@ struct CircularRefStruct { FORY_STRUCT(CircularRefStruct, name, selfRef); FORY_FIELD_TAGS(CircularRefStruct, (name, 0), (selfRef, 1, nullable, ref)); +// ============================================================================ +// Unsigned Number Test Types +// ============================================================================ + +// UnsignedSchemaConsistentSimple (type id 1) +// A 
simple test struct for unsigned numbers with tagged encoding. +struct UnsignedSchemaConsistentSimple { + uint64_t u64Tagged; // TAGGED_UINT64 + std::optional u64TaggedNullable; // TAGGED_UINT64, nullable + + bool operator==(const UnsignedSchemaConsistentSimple &other) const { + return u64Tagged == other.u64Tagged && + u64TaggedNullable == other.u64TaggedNullable; + } +}; +FORY_STRUCT(UnsignedSchemaConsistentSimple, u64Tagged, u64TaggedNullable); +FORY_FIELD_CONFIG(UnsignedSchemaConsistentSimple, + (u64Tagged, fory::F(0).tagged()), + (u64TaggedNullable, fory::F(1).nullable().tagged())); + +// UnsignedSchemaConsistent (type id 501) +// Test struct for unsigned numbers in SCHEMA_CONSISTENT mode. +// All fields use the same nullability as Java. +// Note: C++ uses std::optional for nullable fields. +struct UnsignedSchemaConsistent { + // Primitive unsigned fields (non-nullable) + uint8_t u8Field; + uint16_t u16Field; + uint32_t u32VarField; // VAR_UINT32 - variable-length + uint32_t u32FixedField; // UINT32 - fixed 4-byte + uint64_t u64VarField; // VAR_UINT64 - variable-length + uint64_t u64FixedField; // UINT64 - fixed 8-byte + uint64_t u64TaggedField; // TAGGED_UINT64 + + // Nullable unsigned fields (using std::optional) + std::optional u8NullableField; + std::optional u16NullableField; + std::optional u32VarNullableField; + std::optional u32FixedNullableField; + std::optional u64VarNullableField; + std::optional u64FixedNullableField; + std::optional u64TaggedNullableField; + + bool operator==(const UnsignedSchemaConsistent &other) const { + return u8Field == other.u8Field && u16Field == other.u16Field && + u32VarField == other.u32VarField && + u32FixedField == other.u32FixedField && + u64VarField == other.u64VarField && + u64FixedField == other.u64FixedField && + u64TaggedField == other.u64TaggedField && + u8NullableField == other.u8NullableField && + u16NullableField == other.u16NullableField && + u32VarNullableField == other.u32VarNullableField && + 
u32FixedNullableField == other.u32FixedNullableField && + u64VarNullableField == other.u64VarNullableField && + u64FixedNullableField == other.u64FixedNullableField && + u64TaggedNullableField == other.u64TaggedNullableField; + } +}; +FORY_STRUCT(UnsignedSchemaConsistent, u8Field, u16Field, u32VarField, + u32FixedField, u64VarField, u64FixedField, u64TaggedField, + u8NullableField, u16NullableField, u32VarNullableField, + u32FixedNullableField, u64VarNullableField, u64FixedNullableField, + u64TaggedNullableField); +// Use new FORY_FIELD_CONFIG with builder pattern for encoding specification +FORY_FIELD_CONFIG(UnsignedSchemaConsistent, (u8Field, fory::F(0)), + (u16Field, fory::F(1)), (u32VarField, fory::F(2).varint()), + (u32FixedField, fory::F(3).fixed()), + (u64VarField, fory::F(4).varint()), + (u64FixedField, fory::F(5).fixed()), + (u64TaggedField, fory::F(6).tagged()), + (u8NullableField, fory::F(7).nullable()), + (u16NullableField, fory::F(8).nullable()), + (u32VarNullableField, fory::F(9).nullable().varint()), + (u32FixedNullableField, fory::F(10).nullable().fixed()), + (u64VarNullableField, fory::F(11).nullable().varint()), + (u64FixedNullableField, fory::F(12).nullable().fixed()), + (u64TaggedNullableField, fory::F(13).nullable().tagged())); + +// UnsignedSchemaCompatible (type id 502) +// Test struct for unsigned numbers in COMPATIBLE mode. 
+// Group 1: std::optional types (nullable in C++, non-nullable in Java) +// Group 2: Non-optional types with Field2 suffix (non-nullable in C++, nullable +// in Java) +struct UnsignedSchemaCompatible { + // Group 1: Nullable in C++ (std::optional), non-nullable in Java + std::optional u8Field1; + std::optional u16Field1; + std::optional u32VarField1; + std::optional u32FixedField1; + std::optional u64VarField1; + std::optional u64FixedField1; + std::optional u64TaggedField1; + + // Group 2: Non-nullable in C++, nullable in Java + uint8_t u8Field2; + uint16_t u16Field2; + uint32_t u32VarField2; + uint32_t u32FixedField2; + uint64_t u64VarField2; + uint64_t u64FixedField2; + uint64_t u64TaggedField2; + + bool operator==(const UnsignedSchemaCompatible &other) const { + return u8Field1 == other.u8Field1 && u16Field1 == other.u16Field1 && + u32VarField1 == other.u32VarField1 && + u32FixedField1 == other.u32FixedField1 && + u64VarField1 == other.u64VarField1 && + u64FixedField1 == other.u64FixedField1 && + u64TaggedField1 == other.u64TaggedField1 && + u8Field2 == other.u8Field2 && u16Field2 == other.u16Field2 && + u32VarField2 == other.u32VarField2 && + u32FixedField2 == other.u32FixedField2 && + u64VarField2 == other.u64VarField2 && + u64FixedField2 == other.u64FixedField2 && + u64TaggedField2 == other.u64TaggedField2; + } +}; +FORY_STRUCT(UnsignedSchemaCompatible, u8Field1, u16Field1, u32VarField1, + u32FixedField1, u64VarField1, u64FixedField1, u64TaggedField1, + u8Field2, u16Field2, u32VarField2, u32FixedField2, u64VarField2, + u64FixedField2, u64TaggedField2); +// Use new FORY_FIELD_CONFIG with builder pattern for encoding specification +// Group 1: nullable in C++ (std::optional), non-nullable in Java +// Group 2: non-nullable in C++, nullable in Java +FORY_FIELD_CONFIG(UnsignedSchemaCompatible, (u8Field1, fory::F(0).nullable()), + (u16Field1, fory::F(1).nullable()), + (u32VarField1, fory::F(2).nullable().varint()), + (u32FixedField1, 
fory::F(3).nullable().fixed()), + (u64VarField1, fory::F(4).nullable().varint()), + (u64FixedField1, fory::F(5).nullable().fixed()), + (u64TaggedField1, fory::F(6).nullable().tagged()), + (u8Field2, fory::F(7)), (u16Field2, fory::F(8)), + (u32VarField2, fory::F(9).varint()), + (u32FixedField2, fory::F(10).fixed()), + (u64VarField2, fory::F(11).varint()), + (u64FixedField2, fory::F(12).fixed()), + (u64TaggedField2, fory::F(13).tagged())); + namespace fory { namespace serialization { @@ -706,10 +846,16 @@ void AppendSerialized(Fory &fory, const T &value, std::vector &out) { Fory BuildFory(bool compatible = true, bool xlang = true, bool check_struct_version = false, bool track_ref = false) { + // In Java xlang mode, checkClassVersion is automatically set to true for + // SCHEMA_CONSISTENT mode (compatible=false). Match this behavior in C++. + bool actual_check_version = check_struct_version; + if (xlang && !compatible) { + actual_check_version = true; + } return Fory::builder() .compatible(compatible) .xlang(xlang) - .check_struct_version(check_struct_version) + .check_struct_version(actual_check_version) .track_ref(track_ref) .build(); } @@ -760,6 +906,9 @@ void RunTestRefSchemaConsistent(const std::string &data_file); void RunTestRefCompatible(const std::string &data_file); void RunTestCircularRefSchemaConsistent(const std::string &data_file); void RunTestCircularRefCompatible(const std::string &data_file); +void RunTestUnsignedSchemaConsistentSimple(const std::string &data_file); +void RunTestUnsignedSchemaConsistent(const std::string &data_file); +void RunTestUnsignedSchemaCompatible(const std::string &data_file); } // namespace int main(int argc, char **argv) { @@ -859,6 +1008,12 @@ int main(int argc, char **argv) { RunTestCircularRefSchemaConsistent(data_file); } else if (case_name == "test_circular_ref_compatible") { RunTestCircularRefCompatible(data_file); + } else if (case_name == "test_unsigned_schema_consistent_simple") { + 
RunTestUnsignedSchemaConsistentSimple(data_file); + } else if (case_name == "test_unsigned_schema_consistent") { + RunTestUnsignedSchemaConsistent(data_file); + } else if (case_name == "test_unsigned_schema_compatible") { + RunTestUnsignedSchemaCompatible(data_file); } else { Fail("Unknown test case: " + case_name); } @@ -2057,26 +2212,6 @@ void RunTestNullableFieldSchemaConsistentNotNull(const std::string &data_file) { EnsureOk(fory.register_struct(401), "register NullableComprehensiveSchemaConsistent"); - // Debug: Print sorted field order - { - const char *debug_env = std::getenv("ENABLE_FORY_DEBUG_OUTPUT"); - if (debug_env && std::string(debug_env) == "1") { - using Helpers = fory::serialization::detail::CompileTimeFieldHelpers< - NullableComprehensiveSchemaConsistent>; - std::cerr << "[C++][fory-debug] NullableComprehensiveSchemaConsistent " - "sorted field order:\n"; - for (size_t i = 0; i < Helpers::FieldCount; ++i) { - size_t orig_idx = Helpers::sorted_indices[i]; - std::cerr << " [" << i << "] orig_idx=" << orig_idx - << " name=" << Helpers::sorted_field_names[i] - << " type_id=" << Helpers::type_ids[orig_idx] - << " nullable=" << Helpers::nullable_flags[orig_idx] - << " group=" << Helpers::group_rank(orig_idx) << "\n"; - } - std::cerr << std::endl; - } - } - NullableComprehensiveSchemaConsistent expected; // Base non-nullable primitive fields expected.byte_field = 1; @@ -2453,4 +2588,224 @@ void RunTestCircularRefCompatible(const std::string &data_file) { WriteFile(data_file, out); } +// ============================================================================ +// Unsigned Number Tests +// ============================================================================ + +void RunTestUnsignedSchemaConsistentSimple(const std::string &data_file) { + auto bytes = ReadFile(data_file); + std::cerr << "[DEBUG] test_unsigned_schema_consistent_simple: read " + << bytes.size() << " bytes from " << data_file << std::endl; + // Print first 32 bytes as hex + std::cerr 
<< "[DEBUG] First bytes: "; + for (size_t i = 0; i < std::min(bytes.size(), size_t(32)); ++i) { + std::cerr << std::hex << std::setw(2) << std::setfill('0') + << static_cast(bytes[i]) << " "; + } + std::cerr << std::dec << std::endl; + + // SCHEMA_CONSISTENT mode: compatible=false, xlang=true + auto fory = BuildFory(false, true, false, false); + EnsureOk(fory.register_struct(1), + "register UnsignedSchemaConsistentSimple"); + + Buffer buffer = MakeBuffer(bytes); + auto obj = ReadNext(fory, buffer); + std::cerr << "[DEBUG] Deserialized: u64Tagged=" << obj.u64Tagged + << ", u64TaggedNullable=" + << (obj.u64TaggedNullable.has_value() + ? std::to_string(obj.u64TaggedNullable.value()) + : "null") + << std::endl; + + // Verify fields + if (obj.u64Tagged != 1000000000) { + Fail( + "UnsignedSchemaConsistentSimple: u64Tagged should be 1000000000, got " + + std::to_string(obj.u64Tagged)); + } + if (!obj.u64TaggedNullable.has_value() || + obj.u64TaggedNullable.value() != 500000000) { + Fail("UnsignedSchemaConsistentSimple: u64TaggedNullable should be " + "500000000"); + } + + // Re-serialize and write back + std::vector out; + AppendSerialized(fory, obj, out); + WriteFile(data_file, out); +} + +void RunTestUnsignedSchemaConsistent(const std::string &data_file) { + auto bytes = ReadFile(data_file); + // SCHEMA_CONSISTENT mode: compatible=false, xlang=true + auto fory = BuildFory(false, true, false, false); + EnsureOk(fory.register_struct(501), + "register UnsignedSchemaConsistent"); + + Buffer buffer = MakeBuffer(bytes); + auto obj = ReadNext(fory, buffer); + + // Verify primitive unsigned fields + if (obj.u8Field != 200) { + Fail("UnsignedSchemaConsistent: u8Field should be 200, got " + + std::to_string(obj.u8Field)); + } + if (obj.u16Field != 60000) { + Fail("UnsignedSchemaConsistent: u16Field should be 60000, got " + + std::to_string(obj.u16Field)); + } + if (obj.u32VarField != 3000000000) { + Fail("UnsignedSchemaConsistent: u32VarField should be 3000000000, got " + + 
std::to_string(obj.u32VarField)); + } + if (obj.u32FixedField != 4000000000) { + Fail("UnsignedSchemaConsistent: u32FixedField should be 4000000000, got " + + std::to_string(obj.u32FixedField)); + } + if (obj.u64VarField != 10000000000) { + Fail("UnsignedSchemaConsistent: u64VarField should be 10000000000, got " + + std::to_string(obj.u64VarField)); + } + if (obj.u64FixedField != 15000000000) { + Fail("UnsignedSchemaConsistent: u64FixedField should be 15000000000, got " + + std::to_string(obj.u64FixedField)); + } + if (obj.u64TaggedField != 1000000000) { + Fail("UnsignedSchemaConsistent: u64TaggedField should be 1000000000, got " + + std::to_string(obj.u64TaggedField)); + } + + // Verify nullable unsigned fields + if (!obj.u8NullableField.has_value() || obj.u8NullableField.value() != 128) { + Fail("UnsignedSchemaConsistent: u8NullableField should be 128"); + } + if (!obj.u16NullableField.has_value() || + obj.u16NullableField.value() != 40000) { + Fail("UnsignedSchemaConsistent: u16NullableField should be 40000"); + } + if (!obj.u32VarNullableField.has_value() || + obj.u32VarNullableField.value() != 2500000000) { + Fail("UnsignedSchemaConsistent: u32VarNullableField should be 2500000000"); + } + if (!obj.u32FixedNullableField.has_value() || + obj.u32FixedNullableField.value() != 3500000000) { + Fail( + "UnsignedSchemaConsistent: u32FixedNullableField should be 3500000000"); + } + if (!obj.u64VarNullableField.has_value() || + obj.u64VarNullableField.value() != 8000000000) { + Fail("UnsignedSchemaConsistent: u64VarNullableField should be 8000000000"); + } + if (!obj.u64FixedNullableField.has_value() || + obj.u64FixedNullableField.value() != 12000000000) { + Fail("UnsignedSchemaConsistent: u64FixedNullableField should be " + "12000000000"); + } + if (!obj.u64TaggedNullableField.has_value() || + obj.u64TaggedNullableField.value() != 500000000) { + Fail( + "UnsignedSchemaConsistent: u64TaggedNullableField should be 500000000"); + } + + // Debug: print field values before 
re-serialization + std::cerr << "[DEBUG] Before re-serialization:\n"; + std::cerr << " u8Field=" << static_cast(obj.u8Field) + << " u16Field=" << obj.u16Field + << " u32VarField=" << obj.u32VarField + << " u32FixedField=" << obj.u32FixedField << "\n"; + std::cerr << " u64VarField=" << obj.u64VarField + << " u64FixedField=" << obj.u64FixedField + << " u64TaggedField=" << obj.u64TaggedField << "\n"; + + // Re-serialize and write back + std::vector out; + AppendSerialized(fory, obj, out); + + // Debug: print output bytes for inspection + std::cerr << "[DEBUG] Serialized " << out.size() << " bytes:\n"; + std::cerr << "[DEBUG] Hex: "; + for (size_t i = 0; i < std::min(out.size(), size_t(80)); ++i) { + std::cerr << std::hex << std::setw(2) << std::setfill('0') + << static_cast(out[i]); + } + std::cerr << std::dec << "\n"; + + WriteFile(data_file, out); +} + +void RunTestUnsignedSchemaCompatible(const std::string &data_file) { + auto bytes = ReadFile(data_file); + // COMPATIBLE mode: compatible=true, xlang=true + auto fory = BuildFory(true, true, false, false); + EnsureOk(fory.register_struct(502), + "register UnsignedSchemaCompatible"); + + Buffer buffer = MakeBuffer(bytes); + auto obj = ReadNext(fory, buffer); + + // Verify Group 1: Nullable fields (values from Java's non-nullable fields) + if (!obj.u8Field1.has_value() || obj.u8Field1.value() != 200) { + Fail("UnsignedSchemaCompatible: u8Field1 should be 200"); + } + if (!obj.u16Field1.has_value() || obj.u16Field1.value() != 60000) { + Fail("UnsignedSchemaCompatible: u16Field1 should be 60000"); + } + if (!obj.u32VarField1.has_value() || obj.u32VarField1.value() != 3000000000) { + Fail("UnsignedSchemaCompatible: u32VarField1 should be 3000000000"); + } + if (!obj.u32FixedField1.has_value() || + obj.u32FixedField1.value() != 4000000000) { + Fail("UnsignedSchemaCompatible: u32FixedField1 should be 4000000000"); + } + if (!obj.u64VarField1.has_value() || + obj.u64VarField1.value() != 10000000000) { + 
Fail("UnsignedSchemaCompatible: u64VarField1 should be 10000000000"); + } + if (!obj.u64FixedField1.has_value() || + obj.u64FixedField1.value() != 15000000000) { + Fail("UnsignedSchemaCompatible: u64FixedField1 should be 15000000000"); + } + if (!obj.u64TaggedField1.has_value() || + obj.u64TaggedField1.value() != 1000000000) { + Fail("UnsignedSchemaCompatible: u64TaggedField1 should be 1000000000"); + } + + // Verify Group 2: Non-nullable fields (values from Java's nullable fields) + if (obj.u8Field2 != 128) { + Fail("UnsignedSchemaCompatible: u8Field2 should be 128, got " + + std::to_string(obj.u8Field2)); + } + if (obj.u16Field2 != 40000) { + Fail("UnsignedSchemaCompatible: u16Field2 should be 40000, got " + + std::to_string(obj.u16Field2)); + } + if (obj.u32VarField2 != 2500000000) { + Fail("UnsignedSchemaCompatible: u32VarField2 should be 2500000000, got " + + std::to_string(obj.u32VarField2)); + } + if (obj.u32FixedField2 != 3500000000) { + Fail("UnsignedSchemaCompatible: u32FixedField2 should be 3500000000, got " + + std::to_string(obj.u32FixedField2)); + } + if (obj.u64VarField2 != 8000000000) { + Fail("UnsignedSchemaCompatible: u64VarField2 should be 8000000000, got " + + std::to_string(obj.u64VarField2)); + } + if (obj.u64FixedField2 != 12000000000) { + Fail( + "UnsignedSchemaCompatible: u64FixedField2 should be 12000000000, got " + + std::to_string(obj.u64FixedField2)); + } + if (obj.u64TaggedField2 != 500000000) { + Fail("UnsignedSchemaCompatible: u64TaggedField2 should be 500000000, got " + + std::to_string(obj.u64TaggedField2)); + } + + // Re-serialize and write back + std::vector out; + AppendSerialized(fory, obj, out); + WriteFile(data_file, out); +} + } // namespace diff --git a/cpp/fory/type/type.h b/cpp/fory/type/type.h index 1240fabe3f..2900f46c6d 100644 --- a/cpp/fory/type/type.h +++ b/cpp/fory/type/type.h @@ -34,13 +34,13 @@ enum class TypeId : int32_t { // a 32-bit signed integer. 
INT32 = 4, // a 32-bit signed integer which uses fory var_int32 encoding. - VAR32 = 5, + VARINT32 = 5, // a 64-bit signed integer. INT64 = 6, // a 64-bit signed integer which uses fory PVL encoding. - VAR64 = 7, + VARINT64 = 7, // a 64-bit signed integer which uses fory hybrid encoding. - H64 = 8, + TAGGED_INT64 = 8, // an 8-bit unsigned integer. UINT8 = 9, // a 16-bit unsigned integer. @@ -48,13 +48,13 @@ enum class TypeId : int32_t { // a 32-bit unsigned integer. UINT32 = 11, // a 32-bit unsigned integer which uses fory var_uint32 encoding. - VARU32 = 12, + VAR_UINT32 = 12, // a 64-bit unsigned integer. UINT64 = 13, // a 64-bit unsigned integer which uses fory var_uint64 encoding. - VARU64 = 14, + VAR_UINT64 = 14, // a 64-bit unsigned integer which uses fory hybrid encoding. - HU64 = 15, + TAGGED_UINT64 = 15, // a 16-bit floating point number. FLOAT16 = 16, // a 32-bit floating point number. diff --git a/cpp/fory/util/buffer.h b/cpp/fory/util/buffer.h index 9ff5f64e30..1c3581715e 100644 --- a/cpp/fory/util/buffer.h +++ b/cpp/fory/util/buffer.h @@ -441,6 +441,74 @@ class Buffer { return result; } + /// Read uint64_t using tagged encoding at given offset. + /// Similar to GetVarUint64 but for tagged encoding: + /// - If bit 0 is 0: read 4 bytes, return value >> 1 + /// - If bit 0 is 1: read 1 byte flag + 8 bytes uint64 + FORY_ALWAYS_INLINE uint64_t GetTaggedUint64(uint32_t offset, + uint32_t *readBytesLength) { + uint32_t i = *reinterpret_cast(data_ + offset); + if ((i & 0b1) != 0b1) { + *readBytesLength = 4; + return static_cast(i >> 1); + } else { + *readBytesLength = 9; + return *reinterpret_cast(data_ + offset + 1); + } + } + + /// Read int64_t using tagged encoding at given offset. 
+ /// - If bit 0 is 0: read 4 bytes as signed int, return value >> 1 + /// (arithmetic) + /// - If bit 0 is 1: read 1 byte flag + 8 bytes int64 + FORY_ALWAYS_INLINE int64_t GetTaggedInt64(uint32_t offset, + uint32_t *readBytesLength) { + int32_t i = *reinterpret_cast(data_ + offset); + if ((i & 0b1) != 0b1) { + *readBytesLength = 4; + return static_cast(i >> 1); // Arithmetic shift for signed + } else { + *readBytesLength = 9; + return *reinterpret_cast(data_ + offset + 1); + } + } + + /// Write uint64_t using tagged encoding at given offset. Returns bytes + /// written. + /// - If value is in [0, 0x7fffffff]: write 4 bytes (value << 1), return 4 + /// - Otherwise: write 1 byte flag + 8 bytes uint64, return 9 + FORY_ALWAYS_INLINE uint32_t PutTaggedUint64(uint32_t offset, uint64_t value) { + constexpr uint64_t MAX_SMALL_VALUE = 0x7fffffff; // INT32_MAX as u64 + if (value <= MAX_SMALL_VALUE) { + *reinterpret_cast(data_ + offset) = static_cast(value) + << 1; + return 4; + } else { + data_[offset] = 0b1; + *reinterpret_cast(data_ + offset + 1) = value; + return 9; + } + } + + /// Write int64_t using tagged encoding at given offset. Returns bytes + /// written. + /// - If value is in [-1073741824, 1073741823]: write 4 bytes (value << 1), + /// return 4 + /// - Otherwise: write 1 byte flag + 8 bytes int64, return 9 + FORY_ALWAYS_INLINE uint32_t PutTaggedInt64(uint32_t offset, int64_t value) { + constexpr int64_t MIN_SMALL_VALUE = -1073741824; // -2^30 + constexpr int64_t MAX_SMALL_VALUE = 1073741823; // 2^30 - 1 + if (value >= MIN_SMALL_VALUE && value <= MAX_SMALL_VALUE) { + *reinterpret_cast(data_ + offset) = static_cast(value) + << 1; + return 4; + } else { + data_[offset] = 0b1; + *reinterpret_cast(data_ + offset + 1) = value; + return 9; + } + } + /// Write uint8_t value to buffer at current writer index. /// Automatically grows buffer and advances writer index. 
FORY_ALWAYS_INLINE void WriteUint8(uint8_t value) { @@ -753,6 +821,85 @@ class Buffer { return static_cast((raw >> 1) ^ (~(raw & 1) + 1)); } + /// Write int64_t value using tagged encoding. + /// If value is in [-1073741824, 1073741823], encode as 4 bytes: ((value as + /// i32) << 1). Otherwise write as 9 bytes: 0b1 | little-endian 8 bytes i64. + FORY_ALWAYS_INLINE void WriteTaggedInt64(int64_t value) { + constexpr int64_t HALF_MIN_INT_VALUE = -1073741824; // INT32_MIN / 2 + constexpr int64_t HALF_MAX_INT_VALUE = 1073741823; // INT32_MAX / 2 + if (value >= HALF_MIN_INT_VALUE && value <= HALF_MAX_INT_VALUE) { + WriteInt32(static_cast(value) << 1); + } else { + Grow(9); + data_[writer_index_] = 0b1; + UnsafePut(writer_index_ + 1, value); + IncreaseWriterIndex(9); + } + } + + /// Read int64_t value using tagged encoding. Sets error on bounds violation. + /// If bit 0 is 0, return value >> 1 (arithmetic shift). + /// Otherwise, skip flag byte and read 8 bytes as int64. + FORY_ALWAYS_INLINE int64_t ReadTaggedInt64(Error &error) { + if (FORY_PREDICT_FALSE(reader_index_ + 4 > size_)) { + error.set_buffer_out_of_bound(reader_index_, 4, size_); + return 0; + } + int32_t i = reinterpret_cast(data_ + reader_index_)[0]; + if ((i & 0b1) != 0b1) { + reader_index_ += 4; + return static_cast(i >> 1); // arithmetic right shift + } else { + if (FORY_PREDICT_FALSE(reader_index_ + 9 > size_)) { + error.set_buffer_out_of_bound(reader_index_, 9, size_); + return 0; + } + int64_t value = + reinterpret_cast(data_ + reader_index_ + 1)[0]; + reader_index_ += 9; + return value; + } + } + + /// Write uint64_t value using tagged encoding. + /// If value is in [0, 0x7fffffff], encode as 4 bytes: ((value as u32) << 1). + /// Otherwise write as 9 bytes: 0b1 | little-endian 8 bytes u64. 
+ FORY_ALWAYS_INLINE void WriteTaggedUint64(uint64_t value) { + constexpr uint64_t MAX_SMALL_VALUE = 0x7fffffff; // INT32_MAX as u64 + if (value <= MAX_SMALL_VALUE) { + WriteInt32(static_cast(value) << 1); + } else { + Grow(9); + data_[writer_index_] = 0b1; + UnsafePut(writer_index_ + 1, value); + IncreaseWriterIndex(9); + } + } + + /// Read uint64_t value using tagged encoding. Sets error on bounds violation. + /// If bit 0 is 0, return value >> 1. + /// Otherwise, skip flag byte and read 8 bytes as uint64. + FORY_ALWAYS_INLINE uint64_t ReadTaggedUint64(Error &error) { + if (FORY_PREDICT_FALSE(reader_index_ + 4 > size_)) { + error.set_buffer_out_of_bound(reader_index_, 4, size_); + return 0; + } + uint32_t i = reinterpret_cast(data_ + reader_index_)[0]; + if ((i & 0b1) != 0b1) { + reader_index_ += 4; + return static_cast(i >> 1); + } else { + if (FORY_PREDICT_FALSE(reader_index_ + 9 > size_)) { + error.set_buffer_out_of_bound(reader_index_, 9, size_); + return 0; + } + uint64_t value = + reinterpret_cast(data_ + reader_index_ + 1)[0]; + reader_index_ += 9; + return value; + } + } + /// Read uint64_t value as varuint36small. Sets error on bounds violation. FORY_ALWAYS_INLINE uint64_t ReadVarUint36Small(Error &error) { if (FORY_PREDICT_FALSE(reader_index_ + 1 > size_)) { diff --git a/docs/guide/cpp/field-configuration.md b/docs/guide/cpp/field-configuration.md index a36b832268..5391c1f3c7 100644 --- a/docs/guide/cpp/field-configuration.md +++ b/docs/guide/cpp/field-configuration.md @@ -284,8 +284,221 @@ FORY_FIELD_TAGS(Document, | **Header dependencies** | Required everywhere | Isolated to config | | **Migration effort** | High (change all fields) | Low (add one macro) | +## FORY_FIELD_CONFIG Macro + +The `FORY_FIELD_CONFIG` macro is the most powerful and flexible way to configure field-level serialization. 
It provides: + +- **Builder pattern API**: Fluent, chainable configuration with `F(id).option1().option2()` +- **Encoding control**: Specify how unsigned integers are encoded (varint, fixed, tagged) +- **Compile-time verification**: Field names are verified against member pointers +- **Cross-language compatibility**: Configure encoding to match other languages (Java, Rust, etc.) + +### Basic Syntax + +```cpp +FORY_FIELD_CONFIG(StructType, + (field1, fory::F(0)), // Simple: just ID + (field2, fory::F(1).nullable()), // With nullable + (field3, fory::F(2).varint()), // With encoding + (field4, fory::F(3).nullable().ref()), // Multiple options + (field5, 4) // Backward compatible: integer ID +); +``` + +### The F() Builder + +The `fory::F(id)` factory creates a `FieldMeta` object that supports method chaining: + +```cpp +fory::F(0) // Create with field ID 0 + .nullable() // Mark as nullable + .ref() // Enable reference tracking + .varint() // Use variable-length encoding + .fixed() // Use fixed-size encoding + .tagged() // Use tagged encoding + .monomorphic() // Mark as monomorphic type + .compress(false) // Disable compression +``` + +**Tip:** To use `F()` without the `fory::` prefix, add a using declaration: + +```cpp +using fory::F; + +FORY_FIELD_CONFIG(MyStruct, + (field1, F(0).varint()), // No prefix needed + (field2, F(1).nullable()) +); +``` + +### Encoding Options for Unsigned Integers + +For `uint32_t` and `uint64_t` fields, you can specify the wire encoding: + +| Method | Type ID | Description | Use Case | +| ----------- | ------------- | ---------------------------------------------- | ------------------------------------- | +| `.varint()` | VAR_UINT32/64 | Variable-length encoding (1-5 or 1-10 bytes) | Values typically small | +| `.fixed()` | UINT32/64 | Fixed-size encoding (always 4 or 8 bytes) | Values uniformly distributed | +| `.tagged()` | TAGGED_UINT64 | Tagged hybrid encoding with size hint (uint64) | Mixed small and large values (uint64) | + 
+**Note:** `uint8_t` and `uint16_t` always use fixed encoding (UINT8, UINT16). + +### Complete Example + +```cpp +#include "fory/serialization/fory.h" + +using namespace fory::serialization; + +// Define struct with unsigned integer fields +struct MetricsData { + // Counters - often small values, use varint for space efficiency + uint32_t requestCount; + uint64_t bytesSent; + + // IDs - uniformly distributed, use fixed for consistent performance + uint32_t userId; + uint64_t sessionId; + + // Timestamps - use tagged encoding for mixed value ranges + uint64_t createdAt; + + // Nullable fields + std::optional<uint32_t> errorCount; + std::optional<uint64_t> lastAccessTime; +}; + +FORY_STRUCT(MetricsData, requestCount, bytesSent, userId, sessionId, + createdAt, errorCount, lastAccessTime); + +// Configure field encoding +FORY_FIELD_CONFIG(MetricsData, + // Small counters - varint saves space + (requestCount, fory::F(0).varint()), + (bytesSent, fory::F(1).varint()), + + // IDs - fixed for consistent performance + (userId, fory::F(2).fixed()), + (sessionId, fory::F(3).fixed()), + + // Timestamp - tagged encoding + (createdAt, fory::F(4).tagged()), + + // Nullable fields + (errorCount, fory::F(5).nullable().varint()), + (lastAccessTime, fory::F(6).nullable().tagged()) +); + +int main() { + auto fory = Fory::builder().xlang(true).build(); + fory.register_struct<MetricsData>(100); + + MetricsData data{ + .requestCount = 42, + .bytesSent = 1024, + .userId = 12345678, + .sessionId = 9876543210, + .createdAt = 1704067200000000000ULL, // 2024-01-01 in nanoseconds + .errorCount = 3, + .lastAccessTime = std::nullopt + }; + + auto bytes = fory.serialize(data).value(); + auto decoded = fory.deserialize<MetricsData>(bytes).value(); +} +``` + +### Cross-Language Compatibility + +When serializing data to be read by other languages, use `FORY_FIELD_CONFIG` to match their encoding expectations: + +**Java Compatibility:** + +```cpp +// Java uses these type IDs for unsigned integers: +// - Byte (u8): UINT8 (fixed) +// - Short (u16): 
UINT16 (fixed) +// - Integer (u32): VAR_UINT32 (varint) or UINT32 (fixed) +// - Long (u64): VAR_UINT64 (varint), UINT64 (fixed), or TAGGED_UINT64 + +struct JavaCompatible { + uint8_t byteField; // Maps to Java Byte + uint16_t shortField; // Maps to Java Short + uint32_t intVarField; // Maps to Java Integer with varint + uint32_t intFixedField; // Maps to Java Integer with fixed + uint64_t longVarField; // Maps to Java Long with varint + uint64_t longTagged; // Maps to Java Long with tagged +}; + +FORY_STRUCT(JavaCompatible, byteField, shortField, intVarField, + intFixedField, longVarField, longTagged); + +FORY_FIELD_CONFIG(JavaCompatible, + (byteField, fory::F(0)), // UINT8 (auto) + (shortField, fory::F(1)), // UINT16 (auto) + (intVarField, fory::F(2).varint()), // VAR_UINT32 + (intFixedField, fory::F(3).fixed()), // UINT32 + (longVarField, fory::F(4).varint()), // VAR_UINT64 + (longTagged, fory::F(5).tagged()) // TAGGED_UINT64 +); +``` + +### Schema Evolution with FORY_FIELD_CONFIG + +In compatible mode, fields can have different nullability between sender and receiver: + +```cpp +// Version 1: All fields non-nullable +struct DataV1 { + uint32_t id; + uint64_t timestamp; +}; +FORY_STRUCT(DataV1, id, timestamp); +FORY_FIELD_CONFIG(DataV1, + (id, fory::F(0).varint()), + (timestamp, fory::F(1).tagged()) +); + +// Version 2: Added nullable fields +struct DataV2 { + uint32_t id; + uint64_t timestamp; + std::optional version; // New nullable field +}; +FORY_STRUCT(DataV2, id, timestamp, version); +FORY_FIELD_CONFIG(DataV2, + (id, fory::F(0).varint()), + (timestamp, fory::F(1).tagged()), + (version, fory::F(2).nullable().varint()) // New field with nullable +); +``` + +### FORY_FIELD_CONFIG Options Reference + +| Method | Description | Valid For | +| ---------------- | ------------------------------------------- | -------------------------- | +| `.nullable()` | Mark field as nullable | Smart pointers, primitives | +| `.ref()` | Enable reference tracking | 
`std::shared_ptr` only | +| `.monomorphic()` | Mark pointer as always pointing to one type | Smart pointers | +| `.varint()` | Use variable-length encoding | `uint32_t`, `uint64_t` | +| `.fixed()` | Use fixed-size encoding | `uint32_t`, `uint64_t` | +| `.tagged()` | Use tagged hybrid encoding | `uint64_t` only | +| `.compress(v)` | Enable/disable field compression | All types | + +### Comparing Field Configuration Macros + +| Feature | `fory::field<>` | `FORY_FIELD_TAGS` | `FORY_FIELD_CONFIG` | +| ----------------------- | --------------------- | ----------------- | ------------------------- | +| **Struct modification** | Required (wrap types) | None | None | +| **Encoding control** | No | No | Yes (varint/fixed/tagged) | +| **Builder pattern** | No | No | Yes | +| **Compile-time verify** | Yes | Limited | Yes (member pointers) | +| **Cross-lang compat** | Limited | Limited | Full | +| **Recommended for** | Simple structs | Third-party types | Complex/xlang structs | + ## Related Topics - [Type Registration](type-registration.md) - Registering types with FORY_STRUCT - [Schema Evolution](schema-evolution.md) - Using tag IDs for schema evolution - [Configuration](configuration.md) - Enabling reference tracking globally +- [Cross-Language](cross-language.md) - Interoperability with Java, Rust, Python diff --git a/docs/specification/xlang_serialization_spec.md b/docs/specification/xlang_serialization_spec.md index d653c5180f..be6f27f35f 100644 --- a/docs/specification/xlang_serialization_spec.md +++ b/docs/specification/xlang_serialization_spec.md @@ -43,7 +43,14 @@ This specification defines the Fory xlang binary format. The format is dynamic r - var32: a 32-bit signed integer which use fory variable-length encoding. - int64: a 64-bit signed integer. - var64: a 64-bit signed integer which use fory PVL encoding. -- h64: a 64-bit signed integer which use fory Hybrid encoding. +- hybrid64: a 64-bit signed integer which use fory Hybrid encoding. 
+- uint8: an 8-bit unsigned integer. +- uint16: a 16-bit unsigned integer. +- uint32: a 32-bit unsigned integer. +- varu32: a 32-bit unsigned integer which use fory variable-length encoding. +- uint64: a 64-bit unsigned integer. +- varu64: a 64-bit unsigned integer which use fory PVL encoding. +- hybridu64: a 64-bit unsigned integer which use fory Hybrid encoding. - float16: a 16-bit floating point number. - float32: a 32-bit floating point number. - float64: a 64-bit floating point number including NaN and Infinity. @@ -82,7 +89,7 @@ This specification defines the Fory xlang binary format. The format is dynamic r Note: -- Unsigned int/long are not added here, since not every language support those types. +- Unsigned integer types use the same byte sizes as their signed counterparts; the difference is in value interpretation. See [Type mapping](xlang_type_mapping.md) for language-specific type mappings. ### Polymorphisms @@ -154,17 +161,17 @@ custom types (struct/ext/enum). User type IDs are in a separate namespace and co | 2 | INT8 | 8-bit signed integer | | 3 | INT16 | 16-bit signed integer | | 4 | INT32 | 32-bit signed integer | -| 5 | VAR32 | Variable-length encoded 32-bit signed integer | +| 5 | VARINT32 | Variable-length encoded 32-bit signed integer | | 6 | INT64 | 64-bit signed integer | -| 7 | VAR64 | Variable-length encoded 64-bit signed integer | -| 8 | H64 | Hybrid encoded 64-bit signed integer | +| 7 | VARINT64 | Variable-length encoded 64-bit signed integer | +| 8 | TAGGED_INT64 | Hybrid encoded 64-bit signed integer | | 9 | UINT8 | 8-bit unsigned integer | | 10 | UINT16 | 16-bit unsigned integer | | 11 | UINT32 | 32-bit unsigned integer | -| 12 | VARU32 | Variable-length encoded 32-bit unsigned integer | +| 12 | VAR_UINT32 | Variable-length encoded 32-bit unsigned integer | | 13 | UINT64 | 64-bit unsigned integer | -| 14 | VARU64 | Variable-length encoded 64-bit unsigned integer | -| 15 | HU64 | Hybrid encoded 64-bit unsigned integer | +| 14 | 
VAR_UINT64 | Variable-length encoded 64-bit unsigned integer | +| 15 | TAGGED_UINT64 | Hybrid encoded 64-bit unsigned integer | | 16 | FLOAT16 | 16-bit floating point (half precision) | | 17 | FLOAT32 | 32-bit floating point (single precision) | | 18 | FLOAT64 | 64-bit floating point (double precision) | @@ -932,7 +939,7 @@ function write_varuint64(value): | ... | ... | | 2^56 ~ 2^63-1 | 9 | -#### unsigned hybrid int64 (HU64) +#### unsigned hybrid int64 (TAGGED_UINT64) - size: 4 or 9 bytes @@ -956,7 +963,7 @@ else: return read_uint64_le() // read remaining 8 bytes ``` -Note: HU64 uses the full 31 bits for positive values [0, 2^31-1], compared to H64 which splits the range for signed values [-2^30, 2^30-1]. +Note: TAGGED_UINT64 uses the full 31 bits for positive values [0, 2^31-1], compared to TAGGED_INT64 which splits the range for signed values [-2^30, 2^30-1]. #### VarUint36Small @@ -995,7 +1002,7 @@ zigzag_value = read_varuint64() value = (zigzag_value >> 1) ^ (-(zigzag_value & 1)) ``` -#### signed hybrid int64 (H64) +#### signed hybrid int64 (TAGGED_INT64) - size: 4 or 9 bytes @@ -1019,7 +1026,7 @@ else: return read_int64_le() // read remaining 8 bytes ``` -Note: H64 uses 30 bits + sign for values [-2^30, 2^30-1], while HU64 uses full 31 bits for unsigned values [0, 2^31-1]. +Note: TAGGED_INT64 uses 30 bits + sign for values [-2^30, 2^30-1], while TAGGED_UINT64 uses full 31 bits for unsigned values [0, 2^31-1]. #### float32 @@ -1518,7 +1525,7 @@ This section provides a step-by-step guide for implementing Fory xlang serializa - [ ] Implement `write_varuint64` / `read_varuint64` - [ ] Implement `write_varint64` / `read_varint64` (with ZigZag) - [ ] Implement `write_varuint36_small` / `read_varuint36_small` (for strings) - - [ ] Optionally implement Hybrid encoding (H64/HU64) for int64 + - [ ] Optionally implement Hybrid encoding (TAGGED_INT64/TAGGED_UINT64) for int64 3. 
**Header Handling** - [ ] Write magic number `0x62d4` diff --git a/docs/specification/xlang_type_mapping.md b/docs/specification/xlang_type_mapping.md index 50dceb7bb3..9eedbf259d 100644 --- a/docs/specification/xlang_type_mapping.md +++ b/docs/specification/xlang_type_mapping.md @@ -27,58 +27,58 @@ Note: ## Type Mapping -| Fory Type | Fory Type ID | Java | Python | Javascript | C++ | Golang | Rust | -| ----------------------- | ------------ | --------------- | -------------------- | -------------- | ------------------------------ | ---------------- | ----------------- | -| bool | 1 | bool/Boolean | bool | Boolean | bool | bool | bool | -| int8 | 2 | byte/Byte | int/pyfory.int8 | Type.int8() | int8_t | int8 | i8 | -| int16 | 3 | short/Short | int/pyfory.int16 | Type.int16() | int16_t | int16 | i16 | -| int32 | 4 | int/Integer | int/pyfory.int32 | Type.int32() | int32_t | int32 | i32 | -| var32 | 5 | int/Integer | int/pyfory.var32 | Type.var32() | int32_t | int32 | i32 | -| int64 | 6 | long/Long | int/pyfory.int64 | Type.int64() | int64_t | int64 | i64 | -| var64 | 7 | long/Long | int/pyfory.var64 | Type.var64() | int64_t | int64 | i64 | -| h64 | 8 | long/Long | int/pyfory.h64 | Type.h64() | int64_t | int64 | i64 | -| uint8 | 9 | short/Short | int/pyfory.uint8 | Type.uint8() | uint8_t | uint8 | u8 | -| uint16 | 10 | int/Integer | int/pyfory.uint16 | Type.uint16() | uint16_t | uint16 | u16 | -| uint32 | 11 | long/Long | int/pyfory.uint32 | Type.uint32() | uint32_t | uint32 | u32 | -| varu32 | 12 | long/Long | int/pyfory.varu32 | Type.varu32() | uint32_t | uint32 | u32 | -| uint64 | 13 | long/Long | int/pyfory.uint64 | Type.uint64() | uint64_t | uint64 | u64 | -| varu64 | 14 | long/Long | int/pyfory.varu64 | Type.varu64() | uint64_t | uint64 | u64 | -| hu64 | 15 | long/Long | int/pyfory.hu64 | Type.hu64() | uint64_t | uint64 | u64 | -| float16 | 16 | float/Float | float/pyfory.float16 | Type.float16() | fory::float16_t | fory.float16 | fory::f16 | -| float32 | 17 | 
float/Float | float/pyfory.float32 | Type.float32() | float | float32 | f32 | -| float64 | 18 | double/Double | float/pyfory.float64 | Type.float64() | double | float64 | f64 | -| string | 19 | String | str | String | string | string | String/str | -| list | 20 | List/Collection | list/tuple | array | vector | slice | Vec | -| set | 21 | Set | set | / | set | fory.Set | Set | -| map | 22 | Map | dict | Map | unordered_map | map | HashMap | -| enum | 23 | Enum subclasses | enum subclasses | / | enum | / | enum | -| named_enum | 24 | Enum subclasses | enum subclasses | / | enum | / | enum | -| struct | 25 | pojo/record | data class | object | struct/class | struct | struct | -| compatible_struct | 26 | pojo/record | data class | object | struct/class | struct | struct | -| named_struct | 27 | pojo/record | data class | object | struct/class | struct | struct | -| named_compatible_struct | 28 | pojo/record | data class | object | struct/class | struct | struct | -| ext | 29 | pojo/record | data class | object | struct/class | struct | struct | -| named_ext | 30 | pojo/record | data class | object | struct/class | struct | struct | -| union | 31 | Union | typing.Union | / | `std::variant` | / | tagged union enum | -| none | 32 | null | None | null | `std::monostate` | nil | `()` | -| duration | 33 | Duration | timedelta | Number | duration | Duration | Duration | -| timestamp | 34 | Instant | datetime | Number | std::chrono::nanoseconds | Time | DateTime | -| local_date | 35 | Date | datetime | Number | std::chrono::nanoseconds | Time | DateTime | -| decimal | 36 | BigDecimal | Decimal | bigint | / | / | / | -| binary | 37 | byte[] | bytes | / | `uint8_t[n]/vector` | `[n]uint8/[]T` | `Vec` | -| array | 38 | array | np.ndarray | / | / | array/slice | Vec | -| bool_array | 39 | bool[] | ndarray(np.bool\_) | / | `bool[n]` | `[n]bool/[]T` | `Vec` | -| int8_array | 40 | byte[] | ndarray(int8) | / | `int8_t[n]/vector` | `[n]int8/[]T` | `Vec` | -| int16_array | 41 | short[] | 
ndarray(int16) | / | `int16_t[n]/vector` | `[n]int16/[]T` | `Vec` | -| int32_array | 42 | int[] | ndarray(int32) | / | `int32_t[n]/vector` | `[n]int32/[]T` | `Vec` | -| int64_array | 43 | long[] | ndarray(int64) | / | `int64_t[n]/vector` | `[n]int64/[]T` | `Vec` | -| uint8_array | 44 | short[] | ndarray(uint8) | / | `uint8_t[n]/vector` | `[n]uint8/[]T` | `Vec` | -| uint16_array | 45 | int[] | ndarray(uint16) | / | `uint16_t[n]/vector` | `[n]uint16/[]T` | `Vec` | -| uint32_array | 46 | long[] | ndarray(uint32) | / | `uint32_t[n]/vector` | `[n]uint32/[]T` | `Vec` | -| uint64_array | 47 | long[] | ndarray(uint64) | / | `uint64_t[n]/vector` | `[n]uint64/[]T` | `Vec` | -| float16_array | 48 | float[] | ndarray(float16) | / | `fory::float16_t[n]/vector` | `[n]float16/[]T` | `Vec` | -| float32_array | 49 | float[] | ndarray(float32) | / | `float[n]/vector` | `[n]float32/[]T` | `Vec` | -| float64_array | 50 | double[] | ndarray(float64) | / | `double[n]/vector` | `[n]float64/[]T` | `Vec` | +| Fory Type | Fory Type ID | Java | Python | Javascript | C++ | Golang | Rust | +| ----------------------- | ------------ | --------------- | -------------------- | ---------------- | ------------------------------ | ---------------- | ----------------- | +| bool | 1 | bool/Boolean | bool | Boolean | bool | bool | bool | +| int8 | 2 | byte/Byte | int/pyfory.int8 | Type.int8() | int8_t | int8 | i8 | +| int16 | 3 | short/Short | int/pyfory.int16 | Type.int16() | int16_t | int16 | i16 | +| int32 | 4 | int/Integer | int/pyfory.int32 | Type.int32() | int32_t | int32 | i32 | +| var32 | 5 | int/Integer | int/pyfory.var32 | Type.var32() | int32_t | int32 | i32 | +| int64 | 6 | long/Long | int/pyfory.int64 | Type.int64() | int64_t | int64 | i64 | +| var64 | 7 | long/Long | int/pyfory.var64 | Type.var64() | int64_t | int64 | i64 | +| hybrid64 | 8 | long/Long | int/pyfory.hybrid64 | Type.hybrid64() | int64_t | int64 | i64 | +| uint8 | 9 | short/Short | int/pyfory.uint8 | Type.uint8() | uint8_t | 
uint8 | u8 | +| uint16 | 10 | int/Integer | int/pyfory.uint16 | Type.uint16() | uint16_t | uint16 | u16 | +| uint32 | 11 | long/Long | int/pyfory.uint32 | Type.uint32() | uint32_t | uint32 | u32 | +| varu32 | 12 | long/Long | int/pyfory.varu32 | Type.varu32() | uint32_t | uint32 | u32 | +| uint64 | 13 | long/Long | int/pyfory.uint64 | Type.uint64() | uint64_t | uint64 | u64 | +| varu64 | 14 | long/Long | int/pyfory.varu64 | Type.varu64() | uint64_t | uint64 | u64 | +| hybridu64 | 15 | long/Long | int/pyfory.hybridu64 | Type.hybridu64() | uint64_t | uint64 | u64 | +| float16 | 16 | float/Float | float/pyfory.float16 | Type.float16() | fory::float16_t | fory.float16 | fory::f16 | +| float32 | 17 | float/Float | float/pyfory.float32 | Type.float32() | float | float32 | f32 | +| float64 | 18 | double/Double | float/pyfory.float64 | Type.float64() | double | float64 | f64 | +| string | 19 | String | str | String | string | string | String/str | +| list | 20 | List/Collection | list/tuple | array | vector | slice | Vec | +| set | 21 | Set | set | / | set | fory.Set | Set | +| map | 22 | Map | dict | Map | unordered_map | map | HashMap | +| enum | 23 | Enum subclasses | enum subclasses | / | enum | / | enum | +| named_enum | 24 | Enum subclasses | enum subclasses | / | enum | / | enum | +| struct | 25 | pojo/record | data class | object | struct/class | struct | struct | +| compatible_struct | 26 | pojo/record | data class | object | struct/class | struct | struct | +| named_struct | 27 | pojo/record | data class | object | struct/class | struct | struct | +| named_compatible_struct | 28 | pojo/record | data class | object | struct/class | struct | struct | +| ext | 29 | pojo/record | data class | object | struct/class | struct | struct | +| named_ext | 30 | pojo/record | data class | object | struct/class | struct | struct | +| union | 31 | Union | typing.Union | / | `std::variant` | / | tagged union enum | +| none | 32 | null | None | null | `std::monostate` | nil | `()` | 
+| duration | 33 | Duration | timedelta | Number | duration | Duration | Duration | +| timestamp | 34 | Instant | datetime | Number | std::chrono::nanoseconds | Time | DateTime | +| local_date | 35 | Date | datetime | Number | std::chrono::nanoseconds | Time | DateTime | +| decimal | 36 | BigDecimal | Decimal | bigint | / | / | / | +| binary | 37 | byte[] | bytes | / | `uint8_t[n]/vector` | `[n]uint8/[]T` | `Vec` | +| array | 38 | array | np.ndarray | / | / | array/slice | Vec | +| bool_array | 39 | bool[] | ndarray(np.bool\_) | / | `bool[n]` | `[n]bool/[]T` | `Vec` | +| int8_array | 40 | byte[] | ndarray(int8) | / | `int8_t[n]/vector` | `[n]int8/[]T` | `Vec` | +| int16_array | 41 | short[] | ndarray(int16) | / | `int16_t[n]/vector` | `[n]int16/[]T` | `Vec` | +| int32_array | 42 | int[] | ndarray(int32) | / | `int32_t[n]/vector` | `[n]int32/[]T` | `Vec` | +| int64_array | 43 | long[] | ndarray(int64) | / | `int64_t[n]/vector` | `[n]int64/[]T` | `Vec` | +| uint8_array | 44 | short[] | ndarray(uint8) | / | `uint8_t[n]/vector` | `[n]uint8/[]T` | `Vec` | +| uint16_array | 45 | int[] | ndarray(uint16) | / | `uint16_t[n]/vector` | `[n]uint16/[]T` | `Vec` | +| uint32_array | 46 | long[] | ndarray(uint32) | / | `uint32_t[n]/vector` | `[n]uint32/[]T` | `Vec` | +| uint64_array | 47 | long[] | ndarray(uint64) | / | `uint64_t[n]/vector` | `[n]uint64/[]T` | `Vec` | +| float16_array | 48 | float[] | ndarray(float16) | / | `fory::float16_t[n]/vector` | `[n]float16/[]T` | `Vec` | +| float32_array | 49 | float[] | ndarray(float32) | / | `float[n]/vector` | `[n]float32/[]T` | `Vec` | +| float64_array | 50 | double[] | ndarray(float64) | / | `double[n]/vector` | `[n]float64/[]T` | `Vec` | ## Type info(not implemented currently) diff --git a/go/fory/buffer.go b/go/fory/buffer.go index 8db021c990..13ec17f4af 100644 --- a/go/fory/buffer.go +++ b/go/fory/buffer.go @@ -96,6 +96,13 @@ func (b *ByteBuffer) WriteUint8(value uint8) { b.writerIndex++ } +//go:inline +func (b *ByteBuffer) 
WriteUint16(value uint16) { + b.grow(2) + binary.LittleEndian.PutUint16(b.data[b.writerIndex:], value) + b.writerIndex += 2 +} + //go:inline func (b *ByteBuffer) WriteInt16(value int16) { b.grow(2) @@ -103,6 +110,13 @@ func (b *ByteBuffer) WriteInt16(value int16) { b.writerIndex += 2 } +//go:inline +func (b *ByteBuffer) WriteUint32(value uint32) { + b.grow(4) + binary.LittleEndian.PutUint32(b.data[b.writerIndex:], value) + b.writerIndex += 4 +} + //go:inline func (b *ByteBuffer) WriteInt32(value int32) { b.grow(4) @@ -122,6 +136,13 @@ func (b *ByteBuffer) ReadLength(err *Error) int { return int(b.ReadVaruint32(err)) } +//go:inline +func (b *ByteBuffer) WriteUint64(value uint64) { + b.grow(8) + binary.LittleEndian.PutUint64(b.data[b.writerIndex:], value) + b.writerIndex += 8 +} + //go:inline func (b *ByteBuffer) WriteInt64(value int64) { b.grow(8) @@ -211,6 +232,19 @@ func (b *ByteBuffer) ReadInt16(err *Error) int16 { return v } +// ReadUint16 reads a uint16 and sets error on bounds violation +// +//go:inline +func (b *ByteBuffer) ReadUint16(err *Error) uint16 { + if b.readerIndex+2 > len(b.data) { + *err = BufferOutOfBoundError(b.readerIndex, 2, len(b.data)) + return 0 + } + v := binary.LittleEndian.Uint16(b.data[b.readerIndex:]) + b.readerIndex += 2 + return v +} + // ReadUint32 reads a uint32 and sets error on bounds violation // //go:inline @@ -498,6 +532,34 @@ func (b *ByteBuffer) UnsafeReadInt64() int64 { return v } +// UnsafeReadUint32 reads a uint32 without bounds check. +// +//go:inline +func (b *ByteBuffer) UnsafeReadUint32() uint32 { + var v uint32 + if isLittleEndian { + v = *(*uint32)(unsafe.Pointer(&b.data[b.readerIndex])) + } else { + v = binary.LittleEndian.Uint32(b.data[b.readerIndex:]) + } + b.readerIndex += 4 + return v +} + +// UnsafeReadUint64 reads a uint64 without bounds check. 
+// +//go:inline +func (b *ByteBuffer) UnsafeReadUint64() uint64 { + var v uint64 + if isLittleEndian { + v = *(*uint64)(unsafe.Pointer(&b.data[b.readerIndex])) + } else { + v = binary.LittleEndian.Uint64(b.data[b.readerIndex:]) + } + b.readerIndex += 8 + return v +} + // UnsafeWriteFloat32 writes a float32 without grow check. // //go:inline @@ -1037,6 +1099,109 @@ func (b *ByteBuffer) ReadVarint64(err *Error) int64 { return v } +// WriteTaggedInt64 writes int64 using tagged encoding. +// If value is in [-1073741824, 1073741823], encode as 4 bytes: ((value as i32) << 1). +// Otherwise write as 9 bytes: 0b1 | little-endian 8 bytes i64. +func (b *ByteBuffer) WriteTaggedInt64(value int64) { + const halfMinIntValue int64 = -1073741824 // INT32_MIN / 2 + const halfMaxIntValue int64 = 1073741823 // INT32_MAX / 2 + if value >= halfMinIntValue && value <= halfMaxIntValue { + b.WriteInt32(int32(value) << 1) + } else { + b.grow(9) + b.data[b.writerIndex] = 0b1 + if isLittleEndian { + *(*int64)(unsafe.Pointer(&b.data[b.writerIndex+1])) = value + } else { + binary.LittleEndian.PutUint64(b.data[b.writerIndex+1:], uint64(value)) + } + b.writerIndex += 9 + } +} + +// ReadTaggedInt64 reads int64 using tagged encoding. +// If bit 0 is 0, return value >> 1 (arithmetic shift). +// Otherwise, skip flag byte and read 8 bytes as int64. 
+func (b *ByteBuffer) ReadTaggedInt64(err *Error) int64 { + if b.readerIndex+4 > len(b.data) { + *err = BufferOutOfBoundError(b.readerIndex, 4, len(b.data)) + return 0 + } + var i int32 + if isLittleEndian { + i = *(*int32)(unsafe.Pointer(&b.data[b.readerIndex])) + } else { + i = int32(binary.LittleEndian.Uint32(b.data[b.readerIndex:])) + } + if (i & 0b1) != 0b1 { + b.readerIndex += 4 + return int64(i >> 1) // arithmetic right shift + } + if b.readerIndex+9 > len(b.data) { + *err = BufferOutOfBoundError(b.readerIndex, 9, len(b.data)) + return 0 + } + var value int64 + if isLittleEndian { + value = *(*int64)(unsafe.Pointer(&b.data[b.readerIndex+1])) + } else { + value = int64(binary.LittleEndian.Uint64(b.data[b.readerIndex+1:])) + } + b.readerIndex += 9 + return value +} + +// WriteTaggedUint64 writes uint64 using tagged encoding. +// If value is in [0, 0x7fffffff], encode as 4 bytes: ((value as u32) << 1). +// Otherwise write as 9 bytes: 0b1 | little-endian 8 bytes u64. +func (b *ByteBuffer) WriteTaggedUint64(value uint64) { + const maxSmallValue uint64 = 0x7fffffff // INT32_MAX as u64 + if value <= maxSmallValue { + b.WriteInt32(int32(value) << 1) + } else { + b.grow(9) + b.data[b.writerIndex] = 0b1 + if isLittleEndian { + *(*uint64)(unsafe.Pointer(&b.data[b.writerIndex+1])) = value + } else { + binary.LittleEndian.PutUint64(b.data[b.writerIndex+1:], value) + } + b.writerIndex += 9 + } +} + +// ReadTaggedUint64 reads uint64 using tagged encoding. +// If bit 0 is 0, return value >> 1. +// Otherwise, skip flag byte and read 8 bytes as uint64. 
+func (b *ByteBuffer) ReadTaggedUint64(err *Error) uint64 { + if b.readerIndex+4 > len(b.data) { + *err = BufferOutOfBoundError(b.readerIndex, 4, len(b.data)) + return 0 + } + var i uint32 + if isLittleEndian { + i = *(*uint32)(unsafe.Pointer(&b.data[b.readerIndex])) + } else { + i = binary.LittleEndian.Uint32(b.data[b.readerIndex:]) + } + if (i & 0b1) != 0b1 { + b.readerIndex += 4 + return uint64(i >> 1) + } + if b.readerIndex+9 > len(b.data) { + *err = BufferOutOfBoundError(b.readerIndex, 9, len(b.data)) + return 0 + } + var value uint64 + if isLittleEndian { + value = *(*uint64)(unsafe.Pointer(&b.data[b.readerIndex+1])) + } else { + value = binary.LittleEndian.Uint64(b.data[b.readerIndex+1:]) + } + b.readerIndex += 9 + return value +} + // ReadVaruint64 reads unsigned varint // //go:inline @@ -1189,6 +1354,22 @@ func (b *ByteBuffer) UnsafeReadVarint64() int64 { return v } +// UnsafeReadVaruint32 reads a varuint32 without bounds checking. +// Caller must ensure remaining() >= 5 before calling. +// +//go:inline +func (b *ByteBuffer) UnsafeReadVaruint32() uint32 { + return b.readVaruint32Fast() +} + +// UnsafeReadVaruint64 reads a varuint64 without bounds checking. +// Caller must ensure remaining() >= 10 before calling. +// +//go:inline +func (b *ByteBuffer) UnsafeReadVaruint64() uint64 { + return b.readVaruint64Fast() +} + // ReadVaruint32 reads a varuint32 and sets error on bounds violation // //go:inline @@ -1318,6 +1499,46 @@ func (b *ByteBuffer) unsafePutInt64(index int, v uint64) { binary.LittleEndian.PutUint64(b.data[index:], v) } +// UnsafePutUint32 writes a uint32 at the given offset without advancing writerIndex. +// Caller must have called Reserve() to ensure capacity. +// Returns the number of bytes written (4). 
+// +//go:inline +func (b *ByteBuffer) UnsafePutUint32(offset int, value uint32) int { + binary.LittleEndian.PutUint32(b.data[offset:], value) + return 4 +} + +// UnsafePutUint64 writes a uint64 at the given offset without advancing writerIndex. +// Caller must have called Reserve() to ensure capacity. +// Returns the number of bytes written (8). +// +//go:inline +func (b *ByteBuffer) UnsafePutUint64(offset int, value uint64) int { + binary.LittleEndian.PutUint64(b.data[offset:], value) + return 8 +} + +// UnsafePutInt8 writes 1 byte at the given offset without bound checking. +// Caller must have ensured capacity. +// Returns the number of bytes written (1). +// +//go:inline +func (b *ByteBuffer) UnsafePutInt8(offset int, value int8) int { + b.data[offset] = byte(value) + return 1 +} + +// UnsafePutInt64 writes an int64 in little-endian format at the given offset without bound checking. +// Caller must have ensured capacity. +// Returns the number of bytes written (8). +// +//go:inline +func (b *ByteBuffer) UnsafePutInt64(offset int, value int64) int { + binary.LittleEndian.PutUint64(b.data[offset:], uint64(value)) + return 8 +} + // ReadVaruint32Small7 reads a varuint32 in small-7 format with error checking func (b *ByteBuffer) ReadVaruint32Small7(err *Error) uint32 { if b.readerIndex >= len(b.data) { diff --git a/go/fory/codegen/decoder.go b/go/fory/codegen/decoder.go index 239cabb0e9..a6adbfd9b2 100644 --- a/go/fory/codegen/decoder.go +++ b/go/fory/codegen/decoder.go @@ -541,17 +541,17 @@ func generateElementTypeIDReadInline(buf *bytes.Buffer, elemType types.Type) err case types.Int16: expectedTypeID = int(fory.INT16) case types.Int32: - expectedTypeID = int(fory.INT32) + expectedTypeID = int(fory.VARINT32) case types.Int, types.Int64: - expectedTypeID = int(fory.INT64) + expectedTypeID = int(fory.VARINT64) case types.Uint8: expectedTypeID = int(fory.UINT8) case types.Uint16: expectedTypeID = int(fory.UINT16) case types.Uint32: - expectedTypeID = 
int(fory.UINT32) + expectedTypeID = int(fory.VAR_UINT32) case types.Uint, types.Uint64: - expectedTypeID = int(fory.UINT64) + expectedTypeID = int(fory.VAR_UINT64) case types.Float32: expectedTypeID = int(fory.FLOAT32) case types.Float64: diff --git a/go/fory/codegen/encoder.go b/go/fory/codegen/encoder.go index dcc90ba10b..49c3db3f22 100644 --- a/go/fory/codegen/encoder.go +++ b/go/fory/codegen/encoder.go @@ -233,17 +233,17 @@ func generateElementTypeIDWrite(buf *bytes.Buffer, elemType types.Type) error { case types.Int16: fmt.Fprintf(buf, "\t\tbuf.WriteVaruint32(%d) // INT16\n", fory.INT16) case types.Int32: - fmt.Fprintf(buf, "\t\tbuf.WriteVaruint32(%d) // INT32\n", fory.INT32) + fmt.Fprintf(buf, "\t\tbuf.WriteVaruint32(%d) // VARINT32\n", fory.VARINT32) case types.Int, types.Int64: - fmt.Fprintf(buf, "\t\tbuf.WriteVaruint32(%d) // INT64\n", fory.INT64) + fmt.Fprintf(buf, "\t\tbuf.WriteVaruint32(%d) // VARINT64\n", fory.VARINT64) case types.Uint8: fmt.Fprintf(buf, "\t\tbuf.WriteVaruint32(%d) // UINT8\n", fory.UINT8) case types.Uint16: fmt.Fprintf(buf, "\t\tbuf.WriteVaruint32(%d) // UINT16\n", fory.UINT16) case types.Uint32: - fmt.Fprintf(buf, "\t\tbuf.WriteVaruint32(%d) // UINT32\n", fory.UINT32) + fmt.Fprintf(buf, "\t\tbuf.WriteVaruint32(%d) // VAR_UINT32\n", fory.VAR_UINT32) case types.Uint, types.Uint64: - fmt.Fprintf(buf, "\t\tbuf.WriteVaruint32(%d) // UINT64\n", fory.UINT64) + fmt.Fprintf(buf, "\t\tbuf.WriteVaruint32(%d) // VAR_UINT64\n", fory.VAR_UINT64) case types.Float32: fmt.Fprintf(buf, "\t\tbuf.WriteVaruint32(%d) // FLOAT32\n", fory.FLOAT32) case types.Float64: @@ -723,17 +723,17 @@ func generateElementTypeIDWriteInline(buf *bytes.Buffer, elemType types.Type) er case types.Int16: fmt.Fprintf(buf, "\t\t\t\tbuf.WriteVaruint32(%d) // INT16\n", fory.INT16) case types.Int32: - fmt.Fprintf(buf, "\t\t\t\tbuf.WriteVaruint32(%d) // INT32\n", fory.INT32) + fmt.Fprintf(buf, "\t\t\t\tbuf.WriteVaruint32(%d) // VARINT32\n", fory.VARINT32) case types.Int, 
types.Int64: - fmt.Fprintf(buf, "\t\t\t\tbuf.WriteVaruint32(%d) // INT64\n", fory.INT64) + fmt.Fprintf(buf, "\t\t\t\tbuf.WriteVaruint32(%d) // VARINT64\n", fory.VARINT64) case types.Uint8: fmt.Fprintf(buf, "\t\t\t\tbuf.WriteVaruint32(%d) // UINT8\n", fory.UINT8) case types.Uint16: fmt.Fprintf(buf, "\t\t\t\tbuf.WriteVaruint32(%d) // UINT16\n", fory.UINT16) case types.Uint32: - fmt.Fprintf(buf, "\t\t\t\tbuf.WriteVaruint32(%d) // UINT32\n", fory.UINT32) + fmt.Fprintf(buf, "\t\t\t\tbuf.WriteVaruint32(%d) // VAR_UINT32\n", fory.VAR_UINT32) case types.Uint, types.Uint64: - fmt.Fprintf(buf, "\t\t\t\tbuf.WriteVaruint32(%d) // UINT64\n", fory.UINT64) + fmt.Fprintf(buf, "\t\t\t\tbuf.WriteVaruint32(%d) // VAR_UINT64\n", fory.VAR_UINT64) case types.Float32: fmt.Fprintf(buf, "\t\t\t\tbuf.WriteVaruint32(%d) // FLOAT32\n", fory.FLOAT32) case types.Float64: diff --git a/go/fory/codegen/utils.go b/go/fory/codegen/utils.go index c833ee841e..f562ccf1d7 100644 --- a/go/fory/codegen/utils.go +++ b/go/fory/codegen/utils.go @@ -33,6 +33,7 @@ type FieldInfo struct { Index int // Original field index in struct IsPrimitive bool // Whether it's a Fory primitive type IsPointer bool // Whether it's a pointer type + Nullable bool // Whether the field can be null (pointer types) TypeID string // Fory TypeID for sorting PrimitiveSize int // Size for primitive type sorting } @@ -138,8 +139,33 @@ func getTypeID(t types.Type) string { t = ptr.Elem() } - // Check slice types - if _, ok := t.(*types.Slice); ok { + // Check slice types - distinguish primitive arrays from generic lists + if slice, ok := t.(*types.Slice); ok { + elemType := slice.Elem() + // For pointer to primitive, unwrap the pointer + if ptr, ok := elemType.(*types.Pointer); ok { + elemType = ptr.Elem() + } + // Check if element is a primitive type (primitive arrays use specific typeIDs) + if basic, ok := elemType.Underlying().(*types.Basic); ok { + switch basic.Kind() { + case types.Bool: + return "BOOL_ARRAY" + case types.Int8: + 
return "INT8_ARRAY" + case types.Int16: + return "INT16_ARRAY" + case types.Int32: + return "INT32_ARRAY" + case types.Int, types.Int64: + return "INT64_ARRAY" + case types.Float32: + return "FLOAT32_ARRAY" + case types.Float64: + return "FLOAT64_ARRAY" + } + } + // Non-primitive slices use LIST return "LIST" } @@ -180,17 +206,17 @@ func getTypeID(t types.Type) string { case types.Int16: return "INT16" case types.Int32: - return "INT32" + return "VARINT32" case types.Int, types.Int64: - return "INT64" + return "VARINT64" case types.Uint8: return "UINT8" case types.Uint16: return "UINT16" case types.Uint32: - return "UINT32" + return "VAR_UINT32" case types.Uint, types.Uint64: - return "UINT64" + return "VAR_UINT64" case types.Float32: return "FLOAT32" case types.Float64: @@ -240,32 +266,59 @@ func getTypeIDValue(typeID string) int { return int(fory.INT16) // 3 case "INT32": return int(fory.INT32) // 4 + case "VARINT32": + return int(fory.VARINT32) // 5 case "INT64": return int(fory.INT64) // 6 + case "VARINT64": + return int(fory.VARINT64) // 7 case "UINT8": - return int(fory.UINT8) // 100 + return int(fory.UINT8) // 9 case "UINT16": - return int(fory.UINT16) // 101 + return int(fory.UINT16) // 10 case "UINT32": - return int(fory.UINT32) // 102 + return int(fory.UINT32) // 11 + case "VAR_UINT32": + return int(fory.VAR_UINT32) // 12 case "UINT64": - return int(fory.UINT64) // 103 + return int(fory.UINT64) // 13 + case "VAR_UINT64": + return int(fory.VAR_UINT64) // 14 case "FLOAT32": return int(fory.FLOAT32) case "FLOAT64": return int(fory.FLOAT64) case "STRING": - return int(fory.STRING) // 12 + return int(fory.STRING) // 9 + case "BINARY": + return int(fory.BINARY) // 10 + case "LIST": + return int(fory.LIST) // 20 + case "SET": + return int(fory.SET) // 21 + case "MAP": + return int(fory.MAP) // 22 case "TIMESTAMP": return int(fory.TIMESTAMP) // 25 case "LOCAL_DATE": return int(fory.LOCAL_DATE) // 26 case "NAMED_STRUCT": return int(fory.NAMED_STRUCT) // 17 - case 
"LIST": - return int(fory.LIST) // 21 - case "MAP": - return int(fory.MAP) // 23 + // Primitive array types + case "BOOL_ARRAY": + return int(fory.BOOL_ARRAY) // 39 + case "INT8_ARRAY": + return int(fory.INT8_ARRAY) // 40 + case "INT16_ARRAY": + return int(fory.INT16_ARRAY) // 41 + case "INT32_ARRAY": + return int(fory.INT32_ARRAY) // 42 + case "INT64_ARRAY": + return int(fory.INT64_ARRAY) // 43 + case "FLOAT32_ARRAY": + return int(fory.FLOAT32_ARRAY) // 49 + case "FLOAT64_ARRAY": + return int(fory.FLOAT64_ARRAY) // 50 default: return 999 // Unknown types sort last } @@ -293,11 +346,15 @@ func sortFields(fields []*FieldInfo) { // When same size, sort by type id // When same size and type id, sort by snake case field name - // Handle compression types (INT32/INT64/VAR32/VAR64) + // Handle compression types (INT32/INT64/VARINT32/VARINT64 and unsigned variants) compressI := f1.TypeID == "INT32" || f1.TypeID == "INT64" || - f1.TypeID == "VAR32" || f1.TypeID == "VAR64" + f1.TypeID == "VARINT32" || f1.TypeID == "VARINT64" || + f1.TypeID == "UINT32" || f1.TypeID == "UINT64" || + f1.TypeID == "VAR_UINT32" || f1.TypeID == "VAR_UINT64" compressJ := f2.TypeID == "INT32" || f2.TypeID == "INT64" || - f2.TypeID == "VAR32" || f2.TypeID == "VAR64" + f2.TypeID == "VARINT32" || f2.TypeID == "VARINT64" || + f2.TypeID == "UINT32" || f2.TypeID == "UINT64" || + f2.TypeID == "VAR_UINT32" || f2.TypeID == "VAR_UINT64" if compressI != compressJ { return !compressI && compressJ // non-compress comes first @@ -317,14 +374,15 @@ func sortFields(fields []*FieldInfo) { return f1.SnakeName < f2.SnakeName case groupOtherInternalType: - // Other internal type fields: sort by type id then snake case field name + // Internal type fields (STRING, BINARY, LIST, SET, MAP): sort by type id then name only. + // Java does NOT sort by nullable flag for these types. 
if f1.TypeID != f2.TypeID { return getTypeIDValue(f1.TypeID) < getTypeIDValue(f2.TypeID) } return f1.SnakeName < f2.SnakeName - case groupList, groupSet, groupMap, groupOther: - // List/Set/Map/Other fields: sort by snake case field name only + case groupPrimitiveArray, groupOther: + // Primitive arrays and other fields: sort by snake case field name only return f1.SnakeName < f2.SnakeName default: @@ -335,13 +393,13 @@ func sortFields(fields []*FieldInfo) { } // Field group constants for sorting +// This matches reflection's field ordering in field_info.go: +// primitives → boxed → otherInternalType (STRING/BINARY/LIST/SET/MAP) → primitiveArray → other const ( groupPrimitive = 0 // primitive and nullable primitive fields - groupOtherInternalType = 1 // other internal type fields (string, timestamp, etc.) - groupList = 2 // list fields - groupSet = 3 // set fields - groupMap = 4 // map fields - groupOther = 5 // other fields + groupOtherInternalType = 1 // STRING, BINARY, LIST, SET, MAP (sorted by typeId, name) + groupPrimitiveArray = 2 // primitive arrays (BOOL_ARRAY, INT32_ARRAY, etc.) 
- sorted by name + groupOther = 3 // structs, enums, and unknown types - sorted by name ) // getFieldGroup categorizes a field into its sorting group @@ -354,38 +412,29 @@ func getFieldGroup(field *FieldInfo) int { return groupPrimitive } - // List fields - if typeID == "LIST" { - return groupList - } - - // Set fields - if typeID == "SET" { - return groupSet + // Primitive array fields - sorted by name only + primitiveArrayTypes := map[string]bool{ + "BOOL_ARRAY": true, + "INT8_ARRAY": true, + "INT16_ARRAY": true, + "INT32_ARRAY": true, + "INT64_ARRAY": true, + "FLOAT32_ARRAY": true, + "FLOAT64_ARRAY": true, } - - // Map fields - if typeID == "MAP" { - return groupMap + if primitiveArrayTypes[typeID] { + return groupPrimitiveArray } - // Other internal type fields - // These are fory internal types that are not primitives/lists/sets/maps - // Examples: STRING, TIMESTAMP, LOCAL_DATE, NAMED_STRUCT, etc. + // Internal types (STRING, BINARY, LIST, SET, MAP) - sorted by typeId, then name + // These match reflection's category 1 in getFieldCategory internalTypes := map[string]bool{ - "STRING": true, - "TIMESTAMP": true, - "LOCAL_DATE": true, - "NAMED_STRUCT": true, - "STRUCT": true, - "BINARY": true, - "ENUM": true, - "NAMED_ENUM": true, - "EXT": true, - "NAMED_EXT": true, - "INTERFACE": true, // for interface{} types + "STRING": true, + "BINARY": true, + "LIST": true, + "SET": true, + "MAP": true, } - if internalTypes[typeID] { return groupOtherInternalType } @@ -436,6 +485,7 @@ func analyzeField(field *types.Var, index int) (*FieldInfo, error) { Index: index, IsPrimitive: isPrimitive, IsPointer: isPointer, + Nullable: isPointer, // Pointer types are nullable, slices/maps are non-nullable in xlang mode TypeID: typeID, PrimitiveSize: primitiveSize, }, nil diff --git a/go/fory/field_info.go b/go/fory/field_info.go new file mode 100644 index 0000000000..cac8ae4465 --- /dev/null +++ b/go/fory/field_info.go @@ -0,0 +1,806 @@ +// Licensed to the Apache Software
Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package fory + +import ( + "fmt" + "reflect" + "sort" + "strings" +) + +// FieldInfo stores field metadata computed ENTIRELY at init time. +// All flags and decisions are pre-computed to eliminate runtime checks. +type FieldInfo struct { + Name string + Offset uintptr + Type reflect.Type + DispatchId DispatchId + TypeId TypeId // Fory type ID for the serializer + Serializer Serializer + Nullable bool + FieldIndex int // -1 if field doesn't exist in current struct (for compatible mode) + FieldDef FieldDef // original FieldDef from remote TypeDef (for compatible mode skip) + + // Pre-computed sizes and offsets (for fixed primitives) + FixedSize int // 0 if not fixed-size, else 1/2/4/8 + WriteOffset int // Offset within fixed-fields buffer region (sum of preceding field sizes) + + // Pre-computed flags for serialization (computed at init time) + RefMode RefMode // ref mode for serializer.Write/Read + WriteType bool // whether to write type info (true for struct fields in compatible mode) + HasGenerics bool // whether element types are known from TypeDef (for container fields) + + // Tag-based configuration (from fory struct tags) + TagID int // -1 = use field name, >=0 = use tag ID + HasForyTag bool // 
Whether field has explicit fory tag + TagRefSet bool // Whether ref was explicitly set via fory tag + TagRef bool // The ref value from fory tag (only valid if TagRefSet is true) + TagNullableSet bool // Whether nullable was explicitly set via fory tag + TagNullable bool // The nullable value from fory tag (only valid if TagNullableSet is true) + + // Pre-computed type flags (computed at init time to avoid runtime reflection) + IsPtr bool // True if field.Type.Kind() == reflect.Ptr +} + +// FieldGroup holds categorized and sorted fields for optimized serialization. +// Fields are stored as values (not pointers) for better cache locality. +// Each field belongs to exactly one category: +// - FixedFields: non-nullable fixed-size primitives (bool, int8-64, uint8-64, float32/64) +// - VarintFields: non-nullable varint primitives (varint32/64, var_uint32/64, tagged_int64/uint64) +// - RemainingFields: all other fields (nullable primitives, strings, collections, structs, etc.) +type FieldGroup struct { + FixedFields []FieldInfo // Non-nullable fixed-size primitives + VarintFields []FieldInfo // Non-nullable varint primitives + RemainingFields []FieldInfo // All other fields + FixedSize int // Total bytes for fixed-size fields + MaxVarintSize int // Maximum bytes for varint fields +} + +// FieldCount returns the total number of fields across all categories. +func (g *FieldGroup) FieldCount() int { + return len(g.FixedFields) + len(g.VarintFields) + len(g.RemainingFields) +} + +// ForEachField iterates over all fields in serialization order (fixed, varint, remaining). +func (g *FieldGroup) ForEachField(fn func(*FieldInfo)) { + for i := range g.FixedFields { + fn(&g.FixedFields[i]) + } + for i := range g.VarintFields { + fn(&g.VarintFields[i]) + } + for i := range g.RemainingFields { + fn(&g.RemainingFields[i]) + } +} + +// DebugPrint prints field group information for debugging. 
+func (g *FieldGroup) DebugPrint(typeName string) { + if !DebugOutputEnabled() { + return + } + fmt.Printf("[Go] ========== Sorted fields for %s ==========\n", typeName) + fmt.Printf("[Go] Go sorted fixedFields (%d):\n", len(g.FixedFields)) + for i := range g.FixedFields { + f := &g.FixedFields[i] + fmt.Printf("[Go] [%d] %s -> dispatchId=%d, typeId=%d, size=%d, nullable=%v\n", + i, f.Name, f.DispatchId, f.TypeId, f.FixedSize, f.Nullable) + } + fmt.Printf("[Go] Go sorted varintFields (%d):\n", len(g.VarintFields)) + for i := range g.VarintFields { + f := &g.VarintFields[i] + fmt.Printf("[Go] [%d] %s -> dispatchId=%d, typeId=%d, nullable=%v\n", + i, f.Name, f.DispatchId, f.TypeId, f.Nullable) + } + fmt.Printf("[Go] Go sorted remainingFields (%d):\n", len(g.RemainingFields)) + for i := range g.RemainingFields { + f := &g.RemainingFields[i] + fmt.Printf("[Go] [%d] %s -> dispatchId=%d, typeId=%d, nullable=%v\n", + i, f.Name, f.DispatchId, f.TypeId, f.Nullable) + } + fmt.Printf("[Go] ===========================================\n") +} + +// GroupFields categorizes and sorts fields into FixedFields, VarintFields, and RemainingFields. +// It computes pre-computed sizes and WriteOffset for batch buffer reservation. +// Fields are sorted within each group to match Java's wire format order. 
+func GroupFields(fields []FieldInfo) FieldGroup { + var g FieldGroup + + // Categorize fields + for i := range fields { + field := &fields[i] + if isFixedSizePrimitive(field.DispatchId, field.Nullable) { + // Non-nullable fixed-size primitives only + field.FixedSize = getFixedSizeByDispatchId(field.DispatchId) + g.FixedFields = append(g.FixedFields, *field) + } else if isVarintPrimitive(field.DispatchId, field.Nullable) { + // Non-nullable varint primitives only + g.VarintFields = append(g.VarintFields, *field) + } else { + // All other fields including nullable primitives + g.RemainingFields = append(g.RemainingFields, *field) + } + } + + // Sort fixedFields: size desc, typeId desc, name asc + sort.SliceStable(g.FixedFields, func(i, j int) bool { + fi, fj := &g.FixedFields[i], &g.FixedFields[j] + if fi.FixedSize != fj.FixedSize { + return fi.FixedSize > fj.FixedSize // size descending + } + if fi.TypeId != fj.TypeId { + return fi.TypeId > fj.TypeId // typeId descending + } + return fi.Name < fj.Name // name ascending + }) + + // Compute WriteOffset after sorting + for i := range g.FixedFields { + g.FixedFields[i].WriteOffset = g.FixedSize + g.FixedSize += g.FixedFields[i].FixedSize + } + + // Sort varintFields: underlying type size desc, typeId desc, name asc + // Note: Java uses primitive type size (8 for long, 4 for int), not encoding max size + sort.SliceStable(g.VarintFields, func(i, j int) bool { + fi, fj := &g.VarintFields[i], &g.VarintFields[j] + sizeI := getUnderlyingTypeSize(fi.DispatchId) + sizeJ := getUnderlyingTypeSize(fj.DispatchId) + if sizeI != sizeJ { + return sizeI > sizeJ // size descending + } + if fi.TypeId != fj.TypeId { + return fi.TypeId > fj.TypeId // typeId descending + } + return fi.Name < fj.Name // name ascending + }) + + // Compute maxVarintSize + for i := range g.VarintFields { + g.MaxVarintSize += getVarintMaxSizeByDispatchId(g.VarintFields[i].DispatchId) + } + + // Sort remainingFields: nullable primitives first (by 
primitiveComparator), + // then other internal types (typeId, name), then lists, sets, maps, other (by name) + sort.SliceStable(g.RemainingFields, func(i, j int) bool { + fi, fj := &g.RemainingFields[i], &g.RemainingFields[j] + catI, catJ := getFieldCategory(fi), getFieldCategory(fj) + if catI != catJ { + return catI < catJ + } + // Within nullable primitives category, use primitiveComparator logic + if catI == 0 { + return comparePrimitiveFields(fi, fj) + } + // Within other internal types category (STRING, BINARY, LIST, SET, MAP), + // sort by typeId then by sort key (tagID if available, otherwise name). + if catI == 1 { + if fi.TypeId != fj.TypeId { + return fi.TypeId < fj.TypeId + } + return getFieldSortKey(fi) < getFieldSortKey(fj) + } + // Other categories (struct, enum, etc.): sort by sort key (tagID if available, otherwise name) + return getFieldSortKey(fi) < getFieldSortKey(fj) + }) + + return g +} + +// fieldHasNonPrimitiveSerializer returns true if the field has a serializer with a non-primitive type ID. +// This is used to skip the fast path for fields like enums where DispatchId is int32 but the serializer +// writes a different format (e.g., unsigned varint for enum ordinals vs signed zigzag for int32). 
+func fieldHasNonPrimitiveSerializer(field *FieldInfo) bool { + if field.Serializer == nil { + return false + } + // ENUM (numeric ID), NAMED_ENUM (namespace/typename), NAMED_STRUCT, NAMED_COMPATIBLE_STRUCT, NAMED_EXT + // all require special serialization and should not use the primitive fast path + // Note: ENUM uses unsigned Varuint32Small7 for ordinals, not signed zigzag varint + // Use internal type ID (low 8 bits) since registered types have composite TypeIds like (userID << 8) | internalID + internalTypeId := TypeId(field.TypeId & 0xFF) + switch internalTypeId { + case ENUM, NAMED_ENUM, NAMED_STRUCT, NAMED_COMPATIBLE_STRUCT, NAMED_EXT: + return true + default: + return false + } +} + +// isEnumField checks if a field is an enum type based on its TypeId +func isEnumField(field *FieldInfo) bool { + if field.Serializer == nil { + return false + } + internalTypeId := field.TypeId & 0xFF + return internalTypeId == ENUM || internalTypeId == NAMED_ENUM +} + +// getFieldCategory returns the category for sorting remainingFields: +// 0: nullable primitives (sorted by primitiveComparator) +// 1: internal types STRING, BINARY, LIST, SET, MAP (sorted by typeId, then name) +// 2: struct, enum, and all other types (sorted by name only) +func getFieldCategory(field *FieldInfo) int { + if isNullableFixedSizePrimitive(field.DispatchId) || isNullableVarintPrimitive(field.DispatchId) { + return 0 + } + internalId := field.TypeId & 0xFF + switch TypeId(internalId) { + case STRING, BINARY, LIST, SET, MAP: + // Internal types: sorted by typeId, then name + return 1 + default: + // struct, enum, and all other types: sorted by name + return 2 + } +} + +// comparePrimitiveFields compares two nullable primitive fields using Java's primitiveComparator logic: +// fixed before varint, then underlying type size desc, typeId desc, name asc +func comparePrimitiveFields(fi, fj *FieldInfo) bool { + iFixed := isNullableFixedSizePrimitive(fi.DispatchId) + jFixed := 
isNullableFixedSizePrimitive(fj.DispatchId) + if iFixed != jFixed { + return iFixed // fixed before varint + } + // Same category: compare by underlying type size desc, typeId desc, name asc + // Note: Java uses primitive type size (8, 4, 2, 1), not encoding size + sizeI := getUnderlyingTypeSize(fi.DispatchId) + sizeJ := getUnderlyingTypeSize(fj.DispatchId) + if sizeI != sizeJ { + return sizeI > sizeJ // size descending + } + if fi.TypeId != fj.TypeId { + return fi.TypeId > fj.TypeId // typeId descending + } + return fi.Name < fj.Name // name ascending +} + +// getNullableFixedSize returns the fixed size for nullable fixed primitives +func getNullableFixedSize(dispatchId DispatchId) int { + switch dispatchId { + case NullableBoolDispatchId, NullableInt8DispatchId, NullableUint8DispatchId: + return 1 + case NullableInt16DispatchId, NullableUint16DispatchId: + return 2 + case NullableInt32DispatchId, NullableUint32DispatchId, NullableFloat32DispatchId: + return 4 + case NullableInt64DispatchId, NullableUint64DispatchId, NullableFloat64DispatchId: + return 8 + default: + return 0 + } +} + +// getNullableVarintMaxSize returns the max size for nullable varint primitives +func getNullableVarintMaxSize(dispatchId DispatchId) int { + switch dispatchId { + case NullableVarint32DispatchId, NullableVarUint32DispatchId: + return 5 + case NullableVarint64DispatchId, NullableVarUint64DispatchId, NullableIntDispatchId, NullableUintDispatchId: + return 10 + case NullableTaggedInt64DispatchId, NullableTaggedUint64DispatchId: + return 9 + default: + return 0 + } +} + +// getUnderlyingTypeSize returns the size of the underlying primitive type (8 for 64-bit, 4 for 32-bit, etc.) 
+// This matches Java's getSizeOfPrimitiveType() which uses the type size, not encoding size +func getUnderlyingTypeSize(dispatchId DispatchId) int { + switch dispatchId { + // 64-bit types + case PrimitiveInt64DispatchId, PrimitiveUint64DispatchId, PrimitiveFloat64DispatchId, + NotnullInt64PtrDispatchId, NotnullUint64PtrDispatchId, NotnullFloat64PtrDispatchId, + PrimitiveVarint64DispatchId, PrimitiveVarUint64DispatchId, + NotnullVarint64PtrDispatchId, NotnullVarUint64PtrDispatchId, + PrimitiveTaggedInt64DispatchId, PrimitiveTaggedUint64DispatchId, + NotnullTaggedInt64PtrDispatchId, NotnullTaggedUint64PtrDispatchId, + PrimitiveIntDispatchId, PrimitiveUintDispatchId, + NotnullIntPtrDispatchId, NotnullUintPtrDispatchId: + return 8 + // 32-bit types + case PrimitiveInt32DispatchId, PrimitiveUint32DispatchId, PrimitiveFloat32DispatchId, + NotnullInt32PtrDispatchId, NotnullUint32PtrDispatchId, NotnullFloat32PtrDispatchId, + PrimitiveVarint32DispatchId, PrimitiveVarUint32DispatchId, + NotnullVarint32PtrDispatchId, NotnullVarUint32PtrDispatchId: + return 4 + // 16-bit types + case PrimitiveInt16DispatchId, PrimitiveUint16DispatchId, + NotnullInt16PtrDispatchId, NotnullUint16PtrDispatchId: + return 2 + // 8-bit types + case PrimitiveBoolDispatchId, PrimitiveInt8DispatchId, PrimitiveUint8DispatchId, + NotnullBoolPtrDispatchId, NotnullInt8PtrDispatchId, NotnullUint8PtrDispatchId: + return 1 + // Nullable types + case NullableInt64DispatchId, NullableUint64DispatchId, NullableFloat64DispatchId, + NullableVarint64DispatchId, NullableVarUint64DispatchId, + NullableTaggedInt64DispatchId, NullableTaggedUint64DispatchId, + NullableIntDispatchId, NullableUintDispatchId: + return 8 + case NullableInt32DispatchId, NullableUint32DispatchId, NullableFloat32DispatchId, + NullableVarint32DispatchId, NullableVarUint32DispatchId: + return 4 + case NullableInt16DispatchId, NullableUint16DispatchId: + return 2 + case NullableBoolDispatchId, NullableInt8DispatchId, NullableUint8DispatchId: + 
return 1 + default: + return 0 + } +} + +func isNonNullablePrimitiveKind(kind reflect.Kind) bool { + switch kind { + case reflect.Bool, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, + reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, + reflect.Float32, reflect.Float64, reflect.Int, reflect.Uint: + return true + default: + return false + } +} + +// isInternalTypeWithoutTypeMeta checks if a type is serialized without type meta per xlang spec. +// Per the spec (struct field serialization), these types use format: | ref/null flag | value data | (NO type meta) +// - Nullable primitives (*int32, *float64, etc.): | null flag | field value | +// - Strings (string): | null flag | value data | +// - Binary ([]byte): | null flag | value data | +// - List/Slice: | ref meta | value data | +// - Set: | ref meta | value data | +// - Map: | ref meta | value data | +// Only struct/enum/ext types need type meta: | ref flag | type meta | value data | +func isInternalTypeWithoutTypeMeta(t reflect.Type) bool { + kind := t.Kind() + // String type - no type meta needed + if kind == reflect.String { + return true + } + // Slice (list or byte slice) - no type meta needed + if kind == reflect.Slice { + return true + } + // Map type - no type meta needed + if kind == reflect.Map { + return true + } + // Pointer to primitive - no type meta needed + if kind == reflect.Ptr { + elemKind := t.Elem().Kind() + switch elemKind { + case reflect.Bool, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, + reflect.Int, reflect.Float32, reflect.Float64, reflect.String: + return true + } + } + return false +} + +// isStructField checks if a type is a struct type (directly or via pointer) +func isStructField(t reflect.Type) bool { + if t.Kind() == reflect.Struct { + return true + } + if t.Kind() == reflect.Ptr && t.Elem().Kind() == reflect.Struct { + return true + } + return false +} + +// isStructFieldType checks if a FieldType represents a type that needs type info 
written +// This is used to determine if type info was written for the field in compatible mode +// In compatible mode, Java writes type info for struct and ext types, but NOT for enum types +// Enum fields only have null flag + ordinal, no type ID +func isStructFieldType(ft FieldType) bool { + if ft == nil { + return false + } + typeId := ft.TypeId() + // Check base type IDs that need type info (struct and ext, NOT enum) + // Always check the internal type ID (low byte) to handle composite type IDs + // which may be negative when stored as int32 (e.g., -2288 = (short)128784) + internalTypeId := TypeId(typeId & 0xFF) + switch internalTypeId { + case STRUCT, NAMED_STRUCT, COMPATIBLE_STRUCT, NAMED_COMPATIBLE_STRUCT, + EXT, NAMED_EXT: + return true + } + return false +} + +// FieldFingerprintInfo contains the information needed to compute a field's fingerprint. +type FieldFingerprintInfo struct { + // FieldID is the tag ID if configured (>= 0), or -1 to use field name + FieldID int + // FieldName is the snake_case field name (used when FieldID < 0) + FieldName string + // TypeID is the Fory type ID for the field + TypeID TypeId + // Ref is true if reference tracking is enabled for this field + Ref bool + // Nullable is true if null flag is written for this field + Nullable bool +} + +// ComputeStructFingerprint computes the fingerprint string for a struct type. 
+// +// Fingerprint Format: +// +// Each field contributes: "<field_id_or_name>,<type_id>,<ref>,<nullable>;" +// Fields are sorted by field_id_or_name (lexicographically as strings) +// +// Field Components: +// - field_id_or_name: Tag ID as string if configured (e.g., "0", "1"), otherwise snake_case field name +// - type_id: Fory TypeId as decimal string (e.g., "4" for INT32) +// - ref: "1" if reference tracking enabled, "0" otherwise +// - nullable: "1" if null flag is written, "0" otherwise +// +// Example fingerprints: +// - With tag IDs: "0,4,0,0;1,4,0,1;2,9,0,1;" +// - With field names: "age,4,0,0;name,9,0,1;" +// +// The fingerprint is used to compute a hash for struct schema versioning. +// Different nullable/ref settings will produce different fingerprints, +// ensuring schema compatibility is properly validated. +func ComputeStructFingerprint(fields []FieldFingerprintInfo) string { + // Sort fields by their identifier (field ID or name) + type fieldWithKey struct { + field FieldFingerprintInfo + sortKey string + } + fieldsWithKeys := make([]fieldWithKey, 0, len(fields)) + for _, field := range fields { + var sortKey string + if field.FieldID >= 0 { + sortKey = fmt.Sprintf("%d", field.FieldID) + } else { + sortKey = field.FieldName + } + fieldsWithKeys = append(fieldsWithKeys, fieldWithKey{field: field, sortKey: sortKey}) + } + + sort.Slice(fieldsWithKeys, func(i, j int) bool { + return fieldsWithKeys[i].sortKey < fieldsWithKeys[j].sortKey + }) + + var sb strings.Builder + for _, fw := range fieldsWithKeys { + // Field identifier + sb.WriteString(fw.sortKey) + sb.WriteString(",") + // Type ID + sb.WriteString(fmt.Sprintf("%d", fw.field.TypeID)) + sb.WriteString(",") + // Ref flag + if fw.field.Ref { + sb.WriteString("1") + } else { + sb.WriteString("0") + } + sb.WriteString(",") + // Nullable flag + if fw.field.Nullable { + sb.WriteString("1") + } else { + sb.WriteString("0") + } + sb.WriteString(";") + } + return sb.String() +} + +// Field sorting helpers + +type triple struct { + typeID int16 +
serializer Serializer + name string + nullable bool + tagID int // -1 = use field name, >=0 = use tag ID for sorting +} + +// getSortKey returns the sort key for a triple. +// If tagID >= 0, returns the tag ID as string (for tag-based sorting). +// Otherwise returns the snake_case field name. +func (t triple) getSortKey() string { + if t.tagID >= 0 { + return fmt.Sprintf("%d", t.tagID) + } + return SnakeCase(t.name) +} + +// getFieldSortKey returns the sort key for a FieldInfo. +// If TagID >= 0, returns the tag ID as string (for tag-based sorting). +// Otherwise returns the field name (which is already snake_case). +func getFieldSortKey(f *FieldInfo) string { + if f.TagID >= 0 { + return fmt.Sprintf("%d", f.TagID) + } + return f.Name +} + +// sortFields sorts fields with nullable information to match Java's field ordering. +// Java separates primitive types (int, long) from boxed types (Integer, Long). +// In Go, this corresponds to non-pointer primitives vs pointer-to-primitive. +// When tagIDs are provided (>= 0), fields are sorted by tag ID instead of field name. +func sortFields( + typeResolver *TypeResolver, + fieldNames []string, + serializers []Serializer, + typeIds []TypeId, + nullables []bool, + tagIDs []int, +) ([]Serializer, []string) { + var ( + typeTriples []triple + others []triple + userDefined []triple + ) + + for i, name := range fieldNames { + ser := serializers[i] + tagID := TagIDUseFieldName // default: use field name + if tagIDs != nil && i < len(tagIDs) { + tagID = tagIDs[i] + } + if ser == nil { + others = append(others, triple{UNKNOWN, nil, name, nullables[i], tagID}) + continue + } + typeTriples = append(typeTriples, triple{typeIds[i], ser, name, nullables[i], tagID}) + } + // Java orders: primitives, boxed, finals, others, collections, maps + // primitives = non-nullable primitive types (int, long, etc.) + // boxed = nullable boxed types (Integer, Long, etc. 
which are pointers in Go) + var primitives, boxed, collection, otherInternalTypeFields []triple + + for _, t := range typeTriples { + switch { + case isPrimitiveType(t.typeID): + // Separate non-nullable primitives from nullable (boxed) primitives + if t.nullable { + boxed = append(boxed, t) + } else { + primitives = append(primitives, t) + } + case isPrimitiveArrayType(t.typeID): + // Primitive arrays: sorted by name only (category 2 in reflection) + collection = append(collection, t) + case isListType(t.typeID), isSetType(t.typeID), isMapType(t.typeID): + // LIST, SET, MAP: sorted by typeId, name (category 1 in reflection) + otherInternalTypeFields = append(otherInternalTypeFields, t) + case isUserDefinedType(t.typeID): + userDefined = append(userDefined, t) + case t.typeID == UNKNOWN: + others = append(others, t) + default: + // STRING, BINARY, and other internal types (category 1 in reflection) + otherInternalTypeFields = append(otherInternalTypeFields, t) + } + } + // Sort primitives (non-nullable) - same logic as boxed + // Java sorts by: compressed (varint) types last, then by size (largest first), then by type ID (descending) + // Fixed types: BOOL, INT8, UINT8, INT16, UINT16, INT32, UINT32, INT64, UINT64, FLOAT32, FLOAT64 + // Varint types: VARINT32, VARINT64, VAR_UINT32, VAR_UINT64, TAGGED_INT64, TAGGED_UINT64 + isVarintTypeId := func(typeID int16) bool { + return typeID == VARINT32 || typeID == VARINT64 || + typeID == VAR_UINT32 || typeID == VAR_UINT64 || + typeID == TAGGED_INT64 || typeID == TAGGED_UINT64 + } + sortPrimitiveSlice := func(s []triple) { + sort.Slice(s, func(i, j int) bool { + ai, aj := s[i], s[j] + compressI := isVarintTypeId(ai.typeID) + compressJ := isVarintTypeId(aj.typeID) + if compressI != compressJ { + return !compressI && compressJ + } + szI, szJ := getPrimitiveTypeSize(ai.typeID), getPrimitiveTypeSize(aj.typeID) + if szI != szJ { + return szI > szJ + } + // Tie-breaker: type ID descending (higher type ID first), then field name + 
if ai.typeID != aj.typeID { + return ai.typeID > aj.typeID + } + return ai.getSortKey() < aj.getSortKey() + }) + } + sortPrimitiveSlice(primitives) + sortPrimitiveSlice(boxed) + // Sort internal types (STRING, BINARY, LIST, SET, MAP) by typeId then name only. + // Java does NOT sort by nullable flag for these types. + sortByTypeIDThenName := func(s []triple) { + sort.Slice(s, func(i, j int) bool { + if s[i].typeID != s[j].typeID { + return s[i].typeID < s[j].typeID + } + return s[i].getSortKey() < s[j].getSortKey() + }) + } + sortTuple := func(s []triple) { + sort.Slice(s, func(i, j int) bool { + return s[i].getSortKey() < s[j].getSortKey() + }) + } + sortByTypeIDThenName(otherInternalTypeFields) + // Merge all category 2 fields (primitive arrays, userDefined, others) and sort by name + // This matches GroupFields' getFieldCategory which sorts all category 2 fields together + category2 := make([]triple, 0, len(collection)+len(userDefined)+len(others)) + category2 = append(category2, collection...) // primitive arrays + category2 = append(category2, userDefined...) // structs, enums + category2 = append(category2, others...) // unknown types + sortTuple(category2) + + // Order: primitives, boxed, internal types (STRING/BINARY/LIST/SET/MAP), category 2 (by name) + // This aligns with GroupFields' getFieldCategory sorting + all := make([]triple, 0, len(fieldNames)) + all = append(all, primitives...) + all = append(all, boxed...) + all = append(all, otherInternalTypeFields...) // STRING, BINARY, LIST, SET, MAP (category 1) + all = append(all, category2...) 
// all category 2 fields sorted by name + + outSer := make([]Serializer, len(all)) + outNam := make([]string, len(all)) + for i, t := range all { + outSer[i] = t.serializer + outNam[i] = t.name + } + return outSer, outNam +} + +func typesCompatible(actual, expected reflect.Type) bool { + if actual == nil || expected == nil { + return false + } + if actual == expected { + return true + } + // interface{} can accept any value + if actual.Kind() == reflect.Interface && actual.NumMethod() == 0 { + return true + } + if actual.AssignableTo(expected) || expected.AssignableTo(actual) { + return true + } + if actual.Kind() == reflect.Ptr && actual.Elem() == expected { + return true + } + if expected.Kind() == reflect.Ptr && expected.Elem() == actual { + return true + } + if actual.Kind() == expected.Kind() { + switch actual.Kind() { + case reflect.Slice, reflect.Array: + return elementTypesCompatible(actual.Elem(), expected.Elem()) + case reflect.Map: + return elementTypesCompatible(actual.Key(), expected.Key()) && elementTypesCompatible(actual.Elem(), expected.Elem()) + } + } + if (actual.Kind() == reflect.Array && expected.Kind() == reflect.Slice) || + (actual.Kind() == reflect.Slice && expected.Kind() == reflect.Array) { + return true + } + return false +} + +func elementTypesCompatible(actual, expected reflect.Type) bool { + if actual == nil || expected == nil { + return false + } + if actual == expected || actual.AssignableTo(expected) || expected.AssignableTo(actual) { + return true + } + if actual.Kind() == reflect.Ptr { + return elementTypesCompatible(actual, expected.Elem()) + } + return false +} + +// typeIdFromKind derives a TypeId from a reflect.Type's kind +// This is used when the type is not registered in typesInfo +// Note: Uses VARINT32/VARINT64/VAR_UINT32/VAR_UINT64 to match Java xlang mode and Rust +func typeIdFromKind(type_ reflect.Type) TypeId { + switch type_.Kind() { + case reflect.Bool: + return BOOL + case reflect.Int8: + return INT8 + case 
reflect.Int16: + return INT16 + case reflect.Int32: + return VARINT32 + case reflect.Int64, reflect.Int: + return VARINT64 + case reflect.Uint8: + return UINT8 + case reflect.Uint16: + return UINT16 + case reflect.Uint32: + return VAR_UINT32 + case reflect.Uint64, reflect.Uint: + return VAR_UINT64 + case reflect.Float32: + return FLOAT32 + case reflect.Float64: + return FLOAT64 + case reflect.String: + return STRING + case reflect.Slice: + // For slices, return the appropriate primitive array type ID based on element type + elemKind := type_.Elem().Kind() + switch elemKind { + case reflect.Bool: + return BOOL_ARRAY + case reflect.Int8: + return INT8_ARRAY + case reflect.Int16: + return INT16_ARRAY + case reflect.Int32: + return INT32_ARRAY + case reflect.Int64, reflect.Int: + return INT64_ARRAY + case reflect.Float32: + return FLOAT32_ARRAY + case reflect.Float64: + return FLOAT64_ARRAY + default: + // Non-primitive slices use LIST + return LIST + } + case reflect.Array: + // For arrays, return the appropriate primitive array type ID based on element type + elemKind := type_.Elem().Kind() + switch elemKind { + case reflect.Bool: + return BOOL_ARRAY + case reflect.Int8: + return INT8_ARRAY + case reflect.Int16: + return INT16_ARRAY + case reflect.Int32: + return INT32_ARRAY + case reflect.Int64, reflect.Int: + return INT64_ARRAY + case reflect.Float32: + return FLOAT32_ARRAY + case reflect.Float64: + return FLOAT64_ARRAY + default: + // Non-primitive arrays use LIST + return LIST + } + case reflect.Map: + // map[T]bool is used to represent a Set in Go + if type_.Elem().Kind() == reflect.Bool { + return SET + } + return MAP + case reflect.Struct: + return NAMED_STRUCT + case reflect.Ptr: + // For pointer types, get the type ID of the element type + return typeIdFromKind(type_.Elem()) + default: + return UNKNOWN + } +} diff --git a/go/fory/map_primitive.go b/go/fory/map_primitive.go index 926744b8a7..cb7c2f9d8e 100644 --- a/go/fory/map_primitive.go +++ 
b/go/fory/map_primitive.go @@ -154,8 +154,8 @@ func writeMapStringInt64(buf *ByteBuffer, m map[string]int64, hasGenerics bool) } else { buf.WriteUint8(0) buf.WriteUint8(uint8(chunkSize)) - buf.WriteVaruint32Small7(uint32(STRING)) // key type - buf.WriteVaruint32Small7(uint32(VAR64)) // value type + buf.WriteVaruint32Small7(uint32(STRING)) // key type + buf.WriteVaruint32Small7(uint32(VARINT64)) // value type } count := 0 @@ -228,8 +228,8 @@ func writeMapStringInt32(buf *ByteBuffer, m map[string]int32, hasGenerics bool) } else { buf.WriteUint8(0) buf.WriteUint8(uint8(chunkSize)) - buf.WriteVaruint32Small7(uint32(STRING)) // key type - buf.WriteVaruint32Small7(uint32(VAR32)) // value type + buf.WriteVaruint32Small7(uint32(STRING)) // key type + buf.WriteVaruint32Small7(uint32(VARINT32)) // value type } count := 0 @@ -302,8 +302,8 @@ func writeMapStringInt(buf *ByteBuffer, m map[string]int, hasGenerics bool) { } else { buf.WriteUint8(0) buf.WriteUint8(uint8(chunkSize)) - buf.WriteVaruint32Small7(uint32(STRING)) // key type - buf.WriteVaruint32Small7(uint32(VAR64)) // value type (int serialized as varint64) + buf.WriteVaruint32Small7(uint32(STRING)) // key type + buf.WriteVaruint32Small7(uint32(VARINT64)) // value type (int serialized as varint64) } count := 0 @@ -376,7 +376,7 @@ func writeMapStringFloat64(buf *ByteBuffer, m map[string]float64, hasGenerics bo } else { buf.WriteUint8(0) buf.WriteUint8(uint8(chunkSize)) - buf.WriteVaruint32Small7(uint32(STRING)) // key type + buf.WriteVaruint32Small7(uint32(STRING)) // key type buf.WriteVaruint32Small7(uint32(FLOAT64)) // value type } @@ -529,8 +529,8 @@ func writeMapInt32Int32(buf *ByteBuffer, m map[int32]int32, hasGenerics bool) { } else { buf.WriteUint8(0) buf.WriteUint8(uint8(chunkSize)) - buf.WriteVaruint32Small7(uint32(VAR32)) // key type - buf.WriteVaruint32Small7(uint32(VAR32)) // value type + buf.WriteVaruint32Small7(uint32(VARINT32)) // key type + buf.WriteVaruint32Small7(uint32(VARINT32)) // value type } count 
:= 0 @@ -603,8 +603,8 @@ func writeMapInt64Int64(buf *ByteBuffer, m map[int64]int64, hasGenerics bool) { } else { buf.WriteUint8(0) buf.WriteUint8(uint8(chunkSize)) - buf.WriteVaruint32Small7(uint32(VAR64)) // key type - buf.WriteVaruint32Small7(uint32(VAR64)) // value type + buf.WriteVaruint32Small7(uint32(VARINT64)) // key type + buf.WriteVaruint32Small7(uint32(VARINT64)) // value type } count := 0 @@ -677,8 +677,8 @@ func writeMapIntInt(buf *ByteBuffer, m map[int]int, hasGenerics bool) { } else { buf.WriteUint8(0) buf.WriteUint8(uint8(chunkSize)) - buf.WriteVaruint32Small7(uint32(VAR64)) // key type (int serialized as varint64) - buf.WriteVaruint32Small7(uint32(VAR64)) // value type + buf.WriteVaruint32Small7(uint32(VARINT64)) // key type (int serialized as varint64) + buf.WriteVaruint32Small7(uint32(VARINT64)) // value type } count := 0 diff --git a/go/fory/primitive.go b/go/fory/primitive.go index 534d6924a5..b0bf967677 100644 --- a/go/fory/primitive.go +++ b/go/fory/primitive.go @@ -164,6 +164,144 @@ func (s byteSerializer) ReadWithTypeInfo(ctx *ReadContext, refMode RefMode, type s.Read(ctx, refMode, false, false, value) } +// uint16Serializer handles uint16 type +type uint16Serializer struct{} + +var globalUint16Serializer = uint16Serializer{} + +func (s uint16Serializer) WriteData(ctx *WriteContext, value reflect.Value) { + ctx.buffer.WriteUint16(uint16(value.Uint())) +} + +func (s uint16Serializer) Write(ctx *WriteContext, refMode RefMode, writeType bool, hasGenerics bool, value reflect.Value) { + _ = hasGenerics + if refMode != RefModeNone { + ctx.buffer.WriteInt8(NotNullValueFlag) + } + if writeType { + ctx.buffer.WriteVaruint32Small7(uint32(UINT16)) + } + s.WriteData(ctx, value) +} + +func (s uint16Serializer) ReadData(ctx *ReadContext, type_ reflect.Type, value reflect.Value) { + err := ctx.Err() + value.SetUint(uint64(ctx.buffer.ReadUint16(err))) +} + +func (s uint16Serializer) Read(ctx *ReadContext, refMode RefMode, readType bool, hasGenerics bool, 
value reflect.Value) { + _ = hasGenerics + err := ctx.Err() + if refMode != RefModeNone { + if ctx.buffer.ReadInt8(err) == NullFlag { + return + } + } + if readType { + _ = ctx.buffer.ReadVaruint32Small7(err) + } + if ctx.HasError() { + return + } + s.ReadData(ctx, value.Type(), value) +} + +func (s uint16Serializer) ReadWithTypeInfo(ctx *ReadContext, refMode RefMode, typeInfo *TypeInfo, value reflect.Value) { + s.Read(ctx, refMode, false, false, value) +} + +// uint32Serializer handles uint32 type with variable-length encoding (VAR_UINT32) +type uint32Serializer struct{} + +var globalUint32Serializer = uint32Serializer{} + +func (s uint32Serializer) WriteData(ctx *WriteContext, value reflect.Value) { + ctx.buffer.WriteVaruint32(uint32(value.Uint())) +} + +func (s uint32Serializer) Write(ctx *WriteContext, refMode RefMode, writeType bool, hasGenerics bool, value reflect.Value) { + _ = hasGenerics + if refMode != RefModeNone { + ctx.buffer.WriteInt8(NotNullValueFlag) + } + if writeType { + ctx.buffer.WriteVaruint32Small7(uint32(VAR_UINT32)) + } + s.WriteData(ctx, value) +} + +func (s uint32Serializer) ReadData(ctx *ReadContext, type_ reflect.Type, value reflect.Value) { + err := ctx.Err() + value.SetUint(uint64(ctx.buffer.ReadVaruint32(err))) +} + +func (s uint32Serializer) Read(ctx *ReadContext, refMode RefMode, readType bool, hasGenerics bool, value reflect.Value) { + _ = hasGenerics + err := ctx.Err() + if refMode != RefModeNone { + if ctx.buffer.ReadInt8(err) == NullFlag { + return + } + } + if readType { + _ = ctx.buffer.ReadVaruint32Small7(err) + } + if ctx.HasError() { + return + } + s.ReadData(ctx, value.Type(), value) +} + +func (s uint32Serializer) ReadWithTypeInfo(ctx *ReadContext, refMode RefMode, typeInfo *TypeInfo, value reflect.Value) { + s.Read(ctx, refMode, false, false, value) +} + +// uint64Serializer handles uint64 type with variable-length encoding (VAR_UINT64) +type uint64Serializer struct{} + +var globalUint64Serializer = uint64Serializer{} + 
+func (s uint64Serializer) WriteData(ctx *WriteContext, value reflect.Value) { + ctx.buffer.WriteVaruint64(value.Uint()) +} + +func (s uint64Serializer) Write(ctx *WriteContext, refMode RefMode, writeType bool, hasGenerics bool, value reflect.Value) { + _ = hasGenerics + if refMode != RefModeNone { + ctx.buffer.WriteInt8(NotNullValueFlag) + } + if writeType { + ctx.buffer.WriteVaruint32Small7(uint32(VAR_UINT64)) + } + s.WriteData(ctx, value) +} + +func (s uint64Serializer) ReadData(ctx *ReadContext, type_ reflect.Type, value reflect.Value) { + err := ctx.Err() + value.SetUint(ctx.buffer.ReadVaruint64(err)) +} + +func (s uint64Serializer) Read(ctx *ReadContext, refMode RefMode, readType bool, hasGenerics bool, value reflect.Value) { + _ = hasGenerics + err := ctx.Err() + if refMode != RefModeNone { + if ctx.buffer.ReadInt8(err) == NullFlag { + return + } + } + if readType { + _ = ctx.buffer.ReadVaruint32Small7(err) + } + if ctx.HasError() { + return + } + s.ReadData(ctx, value.Type(), value) +} + +func (s uint64Serializer) ReadWithTypeInfo(ctx *ReadContext, refMode RefMode, typeInfo *TypeInfo, value reflect.Value) { + s.Read(ctx, refMode, false, false, value) +} + // int16Serializer handles int16 type type int16Serializer struct{} diff --git a/go/fory/reader.go b/go/fory/reader.go index 2248947510..67f680fc05 100644 --- a/go/fory/reader.go +++ b/go/fory/reader.go @@ -180,31 +180,31 @@ func (c *ReadContext) ReadTypeId() TypeId { return TypeId(c.buffer.ReadVaruint32Small7(c.Err())) } -// readFast reads a value using fast path based on StaticTypeId -func (c *ReadContext) readFast(ptr unsafe.Pointer, ct StaticTypeId) { +// readFast reads a value using fast path based on DispatchId +func (c *ReadContext) readFast(ptr unsafe.Pointer, ct DispatchId) { err := c.Err() switch ct { - case ConcreteTypeBool: + case PrimitiveBoolDispatchId: *(*bool)(ptr) = c.buffer.ReadBool(err) - case ConcreteTypeInt8: + case PrimitiveInt8DispatchId: *(*int8)(ptr) = int8(c.buffer.ReadByte(err)) 
- case ConcreteTypeInt16: + case PrimitiveInt16DispatchId: *(*int16)(ptr) = c.buffer.ReadInt16(err) - case ConcreteTypeInt32: + case PrimitiveInt32DispatchId: *(*int32)(ptr) = c.buffer.ReadVarint32(err) - case ConcreteTypeInt: + case PrimitiveIntDispatchId: if strconv.IntSize == 64 { *(*int)(ptr) = int(c.buffer.ReadVarint64(err)) } else { *(*int)(ptr) = int(c.buffer.ReadVarint32(err)) } - case ConcreteTypeInt64: + case PrimitiveInt64DispatchId: *(*int64)(ptr) = c.buffer.ReadVarint64(err) - case ConcreteTypeFloat32: + case PrimitiveFloat32DispatchId: *(*float32)(ptr) = c.buffer.ReadFloat32(err) - case ConcreteTypeFloat64: + case PrimitiveFloat64DispatchId: *(*float64)(ptr) = c.buffer.ReadFloat64(err) - case ConcreteTypeString: + case StringDispatchId: *(*string)(ptr) = readString(c.buffer, err) } } diff --git a/go/fory/skip.go b/go/fory/skip.go index 91dd2b7ccf..9aa1a14d59 100644 --- a/go/fory/skip.go +++ b/go/fory/skip.go @@ -582,9 +582,9 @@ func skipValue(ctx *ReadContext, fieldDef FieldDef, readRefFlag bool, isField bo _ = ctx.buffer.ReadInt16(err) case INT32: _ = ctx.buffer.ReadVaruint32Small7(err) - case VAR32: + case VARINT32: _ = ctx.buffer.ReadVaruint32Small7(err) - case INT64, VAR64, H64: + case INT64, VARINT64, TAGGED_INT64: _ = ctx.buffer.ReadVarint64(err) // Floating point types @@ -650,11 +650,22 @@ func skipValue(ctx *ReadContext, fieldDef FieldDef, readRefFlag bool, isField bo case UINT8: _ = ctx.buffer.ReadByte(err) case UINT16: - _ = ctx.buffer.ReadInt16(err) // No ReadUint16, but same binary representation + _ = ctx.buffer.ReadUint16(err) case UINT32: + _ = ctx.buffer.ReadUint32(err) + case VAR_UINT32: _ = ctx.buffer.ReadVaruint32(err) case UINT64: + _ = ctx.buffer.ReadUint64(err) + case VAR_UINT64: _ = ctx.buffer.ReadVaruint64(err) + case TAGGED_UINT64: + firstInt32 := ctx.buffer.ReadInt32(err) + if (firstInt32 & 1) != 0 { + // 9-byte encoding + _ = ctx.buffer.ReadUint64(err) + } + // Otherwise it's 4-byte encoding, already read // Unknown 
(polymorphic) type - read type info and skip dynamically case UNKNOWN: diff --git a/go/fory/struct.go b/go/fory/struct.go index 6fcf126e79..b8f3c77658 100644 --- a/go/fory/struct.go +++ b/go/fory/struct.go @@ -24,7 +24,6 @@ import ( "math" "reflect" "sort" - "strings" "unicode" "unicode/utf8" "unsafe" @@ -32,160 +31,28 @@ import ( "github.com/spaolacci/murmur3" ) -// FieldInfo stores field metadata computed ENTIRELY at init time. -// All flags and decisions are pre-computed to eliminate runtime checks. -type FieldInfo struct { - Name string - Offset uintptr - Type reflect.Type - StaticId StaticTypeId - TypeId TypeId // Fory type ID for the serializer - Serializer Serializer - Referencable bool - FieldIndex int // -1 if field doesn't exist in current struct (for compatible mode) - FieldDef FieldDef // original FieldDef from remote TypeDef (for compatible mode skip) - - // Pre-computed sizes and offsets (for fixed primitives) - FixedSize int // 0 if not fixed-size, else 1/2/4/8 - WriteOffset int // Offset within fixed-fields buffer region (sum of preceding field sizes) - - // Pre-computed flags for serialization (computed at init time) - RefMode RefMode // ref mode for serializer.Write/Read - WriteType bool // whether to write type info (true for struct fields in compatible mode) - HasGenerics bool // whether element types are known from TypeDef (for container fields) - - // Tag-based configuration (from fory struct tags) - TagID int // -1 = use field name, >=0 = use tag ID - HasForyTag bool // Whether field has explicit fory tag - TagRefSet bool // Whether ref was explicitly set via fory tag - TagRef bool // The ref value from fory tag (only valid if TagRefSet is true) - TagNullableSet bool // Whether nullable was explicitly set via fory tag - TagNullable bool // The nullable value from fory tag (only valid if TagNullableSet is true) -} - -// fieldHasNonPrimitiveSerializer returns true if the field has a serializer with a non-primitive type ID. 
-// This is used to skip the fast path for fields like enums where StaticId is int32 but the serializer -// writes a different format (e.g., unsigned varint for enum ordinals vs signed zigzag for int32). -func fieldHasNonPrimitiveSerializer(field *FieldInfo) bool { - if field.Serializer == nil { - return false - } - // ENUM (numeric ID), NAMED_ENUM (namespace/typename), NAMED_STRUCT, NAMED_COMPATIBLE_STRUCT, NAMED_EXT - // all require special serialization and should not use the primitive fast path - // Note: ENUM uses unsigned Varuint32Small7 for ordinals, not signed zigzag varint - // Use internal type ID (low 8 bits) since registered types have composite TypeIds like (userID << 8) | internalID - internalTypeId := TypeId(field.TypeId & 0xFF) - switch internalTypeId { - case ENUM, NAMED_ENUM, NAMED_STRUCT, NAMED_COMPATIBLE_STRUCT, NAMED_EXT: - return true - default: - return false - } -} - -// isEnumField checks if a field is an enum type based on its TypeId -func isEnumField(field *FieldInfo) bool { - if field.Serializer == nil { - return false - } - internalTypeId := field.TypeId & 0xFF - return internalTypeId == ENUM || internalTypeId == NAMED_ENUM -} - -// writeEnumField writes an enum field respecting the field's RefMode. -// Java writes enum ordinals as unsigned Varuint32Small7, not signed zigzag. -// RefMode determines whether null flag is written, regardless of whether the local type is a pointer. -// This is important for compatible mode where remote TypeDef's nullable flag controls the wire format. 
-func writeEnumField(ctx *WriteContext, field *FieldInfo, fieldValue reflect.Value) { - buf := ctx.Buffer() - isPointer := fieldValue.Kind() == reflect.Ptr - - // Write null flag based on RefMode only (not based on whether local type is pointer) - if field.RefMode != RefModeNone { - if isPointer && fieldValue.IsNil() { - buf.WriteInt8(NullFlag) - return - } - buf.WriteInt8(NotNullValueFlag) - } - - // Get the actual value to serialize - targetValue := fieldValue - if isPointer { - if fieldValue.IsNil() { - // RefModeNone but nil pointer - this is a protocol error in schema-consistent mode - // Write zero value as fallback - targetValue = reflect.Zero(field.Type.Elem()) - } else { - targetValue = fieldValue.Elem() - } - } - - // For pointer enum fields, the serializer is ptrToValueSerializer wrapping enumSerializer. - // We need to call the inner enumSerializer directly with the dereferenced value. - if ptrSer, ok := field.Serializer.(*ptrToValueSerializer); ok { - ptrSer.valueSerializer.WriteData(ctx, targetValue) - } else { - field.Serializer.WriteData(ctx, targetValue) - } -} - -// readEnumField reads an enum field respecting the field's RefMode. -// RefMode determines whether null flag is read, regardless of whether the local type is a pointer. -// This is important for compatible mode where remote TypeDef's nullable flag controls the wire format. -// Uses context error state for deferred error checking. 
-func readEnumField(ctx *ReadContext, field *FieldInfo, fieldValue reflect.Value) { - buf := ctx.Buffer() - isPointer := fieldValue.Kind() == reflect.Ptr - - // Read null flag based on RefMode only (not based on whether local type is pointer) - if field.RefMode != RefModeNone { - nullFlag := buf.ReadInt8(ctx.Err()) - if nullFlag == NullFlag { - // For pointer enum fields, leave as nil; for non-pointer, set to zero - if !isPointer { - fieldValue.SetInt(0) - } - return - } - } - - // For pointer enum fields, allocate a new value - targetValue := fieldValue - if isPointer { - newVal := reflect.New(field.Type.Elem()) - fieldValue.Set(newVal) - targetValue = newVal.Elem() - } - - // For pointer enum fields, the serializer is ptrToValueSerializer wrapping enumSerializer. - // We need to call the inner enumSerializer directly with the dereferenced value. - if ptrSer, ok := field.Serializer.(*ptrToValueSerializer); ok { - ptrSer.valueSerializer.ReadData(ctx, field.Type.Elem(), targetValue) - } else { - field.Serializer.ReadData(ctx, field.Type, targetValue) +// GetStructHash returns the struct hash for a given type using the provided TypeResolver. +// This is used by codegen serializers to get the hash at runtime. +func GetStructHash(type_ reflect.Type, resolver *TypeResolver) int32 { + ser := newStructSerializer(type_, "") + if err := ser.initialize(resolver); err != nil { + panic(fmt.Errorf("failed to initialize struct serializer for hash computation: %v", err)) } + return ser.structHash } type structSerializer struct { // Identity - typeTag string + name string type_ reflect.Type structHash int32 - // Pre-sorted field lists by category (computed at init) - fixedFields []*FieldInfo // fixed-size primitives (bool, int8, int16, float32, float64) - varintFields []*FieldInfo // varint primitives (int32, int64, int) - remainingFields []*FieldInfo // all other fields (string, slice, map, struct, etc.) 
+ // Pre-sorted and categorized fields (embedded for cache locality) + fieldGroup FieldGroup - // All fields in protocol order (for compatible mode) - fields []*FieldInfo // all fields in sorted order - fieldMap map[string]*FieldInfo // for compatible reading - fieldDefs []FieldDef // for type_def compatibility - - // Pre-computed buffer sizes - fixedSize int // Total bytes for fixed-size primitives - maxVarintSize int // Max bytes for varints (5 per int32, 10 per int64) + // Original field list for hash computation and compatible mode + fields []FieldInfo // all fields in sorted order (before grouping) + fieldDefs []FieldDef // for type_def compatibility // Mode flags (set at init) isCompatibleMode bool // true when compatible=true @@ -195,422 +62,851 @@ type structSerializer struct { initialized bool } -// newStructSerializer creates a new structSerializer with the given parameters. -// typeTag can be empty and will be derived from type_.Name() if not provided. -// fieldDefs can be nil for local structs without remote schema. -func newStructSerializer(type_ reflect.Type, typeTag string, fieldDefs []FieldDef) *structSerializer { - if typeTag == "" && type_ != nil { - typeTag = type_.Name() +// newStructSerializerFromTypeDef creates a new structSerializer with the given parameters. +// name can be empty and will be derived from type_.Name() if not provided. +// fieldDefs is from remote schema. +func newStructSerializerFromTypeDef(type_ reflect.Type, name string, fieldDefs []FieldDef) *structSerializer { + if name == "" && type_ != nil { + name = type_.Name() } return &structSerializer{ type_: type_, - typeTag: typeTag, + name: name, fieldDefs: fieldDefs, } } +// newStructSerializer creates a new structSerializer with the given parameters. +// name can be empty and will be derived from type_.Name() if not provided. +// fieldDefs can be nil for local structs without remote schema. 
+func newStructSerializer(type_ reflect.Type, name string) *structSerializer { + if name == "" && type_ != nil { + name = type_.Name() + } + return &structSerializer{ + type_: type_, + name: name, + } +} + // initialize performs eager initialization of the struct serializer. // This should be called at registration time to pre-compute all field metadata. func (s *structSerializer) initialize(typeResolver *TypeResolver) error { if s.initialized { return nil } - // Ensure type is set if s.type_ == nil { return errors.New("struct type not set") } - // Normalize pointer types for s.type_.Kind() == reflect.Ptr { s.type_ = s.type_.Elem() } - + // Set compatible mode flag BEFORE field initialization + // This is needed for groupFields to apply correct sorting + s.isCompatibleMode = typeResolver.Compatible() // Build fields from type or fieldDefs if s.fieldDefs != nil { - if err := s.initFieldsFromDefsWithResolver(typeResolver); err != nil { + if err := s.initFieldsFromTypeDef(typeResolver); err != nil { return err } } else { - if err := s.initFieldsFromTypeResolver(typeResolver); err != nil { + if err := s.initFields(typeResolver); err != nil { return err } } - // Compute struct hash s.structHash = s.computeHash() - - // Set compatible mode flag - s.isCompatibleMode = typeResolver.Compatible() - s.initialized = true return nil } -func (s *structSerializer) Write(ctx *WriteContext, refMode RefMode, writeType bool, hasGenerics bool, value reflect.Value) { - switch refMode { - case RefModeTracking: - if value.Kind() == reflect.Ptr && value.IsNil() { - ctx.buffer.WriteInt8(NullFlag) - return - } - refWritten, err := ctx.RefResolver().WriteRefOrNull(ctx.buffer, value) - if err != nil { - ctx.SetError(FromError(err)) - return - } - if refWritten { - return - } - case RefModeNullOnly: - if value.Kind() == reflect.Ptr && value.IsNil() { - ctx.buffer.WriteInt8(NullFlag) - return - } - ctx.buffer.WriteInt8(NotNullValueFlag) - } - if writeType { - // Structs have dynamic type IDs, 
need to look up from TypeResolver - typeInfo, err := ctx.TypeResolver().getTypeInfo(value, true) - if err != nil { - ctx.SetError(FromError(err)) - return - } - ctx.TypeResolver().WriteTypeInfo(ctx.buffer, typeInfo, ctx.Err()) +// initFields initializes fields from local struct type using TypeResolver +func (s *structSerializer) initFields(typeResolver *TypeResolver) error { + // If we have fieldDefs from type_def (remote meta), use them + if len(s.fieldDefs) > 0 { + return s.initFieldsFromTypeDef(typeResolver) } - s.WriteData(ctx, value) -} -func (s *structSerializer) WriteData(ctx *WriteContext, value reflect.Value) { - // Early error check - skip all intermediate checks for normal path performance - if ctx.HasError() { - return - } + // Otherwise initialize from local struct type + type_ := s.type_ + var fields []FieldInfo + var fieldNames []string + var serializers []Serializer + var typeIds []TypeId + var nullables []bool + var tagIDs []int - // Lazy initialization - if !s.initialized { - if err := s.initialize(ctx.TypeResolver()); err != nil { - ctx.SetError(FromError(err)) - return + for i := 0; i < type_.NumField(); i++ { + field := type_.Field(i) + firstRune, _ := utf8.DecodeRuneInString(field.Name) + if unicode.IsLower(firstRune) { + continue // skip unexported fields } - } - - buf := ctx.Buffer() - // Dereference pointer if needed - if value.Kind() == reflect.Ptr { - if value.IsNil() { - ctx.SetError(SerializationError("cannot write nil pointer")) - return + // Parse fory struct tag and check for ignore + foryTag := parseForyTag(field) + if foryTag.Ignore { + continue // skip ignored fields } - value = value.Elem() - } - // In compatible mode with meta share, struct hash is not written - if !ctx.Compatible() { - buf.WriteInt32(s.structHash) - } + fieldType := field.Type - // Check if value is addressable for unsafe access - canUseUnsafe := value.CanAddr() - var ptr unsafe.Pointer - if canUseUnsafe { - ptr = unsafe.Pointer(value.UnsafeAddr()) - } + var 
fieldSerializer Serializer + // For interface{} fields, don't get a serializer - use WriteValue/ReadValue instead + // which will handle polymorphic types dynamically + if fieldType.Kind() != reflect.Interface { + // Get serializer for all non-interface field types + fieldSerializer, _ = typeResolver.getSerializerByType(fieldType, true) + } - // ========================================================================== - // Phase 1: Fixed-size primitives (bool, int8, int16, float32, float64) - // - Reserve once, inline unsafe writes with endian handling, update index once - // - field.WriteOffset computed at init time - // ========================================================================== - if canUseUnsafe && s.fixedSize > 0 { - buf.Reserve(s.fixedSize) - baseOffset := buf.WriterIndex() - data := buf.GetData() + // Use TypeResolver helper methods for arrays and slices + if fieldType.Kind() == reflect.Array && fieldType.Elem().Kind() != reflect.Interface { + fieldSerializer, _ = typeResolver.GetArraySerializer(fieldType) + } else if fieldType.Kind() == reflect.Slice && fieldType.Elem().Kind() != reflect.Interface { + fieldSerializer, _ = typeResolver.GetSliceSerializer(fieldType) + } else if fieldType.Kind() == reflect.Slice && fieldType.Elem().Kind() == reflect.Interface { + // For struct fields with interface element types, use sliceDynSerializer + fieldSerializer = mustNewSliceDynSerializer(fieldType.Elem()) + } - for _, field := range s.fixedFields { - fieldPtr := unsafe.Add(ptr, field.Offset) - bufOffset := baseOffset + field.WriteOffset - switch field.StaticId { - case ConcreteTypeBool: - if *(*bool)(fieldPtr) { - data[bufOffset] = 1 - } else { - data[bufOffset] = 0 - } - case ConcreteTypeInt8: - data[bufOffset] = *(*byte)(fieldPtr) - case ConcreteTypeInt16: - if isLittleEndian { - *(*int16)(unsafe.Pointer(&data[bufOffset])) = *(*int16)(fieldPtr) + // Get TypeId for the serializer, fallback to deriving from kind + fieldTypeId := 
typeResolver.getTypeIdByType(fieldType) + if fieldTypeId == 0 { + fieldTypeId = typeIdFromKind(fieldType) + } + + // Override TypeId based on compress/encoding tags for integer types + // This matches the logic in type_def.go:buildFieldDefs + baseKind := fieldType.Kind() + if baseKind == reflect.Ptr { + baseKind = fieldType.Elem().Kind() + } + switch baseKind { + case reflect.Uint32: + if foryTag.CompressSet { + if foryTag.Compress { + fieldTypeId = VAR_UINT32 } else { - binary.LittleEndian.PutUint16(data[bufOffset:], uint16(*(*int16)(fieldPtr))) + fieldTypeId = UINT32 } - case ConcreteTypeFloat32: - if isLittleEndian { - *(*float32)(unsafe.Pointer(&data[bufOffset])) = *(*float32)(fieldPtr) + } + case reflect.Int32: + if foryTag.CompressSet { + if foryTag.Compress { + fieldTypeId = VARINT32 } else { - binary.LittleEndian.PutUint32(data[bufOffset:], math.Float32bits(*(*float32)(fieldPtr))) + fieldTypeId = INT32 } - case ConcreteTypeFloat64: - if isLittleEndian { - *(*float64)(unsafe.Pointer(&data[bufOffset])) = *(*float64)(fieldPtr) - } else { - binary.LittleEndian.PutUint64(data[bufOffset:], math.Float64bits(*(*float64)(fieldPtr))) + } + case reflect.Uint64: + if foryTag.EncodingSet { + switch foryTag.Encoding { + case "fixed": + fieldTypeId = UINT64 + case "varint": + fieldTypeId = VAR_UINT64 + case "tagged": + fieldTypeId = TAGGED_UINT64 } } - } - // Update writer index ONCE after all fixed fields - buf.SetWriterIndex(baseOffset + s.fixedSize) - } else if len(s.fixedFields) > 0 { - // Fallback to reflect-based access for unaddressable values - for _, field := range s.fixedFields { - fieldValue := value.Field(field.FieldIndex) - switch field.StaticId { - case ConcreteTypeBool: - buf.WriteBool(fieldValue.Bool()) - case ConcreteTypeInt8: - buf.WriteByte_(byte(fieldValue.Int())) - case ConcreteTypeInt16: - buf.WriteInt16(int16(fieldValue.Int())) - case ConcreteTypeFloat32: - buf.WriteFloat32(float32(fieldValue.Float())) - case ConcreteTypeFloat64: - 
buf.WriteFloat64(fieldValue.Float()) + case reflect.Int64: + if foryTag.EncodingSet { + switch foryTag.Encoding { + case "fixed": + fieldTypeId = INT64 + case "varint": + fieldTypeId = VARINT64 + case "tagged": + fieldTypeId = TAGGED_INT64 + } } } - } - // ========================================================================== - // Phase 2: Varint primitives (int32, int64, int) - // - Reserve max size, track offset locally, update index once at end - // ========================================================================== - if canUseUnsafe && s.maxVarintSize > 0 { - buf.Reserve(s.maxVarintSize) - offset := buf.WriterIndex() + // Calculate nullable flag for serialization (wire format): + // - In xlang mode: Per xlang spec, fields are NON-NULLABLE by default. + // Only pointer types are nullable by default. + // - In native mode: Go's natural semantics apply - slice/map/interface can be nil, + // so they are nullable by default. + // Can be overridden by explicit fory tag `fory:"nullable"`. 
+ internalId := fieldTypeId & 0xFF + isEnum := internalId == ENUM || internalId == NAMED_ENUM - for _, field := range s.varintFields { - fieldPtr := unsafe.Add(ptr, field.Offset) - switch field.StaticId { - case ConcreteTypeInt32: - offset += buf.UnsafePutVarInt32(offset, *(*int32)(fieldPtr)) - case ConcreteTypeInt64: - offset += buf.UnsafePutVarInt64(offset, *(*int64)(fieldPtr)) - case ConcreteTypeInt: - offset += buf.UnsafePutVarInt64(offset, int64(*(*int)(fieldPtr))) - } - } - // Update writer index ONCE after all varint fields - buf.SetWriterIndex(offset) - } else if len(s.varintFields) > 0 { - // Fallback to reflect-based access for unaddressable values - for _, field := range s.varintFields { - fieldValue := value.Field(field.FieldIndex) - switch field.StaticId { - case ConcreteTypeInt32: - buf.WriteVarint32(int32(fieldValue.Int())) - case ConcreteTypeInt64, ConcreteTypeInt: - buf.WriteVarint64(fieldValue.Int()) + // Determine nullable based on mode + // In xlang mode: only pointer types are nullable by default (per xlang spec) + // In native mode: Go's natural semantics - all nil-able types are nullable + // This ensures proper interoperability with Java/other languages in xlang mode. 
+ var nullableFlag bool + if typeResolver.fory.config.IsXlang { + // xlang mode: only pointer types are nullable by default per xlang spec + // Slices and maps are NOT nullable - they serialize as empty when nil + nullableFlag = fieldType.Kind() == reflect.Ptr + } else { + // Native mode: Go's natural semantics - all nil-able types are nullable + nullableFlag = fieldType.Kind() == reflect.Ptr || + fieldType.Kind() == reflect.Slice || + fieldType.Kind() == reflect.Map || + fieldType.Kind() == reflect.Interface + } + if foryTag.NullableSet { + // Override nullable flag if explicitly set in fory tag + nullableFlag = foryTag.Nullable + } + // Primitives are never nullable, regardless of tag + if isNonNullablePrimitiveKind(fieldType.Kind()) && !isEnum { + nullableFlag = false + } + + // Calculate ref tracking - use tag override if explicitly set + trackRef := typeResolver.TrackRef() + if foryTag.RefSet { + trackRef = foryTag.Ref + } + + // Pre-compute RefMode based on (possibly overridden) trackRef and nullable + // For pointer-to-struct fields, enable ref tracking when trackRef is enabled, + // regardless of nullable flag. This is necessary to detect circular references. 
+ refMode := RefModeNone + isStructPointer := fieldType.Kind() == reflect.Ptr && fieldType.Elem().Kind() == reflect.Struct + if trackRef && (nullableFlag || isStructPointer) { + refMode = RefModeTracking + } else if nullableFlag { + refMode = RefModeNullOnly + } + // Pre-compute WriteType: true for struct fields in compatible mode + writeType := typeResolver.Compatible() && isStructField(fieldType) + + // Pre-compute DispatchId, with special handling for enum fields and pointer-to-numeric + var dispatchId DispatchId + if fieldType.Kind() == reflect.Ptr && isNumericKind(fieldType.Elem().Kind()) { + if nullableFlag { + dispatchId = getDispatchIdFromTypeId(fieldTypeId, true) + } else { + dispatchId = getNotnullPtrDispatchId(fieldType.Elem().Kind(), foryTag.Encoding) + } + } else { + dispatchId = getDispatchIdFromTypeId(fieldTypeId, nullableFlag) + if dispatchId == UnknownDispatchId { + dispatchId = GetDispatchId(fieldType) + } + } + if fieldSerializer != nil { + if _, ok := fieldSerializer.(*enumSerializer); ok { + dispatchId = EnumDispatchId + } else if ptrSer, ok := fieldSerializer.(*ptrToValueSerializer); ok { + if _, ok := ptrSer.valueSerializer.(*enumSerializer); ok { + dispatchId = EnumDispatchId + } } } + if DebugOutputEnabled() { + fmt.Printf("[Go][fory-debug] initFields: field=%s type=%v dispatchId=%d refMode=%v nullableFlag=%v serializer=%T\n", + SnakeCase(field.Name), fieldType, dispatchId, refMode, nullableFlag, fieldSerializer) + } + + fieldInfo := FieldInfo{ + Name: SnakeCase(field.Name), + Offset: field.Offset, + Type: fieldType, + DispatchId: dispatchId, + TypeId: fieldTypeId, + Serializer: fieldSerializer, + Nullable: nullableFlag, // Use same logic as TypeDef's nullable flag for consistent ref handling + FieldIndex: i, + RefMode: refMode, + WriteType: writeType, + HasGenerics: isCollectionType(fieldTypeId), // Container fields have declared element types + TagID: foryTag.ID, + HasForyTag: foryTag.HasTag, + TagRefSet: foryTag.RefSet, + TagRef: 
foryTag.Ref, + TagNullableSet: foryTag.NullableSet, + TagNullable: foryTag.Nullable, + IsPtr: fieldType.Kind() == reflect.Ptr, + } + fields = append(fields, fieldInfo) + fieldNames = append(fieldNames, fieldInfo.Name) + serializers = append(serializers, fieldSerializer) + typeIds = append(typeIds, fieldTypeId) + nullables = append(nullables, nullableFlag) + tagIDs = append(tagIDs, foryTag.ID) } - // ========================================================================== - // Phase 3: Remaining fields (strings, slices, maps, structs, enums) - // - These require per-field handling (ref flags, type info, serializers) - // - No intermediate error checks - trade error path performance for normal path - // ========================================================================== - for _, field := range s.remainingFields { - s.writeRemainingField(ctx, ptr, field, value) + // Sort fields according to specification using nullable info and tag IDs for consistent ordering + serializers, fieldNames = sortFields(typeResolver, fieldNames, serializers, typeIds, nullables, tagIDs) + order := make(map[string]int, len(fieldNames)) + for idx, name := range fieldNames { + order[name] = idx } -} -// writeRemainingField writes a non-primitive field (string, slice, map, struct, enum) -func (s *structSerializer) writeRemainingField(ctx *WriteContext, ptr unsafe.Pointer, field *FieldInfo, value reflect.Value) { - buf := ctx.Buffer() + sort.SliceStable(fields, func(i, j int) bool { + oi, okI := order[fields[i].Name] + oj, okJ := order[fields[j].Name] + switch { + case okI && okJ: + return oi < oj + case okI: + return true + case okJ: + return false + default: + return false + } + }) - // Fast path dispatch using pre-computed StaticId - // ptr must be valid (addressable value) - if ptr != nil { - fieldPtr := unsafe.Add(ptr, field.Offset) - switch field.StaticId { - case ConcreteTypeString: - if field.RefMode == RefModeTracking { - break // Fall through to slow path + s.fields = fields + 
s.fieldGroup = GroupFields(s.fields) + + // Debug output for field order comparison with Java + if s.type_ != nil { + s.fieldGroup.DebugPrint(s.type_.Name()) + } + + return nil +} + +// initFieldsFromTypeDef initializes fields from remote fieldDefs using typeResolver +func (s *structSerializer) initFieldsFromTypeDef(typeResolver *TypeResolver) error { + type_ := s.type_ + if type_ == nil { + // Type is not known - we'll create an interface{} placeholder + // This happens when deserializing unknown types in compatible mode + // For now, we'll create fields that discard all data + var fields []FieldInfo + for _, def := range s.fieldDefs { + fieldSerializer, _ := getFieldTypeSerializerWithResolver(typeResolver, def.fieldType) + remoteTypeInfo, _ := def.fieldType.getTypeInfoWithResolver(typeResolver) + remoteType := remoteTypeInfo.Type + if remoteType == nil { + remoteType = reflect.TypeOf((*interface{})(nil)).Elem() } - // Only write null flag if RefMode requires it (nullable field) - if field.RefMode == RefModeNullOnly { - buf.WriteInt8(NotNullValueFlag) + // Get TypeId from FieldType's TypeId method + fieldTypeId := def.fieldType.TypeId() + // Pre-compute RefMode based on trackRef and FieldDef flags + refMode := RefModeNone + if def.trackingRef { + refMode = RefModeTracking + } else if def.nullable { + refMode = RefModeNullOnly } - ctx.WriteString(*(*string)(fieldPtr)) - return - case ConcreteTypeEnum: - // Enums don't track refs - always use fast path - writeEnumField(ctx, field, value.Field(field.FieldIndex)) - return - case ConcreteTypeStringSlice: - if field.RefMode == RefModeTracking { - break + // Pre-compute WriteType: true for struct fields in compatible mode + writeType := typeResolver.Compatible() && isStructField(remoteType) + + // Pre-compute DispatchId, with special handling for enum fields + dispatchId := GetDispatchId(remoteType) + if fieldSerializer != nil { + if _, ok := fieldSerializer.(*enumSerializer); ok { + dispatchId = EnumDispatchId + } else 
if ptrSer, ok := fieldSerializer.(*ptrToValueSerializer); ok { + if _, ok := ptrSer.valueSerializer.(*enumSerializer); ok { + dispatchId = EnumDispatchId + } + } } - ctx.WriteStringSlice(*(*[]string)(fieldPtr), field.RefMode, false, true) - return - case ConcreteTypeBoolSlice: - if field.RefMode == RefModeTracking { - break + + fieldInfo := FieldInfo{ + Name: def.name, + Offset: 0, + Type: remoteType, + DispatchId: dispatchId, + TypeId: fieldTypeId, + Serializer: fieldSerializer, + Nullable: def.nullable, // Use remote nullable flag + FieldIndex: -1, // Mark as non-existent field to discard data + FieldDef: def, // Save original FieldDef for skipping + RefMode: refMode, + WriteType: writeType, + HasGenerics: isCollectionType(fieldTypeId), // Container fields have declared element types + IsPtr: remoteType != nil && remoteType.Kind() == reflect.Ptr, } - ctx.WriteBoolSlice(*(*[]bool)(fieldPtr), field.RefMode, false) - return - case ConcreteTypeInt8Slice: - if field.RefMode == RefModeTracking { - break + fields = append(fields, fieldInfo) + } + s.fields = fields + s.fieldGroup = GroupFields(s.fields) + s.typeDefDiffers = true // Unknown type, must use ordered reading + return nil + } + + // Build maps from field names and tag IDs to struct field indices + fieldNameToIndex := make(map[string]int) + fieldNameToOffset := make(map[string]uintptr) + fieldNameToType := make(map[string]reflect.Type) + fieldTagIDToIndex := make(map[int]int) // tag ID -> struct field index + fieldTagIDToOffset := make(map[int]uintptr) // tag ID -> field offset + fieldTagIDToType := make(map[int]reflect.Type) // tag ID -> field type + fieldTagIDToName := make(map[int]string) // tag ID -> snake_case field name + for i := 0; i < type_.NumField(); i++ { + field := type_.Field(i) + + // Parse fory tag and skip ignored fields + foryTag := parseForyTag(field) + if foryTag.Ignore { + continue + } + + name := SnakeCase(field.Name) + fieldNameToIndex[name] = i + fieldNameToOffset[name] = field.Offset + 
fieldNameToType[name] = field.Type + + // Also index by tag ID if present + if foryTag.ID >= 0 { + fieldTagIDToIndex[foryTag.ID] = i + fieldTagIDToOffset[foryTag.ID] = field.Offset + fieldTagIDToType[foryTag.ID] = field.Type + fieldTagIDToName[foryTag.ID] = name + } + } + + var fields []FieldInfo + + for _, def := range s.fieldDefs { + fieldSerializer, err := getFieldTypeSerializerWithResolver(typeResolver, def.fieldType) + if err != nil || fieldSerializer == nil { + // If we can't get serializer from typeID, try to get it from the Go type + // This can happen when the type isn't registered in typeIDToTypeInfo + remoteTypeInfo, _ := def.fieldType.getTypeInfoWithResolver(typeResolver) + if remoteTypeInfo.Type != nil { + fieldSerializer, _ = typeResolver.getSerializerByType(remoteTypeInfo.Type, true) } - ctx.WriteInt8Slice(*(*[]int8)(fieldPtr), field.RefMode, false) - return - case ConcreteTypeByteSlice: - if field.RefMode == RefModeTracking { - break + } + + // Get the remote type from fieldDef + remoteTypeInfo, _ := def.fieldType.getTypeInfoWithResolver(typeResolver) + remoteType := remoteTypeInfo.Type + // Track if type lookup failed - we'll need to skip such fields + // Note: DynamicFieldType.getTypeInfoWithResolver returns interface{} (not nil) when lookup fails + emptyInterfaceType := reflect.TypeOf((*interface{})(nil)).Elem() + typeLookupFailed := remoteType == nil || remoteType == emptyInterfaceType + if remoteType == nil { + remoteType = emptyInterfaceType + } + + // For struct-like fields, even if TypeDef lookup fails, we can try to read + // the field because type resolution happens at read time from the buffer. + // The type name might map to a different local type. 
+ isStructLikeField := isStructFieldType(def.fieldType) + + // Try to find corresponding local field + // First try to match by tag ID (if remote def uses tag ID) + // Then fall back to matching by field name + fieldIndex := -1 + var offset uintptr + var fieldType reflect.Type + var localFieldName string + var localType reflect.Type + var exists bool + + if def.tagID >= 0 { + // Try to match by tag ID + if idx, ok := fieldTagIDToIndex[def.tagID]; ok { + exists = true + fieldIndex = idx // Will be overwritten if types are compatible + localType = fieldTagIDToType[def.tagID] + offset = fieldTagIDToOffset[def.tagID] + localFieldName = fieldTagIDToName[def.tagID] } - ctx.WriteByteSlice(*(*[]byte)(fieldPtr), field.RefMode, false) - return - case ConcreteTypeInt16Slice: - if field.RefMode == RefModeTracking { - break - } - ctx.WriteInt16Slice(*(*[]int16)(fieldPtr), field.RefMode, false) - return - case ConcreteTypeInt32Slice: - if field.RefMode == RefModeTracking { - break - } - ctx.WriteInt32Slice(*(*[]int32)(fieldPtr), field.RefMode, false) - return - case ConcreteTypeInt64Slice: - if field.RefMode == RefModeTracking { - break + } + + // Fall back to name-based matching if tag ID match failed + if !exists && def.name != "" { + if _, ok := fieldNameToIndex[def.name]; ok { + exists = true + localType = fieldNameToType[def.name] + offset = fieldNameToOffset[def.name] + localFieldName = def.name } - ctx.WriteInt64Slice(*(*[]int64)(fieldPtr), field.RefMode, false) - return - case ConcreteTypeIntSlice: - if field.RefMode == RefModeTracking { - break + } + + if exists { + idx := fieldNameToIndex[localFieldName] + if def.tagID >= 0 { + idx = fieldTagIDToIndex[def.tagID] } - ctx.WriteIntSlice(*(*[]int)(fieldPtr), field.RefMode, false) - return - case ConcreteTypeUintSlice: - if field.RefMode == RefModeTracking { - break + // Check if types are compatible + // For primitive types: skip if types don't match + // For struct-like types: allow read even if TypeDef lookup failed, + 
// because runtime type resolution by name might work + shouldRead := false + isPolymorphicField := def.fieldType.TypeId() == UNKNOWN + defTypeId := def.fieldType.TypeId() + // Check if field is an enum - either by type ID or by serializer type + // The type ID may be a composite value with namespace bits, so check the low 8 bits + internalDefTypeId := defTypeId & 0xFF + isEnumField := internalDefTypeId == NAMED_ENUM || internalDefTypeId == ENUM + if !isEnumField && fieldSerializer != nil { + _, isEnumField = fieldSerializer.(*enumSerializer) } - ctx.WriteUintSlice(*(*[]uint)(fieldPtr), field.RefMode, false) - return - case ConcreteTypeFloat32Slice: - if field.RefMode == RefModeTracking { - break + if isPolymorphicField && localType.Kind() == reflect.Interface { + // For polymorphic (UNKNOWN) fields with interface{} local type, + // allow reading - the actual type will be determined at runtime + shouldRead = true + fieldType = localType + } else if typeLookupFailed && isEnumField { + // For enum fields with failed TypeDef lookup (NAMED_ENUM stores by namespace/typename, not typeId), + // check if local field is a numeric type (Go enums are int-based) + // Also handle pointer enum fields (*EnumType) + localKind := localType.Kind() + elemKind := localKind + if localKind == reflect.Ptr { + elemKind = localType.Elem().Kind() + } + if isNumericKind(elemKind) { + shouldRead = true + fieldType = localType + // Get the serializer for the base type (the enum type, not the pointer) + baseType := localType + if localKind == reflect.Ptr { + baseType = localType.Elem() + } + fieldSerializer, _ = typeResolver.getSerializerByType(baseType, true) + } + } else if typeLookupFailed && isStructLikeField { + // For struct fields with failed TypeDef lookup, check if local field can hold a struct + localKind := localType.Kind() + if localKind == reflect.Ptr { + localKind = localType.Elem().Kind() + } + if localKind == reflect.Struct || localKind == reflect.Interface { + shouldRead = true 
+ fieldType = localType // Use local type for struct fields + } + } else if typeLookupFailed && (defTypeId == LIST || defTypeId == SET) { + // For collection fields with failed type lookup (e.g., List with interface element type), + // check if local type is a slice with interface element type (e.g., []Animal) + // The type lookup fails because sliceSerializer doesn't support interface elements + if localType.Kind() == reflect.Slice && localType.Elem().Kind() == reflect.Interface { + shouldRead = true + fieldType = localType + } + } else if !typeLookupFailed && typesCompatible(localType, remoteType) { + shouldRead = true + fieldType = localType } - ctx.WriteFloat32Slice(*(*[]float32)(fieldPtr), field.RefMode, false) - return - case ConcreteTypeFloat64Slice: - if field.RefMode == RefModeTracking { - break + + if shouldRead { + fieldIndex = idx + // offset was already set above when matching by tag ID or field name + // For struct-like fields with failed type lookup, get the serializer for the local type + if typeLookupFailed && isStructLikeField && fieldSerializer == nil { + fieldSerializer, _ = typeResolver.getSerializerByType(localType, true) + } + // For collection fields with interface element types, use sliceDynSerializer + if typeLookupFailed && (defTypeId == LIST || defTypeId == SET) && fieldSerializer == nil { + if localType.Kind() == reflect.Slice && localType.Elem().Kind() == reflect.Interface { + fieldSerializer = mustNewSliceDynSerializer(localType.Elem()) + } + } + // If local type is *T and remote type is T, we need the serializer for *T + // This handles Java's Integer/Long (nullable boxed types) mapping to Go's *int32/*int64 + if localType.Kind() == reflect.Ptr && localType.Elem() == remoteType { + fieldSerializer, _ = typeResolver.getSerializerByType(localType, true) + } + // For pointer enum fields (*EnumType), get the serializer for the base enum type + // The struct read/write code will handle pointer dereferencing + if isEnumField && 
localType.Kind() == reflect.Ptr { + baseType := localType.Elem() + fieldSerializer, _ = typeResolver.getSerializerByType(baseType, true) + if DebugOutputEnabled() { + fmt.Printf("[fory-debug] pointer enum field %s: localType=%v baseType=%v serializer=%T\n", + def.name, localType, baseType, fieldSerializer) + } + } + // For array fields, use array serializers (not slice serializers) even if typeID maps to slice serializer + // The typeID (INT16_ARRAY, etc.) is shared between arrays and slices, but we need the correct + // serializer based on the actual Go type + if localType.Kind() == reflect.Array { + elemType := localType.Elem() + switch elemType.Kind() { + case reflect.Bool: + fieldSerializer = boolArraySerializer{arrayType: localType} + case reflect.Int8: + fieldSerializer = int8ArraySerializer{arrayType: localType} + case reflect.Int16: + fieldSerializer = int16ArraySerializer{arrayType: localType} + case reflect.Int32: + fieldSerializer = int32ArraySerializer{arrayType: localType} + case reflect.Int64: + fieldSerializer = int64ArraySerializer{arrayType: localType} + case reflect.Uint8: + fieldSerializer = uint8ArraySerializer{arrayType: localType} + case reflect.Float32: + fieldSerializer = float32ArraySerializer{arrayType: localType} + case reflect.Float64: + fieldSerializer = float64ArraySerializer{arrayType: localType} + case reflect.Int: + if reflect.TypeOf(int(0)).Size() == 8 { + fieldSerializer = int64ArraySerializer{arrayType: localType} + } else { + fieldSerializer = int32ArraySerializer{arrayType: localType} + } + } + } + } else { + // Types are incompatible or unknown - use remote type but mark field as not settable + fieldType = remoteType + fieldIndex = -1 + offset = 0 // Don't set offset for incompatible fields } - ctx.WriteFloat64Slice(*(*[]float64)(fieldPtr), field.RefMode, false) - return - case ConcreteTypeStringStringMap: - if field.RefMode == RefModeTracking { - break + } else { + // Field doesn't exist locally, use type from fieldDef + 
fieldType = remoteType + } + + // Get TypeId from FieldType's TypeId method + fieldTypeId := def.fieldType.TypeId() + // Pre-compute RefMode based on FieldDef flags (trackingRef and nullable) + refMode := RefModeNone + if def.trackingRef { + refMode = RefModeTracking + } else if def.nullable { + refMode = RefModeNullOnly + } + // Pre-compute WriteType: true for struct fields in compatible mode + writeType := typeResolver.Compatible() && isStructField(fieldType) + + // Pre-compute DispatchId, with special handling for pointer-to-numeric and enum fields + // IMPORTANT: For compatible mode reading, we must use the REMOTE nullable flag + // to determine DispatchId, because Java wrote data with its nullable semantics. + var dispatchId DispatchId + localKind := fieldType.Kind() + localIsPtr := localKind == reflect.Ptr + localIsNumeric := isNumericKind(localKind) || (localIsPtr && isNumericKind(fieldType.Elem().Kind())) + + if localIsNumeric { + if localIsPtr { + if def.nullable { + // Local is *T, remote is nullable - use nullable DispatchId + dispatchId = getDispatchIdFromTypeId(fieldTypeId, true) + } else { + // Local is *T, remote is NOT nullable - use notnull pointer DispatchId + encoding := getEncodingFromTypeId(fieldTypeId) + dispatchId = getNotnullPtrDispatchId(fieldType.Elem().Kind(), encoding) + } + } else { + if def.nullable { + // Local is T (non-pointer), remote is nullable - use nullable DispatchId + dispatchId = getDispatchIdFromTypeId(fieldTypeId, true) + } else { + // Local is T, remote is NOT nullable - use primitive DispatchId + dispatchId = GetDispatchId(fieldType) + } } - ctx.WriteStringStringMap(*(*map[string]string)(fieldPtr), field.RefMode, false) - return - case ConcreteTypeStringInt64Map: - if field.RefMode == RefModeTracking { - break + } else { + dispatchId = GetDispatchId(fieldType) + } + if fieldSerializer != nil { + if _, ok := fieldSerializer.(*enumSerializer); ok { + dispatchId = EnumDispatchId + } else if ptrSer, ok := 
fieldSerializer.(*ptrToValueSerializer); ok { + if _, ok := ptrSer.valueSerializer.(*enumSerializer); ok { + dispatchId = EnumDispatchId + } } - ctx.WriteStringInt64Map(*(*map[string]int64)(fieldPtr), field.RefMode, false) - return - case ConcreteTypeStringInt32Map: - if field.RefMode == RefModeTracking { + } + + // Determine field name: use local field name if matched, otherwise use def.name + fieldName := def.name + if localFieldName != "" { + fieldName = localFieldName + } + + fieldInfo := FieldInfo{ + Name: fieldName, + Offset: offset, + Type: fieldType, + DispatchId: dispatchId, + TypeId: fieldTypeId, + Serializer: fieldSerializer, + Nullable: def.nullable, // Use remote nullable flag + FieldIndex: fieldIndex, + FieldDef: def, // Save original FieldDef for skipping + RefMode: refMode, + WriteType: writeType, + HasGenerics: isCollectionType(fieldTypeId), // Container fields have declared element types + TagID: def.tagID, + HasForyTag: def.tagID >= 0, + IsPtr: fieldType != nil && fieldType.Kind() == reflect.Ptr, + } + fields = append(fields, fieldInfo) + } + + s.fields = fields + s.fieldGroup = GroupFields(s.fields) + + // Debug output for field order comparison with Java MetaSharedSerializer + if DebugOutputEnabled() && s.type_ != nil { + fmt.Printf("[Go] Remote TypeDef order (%d fields):\n", len(s.fieldDefs)) + for i, def := range s.fieldDefs { + fmt.Printf("[Go] [%d] %s -> typeId=%d, nullable=%v\n", i, def.name, def.fieldType.TypeId(), def.nullable) + } + s.fieldGroup.DebugPrint(s.type_.Name()) + } + + // Compute typeDefDiffers: true if any field doesn't exist locally, has type mismatch, + // or has nullable mismatch (which affects field ordering) + // When typeDefDiffers is false, we can use grouped reading for better performance + s.typeDefDiffers = false + for i, field := range fields { + if field.FieldIndex < 0 { + // Field exists in remote TypeDef but not locally + s.typeDefDiffers = true + break + } + // Check if nullable flag differs between remote and 
local + // Remote nullable is stored in fieldDefs[i].nullable + // Local nullable is determined by whether the Go field is a pointer type + if i < len(s.fieldDefs) && field.FieldIndex >= 0 { + remoteNullable := s.fieldDefs[i].nullable + // Check if local Go field is a pointer type (can be nil = nullable) + localNullable := field.IsPtr + if remoteNullable != localNullable { + s.typeDefDiffers = true break } - ctx.WriteStringInt32Map(*(*map[string]int32)(fieldPtr), field.RefMode, false) - return - case ConcreteTypeStringIntMap: - if field.RefMode == RefModeTracking { - break + } + } + + if DebugOutputEnabled() && s.type_ != nil { + fmt.Printf("[Go] typeDefDiffers=%v for %s\n", s.typeDefDiffers, s.type_.Name()) + } + + return nil +} + +func (s *structSerializer) computeHash() int32 { + // Build FieldFingerprintInfo for each field + fields := make([]FieldFingerprintInfo, 0, len(s.fields)) + for _, field := range s.fields { + var typeId TypeId + isEnumField := false + if field.Serializer == nil { + typeId = UNKNOWN + } else { + typeId = field.TypeId + // Check if this is an enum serializer (directly or wrapped in ptrToValueSerializer) + if _, ok := field.Serializer.(*enumSerializer); ok { + isEnumField = true + typeId = UNKNOWN + } else if ptrSer, ok := field.Serializer.(*ptrToValueSerializer); ok { + if _, ok := ptrSer.valueSerializer.(*enumSerializer); ok { + isEnumField = true + typeId = UNKNOWN + } } - ctx.WriteStringIntMap(*(*map[string]int)(fieldPtr), field.RefMode, false) - return - case ConcreteTypeStringFloat64Map: - if field.RefMode == RefModeTracking { - break + // For user-defined types (struct, ext types), use UNKNOWN in fingerprint + // This matches Java's behavior where user-defined types return UNKNOWN + // to ensure consistent fingerprint computation across languages + if isUserDefinedType(int16(typeId)) { + typeId = UNKNOWN } - ctx.WriteStringFloat64Map(*(*map[string]float64)(fieldPtr), field.RefMode, false) - return - case ConcreteTypeStringBoolMap: - 
// NOTE: map[string]bool is used to represent SETs in Go xlang mode. - // We CANNOT use the fast path here because it writes MAP format, - // but the data should be written in SET format. Fall through to slow path - // which uses setSerializer to correctly write the SET format. - break - case ConcreteTypeIntIntMap: - if field.RefMode == RefModeTracking { - break + // For fixed-size arrays with primitive elements, use primitive array type IDs + if field.Type.Kind() == reflect.Array { + elemKind := field.Type.Elem().Kind() + switch elemKind { + case reflect.Int8: + typeId = INT8_ARRAY + case reflect.Int16: + typeId = INT16_ARRAY + case reflect.Int32: + typeId = INT32_ARRAY + case reflect.Int64: + typeId = INT64_ARRAY + case reflect.Float32: + typeId = FLOAT32_ARRAY + case reflect.Float64: + typeId = FLOAT64_ARRAY + default: + typeId = LIST + } + } else if field.Type.Kind() == reflect.Slice { + typeId = LIST + } else if field.Type.Kind() == reflect.Map { + // map[T]bool is used to represent a Set in Go + if field.Type.Elem().Kind() == reflect.Bool { + typeId = SET + } else { + typeId = MAP + } } - ctx.WriteIntIntMap(*(*map[int]int)(fieldPtr), field.RefMode, false) - return } + + // Determine nullable flag for xlang compatibility: + // - Default: false for ALL fields (xlang default - aligned with all languages) + // - Primitives are always non-nullable + // - Can be overridden by explicit fory tag + nullable := false // Default to nullable=false for xlang mode + if field.TagNullableSet { + // Use explicit tag value if set + nullable = field.TagNullable + } + // Primitives are never nullable, regardless of tag + if isNonNullablePrimitiveKind(field.Type.Kind()) && !isEnumField { + nullable = false + } + + fields = append(fields, FieldFingerprintInfo{ + FieldID: field.TagID, + FieldName: SnakeCase(field.Name), + TypeID: typeId, + // Ref is based on explicit tag annotation only, NOT runtime ref_tracking config + // This allows fingerprint to be computed at compile time for 
C++/Rust + Ref: field.TagRefSet && field.TagRef, + Nullable: nullable, + }) } - // Slow path: use full serializer - fieldValue := value.Field(field.FieldIndex) - if field.Serializer != nil { - field.Serializer.Write(ctx, field.RefMode, field.WriteType, field.HasGenerics, fieldValue) - } else { - ctx.WriteValue(fieldValue, RefModeTracking, true) + hashString := ComputeStructFingerprint(fields) + data := []byte(hashString) + h1, _ := murmur3.Sum128WithSeed(data, 47) + hash := int32(h1 & 0xFFFFFFFF) + + if DebugOutputEnabled() { + fmt.Printf("[Go][fory-debug] struct %v version fingerprint=\"%s\" version hash=%d\n", s.type_, hashString, hash) + } + + if hash == 0 { + panic(fmt.Errorf("hash for type %v is 0", s.type_)) } + return hash } -func (s *structSerializer) Read(ctx *ReadContext, refMode RefMode, readType bool, hasGenerics bool, value reflect.Value) { - buf := ctx.Buffer() - ctxErr := ctx.Err() +func (s *structSerializer) Write(ctx *WriteContext, refMode RefMode, writeType bool, hasGenerics bool, value reflect.Value) { switch refMode { case RefModeTracking: - refID, refErr := ctx.RefResolver().TryPreserveRefId(buf) - if refErr != nil { - ctx.SetError(FromError(refErr)) + if value.Kind() == reflect.Ptr && value.IsNil() { + ctx.buffer.WriteInt8(NullFlag) return } - if refID < int32(NotNullValueFlag) { - // Reference found - obj := ctx.RefResolver().GetReadObject(refID) - if obj.IsValid() { - value.Set(obj) - } + refWritten, err := ctx.RefResolver().WriteRefOrNull(ctx.buffer, value) + if err != nil { + ctx.SetError(FromError(err)) + return + } + if refWritten { return } case RefModeNullOnly: - flag := buf.ReadInt8(ctxErr) - if flag == NullFlag { + if value.Kind() == reflect.Ptr && value.IsNil() { + ctx.buffer.WriteInt8(NullFlag) return } + ctx.buffer.WriteInt8(NotNullValueFlag) } - if readType { - // Read type info - in compatible mode this returns the serializer with remote fieldDefs - typeID := buf.ReadVaruint32Small7(ctxErr) - internalTypeID := TypeId(typeID & 
0xFF) - // Check if this is a struct type that needs type meta reading - if IsNamespacedType(TypeId(typeID)) || internalTypeID == COMPATIBLE_STRUCT || internalTypeID == STRUCT { - // For struct types in compatible mode, use the serializer from TypeInfo - typeInfo := ctx.TypeResolver().readTypeInfoWithTypeID(buf, typeID, ctxErr) - // Use the serializer from TypeInfo which has the remote field definitions - if structSer, ok := typeInfo.Serializer.(*structSerializer); ok && len(structSer.fieldDefs) > 0 { - structSer.ReadData(ctx, value.Type(), value) - return - } + if writeType { + // Structs have dynamic type IDs, need to look up from TypeResolver + typeInfo, err := ctx.TypeResolver().getTypeInfo(value, true) + if err != nil { + ctx.SetError(FromError(err)) + return } + ctx.TypeResolver().WriteTypeInfo(ctx.buffer, typeInfo, ctx.Err()) } - s.ReadData(ctx, value.Type(), value) + s.WriteData(ctx, value) } -func (s *structSerializer) ReadData(ctx *ReadContext, type_ reflect.Type, value reflect.Value) { +func (s *structSerializer) WriteData(ctx *WriteContext, value reflect.Value) { // Early error check - skip all intermediate checks for normal path performance if ctx.HasError() { return @@ -625,1510 +921,1846 @@ func (s *structSerializer) ReadData(ctx *ReadContext, type_ reflect.Type, value } buf := ctx.Buffer() + + // Dereference pointer if needed if value.Kind() == reflect.Ptr { if value.IsNil() { - value.Set(reflect.New(type_.Elem())) + ctx.SetError(SerializationError("cannot write nil pointer")) + return } value = value.Elem() - type_ = type_.Elem() } // In compatible mode with meta share, struct hash is not written if !ctx.Compatible() { - err := ctx.Err() - structHash := buf.ReadInt32(err) - if structHash != s.structHash { - ctx.SetError(HashMismatchError(structHash, s.structHash, s.type_.String())) - return - } - } - - // Use ordered reading only when TypeDef differs from local type (schema evolution) - // When types match (typeDefDiffers=false), use grouped 
reading for better performance - if s.typeDefDiffers { - s.readFieldsInOrder(ctx, value) - return + buf.WriteInt32(s.structHash) } // Check if value is addressable for unsafe access - if !value.CanAddr() { - s.readFieldsInOrder(ctx, value) - return + canUseUnsafe := value.CanAddr() + var ptr unsafe.Pointer + if canUseUnsafe { + ptr = unsafe.Pointer(value.UnsafeAddr()) } // ========================================================================== - // Grouped reading for matching types (optimized path) - // - Types match, so all fields exist locally (no FieldIndex < 0 checks) - // - Use UnsafeGet at pre-computed offsets, update reader index once per phase + // Phase 1: Fixed-size primitives (bool, int8, int16, float32, float64) + // - Reserve once, inline unsafe writes with endian handling, update index once + // - field.WriteOffset computed at init time // ========================================================================== - ptr := unsafe.Pointer(value.UnsafeAddr()) - - // Phase 1: Fixed-size primitives (inline unsafe reads with endian handling) - if s.fixedSize > 0 { - baseOffset := buf.ReaderIndex() + if canUseUnsafe && s.fieldGroup.FixedSize > 0 { + buf.Reserve(s.fieldGroup.FixedSize) + baseOffset := buf.WriterIndex() data := buf.GetData() - for _, field := range s.fixedFields { + for _, field := range s.fieldGroup.FixedFields { fieldPtr := unsafe.Add(ptr, field.Offset) bufOffset := baseOffset + field.WriteOffset - switch field.StaticId { - case ConcreteTypeBool: - *(*bool)(fieldPtr) = data[bufOffset] != 0 - case ConcreteTypeInt8: - *(*int8)(fieldPtr) = int8(data[bufOffset]) - case ConcreteTypeInt16: + switch field.DispatchId { + case PrimitiveBoolDispatchId: + if *(*bool)(fieldPtr) { + data[bufOffset] = 1 + } else { + data[bufOffset] = 0 + } + case NotnullBoolPtrDispatchId: + if **(**bool)(fieldPtr) { + data[bufOffset] = 1 + } else { + data[bufOffset] = 0 + } + case PrimitiveInt8DispatchId: + data[bufOffset] = *(*byte)(fieldPtr) + case 
NotnullInt8PtrDispatchId: + data[bufOffset] = byte(**(**int8)(fieldPtr)) + case PrimitiveUint8DispatchId: + data[bufOffset] = *(*uint8)(fieldPtr) + case NotnullUint8PtrDispatchId: + data[bufOffset] = **(**uint8)(fieldPtr) + case PrimitiveInt16DispatchId: if isLittleEndian { - *(*int16)(fieldPtr) = *(*int16)(unsafe.Pointer(&data[bufOffset])) + *(*int16)(unsafe.Pointer(&data[bufOffset])) = *(*int16)(fieldPtr) } else { - *(*int16)(fieldPtr) = int16(binary.LittleEndian.Uint16(data[bufOffset:])) + binary.LittleEndian.PutUint16(data[bufOffset:], uint16(*(*int16)(fieldPtr))) } - case ConcreteTypeFloat32: + case NotnullInt16PtrDispatchId: if isLittleEndian { - *(*float32)(fieldPtr) = *(*float32)(unsafe.Pointer(&data[bufOffset])) + *(*int16)(unsafe.Pointer(&data[bufOffset])) = **(**int16)(fieldPtr) } else { - *(*float32)(fieldPtr) = math.Float32frombits(binary.LittleEndian.Uint32(data[bufOffset:])) + binary.LittleEndian.PutUint16(data[bufOffset:], uint16(**(**int16)(fieldPtr))) } - case ConcreteTypeFloat64: + case PrimitiveUint16DispatchId: if isLittleEndian { - *(*float64)(fieldPtr) = *(*float64)(unsafe.Pointer(&data[bufOffset])) + *(*uint16)(unsafe.Pointer(&data[bufOffset])) = *(*uint16)(fieldPtr) } else { - *(*float64)(fieldPtr) = math.Float64frombits(binary.LittleEndian.Uint64(data[bufOffset:])) + binary.LittleEndian.PutUint16(data[bufOffset:], *(*uint16)(fieldPtr)) + } + case NotnullUint16PtrDispatchId: + if isLittleEndian { + *(*uint16)(unsafe.Pointer(&data[bufOffset])) = **(**uint16)(fieldPtr) + } else { + binary.LittleEndian.PutUint16(data[bufOffset:], **(**uint16)(fieldPtr)) + } + case PrimitiveInt32DispatchId: + if isLittleEndian { + *(*int32)(unsafe.Pointer(&data[bufOffset])) = *(*int32)(fieldPtr) + } else { + binary.LittleEndian.PutUint32(data[bufOffset:], uint32(*(*int32)(fieldPtr))) + } + case NotnullInt32PtrDispatchId: + if isLittleEndian { + *(*int32)(unsafe.Pointer(&data[bufOffset])) = **(**int32)(fieldPtr) + } else { + 
binary.LittleEndian.PutUint32(data[bufOffset:], uint32(**(**int32)(fieldPtr))) + } + case PrimitiveUint32DispatchId: + if isLittleEndian { + *(*uint32)(unsafe.Pointer(&data[bufOffset])) = *(*uint32)(fieldPtr) + } else { + binary.LittleEndian.PutUint32(data[bufOffset:], *(*uint32)(fieldPtr)) + } + case NotnullUint32PtrDispatchId: + if isLittleEndian { + *(*uint32)(unsafe.Pointer(&data[bufOffset])) = **(**uint32)(fieldPtr) + } else { + binary.LittleEndian.PutUint32(data[bufOffset:], **(**uint32)(fieldPtr)) + } + case PrimitiveInt64DispatchId: + if isLittleEndian { + *(*int64)(unsafe.Pointer(&data[bufOffset])) = *(*int64)(fieldPtr) + } else { + binary.LittleEndian.PutUint64(data[bufOffset:], uint64(*(*int64)(fieldPtr))) + } + case NotnullInt64PtrDispatchId: + if isLittleEndian { + *(*int64)(unsafe.Pointer(&data[bufOffset])) = **(**int64)(fieldPtr) + } else { + binary.LittleEndian.PutUint64(data[bufOffset:], uint64(**(**int64)(fieldPtr))) + } + case PrimitiveUint64DispatchId: + if isLittleEndian { + *(*uint64)(unsafe.Pointer(&data[bufOffset])) = *(*uint64)(fieldPtr) + } else { + binary.LittleEndian.PutUint64(data[bufOffset:], *(*uint64)(fieldPtr)) + } + case NotnullUint64PtrDispatchId: + if isLittleEndian { + *(*uint64)(unsafe.Pointer(&data[bufOffset])) = **(**uint64)(fieldPtr) + } else { + binary.LittleEndian.PutUint64(data[bufOffset:], **(**uint64)(fieldPtr)) + } + case PrimitiveFloat32DispatchId: + if isLittleEndian { + *(*float32)(unsafe.Pointer(&data[bufOffset])) = *(*float32)(fieldPtr) + } else { + binary.LittleEndian.PutUint32(data[bufOffset:], math.Float32bits(*(*float32)(fieldPtr))) + } + case NotnullFloat32PtrDispatchId: + if isLittleEndian { + *(*float32)(unsafe.Pointer(&data[bufOffset])) = **(**float32)(fieldPtr) + } else { + binary.LittleEndian.PutUint32(data[bufOffset:], math.Float32bits(**(**float32)(fieldPtr))) + } + case PrimitiveFloat64DispatchId: + if isLittleEndian { + *(*float64)(unsafe.Pointer(&data[bufOffset])) = *(*float64)(fieldPtr) + } else { 
+ binary.LittleEndian.PutUint64(data[bufOffset:], math.Float64bits(*(*float64)(fieldPtr))) + } + case NotnullFloat64PtrDispatchId: + if isLittleEndian { + *(*float64)(unsafe.Pointer(&data[bufOffset])) = **(**float64)(fieldPtr) + } else { + binary.LittleEndian.PutUint64(data[bufOffset:], math.Float64bits(**(**float64)(fieldPtr))) } } } - // Update reader index ONCE after all fixed fields - buf.SetReaderIndex(baseOffset + s.fixedSize) - } - - // Phase 2: Varint primitives (must read sequentially - variable length) - // Use unsafe reads when we have enough buffer remaining - if s.maxVarintSize > 0 && buf.remaining() >= s.maxVarintSize { - for _, field := range s.varintFields { - fieldPtr := unsafe.Add(ptr, field.Offset) - switch field.StaticId { - case ConcreteTypeInt32: - *(*int32)(fieldPtr) = buf.UnsafeReadVarint32() - case ConcreteTypeInt64: - *(*int64)(fieldPtr) = buf.UnsafeReadVarint64() - case ConcreteTypeInt: - *(*int)(fieldPtr) = int(buf.UnsafeReadVarint64()) + // Update writer index ONCE after all fixed fields + buf.SetWriterIndex(baseOffset + s.fieldGroup.FixedSize) + } else if len(s.fieldGroup.FixedFields) > 0 { + // Fallback to reflect-based access for unaddressable values + for _, field := range s.fieldGroup.FixedFields { + fieldValue := value.Field(field.FieldIndex) + switch field.DispatchId { + // Primitive types (non-pointer) + case PrimitiveBoolDispatchId: + buf.WriteBool(fieldValue.Bool()) + case PrimitiveInt8DispatchId: + buf.WriteByte_(byte(fieldValue.Int())) + case PrimitiveUint8DispatchId: + buf.WriteByte_(byte(fieldValue.Uint())) + case PrimitiveInt16DispatchId: + buf.WriteInt16(int16(fieldValue.Int())) + case PrimitiveUint16DispatchId: + buf.WriteInt16(int16(fieldValue.Uint())) + case PrimitiveInt32DispatchId: + buf.WriteInt32(int32(fieldValue.Int())) + case PrimitiveUint32DispatchId: + buf.WriteInt32(int32(fieldValue.Uint())) + case PrimitiveInt64DispatchId: + buf.WriteInt64(fieldValue.Int()) + case PrimitiveUint64DispatchId: + 
buf.WriteInt64(int64(fieldValue.Uint())) + case PrimitiveFloat32DispatchId: + buf.WriteFloat32(float32(fieldValue.Float())) + case PrimitiveFloat64DispatchId: + buf.WriteFloat64(fieldValue.Float()) + // Notnull pointer types - dereference and write + case NotnullBoolPtrDispatchId: + buf.WriteBool(fieldValue.Elem().Bool()) + case NotnullInt8PtrDispatchId: + buf.WriteByte_(byte(fieldValue.Elem().Int())) + case NotnullUint8PtrDispatchId: + buf.WriteByte_(byte(fieldValue.Elem().Uint())) + case NotnullInt16PtrDispatchId: + buf.WriteInt16(int16(fieldValue.Elem().Int())) + case NotnullUint16PtrDispatchId: + buf.WriteInt16(int16(fieldValue.Elem().Uint())) + case NotnullInt32PtrDispatchId: + buf.WriteInt32(int32(fieldValue.Elem().Int())) + case NotnullUint32PtrDispatchId: + buf.WriteInt32(int32(fieldValue.Elem().Uint())) + case NotnullInt64PtrDispatchId: + buf.WriteInt64(fieldValue.Elem().Int()) + case NotnullUint64PtrDispatchId: + buf.WriteInt64(int64(fieldValue.Elem().Uint())) + case NotnullFloat32PtrDispatchId: + buf.WriteFloat32(float32(fieldValue.Elem().Float())) + case NotnullFloat64PtrDispatchId: + buf.WriteFloat64(fieldValue.Elem().Float()) } } - } else if len(s.varintFields) > 0 { - // Slow path with bounds checking - err := ctx.Err() - for _, field := range s.varintFields { + } + + // ========================================================================== + // Phase 2: Varint primitives (int32, int64, int, uint32, uint64, uint, tagged int64/uint64) + // - These are variable-length encodings that must be written sequentially + // ========================================================================== + if canUseUnsafe && len(s.fieldGroup.VarintFields) > 0 { + for _, field := range s.fieldGroup.VarintFields { fieldPtr := unsafe.Add(ptr, field.Offset) - switch field.StaticId { - case ConcreteTypeInt32: - *(*int32)(fieldPtr) = buf.ReadVarint32(err) - case ConcreteTypeInt64: - *(*int64)(fieldPtr) = buf.ReadVarint64(err) - case ConcreteTypeInt: - *(*int)(fieldPtr) 
= int(buf.ReadVarint64(err)) + switch field.DispatchId { + case PrimitiveVarint32DispatchId: + buf.WriteVarint32(*(*int32)(fieldPtr)) + case NotnullVarint32PtrDispatchId: + buf.WriteVarint32(**(**int32)(fieldPtr)) + case PrimitiveVarint64DispatchId: + buf.WriteVarint64(*(*int64)(fieldPtr)) + case NotnullVarint64PtrDispatchId: + buf.WriteVarint64(**(**int64)(fieldPtr)) + case PrimitiveIntDispatchId: + buf.WriteVarint64(int64(*(*int)(fieldPtr))) + case NotnullIntPtrDispatchId: + buf.WriteVarint64(int64(**(**int)(fieldPtr))) + case PrimitiveVarUint32DispatchId: + buf.WriteVaruint32(*(*uint32)(fieldPtr)) + case NotnullVarUint32PtrDispatchId: + buf.WriteVaruint32(**(**uint32)(fieldPtr)) + case PrimitiveVarUint64DispatchId: + buf.WriteVaruint64(*(*uint64)(fieldPtr)) + case NotnullVarUint64PtrDispatchId: + buf.WriteVaruint64(**(**uint64)(fieldPtr)) + case PrimitiveUintDispatchId: + buf.WriteVaruint64(uint64(*(*uint)(fieldPtr))) + case NotnullUintPtrDispatchId: + buf.WriteVaruint64(uint64(**(**uint)(fieldPtr))) + case PrimitiveTaggedInt64DispatchId: + buf.WriteTaggedInt64(*(*int64)(fieldPtr)) + case NotnullTaggedInt64PtrDispatchId: + buf.WriteTaggedInt64(**(**int64)(fieldPtr)) + case PrimitiveTaggedUint64DispatchId: + buf.WriteTaggedUint64(*(*uint64)(fieldPtr)) + case NotnullTaggedUint64PtrDispatchId: + buf.WriteTaggedUint64(**(**uint64)(fieldPtr)) + } + } + } else if len(s.fieldGroup.VarintFields) > 0 { + // Slow path for non-addressable values: use reflection + for _, field := range s.fieldGroup.VarintFields { + fieldValue := value.Field(field.FieldIndex) + switch field.DispatchId { + // Primitive types (non-pointer) + case PrimitiveVarint32DispatchId: + buf.WriteVarint32(int32(fieldValue.Int())) + case PrimitiveVarint64DispatchId: + buf.WriteVarint64(fieldValue.Int()) + case PrimitiveIntDispatchId: + buf.WriteVarint64(fieldValue.Int()) + case PrimitiveVarUint32DispatchId: + buf.WriteVaruint32(uint32(fieldValue.Uint())) + case PrimitiveVarUint64DispatchId: + 
buf.WriteVaruint64(fieldValue.Uint()) + case PrimitiveUintDispatchId: + buf.WriteVaruint64(fieldValue.Uint()) + case PrimitiveTaggedInt64DispatchId: + buf.WriteTaggedInt64(fieldValue.Int()) + case PrimitiveTaggedUint64DispatchId: + buf.WriteTaggedUint64(fieldValue.Uint()) + // Notnull pointer types - dereference and write + case NotnullVarint32PtrDispatchId: + buf.WriteVarint32(int32(fieldValue.Elem().Int())) + case NotnullVarint64PtrDispatchId: + buf.WriteVarint64(fieldValue.Elem().Int()) + case NotnullIntPtrDispatchId: + buf.WriteVarint64(fieldValue.Elem().Int()) + case NotnullVarUint32PtrDispatchId: + buf.WriteVaruint32(uint32(fieldValue.Elem().Uint())) + case NotnullVarUint64PtrDispatchId: + buf.WriteVaruint64(fieldValue.Elem().Uint()) + case NotnullUintPtrDispatchId: + buf.WriteVaruint64(fieldValue.Elem().Uint()) + case NotnullTaggedInt64PtrDispatchId: + buf.WriteTaggedInt64(fieldValue.Elem().Int()) + case NotnullTaggedUint64PtrDispatchId: + buf.WriteTaggedUint64(fieldValue.Elem().Uint()) } } } + // ========================================================================== // Phase 3: Remaining fields (strings, slices, maps, structs, enums) - // No intermediate error checks - trade error path performance for normal path - for _, field := range s.remainingFields { - s.readRemainingField(ctx, ptr, field, value) + // - These require per-field handling (ref flags, type info, serializers) + // - No intermediate error checks - trade error path performance for normal path + // ========================================================================== + for i := range s.fieldGroup.RemainingFields { + s.writeRemainingField(ctx, ptr, &s.fieldGroup.RemainingFields[i], value) } } -// readRemainingField reads a non-primitive field (string, slice, map, struct, enum) -func (s *structSerializer) readRemainingField(ctx *ReadContext, ptr unsafe.Pointer, field *FieldInfo, value reflect.Value) { +// writeRemainingField writes a non-primitive field (string, slice, map, struct, 
enum) +func (s *structSerializer) writeRemainingField(ctx *WriteContext, ptr unsafe.Pointer, field *FieldInfo, value reflect.Value) { buf := ctx.Buffer() - ctxErr := ctx.Err() - - // Fast path dispatch using pre-computed StaticId + // Fast path dispatch using pre-computed DispatchId // ptr must be valid (addressable value) if ptr != nil { fieldPtr := unsafe.Add(ptr, field.Offset) - switch field.StaticId { - case ConcreteTypeString: + switch field.DispatchId { + case StringDispatchId: + // Check isPtr first for better branch prediction + if !field.IsPtr { + // Non-pointer string: always non-null, no ref tracking needed in fast path + if field.RefMode == RefModeNone { + ctx.WriteString(*(*string)(fieldPtr)) + } else { + // RefModeNullOnly or RefModeTracking: write NotNull flag then string + buf.WriteInt8(NotNullValueFlag) + ctx.WriteString(*(*string)(fieldPtr)) + } + return + } + // Pointer to string: can be nil, may need ref tracking if field.RefMode == RefModeTracking { break // Fall through to slow path for ref tracking } - // Only read null flag if RefMode requires it (nullable field) - if field.RefMode == RefModeNullOnly { - refFlag := buf.ReadInt8(ctxErr) - if refFlag == NullFlag { - *(*string)(fieldPtr) = "" - return + strPtr := *(**string)(fieldPtr) + if strPtr == nil { + if field.RefMode == RefModeNullOnly { + buf.WriteInt8(NullFlag) + } else { + // RefModeNone: write empty string for nil pointer + ctx.WriteString("") } + return } - *(*string)(fieldPtr) = ctx.ReadString() + // Non-nil pointer + if field.RefMode == RefModeNullOnly { + buf.WriteInt8(NotNullValueFlag) + } + ctx.WriteString(*strPtr) return - case ConcreteTypeEnum: + case EnumDispatchId: // Enums don't track refs - always use fast path - fieldValue := value.Field(field.FieldIndex) - readEnumField(ctx, field, fieldValue) + writeEnumField(ctx, field, value.Field(field.FieldIndex)) return - case ConcreteTypeStringSlice: + case StringSliceDispatchId: if field.RefMode == RefModeTracking { break } - 
*(*[]string)(fieldPtr) = ctx.ReadStringSlice(field.RefMode, false) + ctx.WriteStringSlice(*(*[]string)(fieldPtr), field.RefMode, false, true) return - case ConcreteTypeBoolSlice: + case BoolSliceDispatchId: if field.RefMode == RefModeTracking { break } - *(*[]bool)(fieldPtr) = ctx.ReadBoolSlice(field.RefMode, false) + ctx.WriteBoolSlice(*(*[]bool)(fieldPtr), field.RefMode, false) return - case ConcreteTypeInt8Slice: + case Int8SliceDispatchId: if field.RefMode == RefModeTracking { break } - *(*[]int8)(fieldPtr) = ctx.ReadInt8Slice(field.RefMode, false) + ctx.WriteInt8Slice(*(*[]int8)(fieldPtr), field.RefMode, false) return - case ConcreteTypeByteSlice: + case ByteSliceDispatchId: if field.RefMode == RefModeTracking { break } - *(*[]byte)(fieldPtr) = ctx.ReadByteSlice(field.RefMode, false) + ctx.WriteByteSlice(*(*[]byte)(fieldPtr), field.RefMode, false) return - case ConcreteTypeInt16Slice: + case Int16SliceDispatchId: if field.RefMode == RefModeTracking { break } - *(*[]int16)(fieldPtr) = ctx.ReadInt16Slice(field.RefMode, false) + ctx.WriteInt16Slice(*(*[]int16)(fieldPtr), field.RefMode, false) return - case ConcreteTypeInt32Slice: + case Int32SliceDispatchId: if field.RefMode == RefModeTracking { break } - *(*[]int32)(fieldPtr) = ctx.ReadInt32Slice(field.RefMode, false) + ctx.WriteInt32Slice(*(*[]int32)(fieldPtr), field.RefMode, false) return - case ConcreteTypeInt64Slice: + case Int64SliceDispatchId: if field.RefMode == RefModeTracking { break } - *(*[]int64)(fieldPtr) = ctx.ReadInt64Slice(field.RefMode, false) + ctx.WriteInt64Slice(*(*[]int64)(fieldPtr), field.RefMode, false) return - case ConcreteTypeIntSlice: + case IntSliceDispatchId: if field.RefMode == RefModeTracking { break } - *(*[]int)(fieldPtr) = ctx.ReadIntSlice(field.RefMode, false) + ctx.WriteIntSlice(*(*[]int)(fieldPtr), field.RefMode, false) return - case ConcreteTypeUintSlice: + case UintSliceDispatchId: if field.RefMode == RefModeTracking { break } - *(*[]uint)(fieldPtr) = 
ctx.ReadUintSlice(field.RefMode, false) + ctx.WriteUintSlice(*(*[]uint)(fieldPtr), field.RefMode, false) return - case ConcreteTypeFloat32Slice: + case Float32SliceDispatchId: if field.RefMode == RefModeTracking { break } - *(*[]float32)(fieldPtr) = ctx.ReadFloat32Slice(field.RefMode, false) + ctx.WriteFloat32Slice(*(*[]float32)(fieldPtr), field.RefMode, false) return - case ConcreteTypeFloat64Slice: + case Float64SliceDispatchId: if field.RefMode == RefModeTracking { break } - *(*[]float64)(fieldPtr) = ctx.ReadFloat64Slice(field.RefMode, false) + ctx.WriteFloat64Slice(*(*[]float64)(fieldPtr), field.RefMode, false) return - case ConcreteTypeStringStringMap: + case StringStringMapDispatchId: if field.RefMode == RefModeTracking { break } - *(*map[string]string)(fieldPtr) = ctx.ReadStringStringMap(field.RefMode, false) + ctx.WriteStringStringMap(*(*map[string]string)(fieldPtr), field.RefMode, false) return - case ConcreteTypeStringInt64Map: + case StringInt64MapDispatchId: if field.RefMode == RefModeTracking { break } - *(*map[string]int64)(fieldPtr) = ctx.ReadStringInt64Map(field.RefMode, false) + ctx.WriteStringInt64Map(*(*map[string]int64)(fieldPtr), field.RefMode, false) return - case ConcreteTypeStringInt32Map: + case StringInt32MapDispatchId: if field.RefMode == RefModeTracking { break } - *(*map[string]int32)(fieldPtr) = ctx.ReadStringInt32Map(field.RefMode, false) + ctx.WriteStringInt32Map(*(*map[string]int32)(fieldPtr), field.RefMode, false) return - case ConcreteTypeStringIntMap: + case StringIntMapDispatchId: if field.RefMode == RefModeTracking { break } - *(*map[string]int)(fieldPtr) = ctx.ReadStringIntMap(field.RefMode, false) + ctx.WriteStringIntMap(*(*map[string]int)(fieldPtr), field.RefMode, false) return - case ConcreteTypeStringFloat64Map: + case StringFloat64MapDispatchId: if field.RefMode == RefModeTracking { break } - *(*map[string]float64)(fieldPtr) = ctx.ReadStringFloat64Map(field.RefMode, false) + 
ctx.WriteStringFloat64Map(*(*map[string]float64)(fieldPtr), field.RefMode, false) return - case ConcreteTypeStringBoolMap: + case StringBoolMapDispatchId: // NOTE: map[string]bool is used to represent SETs in Go xlang mode. - // We CANNOT use the fast path here because it reads MAP format, - // but the data is actually in SET format. Fall through to slow path - // which uses setSerializer to correctly read the SET format. + // We CANNOT use the fast path here because it writes MAP format, + // but the data should be written in SET format. Fall through to slow path + // which uses setSerializer to correctly write the SET format. break - case ConcreteTypeIntIntMap: + case IntIntMapDispatchId: if field.RefMode == RefModeTracking { break } - *(*map[int]int)(fieldPtr) = ctx.ReadIntIntMap(field.RefMode, false) + ctx.WriteIntIntMap(*(*map[int]int)(fieldPtr), field.RefMode, false) return - } - } - - // Slow path: use full serializer - fieldValue := value.Field(field.FieldIndex) - - if field.Serializer != nil { - field.Serializer.Read(ctx, field.RefMode, field.WriteType, field.HasGenerics, fieldValue) - } else { - ctx.ReadValue(fieldValue, RefModeTracking, true) - } -} - -// readFieldsInOrder reads fields in the order they appear in s.fields (TypeDef order) -// This is used in compatible mode where Java writes fields in TypeDef order -func (s *structSerializer) readFieldsInOrder(ctx *ReadContext, value reflect.Value) { - buf := ctx.Buffer() - canUseUnsafe := value.CanAddr() - var ptr unsafe.Pointer - if canUseUnsafe { - ptr = unsafe.Pointer(value.UnsafeAddr()) - } - err := ctx.Err() - - for _, field := range s.fields { - if field.FieldIndex < 0 { - s.skipField(ctx, field) - if ctx.HasError() { + case NullableTaggedInt64DispatchId: + // Nullable tagged INT64: write ref flag, then tagged encoding + ptr := *(**int64)(fieldPtr) + if ptr == nil { + buf.WriteInt8(NullFlag) return } - continue - } - - // Fast path for fixed-size primitive types (no ref flag) - // Use error-aware 
methods with deferred checking - if canUseUnsafe && isFixedSizePrimitive(field.StaticId, field.Referencable) { - fieldPtr := unsafe.Add(ptr, field.Offset) - switch field.StaticId { - case ConcreteTypeBool: - *(*bool)(fieldPtr) = buf.ReadBool(err) - case ConcreteTypeInt8: - *(*int8)(fieldPtr) = buf.ReadInt8(err) - case ConcreteTypeInt16: - *(*int16)(fieldPtr) = buf.ReadInt16(err) - case ConcreteTypeFloat32: - *(*float32)(fieldPtr) = buf.ReadFloat32(err) - case ConcreteTypeFloat64: - *(*float64)(fieldPtr) = buf.ReadFloat64(err) + buf.WriteInt8(NotNullValueFlag) + buf.WriteTaggedInt64(*ptr) + return + case NullableTaggedUint64DispatchId: + // Nullable tagged UINT64: write ref flag, then tagged encoding + ptr := *(**uint64)(fieldPtr) + if ptr == nil { + buf.WriteInt8(NullFlag) + return } - continue - } - - // Fast path for varint primitive types (no ref flag) - // Skip fast path if field has a serializer with a non-primitive type (e.g., NAMED_ENUM) - if canUseUnsafe && isVarintPrimitive(field.StaticId, field.Referencable) && !fieldHasNonPrimitiveSerializer(field) { - fieldPtr := unsafe.Add(ptr, field.Offset) - switch field.StaticId { - case ConcreteTypeInt32: - *(*int32)(fieldPtr) = buf.ReadVarint32(err) - case ConcreteTypeInt64: - *(*int64)(fieldPtr) = buf.ReadVarint64(err) - case ConcreteTypeInt: - *(*int)(fieldPtr) = int(buf.ReadVarint64(err)) + buf.WriteInt8(NotNullValueFlag) + buf.WriteTaggedUint64(*ptr) + return + // Nullable fixed-size types + case NullableBoolDispatchId: + ptr := *(**bool)(fieldPtr) + if ptr == nil { + buf.WriteInt8(NullFlag) + return } - continue - } - - // Get field value for slow paths - fieldValue := value.Field(field.FieldIndex) - - // Slow path for primitives when not addressable - if !canUseUnsafe && isFixedSizePrimitive(field.StaticId, field.Referencable) { - switch field.StaticId { - case ConcreteTypeBool: - fieldValue.SetBool(buf.ReadBool(err)) - case ConcreteTypeInt8: - fieldValue.SetInt(int64(buf.ReadInt8(err))) - case 
ConcreteTypeInt16: - fieldValue.SetInt(int64(buf.ReadInt16(err))) - case ConcreteTypeFloat32: - fieldValue.SetFloat(float64(buf.ReadFloat32(err))) - case ConcreteTypeFloat64: - fieldValue.SetFloat(buf.ReadFloat64(err)) + buf.WriteInt8(NotNullValueFlag) + buf.WriteBool(*ptr) + return + case NullableInt8DispatchId: + ptr := *(**int8)(fieldPtr) + if ptr == nil { + buf.WriteInt8(NullFlag) + return } - continue - } - - if !canUseUnsafe && isVarintPrimitive(field.StaticId, field.Referencable) && !fieldHasNonPrimitiveSerializer(field) { - switch field.StaticId { - case ConcreteTypeInt32: - fieldValue.SetInt(int64(buf.ReadVarint32(err))) - case ConcreteTypeInt64, ConcreteTypeInt: - fieldValue.SetInt(buf.ReadVarint64(err)) + buf.WriteInt8(NotNullValueFlag) + buf.WriteInt8(*ptr) + return + case NullableUint8DispatchId: + ptr := *(**uint8)(fieldPtr) + if ptr == nil { + buf.WriteInt8(NullFlag) + return } - continue + buf.WriteInt8(NotNullValueFlag) + buf.WriteUint8(*ptr) + return + case NullableInt16DispatchId: + ptr := *(**int16)(fieldPtr) + if ptr == nil { + buf.WriteInt8(NullFlag) + return + } + buf.WriteInt8(NotNullValueFlag) + buf.WriteInt16(*ptr) + return + case NullableUint16DispatchId: + ptr := *(**uint16)(fieldPtr) + if ptr == nil { + buf.WriteInt8(NullFlag) + return + } + buf.WriteInt8(NotNullValueFlag) + buf.WriteUint16(*ptr) + return + case NullableInt32DispatchId: + ptr := *(**int32)(fieldPtr) + if ptr == nil { + buf.WriteInt8(NullFlag) + return + } + buf.WriteInt8(NotNullValueFlag) + buf.WriteInt32(*ptr) + return + case NullableUint32DispatchId: + ptr := *(**uint32)(fieldPtr) + if ptr == nil { + buf.WriteInt8(NullFlag) + return + } + buf.WriteInt8(NotNullValueFlag) + buf.WriteUint32(*ptr) + return + case NullableInt64DispatchId: + ptr := *(**int64)(fieldPtr) + if ptr == nil { + buf.WriteInt8(NullFlag) + return + } + buf.WriteInt8(NotNullValueFlag) + buf.WriteInt64(*ptr) + return + case NullableUint64DispatchId: + ptr := *(**uint64)(fieldPtr) + if ptr == nil { + 
buf.WriteInt8(NullFlag) + return + } + buf.WriteInt8(NotNullValueFlag) + buf.WriteUint64(*ptr) + return + case NullableFloat32DispatchId: + ptr := *(**float32)(fieldPtr) + if ptr == nil { + buf.WriteInt8(NullFlag) + return + } + buf.WriteInt8(NotNullValueFlag) + buf.WriteFloat32(*ptr) + return + case NullableFloat64DispatchId: + ptr := *(**float64)(fieldPtr) + if ptr == nil { + buf.WriteInt8(NullFlag) + return + } + buf.WriteInt8(NotNullValueFlag) + buf.WriteFloat64(*ptr) + return + // Nullable varint types + case NullableVarint32DispatchId: + ptr := *(**int32)(fieldPtr) + if ptr == nil { + buf.WriteInt8(NullFlag) + return + } + buf.WriteInt8(NotNullValueFlag) + buf.WriteVarint32(*ptr) + return + case NullableVarUint32DispatchId: + ptr := *(**uint32)(fieldPtr) + if ptr == nil { + buf.WriteInt8(NullFlag) + return + } + buf.WriteInt8(NotNullValueFlag) + buf.WriteVaruint32(*ptr) + return + case NullableVarint64DispatchId: + ptr := *(**int64)(fieldPtr) + if ptr == nil { + buf.WriteInt8(NullFlag) + return + } + buf.WriteInt8(NotNullValueFlag) + buf.WriteVarint64(*ptr) + return + case NullableVarUint64DispatchId: + ptr := *(**uint64)(fieldPtr) + if ptr == nil { + buf.WriteInt8(NullFlag) + return + } + buf.WriteInt8(NotNullValueFlag) + buf.WriteVaruint64(*ptr) + return } + } - if isEnumField(field) { - readEnumField(ctx, field, fieldValue) - continue - } + // Slow path: use reflection for non-addressable values + fieldValue := value.Field(field.FieldIndex) - // Slow path for non-primitives (all need ref flag per xlang spec) - if field.Serializer != nil { - // Use pre-computed RefMode and WriteType from field initialization - field.Serializer.Read(ctx, field.RefMode, field.WriteType, field.HasGenerics, fieldValue) - } else { - ctx.ReadValue(fieldValue, RefModeTracking, true) + // Handle nullable types via reflection when ptr is nil (non-addressable) + switch field.DispatchId { + case NullableTaggedInt64DispatchId: + if fieldValue.IsNil() { + buf.WriteInt8(NullFlag) + 
return } - } -} - -// skipField skips a field that doesn't exist or is incompatible -// Uses context error state for deferred error checking. -func (s *structSerializer) skipField(ctx *ReadContext, field *FieldInfo) { - if field.FieldDef.name != "" { - fieldDefIsStructType := isStructFieldType(field.FieldDef.fieldType) - // Use FieldDef's trackingRef and nullable to determine if ref flag was written by Java - // Java writes ref flag based on its FieldDef, not Go's field type - readRefFlag := field.FieldDef.trackingRef || field.FieldDef.nullable - SkipFieldValueWithTypeFlag(ctx, field.FieldDef, readRefFlag, ctx.Compatible() && fieldDefIsStructType) + buf.WriteInt8(NotNullValueFlag) + buf.WriteTaggedInt64(fieldValue.Elem().Int()) return - } - // No FieldDef available, read into temp value - tempValue := reflect.New(field.Type).Elem() - if field.Serializer != nil { - readType := ctx.Compatible() && isStructField(field.Type) - refMode := RefModeNone - if field.Referencable { - refMode = RefModeTracking + case NullableTaggedUint64DispatchId: + if fieldValue.IsNil() { + buf.WriteInt8(NullFlag) + return } - field.Serializer.Read(ctx, refMode, readType, false, tempValue) - } else { - ctx.ReadValue(tempValue, RefModeTracking, true) - } -} - -func (s *structSerializer) ReadWithTypeInfo(ctx *ReadContext, refMode RefMode, typeInfo *TypeInfo, value reflect.Value) { - // typeInfo is already read, don't read it again - s.Read(ctx, refMode, false, false, value) -} - -// initFieldsFromContext initializes fields using context's type resolver (for WriteContext) -// initFieldsFromTypeResolver initializes fields from local struct type using TypeResolver -func (s *structSerializer) initFieldsFromTypeResolver(typeResolver *TypeResolver) error { - // If we have fieldDefs from type_def (remote meta), use them - if len(s.fieldDefs) > 0 { - return s.initFieldsFromDefsWithResolver(typeResolver) - } - - // Otherwise initialize from local struct type - type_ := s.type_ - var fields []*FieldInfo 
- var fieldNames []string - var serializers []Serializer - var typeIds []TypeId - var nullables []bool - var tagIDs []int - - for i := 0; i < type_.NumField(); i++ { - field := type_.Field(i) - firstRune, _ := utf8.DecodeRuneInString(field.Name) - if unicode.IsLower(firstRune) { - continue // skip unexported fields + buf.WriteInt8(NotNullValueFlag) + buf.WriteTaggedUint64(fieldValue.Elem().Uint()) + return + case NullableBoolDispatchId: + if fieldValue.IsNil() { + buf.WriteInt8(NullFlag) + return } - - // Parse fory struct tag and check for ignore - foryTag := ParseForyTag(field) - if foryTag.Ignore { - continue // skip ignored fields + buf.WriteInt8(NotNullValueFlag) + buf.WriteBool(fieldValue.Elem().Bool()) + return + case NullableInt8DispatchId: + if fieldValue.IsNil() { + buf.WriteInt8(NullFlag) + return } - - fieldType := field.Type - - var fieldSerializer Serializer - // For interface{} fields, don't get a serializer - use WriteValue/ReadValue instead - // which will handle polymorphic types dynamically - if fieldType.Kind() != reflect.Interface { - // Get serializer for all non-interface field types - fieldSerializer, _ = typeResolver.getSerializerByType(fieldType, true) + buf.WriteInt8(NotNullValueFlag) + buf.WriteInt8(int8(fieldValue.Elem().Int())) + return + case NullableUint8DispatchId: + if fieldValue.IsNil() { + buf.WriteInt8(NullFlag) + return } - - // Use TypeResolver helper methods for arrays and slices - if fieldType.Kind() == reflect.Array && fieldType.Elem().Kind() != reflect.Interface { - fieldSerializer, _ = typeResolver.GetArraySerializer(fieldType) - } else if fieldType.Kind() == reflect.Slice && fieldType.Elem().Kind() != reflect.Interface { - fieldSerializer, _ = typeResolver.GetSliceSerializer(fieldType) - } else if fieldType.Kind() == reflect.Slice && fieldType.Elem().Kind() == reflect.Interface { - // For struct fields with interface element types, use sliceDynSerializer - fieldSerializer = mustNewSliceDynSerializer(fieldType.Elem()) + 
buf.WriteInt8(NotNullValueFlag) + buf.WriteUint8(uint8(fieldValue.Elem().Uint())) + return + case NullableInt16DispatchId: + if fieldValue.IsNil() { + buf.WriteInt8(NullFlag) + return } - - // Get TypeId for the serializer, fallback to deriving from kind - fieldTypeId := typeResolver.getTypeIdByType(fieldType) - if fieldTypeId == 0 { - fieldTypeId = typeIdFromKind(fieldType) + buf.WriteInt8(NotNullValueFlag) + buf.WriteInt16(int16(fieldValue.Elem().Int())) + return + case NullableUint16DispatchId: + if fieldValue.IsNil() { + buf.WriteInt8(NullFlag) + return } - // Calculate nullable flag for serialization (wire format): - // - In xlang mode: Per xlang spec, fields are NON-NULLABLE by default. - // Only pointer types are nullable by default. - // - In native mode: Go's natural semantics apply - slice/map/interface can be nil, - // so they are nullable by default. - // Can be overridden by explicit fory tag `fory:"nullable"`. - internalId := TypeId(fieldTypeId & 0xFF) - isEnum := internalId == ENUM || internalId == NAMED_ENUM - - // Determine nullable based on mode - // In xlang mode: only pointer types are nullable by default (per xlang spec) - // In native mode: Go's natural semantics - all nil-able types are nullable - // This ensures proper interoperability with Java/other languages in xlang mode. 
- var nullableFlag bool - if typeResolver.fory.config.IsXlang { - // xlang mode: only pointer types are nullable by default per xlang spec - // Slices and maps are NOT nullable - they serialize as empty when nil - nullableFlag = fieldType.Kind() == reflect.Ptr - } else { - // Native mode: Go's natural semantics - all nil-able types are nullable - nullableFlag = fieldType.Kind() == reflect.Ptr || - fieldType.Kind() == reflect.Slice || - fieldType.Kind() == reflect.Map || - fieldType.Kind() == reflect.Interface + buf.WriteInt8(NotNullValueFlag) + buf.WriteUint16(uint16(fieldValue.Elem().Uint())) + return + case NullableInt32DispatchId: + if fieldValue.IsNil() { + buf.WriteInt8(NullFlag) + return } - if foryTag.NullableSet { - // Override nullable flag if explicitly set in fory tag - nullableFlag = foryTag.Nullable + buf.WriteInt8(NotNullValueFlag) + buf.WriteInt32(int32(fieldValue.Elem().Int())) + return + case NullableUint32DispatchId: + if fieldValue.IsNil() { + buf.WriteInt8(NullFlag) + return } - // Primitives are never nullable, regardless of tag - if isNonNullablePrimitiveKind(fieldType.Kind()) && !isEnum { - nullableFlag = false + buf.WriteInt8(NotNullValueFlag) + buf.WriteUint32(uint32(fieldValue.Elem().Uint())) + return + case NullableInt64DispatchId: + if fieldValue.IsNil() { + buf.WriteInt8(NullFlag) + return } - - // Calculate ref tracking - use tag override if explicitly set - trackRef := typeResolver.TrackRef() - if foryTag.RefSet { - trackRef = foryTag.Ref + buf.WriteInt8(NotNullValueFlag) + buf.WriteInt64(fieldValue.Elem().Int()) + return + case NullableUint64DispatchId: + if fieldValue.IsNil() { + buf.WriteInt8(NullFlag) + return } - - // Pre-compute RefMode based on (possibly overridden) trackRef and nullable - // For pointer-to-struct fields, enable ref tracking when trackRef is enabled, - // regardless of nullable flag. This is necessary to detect circular references. 
- refMode := RefModeNone - isStructPointer := fieldType.Kind() == reflect.Ptr && fieldType.Elem().Kind() == reflect.Struct - if trackRef && (nullableFlag || isStructPointer) { - refMode = RefModeTracking - } else if nullableFlag { - refMode = RefModeNullOnly + buf.WriteInt8(NotNullValueFlag) + buf.WriteUint64(fieldValue.Elem().Uint()) + return + case NullableFloat32DispatchId: + if fieldValue.IsNil() { + buf.WriteInt8(NullFlag) + return } - // Pre-compute WriteType: true for struct fields in compatible mode - writeType := typeResolver.Compatible() && isStructField(fieldType) - - // Pre-compute StaticId, with special handling for enum fields - staticId := GetStaticTypeId(fieldType) - if fieldSerializer != nil { - if _, ok := fieldSerializer.(*enumSerializer); ok { - staticId = ConcreteTypeEnum - } else if ptrSer, ok := fieldSerializer.(*ptrToValueSerializer); ok { - if _, ok := ptrSer.valueSerializer.(*enumSerializer); ok { - staticId = ConcreteTypeEnum - } - } + buf.WriteInt8(NotNullValueFlag) + buf.WriteFloat32(float32(fieldValue.Elem().Float())) + return + case NullableFloat64DispatchId: + if fieldValue.IsNil() { + buf.WriteInt8(NullFlag) + return } - if DebugOutputEnabled() { - fmt.Printf("[fory-debug] initFieldsFromTypeResolver: field=%s type=%v staticId=%d refMode=%v nullableFlag=%v serializer=%T\n", - SnakeCase(field.Name), fieldType, staticId, refMode, nullableFlag, fieldSerializer) + buf.WriteInt8(NotNullValueFlag) + buf.WriteFloat64(fieldValue.Elem().Float()) + return + case NullableVarint32DispatchId: + if fieldValue.IsNil() { + buf.WriteInt8(NullFlag) + return } - - fieldInfo := &FieldInfo{ - Name: SnakeCase(field.Name), - Offset: field.Offset, - Type: fieldType, - StaticId: staticId, - TypeId: fieldTypeId, - Serializer: fieldSerializer, - Referencable: nullableFlag, // Use same logic as TypeDef's nullable flag for consistent ref handling - FieldIndex: i, - RefMode: refMode, - WriteType: writeType, - HasGenerics: isCollectionType(fieldTypeId), // 
Container fields have declared element types - TagID: foryTag.ID, - HasForyTag: foryTag.HasTag, - TagRefSet: foryTag.RefSet, - TagRef: foryTag.Ref, - TagNullableSet: foryTag.NullableSet, - TagNullable: foryTag.Nullable, + buf.WriteInt8(NotNullValueFlag) + buf.WriteVarint32(int32(fieldValue.Elem().Int())) + return + case NullableVarUint32DispatchId: + if fieldValue.IsNil() { + buf.WriteInt8(NullFlag) + return } - fields = append(fields, fieldInfo) - fieldNames = append(fieldNames, fieldInfo.Name) - serializers = append(serializers, fieldSerializer) - typeIds = append(typeIds, fieldTypeId) - nullables = append(nullables, nullableFlag) - tagIDs = append(tagIDs, foryTag.ID) + buf.WriteInt8(NotNullValueFlag) + buf.WriteVaruint32(uint32(fieldValue.Elem().Uint())) + return + case NullableVarint64DispatchId: + if fieldValue.IsNil() { + buf.WriteInt8(NullFlag) + return + } + buf.WriteInt8(NotNullValueFlag) + buf.WriteVarint64(fieldValue.Elem().Int()) + return + case NullableVarUint64DispatchId: + if fieldValue.IsNil() { + buf.WriteInt8(NullFlag) + return + } + buf.WriteInt8(NotNullValueFlag) + buf.WriteVaruint64(fieldValue.Elem().Uint()) + return } - // Sort fields according to specification using nullable info and tag IDs for consistent ordering - serializers, fieldNames = sortFields(typeResolver, fieldNames, serializers, typeIds, nullables, tagIDs) - order := make(map[string]int, len(fieldNames)) - for idx, name := range fieldNames { - order[name] = idx + // Fall back to serializer for other types + if field.Serializer != nil { + field.Serializer.Write(ctx, field.RefMode, field.WriteType, field.HasGenerics, fieldValue) + } else { + ctx.WriteValue(fieldValue, RefModeTracking, true) } - - sort.SliceStable(fields, func(i, j int) bool { - oi, okI := order[fields[i].Name] - oj, okJ := order[fields[j].Name] - switch { - case okI && okJ: - return oi < oj - case okI: - return true - case okJ: - return false - default: - return false - } - }) - - s.fields = fields - 
s.groupFields() - return nil } -// groupFields categorizes fields into fixedFields, varintFields, and remainingFields. -// Also computes pre-computed sizes and WriteOffset for batch buffer reservation. -func (s *structSerializer) groupFields() { - s.fixedFields = nil - s.varintFields = nil - s.remainingFields = nil - s.fixedSize = 0 - s.maxVarintSize = 0 - - for _, field := range s.fields { - // Fields with non-primitive serializers (NAMED_ENUM, NAMED_STRUCT, etc.) - // must go to remainingFields to use their serializer's type info writing - hasNonPrimitive := fieldHasNonPrimitiveSerializer(field) - if DebugOutputEnabled() { - fmt.Printf("[fory-debug] groupFields: field=%s TypeId=%d internalId=%d hasNonPrimitive=%v\n", - field.Name, field.TypeId, field.TypeId&0xFF, hasNonPrimitive) - } - if hasNonPrimitive { - s.remainingFields = append(s.remainingFields, field) - } else if isFixedSizePrimitive(field.StaticId, field.Referencable) { - // Compute FixedSize and WriteOffset for this field - field.FixedSize = getFixedSizeByStaticId(field.StaticId) - field.WriteOffset = s.fixedSize - s.fixedSize += field.FixedSize - s.fixedFields = append(s.fixedFields, field) - } else if isVarintPrimitive(field.StaticId, field.Referencable) { - s.maxVarintSize += getVarintMaxSizeByStaticId(field.StaticId) - s.varintFields = append(s.varintFields, field) - } else { - s.remainingFields = append(s.remainingFields, field) +func (s *structSerializer) Read(ctx *ReadContext, refMode RefMode, readType bool, hasGenerics bool, value reflect.Value) { + buf := ctx.Buffer() + ctxErr := ctx.Err() + switch refMode { + case RefModeTracking: + refID, refErr := ctx.RefResolver().TryPreserveRefId(buf) + if refErr != nil { + ctx.SetError(FromError(refErr)) + return } - } -} - -// initFieldsFromDefsWithResolver initializes fields from remote fieldDefs using typeResolver -func (s *structSerializer) initFieldsFromDefsWithResolver(typeResolver *TypeResolver) error { - type_ := s.type_ - if type_ == nil { - // 
Type is not known - we'll create an interface{} placeholder - // This happens when deserializing unknown types in compatible mode - // For now, we'll create fields that discard all data - var fields []*FieldInfo - for _, def := range s.fieldDefs { - fieldSerializer, _ := getFieldTypeSerializerWithResolver(typeResolver, def.fieldType) - remoteTypeInfo, _ := def.fieldType.getTypeInfoWithResolver(typeResolver) - remoteType := remoteTypeInfo.Type - if remoteType == nil { - remoteType = reflect.TypeOf((*interface{})(nil)).Elem() - } - // Get TypeId from FieldType's TypeId method - fieldTypeId := def.fieldType.TypeId() - // Pre-compute RefMode based on trackRef and FieldDef flags - refMode := RefModeNone - if def.trackingRef { - refMode = RefModeTracking - } else if def.nullable { - refMode = RefModeNullOnly - } - // Pre-compute WriteType: true for struct fields in compatible mode - writeType := typeResolver.Compatible() && isStructField(remoteType) - - // Pre-compute StaticId, with special handling for enum fields - staticId := GetStaticTypeId(remoteType) - if fieldSerializer != nil { - if _, ok := fieldSerializer.(*enumSerializer); ok { - staticId = ConcreteTypeEnum - } else if ptrSer, ok := fieldSerializer.(*ptrToValueSerializer); ok { - if _, ok := ptrSer.valueSerializer.(*enumSerializer); ok { - staticId = ConcreteTypeEnum - } - } + if refID < int32(NotNullValueFlag) { + // Reference found + obj := ctx.RefResolver().GetReadObject(refID) + if obj.IsValid() { + value.Set(obj) } - - fieldInfo := &FieldInfo{ - Name: def.name, - Offset: 0, - Type: remoteType, - StaticId: staticId, - TypeId: fieldTypeId, - Serializer: fieldSerializer, - Referencable: def.nullable, // Use remote nullable flag - FieldIndex: -1, // Mark as non-existent field to discard data - FieldDef: def, // Save original FieldDef for skipping - RefMode: refMode, - WriteType: writeType, - HasGenerics: isCollectionType(fieldTypeId), // Container fields have declared element types + return + } + case 
RefModeNullOnly: + flag := buf.ReadInt8(ctxErr) + if flag == NullFlag { + return + } + } + if readType { + // Read type info - in compatible mode this returns the serializer with remote fieldDefs + typeID := buf.ReadVaruint32Small7(ctxErr) + internalTypeID := TypeId(typeID & 0xFF) + // Check if this is a struct type that needs type meta reading + if IsNamespacedType(TypeId(typeID)) || internalTypeID == COMPATIBLE_STRUCT || internalTypeID == STRUCT { + // For struct types in compatible mode, use the serializer from TypeInfo + typeInfo := ctx.TypeResolver().readTypeInfoWithTypeID(buf, typeID, ctxErr) + // Use the serializer from TypeInfo which has the remote field definitions + if structSer, ok := typeInfo.Serializer.(*structSerializer); ok && len(structSer.fieldDefs) > 0 { + structSer.ReadData(ctx, value.Type(), value) + return } - fields = append(fields, fieldInfo) } - s.fields = fields - s.groupFields() - s.typeDefDiffers = true // Unknown type, must use ordered reading - return nil } + s.ReadData(ctx, value.Type(), value) +} - // Build maps from field names and tag IDs to struct field indices - fieldNameToIndex := make(map[string]int) - fieldNameToOffset := make(map[string]uintptr) - fieldNameToType := make(map[string]reflect.Type) - fieldTagIDToIndex := make(map[int]int) // tag ID -> struct field index - fieldTagIDToOffset := make(map[int]uintptr) // tag ID -> field offset - fieldTagIDToType := make(map[int]reflect.Type) // tag ID -> field type - fieldTagIDToName := make(map[int]string) // tag ID -> snake_case field name - for i := 0; i < type_.NumField(); i++ { - field := type_.Field(i) +func (s *structSerializer) ReadWithTypeInfo(ctx *ReadContext, refMode RefMode, typeInfo *TypeInfo, value reflect.Value) { + // typeInfo is already read, don't read it again + s.Read(ctx, refMode, false, false, value) +} - // Parse fory tag and skip ignored fields - foryTag := ParseForyTag(field) - if foryTag.Ignore { - continue - } +func (s *structSerializer) ReadData(ctx 
*ReadContext, type_ reflect.Type, value reflect.Value) { + // Early error check - skip all intermediate checks for normal path performance + if ctx.HasError() { + return + } - name := SnakeCase(field.Name) - fieldNameToIndex[name] = i - fieldNameToOffset[name] = field.Offset - fieldNameToType[name] = field.Type + // Lazy initialization + if !s.initialized { + if err := s.initialize(ctx.TypeResolver()); err != nil { + ctx.SetError(FromError(err)) + return + } + } - // Also index by tag ID if present - if foryTag.ID >= 0 { - fieldTagIDToIndex[foryTag.ID] = i - fieldTagIDToOffset[foryTag.ID] = field.Offset - fieldTagIDToType[foryTag.ID] = field.Type - fieldTagIDToName[foryTag.ID] = name + buf := ctx.Buffer() + if value.Kind() == reflect.Ptr { + if value.IsNil() { + value.Set(reflect.New(type_.Elem())) } + value = value.Elem() + type_ = type_.Elem() } - var fields []*FieldInfo - - for _, def := range s.fieldDefs { - fieldSerializer, err := getFieldTypeSerializerWithResolver(typeResolver, def.fieldType) - if err != nil || fieldSerializer == nil { - // If we can't get serializer from typeID, try to get it from the Go type - // This can happen when the type isn't registered in typeIDToTypeInfo - remoteTypeInfo, _ := def.fieldType.getTypeInfoWithResolver(typeResolver) - if remoteTypeInfo.Type != nil { - fieldSerializer, _ = typeResolver.getSerializerByType(remoteTypeInfo.Type, true) - } - } - - // Get the remote type from fieldDef - remoteTypeInfo, _ := def.fieldType.getTypeInfoWithResolver(typeResolver) - remoteType := remoteTypeInfo.Type - // Track if type lookup failed - we'll need to skip such fields - // Note: DynamicFieldType.getTypeInfoWithResolver returns interface{} (not nil) when lookup fails - emptyInterfaceType := reflect.TypeOf((*interface{})(nil)).Elem() - typeLookupFailed := remoteType == nil || remoteType == emptyInterfaceType - if remoteType == nil { - remoteType = emptyInterfaceType + // In compatible mode with meta share, struct hash is not written + if 
!ctx.Compatible() { + err := ctx.Err() + structHash := buf.ReadInt32(err) + if structHash != s.structHash { + ctx.SetError(HashMismatchError(structHash, s.structHash, s.type_.String())) + return } + } - // For struct-like fields, even if TypeDef lookup fails, we can try to read - // the field because type resolution happens at read time from the buffer. - // The type name might map to a different local type. - isStructLikeField := isStructFieldType(def.fieldType) + // Fail fast if value is not addressable - we require unsafe pointer access + if !value.CanAddr() { + ctx.SetError(SerializationError("cannot deserialize struct " + s.type_.Name() + " into non-addressable value")) + return + } - // Try to find corresponding local field - // First try to match by tag ID (if remote def uses tag ID) - // Then fall back to matching by field name - fieldIndex := -1 - var offset uintptr - var fieldType reflect.Type - var localFieldName string - var localType reflect.Type - var exists bool + // Use ordered reading when TypeDef differs from local type (schema evolution) + if s.typeDefDiffers { + s.readFieldsInOrder(ctx, value) + return + } - if def.tagID >= 0 { - // Try to match by tag ID - if idx, ok := fieldTagIDToIndex[def.tagID]; ok { - exists = true - fieldIndex = idx // Will be overwritten if types are compatible - localType = fieldTagIDToType[def.tagID] - offset = fieldTagIDToOffset[def.tagID] - localFieldName = fieldTagIDToName[def.tagID] - _ = fieldIndex // Use to avoid compiler warning, will be set properly below - } - } + // ========================================================================== + // Grouped reading for matching types (optimized path) + // - Types match, so all fields exist locally (no FieldIndex < 0 checks) + // - Use UnsafeGet at pre-computed offsets, update reader index once per phase + // ========================================================================== + ptr := unsafe.Pointer(value.UnsafeAddr()) - // Fall back to name-based matching 
if tag ID match failed - if !exists && def.name != "" { - if idx, ok := fieldNameToIndex[def.name]; ok { - exists = true - localType = fieldNameToType[def.name] - offset = fieldNameToOffset[def.name] - localFieldName = def.name - _ = idx // Will be set properly below - } - } + // Phase 1: Fixed-size primitives (inline unsafe reads with endian handling) + if s.fieldGroup.FixedSize > 0 { + baseOffset := buf.ReaderIndex() + data := buf.GetData() - if exists { - idx := fieldNameToIndex[localFieldName] - if def.tagID >= 0 { - idx = fieldTagIDToIndex[def.tagID] - } - // Check if types are compatible - // For primitive types: skip if types don't match - // For struct-like types: allow read even if TypeDef lookup failed, - // because runtime type resolution by name might work - shouldRead := false - isPolymorphicField := def.fieldType.TypeId() == UNKNOWN - defTypeId := def.fieldType.TypeId() - // Check if field is an enum - either by type ID or by serializer type - // The type ID may be a composite value with namespace bits, so check the low 8 bits - internalDefTypeId := defTypeId & 0xFF - isEnumField := internalDefTypeId == NAMED_ENUM || internalDefTypeId == ENUM - if !isEnumField && fieldSerializer != nil { - _, isEnumField = fieldSerializer.(*enumSerializer) - } - if isPolymorphicField && localType.Kind() == reflect.Interface { - // For polymorphic (UNKNOWN) fields with interface{} local type, - // allow reading - the actual type will be determined at runtime - shouldRead = true - fieldType = localType - } else if typeLookupFailed && isEnumField { - // For enum fields with failed TypeDef lookup (NAMED_ENUM stores by namespace/typename, not typeId), - // check if local field is a numeric type (Go enums are int-based) - // Also handle pointer enum fields (*EnumType) - localKind := localType.Kind() - elemKind := localKind - if localKind == reflect.Ptr { - elemKind = localType.Elem().Kind() + for _, field := range s.fieldGroup.FixedFields { + fieldPtr := unsafe.Add(ptr, 
field.Offset) + bufOffset := baseOffset + field.WriteOffset + switch field.DispatchId { + case PrimitiveBoolDispatchId: + *(*bool)(fieldPtr) = data[bufOffset] != 0 + case PrimitiveInt8DispatchId: + *(*int8)(fieldPtr) = int8(data[bufOffset]) + case PrimitiveUint8DispatchId: + *(*uint8)(fieldPtr) = data[bufOffset] + case PrimitiveInt16DispatchId: + if isLittleEndian { + *(*int16)(fieldPtr) = *(*int16)(unsafe.Pointer(&data[bufOffset])) + } else { + *(*int16)(fieldPtr) = int16(binary.LittleEndian.Uint16(data[bufOffset:])) } - if isNumericKind(elemKind) { - shouldRead = true - fieldType = localType - // Get the serializer for the base type (the enum type, not the pointer) - baseType := localType - if localKind == reflect.Ptr { - baseType = localType.Elem() - } - fieldSerializer, _ = typeResolver.getSerializerByType(baseType, true) + case PrimitiveUint16DispatchId: + if isLittleEndian { + *(*uint16)(fieldPtr) = *(*uint16)(unsafe.Pointer(&data[bufOffset])) + } else { + *(*uint16)(fieldPtr) = binary.LittleEndian.Uint16(data[bufOffset:]) } - } else if typeLookupFailed && isStructLikeField { - // For struct fields with failed TypeDef lookup, check if local field can hold a struct - localKind := localType.Kind() - if localKind == reflect.Ptr { - localKind = localType.Elem().Kind() + case PrimitiveInt32DispatchId: + if isLittleEndian { + *(*int32)(fieldPtr) = *(*int32)(unsafe.Pointer(&data[bufOffset])) + } else { + *(*int32)(fieldPtr) = int32(binary.LittleEndian.Uint32(data[bufOffset:])) } - if localKind == reflect.Struct || localKind == reflect.Interface { - shouldRead = true - fieldType = localType // Use local type for struct fields + case PrimitiveUint32DispatchId: + if isLittleEndian { + *(*uint32)(fieldPtr) = *(*uint32)(unsafe.Pointer(&data[bufOffset])) + } else { + *(*uint32)(fieldPtr) = binary.LittleEndian.Uint32(data[bufOffset:]) } - } else if typeLookupFailed && (defTypeId == LIST || defTypeId == SET) { - // For collection fields with failed type lookup (e.g., List 
with interface element type), - // check if local type is a slice with interface element type (e.g., []Animal) - // The type lookup fails because sliceSerializer doesn't support interface elements - if localType.Kind() == reflect.Slice && localType.Elem().Kind() == reflect.Interface { - shouldRead = true - fieldType = localType + case PrimitiveInt64DispatchId: + if isLittleEndian { + *(*int64)(fieldPtr) = *(*int64)(unsafe.Pointer(&data[bufOffset])) + } else { + *(*int64)(fieldPtr) = int64(binary.LittleEndian.Uint64(data[bufOffset:])) } - } else if !typeLookupFailed && typesCompatible(localType, remoteType) { - shouldRead = true - fieldType = localType - } - - if shouldRead { - fieldIndex = idx - // offset was already set above when matching by tag ID or field name - // For struct-like fields with failed type lookup, get the serializer for the local type - if typeLookupFailed && isStructLikeField && fieldSerializer == nil { - fieldSerializer, _ = typeResolver.getSerializerByType(localType, true) + case PrimitiveUint64DispatchId: + if isLittleEndian { + *(*uint64)(fieldPtr) = *(*uint64)(unsafe.Pointer(&data[bufOffset])) + } else { + *(*uint64)(fieldPtr) = binary.LittleEndian.Uint64(data[bufOffset:]) } - // For collection fields with interface element types, use sliceDynSerializer - if typeLookupFailed && (defTypeId == LIST || defTypeId == SET) && fieldSerializer == nil { - if localType.Kind() == reflect.Slice && localType.Elem().Kind() == reflect.Interface { - fieldSerializer = mustNewSliceDynSerializer(localType.Elem()) - } + case PrimitiveFloat32DispatchId: + if isLittleEndian { + *(*float32)(fieldPtr) = *(*float32)(unsafe.Pointer(&data[bufOffset])) + } else { + *(*float32)(fieldPtr) = math.Float32frombits(binary.LittleEndian.Uint32(data[bufOffset:])) } - // If local type is *T and remote type is T, we need the serializer for *T - // This handles Java's Integer/Long (nullable boxed types) mapping to Go's *int32/*int64 - if localType.Kind() == reflect.Ptr && 
localType.Elem() == remoteType { - fieldSerializer, _ = typeResolver.getSerializerByType(localType, true) + case PrimitiveFloat64DispatchId: + if isLittleEndian { + *(*float64)(fieldPtr) = *(*float64)(unsafe.Pointer(&data[bufOffset])) + } else { + *(*float64)(fieldPtr) = math.Float64frombits(binary.LittleEndian.Uint64(data[bufOffset:])) } - // For pointer enum fields (*EnumType), get the serializer for the base enum type - // The struct read/write code will handle pointer dereferencing - if isEnumField && localType.Kind() == reflect.Ptr { - baseType := localType.Elem() - fieldSerializer, _ = typeResolver.getSerializerByType(baseType, true) - if DebugOutputEnabled() { - fmt.Printf("[fory-debug] pointer enum field %s: localType=%v baseType=%v serializer=%T\n", - def.name, localType, baseType, fieldSerializer) - } + // Notnull pointer types - allocate and set pointer + case NotnullBoolPtrDispatchId: + v := new(bool) + *v = data[bufOffset] != 0 + *(**bool)(fieldPtr) = v + case NotnullInt8PtrDispatchId: + v := new(int8) + *v = int8(data[bufOffset]) + *(**int8)(fieldPtr) = v + case NotnullUint8PtrDispatchId: + v := new(uint8) + *v = data[bufOffset] + *(**uint8)(fieldPtr) = v + case NotnullInt16PtrDispatchId: + v := new(int16) + if isLittleEndian { + *v = *(*int16)(unsafe.Pointer(&data[bufOffset])) + } else { + *v = int16(binary.LittleEndian.Uint16(data[bufOffset:])) } - // For array fields, use array serializers (not slice serializers) even if typeID maps to slice serializer - // The typeID (INT16_ARRAY, etc.) 
is shared between arrays and slices, but we need the correct - // serializer based on the actual Go type - if localType.Kind() == reflect.Array { - elemType := localType.Elem() - switch elemType.Kind() { - case reflect.Bool: - fieldSerializer = boolArraySerializer{arrayType: localType} - case reflect.Int8: - fieldSerializer = int8ArraySerializer{arrayType: localType} - case reflect.Int16: - fieldSerializer = int16ArraySerializer{arrayType: localType} - case reflect.Int32: - fieldSerializer = int32ArraySerializer{arrayType: localType} - case reflect.Int64: - fieldSerializer = int64ArraySerializer{arrayType: localType} - case reflect.Uint8: - fieldSerializer = uint8ArraySerializer{arrayType: localType} - case reflect.Float32: - fieldSerializer = float32ArraySerializer{arrayType: localType} - case reflect.Float64: - fieldSerializer = float64ArraySerializer{arrayType: localType} - case reflect.Int: - if reflect.TypeOf(int(0)).Size() == 8 { - fieldSerializer = int64ArraySerializer{arrayType: localType} - } else { - fieldSerializer = int32ArraySerializer{arrayType: localType} - } - } + *(**int16)(fieldPtr) = v + case NotnullUint16PtrDispatchId: + v := new(uint16) + if isLittleEndian { + *v = *(*uint16)(unsafe.Pointer(&data[bufOffset])) + } else { + *v = binary.LittleEndian.Uint16(data[bufOffset:]) } - } else { - // Types are incompatible or unknown - use remote type but mark field as not settable - fieldType = remoteType - fieldIndex = -1 - offset = 0 // Don't set offset for incompatible fields - } - } else { - // Field doesn't exist locally, use type from fieldDef - fieldType = remoteType - } - - // Get TypeId from FieldType's TypeId method - fieldTypeId := def.fieldType.TypeId() - // Pre-compute RefMode based on FieldDef flags (trackingRef and nullable) - refMode := RefModeNone - if def.trackingRef { - refMode = RefModeTracking - } else if def.nullable { - refMode = RefModeNullOnly - } - // Pre-compute WriteType: true for struct fields in compatible mode - writeType := 
typeResolver.Compatible() && isStructField(fieldType) - - // Pre-compute StaticId, with special handling for enum fields - staticId := GetStaticTypeId(fieldType) - if fieldSerializer != nil { - if _, ok := fieldSerializer.(*enumSerializer); ok { - staticId = ConcreteTypeEnum - } else if ptrSer, ok := fieldSerializer.(*ptrToValueSerializer); ok { - if _, ok := ptrSer.valueSerializer.(*enumSerializer); ok { - staticId = ConcreteTypeEnum + *(**uint16)(fieldPtr) = v + case NotnullInt32PtrDispatchId: + v := new(int32) + if isLittleEndian { + *v = *(*int32)(unsafe.Pointer(&data[bufOffset])) + } else { + *v = int32(binary.LittleEndian.Uint32(data[bufOffset:])) + } + *(**int32)(fieldPtr) = v + case NotnullUint32PtrDispatchId: + v := new(uint32) + if isLittleEndian { + *v = *(*uint32)(unsafe.Pointer(&data[bufOffset])) + } else { + *v = binary.LittleEndian.Uint32(data[bufOffset:]) + } + *(**uint32)(fieldPtr) = v + case NotnullInt64PtrDispatchId: + v := new(int64) + if isLittleEndian { + *v = *(*int64)(unsafe.Pointer(&data[bufOffset])) + } else { + *v = int64(binary.LittleEndian.Uint64(data[bufOffset:])) + } + *(**int64)(fieldPtr) = v + case NotnullUint64PtrDispatchId: + v := new(uint64) + if isLittleEndian { + *v = *(*uint64)(unsafe.Pointer(&data[bufOffset])) + } else { + *v = binary.LittleEndian.Uint64(data[bufOffset:]) + } + *(**uint64)(fieldPtr) = v + case NotnullFloat32PtrDispatchId: + v := new(float32) + if isLittleEndian { + *v = *(*float32)(unsafe.Pointer(&data[bufOffset])) + } else { + *v = math.Float32frombits(binary.LittleEndian.Uint32(data[bufOffset:])) + } + *(**float32)(fieldPtr) = v + case NotnullFloat64PtrDispatchId: + v := new(float64) + if isLittleEndian { + *v = *(*float64)(unsafe.Pointer(&data[bufOffset])) + } else { + *v = math.Float64frombits(binary.LittleEndian.Uint64(data[bufOffset:])) } + *(**float64)(fieldPtr) = v } } - - // Determine field name: use local field name if matched, otherwise use def.name - fieldName := def.name - if localFieldName != "" 
{ - fieldName = localFieldName - } - - fieldInfo := &FieldInfo{ - Name: fieldName, - Offset: offset, - Type: fieldType, - StaticId: staticId, - TypeId: fieldTypeId, - Serializer: fieldSerializer, - Referencable: def.nullable, // Use remote nullable flag - FieldIndex: fieldIndex, - FieldDef: def, // Save original FieldDef for skipping - RefMode: refMode, - WriteType: writeType, - HasGenerics: isCollectionType(fieldTypeId), // Container fields have declared element types - TagID: def.tagID, - HasForyTag: def.tagID >= 0, - } - fields = append(fields, fieldInfo) + // Update reader index ONCE after all fixed fields + buf.SetReaderIndex(baseOffset + s.fieldGroup.FixedSize) } - s.fields = fields - s.groupFields() - - // Compute typeDefDiffers: true if any field doesn't exist locally or has type mismatch - // When typeDefDiffers is false, we can use grouped reading for better performance - s.typeDefDiffers = false - for _, field := range fields { - if field.FieldIndex < 0 { - // Field exists in remote TypeDef but not locally - s.typeDefDiffers = true - break + // Phase 2: Varint primitives (must read sequentially - variable length) + // Note: For tagged int64/uint64, we can't use unsafe reads because they need bounds checking + if len(s.fieldGroup.VarintFields) > 0 { + err := ctx.Err() + for _, field := range s.fieldGroup.VarintFields { + fieldPtr := unsafe.Add(ptr, field.Offset) + switch field.DispatchId { + case PrimitiveVarint32DispatchId: + *(*int32)(fieldPtr) = buf.ReadVarint32(err) + case PrimitiveVarint64DispatchId: + *(*int64)(fieldPtr) = buf.ReadVarint64(err) + case PrimitiveIntDispatchId: + *(*int)(fieldPtr) = int(buf.ReadVarint64(err)) + case PrimitiveVarUint32DispatchId: + *(*uint32)(fieldPtr) = buf.ReadVaruint32(err) + case PrimitiveVarUint64DispatchId: + *(*uint64)(fieldPtr) = buf.ReadVaruint64(err) + case PrimitiveUintDispatchId: + *(*uint)(fieldPtr) = uint(buf.ReadVaruint64(err)) + case PrimitiveTaggedInt64DispatchId: + // Tagged INT64: use buffer's tagged 
decoding (4 bytes for small, 9 for large) + *(*int64)(fieldPtr) = buf.ReadTaggedInt64(err) + case PrimitiveTaggedUint64DispatchId: + // Tagged UINT64: use buffer's tagged decoding (4 bytes for small, 9 for large) + *(*uint64)(fieldPtr) = buf.ReadTaggedUint64(err) + // Notnull pointer types - allocate and set pointer + case NotnullVarint32PtrDispatchId: + v := new(int32) + *v = buf.ReadVarint32(err) + *(**int32)(fieldPtr) = v + case NotnullVarint64PtrDispatchId: + v := new(int64) + *v = buf.ReadVarint64(err) + *(**int64)(fieldPtr) = v + case NotnullIntPtrDispatchId: + v := new(int) + *v = int(buf.ReadVarint64(err)) + *(**int)(fieldPtr) = v + case NotnullVarUint32PtrDispatchId: + v := new(uint32) + *v = buf.ReadVaruint32(err) + *(**uint32)(fieldPtr) = v + case NotnullVarUint64PtrDispatchId: + v := new(uint64) + *v = buf.ReadVaruint64(err) + *(**uint64)(fieldPtr) = v + case NotnullUintPtrDispatchId: + v := new(uint) + *v = uint(buf.ReadVaruint64(err)) + *(**uint)(fieldPtr) = v + case NotnullTaggedInt64PtrDispatchId: + v := new(int64) + *v = buf.ReadTaggedInt64(err) + *(**int64)(fieldPtr) = v + case NotnullTaggedUint64PtrDispatchId: + v := new(uint64) + *v = buf.ReadTaggedUint64(err) + *(**uint64)(fieldPtr) = v + } } } - return nil -} - -// isNonNullablePrimitiveKind returns true for Go kinds that map to Java primitive types -// These are the types that cannot be null in Java and should have nullable=0 in hash computation -func isNonNullablePrimitiveKind(kind reflect.Kind) bool { - switch kind { - case reflect.Bool, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, - reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, - reflect.Float32, reflect.Float64, reflect.Int, reflect.Uint: - return true - default: - return false + // Phase 3: Remaining fields (strings, slices, maps, structs, enums) + // No intermediate error checks - trade error path performance for normal path + for i := range s.fieldGroup.RemainingFields { + s.readRemainingField(ctx, ptr, 
&s.fieldGroup.RemainingFields[i], value) } } -// isInternalTypeWithoutTypeMeta checks if a type is serialized without type meta per xlang spec. -// Per the spec (struct field serialization), these types use format: | ref/null flag | value data | (NO type meta) -// - Nullable primitives (*int32, *float64, etc.): | null flag | field value | -// - Strings (string): | null flag | value data | -// - Binary ([]byte): | null flag | value data | -// - List/Slice: | ref meta | value data | -// - Set: | ref meta | value data | -// - Map: | ref meta | value data | -// Only struct/enum/ext types need type meta: | ref flag | type meta | value data | -func isInternalTypeWithoutTypeMeta(t reflect.Type) bool { - kind := t.Kind() - // String type - no type meta needed - if kind == reflect.String { - return true - } - // Slice (list or byte slice) - no type meta needed - if kind == reflect.Slice { - return true - } - // Map type - no type meta needed - if kind == reflect.Map { - return true - } - // Pointer to primitive - no type meta needed - if kind == reflect.Ptr { - elemKind := t.Elem().Kind() - switch elemKind { - case reflect.Bool, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, - reflect.Int, reflect.Float32, reflect.Float64, reflect.String: - return true +// readRemainingField reads a non-primitive field (string, slice, map, struct, enum) +func (s *structSerializer) readRemainingField(ctx *ReadContext, ptr unsafe.Pointer, field *FieldInfo, value reflect.Value) { + buf := ctx.Buffer() + ctxErr := ctx.Err() + // Fast path dispatch using pre-computed DispatchId + // ptr must be valid (addressable value) + if ptr != nil { + fieldPtr := unsafe.Add(ptr, field.Offset) + switch field.DispatchId { + case StringDispatchId: + // Check isPtr first for better branch prediction + if !field.IsPtr { + // Non-pointer string: no ref tracking needed in fast path + if field.RefMode == RefModeNone { + *(*string)(fieldPtr) = ctx.ReadString() + } else { + // RefModeNullOnly or 
RefModeTracking: read NotNull flag then string + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + *(*string)(fieldPtr) = "" + } else { + *(*string)(fieldPtr) = ctx.ReadString() + } + } + return + } + // Pointer to string: can be nil, may need ref tracking + if field.RefMode == RefModeTracking { + break // Fall through to slow path for ref tracking + } + if field.RefMode == RefModeNullOnly { + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + // Leave as nil + return + } + } + // Allocate new string and store pointer + str := ctx.ReadString() + sp := new(string) + *sp = str + *(**string)(fieldPtr) = sp + return + case EnumDispatchId: + // Enums don't track refs - always use fast path + fieldValue := value.Field(field.FieldIndex) + readEnumField(ctx, field, fieldValue) + return + case StringSliceDispatchId: + if field.RefMode == RefModeTracking { + break + } + *(*[]string)(fieldPtr) = ctx.ReadStringSlice(field.RefMode, false) + return + case BoolSliceDispatchId: + if field.RefMode == RefModeTracking { + break + } + *(*[]bool)(fieldPtr) = ctx.ReadBoolSlice(field.RefMode, false) + return + case Int8SliceDispatchId: + if field.RefMode == RefModeTracking { + break + } + *(*[]int8)(fieldPtr) = ctx.ReadInt8Slice(field.RefMode, false) + return + case ByteSliceDispatchId: + if field.RefMode == RefModeTracking { + break + } + *(*[]byte)(fieldPtr) = ctx.ReadByteSlice(field.RefMode, false) + return + case Int16SliceDispatchId: + if field.RefMode == RefModeTracking { + break + } + *(*[]int16)(fieldPtr) = ctx.ReadInt16Slice(field.RefMode, false) + return + case Int32SliceDispatchId: + if field.RefMode == RefModeTracking { + break + } + *(*[]int32)(fieldPtr) = ctx.ReadInt32Slice(field.RefMode, false) + return + case Int64SliceDispatchId: + if field.RefMode == RefModeTracking { + break + } + *(*[]int64)(fieldPtr) = ctx.ReadInt64Slice(field.RefMode, false) + return + case IntSliceDispatchId: + if field.RefMode == RefModeTracking { + break + } + 
*(*[]int)(fieldPtr) = ctx.ReadIntSlice(field.RefMode, false) + return + case UintSliceDispatchId: + if field.RefMode == RefModeTracking { + break + } + *(*[]uint)(fieldPtr) = ctx.ReadUintSlice(field.RefMode, false) + return + case Float32SliceDispatchId: + if field.RefMode == RefModeTracking { + break + } + *(*[]float32)(fieldPtr) = ctx.ReadFloat32Slice(field.RefMode, false) + return + case Float64SliceDispatchId: + if field.RefMode == RefModeTracking { + break + } + *(*[]float64)(fieldPtr) = ctx.ReadFloat64Slice(field.RefMode, false) + return + case StringStringMapDispatchId: + if field.RefMode == RefModeTracking { + break + } + *(*map[string]string)(fieldPtr) = ctx.ReadStringStringMap(field.RefMode, false) + return + case StringInt64MapDispatchId: + if field.RefMode == RefModeTracking { + break + } + *(*map[string]int64)(fieldPtr) = ctx.ReadStringInt64Map(field.RefMode, false) + return + case StringInt32MapDispatchId: + if field.RefMode == RefModeTracking { + break + } + *(*map[string]int32)(fieldPtr) = ctx.ReadStringInt32Map(field.RefMode, false) + return + case StringIntMapDispatchId: + if field.RefMode == RefModeTracking { + break + } + *(*map[string]int)(fieldPtr) = ctx.ReadStringIntMap(field.RefMode, false) + return + case StringFloat64MapDispatchId: + if field.RefMode == RefModeTracking { + break + } + *(*map[string]float64)(fieldPtr) = ctx.ReadStringFloat64Map(field.RefMode, false) + return + case StringBoolMapDispatchId: + // NOTE: map[string]bool is used to represent SETs in Go xlang mode. + // We CANNOT use the fast path here because it reads MAP format, + // but the data is actually in SET format. Fall through to slow path + // which uses setSerializer to correctly read the SET format. 
+ break + case IntIntMapDispatchId: + if field.RefMode == RefModeTracking { + break + } + *(*map[int]int)(fieldPtr) = ctx.ReadIntIntMap(field.RefMode, false) + return + case NullableTaggedInt64DispatchId: + // Nullable tagged INT64: read ref flag, then tagged encoding + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + // Leave pointer as nil + return + } + // Allocate new int64 and store pointer + v := new(int64) + *v = buf.ReadTaggedInt64(ctxErr) + *(**int64)(fieldPtr) = v + return + case NullableTaggedUint64DispatchId: + // Nullable tagged UINT64: read ref flag, then tagged encoding + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + // Leave pointer as nil + return + } + // Allocate new uint64 and store pointer + v := new(uint64) + *v = buf.ReadTaggedUint64(ctxErr) + *(**uint64)(fieldPtr) = v + return + // Nullable fixed-size types + case NullableBoolDispatchId: + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + return + } + v := new(bool) + *v = buf.ReadBool(ctxErr) + *(**bool)(fieldPtr) = v + return + case NullableInt8DispatchId: + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + return + } + v := new(int8) + *v = buf.ReadInt8(ctxErr) + *(**int8)(fieldPtr) = v + return + case NullableUint8DispatchId: + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + return + } + v := new(uint8) + *v = buf.ReadUint8(ctxErr) + *(**uint8)(fieldPtr) = v + return + case NullableInt16DispatchId: + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + return + } + v := new(int16) + *v = buf.ReadInt16(ctxErr) + *(**int16)(fieldPtr) = v + return + case NullableUint16DispatchId: + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + return + } + v := new(uint16) + *v = buf.ReadUint16(ctxErr) + *(**uint16)(fieldPtr) = v + return + case NullableInt32DispatchId: + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + return + } + v := new(int32) + *v = buf.ReadInt32(ctxErr) + *(**int32)(fieldPtr) = v + return 
+ case NullableUint32DispatchId: + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + return + } + v := new(uint32) + *v = buf.ReadUint32(ctxErr) + *(**uint32)(fieldPtr) = v + return + case NullableInt64DispatchId: + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + return + } + v := new(int64) + *v = buf.ReadInt64(ctxErr) + *(**int64)(fieldPtr) = v + return + case NullableUint64DispatchId: + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + return + } + v := new(uint64) + *v = buf.ReadUint64(ctxErr) + *(**uint64)(fieldPtr) = v + return + case NullableFloat32DispatchId: + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + return + } + v := new(float32) + *v = buf.ReadFloat32(ctxErr) + *(**float32)(fieldPtr) = v + return + case NullableFloat64DispatchId: + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + return + } + v := new(float64) + *v = buf.ReadFloat64(ctxErr) + *(**float64)(fieldPtr) = v + return + // Nullable varint types + case NullableVarint32DispatchId: + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + return + } + v := new(int32) + *v = buf.ReadVarint32(ctxErr) + *(**int32)(fieldPtr) = v + return + case NullableVarUint32DispatchId: + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + return + } + v := new(uint32) + *v = buf.ReadVaruint32(ctxErr) + *(**uint32)(fieldPtr) = v + return + case NullableVarint64DispatchId: + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + return + } + v := new(int64) + *v = buf.ReadVarint64(ctxErr) + *(**int64)(fieldPtr) = v + return + case NullableVarUint64DispatchId: + refFlag := buf.ReadInt8(ctxErr) + if refFlag == NullFlag { + return + } + v := new(uint64) + *v = buf.ReadVaruint64(ctxErr) + *(**uint64)(fieldPtr) = v + return } } - return false -} -// isStructField checks if a type is a struct type (directly or via pointer) -func isStructField(t reflect.Type) bool { - if t.Kind() == reflect.Struct { - return true - } - if t.Kind() == 
reflect.Ptr && t.Elem().Kind() == reflect.Struct { - return true + // Slow path for RefModeTracking cases that break from the switch above + fieldValue := value.Field(field.FieldIndex) + if field.Serializer != nil { + field.Serializer.Read(ctx, field.RefMode, field.WriteType, field.HasGenerics, fieldValue) + } else { + ctx.ReadValue(fieldValue, RefModeTracking, true) } - return false } -// isStructFieldType checks if a FieldType represents a type that needs type info written -// This is used to determine if type info was written for the field in compatible mode -// In compatible mode, Java writes type info for struct and ext types, but NOT for enum types -// Enum fields only have null flag + ordinal, no type ID -func isStructFieldType(ft FieldType) bool { - if ft == nil { - return false - } - typeId := ft.TypeId() - // Check base type IDs that need type info (struct and ext, NOT enum) - // Always check the internal type ID (low byte) to handle composite type IDs - // which may be negative when stored as int32 (e.g., -2288 = (short)128784) - internalTypeId := TypeId(typeId & 0xFF) - switch internalTypeId { - case STRUCT, NAMED_STRUCT, COMPATIBLE_STRUCT, NAMED_COMPATIBLE_STRUCT, - EXT, NAMED_EXT: - return true - } - return false -} +// readFieldsInOrder reads fields in the order they appear in s.fields (TypeDef order) +// This is used in compatible mode where Java writes fields in TypeDef order +// Precondition: value.CanAddr() must be true (checked by caller) +func (s *structSerializer) readFieldsInOrder(ctx *ReadContext, value reflect.Value) { + buf := ctx.Buffer() + ptr := unsafe.Pointer(value.UnsafeAddr()) + err := ctx.Err() + for i := range s.fields { + field := &s.fields[i] + if field.FieldIndex < 0 { + s.skipField(ctx, field) + if ctx.HasError() { + return + } + continue + } -// FieldFingerprintInfo contains the information needed to compute a field's fingerprint. 
-type FieldFingerprintInfo struct { - // FieldID is the tag ID if configured (>= 0), or -1 to use field name - FieldID int - // FieldName is the snake_case field name (used when FieldID < 0) - FieldName string - // TypeID is the Fory type ID for the field - TypeID TypeId - // Ref is true if reference tracking is enabled for this field - Ref bool - // Nullable is true if null flag is written for this field - Nullable bool -} + // Fast path for fixed-size primitive types (no ref flag from remote schema) + if isFixedSizePrimitive(field.DispatchId, field.Nullable) { + fieldPtr := unsafe.Add(ptr, field.Offset) + switch field.DispatchId { + // PrimitiveXxxDispatchId: local field is non-pointer type + case PrimitiveBoolDispatchId: + *(*bool)(fieldPtr) = buf.ReadBool(err) + case PrimitiveInt8DispatchId: + *(*int8)(fieldPtr) = buf.ReadInt8(err) + case PrimitiveUint8DispatchId: + *(*uint8)(fieldPtr) = uint8(buf.ReadInt8(err)) + case PrimitiveInt16DispatchId: + *(*int16)(fieldPtr) = buf.ReadInt16(err) + case PrimitiveUint16DispatchId: + *(*uint16)(fieldPtr) = buf.ReadUint16(err) + case PrimitiveInt32DispatchId: + *(*int32)(fieldPtr) = buf.ReadInt32(err) + case PrimitiveUint32DispatchId: + *(*uint32)(fieldPtr) = buf.ReadUint32(err) + case PrimitiveInt64DispatchId: + *(*int64)(fieldPtr) = buf.ReadInt64(err) + case PrimitiveUint64DispatchId: + *(*uint64)(fieldPtr) = buf.ReadUint64(err) + case PrimitiveFloat32DispatchId: + *(*float32)(fieldPtr) = buf.ReadFloat32(err) + case PrimitiveFloat64DispatchId: + *(*float64)(fieldPtr) = buf.ReadFloat64(err) + // NotnullXxxPtrDispatchId: local field is *T with nullable=false + case NotnullBoolPtrDispatchId: + v := new(bool) + *v = buf.ReadBool(err) + *(**bool)(fieldPtr) = v + case NotnullInt8PtrDispatchId: + v := new(int8) + *v = buf.ReadInt8(err) + *(**int8)(fieldPtr) = v + case NotnullUint8PtrDispatchId: + v := new(uint8) + *v = uint8(buf.ReadInt8(err)) + *(**uint8)(fieldPtr) = v + case NotnullInt16PtrDispatchId: + v := new(int16) + *v = 
buf.ReadInt16(err) + *(**int16)(fieldPtr) = v + case NotnullUint16PtrDispatchId: + v := new(uint16) + *v = buf.ReadUint16(err) + *(**uint16)(fieldPtr) = v + case NotnullInt32PtrDispatchId: + v := new(int32) + *v = buf.ReadInt32(err) + *(**int32)(fieldPtr) = v + case NotnullUint32PtrDispatchId: + v := new(uint32) + *v = buf.ReadUint32(err) + *(**uint32)(fieldPtr) = v + case NotnullInt64PtrDispatchId: + v := new(int64) + *v = buf.ReadInt64(err) + *(**int64)(fieldPtr) = v + case NotnullUint64PtrDispatchId: + v := new(uint64) + *v = buf.ReadUint64(err) + *(**uint64)(fieldPtr) = v + case NotnullFloat32PtrDispatchId: + v := new(float32) + *v = buf.ReadFloat32(err) + *(**float32)(fieldPtr) = v + case NotnullFloat64PtrDispatchId: + v := new(float64) + *v = buf.ReadFloat64(err) + *(**float64)(fieldPtr) = v + } + continue + } -// ComputeStructFingerprint computes the fingerprint string for a struct type. -// -// Fingerprint Format: -// -// Each field contributes: ",,,;" -// Fields are sorted by field_id_or_name (lexicographically as strings) -// -// Field Components: -// - field_id_or_name: Tag ID as string if configured (e.g., "0", "1"), otherwise snake_case field name -// - type_id: Fory TypeId as decimal string (e.g., "4" for INT32) -// - ref: "1" if reference tracking enabled, "0" otherwise -// - nullable: "1" if null flag is written, "0" otherwise -// -// Example fingerprints: -// - With tag IDs: "0,4,0,0;1,4,0,1;2,9,0,1;" -// - With field names: "age,4,0,0;name,9,0,1;" -// -// The fingerprint is used to compute a hash for struct schema versioning. -// Different nullable/ref settings will produce different fingerprints, -// ensuring schema compatibility is properly validated. 
-func ComputeStructFingerprint(fields []FieldFingerprintInfo) string { - // Sort fields by their identifier (field ID or name) - type fieldWithKey struct { - field FieldFingerprintInfo - sortKey string - } - fieldsWithKeys := make([]fieldWithKey, 0, len(fields)) - for _, field := range fields { - var sortKey string - if field.FieldID >= 0 { - sortKey = fmt.Sprintf("%d", field.FieldID) - } else { - sortKey = field.FieldName + // Fast path for varint primitive types (no ref flag from remote schema) + if isVarintPrimitive(field.DispatchId, field.Nullable) && !fieldHasNonPrimitiveSerializer(field) { + fieldPtr := unsafe.Add(ptr, field.Offset) + switch field.DispatchId { + // PrimitiveXxxDispatchId: local field is non-pointer type + case PrimitiveVarint32DispatchId: + *(*int32)(fieldPtr) = buf.ReadVarint32(err) + case PrimitiveVarint64DispatchId: + *(*int64)(fieldPtr) = buf.ReadVarint64(err) + case PrimitiveVarUint32DispatchId: + *(*uint32)(fieldPtr) = buf.ReadVaruint32(err) + case PrimitiveVarUint64DispatchId: + *(*uint64)(fieldPtr) = buf.ReadVaruint64(err) + case PrimitiveTaggedInt64DispatchId: + *(*int64)(fieldPtr) = buf.ReadTaggedInt64(err) + case PrimitiveTaggedUint64DispatchId: + *(*uint64)(fieldPtr) = buf.ReadTaggedUint64(err) + case PrimitiveIntDispatchId: + *(*int)(fieldPtr) = int(buf.ReadVarint64(err)) + case PrimitiveUintDispatchId: + *(*uint)(fieldPtr) = uint(buf.ReadVaruint64(err)) + // NotnullXxxPtrDispatchId: local field is *T with nullable=false + case NotnullVarint32PtrDispatchId: + v := new(int32) + *v = buf.ReadVarint32(err) + *(**int32)(fieldPtr) = v + case NotnullVarint64PtrDispatchId: + v := new(int64) + *v = buf.ReadVarint64(err) + *(**int64)(fieldPtr) = v + case NotnullVarUint32PtrDispatchId: + v := new(uint32) + *v = buf.ReadVaruint32(err) + *(**uint32)(fieldPtr) = v + case NotnullVarUint64PtrDispatchId: + v := new(uint64) + *v = buf.ReadVaruint64(err) + *(**uint64)(fieldPtr) = v + case NotnullTaggedInt64PtrDispatchId: + v := new(int64) + *v = 
buf.ReadTaggedInt64(err) + *(**int64)(fieldPtr) = v + case NotnullTaggedUint64PtrDispatchId: + v := new(uint64) + *v = buf.ReadTaggedUint64(err) + *(**uint64)(fieldPtr) = v + case NotnullIntPtrDispatchId: + v := new(int) + *v = int(buf.ReadVarint64(err)) + *(**int)(fieldPtr) = v + case NotnullUintPtrDispatchId: + v := new(uint) + *v = uint(buf.ReadVaruint64(err)) + *(**uint)(fieldPtr) = v + } + continue } - fieldsWithKeys = append(fieldsWithKeys, fieldWithKey{field: field, sortKey: sortKey}) - } - - sort.Slice(fieldsWithKeys, func(i, j int) bool { - return fieldsWithKeys[i].sortKey < fieldsWithKeys[j].sortKey - }) - var sb strings.Builder - for _, fw := range fieldsWithKeys { - // Field identifier - sb.WriteString(fw.sortKey) - sb.WriteString(",") - // Type ID - sb.WriteString(fmt.Sprintf("%d", fw.field.TypeID)) - sb.WriteString(",") - // Ref flag - if fw.field.Ref { - sb.WriteString("1") - } else { - sb.WriteString("0") - } - sb.WriteString(",") - // Nullable flag - if fw.field.Nullable { - sb.WriteString("1") - } else { - sb.WriteString("0") - } - sb.WriteString(";") - } - return sb.String() -} + // Get field value for nullable primitives and non-primitives + fieldValue := value.Field(field.FieldIndex) -func (s *structSerializer) computeHash() int32 { - // Build FieldFingerprintInfo for each field - fields := make([]FieldFingerprintInfo, 0, len(s.fields)) - for _, field := range s.fields { - var typeId TypeId - isEnumField := false - if field.Serializer == nil { - typeId = UNKNOWN - } else { - typeId = field.TypeId - // Check if this is an enum serializer (directly or wrapped in ptrToValueSerializer) - if _, ok := field.Serializer.(*enumSerializer); ok { - isEnumField = true - typeId = UNKNOWN - } else if ptrSer, ok := field.Serializer.(*ptrToValueSerializer); ok { - if _, ok := ptrSer.valueSerializer.(*enumSerializer); ok { - isEnumField = true - typeId = UNKNOWN + // Handle nullable fixed-size primitives (read ref flag + fixed bytes) + // These have 
Nullable=true but use fixed encoding, not varint + if isNullableFixedSizePrimitive(field.DispatchId) { + refFlag := buf.ReadInt8(err) + if refFlag == NullFlag { + // Leave pointer as nil (or zero for non-pointer local types) + continue + } + // Read fixed-size value based on dispatch ID + // Handle both pointer and non-pointer local field types (schema evolution) + switch field.DispatchId { + case NullableBoolDispatchId: + v := buf.ReadBool(err) + if field.IsPtr { + fieldValue.Set(reflect.ValueOf(&v)) + } else { + fieldValue.SetBool(v) } - } - // For user-defined types (struct, ext types), use UNKNOWN in fingerprint - // This matches Java's behavior where user-defined types return UNKNOWN - // to ensure consistent fingerprint computation across languages - if isUserDefinedType(int16(typeId)) { - typeId = UNKNOWN - } - // For fixed-size arrays with primitive elements, use primitive array type IDs - if field.Type.Kind() == reflect.Array { - elemKind := field.Type.Elem().Kind() - switch elemKind { - case reflect.Int8: - typeId = INT8_ARRAY - case reflect.Int16: - typeId = INT16_ARRAY - case reflect.Int32: - typeId = INT32_ARRAY - case reflect.Int64: - typeId = INT64_ARRAY - case reflect.Float32: - typeId = FLOAT32_ARRAY - case reflect.Float64: - typeId = FLOAT64_ARRAY - default: - typeId = LIST + case NullableInt8DispatchId: + v := buf.ReadInt8(err) + if field.IsPtr { + fieldValue.Set(reflect.ValueOf(&v)) + } else { + fieldValue.SetInt(int64(v)) } - } else if field.Type.Kind() == reflect.Slice { - typeId = LIST - } else if field.Type.Kind() == reflect.Map { - // map[T]bool is used to represent a Set in Go - if field.Type.Elem().Kind() == reflect.Bool { - typeId = SET + case NullableUint8DispatchId: + v := uint8(buf.ReadInt8(err)) + if field.IsPtr { + fieldValue.Set(reflect.ValueOf(&v)) } else { - typeId = MAP + fieldValue.SetUint(uint64(v)) + } + case NullableInt16DispatchId: + v := buf.ReadInt16(err) + if field.IsPtr { + fieldValue.Set(reflect.ValueOf(&v)) + } else { 
+ fieldValue.SetInt(int64(v)) + } + case NullableUint16DispatchId: + v := buf.ReadUint16(err) + if field.IsPtr { + fieldValue.Set(reflect.ValueOf(&v)) + } else { + fieldValue.SetUint(uint64(v)) + } + case NullableInt32DispatchId: + v := buf.ReadInt32(err) + if field.IsPtr { + fieldValue.Set(reflect.ValueOf(&v)) + } else { + fieldValue.SetInt(int64(v)) + } + case NullableUint32DispatchId: + v := buf.ReadUint32(err) + if field.IsPtr { + fieldValue.Set(reflect.ValueOf(&v)) + } else { + fieldValue.SetUint(uint64(v)) + } + case NullableInt64DispatchId: + v := buf.ReadInt64(err) + if field.IsPtr { + fieldValue.Set(reflect.ValueOf(&v)) + } else { + fieldValue.SetInt(v) + } + case NullableUint64DispatchId: + v := buf.ReadUint64(err) + if field.IsPtr { + fieldValue.Set(reflect.ValueOf(&v)) + } else { + fieldValue.SetUint(v) + } + case NullableFloat32DispatchId: + v := buf.ReadFloat32(err) + if field.IsPtr { + fieldValue.Set(reflect.ValueOf(&v)) + } else { + fieldValue.SetFloat(float64(v)) + } + case NullableFloat64DispatchId: + v := buf.ReadFloat64(err) + if field.IsPtr { + fieldValue.Set(reflect.ValueOf(&v)) + } else { + fieldValue.SetFloat(v) } } + continue } - // Determine nullable flag for xlang compatibility: - // - Default: false for ALL fields (xlang default - aligned with all languages) - // - Primitives are always non-nullable - // - Can be overridden by explicit fory tag - nullable := false // Default to nullable=false for xlang mode - if field.TagNullableSet { - // Use explicit tag value if set - nullable = field.TagNullable + // Handle nullable varint primitives (read ref flag + varint) + if isNullableVarintPrimitive(field.DispatchId) { + refFlag := buf.ReadInt8(err) + if refFlag == NullFlag { + // Leave pointer as nil (or zero for non-pointer local types) + continue + } + // Read varint value based on dispatch ID + // Handle both pointer and non-pointer local field types (schema evolution) + switch field.DispatchId { + case NullableVarint32DispatchId: + v := 
buf.ReadVarint32(err) + if field.IsPtr { + fieldValue.Set(reflect.ValueOf(&v)) + } else { + fieldValue.SetInt(int64(v)) + } + case NullableVarint64DispatchId: + v := buf.ReadVarint64(err) + if field.IsPtr { + fieldValue.Set(reflect.ValueOf(&v)) + } else { + fieldValue.SetInt(v) + } + case NullableVarUint32DispatchId: + v := buf.ReadVaruint32(err) + if field.IsPtr { + fieldValue.Set(reflect.ValueOf(&v)) + } else { + fieldValue.SetUint(uint64(v)) + } + case NullableVarUint64DispatchId: + v := buf.ReadVaruint64(err) + if field.IsPtr { + fieldValue.Set(reflect.ValueOf(&v)) + } else { + fieldValue.SetUint(v) + } + case NullableTaggedInt64DispatchId: + v := buf.ReadTaggedInt64(err) + if field.IsPtr { + fieldValue.Set(reflect.ValueOf(&v)) + } else { + fieldValue.SetInt(v) + } + case NullableTaggedUint64DispatchId: + v := buf.ReadTaggedUint64(err) + if field.IsPtr { + fieldValue.Set(reflect.ValueOf(&v)) + } else { + fieldValue.SetUint(v) + } + case NullableIntDispatchId: + v := int(buf.ReadVarint64(err)) + if field.IsPtr { + fieldValue.Set(reflect.ValueOf(&v)) + } else { + fieldValue.SetInt(int64(v)) + } + case NullableUintDispatchId: + v := uint(buf.ReadVaruint64(err)) + if field.IsPtr { + fieldValue.Set(reflect.ValueOf(&v)) + } else { + fieldValue.SetUint(uint64(v)) + } + } + continue } - // Primitives are never nullable, regardless of tag - if isNonNullablePrimitiveKind(field.Type.Kind()) && !isEnumField { - nullable = false + if isEnumField(field) { + readEnumField(ctx, field, fieldValue) + continue } - fields = append(fields, FieldFingerprintInfo{ - FieldID: field.TagID, - FieldName: SnakeCase(field.Name), - TypeID: typeId, - // Ref is based on explicit tag annotation only, NOT runtime ref_tracking config - // This allows fingerprint to be computed at compile time for C++/Rust - Ref: field.TagRefSet && field.TagRef, - Nullable: nullable, - }) - } - - hashString := ComputeStructFingerprint(fields) - data := []byte(hashString) - h1, _ := murmur3.Sum128WithSeed(data, 47) 
- hash := int32(h1 & 0xFFFFFFFF) - - if DebugOutputEnabled() { - fmt.Printf("[Go][fory-debug] struct %v version fingerprint=\"%s\" version hash=%d\n", s.type_, hashString, hash) - } - - if hash == 0 { - panic(fmt.Errorf("hash for type %v is 0", s.type_)) + // Slow path for non-primitives (all need ref flag per xlang spec) + if field.Serializer != nil { + // Use pre-computed RefMode and WriteType from field initialization + field.Serializer.Read(ctx, field.RefMode, field.WriteType, field.HasGenerics, fieldValue) + } else { + ctx.ReadValue(fieldValue, RefModeTracking, true) + } } - return hash } -// GetStructHash returns the struct hash for a given type using the provided TypeResolver. -// This is used by codegen serializers to get the hash at runtime. -func GetStructHash(type_ reflect.Type, resolver *TypeResolver) int32 { - ser := newStructSerializer(type_, "", nil) - if err := ser.initialize(resolver); err != nil { - panic(fmt.Errorf("failed to initialize struct serializer for hash computation: %v", err)) +// skipField skips a field that doesn't exist or is incompatible +// Uses context error state for deferred error checking. +func (s *structSerializer) skipField(ctx *ReadContext, field *FieldInfo) { + if field.FieldDef.name != "" { + fieldDefIsStructType := isStructFieldType(field.FieldDef.fieldType) + // Use FieldDef's trackingRef and nullable to determine if ref flag was written by Java + // Java writes ref flag based on its FieldDef, not Go's field type + readRefFlag := field.FieldDef.trackingRef || field.FieldDef.nullable + SkipFieldValueWithTypeFlag(ctx, field.FieldDef, readRefFlag, ctx.Compatible() && fieldDefIsStructType) + return } - return ser.structHash -} - -// Field sorting helpers - -type triple struct { - typeID int16 - serializer Serializer - name string - nullable bool - tagID int // -1 = use field name, >=0 = use tag ID for sorting -} - -// getFieldSortKey returns the sort key for a field. 
-// If tagID >= 0, returns the tag ID as string (for tag-based sorting). -// Otherwise returns the snake_case field name. -func (t triple) getSortKey() string { - if t.tagID >= 0 { - return fmt.Sprintf("%d", t.tagID) + // No FieldDef available, read into temp value + tempValue := reflect.New(field.Type).Elem() + if field.Serializer != nil { + readType := ctx.Compatible() && isStructField(field.Type) + refMode := RefModeNone + if field.Nullable { + refMode = RefModeTracking + } + field.Serializer.Read(ctx, refMode, readType, false, tempValue) + } else { + ctx.ReadValue(tempValue, RefModeTracking, true) } - return SnakeCase(t.name) } -// sortFields sorts fields with nullable information to match Java's field ordering. -// Java separates primitive types (int, long) from boxed types (Integer, Long). -// In Go, this corresponds to non-pointer primitives vs pointer-to-primitive. -// When tagIDs are provided (>= 0), fields are sorted by tag ID instead of field name. -func sortFields( - typeResolver *TypeResolver, - fieldNames []string, - serializers []Serializer, - typeIds []TypeId, - nullables []bool, - tagIDs []int, -) ([]Serializer, []string) { - var ( - typeTriples []triple - others []triple - userDefined []triple - ) - - for i, name := range fieldNames { - ser := serializers[i] - tagID := TagIDUseFieldName // default: use field name - if tagIDs != nil && i < len(tagIDs) { - tagID = tagIDs[i] - } - if ser == nil { - others = append(others, triple{UNKNOWN, nil, name, nullables[i], tagID}) - continue +// writeEnumField writes an enum field respecting the field's RefMode. +// Java writes enum ordinals as unsigned Varuint32Small7, not signed zigzag. +// RefMode determines whether null flag is written, regardless of whether the local type is a pointer. +// This is important for compatible mode where remote TypeDef's nullable flag controls the wire format. 
+func writeEnumField(ctx *WriteContext, field *FieldInfo, fieldValue reflect.Value) { + buf := ctx.Buffer() + isPointer := field.IsPtr + + // Write null flag based on RefMode only (not based on whether local type is pointer) + if field.RefMode != RefModeNone { + if isPointer && fieldValue.IsNil() { + buf.WriteInt8(NullFlag) + return } - typeTriples = append(typeTriples, triple{typeIds[i], ser, name, nullables[i], tagID}) + buf.WriteInt8(NotNullValueFlag) } - // Java orders: primitives, boxed, finals, others, collections, maps - // primitives = non-nullable primitive types (int, long, etc.) - // boxed = nullable boxed types (Integer, Long, etc. which are pointers in Go) - var primitives, boxed, collection, setFields, maps, otherInternalTypeFields []triple - for _, t := range typeTriples { - switch { - case isPrimitiveType(t.typeID): - // Separate non-nullable primitives from nullable (boxed) primitives - if t.nullable { - boxed = append(boxed, t) - } else { - primitives = append(primitives, t) - } - case isListType(t.typeID), isPrimitiveArrayType(t.typeID): - collection = append(collection, t) - case isSetType(t.typeID): - setFields = append(setFields, t) - case isMapType(t.typeID): - maps = append(maps, t) - case isUserDefinedType(t.typeID): - userDefined = append(userDefined, t) - case t.typeID == UNKNOWN: - others = append(others, t) - default: - otherInternalTypeFields = append(otherInternalTypeFields, t) + // Get the actual value to serialize + targetValue := fieldValue + if isPointer { + if fieldValue.IsNil() { + // RefModeNone but nil pointer - this is a protocol error in schema-consistent mode + // Write zero value as fallback + targetValue = reflect.Zero(field.Type.Elem()) + } else { + targetValue = fieldValue.Elem() } } - // Sort primitives (non-nullable) - same logic as boxed - // Java sorts by: compressed types last, then by size (largest first), then by type ID (descending) - sortPrimitiveSlice := func(s []triple) { - sort.Slice(s, func(i, j int) bool { 
- ai, aj := s[i], s[j] - compressI := ai.typeID == INT32 || ai.typeID == INT64 || - ai.typeID == VAR32 || ai.typeID == VAR64 - compressJ := aj.typeID == INT32 || aj.typeID == INT64 || - aj.typeID == VAR32 || aj.typeID == VAR64 - if compressI != compressJ { - return !compressI && compressJ - } - szI, szJ := getPrimitiveTypeSize(ai.typeID), getPrimitiveTypeSize(aj.typeID) - if szI != szJ { - return szI > szJ - } - // Tie-breaker: type ID descending (higher type ID first), then field name - if ai.typeID != aj.typeID { - return ai.typeID > aj.typeID - } - return ai.getSortKey() < aj.getSortKey() - }) - } - sortPrimitiveSlice(primitives) - sortPrimitiveSlice(boxed) - sortByTypeIDThenName := func(s []triple) { - sort.Slice(s, func(i, j int) bool { - if s[i].typeID != s[j].typeID { - return s[i].typeID < s[j].typeID - } - return s[i].getSortKey() < s[j].getSortKey() - }) - } - sortTuple := func(s []triple) { - sort.Slice(s, func(i, j int) bool { - return s[i].getSortKey() < s[j].getSortKey() - }) - } - sortByTypeIDThenName(otherInternalTypeFields) - sortTuple(others) - sortTuple(collection) - sortTuple(setFields) - sortTuple(maps) - sortTuple(userDefined) - - // Java order: primitives, boxed, finals, collections, maps, others - // finals = String and other monomorphic types (otherInternalTypeFields) - // others = userDefined types (structs, enums) and unknown types - all := make([]triple, 0, len(fieldNames)) - all = append(all, primitives...) - all = append(all, boxed...) - all = append(all, otherInternalTypeFields...) // finals (String, etc.) - all = append(all, collection...) - all = append(all, setFields...) - all = append(all, maps...) - all = append(all, userDefined...) // others (structs, enums) - all = append(all, others...) 
// unknown types - - outSer := make([]Serializer, len(all)) - outNam := make([]string, len(all)) - for i, t := range all { - outSer[i] = t.serializer - outNam[i] = t.name + + // For pointer enum fields, the serializer is ptrToValueSerializer wrapping enumSerializer. + // We need to call the inner enumSerializer directly with the dereferenced value. + if ptrSer, ok := field.Serializer.(*ptrToValueSerializer); ok { + ptrSer.valueSerializer.WriteData(ctx, targetValue) + } else { + field.Serializer.WriteData(ctx, targetValue) } - return outSer, outNam } -func typesCompatible(actual, expected reflect.Type) bool { - if actual == nil || expected == nil { - return false - } - if actual == expected { - return true - } - // interface{} can accept any value - if actual.Kind() == reflect.Interface && actual.NumMethod() == 0 { - return true - } - if actual.AssignableTo(expected) || expected.AssignableTo(actual) { - return true - } - if actual.Kind() == reflect.Ptr && actual.Elem() == expected { - return true - } - if expected.Kind() == reflect.Ptr && expected.Elem() == actual { - return true - } - if actual.Kind() == expected.Kind() { - switch actual.Kind() { - case reflect.Slice, reflect.Array: - return elementTypesCompatible(actual.Elem(), expected.Elem()) - case reflect.Map: - return elementTypesCompatible(actual.Key(), expected.Key()) && elementTypesCompatible(actual.Elem(), expected.Elem()) +// readEnumField reads an enum field respecting the field's RefMode. +// RefMode determines whether null flag is read, regardless of whether the local type is a pointer. +// This is important for compatible mode where remote TypeDef's nullable flag controls the wire format. +// Uses context error state for deferred error checking. 
+func readEnumField(ctx *ReadContext, field *FieldInfo, fieldValue reflect.Value) { + buf := ctx.Buffer() + isPointer := field.IsPtr + + // Read null flag based on RefMode only (not based on whether local type is pointer) + if field.RefMode != RefModeNone { + nullFlag := buf.ReadInt8(ctx.Err()) + if nullFlag == NullFlag { + // For pointer enum fields, leave as nil; for non-pointer, set to zero + if !isPointer { + fieldValue.SetInt(0) + } + return } } - if (actual.Kind() == reflect.Array && expected.Kind() == reflect.Slice) || - (actual.Kind() == reflect.Slice && expected.Kind() == reflect.Array) { - return true - } - return false -} -func elementTypesCompatible(actual, expected reflect.Type) bool { - if actual == nil || expected == nil { - return false - } - if actual == expected || actual.AssignableTo(expected) || expected.AssignableTo(actual) { - return true - } - if actual.Kind() == reflect.Ptr { - return elementTypesCompatible(actual, expected.Elem()) + // For pointer enum fields, allocate a new value + targetValue := fieldValue + if isPointer { + newVal := reflect.New(field.Type.Elem()) + fieldValue.Set(newVal) + targetValue = newVal.Elem() } - return false -} -// typeIdFromKind derives a TypeId from a reflect.Type's kind -// This is used when the type is not registered in typesInfo -func typeIdFromKind(type_ reflect.Type) TypeId { - switch type_.Kind() { - case reflect.Bool: - return BOOL - case reflect.Int8: - return INT8 - case reflect.Int16: - return INT16 - case reflect.Int32: - return INT32 - case reflect.Int64, reflect.Int: - return INT64 - case reflect.Uint8: - return UINT8 - case reflect.Uint16: - return UINT16 - case reflect.Uint32: - return UINT32 - case reflect.Uint64, reflect.Uint: - return UINT64 - case reflect.Float32: - return FLOAT32 - case reflect.Float64: - return FLOAT64 - case reflect.String: - return STRING - case reflect.Slice: - // For slices, return the appropriate primitive array type ID based on element type - elemKind := 
type_.Elem().Kind() - switch elemKind { - case reflect.Bool: - return BOOL_ARRAY - case reflect.Int8: - return INT8_ARRAY - case reflect.Int16: - return INT16_ARRAY - case reflect.Int32: - return INT32_ARRAY - case reflect.Int64, reflect.Int: - return INT64_ARRAY - case reflect.Float32: - return FLOAT32_ARRAY - case reflect.Float64: - return FLOAT64_ARRAY - default: - // Non-primitive slices use LIST - return LIST - } - case reflect.Array: - // For arrays, return the appropriate primitive array type ID based on element type - elemKind := type_.Elem().Kind() - switch elemKind { - case reflect.Bool: - return BOOL_ARRAY - case reflect.Int8: - return INT8_ARRAY - case reflect.Int16: - return INT16_ARRAY - case reflect.Int32: - return INT32_ARRAY - case reflect.Int64, reflect.Int: - return INT64_ARRAY - case reflect.Float32: - return FLOAT32_ARRAY - case reflect.Float64: - return FLOAT64_ARRAY - default: - // Non-primitive arrays use LIST - return LIST - } - case reflect.Map: - // map[T]bool is used to represent a Set in Go - if type_.Elem().Kind() == reflect.Bool { - return SET - } - return MAP - case reflect.Struct: - return NAMED_STRUCT - case reflect.Ptr: - // For pointer types, get the type ID of the element type - return typeIdFromKind(type_.Elem()) - default: - return UNKNOWN + // For pointer enum fields, the serializer is ptrToValueSerializer wrapping enumSerializer. + // We need to call the inner enumSerializer directly with the dereferenced value. + if ptrSer, ok := field.Serializer.(*ptrToValueSerializer); ok { + ptrSer.valueSerializer.ReadData(ctx, field.Type.Elem(), targetValue) + } else { + field.Serializer.ReadData(ctx, field.Type, targetValue) } } diff --git a/go/fory/struct_test.go b/go/fory/struct_test.go new file mode 100644 index 0000000000..5c2fed3719 --- /dev/null +++ b/go/fory/struct_test.go @@ -0,0 +1,71 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package fory + +import ( + "testing" +) + +func TestUnsignedTypeSerialization(t *testing.T) { + type TestStruct struct { + U32Var uint32 `fory:"compress=true"` + U32Fixed uint32 `fory:"compress=false"` + U64Var uint64 `fory:"encoding=varint"` + U64Fixed uint64 `fory:"encoding=fixed"` + U64Tagged uint64 `fory:"encoding=tagged"` + } + + f := New(WithXlang(true), WithCompatible(false)) + f.Register(TestStruct{}, 9999) + + obj := TestStruct{ + U32Var: 3000000000, + U32Fixed: 4000000000, + U64Var: 10000000000, + U64Fixed: 15000000000, + U64Tagged: 1000000000, + } + + data, err := f.Serialize(obj) + if err != nil { + t.Fatalf("Serialize failed: %v", err) + } + + var result interface{} + err = f.Deserialize(data, &result) + if err != nil { + t.Fatalf("Deserialize failed: %v", err) + } + + resultObj := result.(*TestStruct) + if resultObj.U32Var != obj.U32Var { + t.Errorf("U32Var mismatch: expected %d, got %d", obj.U32Var, resultObj.U32Var) + } + if resultObj.U32Fixed != obj.U32Fixed { + t.Errorf("U32Fixed mismatch: expected %d, got %d", obj.U32Fixed, resultObj.U32Fixed) + } + if resultObj.U64Var != obj.U64Var { + t.Errorf("U64Var mismatch: expected %d, got %d", obj.U64Var, resultObj.U64Var) + } + if resultObj.U64Fixed != obj.U64Fixed { + t.Errorf("U64Fixed mismatch: expected %d, 
got %d", obj.U64Fixed, resultObj.U64Fixed) + } + if resultObj.U64Tagged != obj.U64Tagged { + t.Errorf("U64Tagged mismatch: expected %d, got %d", obj.U64Tagged, resultObj.U64Tagged) + } +} diff --git a/go/fory/tag.go b/go/fory/tag.go index d07154374f..3287a9c0d5 100644 --- a/go/fory/tag.go +++ b/go/fory/tag.go @@ -30,52 +30,71 @@ const ( // ForyTag represents parsed fory struct tag options. // -// Tag format: `fory:"id=N,nullable=bool,ref=bool,ignore=bool"` or `fory:"-"` +// Tag format: `fory:"id=N,nullable=bool,ref=bool,ignore=bool,compress=bool,encoding=value"` or `fory:"-"` // // Options: // - id: Field tag ID. -1 (default) uses field name, >=0 uses numeric tag ID for compact encoding // - nullable: Whether to write null flag. Default false (skip null flag for non-nullable fields) // - ref: Whether to enable reference tracking. Default false (skip ref tracking overhead) // - ignore: Whether to skip this field during serialization. Default false +// - compress: For int32/uint32 fields: true=varint encoding (default), false=fixed encoding +// - encoding: For numeric fields: +// - int32/uint32: "varint" (default) or "fixed" +// - int64/uint64: "varint" (default), "fixed", or "tagged" +// +// Note: For int32/uint32, use either `compress` or `encoding`, not both. 
// // Examples: // // type Example struct { -// Name string `fory:"id=0"` // Use tag ID 0 -// Age int `fory:"id=1,nullable=false"` // Explicit nullable=false -// Email *string `fory:"id=2,nullable=true,ref=false"` // Nullable pointer, no ref tracking -// Parent *Node `fory:"id=3,ref=true,nullable=true"` // With reference tracking -// Secret string `fory:"ignore"` // Skip this field -// Hidden string `fory:"-"` // Skip this field (shorthand) +// Name string `fory:"id=0"` // Use tag ID 0 +// Age int `fory:"id=1,nullable=false"` // Explicit nullable=false +// Email *string `fory:"id=2,nullable=true,ref=false"` // Nullable pointer, no ref tracking +// Parent *Node `fory:"id=3,ref=true,nullable=true"` // With reference tracking +// FixedI32 int32 `fory:"compress=false"` // Use fixed 4-byte INT32 +// VarI32 int32 `fory:"encoding=varint"` // Use VARINT32 (default) +// FixedU32 uint32 `fory:"encoding=fixed"` // Use fixed 4-byte UINT32 +// TaggedI64 int64 `fory:"encoding=tagged"` // Use TAGGED_INT64 +// VarU64 uint64 `fory:"encoding=varint"` // Use VAR_UINT64 (default) +// Secret string `fory:"ignore"` // Skip this field +// Hidden string `fory:"-"` // Skip this field (shorthand) // } type ForyTag struct { - ID int // Field tag ID (-1 = use field name, >=0 = use tag ID) - Nullable bool // Whether to write null flag (default: false) - Ref bool // Whether to enable reference tracking (default: false) - Ignore bool // Whether to ignore this field during serialization (default: false) - HasTag bool // Whether field has fory tag at all + ID int // Field tag ID (-1 = use field name, >=0 = use tag ID) + Nullable bool // Whether to write null flag (default: false) + Ref bool // Whether to enable reference tracking (default: false) + Ignore bool // Whether to ignore this field during serialization (default: false) + HasTag bool // Whether field has fory tag at all + Compress bool // For int32/uint32: true=varint, false=fixed (default: true) + Encoding string // For int64/uint64: 
"fixed", "varint", "tagged" (default: "varint") // Track which options were explicitly set (for override logic) NullableSet bool RefSet bool IgnoreSet bool + CompressSet bool + EncodingSet bool } -// ParseForyTag parses a fory struct tag from reflect.StructField.Tag. +// parseForyTag parses a fory struct tag from reflect.StructField.Tag. // -// Tag format: `fory:"id=N,nullable=bool,ref=bool,ignore=bool"` or `fory:"-"` +// Tag format: `fory:"id=N,nullable=bool,ref=bool,ignore=bool,compress=bool,encoding=value"` or `fory:"-"` // // Supported syntaxes: // - Key-value: `nullable=true`, `ref=false`, `ignore=true`, `id=0` +// - For int32/uint32: `compress=true` (varint) or `compress=false` (fixed), default is true +// - For int64/uint64: `encoding=fixed`, `encoding=varint`, `encoding=tagged`, default is varint // - Standalone flags: `nullable`, `ref`, `ignore` (equivalent to =true) // - Shorthand: `-` (equivalent to `ignore=true`) -func ParseForyTag(field reflect.StructField) ForyTag { +func parseForyTag(field reflect.StructField) ForyTag { tag := ForyTag{ ID: TagIDUseFieldName, Nullable: false, Ref: false, Ignore: false, HasTag: false, + Compress: true, // default: varint encoding + Encoding: "varint", // default: varint encoding } tagValue, ok := field.Tag.Lookup("fory") @@ -119,6 +138,12 @@ func ParseForyTag(field reflect.StructField) ForyTag { case "ignore": tag.Ignore = parseBool(value) tag.IgnoreSet = true + case "compress": + tag.Compress = parseBool(value) + tag.CompressSet = true + case "encoding": + tag.Encoding = strings.ToLower(strings.TrimSpace(value)) + tag.EncodingSet = true } } else { // Handle standalone flags (presence means true) @@ -146,14 +171,14 @@ func parseBool(s string) bool { return s == "true" || s == "1" || s == "yes" } -// ValidateForyTags validates all fory tags in a struct type. +// validateForyTags validates all fory tags in a struct type. // Returns an error if validation fails. 
// // Validation rules: // - Tag ID must be >= -1 // - Tag IDs must be unique within a struct (except -1) // - Ignored fields are not validated for ID uniqueness -func ValidateForyTags(t reflect.Type) error { +func validateForyTags(t reflect.Type) error { if t.Kind() == reflect.Ptr { t = t.Elem() } @@ -165,7 +190,7 @@ func ValidateForyTags(t reflect.Type) error { for i := 0; i < t.NumField(); i++ { field := t.Field(i) - tag := ParseForyTag(field) + tag := parseForyTag(field) // Skip ignored fields for ID uniqueness validation if tag.Ignore { @@ -191,18 +216,18 @@ func ValidateForyTags(t reflect.Type) error { return nil } -// ShouldIncludeField returns true if the field should be serialized. +// shouldIncludeField returns true if the field should be serialized. // A field is excluded if: // - It's unexported (starts with lowercase) // - It has `fory:"-"` tag // - It has `fory:"ignore"` or `fory:"ignore=true"` tag -func ShouldIncludeField(field reflect.StructField) bool { +func shouldIncludeField(field reflect.StructField) bool { // Skip unexported fields if field.PkgPath != "" { return false } // Check for ignore tag - tag := ParseForyTag(field) + tag := parseForyTag(field) return !tag.Ignore } diff --git a/go/fory/tag_test.go b/go/fory/tag_test.go index 3c7f52e2b2..83c3670ea0 100644 --- a/go/fory/tag_test.go +++ b/go/fory/tag_test.go @@ -43,7 +43,7 @@ func TestParseForyTag(t *testing.T) { typ := reflect.TypeOf(TestStruct{}) // Test Field1: id=0 - tag1 := ParseForyTag(typ.Field(0)) + tag1 := parseForyTag(typ.Field(0)) require.True(t, tag1.HasTag) require.Equal(t, 0, tag1.ID) require.False(t, tag1.Nullable) @@ -54,7 +54,7 @@ func TestParseForyTag(t *testing.T) { require.False(t, tag1.IgnoreSet) // Test Field2: all explicit false values - tag2 := ParseForyTag(typ.Field(1)) + tag2 := parseForyTag(typ.Field(1)) require.Equal(t, 1, tag2.ID) require.False(t, tag2.Nullable) require.False(t, tag2.Ref) @@ -64,14 +64,14 @@ func TestParseForyTag(t *testing.T) { require.True(t, 
tag2.IgnoreSet) // Test Field3: explicit true values - tag3 := ParseForyTag(typ.Field(2)) + tag3 := parseForyTag(typ.Field(2)) require.Equal(t, 2, tag3.ID) require.True(t, tag3.Nullable) require.True(t, tag3.Ref) require.False(t, tag3.Ignore) // Test Field4: standalone flags (presence = true) - tag4 := ParseForyTag(typ.Field(3)) + tag4 := parseForyTag(typ.Field(3)) require.Equal(t, TagIDUseFieldName, tag4.ID) require.True(t, tag4.Nullable) require.True(t, tag4.Ref) @@ -79,44 +79,44 @@ func TestParseForyTag(t *testing.T) { require.True(t, tag4.RefSet) // Test Field5: standalone ignore - tag5 := ParseForyTag(typ.Field(4)) + tag5 := parseForyTag(typ.Field(4)) require.True(t, tag5.Ignore) require.True(t, tag5.IgnoreSet) // Test Field6: explicit ignore=true - tag6 := ParseForyTag(typ.Field(5)) + tag6 := parseForyTag(typ.Field(5)) require.True(t, tag6.Ignore) require.True(t, tag6.IgnoreSet) // Test Field7: explicit ignore=false - tag7 := ParseForyTag(typ.Field(6)) + tag7 := parseForyTag(typ.Field(6)) require.False(t, tag7.Ignore) require.True(t, tag7.IgnoreSet) // Test Field8: "-" shorthand - tag8 := ParseForyTag(typ.Field(7)) + tag8 := parseForyTag(typ.Field(7)) require.True(t, tag8.Ignore) require.True(t, tag8.IgnoreSet) // Test Field9: no tag - tag9 := ParseForyTag(typ.Field(8)) + tag9 := parseForyTag(typ.Field(8)) require.False(t, tag9.HasTag) require.False(t, tag9.Ignore) require.Equal(t, TagIDUseFieldName, tag9.ID) // Test Field10: has ID but not ignored - tag10 := ParseForyTag(typ.Field(9)) + tag10 := parseForyTag(typ.Field(9)) require.Equal(t, 3, tag10.ID) require.False(t, tag10.Ignore) require.True(t, tag10.IgnoreSet) // Test Field11: explicit id=-1 (use field name) - tag11 := ParseForyTag(typ.Field(10)) + tag11 := parseForyTag(typ.Field(10)) require.Equal(t, TagIDUseFieldName, tag11.ID) require.True(t, tag11.HasTag) // Test Field12: nullable=true,ref=false - tag12 := ParseForyTag(typ.Field(11)) + tag12 := parseForyTag(typ.Field(11)) require.True(t, 
tag12.Nullable) require.False(t, tag12.Ref) require.True(t, tag12.NullableSet) @@ -135,12 +135,12 @@ func TestShouldIncludeField(t *testing.T) { typ := reflect.TypeOf(TestStruct{}) - require.True(t, ShouldIncludeField(typ.Field(0))) // Included1 - require.True(t, ShouldIncludeField(typ.Field(1))) // Included2 (ignore=false) - require.False(t, ShouldIncludeField(typ.Field(2))) // Ignored1 - require.False(t, ShouldIncludeField(typ.Field(3))) // Ignored2 - require.False(t, ShouldIncludeField(typ.Field(4))) // Ignored3 - require.True(t, ShouldIncludeField(typ.Field(5))) // NoTag (default: include) + require.True(t, shouldIncludeField(typ.Field(0))) // Included1 + require.True(t, shouldIncludeField(typ.Field(1))) // Included2 (ignore=false) + require.False(t, shouldIncludeField(typ.Field(2))) // Ignored1 + require.False(t, shouldIncludeField(typ.Field(3))) // Ignored2 + require.False(t, shouldIncludeField(typ.Field(4))) // Ignored3 + require.True(t, shouldIncludeField(typ.Field(5))) // NoTag (default: include) } func TestValidateForyTags(t *testing.T) { @@ -151,7 +151,7 @@ func TestValidateForyTags(t *testing.T) { Field3 string `fory:"id=-1"` Field4 string // No tag } - err := ValidateForyTags(reflect.TypeOf(ValidStruct{})) + err := validateForyTags(reflect.TypeOf(ValidStruct{})) require.NoError(t, err) // Test duplicate tag IDs @@ -159,7 +159,7 @@ func TestValidateForyTags(t *testing.T) { Field1 string `fory:"id=0"` Field2 string `fory:"id=0"` } - err = ValidateForyTags(reflect.TypeOf(DuplicateIDs{})) + err = validateForyTags(reflect.TypeOf(DuplicateIDs{})) require.Error(t, err) require.Contains(t, err.Error(), "duplicate") foryErr, ok := err.(Error) @@ -170,7 +170,7 @@ func TestValidateForyTags(t *testing.T) { type InvalidID struct { Field1 string `fory:"id=-2"` } - err = ValidateForyTags(reflect.TypeOf(InvalidID{})) + err = validateForyTags(reflect.TypeOf(InvalidID{})) require.Error(t, err) require.Contains(t, err.Error(), "invalid") foryErr, ok = err.(Error) @@ 
-183,7 +183,7 @@ func TestValidateForyTags(t *testing.T) { Field2 string `fory:"id=0,ignore"` // Same ID but ignored Field3 string `fory:"id=1"` } - err = ValidateForyTags(reflect.TypeOf(IgnoredFields{})) + err = validateForyTags(reflect.TypeOf(IgnoredFields{})) require.NoError(t, err) } @@ -193,7 +193,7 @@ func TestParseForyTagEdgeCases(t *testing.T) { Field1 string `fory:" id = 0 , nullable = true "` } typ := reflect.TypeOf(WhitespaceStruct{}) - tag := ParseForyTag(typ.Field(0)) + tag := parseForyTag(typ.Field(0)) require.Equal(t, 0, tag.ID) require.True(t, tag.Nullable) @@ -202,7 +202,7 @@ func TestParseForyTagEdgeCases(t *testing.T) { Field1 string `fory:""` } typ2 := reflect.TypeOf(EmptyTagStruct{}) - tag2 := ParseForyTag(typ2.Field(0)) + tag2 := parseForyTag(typ2.Field(0)) require.True(t, tag2.HasTag) require.Equal(t, TagIDUseFieldName, tag2.ID) @@ -215,16 +215,16 @@ func TestParseForyTagEdgeCases(t *testing.T) { } typ3 := reflect.TypeOf(BoolValuesStruct{}) - tag3 := ParseForyTag(typ3.Field(0)) + tag3 := parseForyTag(typ3.Field(0)) require.True(t, tag3.Nullable) // "1" -> true - tag4 := ParseForyTag(typ3.Field(1)) + tag4 := parseForyTag(typ3.Field(1)) require.True(t, tag4.Nullable) // "yes" -> true - tag5 := ParseForyTag(typ3.Field(2)) + tag5 := parseForyTag(typ3.Field(2)) require.True(t, tag5.Nullable) // "TRUE" -> true - tag6 := ParseForyTag(typ3.Field(3)) + tag6 := parseForyTag(typ3.Field(3)) require.False(t, tag6.Nullable) // "no" -> false } @@ -422,7 +422,7 @@ func TestNullableRefFlagsRespected(t *testing.T) { typ1 := reflect.TypeOf(TestStructNoNull{}) for i := 0; i < typ1.NumField(); i++ { field := typ1.Field(i) - tag := ParseForyTag(field) + tag := parseForyTag(field) t.Logf("Field %s: ID=%d, Nullable=%v (set=%v), Ref=%v (set=%v)", field.Name, tag.ID, tag.Nullable, tag.NullableSet, tag.Ref, tag.RefSet) } @@ -611,3 +611,65 @@ func TestNestedStructWithTags(t *testing.T) { require.Equal(t, obj.Inner.Count, result.Inner.Count) require.Equal(t, obj.Items, 
result.Items) } + +func TestParseTypeIDTag(t *testing.T) { + type TestStruct struct { + U32Var uint32 `fory:"compress=true"` + U32Fixed uint32 `fory:"compress=false"` + U64Var uint64 `fory:"encoding=varint"` + U64Fixed uint64 `fory:"encoding=fixed"` + U64Tagged uint64 `fory:"encoding=tagged"` + } + + typ := reflect.TypeOf(TestStruct{}) + + // Test U32Var + field := typ.Field(0) + tag := parseForyTag(field) + if !tag.CompressSet { + t.Errorf("U32Var: CompressSet should be true") + } + if !tag.Compress { + t.Errorf("U32Var: Compress should be true") + } + + // Test U32Fixed + field = typ.Field(1) + tag = parseForyTag(field) + if !tag.CompressSet { + t.Errorf("U32Fixed: CompressSet should be true") + } + if tag.Compress { + t.Errorf("U32Fixed: Compress should be false") + } + + // Test U64Var + field = typ.Field(2) + tag = parseForyTag(field) + if !tag.EncodingSet { + t.Errorf("U64Var: EncodingSet should be true") + } + if tag.Encoding != "varint" { + t.Errorf("U64Var: expected encoding 'varint', got %s", tag.Encoding) + } + + // Test U64Fixed + field = typ.Field(3) + tag = parseForyTag(field) + if !tag.EncodingSet { + t.Errorf("U64Fixed: EncodingSet should be true") + } + if tag.Encoding != "fixed" { + t.Errorf("U64Fixed: expected encoding 'fixed', got %s", tag.Encoding) + } + + // Test U64Tagged + field = typ.Field(4) + tag = parseForyTag(field) + if !tag.EncodingSet { + t.Errorf("U64Tagged: EncodingSet should be true") + } + if tag.Encoding != "tagged" { + t.Errorf("U64Tagged: expected encoding 'tagged', got %s", tag.Encoding) + } +} diff --git a/go/fory/tests/structs_fory_gen.go b/go/fory/tests/structs_fory_gen.go index e3538214d9..8a85508e1a 100644 --- a/go/fory/tests/structs_fory_gen.go +++ b/go/fory/tests/structs_fory_gen.go @@ -1,6 +1,6 @@ // Code generated by forygen. DO NOT EDIT. 
-// source: /Users/chaokunyang/Desktop/dev/fory/go/fory/tests/structs.go -// generated at: 2026-01-03T14:03:09+08:00 +// source: structs.go +// generated at: 2026-01-09T19:15:47+08:00 package fory @@ -924,98 +924,98 @@ func (g *SliceDemo_ForyGenSerializer) WriteTyped(ctx *fory.WriteContext, v *Slic buf.WriteInt32(g.structHash) // WriteData fields in sorted order - // Field: BoolSlice ([]bool) + // Field: StringSlice ([]string) { isXlang := ctx.TypeResolver().IsXlang() if isXlang { // xlang mode: slices are not nullable, write directly without null flag - fory.WriteBoolSlice(buf, v.BoolSlice) + sliceLen := 0 + if v.StringSlice != nil { + sliceLen = len(v.StringSlice) + } + buf.WriteVaruint32(uint32(sliceLen)) + if sliceLen > 0 { + collectFlag := 12 // CollectionIsSameType | CollectionIsDeclElementType + if ctx.TrackRef() { + collectFlag |= 1 // CollectionTrackingRef for referencable element type + } + buf.WriteInt8(int8(collectFlag)) + for _, elem := range v.StringSlice { + if ctx.TrackRef() { + buf.WriteInt8(-1) // NotNullValueFlag for element + } + ctx.WriteString(elem) + } + } } else { // Native Go mode: slices are nullable, write null flag - if v.BoolSlice == nil { + if v.StringSlice == nil { buf.WriteInt8(-3) // NullFlag } else { buf.WriteInt8(-1) // NotNullValueFlag - fory.WriteBoolSlice(buf, v.BoolSlice) + sliceLen := len(v.StringSlice) + buf.WriteVaruint32(uint32(sliceLen)) + if sliceLen > 0 { + collectFlag := 12 // CollectionIsSameType | CollectionIsDeclElementType + if ctx.TrackRef() { + collectFlag |= 1 // CollectionTrackingRef for referencable element type + } + buf.WriteInt8(int8(collectFlag)) + for _, elem := range v.StringSlice { + if ctx.TrackRef() { + buf.WriteInt8(-1) // NotNullValueFlag for element + } + ctx.WriteString(elem) + } + } } } } - // Field: FloatSlice ([]float64) + // Field: BoolSlice ([]bool) { isXlang := ctx.TypeResolver().IsXlang() if isXlang { // xlang mode: slices are not nullable, write directly without null flag - 
fory.WriteFloat64Slice(buf, v.FloatSlice) + fory.WriteBoolSlice(buf, v.BoolSlice) } else { // Native Go mode: slices are nullable, write null flag - if v.FloatSlice == nil { + if v.BoolSlice == nil { buf.WriteInt8(-3) // NullFlag } else { buf.WriteInt8(-1) // NotNullValueFlag - fory.WriteFloat64Slice(buf, v.FloatSlice) + fory.WriteBoolSlice(buf, v.BoolSlice) } } } - // Field: IntSlice ([]int32) + // Field: FloatSlice ([]float64) { isXlang := ctx.TypeResolver().IsXlang() if isXlang { // xlang mode: slices are not nullable, write directly without null flag - fory.WriteInt32Slice(buf, v.IntSlice) + fory.WriteFloat64Slice(buf, v.FloatSlice) } else { // Native Go mode: slices are nullable, write null flag - if v.IntSlice == nil { + if v.FloatSlice == nil { buf.WriteInt8(-3) // NullFlag } else { buf.WriteInt8(-1) // NotNullValueFlag - fory.WriteInt32Slice(buf, v.IntSlice) + fory.WriteFloat64Slice(buf, v.FloatSlice) } } } - // Field: StringSlice ([]string) + // Field: IntSlice ([]int32) { isXlang := ctx.TypeResolver().IsXlang() if isXlang { // xlang mode: slices are not nullable, write directly without null flag - sliceLen := 0 - if v.StringSlice != nil { - sliceLen = len(v.StringSlice) - } - buf.WriteVaruint32(uint32(sliceLen)) - if sliceLen > 0 { - collectFlag := 12 // CollectionIsSameType | CollectionIsDeclElementType - if ctx.TrackRef() { - collectFlag |= 1 // CollectionTrackingRef for referencable element type - } - buf.WriteInt8(int8(collectFlag)) - for _, elem := range v.StringSlice { - if ctx.TrackRef() { - buf.WriteInt8(-1) // NotNullValueFlag for element - } - ctx.WriteString(elem) - } - } + fory.WriteInt32Slice(buf, v.IntSlice) } else { // Native Go mode: slices are nullable, write null flag - if v.StringSlice == nil { + if v.IntSlice == nil { buf.WriteInt8(-3) // NullFlag } else { buf.WriteInt8(-1) // NotNullValueFlag - sliceLen := len(v.StringSlice) - buf.WriteVaruint32(uint32(sliceLen)) - if sliceLen > 0 { - collectFlag := 12 // CollectionIsSameType | 
CollectionIsDeclElementType - if ctx.TrackRef() { - collectFlag |= 1 // CollectionTrackingRef for referencable element type - } - buf.WriteInt8(int8(collectFlag)) - for _, elem := range v.StringSlice { - if ctx.TrackRef() { - buf.WriteInt8(-1) // NotNullValueFlag for element - } - ctx.WriteString(elem) - } - } + fory.WriteInt32Slice(buf, v.IntSlice) } } } @@ -1085,54 +1085,6 @@ func (g *SliceDemo_ForyGenSerializer) ReadTyped(ctx *fory.ReadContext, v *SliceD } // ReadData fields in same order as write - // Field: BoolSlice ([]bool) - { - isXlang := ctx.TypeResolver().IsXlang() - if isXlang { - // xlang mode: slices are not nullable, read directly without null flag - v.BoolSlice = fory.ReadBoolSlice(buf, err) - } else { - // Native Go mode: slices are nullable, read null flag - nullFlag := buf.ReadInt8(err) - if nullFlag == -3 { - v.BoolSlice = nil - } else { - v.BoolSlice = fory.ReadBoolSlice(buf, err) - } - } - } - // Field: FloatSlice ([]float64) - { - isXlang := ctx.TypeResolver().IsXlang() - if isXlang { - // xlang mode: slices are not nullable, read directly without null flag - v.FloatSlice = fory.ReadFloat64Slice(buf, err) - } else { - // Native Go mode: slices are nullable, read null flag - nullFlag := buf.ReadInt8(err) - if nullFlag == -3 { - v.FloatSlice = nil - } else { - v.FloatSlice = fory.ReadFloat64Slice(buf, err) - } - } - } - // Field: IntSlice ([]int32) - { - isXlang := ctx.TypeResolver().IsXlang() - if isXlang { - // xlang mode: slices are not nullable, read directly without null flag - v.IntSlice = fory.ReadInt32Slice(buf, err) - } else { - // Native Go mode: slices are nullable, read null flag - nullFlag := buf.ReadInt8(err) - if nullFlag == -3 { - v.IntSlice = nil - } else { - v.IntSlice = fory.ReadInt32Slice(buf, err) - } - } - } // Field: StringSlice ([]string) { isXlang := ctx.TypeResolver().IsXlang() @@ -1211,6 +1163,54 @@ func (g *SliceDemo_ForyGenSerializer) ReadTyped(ctx *fory.ReadContext, v *SliceD } } } + // Field: BoolSlice ([]bool) + 
{ + isXlang := ctx.TypeResolver().IsXlang() + if isXlang { + // xlang mode: slices are not nullable, read directly without null flag + v.BoolSlice = fory.ReadBoolSlice(buf, err) + } else { + // Native Go mode: slices are nullable, read null flag + nullFlag := buf.ReadInt8(err) + if nullFlag == -3 { + v.BoolSlice = nil + } else { + v.BoolSlice = fory.ReadBoolSlice(buf, err) + } + } + } + // Field: FloatSlice ([]float64) + { + isXlang := ctx.TypeResolver().IsXlang() + if isXlang { + // xlang mode: slices are not nullable, read directly without null flag + v.FloatSlice = fory.ReadFloat64Slice(buf, err) + } else { + // Native Go mode: slices are nullable, read null flag + nullFlag := buf.ReadInt8(err) + if nullFlag == -3 { + v.FloatSlice = nil + } else { + v.FloatSlice = fory.ReadFloat64Slice(buf, err) + } + } + } + // Field: IntSlice ([]int32) + { + isXlang := ctx.TypeResolver().IsXlang() + if isXlang { + // xlang mode: slices are not nullable, read directly without null flag + v.IntSlice = fory.ReadInt32Slice(buf, err) + } else { + // Native Go mode: slices are nullable, read null flag + nullFlag := buf.ReadInt8(err) + if nullFlag == -3 { + v.IntSlice = nil + } else { + v.IntSlice = fory.ReadInt32Slice(buf, err) + } + } + } // Final deferred error check if ctx.HasError() { diff --git a/go/fory/tests/xlang/xlang_test_main.go b/go/fory/tests/xlang/xlang_test_main.go index 503d75a01c..1b95295e0a 100644 --- a/go/fory/tests/xlang/xlang_test_main.go +++ b/go/fory/tests/xlang/xlang_test_main.go @@ -129,6 +129,39 @@ func getNullableComprehensiveCompatible(obj interface{}) NullableComprehensiveCo } } +func getUnsignedSchemaConsistent(obj interface{}) UnsignedSchemaConsistent { + switch v := obj.(type) { + case UnsignedSchemaConsistent: + return v + case *UnsignedSchemaConsistent: + return *v + default: + panic(fmt.Sprintf("expected UnsignedSchemaConsistent, got %T", obj)) + } +} + +func getUnsignedSchemaCompatible(obj interface{}) UnsignedSchemaCompatible { + switch v := 
obj.(type) { + case UnsignedSchemaCompatible: + return v + case *UnsignedSchemaCompatible: + return *v + default: + panic(fmt.Sprintf("expected UnsignedSchemaCompatible, got %T", obj)) + } +} + +func getUnsignedSchemaConsistentSimple(obj interface{}) UnsignedSchemaConsistentSimple { + switch v := obj.(type) { + case UnsignedSchemaConsistentSimple: + return v + case *UnsignedSchemaConsistentSimple: + return *v + default: + panic(fmt.Sprintf("expected UnsignedSchemaConsistentSimple, got %T", obj)) + } +} + func assertEqualFloat32(expected, actual float32, name string) { diff := expected - actual if diff < 0 { @@ -2120,6 +2153,218 @@ func testCircularRefCompatible() { writeFile(dataFile, serialized) } +// ============================================================================ +// Unsigned Number Test Types +// ============================================================================ + +// UnsignedSchemaConsistent - Test struct for unsigned numbers in SCHEMA_CONSISTENT mode. +// All fields use the same nullability as Java. +// Note: Go currently only supports uint8, uint16, uint32 (VAR_UINT32), uint64 (VAR_UINT64). +// Fixed and tagged encodings require fory encoding tags (TODO). +// Matches Java's UnsignedSchemaConsistent (type id 501) +// UnsignedSchemaConsistentSimple - Simple test struct for unsigned numbers. 
+// Matches Java's UnsignedSchemaConsistentSimple (type id 1) +type UnsignedSchemaConsistentSimple struct { + U64Tagged uint64 `fory:"encoding=tagged"` // TAGGED_UINT64 - tagged encoding + U64TaggedNullable *uint64 `fory:"nullable,encoding=tagged"` // Nullable TAGGED_UINT64 +} + +type UnsignedSchemaConsistent struct { + // Primitive unsigned fields (non-nullable, use Field suffix to avoid reserved keywords) + U8Field uint8 // UINT8 - fixed 8-bit + U16Field uint16 // UINT16 - fixed 16-bit + U32VarField uint32 `fory:"compress=true"` // VAR_UINT32 - variable-length + U32FixedField uint32 `fory:"compress=false"` // UINT32 - fixed 4-byte + U64VarField uint64 `fory:"encoding=varint"` // VAR_UINT64 - variable-length + U64FixedField uint64 `fory:"encoding=fixed"` // UINT64 - fixed 8-byte + U64TaggedField uint64 `fory:"encoding=tagged"` // TAGGED_UINT64 - tagged encoding + + // Nullable unsigned fields (pointers) + U8NullableField *uint8 `fory:"nullable"` + U16NullableField *uint16 `fory:"nullable"` + U32VarNullableField *uint32 `fory:"nullable,compress=true"` + U32FixedNullableField *uint32 `fory:"nullable,compress=false"` + U64VarNullableField *uint64 `fory:"nullable,encoding=varint"` + U64FixedNullableField *uint64 `fory:"nullable,encoding=fixed"` + U64TaggedNullableField *uint64 `fory:"nullable,encoding=tagged"` +} + +// UnsignedSchemaCompatible - Test struct for unsigned numbers in COMPATIBLE mode. 
+// Group 1: Pointer types (nullable in Go, non-nullable in Java) +// Group 2: Non-pointer types with Field2 suffix (non-nullable in Go, nullable in Java) +// Matches Java's UnsignedSchemaCompatible (type id 502) +type UnsignedSchemaCompatible struct { + // Group 1: Nullable in Go (pointers), non-nullable in Java + U8Field1 *uint8 `fory:"nullable"` + U16Field1 *uint16 `fory:"nullable"` + U32VarField1 *uint32 `fory:"nullable,compress=true"` + U32FixedField1 *uint32 `fory:"nullable,compress=false"` + U64VarField1 *uint64 `fory:"nullable,encoding=varint"` + U64FixedField1 *uint64 `fory:"nullable,encoding=fixed"` + U64TaggedField1 *uint64 `fory:"nullable,encoding=tagged"` + + // Group 2: Non-nullable in Go, nullable in Java + U8Field2 uint8 + U16Field2 uint16 + U32VarField2 uint32 `fory:"compress=true"` + U32FixedField2 uint32 `fory:"compress=false"` + U64VarField2 uint64 `fory:"encoding=varint"` + U64FixedField2 uint64 `fory:"encoding=fixed"` + U64TaggedField2 uint64 `fory:"encoding=tagged"` +} + +// ============================================================================ +// Unsigned Number Tests +// ============================================================================ + +func testUnsignedSchemaConsistentSimple() { + dataFile := getDataFile() + data := readFile(dataFile) + + f := fory.New(fory.WithXlang(true), fory.WithCompatible(false)) + f.Register(UnsignedSchemaConsistentSimple{}, 1) + + var obj interface{} + err := f.Deserialize(data, &obj) + if err != nil { + panic(fmt.Sprintf("Failed to deserialize: %v", err)) + } + + result := getUnsignedSchemaConsistentSimple(obj) + + // Verify fields + assertEqual(uint64(1000000000), result.U64Tagged, "U64Tagged") + if result.U64TaggedNullable == nil || *result.U64TaggedNullable != 500000000 { + panic(fmt.Sprintf("U64TaggedNullable mismatch: expected 500000000, got %v", result.U64TaggedNullable)) + } + + serialized, err := f.Serialize(result) + if err != nil { + panic(fmt.Sprintf("Failed to serialize: %v", err)) + 
} + + writeFile(dataFile, serialized) +} + +func testUnsignedSchemaConsistent() { + dataFile := getDataFile() + data := readFile(dataFile) + + fmt.Printf("Input size: %d bytes\n", len(data)) + fmt.Printf("Input hex: %x\n", data) + + f := fory.New(fory.WithXlang(true), fory.WithCompatible(false)) + f.Register(UnsignedSchemaConsistent{}, 501) + + var obj interface{} + err := f.Deserialize(data, &obj) + if err != nil { + panic(fmt.Sprintf("Failed to deserialize: %v", err)) + } + + result := getUnsignedSchemaConsistent(obj) + + // Verify primitive unsigned fields + assertEqual(uint8(200), result.U8Field, "U8Field") + assertEqual(uint16(60000), result.U16Field, "U16Field") + assertEqual(uint32(3000000000), result.U32VarField, "U32VarField") + assertEqual(uint32(4000000000), result.U32FixedField, "U32FixedField") + assertEqual(uint64(10000000000), result.U64VarField, "U64VarField") + assertEqual(uint64(15000000000), result.U64FixedField, "U64FixedField") + assertEqual(uint64(1000000000), result.U64TaggedField, "U64TaggedField") + + // Verify nullable unsigned fields + if result.U8NullableField == nil || *result.U8NullableField != 128 { + panic(fmt.Sprintf("U8NullableField mismatch: expected 128, got %v", result.U8NullableField)) + } + if result.U16NullableField == nil || *result.U16NullableField != 40000 { + panic(fmt.Sprintf("U16NullableField mismatch: expected 40000, got %v", result.U16NullableField)) + } + if result.U32VarNullableField == nil || *result.U32VarNullableField != 2500000000 { + panic(fmt.Sprintf("U32VarNullableField mismatch: expected 2500000000, got %v", result.U32VarNullableField)) + } + if result.U32FixedNullableField == nil || *result.U32FixedNullableField != 3500000000 { + panic(fmt.Sprintf("U32FixedNullableField mismatch: expected 3500000000, got %v", result.U32FixedNullableField)) + } + if result.U64VarNullableField == nil || *result.U64VarNullableField != 8000000000 { + panic(fmt.Sprintf("U64VarNullableField mismatch: expected 8000000000, got %v", 
result.U64VarNullableField)) + } + if result.U64FixedNullableField == nil || *result.U64FixedNullableField != 12000000000 { + panic(fmt.Sprintf("U64FixedNullableField mismatch: expected 12000000000, got %v", result.U64FixedNullableField)) + } + if result.U64TaggedNullableField == nil || *result.U64TaggedNullableField != 500000000 { + panic(fmt.Sprintf("U64TaggedNullableField mismatch: expected 500000000, got %v", result.U64TaggedNullableField)) + } + + serialized, err := f.Serialize(result) + if err != nil { + panic(fmt.Sprintf("Failed to serialize: %v", err)) + } + + fmt.Printf("Output size: %d bytes\n", len(serialized)) + fmt.Printf("Output hex: %x\n", serialized) + + writeFile(dataFile, serialized) +} + +func testUnsignedSchemaCompatible() { + dataFile := getDataFile() + data := readFile(dataFile) + + f := fory.New(fory.WithXlang(true), fory.WithCompatible(true)) + f.Register(UnsignedSchemaCompatible{}, 502) + + var obj interface{} + err := f.Deserialize(data, &obj) + if err != nil { + panic(fmt.Sprintf("Failed to deserialize: %v", err)) + } + + result := getUnsignedSchemaCompatible(obj) + + // Verify Group 1: Nullable fields (values from Java's non-nullable fields) + if result.U8Field1 == nil || *result.U8Field1 != 200 { + panic(fmt.Sprintf("U8Field1 mismatch: expected 200, got %v", result.U8Field1)) + } + if result.U16Field1 == nil || *result.U16Field1 != 60000 { + panic(fmt.Sprintf("U16Field1 mismatch: expected 60000, got %v", result.U16Field1)) + } + if result.U32VarField1 == nil || *result.U32VarField1 != 3000000000 { + panic(fmt.Sprintf("U32VarField1 mismatch: expected 3000000000, got %v", result.U32VarField1)) + } + if result.U32FixedField1 == nil || *result.U32FixedField1 != 4000000000 { + panic(fmt.Sprintf("U32FixedField1 mismatch: expected 4000000000, got %v", result.U32FixedField1)) + } + if result.U64VarField1 == nil || *result.U64VarField1 != 10000000000 { + panic(fmt.Sprintf("U64VarField1 mismatch: expected 10000000000, got %v", 
result.U64VarField1)) + } + if result.U64FixedField1 == nil || *result.U64FixedField1 != 15000000000 { + panic(fmt.Sprintf("U64FixedField1 mismatch: expected 15000000000, got %v", result.U64FixedField1)) + } + if result.U64TaggedField1 == nil || *result.U64TaggedField1 != 1000000000 { + panic(fmt.Sprintf("U64TaggedField1 mismatch: expected 1000000000, got %v", result.U64TaggedField1)) + } + + // Verify Group 2: Non-nullable fields (values from Java's nullable fields) + assertEqual(uint8(128), result.U8Field2, "U8Field2") + assertEqual(uint16(40000), result.U16Field2, "U16Field2") + assertEqual(uint32(2500000000), result.U32VarField2, "U32VarField2") + assertEqual(uint32(3500000000), result.U32FixedField2, "U32FixedField2") + assertEqual(uint64(8000000000), result.U64VarField2, "U64VarField2") + assertEqual(uint64(12000000000), result.U64FixedField2, "U64FixedField2") + assertEqual(uint64(500000000), result.U64TaggedField2, "U64TaggedField2") + + serialized, err := f.Serialize(result) + if err != nil { + panic(fmt.Sprintf("Failed to serialize: %v", err)) + } + + fmt.Printf("[Go] Serialized output size: %d bytes\n", len(serialized)) + fmt.Printf("[Go] Serialized output hex: %x\n", serialized) + + writeFile(dataFile, serialized) +} + // ============================================================================ // Main // ============================================================================ @@ -2223,6 +2468,12 @@ func main() { testCircularRefSchemaConsistent() case "test_circular_ref_compatible": testCircularRefCompatible() + case "test_unsigned_schema_consistent_simple": + testUnsignedSchemaConsistentSimple() + case "test_unsigned_schema_consistent": + testUnsignedSchemaConsistent() + case "test_unsigned_schema_compatible": + testUnsignedSchemaCompatible() default: panic(fmt.Sprintf("Unknown test case: %s", *caseName)) } diff --git a/go/fory/type_def.go b/go/fory/type_def.go index 78895f9c9a..424b796454 100644 --- a/go/fory/type_def.go +++ 
b/go/fory/type_def.go @@ -218,7 +218,7 @@ func (td *TypeDef) buildTypeInfoWithResolver(resolver *TypeResolver) (TypeInfo, } } else { // Known struct type - use structSerializer with fieldDefs - structSer := newStructSerializer(type_, "", td.fieldDefs) + structSer := newStructSerializerFromTypeDef(type_, "", td.fieldDefs) // Eagerly initialize the struct serializer with pre-computed field metadata if resolver != nil { if err := structSer.initialize(resolver); err != nil { @@ -417,7 +417,7 @@ func buildFieldDefs(fory *Fory, value reflect.Value) ([]FieldDef, error) { } // Parse fory struct tag and check for ignore - foryTag := ParseForyTag(field) + foryTag := parseForyTag(field) if foryTag.Ignore { continue // skip ignored fields } @@ -431,6 +431,67 @@ func buildFieldDefs(fory *Fory, value reflect.Value) ([]FieldDef, error) { if err != nil { return nil, fmt.Errorf("failed to build field type for field %s: %w", fieldName, err) } + + // Apply encoding override from struct tags if set + // This works for both direct types and pointer-wrapped types + baseKind := field.Type.Kind() + // Handle pointer types - get the element kind + if baseKind == reflect.Ptr { + baseKind = field.Type.Elem().Kind() + } + + // Check if we need to override the TypeID based on compress/encoding tags + var overrideTypeId TypeId = 0 + switch baseKind { + case reflect.Uint32: + if foryTag.CompressSet { + if foryTag.Compress { + overrideTypeId = VAR_UINT32 + } else { + overrideTypeId = UINT32 + } + } + case reflect.Int32: + if foryTag.CompressSet { + if foryTag.Compress { + overrideTypeId = VARINT32 + } else { + overrideTypeId = INT32 + } + } + case reflect.Uint64: + if foryTag.EncodingSet { + switch foryTag.Encoding { + case "fixed": + overrideTypeId = UINT64 + case "varint": + overrideTypeId = VAR_UINT64 + case "tagged": + overrideTypeId = TAGGED_UINT64 + default: + return nil, fmt.Errorf("field %s: invalid encoding value %q for uint64, must be 'fixed', 'varint', or 'tagged'", fieldName, 
foryTag.Encoding) + } + } + case reflect.Int64: + if foryTag.EncodingSet { + switch foryTag.Encoding { + case "fixed": + overrideTypeId = INT64 + case "varint": + overrideTypeId = VARINT64 + case "tagged": + overrideTypeId = TAGGED_INT64 + default: + return nil, fmt.Errorf("field %s: invalid encoding value %q for int64, must be 'fixed', 'varint', or 'tagged'", fieldName, foryTag.Encoding) + } + } + } + + // Apply the override if one was determined + if overrideTypeId != 0 { + ft = NewSimpleFieldType(overrideTypeId) + } + // Determine nullable based on mode: // - In xlang mode: Per xlang spec, fields are NON-NULLABLE by default. // Only pointer types are nullable by default. @@ -910,31 +971,32 @@ func buildFieldType(fory *Fory, fieldValue reflect.Value) (FieldType, error) { } // Handle slice and array types BEFORE getTypeInfo to avoid anonymous type errors - // For fixed-size arrays with primitive elements, use primitive array type IDs (INT16_ARRAY, etc.) - // For slices and arrays with non-primitive elements, use collection format + // For primitive element types, use primitive array type IDs (INT16_ARRAY, etc.) 
+ // For non-primitive elements, use collection format (LIST with element type) if fieldType.Kind() == reflect.Slice || fieldType.Kind() == reflect.Array { elemType := fieldType.Elem() // Check if element is a primitive type that maps to a primitive array type ID - // Only fixed-size arrays use primitive array format; slices always use LIST - if fieldType.Kind() == reflect.Array { - switch elemType.Kind() { - case reflect.Int8: - return NewSimpleFieldType(INT8_ARRAY), nil - case reflect.Int16: - return NewSimpleFieldType(INT16_ARRAY), nil - case reflect.Int32: - return NewSimpleFieldType(INT32_ARRAY), nil - case reflect.Int64: - return NewSimpleFieldType(INT64_ARRAY), nil - case reflect.Float32: - return NewSimpleFieldType(FLOAT32_ARRAY), nil - case reflect.Float64: - return NewSimpleFieldType(FLOAT64_ARRAY), nil - } + // Both slices and fixed-size arrays with primitive elements use primitive array format + // This matches typeIdFromKind in field_info.go for consistent field sorting + switch elemType.Kind() { + case reflect.Bool: + return NewSimpleFieldType(BOOL_ARRAY), nil + case reflect.Int8: + return NewSimpleFieldType(INT8_ARRAY), nil + case reflect.Int16: + return NewSimpleFieldType(INT16_ARRAY), nil + case reflect.Int32: + return NewSimpleFieldType(INT32_ARRAY), nil + case reflect.Int64, reflect.Int: + return NewSimpleFieldType(INT64_ARRAY), nil + case reflect.Float32: + return NewSimpleFieldType(FLOAT32_ARRAY), nil + case reflect.Float64: + return NewSimpleFieldType(FLOAT64_ARRAY), nil } - // For slices and non-primitive arrays, use collection format + // For non-primitive elements, use collection format (LIST with element type) elemValue := reflect.Zero(elemType) elementFieldType, err := buildFieldType(fory, elemValue) if err != nil { diff --git a/go/fory/type_resolver.go b/go/fory/type_resolver.go index 47286e8b70..5e2de1bb16 100644 --- a/go/fory/type_resolver.go +++ b/go/fory/type_resolver.go @@ -81,6 +81,9 @@ var ( boolType = 
reflect.TypeOf((*bool)(nil)).Elem() byteType = reflect.TypeOf((*byte)(nil)).Elem() uint8Type = reflect.TypeOf((*uint8)(nil)).Elem() + uint16Type = reflect.TypeOf((*uint16)(nil)).Elem() + uint32Type = reflect.TypeOf((*uint32)(nil)).Elem() + uint64Type = reflect.TypeOf((*uint64)(nil)).Elem() int8Type = reflect.TypeOf((*int8)(nil)).Elem() int16Type = reflect.TypeOf((*int16)(nil)).Elem() int32Type = reflect.TypeOf((*int32)(nil)).Elem() @@ -120,7 +123,7 @@ type TypeInfo struct { NameBytes *MetaStringBytes IsDynamic bool TypeID uint32 - StaticId StaticTypeId + DispatchId DispatchId Serializer Serializer NeedWriteDef bool NeedWriteRef bool // Whether this type needs reference tracking @@ -342,11 +345,14 @@ func (r *TypeResolver) initialize() { // Register primitive types {boolType, BOOL, boolSerializer{}}, {byteType, UINT8, byteSerializer{}}, + {uint16Type, UINT16, uint16Serializer{}}, + {uint32Type, VAR_UINT32, uint32Serializer{}}, + {uint64Type, VAR_UINT64, uint64Serializer{}}, {int8Type, INT8, int8Serializer{}}, {int16Type, INT16, int16Serializer{}}, - {int32Type, INT32, int32Serializer{}}, - {int64Type, INT64, int64Serializer{}}, - {intType, INT64, intSerializer{}}, // int maps to int64 for xlang + {int32Type, VARINT32, int32Serializer{}}, + {int64Type, VARINT64, int64Serializer{}}, + {intType, VARINT64, intSerializer{}}, // int maps to int64 for xlang {float32Type, FLOAT32, float32Serializer{}}, {float64Type, FLOAT64, float64Serializer{}}, {dateType, LOCAL_DATE, dateSerializer{}}, @@ -359,6 +365,31 @@ func (r *TypeResolver) initialize() { fmt.Errorf("init type error: %v", err) } } + + // Register additional TypeIds for types that support multiple encodings. + // This allows Go to deserialize data from Java that uses different encoding variants. + // For example, Java may send UINT32 (fixed) but Go only registered VAR_UINT32 by default. + // We need to map all encoding variants to the same Go type. 
+ additionalTypeIds := []struct { + typeId TypeId + goType reflect.Type + }{ + // Fixed-size integer encodings (in addition to varint defaults) + {UINT32, uint32Type}, // Fixed UINT32 (11) → uint32 + {UINT64, uint64Type}, // Fixed UINT64 (13) → uint64 + {TAGGED_UINT64, uint64Type}, // Tagged UINT64 (15) → uint64 + {INT32, int32Type}, // Fixed INT32 (4) → int32 + {INT64, int64Type}, // Fixed INT64 (6) → int64 + {TAGGED_INT64, int64Type}, // Tagged INT64 (8) → int64 + } + for _, entry := range additionalTypeIds { + if _, exists := r.typeIDToTypeInfo[uint32(entry.typeId)]; !exists { + // Get the existing TypeInfo for this Go type and create a reference to it + if existingInfo, ok := r.typesInfo[entry.goType]; ok { + r.typeIDToTypeInfo[uint32(entry.typeId)] = existingInfo + } + } + } } func (r *TypeResolver) registerSerializer(type_ reflect.Type, typeId TypeId, s Serializer) error { @@ -397,7 +428,7 @@ func (r *TypeResolver) RegisterByID(type_ reflect.Type, fullTypeID uint32) error // Create struct serializer tag := type_.Name() - serializer := newStructSerializer(type_, tag, nil) + serializer := newStructSerializer(type_, tag) r.typeToSerializers[type_] = serializer r.typeToTypeInfo[type_] = "@" + tag r.typeInfoToType["@"+tag] = type_ @@ -461,7 +492,7 @@ func (r *TypeResolver) RegisterEnumByID(type_ reflect.Type, fullTypeID uint32) e TypeID: fullTypeID, Serializer: serializer, IsDynamic: isDynamicType(type_), - StaticId: GetStaticTypeId(type_), + DispatchId: GetDispatchId(type_), hashValue: calcTypeHash(type_), } r.typeIDToTypeInfo[fullTypeID] = typeInfo @@ -548,7 +579,7 @@ func (r *TypeResolver) RegisterNamedType( } else { tag = namespace + "." + typeName } - serializer := newStructSerializer(type_, tag, nil) + serializer := newStructSerializer(type_, tag) r.typeToSerializers[type_] = serializer // multiple struct with same name defined inside function will have same `type_.String()`, but they are // different types. so we use tag to encode type info. 
@@ -832,7 +863,7 @@ func (r *TypeResolver) getTypeInfo(value reflect.Value, create bool) (*TypeInfo, NameBytes: elemInfo.NameBytes, IsDynamic: elemInfo.IsDynamic, TypeID: elemInfo.TypeID, - StaticId: elemInfo.StaticId, + DispatchId: elemInfo.DispatchId, Serializer: ptrSerializer, NeedWriteDef: elemInfo.NeedWriteDef, hashValue: elemInfo.hashValue, @@ -1066,8 +1097,8 @@ func (r *TypeResolver) registerType( PkgPathBytes: nsBytes, // Encoded namespace bytes NameBytes: typeBytes, // Encoded type name bytes IsDynamic: isDynamicType(type_), - StaticId: GetStaticTypeId(type_), // Static type ID for fast path - hashValue: calcTypeHash(type_), // Precomputed hash for fast lookups + DispatchId: GetDispatchId(type_), // Static type ID for fast path + hashValue: calcTypeHash(type_), // Precomputed hash for fast lookups NeedWriteRef: NeedWriteRef(TypeId(typeID)), } // Update resolver caches: @@ -1759,112 +1790,112 @@ func (r *TypeResolver) ReadTypeInfo(buffer *ByteBuffer, err *Error) *TypeInfo { Type: interfaceSliceType, TypeID: typeID, Serializer: r.typeToSerializers[interfaceSliceType], - StaticId: ConcreteTypeOther, + DispatchId: UnknownDispatchId, } case SET, -SET: return &TypeInfo{ Type: genericSetType, TypeID: typeID, Serializer: r.typeToSerializers[genericSetType], - StaticId: ConcreteTypeOther, + DispatchId: UnknownDispatchId, } case MAP, -MAP: return &TypeInfo{ Type: interfaceMapType, TypeID: typeID, Serializer: r.typeToSerializers[interfaceMapType], - StaticId: ConcreteTypeOther, + DispatchId: UnknownDispatchId, } case BOOL: return &TypeInfo{ Type: reflect.TypeOf(false), TypeID: typeID, Serializer: r.typeToSerializers[reflect.TypeOf(false)], - StaticId: ConcreteTypeBool, + DispatchId: PrimitiveBoolDispatchId, } case INT8: return &TypeInfo{ Type: reflect.TypeOf(int8(0)), TypeID: typeID, Serializer: r.typeToSerializers[reflect.TypeOf(int8(0))], - StaticId: ConcreteTypeInt8, + DispatchId: PrimitiveInt8DispatchId, } case UINT8: return &TypeInfo{ Type: 
reflect.TypeOf(uint8(0)), TypeID: typeID, Serializer: r.typeToSerializers[reflect.TypeOf(uint8(0))], - StaticId: ConcreteTypeInt8, // Use Int8 static ID for uint8 + DispatchId: PrimitiveInt8DispatchId, // Use Int8 static ID for uint8 } case INT16: return &TypeInfo{ Type: reflect.TypeOf(int16(0)), TypeID: typeID, Serializer: r.typeToSerializers[reflect.TypeOf(int16(0))], - StaticId: ConcreteTypeInt16, + DispatchId: PrimitiveInt16DispatchId, } case UINT16: return &TypeInfo{ Type: reflect.TypeOf(uint16(0)), TypeID: typeID, Serializer: r.typeToSerializers[reflect.TypeOf(uint16(0))], - StaticId: ConcreteTypeInt16, // Use Int16 static ID for uint16 + DispatchId: PrimitiveInt16DispatchId, // Use Int16 static ID for uint16 } - case INT32, VAR32: + case INT32, VARINT32: return &TypeInfo{ Type: reflect.TypeOf(int32(0)), TypeID: typeID, Serializer: r.typeToSerializers[reflect.TypeOf(int32(0))], - StaticId: ConcreteTypeInt32, + DispatchId: PrimitiveInt32DispatchId, } case UINT32: return &TypeInfo{ Type: reflect.TypeOf(uint32(0)), TypeID: typeID, Serializer: r.typeToSerializers[reflect.TypeOf(uint32(0))], - StaticId: ConcreteTypeInt32, // Use Int32 static ID for uint32 + DispatchId: PrimitiveInt32DispatchId, // Use Int32 static ID for uint32 } - case INT64, VAR64, H64: + case INT64, VARINT64, TAGGED_INT64: return &TypeInfo{ Type: reflect.TypeOf(int64(0)), TypeID: typeID, Serializer: r.typeToSerializers[reflect.TypeOf(int64(0))], - StaticId: ConcreteTypeInt64, + DispatchId: PrimitiveInt64DispatchId, } case UINT64: return &TypeInfo{ Type: reflect.TypeOf(uint64(0)), TypeID: typeID, Serializer: r.typeToSerializers[reflect.TypeOf(uint64(0))], - StaticId: ConcreteTypeInt64, // Use Int64 static ID for uint64 + DispatchId: PrimitiveInt64DispatchId, // Use Int64 static ID for uint64 } case FLOAT32: return &TypeInfo{ Type: reflect.TypeOf(float32(0)), TypeID: typeID, Serializer: r.typeToSerializers[reflect.TypeOf(float32(0))], - StaticId: ConcreteTypeFloat32, + DispatchId: 
PrimitiveFloat32DispatchId, } case FLOAT64: return &TypeInfo{ Type: reflect.TypeOf(float64(0)), TypeID: typeID, Serializer: r.typeToSerializers[reflect.TypeOf(float64(0))], - StaticId: ConcreteTypeFloat64, + DispatchId: PrimitiveFloat64DispatchId, } case STRING: return &TypeInfo{ Type: reflect.TypeOf(""), TypeID: typeID, Serializer: r.typeToSerializers[reflect.TypeOf("")], - StaticId: ConcreteTypeString, + DispatchId: StringDispatchId, } case BINARY: return &TypeInfo{ Type: reflect.TypeOf([]byte(nil)), TypeID: typeID, Serializer: r.typeToSerializers[reflect.TypeOf([]byte(nil))], - StaticId: ConcreteTypeOther, + DispatchId: UnknownDispatchId, } } @@ -1930,49 +1961,49 @@ func (r *TypeResolver) readTypeInfoWithTypeID(buffer *ByteBuffer, typeID uint32, Type: interfaceSliceType, TypeID: typeID, Serializer: r.typeToSerializers[interfaceSliceType], - StaticId: ConcreteTypeOther, + DispatchId: UnknownDispatchId, } case SET: return &TypeInfo{ Type: genericSetType, TypeID: typeID, Serializer: r.typeToSerializers[genericSetType], - StaticId: ConcreteTypeOther, + DispatchId: UnknownDispatchId, } case MAP: return &TypeInfo{ Type: interfaceMapType, TypeID: typeID, Serializer: r.typeToSerializers[interfaceMapType], - StaticId: ConcreteTypeOther, + DispatchId: UnknownDispatchId, } // Handle primitive types that may not be explicitly registered case BOOL: - return &TypeInfo{Type: boolType, TypeID: typeID, Serializer: r.typeToSerializers[boolType], StaticId: ConcreteTypeBool} + return &TypeInfo{Type: boolType, TypeID: typeID, Serializer: r.typeToSerializers[boolType], DispatchId: PrimitiveBoolDispatchId} case INT8: - return &TypeInfo{Type: int8Type, TypeID: typeID, Serializer: r.typeToSerializers[int8Type], StaticId: ConcreteTypeInt8} + return &TypeInfo{Type: int8Type, TypeID: typeID, Serializer: r.typeToSerializers[int8Type], DispatchId: PrimitiveInt8DispatchId} case INT16: - return &TypeInfo{Type: int16Type, TypeID: typeID, Serializer: r.typeToSerializers[int16Type], StaticId: 
ConcreteTypeInt16} - case INT32, VAR32: - return &TypeInfo{Type: int32Type, TypeID: typeID, Serializer: r.typeToSerializers[int32Type], StaticId: ConcreteTypeInt32} - case INT64, VAR64, H64: - return &TypeInfo{Type: int64Type, TypeID: typeID, Serializer: r.typeToSerializers[int64Type], StaticId: ConcreteTypeInt64} + return &TypeInfo{Type: int16Type, TypeID: typeID, Serializer: r.typeToSerializers[int16Type], DispatchId: PrimitiveInt16DispatchId} + case INT32, VARINT32: + return &TypeInfo{Type: int32Type, TypeID: typeID, Serializer: r.typeToSerializers[int32Type], DispatchId: PrimitiveInt32DispatchId} + case INT64, VARINT64, TAGGED_INT64: + return &TypeInfo{Type: int64Type, TypeID: typeID, Serializer: r.typeToSerializers[int64Type], DispatchId: PrimitiveInt64DispatchId} case FLOAT32: - return &TypeInfo{Type: float32Type, TypeID: typeID, Serializer: r.typeToSerializers[float32Type], StaticId: ConcreteTypeFloat32} + return &TypeInfo{Type: float32Type, TypeID: typeID, Serializer: r.typeToSerializers[float32Type], DispatchId: PrimitiveFloat32DispatchId} case FLOAT64: - return &TypeInfo{Type: float64Type, TypeID: typeID, Serializer: r.typeToSerializers[float64Type], StaticId: ConcreteTypeFloat64} + return &TypeInfo{Type: float64Type, TypeID: typeID, Serializer: r.typeToSerializers[float64Type], DispatchId: PrimitiveFloat64DispatchId} case STRING: - return &TypeInfo{Type: stringType, TypeID: typeID, Serializer: r.typeToSerializers[stringType], StaticId: ConcreteTypeString} + return &TypeInfo{Type: stringType, TypeID: typeID, Serializer: r.typeToSerializers[stringType], DispatchId: StringDispatchId} case BINARY: - return &TypeInfo{Type: byteSliceType, TypeID: typeID, Serializer: r.typeToSerializers[byteSliceType], StaticId: ConcreteTypeByteSlice} + return &TypeInfo{Type: byteSliceType, TypeID: typeID, Serializer: r.typeToSerializers[byteSliceType], DispatchId: ByteSliceDispatchId} } // Handle UNKNOWN type (0) - used for polymorphic types if typeID == 0 { return &TypeInfo{ 
- Type: interfaceType, - TypeID: typeID, - StaticId: ConcreteTypeOther, + Type: interfaceType, + TypeID: typeID, + DispatchId: UnknownDispatchId, } } diff --git a/go/fory/types.go b/go/fory/types.go index 8a7b09cadd..a8edfc6b94 100644 --- a/go/fory/types.go +++ b/go/fory/types.go @@ -32,28 +32,28 @@ const ( INT16 = 3 // INT32 Signed 32-bit little-endian integer INT32 = 4 - // VAR32 a 32-bit signed integer which uses fory var_int32 encoding - VAR32 = 5 + // VARINT32 a 32-bit signed integer which uses fory var_int32 encoding + VARINT32 = 5 // INT64 Signed 64-bit little-endian integer INT64 = 6 - // VAR64 a 64-bit signed integer which uses fory PVL encoding - VAR64 = 7 - // H64 a 64-bit signed integer which uses fory hybrid encoding - H64 = 8 + // VARINT64 a 64-bit signed integer which uses fory PVL encoding + VARINT64 = 7 + // TAGGED_INT64 a 64-bit signed integer which uses fory hybrid encoding + TAGGED_INT64 = 8 // UINT8 Unsigned 8-bit little-endian integer UINT8 = 9 // UINT16 Unsigned 16-bit little-endian integer UINT16 = 10 // UINT32 Unsigned 32-bit little-endian integer UINT32 = 11 - // VARU32 a 32-bit unsigned integer which uses fory var_uint32 encoding - VARU32 = 12 + // VAR_UINT32 a 32-bit unsigned integer which uses fory var_uint32 encoding + VAR_UINT32 = 12 // UINT64 Unsigned 64-bit little-endian integer UINT64 = 13 - // VARU64 a 64-bit unsigned integer which uses fory var_uint64 encoding - VARU64 = 14 - // HU64 a 64-bit unsigned integer which uses fory hybrid encoding - HU64 = 15 + // VAR_UINT64 a 64-bit unsigned integer which uses fory var_uint64 encoding + VAR_UINT64 = 14 + // TAGGED_UINT64 a 64-bit unsigned integer which uses fory hybrid encoding + TAGGED_UINT64 = 15 // FLOAT16 2-byte floating point value FLOAT16 = 16 // FLOAT32 4-byte floating point value @@ -154,7 +154,18 @@ func isPrimitiveType(typeID int16) bool { INT8, INT16, INT32, + VARINT32, INT64, + VARINT64, + TAGGED_INT64, + UINT8, + UINT16, + UINT32, + VAR_UINT32, + UINT64, + VAR_UINT64, + 
TAGGED_UINT64, + FLOAT16, FLOAT32, FLOAT64: return true @@ -168,7 +179,7 @@ func isPrimitiveType(typeID int16) bool { // Collections, structs, and other complex types need reference tracking. func NeedWriteRef(typeID TypeId) bool { switch typeID { - case BOOL, INT8, INT16, INT32, INT64, VAR32, VAR64, H64, + case BOOL, INT8, INT16, INT32, INT64, VARINT32, VARINT64, TAGGED_INT64, FLOAT32, FLOAT64, FLOAT16, STRING, TIMESTAMP, LOCAL_DATE, DURATION: return false @@ -209,17 +220,35 @@ func isPrimitiveArrayType(typeID int16) bool { } var primitiveTypeSizes = map[int16]int{ - BOOL: 1, - INT8: 1, - INT16: 2, - INT32: 4, - VAR32: 4, - INT64: 8, - VAR64: 8, - FLOAT32: 4, - FLOAT64: 8, + BOOL: 1, + INT8: 1, + UINT8: 1, + INT16: 2, + UINT16: 2, + FLOAT16: 2, + INT32: 4, + VARINT32: 4, + UINT32: 4, + VAR_UINT32: 4, + FLOAT32: 4, + INT64: 8, + VARINT64: 8, + TAGGED_INT64: 8, + UINT64: 8, + VAR_UINT64: 8, + TAGGED_UINT64: 8, + FLOAT64: 8, } +// MaxInt31 is the maximum value that fits in 31 bits (used for TAGGED_UINT64 encoding) +const MaxInt31 uint64 = 0x7FFFFFFF // 2^31 - 1 + +// MinInt31 is the minimum value that fits in 31 bits (used for TAGGED_INT64 encoding) +const MinInt31 int64 = -0x40000000 // -2^30 + +// MaxInt31Signed is MaxInt31 as a signed int64 for TAGGED_INT64 encoding +const MaxInt31Signed int64 = 0x3FFFFFFF // 2^30 - 1 + func getPrimitiveTypeSize(typeID int16) int { if sz, ok := primitiveTypeSizes[typeID]; ok { return sz @@ -240,188 +269,309 @@ func isUserDefinedType(typeID int16) bool { } // ============================================================================ -// StaticTypeId for switch-based fast path (avoids interface virtual method cost) +// DispatchId for switch-based fast path (avoids interface virtual method cost) // ============================================================================ -// StaticTypeId identifies concrete Go types for optimized serialization dispatch -type StaticTypeId uint8 +// DispatchId identifies concrete Go types for 
optimized serialization dispatch. +// Following Java's pattern with separate IDs for primitive (non-nullable) and boxed (nullable) types. +type DispatchId uint8 const ( - ConcreteTypeOther StaticTypeId = iota - ConcreteTypeBool - ConcreteTypeInt8 - ConcreteTypeInt16 - ConcreteTypeInt32 - ConcreteTypeInt64 - ConcreteTypeInt - ConcreteTypeFloat32 - ConcreteTypeFloat64 - ConcreteTypeString - ConcreteTypeByteSlice - ConcreteTypeInt8Slice - ConcreteTypeInt16Slice - ConcreteTypeInt32Slice - ConcreteTypeInt64Slice - ConcreteTypeIntSlice - ConcreteTypeUintSlice - ConcreteTypeFloat32Slice - ConcreteTypeFloat64Slice - ConcreteTypeBoolSlice - ConcreteTypeStringSlice - ConcreteTypeStringStringMap - ConcreteTypeStringInt32Map - ConcreteTypeStringInt64Map - ConcreteTypeStringIntMap - ConcreteTypeStringFloat64Map - ConcreteTypeStringBoolMap - ConcreteTypeInt32Int32Map - ConcreteTypeInt64Int64Map - ConcreteTypeIntIntMap - ConcreteTypeEnum // Enum types (both ENUM and NAMED_ENUM) + UnknownDispatchId DispatchId = iota + + // Primitive (non-nullable) dispatch IDs - match Java's PRIMITIVE_* constants + PrimitiveBoolDispatchId + PrimitiveInt8DispatchId + PrimitiveInt16DispatchId + PrimitiveInt32DispatchId + PrimitiveVarint32DispatchId + PrimitiveInt64DispatchId + PrimitiveVarint64DispatchId + PrimitiveTaggedInt64DispatchId + PrimitiveFloat32DispatchId + PrimitiveFloat64DispatchId + PrimitiveUint8DispatchId + PrimitiveUint16DispatchId + PrimitiveUint32DispatchId + PrimitiveVarUint32DispatchId + PrimitiveUint64DispatchId + PrimitiveVarUint64DispatchId + PrimitiveTaggedUint64DispatchId + PrimitiveIntDispatchId // Go-specific: native int + PrimitiveUintDispatchId // Go-specific: native uint + + // Nullable dispatch IDs - match Java's non-PRIMITIVE_* constants + NullableBoolDispatchId + NullableInt8DispatchId + NullableInt16DispatchId + NullableInt32DispatchId + NullableVarint32DispatchId + NullableInt64DispatchId + NullableVarint64DispatchId + NullableTaggedInt64DispatchId + 
NullableFloat32DispatchId + NullableFloat64DispatchId + NullableUint8DispatchId + NullableUint16DispatchId + NullableUint32DispatchId + NullableVarUint32DispatchId + NullableUint64DispatchId + NullableVarUint64DispatchId + NullableTaggedUint64DispatchId + NullableIntDispatchId // Go-specific: *int + NullableUintDispatchId // Go-specific: *uint + + // Notnull pointer dispatch IDs - pointer types with nullable=false + // Write without null flag; on read, create default value if remote sends null + NotnullBoolPtrDispatchId + NotnullInt8PtrDispatchId + NotnullInt16PtrDispatchId + NotnullInt32PtrDispatchId + NotnullVarint32PtrDispatchId + NotnullInt64PtrDispatchId + NotnullVarint64PtrDispatchId + NotnullTaggedInt64PtrDispatchId + NotnullFloat32PtrDispatchId + NotnullFloat64PtrDispatchId + NotnullUint8PtrDispatchId + NotnullUint16PtrDispatchId + NotnullUint32PtrDispatchId + NotnullVarUint32PtrDispatchId + NotnullUint64PtrDispatchId + NotnullVarUint64PtrDispatchId + NotnullTaggedUint64PtrDispatchId + NotnullIntPtrDispatchId + NotnullUintPtrDispatchId + + // String dispatch ID + StringDispatchId + + // Slice dispatch IDs + ByteSliceDispatchId + Int8SliceDispatchId + Int16SliceDispatchId + Int32SliceDispatchId + Int64SliceDispatchId + IntSliceDispatchId + UintSliceDispatchId + Float32SliceDispatchId + Float64SliceDispatchId + BoolSliceDispatchId + StringSliceDispatchId + + // Map dispatch IDs + StringStringMapDispatchId + StringInt32MapDispatchId + StringInt64MapDispatchId + StringIntMapDispatchId + StringFloat64MapDispatchId + StringBoolMapDispatchId + Int32Int32MapDispatchId + Int64Int64MapDispatchId + IntIntMapDispatchId + + // Enum dispatch ID + EnumDispatchId // Enum types (both ENUM and NAMED_ENUM) ) -// GetStaticTypeId returns the StaticTypeId for a reflect.Type -func GetStaticTypeId(t reflect.Type) StaticTypeId { +// GetDispatchId returns the DispatchId for a reflect.Type. 
+// For int32/int64/uint32/uint64, returns varint dispatch IDs by default since that's +// the default encoding in xlang serialization (VARINT32, VARINT64, VAR_UINT32, VAR_UINT64). +func GetDispatchId(t reflect.Type) DispatchId { switch t.Kind() { case reflect.Bool: - return ConcreteTypeBool + return PrimitiveBoolDispatchId case reflect.Int8: - return ConcreteTypeInt8 + return PrimitiveInt8DispatchId case reflect.Int16: - return ConcreteTypeInt16 + return PrimitiveInt16DispatchId case reflect.Int32: - return ConcreteTypeInt32 + // Default to varint encoding (VARINT32) for xlang compatibility + return PrimitiveVarint32DispatchId case reflect.Int64: - return ConcreteTypeInt64 + // Default to varint encoding (VARINT64) for xlang compatibility + return PrimitiveVarint64DispatchId case reflect.Int: - return ConcreteTypeInt + return PrimitiveIntDispatchId + case reflect.Uint8: + return PrimitiveUint8DispatchId + case reflect.Uint16: + return PrimitiveUint16DispatchId + case reflect.Uint32: + // Default to varint encoding (VAR_UINT32) for xlang compatibility + return PrimitiveVarUint32DispatchId + case reflect.Uint64: + // Default to varint encoding (VAR_UINT64) for xlang compatibility + return PrimitiveVarUint64DispatchId + case reflect.Uint: + return PrimitiveUintDispatchId case reflect.Float32: - return ConcreteTypeFloat32 + return PrimitiveFloat32DispatchId case reflect.Float64: - return ConcreteTypeFloat64 + return PrimitiveFloat64DispatchId case reflect.String: - return ConcreteTypeString + return StringDispatchId case reflect.Slice: // Check for specific slice types switch t.Elem().Kind() { case reflect.Uint8: - return ConcreteTypeByteSlice + return ByteSliceDispatchId case reflect.Int8: - return ConcreteTypeInt8Slice + return Int8SliceDispatchId case reflect.Int16: - return ConcreteTypeInt16Slice + return Int16SliceDispatchId case reflect.Int32: - return ConcreteTypeInt32Slice + return Int32SliceDispatchId case reflect.Int64: - return ConcreteTypeInt64Slice + 
return Int64SliceDispatchId case reflect.Int: - return ConcreteTypeIntSlice + return IntSliceDispatchId case reflect.Uint: - return ConcreteTypeUintSlice + return UintSliceDispatchId case reflect.Float32: - return ConcreteTypeFloat32Slice + return Float32SliceDispatchId case reflect.Float64: - return ConcreteTypeFloat64Slice + return Float64SliceDispatchId case reflect.Bool: - return ConcreteTypeBoolSlice + return BoolSliceDispatchId case reflect.String: - return ConcreteTypeStringSlice + return StringSliceDispatchId } - return ConcreteTypeOther + return UnknownDispatchId case reflect.Map: // Check for specific common map types if t.Key().Kind() == reflect.String { switch t.Elem().Kind() { case reflect.String: - return ConcreteTypeStringStringMap + return StringStringMapDispatchId case reflect.Int64: - return ConcreteTypeStringInt64Map + return StringInt64MapDispatchId case reflect.Int: - return ConcreteTypeStringIntMap + return StringIntMapDispatchId case reflect.Float64: - return ConcreteTypeStringFloat64Map + return StringFloat64MapDispatchId case reflect.Bool: - return ConcreteTypeStringBoolMap + return StringBoolMapDispatchId } } else if t.Key().Kind() == reflect.Int32 && t.Elem().Kind() == reflect.Int32 { - return ConcreteTypeInt32Int32Map + return Int32Int32MapDispatchId } else if t.Key().Kind() == reflect.Int64 && t.Elem().Kind() == reflect.Int64 { - return ConcreteTypeInt64Int64Map + return Int64Int64MapDispatchId } else if t.Key().Kind() == reflect.Int && t.Elem().Kind() == reflect.Int { - return ConcreteTypeIntIntMap + return IntIntMapDispatchId } - return ConcreteTypeOther + return UnknownDispatchId default: - return ConcreteTypeOther + return UnknownDispatchId } } -// GetConcreteTypeIdAndTypeId returns both StaticTypeId and TypeId for a reflect.Type -func GetConcreteTypeIdAndTypeId(t reflect.Type) (StaticTypeId, TypeId) { - switch t.Kind() { - case reflect.Bool: - return ConcreteTypeBool, BOOL - case reflect.Int8: - return ConcreteTypeInt8, INT8 - case 
reflect.Int16: - return ConcreteTypeInt16, INT16 - case reflect.Int32: - return ConcreteTypeInt32, INT32 - case reflect.Int64: - return ConcreteTypeInt64, INT64 - case reflect.Float32: - return ConcreteTypeFloat32, FLOAT32 - case reflect.Float64: - return ConcreteTypeFloat64, FLOAT64 - case reflect.String: - return ConcreteTypeString, STRING +// IsPrimitiveTypeId checks if a type ID is a primitive type +func IsPrimitiveTypeId(typeId TypeId) bool { + switch typeId { + case BOOL, INT8, INT16, INT32, VARINT32, INT64, VARINT64, TAGGED_INT64, + UINT8, UINT16, UINT32, VAR_UINT32, UINT64, VAR_UINT64, TAGGED_UINT64, + FLOAT16, FLOAT32, FLOAT64, STRING: + return true default: - return ConcreteTypeOther, 0 + return false } } -// IsPrimitiveTypeId checks if a type ID is a primitive type -func IsPrimitiveTypeId(typeId TypeId) bool { - switch typeId { - case BOOL, INT8, INT16, INT32, INT64, FLOAT32, FLOAT64, STRING: +// isFixedSizePrimitive returns true for fixed-size primitives and notnull pointer types. +// Includes INT32/UINT32/INT64/UINT64 (fixed encoding), NOT VARINT32/VAR_UINT32 etc. 
+func isFixedSizePrimitive(dispatchId DispatchId, referencable bool) bool { + switch dispatchId { + case PrimitiveBoolDispatchId, PrimitiveInt8DispatchId, PrimitiveUint8DispatchId, + PrimitiveInt16DispatchId, PrimitiveUint16DispatchId, + PrimitiveInt32DispatchId, PrimitiveUint32DispatchId, + PrimitiveInt64DispatchId, PrimitiveUint64DispatchId, + PrimitiveFloat32DispatchId, PrimitiveFloat64DispatchId: + return !referencable + case NotnullBoolPtrDispatchId, NotnullInt8PtrDispatchId, NotnullUint8PtrDispatchId, + NotnullInt16PtrDispatchId, NotnullUint16PtrDispatchId, + NotnullInt32PtrDispatchId, NotnullUint32PtrDispatchId, + NotnullInt64PtrDispatchId, NotnullUint64PtrDispatchId, + NotnullFloat32PtrDispatchId, NotnullFloat64PtrDispatchId: return true default: return false } } -// isFixedSizePrimitive returns true for non-nullable fixed-size primitives -func isFixedSizePrimitive(staticId StaticTypeId, referencable bool) bool { - if referencable { +// isNullableFixedSizePrimitive returns true for nullable fixed-size primitive dispatch IDs. +// These are pointer types that use fixed encoding and have a ref flag. +func isNullableFixedSizePrimitive(dispatchId DispatchId) bool { + switch dispatchId { + case NullableBoolDispatchId, NullableInt8DispatchId, NullableUint8DispatchId, + NullableInt16DispatchId, NullableUint16DispatchId, + NullableInt32DispatchId, NullableUint32DispatchId, + NullableInt64DispatchId, NullableUint64DispatchId, + NullableFloat32DispatchId, NullableFloat64DispatchId: + return true + default: return false } - switch staticId { - case ConcreteTypeBool, ConcreteTypeInt8, ConcreteTypeInt16, - ConcreteTypeFloat32, ConcreteTypeFloat64: +} + +// isNullableVarintPrimitive returns true for nullable varint primitive dispatch IDs. +// These are pointer types that use varint encoding and have a ref flag. 
+func isNullableVarintPrimitive(dispatchId DispatchId) bool { + switch dispatchId { + case NullableVarint32DispatchId, NullableVarint64DispatchId, + NullableVarUint32DispatchId, NullableVarUint64DispatchId, + NullableTaggedInt64DispatchId, NullableTaggedUint64DispatchId, + NullableIntDispatchId, NullableUintDispatchId: return true default: return false } } -// isVarintPrimitive returns true for non-nullable varint primitives -func isVarintPrimitive(staticId StaticTypeId, referencable bool) bool { - if referencable { +// isVarintPrimitive returns true for varint primitives and notnull pointer types. +// Includes VARINT32/VAR_UINT32/VARINT64/VAR_UINT64 (variable encoding), NOT INT32/UINT32 etc. +func isVarintPrimitive(dispatchId DispatchId, referencable bool) bool { + switch dispatchId { + case PrimitiveVarint32DispatchId, PrimitiveVarint64DispatchId, + PrimitiveVarUint32DispatchId, PrimitiveVarUint64DispatchId, + PrimitiveTaggedInt64DispatchId, PrimitiveTaggedUint64DispatchId, + PrimitiveIntDispatchId, PrimitiveUintDispatchId: + return !referencable + case NotnullVarint32PtrDispatchId, NotnullVarint64PtrDispatchId, + NotnullVarUint32PtrDispatchId, NotnullVarUint64PtrDispatchId, + NotnullTaggedInt64PtrDispatchId, NotnullTaggedUint64PtrDispatchId, + NotnullIntPtrDispatchId, NotnullUintPtrDispatchId: + return true + default: return false } - switch staticId { - case ConcreteTypeInt32, ConcreteTypeInt64, ConcreteTypeInt: +} + +// isPrimitiveDispatchId returns true if the dispatchId represents a primitive type +func isPrimitiveDispatchId(dispatchId DispatchId) bool { + switch dispatchId { + case PrimitiveBoolDispatchId, PrimitiveInt8DispatchId, PrimitiveInt16DispatchId, PrimitiveInt32DispatchId, + PrimitiveInt64DispatchId, PrimitiveIntDispatchId, PrimitiveUint8DispatchId, PrimitiveUint16DispatchId, + PrimitiveUint32DispatchId, PrimitiveUint64DispatchId, PrimitiveUintDispatchId, + PrimitiveFloat32DispatchId, PrimitiveFloat64DispatchId: return true default: return false } 
} -// isPrimitiveStaticId returns true if the staticId represents a primitive type -func isPrimitiveStaticId(staticId StaticTypeId) bool { - switch staticId { - case ConcreteTypeBool, ConcreteTypeInt8, ConcreteTypeInt16, ConcreteTypeInt32, - ConcreteTypeInt64, ConcreteTypeInt, ConcreteTypeFloat32, ConcreteTypeFloat64: +// isNotnullPtrDispatchId returns true if the dispatchId represents a notnull pointer type +func isNotnullPtrDispatchId(dispatchId DispatchId) bool { + switch dispatchId { + case NotnullBoolPtrDispatchId, NotnullInt8PtrDispatchId, NotnullUint8PtrDispatchId, + NotnullInt16PtrDispatchId, NotnullUint16PtrDispatchId, + NotnullInt32PtrDispatchId, NotnullUint32PtrDispatchId, + NotnullInt64PtrDispatchId, NotnullUint64PtrDispatchId, + NotnullFloat32PtrDispatchId, NotnullFloat64PtrDispatchId, + NotnullVarint32PtrDispatchId, NotnullVarint64PtrDispatchId, + NotnullVarUint32PtrDispatchId, NotnullVarUint64PtrDispatchId, + NotnullTaggedInt64PtrDispatchId, NotnullTaggedUint64PtrDispatchId, + NotnullIntPtrDispatchId, NotnullUintPtrDispatchId: return true default: return false @@ -439,29 +589,265 @@ func isNumericKind(kind reflect.Kind) bool { } } -// getFixedSizeByStaticId returns byte size for fixed primitives (0 if not fixed) -func getFixedSizeByStaticId(staticId StaticTypeId) int { - switch staticId { - case ConcreteTypeBool, ConcreteTypeInt8: +// getDispatchIdFromTypeId converts a TypeId to a DispatchId based on nullability. +// This follows Java's DispatchId.xlangTypeIdToDispatchId pattern. 
+func getDispatchIdFromTypeId(typeId TypeId, nullable bool) DispatchId { + if nullable { + // Nullable (nullable) types + switch typeId { + case BOOL: + return NullableBoolDispatchId + case INT8: + return NullableInt8DispatchId + case INT16: + return NullableInt16DispatchId + case INT32: + return NullableInt32DispatchId + case VARINT32: + return NullableVarint32DispatchId + case INT64: + return NullableInt64DispatchId + case VARINT64: + return NullableVarint64DispatchId + case TAGGED_INT64: + return NullableTaggedInt64DispatchId + case FLOAT32: + return NullableFloat32DispatchId + case FLOAT64: + return NullableFloat64DispatchId + case UINT8: + return NullableUint8DispatchId + case UINT16: + return NullableUint16DispatchId + case UINT32: + return NullableUint32DispatchId + case VAR_UINT32: + return NullableVarUint32DispatchId + case UINT64: + return NullableUint64DispatchId + case VAR_UINT64: + return NullableVarUint64DispatchId + case TAGGED_UINT64: + return NullableTaggedUint64DispatchId + case STRING: + return StringDispatchId + default: + return UnknownDispatchId + } + } else { + // Primitive (non-nullable) types + switch typeId { + case BOOL: + return PrimitiveBoolDispatchId + case INT8: + return PrimitiveInt8DispatchId + case INT16: + return PrimitiveInt16DispatchId + case INT32: + return PrimitiveInt32DispatchId + case VARINT32: + return PrimitiveVarint32DispatchId + case INT64: + return PrimitiveInt64DispatchId + case VARINT64: + return PrimitiveVarint64DispatchId + case TAGGED_INT64: + return PrimitiveTaggedInt64DispatchId + case FLOAT32: + return PrimitiveFloat32DispatchId + case FLOAT64: + return PrimitiveFloat64DispatchId + case UINT8: + return PrimitiveUint8DispatchId + case UINT16: + return PrimitiveUint16DispatchId + case UINT32: + return PrimitiveUint32DispatchId + case VAR_UINT32: + return PrimitiveVarUint32DispatchId + case UINT64: + return PrimitiveUint64DispatchId + case VAR_UINT64: + return PrimitiveVarUint64DispatchId + case TAGGED_UINT64: + 
return PrimitiveTaggedUint64DispatchId + case STRING: + return StringDispatchId + default: + return UnknownDispatchId + } + } +} + +// IsPrimitiveDispatchId returns true if the dispatch ID is for a primitive (non-nullable) type +func IsPrimitiveDispatchId(id DispatchId) bool { + return id >= PrimitiveBoolDispatchId && id <= PrimitiveUintDispatchId +} + +// IsNullablePrimitiveDispatchId returns true if the dispatch ID is for a nullable primitive type +func IsNullablePrimitiveDispatchId(id DispatchId) bool { + return id >= NullableBoolDispatchId && id <= NullableUintDispatchId +} + +// getFixedSizeByDispatchId returns byte size for fixed primitives (0 if not fixed) +func getFixedSizeByDispatchId(dispatchId DispatchId) int { + switch dispatchId { + case PrimitiveBoolDispatchId, PrimitiveInt8DispatchId, PrimitiveUint8DispatchId, + NotnullBoolPtrDispatchId, NotnullInt8PtrDispatchId, NotnullUint8PtrDispatchId: + return 1 + case PrimitiveInt16DispatchId, PrimitiveUint16DispatchId, + NotnullInt16PtrDispatchId, NotnullUint16PtrDispatchId: + return 2 + case PrimitiveInt32DispatchId, PrimitiveUint32DispatchId, PrimitiveFloat32DispatchId, + NotnullInt32PtrDispatchId, NotnullUint32PtrDispatchId, NotnullFloat32PtrDispatchId: + return 4 + case PrimitiveInt64DispatchId, PrimitiveUint64DispatchId, PrimitiveFloat64DispatchId, + NotnullInt64PtrDispatchId, NotnullUint64PtrDispatchId, NotnullFloat64PtrDispatchId: + return 8 + default: + return 0 + } +} + +// getVarintMaxSizeByDispatchId returns max byte size for varint primitives (0 if not varint) +func getVarintMaxSizeByDispatchId(dispatchId DispatchId) int { + switch dispatchId { + case PrimitiveVarint32DispatchId, PrimitiveVarUint32DispatchId, + NotnullVarint32PtrDispatchId, NotnullVarUint32PtrDispatchId: + return 5 + case PrimitiveVarint64DispatchId, PrimitiveVarUint64DispatchId, PrimitiveIntDispatchId, PrimitiveUintDispatchId, + NotnullVarint64PtrDispatchId, NotnullVarUint64PtrDispatchId, NotnullIntPtrDispatchId, 
NotnullUintPtrDispatchId: + return 10 + case PrimitiveTaggedInt64DispatchId, PrimitiveTaggedUint64DispatchId, + NotnullTaggedInt64PtrDispatchId, NotnullTaggedUint64PtrDispatchId: + return 9 + default: + return 0 + } +} + +// getEncodingFromTypeId returns the encoding string ("fixed", "varint", "tagged") from a TypeId. +func getEncodingFromTypeId(typeId TypeId) string { + internalId := typeId & 0xFF + switch TypeId(internalId) { + case INT32, INT64, UINT32, UINT64: + return "fixed" + case VARINT32, VARINT64, VAR_UINT32, VAR_UINT64: + return "varint" + case TAGGED_INT64, TAGGED_UINT64: + return "tagged" + default: + return "varint" // default encoding + } +} + +// getNotnullPtrDispatchId returns the NotnullXxxPtrDispatchId for a pointer-to-numeric type. +// elemKind is the kind of the element type (e.g., reflect.Uint8 for *uint8). +// encoding specifies the encoding type (fixed, varint, tagged) for int32/int64/uint32/uint64. +func getNotnullPtrDispatchId(elemKind reflect.Kind, encoding string) DispatchId { + switch elemKind { + case reflect.Bool: + return NotnullBoolPtrDispatchId + case reflect.Int8: + return NotnullInt8PtrDispatchId + case reflect.Int16: + return NotnullInt16PtrDispatchId + case reflect.Int32: + if encoding == "fixed" { + return NotnullInt32PtrDispatchId + } + return NotnullVarint32PtrDispatchId + case reflect.Int64: + if encoding == "fixed" { + return NotnullInt64PtrDispatchId + } else if encoding == "tagged" { + return NotnullTaggedInt64PtrDispatchId + } + return NotnullVarint64PtrDispatchId + case reflect.Int: + return NotnullIntPtrDispatchId + case reflect.Uint8: + return NotnullUint8PtrDispatchId + case reflect.Uint16: + return NotnullUint16PtrDispatchId + case reflect.Uint32: + if encoding == "fixed" { + return NotnullUint32PtrDispatchId + } + return NotnullVarUint32PtrDispatchId + case reflect.Uint64: + if encoding == "fixed" { + return NotnullUint64PtrDispatchId + } else if encoding == "tagged" { + return NotnullTaggedUint64PtrDispatchId + } 
+ return NotnullVarUint64PtrDispatchId + case reflect.Uint: + return NotnullUintPtrDispatchId + case reflect.Float32: + return NotnullFloat32PtrDispatchId + case reflect.Float64: + return NotnullFloat64PtrDispatchId + default: + return UnknownDispatchId + } +} + +// isPrimitiveFixedDispatchId returns true if the dispatch ID is for a non-nullable fixed-size primitive. +// Note: int32/int64/uint32/uint64 are NOT included here because they default to varint encoding. +// Only types that are always fixed-size are included (bool, int8/uint8, int16/uint16, float32/float64). +// Fixed int32/int64/uint32/uint64 encodings (INT32, INT64, UINT32, UINT64) use their specific dispatch IDs. +func isPrimitiveFixedDispatchId(id DispatchId) bool { + switch id { + case PrimitiveBoolDispatchId, PrimitiveInt8DispatchId, PrimitiveUint8DispatchId, + PrimitiveInt16DispatchId, PrimitiveUint16DispatchId, + // Fixed-size int32/int64/uint32/uint64 - only when explicitly specified via TypeId + PrimitiveInt32DispatchId, PrimitiveUint32DispatchId, + PrimitiveInt64DispatchId, PrimitiveUint64DispatchId, + PrimitiveFloat32DispatchId, PrimitiveFloat64DispatchId: + return true + default: + return false + } +} + +// getFixedSizeByPrimitiveDispatchId returns byte size for fixed primitives based on dispatch ID +func getFixedSizeByPrimitiveDispatchId(id DispatchId) int { + switch id { + case PrimitiveBoolDispatchId, PrimitiveInt8DispatchId, PrimitiveUint8DispatchId: return 1 - case ConcreteTypeInt16: + case PrimitiveInt16DispatchId, PrimitiveUint16DispatchId: return 2 - case ConcreteTypeFloat32: + case PrimitiveInt32DispatchId, PrimitiveUint32DispatchId, PrimitiveFloat32DispatchId: return 4 - case ConcreteTypeFloat64: + case PrimitiveInt64DispatchId, PrimitiveUint64DispatchId, PrimitiveFloat64DispatchId: return 8 default: return 0 } } -// getVarintMaxSizeByStaticId returns max byte size for varint primitives (0 if not varint) -func getVarintMaxSizeByStaticId(staticId StaticTypeId) int { - switch staticId 
{ - case ConcreteTypeInt32: +// isPrimitiveVarintDispatchId returns true if the dispatch ID is for a non-nullable varint primitive +func isPrimitiveVarintDispatchId(id DispatchId) bool { + switch id { + case PrimitiveVarint32DispatchId, PrimitiveVarint64DispatchId, PrimitiveTaggedInt64DispatchId, + PrimitiveVarUint32DispatchId, PrimitiveVarUint64DispatchId, PrimitiveTaggedUint64DispatchId, + PrimitiveIntDispatchId, PrimitiveUintDispatchId: + return true + default: + return false + } +} + +// getVarintMaxSizeByPrimitiveDispatchId returns max byte size for varint primitives based on dispatch ID +func getVarintMaxSizeByPrimitiveDispatchId(id DispatchId) int { + switch id { + case PrimitiveVarint32DispatchId, PrimitiveVarUint32DispatchId: return 5 - case ConcreteTypeInt64, ConcreteTypeInt: + case PrimitiveVarint64DispatchId, PrimitiveVarUint64DispatchId, PrimitiveIntDispatchId, PrimitiveUintDispatchId: return 10 + case PrimitiveTaggedInt64DispatchId, PrimitiveTaggedUint64DispatchId: + return 12 // 4 byte tag + 8 byte value default: return 0 } diff --git a/go/fory/writer.go b/go/fory/writer.go index 510f4351ad..9449a47d47 100644 --- a/go/fory/writer.go +++ b/go/fory/writer.go @@ -180,30 +180,30 @@ func (c *WriteContext) WriteTypeId(id TypeId) { c.buffer.WriteVaruint32Small7(uint32(id)) } -// writeFast writes a value using fast path based on StaticTypeId -func (c *WriteContext) writeFast(ptr unsafe.Pointer, ct StaticTypeId) { +// writeFast writes a value using fast path based on DispatchId +func (c *WriteContext) writeFast(ptr unsafe.Pointer, ct DispatchId) { switch ct { - case ConcreteTypeBool: + case PrimitiveBoolDispatchId: c.buffer.WriteBool(*(*bool)(ptr)) - case ConcreteTypeInt8: + case PrimitiveInt8DispatchId: c.buffer.WriteByte_(*(*byte)(ptr)) - case ConcreteTypeInt16: + case PrimitiveInt16DispatchId: c.buffer.WriteInt16(*(*int16)(ptr)) - case ConcreteTypeInt32: + case PrimitiveInt32DispatchId: c.buffer.WriteVarint32(*(*int32)(ptr)) - case ConcreteTypeInt: + case 
PrimitiveIntDispatchId: if strconv.IntSize == 64 { c.buffer.WriteVarint64(int64(*(*int)(ptr))) } else { c.buffer.WriteVarint32(int32(*(*int)(ptr))) } - case ConcreteTypeInt64: + case PrimitiveInt64DispatchId: c.buffer.WriteVarint64(*(*int64)(ptr)) - case ConcreteTypeFloat32: + case PrimitiveFloat32DispatchId: c.buffer.WriteFloat32(*(*float32)(ptr)) - case ConcreteTypeFloat64: + case PrimitiveFloat64DispatchId: c.buffer.WriteFloat64(*(*float64)(ptr)) - case ConcreteTypeString: + case StringDispatchId: writeString(c.buffer, *(*string)(ptr)) } } diff --git a/java/fory-core/src/main/java/org/apache/fory/Fory.java b/java/fory-core/src/main/java/org/apache/fory/Fory.java index 98413ca2a1..7e7c208407 100644 --- a/java/fory-core/src/main/java/org/apache/fory/Fory.java +++ b/java/fory-core/src/main/java/org/apache/fory/Fory.java @@ -580,12 +580,12 @@ public void xwriteData(MemoryBuffer buffer, ClassInfo classInfo, Object obj) { buffer.writeInt16((Short) obj); break; case Types.INT32: - case Types.VAR32: + case Types.VARINT32: buffer.writeVarInt32((Integer) obj); break; case Types.INT64: - case Types.VAR64: - case Types.H64: + case Types.VARINT64: + case Types.TAGGED_INT64: buffer.writeVarInt64((Long) obj); break; case Types.FLOAT32: @@ -605,35 +605,35 @@ public void xwriteData(MemoryBuffer buffer, ClassInfo classInfo, Object obj) { /** Write not null data to buffer. 
*/ private void writeData(MemoryBuffer buffer, ClassInfo classInfo, Object obj) { switch (classInfo.getClassId()) { - case ClassResolver.BOOLEAN_CLASS_ID: + case Types.BOOL: buffer.writeBoolean((Boolean) obj); break; - case ClassResolver.BYTE_CLASS_ID: + case Types.INT8: buffer.writeByte((Byte) obj); break; - case ClassResolver.CHAR_CLASS_ID: + case ClassResolver.CHAR_ID: buffer.writeChar((Character) obj); break; - case ClassResolver.SHORT_CLASS_ID: + case Types.INT16: buffer.writeInt16((Short) obj); break; - case ClassResolver.INTEGER_CLASS_ID: + case Types.INT32: if (compressInt) { buffer.writeVarInt32((Integer) obj); } else { buffer.writeInt32((Integer) obj); } break; - case ClassResolver.FLOAT_CLASS_ID: + case Types.FLOAT32: buffer.writeFloat32((Float) obj); break; - case ClassResolver.LONG_CLASS_ID: + case Types.INT64: LongSerializer.writeInt64(buffer, (Long) obj, longEncoding); break; - case ClassResolver.DOUBLE_CLASS_ID: + case Types.FLOAT64: buffer.writeFloat64((Double) obj); break; - case ClassResolver.STRING_CLASS_ID: + case Types.STRING: stringSerializer.writeJavaString(buffer, (String) obj); break; default: @@ -692,7 +692,7 @@ public MemoryBuffer readBufferObject(MemoryBuffer buffer) { int size; // TODO(chaokunyang) Remove branch when other languages support aligned varint. 
if (!crossLanguage) { - size = buffer.readAlignedVarUint(); + size = buffer.readAlignedVarUint32(); } else { size = buffer.readVarUint32(); } @@ -1013,27 +1013,27 @@ public Object readData(MemoryBuffer buffer, ClassInfo classInfo) { private Object readDataInternal(MemoryBuffer buffer, ClassInfo classInfo) { switch (classInfo.getClassId()) { - case ClassResolver.BOOLEAN_CLASS_ID: + case Types.BOOL: return buffer.readBoolean(); - case ClassResolver.BYTE_CLASS_ID: + case Types.INT8: return buffer.readByte(); - case ClassResolver.CHAR_CLASS_ID: + case ClassResolver.CHAR_ID: return buffer.readChar(); - case ClassResolver.SHORT_CLASS_ID: + case Types.INT16: return buffer.readInt16(); - case ClassResolver.INTEGER_CLASS_ID: + case Types.INT32: if (compressInt) { return buffer.readVarInt32(); } else { return buffer.readInt32(); } - case ClassResolver.FLOAT_CLASS_ID: + case Types.FLOAT32: return buffer.readFloat32(); - case ClassResolver.LONG_CLASS_ID: + case Types.INT64: return LongSerializer.readInt64(buffer, longEncoding); - case ClassResolver.DOUBLE_CLASS_ID: + case Types.FLOAT64: return buffer.readFloat64(); - case ClassResolver.STRING_CLASS_ID: + case Types.STRING: return stringSerializer.readJavaString(buffer); default: incReadDepth(); @@ -1110,13 +1110,13 @@ public Object xreadNonRef(MemoryBuffer buffer, ClassInfo classInfo) { case Types.INT16: return buffer.readInt16(); case Types.INT32: - case Types.VAR32: + case Types.VARINT32: // TODO(chaokunyang) support other encoding return buffer.readVarInt32(); case Types.INT64: - case Types.VAR64: + case Types.VARINT64: // TODO(chaokunyang) support other encoding - case Types.H64: + case Types.TAGGED_INT64: return buffer.readVarInt64(); case Types.FLOAT32: return buffer.readFloat32(); @@ -1399,55 +1399,47 @@ public T copyObject(T obj) { Object copy; ClassInfo classInfo = classResolver.getOrUpdateClassInfo(obj.getClass()); switch (classInfo.getClassId()) { - case ClassResolver.PRIMITIVE_BOOLEAN_CLASS_ID: - case 
ClassResolver.PRIMITIVE_BYTE_CLASS_ID: - case ClassResolver.PRIMITIVE_CHAR_CLASS_ID: - case ClassResolver.PRIMITIVE_SHORT_CLASS_ID: - case ClassResolver.PRIMITIVE_INT_CLASS_ID: - case ClassResolver.PRIMITIVE_FLOAT_CLASS_ID: - case ClassResolver.PRIMITIVE_LONG_CLASS_ID: - case ClassResolver.PRIMITIVE_DOUBLE_CLASS_ID: - case ClassResolver.BOOLEAN_CLASS_ID: - case ClassResolver.BYTE_CLASS_ID: - case ClassResolver.CHAR_CLASS_ID: - case ClassResolver.SHORT_CLASS_ID: - case ClassResolver.INTEGER_CLASS_ID: - case ClassResolver.FLOAT_CLASS_ID: - case ClassResolver.LONG_CLASS_ID: - case ClassResolver.DOUBLE_CLASS_ID: - case ClassResolver.STRING_CLASS_ID: + case Types.BOOL: + case Types.INT8: + case ClassResolver.CHAR_ID: + case Types.INT16: + case Types.INT32: + case Types.FLOAT32: + case Types.INT64: + case Types.FLOAT64: + case Types.STRING: return obj; - case ClassResolver.PRIMITIVE_BOOLEAN_ARRAY_CLASS_ID: + case ClassResolver.PRIMITIVE_BOOLEAN_ARRAY_ID: boolean[] boolArr = (boolean[]) obj; return (T) Arrays.copyOf(boolArr, boolArr.length); - case ClassResolver.PRIMITIVE_BYTE_ARRAY_CLASS_ID: + case ClassResolver.PRIMITIVE_BYTE_ARRAY_ID: byte[] byteArr = (byte[]) obj; return (T) Arrays.copyOf(byteArr, byteArr.length); - case ClassResolver.PRIMITIVE_CHAR_ARRAY_CLASS_ID: + case ClassResolver.PRIMITIVE_CHAR_ARRAY_ID: char[] charArr = (char[]) obj; return (T) Arrays.copyOf(charArr, charArr.length); - case ClassResolver.PRIMITIVE_SHORT_ARRAY_CLASS_ID: + case ClassResolver.PRIMITIVE_SHORT_ARRAY_ID: short[] shortArr = (short[]) obj; return (T) Arrays.copyOf(shortArr, shortArr.length); - case ClassResolver.PRIMITIVE_INT_ARRAY_CLASS_ID: + case ClassResolver.PRIMITIVE_INT_ARRAY_ID: int[] intArr = (int[]) obj; return (T) Arrays.copyOf(intArr, intArr.length); - case ClassResolver.PRIMITIVE_FLOAT_ARRAY_CLASS_ID: + case ClassResolver.PRIMITIVE_FLOAT_ARRAY_ID: float[] floatArr = (float[]) obj; return (T) Arrays.copyOf(floatArr, floatArr.length); - case 
ClassResolver.PRIMITIVE_LONG_ARRAY_CLASS_ID: + case ClassResolver.PRIMITIVE_LONG_ARRAY_ID: long[] longArr = (long[]) obj; return (T) Arrays.copyOf(longArr, longArr.length); - case ClassResolver.PRIMITIVE_DOUBLE_ARRAY_CLASS_ID: + case ClassResolver.PRIMITIVE_DOUBLE_ARRAY_ID: double[] doubleArr = (double[]) obj; return (T) Arrays.copyOf(doubleArr, doubleArr.length); - case ClassResolver.STRING_ARRAY_CLASS_ID: + case ClassResolver.STRING_ARRAY_ID: String[] stringArr = (String[]) obj; return (T) Arrays.copyOf(stringArr, stringArr.length); - case ClassResolver.ARRAYLIST_CLASS_ID: + case ClassResolver.ARRAYLIST_ID: copy = arrayListSerializer.copy((ArrayList) obj); break; - case ClassResolver.HASHMAP_CLASS_ID: + case ClassResolver.HASHMAP_ID: copy = hashMapSerializer.copy((HashMap) obj); break; // todo: add fastpath for other types. @@ -1463,23 +1455,23 @@ public T copyObject(T obj, int classId) { } // Fast path to avoid cost of query class map. switch (classId) { - case ClassResolver.PRIMITIVE_BOOLEAN_CLASS_ID: - case ClassResolver.PRIMITIVE_BYTE_CLASS_ID: - case ClassResolver.PRIMITIVE_CHAR_CLASS_ID: - case ClassResolver.PRIMITIVE_SHORT_CLASS_ID: - case ClassResolver.PRIMITIVE_INT_CLASS_ID: - case ClassResolver.PRIMITIVE_FLOAT_CLASS_ID: - case ClassResolver.PRIMITIVE_LONG_CLASS_ID: - case ClassResolver.PRIMITIVE_DOUBLE_CLASS_ID: - case ClassResolver.BOOLEAN_CLASS_ID: - case ClassResolver.BYTE_CLASS_ID: - case ClassResolver.CHAR_CLASS_ID: - case ClassResolver.SHORT_CLASS_ID: - case ClassResolver.INTEGER_CLASS_ID: - case ClassResolver.FLOAT_CLASS_ID: - case ClassResolver.LONG_CLASS_ID: - case ClassResolver.DOUBLE_CLASS_ID: - case ClassResolver.STRING_CLASS_ID: + case ClassResolver.PRIMITIVE_BOOL_ID: + case ClassResolver.PRIMITIVE_INT8_ID: + case ClassResolver.PRIMITIVE_CHAR_ID: + case ClassResolver.PRIMITIVE_INT16_ID: + case ClassResolver.PRIMITIVE_INT32_ID: + case ClassResolver.PRIMITIVE_FLOAT32_ID: + case ClassResolver.PRIMITIVE_INT64_ID: + case 
ClassResolver.PRIMITIVE_FLOAT64_ID: + case Types.BOOL: + case Types.INT8: + case ClassResolver.CHAR_ID: + case Types.INT16: + case Types.INT32: + case Types.FLOAT32: + case Types.INT64: + case Types.FLOAT64: + case Types.STRING: return obj; default: return copyObject(obj, classResolver.getOrUpdateClassInfo(obj.getClass()).getSerializer()); diff --git a/java/fory-core/src/main/java/org/apache/fory/annotation/Int32Type.java b/java/fory-core/src/main/java/org/apache/fory/annotation/Int32Type.java new file mode 100644 index 0000000000..7d8048feb6 --- /dev/null +++ b/java/fory-core/src/main/java/org/apache/fory/annotation/Int32Type.java @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.fory.annotation; + +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +/** + * Annotation to specify encoding options for 32-bit signed integer fields. + * + *

When applied to a field of type {@code int} or {@code Integer}, this annotation controls how + * the value is serialized: + * + *

    + *
  • {@code compress=true} (default): Uses variable-length encoding (VARINT32, type_id=5) which + * is more compact for small values + *
  • {@code compress=false}: Uses fixed 4-byte encoding (INT32, type_id=4) which has consistent + * size + *
+ * + *

Example usage: + * + *

{@code
+ * public class MyStruct {
+ *   @Int32Type(compress = true)  // Uses varint encoding (default)
+ *   int compactId;
+ *
+ *   @Int32Type(compress = false) // Uses fixed 4-byte encoding
+ *   int fixedId;
+ * }
+ * }
+ */ +@Retention(RetentionPolicy.RUNTIME) +@Target({ElementType.FIELD, ElementType.METHOD}) +public @interface Int32Type { + /** + * Whether to use variable-length compression for this int32 field. + * + * @return true to use VARINT32 encoding (compact for small values), false to use fixed INT32 + * encoding (4 bytes) + */ + boolean compress() default true; +} diff --git a/java/fory-core/src/main/java/org/apache/fory/annotation/Int64Type.java b/java/fory-core/src/main/java/org/apache/fory/annotation/Int64Type.java new file mode 100644 index 0000000000..e03c266cdf --- /dev/null +++ b/java/fory-core/src/main/java/org/apache/fory/annotation/Int64Type.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.fory.annotation; + +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; +import org.apache.fory.config.LongEncoding; + +/** + * Annotation to specify encoding options for 64-bit signed integer fields. + * + *

When applied to a field of type {@code long} or {@code Long}, this annotation controls how the + * value is serialized using different encoding strategies: + * + *

    + *
  • {@link LongEncoding#VARINT} (default): Variable-length encoding, compact for small values + * (type_id=7) + *
  • {@link LongEncoding#FIXED}: Fixed 8-byte encoding, consistent size (type_id=6) + *
  • {@link LongEncoding#TAGGED}: Tagged encoding that uses 4 bytes for values in range + * [-1073741824, 1073741823], otherwise 9 bytes (type_id=8) + *
+ * + *

Example usage: + * + *

{@code
+ * public class MyStruct {
+ *   @Int64Type(encoding = LongEncoding.VARINT)  // Variable-length (default)
+ *   long compactId;
+ *
+ *   @Int64Type(encoding = LongEncoding.FIXED)     // Fixed 8-byte
+ *   long fixedTimestamp;
+ *
+ *   @Int64Type(encoding = LongEncoding.TAGGED) // Tagged encoding
+ *   long taggedValue;
+ * }
+ * }
+ */ +@Retention(RetentionPolicy.RUNTIME) +@Target({ElementType.FIELD, ElementType.METHOD}) +public @interface Int64Type { + /** + * The encoding strategy to use for this int64 field. + * + * @return the encoding type for serialization + */ + LongEncoding encoding() default LongEncoding.VARINT; +} diff --git a/java/fory-core/src/main/java/org/apache/fory/annotation/Uint16Type.java b/java/fory-core/src/main/java/org/apache/fory/annotation/Uint16Type.java new file mode 100644 index 0000000000..f93d89d660 --- /dev/null +++ b/java/fory-core/src/main/java/org/apache/fory/annotation/Uint16Type.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.fory.annotation; + +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +/** + * Annotation to mark a field as an unsigned 16-bit integer. + * + *

When applied to a field of type {@code short}, {@code int}, or {@code char}, this annotation + * indicates that the value should be serialized as an unsigned 16-bit integer (UINT16, type_id=10) + * with a valid range of [0, 65535]. + * + *

This is useful for compatibility with languages that have native unsigned integer types (e.g., + * Rust's u16, Go's uint16, C++'s uint16_t). + * + *

Example usage: + * + *

{@code
+ * public class MyStruct {
+ *   @Uint16Type
+ *   int port;  // Will be serialized as unsigned 16-bit [0, 65535]
+ * }
+ * }
+ */ +@Retention(RetentionPolicy.RUNTIME) +@Target({ElementType.FIELD, ElementType.METHOD}) +public @interface Uint16Type {} diff --git a/java/fory-core/src/main/java/org/apache/fory/annotation/Uint32Type.java b/java/fory-core/src/main/java/org/apache/fory/annotation/Uint32Type.java new file mode 100644 index 0000000000..2470567e33 --- /dev/null +++ b/java/fory-core/src/main/java/org/apache/fory/annotation/Uint32Type.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.fory.annotation; + +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +/** + * Annotation to mark a field as an unsigned 32-bit integer. + * + *

When applied to a field of type {@code int} or {@code long}, this annotation indicates that + * the value should be serialized as an unsigned 32-bit integer with a valid range of [0, + * 4294967295]. + * + *

    + *
  • {@code compress=true} (default): Uses variable-length encoding (VAR_UINT32, type_id=12) + * which is more compact for small values + *
  • {@code compress=false}: Uses fixed 4-byte encoding (UINT32, type_id=11) which has + * consistent size + *
+ * + *

Benefits: + * + *

    + *
  • With {@code compress=true}: skips zigzag encoding overhead for non-negative values + *
  • Compatible with languages that have native unsigned integer types (e.g., Rust's u32, Go's + * uint32, C++'s uint32_t) + *
+ * + *

Example usage: + * + *

{@code
+ * public class MyStruct {
+ *   @Uint32Type(compress = true)  // Uses varuint encoding (default)
+ *   long compactCount;
+ *
+ *   @Uint32Type(compress = false) // Uses fixed 4-byte encoding
+ *   long fixedCount;
+ * }
+ * }
+ */ +@Retention(RetentionPolicy.RUNTIME) +@Target({ElementType.FIELD, ElementType.METHOD}) +public @interface Uint32Type { + /** + * Whether to use variable-length compression for this uint32 field. + * + * @return true to use VAR_UINT32 encoding (compact for small values), false to use fixed UINT32 + * encoding (4 bytes) + */ + boolean compress() default true; +} diff --git a/java/fory-core/src/main/java/org/apache/fory/annotation/Uint64Type.java b/java/fory-core/src/main/java/org/apache/fory/annotation/Uint64Type.java new file mode 100644 index 0000000000..9f550d5c4f --- /dev/null +++ b/java/fory-core/src/main/java/org/apache/fory/annotation/Uint64Type.java @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.fory.annotation; + +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; +import org.apache.fory.config.LongEncoding; + +/** + * Annotation to mark a field as an unsigned 64-bit integer. + * + *

When applied to a field of type {@code long}, this annotation indicates that the value should + * be serialized as an unsigned 64-bit integer with a valid range of [0, 18446744073709551615]. + * + *

Different encoding strategies are available: + * + *

    + *
  • {@link LongEncoding#VARINT} (default): Variable-length encoding (VAR_UINT64, type_id=14), + * compact for small values + *
  • {@link LongEncoding#FIXED}: Fixed 8-byte encoding (UINT64, type_id=13), consistent size + *
  • {@link LongEncoding#TAGGED}: Tagged encoding (TAGGED_UINT64, type_id=15) that uses 4 bytes + * for values in range [0, 2147483647], otherwise 9 bytes + *
+ * + *

Benefits: + * + *

    + *
  • With {@link LongEncoding#VARINT}: skips zigzag encoding overhead for non-negative values + *
  • With {@link LongEncoding#TAGGED}: uses unsigned range [0, 2147483647] for 4-byte encoding + * instead of signed range [-1073741824, 1073741823] + *
  • Compatible with languages that have native unsigned integer types (e.g., Rust's u64, Go's + * uint64, C++'s uint64_t) + *
+ * + *

Example usage: + * + *

{@code
+ * public class MyStruct {
+ *   @Uint64Type(encoding = LongEncoding.VARINT)  // Variable-length (default)
+ *   long compactId;
+ *
+ *   @Uint64Type(encoding = LongEncoding.FIXED)     // Fixed 8-byte
+ *   long fixedTimestamp;
+ *
+ *   @Uint64Type(encoding = LongEncoding.TAGGED) // Tagged encoding
+ *   long taggedValue;
+ * }
+ * }
+ */ +@Retention(RetentionPolicy.RUNTIME) +@Target({ElementType.FIELD, ElementType.METHOD}) +public @interface Uint64Type { + /** + * The encoding strategy to use for this uint64 field. + * + * @return the encoding type for serialization + */ + LongEncoding encoding() default LongEncoding.VARINT; +} diff --git a/java/fory-core/src/main/java/org/apache/fory/annotation/Uint8Type.java b/java/fory-core/src/main/java/org/apache/fory/annotation/Uint8Type.java new file mode 100644 index 0000000000..2393278a2b --- /dev/null +++ b/java/fory-core/src/main/java/org/apache/fory/annotation/Uint8Type.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.fory.annotation; + +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +/** + * Annotation to mark a field as an unsigned 8-bit integer. + * + *

When applied to a field of type {@code byte}, {@code short}, or {@code int}, this annotation + * indicates that the value should be serialized as an unsigned 8-bit integer (UINT8, type_id=9) + * with a valid range of [0, 255]. + * + *

This is useful for compatibility with languages that have native unsigned integer types (e.g., + * Rust's u8, Go's uint8, C++'s uint8_t). + * + *

Example usage: + * + *

{@code
+ * public class MyStruct {
+ *   @Uint8Type
+ *   short flags;  // Will be serialized as unsigned 8-bit [0, 255]
+ * }
+ * }
+ */ +@Retention(RetentionPolicy.RUNTIME) +@Target({ElementType.FIELD, ElementType.METHOD}) +public @interface Uint8Type {} diff --git a/java/fory-core/src/main/java/org/apache/fory/builder/BaseObjectCodecBuilder.java b/java/fory-core/src/main/java/org/apache/fory/builder/BaseObjectCodecBuilder.java index 7b7a04dd4e..b53e79802a 100644 --- a/java/fory-core/src/main/java/org/apache/fory/builder/BaseObjectCodecBuilder.java +++ b/java/fory-core/src/main/java/org/apache/fory/builder/BaseObjectCodecBuilder.java @@ -19,6 +19,12 @@ package org.apache.fory.builder; +import static org.apache.fory.builder.CodecBuilder.readFloat32Func; +import static org.apache.fory.builder.CodecBuilder.readFloat64Func; +import static org.apache.fory.builder.CodecBuilder.readInt16Func; +import static org.apache.fory.builder.CodecBuilder.readIntFunc; +import static org.apache.fory.builder.CodecBuilder.readLongFunc; +import static org.apache.fory.builder.CodecBuilder.readVarInt32Func; import static org.apache.fory.codegen.CodeGenerator.getPackage; import static org.apache.fory.codegen.Expression.Invoke.inlineInvoke; import static org.apache.fory.codegen.Expression.Literal.ofInt; @@ -52,10 +58,17 @@ import static org.apache.fory.serializer.collection.MapFlags.TRACKING_VALUE_REF; import static org.apache.fory.serializer.collection.MapFlags.VALUE_DECL_TYPE; import static org.apache.fory.serializer.collection.MapLikeSerializer.MAX_CHUNK_SIZE; +import static org.apache.fory.type.TypeUtils.BOOLEAN_TYPE; +import static org.apache.fory.type.TypeUtils.BYTE_TYPE; +import static org.apache.fory.type.TypeUtils.CHAR_TYPE; import static org.apache.fory.type.TypeUtils.CLASS_TYPE; import static org.apache.fory.type.TypeUtils.COLLECTION_TYPE; +import static org.apache.fory.type.TypeUtils.DOUBLE_TYPE; +import static org.apache.fory.type.TypeUtils.FLOAT_TYPE; +import static org.apache.fory.type.TypeUtils.INT_TYPE; import static org.apache.fory.type.TypeUtils.ITERATOR_TYPE; import static 
org.apache.fory.type.TypeUtils.LIST_TYPE; +import static org.apache.fory.type.TypeUtils.LONG_TYPE; import static org.apache.fory.type.TypeUtils.MAP_ENTRY_TYPE; import static org.apache.fory.type.TypeUtils.MAP_TYPE; import static org.apache.fory.type.TypeUtils.OBJECT_TYPE; @@ -65,6 +78,7 @@ import static org.apache.fory.type.TypeUtils.PRIMITIVE_LONG_TYPE; import static org.apache.fory.type.TypeUtils.PRIMITIVE_VOID_TYPE; import static org.apache.fory.type.TypeUtils.SET_TYPE; +import static org.apache.fory.type.TypeUtils.SHORT_TYPE; import static org.apache.fory.type.TypeUtils.getRawType; import static org.apache.fory.type.TypeUtils.isBoxed; import static org.apache.fory.type.TypeUtils.isPrimitive; @@ -125,6 +139,7 @@ import org.apache.fory.serializer.collection.CollectionLikeSerializer; import org.apache.fory.serializer.collection.MapLikeSerializer; import org.apache.fory.type.Descriptor; +import org.apache.fory.type.DispatchId; import org.apache.fory.type.GenericType; import org.apache.fory.type.TypeUtils; import org.apache.fory.util.GraalvmSupport; @@ -164,6 +179,7 @@ public abstract class BaseObjectCodecBuilder extends CodecBuilder { protected LinkedList walkPath = new LinkedList<>(); protected final String writeMethodName; protected final String readMethodName; + private final Map descriptorDispatchId; public BaseObjectCodecBuilder(TypeRef beanType, Fory fory, Class parentSerializerClass) { super(new CodegenContext(), beanType); @@ -202,6 +218,7 @@ public BaseObjectCodecBuilder(TypeRef beanType, Fory fory, Class parentSer STRING_SERIALIZER_NAME, inlineInvoke(foryRef, "getStringSerializer", typeResolverType)); jitCallbackUpdateFields = new HashMap<>(); + descriptorDispatchId = new HashMap<>(); } // Must be static to be shared across the whole process life. @@ -378,8 +395,6 @@ protected Expression serializeFor( TypeRef typeRef, Expression serializer, boolean generateNewMethod) { - // access rawType without jit lock to reduce lock competition. 
- Class rawType = getRawType(typeRef); if (needWriteRef(typeRef)) { return new If( not(writeRefOrNull(buffer, inputObject)), @@ -411,47 +426,42 @@ protected Expression serializeField( if (useRefTracking) { return new If( not(writeRefOrNull(buffer, fieldValue)), - serializeForNotNullForField(fieldValue, buffer, descriptor, null, false)); + serializeForNotNullForField(fieldValue, buffer, descriptor, null)); } else { // if typeToken is not final, ref tracking of subclass will be ignored too. if (typeRef.isPrimitive()) { - return serializeForNotNullForField(fieldValue, buffer, descriptor, null, false); + return serializeForNotNullForField(fieldValue, buffer, descriptor, null); } if (nullable) { Expression action = new ListExpression( new Invoke(buffer, "writeByte", Literal.ofByte(Fory.NOT_NULL_VALUE_FLAG)), - serializeForNotNullForField(fieldValue, buffer, descriptor, null, false)); + serializeForNotNullForField(fieldValue, buffer, descriptor, null)); return new If( eqNull(fieldValue), new Invoke(buffer, "writeByte", Literal.ofByte(Fory.NULL_FLAG)), action); } else { - return serializeForNotNullForField(fieldValue, buffer, descriptor, null, false); + return serializeForNotNullForField(fieldValue, buffer, descriptor, null); } } } private Expression serializeForNotNullForField( - Expression inputObject, - Expression buffer, - Descriptor descriptor, - Expression serializer, - boolean generateNewMethod) { + Expression inputObject, Expression buffer, Descriptor descriptor, Expression serializer) { TypeRef typeRef = descriptor.getTypeRef(); Class clz = getRawType(typeRef); if (isPrimitive(clz) || isBoxed(clz)) { - return serializePrimitive(inputObject, buffer, clz); + return serializePrimitiveField(inputObject, buffer, descriptor); } else { if (clz == String.class) { return fory.getStringSerializer().writeStringExpr(stringSerializerRef, buffer, inputObject); } Expression action; if (useCollectionSerialization(typeRef)) { - action = - serializeForCollection(buffer, 
inputObject, typeRef, serializer, generateNewMethod); + action = serializeForCollection(buffer, inputObject, typeRef, serializer, false); } else if (useMapSerialization(typeRef)) { - action = serializeForMap(buffer, inputObject, typeRef, serializer, generateNewMethod); + action = serializeForMap(buffer, inputObject, typeRef, serializer, false); } else { action = serializeForNotNullObjectForField(inputObject, buffer, descriptor, serializer); } @@ -459,6 +469,65 @@ private Expression serializeForNotNullForField( } } + private Expression serializePrimitiveField( + Expression inputObject, Expression buffer, Descriptor descriptor) { + int dispatchId = getNumericDescriptorDispatchId(descriptor); + switch (dispatchId) { + case DispatchId.PRIMITIVE_BOOL: + case DispatchId.BOOL: + return new Invoke(buffer, "writeBoolean", inputObject); + case DispatchId.PRIMITIVE_INT8: + case DispatchId.PRIMITIVE_UINT8: + case DispatchId.INT8: + case DispatchId.UINT8: + return new Invoke(buffer, "writeByte", inputObject); + case DispatchId.PRIMITIVE_CHAR: + case DispatchId.CHAR: + return new Invoke(buffer, "writeChar", inputObject); + case DispatchId.PRIMITIVE_INT16: + case DispatchId.PRIMITIVE_UINT16: + case DispatchId.INT16: + case DispatchId.UINT16: + return new Invoke(buffer, "writeInt16", inputObject); + case DispatchId.PRIMITIVE_INT32: + case DispatchId.PRIMITIVE_UINT32: + case DispatchId.INT32: + case DispatchId.UINT32: + return new Invoke(buffer, "writeInt32", inputObject); + case DispatchId.PRIMITIVE_VARINT32: + case DispatchId.VARINT32: + return new Invoke(buffer, "writeVarInt32", inputObject); + case DispatchId.PRIMITIVE_VAR_UINT32: + case DispatchId.VAR_UINT32: + return new Invoke(buffer, "writeVarUint32", inputObject); + case DispatchId.PRIMITIVE_INT64: + case DispatchId.PRIMITIVE_UINT64: + case DispatchId.INT64: + case DispatchId.UINT64: + return new Invoke(buffer, "writeInt64", inputObject); + case DispatchId.PRIMITIVE_VARINT64: + case DispatchId.VARINT64: + return new 
Invoke(buffer, "writeVarInt64", inputObject); + case DispatchId.PRIMITIVE_TAGGED_INT64: + case DispatchId.TAGGED_INT64: + return new Invoke(buffer, "writeTaggedInt64", inputObject); + case DispatchId.PRIMITIVE_VAR_UINT64: + case DispatchId.VAR_UINT64: + return new Invoke(buffer, "writeVarUint64", inputObject); + case DispatchId.PRIMITIVE_TAGGED_UINT64: + case DispatchId.TAGGED_UINT64: + return new Invoke(buffer, "writeTaggedUint64", inputObject); + case DispatchId.PRIMITIVE_FLOAT32: + case DispatchId.FLOAT32: + return new Invoke(buffer, "writeFloat32", inputObject); + case DispatchId.PRIMITIVE_FLOAT64: + case DispatchId.FLOAT64: + return new Invoke(buffer, "writeFloat64", inputObject); + default: + throw new IllegalStateException("Unsupported dispatchId: " + dispatchId); + } + } + private Expression serializePrimitive(Expression inputObject, Expression buffer, Class clz) { // for primitive, inline call here to avoid java boxing, rather call corresponding serializer. if (clz == byte.class || clz == Byte.class) { @@ -610,6 +679,12 @@ protected boolean useMapSerialization(Class type) { return typeResolver(r -> r.isMap(type)); } + protected int getNumericDescriptorDispatchId(Descriptor descriptor) { + Class rawType = descriptor.getRawType(); + Preconditions.checkArgument(TypeUtils.unwrap(rawType).isPrimitive()); + return descriptorDispatchId.computeIfAbsent(descriptor, d -> DispatchId.getDispatchId(fory, d)); + } + /** * Whether the provided type should be taken as final. Although the clz can be final, * the method can still return false. 
For example, we return false in meta share mode to write @@ -1807,8 +1882,12 @@ protected Expression deserializeField( boolean nullable = descriptor.isNullable(); // descriptor.isTrackingRef() already includes the needWriteRef check boolean useRefTracking = descriptor.isTrackingRef(); - // Check if type normally needs ref (for preserveRefId when ref tracking is disabled) - boolean typeNeedsRef = needWriteRef(typeRef); + // Check if the TYPE normally needs ref tracking, ignoring field-level metadata. + // When global ref tracking is enabled, serializers call reference() at the end. + // If field has trackingRef=false but the type's serializer calls reference(), + // we need to push a stub -1 so reference() can pop it and skip setReadObject. + // Use raw type without metadata to check type-level ref tracking. + boolean serializerCallsReference = needWriteRef(TypeRef.of(typeRef.getRawType())); if (useRefTracking) { return readRef( @@ -1817,10 +1896,10 @@ protected Expression deserializeField( if (!nullable) { Expression value = deserializeForNotNullForField(buffer, descriptor, null); - if (typeNeedsRef) { + if (serializerCallsReference) { // When a field explicitly disables ref tracking (@ForyField(trackingRef=false)) - // but the type normally needs ref tracking (e.g., collections), - // we need to preserve a -1 id so that when the deserializer calls reference(), + // but global ref tracking is enabled, the serializer will call reference(). + // We need to preserve a -1 id so that when the deserializer calls reference(), // it will pop this -1 and skip the setReadObject call. 
Expression preserveStubRefId = new Invoke(refResolverRef, "preserveRefId", new Literal(-1, PRIMITIVE_INT_TYPE)); @@ -1841,7 +1920,7 @@ protected Expression deserializeField( true, localFieldType); - if (typeNeedsRef) { + if (serializerCallsReference) { Expression preserveStubRefId = new Invoke(refResolverRef, "preserveRefId", new Literal(-1, PRIMITIVE_INT_TYPE)); return new ListExpression(preserveStubRefId, readNullableExpr); @@ -1855,7 +1934,7 @@ private Expression deserializeForNotNullForField( TypeRef typeRef = descriptor.getTypeRef(); Class cls = getRawType(typeRef); if (isPrimitive(cls) || isBoxed(cls)) { - return deserializePrimitive(buffer, cls); + return deserializePrimitiveField(buffer, descriptor); } else { if (cls == String.class) { return fory.getStringSerializer().readStringExpr(stringSerializerRef, buffer); @@ -1883,6 +1962,78 @@ private Expression deserializeForNotNullForField( } } + private Expression deserializePrimitiveField(Expression buffer, Descriptor descriptor) { + int dispatchId = getNumericDescriptorDispatchId(descriptor); + switch (dispatchId) { + case DispatchId.PRIMITIVE_BOOL: + return new Invoke(buffer, "readBoolean", PRIMITIVE_BOOLEAN_TYPE); + case DispatchId.BOOL: + return new Invoke(buffer, "readBoolean", BOOLEAN_TYPE); + case DispatchId.PRIMITIVE_INT8: + case DispatchId.PRIMITIVE_UINT8: + return new Invoke(buffer, "readByte", PRIMITIVE_BYTE_TYPE); + case DispatchId.INT8: + case DispatchId.UINT8: + return new Invoke(buffer, "readByte", BYTE_TYPE); + case DispatchId.PRIMITIVE_CHAR: + return readChar(buffer); + case DispatchId.CHAR: + return new Invoke(buffer, "readChar", CHAR_TYPE); + case DispatchId.PRIMITIVE_INT16: + case DispatchId.PRIMITIVE_UINT16: + return readInt16(buffer); + case DispatchId.INT16: + case DispatchId.UINT16: + return new Invoke(buffer, readInt16Func(), SHORT_TYPE); + case DispatchId.PRIMITIVE_INT32: + case DispatchId.PRIMITIVE_UINT32: + return readInt32(buffer); + case DispatchId.INT32: + case DispatchId.UINT32: 
+ return new Invoke(buffer, readIntFunc(), INT_TYPE); + case DispatchId.PRIMITIVE_VARINT32: + return readVarInt32(buffer); + case DispatchId.VARINT32: + return new Invoke(buffer, readVarInt32Func(), INT_TYPE); + case DispatchId.PRIMITIVE_VAR_UINT32: + return new Invoke(buffer, "readVarUint32", PRIMITIVE_INT_TYPE); + case DispatchId.VAR_UINT32: + return new Invoke(buffer, "readVarUint32", INT_TYPE); + case DispatchId.PRIMITIVE_INT64: + case DispatchId.PRIMITIVE_UINT64: + return readInt64(buffer); + case DispatchId.INT64: + case DispatchId.UINT64: + return new Invoke(buffer, readLongFunc(), LONG_TYPE); + case DispatchId.PRIMITIVE_VARINT64: + return new Invoke(buffer, "readVarInt64", PRIMITIVE_LONG_TYPE); + case DispatchId.VARINT64: + return new Invoke(buffer, "readVarInt64", LONG_TYPE); + case DispatchId.PRIMITIVE_TAGGED_INT64: + return new Invoke(buffer, "readTaggedInt64", PRIMITIVE_LONG_TYPE); + case DispatchId.TAGGED_INT64: + return new Invoke(buffer, "readTaggedInt64", LONG_TYPE); + case DispatchId.PRIMITIVE_VAR_UINT64: + return new Invoke(buffer, "readVarUint64", PRIMITIVE_LONG_TYPE); + case DispatchId.VAR_UINT64: + return new Invoke(buffer, "readVarUint64", LONG_TYPE); + case DispatchId.PRIMITIVE_TAGGED_UINT64: + return new Invoke(buffer, "readTaggedUint64", PRIMITIVE_LONG_TYPE); + case DispatchId.TAGGED_UINT64: + return new Invoke(buffer, "readTaggedUint64", LONG_TYPE); + case DispatchId.PRIMITIVE_FLOAT32: + return readFloat32(buffer); + case DispatchId.FLOAT32: + return new Invoke(buffer, readFloat32Func(), FLOAT_TYPE); + case DispatchId.PRIMITIVE_FLOAT64: + return readFloat64(buffer); + case DispatchId.FLOAT64: + return new Invoke(buffer, readFloat64Func(), DOUBLE_TYPE); + default: + throw new IllegalStateException("Unsupported dispatchId: " + dispatchId); + } + } + private Expression deserializePrimitive(Expression buffer, Class cls) { // for primitive, inline call here to avoid java boxing if (cls == byte.class || cls == Byte.class) { diff --git 
a/java/fory-core/src/main/java/org/apache/fory/builder/CodecBuilder.java b/java/fory-core/src/main/java/org/apache/fory/builder/CodecBuilder.java index 51c8b93703..cce3953c5e 100644 --- a/java/fory-core/src/main/java/org/apache/fory/builder/CodecBuilder.java +++ b/java/fory-core/src/main/java/org/apache/fory/builder/CodecBuilder.java @@ -63,7 +63,6 @@ import org.apache.fory.resolver.ClassInfo; import org.apache.fory.resolver.ClassInfoHolder; import org.apache.fory.type.Descriptor; -import org.apache.fory.type.FinalObjectTypeStub; import org.apache.fory.util.GraalvmSupport; import org.apache.fory.util.Preconditions; import org.apache.fory.util.StringUtils; @@ -154,10 +153,6 @@ protected Expression tryCastIfPublic(Expression expression, TypeRef targetTyp protected Expression tryCastIfPublic( Expression expression, TypeRef targetType, boolean inline) { Class rawType = getRawType(targetType); - if (rawType == FinalObjectTypeStub.class) { - // final field doesn't exist in this class, skip cast. - return expression; - } if (inline) { if (sourcePublicAccessible(rawType)) { return new Cast(expression, targetType); @@ -309,7 +304,7 @@ private Expression reflectAccessField( return new Cast(getObj, descriptor.getTypeRef(), descriptor.getName()); } - /** Returns an expression that get field value> from bean using {@link Unsafe}. */ + /** Returns an expression that get field value> from bean using `Unsafe`. 
*/ private Expression unsafeAccessField( Expression inputObject, Class cls, Descriptor descriptor) { String fieldName = descriptor.getName(); @@ -378,15 +373,21 @@ protected Expression setFieldValue(Expression bean, Descriptor d, Expression val if (!d.isFinalField() && Modifier.isPublic(d.getModifiers()) && Modifier.isPublic(d.getRawType().getModifiers())) { + if (!d.getRawType().isAssignableFrom(value.type().getRawType())) { + value = tryInlineCast(value, d.getTypeRef()); + } return new Expression.SetField(bean, fieldName, value); } else if (d.getWriteMethod() != null && Modifier.isPublic(d.getWriteMethod().getModifiers())) { + if (!d.getRawType().isAssignableFrom(value.type().getRawType())) { + value = tryInlineCast(value, d.getTypeRef()); + } return new Invoke(bean, d.getWriteMethod().getName(), value); } else { if (!d.isFinalField() && !Modifier.isPrivate(d.getModifiers())) { if (AccessorHelper.defineSetter(d.getField())) { Class accessorClass = AccessorHelper.getAccessorClass(d.getField()); - if (!value.type().equals(d.getTypeRef())) { - value = new Cast(value, d.getTypeRef()); + if (!d.getRawType().isAssignableFrom(value.type().getRawType())) { + value = tryInlineCast(value, d.getTypeRef()); } return new StaticInvoke( accessorClass, d.getName(), PRIMITIVE_VOID_TYPE, false, bean, value); @@ -395,8 +396,8 @@ protected Expression setFieldValue(Expression bean, Descriptor d, Expression val if (d.getWriteMethod() != null && !Modifier.isPrivate(d.getWriteMethod().getModifiers())) { if (AccessorHelper.defineSetter(d.getWriteMethod())) { Class accessorClass = AccessorHelper.getAccessorClass(d.getWriteMethod()); - if (!value.type().equals(d.getTypeRef())) { - value = new Cast(value, d.getTypeRef()); + if (!d.getRawType().isAssignableFrom(value.type().getRawType())) { + value = tryInlineCast(value, d.getTypeRef()); } return new StaticInvoke( accessorClass, d.getWriteMethod().getName(), PRIMITIVE_VOID_TYPE, false, bean, value); @@ -418,8 +419,7 @@ private Expression 
reflectSetField(Expression bean, Field field, Expression valu } /** - * Returns an expression that set field value to bean using {@link - * Unsafe}. + * Returns an expression that set field value to bean using `Unsafe`. */ private Expression unsafeSetField(Expression bean, Descriptor descriptor, Expression value) { TypeRef fieldType = descriptor.getTypeRef(); @@ -479,6 +479,7 @@ protected Reference getOrCreateField( boolean isStatic, Class type, String fieldName, Supplier value) { Reference fieldRef = fieldMap.get(fieldName); if (fieldRef == null) { + fieldName = ctx.newName(fieldName); ctx.addField(isStatic, true, ctx.type(type), fieldName, value.get()); fieldRef = new Reference(fieldName, TypeRef.of(type)); fieldMap.put(fieldName, fieldRef); @@ -709,13 +710,27 @@ public static String readLongFunc() { return Platform.IS_LITTLE_ENDIAN ? "_readInt64OnLE" : "_readInt64OnBE"; } + public static String readInt16Func() { + return Platform.IS_LITTLE_ENDIAN ? "_readInt16OnLE" : "_readInt16OnBE"; + } + + public static String readVarInt32Func() { + return Platform.IS_LITTLE_ENDIAN ? "_readVarInt32OnLE" : "_readVarInt32OnBE"; + } + + public static String readFloat32Func() { + return Platform.IS_LITTLE_ENDIAN ? "_readFloat32OnLE" : "_readFloat32OnBE"; + } + + public static String readFloat64Func() { + return Platform.IS_LITTLE_ENDIAN ? "_readFloat64OnLE" : "_readFloat64OnBE"; + } + protected Expression readFloat32(Expression buffer) { - String func = Platform.IS_LITTLE_ENDIAN ? "_readFloat32OnLE" : "_readFloat32OnBE"; - return new Invoke(buffer, func, PRIMITIVE_FLOAT_TYPE); + return new Invoke(buffer, readFloat32Func(), PRIMITIVE_FLOAT_TYPE); } protected Expression readFloat64(Expression buffer) { - String func = Platform.IS_LITTLE_ENDIAN ? 
"_readFloat64OnLE" : "_readFloat64OnBE"; - return new Invoke(buffer, func, PRIMITIVE_DOUBLE_TYPE); + return new Invoke(buffer, readFloat64Func(), PRIMITIVE_DOUBLE_TYPE); } } diff --git a/java/fory-core/src/main/java/org/apache/fory/builder/ObjectCodecBuilder.java b/java/fory-core/src/main/java/org/apache/fory/builder/ObjectCodecBuilder.java index 1e528094b4..818299e0ba 100644 --- a/java/fory-core/src/main/java/org/apache/fory/builder/ObjectCodecBuilder.java +++ b/java/fory-core/src/main/java/org/apache/fory/builder/ObjectCodecBuilder.java @@ -31,7 +31,6 @@ import static org.apache.fory.type.TypeUtils.PRIMITIVE_VOID_TYPE; import static org.apache.fory.type.TypeUtils.getRawType; import static org.apache.fory.type.TypeUtils.getSizeOfPrimitiveType; -import static org.apache.fory.type.TypeUtils.isPrimitive; import java.util.ArrayList; import java.util.Collection; @@ -60,12 +59,11 @@ import org.apache.fory.reflect.ObjectCreators; import org.apache.fory.reflect.TypeRef; import org.apache.fory.serializer.ObjectSerializer; -import org.apache.fory.serializer.PrimitiveSerializers.LongSerializer; import org.apache.fory.serializer.SerializationUtils; import org.apache.fory.type.Descriptor; import org.apache.fory.type.DescriptorGrouper; +import org.apache.fory.type.DispatchId; import org.apache.fory.type.TypeUtils; -import org.apache.fory.util.Preconditions; import org.apache.fory.util.function.SerializableSupplier; import org.apache.fory.util.record.RecordUtils; @@ -269,39 +267,50 @@ private List serializePrimitivesUnCompressed( ListExpression groupExpressions = new ListExpression(); // use Reference to cut-off expr dependency. for (Descriptor descriptor : group) { - Class clz = descriptor.getRawType(); - Preconditions.checkArgument(isPrimitive(clz)); + int dispatchId = getNumericDescriptorDispatchId(descriptor); // `bean` will be replaced by `Reference` to cut-off expr dependency. 
Expression fieldValue = getFieldValue(bean, descriptor); if (fieldValue instanceof Inlineable) { ((Inlineable) fieldValue).inline(); } - if (clz == byte.class) { - groupExpressions.add(unsafePut(base, getWriterPos(writerAddr, acc), fieldValue)); - acc += 1; - } else if (clz == boolean.class) { + if (dispatchId == DispatchId.PRIMITIVE_BOOL || dispatchId == DispatchId.BOOL) { groupExpressions.add(unsafePutBoolean(base, getWriterPos(writerAddr, acc), fieldValue)); acc += 1; - } else if (clz == char.class) { + } else if (dispatchId == DispatchId.PRIMITIVE_INT8 + || dispatchId == DispatchId.PRIMITIVE_UINT8 + || dispatchId == DispatchId.INT8 + || dispatchId == DispatchId.UINT8) { + groupExpressions.add(unsafePut(base, getWriterPos(writerAddr, acc), fieldValue)); + acc += 1; + } else if (dispatchId == DispatchId.PRIMITIVE_CHAR || dispatchId == DispatchId.CHAR) { groupExpressions.add(unsafePutChar(base, getWriterPos(writerAddr, acc), fieldValue)); acc += 2; - } else if (clz == short.class) { + } else if (dispatchId == DispatchId.PRIMITIVE_INT16 + || dispatchId == DispatchId.PRIMITIVE_UINT16 + || dispatchId == DispatchId.INT16 + || dispatchId == DispatchId.UINT16) { groupExpressions.add(unsafePutShort(base, getWriterPos(writerAddr, acc), fieldValue)); acc += 2; - } else if (clz == int.class) { + } else if (dispatchId == DispatchId.PRIMITIVE_INT32 + || dispatchId == DispatchId.PRIMITIVE_UINT32 + || dispatchId == DispatchId.INT32 + || dispatchId == DispatchId.UINT32) { groupExpressions.add(unsafePutInt(base, getWriterPos(writerAddr, acc), fieldValue)); acc += 4; - } else if (clz == long.class) { + } else if (dispatchId == DispatchId.PRIMITIVE_INT64 + || dispatchId == DispatchId.PRIMITIVE_UINT64 + || dispatchId == DispatchId.INT64 + || dispatchId == DispatchId.UINT64) { groupExpressions.add(unsafePutLong(base, getWriterPos(writerAddr, acc), fieldValue)); acc += 8; - } else if (clz == float.class) { + } else if (dispatchId == DispatchId.PRIMITIVE_FLOAT32 || dispatchId == 
DispatchId.FLOAT32) { groupExpressions.add(unsafePutFloat(base, getWriterPos(writerAddr, acc), fieldValue)); acc += 4; - } else if (clz == double.class) { + } else if (dispatchId == DispatchId.PRIMITIVE_FLOAT64 || dispatchId == DispatchId.FLOAT64) { groupExpressions.add(unsafePutDouble(base, getWriterPos(writerAddr, acc), fieldValue)); acc += 8; } else { - throw new IllegalStateException("impossible"); + throw new IllegalStateException("Unsupported dispatchId: " + dispatchId); } } if (numPrimitiveFields < 4) { @@ -328,10 +337,25 @@ private List serializePrimitivesCompressed( int extraSize = 0; for (List group : primitiveGroups) { for (Descriptor d : group) { - if (d.getRawType() == int.class) { + int id = getNumericDescriptorDispatchId(d); + if (id == DispatchId.PRIMITIVE_INT32 + || id == DispatchId.PRIMITIVE_VARINT32 + || id == DispatchId.PRIMITIVE_VAR_UINT32 + || id == DispatchId.INT32 + || id == DispatchId.VARINT32 + || id == DispatchId.VAR_UINT32) { // varint may be written as 5bytes, use 8bytes for written as long to reduce cost. extraSize += 4; - } else if (d.getRawType() == long.class) { + } else if (id == DispatchId.PRIMITIVE_INT64 + || id == DispatchId.PRIMITIVE_VARINT64 + || id == DispatchId.PRIMITIVE_TAGGED_INT64 + || id == DispatchId.PRIMITIVE_VAR_UINT64 + || id == DispatchId.PRIMITIVE_TAGGED_UINT64 + || id == DispatchId.INT64 + || id == DispatchId.VARINT64 + || id == DispatchId.TAGGED_INT64 + || id == DispatchId.VAR_UINT64 + || id == DispatchId.TAGGED_UINT64) { extraSize += 1; // long use 1~9 bytes. } } @@ -351,59 +375,92 @@ private List serializePrimitivesCompressed( int acc = 0; boolean compressStarted = false; for (Descriptor descriptor : group) { - Class clz = TypeUtils.unwrap(descriptor.getRawType()); - Preconditions.checkArgument(isPrimitive(clz)); + int dispatchId = getNumericDescriptorDispatchId(descriptor); // `bean` will be replaced by `Reference` to cut-off expr dependency. 
Expression fieldValue = getFieldValue(bean, descriptor); if (fieldValue instanceof Inlineable) { ((Inlineable) fieldValue).inline(); } - if (clz == byte.class) { - groupExpressions.add(unsafePut(base, getWriterPos(writerAddr, acc), fieldValue)); - acc += 1; - } else if (clz == boolean.class) { + if (dispatchId == DispatchId.PRIMITIVE_BOOL || dispatchId == DispatchId.BOOL) { groupExpressions.add(unsafePutBoolean(base, getWriterPos(writerAddr, acc), fieldValue)); acc += 1; - } else if (clz == char.class) { + } else if (dispatchId == DispatchId.PRIMITIVE_INT8 + || dispatchId == DispatchId.PRIMITIVE_UINT8 + || dispatchId == DispatchId.INT8 + || dispatchId == DispatchId.UINT8) { + groupExpressions.add(unsafePut(base, getWriterPos(writerAddr, acc), fieldValue)); + acc += 1; + } else if (dispatchId == DispatchId.PRIMITIVE_CHAR || dispatchId == DispatchId.CHAR) { groupExpressions.add(unsafePutChar(base, getWriterPos(writerAddr, acc), fieldValue)); acc += 2; - } else if (clz == short.class) { + } else if (dispatchId == DispatchId.PRIMITIVE_INT16 + || dispatchId == DispatchId.PRIMITIVE_UINT16 + || dispatchId == DispatchId.INT16 + || dispatchId == DispatchId.UINT16) { groupExpressions.add(unsafePutShort(base, getWriterPos(writerAddr, acc), fieldValue)); acc += 2; - } else if (clz == float.class) { + } else if (dispatchId == DispatchId.PRIMITIVE_FLOAT32 || dispatchId == DispatchId.FLOAT32) { groupExpressions.add(unsafePutFloat(base, getWriterPos(writerAddr, acc), fieldValue)); acc += 4; - } else if (clz == double.class) { + } else if (dispatchId == DispatchId.PRIMITIVE_FLOAT64 || dispatchId == DispatchId.FLOAT64) { groupExpressions.add(unsafePutDouble(base, getWriterPos(writerAddr, acc), fieldValue)); acc += 8; - } else if (clz == int.class) { - if (!fory.compressInt()) { - groupExpressions.add(unsafePutInt(base, getWriterPos(writerAddr, acc), fieldValue)); - acc += 4; - } else { - if (!compressStarted) { - // int/long are sorted in the last. 
- addIncWriterIndexExpr(groupExpressions, buffer, acc); - compressStarted = true; - } - groupExpressions.add(new Invoke(buffer, "_unsafeWriteVarInt32", fieldValue)); - acc += 0; + } else if (dispatchId == DispatchId.PRIMITIVE_INT32 + || dispatchId == DispatchId.PRIMITIVE_UINT32 + || dispatchId == DispatchId.INT32 + || dispatchId == DispatchId.UINT32) { + groupExpressions.add(unsafePutInt(base, getWriterPos(writerAddr, acc), fieldValue)); + acc += 4; + } else if (dispatchId == DispatchId.PRIMITIVE_INT64 + || dispatchId == DispatchId.PRIMITIVE_UINT64 + || dispatchId == DispatchId.INT64 + || dispatchId == DispatchId.UINT64) { + groupExpressions.add(unsafePutLong(base, getWriterPos(writerAddr, acc), fieldValue)); + acc += 8; + } else if (dispatchId == DispatchId.PRIMITIVE_VARINT32 + || dispatchId == DispatchId.VARINT32) { + if (!compressStarted) { + addIncWriterIndexExpr(groupExpressions, buffer, acc); + compressStarted = true; + } + groupExpressions.add(new Invoke(buffer, "_unsafeWriteVarInt32", fieldValue)); + } else if (dispatchId == DispatchId.PRIMITIVE_VAR_UINT32 + || dispatchId == DispatchId.VAR_UINT32) { + if (!compressStarted) { + addIncWriterIndexExpr(groupExpressions, buffer, acc); + compressStarted = true; + } + groupExpressions.add(new Invoke(buffer, "_unsafeWriteVarUint32", fieldValue)); + } else if (dispatchId == DispatchId.PRIMITIVE_VARINT64 + || dispatchId == DispatchId.VARINT64) { + if (!compressStarted) { + addIncWriterIndexExpr(groupExpressions, buffer, acc); + compressStarted = true; + } + groupExpressions.add(new Invoke(buffer, "writeVarInt64", fieldValue)); + } else if (dispatchId == DispatchId.PRIMITIVE_TAGGED_INT64 + || dispatchId == DispatchId.TAGGED_INT64) { + if (!compressStarted) { + addIncWriterIndexExpr(groupExpressions, buffer, acc); + compressStarted = true; + } + groupExpressions.add(new Invoke(buffer, "writeTaggedInt64", fieldValue)); + } else if (dispatchId == DispatchId.PRIMITIVE_VAR_UINT64 + || dispatchId == DispatchId.VAR_UINT64) { 
+ if (!compressStarted) { + addIncWriterIndexExpr(groupExpressions, buffer, acc); + compressStarted = true; } - } else if (clz == long.class) { - if (!fory.compressLong()) { - groupExpressions.add(unsafePutLong(base, getWriterPos(writerAddr, acc), fieldValue)); - acc += 8; - } else { - if (!compressStarted) { - // int/long are sorted in the last. - addIncWriterIndexExpr(groupExpressions, buffer, acc); - compressStarted = true; - } - groupExpressions.add( - LongSerializer.writeInt64(buffer, fieldValue, fory.longEncoding(), false)); + groupExpressions.add(new Invoke(buffer, "writeVarUint64", fieldValue)); + } else if (dispatchId == DispatchId.PRIMITIVE_TAGGED_UINT64 + || dispatchId == DispatchId.TAGGED_UINT64) { + if (!compressStarted) { + addIncWriterIndexExpr(groupExpressions, buffer, acc); + compressStarted = true; } + groupExpressions.add(new Invoke(buffer, "writeTaggedUint64", fieldValue)); } else { - throw new IllegalStateException("impossible"); + throw new IllegalStateException("Unsupported dispatchId: " + dispatchId); } } if (!compressStarted) { @@ -643,36 +700,46 @@ private List deserializeUnCompressedPrimitives( for (List group : primitiveGroups) { ListExpression groupExpressions = new ListExpression(); for (Descriptor descriptor : group) { - TypeRef type = descriptor.getTypeRef(); - Class clz = getRawType(type); - Preconditions.checkArgument(isPrimitive(clz)); + int dispatchId = getNumericDescriptorDispatchId(descriptor); Expression fieldValue; - if (clz == byte.class) { - fieldValue = unsafeGet(heapBuffer, getReaderAddress(readerAddr, acc)); - acc += 1; - } else if (clz == boolean.class) { + if (dispatchId == DispatchId.PRIMITIVE_BOOL || dispatchId == DispatchId.BOOL) { fieldValue = unsafeGetBoolean(heapBuffer, getReaderAddress(readerAddr, acc)); acc += 1; - } else if (clz == char.class) { + } else if (dispatchId == DispatchId.PRIMITIVE_INT8 + || dispatchId == DispatchId.PRIMITIVE_UINT8 + || dispatchId == DispatchId.INT8 + || dispatchId == 
DispatchId.UINT8) { + fieldValue = unsafeGet(heapBuffer, getReaderAddress(readerAddr, acc)); + acc += 1; + } else if (dispatchId == DispatchId.PRIMITIVE_CHAR || dispatchId == DispatchId.CHAR) { fieldValue = unsafeGetChar(heapBuffer, getReaderAddress(readerAddr, acc)); acc += 2; - } else if (clz == short.class) { + } else if (dispatchId == DispatchId.PRIMITIVE_INT16 + || dispatchId == DispatchId.PRIMITIVE_UINT16 + || dispatchId == DispatchId.INT16 + || dispatchId == DispatchId.UINT16) { fieldValue = unsafeGetShort(heapBuffer, getReaderAddress(readerAddr, acc)); acc += 2; - } else if (clz == int.class) { + } else if (dispatchId == DispatchId.PRIMITIVE_INT32 + || dispatchId == DispatchId.PRIMITIVE_UINT32 + || dispatchId == DispatchId.INT32 + || dispatchId == DispatchId.UINT32) { fieldValue = unsafeGetInt(heapBuffer, getReaderAddress(readerAddr, acc)); acc += 4; - } else if (clz == long.class) { + } else if (dispatchId == DispatchId.PRIMITIVE_INT64 + || dispatchId == DispatchId.PRIMITIVE_UINT64 + || dispatchId == DispatchId.INT64 + || dispatchId == DispatchId.UINT64) { fieldValue = unsafeGetLong(heapBuffer, getReaderAddress(readerAddr, acc)); acc += 8; - } else if (clz == float.class) { + } else if (dispatchId == DispatchId.PRIMITIVE_FLOAT32 || dispatchId == DispatchId.FLOAT32) { fieldValue = unsafeGetFloat(heapBuffer, getReaderAddress(readerAddr, acc)); acc += 4; - } else if (clz == double.class) { + } else if (dispatchId == DispatchId.PRIMITIVE_FLOAT64 || dispatchId == DispatchId.FLOAT64) { fieldValue = unsafeGetDouble(heapBuffer, getReaderAddress(readerAddr, acc)); acc += 8; } else { - throw new IllegalStateException("impossible"); + throw new IllegalStateException("Unsupported dispatchId: " + dispatchId); } // `bean` will be replaced by `Reference` to cut-off expr dependency. 
groupExpressions.add(setFieldValue(bean, descriptor, fieldValue)); @@ -710,52 +777,88 @@ private List deserializeCompressedPrimitives( int acc = 0; boolean compressStarted = false; for (Descriptor descriptor : group) { - TypeRef type = descriptor.getTypeRef(); - Class clz = TypeUtils.unwrap(getRawType(type)); - Preconditions.checkArgument(isPrimitive(clz)); + int dispatchId = getNumericDescriptorDispatchId(descriptor); Expression fieldValue; - if (clz == byte.class) { - fieldValue = unsafeGet(heapBuffer, getReaderAddress(readerAddr, acc)); - acc += 1; - } else if (clz == boolean.class) { + if (dispatchId == DispatchId.PRIMITIVE_BOOL || dispatchId == DispatchId.BOOL) { fieldValue = unsafeGetBoolean(heapBuffer, getReaderAddress(readerAddr, acc)); acc += 1; - } else if (clz == char.class) { + } else if (dispatchId == DispatchId.PRIMITIVE_INT8 + || dispatchId == DispatchId.PRIMITIVE_UINT8 + || dispatchId == DispatchId.INT8 + || dispatchId == DispatchId.UINT8) { + fieldValue = unsafeGet(heapBuffer, getReaderAddress(readerAddr, acc)); + acc += 1; + } else if (dispatchId == DispatchId.PRIMITIVE_CHAR || dispatchId == DispatchId.CHAR) { fieldValue = unsafeGetChar(heapBuffer, getReaderAddress(readerAddr, acc)); acc += 2; - } else if (clz == short.class) { + } else if (dispatchId == DispatchId.PRIMITIVE_INT16 + || dispatchId == DispatchId.PRIMITIVE_UINT16 + || dispatchId == DispatchId.INT16 + || dispatchId == DispatchId.UINT16) { fieldValue = unsafeGetShort(heapBuffer, getReaderAddress(readerAddr, acc)); acc += 2; - } else if (clz == float.class) { + } else if (dispatchId == DispatchId.PRIMITIVE_FLOAT32 || dispatchId == DispatchId.FLOAT32) { fieldValue = unsafeGetFloat(heapBuffer, getReaderAddress(readerAddr, acc)); acc += 4; - } else if (clz == double.class) { + } else if (dispatchId == DispatchId.PRIMITIVE_FLOAT64 || dispatchId == DispatchId.FLOAT64) { fieldValue = unsafeGetDouble(heapBuffer, getReaderAddress(readerAddr, acc)); acc += 8; - } else if (clz == int.class) { - 
if (!fory.compressInt()) { - fieldValue = unsafeGetInt(heapBuffer, getReaderAddress(readerAddr, acc)); - acc += 4; - } else { - if (!compressStarted) { - compressStarted = true; - addIncReaderIndexExpr(groupExpressions, buffer, acc); - } - fieldValue = readVarInt32(buffer); + } else if (dispatchId == DispatchId.PRIMITIVE_INT32 + || dispatchId == DispatchId.PRIMITIVE_UINT32 + || dispatchId == DispatchId.INT32 + || dispatchId == DispatchId.UINT32) { + fieldValue = unsafeGetInt(heapBuffer, getReaderAddress(readerAddr, acc)); + acc += 4; + } else if (dispatchId == DispatchId.PRIMITIVE_INT64 + || dispatchId == DispatchId.PRIMITIVE_UINT64 + || dispatchId == DispatchId.INT64 + || dispatchId == DispatchId.UINT64) { + fieldValue = unsafeGetLong(heapBuffer, getReaderAddress(readerAddr, acc)); + acc += 8; + } else if (dispatchId == DispatchId.PRIMITIVE_VARINT32 + || dispatchId == DispatchId.VARINT32) { + if (!compressStarted) { + compressStarted = true; + addIncReaderIndexExpr(groupExpressions, buffer, acc); + } + fieldValue = readVarInt32(buffer); + } else if (dispatchId == DispatchId.PRIMITIVE_VAR_UINT32 + || dispatchId == DispatchId.VAR_UINT32) { + if (!compressStarted) { + compressStarted = true; + addIncReaderIndexExpr(groupExpressions, buffer, acc); + } + fieldValue = new Invoke(buffer, "readVarUint32", PRIMITIVE_INT_TYPE); + } else if (dispatchId == DispatchId.PRIMITIVE_VARINT64 + || dispatchId == DispatchId.VARINT64) { + if (!compressStarted) { + compressStarted = true; + addIncReaderIndexExpr(groupExpressions, buffer, acc); + } + fieldValue = new Invoke(buffer, "readVarInt64", PRIMITIVE_LONG_TYPE); + } else if (dispatchId == DispatchId.PRIMITIVE_TAGGED_INT64 + || dispatchId == DispatchId.TAGGED_INT64) { + if (!compressStarted) { + compressStarted = true; + addIncReaderIndexExpr(groupExpressions, buffer, acc); + } + fieldValue = new Invoke(buffer, "readTaggedInt64", PRIMITIVE_LONG_TYPE); + } else if (dispatchId == DispatchId.PRIMITIVE_VAR_UINT64 + || dispatchId == 
DispatchId.VAR_UINT64) { + if (!compressStarted) { + compressStarted = true; + addIncReaderIndexExpr(groupExpressions, buffer, acc); } - } else if (clz == long.class) { - if (!fory.compressLong()) { - fieldValue = unsafeGetLong(heapBuffer, getReaderAddress(readerAddr, acc)); - acc += 8; - } else { - if (!compressStarted) { - compressStarted = true; - addIncReaderIndexExpr(groupExpressions, buffer, acc); - } - fieldValue = LongSerializer.readInt64(buffer, fory.longEncoding()); + fieldValue = new Invoke(buffer, "readVarUint64", PRIMITIVE_LONG_TYPE); + } else if (dispatchId == DispatchId.PRIMITIVE_TAGGED_UINT64 + || dispatchId == DispatchId.TAGGED_UINT64) { + if (!compressStarted) { + compressStarted = true; + addIncReaderIndexExpr(groupExpressions, buffer, acc); } + fieldValue = new Invoke(buffer, "readTaggedUint64", PRIMITIVE_LONG_TYPE); } else { - throw new IllegalStateException("impossible"); + throw new IllegalStateException("Unsupported dispatchId: " + dispatchId); } // `bean` will be replaced by `Reference` to cut-off expr dependency. 
groupExpressions.add(setFieldValue(bean, descriptor, fieldValue)); diff --git a/java/fory-core/src/main/java/org/apache/fory/config/Config.java b/java/fory-core/src/main/java/org/apache/fory/config/Config.java index 0488cbfb59..e81ed4e5f5 100644 --- a/java/fory-core/src/main/java/org/apache/fory/config/Config.java +++ b/java/fory-core/src/main/java/org/apache/fory/config/Config.java @@ -83,7 +83,7 @@ public Config(ForyBuilder builder) { writeNumUtf16BytesForUtf8Encoding = builder.writeNumUtf16BytesForUtf8Encoding; compressInt = builder.compressInt; longEncoding = builder.longEncoding; - compressLong = longEncoding != LongEncoding.LE_RAW_BYTES; + compressLong = longEncoding != LongEncoding.FIXED; compressIntArray = builder.compressIntArray; compressLongArray = builder.compressLongArray; requireClassRegistration = builder.requireClassRegistration; diff --git a/java/fory-core/src/main/java/org/apache/fory/config/ForyBuilder.java b/java/fory-core/src/main/java/org/apache/fory/config/ForyBuilder.java index 9d4cce82c3..4a2b60b077 100644 --- a/java/fory-core/src/main/java/org/apache/fory/config/ForyBuilder.java +++ b/java/fory-core/src/main/java/org/apache/fory/config/ForyBuilder.java @@ -67,7 +67,7 @@ public final class ForyBuilder { boolean timeRefIgnored = true; ClassLoader classLoader; boolean compressInt = true; - public LongEncoding longEncoding = LongEncoding.SLI; + public LongEncoding longEncoding = LongEncoding.TAGGED; boolean compressIntArray = false; boolean compressLongArray = false; boolean compressString = false; @@ -102,6 +102,11 @@ public ForyBuilder withLanguage(Language language) { return this; } + public ForyBuilder withXlang(boolean xlang) { + this.language = xlang ? Language.XLANG : Language.JAVA; + return this; + } + /** Whether track shared or circular references. 
*/ public ForyBuilder withRefTracking(boolean trackingRef) { this.trackingRef = trackingRef; @@ -183,11 +188,11 @@ public ForyBuilder withIntCompressed(boolean intCompressed) { } /** - * Use variable length encoding for long. Enabled by default, use {@link LongEncoding#SLI} (Small - * long as int) for long encoding. + * Use variable length encoding for long. Enabled by default, use {@link LongEncoding#TAGGED} + * (Small long as int) for long encoding. */ public ForyBuilder withLongCompressed(boolean longCompressed) { - return withLongCompressed(longCompressed ? LongEncoding.SLI : LongEncoding.LE_RAW_BYTES); + return withLongCompressed(longCompressed ? LongEncoding.TAGGED : LongEncoding.FIXED); } /** Use variable length encoding for long. */ @@ -259,6 +264,11 @@ public ForyBuilder withCompatibleMode(CompatibleMode compatibleMode) { return this; } + public ForyBuilder withCompatible(boolean compatible) { + return withCompatibleMode( + compatible ? CompatibleMode.COMPATIBLE : CompatibleMode.SCHEMA_CONSISTENT); + } + /** * Whether check class schema consistency, will be disabled automatically when {@link * CompatibleMode#COMPATIBLE} is enabled. Do not disable this option unless you can ensure the @@ -429,7 +439,7 @@ private void finish() { } if (language != Language.JAVA) { stringRefIgnored = true; - longEncoding = LongEncoding.PVL; + longEncoding = LongEncoding.VARINT; compressInt = true; compressString = true; } diff --git a/java/fory-core/src/main/java/org/apache/fory/config/LongEncoding.java b/java/fory-core/src/main/java/org/apache/fory/config/LongEncoding.java index 128f7831ba..f6c598e98c 100644 --- a/java/fory-core/src/main/java/org/apache/fory/config/LongEncoding.java +++ b/java/fory-core/src/main/java/org/apache/fory/config/LongEncoding.java @@ -20,19 +20,20 @@ package org.apache.fory.config; /** - * Encoding option for long. Default encoding is fory SLI(Small long as int) encoding: {@link #SLI}. + * Encoding option for long. 
Default encoding is fory SLI(Small long as int) encoding: {@link + * #TAGGED}. */ public enum LongEncoding { /** - * Fory SLI(Small long as int) Encoding: + * Fory Tagged int64 Encoding: *
  • If long is in [0xc0000000, 0x3fffffff], encode as 4 bytes int: `| little-endian: ((int) * value) << 1 |` *
  • Otherwise write as 9 bytes: `| 0b1 | little-endian 8bytes long |`. * - *

    Faster than {@link #PVL}, but compression is not good as {@link #PVL} such as for ints - * in short range. + *

    Faster than {@link #VARINT}, but compression is not good as {@link #VARINT} such as for + * ints in short range. */ - SLI, + TAGGED, /** * Fory Progressive Variable-length Long Encoding: *

  • positive long format: first bit in every byte indicate whether has next byte, then next @@ -40,7 +41,7 @@ public enum LongEncoding { *
  • Negative number will be converted to positive number by ` (v << 1) ^ (v >> 63)` to reduce * cost of small negative numbers. */ - PVL, + VARINT, /** Write long as little endian 8bytes, no compression. */ - LE_RAW_BYTES, + FIXED, } diff --git a/java/fory-core/src/main/java/org/apache/fory/memory/MemoryBuffer.java b/java/fory-core/src/main/java/org/apache/fory/memory/MemoryBuffer.java index 29331f2691..9ce16158e2 100644 --- a/java/fory-core/src/main/java/org/apache/fory/memory/MemoryBuffer.java +++ b/java/fory-core/src/main/java/org/apache/fory/memory/MemoryBuffer.java @@ -774,7 +774,10 @@ public int writeVarUint32(int v) { // generated code is smaller. Otherwise, `MapRefResolver.writeRefOrNull` // may be `callee is too large`/`already compiled into a big method` ensure(writerIndex + 8); - int varintBytes = _unsafePutVarUint36Small(writerIndex, v); + // Use Integer.toUnsignedLong to handle values > INT32_MAX correctly + // Without this, negative int values would be sign-extended to long, + // causing incorrect varint encoding (9+ bytes instead of 5) + int varintBytes = _unsafePutVarUint36Small(writerIndex, Integer.toUnsignedLong(v)); writerIndex += varintBytes; return varintBytes; } @@ -786,7 +789,8 @@ public int writeVarUint32(int v) { // CHECKSTYLE.OFF:MethodName public int _unsafeWriteVarUint32(int v) { // CHECKSTYLE.ON:MethodName - int varintBytes = _unsafePutVarUint36Small(writerIndex, v); + // Use Integer.toUnsignedLong to handle values > INT32_MAX correctly + int varintBytes = _unsafePutVarUint36Small(writerIndex, Integer.toUnsignedLong(v)); writerIndex += varintBytes; return varintBytes; } @@ -820,6 +824,7 @@ private int continueWriteVarUint32Small7(int value) { /** * Caller must ensure there must be at least 8 bytes for writing, otherwise the crash may occur. + * Don't pass int value to avoid sign extension. 
*/ // CHECKSTYLE.OFF:MethodName public int _unsafePutVarUint36Small(int index, long value) { @@ -1137,22 +1142,58 @@ public int _unsafeWriteVarUint64(long value) { } /** - * Write long using fory SLI(Small long as int) encoding. If long is in [0xc0000000, 0x3fffffff], - * encode as 4 bytes int: | little-endian: ((int) value) << 1 |; Otherwise write as 9 bytes: | 0b1 - * | little-endian 8bytes long | + * Write signed long using fory Tagged(Small long as int) encoding. If long is in [0xc0000000, + * 0x3fffffff], encode as 4 bytes int: | little-endian: ((int) value) << 1 |; Otherwise write as 9 + * bytes: | 0b1 | little-endian 8bytes long | */ - public int writeSliInt64(long value) { + public int writeTaggedInt64(long value) { ensure(writerIndex + 9); - return _unsafeWriteSliInt64(value); + return _unsafeWriteTaggedInt64(value); + } + + /** + * Write unsigned long using fory Tagged(Small long as int) encoding. If long is in [0, + * 0x7fffffff], encode as 4 bytes int: | little-endian: ((int) value) << 1 |; Otherwise write as 9 + * bytes: | 0b1 | little-endian 8bytes long | + */ + public int writeTaggedUint64(long value) { + ensure(writerIndex + 9); + return _unsafeWriteTaggedUint64(value); + } + + /** Write unsigned long using fory Tagged(Small Long as Int) encoding. */ + // CHECKSTYLE.OFF:MethodName + public int _unsafeWriteTaggedUint64(long value) { + // CHECKSTYLE.ON:MethodName + final int writerIndex = this.writerIndex; + final long pos = address + writerIndex; + final byte[] heapMemory = this.heapMemory; + if (value >= 0 && value <= Integer.MAX_VALUE) { + int v = ((int) value) << 1; // bit 0 unset, means int. 
+ if (!LITTLE_ENDIAN) { + v = Integer.reverseBytes(v); + } + UNSAFE.putInt(heapMemory, pos, v); + this.writerIndex = writerIndex + 4; + return 4; + } else { + UNSAFE.putByte(heapMemory, pos, BIG_LONG_FLAG); + if (!LITTLE_ENDIAN) { + value = Long.reverseBytes(value); + } + UNSAFE.putLong(heapMemory, pos + 1, value); + this.writerIndex = writerIndex + 9; + return 9; + } } private static final long HALF_MAX_INT_VALUE = Integer.MAX_VALUE / 2; private static final long HALF_MIN_INT_VALUE = Integer.MIN_VALUE / 2; private static final byte BIG_LONG_FLAG = 0b1; // bit 0 set, means big long. - /** Write long using fory SLI(Small Long as Int) encoding. */ + /** Write long using fory Tagged(Small Long as Int) encoding. */ // CHECKSTYLE.OFF:MethodName - public int _unsafeWriteSliInt64(long value) { + public int _unsafeWriteTaggedInt64(long value) { // CHECKSTYLE.ON:MethodName final int writerIndex = this.writerIndex; final long pos = address + writerIndex; @@ -1487,18 +1528,71 @@ public long _readInt64OnBE() { return Long.reverseBytes(UNSAFE.getLong(heapMemory, address + readerIdx)); } - /** Read fory SLI(Small Long as Int) encoded long. */ - public long readSliInt64() { + /** Read signed fory Tagged(Small Long as Int) encoded long. */ + public long readTaggedInt64() { if (LITTLE_ENDIAN) { - return _readSliInt64OnLE(); + return _readTaggedInt64OnLE(); } else { - return _readSliInt64OnBE(); + return _readTaggedInt64OnBE(); } } + /** Read unsigned fory Tagged(Small Long as Int) encoded long. 
*/ + public long readTaggedUint64() { + if (LITTLE_ENDIAN) { + return _readTaggedUint64OnLE(); + } else { + return _readTaggedUint64OnBE(); + } + } + + @CodegenInvoke + // CHECKSTYLE.OFF:MethodName + public long _readTaggedUint64OnLE() { + // CHECKSTYLE.ON:MethodName + final int readIdx = readerIndex; + int diff = size - readIdx; + if (diff < 4) { + streamReader.fillBuffer(4 - diff); + } + int i = UNSAFE.getInt(heapMemory, address + readIdx); + if ((i & 0b1) != 0b1) { + readerIndex = readIdx + 4; + return i >>> 1; // unsigned right shift + } + diff = size - readIdx; + if (diff < 9) { + streamReader.fillBuffer(9 - diff); + } + readerIndex = readIdx + 9; + return UNSAFE.getLong(heapMemory, address + readIdx + 1); + } + + @CodegenInvoke + // CHECKSTYLE.OFF:MethodName + public long _readTaggedUint64OnBE() { + // CHECKSTYLE.ON:MethodName + final int readIdx = readerIndex; + int diff = size - readIdx; + if (diff < 4) { + streamReader.fillBuffer(4 - diff); + } + int i = Integer.reverseBytes(UNSAFE.getInt(heapMemory, address + readIdx)); + if ((i & 0b1) != 0b1) { + readerIndex = readIdx + 4; + return i >>> 1; // unsigned right shift + } + diff = size - readIdx; + if (diff < 9) { + streamReader.fillBuffer(9 - diff); + } + readerIndex = readIdx + 9; + return Long.reverseBytes(UNSAFE.getLong(heapMemory, address + readIdx + 1)); + } + @CodegenInvoke // CHECKSTYLE.OFF:MethodName - public long _readSliInt64OnLE() { + public long _readTaggedInt64OnLE() { // CHECKSTYLE.ON:MethodName // Duplicate and manual inline for performance. // noinspection Duplicates @@ -1522,7 +1616,7 @@ public long _readSliInt64OnLE() { @CodegenInvoke // CHECKSTYLE.OFF:MethodName - public long _readSliInt64OnBE() { + public long _readTaggedInt64OnBE() { // CHECKSTYLE.ON:MethodName // noinspection Duplicates final int readIdx = readerIndex; @@ -2065,11 +2159,11 @@ private long readVarUint64Slow() { } /** Reads the 1-9 byte int part of an aligned varint. 
*/ - public int readAlignedVarUint() { + public int readAlignedVarUint32() { int readerIdx = readerIndex; // use subtract to avoid overflow if (readerIdx < size - 10) { - return slowReadAlignedVarUint(); + return slowReadAlignedVarUint32(); } long pos = address + readerIdx; long startPos = pos; @@ -2105,7 +2199,7 @@ public int readAlignedVarUint() { return result; } - public int slowReadAlignedVarUint() { + public int slowReadAlignedVarUint32() { int b = readByte(); // Mask first 6 bits, // bit 8 `set` indicates have next data bytes. @@ -2335,7 +2429,7 @@ public byte[] readBytesAndSize() { } public byte[] readBytesWithAlignedSize() { - final int numBytes = readAlignedVarUint(); + final int numBytes = readAlignedVarUint32(); int readerIdx = readerIndex; final byte[] arr = new byte[numBytes]; // use subtract to avoid overflow @@ -2392,7 +2486,7 @@ public char[] readCharsAndSize() { } public char[] readCharsWithAlignedSize() { - final int numBytes = readAlignedVarUint(); + final int numBytes = readAlignedVarUint32(); return readChars(numBytes); } diff --git a/java/fory-core/src/main/java/org/apache/fory/meta/ClassDefDecoder.java b/java/fory-core/src/main/java/org/apache/fory/meta/ClassDefDecoder.java index 46657a4fd6..f912017f1d 100644 --- a/java/fory-core/src/main/java/org/apache/fory/meta/ClassDefDecoder.java +++ b/java/fory-core/src/main/java/org/apache/fory/meta/ClassDefDecoder.java @@ -115,11 +115,10 @@ private static List readFieldsInfo( List fieldInfos = new ArrayList<>(numFields); for (int i = 0; i < numFields; i++) { int header = buffer.readByte() & 0xff; - // `3 bits size + 2 bits field name encoding + polymorphism flag + nullability flag + ref - // tracking flag` - int encodingFlags = (header >>> 3) & 0b11; + // `3 bits size + 2 bits field name encoding + nullability flag + ref tracking flag` + int encodingFlags = (header >>> 2) & 0b11; boolean useTagID = encodingFlags == 3; - int size = header >>> 5; + int size = header >>> 4; if (size == 7) { size += 
buffer.readVarUint32Small7(); } @@ -138,12 +137,11 @@ private static List readFieldsInfo( fieldName = Encoders.FIELD_NAME_DECODER.decode(buffer.readBytes(size), encoding); } - boolean isMonomorphic = (header & 0b100) != 0; boolean nullable = (header & 0b010) != 0; boolean trackingRef = (header & 0b001) != 0; int typeId = buffer.readVarUint32Small14(); FieldType fieldType = - FieldTypes.FieldType.read(buffer, resolver, isMonomorphic, nullable, trackingRef, typeId); + FieldTypes.FieldType.read(buffer, resolver, nullable, trackingRef, typeId); if (useTagID) { fieldInfos.add(new FieldInfo(className, fieldName, fieldType, tagId)); diff --git a/java/fory-core/src/main/java/org/apache/fory/meta/ClassDefEncoder.java b/java/fory-core/src/main/java/org/apache/fory/meta/ClassDefEncoder.java index 8b057c5e3f..62eb5b7a7e 100644 --- a/java/fory-core/src/main/java/org/apache/fory/meta/ClassDefEncoder.java +++ b/java/fory-core/src/main/java/org/apache/fory/meta/ClassDefEncoder.java @@ -280,10 +280,8 @@ static Map> groupClassFields(List fieldsInfo) static void writeFieldsInfo(MemoryBuffer buffer, List fields) { for (FieldInfo fieldInfo : fields) { FieldType fieldType = fieldInfo.getFieldType(); - // `3 bits size + 2 bits field name encoding + polymorphism flag + nullability flag + ref - // tracking flag` - int header = ((fieldType.isMonomorphic() ? 1 : 0) << 2); - header |= ((fieldType.nullable() ? 1 : 0) << 1); + // `3 bits size + 2 bits field name encoding + nullability flag + ref tracking flag` + int header = ((fieldType.nullable() ? 1 : 0) << 1); header |= ((fieldType.trackingRef() ? 
1 : 0)); // Encoding `UTF8/ALL_TO_LOWER_SPECIAL/LOWER_UPPER_DIGIT_SPECIAL/TAG_ID` MetaString metaString = Encoders.encodeFieldName(fieldInfo.getFieldName()); @@ -294,14 +292,14 @@ static void writeFieldsInfo(MemoryBuffer buffer, List fields) { size = fieldInfo.getFieldId(); encodingFlags = 3; } - header |= (byte) (encodingFlags << 3); + header |= (byte) (encodingFlags << 2); boolean bigSize = size >= 7; if (bigSize) { - header |= 0b11100000; + header |= 0b01110000; buffer.writeByte(header); buffer.writeVarUint32Small7(size - 7); } else { - header |= (size << 5); + header |= (size << 4); buffer.writeByte(header); } if (!fieldInfo.hasFieldId()) { diff --git a/java/fory-core/src/main/java/org/apache/fory/meta/FieldInfo.java b/java/fory-core/src/main/java/org/apache/fory/meta/FieldInfo.java index 0fe83f279b..076b9d7cf6 100644 --- a/java/fory-core/src/main/java/org/apache/fory/meta/FieldInfo.java +++ b/java/fory-core/src/main/java/org/apache/fory/meta/FieldInfo.java @@ -29,6 +29,7 @@ import org.apache.fory.serializer.converter.FieldConverters; import org.apache.fory.type.Descriptor; import org.apache.fory.type.DescriptorBuilder; +import org.apache.fory.type.Types; /** * FieldInfo contains all necessary info of a field to execute serialization/deserialization logic. @@ -89,9 +90,11 @@ public FieldTypes.FieldType getFieldType() { Descriptor toDescriptor(TypeResolver resolver, Descriptor descriptor) { TypeRef declared = descriptor != null ? 
descriptor.getTypeRef() : null; TypeRef typeRef = fieldType.toTypeToken(resolver, declared); - String typeName = typeRef.getType().getTypeName(); + String typeName = fieldType.getTypeName(resolver, typeRef); if (fieldType instanceof FieldTypes.RegisteredFieldType) { - typeName = String.valueOf(((FieldTypes.RegisteredFieldType) fieldType).getClassId()); + if (!Types.isPrimitiveType(fieldType.xtypeId)) { + typeName = String.valueOf(((FieldTypes.RegisteredFieldType) fieldType).getClassId()); + } } // Get nullable and trackingRef from remote FieldType - these are what the remote peer // used when serializing, so we must respect them when deserializing @@ -112,6 +115,7 @@ Descriptor toDescriptor(TypeResolver resolver, Descriptor descriptor) { .typeName(typeName) .trackingRef(remoteTrackingRef) .nullable(remoteNullable) + .typeRef(typeRef) .build(); } DescriptorBuilder builder = diff --git a/java/fory-core/src/main/java/org/apache/fory/meta/FieldTypes.java b/java/fory-core/src/main/java/org/apache/fory/meta/FieldTypes.java index 1923707831..5f3e3c27f1 100644 --- a/java/fory-core/src/main/java/org/apache/fory/meta/FieldTypes.java +++ b/java/fory-core/src/main/java/org/apache/fory/meta/FieldTypes.java @@ -47,10 +47,10 @@ import org.apache.fory.resolver.XtypeResolver; import org.apache.fory.serializer.NonexistentClass; import org.apache.fory.type.Descriptor; -import org.apache.fory.type.FinalObjectTypeStub; import org.apache.fory.type.GenericType; import org.apache.fory.type.TypeUtils; import org.apache.fory.type.Types; +import org.apache.fory.type.union.Union; import org.apache.fory.util.Preconditions; public class FieldTypes { @@ -58,9 +58,7 @@ public class FieldTypes { /** Returns true if can use current field type. 
*/ static boolean useFieldType(Class parsedType, Descriptor descriptor) { - if (parsedType.isEnum() - || parsedType.isAssignableFrom(descriptor.getRawType()) - || parsedType == FinalObjectTypeStub.class) { + if (parsedType.isEnum() || parsedType.isAssignableFrom(descriptor.getRawType())) { return true; } if (parsedType.isArray()) { @@ -69,7 +67,7 @@ static boolean useFieldType(Class parsedType, Descriptor descriptor) { if (!field.getType().isArray() || getArrayDimensions(field.getType()) != info.f1) { return false; } - return info.f0 == FinalObjectTypeStub.class || info.f0.isEnum(); + return info.f0.isEnum(); } return false; } @@ -87,8 +85,16 @@ private static FieldType buildFieldType( Preconditions.checkNotNull(genericType); Class rawType = genericType.getCls(); boolean isXlang = resolver.getFory().isCrossLanguage(); - int xtypeId = -1; - if (isXlang) { + // Get type ID for both xlang and native mode + // This supports unsigned types and field-configurable compression in both modes + int xtypeId; + if (TypeUtils.unwrap(rawType).isPrimitive()) { + if (field != null) { + xtypeId = Types.getDescriptorTypeId(resolver.getFory(), field); + } else { + xtypeId = Types.getTypeId(resolver.getFory(), rawType); + } + } else { ClassInfo info = resolver.getClassInfo(genericType.getCls(), false); if (info != null) { xtypeId = info.getXtypeId(); @@ -96,10 +102,9 @@ private static FieldType buildFieldType( xtypeId = Types.UNKNOWN; } } - boolean isMonomorphic = genericType.isMonomorphic(); // For xlang: ref tracking is false by default (no shared ownership like Rust's Rc/Arc) // For native: use the type's default tracking behavior - boolean trackingRef = isXlang ? 
false : genericType.trackingRef(resolver); + boolean trackingRef = !isXlang && genericType.trackingRef(resolver); // For xlang: nullable is false by default (aligned with all languages) // Exception: Optional types are nullable (like Rust's Option) // For native: non-primitive types are nullable by default @@ -108,9 +113,9 @@ private static FieldType buildFieldType( // Only Optional types and boxed types are nullable by default in xlang mode nullable = isOptionalType(rawType) || TypeUtils.isBoxed(rawType); } else { - // For nested types (field=null), nullable defaults to true to match decoding behavior - // since the encoding doesn't persist nullable for nested types (see FieldType.read()) - nullable = field == null || !genericType.getCls().isPrimitive(); + // Primitives are never nullable, non-primitives are nullable by default + // This applies to both top-level fields and nested types (in arrays, collections, maps) + nullable = !genericType.getCls().isPrimitive(); } // Apply @ForyField annotation if present @@ -125,7 +130,6 @@ private static FieldType buildFieldType( if (COLLECTION_TYPE.isSupertypeOf(genericType.getTypeRef())) { return new CollectionFieldType( xtypeId, - isMonomorphic, nullable, trackingRef, buildFieldType( @@ -137,7 +141,6 @@ private static FieldType buildFieldType( } else if (MAP_TYPE.isSupertypeOf(genericType.getTypeRef())) { return new MapFieldType( xtypeId, - isMonomorphic, nullable, trackingRef, buildFieldType( @@ -152,18 +155,22 @@ private static FieldType buildFieldType( genericType.getTypeParameter1() == null ? 
GenericType.build(Object.class) : genericType.getTypeParameter1())); + } else if (Union.class.isAssignableFrom(rawType)) { + return new UnionFieldType(nullable, trackingRef); + } else if (TypeUtils.unwrap(rawType).isPrimitive()) { + // unified basic types for xlang and native mode + return new RegisteredFieldType(nullable, trackingRef, xtypeId); } else { - if (isXlang + if (rawType.isEnum()) { + return new EnumFieldType(nullable, xtypeId); + } else if (isXlang && !Types.isUserDefinedType((byte) xtypeId) && resolver.isRegisteredById(rawType)) { - return new RegisteredFieldType(isMonomorphic, nullable, trackingRef, xtypeId); + return new RegisteredFieldType(nullable, trackingRef, xtypeId); } else if (!isXlang && resolver.isRegisteredById(rawType)) { Short classId = ((ClassResolver) resolver).getRegisteredClassId(rawType); - return new RegisteredFieldType(isMonomorphic, nullable, trackingRef, classId); + return new RegisteredFieldType(nullable, trackingRef, classId); } else { - if (rawType.isEnum()) { - return new EnumFieldType(nullable, xtypeId); - } if (rawType.isArray()) { Class elemType = rawType.getComponentType(); while (elemType.isArray()) { @@ -172,42 +179,34 @@ private static FieldType buildFieldType( if (isXlang && !elemType.isPrimitive()) { return new CollectionFieldType( xtypeId, - isMonomorphic, nullable, trackingRef, buildFieldType(resolver, null, GenericType.build(elemType))); } - Tuple2, Integer> info = getArrayComponentInfo(rawType); + Tuple2, Integer> arrayComponentInfo = getArrayComponentInfo(rawType); return new ArrayFieldType( xtypeId, - isMonomorphic, nullable, trackingRef, - buildFieldType(resolver, null, GenericType.build(info.f0)), - info.f1); + buildFieldType(resolver, null, GenericType.build(arrayComponentInfo.f0)), + arrayComponentInfo.f1); } - return new ObjectFieldType(xtypeId, isMonomorphic, nullable, trackingRef); + return new ObjectFieldType(xtypeId, nullable, trackingRef); } } } public abstract static class FieldType implements 
Serializable { protected final int xtypeId; - protected final boolean isMonomorphic; protected final boolean nullable; protected final boolean trackingRef; - public FieldType(int xtypeId, boolean isMonomorphic, boolean nullable, boolean trackingRef) { - this.isMonomorphic = isMonomorphic; + public FieldType(int xtypeId, boolean nullable, boolean trackingRef) { this.trackingRef = trackingRef; this.nullable = nullable; this.xtypeId = xtypeId; } - public boolean isMonomorphic() { - return isMonomorphic; - } - public boolean trackingRef() { return trackingRef; } @@ -220,11 +219,13 @@ public boolean nullable() { * Convert a serializable field type to type token. If field type is a generic type with * generics, the generics will be built up recursively. The final leaf object type will be built * from class id or class stub. - * - * @see FinalObjectTypeStub */ public abstract TypeRef toTypeToken(TypeResolver classResolver, TypeRef declared); + public String getTypeName(TypeResolver resolver, TypeRef typeRef) { + return typeRef.getType().getTypeName(); + } + @Override public boolean equals(Object o) { if (this == o) { @@ -234,21 +235,21 @@ public boolean equals(Object o) { return false; } FieldType fieldType = (FieldType) o; - return isMonomorphic == fieldType.isMonomorphic - && trackingRef == fieldType.trackingRef - && nullable == fieldType.nullable; + return trackingRef == fieldType.trackingRef && nullable == fieldType.nullable; } @Override public int hashCode() { - return Objects.hash(isMonomorphic, nullable, trackingRef); + return Objects.hash(nullable, trackingRef); } /** Write field type info. */ public void write(MemoryBuffer buffer, boolean writeHeader) { - byte header = (byte) ((isMonomorphic ? 1 : 0) << 1); - // header of nested generic fields in collection/map will be written independently - header |= (byte) (trackingRef ? 
1 : 0); + // Header format for nested types (writeHeader=true): + // - bit 0: trackingRef + // - bit 1: nullable + // - bits 2+: typeId + byte header = (byte) ((nullable ? 0b10 : 0) | (trackingRef ? 0b1 : 0)); if (this instanceof RegisteredFieldType) { short classId = ((RegisteredFieldType) this).getClassId(); buffer.writeVarUint32Small7(writeHeader ? ((5 + classId) << 2) | header : 5 + classId); @@ -280,35 +281,38 @@ public void write(MemoryBuffer buffer) { } public static FieldType read(MemoryBuffer buffer, TypeResolver resolver) { + // Header format for nested types: + // - bit 0: trackingRef + // - bit 1: nullable + // - bits 2+: typeId int header = buffer.readVarUint32Small7(); - boolean isMonomorphic = (header & 0b10) != 0; boolean trackingRef = (header & 0b1) != 0; - // For nested types (in collections/maps), nullable defaults to true - return read(buffer, resolver, isMonomorphic, true, trackingRef, header >>> 2); + boolean nullable = (header & 0b10) != 0; + int typeId = header >>> 2; + return read(buffer, resolver, nullable, trackingRef, typeId); } /** Read field type info. 
*/ public static FieldType read( MemoryBuffer buffer, TypeResolver resolver, - boolean isFinal, boolean nullable, boolean trackingRef, int typeId) { if (typeId == 0) { - return new ObjectFieldType(-1, isFinal, nullable, trackingRef); + return new ObjectFieldType(-1, nullable, trackingRef); } else if (typeId == 1) { return new MapFieldType( - -1, isFinal, nullable, trackingRef, read(buffer, resolver), read(buffer, resolver)); + -1, nullable, trackingRef, read(buffer, resolver), read(buffer, resolver)); } else if (typeId == 2) { - return new CollectionFieldType(-1, isFinal, nullable, trackingRef, read(buffer, resolver)); + return new CollectionFieldType(-1, nullable, trackingRef, read(buffer, resolver)); } else if (typeId == 3) { int dims = buffer.readVarUint32Small7(); - return new ArrayFieldType(-1, isFinal, nullable, trackingRef, read(buffer, resolver), dims); + return new ArrayFieldType(-1, nullable, trackingRef, read(buffer, resolver), dims); } else if (typeId == 4) { return new EnumFieldType(nullable, -1); } else { - return new RegisteredFieldType(isFinal, nullable, trackingRef, (typeId - 5)); + return new RegisteredFieldType(nullable, trackingRef, (typeId - 5)); } } @@ -358,25 +362,24 @@ public static FieldType xread( switch (xtypeId & 0xff) { case Types.LIST: case Types.SET: - return new CollectionFieldType( - xtypeId, true, nullable, trackingRef, xread(buffer, resolver)); + return new CollectionFieldType(xtypeId, nullable, trackingRef, xread(buffer, resolver)); case Types.MAP: return new MapFieldType( - xtypeId, - true, - nullable, - trackingRef, - xread(buffer, resolver), - xread(buffer, resolver)); + xtypeId, nullable, trackingRef, xread(buffer, resolver), xread(buffer, resolver)); case Types.ENUM: case Types.NAMED_ENUM: return new EnumFieldType(nullable, xtypeId); case Types.UNION: return new UnionFieldType(nullable, trackingRef); case Types.UNKNOWN: - return new ObjectFieldType(xtypeId, false, nullable, trackingRef); + return new ObjectFieldType(xtypeId, 
nullable, trackingRef); default: { + if (Types.isPrimitiveType(xtypeId)) { + // unsigned types share same class with signed numeric types, so unsigned types are + // not registered. + return new RegisteredFieldType(nullable, trackingRef, xtypeId); + } if (!Types.isUserDefinedType((byte) xtypeId)) { ClassInfo classInfo = resolver.getXtypeInfo(xtypeId); if (classInfo == null) { @@ -384,13 +387,11 @@ public static FieldType xread( // when remote sends a type ID that's not registered here. // Fall back to ObjectFieldType to handle gracefully. LOG.warn("Type {} not registered locally, treating as ObjectFieldType", xtypeId); - return new ObjectFieldType(xtypeId, false, nullable, trackingRef); + return new ObjectFieldType(xtypeId, nullable, trackingRef); } - Class cls = classInfo.getCls(); - return new RegisteredFieldType( - resolver.isMonomorphic(cls), nullable, trackingRef, xtypeId); + return new RegisteredFieldType(nullable, trackingRef, xtypeId); } else { - return new ObjectFieldType(xtypeId, false, nullable, trackingRef); + return new ObjectFieldType(xtypeId, nullable, trackingRef); } } } @@ -401,9 +402,9 @@ public static FieldType xread( public static class RegisteredFieldType extends FieldType { private final short classId; - public RegisteredFieldType( - boolean isFinal, boolean nullable, boolean trackingRef, int classId) { - super(classId, isFinal, nullable, trackingRef); + public RegisteredFieldType(boolean nullable, boolean trackingRef, int classId) { + super(classId, nullable, trackingRef); + Preconditions.checkArgument(classId > 0); this.classId = (short) classId; } @@ -414,27 +415,31 @@ public short getClassId() { @Override public TypeRef toTypeToken(TypeResolver resolver, TypeRef declared) { Class cls; - if (resolver instanceof XtypeResolver) { - cls = ((XtypeResolver) resolver).getXtypeInfo(classId).getCls(); - if (Types.isPrimitiveType(classId)) { - if (declared == null) { - // For primitive types, ensure we use the correct primitive/boxed form - // based 
on the nullable flag, not the declared type - if (!nullable) { - // nullable=false means the source was primitive, use primitive type - cls = TypeUtils.unwrap(cls); - } else { - // nullable=true means the source was boxed, use boxed type - cls = TypeUtils.wrap(cls); - } + if (Types.isPrimitiveType(classId)) { + cls = Types.getClassForTypeId(classId); + if (declared == null) { + // For primitive types, ensure we use the correct primitive/boxed form + // based on the nullable flag, not the declared type + if (!nullable) { + // nullable=false means the source was primitive, use primitive type + cls = TypeUtils.unwrap(cls); } else { - if (TypeUtils.unwrap(declared.getRawType()) == TypeUtils.unwrap(cls)) { - // we still need correct type, the `read/write` should use `nullable` of `Descriptor` - // for serialization - return declared; - } + // nullable=true means the source was boxed, use boxed type + cls = TypeUtils.wrap(cls); + } + } else { + if (TypeUtils.unwrap(declared.getRawType()) == TypeUtils.unwrap(cls)) { + // we still need correct type, the `read/write` should use `nullable` of `Descriptor` + // for serialization + cls = declared.getRawType(); } } + return TypeRef.of(cls, new TypeExtMeta(classId, nullable, trackingRef)); + } + if (resolver instanceof XtypeResolver) { + ClassInfo xtypeInfo = ((XtypeResolver) resolver).getXtypeInfo(classId); + Preconditions.checkNotNull(xtypeInfo); + cls = xtypeInfo.getCls(); } else { cls = ((ClassResolver) resolver).getRegisteredClass(classId); } @@ -442,7 +447,25 @@ public TypeRef toTypeToken(TypeResolver resolver, TypeRef declared) { LOG.warn("Class {} not registered, take it as Struct type for deserialization.", classId); cls = NonexistentClass.NonexistentMetaShared.class; } - return TypeRef.of(cls, new TypeExtMeta(nullable, trackingRef)); + return TypeRef.of(cls, new TypeExtMeta(classId, nullable, trackingRef)); + } + + @Override + public String getTypeName(TypeResolver resolver, TypeRef typeRef) { + // Some registered 
class may not be registered on peer class, we always use + // registered id to keep consistent order. + // Note that this is only used for field sorting in native mode. + // For xlang mode, we always sort fields by type id. + if (resolver instanceof ClassResolver) { + ClassResolver classResolver = (ClassResolver) resolver; + // Peer class may not register this class id, which will introduce inconsistent field order + if (classResolver.isInternalRegistered(classId)) { + return String.valueOf(classId); + } else { + return "Registered"; + } + } + return String.valueOf(classId); } @Override @@ -468,9 +491,7 @@ public int hashCode() { @Override public String toString() { return "RegisteredFieldType{" - + "isMonomorphic=" - + isMonomorphic() - + ", nullable=" + + "nullable=" + nullable() + ", trackingRef=" + trackingRef() @@ -492,12 +513,8 @@ public static class CollectionFieldType extends FieldType { private final FieldType elementType; public CollectionFieldType( - int xtypeId, - boolean isFinal, - boolean nullable, - boolean trackingRef, - FieldType elementType) { - super(xtypeId, isFinal, nullable, trackingRef); + int xtypeId, boolean nullable, boolean trackingRef, FieldType elementType) { + super(xtypeId, nullable, trackingRef); this.elementType = elementType; } @@ -527,10 +544,10 @@ public TypeRef toTypeToken(TypeResolver classResolver, TypeRef declared) { } TypeRef elementType = this.elementType.toTypeToken(classResolver, declElementType); if (declared == null) { - return collectionOf(elementType, new TypeExtMeta(nullable, trackingRef)); + return collectionOf(elementType, new TypeExtMeta(xtypeId, nullable, trackingRef)); } TypeRef> collectionTypeRef = - collectionOf(declaredClass, elementType, new TypeExtMeta(nullable, trackingRef)); + collectionOf(declaredClass, elementType, new TypeExtMeta(xtypeId, nullable, trackingRef)); if (!declaredClass.isArray()) { if (declElementType.equals(elementType)) { return declared; @@ -549,7 +566,7 @@ public TypeRef
toTypeToken(TypeResolver classResolver, TypeRef declared) { TypeRef typeRef = TypeRef.of( Array.newInstance(arrayType.getRawType(), 1).getClass(), - typeRefs.get(i).getExtInfo()); + typeRefs.get(i).getTypeExtMeta()); typeRefs.set(i, typeRef); } return typeRefs.get(typeRefs.size() - 1); @@ -580,8 +597,6 @@ public String toString() { return "CollectionFieldType{" + "elementType=" + elementType - + ", isFinal=" - + isMonomorphic() + ", nullable=" + nullable() + ", trackingRef=" @@ -604,12 +619,11 @@ public static class MapFieldType extends FieldType { public MapFieldType( int xtypeId, - boolean isFinal, boolean nullable, boolean trackingRef, FieldType keyType, FieldType valueType) { - super(xtypeId, isFinal, nullable, trackingRef); + super(xtypeId, nullable, trackingRef); this.keyType = keyType; this.valueType = valueType; } @@ -643,12 +657,12 @@ public TypeRef toTypeToken(TypeResolver classResolver, TypeRef declared) { declared.getRawType(), keyType.toTypeToken(classResolver, keyDecl), valueType.toTypeToken(classResolver, valueDecl), - new TypeExtMeta(nullable, trackingRef)); + new TypeExtMeta(xtypeId, nullable, trackingRef)); } return mapOf( keyType.toTypeToken(classResolver, keyDecl), valueType.toTypeToken(classResolver, valueDecl), - new TypeExtMeta(nullable, trackingRef)); + new TypeExtMeta(xtypeId, nullable, trackingRef)); } @Override @@ -678,8 +692,6 @@ public String toString() { + keyType + ", valueType=" + valueType - + ", isFinal=" - + isMonomorphic() + ", nullable=" + nullable() + ", trackingRef=" @@ -690,7 +702,7 @@ public String toString() { public static class EnumFieldType extends FieldType { private EnumFieldType(boolean nullable, int xtypeId) { - super(xtypeId, true, nullable, false); + super(xtypeId, nullable, false); } @Override @@ -701,6 +713,11 @@ public TypeRef toTypeToken(TypeResolver classResolver, TypeRef declared) { return TypeRef.of(NonexistentClass.NonexistentEnum.class); } + @Override + public String getTypeName(TypeResolver resolver, 
TypeRef typeRef) { + return "Enum"; + } + @Override public String toString() { return "EnumFieldType{" + "xtypeId=" + xtypeId + ", nullable=" + nullable + '}'; @@ -711,19 +728,13 @@ public static class ArrayFieldType extends FieldType { private final FieldType componentType; private final int dimensions; - public ArrayFieldType( - boolean isMonomorphic, boolean trackingRef, FieldType componentType, int dimensions) { - this(-1, isMonomorphic, true, trackingRef, componentType, dimensions); - } - public ArrayFieldType( int xtypeId, - boolean isMonomorphic, boolean nullable, boolean trackingRef, FieldType componentType, int dimensions) { - super(xtypeId, isMonomorphic, nullable, trackingRef); + super(xtypeId, nullable, trackingRef); this.componentType = componentType; this.dimensions = dimensions; } @@ -737,18 +748,25 @@ public TypeRef toTypeToken(TypeResolver classResolver, TypeRef declared) { Class componentRawType = componentTypeRef.getRawType(); if (NonexistentClass.class.isAssignableFrom(componentRawType)) { return TypeRef.of( - // We embed `isMonomorphic` flag in ObjectArraySerializer, so this flag can be ignored - // here. NonexistentClass.getNonexistentClass( componentType instanceof EnumFieldType, dimensions, true), - new TypeExtMeta(nullable, trackingRef)); + new TypeExtMeta(xtypeId, nullable, trackingRef)); } else { return TypeRef.of( Array.newInstance(componentRawType, new int[dimensions]).getClass(), - new TypeExtMeta(nullable, trackingRef)); + new TypeExtMeta(xtypeId, nullable, trackingRef)); } } + @Override + public String getTypeName(TypeResolver resolver, TypeRef typeRef) { + // For native mode, this returns the same `Array` type name to ensure a consistent order, + // even if some array type + // does not exist in the current deserialization process. + // For primitive/registered array, it goes to RegisteredFieldType.
+ return "Array"; + } + public int getDimensions() { return dimensions; } @@ -784,8 +802,6 @@ public String toString() { + componentType + ", dimensions=" + dimensions - + ", isMonomorphic=" - + isMonomorphic + ", nullable=" + nullable + ", trackingRef=" @@ -797,15 +813,21 @@ public String toString() { /** Class for field type which isn't registered and not collection/map type too. */ public static class ObjectFieldType extends FieldType { - public ObjectFieldType(int xtypeId, boolean isFinal, boolean nullable, boolean trackingRef) { - super(xtypeId, isFinal, nullable, trackingRef); + public ObjectFieldType(int xtypeId, boolean nullable, boolean trackingRef) { + super(xtypeId, nullable, trackingRef); } @Override public TypeRef toTypeToken(TypeResolver classResolver, TypeRef declared) { - return isMonomorphic() - ? TypeRef.of(FinalObjectTypeStub.class, new TypeExtMeta(nullable, trackingRef)) - : TypeRef.of(Object.class, new TypeExtMeta(nullable, trackingRef)); + Class clz = declared == null ? Object.class : declared.getRawType(); + return TypeRef.of(clz, new TypeExtMeta(xtypeId, nullable, trackingRef)); + } + + @Override + public String getTypeName(TypeResolver resolver, TypeRef typeRef) { + // When a field is missing on the deserializing struct, we can't know its actual type, + // so sorting by the actual type name would produce an inconsistent field order. + return "Object"; } @Override @@ -823,8 +845,6 @@ public String toString() { return "ObjectFieldType{" + "xtypeId=" + xtypeId - + ", isMonomorphic=" - + isMonomorphic + ", nullable=" + nullable + ", trackingRef=" @@ -833,11 +853,11 @@ public String toString() { } } - /** Class for Union field type. Union types are always monomorphic and use declared type. */ + /** Class for Union field type. Union types use declared type.
*/ public static class UnionFieldType extends FieldType { public UnionFieldType(boolean nullable, boolean trackingRef) { - super(Types.UNION, true, nullable, trackingRef); + super(Types.UNION, nullable, trackingRef); } @Override @@ -848,7 +868,7 @@ public TypeRef toTypeToken(TypeResolver classResolver, TypeRef declared) { } // Fallback to base Union class if no declared type return TypeRef.of( - org.apache.fory.type.union.Union.class, new TypeExtMeta(nullable, trackingRef)); + org.apache.fory.type.union.Union.class, new TypeExtMeta(xtypeId, nullable, trackingRef)); } @Override diff --git a/java/fory-core/src/main/java/org/apache/fory/meta/TypeExtMeta.java b/java/fory-core/src/main/java/org/apache/fory/meta/TypeExtMeta.java index 90b17daa7f..19e37bed70 100644 --- a/java/fory-core/src/main/java/org/apache/fory/meta/TypeExtMeta.java +++ b/java/fory-core/src/main/java/org/apache/fory/meta/TypeExtMeta.java @@ -20,14 +20,20 @@ package org.apache.fory.meta; public class TypeExtMeta { + private final int typeId; private final boolean nullable; private final boolean trackingRef; - TypeExtMeta(boolean nullable, boolean trackingRef) { + TypeExtMeta(int typeId, boolean nullable, boolean trackingRef) { + this.typeId = typeId; this.nullable = nullable; this.trackingRef = trackingRef; } + public int typeId() { + return typeId; + } + public boolean nullable() { return nullable; } @@ -38,6 +44,13 @@ public boolean trackingRef() { @Override public String toString() { - return "TypeExtMeta{" + "nullable=" + nullable + ", trackingRef=" + trackingRef + '}'; + return "TypeExtMeta{" + + "typeId=" + + typeId + + ", nullable=" + + nullable + + ", trackingRef=" + + trackingRef + + '}'; } } diff --git a/java/fory-core/src/main/java/org/apache/fory/reflect/TypeRef.java b/java/fory-core/src/main/java/org/apache/fory/reflect/TypeRef.java index 321b01a148..dbe5c73fbf 100644 --- a/java/fory-core/src/main/java/org/apache/fory/reflect/TypeRef.java +++ 
b/java/fory-core/src/main/java/org/apache/fory/reflect/TypeRef.java @@ -34,13 +34,14 @@ import java.util.stream.Collectors; import java.util.stream.Stream; import javax.annotation.CheckForNull; +import org.apache.fory.meta.TypeExtMeta; import org.apache.fory.type.TypeUtils; // Mostly derived from Guava 32.1.2 com.google.common.reflect.TypeToken // https://github.com/google/guava/blob/9f6a3840/guava/src/com/google/common/reflect/TypeToken.java public class TypeRef { private final Type type; - private final Object extInfo; + private final TypeExtMeta typeExtMeta; private transient Class rawType; private transient Map typeMappings; @@ -58,27 +59,27 @@ public class TypeRef { */ protected TypeRef() { this.type = capture(); - this.extInfo = null; + this.typeExtMeta = null; } - protected TypeRef(Object extInfo) { + protected TypeRef(TypeExtMeta typeExtMeta) { this.type = capture(); - this.extInfo = extInfo; + this.typeExtMeta = typeExtMeta; } private TypeRef(Class declaringClass) { this.type = declaringClass; - this.extInfo = null; + this.typeExtMeta = null; } - private TypeRef(Class declaringClass, Object extInfo) { + private TypeRef(Class declaringClass, TypeExtMeta typeExtMeta) { this.type = declaringClass; - this.extInfo = extInfo; + this.typeExtMeta = typeExtMeta; } private TypeRef(Type type) { this.type = type; - this.extInfo = null; + this.typeExtMeta = null; } /** Returns an instance of type token that wraps {@code type}. */ @@ -86,7 +87,7 @@ public static TypeRef of(Class clazz) { return new TypeRef<>(clazz); } - public static TypeRef of(Class clazz, Object extInfo) { + public static TypeRef of(Class clazz, TypeExtMeta extInfo) { return new TypeRef<>(clazz, extInfo); } @@ -158,8 +159,8 @@ private static Stream> getRawTypes(Type... types) { }); } - public Object getExtInfo() { - return extInfo; + public TypeExtMeta getTypeExtMeta() { + return typeExtMeta; } /** Returns true if this type is one of the primitive types (including {@code void}). 
*/ diff --git a/java/fory-core/src/main/java/org/apache/fory/resolver/ClassResolver.java b/java/fory-core/src/main/java/org/apache/fory/resolver/ClassResolver.java index c026bdeaa7..4c5f37e951 100644 --- a/java/fory-core/src/main/java/org/apache/fory/resolver/ClassResolver.java +++ b/java/fory-core/src/main/java/org/apache/fory/resolver/ClassResolver.java @@ -147,6 +147,7 @@ import org.apache.fory.type.DescriptorGrouper; import org.apache.fory.type.GenericType; import org.apache.fory.type.TypeUtils; +import org.apache.fory.type.Types; import org.apache.fory.type.union.Union; import org.apache.fory.util.GraalvmSupport; import org.apache.fory.util.Preconditions; @@ -192,52 +193,43 @@ public class ClassResolver extends TypeResolver { /** Flag value indicating no class ID has been assigned. */ public static final short NO_CLASS_ID = TypeResolver.NO_CLASS_ID; - public static final short LAMBDA_STUB_ID = 1; - public static final short JDK_PROXY_STUB_ID = 2; - public static final short REPLACE_STUB_ID = 3; - /** - * Base offset for user-registered class IDs. User IDs are internally stored as (userId + {@value - * #USER_ID_BASE}). The first {@value #USER_ID_BASE} IDs (0 to {@value #USER_ID_BASE} - 1) are - * reserved for Fory's internal types. + * Base offset for user-registered class IDs. User IDs are internally stored as `userId + + * USER_ID_BASE`. 0 to `USER_ID_BASE` are reserved for Fory's internal types. */ public static final short USER_ID_BASE = 256; + public static final int NATIVE_START_ID = Types.STRING + 1; + public static final int VOID_ID = NATIVE_START_ID; + public static final int CHAR_ID = NATIVE_START_ID + 1; // Note: following pre-defined class id should be continuous, since they may be used based range. 
- public static final short PRIMITIVE_VOID_CLASS_ID = (short) (REPLACE_STUB_ID + 1); - public static final short PRIMITIVE_BOOLEAN_CLASS_ID = (short) (PRIMITIVE_VOID_CLASS_ID + 1); - public static final short PRIMITIVE_BYTE_CLASS_ID = (short) (PRIMITIVE_VOID_CLASS_ID + 2); - public static final short PRIMITIVE_CHAR_CLASS_ID = (short) (PRIMITIVE_VOID_CLASS_ID + 3); - public static final short PRIMITIVE_SHORT_CLASS_ID = (short) (PRIMITIVE_VOID_CLASS_ID + 4); - public static final short PRIMITIVE_INT_CLASS_ID = (short) (PRIMITIVE_VOID_CLASS_ID + 5); - public static final short PRIMITIVE_FLOAT_CLASS_ID = (short) (PRIMITIVE_VOID_CLASS_ID + 6); - public static final short PRIMITIVE_LONG_CLASS_ID = (short) (PRIMITIVE_VOID_CLASS_ID + 7); - public static final short PRIMITIVE_DOUBLE_CLASS_ID = (short) (PRIMITIVE_VOID_CLASS_ID + 8); - public static final short VOID_CLASS_ID = (short) (PRIMITIVE_DOUBLE_CLASS_ID + 1); - public static final short BOOLEAN_CLASS_ID = (short) (VOID_CLASS_ID + 1); - public static final short BYTE_CLASS_ID = (short) (VOID_CLASS_ID + 2); - public static final short CHAR_CLASS_ID = (short) (VOID_CLASS_ID + 3); - public static final short SHORT_CLASS_ID = (short) (VOID_CLASS_ID + 4); - public static final short INTEGER_CLASS_ID = (short) (VOID_CLASS_ID + 5); - public static final short FLOAT_CLASS_ID = (short) (VOID_CLASS_ID + 6); - public static final short LONG_CLASS_ID = (short) (VOID_CLASS_ID + 7); - public static final short DOUBLE_CLASS_ID = (short) (VOID_CLASS_ID + 8); - public static final short STRING_CLASS_ID = (short) (VOID_CLASS_ID + 9); - public static final short PRIMITIVE_BOOLEAN_ARRAY_CLASS_ID = (short) (STRING_CLASS_ID + 1); - public static final short PRIMITIVE_BYTE_ARRAY_CLASS_ID = (short) (STRING_CLASS_ID + 2); - public static final short PRIMITIVE_CHAR_ARRAY_CLASS_ID = (short) (STRING_CLASS_ID + 3); - public static final short PRIMITIVE_SHORT_ARRAY_CLASS_ID = (short) (STRING_CLASS_ID + 4); - public static final short 
PRIMITIVE_INT_ARRAY_CLASS_ID = (short) (STRING_CLASS_ID + 5); - public static final short PRIMITIVE_FLOAT_ARRAY_CLASS_ID = (short) (STRING_CLASS_ID + 6); - public static final short PRIMITIVE_LONG_ARRAY_CLASS_ID = (short) (STRING_CLASS_ID + 7); - public static final short PRIMITIVE_DOUBLE_ARRAY_CLASS_ID = (short) (STRING_CLASS_ID + 8); - public static final short STRING_ARRAY_CLASS_ID = (short) (PRIMITIVE_DOUBLE_ARRAY_CLASS_ID + 1); - public static final short OBJECT_ARRAY_CLASS_ID = (short) (PRIMITIVE_DOUBLE_ARRAY_CLASS_ID + 2); - public static final short ARRAYLIST_CLASS_ID = (short) (PRIMITIVE_DOUBLE_ARRAY_CLASS_ID + 3); - public static final short HASHMAP_CLASS_ID = (short) (PRIMITIVE_DOUBLE_ARRAY_CLASS_ID + 4); - public static final short HASHSET_CLASS_ID = (short) (PRIMITIVE_DOUBLE_ARRAY_CLASS_ID + 5); - public static final short CLASS_CLASS_ID = (short) (PRIMITIVE_DOUBLE_ARRAY_CLASS_ID + 6); - public static final short EMPTY_OBJECT_ID = (short) (PRIMITIVE_DOUBLE_ARRAY_CLASS_ID + 7); + public static final int PRIMITIVE_VOID_ID = NATIVE_START_ID + 2; + public static final int PRIMITIVE_BOOL_ID = NATIVE_START_ID + 3; + public static final int PRIMITIVE_INT8_ID = NATIVE_START_ID + 4; + public static final int PRIMITIVE_CHAR_ID = NATIVE_START_ID + 5; + public static final int PRIMITIVE_INT16_ID = NATIVE_START_ID + 6; + public static final int PRIMITIVE_INT32_ID = NATIVE_START_ID + 7; + public static final int PRIMITIVE_FLOAT32_ID = NATIVE_START_ID + 8; + public static final int PRIMITIVE_INT64_ID = NATIVE_START_ID + 9; + public static final int PRIMITIVE_FLOAT64_ID = NATIVE_START_ID + 10; + public static final int PRIMITIVE_BOOLEAN_ARRAY_ID = NATIVE_START_ID + 11; + public static final int PRIMITIVE_BYTE_ARRAY_ID = NATIVE_START_ID + 12; + public static final int PRIMITIVE_CHAR_ARRAY_ID = NATIVE_START_ID + 13; + public static final int PRIMITIVE_SHORT_ARRAY_ID = NATIVE_START_ID + 14; + public static final int PRIMITIVE_INT_ARRAY_ID = NATIVE_START_ID + 15; + public 
static final int PRIMITIVE_FLOAT_ARRAY_ID = NATIVE_START_ID + 16; + public static final int PRIMITIVE_LONG_ARRAY_ID = NATIVE_START_ID + 17; + public static final int PRIMITIVE_DOUBLE_ARRAY_ID = NATIVE_START_ID + 18; + public static final int STRING_ARRAY_ID = NATIVE_START_ID + 19; + public static final int OBJECT_ARRAY_ID = NATIVE_START_ID + 20; + public static final int ARRAYLIST_ID = NATIVE_START_ID + 21; + public static final int HASHMAP_ID = NATIVE_START_ID + 22; + public static final int HASHSET_ID = NATIVE_START_ID + 23; + public static final int CLASS_ID = NATIVE_START_ID + 24; + public static final int EMPTY_OBJECT_ID = NATIVE_START_ID + 25; + public static final short LAMBDA_STUB_ID = NATIVE_START_ID + 26; + public static final short JDK_PROXY_STUB_ID = NATIVE_START_ID + 27; + public static final short REPLACE_STUB_ID = NATIVE_START_ID + 28; private final Fory fory; XtypeResolver xtypeResolver; @@ -256,6 +248,7 @@ public ClassResolver(Fory fory) { super(fory); this.fory = fory; classInfoCache = NIL_CLASS_INFO; + extRegistry.classIdGenerator = REPLACE_STUB_ID + 1; shimDispatcher = new ShimDispatcher(fory); _addGraalvmClassRegistry(fory.getConfig().getConfigHash(), this); } @@ -266,39 +259,39 @@ public void initialize() { registerInternal(LambdaSerializer.ReplaceStub.class, LAMBDA_STUB_ID); registerInternal(JdkProxySerializer.ReplaceStub.class, JDK_PROXY_STUB_ID); registerInternal(ReplaceResolveSerializer.ReplaceStub.class, REPLACE_STUB_ID); - registerInternal(void.class, PRIMITIVE_VOID_CLASS_ID); - registerInternal(boolean.class, PRIMITIVE_BOOLEAN_CLASS_ID); - registerInternal(byte.class, PRIMITIVE_BYTE_CLASS_ID); - registerInternal(char.class, PRIMITIVE_CHAR_CLASS_ID); - registerInternal(short.class, PRIMITIVE_SHORT_CLASS_ID); - registerInternal(int.class, PRIMITIVE_INT_CLASS_ID); - registerInternal(float.class, PRIMITIVE_FLOAT_CLASS_ID); - registerInternal(long.class, PRIMITIVE_LONG_CLASS_ID); - registerInternal(double.class, PRIMITIVE_DOUBLE_CLASS_ID); - 
registerInternal(Void.class, VOID_CLASS_ID); - registerInternal(Boolean.class, BOOLEAN_CLASS_ID); - registerInternal(Byte.class, BYTE_CLASS_ID); - registerInternal(Character.class, CHAR_CLASS_ID); - registerInternal(Short.class, SHORT_CLASS_ID); - registerInternal(Integer.class, INTEGER_CLASS_ID); - registerInternal(Float.class, FLOAT_CLASS_ID); - registerInternal(Long.class, LONG_CLASS_ID); - registerInternal(Double.class, DOUBLE_CLASS_ID); - registerInternal(String.class, STRING_CLASS_ID); - registerInternal(boolean[].class, PRIMITIVE_BOOLEAN_ARRAY_CLASS_ID); - registerInternal(byte[].class, PRIMITIVE_BYTE_ARRAY_CLASS_ID); - registerInternal(char[].class, PRIMITIVE_CHAR_ARRAY_CLASS_ID); - registerInternal(short[].class, PRIMITIVE_SHORT_ARRAY_CLASS_ID); - registerInternal(int[].class, PRIMITIVE_INT_ARRAY_CLASS_ID); - registerInternal(float[].class, PRIMITIVE_FLOAT_ARRAY_CLASS_ID); - registerInternal(long[].class, PRIMITIVE_LONG_ARRAY_CLASS_ID); - registerInternal(double[].class, PRIMITIVE_DOUBLE_ARRAY_CLASS_ID); - registerInternal(String[].class, STRING_ARRAY_CLASS_ID); - registerInternal(Object[].class, OBJECT_ARRAY_CLASS_ID); - registerInternal(ArrayList.class, ARRAYLIST_CLASS_ID); - registerInternal(HashMap.class, HASHMAP_CLASS_ID); - registerInternal(HashSet.class, HASHSET_CLASS_ID); - registerInternal(Class.class, CLASS_CLASS_ID); + registerInternal(void.class, PRIMITIVE_VOID_ID); + registerInternal(boolean.class, PRIMITIVE_BOOL_ID); + registerInternal(byte.class, PRIMITIVE_INT8_ID); + registerInternal(char.class, PRIMITIVE_CHAR_ID); + registerInternal(short.class, PRIMITIVE_INT16_ID); + registerInternal(int.class, PRIMITIVE_INT32_ID); + registerInternal(float.class, PRIMITIVE_FLOAT32_ID); + registerInternal(long.class, PRIMITIVE_INT64_ID); + registerInternal(double.class, PRIMITIVE_FLOAT64_ID); + registerInternal(Void.class, VOID_ID); + registerInternal(Boolean.class, Types.BOOL); + registerInternal(Byte.class, Types.INT8); + 
registerInternal(Character.class, CHAR_ID); + registerInternal(Short.class, Types.INT16); + registerInternal(Integer.class, Types.INT32); + registerInternal(Float.class, Types.FLOAT32); + registerInternal(Long.class, Types.INT64); + registerInternal(Double.class, Types.FLOAT64); + registerInternal(String.class, Types.STRING); + registerInternal(boolean[].class, PRIMITIVE_BOOLEAN_ARRAY_ID); + registerInternal(byte[].class, PRIMITIVE_BYTE_ARRAY_ID); + registerInternal(char[].class, PRIMITIVE_CHAR_ARRAY_ID); + registerInternal(short[].class, PRIMITIVE_SHORT_ARRAY_ID); + registerInternal(int[].class, PRIMITIVE_INT_ARRAY_ID); + registerInternal(float[].class, PRIMITIVE_FLOAT_ARRAY_ID); + registerInternal(long[].class, PRIMITIVE_LONG_ARRAY_ID); + registerInternal(double[].class, PRIMITIVE_DOUBLE_ARRAY_ID); + registerInternal(String[].class, STRING_ARRAY_ID); + registerInternal(Object[].class, OBJECT_ARRAY_ID); + registerInternal(ArrayList.class, ARRAYLIST_ID); + registerInternal(HashMap.class, HASHMAP_ID); + registerInternal(HashSet.class, HASHSET_ID); + registerInternal(Class.class, CLASS_ID); registerInternal(Object.class, EMPTY_OBJECT_ID); registerCommonUsedClasses(); registerDefaultClasses(); @@ -729,7 +722,7 @@ public boolean isMonomorphic(Class clz) { if (Union.class.isAssignableFrom(clz)) { return true; } - return (isInnerClass(clz) || clz.isEnum()); + return (isInternalRegistered(clz) || clz.isEnum()); } return ReflectionUtils.isMonomorphic(clz); } @@ -738,8 +731,12 @@ public boolean isBuildIn(Descriptor descriptor) { return isMonomorphic(descriptor); } + public boolean isInternalRegistered(int classId) { + return classId != NO_CLASS_ID && classId < innerEndClassId; + } + /** Returns true if cls is fory inner registered class. 
*/ - boolean isInnerClass(Class cls) { + public boolean isInternalRegistered(Class cls) { Short classId = extRegistry.registeredClassIdMap.get(cls); if (classId == null) { ClassInfo classInfo = getClassInfo(cls, false); @@ -1476,9 +1473,9 @@ private boolean isSecure(Class cls) { public void writeClassAndUpdateCache(MemoryBuffer buffer, Class cls) { // fast path for common type if (cls == Integer.class) { - buffer.writeVarUint32Small7(INTEGER_CLASS_ID << 1); + buffer.writeVarUint32Small7(Types.INT32 << 1); } else if (cls == Long.class) { - buffer.writeVarUint32Small7(LONG_CLASS_ID << 1); + buffer.writeVarUint32Small7(Types.INT64 << 1); } else { writeClassInfo(buffer, getOrUpdateClassInfo(cls)); } @@ -1872,7 +1869,7 @@ public ClassInfoHolder nilClassInfoHolder() { } public boolean isPrimitive(short classId) { - return classId >= PRIMITIVE_VOID_CLASS_ID && classId <= PRIMITIVE_DOUBLE_CLASS_ID; + return classId >= PRIMITIVE_VOID_ID && classId <= PRIMITIVE_FLOAT64_ID; } public CodeGenerator getCodeGenerator(ClassLoader... loaders) { @@ -1920,8 +1917,9 @@ public Comparator createTypeAndNameComparator() { // Use normalized type name so that Collection/Map subtypes have consistent order // between processes even if the field doesn't exist in peer (e.g., List vs Collection). int c = getNormalizedTypeName(d1).compareTo(getNormalizedTypeName(d2)); + // noinspection Duplicates if (c == 0) { - c = DescriptorGrouper.getFieldSortKey(d1).compareTo(DescriptorGrouper.getFieldSortKey(d2)); + c = getFieldSortKey(d1).compareTo(getFieldSortKey(d2)); if (c == 0) { // Field name duplicate in super/child classes. 
c = d1.getDeclaringClass().compareTo(d2.getDeclaringClass()); @@ -1946,8 +1944,7 @@ public DescriptorGrouper createDescriptorGrouper( descriptors, descriptorsGroupedOrdered, descriptorUpdator, - fory.compressInt(), - fory.compressLong(), + getPrimitiveComparator(), createTypeAndNameComparator()) .sort(); } diff --git a/java/fory-core/src/main/java/org/apache/fory/resolver/TypeResolver.java b/java/fory-core/src/main/java/org/apache/fory/resolver/TypeResolver.java index 73bd5f7ec9..90600bec1a 100644 --- a/java/fory-core/src/main/java/org/apache/fory/resolver/TypeResolver.java +++ b/java/fory-core/src/main/java/org/apache/fory/resolver/TypeResolver.java @@ -20,6 +20,7 @@ package org.apache.fory.resolver; import static org.apache.fory.Fory.NOT_SUPPORT_XLANG; +import static org.apache.fory.type.TypeUtils.getSizeOfPrimitiveType; import com.google.common.collect.BiMap; import com.google.common.collect.HashBiMap; @@ -28,6 +29,7 @@ import java.lang.reflect.Type; import java.util.ArrayList; import java.util.Collection; +import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -77,13 +79,14 @@ import org.apache.fory.type.GenericType; import org.apache.fory.type.ScalaTypes; import org.apache.fory.type.TypeUtils; +import org.apache.fory.type.Types; import org.apache.fory.util.GraalvmSupport; import org.apache.fory.util.GraalvmSupport.GraalvmSerializerHolder; import org.apache.fory.util.Preconditions; import org.apache.fory.util.function.Functions; // Internal type dispatcher. -// Do not use this interface outside of fory package +// Do not use this interface outside fory package @Internal @SuppressWarnings({"rawtypes", "unchecked"}) public abstract class TypeResolver { @@ -183,9 +186,8 @@ public abstract void registerSerializer( * ignored too. 
*/ public final boolean needToWriteRef(TypeRef typeRef) { - Object extInfo = typeRef.getExtInfo(); - if (extInfo instanceof TypeExtMeta) { - TypeExtMeta meta = (TypeExtMeta) extInfo; + TypeExtMeta meta = typeRef.getTypeExtMeta(); + if (meta != null) { return meta.trackingRef(); } Class cls = typeRef.getRawType(); @@ -623,6 +625,68 @@ public List getFieldDescriptors(Class clz, boolean searchParent) return result; } + /** + * Gets the sort key for a field descriptor. + * + *

    If the field has a {@link ForyField} annotation with id >= 0, returns the id as a string. + * Otherwise, returns the snake_case field name. This ensures fields are sorted by tag ID when + * configured, matching the fingerprint computation order. + * + * @param descriptor the field descriptor + * @return the sort key (tag ID as string or snake_case name) + */ + protected static String getFieldSortKey(Descriptor descriptor) { + ForyField foryField = descriptor.getForyField(); + if (foryField != null && foryField.id() >= 0) { + return String.valueOf(foryField.id()); + } + return descriptor.getSnakeCaseName(); + } + /** + * When compression is disabled, sort primitive descriptors from largest to smallest; if the size + * is the same, sort by type id and then by field sort key to fix the order. + * + *

    When compression is enabled, sort primitive descriptors from largest to smallest but place + * compressed fields last. If the size is the same, sort by type id, then by field sort key. + */ + public Comparator getPrimitiveComparator() { + return (d1, d2) -> { + Class t1 = TypeUtils.unwrap(d1.getRawType()); + Class t2 = TypeUtils.unwrap(d2.getRawType()); + int typeId1 = Types.getDescriptorTypeId(fory, d1); + int typeId2 = Types.getDescriptorTypeId(fory, d2); + boolean t1Compress = Types.isCompressedType(typeId1); + boolean t2Compress = Types.isCompressedType(typeId2); + if ((t1Compress && t2Compress) || (!t1Compress && !t2Compress)) { + int c = getSizeOfPrimitiveType(t2) - getSizeOfPrimitiveType(t1); + if (c == 0) { + c = typeId2 - typeId1; + // noinspection Duplicates + if (c == 0) { + c = getFieldSortKey(d1).compareTo(getFieldSortKey(d2)); + if (c == 0) { + // Field name duplicate in super/child classes. + c = d1.getDeclaringClass().compareTo(d2.getDeclaringClass()); + if (c == 0) { + // Final tie-breaker: use actual field name to distinguish fields with same tag ID. + // This ensures Comparator contract is satisfied (returns 0 only for same object). + c = d1.getName().compareTo(d2.getName()); + } + } + } + return c; + } + return c; + } + if (t1Compress) { + return 1; + } + // t2 compress + return -1; + }; + } + /** * Get the nullable flag for a field, respecting xlang mode.
* diff --git a/java/fory-core/src/main/java/org/apache/fory/resolver/XtypeResolver.java b/java/fory-core/src/main/java/org/apache/fory/resolver/XtypeResolver.java index a4fc4079e7..eeac4aec12 100644 --- a/java/fory-core/src/main/java/org/apache/fory/resolver/XtypeResolver.java +++ b/java/fory-core/src/main/java/org/apache/fory/resolver/XtypeResolver.java @@ -618,10 +618,19 @@ private Serializer getCollectionSerializer(Class cls) { private void registerDefaultTypes() { registerDefaultTypes(Types.BOOL, Boolean.class, boolean.class, AtomicBoolean.class); - registerDefaultTypes(Types.INT8, Byte.class, byte.class); - registerDefaultTypes(Types.INT16, Short.class, short.class); + registerDefaultTypes(Types.UINT8, Byte.class, byte.class); + registerDefaultTypes(Types.UINT16, Short.class, short.class); + registerDefaultTypes(Types.UINT32, Integer.class, int.class, AtomicInteger.class); + registerDefaultTypes(Types.UINT64, Long.class, long.class, AtomicLong.class); + registerDefaultTypes(Types.TAGGED_UINT64, Long.class, long.class, AtomicLong.class); registerDefaultTypes(Types.INT32, Integer.class, int.class, AtomicInteger.class); registerDefaultTypes(Types.INT64, Long.class, long.class, AtomicLong.class); + registerDefaultTypes(Types.TAGGED_INT64, Long.class, long.class, AtomicLong.class); + + registerDefaultTypes(Types.INT8, Byte.class, byte.class); + registerDefaultTypes(Types.INT16, Short.class, short.class); + registerDefaultTypes(Types.VARINT32, Integer.class, int.class, AtomicInteger.class); + registerDefaultTypes(Types.VARINT64, Long.class, long.class, AtomicLong.class); registerDefaultTypes(Types.FLOAT32, Float.class, float.class); registerDefaultTypes(Types.FLOAT64, Double.class, double.class); registerDefaultTypes(Types.STRING, String.class, StringBuilder.class, StringBuffer.class); @@ -953,19 +962,17 @@ public DescriptorGrouper createDescriptorGrouper( descriptors, descriptorsGroupedOrdered, descriptorUpdator, - fory.compressInt(), - fory.compressLong(), + 
getPrimitiveComparator(), (o1, o2) -> { int xtypeId = getXtypeId(o1.getRawType()); int xtypeId2 = getXtypeId(o2.getRawType()); if (xtypeId == xtypeId2) { - return DescriptorGrouper.getFieldSortKey(o1) - .compareTo(DescriptorGrouper.getFieldSortKey(o2)); + return getFieldSortKey(o1).compareTo(getFieldSortKey(o2)); } else { return xtypeId - xtypeId2; } }) - .setOtherDescriptorComparator(Comparator.comparing(DescriptorGrouper::getFieldSortKey)) + .setOtherDescriptorComparator(Comparator.comparing(TypeResolver::getFieldSortKey)) .sort(); } diff --git a/java/fory-core/src/main/java/org/apache/fory/serializer/AbstractObjectSerializer.java b/java/fory-core/src/main/java/org/apache/fory/serializer/AbstractObjectSerializer.java index 6c0a0ee193..6f7dd454e6 100644 --- a/java/fory-core/src/main/java/org/apache/fory/serializer/AbstractObjectSerializer.java +++ b/java/fory-core/src/main/java/org/apache/fory/serializer/AbstractObjectSerializer.java @@ -37,8 +37,10 @@ import org.apache.fory.resolver.ClassResolver; import org.apache.fory.resolver.RefResolver; import org.apache.fory.resolver.TypeResolver; +import org.apache.fory.serializer.FieldGroups.SerializationFieldInfo; import org.apache.fory.type.Descriptor; import org.apache.fory.type.DescriptorGrouper; +import org.apache.fory.type.DispatchId; import org.apache.fory.type.Generics; import org.apache.fory.util.record.RecordComponent; import org.apache.fory.util.record.RecordInfo; @@ -50,7 +52,7 @@ public abstract class AbstractObjectSerializer extends Serializer { protected final TypeResolver typeResolver; protected final boolean isRecord; protected final ObjectCreator objectCreator; - private FieldGroups.SerializationFieldInfo[] fieldInfos; + private SerializationFieldInfo[] fieldInfos; private RecordInfo copyRecordInfo; public AbstractObjectSerializer(Fory fory, Class type) { @@ -69,7 +71,7 @@ public AbstractObjectSerializer(Fory fory, Class type, ObjectCreator objec static void writeOtherFieldValue( SerializationBinding 
binding, MemoryBuffer buffer, - FieldGroups.SerializationFieldInfo fieldInfo, + SerializationFieldInfo fieldInfo, Object fieldValue) { if (fieldInfo.useDeclaredTypeInfo) { switch (fieldInfo.refMode) { @@ -117,7 +119,7 @@ static void writeContainerFieldValue( RefResolver refResolver, TypeResolver typeResolver, Generics generics, - FieldGroups.SerializationFieldInfo fieldInfo, + SerializationFieldInfo fieldInfo, MemoryBuffer buffer, Object fieldValue) { switch (fieldInfo.refMode) { @@ -159,48 +161,59 @@ static void writeContainerFieldValue( /** * Write a primitive field value to buffer using direct memory offset access. * - * @param fory the fory instance for compression settings * @param buffer the buffer to write to * @param targetObject the object containing the field * @param fieldOffset the memory offset of the field - * @param classId the class ID of the primitive type - * @return true if classId is not a primitive type and needs further write handling + * @param dispatchId the dispatch ID of the primitive type + * @return true if dispatchId is not a primitive type and needs further write handling */ static boolean writePrimitiveFieldValue( - Fory fory, MemoryBuffer buffer, Object targetObject, long fieldOffset, short classId) { - switch (classId) { - case ClassResolver.PRIMITIVE_BOOLEAN_CLASS_ID: + MemoryBuffer buffer, Object targetObject, long fieldOffset, int dispatchId) { + switch (dispatchId) { + case DispatchId.PRIMITIVE_BOOL: buffer.writeBoolean(Platform.getBoolean(targetObject, fieldOffset)); return false; - case ClassResolver.PRIMITIVE_BYTE_CLASS_ID: + case DispatchId.PRIMITIVE_INT8: + case DispatchId.PRIMITIVE_UINT8: buffer.writeByte(Platform.getByte(targetObject, fieldOffset)); return false; - case ClassResolver.PRIMITIVE_CHAR_CLASS_ID: + case DispatchId.PRIMITIVE_CHAR: buffer.writeChar(Platform.getChar(targetObject, fieldOffset)); return false; - case ClassResolver.PRIMITIVE_SHORT_CLASS_ID: + case DispatchId.PRIMITIVE_INT16: + case
DispatchId.PRIMITIVE_UINT16: buffer.writeInt16(Platform.getShort(targetObject, fieldOffset)); return false; - case ClassResolver.PRIMITIVE_INT_CLASS_ID: - { - int fieldValue = Platform.getInt(targetObject, fieldOffset); - if (fory.compressInt()) { - buffer.writeVarInt32(fieldValue); - } else { - buffer.writeInt32(fieldValue); - } - return false; - } - case ClassResolver.PRIMITIVE_FLOAT_CLASS_ID: + case DispatchId.PRIMITIVE_INT32: + case DispatchId.PRIMITIVE_UINT32: + buffer.writeInt32(Platform.getInt(targetObject, fieldOffset)); + return false; + case DispatchId.PRIMITIVE_VARINT32: + buffer.writeVarInt32(Platform.getInt(targetObject, fieldOffset)); + return false; + case DispatchId.PRIMITIVE_VAR_UINT32: + buffer.writeVarUint32(Platform.getInt(targetObject, fieldOffset)); + return false; + case DispatchId.PRIMITIVE_FLOAT32: buffer.writeFloat32(Platform.getFloat(targetObject, fieldOffset)); return false; - case ClassResolver.PRIMITIVE_LONG_CLASS_ID: - { - long fieldValue = Platform.getLong(targetObject, fieldOffset); - fory.writeInt64(buffer, fieldValue); - return false; - } - case ClassResolver.PRIMITIVE_DOUBLE_CLASS_ID: + case DispatchId.PRIMITIVE_INT64: + case DispatchId.PRIMITIVE_UINT64: + buffer.writeInt64(Platform.getLong(targetObject, fieldOffset)); + return false; + case DispatchId.PRIMITIVE_VARINT64: + buffer.writeVarInt64(Platform.getLong(targetObject, fieldOffset)); + return false; + case DispatchId.PRIMITIVE_TAGGED_INT64: + buffer.writeTaggedInt64(Platform.getLong(targetObject, fieldOffset)); + return false; + case DispatchId.PRIMITIVE_VAR_UINT64: + buffer.writeVarUint64(Platform.getLong(targetObject, fieldOffset)); + return false; + case DispatchId.PRIMITIVE_TAGGED_UINT64: + buffer.writeTaggedUint64(Platform.getLong(targetObject, fieldOffset)); + return false; + case DispatchId.PRIMITIVE_FLOAT64: buffer.writeFloat64(Platform.getDouble(targetObject, fieldOffset)); return false; default: @@ -211,56 +224,63 @@ static boolean writePrimitiveFieldValue( /** * 
Write a primitive field value to buffer using the field accessor. * - * @param fory the fory instance for compression settings * @param buffer the buffer to write to * @param targetObject the object containing the field * @param fieldAccessor the accessor to get the field value - * @param classId the class ID of the primitive type - * @return true if classId is not a primitive type and needs further write handling + * @param dispatchId the dispatch ID of the primitive type + * @return true if dispatchId is not a primitive type and needs further write handling */ static boolean writePrimitiveFieldValue( - Fory fory, - MemoryBuffer buffer, - Object targetObject, - FieldAccessor fieldAccessor, - short classId) { + MemoryBuffer buffer, Object targetObject, FieldAccessor fieldAccessor, int dispatchId) { long fieldOffset = fieldAccessor.getFieldOffset(); if (fieldOffset != -1) { - return writePrimitiveFieldValue(fory, buffer, targetObject, fieldOffset, classId); + return writePrimitiveFieldValue(buffer, targetObject, fieldOffset, dispatchId); } - switch (classId) { - case ClassResolver.PRIMITIVE_BOOLEAN_CLASS_ID: + switch (dispatchId) { + case DispatchId.PRIMITIVE_BOOL: buffer.writeBoolean((Boolean) fieldAccessor.get(targetObject)); return false; - case ClassResolver.PRIMITIVE_BYTE_CLASS_ID: + case DispatchId.PRIMITIVE_INT8: + case DispatchId.PRIMITIVE_UINT8: buffer.writeByte((Byte) fieldAccessor.get(targetObject)); return false; - case ClassResolver.PRIMITIVE_CHAR_CLASS_ID: + case DispatchId.PRIMITIVE_CHAR: buffer.writeChar((Character) fieldAccessor.get(targetObject)); return false; - case ClassResolver.PRIMITIVE_SHORT_CLASS_ID: + case DispatchId.PRIMITIVE_INT16: + case DispatchId.PRIMITIVE_UINT16: buffer.writeInt16((Short) fieldAccessor.get(targetObject)); return false; - case ClassResolver.PRIMITIVE_INT_CLASS_ID: - { - int fieldValue = (Integer) fieldAccessor.get(targetObject); - if (fory.compressInt()) { - buffer.writeVarInt32(fieldValue); - } else { -
buffer.writeInt32(fieldValue); - } - return false; - } - case ClassResolver.PRIMITIVE_FLOAT_CLASS_ID: + case DispatchId.PRIMITIVE_INT32: + case DispatchId.PRIMITIVE_UINT32: + buffer.writeInt32((Integer) fieldAccessor.get(targetObject)); + return false; + case DispatchId.PRIMITIVE_VARINT32: + buffer.writeVarInt32((Integer) fieldAccessor.get(targetObject)); + return false; + case DispatchId.PRIMITIVE_VAR_UINT32: + buffer.writeVarUint32((Integer) fieldAccessor.get(targetObject)); + return false; + case DispatchId.PRIMITIVE_FLOAT32: buffer.writeFloat32((Float) fieldAccessor.get(targetObject)); return false; - case ClassResolver.PRIMITIVE_LONG_CLASS_ID: - { - long fieldValue = (long) fieldAccessor.get(targetObject); - fory.writeInt64(buffer, fieldValue); - return false; - } - case ClassResolver.PRIMITIVE_DOUBLE_CLASS_ID: + case DispatchId.PRIMITIVE_INT64: + case DispatchId.PRIMITIVE_UINT64: + buffer.writeInt64((Long) fieldAccessor.get(targetObject)); + return false; + case DispatchId.PRIMITIVE_VARINT64: + buffer.writeVarInt64((Long) fieldAccessor.get(targetObject)); + return false; + case DispatchId.PRIMITIVE_TAGGED_INT64: + buffer.writeTaggedInt64((Long) fieldAccessor.get(targetObject)); + return false; + case DispatchId.PRIMITIVE_VAR_UINT64: + buffer.writeVarUint64((Long) fieldAccessor.get(targetObject)); + return false; + case DispatchId.PRIMITIVE_TAGGED_UINT64: + buffer.writeTaggedUint64((Long) fieldAccessor.get(targetObject)); + return false; + case DispatchId.PRIMITIVE_FLOAT64: buffer.writeFloat64((Double) fieldAccessor.get(targetObject)); return false; default: @@ -274,7 +294,7 @@ static boolean writePrimitiveFieldValue( * @return true if field value isn't written by this function. */ static boolean writeBasicObjectFieldValue( - Fory fory, MemoryBuffer buffer, Object fieldValue, short classId) { + Fory fory, MemoryBuffer buffer, Object fieldValue, int dispatchId) { if (fieldValue == null) { throw new IllegalArgumentException( "Non-nullable field has null value. 
In xlang mode, fields are non-nullable by default. " @@ -284,8 +304,8 @@ static boolean writeBasicObjectFieldValue( return true; // let common path handle this. } // add time types serialization here. - switch (classId) { - case ClassResolver.STRING_CLASS_ID: // fastpath for string. + switch (dispatchId) { + case DispatchId.STRING: // fastpath for string. String stringValue = (String) (fieldValue); if (fory.getStringSerializer().needToWriteRef()) { fory.writeJavaStringRef(buffer, stringValue); @@ -293,50 +313,52 @@ static boolean writeBasicObjectFieldValue( fory.writeString(buffer, stringValue); } return false; - case ClassResolver.BOOLEAN_CLASS_ID: - { - buffer.writeBoolean((Boolean) fieldValue); - return false; - } - case ClassResolver.BYTE_CLASS_ID: - { - buffer.writeByte((Byte) fieldValue); - return false; - } - case ClassResolver.CHAR_CLASS_ID: - { - buffer.writeChar((Character) fieldValue); - return false; - } - case ClassResolver.SHORT_CLASS_ID: - { - buffer.writeInt16((Short) fieldValue); - return false; - } - case ClassResolver.INTEGER_CLASS_ID: - { - if (fory.compressInt()) { - buffer.writeVarInt32((Integer) fieldValue); - } else { - buffer.writeInt32((Integer) fieldValue); - } - return false; - } - case ClassResolver.FLOAT_CLASS_ID: - { - buffer.writeFloat32((Float) fieldValue); - return false; - } - case ClassResolver.LONG_CLASS_ID: - { - fory.writeInt64(buffer, (Long) fieldValue); - return false; - } - case ClassResolver.DOUBLE_CLASS_ID: - { - buffer.writeFloat64((Double) fieldValue); - return false; - } + case DispatchId.BOOL: + buffer.writeBoolean((Boolean) fieldValue); + return false; + case DispatchId.INT8: + case DispatchId.UINT8: + buffer.writeByte((Byte) fieldValue); + return false; + case DispatchId.CHAR: + buffer.writeChar((Character) fieldValue); + return false; + case DispatchId.INT16: + case DispatchId.UINT16: + buffer.writeInt16((Short) fieldValue); + return false; + case DispatchId.INT32: + case DispatchId.UINT32: + 
buffer.writeInt32((Integer) fieldValue); + return false; + case DispatchId.VARINT32: + buffer.writeVarInt32((Integer) fieldValue); + return false; + case DispatchId.VAR_UINT32: + buffer.writeVarUint32((Integer) fieldValue); + return false; + case DispatchId.INT64: + case DispatchId.UINT64: + buffer.writeInt64((Long) fieldValue); + return false; + case DispatchId.VARINT64: + buffer.writeVarInt64((Long) fieldValue); + return false; + case DispatchId.TAGGED_INT64: + buffer.writeTaggedInt64((Long) fieldValue); + return false; + case DispatchId.VAR_UINT64: + buffer.writeVarUint64((Long) fieldValue); + return false; + case DispatchId.TAGGED_UINT64: + buffer.writeTaggedUint64((Long) fieldValue); + return false; + case DispatchId.FLOAT32: + buffer.writeFloat32((Float) fieldValue); + return false; + case DispatchId.FLOAT64: + buffer.writeFloat64((Double) fieldValue); + return false; default: return true; } @@ -349,104 +371,136 @@ static boolean writeBasicObjectFieldValue( * @param fory the fory instance for compression and ref tracking settings * @param buffer the buffer to write to * @param fieldValue the field value to write (may be null) - * @param classId the class ID of the boxed type - * @return true if classId is not a basic type or ref tracking is enabled, needing further write - * handling + * @param dispatchId the dispatch ID of the boxed type + * @return true if dispatchId is not a basic type or ref tracking is enabled, needing further + * write handling */ static boolean writeBasicNullableObjectFieldValue( - Fory fory, MemoryBuffer buffer, Object fieldValue, short classId) { + Fory fory, MemoryBuffer buffer, Object fieldValue, int dispatchId) { if (!fory.isBasicTypesRefIgnored()) { return true; // let common path handle this. } // add time types serialization here. - switch (classId) { - case ClassResolver.STRING_CLASS_ID: // fastpath for string. + switch (dispatchId) { + case DispatchId.STRING: // fastpath for string.
fory.writeJavaStringRef(buffer, (String) (fieldValue)); return false; - case ClassResolver.BOOLEAN_CLASS_ID: - { - if (fieldValue == null) { - buffer.writeByte(Fory.NULL_FLAG); - } else { - buffer.writeByte(Fory.NOT_NULL_VALUE_FLAG); - buffer.writeBoolean((Boolean) (fieldValue)); - } - return false; + case DispatchId.BOOL: + if (fieldValue == null) { + buffer.writeByte(Fory.NULL_FLAG); + } else { + buffer.writeByte(Fory.NOT_NULL_VALUE_FLAG); + buffer.writeBoolean((Boolean) (fieldValue)); } - case ClassResolver.BYTE_CLASS_ID: - { - if (fieldValue == null) { - buffer.writeByte(Fory.NULL_FLAG); - } else { - buffer.writeByte(Fory.NOT_NULL_VALUE_FLAG); - buffer.writeByte((Byte) (fieldValue)); - } - return false; + return false; + case DispatchId.INT8: + case DispatchId.UINT8: + if (fieldValue == null) { + buffer.writeByte(Fory.NULL_FLAG); + } else { + buffer.writeByte(Fory.NOT_NULL_VALUE_FLAG); + buffer.writeByte((Byte) (fieldValue)); } - case ClassResolver.CHAR_CLASS_ID: - { - if (fieldValue == null) { - buffer.writeByte(Fory.NULL_FLAG); - } else { - buffer.writeByte(Fory.NOT_NULL_VALUE_FLAG); - buffer.writeChar((Character) (fieldValue)); - } - return false; + return false; + case DispatchId.CHAR: + if (fieldValue == null) { + buffer.writeByte(Fory.NULL_FLAG); + } else { + buffer.writeByte(Fory.NOT_NULL_VALUE_FLAG); + buffer.writeChar((Character) (fieldValue)); } - case ClassResolver.SHORT_CLASS_ID: - { - if (fieldValue == null) { - buffer.writeByte(Fory.NULL_FLAG); - } else { - buffer.writeByte(Fory.NOT_NULL_VALUE_FLAG); - buffer.writeInt16((Short) (fieldValue)); - } - return false; + return false; + case DispatchId.INT16: + case DispatchId.UINT16: + if (fieldValue == null) { + buffer.writeByte(Fory.NULL_FLAG); + } else { + buffer.writeByte(Fory.NOT_NULL_VALUE_FLAG); + buffer.writeInt16((Short) (fieldValue)); } - case ClassResolver.INTEGER_CLASS_ID: - { - if (fieldValue == null) { - buffer.writeByte(Fory.NULL_FLAG); - } else { - 
buffer.writeByte(Fory.NOT_NULL_VALUE_FLAG); - if (fory.compressInt()) { - buffer.writeVarInt32((Integer) (fieldValue)); - } else { - buffer.writeInt32((Integer) (fieldValue)); - } - } - return false; + return false; + case DispatchId.INT32: + case DispatchId.UINT32: + if (fieldValue == null) { + buffer.writeByte(Fory.NULL_FLAG); + } else { + buffer.writeByte(Fory.NOT_NULL_VALUE_FLAG); + buffer.writeInt32((Integer) (fieldValue)); } - case ClassResolver.FLOAT_CLASS_ID: - { - if (fieldValue == null) { - buffer.writeByte(Fory.NULL_FLAG); - } else { - buffer.writeByte(Fory.NOT_NULL_VALUE_FLAG); - buffer.writeFloat32((Float) (fieldValue)); - } - return false; + return false; + case DispatchId.VARINT32: + if (fieldValue == null) { + buffer.writeByte(Fory.NULL_FLAG); + } else { + buffer.writeByte(Fory.NOT_NULL_VALUE_FLAG); + buffer.writeVarInt32((Integer) (fieldValue)); } - case ClassResolver.LONG_CLASS_ID: - { - if (fieldValue == null) { - buffer.writeByte(Fory.NULL_FLAG); - } else { - buffer.writeByte(Fory.NOT_NULL_VALUE_FLAG); - fory.writeInt64(buffer, (Long) fieldValue); - } - return false; + return false; + case DispatchId.VAR_UINT32: + if (fieldValue == null) { + buffer.writeByte(Fory.NULL_FLAG); + } else { + buffer.writeByte(Fory.NOT_NULL_VALUE_FLAG); + buffer.writeVarUint32((Integer) (fieldValue)); } - case ClassResolver.DOUBLE_CLASS_ID: - { - if (fieldValue == null) { - buffer.writeByte(Fory.NULL_FLAG); - } else { - buffer.writeByte(Fory.NOT_NULL_VALUE_FLAG); - buffer.writeFloat64((Double) (fieldValue)); - } - return false; + return false; + case DispatchId.FLOAT32: + if (fieldValue == null) { + buffer.writeByte(Fory.NULL_FLAG); + } else { + buffer.writeByte(Fory.NOT_NULL_VALUE_FLAG); + buffer.writeFloat32((Float) (fieldValue)); + } + return false; + case DispatchId.INT64: + case DispatchId.UINT64: + if (fieldValue == null) { + buffer.writeByte(Fory.NULL_FLAG); + } else { + buffer.writeByte(Fory.NOT_NULL_VALUE_FLAG); + buffer.writeInt64((Long) fieldValue); + } + 
return false; + case DispatchId.VARINT64: + if (fieldValue == null) { + buffer.writeByte(Fory.NULL_FLAG); + } else { + buffer.writeByte(Fory.NOT_NULL_VALUE_FLAG); + buffer.writeVarInt64((Long) fieldValue); + } + return false; + case DispatchId.TAGGED_INT64: + if (fieldValue == null) { + buffer.writeByte(Fory.NULL_FLAG); + } else { + buffer.writeByte(Fory.NOT_NULL_VALUE_FLAG); + buffer.writeTaggedInt64((Long) fieldValue); + } + return false; + case DispatchId.VAR_UINT64: + if (fieldValue == null) { + buffer.writeByte(Fory.NULL_FLAG); + } else { + buffer.writeByte(Fory.NOT_NULL_VALUE_FLAG); + buffer.writeVarUint64((Long) fieldValue); + } + return false; + case DispatchId.TAGGED_UINT64: + if (fieldValue == null) { + buffer.writeByte(Fory.NULL_FLAG); + } else { + buffer.writeByte(Fory.NOT_NULL_VALUE_FLAG); + buffer.writeTaggedUint64((Long) fieldValue); + } + return false; + case DispatchId.FLOAT64: + if (fieldValue == null) { + buffer.writeByte(Fory.NULL_FLAG); + } else { + buffer.writeByte(Fory.NOT_NULL_VALUE_FLAG); + buffer.writeFloat64((Double) (fieldValue)); } + return false; default: return true; } @@ -460,7 +514,7 @@ static Object readFinalObjectFieldValue( SerializationBinding binding, RefResolver refResolver, TypeResolver typeResolver, - FieldGroups.SerializationFieldInfo fieldInfo, + SerializationFieldInfo fieldInfo, MemoryBuffer buffer) { Serializer serializer = fieldInfo.classInfo.getSerializer(); binding.incReadDepth(); @@ -528,9 +582,7 @@ static Object readFinalObjectFieldValue( * @return the deserialized field value, or null if the field is nullable and was null */ static Object readOtherFieldValue( - SerializationBinding binding, - FieldGroups.SerializationFieldInfo fieldInfo, - MemoryBuffer buffer) { + SerializationBinding binding, SerializationFieldInfo fieldInfo, MemoryBuffer buffer) { // Note: Enum has special handling for xlang compatibility - no type info for enum fields if (fieldInfo.genericType.getCls().isEnum()) { // Only read null flag when the 
field is nullable (for xlang compatibility) @@ -577,7 +629,7 @@ static Object readOtherFieldValue( static Object readContainerFieldValue( SerializationBinding binding, Generics generics, - FieldGroups.SerializationFieldInfo fieldInfo, + SerializationFieldInfo fieldInfo, MemoryBuffer buffer) { Object fieldValue; switch (fieldInfo.refMode) { @@ -617,42 +669,56 @@ static Object readContainerFieldValue( * @return true if classId is not a primitive type id. */ static boolean readPrimitiveFieldValue( - Fory fory, - MemoryBuffer buffer, - Object targetObject, - FieldAccessor fieldAccessor, - short classId) { + MemoryBuffer buffer, Object targetObject, FieldAccessor fieldAccessor, int dispatchId) { long fieldOffset = fieldAccessor.getFieldOffset(); if (fieldOffset != -1) { - return readPrimitiveFieldValue(fory, buffer, targetObject, fieldOffset, classId); + return readPrimitiveFieldValue(buffer, targetObject, fieldOffset, dispatchId); } - switch (classId) { - case ClassResolver.PRIMITIVE_BOOLEAN_CLASS_ID: + switch (dispatchId) { + case DispatchId.PRIMITIVE_BOOL: fieldAccessor.set(targetObject, buffer.readBoolean()); return false; - case ClassResolver.PRIMITIVE_BYTE_CLASS_ID: + case DispatchId.PRIMITIVE_INT8: + case DispatchId.PRIMITIVE_UINT8: fieldAccessor.set(targetObject, buffer.readByte()); return false; - case ClassResolver.PRIMITIVE_CHAR_CLASS_ID: + case DispatchId.PRIMITIVE_CHAR: fieldAccessor.set(targetObject, buffer.readChar()); return false; - case ClassResolver.PRIMITIVE_SHORT_CLASS_ID: + case DispatchId.PRIMITIVE_INT16: + case DispatchId.PRIMITIVE_UINT16: fieldAccessor.set(targetObject, buffer.readInt16()); return false; - case ClassResolver.PRIMITIVE_INT_CLASS_ID: - if (fory.compressInt()) { - fieldAccessor.set(targetObject, buffer.readVarInt32()); - } else { - fieldAccessor.set(targetObject, buffer.readInt32()); - } + case DispatchId.PRIMITIVE_INT32: + case DispatchId.PRIMITIVE_UINT32: + fieldAccessor.set(targetObject, buffer.readInt32()); + return false; + 
case DispatchId.PRIMITIVE_VARINT32: + fieldAccessor.set(targetObject, buffer.readVarInt32()); + return false; + case DispatchId.PRIMITIVE_VAR_UINT32: + fieldAccessor.set(targetObject, buffer.readVarUint32()); return false; - case ClassResolver.PRIMITIVE_FLOAT_CLASS_ID: + case DispatchId.PRIMITIVE_FLOAT32: fieldAccessor.set(targetObject, buffer.readFloat32()); return false; - case ClassResolver.PRIMITIVE_LONG_CLASS_ID: - fieldAccessor.set(targetObject, fory.readInt64(buffer)); + case DispatchId.PRIMITIVE_INT64: + case DispatchId.PRIMITIVE_UINT64: + fieldAccessor.set(targetObject, buffer.readInt64()); + return false; + case DispatchId.PRIMITIVE_VARINT64: + fieldAccessor.set(targetObject, buffer.readVarInt64()); + return false; + case DispatchId.PRIMITIVE_TAGGED_INT64: + fieldAccessor.set(targetObject, buffer.readTaggedInt64()); + return false; + case DispatchId.PRIMITIVE_VAR_UINT64: + fieldAccessor.set(targetObject, buffer.readVarUint64()); return false; - case ClassResolver.PRIMITIVE_DOUBLE_CLASS_ID: + case DispatchId.PRIMITIVE_TAGGED_UINT64: + fieldAccessor.set(targetObject, buffer.readTaggedUint64()); + return false; + case DispatchId.PRIMITIVE_FLOAT64: fieldAccessor.set(targetObject, buffer.readFloat64()); return false; default: @@ -663,42 +729,59 @@ static boolean readPrimitiveFieldValue( /** * Read a primitive field value from buffer and set it using direct memory offset access. 
* - * @param fory the fory instance for compression settings * @param buffer the buffer to read from * @param targetObject the object to set the field value on * @param fieldOffset the memory offset of the field - * @param classId the class ID of the primitive type + * @param dispatchId the dispatch ID of the primitive type * @return true if classId is not a primitive type and needs further read handling */ private static boolean readPrimitiveFieldValue( - Fory fory, MemoryBuffer buffer, Object targetObject, long fieldOffset, short classId) { - switch (classId) { - case ClassResolver.PRIMITIVE_BOOLEAN_CLASS_ID: + MemoryBuffer buffer, Object targetObject, long fieldOffset, int dispatchId) { + switch (dispatchId) { + case DispatchId.PRIMITIVE_BOOL: Platform.putBoolean(targetObject, fieldOffset, buffer.readBoolean()); return false; - case ClassResolver.PRIMITIVE_BYTE_CLASS_ID: + case DispatchId.PRIMITIVE_INT8: + case DispatchId.PRIMITIVE_UINT8: Platform.putByte(targetObject, fieldOffset, buffer.readByte()); return false; - case ClassResolver.PRIMITIVE_CHAR_CLASS_ID: + case DispatchId.PRIMITIVE_CHAR: Platform.putChar(targetObject, fieldOffset, buffer.readChar()); return false; - case ClassResolver.PRIMITIVE_SHORT_CLASS_ID: + case DispatchId.PRIMITIVE_INT16: + case DispatchId.PRIMITIVE_UINT16: Platform.putShort(targetObject, fieldOffset, buffer.readInt16()); return false; - case ClassResolver.PRIMITIVE_INT_CLASS_ID: - if (fory.compressInt()) { - Platform.putInt(targetObject, fieldOffset, buffer.readVarInt32()); - } else { - Platform.putInt(targetObject, fieldOffset, buffer.readInt32()); - } + case DispatchId.PRIMITIVE_INT32: + case DispatchId.PRIMITIVE_UINT32: + Platform.putInt(targetObject, fieldOffset, buffer.readInt32()); + return false; + case DispatchId.PRIMITIVE_VARINT32: + Platform.putInt(targetObject, fieldOffset, buffer.readVarInt32()); + return false; + case DispatchId.PRIMITIVE_VAR_UINT32: + Platform.putInt(targetObject, fieldOffset, buffer.readVarUint32()); 
return false; - case ClassResolver.PRIMITIVE_FLOAT_CLASS_ID: + case DispatchId.PRIMITIVE_FLOAT32: Platform.putFloat(targetObject, fieldOffset, buffer.readFloat32()); return false; - case ClassResolver.PRIMITIVE_LONG_CLASS_ID: - Platform.putLong(targetObject, fieldOffset, fory.readInt64(buffer)); + case DispatchId.PRIMITIVE_INT64: + case DispatchId.PRIMITIVE_UINT64: + Platform.putLong(targetObject, fieldOffset, buffer.readInt64()); return false; - case ClassResolver.PRIMITIVE_DOUBLE_CLASS_ID: + case DispatchId.PRIMITIVE_VARINT64: + Platform.putLong(targetObject, fieldOffset, buffer.readVarInt64()); + return false; + case DispatchId.PRIMITIVE_TAGGED_INT64: + Platform.putLong(targetObject, fieldOffset, buffer.readTaggedInt64()); + return false; + case DispatchId.PRIMITIVE_VAR_UINT64: + Platform.putLong(targetObject, fieldOffset, buffer.readVarUint64()); + return false; + case DispatchId.PRIMITIVE_TAGGED_UINT64: + Platform.putLong(targetObject, fieldOffset, buffer.readTaggedUint64()); + return false; + case DispatchId.PRIMITIVE_FLOAT64: Platform.putDouble(targetObject, fieldOffset, buffer.readFloat64()); return false; default: @@ -710,24 +793,19 @@ private static boolean readPrimitiveFieldValue( * Read a nullable primitive field value from buffer. Reads the null flag first and returns early * if null. 
* - * @param fory the fory instance for compression settings * @param buffer the buffer to read from * @param targetObject the object to set the field value on * @param fieldAccessor the accessor to set the field value - * @param classId the class ID of the primitive type - * @return true if classId is not a primitive type and needs further read handling; false if value - * was null or successfully read + * @param dispatchId the class ID of the primitive type + * @return true if dispatchId is not a primitive type and needs further read handling; false if + * value was null or successfully read */ static boolean readPrimitiveNullableFieldValue( - Fory fory, - MemoryBuffer buffer, - Object targetObject, - FieldAccessor fieldAccessor, - short classId) { + MemoryBuffer buffer, Object targetObject, FieldAccessor fieldAccessor, int dispatchId) { if (buffer.readByte() == Fory.NULL_FLAG) { return false; } - return readPrimitiveFieldValue(fory, buffer, targetObject, fieldAccessor, classId); + return readPrimitiveFieldValue(buffer, targetObject, fieldAccessor, dispatchId); } /** @@ -740,63 +818,85 @@ static boolean readBasicObjectFieldValue( MemoryBuffer buffer, Object targetObject, FieldAccessor fieldAccessor, - short classId) { + int dispatchId) { if (!fory.isBasicTypesRefIgnored()) { return true; // let common path handle this. } // add time types serialization here. - switch (classId) { - case ClassResolver.STRING_CLASS_ID: // fastpath for string. + // Handle both primitive and nullable dispatchIds for schema compatible mode + // where Java field is boxed but ClassDef says non-nullable (primitive encoding) + switch (dispatchId) { + case DispatchId.STRING: // fastpath for string. 
if (fory.getStringSerializer().needToWriteRef()) { fieldAccessor.putObject(targetObject, fory.readJavaStringRef(buffer)); } else { fieldAccessor.putObject(targetObject, fory.readString(buffer)); } return false; - case ClassResolver.BOOLEAN_CLASS_ID: - { - fieldAccessor.putObject(targetObject, buffer.readBoolean()); - return false; - } - case ClassResolver.BYTE_CLASS_ID: - { - fieldAccessor.putObject(targetObject, buffer.readByte()); - return false; - } - case ClassResolver.CHAR_CLASS_ID: - { - fieldAccessor.putObject(targetObject, buffer.readChar()); - return false; - } - case ClassResolver.SHORT_CLASS_ID: - { - fieldAccessor.putObject(targetObject, buffer.readInt16()); - return false; - } - case ClassResolver.INTEGER_CLASS_ID: - { - if (fory.compressInt()) { - fieldAccessor.putObject(targetObject, buffer.readVarInt32()); - } else { - fieldAccessor.putObject(targetObject, buffer.readInt32()); - } - return false; - } - case ClassResolver.FLOAT_CLASS_ID: - { - fieldAccessor.putObject(targetObject, buffer.readFloat32()); - return false; - } - case ClassResolver.LONG_CLASS_ID: - { - fieldAccessor.putObject(targetObject, fory.readInt64(buffer)); - return false; - } - case ClassResolver.DOUBLE_CLASS_ID: - { - fieldAccessor.putObject(targetObject, buffer.readFloat64()); - return false; - } + case DispatchId.PRIMITIVE_BOOL: + case DispatchId.BOOL: + fieldAccessor.putObject(targetObject, buffer.readBoolean()); + return false; + case DispatchId.PRIMITIVE_INT8: + case DispatchId.PRIMITIVE_UINT8: + case DispatchId.INT8: + case DispatchId.UINT8: + fieldAccessor.putObject(targetObject, buffer.readByte()); + return false; + case DispatchId.PRIMITIVE_CHAR: + case DispatchId.CHAR: + fieldAccessor.putObject(targetObject, buffer.readChar()); + return false; + case DispatchId.PRIMITIVE_INT16: + case DispatchId.PRIMITIVE_UINT16: + case DispatchId.INT16: + case DispatchId.UINT16: + fieldAccessor.putObject(targetObject, buffer.readInt16()); + return false; + case 
DispatchId.PRIMITIVE_INT32: + case DispatchId.PRIMITIVE_UINT32: + case DispatchId.INT32: + case DispatchId.UINT32: + fieldAccessor.putObject(targetObject, buffer.readInt32()); + return false; + case DispatchId.PRIMITIVE_VARINT32: + case DispatchId.VARINT32: + fieldAccessor.putObject(targetObject, buffer.readVarInt32()); + return false; + case DispatchId.PRIMITIVE_VAR_UINT32: + case DispatchId.VAR_UINT32: + fieldAccessor.putObject(targetObject, buffer.readVarUint32()); + return false; + case DispatchId.PRIMITIVE_INT64: + case DispatchId.PRIMITIVE_UINT64: + case DispatchId.INT64: + case DispatchId.UINT64: + fieldAccessor.putObject(targetObject, buffer.readInt64()); + return false; + case DispatchId.PRIMITIVE_VARINT64: + case DispatchId.VARINT64: + fieldAccessor.putObject(targetObject, buffer.readVarInt64()); + return false; + case DispatchId.PRIMITIVE_TAGGED_INT64: + case DispatchId.TAGGED_INT64: + fieldAccessor.putObject(targetObject, buffer.readTaggedInt64()); + return false; + case DispatchId.PRIMITIVE_VAR_UINT64: + case DispatchId.VAR_UINT64: + fieldAccessor.putObject(targetObject, buffer.readVarUint64()); + return false; + case DispatchId.PRIMITIVE_TAGGED_UINT64: + case DispatchId.TAGGED_UINT64: + fieldAccessor.putObject(targetObject, buffer.readTaggedUint64()); + return false; + case DispatchId.PRIMITIVE_FLOAT32: + case DispatchId.FLOAT32: + fieldAccessor.putObject(targetObject, buffer.readFloat32()); + return false; + case DispatchId.PRIMITIVE_FLOAT64: + case DispatchId.FLOAT64: + fieldAccessor.putObject(targetObject, buffer.readFloat64()); + return false; default: return true; } @@ -810,8 +910,8 @@ static boolean readBasicObjectFieldValue( * @param buffer the buffer to read from * @param targetObject the object to set the field value on * @param fieldAccessor the accessor to set the field value - * @param classId the class ID of the boxed type - * @return true if classId is not a basic type or ref tracking is enabled, needing further read + * @param 
dispatchId the class ID of the boxed type + * @return true if dispatchId is not a basic type or ref tracking is enabled, needing further read * handling */ static boolean readBasicNullableObjectFieldValue( @@ -819,91 +919,117 @@ static boolean readBasicNullableObjectFieldValue( MemoryBuffer buffer, Object targetObject, FieldAccessor fieldAccessor, - short classId) { + int dispatchId) { if (!fory.isBasicTypesRefIgnored()) { return true; // let common path handle this. } // add time types serialization here. - switch (classId) { - case ClassResolver.STRING_CLASS_ID: // fastpath for string. + switch (dispatchId) { + case DispatchId.STRING: // fastpath for string. fieldAccessor.putObject(targetObject, fory.readJavaStringRef(buffer)); return false; - case ClassResolver.BOOLEAN_CLASS_ID: - { - if (buffer.readByte() == Fory.NULL_FLAG) { - fieldAccessor.putObject(targetObject, null); - } else { - fieldAccessor.putObject(targetObject, buffer.readBoolean()); - } - return false; + case DispatchId.BOOL: + if (buffer.readByte() == Fory.NULL_FLAG) { + fieldAccessor.putObject(targetObject, null); + } else { + fieldAccessor.putObject(targetObject, buffer.readBoolean()); } - case ClassResolver.BYTE_CLASS_ID: - { - if (buffer.readByte() == Fory.NULL_FLAG) { - fieldAccessor.putObject(targetObject, null); - } else { - fieldAccessor.putObject(targetObject, buffer.readByte()); - } - return false; + return false; + case DispatchId.INT8: + case DispatchId.UINT8: + if (buffer.readByte() == Fory.NULL_FLAG) { + fieldAccessor.putObject(targetObject, null); + } else { + fieldAccessor.putObject(targetObject, buffer.readByte()); } - case ClassResolver.CHAR_CLASS_ID: - { - if (buffer.readByte() == Fory.NULL_FLAG) { - fieldAccessor.putObject(targetObject, null); - } else { - fieldAccessor.putObject(targetObject, buffer.readChar()); - } - return false; + return false; + case DispatchId.CHAR: + if (buffer.readByte() == Fory.NULL_FLAG) { + fieldAccessor.putObject(targetObject, null); + } else { + 
fieldAccessor.putObject(targetObject, buffer.readChar()); } - case ClassResolver.SHORT_CLASS_ID: - { - if (buffer.readByte() == Fory.NULL_FLAG) { - fieldAccessor.putObject(targetObject, null); - } else { - fieldAccessor.putObject(targetObject, buffer.readInt16()); - } - return false; + return false; + case DispatchId.INT16: + case DispatchId.UINT16: + if (buffer.readByte() == Fory.NULL_FLAG) { + fieldAccessor.putObject(targetObject, null); + } else { + fieldAccessor.putObject(targetObject, buffer.readInt16()); } - case ClassResolver.INTEGER_CLASS_ID: - { - if (buffer.readByte() == Fory.NULL_FLAG) { - fieldAccessor.putObject(targetObject, null); - } else { - if (fory.compressInt()) { - fieldAccessor.putObject(targetObject, buffer.readVarInt32()); - } else { - fieldAccessor.putObject(targetObject, buffer.readInt32()); - } - } - return false; + return false; + case DispatchId.INT32: + case DispatchId.UINT32: + if (buffer.readByte() == Fory.NULL_FLAG) { + fieldAccessor.putObject(targetObject, null); + } else { + fieldAccessor.putObject(targetObject, buffer.readInt32()); } - case ClassResolver.FLOAT_CLASS_ID: - { - if (buffer.readByte() == Fory.NULL_FLAG) { - fieldAccessor.putObject(targetObject, null); - } else { - fieldAccessor.putObject(targetObject, buffer.readFloat32()); - } - return false; + return false; + case DispatchId.VARINT32: + if (buffer.readByte() == Fory.NULL_FLAG) { + fieldAccessor.putObject(targetObject, null); + } else { + fieldAccessor.putObject(targetObject, buffer.readVarInt32()); } - case ClassResolver.LONG_CLASS_ID: - { - if (buffer.readByte() == Fory.NULL_FLAG) { - fieldAccessor.putObject(targetObject, null); - } else { - fieldAccessor.putObject(targetObject, fory.readInt64(buffer)); - } - return false; + return false; + case DispatchId.VAR_UINT32: + if (buffer.readByte() == Fory.NULL_FLAG) { + fieldAccessor.putObject(targetObject, null); + } else { + fieldAccessor.putObject(targetObject, buffer.readVarUint32()); } - case 
ClassResolver.DOUBLE_CLASS_ID: - { - if (buffer.readByte() == Fory.NULL_FLAG) { - fieldAccessor.putObject(targetObject, null); - } else { - fieldAccessor.putObject(targetObject, buffer.readFloat64()); - } - return false; + return false; + case DispatchId.INT64: + case DispatchId.UINT64: + if (buffer.readByte() == Fory.NULL_FLAG) { + fieldAccessor.putObject(targetObject, null); + } else { + fieldAccessor.putObject(targetObject, buffer.readInt64()); + } + return false; + case DispatchId.VARINT64: + if (buffer.readByte() == Fory.NULL_FLAG) { + fieldAccessor.putObject(targetObject, null); + } else { + fieldAccessor.putObject(targetObject, buffer.readVarInt64()); + } + return false; + case DispatchId.TAGGED_INT64: + if (buffer.readByte() == Fory.NULL_FLAG) { + fieldAccessor.putObject(targetObject, null); + } else { + fieldAccessor.putObject(targetObject, buffer.readTaggedInt64()); + } + return false; + case DispatchId.VAR_UINT64: + if (buffer.readByte() == Fory.NULL_FLAG) { + fieldAccessor.putObject(targetObject, null); + } else { + fieldAccessor.putObject(targetObject, buffer.readVarUint64()); + } + return false; + case DispatchId.TAGGED_UINT64: + if (buffer.readByte() == Fory.NULL_FLAG) { + fieldAccessor.putObject(targetObject, null); + } else { + fieldAccessor.putObject(targetObject, buffer.readTaggedUint64()); + } + return false; + case DispatchId.FLOAT32: + if (buffer.readByte() == Fory.NULL_FLAG) { + fieldAccessor.putObject(targetObject, null); + } else { + fieldAccessor.putObject(targetObject, buffer.readFloat32()); + } + return false; + case DispatchId.FLOAT64: + if (buffer.readByte() == Fory.NULL_FLAG) { + fieldAccessor.putObject(targetObject, null); + } else { + fieldAccessor.putObject(targetObject, buffer.readFloat64()); } + return false; default: return true; } @@ -939,120 +1065,95 @@ private T copyRecord(T originObj) { } private Object[] copyFields(T originObj) { - FieldGroups.SerializationFieldInfo[] fieldInfos = this.fieldInfos; + SerializationFieldInfo[] 
fieldInfos = this.fieldInfos; if (fieldInfos == null) { fieldInfos = buildFieldsInfo(); } Object[] fieldValues = new Object[fieldInfos.length]; for (int i = 0; i < fieldInfos.length; i++) { - FieldGroups.SerializationFieldInfo fieldInfo = fieldInfos[i]; + SerializationFieldInfo fieldInfo = fieldInfos[i]; FieldAccessor fieldAccessor = fieldInfo.fieldAccessor; long fieldOffset = fieldAccessor.getFieldOffset(); if (fieldOffset != -1) { - fieldValues[i] = copyField(originObj, fieldOffset, fieldInfo.classId); + fieldValues[i] = copyField(originObj, fieldOffset, fieldInfo.dispatchId); } else { // field in record class has offset -1 Object fieldValue = fieldAccessor.get(originObj); - fieldValues[i] = fory.copyObject(fieldValue, fieldInfo.classId); + fieldValues[i] = fory.copyObject(fieldValue, fieldInfo.dispatchId); } } return RecordUtils.remapping(copyRecordInfo, fieldValues); } private void copyFields(T originObj, T newObj) { - FieldGroups.SerializationFieldInfo[] fieldInfos = this.fieldInfos; + SerializationFieldInfo[] fieldInfos = this.fieldInfos; if (fieldInfos == null) { fieldInfos = buildFieldsInfo(); } - for (FieldGroups.SerializationFieldInfo fieldInfo : fieldInfos) { - FieldAccessor fieldAccessor = fieldInfo.fieldAccessor; - long fieldOffset = fieldAccessor.getFieldOffset(); - // record class won't go to this path; - assert fieldOffset != -1; - switch (fieldInfo.classId) { - case ClassResolver.PRIMITIVE_BYTE_CLASS_ID: - Platform.putByte(newObj, fieldOffset, Platform.getByte(originObj, fieldOffset)); - break; - case ClassResolver.PRIMITIVE_CHAR_CLASS_ID: - Platform.putChar(newObj, fieldOffset, Platform.getChar(originObj, fieldOffset)); - break; - case ClassResolver.PRIMITIVE_SHORT_CLASS_ID: - Platform.putShort(newObj, fieldOffset, Platform.getShort(originObj, fieldOffset)); - break; - case ClassResolver.PRIMITIVE_INT_CLASS_ID: - Platform.putInt(newObj, fieldOffset, Platform.getInt(originObj, fieldOffset)); - break; - case ClassResolver.PRIMITIVE_LONG_CLASS_ID: - 
Platform.putLong(newObj, fieldOffset, Platform.getLong(originObj, fieldOffset)); - break; - case ClassResolver.PRIMITIVE_FLOAT_CLASS_ID: - Platform.putFloat(newObj, fieldOffset, Platform.getFloat(originObj, fieldOffset)); - break; - case ClassResolver.PRIMITIVE_DOUBLE_CLASS_ID: - Platform.putDouble(newObj, fieldOffset, Platform.getDouble(originObj, fieldOffset)); - break; - case ClassResolver.PRIMITIVE_BOOLEAN_CLASS_ID: - Platform.putBoolean(newObj, fieldOffset, Platform.getBoolean(originObj, fieldOffset)); - break; - case ClassResolver.BOOLEAN_CLASS_ID: - case ClassResolver.BYTE_CLASS_ID: - case ClassResolver.CHAR_CLASS_ID: - case ClassResolver.SHORT_CLASS_ID: - case ClassResolver.INTEGER_CLASS_ID: - case ClassResolver.FLOAT_CLASS_ID: - case ClassResolver.LONG_CLASS_ID: - case ClassResolver.DOUBLE_CLASS_ID: - case ClassResolver.STRING_CLASS_ID: - Platform.putObject(newObj, fieldOffset, Platform.getObject(originObj, fieldOffset)); - break; - default: - Platform.putObject( - newObj, fieldOffset, fory.copyObject(Platform.getObject(originObj, fieldOffset))); - } - } + copyFields(fory, fieldInfos, originObj, newObj); } public static void copyFields( - Fory fory, FieldGroups.SerializationFieldInfo[] fieldInfos, Object originObj, Object newObj) { - for (FieldGroups.SerializationFieldInfo fieldInfo : fieldInfos) { + Fory fory, SerializationFieldInfo[] fieldInfos, Object originObj, Object newObj) { + for (SerializationFieldInfo fieldInfo : fieldInfos) { FieldAccessor fieldAccessor = fieldInfo.fieldAccessor; long fieldOffset = fieldAccessor.getFieldOffset(); // record class won't go to this path; assert fieldOffset != -1; - switch (fieldInfo.classId) { - case ClassResolver.PRIMITIVE_BYTE_CLASS_ID: + switch (fieldInfo.dispatchId) { + case DispatchId.PRIMITIVE_BOOL: + Platform.putBoolean(newObj, fieldOffset, Platform.getBoolean(originObj, fieldOffset)); + break; + case DispatchId.PRIMITIVE_INT8: + case DispatchId.PRIMITIVE_UINT8: Platform.putByte(newObj, fieldOffset, 
Platform.getByte(originObj, fieldOffset)); break; - case ClassResolver.PRIMITIVE_CHAR_CLASS_ID: + case DispatchId.PRIMITIVE_CHAR: Platform.putChar(newObj, fieldOffset, Platform.getChar(originObj, fieldOffset)); break; - case ClassResolver.PRIMITIVE_SHORT_CLASS_ID: + case DispatchId.PRIMITIVE_INT16: + case DispatchId.PRIMITIVE_UINT16: Platform.putShort(newObj, fieldOffset, Platform.getShort(originObj, fieldOffset)); break; - case ClassResolver.PRIMITIVE_INT_CLASS_ID: + case DispatchId.PRIMITIVE_INT32: + case DispatchId.PRIMITIVE_VARINT32: + case DispatchId.PRIMITIVE_UINT32: + case DispatchId.PRIMITIVE_VAR_UINT32: Platform.putInt(newObj, fieldOffset, Platform.getInt(originObj, fieldOffset)); break; - case ClassResolver.PRIMITIVE_LONG_CLASS_ID: + case DispatchId.PRIMITIVE_INT64: + case DispatchId.PRIMITIVE_VARINT64: + case DispatchId.PRIMITIVE_TAGGED_INT64: + case DispatchId.PRIMITIVE_UINT64: + case DispatchId.PRIMITIVE_VAR_UINT64: + case DispatchId.PRIMITIVE_TAGGED_UINT64: Platform.putLong(newObj, fieldOffset, Platform.getLong(originObj, fieldOffset)); break; - case ClassResolver.PRIMITIVE_FLOAT_CLASS_ID: + case DispatchId.PRIMITIVE_FLOAT32: Platform.putFloat(newObj, fieldOffset, Platform.getFloat(originObj, fieldOffset)); break; - case ClassResolver.PRIMITIVE_DOUBLE_CLASS_ID: + case DispatchId.PRIMITIVE_FLOAT64: Platform.putDouble(newObj, fieldOffset, Platform.getDouble(originObj, fieldOffset)); break; - case ClassResolver.PRIMITIVE_BOOLEAN_CLASS_ID: - Platform.putBoolean(newObj, fieldOffset, Platform.getBoolean(originObj, fieldOffset)); - break; - case ClassResolver.BOOLEAN_CLASS_ID: - case ClassResolver.BYTE_CLASS_ID: - case ClassResolver.CHAR_CLASS_ID: - case ClassResolver.SHORT_CLASS_ID: - case ClassResolver.INTEGER_CLASS_ID: - case ClassResolver.FLOAT_CLASS_ID: - case ClassResolver.LONG_CLASS_ID: - case ClassResolver.DOUBLE_CLASS_ID: - case ClassResolver.STRING_CLASS_ID: + case DispatchId.BOOL: + case DispatchId.INT8: + case DispatchId.UINT8: + case 
DispatchId.CHAR: + case DispatchId.INT16: + case DispatchId.UINT16: + case DispatchId.INT32: + case DispatchId.VARINT32: + case DispatchId.UINT32: + case DispatchId.VAR_UINT32: + case DispatchId.INT64: + case DispatchId.VARINT64: + case DispatchId.TAGGED_INT64: + case DispatchId.UINT64: + case DispatchId.VAR_UINT64: + case DispatchId.TAGGED_UINT64: + case DispatchId.FLOAT32: + case DispatchId.FLOAT64: + case DispatchId.STRING: Platform.putObject(newObj, fieldOffset, Platform.getObject(originObj, fieldOffset)); break; default: @@ -1062,40 +1163,60 @@ public static void copyFields( } } - private Object copyField(Object targetObject, long fieldOffset, short classId) { - switch (classId) { - case ClassResolver.PRIMITIVE_BOOLEAN_CLASS_ID: + private Object copyField(Object targetObject, long fieldOffset, int typeId) { + switch (typeId) { + case DispatchId.PRIMITIVE_BOOL: return Platform.getBoolean(targetObject, fieldOffset); - case ClassResolver.PRIMITIVE_BYTE_CLASS_ID: + case DispatchId.PRIMITIVE_INT8: + case DispatchId.PRIMITIVE_UINT8: return Platform.getByte(targetObject, fieldOffset); - case ClassResolver.PRIMITIVE_CHAR_CLASS_ID: + case DispatchId.PRIMITIVE_CHAR: return Platform.getChar(targetObject, fieldOffset); - case ClassResolver.PRIMITIVE_SHORT_CLASS_ID: + case DispatchId.PRIMITIVE_INT16: + case DispatchId.PRIMITIVE_UINT16: return Platform.getShort(targetObject, fieldOffset); - case ClassResolver.PRIMITIVE_INT_CLASS_ID: + case DispatchId.PRIMITIVE_INT32: + case DispatchId.PRIMITIVE_VARINT32: + case DispatchId.PRIMITIVE_UINT32: + case DispatchId.PRIMITIVE_VAR_UINT32: return Platform.getInt(targetObject, fieldOffset); - case ClassResolver.PRIMITIVE_FLOAT_CLASS_ID: + case DispatchId.PRIMITIVE_FLOAT32: return Platform.getFloat(targetObject, fieldOffset); - case ClassResolver.PRIMITIVE_LONG_CLASS_ID: + case DispatchId.PRIMITIVE_INT64: + case DispatchId.PRIMITIVE_VARINT64: + case DispatchId.PRIMITIVE_TAGGED_INT64: + case DispatchId.PRIMITIVE_UINT64: + case 
DispatchId.PRIMITIVE_VAR_UINT64: + case DispatchId.PRIMITIVE_TAGGED_UINT64: return Platform.getLong(targetObject, fieldOffset); - case ClassResolver.PRIMITIVE_DOUBLE_CLASS_ID: + case DispatchId.PRIMITIVE_FLOAT64: return Platform.getDouble(targetObject, fieldOffset); - case ClassResolver.BOOLEAN_CLASS_ID: - case ClassResolver.BYTE_CLASS_ID: - case ClassResolver.CHAR_CLASS_ID: - case ClassResolver.SHORT_CLASS_ID: - case ClassResolver.INTEGER_CLASS_ID: - case ClassResolver.FLOAT_CLASS_ID: - case ClassResolver.LONG_CLASS_ID: - case ClassResolver.DOUBLE_CLASS_ID: - case ClassResolver.STRING_CLASS_ID: + case DispatchId.BOOL: + case DispatchId.INT8: + case DispatchId.UINT8: + case DispatchId.CHAR: + case DispatchId.INT16: + case DispatchId.UINT16: + case DispatchId.INT32: + case DispatchId.VARINT32: + case DispatchId.UINT32: + case DispatchId.VAR_UINT32: + case DispatchId.FLOAT32: + case DispatchId.INT64: + case DispatchId.VARINT64: + case DispatchId.TAGGED_INT64: + case DispatchId.UINT64: + case DispatchId.VAR_UINT64: + case DispatchId.TAGGED_UINT64: + case DispatchId.FLOAT64: + case DispatchId.STRING: return Platform.getObject(targetObject, fieldOffset); default: return fory.copyObject(Platform.getObject(targetObject, fieldOffset)); } } - private FieldGroups.SerializationFieldInfo[] buildFieldsInfo() { + private SerializationFieldInfo[] buildFieldsInfo() { List descriptors = new ArrayList<>(); if (RecordUtils.isRecord(type)) { RecordComponent[] components = RecordUtils.getRecordComponents(type); diff --git a/java/fory-core/src/main/java/org/apache/fory/serializer/ArraySerializers.java b/java/fory-core/src/main/java/org/apache/fory/serializer/ArraySerializers.java index 262798e939..6d0282020b 100644 --- a/java/fory-core/src/main/java/org/apache/fory/serializer/ArraySerializers.java +++ b/java/fory-core/src/main/java/org/apache/fory/serializer/ArraySerializers.java @@ -23,7 +23,6 @@ import java.util.Arrays; import org.apache.fory.Fory; import 
org.apache.fory.config.CompatibleMode; -import org.apache.fory.config.Config; import org.apache.fory.config.LongEncoding; import org.apache.fory.memory.MemoryBuffer; import org.apache.fory.memory.Platform; @@ -489,16 +488,20 @@ public int[] read(MemoryBuffer buffer) { } public static final class LongArraySerializer extends PrimitiveArraySerializer { + private final boolean compressLongArray; public LongArraySerializer(Fory fory) { super(fory, long[].class); + compressLongArray = + fory.getConfig().compressLongArray() + && fory.getConfig().longEncoding() != LongEncoding.FIXED; } @Override public void write(MemoryBuffer buffer, long[] value) { if (fory.getBufferCallback() == null) { - if (compressArray(fory.getConfig())) { - writeInt64s(buffer, value, fory.getConfig().longEncoding()); + if (compressLongArray) { + writeInt64Compressed(buffer, value, fory.getConfig().longEncoding()); return; } int size = Math.multiplyExact(value.length, 8); @@ -527,8 +530,8 @@ public long[] read(MemoryBuffer buffer) { } return values; } - if (compressArray(fory.getConfig())) { - return readInt64s(buffer, fory.getConfig().longEncoding()); + if (compressLongArray) { + return readInt64Compressed(buffer, fory.getConfig().longEncoding()); } int size = buffer.readVarUint32Small7(); int numElements = size / 8; @@ -539,17 +542,14 @@ public long[] read(MemoryBuffer buffer) { return values; } - private boolean compressArray(Config config) { - return config.compressLongArray() && config.longEncoding() != LongEncoding.LE_RAW_BYTES; - } - - private void writeInt64s(MemoryBuffer buffer, long[] value, LongEncoding longEncoding) { + private void writeInt64Compressed( + MemoryBuffer buffer, long[] value, LongEncoding longEncoding) { int length = value.length; buffer.writeVarUint32Small7(length); - if (longEncoding == LongEncoding.SLI) { + if (longEncoding == LongEncoding.TAGGED) { for (int i = 0; i < length; i++) { - buffer.writeSliInt64(value[i]); + buffer.writeTaggedInt64(value[i]); } return; } @@ 
-558,13 +558,13 @@ private void writeInt64s(MemoryBuffer buffer, long[] value, LongEncoding longEnc } } - public long[] readInt64s(MemoryBuffer buffer, LongEncoding longEncoding) { + public long[] readInt64Compressed(MemoryBuffer buffer, LongEncoding longEncoding) { int numElements = buffer.readVarUint32Small7(); long[] values = new long[numElements]; - if (longEncoding == LongEncoding.SLI) { + if (longEncoding == LongEncoding.TAGGED) { for (int i = 0; i < numElements; i++) { - values[i] = buffer.readSliInt64(); + values[i] = buffer.readTaggedInt64(); } } else { for (int i = 0; i < numElements; i++) { diff --git a/java/fory-core/src/main/java/org/apache/fory/serializer/FieldGroups.java b/java/fory-core/src/main/java/org/apache/fory/serializer/FieldGroups.java index dbcc3362d6..9eb8cc74a9 100644 --- a/java/fory-core/src/main/java/org/apache/fory/serializer/FieldGroups.java +++ b/java/fory-core/src/main/java/org/apache/fory/serializer/FieldGroups.java @@ -30,15 +30,13 @@ import org.apache.fory.reflect.TypeRef; import org.apache.fory.resolver.ClassInfo; import org.apache.fory.resolver.ClassInfoHolder; -import org.apache.fory.resolver.ClassResolver; import org.apache.fory.resolver.RefMode; import org.apache.fory.resolver.TypeResolver; import org.apache.fory.serializer.converter.FieldConverter; import org.apache.fory.type.Descriptor; import org.apache.fory.type.DescriptorGrouper; -import org.apache.fory.type.FinalObjectTypeStub; +import org.apache.fory.type.DispatchId; import org.apache.fory.type.GenericType; -import org.apache.fory.type.TypeUtils; import org.apache.fory.util.StringUtils; public class FieldGroups { @@ -118,20 +116,10 @@ public static FieldGroups buildFieldInfos(Fory fory, DescriptorGrouper grouper) return new FieldGroups(allBuildIn, containerFields, otherFields); } - static short getRegisteredClassId(Fory fory, Descriptor d) { - Field field = d.getField(); - Class cls = d.getTypeRef().getRawType(); - if (TypeUtils.unwrap(cls).isPrimitive() && field != 
null) { - return fory.getClassResolver().getRegisteredClassId(field.getType()); - } - Short classId = fory.getClassResolver().getRegisteredClassId(cls); - return classId == null ? ClassResolver.NO_CLASS_ID : classId; - } - public static final class SerializationFieldInfo { public final Descriptor descriptor; public final TypeRef typeRef; - public final short classId; + public final int dispatchId; public final ClassInfo classInfo; public final Serializer serializer; public final String qualifiedFieldName; @@ -152,26 +140,19 @@ public static final class SerializationFieldInfo { SerializationFieldInfo(Fory fory, Descriptor d) { this.descriptor = d; this.typeRef = d.getTypeRef(); - this.classId = getRegisteredClassId(fory, d); + this.dispatchId = DispatchId.getDispatchId(fory, d); TypeResolver resolver = fory._getTypeResolver(); // invoke `copy` to avoid ObjectSerializer construct clear serializer by `clearSerializer`. - if (typeRef.getRawType() == FinalObjectTypeStub.class) { - // `FinalObjectTypeStub` has no fields, using its `classInfo` - // will make deserialization failed. 
- classInfo = null; - } else { - if (resolver.isMonomorphic(descriptor)) { - classInfo = SerializationUtils.getClassInfo(fory, typeRef.getRawType()); - if (!fory.isShareMeta() - && !fory.isCompatible() - && classInfo.getSerializer() instanceof ReplaceResolveSerializer) { - // overwrite replace resolve serializer for final field - classInfo.setSerializer( - new FinalFieldReplaceResolveSerializer(fory, classInfo.getCls())); - } - } else { - classInfo = null; + if (resolver.isMonomorphic(descriptor)) { + classInfo = SerializationUtils.getClassInfo(fory, typeRef.getRawType()); + if (!fory.isShareMeta() + && !fory.isCompatible() + && classInfo.getSerializer() instanceof ReplaceResolveSerializer) { + // overwrite replace resolve serializer for final field + classInfo.setSerializer(new FinalFieldReplaceResolveSerializer(fory, classInfo.getCls())); } + } else { + classInfo = null; } useDeclaredTypeInfo = classInfo != null && resolver.isMonomorphic(descriptor); if (classInfo != null) { @@ -226,7 +207,7 @@ public String toString() { + ", typeRef=" + typeRef + ", classId=" - + classId + + dispatchId + ", fieldAccessor=" + fieldAccessor + ", nullable=" diff --git a/java/fory-core/src/main/java/org/apache/fory/serializer/MetaSharedLayerSerializer.java b/java/fory-core/src/main/java/org/apache/fory/serializer/MetaSharedLayerSerializer.java index af0da89a9d..8f01b6d694 100644 --- a/java/fory-core/src/main/java/org/apache/fory/serializer/MetaSharedLayerSerializer.java +++ b/java/fory-core/src/main/java/org/apache/fory/serializer/MetaSharedLayerSerializer.java @@ -36,6 +36,7 @@ import org.apache.fory.serializer.FieldGroups.SerializationFieldInfo; import org.apache.fory.type.Descriptor; import org.apache.fory.type.DescriptorGrouper; +import org.apache.fory.type.DispatchId; import org.apache.fory.type.Generics; /** @@ -125,16 +126,16 @@ private void writeFinalFields(MemoryBuffer buffer, T value) { for (SerializationFieldInfo fieldInfo : buildInFields) { FieldAccessor fieldAccessor = 
fieldInfo.fieldAccessor; boolean nullable = fieldInfo.nullable; - short classId = fieldInfo.classId; + int dispatchId = fieldInfo.dispatchId; if (AbstractObjectSerializer.writePrimitiveFieldValue( - fory, buffer, value, fieldAccessor, classId)) { + buffer, value, fieldAccessor, dispatchId)) { Object fieldValue = fieldAccessor.getObject(value); boolean writeBasicObjectResult = nullable ? AbstractObjectSerializer.writeBasicNullableObjectFieldValue( - fory, buffer, fieldValue, classId) + fory, buffer, fieldValue, dispatchId) : AbstractObjectSerializer.writeBasicObjectFieldValue( - fory, buffer, fieldValue, classId); + fory, buffer, fieldValue, dispatchId); if (writeBasicObjectResult) { Serializer serializer = fieldInfo.classInfo.getSerializer(); if (!metaShareEnabled || fieldInfo.useDeclaredTypeInfo) { @@ -232,14 +233,13 @@ private void readFinalFields(MemoryBuffer buffer, T obj) { FieldAccessor fieldAccessor = fieldInfo.fieldAccessor; if (fieldAccessor != null) { boolean nullable = fieldInfo.nullable; - short classId = fieldInfo.classId; - if (AbstractObjectSerializer.readPrimitiveFieldValue( - fory, buffer, obj, fieldAccessor, classId) + int dispatchId = fieldInfo.dispatchId; + if (AbstractObjectSerializer.readPrimitiveFieldValue(buffer, obj, fieldAccessor, dispatchId) && (nullable ? 
AbstractObjectSerializer.readBasicNullableObjectFieldValue( - fory, buffer, obj, fieldAccessor, classId) + fory, buffer, obj, fieldAccessor, dispatchId) : AbstractObjectSerializer.readBasicObjectFieldValue( - fory, buffer, obj, fieldAccessor, classId))) { + fory, buffer, obj, fieldAccessor, dispatchId))) { Object fieldValue = AbstractObjectSerializer.readFinalObjectFieldValue( binding, refResolver, classResolver, fieldInfo, buffer); @@ -247,7 +247,8 @@ private void readFinalFields(MemoryBuffer buffer, T obj) { } } else { // Field doesn't exist in current class - skip the value - if (MetaSharedSerializer.skipPrimitiveFieldValueFailed(fory, fieldInfo.classId, buffer)) { + if (MetaSharedSerializer.skipPrimitiveFieldValueFailed( + fory, fieldInfo.dispatchId, buffer)) { if (fieldInfo.classInfo == null) { fory.readRef(buffer, classInfoHolder); } else { @@ -340,37 +341,59 @@ public void writeFieldsValues(MemoryBuffer buffer, Object[] vals) { private void writeFieldValueFromArray( MemoryBuffer buffer, SerializationFieldInfo fieldInfo, Object fieldValue) { - short classId = fieldInfo.classId; + int dispatchId = fieldInfo.dispatchId; boolean nullable = fieldInfo.nullable; // Handle primitives first - switch (classId) { - case ClassResolver.PRIMITIVE_BOOLEAN_CLASS_ID: + switch (dispatchId) { + case DispatchId.PRIMITIVE_BOOL: buffer.writeBoolean((Boolean) fieldValue); return; - case ClassResolver.PRIMITIVE_BYTE_CLASS_ID: + case DispatchId.PRIMITIVE_INT8: + case DispatchId.PRIMITIVE_UINT8: buffer.writeByte((Byte) fieldValue); return; - case ClassResolver.PRIMITIVE_CHAR_CLASS_ID: + case DispatchId.PRIMITIVE_CHAR: buffer.writeChar((Character) fieldValue); return; - case ClassResolver.PRIMITIVE_SHORT_CLASS_ID: + case DispatchId.PRIMITIVE_INT16: + case DispatchId.PRIMITIVE_UINT16: buffer.writeInt16((Short) fieldValue); return; - case ClassResolver.PRIMITIVE_INT_CLASS_ID: - if (fory.compressInt()) { - buffer.writeVarInt32((Integer) fieldValue); - } else { - 
buffer.writeInt32((Integer) fieldValue); - } + case DispatchId.PRIMITIVE_INT32: + buffer.writeInt32((Integer) fieldValue); return; - case ClassResolver.PRIMITIVE_FLOAT_CLASS_ID: - buffer.writeFloat32((Float) fieldValue); + case DispatchId.PRIMITIVE_VARINT32: + buffer.writeVarInt32((Integer) fieldValue); + return; + case DispatchId.PRIMITIVE_UINT32: + buffer.writeInt32((Integer) fieldValue); + return; + case DispatchId.PRIMITIVE_VAR_UINT32: + buffer.writeVarUint32((Integer) fieldValue); + return; + case DispatchId.PRIMITIVE_INT64: + buffer.writeInt64((Long) fieldValue); + return; + case DispatchId.PRIMITIVE_VARINT64: + buffer.writeVarInt64((Long) fieldValue); + return; + case DispatchId.PRIMITIVE_TAGGED_INT64: + buffer.writeTaggedInt64((Long) fieldValue); return; - case ClassResolver.PRIMITIVE_LONG_CLASS_ID: - fory.writeInt64(buffer, (Long) fieldValue); + case DispatchId.PRIMITIVE_UINT64: + buffer.writeInt64((Long) fieldValue); return; - case ClassResolver.PRIMITIVE_DOUBLE_CLASS_ID: + case DispatchId.PRIMITIVE_VAR_UINT64: + buffer.writeVarUint64((Long) fieldValue); + return; + case DispatchId.PRIMITIVE_TAGGED_UINT64: + buffer.writeTaggedUint64((Long) fieldValue); + return; + case DispatchId.PRIMITIVE_FLOAT32: + buffer.writeFloat32((Float) fieldValue); + return; + case DispatchId.PRIMITIVE_FLOAT64: buffer.writeFloat64((Double) fieldValue); return; default: @@ -428,14 +451,11 @@ public void readFields(MemoryBuffer buffer, Object[] vals) { } private Object readFieldValueToArray(MemoryBuffer buffer, SerializationFieldInfo fieldInfo) { - short classId = fieldInfo.classId; - + int dispatchId = fieldInfo.dispatchId; // Handle primitives - if (classId >= ClassResolver.PRIMITIVE_BOOLEAN_CLASS_ID - && classId <= ClassResolver.PRIMITIVE_DOUBLE_CLASS_ID) { - return Serializers.readPrimitiveValue(fory, buffer, classId); + if (DispatchId.isPrimitive(dispatchId)) { + return Serializers.readPrimitiveValue(fory, buffer, dispatchId); } - // Handle objects return 
AbstractObjectSerializer.readFinalObjectFieldValue( binding, refResolver, classResolver, fieldInfo, buffer); diff --git a/java/fory-core/src/main/java/org/apache/fory/serializer/MetaSharedSerializer.java b/java/fory-core/src/main/java/org/apache/fory/serializer/MetaSharedSerializer.java index 60c4044572..5191fa7af4 100644 --- a/java/fory-core/src/main/java/org/apache/fory/serializer/MetaSharedSerializer.java +++ b/java/fory-core/src/main/java/org/apache/fory/serializer/MetaSharedSerializer.java @@ -40,7 +40,9 @@ import org.apache.fory.serializer.FieldGroups.SerializationFieldInfo; import org.apache.fory.type.Descriptor; import org.apache.fory.type.DescriptorGrouper; +import org.apache.fory.type.DispatchId; import org.apache.fory.type.Generics; +import org.apache.fory.type.Types; import org.apache.fory.util.DefaultValueUtils; import org.apache.fory.util.GraalvmSupport; import org.apache.fory.util.Preconditions; @@ -99,11 +101,12 @@ public MetaSharedSerializer(Fory fory, Class type, ClassDef classDef) { "========== MetaSharedSerializer sorted descriptors for {} ==========", type.getName()); for (Descriptor d : descriptorGrouper.getSortedDescriptors()) { LOG.info( - " {} -> {}, ref {}, nullable {}", + " {} -> {}, ref {}, nullable {}, type id {}", d.getName(), d.getTypeName(), d.isTrackingRef(), - d.isNullable()); + d.isNullable(), + Types.getDescriptorTypeId(fory, d)); } } // d.getField() may be null if not exists in this class when meta share enabled. 
@@ -164,6 +167,22 @@ public void xwrite(MemoryBuffer buffer, T value) { @Override public T read(MemoryBuffer buffer) { + if (Utils.debugOutputEnabled()) { + LOG.info("========== MetaSharedSerializer.read() for {} ==========", type.getName()); + LOG.info("Buffer readerIndex at start: {}", buffer.readerIndex()); + LOG.info("buildInFields count: {}", buildInFields.length); + for (int i = 0; i < buildInFields.length; i++) { + SerializationFieldInfo fi = buildInFields[i]; + LOG.info( + " buildInField[{}]: name={}, dispatchId={}, nullable={}, isPrimitive={}, hasAccessor={}", + i, + fi.qualifiedFieldName, + fi.dispatchId, + fi.nullable, + fi.isPrimitive, + fi.fieldAccessor != null); + } + } if (isRecord) { Object[] fieldValues = new Object[buildInFields.length + otherFields.length + containerFields.length]; @@ -183,22 +202,45 @@ public T read(MemoryBuffer buffer) { for (SerializationFieldInfo fieldInfo : this.buildInFields) { FieldAccessor fieldAccessor = fieldInfo.fieldAccessor; boolean nullable = fieldInfo.nullable; + if (Utils.debugOutputEnabled()) { + LOG.info( + "[Java] About to read field: name={}, dispatchId={}, nullable={}, isPrimitive={}, bufferPos={}", + fieldInfo.qualifiedFieldName, + fieldInfo.dispatchId, + nullable, + fieldInfo.isPrimitive, + buffer.readerIndex()); + // Print next 16 bytes from buffer for debugging + int pos = buffer.readerIndex(); + int remaining = Math.min(16, buffer.size() - pos); + if (remaining > 0) { + byte[] peek = new byte[remaining]; + for (int i = 0; i < remaining; i++) { + peek[i] = buffer.getByte(pos + i); + } + StringBuilder hex = new StringBuilder(); + for (byte b : peek) { + hex.append(String.format("%02x", b)); + } + LOG.info("[Java] Next {} bytes at pos {}: {}", remaining, pos, hex.toString()); + } + } if (fieldAccessor != null) { - short classId = fieldInfo.classId; + int dispatchId = fieldInfo.dispatchId; boolean needRead = true; if (fieldInfo.isPrimitive) { if (nullable) { - needRead = readPrimitiveNullableFieldValue(fory, 
buffer, obj, fieldAccessor, classId); + needRead = readPrimitiveNullableFieldValue(buffer, obj, fieldAccessor, dispatchId); } else { - needRead = readPrimitiveFieldValue(fory, buffer, obj, fieldAccessor, classId); + needRead = readPrimitiveFieldValue(buffer, obj, fieldAccessor, dispatchId); } } if (needRead && (nullable ? AbstractObjectSerializer.readBasicNullableObjectFieldValue( - fory, buffer, obj, fieldAccessor, classId) + fory, buffer, obj, fieldAccessor, dispatchId) : AbstractObjectSerializer.readBasicObjectFieldValue( - fory, buffer, obj, fieldAccessor, classId))) { + fory, buffer, obj, fieldAccessor, dispatchId))) { assert fieldInfo.classInfo != null; Object fieldValue = AbstractObjectSerializer.readFinalObjectFieldValue( @@ -208,7 +250,7 @@ public T read(MemoryBuffer buffer) { } else { if (fieldInfo.fieldConverter == null) { // Skip the field value from buffer since it doesn't exist in current class - if (skipPrimitiveFieldValueFailed(fory, fieldInfo.classId, buffer)) { + if (skipPrimitiveFieldValueFailed(fory, fieldInfo.dispatchId, buffer)) { if (fieldInfo.classInfo == null) { // TODO(chaokunyang) support registered serializer in peer with ref tracking disabled. 
binding.readRef(buffer, classInfoHolder); @@ -243,10 +285,9 @@ public T read(MemoryBuffer buffer) { private void compatibleRead(MemoryBuffer buffer, SerializationFieldInfo fieldInfo, Object obj) { Object fieldValue; - short classId = fieldInfo.classId; - if (classId >= ClassResolver.PRIMITIVE_BOOLEAN_CLASS_ID - && classId <= ClassResolver.PRIMITIVE_DOUBLE_CLASS_ID) { - fieldValue = Serializers.readPrimitiveValue(fory, buffer, classId); + int dispatchId = fieldInfo.dispatchId; + if (DispatchId.isPrimitive(dispatchId)) { + fieldValue = Serializers.readPrimitiveValue(fory, buffer, dispatchId); } else { fieldValue = AbstractObjectSerializer.readFinalObjectFieldValue( @@ -280,11 +321,10 @@ private void readFields(MemoryBuffer buffer, Object[] fields) { for (SerializationFieldInfo fieldInfo : this.buildInFields) { if (fieldInfo.fieldAccessor != null) { assert fieldInfo.classInfo != null; - short classId = fieldInfo.classId; + int dispatchId = fieldInfo.dispatchId; // primitive field won't write null flag. - if (classId >= ClassResolver.PRIMITIVE_BOOLEAN_CLASS_ID - && classId <= ClassResolver.PRIMITIVE_DOUBLE_CLASS_ID) { - fields[counter++] = Serializers.readPrimitiveValue(fory, buffer, classId); + if (DispatchId.isPrimitive(dispatchId)) { + fields[counter++] = Serializers.readPrimitiveValue(fory, buffer, dispatchId); } else { Object fieldValue = AbstractObjectSerializer.readFinalObjectFieldValue( @@ -293,7 +333,7 @@ private void readFields(MemoryBuffer buffer, Object[] fields) { } } else { // Skip the field value from buffer since it doesn't exist in current class - if (skipPrimitiveFieldValueFailed(fory, fieldInfo.classId, buffer)) { + if (skipPrimitiveFieldValueFailed(fory, fieldInfo.dispatchId, buffer)) { if (fieldInfo.classInfo == null) { // TODO(chaokunyang) support registered serializer in peer with ref tracking disabled. 
fory.readRef(buffer, classInfoHolder); @@ -319,40 +359,60 @@ private void readFields(MemoryBuffer buffer, Object[] fields) { } /** Skip primitive primitive field value since it doesn't write null flag. */ - static boolean skipPrimitiveFieldValueFailed(Fory fory, short classId, MemoryBuffer buffer) { - switch (classId) { - case ClassResolver.PRIMITIVE_BOOLEAN_CLASS_ID: + static boolean skipPrimitiveFieldValueFailed(Fory fory, int dispatchId, MemoryBuffer buffer) { + switch (dispatchId) { + case DispatchId.PRIMITIVE_BOOL: buffer.increaseReaderIndex(1); return false; - case ClassResolver.PRIMITIVE_BYTE_CLASS_ID: + case DispatchId.PRIMITIVE_INT8: + case DispatchId.PRIMITIVE_UINT8: buffer.increaseReaderIndex(1); return false; - case ClassResolver.PRIMITIVE_CHAR_CLASS_ID: + case DispatchId.PRIMITIVE_CHAR: buffer.increaseReaderIndex(2); return false; - case ClassResolver.PRIMITIVE_SHORT_CLASS_ID: + case DispatchId.PRIMITIVE_INT16: + case DispatchId.PRIMITIVE_UINT16: buffer.increaseReaderIndex(2); return false; - case ClassResolver.PRIMITIVE_INT_CLASS_ID: - if (fory.compressInt()) { - buffer.readVarInt32(); - } else { - buffer.increaseReaderIndex(4); - } + case DispatchId.PRIMITIVE_INT32: + buffer.increaseReaderIndex(4); return false; - case ClassResolver.PRIMITIVE_FLOAT_CLASS_ID: + case DispatchId.PRIMITIVE_VARINT32: + buffer.readVarInt32(); + return false; + case DispatchId.PRIMITIVE_UINT32: buffer.increaseReaderIndex(4); return false; - case ClassResolver.PRIMITIVE_LONG_CLASS_ID: - fory.readInt64(buffer); + case DispatchId.PRIMITIVE_VAR_UINT32: + buffer.readVarUint32(); + return false; + case DispatchId.PRIMITIVE_INT64: + buffer.increaseReaderIndex(8); + return false; + case DispatchId.PRIMITIVE_VARINT64: + buffer.readVarInt64(); + return false; + case DispatchId.PRIMITIVE_TAGGED_INT64: + buffer.readTaggedInt64(); + return false; + case DispatchId.PRIMITIVE_UINT64: + buffer.increaseReaderIndex(8); + return false; + case DispatchId.PRIMITIVE_VAR_UINT64: + 
buffer.readVarUint64(); + return false; + case DispatchId.PRIMITIVE_TAGGED_UINT64: + buffer.readTaggedUint64(); return false; - case ClassResolver.PRIMITIVE_DOUBLE_CLASS_ID: + case DispatchId.PRIMITIVE_FLOAT32: + buffer.increaseReaderIndex(4); + return false; + case DispatchId.PRIMITIVE_FLOAT64: buffer.increaseReaderIndex(8); return false; default: - { - return true; - } + return true; } } diff --git a/java/fory-core/src/main/java/org/apache/fory/serializer/NonexistentClassSerializers.java b/java/fory-core/src/main/java/org/apache/fory/serializer/NonexistentClassSerializers.java index 83366f1286..f6ae69474d 100644 --- a/java/fory-core/src/main/java/org/apache/fory/serializer/NonexistentClassSerializers.java +++ b/java/fory-core/src/main/java/org/apache/fory/serializer/NonexistentClassSerializers.java @@ -29,6 +29,8 @@ import org.apache.fory.collection.IdentityObjectIntMap; import org.apache.fory.collection.LongMap; import org.apache.fory.collection.MapEntry; +import org.apache.fory.logging.Logger; +import org.apache.fory.logging.LoggerFactory; import org.apache.fory.memory.MemoryBuffer; import org.apache.fory.meta.ClassDef; import org.apache.fory.resolver.ClassInfo; @@ -43,11 +45,13 @@ import org.apache.fory.serializer.Serializers.CrossLanguageCompatibleSerializer; import org.apache.fory.type.Descriptor; import org.apache.fory.type.DescriptorGrouper; +import org.apache.fory.type.DispatchId; import org.apache.fory.type.Generics; import org.apache.fory.util.Preconditions; @SuppressWarnings({"rawtypes", "unchecked"}) public final class NonexistentClassSerializers { + private static final Logger LOG = LoggerFactory.getLogger(NonexistentClassSerializers.class); private static final class ClassFieldsInfo { private final SerializationFieldInfo[] buildInFields; @@ -117,8 +121,18 @@ public void write(MemoryBuffer buffer, Object v) { for (SerializationFieldInfo fieldInfo : fieldsInfo.buildInFields) { Object fieldValue = value.get(fieldInfo.qualifiedFieldName); ClassInfo 
classInfo = fieldInfo.classInfo; - if (classResolver.isPrimitive(fieldInfo.classId)) { - classInfo.getSerializer().write(buffer, fieldValue); + if (fory.getConfig().isForyDebugOutputEnabled()) { + LOG.info( + "NonexistentClassSerializer.write: field={}, dispatchId={}, isPrimitive={}, value={}, serializer={}", + fieldInfo.qualifiedFieldName, + fieldInfo.dispatchId, + DispatchId.isPrimitive(fieldInfo.dispatchId), + fieldValue, + classInfo != null ? classInfo.getSerializer() : null); + } + if (DispatchId.isPrimitive(fieldInfo.dispatchId)) { + // Use dispatch-based write to ensure correct encoding (e.g., VARINT64 vs FIXED_INT64) + Serializers.writePrimitiveValue(buffer, fieldValue, fieldInfo.dispatchId); } else { if (fieldInfo.useDeclaredTypeInfo) { // whether tracking ref is recorded in `fieldInfo.serializer`, so it's still @@ -180,8 +194,9 @@ public Object read(MemoryBuffer buffer) { // TODO(chaokunyang) support registered serializer in peer with ref tracking disabled. fieldValue = fory.readRef(buffer, classInfoHolder); } else { - if (classResolver.isPrimitive(fieldInfo.classId)) { - fieldValue = fieldInfo.classInfo.getSerializer().read(buffer); + if (DispatchId.isPrimitive(fieldInfo.dispatchId)) { + // Use dispatch-based read to ensure correct encoding (e.g., VARINT64 vs FIXED_INT64) + fieldValue = Serializers.readPrimitiveValue(fory, buffer, fieldInfo.dispatchId); } else { fieldValue = AbstractObjectSerializer.readFinalObjectFieldValue( diff --git a/java/fory-core/src/main/java/org/apache/fory/serializer/ObjectSerializer.java b/java/fory-core/src/main/java/org/apache/fory/serializer/ObjectSerializer.java index 376ed5c7d9..7a858deebc 100644 --- a/java/fory-core/src/main/java/org/apache/fory/serializer/ObjectSerializer.java +++ b/java/fory-core/src/main/java/org/apache/fory/serializer/ObjectSerializer.java @@ -31,13 +31,13 @@ import org.apache.fory.memory.MemoryBuffer; import org.apache.fory.meta.ClassDef; import org.apache.fory.reflect.FieldAccessor; -import 
org.apache.fory.resolver.ClassResolver; import org.apache.fory.resolver.RefResolver; import org.apache.fory.resolver.TypeResolver; import org.apache.fory.serializer.FieldGroups.SerializationFieldInfo; import org.apache.fory.serializer.struct.Fingerprint; import org.apache.fory.type.Descriptor; import org.apache.fory.type.DescriptorGrouper; +import org.apache.fory.type.DispatchId; import org.apache.fory.type.Generics; import org.apache.fory.util.MurmurHash3; import org.apache.fory.util.Utils; @@ -169,13 +169,13 @@ private void writeBuildInFields( for (SerializationFieldInfo fieldInfo : this.buildInFields) { FieldAccessor fieldAccessor = fieldInfo.fieldAccessor; boolean nullable = fieldInfo.nullable; - short classId = fieldInfo.classId; - if (writePrimitiveFieldValue(fory, buffer, value, fieldAccessor, classId)) { + int dispatchId = fieldInfo.dispatchId; + if (writePrimitiveFieldValue(buffer, value, fieldAccessor, dispatchId)) { Object fieldValue = fieldAccessor.getObject(value); boolean needWrite = nullable - ? writeBasicNullableObjectFieldValue(fory, buffer, fieldValue, classId) - : writeBasicObjectFieldValue(fory, buffer, fieldValue, classId); + ? 
writeBasicNullableObjectFieldValue(fory, buffer, fieldValue, dispatchId) + : writeBasicObjectFieldValue(fory, buffer, fieldValue, dispatchId); if (needWrite) { Serializer serializer = fieldInfo.classInfo.getSerializer(); if (!metaShareEnabled || fieldInfo.useDeclaredTypeInfo) { @@ -268,10 +268,9 @@ public Object[] readFields(MemoryBuffer buffer) { int counter = 0; // read order: primitive,boxed,final,other,collection,map for (SerializationFieldInfo fieldInfo : this.buildInFields) { - short classId = fieldInfo.classId; - if (classId >= ClassResolver.PRIMITIVE_BOOLEAN_CLASS_ID - && classId <= ClassResolver.PRIMITIVE_DOUBLE_CLASS_ID) { - fieldValues[counter++] = Serializers.readPrimitiveValue(fory, buffer, classId); + int dispatchId = fieldInfo.dispatchId; + if (DispatchId.isPrimitive(dispatchId)) { + fieldValues[counter++] = Serializers.readPrimitiveValue(fory, buffer, dispatchId); } else { Object fieldValue = readFinalObjectFieldValue(binding, refResolver, typeResolver, fieldInfo, buffer); @@ -302,11 +301,11 @@ public T readAndSetFields(MemoryBuffer buffer, T obj) { for (SerializationFieldInfo fieldInfo : this.buildInFields) { FieldAccessor fieldAccessor = fieldInfo.fieldAccessor; boolean nullable = fieldInfo.nullable; - short classId = fieldInfo.classId; - if (readPrimitiveFieldValue(fory, buffer, obj, fieldAccessor, classId) + int dispatchId = fieldInfo.dispatchId; + if (readPrimitiveFieldValue(buffer, obj, fieldAccessor, dispatchId) && (nullable - ? readBasicNullableObjectFieldValue(fory, buffer, obj, fieldAccessor, classId) - : readBasicObjectFieldValue(fory, buffer, obj, fieldAccessor, classId))) { + ? 
readBasicNullableObjectFieldValue(fory, buffer, obj, fieldAccessor, dispatchId) + : readBasicObjectFieldValue(fory, buffer, obj, fieldAccessor, dispatchId))) { Object fieldValue = readFinalObjectFieldValue(binding, refResolver, typeResolver, fieldInfo, buffer); fieldAccessor.putObject(obj, fieldValue); diff --git a/java/fory-core/src/main/java/org/apache/fory/serializer/PrimitiveSerializers.java b/java/fory-core/src/main/java/org/apache/fory/serializer/PrimitiveSerializers.java index aecc0b56b8..0affca6517 100644 --- a/java/fory-core/src/main/java/org/apache/fory/serializer/PrimitiveSerializers.java +++ b/java/fory-core/src/main/java/org/apache/fory/serializer/PrimitiveSerializers.java @@ -197,11 +197,12 @@ public Long read(MemoryBuffer buffer) { public static Expression writeInt64( Expression buffer, Expression v, LongEncoding longEncoding, boolean ensureBounds) { switch (longEncoding) { - case LE_RAW_BYTES: + case FIXED: return new Invoke(buffer, "writeInt64", v); - case SLI: - return new Invoke(buffer, ensureBounds ? "writeSliInt64" : "_unsafeWriteSliInt64", v); - case PVL: + case TAGGED: + return new Invoke( + buffer, ensureBounds ? "writeTaggedInt64" : "_unsafeWriteTaggedInt64", v); + case VARINT: return new Invoke(buffer, ensureBounds ? 
"writeVarInt64" : "_unsafeWriteVarInt64", v); default: throw new UnsupportedOperationException("Unsupported long encoding " + longEncoding); @@ -209,9 +210,9 @@ public static Expression writeInt64( } public static void writeInt64(MemoryBuffer buffer, long value, LongEncoding longEncoding) { - if (longEncoding == LongEncoding.SLI) { - buffer.writeSliInt64(value); - } else if (longEncoding == LongEncoding.LE_RAW_BYTES) { + if (longEncoding == LongEncoding.TAGGED) { + buffer.writeTaggedInt64(value); + } else if (longEncoding == LongEncoding.FIXED) { buffer.writeInt64(value); } else { buffer.writeVarInt64(value); @@ -219,9 +220,9 @@ public static void writeInt64(MemoryBuffer buffer, long value, LongEncoding long } public static long readInt64(MemoryBuffer buffer, LongEncoding longEncoding) { - if (longEncoding == LongEncoding.SLI) { - return buffer.readSliInt64(); - } else if (longEncoding == LongEncoding.LE_RAW_BYTES) { + if (longEncoding == LongEncoding.TAGGED) { + return buffer.readTaggedInt64(); + } else if (longEncoding == LongEncoding.FIXED) { return buffer.readInt64(); } else { return buffer.readVarInt64(); @@ -234,11 +235,11 @@ public static Expression readInt64(Expression buffer, LongEncoding longEncoding) public static String readLongFunc(LongEncoding longEncoding) { switch (longEncoding) { - case LE_RAW_BYTES: + case FIXED: return Platform.IS_LITTLE_ENDIAN ? "_readInt64OnLE" : "_readInt64OnBE"; - case SLI: - return Platform.IS_LITTLE_ENDIAN ? "_readSliInt64OnLE" : "_readSliInt64OnBE"; - case PVL: + case TAGGED: + return Platform.IS_LITTLE_ENDIAN ? "_readTaggedInt64OnLE" : "_readTaggedInt64OnBE"; + case VARINT: return Platform.IS_LITTLE_ENDIAN ? 
"_readVarInt64OnLE" : "_readVarInt64OnBE"; default: throw new UnsupportedOperationException("Unsupported long encoding " + longEncoding); diff --git a/java/fory-core/src/main/java/org/apache/fory/serializer/Serializers.java b/java/fory-core/src/main/java/org/apache/fory/serializer/Serializers.java index a75da3ded4..46516854da 100644 --- a/java/fory-core/src/main/java/org/apache/fory/serializer/Serializers.java +++ b/java/fory-core/src/main/java/org/apache/fory/serializer/Serializers.java @@ -50,6 +50,7 @@ import org.apache.fory.meta.ClassDef; import org.apache.fory.reflect.ReflectionUtils; import org.apache.fory.resolver.ClassResolver; +import org.apache.fory.type.DispatchId; import org.apache.fory.util.ExceptionUtils; import org.apache.fory.util.GraalvmSupport; import org.apache.fory.util.GraalvmSupport.GraalvmSerializerHolder; @@ -176,32 +177,97 @@ private static Serializer createSerializer( } } - public static Object readPrimitiveValue(Fory fory, MemoryBuffer buffer, short classId) { - switch (classId) { - case ClassResolver.PRIMITIVE_BOOLEAN_CLASS_ID: + public static Object readPrimitiveValue(Fory fory, MemoryBuffer buffer, int dispatchId) { + switch (dispatchId) { + case DispatchId.PRIMITIVE_BOOL: return buffer.readBoolean(); - case ClassResolver.PRIMITIVE_BYTE_CLASS_ID: + case DispatchId.PRIMITIVE_INT8: + case DispatchId.PRIMITIVE_UINT8: return buffer.readByte(); - case ClassResolver.PRIMITIVE_CHAR_CLASS_ID: + case DispatchId.PRIMITIVE_CHAR: return buffer.readChar(); - case ClassResolver.PRIMITIVE_SHORT_CLASS_ID: + case DispatchId.PRIMITIVE_INT16: + case DispatchId.PRIMITIVE_UINT16: return buffer.readInt16(); - case ClassResolver.PRIMITIVE_INT_CLASS_ID: - if (fory.compressInt()) { - return buffer.readVarInt32(); - } else { - return buffer.readInt32(); - } - case ClassResolver.PRIMITIVE_FLOAT_CLASS_ID: + case DispatchId.PRIMITIVE_INT32: + return buffer.readInt32(); + case DispatchId.PRIMITIVE_VARINT32: + return buffer.readVarInt32(); + case 
DispatchId.PRIMITIVE_UINT32: + return buffer.readInt32(); + case DispatchId.PRIMITIVE_VAR_UINT32: + return buffer.readVarUint32(); + case DispatchId.PRIMITIVE_INT64: + return buffer.readInt64(); + case DispatchId.PRIMITIVE_VARINT64: + return buffer.readVarInt64(); + case DispatchId.PRIMITIVE_TAGGED_INT64: + return buffer.readTaggedInt64(); + case DispatchId.PRIMITIVE_UINT64: + return buffer.readInt64(); + case DispatchId.PRIMITIVE_VAR_UINT64: + return buffer.readVarUint64(); + case DispatchId.PRIMITIVE_TAGGED_UINT64: + return buffer.readTaggedUint64(); + case DispatchId.PRIMITIVE_FLOAT32: return buffer.readFloat32(); - case ClassResolver.PRIMITIVE_LONG_CLASS_ID: - return fory.readInt64(buffer); - case ClassResolver.PRIMITIVE_DOUBLE_CLASS_ID: + case DispatchId.PRIMITIVE_FLOAT64: return buffer.readFloat64(); default: - { - throw new IllegalStateException("unreachable"); - } + throw new IllegalStateException("unreachable"); + } + } + + public static void writePrimitiveValue(MemoryBuffer buffer, Object value, int dispatchId) { + switch (dispatchId) { + case DispatchId.PRIMITIVE_BOOL: + buffer.writeBoolean((Boolean) value); + break; + case DispatchId.PRIMITIVE_INT8: + case DispatchId.PRIMITIVE_UINT8: + buffer.writeByte((Byte) value); + break; + case DispatchId.PRIMITIVE_CHAR: + buffer.writeChar((Character) value); + break; + case DispatchId.PRIMITIVE_INT16: + case DispatchId.PRIMITIVE_UINT16: + buffer.writeInt16((Short) value); + break; + case DispatchId.PRIMITIVE_INT32: + case DispatchId.PRIMITIVE_UINT32: + buffer.writeInt32((Integer) value); + break; + case DispatchId.PRIMITIVE_VARINT32: + buffer.writeVarInt32((Integer) value); + break; + case DispatchId.PRIMITIVE_VAR_UINT32: + buffer.writeVarUint32((Integer) value); + break; + case DispatchId.PRIMITIVE_INT64: + case DispatchId.PRIMITIVE_UINT64: + buffer.writeInt64((Long) value); + break; + case DispatchId.PRIMITIVE_VARINT64: + buffer.writeVarInt64((Long) value); + break; + case DispatchId.PRIMITIVE_TAGGED_INT64: + 
buffer.writeTaggedInt64((Long) value); + break; + case DispatchId.PRIMITIVE_VAR_UINT64: + buffer.writeVarUint64((Long) value); + break; + case DispatchId.PRIMITIVE_TAGGED_UINT64: + buffer.writeTaggedUint64((Long) value); + break; + case DispatchId.PRIMITIVE_FLOAT32: + buffer.writeFloat32((Float) value); + break; + case DispatchId.PRIMITIVE_FLOAT64: + buffer.writeFloat64((Double) value); + break; + default: + throw new IllegalStateException("unreachable dispatchId: " + dispatchId); } } diff --git a/java/fory-core/src/main/java/org/apache/fory/serializer/struct/Fingerprint.java b/java/fory-core/src/main/java/org/apache/fory/serializer/struct/Fingerprint.java index 4da42be9ee..d852323cd1 100644 --- a/java/fory-core/src/main/java/org/apache/fory/serializer/struct/Fingerprint.java +++ b/java/fory-core/src/main/java/org/apache/fory/serializer/struct/Fingerprint.java @@ -70,7 +70,7 @@ public static String computeStructFingerprint(Fory fory, List descri List fieldInfos = new ArrayList<>(descriptors.size()); for (Descriptor descriptor : descriptors) { Class rawType = descriptor.getTypeRef().getRawType(); - int typeId = getTypeId(fory, rawType); + int typeId = getTypeId(fory, descriptor); // Get field identifier: tag ID if configured, otherwise snake_case name String fieldIdentifier; @@ -132,7 +132,8 @@ public static String computeStructFingerprint(Fory fory, List descri return builder.toString(); } - private static int getTypeId(Fory fory, Class cls) { + private static int getTypeId(Fory fory, Descriptor descriptor) { + Class cls = descriptor.getTypeRef().getRawType(); TypeResolver resolver = fory._getTypeResolver(); if (resolver.isSet(cls)) { return Types.SET; @@ -148,14 +149,9 @@ private static int getTypeId(Fory fory, Class cls) { if (classInfo == null) { return Types.UNKNOWN; } - int typeId; - if (fory.isCrossLanguage()) { - typeId = classInfo.getXtypeId(); - if (Types.isUserDefinedType((byte) typeId)) { - return Types.UNKNOWN; - } - } else { - typeId = 
classInfo.getClassId(); + int typeId = Types.getDescriptorTypeId(fory, descriptor); + if (Types.isUserDefinedType((byte) (typeId & 0xff))) { + return Types.UNKNOWN; } return typeId; } diff --git a/java/fory-core/src/main/java/org/apache/fory/type/Descriptor.java b/java/fory-core/src/main/java/org/apache/fory/type/Descriptor.java index 158c02968f..f41852e7cd 100644 --- a/java/fory-core/src/main/java/org/apache/fory/type/Descriptor.java +++ b/java/fory-core/src/main/java/org/apache/fory/type/Descriptor.java @@ -23,6 +23,7 @@ import com.google.common.cache.Cache; import com.google.common.cache.CacheBuilder; +import java.lang.annotation.Annotation; import java.lang.reflect.Field; import java.lang.reflect.Member; import java.lang.reflect.Method; @@ -31,6 +32,7 @@ import java.util.Arrays; import java.util.Comparator; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Objects; @@ -45,13 +47,18 @@ import org.apache.fory.annotation.Expose; import org.apache.fory.annotation.ForyField; import org.apache.fory.annotation.Ignore; +import org.apache.fory.annotation.Int32Type; +import org.apache.fory.annotation.Int64Type; import org.apache.fory.annotation.Internal; +import org.apache.fory.annotation.Uint16Type; +import org.apache.fory.annotation.Uint32Type; +import org.apache.fory.annotation.Uint64Type; +import org.apache.fory.annotation.Uint8Type; import org.apache.fory.collection.Collections; import org.apache.fory.collection.Tuple2; import org.apache.fory.memory.Platform; import org.apache.fory.reflect.TypeRef; import org.apache.fory.serializer.converter.FieldConverter; -import org.apache.fory.util.Preconditions; import org.apache.fory.util.StringUtils; import org.apache.fory.util.record.RecordComponent; import org.apache.fory.util.record.RecordUtils; @@ -88,6 +95,7 @@ public static void clearDescriptorCache() { private final Method readMethod; private final Method writeMethod; private final ForyField foryField; + 
private final Annotation typeAnnotation; private boolean nullable; // trackingRef should only be true if explicitly set to true via @ForyField(ref=true) // If no annotation or ref not specified, trackingRef stays false and type-based tracking applies @@ -107,6 +115,7 @@ public Descriptor(Field field, TypeRef typeRef, Method readMethod, Method wri this.writeMethod = writeMethod; this.typeRef = typeRef; this.foryField = this.field.getAnnotation(ForyField.class); + typeAnnotation = getAnnotation(field); if (!typeRef.isPrimitive()) { this.nullable = foryField == null || foryField.nullable(); } @@ -130,6 +139,7 @@ public Descriptor( this.readMethod = null; this.writeMethod = null; this.foryField = null; + typeAnnotation = null; this.nullable = nullable; this.trackingRef = trackingRef; } @@ -147,6 +157,7 @@ private Descriptor(Field field, Method readMethod) { this.readMethod = readMethod; this.writeMethod = null; this.foryField = this.field.getAnnotation(ForyField.class); + typeAnnotation = getAnnotation(field); if (!field.getType().isPrimitive()) { this.nullable = foryField == null || foryField.nullable(); } @@ -165,6 +176,7 @@ private Descriptor(Method readMethod) { this.readMethod = readMethod; this.writeMethod = null; this.foryField = readMethod.getAnnotation(ForyField.class); + typeAnnotation = getAnnotation(readMethod.getDeclaredAnnotations(), readMethod.getName()); if (!readMethod.getReturnType().isPrimitive()) { this.nullable = foryField == null || foryField.nullable(); } @@ -182,6 +194,7 @@ public Descriptor(DescriptorBuilder builder) { this.writeMethod = builder.writeMethod; this.trackingRef = builder.trackingRef; this.foryField = this.field == null ? null : this.field.getAnnotation(ForyField.class); + typeAnnotation = field == null ? 
null : getAnnotation(field); // Use builder.nullable directly - this is set by DescriptorBuilder.nullable() // and should be respected, especially for xlang compatible mode where remote // TypeDef's nullable flag may differ from local field's nullable @@ -273,6 +286,10 @@ public ForyField.Morphic getMorphic() { return ForyField.Morphic.AUTO; } + public Annotation getTypeAnnotation() { + return typeAnnotation; + } + /** Try not use {@link TypeRef#getRawType()} since it's expensive. */ public Class getRawType() { Class type = this.type; @@ -340,7 +357,7 @@ public static List getDescriptors(Class clz) { public static SortedMap getDescriptorsMap(Class clz) { SortedMap allDescriptorsMap = getAllDescriptorsMap(clz); Map> duplicateNameFields = getDuplicateNames(allDescriptorsMap); - Preconditions.checkArgument( + checkArgument( duplicateNameFields.isEmpty(), "%s has duplicate fields %s", clz, duplicateNameFields); TreeMap map = new TreeMap<>(); allDescriptorsMap.forEach((k, v) -> map.put(k.getName(), v)); @@ -656,4 +673,35 @@ static SortedMap buildBeanedDescriptorsMap( // otherwise classes can't be gc. 
return descriptorMap; } + + private static final Set> typeAnnotationsTypes = new HashSet<>(); + + static { + typeAnnotationsTypes.add(Int32Type.class); + typeAnnotationsTypes.add(Int64Type.class); + typeAnnotationsTypes.add(Uint8Type.class); + typeAnnotationsTypes.add(Uint16Type.class); + typeAnnotationsTypes.add(Uint32Type.class); + typeAnnotationsTypes.add(Uint64Type.class); + } + + public static Annotation getAnnotation(Field field) { + return getAnnotation(field.getDeclaredAnnotations(), field.getName()); + } + + public static Annotation getAnnotation(Annotation[] declaredAnnotations, String name) { + Annotation typeAnnotation = null; + for (Annotation annotation : declaredAnnotations) { + if (typeAnnotationsTypes.contains(annotation.annotationType())) { + if (typeAnnotation != null) { + throw new IllegalStateException( + String.format( + "Multiple type annotation %s and %s found for %s!", + typeAnnotation, annotation.annotationType(), name)); + } + typeAnnotation = annotation; + } + } + return typeAnnotation; + } } diff --git a/java/fory-core/src/main/java/org/apache/fory/type/DescriptorGrouper.java b/java/fory-core/src/main/java/org/apache/fory/type/DescriptorGrouper.java index da25315d9b..2d232c784c 100644 --- a/java/fory-core/src/main/java/org/apache/fory/type/DescriptorGrouper.java +++ b/java/fory-core/src/main/java/org/apache/fory/type/DescriptorGrouper.java @@ -19,8 +19,6 @@ package org.apache.fory.type; -import static org.apache.fory.type.TypeUtils.getSizeOfPrimitiveType; - import java.lang.reflect.Method; import java.util.ArrayList; import java.util.Collection; @@ -29,7 +27,6 @@ import java.util.TreeSet; import java.util.function.Function; import java.util.function.Predicate; -import org.apache.fory.annotation.ForyField; import org.apache.fory.util.Preconditions; import org.apache.fory.util.record.RecordUtils; @@ -44,123 +41,12 @@ */ public class DescriptorGrouper { - /** - * Gets the sort key for a field descriptor. - * - *

    If the field has a {@link ForyField} annotation with id >= 0, returns the id as a string. - * Otherwise, returns the snake_case field name. This ensures fields are sorted by tag ID when - * configured, matching the fingerprint computation order. - * - * @param descriptor the field descriptor - * @return the sort key (tag ID as string or snake_case name) - */ - public static String getFieldSortKey(Descriptor descriptor) { - ForyField foryField = descriptor.getForyField(); - if (foryField != null && foryField.id() >= 0) { - return String.valueOf(foryField.id()); - } - return descriptor.getSnakeCaseName(); - } - - static final Comparator COMPARATOR_BY_PRIMITIVE_TYPE_ID = - (d1, d2) -> { - int c = - Types.getPrimitiveTypeId(TypeUtils.unwrap(d2.getRawType())) - - Types.getPrimitiveTypeId(TypeUtils.unwrap(d1.getRawType())); - if (c == 0) { - c = getFieldSortKey(d1).compareTo(getFieldSortKey(d2)); - if (c == 0) { - // Field name duplicate in super/child classes. - c = d1.getDeclaringClass().compareTo(d2.getDeclaringClass()); - if (c == 0) { - // Final tie-breaker: use actual field name to distinguish fields with same tag ID. - // This ensures TreeSet never treats different fields as duplicates. - c = d1.getName().compareTo(d2.getName()); - } - } - } - return c; - }; private final Collection descriptors; private final Predicate isBuildIn; private final Function descriptorUpdater; private final boolean descriptorsGroupedOrdered; private boolean sorted = false; - /** - * When compress disabled, sort primitive descriptors from largest to smallest, if size is the - * same, sort by field name to fix order. - * - *

    When compress enabled, sort primitive descriptors from largest to smallest but let compress - * fields ends in tail. if size is the same, sort by field name to fix order. - */ - public static Comparator getPrimitiveComparator( - boolean compressInt, boolean compressLong) { - if (!compressInt && !compressLong) { - // sort primitive descriptors from largest to smallest, if size is the same, - // sort by field name to fix order. - return (d1, d2) -> { - int c = - getSizeOfPrimitiveType(TypeUtils.unwrap(d2.getRawType())) - - getSizeOfPrimitiveType(TypeUtils.unwrap(d1.getRawType())); - if (c == 0) { - c = COMPARATOR_BY_PRIMITIVE_TYPE_ID.compare(d1, d2); - } - return c; - }; - } - return (d1, d2) -> { - Class t1 = TypeUtils.unwrap(d1.getRawType()); - Class t2 = TypeUtils.unwrap(d2.getRawType()); - boolean t1Compress = isCompressedType(t1, compressInt, compressLong); - boolean t2Compress = isCompressedType(t2, compressInt, compressLong); - if ((t1Compress && t2Compress) || (!t1Compress && !t2Compress)) { - int c = getSizeOfPrimitiveType(t2) - getSizeOfPrimitiveType(t1); - if (c == 0) { - c = COMPARATOR_BY_PRIMITIVE_TYPE_ID.compare(d1, d2); - } - return c; - } - if (t1Compress) { - return 1; - } - // t2 compress - return -1; - }; - } - - private static boolean isCompressedType(Class cls, boolean compressInt, boolean compressLong) { - cls = TypeUtils.unwrap(cls); - if (cls == int.class) { - return compressInt; - } - if (cls == long.class) { - return compressLong; - } - return false; - } - - /** Comparator based on field type, name/id and declaring class. */ - public static final Comparator COMPARATOR_BY_TYPE_AND_NAME = - (d1, d2) -> { - // sort by type so that we can hit class info cache more possibly. - // sort by field id/name to fix order if type is same. - int c = d1.getTypeName().compareTo(d2.getTypeName()); - if (c == 0) { - c = getFieldSortKey(d1).compareTo(getFieldSortKey(d2)); - if (c == 0) { - // Field name duplicate in super/child classes. 
- c = d1.getDeclaringClass().compareTo(d2.getDeclaringClass()); - if (c == 0) { - // Final tie-breaker: use actual field name to distinguish fields with same tag ID. - // This ensures TreeSet never treats different fields as duplicates. - c = d1.getName().compareTo(d2.getName()); - } - } - } - return c; - }; - private final Collection primitiveDescriptors; private final Collection boxedDescriptors; // The element type should be final. @@ -301,15 +187,14 @@ public static DescriptorGrouper createDescriptorGrouper( Collection descriptors, boolean descriptorsGroupedOrdered, Function descriptorUpdator, - boolean compressInt, - boolean compressLong, + Comparator primitiveComparator, Comparator comparator) { return new DescriptorGrouper( isBuildIn, descriptors, descriptorsGroupedOrdered, descriptorUpdator == null ? DescriptorGrouper::createDescriptor : descriptorUpdator, - getPrimitiveComparator(compressInt, compressLong), + primitiveComparator, comparator); } diff --git a/java/fory-core/src/main/java/org/apache/fory/type/DispatchId.java b/java/fory-core/src/main/java/org/apache/fory/type/DispatchId.java new file mode 100644 index 0000000000..0b12adeb2c --- /dev/null +++ b/java/fory-core/src/main/java/org/apache/fory/type/DispatchId.java @@ -0,0 +1,153 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.fory.type; + +import org.apache.fory.Fory; +import org.apache.fory.meta.TypeExtMeta; +import org.apache.fory.reflect.TypeRef; +import org.apache.fory.resolver.ClassResolver; + +public class DispatchId { + public static final int UNKNOWN = 0; + public static final int PRIMITIVE_BOOL = 1; + public static final int PRIMITIVE_INT8 = 2; + public static final int PRIMITIVE_INT16 = 3; + public static final int PRIMITIVE_CHAR = 4; + public static final int PRIMITIVE_INT32 = 5; + public static final int PRIMITIVE_VARINT32 = 6; + public static final int PRIMITIVE_INT64 = 7; + public static final int PRIMITIVE_VARINT64 = 8; + public static final int PRIMITIVE_TAGGED_INT64 = 9; + public static final int PRIMITIVE_FLOAT32 = 10; + public static final int PRIMITIVE_FLOAT64 = 11; + public static final int PRIMITIVE_UINT8 = 12; + public static final int PRIMITIVE_UINT16 = 13; + public static final int PRIMITIVE_UINT32 = 14; + public static final int PRIMITIVE_VAR_UINT32 = 15; + public static final int PRIMITIVE_UINT64 = 16; + public static final int PRIMITIVE_VAR_UINT64 = 17; + public static final int PRIMITIVE_TAGGED_UINT64 = 18; + + public static final int BOOL = 19; + public static final int INT8 = 20; + public static final int CHAR = 21; + public static final int INT16 = 22; + public static final int INT32 = 23; + public static final int VARINT32 = 24; + public static final int INT64 = 25; + public static final int VARINT64 = 26; + public static final int TAGGED_INT64 = 27; + public static final int FLOAT32 = 28; + public static final int FLOAT64 = 29; + public static final int UINT8 = 30; + public static final int UINT16 = 31; + public static final int UINT32 = 32; + public static final int VAR_UINT32 = 33; + public static final int UINT64 = 34; + public static final int VAR_UINT64 = 35; + public static final int TAGGED_UINT64 = 36; + public 
static final int STRING = 37; + + public static int getDispatchId(Fory fory, Descriptor d) { + int typeId = Types.getDescriptorTypeId(fory, d); + TypeRef typeRef = d.getTypeRef(); + Class rawType = typeRef.getRawType(); + TypeExtMeta typeExtMeta = typeRef.getTypeExtMeta(); + // A field is treated as primitive for dispatch only if the Java type itself is primitive. + // Boxed types with nullable=false are still dispatched as boxed types, + // but serialized without null checks. + boolean isPrimitive = + typeRef.isPrimitive() + || (rawType.isPrimitive() && typeExtMeta != null && !typeExtMeta.nullable()); + if (fory.isCrossLanguage()) { + return xlangTypeIdToDispatchId(typeId, isPrimitive); + } else { + return nativeIdToDispatchId(typeId, d, isPrimitive); + } + } + + private static int xlangTypeIdToDispatchId(int typeId, boolean isPrimitive) { + switch (typeId) { + case Types.BOOL: + return isPrimitive ? PRIMITIVE_BOOL : BOOL; + case Types.INT8: + return isPrimitive ? PRIMITIVE_INT8 : INT8; + case Types.INT16: + return isPrimitive ? PRIMITIVE_INT16 : INT16; + case Types.INT32: + return isPrimitive ? PRIMITIVE_INT32 : INT32; + case Types.VARINT32: + return isPrimitive ? PRIMITIVE_VARINT32 : VARINT32; + case Types.INT64: + return isPrimitive ? PRIMITIVE_INT64 : INT64; + case Types.VARINT64: + return isPrimitive ? PRIMITIVE_VARINT64 : VARINT64; + case Types.TAGGED_INT64: + return isPrimitive ? PRIMITIVE_TAGGED_INT64 : TAGGED_INT64; + case Types.UINT8: + return isPrimitive ? PRIMITIVE_UINT8 : UINT8; + case Types.UINT16: + return isPrimitive ? PRIMITIVE_UINT16 : UINT16; + case Types.UINT32: + return isPrimitive ? PRIMITIVE_UINT32 : UINT32; + case Types.VAR_UINT32: + return isPrimitive ? PRIMITIVE_VAR_UINT32 : VAR_UINT32; + case Types.UINT64: + return isPrimitive ? PRIMITIVE_UINT64 : UINT64; + case Types.VAR_UINT64: + return isPrimitive ? PRIMITIVE_VAR_UINT64 : VAR_UINT64; + case Types.TAGGED_UINT64: + return isPrimitive ? 
PRIMITIVE_TAGGED_UINT64 : TAGGED_UINT64; + case Types.FLOAT32: + return isPrimitive ? PRIMITIVE_FLOAT32 : FLOAT32; + case Types.FLOAT64: + return isPrimitive ? PRIMITIVE_FLOAT64 : FLOAT64; + case Types.STRING: + return STRING; + default: + return UNKNOWN; + } + } + + private static int nativeIdToDispatchId( + int nativeId, Descriptor descriptor, boolean isPrimitive) { + if (nativeId >= Types.BOOL && nativeId <= ClassResolver.NATIVE_START_ID) { + return xlangTypeIdToDispatchId(nativeId, isPrimitive); + } + if (nativeId == ClassResolver.CHAR_ID) { + return isPrimitive ? PRIMITIVE_CHAR : CHAR; + } + if (nativeId == ClassResolver.PRIMITIVE_CHAR_ID) { + return PRIMITIVE_CHAR; + } + if (nativeId >= ClassResolver.PRIMITIVE_VOID_ID + && nativeId <= ClassResolver.PRIMITIVE_FLOAT64_ID) { + throw new IllegalArgumentException( + String.format( + "%s should use `Types.BOOL~Types.FLOAT64` with nullable meta instead, but got %s", + descriptor.getField(), nativeId)); + } + return xlangTypeIdToDispatchId(nativeId, isPrimitive); + } + + public static boolean isPrimitive(int dispatchId) { + return dispatchId >= PRIMITIVE_BOOL && dispatchId <= PRIMITIVE_TAGGED_UINT64; + } +} diff --git a/java/fory-core/src/main/java/org/apache/fory/type/FinalObjectTypeStub.java b/java/fory-core/src/main/java/org/apache/fory/type/FinalObjectTypeStub.java deleted file mode 100644 index 6e5b09bfd1..0000000000 --- a/java/fory-core/src/main/java/org/apache/fory/type/FinalObjectTypeStub.java +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.fory.type; - -import org.apache.fory.annotation.Internal; - -/** - * Stub class for object type which is final. - * - *

    {@link Object} class will be used if isn't final. No {@link - * org.apache.fory.resolver.ClassInfo} should be created for this class since it has no fields, and - * doesn't have consistent class structure as real class. - */ -@Internal -public final class FinalObjectTypeStub {} diff --git a/java/fory-core/src/main/java/org/apache/fory/type/TypeAnnotationUtils.java b/java/fory-core/src/main/java/org/apache/fory/type/TypeAnnotationUtils.java new file mode 100644 index 0000000000..3c521af989 --- /dev/null +++ b/java/fory-core/src/main/java/org/apache/fory/type/TypeAnnotationUtils.java @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.fory.type; + +import java.lang.annotation.Annotation; +import org.apache.fory.annotation.Int32Type; +import org.apache.fory.annotation.Int64Type; +import org.apache.fory.annotation.Uint16Type; +import org.apache.fory.annotation.Uint32Type; +import org.apache.fory.annotation.Uint64Type; +import org.apache.fory.annotation.Uint8Type; + +public class TypeAnnotationUtils { + + /** + * Get the type id for the given type annotation and validate it against the field type. 
+ * + * @param typeAnnotation the type annotation + * @param fieldType the field type class + * @return the type id + * @throws IllegalArgumentException if the annotation is not compatible with the field type + */ + public static int getTypeId(Annotation typeAnnotation, Class fieldType) { + if (typeAnnotation == null) { + return Types.UNKNOWN; + } + if (typeAnnotation instanceof Uint8Type) { + checkFieldType(fieldType, "@Uint8Type", byte.class, Byte.class); + return Types.UINT8; + } else if (typeAnnotation instanceof Uint16Type) { + checkFieldType(fieldType, "@Uint16Type", short.class, Short.class); + return Types.UINT16; + } else if (typeAnnotation instanceof Uint32Type) { + checkFieldType(fieldType, "@Uint32Type", int.class, Integer.class); + Uint32Type uint32Type = (Uint32Type) typeAnnotation; + return uint32Type.compress() ? Types.VAR_UINT32 : Types.UINT32; + } else if (typeAnnotation instanceof Uint64Type) { + checkFieldType(fieldType, "@Uint64Type", long.class, Long.class); + Uint64Type uint64Type = (Uint64Type) typeAnnotation; + switch (uint64Type.encoding()) { + case VARINT: + return Types.VAR_UINT64; + case FIXED: + return Types.UINT64; + case TAGGED: + return Types.TAGGED_UINT64; + default: + throw new IllegalArgumentException("Unsupported encoding: " + uint64Type.encoding()); + } + } else if (typeAnnotation instanceof Int32Type) { + checkFieldType(fieldType, "@Int32Type", int.class, Integer.class); + Int32Type int32Type = (Int32Type) typeAnnotation; + return int32Type.compress() ? 
Types.VARINT32 : Types.INT32; + } else if (typeAnnotation instanceof Int64Type) { + checkFieldType(fieldType, "@Int64Type", long.class, Long.class); + Int64Type int64Type = (Int64Type) typeAnnotation; + switch (int64Type.encoding()) { + case VARINT: + return Types.VARINT64; + case FIXED: + return Types.INT64; + case TAGGED: + return Types.TAGGED_INT64; + default: + throw new IllegalArgumentException("Unsupported encoding: " + int64Type.encoding()); + } + } + throw new IllegalArgumentException("Unsupported type annotation: " + typeAnnotation.getClass()); + } + + private static void checkFieldType( + Class fieldType, String annotationName, Class... allowedTypes) { + for (Class allowedType : allowedTypes) { + if (fieldType == allowedType) { + return; + } + } + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < allowedTypes.length; i++) { + if (i > 0) { + sb.append(" or "); + } + sb.append(allowedTypes[i].getSimpleName()); + } + throw new IllegalArgumentException( + annotationName + " can only be applied to " + sb + " fields, but got " + fieldType); + } +} diff --git a/java/fory-core/src/main/java/org/apache/fory/type/TypeUtils.java b/java/fory-core/src/main/java/org/apache/fory/type/TypeUtils.java index ad5ec87a4f..10039842d5 100644 --- a/java/fory-core/src/main/java/org/apache/fory/type/TypeUtils.java +++ b/java/fory-core/src/main/java/org/apache/fory/type/TypeUtils.java @@ -69,6 +69,7 @@ import java.util.stream.Collectors; import org.apache.fory.collection.IdentityMap; import org.apache.fory.collection.Tuple2; +import org.apache.fory.meta.TypeExtMeta; import org.apache.fory.reflect.ReflectionUtils; import org.apache.fory.reflect.TypeParameter; import org.apache.fory.reflect.TypeRef; @@ -264,6 +265,9 @@ public static Class wrap(Class clz) { } public static Class unwrap(Class clz) { + if (clz == null) { + return null; + } if (clz.isPrimitive()) { return clz; } @@ -271,6 +275,9 @@ public static Class unwrap(Class clz) { } public static Class boxedType(Class 
clz) { + if (clz == null) { + return null; + } if (!clz.isPrimitive()) { return clz; } @@ -552,12 +559,12 @@ public static TypeRef> collectionOf(TypeRef elemType) { return new TypeRef>() {}.where(new TypeParameter() {}, elemType); } - public static TypeRef> collectionOf(TypeRef elemType, Object extMeta) { + public static TypeRef> collectionOf(TypeRef elemType, TypeExtMeta extMeta) { return new TypeRef>(extMeta) {}.where(new TypeParameter() {}, elemType); } public static TypeRef> collectionOf( - Class collectionType, TypeRef elemType, Object extMeta) { + Class collectionType, TypeRef elemType, TypeExtMeta extMeta) { return new TypeRef>(extMeta) {}.where(new TypeParameter() {}, elemType) .getSubtype(collectionType); } @@ -572,13 +579,13 @@ public static TypeRef> mapOf(TypeRef keyType, TypeRef val } public static TypeRef> mapOf( - TypeRef keyType, TypeRef valueType, Object extMeta) { + TypeRef keyType, TypeRef valueType, TypeExtMeta extMeta) { return new TypeRef>(extMeta) {}.where(new TypeParameter() {}, keyType) .where(new TypeParameter() {}, valueType); } public static TypeRef> mapOf( - Class mapType, TypeRef keyType, TypeRef valueType, Object extMeta) { + Class mapType, TypeRef keyType, TypeRef valueType, TypeExtMeta extMeta) { TypeRef> mapTypeRef = new TypeRef>(extMeta) {}.where(new TypeParameter() {}, keyType) .where(new TypeParameter() {}, valueType); diff --git a/java/fory-core/src/main/java/org/apache/fory/type/Types.java b/java/fory-core/src/main/java/org/apache/fory/type/Types.java index 0048a8975a..b20d03a3ed 100644 --- a/java/fory-core/src/main/java/org/apache/fory/type/Types.java +++ b/java/fory-core/src/main/java/org/apache/fory/type/Types.java @@ -19,9 +19,13 @@ package org.apache.fory.type; -import static org.apache.fory.collection.Collections.ofHashMap; - -import java.util.Map; +import java.lang.annotation.Annotation; +import java.lang.reflect.Field; +import org.apache.fory.Fory; +import org.apache.fory.meta.TypeExtMeta; +import 
org.apache.fory.reflect.TypeRef; +import org.apache.fory.resolver.ClassInfo; +import org.apache.fory.resolver.ClassResolver; import org.apache.fory.util.Preconditions; public class Types { @@ -42,16 +46,16 @@ public class Types { public static final int INT32 = 4; /** var32: a 32-bit signed integer which uses fory var_int32 encoding. */ - public static final int VAR32 = 5; + public static final int VARINT32 = 5; /** int64: a 64-bit signed integer. */ public static final int INT64 = 6; /** var64: a 64-bit signed integer which uses fory PVL encoding. */ - public static final int VAR64 = 7; + public static final int VARINT64 = 7; - /** h64: a 64-bit signed integer which uses fory hybrid encoding. */ - public static final int H64 = 8; + /** tagged_int64: a 64-bit signed integer which uses fory hybrid encoding. */ + public static final int TAGGED_INT64 = 8; /** uint8: an 8-bit unsigned integer. */ public static final int UINT8 = 9; @@ -62,17 +66,17 @@ public class Types { /** uint32: a 32-bit unsigned integer. */ public static final int UINT32 = 11; - /** varu32: a 32-bit unsigned integer which uses fory var_uint32 encoding. */ - public static final int VARU32 = 12; + /** var_uint32: a 32-bit unsigned integer which uses fory var_uint32 encoding. */ + public static final int VAR_UINT32 = 12; /** uint64: a 64-bit unsigned integer. */ public static final int UINT64 = 13; - /** varu64: a 64-bit unsigned integer which uses fory var_uint64 encoding. */ - public static final int VARU64 = 14; + /** var_uint64: a 64-bit unsigned integer which uses fory var_uint64 encoding. */ + public static final int VAR_UINT64 = 14; - /** hu64: a 64-bit unsigned integer which uses fory hybrid encoding. */ - public static final int HU64 = 15; + /** tagged_uint64: a 64-bit unsigned integer which uses fory tagged int64 encoding. */ + public static final int TAGGED_UINT64 = 15; /** float16: a 16-bit floating point number. 
*/ public static final int FLOAT16 = 16; @@ -237,35 +241,8 @@ public static boolean isUserDefinedType(byte typeId) { return isStructType(typeId) || isExtType(typeId) || isEnumType(typeId); } - private static final Map PRIMITIVE_TYPE_ID_MAP = - ofHashMap( - boolean.class, BOOL, - byte.class, INT8, - short.class, INT16, - int.class, INT32, - long.class, INT64, - float.class, FLOAT32, - double.class, FLOAT64); - - public static int getPrimitiveTypeId(Class cls) { - Preconditions.checkArgument(cls.isPrimitive(), "Class %s is not primitive", cls); - return PRIMITIVE_TYPE_ID_MAP.getOrDefault(cls, -1); - } - public static boolean isPrimitiveType(int typeId) { - // noinspection Duplicates - switch (typeId) { - case BOOL: - case INT8: - case INT16: - case INT32: - case INT64: - case FLOAT32: - case FLOAT64: - return true; - default: - return false; - } + return typeId >= BOOL && typeId <= FLOAT64; } public static boolean isPrimitiveArray(int typeId) { @@ -320,4 +297,108 @@ public static int getPrimitiveArrayTypeId(int typeId) { String.format("Type id %d is not a primitive id", typeId)); } } + + public static int getDescriptorTypeId(Fory fory, Field field) { + Annotation annotation = Descriptor.getAnnotation(field); + Class rawType = field.getType(); + if (annotation != null) { + return TypeAnnotationUtils.getTypeId(annotation, rawType); + } else { + return getTypeId(fory, rawType); + } + } + + public static int getDescriptorTypeId(Fory fory, Descriptor d) { + TypeRef typeRef = d.getTypeRef(); + TypeExtMeta extMeta = typeRef.getTypeExtMeta(); + if (extMeta != null) { + return extMeta.typeId(); + } else { + Class rawType = typeRef.getRawType(); + Annotation typeAnnotation = d.getTypeAnnotation(); + if (typeAnnotation != null) { + return TypeAnnotationUtils.getTypeId(typeAnnotation, rawType); + } else { + return getTypeId(fory, rawType); + } + } + } + + public static int getTypeId(Fory fory, Class clz) { + Class unwrapped = TypeUtils.unwrap(clz); + if (unwrapped == char.class) 
{ + Preconditions.checkArgument(!fory.isCrossLanguage(), "Char is not support for xlang"); + return clz.isPrimitive() ? ClassResolver.PRIMITIVE_CHAR_ID : ClassResolver.CHAR_ID; + } + if (unwrapped.isPrimitive()) { + if (unwrapped == boolean.class) { + return Types.BOOL; + } else if (unwrapped == byte.class) { + return Types.INT8; + } else if (unwrapped == short.class) { + return Types.INT16; + } else if (unwrapped == int.class) { + return fory.compressInt() ? Types.VARINT32 : Types.INT32; + } else if (unwrapped == long.class) { + return fory.compressLong() ? Types.VARINT64 : Types.INT64; + } else if (unwrapped == float.class) { + return Types.FLOAT32; + } else if (unwrapped == double.class) { + return Types.FLOAT64; + } + } + ClassInfo classInfo = fory._getTypeResolver().getClassInfo(clz, false); + if (classInfo != null) { + return fory.isCrossLanguage() ? classInfo.getXtypeId() : classInfo.getClassId(); + } + return Types.UNKNOWN; + } + + public static Class getClassForTypeId(int typeId) { + switch (typeId) { + case BOOL: + return Boolean.class; + case INT8: + case UINT8: + return Byte.class; + case INT16: + case UINT16: + return Short.class; + case INT32: + case VARINT32: + case UINT32: + case VAR_UINT32: + return Integer.class; + case INT64: + case VARINT64: + case TAGGED_INT64: + case UINT64: + case VAR_UINT64: + case TAGGED_UINT64: + return Long.class; + case FLOAT16: + case FLOAT32: + return Float.class; + case FLOAT64: + return Double.class; + case STRING: + return String.class; + default: + return null; + } + } + + public static boolean isCompressedType(int typeId) { + switch (typeId) { + case VARINT32: + case VAR_UINT32: + case VARINT64: + case VAR_UINT64: + case TAGGED_INT64: + case TAGGED_UINT64: + return true; + default: + return false; + } + } } diff --git a/java/fory-core/src/main/java/org/apache/fory/util/DefaultValueUtils.java b/java/fory-core/src/main/java/org/apache/fory/util/DefaultValueUtils.java index 892f78737a..2ce6518ebe 100644 --- 
a/java/fory-core/src/main/java/org/apache/fory/util/DefaultValueUtils.java +++ b/java/fory-core/src/main/java/org/apache/fory/util/DefaultValueUtils.java @@ -34,9 +34,9 @@ import org.apache.fory.logging.LoggerFactory; import org.apache.fory.memory.Platform; import org.apache.fory.reflect.FieldAccessor; -import org.apache.fory.resolver.ClassResolver; import org.apache.fory.type.ScalaTypes; import org.apache.fory.type.TypeUtils; +import org.apache.fory.type.Types; import org.apache.fory.util.unsafe._JDKAccess; /** @@ -61,14 +61,14 @@ public static final class DefaultValueField { private final Object defaultValue; private final String fieldName; private final FieldAccessor fieldAccessor; - private final short classId; + private final int dispatchId; private DefaultValueField( - String fieldName, Object defaultValue, FieldAccessor fieldAccessor, short classId) { + String fieldName, Object defaultValue, FieldAccessor fieldAccessor, int dispatchId) { this.fieldName = fieldName; this.defaultValue = defaultValue; this.fieldAccessor = fieldAccessor; - this.classId = classId; + this.dispatchId = dispatchId; } public Object getDefaultValue() { @@ -83,8 +83,8 @@ public FieldAccessor getFieldAccessor() { return fieldAccessor; } - public short getClassId() { - return classId; + public int getDispatchId() { + return dispatchId; } } @@ -131,13 +131,11 @@ public final DefaultValueField[] buildDefaultValueFields( if (defaultValue != null && TypeUtils.wrap(field.getType()).isAssignableFrom(defaultValue.getClass())) { FieldAccessor fieldAccessor = FieldAccessor.createAccessor(field); - Short classId = fory.getClassResolver().getRegisteredClassId(field.getType()); + int dispatchId = Types.getTypeId(fory, field.getType()); + // Convert value to correct type once during initialization + Object convertedValue = convertToType(defaultValue, dispatchId); defaultFields.add( - new DefaultValueField( - fieldName, - defaultValue, - fieldAccessor, - classId != null ? 
classId : ClassResolver.NO_CLASS_ID)); + new DefaultValueField(fieldName, convertedValue, fieldAccessor, dispatchId)); } } } @@ -359,6 +357,31 @@ private static Map getDefaultValuesForRegularScalaClass(Class Integer.MAX_VALUE should use 9 bytes + checkTaggedUint64(buf(i), (long) Integer.MAX_VALUE + 1, 9); + checkTaggedUint64(buf(i), 1L << 31, 9); + checkTaggedUint64(buf(i), 1L << 32, 9); + checkTaggedUint64(buf(i), 1L << 62, 9); + checkTaggedUint64(buf(i), Long.MAX_VALUE, 9); + // Negative values (large unsigned) should use 9 bytes + checkTaggedUint64(buf(i), -1, 9); + checkTaggedUint64(buf(i), -1L << 30, 9); + checkTaggedUint64(buf(i), Integer.MIN_VALUE, 9); + checkTaggedUint64(buf(i), Long.MIN_VALUE, 9); + } + } + } + + private void checkTaggedUint64(MemoryBuffer buf, long value, int bytesWritten) { + int readerIndex = buf.readerIndex(); + assertEquals(buf.writerIndex(), readerIndex); + int actualBytesWritten = buf.writeTaggedUint64(value); + assertEquals(actualBytesWritten, bytesWritten); + long varLong = buf.readTaggedUint64(); + assertEquals(buf.writerIndex(), buf.readerIndex()); + assertEquals(value, varLong); + assertEquals(buf.slice(readerIndex, buf.readerIndex() - readerIndex).readTaggedUint64(), value); } @Test diff --git a/java/fory-core/src/test/java/org/apache/fory/meta/ClassDefTest.java b/java/fory-core/src/test/java/org/apache/fory/meta/ClassDefTest.java index 25a57d0f5d..c6b2048f40 100644 --- a/java/fory-core/src/test/java/org/apache/fory/meta/ClassDefTest.java +++ b/java/fory-core/src/test/java/org/apache/fory/meta/ClassDefTest.java @@ -40,6 +40,7 @@ import org.apache.fory.resolver.ClassResolver; import org.apache.fory.test.bean.Foo; import org.apache.fory.type.Descriptor; +import org.apache.fory.type.Types; import org.testng.Assert; import org.testng.annotations.Test; @@ -188,8 +189,12 @@ public void testInterface() { public void testTypeExtInfo() { Fory fory = Fory.builder().withMetaShare(true).build(); ClassResolver classResolver = 
fory.getClassResolver(); - assertTrue(classResolver.needToWriteRef(TypeRef.of(Foo.class, new TypeExtMeta(true, true)))); - assertFalse(classResolver.needToWriteRef(TypeRef.of(Foo.class, new TypeExtMeta(true, false)))); + assertTrue( + classResolver.needToWriteRef( + TypeRef.of(Foo.class, new TypeExtMeta(Types.STRUCT, true, true)))); + assertFalse( + classResolver.needToWriteRef( + TypeRef.of(Foo.class, new TypeExtMeta(Types.STRUCT, true, false)))); } // Test classes for duplicate tag ID validation diff --git a/java/fory-core/src/test/java/org/apache/fory/resolver/ClassResolverTest.java b/java/fory-core/src/test/java/org/apache/fory/resolver/ClassResolverTest.java index 6c088cfdd3..66620152a7 100644 --- a/java/fory-core/src/test/java/org/apache/fory/resolver/ClassResolverTest.java +++ b/java/fory-core/src/test/java/org/apache/fory/resolver/ClassResolverTest.java @@ -24,7 +24,6 @@ import static org.testng.Assert.assertSame; import static org.testng.Assert.assertTrue; -import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.primitives.Primitives; import java.nio.charset.StandardCharsets; @@ -74,19 +73,21 @@ public class ClassResolverTest extends ForyTestBase { public void testPrimitivesClassId() { Fory fory = Fory.builder().withLanguage(Language.JAVA).requireClassRegistration(false).build(); ClassResolver classResolver = fory.getClassResolver(); - for (List> classes : - ImmutableList.of( - TypeUtils.getSortedPrimitiveClasses(), TypeUtils.getSortedBoxedClasses())) { - for (int i = 0; i < classes.size() - 1; i++) { - assertEquals( - classResolver.getRegisteredClassId(classes.get(i)) + 1, - classResolver.getRegisteredClassId(classes.get(i + 1)).shortValue()); - assertTrue(classResolver.getRegisteredClassId(classes.get(i)) > 0); - } + // Test that primitive types have consecutive IDs + List> primitiveClasses = TypeUtils.getSortedPrimitiveClasses(); + for (int i = 0; i < primitiveClasses.size() - 1; i++) { 
assertEquals( - classResolver.getRegisteredClassId(classes.get(classes.size() - 2)) + 1, - classResolver.getRegisteredClassId(classes.get(classes.size() - 1)).shortValue()); - assertTrue(classResolver.getRegisteredClassId(classes.get(classes.size() - 1)) > 0); + classResolver.getRegisteredClassId(primitiveClasses.get(i)) + 1, + classResolver.getRegisteredClassId(primitiveClasses.get(i + 1)).shortValue()); + assertTrue(classResolver.getRegisteredClassId(primitiveClasses.get(i)) > 0); + } + assertTrue( + classResolver.getRegisteredClassId(primitiveClasses.get(primitiveClasses.size() - 1)) > 0); + // Test that boxed types all have valid positive IDs + // Note: boxed types are no longer consecutive due to unsigned type IDs being added + List> boxedClasses = TypeUtils.getSortedBoxedClasses(); + for (Class boxedClass : boxedClasses) { + assertTrue(classResolver.getRegisteredClassId(boxedClass) > 0); } } diff --git a/java/fory-core/src/test/java/org/apache/fory/serializer/ArraySerializersTest.java b/java/fory-core/src/test/java/org/apache/fory/serializer/ArraySerializersTest.java index 542e568294..fe817d916e 100644 --- a/java/fory-core/src/test/java/org/apache/fory/serializer/ArraySerializersTest.java +++ b/java/fory-core/src/test/java/org/apache/fory/serializer/ArraySerializersTest.java @@ -376,7 +376,7 @@ public void testVariableLengthLongArray() { Fory.builder() .requireClassRegistration(false) .withLongArrayCompressed(true) - .withLongCompressed(LongEncoding.PVL) + .withLongCompressed(LongEncoding.VARINT) .build(); // Test empty array @@ -429,7 +429,7 @@ public void testVariableLengthEncodingEfficiencyForSmallValues() { Fory.builder() .requireClassRegistration(false) .withLongArrayCompressed(true) - .withLongCompressed(LongEncoding.PVL) + .withLongCompressed(LongEncoding.VARINT) .build(); // Create an array with many small values (0-127, which can be encoded in 1-2 bytes with varint) diff --git 
a/java/fory-core/src/test/java/org/apache/fory/serializer/CodegenCompatibleSerializerTest.java b/java/fory-core/src/test/java/org/apache/fory/serializer/CodegenCompatibleSerializerTest.java index d360852777..259a7fefa2 100644 --- a/java/fory-core/src/test/java/org/apache/fory/serializer/CodegenCompatibleSerializerTest.java +++ b/java/fory-core/src/test/java/org/apache/fory/serializer/CodegenCompatibleSerializerTest.java @@ -54,8 +54,8 @@ public class CodegenCompatibleSerializerTest extends ForyTestBase { @DataProvider(name = "config") public static Object[][] config() { return Sets.cartesianProduct( - ImmutableSet.of(true, false), // referenceTracking - ImmutableSet.of(true, false)) // enable codegen + ImmutableSet.of(true), // referenceTracking + ImmutableSet.of(true)) // enable codegen .stream() .map(List::toArray) .toArray(Object[][]::new); diff --git a/java/fory-core/src/test/java/org/apache/fory/serializer/CompatibleFieldConvertTest.java b/java/fory-core/src/test/java/org/apache/fory/serializer/CompatibleFieldConvertTest.java new file mode 100644 index 0000000000..afe294274e --- /dev/null +++ b/java/fory-core/src/test/java/org/apache/fory/serializer/CompatibleFieldConvertTest.java @@ -0,0 +1,162 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.fory.serializer; + +import com.google.common.collect.ImmutableSet; +import java.lang.reflect.Field; +import java.util.List; +import org.apache.fory.Fory; +import org.apache.fory.ForyTestBase; +import org.apache.fory.config.CompatibleMode; +import org.apache.fory.config.Language; +import org.apache.fory.reflect.ReflectionUtils; +import org.apache.fory.serializer.converter.FieldConverter; +import org.apache.fory.serializer.converter.FieldConverters; +import org.testng.Assert; +import org.testng.annotations.Test; + +public class CompatibleFieldConvertTest extends ForyTestBase { + public static final class CompatibleFieldConvert1 { + public boolean ftrue; + public Boolean ffalse; + public byte f3; + public Byte f4; + public short f5; + public Short f6; + public int f7; + public Integer f8; + public long f9; + public Long f10; + public float f11; + public Float f12; + public double f13; + public Double f14; + + public String toString() { + return "" + ftrue + ffalse + f3 + f4 + f5 + f6 + f7 + f8 + f9 + f10 + f11 + f12 + f13 + f14; + } + } + + public static final class CompatibleFieldConvert2 { + public Boolean ftrue; + public boolean ffalse; + public Byte f3; + public byte f4; + public Short f5; + public short f6; + public Integer f7; + public int f8; + public Long f9; + public long f10; + public Float f11; + public float f12; + public Double f13; + public double f14; + + public String toString() { + return "" + ftrue + ffalse + f3 + f4 + f5 + f6 + f7 + f8 + f9 + f10 + f11 + f12 + f13 + f14; + } + } + + public static final class CompatibleFieldConvert3 { + public String ftrue; + public String ffalse; + public String f3; + public String f4; + public String f5; + public String f6; + public String f7; + public String f8; + public String f9; + public String f10; + public String f11; + public String f12; + public String f13; + public String 
f14; + + public String toString() { + return ftrue + ffalse + f3 + f4 + f5 + f6 + f7 + f8 + f9 + f10 + f11 + f12 + f13 + f14; + } + } + + @Test(dataProvider = "language") + public void testCompatibleFieldConvert(Language language) throws Exception { + byte[] bytes; + Object o1; + ImmutableSet floatFields = ImmutableSet.of("f11", "f12", "f13", "f14"); + { + Class cls = CompatibleFieldConvert1.class; + o1 = cls.newInstance(); + for (Field field : ReflectionUtils.getSortedFields(cls, false)) { + String name = field.getName(); + field.setAccessible(true); + FieldConverter converter = FieldConverters.getConverter(String.class, field); + Assert.assertNotNull(converter); + Object converted = converter.convert(name.substring(1)); + field.set(o1, converted); + } + Fory fory = + builder().withLanguage(language).withCompatibleMode(CompatibleMode.COMPATIBLE).build(); + fory.register(cls); + bytes = fory.serialize(o1); + } + { + Class cls = CompatibleFieldConvert2.class; + Assert.assertNotEquals(o1.getClass(), cls); + Fory fory = + builder().withLanguage(language).withCompatibleMode(CompatibleMode.COMPATIBLE).build(); + fory.register(cls); + Object o = fory.deserialize(bytes); + Assert.assertEquals(o.getClass(), cls); + List fields = ReflectionUtils.getSortedFields(cls, false); + for (Field field : fields) { + field.setAccessible(true); + Object fieldValue = field.get(o); + if (fieldValue instanceof Float || fieldValue instanceof Double) { + Assert.assertEquals(fieldValue.toString(), field.getName().substring(1) + ".0"); + } else { + Assert.assertEquals( + fieldValue.toString(), field.getName().substring(1), field.getName() + " not equal"); + } + } + Assert.assertEquals(o.toString(), o1.toString()); + } + { + Fory fory = + builder().withLanguage(language).withCompatibleMode(CompatibleMode.COMPATIBLE).build(); + Class cls = CompatibleFieldConvert3.class; + Assert.assertNotEquals(o1.getClass(), cls); + fory.register(cls); + Object o = fory.deserialize(bytes); + 
Assert.assertEquals(o.getClass(), cls); + List fields = ReflectionUtils.getSortedFields(cls, false); + for (Field field : fields) { + field.setAccessible(true); + Object fieldValue = field.get(o); + if (floatFields.contains(field.getName())) { + Assert.assertEquals(fieldValue.toString(), field.getName().substring(1) + ".0"); + } else { + Assert.assertEquals(fieldValue.toString(), field.getName().substring(1)); + } + } + Assert.assertEquals(o.toString(), o1.toString()); + } + } +} diff --git a/java/fory-core/src/test/java/org/apache/fory/serializer/CompatibleSerializerTest.java b/java/fory-core/src/test/java/org/apache/fory/serializer/CompatibleSerializerTest.java index a16160ead5..39c305019f 100644 --- a/java/fory-core/src/test/java/org/apache/fory/serializer/CompatibleSerializerTest.java +++ b/java/fory-core/src/test/java/org/apache/fory/serializer/CompatibleSerializerTest.java @@ -160,14 +160,15 @@ public void testWriteNestedMap() throws Exception { Assert.assertEquals(o1, o2); } - @Test - public void testWriteCompatibleContainer() throws Exception { + @Test(dataProvider = "enableCodegen") + public void testWriteCompatibleContainer(boolean enableCodegen) throws Exception { Fory fory = Fory.builder() .withLanguage(Language.JAVA) .withRefTracking(true) .withCompatibleMode(CompatibleMode.COMPATIBLE) .requireClassRegistration(false) + .withCodegen(enableCodegen) .build(); BeanA beanA = BeanA.createBeanA(2); Class cls = ClassUtils.createCompatibleClass1(); diff --git a/java/fory-core/src/test/java/org/apache/fory/serializer/MetaSharedCompatibleTest.java b/java/fory-core/src/test/java/org/apache/fory/serializer/MetaSharedCompatibleTest.java index f3bf8ce0d6..94715db221 100644 --- a/java/fory-core/src/test/java/org/apache/fory/serializer/MetaSharedCompatibleTest.java +++ b/java/fory-core/src/test/java/org/apache/fory/serializer/MetaSharedCompatibleTest.java @@ -42,8 +42,6 @@ import org.apache.fory.reflect.ReflectionUtils; import org.apache.fory.resolver.MetaContext; 
import org.apache.fory.serializer.collection.UnmodifiableSerializersTest; -import org.apache.fory.serializer.converter.FieldConverter; -import org.apache.fory.serializer.converter.FieldConverters; import org.apache.fory.test.bean.BeanA; import org.apache.fory.test.bean.BeanB; import org.apache.fory.test.bean.CollectionFields; @@ -940,160 +938,4 @@ public void testInheritance() throws Exception { Assert.assertEquals(ReflectionUtils.getObjectFieldValue(o1, "f2"), 20); Assert.assertEquals(ReflectionUtils.getObjectFieldValue(o1, "f3"), 30); } - - @Test(dataProvider = "language") - public void testCompatibleFieldConvert(Language language) throws Exception { - byte[] bytes; - Object o1; - ImmutableSet floatFields = ImmutableSet.of("f11", "f12", "f13", "f14"); - { - CompileUnit compileUnit = - new CompileUnit( - "", - "CompatibleFieldConvert", - ("public final class CompatibleFieldConvert {\n" - + " public boolean ftrue;\n" - + " public Boolean ffalse;\n" - + " public byte f3;\n" - + " public Byte f4;\n" - + " public short f5;\n" - + " public Short f6;\n" - + " public int f7;\n" - + " public Integer f8;\n" - + " public long f9;\n" - + " public Long f10;\n" - + " public float f11;\n" - + " public Float f12;\n" - + " public double f13;\n" - + " public Double f14;\n" - + " public String toString() {return \"\" + ftrue + ffalse + " - + "f3 + f4 + f5 + f6 + f7 + f8 + f9 + f10 + f11 + f12 + f13 + f14;}\n" - + "}")); - - ClassLoader classLoader = - JaninoUtils.compile(Thread.currentThread().getContextClassLoader(), compileUnit); - Class cls = classLoader.loadClass(compileUnit.getQualifiedClassName()); - o1 = cls.newInstance(); - for (Field field : ReflectionUtils.getSortedFields(cls, false)) { - String name = field.getName(); - field.setAccessible(true); - FieldConverter converter = FieldConverters.getConverter(String.class, field); - Assert.assertNotNull(converter); - Object converted = converter.convert(name.substring(1)); - field.set(o1, converted); - } - Fory fory = - 
builder() - .withLanguage(language) - .withCompatibleMode(CompatibleMode.COMPATIBLE) - .withClassLoader(classLoader) - .build(); - if (language == Language.XLANG) { - fory.register(cls); - } - bytes = fory.serialize(o1); - } - { - CompileUnit compileUnit = - new CompileUnit( - "", - "CompatibleFieldConvert", - ("public final class CompatibleFieldConvert {\n" - + " public Boolean ftrue;\n" - + " public boolean ffalse;\n" - + " public Byte f3;\n" - + " public byte f4;\n" - + " public Short f5;\n" - + " public short f6;\n" - + " public Integer f7;\n" - + " public int f8;\n" - + " public Long f9;\n" - + " public long f10;\n" - + " public Float f11;\n" - + " public float f12;\n" - + " public Double f13;\n" - + " public double f14;\n" - + " public String toString() {return \"\" + ftrue + ffalse + " - + "f3 + f4 + f5 + f6 + f7 + f8 + f9 + f10 + f11 + f12 + f13 + f14;}\n" - + "}")); - ClassLoader classLoader = - JaninoUtils.compile(Thread.currentThread().getContextClassLoader(), compileUnit); - Class cls = classLoader.loadClass(compileUnit.getQualifiedClassName()); - Assert.assertNotEquals(cls, o1.getClass()); - Fory fory = - builder() - .withLanguage(language) - .withCompatibleMode(CompatibleMode.COMPATIBLE) - .withClassLoader(classLoader) - .build(); - if (language == Language.XLANG) { - fory.register(cls); - } - Object o = fory.deserialize(bytes); - Assert.assertEquals(o.getClass(), cls); - List fields = ReflectionUtils.getSortedFields(cls, false); - for (Field field : fields) { - field.setAccessible(true); - Object fieldValue = field.get(o); - if (fieldValue instanceof Float || fieldValue instanceof Double) { - Assert.assertEquals(fieldValue.toString(), field.getName().substring(1) + ".0"); - } else { - Assert.assertEquals( - fieldValue.toString(), field.getName().substring(1), field.getName() + " not equal"); - } - } - Assert.assertEquals(o.toString(), o1.toString()); - } - { - CompileUnit compileUnit = - new CompileUnit( - "", - "CompatibleFieldConvert", - ("public 
final class CompatibleFieldConvert {\n" - + " public String ftrue;\n" - + " public String ffalse;\n" - + " public String f3;\n" - + " public String f4;\n" - + " public String f5;\n" - + " public String f6;\n" - + " public String f7;\n" - + " public String f8;\n" - + " public String f9;\n" - + " public String f10;\n" - + " public String f11;\n" - + " public String f12;\n" - + " public String f13;\n" - + " public String f14;\n" - + " public String toString() {return \"\" + ftrue + ffalse + " - + "f3 + f4 + f5 + f6 + f7 + f8 + f9 + f10 + f11 + f12 + f13 + f14;}\n" - + "}")); - - ClassLoader classLoader = - JaninoUtils.compile(Thread.currentThread().getContextClassLoader(), compileUnit); - Fory fory = - builder() - .withLanguage(language) - .withCompatibleMode(CompatibleMode.COMPATIBLE) - .withClassLoader(classLoader) - .build(); - Class cls = classLoader.loadClass(compileUnit.getQualifiedClassName()); - Assert.assertNotEquals(cls, o1.getClass()); - if (language == Language.XLANG) { - fory.register(cls); - } - Object o = fory.deserialize(bytes); - Assert.assertEquals(o.getClass(), cls); - List fields = ReflectionUtils.getSortedFields(cls, false); - for (Field field : fields) { - field.setAccessible(true); - Object fieldValue = field.get(o); - if (floatFields.contains(field.getName())) { - Assert.assertEquals(fieldValue.toString(), field.getName().substring(1) + ".0"); - } else { - Assert.assertEquals(fieldValue.toString(), field.getName().substring(1)); - } - } - Assert.assertEquals(o.toString(), o1.toString()); - } - } } diff --git a/java/fory-core/src/test/java/org/apache/fory/serializer/PrimitiveSerializersTest.java b/java/fory-core/src/test/java/org/apache/fory/serializer/PrimitiveSerializersTest.java index 7304783d04..5ef68ba197 100644 --- a/java/fory-core/src/test/java/org/apache/fory/serializer/PrimitiveSerializersTest.java +++ b/java/fory-core/src/test/java/org/apache/fory/serializer/PrimitiveSerializersTest.java @@ -106,9 +106,11 @@ public void 
testPrimitiveStruct(boolean compressNumber, boolean codegen) { .withCodegen(codegen) .requireClassRegistration(false); serDeCheck( - builder.withNumberCompressed(true).withLongCompressed(LongEncoding.PVL).build(), struct); + builder.withNumberCompressed(true).withLongCompressed(LongEncoding.VARINT).build(), + struct); serDeCheck( - builder.withNumberCompressed(true).withLongCompressed(LongEncoding.SLI).build(), struct); + builder.withNumberCompressed(true).withLongCompressed(LongEncoding.TAGGED).build(), + struct); } else { Fory fory = Fory.builder() diff --git a/java/fory-core/src/test/java/org/apache/fory/serializer/UnsignedTest.java b/java/fory-core/src/test/java/org/apache/fory/serializer/UnsignedTest.java new file mode 100644 index 0000000000..676c3e4237 --- /dev/null +++ b/java/fory-core/src/test/java/org/apache/fory/serializer/UnsignedTest.java @@ -0,0 +1,903 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.fory.serializer; + +import java.util.Objects; +import lombok.Data; +import org.apache.fory.Fory; +import org.apache.fory.ForyTestBase; +import org.apache.fory.annotation.ForyField; +import org.apache.fory.annotation.Uint16Type; +import org.apache.fory.annotation.Uint32Type; +import org.apache.fory.annotation.Uint64Type; +import org.apache.fory.annotation.Uint8Type; +import org.apache.fory.config.ForyBuilder; +import org.apache.fory.config.LongEncoding; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +/** + * Unsigned fields serialization tests for java native mode(xlang=false). + * + *

<p>Type annotation constraints:
+ *
+ * <ul>
+ *   <li>{@code @Uint8Type} can only be applied to {@code byte} or {@code Byte} fields
+ *   <li>{@code @Uint16Type} can only be applied to {@code short} or {@code Short} fields
+ *   <li>{@code @Uint32Type} can only be applied to {@code int} or {@code Integer} fields
+ *   <li>{@code @Uint64Type} can only be applied to {@code long} or {@code Long} fields
+ * </ul>
+ *
+ * <p>
    The unsigned annotations indicate that the field should be treated as unsigned during + * serialization, allowing the full unsigned range of the type to be used. + */ +public class UnsignedTest extends ForyTestBase { + + // Max values for unsigned types (represented in their signed Java equivalents) + public static final byte UINT8_MAX = (byte) 255; // -1 as signed byte + public static final short UINT16_MAX = (short) 65535; // -1 as signed short + public static final int UINT32_MAX = (int) 4294967295L; // -1 as signed int + public static final long UINT64_MAX = -1L; // 0xFFFFFFFFFFFFFFFF as signed long + + // Mid-point values (at the signed/unsigned boundary) + public static final byte UINT8_MID = (byte) 128; // -128 as signed byte + public static final short UINT16_MID = (short) 32768; // -32768 as signed short + public static final int UINT32_MID = (int) 2147483648L; // Integer.MIN_VALUE as signed int + public static final long UINT64_MID = Long.MIN_VALUE; // 0x8000000000000000 + + @Data + public static class UnsignedSchemaConsistent { + @Uint8Type byte u8; + + @Uint16Type short u16; + + @Uint32Type(compress = true) + int u32Var; + + @Uint32Type(compress = false) + int u32Fixed; + + @Uint64Type(encoding = LongEncoding.VARINT) + long u64Var; + + @Uint64Type(encoding = LongEncoding.FIXED) + long u64Fixed; + + @Uint64Type(encoding = LongEncoding.TAGGED) + long u64Tagged; + + @ForyField(nullable = true) + @Uint8Type + Byte u8Nullable; + + @ForyField(nullable = true) + @Uint16Type + Short u16Nullable; + + @ForyField(nullable = true) + @Uint32Type(compress = true) + Integer u32VarNullable; + + @ForyField(nullable = true) + @Uint32Type(compress = false) + Integer u32FixedNullable; + + @ForyField(nullable = true) + @Uint64Type(encoding = LongEncoding.VARINT) + Long u64VarNullable; + + @ForyField(nullable = true) + @Uint64Type(encoding = LongEncoding.FIXED) + Long u64FixedNullable; + + @ForyField(nullable = true) + @Uint64Type(encoding = LongEncoding.TAGGED) + Long 
u64TaggedNullable; + } + + public static class UnsignedSchemaCompatible { + @Uint8Type byte u8; + + @Uint16Type short u16; + + @Uint32Type(compress = true) + int u32Var; + + @Uint32Type(compress = false) + int u32Fixed; + + @Uint64Type(encoding = LongEncoding.VARINT) + long u64Var; + + @Uint64Type(encoding = LongEncoding.FIXED) + long u64Fixed; + + @Uint64Type(encoding = LongEncoding.TAGGED) + long u64Tagged; + + @ForyField(nullable = true) + @Uint8Type + Byte u8Field2; + + @ForyField(nullable = true) + @Uint16Type + Short u16Field2; + + @ForyField(nullable = true) + @Uint32Type(compress = true) + Integer u32VarField2; + + @ForyField(nullable = true) + @Uint32Type(compress = false) + Integer u32FixedField2; + + @ForyField(nullable = true) + @Uint64Type(encoding = LongEncoding.VARINT) + Long u64VarField2; + + @ForyField(nullable = true) + @Uint64Type(encoding = LongEncoding.FIXED) + Long u64FixedField2; + + @ForyField(nullable = true) + @Uint64Type(encoding = LongEncoding.TAGGED) + Long u64TaggedField2; + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + UnsignedSchemaCompatible that = (UnsignedSchemaCompatible) o; + return u8 == that.u8 + && u16 == that.u16 + && u32Var == that.u32Var + && u32Fixed == that.u32Fixed + && u64Var == that.u64Var + && u64Fixed == that.u64Fixed + && u64Tagged == that.u64Tagged + && Objects.equals(u8Field2, that.u8Field2) + && Objects.equals(u16Field2, that.u16Field2) + && Objects.equals(u32VarField2, that.u32VarField2) + && Objects.equals(u32FixedField2, that.u32FixedField2) + && Objects.equals(u64VarField2, that.u64VarField2) + && Objects.equals(u64FixedField2, that.u64FixedField2) + && Objects.equals(u64TaggedField2, that.u64TaggedField2); + } + + @Override + public int hashCode() { + return Objects.hash( + u8, + u16, + u32Var, + u32Fixed, + u64Var, + u64Fixed, + u64Tagged, + u8Field2, + u16Field2, + u32VarField2, + u32FixedField2, + u64VarField2, 
+ u64FixedField2, + u64TaggedField2); + } + } + + private static UnsignedSchemaConsistent createConsistentWithNormalValues() { + UnsignedSchemaConsistent obj = new UnsignedSchemaConsistent(); + obj.u8 = (byte) 200; // Unsigned 200 + obj.u16 = (short) 60000; // Unsigned 60000 + obj.u32Var = 2000000000; // Within signed int range + obj.u32Fixed = 2100000000; // Within signed int range + obj.u64Var = 10000000000L; + obj.u64Fixed = 15000000000L; + obj.u64Tagged = 1000000000L; + obj.u8Nullable = (byte) 128; // Unsigned 128 + obj.u16Nullable = (short) 40000; // Unsigned 40000 + obj.u32VarNullable = 1500000000; + obj.u32FixedNullable = 1800000000; + obj.u64VarNullable = 8000000000L; + obj.u64FixedNullable = 12000000000L; + obj.u64TaggedNullable = 500000000L; + return obj; + } + + private static UnsignedSchemaConsistent createConsistentWithZeroValues() { + UnsignedSchemaConsistent obj = new UnsignedSchemaConsistent(); + obj.u8 = 0; + obj.u16 = 0; + obj.u32Var = 0; + obj.u32Fixed = 0; + obj.u64Var = 0; + obj.u64Fixed = 0; + obj.u64Tagged = 0; + obj.u8Nullable = 0; + obj.u16Nullable = 0; + obj.u32VarNullable = 0; + obj.u32FixedNullable = 0; + obj.u64VarNullable = 0L; + obj.u64FixedNullable = 0L; + obj.u64TaggedNullable = 0L; + return obj; + } + + private static UnsignedSchemaConsistent createConsistentWithMaxValues() { + UnsignedSchemaConsistent obj = new UnsignedSchemaConsistent(); + obj.u8 = UINT8_MAX; + obj.u16 = UINT16_MAX; + obj.u32Var = UINT32_MAX; + obj.u32Fixed = UINT32_MAX; + obj.u64Var = UINT64_MAX; + obj.u64Fixed = UINT64_MAX; + obj.u64Tagged = UINT64_MAX; + obj.u8Nullable = UINT8_MAX; + obj.u16Nullable = UINT16_MAX; + obj.u32VarNullable = UINT32_MAX; + obj.u32FixedNullable = UINT32_MAX; + obj.u64VarNullable = UINT64_MAX; + obj.u64FixedNullable = UINT64_MAX; + obj.u64TaggedNullable = UINT64_MAX; + return obj; + } + + private static UnsignedSchemaConsistent createConsistentWithMidValues() { + UnsignedSchemaConsistent obj = new UnsignedSchemaConsistent(); + obj.u8 = 
UINT8_MID; + obj.u16 = UINT16_MID; + obj.u32Var = UINT32_MID; + obj.u32Fixed = UINT32_MID; + obj.u64Var = UINT64_MID; + obj.u64Fixed = UINT64_MID; + obj.u64Tagged = UINT64_MID; + obj.u8Nullable = UINT8_MID; + obj.u16Nullable = UINT16_MID; + obj.u32VarNullable = UINT32_MID; + obj.u32FixedNullable = UINT32_MID; + obj.u64VarNullable = UINT64_MID; + obj.u64FixedNullable = UINT64_MID; + obj.u64TaggedNullable = UINT64_MID; + return obj; + } + + private static UnsignedSchemaConsistent createConsistentWithNullValues() { + UnsignedSchemaConsistent obj = new UnsignedSchemaConsistent(); + obj.u8 = 100; + obj.u16 = 30000; + obj.u32Var = 1500000000; + obj.u32Fixed = 2000000000; + obj.u64Var = 5000000000L; + obj.u64Fixed = 7500000000L; + obj.u64Tagged = 250000000L; + obj.u8Nullable = null; + obj.u16Nullable = null; + obj.u32VarNullable = null; + obj.u32FixedNullable = null; + obj.u64VarNullable = null; + obj.u64FixedNullable = null; + obj.u64TaggedNullable = null; + return obj; + } + + private static UnsignedSchemaCompatible createCompatibleWithNormalValues() { + UnsignedSchemaCompatible obj = new UnsignedSchemaCompatible(); + obj.u8 = (byte) 200; + obj.u16 = (short) 60000; + obj.u32Var = 2000000000; + obj.u32Fixed = 2100000000; + obj.u64Var = 10000000000L; + obj.u64Fixed = 15000000000L; + obj.u64Tagged = 1000000000L; + obj.u8Field2 = (byte) 128; + obj.u16Field2 = (short) 40000; + obj.u32VarField2 = 1500000000; + obj.u32FixedField2 = 1800000000; + obj.u64VarField2 = 8000000000L; + obj.u64FixedField2 = 12000000000L; + obj.u64TaggedField2 = 500000000L; + return obj; + } + + private static UnsignedSchemaCompatible createCompatibleWithZeroValues() { + UnsignedSchemaCompatible obj = new UnsignedSchemaCompatible(); + obj.u8 = 0; + obj.u16 = 0; + obj.u32Var = 0; + obj.u32Fixed = 0; + obj.u64Var = 0; + obj.u64Fixed = 0; + obj.u64Tagged = 0; + obj.u8Field2 = 0; + obj.u16Field2 = 0; + obj.u32VarField2 = 0; + obj.u32FixedField2 = 0; + obj.u64VarField2 = 0L; + obj.u64FixedField2 = 0L; + 
obj.u64TaggedField2 = 0L; + return obj; + } + + private static UnsignedSchemaCompatible createCompatibleWithMaxValues() { + UnsignedSchemaCompatible obj = new UnsignedSchemaCompatible(); + obj.u8 = UINT8_MAX; + obj.u16 = UINT16_MAX; + obj.u32Var = UINT32_MAX; + obj.u32Fixed = UINT32_MAX; + obj.u64Var = UINT64_MAX; + obj.u64Fixed = UINT64_MAX; + obj.u64Tagged = UINT64_MAX; + obj.u8Field2 = UINT8_MAX; + obj.u16Field2 = UINT16_MAX; + obj.u32VarField2 = UINT32_MAX; + obj.u32FixedField2 = UINT32_MAX; + obj.u64VarField2 = UINT64_MAX; + obj.u64FixedField2 = UINT64_MAX; + obj.u64TaggedField2 = UINT64_MAX; + return obj; + } + + private static UnsignedSchemaCompatible createCompatibleWithMidValues() { + UnsignedSchemaCompatible obj = new UnsignedSchemaCompatible(); + obj.u8 = UINT8_MID; + obj.u16 = UINT16_MID; + obj.u32Var = UINT32_MID; + obj.u32Fixed = UINT32_MID; + obj.u64Var = UINT64_MID; + obj.u64Fixed = UINT64_MID; + obj.u64Tagged = UINT64_MID; + obj.u8Field2 = UINT8_MID; + obj.u16Field2 = UINT16_MID; + obj.u32VarField2 = UINT32_MID; + obj.u32FixedField2 = UINT32_MID; + obj.u64VarField2 = UINT64_MID; + obj.u64FixedField2 = UINT64_MID; + obj.u64TaggedField2 = UINT64_MID; + return obj; + } + + private static UnsignedSchemaCompatible createCompatibleWithNullValues() { + UnsignedSchemaCompatible obj = new UnsignedSchemaCompatible(); + obj.u8 = 100; + obj.u16 = 30000; + obj.u32Var = 1500000000; + obj.u32Fixed = 2000000000; + obj.u64Var = 5000000000L; + obj.u64Fixed = 7500000000L; + obj.u64Tagged = 250000000L; + obj.u8Field2 = null; + obj.u16Field2 = null; + obj.u32VarField2 = null; + obj.u32FixedField2 = null; + obj.u64VarField2 = null; + obj.u64FixedField2 = null; + obj.u64TaggedField2 = null; + return obj; + } + + @DataProvider + public static Object[][] javaForyConfig() { + return new Object[][] { + { + new ForyBuilder() + .withXlang(false) + .withCompatible(false) + .withCodegen(false) + .requireClassRegistration(false) + .build() + }, + { + new ForyBuilder() + 
.withXlang(false) + .withCompatible(false) + .withCodegen(true) + .requireClassRegistration(false) + .build() + }, + { + new ForyBuilder() + .withXlang(false) + .withCompatible(true) + .withCodegen(false) + .requireClassRegistration(false) + .build() + }, + { + new ForyBuilder() + .withXlang(false) + .withCompatible(true) + .withCodegen(true) + .requireClassRegistration(false) + .build() + } + }; + } + + @DataProvider(name = "fory") + public static Object[][] foryProvider() { + return javaForyConfig(); + } + + // Schema consistent tests + @Test(dataProvider = "fory") + public void testUnsignedSchemaConsistentNormalValues(Fory fory) { + serDeCheck(fory, createConsistentWithNormalValues()); + } + + @Test(dataProvider = "fory") + public void testUnsignedSchemaConsistentZeroValues(Fory fory) { + serDeCheck(fory, createConsistentWithZeroValues()); + } + + @Test(dataProvider = "fory") + public void testUnsignedSchemaConsistentMaxValues(Fory fory) { + serDeCheck(fory, createConsistentWithMaxValues()); + } + + @Test(dataProvider = "fory") + public void testUnsignedSchemaConsistentMidValues(Fory fory) { + serDeCheck(fory, createConsistentWithMidValues()); + } + + @Test(dataProvider = "fory") + public void testUnsignedSchemaConsistentNullValues(Fory fory) { + serDeCheck(fory, createConsistentWithNullValues()); + } + + // Schema compatible tests + @Test(dataProvider = "fory") + public void testUnsignedSchemaCompatibleNormalValues(Fory fory) { + serDeCheck(fory, createCompatibleWithNormalValues()); + } + + @Test(dataProvider = "fory") + public void testUnsignedSchemaCompatibleZeroValues(Fory fory) { + serDeCheck(fory, createCompatibleWithZeroValues()); + } + + @Test(dataProvider = "fory") + public void testUnsignedSchemaCompatibleMaxValues(Fory fory) { + serDeCheck(fory, createCompatibleWithMaxValues()); + } + + @Test(dataProvider = "fory") + public void testUnsignedSchemaCompatibleMidValues(Fory fory) { + serDeCheck(fory, createCompatibleWithMidValues()); + } + + 
@Test(dataProvider = "fory") + public void testUnsignedSchemaCompatibleNullValues(Fory fory) { + serDeCheck(fory, createCompatibleWithNullValues()); + } + + // Test specific edge cases for each unsigned type + public static class Uint8OnlyStruct { + @Uint8Type byte value; + + @ForyField(nullable = true) + @Uint8Type + Byte nullableValue; + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + Uint8OnlyStruct that = (Uint8OnlyStruct) o; + return value == that.value && Objects.equals(nullableValue, that.nullableValue); + } + + @Override + public int hashCode() { + return Objects.hash(value, nullableValue); + } + } + + @Test(dataProvider = "fory") + public void testUint8EdgeCases(Fory fory) { + // Test 0 + Uint8OnlyStruct zero = new Uint8OnlyStruct(); + zero.value = 0; + zero.nullableValue = 0; + serDeCheck(fory, zero); + + // Test 1 + Uint8OnlyStruct one = new Uint8OnlyStruct(); + one.value = 1; + one.nullableValue = 1; + serDeCheck(fory, one); + + // Test 127 (max signed byte) + Uint8OnlyStruct maxSignedByte = new Uint8OnlyStruct(); + maxSignedByte.value = 127; + maxSignedByte.nullableValue = 127; + serDeCheck(fory, maxSignedByte); + + // Test 128 (unsigned, appears as -128 in signed byte) + Uint8OnlyStruct val128 = new Uint8OnlyStruct(); + val128.value = (byte) 128; + val128.nullableValue = (byte) 128; + serDeCheck(fory, val128); + + // Test 255 (max uint8, appears as -1 in signed byte) + Uint8OnlyStruct maxUint8 = new Uint8OnlyStruct(); + maxUint8.value = (byte) 255; + maxUint8.nullableValue = (byte) 255; + serDeCheck(fory, maxUint8); + + // Test null + Uint8OnlyStruct withNull = new Uint8OnlyStruct(); + withNull.value = (byte) 200; + withNull.nullableValue = null; + serDeCheck(fory, withNull); + } + + public static class Uint16OnlyStruct { + @Uint16Type short value; + + @ForyField(nullable = true) + @Uint16Type + Short nullableValue; + + @Override + public boolean equals(Object 
o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + Uint16OnlyStruct that = (Uint16OnlyStruct) o; + return value == that.value && Objects.equals(nullableValue, that.nullableValue); + } + + @Override + public int hashCode() { + return Objects.hash(value, nullableValue); + } + } + + @Test(dataProvider = "fory") + public void testUint16EdgeCases(Fory fory) { + // Test 0 + Uint16OnlyStruct zero = new Uint16OnlyStruct(); + zero.value = 0; + zero.nullableValue = 0; + serDeCheck(fory, zero); + + // Test 1 + Uint16OnlyStruct one = new Uint16OnlyStruct(); + one.value = 1; + one.nullableValue = 1; + serDeCheck(fory, one); + + // Test 32767 (max signed short) + Uint16OnlyStruct maxSignedShort = new Uint16OnlyStruct(); + maxSignedShort.value = 32767; + maxSignedShort.nullableValue = 32767; + serDeCheck(fory, maxSignedShort); + + // Test 32768 (unsigned, appears as -32768 in signed short) + Uint16OnlyStruct val32768 = new Uint16OnlyStruct(); + val32768.value = (short) 32768; + val32768.nullableValue = (short) 32768; + serDeCheck(fory, val32768); + + // Test 65535 (max uint16, appears as -1 in signed short) + Uint16OnlyStruct maxUint16 = new Uint16OnlyStruct(); + maxUint16.value = (short) 65535; + maxUint16.nullableValue = (short) 65535; + serDeCheck(fory, maxUint16); + + // Test null + Uint16OnlyStruct withNull = new Uint16OnlyStruct(); + withNull.value = (short) 50000; + withNull.nullableValue = null; + serDeCheck(fory, withNull); + } + + public static class Uint32OnlyStruct { + @Uint32Type(compress = true) + int varValue; + + @Uint32Type(compress = false) + int fixedValue; + + @ForyField(nullable = true) + @Uint32Type(compress = true) + Integer varNullableValue; + + @ForyField(nullable = true) + @Uint32Type(compress = false) + Integer fixedNullableValue; + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + Uint32OnlyStruct that = 
(Uint32OnlyStruct) o; + return varValue == that.varValue + && fixedValue == that.fixedValue + && Objects.equals(varNullableValue, that.varNullableValue) + && Objects.equals(fixedNullableValue, that.fixedNullableValue); + } + + @Override + public int hashCode() { + return Objects.hash(varValue, fixedValue, varNullableValue, fixedNullableValue); + } + } + + @Test(dataProvider = "fory") + public void testUint32EdgeCases(Fory fory) { + // Test 0 + Uint32OnlyStruct zero = new Uint32OnlyStruct(); + zero.varValue = 0; + zero.fixedValue = 0; + zero.varNullableValue = 0; + zero.fixedNullableValue = 0; + serDeCheck(fory, zero); + + // Test 1 + Uint32OnlyStruct one = new Uint32OnlyStruct(); + one.varValue = 1; + one.fixedValue = 1; + one.varNullableValue = 1; + one.fixedNullableValue = 1; + serDeCheck(fory, one); + + // Test 2147483647 (max signed int) + Uint32OnlyStruct maxSignedInt = new Uint32OnlyStruct(); + maxSignedInt.varValue = 2147483647; + maxSignedInt.fixedValue = 2147483647; + maxSignedInt.varNullableValue = 2147483647; + maxSignedInt.fixedNullableValue = 2147483647; + serDeCheck(fory, maxSignedInt); + + // Test 2147483648 (unsigned, appears as Integer.MIN_VALUE in signed int) + Uint32OnlyStruct val2147483648 = new Uint32OnlyStruct(); + val2147483648.varValue = (int) 2147483648L; + val2147483648.fixedValue = (int) 2147483648L; + val2147483648.varNullableValue = (int) 2147483648L; + val2147483648.fixedNullableValue = (int) 2147483648L; + serDeCheck(fory, val2147483648); + + // Test 4294967295 (max uint32, appears as -1 in signed int) + Uint32OnlyStruct maxUint32 = new Uint32OnlyStruct(); + maxUint32.varValue = (int) 4294967295L; + maxUint32.fixedValue = (int) 4294967295L; + maxUint32.varNullableValue = (int) 4294967295L; + maxUint32.fixedNullableValue = (int) 4294967295L; + serDeCheck(fory, maxUint32); + + // Test null + Uint32OnlyStruct withNull = new Uint32OnlyStruct(); + withNull.varValue = 1000000000; + withNull.fixedValue = 1000000000; + 
withNull.varNullableValue = null; + withNull.fixedNullableValue = null; + serDeCheck(fory, withNull); + } + + public static class Uint64OnlyStruct { + @Uint64Type(encoding = LongEncoding.VARINT) + long varValue; + + @Uint64Type(encoding = LongEncoding.FIXED) + long fixedValue; + + @Uint64Type(encoding = LongEncoding.TAGGED) + long taggedValue; + + @ForyField(nullable = true) + @Uint64Type(encoding = LongEncoding.VARINT) + Long varNullableValue; + + @ForyField(nullable = true) + @Uint64Type(encoding = LongEncoding.FIXED) + Long fixedNullableValue; + + @ForyField(nullable = true) + @Uint64Type(encoding = LongEncoding.TAGGED) + Long taggedNullableValue; + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + Uint64OnlyStruct that = (Uint64OnlyStruct) o; + return varValue == that.varValue + && fixedValue == that.fixedValue + && taggedValue == that.taggedValue + && Objects.equals(varNullableValue, that.varNullableValue) + && Objects.equals(fixedNullableValue, that.fixedNullableValue) + && Objects.equals(taggedNullableValue, that.taggedNullableValue); + } + + @Override + public int hashCode() { + return Objects.hash( + varValue, + fixedValue, + taggedValue, + varNullableValue, + fixedNullableValue, + taggedNullableValue); + } + } + + @Test(dataProvider = "fory") + public void testUint64EdgeCases(Fory fory) { + // Test 0 + Uint64OnlyStruct zero = new Uint64OnlyStruct(); + zero.varValue = 0; + zero.fixedValue = 0; + zero.taggedValue = 0; + zero.varNullableValue = 0L; + zero.fixedNullableValue = 0L; + zero.taggedNullableValue = 0L; + serDeCheck(fory, zero); + + // Test 1 + Uint64OnlyStruct one = new Uint64OnlyStruct(); + one.varValue = 1; + one.fixedValue = 1; + one.taggedValue = 1; + one.varNullableValue = 1L; + one.fixedNullableValue = 1L; + one.taggedNullableValue = 1L; + serDeCheck(fory, one); + + // Test Long.MAX_VALUE (max signed long) + Uint64OnlyStruct maxSignedLong = new 
Uint64OnlyStruct(); + maxSignedLong.varValue = Long.MAX_VALUE; + maxSignedLong.fixedValue = Long.MAX_VALUE; + maxSignedLong.taggedValue = Long.MAX_VALUE; + maxSignedLong.varNullableValue = Long.MAX_VALUE; + maxSignedLong.fixedNullableValue = Long.MAX_VALUE; + maxSignedLong.taggedNullableValue = Long.MAX_VALUE; + serDeCheck(fory, maxSignedLong); + + // Test Long.MIN_VALUE (this represents 2^63 as unsigned) + Uint64OnlyStruct minValue = new Uint64OnlyStruct(); + minValue.varValue = Long.MIN_VALUE; + minValue.fixedValue = Long.MIN_VALUE; + minValue.taggedValue = Long.MIN_VALUE; + minValue.varNullableValue = Long.MIN_VALUE; + minValue.fixedNullableValue = Long.MIN_VALUE; + minValue.taggedNullableValue = Long.MIN_VALUE; + serDeCheck(fory, minValue); + + // Test -1 (this represents max uint64: 0xFFFFFFFFFFFFFFFF) + Uint64OnlyStruct maxUint64 = new Uint64OnlyStruct(); + maxUint64.varValue = -1L; + maxUint64.fixedValue = -1L; + maxUint64.taggedValue = -1L; + maxUint64.varNullableValue = -1L; + maxUint64.fixedNullableValue = -1L; + maxUint64.taggedNullableValue = -1L; + serDeCheck(fory, maxUint64); + + // Test null + Uint64OnlyStruct withNull = new Uint64OnlyStruct(); + withNull.varValue = 10000000000L; + withNull.fixedValue = 10000000000L; + withNull.taggedValue = 10000000000L; + withNull.varNullableValue = null; + withNull.fixedNullableValue = null; + withNull.taggedNullableValue = null; + serDeCheck(fory, withNull); + } + + // Test tagged encoding boundary values + @Test(dataProvider = "fory") + public void testTaggedEncodingBoundaryValues(Fory fory) { + Uint64OnlyStruct obj = new Uint64OnlyStruct(); + + // Test value at tagged 4-byte boundary: -1073741824 (HALF_MIN_INT_VALUE) + obj.varValue = -1073741824L; + obj.fixedValue = -1073741824L; + obj.taggedValue = -1073741824L; + obj.varNullableValue = -1073741824L; + obj.fixedNullableValue = -1073741824L; + obj.taggedNullableValue = -1073741824L; + serDeCheck(fory, obj); + + // Test value at tagged 4-byte boundary: 
1073741823 (HALF_MAX_INT_VALUE) + obj.varValue = 1073741823L; + obj.fixedValue = 1073741823L; + obj.taggedValue = 1073741823L; + obj.varNullableValue = 1073741823L; + obj.fixedNullableValue = 1073741823L; + obj.taggedNullableValue = 1073741823L; + serDeCheck(fory, obj); + + // Test value just below tagged 4-byte boundary + obj.varValue = -1073741825L; + obj.fixedValue = -1073741825L; + obj.taggedValue = -1073741825L; + obj.varNullableValue = -1073741825L; + obj.fixedNullableValue = -1073741825L; + obj.taggedNullableValue = -1073741825L; + serDeCheck(fory, obj); + + // Test value just above tagged 4-byte boundary + obj.varValue = 1073741824L; + obj.fixedValue = 1073741824L; + obj.taggedValue = 1073741824L; + obj.varNullableValue = 1073741824L; + obj.fixedNullableValue = 1073741824L; + obj.taggedNullableValue = 1073741824L; + serDeCheck(fory, obj); + } + + // Test varint encoding boundary values + @Test(dataProvider = "fory") + public void testVarintEncodingBoundaryValues(Fory fory) { + Uint32OnlyStruct obj32 = new Uint32OnlyStruct(); + + // 1-byte varint boundary (0-127) + obj32.varValue = 127; + obj32.fixedValue = 127; + obj32.varNullableValue = 127; + obj32.fixedNullableValue = 127; + serDeCheck(fory, obj32); + + // 2-byte varint boundary (128-16383) + obj32.varValue = 128; + obj32.fixedValue = 128; + obj32.varNullableValue = 128; + obj32.fixedNullableValue = 128; + serDeCheck(fory, obj32); + + obj32.varValue = 16383; + obj32.fixedValue = 16383; + obj32.varNullableValue = 16383; + obj32.fixedNullableValue = 16383; + serDeCheck(fory, obj32); + + // 3-byte varint boundary (16384-2097151) + obj32.varValue = 16384; + obj32.fixedValue = 16384; + obj32.varNullableValue = 16384; + obj32.fixedNullableValue = 16384; + serDeCheck(fory, obj32); + + obj32.varValue = 2097151; + obj32.fixedValue = 2097151; + obj32.varNullableValue = 2097151; + obj32.fixedNullableValue = 2097151; + serDeCheck(fory, obj32); + + // 4-byte varint boundary (2097152-268435455) + obj32.varValue = 
2097152; + obj32.fixedValue = 2097152; + obj32.varNullableValue = 2097152; + obj32.fixedNullableValue = 2097152; + serDeCheck(fory, obj32); + + obj32.varValue = 268435455; + obj32.fixedValue = 268435455; + obj32.varNullableValue = 268435455; + obj32.fixedNullableValue = 268435455; + serDeCheck(fory, obj32); + + // 5-byte varint boundary (268435456+) + obj32.varValue = 268435456; + obj32.fixedValue = 268435456; + obj32.varNullableValue = 268435456; + obj32.fixedNullableValue = 268435456; + serDeCheck(fory, obj32); + } +} diff --git a/java/fory-core/src/test/java/org/apache/fory/type/DescriptorGrouperTest.java b/java/fory-core/src/test/java/org/apache/fory/type/DescriptorGrouperTest.java index 1f96d17869..f5bd079518 100644 --- a/java/fory-core/src/test/java/org/apache/fory/type/DescriptorGrouperTest.java +++ b/java/fory-core/src/test/java/org/apache/fory/type/DescriptorGrouperTest.java @@ -75,8 +75,9 @@ private List createDescriptors() { @Test public void testComparatorByTypeAndName() { + Fory fory = Fory.builder().build(); List descriptors = createDescriptors(); - descriptors.sort(DescriptorGrouper.COMPARATOR_BY_TYPE_AND_NAME); + descriptors.sort(fory.getClassResolver().createTypeAndNameComparator()); List> classes = descriptors.stream().map(Descriptor::getRawType).collect(Collectors.toList()); List> expected = @@ -106,26 +107,30 @@ public void testComparatorByTypeAndName() { @Test public void testPrimitiveComparator() { + Fory fory = Fory.builder().build(); List descriptors = new ArrayList<>(); int index = 0; for (Class aClass : Primitives.allPrimitiveTypes()) { descriptors.add(createDescriptor(TypeRef.of(aClass), "f" + index++, -1, "TestClass", false)); } Collections.shuffle(descriptors, new Random(7)); - descriptors.sort(DescriptorGrouper.getPrimitiveComparator(false, false)); + descriptors.sort(fory.getClassResolver().getPrimitiveComparator()); List> classes = descriptors.stream().map(Descriptor::getRawType).collect(Collectors.toList()); + // With compression 
enabled (default): int/long are compressed and go to the end + // Non-compressed sorted by size (desc), then typeId (desc): char(25) > short(3), byte(2) > + // boolean(1) List> expected = Arrays.asList( double.class, - long.class, float.class, - int.class, - short.class, char.class, + short.class, byte.class, boolean.class, - void.class); + void.class, + long.class, + int.class); assertEquals(classes, expected); } @@ -137,15 +142,19 @@ public void testPrimitiveCompressedComparator() { descriptors.add(createDescriptor(TypeRef.of(aClass), "f" + index++, -1, "TestClass", false)); } Collections.shuffle(descriptors, new Random(7)); - descriptors.sort(DescriptorGrouper.getPrimitiveComparator(true, true)); + Fory fory = Fory.builder().build(); + descriptors.sort(fory.getClassResolver().getPrimitiveComparator()); List> classes = descriptors.stream().map(Descriptor::getRawType).collect(Collectors.toList()); + // With compression enabled (default): int/long are compressed and go to the end + // Non-compressed sorted by size (desc), then typeId (desc): char(25) > short(3), byte(2) > + // boolean(1) List> expected = Arrays.asList( double.class, float.class, - short.class, char.class, + short.class, byte.class, boolean.class, void.class, @@ -156,6 +165,7 @@ public void testPrimitiveCompressedComparator() { @Test public void testGrouper() { + Fory fory = Fory.builder().build(); List descriptors = createDescriptors(); int index = 0; descriptors.add( @@ -182,26 +192,26 @@ public void testGrouper() { descriptors, false, null, - false, - false, - DescriptorGrouper.COMPARATOR_BY_TYPE_AND_NAME) + fory.getClassResolver().getPrimitiveComparator(), + fory.getClassResolver().createTypeAndNameComparator()) .sort(); { List> classes = grouper.getPrimitiveDescriptors().stream() .map(Descriptor::getRawType) .collect(Collectors.toList()); + // With compression enabled: int/long go to end, sorted by size then typeId (desc) List> expected = Arrays.asList( double.class, - long.class, float.class, 
- int.class, - short.class, char.class, + short.class, byte.class, boolean.class, - void.class); + void.class, + long.class, + int.class); assertEquals(classes, expected); } { @@ -209,17 +219,18 @@ public void testGrouper() { grouper.getBoxedDescriptors().stream() .map(Descriptor::getRawType) .collect(Collectors.toList()); + // With compression enabled: Integer/Long go to end, sorted by size then typeId (desc) List> expected = Arrays.asList( Double.class, - Long.class, Float.class, - Integer.class, - Short.class, Character.class, + Short.class, Byte.class, Boolean.class, - Void.class); + Void.class, + Long.class, + Integer.class); assertEquals(classes, expected); } { @@ -227,9 +238,9 @@ public void testGrouper() { grouper.getCollectionDescriptors().stream() .map(Descriptor::getTypeRef) .collect(Collectors.toList()); - // Sorted by type name: List < List (alphabetically) + // Normalized type name is the same (Collection), fallback to field name order (c4 then c5) List> expected = - Arrays.asList(new TypeRef>() {}, new TypeRef>() {}); + Arrays.asList(new TypeRef>() {}, new TypeRef>() {}); assertEquals(types, expected); } { @@ -260,27 +271,29 @@ public void testGrouper() { @Test public void testCompressedPrimitiveGrouper() { + Fory fory = Fory.builder().build(); DescriptorGrouper grouper = DescriptorGrouper.createDescriptorGrouper( d -> ReflectionUtils.isMonomorphic(d.getRawType()), createDescriptors(), false, null, - true, - true, - DescriptorGrouper.COMPARATOR_BY_TYPE_AND_NAME) + fory.getClassResolver().getPrimitiveComparator(), + fory.getClassResolver().createTypeAndNameComparator()) .sort(); { List> classes = grouper.getPrimitiveDescriptors().stream() .map(Descriptor::getRawType) .collect(Collectors.toList()); + // With compression enabled: int/long go to end, sorted by size then typeId (desc) + // char has higher typeId (25) than short (3) List> expected = Arrays.asList( double.class, float.class, - short.class, char.class, + short.class, byte.class, 
boolean.class, void.class, @@ -293,12 +306,14 @@ public void testCompressedPrimitiveGrouper() { grouper.getBoxedDescriptors().stream() .map(Descriptor::getRawType) .collect(Collectors.toList()); + // With compression enabled: Integer/Long go to end, sorted by size then typeId (desc) + // Character has higher typeId than Short List> expected = Arrays.asList( Double.class, Float.class, - Short.class, Character.class, + Short.class, Byte.class, Boolean.class, Void.class, @@ -383,9 +398,9 @@ public void testStaticComparatorDoesNotNormalize() { descriptors.add( createDescriptor( new TypeRef>() {}, "arrayListField", -1, "TestClass", false)); - + Fory fory = Fory.builder().build(); // Sort with the static comparator - descriptors.sort(DescriptorGrouper.COMPARATOR_BY_TYPE_AND_NAME); + descriptors.sort(fory.getClassResolver().createTypeAndNameComparator()); // Get type names after sorting List typeNames = diff --git a/java/fory-core/src/test/java/org/apache/fory/xlang/CPPXlangTest.java b/java/fory-core/src/test/java/org/apache/fory/xlang/CPPXlangTest.java index 7cc0e9c2ea..d596601e5e 100644 --- a/java/fory-core/src/test/java/org/apache/fory/xlang/CPPXlangTest.java +++ b/java/fory-core/src/test/java/org/apache/fory/xlang/CPPXlangTest.java @@ -393,4 +393,19 @@ public void testCircularRefSchemaConsistent(boolean enableCodegen) throws java.i public void testCircularRefCompatible(boolean enableCodegen) throws java.io.IOException { super.testCircularRefCompatible(enableCodegen); } + + @Test(dataProvider = "enableCodegen") + public void testUnsignedSchemaConsistent(boolean enableCodegen) throws java.io.IOException { + super.testUnsignedSchemaConsistent(enableCodegen); + } + + @Test(dataProvider = "enableCodegen") + public void testUnsignedSchemaConsistentSimple(boolean enableCodegen) throws java.io.IOException { + super.testUnsignedSchemaConsistentSimple(enableCodegen); + } + + @Test(dataProvider = "enableCodegen") + public void testUnsignedSchemaCompatible(boolean enableCodegen) 
throws java.io.IOException { + super.testUnsignedSchemaCompatible(enableCodegen); + } } diff --git a/java/fory-core/src/test/java/org/apache/fory/xlang/GoXlangTest.java b/java/fory-core/src/test/java/org/apache/fory/xlang/GoXlangTest.java index bd351ddd7b..0b463c60d4 100644 --- a/java/fory-core/src/test/java/org/apache/fory/xlang/GoXlangTest.java +++ b/java/fory-core/src/test/java/org/apache/fory/xlang/GoXlangTest.java @@ -458,4 +458,19 @@ public void testCircularRefSchemaConsistent(boolean enableCodegen) throws java.i public void testCircularRefCompatible(boolean enableCodegen) throws java.io.IOException { super.testCircularRefCompatible(enableCodegen); } + + @Test(dataProvider = "enableCodegen") + public void testUnsignedSchemaConsistent(boolean enableCodegen) throws java.io.IOException { + super.testUnsignedSchemaConsistent(enableCodegen); + } + + @Test(dataProvider = "enableCodegen") + public void testUnsignedSchemaConsistentSimple(boolean enableCodegen) throws java.io.IOException { + super.testUnsignedSchemaConsistentSimple(enableCodegen); + } + + @Test(dataProvider = "enableCodegen") + public void testUnsignedSchemaCompatible(boolean enableCodegen) throws java.io.IOException { + super.testUnsignedSchemaCompatible(enableCodegen); + } } diff --git a/java/fory-core/src/test/java/org/apache/fory/xlang/PythonXlangTest.java b/java/fory-core/src/test/java/org/apache/fory/xlang/PythonXlangTest.java index 3a03229a0a..c7433f240e 100644 --- a/java/fory-core/src/test/java/org/apache/fory/xlang/PythonXlangTest.java +++ b/java/fory-core/src/test/java/org/apache/fory/xlang/PythonXlangTest.java @@ -310,4 +310,19 @@ public void testCircularRefSchemaConsistent(boolean enableCodegen) throws IOExce public void testCircularRefCompatible(boolean enableCodegen) throws IOException { super.testCircularRefCompatible(enableCodegen); } + + @Test(dataProvider = "enableCodegen") + public void testUnsignedSchemaConsistent(boolean enableCodegen) throws java.io.IOException { + 
super.testUnsignedSchemaConsistent(enableCodegen); + } + + @Test(dataProvider = "enableCodegen") + public void testUnsignedSchemaConsistentSimple(boolean enableCodegen) throws java.io.IOException { + super.testUnsignedSchemaConsistentSimple(enableCodegen); + } + + @Test(dataProvider = "enableCodegen") + public void testUnsignedSchemaCompatible(boolean enableCodegen) throws java.io.IOException { + super.testUnsignedSchemaCompatible(enableCodegen); + } } diff --git a/java/fory-core/src/test/java/org/apache/fory/xlang/RustXlangTest.java b/java/fory-core/src/test/java/org/apache/fory/xlang/RustXlangTest.java index 2bf9490480..56f1061257 100644 --- a/java/fory-core/src/test/java/org/apache/fory/xlang/RustXlangTest.java +++ b/java/fory-core/src/test/java/org/apache/fory/xlang/RustXlangTest.java @@ -279,4 +279,19 @@ public void testCircularRefSchemaConsistent(boolean enableCodegen) throws java.i public void testCircularRefCompatible(boolean enableCodegen) throws java.io.IOException { super.testCircularRefCompatible(enableCodegen); } + + @Test(dataProvider = "enableCodegen") + public void testUnsignedSchemaConsistent(boolean enableCodegen) throws java.io.IOException { + super.testUnsignedSchemaConsistent(enableCodegen); + } + + @Test(dataProvider = "enableCodegen") + public void testUnsignedSchemaConsistentSimple(boolean enableCodegen) throws java.io.IOException { + super.testUnsignedSchemaConsistentSimple(enableCodegen); + } + + @Test(dataProvider = "enableCodegen") + public void testUnsignedSchemaCompatible(boolean enableCodegen) throws java.io.IOException { + super.testUnsignedSchemaCompatible(enableCodegen); + } } diff --git a/java/fory-core/src/test/java/org/apache/fory/xlang/XlangTestBase.java b/java/fory-core/src/test/java/org/apache/fory/xlang/XlangTestBase.java index a6f0d450e9..837ff11527 100644 --- a/java/fory-core/src/test/java/org/apache/fory/xlang/XlangTestBase.java +++ b/java/fory-core/src/test/java/org/apache/fory/xlang/XlangTestBase.java @@ -36,8 +36,13 @@ 
import org.apache.fory.Fory; import org.apache.fory.ForyTestBase; import org.apache.fory.annotation.ForyField; +import org.apache.fory.annotation.Uint16Type; +import org.apache.fory.annotation.Uint32Type; +import org.apache.fory.annotation.Uint64Type; +import org.apache.fory.annotation.Uint8Type; import org.apache.fory.config.CompatibleMode; import org.apache.fory.config.Language; +import org.apache.fory.config.LongEncoding; import org.apache.fory.memory.MemoryBuffer; import org.apache.fory.memory.MemoryUtils; import org.apache.fory.meta.MetaCompressor; @@ -2465,4 +2470,260 @@ private Object normalizeNulls(Object obj) { // For other objects, return as-is return obj; } + + // ==================== Unsigned Number Tests ==================== + + /** + * Test struct for unsigned number schema consistent tests. Contains all unsigned numeric types + * with different encoding options. + */ + @Data + static class UnsignedSchemaConsistent { + // Primitive unsigned fields (use Field suffix to avoid reserved keywords in Rust/Go) + @Uint8Type byte u8Field; + + @Uint16Type short u16Field; + + @Uint32Type(compress = true) + int u32VarField; + + @Uint32Type(compress = false) + int u32FixedField; + + @Uint64Type(encoding = LongEncoding.VARINT) + long u64VarField; + + @Uint64Type(encoding = LongEncoding.FIXED) + long u64FixedField; + + @Uint64Type(encoding = LongEncoding.TAGGED) + long u64TaggedField; + + // Boxed nullable unsigned fields + @ForyField(nullable = true) + @Uint8Type + Byte u8NullableField; + + @ForyField(nullable = true) + @Uint16Type + Short u16NullableField; + + @ForyField(nullable = true) + @Uint32Type(compress = true) + Integer u32VarNullableField; + + @ForyField(nullable = true) + @Uint32Type(compress = false) + Integer u32FixedNullableField; + + @ForyField(nullable = true) + @Uint64Type(encoding = LongEncoding.VARINT) + Long u64VarNullableField; + + @ForyField(nullable = true) + @Uint64Type(encoding = LongEncoding.FIXED) + Long u64FixedNullableField; + + 
@ForyField(nullable = true) + @Uint64Type(encoding = LongEncoding.TAGGED) + Long u64TaggedNullableField; + } + + @Data + static class UnsignedSchemaConsistentSimple { + @Uint64Type(encoding = LongEncoding.TAGGED) + long u64Tagged; + + @ForyField(nullable = true) + @Uint64Type(encoding = LongEncoding.TAGGED) + Long u64TaggedNullable; + } + + @Test(dataProvider = "enableCodegen") + public void testUnsignedSchemaConsistentSimple(boolean enableCodegen) throws java.io.IOException { + String caseName = "test_unsigned_schema_consistent_simple"; + Fory fory = + Fory.builder() + .withLanguage(Language.XLANG) + .withCompatibleMode(CompatibleMode.SCHEMA_CONSISTENT) + .withCodegen(enableCodegen) + .build(); + fory.register(UnsignedSchemaConsistentSimple.class, 1); + UnsignedSchemaConsistentSimple obj = new UnsignedSchemaConsistentSimple(); + obj.u64Tagged = 1000000000L; // Within tagged range + obj.u64TaggedNullable = 500000000L; // Within tagged range + // First verify Java serialization works + Assert.assertEquals(xserDe(fory, obj), obj); + + MemoryBuffer buffer = MemoryBuffer.newHeapBuffer(512); + fory.serialize(buffer, obj); + ExecutionContext ctx = prepareExecution(caseName, buffer.getBytes(0, buffer.writerIndex())); + runPeer(ctx); + MemoryBuffer buffer2 = readBuffer(ctx.dataFile()); + UnsignedSchemaConsistentSimple result = + (UnsignedSchemaConsistentSimple) fory.deserialize(buffer2); + Assert.assertEquals(result, obj); + } + + @Test(dataProvider = "enableCodegen") + public void testUnsignedSchemaConsistent(boolean enableCodegen) throws java.io.IOException { + String caseName = "test_unsigned_schema_consistent"; + Fory fory = + Fory.builder() + .withLanguage(Language.XLANG) + .withCompatibleMode(CompatibleMode.SCHEMA_CONSISTENT) + .withCodegen(enableCodegen) + .build(); + fory.register(UnsignedSchemaConsistent.class, 501); + + UnsignedSchemaConsistent obj = new UnsignedSchemaConsistent(); + // Primitive fields + obj.u8Field = (byte) 200; // Max uint8 range testing + 
obj.u16Field = (short) 60000; // Max uint16 range testing + obj.u32VarField = (int) 3000000000L; // > INT_MAX to test unsigned + obj.u32FixedField = (int) 4000000000L; + obj.u64VarField = 10000000000L; + obj.u64FixedField = 15000000000L; + obj.u64TaggedField = 1000000000L; // Within tagged range + + // Nullable boxed fields with values + obj.u8NullableField = (byte) 128; + obj.u16NullableField = (short) 40000; + obj.u32VarNullableField = (int) 2500000000L; + obj.u32FixedNullableField = (int) 3500000000L; + obj.u64VarNullableField = 8000000000L; + obj.u64FixedNullableField = 12000000000L; + obj.u64TaggedNullableField = 500000000L; + + // First verify Java serialization works + Assert.assertEquals(xserDe(fory, obj), obj); + + MemoryBuffer buffer = MemoryBuffer.newHeapBuffer(512); + fory.serialize(buffer, obj); + + byte[] javaBytes = buffer.getBytes(0, buffer.writerIndex()); + System.out.printf("Java output size: %d bytes%n", javaBytes.length); + System.out.printf("Java output hex: %s%n", bytesToHex(javaBytes)); + + ExecutionContext ctx = prepareExecution(caseName, javaBytes); + runPeer(ctx); + + MemoryBuffer buffer2 = readBuffer(ctx.dataFile()); + byte[] goBytes = buffer2.getBytes(0, buffer2.size()); + System.out.printf("Go output size: %d bytes%n", goBytes.length); + System.out.printf("Go output hex: %s%n", bytesToHex(goBytes)); + + UnsignedSchemaConsistent result = (UnsignedSchemaConsistent) fory.deserialize(buffer2); + Assert.assertEquals(result, obj); + } + + private static String bytesToHex(byte[] bytes) { + StringBuilder sb = new StringBuilder(); + for (byte b : bytes) { + sb.append(String.format("%02x", b)); + } + return sb.toString(); + } + + /** + * Test struct for unsigned number schema compatible tests (Java side). Group 1: non-nullable + * primitive fields. Group 2: nullable boxed fields with "2" suffix. Other languages flip + * nullability: Group 1 is Optional, Group 2 is non-Optional. 
+ */ + @Data + static class UnsignedSchemaCompatible { + // Group 1: Primitive unsigned fields (non-nullable in Java, Optional in other languages) + @Uint8Type byte u8Field1; + + @Uint16Type short u16Field1; + + @Uint32Type(compress = true) + int u32VarField1; + + @Uint32Type(compress = false) + int u32FixedField1; + + @Uint64Type(encoding = LongEncoding.VARINT) + long u64VarField1; + + @Uint64Type(encoding = LongEncoding.FIXED) + long u64FixedField1; + + @Uint64Type(encoding = LongEncoding.TAGGED) + long u64TaggedField1; + + // Group 2: Nullable boxed fields (nullable in Java, non-Optional in other languages) + @ForyField(nullable = true) + @Uint8Type + Byte u8Field2; + + @ForyField(nullable = true) + @Uint16Type + Short u16Field2; + + @ForyField(nullable = true) + @Uint32Type(compress = true) + Integer u32VarField2; + + @ForyField(nullable = true) + @Uint32Type(compress = false) + Integer u32FixedField2; + + @ForyField(nullable = true) + @Uint64Type(encoding = LongEncoding.VARINT) + Long u64VarField2; + + @ForyField(nullable = true) + @Uint64Type(encoding = LongEncoding.FIXED) + Long u64FixedField2; + + @ForyField(nullable = true) + @Uint64Type(encoding = LongEncoding.TAGGED) + Long u64TaggedField2; + } + + @Test(dataProvider = "enableCodegen") + public void testUnsignedSchemaCompatible(boolean enableCodegen) throws java.io.IOException { + String caseName = "test_unsigned_schema_compatible"; + Fory fory = + Fory.builder() + .withLanguage(Language.XLANG) + .withCompatibleMode(CompatibleMode.COMPATIBLE) + .withCodegen(enableCodegen) + .withMetaCompressor(new NoOpMetaCompressor()) + .build(); + fory.register(UnsignedSchemaCompatible.class, 502); + + UnsignedSchemaCompatible obj = new UnsignedSchemaCompatible(); + // Group 1: Primitive fields + obj.u8Field1 = (byte) 200; + obj.u16Field1 = (short) 60000; + obj.u32VarField1 = (int) 3000000000L; + obj.u32FixedField1 = (int) 4000000000L; + obj.u64VarField1 = 10000000000L; + obj.u64FixedField1 = 15000000000L; + 
obj.u64TaggedField1 = 1000000000L; + + // Group 2: Nullable boxed fields with values + obj.u8Field2 = (byte) 128; + obj.u16Field2 = (short) 40000; + obj.u32VarField2 = (int) 2500000000L; + obj.u32FixedField2 = (int) 3500000000L; + obj.u64VarField2 = 8000000000L; + obj.u64FixedField2 = 12000000000L; + obj.u64TaggedField2 = 500000000L; + + // First verify Java serialization works + Assert.assertEquals(xserDe(fory, obj), obj); + + MemoryBuffer buffer = MemoryBuffer.newHeapBuffer(1024); + fory.serialize(buffer, obj); + + ExecutionContext ctx = prepareExecution(caseName, buffer.getBytes(0, buffer.writerIndex())); + runPeer(ctx); + + MemoryBuffer buffer2 = readBuffer(ctx.dataFile()); + UnsignedSchemaCompatible result = (UnsignedSchemaCompatible) fory.deserialize(buffer2); + Assert.assertEquals(result, obj); + } } diff --git a/javascript/packages/fory/lib/gen/number.ts b/javascript/packages/fory/lib/gen/number.ts index 758c7d8929..23297b1414 100644 --- a/javascript/packages/fory/lib/gen/number.ts +++ b/javascript/packages/fory/lib/gen/number.ts @@ -72,7 +72,7 @@ CodegenRegistry.register(InternalSerializerType.INT32, ) ); -CodegenRegistry.register(InternalSerializerType.VAR32, +CodegenRegistry.register(InternalSerializerType.VARINT32, buildNumberSerializer( (builder, accessor) => builder.writer.varInt32(accessor), builder => builder.reader.varInt32() @@ -86,7 +86,7 @@ CodegenRegistry.register(InternalSerializerType.INT64, ) ); -CodegenRegistry.register(InternalSerializerType.H64, +CodegenRegistry.register(InternalSerializerType.TAGGED_INT64, buildNumberSerializer( (builder, accessor) => builder.writer.sliInt64(accessor), builder => builder.reader.sliInt64() diff --git a/javascript/packages/fory/lib/type.ts b/javascript/packages/fory/lib/type.ts index 20c61668e3..b4583e49a9 100644 --- a/javascript/packages/fory/lib/type.ts +++ b/javascript/packages/fory/lib/type.ts @@ -31,13 +31,13 @@ export const TypeId = { // a 32-bit signed integer. 
INT32: 4, // a 32-bit signed integer which uses fory var_int32 encoding. - VAR32: 5, + VARINT32: 5, // a 64-bit signed integer. INT64: 6, // a 64-bit signed integer which uses fory PVL encoding. - VAR64: 7, + VARINT64: 7, // a 64-bit signed integer which uses fory hybrid encoding. - H64: 8, + TAGGED_INT64: 8, // an 8-bit unsigned integer. UINT8: 9, // a 16-bit unsigned integer. @@ -45,13 +45,13 @@ export const TypeId = { // a 32-bit unsigned integer. UINT32: 11, // a 32-bit unsigned integer which uses fory var_uint32 encoding. - VARU32: 12, + VAR_UINT32: 12, // a 64-bit unsigned integer. UINT64: 13, // a 64-bit unsigned integer which uses fory var_uint64 encoding. - VARU64: 14, + VAR_UINT64: 14, // a 64-bit unsigned integer which uses fory hybrid encoding. - HU64: 15, + TAGGED_UINT64: 15, // a 16-bit floating point number. FLOAT16: 16, // a 32-bit floating point number. @@ -138,10 +138,10 @@ export enum InternalSerializerType { INT8, INT16, INT32, - VAR32, + VARINT32, INT64, - VAR64, - H64, + VARINT64, + TAGGED_INT64, FLOAT16, FLOAT32, FLOAT64, diff --git a/javascript/packages/fory/lib/typeInfo.ts b/javascript/packages/fory/lib/typeInfo.ts index c6035e6c03..e98c7da023 100644 --- a/javascript/packages/fory/lib/typeInfo.ts +++ b/javascript/packages/fory/lib/typeInfo.ts @@ -337,7 +337,7 @@ export type HintInput = T extends unknown ? any : T extends { | InternalSerializerType.INT8 | InternalSerializerType.INT16 | InternalSerializerType.INT32 - | InternalSerializerType.VAR32 + | InternalSerializerType.VARINT32 | InternalSerializerType.FLOAT16 | InternalSerializerType.FLOAT32 | InternalSerializerType.FLOAT64; @@ -345,8 +345,8 @@ export type HintInput = T extends unknown ? any : T extends { ? number : T extends { - type: InternalSerializerType.VAR64 - | InternalSerializerType.H64 + type: InternalSerializerType.VARINT64 + | InternalSerializerType.TAGGED_INT64 | InternalSerializerType.INT64; } ? bigint @@ -407,7 +407,7 @@ export type HintResult = T extends never ? 
any : T extends { | InternalSerializerType.INT8 | InternalSerializerType.INT16 | InternalSerializerType.INT32 - | InternalSerializerType.VAR32 + | InternalSerializerType.VARINT32 | InternalSerializerType.FLOAT16 | InternalSerializerType.FLOAT32 | InternalSerializerType.FLOAT64; @@ -415,7 +415,7 @@ export type HintResult = T extends never ? any : T extends { ? number : T extends { - type: InternalSerializerType.H64 + type: InternalSerializerType.TAGGED_INT64 | InternalSerializerType.INT64; } ? bigint @@ -553,8 +553,8 @@ export const Type = { }, varInt32() { return TypeInfo.fromNonParam( - InternalSerializerType.VAR32 as const, - (TypeId.VAR32), + InternalSerializerType.VARINT32 as const, + (TypeId.VARINT32), ); }, @@ -567,8 +567,8 @@ export const Type = { }, sliInt64() { return TypeInfo.fromNonParam( - InternalSerializerType.H64 as const, - (TypeId.H64), + InternalSerializerType.TAGGED_INT64 as const, + (TypeId.TAGGED_INT64), ); }, diff --git a/python/pyfory/__init__.py b/python/pyfory/__init__.py index 3a45f38abc..79523aed9c 100644 --- a/python/pyfory/__init__.py +++ b/python/pyfory/__init__.py @@ -30,7 +30,7 @@ except ImportError: ENABLE_FORY_CYTHON_SERIALIZATION = False -from pyfory._registry import TypeInfo +from pyfory.registry import TypeInfo if ENABLE_FORY_CYTHON_SERIALIZATION: from pyfory.serialization import Fory, TypeInfo # noqa: F401,F811 @@ -43,6 +43,16 @@ Int16Serializer, Int32Serializer, Int64Serializer, + Varint32Serializer, + Varint64Serializer, + TaggedInt64Serializer, + Uint8Serializer, + Uint16Serializer, + Uint32Serializer, + VarUint32Serializer, + Uint64Serializer, + VarUint64Serializer, + TaggedUint64Serializer, Float32Serializer, Float64Serializer, StringSerializer, @@ -70,6 +80,16 @@ int16, int32, int64, + fixed_int32, + fixed_int64, + tagged_int64, + uint8, + uint16, + uint32, + fixed_uint32, + uint64, + fixed_uint64, + tagged_uint64, float32, float64, # Int8ArrayType, @@ -85,7 +105,7 @@ dataslots, ) from pyfory.policy import 
DeserializationPolicy # noqa: F401 # pylint: disable=unused-import -from pyfory._util import Buffer # noqa: F401 # pylint: disable=unused-import +from pyfory.buffer import Buffer # noqa: F401 # pylint: disable=unused-import __version__ = "0.14.1.dev" @@ -110,6 +130,16 @@ "int16", "int32", "int64", + "fixed_int32", + "fixed_int64", + "tagged_int64", + "uint8", + "uint16", + "uint32", + "fixed_uint32", + "uint64", + "fixed_uint64", + "tagged_uint64", "float32", "float64", "int16_array", @@ -126,6 +156,16 @@ "Int16Serializer", "Int32Serializer", "Int64Serializer", + "Varint32Serializer", + "Varint64Serializer", + "TaggedInt64Serializer", + "Uint8Serializer", + "Uint16Serializer", + "Uint32Serializer", + "VarUint32Serializer", + "Uint64Serializer", + "VarUint64Serializer", + "TaggedUint64Serializer", "Float32Serializer", "Float64Serializer", "StringSerializer", diff --git a/python/pyfory/_fory.py b/python/pyfory/_fory.py index 98bfae1119..2d67d8d858 100644 --- a/python/pyfory/_fory.py +++ b/python/pyfory/_fory.py @@ -48,7 +48,7 @@ USE_TYPE_ID = 1 # preserve 0 as flag for type id not set in TypeInfo` NO_TYPE_ID = 0 -INT64_TYPE_ID = TypeId.INT64 +INT64_TYPE_ID = TypeId.VARINT64 FLOAT64_TYPE_ID = TypeId.FLOAT64 BOOL_TYPE_ID = TypeId.BOOL STRING_TYPE_ID = TypeId.STRING @@ -242,7 +242,7 @@ def __init__( self.compatible = compatible self.field_nullable = field_nullable if self.is_py else False from pyfory.serialization import MetaStringResolver, SerializationContext - from pyfory._registry import TypeResolver + from pyfory.registry import TypeResolver self.metastring_resolver = MetaStringResolver() self.type_resolver = TypeResolver(self, meta_share=compatible, meta_compressor=meta_compressor) diff --git a/python/pyfory/_serializer.py b/python/pyfory/_serializer.py index ba0b3d7a89..e9cac7b527 100644 --- a/python/pyfory/_serializer.py +++ b/python/pyfory/_serializer.py @@ -94,6 +94,8 @@ def read(self, buffer): class Int32Serializer(XlangCompatibleSerializer): + """Serializer 
for INT32/VARINT32 type - uses variable-length encoding for xlang compatibility.""" + def write(self, buffer, value): buffer.write_varint32(value) @@ -101,7 +103,19 @@ def read(self, buffer): return buffer.read_varint32() +class FixedInt32Serializer(XlangCompatibleSerializer): + """Serializer for fixed-width 32-bit signed integer (INT32 type_id=4).""" + + def write(self, buffer, value): + buffer.write_int32(value) + + def read(self, buffer): + return buffer.read_int32() + + class Int64Serializer(Serializer): + """Serializer for INT64/VARINT64 type - uses variable-length encoding for xlang compatibility.""" + def xwrite(self, buffer, value): buffer.write_varint64(value) @@ -115,6 +129,116 @@ def read(self, buffer): return buffer.read_varint64() +class FixedInt64Serializer(XlangCompatibleSerializer): + """Serializer for fixed-width 64-bit signed integer (INT64 type_id=6).""" + + def write(self, buffer, value): + buffer.write_int64(value) + + def read(self, buffer): + return buffer.read_int64() + + +class Varint32Serializer(XlangCompatibleSerializer): + """Serializer for VARINT32 type - variable-length encoded signed 32-bit integer.""" + + def write(self, buffer, value): + buffer.write_varint32(value) + + def read(self, buffer): + return buffer.read_varint32() + + +class Varint64Serializer(XlangCompatibleSerializer): + """Serializer for VARINT64 type - variable-length encoded signed 64-bit integer.""" + + def write(self, buffer, value): + buffer.write_varint64(value) + + def read(self, buffer): + return buffer.read_varint64() + + +class TaggedInt64Serializer(XlangCompatibleSerializer): + """Serializer for TAGGED_INT64 type - tagged encoding for signed 64-bit integer.""" + + def write(self, buffer, value): + buffer.write_tagged_int64(value) + + def read(self, buffer): + return buffer.read_tagged_int64() + + +class Uint8Serializer(XlangCompatibleSerializer): + """Serializer for UINT8 type - unsigned 8-bit integer.""" + + def write(self, buffer, value): + 
buffer.write_uint8(value) + + def read(self, buffer): + return buffer.read_uint8() + + +class Uint16Serializer(XlangCompatibleSerializer): + """Serializer for UINT16 type - unsigned 16-bit integer.""" + + def write(self, buffer, value): + buffer.write_uint16(value) + + def read(self, buffer): + return buffer.read_uint16() + + +class Uint32Serializer(XlangCompatibleSerializer): + """Serializer for UINT32 type - fixed-size unsigned 32-bit integer.""" + + def write(self, buffer, value): + buffer.write_uint32(value) + + def read(self, buffer): + return buffer.read_uint32() + + +class VarUint32Serializer(XlangCompatibleSerializer): + """Serializer for VAR_UINT32 type - variable-length encoded unsigned 32-bit integer.""" + + def write(self, buffer, value): + buffer.write_varuint32(value) + + def read(self, buffer): + return buffer.read_varuint32() + + +class Uint64Serializer(XlangCompatibleSerializer): + """Serializer for UINT64 type - fixed-size unsigned 64-bit integer.""" + + def write(self, buffer, value): + buffer.write_uint64(value) + + def read(self, buffer): + return buffer.read_uint64() + + +class VarUint64Serializer(XlangCompatibleSerializer): + """Serializer for VAR_UINT64 type - variable-length encoded unsigned 64-bit integer.""" + + def write(self, buffer, value): + buffer.write_varuint64(value) + + def read(self, buffer): + return buffer.read_varuint64() + + +class TaggedUint64Serializer(XlangCompatibleSerializer): + """Serializer for TAGGED_UINT64 type - tagged encoding for unsigned 64-bit integer.""" + + def write(self, buffer, value): + buffer.write_tagged_uint64(value) + + def read(self, buffer): + return buffer.read_tagged_uint64() + + class Float32Serializer(XlangCompatibleSerializer): def write(self, buffer, value): buffer.write_float(value) diff --git a/python/pyfory/_util.pxd b/python/pyfory/buffer.pxd similarity index 89% rename from python/pyfory/_util.pxd rename to python/pyfory/buffer.pxd index 6938e755ca..16c87705e4 100644 --- 
a/python/pyfory/_util.pxd +++ b/python/pyfory/buffer.pxd @@ -107,6 +107,12 @@ cdef class Buffer: cpdef inline write_int64(self, int64_t value) + cpdef inline write_uint16(self, uint16_t value) + + cpdef inline write_uint32(self, uint32_t value) + + cpdef inline write_uint64(self, uint64_t value) + cpdef inline write_float(self, float value) cpdef inline write_float32(self, float value) @@ -131,6 +137,12 @@ cdef class Buffer: cpdef inline int64_t read_int64(self) + cpdef inline uint16_t read_uint16(self) + + cpdef inline uint32_t read_uint32(self) + + cpdef inline uint64_t read_uint64(self) + cpdef inline float read_float(self) cpdef inline float read_float32(self) @@ -147,13 +159,21 @@ cdef class Buffer: cpdef inline int64_t read_varuint64(self) - cpdef inline write_varuint32(self, int32_t value) + cpdef inline write_varuint32(self, uint32_t value) cpdef inline write_varint32(self, int32_t value) cpdef inline int32_t read_varint32(self) - cpdef inline int32_t read_varuint32(self) + cpdef inline uint32_t read_varuint32(self) + + cpdef inline write_tagged_int64(self, int64_t value) + + cpdef inline int64_t read_tagged_int64(self) + + cpdef inline write_tagged_uint64(self, uint64_t value) + + cpdef inline uint64_t read_tagged_uint64(self) cpdef put_buffer(self, uint32_t offset, v, int32_t src_index, int32_t length) diff --git a/python/pyfory/buffer.py b/python/pyfory/buffer.py deleted file mode 100644 index 921e8a9dc7..0000000000 --- a/python/pyfory/buffer.py +++ /dev/null @@ -1,18 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from pyfory._util import Buffer # noqa: F401 # pylint: disable=unused-import diff --git a/python/pyfory/_util.pyx b/python/pyfory/buffer.pyx similarity index 84% rename from python/pyfory/_util.pyx rename to python/pyfory/buffer.pyx index a8ad213786..b4e97a15d8 100644 --- a/python/pyfory/_util.pyx +++ b/python/pyfory/buffer.pyx @@ -205,6 +205,21 @@ cdef class Buffer: self.c_buffer_ptr.UnsafePut(self.writer_index, value) self.writer_index += 8 + cpdef inline write_uint16(self, uint16_t value): + self.grow(2) + self.c_buffer_ptr.UnsafePut(self.writer_index, value) + self.writer_index += 2 + + cpdef inline write_uint32(self, uint32_t value): + self.grow(4) + self.c_buffer_ptr.UnsafePut(self.writer_index, value) + self.writer_index += 4 + + cpdef inline write_uint64(self, uint64_t value): + self.grow(8) + self.c_buffer_ptr.UnsafePut(self.writer_index, value) + self.writer_index += 8 + cpdef inline write_float(self, float value): self.grow(4) self.c_buffer_ptr.UnsafePut(self.writer_index, value) @@ -360,6 +375,24 @@ cdef class Buffer: self.reader_index += 8 return value + cpdef inline uint16_t read_uint16(self): + cdef int32_t offset = self.reader_index + self.check_bound(offset, 2) + self.reader_index = offset + 2 + return self.c_buffer_ptr.GetInt16(offset) + + cpdef inline uint32_t read_uint32(self): + cdef int32_t offset = self.reader_index + self.check_bound(offset, 4) + self.reader_index = offset + 4 + return self.c_buffer_ptr.GetInt32(offset) + + cpdef inline uint64_t read_uint64(self): + cdef int32_t offset = self.reader_index + 
self.check_bound(offset, 8) + self.reader_index = offset + 8 + return self.c_buffer_ptr.GetInt64(offset) + cpdef inline float read_float(self): value = self.get_float(self.reader_index) self.reader_index += 4 @@ -399,7 +432,7 @@ cdef class Buffer: cpdef inline write_varint32(self, int32_t value): return self.write_varuint32((value << 1) ^ (value >> 31)) - cpdef inline write_varuint32(self, int32_t value): + cpdef inline write_varuint32(self, uint32_t value): # Need 8 bytes for safe bulk write (PutVarUint32 writes uint64_t for 5-byte varints) self.grow(8) cdef int32_t actual_bytes_written = self.c_buffer_ptr.PutVarUint32(self.writer_index, value) @@ -410,11 +443,11 @@ cdef class Buffer: cdef uint32_t v = self.read_varuint32() return (v >> 1) ^ -(v & 1) - cpdef inline int32_t read_varuint32(self): + cpdef inline uint32_t read_varuint32(self): cdef: uint32_t read_length = 0 int8_t b - int32_t result + uint32_t result if self._c_size - self.reader_index > 5: result = self.c_buffer_ptr.GetVarUint32(self.reader_index, &read_length) self.reader_index += read_length @@ -568,6 +601,90 @@ cdef class Buffer: result |= b << 56 return result + cpdef inline write_tagged_int64(self, int64_t value): + """Write signed int64 using fory Tagged(Small long as int) encoding. + + If value is in [-1073741824, 1073741823] (fits in 31 bits with sign), + encode as 4 bytes: ((value as i32) << 1). + Otherwise write as 9 bytes: 0b1 | little-endian 8 bytes i64. 
+ """ + cdef int64_t HALF_MIN_INT_VALUE = -1073741824 # i32::MIN / 2 + cdef int64_t HALF_MAX_INT_VALUE = 1073741823 # i32::MAX / 2 + if HALF_MIN_INT_VALUE <= value <= HALF_MAX_INT_VALUE: + # Fits in 31 bits (with sign), encode as 4 bytes with bit 0 = 0 + self.write_int32((value) << 1) + else: + # Write flag byte (0b1) followed by 8-byte i64 + self.grow(9) + ((self._c_address + self.writer_index))[0] = 0b1 + self.writer_index += 1 + self.c_buffer_ptr.UnsafePut(self.writer_index, value) + self.writer_index += 8 + + cpdef inline int64_t read_tagged_int64(self): + """Read signed fory Tagged(Small long as int) encoded int64. + + If bit 0 of the first 4 bytes is 0, return the value >> 1 (arithmetic shift). + Otherwise, skip the flag byte and read 8 bytes as int64. + """ + cdef int32_t offset = self.reader_index + cdef int32_t i + cdef int64_t value + self.check_bound(offset, 4) + i = self.c_buffer_ptr.GetInt32(offset) + if (i & 0b1) != 0b1: + # Bit 0 is 0, small value encoded in 4 bytes + self.reader_index = offset + 4 + return (i >> 1) # arithmetic right shift preserves sign + else: + # Bit 0 is 1, big value: skip flag byte and read 8 bytes + self.check_bound(offset, 9) + self.reader_index = offset + 1 + value = self.c_buffer_ptr.GetInt64(self.reader_index) + self.reader_index += 8 + return value + + cpdef inline write_tagged_uint64(self, uint64_t value): + """Write unsigned uint64 using fory Tagged(Small long as int) encoding. + + If value is in [0, 0x7fffffff], encode as 4 bytes: ((value as u32) << 1). + Otherwise write as 9 bytes: 0b1 | little-endian 8 bytes u64. 
+ """ + cdef uint64_t MAX_SMALL_VALUE = 0x7fffffff # i32::MAX as u64 + if value <= MAX_SMALL_VALUE: + # Fits in 31 bits, encode as 4 bytes with bit 0 = 0 + self.write_int32((value) << 1) + else: + # Write flag byte (0b1) followed by 8-byte u64 + self.grow(9) + ((self._c_address + self.writer_index))[0] = 0b1 + self.writer_index += 1 + self.c_buffer_ptr.UnsafePut(self.writer_index, value) + self.writer_index += 8 + + cpdef inline uint64_t read_tagged_uint64(self): + """Read unsigned fory Tagged(Small long as int) encoded uint64. + + If bit 0 of the first 4 bytes is 0, return the value >> 1. + Otherwise, skip the flag byte and read 8 bytes as uint64. + """ + cdef int32_t offset = self.reader_index + cdef uint32_t i + cdef uint64_t value + self.check_bound(offset, 4) + i = self.c_buffer_ptr.GetInt32(offset) + if (i & 0b1) != 0b1: + # Bit 0 is 0, small value encoded in 4 bytes + self.reader_index = offset + 4 + return (i >> 1) + else: + # Bit 0 is 1, big value: skip flag byte and read 8 bytes + self.check_bound(offset, 9) + self.reader_index = offset + 1 + value = self.c_buffer_ptr.GetInt64(self.reader_index) + self.reader_index += 8 + return value + cdef inline write_c_buffer(self, const uint8_t* value, int32_t length): self.write_varuint32(length) if length <= 0: # access an emtpy buffer may raise out-of-bound exception. diff --git a/python/pyfory/collection.pxi b/python/pyfory/collection.pxi index 603da240e2..7c335c6e74 100644 --- a/python/pyfory/collection.pxi +++ b/python/pyfory/collection.pxi @@ -310,7 +310,7 @@ cdef class ListSerializer(CollectionSerializer): if type_id == TypeId.STRING: self._read_string(buffer, len_, list_) return list_ - elif type_id == TypeId.VAR64: + elif type_id == TypeId.VARINT64: self._read_int(buffer, len_, list_) return list_ elif type_id == TypeId.BOOL: @@ -387,7 +387,7 @@ cdef inline get_next_element( # error. 
if type_id == TypeId.STRING: return buffer.read_string() - elif type_id == TypeId.VAR32: + elif type_id == TypeId.VARINT32: return buffer.read_varint64() elif type_id == TypeId.BOOL: return buffer.read_bool() @@ -428,7 +428,7 @@ cdef class TupleSerializer(CollectionSerializer): if type_id == TypeId.STRING: self._read_string(buffer, len_, tuple_) return tuple_ - if type_id == TypeId.VAR64: + if type_id == TypeId.VARINT64: self._read_int(buffer, len_, tuple_) return tuple_ if type_id == TypeId.BOOL: @@ -521,7 +521,7 @@ cdef class SetSerializer(CollectionSerializer): if type_id == TypeId.STRING: self._read_string(buffer, len_, instance) return instance - if type_id == TypeId.VAR64: + if type_id == TypeId.VARINT64: self._read_int(buffer, len_, instance) return instance if type_id == TypeId.BOOL: @@ -551,7 +551,7 @@ cdef class SetSerializer(CollectionSerializer): type_id = typeinfo.type_id if type_id == TypeId.STRING: instance.add(buffer.read_string()) - elif type_id == TypeId.VAR64: + elif type_id == TypeId.VARINT64: instance.add(buffer.read_varint64()) elif type_id == TypeId.BOOL: instance.add(buffer.read_bool()) @@ -571,7 +571,7 @@ cdef class SetSerializer(CollectionSerializer): type_id = typeinfo.type_id if type_id == TypeId.STRING: instance.add(buffer.read_string()) - elif type_id == TypeId.VAR64: + elif type_id == TypeId.VARINT64: instance.add(buffer.read_varint64()) elif type_id == TypeId.BOOL: instance.add(buffer.read_bool()) @@ -593,7 +593,7 @@ cdef class SetSerializer(CollectionSerializer): type_id = typeinfo.type_id if type_id == TypeId.STRING: instance.add(buffer.read_string()) - elif type_id == TypeId.VAR64: + elif type_id == TypeId.VARINT64: instance.add(buffer.read_varint64()) elif type_id == TypeId.BOOL: instance.add(buffer.read_bool()) diff --git a/python/pyfory/format/row.pxi b/python/pyfory/format/row.pxi index ca3ed5692d..ec19a65a8c 100644 --- a/python/pyfory/format/row.pxi +++ b/python/pyfory/format/row.pxi @@ -23,7 +23,7 @@ from 
pyfory.includes.libformat cimport ( CGetter, CArrayData, CMapData, CRow, CTypeId, CSchema, CListType, CMapType, fory_schema ) -from pyfory._util cimport Buffer +from pyfory.buffer cimport Buffer from libcpp.memory cimport shared_ptr from libcpp.vector cimport vector from datetime import datetime, date diff --git a/python/pyfory/includes/libformat.pxd b/python/pyfory/includes/libformat.pxd index 8e1ffd4060..372effeabe 100755 --- a/python/pyfory/includes/libformat.pxd +++ b/python/pyfory/includes/libformat.pxd @@ -45,17 +45,17 @@ cdef extern from "fory/type/type.h" namespace "fory" nogil: INT8 = 2 INT16 = 3 INT32 = 4 - VAR32 = 5 + VARINT32 = 5 INT64 = 6 - VAR64 = 7 - H64 = 8 + VARINT64 = 7 + TAGGED_INT64 = 8 UINT8 = 9 UINT16 = 10 UINT32 = 11 - VARU32 = 12 + VAR_UINT32 = 12 UINT64 = 13 - VARU64 = 14 - HU64 = 15 + VAR_UINT64 = 14 + TAGGED_UINT64 = 15 FLOAT16 = 16 FLOAT32 = 17 FLOAT64 = 18 diff --git a/python/pyfory/includes/libserialization.pxd b/python/pyfory/includes/libserialization.pxd index edabe1abae..d1926911c5 100644 --- a/python/pyfory/includes/libserialization.pxd +++ b/python/pyfory/includes/libserialization.pxd @@ -28,17 +28,17 @@ cdef extern from "fory/type/type.h" namespace "fory" nogil: INT8 = 2 INT16 = 3 INT32 = 4 - VAR32 = 5 + VARINT32 = 5 INT64 = 6 - VAR64 = 7 - H64 = 8 + VARINT64 = 7 + TAGGED_INT64 = 8 UINT8 = 9 UINT16 = 10 UINT32 = 11 - VARU32 = 12 + VAR_UINT32 = 12 UINT64 = 13 - VARU64 = 14 - HU64 = 15 + VAR_UINT64 = 14 + TAGGED_UINT64 = 15 FLOAT16 = 16 FLOAT32 = 17 FLOAT64 = 18 diff --git a/python/pyfory/meta/typedef.py b/python/pyfory/meta/typedef.py index e5853b241f..1218a039d2 100644 --- a/python/pyfory/meta/typedef.py +++ b/python/pyfory/meta/typedef.py @@ -19,7 +19,7 @@ import typing from typing import List from pyfory.types import TypeId, is_primitive_type, is_polymorphic_type -from pyfory._util import Buffer +from pyfory.buffer import Buffer from pyfory.type_util import infer_field from pyfory.meta.metastring import Encoding from 
pyfory.type_util import infer_field_types diff --git a/python/pyfory/meta/typedef_decoder.py b/python/pyfory/meta/typedef_decoder.py index 3c84e72fc0..9838cbfd33 100644 --- a/python/pyfory/meta/typedef_decoder.py +++ b/python/pyfory/meta/typedef_decoder.py @@ -23,7 +23,7 @@ from dataclasses import make_dataclass from typing import List, Any -from pyfory._util import Buffer +from pyfory.buffer import Buffer from pyfory.meta.typedef import TypeDef, FieldInfo, FieldType from pyfory.meta.typedef import ( SMALL_NUM_FIELDS_THRESHOLD, diff --git a/python/pyfory/meta/typedef_encoder.py b/python/pyfory/meta/typedef_encoder.py index ae0e56bb65..7d09756224 100644 --- a/python/pyfory/meta/typedef_encoder.py +++ b/python/pyfory/meta/typedef_encoder.py @@ -37,7 +37,7 @@ ) from pyfory.meta.metastring import MetaStringEncoder -from pyfory._util import Buffer +from pyfory.buffer import Buffer from pyfory.lib.mmh3 import hash_buffer diff --git a/python/pyfory/primitive.pxi b/python/pyfory/primitive.pxi index 92e85cd71d..ed25317779 100644 --- a/python/pyfory/primitive.pxi +++ b/python/pyfory/primitive.pxi @@ -66,6 +66,126 @@ cdef class Int64Serializer(XlangCompatibleSerializer): return buffer.read_varint64() +@cython.final +cdef class FixedInt32Serializer(XlangCompatibleSerializer): + """Serializer for fixed-width 32-bit signed integer (INT32 type_id=4).""" + cpdef inline write(self, Buffer buffer, value): + buffer.write_int32(value) + + cpdef inline read(self, Buffer buffer): + return buffer.read_int32() + + +@cython.final +cdef class FixedInt64Serializer(XlangCompatibleSerializer): + """Serializer for fixed-width 64-bit signed integer (INT64 type_id=6).""" + cpdef inline write(self, Buffer buffer, value): + buffer.write_int64(value) + + cpdef inline read(self, Buffer buffer): + return buffer.read_int64() + + +@cython.final +cdef class Varint32Serializer(XlangCompatibleSerializer): + """Serializer for VARINT32 type - variable-length encoded signed 32-bit integer.""" + cpdef inline 
write(self, Buffer buffer, value): + buffer.write_varint32(value) + + cpdef inline read(self, Buffer buffer): + return buffer.read_varint32() + + +@cython.final +cdef class Varint64Serializer(XlangCompatibleSerializer): + """Serializer for VARINT64 type - variable-length encoded signed 64-bit integer.""" + cpdef inline write(self, Buffer buffer, value): + buffer.write_varint64(value) + + cpdef inline read(self, Buffer buffer): + return buffer.read_varint64() + + +@cython.final +cdef class TaggedInt64Serializer(XlangCompatibleSerializer): + """Serializer for TAGGED_INT64 type - tagged encoding for signed 64-bit integer.""" + cpdef inline write(self, Buffer buffer, value): + buffer.write_tagged_int64(value) + + cpdef inline read(self, Buffer buffer): + return buffer.read_tagged_int64() + + +@cython.final +cdef class Uint8Serializer(XlangCompatibleSerializer): + """Serializer for UINT8 type - unsigned 8-bit integer.""" + cpdef inline write(self, Buffer buffer, value): + buffer.write_uint8(value) + + cpdef inline read(self, Buffer buffer): + return buffer.read_uint8() + + +@cython.final +cdef class Uint16Serializer(XlangCompatibleSerializer): + """Serializer for UINT16 type - unsigned 16-bit integer.""" + cpdef inline write(self, Buffer buffer, value): + buffer.write_uint16(value) + + cpdef inline read(self, Buffer buffer): + return buffer.read_uint16() + + +@cython.final +cdef class Uint32Serializer(XlangCompatibleSerializer): + """Serializer for UINT32 type - fixed-size unsigned 32-bit integer.""" + cpdef inline write(self, Buffer buffer, value): + buffer.write_uint32(value) + + cpdef inline read(self, Buffer buffer): + return buffer.read_uint32() + + +@cython.final +cdef class VarUint32Serializer(XlangCompatibleSerializer): + """Serializer for VAR_UINT32 type - variable-length encoded unsigned 32-bit integer.""" + cpdef inline write(self, Buffer buffer, value): + buffer.write_varuint32(value) + + cpdef inline read(self, Buffer buffer): + return 
buffer.read_varuint32() + + +@cython.final +cdef class Uint64Serializer(XlangCompatibleSerializer): + """Serializer for UINT64 type - fixed-size unsigned 64-bit integer.""" + cpdef inline write(self, Buffer buffer, value): + buffer.write_uint64(value) + + cpdef inline read(self, Buffer buffer): + return buffer.read_uint64() + + +@cython.final +cdef class VarUint64Serializer(XlangCompatibleSerializer): + """Serializer for VAR_UINT64 type - variable-length encoded unsigned 64-bit integer.""" + cpdef inline write(self, Buffer buffer, value): + buffer.write_varuint64(value) + + cpdef inline read(self, Buffer buffer): + return buffer.read_varuint64() + + +@cython.final +cdef class TaggedUint64Serializer(XlangCompatibleSerializer): + """Serializer for TAGGED_UINT64 type - tagged encoding for unsigned 64-bit integer.""" + cpdef inline write(self, Buffer buffer, value): + buffer.write_tagged_uint64(value) + + cpdef inline read(self, Buffer buffer): + return buffer.read_tagged_uint64() + + @cython.final cdef class Float32Serializer(XlangCompatibleSerializer): cpdef inline write(self, Buffer buffer, value): diff --git a/python/pyfory/_registry.py b/python/pyfory/registry.py similarity index 95% rename from python/pyfory/_registry.py rename to python/pyfory/registry.py index 366b244f1b..daec5fc9b9 100644 --- a/python/pyfory/_registry.py +++ b/python/pyfory/registry.py @@ -43,6 +43,16 @@ Int16Serializer, Int32Serializer, Int64Serializer, + FixedInt32Serializer, + FixedInt64Serializer, + TaggedInt64Serializer, + Uint8Serializer, + Uint16Serializer, + Uint32Serializer, + VarUint32Serializer, + Uint64Serializer, + VarUint64Serializer, + TaggedUint64Serializer, Float32Serializer, Float64Serializer, StringSerializer, @@ -77,6 +87,16 @@ int16, int32, int64, + fixed_int32, + fixed_int64, + tagged_int64, + uint8, + uint16, + uint32, + fixed_uint32, + uint64, + fixed_uint64, + tagged_uint64, float32, float64, is_struct_type, @@ -261,11 +281,26 @@ def _initialize_common(self): register 
= functools.partial(self._register_type, internal=True) register(None, type_id=TypeId.UNKNOWN, serializer=NoneSerializer) register(bool, type_id=TypeId.BOOL, serializer=BooleanSerializer) + # Signed integers + # Note: int32/int64 use VARINT32/VARINT64 for xlang compatibility (matches Java/Rust) + # fixed_int32/fixed_int64 use INT32/INT64 for fixed-width encoding register(int8, type_id=TypeId.INT8, serializer=ByteSerializer) register(int16, type_id=TypeId.INT16, serializer=Int16Serializer) - register(int32, type_id=TypeId.INT32, serializer=Int32Serializer) - register(int64, type_id=TypeId.INT64, serializer=Int64Serializer) - register(int, type_id=TypeId.INT64, serializer=Int64Serializer) + register(int32, type_id=TypeId.VARINT32, serializer=Int32Serializer) + register(fixed_int32, type_id=TypeId.INT32, serializer=FixedInt32Serializer) + register(int64, type_id=TypeId.VARINT64, serializer=Int64Serializer) + register(int, type_id=TypeId.VARINT64, serializer=Int64Serializer) + register(fixed_int64, type_id=TypeId.INT64, serializer=FixedInt64Serializer) + register(tagged_int64, type_id=TypeId.TAGGED_INT64, serializer=TaggedInt64Serializer) + # Unsigned integers + register(uint8, type_id=TypeId.UINT8, serializer=Uint8Serializer) + register(uint16, type_id=TypeId.UINT16, serializer=Uint16Serializer) + register(uint32, type_id=TypeId.VAR_UINT32, serializer=VarUint32Serializer) + register(fixed_uint32, type_id=TypeId.UINT32, serializer=Uint32Serializer) + register(uint64, type_id=TypeId.VAR_UINT64, serializer=VarUint64Serializer) + register(fixed_uint64, type_id=TypeId.UINT64, serializer=Uint64Serializer) + register(tagged_uint64, type_id=TypeId.TAGGED_UINT64, serializer=TaggedUint64Serializer) + # Floats register( float32, type_id=TypeId.FLOAT32, diff --git a/python/pyfory/serialization.pyx b/python/pyfory/serialization.pyx index 6474a5bbb0..76ff98aef2 100644 --- a/python/pyfory/serialization.pyx +++ b/python/pyfory/serialization.pyx @@ -27,7 +27,7 @@ import time import 
warnings from typing import TypeVar, Union, Iterable -from pyfory._util import get_bit, set_bit, clear_bit +from pyfory.buffer import get_bit, set_bit, clear_bit from pyfory import _fory as fmod from pyfory._fory import Language from pyfory._fory import _ENABLE_TYPE_REGISTRATION_FORCIBLY @@ -50,7 +50,7 @@ from cpython.tuple cimport PyTuple_New, PyTuple_SET_ITEM from libcpp cimport bool as c_bool from libcpp.utility cimport pair from cython.operator cimport dereference as deref -from pyfory._util cimport Buffer +from pyfory.buffer cimport Buffer from pyfory.includes.libabsl cimport flat_hash_map from pyfory.meta.metastring import MetaStringDecoder @@ -514,7 +514,7 @@ cdef class TypeResolver: self.fory = fory self.metastring_resolver = fory.metastring_resolver self.meta_share = meta_share - from pyfory._registry import TypeResolver + from pyfory.registry import TypeResolver self._resolver = TypeResolver(fory, meta_share=meta_share, meta_compressor=meta_compressor) def initialize(self): diff --git a/python/pyfory/serializer.py b/python/pyfory/serializer.py index 6886b5fa54..d68c5736f3 100644 --- a/python/pyfory/serializer.py +++ b/python/pyfory/serializer.py @@ -51,6 +51,18 @@ Int16Serializer, Int32Serializer, Int64Serializer, + FixedInt32Serializer, + FixedInt64Serializer, + Varint32Serializer, + Varint64Serializer, + TaggedInt64Serializer, + Uint8Serializer, + Uint16Serializer, + Uint32Serializer, + VarUint32Serializer, + Uint64Serializer, + VarUint64Serializer, + TaggedUint64Serializer, Float32Serializer, Float64Serializer, StringSerializer, @@ -75,6 +87,18 @@ Int16Serializer, Int32Serializer, Int64Serializer, + FixedInt32Serializer, + FixedInt64Serializer, + Varint32Serializer, + Varint64Serializer, + TaggedInt64Serializer, + Uint8Serializer, + Uint16Serializer, + Uint32Serializer, + VarUint32Serializer, + Uint64Serializer, + VarUint64Serializer, + TaggedUint64Serializer, Float32Serializer, Float64Serializer, StringSerializer, @@ -1263,6 +1287,18 @@ def 
xread(self, buffer): "Int16Serializer", "Int32Serializer", "Int64Serializer", + "FixedInt32Serializer", + "FixedInt64Serializer", + "Varint32Serializer", + "Varint64Serializer", + "TaggedInt64Serializer", + "Uint8Serializer", + "Uint16Serializer", + "Uint32Serializer", + "VarUint32Serializer", + "Uint64Serializer", + "VarUint64Serializer", + "TaggedUint64Serializer", "Float32Serializer", "Float64Serializer", "StringSerializer", diff --git a/python/pyfory/struct.py b/python/pyfory/struct.py index 704fdd131f..af8f45fd8a 100644 --- a/python/pyfory/struct.py +++ b/python/pyfory/struct.py @@ -33,6 +33,16 @@ int16, int32, int64, + fixed_int32, + fixed_int64, + tagged_int64, + uint8, + uint16, + uint32, + fixed_uint32, + uint64, + fixed_uint64, + tagged_uint64, float32, float64, is_py_array_type, @@ -1007,12 +1017,26 @@ def _replace(self): basic_types = { bool, + # Signed integers int8, int16, int32, + fixed_int32, int64, + fixed_int64, + tagged_int64, + # Unsigned integers + uint8, + uint16, + uint32, + fixed_uint32, + uint64, + fixed_uint64, + tagged_uint64, + # Floats float32, float64, + # Python native types int, float, str, @@ -1130,10 +1154,16 @@ def sorter(item): def numeric_sorter(item): id_ = item[0] compress = id_ in { + # Signed compressed types TypeId.INT32, TypeId.INT64, - TypeId.VAR32, - TypeId.VAR64, + TypeId.VARINT32, + TypeId.VARINT64, + TypeId.TAGGED_INT64, + # Unsigned compressed types + TypeId.VAR_UINT32, + TypeId.VAR_UINT64, + TypeId.TAGGED_UINT64, } # Sort by: compress flag, -size (largest first), -type_id (higher type ID first), field_name # Java sorts by size (largest first), then by primitive type ID (descending) diff --git a/python/pyfory/tests/test_typedef_encoding.py b/python/pyfory/tests/test_typedef_encoding.py index 1aea0d197f..7a44b43523 100644 --- a/python/pyfory/tests/test_typedef_encoding.py +++ b/python/pyfory/tests/test_typedef_encoding.py @@ -21,7 +21,7 @@ from dataclasses import dataclass from typing import List, Dict -from 
pyfory._util import Buffer +from pyfory.buffer import Buffer from pyfory.meta.typedef import ( TypeDef, FieldInfo, diff --git a/python/pyfory/tests/xlang_test_main.py b/python/pyfory/tests/xlang_test_main.py index b1b49a4bd2..3b3160db87 100644 --- a/python/pyfory/tests/xlang_test_main.py +++ b/python/pyfory/tests/xlang_test_main.py @@ -1308,6 +1308,229 @@ def test_circular_ref_compatible(): f.write(new_bytes) +# ============================================================================ +# Unsigned Number Test Types +# ============================================================================ + + +@dataclass +class UnsignedSchemaConsistent: + """ + Test struct for unsigned number schema consistent tests (Python side). + Primitive fields first, then nullable boxed fields (using Optional). + + Must match Java UnsignedSchemaConsistent (type id 501). + """ + + # Primitive unsigned fields (non-nullable) + u8_field: pyfory.uint8 = 0 + u16_field: pyfory.uint16 = 0 + u32_var_field: pyfory.uint32 = 0 # VAR_UINT32 encoding + u32_fixed_field: pyfory.fixed_uint32 = 0 # Fixed 4-byte encoding + u64_var_field: pyfory.uint64 = 0 # VAR_UINT64 encoding + u64_fixed_field: pyfory.fixed_uint64 = 0 # Fixed 8-byte encoding + u64_tagged_field: pyfory.tagged_uint64 = 0 # Tagged encoding + + # Boxed nullable unsigned fields (using Optional) + u8_nullable_field: Optional[pyfory.uint8] = None + u16_nullable_field: Optional[pyfory.uint16] = None + u32_var_nullable_field: Optional[pyfory.uint32] = None + u32_fixed_nullable_field: Optional[pyfory.fixed_uint32] = None + u64_var_nullable_field: Optional[pyfory.uint64] = None + u64_fixed_nullable_field: Optional[pyfory.fixed_uint64] = None + u64_tagged_nullable_field: Optional[pyfory.tagged_uint64] = None + + +@dataclass +class UnsignedSchemaCompatible: + """ + Test struct for unsigned number schema compatible tests (Python side). + Group 1: Optional fields (nullable in Python, non-nullable in Java). 
+ Group 2: Non-Optional fields with field2 suffix (non-nullable in Python, nullable in Java). + + Must match Java UnsignedSchemaCompatible (type id 502). + """ + + # Group 1: Optional unsigned fields (nullable in Python, non-nullable in Java) + u8_field1: Optional[pyfory.uint8] = None + u16_field1: Optional[pyfory.uint16] = None + u32_var_field1: Optional[pyfory.uint32] = None # VAR_UINT32 encoding + u32_fixed_field1: Optional[pyfory.fixed_uint32] = None # Fixed 4-byte encoding + u64_var_field1: Optional[pyfory.uint64] = None # VAR_UINT64 encoding + u64_fixed_field1: Optional[pyfory.fixed_uint64] = None # Fixed 8-byte encoding + u64_tagged_field1: Optional[pyfory.tagged_uint64] = None # Tagged encoding + + # Group 2: Non-Optional unsigned fields (non-nullable in Python, nullable in Java) + u8_field2: pyfory.uint8 = 0 + u16_field2: pyfory.uint16 = 0 + u32_var_field2: pyfory.uint32 = 0 + u32_fixed_field2: pyfory.fixed_uint32 = 0 + u64_var_field2: pyfory.uint64 = 0 + u64_fixed_field2: pyfory.fixed_uint64 = 0 + u64_tagged_field2: pyfory.tagged_uint64 = 0 + + +@dataclass +class UnsignedSchemaConsistentSimple: + """ + Simple test struct for tagged uint64 in schema consistent mode. + Must match Java UnsignedSchemaConsistentSimple (type id 1). 
+ """ + + u64_tagged: pyfory.tagged_uint64 = 0 + u64_tagged_nullable: Optional[pyfory.tagged_uint64] = None + + +# ============================================================================ +# Unsigned Number Tests +# ============================================================================ + + +def test_unsigned_schema_consistent_simple(): + """Test simple tagged uint64 in schema consistent mode.""" + data_file = get_data_file() + with open(data_file, "rb") as f: + data_bytes = f.read() + + fory = pyfory.Fory(xlang=True, compatible=False) + fory.register_type(UnsignedSchemaConsistentSimple, type_id=1) + + expected = UnsignedSchemaConsistentSimple( + u64_tagged=1000000000, + u64_tagged_nullable=500000000, + ) + + obj = fory.deserialize(data_bytes) + debug_print(f"Deserialized: {obj}") + + assert obj.u64_tagged == expected.u64_tagged, f"u64_tagged: {obj.u64_tagged} != {expected.u64_tagged}" + assert obj.u64_tagged_nullable == expected.u64_tagged_nullable, ( + f"u64_tagged_nullable: {obj.u64_tagged_nullable} != {expected.u64_tagged_nullable}" + ) + + new_bytes = fory.serialize(obj) + with open(data_file, "wb") as f: + f.write(new_bytes) + + +def test_unsigned_schema_consistent(): + """Test unsigned number types with schema consistent mode.""" + data_file = get_data_file() + with open(data_file, "rb") as f: + data_bytes = f.read() + + fory = pyfory.Fory(xlang=True, compatible=False) + fory.register_type(UnsignedSchemaConsistent, type_id=501) + + expected = UnsignedSchemaConsistent( + # Primitive fields + u8_field=200, + u16_field=60000, + u32_var_field=3000000000, + u32_fixed_field=4000000000, + u64_var_field=10000000000, + u64_fixed_field=15000000000, + u64_tagged_field=1000000000, + # Nullable boxed fields with values + u8_nullable_field=128, + u16_nullable_field=40000, + u32_var_nullable_field=2500000000, + u32_fixed_nullable_field=3500000000, + u64_var_nullable_field=8000000000, + u64_fixed_nullable_field=12000000000, + u64_tagged_nullable_field=500000000, + 
) + + obj = fory.deserialize(data_bytes) + debug_print(f"Deserialized: {obj}") + + # Verify primitive unsigned fields + assert obj.u8_field == expected.u8_field, f"u8_field: {obj.u8_field} != {expected.u8_field}" + assert obj.u16_field == expected.u16_field, f"u16_field: {obj.u16_field} != {expected.u16_field}" + assert obj.u32_var_field == expected.u32_var_field, f"u32_var_field: {obj.u32_var_field} != {expected.u32_var_field}" + assert obj.u32_fixed_field == expected.u32_fixed_field, f"u32_fixed_field: {obj.u32_fixed_field} != {expected.u32_fixed_field}" + assert obj.u64_var_field == expected.u64_var_field, f"u64_var_field: {obj.u64_var_field} != {expected.u64_var_field}" + assert obj.u64_fixed_field == expected.u64_fixed_field, f"u64_fixed_field: {obj.u64_fixed_field} != {expected.u64_fixed_field}" + assert obj.u64_tagged_field == expected.u64_tagged_field, f"u64_tagged_field: {obj.u64_tagged_field} != {expected.u64_tagged_field}" + + # Verify nullable boxed fields + assert obj.u8_nullable_field == expected.u8_nullable_field, f"u8_nullable_field: {obj.u8_nullable_field} != {expected.u8_nullable_field}" + assert obj.u16_nullable_field == expected.u16_nullable_field, f"u16_nullable_field: {obj.u16_nullable_field} != {expected.u16_nullable_field}" + assert obj.u32_var_nullable_field == expected.u32_var_nullable_field, ( + f"u32_var_nullable_field: {obj.u32_var_nullable_field} != {expected.u32_var_nullable_field}" + ) + assert obj.u32_fixed_nullable_field == expected.u32_fixed_nullable_field, ( + f"u32_fixed_nullable_field: {obj.u32_fixed_nullable_field} != {expected.u32_fixed_nullable_field}" + ) + assert obj.u64_var_nullable_field == expected.u64_var_nullable_field, ( + f"u64_var_nullable_field: {obj.u64_var_nullable_field} != {expected.u64_var_nullable_field}" + ) + assert obj.u64_fixed_nullable_field == expected.u64_fixed_nullable_field, ( + f"u64_fixed_nullable_field: {obj.u64_fixed_nullable_field} != {expected.u64_fixed_nullable_field}" + ) + assert 
obj.u64_tagged_nullable_field == expected.u64_tagged_nullable_field, ( + f"u64_tagged_nullable_field: {obj.u64_tagged_nullable_field} != {expected.u64_tagged_nullable_field}" + ) + + new_bytes = fory.serialize(obj) + with open(data_file, "wb") as f: + f.write(new_bytes) + + +def test_unsigned_schema_compatible(): + """Test unsigned number types with schema compatible mode.""" + data_file = get_data_file() + with open(data_file, "rb") as f: + data_bytes = f.read() + + fory = pyfory.Fory(xlang=True, compatible=True, meta_compressor=NoOpMetaCompressor()) + fory.register_type(UnsignedSchemaCompatible, type_id=502) + + expected = UnsignedSchemaCompatible( + # Group 1: Optional fields (values from Java's non-nullable fields) + u8_field1=200, + u16_field1=60000, + u32_var_field1=3000000000, + u32_fixed_field1=4000000000, + u64_var_field1=10000000000, + u64_fixed_field1=15000000000, + u64_tagged_field1=1000000000, + # Group 2: Non-Optional fields (values from Java's nullable fields) + u8_field2=128, + u16_field2=40000, + u32_var_field2=2500000000, + u32_fixed_field2=3500000000, + u64_var_field2=8000000000, + u64_fixed_field2=12000000000, + u64_tagged_field2=500000000, + ) + + obj = fory.deserialize(data_bytes) + debug_print(f"Deserialized: {obj}") + + # Verify Group 1: Optional unsigned fields + assert obj.u8_field1 == expected.u8_field1, f"u8_field1: {obj.u8_field1} != {expected.u8_field1}" + assert obj.u16_field1 == expected.u16_field1, f"u16_field1: {obj.u16_field1} != {expected.u16_field1}" + assert obj.u32_var_field1 == expected.u32_var_field1, f"u32_var_field1: {obj.u32_var_field1} != {expected.u32_var_field1}" + assert obj.u32_fixed_field1 == expected.u32_fixed_field1, f"u32_fixed_field1: {obj.u32_fixed_field1} != {expected.u32_fixed_field1}" + assert obj.u64_var_field1 == expected.u64_var_field1, f"u64_var_field1: {obj.u64_var_field1} != {expected.u64_var_field1}" + assert obj.u64_fixed_field1 == expected.u64_fixed_field1, f"u64_fixed_field1: {obj.u64_fixed_field1} 
!= {expected.u64_fixed_field1}" + assert obj.u64_tagged_field1 == expected.u64_tagged_field1, f"u64_tagged_field1: {obj.u64_tagged_field1} != {expected.u64_tagged_field1}" + + # Verify Group 2: Non-Optional fields + assert obj.u8_field2 == expected.u8_field2, f"u8_field2: {obj.u8_field2} != {expected.u8_field2}" + assert obj.u16_field2 == expected.u16_field2, f"u16_field2: {obj.u16_field2} != {expected.u16_field2}" + assert obj.u32_var_field2 == expected.u32_var_field2, f"u32_var_field2: {obj.u32_var_field2} != {expected.u32_var_field2}" + assert obj.u32_fixed_field2 == expected.u32_fixed_field2, f"u32_fixed_field2: {obj.u32_fixed_field2} != {expected.u32_fixed_field2}" + assert obj.u64_var_field2 == expected.u64_var_field2, f"u64_var_field2: {obj.u64_var_field2} != {expected.u64_var_field2}" + assert obj.u64_fixed_field2 == expected.u64_fixed_field2, f"u64_fixed_field2: {obj.u64_fixed_field2} != {expected.u64_fixed_field2}" + assert obj.u64_tagged_field2 == expected.u64_tagged_field2, f"u64_tagged_field2: {obj.u64_tagged_field2} != {expected.u64_tagged_field2}" + + new_bytes = fory.serialize(obj) + with open(data_file, "wb") as f: + f.write(new_bytes) + + if __name__ == "__main__": """ This file is executed by PythonXlangTest.java and other cross-language tests. diff --git a/python/pyfory/type_util.py b/python/pyfory/type_util.py index 93983a1bf7..da8d6472c1 100644 --- a/python/pyfory/type_util.py +++ b/python/pyfory/type_util.py @@ -42,7 +42,7 @@ def record_class_factory(cls_name, field_names): >>> rex Dog(name='Rex', weight=32, owner='Bob') >>> Dog.__mro__ - (, ) + (, ) The factory also accepts a list or tuple of identifiers: diff --git a/python/pyfory/types.py b/python/pyfory/types.py index 7c18dafc4d..7d97d91f3e 100644 --- a/python/pyfory/types.py +++ b/python/pyfory/types.py @@ -45,13 +45,13 @@ class TypeId: # a 32-bit signed integer. INT32 = 4 # a 32-bit signed integer which uses fory var_int32 encoding. - VAR32 = 5 + VARINT32 = 5 # a 64-bit signed integer. 
INT64 = 6 # a 64-bit signed integer which uses fory PVL encoding. - VAR64 = 7 + VARINT64 = 7 # a 64-bit signed integer which uses fory hybrid encoding. - H64 = 8 + TAGGED_INT64 = 8 # an 8-bit unsigned integer. UINT8 = 9 # a 16-bit unsigned integer. @@ -59,13 +59,13 @@ class TypeId: # a 32-bit unsigned integer. UINT32 = 11 # a 32-bit unsigned integer which uses fory var_uint32 encoding. - VARU32 = 12 + VAR_UINT32 = 12 # a 64-bit unsigned integer. UINT64 = 13 # a 64-bit unsigned integer which uses fory var_uint64 encoding. - VARU64 = 14 + VAR_UINT64 = 14 # a 64-bit unsigned integer which uses fory hybrid encoding. - HU64 = 15 + TAGGED_UINT64 = 15 # a 16-bit floating point number. FLOAT16 = 16 # a 32-bit floating point number. @@ -169,9 +169,19 @@ def is_type_share_meta(type_id: int) -> bool: TypeId.NAMED_COMPATIBLE_STRUCT, } int8 = TypeVar("int8", bound=int) +uint8 = TypeVar("uint8", bound=int) int16 = TypeVar("int16", bound=int) +uint16 = TypeVar("uint16", bound=int) int32 = TypeVar("int32", bound=int) +uint32 = TypeVar("uint32", bound=int) +fixed_int32 = TypeVar("fixed_int32", bound=int) +fixed_uint32 = TypeVar("fixed_uint32", bound=int) int64 = TypeVar("int64", bound=int) +uint64 = TypeVar("uint64", bound=int) +fixed_int64 = TypeVar("fixed_int64", bound=int) +tagged_int64 = TypeVar("tagged_int64", bound=int) +fixed_uint64 = TypeVar("fixed_uint64", bound=int) +tagged_uint64 = TypeVar("tagged_uint64", bound=int) float32 = TypeVar("float32", bound=float) float64 = TypeVar("float64", bound=float) @@ -188,10 +198,23 @@ def is_type_share_meta(type_id: int) -> bool: _primitive_types_ids = { TypeId.BOOL, + # Signed integers TypeId.INT8, TypeId.INT16, TypeId.INT32, + TypeId.VARINT32, TypeId.INT64, + TypeId.VARINT64, + TypeId.TAGGED_INT64, + # Unsigned integers + TypeId.UINT8, + TypeId.UINT16, + TypeId.UINT32, + TypeId.VAR_UINT32, + TypeId.UINT64, + TypeId.VAR_UINT64, + TypeId.TAGGED_UINT64, + # Floats TypeId.FLOAT16, TypeId.FLOAT32, TypeId.FLOAT64, @@ -209,12 +232,23 @@ 
def is_primitive_type(type_) -> bool: _primitive_type_sizes = { TypeId.BOOL: 1, + # Signed integers TypeId.INT8: 1, TypeId.INT16: 2, TypeId.INT32: 4, - TypeId.VAR32: 4, + TypeId.VARINT32: 4, TypeId.INT64: 8, - TypeId.VAR64: 8, + TypeId.VARINT64: 8, + TypeId.TAGGED_INT64: 8, + # Unsigned integers + TypeId.UINT8: 1, + TypeId.UINT16: 2, + TypeId.UINT32: 4, + TypeId.VAR_UINT32: 4, + TypeId.UINT64: 8, + TypeId.VAR_UINT64: 8, + TypeId.TAGGED_UINT64: 8, + # Floats TypeId.FLOAT16: 2, TypeId.FLOAT32: 4, TypeId.FLOAT64: 8, diff --git a/python/pyfory/utils.py b/python/pyfory/utils.py index ea0865c9b7..fde19c26f0 100644 --- a/python/pyfory/utils.py +++ b/python/pyfory/utils.py @@ -20,7 +20,7 @@ import sys from typing import Dict, Callable -from pyfory._util import get_bit, set_bit, clear_bit, set_bit_to +from pyfory.buffer import get_bit, set_bit, clear_bit, set_bit_to # This method is derived from https://github.com/mars-project/mars/blob/c36c53fa22e10ef9477d9c454401a2f281375f31/mars/utils.py. diff --git a/rust/fory-core/src/buffer.rs b/rust/fory-core/src/buffer.rs index cf28e7112f..8d72f4f5e3 100644 --- a/rust/fory-core/src/buffer.rs +++ b/rust/fory-core/src/buffer.rs @@ -28,6 +28,8 @@ pub struct Writer<'a> { pub(crate) bf: &'a mut Vec, } impl<'a> Writer<'a> { + // ============ Utility methods ============ + #[inline(always)] pub fn from_buffer(bf: &'a mut Vec) -> Writer<'a> { Writer { bf } @@ -79,93 +81,92 @@ impl<'a> Writer<'a> { v.len() } + // ============ BOOL (TypeId = 1) ============ + #[inline(always)] pub fn write_bool(&mut self, value: bool) { self.bf.push(if value { 1 } else { 0 }); } - #[inline(always)] - pub fn write_u8(&mut self, value: u8) { - self.bf.push(value); - } + // ============ INT8 (TypeId = 2) ============ #[inline(always)] pub fn write_i8(&mut self, value: i8) { self.bf.push(value as u8); } - #[inline(always)] - pub fn write_u16(&mut self, value: u16) { - #[cfg(target_endian = "little")] - { - let bytes = unsafe { &*(&value as *const u16 as *const 
[u8; 2]) }; - self.bf.extend_from_slice(bytes); - } - #[cfg(target_endian = "big")] - { - self.bf.extend_from_slice(&value.to_le_bytes()); - } - } + // ============ INT16 (TypeId = 3) ============ #[inline(always)] pub fn write_i16(&mut self, value: i16) { self.write_u16(value as u16); } - #[inline(always)] - pub fn write_u32(&mut self, value: u32) { - #[cfg(target_endian = "little")] - { - let bytes = unsafe { &*(&value as *const u32 as *const [u8; 4]) }; - self.bf.extend_from_slice(bytes); - } - #[cfg(target_endian = "big")] - { - self.bf.extend_from_slice(&value.to_le_bytes()); - } - } + // ============ INT32 (TypeId = 4) ============ #[inline(always)] pub fn write_i32(&mut self, value: i32) { self.write_u32(value as u32); } + // ============ VARINT32 (TypeId = 5) ============ + #[inline(always)] - pub fn write_f32(&mut self, value: f32) { - #[cfg(target_endian = "little")] - { - let bytes = unsafe { &*(&value as *const f32 as *const [u8; 4]) }; - self.bf.extend_from_slice(bytes); - } - #[cfg(target_endian = "big")] - { - self.bf.extend_from_slice(&value.to_bits().to_le_bytes()); - } + pub fn write_varint32(&mut self, value: i32) { + let zigzag = ((value as i64) << 1) ^ ((value as i64) >> 31); + self._write_varuint32(zigzag as u32) } + // ============ INT64 (TypeId = 6) ============ + #[inline(always)] pub fn write_i64(&mut self, value: i64) { self.write_u64(value as u64); } + // ============ VARINT64 (TypeId = 7) ============ + #[inline(always)] - pub fn write_f64(&mut self, value: f64) { - #[cfg(target_endian = "little")] - { - let bytes = unsafe { &*(&value as *const f64 as *const [u8; 8]) }; - self.bf.extend_from_slice(bytes); - } - #[cfg(target_endian = "big")] - { - self.bf.extend_from_slice(&value.to_bits().to_le_bytes()); + pub fn write_varint64(&mut self, value: i64) { + let zigzag = ((value << 1) ^ (value >> 63)) as u64; + self._write_varuint64(zigzag); + } + + // ============ TAGGED_INT64 (TypeId = 8) ============ + + /// Write signed long using fory 
Tagged(Small long as int) encoding. + /// If value is in [0xc0000000, 0x3fffffff] (i.e., [-1073741824, 1073741823]), + /// encode as 4 bytes: `((value as i32) << 1)`. + /// Otherwise write as 9 bytes: `0b1 | little-endian 8 bytes i64`. + #[inline(always)] + pub fn write_tagged_i64(&mut self, value: i64) { + const HALF_MIN_INT_VALUE: i64 = i32::MIN as i64 / 2; // -1073741824 + const HALF_MAX_INT_VALUE: i64 = i32::MAX as i64 / 2; // 1073741823 + if (HALF_MIN_INT_VALUE..=HALF_MAX_INT_VALUE).contains(&value) { + // Fits in 31 bits (with sign), encode as 4 bytes with bit 0 = 0 + let v = (value as i32) << 1; + self.write_i32(v); + } else { + // Write flag byte (0b1) followed by 8-byte i64 + self.bf.push(0b1); + self.write_i64(value); } } + // ============ UINT8 (TypeId = 9) ============ + #[inline(always)] - pub fn write_u64(&mut self, value: u64) { + pub fn write_u8(&mut self, value: u8) { + self.bf.push(value); + } + + // ============ UINT16 (TypeId = 10) ============ + + #[inline(always)] + pub fn write_u16(&mut self, value: u16) { #[cfg(target_endian = "little")] { - let bytes = unsafe { &*(&value as *const u64 as *const [u8; 8]) }; + let bytes = unsafe { &*(&value as *const u16 as *const [u8; 2]) }; self.bf.extend_from_slice(bytes); } #[cfg(target_endian = "big")] @@ -174,22 +175,13 @@ impl<'a> Writer<'a> { } } - #[inline(always)] - pub fn write_usize(&mut self, value: usize) { - const SIZE: usize = std::mem::size_of::(); - match SIZE { - 2 => self.write_u16(value as u16), - 4 => self.write_varuint32(value as u32), - 8 => self.write_varuint64(value as u64), - _ => unreachable!("unsupported usize size"), - } - } + // ============ UINT32 (TypeId = 11) ============ #[inline(always)] - pub fn write_u128(&mut self, value: u128) { + pub fn write_u32(&mut self, value: u32) { #[cfg(target_endian = "little")] { - let bytes = unsafe { &*(&value as *const u128 as *const [u8; 16]) }; + let bytes = unsafe { &*(&value as *const u32 as *const [u8; 4]) }; 
self.bf.extend_from_slice(bytes); } #[cfg(target_endian = "big")] @@ -198,27 +190,7 @@ impl<'a> Writer<'a> { } } - #[inline(always)] - pub fn write_i128(&mut self, value: i128) { - self.write_u128(value as u128); - } - - #[inline(always)] - pub fn write_isize(&mut self, value: isize) { - const SIZE: usize = std::mem::size_of::(); - match SIZE { - 2 => self.write_i16(value as i16), - 4 => self.write_varint32(value as i32), - 8 => self.write_varint64(value as i64), - _ => unreachable!("unsupported isize size"), - } - } - - #[inline(always)] - pub fn write_varint32(&mut self, value: i32) { - let zigzag = ((value as i64) << 1) ^ ((value as i64) >> 31); - self._write_varuint32(zigzag as u32) - } + // ============ VAR_UINT32 (TypeId = 12) ============ #[inline(always)] pub fn write_varuint32(&mut self, value: u32) { @@ -264,12 +236,23 @@ impl<'a> Writer<'a> { } } + // ============ UINT64 (TypeId = 13) ============ + #[inline(always)] - pub fn write_varint64(&mut self, value: i64) { - let zigzag = ((value << 1) ^ (value >> 63)) as u64; - self._write_varuint64(zigzag); + pub fn write_u64(&mut self, value: u64) { + #[cfg(target_endian = "little")] + { + let bytes = unsafe { &*(&value as *const u64 as *const [u8; 8]) }; + self.bf.extend_from_slice(bytes); + } + #[cfg(target_endian = "big")] + { + self.bf.extend_from_slice(&value.to_le_bytes()); + } } + // ============ VAR_UINT64 (TypeId = 14) ============ + #[inline(always)] pub fn write_varuint64(&mut self, value: u64) { self._write_varuint64(value); @@ -374,6 +357,108 @@ impl<'a> Writer<'a> { } } + // ============ TAGGED_UINT64 (TypeId = 15) ============ + + /// Write unsigned long using fory Tagged(Small long as int) encoding. + /// If value is in [0, 0x7fffffff], encode as 4 bytes: `((value as u32) << 1)`. + /// Otherwise write as 9 bytes: `0b1 | little-endian 8 bytes u64`. 
+ #[inline(always)] + pub fn write_tagged_u64(&mut self, value: u64) { + if value <= i32::MAX as u64 { + // Fits in 31 bits, encode as 4 bytes with bit 0 = 0 + let v = (value as u32) << 1; + self.write_u32(v); + } else { + // Write flag byte (0b1) followed by 8-byte u64 + self.bf.push(0b1); + self.write_u64(value); + } + } + + // ============ FLOAT32 (TypeId = 17) ============ + + #[inline(always)] + pub fn write_f32(&mut self, value: f32) { + #[cfg(target_endian = "little")] + { + let bytes = unsafe { &*(&value as *const f32 as *const [u8; 4]) }; + self.bf.extend_from_slice(bytes); + } + #[cfg(target_endian = "big")] + { + self.bf.extend_from_slice(&value.to_bits().to_le_bytes()); + } + } + + // ============ FLOAT64 (TypeId = 18) ============ + + #[inline(always)] + pub fn write_f64(&mut self, value: f64) { + #[cfg(target_endian = "little")] + { + let bytes = unsafe { &*(&value as *const f64 as *const [u8; 8]) }; + self.bf.extend_from_slice(bytes); + } + #[cfg(target_endian = "big")] + { + self.bf.extend_from_slice(&value.to_bits().to_le_bytes()); + } + } + + // ============ STRING (TypeId = 19) ============ + + #[inline(always)] + pub fn write_utf8_string(&mut self, s: &str) { + let bytes = s.as_bytes(); + let len = bytes.len(); + self.bf.reserve(len); + self.bf.extend_from_slice(bytes); + } + + // ============ Rust-specific types (i128, u128, isize, usize) ============ + + #[inline(always)] + pub fn write_i128(&mut self, value: i128) { + self.write_u128(value as u128); + } + + #[inline(always)] + pub fn write_u128(&mut self, value: u128) { + #[cfg(target_endian = "little")] + { + let bytes = unsafe { &*(&value as *const u128 as *const [u8; 16]) }; + self.bf.extend_from_slice(bytes); + } + #[cfg(target_endian = "big")] + { + self.bf.extend_from_slice(&value.to_le_bytes()); + } + } + + #[inline(always)] + pub fn write_isize(&mut self, value: isize) { + const SIZE: usize = std::mem::size_of::(); + match SIZE { + 2 => self.write_i16(value as i16), + 4 => 
self.write_varint32(value as i32), + 8 => self.write_varint64(value as i64), + _ => unreachable!("unsupported isize size"), + } + } + + #[inline(always)] + pub fn write_usize(&mut self, value: usize) { + const SIZE: usize = std::mem::size_of::(); + match SIZE { + 2 => self.write_u16(value as u16), + 4 => self.write_varuint32(value as u32), + 8 => self.write_varuint64(value as u64), + _ => unreachable!("unsupported usize size"), + } + } + + // ============ Other helper methods ============ + #[inline(always)] pub fn write_varuint36_small(&mut self, value: u64) { assert!(value < (1u64 << 36), "value too large for 36-bit varint"); @@ -407,14 +492,6 @@ impl<'a> Writer<'a> { self.write_u64(combined); } } - - #[inline(always)] - pub fn write_utf8_string(&mut self, s: &str) { - let bytes = s.as_bytes(); - let len = bytes.len(); - self.bf.reserve(len); - self.bf.extend_from_slice(bytes); - } } #[derive(Default)] @@ -426,6 +503,8 @@ pub struct Reader<'a> { #[allow(clippy::needless_lifetimes)] impl<'a> Reader<'a> { + // ============ Utility methods ============ + #[inline(always)] pub fn new(bf: &[u8]) -> Reader<'_> { Reader { bf, cursor: 0 } @@ -478,10 +557,6 @@ impl<'a> Reader<'a> { #[inline(always)] fn check_bound(&self, n: usize) -> Result<(), Error> { - // The upper layer guarantees it is non-null - // if self.bf.is_null() { - // return Err(Error::invalid_data("buffer pointer is null")); - // } if self.cursor + n > self.bf.len() { Err(Error::buffer_out_of_bound(self.cursor, n, self.bf.len())) } else { @@ -489,11 +564,6 @@ impl<'a> Reader<'a> { } } - #[inline(always)] - pub fn read_bool(&mut self) -> Result { - Ok(self.read_u8()? 
!= 0) - } - #[inline(always)] fn read_u8_uncheck(&mut self) -> u8 { let result = unsafe { self.bf.get_unchecked(self.cursor) }; @@ -502,113 +572,143 @@ impl<'a> Reader<'a> { } #[inline(always)] - pub fn peek_u8(&mut self) -> Result { - let result = self.value_at(self.cursor)?; - Ok(result) + pub fn skip(&mut self, len: usize) -> Result<(), Error> { + self.check_bound(len)?; + self.move_next(len); + Ok(()) } #[inline(always)] - pub fn read_u8(&mut self) -> Result { - let result = self.value_at(self.cursor)?; - self.move_next(1); + pub fn read_bytes(&mut self, len: usize) -> Result<&[u8], Error> { + self.check_bound(len)?; + let result = &self.bf[self.cursor..self.cursor + len]; + self.move_next(len); Ok(result) } #[inline(always)] - pub fn read_i8(&mut self) -> Result { - Ok(self.read_u8()? as i8) + pub fn reset_cursor_to_here(&self) -> impl FnOnce(&mut Self) { + let raw_cursor = self.cursor; + move |this: &mut Self| { + this.cursor = raw_cursor; + } } + pub fn set_cursor(&mut self, cursor: usize) { + self.cursor = cursor; + } + + // ============ BOOL (TypeId = 1) ============ + #[inline(always)] - pub fn read_u16(&mut self) -> Result { - let slice = self.slice_after_cursor(); - let result = LittleEndian::read_u16(slice); - self.cursor += 2; - Ok(result) + pub fn read_bool(&mut self) -> Result { + Ok(self.read_u8()? != 0) } + // ============ INT8 (TypeId = 2) ============ + #[inline(always)] - pub fn read_i16(&mut self) -> Result { - Ok(self.read_u16()? as i16) + pub fn read_i8(&mut self) -> Result { + Ok(self.read_u8()? as i8) } + // ============ INT16 (TypeId = 3) ============ + #[inline(always)] - pub fn read_u32(&mut self) -> Result { - let slice = self.slice_after_cursor(); - let result = LittleEndian::read_u32(slice); - self.cursor += 4; - Ok(result) + pub fn read_i16(&mut self) -> Result { + Ok(self.read_u16()? as i16) } + // ============ INT32 (TypeId = 4) ============ + #[inline(always)] pub fn read_i32(&mut self) -> Result { Ok(self.read_u32()? 
as i32) } + // ============ VARINT32 (TypeId = 5) ============ + #[inline(always)] - pub fn read_u64(&mut self) -> Result { - let slice = self.slice_after_cursor(); - let result = LittleEndian::read_u64(slice); - self.cursor += 8; - Ok(result) + pub fn read_varint32(&mut self) -> Result { + let encoded = self.read_varuint32()?; + Ok(((encoded >> 1) as i32) ^ -((encoded & 1) as i32)) } + // ============ INT64 (TypeId = 6) ============ + #[inline(always)] - pub fn read_usize(&mut self) -> Result { - const SIZE: usize = std::mem::size_of::(); - match SIZE { - 2 => Ok(self.read_u16()? as usize), - 4 => Ok(self.read_varuint32()? as usize), - 8 => Ok(self.read_varuint64()? as usize), - _ => unreachable!("unsupported usize size"), - } + pub fn read_i64(&mut self) -> Result { + Ok(self.read_u64()? as i64) } + // ============ VARINT64 (TypeId = 7) ============ + #[inline(always)] - pub fn read_u128(&mut self) -> Result { - let slice = self.slice_after_cursor(); - let result = LittleEndian::read_u128(slice); - self.cursor += 16; - Ok(result) + pub fn read_varint64(&mut self) -> Result { + let encoded = self.read_varuint64()?; + Ok(((encoded >> 1) as i64) ^ -((encoded & 1) as i64)) } + // ============ TAGGED_INT64 (TypeId = 8) ============ + + /// Read signed fory Tagged(Small long as int) encoded i64. + /// If bit 0 of the first 4 bytes is 0, return the value >> 1 (arithmetic shift). + /// Otherwise, skip the flag byte and read 8 bytes as i64. #[inline(always)] - pub fn read_i128(&mut self) -> Result { - Ok(self.read_u128()? 
as i128) + pub fn read_tagged_i64(&mut self) -> Result { + self.check_bound(4)?; + let i = LittleEndian::read_i32(&self.bf[self.cursor..]); + if (i & 0b1) != 0b1 { + // Bit 0 is 0, small value encoded in 4 bytes + self.cursor += 4; + Ok((i >> 1) as i64) // arithmetic right shift preserves sign + } else { + // Bit 0 is 1, big value: skip flag byte and read 8 bytes + self.check_bound(9)?; + self.cursor += 1; + let value = LittleEndian::read_i64(&self.bf[self.cursor..]); + self.cursor += 8; + Ok(value) + } } + // ============ UINT8 (TypeId = 9) ============ + #[inline(always)] - pub fn read_isize(&mut self) -> Result { - const SIZE: usize = std::mem::size_of::(); - match SIZE { - 2 => Ok(self.read_i16()? as isize), - 4 => Ok(self.read_varint32()? as isize), - 8 => Ok(self.read_varint64()? as isize), - _ => unreachable!("unsupported isize size"), - } + pub fn peek_u8(&mut self) -> Result { + let result = self.value_at(self.cursor)?; + Ok(result) } #[inline(always)] - pub fn read_i64(&mut self) -> Result { - Ok(self.read_u64()? 
as i64) + pub fn read_u8(&mut self) -> Result { + let result = self.value_at(self.cursor)?; + self.move_next(1); + Ok(result) } + // ============ UINT16 (TypeId = 10) ============ + #[inline(always)] - pub fn read_f32(&mut self) -> Result { + pub fn read_u16(&mut self) -> Result { let slice = self.slice_after_cursor(); - let result = LittleEndian::read_f32(slice); - self.cursor += 4; + let result = LittleEndian::read_u16(slice); + self.cursor += 2; Ok(result) } + // ============ UINT32 (TypeId = 11) ============ + #[inline(always)] - pub fn read_f64(&mut self) -> Result { + pub fn read_u32(&mut self) -> Result { let slice = self.slice_after_cursor(); - let result = LittleEndian::read_f64(slice); - self.cursor += 8; + let result = LittleEndian::read_u32(slice); + self.cursor += 4; Ok(result) } + // ============ VAR_UINT32 (TypeId = 12) ============ + #[inline(always)] pub fn read_varuint32(&mut self) -> Result { let b0 = self.value_at(self.cursor)? as u32; @@ -644,12 +744,18 @@ impl<'a> Reader<'a> { Ok(encoded) } + // ============ UINT64 (TypeId = 13) ============ + #[inline(always)] - pub fn read_varint32(&mut self) -> Result { - let encoded = self.read_varuint32()?; - Ok(((encoded >> 1) as i32) ^ -((encoded & 1) as i32)) + pub fn read_u64(&mut self) -> Result { + let slice = self.slice_after_cursor(); + let result = LittleEndian::read_u64(slice); + self.cursor += 8; + Ok(result) } + // ============ VAR_UINT64 (TypeId = 14) ============ + #[inline(always)] pub fn read_varuint64(&mut self) -> Result { let b0 = self.value_at(self.cursor)? as u64; @@ -713,12 +819,51 @@ impl<'a> Reader<'a> { Ok(var64) } + // ============ TAGGED_UINT64 (TypeId = 15) ============ + + /// Read unsigned fory Tagged(Small long as int) encoded u64. + /// If bit 0 of the first 4 bytes is 0, return the value >> 1. + /// Otherwise, skip the flag byte and read 8 bytes as u64. 
#[inline(always)] - pub fn read_varint64(&mut self) -> Result { - let encoded = self.read_varuint64()?; - Ok(((encoded >> 1) as i64) ^ -((encoded & 1) as i64)) + pub fn read_tagged_u64(&mut self) -> Result { + self.check_bound(4)?; + let i = LittleEndian::read_u32(&self.bf[self.cursor..]); + if (i & 0b1) != 0b1 { + // Bit 0 is 0, small value encoded in 4 bytes + self.cursor += 4; + Ok((i >> 1) as u64) + } else { + // Bit 0 is 1, big value: skip flag byte and read 8 bytes + self.check_bound(9)?; + self.cursor += 1; + let value = LittleEndian::read_u64(&self.bf[self.cursor..]); + self.cursor += 8; + Ok(value) + } } + // ============ FLOAT32 (TypeId = 17) ============ + + #[inline(always)] + pub fn read_f32(&mut self) -> Result { + let slice = self.slice_after_cursor(); + let result = LittleEndian::read_f32(slice); + self.cursor += 4; + Ok(result) + } + + // ============ FLOAT64 (TypeId = 18) ============ + + #[inline(always)] + pub fn read_f64(&mut self) -> Result { + let slice = self.slice_after_cursor(); + let result = LittleEndian::read_f64(slice); + self.cursor += 8; + Ok(result) + } + + // ============ STRING (TypeId = 19) ============ + #[inline(always)] pub fn read_latin1_string(&mut self, len: usize) -> Result { self.check_bound(len)?; @@ -796,6 +941,45 @@ impl<'a> Reader<'a> { Ok(String::from_utf16_lossy(&units)) } + // ============ Rust-specific types (i128, u128, isize, usize) ============ + + #[inline(always)] + pub fn read_i128(&mut self) -> Result { + Ok(self.read_u128()? as i128) + } + + #[inline(always)] + pub fn read_u128(&mut self) -> Result { + let slice = self.slice_after_cursor(); + let result = LittleEndian::read_u128(slice); + self.cursor += 16; + Ok(result) + } + + #[inline(always)] + pub fn read_isize(&mut self) -> Result { + const SIZE: usize = std::mem::size_of::(); + match SIZE { + 2 => Ok(self.read_i16()? as isize), + 4 => Ok(self.read_varint32()? as isize), + 8 => Ok(self.read_varint64()? 
as isize), + _ => unreachable!("unsupported isize size"), + } + } + + #[inline(always)] + pub fn read_usize(&mut self) -> Result { + const SIZE: usize = std::mem::size_of::(); + match SIZE { + 2 => Ok(self.read_u16()? as usize), + 4 => Ok(self.read_varuint32()? as usize), + 8 => Ok(self.read_varuint64()? as usize), + _ => unreachable!("unsupported usize size"), + } + } + + // ============ Other helper methods ============ + #[inline(always)] pub fn read_varuint36small(&mut self) -> Result { let start = self.cursor; @@ -842,33 +1026,6 @@ impl<'a> Reader<'a> { } Ok(result) } - - #[inline(always)] - pub fn skip(&mut self, len: usize) -> Result<(), Error> { - self.check_bound(len)?; - self.move_next(len); - Ok(()) - } - - #[inline(always)] - pub fn read_bytes(&mut self, len: usize) -> Result<&[u8], Error> { - self.check_bound(len)?; - let result = &self.bf[self.cursor..self.cursor + len]; - self.move_next(len); - Ok(result) - } - - #[inline(always)] - pub fn reset_cursor_to_here(&self) -> impl FnOnce(&mut Self) { - let raw_cursor = self.cursor; - move |this: &mut Self| { - this.cursor = raw_cursor; - } - } - - pub fn set_cursor(&mut self, cursor: usize) { - self.cursor = cursor; - } } #[allow(clippy::needless_lifetimes)] diff --git a/rust/fory-core/src/meta/type_meta.rs b/rust/fory-core/src/meta/type_meta.rs index da253eac2d..4ead56cbb3 100644 --- a/rust/fory-core/src/meta/type_meta.rs +++ b/rust/fory-core/src/meta/type_meta.rs @@ -622,17 +622,17 @@ impl TypeMeta { TypeId::INT8 => 1, TypeId::INT16 => 2, TypeId::INT32 => 4, - TypeId::VAR32 => 4, + TypeId::VARINT32 => 4, TypeId::INT64 => 8, - TypeId::VAR64 => 8, - TypeId::H64 => 8, + TypeId::VARINT64 => 8, + TypeId::TAGGED_INT64 => 8, TypeId::UINT8 => 1, TypeId::UINT16 => 2, TypeId::UINT32 => 4, - TypeId::VARU32 => 4, + TypeId::VAR_UINT32 => 4, TypeId::UINT64 => 8, - TypeId::VARU64 => 8, - TypeId::HU64 => 8, + TypeId::VAR_UINT64 => 8, + TypeId::TAGGED_UINT64 => 8, TypeId::FLOAT16 => 2, TypeId::FLOAT32 => 4, 
TypeId::FLOAT64 => 8, @@ -644,14 +644,15 @@ impl TypeMeta { } } fn is_compress(type_id: u32) -> bool { - // Only signed integer types are marked as compressible - // to maintain backward compatibility with field ordering + // Variable-size integer types (both signed and unsigned) + // These are sorted after fixed-size types in field ordering [ - TypeId::INT32 as u32, - TypeId::INT64 as u32, - TypeId::VAR32 as u32, - TypeId::VAR64 as u32, - TypeId::H64 as u32, + TypeId::VARINT32 as u32, + TypeId::VARINT64 as u32, + TypeId::TAGGED_INT64 as u32, + TypeId::VAR_UINT32 as u32, + TypeId::VAR_UINT64 as u32, + TypeId::TAGGED_UINT64 as u32, ] .contains(&type_id) } @@ -669,7 +670,7 @@ impl TypeMeta { .cmp(&b_nullable) // non-nullable first .then_with(|| compress_a.cmp(&compress_b)) // fixed-size (false) first, then variable-size (true) last .then_with(|| size_b.cmp(&size_a)) // when same compress status: larger size first - .then_with(|| a_id.cmp(&b_id)) // when same size: smaller type id first + .then_with(|| b_id.cmp(&a_id)) // when same size: larger type id first .then_with(|| a_field_name.cmp(b_field_name)) // when same id: lexicographic name } fn type_then_name_sorter(a: &FieldInfo, b: &FieldInfo) -> std::cmp::Ordering { @@ -749,8 +750,28 @@ impl TypeMeta { } fn assign_field_ids(type_info_current: &TypeInfo, field_infos: &mut [FieldInfo]) { + if crate::util::ENABLE_FORY_DEBUG_OUTPUT { + eprintln!( + "[fory-debug] assign_field_ids called for type: {:?}", + type_info_current.get_type_name() + ); + for f in field_infos.iter() { + eprintln!( + "[fory-debug] remote field before assign: name={}, field_id={}, type={:?}", + f.field_name, f.field_id, f.field_type + ); + } + } let type_meta = type_info_current.get_type_meta(); let local_field_infos = type_meta.get_field_infos(); + if crate::util::ENABLE_FORY_DEBUG_OUTPUT { + for f in local_field_infos.iter() { + eprintln!( + "[fory-debug] local field: name={}, field_id={}, type={:?}", + f.field_name, f.field_id, f.field_type + ); 
+ } + } // Build maps for both name-based and ID-based lookup. // The value is the SORTED INDEX (position in local_field_infos), not the field's ID attribute. @@ -792,13 +813,31 @@ impl TypeMeta { // Use FieldType comparison which normalizes type IDs for cross-language // schema evolution (e.g., UNKNOWN=0 matches STRUCT variants) if field.field_type != local_info.field_type { + if crate::util::ENABLE_FORY_DEBUG_OUTPUT { + eprintln!( + "[fory-debug] field type mismatch: name={}, remote_type={:?}, local_type={:?}", + field.field_name, field.field_type, local_info.field_type + ); + } field.field_id = -1; // Type mismatch, skip } else { // Assign SORTED INDEX for matching in generated code field.field_id = sorted_index as i16; + if crate::util::ENABLE_FORY_DEBUG_OUTPUT { + eprintln!( + "[fory-debug] matched field: name={}, assigned_field_id={}", + field.field_name, field.field_id + ); + } } } None => { + if crate::util::ENABLE_FORY_DEBUG_OUTPUT { + eprintln!( + "[fory-debug] no local match for field: name={}", + field.field_name + ); + } field.field_id = -1; // No match, skip } } diff --git a/rust/fory-core/src/resolver/type_resolver.rs b/rust/fory-core/src/resolver/type_resolver.rs index a6d2aa487a..fc5d4c0fd8 100644 --- a/rust/fory-core/src/resolver/type_resolver.rs +++ b/rust/fory-core/src/resolver/type_resolver.rs @@ -590,16 +590,16 @@ impl TypeResolver { self.register_internal_serializer::(TypeId::BOOL)?; self.register_internal_serializer::(TypeId::INT8)?; self.register_internal_serializer::(TypeId::INT16)?; - self.register_internal_serializer::(TypeId::INT32)?; - self.register_internal_serializer::(TypeId::INT64)?; + self.register_internal_serializer::(TypeId::VARINT32)?; + self.register_internal_serializer::(TypeId::VARINT64)?; self.register_internal_serializer::(TypeId::ISIZE)?; self.register_internal_serializer::(TypeId::INT128)?; self.register_internal_serializer::(TypeId::FLOAT32)?; self.register_internal_serializer::(TypeId::FLOAT64)?; 
self.register_internal_serializer::(TypeId::UINT8)?; self.register_internal_serializer::(TypeId::UINT16)?; - self.register_internal_serializer::(TypeId::UINT32)?; - self.register_internal_serializer::(TypeId::UINT64)?; + self.register_internal_serializer::(TypeId::VAR_UINT32)?; + self.register_internal_serializer::(TypeId::VAR_UINT64)?; self.register_internal_serializer::(TypeId::USIZE)?; self.register_internal_serializer::(TypeId::U128)?; self.register_internal_serializer::(TypeId::STRING)?; diff --git a/rust/fory-core/src/serializer/list.rs b/rust/fory-core/src/serializer/list.rs index ed9a29b945..7bb84cd674 100644 --- a/rust/fory-core/src/serializer/list.rs +++ b/rust/fory-core/src/serializer/list.rs @@ -39,14 +39,18 @@ pub(super) fn get_primitive_type_id() -> TypeId { TypeId::BOOL => TypeId::BOOL_ARRAY, TypeId::INT8 => TypeId::INT8_ARRAY, TypeId::INT16 => TypeId::INT16_ARRAY, - TypeId::INT32 => TypeId::INT32_ARRAY, - TypeId::INT64 => TypeId::INT64_ARRAY, + // Handle both INT32 and VARINT32 (i32 uses VARINT32 in xlang mode) + TypeId::INT32 | TypeId::VARINT32 => TypeId::INT32_ARRAY, + // Handle INT64, VARINT64, and TAGGED_INT64 (i64 uses VARINT64 in xlang mode) + TypeId::INT64 | TypeId::VARINT64 | TypeId::TAGGED_INT64 => TypeId::INT64_ARRAY, TypeId::FLOAT32 => TypeId::FLOAT32_ARRAY, TypeId::FLOAT64 => TypeId::FLOAT64_ARRAY, TypeId::UINT8 => TypeId::BINARY, TypeId::UINT16 => TypeId::UINT16_ARRAY, - TypeId::UINT32 => TypeId::UINT32_ARRAY, - TypeId::UINT64 => TypeId::UINT64_ARRAY, + // Handle both UINT32 and VAR_UINT32 (u32 uses VAR_UINT32 in xlang mode) + TypeId::UINT32 | TypeId::VAR_UINT32 => TypeId::UINT32_ARRAY, + // Handle UINT64, VAR_UINT64, and TAGGED_UINT64 (u64 uses VAR_UINT64 in xlang mode) + TypeId::UINT64 | TypeId::VAR_UINT64 | TypeId::TAGGED_UINT64 => TypeId::UINT64_ARRAY, TypeId::U128 => TypeId::U128_ARRAY, TypeId::INT128 => TypeId::INT128_ARRAY, TypeId::USIZE => TypeId::USIZE_ARRAY, @@ -66,14 +70,20 @@ pub(super) fn is_primitive_type() -> bool { | 
TypeId::INT8 | TypeId::INT16 | TypeId::INT32 + | TypeId::VARINT32 | TypeId::INT64 + | TypeId::VARINT64 + | TypeId::TAGGED_INT64 | TypeId::INT128 | TypeId::FLOAT32 | TypeId::FLOAT64 | TypeId::UINT8 | TypeId::UINT16 | TypeId::UINT32 + | TypeId::VAR_UINT32 | TypeId::UINT64 + | TypeId::VAR_UINT64 + | TypeId::TAGGED_UINT64 | TypeId::U128, ) } diff --git a/rust/fory-core/src/serializer/number.rs b/rust/fory-core/src/serializer/number.rs index 1e8aa66de0..7e62aecac9 100644 --- a/rust/fory-core/src/serializer/number.rs +++ b/rust/fory-core/src/serializer/number.rs @@ -89,13 +89,13 @@ impl_num_serializer!( i32, Writer::write_varint32, Reader::read_varint32, - TypeId::INT32 + TypeId::VARINT32 ); impl_num_serializer!( i64, Writer::write_varint64, Reader::read_varint64, - TypeId::INT64 + TypeId::VARINT64 ); impl_num_serializer!(f32, Writer::write_f32, Reader::read_f32, TypeId::FLOAT32); impl_num_serializer!(f64, Writer::write_f64, Reader::read_f64, TypeId::FLOAT64); diff --git a/rust/fory-core/src/serializer/skip.rs b/rust/fory-core/src/serializer/skip.rs index b17d9e0b8b..5c839fa173 100644 --- a/rust/fory-core/src/serializer/skip.rs +++ b/rust/fory-core/src/serializer/skip.rs @@ -465,126 +465,283 @@ fn skip_value( } } - // Match on built-in types + // Match on built-in types (ordered by TypeId enum values) match type_id_num { - // Basic types + // ============ UNKNOWN (TypeId = 0) ============ + types::UNKNOWN => { + // UNKNOWN is used for polymorphic types in cross-language serialization + return skip_any_value(context, false); + } + + // ============ BOOL (TypeId = 1) ============ types::BOOL => { ::fory_read_data(context)?; } + + // ============ INT8 (TypeId = 2) ============ types::INT8 => { ::fory_read_data(context)?; } + + // ============ INT16 (TypeId = 3) ============ types::INT16 => { ::fory_read_data(context)?; } + + // ============ INT32 (TypeId = 4) ============ types::INT32 => { + context.reader.read_i32()?; + } + + // ============ VARINT32 (TypeId = 5) 
============ + types::VARINT32 => { ::fory_read_data(context)?; } + + // ============ INT64 (TypeId = 6) ============ types::INT64 => { + context.reader.read_i64()?; + } + + // ============ VARINT64 (TypeId = 7) ============ + types::VARINT64 => { ::fory_read_data(context)?; } + + // ============ TAGGED_INT64 (TypeId = 8) ============ + types::TAGGED_INT64 => { + context.reader.read_tagged_i64()?; + } + + // ============ UINT8 (TypeId = 9) ============ + types::UINT8 => { + ::fory_read_data(context)?; + } + + // ============ UINT16 (TypeId = 10) ============ + types::UINT16 => { + ::fory_read_data(context)?; + } + + // ============ UINT32 (TypeId = 11) ============ + types::UINT32 => { + context.reader.read_u32()?; + } + + // ============ VAR_UINT32 (TypeId = 12) ============ + types::VAR_UINT32 => { + ::fory_read_data(context)?; + } + + // ============ UINT64 (TypeId = 13) ============ + types::UINT64 => { + context.reader.read_u64()?; + } + + // ============ VAR_UINT64 (TypeId = 14) ============ + types::VAR_UINT64 => { + ::fory_read_data(context)?; + } + + // ============ TAGGED_UINT64 (TypeId = 15) ============ + types::TAGGED_UINT64 => { + context.reader.read_tagged_u64()?; + } + + // ============ FLOAT32 (TypeId = 17) ============ types::FLOAT32 => { ::fory_read_data(context)?; } + + // ============ FLOAT64 (TypeId = 18) ============ types::FLOAT64 => { ::fory_read_data(context)?; } + + // ============ STRING (TypeId = 19) ============ types::STRING => { ::fory_read_data(context)?; } - types::LOCAL_DATE => { - ::fory_read_data(context)?; + + // ============ LIST (TypeId = 20) ============ + // ============ SET (TypeId = 21) ============ + types::LIST | types::SET => { + return skip_collection(context, field_type); } - types::TIMESTAMP => { - ::fory_read_data(context)?; + + // ============ MAP (TypeId = 22) ============ + types::MAP => { + return skip_map(context, field_type); + } + + // ============ ENUM (TypeId = 23) ============ + types::ENUM => { + let 
_ordinal = context.reader.read_varuint32()?; + } + + // ============ NAMED_ENUM (TypeId = 24) ============ + types::NAMED_ENUM => { + let _ordinal = context.reader.read_varuint32()?; + } + + // ============ STRUCT (TypeId = 25) ============ + types::STRUCT => { + return skip_struct(context, type_id_num, type_info); + } + + // ============ COMPATIBLE_STRUCT (TypeId = 26) ============ + types::COMPATIBLE_STRUCT => { + return skip_struct(context, type_id_num, type_info); + } + + // ============ NAMED_STRUCT (TypeId = 27) ============ + types::NAMED_STRUCT => { + return skip_struct(context, type_id_num, type_info); + } + + // ============ NAMED_COMPATIBLE_STRUCT (TypeId = 28) ============ + types::NAMED_COMPATIBLE_STRUCT => { + return skip_struct(context, type_id_num, type_info); + } + + // ============ EXT (TypeId = 29) ============ + types::EXT => { + return skip_ext(context, type_id_num, type_info); + } + + // ============ NAMED_EXT (TypeId = 30) ============ + types::NAMED_EXT => { + return skip_ext(context, type_id_num, type_info); + } + + // ============ UNION (TypeId = 31) ============ + types::UNION => { + // UNION format: index (varuint32) + value (xreadRef) + let _ = context.reader.read_varuint32()?; + return skip_any_value(context, true); + } + + // ============ NONE (TypeId = 32) ============ + types::NONE => { + // NONE represents an empty/unit value with no data - nothing to skip + return Ok(()); } + + // ============ DURATION (TypeId = 33) ============ types::DURATION => { ::fory_read_data(context)?; } + + // ============ TIMESTAMP (TypeId = 34) ============ + types::TIMESTAMP => { + ::fory_read_data(context)?; + } + + // ============ LOCAL_DATE (TypeId = 35) ============ + types::LOCAL_DATE => { + ::fory_read_data(context)?; + } + + // ============ BINARY (TypeId = 37) ============ types::BINARY => { as Serializer>::fory_read_data(context)?; } + + // ============ BOOL_ARRAY (TypeId = 39) ============ types::BOOL_ARRAY => { as 
Serializer>::fory_read_data(context)?; } + + // ============ INT8_ARRAY (TypeId = 40) ============ types::INT8_ARRAY => { as Serializer>::fory_read_data(context)?; } + + // ============ INT16_ARRAY (TypeId = 41) ============ types::INT16_ARRAY => { as Serializer>::fory_read_data(context)?; } + + // ============ INT32_ARRAY (TypeId = 42) ============ types::INT32_ARRAY => { as Serializer>::fory_read_data(context)?; } + + // ============ INT64_ARRAY (TypeId = 43) ============ types::INT64_ARRAY => { as Serializer>::fory_read_data(context)?; } - types::FLOAT32_ARRAY => { - as Serializer>::fory_read_data(context)?; - } - types::FLOAT64_ARRAY => { - as Serializer>::fory_read_data(context)?; - } - types::UINT8 => { - ::fory_read_data(context)?; - } - types::UINT16 => { - ::fory_read_data(context)?; - } - types::UINT32 => { - ::fory_read_data(context)?; - } - types::UINT64 => { - ::fory_read_data(context)?; - } - types::U128 => { - ::fory_read_data(context)?; + + // ============ UINT8_ARRAY (TypeId = 44) ============ + types::UINT8_ARRAY => { + as Serializer>::fory_read_data(context)?; } + + // ============ UINT16_ARRAY (TypeId = 45) ============ types::UINT16_ARRAY => { as Serializer>::fory_read_data(context)?; } + + // ============ UINT32_ARRAY (TypeId = 46) ============ types::UINT32_ARRAY => { as Serializer>::fory_read_data(context)?; } + + // ============ UINT64_ARRAY (TypeId = 47) ============ types::UINT64_ARRAY => { as Serializer>::fory_read_data(context)?; } - types::U128_ARRAY => { - as Serializer>::fory_read_data(context)?; + + // ============ FLOAT32_ARRAY (TypeId = 49) ============ + types::FLOAT32_ARRAY => { + as Serializer>::fory_read_data(context)?; } - // Container types - types::LIST | types::SET => { - return skip_collection(context, field_type); + // ============ FLOAT64_ARRAY (TypeId = 50) ============ + types::FLOAT64_ARRAY => { + as Serializer>::fory_read_data(context)?; } - types::MAP => { - return skip_map(context, field_type); + + // ============ 
Rust-specific types ============ + + // ============ U128 (TypeId = 64) ============ + types::U128 => { + ::fory_read_data(context)?; } - // Named types - types::NAMED_ENUM => { - let _ordinal = context.reader.read_varuint32()?; + // ============ INT128 (TypeId = 65) ============ + types::INT128 => { + ::fory_read_data(context)?; } - types::NAMED_COMPATIBLE_STRUCT => { - return skip_struct(context, type_id_num, type_info); + + // ============ USIZE (TypeId = 66) ============ + types::USIZE => { + ::fory_read_data(context)?; } - types::NAMED_EXT => { - return skip_ext(context, type_id_num, type_info); + + // ============ ISIZE (TypeId = 67) ============ + types::ISIZE => { + ::fory_read_data(context)?; } - types::UNKNOWN => { - // UNKNOWN (0) is used for polymorphic types in cross-language serialization - return skip_any_value(context, false); + + // ============ U128_ARRAY (TypeId = 68) ============ + types::U128_ARRAY => { + as Serializer>::fory_read_data(context)?; } - types::NONE => { - // NONE represents an empty/unit value with no data - nothing to skip - return Ok(()); + + // ============ INT128_ARRAY (TypeId = 69) ============ + types::INT128_ARRAY => { + as Serializer>::fory_read_data(context)?; } - types::UNION => { - // UNION format: index (varuint32) + value (xreadRef) - // Skip the index - let _ = context.reader.read_varuint32()?; - // Skip the value (which is written via xwriteRef) - return skip_any_value(context, true); + + // ============ USIZE_ARRAY (TypeId = 70) ============ + types::USIZE_ARRAY => { + as Serializer>::fory_read_data(context)?; + } + + // ============ ISIZE_ARRAY (TypeId = 71) ============ + types::ISIZE_ARRAY => { + as Serializer>::fory_read_data(context)?; } _ => { diff --git a/rust/fory-core/src/serializer/unsigned_number.rs b/rust/fory-core/src/serializer/unsigned_number.rs index bff0c759d9..2c859d9fb7 100644 --- a/rust/fory-core/src/serializer/unsigned_number.rs +++ b/rust/fory-core/src/serializer/unsigned_number.rs @@ -153,8 
+153,18 @@ macro_rules! impl_rust_unsigned_num_serializer { // xlang-compatible unsigned types impl_xlang_unsigned_num_serializer!(u8, Writer::write_u8, Reader::read_u8, TypeId::UINT8); impl_xlang_unsigned_num_serializer!(u16, Writer::write_u16, Reader::read_u16, TypeId::UINT16); -impl_xlang_unsigned_num_serializer!(u32, Writer::write_u32, Reader::read_u32, TypeId::UINT32); -impl_xlang_unsigned_num_serializer!(u64, Writer::write_u64, Reader::read_u64, TypeId::UINT64); +impl_xlang_unsigned_num_serializer!( + u32, + Writer::write_varuint32, + Reader::read_varuint32, + TypeId::VAR_UINT32 +); +impl_xlang_unsigned_num_serializer!( + u64, + Writer::write_varuint64, + Reader::read_varuint64, + TypeId::VAR_UINT64 +); // Rust-specific unsigned types (not supported in xlang mode) impl_rust_unsigned_num_serializer!(u128, Writer::write_u128, Reader::read_u128, TypeId::U128); diff --git a/rust/fory-core/src/types.rs b/rust/fory-core/src/types.rs index e64f0ce376..aae3443311 100644 --- a/rust/fory-core/src/types.rs +++ b/rust/fory-core/src/types.rs @@ -104,17 +104,17 @@ pub enum TypeId { INT8 = 2, INT16 = 3, INT32 = 4, - VAR32 = 5, + VARINT32 = 5, INT64 = 6, - VAR64 = 7, - H64 = 8, + VARINT64 = 7, + TAGGED_INT64 = 8, UINT8 = 9, UINT16 = 10, UINT32 = 11, - VARU32 = 12, + VAR_UINT32 = 12, UINT64 = 13, - VARU64 = 14, - HU64 = 15, + VAR_UINT64 = 14, + TAGGED_UINT64 = 15, FLOAT16 = 16, FLOAT32 = 17, FLOAT64 = 18, @@ -175,17 +175,17 @@ pub const BOOL: u32 = TypeId::BOOL as u32; pub const INT8: u32 = TypeId::INT8 as u32; pub const INT16: u32 = TypeId::INT16 as u32; pub const INT32: u32 = TypeId::INT32 as u32; -pub const VAR32: u32 = TypeId::VAR32 as u32; +pub const VARINT32: u32 = TypeId::VARINT32 as u32; pub const INT64: u32 = TypeId::INT64 as u32; -pub const VAR64: u32 = TypeId::VAR64 as u32; -pub const H64: u32 = TypeId::H64 as u32; +pub const VARINT64: u32 = TypeId::VARINT64 as u32; +pub const TAGGED_INT64: u32 = TypeId::TAGGED_INT64 as u32; pub const UINT8: u32 = TypeId::UINT8 as 
u32; pub const UINT16: u32 = TypeId::UINT16 as u32; pub const UINT32: u32 = TypeId::UINT32 as u32; -pub const VARU32: u32 = TypeId::VARU32 as u32; +pub const VAR_UINT32: u32 = TypeId::VAR_UINT32 as u32; pub const UINT64: u32 = TypeId::UINT64 as u32; -pub const VARU64: u32 = TypeId::VARU64 as u32; -pub const HU64: u32 = TypeId::HU64 as u32; +pub const VAR_UINT64: u32 = TypeId::VAR_UINT64 as u32; +pub const TAGGED_UINT64: u32 = TypeId::TAGGED_UINT64 as u32; pub const FLOAT16: u32 = TypeId::FLOAT16 as u32; pub const FLOAT32: u32 = TypeId::FLOAT32 as u32; pub const FLOAT64: u32 = TypeId::FLOAT64 as u32; @@ -298,22 +298,30 @@ pub static BASIC_TYPES: [TypeId; 33] = [ TypeId::USIZE_ARRAY, ]; -pub static PRIMITIVE_TYPES: [u32; 14] = [ +pub static PRIMITIVE_TYPES: [u32; 22] = [ TypeId::BOOL as u32, TypeId::INT8 as u32, TypeId::INT16 as u32, TypeId::INT32 as u32, + TypeId::VARINT32 as u32, TypeId::INT64 as u32, + TypeId::VARINT64 as u32, + TypeId::TAGGED_INT64 as u32, TypeId::UINT8 as u32, TypeId::UINT16 as u32, TypeId::UINT32 as u32, + TypeId::VAR_UINT32 as u32, TypeId::UINT64 as u32, + TypeId::VAR_UINT64 as u32, + TypeId::TAGGED_UINT64 as u32, + TypeId::FLOAT16 as u32, TypeId::FLOAT32 as u32, TypeId::FLOAT64 as u32, // Rust-specific TypeId::U128 as u32, TypeId::INT128 as u32, TypeId::USIZE as u32, + TypeId::ISIZE as u32, ]; pub static PRIMITIVE_ARRAY_TYPES: [u32; 15] = [ @@ -544,17 +552,17 @@ pub fn format_type_id(type_id: u32) -> String { 2 => "INT8", 3 => "INT16", 4 => "INT32", - 5 => "VAR32", + 5 => "VARINT32", 6 => "INT64", - 7 => "VAR64", - 8 => "H64", + 7 => "VARINT64", + 8 => "TAGGED_INT64", 9 => "UINT8", 10 => "UINT16", 11 => "UINT32", - 12 => "VARU32", + 12 => "VAR_UINT32", 13 => "UINT64", - 14 => "VARU64", - 15 => "HU64", + 14 => "VAR_UINT64", + 15 => "TAGGED_UINT64", 16 => "FLOAT16", 17 => "FLOAT32", 18 => "FLOAT64", diff --git a/rust/fory-derive/src/object/field_meta.rs b/rust/fory-derive/src/object/field_meta.rs index ff1a68793d..4ccb5db065 100644 --- 
a/rust/fory-derive/src/object/field_meta.rs +++ b/rust/fory-derive/src/object/field_meta.rs @@ -22,7 +22,13 @@ //! - `nullable`: Whether the field can be null (default: false, except Option/RcWeak/ArcWeak) //! - `ref`: Whether to enable reference tracking (default: false, except Rc/Arc/RcWeak/ArcWeak) //! - `skip`: Skip this field during serialization +//! - `compress`: For i32/u32 fields: true (VARINT32/VAR_UINT32) or false (INT32/UINT32 fixed) +//! - `encoding`: For i32/u32: "varint", "fixed"; for u64: "varint", "fixed", "tagged" +//! +//! Both `compress` and `encoding` are converted to a `type_id` internally. If both are +//! specified, they must not conflict. +use fory_core::types::TypeId; use quote::ToTokens; use std::collections::HashMap; use syn::{Field, GenericArgument, PathArguments, Type}; @@ -35,9 +41,12 @@ pub struct ForyFieldMeta { /// Whether the field can be null (None = use type-based default) pub nullable: Option, /// Whether to enable reference tracking (None = use type-based default) - pub ref_tracking: Option, + pub r#ref: Option, /// Whether to skip this field entirely pub skip: bool, + /// Explicit type ID for encoding (e.g., INT32 vs VARINT32, UINT32 vs VAR_UINT32, etc.) + /// This is set by `compress` or `encoding` attributes. 
+ pub type_id: Option, } /// Type classification for determining default nullable/ref behavior @@ -79,8 +88,8 @@ impl ForyFieldMeta { /// Defaults: /// - `Rc`, `Arc`, `RcWeak`, `ArcWeak`: true (shared ownership types) /// - All other types: false - pub fn effective_ref_tracking(&self, type_class: FieldTypeClass) -> bool { - self.ref_tracking.unwrap_or(matches!( + pub fn effective_ref(&self, type_class: FieldTypeClass) -> bool { + self.r#ref.unwrap_or(matches!( type_class, FieldTypeClass::Rc | FieldTypeClass::Arc @@ -100,9 +109,26 @@ impl ForyFieldMeta { } } +/// Encoding specified via `compress` attribute +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum CompressEncoding { + Varint, + Fixed, +} + +/// Encoding specified via `encoding` attribute +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum ExplicitEncoding { + Varint, + Fixed, + Tagged, +} + /// Parse `#[fory(...)]` attributes from a field pub fn parse_field_meta(field: &Field) -> syn::Result { let mut meta = ForyFieldMeta::default(); + let mut compress_encoding: Option = None; + let mut explicit_encoding: Option = None; for attr in &field.attrs { if !attr.path().is_ident("fory") { @@ -122,14 +148,85 @@ pub fn parse_field_meta(field: &Field) -> syn::Result { meta.nullable = Some(value); } else if nested.path.is_ident("ref") { let value = parse_bool_or_flag(&nested)?; - meta.ref_tracking = Some(value); + meta.r#ref = Some(value); } else if nested.path.is_ident("skip") { meta.skip = true; + } else if nested.path.is_ident("compress") { + let value = parse_bool_or_flag(&nested)?; + compress_encoding = Some(if value { + CompressEncoding::Varint + } else { + CompressEncoding::Fixed + }); + } else if nested.path.is_ident("encoding") { + let lit: syn::LitStr = nested.value()?.parse()?; + let encoding_str = lit.value(); + explicit_encoding = Some(match encoding_str.as_str() { + "varint" => ExplicitEncoding::Varint, + "fixed" => ExplicitEncoding::Fixed, + "tagged" => ExplicitEncoding::Tagged, + _ => { + return 
Err(syn::Error::new( + lit.span(), + "encoding must be \"varint\", \"fixed\", or \"tagged\"", + )); + } + }); } Ok(()) })?; } + // Validate that compress and encoding don't conflict if both are specified + if let (Some(compress), Some(explicit)) = (compress_encoding, explicit_encoding) { + let compress_implies = match compress { + CompressEncoding::Varint => ExplicitEncoding::Varint, + CompressEncoding::Fixed => ExplicitEncoding::Fixed, + }; + // Only check conflict for varint/fixed (tagged is only for u64) + if explicit != ExplicitEncoding::Tagged && compress_implies != explicit { + let compress_str = match compress { + CompressEncoding::Varint => "true", + CompressEncoding::Fixed => "false", + }; + let encoding_str = match explicit { + ExplicitEncoding::Varint => "varint", + ExplicitEncoding::Fixed => "fixed", + ExplicitEncoding::Tagged => "tagged", + }; + return Err(syn::Error::new_spanned( + field, + format!( + "conflicting attributes: compress={} implies {} encoding, but encoding=\"{}\" was specified", + compress_str, + match compress { + CompressEncoding::Varint => "varint", + CompressEncoding::Fixed => "fixed", + }, + encoding_str + ), + )); + } + } + + // Convert encoding to type_id + // Priority: explicit_encoding > compress_encoding + // Note: The actual type_id depends on the field type (i32, u32, u64), but we store + // a "canonical" type_id here. The util.rs code will interpret it correctly. 
+ if let Some(explicit) = explicit_encoding { + meta.type_id = Some(match explicit { + // For varint, we use the signed variant as canonical; util.rs adjusts for unsigned + ExplicitEncoding::Varint => TypeId::VARINT32 as i16, + ExplicitEncoding::Fixed => TypeId::INT32 as i16, + ExplicitEncoding::Tagged => TypeId::TAGGED_UINT64 as i16, + }); + } else if let Some(compress) = compress_encoding { + meta.type_id = Some(match compress { + CompressEncoding::Varint => TypeId::VARINT32 as i16, + CompressEncoding::Fixed => TypeId::INT32 as i16, + }); + } + Ok(meta) } @@ -188,7 +285,7 @@ fn extract_outer_type_name(ty: &Type) -> String { } /// Extract the inner type from `Option` -fn extract_option_inner_type(ty: &Type) -> Option { +pub fn extract_option_inner_type(ty: &Type) -> Option { if let Type::Path(type_path) = ty { if let Some(seg) = type_path.path.segments.last() { if seg.ident == "Option" { @@ -247,13 +344,13 @@ pub fn classify_field_type(ty: &Type) -> FieldTypeClass { } } -/// Get nullable and ref tracking flags for a field based on its type and metadata +/// Get nullable and ref flags for a field based on its type and metadata #[allow(dead_code)] pub fn get_field_flags(field: &Field, meta: &ForyFieldMeta) -> (bool, bool) { let type_class = classify_field_type(&field.ty); let nullable = meta.effective_nullable(type_class); - let ref_tracking = meta.effective_ref_tracking(type_class); - (nullable, ref_tracking) + let ref_flag = meta.effective_ref(type_class); + (nullable, ref_flag) } /// Parse field metadata for all fields and validate @@ -303,7 +400,7 @@ mod tests { let meta = parse_field_meta(&field).unwrap(); assert_eq!(meta.id, Some(0)); assert_eq!(meta.nullable, None); - assert_eq!(meta.ref_tracking, None); + assert_eq!(meta.r#ref, None); assert!(!meta.skip); } @@ -316,7 +413,7 @@ mod tests { let meta = parse_field_meta(&field).unwrap(); assert_eq!(meta.id, Some(1)); assert_eq!(meta.nullable, Some(true)); - assert_eq!(meta.ref_tracking, Some(false)); + 
assert_eq!(meta.r#ref, Some(false)); } #[test] @@ -328,7 +425,7 @@ mod tests { let meta = parse_field_meta(&field).unwrap(); assert_eq!(meta.id, Some(2)); assert_eq!(meta.nullable, Some(true)); - assert_eq!(meta.ref_tracking, Some(true)); + assert_eq!(meta.r#ref, Some(true)); } #[test] @@ -433,19 +530,19 @@ mod tests { } #[test] - fn test_effective_ref_tracking_defaults() { + fn test_effective_ref_defaults() { let meta = ForyFieldMeta::default(); // Rc, Arc, and RcWeak/ArcWeak have ref tracking by default - assert!(meta.effective_ref_tracking(FieldTypeClass::Rc)); - assert!(meta.effective_ref_tracking(FieldTypeClass::Arc)); - assert!(meta.effective_ref_tracking(FieldTypeClass::RcWeak)); - assert!(meta.effective_ref_tracking(FieldTypeClass::ArcWeak)); + assert!(meta.effective_ref(FieldTypeClass::Rc)); + assert!(meta.effective_ref(FieldTypeClass::Arc)); + assert!(meta.effective_ref(FieldTypeClass::RcWeak)); + assert!(meta.effective_ref(FieldTypeClass::ArcWeak)); // All others don't have ref tracking by default - assert!(!meta.effective_ref_tracking(FieldTypeClass::Primitive)); - assert!(!meta.effective_ref_tracking(FieldTypeClass::Option)); - assert!(!meta.effective_ref_tracking(FieldTypeClass::Other)); + assert!(!meta.effective_ref(FieldTypeClass::Primitive)); + assert!(!meta.effective_ref(FieldTypeClass::Option)); + assert!(!meta.effective_ref(FieldTypeClass::Other)); } #[test] @@ -454,8 +551,9 @@ mod tests { let meta = ForyFieldMeta { id: Some(0), nullable: Some(true), - ref_tracking: None, + r#ref: None, skip: false, + type_id: None, }; assert!(meta.effective_nullable(FieldTypeClass::Primitive)); // Would be false by default @@ -463,9 +561,159 @@ mod tests { let meta = ForyFieldMeta { id: Some(0), nullable: None, - ref_tracking: Some(false), + r#ref: Some(false), skip: false, + type_id: None, + }; + assert!(!meta.effective_ref(FieldTypeClass::Rc)); // Would be true by default + } + + #[test] + fn test_parse_compress_attribute() { + // compress=false sets type_id 
to INT32 (fixed encoding) + let field: Field = parse_quote! { + #[fory(compress = false)] + value: u32 + }; + let meta = parse_field_meta(&field).unwrap(); + assert_eq!(meta.type_id, Some(TypeId::INT32 as i16)); + + // compress=true sets type_id to VARINT32 (variable encoding) + let field: Field = parse_quote! { + #[fory(compress = true)] + value: u32 + }; + let meta = parse_field_meta(&field).unwrap(); + assert_eq!(meta.type_id, Some(TypeId::VARINT32 as i16)); + + // Standalone compress flag should set to varint + let field: Field = parse_quote! { + #[fory(compress)] + value: u32 + }; + let meta = parse_field_meta(&field).unwrap(); + assert_eq!(meta.type_id, Some(TypeId::VARINT32 as i16)); + } + + #[test] + fn test_parse_encoding_attribute() { + // encoding="varint" sets type_id to VARINT32 + let field: Field = parse_quote! { + #[fory(encoding = "varint")] + value: u64 + }; + let meta = parse_field_meta(&field).unwrap(); + assert_eq!(meta.type_id, Some(TypeId::VARINT32 as i16)); + + // encoding="fixed" sets type_id to INT32 + let field: Field = parse_quote! { + #[fory(encoding = "fixed")] + value: u64 + }; + let meta = parse_field_meta(&field).unwrap(); + assert_eq!(meta.type_id, Some(TypeId::INT32 as i16)); + + // encoding="tagged" sets type_id to TAGGED_UINT64 + let field: Field = parse_quote! { + #[fory(encoding = "tagged")] + value: u64 + }; + let meta = parse_field_meta(&field).unwrap(); + assert_eq!(meta.type_id, Some(TypeId::TAGGED_UINT64 as i16)); + } + + #[test] + fn test_parse_encoding_for_i32_u32() { + // encoding="varint" for i32/u32 + let field: Field = parse_quote! { + #[fory(encoding = "varint")] + value: i32 + }; + let meta = parse_field_meta(&field).unwrap(); + assert_eq!(meta.type_id, Some(TypeId::VARINT32 as i16)); + + // encoding="fixed" for i32/u32 + let field: Field = parse_quote! 
{ + #[fory(encoding = "fixed")] + value: u32 + }; + let meta = parse_field_meta(&field).unwrap(); + assert_eq!(meta.type_id, Some(TypeId::INT32 as i16)); + } + + #[test] + fn test_compress_encoding_no_conflict() { + // compress=true with encoding="varint" - no conflict + let field: Field = parse_quote! { + #[fory(compress = true, encoding = "varint")] + value: i32 + }; + let meta = parse_field_meta(&field); + assert!(meta.is_ok()); + let meta = meta.unwrap(); + assert_eq!(meta.type_id, Some(TypeId::VARINT32 as i16)); + + // compress=false with encoding="fixed" - no conflict + let field: Field = parse_quote! { + #[fory(compress = false, encoding = "fixed")] + value: u32 + }; + let meta = parse_field_meta(&field); + assert!(meta.is_ok()); + let meta = meta.unwrap(); + assert_eq!(meta.type_id, Some(TypeId::INT32 as i16)); + } + + #[test] + fn test_compress_encoding_conflict() { + // compress=true with encoding="fixed" - conflict! + let field: Field = parse_quote! { + #[fory(compress = true, encoding = "fixed")] + value: i32 }; - assert!(!meta.effective_ref_tracking(FieldTypeClass::Rc)); // Would be true by default + let result = parse_field_meta(&field); + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!(err.to_string().contains("conflicting")); + + // compress=false with encoding="varint" - conflict! + let field: Field = parse_quote! { + #[fory(compress = false, encoding = "varint")] + value: u32 + }; + let result = parse_field_meta(&field); + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!(err.to_string().contains("conflicting")); + } + + #[test] + fn test_parse_combined_attributes() { + // nullable with compress=false + let field: Field = parse_quote! 
{ + #[fory(nullable, compress = false)] + value: Option + }; + let meta = parse_field_meta(&field).unwrap(); + assert_eq!(meta.nullable, Some(true)); + assert_eq!(meta.type_id, Some(TypeId::INT32 as i16)); + + // nullable with encoding="tagged" (for u64) + let field: Field = parse_quote! { + #[fory(nullable, encoding = "tagged")] + value: Option + }; + let meta = parse_field_meta(&field).unwrap(); + assert_eq!(meta.nullable, Some(true)); + assert_eq!(meta.type_id, Some(TypeId::TAGGED_UINT64 as i16)); + + // encoding="fixed" for Option + let field: Field = parse_quote! { + #[fory(nullable, encoding = "fixed")] + value: Option + }; + let meta = parse_field_meta(&field).unwrap(); + assert_eq!(meta.nullable, Some(true)); + assert_eq!(meta.type_id, Some(TypeId::INT32 as i16)); } } diff --git a/rust/fory-derive/src/object/misc.rs b/rust/fory-derive/src/object/misc.rs index 582f82e8a0..34d84c3c16 100644 --- a/rust/fory-derive/src/object/misc.rs +++ b/rust/fory-derive/src/object/misc.rs @@ -20,7 +20,9 @@ use quote::quote; use std::sync::atomic::{AtomicU32, Ordering}; use syn::Field; -use super::field_meta::{classify_field_type, is_option_type, parse_field_meta}; +use super::field_meta::{ + classify_field_type, extract_option_inner_type, is_option_type, parse_field_meta, +}; use super::util::{ classify_trait_object_field, generic_tree_to_tokens, get_filtered_source_fields_iter, get_sort_fields_ts, parse_generic_tree, StructField, @@ -87,7 +89,7 @@ pub fn gen_field_fields_info(source_fields: &[SourceField<'_>]) -> TokenStream { // but we also need to detect that the outer wrapper is Option for nullable. 
let is_outer_option = is_option_type(ty); let nullable = meta.effective_nullable(type_class) || is_outer_option; - let ref_tracking = meta.effective_ref_tracking(type_class); + let ref_tracking = meta.effective_ref(type_class); // Only use explicit field ID when user sets #[fory(id = N)] // Otherwise use -1 to indicate field name encoding should be used let field_id = if meta.uses_tag_id() { @@ -98,19 +100,76 @@ pub fn gen_field_fields_info(source_fields: &[SourceField<'_>]) -> TokenStream { match classify_trait_object_field(ty) { StructField::None => { - let generic_tree = parse_generic_tree(ty); - let generic_token = generic_tree_to_tokens(&generic_tree); - quote! { - fory_core::meta::FieldInfo::new_with_id( - #field_id, - #name, + // Check if this is an i32/u32/u64 field (or Option/Option/Option) with encoding attributes + // In this case, we need to generate the FieldType with the correct type ID directly + let inner_ty = extract_option_inner_type(ty).unwrap_or_else(|| ty.clone()); + let inner_ty_str = quote::ToTokens::to_token_stream(&inner_ty) + .to_string() + .replace(' ', ""); + + let has_encoding = + (inner_ty_str == "i32" || inner_ty_str == "u32" || inner_ty_str == "u64") + && meta.type_id.is_some(); + + if has_encoding { + // Generate FieldType directly with the correct type ID based on meta.type_id + let type_id_ts = match (inner_ty_str.as_str(), meta.type_id) { + // i32: VARINT32 (default) or INT32 (fixed) + ("i32", Some(tid)) if tid == fory_core::types::TypeId::INT32 as i16 => { + quote! { fory_core::types::TypeId::INT32 as u32 } + } + ("i32", _) => { + quote! { fory_core::types::TypeId::VARINT32 as u32 } + } + // u32: VAR_UINT32 (default) or UINT32 (fixed) + ("u32", Some(tid)) if tid == fory_core::types::TypeId::INT32 as i16 => { + quote! { fory_core::types::TypeId::UINT32 as u32 } + } + ("u32", _) => { + quote! 
{ fory_core::types::TypeId::VAR_UINT32 as u32 } + } + // u64: VAR_UINT64 (default), UINT64 (fixed), or TAGGED_UINT64 (tagged) + ("u64", Some(tid)) if tid == fory_core::types::TypeId::INT32 as i16 => { + quote! { fory_core::types::TypeId::UINT64 as u32 } + } + ("u64", Some(tid)) + if tid == fory_core::types::TypeId::TAGGED_UINT64 as i16 => { - let mut ft = #generic_token; - ft.nullable = #nullable; - ft.ref_tracking = #ref_tracking; - ft + quote! { fory_core::types::TypeId::TAGGED_UINT64 as u32 } + } + ("u64", _) => { + quote! { fory_core::types::TypeId::VAR_UINT64 as u32 } } - ) + _ => unreachable!(), + }; + + quote! { + fory_core::meta::FieldInfo::new_with_id( + #field_id, + #name, + fory_core::meta::FieldType { + type_id: #type_id_ts, + nullable: #nullable, + ref_tracking: #ref_tracking, + generics: Vec::new() + } + ) + } + } else { + let generic_tree = parse_generic_tree(ty); + let generic_token = generic_tree_to_tokens(&generic_tree); + quote! { + fory_core::meta::FieldInfo::new_with_id( + #field_id, + #name, + { + let mut ft = #generic_token; + ft.nullable = #nullable; + ft.ref_tracking = #ref_tracking; + ft + } + ) + } } } StructField::VecBox(_) | StructField::VecRc(_) | StructField::VecArc(_) => { diff --git a/rust/fory-derive/src/object/read.rs b/rust/fory-derive/src/object/read.rs index 2e7aa12f50..a76156cd7b 100644 --- a/rust/fory-derive/src/object/read.rs +++ b/rust/fory-derive/src/object/read.rs @@ -19,14 +19,140 @@ use proc_macro2::{Ident, TokenStream}; use quote::{format_ident, quote}; use syn::Field; +use super::field_meta::{extract_option_inner_type, parse_field_meta}; use super::util::{ classify_trait_object_field, create_wrapper_types_arc, create_wrapper_types_rc, determine_field_ref_mode, extract_type_name, gen_struct_version_hash_ts, - get_primitive_reader_method, get_struct_name, is_debug_enabled, is_direct_primitive_type, - is_primitive_type, is_skip_field, should_skip_type_info_for_field, FieldRefMode, StructField, + 
get_option_inner_primitive_name, get_primitive_reader_method_with_encoding, get_struct_name, + is_debug_enabled, is_direct_primitive_type, is_option_encoding_primitive, is_primitive_type, + is_skip_field, should_skip_type_info_for_field, FieldRefMode, StructField, }; use crate::util::SourceField; +/// Check if a type is a primitive type that needs special compatible mode handling +/// Returns the type name if it's u8, u16, u32, or u64 (or Option) +fn is_compatible_primitive_type(ty: &syn::Type) -> Option<&'static str> { + let inner_ty = extract_option_inner_type(ty).unwrap_or_else(|| ty.clone()); + let inner_ty_str = quote::ToTokens::to_token_stream(&inner_ty) + .to_string() + .replace(' ', ""); + match inner_ty_str.as_str() { + "u8" => Some("u8"), + "u16" => Some("u16"), + "u32" => Some("u32"), + "u64" => Some("u64"), + _ => None, + } +} + +/// Check if a type is u32 or u64 (for encoding-aware reading) +fn is_unsigned_encoding_type(ty: &syn::Type) -> Option<&'static str> { + let inner_ty = extract_option_inner_type(ty).unwrap_or_else(|| ty.clone()); + let inner_ty_str = quote::ToTokens::to_token_stream(&inner_ty) + .to_string() + .replace(' ', ""); + match inner_ty_str.as_str() { + "u32" => Some("u32"), + "u64" => Some("u64"), + _ => None, + } +} + +/// Generate compatible mode read code for u32/u64 fields based on remote type_id +fn gen_compatible_unsigned_read( + unsigned_type: &str, + var_name: &Ident, + is_option: bool, +) -> TokenStream { + let read_value = if unsigned_type == "u32" { + quote! { + // Read u32 based on remote type_id + match _field.field_type.type_id { + fory_core::types::UINT32 => context.reader.read_u32()?, + fory_core::types::VAR_UINT32 => context.reader.read_varuint32()?, + _ => context.reader.read_varuint32()?, // Default to varint + } + } + } else { + // u64 + quote! 
{ + // Read u64 based on remote type_id + match _field.field_type.type_id { + fory_core::types::UINT64 => context.reader.read_u64()?, + fory_core::types::VAR_UINT64 => context.reader.read_varuint64()?, + fory_core::types::TAGGED_UINT64 => context.reader.read_tagged_u64()?, + _ => context.reader.read_varuint64()?, // Default to varint + } + } + }; + + if is_option { + quote! { + let read_ref_flag = fory_core::serializer::util::field_need_write_ref_into( + _field.field_type.type_id, + _field.field_type.nullable, + ); + if read_ref_flag { + let ref_flag = context.reader.read_i8()?; + if ref_flag == fory_core::RefFlag::Null as i8 { + #var_name = Some(None); + } else { + #var_name = Some(Some(#read_value)); + } + } else { + #var_name = Some(Some(#read_value)); + } + } + } else { + quote! { + let read_ref_flag = fory_core::serializer::util::field_need_write_ref_into( + _field.field_type.type_id, + _field.field_type.nullable, + ); + if read_ref_flag { + let ref_flag = context.reader.read_i8()?; + if ref_flag == fory_core::RefFlag::Null as i8 { + // Remote sent null but local field is non-nullable, use default + #var_name = 0; + } else { + #var_name = #read_value; + } + } else { + #var_name = #read_value; + } + } + } +} + +/// Generate compatible mode read code for u8/u16 Option fields +/// These need special handling because when remote field is non-nullable, +/// Java sends just the raw bytes without a ref flag +fn gen_compatible_primitive_option_read(prim_type: &str, var_name: &Ident) -> TokenStream { + let read_value = match prim_type { + "u8" => quote! { context.reader.read_u8()? }, + "u16" => quote! { context.reader.read_u16()? }, + _ => unreachable!("Only u8/u16 should use this function"), + }; + + quote! 
{ + let read_ref_flag = fory_core::serializer::util::field_need_write_ref_into( + _field.field_type.type_id, + _field.field_type.nullable, + ); + if read_ref_flag { + let ref_flag = context.reader.read_i8()?; + if ref_flag == fory_core::RefFlag::Null as i8 { + #var_name = Some(None); + } else { + #var_name = Some(Some(#read_value)); + } + } else { + // Remote field is non-nullable, read raw value directly + #var_name = Some(Some(#read_value)); + } + } +} + /// Create a private variable name for a field during deserialization. /// For named fields: `_field_name` /// For tuple struct fields: `_0`, `_1`, etc. @@ -219,10 +345,28 @@ pub fn gen_read_field(field: &Field, private_ident: &Ident, field_name: &str) -> } _ => { let skip_type_info = should_skip_type_info_for_field(ty); + let meta = parse_field_meta(field).unwrap_or_default(); + // Check if this is Option or Option with encoding attributes + // These need special inline handling because the generic Option serializer + // doesn't know about field-level encoding attributes. + if is_option_encoding_primitive(ty, &meta) { + let inner_name = get_option_inner_primitive_name(ty).unwrap(); + let reader_method = get_primitive_reader_method_with_encoding(inner_name, &meta); + let reader_ident = syn::Ident::new(reader_method, proc_macro2::Span::call_site()); + // For Option, read null flag first, then value if not null + quote! { + let ref_flag = context.reader.read_i8()?; + let #private_ident = if ref_flag == fory_core::RefFlag::Null as i8 { + None + } else { + Some(context.reader.#reader_ident()?) 
+ }; + } + } // Check if this is a direct primitive type that can use direct reader calls // Only apply when ref_mode is None (no ref tracking needed) - if ref_mode == FieldRefMode::None && is_direct_primitive_type(ty) { + else if ref_mode == FieldRefMode::None && is_direct_primitive_type(ty) { let type_name = extract_type_name(ty); if type_name == "String" { // String: call fory_read_data directly @@ -231,7 +375,9 @@ pub fn gen_read_field(field: &Field, private_ident: &Ident, field_name: &str) -> } } else { // Numeric primitives: use direct buffer methods - let reader_method = get_primitive_reader_method(&type_name); + // For u32/u64, consider encoding attributes + let reader_method = + get_primitive_reader_method_with_encoding(&type_name, &meta); let reader_ident = syn::Ident::new(reader_method, proc_macro2::Span::call_site()); quote! { @@ -540,7 +686,68 @@ pub(crate) fn gen_read_compatible_match_arm_body( StructField::None => { let skip_type_info = should_skip_type_info_for_field(ty); let dec_by_option = need_declared_by_option(field); - if skip_type_info { + let is_option_type = extract_option_inner_type(ty).is_some(); + + // Check if this is a u32/u64 field that needs encoding-aware reading + if let Some(unsigned_type) = is_unsigned_encoding_type(ty) { + gen_compatible_unsigned_read( + unsigned_type, + var_name, + is_option_type || dec_by_option, + ) + } else if is_option_type { + // Check if it's Option or Option which need special handling + if let Some(prim_type) = is_compatible_primitive_type(ty) { + if prim_type == "u8" || prim_type == "u16" { + gen_compatible_primitive_option_read(prim_type, var_name) + } else { + // u32/u64 handled above + unreachable!() + } + } else if skip_type_info { + // Non-primitive Option type with skip_type_info + quote! 
{ + let read_ref_flag = fory_core::serializer::util::field_need_write_ref_into( + _field.field_type.type_id, + _field.field_type.nullable, + ); + // Use RefMode::Tracking if remote field has ref_tracking enabled + let ref_mode = if _field.field_type.ref_tracking { + fory_core::RefMode::Tracking + } else if read_ref_flag { + fory_core::RefMode::NullOnly + } else { + fory_core::RefMode::None + }; + if read_ref_flag || _field.field_type.ref_tracking { + #var_name = Some(<#ty as fory_core::Serializer>::fory_read(context, ref_mode, false)?); + } else { + #var_name = Some(<#ty as fory_core::Serializer>::fory_read_data(context)?); + } + } + } else { + // Non-primitive Option type without skip_type_info + quote! { + let read_ref_flag = fory_core::serializer::util::field_need_write_ref_into( + _field.field_type.type_id, + _field.field_type.nullable, + ); + // Use RefMode::Tracking if remote field has ref_tracking enabled + let ref_mode = if _field.field_type.ref_tracking { + fory_core::RefMode::Tracking + } else if read_ref_flag { + fory_core::RefMode::NullOnly + } else { + fory_core::RefMode::None + }; + // For ref-tracked struct types, Java writes type info after RefValue flag + let read_type_info = fory_core::types::need_to_write_type_for_field( + <#ty as fory_core::Serializer>::fory_static_type_id() + ); + #var_name = Some(<#ty as fory_core::Serializer>::fory_read(context, ref_mode, read_type_info)?); + } + } + } else if skip_type_info { if dec_by_option { quote! 
{ let read_ref_flag = fory_core::serializer::util::field_need_write_ref_into( diff --git a/rust/fory-derive/src/object/util.rs b/rust/fory-derive/src/object/util.rs index 669c751366..33a3bc56fe 100644 --- a/rust/fory-derive/src/object/util.rs +++ b/rust/fory-derive/src/object/util.rs @@ -689,14 +689,18 @@ fn get_primitive_type_id(ty: &str) -> u32 { "bool" => TypeId::BOOL as u32, "i8" => TypeId::INT8 as u32, "i16" => TypeId::INT16 as u32, - "i32" => TypeId::INT32 as u32, - "i64" => TypeId::INT64 as u32, + // Use VARINT32 for i32 to match Java xlang mode and Rust type resolver registration + "i32" => TypeId::VARINT32 as u32, + // Use VARINT64 for i64 to match Java xlang mode and Rust type resolver registration + "i64" => TypeId::VARINT64 as u32, "f32" => TypeId::FLOAT32 as u32, "f64" => TypeId::FLOAT64 as u32, "u8" => TypeId::UINT8 as u32, "u16" => TypeId::UINT16 as u32, - "u32" => TypeId::UINT32 as u32, - "u64" => TypeId::UINT64 as u32, + // Use VAR_UINT32 for u32 to match Rust type resolver registration + "u32" => TypeId::VAR_UINT32 as u32, + // Use VAR_UINT64 for u64 to match Rust type resolver registration + "u64" => TypeId::VAR_UINT64 as u32, "u128" => TypeId::U128 as u32, "i128" => TypeId::INT128 as u32, _ => unreachable!("Unknown primitive type: {}", ty), @@ -756,6 +760,62 @@ pub(super) fn get_primitive_writer_method(type_name: &str) -> &'static str { .unwrap_or_else(|| panic!("type_name '{}' must be a primitive type", type_name)) } +/// Get the writer method name for a primitive numeric type, considering encoding attributes. 
+/// +/// For i32 fields: +/// - type_id=VARINT32 (default): write_varint32 +/// - type_id=INT32: write_i32 (fixed 4-byte) +/// +/// For u32 fields: +/// - type_id=VARINT32/VAR_UINT32 (default): write_varuint32 +/// - type_id=INT32/UINT32: write_u32 (fixed 4-byte) +/// +/// For u64 fields: +/// - type_id=VARINT32/VAR_UINT64 (default): write_varuint64 +/// - type_id=INT32/UINT64: write_u64 (fixed 8-byte) +/// - type_id=TAGGED_UINT64: write_tagged_u64 +pub(super) fn get_primitive_writer_method_with_encoding( + type_name: &str, + meta: &super::field_meta::ForyFieldMeta, +) -> &'static str { + use fory_core::types::TypeId; + + // Handle i32 with type_id + if type_name == "i32" { + if let Some(type_id) = meta.type_id { + if type_id == TypeId::INT32 as i16 { + return "write_i32"; // Fixed 4-byte encoding + } + } + return "write_varint32"; // Variable-length (default) + } + + // Handle u32 with type_id + if type_name == "u32" { + if let Some(type_id) = meta.type_id { + if type_id == TypeId::INT32 as i16 || type_id == TypeId::UINT32 as i16 { + return "write_u32"; // Fixed 4-byte encoding + } + } + return "write_varuint32"; // Variable-length (default) + } + + // Handle u64 with type_id + if type_name == "u64" { + if let Some(type_id) = meta.type_id { + if type_id == TypeId::INT32 as i16 || type_id == TypeId::UINT64 as i16 { + return "write_u64"; // Fixed 8-byte encoding + } else if type_id == TypeId::TAGGED_UINT64 as i16 { + return "write_tagged_u64"; // Tagged variable-length + } + } + return "write_varuint64"; // Variable-length (default) + } + + // For other types, use the default method from PRIMITIVE_IO_METHODS + get_primitive_writer_method(type_name) +} + /// Get the reader method name for a primitive numeric type /// Panics if type_name is not a primitive type pub(super) fn get_primitive_reader_method(type_name: &str) -> &'static str { @@ -766,6 +826,106 @@ pub(super) fn get_primitive_reader_method(type_name: &str) -> &'static str { .unwrap_or_else(|| 
panic!("type_name '{}' must be a primitive type", type_name)) } +/// Get the reader method name for a primitive numeric type, considering encoding attributes. +/// +/// For i32 fields: +/// - type_id=VARINT32 (default): read_varint32 +/// - type_id=INT32: read_i32 (fixed 4-byte) +/// +/// For u32 fields: +/// - type_id=VARINT32/VAR_UINT32 (default): read_varuint32 +/// - type_id=INT32/UINT32: read_u32 (fixed 4-byte) +/// +/// For u64 fields: +/// - type_id=VARINT32/VAR_UINT64 (default): read_varuint64 +/// - type_id=INT32/UINT64: read_u64 (fixed 8-byte) +/// - type_id=TAGGED_UINT64: read_tagged_u64 +pub(super) fn get_primitive_reader_method_with_encoding( + type_name: &str, + meta: &super::field_meta::ForyFieldMeta, +) -> &'static str { + use fory_core::types::TypeId; + + // Handle i32 with type_id + if type_name == "i32" { + if let Some(type_id) = meta.type_id { + if type_id == TypeId::INT32 as i16 { + return "read_i32"; // Fixed 4-byte encoding + } + } + return "read_varint32"; // Variable-length (default) + } + + // Handle u32 with type_id + if type_name == "u32" { + if let Some(type_id) = meta.type_id { + if type_id == TypeId::INT32 as i16 || type_id == TypeId::UINT32 as i16 { + return "read_u32"; // Fixed 4-byte encoding + } + } + return "read_varuint32"; // Variable-length (default) + } + + // Handle u64 with type_id + if type_name == "u64" { + if let Some(type_id) = meta.type_id { + if type_id == TypeId::INT32 as i16 || type_id == TypeId::UINT64 as i16 { + return "read_u64"; // Fixed 8-byte encoding + } else if type_id == TypeId::TAGGED_UINT64 as i16 { + return "read_tagged_u64"; // Tagged variable-length + } + } + return "read_varuint64"; // Variable-length (default) + } + + // For other types, use the default method from PRIMITIVE_IO_METHODS + get_primitive_reader_method(type_name) +} + +/// Check if a type is Option, Option, or Option that needs encoding-aware handling +/// based on the field metadata (type_id attribute). 
+pub(super) fn is_option_encoding_primitive( + ty: &Type, + meta: &super::field_meta::ForyFieldMeta, +) -> bool { + if let Some(inner_name) = get_option_inner_primitive_name(ty) { + // For i32/u32/u64, check if type_id is set + if (inner_name == "i32" || inner_name == "u32" || inner_name == "u64") + && meta.type_id.is_some() + { + return true; + } + } + false +} + +/// Get the inner primitive name if the type is Option +/// Returns Some("u32"), Some("u64"), etc. for Option, Option, etc. +pub(super) fn get_option_inner_primitive_name(ty: &Type) -> Option<&'static str> { + use syn::PathArguments; + if let Type::Path(type_path) = ty { + if let Some(seg) = type_path.path.segments.last() { + if seg.ident == "Option" { + if let PathArguments::AngleBracketed(args) = &seg.arguments { + if let Some(syn::GenericArgument::Type(Type::Path(inner_path))) = + args.args.first() + { + if let Some(inner_seg) = inner_path.path.segments.last() { + let inner_name = inner_seg.ident.to_string(); + // Return static string for known primitives + return PRIMITIVE_IO_METHODS + .iter() + .find(|(name, _, _)| *name == inner_name.as_str()) + .map(|(name, _, _)| *name); + } + } + } + } + } + } + None +} + pub(crate) fn get_type_id_by_type_ast(ty: &Type) -> u32 { let ty_str: String = ty .to_token_stream() @@ -877,10 +1037,10 @@ fn get_primitive_type_size(type_id_num: u32) -> i32 { TypeId::INT8 => 1, TypeId::INT16 => 2, TypeId::INT32 => 4, - TypeId::VAR32 => 4, + TypeId::VARINT32 => 4, TypeId::INT64 => 8, - TypeId::VAR64 => 8, - TypeId::H64 => 8, + TypeId::VARINT64 => 8, + TypeId::TAGGED_INT64 => 8, TypeId::FLOAT16 => 2, TypeId::FLOAT32 => 4, TypeId::FLOAT64 => 8, @@ -888,10 +1048,10 @@ fn get_primitive_type_size(type_id_num: u32) -> i32 { TypeId::UINT8 => 1, TypeId::UINT16 => 2, TypeId::UINT32 => 4, - TypeId::VARU32 => 4, + TypeId::VAR_UINT32 => 4, TypeId::UINT64 => 8, - TypeId::VARU64 => 8, - TypeId::HU64 => 8, + TypeId::VAR_UINT64 => 8, + TypeId::TAGGED_UINT64 => 8, TypeId::U128 => 16, 
TypeId::USIZE => std::mem::size_of::() as i32, TypeId::ISIZE => std::mem::size_of::() as i32, @@ -900,14 +1060,17 @@ fn get_primitive_type_size(type_id_num: u32) -> i32 { } fn is_compress(type_id: u32) -> bool { - // Only signed integer types are marked as compressible - // to maintain backward compatibility with field ordering + // Variable-length and tagged types are marked as compressible + // This must match Java's Types.isCompressedType() for xlang compatibility [ - TypeId::INT32 as u32, - TypeId::INT64 as u32, - TypeId::VAR32 as u32, - TypeId::VAR64 as u32, - TypeId::H64 as u32, + // Signed compressed types + TypeId::VARINT32 as u32, + TypeId::VARINT64 as u32, + TypeId::TAGGED_INT64 as u32, + // Unsigned compressed types + TypeId::VAR_UINT32 as u32, + TypeId::VAR_UINT64 as u32, + TypeId::TAGGED_UINT64 as u32, ] .contains(&type_id) } @@ -940,6 +1103,8 @@ fn is_internal_type_id(type_id: u32) -> bool { /// Group fields into serialization categories while normalizing field names to snake_case. /// The returned groups preserve the ordering rules required by the serialization layout. 
fn group_fields_by_type(fields: &[&Field]) -> FieldGroups { + use super::field_meta::parse_field_meta; + let mut primitive_fields = Vec::new(); let mut nullable_primitive_fields = Vec::new(); let mut internal_type_fields = Vec::new(); @@ -957,25 +1122,6 @@ fn group_fields_by_type(fields: &[&Field]) -> FieldGroups { } } - let mut group_field = |ident: String, ty: &str| { - let type_id = get_type_id_by_name(ty); - // Categorize based on type_id - if PRIMITIVE_TYPE_NAMES.contains(&ty) { - primitive_fields.push((ident, ty.to_string(), type_id)); - } else if is_internal_type_id(type_id) { - internal_type_fields.push((ident, ty.to_string(), type_id)); - } else if type_id == TypeId::LIST as u32 { - list_fields.push((ident, ty.to_string(), type_id)); - } else if type_id == TypeId::SET as u32 { - set_fields.push((ident, ty.to_string(), type_id)); - } else if type_id == TypeId::MAP as u32 { - map_fields.push((ident, ty.to_string(), type_id)); - } else { - // User-defined type - other_fields.push((ident, ty.to_string(), type_id)); - } - }; - for (idx, field) in fields.iter().enumerate() { let raw_ident = get_field_name(field, idx); let ident = to_snake_case(&raw_ident); @@ -985,6 +1131,9 @@ fn group_fields_by_type(fields: &[&Field]) -> FieldGroups { continue; } + // Parse field metadata to get encoding attributes + let meta = parse_field_meta(field).unwrap_or_default(); + let ty: String = field .ty .to_token_stream() @@ -992,16 +1141,44 @@ fn group_fields_by_type(fields: &[&Field]) -> FieldGroups { .chars() .filter(|c| !c.is_whitespace()) .collect::(); + + // Closure to group non-option fields, considering encoding attributes + let mut group_field = |ident: String, ty_str: &str, is_primitive: bool| { + let base_type_id = get_type_id_by_name(ty_str); + // Adjust type ID based on encoding attributes for u32/u64 fields + let type_id = adjust_type_id_for_encoding(base_type_id, &meta); + + // Categorize based on type_id + if is_primitive { + primitive_fields.push((ident, 
ty_str.to_string(), type_id)); + } else if is_internal_type_id(type_id) { + internal_type_fields.push((ident, ty_str.to_string(), type_id)); + } else if type_id == TypeId::LIST as u32 { + list_fields.push((ident, ty_str.to_string(), type_id)); + } else if type_id == TypeId::SET as u32 { + set_fields.push((ident, ty_str.to_string(), type_id)); + } else if type_id == TypeId::MAP as u32 { + map_fields.push((ident, ty_str.to_string(), type_id)); + } else { + // User-defined type + other_fields.push((ident, ty_str.to_string(), type_id)); + } + }; + // handle Option specially if let Some(inner) = extract_option_inner(&ty) { if PRIMITIVE_TYPE_NAMES.contains(&inner) { - let type_id = get_primitive_type_id(inner); + // Get base type ID and adjust for encoding attributes + let base_type_id = get_primitive_type_id(inner); + let type_id = adjust_type_id_for_encoding(base_type_id, &meta); nullable_primitive_fields.push((ident, ty.to_string(), type_id)); } else { - group_field(ident, inner); + group_field(ident, inner, false); } + } else if PRIMITIVE_TYPE_NAMES.contains(&ty.as_str()) { + group_field(ident, &ty, true); } else { - group_field(ident, &ty); + group_field(ident, &ty, false); } } @@ -1120,6 +1297,47 @@ struct FieldFingerprintInfo { is_option_type: bool, } +/// Adjusts type ID based on encoding attributes for i32/u32/u64 fields. 
+/// +/// The type_id in meta represents the desired encoding: +/// - VARINT32: variable-length for i32/u32 +/// - INT32: fixed 4-byte for i32, u32 +/// - TAGGED_UINT64: tagged variable-length for u64 +fn adjust_type_id_for_encoding(base_type_id: u32, meta: &super::field_meta::ForyFieldMeta) -> u32 { + // If no explicit type_id is set, use the base type_id + let Some(explicit_type_id) = meta.type_id else { + return base_type_id; + }; + + // Handle i32 fields + if base_type_id == TypeId::VARINT32 as u32 { + if explicit_type_id == TypeId::INT32 as i16 { + return TypeId::INT32 as u32; // Fixed 4-byte encoding + } + return base_type_id; // VARINT32 (default) + } + + // Handle u32 fields + if base_type_id == TypeId::VAR_UINT32 as u32 { + if explicit_type_id == TypeId::INT32 as i16 { + return TypeId::UINT32 as u32; // Fixed 4-byte encoding + } + return base_type_id; // VAR_UINT32 (default) + } + + // Handle u64 fields + if base_type_id == TypeId::VAR_UINT64 as u32 { + if explicit_type_id == TypeId::INT32 as i16 { + return TypeId::UINT64 as u32; // Fixed 8-byte encoding + } else if explicit_type_id == TypeId::TAGGED_UINT64 as i16 { + return TypeId::TAGGED_UINT64 as u32; // Tagged variable-length + } + return base_type_id; // VAR_UINT64 (default) + } + + base_type_id +} + /// Computes struct fingerprint string at compile time (during proc-macro execution). 
/// /// **Fingerprint Format:** `,,,;` @@ -1144,11 +1362,12 @@ fn compute_struct_fingerprint(fields: &[&Field]) -> String { }; let type_class = classify_field_type(&field.ty); - let ref_tracking = meta.effective_ref_tracking(type_class); + let ref_tracking = meta.effective_ref(type_class); let explicit_nullable = meta.nullable; - // Get compile-time TypeId (UNKNOWN for user-defined types including enums/unions) - let type_id = get_type_id_by_type_ast(&field.ty); + // Get compile-time TypeId, considering encoding attributes for u32/u64 fields + let base_type_id = get_type_id_by_type_ast(&field.ty); + let type_id = adjust_type_id_for_encoding(base_type_id, &meta); // Check if field type is Option let ty_str: String = field @@ -1214,7 +1433,7 @@ pub(crate) fn gen_struct_version_hash_ts(fields: &[&Field]) -> TokenStream { const VERSION_HASH: i32 = #version_hash; if fory_core::util::ENABLE_FORY_DEBUG_OUTPUT { println!( - "[fory-debug] struct {} version fingerprint=\"{}\" hash={}", + "[rust][fory-debug] struct {} version fingerprint=\"{}\" hash={}", std::any::type_name::(), #fingerprint, VERSION_HASH @@ -1252,7 +1471,7 @@ pub(crate) fn determine_field_ref_mode(field: &syn::Field) -> FieldRefMode { let meta = parse_field_meta(field).unwrap_or_default(); let type_class = classify_field_type(&field.ty); let nullable = meta.effective_nullable(type_class); - let ref_tracking = meta.effective_ref_tracking(type_class); + let ref_tracking = meta.effective_ref(type_class); if ref_tracking { FieldRefMode::Tracking diff --git a/rust/fory-derive/src/object/write.rs b/rust/fory-derive/src/object/write.rs index 3960860103..8300e8784b 100644 --- a/rust/fory-derive/src/object/write.rs +++ b/rust/fory-derive/src/object/write.rs @@ -15,11 +15,14 @@ // specific language governing permissions and limitations // under the License. 
+use super::field_meta::parse_field_meta; use super::util::{ classify_trait_object_field, create_wrapper_types_arc, create_wrapper_types_rc, determine_field_ref_mode, extract_type_name, gen_struct_version_hash_ts, get_field_accessor, - get_field_name, get_filtered_source_fields_iter, get_primitive_writer_method, get_struct_name, - get_type_id_by_type_ast, is_debug_enabled, is_direct_primitive_type, FieldRefMode, StructField, + get_field_name, get_filtered_source_fields_iter, get_option_inner_primitive_name, + get_primitive_writer_method_with_encoding, get_struct_name, get_type_id_by_type_ast, + is_debug_enabled, is_direct_primitive_type, is_option_encoding_primitive, FieldRefMode, + StructField, }; use crate::util::SourceField; use fory_core::types::TypeId; @@ -248,10 +251,28 @@ fn gen_write_field_impl( } _ => { let type_id = get_type_id_by_type_ast(ty); + let meta = parse_field_meta(field).unwrap_or_default(); + // Check if this is Option or Option with encoding attributes + // These need special inline handling because the generic Option serializer + // doesn't know about field-level encoding attributes. + if is_option_encoding_primitive(ty, &meta) { + let inner_name = get_option_inner_primitive_name(ty).unwrap(); + let writer_method = get_primitive_writer_method_with_encoding(inner_name, &meta); + let writer_ident = syn::Ident::new(writer_method, proc_macro2::Span::call_site()); + // For Option, write null flag first, then value if Some + quote! 
{ + if let Some(v) = &#value_ts { + context.writer.write_i8(fory_core::RefFlag::NotNullValue as i8); + context.writer.#writer_ident(*v); + } else { + context.writer.write_i8(fory_core::RefFlag::Null as i8); + } + } + } // Check if this is a direct primitive type that can use direct writer calls // Only apply when ref_mode is None (no ref tracking needed) - if ref_mode == FieldRefMode::None && is_direct_primitive_type(ty) { + else if ref_mode == FieldRefMode::None && is_direct_primitive_type(ty) { let type_name = extract_type_name(ty); if type_name == "String" { // String: call fory_write_data directly @@ -260,7 +281,9 @@ fn gen_write_field_impl( } } else { // Numeric primitives: use direct buffer methods - let writer_method = get_primitive_writer_method(&type_name); + // For u32/u64, consider encoding attributes + let writer_method = + get_primitive_writer_method_with_encoding(&type_name, &meta); let writer_ident = syn::Ident::new(writer_method, proc_macro2::Span::call_site()); // For primitives: diff --git a/rust/tests/tests/test_cross_language.rs b/rust/tests/tests/test_cross_language.rs index 54fb38637e..6022673828 100644 --- a/rust/tests/tests/test_cross_language.rs +++ b/rust/tests/tests/test_cross_language.rs @@ -74,7 +74,7 @@ fn test_buffer() { let data_file_path = get_data_file(); let bytes = fs::read(&data_file_path).unwrap(); let mut reader = Reader::new(bytes.as_slice()); - assert_eq!(reader.read_u8().unwrap(), 1); + assert!(reader.read_bool().unwrap()); assert_eq!(reader.read_i8().unwrap(), i8::MAX); assert_eq!(reader.read_i16().unwrap(), i16::MAX); assert_eq!(reader.read_i32().unwrap(), i32::MAX); @@ -88,7 +88,7 @@ fn test_buffer() { let mut buffer = vec![]; let mut writer = Writer::from_buffer(&mut buffer); - writer.write_u8(1); + writer.write_bool(true); writer.write_i8(i8::MAX); writer.write_i16(i16::MAX); writer.write_i32(i32::MAX); @@ -1883,3 +1883,154 @@ fn test_circular_ref_compatible() { let new_bytes = fory.serialize(&obj).unwrap(); 
fs::write(&data_file_path, new_bytes).unwrap(); } + +// ============================================================================ +// Unsigned Number Tests - Test unsigned integer serialization across languages +// ============================================================================ + +/// Test struct for unsigned numbers in SCHEMA_CONSISTENT mode. +/// All fields use the same nullability as Java. +/// Note: Rust supports u8, u16, u32, u64 natively. Different encodings (fixed, var, tagged) +/// are handled via field attributes. +/// Matches Java's UnsignedSchemaConsistent (type id 501) +#[derive(ForyObject, Debug, PartialEq)] +#[fory(debug)] +struct UnsignedSchemaConsistent { + // Primitive unsigned fields (non-nullable, use Field suffix to avoid reserved keywords) + u8_field: u8, // UINT8 - fixed 8-bit + u16_field: u16, // UINT16 - fixed 16-bit + u32_var_field: u32, // VAR_UINT32 - variable-length (default) + #[fory(compress = false)] + u32_fixed_field: u32, // UINT32 - fixed 4-byte + u64_var_field: u64, // VAR_UINT64 - variable-length (default) + #[fory(encoding = "fixed")] + u64_fixed_field: u64, // UINT64 - fixed 8-byte + #[fory(encoding = "tagged")] + u64_tagged_field: u64, // TAGGED_UINT64 + + // Nullable unsigned fields (using Option) + #[fory(nullable = true)] + u8_nullable_field: Option, + #[fory(nullable = true)] + u16_nullable_field: Option, + #[fory(nullable = true)] + u32_var_nullable_field: Option, + #[fory(nullable = true, compress = false)] + u32_fixed_nullable_field: Option, + #[fory(nullable = true)] + u64_var_nullable_field: Option, + #[fory(nullable = true, encoding = "fixed")] + u64_fixed_nullable_field: Option, + #[fory(nullable = true, encoding = "tagged")] + u64_tagged_nullable_field: Option, +} + +/// Test struct for unsigned numbers in COMPATIBLE mode. 
+/// Group 1: Option types (nullable in Rust, non-nullable in Java) +/// Group 2: Non-Option types with Field2 suffix (non-nullable in Rust, nullable in Java) +/// Matches Java's UnsignedSchemaCompatible (type id 502) +#[derive(ForyObject, Debug, PartialEq)] +#[fory(debug)] +struct UnsignedSchemaCompatible { + // Group 1: Nullable in Rust (Option), non-nullable in Java + #[fory(nullable = true)] + u8_field1: Option, + #[fory(nullable = true)] + u16_field1: Option, + #[fory(nullable = true)] + u32_var_field1: Option, + #[fory(nullable = true, compress = false)] + u32_fixed_field1: Option, + #[fory(nullable = true)] + u64_var_field1: Option, + #[fory(nullable = true, encoding = "fixed")] + u64_fixed_field1: Option, + #[fory(nullable = true, encoding = "tagged")] + u64_tagged_field1: Option, + + // Group 2: Non-nullable in Rust, nullable in Java + u8_field2: u8, + u16_field2: u16, + u32_var_field2: u32, + #[fory(compress = false)] + u32_fixed_field2: u32, + u64_var_field2: u64, + #[fory(encoding = "fixed")] + u64_fixed_field2: u64, + #[fory(encoding = "tagged")] + u64_tagged_field2: u64, +} + +/// Test unsigned numbers in SCHEMA_CONSISTENT mode. 
+#[test] +#[ignore] +fn test_unsigned_schema_consistent() { + let data_file_path = get_data_file(); + let bytes = fs::read(&data_file_path).unwrap(); + + let mut fory = Fory::default().compatible(false).xlang(true); + fory.register::(501).unwrap(); + + let local_obj = UnsignedSchemaConsistent { + // Primitive unsigned fields + u8_field: 200, + u16_field: 60000, + u32_var_field: 3000000000, + u32_fixed_field: 4000000000, + u64_var_field: 10000000000, + u64_fixed_field: 15000000000, + u64_tagged_field: 1000000000, + + // Nullable unsigned fields with values + u8_nullable_field: Some(128), + u16_nullable_field: Some(40000), + u32_var_nullable_field: Some(2500000000), + u32_fixed_nullable_field: Some(3500000000), + u64_var_nullable_field: Some(8000000000), + u64_fixed_nullable_field: Some(12000000000), + u64_tagged_nullable_field: Some(500000000), + }; + + let remote_obj: UnsignedSchemaConsistent = fory.deserialize(&bytes).unwrap(); + assert_eq!(remote_obj, local_obj); + + let new_bytes = fory.serialize(&remote_obj).unwrap(); + fs::write(&data_file_path, new_bytes).unwrap(); +} + +/// Test unsigned numbers in COMPATIBLE mode with inverted nullability. 
+#[test] +#[ignore] +fn test_unsigned_schema_compatible() { + let data_file_path = get_data_file(); + let bytes = fs::read(&data_file_path).unwrap(); + + let mut fory = Fory::default().compatible(true).xlang(true); + fory.register::(502).unwrap(); + + let local_obj = UnsignedSchemaCompatible { + // Group 1: Option fields (values from Java's non-nullable fields) + u8_field1: Some(200), + u16_field1: Some(60000), + u32_var_field1: Some(3000000000), + u32_fixed_field1: Some(4000000000), + u64_var_field1: Some(10000000000), + u64_fixed_field1: Some(15000000000), + u64_tagged_field1: Some(1000000000), + + // Group 2: Non-nullable fields (values from Java's nullable fields) + u8_field2: 128, + u16_field2: 40000, + u32_var_field2: 2500000000, + u32_fixed_field2: 3500000000, + u64_var_field2: 8000000000, + u64_fixed_field2: 12000000000, + u64_tagged_field2: 500000000, + }; + + let remote_obj: UnsignedSchemaCompatible = fory.deserialize(&bytes).unwrap(); + assert_eq!(remote_obj, local_obj); + + let new_bytes = fory.serialize(&remote_obj).unwrap(); + fs::write(&data_file_path, new_bytes).unwrap(); +} diff --git a/scala/src/test/scala/org/apache/fory/util/ScalaDefaultValueUtilsTest.scala b/scala/src/test/scala/org/apache/fory/util/ScalaDefaultValueUtilsTest.scala index 8d9c0e9ff3..b1eb42136b 100644 --- a/scala/src/test/scala/org/apache/fory/util/ScalaDefaultValueUtilsTest.scala +++ b/scala/src/test/scala/org/apache/fory/util/ScalaDefaultValueUtilsTest.scala @@ -313,7 +313,7 @@ class ScalaDefaultValueUtilsTest extends AnyWordSpec with Matchers { field.getDefaultValue should not be null field.getFieldName should not be null field.getFieldAccessor should not be null - field.getClassId.toInt should be >= 0 + field.getDispatchId.toInt should be >= 0 } } @@ -380,7 +380,7 @@ class ScalaDefaultValueUtilsTest extends AnyWordSpec with Matchers { field.getFieldName should not be null field.getDefaultValue should not be null field.getFieldAccessor should not be null - 
field.getClassId.toInt should be >= 0 + field.getDispatchId.toInt should be >= 0 } } }