Skip to content
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
8abeba5
update spec doc
chaokunyang Jan 5, 2026
0f08ce4
update buffer read/write API
chaokunyang Jan 5, 2026
d0eed13
rename _util to buffer
chaokunyang Jan 5, 2026
a9701f7
rename _registry to registry
chaokunyang Jan 5, 2026
ea6401e
support unsigned types and refactor java type system
chaokunyang Jan 6, 2026
d59b477
fix errors
chaokunyang Jan 7, 2026
f95614f
refactor xlang numeric read/write
chaokunyang Jan 7, 2026
68512fc
fix rust error
chaokunyang Jan 7, 2026
cd34fac
fix go unsigned support
chaokunyang Jan 7, 2026
b870947
fix go codegen
chaokunyang Jan 7, 2026
cfc927b
update c++ unsigned and compressed int support
chaokunyang Jan 7, 2026
e663c1b
support unsigned and configurable compress types for field
chaokunyang Jan 7, 2026
852d92c
add javadoc to annotation
chaokunyang Jan 7, 2026
51688b0
add unsigned fields xlang tests
chaokunyang Jan 7, 2026
f2f5059
revert build_linux_wheels.py
chaokunyang Jan 7, 2026
43b7783
add unsigned java tests
chaokunyang Jan 7, 2026
baf70d7
fix descriptor sort comparator
chaokunyang Jan 7, 2026
4d16d37
fix go/java xlang struct fields serde
chaokunyang Jan 8, 2026
4b225ba
refactor go struct serializer
chaokunyang Jan 9, 2026
145adaa
support unsigned in python
chaokunyang Jan 10, 2026
51e6848
add rust unsigned and compressed fields support
chaokunyang Jan 10, 2026
098117b
update xlang tests in java side
chaokunyang Jan 10, 2026
1c5670b
make cpp support configure number encoding and sort fields for all nu…
chaokunyang Jan 10, 2026
67758cf
refactor rust field meta config parse
chaokunyang Jan 10, 2026
aaa66e8
update go test
chaokunyang Jan 10, 2026
9ed0723
format code
chaokunyang Jan 10, 2026
2f25afa
Merge remote-tracking branch 'asf/main' into support_unsigned_types_f…
chaokunyang Jan 10, 2026
1bd9fe1
fix merge conflict
chaokunyang Jan 10, 2026
268fd95
fix tests
chaokunyang Jan 10, 2026
b3a9723
fix python tests
chaokunyang Jan 10, 2026
ad0257e
fix c++ tests
chaokunyang Jan 10, 2026
eb99c8c
fix go tests
chaokunyang Jan 10, 2026
df96ffd
revert DEBUG_OUTPUT_ENABLED flag
chaokunyang Jan 10, 2026
b05ac57
fix tests
chaokunyang Jan 10, 2026
c012fee
update cpp doc for fields
chaokunyang Jan 10, 2026
4f7fbad
update go tag
chaokunyang Jan 10, 2026
88a0194
fix: add license header and fix type conversion in DefaultValueUtils
chaokunyang Jan 10, 2026
3247510
fix: push stub ref id when global ref tracking is enabled but field t…
chaokunyang Jan 10, 2026
cca95fb
fix ci
chaokunyang Jan 10, 2026
fe0fb76
style: format code with spotless
chaokunyang Jan 10, 2026
72802ac
fix: use Types.getTypeId for Scala/Kotlin default value type dispatch
chaokunyang Jan 10, 2026
47169d8
fix build ci
chaokunyang Jan 10, 2026
db73589
fix(ci): correct buffer.go typo to buffer.so in macos universal2 build
chaokunyang Jan 10, 2026
585e545
update benchmark code
chaokunyang Jan 10, 2026
64482b5
fix code style
chaokunyang Jan 10, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ While working on Fory, please remember:
- **Git-Tracked Files**: When reading code, skip all files not tracked by git by default unless generated by yourself.
- **Cross-Language Consistency**: Maintain consistency across language implementations while respecting language-specific idioms.
- **Graalvm Support using fory codegen**: For graalvm, please use `fory codegen` to generate the serializer when building graalvm native image; do not use graalvm reflect-related configuration unless for JDK `proxy`.
- **Xlang Type System**: Java `native mode(xlang=false)` shares the same type ids as `xlang mode(xlang=true)` for the types in the range `Types.BOOL~Types.STRING`; for all other types, Java `native mode` uses different type ids.

## Build and Development Commands

Expand Down Expand Up @@ -125,7 +126,7 @@ cd java
mvn -T16 install -DskipTests
cd fory-core
# disable fory cython for faster debugging
FORY_PYTHON_JAVA_CI=1 ENABLE_FORY_CYTHON_SERIALIZATION=0 mvn -T16 test -Dtest=org.apache.fory.xlang.PythonXlangTest
FORY_PYTHON_JAVA_CI=1 ENABLE_FORY_CYTHON_SERIALIZATION=0 ENABLE_FORY_DEBUG_OUTPUT=1 mvn -T16 test -Dtest=org.apache.fory.xlang.PythonXlangTest
# enable fory cython
FORY_PYTHON_JAVA_CI=1 ENABLE_FORY_CYTHON_SERIALIZATION=1 ENABLE_FORY_DEBUG_OUTPUT=1 mvn -T16 test -Dtest=org.apache.fory.xlang.PythonXlangTest
```
Expand Down Expand Up @@ -215,7 +216,7 @@ Run Rust xlang tests:
cd java
mvn -T16 install -DskipTests
cd fory-core
FORY_RUST_JAVA_CI=1 ENABLE_FORY_DEBUG_OUTPUT=1 mvn test -Dtest=org.apache.fory.xlang.RustXlangTest
RUST_BACKTRACE=1 FORY_PANIC_ON_ERROR=1 FORY_RUST_JAVA_CI=1 ENABLE_FORY_DEBUG_OUTPUT=1 mvn test -Dtest=org.apache.fory.xlang.RustXlangTest
```

### JavaScript/TypeScript Development
Expand Down Expand Up @@ -445,12 +446,12 @@ Code structure:

- `python/pyfory/serialization.pyx`: Core serialization logic and entry point for cython mode based on `xlang serialization format`
- `python/pyfory/_fory.py`: Serialization entry point for pure python mode based on `xlang serialization format`
- `python/pyfory/_registry.py`: Type registry, resolution and serializer dispatch for pure python mode, which is also used by cython mode. Cython mode use a cache to reduce invocations to this module.
- `python/pyfory/registry.py`: Type registry, resolution and serializer dispatch for pure python mode, which is also used by cython mode. Cython mode uses a cache to reduce invocations to this module.
- `python/pyfory/serializer.py`: Serializers for non-internal types
- `python/pyfory/includes`: Cython headers for `c++` functions and classes.
- `python/pyfory/resolver.py`: resolving shared/circular references when ref tracking is enabled in pure python mode
- `python/pyfory/format`: Fory row format encoding and decoding, arrow columnar format interoperation
- `python/pyfory/_util.pyx`: Buffer for reading/writing data, string utilities. Used by `serialization.pyx` and `python/pyfory/format` at the same time.
- `python/pyfory/buffer.pyx`: Buffer for reading/writing data, string utilities. Used by `serialization.pyx` and `python/pyfory/format` at the same time.

#### Go

Expand Down
18 changes: 9 additions & 9 deletions BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@ load("@hedron_compile_commands//:refresh_compile_commands.bzl", "refresh_compile


pyx_library(
name = "_util",
name = "buffer",
srcs = glob([
"python/pyfory/includes/*.pxd",
"python/pyfory/_util.pxd",
"python/pyfory/_util.pyx",
"python/pyfory/buffer.pxd",
"python/pyfory/buffer.pyx",
"python/pyfory/__init__.py",
]),
cc_kwargs = dict(
Expand Down Expand Up @@ -54,7 +54,7 @@ pyx_library(
name = "serialization",
srcs = glob([
"python/pyfory/includes/*.pxd",
"python/pyfory/_util.pxd",
"python/pyfory/buffer.pxd",
"python/pyfory/serialization.pyx",
"python/pyfory/*.pxi",
"python/pyfory/__init__.py",
Expand All @@ -76,7 +76,7 @@ pyx_library(
[
"python/pyfory/__init__.py",
"python/pyfory/includes/*.pxd",
"python/pyfory/_util.pxd",
"python/pyfory/buffer.pxd",
"python/pyfory/*.pxi",
"python/pyfory/format/_format.pyx",
"python/pyfory/format/__init__.py",
Expand All @@ -95,7 +95,7 @@ pyx_library(
genrule(
name = "cp_fory_so",
srcs = [
":python/pyfory/_util.so",
":python/pyfory/buffer.so",
":python/pyfory/lib/mmh3/mmh3.so",
":python/pyfory/format/_format.so",
":python/pyfory/serialization.so",
Expand All @@ -110,12 +110,12 @@ genrule(
u_name=`uname -s`
if [ "$${u_name: 0: 4}" == "MING" ] || [ "$${u_name: 0: 4}" == "MSYS" ]
then
cp -f $(location python/pyfory/_util.so) "$$WORK_DIR/python/pyfory/_util.pyd"
cp -f $(location python/pyfory/buffer.so) "$$WORK_DIR/python/pyfory/buffer.pyd"
cp -f $(location python/pyfory/lib/mmh3/mmh3.so) "$$WORK_DIR/python/pyfory/lib/mmh3/mmh3.pyd"
cp -f $(location python/pyfory/format/_format.so) "$$WORK_DIR/python/pyfory/format/_format.pyd"
cp -f $(location python/pyfory/serialization.so) "$$WORK_DIR/python/pyfory/serialization.pyd"
else
cp -f $(location python/pyfory/_util.so) "$$WORK_DIR/python/pyfory"
cp -f $(location python/pyfory/buffer.so) "$$WORK_DIR/python/pyfory"
cp -f $(location python/pyfory/lib/mmh3/mmh3.so) "$$WORK_DIR/python/pyfory/lib/mmh3"
cp -f $(location python/pyfory/format/_format.so) "$$WORK_DIR/python/pyfory/format"
cp -f $(location python/pyfory/serialization.so) "$$WORK_DIR/python/pyfory"
Expand All @@ -131,4 +131,4 @@ refresh_compile_commands(
name = "refresh_compile_commands",
exclude_headers = "all",
exclude_external_sources = True,
)
)
8 changes: 4 additions & 4 deletions cpp/fory/serialization/basic_serializer.h
Original file line number Diff line number Diff line change
Expand Up @@ -252,9 +252,9 @@ template <> struct Serializer<int16_t> {
}
};

/// int32_t serializer
/// int32_t serializer - uses VARINT32 to match Java xlang mode and Rust
template <> struct Serializer<int32_t> {
static constexpr TypeId type_id = TypeId::INT32;
static constexpr TypeId type_id = TypeId::VARINT32;

static inline void write_type_info(WriteContext &ctx) {
ctx.write_varuint32(static_cast<uint32_t>(type_id));
Expand Down Expand Up @@ -323,9 +323,9 @@ template <> struct Serializer<int32_t> {
}
};

/// int64_t serializer
/// int64_t serializer - uses VARINT64 to match Java xlang mode and Rust
template <> struct Serializer<int64_t> {
static constexpr TypeId type_id = TypeId::INT64;
static constexpr TypeId type_id = TypeId::VARINT64;

static inline void write_type_info(WriteContext &ctx) {
ctx.write_varuint32(static_cast<uint32_t>(type_id));
Expand Down
4 changes: 2 additions & 2 deletions cpp/fory/serialization/serializer_traits.h
Original file line number Diff line number Diff line change
Expand Up @@ -444,11 +444,11 @@ template <> struct TypeIndex<int16_t> {
};

template <> struct TypeIndex<int32_t> {
static constexpr uint64_t value = static_cast<uint64_t>(TypeId::INT32);
static constexpr uint64_t value = static_cast<uint64_t>(TypeId::VARINT32);
};

template <> struct TypeIndex<int64_t> {
static constexpr uint64_t value = static_cast<uint64_t>(TypeId::INT64);
static constexpr uint64_t value = static_cast<uint64_t>(TypeId::VARINT64);
};

// Note: Unsigned types (uint8_t, uint16_t, uint32_t, uint64_t) use the fallback
Expand Down
4 changes: 2 additions & 2 deletions cpp/fory/serialization/skip.cc
Original file line number Diff line number Diff line change
Expand Up @@ -489,8 +489,8 @@ void skip_field_value(ReadContext &ctx, const FieldType &field_type,
ctx.buffer().IncreaseReaderIndex(8);
return;

case TypeId::VAR32:
case TypeId::VAR64:
case TypeId::VARINT32:
case TypeId::VARINT64:
skip_varint(ctx);
return;

Expand Down
68 changes: 42 additions & 26 deletions cpp/fory/serialization/struct_serializer.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,13 +118,15 @@ namespace detail {
inline constexpr bool is_primitive_type_id(TypeId type_id) {
return type_id == TypeId::BOOL || type_id == TypeId::INT8 ||
type_id == TypeId::INT16 || type_id == TypeId::INT32 ||
type_id == TypeId::VAR32 || type_id == TypeId::INT64 ||
type_id == TypeId::VAR64 || type_id == TypeId::H64 ||
type_id == TypeId::VARINT32 || type_id == TypeId::INT64 ||
type_id == TypeId::VARINT64 || type_id == TypeId::TAGGED_INT64 ||
type_id == TypeId::FLOAT16 || type_id == TypeId::FLOAT32 ||
type_id == TypeId::FLOAT64 ||
// Unsigned types for native mode (xlang=false)
// Unsigned types
type_id == TypeId::UINT8 || type_id == TypeId::UINT16 ||
type_id == TypeId::UINT32 || type_id == TypeId::UINT64;
type_id == TypeId::UINT32 || type_id == TypeId::VAR_UINT32 ||
type_id == TypeId::UINT64 || type_id == TypeId::VAR_UINT64 ||
type_id == TypeId::TAGGED_UINT64;
}

/// Write a primitive value to buffer at given offset WITHOUT updating
Expand Down Expand Up @@ -653,16 +655,24 @@ template <typename T> struct CompileTimeFieldHelpers {
switch (static_cast<TypeId>(tid)) {
case TypeId::BOOL:
case TypeId::INT8:
case TypeId::UINT8:
return 1;
case TypeId::INT16:
case TypeId::UINT16:
case TypeId::FLOAT16:
return 2;
case TypeId::INT32:
case TypeId::VAR32:
case TypeId::VARINT32:
case TypeId::UINT32:
case TypeId::VAR_UINT32:
case TypeId::FLOAT32:
return 4;
case TypeId::INT64:
case TypeId::VAR64:
case TypeId::VARINT64:
case TypeId::TAGGED_INT64:
case TypeId::UINT64:
case TypeId::VAR_UINT64:
case TypeId::TAGGED_UINT64:
case TypeId::FLOAT64:
return 8;
default:
Expand All @@ -673,8 +683,14 @@ template <typename T> struct CompileTimeFieldHelpers {
static constexpr bool is_compress_id(uint32_t tid) {
return tid == static_cast<uint32_t>(TypeId::INT32) ||
tid == static_cast<uint32_t>(TypeId::INT64) ||
tid == static_cast<uint32_t>(TypeId::VAR32) ||
tid == static_cast<uint32_t>(TypeId::VAR64);
tid == static_cast<uint32_t>(TypeId::VARINT32) ||
tid == static_cast<uint32_t>(TypeId::VARINT64) ||
tid == static_cast<uint32_t>(TypeId::TAGGED_INT64) ||
tid == static_cast<uint32_t>(TypeId::UINT32) ||
tid == static_cast<uint32_t>(TypeId::UINT64) ||
tid == static_cast<uint32_t>(TypeId::VAR_UINT32) ||
tid == static_cast<uint32_t>(TypeId::VAR_UINT64) ||
tid == static_cast<uint32_t>(TypeId::TAGGED_UINT64);
}

/// Check if a type ID is an internal (built-in, final) type for group 2.
Expand Down Expand Up @@ -828,15 +844,15 @@ template <typename T> struct CompileTimeFieldHelpers {
total += 2;
break;
case TypeId::INT32:
case TypeId::VAR32:
case TypeId::VARINT32:
total += 8; // varint max, but bulk write may write up to 8 bytes
break;
case TypeId::FLOAT32:
total += 4;
break;
case TypeId::INT64:
case TypeId::VAR64:
case TypeId::H64:
case TypeId::VARINT64:
case TypeId::TAGGED_INT64:
total += 10; // varint max
break;
case TypeId::FLOAT64:
Expand Down Expand Up @@ -899,14 +915,14 @@ template <typename T> struct CompileTimeFieldHelpers {

/// Check if a type_id represents a varint primitive (int32/int64 types)
/// Per basic_serializer.h, INT32/INT64 use zigzag varint encoding
/// VAR32/VAR64/H64 also use varint encoding
/// VARINT32/VARINT64/TAGGED_INT64 also use varint encoding
static constexpr bool is_varint_primitive(uint32_t tid) {
switch (static_cast<TypeId>(tid)) {
case TypeId::INT32: // int32_t uses zigzag varint per basic_serializer.h
case TypeId::INT64: // int64_t uses zigzag varint per basic_serializer.h
case TypeId::VAR32: // explicit varint type
case TypeId::VAR64: // explicit varint type
case TypeId::H64: // hybrid int64 encoding
case TypeId::INT32: // int32_t uses zigzag varint per basic_serializer.h
case TypeId::INT64: // int64_t uses zigzag varint per basic_serializer.h
case TypeId::VARINT32: // explicit varint type
case TypeId::VARINT64: // explicit varint type
case TypeId::TAGGED_INT64: // hybrid int64 encoding
return true;
default:
return false;
Expand All @@ -916,12 +932,12 @@ template <typename T> struct CompileTimeFieldHelpers {
/// Get the max varint size in bytes for a type_id (0 if not varint)
static constexpr size_t max_varint_bytes(uint32_t tid) {
switch (static_cast<TypeId>(tid)) {
case TypeId::INT32: // int32_t uses zigzag varint
case TypeId::VAR32: // explicit varint
return 5; // int32 varint max
case TypeId::INT64: // int64_t uses zigzag varint
case TypeId::VAR64: // explicit varint
case TypeId::H64:
case TypeId::INT32: // int32_t uses zigzag varint
case TypeId::VARINT32: // explicit varint
return 5; // int32 varint max
case TypeId::INT64: // int64_t uses zigzag varint
case TypeId::VARINT64: // explicit varint
case TypeId::TAGGED_INT64:
return 10; // int64 varint max
default:
return 0;
Expand Down Expand Up @@ -1055,15 +1071,15 @@ template <typename T> struct CompileTimeFieldHelpers {
total += 2;
break;
case TypeId::INT32:
case TypeId::VAR32:
case TypeId::VARINT32:
total += 5; // varint max
break;
case TypeId::FLOAT32:
total += 4;
break;
case TypeId::INT64:
case TypeId::VAR64:
case TypeId::H64:
case TypeId::VARINT64:
case TypeId::TAGGED_INT64:
total += 10; // varint max
break;
case TypeId::FLOAT64:
Expand Down
22 changes: 18 additions & 4 deletions cpp/fory/serialization/type_resolver.cc
Original file line number Diff line number Diff line change
Expand Up @@ -606,16 +606,24 @@ int32_t get_primitive_type_size(uint32_t type_id) {
switch (static_cast<TypeId>(type_id)) {
case TypeId::BOOL:
case TypeId::INT8:
case TypeId::UINT8:
return 1;
case TypeId::INT16:
case TypeId::UINT16:
case TypeId::FLOAT16:
return 2;
case TypeId::INT32:
case TypeId::VAR32:
case TypeId::VARINT32:
case TypeId::UINT32:
case TypeId::VAR_UINT32:
case TypeId::FLOAT32:
return 4;
case TypeId::INT64:
case TypeId::VAR64:
case TypeId::VARINT64:
case TypeId::TAGGED_INT64:
case TypeId::UINT64:
case TypeId::VAR_UINT64:
case TypeId::TAGGED_UINT64:
case TypeId::FLOAT64:
return 8;
default:
Expand All @@ -626,8 +634,14 @@ int32_t get_primitive_type_size(uint32_t type_id) {
bool is_compress(uint32_t type_id) {
return type_id == static_cast<uint32_t>(TypeId::INT32) ||
type_id == static_cast<uint32_t>(TypeId::INT64) ||
type_id == static_cast<uint32_t>(TypeId::VAR32) ||
type_id == static_cast<uint32_t>(TypeId::VAR64);
type_id == static_cast<uint32_t>(TypeId::VARINT32) ||
type_id == static_cast<uint32_t>(TypeId::VARINT64) ||
type_id == static_cast<uint32_t>(TypeId::TAGGED_INT64) ||
type_id == static_cast<uint32_t>(TypeId::UINT32) ||
type_id == static_cast<uint32_t>(TypeId::UINT64) ||
type_id == static_cast<uint32_t>(TypeId::VAR_UINT32) ||
type_id == static_cast<uint32_t>(TypeId::VAR_UINT64) ||
type_id == static_cast<uint32_t>(TypeId::TAGGED_UINT64);
}

// Numeric field sorter (for primitive fields)
Expand Down
8 changes: 4 additions & 4 deletions cpp/fory/serialization/unsigned_serializer.h
Original file line number Diff line number Diff line change
Expand Up @@ -165,9 +165,9 @@ template <> struct Serializer<uint16_t> {
}
};

/// uint32_t serializer (native mode only)
/// uint32_t serializer - uses VAR_UINT32 to match Rust xlang mode
template <> struct Serializer<uint32_t> {
static constexpr TypeId type_id = TypeId::UINT32;
static constexpr TypeId type_id = TypeId::VAR_UINT32;

static inline void write_type_info(WriteContext &ctx) {
ctx.write_varuint32(static_cast<uint32_t>(type_id));
Expand Down Expand Up @@ -230,9 +230,9 @@ template <> struct Serializer<uint32_t> {
}
};

/// uint64_t serializer (native mode only)
/// uint64_t serializer - uses VAR_UINT64 to match Rust xlang mode
template <> struct Serializer<uint64_t> {
static constexpr TypeId type_id = TypeId::UINT64;
static constexpr TypeId type_id = TypeId::VAR_UINT64;

static inline void write_type_info(WriteContext &ctx) {
ctx.write_varuint32(static_cast<uint32_t>(type_id));
Expand Down
Loading
Loading