Skip to content

Commit 471a646

Browse files
committed
Merge remote-tracking branch 'asf/main' into sync_code_to_0.13
2 parents 1802c43 + 1574d44 commit 471a646

File tree

87 files changed

+6152
-3158
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

87 files changed

+6152
-3158
lines changed

.github/workflows/ci.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -288,7 +288,7 @@ jobs:
288288
name: Rust CI
289289
strategy:
290290
matrix:
291-
os: [ubuntu-latest, macos-13, macos-14] # macos-13: x86, macos-14: arm64
291+
os: [ubuntu-latest, macos-14, macos-latest] # macos-14: arm64
292292
runs-on: ${{ matrix.os }}
293293
timeout-minutes: 45
294294
steps:
@@ -316,7 +316,7 @@ jobs:
316316
name: C++ CI
317317
strategy:
318318
matrix:
319-
os: [ubuntu-latest, macos-13, macos-14, windows-2022] # macos-13: x86, macos-14: arm64
319+
os: [ubuntu-latest, macos-14, macos-latest, windows-2022] # macos-14: arm64
320320
runs-on: ${{ matrix.os }}
321321
steps:
322322
- uses: actions/checkout@v5

AGENTS.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -389,14 +389,14 @@ Fory python has two implementations for the protocol:
389389

390390
Code structure:
391391

392-
- `python/pyfory/_serialization.pyx`: Core serialization logic and entry point for cython mode based on `xlang serialization format`
392+
- `python/pyfory/serialization.pyx`: Core serialization logic and entry point for cython mode based on `xlang serialization format`
393393
- `python/pyfory/_fory.py`: Serialization entry point for pure python mode based on `xlang serialization format`
394394
- `python/pyfory/_registry.py`: Type registry, resolution and serializer dispatch for pure python mode, which is also used by cython mode. Cython mode uses a cache to reduce invocations to this module.
395395
- `python/pyfory/serializer.py`: Serializers for non-internal types
396396
- `python/pyfory/includes`: Cython headers for `c++` functions and classes.
397397
- `python/pyfory/resolver.py`: resolving shared/circular references when ref tracking is enabled in pure python mode
398398
- `python/pyfory/format`: Fory row format encoding and decoding, arrow columnar format interoperation
399-
- `python/pyfory/_util.pyx`: Buffer for reading/writing data, string utilities. Used by `_serialization.pyx` and `python/pyfory/format` at the same time.
399+
- `python/pyfory/_util.pyx`: Buffer for reading/writing data, string utilities. Used by `serialization.pyx` and `python/pyfory/format` at the same time.
400400

401401
#### Go
402402

BUILD

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -51,11 +51,11 @@ pyx_library(
5151
)
5252

5353
pyx_library(
54-
name = "_serialization",
54+
name = "serialization",
5555
srcs = glob([
5656
"python/pyfory/includes/*.pxd",
5757
"python/pyfory/_util.pxd",
58-
"python/pyfory/_serialization.pyx",
58+
"python/pyfory/serialization.pyx",
5959
"python/pyfory/__init__.py",
6060
]),
6161
cc_kwargs = dict(
@@ -96,7 +96,7 @@ genrule(
9696
":python/pyfory/_util.so",
9797
":python/pyfory/lib/mmh3/mmh3.so",
9898
":python/pyfory/format/_format.so",
99-
":python/pyfory/_serialization.so",
99+
":python/pyfory/serialization.so",
100100
],
101101
outs = [
102102
"cp_fory_py_generated.out",
@@ -111,12 +111,12 @@ genrule(
111111
cp -f $(location python/pyfory/_util.so) "$$WORK_DIR/python/pyfory/_util.pyd"
112112
cp -f $(location python/pyfory/lib/mmh3/mmh3.so) "$$WORK_DIR/python/pyfory/lib/mmh3/mmh3.pyd"
113113
cp -f $(location python/pyfory/format/_format.so) "$$WORK_DIR/python/pyfory/format/_format.pyd"
114-
cp -f $(location python/pyfory/_serialization.so) "$$WORK_DIR/python/pyfory/_serialization.pyd"
114+
cp -f $(location python/pyfory/serialization.so) "$$WORK_DIR/python/pyfory/serialization.pyd"
115115
else
116116
cp -f $(location python/pyfory/_util.so) "$$WORK_DIR/python/pyfory"
117117
cp -f $(location python/pyfory/lib/mmh3/mmh3.so) "$$WORK_DIR/python/pyfory/lib/mmh3"
118118
cp -f $(location python/pyfory/format/_format.so) "$$WORK_DIR/python/pyfory/format"
119-
cp -f $(location python/pyfory/_serialization.so) "$$WORK_DIR/python/pyfory"
119+
cp -f $(location python/pyfory/serialization.so) "$$WORK_DIR/python/pyfory"
120120
fi
121121
echo $$(date) > $@
122122
""",

CONTRIBUTING.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,7 @@ cd python
155155
python setup.py develop
156156
```
157157

158-
- Use `cython --cplus -a pyfory/_serialization.pyx` to produce an annotated HTML file of the source code. Then you can analyze interaction between Python objects and Python's C API.
158+
- Use `cython --cplus -a pyfory/serialization.pyx` to produce an annotated HTML file of the source code. Then you can analyze interaction between Python objects and Python's C API.
159159
- Read more: https://cython.readthedocs.io/en/latest/src/userguide/debugging.html
160160

161161
```bash

ci/release.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,8 +54,19 @@ def build(v: str):
5454
if os.path.exists("dist"):
5555
shutil.rmtree("dist")
5656
os.mkdir("dist")
57-
subprocess.check_call(f"git checkout releases-{v}", shell=True)
5857
branch = f"releases-{v}"
58+
# Check if branch exists, if not create it
59+
result = subprocess.run(
60+
f"git show-ref --verify --quiet refs/heads/{branch}",
61+
shell=True,
62+
capture_output=True,
63+
)
64+
if result.returncode == 0:
65+
# Branch exists, checkout
66+
subprocess.check_call(f"git checkout {branch}", shell=True)
67+
else:
68+
# Branch doesn't exist, create it
69+
subprocess.check_call(f"git checkout -b {branch}", shell=True)
5970
src_tar = f"apache-fory-{v}-src.tar.gz"
6071
_check_all_committed()
6172
_strip_unnecessary_license()

ci/run_ci.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -293,7 +293,7 @@ def parse_args():
293293
if USE_PYTHON_GO:
294294
func()
295295
else:
296-
# run_shell_script("go")
296+
run_shell_script("go")
297297
pass
298298
elif command == "format":
299299
if USE_PYTHON_FORMAT:

ci/run_ci.sh

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -372,11 +372,11 @@ case $1 in
372372
;;
373373
go)
374374
echo "Executing fory go tests for go"
375-
cd "$ROOT/go/fory"
376-
go install ./cmd/fory
377-
cd "$ROOT/go/fory/tests"
378-
go generate
379-
go test -v
375+
# cd "$ROOT/go/fory"
376+
# go install ./cmd/fory
377+
# cd "$ROOT/go/fory/tests"
378+
# go generate
379+
# go test -v
380380
cd "$ROOT/go/fory"
381381
go test -v
382382
echo "Executing fory go tests succeeds"

cpp/fory/benchmark/benchmark_string_util.cc

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -347,6 +347,20 @@ bool utf16HasSurrogatePairs_BaseLine(const std::u16string &str) {
347347
return false;
348348
}
349349

350+
// Generate test strings of various sizes for threshold benchmarking
351+
const std::vector<std::u16string> test_utf16_strings_small_16 =
352+
generateUTF16String(num_tests, 16);
353+
const std::vector<std::u16string> test_utf16_strings_small_32 =
354+
generateUTF16String(num_tests, 32);
355+
const std::vector<std::u16string> test_utf16_strings_small_64 =
356+
generateUTF16String(num_tests, 64);
357+
const std::vector<std::u16string> test_utf16_strings_medium_128 =
358+
generateUTF16String(num_tests, 128);
359+
const std::vector<std::u16string> test_utf16_strings_medium_256 =
360+
generateUTF16String(num_tests, 256);
361+
const std::vector<std::u16string> test_utf16_strings_large_512 =
362+
generateUTF16String(num_tests, 512);
363+
350364
// Benchmark function for checking if a UTF-16 string contains surrogate pairs
351365
static void BM_Utf16HasSurrogatePairs_BaseLine(benchmark::State &state) {
352366
for (auto _ : state) {
@@ -372,6 +386,142 @@ static void BM_Utf16HasSurrogatePairs_FORY(benchmark::State &state) {
372386

373387
BENCHMARK(BM_Utf16HasSurrogatePairs_FORY);
374388

389+
// Benchmarks for different string sizes to determine optimal threshold
390+
static void BM_Utf16HasSurrogatePairs_BaseLine_Size16(benchmark::State &state) {
391+
for (auto _ : state) {
392+
for (const std::u16string &str : test_utf16_strings_small_16) {
393+
bool result = utf16HasSurrogatePairs_BaseLine(str);
394+
benchmark::DoNotOptimize(result);
395+
}
396+
}
397+
}
398+
399+
BENCHMARK(BM_Utf16HasSurrogatePairs_BaseLine_Size16);
400+
401+
static void BM_Utf16HasSurrogatePairs_FORY_Size16(benchmark::State &state) {
402+
for (auto _ : state) {
403+
for (const std::u16string &str : test_utf16_strings_small_16) {
404+
bool result = fory::utf16HasSurrogatePairs(str);
405+
benchmark::DoNotOptimize(result);
406+
}
407+
}
408+
}
409+
410+
BENCHMARK(BM_Utf16HasSurrogatePairs_FORY_Size16);
411+
412+
static void BM_Utf16HasSurrogatePairs_BaseLine_Size32(benchmark::State &state) {
413+
for (auto _ : state) {
414+
for (const std::u16string &str : test_utf16_strings_small_32) {
415+
bool result = utf16HasSurrogatePairs_BaseLine(str);
416+
benchmark::DoNotOptimize(result);
417+
}
418+
}
419+
}
420+
421+
BENCHMARK(BM_Utf16HasSurrogatePairs_BaseLine_Size32);
422+
423+
static void BM_Utf16HasSurrogatePairs_FORY_Size32(benchmark::State &state) {
424+
for (auto _ : state) {
425+
for (const std::u16string &str : test_utf16_strings_small_32) {
426+
bool result = fory::utf16HasSurrogatePairs(str);
427+
benchmark::DoNotOptimize(result);
428+
}
429+
}
430+
}
431+
432+
BENCHMARK(BM_Utf16HasSurrogatePairs_FORY_Size32);
433+
434+
static void BM_Utf16HasSurrogatePairs_BaseLine_Size64(benchmark::State &state) {
435+
for (auto _ : state) {
436+
for (const std::u16string &str : test_utf16_strings_small_64) {
437+
bool result = utf16HasSurrogatePairs_BaseLine(str);
438+
benchmark::DoNotOptimize(result);
439+
}
440+
}
441+
}
442+
443+
BENCHMARK(BM_Utf16HasSurrogatePairs_BaseLine_Size64);
444+
445+
static void BM_Utf16HasSurrogatePairs_FORY_Size64(benchmark::State &state) {
446+
for (auto _ : state) {
447+
for (const std::u16string &str : test_utf16_strings_small_64) {
448+
bool result = fory::utf16HasSurrogatePairs(str);
449+
benchmark::DoNotOptimize(result);
450+
}
451+
}
452+
}
453+
454+
BENCHMARK(BM_Utf16HasSurrogatePairs_FORY_Size64);
455+
456+
static void
457+
BM_Utf16HasSurrogatePairs_BaseLine_Size128(benchmark::State &state) {
458+
for (auto _ : state) {
459+
for (const std::u16string &str : test_utf16_strings_medium_128) {
460+
bool result = utf16HasSurrogatePairs_BaseLine(str);
461+
benchmark::DoNotOptimize(result);
462+
}
463+
}
464+
}
465+
466+
BENCHMARK(BM_Utf16HasSurrogatePairs_BaseLine_Size128);
467+
468+
static void BM_Utf16HasSurrogatePairs_FORY_Size128(benchmark::State &state) {
469+
for (auto _ : state) {
470+
for (const std::u16string &str : test_utf16_strings_medium_128) {
471+
bool result = fory::utf16HasSurrogatePairs(str);
472+
benchmark::DoNotOptimize(result);
473+
}
474+
}
475+
}
476+
477+
BENCHMARK(BM_Utf16HasSurrogatePairs_FORY_Size128);
478+
479+
static void
480+
BM_Utf16HasSurrogatePairs_BaseLine_Size256(benchmark::State &state) {
481+
for (auto _ : state) {
482+
for (const std::u16string &str : test_utf16_strings_medium_256) {
483+
bool result = utf16HasSurrogatePairs_BaseLine(str);
484+
benchmark::DoNotOptimize(result);
485+
}
486+
}
487+
}
488+
489+
BENCHMARK(BM_Utf16HasSurrogatePairs_BaseLine_Size256);
490+
491+
static void BM_Utf16HasSurrogatePairs_FORY_Size256(benchmark::State &state) {
492+
for (auto _ : state) {
493+
for (const std::u16string &str : test_utf16_strings_medium_256) {
494+
bool result = fory::utf16HasSurrogatePairs(str);
495+
benchmark::DoNotOptimize(result);
496+
}
497+
}
498+
}
499+
500+
BENCHMARK(BM_Utf16HasSurrogatePairs_FORY_Size256);
501+
502+
static void
503+
BM_Utf16HasSurrogatePairs_BaseLine_Size512(benchmark::State &state) {
504+
for (auto _ : state) {
505+
for (const std::u16string &str : test_utf16_strings_large_512) {
506+
bool result = utf16HasSurrogatePairs_BaseLine(str);
507+
benchmark::DoNotOptimize(result);
508+
}
509+
}
510+
}
511+
512+
BENCHMARK(BM_Utf16HasSurrogatePairs_BaseLine_Size512);
513+
514+
static void BM_Utf16HasSurrogatePairs_FORY_Size512(benchmark::State &state) {
515+
for (auto _ : state) {
516+
for (const std::u16string &str : test_utf16_strings_large_512) {
517+
bool result = fory::utf16HasSurrogatePairs(str);
518+
benchmark::DoNotOptimize(result);
519+
}
520+
}
521+
}
522+
523+
BENCHMARK(BM_Utf16HasSurrogatePairs_FORY_Size512);
524+
375525
/*
376526
* TEST Utf16ToUtf8
377527
*/

cpp/fory/util/string_util.h

Lines changed: 23 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -133,23 +133,11 @@ inline bool isLatin1(const uint16_t *data, size_t length) {
133133
length % VECTOR_SIZE);
134134
}
135135
inline bool utf16HasSurrogatePairs(const uint16_t *data, size_t length) {
136-
constexpr size_t VECTOR_SIZE = 16;
137-
const auto *ptr = reinterpret_cast<const __m256i *>(data);
138-
const auto *end = ptr + length / VECTOR_SIZE;
139-
const __m256i lower_bound = _mm256_set1_epi16(0xD800);
140-
const __m256i higher_bound = _mm256_set1_epi16(0xDFFF);
141-
142-
for (; ptr < end; ++ptr) {
143-
__m256i vec = _mm256_loadu_si256(ptr);
144-
__m256i mask1 = _mm256_cmpgt_epi16(vec, lower_bound);
145-
__m256i mask2 = _mm256_cmpgt_epi16(higher_bound, vec);
146-
__m256i result = _mm256_and_si256(mask1, mask2);
147-
if (!_mm256_testz_si256(result, result))
148-
return true;
149-
}
150-
151-
return hasSurrogatePairFallback(data + (length / VECTOR_SIZE) * VECTOR_SIZE,
152-
length % VECTOR_SIZE);
136+
// Direct fallback implementation - SIMD versions were consistently slower
137+
// due to early-exit characteristics: surrogate pairs are rare and when
138+
// present, often appear early in strings, making SIMD setup overhead
139+
// outweigh any vectorization benefits.
140+
return hasSurrogatePairFallback(data, length);
153141
}
154142

155143
#elif defined(FORY_HAS_NEON)
@@ -182,18 +170,11 @@ inline bool isLatin1(const uint16_t *data, size_t length) {
182170
}
183171

184172
inline bool utf16HasSurrogatePairs(const uint16_t *data, size_t length) {
185-
size_t i = 0;
186-
uint16x8_t lower_bound = vdupq_n_u16(0xD800);
187-
uint16x8_t higher_bound = vdupq_n_u16(0xDFFF);
188-
for (; i + 7 < length; i += 8) {
189-
uint16x8_t chunk = vld1q_u16(data + i);
190-
uint16x8_t mask1 = vcgeq_u16(chunk, lower_bound);
191-
uint16x8_t mask2 = vcleq_u16(chunk, higher_bound);
192-
if (vmaxvq_u16(mask1 & mask2)) {
193-
return true; // Detected a high surrogate
194-
}
195-
}
196-
return hasSurrogatePairFallback(data + i, length - i);
173+
// Direct fallback implementation - SIMD versions were consistently slower
174+
// due to early-exit characteristics: surrogate pairs are rare and when
175+
// present, often appear early in strings, making SIMD setup overhead
176+
// outweigh any vectorization benefits.
177+
return hasSurrogatePairFallback(data, length);
197178
}
198179
#elif defined(FORY_HAS_SSE2)
199180
inline bool isAscii(const char *data, size_t length) {
@@ -227,19 +208,11 @@ inline bool isLatin1(const uint16_t *data, size_t length) {
227208
}
228209

229210
inline bool utf16HasSurrogatePairs(const uint16_t *data, size_t length) {
230-
size_t i = 0;
231-
__m128i lower_bound = _mm_set1_epi16(0xd7ff);
232-
__m128i higher_bound = _mm_set1_epi16(0xe000);
233-
for (; i + 7 < length; i += 8) {
234-
__m128i chunk =
235-
_mm_loadu_si128(reinterpret_cast<const __m128i *>(data + i));
236-
__m128i cmp1 = _mm_cmpgt_epi16(chunk, lower_bound);
237-
__m128i cmp2 = _mm_cmpgt_epi16(higher_bound, chunk);
238-
if (_mm_movemask_epi8(_mm_and_si128(cmp1, cmp2)) != 0) {
239-
return true; // Detected a surrogate
240-
}
241-
}
242-
return hasSurrogatePairFallback(data + i, length - i);
211+
// Direct fallback implementation - SIMD versions were consistently slower
212+
// due to early-exit characteristics: surrogate pairs are rare and when
213+
// present, often appear early in strings, making SIMD setup overhead
214+
// outweigh any vectorization benefits.
215+
return hasSurrogatePairFallback(data, length);
243216
}
244217
#else
245218
inline bool isAscii(const char *data, size_t length) {
@@ -266,10 +239,14 @@ inline bool isLatin1(const std::u16string &str) {
266239
}
267240

268241
inline bool utf16HasSurrogatePairs(const std::u16string &str) {
269-
// Get the data pointer
270-
const std::uint16_t *data =
271-
reinterpret_cast<const std::uint16_t *>(str.data());
272-
return utf16HasSurrogatePairs(data, str.size());
242+
// Inline implementation for best performance
243+
for (size_t i = 0; i < str.size(); ++i) {
244+
auto c = str[i];
245+
if (c >= 0xD800 && c <= 0xDFFF) {
246+
return true;
247+
}
248+
}
249+
return false;
273250
}
274251

275252
} // namespace fory

0 commit comments

Comments
 (0)