Skip to content

Commit a85298d

Browse files
authored
perf: use simdutf for idna (#960)
1 parent f706c78 commit a85298d

File tree

6 files changed

+66
-3
lines changed

6 files changed

+66
-3
lines changed

.github/workflows/ubuntu.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,13 +28,14 @@ jobs:
2828
shared: [ON, OFF]
2929
cxx: [g++-12, clang++-15]
3030
runs-on: [ubuntu-22.04, ubuntu-22.04-arm]
31+
simdutf: [OFF, ON]
3132
runs-on: ${{matrix.runs-on}}
3233
steps:
3334
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
3435
- name: Setup Ninja
3536
run: sudo apt-get install ninja-build
3637
- name: Prepare
37-
run: cmake -D ADA_TESTING=ON -D ADA_BENCHMARKS=ON -DBUILD_SHARED_LIBS=${{matrix.shared}} -G Ninja -B build
38+
run: cmake -D ADA_TESTING=ON -D ADA_BENCHMARKS=ON -DBUILD_SHARED_LIBS=${{matrix.shared}} -D ADA_USE_SIMDUTF=${{matrix.simdutf}} -G Ninja -B build
3839
env:
3940
CXX: ${{matrix.cxx}}
4041
- name: Build

CMakeLists.txt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ option(ADA_TESTING "Whether to build tests." OFF)
2020
option(ADA_BENCHMARKS "Whether to build benchmarks." OFF)
2121
option(ADA_TOOLS "Whether to build tools." OFF)
2222
option(ADA_BUILD_SINGLE_HEADER_LIB "Whether to build the lib from the single-header files" OFF)
23+
option(ADA_USE_SIMDUTF "Whether to use SIMDUTF for IDNA" OFF)
2324
# There are cases where when embedding ada as a dependency for other CMake
2425
# projects as submodules or subdirectories (via FetchContent) can lead to
2526
# errors due to CPM, so this is here to support disabling all the testing
@@ -78,6 +79,14 @@ if(ADA_TESTING OR ADA_BENCHMARKS OR ADA_TOOLS)
7879
endif(ADA_TESTING AND EMSCRIPTEN)
7980
endif()
8081

82+
# Optional dependency: when ADA_USE_SIMDUTF is ON, fetch simdutf 7.3.2 from
# GitHub through CPM, with simdutf's own tests and tools switched off.
# NOTE(review): this block appears to sit after the endif() that closes the
# ADA_TESTING/ADA_BENCHMARKS/ADA_TOOLS section. Confirm that CPMAddPackage is
# already defined (CPM.cmake included) when none of those options is enabled,
# otherwise -DADA_USE_SIMDUTF=ON on its own would fail at configure time.
if(ADA_USE_SIMDUTF)
83+
CPMAddPackage(
84+
NAME simdutf
85+
GITHUB_REPOSITORY simdutf/simdutf
86+
VERSION 7.3.2
87+
OPTIONS "SIMDUTF_TESTS OFF" "SIMDUTF_TOOLS OFF"
88+
)
89+
endif()
8190

8291
add_library(ada::ada ALIAS ada)
8392

README.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -319,6 +319,12 @@ With tests (requires available local packages):
319319
- **Build**: `cmake -B build -DADA_TESTING=ON -D CPM_USE_LOCAL_PACKAGES=ON && cmake --build build`
320320
- **Test**: `ctest --output-on-failure --test-dir build`
321321

322+
### Build options
323+
324+
Ada provides several CMake options to customize the build:
325+
326+
- `ADA_USE_SIMDUTF`: Uses [simdutf](https://github.com/simdutf/simdutf) for SIMD-accelerated UTF-8/UTF-32 transcoding in IDNA processing (default: OFF)
327+
322328
Windows users need additional flags to specify the build configuration, e.g. `--config Release`.
323329

324330
The project can also be built via docker using default docker file of repository with following commands.

singleheader/CMakeLists.txt

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,21 +44,38 @@ if (Python3_Interpreter_FOUND)
4444
#
4545
add_library(ada-singleheader-include-source INTERFACE)
4646
target_include_directories(ada-singleheader-include-source INTERFACE $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>)
47+
# When simdutf is enabled, add its include directory to the INTERFACE include
# path of ada-singleheader-include-source.
# NOTE(review): simdutf_SOURCE_DIR is presumably populated by the CPMAddPackage
# call in the top-level CMakeLists.txt -- confirm it is set before this
# directory is processed.
if (ADA_USE_SIMDUTF)
48+
target_include_directories(ada-singleheader-include-source INTERFACE ${simdutf_SOURCE_DIR}/include)
49+
endif()
4750
add_dependencies(ada-singleheader-include-source ada-singleheader-files)
4851

4952
add_library(ada-singleheader-source INTERFACE)
5053
target_sources(ada-singleheader-source INTERFACE $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}/ada.cpp>)
5154
target_link_libraries(ada-singleheader-source INTERFACE ada-singleheader-include-source)
55+
# When simdutf is enabled, declare it as an INTERFACE link dependency of
# ada-singleheader-source.
if (ADA_USE_SIMDUTF)
56+
target_link_libraries(ada-singleheader-source INTERFACE simdutf)
57+
endif()
58+
5259
# Static library compiled from the ada.cpp located in the current binary
# directory; only created for test builds or when ADA_BUILD_SINGLE_HEADER_LIB
# is ON. With ADA_USE_SIMDUTF it also links simdutf and adds simdutf's include
# directory as a PRIVATE include path.
# NOTE(review): no ADA_USE_SIMDUTF compile definition is added to this target
# in the visible hunk. The simdutf code paths in ada_idna.cpp are guarded by
# #ifdef ADA_USE_SIMDUTF, so confirm whether they are meant to be active for
# the single-header library.
if (ADA_TESTING OR ADA_BUILD_SINGLE_HEADER_LIB)
5360
add_library(ada-singleheader-lib STATIC $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}/ada.cpp>)
61+
if (ADA_USE_SIMDUTF)
62+
target_link_libraries(ada-singleheader-lib simdutf)
63+
target_include_directories(ada-singleheader-lib PRIVATE ${simdutf_SOURCE_DIR}/include)
64+
endif()
5465
endif(ADA_TESTING OR ADA_BUILD_SINGLE_HEADER_LIB)
5566

5667
if (ADA_TESTING)
5768
add_executable(demo $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}/demo.cpp>)
5869
target_link_libraries(demo ada-singleheader-include-source)
70+
if (ADA_USE_SIMDUTF)
71+
target_link_libraries(demo simdutf)
72+
endif()
5973

6074
add_executable(demo_no_url_pattern $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}/demo.cpp>)
6175
target_link_libraries(demo_no_url_pattern ada-singleheader-include-source)
76+
if (ADA_USE_SIMDUTF)
77+
target_link_libraries(demo_no_url_pattern simdutf)
78+
endif()
6279
target_compile_definitions(demo_no_url_pattern PRIVATE ADA_INCLUDE_URL_PATTERN=0)
6380

6481
add_test(demo_no_url_pattern demo_no_url_pattern)

src/CMakeLists.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,3 +65,8 @@ if(ADA_INCLUDE_URL_PATTERN)
6565
else()
6666
target_compile_definitions(ada PRIVATE ADA_INCLUDE_URL_PATTERN=0)
6767
endif()
68+
69+
# Opt-in simdutf support for the ada target: link simdutf privately and define
# the ADA_USE_SIMDUTF macro, which the #ifdef ADA_USE_SIMDUTF sections in
# src/ada_idna.cpp test for.
if (ADA_USE_SIMDUTF)
70+
target_link_libraries(ada PRIVATE simdutf)
71+
target_compile_definitions(ada PRIVATE ADA_USE_SIMDUTF)
72+
endif()

src/ada_idna.cpp

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* auto-generated on 2025-03-08 13:17:11 -0500. Do not edit! */
1+
/* auto-generated on 2025-06-26 23:04:30 -0300. Do not edit! */
22
/* begin file src/idna.cpp */
33
/* begin file src/unicode_transcoding.cpp */
44

@@ -8021,7 +8021,7 @@ bool utf32_to_punycode(std::u32string_view input, std::string &out) {
80218021
++h;
80228022
out.push_back(char(c));
80238023
}
8024-
if (c > 0x10ffff || (c >= 0xd880 && c < 0xe000)) {
8024+
if (c > 0x10ffff || (c >= 0xd800 && c < 0xe000)) {
80258025
return false;
80268026
}
80278027
}
@@ -9411,6 +9411,10 @@ bool is_label_valid(const std::u32string_view label) {
94119411
#include <ranges>
94129412

94139413

9414+
#ifdef ADA_USE_SIMDUTF
9415+
#include "simdutf.h"
9416+
#endif
9417+
94149418
namespace ada::idna {
94159419

94169420
bool constexpr is_ascii(std::u32string_view view) {
@@ -9523,11 +9527,20 @@ std::string to_ascii(std::string_view ut8_string) {
95239527
}
95249528
static const std::string error = "";
95259529
// We convert to UTF-32
9530+
9531+
#ifdef ADA_USE_SIMDUTF
9532+
size_t utf32_length =
9533+
simdutf::utf32_length_from_utf8(ut8_string.data(), ut8_string.size());
9534+
std::u32string utf32(utf32_length, '\0');
9535+
size_t actual_utf32_length = simdutf::convert_utf8_to_utf32(
9536+
ut8_string.data(), ut8_string.size(), utf32.data());
9537+
#else
95269538
size_t utf32_length =
95279539
ada::idna::utf32_length_from_utf8(ut8_string.data(), ut8_string.size());
95289540
std::u32string utf32(utf32_length, '\0');
95299541
size_t actual_utf32_length = ada::idna::utf8_to_utf32(
95309542
ut8_string.data(), ut8_string.size(), utf32.data());
9543+
#endif
95319544
if (actual_utf32_length == 0) {
95329545
return error;
95339546
}
@@ -9619,6 +9632,10 @@ std::string to_ascii(std::string_view ut8_string) {
96199632
#include <string>
96209633

96219634

9635+
#ifdef ADA_USE_SIMDUTF
9636+
#include "simdutf.h"
9637+
#endif
9638+
96229639
namespace ada::idna {
96239640
std::string to_unicode(std::string_view input) {
96249641
std::string output;
@@ -9637,11 +9654,19 @@ std::string to_unicode(std::string_view input) {
96379654
if (ada::idna::verify_punycode(label_view)) {
96389655
std::u32string tmp_buffer;
96399656
if (ada::idna::punycode_to_utf32(label_view, tmp_buffer)) {
9657+
#ifdef ADA_USE_SIMDUTF
9658+
auto utf8_size = simdutf::utf8_length_from_utf32(tmp_buffer.data(),
9659+
tmp_buffer.size());
9660+
std::string final_utf8(utf8_size, '\0');
9661+
simdutf::convert_utf32_to_utf8(tmp_buffer.data(), tmp_buffer.size(),
9662+
final_utf8.data());
9663+
#else
96409664
auto utf8_size = ada::idna::utf8_length_from_utf32(tmp_buffer.data(),
96419665
tmp_buffer.size());
96429666
std::string final_utf8(utf8_size, '\0');
96439667
ada::idna::utf32_to_utf8(tmp_buffer.data(), tmp_buffer.size(),
96449668
final_utf8.data());
9669+
#endif
96459670
output.append(final_utf8);
96469671
} else {
96479672
// ToUnicode never fails. If any step fails, then the original input

0 commit comments

Comments
 (0)