|
13 | 13 | #
|
cmake_minimum_required(VERSION 3.18)

# Compile every target in this directory tree as C++17. STANDARD_REQUIRED makes
# configuration fail on a compiler without C++17 support instead of letting
# CMake silently fall back to an older standard.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Policy-version floor applied to projects that request an older one.
# Presumably needed for bundled third-party sub-projects that still declare a
# very old cmake_minimum_required() - TODO confirm which dependency needs it.
# Only a default: a value supplied by the user (e.g. -DCMAKE_POLICY_VERSION_MINIMUM=...)
# is left untouched.
if(NOT DEFINED CMAKE_POLICY_VERSION_MINIMUM)
  set(CMAKE_POLICY_VERSION_MINIMUM 3.5)
endif()

project(Tokenizers)

# Build switches; all default to OFF and are enabled with -D<NAME>=ON.
option(TOKENIZERS_BUILD_TEST "Build tests" OFF)
option(TOKENIZERS_BUILD_TOOLS "Build tools" OFF)
option(TOKENIZERS_BUILD_PYTHON "Build Python bindings" OFF)
option(SUPPORT_REGEX_LOOKAHEAD
       "Support regex lookahead patterns (requires PCRE2)" OFF
)
|
@@ -122,17 +123,49 @@ if(TOKENIZERS_BUILD_TOOLS)
|
122 | 123 | add_subdirectory(examples/tokenize_tool)
|
123 | 124 | endif()
|
124 | 125 |
|
# Build Python bindings
#
# Enabled with -DTOKENIZERS_BUILD_PYTHON=ON. Produces the pybind11 extension
# module `pytorch_tokenizers_cpp` from src/python_bindings.cpp and links it
# against the `tokenizers` library target.
if(TOKENIZERS_BUILD_PYTHON)
  # pybind11 is downloaded at configure time, pinned to a release tag.
  include(FetchContent)
  FetchContent_Declare(
    pybind11
    GIT_REPOSITORY https://github.com/pybind/pybind11.git
    GIT_TAG v2.13.6
  )
  FetchContent_MakeAvailable(pybind11)

  # Create the Python extension module
  pybind11_add_module(pytorch_tokenizers_cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/src/python_bindings.cpp
  )

  # Link with the tokenizers library
  # NOTE(review): if `tokenizers` is a static library it must be built as
  # position-independent code to link into this shared module - confirm.
  target_link_libraries(pytorch_tokenizers_cpp PRIVATE tokenizers)

  # Expose a version string to the bindings as the VERSION_INFO macro.
  # PROJECT_VERSION is empty when project() is called without a VERSION
  # argument, which would define VERSION_INFO as an empty macro; fall back to
  # a non-empty placeholder so the macro always carries a usable value.
  # NOTE(review): "dev" is a placeholder - confirm the desired fallback.
  set(tokenizers_python_version_info "${PROJECT_VERSION}")
  if("${tokenizers_python_version_info}" STREQUAL "")
    set(tokenizers_python_version_info "dev")
  endif()
  target_compile_definitions(pytorch_tokenizers_cpp
    PRIVATE "VERSION_INFO=${tokenizers_python_version_info}"
  )

  # Set the output name and let setuptools control the output directory
  set_target_properties(pytorch_tokenizers_cpp PROPERTIES
    OUTPUT_NAME "pytorch_tokenizers_cpp"
  )

  # Don't install the Python extension here - let setuptools handle it
  # The setup.py will copy the built extension to the appropriate location
endif()

| 155 | + |
# Installation rules
include(GNUInstallDirs)

# The library and its dependencies are installed only for non-Python builds;
# when TOKENIZERS_BUILD_PYTHON is ON this install(TARGETS) rule is skipped.
# NOTE(review): any install(EXPORT tokenizers-targets ...) elsewhere in this
# file refers to the export set created here - confirm it is guarded the same
# way.
if(NOT TOKENIZERS_BUILD_PYTHON)
  install(
    TARGETS
      tokenizers
      re2
      sentencepiece-static
    EXPORT tokenizers-targets
    ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}"
    LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}"
    RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}"
  )
endif()
136 | 169 |
|
137 | 170 | # Install header files
|
138 | 171 | install(
|
|
0 commit comments