Skip to content

Commit 381cdff

Browse files
authored
Merge branch 'main' into fix-bytes-move-assign
2 parents 92ca3b9 + 60448b8 commit 381cdff

File tree

173 files changed

+5288
-1294
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

173 files changed

+5288
-1294
lines changed

.gitattributes

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,6 @@
33
*.jar filter=lfs diff=lfs merge=lfs -text
44
*.tar filter=lfs diff=lfs merge=lfs -text
55
*.gz filter=lfs diff=lfs merge=lfs -text
6-
79d01717-8380-4504-86e1-387e6c058d0a filter=lfs diff=lfs merge=lfs -text
6+
test/test_data/sst/none/79d01717-8380-4504-86e1-387e6c058d0a filter=lfs diff=lfs merge=lfs -text
7+
test/test_data/sst/lz4/10540951-41d3-4216-aa2c-b15dfd25eb75 filter=lfs diff=lfs merge=lfs -text
8+
test/test_data/sst/zstd/83d05c53-2353-4160-b756-d50dd851b474 filter=lfs diff=lfs merge=lfs -text

CMakeLists.txt

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ option(PAIMON_ENABLE_ORC "Whether to enable orc file format" ON)
5353
option(PAIMON_ENABLE_LANCE "Whether to enable lance file format" OFF)
5454
option(PAIMON_ENABLE_JINDO "Whether to enable jindo file system" OFF)
5555
option(PAIMON_ENABLE_LUMINA "Whether to enable lumina vector index" ON)
56+
option(PAIMON_ENABLE_LUCENE "Whether to enable lucene index" ON)
5657

5758
if(PAIMON_ENABLE_ORC)
5859
add_definitions(-DPAIMON_ENABLE_ORC)
@@ -82,6 +83,10 @@ if(PAIMON_ENABLE_LUMINA)
8283
add_definitions(-DPAIMON_ENABLE_LUMINA)
8384
endif()
8485

86+
if(PAIMON_ENABLE_LUCENE)
87+
add_definitions(-DPAIMON_ENABLE_LUCENE)
88+
endif()
89+
8590
add_definitions(-DSNAPPY_CODEC_AVAILABLE)
8691
add_definitions(-DZSTD_CODEC_AVAILABLE)
8792
add_definitions(-DRAPIDJSON_HAS_STDSTRING)
@@ -379,6 +384,11 @@ if(PAIMON_BUILD_TESTS)
379384
list(APPEND TEST_STATIC_LINK_LIBS paimon_lumina_index_shared)
380385
list(APPEND TEST_STATIC_LINK_LIBS "-Wl,--as-needed")
381386
endif()
387+
if(PAIMON_ENABLE_LUCENE)
388+
list(APPEND TEST_STATIC_LINK_LIBS "-Wl,--no-as-needed")
389+
list(APPEND TEST_STATIC_LINK_LIBS paimon_lucene_index_shared)
390+
list(APPEND TEST_STATIC_LINK_LIBS "-Wl,--as-needed")
391+
endif()
382392

383393
endif()
384394

@@ -407,6 +417,7 @@ add_subdirectory(src/paimon/format/parquet)
407417
add_subdirectory(src/paimon/format/avro)
408418
add_subdirectory(src/paimon/format/lance)
409419
add_subdirectory(src/paimon/global_index/lumina)
420+
add_subdirectory(src/paimon/global_index/lucene)
410421
add_subdirectory(src/paimon/testing/mock)
411422
add_subdirectory(src/paimon/testing/utils)
412423
add_subdirectory(test/inte)

LICENSE

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,9 @@ This product includes code from Apache Iceberg C++.
263263
* .devcontainer/devcontainer.json.template
264264
* CI utilities:
265265
* .pre-commit-config.yaml
266+
* Avro direct decoder/encoder:
267+
* src/paimon/format/avro/avro_direct_decoder.cpp
268+
* src/paimon/format/avro/avro_direct_decoder.h
266269

267270
Copyright: 2024-2025 The Apache Software Foundation.
268271
Home page: https://iceberg.apache.org/

cmake_modules/BuildUtils.cmake

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,18 @@ function(add_paimon_lib LIB_NAME)
5555
# Necessary to make static linking into other shared libraries work properly
5656
set_property(TARGET ${LIB_NAME}_objlib PROPERTY POSITION_INDEPENDENT_CODE 1)
5757
if(ARG_DEPENDENCIES)
58-
add_dependencies(${LIB_NAME}_objlib ${ARG_DEPENDENCIES})
58+
# Avoid add_dependencies on non-existent targets (e.g. when building static only).
59+
set(_paimon_objlib_deps)
60+
foreach(_paimon_dep IN LISTS ARG_DEPENDENCIES)
61+
if(TARGET ${_paimon_dep})
62+
list(APPEND _paimon_objlib_deps ${_paimon_dep})
63+
endif()
64+
endforeach()
65+
if(_paimon_objlib_deps)
66+
add_dependencies(${LIB_NAME}_objlib ${_paimon_objlib_deps})
67+
endif()
68+
unset(_paimon_objlib_deps)
69+
unset(_paimon_dep)
5970
endif()
6071
set(LIB_DEPS $<TARGET_OBJECTS:${LIB_NAME}_objlib>)
6172
set(LIB_INCLUDES)

cmake_modules/SetupCxxFlags.cmake

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,10 +80,13 @@ if("${BUILD_WARNING_LEVEL}" STREQUAL "CHECKIN")
8080
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-unused-parameter")
8181
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-unknown-warning-option")
8282
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-constant-logical-operand")
83+
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-deprecated-declarations")
84+
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-deprecated-builtins")
8385
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
8486
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wall")
8587
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-conversion")
8688
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-deprecated-declarations")
89+
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-deprecated-builtins")
8790
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-sign-conversion")
8891
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-unused-variable")
8992
else()
@@ -182,6 +185,8 @@ elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STRE
182185

183186
# Don't complain about optimization passes that were not possible
184187
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-pass-failed")
188+
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-deprecated-declarations")
189+
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-deprecated-builtins")
185190

186191
if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
187192
# Depending on the default OSX_DEPLOYMENT_TARGET (< 10.9), libstdc++ may be

cmake_modules/ThirdpartyToolchain.cmake

Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,18 @@ else()
112112
endif()
113113
endif()
114114

115+
# Resolve LUCENE_SOURCE_URL, the download location later consumed by build_lucene().
# Precedence, as implemented below:
#   1. the PAIMON_LUCENE_URL environment variable (read at configure time), if defined;
#   2. a package named ${PAIMON_LUCENE_PKG_NAME} already present in ${THIRDPARTY_DIR};
#   3. the LucenePlusPlus GitHub tag archive, prefixed with ${THIRDPARTY_MIRROR_URL}.
# NOTE(review): set_urls() and PAIMON_LUCENE_PKG_NAME are defined outside this hunk;
# presumably set_urls() stores the given URL list into its first argument -- confirm.
if(DEFINED ENV{PAIMON_LUCENE_URL})
116+
set(LUCENE_SOURCE_URL "$ENV{PAIMON_LUCENE_URL}")
117+
else()
118+
if(EXISTS "${THIRDPARTY_DIR}/${PAIMON_LUCENE_PKG_NAME}")
119+
set_urls(LUCENE_SOURCE_URL "${THIRDPARTY_DIR}/${PAIMON_LUCENE_PKG_NAME}")
120+
else()
121+
set_urls(LUCENE_SOURCE_URL
122+
"${THIRDPARTY_MIRROR_URL}https://github.com/luceneplusplus/LucenePlusPlus/archive/refs/tags/${PAIMON_LUCENE_PKG_NAME}"
123+
)
124+
endif()
125+
endif()
126+
115127
if(DEFINED ENV{PAIMON_GLOG_URL})
116128
set(GLOG_SOURCE_URL "$ENV{PAIMON_GLOG_URL}")
117129
else()
@@ -275,6 +287,62 @@ set(EP_COMMON_CMAKE_ARGS
275287
-DCMAKE_C_FLAGS=${EP_C_FLAGS}
276288
-DCMAKE_INSTALL_LIBDIR=lib)
277289

290+
# build_lucene()
#
# Declares the external project `lucene_ep`, which fetches the archive at
# LUCENE_SOURCE_URL (verified against PAIMON_LUCENE_BUILD_SHA256_CHECKSUM) and
# installs it under ${CMAKE_CURRENT_BINARY_DIR}/lucene_ep-install, then exposes
# the result as the INTERFACE IMPORTED target `lucene`.
#
# Takes no arguments. Because this is a macro, the variables it sets
# (LUCENE_PREFIX, LUCENE_CMAKE_ARGS, LUCENE_LIB, LUCENE_INCLUDE_DIR) are set in
# the caller's scope.
#
# Prerequisites read from the surrounding scope: EP_COMMON_CMAKE_ARGS,
# EP_COMMON_OPTIONS, BOOST_INCLUDE_DIR, BOOST_LIBRARY_DIR, BOOST_INSTALL and the
# boost_* imported targets, which build_boost() in this file provides.
macro(build_lucene)
291+
message(STATUS "Building lucene from source")
292+
set(LUCENE_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/lucene_ep-install")
293+
# NOTE(review): EP_COMMON_CMAKE_ARGS already contains -DCMAKE_C_FLAGS=${EP_C_FLAGS};
# the -DCMAKE_C_FLAGS=-pthread entry below is passed after it, so presumably the
# later value wins and the common C flags are dropped for this project (the same
# may apply to CMAKE_CXX_FLAGS) -- confirm this is intended.
set(LUCENE_CMAKE_ARGS
294+
${EP_COMMON_CMAKE_ARGS}
295+
"-DENABLE_TEST=OFF"
296+
"-DCMAKE_C_FLAGS=-pthread"
297+
"-DCMAKE_CXX_FLAGS=-pthread"
298+
"-DCMAKE_EXE_LINKER_FLAGS=-pthread"
299+
"-DBoost_INCLUDE_DIR=${BOOST_INCLUDE_DIR}"
300+
"-DBoost_LIBRARY_DIR=${BOOST_LIBRARY_DIR}"
301+
"-DBOOST_ROOT=${BOOST_INSTALL}"
302+
"-DBoost_CHRONO_FOUND=TRUE"
303+
"-DBoost_THREAD_FOUND=TRUE"
304+
"-DCMAKE_INSTALL_PREFIX=${LUCENE_PREFIX}")
305+
306+
# The library path is hard-coded to the versioned shared object name.
# NOTE(review): assumes an ELF platform and that upstream keeps the ".so.0" suffix -- confirm.
set(LUCENE_LIB "${LUCENE_PREFIX}/lib/liblucene++.so.0")
307+
# DEPENDS lists the boost_* imported targets so that the external project is
# ordered after them; each of those targets depends on boost_ep in build_boost().
externalproject_add(lucene_ep
308+
${EP_COMMON_OPTIONS}
309+
URL ${LUCENE_SOURCE_URL}
310+
URL_HASH "SHA256=${PAIMON_LUCENE_BUILD_SHA256_CHECKSUM}"
311+
CMAKE_ARGS ${LUCENE_CMAKE_ARGS}
312+
BUILD_BYPRODUCTS ${LUCENE_LIB}
313+
DEPENDS boost_date_time
314+
boost_filesystem
315+
boost_regex
316+
boost_thread
317+
boost_iostreams
318+
boost_system
319+
boost_chrono
320+
boost_atomic)
321+
322+
set(LUCENE_INCLUDE_DIR "${LUCENE_PREFIX}/include")
323+
# The include directory must exist before it is referenced by a target.
324+
file(MAKE_DIRECTORY "${LUCENE_INCLUDE_DIR}")
325+
# Directory-scoped: these include paths apply to targets defined in this
# directory scope, not only to consumers of `lucene`.
# NOTE(review): BOOST_EXTRA_INCLUDE_DIR is not set anywhere in the visible code;
# it expands to nothing if undefined -- confirm where it comes from.
include_directories(SYSTEM ${LUCENE_INCLUDE_DIR} ${BOOST_INCLUDE_DIR}
326+
${BOOST_EXTRA_INCLUDE_DIR})
327+
add_library(lucene INTERFACE IMPORTED)
328+
target_include_directories(lucene SYSTEM INTERFACE "${LUCENE_INCLUDE_DIR}")
329+
target_compile_options(lucene INTERFACE -pthread)
330+
331+
# Consumers of `lucene` link the built shared library, the boost_* imported
# targets, and the system libraries pthread and dl (linked by plain name).
target_link_libraries(lucene
332+
INTERFACE "${LUCENE_LIB}"
333+
boost_date_time
334+
boost_filesystem
335+
boost_regex
336+
boost_thread
337+
boost_iostreams
338+
boost_system
339+
boost_chrono
340+
boost_atomic
341+
pthread
342+
dl)
343+
# Attach the external project to the imported target as a build dependency.
add_dependencies(lucene lucene_ep)
344+
endmacro()
345+
278346
macro(build_rapidjson)
279347
message(STATUS "Building RapidJSON from source")
280348
set(RAPIDJSON_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/rapidjson_ep-install")
@@ -342,6 +410,99 @@ macro(build_fmt)
342410
add_dependencies(fmt fmt_ep)
343411
endmacro(build_fmt)
344412

413+
# build_boost()
#
# Declares the external project `boost_ep`, which clones boostorg/boost at tag
# boost-${PAIMON_BOOST_BUILD_VERSION} and builds the date_time, filesystem,
# iostreams, regex, system, thread, chrono and atomic libraries as static
# archives, then declares one STATIC IMPORTED target per library
# (boost_atomic, boost_chrono, boost_date_time, boost_filesystem, boost_regex,
# boost_thread, boost_iostreams, boost_system).
#
# Takes no arguments. Because this is a macro, BOOST_PREFIX, BOOST_INSTALL,
# BOOST_INCLUDE_DIR, BOOST_LIBRARY_DIR and BOOST_BYPRODUCTS are set in the
# caller's scope; build_lucene() in this file reads several of them.
macro(build_boost)
414+
message(STATUS "Building boost from source")
415+
set(BOOST_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/boost_ep-prefix")
416+
set(BOOST_INSTALL "${CMAKE_CURRENT_BINARY_DIR}/boost_ep-install")
417+
set(BOOST_INCLUDE_DIR "${BOOST_INSTALL}/include")
418+
set(BOOST_LIBRARY_DIR ${BOOST_INSTALL}/lib)
419+
# Both directories are created at configure time, before boost_ep has run.
# NOTE(review): presumably needed because the imported targets below reference
# BOOST_INCLUDE_DIR via INTERFACE_INCLUDE_DIRECTORIES -- confirm.
file(MAKE_DIRECTORY ${BOOST_INCLUDE_DIR})
420+
file(MAKE_DIRECTORY ${BOOST_LIBRARY_DIR})
421+
422+
# Static archives expected from the build; passed to BUILD_BYPRODUCTS below.
set(BOOST_BYPRODUCTS
423+
${BOOST_LIBRARY_DIR}/libboost_date_time.a
424+
${BOOST_LIBRARY_DIR}/libboost_filesystem.a
425+
${BOOST_LIBRARY_DIR}/libboost_system.a
426+
${BOOST_LIBRARY_DIR}/libboost_regex.a
427+
${BOOST_LIBRARY_DIR}/libboost_thread.a
428+
${BOOST_LIBRARY_DIR}/libboost_atomic.a
429+
${BOOST_LIBRARY_DIR}/libboost_chrono.a
430+
${BOOST_LIBRARY_DIR}/libboost_iostreams.a)
431+
432+
# Step overview:
#   download  - git clone with full history (GIT_SHALLOW FALSE) and recursive submodules;
#   configure - bootstrap.sh restricted to the eight libraries listed;
#   build     - b2 run in the source tree; the trailing `install` argument means
#               the libraries are installed into BOOST_INSTALL during this step;
#   install   - a bash one-liner that copies headers from libs/*/include/boost
#               and libs/*/*/include/boost into ${BOOST_INSTALL}/include/boost.
# NOTE(review): the install step requires bash and cp on the build host.
externalproject_add(boost_ep
433+
GIT_REPOSITORY https://github.com/boostorg/boost.git
434+
GIT_TAG boost-${PAIMON_BOOST_BUILD_VERSION}
435+
GIT_SHALLOW FALSE
436+
GIT_PROGRESS TRUE
437+
GIT_SUBMODULES_RECURSE TRUE
438+
CONFIGURE_COMMAND ${BOOST_PREFIX}/src/boost_ep/bootstrap.sh
439+
--with-libraries=date_time,filesystem,iostreams,regex,system,thread,chrono,atomic
440+
BUILD_IN_SOURCE TRUE
441+
BUILD_COMMAND ${BOOST_PREFIX}/src/boost_ep/b2
442+
--prefix=${BOOST_INSTALL}
443+
--libdir=${BOOST_LIBRARY_DIR} link=static
444+
runtime-link=shared threading=multi variant=release
445+
cxxflags=-fPIC install
446+
INSTALL_COMMAND bash -c
447+
"mkdir -p ${BOOST_INSTALL}/include/boost && cp -r ${BOOST_PREFIX}/src/boost_ep/libs/*/include/boost/* ${BOOST_INSTALL}/include/boost && cp -r ${BOOST_PREFIX}/src/boost_ep/libs/*/*/include/boost/* ${BOOST_INSTALL}/include/boost"
448+
BUILD_BYPRODUCTS ${BOOST_BYPRODUCTS}
449+
LOG_DOWNLOAD ON
450+
LOG_CONFIGURE ON
451+
LOG_BUILD ON)
452+
453+
# Directory-scoped: adds the Boost headers as a SYSTEM include path for targets
# defined in this directory scope.
include_directories(SYSTEM ${BOOST_INCLUDE_DIR})
454+
455+
# One STATIC IMPORTED target per library, each pointing at its archive in
# BOOST_LIBRARY_DIR and exporting BOOST_INCLUDE_DIR to consumers.
add_library(boost_atomic STATIC IMPORTED)
456+
set_target_properties(boost_atomic
457+
PROPERTIES IMPORTED_LOCATION
458+
${BOOST_LIBRARY_DIR}/libboost_atomic.a
459+
INTERFACE_INCLUDE_DIRECTORIES ${BOOST_INCLUDE_DIR})
460+
add_library(boost_chrono STATIC IMPORTED)
461+
set_target_properties(boost_chrono
462+
PROPERTIES IMPORTED_LOCATION
463+
${BOOST_LIBRARY_DIR}/libboost_chrono.a
464+
INTERFACE_INCLUDE_DIRECTORIES ${BOOST_INCLUDE_DIR})
465+
add_library(boost_date_time STATIC IMPORTED)
466+
set_target_properties(boost_date_time
467+
PROPERTIES IMPORTED_LOCATION
468+
${BOOST_LIBRARY_DIR}/libboost_date_time.a
469+
INTERFACE_INCLUDE_DIRECTORIES ${BOOST_INCLUDE_DIR})
470+
add_library(boost_filesystem STATIC IMPORTED)
471+
set_target_properties(boost_filesystem
472+
PROPERTIES IMPORTED_LOCATION
473+
${BOOST_LIBRARY_DIR}/libboost_filesystem.a
474+
INTERFACE_INCLUDE_DIRECTORIES ${BOOST_INCLUDE_DIR})
475+
add_library(boost_regex STATIC IMPORTED)
476+
set_target_properties(boost_regex
477+
PROPERTIES IMPORTED_LOCATION
478+
${BOOST_LIBRARY_DIR}/libboost_regex.a
479+
INTERFACE_INCLUDE_DIRECTORIES ${BOOST_INCLUDE_DIR})
480+
add_library(boost_thread STATIC IMPORTED)
481+
set_target_properties(boost_thread
482+
PROPERTIES IMPORTED_LOCATION
483+
${BOOST_LIBRARY_DIR}/libboost_thread.a
484+
INTERFACE_INCLUDE_DIRECTORIES ${BOOST_INCLUDE_DIR})
485+
add_library(boost_iostreams STATIC IMPORTED)
486+
set_target_properties(boost_iostreams
487+
PROPERTIES IMPORTED_LOCATION
488+
${BOOST_LIBRARY_DIR}/libboost_iostreams.a
489+
INTERFACE_INCLUDE_DIRECTORIES ${BOOST_INCLUDE_DIR})
490+
add_library(boost_system STATIC IMPORTED)
491+
set_target_properties(boost_system
492+
PROPERTIES IMPORTED_LOCATION
493+
${BOOST_LIBRARY_DIR}/libboost_system.a
494+
INTERFACE_INCLUDE_DIRECTORIES ${BOOST_INCLUDE_DIR})
495+
496+
# Every imported target depends on boost_ep, the project that produces its archive.
add_dependencies(boost_atomic boost_ep)
497+
add_dependencies(boost_chrono boost_ep)
498+
add_dependencies(boost_date_time boost_ep)
499+
add_dependencies(boost_filesystem boost_ep)
500+
add_dependencies(boost_regex boost_ep)
501+
add_dependencies(boost_thread boost_ep)
502+
add_dependencies(boost_iostreams boost_ep)
503+
add_dependencies(boost_system boost_ep)
504+
endmacro(build_boost)
505+
345506
macro(build_snappy)
346507
message(STATUS "Building snappy from source")
347508
set(SNAPPY_HOME "${CMAKE_CURRENT_BINARY_DIR}/snappy_ep-install")
@@ -1108,3 +1269,7 @@ if(PAIMON_ENABLE_JINDO)
11081269
build_jindosdk_c()
11091270
build_jindosdk_nextarch()
11101271
endif()
1272+
# Build the Lucene index dependencies only when the feature option is enabled.
# build_boost() is called first: build_lucene() reads the BOOST_* variables and
# references the boost_* imported targets that build_boost() creates.
if(PAIMON_ENABLE_LUCENE)
1273+
build_boost()
1274+
build_lucene()
1275+
endif()

include/paimon/global_config.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ namespace paimon {
2929
/// not necessarily the exact number of threads at a given point in time.
3030
///
3131
/// You can change this number using SetArrowCpuThreadPoolCapacity().
32-
PAIMON_EXPORT int GetArrowCpuThreadPoolCapacity();
32+
PAIMON_EXPORT int32_t GetArrowCpuThreadPoolCapacity();
3333

3434
/// Set the capacity of the arrow's global thread pool
3535
/// This is a simple wrapper of arrow::SetCpuThreadPoolCapacity()
@@ -40,6 +40,6 @@ PAIMON_EXPORT int GetArrowCpuThreadPoolCapacity();
4040
/// The current number is returned by GetArrowCpuThreadPoolCapacity().
4141
/// Currently, this capacity will significantly affect the performance
4242
/// of parquet file batch read.
43-
PAIMON_EXPORT Status SetArrowCpuThreadPoolCapacity(int threads);
43+
PAIMON_EXPORT Status SetArrowCpuThreadPoolCapacity(int32_t threads);
4444

4545
} // namespace paimon

include/paimon/global_index/global_index_reader.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,12 +38,16 @@ namespace paimon {
3838
class PAIMON_EXPORT GlobalIndexReader : public FunctionVisitor<std::shared_ptr<GlobalIndexResult>> {
3939
public:
4040
/// VisitVectorSearch performs approximate vector similarity search.
41-
/// @note `VisitVectorSearch` is thread-safe (not coroutine-safe) while other `VisitXXX` is not
42-
/// thread-safe.
4341
/// @warning `VisitVectorSearch` may return error status when it is incorrectly invoked (e.g.,
4442
/// BitmapGlobalIndexReader call `VisitVectorSearch`).
4543
virtual Result<std::shared_ptr<VectorSearchGlobalIndexResult>> VisitVectorSearch(
4644
const std::shared_ptr<VectorSearch>& vector_search) = 0;
45+
46+
/// @return true if the reader is thread-safe; false otherwise.
47+
virtual bool IsThreadSafe() const = 0;
48+
49+
/// @return An identifier representing the index type. (e.g., "bitmap", "lumina").
50+
virtual std::string GetIndexType() const = 0;
4751
};
4852

4953
} // namespace paimon

0 commit comments

Comments
 (0)