Skip to content

Commit 5dd96b7

Browse files
committed
Merge remote-tracking branch 'upstream/main' into add_deserializer
2 parents 79fb000 + 93c2fe8 commit 5dd96b7

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

43 files changed

+2724
-466
lines changed

CMakeLists.txt

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,11 +108,11 @@ set(SPARROW_IPC_HEADERS
108108
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/chunk_memory_serializer.hpp
109109
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/config/config.hpp
110110
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/config/sparrow_ipc_version.hpp
111+
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/compression.hpp
111112
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/deserialize_fixedsizebinary_array.hpp
112113
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/deserialize_primitive_array.hpp
113114
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/deserialize_utils.hpp
114115
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/deserialize_variable_size_binary_array.hpp
115-
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/deserialize_variable_size_binary_array.hpp
116116
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/deserialize.hpp
117117
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/deserializer.hpp
118118
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/encapsulated_message.hpp
@@ -123,6 +123,7 @@ set(SPARROW_IPC_HEADERS
123123
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/serialize_utils.hpp
124124
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/serialize.hpp
125125
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/serializer.hpp
126+
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/stream_file_serializer.hpp
126127
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/utils.hpp
127128
)
128129

@@ -133,6 +134,8 @@ set(SPARROW_IPC_SRC
133134
${SPARROW_IPC_SOURCE_DIR}/arrow_interface/arrow_schema.cpp
134135
${SPARROW_IPC_SOURCE_DIR}/arrow_interface/arrow_schema/private_data.cpp
135136
${SPARROW_IPC_SOURCE_DIR}/chunk_memory_serializer.cpp
137+
${SPARROW_IPC_SOURCE_DIR}/compression.cpp
138+
${SPARROW_IPC_SOURCE_DIR}/compression_impl.hpp
136139
${SPARROW_IPC_SOURCE_DIR}/deserialize_fixedsizebinary_array.cpp
137140
${SPARROW_IPC_SOURCE_DIR}/deserialize_utils.cpp
138141
${SPARROW_IPC_SOURCE_DIR}/deserialize.cpp
@@ -142,6 +145,7 @@ set(SPARROW_IPC_SRC
142145
${SPARROW_IPC_SOURCE_DIR}/serialize_utils.cpp
143146
${SPARROW_IPC_SOURCE_DIR}/serialize.cpp
144147
${SPARROW_IPC_SOURCE_DIR}/serializer.cpp
148+
${SPARROW_IPC_SOURCE_DIR}/stream_file_serializer.cpp
145149
${SPARROW_IPC_SOURCE_DIR}/utils.cpp
146150
)
147151

@@ -254,6 +258,9 @@ target_link_libraries(sparrow-ipc
254258
PUBLIC
255259
sparrow::sparrow
256260
flatbuffers::flatbuffers
261+
PRIVATE
262+
lz4::lz4
263+
zstd::libzstd
257264
)
258265

259266
# Ensure generated headers are available when building sparrow-ipc
@@ -319,6 +326,25 @@ if (TARGET flatbuffers)
319326
endif()
320327
endif()
321328

329+
# When lz4 was fetched using FetchContent its targets are not imported, so they
# need to be exported explicitly together with sparrow-ipc.
# `lz4_static` is needed as this is the actual library and `lz4` is an interface
# pointing to it. If `lz4_shared` is used instead for some reason, modify the
# list below accordingly.
foreach(lz4_candidate IN ITEMS lz4 lz4_static)
    if(TARGET ${lz4_candidate})
        get_target_property(is_imported ${lz4_candidate} IMPORTED)
        if(NOT is_imported)
            list(APPEND SPARROW_IPC_EXPORTED_TARGETS ${lz4_candidate})
        endif()
    endif()
endforeach()
347+
322348
install(TARGETS ${SPARROW_IPC_EXPORTED_TARGETS}
323349
EXPORT ${PROJECT_NAME}-targets)
324350

cmake/Findlz4.cmake

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
# Find LZ4 library and headers
#
# This module defines:
#   LZ4_FOUND        - True if lz4 is found
#   LZ4_INCLUDE_DIRS - LZ4 include directories
#   LZ4_LIBRARIES    - Libraries needed to use LZ4
#   LZ4_VERSION      - LZ4 version number (only when it could be determined)
#
# It also creates the imported target `lz4::lz4` and its alias `LZ4::LZ4`.

# pkg-config is optional: its results are only used as search hints and as a
# fallback source for the version. A dedicated `PC_LZ4` prefix keeps its result
# variables from clobbering the LZ4_* variables this module publishes.
find_package(PkgConfig QUIET)
if(PKG_CONFIG_FOUND)
    pkg_check_modules(PC_LZ4 QUIET liblz4)
    if(NOT PC_LZ4_FOUND)
        message(STATUS "Did not find 'liblz4.pc', trying 'lz4.pc'")
        pkg_check_modules(PC_LZ4 QUIET lz4)
    endif()
endif()

find_path(LZ4_INCLUDE_DIR
    NAMES lz4.h
    HINTS ${PC_LZ4_INCLUDEDIR} ${PC_LZ4_INCLUDE_DIRS})
find_library(LZ4_LIBRARY
    NAMES lz4 liblz4
    HINTS ${PC_LZ4_LIBDIR} ${PC_LZ4_LIBRARY_DIRS})

# Determine the version: prefer the LZ4_VERSION_{MAJOR,MINOR,RELEASE} macros of
# the header that was actually found, otherwise fall back to pkg-config.
set(lz4_detected_version "")
if(LZ4_INCLUDE_DIR AND EXISTS "${LZ4_INCLUDE_DIR}/lz4.h")
    file(STRINGS "${LZ4_INCLUDE_DIR}/lz4.h" lz4_version_defines
        REGEX "^#[ \t]*define[ \t]+LZ4_VERSION_(MAJOR|MINOR|RELEASE)[ \t]+[0-9]+")
    set(lz4_version_parts "")
    foreach(lz4_component IN ITEMS MAJOR MINOR RELEASE)
        if(lz4_version_defines MATCHES "LZ4_VERSION_${lz4_component}[ \t]+([0-9]+)")
            list(APPEND lz4_version_parts "${CMAKE_MATCH_1}")
        endif()
    endforeach()
    list(LENGTH lz4_version_parts lz4_version_parts_count)
    # Only trust the header when all three components were found.
    if(lz4_version_parts_count EQUAL 3)
        string(REPLACE ";" "." lz4_detected_version "${lz4_version_parts}")
    endif()
endif()
if(NOT lz4_detected_version AND PC_LZ4_VERSION)
    set(lz4_detected_version "${PC_LZ4_VERSION}")
endif()
if(lz4_detected_version)
    set(LZ4_VERSION "${lz4_detected_version}")
endif()

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(lz4
    REQUIRED_VARS LZ4_LIBRARY LZ4_INCLUDE_DIR
    VERSION_VAR LZ4_VERSION)
mark_as_advanced(LZ4_INCLUDE_DIR LZ4_LIBRARY)

# Publish the documented upper-case result variables explicitly.
set(LZ4_FOUND ${lz4_FOUND})
set(LZ4_LIBRARIES ${LZ4_LIBRARY})
set(LZ4_INCLUDE_DIRS ${LZ4_INCLUDE_DIR})

if(lz4_FOUND AND NOT TARGET lz4::lz4)
    add_library(lz4::lz4 UNKNOWN IMPORTED)
    set_target_properties(lz4::lz4 PROPERTIES
        IMPORTED_LOCATION "${LZ4_LIBRARIES}"
        INTERFACE_INCLUDE_DIRECTORIES "${LZ4_INCLUDE_DIRS}")
    # The alias is only created next to the imported target defined above, so
    # it never ends up aliasing a pre-existing `lz4::lz4` of another kind.
    if(NOT TARGET LZ4::LZ4)
        add_library(LZ4::LZ4 ALIAS lz4::lz4)
    endif()
endif()

cmake/external_dependencies.cmake

Lines changed: 96 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@ endif()
1111

1212
function(find_package_or_fetch)
1313
set(options)
14-
set(oneValueArgs CONAN_PKG_NAME PACKAGE_NAME GIT_REPOSITORY TAG)
15-
set(multiValueArgs)
14+
set(oneValueArgs CONAN_PKG_NAME PACKAGE_NAME GIT_REPOSITORY TAG SOURCE_SUBDIR)
15+
set(multiValueArgs CMAKE_ARGS)
1616
cmake_parse_arguments(PARSE_ARGV 0 arg
1717
"${options}" "${oneValueArgs}" "${multiValueArgs}"
1818
)
@@ -26,21 +26,43 @@ function(find_package_or_fetch)
2626
find_package(${actual_pkg_name} ${FIND_PACKAGE_OPTIONS})
2727
endif()
2828

29-
if(FETCH_DEPENDENCIES_WITH_CMAKE STREQUAL "ON" OR FETCH_DEPENDENCIES_WITH_CMAKE STREQUAL "MISSING")
30-
if(NOT ${actual_pkg_name}_FOUND)
31-
message(STATUS "📦 Fetching ${arg_PACKAGE_NAME}")
32-
FetchContent_Declare(
33-
${arg_PACKAGE_NAME}
34-
GIT_SHALLOW TRUE
35-
GIT_REPOSITORY ${arg_GIT_REPOSITORY}
36-
GIT_TAG ${arg_TAG}
37-
GIT_PROGRESS TRUE
38-
SYSTEM
39-
EXCLUDE_FROM_ALL)
40-
FetchContent_MakeAvailable(${arg_PACKAGE_NAME})
41-
message(STATUS "\t✅ Fetched ${arg_PACKAGE_NAME}")
42-
else()
29+
if(arg_GIT_REPOSITORY)
30+
if(FETCH_DEPENDENCIES_WITH_CMAKE STREQUAL "ON" OR FETCH_DEPENDENCIES_WITH_CMAKE STREQUAL "MISSING")
31+
if(NOT ${actual_pkg_name}_FOUND)
32+
message(STATUS "📦 Fetching ${arg_PACKAGE_NAME}")
33+
# Apply CMAKE_ARGS before fetching: every "NAME=VALUE" entry is forced into the
# cache, so the value is already set when the fetched project is configured.
# NOTE: the cache entry type is always BOOL, whatever the value looks like.
foreach(cmake_arg IN LISTS arg_CMAKE_ARGS)
    if(cmake_arg MATCHES "^([^=]+)=(.*)$")
        # Quote the value so that an empty or list-like value stays one argument.
        set(${CMAKE_MATCH_1} "${CMAKE_MATCH_2}" CACHE BOOL "" FORCE)
    else()
        message(WARNING "Ignoring malformed CMAKE_ARGS entry '${cmake_arg}' (expected NAME=VALUE)")
    endif()
endforeach()
40+
set(fetch_args
41+
${arg_PACKAGE_NAME}
42+
GIT_SHALLOW TRUE
43+
GIT_REPOSITORY ${arg_GIT_REPOSITORY}
44+
GIT_TAG ${arg_TAG}
45+
GIT_PROGRESS TRUE
46+
SYSTEM
47+
EXCLUDE_FROM_ALL)
48+
if(arg_SOURCE_SUBDIR)
49+
list(APPEND fetch_args SOURCE_SUBDIR ${arg_SOURCE_SUBDIR})
50+
endif()
51+
FetchContent_Declare(${fetch_args})
52+
FetchContent_MakeAvailable(${arg_PACKAGE_NAME})
53+
message(STATUS "\t✅ Fetched ${arg_PACKAGE_NAME}")
54+
else()
55+
message(STATUS "📦 ${actual_pkg_name} found here: ${${actual_pkg_name}_DIR}")
56+
endif()
57+
endif()
58+
else()
59+
# No GIT_REPOSITORY provided - only find_package is attempted
60+
if(${actual_pkg_name}_FOUND)
4361
message(STATUS "📦 ${actual_pkg_name} found here: ${${actual_pkg_name}_DIR}")
62+
elseif(FETCH_DEPENDENCIES_WITH_CMAKE STREQUAL "OFF")
63+
message(FATAL_ERROR "Could not find ${actual_pkg_name} and no GIT_REPOSITORY provided for fetching")
64+
else()
65+
message(WARNING "Could not find ${actual_pkg_name} and no GIT_REPOSITORY provided for fetching")
4466
endif()
4567
endif()
4668
endfunction()
@@ -52,14 +74,17 @@ endif()
5274
find_package_or_fetch(
5375
PACKAGE_NAME sparrow
5476
GIT_REPOSITORY https://github.com/man-group/sparrow.git
55-
TAG 1.2.0
77+
TAG 1.3.0
5678
)
5779
unset(CREATE_JSON_READER_TARGET)
5880

5981
if(NOT TARGET sparrow::sparrow)
6082
add_library(sparrow::sparrow ALIAS sparrow)
6183
endif()
6284
if(${SPARROW_IPC_BUILD_TESTS})
85+
find_package_or_fetch(
86+
PACKAGE_NAME sparrow-json-reader
87+
)
6388
if(NOT TARGET sparrow::json_reader)
6489
add_library(sparrow::json_reader ALIAS json_reader)
6590
endif()
@@ -79,6 +104,50 @@ if(NOT TARGET flatbuffers::flatbuffers)
79104
endif()
80105
unset(FLATBUFFERS_BUILD_TESTS CACHE)
81106

107+
# Fetching lz4
# lz4 is built as static by default if bundled. To allow shared libraries if
# needed, disable bundled mode before the call below, e.g.:
#   set(LZ4_BUNDLED_MODE OFF CACHE BOOL "" FORCE)
#   set(BUILD_SHARED_LIBS ON CACHE BOOL "" FORCE)
find_package_or_fetch(
    PACKAGE_NAME lz4
    GIT_REPOSITORY https://github.com/lz4/lz4.git
    TAG v1.10.0
    SOURCE_SUBDIR build/cmake
    CMAKE_ARGS
        "LZ4_BUILD_CLI=OFF"
        "LZ4_BUILD_LEGACY_LZ4C=OFF"
)

# Make sure the namespaced name `lz4::lz4` is available. An ALIAS can only be
# created for an existing target, so report a clear error when lz4 was neither
# found nor fetched instead of failing inside add_library().
if(NOT TARGET lz4::lz4)
    if(TARGET lz4)
        add_library(lz4::lz4 ALIAS lz4)
    else()
        message(FATAL_ERROR "lz4 was neither found nor fetched: no 'lz4::lz4' or 'lz4' target is available")
    endif()
endif()
125+
126+
find_package_or_fetch(
    PACKAGE_NAME zstd
    GIT_REPOSITORY https://github.com/facebook/zstd.git
    TAG v1.5.7
    SOURCE_SUBDIR build/cmake
    CMAKE_ARGS
        "ZSTD_BUILD_PROGRAMS=OFF"
)

# Provide the single name `zstd::libzstd`, pointing to the shared or the static
# zstd library depending on SPARROW_IPC_BUILD_SHARED.
if(NOT TARGET zstd::libzstd)
    if(SPARROW_IPC_BUILD_SHARED)
        set(zstd_linkage shared)
    else()
        set(zstd_linkage static)
    endif()

    # Namespaced target first (Linux case), plain target second (Windows case).
    foreach(zstd_candidate IN ITEMS zstd::libzstd_${zstd_linkage} libzstd_${zstd_linkage})
        if(TARGET ${zstd_candidate})
            add_library(zstd::libzstd ALIAS ${zstd_candidate})
            break()
        endif()
    endforeach()

    # Fail here with an explicit message rather than leaving the target
    # undefined for the code that links against it.
    if(NOT TARGET zstd::libzstd)
        message(FATAL_ERROR "zstd was neither found nor fetched: no ${zstd_linkage} zstd target ('zstd::libzstd_${zstd_linkage}' or 'libzstd_${zstd_linkage}') is available")
    endif()
endif()
150+
82151
if(SPARROW_IPC_BUILD_TESTS)
83152
find_package_or_fetch(
84153
PACKAGE_NAME doctest
@@ -109,10 +178,18 @@ if(SPARROW_IPC_BUILD_TESTS)
109178
)
110179
message(STATUS "\t✅ Fetched arrow-testing")
111180

112-
# Iterate over all the files in the arrow-testing-data source directiory. When it's a gz, extract in place.
113-
file(GLOB_RECURSE arrow_testing_data_targz_files CONFIGURE_DEPENDS
181+
# Collect every *.json.gz file found (recursively) under the cpp-21.0.0 directory
182+
file(GLOB_RECURSE arrow_testing_data_targz_files_cpp_21 CONFIGURE_DEPENDS
114183
"${arrow-testing_SOURCE_DIR}/data/arrow-ipc-stream/integration/cpp-21.0.0/*.json.gz"
115184
)
185+
# Collect every *.json.gz file found (recursively) under the 2.0.0-compression directory
186+
file(GLOB_RECURSE arrow_testing_data_targz_files_compression CONFIGURE_DEPENDS
187+
"${arrow-testing_SOURCE_DIR}/data/arrow-ipc-stream/integration/2.0.0-compression/*.json.gz"
188+
)
189+
190+
# Combine lists of files
191+
list(APPEND arrow_testing_data_targz_files ${arrow_testing_data_targz_files_cpp_21} ${arrow_testing_data_targz_files_compression})
192+
# Iterate over all the files in the arrow-testing-data source directory. When it's a gz, extract in place.
116193
foreach(file_path IN LISTS arrow_testing_data_targz_files)
117194
cmake_path(GET file_path PARENT_PATH parent_dir)
118195
cmake_path(GET file_path STEM filename)
@@ -128,5 +205,4 @@ if(SPARROW_IPC_BUILD_TESTS)
128205
endif()
129206
endif()
130207
endforeach()
131-
132208
endif()

conanfile.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@ def configure(self):
4545
def requirements(self):
4646
self.requires("sparrow/1.0.0")
4747
self.requires(f"flatbuffers/{self._flatbuffers_version}")
48+
self.requires("lz4/1.9.4")
49+
self.requires("zstd/1.5.7")
4850
if self.options.get_safe("build_tests"):
4951
self.test_requires("doctest/2.4.12")
5052

environment-dev.yml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,12 @@ dependencies:
88
- cxx-compiler
99
# Libraries dependencies
1010
- flatbuffers
11+
- lz4-c
12+
- zstd
1113
- nlohmann_json
12-
- sparrow-devel >=1.1.2
14+
- sparrow-devel
15+
- sparrow-json-reader
16+
# Testing dependencies
1317
- doctest
1418
# Documentation dependencies
1519
- doxygen

examples/CMakeLists.txt

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,16 +31,40 @@ add_dependencies(write_and_read_streams generate_flatbuffers_headers)
3131

3232
# Optional: Copy to build directory for easy execution
3333
if(WIN32)
    # Select the zstd DLL to place next to the example executable.
    # Only a shared build has a DLL to copy; a static library is not a DLL,
    # so nothing is selected (and nothing is reported) for static builds.
    set(ZSTD_DLL_TARGET "")
    if(SPARROW_IPC_BUILD_SHARED)
        if(TARGET zstd::libzstd_shared)  # shared target with namespace
            set(ZSTD_DLL_TARGET zstd::libzstd_shared)
        elseif(TARGET libzstd_shared)  # Building deps from src case: shared target without namespace
            set(ZSTD_DLL_TARGET libzstd_shared)
        endif()
    endif()

    # On Windows, copy required DLLs
    # All copy commands are gathered in one list and attached to a single
    # POST_BUILD step at the end of this block.
    set(DLL_COPY_COMMANDS "")

    # Copies that are always performed
    list(APPEND DLL_COPY_COMMANDS
        COMMAND ${CMAKE_COMMAND} -E copy_if_different
            "$<TARGET_FILE:sparrow::sparrow>"
            "$<TARGET_FILE_DIR:write_and_read_streams>"
        COMMAND ${CMAKE_COMMAND} -E copy_if_different
            "$<TARGET_FILE:sparrow-ipc>"
            "$<TARGET_FILE_DIR:write_and_read_streams>"
    )

    # zstd copy, only when a shared zstd target was selected above
    if(ZSTD_DLL_TARGET)
        list(APPEND DLL_COPY_COMMANDS
            COMMAND ${CMAKE_COMMAND} -E copy_if_different
                "$<TARGET_FILE:${ZSTD_DLL_TARGET}>"
                "$<TARGET_FILE_DIR:write_and_read_streams>"
        )
    elseif(SPARROW_IPC_BUILD_SHARED)
        message(WARNING "ZSTD DLL will not be copied for examples.")
    endif()

    add_custom_command(
        TARGET write_and_read_streams POST_BUILD
        ${DLL_COPY_COMMANDS}
        COMMENT "Copying required DLLs to example executable directory"
        VERBATIM
    )
endif()
4670

0 commit comments

Comments
 (0)