Skip to content
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 74 additions & 13 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
cmake_minimum_required(VERSION 3.28)

project(sparrow-ipc CXX)
project(sparrow-ipc LANGUAGES CXX)

set(CMAKE_CXX_STANDARD 20 CACHE STRING "C++ Standard")
set(CMAKE_CXX_STANDARD_REQUIRED ON CACHE BOOL "C++ Standard Required")
Expand All @@ -15,6 +15,9 @@ include(external_dependencies)

set(SPARROW_IPC_COMPILE_DEFINITIONS "" CACHE STRING "List of public compile definitions of the sparrow-ipc target")

set(SPARROW_IPC_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/include)
set(SPARROW_IPC_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/src)

# Linter options
# =============
OPTION(ACTIVATE_LINTER "Create targets to run clang-format" OFF)
Expand All @@ -26,6 +29,38 @@ if(ACTIVATE_LINTER)
include(clang-tidy)
endif()

# Versioning
# ==========
# The version header is the single source of truth for the project version.
# file(STRINGS) keeps only the lines of that header that declare one of the
# SPARROW_IPC_VERSION_{MAJOR,MINOR,PATCH} constants.
file(STRINGS "${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/config/sparrow_ipc_version.hpp" sparrow_ipc_version_defines
     REGEX "constexpr int SPARROW_IPC_VERSION_(MAJOR|MINOR|PATCH)")

foreach(ver ${sparrow_ipc_version_defines})
    # The pattern must carry the same SPARROW_IPC_ prefix as the filter above;
    # otherwise no line matches and the PROJECT_VERSION_* components stay unset.
    # CMAKE_MATCH_1 is the component name, CMAKE_MATCH_2 its numeric value.
    if(ver MATCHES "constexpr int SPARROW_IPC_VERSION_(MAJOR|MINOR|PATCH) = ([0-9]+);$")
        set(PROJECT_VERSION_${CMAKE_MATCH_1} "${CMAKE_MATCH_2}" CACHE INTERNAL "")
    endif()
endforeach()

# Assemble the dotted version string from the three parsed components.
set(CMAKE_PROJECT_VERSION
    ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH})

message(STATUS "Building sparrow_ipc v${CMAKE_PROJECT_VERSION}")

# Binary version
# The CURRENT / REVISION / AGE triple is documented in the libtool manual:
# https://www.gnu.org/software/libtool/manual/html_node/Updating-version-info.html#Updating-version-info
# Keep only the header lines that declare one of the three binary-version constants.
file(
    STRINGS "${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/config/sparrow_ipc_version.hpp"
    sparrow_ipc_version_defines
    REGEX "constexpr int SPARROW_IPC_BINARY_(CURRENT|REVISION|AGE)"
)

# For each retained line, capture the component name and its integer value and
# publish it as SPARROW_IPC_BINARY_<component>.
foreach(binary_define IN LISTS sparrow_ipc_version_defines)
    if(binary_define MATCHES "constexpr int SPARROW_IPC_BINARY_(CURRENT|REVISION|AGE) = ([0-9]+);$")
        set(SPARROW_IPC_BINARY_${CMAKE_MATCH_1} "${CMAKE_MATCH_2}" CACHE INTERNAL "")
    endif()
endforeach()

# Dotted form used for reporting.
set(SPARROW_IPC_BINARY_VERSION
    "${SPARROW_IPC_BINARY_CURRENT}.${SPARROW_IPC_BINARY_REVISION}.${SPARROW_IPC_BINARY_AGE}")

message(STATUS "sparrow_ipc binary version: v${SPARROW_IPC_BINARY_VERSION}")

# Build options
# =============
Expand All @@ -51,16 +86,38 @@ set(SPARROW_IPC_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/include)
set(SPARROW_IPC_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/src)

set(SPARROW_IPC_HEADERS
${SPARROW_IPC_INCLUDE_DIR}/config/config.hpp
${SPARROW_IPC_INCLUDE_DIR}/serialize.hpp
${SPARROW_IPC_INCLUDE_DIR}/serialize_primitive_array.hpp
${SPARROW_IPC_INCLUDE_DIR}/serialize_null_array.hpp
${SPARROW_IPC_INCLUDE_DIR}/utils.hpp
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/arrow_interface/arrow_array_schema_common_release.hpp
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/arrow_interface/arrow_array.hpp
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/arrow_interface/arrow_array/private_data.hpp
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/arrow_interface/arrow_schema.hpp
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/arrow_interface/arrow_schema/private_data.hpp
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/config/config.hpp
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/deserialize_variable_size_binary_array.hpp
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/deserialize_fixedsizebinary_array.hpp
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/deserialize_primitive_array.hpp
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/deserialize_utils.hpp
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/deserialize.hpp
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/encapsulated_message.hpp
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/magic_values.hpp
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/metadata.hpp
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/serialize_null_array.hpp
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/serialize_primitive_array.hpp
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/serialize.hpp
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/utils.hpp
)

set(SPARROW_IPC_SRC
${SPARROW_IPC_SOURCE_DIR}/serialize.cpp
${SPARROW_IPC_SOURCE_DIR}/arrow_interface/arrow_array.cpp
${SPARROW_IPC_SOURCE_DIR}/arrow_interface/arrow_array/private_data.cpp
${SPARROW_IPC_SOURCE_DIR}/arrow_interface/arrow_schema.cpp
${SPARROW_IPC_SOURCE_DIR}/arrow_interface/arrow_schema/private_data.cpp
${SPARROW_IPC_SOURCE_DIR}/deserialize_fixedsizebinary_array.cpp
${SPARROW_IPC_SOURCE_DIR}/deserialize_utils.cpp
${SPARROW_IPC_SOURCE_DIR}/deserialize.cpp
${SPARROW_IPC_SOURCE_DIR}/encapsulated_message.cpp
${SPARROW_IPC_SOURCE_DIR}/metadata.cpp
${SPARROW_IPC_SOURCE_DIR}/serialize_null_array.cpp
${SPARROW_IPC_SOURCE_DIR}/serialize.cpp
${SPARROW_IPC_SOURCE_DIR}/utils.cpp
)

Expand Down Expand Up @@ -117,11 +174,14 @@ add_custom_command(

add_custom_target(generate_flatbuffers_headers
DEPENDS ${FLATBUFFERS_GENERATED_HEADERS}
COMMENT "Ensuring FlatBuffers headers are generated"
)

# Interface target for generated headers
add_library(flatbuffers_interface INTERFACE)
target_include_directories(flatbuffers_interface INTERFACE ${FLATBUFFERS_GENERATED_DIR})
target_include_directories(flatbuffers_interface INTERFACE
$<BUILD_INTERFACE:${FLATBUFFERS_GENERATED_DIR}>
$<INSTALL_INTERFACE:include/flatbuffers_generated>)
add_dependencies(flatbuffers_interface generate_flatbuffers_headers)

add_library(sparrow-ipc ${SPARROW_IPC_LIBRARY_TYPE} ${SPARROW_IPC_SRC} ${SPARROW_IPC_HEADERS})
Expand All @@ -141,19 +201,20 @@ else()
target_compile_definitions(sparrow-ipc PRIVATE SPARROW_IPC_EXPORTS)
endif()

target_include_directories(sparrow-ipc
target_include_directories(sparrow-ipc
PUBLIC
${SPARROW_IPC_INCLUDE_DIR}
$<BUILD_INTERFACE:${SPARROW_IPC_INCLUDE_DIR}>
$<INSTALL_INTERFACE:include>
PRIVATE
${SPARROW_IPC_SOURCE_DIR} )
$<BUILD_INTERFACE:${FLATBUFFERS_GENERATED_DIR}>)

target_link_libraries(sparrow-ipc
PUBLIC
sparrow::sparrow
flatbuffers::flatbuffers
PRIVATE
flatbuffers_interface)
)

# Ensure generated headers are available when building sparrow-ipc
add_dependencies(sparrow-ipc generate_flatbuffers_headers)

# Tests
Expand Down
53 changes: 53 additions & 0 deletions cmake/external_dependencies.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -46,15 +46,24 @@ function(find_package_or_fetch)
endfunction()

# Mirror sparrow-ipc's shared/static choice into SPARROW_BUILD_SHARED before
# sparrow is located (presumably read by sparrow's own build when it is
# fetched from source; confirm against sparrow's CMakeLists).
set(SPARROW_BUILD_SHARED ${SPARROW_IPC_BUILD_SHARED})

# The tests use sparrow's json_reader target, so request it only for test builds.
# Use the variable name directly: if() dereferences it itself, which avoids a
# second expansion and matches the other SPARROW_IPC_BUILD_TESTS checks.
if(SPARROW_IPC_BUILD_TESTS)
    set(CREATE_JSON_READER_TARGET ON)
endif()
find_package_or_fetch(
    PACKAGE_NAME sparrow
    GIT_REPOSITORY https://github.com/man-group/sparrow.git
    TAG 1.1.0
)
# Do not let the request leak to anything configured after this point.
unset(CREATE_JSON_READER_TARGET)

# Provide the namespaced target names when the dependency did not define them
# itself, so the rest of the build can always link against sparrow::<name>.
if(NOT TARGET sparrow::sparrow)
    add_library(sparrow::sparrow ALIAS sparrow)
endif()
if(SPARROW_IPC_BUILD_TESTS AND NOT TARGET sparrow::json_reader)
    add_library(sparrow::json_reader ALIAS json_reader)
endif()

set(FLATBUFFERS_BUILD_TESTS OFF)
set(FLATBUFFERS_BUILD_SHAREDLIB ${SPARROW_IPC_BUILD_SHARED})
Expand All @@ -76,4 +85,48 @@ if(SPARROW_IPC_BUILD_TESTS)
GIT_REPOSITORY https://github.com/doctest/doctest.git
TAG v2.4.12
)

message(STATUS "📦 Fetching arrow-testing")
cmake_policy(PUSH)
# The author's intent is to suppress a warning about FetchContent_Declare
# GIT_REPOSITORY. CMP0174 was introduced in CMake 3.31, while this project
# only requires 3.28; setting an unknown policy is a hard error, so only set
# it when the running CMake knows about it.
if(POLICY CMP0174)
    cmake_policy(SET CMP0174 NEW)
endif()
# Fetch arrow-testing data (no CMake build needed)
# NOTE(review): no GIT_TAG is given, so the fetched revision is not pinned;
# consider pinning a commit to keep test data reproducible.
FetchContent_Declare(
    arrow-testing
    GIT_REPOSITORY https://github.com/apache/arrow-testing.git
    GIT_SHALLOW TRUE
)
FetchContent_MakeAvailable(arrow-testing)
cmake_policy(POP)

# Create interface library for easy access to test data: anything linking
# arrow-testing-data gets ARROW_TESTING_DATA_DIR defined as a string literal
# holding the checkout directory.
add_library(arrow-testing-data INTERFACE)
message(STATUS "Arrow testing data directory: ${arrow-testing_SOURCE_DIR}")
target_compile_definitions(arrow-testing-data INTERFACE
    ARROW_TESTING_DATA_DIR="${arrow-testing_SOURCE_DIR}"
)
message(STATUS "\t✅ Fetched arrow-testing")

# Iterate over the gzip-compressed JSON integration files of the arrow-testing
# checkout and decompress each one next to its archive, unless the decompressed
# file is already there.
file(GLOB_RECURSE arrow_testing_data_targz_files CONFIGURE_DEPENDS
    "${arrow-testing_SOURCE_DIR}/data/arrow-ipc-stream/integration/1.0.0-littleendian/*.json.gz"
)
foreach(file_path IN LISTS arrow_testing_data_targz_files)
    cmake_path(GET file_path PARENT_PATH parent_dir)
    # LAST_ONLY strips just the trailing ".gz" ("x.json.gz" -> "x.json"). The
    # default STEM strips everything from the first dot, which yields the wrong
    # destination for names that contain additional dots.
    cmake_path(GET file_path STEM LAST_ONLY filename)
    set(destination_file_path "${parent_dir}/${filename}")
    if(EXISTS "${destination_file_path}")
        message(VERBOSE "File already extracted: ${destination_file_path}")
    else()
        message(STATUS "Extracting ${file_path}")
        if(WIN32)
            # No gunzip on a stock Windows install: stream-decompress with .NET.
            execute_process(
                COMMAND powershell -Command "$i=\"${file_path}\"; $o=\"${destination_file_path}\"; [IO.Compression.GZipStream]::new([IO.File]::OpenRead($i),[IO.Compression.CompressionMode]::Decompress).CopyTo([IO.File]::Create($o))"
                RESULT_VARIABLE arrow_testing_extract_result
            )
        else()
            # -k keeps the archive, -f overwrites a stale partial output.
            execute_process(
                COMMAND gunzip -kf "${file_path}"
                RESULT_VARIABLE arrow_testing_extract_result
            )
        endif()
        # Report failures instead of silently continuing with missing test data.
        # The result is either the exit code or an error string when the tool
        # could not be started, hence the string comparison.
        if(NOT "${arrow_testing_extract_result}" STREQUAL "0")
            message(WARNING "Failed to extract ${file_path} (result: ${arrow_testing_extract_result})")
        endif()
    endif()
endforeach()

endif()
27 changes: 27 additions & 0 deletions cmake/sparrow-ipcConfig.cmake.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# sparrow-ipc cmake module
# This module sets the following variables in your project::
#
# sparrow-ipc_FOUND - true if sparrow-ipc found on the system
# sparrow-ipc_INCLUDE_DIRS - the directory containing sparrow-ipc headers
# sparrow-ipc_LIBRARY - the LOCATION property of the sparrow-ipc::sparrow-ipc target

@PACKAGE_INIT@

# Provides find_dependency(), used below to locate the packages this package needs.
include(CMakeFindDependencyMacro)

# The @...@ placeholders are substituted when this template is configured, so
# each condition below is fixed at that time.
# NOTE(review): USE_DATE_POLYFILL is not set anywhere in the visible sparrow-ipc
# CMake code; confirm it has the intended value when this file is configured.
if("@USE_DATE_POLYFILL@")
find_dependency(date)
endif()

# NOTE(review): cmake/external_dependencies.cmake unsets CREATE_JSON_READER_TARGET
# right after sparrow is located; confirm it still holds the intended value when
# this file is configured.
if("@CREATE_JSON_READER_TARGET@")
find_dependency(nlohmann_json)
endif()

# Unconditional dependencies.
find_dependency(sparrow)
find_dependency(FlatBuffers)

# Load the exported targets only once, then expose the legacy-style variables
# documented at the top of this file.
if(NOT TARGET sparrow-ipc::sparrow-ipc)
    # NOTE(review): assumes the export file is installed next to this config
    # file as <project-name>Targets.cmake; confirm against the install(EXPORT) rule.
    include("${CMAKE_CURRENT_LIST_DIR}/@PROJECT_NAME@Targets.cmake")
    get_target_property(@PROJECT_NAME@_INCLUDE_DIRS sparrow-ipc::sparrow-ipc INTERFACE_INCLUDE_DIRECTORIES)
    get_target_property(@PROJECT_NAME@_LIBRARY sparrow-ipc::sparrow-ipc LOCATION)
endif()
25 changes: 0 additions & 25 deletions include/serialize.hpp

This file was deleted.

34 changes: 34 additions & 0 deletions include/sparrow_ipc/arrow_interface/arrow_array.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@

#pragma once

#include <vector>

#include <sparrow/c_interface.hpp>

#include "sparrow_ipc/config/config.hpp"

// Declarations of the helpers that build and release `ArrowArray` structures.
// Only the declarations are visible in this header; the definitions live elsewhere.
// NOTE(review): `std::uint8_t`, `size_t` and `int64_t` are used below, but this
// header directly includes only <vector> and <sparrow/c_interface.hpp>; confirm
// that <cstdint>/<cstddef> are reliably pulled in.
namespace sparrow_ipc
{
/**
 * Creates an `ArrowArray` and returns it by value.
 *
 * Judging by the name, the result presumably refers to the buffers without
 * owning them; confirm against the definition.
 *
 * @param length Presumably the `length` field of the resulting array.
 * @param null_count Presumably the `null_count` field of the resulting array.
 * @param offset Presumably the `offset` field of the resulting array.
 * @param buffers Buffer pointers, taken by rvalue reference.
 * @param children_count Number of entries in `children`.
 * @param children Array of pointers to child `ArrowArray`s.
 * @param dictionary Pointer to a dictionary `ArrowArray`.
 * @return The created `ArrowArray`; the result must not be discarded.
 */
[[nodiscard]] SPARROW_IPC_API ArrowArray make_non_owning_arrow_array(
int64_t length,
int64_t null_count,
int64_t offset,
std::vector<std::uint8_t*>&& buffers,
size_t children_count,
ArrowArray** children,
ArrowArray* dictionary
);

/**
 * Release function for arrays produced by the helpers declared in this header.
 *
 * @param array The array to release.
 */
SPARROW_IPC_API void release_non_owning_arrow_array(ArrowArray* array);

/**
 * Fills an existing `ArrowArray` in place; takes the same inputs as
 * `make_non_owning_arrow_array` plus the destination.
 *
 * @param array The `ArrowArray` to fill.
 * @param length Presumably the `length` field to store.
 * @param null_count Presumably the `null_count` field to store.
 * @param offset Presumably the `offset` field to store.
 * @param buffers Buffer pointers, taken by rvalue reference.
 * @param children_count Number of entries in `children`.
 * @param children Array of pointers to child `ArrowArray`s.
 * @param dictionary Pointer to a dictionary `ArrowArray`.
 */
SPARROW_IPC_API void fill_non_owning_arrow_array(
ArrowArray& array,
int64_t length,
int64_t null_count,
int64_t offset,
std::vector<std::uint8_t*>&& buffers,
size_t children_count,
ArrowArray** children,
ArrowArray* dictionary
);
}
25 changes: 25 additions & 0 deletions include/sparrow_ipc/arrow_interface/arrow_array/private_data.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#pragma once

#include <cstdint>
#include <vector>

#include "sparrow_ipc/config/config.hpp"

namespace sparrow_ipc
{
/**
 * Holds the list of raw buffer pointers associated with an `ArrowArray`.
 *
 * The class stores the pointers themselves (a `std::vector<std::uint8_t*>`);
 * judging by its name it presumably does not own the memory they point to.
 */
class non_owning_arrow_array_private_data
{
    using buffer_pointer_list = std::vector<std::uint8_t*>;

public:

    /**
     * Takes over the given list of buffer pointers.
     *
     * @param pointers Buffer pointers; the vector is moved into this object.
     */
    explicit constexpr non_owning_arrow_array_private_data(buffer_pointer_list&& pointers)
        : m_buffers_pointers(std::move(pointers))
    {
    }

    /**
     * Accessor for the stored buffer pointers as a `const void**`.
     * Defined out of line.
     */
    [[nodiscard]] SPARROW_IPC_API const void** buffers_ptrs() noexcept;

private:

    buffer_pointer_list m_buffers_pointers;
};
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@

#pragma once

#include <sparrow/c_interface.hpp>

#include "arrow_array/private_data.hpp"
#include "arrow_schema/private_data.hpp"

namespace sparrow_ipc
{
/**
* Release the children and dictionary of an `ArrowArray` or `ArrowSchema`.
*
* @tparam T `ArrowArray` or `ArrowSchema`
* @param t The `ArrowArray` or `ArrowSchema` to release.
*/
template <class T>
requires std::same_as<T, ArrowArray> || std::same_as<T, ArrowSchema>
void release_common_non_owning_arrow(T& t)
{
using private_data_type = std::conditional_t<
std::same_as<T, ArrowArray>,
non_owning_arrow_array_private_data,
non_owning_arrow_schema_private_data>;
if (t.release == nullptr)
{
return;
}
SPARROW_ASSERT_TRUE(t.private_data != nullptr);
const auto private_data = static_cast<const private_data_type*>(t.private_data);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The private data is allocated in fill_non_owning_arrow_schema, so I would expect this function to deallocate it?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's deallocated in the release_non_owning_arrow_array and release_non_owning_arrow_schema function, but you are right, let's move it to the common releaser


if (t.dictionary)
{
if (t.dictionary->release)
{
t.dictionary->release(t.dictionary);
}
delete t.dictionary;
t.dictionary = nullptr;
}

if (t.children)
{
for (int64_t i = 0; i < t.n_children; ++i)
{
T* child = t.children[i];
if (child)
{
if (child->release)
{
child->release(child);
}
delete child;
child = nullptr;
}
}
delete[] t.children;
t.children = nullptr;
}
t.release = nullptr;
}
}
Loading
Loading