Skip to content

Commit 4b8cf6c

Browse files
committed
wip
1 parent e618b0b commit 4b8cf6c

File tree

6 files changed

+61
-9
lines changed

6 files changed

+61
-9
lines changed

cmake/external_dependencies.cmake

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,4 +85,48 @@ if(SPARROW_IPC_BUILD_TESTS)
8585
GIT_REPOSITORY https://github.com/doctest/doctest.git
8686
TAG v2.4.12
8787
)
88+
89+
message(STATUS "📦 Fetching arrow-testing")
90+
cmake_policy(PUSH)
91+
cmake_policy(SET CMP0174 NEW) # Suppress warning about FetchContent_Declare GIT_REPOSITORY
92+
# Fetch arrow-testing data (no CMake build needed)
93+
FetchContent_Declare(
94+
arrow-testing
95+
GIT_REPOSITORY https://github.com/apache/arrow-testing.git
96+
GIT_SHALLOW TRUE
97+
# CONFIGURE_COMMAND ""
98+
# BUILD_COMMAND ""
99+
# INSTALL_COMMAND ""
100+
)
101+
FetchContent_MakeAvailable(arrow-testing)
102+
cmake_policy(POP)
103+
104+
# Create interface library for easy access to test data
105+
add_library(arrow-testing-data INTERFACE)
106+
message(STATUS "Arrow testing data directory: ${arrow-testing_SOURCE_DIR}")
107+
target_compile_definitions(arrow-testing-data INTERFACE
108+
ARROW_TESTING_DATA_DIR="${arrow-testing_SOURCE_DIR}"
109+
)
110+
message(STATUS "\t✅ Fetched arrow-testing")
111+
112+
# Iterate over all the files in the arrow-testing-data source directory. When a file is a .gz, extract it in place.
113+
file(GLOB_RECURSE arrow_testing_data_targz_files CONFIGURE_DEPENDS
114+
"${arrow-testing_SOURCE_DIR}/data/arrow-ipc-stream/integration/1.0.0-littleendian/*.json.gz"
115+
)
116+
foreach(file_path IN LISTS arrow_testing_data_targz_files)
117+
cmake_path(GET file_path PARENT_PATH parent_dir)
118+
cmake_path(GET file_path STEM filename)
119+
set(destination_file_path "${parent_dir}/${filename}.json")
120+
if(EXISTS "${destination_file_path}")
121+
message(VERBOSE "File already extracted: ${destination_file_path}")
122+
else()
123+
message(STATUS "Extracting ${file_path}")
124+
if(WIN32)
125+
execute_process(COMMAND powershell -Command "$i=\"${file_path}\"; $o=\"${destination_file_path}\"; [IO.Compression.GZipStream]::new([IO.File]::OpenRead($i),[IO.Compression.CompressionMode]::Decompress).CopyTo([IO.File]::Create($o))")
126+
else()
127+
execute_process(COMMAND gunzip -kf "${file_path}")
128+
endif()
129+
endif()
130+
endforeach()
131+
88132
endif()

include/sparrow_ipc/encapsulated_message.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,5 +39,6 @@ namespace sparrow_ipc
3939
std::span<const uint8_t> m_data;
4040
};
4141

42-
[[nodiscard]] EncapsulatedMessage create_encapsulated_message(std::span<const uint8_t> buf_ptr);
42+
[[nodiscard]] std::pair<EncapsulatedMessage, std::span<const uint8_t>>
43+
extract_encapsulated_message(std::span<const uint8_t> buf_ptr);
4344
}

src/deserialize.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,7 @@ namespace sparrow_ipc
234234
std::vector<sparrow::data_type> field_types;
235235
do
236236
{
237-
const EncapsulatedMessage encapsulated_message = create_encapsulated_message(data);
237+
const auto [encapsulated_message, rest] = extract_encapsulated_message(data);
238238
const org::apache::arrow::flatbuf::Message* message = encapsulated_message.flat_buffer_message();
239239
switch (message->header_type())
240240
{
@@ -279,8 +279,7 @@ namespace sparrow_ipc
279279
default:
280280
throw std::runtime_error("Unknown message header type.");
281281
}
282-
const size_t encapsulated_message_total_length = encapsulated_message.total_length();
283-
data = data.subspan(encapsulated_message_total_length);
282+
data = rest;
284283
if (is_end_of_stream(data.subspan(0, 8)))
285284
{
286285
break;

src/encapsulated_message.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,8 @@ namespace sparrow_ipc
9292
return m_data;
9393
}
9494

95-
EncapsulatedMessage create_encapsulated_message(std::span<const uint8_t> data)
95+
std::pair<EncapsulatedMessage, std::span<const uint8_t>>
96+
extract_encapsulated_message(std::span<const uint8_t> data)
9697
{
9798
if (!data.size() || data.size() < 8)
9899
{
@@ -103,6 +104,8 @@ namespace sparrow_ipc
103104
{
104105
throw std::runtime_error("Buffer starts with continuation bytes, expected a valid message.");
105106
}
106-
return {data};
107+
EncapsulatedMessage message(data);
108+
std::span<const uint8_t> rest = data.subspan(message.total_length());
109+
return {std::move(message), std::move(rest)};
107110
}
108111
}

tests/CMakeLists.txt

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,7 @@ cmake_minimum_required(VERSION 3.28)
22

33
set(test_target "test_sparrow_ipc_lib")
44

5-
set(
6-
SPARROW_IPC_TESTS_SRC
5+
set(SPARROW_IPC_TESTS_SRC
76
include/sparrow_ipc_tests_helpers.hpp
87
# TODO move all the files below under src?
98
main.cpp
@@ -21,8 +20,11 @@ target_link_libraries(${test_target}
2120
sparrow-ipc
2221
doctest::doctest
2322
sparrow::json_reader
23+
arrow-testing-data
2424
)
2525

26+
set(APACHE_ARROW_TESTING_DIR $<TARGET_PROPERTY:arrow-testing,INTERFACE_INCLUDE_DIRECTORIES>)
27+
2628
target_compile_definitions(${test_target}
2729
PRIVATE
2830
TESTS_RESOURCES_FILES_PATH="${CMAKE_CURRENT_SOURCE_DIR}/resources/")

tests/test_primitive_array_with_files.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,10 @@
1515
#include "sparrow_ipc/deserialize.hpp"
1616

1717

18-
const std::filesystem::path tests_resources_files_path = TESTS_RESOURCES_FILES_PATH;
18+
const std::filesystem::path arrow_testing_data_dir = ARROW_TESTING_DATA_DIR;
19+
20+
const std::filesystem::path tests_resources_files_path = arrow_testing_data_dir / "data" / "arrow-ipc-stream"
21+
/ "integration" / "1.0.0-littleendian";
1922

2023
const std::vector<std::filesystem::path> files_paths_to_test = {
2124
tests_resources_files_path / "generated_primitive",

0 commit comments

Comments
 (0)