Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions .github/workflows/linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,14 @@ jobs:
working-directory: build
run: cmake --build . --target run_example

# Build only the deserializer_example target, from the build directory.
- name: Build deserializer example
working-directory: build
run: cmake --build . --target deserializer_example

# Build the run_deserializer_example target; examples/CMakeLists.txt defines it
# as a custom target whose command is the deserializer_example executable.
- name: Run deserializer example
working-directory: build
run: cmake --build . --target run_deserializer_example

- name: Install
working-directory: build
run: cmake --install .
Expand Down Expand Up @@ -103,6 +111,14 @@ jobs:
working-directory: build
run: cmake --build . --target run_example

# Build only the deserializer_example target, from the build directory.
- name: Build deserializer example
working-directory: build
run: cmake --build . --target deserializer_example

# Build the run_deserializer_example target; examples/CMakeLists.txt defines it
# as a custom target whose command is the deserializer_example executable.
- name: Run deserializer example
working-directory: build
run: cmake --build . --target run_deserializer_example

- name: Install
working-directory: build
run: sudo cmake --install .
16 changes: 16 additions & 0 deletions .github/workflows/osx.yml
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,14 @@ jobs:
working-directory: build
run: cmake --build . --target run_example

# Build only the deserializer_example target, from the build directory.
- name: Build deserializer example
working-directory: build
run: cmake --build . --target deserializer_example

# Build the run_deserializer_example target; examples/CMakeLists.txt defines it
# as a custom target whose command is the deserializer_example executable.
- name: Run deserializer example
working-directory: build
run: cmake --build . --target run_deserializer_example

- name: Install
working-directory: build
run: cmake --install .
Expand Down Expand Up @@ -113,6 +121,14 @@ jobs:
working-directory: build
run: cmake --build . --target run_example

# Build only the deserializer_example target, from the build directory.
- name: Build deserializer example
working-directory: build
run: cmake --build . --target deserializer_example

# Build the run_deserializer_example target; examples/CMakeLists.txt defines it
# as a custom target whose command is the deserializer_example executable.
- name: Run deserializer example
working-directory: build
run: cmake --build . --target run_deserializer_example

- name: Install
working-directory: build
run: sudo cmake --install .
16 changes: 16 additions & 0 deletions .github/workflows/windows.yml
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,14 @@ jobs:
working-directory: build
run: cmake --build . --config ${{ matrix.build_type }} --target run_example

# Build only the deserializer_example target, from the build directory.
# --config passes the matrix build type as the configuration to build.
- name: Build deserializer example
working-directory: build
run: cmake --build . --config ${{ matrix.build_type }} --target deserializer_example

# Build the run_deserializer_example target for the same configuration;
# examples/CMakeLists.txt defines it as a custom target whose command is the
# deserializer_example executable.
- name: Run deserializer example
working-directory: build
run: cmake --build . --config ${{ matrix.build_type }} --target run_deserializer_example

- name: Install
working-directory: build
run: cmake --install . --config ${{ matrix.build_type }}
Expand Down Expand Up @@ -113,6 +121,14 @@ jobs:
working-directory: build
run: cmake --build . --config ${{ matrix.build_type }} --target run_example

# Build only the deserializer_example target, from the build directory.
# --config passes the matrix build type as the configuration to build.
- name: Build deserializer example
working-directory: build
run: cmake --build . --config ${{ matrix.build_type }} --target deserializer_example

# Build the run_deserializer_example target for the same configuration;
# examples/CMakeLists.txt defines it as a custom target whose command is the
# deserializer_example executable.
- name: Run deserializer example
working-directory: build
run: cmake --build . --config ${{ matrix.build_type }} --target run_deserializer_example

- name: Install
working-directory: build
run: cmake --install . --config ${{ matrix.build_type }}
Expand Down
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ set(SPARROW_IPC_HEADERS
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/deserialize_utils.hpp
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/deserialize_variable_size_binary_array.hpp
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/deserialize.hpp
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/deserializer.hpp
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/encapsulated_message.hpp
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/flatbuffer_utils.hpp
${SPARROW_IPC_INCLUDE_DIR}/sparrow_ipc/magic_values.hpp
Expand Down
74 changes: 72 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -107,21 +107,91 @@ void stream_record_batches(std::ostream& os, record_batch_source& source)

### Deserialize a stream into record batches

#### Using the function API

```cpp
#include <vector>
#include <sparrow_ipc/deserializer.hpp>
#include <sparrow_ipc/deserialize.hpp>
#include <sparrow/record_batch.hpp>

namespace sp = sparrow;
namespace sp_ipc = sparrow_ipc;

std::vector<sp::record_batch> deserialize_stream_to_batches(const std::vector<uint8_t>& stream_data)
std::vector<sp::record_batch> deserialize_stream_example(const std::vector<uint8_t>& stream_data)
{
// Deserialize the entire stream at once
auto batches = sp_ipc::deserialize_stream(stream_data);
return batches;
}
```

#### Using the deserializer class

The deserializer class allows you to accumulate record batches into an existing container as you deserialize data:

```cpp
#include <iostream>
#include <span>
#include <vector>
#include <sparrow_ipc/deserializer.hpp>
#include <sparrow/record_batch.hpp>

namespace sp = sparrow;
namespace sp_ipc = sparrow_ipc;

void deserializer_basic_example(const std::vector<uint8_t>& stream_data)
{
// Create a container to hold the deserialized batches
std::vector<sp::record_batch> batches;

// Create a deserializer that will append to our container
sp_ipc::deserializer deser(batches);

// Deserialize the stream data
deser.deserialize(std::span<const uint8_t>(stream_data));

// Process the accumulated batches
for (const auto& batch : batches)
{
std::cout << "Batch with " << batch.nb_rows() << " rows and " << batch.nb_columns() << " columns\n";
}
}
```

#### Incremental deserialization

The deserializer class is particularly useful for streaming scenarios where data arrives in chunks:

```cpp
#include <iostream>
#include <span>
#include <vector>
#include <sparrow_ipc/deserializer.hpp>
#include <sparrow/record_batch.hpp>

namespace sp = sparrow;
namespace sp_ipc = sparrow_ipc;

void deserializer_incremental_example(const std::vector<std::vector<uint8_t>>& stream_chunks)
{
// Container to accumulate all deserialized batches
std::vector<sp::record_batch> batches;

// Create a deserializer
sp_ipc::deserializer deser(batches);

// Deserialize chunks as they arrive using the streaming operator
for (const auto& chunk : stream_chunks)
{
deser << std::span<const uint8_t>(chunk);
std::cout << "After chunk: " << batches.size() << " batches accumulated\n";
}

// All batches are now available in the container
std::cout << "Total batches deserialized: " << batches.size() << "\n";
}
```

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should still add this to the examples file, to ensure that the same code (as long as it's copied correctly) runs without failures (no typos, etc.).

## Documentation

The documentation (currently being written) can be found at https://quantstack.github.io/sparrow-ipc/index.html
Expand Down
38 changes: 38 additions & 0 deletions docs/source/main_page.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,41 @@ Sparrow-IPC requires a modern C++ compiler supporting C++20:
| MSVC | 19.41 or higher |

This software is licensed under the BSD-3-Clause license. See the [LICENSE](https://github.com/QuantStack/sparrow-ipc/blob/main/LICENSE) file for details.

Getting Started
---------------

### Quick Example

```cpp
#include <vector>
#include <sparrow_ipc/deserialize.hpp>
#include <sparrow_ipc/memory_output_stream.hpp>
#include <sparrow_ipc/serializer.hpp>
#include <sparrow/record_batch.hpp>

namespace sp = sparrow;
namespace sp_ipc = sparrow_ipc;

// Write a collection of record batches, followed by the end-of-stream
// marker, into a byte buffer and return that buffer.
std::vector<uint8_t> serialize(const std::vector<sp::record_batch>& batches)
{
    std::vector<uint8_t> buffer;
    sp_ipc::memory_output_stream sink(buffer);
    sp_ipc::serializer writer(sink);

    writer << batches << sp_ipc::end_stream;

    return buffer;
}

// Decode a complete stream buffer into record batches in one call.
std::vector<sp::record_batch> deserialize(const std::vector<uint8_t>& stream_data)
{
    auto decoded = sp_ipc::deserialize_stream(stream_data);
    return decoded;
}
```

Documentation
-------------

- @ref serialization "Serialization and Deserialization" - How to serialize and deserialize record batches
- @ref dev_build "Development Build" - How to build the project for development
55 changes: 55 additions & 0 deletions docs/source/serialization.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# Serialization and Deserialization {#serialization}

This page describes how to serialize and deserialize record batches using `sparrow-ipc`.

## Overview

`sparrow-ipc` provides two main approaches for both serialization and deserialization:

- **Function API**: Simple one-shot operations for serializing/deserializing complete data
- **Class API**: Streaming-oriented classes (`serializer` and `deserializer`) for incremental operations

## Serialization

### Serialize record batches to a memory stream

The simplest way to serialize record batches is to use the `serializer` class with a `memory_output_stream`:

\snippet write_and_read_streams.cpp example_serialize_to_stream

### Serialize individual record batches

You can also serialize record batches one at a time:

\snippet write_and_read_streams.cpp example_serialize_individual

## Deserialization

### Using the function API

The simplest way to deserialize a complete Arrow IPC stream is using `deserialize_stream`:

\snippet deserializer_example.cpp example_deserialize_stream

### Using the deserializer class

The `deserializer` class provides more control over deserialization and is useful when you want to:
- Accumulate batches into an existing container
- Deserialize data incrementally as it arrives
- Process multiple streams into a single container

#### Basic usage

\snippet deserializer_example.cpp example_deserializer_basic

#### Incremental deserialization

The `deserializer` class is particularly useful for streaming scenarios where data arrives in chunks:

\snippet deserializer_example.cpp example_deserializer_incremental

#### Chaining deserializations

Calls to the streaming operator (`<<`) can be chained, giving a fluent style:

\snippet deserializer_example.cpp example_deserializer_chaining
56 changes: 54 additions & 2 deletions examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,18 @@ target_link_libraries(write_and_read_streams
arrow-testing-data
)

# deserializer_example: standalone program built from deserializer_example.cpp
add_executable(deserializer_example
    deserializer_example.cpp
)

# The example links privately against sparrow-ipc and sparrow
target_link_libraries(deserializer_example PRIVATE sparrow-ipc sparrow::sparrow)

# Set C++ standard to match the main project
set_target_properties(write_and_read_streams
set_target_properties(write_and_read_streams deserializer_example
PROPERTIES
CXX_STANDARD 20
CXX_STANDARD_REQUIRED ON
Expand All @@ -26,8 +36,15 @@ target_include_directories(write_and_read_streams
${CMAKE_BINARY_DIR}/generated
)

# Header search paths for deserializer_example: include/ under the top-level
# source tree and generated/ under the top-level build tree
target_include_directories(deserializer_example PRIVATE
    ${CMAKE_SOURCE_DIR}/include
    ${CMAKE_BINARY_DIR}/generated
)

# Both example executables must be built after generate_flatbuffers_headers
foreach(example_target IN ITEMS write_and_read_streams deserializer_example)
    add_dependencies(${example_target} generate_flatbuffers_headers)
endforeach()

# Optional: Copy to build directory for easy execution
if(WIN32)
Expand All @@ -38,7 +55,7 @@ if(WIN32)
set(ZSTD_DLL_TARGET libzstd_static)
endif()

# On Windows, copy required DLLs
# On Windows, copy required DLLs for write_and_read_streams
set(DLL_COPY_COMMANDS "") # Initialize a list to hold all copy commands
# Add unconditional copy commands
list(APPEND DLL_COPY_COMMANDS
Expand Down Expand Up @@ -66,6 +83,31 @@ if(WIN32)
${DLL_COPY_COMMANDS}
COMMENT "Copying required DLLs to example executable directory"
)

# On Windows, copy required DLLs for deserializer_example
# DLL_COPY_COMMANDS_DESER holds complete "COMMAND ..." clauses; the list is
# spliced as-is into the POST_BUILD custom command below.
set(DLL_COPY_COMMANDS_DESER
    COMMAND ${CMAKE_COMMAND} -E copy_if_different
        "$<TARGET_FILE:sparrow::sparrow>"
        "$<TARGET_FILE_DIR:deserializer_example>"
    COMMAND ${CMAKE_COMMAND} -E copy_if_different
        "$<TARGET_FILE:sparrow-ipc>"
        "$<TARGET_FILE_DIR:deserializer_example>"
)

# ZSTD_DLL_TARGET is assigned earlier in this file; the zstd binary is only
# copied when that variable holds a value.
if(ZSTD_DLL_TARGET)
    list(APPEND DLL_COPY_COMMANDS_DESER
        COMMAND ${CMAKE_COMMAND} -E copy_if_different
            "$<TARGET_FILE:${ZSTD_DLL_TARGET}>"
            "$<TARGET_FILE_DIR:deserializer_example>"
    )
endif()

# Run the copy commands after each build of deserializer_example.
# VERBATIM makes CMake escape the arguments itself, so the commands are passed
# through unchanged regardless of the native build tool's shell.
add_custom_command(
    TARGET deserializer_example POST_BUILD
    ${DLL_COPY_COMMANDS_DESER}
    COMMENT "Copying required DLLs to deserializer_example executable directory"
    VERBATIM
)
endif()

# Create a custom target to easily run the example
Expand All @@ -77,3 +119,13 @@ add_custom_target(run_example
)

set_target_properties(run_example PROPERTIES FOLDER "Examples")

# run_deserializer_example: convenience target that depends on
# deserializer_example and launches the resulting executable
add_custom_target(run_deserializer_example
    COMMAND deserializer_example
    USES_TERMINAL
    DEPENDS deserializer_example
    COMMENT "Running deserializer_example"
)

# Assign the target to the "Examples" folder
set_property(TARGET run_deserializer_example PROPERTY FOLDER "Examples")
Loading
Loading