| 
 | 1 | +#pragma once  | 
 | 2 | + | 
 | 3 | +#include <ostream>  | 
 | 4 | +#include <ranges>  | 
 | 5 | +#include <vector>  | 
 | 6 | + | 
 | 7 | +#include <sparrow/record_batch.hpp>  | 
 | 8 | + | 
 | 9 | +#include "Message_generated.h"  | 
 | 10 | +#include "sparrow_ipc/config/config.hpp"  | 
 | 11 | +#include "sparrow_ipc/magic_values.hpp"  | 
 | 12 | +#include "sparrow_ipc/serialize_utils.hpp"  | 
 | 13 | +#include "sparrow_ipc/utils.hpp"  | 
 | 14 | + | 
 | 15 | +namespace sparrow_ipc  | 
 | 16 | +{  | 
 | 17 | +    /**  | 
 | 18 | +     * @brief Serializes a collection of record batches into a binary format.  | 
 | 19 | +     *  | 
 | 20 | +     * This function takes a collection of record batches and serializes them into a single  | 
 | 21 | +     * binary representation following the Arrow IPC format. The serialization includes:  | 
 | 22 | +     * - Schema message (derived from the first record batch)  | 
 | 23 | +     * - All record batch data  | 
 | 24 | +     * - End-of-stream marker  | 
 | 25 | +     *  | 
 | 26 | +     * @tparam R Container type that holds record batches (must support empty(), operator[], begin(), end())  | 
 | 27 | +     * @param record_batches Collection of record batches to serialize. All batches must have identical  | 
 | 28 | +     * schemas.  | 
 | 29 | +     *  | 
 | 30 | +     * @return std::vector<uint8_t> Binary serialized data containing schema, record batches, and  | 
 | 31 | +     * end-of-stream marker. Returns empty vector if input collection is empty.  | 
 | 32 | +     *  | 
 | 33 | +     * @throws std::invalid_argument If record batches have inconsistent schemas or if the collection  | 
 | 34 | +     *                               contains batches that cannot be serialized together.  | 
 | 35 | +     *  | 
 | 36 | +     * @pre All record batches in the collection must have the same schema  | 
 | 37 | +     * @pre The container R must not be empty when consistency checking is required  | 
 | 38 | +     */  | 
 | 39 | +    template <std::ranges::input_range R>  | 
 | 40 | +        requires std::same_as<std::ranges::range_value_t<R>, sparrow::record_batch>  | 
 | 41 | +    std::vector<uint8_t> serialize(const R& record_batches)  | 
 | 42 | +    {  | 
 | 43 | +        if (record_batches.empty())  | 
 | 44 | +        {  | 
 | 45 | +            return {};  | 
 | 46 | +        }  | 
 | 47 | +        if (!utils::check_record_batches_consistency(record_batches))  | 
 | 48 | +        {  | 
 | 49 | +            throw std::invalid_argument(  | 
 | 50 | +                "All record batches must have the same schema to be serialized together."  | 
 | 51 | +            );  | 
 | 52 | +        }  | 
 | 53 | +        std::vector<uint8_t> serialized_schema = serialize_schema_message(record_batches[0]);  | 
 | 54 | +        std::vector<uint8_t> serialized_record_batches = serialize_record_batches_without_schema_message(record_batches);  | 
 | 55 | +        serialized_schema.insert(  | 
 | 56 | +            serialized_schema.end(),  | 
 | 57 | +            std::make_move_iterator(serialized_record_batches.begin()),  | 
 | 58 | +            std::make_move_iterator(serialized_record_batches.end())  | 
 | 59 | +        );  | 
 | 60 | +        // End of stream message  | 
 | 61 | +        serialized_schema.insert(serialized_schema.end(), end_of_stream.begin(), end_of_stream.end());  | 
 | 62 | +        return serialized_schema;  | 
 | 63 | +    }  | 
 | 64 | +}  | 
0 commit comments