55
66#include < sparrow/record_batch.hpp>
77
8- #include " config/config.hpp"
98#include " Message_generated.h"
9+ #include " sparrow_ipc/config/config.hpp"
1010#include " sparrow_ipc/encapsulated_message.hpp"
11- #include " SparseTensor_generated.h"
1211
1312namespace sparrow_ipc
1413{
14+ /**
15+ * @brief Deserializes a schema message from Arrow IPC format data.
16+ *
17+ * This function parses an Arrow IPC schema message from a byte buffer, extracting
18+ * the field name and custom metadata from the first (and expected only) field in the schema.
19+ *
20+ * @param data A span containing the raw byte data to deserialize from
21+ * @param current_offset Reference to the current position in the data buffer, which will be
22+ * updated to point past the processed schema message
23+ * @param name Optional output parameter that will contain the field name if present
24+ * @param metadata Optional output parameter that will contain the custom metadata
25+ * key-value pairs if present
26+ *
27+ * @throws std::runtime_error If the message is not a Schema message type
28+ * @throws std::runtime_error If the schema does not contain exactly one field
29+ *
30+ * @note This function expects the data to start with a 4-byte length prefix followed
31+ * by the FlatBuffer schema message data
32+ */
1533 SPARROW_IPC_API void deserialize_schema_message (
1634 std::span<const uint8_t > data,
1735 size_t & current_offset,
@@ -21,6 +39,25 @@ namespace sparrow_ipc
2139 [[nodiscard]] SPARROW_IPC_API const org::apache::arrow::flatbuf::RecordBatch*
2240 deserialize_record_batch_message (std::span<const uint8_t > data, size_t & current_offset);
2341
42+ /**
43+ * @brief Deserializes an Arrow IPC stream from binary data into a vector of record batches.
44+ *
45+ * This function processes an Arrow IPC stream format, extracting schema information
46+ * and record batch data. It handles encapsulated messages sequentially, first expecting
47+ * a Schema message followed by one or more RecordBatch messages.
48+ *
49+ * @param data A span of bytes containing the serialized Arrow IPC stream data
50+ *
51+ * @return A vector containing all deserialized record batches
52+ *
53+ * @throws std::runtime_error If:
54+ * - A RecordBatch message is encountered before a Schema message
55+ * - A RecordBatch message header is missing or invalid
56+ * - Unsupported message types are encountered (Tensor, DictionaryBatch, SparseTensor)
57+ * - An unknown message header type is encountered
58+ *
59+ * @note The function processes messages until an end-of-stream marker is detected
60+ */
2461 [[nodiscard]] SPARROW_IPC_API std::vector<sparrow::record_batch>
2562 deserialize_stream (std::span<const uint8_t > data);
2663}
0 commit comments