Skip to content

Commit 6a737f5

Browse files
committed
Move utils fcts to utils files
1 parent 610259f commit 6a737f5

File tree

4 files changed

+366
-335
lines changed

4 files changed

+366
-335
lines changed

CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,10 +44,12 @@ set(SPARROW_IPC_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/src)
4444
set(SPARROW_IPC_HEADERS
4545
${SPARROW_IPC_INCLUDE_DIR}/config/config.hpp
4646
${SPARROW_IPC_INCLUDE_DIR}/serialize.hpp
47+
${SPARROW_IPC_INCLUDE_DIR}/utils.hpp
4748
)
4849

4950
set(SPARROW_IPC_SRC
5051
${SPARROW_IPC_SOURCE_DIR}/serialize.cpp
52+
${SPARROW_IPC_SOURCE_DIR}/utils.cpp
5153
)
5254

5355
set(SCHEMA_DIR ${CMAKE_BINARY_DIR}/format)

include/utils.hpp

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
#pragma once

#include <cstdint>
#include <optional>
#include <string_view>
#include <utility>

// TODO what to do with namespace?
// TODO add tests for these?
// TODO add namespace ? sparrow-ipc / detail or utils?
// Schema_generated.h declares org::apache::arrow::flatbuf::Type and pulls in the flatbuffers
// builder/offset types used in the signatures below; including it here keeps this header
// self-contained instead of relying on the include order of each translation unit.
#include "Schema_generated.h"
#include "config/config.hpp"

// Rounds n up to a multiple of 8 (a value that is already a multiple of 8 is returned unchanged),
// as required by the Arrow IPC format for message bodies.
SPARROW_IPC_API int64_t align_to_8(int64_t n);

// Parses the integer that follows the first occurrence of `sep` in `format_str`.
// Returns std::nullopt when `sep` is not found or when the text after it is not, in its
// entirety, a valid int32_t.
SPARROW_IPC_API std::optional<int32_t> parse_format(std::string_view format_str, std::string_view sep);

// Builds the flatbuffer Decimal type table for a decimal format string ("d:precision,scale...")
// with the given bit width, and returns it together with the Type::Decimal enumerator.
// Throws std::runtime_error when precision or scale cannot be parsed from `format_str`.
SPARROW_IPC_API std::pair<org::apache::arrow::flatbuf::Type, flatbuffers::Offset<void>>
get_flatbuffer_decimal_type(flatbuffers::FlatBufferBuilder& builder, std::string_view format_str, int32_t bitWidth);

// Builds, in `builder`, the flatbuffer type table matching the Arrow format string `format_str`
// and returns it together with the corresponding flatbuffer Type enumerator.
// Throws std::runtime_error for unsupported types or unparsable size parameters.
SPARROW_IPC_API std::pair<org::apache::arrow::flatbuf::Type, flatbuffers::Offset<void>>
get_flatbuffer_type(flatbuffers::FlatBufferBuilder& builder, std::string_view format_str);

src/serialize.cpp

Lines changed: 1 addition & 335 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
#include <charconv>
21
#include <cstdint>
32
#include <cstring>
43
#include <optional>
@@ -10,340 +9,7 @@
109
#include "Schema_generated.h"
1110

1211
#include "serialize.hpp"
13-
14-
namespace
15-
{
16-
// Rounds n up to a multiple of 8, as required by the Arrow IPC format for message bodies.
// A value that is already a multiple of 8 is returned unchanged.
int64_t align_to_8(int64_t n)
{
    // -8 has every bit set except the three lowest, so the AND clears the remainder modulo 8.
    const int64_t low_bits_cleared_mask = -8;
    const int64_t bumped = n + 7;
    return bumped & low_bits_cleared_mask;
}
21-
22-
// Parses the integer that follows the first occurrence of `sep` in `format_str`.
//
// Returns the parsed value, or std::nullopt when `sep` does not occur in `format_str` or when
// the text after it is not, in its entirety, a valid int32_t (empty, trailing characters,
// out of range, ...).
std::optional<int32_t> parse_format(std::string_view format_str, std::string_view sep)
{
    // Find the position of the separator
    const auto sep_pos = format_str.find(sep);
    if (sep_pos == std::string_view::npos)
    {
        return std::nullopt;
    }

    // Skip the whole separator, not just one character, so multi-character separators work.
    const std::string_view number_str = format_str.substr(sep_pos + sep.size());
    const char* const first = number_str.data();
    const char* const last = first + number_str.size();

    int32_t value = 0;
    const auto [ptr, ec] = std::from_chars(first, last, value);

    // Reject both conversion errors and partially consumed input such as "12x".
    if (ec != std::errc() || ptr != last)
    {
        return std::nullopt;
    }
    return value;
}
43-
44-
std::pair<org::apache::arrow::flatbuf::Type, flatbuffers::Offset<void>>
45-
get_flatbuffer_decimal_type(flatbuffers::FlatBufferBuilder& builder, std::string_view format_str, int32_t bitWidth)
46-
{
47-
// Decimal requires precision and scale. We need to parse the format_str.
48-
// Format: "d:precision,scale"
49-
auto scale = parse_format(format_str, ",");
50-
if (!scale.has_value())
51-
{
52-
throw std::runtime_error("Failed to parse Decimal256 scale from format string: " + std::string(format_str));
53-
}
54-
size_t comma_pos = format_str.find(',');
55-
auto precision = parse_format(format_str.substr(0, comma_pos), ":");
56-
if (!precision.has_value())
57-
{
58-
throw std::runtime_error("Failed to parse Decimal256 precision from format string: " + std::string(format_str));
59-
}
60-
auto decimal_type = org::apache::arrow::flatbuf::CreateDecimal(builder, precision.value(), scale.value(), bitWidth);
61-
return {org::apache::arrow::flatbuf::Type::Decimal, decimal_type.Union()};
62-
}
63-
64-
std::pair<org::apache::arrow::flatbuf::Type, flatbuffers::Offset<void>>
65-
get_flatbuffer_type(flatbuffers::FlatBufferBuilder& builder, std::string_view format_str)
66-
{
67-
auto type = sparrow::format_to_data_type(format_str);
68-
switch (type)
69-
{
70-
case sparrow::data_type::NA:
71-
{
72-
auto null_type = org::apache::arrow::flatbuf::CreateNull(builder);
73-
return {org::apache::arrow::flatbuf::Type::Null, null_type.Union()};
74-
}
75-
case sparrow::data_type::BOOL:
76-
{
77-
auto bool_type = org::apache::arrow::flatbuf::CreateBool(builder);
78-
return {org::apache::arrow::flatbuf::Type::Bool, bool_type.Union()};
79-
}
80-
case sparrow::data_type::UINT8:
81-
{
82-
auto int_type = org::apache::arrow::flatbuf::CreateInt(builder, 8, false);
83-
return {org::apache::arrow::flatbuf::Type::Int, int_type.Union()};
84-
}
85-
case sparrow::data_type::INT8:
86-
{
87-
auto int_type = org::apache::arrow::flatbuf::CreateInt(builder, 8, true);
88-
return {org::apache::arrow::flatbuf::Type::Int, int_type.Union()};
89-
}
90-
case sparrow::data_type::UINT16:
91-
{
92-
auto int_type = org::apache::arrow::flatbuf::CreateInt(builder, 16, false);
93-
return {org::apache::arrow::flatbuf::Type::Int, int_type.Union()};
94-
}
95-
case sparrow::data_type::INT16:
96-
{
97-
auto int_type = org::apache::arrow::flatbuf::CreateInt(builder, 16, true);
98-
return {org::apache::arrow::flatbuf::Type::Int, int_type.Union()};
99-
}
100-
case sparrow::data_type::UINT32:
101-
{
102-
auto int_type = org::apache::arrow::flatbuf::CreateInt(builder, 32, false);
103-
return {org::apache::arrow::flatbuf::Type::Int, int_type.Union()};
104-
}
105-
case sparrow::data_type::INT32:
106-
{
107-
auto int_type = org::apache::arrow::flatbuf::CreateInt(builder, 32, true);
108-
return {org::apache::arrow::flatbuf::Type::Int, int_type.Union()};
109-
}
110-
case sparrow::data_type::UINT64:
111-
{
112-
auto int_type = org::apache::arrow::flatbuf::CreateInt(builder, 64, false);
113-
return {org::apache::arrow::flatbuf::Type::Int, int_type.Union()};
114-
}
115-
case sparrow::data_type::INT64:
116-
{
117-
auto int_type = org::apache::arrow::flatbuf::CreateInt(builder, 64, true);
118-
return {org::apache::arrow::flatbuf::Type::Int, int_type.Union()};
119-
}
120-
case sparrow::data_type::HALF_FLOAT:
121-
{
122-
auto fp_type = org::apache::arrow::flatbuf::CreateFloatingPoint(
123-
builder, org::apache::arrow::flatbuf::Precision::HALF);
124-
return {org::apache::arrow::flatbuf::Type::FloatingPoint, fp_type.Union()};
125-
}
126-
case sparrow::data_type::FLOAT:
127-
{
128-
auto fp_type = org::apache::arrow::flatbuf::CreateFloatingPoint(
129-
builder, org::apache::arrow::flatbuf::Precision::SINGLE);
130-
return {org::apache::arrow::flatbuf::Type::FloatingPoint, fp_type.Union()};
131-
}
132-
case sparrow::data_type::DOUBLE:
133-
{
134-
auto fp_type = org::apache::arrow::flatbuf::CreateFloatingPoint(
135-
builder, org::apache::arrow::flatbuf::Precision::DOUBLE);
136-
return {org::apache::arrow::flatbuf::Type::FloatingPoint, fp_type.Union()};
137-
}
138-
case sparrow::data_type::STRING:
139-
{
140-
auto string_type = org::apache::arrow::flatbuf::CreateUtf8(builder);
141-
return {org::apache::arrow::flatbuf::Type::Utf8, string_type.Union()};
142-
}
143-
case sparrow::data_type::LARGE_STRING:
144-
{
145-
auto large_string_type = org::apache::arrow::flatbuf::CreateLargeUtf8(builder);
146-
return {org::apache::arrow::flatbuf::Type::LargeUtf8, large_string_type.Union()};
147-
}
148-
case sparrow::data_type::BINARY:
149-
{
150-
auto binary_type = org::apache::arrow::flatbuf::CreateBinary(builder);
151-
return {org::apache::arrow::flatbuf::Type::Binary, binary_type.Union()};
152-
}
153-
case sparrow::data_type::LARGE_BINARY:
154-
{
155-
auto large_binary_type = org::apache::arrow::flatbuf::CreateLargeBinary(builder);
156-
return {org::apache::arrow::flatbuf::Type::LargeBinary, large_binary_type.Union()};
157-
}
158-
case sparrow::data_type::STRING_VIEW:
159-
{
160-
auto string_view_type = org::apache::arrow::flatbuf::CreateUtf8View(builder);
161-
return {org::apache::arrow::flatbuf::Type::Utf8View, string_view_type.Union()};
162-
}
163-
case sparrow::data_type::BINARY_VIEW:
164-
{
165-
auto binary_view_type = org::apache::arrow::flatbuf::CreateBinaryView(builder);
166-
return {org::apache::arrow::flatbuf::Type::BinaryView, binary_view_type.Union()};
167-
}
168-
case sparrow::data_type::DATE_DAYS:
169-
{
170-
auto date_type = org::apache::arrow::flatbuf::CreateDate(builder, org::apache::arrow::flatbuf::DateUnit::DAY);
171-
return {org::apache::arrow::flatbuf::Type::Date, date_type.Union()};
172-
}
173-
case sparrow::data_type::DATE_MILLISECONDS:
174-
{
175-
auto date_type = org::apache::arrow::flatbuf::CreateDate(builder, org::apache::arrow::flatbuf::DateUnit::MILLISECOND);
176-
return {org::apache::arrow::flatbuf::Type::Date, date_type.Union()};
177-
}
178-
case sparrow::data_type::TIMESTAMP_SECONDS:
179-
{
180-
auto timestamp_type = org::apache::arrow::flatbuf::CreateTimestamp(builder, org::apache::arrow::flatbuf::TimeUnit::SECOND);
181-
return {org::apache::arrow::flatbuf::Type::Timestamp, timestamp_type.Union()};
182-
}
183-
case sparrow::data_type::TIMESTAMP_MILLISECONDS:
184-
{
185-
auto timestamp_type = org::apache::arrow::flatbuf::CreateTimestamp(builder, org::apache::arrow::flatbuf::TimeUnit::MILLISECOND);
186-
return {org::apache::arrow::flatbuf::Type::Timestamp, timestamp_type.Union()};
187-
}
188-
case sparrow::data_type::TIMESTAMP_MICROSECONDS:
189-
{
190-
auto timestamp_type = org::apache::arrow::flatbuf::CreateTimestamp(builder, org::apache::arrow::flatbuf::TimeUnit::MICROSECOND);
191-
return {org::apache::arrow::flatbuf::Type::Timestamp, timestamp_type.Union()};
192-
}
193-
case sparrow::data_type::TIMESTAMP_NANOSECONDS:
194-
{
195-
auto timestamp_type = org::apache::arrow::flatbuf::CreateTimestamp(builder, org::apache::arrow::flatbuf::TimeUnit::NANOSECOND);
196-
return {org::apache::arrow::flatbuf::Type::Timestamp, timestamp_type.Union()};
197-
}
198-
case sparrow::data_type::DURATION_SECONDS:
199-
{
200-
auto duration_type = org::apache::arrow::flatbuf::CreateDuration(builder, org::apache::arrow::flatbuf::TimeUnit::SECOND);
201-
return {org::apache::arrow::flatbuf::Type::Duration, duration_type.Union()};
202-
}
203-
case sparrow::data_type::DURATION_MILLISECONDS:
204-
{
205-
auto duration_type = org::apache::arrow::flatbuf::CreateDuration(builder, org::apache::arrow::flatbuf::TimeUnit::MILLISECOND);
206-
return {org::apache::arrow::flatbuf::Type::Duration, duration_type.Union()};
207-
}
208-
case sparrow::data_type::DURATION_MICROSECONDS:
209-
{
210-
auto duration_type = org::apache::arrow::flatbuf::CreateDuration(builder, org::apache::arrow::flatbuf::TimeUnit::MICROSECOND);
211-
return {org::apache::arrow::flatbuf::Type::Duration, duration_type.Union()};
212-
}
213-
case sparrow::data_type::DURATION_NANOSECONDS:
214-
{
215-
auto duration_type = org::apache::arrow::flatbuf::CreateDuration(builder, org::apache::arrow::flatbuf::TimeUnit::NANOSECOND);
216-
return {org::apache::arrow::flatbuf::Type::Duration, duration_type.Union()};
217-
}
218-
case sparrow::data_type::INTERVAL_MONTHS:
219-
{
220-
auto interval_type = org::apache::arrow::flatbuf::CreateInterval(builder, org::apache::arrow::flatbuf::IntervalUnit::YEAR_MONTH);
221-
return {org::apache::arrow::flatbuf::Type::Interval, interval_type.Union()};
222-
}
223-
case sparrow::data_type::INTERVAL_DAYS_TIME:
224-
{
225-
auto interval_type = org::apache::arrow::flatbuf::CreateInterval(builder, org::apache::arrow::flatbuf::IntervalUnit::DAY_TIME);
226-
return {org::apache::arrow::flatbuf::Type::Interval, interval_type.Union()};
227-
}
228-
case sparrow::data_type::INTERVAL_MONTHS_DAYS_NANOSECONDS:
229-
{
230-
auto interval_type = org::apache::arrow::flatbuf::CreateInterval(builder, org::apache::arrow::flatbuf::IntervalUnit::MONTH_DAY_NANO);
231-
return {org::apache::arrow::flatbuf::Type::Interval, interval_type.Union()};
232-
}
233-
case sparrow::data_type::TIME_SECONDS:
234-
{
235-
auto time_type = org::apache::arrow::flatbuf::CreateTime(builder, org::apache::arrow::flatbuf::TimeUnit::SECOND, 32);
236-
return {org::apache::arrow::flatbuf::Type::Time, time_type.Union()};
237-
}
238-
case sparrow::data_type::TIME_MILLISECONDS:
239-
{
240-
auto time_type = org::apache::arrow::flatbuf::CreateTime(builder, org::apache::arrow::flatbuf::TimeUnit::MILLISECOND, 32);
241-
return {org::apache::arrow::flatbuf::Type::Time, time_type.Union()};
242-
}
243-
case sparrow::data_type::TIME_MICROSECONDS:
244-
{
245-
auto time_type = org::apache::arrow::flatbuf::CreateTime(builder, org::apache::arrow::flatbuf::TimeUnit::MICROSECOND, 64);
246-
return {org::apache::arrow::flatbuf::Type::Time, time_type.Union()};
247-
}
248-
case sparrow::data_type::TIME_NANOSECONDS:
249-
{
250-
auto time_type = org::apache::arrow::flatbuf::CreateTime(builder, org::apache::arrow::flatbuf::TimeUnit::NANOSECOND, 64);
251-
return {org::apache::arrow::flatbuf::Type::Time, time_type.Union()};
252-
}
253-
case sparrow::data_type::LIST:
254-
{
255-
auto list_type = org::apache::arrow::flatbuf::CreateList(builder);
256-
return {org::apache::arrow::flatbuf::Type::List, list_type.Union()};
257-
}
258-
case sparrow::data_type::LARGE_LIST:
259-
{
260-
auto large_list_type = org::apache::arrow::flatbuf::CreateLargeList(builder);
261-
return {org::apache::arrow::flatbuf::Type::LargeList, large_list_type.Union()};
262-
}
263-
case sparrow::data_type::LIST_VIEW:
264-
{
265-
auto list_view_type = org::apache::arrow::flatbuf::CreateListView(builder);
266-
return {org::apache::arrow::flatbuf::Type::ListView, list_view_type.Union()};
267-
}
268-
case sparrow::data_type::LARGE_LIST_VIEW:
269-
{
270-
auto large_list_view_type = org::apache::arrow::flatbuf::CreateLargeListView(builder);
271-
return {org::apache::arrow::flatbuf::Type::LargeListView, large_list_view_type.Union()};
272-
}
273-
case sparrow::data_type::FIXED_SIZED_LIST:
274-
{
275-
// FixedSizeList requires listSize. We need to parse the format_str.
276-
// Format: "+w:size"
277-
auto list_size = parse_format(format_str, ":");
278-
if (!list_size.has_value())
279-
{
280-
throw std::runtime_error("Failed to parse FixedSizeList size from format string: " + std::string(format_str));
281-
}
282-
283-
auto fixed_size_list_type = org::apache::arrow::flatbuf::CreateFixedSizeList(builder, list_size.value());
284-
return {org::apache::arrow::flatbuf::Type::FixedSizeList, fixed_size_list_type.Union()};
285-
}
286-
case sparrow::data_type::STRUCT:
287-
{
288-
auto struct_type = org::apache::arrow::flatbuf::CreateStruct_(builder);
289-
return {org::apache::arrow::flatbuf::Type::Struct_, struct_type.Union()};
290-
}
291-
case sparrow::data_type::MAP:
292-
{
293-
auto map_type = org::apache::arrow::flatbuf::CreateMap(builder, false); // not sorted keys
294-
return {org::apache::arrow::flatbuf::Type::Map, map_type.Union()};
295-
}
296-
case sparrow::data_type::DENSE_UNION:
297-
{
298-
auto union_type = org::apache::arrow::flatbuf::CreateUnion(builder, org::apache::arrow::flatbuf::UnionMode::Dense, 0);
299-
return {org::apache::arrow::flatbuf::Type::Union, union_type.Union()};
300-
}
301-
case sparrow::data_type::SPARSE_UNION:
302-
{
303-
auto union_type = org::apache::arrow::flatbuf::CreateUnion(builder, org::apache::arrow::flatbuf::UnionMode::Sparse, 0);
304-
return {org::apache::arrow::flatbuf::Type::Union, union_type.Union()};
305-
}
306-
case sparrow::data_type::RUN_ENCODED:
307-
{
308-
auto run_end_encoded_type = org::apache::arrow::flatbuf::CreateRunEndEncoded(builder);
309-
return {org::apache::arrow::flatbuf::Type::RunEndEncoded, run_end_encoded_type.Union()};
310-
}
311-
case sparrow::data_type::DECIMAL32:
312-
{
313-
return get_flatbuffer_decimal_type(builder, format_str, 32);
314-
}
315-
case sparrow::data_type::DECIMAL64:
316-
{
317-
return get_flatbuffer_decimal_type(builder, format_str, 64);
318-
}
319-
case sparrow::data_type::DECIMAL128:
320-
{
321-
return get_flatbuffer_decimal_type(builder, format_str, 128);
322-
}
323-
case sparrow::data_type::DECIMAL256:
324-
{
325-
return get_flatbuffer_decimal_type(builder, format_str, 256);
326-
}
327-
case sparrow::data_type::FIXED_WIDTH_BINARY:
328-
{
329-
// FixedSizeBinary requires byteWidth. We need to parse the format_str.
330-
// Format: "w:size"
331-
auto byte_width = parse_format(format_str, ":");
332-
if (!byte_width.has_value())
333-
{
334-
throw std::runtime_error("Failed to parse FixedWidthBinary size from format string: " + std::string(format_str));
335-
}
336-
337-
auto fixed_width_binary_type = org::apache::arrow::flatbuf::CreateFixedSizeBinary(builder, byte_width.value());
338-
return {org::apache::arrow::flatbuf::Type::FixedSizeBinary, fixed_width_binary_type.Union()};
339-
}
340-
default:
341-
{
342-
throw std::runtime_error("Unsupported data type for serialization");
343-
}
344-
}
345-
}
346-
}
12+
#include "utils.hpp"
34713

34814
template <typename T>
34915
std::vector<uint8_t> serialize_primitive_array(const sparrow::primitive_array<T>& arr)

0 commit comments

Comments
 (0)