Skip to content

Commit 9e76f85

Browse files
Interval array (#335)
Add interval_array --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 2fea185 commit 9e76f85

File tree

13 files changed

+859
-0
lines changed

13 files changed

+859
-0
lines changed

.github/workflows/linux.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,10 @@ jobs:
5656
if: matrix.sys.compiler == 'clang'
5757
run: sudo apt install ${{matrix.sys.stdlib}}-dev -y
5858

59+
- name: Install specific version of tzdata
60+
if: matrix.sys.date-polyfill == 'OFF'
61+
run: sudo apt-get install tzdata=2024a-2ubuntu1 -y --allow-downgrades
62+
5963
- name: Checkout code
6064
uses: actions/checkout@v4
6165

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,7 @@ set(SPARROW_HEADERS
205205
${SPARROW_INCLUDE_DIR}/sparrow/layout/temporal/timestamp_array.hpp
206206
${SPARROW_INCLUDE_DIR}/sparrow/layout/temporal/timestamp_concepts.hpp
207207
${SPARROW_INCLUDE_DIR}/sparrow/layout/temporal/timestamp_reference.hpp
208+
${SPARROW_INCLUDE_DIR}/sparrow/layout/temporal/interval_types.hpp
208209
${SPARROW_INCLUDE_DIR}/sparrow/layout/trivial_copyable_data_access.hpp
209210
${SPARROW_INCLUDE_DIR}/sparrow/layout/variable_size_binary_layout/variable_size_binary_array.hpp
210211
${SPARROW_INCLUDE_DIR}/sparrow/layout/variable_size_binary_layout/variable_size_binary_iterator.hpp

include/sparrow/arrow_interface/arrow_array_schema_info_utils.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,9 @@ namespace sparrow
5252
case data_type::DURATION_MILLISECONDS:
5353
case data_type::DURATION_MICROSECONDS:
5454
case data_type::DURATION_NANOSECONDS:
55+
case data_type::INTERVAL_MONTHS:
56+
case data_type::INTERVAL_DAYS_TIME:
57+
case data_type::INTERVAL_MONTHS_DAYS_NANOSECONDS:
5558
case data_type::DECIMAL32:
5659
case data_type::DECIMAL64:
5760
case data_type::DECIMAL128:

include/sparrow/builder/builder.hpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@
3535
#include <sparrow/layout/variable_size_binary_layout/variable_size_binary_array.hpp>
3636
#include <sparrow/utils/ranges.hpp>
3737

38+
#include "sparrow/layout/temporal/interval_array.hpp"
39+
3840
namespace sparrow
3941
{
4042

@@ -127,6 +129,14 @@ namespace sparrow
127129
concept translates_to_timestamp_layout = std::ranges::input_range<T>
128130
&& mpl::is_type_instance_of_v<ensured_range_value_t<T>, timestamp>;
129131

132+
133+
// Concept used to select the interval builder specialization.
// Requires T to be an input range and requires mpl::any_of, applied to
// interval_types_t with a same_as predicate on ensured_range_value_t<T>, to hold
// -- presumably "the range's value type is one of the interval types";
// NOTE(review): confirm against the mpl::any_of / mpl::predicate::same_as definitions.
template <typename T>
134+
concept translates_to_interval_layout = std::ranges::input_range<T>
135+
&& mpl::any_of(
136+
interval_types_t{},
137+
mpl::predicate::same_as<ensured_range_value_t<T>>{}
138+
);
139+
130140
template <class T>
131141
concept translate_to_variable_sized_list_layout = std::ranges::input_range<T>
132142
&& std::ranges::input_range<ensured_range_value_t<T>>
@@ -223,6 +233,18 @@ namespace sparrow
223233
}
224234
};
225235

236+
template <translates_to_interval_layout T, class OPTION_FLAGS>
237+
struct builder<T, dont_enforce_layout, OPTION_FLAGS>
238+
{
239+
using type = sparrow::interval_array<ensured_range_value_t<T>>;
240+
241+
template <class U>
242+
static type create(U&& t)
243+
{
244+
return type(std::forward<U>(t));
245+
}
246+
};
247+
226248
template <translate_to_variable_sized_list_layout T, class OPTION_FLAGS>
227249
struct builder<T, dont_enforce_layout, OPTION_FLAGS>
228250
{

include/sparrow/layout/dispatch.hpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#include "sparrow/layout/run_end_encoded_layout/run_end_encoded_array.hpp"
2828
#include "sparrow/layout/struct_layout/struct_array.hpp"
2929
#include "sparrow/layout/temporal/duration_array.hpp"
30+
#include "sparrow/layout/temporal/interval_array.hpp"
3031
#include "sparrow/layout/temporal/timestamp_array.hpp"
3132
#include "sparrow/layout/union_array.hpp"
3233
#include "sparrow/layout/variable_size_binary_layout/variable_size_binary_array.hpp"
@@ -146,6 +147,12 @@ namespace sparrow
146147
return func(unwrap_array<duration_microseconds_array>(ar));
147148
case data_type::DURATION_NANOSECONDS:
148149
return func(unwrap_array<duration_nanoseconds_array>(ar));
150+
case data_type::INTERVAL_MONTHS:
151+
return func(unwrap_array<months_interval_array>(ar));
152+
case data_type::INTERVAL_DAYS_TIME:
153+
return func(unwrap_array<days_time_interval_array>(ar));
154+
case data_type::INTERVAL_MONTHS_DAYS_NANOSECONDS:
155+
return func(unwrap_array<month_day_nanoseconds_interval_array>(ar));
149156
default:
150157
throw std::invalid_argument("array type not supported");
151158
}
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
// Copyright 2024 Man Group Operations Limited
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#pragma once
16+
17+
#include "sparrow/layout/array_trivial_copyable.hpp"
18+
#include "sparrow/layout/temporal/interval_types.hpp"
19+
#include "sparrow/types/data_traits.hpp"
20+
21+
// tiM : sparrow::chrono::months
22+
// tiD : sparrow::days_time_interval
23+
// tin : sparrow::month_day_nanoseconds_interval
24+
25+
namespace sparrow
26+
{
27+
using interval_types_t = mpl::typelist<chrono::months, days_time_interval, month_day_nanoseconds_interval>;
28+
29+
static constexpr interval_types_t interval_types;
30+
template <typename T>
31+
concept interval_type = mpl::contains<T>(interval_types);
32+
33+
/**
34+
* Array of interval values.
35+
*
36+
* Like the other arrays in sparrow, \c interval_array<T> provides an API as if it were holding
37+
* \c nullable<T> values instead of \c T values.
38+
*
39+
* Internally, the array contains a validity bitmap and a contiguous memory buffer
40+
* holding the values.
41+
*
* \c interval_array<T> is an alias of \c array_trivial_copyable<T>.
*
42+
* @tparam T the type of the values in the array; must satisfy \c interval_type.
43+
* @see https://arrow.apache.org/docs/dev/format/Columnar.html#fixed-size-primitive-layout
44+
*/
45+
template <interval_type T>
46+
using interval_array = array_trivial_copyable<T>;
47+
48+
// Convenience aliases: one interval_array instantiation per type of interval_types_t.
// Array of chrono::months values.
using months_interval_array = interval_array<chrono::months>;
49+
// Array of days_time_interval values.
using days_time_interval_array = interval_array<days_time_interval>;
50+
// Array of month_day_nanoseconds_interval values.
using month_day_nanoseconds_interval_array = interval_array<month_day_nanoseconds_interval>;
51+
52+
template <class T>
53+
struct is_interval_array : std::false_type
54+
{
55+
};
56+
57+
template <class T>
58+
struct is_interval_array<interval_array<T>> : std::true_type
59+
{
60+
};
61+
62+
/**
63+
* Checks whether T is an interval_array type.
*
* Shorthand for \c is_interval_array<T>::value.
64+
*/
65+
template <class T>
66+
constexpr bool is_interval_array_v = is_interval_array<T>::value;
67+
}
Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
// Copyright 2024 Man Group Operations Limited
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#pragma once
16+
17+
#include <chrono>
18+
#if defined(__cpp_lib_format)
19+
# include <format>
20+
#endif
21+
22+
namespace sparrow
{
    namespace chrono
    {
        /// Number of days, stored on 32 bits (period of 86400 seconds).
        using days = std::chrono::duration<int32_t, std::ratio<86400>>;
        /// Number of months, stored on 32 bits (period of 2629746 seconds).
        using months = std::chrono::duration<int32_t, std::ratio<2629746>>;
    }

    // We pack the structures to ensure that they are the same size on all platforms
#pragma pack(push, 1)

    /**
     * Interval made of a number of days and a number of milliseconds,
     * each stored on 32 bits.
     */
    struct days_time_interval
    {
        chrono::days days;
        std::chrono::duration<int32_t, std::milli> time;
    };

#pragma pack(pop)

    // The values are stored contiguously in a raw buffer: make sure the
    // packing really yields two 32-bit counters and nothing else.
    static_assert(sizeof(days_time_interval) == 8, "days_time_interval must be exactly 8 bytes");

    /**
     * Member-wise equality of two days_time_interval values.
     *
     * @return true when both the days and the time components compare equal.
     */
    constexpr bool operator==(const days_time_interval& lhs, const days_time_interval& rhs)
    {
        return lhs.days == rhs.days && lhs.time == rhs.time;
    }

#pragma pack(push, 1)

    /**
     * Interval made of a number of months and a number of days (32 bits each)
     * followed by a number of nanoseconds (64 bits).
     */
    struct month_day_nanoseconds_interval
    {
        chrono::months months;
        chrono::days days;
        std::chrono::duration<int64_t, std::nano> nanoseconds;
    };

#pragma pack(pop)

    // Two 32-bit counters plus one 64-bit counter, without any padding.
    static_assert(
        sizeof(month_day_nanoseconds_interval) == 16,
        "month_day_nanoseconds_interval must be exactly 16 bytes"
    );

    /**
     * Member-wise equality of two month_day_nanoseconds_interval values.
     *
     * @return true when the months, days and nanoseconds components all compare equal.
     */
    constexpr bool
    operator==(const month_day_nanoseconds_interval& lhs, const month_day_nanoseconds_interval& rhs)
    {
        return lhs.months == rhs.months && lhs.days == rhs.days && lhs.nanoseconds == rhs.nanoseconds;
    }
}
62+
63+
namespace std
{
#if defined(__cpp_lib_format)
    /**
     * std::formatter specialization for sparrow::days_time_interval.
     *
     * Output has the form "<days> days/<milliseconds> ms". No format
     * specification is interpreted.
     */
    template <>
    struct formatter<sparrow::days_time_interval>
    {
        constexpr auto parse(std::format_parse_context& ctx)
        {
            return ctx.begin();  // Simple implementation
        }

        auto format(const sparrow::days_time_interval& interval, std::format_context& ctx) const
        {
            // Write straight into the output iterator: no intermediate stream or
            // string, and no dependency on <sstream>, which this header does not include.
            return std::format_to(ctx.out(), "{} days/{} ms", interval.days.count(), interval.time.count());
        }
    };

    /**
     * std::formatter specialization for sparrow::month_day_nanoseconds_interval.
     *
     * Output has the form "<months> months/<days> days/<nanoseconds> ns". No
     * format specification is interpreted.
     */
    template <>
    struct formatter<sparrow::month_day_nanoseconds_interval>
    {
        constexpr auto parse(std::format_parse_context& ctx)
        {
            return ctx.begin();  // Simple implementation
        }

        auto format(const sparrow::month_day_nanoseconds_interval& interval, std::format_context& ctx) const
        {
            // Same approach as above: format the three counters directly.
            return std::format_to(
                ctx.out(),
                "{} months/{} days/{} ns",
                interval.months.count(),
                interval.days.count(),
                interval.nanoseconds.count()
            );
        }
    };
#endif
}

include/sparrow/types/data_traits.hpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include <chrono>
1818
#include <concepts>
1919

20+
#include "sparrow/layout/temporal/interval_types.hpp"
2021
#include "sparrow/types/data_type.hpp"
2122
#include "sparrow/utils/nullable.hpp"
2223
#include "sparrow/utils/vector_view.hpp"
@@ -167,6 +168,25 @@ namespace sparrow
167168
using const_reference = timestamp<std::chrono::nanoseconds>;
168169
};
169170

171+
// arrow_traits specializations for the interval value types.
// Each one inherits from common_native_types_traits<T> and only adds the
// data_type identifier associated with the type.

// chrono::months is associated with data_type::INTERVAL_MONTHS.
template <>
172+
struct arrow_traits<chrono::months> : common_native_types_traits<chrono::months>
173+
{
174+
static constexpr data_type type_id = data_type::INTERVAL_MONTHS;
175+
};
176+
177+
// days_time_interval is associated with data_type::INTERVAL_DAYS_TIME.
template <>
178+
struct arrow_traits<days_time_interval> : common_native_types_traits<days_time_interval>
179+
{
180+
static constexpr data_type type_id = data_type::INTERVAL_DAYS_TIME;
181+
};
182+
183+
// month_day_nanoseconds_interval is associated with
// data_type::INTERVAL_MONTHS_DAYS_NANOSECONDS.
template <>
184+
struct arrow_traits<month_day_nanoseconds_interval>
185+
: common_native_types_traits<month_day_nanoseconds_interval>
186+
{
187+
static constexpr data_type type_id = data_type::INTERVAL_MONTHS_DAYS_NANOSECONDS;
188+
};
189+
170190
namespace detail
171191
{
172192
template <class T>

include/sparrow/types/data_type.hpp

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@
1616

1717
#include <chrono>
1818
#include <version>
19+
20+
#include "sparrow/layout/temporal/interval_types.hpp"
21+
1922
#if defined(SPARROW_USE_DATE_POLYFILL)
2023

2124
# include <date/tz.h>
@@ -193,6 +196,9 @@ namespace sparrow
193196
DURATION_MILLISECONDS,
194197
DURATION_MICROSECONDS,
195198
DURATION_NANOSECONDS,
199+
INTERVAL_MONTHS,
200+
INTERVAL_DAYS_TIME,
201+
INTERVAL_MONTHS_DAYS_NANOSECONDS
196202
};
197203

198204
// helper function to check if a string is all digits
@@ -304,6 +310,18 @@ namespace sparrow
304310
{
305311
return data_type::DURATION_NANOSECONDS;
306312
}
313+
else if (format == "tiM")
314+
{
315+
return data_type::INTERVAL_MONTHS;
316+
}
317+
else if (format == "tiD")
318+
{
319+
return data_type::INTERVAL_DAYS_TIME;
320+
}
321+
else if (format == "tin")
322+
{
323+
return data_type::INTERVAL_MONTHS_DAYS_NANOSECONDS;
324+
}
307325
}
308326
else if (format == "+l")
309327
{
@@ -498,6 +516,12 @@ namespace sparrow
498516
return "tDu";
499517
case data_type::DURATION_NANOSECONDS:
500518
return "tDn";
519+
case data_type::INTERVAL_MONTHS:
520+
return "tiM";
521+
case data_type::INTERVAL_DAYS_TIME:
522+
return "tiD";
523+
case data_type::INTERVAL_MONTHS_DAYS_NANOSECONDS:
524+
return "tin";
501525
case data_type::LIST:
502526
return "+l";
503527
case data_type::LARGE_LIST:
@@ -579,6 +603,9 @@ namespace sparrow
579603
std::chrono::milliseconds,
580604
std::chrono::microseconds,
581605
std::chrono::nanoseconds,
606+
chrono::months,
607+
days_time_interval,
608+
month_day_nanoseconds_interval,
582609
// TODO: add missing fundamental types here
583610
list_value,
584611
struct_value,
@@ -825,6 +852,12 @@ namespace std
825852
return "Duration microseconds";
826853
case DURATION_NANOSECONDS:
827854
return "Duration nanoseconds";
855+
case INTERVAL_MONTHS:
856+
return "Interval months";
857+
case INTERVAL_DAYS_TIME:
858+
return "Interval days time";
859+
case INTERVAL_MONTHS_DAYS_NANOSECONDS:
860+
return "Interval months days nanoseconds";
828861
case LIST:
829862
return "List";
830863
case LARGE_LIST:

src/array_factory.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include "sparrow/layout/run_end_encoded_layout/run_end_encoded_array.hpp"
2424
#include "sparrow/layout/struct_layout/struct_array.hpp"
2525
#include "sparrow/layout/temporal/duration_array.hpp"
26+
#include "sparrow/layout/temporal/interval_array.hpp"
2627
#include "sparrow/layout/temporal/timestamp_array.hpp"
2728
#include "sparrow/layout/union_array.hpp"
2829
#include "sparrow/layout/variable_size_binary_layout/variable_size_binary_array.hpp"
@@ -138,6 +139,12 @@ namespace sparrow
138139
return detail::make_wrapper_ptr<duration_microseconds_array>(std::move(proxy));
139140
case data_type::DURATION_NANOSECONDS:
140141
return detail::make_wrapper_ptr<duration_nanoseconds_array>(std::move(proxy));
142+
case data_type::INTERVAL_MONTHS:
143+
return detail::make_wrapper_ptr<months_interval_array>(std::move(proxy));
144+
case data_type::INTERVAL_DAYS_TIME:
145+
return detail::make_wrapper_ptr<days_time_interval_array>(std::move(proxy));
146+
case data_type::INTERVAL_MONTHS_DAYS_NANOSECONDS:
147+
return detail::make_wrapper_ptr<month_day_nanoseconds_interval_array>(std::move(proxy));
141148
case data_type::MAP:
142149
case data_type::DECIMAL32:
143150
return detail::make_wrapper_ptr<decimal_32_array>(std::move(proxy));

0 commit comments

Comments
 (0)