Skip to content

Commit cceb32e

Browse files
Add date array (#342)
Add date array --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 532bf48 commit cceb32e

File tree

14 files changed

+705
-6
lines changed

14 files changed

+705
-6
lines changed

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,7 @@ set(SPARROW_HEADERS
201201
${SPARROW_INCLUDE_DIR}/sparrow/layout/primitive_array.hpp
202202
${SPARROW_INCLUDE_DIR}/sparrow/layout/struct_layout/struct_array.hpp
203203
${SPARROW_INCLUDE_DIR}/sparrow/layout/struct_layout/struct_value.hpp
204+
${SPARROW_INCLUDE_DIR}/sparrow/layout/temporal/date_array.hpp
204205
${SPARROW_INCLUDE_DIR}/sparrow/layout/temporal/duration_array.hpp
205206
${SPARROW_INCLUDE_DIR}/sparrow/layout/temporal/timestamp_array.hpp
206207
${SPARROW_INCLUDE_DIR}/sparrow/layout/temporal/timestamp_concepts.hpp

include/sparrow/arrow_interface/arrow_array_schema_info_utils.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@ namespace sparrow
4444
case data_type::HALF_FLOAT:
4545
case data_type::FLOAT:
4646
case data_type::DOUBLE:
47+
case data_type::DATE_DAYS:
48+
case data_type::DATE_MILLISECONDS:
4749
case data_type::TIMESTAMP_SECONDS:
4850
case data_type::TIMESTAMP_MILLISECONDS:
4951
case data_type::TIMESTAMP_MICROSECONDS:

include/sparrow/builder/builder.hpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
#include <sparrow/layout/list_layout/list_array.hpp>
3232
#include <sparrow/layout/primitive_array.hpp>
3333
#include <sparrow/layout/struct_layout/struct_array.hpp>
34+
#include <sparrow/layout/temporal/date_array.hpp>
3435
#include <sparrow/layout/union_array.hpp>
3536
#include <sparrow/layout/variable_size_binary_layout/variable_size_binary_array.hpp>
3637
#include <sparrow/utils/ranges.hpp>
@@ -120,6 +121,12 @@ namespace sparrow
120121
&& std::is_scalar_v<ensured_range_value_t<T>>;
121122

122123
template <typename T>
124+
concept translates_to_date_layout = std::ranges::input_range<T>
125+
&& mpl::any_of(
126+
date_types_t{},
127+
mpl::predicate::same_as<ensured_range_value_t<T>>{}
128+
);
129+
template <typename T>
123130
concept translates_to_duration_layout = std::ranges::input_range<T>
124131
&& mpl::any_of(
125132
duration_types_t{},
@@ -197,6 +204,18 @@ namespace sparrow
197204
}
198205
};
199206

207+
template <translates_to_date_layout T, class OPTION_FLAGS>
208+
struct builder<T, dont_enforce_layout, OPTION_FLAGS>
209+
{
210+
using type = sparrow::date_array<ensured_range_value_t<T>>;
211+
212+
template <class U>
213+
static type create(U&& t)
214+
{
215+
return type(std::forward<U>(t));
216+
}
217+
};
218+
200219
template <translates_to_duration_layout T, class OPTION_FLAGS>
201220
struct builder<T, dont_enforce_layout, OPTION_FLAGS>
202221
{

include/sparrow/layout/dispatch.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,14 @@
2626
#include "sparrow/layout/primitive_array.hpp"
2727
#include "sparrow/layout/run_end_encoded_layout/run_end_encoded_array.hpp"
2828
#include "sparrow/layout/struct_layout/struct_array.hpp"
29+
#include "sparrow/layout/temporal/date_array.hpp"
2930
#include "sparrow/layout/temporal/duration_array.hpp"
3031
#include "sparrow/layout/temporal/interval_array.hpp"
3132
#include "sparrow/layout/temporal/timestamp_array.hpp"
3233
#include "sparrow/layout/union_array.hpp"
3334
#include "sparrow/layout/variable_size_binary_layout/variable_size_binary_array.hpp"
3435
#include "sparrow/types/data_traits.hpp"
36+
#include "sparrow/types/data_type.hpp"
3537

3638
namespace sparrow
3739
{
@@ -131,6 +133,10 @@ namespace sparrow
131133
return func(unwrap_array<decimal_256_array>(ar));
132134
case data_type::FIXED_WIDTH_BINARY:
133135
return func(unwrap_array<fixed_width_binary_array>(ar));
136+
case sparrow::data_type::DATE_DAYS:
137+
return func(unwrap_array<date_days_array>(ar));
138+
case data_type::DATE_MILLISECONDS:
139+
return func(unwrap_array<date_milliseconds_array>(ar));
134140
case data_type::TIMESTAMP_SECONDS:
135141
return func(unwrap_array<timestamp_array<timestamp<std::chrono::seconds>>>(ar));
136142
case data_type::TIMESTAMP_MILLISECONDS:
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
// Copyright 2024 Man Group Operations Limited
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#pragma once
16+
17+
#include "sparrow/layout/array_trivial_copyable.hpp"
18+
#include "sparrow/layout/temporal/date_types.hpp"
19+
20+
// tdD : sparrow::chrono::days
21+
// tdm : std::chrono::milliseconds
22+
23+
namespace sparrow
24+
{
25+
using date_types_t = mpl::typelist<date_days, date_milliseconds>;
26+
27+
static constexpr date_types_t date_types;
28+
template <typename T>
29+
concept date_type = mpl::contains<T>(date_types);
30+
31+
/**
32+
* Array of date values, i.e. \c date_days or \c date_milliseconds time points.
33+
*
34+
* As the other arrays in sparrow, \c date_array<T> provides an API as if it was holding
35+
* \c nullable<T> values instead of \c T values.
36+
*
37+
* Internally, the array contains a validity bitmap and a contiguous memory buffer
38+
* holding the values.
39+
*
40+
* @tparam T the type of the values in the array.
41+
* @see https://arrow.apache.org/docs/dev/format/Columnar.html#fixed-size-primitive-layout
42+
*/
43+
template <date_type T>
44+
using date_array = array_trivial_copyable<T>;
45+
46+
using date_days_array = date_array<date_days>;
47+
using date_milliseconds_array = date_array<date_milliseconds>;
48+
49+
template <class T>
50+
struct is_date_array : std::false_type
51+
{
52+
};
53+
54+
template <class T>
55+
struct is_date_array<date_array<T>> : std::true_type
56+
{
57+
};
58+
59+
/**
60+
* Checks whether T is a date_array type.
61+
*/
62+
template <class T>
63+
constexpr bool is_date_array_v = is_date_array<T>::value;
64+
}
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
// Copyright 2024 Man Group Operations Limited
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#pragma once
16+
17+
#include <chrono>
18+
19+
#include "sparrow/layout/temporal/temporal_types.hpp"
20+
21+
namespace sparrow
22+
{
23+
using date_days = std::chrono::time_point<std::chrono::system_clock, chrono::days>;
24+
using date_milliseconds = std::chrono::time_point<std::chrono::system_clock, std::chrono::milliseconds>;
25+
}

include/sparrow/layout/temporal/interval_types.hpp

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,10 @@
1919
# include <format>
2020
#endif
2121

22+
#include "sparrow/layout/temporal/temporal_types.hpp"
23+
2224
namespace sparrow
2325
{
24-
namespace chrono
25-
{
26-
using days = std::chrono::duration<int32_t, std::ratio<86400>>;
27-
using months = std::chrono::duration<int32_t, std::ratio<2629746>>;
28-
}
2926

3027
// We pack the structures to ensure that they are the same size on all platforms
3128
#pragma pack(push, 1)
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
// Copyright 2024 Man Group Operations Limited
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#pragma once
16+
17+
#include <chrono>
18+
19+
namespace sparrow::chrono
20+
{
21+
using days = std::chrono::duration<int32_t, std::ratio<86400>>; // 1 day = 86400 seconds
22+
using months = std::chrono::duration<int32_t, std::ratio<2629746>>; // 1 month = 2629746 seconds
23+
}

include/sparrow/types/data_traits.hpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include <chrono>
1818
#include <concepts>
1919

20+
#include "sparrow/layout/temporal/date_array.hpp"
2021
#include "sparrow/layout/temporal/interval_types.hpp"
2122
#include "sparrow/types/data_type.hpp"
2223
#include "sparrow/utils/nullable.hpp"
@@ -112,6 +113,18 @@ namespace sparrow
112113
using const_reference = decimal<int256_t>;
113114
};
114115

116+
template <>
117+
struct arrow_traits<date_days> : common_native_types_traits<date_days>
118+
{
119+
static constexpr data_type type_id = data_type::DATE_DAYS;
120+
};
121+
122+
template <>
123+
struct arrow_traits<date_milliseconds> : common_native_types_traits<date_milliseconds>
124+
{
125+
static constexpr data_type type_id = data_type::DATE_MILLISECONDS;
126+
};
127+
115128
template <>
116129
struct arrow_traits<std::chrono::seconds> : common_native_types_traits<std::chrono::seconds>
117130
{

include/sparrow/types/data_type.hpp

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include <chrono>
1818
#include <version>
1919

20+
#include "sparrow/layout/temporal/date_types.hpp"
2021
#include "sparrow/layout/temporal/interval_types.hpp"
2122

2223
#if defined(SPARROW_USE_DATE_POLYFILL)
@@ -188,6 +189,8 @@ namespace sparrow
188189
DECIMAL128,
189190
DECIMAL256,
190191
FIXED_WIDTH_BINARY,
192+
DATE_DAYS,
193+
DATE_MILLISECONDS,
191194
TIMESTAMP_SECONDS,
192195
TIMESTAMP_MILLISECONDS,
193196
TIMESTAMP_MICROSECONDS,
@@ -278,7 +281,15 @@ namespace sparrow
278281
// TODO: add proper timestamp support below
279282
else if (format.starts_with("t"))
280283
{
281-
if (format.starts_with("tss:"))
284+
if (format == "tdD")
285+
{
286+
return data_type::DATE_DAYS;
287+
}
288+
else if (format == "tdm")
289+
{
290+
return data_type::DATE_MILLISECONDS;
291+
}
292+
else if (format.starts_with("tss:"))
282293
{
283294
return data_type::TIMESTAMP_SECONDS;
284295
}
@@ -500,6 +511,10 @@ namespace sparrow
500511
return "z";
501512
case data_type::LARGE_BINARY:
502513
return "Z";
514+
case data_type::DATE_DAYS:
515+
return "tdD";
516+
case data_type::DATE_MILLISECONDS:
517+
return "tdm";
503518
case data_type::TIMESTAMP_SECONDS:
504519
return "tss:";
505520
case data_type::TIMESTAMP_MILLISECONDS:
@@ -595,6 +610,8 @@ namespace sparrow
595610
float64_t,
596611
std::string,
597612
std::vector<byte_t>,
613+
date_days,
614+
date_milliseconds,
598615
timestamp<std::chrono::seconds>,
599616
timestamp<std::chrono::milliseconds>,
600617
timestamp<std::chrono::microseconds>,
@@ -836,6 +853,10 @@ namespace std
836853
return "Binary";
837854
case LARGE_BINARY:
838855
return "Large binary";
856+
case DATE_DAYS:
857+
return "Date days";
858+
case DATE_MILLISECONDS:
859+
return "Date milliseconds";
839860
case TIMESTAMP_SECONDS:
840861
return "Timestamp seconds";
841862
case TIMESTAMP_MILLISECONDS:

0 commit comments

Comments
 (0)