Skip to content

Commit bf342b2

Browse files
rok and pitrou authored
GH-30036: [C++] Timezone-aware kernels should handle offset strings (e.g. "+04:30") (#12865)
ARROW-14477: #30036 Currently, timestamp arrays have the type `timestamp(unit, zone name)`. This change adds "offset timezones", so that timestamp arrays also support types like `timestamp(unit, "+/-HH:MM")`. * GitHub Issue: #30036 Lead-authored-by: Rok Mihevc <[email protected]> Co-authored-by: Rok <[email protected]> Co-authored-by: Antoine Pitrou <[email protected]> Signed-off-by: Rok Mihevc <[email protected]>
1 parent e137a04 commit bf342b2

File tree

10 files changed

+410
-158
lines changed

10 files changed

+410
-158
lines changed

cpp/src/arrow/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -735,6 +735,7 @@ set(ARROW_COMPUTE_SRCS
735735
compute/kernels/scalar_cast_numeric.cc
736736
compute/kernels/scalar_cast_string.cc
737737
compute/kernels/scalar_cast_temporal.cc
738+
compute/kernels/temporal_internal.cc
738739
compute/kernels/vector_hash.cc
739740
compute/kernels/vector_selection.cc
740741
compute/kernels/vector_selection_filter_internal.cc
@@ -779,6 +780,7 @@ if(ARROW_COMPUTE)
779780
compute/kernels/scalar_temporal_binary.cc
780781
compute/kernels/scalar_temporal_unary.cc
781782
compute/kernels/scalar_validity.cc
783+
compute/kernels/temporal_internal.cc
782784
compute/kernels/util_internal.cc
783785
compute/kernels/vector_array_sort.cc
784786
compute/kernels/vector_cumulative_ops.cc

cpp/src/arrow/compute/kernels/scalar_cast_string.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -196,8 +196,8 @@ struct TemporalToStringCastFunctor<O, TimestampType> {
196196
static const std::string kFormatString = "%Y-%m-%d %H:%M:%S%z";
197197
static const std::string kUtcFormatString = "%Y-%m-%d %H:%M:%SZ";
198198
DCHECK(!timezone.empty());
199-
ARROW_ASSIGN_OR_RAISE(const time_zone* tz, LocateZone(timezone));
200-
ARROW_ASSIGN_OR_RAISE(std::locale locale, GetLocale("C"));
199+
ARROW_ASSIGN_OR_RAISE(auto tz, LocateZone(timezone));
200+
ARROW_ASSIGN_OR_RAISE(auto locale, GetLocale("C"));
201201
TimestampFormatter<Duration> formatter{
202202
timezone == "UTC" ? kUtcFormatString : kFormatString, tz, locale};
203203
return VisitArraySpanInline<TimestampType>(

cpp/src/arrow/compute/kernels/scalar_temporal_binary.cc

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,17 +44,14 @@ using arrow_vendored::date::floor;
4444
using arrow_vendored::date::hh_mm_ss;
4545
using arrow_vendored::date::local_days;
4646
using arrow_vendored::date::local_time;
47-
using arrow_vendored::date::locate_zone;
4847
using arrow_vendored::date::sys_days;
4948
using arrow_vendored::date::sys_time;
50-
using arrow_vendored::date::time_zone;
5149
using arrow_vendored::date::trunc;
5250
using arrow_vendored::date::weekday;
5351
using arrow_vendored::date::weeks;
5452
using arrow_vendored::date::year_month_day;
5553
using arrow_vendored::date::year_month_weekday;
5654
using arrow_vendored::date::years;
57-
using arrow_vendored::date::zoned_time;
5855
using arrow_vendored::date::literals::dec;
5956
using arrow_vendored::date::literals::jan;
6057
using arrow_vendored::date::literals::last;

cpp/src/arrow/compute/kernels/scalar_temporal_test.cc

Lines changed: 171 additions & 89 deletions
Large diffs are not rendered by default.

cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc

Lines changed: 31 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,7 @@ namespace arrow {
3636
using internal::checked_cast;
3737
using internal::checked_pointer_cast;
3838

39-
namespace compute {
40-
namespace internal {
39+
namespace compute::internal {
4140

4241
namespace {
4342

@@ -60,7 +59,6 @@ using arrow_vendored::date::year;
6059
using arrow_vendored::date::year_month_day;
6160
using arrow_vendored::date::year_month_weekday;
6261
using arrow_vendored::date::years;
63-
using arrow_vendored::date::zoned_time;
6462
using arrow_vendored::date::literals::dec;
6563
using arrow_vendored::date::literals::jan;
6664
using arrow_vendored::date::literals::last;
@@ -664,15 +662,19 @@ struct Nanosecond {
664662

665663
template <typename Duration>
666664
struct IsDaylightSavings {
667-
explicit IsDaylightSavings(const FunctionOptions* options, const time_zone* tz)
665+
explicit IsDaylightSavings(const FunctionOptions* options, const ArrowTimeZone tz)
668666
: tz_(tz) {}
669667

670668
template <typename T, typename Arg0>
671669
T Call(KernelContext*, Arg0 arg, Status*) const {
672-
return tz_->get_info(sys_time<Duration>{Duration{arg}}).save.count() != 0;
670+
return std::visit(
671+
[&arg](const auto& tz) -> bool {
672+
return tz->get_info(sys_time<Duration>{Duration{arg}}).save.count() != 0;
673+
},
674+
tz_);
673675
}
674676

675-
const time_zone* tz_;
677+
const ArrowTimeZone tz_;
676678
};
677679

678680
// ----------------------------------------------------------------------
@@ -1166,7 +1168,7 @@ Result<std::locale> GetLocale(const std::string& locale) {
11661168
template <typename Duration, typename InType>
11671169
struct Strftime {
11681170
const StrftimeOptions& options;
1169-
const time_zone* tz;
1171+
const ArrowTimeZone tz;
11701172
const std::locale locale;
11711173

11721174
static Result<Strftime> Make(KernelContext* ctx, const DataType& type) {
@@ -1187,9 +1189,7 @@ struct Strftime {
11871189
options.format);
11881190
}
11891191
}
1190-
1191-
ARROW_ASSIGN_OR_RAISE(const time_zone* tz,
1192-
LocateZone(timezone.empty() ? "UTC" : timezone));
1192+
ARROW_ASSIGN_OR_RAISE(auto tz, LocateZone(timezone.empty() ? "UTC" : timezone));
11931193

11941194
ARROW_ASSIGN_OR_RAISE(std::locale locale, GetLocale(options.locale));
11951195

@@ -1354,31 +1354,31 @@ Result<TypeHolder> ResolveLocalTimestampOutput(KernelContext* ctx,
13541354

13551355
template <typename Duration>
13561356
struct AssumeTimezone {
1357-
explicit AssumeTimezone(const AssumeTimezoneOptions* options, const time_zone* tz)
1357+
explicit AssumeTimezone(const AssumeTimezoneOptions* options, const ArrowTimeZone tz)
13581358
: options(*options), tz_(tz) {}
13591359

13601360
template <typename T, typename Arg0>
1361-
T get_local_time(Arg0 arg, const time_zone* tz) const {
1362-
return static_cast<T>(zoned_time<Duration>(tz, local_time<Duration>(Duration{arg}))
1363-
.get_sys_time()
1364-
.time_since_epoch()
1365-
.count());
1361+
T get_local_time(Arg0 arg, const ArrowTimeZone* tz) const {
1362+
const auto lt = local_time<Duration>(Duration{arg});
1363+
auto local_to_sys_time = [&](auto&& t) {
1364+
return t.get_sys_time().time_since_epoch().count();
1365+
};
1366+
return ApplyTimeZone(tz_, lt, std::nullopt, local_to_sys_time);
13661367
}
13671368

13681369
template <typename T, typename Arg0>
1369-
T get_local_time(Arg0 arg, const arrow_vendored::date::choose choose,
1370-
const time_zone* tz) const {
1371-
return static_cast<T>(
1372-
zoned_time<Duration>(tz, local_time<Duration>(Duration{arg}), choose)
1373-
.get_sys_time()
1374-
.time_since_epoch()
1375-
.count());
1370+
T get_local_time(Arg0 arg, const choose c, const ArrowTimeZone* tz) const {
1371+
const auto lt = local_time<Duration>(Duration{arg});
1372+
auto local_to_sys_time = [&](auto&& t) {
1373+
return t.get_sys_time().time_since_epoch().count();
1374+
};
1375+
return ApplyTimeZone(tz_, lt, c, local_to_sys_time);
13761376
}
13771377

13781378
template <typename T, typename Arg0>
13791379
T Call(KernelContext*, Arg0 arg, Status* st) const {
13801380
try {
1381-
return get_local_time<T, Arg0>(arg, tz_);
1381+
return get_local_time<T, Arg0>(arg, &tz_);
13821382
} catch (const arrow_vendored::date::nonexistent_local_time& e) {
13831383
switch (options.nonexistent) {
13841384
case AssumeTimezoneOptions::Nonexistent::NONEXISTENT_RAISE: {
@@ -1387,11 +1387,12 @@ struct AssumeTimezone {
13871387
return arg;
13881388
}
13891389
case AssumeTimezoneOptions::Nonexistent::NONEXISTENT_EARLIEST: {
1390-
return get_local_time<T, Arg0>(arg, arrow_vendored::date::choose::latest, tz_) -
1390+
return get_local_time<T, Arg0>(arg, arrow_vendored::date::choose::latest,
1391+
&tz_) -
13911392
1;
13921393
}
13931394
case AssumeTimezoneOptions::Nonexistent::NONEXISTENT_LATEST: {
1394-
return get_local_time<T, Arg0>(arg, arrow_vendored::date::choose::latest, tz_);
1395+
return get_local_time<T, Arg0>(arg, arrow_vendored::date::choose::latest, &tz_);
13951396
}
13961397
}
13971398
} catch (const arrow_vendored::date::ambiguous_local_time& e) {
@@ -1403,17 +1404,17 @@ struct AssumeTimezone {
14031404
}
14041405
case AssumeTimezoneOptions::Ambiguous::AMBIGUOUS_EARLIEST: {
14051406
return get_local_time<T, Arg0>(arg, arrow_vendored::date::choose::earliest,
1406-
tz_);
1407+
&tz_);
14071408
}
14081409
case AssumeTimezoneOptions::Ambiguous::AMBIGUOUS_LATEST: {
1409-
return get_local_time<T, Arg0>(arg, arrow_vendored::date::choose::latest, tz_);
1410+
return get_local_time<T, Arg0>(arg, arrow_vendored::date::choose::latest, &tz_);
14101411
}
14111412
}
14121413
}
14131414
return 0;
14141415
}
14151416
AssumeTimezoneOptions options;
1416-
const time_zone* tz_;
1417+
const ArrowTimeZone tz_;
14171418
};
14181419

14191420
// ----------------------------------------------------------------------
@@ -2035,6 +2036,5 @@ void RegisterScalarTemporalUnary(FunctionRegistry* registry) {
20352036
DCHECK_OK(registry->AddFunction(std::move(round_temporal)));
20362037
}
20372038

2038-
} // namespace internal
2039-
} // namespace compute
2039+
} // namespace compute::internal
20402040
} // namespace arrow
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
#include "arrow/compute/kernels/temporal_internal.h"
19+
20+
namespace arrow::compute::internal {
21+
22+
// Resolve a timezone string to an ArrowTimeZone.
//
// A string starting with '+' or '-' is treated as a fixed UTC offset and must be
// exactly "+HH:MM"/"-HH:MM" (6 characters) or "+HHMM"/"-HHMM" (5 characters);
// on success an OffsetZone carrying the signed offset in minutes is returned.
// Any other string is handed to locate_zone().
//
// Returns Status::Invalid when the string is empty, when an offset string is
// malformed, or when locate_zone() throws std::runtime_error.
Result<ArrowTimeZone> LocateZone(const std::string_view timezone) {
  // Check emptiness before looking at the first character: indexing an empty
  // string_view is undefined behavior.
  if (timezone.empty()) {
    return Status::Invalid("Cannot locate or parse timezone '", timezone, "'");
  }

  const char sign = timezone.front();
  if (sign == '+' || sign == '-') {
    // Valid offset strings have to have 4 digits and a sign prefix.
    // Valid examples: +01:23 and -0123.
    // Invalid examples: 1:23, 123, 0123, 01:23, +25:00, -12:34:45, +090000.
    const auto offset = std::string(timezone.substr(1));
    std::chrono::minutes zone_offset{0};
    bool parsed = false;
    // The parsers only receive a bare `const char*`, so the overall length is
    // enforced here: each length is matched with exactly one format. A
    // 6-character string is never retried as HHMM, which would otherwise let
    // inputs with a trailing extra character (e.g. "+01234") slip through.
    // NOTE(review): assumes ParseHHMM does not itself validate the input length.
    switch (timezone.length()) {
      case 6:
        parsed = arrow::internal::detail::ParseHH_MM(offset.c_str(), &zone_offset);
        break;
      case 5:
        parsed = arrow::internal::detail::ParseHHMM(offset.c_str(), &zone_offset);
        break;
      default:
        break;
    }
    if (!parsed) {
      return Status::Invalid("Cannot locate or parse timezone '", timezone, "'");
    }
    return OffsetZone(sign == '-' ? -zone_offset : zone_offset);
  }

  // Not an offset string: look it up by name; a failed lookup surfaces as
  // std::runtime_error and is converted into a Status.
  try {
    return locate_zone(timezone);
  } catch (const std::runtime_error& ex) {
    return Status::Invalid("Cannot locate or parse timezone '", timezone,
                           "': ", ex.what());
  }
}
54+
55+
} // namespace arrow::compute::internal

0 commit comments

Comments
 (0)