Skip to content
Merged
Show file tree
Hide file tree
Changes from 22 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions cpp/src/arrow/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -735,6 +735,7 @@ set(ARROW_COMPUTE_SRCS
compute/kernels/scalar_cast_numeric.cc
compute/kernels/scalar_cast_string.cc
compute/kernels/scalar_cast_temporal.cc
compute/kernels/temporal_internal.cc
compute/kernels/vector_hash.cc
compute/kernels/vector_selection.cc
compute/kernels/vector_selection_filter_internal.cc
Expand Down Expand Up @@ -779,6 +780,7 @@ if(ARROW_COMPUTE)
compute/kernels/scalar_temporal_binary.cc
compute/kernels/scalar_temporal_unary.cc
compute/kernels/scalar_validity.cc
compute/kernels/temporal_internal.cc
compute/kernels/util_internal.cc
compute/kernels/vector_array_sort.cc
compute/kernels/vector_cumulative_ops.cc
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/arrow/compute/kernels/scalar_cast_string.cc
Original file line number Diff line number Diff line change
Expand Up @@ -196,8 +196,8 @@ struct TemporalToStringCastFunctor<O, TimestampType> {
static const std::string kFormatString = "%Y-%m-%d %H:%M:%S%z";
static const std::string kUtcFormatString = "%Y-%m-%d %H:%M:%SZ";
DCHECK(!timezone.empty());
ARROW_ASSIGN_OR_RAISE(const time_zone* tz, LocateZone(timezone));
ARROW_ASSIGN_OR_RAISE(std::locale locale, GetLocale("C"));
ARROW_ASSIGN_OR_RAISE(auto tz, LocateZone(timezone));
ARROW_ASSIGN_OR_RAISE(auto locale, GetLocale("C"));
TimestampFormatter<Duration> formatter{
timezone == "UTC" ? kUtcFormatString : kFormatString, tz, locale};
return VisitArraySpanInline<TimestampType>(
Expand Down
3 changes: 0 additions & 3 deletions cpp/src/arrow/compute/kernels/scalar_temporal_binary.cc
Original file line number Diff line number Diff line change
Expand Up @@ -44,17 +44,14 @@ using arrow_vendored::date::floor;
using arrow_vendored::date::hh_mm_ss;
using arrow_vendored::date::local_days;
using arrow_vendored::date::local_time;
using arrow_vendored::date::locate_zone;
using arrow_vendored::date::sys_days;
using arrow_vendored::date::sys_time;
using arrow_vendored::date::time_zone;
using arrow_vendored::date::trunc;
using arrow_vendored::date::weekday;
using arrow_vendored::date::weeks;
using arrow_vendored::date::year_month_day;
using arrow_vendored::date::year_month_weekday;
using arrow_vendored::date::years;
using arrow_vendored::date::zoned_time;
using arrow_vendored::date::literals::dec;
using arrow_vendored::date::literals::jan;
using arrow_vendored::date::literals::last;
Expand Down
260 changes: 171 additions & 89 deletions cpp/src/arrow/compute/kernels/scalar_temporal_test.cc

Large diffs are not rendered by default.

62 changes: 31 additions & 31 deletions cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,7 @@ namespace arrow {
using internal::checked_cast;
using internal::checked_pointer_cast;

namespace compute {
namespace internal {
namespace compute::internal {

namespace {

Expand All @@ -60,7 +59,6 @@ using arrow_vendored::date::year;
using arrow_vendored::date::year_month_day;
using arrow_vendored::date::year_month_weekday;
using arrow_vendored::date::years;
using arrow_vendored::date::zoned_time;
using arrow_vendored::date::literals::dec;
using arrow_vendored::date::literals::jan;
using arrow_vendored::date::literals::last;
Expand Down Expand Up @@ -664,15 +662,19 @@ struct Nanosecond {

template <typename Duration>
struct IsDaylightSavings {
explicit IsDaylightSavings(const FunctionOptions* options, const time_zone* tz)
explicit IsDaylightSavings(const FunctionOptions* options, const ArrowTimeZone tz)
: tz_(tz) {}

template <typename T, typename Arg0>
T Call(KernelContext*, Arg0 arg, Status*) const {
return tz_->get_info(sys_time<Duration>{Duration{arg}}).save.count() != 0;
return std::visit(
[&arg](const auto& tz) -> bool {
return tz->get_info(sys_time<Duration>{Duration{arg}}).save.count() != 0;
},
tz_);
}

const time_zone* tz_;
const ArrowTimeZone tz_;
};

// ----------------------------------------------------------------------
Expand Down Expand Up @@ -1166,7 +1168,7 @@ Result<std::locale> GetLocale(const std::string& locale) {
template <typename Duration, typename InType>
struct Strftime {
const StrftimeOptions& options;
const time_zone* tz;
const ArrowTimeZone tz;
const std::locale locale;

static Result<Strftime> Make(KernelContext* ctx, const DataType& type) {
Expand All @@ -1187,9 +1189,7 @@ struct Strftime {
options.format);
}
}

ARROW_ASSIGN_OR_RAISE(const time_zone* tz,
LocateZone(timezone.empty() ? "UTC" : timezone));
ARROW_ASSIGN_OR_RAISE(auto tz, LocateZone(timezone.empty() ? "UTC" : timezone));

ARROW_ASSIGN_OR_RAISE(std::locale locale, GetLocale(options.locale));

Expand Down Expand Up @@ -1354,31 +1354,31 @@ Result<TypeHolder> ResolveLocalTimestampOutput(KernelContext* ctx,

template <typename Duration>
struct AssumeTimezone {
explicit AssumeTimezone(const AssumeTimezoneOptions* options, const time_zone* tz)
explicit AssumeTimezone(const AssumeTimezoneOptions* options, const ArrowTimeZone tz)
: options(*options), tz_(tz) {}

template <typename T, typename Arg0>
T get_local_time(Arg0 arg, const time_zone* tz) const {
return static_cast<T>(zoned_time<Duration>(tz, local_time<Duration>(Duration{arg}))
.get_sys_time()
.time_since_epoch()
.count());
T get_local_time(Arg0 arg, const ArrowTimeZone* tz) const {
const auto lt = local_time<Duration>(Duration{arg});
auto local_to_sys_time = [&](auto&& t) {
return t.get_sys_time().time_since_epoch().count();
};
return ApplyTimeZone(tz_, lt, std::nullopt, local_to_sys_time);
}

template <typename T, typename Arg0>
T get_local_time(Arg0 arg, const arrow_vendored::date::choose choose,
const time_zone* tz) const {
return static_cast<T>(
zoned_time<Duration>(tz, local_time<Duration>(Duration{arg}), choose)
.get_sys_time()
.time_since_epoch()
.count());
T get_local_time(Arg0 arg, const choose c, const ArrowTimeZone* tz) const {
const auto lt = local_time<Duration>(Duration{arg});
auto local_to_sys_time = [&](auto&& t) {
return t.get_sys_time().time_since_epoch().count();
};
return ApplyTimeZone(tz_, lt, c, local_to_sys_time);
}

template <typename T, typename Arg0>
T Call(KernelContext*, Arg0 arg, Status* st) const {
try {
return get_local_time<T, Arg0>(arg, tz_);
return get_local_time<T, Arg0>(arg, &tz_);
} catch (const arrow_vendored::date::nonexistent_local_time& e) {
switch (options.nonexistent) {
case AssumeTimezoneOptions::Nonexistent::NONEXISTENT_RAISE: {
Expand All @@ -1387,11 +1387,12 @@ struct AssumeTimezone {
return arg;
}
case AssumeTimezoneOptions::Nonexistent::NONEXISTENT_EARLIEST: {
return get_local_time<T, Arg0>(arg, arrow_vendored::date::choose::latest, tz_) -
return get_local_time<T, Arg0>(arg, arrow_vendored::date::choose::latest,
&tz_) -
1;
}
case AssumeTimezoneOptions::Nonexistent::NONEXISTENT_LATEST: {
return get_local_time<T, Arg0>(arg, arrow_vendored::date::choose::latest, tz_);
return get_local_time<T, Arg0>(arg, arrow_vendored::date::choose::latest, &tz_);
}
}
} catch (const arrow_vendored::date::ambiguous_local_time& e) {
Expand All @@ -1403,17 +1404,17 @@ struct AssumeTimezone {
}
case AssumeTimezoneOptions::Ambiguous::AMBIGUOUS_EARLIEST: {
return get_local_time<T, Arg0>(arg, arrow_vendored::date::choose::earliest,
tz_);
&tz_);
}
case AssumeTimezoneOptions::Ambiguous::AMBIGUOUS_LATEST: {
return get_local_time<T, Arg0>(arg, arrow_vendored::date::choose::latest, tz_);
return get_local_time<T, Arg0>(arg, arrow_vendored::date::choose::latest, &tz_);
}
}
}
return 0;
}
AssumeTimezoneOptions options;
const time_zone* tz_;
const ArrowTimeZone tz_;
};

// ----------------------------------------------------------------------
Expand Down Expand Up @@ -2035,6 +2036,5 @@ void RegisterScalarTemporalUnary(FunctionRegistry* registry) {
DCHECK_OK(registry->AddFunction(std::move(round_temporal)));
}

} // namespace internal
} // namespace compute
} // namespace compute::internal
} // namespace arrow
55 changes: 55 additions & 0 deletions cpp/src/arrow/compute/kernels/temporal_internal.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include "arrow/compute/kernels/temporal_internal.h"

namespace arrow::compute::internal {

/// \brief Resolve a timezone string to an ArrowTimeZone.
///
/// Two kinds of input are handled:
///  - A fixed UTC offset, recognised by a leading '+' or '-'. Valid offset
///    strings have to have 4 digits and a sign prefix.
///    Valid examples: +01:23 and -0123.
///    Invalid examples: 1:23, 123, 0123, 01:23, +25:00, -12:34:45, +090000.
///  - Anything else is passed to locate_zone() as a timezone name.
///
/// \param timezone the timezone string to resolve; may be empty
/// \return the resolved zone, or Status::Invalid if the string is neither a
///         well-formed offset nor a name that locate_zone() accepts
Result<ArrowTimeZone> LocateZone(const std::string_view timezone) {
  // Check emptiness before looking at the first character: indexing an empty
  // string_view is undefined behaviour. An empty string is not an offset, so
  // it is handed to locate_zone() below like any other name.
  const bool has_sign_prefix =
      !timezone.empty() && (timezone.front() == '+' || timezone.front() == '-');

  if (has_sign_prefix) {
    // TODO: the parsers are called with a NUL-terminated `const char*`, hence
    // the temporary std::string. They should probably be updated to operate
    // on string_view; tracked in #47732.
    const auto offset = std::string(timezone.substr(1));
    std::chrono::minutes zone_offset{0};
    bool parsed = false;

    // The total length (sign included) selects the one format that applies.
    // There is deliberately no fallthrough between the cases: a string of
    // length 6 must be in "+HH:MM" form and must not get a second chance
    // through the "+HHMM" parser.
    switch (timezone.length()) {
      case 6:  // sign + "HH:MM"
        parsed = arrow::internal::detail::ParseHH_MM(offset.c_str(), &zone_offset);
        break;
      case 5:  // sign + "HHMM"
        parsed = arrow::internal::detail::ParseHHMM(offset.c_str(), &zone_offset);
        break;
      default:
        break;
    }

    if (!parsed) {
      return Status::Invalid("Cannot locate or parse timezone '", timezone, "'");
    }

    // The parsers only see the unsigned part; apply the sign here.
    zone_offset = timezone.front() == '-' ? -zone_offset : zone_offset;
    return OffsetZone(zone_offset);
  }

  try {
    return locate_zone(timezone);
  } catch (const std::runtime_error& ex) {
    // Convert the lookup failure into a Status instead of letting the
    // exception propagate to the caller.
    return Status::Invalid("Cannot locate or parse timezone '", timezone,
                           "': ", ex.what());
  }
}

} // namespace arrow::compute::internal
Loading
Loading