Skip to content

Commit 27103d9

Browse files
committed
GH-47848: [C++] Add TimeUnit::PICO
This enum value is currently unused; I plan to introduce a timestamp128 type in a future change that will support picosecond resolution.
1 parent 2289f31 commit 27103d9

File tree

17 files changed

+209
-21
lines changed

17 files changed

+209
-21
lines changed

cpp/src/arrow/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -509,6 +509,7 @@ set(ARROW_UTIL_SRCS
509509
util/future.cc
510510
util/hashing.cc
511511
util/int_util.cc
512+
util/int128_internal.cc
512513
util/io_util.cc
513514
util/list_util.cc
514515
util/logger.cc

cpp/src/arrow/array/diff.cc

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
#include "arrow/util/bit_util.h"
4545
#include "arrow/util/checked_cast.h"
4646
#include "arrow/util/float16.h"
47+
#include "arrow/util/int128_internal.h"
4748
#include "arrow/util/logging_internal.h"
4849
#include "arrow/util/range.h"
4950
#include "arrow/util/ree_util.h"
@@ -57,6 +58,7 @@ namespace arrow {
5758
using internal::checked_cast;
5859
using internal::checked_pointer_cast;
5960
using internal::MakeLazyRange;
61+
using internal::int128_t;
6062

6163
namespace {
6264

@@ -855,6 +857,7 @@ class MakeFormatterImpl {
855857
// Using unqualified `format` directly would produce ambiguous
856858
// lookup because of `std::format` (ARROW-15520).
857859
namespace avd = arrow_vendored::date;
860+
using picoseconds = std::chrono::duration<int128_t, std::pico>;
858861
using std::chrono::nanoseconds;
859862
using std::chrono::microseconds;
860863
using std::chrono::milliseconds;
@@ -863,6 +866,9 @@ class MakeFormatterImpl {
863866
static avd::sys_days epoch{avd::jan / 1 / 1970};
864867

865868
switch (unit) {
869+
case TimeUnit::PICO:
870+
*os << avd::format(fmt, static_cast<picoseconds>(value) + epoch);
871+
break;
866872
case TimeUnit::NANO:
867873
*os << avd::format(fmt, static_cast<nanoseconds>(value) + epoch);
868874
break;
@@ -879,6 +885,9 @@ class MakeFormatterImpl {
879885
return;
880886
}
881887
switch (unit) {
888+
case TimeUnit::PICO:
889+
*os << avd::format(fmt, static_cast<picoseconds>(value));
890+
break;
882891
case TimeUnit::NANO:
883892
*os << avd::format(fmt, static_cast<nanoseconds>(value));
884893
break;

cpp/src/arrow/c/bridge.cc

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1221,21 +1221,28 @@ struct SchemaImporter {
12211221
ARROW_ASSIGN_OR_RAISE(auto unit, f_parser_.ParseTimeUnit());
12221222
if (unit == TimeUnit::SECOND || unit == TimeUnit::MILLI) {
12231223
return ProcessPrimitive(time32(unit));
1224-
} else {
1224+
} else if (unit != TimeUnit::PICO) {
12251225
return ProcessPrimitive(time64(unit));
12261226
}
1227+
return f_parser_.Invalid();
12271228
}
12281229

12291230
Status ProcessDuration() {
12301231
ARROW_ASSIGN_OR_RAISE(auto unit, f_parser_.ParseTimeUnit());
1231-
return ProcessPrimitive(duration(unit));
1232+
if (unit != TimeUnit::PICO) {
1233+
return ProcessPrimitive(duration(unit));
1234+
}
1235+
return f_parser_.Invalid();
12321236
}
12331237

12341238
Status ProcessTimestamp() {
12351239
ARROW_ASSIGN_OR_RAISE(auto unit, f_parser_.ParseTimeUnit());
12361240
RETURN_NOT_OK(f_parser_.CheckNext(':'));
1237-
type_ = timestamp(unit, std::string(f_parser_.Rest()));
1238-
return Status::OK();
1241+
if (unit != TimeUnit::PICO) {
1242+
type_ = timestamp(unit, std::string(f_parser_.Rest()));
1243+
return Status::OK();
1244+
}
1245+
return f_parser_.Invalid();
12391246
}
12401247

12411248
Status ProcessFixedSizeBinary() {

cpp/src/arrow/compute/api_scalar.cc

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ struct EnumTraits<compute::JoinOptions::NullHandlingBehavior>
5858
template <>
5959
struct EnumTraits<TimeUnit::type>
6060
: BasicEnumTraits<TimeUnit::type, TimeUnit::type::SECOND, TimeUnit::type::MILLI,
61-
TimeUnit::type::MICRO, TimeUnit::type::NANO> {
61+
TimeUnit::type::MICRO, TimeUnit::type::NANO, TimeUnit::type::PICO> {
6262
static std::string name() { return "TimeUnit::type"; }
6363
static std::string value_name(TimeUnit::type value) {
6464
switch (value) {
@@ -70,6 +70,8 @@ struct EnumTraits<TimeUnit::type>
7070
return "MICRO";
7171
case TimeUnit::type::NANO:
7272
return "NANO";
73+
case TimeUnit::type::PICO:
74+
return "PICO";
7375
}
7476
return "<INVALID>";
7577
}

cpp/src/arrow/compute/kernels/codegen_internal.cc

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -342,6 +342,9 @@ TypeHolder CommonTemporal(const TypeHolder* begin, size_t count) {
342342
case TimeUnit::MICRO:
343343
case TimeUnit::NANO:
344344
return time64(finest_unit);
345+
case TimeUnit::PICO:
346+
// Type not implemented.
347+
return TypeHolder(nullptr);
345348
}
346349
}
347350
return TypeHolder(nullptr);

cpp/src/arrow/compute/kernels/scalar_cast_temporal.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,8 @@ Status ExtractTemporal(KernelContext* ctx, const ExecSpan& batch, ExecResult* ou
140140
case TimeUnit::NANO:
141141
return TemporalComponentExtract<Op, std::chrono::nanoseconds, TimestampType,
142142
OutType, Args...>::Exec(ctx, batch, out, args...);
143+
case TimeUnit::PICO:
144+
return Status::Invalid("Picoseconds not yet supported: ", ty);
143145
}
144146
return Status::Invalid("Unknown timestamp unit: ", ty);
145147
}

cpp/src/arrow/compute/kernels/scalar_cast_test.cc

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2396,7 +2396,8 @@ TEST(Cast, TimestampToDate) {
23962396
CheckCast(timestamps, date_64);
23972397
CheckCast(timestamps_extreme, date_32_extreme);
23982398
CheckCast(timestamps_extreme, date_64_extreme);
2399-
for (auto u : TimeUnit::values()) {
2399+
auto units = {TimeUnit::SECOND, TimeUnit::MILLI, TimeUnit::MICRO, TimeUnit::NANO};
2400+
for (auto u : units) {
24002401
auto unit = timestamp(u);
24012402
CheckCast(ArrayFromJSON(unit, kTimestampSecondsJson), date_32);
24022403
CheckCast(ArrayFromJSON(unit, kTimestampSecondsJson), date_64);
@@ -2438,15 +2439,16 @@ TEST_F(CastTimezone, ZonedTimestampToDate) {
24382439
1230422400000, 1230508800000, 1325376000000, null
24392440
])");
24402441

2441-
for (auto u : TimeUnit::values()) {
2442+
auto units = {TimeUnit::SECOND, TimeUnit::MILLI, TimeUnit::MICRO, TimeUnit::NANO};
2443+
for (auto u : units) {
24422444
auto timestamps =
24432445
ArrayFromJSON(timestamp(u, "Australia/Broken_Hill"), kTimestampSecondsJson);
24442446
CheckCast(timestamps, date_32);
24452447
CheckCast(timestamps, date_64);
24462448
}
24472449

24482450
// Invalid timezone
2449-
for (auto u : TimeUnit::values()) {
2451+
for (auto u : units) {
24502452
auto timestamps =
24512453
ArrayFromJSON(timestamp(u, "Mars/Mariner_Valley"), kTimestampSecondsJson);
24522454
CheckCastFails(timestamps, CastOptions::Unsafe(date32()));
@@ -2583,7 +2585,8 @@ TEST(Cast, TimestampToTime) {
25832585
CheckCast(timestamps_s, times_ms);
25842586

25852587
// Invalid timezone
2586-
for (auto u : TimeUnit::values()) {
2588+
auto units = {TimeUnit::SECOND, TimeUnit::MILLI, TimeUnit::MICRO, TimeUnit::NANO};
2589+
for (auto u : units) {
25872590
auto timestamps =
25882591
ArrayFromJSON(timestamp(u, "Mars/Mariner_Valley"), kTimestampSecondsJson);
25892592
if (u == TimeUnit::SECOND || u == TimeUnit::MILLI) {

cpp/src/arrow/compute/kernels/scalar_if_else_test.cc

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -421,7 +421,10 @@ TEST_F(TestIfElseKernel, IfElseMultiType) {
421421
}
422422

423423
TEST_F(TestIfElseKernel, TimestampTypes) {
424-
for (const auto unit : TimeUnit::values()) {
424+
for (const auto unit : {
425+
TimeUnit::SECOND,
426+
TimeUnit::NANO,
427+
}) {
425428
auto ty = timestamp(unit);
426429
CheckWithDifferentShapes(ArrayFromJSON(boolean(), "[true, true, true, false]"),
427430
ArrayFromJSON(ty, "[1, 2, 3, 4]"),

cpp/src/arrow/compute/kernels/scalar_temporal_test.cc

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -608,7 +608,8 @@ TEST_F(ScalarTemporalTest, TestTemporalComponentExtractionAllTemporalTypes) {
608608
}
609609

610610
TEST_F(ScalarTemporalTest, TestTemporalComponentExtractionWithDifferentUnits) {
611-
for (auto u : TimeUnit::values()) {
611+
auto units = {TimeUnit::SECOND, TimeUnit::MILLI, TimeUnit::MICRO, TimeUnit::NANO};
612+
for (auto u : units) {
612613
auto unit = timestamp(u);
613614
CheckScalarUnary("year", unit, times_seconds_precision, int64(), year);
614615
CheckScalarUnary("is_leap_year", unit, times_seconds_precision, boolean(),
@@ -814,7 +815,8 @@ TEST_F(ScalarTemporalTest, TestZoned1) {
814815
}
815816

816817
TEST_F(ScalarTemporalTest, TestZoned2) {
817-
for (auto u : TimeUnit::values()) {
818+
auto units = {TimeUnit::SECOND, TimeUnit::MILLI, TimeUnit::MICRO, TimeUnit::NANO};
819+
for (auto u : units) {
818820
auto unit = timestamp(u, "Australia/Broken_Hill");
819821
auto month = "[1, 3, 1, 5, 1, 12, 12, 12, 1, 1, 1, 1, 12, 12, 12, 1, null]";
820822
auto day = "[1, 1, 1, 18, 1, 31, 30, 31, 1, 3, 4, 1, 31, 28, 29, 1, null]";
@@ -906,8 +908,9 @@ TEST_F(ScalarTemporalTest, TestNonexistentTimezone) {
906908
auto nonexistent_timezones = {
907909
"Mars/Mariner_Valley", "+25:00", "-25:00", "15:00", "5:00", "500",
908910
"+05:00:00", "+050000"};
911+
auto units = {TimeUnit::SECOND, TimeUnit::MILLI, TimeUnit::MICRO, TimeUnit::NANO};
909912
for (auto timezone : nonexistent_timezones) {
910-
for (auto u : TimeUnit::values()) {
913+
for (auto u : units) {
911914
auto ts_type = timestamp(u, timezone);
912915
auto timestamp_array = std::make_shared<NumericArray<TimestampType>>(
913916
ts_type, 2, data_buffer, null_buffer, 0);
@@ -1026,7 +1029,8 @@ TEST_F(ScalarTemporalTest, DayOfWeek) {
10261029
}
10271030

10281031
TEST_F(ScalarTemporalTest, TestTemporalDifference) {
1029-
for (auto u : TimeUnit::values()) {
1032+
auto units = {TimeUnit::SECOND, TimeUnit::MILLI, TimeUnit::MICRO, TimeUnit::NANO};
1033+
for (auto u : units) {
10301034
auto unit = timestamp(u);
10311035
auto arr1 = ArrayFromJSON(unit, times_seconds_precision);
10321036
auto arr2 = ArrayFromJSON(unit, times_seconds_precision2);
@@ -1879,8 +1883,9 @@ TEST_F(ScalarTemporalTest, TestLocalTimestamp) {
18791883
"2009-12-30 18:50:20", "2009-12-31 19:55:25", "2010-01-02 21:00:30",
18801884
"2010-01-03 22:05:35", "2005-12-31 23:10:40", "2005-12-31 00:15:45",
18811885
"2008-12-27 14:30:00", "2008-12-28 14:30:00", "2011-12-31 15:32:03", null])";
1886+
auto units = {TimeUnit::SECOND, TimeUnit::MILLI, TimeUnit::MICRO, TimeUnit::NANO};
18821887

1883-
for (auto u : TimeUnit::values()) {
1888+
for (auto u : units) {
18841889
CheckScalarUnary("local_timestamp", timestamp(u), times_seconds_precision,
18851890
timestamp(u), times_seconds_precision);
18861891
CheckScalarUnary("local_timestamp", timestamp(u, "UTC"), times_seconds_precision,
@@ -1912,8 +1917,9 @@ TEST_F(ScalarTemporalTest, TestAssumeTimezone) {
19121917
auto options_tbilisi = AssumeTimezoneOptions(timezone_tbilisi);
19131918
auto options_tbilisi_offset = AssumeTimezoneOptions(timezone_tbilisi_offset);
19141919
auto options_invalid = AssumeTimezoneOptions("Europe/Brusselsss");
1920+
auto units = {TimeUnit::SECOND, TimeUnit::MILLI, TimeUnit::MICRO, TimeUnit::NANO};
19151921

1916-
for (auto u : TimeUnit::values()) {
1922+
for (auto u : units) {
19171923
auto unit = timestamp(u);
19181924
auto unit_utc = timestamp(u, timezone_utc);
19191925
auto unit_kolkata = timestamp(u, timezone_kolkata);
@@ -1956,8 +1962,9 @@ TEST_F(ScalarTemporalTest, TestAssumeTimezoneAmbiguous) {
19561962
AssumeTimezoneOptions(timezone, AssumeTimezoneOptions::AMBIGUOUS_LATEST);
19571963
auto options_raise =
19581964
AssumeTimezoneOptions(timezone, AssumeTimezoneOptions::AMBIGUOUS_RAISE);
1965+
auto units = {TimeUnit::SECOND, TimeUnit::MILLI, TimeUnit::MICRO, TimeUnit::NANO};
19591966

1960-
for (auto u : TimeUnit::values()) {
1967+
for (auto u : units) {
19611968
auto unit = timestamp(u);
19621969
auto unit_local = timestamp(u, timezone);
19631970
ASSERT_RAISES(Invalid, AssumeTimezone(ArrayFromJSON(unit, times), options_raise));
@@ -1989,8 +1996,9 @@ TEST_F(ScalarTemporalTest, TestAssumeTimezoneNonexistent) {
19891996
auto options_earliest =
19901997
AssumeTimezoneOptions(timezone, AssumeTimezoneOptions::AMBIGUOUS_RAISE,
19911998
AssumeTimezoneOptions::NONEXISTENT_EARLIEST);
1999+
auto units = {TimeUnit::SECOND, TimeUnit::MILLI, TimeUnit::MICRO, TimeUnit::NANO};
19922000

1993-
for (auto u : TimeUnit::values()) {
2001+
for (auto u : units) {
19942002
auto unit = timestamp(u);
19952003
auto unit_local = timestamp(u, timezone);
19962004
ASSERT_RAISES(Invalid, AssumeTimezone(ArrayFromJSON(unit, times), options_raise));
@@ -2245,7 +2253,8 @@ TEST_F(ScalarTemporalTest, StrftimeInvalidLocale) {
22452253
}
22462254

22472255
TEST_F(ScalarTemporalTest, TestTemporalDifferenceZoned) {
2248-
for (auto u : TimeUnit::values()) {
2256+
auto units = {TimeUnit::SECOND, TimeUnit::MILLI, TimeUnit::MICRO, TimeUnit::NANO};
2257+
for (auto u : units) {
22492258
auto unit = timestamp(u, "Pacific/Marquesas");
22502259
auto arr1 = ArrayFromJSON(unit, times_seconds_precision);
22512260
auto arr2 = ArrayFromJSON(unit, times_seconds_precision2);

cpp/src/arrow/type.cc

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,8 @@ std::string ToString(TimeUnit::type unit) {
249249
return "us";
250250
case TimeUnit::NANO:
251251
return "ns";
252+
case TimeUnit::PICO:
253+
return "ps";
252254
default:
253255
DCHECK(false);
254256
return "";
@@ -1190,6 +1192,9 @@ std::ostream& operator<<(std::ostream& os, TimeUnit::type unit) {
11901192
case TimeUnit::NANO:
11911193
os << "ns";
11921194
break;
1195+
case TimeUnit::PICO:
1196+
os << "ps";
1197+
break;
11931198
}
11941199
return os;
11951200
}
@@ -2767,6 +2772,8 @@ static char TimeUnitFingerprint(TimeUnit::type unit) {
27672772
return 'u';
27682773
case TimeUnit::NANO:
27692774
return 'n';
2775+
case TimeUnit::PICO:
2776+
return 'p';
27702777
default:
27712778
DCHECK(false) << "Unexpected TimeUnit";
27722779
return '\0';
@@ -3542,7 +3549,8 @@ const std::vector<std::shared_ptr<DataType>>& PrimitiveTypes() {
35423549

35433550
const std::vector<TimeUnit::type>& TimeUnit::values() {
35443551
static std::vector<TimeUnit::type> units = {TimeUnit::SECOND, TimeUnit::MILLI,
3545-
TimeUnit::MICRO, TimeUnit::NANO};
3552+
TimeUnit::MICRO, TimeUnit::NANO,
3553+
TimeUnit::PICO};
35463554
return units;
35473555
}
35483556

0 commit comments

Comments
 (0)