Skip to content

Commit 373eb49

Browse files
committed
fix pre-1970 timestamp roundtrip
1 parent 52d258b commit 373eb49

File tree

2 files changed

+25
-11
lines changed

2 files changed

+25
-11
lines changed

cpp/src/parquet/arrow/arrow_reader_writer_test.cc

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@
7474
#include "parquet/page_index.h"
7575
#include "parquet/properties.h"
7676
#include "parquet/test_util.h"
77+
#include "parquet/types.h"
7778

7879
using arrow::Array;
7980
using arrow::ArrayData;
@@ -4149,14 +4150,26 @@ INSTANTIATE_TEST_SUITE_P(Repetition_type, TestNestedSchemaRead,
41494150
::testing::Values(Repetition::REQUIRED, Repetition::OPTIONAL));
41504151

41514152
// Checks, for one post-epoch and one pre-epoch value, that Int96GetNanoSeconds
// and NanosecondsToImpalaTimestamp agree on the same (nanoseconds, Int96) pair.
TEST(TestImpalaConversion, ArrowTimestampToImpalaTimestamp) {
4152-
// June 20, 2017 16:32:56 and 123456789 nanoseconds
4153-
int64_t nanoseconds = INT64_C(1497976376123456789);
4153+
{
4154+
// June 20, 2017 16:32:56 and 123456789 nanoseconds
4155+
int64_t timestamp = INT64_C(1497976376123456789);
4156+
Int96 impala_timestamp = {{UINT32_C(632093973), UINT32_C(13871), UINT32_C(2457925)}};
4157+
ASSERT_EQ(timestamp, ::parquet::Int96GetNanoSeconds(impala_timestamp));
41544158

4155-
Int96 calculated;
4159+
Int96 calculated;
4160+
::parquet::internal::NanosecondsToImpalaTimestamp(timestamp, &calculated);
4161+
ASSERT_EQ(impala_timestamp, calculated);
4162+
}
4163+
{
4164+
// December 31, 1969 23:59:59 and 999999000 nanoseconds UTC (1000 ns before the epoch)
4165+
int64_t timestamp = INT64_C(-1000);
4166+
Int96 impala_timestamp = {{UINT32_C(2437872664), UINT32_C(20116), UINT32_C(2440587)}};
4167+
ASSERT_EQ(timestamp, ::parquet::Int96GetNanoSeconds(impala_timestamp));
41564168

4157-
Int96 expected = {{UINT32_C(632093973), UINT32_C(13871), UINT32_C(2457925)}};
4158-
::parquet::internal::NanosecondsToImpalaTimestamp(nanoseconds, &calculated);
4159-
ASSERT_EQ(expected, calculated);
4169+
Int96 calculated;
4170+
::parquet::internal::NanosecondsToImpalaTimestamp(timestamp, &calculated);
4171+
ASSERT_EQ(impala_timestamp, calculated);
4172+
}
41604173
}
41614174

41624175
void TryReadDataFile(const std::string& path,

cpp/src/parquet/column_writer.h

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -259,14 +259,15 @@ constexpr int64_t kJulianEpochOffsetDays = INT64_C(2440588);
259259

260260
template <int64_t UnitPerDay, int64_t NanosecondsPerUnit>
261261
inline void ArrowTimestampToImpalaTimestamp(const int64_t time, Int96* impala_timestamp) {
262-
int64_t julian_days = (time / UnitPerDay) + kJulianEpochOffsetDays;
263-
(*impala_timestamp).value[2] = (uint32_t)julian_days;
262+
int32_t julian_days = static_cast<int32_t>(time / UnitPerDay + kJulianEpochOffsetDays) +
263+
(time < 0 ? -1 : 0);
264+
impala_timestamp->value[2] = static_cast<uint32_t>(julian_days);
264265

265-
int64_t last_day_units = time % UnitPerDay;
266-
auto last_day_nanos = last_day_units * NanosecondsPerUnit;
266+
uint64_t last_day_units = time % UnitPerDay + (time < 0 ? UnitPerDay : 0);
267+
uint64_t last_day_nanos = last_day_units * NanosecondsPerUnit;
267268
// impala_timestamp will be unaligned every other entry so do memcpy instead
268269
// of assign and reinterpret cast to avoid undefined behavior.
269-
std::memcpy(impala_timestamp, &last_day_nanos, sizeof(int64_t));
270+
std::memcpy(impala_timestamp, &last_day_nanos, sizeof(uint64_t));
270271
}
271272

272273
constexpr int64_t kSecondsInNanos = INT64_C(1000000000);

0 commit comments

Comments
 (0)