Skip to content

Commit d8bea19

Browse files
committed
Extend time type
1 parent 492af4b commit d8bea19

File tree

13 files changed

+297
-95
lines changed

13 files changed

+297
-95
lines changed

velox/common/fuzzer/Utils.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ int32_t randDate(FuzzerGenerator& rng) {
120120
}
121121

122122
int32_t randTime(FuzzerGenerator& rng) {
123-
return rand<int64_t>(rng, TimeType::kMin, TimeType::kMax);
123+
return rand<int64_t>(rng, TIME()->getMin(), TIME()->getMax());
124124
}
125125

126126
/// Unicode character ranges. Ensure the vector indexes match the UTF8CharList

velox/docs/develop/types.rst

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -131,8 +131,9 @@ upto 38 precision, with a range of :math:`[-10^{38} + 1, +10^{38} - 1]`.
131131
All the three values, precision, scale, unscaled value are required to represent a
132132
decimal value.
133133

134-
TIME type represents time in milliseconds from midnight UTC. Thus min/max value can range from UTC-14:00 at 00:00:00 to UTC+14:00 at 23:59:59.999 modulo 24 hours.
135-
TIME type is backed by BIGINT physical type.
134+
TIME type represents time in milliseconds from midnight UTC. Thus min/max value can range from UTC-14:00 at 00:00:00 to UTC+14:00 at 23:59:59.999 modulo 24 hours.
135+
TIME_MICRO type represents time in microseconds from midnight. Thus min/max value can range from 00:00:00.000000 to 23:59:59.999999 modulo 24 hours.
136+
TIME and TIME_MICRO types are backed by BIGINT physical type.
136137

137138
Custom Types
138139
~~~~~~~~~~~~
@@ -292,7 +293,7 @@ Presto. These differences require us to implement the same functions
292293
separately for each system in Velox, such as min, max and collect_set. The
293294
key differences are listed below.
294295

295-
* Spark operates on timestamps with "microsecond" precision while Presto with
296+
* Spark operates on TIMESTAMP and TIME types with "microsecond" precision while Presto with
296297
"millisecond" precision.
297298
Example::
298299

@@ -304,6 +305,8 @@ key differences are listed below.
304305
) AS t(ts);
305306
-- 2014-03-08 09:00:00.012345
306307

308+
SELECT cast(time'12:30:45.123456' as bigint); -- 45,045,123,456
309+
307310
* In function comparisons, nested null values are handled as values.
308311
Example::
309312

velox/expression/CastExpr.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -237,7 +237,7 @@ VectorPtr CastExpr::castFromTime(
237237
auto* resultFlatVector = castResult->as<FlatVector<StringView>>();
238238

239239
Buffer* buffer = resultFlatVector->getBufferWithSpace(
240-
rows.countSelected() * TimeType::kTimeToVarcharRowSize,
240+
rows.countSelected() * TIME()->getTimeToVarcharRowSize(),
241241
true /*exactSize*/);
242242
char* rawBuffer = buffer->asMutable<char>() + buffer->size();
243243

velox/expression/UdfTypeResolver.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,13 @@ struct resolver<Time> {
157157
using out_type = int64_t;
158158
};
159159

160+
template <>
161+
struct resolver<TimeMicro> {
162+
using in_type = int64_t;
163+
using null_free_in_type = in_type;
164+
using out_type = int64_t;
165+
};
166+
160167
template <typename T>
161168
struct resolver<std::shared_ptr<T>> {
162169
using in_type = std::shared_ptr<T>;

velox/expression/tests/SimpleFunctionTest.cpp

Lines changed: 50 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1657,12 +1657,16 @@ TEST_F(SimpleFunctionTest, toDebugString) {
16571657
"Priority: 999997\nDefaultNullBehavior: true");
16581658
}
16591659

1660-
template <typename T>
1660+
template <typename T, typename TimeType>
16611661
struct TimePlusOneFunction {
16621662
VELOX_DEFINE_FUNCTION_TYPES(T);
16631663

1664-
void call(out_type<Time>& out, const arg_type<Time>& input) {
1665-
out = input + 1;
1664+
void call(out_type<TimeType>& out, const arg_type<TimeType>& input) {
1665+
if constexpr (std::is_same_v<TimeType, Time>) {
1666+
out = input + 1;
1667+
} else if constexpr (std::is_same_v<TimeType, TimeMicro>) {
1668+
out = input / 1000 + 1;
1669+
}
16661670
}
16671671
};
16681672

@@ -1677,16 +1681,23 @@ struct ArrayTimeFunction {
16771681
}
16781682
}
16791683
}
1684+
1685+
void call(
1686+
out_type<Array<TimeMicro>>& out,
1687+
const arg_type<Array<TimeMicro>>& input) {}
16801688
};
16811689

16821690
TEST_F(SimpleFunctionTest, timeTypeTest) {
1683-
registerFunction<TimePlusOneFunction, Time, Time>({"time_plus_one"});
1684-
auto data = makeRowVector({
1685-
makeFlatVector<int64_t>({1, 2, 3}, TIME()),
1686-
});
1687-
auto result = evaluate("time_plus_one(c0)", data);
1688-
auto expected = makeFlatVector<int64_t>({2, 3, 4}, TIME());
1689-
assertEqualVectors(expected, result);
1691+
{
1692+
registerFunction<ParameterBinder<TimePlusOneFunction, Time>, Time, Time>(
1693+
{"time_plus_one"});
1694+
auto data = makeRowVector({
1695+
makeFlatVector<int64_t>({1000, 2000, 3000}, TIME()),
1696+
});
1697+
auto result = evaluate("time_plus_one(c0)", data);
1698+
auto expected = makeFlatVector<int64_t>({1001, 2001, 3001}, TIME());
1699+
assertEqualVectors(expected, result);
1700+
}
16901701

16911702
// Test out Time in complex type.
16921703
{
@@ -1703,4 +1714,33 @@ TEST_F(SimpleFunctionTest, timeTypeTest) {
17031714
}
17041715
}
17051716

1717+
TEST_F(SimpleFunctionTest, timeMicroTypeTest) {
1718+
{
1719+
registerFunction<
1720+
ParameterBinder<TimePlusOneFunction, TimeMicro>,
1721+
TimeMicro,
1722+
TimeMicro>({"time_micro_plus_one"});
1723+
auto data = makeRowVector({
1724+
makeFlatVector<int64_t>({1000, 2000, 3000}, TIME_MICRO()),
1725+
});
1726+
auto result = evaluate("time_micro_plus_one(c0)", data);
1727+
auto expected = makeFlatVector<int64_t>({2, 3, 4}, TIME_MICRO());
1728+
assertEqualVectors(expected, result);
1729+
}
1730+
1731+
// Test out TimeMicro in complex type.
1732+
{
1733+
registerFunction<ArrayTimeFunction, Array<TimeMicro>, Array<TimeMicro>>(
1734+
{"array_time_micro"});
1735+
1736+
auto data = makeRowVector({
1737+
makeArrayVector<int64_t>({{1, 2, 3}, {4, 5, 6}}, TIME_MICRO()),
1738+
});
1739+
1740+
auto result = evaluate("array_time_micro(c0)", data);
1741+
auto expected = makeArrayVector<int64_t>({{}, {}}, TIME_MICRO());
1742+
assertEqualVectors(expected, result);
1743+
}
1744+
}
1745+
17061746
} // namespace

velox/functions/prestosql/tests/TypeOfTest.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ TEST_F(TypeOfTest, basic) {
6363
EXPECT_EQ("timestamp", typeOf(TIMESTAMP()));
6464
EXPECT_EQ("date", typeOf(DATE()));
6565
EXPECT_EQ("time", typeOf(TIME()));
66+
EXPECT_EQ("time micro", typeOf(TIME_MICRO()));
6667

6768
EXPECT_EQ("unknown", typeOf(UNKNOWN()));
6869

velox/functions/prestosql/types/parser/tests/TypeParserTest.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@ TEST_F(TypeParserTest, varbinary) {
112112

113113
TEST_F(TypeParserTest, time) {
114114
ASSERT_EQ(*parseType("time"), *TIME());
115+
ASSERT_EQ(*parseType("time micro"), *TIME_MICRO());
115116
}
116117

117118
TEST_F(TypeParserTest, timeWithTimeZoneType) {

velox/type/CppToType.h

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,18 @@ template <>
9292
struct CppToType<Timestamp> : public CppToTypeBase<TypeKind::TIMESTAMP> {};
9393

9494
template <>
95-
struct CppToType<Time> : public CppToTypeBase<TypeKind::BIGINT> {};
95+
struct CppToType<Time> : public CppToTypeBase<TypeKind::BIGINT> {
96+
static auto create() {
97+
return TIME();
98+
}
99+
};
100+
101+
template <>
102+
struct CppToType<TimeMicro> : public CppToTypeBase<TypeKind::BIGINT> {
103+
static auto create() {
104+
return TIME_MICRO();
105+
}
106+
};
96107

97108
// TODO: maybe do something smarter than just matching any shared_ptr, e.g. we
98109
// can declare "registered" types explicitly

velox/type/SimpleFunctionApi.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -374,6 +374,11 @@ struct SimpleTypeTrait<Time> : public TypeTraits<TypeKind::BIGINT> {
374374
static constexpr const char* name = "TIME";
375375
};
376376

377+
template <>
378+
struct SimpleTypeTrait<TimeMicro> : public TypeTraits<TypeKind::BIGINT> {
379+
static constexpr const char* name = "TIME MICRO";
380+
};
381+
377382
template <typename T, bool comparable, bool orderable>
378383
struct SimpleTypeTrait<Generic<T, comparable, orderable>> {
379384
static constexpr TypeKind typeKind = TypeKind::INVALID;

velox/type/Type.cpp

Lines changed: 38 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -265,7 +265,10 @@ void Type::registerSerDe() {
265265
registry.Register(
266266
"IntervalYearMonthType", IntervalYearMonthType::deserialize);
267267
registry.Register("DateType", DateType::deserialize);
268-
registry.Register("TimeType", TimeType::deserialize);
268+
269+
// The deserilization handles all supported precisions. No extra registration
270+
// needed for 'TimeMicroPrecisionType'.
271+
registry.Register("TimeType", TimeMilliPrecisionType::deserialize);
269272
}
270273

271274
std::string ArrayType::toString() const {
@@ -1358,6 +1361,7 @@ const SingletonTypeMap& singletonBuiltInTypes() {
13581361
{"INTERVAL YEAR TO MONTH", INTERVAL_YEAR_MONTH()},
13591362
{"DATE", DATE()},
13601363
{"TIME", TIME()},
1364+
{"TIME MICRO", TIME_MICRO()},
13611365
{"UNKNOWN", UNKNOWN()},
13621366
};
13631367
return kTypes;
@@ -1508,14 +1512,20 @@ std::string getOpaqueAliasForTypeId(std::type_index typeIndex) {
15081512
return it->second;
15091513
}
15101514

1511-
folly::dynamic TimeType::serialize() const {
1512-
folly::dynamic obj = folly::dynamic::object;
1513-
obj["name"] = "TimeType";
1514-
obj["type"] = name();
1515-
return obj;
1515+
template <TimePrecision PRECISION>
1516+
TypePtr TimeType<PRECISION>::deserialize(const folly::dynamic& obj) {
1517+
if (obj.count("precision")) {
1518+
auto precision = obj["precision"].asInt();
1519+
if (precision == static_cast<int>(TimePrecision::kMicroseconds)) {
1520+
return TIME_MICRO();
1521+
}
1522+
}
1523+
return TIME();
15161524
}
15171525

1518-
StringView TimeType::valueToString(int64_t value, char* const startPos) const {
1526+
StringView TimeMilliPrecisionType::valueToString(
1527+
int64_t value,
1528+
char* const startPos) const {
15191529
// Ensure the value is within valid TIME range
15201530
VELOX_USER_CHECK(
15211531
!(value < 0 || value >= 86400000),
@@ -1531,25 +1541,24 @@ StringView TimeType::valueToString(int64_t value, char* const startPos) const {
15311541

15321542
// TIME is represented as milliseconds since midnight
15331543
// Convert to HH:mm:ss.SSS format
1534-
15351544
fmt::format_to_n(
15361545
startPos,
1537-
kTimeToVarcharRowSize,
1546+
getTimeToVarcharRowSize(),
15381547
"{:02d}:{:02d}:{:02d}.{:03d}",
15391548
hours,
15401549
minutes,
15411550
seconds,
15421551
millis);
1543-
return StringView{startPos, kTimeToVarcharRowSize};
1552+
return StringView{startPos, getTimeToVarcharRowSize()};
15441553
}
15451554

1546-
int64_t TimeType::valueToTime(const StringView& timeStr) const {
1555+
int64_t TimeMilliPrecisionType::valueToTime(const StringView& timeStr) const {
15471556
return util::fromTimeString(timeStr).thenOrThrow(
15481557
folly::identity,
15491558
[&](const Status& status) { VELOX_USER_FAIL("{}", status.message()); });
15501559
}
15511560

1552-
int64_t TimeType::valueToTime(
1561+
int64_t TimeMilliPrecisionType::valueToTime(
15531562
const StringView& timeStr,
15541563
const tz::TimeZone* timeZone,
15551564
int64_t sessionStartTimeMs) const {
@@ -1603,6 +1612,23 @@ int64_t TimeType::valueToTime(
16031612
return adjustedTime;
16041613
}
16051614

1615+
StringView TimeMicroPrecisionType::valueToString(
1616+
int64_t value,
1617+
char* const startPos) const {
1618+
VELOX_NYI("The valueToString not implemented yet");
1619+
}
1620+
1621+
int64_t TimeMicroPrecisionType::valueToTime(const StringView& timeStr) const {
1622+
VELOX_NYI("The valueToTime not implemented yet");
1623+
}
1624+
1625+
int64_t TimeMicroPrecisionType::valueToTime(
1626+
const StringView& timeStr,
1627+
const tz::TimeZone* timeZone,
1628+
int64_t sessionStartTimeMs) const {
1629+
VELOX_NYI("The valueToTime not implemented yet");
1630+
}
1631+
16061632
std::string stringifyTruncatedElementList(
16071633
size_t size,
16081634
const std::function<void(std::stringstream&, size_t)>& stringifyElement,

0 commit comments

Comments
 (0)