Skip to content

Commit 5b2cbcc

Browse files
authored
[Opt](function) opt of certain time field functions used in conjunction with FROM_UNIXTIME. (#57941)
Optimize certain time field functions (`HOUR`, `MINUTE`, `SECOND`, `MICROSECOND`) when they are used in conjunction with `FROM_UNIXTIME`. Take `HOUR(FROM_UNIXTIME(ts))` as an example: the `hour(from_unixtime(xxx))` expression is slow because `from_unixtime` needs to extract the full yyyy-MM-dd HH:mm:ss format from the timestamp, which is not necessary. By calculating only the required fields directly from the timestamp, the process can be significantly faster. Add a function `hour_from_unixtime(ts)` to extract the hour from a unix timestamp, which is timezone aware. Implementation: 1. Look up the timezone offset with the cctz library. 2. Calculate the hour from the local unixtime. ```cpp int64_t local_unixtime = unixtime + timezone.lookup_offset(unixtime); int hour = (local_unixtime % (24 * 3600)) / 3600; ``` Performance: Before vs. After: ```text -- HOUR Doris> SELECT COUNT(HOUR(FROM_UNIXTIME(ts))) FROM test_hour_from_unixtime; +--------------------------------+ | COUNT(HOUR(FROM_UNIXTIME(ts))) | +--------------------------------+ | 100000000 | +--------------------------------+ 1 row in set (9.51 sec) Doris> SELECT COUNT(HOUR(FROM_UNIXTIME(ts))) FROM test_hour_from_unixtime; +--------------------------------+ | COUNT(HOUR(FROM_UNIXTIME(ts))) | +--------------------------------+ | 100000000 | +--------------------------------+ 1 row in set (0.96 sec) -- MINUTE Doris> SELECT COUNT(MINUTE(FROM_UNIXTIME(ts))) FROM test_hour_from_unixtime; +----------------------------------+ | COUNT(MINUTE(FROM_UNIXTIME(ts))) | +----------------------------------+ | 100000000 | +----------------------------------+ 1 row in set (10.98 sec) Doris> SELECT COUNT(MINUTE(FROM_UNIXTIME(ts))) FROM test_hour_from_unixtime; +----------------------------------+ | COUNT(MINUTE(FROM_UNIXTIME(ts))) | +----------------------------------+ | 100000000 | +----------------------------------+ 1 row in set (1.00 sec) -- SECOND Doris> SELECT COUNT(SECOND(FROM_UNIXTIME(ts))) FROM test_hour_from_unixtime;
+----------------------------------+ | COUNT(SECOND(FROM_UNIXTIME(ts))) | +----------------------------------+ | 100000000 | +----------------------------------+ 1 row in set (10.01 sec) Doris> SELECT COUNT(SECOND(FROM_UNIXTIME(ts))) FROM test_hour_from_unixtime; +----------------------------------+ | COUNT(SECOND(FROM_UNIXTIME(ts))) | +----------------------------------+ | 100000000 | +----------------------------------+ 1 row in set (0.90 sec) -- MICROSECOND Doris> SELECT COUNT(MICROSECOND(FROM_UNIXTIME(ts))) FROM test_hour_from_unixtime; +---------------------------------------+ | COUNT(MICROSECOND(FROM_UNIXTIME(ts))) | +---------------------------------------+ | 100000000 | +---------------------------------------+ 1 row in set (9.75 sec) Doris> SELECT COUNT(MICROSECOND(FROM_UNIXTIME(ts))) FROM test_hour_from_unixtime; +---------------------------------------+ | COUNT(MICROSECOND(FROM_UNIXTIME(ts))) | +---------------------------------------+ | 100000000 | +---------------------------------------+ 1 row in set (1.24 sec) ```
1 parent a1e482c commit 5b2cbcc

File tree

15 files changed

+964
-1
lines changed

15 files changed

+964
-1
lines changed

be/src/vec/functions/date_time_transforms.h

Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,17 @@
2020

2121
#pragma once
2222

23+
#include <libdivide.h>
24+
2325
#include <cmath>
2426
#include <cstdint>
2527

2628
#include "common/status.h"
29+
#include "runtime/define_primitive_type.h"
2730
#include "runtime/primitive_type.h"
2831
#include "udf/udf.h"
2932
#include "util/binary_cast.hpp"
33+
#include "vec/columns/column_decimal.h"
3034
#include "vec/columns/column_nullable.h"
3135
#include "vec/columns/column_string.h"
3236
#include "vec/columns/column_vector.h"
@@ -427,6 +431,139 @@ struct FromUnixTimeDecimalImpl {
427431
}
428432
};
429433

434+
// Base template for optimized time field(HOUR, MINUTE, SECOND, MS) extraction from Unix timestamp
435+
// Uses lookup_offset to avoid expensive civil_second construction
436+
template <typename Impl>
437+
class FunctionTimeFieldFromUnixtime : public IFunction {
438+
public:
439+
static constexpr auto name = Impl::name;
440+
static FunctionPtr create() { return std::make_shared<FunctionTimeFieldFromUnixtime<Impl>>(); }
441+
442+
String get_name() const override { return name; }
443+
444+
size_t get_number_of_arguments() const override { return 1; }
445+
446+
DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& arguments) const override {
447+
// microsecond_from_unixtime returns Int32, others (hour/minute/second) return Int8
448+
if constexpr (Impl::ArgType == PrimitiveType::TYPE_DECIMAL64) {
449+
return make_nullable(std::make_shared<DataTypeInt32>());
450+
} else {
451+
return make_nullable(std::make_shared<DataTypeInt8>());
452+
}
453+
}
454+
455+
// (UTC 9999-12-31 23:59:59) - 24 * 3600
456+
static const int64_t TIMESTAMP_VALID_MAX = 253402243199L;
457+
458+
Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
459+
uint32_t result, size_t input_rows_count) const override {
460+
using ArgColType = PrimitiveTypeTraits<Impl::ArgType>::ColumnType;
461+
using ResColType = std::conditional_t<Impl::ArgType == PrimitiveType::TYPE_DECIMAL64,
462+
ColumnInt32, ColumnInt8>;
463+
using ResItemType = typename ResColType::value_type;
464+
auto res = ResColType::create();
465+
466+
const auto* ts_col =
467+
assert_cast<const ArgColType*>(block.get_by_position(arguments[0]).column.get());
468+
if constexpr (Impl::ArgType == PrimitiveType::TYPE_DECIMAL64) {
469+
// microsecond_from_unixtime only
470+
const auto scale = static_cast<int32_t>(ts_col->get_scale());
471+
472+
for (int i = 0; i < input_rows_count; ++i) {
473+
const auto seconds = ts_col->get_intergral_part(i);
474+
const auto fraction = ts_col->get_fractional_part(i);
475+
476+
if (seconds < 0 || seconds > TIMESTAMP_VALID_MAX) {
477+
return Status::InvalidArgument(
478+
"The input value of TimeFiled(from_unixtime()) must between 0 and "
479+
"253402243199L");
480+
}
481+
482+
ResItemType value = Impl::extract_field(fraction, scale);
483+
res->insert_value(value);
484+
}
485+
} else {
486+
auto ctz = context->state()->timezone_obj();
487+
for (int i = 0; i < input_rows_count; ++i) {
488+
auto date = ts_col->get_element(i);
489+
490+
if (date < 0 || date > TIMESTAMP_VALID_MAX) {
491+
return Status::InvalidArgument(
492+
"The input value of TimeFiled(from_unixtime()) must between 0 and "
493+
"253402243199L");
494+
}
495+
496+
ResItemType value = Impl::extract_field(date, ctz);
497+
res->insert_value(value);
498+
}
499+
}
500+
block.replace_by_position(result, std::move(res));
501+
return Status::OK();
502+
}
503+
};
504+
505+
struct HourFromUnixtimeImpl {
506+
static constexpr PrimitiveType ArgType = PrimitiveType::TYPE_BIGINT;
507+
static constexpr auto name = "hour_from_unixtime";
508+
509+
static int8_t extract_field(int64_t local_time, const cctz::time_zone& ctz) {
510+
static const auto epoch = std::chrono::time_point_cast<cctz::sys_seconds>(
511+
std::chrono::system_clock::from_time_t(0));
512+
cctz::time_point<cctz::sys_seconds> t = epoch + cctz::seconds(local_time);
513+
int offset = ctz.lookup_offset(t).offset;
514+
local_time += offset;
515+
516+
static const libdivide::divider<int64_t> fast_div_3600(3600);
517+
static const libdivide::divider<int64_t> fast_div_86400(86400);
518+
519+
int64_t remainder;
520+
if (LIKELY(local_time >= 0)) {
521+
remainder = local_time - local_time / fast_div_86400 * 86400;
522+
} else {
523+
remainder = local_time % 86400;
524+
if (remainder < 0) {
525+
remainder += 86400;
526+
}
527+
}
528+
return static_cast<int8_t>(remainder / fast_div_3600);
529+
}
530+
};
531+
532+
struct MinuteFromUnixtimeImpl {
533+
static constexpr PrimitiveType ArgType = PrimitiveType::TYPE_BIGINT;
534+
static constexpr auto name = "minute_from_unixtime";
535+
536+
static int8_t extract_field(int64_t local_time, const cctz::time_zone& /*ctz*/) {
537+
static const libdivide::divider<int64_t> fast_div_60(60);
538+
static const libdivide::divider<int64_t> fast_div_3600(3600);
539+
540+
local_time = local_time - local_time / fast_div_3600 * 3600;
541+
542+
return static_cast<int8_t>(local_time / fast_div_60);
543+
}
544+
};
545+
546+
struct SecondFromUnixtimeImpl {
547+
static constexpr PrimitiveType ArgType = PrimitiveType::TYPE_BIGINT;
548+
static constexpr auto name = "second_from_unixtime";
549+
550+
static int8_t extract_field(int64_t local_time, const cctz::time_zone& /*ctz*/) {
551+
return static_cast<int8_t>(local_time % 60);
552+
}
553+
};
554+
555+
struct MicrosecondFromUnixtimeImpl {
556+
static constexpr PrimitiveType ArgType = PrimitiveType::TYPE_DECIMAL64;
557+
static constexpr auto name = "microsecond_from_unixtime";
558+
559+
static int32_t extract_field(int64_t fraction, int scale) {
560+
if (scale < 6) {
561+
fraction *= common::exp10_i64(6 - scale);
562+
}
563+
return static_cast<int32_t>(fraction);
564+
}
565+
};
566+
430567
#include "common/compile_check_end.h"
431568
} // namespace doris::vectorized
432569

be/src/vec/functions/function_time_value_to_field.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "common/status.h"
2222
#include "vec/data_types/data_type_number.h"
2323
#include "vec/data_types/data_type_time.h"
24+
#include "vec/functions/date_time_transforms.h"
2425
#include "vec/functions/function.h"
2526
#include "vec/functions/function_date_or_datetime_computation.h"
2627
#include "vec/functions/simple_function_factory.h"
@@ -89,11 +90,20 @@ struct MicroImpl {
8990
static inline auto execute(const TimeValue::TimeType& t) { return TimeValue::microsecond(t); }
9091
};
9192

93+
// Concrete instantiations of FunctionTimeFieldFromUnixtime, one per
// *FromUnixtimeImpl policy; each alias is registered with the function factory.
using FunctionHourFromUnixtime = FunctionTimeFieldFromUnixtime<HourFromUnixtimeImpl>;
94+
using FunctionMinuteFromUnixtime = FunctionTimeFieldFromUnixtime<MinuteFromUnixtimeImpl>;
95+
using FunctionSecondFromUnixtime = FunctionTimeFieldFromUnixtime<SecondFromUnixtimeImpl>;
96+
using FunctionMicrosecondFromUnixtime = FunctionTimeFieldFromUnixtime<MicrosecondFromUnixtimeImpl>;
97+
9298
// Registers the time-field functions with `factory`: the four
// FunctionTimeValueToField variants (hour/minute/second/microsecond of a time
// value) followed by the four *_from_unixtime functions.
void register_function_time_value_field(SimpleFunctionFactory& factory) {
9399
factory.register_function<FunctionTimeValueToField<DataTypeInt32, HourImpl>>();
94100
factory.register_function<FunctionTimeValueToField<DataTypeInt8, MintuImpl>>();
95101
factory.register_function<FunctionTimeValueToField<DataTypeInt8, SecondImpl>>();
96102
factory.register_function<FunctionTimeValueToField<DataTypeInt32, MicroImpl>>();
103+
// Unix-timestamp variants that compute the field directly from the timestamp.
factory.register_function<FunctionHourFromUnixtime>();
104+
factory.register_function<FunctionMinuteFromUnixtime>();
105+
factory.register_function<FunctionSecondFromUnixtime>();
106+
factory.register_function<FunctionMicrosecondFromUnixtime>();
97107
}
98108
#include "common/compile_check_end.h"
99109
} // namespace doris::vectorized

fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,7 @@
238238
import org.apache.doris.nereids.trees.expressions.functions.scalar.Hour;
239239
import org.apache.doris.nereids.trees.expressions.functions.scalar.HourCeil;
240240
import org.apache.doris.nereids.trees.expressions.functions.scalar.HourFloor;
241+
import org.apache.doris.nereids.trees.expressions.functions.scalar.HourFromUnixtime;
241242
import org.apache.doris.nereids.trees.expressions.functions.scalar.HoursAdd;
242243
import org.apache.doris.nereids.trees.expressions.functions.scalar.HoursDiff;
243244
import org.apache.doris.nereids.trees.expressions.functions.scalar.HoursSub;
@@ -335,13 +336,15 @@
335336
import org.apache.doris.nereids.trees.expressions.functions.scalar.MicroSecondsDiff;
336337
import org.apache.doris.nereids.trees.expressions.functions.scalar.MicroSecondsSub;
337338
import org.apache.doris.nereids.trees.expressions.functions.scalar.Microsecond;
339+
import org.apache.doris.nereids.trees.expressions.functions.scalar.MicrosecondFromUnixtime;
338340
import org.apache.doris.nereids.trees.expressions.functions.scalar.MilliSecondTimestamp;
339341
import org.apache.doris.nereids.trees.expressions.functions.scalar.MilliSecondsAdd;
340342
import org.apache.doris.nereids.trees.expressions.functions.scalar.MilliSecondsDiff;
341343
import org.apache.doris.nereids.trees.expressions.functions.scalar.MilliSecondsSub;
342344
import org.apache.doris.nereids.trees.expressions.functions.scalar.Minute;
343345
import org.apache.doris.nereids.trees.expressions.functions.scalar.MinuteCeil;
344346
import org.apache.doris.nereids.trees.expressions.functions.scalar.MinuteFloor;
347+
import org.apache.doris.nereids.trees.expressions.functions.scalar.MinuteFromUnixtime;
345348
import org.apache.doris.nereids.trees.expressions.functions.scalar.MinutesAdd;
346349
import org.apache.doris.nereids.trees.expressions.functions.scalar.MinutesDiff;
347350
import org.apache.doris.nereids.trees.expressions.functions.scalar.MinutesSub;
@@ -419,6 +422,7 @@
419422
import org.apache.doris.nereids.trees.expressions.functions.scalar.Second;
420423
import org.apache.doris.nereids.trees.expressions.functions.scalar.SecondCeil;
421424
import org.apache.doris.nereids.trees.expressions.functions.scalar.SecondFloor;
425+
import org.apache.doris.nereids.trees.expressions.functions.scalar.SecondFromUnixtime;
422426
import org.apache.doris.nereids.trees.expressions.functions.scalar.SecondTimestamp;
423427
import org.apache.doris.nereids.trees.expressions.functions.scalar.SecondsAdd;
424428
import org.apache.doris.nereids.trees.expressions.functions.scalar.SecondsDiff;
@@ -776,6 +780,7 @@ public class BuiltinScalarFunctions implements FunctionHelper {
776780
scalar(Hour.class, "hour"),
777781
scalar(HourCeil.class, "hour_ceil"),
778782
scalar(HourFloor.class, "hour_floor"),
783+
scalar(HourFromUnixtime.class, "hour_from_unixtime"),
779784
scalar(HoursAdd.class, "hours_add"),
780785
scalar(HoursDiff.class, "hours_diff"),
781786
scalar(HoursSub.class, "hours_sub"),
@@ -877,6 +882,7 @@ public class BuiltinScalarFunctions implements FunctionHelper {
877882
scalar(Md5.class, "md5"),
878883
scalar(Md5Sum.class, "md5sum"),
879884
scalar(Microsecond.class, "microsecond"),
885+
scalar(MicrosecondFromUnixtime.class, "microsecond_from_unixtime"),
880886
scalar(MicroSecondsAdd.class, "microseconds_add"),
881887
scalar(MicroSecondsDiff.class, "microseconds_diff"),
882888
scalar(MicroSecondsSub.class, "microseconds_sub"),
@@ -886,6 +892,7 @@ public class BuiltinScalarFunctions implements FunctionHelper {
886892
scalar(Minute.class, "minute"),
887893
scalar(MinuteCeil.class, "minute_ceil"),
888894
scalar(MinuteFloor.class, "minute_floor"),
895+
scalar(MinuteFromUnixtime.class, "minute_from_unixtime"),
889896
scalar(MinutesAdd.class, "minutes_add"),
890897
scalar(MinutesDiff.class, "minutes_diff"),
891898
scalar(MinutesSub.class, "minutes_sub"),
@@ -964,6 +971,7 @@ public class BuiltinScalarFunctions implements FunctionHelper {
964971
scalar(Second.class, "second"),
965972
scalar(SecondCeil.class, "second_ceil"),
966973
scalar(SecondFloor.class, "second_floor"),
974+
scalar(SecondFromUnixtime.class, "second_from_unixtime"),
967975
scalar(SecondsAdd.class, "seconds_add"),
968976
scalar(SecondsDiff.class, "seconds_diff"),
969977
scalar(SecondsSub.class, "seconds_sub"),

fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/ExpressionOptimization.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
import org.apache.doris.nereids.rules.expression.rules.SimplifyInPredicate;
3636
import org.apache.doris.nereids.rules.expression.rules.SimplifyRange;
3737
import org.apache.doris.nereids.rules.expression.rules.SimplifySelfComparison;
38+
import org.apache.doris.nereids.rules.expression.rules.SimplifyTimeFieldFromUnixtime;
3839
import org.apache.doris.nereids.rules.expression.rules.TopnToMax;
3940

4041
import com.google.common.collect.ImmutableList;
@@ -56,6 +57,7 @@ public class ExpressionOptimization extends ExpressionRewrite {
5657
// compound predicates
5758
SimplifyRange.INSTANCE,
5859
SimplifyConflictCompound.INSTANCE,
60+
SimplifyTimeFieldFromUnixtime.INSTANCE,
5961
DistinctPredicatesRule.INSTANCE,
6062
ExtractCommonFactorRule.INSTANCE,
6163

fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/ExpressionRuleType.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ public enum ExpressionRuleType {
5757
SIMPLIFY_COMPARISON_PREDICATE,
5858
SIMPLIFY_CONDITIONAL_FUNCTION,
5959
SIMPLIFY_CONFLICT_COMPOUND,
60+
SIMPLIFY_DATETIME_FUNCTION,
6061
SIMPLIFY_EQUAL_BOOLEAN_LITERAL,
6162
SIMPLIFY_IN_PREDICATE,
6263
SIMPLIFY_NOT_EXPR,

0 commit comments

Comments
 (0)