Skip to content

Commit 7362354

Browse files
authored
branch-4.0: [Opt](function) opt of certain time field functions used in conjunction with FROM_UNIXTIME. (#60843)
pick: #57941 and #60829
1 parent cc01c04 commit 7362354

File tree

15 files changed

+973
-1
lines changed

15 files changed

+973
-1
lines changed

be/src/vec/functions/date_time_transforms.h

Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,16 @@
2020

2121
#pragma once
2222

23+
#include <libdivide.h>
24+
2325
#include <cmath>
2426
#include <cstdint>
2527

2628
#include "common/status.h"
29+
#include "runtime/define_primitive_type.h"
2730
#include "runtime/primitive_type.h"
2831
#include "util/binary_cast.hpp"
32+
#include "vec/columns/column_decimal.h"
2933
#include "vec/columns/column_nullable.h"
3034
#include "vec/columns/column_string.h"
3135
#include "vec/columns/column_vector.h"
@@ -591,6 +595,139 @@ class FunctionTimeFormat : public IFunction {
591595
}
592596
};
593597

598+
// Base template for optimized time field(HOUR, MINUTE, SECOND, MS) extraction from Unix timestamp
599+
// Uses lookup_offset to avoid expensive civil_second construction
600+
template <typename Impl>
601+
class FunctionTimeFieldFromUnixtime : public IFunction {
602+
public:
603+
static constexpr auto name = Impl::name;
604+
static FunctionPtr create() { return std::make_shared<FunctionTimeFieldFromUnixtime<Impl>>(); }
605+
606+
String get_name() const override { return name; }
607+
608+
size_t get_number_of_arguments() const override { return 1; }
609+
610+
DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& arguments) const override {
611+
// microsecond_from_unixtime returns Int32, others (hour/minute/second) return Int8
612+
if constexpr (Impl::ArgType == PrimitiveType::TYPE_DECIMAL64) {
613+
return make_nullable(std::make_shared<DataTypeInt32>());
614+
} else {
615+
return make_nullable(std::make_shared<DataTypeInt8>());
616+
}
617+
}
618+
619+
// (UTC 9999-12-31 23:59:59) - 24 * 3600
620+
static const int64_t TIMESTAMP_VALID_MAX = 253402243199L;
621+
622+
Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
623+
uint32_t result, size_t input_rows_count) const override {
624+
using ArgColType = PrimitiveTypeTraits<Impl::ArgType>::ColumnType;
625+
using ResColType = std::conditional_t<Impl::ArgType == PrimitiveType::TYPE_DECIMAL64,
626+
ColumnInt32, ColumnInt8>;
627+
using ResItemType = typename ResColType::value_type;
628+
auto res = ResColType::create();
629+
630+
const auto* ts_col =
631+
assert_cast<const ArgColType*>(block.get_by_position(arguments[0]).column.get());
632+
if constexpr (Impl::ArgType == PrimitiveType::TYPE_DECIMAL64) {
633+
// microsecond_from_unixtime only
634+
const auto scale = static_cast<int32_t>(ts_col->get_scale());
635+
636+
for (int i = 0; i < input_rows_count; ++i) {
637+
const auto seconds = ts_col->get_intergral_part(i);
638+
const auto fraction = ts_col->get_fractional_part(i);
639+
640+
if (seconds < 0 || seconds > TIMESTAMP_VALID_MAX) {
641+
return Status::InvalidArgument(
642+
"The input value of TimeFiled(from_unixtime()) must between 0 and "
643+
"253402243199L");
644+
}
645+
646+
ResItemType value = Impl::extract_field(fraction, scale);
647+
res->insert_value(value);
648+
}
649+
} else {
650+
auto ctz = context->state()->timezone_obj();
651+
for (int i = 0; i < input_rows_count; ++i) {
652+
auto date = ts_col->get_element(i);
653+
654+
if (date < 0 || date > TIMESTAMP_VALID_MAX) {
655+
return Status::InvalidArgument(
656+
"The input value of TimeFiled(from_unixtime()) must between 0 and "
657+
"253402243199L");
658+
}
659+
660+
ResItemType value = Impl::extract_field(date, ctz);
661+
res->insert_value(value);
662+
}
663+
}
664+
block.replace_by_position(result, std::move(res));
665+
return Status::OK();
666+
}
667+
};
668+
669+
struct HourFromUnixtimeImpl {
670+
static constexpr PrimitiveType ArgType = PrimitiveType::TYPE_BIGINT;
671+
static constexpr auto name = "hour_from_unixtime";
672+
673+
static int8_t extract_field(int64_t local_time, const cctz::time_zone& ctz) {
674+
static const auto epoch = std::chrono::time_point_cast<cctz::sys_seconds>(
675+
std::chrono::system_clock::from_time_t(0));
676+
cctz::time_point<cctz::sys_seconds> t = epoch + cctz::seconds(local_time);
677+
int offset = ctz.lookup_offset(t).offset;
678+
local_time += offset;
679+
680+
static const libdivide::divider<int64_t> fast_div_3600(3600);
681+
static const libdivide::divider<int64_t> fast_div_86400(86400);
682+
683+
int64_t remainder;
684+
if (LIKELY(local_time >= 0)) {
685+
remainder = local_time - local_time / fast_div_86400 * 86400;
686+
} else {
687+
remainder = local_time % 86400;
688+
if (remainder < 0) {
689+
remainder += 86400;
690+
}
691+
}
692+
return static_cast<int8_t>(remainder / fast_div_3600);
693+
}
694+
};
695+
696+
struct MinuteFromUnixtimeImpl {
697+
static constexpr PrimitiveType ArgType = PrimitiveType::TYPE_BIGINT;
698+
static constexpr auto name = "minute_from_unixtime";
699+
700+
static int8_t extract_field(int64_t local_time, const cctz::time_zone& /*ctz*/) {
701+
static const libdivide::divider<int64_t> fast_div_60(60);
702+
static const libdivide::divider<int64_t> fast_div_3600(3600);
703+
704+
local_time = local_time - local_time / fast_div_3600 * 3600;
705+
706+
return static_cast<int8_t>(local_time / fast_div_60);
707+
}
708+
};
709+
710+
struct SecondFromUnixtimeImpl {
711+
static constexpr PrimitiveType ArgType = PrimitiveType::TYPE_BIGINT;
712+
static constexpr auto name = "second_from_unixtime";
713+
714+
static int8_t extract_field(int64_t local_time, const cctz::time_zone& /*ctz*/) {
715+
return static_cast<int8_t>(local_time % 60);
716+
}
717+
};
718+
719+
struct MicrosecondFromUnixtimeImpl {
720+
static constexpr PrimitiveType ArgType = PrimitiveType::TYPE_DECIMAL64;
721+
static constexpr auto name = "microsecond_from_unixtime";
722+
723+
static int32_t extract_field(int64_t fraction, int scale) {
724+
if (scale < 6) {
725+
fraction *= common::exp10_i64(6 - scale);
726+
}
727+
return static_cast<int32_t>(fraction);
728+
}
729+
};
730+
594731
#include "common/compile_check_end.h"
595732
} // namespace doris::vectorized
596733

be/src/vec/functions/function_time_value_to_field.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "common/status.h"
2222
#include "vec/data_types/data_type_number.h"
2323
#include "vec/data_types/data_type_time.h"
24+
#include "vec/functions/date_time_transforms.h"
2425
#include "vec/functions/function.h"
2526
#include "vec/functions/function_date_or_datetime_computation.h"
2627
#include "vec/functions/simple_function_factory.h"
@@ -89,11 +90,20 @@ struct MicroImpl {
8990
static inline auto execute(const TimeValue::TimeType& t) { return TimeValue::microsecond(t); }
9091
};
9192

93+
using FunctionHourFromUnixtime = FunctionTimeFieldFromUnixtime<HourFromUnixtimeImpl>;
94+
using FunctionMinuteFromUnixtime = FunctionTimeFieldFromUnixtime<MinuteFromUnixtimeImpl>;
95+
using FunctionSecondFromUnixtime = FunctionTimeFieldFromUnixtime<SecondFromUnixtimeImpl>;
96+
using FunctionMicrosecondFromUnixtime = FunctionTimeFieldFromUnixtime<MicrosecondFromUnixtimeImpl>;
97+
9298
void register_function_time_value_field(SimpleFunctionFactory& factory) {
9399
factory.register_function<FunctionTimeValueToField<DataTypeInt32, HourImpl>>();
94100
factory.register_function<FunctionTimeValueToField<DataTypeInt8, MintuImpl>>();
95101
factory.register_function<FunctionTimeValueToField<DataTypeInt8, SecondImpl>>();
96102
factory.register_function<FunctionTimeValueToField<DataTypeInt32, MicroImpl>>();
103+
factory.register_function<FunctionHourFromUnixtime>();
104+
factory.register_function<FunctionMinuteFromUnixtime>();
105+
factory.register_function<FunctionSecondFromUnixtime>();
106+
factory.register_function<FunctionMicrosecondFromUnixtime>();
97107
}
98108
#include "common/compile_check_end.h"
99109
} // namespace doris::vectorized

fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,7 @@
242242
import org.apache.doris.nereids.trees.expressions.functions.scalar.Hour;
243243
import org.apache.doris.nereids.trees.expressions.functions.scalar.HourCeil;
244244
import org.apache.doris.nereids.trees.expressions.functions.scalar.HourFloor;
245+
import org.apache.doris.nereids.trees.expressions.functions.scalar.HourFromUnixtime;
245246
import org.apache.doris.nereids.trees.expressions.functions.scalar.HourMicrosecond;
246247
import org.apache.doris.nereids.trees.expressions.functions.scalar.HourMinute;
247248
import org.apache.doris.nereids.trees.expressions.functions.scalar.HourSecond;
@@ -342,13 +343,15 @@
342343
import org.apache.doris.nereids.trees.expressions.functions.scalar.MicroSecondsDiff;
343344
import org.apache.doris.nereids.trees.expressions.functions.scalar.MicroSecondsSub;
344345
import org.apache.doris.nereids.trees.expressions.functions.scalar.Microsecond;
346+
import org.apache.doris.nereids.trees.expressions.functions.scalar.MicrosecondFromUnixtime;
345347
import org.apache.doris.nereids.trees.expressions.functions.scalar.MilliSecondTimestamp;
346348
import org.apache.doris.nereids.trees.expressions.functions.scalar.MilliSecondsAdd;
347349
import org.apache.doris.nereids.trees.expressions.functions.scalar.MilliSecondsDiff;
348350
import org.apache.doris.nereids.trees.expressions.functions.scalar.MilliSecondsSub;
349351
import org.apache.doris.nereids.trees.expressions.functions.scalar.Minute;
350352
import org.apache.doris.nereids.trees.expressions.functions.scalar.MinuteCeil;
351353
import org.apache.doris.nereids.trees.expressions.functions.scalar.MinuteFloor;
354+
import org.apache.doris.nereids.trees.expressions.functions.scalar.MinuteFromUnixtime;
352355
import org.apache.doris.nereids.trees.expressions.functions.scalar.MinuteMicrosecond;
353356
import org.apache.doris.nereids.trees.expressions.functions.scalar.MinuteSecond;
354357
import org.apache.doris.nereids.trees.expressions.functions.scalar.MinutesAdd;
@@ -431,6 +434,7 @@
431434
import org.apache.doris.nereids.trees.expressions.functions.scalar.Second;
432435
import org.apache.doris.nereids.trees.expressions.functions.scalar.SecondCeil;
433436
import org.apache.doris.nereids.trees.expressions.functions.scalar.SecondFloor;
437+
import org.apache.doris.nereids.trees.expressions.functions.scalar.SecondFromUnixtime;
434438
import org.apache.doris.nereids.trees.expressions.functions.scalar.SecondMicrosecond;
435439
import org.apache.doris.nereids.trees.expressions.functions.scalar.SecondTimestamp;
436440
import org.apache.doris.nereids.trees.expressions.functions.scalar.SecondsAdd;
@@ -798,6 +802,7 @@ public class BuiltinScalarFunctions implements FunctionHelper {
798802
scalar(Hour.class, "hour"),
799803
scalar(HourCeil.class, "hour_ceil"),
800804
scalar(HourFloor.class, "hour_floor"),
805+
scalar(HourFromUnixtime.class, "hour_from_unixtime"),
801806
scalar(HourMicrosecond.class, "hour_microsecond"),
802807
scalar(HourMinute.class, "hour_minute"),
803808
scalar(HourSecond.class, "hour_second"),
@@ -902,6 +907,7 @@ public class BuiltinScalarFunctions implements FunctionHelper {
902907
scalar(Md5.class, "md5"),
903908
scalar(Md5Sum.class, "md5sum"),
904909
scalar(Microsecond.class, "microsecond"),
910+
scalar(MicrosecondFromUnixtime.class, "microsecond_from_unixtime"),
905911
scalar(MicroSecondsAdd.class, "microseconds_add"),
906912
scalar(MicroSecondsDiff.class, "microseconds_diff"),
907913
scalar(MicroSecondsSub.class, "microseconds_sub"),
@@ -911,6 +917,7 @@ public class BuiltinScalarFunctions implements FunctionHelper {
911917
scalar(Minute.class, "minute"),
912918
scalar(MinuteCeil.class, "minute_ceil"),
913919
scalar(MinuteFloor.class, "minute_floor"),
920+
scalar(MinuteFromUnixtime.class, "minute_from_unixtime"),
914921
scalar(MinuteMicrosecond.class, "minute_microsecond"),
915922
scalar(MinuteSecond.class, "minute_second"),
916923
scalar(MinutesAdd.class, "minutes_add"),
@@ -994,6 +1001,7 @@ public class BuiltinScalarFunctions implements FunctionHelper {
9941001
scalar(Second.class, "second"),
9951002
scalar(SecondCeil.class, "second_ceil"),
9961003
scalar(SecondFloor.class, "second_floor"),
1004+
scalar(SecondFromUnixtime.class, "second_from_unixtime"),
9971005
scalar(SecondMicrosecond.class, "second_microsecond"),
9981006
scalar(SecondsAdd.class, "seconds_add"),
9991007
scalar(SecondsDiff.class, "seconds_diff"),

fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/ExpressionOptimization.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
import org.apache.doris.nereids.rules.expression.rules.SimplifyInPredicate;
3232
import org.apache.doris.nereids.rules.expression.rules.SimplifyRange;
3333
import org.apache.doris.nereids.rules.expression.rules.SimplifySelfComparison;
34+
import org.apache.doris.nereids.rules.expression.rules.SimplifyTimeFieldFromUnixtime;
3435
import org.apache.doris.nereids.rules.expression.rules.TopnToMax;
3536

3637
import com.google.common.collect.ImmutableList;
@@ -52,6 +53,7 @@ public class ExpressionOptimization extends ExpressionRewrite {
5253
// compound predicates
5354
SimplifyRange.INSTANCE,
5455
SimplifyConflictCompound.INSTANCE,
56+
SimplifyTimeFieldFromUnixtime.INSTANCE,
5557
DistinctPredicatesRule.INSTANCE,
5658
ExtractCommonFactorRule.INSTANCE,
5759

fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/ExpressionRuleType.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ public enum ExpressionRuleType {
5252
SIMPLIFY_COMPARISON_PREDICATE,
5353
SIMPLIFY_CONDITIONAL_FUNCTION,
5454
SIMPLIFY_CONFLICT_COMPOUND,
55+
SIMPLIFY_DATETIME_FUNCTION,
5556
SIMPLIFY_IN_PREDICATE,
5657
SIMPLIFY_NOT_EXPR,
5758
SIMPLIFY_RANGE,

0 commit comments

Comments
 (0)