|
30 | 30 | #include "runtime/primitive_type.h" |
31 | 31 | #include "udf/udf.h" |
32 | 32 | #include "util/binary_cast.hpp" |
33 | | -#include "vec/columns/column_decimal.h" |
34 | 33 | #include "vec/columns/column.h" |
| 34 | +#include "vec/columns/column_decimal.h" |
35 | 35 | #include "vec/columns/column_nullable.h" |
36 | 36 | #include "vec/columns/column_string.h" |
37 | 37 | #include "vec/columns/column_vector.h" |
@@ -433,6 +433,208 @@ struct FromUnixTimeDecimalImpl { |
433 | 433 | } |
434 | 434 | }; |
435 | 435 |
|
| 436 | +// Base template for optimized time field(HOUR, MINUTE, SECOND, MS) extraction from Unix timestamp |
| 437 | +// Uses lookup_offset to avoid expensive civil_second construction |
| 438 | +template <typename Impl> |
| 439 | +class FunctionTimeFieldFromUnixtime : public IFunction { |
| 440 | +public: |
| 441 | + static constexpr auto name = Impl::name; |
| 442 | + static FunctionPtr create() { return std::make_shared<FunctionTimeFieldFromUnixtime<Impl>>(); } |
| 443 | + |
| 444 | + String get_name() const override { return name; } |
| 445 | + |
| 446 | + size_t get_number_of_arguments() const override { return 1; } |
| 447 | + |
| 448 | + DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& arguments) const override { |
| 449 | + // microsecond_from_unixtime returns Int32, others (hour/minute/second) return Int8 |
| 450 | + if constexpr (Impl::ArgType == PrimitiveType::TYPE_DECIMAL64) { |
| 451 | + return make_nullable(std::make_shared<DataTypeInt32>()); |
| 452 | + } else { |
| 453 | + return make_nullable(std::make_shared<DataTypeInt8>()); |
| 454 | + } |
| 455 | + } |
| 456 | + |
| 457 | + // (UTC 9999-12-31 23:59:59) - 24 * 3600 |
| 458 | + static const int64_t TIMESTAMP_VALID_MAX = 253402243199L; |
| 459 | + |
| 460 | + Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
| 461 | + uint32_t result, size_t input_rows_count) const override { |
| 462 | + using ArgColType = PrimitiveTypeTraits<Impl::ArgType>::ColumnType; |
| 463 | + using ResColType = std::conditional_t<Impl::ArgType == PrimitiveType::TYPE_DECIMAL64, |
| 464 | + ColumnInt32, ColumnInt8>; |
| 465 | + using ResItemType = typename ResColType::value_type; |
| 466 | + auto res = ResColType::create(); |
| 467 | + |
| 468 | + const auto* ts_col = |
| 469 | + assert_cast<const ArgColType*>(block.get_by_position(arguments[0]).column.get()); |
| 470 | + if constexpr (Impl::ArgType == PrimitiveType::TYPE_DECIMAL64) { |
| 471 | + // microsecond_from_unixtime only |
| 472 | + const auto scale = static_cast<int32_t>(ts_col->get_scale()); |
| 473 | + |
| 474 | + for (int i = 0; i < input_rows_count; ++i) { |
| 475 | + const auto seconds = ts_col->get_intergral_part(i); |
| 476 | + const auto fraction = ts_col->get_fractional_part(i); |
| 477 | + |
| 478 | + if (seconds < 0 || seconds > TIMESTAMP_VALID_MAX) { |
| 479 | + return Status::InvalidArgument( |
| 480 | + "The input value of TimeFiled(from_unixtime()) must between 0 and " |
| 481 | + "253402243199L"); |
| 482 | + } |
| 483 | + |
| 484 | + ResItemType value = Impl::extract_field(fraction, scale); |
| 485 | + res->insert_value(value); |
| 486 | + } |
| 487 | + } else { |
| 488 | + auto ctz = context->state()->timezone_obj(); |
| 489 | + for (int i = 0; i < input_rows_count; ++i) { |
| 490 | + auto date = ts_col->get_element(i); |
| 491 | + |
| 492 | + if (date < 0 || date > TIMESTAMP_VALID_MAX) { |
| 493 | + return Status::InvalidArgument( |
| 494 | + "The input value of TimeFiled(from_unixtime()) must between 0 and " |
| 495 | + "253402243199L"); |
| 496 | + } |
| 497 | + |
| 498 | + ResItemType value = Impl::extract_field(date, ctz); |
| 499 | + res->insert_value(value); |
| 500 | + } |
| 501 | + } |
| 502 | + block.replace_by_position(result, std::move(res)); |
| 503 | + return Status::OK(); |
| 504 | + } |
| 505 | +}; |
| 506 | + |
| 507 | +struct HourFromUnixtimeImpl { |
| 508 | + static constexpr PrimitiveType ArgType = PrimitiveType::TYPE_BIGINT; |
| 509 | + static constexpr auto name = "hour_from_unixtime"; |
| 510 | + |
| 511 | + static int8_t extract_field(int64_t local_time, const cctz::time_zone& ctz) { |
| 512 | + static const auto epoch = std::chrono::time_point_cast<cctz::sys_seconds>( |
| 513 | + std::chrono::system_clock::from_time_t(0)); |
| 514 | + cctz::time_point<cctz::sys_seconds> t = epoch + cctz::seconds(local_time); |
| 515 | + int offset = ctz.lookup_offset(t).offset; |
| 516 | + local_time += offset; |
| 517 | + |
| 518 | + static const libdivide::divider<int64_t> fast_div_3600(3600); |
| 519 | + static const libdivide::divider<int64_t> fast_div_86400(86400); |
| 520 | + |
| 521 | + int64_t remainder; |
| 522 | + if (LIKELY(local_time >= 0)) { |
| 523 | + remainder = local_time - local_time / fast_div_86400 * 86400; |
| 524 | + } else { |
| 525 | + remainder = local_time % 86400; |
| 526 | + if (remainder < 0) { |
| 527 | + remainder += 86400; |
| 528 | + } |
| 529 | + } |
| 530 | + return static_cast<int8_t>(remainder / fast_div_3600); |
| 531 | + } |
| 532 | +}; |
| 533 | + |
| 534 | +struct MinuteFromUnixtimeImpl { |
| 535 | + static constexpr PrimitiveType ArgType = PrimitiveType::TYPE_BIGINT; |
| 536 | + static constexpr auto name = "minute_from_unixtime"; |
| 537 | + |
| 538 | + static int8_t extract_field(int64_t local_time, const cctz::time_zone& /*ctz*/) { |
| 539 | + static const libdivide::divider<int64_t> fast_div_60(60); |
| 540 | + static const libdivide::divider<int64_t> fast_div_3600(3600); |
| 541 | + |
| 542 | + local_time = local_time - local_time / fast_div_3600 * 3600; |
| 543 | + |
| 544 | + return static_cast<int8_t>(local_time / fast_div_60); |
| 545 | + } |
| 546 | +}; |
| 547 | + |
| 548 | +struct SecondFromUnixtimeImpl { |
| 549 | + static constexpr PrimitiveType ArgType = PrimitiveType::TYPE_BIGINT; |
| 550 | + static constexpr auto name = "second_from_unixtime"; |
| 551 | + |
| 552 | + static int8_t extract_field(int64_t local_time, const cctz::time_zone& /*ctz*/) { |
| 553 | + return static_cast<int8_t>(local_time % 60); |
| 554 | + } |
| 555 | +}; |
| 556 | + |
| 557 | +struct MicrosecondFromUnixtimeImpl { |
| 558 | + static constexpr PrimitiveType ArgType = PrimitiveType::TYPE_DECIMAL64; |
| 559 | + static constexpr auto name = "microsecond_from_unixtime"; |
| 560 | + |
| 561 | + static int32_t extract_field(int64_t fraction, int scale) { |
| 562 | + if (scale < 6) { |
| 563 | + fraction *= common::exp10_i64(6 - scale); |
| 564 | + } |
| 565 | + return static_cast<int32_t>(fraction); |
| 566 | + } |
| 567 | + |
| 568 | +template <PrimitiveType ArgPType> |
| 569 | +class FunctionTimeFormat : public IFunction { |
| 570 | +public: |
| 571 | + using ArgColType = typename PrimitiveTypeTraits<ArgPType>::ColumnType; |
| 572 | + using ArgCppType = typename PrimitiveTypeTraits<ArgPType>::CppType; |
| 573 | + |
| 574 | + static constexpr auto name = "time_format"; |
| 575 | + String get_name() const override { return name; } |
| 576 | + static FunctionPtr create() { return std::make_shared<FunctionTimeFormat>(); } |
| 577 | + DataTypes get_variadic_argument_types_impl() const override { |
| 578 | + return {std::make_shared<typename PrimitiveTypeTraits<ArgPType>::DataType>(), |
| 579 | + std::make_shared<vectorized::DataTypeString>()}; |
| 580 | + } |
| 581 | + DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& arguments) const override { |
| 582 | + return make_nullable(std::make_shared<DataTypeString>()); |
| 583 | + } |
| 584 | + size_t get_number_of_arguments() const override { return 2; } |
| 585 | + |
| 586 | + Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
| 587 | + uint32_t result, size_t input_rows_count) const override { |
| 588 | + auto res_col = ColumnString::create(); |
| 589 | + ColumnString::Chars& res_chars = res_col->get_chars(); |
| 590 | + ColumnString::Offsets& res_offsets = res_col->get_offsets(); |
| 591 | + |
| 592 | + auto null_map = ColumnUInt8::create(); |
| 593 | + auto& null_map_data = null_map->get_data(); |
| 594 | + null_map_data.resize_fill(input_rows_count, 0); |
| 595 | + |
| 596 | + res_offsets.reserve(input_rows_count); |
| 597 | + |
| 598 | + ColumnPtr arg_col[2]; |
| 599 | + bool is_const[2]; |
| 600 | + for (size_t i = 0; i < 2; ++i) { |
| 601 | + const ColumnPtr& col = block.get_by_position(arguments[i]).column; |
| 602 | + std::tie(arg_col[i], is_const[i]) = unpack_if_const(col); |
| 603 | + } |
| 604 | + |
| 605 | + const auto* datetime_col = assert_cast<const ArgColType*>(arg_col[0].get()); |
| 606 | + const auto* format_col = assert_cast<const ColumnString*>(arg_col[1].get()); |
| 607 | + for (size_t i = 0; i < input_rows_count; ++i) { |
| 608 | + const auto& datetime_val = datetime_col->get_element(index_check_const(i, is_const[0])); |
| 609 | + StringRef format = format_col->get_data_at(index_check_const(i, is_const[1])); |
| 610 | + TimeValue::TimeType time = get_time_value(datetime_val); |
| 611 | + |
| 612 | + char buf[100 + SAFE_FORMAT_STRING_MARGIN]; |
| 613 | + if (!TimeValue::to_format_string_conservative(format.data, format.size, buf, |
| 614 | + 100 + SAFE_FORMAT_STRING_MARGIN, time)) { |
| 615 | + null_map_data[i] = 1; |
| 616 | + res_offsets.push_back(res_chars.size()); |
| 617 | + continue; |
| 618 | + } |
| 619 | + res_chars.insert(buf, buf + strlen(buf)); |
| 620 | + res_offsets.push_back(res_chars.size()); |
| 621 | + } |
| 622 | + block.replace_by_position(result, |
| 623 | + ColumnNullable::create(std::move(res_col), std::move(null_map))); |
| 624 | + return Status::OK(); |
| 625 | + } |
| 626 | + |
| 627 | +private: |
| 628 | + TimeValue::TimeType get_time_value(const ArgCppType& datetime_val) const { |
| 629 | + if constexpr (ArgPType == PrimitiveType::TYPE_TIMEV2) { |
| 630 | + return static_cast<TimeValue::TimeType>(datetime_val); |
| 631 | + } else { |
| 632 | + return TimeValue::make_time(datetime_val.hour(), datetime_val.minute(), |
| 633 | + datetime_val.second(), datetime_val.microsecond()); |
| 634 | + } |
| 635 | + } |
| 636 | +}; |
| 637 | + |
436 | 638 | #include "common/compile_check_end.h" |
437 | 639 | } // namespace doris::vectorized |
438 | 640 |
|
|
0 commit comments