Skip to content

Commit 93c13cb

Browse files
jacktengg and Mryange authored
[feature](type)support timestamptz type (#56646) (#59399)
apache/doris-website#2962

The TIMESTAMPTZ implementation does not store time zone information with each row of data; instead, it adopts the following mechanism:

1. During storage: all input time values are converted to UTC (Coordinated Universal Time).
2. During query: based on the session's time zone setting (specified via the `time_zone` variable), the UTC time is automatically converted to the corresponding time zone for display.

Therefore, TIMESTAMPTZ can be understood as a DATETIME type with time zone conversion functionality, where Doris automatically handles time zone conversions internally.

None

- Test <!-- At least one of them must be included. -->
    - [x] Regression test
    - [x] Unit Test
    - [ ] Manual test (add detailed scripts or steps below)
    - [ ] No need to test or manual test. Explain why:
        - [ ] This is a refactor/code format and no logic has been changed.
        - [ ] Previous test can cover this change.
        - [ ] No code files have been changed.
        - [ ] Other reason <!-- Add your reason? -->
- Behavior changed:
    - [x] No.
    - [ ] Yes. <!-- Explain the behavior change -->
- Does this need documentation?
    - [ ] No.
    - [x] Yes. <!-- Add document PR link here. eg: apache/doris-website#1214 -->

- [ ] Confirm the release note
- [ ] Confirm test cases
- [ ] Confirm document
- [ ] Add branch pick label <!-- Add branch pick label that this PR should merge into -->

---------

### What problem does this PR solve?

Issue Number: close #xxx

Related PR: #xxx

Problem Summary:

### Release note

None

### Check List (For Author)

- Test <!-- At least one of them must be included. -->
    - [ ] Regression test
    - [ ] Unit Test
    - [ ] Manual test (add detailed scripts or steps below)
    - [ ] No need to test or manual test. Explain why:
        - [ ] This is a refactor/code format and no logic has been changed.
        - [ ] Previous test can cover this change.
        - [ ] No code files have been changed.
        - [ ] Other reason <!-- Add your reason? -->
- Behavior changed:
    - [ ] No.
    - [ ] Yes. <!-- Explain the behavior change -->
- Does this need documentation?
    - [ ] No.
    - [ ] Yes. <!-- Add document PR link here. eg: apache/doris-website#1214 -->

### Check List (For Reviewer who merge this PR)

- [ ] Confirm the release note
- [ ] Confirm test cases
- [ ] Confirm document
- [ ] Add branch pick label <!-- Add branch pick label that this PR should merge into -->

---------

Co-authored-by: Mryange <[email protected]>
1 parent d991ff2 commit 93c13cb

File tree

278 files changed

+26218
-1222
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

278 files changed

+26218
-1222
lines changed

be/src/exec/olap_common.h

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -70,13 +70,15 @@ std::string cast_to_string(T value, int scale) {
7070
} else if constexpr (primitive_type == TYPE_LARGEINT) {
7171
return vectorized::int128_to_string(value);
7272
} else if constexpr (primitive_type == TYPE_DATETIMEV2) {
73-
DateV2Value<DateTimeV2ValueType> datetimev2_val =
74-
static_cast<DateV2Value<DateTimeV2ValueType>>(value);
73+
auto datetimev2_val = static_cast<DateV2Value<DateTimeV2ValueType>>(value);
7574
char buf[30];
7675
datetimev2_val.to_string(buf);
7776
std::stringstream ss;
7877
ss << buf;
7978
return ss.str();
79+
} else if constexpr (primitive_type == TYPE_TIMESTAMPTZ) {
80+
auto timestamptz_val = static_cast<TimestampTzValue>(value);
81+
return timestamptz_val.to_string(cctz::utc_time_zone(), scale);
8082
} else if constexpr (primitive_type == TYPE_TIMEV2) {
8183
return TimeValue::to_string(value, scale);
8284
} else if constexpr (primitive_type == TYPE_IPV4) {
@@ -103,16 +105,10 @@ class ColumnValueRange {
103105

104106
ColumnValueRange(std::string col_name);
105107

106-
ColumnValueRange(std::string col_name, const CppType& min, const CppType& max,
107-
bool contain_null);
108-
109108
ColumnValueRange(std::string col_name, int precision, int scale);
110109

111110
ColumnValueRange(std::string col_name, bool is_nullable_col, int precision, int scale);
112111

113-
ColumnValueRange(std::string col_name, const CppType& min, const CppType& max,
114-
bool is_nullable_col, bool contain_null, int precision, int scale);
115-
116112
// should add fixed value before add range
117113
Status add_fixed_value(const CppType& value);
118114

@@ -368,6 +364,12 @@ class ColumnValueRange {
368364
bool is_in_range(const CppType& value);
369365

370366
private:
367+
ColumnValueRange(std::string col_name, const CppType& min, const CppType& max,
368+
bool contain_null);
369+
370+
ColumnValueRange(std::string col_name, const CppType& min, const CppType& max,
371+
bool is_nullable_col, bool contain_null, int precision, int scale);
372+
371373
const static CppType TYPE_MIN; // Column type's min value
372374
const static CppType TYPE_MAX; // Column type's max value
373375

@@ -396,6 +398,7 @@ class ColumnValueRange {
396398
primitive_type == PrimitiveType::TYPE_BOOLEAN ||
397399
primitive_type == PrimitiveType::TYPE_DATETIME ||
398400
primitive_type == PrimitiveType::TYPE_DATETIMEV2 ||
401+
primitive_type == PrimitiveType::TYPE_TIMESTAMPTZ ||
399402
primitive_type == PrimitiveType::TYPE_DECIMAL256;
400403

401404
int _runtime_filter_id = -1;
@@ -488,10 +491,10 @@ using ColumnValueRangeType = std::variant<
488491
ColumnValueRange<TYPE_IPV6>, ColumnValueRange<TYPE_CHAR>, ColumnValueRange<TYPE_VARCHAR>,
489492
ColumnValueRange<TYPE_STRING>, ColumnValueRange<TYPE_DATE>, ColumnValueRange<TYPE_DATEV2>,
490493
ColumnValueRange<TYPE_DATETIME>, ColumnValueRange<TYPE_DATETIMEV2>,
491-
ColumnValueRange<TYPE_DECIMALV2>, ColumnValueRange<TYPE_BOOLEAN>,
492-
ColumnValueRange<TYPE_HLL>, ColumnValueRange<TYPE_DECIMAL32>,
493-
ColumnValueRange<TYPE_DECIMAL64>, ColumnValueRange<TYPE_DECIMAL128I>,
494-
ColumnValueRange<TYPE_DECIMAL256>>;
494+
ColumnValueRange<TYPE_TIMESTAMPTZ>, ColumnValueRange<TYPE_DECIMALV2>,
495+
ColumnValueRange<TYPE_BOOLEAN>, ColumnValueRange<TYPE_HLL>,
496+
ColumnValueRange<TYPE_DECIMAL32>, ColumnValueRange<TYPE_DECIMAL64>,
497+
ColumnValueRange<TYPE_DECIMAL128I>, ColumnValueRange<TYPE_DECIMAL256>>;
495498

496499
template <PrimitiveType primitive_type>
497500
const typename ColumnValueRange<primitive_type>::CppType

be/src/exec/schema_scanner.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@
7070
#include "pipeline/dependency.h"
7171
#include "runtime/define_primitive_type.h"
7272
#include "runtime/fragment_mgr.h"
73+
#include "runtime/primitive_type.h"
7374
#include "runtime/types.h"
7475
#include "util/string_util.h"
7576
#include "util/types.h"
@@ -383,6 +384,12 @@ Status SchemaScanner::fill_dest_column_for_range(vectorized::Block* block, size_
383384
break;
384385
}
385386

387+
case TYPE_TIMESTAMPTZ: {
388+
uint64_t num = *reinterpret_cast<uint64_t*>(data);
389+
assert_cast<vectorized::ColumnTimeStampTz*>(col_ptr)->insert_value(num);
390+
break;
391+
}
392+
386393
case TYPE_DECIMALV2: {
387394
const vectorized::Int128 num = (reinterpret_cast<PackedInt128*>(data))->value;
388395
assert_cast<vectorized::ColumnDecimal128V2*>(col_ptr)->insert_data(

be/src/exec/schema_scanner/schema_columns_scanner.cpp

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,8 @@ std::string SchemaColumnsScanner::_to_mysql_data_type_string(TColumnDesc& desc)
137137
case TPrimitiveType::DATETIME:
138138
case TPrimitiveType::DATETIMEV2:
139139
return "datetime";
140+
case TPrimitiveType::TIMESTAMPTZ:
141+
return "timestamp";
140142
case TPrimitiveType::DECIMAL32:
141143
case TPrimitiveType::DECIMAL64:
142144
case TPrimitiveType::DECIMAL128I:
@@ -238,6 +240,16 @@ std::string SchemaColumnsScanner::_type_to_string(TColumnDesc& desc) {
238240
}
239241
return fmt::to_string(debug_string_buffer);
240242
}
243+
case TPrimitiveType::TIMESTAMPTZ: {
244+
fmt::memory_buffer debug_string_buffer;
245+
if (!desc.__isset.columnScale || desc.columnScale == 0) {
246+
fmt::format_to(debug_string_buffer, "timestamp");
247+
} else {
248+
fmt::format_to(debug_string_buffer, "timestamp({})",
249+
desc.__isset.columnScale ? std::to_string(desc.columnScale) : "UNKNOWN");
250+
}
251+
return fmt::to_string(debug_string_buffer);
252+
}
241253
case TPrimitiveType::HLL: {
242254
return "hll";
243255
}
@@ -569,7 +581,8 @@ Status SchemaColumnsScanner::_fill_block_impl(vectorized::Block* block) {
569581
for (int i = 0; i < columns_num; ++i) {
570582
int data_type = _desc_result.columns[i].columnDesc.columnType;
571583
if (_desc_result.columns[i].columnDesc.__isset.columnScale &&
572-
data_type == TPrimitiveType::DATETIMEV2) {
584+
(data_type == TPrimitiveType::DATETIMEV2 ||
585+
data_type == TPrimitiveType::TIMESTAMPTZ)) {
573586
srcs[i] = _desc_result.columns[i].columnDesc.columnScale;
574587
datas[i] = srcs.data() + i;
575588
} else {

be/src/exec/table_connector.cpp

Lines changed: 0 additions & 170 deletions
Original file line numberDiff line numberDiff line change
@@ -99,175 +99,5 @@ std::u16string TableConnector::utf8_to_u16string(const char* first, const char*
9999
return result;
100100
}
101101

102-
Status TableConnector::convert_column_data(const vectorized::ColumnPtr& column_ptr,
103-
const vectorized::DataTypePtr& type_ptr,
104-
const vectorized::DataTypePtr& type, size_t row,
105-
TOdbcTableType::type table_type) {
106-
auto extra_convert_func = [&](const std::string_view& str, const bool& is_date) -> void {
107-
if (table_type == TOdbcTableType::ORACLE || table_type == TOdbcTableType::SAP_HANA) {
108-
//if is ORACLE and date type, insert into need convert
109-
if (is_date) {
110-
fmt::format_to(_insert_stmt_buffer, "to_date('{}','yyyy-mm-dd')", str);
111-
} else {
112-
fmt::format_to(_insert_stmt_buffer, "to_date('{}','yyyy-mm-dd hh24:mi:ss')", str);
113-
}
114-
} else if (table_type == TOdbcTableType::POSTGRESQL) {
115-
fmt::format_to(_insert_stmt_buffer, "'{}'::date", str);
116-
} else if (table_type == TOdbcTableType::SQLSERVER) {
117-
// Values in sqlserver should be enclosed by single quotes
118-
fmt::format_to(_insert_stmt_buffer, "'{}'", str);
119-
} else {
120-
fmt::format_to(_insert_stmt_buffer, "\"{}\"", str);
121-
}
122-
};
123-
const vectorized::IColumn* column = column_ptr.get();
124-
if (type_ptr->is_nullable()) {
125-
const auto* nullable_column =
126-
assert_cast<const vectorized::ColumnNullable*>(column_ptr.get());
127-
if (nullable_column->is_null_at(row)) {
128-
fmt::format_to(_insert_stmt_buffer, "{}", "NULL");
129-
return Status::OK();
130-
}
131-
column = nullable_column->get_nested_column_ptr().get();
132-
} else {
133-
column = column_ptr.get();
134-
}
135-
auto [item, size] = column->get_data_at(row);
136-
switch (type->get_primitive_type()) {
137-
case TYPE_BOOLEAN:
138-
if (table_type == TOdbcTableType::SAP_HANA) {
139-
fmt::format_to(_insert_stmt_buffer, "{}", *reinterpret_cast<const bool*>(item));
140-
} else {
141-
fmt::format_to(_insert_stmt_buffer, "{}", *reinterpret_cast<const int8_t*>(item));
142-
}
143-
break;
144-
case TYPE_TINYINT:
145-
fmt::format_to(_insert_stmt_buffer, "{}", *reinterpret_cast<const int8_t*>(item));
146-
break;
147-
case TYPE_SMALLINT:
148-
fmt::format_to(_insert_stmt_buffer, "{}", *reinterpret_cast<const int16_t*>(item));
149-
break;
150-
case TYPE_INT:
151-
fmt::format_to(_insert_stmt_buffer, "{}", *reinterpret_cast<const int32_t*>(item));
152-
break;
153-
case TYPE_BIGINT:
154-
fmt::format_to(_insert_stmt_buffer, "{}", *reinterpret_cast<const int64_t*>(item));
155-
break;
156-
case TYPE_FLOAT:
157-
fmt::format_to(_insert_stmt_buffer, "{}", *reinterpret_cast<const float*>(item));
158-
break;
159-
case TYPE_DOUBLE:
160-
fmt::format_to(_insert_stmt_buffer, "{}", *reinterpret_cast<const double*>(item));
161-
break;
162-
case TYPE_DATE: {
163-
VecDateTimeValue value = binary_cast<int64_t, doris::VecDateTimeValue>(*(int64_t*)item);
164-
165-
char buf[64];
166-
char* pos = value.to_string(buf);
167-
std::string_view str(buf, pos - buf - 1);
168-
extra_convert_func(str, true);
169-
break;
170-
}
171-
case TYPE_DATETIME: {
172-
VecDateTimeValue value = binary_cast<int64_t, doris::VecDateTimeValue>(*(int64_t*)item);
173-
174-
char buf[64];
175-
char* pos = value.to_string(buf);
176-
std::string_view str(buf, pos - buf - 1);
177-
extra_convert_func(str, false);
178-
break;
179-
}
180-
case TYPE_DATEV2: {
181-
DateV2Value<DateV2ValueType> value =
182-
binary_cast<uint32_t, DateV2Value<DateV2ValueType>>(*(int32_t*)item);
183-
184-
char buf[64];
185-
char* pos = value.to_string(buf);
186-
std::string str(buf, pos - buf - 1);
187-
extra_convert_func(str, true);
188-
break;
189-
}
190-
case TYPE_DATETIMEV2: {
191-
DateV2Value<DateTimeV2ValueType> value =
192-
binary_cast<uint64_t, DateV2Value<DateTimeV2ValueType>>(*(int64_t*)item);
193-
194-
char buf[64];
195-
char* pos = value.to_string(buf, type->get_scale());
196-
std::string str(buf, pos - buf - 1);
197-
extra_convert_func(str, false);
198-
break;
199-
}
200-
case TYPE_VARCHAR:
201-
case TYPE_CHAR:
202-
case TYPE_STRING: {
203-
// for oracle/pg database string must be '
204-
if (table_type == TOdbcTableType::ORACLE || table_type == TOdbcTableType::POSTGRESQL ||
205-
table_type == TOdbcTableType::SAP_HANA || table_type == TOdbcTableType::MYSQL ||
206-
table_type == TOdbcTableType::CLICKHOUSE || table_type == TOdbcTableType::SQLSERVER) {
207-
fmt::format_to(_insert_stmt_buffer, "'{}'", fmt::basic_string_view(item, size));
208-
} else {
209-
fmt::format_to(_insert_stmt_buffer, "\"{}\"", fmt::basic_string_view(item, size));
210-
}
211-
break;
212-
}
213-
case TYPE_ARRAY: {
214-
auto& arr_nested = reinterpret_cast<const vectorized::ColumnArray*>(column)->get_data_ptr();
215-
auto& arr_offset = reinterpret_cast<const vectorized::ColumnArray*>(column)->get_offsets();
216-
auto array_type = remove_nullable(type_ptr);
217-
auto nested_type =
218-
reinterpret_cast<const vectorized::DataTypeArray&>(*array_type).get_nested_type();
219-
220-
//for doris、CK insert into ---> []
221-
//for PG insert into ---> ARRAY[]
222-
if (table_type == TOdbcTableType::POSTGRESQL) {
223-
fmt::format_to(_insert_stmt_buffer, "{}", "ARRAY[");
224-
} else if (table_type == TOdbcTableType::CLICKHOUSE ||
225-
table_type == TOdbcTableType::MYSQL) {
226-
fmt::format_to(_insert_stmt_buffer, "{}", "[");
227-
}
228-
bool first_value = true;
229-
for (auto idx = arr_offset[row - 1]; idx < arr_offset[row]; ++idx) {
230-
if (first_value == false) {
231-
fmt::format_to(_insert_stmt_buffer, "{}", ", ");
232-
}
233-
if (arr_nested->is_null_at(idx)) {
234-
fmt::format_to(_insert_stmt_buffer, "{}", "NULL");
235-
} else {
236-
RETURN_IF_ERROR(convert_column_data(arr_nested, nested_type,
237-
assert_cast<const vectorized::DataTypeArray*>(
238-
vectorized::remove_nullable(type).get())
239-
->get_nested_type(),
240-
idx, table_type));
241-
}
242-
first_value = false;
243-
}
244-
fmt::format_to(_insert_stmt_buffer, "{}", "]");
245-
break;
246-
}
247-
case TYPE_DECIMALV2: {
248-
DecimalV2Value value = *(DecimalV2Value*)(item);
249-
fmt::format_to(_insert_stmt_buffer, "{}", value.to_string());
250-
break;
251-
}
252-
case TYPE_DECIMAL32:
253-
case TYPE_DECIMAL64:
254-
case TYPE_DECIMAL128I:
255-
case TYPE_DECIMAL256: {
256-
auto decimal_type = remove_nullable(type_ptr);
257-
auto val = decimal_type->to_string(*column, row);
258-
fmt::format_to(_insert_stmt_buffer, "{}", val);
259-
break;
260-
}
261-
case TYPE_LARGEINT: {
262-
fmt::format_to(_insert_stmt_buffer, "{}", *reinterpret_cast<const __int128*>(item));
263-
break;
264-
}
265-
default: {
266-
return Status::InternalError("can't convert this type to mysql type. type = {}",
267-
type->get_name());
268-
}
269-
}
270-
return Status::OK();
271-
}
272102
#include "common/compile_check_end.h"
273103
} // namespace doris

be/src/exec/table_connector.h

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -71,11 +71,6 @@ class TableConnector {
7171

7272
std::u16string utf8_to_u16string(const char* first, const char* last);
7373

74-
Status convert_column_data(const vectorized::ColumnPtr& column_ptr,
75-
const vectorized::DataTypePtr& type_ptr,
76-
const vectorized::DataTypePtr& type, size_t row,
77-
TOdbcTableType::type table_type);
78-
7974
// Default max buffer size use in insert to: 50MB, normally a batch is smaller than the size
8075
static constexpr uint32_t INSERT_BUFFER_SIZE = 1024l * 1024 * 50;
8176

be/src/exec/tablet_info.cpp

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@
5353
// NOLINTNEXTLINE(unused-includes)
5454
#include "vec/exprs/vexpr_context.h" // IWYU pragma: keep
5555
#include "vec/exprs/vliteral.h"
56+
#include "vec/functions/cast/cast_to_timestamptz.h"
5657
#include "vec/runtime/vdatetime_value.h"
5758

5859
namespace doris {
@@ -559,9 +560,10 @@ static Status _create_partition_key(const TExprNode& t_expr, BlockRow* part_key,
559560
//TODO: use assert_cast before insert_data
560561
switch (t_expr.node_type) {
561562
case TExprNodeType::DATE_LITERAL: {
562-
if (vectorized::DataTypeFactory::instance()
563-
.create_data_type(t_expr.type)
564-
->get_primitive_type() == TYPE_DATEV2) {
563+
auto primitive_type = vectorized::DataTypeFactory::instance()
564+
.create_data_type(t_expr.type)
565+
->get_primitive_type();
566+
if (primitive_type == TYPE_DATEV2) {
565567
DateV2Value<DateV2ValueType> dt;
566568
if (!dt.from_date_str(t_expr.date_literal.value.c_str(),
567569
t_expr.date_literal.value.size())) {
@@ -570,9 +572,7 @@ static Status _create_partition_key(const TExprNode& t_expr, BlockRow* part_key,
570572
return Status::InternalError(ss.str());
571573
}
572574
column->insert_data(reinterpret_cast<const char*>(&dt), 0);
573-
} else if (vectorized::DataTypeFactory::instance()
574-
.create_data_type(t_expr.type)
575-
->get_primitive_type() == TYPE_DATETIMEV2) {
575+
} else if (primitive_type == TYPE_DATETIMEV2) {
576576
DateV2Value<DateTimeV2ValueType> dt;
577577
const int32_t scale =
578578
t_expr.type.types.empty() ? -1 : t_expr.type.types.front().scalar_type.scale;
@@ -583,6 +583,21 @@ static Status _create_partition_key(const TExprNode& t_expr, BlockRow* part_key,
583583
return Status::InternalError(ss.str());
584584
}
585585
column->insert_data(reinterpret_cast<const char*>(&dt), 0);
586+
} else if (primitive_type == TYPE_TIMESTAMPTZ) {
587+
TimestampTzValue res;
588+
vectorized::CastParameters params {.status = Status::OK(), .is_strict = true};
589+
const int32_t scale =
590+
t_expr.type.types.empty() ? -1 : t_expr.type.types.front().scalar_type.scale;
591+
if (!vectorized::CastToTimstampTz::from_string(
592+
{t_expr.date_literal.value.c_str(), t_expr.date_literal.value.size()}, res,
593+
params, nullptr, scale)) [[unlikely]] {
594+
std::stringstream ss;
595+
ss << "invalid timestamptz literal in partition column, value="
596+
<< t_expr.date_literal;
597+
return Status::InternalError(ss.str());
598+
} else {
599+
column->insert_data(reinterpret_cast<const char*>(&res), 0);
600+
}
586601
} else {
587602
// TYPE_DATE (DATEV1) or TYPE_DATETIME (DATETIMEV1)
588603
VecDateTimeValue dt;

be/src/exprs/bloom_filter_func_impl.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@ struct fixed_len_to_uint32_v2 {
4444
} else if constexpr (std::is_same_v<T, DateV2Value<DateTimeV2ValueType>>) {
4545
return uint32_t(HashCRC32<DateV2Value<DateTimeV2ValueType>::underlying_value>()(
4646
value.to_date_int_val()));
47+
} else if constexpr (std::is_same_v<T, TimestampTzValue>) {
48+
return uint32_t(HashCRC32<typename T::underlying_value>()(value.to_date_int_val()));
4749
} else if constexpr (vectorized::IsDecimalNumber<T>) {
4850
return uint32_t(HashCRC32<typename T::NativeType>()(value.value));
4951
} else {

be/src/exprs/create_predicate_function.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@ class PredicateFunctionCreator {
103103
M(TYPE_DATETIME) \
104104
M(TYPE_DATEV2) \
105105
M(TYPE_DATETIMEV2) \
106+
M(TYPE_TIMESTAMPTZ) \
106107
M(TYPE_CHAR) \
107108
M(TYPE_VARCHAR) \
108109
M(TYPE_STRING) \

0 commit comments

Comments
 (0)