Skip to content

Commit de5c8fa

Browse files
committed
update
1 parent 3cb5969 commit de5c8fa

File tree

9 files changed

+118
-27
lines changed

9 files changed

+118
-27
lines changed

be/src/vec/exec/format/parquet/parquet_column_convert.cpp

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -269,9 +269,16 @@ std::unique_ptr<PhysicalToLogicalConverter> PhysicalToLogicalConverter::get_conv
269269
physical_converter = std::make_unique<ConsistentPhysicalConverter>();
270270
}
271271
} else if (src_logical_primitive == TYPE_TIMESTAMPTZ) {
272-
DCHECK(src_physical_type == tparquet::Type::INT64) << src_physical_type;
273-
DCHECK(parquet_schema.logicalType.__isset.TIMESTAMP) << parquet_schema.name;
274-
physical_converter = std::make_unique<Int64ToTimestampTz>();
272+
if (src_physical_type == tparquet::Type::INT96) {
273+
physical_converter = std::make_unique<Int96toTimestampTz>();
274+
} else if (src_physical_type == tparquet::Type::INT64) {
275+
DCHECK(src_physical_type == tparquet::Type::INT64) << src_physical_type;
276+
DCHECK(parquet_schema.logicalType.__isset.TIMESTAMP) << parquet_schema.name;
277+
physical_converter = std::make_unique<Int64ToTimestampTz>();
278+
} else {
279+
physical_converter =
280+
std::make_unique<UnsupportedConverter>(src_physical_type, src_logical_type);
281+
}
275282
} else {
276283
physical_converter =
277284
std::make_unique<UnsupportedConverter>(src_physical_type, src_logical_type);

be/src/vec/exec/format/parquet/parquet_column_convert.h

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -726,6 +726,32 @@ struct Int96toTimestamp : public PhysicalToLogicalConverter {
726726
return Status::OK();
727727
}
728728
};
729+
730+
struct Int96toTimestampTz : public PhysicalToLogicalConverter {
731+
Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override {
732+
ColumnPtr src_col = remove_nullable(src_physical_col);
733+
MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable();
734+
735+
size_t rows = src_col->size() / sizeof(ParquetInt96);
736+
const auto& src_data = assert_cast<const ColumnInt8*>(src_col.get())->get_data();
737+
auto* ParquetInt96_data = (ParquetInt96*)src_data.data();
738+
size_t start_idx = dst_col->size();
739+
dst_col->resize(start_idx + rows);
740+
auto& data = assert_cast<ColumnTimeStampTz*>(dst_col.get())->get_data();
741+
static const cctz::time_zone utc = cctz::utc_time_zone();
742+
743+
for (int i = 0; i < rows; i++) {
744+
ParquetInt96 src_cell_data = ParquetInt96_data[i];
745+
auto& dst_value =
746+
reinterpret_cast<DateV2Value<DateTimeV2ValueType>&>(data[start_idx + i]);
747+
748+
int64_t timestamp_with_micros = src_cell_data.to_timestamp_micros();
749+
dst_value.from_unixtime(timestamp_with_micros / 1000000, utc);
750+
dst_value.set_microsecond(timestamp_with_micros % 1000000);
751+
}
752+
return Status::OK();
753+
}
754+
};
729755
#include "common/compile_check_end.h"
730756

731757
} // namespace doris::vectorized::parquet

be/src/vec/exec/format/parquet/schema_desc.cpp

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -232,9 +232,15 @@ std::pair<DataTypePtr, bool> FieldDescriptor::get_doris_type(
232232
ans.first = DataTypeFactory::instance().create_data_type(TYPE_BIGINT, nullable);
233233
break;
234234
case tparquet::Type::INT96:
235-
// in most cases, it's a nano timestamp
236-
ans.first =
237-
DataTypeFactory::instance().create_data_type(TYPE_DATETIMEV2, nullable, 0, 6);
235+
if (_enable_mapping_timestamp_tz) {
236+
// treat INT96 as TIMESTAMPTZ
237+
ans.first = DataTypeFactory::instance().create_data_type(TYPE_TIMESTAMPTZ, nullable,
238+
0, 6);
239+
} else {
240+
// in most cases, it's a nano timestamp
241+
ans.first = DataTypeFactory::instance().create_data_type(TYPE_DATETIMEV2, nullable,
242+
0, 6);
243+
}
238244
break;
239245
case tparquet::Type::FLOAT:
240246
ans.first = DataTypeFactory::instance().create_data_type(TYPE_FLOAT, nullable);

docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run25.sql

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,9 +37,6 @@ INSERT INTO test_ice_timestamp_tz_parquet VALUES (2, TIMESTAMP_LTZ '2025-06-01 1
3737
INSERT INTO test_ice_timestamp_tz_parquet VALUES (3, TIMESTAMP_LTZ '2025-12-31 23:59:59.999999');
3838
INSERT INTO test_ice_timestamp_tz_parquet VALUES (4, NULL);
3939

40-
SELECT * FROM test_ice_timestamp_tz_orc;
41-
SELECT * FROM test_ice_timestamp_tz_parquet;
42-
4340
DROP TABLE IF EXISTS test_ice_timestamp_tz_orc_write_with_mapping;
4441
DROP TABLE IF EXISTS test_ice_timestamp_tz_parquet_write_with_mapping;
4542

docker/thirdparties/docker-compose/mysql/init/04-insert.sql

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1216,10 +1216,11 @@ INSERT INTO test_varbinary_db.`test_varbinary_udf` VALUES (1, X'48656C6C6F20576F
12161216

12171217

12181218

1219-
1219+
SET @original_time_zone = @@session.time_zone;
12201220
SET time_zone = '+08:00';
12211221
INSERT INTO test_timestamp_tz_db.ts_test VALUES (1,'2025-01-01 12:00:00','2025-01-01 12:00:00');
12221222
INSERT INTO test_timestamp_tz_db.ts_test VALUES (2,NULL,NULL);
1223+
SET time_zone = @original_time_zone;
12231224

12241225
ANALYZE TABLE Doris.doris;
12251226
ANALYZE TABLE Doris.DORIS;

docker/thirdparties/docker-compose/oracle/init/04-insert.sql

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,7 @@ INSERT INTO doris_test.varbinary_test VALUES (3, 'normal', HEXTORAW('48656C6C6F2
163163
ALTER SESSION SET TIME_ZONE = '+08:00';
164164
INSERT INTO doris_test.ltz_test VALUES (1,TIMESTAMP '2025-01-01 12:00:00 +08:00');
165165
INSERT INTO doris_test.ltz_test VALUES (NULL,NULL);
166+
ALTER SESSION SET TIME_ZONE = DBTIMEZONE;
166167

167168

168169
commit;

docker/thirdparties/docker-compose/postgresql/init/04-insert.sql

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3024,4 +3024,5 @@ insert into catalog_pg_test.extreme_test_multi_block select * from catalog_pg_te
30243024

30253025
SET TIME ZONE 'Asia/Shanghai';
30263026
INSERT INTO test_timestamp_tz_db.ts_test VALUES (1,'2025-01-01 12:00:00+08','2025-01-01 12:00:00');
3027-
INSERT INTO test_timestamp_tz_db.ts_test VALUES (2,NULL,NULL);
3027+
INSERT INTO test_timestamp_tz_db.ts_test VALUES (2,NULL,NULL);
3028+
SET TIME ZONE DEFAULT;

regression-test/data/external_table_p0/iceberg/test_iceberg_export_timestamp_tz.out

Lines changed: 28 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,24 +8,44 @@ id int Yes true \N
88
ts_tz timestamptz(6) Yes true \N WITH_TIMEZONE
99

1010
-- !select_tvf0 --
11-
1 2025-01-01T00:00
12-
2 2025-06-01T12:34:56.789
13-
3 2025-12-31T23:59:59.999999
11+
1 2025-01-01 00:00:00+08:00
12+
2 2025-06-01 12:34:56+08:00
13+
3 2025-12-31 23:59:59+08:00
1414
4 \N
1515

1616
-- !select_tvf0_desc --
1717
id int Yes false \N NONE
18-
ts_tz datetime(6) Yes false \N NONE
18+
ts_tz timestamptz Yes false \N NONE
19+
20+
-- !select_tvf0_false --
21+
1 2025-01-01 00:00:00+08:00
22+
2 2025-06-01 12:34:56+08:00
23+
3 2025-12-31 23:59:59+08:00
24+
4 \N
25+
26+
-- !select_tvf0_desc_false --
27+
id int Yes false \N NONE
28+
ts_tz timestamptz Yes false \N NONE
1929

2030
-- !select_tvf1 --
21-
1 2025-01-01T00:00
22-
2 2025-06-01T12:34:56.789
23-
3 2025-12-31T23:59:59.999999
31+
1 2025-01-01 00:00:00+08:00
32+
2 2025-06-01 12:34:56+08:00
33+
3 2025-12-31 23:59:59+08:00
2434
4 \N
2535

2636
-- !select_tvf1_desc --
2737
id int Yes false \N NONE
28-
ts_tz datetime(6) Yes false \N NONE
38+
ts_tz timestamptz Yes false \N NONE
39+
40+
-- !select_tvf1_false --
41+
1 2025-01-01 00:00:00+08:00
42+
2 2025-06-01 12:34:56+08:00
43+
3 2025-12-31 23:59:59+08:00
44+
4 \N
45+
46+
-- !select_tvf1_desc_false --
47+
id int Yes false \N NONE
48+
ts_tz timestamptz Yes false \N NONE
2949

3050
-- !select_tvf2 --
3151
1 2025-01-01 00:00:00+08:00

regression-test/suites/external_table_p0/iceberg/test_iceberg_export_timestamp_tz.groovy

Lines changed: 40 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ suite("test_iceberg_export_timestamp_tz", "external,hive,external_docker") {
4141
def outfile_path = "/user/doris/tmp_data"
4242
def uri = "${defaultFS}" + "${outfile_path}/exp_"
4343

44-
def outfile_to_HDFS = {format,export_table_name ->
44+
def outfile_to_HDFS = {format,export_table_name, enable_int96_timestamps ->
4545
// select ... into outfile ...
4646
def uuid = UUID.randomUUID().toString()
4747
outfile_path = "/user/doris/tmp_data/${uuid}"
@@ -53,7 +53,8 @@ suite("test_iceberg_export_timestamp_tz", "external,hive,external_docker") {
5353
FORMAT AS ${format}
5454
PROPERTIES (
5555
"fs.defaultFS"="${defaultFS}",
56-
"hadoop.username" = "${hdfsUserName}"
56+
"hadoop.username" = "${hdfsUserName}",
57+
"enable_int96_timestamps"="${enable_int96_timestamps}"
5758
);
5859
"""
5960
logger.info("outfile success path: " + res[0][3]);
@@ -83,16 +84,15 @@ suite("test_iceberg_export_timestamp_tz", "external,hive,external_docker") {
8384
"enable.mapping.timestamp_tz"="true"
8485
);"""
8586

86-
8787
sql """switch ${catalog_name_with_export}"""
8888
sql """use ${db_name}"""
8989
order_qt_select_desc_orc """ desc test_ice_timestamp_tz_orc; """
9090
order_qt_select_desc_parquet """ desc test_ice_timestamp_tz_parquet; """
91-
// TODO: seems write to parquet with timestamp_tz has some problem
91+
9292
def format = "parquet"
9393
def export_table_name = "test_ice_timestamp_tz_parquet"
9494

95-
def outfile_url0 = outfile_to_HDFS(format, export_table_name)
95+
def outfile_url0 = outfile_to_HDFS(format, export_table_name, "true")
9696
order_qt_select_tvf0 """ select * from HDFS(
9797
"uri" = "${outfile_url0}.${format}",
9898
"hadoop.username" = "${hdfsUserName}",
@@ -108,9 +108,25 @@ suite("test_iceberg_export_timestamp_tz", "external,hive,external_docker") {
108108
"format" = "${format}");
109109
"""
110110

111+
def outfile_url0_false = outfile_to_HDFS(format, export_table_name, "false")
112+
order_qt_select_tvf0_false """ select * from HDFS(
113+
"uri" = "${outfile_url0}.${format}",
114+
"hadoop.username" = "${hdfsUserName}",
115+
"enable_mapping_timestamp_tz"="true",
116+
"enable_mapping_varbinary"="true",
117+
"format" = "${format}");
118+
"""
119+
order_qt_select_tvf0_desc_false """ desc function HDFS(
120+
"uri" = "${outfile_url0}.${format}",
121+
"hadoop.username" = "${hdfsUserName}",
122+
"enable_mapping_timestamp_tz"="true",
123+
"enable_mapping_varbinary"="true",
124+
"format" = "${format}");
125+
"""
126+
111127
format = "parquet"
112128
export_table_name = "test_ice_timestamp_tz_orc"
113-
def outfile_url1 = outfile_to_HDFS(format, export_table_name)
129+
def outfile_url1 = outfile_to_HDFS(format, export_table_name, "true")
114130
order_qt_select_tvf1 """ select * from HDFS(
115131
"uri" = "${outfile_url1}.${format}",
116132
"hadoop.username" = "${hdfsUserName}",
@@ -126,9 +142,25 @@ suite("test_iceberg_export_timestamp_tz", "external,hive,external_docker") {
126142
"format" = "${format}");
127143
"""
128144

145+
def outfile_url1_false = outfile_to_HDFS(format, export_table_name, "false")
146+
order_qt_select_tvf1_false """ select * from HDFS(
147+
"uri" = "${outfile_url1}.${format}",
148+
"hadoop.username" = "${hdfsUserName}",
149+
"enable_mapping_timestamp_tz"="true",
150+
"enable_mapping_varbinary"="true",
151+
"format" = "${format}");
152+
"""
153+
order_qt_select_tvf1_desc_false """ desc function HDFS(
154+
"uri" = "${outfile_url1}.${format}",
155+
"hadoop.username" = "${hdfsUserName}",
156+
"enable_mapping_timestamp_tz"="true",
157+
"enable_mapping_varbinary"="true",
158+
"format" = "${format}");
159+
"""
160+
129161
format = "orc"
130162
export_table_name = "test_ice_timestamp_tz_parquet"
131-
def outfile_url2 = outfile_to_HDFS(format, export_table_name)
163+
def outfile_url2 = outfile_to_HDFS(format, export_table_name, "true")
132164
order_qt_select_tvf2 """ select * from HDFS(
133165
"uri" = "${outfile_url2}.${format}",
134166
"hadoop.username" = "${hdfsUserName}",
@@ -147,7 +179,7 @@ suite("test_iceberg_export_timestamp_tz", "external,hive,external_docker") {
147179

148180
format = "orc"
149181
export_table_name = "test_ice_timestamp_tz_orc"
150-
def outfile_url3 = outfile_to_HDFS(format, export_table_name)
182+
def outfile_url3 = outfile_to_HDFS(format, export_table_name, "true")
151183
order_qt_select_tvf3 """ select * from HDFS(
152184
"uri" = "${outfile_url3}.${format}",
153185
"hadoop.username" = "${hdfsUserName}",

0 commit comments

Comments
 (0)