Skip to content

Commit 943db93

Browse files
committed
fix
1 parent ded75ed commit 943db93

File tree

14 files changed

+140
-62
lines changed

14 files changed

+140
-62
lines changed

be/src/olap/rowset/segment_v2/variant/variant_column_writer_impl.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -671,7 +671,7 @@ Status VariantColumnWriterImpl::_process_doc_snapshot_column(
671671
if (!_tablet_column->variant_enable_doc_snapshot_mode()) {
672672
return Status::OK();
673673
}
674-
ptr->reconstruct_doc_snapshot_column();
674+
ptr->reconstruct_and_sort_doc_snapshot_column();
675675
const int bucket_num = std::max(1, _tablet_column->variant_doc_snapshot_shard_count());
676676
RETURN_IF_ERROR(
677677
_doc_snapshot_writer.init(_tablet_column, bucket_num, column_id, _opts, _opts.footer));

be/src/vec/columns/column_variant.cpp

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2572,9 +2572,41 @@ MutableColumnPtr ColumnVariant::clone() const {
25722572
return res;
25732573
}
25742574

2575-
void ColumnVariant::reconstruct_doc_snapshot_column() {
2575+
void ColumnVariant::reconstruct_and_sort_doc_snapshot_column() {
25762576
const auto& offset = serialized_doc_snapshot_column_offsets();
2577+
2578+
// Builds a new map column in which, for every row, the (path, value) entries
// are re-emitted in ascending path order.
//   in_paths   - flattened path strings of all rows.
//   in_values  - flattened values, indexed in parallel with in_paths.
//   in_offsets - per-row end offsets into in_paths / in_values.
// Returns the freshly created column; the inputs are only read.
auto sort_map_by_row_paths = [&](const ColumnString& in_paths, const ColumnString& in_values,
2579+
const ColumnArray::Offsets64& in_offsets) -> MutableColumnPtr {
2580+
// Fresh output column; it is accessed below as a ColumnMap with string keys and values.
auto sorted = create_binary_column_fn();
2581+
auto& sorted_map = assert_cast<ColumnMap&>(*sorted);
2582+
auto& out_paths = assert_cast<ColumnString&>(sorted_map.get_keys());
2583+
auto& out_values = assert_cast<ColumnString&>(sorted_map.get_values());
2584+
auto& out_offsets = sorted_map.get_offsets();
2585+
// One offset entry is appended per row.
out_offsets.reserve(num_rows);
2586+
2587+
for (int64_t i = 0; i < num_rows; ++i) {
2588+
// NOTE(review): for i == 0 this reads index -1; presumably the offsets
// container is left-padded so that this yields 0 — confirm.
size_t start = in_offsets[i - 1];
2589+
size_t end = in_offsets[i];
2590+
// (path, original index) pairs for the entries of row i.
std::vector<std::tuple<std::string_view, size_t>> order;
2591+
order.reserve(end - start);
2592+
for (size_t j = start; j < end; ++j) {
2593+
order.emplace_back(in_paths.get_data_at(j).to_string_view(), j);
2594+
}
2595+
// Tuple comparison orders by path first, then by original index for equal paths.
std::sort(order.begin(), order.end());
2596+
for (const auto& [p, j] : order) {
2597+
// Append the path bytes and the value that sat at the same input index.
out_paths.insert_data(p.data(), p.size());
2598+
out_values.insert_from(in_values, j);
2599+
}
2600+
// Row i ends at the current number of emitted entries.
out_offsets.push_back(out_paths.size());
2601+
}
2602+
return sorted;
2603+
};
2604+
2605+
// doc snapshot column has been constructed in parse2column
2606+
// sort the column by row paths.
25772607
if (offset[num_rows - 1] != 0) {
2608+
auto [path, value] = get_doc_snapshot_data_paths_and_values();
2609+
serialized_doc_snapshot_column = sort_map_by_row_paths(*path, *value, offset);
25782610
return;
25792611
}
25802612
CHECK(is_finalized());
@@ -2608,7 +2640,9 @@ void ColumnVariant::reconstruct_doc_snapshot_column() {
26082640
}
26092641
doc_snapshot_data_offsets.push_back(doc_snapshot_data_paths.size());
26102642
}
2611-
serialized_doc_snapshot_column = std::move(doc_snapshot_column);
2643+
2644+
serialized_doc_snapshot_column = sort_map_by_row_paths(
2645+
doc_snapshot_data_paths, doc_snapshot_data_values, doc_snapshot_data_offsets);
26122646
}
26132647

26142648
bool ColumnVariant::is_doc_snapshot_mode() const {

be/src/vec/columns/column_variant.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -634,7 +634,7 @@ class ColumnVariant final : public COWHelper<IColumn, ColumnVariant> {
634634
return _max_subcolumns_count - current_subcolumns_count;
635635
}
636636

637-
void reconstruct_doc_snapshot_column();
637+
void reconstruct_and_sort_doc_snapshot_column();
638638

639639
// doc snapshot mode: only root column, and doc snapshot column is not empty
640640
bool is_doc_snapshot_mode() const;
4 Bytes
Binary file not shown.

be/test/vec/exec/format/native/native_reader_writer_test.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1189,7 +1189,7 @@ static std::string get_all_types_native_file_path() {
11891189

11901190
// Generator test: Generate native file with all types (DISABLED by default).
11911191
// Run this test manually to regenerate the test data file:
1192-
// ./run-be-ut.sh --run --filter=*DISABLED_generate_all_types_native_file*
1192+
// ./run-be-ut.sh --run --filter=*generate_all_types_native_file*
11931193
// Then copy the generated file to: be/test/data/vec/native/all_types_single_row.native
11941194
TEST_F(NativeReaderWriterTest, generate_all_types_native_file) {
11951195
// Output to current directory, user needs to copy it to test data dir
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
-- This file is automatically generated. You should know what you did if you want to edit this
2+
-- !sql --
3+
2
4+
5+
-- !sql --
6+
1 {"a":"2025-04-16","b":123.123456789012,"c":"2025-04-17 17:09:09","d":"123","e":"2025-04-19","f":"2025-04-20","g":"2025-04-21","h":"2025-04-22","i":"2025-04-23","j":"2025-04-24","k":"2025-04-25","l":"2025-04-26","m":"2025-04-27","n":"2025-04-28","o":"2025-04-29","p":"2025-04-30"} col
7+
1 {"a":"2025-04-16","b":123.123456789012,"c":"2025-04-17 17:09:09","d":"123","e":"2025-04-19","f":"2025-04-20","g":"2025-04-21","h":"2025-04-22","i":"2025-04-23","j":"2025-04-24","k":"2025-04-25","l":"2025-04-26","m":"2025-04-27","n":"2025-04-28","o":"2025-04-29","p":"2025-04-30"} col
8+
9+
-- !sql --
10+
{"__DORIS_VARIANT_DOC_SNAPSHOT__":"map"}
11+
{"__DORIS_VARIANT_DOC_SNAPSHOT__":"map"}
12+
13+
-- !sql --
14+
2
15+
16+
-- !sql --
17+
1 {"a":"2025-04-16","b":123.123456789012,"c":"2025-04-17 17:09:09","d":"123","e":"2025-04-19","f":"2025-04-20","g":"2025-04-21","h":"2025-04-22","i":"2025-04-23","j":"2025-04-24","k":"2025-04-25","l":"2025-04-26","m":"2025-04-27","n":"2025-04-28","o":"2025-04-29","p":"2025-04-30"} col
18+
1 {"a":"2025-04-16","b":123.123456789012,"c":"2025-04-17 17:09:09","d":"123","e":"2025-04-19","f":"2025-04-20","g":"2025-04-21","h":"2025-04-22","i":"2025-04-23","j":"2025-04-24","k":"2025-04-25","l":"2025-04-26","m":"2025-04-27","n":"2025-04-28","o":"2025-04-29","p":"2025-04-30"} col
19+
20+
-- !sql --
21+
{"__DORIS_VARIANT_DOC_SNAPSHOT__":"map"}
22+
{"__DORIS_VARIANT_DOC_SNAPSHOT__":"map"}
23+

regression-test/data/variant_p0/doc_snapshot/predefine/test_predefine_type_index.out

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
-- This file is automatically generated. You should know what you did if you want to edit this
22
-- !sql --
3-
{"path.decimal":"decimal64","path.int":"int","path.string":"string"}
4-
{"path.decimal":"decimal64","path.int":"int","path.string":"string"}
5-
{"path.decimal":"decimal64","path.int":"int","path.string":"string"}
6-
{"path.decimal":"decimal64","path.int":"int","path.string":"string"}
7-
{"path.decimal":"decimal64","path.int":"int","path.string":"string"}
3+
{"__DORIS_VARIANT_DOC_SNAPSHOT__":"map"}
4+
{"__DORIS_VARIANT_DOC_SNAPSHOT__":"map"}
5+
{"__DORIS_VARIANT_DOC_SNAPSHOT__":"map"}
6+
{"__DORIS_VARIANT_DOC_SNAPSHOT__":"map"}
7+
{"__DORIS_VARIANT_DOC_SNAPSHOT__":"map"}
88

99
-- !sql --
1010
1 {"path":{"decimal":123.123456789012,"int":123,"string":"hello"}}
@@ -23,21 +23,21 @@
2323
3
2424

2525
-- !sql --
26-
{"path.decimal":"decimal64","path.int":"int","path.string":"string"}
27-
{"path.decimal":"decimal64","path.int":"int","path.string":"string"}
28-
{"path.decimal":"decimal64","path.int":"int","path.string":"string"}
29-
{"path.decimal":"decimal64","path.int":"int","path.string":"string"}
30-
{"path.decimal":"decimal64","path.int":"int","path.string":"string"}
31-
{"path.decimal":"decimal64","path.int":"int","path.string":"string"}
32-
{"path.decimal":"decimal64","path.int":"int","path.string":"string"}
33-
{"path.decimal":"decimal64","path.int":"int","path.string":"string"}
34-
{"path.decimal":"decimal64","path.int":"int","path.string":"string"}
35-
{"path.decimal":"decimal64","path.int":"int","path.string":"string"}
36-
{"path.decimal":"decimal64","path.int":"int","path.string":"string"}
37-
{"path.decimal":"decimal64","path.int":"int","path.string":"string"}
38-
{"path.decimal":"decimal64","path.int":"int","path.string":"string"}
39-
{"path.decimal":"decimal64","path.int":"int","path.string":"string"}
40-
{"path.decimal":"decimal64","path.int":"int","path.string":"string"}
26+
{"__DORIS_VARIANT_DOC_SNAPSHOT__":"map"}
27+
{"__DORIS_VARIANT_DOC_SNAPSHOT__":"map"}
28+
{"__DORIS_VARIANT_DOC_SNAPSHOT__":"map"}
29+
{"__DORIS_VARIANT_DOC_SNAPSHOT__":"map"}
30+
{"__DORIS_VARIANT_DOC_SNAPSHOT__":"map"}
31+
{"__DORIS_VARIANT_DOC_SNAPSHOT__":"map"}
32+
{"__DORIS_VARIANT_DOC_SNAPSHOT__":"map"}
33+
{"__DORIS_VARIANT_DOC_SNAPSHOT__":"map"}
34+
{"__DORIS_VARIANT_DOC_SNAPSHOT__":"map"}
35+
{"__DORIS_VARIANT_DOC_SNAPSHOT__":"map"}
36+
{"__DORIS_VARIANT_DOC_SNAPSHOT__":"map"}
37+
{"__DORIS_VARIANT_DOC_SNAPSHOT__":"map"}
38+
{"__DORIS_VARIANT_DOC_SNAPSHOT__":"map"}
39+
{"__DORIS_VARIANT_DOC_SNAPSHOT__":"map"}
40+
{"__DORIS_VARIANT_DOC_SNAPSHOT__":"map"}
4141

4242
-- !sql --
4343
1 {"path":{"decimal":123.123456789012,"int":123,"string":"hello"}}

regression-test/data/variant_p0/doc_snapshot/test_compaction.out

Lines changed: 0 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -161,18 +161,6 @@
161161
1999 1 {"c":1}
162162
1999 1 {"c":1}
163163

164-
-- !sql_55 --
165-
10 \N
166-
10 \N
167-
{"c":1} 1
168-
{"c":1} 1
169-
{"c":1} 1
170-
{"c":1} 1
171-
10 \N
172-
10 \N
173-
{"c":[{"a":1}]} [{"a":1}]
174-
{"c":[{"a":1}]} [{"a":1}]
175-
176164
-- !sql_1 --
177165
1 {"x":[1]}
178166
2 {"a":"1"}
@@ -264,16 +252,6 @@
264252
1029 1 {"c":1}
265253
1999 1 {"c":1}
266254

267-
-- !sql_55 --
268-
10 \N
269-
{"c":1} 1
270-
{"c":1} 1
271-
10 \N
272-
{"c":[{"a":1}]} [{"a":1}]
273-
{"c":[{"a":1}]} [{"a":1}]
274-
{"c":[{"a":1}]} [{"a":1}]
275-
{"c":1} 1
276-
277255
-- !sql_1 --
278256
1 {"x":[1]}
279257
2 {"a":"1"}
@@ -365,13 +343,3 @@
365343
1029 1 {"c":1}
366344
1999 1 {"c":1}
367345

368-
-- !sql_55 --
369-
10 \N
370-
{"c":1} 1
371-
{"c":1} 1
372-
10 \N
373-
{"c":[{"a":1}]} [{"a":1}]
374-
{"c":[{"a":1}]} [{"a":1}]
375-
{"c":[{"a":1}]} [{"a":1}]
376-
{"c":1} 1
377-

regression-test/data/variant_p0/doc_snapshot/test_outfile_csv_variant_type.out

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323
1 doris1 [9,99,999]
2424
2 doris2 [8,88]
2525
3 doris3 {"a":123}
26-
4 doris4 \\N
2726
5 doris5 [1,null,2]
2827
6 doris6 {"aaaa":"111111"}
2928
7 doris7 {"bbbb":1.1111}
@@ -33,7 +32,6 @@
3332
1 doris1 [9,99,999]
3433
2 doris2 [8,88]
3534
3 doris3 {"a":123}
36-
4 doris4 \N
3735
5 doris5 [1,null,2]
3836
6 doris6 {"aaaa":"111111"}
3937
7 doris7 {"bbbb":1.1111}
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
// Regression suite: creates a table with a predefined-type variant column while
// doc snapshot mode is enabled, runs three queries, performs a schema change on
// an unrelated column, and then runs the same three queries again.
suite("predefine_schema_change_doc_snapshot", "p0"){
19+
def tableName = "test_predefine_schema_change"
20+
// Session settings applied before the table is created.
sql """ set default_variant_enable_typed_paths_to_sparse = false """
21+
sql """ set default_variant_enable_doc_snapshot_mode = true """
22+
sql "DROP TABLE IF EXISTS ${tableName}"
23+
// Variant column with typed paths a/b/c/d, variant_max_subcolumns_count = 2,
// and an inverted index declared for field pattern "d".
sql """CREATE TABLE ${tableName} (
24+
`id` bigint NULL,
25+
`var` variant<
26+
MATCH_NAME 'a' : date,
27+
MATCH_NAME 'b' : decimal(20,12),
28+
MATCH_NAME 'c' : datetime,
29+
MATCH_NAME 'd' : string,
30+
properties("variant_max_subcolumns_count" = "2")
31+
> NULL,
32+
`col1` varchar(100) NOT NULL,
33+
INDEX idx_a_b (var) USING INVERTED PROPERTIES("field_pattern"="d", "parser"="unicode", "support_phrase" = "true") COMMENT ''
34+
) ENGINE=OLAP DUPLICATE KEY(`id`) DISTRIBUTED BY HASH(`id`)
35+
BUCKETS 1 PROPERTIES ( "replication_allocation" = "tag.location.default: 1", "disable_auto_compaction" = "true")"""
36+
// The same row (id 1, sixteen paths a..p) is inserted twice, as two separate inserts.
sql """insert into ${tableName} values(1, '{"a": "2025-04-16", "b": 123.123456789012, "c": "2025-04-17T09:09:09Z", "d": 123, "e": "2025-04-19", "f": "2025-04-20", "g": "2025-04-21", "h": "2025-04-22", "i": "2025-04-23", "j": "2025-04-24", "k": "2025-04-25", "l": "2025-04-26", "m": "2025-04-27", "n": "2025-04-28", "o": "2025-04-29", "p": "2025-04-30"}', 'col');"""
37+
sql """insert into ${tableName} values(1, '{"a": "2025-04-16", "b": 123.123456789012, "c": "2025-04-17T09:09:09Z", "d": 123, "e": "2025-04-19", "f": "2025-04-20", "g": "2025-04-21", "h": "2025-04-22", "i": "2025-04-23", "j": "2025-04-24", "k": "2025-04-25", "l": "2025-04-26", "m": "2025-04-27", "n": "2025-04-28", "o": "2025-04-29", "p": "2025-04-30"}', 'col');"""
38+
39+
// NOTE(review): presumably these force the MATCH predicate below to be served
// by the inverted index rather than a fallback evaluation — confirm.
sql """ set enable_match_without_inverted_index = false """
40+
sql """ set enable_common_expr_pushdown = true """
41+
// Baseline results before the schema change.
qt_sql """ select count() from ${tableName} where cast (var['d'] as string) match '123' """
42+
qt_sql """ select * from ${tableName} """
43+
qt_sql """ select variant_type(var) from ${tableName} """
44+
45+
// Schema change on col1 only: varchar(100) NOT NULL -> varchar(200) NULL.
sql """ alter table ${tableName} modify column col1 varchar(200) NULL """
46+
47+
waitForSchemaChangeDone {
48+
sql """ SHOW ALTER TABLE COLUMN WHERE TableName='${tableName}' ORDER BY createtime DESC LIMIT 1 """
49+
time 60
50+
}
51+
52+
// Same three queries as above, re-run after the schema change has finished.
qt_sql """ select count() from ${tableName} where cast (var['d'] as string) match '123' """
53+
qt_sql """ select * from ${tableName} """
54+
qt_sql """ select variant_type(var) from ${tableName} """
55+
56+
}

0 commit comments

Comments
 (0)