diff --git a/be/src/olap/rowset/segment_v2/variant/variant_column_reader.cpp b/be/src/olap/rowset/segment_v2/variant/variant_column_reader.cpp
index 91002f0ff1eff1..2e1383671ef7d6 100644
--- a/be/src/olap/rowset/segment_v2/variant/variant_column_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/variant/variant_column_reader.cpp
@@ -157,6 +157,11 @@ Status VariantColumnReader::_create_sparse_merge_reader(ColumnIteratorUPtr* iter
     // Build substream reader tree for merging subcolumns into sparse column
     SubstreamReaderTree src_subcolumns_for_sparse;
     for (const auto& subcolumn_reader : *_subcolumns_meta_info) {
+        // NOTE: Skip the root node (empty parts). Do NOT skip "empty key" subcolumns where
+        // path.get_path() may also be "" but parts are not empty. Otherwise v[''] data will be lost.
+        if (subcolumn_reader->path.empty()) {
+            continue;
+        }
         const auto& path = subcolumn_reader->path.get_path();
         if (path_set_info.sparse_path_set.find(StringRef(path)) ==
             path_set_info.sparse_path_set.end()) {
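The guard keys on `path.empty()` (no parts) rather than on `path.get_path().empty()` (empty rendered string), and that distinction is the whole point of the NOTE: the root node has no parts at all, while the subcolumn backing `v['']` has exactly one part whose text happens to be empty, so both render to `""`. A minimal standalone sketch of the distinction, using a hypothetical `PathSketch` struct rather than Doris's real path type:

```cpp
// Hypothetical stand-in for a PathInData-like structure; illustration only.
#include <cassert>
#include <string>
#include <vector>

struct PathSketch {
    std::vector<std::string> parts;               // one entry per key segment
    bool empty() const { return parts.empty(); }  // true only for the root node
    std::string get_path() const {                // join the parts with '.'
        std::string out;
        for (size_t i = 0; i < parts.size(); ++i) {
            if (i > 0) out += '.';
            out += parts[i];
        }
        return out;
    }
};

int main() {
    PathSketch root;               // variant root: parts == {}
    PathSketch empty_key{{""}};    // subcolumn for v['']: parts == {""}

    // Both render to the empty string...
    assert(root.get_path().empty() && empty_key.get_path().empty());
    // ...but only the root has no parts, which is what the fix tests for.
    assert(root.empty() && !empty_key.empty());
    return 0;
}
```

Filtering on the rendered string instead would drop the `v['']` subcolumn along with the root, which is exactly the data loss the NOTE warns about.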
diff --git a/regression-test/data/variant_p0/test_variant_compaction_empty_path_bug.out b/regression-test/data/variant_p0/test_variant_compaction_empty_path_bug.out
new file mode 100644
index 00000000000000..5a11b56fdc7b6e
--- /dev/null
+++ b/regression-test/data/variant_p0/test_variant_compaction_empty_path_bug.out
@@ -0,0 +1,80 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !before_compaction --
+1 {"a":1,"b":2,"c":3}
+2 {"a":10,"b":20,"c":30}
+3 {"a":100,"b":200,"c":300}
+4 {"a":1,"d":4,"e":5,"f":6}
+5 {"b":2,"d":40,"e":50,"f":60}
+6 {"c":3,"d":400,"e":500,"f":600}
+7 {"a":7,"g":70,"h":700}
+8 {"b":8,"g":80,"h":800}
+9 {"c":9,"g":90,"h":900}
+10 {"":"empty_key_value","a":1000}
+11 {"":"empty_key_value2","b":2000}
+12 {"":"empty_key_value3","c":3000}
+13 {"a":13,"d":130}
+14 {"b":14,"e":140}
+15 {"c":15,"f":150}
+16 {"d":16,"g":160}
+17 {"e":17,"h":170}
+18 {"a":180,"f":18}
+
+-- !col_a_before --
+1 1
+2 10
+3 100
+4 1
+7 7
+10 1000
+13 13
+18 180
+
+-- !col_d_before --
+4 4
+5 40
+6 400
+13 130
+16 16
+
+-- !after_compaction --
+1 {"a":1,"b":2,"c":3}
+2 {"a":10,"b":20,"c":30}
+3 {"a":100,"b":200,"c":300}
+4 {"a":1,"d":4,"e":5,"f":6}
+5 {"b":2,"d":40,"e":50,"f":60}
+6 {"c":3,"d":400,"e":500,"f":600}
+7 {"a":7,"g":70,"h":700}
+8 {"b":8,"g":80,"h":800}
+9 {"c":9,"g":90,"h":900}
+10 {"":"empty_key_value","a":1000}
+11 {"":"empty_key_value2","b":2000}
+12 {"":"empty_key_value3","c":3000}
+13 {"a":13,"d":130}
+14 {"b":14,"e":140}
+15 {"c":15,"f":150}
+16 {"d":16,"g":160}
+17 {"e":17,"h":170}
+18 {"a":180,"f":18}
+
+-- !col_a_after --
+1 1
+2 10
+3 100
+4 1
+7 7
+10 1000
+13 13
+18 180
+
+-- !col_d_after --
+4 4
+5 40
+6 400
+13 130
+16 16
+
+-- !empty_key --
+10 empty_key_value
+11 empty_key_value2
+12 empty_key_value3
+
diff --git a/regression-test/suites/variant_p0/test_variant_compaction_empty_path_bug.groovy b/regression-test/suites/variant_p0/test_variant_compaction_empty_path_bug.groovy
new file mode 100644
index 00000000000000..66af55ac3b64ff
--- /dev/null
+++ b/regression-test/suites/variant_p0/test_variant_compaction_empty_path_bug.groovy
@@ -0,0 +1,142 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_variant_compaction_empty_path_bug", "nonConcurrent") {
+    def tableName = "test_variant_empty_path_compaction"
+
+    try {
+        sql "DROP TABLE IF EXISTS ${tableName}"
+
+        // Create table with a variant column.
+        // Set variant_max_subcolumns_count to 3, so any columns beyond the top 3 become sparse.
+        // This triggers the sparse column merge logic during compaction.
+        sql """
+            CREATE TABLE IF NOT EXISTS ${tableName} (
+                k bigint,
+                v variant<properties("variant_max_subcolumns_count" = "3")>
+            )
+            DUPLICATE KEY(`k`)
+            DISTRIBUTED BY HASH(k) BUCKETS 1
+            properties("replication_num" = "1", "disable_auto_compaction" = "true");
+        """
+
+        logger.info("Testing variant compaction with empty path in sparse columns")
+
+        // Insert data with multiple different subcolumns.
+        // Strategy: insert 6+ different subcolumns to exceed the limit of 3.
+        // The 3 most frequently used columns will be materialized; the others become sparse.
+
+        // First batch: establish column usage patterns
+        sql """INSERT INTO ${tableName} VALUES
+            (1, '{"a": 1, "b": 2, "c": 3}'),
+            (2, '{"a": 10, "b": 20, "c": 30}'),
+            (3, '{"a": 100, "b": 200, "c": 300}')
+        """
+
+        // Second batch: introduce additional columns that will become sparse
+        sql """INSERT INTO ${tableName} VALUES
+            (4, '{"a": 1, "d": 4, "e": 5, "f": 6}'),
+            (5, '{"b": 2, "d": 40, "e": 50, "f": 60}'),
+            (6, '{"c": 3, "d": 400, "e": 500, "f": 600}')
+        """
+
+        // Third batch: more sparse columns
+        sql """INSERT INTO ${tableName} VALUES
+            (7, '{"a": 7, "g": 70, "h": 700}'),
+            (8, '{"b": 8, "g": 80, "h": 800}'),
+            (9, '{"c": 9, "g": 90, "h": 900}')
+        """
+
+        // Fourth batch: edge case - JSON with an empty key.
+        // This creates a scenario where the statistics may contain an empty path.
+        sql """INSERT INTO ${tableName} VALUES
+            (10, '{"": "empty_key_value", "a": 1000}'),
+            (11, '{"": "empty_key_value2", "b": 2000}'),
+            (12, '{"": "empty_key_value3", "c": 3000}')
+        """
+
+        // Additional inserts to create more rowsets for compaction
+        sql """INSERT INTO ${tableName} VALUES
+            (13, '{"a": 13, "d": 130}'),
+            (14, '{"b": 14, "e": 140}'),
+            (15, '{"c": 15, "f": 150}')
+        """
+
+        sql """INSERT INTO ${tableName} VALUES
+            (16, '{"d": 16, "g": 160}'),
+            (17, '{"e": 17, "h": 170}'),
+            (18, '{"f": 18, "a": 180}')
+        """
+
+        // Verify data before compaction
+        def count_before = sql "SELECT COUNT(*) FROM ${tableName}"
+        logger.info("Row count before compaction: ${count_before[0][0]}")
+        assertEquals(18, count_before[0][0])
+
+        // Query to verify data integrity before compaction
+        qt_before_compaction "SELECT k, cast(v as string) FROM ${tableName} ORDER BY k"
+
+        // Test specific column access
+        qt_col_a_before "SELECT k, v['a'] FROM ${tableName} WHERE v['a'] IS NOT NULL ORDER BY k"
+        qt_col_d_before "SELECT k, v['d'] FROM ${tableName} WHERE v['d'] IS NOT NULL ORDER BY k"
+
+        logger.info("Data inserted, now triggering compaction...")
+        logger.info("Expected behavior: columns a,b,c materialized, d,e,f,g,h as sparse")
+        logger.info("Bug scenario: if the root node (empty path) is not skipped in _create_sparse_merge_reader,")
+        logger.info("  it will call VariantColumnReader::new_iterator with 3 params,")
+        logger.info("  which returns NOT_IMPLEMENTED_ERROR")
+
+        // Trigger compaction - this may reproduce the NOT_IMPLEMENTED_ERROR bug
+        def tablets = sql_return_maparray "SHOW TABLETS FROM ${tableName}"
+
+        try {
+            trigger_and_wait_compaction(tableName, "cumulative")
+            logger.info("Compaction completed successfully")
+
+            // Verify data after compaction
+            def count_after = sql "SELECT COUNT(*) FROM ${tableName}"
+            logger.info("Row count after compaction: ${count_after[0][0]}")
+            assertEquals(18, count_after[0][0])
+
+            // Query to verify data integrity after compaction
+            qt_after_compaction "SELECT k, cast(v as string) FROM ${tableName} ORDER BY k"
+
+            // Test specific column access after compaction
+            qt_col_a_after "SELECT k, v['a'] FROM ${tableName} WHERE v['a'] IS NOT NULL ORDER BY k"
+            qt_col_d_after "SELECT k, v['d'] FROM ${tableName} WHERE v['d'] IS NOT NULL ORDER BY k"
+
+            // Test empty key access if supported
+            qt_empty_key "SELECT k, v[''] FROM ${tableName} WHERE v[''] IS NOT NULL ORDER BY k"
+
+        } catch (Exception e) {
+            logger.error("Compaction failed with error: ${e.getMessage()}", e)
+
+            // Check whether the error is the expected NOT_IMPLEMENTED_ERROR
+            if (e.getMessage().contains("NOT_IMPLEMENTED_ERROR") ||
+                e.getMessage().contains("Not implemented")) {
+                logger.error("BUG REPRODUCED: Compaction failed with NOT_IMPLEMENTED_ERROR")
+                throw e
+            } else {
+                // Different error, rethrow
+                throw e
+            }
+        }
+
+    } finally {
+        sql "DROP TABLE IF EXISTS ${tableName}"
+    }
+}
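To make the failure mode reviewable in one screen: the sketch below is a simplified model, with hypothetical stand-in types (not Doris's real `SubstreamReaderTree` or reader classes), of what the suite's log messages describe. Without the skip, the root node flows into the sparse-merge source set like any other subcolumn, reader creation for it fails with NOT_IMPLEMENTED_ERROR, and compaction aborts; with the skip, the root is excluded while the `v['']` subcolumn still participates.

```cpp
#include <iostream>
#include <string>
#include <vector>

enum class Status { OK, NOT_IMPLEMENTED_ERROR };

struct SubcolumnMeta {
    std::vector<std::string> parts;              // empty vector == variant root node
    bool is_root() const { return parts.empty(); }
};

// Stand-in for the unsupported code path the logger lines refer to: building a
// plain iterator for the root inside the sparse-merge setup is not implemented.
Status make_source_reader(const SubcolumnMeta& sub) {
    return sub.is_root() ? Status::NOT_IMPLEMENTED_ERROR : Status::OK;
}

Status create_sparse_merge_sources(const std::vector<SubcolumnMeta>& metas, bool fixed) {
    for (const auto& sub : metas) {
        // The one-line fix: the root is read through its own dedicated path and
        // must never be treated as a sparse-merge source subcolumn.
        if (fixed && sub.is_root()) {
            continue;
        }
        if (Status s = make_source_reader(sub); s != Status::OK) {
            return s;                            // where compaction aborted before the fix
        }
    }
    return Status::OK;
}

int main() {
    // root, two ordinary sparse subcolumns, and the empty-key subcolumn v['']
    std::vector<SubcolumnMeta> metas = {{{}}, {{"d"}}, {{"e"}}, {{""}}};
    std::cout << "unfixed ok: " << (create_sparse_merge_sources(metas, false) == Status::OK) << '\n'; // 0
    std::cout << "fixed ok:   " << (create_sparse_merge_sources(metas, true) == Status::OK) << '\n';  // 1
    return 0;
}
```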