|
| 1 | +// Licensed to the Apache Software Foundation (ASF) under one |
| 2 | +// or more contributor license agreements. See the NOTICE file |
| 3 | +// distributed with this work for additional information |
| 4 | +// regarding copyright ownership. The ASF licenses this file |
| 5 | +// to you under the Apache License, Version 2.0 (the |
| 6 | +// "License"); you may not use this file except in compliance |
| 7 | +// with the License. You may obtain a copy of the License at |
| 8 | +// |
| 9 | +// http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | +// |
| 11 | +// Unless required by applicable law or agreed to in writing, |
| 12 | +// software distributed under the License is distributed on an |
| 13 | +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| 14 | +// KIND, either express or implied. See the License for the |
| 15 | +// specific language governing permissions and limitations |
| 16 | +// under the License. |
| 17 | + |
| 18 | +suite("test_iceberg_struct_schema_evolution", "p0,external,doris,external_docker,external_docker_doris") { |
| 19 | + String enabled = context.config.otherConfigs.get("enableIcebergTest") |
| 20 | + if (enabled == null || !enabled.equalsIgnoreCase("true")) { |
| 21 | + logger.info("disable iceberg test.") |
| 22 | + return |
| 23 | + } |
| 24 | + |
| 25 | + String rest_port = context.config.otherConfigs.get("iceberg_rest_uri_port") |
| 26 | + String minio_port = context.config.otherConfigs.get("iceberg_minio_port") |
| 27 | + String externalEnvIp = context.config.otherConfigs.get("externalEnvIp") |
| 28 | + String catalog_name = "test_iceberg_struct_schema_evolution" |
| 29 | + |
| 30 | + sql """drop catalog if exists ${catalog_name}""" |
| 31 | + sql """ |
| 32 | + CREATE CATALOG ${catalog_name} PROPERTIES ( |
| 33 | + 'type'='iceberg', |
| 34 | + 'iceberg.catalog.type'='rest', |
| 35 | + 'uri' = 'http://${externalEnvIp}:${rest_port}', |
| 36 | + "s3.access_key" = "admin", |
| 37 | + "s3.secret_key" = "password", |
| 38 | + "s3.endpoint" = "http://${externalEnvIp}:${minio_port}", |
| 39 | + "s3.region" = "us-east-1" |
| 40 | + );""" |
| 41 | + |
| 42 | + logger.info("catalog " + catalog_name + " created") |
| 43 | + sql """switch ${catalog_name};""" |
| 44 | + logger.info("switched to catalog " + catalog_name) |
| 45 | + sql """use test_struct_schema_evolution_db;""" |
| 46 | + |
| 47 | + sql """set enable_fallback_to_original_planner=false;""" |
| 48 | + |
| 49 | + // Test case: schema evolution with nested struct fields (the struct column is a_struct). |
| 50 | + // This test covers the bug fix for DORIS-22901, where querying a struct field |
| 51 | + // after dropping another field in the same struct would cause a core dump. |
| 52 | + // Note: the table and its initial data are created by a SQL script (run24.sql), not by this suite. |
| 53 | + String tableName = "test_schema_evolution_for_nested_fields_parquet" |
| 54 | + |
| 55 | + // Test 1: Describe the table to verify its schema. |
| 56 | + qt_desc_1 """DESCRIBE ${tableName}""" |
| 57 | + |
| 58 | + // Test 2: Query all columns, listed explicitly by name and ordered by a_partition. |
| 59 | + qt_query_all """SELECT quite_renamed_col, keep_col, drop_and_add_col, add_col, casesensitivecol, a_struct, a_partition FROM ${tableName} ORDER BY a_partition""" |
| 60 | + |
| 61 | + // Test 3: Query the struct element that was dropped and re-added (this was causing the core dump). |
| 62 | + // The bug was: when querying the 'drop_and_add' field, the code would try to find a reference column |
| 63 | + // from the parquet file schema and might select the 'removed' field, which doesn't exist in root_node, |
| 64 | + // causing a "File column name 'removed' not found in struct children" error. |
| 65 | + qt_query_drop_and_add """SELECT struct_element(a_struct,'drop_and_add') FROM ${tableName} ORDER BY a_partition""" |
| 66 | + |
| 67 | + // Test 4: Query each of the other struct elements on its own. |
| 68 | + qt_query_renamed """SELECT struct_element(a_struct,'renamed') FROM ${tableName} ORDER BY a_partition""" |
| 69 | + qt_query_keep """SELECT struct_element(a_struct,'keep') FROM ${tableName} ORDER BY a_partition""" |
| 70 | + qt_query_casesensitive """SELECT struct_element(a_struct,'casesensitive') FROM ${tableName} ORDER BY a_partition""" |
| 71 | + qt_query_added """SELECT struct_element(a_struct,'added') FROM ${tableName} ORDER BY a_partition""" |
| 72 | + |
| 73 | + // Test 5: Query with predicates on struct elements (equality on 'renamed', 'keep' and 'casesensitive'; IS NULL on 'drop_and_add' and 'added'). |
| 74 | + qt_predicate_renamed """SELECT keep_col FROM ${tableName} WHERE struct_element(a_struct,'renamed') = 11 ORDER BY a_partition""" |
| 75 | + qt_predicate_keep """SELECT keep_col FROM ${tableName} WHERE struct_element(a_struct,'keep') = 12 ORDER BY a_partition""" |
| 76 | + qt_predicate_casesensitive """SELECT keep_col FROM ${tableName} WHERE struct_element(a_struct,'casesensitive') = 14 ORDER BY a_partition""" |
| 77 | + qt_predicate_drop_and_add_null """SELECT keep_col FROM ${tableName} WHERE struct_element(a_struct, 'drop_and_add') IS NULL ORDER BY a_partition""" |
| 78 | + qt_predicate_added_null """SELECT keep_col FROM ${tableName} WHERE struct_element(a_struct, 'added') IS NULL ORDER BY a_partition""" |
| 79 | + |
| 80 | + // Test 6: Critical test - query only the newly added struct fields ('drop_and_add' and 'added'). |
| 81 | + // This scenario triggers the bug fix: when all queried struct fields are missing in old data, |
| 82 | + // the code needs to select a reference column. The fix ensures it doesn't select the 'removed' |
| 83 | + // field, which was deleted from the table schema but still exists in the parquet file schema. |
| 84 | + qt_query_only_new_fields """SELECT struct_element(a_struct,'drop_and_add'), struct_element(a_struct,'added') FROM ${tableName} ORDER BY a_partition""" |
| 85 | +} |
| 86 | + |
0 commit comments