diff --git a/src/query/service/src/interpreters/interpreter_table_modify_column.rs b/src/query/service/src/interpreters/interpreter_table_modify_column.rs index bdd17ebf03703..91c8a6c68e07d 100644 --- a/src/query/service/src/interpreters/interpreter_table_modify_column.rs +++ b/src/query/service/src/interpreters/interpreter_table_modify_column.rs @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::collections::HashMap; use std::collections::HashSet; use std::sync::Arc; @@ -23,6 +22,7 @@ use databend_common_catalog::table::TableExt; use databend_common_exception::ErrorCode; use databend_common_exception::Result; use databend_common_expression::ComputedExpr; +use databend_common_expression::DataField; use databend_common_expression::DataSchema; use databend_common_expression::Scalar; use databend_common_expression::TableDataType; @@ -30,6 +30,7 @@ use databend_common_expression::TableField; use databend_common_expression::TableSchema; use databend_common_expression::TableSchemaRef; use databend_common_expression::types::DataType; +use databend_common_functions::BUILTIN_FUNCTIONS; use databend_common_license::license::Feature::ComputedColumn; use databend_common_license::license::Feature::DataMask; use databend_common_license::license_manager::LicenseManagerSwitch; @@ -380,38 +381,90 @@ impl ModifyTableColumnInterpreter { return Ok(PipelineBuildResult::create()); } - let mut modified_default_scalars = HashMap::new(); + let schema_changed = schema != new_schema; + let is_empty_table = base_snapshot.is_none_or(|v| v.summary.row_count == 0); + + let mut need_rebuild = false; + let mut has_column_change = false; let mut default_expr_binder = DefaultExprBinder::try_new(self.ctx.clone())?; let new_schema_without_computed_fields = new_schema.remove_computed_fields(); let format_as_parquet = fuse_table.storage_format_as_parquet(); - if schema != new_schema { + if schema_changed { for (field, _) in field_and_comments { let old_field = schema.field_with_name(&field.name)?; - let is_alter_column_string_to_binary = - is_string_to_binary(&old_field.data_type, &field.data_type); - // If two conditions are met, we don't need rebuild the table, - // as rebuild table can be a time-consuming job. - // 1. alter column from string to binary in parquet or data type not changed. - // 2. default expr and computed expr not changed. Otherwise, we need fill value for - // new added column. - if ((format_as_parquet && is_alter_column_string_to_binary) - || old_field.data_type == field.data_type) - && old_field.default_expr == field.default_expr - && old_field.computed_expr == field.computed_expr + let data_type_changed = old_field.data_type != field.data_type; + let default_expr_changed = old_field.default_expr != field.default_expr; + let computed_expr_changed = old_field.computed_expr != field.computed_expr; + + // Validate the new default expression against the new column type + // to catch invalid defaults at ALTER time rather than at query time. + if data_type_changed || default_expr_changed { + let field_index = new_schema_without_computed_fields.index_of(&field.name)?; + let _ = default_expr_binder + .get_scalar(&new_schema_without_computed_fields.fields[field_index])?; + } + + // Parquet String -> Binary: safe metadata-only conversion, + // physical data is identical so no rebuild or CDC concern. + if format_as_parquet + && is_string_to_binary(&old_field.data_type, &field.data_type) + && !default_expr_changed + && !computed_expr_changed { continue; } - let field_index = new_schema_without_computed_fields.index_of(&field.name)?; - let default_scalar = default_expr_binder - .get_scalar(&new_schema_without_computed_fields.fields[field_index])?; - modified_default_scalars.insert(field_index, default_scalar); + + // No-op: nothing changed for this field, skip it. + if !data_type_changed && !default_expr_changed && !computed_expr_changed { + continue; + } + + has_column_change = true; + + // Already decided to rebuild from a previous field; keep + // iterating only to validate remaining default expressions. + if need_rebuild { + continue; + } + + if data_type_changed || computed_expr_changed { + need_rebuild = true; + continue; + } + + // Default-only change: skip rebuild unless non-deterministic. + if default_expr_changed && field.default_expr.is_some() { + let data_field: DataField = field.into(); + let scalar_expr = default_expr_binder.parse_and_bind(&data_field)?; + // Use default_value_evaluable() to detect nextval (AsyncFunctionCall) + // because parse_and_bind may wrap the result in CastExpr, hiding + // the AsyncFunctionCall from a top-level matches! check. + // See: https://github.com/databendlabs/databend/issues/19451 + let (_, has_nextval) = scalar_expr.default_value_evaluable(); + let is_deterministic = !has_nextval + && scalar_expr + .as_expr()? + .project_column_ref(|col| Ok(col.index))? + .is_deterministic(&BUILTIN_FUNCTIONS); + if !is_deterministic { + need_rebuild = true; + } + } } } + // Block non-fastpath schema changes on non-empty change-tracking tables. + // Metadata-only default changes can silently alter historical row values + // without producing change records, breaking stream/CDC consistency. + if has_column_change && !is_empty_table && fuse_table.change_tracking_enabled() { + return Err(ErrorCode::AlterTableError(format!( + "table {} has change tracking enabled, modifying columns should be avoided", + table_info.desc + ))); + } + // if don't need to rebuild table, only update table meta. - if modified_default_scalars.is_empty() - || base_snapshot.is_none_or(|v| v.summary.row_count == 0) - { + if !need_rebuild || is_empty_table { commit_table_meta( &self.ctx, table.as_ref(), @@ -431,16 +484,6 @@ impl ModifyTableColumnInterpreter { return Ok(PipelineBuildResult::create()); } - if fuse_table.change_tracking_enabled() { - // Modifying columns while change tracking is active may break - // the consistency between tracked changes and the current table schema, - // leading to incorrect or incomplete change records. - return Err(ErrorCode::AlterTableError(format!( - "table {} has change tracking enabled, modifying columns should be avoided", - table_info.desc - ))); - } - // construct sql for selecting data from old table. // computed columns are ignored, as it is build from other columns. let query_fields = new_schema_without_computed_fields diff --git a/tests/sqllogictests/suites/base/05_ddl/05_0040_ddl_alter_table_modify_column_type_pending.test b/tests/sqllogictests/suites/base/05_ddl/05_0040_ddl_alter_table_modify_column_type_pending.test new file mode 100644 index 0000000000000..2f5f0d76dbf1e --- /dev/null +++ b/tests/sqllogictests/suites/base/05_ddl/05_0040_ddl_alter_table_modify_column_type_pending.test @@ -0,0 +1,168 @@ +# Scenario A: type conversion on existing columns. +# Scenario B: expected errors for invalid cast / unknown column / invalid default. +# Scenario C: default evolution after modify/add operations. +# Scenario D: varchar default and not-null behavior after modify. + +statement ok +DROP DATABASE IF EXISTS db_05_0040 + +statement ok +CREATE DATABASE db_05_0040 + +statement ok +USE db_05_0040 + +# -------------------------- +# A. Basic type conversion +# -------------------------- +statement ok +CREATE TABLE a(a STRING NOT NULL, b INT NOT NULL, c INT NOT NULL) + +statement ok +INSERT INTO a VALUES('1', 2, 3) + +statement ok +ALTER TABLE a MODIFY COLUMN a FLOAT NOT NULL, COLUMN b STRING NOT NULL + +query B +SELECT count(*) = 1 AND min(a) = 1 AND min(b) = '2' AND min(c) = 3 FROM a +---- +1 + +query TT +SELECT name, data_type FROM system.columns WHERE database = 'db_05_0040' AND table = 'a' ORDER BY name +---- +a FLOAT +b VARCHAR +c INT + +# ---------------------------------------------- +# B. Invalid cast / unknown column / bad default +# ---------------------------------------------- +statement ok +CREATE TABLE b(a STRING NOT NULL) + +statement ok +INSERT INTO b VALUES('a') + +statement error 1006 +ALTER TABLE b MODIFY COLUMN a FLOAT NOT NULL + +statement error 1058 +ALTER TABLE b MODIFY COLUMN b FLOAT NOT NULL + +statement ok +CREATE TABLE c(a INT NOT NULL, b INT NOT NULL) + +statement error 1006 +INSERT INTO c (b) VALUES(1) + +statement ok +INSERT INTO c (a, b) VALUES(0, 1) + +statement error 1006 +ALTER TABLE c MODIFY COLUMN a FLOAT NOT NULL DEFAULT 'a' + +statement ok +ALTER TABLE c MODIFY COLUMN a FLOAT NOT NULL DEFAULT 1.2 + +statement ok +CREATE TABLE c_multi(a INT NOT NULL, b INT NOT NULL) + +statement ok +INSERT INTO c_multi VALUES(1, 1) + +statement error 1006 +ALTER TABLE c_multi MODIFY COLUMN a FLOAT NOT NULL, COLUMN b FLOAT NOT NULL DEFAULT 'a' + +query TT +SELECT name, data_type FROM system.columns WHERE database = 'db_05_0040' AND table = 'c_multi' ORDER BY name +---- +a INT +b INT + +query B +SELECT count(*) = 1 AND min(a) = 0 AND max(a) = 0 AND sum(b) = 1 FROM c +---- +1 + +# ---------------------------------------------- +# C. Default evolution with modify/add operations +# ---------------------------------------------- +statement ok +CREATE TABLE d(a INT NOT NULL, b INT NOT NULL DEFAULT 10) + +statement ok +INSERT INTO d (a) VALUES(1) + +statement ok +ALTER TABLE d MODIFY COLUMN b INT NOT NULL DEFAULT 2 + +statement ok +ALTER TABLE d ADD COLUMN c FLOAT NOT NULL DEFAULT 1.01 + +statement ok +ALTER TABLE d MODIFY COLUMN c FLOAT NOT NULL DEFAULT 2.2 + +statement ok +INSERT INTO d (a) VALUES(10) + +query B +SELECT count(*) = 2 AND sum(b) = 12 AND min(c) > 2.1 AND max(c) < 2.3 FROM d +---- +1 + +query I +SELECT count(*) FROM d WHERE a = 10 AND b = 2 AND c = 2.2 +---- +1 + +# ---------------------------------------------- +# D. VARCHAR default + NOT NULL behavior +# ---------------------------------------------- +statement ok +CREATE TABLE e(a INT NOT NULL, b INT NOT NULL) + +statement ok +INSERT INTO e VALUES(1, 1) + +statement ok +ALTER TABLE e MODIFY COLUMN a VARCHAR(10) NOT NULL DEFAULT 'not' + +# Default expression should be updated after MODIFY COLUMN. +query T +SELECT default_expression FROM system.columns WHERE database = 'db_05_0040' AND table = 'e' AND name = 'a' +---- +'not' + +statement ok +INSERT INTO e (b) VALUES(2) + +query TI +SELECT a, b FROM e ORDER BY b +---- +1 1 +not 2 + +statement ok +CREATE TABLE f(a INT NOT NULL, b INT NOT NULL) + +statement ok +INSERT INTO f VALUES(1, 1) + +statement ok +ALTER TABLE f MODIFY COLUMN a VARCHAR(10) NOT NULL COMMENT 'new column' + +statement error 1006 +INSERT INTO f (b) VALUES(2) + +statement ok +INSERT INTO f (a, b) VALUES('', 2) + +query T +SELECT comment FROM system.columns WHERE database = 'db_05_0040' AND table = 'f' AND name = 'a' +---- +new column + +statement ok +DROP DATABASE db_05_0040 diff --git a/tests/sqllogictests/suites/base/09_fuse_engine/09_0103_modify_column_default_no_rewrite.test b/tests/sqllogictests/suites/base/09_fuse_engine/09_0103_modify_column_default_no_rewrite.test new file mode 100644 index 0000000000000..a7287fcbf02ec --- /dev/null +++ b/tests/sqllogictests/suites/base/09_fuse_engine/09_0103_modify_column_default_no_rewrite.test @@ -0,0 +1,178 @@ +statement ok +CREATE OR REPLACE DATABASE db_09_0103 + +statement ok +USE db_09_0103 + +statement ok +CREATE TABLE t_ct(a INT NOT NULL, b INT DEFAULT 10) CHANGE_TRACKING=true + +############################################# +# time travel: default-only modify keeps snapshot-specific defaults # +############################################# + +statement ok +CREATE TABLE t(a INT NOT NULL) + +statement ok +INSERT INTO t VALUES(1),(2) + +statement ok +ALTER TABLE t ADD COLUMN b INT DEFAULT 10 + +query II +SELECT a,b FROM t ORDER BY a +---- +1 10 +2 10 + +statement ok +ALTER TABLE t MODIFY COLUMN b INT DEFAULT 20 + +query II +SELECT a,b FROM t ORDER BY a +---- +1 20 +2 20 + +query T +SELECT snapshot:schema:fields[1]:default_expr FROM fuse_dump_snapshots('db_09_0103', 't') LIMIT 2 +---- +"20" +"10" + +############################################################# +# non-deterministic default: default-only modify triggers rewrite # +############################################################# + +statement ok +CREATE TABLE t_nd(a INT NOT NULL) row_per_block=1 + +statement ok +INSERT INTO t_nd VALUES(1) + +statement ok +ALTER TABLE t_nd ADD COLUMN b FLOAT DEFAULT 1.0 + +# The ADD COLUMN above is metadata-only, so the existing block does not have +# physical column metadata for `b`. +query I +SELECT count() FROM fuse_column('db_09_0103', 't_nd') WHERE column_name = 'b' +---- +0 + +statement ok +ALTER TABLE t_nd MODIFY COLUMN b FLOAT DEFAULT rand() + +# Changing the default to a non-deterministic expression requires rewriting +# existing blocks to materialize stable values. +query B +SELECT count() >= 1 FROM fuse_column('db_09_0103', 't_nd') WHERE column_name = 'b' +---- +1 + +############################################################# +# nextval default: default-only modify triggers rewrite # +############################################################# + +statement ok +CREATE SEQUENCE seq_09_0103 + +statement ok +CREATE TABLE t_nv(a INT NOT NULL) row_per_block=1 + +statement ok +INSERT INTO t_nv VALUES(1) + +statement ok +ALTER TABLE t_nv ADD COLUMN b INT DEFAULT 1 + +query I +SELECT count() FROM fuse_column('db_09_0103', 't_nv') WHERE column_name = 'b' +---- +0 + +statement ok +ALTER TABLE t_nv MODIFY COLUMN b INT DEFAULT nextval(seq_09_0103) + +# nextval is non-deterministic (AsyncFunctionCall); must trigger rewrite. +query B +SELECT count() >= 1 FROM fuse_column('db_09_0103', 't_nv') WHERE column_name = 'b' +---- +1 + +#################################################### +# remove default: metadata-only, column falls back to type default # +#################################################### + +statement ok +CREATE TABLE t_rm(a INT NOT NULL, b INT DEFAULT 10) + +statement ok +INSERT INTO t_rm(a) VALUES(1) + +statement ok +ALTER TABLE t_rm MODIFY COLUMN b INT + +query II +SELECT a, b FROM t_rm +---- +1 10 + +statement ok +INSERT INTO t_rm(a) VALUES(2) + +query IT +SELECT a, b FROM t_rm ORDER BY a +---- +1 10 +2 NULL + +#################################################### +# change tracking: reject schema changes on non-empty table # +#################################################### + +statement ok +INSERT INTO t_ct(a) VALUES(1) + +statement error 1132 +ALTER TABLE t_ct MODIFY COLUMN b INT DEFAULT 99 + +statement ok +INSERT INTO t_ct(a) VALUES(2) + +query II +SELECT a,b FROM t_ct ORDER BY a +---- +1 10 +2 10 + +statement error 1132 +ALTER TABLE t_ct MODIFY COLUMN b STRING + +#################################################### +# change tracking: no-op column spec should not trigger guard # +#################################################### + +# Re-specifying column b with its current type and default is a no-op; +# it must not be rejected on a change-tracking table. +statement ok +ALTER TABLE t_ct MODIFY COLUMN b INT DEFAULT 10 + +#################################################### +# change tracking: allow schema changes on empty table # +#################################################### + +statement ok +CREATE TABLE t_ct_empty(a INT NOT NULL, b INT DEFAULT 10) CHANGE_TRACKING=true + +statement ok +ALTER TABLE t_ct_empty MODIFY COLUMN b INT DEFAULT 99 + +query T +SELECT default_expression FROM system.columns WHERE database = 'db_09_0103' AND table = 't_ct_empty' AND name = 'b' +---- +99 + +statement ok +DROP DATABASE db_09_0103 diff --git a/tests/sqllogictests/suites/ee/06_ee_stream/06_0013_stream_modify_column_default.test b/tests/sqllogictests/suites/ee/06_ee_stream/06_0013_stream_modify_column_default.test new file mode 100644 index 0000000000000..f5a28693ee4b9 --- /dev/null +++ b/tests/sqllogictests/suites/ee/06_ee_stream/06_0013_stream_modify_column_default.test @@ -0,0 +1,53 @@ +## Copyright 2023 Databend Cloud +## +## Licensed under the Elastic License, Version 2.0 (the "License"); +## you may not use this file except in compliance with the License. +## You may obtain a copy of the License at +## +## https://www.elastic.co/licensing/elastic-license +## +## Unless required by applicable law or agreed to in writing, software +## distributed under the License is distributed on an "AS IS" BASIS, +## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +## See the License for the specific language governing permissions and +## limitations under the License. + +statement ok +DROP DATABASE IF EXISTS test_stream_modify_column_default + +statement ok +CREATE DATABASE test_stream_modify_column_default + +statement ok +USE test_stream_modify_column_default + +statement ok +CREATE TABLE t(a INT NOT NULL, b INT DEFAULT 10) CHANGE_TRACKING=true + +statement ok +INSERT INTO t(a) VALUES(1) + +statement error 1132 +ALTER TABLE t MODIFY COLUMN b INT DEFAULT 99 + +# Explicit stream case: create a stream on this table. +statement ok +CREATE STREAM s ON TABLE t APPEND_ONLY=false + +statement ok +INSERT INTO t(a) VALUES(2) + +# The ALTER above was rejected, so b still uses the original default (10). +query IITB +SELECT a, b, change$action, change$is_update FROM s ORDER BY a +---- +2 10 INSERT 0 + +statement ok +DROP STREAM s + +statement ok +DROP TABLE t ALL + +statement ok +DROP DATABASE IF EXISTS test_stream_modify_column_default diff --git a/tests/suites/0_stateless/17_altertable/17_0005_alter_table_modify_column_type.result b/tests/suites/0_stateless/17_altertable/17_0005_alter_table_modify_column_type.result index 9cdf9d2be5e17..6368aa493d4d2 100644 --- a/tests/suites/0_stateless/17_altertable/17_0005_alter_table_modify_column_type.result +++ b/tests/suites/0_stateless/17_altertable/17_0005_alter_table_modify_column_type.result @@ -24,7 +24,7 @@ Error: APIError: QueryFailed: [1006]invalid float literal while evaluating funct 1 10 1 10 1.01 1 -1 10 1.01 +1 10 2.2 10 2 2.2 begin test default column 1 diff --git a/tests/suites/0_stateless/17_altertable/17_0005_alter_table_modify_column_type.sh b/tests/suites/0_stateless/17_altertable/17_0005_alter_table_modify_column_type.sh index 63f151becfa03..fa5080d403b9b 100755 --- a/tests/suites/0_stateless/17_altertable/17_0005_alter_table_modify_column_type.sh +++ b/tests/suites/0_stateless/17_altertable/17_0005_alter_table_modify_column_type.sh @@ -37,9 +37,12 @@ echo "alter table test_modify_column_type.d modify column b int not null default echo "SELECT a,b from test_modify_column_type.d" | $BENDSQL_CLIENT_CONNECT echo "alter table test_modify_column_type.d add column c float not null default 1.01" | $BENDSQL_CLIENT_CONNECT echo "SELECT a,b,c from test_modify_column_type.d" | $BENDSQL_CLIENT_CONNECT +# This ALTER only updates table metadata; it does not rewrite existing rows. +# After changing c's default to 2.2, the old row also reads c as 2.2. echo "alter table test_modify_column_type.d modify column c float not null default 2.2" | $BENDSQL_CLIENT_CONNECT echo "INSERT INTO test_modify_column_type.d (a) values(10)" | $BENDSQL_CLIENT_CONNECT -echo "SELECT a,b,c from test_modify_column_type.d order by a" | $BENDSQL_CLIENT_CONNECT +echo "SELECT a,b,c from test_modify_column_type.d where a = 1" | $BENDSQL_CLIENT_CONNECT +echo "SELECT a,b,c from test_modify_column_type.d where a = 10" | $BENDSQL_CLIENT_CONNECT echo "begin test default column" echo "CREATE table test_modify_column_type.e(a int not null, b int not null)" | $BENDSQL_CLIENT_CONNECT