Commits (21)
a0f6ea6  fix(query): avoid rewrite for default-only modify column (dantengsky, Feb 8, 2026)
4d4dd55  fix(query): validate all defaults in modify-column loop (dantengsky, Feb 8, 2026)
9ec44d6  test(fuse): add stream changes checks for modify default (dantengsky, Feb 8, 2026)
6761518  test(stream): clarify changes comments and add stream case (dantengsky, Feb 8, 2026)
9f826d3  test(fuse): remove flaky offset-based changes checks (dantengsky, Feb 8, 2026)
04b1f9d  test(altertable): align modify-default expected row in 17_0005 (dantengsky, Feb 8, 2026)
2140560  test(altertable): clarify default-only checks in 17_0005 (dantengsky, Feb 8, 2026)
1af2b7d  test(altertable): simplify comments in 17_0005 (dantengsky, Feb 8, 2026)
13f622e  fix(altertable): rebuild for non-deterministic defaults (dantengsky, Feb 9, 2026)
8f9e57c  fix(altertable): guard change-tracking tables against default-only sc… (dantengsky, Feb 10, 2026)
e98dd04  test(altertable): cover remove-default and empty change-tracking table (dantengsky, Feb 10, 2026)
252c738  fix(test): nullable column without default fills NULL, not 0 (dantengsky, Feb 10, 2026)
5d67ea9  fix(altertable): allow parquet string-to-binary on change-tracking ta… (dantengsky, Feb 11, 2026)
a1278c6  fix(test): correct remove-default expectations for physically stored … (dantengsky, Feb 11, 2026)
9356498  fix(altertable): treat AsyncFunctionCall (nextval) as non-determinist… (dantengsky, Feb 12, 2026)
03eff92  chore: add TODO for ScalarExpr::is_deterministic (issue #19451) (dantengsky, Feb 12, 2026)
0b91c87  style: cargo fmt (dantengsky, Feb 12, 2026)
b46c130  fix(altertable): skip no-op column specs before setting change-tracki… (dantengsky, Feb 12, 2026)
6fa6e48  test(altertable): cover nextval default triggers rebuild (dantengsky, Feb 12, 2026)
8908597  fix(test): remove quotes from nextval sequence name (dantengsky, Feb 12, 2026)
88b6f29  fix(altertable): detect nextval inside CastExpr for rebuild decision (dantengsky, Feb 12, 2026)
@@ -12,7 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::collections::HashMap;
use std::collections::HashSet;
use std::sync::Arc;

@@ -23,13 +22,15 @@ use databend_common_catalog::table::TableExt;
use databend_common_exception::ErrorCode;
use databend_common_exception::Result;
use databend_common_expression::ComputedExpr;
use databend_common_expression::DataField;
use databend_common_expression::DataSchema;
use databend_common_expression::Scalar;
use databend_common_expression::TableDataType;
use databend_common_expression::TableField;
use databend_common_expression::TableSchema;
use databend_common_expression::TableSchemaRef;
use databend_common_expression::types::DataType;
use databend_common_functions::BUILTIN_FUNCTIONS;
use databend_common_license::license::Feature::ComputedColumn;
use databend_common_license::license::Feature::DataMask;
use databend_common_license::license_manager::LicenseManagerSwitch;
@@ -380,38 +381,78 @@ impl ModifyTableColumnInterpreter {
return Ok(PipelineBuildResult::create());
}

let mut modified_default_scalars = HashMap::new();
let schema_changed = schema != new_schema;
let is_empty_table = base_snapshot.is_none_or(|v| v.summary.row_count == 0);

let mut need_rebuild = false;
let mut has_column_change = false;
let mut default_expr_binder = DefaultExprBinder::try_new(self.ctx.clone())?;
let new_schema_without_computed_fields = new_schema.remove_computed_fields();
let format_as_parquet = fuse_table.storage_format_as_parquet();
if schema != new_schema {
if schema_changed {
for (field, _) in field_and_comments {
let old_field = schema.field_with_name(&field.name)?;
let is_alter_column_string_to_binary =
is_string_to_binary(&old_field.data_type, &field.data_type);
// If two conditions are met, we don't need rebuild the table,
// as rebuild table can be a time-consuming job.
// 1. alter column from string to binary in parquet or data type not changed.
// 2. default expr and computed expr not changed. Otherwise, we need fill value for
// new added column.
if ((format_as_parquet && is_alter_column_string_to_binary)
|| old_field.data_type == field.data_type)
&& old_field.default_expr == field.default_expr
&& old_field.computed_expr == field.computed_expr
let data_type_changed = old_field.data_type != field.data_type;
let default_expr_changed = old_field.default_expr != field.default_expr;
let computed_expr_changed = old_field.computed_expr != field.computed_expr;

// Validate the new default expression against the new column type
// to catch invalid defaults at ALTER time rather than at query time.
if data_type_changed || default_expr_changed {
let field_index = new_schema_without_computed_fields.index_of(&field.name)?;
let _ = default_expr_binder
.get_scalar(&new_schema_without_computed_fields.fields[field_index])?;
}

// Parquet String -> Binary: safe metadata-only conversion,
// physical data is identical so no rebuild or CDC concern.
if format_as_parquet
&& is_string_to_binary(&old_field.data_type, &field.data_type)
&& !default_expr_changed
&& !computed_expr_changed
{
continue;
}
let field_index = new_schema_without_computed_fields.index_of(&field.name)?;
let default_scalar = default_expr_binder
.get_scalar(&new_schema_without_computed_fields.fields[field_index])?;
modified_default_scalars.insert(field_index, default_scalar);

has_column_change = true;

// Already decided to rebuild from a previous field; keep
// iterating only to validate remaining default expressions.
if need_rebuild {
continue;
}

if data_type_changed || computed_expr_changed {
need_rebuild = true;
continue;
}

// Default-only change: skip rebuild unless non-deterministic.
if default_expr_changed && field.default_expr.is_some() {
let data_field: DataField = field.into();
let scalar_expr = default_expr_binder.parse_and_bind(&data_field)?;
let expr = scalar_expr
.as_expr()?
.project_column_ref(|col| Ok(col.index))?;
if !expr.is_deterministic(&BUILTIN_FUNCTIONS) {
need_rebuild = true;
}
}
}
}

// Block non-fastpath schema changes on non-empty change-tracking tables.
// Metadata-only default changes can silently alter historical row values
// without producing change records, breaking stream/CDC consistency.
if has_column_change && !is_empty_table && fuse_table.change_tracking_enabled() {
return Err(ErrorCode::AlterTableError(format!(
"table {} has change tracking enabled, modifying columns should be avoided",
table_info.desc
)));
}

// If we don't need to rebuild the table, only update the table meta.
if modified_default_scalars.is_empty()
|| base_snapshot.is_none_or(|v| v.summary.row_count == 0)
{
if !need_rebuild || is_empty_table {
commit_table_meta(
&self.ctx,
table.as_ref(),
@@ -431,16 +472,6 @@
return Ok(PipelineBuildResult::create());
}

if fuse_table.change_tracking_enabled() {
// Modifying columns while change tracking is active may break
// the consistency between tracked changes and the current table schema,
// leading to incorrect or incomplete change records.
return Err(ErrorCode::AlterTableError(format!(
"table {} has change tracking enabled, modifying columns should be avoided",
table_info.desc
)));
}

// Construct the SQL for selecting data from the old table.
// Computed columns are ignored, as they are built from other columns.
let query_fields = new_schema_without_computed_fields
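To summarize the per-column decision the modified loop above now makes, here is a condensed sketch. It is not code from this PR: `ColumnChangeAction` and `classify_column_change` are illustrative names, and `DefaultExprBinder`, `is_string_to_binary`, and the imported types are assumed to be in scope exactly as they are in the interpreter shown above.

// A hedged sketch of the rebuild decision, using only the calls that appear
// in the diff. Error handling, change-tracking guards, and the empty-table
// fast path stay in the interpreter itself.
use databend_common_exception::Result;
use databend_common_expression::DataField;
use databend_common_expression::TableField;
use databend_common_functions::BUILTIN_FUNCTIONS;

enum ColumnChangeAction {
    // Nothing about this column actually changes.
    NoOp,
    // Safe to commit as a metadata-only change; no data rewrite.
    MetadataOnly,
    // Existing rows must be rewritten through the rebuild path.
    Rebuild,
}

fn classify_column_change(
    old_field: &TableField,
    new_field: &TableField,
    format_as_parquet: bool,
    default_expr_binder: &mut DefaultExprBinder, // assumed in scope, as in the interpreter
) -> Result<ColumnChangeAction> {
    let data_type_changed = old_field.data_type != new_field.data_type;
    let default_expr_changed = old_field.default_expr != new_field.default_expr;
    let computed_expr_changed = old_field.computed_expr != new_field.computed_expr;

    if !data_type_changed && !default_expr_changed && !computed_expr_changed {
        return Ok(ColumnChangeAction::NoOp);
    }

    // Parquet String -> Binary keeps the physical layout, so it stays metadata-only.
    if format_as_parquet
        && is_string_to_binary(&old_field.data_type, &new_field.data_type)
        && !default_expr_changed
        && !computed_expr_changed
    {
        return Ok(ColumnChangeAction::MetadataOnly);
    }

    // A type or computed-expression change always rewrites existing rows.
    if data_type_changed || computed_expr_changed {
        return Ok(ColumnChangeAction::Rebuild);
    }

    // Default-only change: rebuild only when the new default is non-deterministic.
    if new_field.default_expr.is_some() {
        let data_field: DataField = new_field.into();
        let scalar_expr = default_expr_binder.parse_and_bind(&data_field)?;
        let expr = scalar_expr
            .as_expr()?
            .project_column_ref(|col| Ok(col.index))?;
        if !expr.is_deterministic(&BUILTIN_FUNCTIONS) {
            return Ok(ColumnChangeAction::Rebuild);
        }
    }
    Ok(ColumnChangeAction::MetadataOnly)
}

The design point the PR turns on, per the comments in the diff: a deterministic default can be changed via metadata only, while a non-deterministic default (for example rand() or a nextval() sequence call) triggers a rebuild so affected rows end up with one concrete value rather than one that could differ on every read.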
@@ -0,0 +1,168 @@
# Scenario A: type conversion on existing columns.
# Scenario B: expected errors for invalid cast / unknown column / invalid default.
# Scenario C: default evolution after modify/add operations.
# Scenario D: varchar default and not-null behavior after modify.

statement ok
DROP DATABASE IF EXISTS db_05_0040

statement ok
CREATE DATABASE db_05_0040

statement ok
USE db_05_0040

# --------------------------
# A. Basic type conversion
# --------------------------
statement ok
CREATE TABLE a(a STRING NOT NULL, b INT NOT NULL, c INT NOT NULL)

statement ok
INSERT INTO a VALUES('1', 2, 3)

statement ok
ALTER TABLE a MODIFY COLUMN a FLOAT NOT NULL, COLUMN b STRING NOT NULL

query B
SELECT count(*) = 1 AND min(a) = 1 AND min(b) = '2' AND min(c) = 3 FROM a
----
1

query TT
SELECT name, data_type FROM system.columns WHERE database = 'db_05_0040' AND table = 'a' ORDER BY name
----
a FLOAT
b VARCHAR
c INT

# ----------------------------------------------
# B. Invalid cast / unknown column / bad default
# ----------------------------------------------
statement ok
CREATE TABLE b(a STRING NOT NULL)

statement ok
INSERT INTO b VALUES('a')

statement error 1006
ALTER TABLE b MODIFY COLUMN a FLOAT NOT NULL

statement error 1058
ALTER TABLE b MODIFY COLUMN b FLOAT NOT NULL

statement ok
CREATE TABLE c(a INT NOT NULL, b INT NOT NULL)

statement error 1006
INSERT INTO c (b) VALUES(1)

statement ok
INSERT INTO c (a, b) VALUES(0, 1)

statement error 1006
ALTER TABLE c MODIFY COLUMN a FLOAT NOT NULL DEFAULT 'a'

statement ok
ALTER TABLE c MODIFY COLUMN a FLOAT NOT NULL DEFAULT 1.2

statement ok
CREATE TABLE c_multi(a INT NOT NULL, b INT NOT NULL)

statement ok
INSERT INTO c_multi VALUES(1, 1)

statement error 1006
ALTER TABLE c_multi MODIFY COLUMN a FLOAT NOT NULL, COLUMN b FLOAT NOT NULL DEFAULT 'a'

query TT
SELECT name, data_type FROM system.columns WHERE database = 'db_05_0040' AND table = 'c_multi' ORDER BY name
----
a INT
b INT

query B
SELECT count(*) = 1 AND min(a) = 0 AND max(a) = 0 AND sum(b) = 1 FROM c
----
1

# ----------------------------------------------
# C. Default evolution with modify/add operations
# ----------------------------------------------
statement ok
CREATE TABLE d(a INT NOT NULL, b INT NOT NULL DEFAULT 10)

statement ok
INSERT INTO d (a) VALUES(1)

statement ok
ALTER TABLE d MODIFY COLUMN b INT NOT NULL DEFAULT 2

statement ok
ALTER TABLE d ADD COLUMN c FLOAT NOT NULL DEFAULT 1.01

statement ok
ALTER TABLE d MODIFY COLUMN c FLOAT NOT NULL DEFAULT 2.2

statement ok
INSERT INTO d (a) VALUES(10)

query B
SELECT count(*) = 2 AND sum(b) = 12 AND min(c) > 2.1 AND max(c) < 2.3 FROM d
----
1

query I
SELECT count(*) FROM d WHERE a = 10 AND b = 2 AND c = 2.2
----
1

# ----------------------------------------------
# D. VARCHAR default + NOT NULL behavior
# ----------------------------------------------
statement ok
CREATE TABLE e(a INT NOT NULL, b INT NOT NULL)

statement ok
INSERT INTO e VALUES(1, 1)

statement ok
ALTER TABLE e MODIFY COLUMN a VARCHAR(10) NOT NULL DEFAULT 'not'

# Default expression should be updated after MODIFY COLUMN.
query T
SELECT default_expression FROM system.columns WHERE database = 'db_05_0040' AND table = 'e' AND name = 'a'
----
'not'

statement ok
INSERT INTO e (b) VALUES(2)

query TI
SELECT a, b FROM e ORDER BY b
----
1 1
not 2

statement ok
CREATE TABLE f(a INT NOT NULL, b INT NOT NULL)

statement ok
INSERT INTO f VALUES(1, 1)

statement ok
ALTER TABLE f MODIFY COLUMN a VARCHAR(10) NOT NULL COMMENT 'new column'

statement error 1006
INSERT INTO f (b) VALUES(2)

statement ok
INSERT INTO f (a, b) VALUES('', 2)

query T
SELECT comment FROM system.columns WHERE database = 'db_05_0040' AND table = 'f' AND name = 'a'
----
new column

statement ok
DROP DATABASE db_05_0040