Skip to content
Open
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
a0f6ea6
fix(query): avoid rewrite for default-only modify column
dantengsky Feb 8, 2026
4d4dd55
fix(query): validate all defaults in modify-column loop
dantengsky Feb 8, 2026
9ec44d6
test(fuse): add stream changes checks for modify default
dantengsky Feb 8, 2026
6761518
test(stream): clarify changes comments and add stream case
dantengsky Feb 8, 2026
9f826d3
test(fuse): remove flaky offset-based changes checks
dantengsky Feb 8, 2026
04b1f9d
test(altertable): align modify-default expected row in 17_0005
dantengsky Feb 8, 2026
2140560
test(altertable): clarify default-only checks in 17_0005
dantengsky Feb 8, 2026
1af2b7d
test(altertable): simplify comments in 17_0005
dantengsky Feb 8, 2026
13f622e
fix(altertable): rebuild for non-deterministic defaults
dantengsky Feb 9, 2026
8f9e57c
fix(altertable): guard change-tracking tables against default-only sc…
dantengsky Feb 10, 2026
e98dd04
test(altertable): cover remove-default and empty change-tracking table
dantengsky Feb 10, 2026
252c738
fix(test): nullable column without default fills NULL, not 0
dantengsky Feb 10, 2026
5d67ea9
fix(altertable): allow parquet string-to-binary on change-tracking ta…
dantengsky Feb 11, 2026
a1278c6
fix(test): correct remove-default expectations for physically stored …
dantengsky Feb 11, 2026
9356498
fix(altertable): treat AsyncFunctionCall (nextval) as non-determinist…
dantengsky Feb 12, 2026
03eff92
chore: add TODO for ScalarExpr::is_deterministic (issue #19451)
dantengsky Feb 12, 2026
0b91c87
style: cargo fmt
dantengsky Feb 12, 2026
b46c130
fix(altertable): skip no-op column specs before setting change-tracki…
dantengsky Feb 12, 2026
6fa6e48
test(altertable): cover nextval default triggers rebuild
dantengsky Feb 12, 2026
8908597
fix(test): remove quotes from nextval sequence name
dantengsky Feb 12, 2026
88b6f29
fix(altertable): detect nextval inside CastExpr for rebuild decision
dantengsky Feb 12, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::collections::HashMap;
use std::collections::HashSet;
use std::sync::Arc;

Expand All @@ -23,13 +22,15 @@ use databend_common_catalog::table::TableExt;
use databend_common_exception::ErrorCode;
use databend_common_exception::Result;
use databend_common_expression::ComputedExpr;
use databend_common_expression::DataField;
use databend_common_expression::DataSchema;
use databend_common_expression::Scalar;
use databend_common_expression::TableDataType;
use databend_common_expression::TableField;
use databend_common_expression::TableSchema;
use databend_common_expression::TableSchemaRef;
use databend_common_expression::types::DataType;
use databend_common_functions::BUILTIN_FUNCTIONS;
use databend_common_license::license::Feature::ComputedColumn;
use databend_common_license::license::Feature::DataMask;
use databend_common_license::license_manager::LicenseManagerSwitch;
Expand Down Expand Up @@ -380,7 +381,7 @@ impl ModifyTableColumnInterpreter {
return Ok(PipelineBuildResult::create());
}

let mut modified_default_scalars = HashMap::new();
let mut need_rebuild = false;
let mut default_expr_binder = DefaultExprBinder::try_new(self.ctx.clone())?;
let new_schema_without_computed_fields = new_schema.remove_computed_fields();
let format_as_parquet = fuse_table.storage_format_as_parquet();
Expand All @@ -389,29 +390,58 @@ impl ModifyTableColumnInterpreter {
let old_field = schema.field_with_name(&field.name)?;
let is_alter_column_string_to_binary =
is_string_to_binary(&old_field.data_type, &field.data_type);
// If two conditions are met, we don't need rebuild the table,
// as rebuild table can be a time-consuming job.
// 1. alter column from string to binary in parquet or data type not changed.
// 2. default expr and computed expr not changed. Otherwise, we need fill value for
// new added column.
if ((format_as_parquet && is_alter_column_string_to_binary)
|| old_field.data_type == field.data_type)
&& old_field.default_expr == field.default_expr
&& old_field.computed_expr == field.computed_expr
let data_type_changed = old_field.data_type != field.data_type;
let default_expr_changed = old_field.default_expr != field.default_expr;
let computed_expr_changed = old_field.computed_expr != field.computed_expr;

// Validate the new default expression against the new column type
// to keep ALTER-time semantics consistent for invalid defaults.
if data_type_changed || default_expr_changed {
let field_index = new_schema_without_computed_fields.index_of(&field.name)?;
let _ = default_expr_binder
.get_scalar(&new_schema_without_computed_fields.fields[field_index])?;
}

// Keep the existing parquet String -> Binary fast path.
if format_as_parquet
&& is_alter_column_string_to_binary
&& !default_expr_changed
&& !computed_expr_changed
{
continue;
}
let field_index = new_schema_without_computed_fields.index_of(&field.name)?;
let default_scalar = default_expr_binder
.get_scalar(&new_schema_without_computed_fields.fields[field_index])?;
modified_default_scalars.insert(field_index, default_scalar);

// Allow default-only changes to avoid rebuilding table data.
if !data_type_changed && default_expr_changed && !computed_expr_changed {
if field.default_expr.is_some() {
let data_field: DataField = field.into();
let scalar_expr = default_expr_binder.parse_and_bind(&data_field)?;
let expr = scalar_expr
.as_expr()?
.project_column_ref(|col| Ok(col.index))?;

// For non-deterministic default expressions, a metadata-only change may
// cause missing column values in historical blocks to be re-evaluated on
// every query (e.g. `rand()`), leading to unstable results.
// Force rebuilding table data to materialize existing values.
if !expr.is_deterministic(&BUILTIN_FUNCTIONS) {
need_rebuild = true;
}
}

if !need_rebuild {
continue;
}
}

if data_type_changed || computed_expr_changed {
need_rebuild = true;
}
}
}

// if don't need to rebuild table, only update table meta.
if modified_default_scalars.is_empty()
|| base_snapshot.is_none_or(|v| v.summary.row_count == 0)
{
if !need_rebuild || base_snapshot.is_none_or(|v| v.summary.row_count == 0) {
commit_table_meta(
&self.ctx,
table.as_ref(),
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
# Regression coverage for ALTER TABLE ... MODIFY COLUMN, run inside the scratch
# database db_05_0040, which is dropped again by the last statement of this file.
#
# Scenario A: type conversion on existing columns.
# Scenario B: expected errors for invalid cast / unknown column / invalid default.
# Scenario C: default evolution after modify/add operations.
# Scenario D: varchar default and not-null behavior after modify.

# Start from a clean database so a rerun is not affected by leftovers.
statement ok
DROP DATABASE IF EXISTS db_05_0040

statement ok
CREATE DATABASE db_05_0040

statement ok
USE db_05_0040

# --------------------------
# A. Basic type conversion
# --------------------------
# Two columns are retyped in a single statement (a: STRING -> FLOAT,
# b: INT -> STRING); column c is left untouched.
statement ok
CREATE TABLE a(a STRING NOT NULL, b INT NOT NULL, c INT NOT NULL)

statement ok
INSERT INTO a VALUES('1', 2, 3)

statement ok
ALTER TABLE a MODIFY COLUMN a FLOAT NOT NULL, COLUMN b STRING NOT NULL

# The single existing row must still be readable, with a and b converted to
# their new types and c unchanged.
query B
SELECT count(*) = 1 AND min(a) = 1 AND min(b) = '2' AND min(c) = 3 FROM a
----
1

# The catalog must report the new types for a and b and the original type for c.
query TT
SELECT name, data_type FROM system.columns WHERE database = 'db_05_0040' AND table = 'a' ORDER BY name
----
a FLOAT
b VARCHAR
c INT

# ----------------------------------------------
# B. Invalid cast / unknown column / bad default
# ----------------------------------------------
statement ok
CREATE TABLE b(a STRING NOT NULL)

statement ok
INSERT INTO b VALUES('a')

# Invalid cast: the stored value 'a' is not a number, so converting the
# column to FLOAT is expected to fail.
statement error 1006
ALTER TABLE b MODIFY COLUMN a FLOAT NOT NULL

# Unknown column: table b has no column named b.
statement error 1058
ALTER TABLE b MODIFY COLUMN b FLOAT NOT NULL

statement ok
CREATE TABLE c(a INT NOT NULL, b INT NOT NULL)

# Column a is omitted from the insert. Presumably rejected because a is
# NOT NULL and has no default -- TODO confirm against the meaning of error 1006.
statement error 1006
INSERT INTO c (b) VALUES(1)

statement ok
INSERT INTO c (a, b) VALUES(0, 1)

# Bad default: the string 'a' is not a valid default for a FLOAT column.
statement error 1006
ALTER TABLE c MODIFY COLUMN a FLOAT NOT NULL DEFAULT 'a'

# The same type change with a numeric default is accepted.
statement ok
ALTER TABLE c MODIFY COLUMN a FLOAT NOT NULL DEFAULT 1.2

statement ok
CREATE TABLE c_multi(a INT NOT NULL, b INT NOT NULL)

statement ok
INSERT INTO c_multi VALUES(1, 1)

# Multi-column modify in which only the second column carries an invalid
# default: the whole statement is expected to fail.
statement error 1006
ALTER TABLE c_multi MODIFY COLUMN a FLOAT NOT NULL, COLUMN b FLOAT NOT NULL DEFAULT 'a'

# Neither column of c_multi may have been retyped by the failed statement.
query TT
SELECT name, data_type FROM system.columns WHERE database = 'db_05_0040' AND table = 'c_multi' ORDER BY name
----
a INT
b INT

# Table c still holds exactly the one row inserted above (a = 0, b = 1); the
# rejected insert and the rejected ALTER left no extra or altered rows.
query B
SELECT count(*) = 1 AND min(a) = 0 AND max(a) = 0 AND sum(b) = 1 FROM c
----
1

# ----------------------------------------------
# C. Default evolution with modify/add operations
# ----------------------------------------------
statement ok
CREATE TABLE d(a INT NOT NULL, b INT NOT NULL DEFAULT 10)

# b is not supplied, so this row is written while the default of b is 10.
statement ok
INSERT INTO d (a) VALUES(1)

# Default-only change: the type of b stays INT, only the default moves 10 -> 2.
statement ok
ALTER TABLE d MODIFY COLUMN b INT NOT NULL DEFAULT 2

# c is added after the first row already exists, then its default is changed too.
statement ok
ALTER TABLE d ADD COLUMN c FLOAT NOT NULL DEFAULT 1.01

statement ok
ALTER TABLE d MODIFY COLUMN c FLOAT NOT NULL DEFAULT 2.2

# b and c are not supplied, so this row is written with the latest defaults.
statement ok
INSERT INTO d (a) VALUES(10)

# sum(b) = 12 pins the first row at b = 10 and the second row at b = 2, i.e.
# changing the default did not alter the value of b in the existing row.
# The min/max bounds on c require both rows to read a value close to 2.2; a
# range is used instead of an exact float comparison.
query B
SELECT count(*) = 2 AND sum(b) = 12 AND min(c) > 2.1 AND max(c) < 2.3 FROM d
----
1

# Exactly one row (the one inserted after all ALTERs) carries the new defaults.
query I
SELECT count(*) FROM d WHERE a = 10 AND b = 2 AND c = 2.2
----
1

# ----------------------------------------------
# D. VARCHAR default + NOT NULL behavior
# ----------------------------------------------
statement ok
CREATE TABLE e(a INT NOT NULL, b INT NOT NULL)

statement ok
INSERT INTO e VALUES(1, 1)

# Type change (INT -> VARCHAR) combined with a new default in one statement.
statement ok
ALTER TABLE e MODIFY COLUMN a VARCHAR(10) NOT NULL DEFAULT 'not'

# Default expression should be updated after MODIFY COLUMN.
query T
SELECT default_expression FROM system.columns WHERE database = 'db_05_0040' AND table = 'e' AND name = 'a'
----
'not'

# a is omitted, so the new row must pick up the new default.
statement ok
INSERT INTO e (b) VALUES(2)

# The pre-existing row shows its original value converted to text ('1');
# only the row inserted after the ALTER shows the default 'not'.
query TI
SELECT a, b FROM e ORDER BY b
----
1 1
not 2

statement ok
CREATE TABLE f(a INT NOT NULL, b INT NOT NULL)

statement ok
INSERT INTO f VALUES(1, 1)

# Type change with a column comment but without any DEFAULT clause.
statement ok
ALTER TABLE f MODIFY COLUMN a VARCHAR(10) NOT NULL COMMENT 'new column'

# Column a is omitted from the insert. Presumably rejected because a is
# NOT NULL and has no default -- TODO confirm against the meaning of error 1006.
statement error 1006
INSERT INTO f (b) VALUES(2)

# Supplying a explicitly (even as an empty string) is accepted.
statement ok
INSERT INTO f (a, b) VALUES('', 2)

# The comment given in MODIFY COLUMN must be visible in the catalog.
query T
SELECT comment FROM system.columns WHERE database = 'db_05_0040' AND table = 'f' AND name = 'a'
----
new column

# Clean up everything created by this file.
statement ok
DROP DATABASE db_05_0040
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
# Checks how a default-only MODIFY COLUMN interacts with snapshots,
# non-deterministic defaults and change-tracking tables. Everything runs
# inside the scratch database db_09_0103.
statement ok
CREATE OR REPLACE DATABASE db_09_0103

statement ok
USE db_09_0103

# Change-tracking table used by the "change tracking" section further down.
# It is created here, before any other statement touches the database.
statement ok
CREATE TABLE t_ct(a INT NOT NULL, b INT DEFAULT 10) CHANGE_TRACKING=true

#############################################
# time travel: default-only modify keeps snapshot-specific defaults #
#############################################

# Rows are inserted before column b exists, so b is never supplied for them.
statement ok
CREATE TABLE t(a INT NOT NULL)

statement ok
INSERT INTO t VALUES(1),(2)

statement ok
ALTER TABLE t ADD COLUMN b INT DEFAULT 10

# Existing rows read the default of the newly added column.
query II
SELECT a,b FROM t ORDER BY a
----
1 10
2 10

# Default-only change: the type of b stays INT, only the default moves 10 -> 20.
statement ok
ALTER TABLE t MODIFY COLUMN b INT DEFAULT 20

# The same rows now read the new default, since b was never supplied for them.
query II
SELECT a,b FROM t ORDER BY a
----
1 20
2 20

# fields[1] is column b. Each of the two listed snapshots carries its own
# default_expr for b ("20" and "10"), i.e. the earlier snapshot keeps the
# earlier default.
# NOTE(review): there is no ORDER BY, so the expected order relies on
# fuse_dump_snapshots listing the latest snapshot first -- confirm that this
# ordering is guaranteed.
query T
SELECT snapshot:schema:fields[1]:default_expr FROM fuse_dump_snapshots('db_09_0103', 't') LIMIT 2
----
"20"
"10"

#############################################################
# non-deterministic default: default-only modify triggers rewrite #
#############################################################

# row_per_block=1 is set as a table option; with a single inserted row the
# table holds one row of data before column b is added.
statement ok
CREATE TABLE t_nd(a INT NOT NULL) row_per_block=1

statement ok
INSERT INTO t_nd VALUES(1)

statement ok
ALTER TABLE t_nd ADD COLUMN b FLOAT DEFAULT 1.0

# The ADD COLUMN above is metadata-only, so the existing block does not have
# physical column metadata for `b`.
query I
SELECT count() FROM fuse_column('db_09_0103', 't_nd') WHERE column_name = 'b'
----
0

# Default-only change (type stays FLOAT) to a non-deterministic expression.
statement ok
ALTER TABLE t_nd MODIFY COLUMN b FLOAT DEFAULT rand()

# Changing the default to a non-deterministic expression requires rewriting
# existing blocks to materialize stable values.
# After the rewrite, fuse_column must list at least one physical entry for `b`.
query B
SELECT count() >= 1 FROM fuse_column('db_09_0103', 't_nd') WHERE column_name = 'b'
----
1

####################################################
# change tracking: default-only modify is allowed (metadata-only) #
####################################################

# t_ct was created at the top of this file with CHANGE_TRACKING=true and
# b INT DEFAULT 10. b is not supplied, so this row is written with b = 10.
statement ok
INSERT INTO t_ct(a) VALUES(1)

# Default-only change on a change-tracking table: expected to succeed.
statement ok
ALTER TABLE t_ct MODIFY COLUMN b INT DEFAULT 99

# This row is written after the change and must pick up the new default.
statement ok
INSERT INTO t_ct(a) VALUES(2)

# The row inserted before the change keeps b = 10; only the later row has 99.
query II
SELECT a,b FROM t_ct ORDER BY a
----
1 10
2 99

# Changing the data type of a column (INT -> STRING) on a change-tracking
# table is still expected to be rejected.
statement error 1132
ALTER TABLE t_ct MODIFY COLUMN b STRING

# Clean up everything created by this file.
statement ok
DROP TABLE t ALL

statement ok
DROP TABLE t_nd ALL

statement ok
DROP TABLE t_ct ALL

statement ok
DROP DATABASE db_09_0103
Loading
Loading