Skip to content

Commit 70b49bd

Browse files
veeceeyrtyler
authored andcommitted
refactor: narrow Err(_) catch-all in with_generated_columns to schema resolution errors only
Address review feedback: - Replace broad Err(_) catch-all with a guard that only matches column resolution errors (No field named / Schema error), letting parse and type errors propagate normally. - Bind the error and include it in the debug log message. - Add e2e test through WriteBuilder & SchemaMode::Merge that writes with and without the referenced column, then asserts actual values.
1 parent 102ae16 commit 70b49bd

File tree

2 files changed

+115
-3
lines changed

2 files changed

+115
-3
lines changed

crates/core/src/operations/write/generated_columns.rs

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,18 @@ pub fn gc_is_enabled(snapshot: &EagerSnapshot) -> bool {
2323
.is_feature_enabled(&TableFeature::GeneratedColumns)
2424
}
2525

26+
/// Returns `true` when the error indicates that a column referenced in the
27+
/// generation expression could not be found in the current plan schema.
28+
/// This happens during `SchemaMode::Merge` when the input batch omits
29+
/// nullable columns that schema evolution will add later. All other errors
30+
/// (e.g. SQL syntax errors, type mismatches) should still be surfaced.
31+
fn is_column_resolution_error(err: &crate::DeltaTableError) -> bool {
32+
let msg = err.to_string();
33+
// DataFusion emits "No field named ..." for unresolved column references
34+
// and "Schema error: ..." for broader schema resolution failures.
35+
msg.contains("No field named") || msg.contains("Schema error")
36+
}
37+
2638
pub fn with_generated_columns(
2739
session: &dyn Session,
2840
plan: LogicalPlan,
@@ -68,11 +80,11 @@ pub fn with_generated_columns(
6880
}
6981
e
7082
}
71-
Err(_) => {
83+
Err(ref err) if is_column_resolution_error(err) => {
7284
debug!(
73-
"Could not resolve generation expression for column {}, \
85+
"Could not resolve generation expression for column {} ({}), \
7486
inserting NULL placeholder (will be resolved after schema evolution).",
75-
name
87+
name, err
7688
);
7789
// Use the target data type from the table schema if available,
7890
// otherwise fall back to a bare NULL.
@@ -82,6 +94,7 @@ pub fn with_generated_columns(
8294
lit(ScalarValue::Null).alias(name)
8395
}
8496
}
97+
Err(err) => return Err(err.into()),
8598
};
8699
projection.push(expr);
87100
}

crates/core/tests/integration_datafusion.rs

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1669,6 +1669,105 @@ async fn test_schema_merge_append_missing_non_nullable_column_with_generated_col
16691669
);
16701670
}
16711671

1672+
/// E2E test for #4169: a generated column whose expression references a nullable
1673+
/// column that is *not* present in the input batch should still succeed when
1674+
/// appending with `SchemaMode::Merge`. The generated column value should be
1675+
/// NULL for the rows where the referenced column is absent, and correctly
1676+
/// computed when the referenced column is present.
1677+
#[tokio::test]
1678+
async fn test_schema_merge_generated_column_referencing_missing_column_e2e() {
1679+
let ctx = SessionContext::new();
1680+
let tmp_dir = tempfile::tempdir().unwrap();
1681+
let table_uri = tmp_dir.path().to_str().unwrap();
1682+
1683+
// Table schema: id (int, required), tag (string, nullable),
1684+
// tag_upper (string, nullable, generated = upper(tag))
1685+
let table_schema = StructType::try_new(vec![
1686+
StructField::new(
1687+
"id".to_string(),
1688+
DataType::Primitive(PrimitiveType::Integer),
1689+
false,
1690+
),
1691+
StructField::new(
1692+
"tag".to_string(),
1693+
DataType::Primitive(PrimitiveType::String),
1694+
true,
1695+
),
1696+
StructField::new(
1697+
"tag_upper".to_string(),
1698+
DataType::Primitive(PrimitiveType::String),
1699+
true,
1700+
)
1701+
.with_metadata(vec![(
1702+
ColumnMetadataKey::GenerationExpression.as_ref().to_string(),
1703+
MetadataValue::String("upper(tag)".to_string()),
1704+
)]),
1705+
])
1706+
.unwrap();
1707+
1708+
let table = create_table_with_schema(table_uri, &table_schema).await;
1709+
1710+
// First write: input has both id and tag — generated column should be computed.
1711+
let batch_with_tag = RecordBatch::try_from_iter_with_nullable(vec![
1712+
(
1713+
"id",
1714+
Arc::new(Int32Array::from(vec![1, 2])) as ArrayRef,
1715+
false,
1716+
),
1717+
(
1718+
"tag",
1719+
Arc::new(StringArray::from(vec![Some("hello"), Some("world")])) as ArrayRef,
1720+
true,
1721+
),
1722+
])
1723+
.unwrap();
1724+
1725+
let table = table
1726+
.write(vec![batch_with_tag])
1727+
.with_schema_mode(SchemaMode::Merge)
1728+
.await
1729+
.unwrap();
1730+
1731+
// Second write: input omits `tag` entirely — generated column should become NULL.
1732+
let batch_without_tag = RecordBatch::try_from_iter_with_nullable(vec![(
1733+
"id",
1734+
Arc::new(Int32Array::from(vec![3, 4])) as ArrayRef,
1735+
false,
1736+
)])
1737+
.unwrap();
1738+
1739+
let table = table
1740+
.write(vec![batch_without_tag])
1741+
.with_schema_mode(SchemaMode::Merge)
1742+
.await
1743+
.unwrap();
1744+
1745+
// Read back and verify values.
1746+
let batches = ctx
1747+
.read_table(table.table_provider().await.unwrap())
1748+
.unwrap()
1749+
.select_exprs(&["id", "tag", "tag_upper"])
1750+
.unwrap()
1751+
.collect()
1752+
.await
1753+
.unwrap();
1754+
1755+
assert_batches_sorted_eq!(
1756+
#[rustfmt::skip]
1757+
&[
1758+
"+----+-------+-----------+",
1759+
"| id | tag | tag_upper |",
1760+
"+----+-------+-----------+",
1761+
"| 1 | hello | HELLO |",
1762+
"| 2 | world | WORLD |",
1763+
"| 3 | | |",
1764+
"| 4 | | |",
1765+
"+----+-------+-----------+",
1766+
],
1767+
&batches
1768+
);
1769+
}
1770+
16721771
mod date_partitions {
16731772
use super::*;
16741773

0 commit comments

Comments
 (0)