Skip to content

Commit 81b93e5

Browse files
Backport "Provide field and schema metadata missing on cross joins, and union with null fields" (apache#12729) (apache#12974)
* Patch for PR 12729 * Test before drop
1 parent 6ebc876 commit 81b93e5

File tree

5 files changed

+62
-8
lines changed

5 files changed

+62
-8
lines changed

datafusion/core/src/datasource/file_format/mod.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -257,11 +257,13 @@ pub(crate) fn coerce_file_schema_to_view_type(
257257
|field| match (table_fields.get(field.name()), field.data_type()) {
258258
(Some(DataType::Utf8View), DataType::Utf8)
259259
| (Some(DataType::Utf8View), DataType::LargeUtf8) => Arc::new(
260-
Field::new(field.name(), DataType::Utf8View, field.is_nullable()),
260+
Field::new(field.name(), DataType::Utf8View, field.is_nullable())
261+
.with_metadata(field.metadata().to_owned()),
261262
),
262263
(Some(DataType::BinaryView), DataType::Binary)
263264
| (Some(DataType::BinaryView), DataType::LargeBinary) => Arc::new(
264-
Field::new(field.name(), DataType::BinaryView, field.is_nullable()),
265+
Field::new(field.name(), DataType::BinaryView, field.is_nullable())
266+
.with_metadata(field.metadata().to_owned()),
265267
),
266268
_ => field.clone(),
267269
},

datafusion/physical-plan/src/joins/cross_join.rs

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -69,15 +69,22 @@ impl CrossJoinExec {
6969
/// Create a new [CrossJoinExec].
7070
pub fn new(left: Arc<dyn ExecutionPlan>, right: Arc<dyn ExecutionPlan>) -> Self {
7171
// left then right
72-
let all_columns: Fields = {
72+
let (all_columns, metadata) = {
7373
let left_schema = left.schema();
7474
let right_schema = right.schema();
7575
let left_fields = left_schema.fields().iter();
7676
let right_fields = right_schema.fields().iter();
77-
left_fields.chain(right_fields).cloned().collect()
77+
78+
let mut metadata = left_schema.metadata().clone();
79+
metadata.extend(right_schema.metadata().clone());
80+
81+
(
82+
left_fields.chain(right_fields).cloned().collect::<Fields>(),
83+
metadata,
84+
)
7885
};
7986

80-
let schema = Arc::new(Schema::new(all_columns));
87+
let schema = Arc::new(Schema::new(all_columns).with_metadata(metadata));
8188
let cache = Self::compute_properties(&left, &right, Arc::clone(&schema));
8289
CrossJoinExec {
8390
left,

datafusion/physical-plan/src/union.rs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -474,7 +474,16 @@ fn union_schema(inputs: &[Arc<dyn ExecutionPlan>]) -> SchemaRef {
474474
.iter()
475475
.filter_map(|input| {
476476
if input.schema().fields().len() > i {
477-
Some(input.schema().field(i).clone())
477+
let field = input.schema().field(i).clone();
478+
let right_hand_metdata = inputs
479+
.get(1)
480+
.map(|right_input| {
481+
right_input.schema().field(i).metadata().clone()
482+
})
483+
.unwrap_or_default();
484+
let mut metadata = field.metadata().clone();
485+
metadata.extend(right_hand_metdata);
486+
Some(field.with_metadata(metadata))
478487
} else {
479488
None
480489
}

datafusion/sqllogictest/src/test_context.rs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -313,8 +313,13 @@ pub async fn register_metadata_tables(ctx: &SessionContext) {
313313
String::from("metadata_key"),
314314
String::from("the name field"),
315315
)]));
316+
let l_name =
317+
Field::new("l_name", DataType::Utf8, true).with_metadata(HashMap::from([(
318+
String::from("metadata_key"),
319+
String::from("the l_name field"),
320+
)]));
316321

317-
let schema = Schema::new(vec![id, name]).with_metadata(HashMap::from([(
322+
let schema = Schema::new(vec![id, name, l_name]).with_metadata(HashMap::from([(
318323
String::from("metadata_key"),
319324
String::from("the entire schema"),
320325
)]));
@@ -324,6 +329,7 @@ pub async fn register_metadata_tables(ctx: &SessionContext) {
324329
vec![
325330
Arc::new(Int32Array::from(vec![Some(1), None, Some(3)])) as _,
326331
Arc::new(StringArray::from(vec![None, Some("bar"), Some("baz")])) as _,
332+
Arc::new(StringArray::from(vec![None, Some("l_bar"), Some("l_baz")])) as _,
327333
],
328334
)
329335
.unwrap();

datafusion/sqllogictest/test_files/metadata.slt

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
## with metadata in SQL.
2626

2727
query IT
28-
select * from table_with_metadata;
28+
select id, name from table_with_metadata;
2929
----
3030
1 NULL
3131
NULL bar
@@ -58,5 +58,35 @@ WHERE "data"."id" = "samples"."id";
5858
1
5959
3
6060

61+
62+
# Regression test: missing schema metadata, when aggregate on cross join
63+
query I
64+
SELECT count("data"."id")
65+
FROM
66+
(
67+
SELECT "id" FROM "table_with_metadata"
68+
) as "data",
69+
(
70+
SELECT "id" FROM "table_with_metadata"
71+
) as "samples";
72+
----
73+
6
74+
75+
# Regression test: missing field metadata, from the NULL field on the left side of the union
76+
query ITT
77+
(SELECT id, NULL::string as name, l_name FROM "table_with_metadata")
78+
UNION
79+
(SELECT id, name, NULL::string as l_name FROM "table_with_metadata")
80+
ORDER BY id, name, l_name;
81+
----
82+
1 NULL NULL
83+
3 baz NULL
84+
3 NULL l_baz
85+
NULL bar NULL
86+
NULL NULL l_bar
87+
6188
statement ok
6289
drop table table_with_metadata;
90+
91+
92+

0 commit comments

Comments
 (0)