Skip to content

Commit d3c7900

Browse files
authored
fix: handle Null type in try_merge for Struct, List, LargeList, and Union (#9524)
# Which issue does this PR close? Field::try_merge correctly handles DataType::Null for primitive types and when self is Null, but fails when self is a compound type (Struct, List, LargeList, Union) and from is Null. This causes Schema::try_merge to error when merging schemas where one has a Null field and another has a concrete compound type for the same field. This is common in JSON inference where some files have null values for fields that are structs/lists in other files. - Closes[ #9523](#9523) # Rationale for this change Add `DataType::Null` arms to the Struct, List, LargeList, and Union branches in `Field::try_merge`, consistent with how primitive types already handle it. # What changes are included in this PR? Add `DataType::Null` arms to the Struct, List, LargeList, and Union branches in `Field::try_merge`, consistent with how primitive types already handle it. # Are these changes tested? - Added test `test_merge_compound_with_null` covering Struct, List, LargeList, and Union merging with Null in both directions. - Existing tests continue to pass. # Are there any user-facing changes? No
1 parent 33aed33 commit d3c7900

File tree

1 file changed

+66
-0
lines changed

1 file changed

+66
-0
lines changed

arrow-schema/src/field.rs

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -830,6 +830,9 @@ impl Field {
830830
.try_for_each(|f| builder.try_merge(f))?;
831831
*nested_fields = builder.finish().fields;
832832
}
833+
DataType::Null => {
834+
self.nullable = true;
835+
}
833836
_ => {
834837
return Err(ArrowError::SchemaError(format!(
835838
"Fail to merge schema field '{}' because the from data_type = {} is not DataType::Struct",
@@ -841,6 +844,9 @@ impl Field {
841844
DataType::Union(from_nested_fields, _) => {
842845
nested_fields.try_merge(from_nested_fields)?
843846
}
847+
DataType::Null => {
848+
self.nullable = true;
849+
}
844850
_ => {
845851
return Err(ArrowError::SchemaError(format!(
846852
"Fail to merge schema field '{}' because the from data_type = {} is not DataType::Union",
@@ -854,6 +860,9 @@ impl Field {
854860
f.try_merge(from_field)?;
855861
(*field) = Arc::new(f);
856862
}
863+
DataType::Null => {
864+
self.nullable = true;
865+
}
857866
_ => {
858867
return Err(ArrowError::SchemaError(format!(
859868
"Fail to merge schema field '{}' because the from data_type = {} is not DataType::List",
@@ -867,6 +876,9 @@ impl Field {
867876
f.try_merge(from_field)?;
868877
(*field) = Arc::new(f);
869878
}
879+
DataType::Null => {
880+
self.nullable = true;
881+
}
870882
_ => {
871883
return Err(ArrowError::SchemaError(format!(
872884
"Fail to merge schema field '{}' because the from data_type = {} is not DataType::LargeList",
@@ -1461,4 +1473,58 @@ mod test {
14611473

14621474
assert_binary_serde_round_trip(field)
14631475
}
1476+
1477+
#[test]
1478+
fn test_merge_compound_with_null() {
1479+
// Struct + Null
1480+
let mut field = Field::new(
1481+
"s",
1482+
DataType::Struct(Fields::from(vec![Field::new("a", DataType::Int32, false)])),
1483+
false,
1484+
);
1485+
field
1486+
.try_merge(&Field::new("s", DataType::Null, true))
1487+
.expect("Struct should merge with Null");
1488+
assert!(field.is_nullable());
1489+
assert!(matches!(field.data_type(), DataType::Struct(_)));
1490+
1491+
// List + Null
1492+
let mut field = Field::new(
1493+
"l",
1494+
DataType::List(Field::new("item", DataType::Utf8, false).into()),
1495+
false,
1496+
);
1497+
field
1498+
.try_merge(&Field::new("l", DataType::Null, true))
1499+
.expect("List should merge with Null");
1500+
assert!(field.is_nullable());
1501+
assert!(matches!(field.data_type(), DataType::List(_)));
1502+
1503+
// LargeList + Null
1504+
let mut field = Field::new(
1505+
"ll",
1506+
DataType::LargeList(Field::new("item", DataType::Utf8, false).into()),
1507+
false,
1508+
);
1509+
field
1510+
.try_merge(&Field::new("ll", DataType::Null, true))
1511+
.expect("LargeList should merge with Null");
1512+
assert!(field.is_nullable());
1513+
assert!(matches!(field.data_type(), DataType::LargeList(_)));
1514+
1515+
// Union + Null
1516+
let mut field = Field::new(
1517+
"u",
1518+
DataType::Union(
1519+
UnionFields::try_new(vec![0], vec![Field::new("f", DataType::Int32, false)])
1520+
.unwrap(),
1521+
UnionMode::Dense,
1522+
),
1523+
false,
1524+
);
1525+
field
1526+
.try_merge(&Field::new("u", DataType::Null, true))
1527+
.expect("Union should merge with Null");
1528+
assert!(matches!(field.data_type(), DataType::Union(_, _)));
1529+
}
14641530
}

0 commit comments

Comments
 (0)