Skip to content

Commit 4a4d8d3

Browse files
committed
Revert "refactor: simplify cast_struct_column and extract helpers"
This reverts commit 5a549ec.
1 parent 5a549ec commit 4a4d8d3

File tree

1 file changed

+39
-61
lines changed

1 file changed

+39
-61
lines changed

datafusion/common/src/nested_struct.rs

Lines changed: 39 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -55,86 +55,64 @@ fn cast_struct_column(
5555
target_fields: &[Arc<Field>],
5656
cast_options: &CastOptions,
5757
) -> Result<ArrayRef> {
58-
// Fast path for completely-null arrays or a Null-typed column
59-
if is_all_null_array(source_col) {
58+
if source_col.data_type() == &DataType::Null
59+
|| (!source_col.is_empty() && source_col.null_count() == source_col.len())
60+
{
6061
return Ok(new_null_array(
6162
&Struct(target_fields.to_vec().into()),
6263
source_col.len(),
6364
));
6465
}
6566

66-
// Ensure the source is actually a StructArray
67-
let source_struct = match source_col.as_any().downcast_ref::<StructArray>() {
68-
Some(s) => s,
69-
None => {
70-
return _plan_err!(
71-
"Cannot cast column of type {} to struct type: source is not a StructArray.",
72-
source_col.data_type()
73-
);
74-
}
75-
};
67+
if let Some(source_struct) = source_col.as_any().downcast_ref::<StructArray>() {
68+
let source_fields = source_struct.fields();
69+
let has_overlap = fields_have_name_overlap(source_fields, target_fields);
70+
validate_struct_compatibility(source_fields, target_fields)?;
7671

77-
let source_fields = source_struct.fields();
78-
let has_overlap = fields_have_name_overlap(source_fields, target_fields);
79-
80-
// Validate that individual fields are cast-compatible before doing work
81-
validate_struct_compatibility(source_fields, target_fields)?;
72+
let mut fields: Vec<Arc<Field>> = Vec::with_capacity(target_fields.len());
73+
let mut arrays: Vec<ArrayRef> = Vec::with_capacity(target_fields.len());
74+
let num_rows = source_col.len();
8275

83-
let num_rows = source_col.len();
76+
// Iterate target fields and pick source child either by name (when fields overlap)
77+
// or by position (when there is no name overlap).
78+
for (index, target_child_field) in target_fields.iter().enumerate() {
79+
fields.push(Arc::clone(target_child_field));
8480

85-
// Collect target field descriptors (we preserve exact target Field objects)
86-
let fields: Vec<Arc<Field>> = target_fields.iter().cloned().collect();
81+
// Determine the source child column: by name when overlapping names exist,
82+
// otherwise by position.
83+
let source_child_opt: Option<&ArrayRef> = if has_overlap {
84+
source_struct.column_by_name(target_child_field.name())
85+
} else {
86+
Some(source_struct.column(index))
87+
};
8788

88-
// Build child arrays in a single iterator to make error propagation concise
89-
let arrays: Vec<ArrayRef> = target_fields
90-
.iter()
91-
.enumerate()
92-
.map(|(index, target_child_field)| {
93-
match select_source_child(
94-
source_struct,
95-
has_overlap,
96-
index,
97-
target_child_field,
98-
) {
89+
match source_child_opt {
9990
Some(source_child_col) => {
100-
cast_column(source_child_col, target_child_field, cast_options)
101-
.map_err(|e| {
91+
let adapted_child =
92+
cast_column(source_child_col, target_child_field, cast_options)
93+
.map_err(|e| {
10294
e.context(format!(
10395
"While casting struct field '{}'",
10496
target_child_field.name()
10597
))
106-
})
98+
})?;
99+
arrays.push(adapted_child);
100+
}
101+
None => {
102+
arrays.push(new_null_array(target_child_field.data_type(), num_rows));
107103
}
108-
None => Ok(new_null_array(target_child_field.data_type(), num_rows)),
109104
}
110-
})
111-
.collect::<Result<Vec<_>>>()?;
112-
113-
let struct_array =
114-
StructArray::new(fields.into(), arrays, source_struct.nulls().cloned());
115-
Ok(Arc::new(struct_array))
116-
}
117-
118-
/// Helper: return true if column is a Null type or contains only nulls
119-
fn is_all_null_array(source_col: &ArrayRef) -> bool {
120-
source_col.data_type() == &DataType::Null
121-
|| (!source_col.is_empty() && source_col.null_count() == source_col.len())
122-
}
105+
}
123106

124-
/// Helper: select a child column by name when names overlap, otherwise by position.
125-
///
126-
/// Safety: when `has_overlap == false` callers must ensure `index` is valid; caller guarantees
127-
/// this via validation earlier (`validate_struct_compatibility`).
128-
fn select_source_child<'a>(
129-
source_struct: &'a StructArray,
130-
has_overlap: bool,
131-
index: usize,
132-
target_child_field: &Field,
133-
) -> Option<&'a ArrayRef> {
134-
if has_overlap {
135-
source_struct.column_by_name(target_child_field.name())
107+
let struct_array =
108+
StructArray::new(fields.into(), arrays, source_struct.nulls().cloned());
109+
Ok(Arc::new(struct_array))
136110
} else {
137-
Some(source_struct.column(index))
111+
// Return error if source is not a struct type
112+
_plan_err!(
113+
"Cannot cast column of type {} to struct type. Source must be a struct to cast to struct.",
114+
source_col.data_type()
115+
)
138116
}
139117
}
140118

0 commit comments

Comments
 (0)