Skip to content

Commit 1dd328d

Browse files
committed
impl
1 parent d86ff5b commit 1dd328d

File tree

1 file changed

+71
-53
lines changed

1 file changed

+71
-53
lines changed

native/core/src/execution/planner.rs

Lines changed: 71 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -2685,52 +2685,13 @@ fn create_case_expr(
26852685
}
26862686
}
26872687

2688-
pub(crate) fn build_list_from_literal(data: ListLiteral) -> ArrayRef {
2689-
// --- Base case: no nested children → leaf node ---
2690-
if data.list_values.is_empty() {
2691-
// Create an Int32Array from the leaf values
2692-
Arc::new(Int32Array::from(data.int_values.clone())) as ArrayRef
2693-
} else {
2694-
// --- Recursive case: has nested children ---
2695-
// Build a ListArray for each child recursively
2696-
let child_arrays: Vec<ArrayRef> = data
2697-
.list_values
2698-
.iter()
2699-
.map(|c| build_list_from_literal(c.clone()))
2700-
.collect();
2701-
2702-
// Convert Vec<ArrayRef> into Vec<&dyn Array> for concat()
2703-
let child_refs: Vec<&dyn arrow::array::Array> =
2704-
child_arrays.iter().map(|a| a.as_ref()).collect();
2705-
2706-
// Concatenate all child arrays' *values* into one array
2707-
// Example: [[1,2,3], [4,5,6]] → values = [1,2,3,4,5,6]
2708-
let concat = arrow::compute::concat(&child_refs).unwrap();
2709-
2710-
// --- Build offsets for the parent list ---
2711-
let mut offsets = Vec::with_capacity(child_arrays.len() + 1);
2712-
offsets.push(0); // first list always starts at 0
2713-
let mut sum = 0;
2714-
for arr in &child_arrays {
2715-
sum += arr.len() as i32; // each child's length adds to total
2716-
offsets.push(sum); // store cumulative sum as next offset
2717-
}
2718-
2719-
// Create and return the parent ListArray
2720-
Arc::new(ListArray::new(
2721-
// Field: item type matches the concatenated child's type
2722-
FieldRef::from(Field::new("item", concat.data_type().clone(), true)),
2723-
OffsetBuffer::new(offsets.into()), // where each sublist starts/ends
2724-
concat, // the flattened values array
2725-
None, // no null bitmap at this level
2726-
))
2727-
}
2728-
}
27292688
fn literal_to_array_ref(
27302689
data_type: DataType,
27312690
list_literal: ListLiteral,
27322691
) -> Result<ArrayRef, ExecutionError> {
27332692
let nulls = &list_literal.null_mask;
2693+
dbg!(&data_type);
2694+
dbg!(&list_literal);
27342695
match data_type {
27352696
DataType::Null => Ok(Arc::new(NullArray::new(nulls.len()))),
27362697
DataType::Boolean => Ok(Arc::new(BooleanArray::new(
@@ -2852,7 +2813,44 @@ fn literal_to_array_ref(
28522813
)
28532814
.with_precision_and_scale(p, s)?,
28542815
)),
2855-
DataType::List(f) => Ok(Arc::new(build_list_from_literal(list_literal))),
2816+
// list of primitive types
2817+
DataType::List(f) if !matches!(f.data_type(), DataType::List(_)) => {
2818+
literal_to_array_ref(f.data_type().clone(), list_literal)
2819+
}
2820+
DataType::List(f) => {
2821+
let dt = f.data_type().clone();
2822+
let child_arrays: Vec<ArrayRef> = list_literal
2823+
.list_values
2824+
.iter()
2825+
.map(|c| literal_to_array_ref(dt.clone(), c.clone()).unwrap())
2826+
.collect();
2827+
2828+
// Convert Vec<ArrayRef> into Vec<&dyn Array> for concat()
2829+
let child_refs: Vec<&dyn arrow::array::Array> =
2830+
child_arrays.iter().map(|a| a.as_ref()).collect();
2831+
2832+
// Concatenate all child arrays' *values* into one array
2833+
// Example: [[1,2,3], [4,5,6]] → values = [1,2,3,4,5,6]
2834+
let concat = arrow::compute::concat(&child_refs).unwrap();
2835+
2836+
// --- Build offsets for the parent list ---
2837+
let mut offsets = Vec::with_capacity(child_arrays.len() + 1);
2838+
offsets.push(0); // first list always starts at 0
2839+
let mut sum = 0;
2840+
for arr in &child_arrays {
2841+
sum += arr.len() as i32; // each child's length adds to total
2842+
offsets.push(sum); // store cumulative sum as next offset
2843+
}
2844+
2845+
// Create and return the parent ListArray
2846+
Ok(Arc::new(ListArray::new(
2847+
// Field: item type matches the concatenated child's type
2848+
FieldRef::from(Field::new("item", concat.data_type().clone(), true)),
2849+
OffsetBuffer::new(offsets.into()), // where each sublist starts/ends
2850+
concat, // the flattened values array
2851+
None, // no null bitmap at this level
2852+
)))
2853+
}
28562854
dt => Err(GeneralError(format!(
28572855
"DataType::List literal does not support {dt:?} type"
28582856
))),
@@ -2865,7 +2863,7 @@ mod tests {
28652863
use std::{sync::Arc, task::Poll};
28662864

28672865
use arrow::array::{Array, DictionaryArray, Int32Array, ListArray, RecordBatch, StringArray};
2868-
use arrow::datatypes::{DataType, Field, Fields, Schema};
2866+
use arrow::datatypes::{DataType, Field, FieldRef, Fields, Schema};
28692867
use datafusion::catalog::memory::DataSourceExec;
28702868
use datafusion::datasource::listing::PartitionedFile;
28712869
use datafusion::datasource::object_store::ObjectStoreUrl;
@@ -2882,7 +2880,7 @@ mod tests {
28822880
use crate::execution::{operators::InputBatch, planner::PhysicalPlanner};
28832881

28842882
use crate::execution::operators::ExecutionError;
2885-
use crate::execution::planner::build_list_from_literal;
2883+
use crate::execution::planner::literal_to_array_ref;
28862884
use crate::parquet::parquet_support::SparkParquetOptions;
28872885
use crate::parquet::schema_adapter::SparkSchemaAdapterFactory;
28882886
use datafusion_comet_proto::spark_expression::expr::ExprStruct;
@@ -3687,11 +3685,11 @@ mod tests {
36873685
null_mask: vec![true, true, true, false],
36883686
..Default::default()
36893687
},
3690-
ListLiteral {
3691-
..Default::default()
3692-
},
3688+
// ListLiteral {
3689+
// ..Default::default()
3690+
// },
36933691
],
3694-
null_mask: vec![true, true, true, false],
3692+
null_mask: vec![true, true, true],
36953693
..Default::default()
36963694
},
36973695
ListLiteral {
@@ -3703,15 +3701,35 @@ mod tests {
37033701
null_mask: vec![true],
37043702
..Default::default()
37053703
},
3706-
ListLiteral {
3707-
..Default::default()
3708-
},
3704+
// ListLiteral {
3705+
// ..Default::default()
3706+
// },
37093707
],
3710-
null_mask: vec![true, true, false],
3708+
null_mask: vec![true, true],
37113709
..Default::default()
37123710
};
37133711

3714-
let array = build_list_from_literal(data);
3712+
let nested_type = DataType::List(FieldRef::from(Field::new(
3713+
"item",
3714+
DataType::List(
3715+
Field::new(
3716+
"item",
3717+
DataType::List(
3718+
Field::new(
3719+
"item",
3720+
DataType::Int32,
3721+
true, // Int32 nullable
3722+
)
3723+
.into(),
3724+
),
3725+
true, // inner list nullable
3726+
)
3727+
.into(),
3728+
),
3729+
true, // outer list nullable
3730+
)));
3731+
3732+
let array = literal_to_array_ref(nested_type, data)?;
37153733

37163734
// Top-level should be ListArray<ListArray<Int32>>
37173735
let list_outer = array.as_any().downcast_ref::<ListArray>().unwrap();

0 commit comments

Comments
 (0)