Skip to content

Commit 95f938b

Browse files
committed
impl
1 parent cd0f0d4 commit 95f938b

File tree

1 file changed

+71
-53
lines changed

1 file changed

+71
-53
lines changed

native/core/src/execution/planner.rs

Lines changed: 71 additions & 53 deletions
Original file line number | Diff line number | Diff line change
@@ -2714,52 +2714,13 @@ fn create_case_expr(
27142714
}
27152715
}
27162716

2717-
pub(crate) fn build_list_from_literal(data: ListLiteral) -> ArrayRef {
2718-
// --- Base case: no nested children → leaf node ---
2719-
if data.list_values.is_empty() {
2720-
// Create an Int32Array from the leaf values
2721-
Arc::new(Int32Array::from(data.int_values.clone())) as ArrayRef
2722-
} else {
2723-
// --- Recursive case: has nested children ---
2724-
// Build a ListArray for each child recursively
2725-
let child_arrays: Vec<ArrayRef> = data
2726-
.list_values
2727-
.iter()
2728-
.map(|c| build_list_from_literal(c.clone()))
2729-
.collect();
2730-
2731-
// Convert Vec<ArrayRef> into Vec<&dyn Array> for concat()
2732-
let child_refs: Vec<&dyn arrow::array::Array> =
2733-
child_arrays.iter().map(|a| a.as_ref()).collect();
2734-
2735-
// Concatenate all child arrays' *values* into one array
2736-
// Example: [[1,2,3], [4,5,6]] → values = [1,2,3,4,5,6]
2737-
let concat = arrow::compute::concat(&child_refs).unwrap();
2738-
2739-
// --- Build offsets for the parent list ---
2740-
let mut offsets = Vec::with_capacity(child_arrays.len() + 1);
2741-
offsets.push(0); // first list always starts at 0
2742-
let mut sum = 0;
2743-
for arr in &child_arrays {
2744-
sum += arr.len() as i32; // each child's length adds to total
2745-
offsets.push(sum); // store cumulative sum as next offset
2746-
}
2747-
2748-
// Create and return the parent ListArray
2749-
Arc::new(ListArray::new(
2750-
// Field: item type matches the concatenated child's type
2751-
FieldRef::from(Field::new("item", concat.data_type().clone(), true)),
2752-
OffsetBuffer::new(offsets.into()), // where each sublist starts/ends
2753-
concat, // the flattened values array
2754-
None, // no null bitmap at this level
2755-
))
2756-
}
2757-
}
27582717
fn literal_to_array_ref(
27592718
data_type: DataType,
27602719
list_literal: ListLiteral,
27612720
) -> Result<ArrayRef, ExecutionError> {
27622721
let nulls = &list_literal.null_mask;
2722+
dbg!(&data_type);
2723+
dbg!(&list_literal);
27632724
match data_type {
27642725
DataType::Null => Ok(Arc::new(NullArray::new(nulls.len()))),
27652726
DataType::Boolean => Ok(Arc::new(BooleanArray::new(
@@ -2881,7 +2842,44 @@ fn literal_to_array_ref(
28812842
)
28822843
.with_precision_and_scale(p, s)?,
28832844
)),
2884-
DataType::List(f) => Ok(Arc::new(build_list_from_literal(list_literal))),
2845+
// list of primitive types
2846+
DataType::List(f) if !matches!(f.data_type(), DataType::List(_)) => {
2847+
literal_to_array_ref(f.data_type().clone(), list_literal)
2848+
}
2849+
DataType::List(f) => {
2850+
let dt = f.data_type().clone();
2851+
let child_arrays: Vec<ArrayRef> = list_literal
2852+
.list_values
2853+
.iter()
2854+
.map(|c| literal_to_array_ref(dt.clone(), c.clone()).unwrap())
2855+
.collect();
2856+
2857+
// Convert Vec<ArrayRef> into Vec<&dyn Array> for concat()
2858+
let child_refs: Vec<&dyn arrow::array::Array> =
2859+
child_arrays.iter().map(|a| a.as_ref()).collect();
2860+
2861+
// Concatenate all child arrays' *values* into one array
2862+
// Example: [[1,2,3], [4,5,6]] → values = [1,2,3,4,5,6]
2863+
let concat = arrow::compute::concat(&child_refs).unwrap();
2864+
2865+
// --- Build offsets for the parent list ---
2866+
let mut offsets = Vec::with_capacity(child_arrays.len() + 1);
2867+
offsets.push(0); // first list always starts at 0
2868+
let mut sum = 0;
2869+
for arr in &child_arrays {
2870+
sum += arr.len() as i32; // each child's length adds to total
2871+
offsets.push(sum); // store cumulative sum as next offset
2872+
}
2873+
2874+
// Create and return the parent ListArray
2875+
Ok(Arc::new(ListArray::new(
2876+
// Field: item type matches the concatenated child's type
2877+
FieldRef::from(Field::new("item", concat.data_type().clone(), true)),
2878+
OffsetBuffer::new(offsets.into()), // where each sublist starts/ends
2879+
concat, // the flattened values array
2880+
None, // no null bitmap at this level
2881+
)))
2882+
}
28852883
dt => Err(GeneralError(format!(
28862884
"DataType::List literal does not support {dt:?} type"
28872885
))),
@@ -2894,7 +2892,7 @@ mod tests {
28942892
use std::{sync::Arc, task::Poll};
28952893

28962894
use arrow::array::{Array, DictionaryArray, Int32Array, ListArray, RecordBatch, StringArray};
2897-
use arrow::datatypes::{DataType, Field, Fields, Schema};
2895+
use arrow::datatypes::{DataType, Field, FieldRef, Fields, Schema};
28982896
use datafusion::catalog::memory::DataSourceExec;
28992897
use datafusion::datasource::listing::PartitionedFile;
29002898
use datafusion::datasource::object_store::ObjectStoreUrl;
@@ -2911,7 +2909,7 @@ mod tests {
29112909
use crate::execution::{operators::InputBatch, planner::PhysicalPlanner};
29122910

29132911
use crate::execution::operators::ExecutionError;
2914-
use crate::execution::planner::build_list_from_literal;
2912+
use crate::execution::planner::literal_to_array_ref;
29152913
use crate::parquet::parquet_support::SparkParquetOptions;
29162914
use crate::parquet::schema_adapter::SparkSchemaAdapterFactory;
29172915
use datafusion_comet_proto::spark_expression::expr::ExprStruct;
@@ -3719,11 +3717,11 @@ mod tests {
37193717
null_mask: vec![true, true, true, false],
37203718
..Default::default()
37213719
},
3722-
ListLiteral {
3723-
..Default::default()
3724-
},
3720+
// ListLiteral {
3721+
// ..Default::default()
3722+
// },
37253723
],
3726-
null_mask: vec![true, true, true, false],
3724+
null_mask: vec![true, true, true],
37273725
..Default::default()
37283726
},
37293727
ListLiteral {
@@ -3735,15 +3733,35 @@ mod tests {
37353733
null_mask: vec![true],
37363734
..Default::default()
37373735
},
3738-
ListLiteral {
3739-
..Default::default()
3740-
},
3736+
// ListLiteral {
3737+
// ..Default::default()
3738+
// },
37413739
],
3742-
null_mask: vec![true, true, false],
3740+
null_mask: vec![true, true],
37433741
..Default::default()
37443742
};
37453743

3746-
let array = build_list_from_literal(data);
3744+
let nested_type = DataType::List(FieldRef::from(Field::new(
3745+
"item",
3746+
DataType::List(
3747+
Field::new(
3748+
"item",
3749+
DataType::List(
3750+
Field::new(
3751+
"item",
3752+
DataType::Int32,
3753+
true, // Int32 nullable
3754+
)
3755+
.into(),
3756+
),
3757+
true, // inner list nullable
3758+
)
3759+
.into(),
3760+
),
3761+
true, // outer list nullable
3762+
)));
3763+
3764+
let array = literal_to_array_ref(nested_type, data)?;
37473765

37483766
// Top-level should be ListArray<ListArray<Int32>>
37493767
let list_outer = array.as_any().downcast_ref::<ListArray>().unwrap();

0 commit comments

Comments
 (0)