Skip to content

Commit eb2b8c0

Browse files
authored
fix error result in execute&pre_selection (#16930)
* fix error result in execute&pre_selection * fix clippy * Optimize implementation * more efficiency impl * fix CI
1 parent 36f5f14 commit eb2b8c0

File tree

3 files changed

+119
-46
lines changed

3 files changed

+119
-46
lines changed

datafusion/physical-expr-common/src/physical_expr.rs

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ use std::sync::Arc;
2323

2424
use crate::utils::scatter;
2525

26-
use arrow::array::BooleanArray;
26+
use arrow::array::{ArrayRef, BooleanArray};
2727
use arrow::compute::filter_record_batch;
2828
use arrow::datatypes::{DataType, Field, FieldRef, Schema};
2929
use arrow::record_batch::RecordBatch;
@@ -106,6 +106,20 @@ pub trait PhysicalExpr: Send + Sync + Display + Debug + DynEq + DynHash {
106106
Ok(tmp_result)
107107
} else if let ColumnarValue::Array(a) = tmp_result {
108108
scatter(selection, a.as_ref()).map(ColumnarValue::Array)
109+
} else if let ColumnarValue::Scalar(ScalarValue::Boolean(value)) = &tmp_result {
110+
// When the scalar is true or false, skip the scatter process
111+
if let Some(v) = value {
112+
if *v {
113+
return Ok(ColumnarValue::from(
114+
Arc::new(selection.clone()) as ArrayRef
115+
));
116+
} else {
117+
return Ok(tmp_result);
118+
}
119+
} else {
120+
let array = BooleanArray::from(vec![None; batch.num_rows()]);
121+
return scatter(selection, &array).map(ColumnarValue::Array);
122+
}
109123
} else {
110124
Ok(tmp_result)
111125
}

datafusion/physical-expr/src/expressions/binary.rs

Lines changed: 102 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -375,7 +375,44 @@ impl PhysicalExpr for BinaryExpr {
375375
// as it takes into account cases where the selection contains null values.
376376
let batch = filter_record_batch(batch, selection)?;
377377
let right_ret = self.right.evaluate(&batch)?;
378-
return pre_selection_scatter(selection, right_ret);
378+
379+
match &right_ret {
380+
ColumnarValue::Array(array) => {
381+
// When the array on the right is all true or all false, skip the scatter process
382+
let boolean_array = array.as_boolean();
383+
let true_count = boolean_array.true_count();
384+
let length = boolean_array.len();
385+
if true_count == length {
386+
return Ok(lhs);
387+
} else if true_count == 0 && boolean_array.null_count() == 0 {
388+
// If the right-hand array is returned at this point,the lengths will be inconsistent;
389+
// returning a scalar can avoid this issue
390+
return Ok(ColumnarValue::Scalar(ScalarValue::Boolean(
391+
Some(false),
392+
)));
393+
}
394+
395+
return pre_selection_scatter(selection, Some(boolean_array));
396+
}
397+
ColumnarValue::Scalar(scalar) => {
398+
if let ScalarValue::Boolean(v) = scalar {
399+
// When the scalar is true or false, skip the scatter process
400+
if let Some(v) = v {
401+
if *v {
402+
return Ok(lhs);
403+
} else {
404+
return Ok(right_ret);
405+
}
406+
} else {
407+
return pre_selection_scatter(selection, None);
408+
}
409+
} else {
410+
return internal_err!(
411+
"Expected boolean scalar value, found: {right_ret:?}"
412+
);
413+
}
414+
}
415+
}
379416
}
380417
}
381418

@@ -974,13 +1011,8 @@ fn check_short_circuit<'a>(
9741011
/// However, this is difficult to achieve under the immutable constraints of [`Arc`] and [`BooleanArray`].
9751012
fn pre_selection_scatter(
9761013
left_result: &BooleanArray,
977-
right_result: ColumnarValue,
1014+
right_result: Option<&BooleanArray>,
9781015
) -> Result<ColumnarValue> {
979-
let right_boolean_array = match &right_result {
980-
ColumnarValue::Array(array) => array.as_boolean(),
981-
ColumnarValue::Scalar(_) => return Ok(right_result),
982-
};
983-
9841016
let result_len = left_result.len();
9851017

9861018
let mut result_array_builder = BooleanArray::builder(result_len);
@@ -990,22 +1022,39 @@ fn pre_selection_scatter(
9901022

9911023
// keep track of how much is filled
9921024
let mut last_end = 0;
993-
SlicesIterator::new(left_result).for_each(|(start, end)| {
994-
// the gap needs to be filled with false
995-
if start > last_end {
996-
result_array_builder.append_n(start - last_end, false);
1025+
// reduce if condition in for_each
1026+
match right_result {
1027+
Some(right_result) => {
1028+
SlicesIterator::new(left_result).for_each(|(start, end)| {
1029+
// the gap needs to be filled with false
1030+
if start > last_end {
1031+
result_array_builder.append_n(start - last_end, false);
1032+
}
1033+
1034+
// copy values from right array for this slice
1035+
let len = end - start;
1036+
right_result
1037+
.slice(right_array_pos, len)
1038+
.iter()
1039+
.for_each(|v| result_array_builder.append_option(v));
1040+
1041+
right_array_pos += len;
1042+
last_end = end;
1043+
});
9971044
}
1045+
None => SlicesIterator::new(left_result).for_each(|(start, end)| {
1046+
// the gap needs to be filled with false
1047+
if start > last_end {
1048+
result_array_builder.append_n(start - last_end, false);
1049+
}
9981050

999-
// copy values from right array for this slice
1000-
let len = end - start;
1001-
right_boolean_array
1002-
.slice(right_array_pos, len)
1003-
.iter()
1004-
.for_each(|v| result_array_builder.append_option(v));
1051+
// append nulls for this slice derictly
1052+
let len = end - start;
1053+
result_array_builder.append_nulls(len);
10051054

1006-
right_array_pos += len;
1007-
last_end = end;
1008-
});
1055+
last_end = end;
1056+
}),
1057+
}
10091058

10101059
// Fill any remaining positions with false
10111060
if last_end < result_len {
@@ -5211,7 +5260,6 @@ mod tests {
52115260
/// 4. Test single true at first position
52125261
/// 5. Test single true at last position
52135262
/// 6. Test nulls in right array
5214-
/// 7. Test scalar right handling
52155263
#[test]
52165264
fn test_pre_selection_scatter() {
52175265
fn create_bool_array(bools: Vec<bool>) -> BooleanArray {
@@ -5222,11 +5270,9 @@ mod tests {
52225270
// Left: [T, F, T, F, T]
52235271
// Right: [F, T, F] (values for 3 true positions)
52245272
let left = create_bool_array(vec![true, false, true, false, true]);
5225-
let right = ColumnarValue::Array(Arc::new(create_bool_array(vec![
5226-
false, true, false,
5227-
])));
5273+
let right = create_bool_array(vec![false, true, false]);
52285274

5229-
let result = pre_selection_scatter(&left, right).unwrap();
5275+
let result = pre_selection_scatter(&left, Some(&right)).unwrap();
52305276
let result_arr = result.into_array(left.len()).unwrap();
52315277

52325278
let expected = create_bool_array(vec![false, false, true, false, false]);
@@ -5238,11 +5284,9 @@ mod tests {
52385284
// Right: [T, F, F, T, F]
52395285
let left =
52405286
create_bool_array(vec![false, true, true, false, true, true, true]);
5241-
let right = ColumnarValue::Array(Arc::new(create_bool_array(vec![
5242-
true, false, false, true, false,
5243-
])));
5287+
let right = create_bool_array(vec![true, false, false, true, false]);
52445288

5245-
let result = pre_selection_scatter(&left, right).unwrap();
5289+
let result = pre_selection_scatter(&left, Some(&right)).unwrap();
52465290
let result_arr = result.into_array(left.len()).unwrap();
52475291

52485292
let expected =
@@ -5254,9 +5298,9 @@ mod tests {
52545298
// Left: [T, F, F]
52555299
// Right: [F]
52565300
let left = create_bool_array(vec![true, false, false]);
5257-
let right = ColumnarValue::Array(Arc::new(create_bool_array(vec![false])));
5301+
let right = create_bool_array(vec![false]);
52585302

5259-
let result = pre_selection_scatter(&left, right).unwrap();
5303+
let result = pre_selection_scatter(&left, Some(&right)).unwrap();
52605304
let result_arr = result.into_array(left.len()).unwrap();
52615305

52625306
let expected = create_bool_array(vec![false, false, false]);
@@ -5267,9 +5311,9 @@ mod tests {
52675311
// Left: [F, F, T]
52685312
// Right: [F]
52695313
let left = create_bool_array(vec![false, false, true]);
5270-
let right = ColumnarValue::Array(Arc::new(create_bool_array(vec![false])));
5314+
let right = create_bool_array(vec![false]);
52715315

5272-
let result = pre_selection_scatter(&left, right).unwrap();
5316+
let result = pre_selection_scatter(&left, Some(&right)).unwrap();
52735317
let result_arr = result.into_array(left.len()).unwrap();
52745318

52755319
let expected = create_bool_array(vec![false, false, false]);
@@ -5280,10 +5324,9 @@ mod tests {
52805324
// Left: [F, T, F, T]
52815325
// Right: [None, Some(false)] (with null at first position)
52825326
let left = create_bool_array(vec![false, true, false, true]);
5283-
let right_arr = BooleanArray::from(vec![None, Some(false)]);
5284-
let right = ColumnarValue::Array(Arc::new(right_arr));
5327+
let right = BooleanArray::from(vec![None, Some(false)]);
52855328

5286-
let result = pre_selection_scatter(&left, right).unwrap();
5329+
let result = pre_selection_scatter(&left, Some(&right)).unwrap();
52875330
let result_arr = result.into_array(left.len()).unwrap();
52885331

52895332
let expected = BooleanArray::from(vec![
@@ -5294,16 +5337,30 @@ mod tests {
52945337
]);
52955338
assert_eq!(&expected, result_arr.as_boolean());
52965339
}
5297-
// Test scalar right handling
5298-
{
5299-
// Left: [T, F, T]
5300-
// Right: Scalar true
5301-
let left = create_bool_array(vec![true, false, true]);
5302-
let right = ColumnarValue::Scalar(ScalarValue::Boolean(Some(true)));
5340+
}
53035341

5304-
let result = pre_selection_scatter(&left, right).unwrap();
5305-
assert!(matches!(result, ColumnarValue::Scalar(_)));
5306-
}
5342+
#[test]
5343+
fn test_and_true_preselection_returns_lhs() {
5344+
let schema =
5345+
Arc::new(Schema::new(vec![Field::new("c", DataType::Boolean, false)]));
5346+
let c_array = Arc::new(BooleanArray::from(vec![false, true, false, false, false]))
5347+
as ArrayRef;
5348+
let batch = RecordBatch::try_new(Arc::clone(&schema), vec![Arc::clone(&c_array)])
5349+
.unwrap();
5350+
5351+
let expr = logical2physical(&logical_col("c").and(expr_lit(true)), &schema);
5352+
5353+
let result = expr.evaluate(&batch).unwrap();
5354+
let ColumnarValue::Array(result_arr) = result else {
5355+
panic!("Expected ColumnarValue::Array");
5356+
};
5357+
5358+
let expected: Vec<_> = c_array.as_boolean().iter().collect();
5359+
let actual: Vec<_> = result_arr.as_boolean().iter().collect();
5360+
assert_eq!(
5361+
expected, actual,
5362+
"AND with TRUE must equal LHS even with PreSelection"
5363+
);
53075364
}
53085365

53095366
#[test]

docs/source/user-guide/sql/window_functions.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -331,6 +331,8 @@ FROM employees;
331331
+-------------+--------+---------+
332332
```
333333

334+
#
335+
334336
## Analytical Functions
335337

336338
- [first_value](#first_value)

0 commit comments

Comments
 (0)