Skip to content

Commit 4afd042

Browse files
authored
chore: reenable array_contains tests (#3912)
1 parent 967a81e commit 4afd042

File tree

5 files changed

+8
-45
lines changed

5 files changed

+8
-45
lines changed

docs/source/user-guide/latest/compatibility.md

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,8 +60,6 @@ the [Comet Supported Expressions Guide](expressions.md) for more information on
6060

6161
### Array Expressions
6262

63-
- **ArrayContains**: Returns null instead of false for empty arrays with literal values.
64-
[#3346](https://github.com/apache/datafusion-comet/issues/3346)
6563
- **ArrayRemove**: Returns null when the element to remove is null, instead of removing null elements from the array.
6664
[#3173](https://github.com/apache/datafusion-comet/issues/3173)
6765
- **ArraysOverlap**: Inconsistent behavior when arrays contain NULL values.

docs/source/user-guide/latest/expressions.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,7 @@ Comet supports using the following aggregate functions within window contexts wi
234234
| -------------- | ----------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
235235
| ArrayAppend | Yes | |
236236
| ArrayCompact | No | |
237-
| ArrayContains | No | Returns null instead of false for empty arrays with literal values ([#3346](https://github.com/apache/datafusion-comet/issues/3346)) |
237+
| ArrayContains | Yes | |
238238
| ArrayDistinct | No | Behaves differently than Spark. Comet first sorts and then removes duplicates, while Spark preserves the original order. |
239239
| ArrayExcept | No | |
240240
| ArrayFilter | Yes | Only supports case where function is `IsNotNull` |

native/core/src/execution/jni_api.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ use datafusion::{
4040
prelude::{SessionConfig, SessionContext},
4141
};
4242
use datafusion_comet_proto::spark_operator::Operator;
43+
use datafusion_spark::function::array::array_contains::SparkArrayContains;
4344
use datafusion_spark::function::bitwise::bit_count::SparkBitCount;
4445
use datafusion_spark::function::bitwise::bit_get::SparkBitGet;
4546
use datafusion_spark::function::bitwise::bitwise_not::SparkBitwiseNot;
@@ -416,6 +417,7 @@ fn register_datafusion_spark_function(session_ctx: &SessionContext) {
416417
session_ctx.register_udf(ScalarUDF::new_from_impl(SparkLuhnCheck::default()));
417418
session_ctx.register_udf(ScalarUDF::new_from_impl(SparkSpace::default()));
418419
session_ctx.register_udf(ScalarUDF::new_from_impl(SparkBitCount::default()));
420+
session_ctx.register_udf(ScalarUDF::new_from_impl(SparkArrayContains::default()));
419421
}
420422

421423
/// Prepares arrow arrays for output.

spark/src/main/scala/org/apache/comet/serde/arrays.scala

Lines changed: 1 addition & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -141,49 +141,14 @@ object CometArrayAppend extends CometExpressionSerde[ArrayAppend] {
141141

142142
object CometArrayContains extends CometExpressionSerde[ArrayContains] {
143143

144-
override def getSupportLevel(expr: ArrayContains): SupportLevel =
145-
Incompatible(
146-
Some(
147-
"Returns null instead of false for empty arrays with literal values" +
148-
" (https://github.com/apache/datafusion-comet/issues/3346)"))
149-
150144
override def convert(
151145
expr: ArrayContains,
152146
inputs: Seq[Attribute],
153147
binding: Boolean): Option[ExprOuterClass.Expr] = {
154148
val arrayExprProto = exprToProto(expr.children.head, inputs, binding)
155149
val keyExprProto = exprToProto(expr.children(1), inputs, binding)
156150

157-
val arrayContainsScalarExpr =
158-
scalarFunctionExprToProto("array_has", arrayExprProto, keyExprProto)
159-
160-
// Handle NULL array input - return NULL if array is NULL (matching Spark's behavior)
161-
val isNotNullExpr = createUnaryExpr(
162-
expr,
163-
expr.children.head,
164-
inputs,
165-
binding,
166-
(builder, unaryExpr) => builder.setIsNotNull(unaryExpr))
167-
168-
val nullLiteralProto = exprToProto(Literal(null, BooleanType), Seq.empty)
169-
170-
if (arrayContainsScalarExpr.isDefined && isNotNullExpr.isDefined &&
171-
nullLiteralProto.isDefined) {
172-
val caseWhenExpr = ExprOuterClass.CaseWhen
173-
.newBuilder()
174-
.addWhen(isNotNullExpr.get)
175-
.addThen(arrayContainsScalarExpr.get)
176-
.setElseExpr(nullLiteralProto.get)
177-
.build()
178-
Some(
179-
ExprOuterClass.Expr
180-
.newBuilder()
181-
.setCaseWhen(caseWhenExpr)
182-
.build())
183-
} else {
184-
withInfo(expr, expr.children: _*)
185-
None
186-
}
151+
scalarFunctionExprToProto("array_contains", arrayExprProto, keyExprProto)
187152
}
188153
}
189154

spark/src/test/resources/sql-tests/expressions/array/array_contains.sql

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,21 +23,19 @@ CREATE TABLE test_array_contains(arr array<int>, val int) USING parquet
2323
statement
2424
INSERT INTO test_array_contains VALUES (array(1, 2, 3), 2), (array(1, 2, 3), 4), (array(1, NULL, 3), NULL), (array(), 1), (NULL, 1)
2525

26-
query spark_answer_only
26+
query
2727
SELECT array_contains(arr, val) FROM test_array_contains
2828

2929
-- column + literal
30-
query ignore(https://github.com/apache/datafusion-comet/issues/3346)
30+
query
3131
SELECT array_contains(arr, 2) FROM test_array_contains
3232

3333
-- literal + column
34-
query spark_answer_only
34+
query
3535
SELECT array_contains(array(1, 2, 3), val) FROM test_array_contains
3636

3737
-- literal + literal
38-
-- Note: array_contains(array(), 1) still has a bug (issue #3346) so we use spark_answer_only
39-
-- The NULL array case (cast(NULL as array<int>)) was fixed in issue #3345
40-
query spark_answer_only
38+
query
4139
SELECT array_contains(array(1, 2, 3), 2), array_contains(array(1, 2, 3), 4), array_contains(array(), 1), array_contains(cast(NULL as array<int>), 1)
4240

4341
-- Additional NULL array tests (issue #3345 fix verification)

0 commit comments

Comments
 (0)