Skip to content

Commit 9361549

Browse files
authored
(feat) add support for ArrayMin scalar function (#1944)
1 parent 3f66495 commit 9361549

File tree

4 files changed

+38
-1
lines changed

4 files changed

+38
-1
lines changed

docs/source/user-guide/expressions.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,7 @@ The following Spark expressions are currently available. Any known compatibility
206206
| ArrayIntersect | Experimental |
207207
| ArrayJoin | Experimental |
208208
| ArrayMax | Experimental |
209+
| ArrayMin | |
209210
| ArrayRemove | |
210211
| ArrayRepeat | Experimental |
211212
| ArraysOverlap | Experimental |

spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ object QueryPlanSerde extends Logging with CometExprShim {
8888
classOf[ArrayIntersect] -> CometArrayIntersect,
8989
classOf[ArrayJoin] -> CometArrayJoin,
9090
classOf[ArrayMax] -> CometArrayMax,
91+
classOf[ArrayMin] -> CometArrayMin,
9192
classOf[ArrayRemove] -> CometArrayRemove,
9293
classOf[ArrayRepeat] -> CometArrayRepeat,
9394
classOf[ArraysOverlap] -> CometArraysOverlap,

spark/src/main/scala/org/apache/comet/serde/arrays.scala

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ package org.apache.comet.serde
2121

2222
import scala.annotation.tailrec
2323

24-
import org.apache.spark.sql.catalyst.expressions.{ArrayAppend, ArrayContains, ArrayDistinct, ArrayExcept, ArrayInsert, ArrayIntersect, ArrayJoin, ArrayMax, ArrayRemove, ArrayRepeat, ArraysOverlap, ArrayUnion, Attribute, CreateArray, Expression, Literal}
24+
import org.apache.spark.sql.catalyst.expressions.{ArrayAppend, ArrayContains, ArrayDistinct, ArrayExcept, ArrayInsert, ArrayIntersect, ArrayJoin, ArrayMax, ArrayMin, ArrayRemove, ArrayRepeat, ArraysOverlap, ArrayUnion, Attribute, CreateArray, Expression, Literal}
2525
import org.apache.spark.sql.internal.SQLConf
2626
import org.apache.spark.sql.types._
2727

@@ -189,6 +189,18 @@ object CometArrayMax extends CometExpressionSerde[ArrayMax] {
189189
}
190190
}
191191

192+
object CometArrayMin extends CometExpressionSerde[ArrayMin] {

  /**
   * Serializes Spark's `ArrayMin` expression to the native `array_min` scalar function.
   *
   * @param expr the Spark `ArrayMin` expression to convert
   * @param inputs attributes available for binding child expressions
   * @param binding whether child expressions should be bound to the inputs
   * @return the protobuf expression, or None if the child could not be converted
   */
  override def convert(
      expr: ArrayMin,
      inputs: Seq[Attribute],
      binding: Boolean): Option[ExprOuterClass.Expr] = {
    // Only the single array child needs serializing; delegate to the shared helpers.
    val childProto = exprToProto(expr.children.head, inputs, binding)
    val scalarProto = scalarFunctionExprToProto("array_min", childProto)
    optExprWithInfo(scalarProto, expr)
  }
}
203+
192204
object CometArraysOverlap extends CometExpressionSerde[ArraysOverlap] with IncompatExpr {
193205
override def convert(
194206
expr: ArraysOverlap,

spark/src/test/scala/org/apache/comet/CometArrayExpressionSuite.scala

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -409,6 +409,29 @@ class CometArrayExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelp
409409
}
410410
}
411411

412+
test("array_min") {
  // Exercise array_min against Spark for both dictionary-encoded and plain parquet.
  for (dictionaryEnabled <- Seq(true, false)) {
    withTempDir { dir =>
      val path = new Path(dir.toURI.toString, "test.parquet")
      makeParquetFileAllPrimitiveTypes(path, dictionaryEnabled, n = 10000)
      spark.read.parquet(path.toString).createOrReplaceTempView("t1")
      // Cover plain arrays, conditionally-null arrays, all-null and mixed-null
      // elements, the empty array, and IEEE infinities; every query must match
      // Spark's answer while running on the Comet operator.
      val queries = Seq(
        "SELECT array_min(array(_2, _3, _4)) FROM t1",
        "SELECT array_min((CASE WHEN _2 =_3 THEN array(_4) END)) FROM t1",
        "SELECT array_min((CASE WHEN _2 =_3 THEN array(_2, _4) END)) FROM t1",
        "SELECT array_min(array(CAST(NULL AS INT), CAST(NULL AS INT))) FROM t1",
        "SELECT array_min(array(_2, CAST(NULL AS INT))) FROM t1",
        "SELECT array_min(array()) FROM t1",
        "SELECT array_min(array(double('-Infinity'), 0.0, double('Infinity'))) FROM t1")
      queries.foreach(sql => checkSparkAnswerAndOperator(spark.sql(sql)))
    }
  }
}
434+
412435
test("array_intersect") {
413436
// TODO test fails if scan is auto
414437
// https://github.com/apache/datafusion-comet/issues/2174

0 commit comments

Comments
 (0)