Skip to content

Commit f110a7f

Browse files
mgaido91gatorsmile
authored andcommitted
[SPARK-22693][SQL] CreateNamedStruct and InSet should not use global variables
## What changes were proposed in this pull request? CreateNamedStruct and InSet are using a global variable which is not needed. This can generate some unneeded entries in the constant pool. The PR removes the unnecessary mutable states and makes them local variables. ## How was this patch tested? added UT Author: Marco Gaido <[email protected]> Author: Marco Gaido <[email protected]> Closes #19896 from mgaido91/SPARK-22693.
1 parent 9948b86 commit f110a7f

File tree

4 files changed

+40
-23
lines changed

4 files changed

+40
-23
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -356,22 +356,25 @@ case class CreateNamedStruct(children: Seq[Expression]) extends CreateNamedStruc
356356
override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
357357
val rowClass = classOf[GenericInternalRow].getName
358358
val values = ctx.freshName("values")
359-
ctx.addMutableState("Object[]", values, s"$values = null;")
359+
val valCodes = valExprs.zipWithIndex.map { case (e, i) =>
360+
val eval = e.genCode(ctx)
361+
s"""
362+
|${eval.code}
363+
|if (${eval.isNull}) {
364+
| $values[$i] = null;
365+
|} else {
366+
| $values[$i] = ${eval.value};
367+
|}
368+
""".stripMargin
369+
}
360370
val valuesCode = ctx.splitExpressionsWithCurrentInputs(
361-
valExprs.zipWithIndex.map { case (e, i) =>
362-
val eval = e.genCode(ctx)
363-
s"""
364-
${eval.code}
365-
if (${eval.isNull}) {
366-
$values[$i] = null;
367-
} else {
368-
$values[$i] = ${eval.value};
369-
}"""
370-
})
371+
expressions = valCodes,
372+
funcName = "createNamedStruct",
373+
extraArguments = "Object[]" -> values :: Nil)
371374

372375
ev.copy(code =
373376
s"""
374-
|$values = new Object[${valExprs.size}];
377+
|Object[] $values = new Object[${valExprs.size}];
375378
|$valuesCode
376379
|final InternalRow ${ev.value} = new $rowClass($values);
377380
|$values = null;

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -344,17 +344,17 @@ case class InSet(child: Expression, hset: Set[Any]) extends UnaryExpression with
344344
} else {
345345
""
346346
}
347-
ctx.addMutableState(setName, setTerm,
348-
s"$setTerm = (($InSetName)references[${ctx.references.size - 1}]).getSet();")
349-
ev.copy(code = s"""
350-
${childGen.code}
351-
boolean ${ev.isNull} = ${childGen.isNull};
352-
boolean ${ev.value} = false;
353-
if (!${ev.isNull}) {
354-
${ev.value} = $setTerm.contains(${childGen.value});
355-
$setNull
356-
}
357-
""")
347+
ev.copy(code =
348+
s"""
349+
|${childGen.code}
350+
|${ctx.JAVA_BOOLEAN} ${ev.isNull} = ${childGen.isNull};
351+
|${ctx.JAVA_BOOLEAN} ${ev.value} = false;
352+
|if (!${ev.isNull}) {
353+
| $setName $setTerm = (($InSetName)references[${ctx.references.size - 1}]).getSet();
354+
| ${ev.value} = $setTerm.contains(${childGen.value});
355+
| $setNull
356+
|}
357+
""".stripMargin)
358358
}
359359

360360
override def sql: String = {

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ package org.apache.spark.sql.catalyst.expressions
2020
import org.apache.spark.SparkFunSuite
2121
import org.apache.spark.sql.catalyst.analysis.UnresolvedExtractValue
2222
import org.apache.spark.sql.catalyst.dsl.expressions._
23+
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenContext
2324
import org.apache.spark.sql.types._
2425
import org.apache.spark.unsafe.types.UTF8String
2526

@@ -299,4 +300,10 @@ class ComplexTypeSuite extends SparkFunSuite with ExpressionEvalHelper {
299300
new StringToMap(Literal("a=1_b=2_c=3"), Literal("_"), NonFoldableLiteral("="))
300301
.checkInputDataTypes().isFailure)
301302
}
303+
304+
test("SPARK-22693: CreateNamedStruct should not use global variables") {
305+
val ctx = new CodegenContext
306+
CreateNamedStruct(Seq("a", "x", "b", 2.0)).genCode(ctx)
307+
assert(ctx.mutableStates.isEmpty)
308+
}
302309
}

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ import org.apache.spark.SparkFunSuite
2525
import org.apache.spark.sql.RandomDataGenerator
2626
import org.apache.spark.sql.catalyst.InternalRow
2727
import org.apache.spark.sql.catalyst.encoders.ExamplePointUDT
28+
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenContext
2829
import org.apache.spark.sql.catalyst.util.{ArrayData, GenericArrayData}
2930
import org.apache.spark.sql.types._
3031

@@ -429,4 +430,10 @@ class PredicateSuite extends SparkFunSuite with ExpressionEvalHelper {
429430
val infinity = Literal(Double.PositiveInfinity)
430431
checkEvaluation(EqualTo(infinity, infinity), true)
431432
}
433+
434+
test("SPARK-22693: InSet should not use global variables") {
435+
val ctx = new CodegenContext
436+
InSet(Literal(1), Set(1, 2, 3, 4)).genCode(ctx)
437+
assert(ctx.mutableStates.isEmpty)
438+
}
432439
}

0 commit comments

Comments
 (0)