
Commit b790719

mgaido91 authored and cloud-fan committed
[SPARK-22696][SQL] objects functions should not use unneeded global variables
## What changes were proposed in this pull request?

Some objects functions are using global variables which are not needed. This can generate some unneeded entries in the constant pool. The PR replaces the unneeded global variables with local variables.

## How was this patch tested?

added UTs

Author: Marco Gaido <[email protected]>
Author: Marco Gaido <[email protected]>

Closes #19908 from mgaido91/SPARK-22696.
1 parent fc29446 commit b790719

File tree: 2 files changed, +49 −28 lines
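Before the diffs, here is the codegen pattern the patch applies, as a minimal, illustrative sketch rather than the actual Spark code: instead of registering a buffer with `ctx.addMutableState` (which becomes a field of the generated class and costs extra constant-pool entries), the variable is declared locally in the generated code and threaded into any split-out helper functions via `extraArguments`. The `CodegenContext` calls are the same ones used in the diffs below; the wrapper object, the sample literals, and the `fillValues` function name are assumptions made purely for this example.

```scala
import org.apache.spark.sql.catalyst.expressions.Literal
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenContext

// Minimal sketch of the "local variable + extraArguments" codegen pattern.
object LocalVarCodegenSketch {
  def main(args: Array[String]): Unit = {
    val ctx = new CodegenContext
    val values = ctx.freshName("values")

    // Before the patch the buffer was a global: a field of the generated class.
    // ctx.addMutableState("Object[]", values)

    // Generate one assignment per child expression, writing into `values`.
    val childrenCodes = Seq(Literal(1), Literal("x")).zipWithIndex.map { case (e, i) =>
      val eval = e.genCode(ctx)
      s"""
         |${eval.code}
         |$values[$i] = ${eval.value};
       """.stripMargin
    }

    // After the patch: if the code gets split into helper functions, the *local*
    // variable is passed along as an extra argument instead of living in a field.
    val assignments = ctx.splitExpressionsWithCurrentInputs(
      expressions = childrenCodes,
      funcName = "fillValues", // illustrative name
      extraArguments = "Object[]" -> values :: Nil)

    val code =
      s"""
         |Object[] $values = new Object[2];
         |$assignments
       """.stripMargin

    println(code)
    // Same property the new unit tests check: no global mutable state was registered.
    assert(ctx.mutableStates.isEmpty)
  }
}
```

The two tests added to CodeGenerationSuite assert exactly this property: after `genCode`, `ctx.mutableStates` stays empty.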

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala

Lines changed: 34 additions & 27 deletions

@@ -1106,27 +1106,31 @@ case class CreateExternalRow(children: Seq[Expression], schema: StructType)
   override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
     val rowClass = classOf[GenericRowWithSchema].getName
     val values = ctx.freshName("values")
-    ctx.addMutableState("Object[]", values)
 
     val childrenCodes = children.zipWithIndex.map { case (e, i) =>
       val eval = e.genCode(ctx)
-      eval.code + s"""
-        if (${eval.isNull}) {
-          $values[$i] = null;
-        } else {
-          $values[$i] = ${eval.value};
-        }
-       """
+      s"""
+         |${eval.code}
+         |if (${eval.isNull}) {
+         |  $values[$i] = null;
+         |} else {
+         |  $values[$i] = ${eval.value};
+         |}
+       """.stripMargin
     }
 
-    val childrenCode = ctx.splitExpressionsWithCurrentInputs(childrenCodes)
-    val schemaField = ctx.addReferenceObj("schema", schema)
+    val childrenCode = ctx.splitExpressionsWithCurrentInputs(
+      expressions = childrenCodes,
+      funcName = "createExternalRow",
+      extraArguments = "Object[]" -> values :: Nil)
+    val schemaField = ctx.addReferenceMinorObj(schema)
 
-    val code = s"""
-      $values = new Object[${children.size}];
-      $childrenCode
-      final ${classOf[Row].getName} ${ev.value} = new $rowClass($values, $schemaField);
-      """
+    val code =
+      s"""
+         |Object[] $values = new Object[${children.size}];
+         |$childrenCode
+         |final ${classOf[Row].getName} ${ev.value} = new $rowClass($values, $schemaField);
+       """.stripMargin
     ev.copy(code = code, isNull = "false")
   }
 }
@@ -1244,25 +1248,28 @@ case class InitializeJavaBean(beanInstance: Expression, setters: Map[String, Exp
 
     val javaBeanInstance = ctx.freshName("javaBean")
     val beanInstanceJavaType = ctx.javaType(beanInstance.dataType)
-    ctx.addMutableState(beanInstanceJavaType, javaBeanInstance)
 
     val initialize = setters.map {
       case (setterMethod, fieldValue) =>
         val fieldGen = fieldValue.genCode(ctx)
         s"""
-           ${fieldGen.code}
-           ${javaBeanInstance}.$setterMethod(${fieldGen.value});
-         """
+           |${fieldGen.code}
+           |$javaBeanInstance.$setterMethod(${fieldGen.value});
+         """.stripMargin
     }
-    val initializeCode = ctx.splitExpressionsWithCurrentInputs(initialize.toSeq)
+    val initializeCode = ctx.splitExpressionsWithCurrentInputs(
+      expressions = initialize.toSeq,
+      funcName = "initializeJavaBean",
+      extraArguments = beanInstanceJavaType -> javaBeanInstance :: Nil)
 
-    val code = s"""
-      ${instanceGen.code}
-      ${javaBeanInstance} = ${instanceGen.value};
-      if (!${instanceGen.isNull}) {
-        $initializeCode
-      }
-     """
+    val code =
+      s"""
+         |${instanceGen.code}
+         |$beanInstanceJavaType $javaBeanInstance = ${instanceGen.value};
+         |if (!${instanceGen.isNull}) {
+         |  $initializeCode
+         |}
+       """.stripMargin
     ev.copy(code = code, isNull = instanceGen.isNull, value = instanceGen.value)
   }
 }

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala

Lines changed: 15 additions & 1 deletion

@@ -25,7 +25,7 @@ import org.apache.spark.sql.Row
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.dsl.expressions._
 import org.apache.spark.sql.catalyst.expressions.codegen._
-import org.apache.spark.sql.catalyst.expressions.objects.{AssertNotNull, CreateExternalRow, GetExternalRowField, ValidateExternalType}
+import org.apache.spark.sql.catalyst.expressions.objects._
 import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, DateTimeUtils}
 import org.apache.spark.sql.types._
 import org.apache.spark.unsafe.types.UTF8String
@@ -380,4 +380,18 @@ class CodeGenerationSuite extends SparkFunSuite with ExpressionEvalHelper {
         s"Incorrect Evaluation: expressions: $exprAnd, actual: $actualAnd, expected: $expectedAnd")
     }
   }
+
+  test("SPARK-22696: CreateExternalRow should not use global variables") {
+    val ctx = new CodegenContext
+    val schema = new StructType().add("a", IntegerType).add("b", StringType)
+    CreateExternalRow(Seq(Literal(1), Literal("x")), schema).genCode(ctx)
+    assert(ctx.mutableStates.isEmpty)
+  }
+
+  test("SPARK-22696: InitializeJavaBean should not use global variables") {
+    val ctx = new CodegenContext
+    InitializeJavaBean(Literal.fromObject(new java.util.LinkedList[Int]),
+      Map("add" -> Literal(1))).genCode(ctx)
+    assert(ctx.mutableStates.isEmpty)
+  }
 }
