Skip to content

Commit fc29446

Browse files
mgaido91, cloud-fan
authored and committed
[SPARK-22699][SQL] GenerateSafeProjection should not use global variables for struct
## What changes were proposed in this pull request? GenerateSafeProjection defines a mutable state for each struct, which is not needed. This is bad because of the well-known issues related to constant pool limits. This PR replaces the global variable with a local one. ## How was this patch tested? Added a UT. Author: Marco Gaido <[email protected]> Closes #19914 from mgaido91/SPARK-22699.
1 parent dd59a4b commit fc29446

File tree

2 files changed

+19
-10
lines changed

2 files changed

+19
-10
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,6 @@ object GenerateSafeProjection extends CodeGenerator[Seq[Expression], Projection]
4949
val tmpInput = ctx.freshName("tmpInput")
5050
val output = ctx.freshName("safeRow")
5151
val values = ctx.freshName("values")
52-
// These expressions could be split into multiple functions
53-
ctx.addMutableState("Object[]", values, s"$values = null;")
5452

5553
val rowClass = classOf[GenericInternalRow].getName
5654

@@ -66,15 +64,15 @@ object GenerateSafeProjection extends CodeGenerator[Seq[Expression], Projection]
6664
val allFields = ctx.splitExpressions(
6765
expressions = fieldWriters,
6866
funcName = "writeFields",
69-
arguments = Seq("InternalRow" -> tmpInput)
67+
arguments = Seq("InternalRow" -> tmpInput, "Object[]" -> values)
7068
)
71-
val code = s"""
72-
final InternalRow $tmpInput = $input;
73-
$values = new Object[${schema.length}];
74-
$allFields
75-
final InternalRow $output = new $rowClass($values);
76-
$values = null;
77-
"""
69+
val code =
70+
s"""
71+
|final InternalRow $tmpInput = $input;
72+
|final Object[] $values = new Object[${schema.length}];
73+
|$allFields
74+
|final InternalRow $output = new $rowClass($values);
75+
""".stripMargin
7876

7977
ExprCode(code, "false", output)
8078
}

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,4 +208,15 @@ class GeneratedProjectionSuite extends SparkFunSuite {
208208
unsafeProj.apply(InternalRow(InternalRow(UTF8String.fromString("b"))))
209209
assert(row.getStruct(0, 1).getString(0).toString == "a")
210210
}
211+
212+
test("SPARK-22699: GenerateSafeProjection should not use global variables for struct") {
213+
val safeProj = GenerateSafeProjection.generate(
214+
Seq(BoundReference(0, new StructType().add("i", IntegerType), true)))
215+
val globalVariables = safeProj.getClass.getDeclaredFields
216+
// We always need 3 variables:
217+
// - one is a reference to this
218+
// - one is the references object
219+
// - one is the mutableRow
220+
assert(globalVariables.length == 3)
221+
}
211222
}

0 commit comments

Comments
 (0)