Skip to content
This repository was archived by the owner on Jan 9, 2020. It is now read-only.

Commit 198e3a0

Browse files
ALeksander Eskilson authored and cloud-fan committed
[SPARK-18016][SQL][CATALYST][BRANCH-2.2] Code Generation: Constant Pool Limit - Class Splitting
## What changes were proposed in this pull request?

This is a backport patch for Spark 2.2.x of the class splitting feature over excess generated code as was merged in apache#18075.

## How was this patch tested?

The same test provided in apache#18075 is included in this patch.

Author: ALeksander Eskilson <[email protected]>

Closes apache#18377 from bdrillard/class_splitting_2.2.
1 parent 529c04f commit 198e3a0

21 files changed

+248
-79
lines changed

sql/catalyst/pom.xml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,13 @@
131131
</execution>
132132
</executions>
133133
</plugin>
134+
<plugin>
135+
<groupId>org.scalatest</groupId>
136+
<artifactId>scalatest-maven-plugin</artifactId>
137+
<configuration>
138+
<argLine>-Xmx4g -Xss4096k -XX:MaxPermSize=${MaxPermGen} -XX:ReservedCodeCacheSize=512m</argLine>
139+
</configuration>
140+
</plugin>
134141
<plugin>
135142
<groupId>org.antlr</groupId>
136143
<artifactId>antlr4-maven-plugin</artifactId>

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -988,7 +988,7 @@ case class ScalaUDF(
988988
val converterTerm = ctx.freshName("converter")
989989
val expressionIdx = ctx.references.size - 1
990990
ctx.addMutableState(converterClassName, converterTerm,
991-
s"this.$converterTerm = ($converterClassName)$typeConvertersClassName" +
991+
s"$converterTerm = ($converterClassName)$typeConvertersClassName" +
992992
s".createToScalaConverter(((${expressionClassName})((($scalaUDFClassName)" +
993993
s"references[$expressionIdx]).getChildren().apply($index))).dataType());")
994994
converterTerm
@@ -1005,7 +1005,7 @@ case class ScalaUDF(
10051005
// Generate codes used to convert the returned value of user-defined functions to Catalyst type
10061006
val catalystConverterTerm = ctx.freshName("catalystConverter")
10071007
ctx.addMutableState(converterClassName, catalystConverterTerm,
1008-
s"this.$catalystConverterTerm = ($converterClassName)$typeConvertersClassName" +
1008+
s"$catalystConverterTerm = ($converterClassName)$typeConvertersClassName" +
10091009
s".createToCatalystConverter($scalaUDF.dataType());")
10101010

10111011
val resultTerm = ctx.freshName("result")
@@ -1019,7 +1019,7 @@ case class ScalaUDF(
10191019

10201020
val funcTerm = ctx.freshName("udf")
10211021
ctx.addMutableState(funcClassName, funcTerm,
1022-
s"this.$funcTerm = ($funcClassName)$scalaUDF.userDefinedFunc();")
1022+
s"$funcTerm = ($funcClassName)$scalaUDF.userDefinedFunc();")
10231023

10241024
// codegen for children expressions
10251025
val evals = children.map(_.genCode(ctx))

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala

Lines changed: 114 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ class CodegenContext {
113113
val idx = references.length
114114
references += obj
115115
val clsName = Option(className).getOrElse(obj.getClass.getName)
116-
addMutableState(clsName, term, s"this.$term = ($clsName) references[$idx];")
116+
addMutableState(clsName, term, s"$term = ($clsName) references[$idx];")
117117
term
118118
}
119119

@@ -202,16 +202,6 @@ class CodegenContext {
202202
partitionInitializationStatements.mkString("\n")
203203
}
204204

205-
/**
206-
* Holding all the functions those will be added into generated class.
207-
*/
208-
val addedFunctions: mutable.Map[String, String] =
209-
mutable.Map.empty[String, String]
210-
211-
def addNewFunction(funcName: String, funcCode: String): Unit = {
212-
addedFunctions += ((funcName, funcCode))
213-
}
214-
215205
/**
216206
* Holds expressions that are equivalent. Used to perform subexpression elimination
217207
* during codegen.
@@ -233,10 +223,118 @@ class CodegenContext {
233223
// The collection of sub-expression result resetting methods that need to be called on each row.
234224
val subexprFunctions = mutable.ArrayBuffer.empty[String]
235225

236-
def declareAddedFunctions(): String = {
237-
addedFunctions.map { case (funcName, funcCode) => funcCode }.mkString("\n")
226+
private val outerClassName = "OuterClass"
227+
228+
/**
229+
* Holds the class and instance names to be generated, where `OuterClass` is a placeholder
230+
* standing for whichever class is generated as the outermost class and which will contain any
231+
* nested sub-classes. All other classes and instance names in this list will represent private,
232+
* nested sub-classes.
233+
*/
234+
private val classes: mutable.ListBuffer[(String, String)] =
235+
mutable.ListBuffer[(String, String)](outerClassName -> null)
236+
237+
// A map holding the current size in bytes of each class to be generated.
238+
private val classSize: mutable.Map[String, Int] =
239+
mutable.Map[String, Int](outerClassName -> 0)
240+
241+
// Nested maps holding function names and their code belonging to each class.
242+
private val classFunctions: mutable.Map[String, mutable.Map[String, String]] =
243+
mutable.Map(outerClassName -> mutable.Map.empty[String, String])
244+
245+
// Returns the size of the most recently added class.
246+
private def currClassSize(): Int = classSize(classes.head._1)
247+
248+
// Returns the class name and instance name for the most recently added class.
249+
private def currClass(): (String, String) = classes.head
250+
251+
// Adds a new class. Requires the class' name, and its instance name.
252+
private def addClass(className: String, classInstance: String): Unit = {
253+
classes.prepend(className -> classInstance)
254+
classSize += className -> 0
255+
classFunctions += className -> mutable.Map.empty[String, String]
238256
}
239257

258+
/**
259+
* Adds a function to the generated class. If the code for the `OuterClass` grows too large, the
260+
* function will be inlined into a new private, nested class, and an instance-qualified name for
261+
* the function will be returned. Otherwise, the function will be inlined to the `OuterClass` and
262+
* the simple `funcName` will be returned.
263+
*
264+
* @param funcName the class-unqualified name of the function
265+
* @param funcCode the body of the function
266+
* @param inlineToOuterClass whether the given code must be inlined to the `OuterClass`. This
267+
* can be necessary when a function is declared outside of the context
268+
* it is eventually referenced and a returned qualified function name
269+
* cannot otherwise be accessed.
270+
* @return the name of the function, qualified by class if it will be inlined to a private,
271+
* nested sub-class
272+
*/
273+
def addNewFunction(
274+
funcName: String,
275+
funcCode: String,
276+
inlineToOuterClass: Boolean = false): String = {
277+
// The number of named constants that can exist in the class is limited by the Constant Pool
278+
// limit, 65,536. We cannot know how many constants will be inserted for a class, so we use a
279+
// threshold of 1600k bytes to determine when a function should be inlined to a private, nested
280+
// sub-class.
281+
val (className, classInstance) = if (inlineToOuterClass) {
282+
outerClassName -> ""
283+
} else if (currClassSize > 1600000) {
284+
val className = freshName("NestedClass")
285+
val classInstance = freshName("nestedClassInstance")
286+
287+
addClass(className, classInstance)
288+
289+
className -> classInstance
290+
} else {
291+
currClass()
292+
}
293+
294+
classSize(className) += funcCode.length
295+
classFunctions(className) += funcName -> funcCode
296+
297+
if (className == outerClassName) {
298+
funcName
299+
} else {
300+
301+
s"$classInstance.$funcName"
302+
}
303+
}
304+
305+
/**
306+
* Instantiates all nested, private sub-classes as objects to the `OuterClass`
307+
*/
308+
private[sql] def initNestedClasses(): String = {
309+
// Nested, private sub-classes have no mutable state (though they do reference the outer class'
310+
// mutable state), so we declare and initialize them inline to the OuterClass.
311+
classes.filter(_._1 != outerClassName).map {
312+
case (className, classInstance) =>
313+
s"private $className $classInstance = new $className();"
314+
}.mkString("\n")
315+
}
316+
317+
/**
318+
* Declares all function code that should be inlined to the `OuterClass`.
319+
*/
320+
private[sql] def declareAddedFunctions(): String = {
321+
classFunctions(outerClassName).values.mkString("\n")
322+
}
323+
324+
/**
325+
* Declares all nested, private sub-classes and the function code that should be inlined to them.
326+
*/
327+
private[sql] def declareNestedClasses(): String = {
328+
classFunctions.filterKeys(_ != outerClassName).map {
329+
case (className, functions) =>
330+
s"""
331+
|private class $className {
332+
| ${functions.values.mkString("\n")}
333+
|}
334+
""".stripMargin
335+
}
336+
}.mkString("\n")
337+
240338
final val JAVA_BOOLEAN = "boolean"
241339
final val JAVA_BYTE = "byte"
242340
final val JAVA_SHORT = "short"
@@ -556,8 +654,7 @@ class CodegenContext {
556654
return 0;
557655
}
558656
"""
559-
addNewFunction(compareFunc, funcCode)
560-
s"this.$compareFunc($c1, $c2)"
657+
s"${addNewFunction(compareFunc, funcCode)}($c1, $c2)"
561658
case schema: StructType =>
562659
val comparisons = GenerateOrdering.genComparisons(this, schema)
563660
val compareFunc = freshName("compareStruct")
@@ -573,8 +670,7 @@ class CodegenContext {
573670
return 0;
574671
}
575672
"""
576-
addNewFunction(compareFunc, funcCode)
577-
s"this.$compareFunc($c1, $c2)"
673+
s"${addNewFunction(compareFunc, funcCode)}($c1, $c2)"
578674
case other if other.isInstanceOf[AtomicType] => s"$c1.compare($c2)"
579675
case udt: UserDefinedType[_] => genComp(udt.sqlType, c1, c2)
580676
case _ =>
@@ -689,7 +785,6 @@ class CodegenContext {
689785
|}
690786
""".stripMargin
691787
addNewFunction(name, code)
692-
name
693788
}
694789

695790
foldFunctions(functions.map(name => s"$name(${arguments.map(_._2).mkString(", ")})"))
@@ -773,8 +868,6 @@ class CodegenContext {
773868
|}
774869
""".stripMargin
775870

776-
addNewFunction(fnName, fn)
777-
778871
// Add a state and a mapping of the common subexpressions that are associated with this
779872
// state. Adding this expression to subExprEliminationExprMap means it will call `fn`
780873
// when it is code generated. This decision should be a cost based one.
@@ -792,7 +885,7 @@ class CodegenContext {
792885
addMutableState(javaType(expr.dataType), value,
793886
s"$value = ${defaultValue(expr.dataType)};")
794887

795-
subexprFunctions += s"$fnName($INPUT_ROW);"
888+
subexprFunctions += s"${addNewFunction(fnName, fn)}($INPUT_ROW);"
796889
val state = SubExprEliminationState(isNull, value)
797890
e.foreach(subExprEliminationExprs.put(_, state))
798891
}

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -63,21 +63,21 @@ object GenerateMutableProjection extends CodeGenerator[Seq[Expression], MutableP
6363
if (e.nullable) {
6464
val isNull = s"isNull_$i"
6565
val value = s"value_$i"
66-
ctx.addMutableState("boolean", isNull, s"this.$isNull = true;")
66+
ctx.addMutableState("boolean", isNull, s"$isNull = true;")
6767
ctx.addMutableState(ctx.javaType(e.dataType), value,
68-
s"this.$value = ${ctx.defaultValue(e.dataType)};")
68+
s"$value = ${ctx.defaultValue(e.dataType)};")
6969
s"""
7070
${ev.code}
71-
this.$isNull = ${ev.isNull};
72-
this.$value = ${ev.value};
71+
$isNull = ${ev.isNull};
72+
$value = ${ev.value};
7373
"""
7474
} else {
7575
val value = s"value_$i"
7676
ctx.addMutableState(ctx.javaType(e.dataType), value,
77-
s"this.$value = ${ctx.defaultValue(e.dataType)};")
77+
s"$value = ${ctx.defaultValue(e.dataType)};")
7878
s"""
7979
${ev.code}
80-
this.$value = ${ev.value};
80+
$value = ${ev.value};
8181
"""
8282
}
8383
}
@@ -87,7 +87,7 @@ object GenerateMutableProjection extends CodeGenerator[Seq[Expression], MutableP
8787

8888
val updates = validExpr.zip(index).map {
8989
case (e, i) =>
90-
val ev = ExprCode("", s"this.isNull_$i", s"this.value_$i")
90+
val ev = ExprCode("", s"isNull_$i", s"value_$i")
9191
ctx.updateColumn("mutableRow", e.dataType, i, ev, e.nullable)
9292
}
9393

@@ -135,6 +135,9 @@ object GenerateMutableProjection extends CodeGenerator[Seq[Expression], MutableP
135135
$allUpdates
136136
return mutableRow;
137137
}
138+
139+
${ctx.initNestedClasses()}
140+
${ctx.declareNestedClasses()}
138141
}
139142
"""
140143

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,9 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR
179179
$comparisons
180180
return 0;
181181
}
182+
183+
${ctx.initNestedClasses()}
184+
${ctx.declareNestedClasses()}
182185
}"""
183186

184187
val code = CodeFormatter.stripOverlappingComments(

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,9 @@ object GeneratePredicate extends CodeGenerator[Expression, Predicate] {
7272
${eval.code}
7373
return !${eval.isNull} && ${eval.value};
7474
}
75+
76+
${ctx.initNestedClasses()}
77+
${ctx.declareNestedClasses()}
7578
}"""
7679

7780
val code = CodeFormatter.stripOverlappingComments(

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ object GenerateSafeProjection extends CodeGenerator[Seq[Expression], Projection]
4949
val output = ctx.freshName("safeRow")
5050
val values = ctx.freshName("values")
5151
// These expressions could be split into multiple functions
52-
ctx.addMutableState("Object[]", values, s"this.$values = null;")
52+
ctx.addMutableState("Object[]", values, s"$values = null;")
5353

5454
val rowClass = classOf[GenericInternalRow].getName
5555

@@ -65,10 +65,10 @@ object GenerateSafeProjection extends CodeGenerator[Seq[Expression], Projection]
6565
val allFields = ctx.splitExpressions(tmp, fieldWriters)
6666
val code = s"""
6767
final InternalRow $tmp = $input;
68-
this.$values = new Object[${schema.length}];
68+
$values = new Object[${schema.length}];
6969
$allFields
7070
final InternalRow $output = new $rowClass($values);
71-
this.$values = null;
71+
$values = null;
7272
"""
7373

7474
ExprCode(code, "false", output)
@@ -184,6 +184,9 @@ object GenerateSafeProjection extends CodeGenerator[Seq[Expression], Projection]
184184
$allExpressions
185185
return mutableRow;
186186
}
187+
188+
${ctx.initNestedClasses()}
189+
${ctx.declareNestedClasses()}
187190
}
188191
"""
189192

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro
8282
val rowWriterClass = classOf[UnsafeRowWriter].getName
8383
val rowWriter = ctx.freshName("rowWriter")
8484
ctx.addMutableState(rowWriterClass, rowWriter,
85-
s"this.$rowWriter = new $rowWriterClass($bufferHolder, ${inputs.length});")
85+
s"$rowWriter = new $rowWriterClass($bufferHolder, ${inputs.length});")
8686

8787
val resetWriter = if (isTopLevel) {
8888
// For top level row writer, it always writes to the beginning of the global buffer holder,
@@ -182,7 +182,7 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro
182182
val arrayWriterClass = classOf[UnsafeArrayWriter].getName
183183
val arrayWriter = ctx.freshName("arrayWriter")
184184
ctx.addMutableState(arrayWriterClass, arrayWriter,
185-
s"this.$arrayWriter = new $arrayWriterClass();")
185+
s"$arrayWriter = new $arrayWriterClass();")
186186
val numElements = ctx.freshName("numElements")
187187
val index = ctx.freshName("index")
188188
val element = ctx.freshName("element")
@@ -321,7 +321,7 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro
321321
val holder = ctx.freshName("holder")
322322
val holderClass = classOf[BufferHolder].getName
323323
ctx.addMutableState(holderClass, holder,
324-
s"this.$holder = new $holderClass($result, ${numVarLenFields * 32});")
324+
s"$holder = new $holderClass($result, ${numVarLenFields * 32});")
325325

326326
val resetBufferHolder = if (numVarLenFields == 0) {
327327
""
@@ -402,6 +402,9 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro
402402
${eval.code.trim}
403403
return ${eval.value};
404404
}
405+
406+
${ctx.initNestedClasses()}
407+
${ctx.declareNestedClasses()}
405408
}
406409
"""
407410

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ private [sql] object GenArrayData {
9393
if (!ctx.isPrimitiveType(elementType)) {
9494
val genericArrayClass = classOf[GenericArrayData].getName
9595
ctx.addMutableState("Object[]", arrayName,
96-
s"this.$arrayName = new Object[${numElements}];")
96+
s"$arrayName = new Object[${numElements}];")
9797

9898
val assignments = elementsCode.zipWithIndex.map { case (eval, i) =>
9999
val isNullAssignment = if (!isMapKey) {
@@ -340,7 +340,7 @@ case class CreateNamedStruct(children: Seq[Expression]) extends CreateNamedStruc
340340
override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
341341
val rowClass = classOf[GenericInternalRow].getName
342342
val values = ctx.freshName("values")
343-
ctx.addMutableState("Object[]", values, s"this.$values = null;")
343+
ctx.addMutableState("Object[]", values, s"$values = null;")
344344

345345
ev.copy(code = s"""
346346
$values = new Object[${valExprs.size}];""" +
@@ -357,7 +357,7 @@ case class CreateNamedStruct(children: Seq[Expression]) extends CreateNamedStruc
357357
}) +
358358
s"""
359359
final InternalRow ${ev.value} = new $rowClass($values);
360-
this.$values = null;
360+
$values = null;
361361
""", isNull = "false")
362362
}
363363

0 commit comments

Comments
 (0)