Skip to content
This repository was archived by the owner on Jan 9, 2020. It is now read-only.

Commit b99c0e9

Browse files
committed
Revert "[SPARK-18016][SQL][CATALYST][BRANCH-2.2] Code Generation: Constant Pool Limit - Class Splitting"
This reverts commit 198e3a0.
1 parent d625734 commit b99c0e9

21 files changed

Lines changed: 79 additions & 248 deletions

sql/catalyst/pom.xml

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -131,13 +131,6 @@
131131
</execution>
132132
</executions>
133133
</plugin>
134-
<plugin>
135-
<groupId>org.scalatest</groupId>
136-
<artifactId>scalatest-maven-plugin</artifactId>
137-
<configuration>
138-
<argLine>-Xmx4g -Xss4096k -XX:MaxPermSize=${MaxPermGen} -XX:ReservedCodeCacheSize=512m</argLine>
139-
</configuration>
140-
</plugin>
141134
<plugin>
142135
<groupId>org.antlr</groupId>
143136
<artifactId>antlr4-maven-plugin</artifactId>

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -988,7 +988,7 @@ case class ScalaUDF(
988988
val converterTerm = ctx.freshName("converter")
989989
val expressionIdx = ctx.references.size - 1
990990
ctx.addMutableState(converterClassName, converterTerm,
991-
s"$converterTerm = ($converterClassName)$typeConvertersClassName" +
991+
s"this.$converterTerm = ($converterClassName)$typeConvertersClassName" +
992992
s".createToScalaConverter(((${expressionClassName})((($scalaUDFClassName)" +
993993
s"references[$expressionIdx]).getChildren().apply($index))).dataType());")
994994
converterTerm
@@ -1005,7 +1005,7 @@ case class ScalaUDF(
10051005
// Generate codes used to convert the returned value of user-defined functions to Catalyst type
10061006
val catalystConverterTerm = ctx.freshName("catalystConverter")
10071007
ctx.addMutableState(converterClassName, catalystConverterTerm,
1008-
s"$catalystConverterTerm = ($converterClassName)$typeConvertersClassName" +
1008+
s"this.$catalystConverterTerm = ($converterClassName)$typeConvertersClassName" +
10091009
s".createToCatalystConverter($scalaUDF.dataType());")
10101010

10111011
val resultTerm = ctx.freshName("result")
@@ -1019,7 +1019,7 @@ case class ScalaUDF(
10191019

10201020
val funcTerm = ctx.freshName("udf")
10211021
ctx.addMutableState(funcClassName, funcTerm,
1022-
s"$funcTerm = ($funcClassName)$scalaUDF.userDefinedFunc();")
1022+
s"this.$funcTerm = ($funcClassName)$scalaUDF.userDefinedFunc();")
10231023

10241024
// codegen for children expressions
10251025
val evals = children.map(_.genCode(ctx))

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala

Lines changed: 21 additions & 114 deletions
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ class CodegenContext {
113113
val idx = references.length
114114
references += obj
115115
val clsName = Option(className).getOrElse(obj.getClass.getName)
116-
addMutableState(clsName, term, s"$term = ($clsName) references[$idx];")
116+
addMutableState(clsName, term, s"this.$term = ($clsName) references[$idx];")
117117
term
118118
}
119119

@@ -202,6 +202,16 @@ class CodegenContext {
202202
partitionInitializationStatements.mkString("\n")
203203
}
204204

205+
/**
206+
* Holding all the functions those will be added into generated class.
207+
*/
208+
val addedFunctions: mutable.Map[String, String] =
209+
mutable.Map.empty[String, String]
210+
211+
def addNewFunction(funcName: String, funcCode: String): Unit = {
212+
addedFunctions += ((funcName, funcCode))
213+
}
214+
205215
/**
206216
* Holds expressions that are equivalent. Used to perform subexpression elimination
207217
* during codegen.
@@ -223,118 +233,10 @@ class CodegenContext {
223233
// The collection of sub-expression result resetting methods that need to be called on each row.
224234
val subexprFunctions = mutable.ArrayBuffer.empty[String]
225235

226-
private val outerClassName = "OuterClass"
227-
228-
/**
229-
* Holds the class and instance names to be generated, where `OuterClass` is a placeholder
230-
* standing for whichever class is generated as the outermost class and which will contain any
231-
* nested sub-classes. All other classes and instance names in this list will represent private,
232-
* nested sub-classes.
233-
*/
234-
private val classes: mutable.ListBuffer[(String, String)] =
235-
mutable.ListBuffer[(String, String)](outerClassName -> null)
236-
237-
// A map holding the current size in bytes of each class to be generated.
238-
private val classSize: mutable.Map[String, Int] =
239-
mutable.Map[String, Int](outerClassName -> 0)
240-
241-
// Nested maps holding function names and their code belonging to each class.
242-
private val classFunctions: mutable.Map[String, mutable.Map[String, String]] =
243-
mutable.Map(outerClassName -> mutable.Map.empty[String, String])
244-
245-
// Returns the size of the most recently added class.
246-
private def currClassSize(): Int = classSize(classes.head._1)
247-
248-
// Returns the class name and instance name for the most recently added class.
249-
private def currClass(): (String, String) = classes.head
250-
251-
// Adds a new class. Requires the class' name, and its instance name.
252-
private def addClass(className: String, classInstance: String): Unit = {
253-
classes.prepend(className -> classInstance)
254-
classSize += className -> 0
255-
classFunctions += className -> mutable.Map.empty[String, String]
236+
def declareAddedFunctions(): String = {
237+
addedFunctions.map { case (funcName, funcCode) => funcCode }.mkString("\n")
256238
}
257239

258-
/**
259-
* Adds a function to the generated class. If the code for the `OuterClass` grows too large, the
260-
* function will be inlined into a new private, nested class, and a instance-qualified name for
261-
* the function will be returned. Otherwise, the function will be inined to the `OuterClass` the
262-
* simple `funcName` will be returned.
263-
*
264-
* @param funcName the class-unqualified name of the function
265-
* @param funcCode the body of the function
266-
* @param inlineToOuterClass whether the given code must be inlined to the `OuterClass`. This
267-
* can be necessary when a function is declared outside of the context
268-
* it is eventually referenced and a returned qualified function name
269-
* cannot otherwise be accessed.
270-
* @return the name of the function, qualified by class if it will be inlined to a private,
271-
* nested sub-class
272-
*/
273-
def addNewFunction(
274-
funcName: String,
275-
funcCode: String,
276-
inlineToOuterClass: Boolean = false): String = {
277-
// The number of named constants that can exist in the class is limited by the Constant Pool
278-
// limit, 65,536. We cannot know how many constants will be inserted for a class, so we use a
279-
// threshold of 1600k bytes to determine when a function should be inlined to a private, nested
280-
// sub-class.
281-
val (className, classInstance) = if (inlineToOuterClass) {
282-
outerClassName -> ""
283-
} else if (currClassSize > 1600000) {
284-
val className = freshName("NestedClass")
285-
val classInstance = freshName("nestedClassInstance")
286-
287-
addClass(className, classInstance)
288-
289-
className -> classInstance
290-
} else {
291-
currClass()
292-
}
293-
294-
classSize(className) += funcCode.length
295-
classFunctions(className) += funcName -> funcCode
296-
297-
if (className == outerClassName) {
298-
funcName
299-
} else {
300-
301-
s"$classInstance.$funcName"
302-
}
303-
}
304-
305-
/**
306-
* Instantiates all nested, private sub-classes as objects to the `OuterClass`
307-
*/
308-
private[sql] def initNestedClasses(): String = {
309-
// Nested, private sub-classes have no mutable state (though they do reference the outer class'
310-
// mutable state), so we declare and initialize them inline to the OuterClass.
311-
classes.filter(_._1 != outerClassName).map {
312-
case (className, classInstance) =>
313-
s"private $className $classInstance = new $className();"
314-
}.mkString("\n")
315-
}
316-
317-
/**
318-
* Declares all function code that should be inlined to the `OuterClass`.
319-
*/
320-
private[sql] def declareAddedFunctions(): String = {
321-
classFunctions(outerClassName).values.mkString("\n")
322-
}
323-
324-
/**
325-
* Declares all nested, private sub-classes and the function code that should be inlined to them.
326-
*/
327-
private[sql] def declareNestedClasses(): String = {
328-
classFunctions.filterKeys(_ != outerClassName).map {
329-
case (className, functions) =>
330-
s"""
331-
|private class $className {
332-
| ${functions.values.mkString("\n")}
333-
|}
334-
""".stripMargin
335-
}
336-
}.mkString("\n")
337-
338240
final val JAVA_BOOLEAN = "boolean"
339241
final val JAVA_BYTE = "byte"
340242
final val JAVA_SHORT = "short"
@@ -654,7 +556,8 @@ class CodegenContext {
654556
return 0;
655557
}
656558
"""
657-
s"${addNewFunction(compareFunc, funcCode)}($c1, $c2)"
559+
addNewFunction(compareFunc, funcCode)
560+
s"this.$compareFunc($c1, $c2)"
658561
case schema: StructType =>
659562
val comparisons = GenerateOrdering.genComparisons(this, schema)
660563
val compareFunc = freshName("compareStruct")
@@ -670,7 +573,8 @@ class CodegenContext {
670573
return 0;
671574
}
672575
"""
673-
s"${addNewFunction(compareFunc, funcCode)}($c1, $c2)"
576+
addNewFunction(compareFunc, funcCode)
577+
s"this.$compareFunc($c1, $c2)"
674578
case other if other.isInstanceOf[AtomicType] => s"$c1.compare($c2)"
675579
case udt: UserDefinedType[_] => genComp(udt.sqlType, c1, c2)
676580
case _ =>
@@ -785,6 +689,7 @@ class CodegenContext {
785689
|}
786690
""".stripMargin
787691
addNewFunction(name, code)
692+
name
788693
}
789694

790695
foldFunctions(functions.map(name => s"$name(${arguments.map(_._2).mkString(", ")})"))
@@ -868,6 +773,8 @@ class CodegenContext {
868773
|}
869774
""".stripMargin
870775

776+
addNewFunction(fnName, fn)
777+
871778
// Add a state and a mapping of the common subexpressions that are associate with this
872779
// state. Adding this expression to subExprEliminationExprMap means it will call `fn`
873780
// when it is code generated. This decision should be a cost based one.
@@ -885,7 +792,7 @@ class CodegenContext {
885792
addMutableState(javaType(expr.dataType), value,
886793
s"$value = ${defaultValue(expr.dataType)};")
887794

888-
subexprFunctions += s"${addNewFunction(fnName, fn)}($INPUT_ROW);"
795+
subexprFunctions += s"$fnName($INPUT_ROW);"
889796
val state = SubExprEliminationState(isNull, value)
890797
e.foreach(subExprEliminationExprs.put(_, state))
891798
}

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -63,21 +63,21 @@ object GenerateMutableProjection extends CodeGenerator[Seq[Expression], MutableP
6363
if (e.nullable) {
6464
val isNull = s"isNull_$i"
6565
val value = s"value_$i"
66-
ctx.addMutableState("boolean", isNull, s"$isNull = true;")
66+
ctx.addMutableState("boolean", isNull, s"this.$isNull = true;")
6767
ctx.addMutableState(ctx.javaType(e.dataType), value,
68-
s"$value = ${ctx.defaultValue(e.dataType)};")
68+
s"this.$value = ${ctx.defaultValue(e.dataType)};")
6969
s"""
7070
${ev.code}
71-
$isNull = ${ev.isNull};
72-
$value = ${ev.value};
71+
this.$isNull = ${ev.isNull};
72+
this.$value = ${ev.value};
7373
"""
7474
} else {
7575
val value = s"value_$i"
7676
ctx.addMutableState(ctx.javaType(e.dataType), value,
77-
s"$value = ${ctx.defaultValue(e.dataType)};")
77+
s"this.$value = ${ctx.defaultValue(e.dataType)};")
7878
s"""
7979
${ev.code}
80-
$value = ${ev.value};
80+
this.$value = ${ev.value};
8181
"""
8282
}
8383
}
@@ -87,7 +87,7 @@ object GenerateMutableProjection extends CodeGenerator[Seq[Expression], MutableP
8787

8888
val updates = validExpr.zip(index).map {
8989
case (e, i) =>
90-
val ev = ExprCode("", s"isNull_$i", s"value_$i")
90+
val ev = ExprCode("", s"this.isNull_$i", s"this.value_$i")
9191
ctx.updateColumn("mutableRow", e.dataType, i, ev, e.nullable)
9292
}
9393

@@ -135,9 +135,6 @@ object GenerateMutableProjection extends CodeGenerator[Seq[Expression], MutableP
135135
$allUpdates
136136
return mutableRow;
137137
}
138-
139-
${ctx.initNestedClasses()}
140-
${ctx.declareNestedClasses()}
141138
}
142139
"""
143140

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -179,9 +179,6 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR
179179
$comparisons
180180
return 0;
181181
}
182-
183-
${ctx.initNestedClasses()}
184-
${ctx.declareNestedClasses()}
185182
}"""
186183

187184
val code = CodeFormatter.stripOverlappingComments(

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -72,9 +72,6 @@ object GeneratePredicate extends CodeGenerator[Expression, Predicate] {
7272
${eval.code}
7373
return !${eval.isNull} && ${eval.value};
7474
}
75-
76-
${ctx.initNestedClasses()}
77-
${ctx.declareNestedClasses()}
7875
}"""
7976

8077
val code = CodeFormatter.stripOverlappingComments(

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ object GenerateSafeProjection extends CodeGenerator[Seq[Expression], Projection]
4949
val output = ctx.freshName("safeRow")
5050
val values = ctx.freshName("values")
5151
// These expressions could be split into multiple functions
52-
ctx.addMutableState("Object[]", values, s"$values = null;")
52+
ctx.addMutableState("Object[]", values, s"this.$values = null;")
5353

5454
val rowClass = classOf[GenericInternalRow].getName
5555

@@ -65,10 +65,10 @@ object GenerateSafeProjection extends CodeGenerator[Seq[Expression], Projection]
6565
val allFields = ctx.splitExpressions(tmp, fieldWriters)
6666
val code = s"""
6767
final InternalRow $tmp = $input;
68-
$values = new Object[${schema.length}];
68+
this.$values = new Object[${schema.length}];
6969
$allFields
7070
final InternalRow $output = new $rowClass($values);
71-
$values = null;
71+
this.$values = null;
7272
"""
7373

7474
ExprCode(code, "false", output)
@@ -184,9 +184,6 @@ object GenerateSafeProjection extends CodeGenerator[Seq[Expression], Projection]
184184
$allExpressions
185185
return mutableRow;
186186
}
187-
188-
${ctx.initNestedClasses()}
189-
${ctx.declareNestedClasses()}
190187
}
191188
"""
192189

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro
8282
val rowWriterClass = classOf[UnsafeRowWriter].getName
8383
val rowWriter = ctx.freshName("rowWriter")
8484
ctx.addMutableState(rowWriterClass, rowWriter,
85-
s"$rowWriter = new $rowWriterClass($bufferHolder, ${inputs.length});")
85+
s"this.$rowWriter = new $rowWriterClass($bufferHolder, ${inputs.length});")
8686

8787
val resetWriter = if (isTopLevel) {
8888
// For top level row writer, it always writes to the beginning of the global buffer holder,
@@ -182,7 +182,7 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro
182182
val arrayWriterClass = classOf[UnsafeArrayWriter].getName
183183
val arrayWriter = ctx.freshName("arrayWriter")
184184
ctx.addMutableState(arrayWriterClass, arrayWriter,
185-
s"$arrayWriter = new $arrayWriterClass();")
185+
s"this.$arrayWriter = new $arrayWriterClass();")
186186
val numElements = ctx.freshName("numElements")
187187
val index = ctx.freshName("index")
188188
val element = ctx.freshName("element")
@@ -321,7 +321,7 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro
321321
val holder = ctx.freshName("holder")
322322
val holderClass = classOf[BufferHolder].getName
323323
ctx.addMutableState(holderClass, holder,
324-
s"$holder = new $holderClass($result, ${numVarLenFields * 32});")
324+
s"this.$holder = new $holderClass($result, ${numVarLenFields * 32});")
325325

326326
val resetBufferHolder = if (numVarLenFields == 0) {
327327
""
@@ -402,9 +402,6 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro
402402
${eval.code.trim}
403403
return ${eval.value};
404404
}
405-
406-
${ctx.initNestedClasses()}
407-
${ctx.declareNestedClasses()}
408405
}
409406
"""
410407

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ private [sql] object GenArrayData {
9393
if (!ctx.isPrimitiveType(elementType)) {
9494
val genericArrayClass = classOf[GenericArrayData].getName
9595
ctx.addMutableState("Object[]", arrayName,
96-
s"$arrayName = new Object[${numElements}];")
96+
s"this.$arrayName = new Object[${numElements}];")
9797

9898
val assignments = elementsCode.zipWithIndex.map { case (eval, i) =>
9999
val isNullAssignment = if (!isMapKey) {
@@ -340,7 +340,7 @@ case class CreateNamedStruct(children: Seq[Expression]) extends CreateNamedStruc
340340
override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
341341
val rowClass = classOf[GenericInternalRow].getName
342342
val values = ctx.freshName("values")
343-
ctx.addMutableState("Object[]", values, s"$values = null;")
343+
ctx.addMutableState("Object[]", values, s"this.$values = null;")
344344

345345
ev.copy(code = s"""
346346
$values = new Object[${valExprs.size}];""" +
@@ -357,7 +357,7 @@ case class CreateNamedStruct(children: Seq[Expression]) extends CreateNamedStruc
357357
}) +
358358
s"""
359359
final InternalRow ${ev.value} = new $rowClass($values);
360-
$values = null;
360+
this.$values = null;
361361
""", isNull = "false")
362362
}
363363

0 commit comments

Comments
 (0)