Skip to content

Commit 2dbe275

Browse files
kiszk authored and cloud-fan committed
[SPARK-22603][SQL] Fix 64KB JVM bytecode limit problem with FormatString
## What changes were proposed in this pull request? This PR changes `FormatString` code generation to place the generated code for the argument expressions into separate methods when that code could be large. The variable arguments are passed by using an `Object` array. ## How was this patch tested? Added new test cases to `StringExpressionsSuite`. Author: Kazuaki Ishizaki <[email protected]> Closes #19817 from kiszk/SPARK-22603.
1 parent 5a02e3a commit 2dbe275

File tree

2 files changed

+28
-8
lines changed

2 files changed

+28
-8
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala

Lines changed: 20 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1372,19 +1372,30 @@ case class FormatString(children: Expression*) extends Expression with ImplicitC
13721372
val pattern = children.head.genCode(ctx)
13731373

13741374
val argListGen = children.tail.map(x => (x.dataType, x.genCode(ctx)))
1375-
val argListCode = argListGen.map(_._2.code + "\n")
1376-
1377-
val argListString = argListGen.foldLeft("")((s, v) => {
1378-
val nullSafeString =
1375+
val argList = ctx.freshName("argLists")
1376+
val numArgLists = argListGen.length
1377+
val argListCode = argListGen.zipWithIndex.map { case(v, index) =>
1378+
val value =
13791379
if (ctx.boxedType(v._1) != ctx.javaType(v._1)) {
13801380
// Java primitives get boxed in order to allow null values.
13811381
s"(${v._2.isNull}) ? (${ctx.boxedType(v._1)}) null : " +
13821382
s"new ${ctx.boxedType(v._1)}(${v._2.value})"
13831383
} else {
13841384
s"(${v._2.isNull}) ? null : ${v._2.value}"
13851385
}
1386-
s + "," + nullSafeString
1387-
})
1386+
s"""
1387+
${v._2.code}
1388+
$argList[$index] = $value;
1389+
"""
1390+
}
1391+
val argListCodes = if (ctx.INPUT_ROW != null && ctx.currentVars == null) {
1392+
ctx.splitExpressions(
1393+
expressions = argListCode,
1394+
funcName = "valueFormatString",
1395+
arguments = ("InternalRow", ctx.INPUT_ROW) :: ("Object[]", argList) :: Nil)
1396+
} else {
1397+
argListCode.mkString("\n")
1398+
}
13881399

13891400
val form = ctx.freshName("formatter")
13901401
val formatter = classOf[java.util.Formatter].getName
@@ -1395,10 +1406,11 @@ case class FormatString(children: Expression*) extends Expression with ImplicitC
13951406
boolean ${ev.isNull} = ${pattern.isNull};
13961407
${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)};
13971408
if (!${ev.isNull}) {
1398-
${argListCode.mkString}
13991409
$stringBuffer $sb = new $stringBuffer();
14001410
$formatter $form = new $formatter($sb, ${classOf[Locale].getName}.US);
1401-
$form.format(${pattern.value}.toString() $argListString);
1411+
Object[] $argList = new Object[$numArgLists];
1412+
$argListCodes
1413+
$form.format(${pattern.value}.toString(), $argList);
14021414
${ev.value} = UTF8String.fromString($sb.toString());
14031415
}""")
14041416
}

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -518,6 +518,14 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
518518
FormatString(Literal("aa%d%s"), 12, Literal.create(null, StringType)), "aa12null")
519519
}
520520

521+
test("SPARK-22603: FormatString should not generate codes beyond 64KB") {
522+
val N = 4500
523+
val args = (1 to N).map(i => Literal.create(i.toString, StringType))
524+
val format = "%s" * N
525+
val expected = (1 to N).map(i => i.toString).mkString
526+
checkEvaluation(FormatString(Literal(format) +: args: _*), expected)
527+
}
528+
521529
test("INSTR") {
522530
val s1 = 'a.string.at(0)
523531
val s2 = 'b.string.at(1)

0 commit comments

Comments
 (0)