Skip to content

Commit 6d7ff76

Browse files
stefankandic authored and cloud-fan committed
[SPARK-54843][SQL] Try_to_number expression not working for empty string input
### What changes were proposed in this pull request?

Catch the case in `ToNumberParser` where the input string is empty or consists only of whitespace, preventing a failure with an internal error later on when trying to create a `BigDecimal`.

### Why are the changes needed?

Without this change, passing an empty string (`select try_to_number('', '99')`) would fail with the following exception:

```
JVM stacktrace:
java.lang.NumberFormatException
    at java.base/java.math.BigDecimal.<init>(BigDecimal.java:692)
    at java.base/java.math.BigDecimal.<init>(BigDecimal.java:471)
    at java.base/java.math.BigDecimal.<init>(BigDecimal.java:900)
    at org.apache.spark.sql.catalyst.util.ToNumberParser.parseResultToDecimalValue(ToNumberParser.scala:627)
    at org.apache.spark.sql.catalyst.util.ToNumberParser.parse(ToNumberParser.scala:499)
```

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

New unit tests.

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes #53609 from stefankandic/numFormatFix.

Authored-by: Stefan Kandic <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
(cherry picked from commit df720c1)
Signed-off-by: Wenchen Fan <[email protected]>
1 parent eec9f8f commit 6d7ff76

File tree

2 files changed

+23
-0
lines changed

2 files changed

+23
-0
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ToNumberParser.scala

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -495,6 +495,9 @@ class ToNumberParser(numberFormat: String, errorOnFail: Boolean) extends Seriali
495495
// If we have consumed all the tokens in the format string, but characters remain unconsumed
496496
// in the input string, then the input string does not match the format string.
497497
formatMatchFailure(input, numberFormat)
498+
} else if (parsedBeforeDecimalPoint.isEmpty && parsedAfterDecimalPoint.isEmpty) {
499+
// If no digits were collected (e.g. input was empty or all whitespace), treat as format match failure.
500+
formatMatchFailure(input, numberFormat)
498501
} else {
499502
parseResultToDecimalValue(negateResult)
500503
}

sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1342,6 +1342,26 @@ class StringFunctionsSuite extends QueryTest with SharedSparkSession {
13421342
checkAnswer(df.select(try_to_number($"value", lit("$99.99"))), Seq(Row(null)))
13431343
}
13441344

1345+
test("try_to_number with whitespace-only input should return NULL") {
1346+
// Empty string
1347+
checkAnswer(sql("select try_to_number('', '99')"), Seq(Row(null)))
1348+
checkAnswer(sql("select try_to_number('', '999')"), Seq(Row(null)))
1349+
1350+
// Spaces only
1351+
checkAnswer(sql("select try_to_number(' ', '99')"), Seq(Row(null)))
1352+
checkAnswer(sql("select try_to_number(' ', '9')"), Seq(Row(null)))
1353+
1354+
// Different whitespace characters (tabs, newlines)
1355+
checkAnswer(sql("select try_to_number('\t\t', '99')"), Seq(Row(null)))
1356+
checkAnswer(sql("select try_to_number('\n\n', '99')"), Seq(Row(null)))
1357+
checkAnswer(sql("select try_to_number(' \t\n ', '99')"), Seq(Row(null)))
1358+
1359+
// With format strings containing decimal points, dollar signs, etc.
1360+
checkAnswer(sql("select try_to_number(' ', '$99.99')"), Seq(Row(null)))
1361+
checkAnswer(sql("select try_to_number('', '999.99')"), Seq(Row(null)))
1362+
checkAnswer(sql("select try_to_number('\t', '9,999')"), Seq(Row(null)))
1363+
}
1364+
13451365
test("SPARK-44905: stateful lastRegex causes NullPointerException on eval for regexp_replace") {
13461366
val df = sql("select regexp_replace('', '[a\\\\d]{0, 2}', 'x')")
13471367
intercept[SparkRuntimeException](df.queryExecution.optimizedPlan)

0 commit comments

Comments
 (0)