Skip to content

Commit 05af2de

Browse files
hvanhovell authored and gatorsmile committed
[SPARK-21830][SQL] Bump ANTLR version and fix a few issues.
## What changes were proposed in this pull request?

This PR bumps the ANTLR version to 4.7, and fixes a number of small parser related issues uncovered by the bump.

The main reason for upgrading is that in some cases the current version of ANTLR (4.5) can exhibit exponential slowdowns if it needs to parse boolean predicates. For example the following query will take forever to parse:

```sql
SELECT *
FROM RANGE(1000)
WHERE
TRUE
AND NOT upper(DESCRIPTION) LIKE '%FOO%'
AND NOT upper(DESCRIPTION) LIKE '%FOO%'
AND NOT upper(DESCRIPTION) LIKE '%FOO%'
AND NOT upper(DESCRIPTION) LIKE '%FOO%'
AND NOT upper(DESCRIPTION) LIKE '%FOO%'
AND NOT upper(DESCRIPTION) LIKE '%FOO%'
AND NOT upper(DESCRIPTION) LIKE '%FOO%'
AND NOT upper(DESCRIPTION) LIKE '%FOO%'
AND NOT upper(DESCRIPTION) LIKE '%FOO%'
AND NOT upper(DESCRIPTION) LIKE '%FOO%'
AND NOT upper(DESCRIPTION) LIKE '%FOO%'
AND NOT upper(DESCRIPTION) LIKE '%FOO%'
AND NOT upper(DESCRIPTION) LIKE '%FOO%'
AND NOT upper(DESCRIPTION) LIKE '%FOO%'
AND NOT upper(DESCRIPTION) LIKE '%FOO%'
AND NOT upper(DESCRIPTION) LIKE '%FOO%'
AND NOT upper(DESCRIPTION) LIKE '%FOO%'
AND NOT upper(DESCRIPTION) LIKE '%FOO%'
```

This is caused by a known bug in ANTLR (antlr/antlr4#994), which was fixed in version 4.6.

## How was this patch tested?

Existing tests.

Author: Herman van Hovell <[email protected]>

Closes apache#19042 from hvanhovell/SPARK-21830.
1 parent 763b83e commit 05af2de

File tree

10 files changed

+25
-14
lines changed

10 files changed

+25
-14
lines changed

dev/deps/spark-deps-hadoop-2.6

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ activation-1.1.1.jar
55
aircompressor-0.3.jar
66
antlr-2.7.7.jar
77
antlr-runtime-3.4.jar
8-
antlr4-runtime-4.5.3.jar
8+
antlr4-runtime-4.7.jar
99
aopalliance-1.0.jar
1010
aopalliance-repackaged-2.4.0-b34.jar
1111
apache-log4j-extras-1.2.17.jar

dev/deps/spark-deps-hadoop-2.7

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ activation-1.1.1.jar
55
aircompressor-0.3.jar
66
antlr-2.7.7.jar
77
antlr-runtime-3.4.jar
8-
antlr4-runtime-4.5.3.jar
8+
antlr4-runtime-4.7.jar
99
aopalliance-1.0.jar
1010
aopalliance-repackaged-2.4.0-b34.jar
1111
apache-log4j-extras-1.2.17.jar

pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,7 @@
178178
<jodd.version>3.5.2</jodd.version>
179179
<jsr305.version>1.3.9</jsr305.version>
180180
<libthrift.version>0.9.3</libthrift.version>
181-
<antlr4.version>4.5.3</antlr4.version>
181+
<antlr4.version>4.7</antlr4.version>
182182
<jpam.version>1.1</jpam.version>
183183
<selenium.version>2.52.0</selenium.version>
184184
<paranamer.version>2.6</paranamer.version>

project/SparkBuild.scala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -474,6 +474,7 @@ object OldDeps {
474474

475475
object Catalyst {
476476
lazy val settings = antlr4Settings ++ Seq(
477+
antlr4Version in Antlr4 := "4.7",
477478
antlr4PackageName in Antlr4 := Some("org.apache.spark.sql.catalyst.parser"),
478479
antlr4GenListener in Antlr4 := true,
479480
antlr4GenVisitor in Antlr4 := true

sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,10 @@ singleDataType
6464
: dataType EOF
6565
;
6666

67+
singleTableSchema
68+
: colTypeList EOF
69+
;
70+
6771
statement
6872
: query #statementDefault
6973
| USE db=identifier #use
@@ -974,7 +978,7 @@ CURRENT_TIMESTAMP: 'CURRENT_TIMESTAMP';
974978

975979
STRING
976980
: '\'' ( ~('\''|'\\') | ('\\' .) )* '\''
977-
| '\"' ( ~('\"'|'\\') | ('\\' .) )* '\"'
981+
| '"' ( ~('"'|'\\') | ('\\' .) )* '"'
978982
;
979983

980984
BIGINT_LITERAL

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,10 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
8989
visitSparkDataType(ctx.dataType)
9090
}
9191

92+
override def visitSingleTableSchema(ctx: SingleTableSchemaContext): StructType = {
93+
withOrigin(ctx)(StructType(visitColTypeList(ctx.colTypeList)))
94+
}
95+
9296
/* ********************************************************************************************
9397
* Plan parsing
9498
* ******************************************************************************************** */

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ abstract class AbstractSqlParser extends ParserInterface with Logging {
6161
* definitions which will preserve the correct Hive metadata.
6262
*/
6363
override def parseTableSchema(sqlText: String): StructType = parse(sqlText) { parser =>
64-
StructType(astBuilder.visitColTypeList(parser.colTypeList()))
64+
astBuilder.visitSingleTableSchema(parser.singleTableSchema())
6565
}
6666

6767
/** Creates LogicalPlan for a given SQL string. */

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableSchemaParserSuite.scala

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -79,10 +79,12 @@ class TableSchemaParserSuite extends SparkFunSuite {
7979
}
8080

8181
// Negative cases
82-
assertError("")
83-
assertError("a")
84-
assertError("a INT b long")
85-
assertError("a INT,, b long")
86-
assertError("a INT, b long,,")
87-
assertError("a INT, b long, c int,")
82+
test("Negative cases") {
83+
assertError("")
84+
assertError("a")
85+
assertError("a INT b long")
86+
assertError("a INT,, b long")
87+
assertError("a INT, b long,,")
88+
assertError("a INT, b long, c int,")
89+
}
8890
}

sql/core/src/test/resources/sql-tests/results/show-tables.sql.out

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,7 @@ struct<>
164164
-- !query 13 output
165165
org.apache.spark.sql.catalyst.parser.ParseException
166166

167-
mismatched input '<EOF>' expecting 'LIKE'(line 1, pos 19)
167+
mismatched input '<EOF>' expecting {'FROM', 'IN', 'LIKE'}(line 1, pos 19)
168168

169169
== SQL ==
170170
SHOW TABLE EXTENDED
@@ -187,7 +187,7 @@ struct<>
187187
-- !query 15 output
188188
org.apache.spark.sql.catalyst.parser.ParseException
189189

190-
mismatched input 'PARTITION' expecting 'LIKE'(line 1, pos 20)
190+
mismatched input 'PARTITION' expecting {'FROM', 'IN', 'LIKE'}(line 1, pos 20)
191191

192192
== SQL ==
193193
SHOW TABLE EXTENDED PARTITION(c='Us', d=1)

sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCWriteSuite.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -468,7 +468,7 @@ class JDBCWriteSuite extends SharedSQLContext with BeforeAndAfter {
468468
.option("createTableColumnTypes", "`name char(20)") // incorrectly quoted column
469469
.jdbc(url1, "TEST.USERDBTYPETEST", properties)
470470
}.getMessage()
471-
assert(msg.contains("no viable alternative at input"))
471+
assert(msg.contains("extraneous input"))
472472
}
473473

474474
test("SPARK-10849: jdbc CreateTableColumnTypes duplicate columns") {

0 commit comments

Comments
 (0)