diff --git a/docker/README.md b/docker/README.md
new file mode 100644
index 000000000..9ecbfe804
--- /dev/null
+++ b/docker/README.md
@@ -0,0 +1,42 @@
+
+## 使用方式
+
+1. 构建镜像
+``` sh
+docker build -f ./docker/antlr4-profiling.Dockerfile -t antlr4-profiling .
+```
+
+2. 运行容器
+``` sh
+docker run -d -it --name antlr4-profiling -v "$(pwd)/src/grammar":/grammar antlr4-profiling
+```
+
+3. 进入容器
+``` sh
+docker exec -it antlr4-profiling bash
+```
+
+> **注意**
+> 1. 进行 ANTLR4 Profiling 时,需要删除部分 ts 相关的代码,否则会报错。如下:
+>    - SparkSqlLexer.g4 需要删除 @members 的内容;SparkSqlParser.g4 需要删除 @header 和 shouldMatchEmpty 的内容
+>
+> 2. 以下 java 命令需要进入容器的指定目录,否则 java 类会找不到报错
+
+``` sh
+cd /grammar/spark
+```
+
+4. 在容器中执行,生成 Java 版解析器
+``` sh
+antlr4 -Dlanguage=Java -visitor -no-listener ./SparkSqlLexer.g4 ./SparkSqlParser.g4
+```
+
+5. 编译 Java 文件
+``` sh
+javac -cp .:/usr/local/lib/antlr-4.13.1-complete.jar SparkSqlProfiling.java
+```
+
+6. 运行 Java 程序
+``` sh
+java -cp .:/usr/local/lib/antlr-4.13.1-complete.jar SparkSqlProfiling "SELECT * FROM a WHERE b = 1"
+```
diff --git a/docker/antlr4-profiling.Dockerfile b/docker/antlr4-profiling.Dockerfile
new file mode 100644
index 000000000..8a45cc8f7
--- /dev/null
+++ b/docker/antlr4-profiling.Dockerfile
@@ -0,0 +1,19 @@
+FROM registry.cn-hangzhou.aliyuncs.com/liuxy0551/eclipse-temurin:17-jdk-jammy
+
+# Install the required tools; drop the apt package lists so they do not stay in the image layer
+RUN apt-get update && apt-get install -y curl unzip vim && apt-get clean && rm -rf /var/lib/apt/lists/*
+
+# Download the ANTLR jar; -f fails the build on an HTTP error instead of saving the error page as the jar
+RUN curl -fsSL -O https://www.antlr.org/download/antlr-4.13.1-complete.jar && \
+    mv antlr-4.13.1-complete.jar /usr/local/lib/
+
+# Append the CLASSPATH export and the antlr4 / grun aliases to ~/.bashrc
+RUN echo 'export CLASSPATH=".:/usr/local/lib/antlr-4.13.1-complete.jar:$CLASSPATH"' >> ~/.bashrc \
+    && echo 'alias antlr4="java -jar /usr/local/lib/antlr-4.13.1-complete.jar"' >> ~/.bashrc \
+    && echo 'alias grun="java org.antlr.v4.gui.TestRig"' >> ~/.bashrc
+
+# Working directory
+WORKDIR /grammar
+
+# Keep bash as the default command
+CMD ["bash"]
diff --git a/src/grammar/spark/SparkSqlLexer.g4 b/src/grammar/spark/SparkSqlLexer.g4
index 69058ced3..1d3b68203 100644
--- a/src/grammar/spark/SparkSqlLexer.g4 +++ b/src/grammar/spark/SparkSqlLexer.g4 @@ -29,23 +29,6 @@ options { caseInsensitive= true; } -@members { - /** - * When true, parser should throw ParseException for unclosed bracketed comment. - */ - public has_unclosed_bracketed_comment = false; - - /** - * This method will be called when the character stream ends and try to find out the - * unclosed bracketed comment. - * If the method be called, it means the end of the entire character stream match, - * and we set the flag and fail later. - */ - public markUnclosedComment() { - this.has_unclosed_bracketed_comment = true; - } -} - SEMICOLON: ';'; LEFT_PAREN : '('; @@ -424,7 +407,7 @@ GTE : '>=' | '!<'; NOT : '!'; PLUS : '+'; MINUS : '-'; -ASTERISK : '*'; +STAR : '*'; SLASH : '/'; PERCENT : '%'; TILDE : '~'; @@ -478,8 +461,7 @@ fragment LETTER: [A-Z]; LINE_COMMENT: '--' ('\\\n' | ~[\r\n])* '\r'? '\n'? -> channel(HIDDEN); -BRACKETED_COMMENT: - '/*' (BRACKETED_COMMENT | .)*? ('*/' | {this.markUnclosedComment();} EOF) -> channel(HIDDEN); +BRACKETED_COMMENT: '/*' (BRACKETED_COMMENT | .)*? '*/' -> channel(HIDDEN); WHITE_SPACE: (' ' | '\t' | '\r' | '\n') -> channel(HIDDEN); diff --git a/src/grammar/spark/SparkSqlParser.g4 b/src/grammar/spark/SparkSqlParser.g4 index 2dad67eae..450a482d4 100644 --- a/src/grammar/spark/SparkSqlParser.g4 +++ b/src/grammar/spark/SparkSqlParser.g4 @@ -27,11 +27,6 @@ parser grammar SparkSqlParser; options { tokenVocab=SparkSqlLexer; caseInsensitive= true; - superClass=SQLParserBase; -} - -@header { -import { SQLParserBase } from '../SQLParserBase'; } program @@ -80,12 +75,12 @@ statement | KW_ANALYZE KW_TABLES ((KW_FROM | KW_IN) namespaceName)? KW_COMPUTE KW_STATISTICS (KW_NOSCAN)? 
# analyzeTables | KW_ALTER KW_TABLE tableName KW_ADD KW_COLUMN qualifiedColTypeWithPositionForAdd # alterTableAddColumn | KW_ALTER KW_TABLE tableName KW_ADD KW_COLUMNS LEFT_PAREN qualifiedColTypeWithPositionSeqForAdd RIGHT_PAREN # alterTableAddColumns - | KW_ALTER KW_TABLE table=tableName KW_RENAME KW_COLUMN columnName KW_TO columnNameCreate # renameTableColumn | KW_ALTER KW_TABLE tableName KW_DROP KW_COLUMN (ifExists)? columnName # alterTableDropColumn | KW_ALTER KW_TABLE tableName KW_DROP KW_COLUMNS (ifExists)? LEFT_PAREN columnNameSeq RIGHT_PAREN # dropTableColumns | KW_ALTER (KW_TABLE tableName | KW_VIEW viewName) KW_RENAME KW_TO multipartIdentifier # renameTable | KW_ALTER (KW_TABLE tableName | KW_VIEW viewName) KW_SET KW_TBLPROPERTIES propertyList # setTableProperties | KW_ALTER (KW_TABLE tableName | KW_VIEW viewName) KW_UNSET KW_TBLPROPERTIES (ifExists)? propertyList # unsetTableProperties + | KW_ALTER KW_TABLE table=tableName KW_RENAME KW_COLUMN columnName KW_TO columnNameCreate # renameTableColumn | KW_ALTER KW_TABLE table=tableName (KW_ALTER | KW_CHANGE) KW_COLUMN? column=columnName alterColumnAction? # alterTableAlterColumn | KW_ALTER KW_TABLE table=tableName partitionSpec? KW_CHANGE KW_COLUMN? colName=columnName columnType colPosition? # hiveChangeColumn | KW_ALTER KW_TABLE table=tableName partitionSpec? KW_REPLACE KW_COLUMNS LEFT_PAREN qualifiedColTypeWithPositionSeqForReplace RIGHT_PAREN # @@ -171,17 +166,13 @@ statement | (KW_MSCK)? KW_REPAIR KW_TABLE tableName (option=(KW_ADD | KW_DROP | KW_SYNC) KW_PARTITIONS)? # repairTable | op=(KW_ADD | KW_LIST) identifier .*? # manageResource | KW_SET KW_ROLE .*? # failNativeCommand - | KW_SET KW_TIME KW_ZONE interval # setTimeZoneInterval - | KW_SET KW_TIME KW_ZONE (stringLit | KW_LOCAL) # setTimeZone - | KW_SET KW_TIME KW_ZONE .*? # setTimeZoneAny + | KW_SET KW_TIME KW_ZONE (interval | stringLit | KW_LOCAL | .*?) 
# setTimeZone | KW_SET (KW_VARIABLE | KW_VAR) assignmentList # setVariableAssignment | KW_SET (KW_VARIABLE | KW_VAR) LEFT_PAREN multipartIdentifierList RIGHT_PAREN EQ LEFT_PAREN query RIGHT_PAREN # setVariableMultiAssignment - | KW_SET quotedIdentifier EQ BACKQUOTED_IDENTIFIER # setConfig + | KW_SET (quotedIdentifier | .*?) EQ BACKQUOTED_IDENTIFIER # setConfig | KW_SET quotedIdentifier (EQ .*?)? # setConfigAndValue - | KW_SET .*? EQ BACKQUOTED_IDENTIFIER # setConfigAnyKey | KW_SET .*? # setAny - | KW_RESET quotedIdentifier # resetConfig - | KW_RESET .*? # resetAny + | KW_RESET (quotedIdentifier | .*?) # resetConfig | KW_CREATE KW_INDEX (ifNotExists)? identifier KW_ON KW_TABLE? tableName ( KW_USING indexType=identifier )? LEFT_PAREN multipartIdentifierPropertyList RIGHT_PAREN (KW_OPTIONS options=propertyList)? # createIndex @@ -191,40 +182,32 @@ statement ; unsupportedHiveNativeCommands - : kw1=(KW_CREATE | KW_DROP) kw2=KW_ROLE - | kw1=(KW_GRANT | KW_REVOKE) kw2=KW_ROLE? - | kw1=KW_SHOW kw2=( - KW_GRANT - | KW_PRINCIPALS - | KW_COMPACTIONS - | KW_TRANSACTIONS - | KW_INDEXES - | KW_LOCKS - ) - | kw1=KW_SHOW kw2=KW_ROLE kw3=KW_GRANT? - | kw1=KW_SHOW KW_CURRENT? 
KW_ROLES - | kw1=KW_SHOW kw2=KW_CREATE kw3=KW_TABLE - | kw1=(KW_CREATE | KW_DROP | KW_ALTER) kw2=KW_INDEX - | kw1=(KW_EXPORT | KW_IMPORT | KW_LOCK | KW_UNLOCK) kw2=KW_TABLE - | kw1=(KW_LOCK | KW_UNLOCK) kw2=KW_DATABASE - | kw1=(KW_CREATE | KW_DROP) kw2=KW_TEMPORARY kw3=KW_MACRO - | kw1=KW_ALTER kw2=KW_TABLE tableName kw3=KW_NOT kw4=(KW_CLUSTERED | KW_SORTED | KW_SKEWED) - | kw1=KW_ALTER kw2=KW_TABLE tableName kw3=(KW_CLUSTERED | KW_SKEWED) kw4=KW_BY - | kw1=KW_ALTER kw2=KW_TABLE tableName kw3=KW_SKEWED kw4=KW_BY - | kw1=KW_ALTER kw2=KW_TABLE tableName kw3=KW_NOT kw4=KW_STORED kw5=KW_AS kw6=KW_DIRECTORIES - | kw1=KW_ALTER kw2=KW_TABLE tableName kw3=KW_SET kw4=KW_SKEWED kw5=KW_LOCATION - | kw1=KW_ALTER kw2=KW_TABLE tableName kw3=(KW_EXCHANGE | KW_ARCHIVE | KW_UNARCHIVE) kw4=KW_PARTITION - | kw1=KW_ALTER kw2=KW_TABLE tableName kw3=KW_TOUCH - | kw1=KW_ALTER kw2=KW_TABLE tableName partitionSpec? ( + : (KW_CREATE | KW_DROP) KW_ROLE + | (KW_GRANT | KW_REVOKE) KW_ROLE? + | KW_SHOW (KW_GRANT | KW_PRINCIPALS | KW_COMPACTIONS | KW_TRANSACTIONS | KW_INDEXES | KW_LOCKS) + | KW_SHOW KW_ROLE KW_GRANT? + | KW_SHOW KW_CURRENT? KW_ROLES + | KW_SHOW KW_CREATE KW_TABLE + | (KW_CREATE | KW_DROP | KW_ALTER) KW_INDEX + | (KW_EXPORT | KW_IMPORT | KW_LOCK | KW_UNLOCK) KW_TABLE + | (KW_LOCK | KW_UNLOCK) KW_DATABASE + | (KW_CREATE | KW_DROP) KW_TEMPORARY KW_MACRO + | KW_ALTER KW_TABLE tableName KW_NOT (KW_CLUSTERED | KW_SORTED | KW_SKEWED) + | KW_ALTER KW_TABLE tableName (KW_CLUSTERED | KW_SKEWED) KW_BY + | KW_ALTER KW_TABLE tableName KW_NOT KW_STORED KW_AS KW_DIRECTORIES + | KW_ALTER KW_TABLE tableName KW_SET KW_SKEWED KW_LOCATION + | KW_ALTER KW_TABLE tableName (KW_EXCHANGE | KW_ARCHIVE | KW_UNARCHIVE) KW_PARTITION + | KW_ALTER KW_TABLE tableName KW_TOUCH + | KW_ALTER KW_TABLE tableName partitionSpec? 
( KW_COMPACT | KW_CONCATENATE | (KW_SET KW_FILEFORMAT) | (KW_REPLACE KW_COLUMNS) ) - | kw1=KW_START kw2=KW_TRANSACTION - | kw1=KW_COMMIT - | kw1=KW_ROLLBACK - | kw1=KW_DFS + | KW_START KW_TRANSACTION + | KW_COMMIT + | KW_ROLLBACK + | KW_DFS ; bucketSpec @@ -415,7 +398,6 @@ viewName columnName : multipartIdentifier - | {this.shouldMatchEmpty()}? ; columnNamePath @@ -436,8 +418,8 @@ identifierReference ; queryOrganization - : (KW_ORDER KW_BY orderOrSortByClause)? (KW_CLUSTER KW_BY clusterOrDistributeBy)? ( - KW_DISTRIBUTE KW_BY clusterOrDistributeBy + : (KW_ORDER KW_BY orderOrSortByClause)? (KW_CLUSTER KW_BY expressionSeq)? ( + KW_DISTRIBUTE KW_BY expressionSeq )? (KW_SORT KW_BY orderOrSortByClause)? windowClause? limitClause? ( KW_OFFSET offset=expression )? @@ -451,22 +433,16 @@ orderOrSortByClause : sortItem (COMMA sortItem)* ; -clusterOrDistributeBy - : expression (COMMA expression)* - ; - queryTerm : queryPrimary | left=queryTerm operator=(KW_INTERSECT | KW_UNION | KW_EXCEPT | KW_MINUS) setQuantifier? right=queryTerm - | left=queryTerm operator=KW_INTERSECT setQuantifier? right=queryTerm - | left=queryTerm operator=(KW_UNION | KW_EXCEPT | KW_MINUS) setQuantifier? right=queryTerm ; queryPrimary : querySpecification | fromClause fromStatementBody+ | KW_TABLE tableName - | KW_VALUES expression (COMMA expression)* tableAlias + | KW_VALUES expressionSeq tableAlias | LEFT_PAREN query RIGHT_PAREN ; @@ -482,8 +458,7 @@ fromStatementBody ; querySpecification - : transformClause fromClause? lateralView* whereClause? aggregationClause? havingClause? windowClause? - | selectClause fromClause? lateralView* whereClause? aggregationClause? havingClause? windowClause? + : (transformClause | selectClause) fromClause? lateralView* whereClause? aggregationClause? havingClause? windowClause? ; transformClause @@ -511,7 +486,7 @@ setClause matchedClause : KW_WHEN KW_MATCHED (KW_AND matchedCond=booleanExpression)? 
KW_THEN ( KW_DELETE - | KW_UPDATE KW_SET (ASTERISK | assignmentList) + | KW_UPDATE KW_SET (STAR | assignmentList) ) ; @@ -527,7 +502,7 @@ notMatchedBySourceClause ; notMatchedAction - : KW_INSERT ASTERISK + : KW_INSERT STAR | KW_INSERT LEFT_PAREN multipartIdentifierList RIGHT_PAREN KW_VALUES LEFT_PAREN expression ( COMMA expression )* RIGHT_PAREN @@ -554,10 +529,9 @@ hint ; hintStatement - : hintName=identifier - | hintName=identifier LEFT_PAREN parameters+=primaryExpression ( - COMMA parameters+=primaryExpression - )* RIGHT_PAREN + : hintName=identifier ( + LEFT_PAREN parameters+=primaryExpression (COMMA parameters+=primaryExpression)* RIGHT_PAREN + )? ; fromClause @@ -651,7 +625,7 @@ ifExists ; lateralView - : KW_LATERAL KW_VIEW (KW_OUTER)? viewName LEFT_PAREN (expression (COMMA expression)*)? RIGHT_PAREN tableAlias ( + : KW_LATERAL KW_VIEW (KW_OUTER)? viewName LEFT_PAREN expressionSeq? RIGHT_PAREN tableAlias ( KW_AS? colName+=identifier (COMMA colName+=identifier)* )? ; @@ -667,16 +641,15 @@ relation ; joinRelation - : (joinType) KW_JOIN KW_LATERAL? right=relationPrimary joinCriteria? - | KW_NATURAL joinType KW_JOIN KW_LATERAL? right=relationPrimary + : (joinType) KW_JOIN KW_LATERAL? relationPrimary joinCriteria? + | KW_NATURAL joinType KW_JOIN KW_LATERAL? relationPrimary ; joinType : KW_INNER? | KW_CROSS - | KW_LEFT KW_OUTER? | KW_LEFT? (KW_SEMI | KW_ANTI) - | (KW_RIGHT | KW_FULL) KW_OUTER? + | (KW_LEFT | KW_RIGHT | KW_FULL) KW_OUTER? ; joinCriteria @@ -692,11 +665,10 @@ sample sampleMethod : negativeSign=MINUS? percentage=(INTEGER_VALUE | DECIMAL_VALUE) KW_PERCENTLIT - | expression KW_ROWS + | bytes=expression KW_ROWS? | sampleType=KW_BUCKET numerator=INTEGER_VALUE KW_OUT KW_OF denominator=INTEGER_VALUE ( KW_ON (identifier | qualifiedName LEFT_PAREN RIGHT_PAREN) )? - | bytes=expression ; identifierList @@ -725,16 +697,14 @@ identifierComment relationPrimary : (tableName | viewName | identifierReference) temporalClause? sample? 
tableAlias - | LEFT_PAREN query RIGHT_PAREN sample? tableAlias - | LEFT_PAREN relation RIGHT_PAREN sample? tableAlias - | KW_VALUES expression (COMMA expression)* tableAlias + | LEFT_PAREN (query | relation) RIGHT_PAREN sample? tableAlias + | KW_VALUES expressionSeq tableAlias | functionName LEFT_PAREN (functionTableArgument (COMMA functionTableArgument)*)? RIGHT_PAREN tableAlias ; functionTableSubqueryArgument : KW_TABLE tableName tableArgumentPartitioning? - | KW_TABLE LEFT_PAREN tableName RIGHT_PAREN tableArgumentPartitioning? - | KW_TABLE LEFT_PAREN query RIGHT_PAREN tableArgumentPartitioning? + | KW_TABLE LEFT_PAREN (tableName | query) RIGHT_PAREN tableArgumentPartitioning? ; tableArgumentPartitioning @@ -746,7 +716,7 @@ tableArgumentPartitioning | partition+=expression ) ) - ) ((KW_ORDER | KW_SORT) KW_BY ( ((LEFT_PAREN orderOrSortByClause RIGHT_PAREN) | sortItem)))? + ) ((KW_ORDER | KW_SORT) KW_BY (((LEFT_PAREN orderOrSortByClause RIGHT_PAREN) | sortItem)))? ; functionTableNamedArgumentExpression @@ -852,33 +822,33 @@ booleanExpression : (KW_NOT | NOT) booleanExpression | KW_EXISTS LEFT_PAREN query RIGHT_PAREN | valueExpression predicate? + /* AND and OR stay separate alternatives: in a left-recursive rule the earlier alternative binds tighter, so AND must precede OR. */ | left=booleanExpression operator=KW_AND right=booleanExpression | left=booleanExpression operator=KW_OR right=booleanExpression ; predicate : KW_NOT? kind=KW_BETWEEN lower=valueExpression KW_AND upper=valueExpression - | KW_NOT? kind=KW_IN LEFT_PAREN expression (COMMA expression)* RIGHT_PAREN - | KW_NOT? kind=KW_IN LEFT_PAREN query RIGHT_PAREN + | KW_NOT? kind=KW_IN LEFT_PAREN (expressionSeq | query) RIGHT_PAREN | KW_NOT? kind=(KW_RLIKE | KW_REGEXP) pattern=valueExpression | KW_NOT? kind=(KW_LIKE | KW_ILIKE) quantifier=(KW_ANY | KW_SOME | KW_ALL) ( LEFT_PAREN RIGHT_PAREN - | LEFT_PAREN expression (COMMA expression)* RIGHT_PAREN + | LEFT_PAREN expressionSeq RIGHT_PAREN ) | KW_NOT? 
kind=(KW_LIKE | KW_ILIKE) pattern=valueExpression (KW_ESCAPE escapeChar=stringLit)? - | KW_IS KW_NOT? kind=KW_NULL - | KW_IS KW_NOT? kind=(KW_TRUE | KW_FALSE | KW_UNKNOWN) + | KW_IS KW_NOT? kind=(KW_NULL | KW_TRUE | KW_FALSE | KW_UNKNOWN) | KW_IS KW_NOT? kind=KW_DISTINCT KW_FROM right=valueExpression ; valueExpression : primaryExpression | operator=(MINUS | PLUS | TILDE) valueExpression - | left=valueExpression operator=(ASTERISK | SLASH | PERCENT | KW_DIV) right=valueExpression + /* Alternative order defines binary operator precedence, highest first; do not merge these alternatives into one operator set. */ + | left=valueExpression operator=(STAR | SLASH | PERCENT | KW_DIV) right=valueExpression | left=valueExpression operator=(PLUS | MINUS | CONCAT_PIPE) right=valueExpression | left=valueExpression operator=AMPERSAND right=valueExpression | left=valueExpression operator=HAT right=valueExpression | left=valueExpression operator=PIPE right=valueExpression | left=valueExpression comparisonOperator right=valueExpression ; @@ -898,25 +868,23 @@ datetimeUnit primaryExpression : name=(KW_CURRENT_DATE | KW_CURRENT_TIMESTAMP | KW_CURRENT_USER | KW_USER | KW_SESSION_USER) - | name=(KW_TIMESTAMPADD | KW_DATEADD | KW_DATE_ADD) LEFT_PAREN ( - unit=datetimeUnit - | invalidUnit=stringLit - ) COMMA unitsAmount=valueExpression COMMA timestamp=valueExpression RIGHT_PAREN - | name=(KW_TIMESTAMPDIFF | KW_DATEDIFF | KW_DATE_DIFF | KW_TIMEDIFF) LEFT_PAREN ( - unit=datetimeUnit - | invalidUnit=stringLit - ) COMMA startTimestamp=valueExpression COMMA endTimestamp=valueExpression RIGHT_PAREN - | KW_CASE whenClause+ (KW_ELSE elseExpression=expression)? KW_END - | KW_CASE expression whenClause+ (KW_ELSE elseExpression=expression)? KW_END + | name=( + KW_TIMESTAMPADD + | KW_DATEADD + | KW_DATE_ADD + | KW_TIMESTAMPDIFF + | KW_DATEDIFF + | KW_DATE_DIFF + | KW_TIMEDIFF + ) LEFT_PAREN (unit=datetimeUnit | invalidUnit=stringLit) COMMA valueExpression COMMA valueExpression RIGHT_PAREN + | KW_CASE expression? 
whenClause+ (KW_ELSE elseExpression=expression)? KW_END | name=(KW_CAST | KW_TRY_CAST) LEFT_PAREN expression KW_AS dataType RIGHT_PAREN - | KW_STRUCT LEFT_PAREN (namedExpression (COMMA namedExpression)*)? RIGHT_PAREN - | KW_FIRST LEFT_PAREN expression (KW_IGNORE KW_NULLS)? RIGHT_PAREN - | KW_ANY_VALUE LEFT_PAREN expression (KW_IGNORE KW_NULLS)? RIGHT_PAREN - | KW_LAST LEFT_PAREN expression (KW_IGNORE KW_NULLS)? RIGHT_PAREN + | KW_STRUCT LEFT_PAREN namedExpressionSeq? RIGHT_PAREN + | (KW_FIRST | KW_ANY_VALUE | KW_LAST) LEFT_PAREN expression (KW_IGNORE KW_NULLS)? RIGHT_PAREN | KW_POSITION LEFT_PAREN substr=valueExpression KW_IN str=valueExpression RIGHT_PAREN | constant - | ASTERISK - | qualifiedName DOT ASTERISK + | STAR + | qualifiedName DOT STAR | LEFT_PAREN namedExpression (COMMA namedExpression)+ RIGHT_PAREN | LEFT_PAREN query RIGHT_PAREN | KW_IDENTIFIER LEFT_PAREN expression RIGHT_PAREN @@ -980,7 +948,7 @@ comparisonOperator arithmeticOperator : PLUS | MINUS - | ASTERISK + | STAR | SLASH | PERCENT | KW_DIV @@ -1194,14 +1162,13 @@ windowSpec * https://github.com/tunnelvisionlabs/antlr4ts/issues/417 */ windowFrame : frameType=(KW_RANGE | KW_ROWS) start_=frameBound | frameType=(KW_RANGE | KW_ROWS) KW_BETWEEN start_=frameBound KW_AND end=frameBound ; frameBound - : KW_UNBOUNDED boundType=(KW_PRECEDING | KW_FOLLOWING) + : (KW_UNBOUNDED | expression) boundType=(KW_PRECEDING | KW_FOLLOWING) | boundType=KW_CURRENT KW_ROW - | expression boundType=(KW_PRECEDING | KW_FOLLOWING) ; qualifiedNameList diff --git a/test/parser/spark/syntax/fixtures/alterTable.sql b/test/parser/spark/syntax/fixtures/alterTable.sql index 083cfb2c6..4046faca1 100644 --- a/test/parser/spark/syntax/fixtures/alterTable.sql +++ b/test/parser/spark/syntax/fixtures/alterTable.sql @@ -78,3 +78,55 @@ ALTER TABLE dbx.tab1 SET LOCATION '/path/to/part/ways' -- Syntax 
RECOVER PARTITIONS -- ALTER TABLE table_identifier RECOVER PARTITIONS ALTER TABLE dbx.tab1 RECOVER PARTITIONS; + + +-- After Profiling +ALTER TABLE my_table RENAME COLUMN old_column TO new_column; +ALTER TABLE sales_data REPLACE COLUMNS (product_id INT, product_name STRING, price DECIMAL(10,2), sale_date DATE); +ALTER TABLE sales_data PARTITION (region='US') REPLACE COLUMNS (product_id INT, product_name STRING, price DECIMAL(10,2), sale_date DATE); +COMMENT ON TABLE sales_2025 IS '2025 年销售数据表'; +COMMENT ON NAMESPACE hr IS '人力资源相关的数据集合'; +SET TIME ZONE INTERVAL '+08:00' HOUR TO MINUTE; +SET TIME ZONE LOCAL; +SET TIME ZONE 'Asia/Shanghai'; +SET TIME ZONE SYSTEM; +SET TIME ZONE DEFAULT; +SET spark.sql.shuffle.partitions = `200`; +SET spark.sql.adaptive.enabled = true; +SET spark.sql.autoBroadcastJoinThreshold = 10MB; +SET; +RESET spark.sql.shuffle.partitions; +SELECT * FROM a UNION SELECT * FROM b; +SELECT * FROM a UNION ALL SELECT * FROM b; +SELECT id FROM a INTERSECT SELECT id FROM b; +SELECT id FROM a INTERSECT DISTINCT SELECT id FROM b; +SELECT id FROM a EXCEPT SELECT id FROM b; +SELECT id FROM a EXCEPT ALL SELECT id FROM b; +SELECT id FROM a MINUS SELECT id FROM b; +(SELECT * FROM x UNION ALL SELECT * FROM y) EXCEPT SELECT * FROM z; +SELECT id, name FROM users WHERE age > 18; +SELECT department, COUNT(*) AS cnt FROM employees GROUP BY department HAVING cnt > 10; +SELECT id, salary, AVG(salary) OVER (PARTITION BY department ORDER BY hire_date) AS avg_salary FROM employees; +SELECT uid, word FROM documents LATERAL VIEW explode(split(content, ' ')) t AS word; +SELECT TRANSFORM (name, age) USING 'python3 /scripts/normalize.py' AS (norm_name STRING, norm_age INT) FROM users; +SELECT TRANSFORM (col1, col2) USING 'map_script.sh' AS (key, value) FROM logs WHERE status = 'OK' GROUP BY col1 HAVING COUNT(*) > 10; +SELECT id, amount, SUM(amount) OVER (PARTITION BY customer_id ORDER BY date ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) AS rolling_sum FROM transactions WHERE date 
>= '2024-01-01'; +SELECT a.id, b.value FROM table_a a INNER JOIN table_b b ON a.id = b.id; +SELECT a.id, b.value FROM table_a a JOIN table_b b ON a.id = b.id; +SELECT a.* FROM table_a a LEFT SEMI JOIN table_b b ON a.id = b.id; +SELECT a.* FROM table_a a LEFT ANTI JOIN table_b b ON a.id = b.id; +SELECT a.id, b.value FROM table_a a LEFT OUTER JOIN table_b b ON a.id = b.id; +SELECT a.id, b.value FROM table_a a RIGHT OUTER JOIN table_b b ON a.id = b.id; +SELECT a.id, b.value FROM table_a a FULL OUTER JOIN table_b b ON a.id = b.id; +SELECT * FROM table_a TABLESAMPLE(10 PERCENT); +SELECT * FROM table_a TABLESAMPLE(-25 PERCENT); +SELECT * FROM table_a TABLESAMPLE(100 ROWS); +SELECT * FROM table_a TABLESAMPLE(BUCKET 2 OUT OF 4); +SELECT * FROM table_a TABLESAMPLE(BUCKET 1 OUT OF 10 ON id); +SELECT * FROM table_a TABLESAMPLE(BUCKET 3 OUT OF 5 ON rand()); +SELECT * FROM table_a TABLESAMPLE(1024); +SELECT id, salary, SUM(salary) OVER (ORDER BY hire_date ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS cumulative_salary FROM employees; +SELECT id, salary, SUM(salary) OVER (ORDER BY hire_date ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS remaining_salary FROM employees; +SELECT id, salary, SUM(salary) OVER (ORDER BY hire_date ROWS BETWEEN CURRENT ROW AND CURRENT ROW) AS single_row_salary FROM employees; +SELECT id, salary, SUM(salary) OVER (ORDER BY hire_date ROWS BETWEEN 3 PRECEDING AND CURRENT ROW) AS rolling_3 FROM employees; +SELECT id, salary, SUM(salary) OVER (ORDER BY hire_date ROWS BETWEEN CURRENT ROW AND 5 FOLLOWING) AS next_5 FROM employees;