Commit 1e72878

feat: antlr4 profiling sparksql with java
1 parent 4705620 commit 1e72878

4 files changed: +71 −25 lines

docker/antlr4-profiling.Dockerfile

Lines changed: 41 additions & 0 deletions
@@ -0,0 +1,41 @@
+FROM registry.cn-hangzhou.aliyuncs.com/liuxy0551/eclipse-temurin:17-jdk-jammy
+
+# Install the required tools
+RUN apt-get update && apt-get install -y curl unzip vim && apt-get clean
+
+# Download the ANTLR jar
+RUN curl -O https://www.antlr.org/download/antlr-4.13.1-complete.jar && \
+    mv antlr-4.13.1-complete.jar /usr/local/lib/
+
+# Set environment variables
+RUN echo 'export CLASSPATH=".:/usr/local/lib/antlr-4.13.1-complete.jar:$CLASSPATH"' >> ~/.bashrc \
+    && echo 'alias antlr4="java -jar /usr/local/lib/antlr-4.13.1-complete.jar"' >> ~/.bashrc \
+    && echo 'alias grun="java org.antlr.v4.gui.TestRig"' >> ~/.bashrc
+
+# Working directory
+WORKDIR /grammar
+
+# Keep bash as the default command
+CMD ["bash"]
+
+
+
+########################## Usage ##########################
+# 1. Build the image
+# docker build -f ./docker/antlr4-profiling.Dockerfile -t antlr4-profiling .
+# 2. Run the container
+# docker run -d -it --name antlr4-profiling -v ./src/grammar:/grammar antlr4-profiling
+# 3. Enter the container
+# docker exec -it antlr4-profiling bash
+
+# Note: the java commands below must be run from the directory below, otherwise the Java classes will not be found
+# cd /grammar/spark
+
+# 4. Inside the container, generate the Java version of the parser
+# antlr4 -Dlanguage=Java -visitor -no-listener ./SparkSqlLexer.g4 ./SparkSqlParser.g4
+
+# 5. Compile the Java files
+# javac -cp .:/usr/local/lib/antlr-4.13.1-complete.jar SparkSqlProfiling.java
+
+# 6. Run the Java program
+# java -cp .:/usr/local/lib/antlr-4.13.1-complete.jar SparkSqlProfiling "SELECT * FROM a WHERE b = 1"
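
As a companion to steps 4-6, a minimal sketch of comparing SLL and LL prediction time for one statement, assuming the SparkSqlLexer/SparkSqlParser classes generated in step 4 are on the classpath (the SllVsLlTimer class and its default statement are illustrative, not part of this commit):

import org.antlr.v4.runtime.*;
import org.antlr.v4.runtime.atn.PredictionMode;
import org.antlr.v4.runtime.misc.ParseCancellationException;

// Illustrative helper: times one parse in SLL mode and one in LL mode.
// Single runs give rough numbers only; JIT warm-up and the shared DFA cache skew them.
public class SllVsLlTimer {
    public static void main(String[] args) {
        String sql = args.length > 0 ? String.join(" ", args) : "SELECT * FROM a WHERE b = 1";
        System.out.printf("SLL: %d ms, LL: %d ms%n",
                parse(sql, PredictionMode.SLL), parse(sql, PredictionMode.LL));
    }

    static long parse(String sql, PredictionMode mode) {
        SparkSqlLexer lexer = new SparkSqlLexer(CharStreams.fromString(sql));
        SparkSqlParser parser = new SparkSqlParser(new CommonTokenStream(lexer));
        parser.getInterpreter().setPredictionMode(mode);
        if (mode == PredictionMode.SLL) {
            // bail out instead of recovering, so an SLL failure is cheap to detect
            parser.setErrorHandler(new BailErrorStrategy());
            parser.removeErrorListeners();
        }
        long start = System.nanoTime();
        try {
            parser.program();   // entry rule of SparkSqlParser.g4
        } catch (ParseCancellationException e) {
            System.out.println("SLL failed; this statement needs full LL prediction");
        }
        return (System.nanoTime() - start) / 1_000_000;
    }
}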

src/grammar/spark/SparkSqlLexer.g4

Lines changed: 1 addition & 19 deletions
@@ -29,23 +29,6 @@ options {
     caseInsensitive= true;
 }
 
-@members {
-    /**
-     * When true, parser should throw ParseException for unclosed bracketed comment.
-     */
-    public has_unclosed_bracketed_comment = false;
-
-    /**
-     * This method will be called when the character stream ends and try to find out the
-     * unclosed bracketed comment.
-     * If the method be called, it means the end of the entire character stream match,
-     * and we set the flag and fail later.
-     */
-    public markUnclosedComment() {
-        this.has_unclosed_bracketed_comment = true;
-    }
-}
-
 SEMICOLON: ';';
 
 LEFT_PAREN : '(';
@@ -478,8 +461,7 @@ fragment LETTER: [A-Z];
 
 LINE_COMMENT: '--' ('\\\n' | ~[\r\n])* '\r'? '\n'? -> channel(HIDDEN);
 
-BRACKETED_COMMENT:
-    '/*' (BRACKETED_COMMENT | .)*? ('*/' | {this.markUnclosedComment();} EOF) -> channel(HIDDEN);
+BRACKETED_COMMENT: '/*' (BRACKETED_COMMENT | .)*? '*/' -> channel(HIDDEN);
 
 WHITE_SPACE: (' ' | '\t' | '\r' | '\n') -> channel(HIDDEN);
 
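The removed @members block and the markUnclosedComment() action are TypeScript-target code, which the Java tool used in the Dockerfile above cannot compile; with the simplified BRACKETED_COMMENT rule the lexer no longer flags an unterminated /*. If that check is still wanted while profiling, it can be done outside the grammar — a minimal sketch, where the UnclosedCommentCheck class and its plain text scan are hypothetical stand-ins for the removed action (it does not account for /* inside string literals):

// Illustrative pre-check: reports a bracketed comment that is opened but never closed,
// which is what the removed lexer action used to flag.
public class UnclosedCommentCheck {
    static boolean hasUnclosedBracketedComment(String sql) {
        int depth = 0;
        for (int i = 0; i + 1 < sql.length(); i++) {
            if (sql.charAt(i) == '/' && sql.charAt(i + 1) == '*') { depth++; i++; }
            else if (depth > 0 && sql.charAt(i) == '*' && sql.charAt(i + 1) == '/') { depth--; i++; }
        }
        return depth > 0;
    }

    public static void main(String[] args) {
        String sql = args.length > 0 ? String.join(" ", args) : "SELECT 1 /* never closed";
        System.out.println("unclosed bracketed comment: " + hasUnclosedBracketedComment(sql));
    }
}
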
src/grammar/spark/SparkSqlParser.g4

Lines changed: 0 additions & 6 deletions
@@ -27,11 +27,6 @@ parser grammar SparkSqlParser;
 options {
     tokenVocab=SparkSqlLexer;
     caseInsensitive= true;
-    superClass=SQLParserBase;
-}
-
-@header {
-    import { SQLParserBase } from '../SQLParserBase';
 }
 
 program
@@ -415,7 +410,6 @@ viewName
 
 columnName
     : multipartIdentifier
-    | {this.shouldMatchEmpty()}?
     ;
 
 columnNamePath

src/grammar/spark/SparkSqlProfiling.java

Lines changed: 29 additions & 0 deletions
@@ -0,0 +1,29 @@
+import org.antlr.v4.runtime.*;
+import org.antlr.v4.runtime.atn.PredictionMode;
+
+public class SparkSqlProfiling {
+    public static void main(String[] args) throws Exception {
+        if (args.length == 0) {
+            System.out.println("Pass a SQL statement to test, e.g.: java SparkSqlProfiling \"SELECT * FROM a WHERE b = 1\"");
+            return;
+        }
+
+        String sql = String.join(" ", args);
+
+        // Create the lexer & parser
+        SparkSqlLexer lexer = new SparkSqlLexer(CharStreams.fromString(sql));
+        CommonTokenStream tokens = new CommonTokenStream(lexer);
+        SparkSqlParser parser = new SparkSqlParser(tokens);
+
+        // Enable exact ambiguity detection during LL prediction and report it
+        parser.getInterpreter().setPredictionMode(PredictionMode.LL_EXACT_AMBIG_DETECTION);
+        parser.addErrorListener(new DiagnosticErrorListener(true));
+        // Enable profiling, otherwise getParseInfo() returns null
+        parser.setProfile(true);
+
+        // Entry rule
+        parser.singleStatement();
+
+        // Print the profiling info
+        System.out.println(parser.getParseInfo());
+    }
+}
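
SparkSqlProfiling prints the ParseInfo object as a whole. To see where prediction time actually goes, the same data can be broken down per decision and mapped back to rule names — a minimal sketch, assuming the generated classes from step 4 and the grammar's program entry rule; the ProfilingReport class and the top-10 cutoff are illustrative, not part of this commit:

import org.antlr.v4.runtime.*;
import org.antlr.v4.runtime.atn.DecisionInfo;
import org.antlr.v4.runtime.atn.ParseInfo;

import java.util.Arrays;
import java.util.Comparator;

// Illustrative report: sorts the per-decision statistics by time spent in
// prediction and maps each decision back to the rule it belongs to.
public class ProfilingReport {
    public static void main(String[] args) throws Exception {
        String sql = args.length > 0 ? String.join(" ", args) : "SELECT * FROM a WHERE b = 1";

        SparkSqlLexer lexer = new SparkSqlLexer(CharStreams.fromString(sql));
        SparkSqlParser parser = new SparkSqlParser(new CommonTokenStream(lexer));
        parser.setProfile(true);          // installs the ProfilingATNSimulator
        parser.program();                 // entry rule of SparkSqlParser.g4

        ParseInfo info = parser.getParseInfo();
        DecisionInfo[] decisions = info.getDecisionInfo().clone();
        Arrays.sort(decisions, Comparator.comparingLong((DecisionInfo d) -> d.timeInPrediction).reversed());

        for (int i = 0; i < Math.min(10, decisions.length); i++) {
            DecisionInfo d = decisions[i];
            if (d.invocations == 0) continue;
            String rule = parser.getRuleNames()[parser.getATN().getDecisionState(d.decision).ruleIndex];
            System.out.printf("decision %d (rule %s): %d invocations, %.2f ms in prediction, %d ambiguities%n",
                    d.decision, rule, d.invocations, d.timeInPrediction / 1_000_000.0, d.ambiguities.size());
        }
    }
}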
