Skip to content

Commit 17c4aa2

Browse files
committed
feat: antlr4 profiling sparksql with java
1 parent 4705620 commit 17c4aa2

File tree

5 files changed

+87
-25
lines changed

5 files changed

+87
-25
lines changed

docker/README.md

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
2+
## 使用方式
3+
4+
1. 构建镜像
5+
``` sh
6+
docker build -f ./docker/antlr4-profiling.Dockerfile -t antlr4-profiling .
7+
```
8+
9+
2. 运行容器
10+
``` sh
11+
docker run -d -it --name antlr4-profiling -v "$(pwd)/src/grammar":/grammar antlr4-profiling
12+
```
13+
14+
3. 进入容器
15+
``` sh
16+
docker exec -it antlr4-profiling bash
17+
```
18+
19+
> **注意**：以下命令需要先进入指定目录再执行，否则会因找不到 Java 类而报错
20+
21+
``` sh
22+
cd /grammar/spark
23+
```
24+
25+
4. 在容器中执行,生成 Java 版解析器
26+
``` sh
27+
antlr4 -Dlanguage=Java -visitor -no-listener ./SparkSqlLexer.g4 ./SparkSqlParser.g4
28+
```
29+
30+
5. 编译 Java 文件
31+
``` sh
32+
javac -cp .:/usr/local/lib/antlr-4.13.1-complete.jar SparkSqlProfiling.java
33+
```
34+
35+
6. 运行 Java 程序
36+
``` sh
37+
java -cp .:/usr/local/lib/antlr-4.13.1-complete.jar SparkSqlProfiling "SELECT * FROM a WHERE b = 1"
38+
```

docker/antlr4-profiling.Dockerfile

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
FROM registry.cn-hangzhou.aliyuncs.com/liuxy0551/eclipse-temurin:17-jdk-jammy

# Install the tools used inside the container. The apt package lists are
# removed in the same layer so they are not baked into the image.
RUN apt-get update \
    && apt-get install -y curl unzip vim \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Download the ANTLR complete jar (tool + runtime).
# -f makes curl exit non-zero on an HTTP error instead of saving the error
# page as the jar, so a broken download fails the build; -L follows redirects.
RUN curl -fSL -o /usr/local/lib/antlr-4.13.1-complete.jar \
    https://www.antlr.org/download/antlr-4.13.1-complete.jar

# Shell setup for interactive bash sessions: put the ANTLR jar on the
# CLASSPATH and define the usual antlr4 / grun aliases.
RUN echo 'export CLASSPATH=".:/usr/local/lib/antlr-4.13.1-complete.jar:$CLASSPATH"' >> ~/.bashrc \
    && echo 'alias antlr4="java -jar /usr/local/lib/antlr-4.13.1-complete.jar"' >> ~/.bashrc \
    && echo 'alias grun="java org.antlr.v4.gui.TestRig"' >> ~/.bashrc

# Working directory (the grammar sources are expected to be mounted here).
WORKDIR /grammar

# Keep bash as the default command.
CMD ["bash"]

src/grammar/spark/SparkSqlLexer.g4

Lines changed: 1 addition & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -29,23 +29,6 @@ options {
2929
caseInsensitive= true;
3030
}
3131

32-
@members {
33-
/**
34-
* When true, parser should throw ParseException for unclosed bracketed comment.
35-
*/
36-
public has_unclosed_bracketed_comment = false;
37-
38-
/**
39-
* This method will be called when the character stream ends and try to find out the
40-
* unclosed bracketed comment.
41-
* If the method be called, it means the end of the entire character stream match,
42-
* and we set the flag and fail later.
43-
*/
44-
public markUnclosedComment() {
45-
this.has_unclosed_bracketed_comment = true;
46-
}
47-
}
48-
4932
SEMICOLON: ';';
5033

5134
LEFT_PAREN : '(';
@@ -478,8 +461,7 @@ fragment LETTER: [A-Z];
478461

479462
LINE_COMMENT: '--' ('\\\n' | ~[\r\n])* '\r'? '\n'? -> channel(HIDDEN);
480463

481-
BRACKETED_COMMENT:
482-
'/*' (BRACKETED_COMMENT | .)*? ('*/' | {this.markUnclosedComment();} EOF) -> channel(HIDDEN);
464+
BRACKETED_COMMENT: '/*' (BRACKETED_COMMENT | .)*? '*/' -> channel(HIDDEN);
483465

484466
WHITE_SPACE: (' ' | '\t' | '\r' | '\n') -> channel(HIDDEN);
485467

src/grammar/spark/SparkSqlParser.g4

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,6 @@ parser grammar SparkSqlParser;
2727
options {
2828
tokenVocab=SparkSqlLexer;
2929
caseInsensitive= true;
30-
superClass=SQLParserBase;
31-
}
32-
33-
@header {
34-
import { SQLParserBase } from '../SQLParserBase';
3530
}
3631

3732
program
@@ -415,7 +410,6 @@ viewName
415410

416411
columnName
417412
: multipartIdentifier
418-
| {this.shouldMatchEmpty()}?
419413
;
420414

421415
columnNamePath
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
import org.antlr.v4.runtime.*;
2+
import org.antlr.v4.runtime.atn.PredictionMode;
3+
4+
public class SparkSqlProfiling {
5+
public static void main(String[] args) throws Exception {
6+
if(args.length == 0){
7+
System.out.println("请传入 SQL 测试语句,例如: java SparkSqlProfiling \"SELECT * FROM a WHERE b = 1\"");
8+
return;
9+
}
10+
11+
String sql = String.join(" ", args);
12+
13+
// 创建 Lexer & Parser
14+
SparkSqlLexer lexer = new SparkSqlLexer(CharStreams.fromString(sql));
15+
CommonTokenStream tokens = new CommonTokenStream(lexer);
16+
SparkSqlParser parser = new SparkSqlParser(tokens);
17+
18+
// 开启 LL 回溯性能分析
19+
parser.getInterpreter().setPredictionMode(PredictionMode.LL_EXACT_AMBIG_DETECTION);
20+
parser.addErrorListener(new DiagnosticErrorListener(true));
21+
22+
// 入口规则
23+
parser.singleStatement();
24+
25+
// 输出 profiling 信息
26+
System.out.println(parser.getParseInfo());
27+
}
28+
}
29+

0 commit comments

Comments
 (0)