Skip to content

Commit 6085fdf

Browse files
committed
feat: antlr4 profiling sparksql with java
1 parent 4705620 commit 6085fdf

File tree

4 files changed

+62
-25
lines changed

4 files changed

+62
-25
lines changed

docker/README.md

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
2+
## 使用方式
3+
4+
1. 构建镜像
5+
``` sh
6+
docker build -f ./docker/antlr4-profiling.Dockerfile -t antlr4-profiling .
7+
```
8+
9+
2. 运行容器
10+
``` sh
11+
docker run -d -it --name antlr4-profiling -v "$(pwd)/src/grammar:/grammar" antlr4-profiling
12+
```
13+
14+
3. 进入容器
15+
``` sh
16+
docker exec -it antlr4-profiling bash
17+
```
18+
19+
> **注意**
20+
> 1. 进行 ANTLR4 Profiling 时,需要删除部分 ts 相关的代码,否则会报错。如下:
21+
> - SparkSqlLexer.g4 需要删除 @members 的内容,以及 BRACKETED_COMMENT 规则中的 {this.markUnclosedComment();} 动作;SparkSqlParser.g4 需要删除 superClass 选项、@header 和 shouldMatchEmpty 的内容
22+
>
23+
> 2. 以下 java 命令需要在容器内的指定目录下执行,否则会因找不到 Java 类而报错
24+
25+
``` sh
26+
cd /grammar/spark
27+
```
28+
29+
4. 在容器中执行,生成 Java 版解析器
30+
``` sh
31+
antlr4 -Dlanguage=Java -visitor -no-listener ./SparkSqlLexer.g4 ./SparkSqlParser.g4
32+
```
33+
34+
5. 编译 Java 文件
35+
``` sh
36+
javac -cp .:/usr/local/lib/antlr-4.13.1-complete.jar SparkSqlProfiling.java
37+
```
38+
39+
6. 运行 Java 程序
40+
``` sh
41+
java -cp .:/usr/local/lib/antlr-4.13.1-complete.jar SparkSqlProfiling "SELECT * FROM a WHERE b = 1"
42+
```

docker/antlr4-profiling.Dockerfile

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
FROM registry.cn-hangzhou.aliyuncs.com/liuxy0551/eclipse-temurin:17-jdk-jammy
2+
3+
# 安装必要工具
4+
RUN apt-get update && apt-get install -y curl unzip vim && apt-get clean
5+
6+
# 下载 ANTLR jar
7+
RUN curl -O https://www.antlr.org/download/antlr-4.13.1-complete.jar && \
8+
mv antlr-4.13.1-complete.jar /usr/local/lib/
9+
10+
# 设置环境变量
11+
RUN echo 'export CLASSPATH=".:/usr/local/lib/antlr-4.13.1-complete.jar:$CLASSPATH"' >> ~/.bashrc \
12+
&& echo 'alias antlr4="java -jar /usr/local/lib/antlr-4.13.1-complete.jar"' >> ~/.bashrc \
13+
&& echo 'alias grun="java org.antlr.v4.gui.TestRig"' >> ~/.bashrc
14+
15+
# 工作目录
16+
WORKDIR /grammar
17+
18+
# 默认命令保持 bash
19+
CMD ["bash"]

src/grammar/spark/SparkSqlLexer.g4

Lines changed: 1 addition & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -29,23 +29,6 @@ options {
2929
caseInsensitive= true;
3030
}
3131

32-
@members {
33-
/**
34-
* When true, parser should throw ParseException for unclosed bracketed comment.
35-
*/
36-
public has_unclosed_bracketed_comment = false;
37-
38-
/**
39-
* This method will be called when the character stream ends and try to find out the
40-
* unclosed bracketed comment.
41-
* If the method be called, it means the end of the entire character stream match,
42-
* and we set the flag and fail later.
43-
*/
44-
public markUnclosedComment() {
45-
this.has_unclosed_bracketed_comment = true;
46-
}
47-
}
48-
4932
SEMICOLON: ';';
5033

5134
LEFT_PAREN : '(';
@@ -478,8 +461,7 @@ fragment LETTER: [A-Z];
478461

479462
LINE_COMMENT: '--' ('\\\n' | ~[\r\n])* '\r'? '\n'? -> channel(HIDDEN);
480463

481-
BRACKETED_COMMENT:
482-
'/*' (BRACKETED_COMMENT | .)*? ('*/' | {this.markUnclosedComment();} EOF) -> channel(HIDDEN);
464+
BRACKETED_COMMENT: '/*' (BRACKETED_COMMENT | .)*? '*/' -> channel(HIDDEN);
483465

484466
WHITE_SPACE: (' ' | '\t' | '\r' | '\n') -> channel(HIDDEN);
485467

src/grammar/spark/SparkSqlParser.g4

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,6 @@ parser grammar SparkSqlParser;
2727
options {
2828
tokenVocab=SparkSqlLexer;
2929
caseInsensitive= true;
30-
superClass=SQLParserBase;
31-
}
32-
33-
@header {
34-
import { SQLParserBase } from '../SQLParserBase';
3530
}
3631

3732
program
@@ -415,7 +410,6 @@ viewName
415410

416411
columnName
417412
: multipartIdentifier
418-
| {this.shouldMatchEmpty()}?
419413
;
420414

421415
columnNamePath

0 commit comments

Comments
 (0)