Skip to content

Commit 8bd6ef1

Browse files
authored
feat: introduce redshift parser into mono (#1)
* feat: introduce redshift parser into mono * chore: debug msg * chore: debug msg * chore: debug msg * chore: debug msg * chore: remove build step
1 parent a80f05f commit 8bd6ef1

File tree

143 files changed

+327736
-2
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

143 files changed

+327736
-2
lines changed

.github/workflows/tests.yml

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
# CI workflow: verifies that go.mod/go.sum are tidy and runs the Go tests of
# every parser directory touched by the push or pull request.
name: Tests

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

jobs:
  # Computes the list of parser directories that contain changed files and
  # exposes it as a job matrix for `go-tests`.
  detect-changes:
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
      any_changed: ${{ steps.set-matrix.outputs.any_changed }}
    steps:
      - uses: actions/checkout@v4
        with:
          # Full history, so the base branch and the previous commit exist
          # locally for the diffs below.
          fetch-depth: 0

      - name: Get changed files
        id: changed-files
        # GITHUB_EVENT_NAME and GITHUB_BASE_REF are default runner variables;
        # reading them from the environment avoids expanding expressions
        # inside the script text.
        run: |
          if [ "$GITHUB_EVENT_NAME" = "pull_request" ]; then
            echo "changed_files<<EOF" >> "$GITHUB_OUTPUT"
            git diff --name-only "origin/$GITHUB_BASE_REF..HEAD" >> "$GITHUB_OUTPUT"
            echo "EOF" >> "$GITHUB_OUTPUT"
          else
            echo "changed_files<<EOF" >> "$GITHUB_OUTPUT"
            git diff --name-only HEAD~1..HEAD >> "$GITHUB_OUTPUT"
            echo "EOF" >> "$GITHUB_OUTPUT"
          fi

      - name: Set matrix for changed parsers
        id: set-matrix
        env:
          # File names come from the pull request and are therefore untrusted.
          # Passing them through the environment (instead of interpolating
          # the expression into the script) prevents a crafted file name such
          # as `redshift/$(cmd)` from being executed by the shell.
          CHANGED_FILES: ${{ steps.changed-files.outputs.changed_files }}
        run: |
          # List of all available parsers
          ALL_PARSERS="redshift"
          # Add more parsers here as they are added to the repository
          # ALL_PARSERS="redshift mysql postgresql"

          CHANGED_PARSERS=""

          # Build a comma-separated list of quoted parser names (JSON array body).
          for parser in $ALL_PARSERS; do
            if echo "$CHANGED_FILES" | grep -q "^$parser/"; then
              if [ -z "$CHANGED_PARSERS" ]; then
                CHANGED_PARSERS="\"$parser\""
              else
                CHANGED_PARSERS="$CHANGED_PARSERS,\"$parser\""
              fi
            fi
          done

          if [ -n "$CHANGED_PARSERS" ]; then
            echo "matrix={\"parser\":[$CHANGED_PARSERS]}" >> "$GITHUB_OUTPUT"
            echo "any_changed=true" >> "$GITHUB_OUTPUT"
            echo "Changed parsers: $CHANGED_PARSERS"
          else
            echo "matrix={\"parser\":[]}" >> "$GITHUB_OUTPUT"
            echo "any_changed=false" >> "$GITHUB_OUTPUT"
            echo "No parser changes detected"
          fi

  # Fails when `go mod tidy` would modify go.mod or go.sum.
  go-mod-tidy:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-go@v5
        with:
          go-version-file: go.mod
          cache-dependency-path: go.sum

      - name: Verify go mod tidy
        run: |
          go mod tidy
          git diff --exit-code -- go.mod go.sum

  # Runs the tests of each changed parser; skipped entirely when no parser
  # directory changed (an empty matrix would otherwise be an error).
  go-tests:
    needs: detect-changes
    if: needs.detect-changes.outputs.any_changed == 'true'
    runs-on: ubuntu-latest
    strategy:
      matrix: ${{ fromJSON(needs.detect-changes.outputs.matrix) }}
    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-go@v5
        with:
          go-version-file: go.mod
          cache-dependency-path: go.sum

      - name: Run all tests
        working-directory: ${{ matrix.parser }}
        # The output is piped through tee to keep a log for the next step;
        # PIPESTATUS[0] restores the exit code of `go test` itself.
        run: go test -p=8 -timeout 30m -ldflags "-w -s" -v ./... | tee test.log; exit ${PIPESTATUS[0]}
      - name: Pretty print tests running time
        working-directory: ${{ matrix.parser }}
        # grep: filter out lines like "--- PASS: Test (15.04s)"
        # sed: remove unnecessary characters
        # awk: re-format lines to "PASS: Test (15.04s)"
        # sort: cut into columns by delimiter ' ' (single space) and sort by
        #       column 3 (test time in seconds) as numeric type in reverse
        #       order (largest comes first)
        # awk: accumulate sum by test time in seconds
        run: grep --color=never -e '--- PASS:' -e '--- FAIL:' test.log | sed 's/[:()]//g' | awk '{print $2,$3,$4}' | sort -t' ' -nk3 -r | awk '{sum += $3; print $1,$2,$3,sum"s"}'

.gitignore

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
*.dll
88
*.so
99
*.dylib
10+
**/.DS_Store
1011

1112
# Test binary, built with `go test -c`
1213
*.test
@@ -28,5 +29,8 @@ go.work.sum
2829
.env
2930

3031
# Editor/IDE
31-
# .idea/
32-
# .vscode/
32+
.idea/
33+
34+
# Plugin
35+
# Intellij ANTLR plugin
36+
**/gen/

go.mod

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
module github.com/bytebase/parser
2+
3+
go 1.24.5
4+
5+
require (
6+
github.com/antlr4-go/antlr/v4 v4.13.1
7+
github.com/stretchr/testify v1.10.0
8+
)
9+
10+
require (
11+
github.com/davecgh/go-spew v1.1.1 // indirect
12+
github.com/pmezard/go-difflib v1.0.0 // indirect
13+
golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842 // indirect
14+
gopkg.in/yaml.v3 v3.0.1 // indirect
15+
)
16+
17+
replace github.com/antlr4-go/antlr/v4 => github.com/bytebase/antlr/v4 v4.0.0-20240827034948-8c385f108920

go.sum

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
github.com/bytebase/antlr/v4 v4.0.0-20240827034948-8c385f108920 h1:IfmPt5o5R70NKtOrs+QHOoCgViYZelZysGxVBvV4ybA=
2+
github.com/bytebase/antlr/v4 v4.0.0-20240827034948-8c385f108920/go.mod h1:ykhjIPiv0IWpu3OGXCHdz2eUSe8UNGGD6baqjs8jSuU=
3+
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
4+
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
5+
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
6+
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
7+
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
8+
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
9+
golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842 h1:vr/HnozRka3pE4EsMEg1lgkXJkTFJCVUX+S/ZT6wYzM=
10+
golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842/go.mod h1:XtvwrStGgqGPLc4cjQfWqZHG1YFdYs6swckp8vpsjnc=
11+
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
12+
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
13+
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
14+
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

redshift/CLAUDE.md

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
# Redshift Parser Development Guide
2+
3+
## Project Overview
4+
5+
This repository is a Redshift parser built with ANTLR 4, forked from github.com/bytebase/postgresql-parser. Due to incompatibility issues with PostgreSQL, this separate repository was created to support Amazon Redshift-specific syntax and features.
6+
7+
## Architecture
8+
9+
### Core Components
10+
11+
1. **ANTLR Grammar Files**:
12+
- `RedshiftLexer.g4` - Tokenization rules for Redshift SQL
13+
- `RedshiftParser.g4` - Parser grammar with 200+ statement types
14+
- Generated Go files: `redshift_parser.go`, `redshift_lexer.go`, etc.
15+
16+
2. **Base Classes**:
17+
- `redshift_parser_base.go` - Engine-aware parser with PostgreSQL/Redshift support
18+
- `redshift_lexer_base.go` - Base lexer implementation
19+
- `string_stack.go` - Utility for string stack operations
20+
21+
3. **Supporting Files**:
22+
- `keywords.go` - 600+ PostgreSQL keywords with reserved status
23+
- `builtin_function.go` - Built-in function definitions
24+
- `build.sh` - ANTLR code generation script
25+
26+
### Engine Support
27+
28+
The parser supports multiple database engines:
29+
- `EnginePostgreSQL` - Standard PostgreSQL syntax
30+
- `EngineRedshift` - Amazon Redshift-specific syntax extensions
31+
32+
## Development Guidelines
33+
34+
### Code Conventions
35+
36+
1. **Follow existing patterns**: Always examine existing code before making changes
37+
2. **Token/Rule/Name Convention**: Maintain consistency with current ANTLR grammar naming
38+
3. **Engine-specific features**: Use engine detection for Redshift-specific syntax
39+
4. **Error handling**: Implement proper error listeners and recovery mechanisms
40+
41+
### Testing Requirements
42+
43+
**CRITICAL**: Every change must include a related test case.
44+
45+
1. **Test Structure**:
46+
- Add SQL test files to the `examples/` directory
47+
- Use Go-based tests in `parser_test.go` and `engine_specific_test.go`
48+
- Tests automatically parse all SQL files in `examples/`
49+
50+
2. **Test Content Sources**:
51+
- Reference https://docs.aws.amazon.com/redshift/latest/dg/c_SQL_commands.html
52+
- Crawl syntax examples from AWS Redshift documentation
53+
- Use real-world SQL examples when possible
54+
55+
3. **Test Categories**:
56+
- DDL: CREATE, ALTER, DROP statements
57+
- DML: SELECT, INSERT, UPDATE, DELETE
58+
- Redshift-specific: IDENTITY columns, DISTKEY, SORTKEY, etc.
59+
- Advanced: Window functions, CTEs, JSON operations
60+
61+
### Adding New Features
62+
63+
1. **Grammar Changes**:
64+
```bash
65+
# Edit RedshiftLexer.g4 or RedshiftParser.g4
66+
# Run build script to regenerate Go code
67+
make build
68+
```
69+
70+
2. **Engine-Specific Logic**:
71+
- Use `GetEngine()` method to detect Redshift vs PostgreSQL
72+
- Implement conditional parsing for dialect-specific features
73+
- See `engine_specific_test.go` for examples
74+
75+
3. **Testing Process**:
76+
- Create SQL test files in `examples/`
77+
- Run tests: `go test -v`
78+
- Verify both parsing success and error handling
79+
80+
### Common Tasks
81+
82+
#### Adding Redshift-Specific Syntax
83+
84+
1. Identify the syntax difference from PostgreSQL
85+
2. Update the appropriate grammar file (lexer or parser)
86+
3. Add engine-specific logic if needed
87+
4. Create test cases with AWS documentation examples
88+
5. Verify tests pass for both engines
89+
90+
#### Adding New Keywords
91+
92+
1. Add to `keywords.go` with appropriate reserved status
93+
2. Update lexer grammar if needed
94+
3. Test keyword recognition in various contexts
95+
96+
#### Adding Built-in Functions
97+
98+
1. Add to `builtin_function.go` in appropriate category
99+
2. Update parser rules if function has special syntax
100+
3. Test function parsing and recognition
101+
102+
## Build and Test Commands
103+
104+
**IMPORTANT**: Always run `make build` before running tests to generate the latest Go code from ANTLR grammars.
105+
106+
```bash
107+
# Generate parser code from ANTLR grammars (REQUIRED before testing)
108+
make build
109+
110+
# Run all tests
111+
go test -v
112+
113+
# Run specific test
114+
go test -run TestParser -v
115+
116+
# Run benchmarks
117+
go test -bench=. -v
118+
```
119+
120+
## References
121+
122+
- [AWS Redshift SQL Commands](https://docs.aws.amazon.com/redshift/latest/dg/c_SQL_commands.html)
123+
- [ANTLR 4 Documentation](https://github.com/antlr/antlr4/blob/master/doc/index.md)
124+
- [PostgreSQL Grammar Reference](https://github.com/tunnelvisionlabs/antlr4-postgresql)
125+
126+
## Contributing
127+
128+
1. Always add test cases for new features
129+
2. Follow existing code patterns and conventions
130+
3. Test against both PostgreSQL and Redshift engines
131+
4. Use AWS documentation for accurate syntax examples
132+
5. Ensure all tests pass before submitting changes

redshift/Makefile

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
# None of these targets produces a file of the same name, so mark them phony;
# otherwise a file or directory called `build` or `test` would silently
# disable the corresponding target.
.PHONY: all build test

# Default target: regenerate the parser code, then run the tests.
all: build test

# Generate Go sources (package `redshift`, with visitor) from the ANTLR
# grammars into the current directory.
build:
	antlr -Dlanguage=Go -package redshift -visitor -o . RedshiftLexer.g4 RedshiftParser.g4

# Run the tests matching TestRedshiftParser. The recipe has to be on its own
# tab-indented line: placed after the colon, `go test -v ...` is parsed as a
# list of prerequisites and make aborts with "No rule to make target 'go'".
test:
	go test -v -run TestRedshiftParser

0 commit comments

Comments
 (0)