Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
285 changes: 285 additions & 0 deletions .github/workflows/antlr-lint.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,285 @@
name: ANTLR Grammar Lint

# Runs antlr-v4-linter against every ANTLR grammar file (.g4) so that the
# grammars keep to best practices and coding standards.
#
# Areas the linter checks:
#   - Naming conventions (rules, tokens, labels)
#   - Grammar complexity issues
#   - Documentation requirements
#   - Performance optimizations
#   - Syntax and structural problems
#   - Token management best practices

on:
  # Pushes to main/master that touch a grammar file or this workflow file
  push:
    branches: [main, master]
    paths:
      - '**/*.g4'
      - '.github/workflows/antlr-lint.yml'

  # Pull requests that touch a grammar file or this workflow file
  pull_request:
    paths:
      - '**/*.g4'
      - '.github/workflows/antlr-lint.yml'

  # Manual runs from the Actions tab, with an optional verbose switch
  workflow_dispatch:
    inputs:
      verbose:
        description: 'Enable verbose output'
        required: false
        default: 'false'
        type: choice
        options: ['true', 'false']

jobs:
  lint-grammars:
    name: Lint ANTLR Grammars
    runs-on: ubuntu-latest

    # One matrix entry per dialect. `dialect` is the label used in step names
    # and log messages; `path` is the directory searched for .g4 files.
    strategy:
      matrix:
        include:
          - { dialect: redshift, path: redshift }
          # When a new dialect is added to the repository, append an entry:
          # - { dialect: postgres, path: postgres }
          # - { dialect: mysql, path: mysql }

    steps:
# Step 1: Check out this repository (the grammar files) into ./parser
- name: 📥 Checkout parser repository
  uses: actions/checkout@v4
  with:
    path: parser

# Step 2: Check out the linter's source next to it, into ./antlr-v4-linter
- name: 📥 Checkout antlr-v4-linter tool
  uses: actions/checkout@v4
  with:
    path: antlr-v4-linter
    repository: bytebase/antlr-v4-linter

# Step 3: Provide Python 3.10 with pip caching; the linter checkout's
# pyproject.toml is the dependency file given to the cache
- name: 🐍 Set up Python
  uses: actions/setup-python@v5
  with:
    # Must stay quoted: an unquoted 3.10 would be read as the number 3.1
    python-version: "3.10"
    cache: pip
    cache-dependency-path: antlr-v4-linter/pyproject.toml

# Step 4: Install the antlr-v4-linter tool from the checkout made in step 2
- name: 📦 Install antlr-v4-linter
  working-directory: antlr-v4-linter
  run: |
    echo "Installing antlr-v4-linter and its dependencies..."

    # Editable install: the package is used straight from the checkout
    pip install -e .

    # Fail the step early if the CLI entry point did not land on PATH.
    # `command -v` is the shell builtin for this; it prints the resolved path
    # and returns non-zero when the command is missing.
    echo "Verifying installation..."
    command -v antlr-lint

    # Not every release may support --version, so a failure here is tolerated
    antlr-lint --version || echo "Version command not available"

    # Log the CLI usage to make later failures easier to diagnose
    echo "Available commands:"
    antlr-lint --help

# Step 5: Locate the linter configuration and export it as CONFIG_FILE
#
# Lookup order (paths are relative to ./parser):
#   1. <matrix.path>/antlr-lint.json  - dialect-specific configuration
#   2. antlr-lint.json                - repository-wide configuration
#   3. nothing found                  - CONFIG_FILE is exported empty, so the
#                                       lint step passes no --config option
- name: ⚙️ Setup linter configuration
  working-directory: parser
  run: |
    dialect_config="${{ matrix.path }}/antlr-lint.json"
    global_config="antlr-lint.json"

    if [ -f "$dialect_config" ]; then
      echo "✅ Found custom configuration for ${{ matrix.dialect }}"
      config_file="$dialect_config"
    elif [ -f "$global_config" ]; then
      echo "✅ Found global configuration"
      config_file="$global_config"
    else
      echo "ℹ️ No configuration found, using defaults"
      config_file=""
    fi

    # Single write to the environment file. The redirect target is quoted so
    # a runner path containing whitespace cannot cause an ambiguous redirect.
    echo "CONFIG_FILE=${config_file}" >> "$GITHUB_ENV"

# Step 6: Run the linter on every grammar file of the dialect
#
# For each .g4 file under <matrix.path> the linter output is streamed to the
# log and scanned for ERROR/WARNING. Table rows of the form
#   │ 827:1 │ ERROR │ S001 │ Message... │
# are turned into GitHub annotations. The step exits 1 if any file has issues.
# CONFIG_FILE (exported by step 5) selects the configuration when non-empty.
- name: 🔍 Lint ${{ matrix.dialect }} grammar files
  working-directory: parser
  run: |
    echo "========================================="
    echo "Linting ANTLR grammar files for: ${{ matrix.dialect }}"
    echo "Path: ${{ matrix.path }}"
    echo "========================================="
    echo ""

    # Print $1 with leading and trailing whitespace removed.
    trim() {
      local text=$1
      text="${text#"${text%%[![:space:]]*}"}"
      text="${text%"${text##*[![:space:]]}"}"
      printf '%s' "$text"
    }

    # Counters for the final summary
    total_files=0
    failed_files=0
    passed_files=0

    # issues_file collects one line per issue across all files;
    # output_file holds the linter output of the file being processed.
    # The trap removes both on every exit path, including the `exit 1` below.
    issues_file=$(mktemp)
    output_file=$(mktemp)
    trap 'rm -f "$issues_file" "$output_file"' EXIT

    # The options are identical for every file, so build them once. An array
    # keeps each argument intact even if the config path contains spaces.
    lint_opts=()
    if [ "${{ github.event.inputs.verbose }}" = "true" ]; then
      lint_opts+=(--verbose)
    fi
    if [ -n "${CONFIG_FILE:-}" ]; then
      lint_opts+=(--config "$CONFIG_FILE")
    fi

    # Matches an issue row and captures line (1) and column (2)
    row_re='^│ ([0-9]+):([0-9]+)'

    # NUL-delimited listing so file names with whitespace are not split
    mapfile -d '' -t grammar_files < <(find "${{ matrix.path }}" -name "*.g4" -type f -print0 | sort -z)

    for file in "${grammar_files[@]}"; do
      total_files=$((total_files + 1))
      echo "📄 Checking: $file"
      echo "----------------------------------------"

      # The linter's exit status is deliberately ignored: pass/fail is decided
      # from the output text below, exactly as before.
      antlr-lint lint "${lint_opts[@]}" "$file" 2>&1 | tee "$output_file" || true

      if grep -qE "ERROR|WARNING" "$output_file"; then
        echo "❌ FAILED: Issues detected"
        failed_files=$((failed_files + 1))
        annotations=0

        while IFS= read -r line; do
          [[ "$line" =~ $row_re ]] || continue
          line_num=${BASH_REMATCH[1]}
          col_num=${BASH_REMATCH[2]}

          # Walk the remaining cells: severity, rule code, message
          cells=${line#│}
          cells=${cells#*│}
          severity_cell=$(trim "${cells%%│*}")
          cells=${cells#*│}
          rule=$(trim "${cells%%│*}")
          cells=${cells#*│}
          message=$(trim "${cells%│*}")

          # Decide the level from the severity column only. Searching the
          # whole row would mislabel a warning whose message contains the
          # word "error".
          case "${severity_cell^^}" in
            *ERROR*) severity="error" ;;
            *WARNING*) severity="warning" ;;
            *) severity="notice" ;;
          esac

          # Column 1 is omitted from the annotation (unchanged behavior)
          if [ -n "$col_num" ] && [ "$col_num" != "1" ]; then
            echo "::${severity} file=${file},line=${line_num},col=${col_num}::[$rule] ${message}"
          else
            echo "::${severity} file=${file},line=${line_num}::[$rule] ${message}"
          fi
          annotations=$((annotations + 1))

          # Store for summary
          echo "${severity}: ${file}:${line_num} - [$rule] ${message}" >> "$issues_file"
        done < "$output_file"

        # Fall back to a file-level annotation only when no row could be
        # parsed. The annotations go to stdout, not to output_file, so the
        # emitted count is what has to be checked here.
        if [ "$annotations" -eq 0 ]; then
          echo "::error file=${file}::ANTLR grammar linting failed. Check the workflow logs for details."
        fi
      else
        echo "✅ PASSED: No issues found"
        passed_files=$((passed_files + 1))
      fi

      echo ""
    done

    # Summary statistics
    echo "========================================="
    echo "📊 Linting Summary for ${{ matrix.dialect }}"
    echo "========================================="
    echo "Total files checked: ${total_files}"
    echo "✅ Passed: ${passed_files}"
    echo "❌ Failed: ${failed_files}"
    echo ""

    if [ "$failed_files" -gt 0 ]; then
      echo "📋 Issues Summary:"
      echo "----------------------------------------"
      sort -u "$issues_file"
      echo ""
      echo "❌ Grammar linting failed for ${{ matrix.dialect }}"
      echo "Please fix the issues above and try again."
      exit 1
    else
      echo "✅ All grammar files for ${{ matrix.dialect }} passed linting!"
    fi

# Step 7: When the job has failed, attach the linted inputs as an artifact for
# debugging: the dialect's grammar files plus the repository-wide and
# dialect-specific linter configuration paths
- name: 📤 Upload linting results
  if: failure()
  uses: actions/upload-artifact@v4
  with:
    name: linting-results-${{ matrix.dialect }}
    retention-days: 7
    path: |
      parser/${{ matrix.path }}/*.g4
      parser/antlr-lint.json
      parser/${{ matrix.path }}/antlr-lint.json

# Step 8: Write a per-file pass/fail table to the job summary
#
# Runs even when earlier steps failed (`if: always()`), so it does not rely on
# state from them: the configuration file is looked up again on disk and the
# linter is re-run once per grammar file.
- name: 📝 Create job summary
  if: always()
  working-directory: parser
  run: |
    # Same lookup order as the configuration step: dialect-specific file
    # first, then the repository-wide file, otherwise linter defaults.
    config_opts=()
    if [ -f "${{ matrix.path }}/antlr-lint.json" ]; then
      config_opts=(--config "${{ matrix.path }}/antlr-lint.json")
    elif [ -f "antlr-lint.json" ]; then
      config_opts=(--config "antlr-lint.json")
    fi

    # If installation failed, the linter is missing. "command not found"
    # contains neither ERROR nor WARNING, so without this guard every file
    # would be reported as passed.
    linter_available=true
    command -v antlr-lint > /dev/null || linter_available=false

    # NUL-delimited listing so file names with whitespace are not split
    mapfile -d '' -t grammar_files < <(find "${{ matrix.path }}" -name "*.g4" -type f -print0 | sort -z)

    # One grouped, quoted redirect instead of one append per line
    {
      echo "# 🔍 ANTLR Grammar Lint Results"
      echo ""

      echo "## Dialect: \`${{ matrix.dialect }}\`"
      echo "**Path:** \`${{ matrix.path }}\`"
      echo ""

      echo "### 📄 Grammar Files Checked:"
      echo ""

      echo "| File | Status |"
      echo "|------|--------|"

      for file in "${grammar_files[@]}"; do
        if [ "$linter_available" != "true" ]; then
          echo "| \`${file}\` | ⚠️ Not checked (antlr-lint unavailable) |"
          continue
        fi

        # The exit code is ignored; the verdict comes from the output text
        output=$(antlr-lint lint "${config_opts[@]}" "$file" 2>&1 || true)
        if grep -qE "ERROR|WARNING" <<< "$output"; then
          echo "| \`${file}\` | ❌ Failed |"
        else
          echo "| \`${file}\` | ✅ Passed |"
        fi
      done

      echo ""
      echo "---"
      echo "*Workflow run at: $(date -u '+%Y-%m-%d %H:%M:%S UTC')*"
    } >> "$GITHUB_STEP_SUMMARY"
102 changes: 102 additions & 0 deletions antlr-lint.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
{
"rules": {
"S001": {
"description": "Main parser rule should end with EOF token",
"enabled": true,
"severity": "error"
},
"S002": {
"description": "Duplicate rule definition",
"enabled": true,
"severity": "warning"
},
"S003": {
"description": "Invalid grammar declaration",
"enabled": true,
"severity": "error"
},
"N001": {
"description": "Parser rule should start with lowercase letter",
"enabled": true,
"severity": "error"
},
"N002": {
"description": "Lexer rule should start with uppercase letter",
"enabled": true,
"severity": "error"
},
"N003": {
"description": "Rule name doesn't follow camelCase/UPPER_CASE convention",
"enabled": true,
"severity": "warning"
},
"L001": {
"description": "Missing labels for alternatives in parser rules",
"enabled": true,
"severity": "warning"
},
"L002": {
"description": "Inconsistent label naming convention",
"enabled": true,
"severity": "info"
},
"L003": {
"description": "Duplicate label names within the same rule",
"enabled": true,
"severity": "warning"
},
"C001": {
"description": "Rule exceeds complexity thresholds",
"enabled": true,
"severity": "warning",
"thresholds": {
"maxAlternatives": 10,
"maxNestingDepth": 5,
"maxTokens": 50
}
},
"C002": {
"description": "Deeply nested rule structure",
"enabled": true,
"severity": "warning"
},
"C003": {
"description": "Very long rule definition (consider breaking it up)",
"enabled": true,
"severity": "info"
},
"T001": {
"description": "Overlapping token definitions",
"enabled": true,
"severity": "warning"
},
"T002": {
"description": "Unreachable token rule",
"enabled": true,
"severity": "warning"
},
"T003": {
"description": "Token defined but never used",
"enabled": true,
"severity": "info"
},
"E001": {
"description": "Missing error recovery strategy",
"enabled": false,
"severity": "info"
},
"E002": {
"description": "Potential ambiguity in grammar",
"enabled": true,
"severity": "warning"
}
},
"excludePatterns": [
"*.generated.g4",
"*Test*.g4",
"node_modules/**/*.g4",
"dist/**/*.g4",
"build/**/*.g4"
],
"outputFormat": "text"
}
1 change: 1 addition & 0 deletions redshift/RedshiftLexer.g4
Original file line number Diff line number Diff line change
Expand Up @@ -3926,3 +3926,4 @@ EndDollarStringConstant
{p.isTag()}?
{l.popTag();} -> popMode
;

Loading