bytebase · rebelice · Aug 4, 2025 · Aug 4, 2025 · Aug 4, 2025 · Aug 4, 2025
diff --git a/.github/workflows/antlr-lint.yml b/.github/workflows/antlr-lint.yml
@@ -0,0 +1,285 @@
+name: ANTLR Grammar Lint
+
+# This workflow runs the antlr-v4-linter on all ANTLR grammar files (.g4)
+# to ensure they follow best practices and coding standards.
+#
+# The linter will check for:
+# - Naming conventions (rules, tokens, labels)
+# - Grammar complexity issues
+# - Documentation requirements
+# - Performance optimizations
+# - Syntax and structural problems
+# - Token management best practices
+
+on:
+  # Trigger on push events that modify grammar files (only on main/master branch)
+  push:
+    branches:
+      - main
+      - master
+    paths:
+      - '**/*.g4'
+      - '.github/workflows/antlr-lint.yml'  # Re-run if workflow itself changes
+
+  # Trigger on pull requests that modify grammar files
+  pull_request:
+    paths:
+      - '**/*.g4'
+      - '.github/workflows/antlr-lint.yml'
+
+  # Allow manual triggering from Actions tab
+  workflow_dispatch:
+    inputs:
+      verbose:
+        description: 'Enable verbose output'
+        required: false
+        default: 'false'
+        type: choice
+        options:
+          - 'true'
+          - 'false'
+
+jobs:
+  lint-grammars:
+    name: Lint ANTLR Grammars
+    runs-on: ubuntu-latest
+
+    # Least privilege: this job only reads repository contents. Annotations are
+    # emitted through workflow commands on stdout, which need no extra token scope.
+    permissions:
+      contents: read
+
+    # Define the dialects to check
+    # Add new dialects here as they are added to the repository
+    strategy:
+      # Keep linting the remaining dialects when one fails, so a single run
+      # reports the problems of every dialect instead of only the first failure.
+      fail-fast: false
+      matrix:
+        include:
+          - dialect: redshift
+            path: redshift
+          # Future dialects can be added like:
+          # - dialect: postgres
+          #   path: postgres
+          # - dialect: mysql
+          #   path: mysql
+
+    steps:
+      # Step 1: Checkout the parser repository containing grammar files
+      - name: 📥 Checkout parser repository
+        uses: actions/checkout@v4
+        with:
+          path: parser
+
+      # Step 2: Checkout the antlr-v4-linter tool repository into ./antlr-v4-linter
+      # NOTE(review): no `ref` is specified, so the checked-out revision of the tool
+      # is not pinned; consider pinning a tag or commit SHA so lint results are
+      # reproducible and cannot change underneath this workflow.
+      - name: 📥 Checkout antlr-v4-linter tool
+        uses: actions/checkout@v4
+        with:
+          repository: bytebase/antlr-v4-linter
+          path: antlr-v4-linter
+
+      # Step 3: Set up Python environment
+      - name: 🐍 Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.10'
+          cache: 'pip'
+          cache-dependency-path: 'antlr-v4-linter/pyproject.toml'
+
+      # Step 4: Install the antlr-v4-linter tool
+      - name: 📦 Install antlr-v4-linter
+        run: |
+          echo "Installing antlr-v4-linter and its dependencies..."
+          cd antlr-v4-linter
+
+          # Install in editable mode for development
+          pip install -e .
+
+          # Verify installation was successful
+          echo "Verifying installation..."
+          which antlr-lint
+          antlr-lint --version || echo "Version command not available"
+
+          # Show available commands
+          echo "Available commands:"
+          antlr-lint --help
+
+      # Step 5: Pick the linter configuration file (if any) and export it as CONFIG_FILE
+      - name: ⚙️ Setup linter configuration
+        working-directory: parser
+        run: |
+          # Lookup order: dialect-specific config first, then the repository-wide
+          # config, otherwise none (the linter then runs with its defaults).
+          dialect_config="${{ matrix.path }}/antlr-lint.json"
+          global_config="antlr-lint.json"
+          selected_config=""
+
+          if [ -f "$dialect_config" ]; then
+            echo "✅ Found custom configuration for ${{ matrix.dialect }}"
+            selected_config="$dialect_config"
+          elif [ -f "$global_config" ]; then
+            echo "✅ Found global configuration"
+            selected_config="$global_config"
+          else
+            echo "ℹ️ No configuration found, using defaults"
+          fi
+
+          # Make the choice visible to later steps; an empty value means "no config".
+          echo "CONFIG_FILE=${selected_config}" >> $GITHUB_ENV
+
+      # Step 6: Run the linter on grammar files with detailed output
+      #
+      # For every .g4 file under the dialect path this step runs the linter, turns
+      # each table row of its output into a GitHub annotation, and fails the job
+      # if any file's output contains ERROR or WARNING.
+      - name: 🔍 Lint ${{ matrix.dialect }} grammar files
+        working-directory: parser
+        run: |
+          echo "========================================="
+          echo "Linting ANTLR grammar files for: ${{ matrix.dialect }}"
+          echo "Path: ${{ matrix.path }}"
+          echo "========================================="
+          echo ""
+
+          # Initialize counters
+          total_files=0
+          failed_files=0
+          passed_files=0
+
+          # Create a temporary file to store all issues
+          issues_file=$(mktemp)
+
+          # The linter options are identical for every file, so build them once.
+          # Both variables are deliberately expanded unquoted further down: an
+          # empty value then contributes no argument at all, and
+          # "--config <path>" splits into two arguments.
+          config_opt=""
+          if [ -n "$CONFIG_FILE" ]; then
+            config_opt="--config $CONFIG_FILE"
+          fi
+
+          # Add verbose flag if requested (the input is only set for manual runs)
+          verbose_opt=""
+          if [ "${{ github.event.inputs.verbose }}" = "true" ]; then
+            verbose_opt="--verbose"
+          fi
+
+          # Process each .g4 file
+          for file in $(find ${{ matrix.path }} -name "*.g4" -type f | sort); do
+            total_files=$((total_files + 1))
+            echo "📄 Checking: $file"
+            echo "----------------------------------------"
+
+            # Run linter and capture output (tee keeps it visible in the job log)
+            output_file=$(mktemp)
+            antlr-lint lint $verbose_opt $config_opt "$file" 2>&1 | tee "$output_file"
+
+            # Check if there are any errors or warnings in the output
+            if grep -E "ERROR|WARNING" "$output_file" > /dev/null; then
+              echo "❌ FAILED: Issues detected"
+              failed_files=$((failed_files + 1))
+
+              # Number of line-level annotations emitted for this file; decides
+              # below whether the generic file-level annotation is needed.
+              annotation_count=0
+
+              # Parse the output for GitHub annotations
+              # Look for the table format with Location, Severity, Rule, and Message
+              while IFS= read -r line; do
+                # Look for lines with the format: │ 827:1    │  ERROR   │ S001 │ Message...
+                if echo "$line" | grep -E "^│ [0-9]+:[0-9]+" > /dev/null; then
+                  # Extract location (line:column)
+                  location=$(echo "$line" | sed -n 's/^│ *\([0-9]*:[0-9]*\).*/\1/p' | tr -d ' ')
+                  line_num=$(echo "$location" | cut -d: -f1)
+                  col_num=$(echo "$location" | cut -d: -f2)
+
+                  # Extract severity from the Severity column only. Matching the
+                  # whole row would misclassify a warning whose message merely
+                  # contains the word "error".
+                  if echo "$line" | grep -iE "^│ *[0-9]+:[0-9]+ *│ *ERROR *│" > /dev/null; then
+                    severity="error"
+                  elif echo "$line" | grep -iE "^│ *[0-9]+:[0-9]+ *│ *WARNING *│" > /dev/null; then
+                    severity="warning"
+                  else
+                    severity="notice"
+                  fi
+
+                  # Extract rule code
+                  rule=$(echo "$line" | sed -n 's/.*│ *\([A-Z][0-9]*\) *│.*/\1/p')
+
+                  # Extract message - everything after the rule code
+                  message=$(echo "$line" | sed -n 's/.*│ [A-Z][0-9]* *│ *\(.*\) *│$/\1/p' | sed 's/ *$//')
+
+                  # Output GitHub annotation
+                  if [ -n "$col_num" ] && [ "$col_num" != "1" ]; then
+                    echo "::${severity} file=${file},line=${line_num},col=${col_num}::[$rule] ${message}"
+                  else
+                    echo "::${severity} file=${file},line=${line_num}::[$rule] ${message}"
+                  fi
+                  annotation_count=$((annotation_count + 1))
+
+                  # Store for summary
+                  echo "${severity}: ${file}:${line_num} - [$rule] ${message}" >> "$issues_file"
+                fi
+              done < "$output_file"
+
+              # If no specific line annotations were emitted, create a general
+              # file-level annotation. This must look at what was emitted above,
+              # not at the linter output, which never contains "::" commands.
+              if [ "$annotation_count" -eq 0 ]; then
+                echo "::error file=${file}::ANTLR grammar linting failed. Check the workflow logs for details."
+              fi
+            else
+              echo "✅ PASSED: No issues found"
+              passed_files=$((passed_files + 1))
+            fi
+
+            rm -f "$output_file"
+            echo ""
+          done
+
+          # An empty file set usually means a misconfigured matrix path; surface
+          # it instead of reporting a silent success.
+          if [ "$total_files" -eq 0 ]; then
+            echo "::warning::No .g4 files found under '${{ matrix.path }}'; nothing was linted."
+          fi
+
+          # Summary statistics
+          echo "========================================="
+          echo "📊 Linting Summary for ${{ matrix.dialect }}"
+          echo "========================================="
+          echo "Total files checked: ${total_files}"
+          echo "✅ Passed: ${passed_files}"
+          echo "❌ Failed: ${failed_files}"
+          echo ""
+
+          # If there were failures, show a summary of issues
+          if [ $failed_files -gt 0 ]; then
+            echo "📋 Issues Summary:"
+            echo "----------------------------------------"
+            sort -u "$issues_file"
+            echo ""
+            echo "❌ Grammar linting failed for ${{ matrix.dialect }}"
+            echo "Please fix the issues above and try again."
+            rm -f "$issues_file"
+            exit 1
+          else
+            echo "✅ All grammar files for ${{ matrix.dialect }} passed linting!"
+            rm -f "$issues_file"
+          fi
+
+      # Step 7: On failure, upload the linted inputs as an artifact for debugging.
+      # The artifact contains the dialect's grammar files and any linter
+      # configuration files; the linter's own output is not written to a
+      # persistent file by this workflow, so it is only available in the job log.
+      - name: 📤 Upload linting results
+        if: failure()
+        uses: actions/upload-artifact@v4
+        with:
+          name: linting-results-${{ matrix.dialect }}
+          path: |
+            parser/${{ matrix.path }}/*.g4
+            parser/antlr-lint.json
+            parser/${{ matrix.path }}/antlr-lint.json
+          retention-days: 7
+
+      # Step 8: Create job summary
+      # Runs even when an earlier step failed (if: always()) and appends a
+      # Markdown table with one pass/fail row per grammar file to the step summary.
+      - name: 📝 Create job summary
+        if: always()
+        working-directory: parser
+        run: |
+          echo "# 🔍 ANTLR Grammar Lint Results" >> $GITHUB_STEP_SUMMARY
+          echo "" >> $GITHUB_STEP_SUMMARY
+
+          echo "## Dialect: \`${{ matrix.dialect }}\`" >> $GITHUB_STEP_SUMMARY
+          echo "**Path:** \`${{ matrix.path }}\`" >> $GITHUB_STEP_SUMMARY
+          echo "" >> $GITHUB_STEP_SUMMARY
+
+          echo "### 📄 Grammar Files Checked:" >> $GITHUB_STEP_SUMMARY
+          echo "" >> $GITHUB_STEP_SUMMARY
+
+          echo "| File | Status |" >> $GITHUB_STEP_SUMMARY
+          echo "|------|--------|" >> $GITHUB_STEP_SUMMARY
+
+          # Re-resolve the config file from the filesystem using the lookup order
+          # dialect-specific first, then repository-wide. The CONFIG_FILE
+          # environment variable is not read here.
+          config_opt=""
+          if [ -f "${{ matrix.path }}/antlr-lint.json" ]; then
+            config_opt="--config ${{ matrix.path }}/antlr-lint.json"
+          elif [ -f "antlr-lint.json" ]; then
+            config_opt="--config antlr-lint.json"
+          fi
+
+          for file in $(find ${{ matrix.path }} -name "*.g4" -type f | sort); do
+            # Run the linter again for this file and classify it by its output;
+            # "|| true" discards the linter's exit code so the loop always continues.
+            output=$(antlr-lint lint $config_opt "$file" 2>&1 || true)
+            if echo "$output" | grep -E "ERROR|WARNING" > /dev/null; then
+              echo "| \`${file}\` | ❌ Failed |" >> $GITHUB_STEP_SUMMARY
+            else
+              echo "| \`${file}\` | ✅ Passed |" >> $GITHUB_STEP_SUMMARY
+            fi
+          done
+
+          echo "" >> $GITHUB_STEP_SUMMARY
+          echo "---" >> $GITHUB_STEP_SUMMARY
+          echo "*Workflow run at: $(date -u '+%Y-%m-%d %H:%M:%S UTC')*" >> $GITHUB_STEP_SUMMARY
diff --git a/antlr-lint.json b/antlr-lint.json
@@ -0,0 +1,102 @@
+{
+  "rules": {
+    "S001": {
+      "description": "Main parser rule should end with EOF token",
+      "enabled": true,
+      "severity": "error"
+    },
+    "S002": {
+      "description": "Duplicate rule definition",
+      "enabled": true,
+      "severity": "warning"
+    },
+    "S003": {
+      "description": "Invalid grammar declaration",
+      "enabled": true,
+      "severity": "error"
+    },
+    "N001": {
+      "description": "Parser rule should start with lowercase letter",
+      "enabled": true,
+      "severity": "error"
+    },
+    "N002": {
+      "description": "Lexer rule should start with uppercase letter",
+      "enabled": true,
+      "severity": "error"
+    },
+    "N003": {
+      "description": "Rule name doesn't follow camelCase/UPPER_CASE convention",
+      "enabled": true,
+      "severity": "warning"
+    },
+    "L001": {
+      "description": "Missing labels for alternatives in parser rules",
+      "enabled": true,
+      "severity": "warning"
+    },
+    "L002": {
+      "description": "Inconsistent label naming convention",
+      "enabled": true,
+      "severity": "info"
+    },
+    "L003": {
+      "description": "Duplicate label names within the same rule",
+      "enabled": true,
+      "severity": "warning"
+    },
+    "C001": {
+      "description": "Rule exceeds complexity thresholds",
+      "enabled": true,
+      "severity": "warning",
+      "thresholds": {
+        "maxAlternatives": 10,
+        "maxNestingDepth": 5,
+        "maxTokens": 50
+      }
+    },
+    "C002": {
+      "description": "Deeply nested rule structure",
+      "enabled": true,
+      "severity": "warning"
+    },
+    "C003": {
+      "description": "Very long rule definition (consider breaking it up)",
+      "enabled": true,
+      "severity": "info"
+    },
+    "T001": {
+      "description": "Overlapping token definitions",
+      "enabled": true,
+      "severity": "warning"
+    },
+    "T002": {
+      "description": "Unreachable token rule",
+      "enabled": true,
+      "severity": "warning"
+    },
+    "T003": {
+      "description": "Token defined but never used",
+      "enabled": true,
+      "severity": "info"
+    },
+    "E001": {
+      "description": "Missing error recovery strategy",
+      "enabled": false,
+      "severity": "info"
+    },
+    "E002": {
+      "description": "Potential ambiguity in grammar",
+      "enabled": true,
+      "severity": "warning"
+    }
+  },
+  "excludePatterns": [
+    "*.generated.g4",
+    "*Test*.g4",
+    "node_modules/**/*.g4",
+    "dist/**/*.g4",
+    "build/**/*.g4"
+  ],
+  "outputFormat": "text"
+}
diff --git a/redshift/RedshiftLexer.g4 b/redshift/RedshiftLexer.g4
@@ -3926,3 +3926,4 @@ EndDollarStringConstant
    {p.isTag()}?
    {l.popTag();} -> popMode
    ;
+
Original file line number	Diff line number	Diff line change
Expand Up		@@ -3926,3 +3926,4 @@ EndDollarStringConstant
		{p.isTag()}?
		{l.popTag();} -> popMode
		;