-
Notifications
You must be signed in to change notification settings - Fork 236
Adding Query Validation Workflow #5514
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 25 commits
6bcc8f2
187b52f
e645bf9
fac336c
088acb9
0e8ea7e
872b8d1
71268b6
6991012
ecd58ae
d2c9a2c
994b81a
2dc87df
18fda2d
318734d
2b09da4
b9c4d0e
3b44020
db58bb8
f61e18b
95e4d85
ae88c80
52d50f1
62433df
1ddefbc
886d6db
3eb3520
458f024
e4b7a76
a92d5a5
09972fa
b631559
362d343
899c089
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | |||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,80 @@ | |||||||||||||||||
# Validates SumoLogic query code blocks embedded in changed Markdown files.
name: Validate SumoLogic Queries
on:
  push:
    paths:
      - '**/*.md'
  pull_request:
    paths:
      - '**/*.md'

# Least-privilege GITHUB_TOKEN: this workflow only needs to read repository
# contents (fixes the CodeQL "workflow does not contain permissions" finding).
permissions:
  contents: read

jobs:
  validate-queries:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0  # Required for git diff detection

      - name: Check for SQL changes
        id: check-sql
        run: |
          # Get the base commit for comparison
          if [ "${{ github.event_name }}" = "pull_request" ]; then
            BASE_COMMIT="${{ github.event.pull_request.base.sha }}"
          else
            # For push events, compare with previous commit
            BASE_COMMIT="${{ github.event.before }}"
          fi

          echo "Base commit: $BASE_COMMIT"
          echo "Current commit: ${{ github.sha }}"

          # Get changed markdown files
          git diff --name-only --diff-filter=AM $BASE_COMMIT...${{ github.sha }} -- '**/*.md' > changed_files.txt

          if [ ! -s changed_files.txt ]; then
            echo "No markdown files changed"
            echo "sql_changed=false" >> $GITHUB_OUTPUT
            exit 0
          fi

          echo "Changed markdown files:"
          cat changed_files.txt

          # Check if any of the changed files have SQL code block modifications
          SQL_CHANGED=false
          while IFS= read -r file; do
            if [ -f "$file" ]; then
              # Check if the diff contains changes to SQL code blocks
              if git diff $BASE_COMMIT...${{ github.sha }} -- "$file" | grep -E "^[+-].*\`\`\`(sql|sumo)" > /dev/null; then
                echo "SQL code block changes detected in: $file"
                SQL_CHANGED=true
              fi
            fi
          done < changed_files.txt

          echo "sql_changed=$SQL_CHANGED" >> $GITHUB_OUTPUT
          echo "SQL changes detected: $SQL_CHANGED"

      - name: Set up Python
        if: steps.check-sql.outputs.sql_changed == 'true'
        uses: actions/setup-python@v4
        with:
          python-version: "3.10"

      - name: Install dependencies
        if: steps.check-sql.outputs.sql_changed == 'true'
        run: pip install requests python-dotenv

      - name: Validate queries
        if: steps.check-sql.outputs.sql_changed == 'true'
        working-directory: ./scripts
        env:
          SUMO_LOGIC_ACCESS_ID: ${{ secrets.SUMO_LOGIC_ACCESS_ID }}
          SUMO_LOGIC_ACCESS_KEY: ${{ secrets.SUMO_LOGIC_ACCESS_KEY }}
        run: |
          python validate_queries.py

      - name: Skip validation
        if: steps.check-sql.outputs.sql_changed == 'false'
        run: echo "No SQL code block changes detected, skipping validation"
|
Comment on lines
+12
to
+81
Check warning — Code scanning / CodeQL: "Workflow does not contain permissions" (Medium severity).
The Actions job or workflow does not limit the permissions of the GITHUB_TOKEN. Consider setting an explicit `permissions` block, using the following as a minimal starting point: `permissions: contents: read`.
Copilot Autofix (AI, 8 months ago): To fix the issue, add a top-level `permissions` block that limits the GITHUB_TOKEN to the minimum access the workflow needs (here, `contents: read`).
Suggested changeset
1
.github/workflows/validate-queries.yml
Copilot is powered by AI and may make mistakes. Always verify output.
Refresh and try again.
|
|||||||||||||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,47 @@ | ||
import os
import time
from datetime import datetime, timedelta

import requests
|
|
||
class SumoLogicClient:
    """Thin client for the Sumo Logic Search Job API.

    Credentials are read from the SUMO_LOGIC_ACCESS_ID and
    SUMO_LOGIC_ACCESS_KEY environment variables and sent via HTTP basic auth
    on every request of the shared session.
    """

    def __init__(self):
        # NOTE(review): "long-api.sumologic.net" looks unusual -- Sumo Logic's
        # public endpoints are deployment-specific hosts like
        # https://api.sumologic.com/api/v1. Confirm this base URL is intended.
        self.base_url = "https://long-api.sumologic.net/api/v1"
        self.session = requests.Session()
        self.session.headers.update({'Content-Type': 'application/json'})
        self.session.auth = (
            os.getenv("SUMO_LOGIC_ACCESS_ID"),
            os.getenv("SUMO_LOGIC_ACCESS_KEY")
        )

    def test_query(self, query):
        """Execute a query in Sumo Logic and check for results.

        Returns True only when the search job reaches the
        "DONE GATHERING RESULTS" state and produced at least one message;
        False on cancellation or poll timeout.
        """
        job_id = self._create_search_job(query)
        status = self._wait_for_job(job_id)
        return self._check_results(job_id) if status == "DONE GATHERING RESULTS" else False

    def _create_search_job(self, query):
        """Start a search job over the last 24 hours and return its job id.

        Raises requests.HTTPError if the API rejects the request.
        """
        # utcnow() is naive UTC; the explicit "Z" suffix marks the timestamps
        # as UTC for the API.
        end_time = datetime.utcnow()
        start_time = end_time - timedelta(hours=24)
        payload = {
            "query": query,
            "from": start_time.isoformat() + "Z",
            "to": end_time.isoformat() + "Z",
            "timeZone": "UTC"
        }
        response = self.session.post(f"{self.base_url}/search/jobs", json=payload)
        response.raise_for_status()
        return response.json()["id"]

    def _wait_for_job(self, job_id, max_attempts=10):
        """Poll the search job until it terminates.

        Returns the final job state string, or "TIMEOUT" after
        max_attempts polls spaced 3 seconds apart.
        """
        for _ in range(max_attempts):
            response = self.session.get(f"{self.base_url}/search/jobs/{job_id}")
            response.raise_for_status()
            status = response.json()["state"]
            if status in ["DONE GATHERING RESULTS", "CANCELLED"]:
                return status
            # Bug fix: `time` was used here but never imported (NameError on
            # the first poll that did not terminate immediately); the import
            # is now present at the top of the file.
            time.sleep(3)
        return "TIMEOUT"

    def _check_results(self, job_id):
        """Return True if the finished job produced at least one message."""
        response = self.session.get(f"{self.base_url}/search/jobs/{job_id}/messages")
        response.raise_for_status()
        return len(response.json()["messages"]) > 0
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,209 @@ | ||
| #!/usr/bin/env python3 | ||
| import re | ||
| import sys | ||
| import os | ||
| import json | ||
| import requests | ||
| from pathlib import Path | ||
| from datetime import datetime, timedelta | ||
|
|
||
def get_repo_root():
    """Resolve the absolute path of the repository root.

    Prefers the GITHUB_WORKSPACE environment variable (set by GitHub
    Actions) when it names an existing path; otherwise falls back to the
    parent of this script's directory (scripts/ sits one level below the
    repo root).
    """
    workspace = os.getenv('GITHUB_WORKSPACE')
    if not (workspace and Path(workspace).exists()):
        return Path(__file__).parent.parent
    return Path(workspace)
|
|
||
def debug_environment():
    """Dump workspace structure to the Actions log and return the repo root.

    Output is wrapped in a GitHub Actions log group
    (::group:: / ::endgroup::). Purely diagnostic; the only functional
    result is the returned repo-root Path.
    """
    import shlex  # local import: only this debug helper shells out

    repo_root = get_repo_root()
    print("::group::⚙️ Environment Debug")
    print(f"📂 Repo root: {repo_root}")
    print(f"📂 Working dir: {os.getcwd()}")
    print("\n📁 Directory Structure:")
    # Quote the path so spaces or shell metacharacters in the workspace
    # path cannot break (or inject into) the shell command line.
    root = shlex.quote(str(repo_root))
    os.system(f"find {root} -maxdepth 3 -type d | sort")
    print("\n📝 Markdown Files:")
    os.system(f"find {root} -name '*.md' | head -n 20")
    print("::endgroup::")
    return repo_root
|
|
||
def get_changed_files(repo_root):
    """Collect the Markdown files that should be validated.

    Sources are tried in order: the changed_files.txt written by the
    workflow's diff step, the pull-request event payload, and finally a
    full scan of the docs/ directory. Returns a (possibly empty) list of
    path strings rooted at *repo_root*.
    """
    # 1) changed_files.txt produced by the "Check for SQL changes" step.
    listing = repo_root / "changed_files.txt"
    if listing.exists():
        try:
            with open(listing) as handle:
                names = [line.strip() for line in handle if line.strip()]
            if names:
                print(f"📦 Found {len(names)} changed Markdown files from workflow")
                return [str(repo_root / name) for name in names]
        except Exception as exc:
            print(f"::warning::Couldn't read changed_files.txt: {exc}")

    # 2) GitHub PR event payload.
    # NOTE(review): webhook payloads do not normally embed a `files` list
    # under `pull_request`, so this branch may never fire -- TODO confirm.
    if "GITHUB_EVENT_PATH" in os.environ:
        try:
            with open(os.environ["GITHUB_EVENT_PATH"]) as handle:
                event = json.load(handle)
            pr_files = [
                str(repo_root / entry['filename'])
                for entry in event.get('pull_request', {}).get('files', [])
                if entry['filename'].endswith('.md')
            ]
            if pr_files:
                print(f"📦 Found {len(pr_files)} changed Markdown files")
                return pr_files
        except Exception as exc:
            print(f"::warning::Couldn't read PR data: {exc}")

    # 3) Fallback: validate everything under docs/.
    docs_dir = repo_root / "docs"
    if docs_dir.exists():
        markdown = list(docs_dir.rglob("*.md"))
        print(f"🔄 Scanning {len(markdown)} docs files")
        return [str(path) for path in markdown]

    print("::error::No Markdown files found in docs/ directory")
    return []
|
|
||
def extract_sql_queries(file_path):
    """Extract the bodies of ```sql / ```sumo fenced blocks from a
    Markdown file.

    Blocks that are empty after stripping, or begin with a comment marker
    ('#' or '//'), are skipped. On any read failure an ::error:: line is
    printed and [] is returned.
    """
    fence_re = re.compile(
        r'```(?:sql|sumo)\s*(?:title="[^"]*")?\s*\n(.*?)```',
        re.DOTALL | re.IGNORECASE,
    )
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            text = handle.read()
        return [
            query
            for block in fence_re.findall(text)
            if (query := block.strip()) and not query.startswith(('#', '//'))
        ]
    except Exception as e:
        print(f"::error::Error reading file {file_path}: {e}")
        return []
|
|
||
def validate_query_syntax(query):
    """Run lightweight lint checks on a SumoLogic query string.

    Checks for empty pipe-delimited sections and unbalanced quotes,
    parentheses, and square brackets. Returns a list of error messages
    (empty list = passed). These are count-based heuristics: a quote
    character nested inside the other quote style can produce a false
    positive.
    """
    errors = []

    if '|' in query:
        # Every pipe-delimited section must contain something.
        parts = [part.strip() for part in query.split('|')]
        for i, part in enumerate(parts):
            if not part:
                errors.append(f"Empty pipe section at position {i}")
        # (A previous revision also pattern-matched each section against a
        # whitelist of known operators, but that check ended in `pass` and
        # could never report an error, so the dead code was removed.)

    # Unmatched quotes: escaped quotes (\' and \") are excluded from the
    # parity count.
    single_quotes = query.count("'") - query.count("\\'")
    double_quotes = query.count('"') - query.count('\\"')

    if single_quotes % 2 != 0:
        errors.append("Unmatched single quotes")
    if double_quotes % 2 != 0:
        errors.append("Unmatched double quotes")

    # Balanced-delimiter checks (count-based; ordering is not verified).
    if query.count('(') != query.count(')'):
        errors.append("Unmatched parentheses")

    if query.count('[') != query.count(']'):
        errors.append("Unmatched square brackets")

    return errors
|
|
||
def validate_file(file_path):
    """Validate every SQL query found in one Markdown file.

    Returns True when all extracted queries pass the syntax checks (or the
    file contains none); False otherwise. Progress is printed in the same
    log style as the rest of this script.
    """
    print(f"🔍 Validating: {file_path}")

    queries = extract_sql_queries(file_path)
    if not queries:
        print(f" ℹ️ No SQL queries found")
        return True

    print(f" 📊 Found {len(queries)} SQL queries")

    ok = True
    for index, query in enumerate(queries, 1):
        problems = validate_query_syntax(query)
        if not problems:
            print(f" ✅ Query {index} passed basic syntax validation")
            continue
        ok = False
        print(f" ❌ Query {index} has errors:")
        for error in problems:
            print(f" - {error}")
        print(f" Query preview: {query[:100]}...")

    return ok
|
|
||
def main():
    """Entry point: validate SQL code blocks in changed Markdown files.

    Exits 0 when everything passes (or there is nothing to check) and 1
    when any file fails validation.
    """
    repo_root = debug_environment()
    changed_files = get_changed_files(repo_root)

    if not changed_files:
        print("::warning::No Markdown files to validate")
        sys.exit(0)

    print(f"📋 Validating {len(changed_files)} files...")

    validation_results = []
    total_queries = 0

    for file_path in changed_files:
        if not os.path.exists(file_path):
            print(f"::warning::File not found: {file_path}")
            continue
        validation_results.append((file_path, validate_file(file_path)))
        # Re-extract purely to count queries for the summary below.
        total_queries += len(extract_sql_queries(file_path))

    # Summary banner.
    print("\n" + "=" * 60)
    print("📊 VALIDATION SUMMARY")
    print("=" * 60)

    passed_files = sum(1 for _, ok in validation_results if ok)
    failed_files = len(validation_results) - passed_files

    print(f"📁 Files processed: {len(validation_results)}")
    print(f"📊 Total SQL queries: {total_queries}")
    print(f"✅ Files passed: {passed_files}")
    print(f"❌ Files failed: {failed_files}")

    if failed_files > 0:
        print("\n❌ Files with validation errors:")
        for file_path, ok in validation_results:
            if not ok:
                print(f" - {file_path}")

        print("\n::error::SQL query validation failed!")
        sys.exit(1)

    print("\n🎉 All SQL queries passed validation!")
    sys.exit(0)


if __name__ == "__main__":
    main()
Uh oh!
There was an error while loading. Please reload this page.