#!/usr/bin/env python3
"""
Validate Sumo Logic queries embedded in Markdown files during pull requests.

- Finds every ```sql block in the changed Markdown files
- Executes each query against the Sumo Logic Search Job API
- Fails the PR if any query returns no results
"""
8-
import json
import os
import re
import sys
import time
from datetime import datetime, timedelta, timezone
from itertools import islice
from pathlib import Path

import requests
169
17- # SumoLogic API Client (embedded for simplicity)
class SumoLogicClient:
    """Small Sumo Logic Search Job API client used to smoke-test queries.

    Configuration is read from the environment: ``SUMO_LOGIC_ENDPOINT``
    (falls back to the default URL below), ``SUMO_LOGIC_ACCESS_ID`` and
    ``SUMO_LOGIC_ACCESS_KEY``.
    """

    # Job states after which further polling cannot change the outcome.
    _TERMINAL_STATES = ("DONE GATHERING RESULTS", "CANCELLED")
    # Per-request timeout in seconds so a stalled connection cannot hang CI.
    _REQUEST_TIMEOUT = 30

    def __init__(self):
        """Create an authenticated HTTP session from environment variables."""
        endpoint = os.getenv('SUMO_LOGIC_ENDPOINT', 'https://long-api.sumologic.com/api/v1')
        # Tolerate a trailing slash so joined URLs never contain "//".
        self.api_url = endpoint.rstrip('/')
        self.session = requests.Session()
        self.session.auth = (
            os.getenv('SUMO_LOGIC_ACCESS_ID'),
            os.getenv('SUMO_LOGIC_ACCESS_KEY'),
        )
        self.session.headers.update({'Content-Type': 'application/json'})

    def test_query(self, query):
        """Run *query* and report whether it produced at least one message.

        Returns:
            True if the finished job has messages, False otherwise.

        Raises:
            RuntimeError: the job ended in any state other than
                "DONE GATHERING RESULTS" (including "TIMEOUT").
            requests.HTTPError: an API call returned an error status.
        """
        print(f"⌛ Executing query (first 50 chars): {query[:50]}...")

        job_id = self._create_search_job(query)
        status = self._wait_for_job(job_id)

        if status != "DONE GATHERING RESULTS":
            raise RuntimeError(f"Query failed with status: {status}")

        return self._check_results(job_id)

    @staticmethod
    def _time_range(now=None, window=timedelta(hours=1)):
        """Build the ``from``/``to``/``timeZone`` fields for a search job.

        Args:
            now: end of the window; defaults to the current UTC time. A naive
                value is taken as already being UTC.
            window: length of the search window ending at *now*.

        Both bounds derive from a single clock reading and are formatted
        with second precision (``YYYY-MM-DDTHH:MM:SS``).
        """
        if now is None:
            now = datetime.now(timezone.utc)
        elif now.tzinfo is not None:
            now = now.astimezone(timezone.utc)
        stamp = '%Y-%m-%dT%H:%M:%S'
        return {
            'from': (now - window).strftime(stamp),
            'to': now.strftime(stamp),
            'timeZone': 'UTC',
        }

    def _create_search_job(self, query):
        """Start a search job over the last hour and return its job id."""
        response = self.session.post(
            f"{self.api_url}/search/jobs",
            json={'query': query, **self._time_range()},
            timeout=self._REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        return response.json()['id']

    def _wait_for_job(self, job_id, timeout=60):
        """Poll the job about once per second until it ends or *timeout* elapses.

        Returns:
            The terminal job state, or the string "TIMEOUT" when the job is
            still running after *timeout* seconds of wall-clock time.
        """
        started = time.monotonic()
        polls = 0
        while True:
            response = self.session.get(
                f"{self.api_url}/search/jobs/{job_id}",
                timeout=self._REQUEST_TIMEOUT,
            )
            response.raise_for_status()
            status = response.json()['state']
            elapsed = int(time.monotonic() - started)

            if polls % 5 == 0:  # Print progress on every fifth poll
                print(f"⏳ Query status: {status} ({elapsed}/{timeout}s)")

            if status in self._TERMINAL_STATES:
                return status
            # Check the deadline before sleeping so no time is wasted
            # after the final poll.
            if time.monotonic() - started >= timeout:
                return "TIMEOUT"
            time.sleep(1)
            polls += 1

    def _check_results(self, job_id):
        """Return True if the finished job has at least one message.

        NOTE(review): only the messages endpoint is consulted; aggregate
        queries may expose their output as records instead — confirm
        whether such queries need to be supported.
        """
        response = self.session.get(
            f"{self.api_url}/search/jobs/{job_id}/messages",
            # One row is enough to prove the result set is non-empty; the
            # API expects an explicit offset alongside the limit.
            params={'offset': 0, 'limit': 1},
            timeout=self._REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        has_results = bool(response.json().get('messages'))
        print(f"🔍 Results found: {'✅ Yes' if has_results else '❌ No'}")
        return has_results


def get_repo_root():
    """Return the absolute path to the repository root.

    Uses ``GITHUB_WORKSPACE`` when it names an existing directory; otherwise
    falls back to the parent of the directory containing this script (the
    script is expected to live in ``scripts/``).
    """
    github_workspace = os.getenv('GITHUB_WORKSPACE')
    if github_workspace:
        workspace = Path(github_workspace)
        if workspace.is_dir():
            return workspace.resolve()
    # resolve() first: __file__ may be relative to the current directory.
    return Path(__file__).resolve().parent.parent
80-
81- # Main Validation Logic
def _list_directories(root, max_depth):
    """Return the sorted paths of *root* and its sub-directories up to *max_depth* levels down."""
    root = Path(root)
    found = [str(root)]
    if max_depth <= 0:
        return found
    for current, dirnames, _ in os.walk(root):
        depth = len(Path(current).relative_to(root).parts)
        found.extend(os.path.join(current, name) for name in dirnames)
        if depth + 1 >= max_depth:
            # Children sit at the depth limit: list them but do not descend.
            dirnames.clear()
    return sorted(found)


def debug_environment():
    """Print the workspace layout inside a collapsible log group.

    Lists directories up to three levels below the repository root and the
    first 20 Markdown files found. The listing is produced in-process rather
    than through a shell, so paths containing spaces or shell metacharacters
    are safe and the lines stay in order with the surrounding ``print`` calls.

    Returns:
        The repository root as reported by ``get_repo_root()``.
    """
    repo_root = get_repo_root()
    print("::group::⚙️ Environment Debug")
    print(f"📂 Repo root: {repo_root}")
    print(f"📂 Working dir: {os.getcwd()}")

    print("\n📁 Directory Structure:")
    for directory in _list_directories(repo_root, max_depth=3):
        print(directory)

    print("\n📝 Markdown Files:")
    # Cap the listing so a large repository does not flood the log.
    for markdown_file in islice(Path(repo_root).rglob('*.md'), 20):
        print(markdown_file)

    print("::endgroup::")
    return repo_root
9129
# Captures the body of every fenced ```sql block; compiled once for all files.
_SQL_BLOCK_RE = re.compile(r'```sql\n(.*?)```', re.DOTALL)


def get_changed_files(repo_root=None):
    """Find the Markdown files to validate.

    Args:
        repo_root: directory that file names are resolved against. Defaults
            to the current directory so callers that pass nothing still work.

    Returns:
        A list of path strings: the ``.md`` entries listed in the GitHub
        event payload when there are any, otherwise every ``*.md`` file
        under ``<repo_root>/docs`` (sorted), otherwise an empty list.
    """
    repo_root = Path('.') if repo_root is None else Path(repo_root)

    # Try GitHub PR context first
    event_path = os.environ.get("GITHUB_EVENT_PATH")
    if event_path:
        # NOTE(review): the standard pull_request webhook payload appears to
        # carry a `changed_files` count rather than a `files` list — confirm
        # that this branch ever yields files in practice.
        try:
            with open(event_path, encoding="utf-8") as event_file:
                event = json.load(event_file)
            pr_files = [
                str(repo_root / entry['filename'])
                for entry in event.get('pull_request', {}).get('files', [])
                if entry['filename'].endswith('.md')
            ]
        except (OSError, ValueError, KeyError, TypeError, AttributeError) as e:
            # An unreadable or oddly shaped payload is not fatal: fall back.
            print(f"::warning::Couldn't read PR data: {e}")
        else:
            if pr_files:
                print(f"📦 Found {len(pr_files)} changed Markdown files")
                return pr_files

    # Fallback: Scan docs directory
    docs_dir = repo_root / "docs"
    if docs_dir.exists():
        md_files = sorted(docs_dir.rglob("*.md"))
        print(f"🔄 Scanning {len(md_files)} docs files")
        return [str(path) for path in md_files]

    print("::error::No Markdown files found in docs/ directory")
    return []


def _validate_file(client, file_path):
    """Run every SQL block in *file_path* through *client*.

    Returns:
        True when the file could be read and every non-empty query returned
        results; False as soon as anything in the file failed. Failures are
        reported as GitHub ``::error`` annotations, never raised.
    """
    try:
        content = Path(file_path).read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as e:
        print(f"::error file={file_path},title=File Error::{e}")
        return False

    queries = _SQL_BLOCK_RE.findall(content)
    if not queries:
        print(f"ℹ️ No SQL blocks found in {file_path}")
        return True

    ok = True
    print(f"\n::group::🔎 Validating {len(queries)} queries in {file_path}")
    for index, raw_query in enumerate(queries, 1):
        query = raw_query.strip()
        if not query:
            continue

        print(f"\n📋 Query {index} (first 50 chars): {query[:50]}...")
        try:
            if client.test_query(query):
                print("✅ Valid query")
            else:
                print(f"::error file={file_path},title=Invalid Query::Query returned no results")
                ok = False
        except Exception as e:
            # Boundary handler: one failing query must not stop the others.
            print(f"::error file={file_path},title=Query Failed::{e}")
            ok = False
    print("::endgroup::")
    return ok


def main():
    """Validate every selected Markdown file and exit with the result.

    Exits 0 when there is nothing to validate or every query returned
    results, and 1 when any file or query failed.
    """
    repo_root = debug_environment()
    changed_files = get_changed_files(repo_root)

    if not changed_files:
        print("::warning::No Markdown files to validate")
        sys.exit(0)

    print(f"Validating {len(changed_files)} files...")
    client = SumoLogicClient()
    # Build the full list (no short-circuit) so every file gets reported.
    outcomes = [_validate_file(client, path) for path in changed_files]

    if not all(outcomes):
        print("\n❌ Validation failed - see errors above")
        sys.exit(1)

    print("\n🎉 All queries validated successfully")
    sys.exit(0)


def validate_files():
    """Main validation flow; kept as an alias of ``main()`` for older callers."""
    main()
15667
if __name__ == "__main__":
    try:
        main()
    except Exception as e:
        # Last-resort boundary: surface unexpected failures as a GitHub
        # error annotation instead of a bare traceback. sys.exit() raises
        # SystemExit, which is not an Exception subclass, so the exit codes
        # chosen inside main() pass through untouched.
        print(f"::error::💥 Critical error: {e}")
        sys.exit(1)
0 commit comments