1- # #!/usr/bin/env python3
2- # import re
3- # import sys
4- # import os
5- # import json
6- # from pathlib import Path
7-
8-
9-
10- #
11- # def debug_environment():
12- # """Print critical debugging info"""
13- # print("::group::Debug Information")
14- # print(f"Current directory: {os.getcwd()}")
15- # print("Directory contents:")
16- # os.system("ls -R")
17- # print(f"Environment: {dict(os.environ)}")
18- # print("::endgroup::")
19- #
20- # def get_changed_files():
21- # changed_files = []
22- #
23- # # Try GitHub's PR context first
24- # if "GITHUB_EVENT_PATH" in os.environ:
25- # try:
26- # import json
27- # with open(os.environ["GITHUB_EVENT_PATH"]) as f:
28- # event = json.load(f)
29- # changed_files = [
30- # f['filename'] for f in event.get('pull_request', {}).get('files', [])
31- # if f['filename'].endswith('.md')
32- # ]
33- # except Exception as e:
34- # print(f"::warning::Failed to get PR files: {e}")
35- #
36- # # Fallback: Find all Markdown files if PR context fails
37- # if not changed_files:
38- # changed_files = [str(p) for p in Path('.').rglob('*.md')]
39- #
40- # return changed_files
41- #
42- #
43- # def find_sql_blocks_in_pr():
44- # # """Detect changed SQL blocks with better debugging"""
45- # # print("::group::Detecting SQL blocks") # GitHub Actions log grouping
46- # #
47- # # # Get changed files from environment if running in GitHub Actions
48- # # changed_files = sys.argv[1:] if len(sys.argv) > 1 else []
49- # # if not changed_files and "GITHUB_ACTIONS" in os.environ:
50- # # try:
51- # # with open(os.environ["GITHUB_EVENT_PATH"]) as f:
52- # # event_data = json.load(f)
53- # # changed_files = [
54- # # f"docs/{f['filename']}" for f in
55- # # event_data.get("pull_request", {}).get("files", [])
56- # # if f['filename'].endswith('.md')
57- # # ]
58- # # except Exception as e:
59- # # print(f"::warning::Couldn't get changed files: {str(e)}")
60- # #
61- # # if not changed_files:
62- # # changed_files = [
63- # # str(p) for p in Path("docs").rglob("*.md")
64- # # if "search-query-language" in str(p)
65- # # ]
66- # #
67- # # print(f"Files to scan: {changed_files}")
68- # # return changed_files
69- # """Extract all SQL blocks from Markdown file"""
70- # content = Path(file_path).read_text()
71- # return re.findall(r'```sql\n(.*?)```', content, re.DOTALL)
72- #
73- # def validate_queries():
74- # debug_environment()
75- #
76- # changed_files = get_changed_files()
77- # print(f"::group::Files to validate")
78- # print("\n".join(changed_files) or "No files found")
79- # print("::endgroup::")
80- #
81- # if not changed_files:
82- # print("::warning::No Markdown files found to validate")
83- # return
84- # print("::group::Starting validation")
85- # client = SumoLogicClient()
86- # failed = False
87- #
88- # for file, query in find_sql_blocks_with_content():
89- # print(f"\n🔍 Validating query in {file}")
90- # print(f"Query sample:\n{query[:200]}...") # Show first 200 chars
91- #
92- # try:
93- # print("Calling Sumo Logic API...")
94- # if not client.test_query(query):
95- # print(f"::error file={file},title=Query Validation Failed::Query returned no results")
96- # failed = True
97- # else:
98- # print("✅ Query validated successfully")
99- # except Exception as e:
100- # print(f"::error file={file},title=Query Execution Failed::{str(e)}")
101- # failed = True
102- #
103- # print("::endgroup::")
104- # if failed:
105- # sys.exit(1)
106- #
107- # def find_sql_blocks_with_content():
108- # """Yields (file_path, query) tuples with better error handling"""
109- # for file in find_sql_blocks_in_pr():
110- # try:
111- # content = Path(file).read_text()
112- # queries = re.findall(r'```sql\n(.*?)```', content, re.DOTALL)
113- # for query in queries:
114- # query = query.strip()
115- # if query: # Skip empty queries
116- # yield (file, query)
117- # except Exception as e:
118- # print(f"::warning file={file}::Error processing file: {str(e)}")
119- #
120- # if __name__ == "__main__":
121- # validate_queries()
122-
1231#!/usr/bin/env python3
2+ """
3+ Validates SumoLogic queries in Markdown files during PRs
4+ - Finds all SQL blocks in changed Markdown files
5+ - Executes each query against SumoLogic API
6+ - Fails PR if any query returns no results
7+ """
8+
1249import re
12510import sys
12611import os
12+ import json
13+ import time
12714from pathlib import Path
128- from sumologic_client import SumoLogicClient
15+ from datetime import datetime , timedelta
16+
17+ # SumoLogic API Client (embedded for simplicity)
class SumoLogicClient:
    """Minimal Sumo Logic Search Job API client used to smoke-test queries.

    Configuration is read from environment variables:

    * ``SUMO_LOGIC_ENDPOINT`` - API base URL (a default is used when unset).
    * ``SUMO_LOGIC_ACCESS_ID`` / ``SUMO_LOGIC_ACCESS_KEY`` - HTTP basic-auth
      credentials.
    """

    # Seconds allowed for any single HTTP request, so a stalled connection
    # cannot hang the CI job indefinitely.
    REQUEST_TIMEOUT = 30

    # Timestamp layout sent in the search job ``from``/``to`` fields.
    _TIME_FORMAT = '%Y-%m-%dT%H:%M:%S'

    def __init__(self):
        """Create an authenticated HTTP session from environment variables."""
        # Imported here so the class also works when this module is imported,
        # not only when it is executed as a script.
        import requests

        self.api_url = os.getenv('SUMO_LOGIC_ENDPOINT', 'https://long-api.sumologic.com/api/v1')
        self.session = requests.Session()
        self.session.auth = (
            os.getenv('SUMO_LOGIC_ACCESS_ID'),
            os.getenv('SUMO_LOGIC_ACCESS_KEY'),
        )
        self.session.headers.update({'Content-Type': 'application/json'})

    def test_query(self, query):
        """Execute *query* and report whether it returned any messages.

        Args:
            query: Search query text.

        Returns:
            True when at least one message was returned, False otherwise.

        Raises:
            RuntimeError: The job did not reach "DONE GATHERING RESULTS"
                (it was cancelled or polling timed out).
            Exception: Whatever the HTTP session raises for failed requests.
        """
        print(f"⌛ Executing query (first 50 chars): {query[:50]}...")

        job_id = self._create_search_job(query)
        try:
            status = self._wait_for_job(job_id)
            if status != "DONE GATHERING RESULTS":
                raise RuntimeError(f"Query failed with status: {status}")
            return self._check_results(job_id)
        finally:
            # Always release the job so repeated runs do not pile up
            # server-side search jobs.
            self._delete_job(job_id)

    def _create_search_job(self, query):
        """Start a search job covering the last hour and return its id."""
        from datetime import timezone

        # Take "now" once so the window is exactly one hour wide, and drop
        # microseconds to match the timestamp layout sent to the API.
        now = datetime.now(timezone.utc).replace(microsecond=0)
        time_range = {
            'from': (now - timedelta(hours=1)).strftime(self._TIME_FORMAT),
            'to': now.strftime(self._TIME_FORMAT),
            'timeZone': 'UTC',
        }

        response = self.session.post(
            f"{self.api_url}/search/jobs",
            json={'query': query, **time_range},
            timeout=self.REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        return response.json()['id']

    def _wait_for_job(self, job_id, timeout=60):
        """Poll the job about once per second until it finishes.

        Args:
            job_id: Identifier returned by ``_create_search_job``.
            timeout: Maximum number of polls (roughly seconds).

        Returns:
            The terminal state reported by the API ("DONE GATHERING RESULTS"
            or "CANCELLED"), or "TIMEOUT" when the poll budget is exhausted.
        """
        for i in range(timeout):
            response = self.session.get(
                f"{self.api_url}/search/jobs/{job_id}",
                timeout=self.REQUEST_TIMEOUT,
            )
            response.raise_for_status()
            status = response.json()['state']

            if i % 5 == 0:  # Print progress every 5 polls
                print(f"⏳ Query status: {status} ({i}/{timeout}s)")

            if status in ("DONE GATHERING RESULTS", "CANCELLED"):
                return status
            time.sleep(1)
        return "TIMEOUT"

    def _check_results(self, job_id):
        """Return True when the finished job produced at least one message."""
        response = self.session.get(
            f"{self.api_url}/search/jobs/{job_id}/messages",
            # One message is enough to prove the query matched something.
            params={'offset': 0, 'limit': 1},
            timeout=self.REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        has_results = len(response.json().get('messages', [])) > 0
        print(f"🔍 Results found: {'✅ Yes' if has_results else '❌ No'}")
        return has_results

    def _delete_job(self, job_id):
        """Best-effort deletion of a search job; failures are only logged."""
        try:
            response = self.session.delete(
                f"{self.api_url}/search/jobs/{job_id}",
                timeout=self.REQUEST_TIMEOUT,
            )
            response.raise_for_status()
        except Exception as e:  # cleanup must never mask the validation result
            print(f"::warning::⚠️ Couldn't delete search job {job_id}: {e}")
80+
81+ # Main Validation Logic
def debug_environment():
    """Print the workspace path, its directory tree and its Markdown files.

    Everything is written with ``print`` inside one ``::group::`` /
    ``::endgroup::`` pair. The listing is produced in-process rather than by
    a shell command, so it cannot be reordered relative to the group markers
    by output buffering and does not depend on external tools being present.
    Paths are shown relative to the current directory with a ``./`` prefix.
    """
    root = Path('.')

    print("::group::⚙️ Environment Debug")
    print(f"📂 Workspace: {os.getcwd()}")

    print("\n📁 Directory Structure:")
    print(".")
    for directory in sorted(p for p in root.rglob('*') if p.is_dir()):
        print(f"./{directory.as_posix()}")

    print("\n📝 Markdown Files:")
    for md_file in sorted(root.rglob('*.md')):
        print(f"./{md_file.as_posix()}")

    print("::endgroup::")
91+
def get_changed_files():
    """Return the Markdown files to validate.

    When ``GITHUB_EVENT_PATH`` is set, the event JSON is read and the
    ``pull_request.files[].filename`` entries ending in ``.md`` that still
    exist on disk are returned. Files deleted by the PR are skipped because
    there is nothing left to read or validate.

    If that yields nothing, or the event cannot be read or parsed, every
    ``*.md`` file under the current directory is returned in sorted order.

    Returns:
        A list of file paths as strings.
    """
    # NOTE(review): the code expects a ``files`` list under ``pull_request``
    # in the event payload - confirm the workflow actually provides one;
    # otherwise this branch never matches and the fallback always runs.
    event_path = os.environ.get("GITHUB_EVENT_PATH")
    if event_path:
        try:
            with open(event_path, encoding="utf-8") as event_file:
                event = json.load(event_file)
            pr_files = [
                entry['filename']
                for entry in event.get('pull_request', {}).get('files', [])
                if entry['filename'].endswith('.md')
                and Path(entry['filename']).is_file()
            ]
            if pr_files:
                print(f"📦 Found {len(pr_files)} changed Markdown files in PR")
                return pr_files
        except (OSError, ValueError, KeyError, TypeError, AttributeError) as e:
            # Unreadable file, invalid JSON or an unexpected payload shape:
            # warn and fall through to the repository-wide scan.
            print(f"::warning::⚠️ Couldn't read PR data: {e}")

    # Fallback: all Markdown files in repository
    all_files = [str(p) for p in sorted(Path('.').rglob('*.md'))]
    print(f"🔄 Falling back to scanning all {len(all_files)} Markdown files")
    return all_files
152112
153- def extract_sql_blocks (file_path ):
154- """Extract all SQL blocks from Markdown file"""
155- content = Path (file_path ).read_text ()
156- return re .findall (r'```sql\n(.*?)```' , content , re .DOTALL )
157-
158- def main ():
def _extract_sql_queries(content):
    """Return the stripped, non-empty bodies of every ```sql fenced block."""
    # Tolerate trailing spaces after the fence tag and CRLF line endings so
    # such blocks are not silently skipped.
    blocks = re.findall(r'```sql[ \t]*\r?\n(.*?)```', content, re.DOTALL)
    return [block.strip() for block in blocks if block.strip()]


def _escape_workflow_message(message):
    """Escape text so it fits on one line of a ``::error`` workflow command."""
    return (
        str(message)
        .replace('%', '%25')
        .replace('\r', '%0D')
        .replace('\n', '%0A')
    )


def validate_files():
    """Validate every SQL block in the selected Markdown files.

    Prints the debug environment, then runs each non-empty ```sql block of
    each file from ``get_changed_files()`` through
    ``SumoLogicClient.test_query``. A query that returns no results, a query
    that raises, or a file that cannot be read marks the run as failed.

    Exits the process with status 1 on any failure and 0 otherwise; this
    function does not return normally.
    """
    debug_environment()
    client = SumoLogicClient()
    failed = False

    for file_path in get_changed_files():
        try:
            content = Path(file_path).read_text(encoding='utf-8')
        except (OSError, UnicodeDecodeError) as e:
            print(f"::error file={file_path},title=File Error::{_escape_workflow_message(e)}")
            failed = True
            continue

        queries = _extract_sql_queries(content)
        if not queries:
            print(f"ℹ️ No SQL blocks found in {file_path}")
            continue

        print(f"\n::group::🔎 Validating {len(queries)} queries in {file_path}")
        for i, query in enumerate(queries, 1):
            print(f"\n📋 Query {i} (first 50 chars): {query[:50]}...")
            try:
                if client.test_query(query):
                    print("✅ Valid query")
                else:
                    print(f"::error file={file_path},title=Invalid Query::Query returned no results")
                    failed = True
            except Exception as e:
                # Any failure while running one query is reported against
                # the file, and the remaining queries are still checked.
                print(f"::error file={file_path},title=Query Failed::{_escape_workflow_message(e)}")
                failed = True
        print("::endgroup::")

    if failed:
        print("\n❌ Validation failed - see errors above")
        sys.exit(1)

    print("\n🎉 All queries validated successfully")
    sys.exit(0)
156+
if __name__ == "__main__":
    # Check the third-party dependency on its own, so that only a genuinely
    # missing 'requests' package produces the "missing package" message.
    # The import also binds the module-level name `requests`.
    try:
        import requests
    except ImportError:
        print("::error::❌ Missing required 'requests' package")
        sys.exit(1)

    # Top-level boundary: report any unexpected failure as a workflow error
    # and exit non-zero. validate_files() itself ends via sys.exit(), which
    # is not caught here.
    try:
        validate_files()
    except Exception as e:
        print(f"::error::💥 Critical error: {str(e)}")
        sys.exit(1)
0 commit comments