Skip to content

Commit 362d343

Browse files
committed
Fix SQL validation to exclude Markdown table content
- Improved diff parsing to only extract content from SQL code blocks
- Enhanced SQL query extraction to skip table rows (lines with | delimiters)
- Prevents false positives from Markdown table formatting being treated as SQL
1 parent b631559 commit 362d343

File tree

1 file changed

+23
-13
lines changed

1 file changed

+23
-13
lines changed

scripts/validate_queries.py

Lines changed: 23 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -100,17 +100,24 @@ def extract_changed_sql_queries(file_path, base_commit, current_commit):
100100
changed_queries = []
101101
lines = diff_content.split('\n')
102102

103+
# Look for SQL code blocks in the diff instead of individual lines
104+
in_sql_block = False
105+
current_block = []
106+
103107
for line in lines:
104-
# Look for added lines that contain SQL-like content
105-
if line.startswith('+') and not line.startswith('+++'):
106-
content = line[1:].strip() # Remove the '+' prefix
107-
108-
# Check if this line looks like a SQL query
109-
if content and any(keyword in content.lower() for keyword in [
110-
'_collector=', 'metric=', '| where', '| parse', '| count',
111-
'| sum', '| avg', '| json', '| timeslice'
112-
]):
113-
changed_queries.append(content)
108+
if line.startswith('+```sql') or line.startswith('+```sumo'):
109+
in_sql_block = True
110+
current_block = []
111+
elif line.startswith('+```') and in_sql_block:
112+
in_sql_block = False
113+
if current_block:
114+
query_content = '\n'.join(current_block).strip()
115+
if query_content:
116+
changed_queries.append(query_content)
117+
current_block = []
118+
elif in_sql_block and line.startswith('+'):
119+
content = line[1:] # Remove the '+' prefix but keep whitespace
120+
current_block.append(content)
114121

115122
if changed_queries:
116123
print(f"📊 Found {len(changed_queries)} changed SQL queries in diff")
@@ -129,16 +136,19 @@ def extract_sql_queries(file_path):
129136
with open(file_path, 'r', encoding='utf-8') as f:
130137
content = f.read()
131138

132-
# Find SQL code blocks using regex
133-
sql_pattern = r'```(?:sql|sumo)\s*(?:title="[^"]*")?\s*\n(.*?)```'
139+
# Find SQL code blocks using regex - more precise pattern
140+
sql_pattern = r'```(?:sql|sumo)(?:[^\n]*)\n(.*?)```'
134141
sql_blocks = re.findall(sql_pattern, content, re.DOTALL | re.IGNORECASE)
135142

136143
queries = []
137144
for block in sql_blocks:
138145
# Clean up the query
139146
query = block.strip()
140147
if query and not query.startswith('#') and not query.startswith('//'):
141-
queries.append(query)
148+
# Skip table content (lines that start and end with |)
149+
lines = query.split('\n')
150+
if not all(line.strip().startswith('|') and line.strip().endswith('|') for line in lines if line.strip()):
151+
queries.append(query)
142152

143153
return queries
144154
except Exception as e:

0 commit comments

Comments
 (0)