Skip to content

Commit b5225ff

Browse files
Add comprehensive column parsing test suite and unified analyzer
- Add test_column_parsing.sql with 20+ test scenarios covering basic columns, schema-qualified columns, alias chains, complex expressions, nested struct fields, and various SQL contexts (WHERE, GROUP BY, ORDER BY, etc.)
- Add test_column_parsing_core.sql for essential functionality verification
- Add unified_analyzer_v2.sql integrating column analysis with existing function and table parsing using correct DuckDB table function syntax
- Unified analyzer now provides complete SQL dependency analysis across functions, tables, and columns with existence checking and suggestions

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <[email protected]>
1 parent 6dbf572 commit b5225ff

File tree

3 files changed

+469
-0
lines changed

3 files changed

+469
-0
lines changed

test_column_parsing.sql

Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
-- Test suite for column parsing functionality
2+
-- Load the extension first
3+
LOAD parser_tools;
4+
5+
-- Create test tables. OR REPLACE keeps the script re-runnable when a previous run aborted before the cleanup at the end of this file (which drops these same tables unconditionally).
6+
CREATE OR REPLACE TABLE users (id INT, age INT, name VARCHAR, email VARCHAR);
7+
CREATE OR REPLACE TABLE orders (id INT, user_id INT, total DECIMAL, status VARCHAR);
8+
CREATE OR REPLACE TABLE profiles (user_id INT, first_name VARCHAR, last_name VARCHAR, address STRUCT(street VARCHAR, city VARCHAR, zip VARCHAR));
9+
10+
-- Test 1: Basic column parsing
11+
SELECT '=== Test 1: Basic column parsing ===' as test_section;
12+
SELECT * FROM parse_columns('SELECT name, age FROM users');
13+
14+
-- Test 2: Schema-qualified columns
15+
SELECT '=== Test 2: Schema-qualified columns ===' as test_section;
16+
SELECT * FROM parse_columns('SELECT main.users.name, main.users.age FROM main.users');
17+
18+
-- Test 3: Complex expressions with multiple columns
19+
SELECT '=== Test 3: Complex expressions ===' as test_section;
20+
SELECT * FROM parse_columns('SELECT u.name, o.total, u.age + o.total AS summary FROM users u JOIN orders o ON u.id = o.user_id');
21+
22+
-- Test 4: Alias chain scenario (later select-list items reference the earlier aliases a and b)
23+
SELECT '=== Test 4: Alias chains ===' as test_section;
24+
SELECT * FROM parse_columns('SELECT 1 AS a, users.age AS b, a+b AS c, b AS d FROM users');
25+
26+
-- Test 5: Nested struct field access
27+
SELECT '=== Test 5: Nested struct fields ===' as test_section;
28+
SELECT * FROM parse_columns('SELECT profiles.address.street, profiles.address.city FROM profiles');
29+
30+
-- Test 6: Deeply nested struct with schema qualification
31+
SELECT '=== Test 6: Deep nested struct with schema ===' as test_section;
32+
SELECT * FROM parse_columns('SELECT main.profiles.address.city FROM main.profiles');
33+
34+
-- Test 7: WHERE clause columns
35+
SELECT '=== Test 7: WHERE clause columns ===' as test_section;
36+
SELECT * FROM parse_columns('SELECT name FROM users WHERE age > 18 AND email LIKE ''%@gmail.com''');
37+
38+
-- Test 8: GROUP BY and HAVING columns
39+
SELECT '=== Test 8: GROUP BY and HAVING columns ===' as test_section;
40+
SELECT * FROM parse_columns('SELECT status, COUNT(*) FROM orders GROUP BY status HAVING COUNT(*) > 5');
41+
42+
-- Test 9: ORDER BY columns
43+
SELECT '=== Test 9: ORDER BY columns ===' as test_section;
44+
SELECT * FROM parse_columns('SELECT name, age FROM users ORDER BY age DESC, name ASC');
45+
46+
-- Test 10: Function arguments with columns
47+
SELECT '=== Test 10: Function arguments ===' as test_section;
48+
SELECT * FROM parse_columns('SELECT UPPER(name), LENGTH(email), CONCAT(first_name, '' '', last_name) FROM users');
49+
50+
-- Test 11: Window functions
51+
SELECT '=== Test 11: Window functions ===' as test_section;
52+
SELECT * FROM parse_columns('SELECT name, ROW_NUMBER() OVER (PARTITION BY age ORDER BY name) FROM users');
53+
54+
-- Test 12: Complex query with a CTE, a join, an aggregate, and a CASE expression
55+
SELECT '=== Test 12: Complex query ===' as test_section;
56+
SELECT * FROM parse_columns('
57+
WITH user_stats AS (
58+
SELECT u.id, u.name, COUNT(o.id) as order_count
59+
FROM users u
60+
LEFT JOIN orders o ON u.id = o.user_id
61+
GROUP BY u.id, u.name
62+
)
63+
SELECT
64+
us.name,
65+
us.order_count,
66+
CASE
67+
WHEN us.order_count > 5 THEN ''high''
68+
WHEN us.order_count > 1 THEN ''medium''
69+
ELSE ''low''
70+
END as activity_level
71+
FROM user_stats us
72+
WHERE us.order_count > 0
73+
ORDER BY us.order_count DESC
74+
');
75+
76+
-- Test 13: Unqualified columns (aliases, literals)
77+
SELECT '=== Test 13: Unqualified columns and literals ===' as test_section;
78+
SELECT * FROM parse_columns('SELECT 42 AS answer, ''hello'' AS greeting, name FROM users');
79+
80+
-- Test 14: Mixed qualified and unqualified references
81+
SELECT '=== Test 14: Mixed qualifications ===' as test_section;
82+
SELECT * FROM parse_columns('SELECT users.name, age, profiles.first_name FROM users JOIN profiles ON users.id = profiles.user_id');
83+
84+
-- Test 15: CASE expressions with columns
85+
SELECT '=== Test 15: CASE expressions ===' as test_section;
86+
SELECT * FROM parse_columns('
87+
SELECT
88+
name,
89+
CASE
90+
WHEN age < 18 THEN ''minor''
91+
WHEN age < 65 THEN ''adult''
92+
ELSE ''senior''
93+
END as age_group
94+
FROM users
95+
');
96+
97+
-- Test 16: Subquery column references
98+
SELECT '=== Test 16: Subquery columns ===' as test_section;
99+
SELECT * FROM parse_columns('
100+
SELECT name, age
101+
FROM users
102+
WHERE id IN (SELECT user_id FROM orders WHERE total > 100)
103+
');
104+
105+
-- Test 17: JOIN conditions
106+
SELECT '=== Test 17: JOIN conditions ===' as test_section;
107+
SELECT * FROM parse_columns('
108+
SELECT u.name, o.total
109+
FROM users u
110+
INNER JOIN orders o ON u.id = o.user_id AND u.age > 18
111+
');
112+
113+
-- Test 18: Multiple table aliases with same column names
114+
SELECT '=== Test 18: Multiple aliases, same column names ===' as test_section;
115+
SELECT * FROM parse_columns('
116+
SELECT u.id as user_id, o.id as order_id, u.name, o.status
117+
FROM users u
118+
JOIN orders o ON u.id = o.user_id
119+
');
120+
121+
-- Test 19: Column references in aggregates
122+
SELECT '=== Test 19: Aggregates with columns ===' as test_section;
123+
SELECT * FROM parse_columns('
124+
SELECT
125+
COUNT(DISTINCT u.id) as unique_users,
126+
AVG(o.total) as avg_order,
127+
SUM(o.total) as total_revenue
128+
FROM users u
129+
JOIN orders o ON u.id = o.user_id
130+
');
131+
132+
-- Test 20: Column with arithmetic operations
133+
SELECT '=== Test 20: Arithmetic operations ===' as test_section;
134+
SELECT * FROM parse_columns('SELECT age * 2 + 10 AS calculated_age, total / quantity AS unit_price FROM users JOIN orders ON users.id = orders.user_id');
135+
136+
-- Summary report: count the column references found per context (GROUP BY already yields one row per context, so no DISTINCT is needed)
137+
SELECT '=== Summary: Column contexts found ===' as summary_section;
138+
SELECT context, COUNT(*) AS count
139+
FROM (
140+
SELECT * FROM parse_columns('SELECT u.name, o.total FROM users u JOIN orders o ON u.id = o.user_id WHERE u.age > 18 ORDER BY o.total DESC')
141+
)
142+
GROUP BY context
143+
ORDER BY count DESC, context; -- tie-break on context so equal counts print in a stable order
144+
145+
-- Summary report: classify each expression_identifiers value by shape. NOTE(review): the patterns assume a nested-list rendering such as [["schema","table","col"]] - confirm against parse_columns output.
146+
SELECT '=== Summary: Expression identifier patterns ===' as summary_section;
147+
SELECT
148+
CASE
149+
WHEN expression_identifiers LIKE '%]%[%' THEN 'Multiple identifiers' -- a ']' followed later by a '[' means a second inner list; the old '%[%[%,%' also matched one qualified identifier
150+
WHEN expression_identifiers LIKE '%"%,"%,"%' THEN 'Three-part qualified' -- must precede the two-part test, which matches three-part values as well
151+
WHEN expression_identifiers LIKE '%"%,"%' THEN 'Two-part qualified'
152+
ELSE 'Single identifier'
153+
END AS pattern_type,
154+
COUNT(*) AS count
155+
FROM (
156+
SELECT * FROM parse_columns('SELECT main.users.name, users.age, name, 1 AS const FROM main.users')
157+
)
158+
GROUP BY pattern_type
159+
ORDER BY count DESC, pattern_type; -- tie-break so equal counts print in a stable order
160+
161+
-- Cleanup
162+
DROP TABLE IF EXISTS users;
163+
DROP TABLE IF EXISTS orders;
164+
DROP TABLE IF EXISTS profiles;

test_column_parsing_core.sql

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
-- Core column parsing tests
2+
-- This file tests the essential functionality without dependencies on complex table structures
3+
4+
-- Test 1: Basic unqualified columns
5+
SELECT 'Test 1: Basic columns' as test_name;
6+
SELECT * FROM parse_columns('SELECT name, age FROM users');
7+
8+
-- Test 2: Schema-qualified columns
9+
SELECT 'Test 2: Schema-qualified' as test_name;
10+
SELECT * FROM parse_columns('SELECT main.users.name FROM main.users');
11+
12+
-- Test 3: Alias chain (key scenario: later select-list items reference the earlier aliases a and b)
13+
SELECT 'Test 3: Alias chains' as test_name;
14+
SELECT * FROM parse_columns('SELECT 1 AS a, users.age AS b, a+b AS c, b AS d FROM users');
15+
16+
-- Test 4: Complex expression with multiple identifiers
17+
SELECT 'Test 4: Complex expressions' as test_name;
18+
SELECT * FROM parse_columns('SELECT u.name, o.total, u.age + o.total AS summary FROM users u JOIN orders o ON u.id = o.user_id');
19+
20+
-- Test 5: WHERE clause columns
21+
SELECT 'Test 5: WHERE clause' as test_name;
22+
SELECT * FROM parse_columns('SELECT name FROM users WHERE age > 18 AND email LIKE ''test''');
23+
24+
-- Test 6: Function arguments
25+
SELECT 'Test 6: Function arguments' as test_name;
26+
SELECT * FROM parse_columns('SELECT UPPER(name), CONCAT(first_name, last_name) FROM users');
27+
28+
-- Test 7: Nested struct field (simulated)
29+
SELECT 'Test 7: Nested struct' as test_name;
30+
SELECT * FROM parse_columns('SELECT users.profile.address.city FROM users');
31+
32+
-- Test 8: Output validation - print NULL outputs as the literal string 'NULL' so they are visible in the result
33+
SELECT 'Test 8: NULL handling verification' AS test_name;
34+
SELECT
35+
COALESCE(table_schema, 'NULL') AS schema_check,
36+
COALESCE(table_name, 'NULL') AS table_check,
37+
COALESCE(selected_name, 'NULL') AS selected_check
38+
FROM parse_columns('SELECT 1 AS a, users.age AS b FROM users')
39+
LIMIT 3;

0 commit comments

Comments
 (0)