Skip to content

Commit 44bbb75

Browse files
authored
chore(wren-ai-service): fix add quotes (#1917)
1 parent 878e803 commit 44bbb75

File tree

2 files changed

+447
-2
lines changed

2 files changed

+447
-2
lines changed

wren-ai-service/src/core/engine.py

Lines changed: 127 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,17 +70,142 @@ def _quote_sql_identifiers_by_tokens(sql: str, quote_char: str = '"') -> str:
7070
Add quotes around identifiers using SQLGlot's tokenizer positions.
7171
"""
7272

73+
# Upper-cased words that must never be wrapped in identifier quotes.
# Built once (instead of on every is_sql_keyword call) and frozen so it
# cannot be mutated by accident.
# NOTE(review): the list also contains type names and function names such
# as DATE, YEAR or COUNT, so an identifier that happens to share one of
# these names is treated as a keyword and left unquoted.
_SQL_KEYWORDS = frozenset(
    {
        # Basic SQL keywords
        "SELECT",
        "FROM",
        "WHERE",
        "JOIN",
        "LEFT",
        "RIGHT",
        "INNER",
        "OUTER",
        "ON",
        "AND",
        "OR",
        "NOT",
        "IN",
        "EXISTS",
        "BETWEEN",
        "LIKE",
        "IS",
        "NULL",
        "ORDER",
        "BY",
        "GROUP",
        "HAVING",
        "LIMIT",
        "OFFSET",
        "UNION",
        "INTERSECT",
        "EXCEPT",
        "AS",
        "DISTINCT",
        "ALL",
        "TOP",
        "WITH",
        "RECURSIVE",
        "CTE",
        # Data types
        "INTEGER",
        "INT",
        "BIGINT",
        "SMALLINT",
        "DECIMAL",
        "NUMERIC",
        "FLOAT",
        "REAL",
        "DOUBLE",
        "PRECISION",
        "VARCHAR",
        "CHAR",
        "TEXT",
        "BOOLEAN",
        "BOOL",
        "DATE",
        "TIME",
        "TIMESTAMP",
        "TIMESTAMPTZ",
        "INTERVAL",
        "WITHOUT",
        # Time/date keywords
        "YEAR",
        "MONTH",
        "DAY",
        "HOUR",
        "MINUTE",
        "SECOND",
        "TIMEZONE",
        "EPOCH",
        "AT",
        "ZONE",
        "CURRENT_DATE",
        "CURRENT_TIME",
        "CURRENT_TIMESTAMP",
        "EXTRACT",
        "DATE_TRUNC",
        "DATE_PART",
        # Aggregate functions (common ones)
        "COUNT",
        "SUM",
        "AVG",
        "MIN",
        "MAX",
        "STDDEV",
        "VARIANCE",
        # Window functions
        "OVER",
        "PARTITION",
        "ROW_NUMBER",
        "RANK",
        "DENSE_RANK",
        "LAG",
        "LEAD",
        "FIRST_VALUE",
        "LAST_VALUE",
        "NTILE",
        # Other common keywords
        "CASE",
        "WHEN",
        "THEN",
        "ELSE",
        "END",
        "CAST",
        "CONVERT",
        "COALESCE",
        "NULLIF",
        "IFNULL",
        "ISNULL",
        "DESC",
        "ASC",
        "TRUE",
        "FALSE",
    }
)


def is_sql_keyword(text: str) -> bool:
    """Check if the text is a SQL keyword that should not be quoted.

    The comparison is case-insensitive: ``text`` is upper-cased and looked
    up in a fixed set of keywords, type names and common function names.
    No whitespace stripping is performed, so ``" select "`` is not a match.

    Args:
        text: Raw token text to test.

    Returns:
        True if the upper-cased text is in the keyword set, False otherwise
        (including for the empty string).
    """
    return text.upper() in _SQL_KEYWORDS
189+
73190
def is_ident(tok: Token) -> bool:
    """Return True when ``tok`` is an identifier token that may be quoted.

    A token qualifies when its type is one of the identifier-like token
    types listed below and its source text is not a SQL keyword.

    Args:
        tok: A tokenizer token; its ``token_type``, ``start`` and ``end``
            attributes are read.

    Returns:
        False for any other token type or when the token text is a SQL
        keyword according to ``is_sql_keyword``; True otherwise.
    """
    # SQLGlot uses VAR for identifiers, but also treats SQL keywords as identifiers in some contexts
    if tok.token_type not in (
        TokenType.VAR,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.COLUMN,
        TokenType.DATABASE,
        TokenType.INDEX,
        TokenType.VIEW,
    ):
        return False

    # Don't quote SQL keywords. The text is sliced from the enclosing
    # function's ``sql`` string; the ``+ 1`` treats ``tok.end`` as an
    # inclusive index.
    token_text = sql[tok.start : tok.end + 1]
    return not is_sql_keyword(token_text)
84209

85210
def is_already_quoted_text(text: str) -> bool:
86211
text = text.strip()

0 commit comments

Comments
 (0)