Skip to content

Commit d457a98

Browse files
committed
Get formatter working with existing features
1 parent 80f6e1e commit d457a98

File tree

5 files changed

+459
-290
lines changed

5 files changed

+459
-290
lines changed

src/languages/clickhouse/clickhouse.formatter.ts

Lines changed: 153 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,45 @@
11
import { DialectOptions } from '../../dialect.js';
22
import { expandPhrases } from '../../expandPhrases.js';
3+
import { EOF_TOKEN, Token, TokenType } from '../../lexer/token.js';
34
import { functions } from './clickhouse.functions.js';
4-
import { dataTypes, keywords, keywordPhrases } from './clickhouse.keywords.js';
5+
import { dataTypes, keywords } from './clickhouse.keywords.js';
56

67
// Phrases that open a SELECT statement. The `[DISTINCT]` part uses the bracket
// notation consumed by expandPhrases (presumably marking it as optional — confirm
// against expandPhrases).
const reservedSelect = expandPhrases(['SELECT [DISTINCT]']);
78

89
// Clause keywords of the ClickHouse dialect. This list is spread, together with
// standardOnelineClauses and tabularOnelineClauses, into the `reservedClauses`
// option of the exported `clickhouse` dialect.
const reservedClauses = expandPhrases([
9-
// https://clickhouse.com/docs/sql-reference/statements/explain
10-
'EXPLAIN [AST | SYNTAX | QUERY TREE | PLAN | PIPELINE | ESTIMATE | TABLE OVERRIDE]',
10+
'SET',
11+
// https://clickhouse.com/docs/sql-reference/statements/select
12+
'WITH',
13+
'FROM',
14+
'SAMPLE',
15+
'PREWHERE',
16+
'WHERE',
17+
'GROUP BY',
18+
'HAVING',
19+
'QUALIFY',
20+
'ORDER BY',
21+
'LIMIT', // NOTE(review): OFFSET is not listed as a separate clause here. ClickHouse does document an OFFSET ... FETCH clause — confirm this omission is intentional.
22+
'SETTINGS',
23+
'INTO OUTFILE',
24+
'FORMAT',
25+
// https://clickhouse.com/docs/sql-reference/window-functions
26+
'WINDOW',
27+
'PARTITION BY',
28+
// https://clickhouse.com/docs/sql-reference/statements/insert-into
29+
'INSERT INTO',
30+
'VALUES',
1131
]);
1232

1333
// Oneline clauses that are NOT part of tabularOnelineClauses: the exported dialect
// passes this list to `formatOptions.onelineClauses` (merged with
// tabularOnelineClauses) but not to `formatOptions.tabularOnelineClauses`.
const standardOnelineClauses = expandPhrases([
1434
// https://clickhouse.com/docs/sql-reference/statements/create
1535
'CREATE [OR REPLACE] [TEMPORARY] TABLE [IF NOT EXISTS]',
36+
]);
37+
const tabularOnelineClauses = expandPhrases([
1638
// https://clickhouse.com/docs/sql-reference/statements/update
1739
'UPDATE',
1840
// https://clickhouse.com/docs/sql-reference/statements/system
1941
'SYSTEM RELOAD {DICTIONARIES | DICTIONARY | FUNCTIONS | FUNCTION | ASYNCHRONOUS METRICS} [ON CLUSTER]',
20-
'SYSTEM DROP {DNS CACHE | MARK CACHE | ICEBERG METADATA CACHE | TEXT INDEX DICTIONARY CACHE | TEXT INDEX HEADER CACHE | TEXT INDEX POSTINGS CACHE | REPLICA | DATABASE REPLICA | UNCOMPRESSED CACHE | COMPILED EXPRESSION CACHE | QUERY CONDITION CACHE | QUERY CACHE | FORMAT SCHEMA CACHE | DROP FILESYSTEM CACHE}',
42+
'SYSTEM DROP {DNS CACHE | MARK CACHE | ICEBERG METADATA CACHE | TEXT INDEX DICTIONARY CACHE | TEXT INDEX HEADER CACHE | TEXT INDEX POSTINGS CACHE | REPLICA | DATABASE REPLICA | UNCOMPRESSED CACHE | COMPILED EXPRESSION CACHE | QUERY CONDITION CACHE | QUERY CACHE | FORMAT SCHEMA CACHE | FILESYSTEM CACHE}',
2143
'SYSTEM FLUSH LOGS',
2244
'SYSTEM RELOAD {CONFIG | USERS}',
2345
'SYSTEM SHUTDOWN',
@@ -56,8 +78,6 @@ const standardOnelineClauses = expandPhrases([
5678
'RENAME [TABLE | DICTIONARY | DATABASE]',
5779
// https://clickhouse.com/docs/sql-reference/statements/exchange
5880
'EXCHANGE {TABLES | DICTIONARIES}',
59-
// https://clickhouse.com/docs/sql-reference/statements/set
60-
'SET',
6181
// https://clickhouse.com/docs/sql-reference/statements/set-role
6282
'SET ROLE [DEFAULT | NONE | ALL | ALL EXCEPT]',
6383
'SET DEFAULT ROLE [NONE]',
@@ -73,8 +93,6 @@ const standardOnelineClauses = expandPhrases([
7393
'CHECK GRANT',
7494
// https://clickhouse.com/docs/sql-reference/statements/undrop
7595
'UNDROP TABLE',
76-
]);
77-
const tabularOnelineClauses = expandPhrases([
7896
// https://clickhouse.com/docs/sql-reference/statements/create
7997
'CREATE {DATABASE | NAMED COLLECTION} [IF NOT EXISTS]',
8098
'CREATE [OR REPLACE] {VIEW | DICTIONARY} [IF NOT EXISTS]',
@@ -89,8 +107,73 @@ const tabularOnelineClauses = expandPhrases([
89107
'ALTER {USER | ROLE | QUOTA | SETTINGS PROFILE} [IF EXISTS]',
90108
'ALTER [ROW] POLICY [IF EXISTS]',
91109
'ALTER NAMED COLLECTION [IF EXISTS]',
110+
// https://clickhouse.com/docs/sql-reference/statements/alter/user
111+
'RENAME TO',
112+
'DEFAULT ROLE [ALL [EXCEPT]]',
113+
'GRANTEES',
114+
'NOT IDENTIFIED',
115+
'RESET AUTHENTICATION METHODS TO NEW',
116+
'{IDENTIFIED | ADD IDENTIFIED} [WITH | BY]',
117+
'[ADD | DROP] HOST {LOCAL | NAME | REGEXP | IP | LIKE}',
118+
'VALID UNTIL',
119+
'DROP [ALL] {PROFILES | SETTINGS}',
120+
'{ADD | MODIFY} SETTINGS',
121+
'ADD PROFILES',
122+
// https://clickhouse.com/docs/sql-reference/statements/alter/apply-deleted-mask
123+
'APPLY DELETED MASK [IN PARTITION]',
124+
// https://clickhouse.com/docs/sql-reference/statements/alter/column
125+
'{ADD | DROP | RENAME | CLEAR | COMMENT | MODIFY | ALTER | MATERIALIZE} COLUMN',
126+
// https://clickhouse.com/docs/sql-reference/statements/alter/partition
127+
'{DETACH | DROP | ATTACH | FETCH | MOVE} {PART | PARTITION}',
128+
'DROP DETACHED {PART | PARTITION}',
129+
'{FORGET | REPLACE} PARTITION',
130+
'CLEAR COLUMN',
131+
'{FREEZE | UNFREEZE} [PARTITION]',
132+
'CLEAR INDEX',
133+
'TO {DISK | VOLUME}',
134+
'[DELETE | REWRITE PARTS] IN PARTITION',
135+
// https://clickhouse.com/docs/sql-reference/statements/alter/setting
136+
'{MODIFY | RESET} SETTING',
137+
// https://clickhouse.com/docs/sql-reference/statements/alter/delete
138+
'DELETE WHERE',
139+
// https://clickhouse.com/docs/sql-reference/statements/alter/order-by
140+
'MODIFY ORDER BY',
141+
// https://clickhouse.com/docs/sql-reference/statements/alter/sample-by
142+
'{MODIFY | REMOVE} SAMPLE BY',
143+
// https://clickhouse.com/docs/sql-reference/statements/alter/skipping-index
144+
'{ADD | MATERIALIZE | CLEAR} INDEX [IF NOT EXISTS]',
145+
// https://clickhouse.com/docs/sql-reference/statements/alter/constraint
146+
'ADD CONSTRAINT [IF NOT EXISTS]',
147+
'DROP CONSTRAINT [IF EXISTS]',
148+
// https://clickhouse.com/docs/sql-reference/statements/alter/ttl
149+
'MODIFY TTL',
150+
'REMOVE TTL',
151+
// https://clickhouse.com/docs/sql-reference/statements/alter/statistics
152+
'ADD STATISTICS [IF NOT EXISTS]',
153+
'MODIFY STATISTICS',
154+
'{DROP | CLEAR} STATISTICS [IF EXISTS]',
155+
'MATERIALIZE STATISTICS [ALL | IF EXISTS]',
156+
// https://clickhouse.com/docs/sql-reference/statements/alter/quota
157+
'KEYED BY',
158+
'NOT KEYED',
159+
'FOR [RANDOMIZED] INTERVAL',
160+
// https://clickhouse.com/docs/sql-reference/statements/alter/row-policy
161+
'AS {PERMISSIVE | RESTRICTIVE}',
162+
'FOR SELECT',
163+
// https://clickhouse.com/docs/sql-reference/statements/alter/projection
164+
'ADD PROJECTION [IF NOT EXISTS]',
165+
'{DROP | MATERIALIZE | CLEAR} PROJECTION [IF EXISTS]',
166+
// https://clickhouse.com/docs/sql-reference/statements/alter/view
167+
'MODIFY QUERY',
168+
// https://clickhouse.com/docs/sql-reference/statements/create/view#refreshable-materialized-view
169+
'REFRESH {EVERY | AFTER}',
170+
'RANDOMIZE FOR',
171+
'DEPENDS ON',
172+
'APPEND TO',
92173
// https://clickhouse.com/docs/sql-reference/statements/delete
93174
'DELETE FROM',
175+
// https://clickhouse.com/docs/sql-reference/statements/explain
176+
'EXPLAIN [AST | SYNTAX | QUERY TREE | PLAN | PIPELINE | ESTIMATE | TABLE OVERRIDE]',
94177
// https://clickhouse.com/docs/sql-reference/statements/grant
95178
'GRANT [ON CLUSTER]',
96179
// https://clickhouse.com/docs/sql-reference/statements/revoke
@@ -102,7 +185,7 @@ const tabularOnelineClauses = expandPhrases([
102185
]);
103186

104187
const reservedSetOperations = expandPhrases([
105-
// https://clickhouse.com/docs/sql-reference/statements/select/set-operations
188+
// https://clickhouse.com/docs/sql-reference/statements/select/union
106189
'UNION [ALL | DISTINCT]',
107190
// https://clickhouse.com/docs/sql-reference/statements/parallel_with
108191
'PARALLEL WITH',
@@ -113,6 +196,8 @@ const reservedJoins = expandPhrases([
113196
'[GLOBAL] [INNER|LEFT|RIGHT|FULL|CROSS] [OUTER|SEMI|ANTI|ANY|ALL|ASOF] JOIN',
114197
]);
115198

199+
// Multi-word keyword phrases (window frame specifications). Passed as
// `reservedKeywordPhrases` in the exported `clickhouse` dialect options.
const reservedKeywordPhrases = expandPhrases(['{ROWS | RANGE} BETWEEN']);
200+
116201
// https://clickhouse.com/docs/sql-reference/syntax
117202
export const clickhouse: DialectOptions = {
118203
name: 'clickhouse',
@@ -121,23 +206,25 @@ export const clickhouse: DialectOptions = {
121206
reservedClauses: [...reservedClauses, ...standardOnelineClauses, ...tabularOnelineClauses],
122207
reservedSetOperations,
123208
reservedJoins,
124-
reservedKeywordPhrases: keywordPhrases,
209+
reservedKeywordPhrases,
125210

126211
reservedKeywords: keywords,
127212
reservedDataTypes: dataTypes,
128213
reservedFunctionNames: functions,
214+
extraParens: ['[]'],
215+
lineCommentTypes: ['#', '--'],
129216
nestedBlockComments: false,
130217
underscoresInNumbers: true,
131-
stringTypes: ['$$', "''-qq", "''-qq-bs"],
132-
identTypes: ['""-qq', '``'],
218+
stringTypes: ['$$', "''-qq-bs"],
219+
identTypes: ['""-qq-bs', '``'],
133220
paramTypes: {
134221
// https://clickhouse.com/docs/sql-reference/syntax#defining-and-using-query-parameters
135222
custom: [
136223
{
137-
regex: String.raw`\{\s*[a-zA-Z0-9_]+\s*:\s*[a-zA-Z0-9_]+\s*\}`,
224+
regex: String.raw`\{\s*[^:]+:[^}]+\}`,
138225
key: v => {
139-
const [key] = v.split(':');
140-
return key.trim();
226+
const match = /\{([^:]+):/.exec(v);
227+
return match ? match[1].trim() : v;
141228
},
142229
},
143230
],
@@ -153,9 +240,59 @@ export const clickhouse: DialectOptions = {
153240
// Lambda creation
154241
'->',
155242
],
243+
postProcess,
156244
},
157245
formatOptions: {
158-
onelineClauses: standardOnelineClauses,
246+
onelineClauses: [...standardOnelineClauses, ...tabularOnelineClauses],
159247
tabularOnelineClauses,
160248
},
161249
};
250+
251+
/**
 * Re-types `IN` and `ANY` tokens from RESERVED_FUNCTION_NAME to RESERVED_KEYWORD
 * when they are used as operators rather than as function calls.
 *
 * IN operator:  foo IN (1, 2, 3)    - IN comes after an identifier/expression
 * IN function:  IN(foo, 1, 2, 3)    - IN comes at start or after operators/keywords
 *
 * ANY operator: foo = ANY (1, 2, 3) - ANY comes after an operator like =
 * ANY function: ANY(foo, 1, 2, 3)   - ANY comes at start or after operators/keywords
 *
 * Only the token `type` is changed. `raw` (the text as written in the query) is
 * left untouched, so the output casing of the converted keyword is decided by the
 * formatter's keyword-casing configuration instead of being forced to upper case
 * here.
 *
 * @param tokens - tokens of the query, in source order
 * @returns a new array of the same length; tokens that need no change are returned
 *   as the same object, converted tokens are shallow copies with a new `type`
 */
function postProcess(tokens: Token[]): Token[] {
  return tokens.map((token, i) => {
    // Matching is done on `text` (the upper-cased canonical form), never on `raw`.
    const isInOrAny =
      token.type === TokenType.RESERVED_FUNCTION_NAME &&
      (token.text === 'IN' || token.text === 'ANY');
    if (!isInOrAny) {
      return token;
    }

    // EOF_TOKEN stands in for the missing neighbour at either end of the stream.
    const nextToken = tokens[i + 1] || EOF_TOKEN;
    const prevToken = tokens[i - 1] || EOF_TOKEN;

    const usedAsOperator =
      // A function call must be directly followed by an opening parenthesis.
      nextToken.text !== '(' ||
      // `<operand> IN (...)`: IN preceded by something that ends an expression.
      (token.text === 'IN' && endsOperand(prevToken)) ||
      // `<operator> ANY (...)`: ANY preceded by an operator such as `=`.
      (token.text === 'ANY' && prevToken.type === TokenType.OPERATOR);

    // Otherwise keep RESERVED_FUNCTION_NAME so that function casing rules apply.
    return usedAsOperator ? { ...token, type: TokenType.RESERVED_KEYWORD } : token;
  });
}

/** Tells whether `token` can be the last token of an operand standing before `IN`. */
function endsOperand(token: Token): boolean {
  switch (token.type) {
    case TokenType.IDENTIFIER:
    case TokenType.QUOTED_IDENTIFIER:
    case TokenType.NUMBER:
    case TokenType.STRING:
    case TokenType.CLOSE_PAREN:
    case TokenType.ASTERISK:
      return true;
    default:
      return false;
  }
}

0 commit comments

Comments
 (0)