diff --git a/src/utils/sqlSplitter/index.ts b/src/utils/sqlSplitter/index.ts index 67a2faf9..c86c2dae 100644 --- a/src/utils/sqlSplitter/index.ts +++ b/src/utils/sqlSplitter/index.ts @@ -47,6 +47,7 @@ export type TokenKind = | 'delimiter' | 'setDelimiter' | 'goDelimiter' + | 'slashDelimiter' | 'data'; export interface Token { @@ -68,6 +69,9 @@ export interface DialectOptions { readonly dollarQuoting: boolean; readonly customDelimiter: boolean; readonly goDelimiter: boolean; + readonly slashTerminator: boolean; + readonly plsqlBlocks: boolean; + readonly qQuoting: boolean; readonly lineComments: boolean; readonly blockComments: boolean; /** @@ -106,6 +110,9 @@ const POSTGRES: DialectOptions = { dollarQuoting: true, customDelimiter: false, goDelimiter: false, + slashTerminator: false, + plsqlBlocks: false, + qQuoting: false, lineComments: true, blockComments: true, executableComments: false, @@ -123,6 +130,9 @@ const MYSQL: DialectOptions = { dollarQuoting: false, customDelimiter: true, goDelimiter: false, + slashTerminator: false, + plsqlBlocks: false, + qQuoting: false, lineComments: true, blockComments: true, executableComments: true, @@ -140,6 +150,9 @@ const MSSQL: DialectOptions = { dollarQuoting: false, customDelimiter: false, goDelimiter: true, + slashTerminator: false, + plsqlBlocks: false, + qQuoting: false, lineComments: true, blockComments: true, executableComments: false, @@ -158,6 +171,9 @@ const SQLITE: DialectOptions = { dollarQuoting: false, customDelimiter: false, goDelimiter: false, + slashTerminator: false, + plsqlBlocks: false, + qQuoting: false, lineComments: true, blockComments: true, executableComments: false, @@ -165,19 +181,15 @@ const SQLITE: DialectOptions = { lineCommentRequiresSpace: false, }; -// Oracle's option shape currently matches GENERIC. They are kept as -// separate constants on purpose: once an Oracle-only feature lands -// (e.g. 
`/` block terminator, nested block comments via SQLPlus, the -// `Q'…'` quoted literal syntax), the divergence stays a one-line edit -// rather than a search across call sites. If you find yourself -// modifying both, prefer adding the flag to GENERIC only when it is -// truly dialect-agnostic. const ORACLE: DialectOptions = { quotes: STANDARD_QUOTES, eString: false, dollarQuoting: false, customDelimiter: false, goDelimiter: false, + slashTerminator: true, + plsqlBlocks: true, + qQuoting: true, lineComments: true, blockComments: true, executableComments: false, @@ -191,6 +203,9 @@ const GENERIC: DialectOptions = { dollarQuoting: false, customDelimiter: false, goDelimiter: false, + slashTerminator: false, + plsqlBlocks: false, + qQuoting: false, lineComments: true, blockComments: true, executableComments: false, diff --git a/src/utils/sqlSplitter/splitter.ts b/src/utils/sqlSplitter/splitter.ts index 1347e38b..5f9b72cf 100644 --- a/src/utils/sqlSplitter/splitter.ts +++ b/src/utils/sqlSplitter/splitter.ts @@ -1,7 +1,9 @@ -// Statement splitter. Two-pass: +// Statement splitter: // 1. collectSegments — walk tokens, partition source by delimiter into // raw segments with a hasMeaningful flag. -// 2. foldComments — apply fold rule: +// 2. foldBlocks — for Oracle-like dialects, merge PL/SQL source units +// until the next slash-terminated segment. +// 3. foldComments — apply fold rule: // (a) leading comment-only segment(s) → prepended to NEXT meaningful; // (b) trailing comment-only segment(s) → appended to PREVIOUS; // (c) entirely comment-only input → drop, return []. 
@@ -19,8 +21,16 @@ interface RawSegment {
   readonly start: number;
   readonly end: number;
   readonly hasMeaningful: boolean;
+  readonly terminator: SegmentTerminator;
 }
 
+type SegmentTerminator =
+  | 'delimiter'
+  | 'goDelimiter'
+  | 'slashDelimiter'
+  | 'setDelimiter'
+  | 'eof';
+
 interface Span {
   readonly start: number;
   readonly end: number;
@@ -34,7 +44,8 @@ interface FoldedGroup {
 export function splitInto(sql: string, options: DialectOptions): Statement[] {
   if (sql.length === 0) return [];
   const segments = collectSegments(sql, options);
-  const folded = foldComments(segments);
+  const foldedBlocks = options.plsqlBlocks ? foldBlocks(sql, segments) : segments;
+  const folded = foldComments(foldedBlocks);
   return folded.map((group) => buildStatement(sql, group));
 }
 
@@ -45,9 +56,9 @@ function collectSegments(sql: string, options: DialectOptions): RawSegment[] {
   let hasMeaningful = false;
   let position = 0;
 
-  const pushSegment = (end: number): void => {
+  const pushSegment = (end: number, terminator: SegmentTerminator): void => {
     if (end <= segStart) return;
-    segments.push({ start: segStart, end, hasMeaningful });
+    segments.push({ start: segStart, end, hasMeaningful, terminator });
   };
 
   while (position < sql.length) {
@@ -57,14 +68,15 @@ function collectSegments(sql: string, options: DialectOptions): RawSegment[] {
     switch (token.kind) {
       case 'delimiter':
       case 'goDelimiter':
-        pushSegment(position);
+      case 'slashDelimiter':
+        pushSegment(position, token.kind);
         position += token.length;
         segStart = position;
         hasMeaningful = false;
         state.lineLeading = false;
         break;
       case 'setDelimiter':
-        pushSegment(position);
+        pushSegment(position, token.kind);
         position += token.length;
         segStart = position;
         hasMeaningful = false;
@@ -92,10 +104,217 @@ function collectSegments(sql: string, options: DialectOptions): RawSegment[] {
     }
   }
 
-  pushSegment(sql.length);
+  pushSegment(sql.length, 'eof');
   return segments;
 }
 
+/**
+ * Merges PL/SQL source units into single segments.
+ *
+ * A meaningful segment that opens a PL/SQL unit (see isPlsqlBlockOpener)
+ * absorbs every following segment up to and including the first one
+ * terminated by a slash delimiter, or through the last segment when no
+ * slash-terminated segment follows. The merged segment keeps the
+ * terminator of the segment it ends on; other segments pass through.
+ */
+function foldBlocks(
+  sql: string,
+  segments: ReadonlyArray<RawSegment>,
+): RawSegment[] {
+  const output: RawSegment[] = [];
+  let index = 0;
+
+  while (index < segments.length) {
+    const segment = segments[index];
+    if (!segment.hasMeaningful || !isPlsqlBlockOpener(sql, segment)) {
+      output.push(segment);
+      index++;
+      continue;
+    }
+
+    let endIndex = index;
+    while (
+      endIndex < segments.length &&
+      segments[endIndex].terminator !== 'slashDelimiter'
+    ) {
+      endIndex++;
+    }
+
+    if (endIndex >= segments.length) {
+      endIndex = segments.length - 1;
+    }
+
+    const endSegment = segments[endIndex];
+    output.push({
+      start: segment.start,
+      end: endSegment.end,
+      hasMeaningful: true,
+      terminator: endSegment.terminator,
+    });
+    index = endIndex + 1;
+  }
+
+  return output;
+}
+
+/**
+ * Reports whether a segment opens a PL/SQL unit: `DECLARE` or `BEGIN`
+ * (after any leading `<<label>>` labels), a `WITH FUNCTION` /
+ * `WITH PROCEDURE` declaration, or a `CREATE` of a stored unit. Only the
+ * segment's leading tokens are inspected.
+ */
+function isPlsqlBlockOpener(sql: string, segment: RawSegment): boolean {
+  const tokens = leadingSignificantTokens(sql.slice(segment.start, segment.end));
+  let index = 0;
+  while (tokens[index] === '<<') {
+    const labelEnd = tokens.indexOf('>>', index + 1);
+    if (labelEnd === -1) break;
+    index = labelEnd + 1;
+  }
+
+  const first = tokens[index];
+  if (first === 'DECLARE' || first === 'BEGIN') return true;
+  if (first === 'WITH') return isWithBlockOpener(tokens, index + 1);
+  if (first !== 'CREATE') return false;
+  return isCreateBlockOpener(tokens, index + 1);
+}
+
+/**
+ * Tells `WITH FUNCTION f ...` / `WITH PROCEDURE p ...` apart from a CTE
+ * that is merely named `function` or `procedure`: a CTE name is followed
+ * by `AS` or a `(` column list. `index` is the token after `WITH`.
+ */
+function isWithBlockOpener(
+  tokens: ReadonlyArray<string>,
+  index: number,
+): boolean {
+  const kind = tokens[index];
+  if (kind !== 'FUNCTION' && kind !== 'PROCEDURE') return false;
+  const next = tokens[index + 1];
+  return next !== 'AS' && next !== '(';
+}
+
+/**
+ * Reports whether the tokens after `CREATE` name an object kind that is
+ * folded as a PL/SQL unit. Skips `OR REPLACE`, `EDITIONABLE` /
+ * `NONEDITIONABLE` and `AND COMPILE` / `AND RESOLVE` before the kind.
+ */
+function isCreateBlockOpener(
+  tokens: ReadonlyArray<string>,
+  startIndex: number,
+): boolean {
+  let index = startIndex;
+  if (tokens[index] === 'OR' && tokens[index + 1] === 'REPLACE') {
+    index += 2;
+  }
+  if (
+    tokens[index] === 'EDITIONABLE' ||
+    tokens[index] === 'NONEDITIONABLE'
+  ) {
+    index++;
+  }
+  if (tokens[index] === 'AND' && isJavaCompileOption(tokens[index + 1])) {
+    index += 2;
+  }
+
+  const kind = tokens[index];
+  const next = tokens[index + 1];
+  switch (kind) {
+    case 'FUNCTION':
+    case 'PROCEDURE':
+    case 'TRIGGER':
+    case 'LIBRARY':
+      return true;
+    case 'PACKAGE':
+    case 'TYPE':
+    case 'CLASS':
+      // Any following token qualifies, whether BODY or the unit's name.
+      return next !== undefined;
+    case 'JAVA':
+      return next === 'SOURCE' || next === 'CLASS';
+    default:
+      return false;
+  }
+}
+
+/** True for the keywords accepted after `AND` in a CREATE opener. */
+function isJavaCompileOption(token: string | undefined): boolean {
+  return token === 'COMPILE' || token === 'RESOLVE';
+}
+
+// Sticky: matches exactly at lastIndex, so callers need not slice the source.
+const IDENTIFIER_RE = /[A-Za-z_][A-Za-z0-9_$#]*/y;
+
+/**
+ * Collects up to 12 leading tokens of `source`, skipping whitespace and
+ * comments: `<<`, `>>`, `(` and identifier words (upper-cased). Any other
+ * character is stepped over without producing a token.
+ */
+function leadingSignificantTokens(source: string): string[] {
+  const tokens: string[] = [];
+  let position = 0;
+
+  while (position < source.length && tokens.length < 12) {
+    const next = skipTrivia(source, position);
+    if (next >= source.length) break;
+    position = next;
+
+    if (source.startsWith('<<', position)) {
+      tokens.push('<<');
+      position += 2;
+      continue;
+    }
+    if (source.startsWith('>>', position)) {
+      tokens.push('>>');
+      position += 2;
+      continue;
+    }
+
+    const ch = source[position];
+    if (ch === '(') {
+      tokens.push('(');
+      position++;
+      continue;
+    }
+
+    IDENTIFIER_RE.lastIndex = position;
+    const word = IDENTIFIER_RE.exec(source);
+    if (word) {
+      tokens.push(word[0].toUpperCase());
+      position += word[0].length;
+      continue;
+    }
+
+    position++;
+  }
+
+  return tokens;
+}
+
+/**
+ * Returns the first index at or after `position` that is neither
+ * whitespace nor inside a `--` line comment or a block comment.
+ */
+function skipTrivia(source: string, position: number): number {
+  let p = position;
+  for (;;) {
+    while (p < source.length && isAsciiSpace(source.charCodeAt(p))) p++;
+    if (source.startsWith('--', p)) {
+      p += 2;
+      while (p < source.length && source[p] !== '\n') p++;
+      continue;
+    }
+    if (source.startsWith('/*', p)) {
+      const close = source.indexOf('*/', p + 2);
+      // An unterminated comment swallows the rest of the input; stopping
+      // one character short would leak its last character as a token.
+      p = close === -1 ? source.length : close + 2;
+      continue;
+    }
+    return p;
+  }
+}
+
 function foldComments(segments: ReadonlyArray<RawSegment>): FoldedGroup[] {
   const output: { spans: Span[]; meaningful: Span }[] = [];
   let pending: Span[] = [];
diff --git a/src/utils/sqlSplitter/tokenizer.ts b/src/utils/sqlSplitter/tokenizer.ts
index 1e51358a..7d54cf80 100644
---
a/src/utils/sqlSplitter/tokenizer.ts
+++ b/src/utils/sqlSplitter/tokenizer.ts
@@ -81,6 +81,14 @@ export function scanToken(
     if (dollarToken) return dollarToken;
   }
 
+  if (
+    options.qQuoting &&
+    (ch === 'Q' || ch === 'q' || ch === 'N' || ch === 'n')
+  ) {
+    const qQuoteToken = scanQQuoted(source, position);
+    if (qQuoteToken) return qQuoteToken;
+  }
+
   for (const rule of options.quotes) {
     if (source.startsWith(rule.open, position)) {
       return scanQuoted(source, position, rule);
@@ -99,6 +107,10 @@ export function scanToken(
       const token = readGoDelimiter(source, position);
       if (token) return token;
     }
+    if (options.slashTerminator && ch === '/') {
+      const token = readSlashDelimiter(source, position);
+      if (token) return token;
+    }
   }
 
   return { kind: 'data', length: 1 };
@@ -200,6 +212,83 @@ function scanDollarQuoted(source: string, position: number): Token | null {
   return { kind: 'string', length: source.length - position };
 }
 
+/**
+ * Scans an Oracle alternative-quoting literal such as `q'[...]'` or
+ * `nq'{...}'` starting at `position`.
+ *
+ * Returns a `string` token spanning the literal, extended to the end of
+ * `source` when the closing delimiter is missing, or `null` when no such
+ * literal starts here.
+ */
+function scanQQuoted(source: string, position: number): Token | null {
+  const prevIndex = position - 1;
+  // Not a literal when the previous char extends a word (see isIdentBoundary).
+  if (isIdentBoundary(source, prevIndex)) return null;
+
+  let prefixLength = 0;
+  const ch = source[position];
+  const next = source[position + 1];
+  const third = source[position + 2];
+  if ((ch === 'Q' || ch === 'q') && next === "'") {
+    prefixLength = 2;
+  } else if (
+    (ch === 'N' || ch === 'n') &&
+    (next === 'Q' || next === 'q') &&
+    third === "'"
+  ) {
+    prefixLength = 3;
+  } else {
+    return null;
+  }
+
+  const delimiterPosition = position + prefixLength;
+  if (delimiterPosition >= source.length) return null;
+  const openCodePoint = source.codePointAt(delimiterPosition);
+  if (openCodePoint === undefined) return null;
+
+  const openDelimiter = String.fromCodePoint(openCodePoint);
+  // Oracle does not accept space, tab or return as the quote delimiter.
+  if (' \t\r\n'.includes(openDelimiter)) return null;
+  const closeDelimiter = qQuoteCloseDelimiter(openDelimiter);
+  let p = delimiterPosition + openDelimiter.length;
+
+  while (p < source.length) {
+    if (
+      source.startsWith(closeDelimiter, p) &&
+      source[p + closeDelimiter.length] === "'"
+    ) {
+      return {
+        kind: 'string',
+        length: p + closeDelimiter.length + 1 - position,
+      };
+    }
+    // Step over a surrogate pair as a single character.
+    const codePoint = source.codePointAt(p);
+    p += codePoint !== undefined && codePoint > 0xffff ? 2 : 1;
+  }
+
+  return { kind: 'string', length: source.length - position };
+}
+
+/**
+ * Maps `[`, `{`, `(` and `<` to their closing partner; any other delimiter
+ * closes with itself.
+ */
+function qQuoteCloseDelimiter(openDelimiter: string): string {
+  switch (openDelimiter) {
+    case '[':
+      return ']';
+    case '{':
+      return '}';
+    case '(':
+      return ')';
+    case '<':
+      return '>';
+    default:
+      return openDelimiter;
+  }
+}
+
 function matchesKeyword(
   source: string,
   position: number,
@@ -241,3 +330,18 @@ function readGoDelimiter(source: string, position: number): Token | null {
   const length = m[0].endsWith('\n') ? m[0].length - 1 : m[0].length;
   return { kind: 'goDelimiter', length };
 }
+
+// `/` followed only by blanks up to the next newline or the end of input.
+const SLASH_RE = /\/[ \t\r]*(\n|$)/y;
+
+/**
+ * Matches a slash terminator at `position`, or returns `null`. The token
+ * length excludes the trailing newline.
+ */
+function readSlashDelimiter(source: string, position: number): Token | null {
+  SLASH_RE.lastIndex = position;
+  const m = SLASH_RE.exec(source);
+  if (!m) return null;
+  const length = m[0].endsWith('\n') ? m[0].length - 1 : m[0].length;
+  return { kind: 'slashDelimiter', length };
+}
diff --git a/tests/utils/sqlSplitter/splitter.test.ts b/tests/utils/sqlSplitter/splitter.test.ts
index 60eaffa6..d554a39e 100644
--- a/tests/utils/sqlSplitter/splitter.test.ts
+++ b/tests/utils/sqlSplitter/splitter.test.ts
@@ -194,6 +194,225 @@ describe('splitStatements', () => {
     });
   });
 
+  describe('oracle slash terminator and PL/SQL blocks', () => {
+    it('splits plain SQL on a line-leading slash', () => {
+      const sql = ['SELECT 1 FROM dual', '/', 'SELECT 2 FROM dual', '/'].join(
+        '\n',
+      );
+      expect(splitQueries(sql, 'oracle')).toEqual([
+        'SELECT 1 FROM dual',
+        'SELECT 2 FROM dual',
+      ]);
+    });
+
+    it('keeps generic splitting unchanged for line-leading slash input', () => {
+      const sql = ['SELECT 1', '/', 'SELECT 2'].join('\n');
+      expect(splitQueries(sql, 'generic')).toEqual([sql]);
+    });
+
+    it('keeps a DM trigger body together until the slash terminator', () => {
+      const sql = [
+        'CREATE OR REPLACE TRIGGER TRG_SPLITTER_VFY',
+        'AFTER UPDATE OF STATUS, TOTAL_AMOUNT ON ORDERS',
+        'FOR EACH ROW',
+        'BEGIN',
+        ' INSERT INTO ORDER_AUDIT (',
+        ' ID,',
+        ' ORDER_ID,',
+        ' OLD_STATUS,',
+        ' NEW_STATUS',
+        ' ) VALUES (',
+        ' ORDER_AUDIT_SEQ.NEXTVAL,',
+        ' :OLD.ID,',
+        ' :OLD.STATUS,',
+        ' :NEW.STATUS',
+        ' );',
+        'END;',
+        '/',
+        'SELECT 1 FROM dual;',
+      ].join('\n');
+
+      const result = splitQueries(sql, 'oracle');
+      expect(result).toHaveLength(2);
+      expect(result[0]).toContain('CREATE OR REPLACE TRIGGER');
+      expect(result[0]).toContain('ORDER_AUDIT_SEQ.NEXTVAL');
+      expect(result[0]).toContain('END;');
+      expect(result[0]).not.toContain('\n/');
+      expect(result[1]).toBe('SELECT 1 FROM dual');
+    });
+
+    it('does not split nested BEGIN...END bodies on internal semicolons', () => {
+      const sql = [
+        'BEGIN',
+        ' IF TRUE THEN',
+        ' NULL;',
+        ' END IF;',
+        ' LOOP',
+        ' EXIT;',
+        ' END LOOP;',
+        'END;',
+        '/',
+      ].join('\n');
+
+      const result = splitQueries(sql, 'oracle');
+
expect(result).toHaveLength(1);
+      expect(result[0]).toContain('END IF;');
+      expect(result[0]).toContain('END LOOP;');
+    });
+
+    it('keeps package bodies with multiple procedures together', () => {
+      const sql = [
+        'CREATE OR REPLACE PACKAGE BODY pkg_demo AS',
+        ' PROCEDURE p1 IS',
+        ' BEGIN',
+        ' NULL;',
+        ' END;',
+        '',
+        ' PROCEDURE p2 IS',
+        ' BEGIN',
+        ' NULL;',
+        ' END;',
+        'END pkg_demo;',
+        '/',
+      ].join('\n');
+
+      const result = splitQueries(sql, 'oracle');
+      expect(result).toHaveLength(1);
+      expect(result[0]).toContain('PROCEDURE p1');
+      expect(result[0]).toContain('PROCEDURE p2');
+    });
+
+    it('keeps DM CREATE CLASS and CLASS BODY blocks together', () => {
+      const sql = [
+        'CREATE OR REPLACE CLASS CLS_SPLITTER_VFY AS',
+        ' STATIC FUNCTION NAME RETURN VARCHAR;',
+        'END;',
+        '/',
+        'CREATE OR REPLACE CLASS BODY CLS_SPLITTER_VFY AS',
+        ' STATIC FUNCTION NAME RETURN VARCHAR AS',
+        ' BEGIN',
+        " RETURN 'ok';",
+        ' END;',
+        'END;',
+        '/',
+      ].join('\n');
+
+      const result = splitQueries(sql, 'oracle');
+      expect(result).toHaveLength(2);
+      expect(result[0]).toContain('CREATE OR REPLACE CLASS');
+      expect(result[1]).toContain('CREATE OR REPLACE CLASS BODY');
+    });
+
+    it('keeps DM CREATE JAVA CLASS blocks together', () => {
+      const sql = [
+        'CREATE OR REPLACE JAVA CLASS JCLS_SPLITTER_VFY {',
+        ' public static String name() {',
+        ' return "a;b";',
+        ' }',
+        '}',
+        '/',
+      ].join('\n');
+
+      const result = splitQueries(sql, 'oracle');
+      expect(result).toHaveLength(1);
+      expect(result[0]).toContain('return "a;b";');
+    });
+
+    it('shields semicolons and slash lines inside q-quoted strings', () => {
+      const sql = [
+        "SELECT q'[first;",
+        '/',
+        "second]' FROM dual",
+        '/',
+        'SELECT 2 FROM dual',
+      ].join('\n');
+
+      const result = splitQueries(sql, 'oracle');
+      expect(result).toHaveLength(2);
+      expect(result[0]).toContain("q'[first;");
+      expect(result[0]).toContain("second]'");
+    });
+
+    it('shields semicolons inside nq-quoted strings', () => {
+      const sql = "SELECT nq'{a;b}' FROM dual; SELECT 2 FROM dual";
+      expect(splitQueries(sql, 'oracle')).toEqual([
+        "SELECT nq'{a;b}' FROM dual",
+        'SELECT 2 FROM dual',
+      ]);
+    });
+
+    it('handles comment-tolerant CREATE openers', () => {
+      const sql = [
+        'CREATE /* comment */ OR REPLACE PROCEDURE p_demo AS',
+        'BEGIN',
+        ' NULL;',
+        'END;',
+        '/',
+      ].join('\n');
+
+      expect(splitQueries(sql, 'oracle')).toHaveLength(1);
+    });
+
+    it('handles labeled blocks', () => {
+      const sql = ['<<outer_block>>', 'BEGIN', ' NULL;', 'END;', '/'].join(
+        '\n',
+      );
+      const result = splitQueries(sql, 'oracle');
+      expect(result).toHaveLength(1);
+      expect(result[0]).toContain('<<outer_block>>');
+    });
+
+    it('treats WITH FUNCTION as a PL/SQL block but CTEs named function as normal SQL', () => {
+      const inlineFunction = [
+        'WITH FUNCTION f RETURN NUMBER IS',
+        'BEGIN',
+        ' RETURN 1;',
+        'END;',
+        'SELECT f FROM dual',
+        '/',
+      ].join('\n');
+      expect(splitQueries(inlineFunction, 'oracle')).toHaveLength(1);
+
+      expect(
+        splitQueries('WITH function AS (SELECT 1 x FROM dual) SELECT x FROM function;', 'oracle'),
+      ).toHaveLength(1);
+      expect(
+        splitQueries(
+          'WITH function (x) AS (SELECT 1 FROM dual) SELECT x FROM function;',
+          'oracle',
+        ),
+      ).toHaveLength(1);
+    });
+
+    it('keeps a PL/SQL block together at EOF without a trailing slash', () => {
+      const sql = ['CREATE OR REPLACE PROCEDURE p_demo AS', 'BEGIN', ' NULL;', 'END;'].join(
+        '\n',
+      );
+      const result = splitQueries(sql, 'oracle');
+      expect(result).toHaveLength(1);
+      expect(result[0]).toContain('NULL;');
+    });
+
+    it('keeps END semicolon ownership when the slash terminates the following whitespace segment', () => {
+      const sql = ['CREATE OR REPLACE PROCEDURE p_demo AS', 'BEGIN', ' NULL;', 'END;', '/'].join(
+        '\n',
+      );
+      const [stmt] = splitStatements(sql, 'oracle');
+      expect(stmt.text).toContain('END;');
+      expect(sql.slice(stmt.range.start, stmt.range.end)).toContain('END;');
+    });
+
+    it('documents the SQL*Plus-compatible division continuation tradeoff', () => {
+      const sql = ['SELECT 10', '/', '2 FROM dual'].join('\n');
+      expect(splitQueries(sql, 'oracle')).toEqual(['SELECT 10', '2 FROM dual']);
+    });
+
+    it('does not split on a slash followed by more tokens on the same line', () => {
+      const sql = 'SELECT 10\n/2 FROM dual';
+      expect(splitQueries(sql, 'oracle')).toEqual([sql]);
+    });
+  });
+
   describe('mysql `--` requires trailing whitespace', () => {
     it('treats `1--1` as the subtraction operator, not a comment', () => {
       // MySQL parses `--` as subtraction unless followed by whitespace,
diff --git a/tests/utils/sqlSplitter/tokenizer.test.ts b/tests/utils/sqlSplitter/tokenizer.test.ts
index d4db9f6f..a283e1ea 100644
--- a/tests/utils/sqlSplitter/tokenizer.test.ts
+++ b/tests/utils/sqlSplitter/tokenizer.test.ts
@@ -5,6 +5,7 @@ import { dialectOptions } from '../../../src/utils/sqlSplitter';
 const PG = dialectOptions('postgres');
 const MY = dialectOptions('mysql');
 const MS = dialectOptions('mssql');
+const OR = dialectOptions('oracle');
 
 const freshState = (): TokenizerState => ({ delimiter: ';', lineLeading: true });
 
@@ -144,5 +145,42 @@ describe('scanToken', () => {
       const t = scanToken('DELIMITER //', 0, MY, state);
       expect(t?.kind).toBe('data');
     });
+
+    it('recognises a line-leading slash terminator (oracle)', () => {
+      const src = '/\nSELECT 1';
+      const t = scanToken(src, 0, OR, freshState());
+      expect(t).toEqual({ kind: 'slashDelimiter', length: 1 });
+    });
+
+    it('does not recognise a slash terminator mid-line', () => {
+      const state: TokenizerState = { delimiter: ';', lineLeading: false };
+      const t = scanToken('/', 0, OR, state);
+      expect(t).toEqual({ kind: 'data', length: 1 });
+    });
+
+    it('does not confuse a line-leading block comment with a slash terminator', () => {
+      const t = scanToken('/* comment */', 0, OR, freshState());
+      expect(t?.kind).toBe('blockComment');
+    });
+  });
+
+  describe('oracle q-quoting', () => {
+    it('consumes q-quoted strings with paired delimiters', () => {
+      const src = "q'[it; has / inside]'";
+      const t = scanToken(src, 0, OR, freshState());
+      expect(t).toEqual({ kind: 'string', length: src.length });
+    });
+
+    it('consumes nq-quoted strings', () => {
+      const src = "nq'{it; has / inside}'";
+      const t = scanToken(src, 0, OR, freshState());
+      expect(t).toEqual({ kind: 'string', length: src.length });
+    });
+
+    it('does not start q-quoting after an identifier character', () => {
+      const src = "colq'[x]'";
+      const t = scanToken(src, 3, OR, freshState());
+      expect(t).toEqual({ kind: 'data', length: 1 });
+    });
   });
 });