Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 22 additions & 7 deletions src/utils/sqlSplitter/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ export type TokenKind =
| 'delimiter'
| 'setDelimiter'
| 'goDelimiter'
| 'slashDelimiter'
| 'data';

export interface Token {
Expand All @@ -68,6 +69,9 @@ export interface DialectOptions {
readonly dollarQuoting: boolean;
readonly customDelimiter: boolean;
readonly goDelimiter: boolean;
readonly slashTerminator: boolean;
readonly plsqlBlocks: boolean;
readonly qQuoting: boolean;
readonly lineComments: boolean;
readonly blockComments: boolean;
/**
Expand Down Expand Up @@ -106,6 +110,9 @@ const POSTGRES: DialectOptions = {
dollarQuoting: true,
customDelimiter: false,
goDelimiter: false,
slashTerminator: false,
plsqlBlocks: false,
qQuoting: false,
lineComments: true,
blockComments: true,
executableComments: false,
Expand All @@ -123,6 +130,9 @@ const MYSQL: DialectOptions = {
dollarQuoting: false,
customDelimiter: true,
goDelimiter: false,
slashTerminator: false,
plsqlBlocks: false,
qQuoting: false,
lineComments: true,
blockComments: true,
executableComments: true,
Expand All @@ -140,6 +150,9 @@ const MSSQL: DialectOptions = {
dollarQuoting: false,
customDelimiter: false,
goDelimiter: true,
slashTerminator: false,
plsqlBlocks: false,
qQuoting: false,
lineComments: true,
blockComments: true,
executableComments: false,
Expand All @@ -158,26 +171,25 @@ const SQLITE: DialectOptions = {
dollarQuoting: false,
customDelimiter: false,
goDelimiter: false,
slashTerminator: false,
plsqlBlocks: false,
qQuoting: false,
lineComments: true,
blockComments: true,
executableComments: false,
nestedBlockComments: false,
lineCommentRequiresSpace: false,
};

// Oracle's option shape currently matches GENERIC. They are kept as
// separate constants on purpose: once an Oracle-only feature lands
// (e.g. `/` block terminator, nested block comments via SQLPlus, the
// `Q'…'` quoted literal syntax), the divergence stays a one-line edit
// rather than a search across call sites. If you find yourself
// modifying both, prefer adding the flag to GENERIC only when it is
// truly dialect-agnostic.
const ORACLE: DialectOptions = {
quotes: STANDARD_QUOTES,
eString: false,
dollarQuoting: false,
customDelimiter: false,
goDelimiter: false,
slashTerminator: true,
plsqlBlocks: true,
qQuoting: true,
lineComments: true,
blockComments: true,
executableComments: false,
Expand All @@ -191,6 +203,9 @@ const GENERIC: DialectOptions = {
dollarQuoting: false,
customDelimiter: false,
goDelimiter: false,
slashTerminator: false,
plsqlBlocks: false,
qQuoting: false,
lineComments: true,
blockComments: true,
executableComments: false,
Expand Down
199 changes: 191 additions & 8 deletions src/utils/sqlSplitter/splitter.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
// Statement splitter. Two-pass:
// Statement splitter:
// 1. collectSegments — walk tokens, partition source by delimiter into
// raw segments with a hasMeaningful flag.
// 2. foldComments — apply fold rule:
// 2. foldBlocks — for Oracle-like dialects, merge PL/SQL source units
// until the next slash-terminated segment.
// 3. foldComments — apply fold rule:
// (a) leading comment-only segment(s) → prepended to NEXT meaningful;
// (b) trailing comment-only segment(s) → appended to PREVIOUS;
// (c) entirely comment-only input → drop, return [].
Expand All @@ -19,8 +21,16 @@ interface RawSegment {
readonly start: number;
readonly end: number;
readonly hasMeaningful: boolean;
readonly terminator: SegmentTerminator;
}

/**
 * What ended a raw segment: the kind of the delimiter token that closed it,
 * or 'eof' for the trailing segment flushed when the input runs out.
 */
type SegmentTerminator =
| 'delimiter'
| 'goDelimiter'
| 'slashDelimiter'
| 'setDelimiter'
| 'eof';

interface Span {
readonly start: number;
readonly end: number;
Expand All @@ -34,7 +44,8 @@ interface FoldedGroup {
export function splitInto(sql: string, options: DialectOptions): Statement[] {
if (sql.length === 0) return [];
const segments = collectSegments(sql, options);
const folded = foldComments(segments);
const foldedBlocks = options.plsqlBlocks ? foldBlocks(sql, segments) : segments;
const folded = foldComments(foldedBlocks);
return folded.map((group) => buildStatement(sql, group));
}

Expand All @@ -45,9 +56,9 @@ function collectSegments(sql: string, options: DialectOptions): RawSegment[] {
let hasMeaningful = false;
let position = 0;

const pushSegment = (end: number): void => {
const pushSegment = (end: number, terminator: SegmentTerminator): void => {
if (end <= segStart) return;
segments.push({ start: segStart, end, hasMeaningful });
segments.push({ start: segStart, end, hasMeaningful, terminator });
};

while (position < sql.length) {
Expand All @@ -57,14 +68,15 @@ function collectSegments(sql: string, options: DialectOptions): RawSegment[] {
switch (token.kind) {
case 'delimiter':
case 'goDelimiter':
pushSegment(position);
case 'slashDelimiter':
pushSegment(position, token.kind);
position += token.length;
segStart = position;
hasMeaningful = false;
state.lineLeading = false;
break;
case 'setDelimiter':
pushSegment(position);
pushSegment(position, token.kind);
position += token.length;
segStart = position;
hasMeaningful = false;
Expand Down Expand Up @@ -92,10 +104,181 @@ function collectSegments(sql: string, options: DialectOptions): RawSegment[] {
}
}

pushSegment(sql.length);
pushSegment(sql.length, 'eof');
return segments;
}

/**
 * Collapses the raw segments that make up one PL/SQL source unit.
 *
 * Segments are copied through untouched until a meaningful segment that
 * `isPlsqlBlockOpener` accepts is reached. Starting at that opener, every
 * segment up to and including the first one terminated by 'slashDelimiter'
 * is merged into a single meaningful segment. If no slash-terminated segment
 * follows, the merge runs to the last segment.
 *
 * @param sql - Source text that the segment offsets index into.
 * @param segments - Raw segments in source order.
 * @returns A new array of segments; the input array is not modified.
 */
function foldBlocks(
  sql: string,
  segments: ReadonlyArray<RawSegment>,
): RawSegment[] {
  const merged: RawSegment[] = [];
  const lastIndex = segments.length - 1;

  for (let cursor = 0; cursor <= lastIndex; cursor++) {
    const opener = segments[cursor];
    const opensBlock = opener.hasMeaningful && isPlsqlBlockOpener(sql, opener);
    if (!opensBlock) {
      merged.push(opener);
      continue;
    }

    // Advance to the slash-terminated segment that closes the unit; stopping
    // at lastIndex makes an unterminated unit swallow the rest of the input.
    let closeAt = cursor;
    while (
      closeAt < lastIndex &&
      segments[closeAt].terminator !== 'slashDelimiter'
    ) {
      closeAt++;
    }

    const closer = segments[closeAt];
    merged.push({
      start: opener.start,
      end: closer.end,
      hasMeaningful: true,
      terminator: closer.terminator,
    });

    // Resume after the merged range (the loop header adds the +1).
    cursor = closeAt;
  }

  return merged;
}

/**
 * Reports whether a segment's leading tokens start a PL/SQL source unit.
 *
 * Any leading `<<` ... `>>` label groups are stepped over first. The next
 * token then decides: DECLARE and BEGIN always qualify, WITH and CREATE are
 * delegated to `isWithBlockOpener` / `isCreateBlockOpener`, and anything
 * else does not qualify.
 *
 * @param sql - Source text that the segment offsets index into.
 * @param segment - Segment whose text is inspected.
 */
function isPlsqlBlockOpener(sql: string, segment: RawSegment): boolean {
  const words = leadingSignificantTokens(sql.slice(segment.start, segment.end));

  let cursor = 0;
  while (words[cursor] === '<<') {
    const close = words.indexOf('>>', cursor + 1);
    // An unclosed label leaves the cursor on '<<', which matches no keyword.
    if (close < 0) break;
    cursor = close + 1;
  }

  switch (words[cursor]) {
    case 'DECLARE':
    case 'BEGIN':
      return true;
    case 'WITH':
      return isWithBlockOpener(words, cursor + 1);
    case 'CREATE':
      return isCreateBlockOpener(words, cursor + 1);
    default:
      return false;
  }
}

/**
 * Decides whether the tokens following WITH open a PL/SQL declaration.
 *
 * True when the token at `index` is FUNCTION or PROCEDURE and the token
 * after it is neither AS nor '('.
 * NOTE(review): the AS / '(' exclusion presumably keeps a common table
 * expression that is merely named FUNCTION or PROCEDURE from being treated
 * as a block — confirm against the intended grammar.
 *
 * @param tokens - Upper-cased leading tokens of the segment.
 * @param index - Position of the token immediately after WITH.
 */
function isWithBlockOpener(
  tokens: ReadonlyArray<string>,
  index: number,
): boolean {
  const declaresSubprogram =
    tokens[index] === 'FUNCTION' || tokens[index] === 'PROCEDURE';
  if (!declaresSubprogram) return false;

  const follower = tokens[index + 1];
  return !(follower === 'AS' || follower === '(');
}

/**
 * Decides whether the tokens following CREATE introduce a stored source unit
 * (one whose body may itself contain statement delimiters).
 *
 * Optional prefixes are skipped in this order: `OR REPLACE`,
 * `EDITIONABLE` / `NONEDITIONABLE`, `AND COMPILE` / `AND RESOLVE`, and
 * `NOFORCE`. The object kind that follows then decides the result:
 *   - FUNCTION, PROCEDURE, TRIGGER, LIBRARY: always a source unit.
 *   - PACKAGE, TYPE, CLASS: a source unit when any token follows the kind
 *     (this covers both the specification and the `BODY` form).
 *   - JAVA: a source unit only when followed by SOURCE or CLASS.
 *
 * @param tokens - Upper-cased leading tokens of the segment.
 * @param startIndex - Position of the token immediately after CREATE.
 */
function isCreateBlockOpener(
  tokens: ReadonlyArray<string>,
  startIndex: number,
): boolean {
  let index = startIndex;
  if (tokens[index] === 'OR' && tokens[index + 1] === 'REPLACE') {
    index += 2;
  }
  if (
    tokens[index] === 'EDITIONABLE' ||
    tokens[index] === 'NONEDITIONABLE'
  ) {
    index++;
  }
  if (tokens[index] === 'AND' && isJavaCompileOption(tokens[index + 1])) {
    index += 2;
  }
  // CREATE ... JAVA accepts an optional NOFORCE before the JAVA keyword;
  // without this step such a statement would not be recognised below.
  if (tokens[index] === 'NOFORCE') {
    index++;
  }

  const kind = tokens[index];
  const next = tokens[index + 1];
  switch (kind) {
    case 'FUNCTION':
    case 'PROCEDURE':
    case 'TRIGGER':
    case 'LIBRARY':
      return true;
    case 'PACKAGE':
    case 'TYPE':
    case 'CLASS':
      // Any following token qualifies, whether it is BODY or the unit name.
      return next !== undefined;
    case 'JAVA':
      return next === 'SOURCE' || next === 'CLASS';
    default:
      return false;
  }
}

/** True for the keywords accepted after AND in a CREATE ... JAVA prefix. */
function isJavaCompileOption(token: string | undefined): boolean {
  return token === 'COMPILE' || token === 'RESOLVE';
}

/**
 * Extracts up to twelve leading tokens from a piece of SQL text.
 *
 * Trivia is skipped via `skipTrivia` before each token. Recognised tokens
 * are the two-character markers `<<` and `>>`, an opening parenthesis, and
 * identifier-like words (a letter or underscore followed by letters,
 * digits, `_`, `$` or `#`), which are returned upper-cased. Any other
 * character is stepped over without producing a token.
 *
 * @param source - Text to scan.
 * @returns The tokens found, in order; never more than twelve.
 */
function leadingSignificantTokens(source: string): string[] {
  const maxTokens = 12;
  // Sticky matching anchors each attempt at `lastIndex`, so the word test
  // runs against `source` directly rather than a fresh substring.
  const wordPattern = /[A-Za-z_][A-Za-z0-9_$#]*/y;
  const found: string[] = [];
  let cursor = 0;

  while (cursor < source.length && found.length < maxTokens) {
    cursor = skipTrivia(source, cursor);
    if (cursor >= source.length) break;

    if (source.startsWith('<<', cursor) || source.startsWith('>>', cursor)) {
      found.push(source.slice(cursor, cursor + 2));
      cursor += 2;
      continue;
    }

    if (source[cursor] === '(') {
      found.push('(');
      cursor += 1;
      continue;
    }

    wordPattern.lastIndex = cursor;
    const match = wordPattern.exec(source);
    if (match !== null) {
      found.push(match[0].toUpperCase());
      cursor += match[0].length;
      continue;
    }

    // Not part of any recognised token; step over one character.
    cursor += 1;
  }

  return found;
}

/**
 * Advances past whitespace and comments starting at `position`.
 *
 * Repeatedly skips characters accepted by `isAsciiSpace`, `--` line comments
 * (up to, but not including, the terminating newline) and block comments
 * delimited by slash-star and star-slash, until none of them applies.
 * A comment that is never closed extends to the end of `source`.
 *
 * @param source - Text being scanned.
 * @param position - Offset at which to start skipping.
 * @returns Offset of the first non-trivia character, or `source.length`
 *   when only trivia remains.
 */
function skipTrivia(source: string, position: number): number {
  let p = position;
  for (;;) {
    while (p < source.length && isAsciiSpace(source.charCodeAt(p))) p++;

    if (source.startsWith('--', p)) {
      // Stop on the newline itself; the whitespace pass above decides
      // whether it is consumed.
      const newline = source.indexOf('\n', p + 2);
      p = newline === -1 ? source.length : newline;
      continue;
    }

    if (source.startsWith('/*', p)) {
      // Search from after the opener so that "/*/" is not read as a
      // complete comment. An unterminated comment consumes the remainder
      // of the input, so its last character is not reported as
      // significant.
      const close = source.indexOf('*/', p + 2);
      p = close === -1 ? source.length : close + 2;
      continue;
    }

    return p;
  }
}

function foldComments(segments: ReadonlyArray<RawSegment>): FoldedGroup[] {
const output: { spans: Span[]; meaningful: Span }[] = [];
let pending: Span[] = [];
Expand Down
Loading