Skip to content

Commit 461fee8

Browse files
committed
Improve regex diagnostics
1 parent 9b45c0e commit 461fee8

File tree

1 file changed

+47
-28
lines changed

1 file changed

+47
-28
lines changed

src/DiagnosticCollection.ts

Lines changed: 47 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,7 @@ async function Diagnostics(document: vscode.TextDocument) {
204204
diagnosticsMismatchingRootScopeName(diagnostics, rootNode, document),
205205
diagnosticsTreeSitterJSONErrors(diagnostics, rootNode),
206206
diagnosticsTreeSitterRegexErrors(diagnostics, trees),
207-
diagnosticsOnigurumaRegexErrors(diagnostics, trees),
207+
diagnosticsRegularExpressionErrors(diagnostics, trees),
208208
diagnosticsBrokenIncludes(diagnostics, rootNode),
209209
diagnosticsUnusedRepos(diagnostics, rootNode),
210210
diagnosticsLinguistCaptures(diagnostics, rootNode),
@@ -393,7 +393,7 @@ async function diagnosticsTreeSitterRegexErrors(diagnostics: vscode.Diagnostic[]
393393
// vscode.window.showInformationMessage(`Regex ${(performance.now() - start).toFixed(3)}ms`);
394394
}
395395

396-
async function diagnosticsOnigurumaRegexErrors(diagnostics: vscode.Diagnostic[], trees: trees) {
396+
async function diagnosticsRegularExpressionErrors(diagnostics: vscode.Diagnostic[], trees: trees) {
397397
// vscode.window.showInformationMessage(JSON.stringify("diagnostics Regex Oniguruma"));
398398
// const start = performance.now();
399399
const regexNodes = trees.regexNodes;
@@ -425,33 +425,33 @@ async function diagnosticsOnigurumaRegexErrors(diagnostics: vscode.Diagnostic[],
425425
groupCaptures = queryNode(beginRegex, captureGroupQuery);
426426
}
427427

428+
428429
let errorCodeOniguruma: string;
429430
try {
431+
// VSCode TextMate uses oniguruma
430432
let replacedRegex = regex;
431433
if (hasBackreferences) {
432434
// VSCode TextMate replaces the backreferences directly
433435
if (beginNode) {
434436
let index = 0;
435437
for (const groupCapture of groupCaptures) {
436-
const groupText = groupCapture.node.text.slice( // substring() doesn't work with -1
437-
groupCapture.name == 'name' ? groupCapture.node.firstNamedChild!.text.length + 4 : // remove `(?<name>`
438-
groupCapture.name == 'group' ? 1 : // remove `(`
439-
0,
440-
groupCapture.name == 'regex' ? undefined : -1, // remove `)`
441-
).replace(/[\-\\\{\}\*\+\?\|\^\$\.\,\[\]\(\)\#\s]/g, '\\$&');
438+
const groupText = extractCaptureGroupText(groupCapture).replaceAll(/[\-\\{}*+?|^$.,\[\]()#\s]/g, '\\$&');
442439
// https://github.com/microsoft/vscode-textmate/blob/main/src/utils.ts#L160
443440
// https://github.com/microsoft/vscode-textmate/issues/239
444441

445-
replacedRegex = replacedRegex.replace(
446-
// VSCode TextMate targets all backreferences /\\[0-9]+/
447-
// and doesn't correctly account for escaped backslashes
448-
new RegExp(`\\\\0*${index}(?![0-9])`, 'g'),
442+
replacedRegex = replacedRegex.replaceAll(
443+
// VSCode TextMate targets all escaped numbers /\\[0-9]+/
444+
// not correctly accounting for escaped backslashes
445+
new RegExp(
446+
/\\0*/.source + index + /(?![0-9])/.source,
447+
'g',
448+
),
449449
groupText,
450450
);
451451
index++;
452452
}
453453
}
454-
replacedRegex = replacedRegex.replace(/\\[0-9]+/g, ''); // All non-existent backreferences are removed
454+
replacedRegex = replacedRegex.replaceAll(/\\[0-9]+/g, ''); // All non-existent backreferences are removed
455455
}
456456

457457
const scanner = new vscodeOniguruma.OnigScanner([replacedRegex]) as OnigScanner;
@@ -460,29 +460,28 @@ async function diagnosticsOnigurumaRegexErrors(diagnostics: vscode.Diagnostic[],
460460

461461
scanner.dispose();
462462
} catch (error: any) {
463-
errorCodeOniguruma = error.toString();
463+
errorCodeOniguruma = error?.message || String(error);
464464
}
465465

466+
466467
let errorCodeOnigmo: string;
467468
try {
468-
// TextMate 2.0
469+
// TextMate 2.0 uses Onigmo
469470
let replacedRegex = regex;
470471
if (hasBackreferences) {
471472
if (beginNode) {
472473
let index = 0;
473474
for (const groupCapture of groupCaptures) {
474-
const groupText = groupCapture.node.text.slice( // substring() doesn't work with -1
475-
groupCapture.name == 'name' ? groupCapture.node.firstNamedChild!.text.length + 4 : // remove `(?<name>`
476-
groupCapture.name == 'group' ? 1 : // remove `(`
477-
0,
478-
groupCapture.name == 'regex' ? undefined : -1, // remove `)`
479-
).replace(/[\\|([{}\]).?*+^$]/g, '\\$&');
475+
const groupText = extractCaptureGroupText(groupCapture).replaceAll(/[\\|([{}\]).?*+^$]/g, '\\$&');
480476
// https://github.com/textmate/textmate/blob/master/Frameworks/parse/src/parse.cc#L120
481477

482-
replacedRegex = replacedRegex.replace(
478+
replacedRegex = replacedRegex.replaceAll(
483479
// TextMate 2.0 only targets single digit backreferences /\\[0-9]/
484480
// https://github.com/textmate/textmate/blob/master/Frameworks/parse/src/parse.cc#L136-L148
485-
new RegExp(`\\\\${index}|\\\\\\\\`, 'g'),
481+
new RegExp(
482+
/\\\\|\\/.source + index,
483+
'g',
484+
),
486485
(match: string): string => match === '\\\\' ? '\\\\' : groupText,
487486
);
488487
index++;
@@ -497,8 +496,9 @@ async function diagnosticsOnigurumaRegexErrors(diagnostics: vscode.Diagnostic[],
497496

498497
scanner.dispose();
499498
} catch (error: any) {
500-
errorCodeOnigmo = error.toString();
499+
errorCodeOnigmo = error?.message || String(error);
501500
}
501+
errorCodeOnigmo = errorCodeOnigmo?.replace(/^Error: /, '');
502502

503503

504504
let errorCodePCRE: string | undefined;
@@ -546,7 +546,7 @@ async function diagnosticsOnigurumaRegexErrors(diagnostics: vscode.Diagnostic[],
546546
);
547547
}
548548
} catch (error: any) {
549-
errorCodePCRE = error.toString();
549+
errorCodePCRE = error?.message || String(error);
550550
}
551551
errorCodePCRE = errorCodePCRE?.replace(/^PCRE compilation failed: /, '').replace(
552552
/\b\d+$/,
@@ -582,7 +582,7 @@ async function diagnosticsOnigurumaRegexErrors(diagnostics: vscode.Diagnostic[],
582582
};
583583
const jsRegex = onigurumaToES.toRegExpDetails(regex, options);
584584
} catch (error: any) {
585-
errorCodeES = error.toString();
585+
errorCodeES = error?.message || String(error);
586586
}
587587

588588

@@ -592,7 +592,7 @@ async function diagnosticsOnigurumaRegexErrors(diagnostics: vscode.Diagnostic[],
592592

593593
const range = toRange(key);
594594

595-
if (errorCodeOnigmo.replace(/^Error: /, '') === errorCodeOniguruma) {
595+
if (errorCodeOnigmo === errorCodeOniguruma) {
596596
diagnostics.push({
597597
range: range,
598598
message: errorCodeOniguruma,
@@ -621,7 +621,7 @@ async function diagnosticsOnigurumaRegexErrors(diagnostics: vscode.Diagnostic[],
621621
continue;
622622
}
623623

624-
if (errorCodeOniguruma != 'undefined error code') {
624+
if (errorCodeOniguruma !== 'undefined error code') {
625625
diagnostics.push({
626626
range: range,
627627
message: `Regex incompatible with VSCode TextMate (Oniguruma v6.9.8)\n${errorCodeOniguruma}`,
@@ -1034,3 +1034,22 @@ async function diagnosticsLinguistCaptures(diagnostics: vscode.Diagnostic[], roo
10341034

10351035
// vscode.window.showInformationMessage(`(captures) ${(performance.now() - start).toFixed(3)}ms`);
10361036
}
1037+
1038+
1039+
function extractCaptureGroupText(groupCapture: webTreeSitter.QueryCapture) {
1040+
const name = groupCapture.name;
1041+
const node = groupCapture.node;
1042+
const nodeText = node.text;
1043+
const groupText = nodeText.slice( // substring() doesn't work with -1
1044+
name === 'name' ?
1045+
node.firstNamedChild!.text.length + 4 : // remove `(?<name>`
1046+
name === 'group' ?
1047+
1 : // remove `(`
1048+
0, // start of regex
1049+
1050+
name === 'regex' ?
1051+
undefined : // end of regex
1052+
-1, // remove `)`
1053+
);
1054+
return JSONParseStringRelaxed(groupText);
1055+
}

0 commit comments

Comments
 (0)