diff --git a/src/core/registry.js b/src/core/registry.js index 724246b65..e22ffd309 100644 --- a/src/core/registry.js +++ b/src/core/registry.js @@ -1,8 +1,8 @@ import { kebabToCamelCase } from '../shared/util.js'; -import { cloneGrammar } from '../util/extend.js'; +import { extend } from '../util/extend.js'; +import { grammarPatch } from '../util/grammar-patch.js'; import { forEach, toArray } from '../util/iterables.js'; -import { extend } from '../util/language-util.js'; -import { defineLazyProperty } from '../util/objects.js'; +import { deepClone, defineLazyProperty } from '../util/objects.js'; /** * TODO: docs @@ -221,7 +221,7 @@ export class Registry { const base = entry?.proto.base; // We need this so that any code modifying the base grammar doesn't affect other instances - const baseGrammar = base && cloneGrammar(required(base.id), base.id); + const baseGrammar = base && deepClone(required(base.id)); const requiredLanguages = toArray( /** @type {LanguageProto | LanguageProto[] | undefined} */ (entry?.proto.require) @@ -240,7 +240,7 @@ export class Registry { else { const options = { getOptionalLanguage: id => this.getLanguage(id), - extend: (id, ref) => extend(required(id), id, ref), + extend: (id, ref) => extend(required(id), ref), ...(baseGrammar && { base: baseGrammar }), ...(requiredLanguages.length && { languages }), }; @@ -249,10 +249,10 @@ export class Registry { } if (baseGrammar) { - evaluatedGrammar = extend(baseGrammar, base.id, evaluatedGrammar); + evaluatedGrammar = extend(baseGrammar, evaluatedGrammar); } - return (entry.evaluatedGrammar = evaluatedGrammar); + return (entry.evaluatedGrammar = grammarPatch(evaluatedGrammar)); } } diff --git a/src/core/tokenize/tokenize.js b/src/core/tokenize/tokenize.js index a55644875..3bf23dd42 100644 --- a/src/core/tokenize/tokenize.js +++ b/src/core/tokenize/tokenize.js @@ -42,7 +42,14 @@ export function tokenize (text, grammar) { const tokenList = new LinkedList(); tokenList.addAfter(tokenList.head, text); - _matchGrammar.call(prism, text, tokenList, grammar, tokenList.head, 0); + _matchGrammar.call( + prism, + text, + tokenList, + /** @type {GrammarTokens} */ (grammar), + tokenList.head, + 0 + ); return tokenList.toArray(); } @@ -50,5 +57,6 @@ export function tokenize (text, grammar) { /** * @typedef {import('../../types.d.ts').TokenStream} TokenStream * @typedef {import('../../types.d.ts').Grammar} Grammar + * @typedef {import('../../types.d.ts').GrammarTokens} GrammarTokens * @typedef {import('../prism.js').Prism} Prism */ diff --git a/src/languages/c.js b/src/languages/c.js index 25ec15677..9b96dcdeb 100644 --- a/src/languages/c.js +++ b/src/languages/c.js @@ -73,7 +73,7 @@ export default { /* OpenCL host API */ const extensions = getOptionalLanguage('opencl-extensions'); if (extensions) { - insertBefore(base, 'keyword', extensions); + insertBefore(base, 'keyword', /** @type {GrammarTokens} */ (extensions)); delete base['type-opencl-host-cpp']; } @@ -104,4 +104,5 @@ export default { /** * @typedef {import('../types.d.ts').GrammarToken} GrammarToken + * @typedef {import('../types.d.ts').GrammarTokens} GrammarTokens */ diff --git a/src/languages/chaiscript.js b/src/languages/chaiscript.js index 12f031101..8effefa2c 100644 --- a/src/languages/chaiscript.js +++ b/src/languages/chaiscript.js @@ -1,5 +1,4 @@ import { toArray } from '../util/iterables.js'; -import { insertBefore } from '../util/language-util.js'; import clike from './clike.js'; import cpp from './cpp.js'; @@ -8,44 +7,7 @@ export default { id: 'chaiscript', base: clike, require: cpp, - grammar ({ base, languages }) { - insertBefore(base, 'operator', { - 'parameter-type': { - // e.g. def foo(int x, Vector y) {...} - pattern: /([,(]\s*)\w+(?=\s+\w)/, - lookbehind: true, - alias: 'class-name', - }, - }); - - insertBefore(base, 'string', { - 'string-interpolation': { - pattern: - /(^|[^\\])"(?:[^"$\\]|\\[\s\S]|\$(?!\{)|\$\{(?:[^{}]|\{(?:[^{}]|\{[^{}]*\})*\})*\})*"/, - lookbehind: true, - greedy: true, - inside: { - 'interpolation': { - pattern: - /((?:^|[^\\])(?:\\{2})*)\$\{(?:[^{}]|\{(?:[^{}]|\{[^{}]*\})*\})*\}/, - lookbehind: true, - inside: { - 'interpolation-expression': { - pattern: /(^\$\{)[\s\S]+(?=\}$)/, - lookbehind: true, - inside: 'chaiscript', - }, - 'interpolation-punctuation': { - pattern: /^\$\{|\}$/, - alias: 'punctuation', - }, - }, - }, - 'string': /[\s\S]+/, - }, - }, - }); - + grammar ({ languages }) { return { 'string': { pattern: /(^|[^\\])'(?:[^'\\]|\\[\s\S])*'/, @@ -66,8 +28,50 @@ export default { ], 'keyword': /\b(?:attr|auto|break|case|catch|class|continue|def|default|else|finally|for|fun|global|if|return|switch|this|try|var|while)\b/, - 'number': [...toArray(languages.cpp.number), /\b(?:Infinity|NaN)\b/], + 'number': [ + ...toArray( + /** @type {import('../types.d.ts').GrammarTokens} */ (languages.cpp).number + ), + /\b(?:Infinity|NaN)\b/, + ], 'operator': />>=?|<<=?|\|\||&&|:[:=]?|--|\+\+|[=!<>+\-*/%|&^]=?|[?~]|`[^`\r\n]{1,4}`/, + $insertBefore: { + 'operator': { + 'parameter-type': { + // e.g. def foo(int x, Vector y) {...} + pattern: /([,(]\s*)\w+(?=\s+\w)/, + lookbehind: true, + alias: 'class-name', + }, + }, + 'string': { + 'string-interpolation': { + pattern: + /(^|[^\\])"(?:[^"$\\]|\\[\s\S]|\$(?!\{)|\$\{(?:[^{}]|\{(?:[^{}]|\{[^{}]*\})*\})*\})*"/, + lookbehind: true, + greedy: true, + inside: { + 'interpolation': { + pattern: + /((?:^|[^\\])(?:\\{2})*)\$\{(?:[^{}]|\{(?:[^{}]|\{[^{}]*\})*\})*\}/, + lookbehind: true, + inside: { + 'interpolation-expression': { + pattern: /(^\$\{)[\s\S]+(?=\}$)/, + lookbehind: true, + inside: 'chaiscript', + }, + 'interpolation-punctuation': { + pattern: /^\$\{|\}$/, + alias: 'punctuation', + }, + }, + }, + 'string': /[\s\S]+/, + }, + }, + }, + }, }; }, }; diff --git a/src/languages/cpp.js b/src/languages/cpp.js index 77a3bf620..0cfe7057e 100644 --- a/src/languages/cpp.js +++ b/src/languages/cpp.js @@ -103,7 +103,11 @@ export default { /* OpenCL host API */ const extensions = getOptionalLanguage('opencl-extensions'); if (extensions) { - insertBefore(cpp, 'keyword', extensions); + insertBefore( + cpp, + 'keyword', + /** @type {import('../types.d.ts').GrammarTokens} */ (extensions) + ); } const baseInside = { ...cpp }; diff --git a/src/languages/crystal.js b/src/languages/crystal.js index 1b7782f30..cf21ac988 100644 --- a/src/languages/crystal.js +++ b/src/languages/crystal.js @@ -1,5 +1,4 @@ import { toArray } from '../util/iterables.js'; -import { insertBefore } from '../util/language-util.js'; import ruby from './ruby.js'; /** @type {import('../types.d.ts').LanguageProto<'crystal'>} */ @@ -7,45 +6,6 @@ export default { id: 'crystal', base: ruby, grammar ({ base }) { - insertBefore(base, 'string-literal', { - 'attribute': { - pattern: /@\[.*?\]/, - inside: { - 'delimiter': { - pattern: /^@\[|\]$/, - alias: 'punctuation', - }, - 'attribute': { - pattern: /^(\s*)\w+/, - lookbehind: true, - alias: 'class-name', - }, - 'args': { - pattern: /\S(?:[\s\S]*\S)?/, - inside: 'crystal', - }, - }, - }, - 'expansion': { - pattern: /\{(?:\{.*?\}|%.*?%)\}/, - inside: { - 'content': { - pattern: /^(\{.)[\s\S]+(?=.\}$)/, - lookbehind: true, - inside: 'crystal', - }, - 'delimiter': { - pattern: /^\{[\{%]|[\}%]\}$/, - alias: 'operator', - }, - }, - }, - 'char': { - pattern: /'(?:[^\\\r\n]{1,2}|\\(?:.|u(?:[A-Fa-f0-9]{1,4}|\{[A-Fa-f0-9]{1,6}\})))'/, - greedy: true, - }, - }); - return { 'keyword': [ /\b(?:__DIR__|__END_LINE__|__FILE__|__LINE__|abstract|alias|annotation|as|asm|begin|break|case|class|def|do|else|elsif|end|ensure|enum|extend|for|fun|if|ifdef|include|instance_sizeof|lib|macro|module|next|of|out|pointerof|private|protected|ptr|require|rescue|return|select|self|sizeof|struct|super|then|type|typeof|undef|uninitialized|union|unless|until|when|while|with|yield)\b/, @@ -56,8 +16,52 @@ export default { ], 'number': /\b(?:0b[01_]*[01]|0o[0-7_]*[0-7]|0x[\da-fA-F_]*[\da-fA-F]|(?:\d(?:[\d_]*\d)?)(?:\.[\d_]*\d)?(?:[eE][+-]?[\d_]*\d)?)(?:_(?:[uif](?:8|16|32|64))?)?\b/, - 'operator': [/->/, ...toArray(base.operator)], + 'operator': [ + /->/, + ...toArray(/** @type {import('../types.d.ts').GrammarTokens} */ (base).operator), + ], 'punctuation': /[(){}[\].,;\\]/, + $insertBefore: { + 'string-literal': { + 'attribute': { + pattern: /@\[.*?\]/, + inside: { + 'delimiter': { + pattern: /^@\[|\]$/, + alias: 'punctuation', + }, + 'attribute': { + pattern: /^(\s*)\w+/, + lookbehind: true, + alias: 'class-name', + }, + 'args': { + pattern: /\S(?:[\s\S]*\S)?/, + inside: 'crystal', + }, + }, + }, + 'expansion': { + pattern: /\{(?:\{.*?\}|%.*?%)\}/, + inside: { + 'content': { + pattern: /^(\{.)[\s\S]+(?=.\}$)/, + lookbehind: true, + inside: 'crystal', + }, + 'delimiter': { + pattern: /^\{[\{%]|[\}%]\}$/, + alias: 'operator', + }, + }, + }, + 'char': { + pattern: + /'(?:[^\\\r\n]{1,2}|\\(?:.|u(?:[A-Fa-f0-9]{1,4}|\{[A-Fa-f0-9]{1,6}\})))'/, + greedy: true, + }, + }, + }, }; }, }; diff --git a/src/languages/csharp.js b/src/languages/csharp.js index 9752901ee..e5bdb300e 100644 --- a/src/languages/csharp.js +++ b/src/languages/csharp.js @@ -1,4 +1,3 @@ -import { insertBefore } from '../util/language-util.js'; import clike from './clike.js'; /** @@ -36,7 +35,7 @@ export default { base: clike, optional: 'xml-doc', alias: ['cs', 'dotnet'], - grammar ({ base, getOptionalLanguage }) { + grammar ({ getOptionalLanguage }) { // https://docs.microsoft.com/en-us/dotnet/csharp/language-reference/keywords/ const keywordKinds = { // keywords which represent a return or variable type @@ -112,133 +111,6 @@ export default { const regularString = /"(?:\\.|[^\\"\r\n])*"/.source; const verbatimString = /@"(?:""|\\[\s\S]|[^\\"])*"(?!")/.source; - insertBefore(base, 'number', { - 'range': { - pattern: /\.\./, - alias: 'operator', - }, - }); - - insertBefore(base, 'punctuation', { - 'named-parameter': { - pattern: re(/([(,]\s*)<<0>>(?=\s*:)/.source, [name]), - lookbehind: true, - alias: 'punctuation', - }, - }); - - insertBefore(base, 'class-name', { - 'namespace': { - // namespace Foo.Bar {} - // using Foo.Bar; - pattern: re(/(\b(?:namespace|using)\s+)<<0>>(?:\s*\.\s*<<0>>)*(?=\s*[;{])/.source, [ - name, - ]), - lookbehind: true, - inside: { - 'punctuation': /\./, - }, - }, - 'type-expression': { - // default(Foo), typeof(Foo), sizeof(int) - pattern: re( - /(\b(?:default|sizeof|typeof)\s*\(\s*(?!\s))(?:[^()\s]|\s(?!\s)|<<0>>)*(?=\s*\))/ - .source, - [nestedRound] - ), - lookbehind: true, - alias: 'class-name', - inside: typeInside, - }, - 'return-type': { - // Foo ForBar(); Foo IFoo.Bar() => 0 - // int this[int index] => 0; T IReadOnlyList.this[int index] => this[index]; - // int Foo => 0; int Foo { get; set } = 0; - pattern: re(/<<0>>(?=\s+(?:<<1>>\s*(?:=>|[({]|\.\s*this\s*\[)|this\s*\[))/.source, [ - typeExpression, - identifier, - ]), - inside: typeInside, - alias: 'class-name', - }, - 'constructor-invocation': { - // new List> { } - pattern: re(/(\bnew\s+)<<0>>(?=\s*[[({])/.source, [typeExpression]), - lookbehind: true, - inside: typeInside, - alias: 'class-name', - }, - /*'explicit-implementation': { - // int IFoo.Bar => 0; void IFoo>.Foo(); - pattern: replace(/\b<<0>>(?=\.<<1>>)/, className, methodOrPropertyDeclaration), - inside: classNameInside, - alias: 'class-name' - },*/ - 'generic-method': { - // foo() - pattern: re(/<<0>>\s*<<1>>(?=\s*\()/.source, [name, generic]), - inside: { - 'function': re(/^<<0>>/.source, [name]), - 'generic': { - pattern: RegExp(generic), - alias: 'class-name', - inside: typeInside, - }, - }, - }, - 'type-list': { - // The list of types inherited or of generic constraints - // class Foo : Bar, IList - // where F : Bar, IList - pattern: re( - /\b((?:<<0>>\s+<<1>>|record\s+<<1>>\s*<<5>>|where\s+<<2>>)\s*:\s*)(?:<<3>>|<<4>>|<<1>>\s*<<5>>|<<6>>)(?:\s*,\s*(?:<<3>>|<<4>>|<<6>>))*(?=\s*(?:where|[{;]|=>|$))/ - .source, - [ - typeDeclarationKeywords, - genericName, - name, - typeExpression, - keywords.source, - nestedRound, - /\bnew\s*\(\s*\)/.source, - ] - ), - lookbehind: true, - inside: { - 'record-arguments': { - pattern: re(/(^(?!new\s*\()<<0>>\s*)<<1>>/.source, [ - genericName, - nestedRound, - ]), - lookbehind: true, - greedy: true, - inside: 'csharp', - }, - 'keyword': keywords, - 'class-name': { - pattern: RegExp(typeExpression), - greedy: true, - inside: typeInside, - }, - 'punctuation': /[,()]/, - }, - }, - 'preprocessor': { - pattern: /(^[\t ]*)#.*/m, - lookbehind: true, - alias: 'property', - inside: { - // highlight preprocessor directives as keywords - 'directive': { - pattern: - /(#)\b(?:define|elif|else|endif|endregion|error|if|line|nullable|pragma|region|undef|warning)\b/, - lookbehind: true, - alias: 'keyword', - }, - }, - }, - }); - // attributes const regularStringOrCharacter = regularString + '|' + character; const regularStringCharacterOrComment = replace( @@ -255,37 +127,6 @@ export default { .source; const attr = replace(/<<0>>(?:\s*\(<<1>>*\))?/.source, [identifier, roundExpression]); - insertBefore(base, 'class-name', { - 'attribute': { - // Attributes - // [Foo], [Foo(1), Bar(2, Prop = "foo")], [return: Foo(1), Bar(2)], [assembly: Foo(Bar)] - pattern: re( - /((?:^|[^\s\w>)?])\s*\[\s*)(?:<<0>>\s*:\s*)?<<1>>(?:\s*,\s*<<1>>)*(?=\s*\])/ - .source, - [attrTarget, attr] - ), - lookbehind: true, - greedy: true, - inside: { - 'target': { - pattern: re(/^<<0>>(?=\s*:)/.source, [attrTarget]), - alias: 'keyword', - }, - 'attribute-arguments': { - pattern: re(/\(<<0>>*\)/.source, [roundExpression]), - inside: 'csharp', - }, - 'class-name': { - pattern: RegExp(identifier), - inside: { - 'punctuation': /\./, - }, - }, - 'punctuation': /[:,]/, - }, - }, - }); - // string interpolation const formatString = /:[^}\r\n]+/.source; // multi line @@ -337,36 +178,6 @@ export default { }; } - insertBefore(base, 'string', { - 'interpolation-string': [ - { - pattern: re( - /(^|[^\\])(?:\$@|@\$)"(?:""|\\[\s\S]|\{\{|<<0>>|[^\\{"])*"/.source, - [mInterpolation] - ), - lookbehind: true, - greedy: true, - inside: createInterpolationInside(mInterpolation, mInterpolationRound), - }, - { - pattern: re(/(^|[^@\\])\$"(?:\\.|\{\{|<<0>>|[^\\"{])*"/.source, [ - sInterpolation, - ]), - lookbehind: true, - greedy: true, - inside: createInterpolationInside(sInterpolation, sInterpolationRound), - }, - ], - 'char': { - pattern: RegExp(character), - greedy: true, - }, - }); - - insertBefore(base, 'comment', { - 'doc-comment': getOptionalLanguage('xml-doc')?.slash, - }); - return { 'string': [ { @@ -453,6 +264,191 @@ export default { /(?:\b0(?:x[\da-f_]*[\da-f]|b[01_]*[01])|(?:\B\.\d+(?:_+\d+)*|\b\d+(?:_+\d+)*(?:\.\d+(?:_+\d+)*)?)(?:e[-+]?\d+(?:_+\d+)*)?)(?:[dflmu]|lu|ul)?\b/i, 'operator': />>=?|<<=?|[-=]>|([-+&|])\1|~|\?\?=?|[-+*/%&|^!=<>]=?/, 'punctuation': /\?\.?|::|[{}[\];(),.:]/, + $insertBefore: { + 'number': { + 'range': { + pattern: /\.\./, + alias: 'operator', + }, + }, + 'punctuation': { + 'named-parameter': { + pattern: re(/([(,]\s*)<<0>>(?=\s*:)/.source, [name]), + lookbehind: true, + alias: 'punctuation', + }, + }, + 'class-name': { + 'namespace': { + // namespace Foo.Bar {} + // using Foo.Bar; + pattern: re( + /(\b(?:namespace|using)\s+)<<0>>(?:\s*\.\s*<<0>>)*(?=\s*[;{])/.source, + [name] + ), + lookbehind: true, + inside: { + 'punctuation': /\./, + }, + }, + 'type-expression': { + // default(Foo), typeof(Foo), sizeof(int) + pattern: re( + /(\b(?:default|sizeof|typeof)\s*\(\s*(?!\s))(?:[^()\s]|\s(?!\s)|<<0>>)*(?=\s*\))/ + .source, + [nestedRound] + ), + lookbehind: true, + alias: 'class-name', + inside: typeInside, + }, + 'return-type': { + // Foo ForBar(); Foo IFoo.Bar() => 0 + // int this[int index] => 0; T IReadOnlyList.this[int index] => this[index]; + // int Foo => 0; int Foo { get; set } = 0; + pattern: re( + /<<0>>(?=\s+(?:<<1>>\s*(?:=>|[({]|\.\s*this\s*\[)|this\s*\[))/.source, + [typeExpression, identifier] + ), + inside: typeInside, + alias: 'class-name', + }, + 'constructor-invocation': { + // new List> { } + pattern: re(/(\bnew\s+)<<0>>(?=\s*[[({])/.source, [typeExpression]), + lookbehind: true, + inside: typeInside, + alias: 'class-name', + }, + /*'explicit-implementation': { + // int IFoo.Bar => 0; void IFoo>.Foo(); + pattern: replace(/\b<<0>>(?=\.<<1>>)/, className, methodOrPropertyDeclaration), + inside: classNameInside, + alias: 'class-name' + },*/ + 'generic-method': { + // foo() + pattern: re(/<<0>>\s*<<1>>(?=\s*\()/.source, [name, generic]), + inside: { + 'function': re(/^<<0>>/.source, [name]), + 'generic': { + pattern: RegExp(generic), + alias: 'class-name', + inside: typeInside, + }, + }, + }, + 'type-list': { + // The list of types inherited or of generic constraints + // class Foo : Bar, IList + // where F : Bar, IList + pattern: re( + /\b((?:<<0>>\s+<<1>>|record\s+<<1>>\s*<<5>>|where\s+<<2>>)\s*:\s*)(?:<<3>>|<<4>>|<<1>>\s*<<5>>|<<6>>)(?:\s*,\s*(?:<<3>>|<<4>>|<<6>>))*(?=\s*(?:where|[{;]|=>|$))/ + .source, + [ + typeDeclarationKeywords, + genericName, + name, + typeExpression, + keywords.source, + nestedRound, + /\bnew\s*\(\s*\)/.source, + ] + ), + lookbehind: true, + inside: { + 'record-arguments': { + pattern: re(/(^(?!new\s*\()<<0>>\s*)<<1>>/.source, [ + genericName, + nestedRound, + ]), + lookbehind: true, + greedy: true, + inside: 'csharp', + }, + 'keyword': keywords, + 'class-name': { + pattern: RegExp(typeExpression), + greedy: true, + inside: typeInside, + }, + 'punctuation': /[,()]/, + }, + }, + 'preprocessor': { + pattern: /(^[\t ]*)#.*/m, + lookbehind: true, + alias: 'property', + inside: { + // highlight preprocessor directives as keywords + 'directive': { + pattern: + /(#)\b(?:define|elif|else|endif|endregion|error|if|line|nullable|pragma|region|undef|warning)\b/, + lookbehind: true, + alias: 'keyword', + }, + }, + }, + 'attribute': { + // Attributes + // [Foo], [Foo(1), Bar(2, Prop = "foo")], [return: Foo(1), Bar(2)], [assembly: Foo(Bar)] + pattern: re( + /((?:^|[^\s\w>)?])\s*\[\s*)(?:<<0>>\s*:\s*)?<<1>>(?:\s*,\s*<<1>>)*(?=\s*\])/ + .source, + [attrTarget, attr] + ), + lookbehind: true, + greedy: true, + inside: { + 'target': { + pattern: re(/^<<0>>(?=\s*:)/.source, [attrTarget]), + alias: 'keyword', + }, + 'attribute-arguments': { + pattern: re(/\(<<0>>*\)/.source, [roundExpression]), + inside: 'csharp', + }, + 'class-name': { + pattern: RegExp(identifier), + inside: { + 'punctuation': /\./, + }, + }, + 'punctuation': /[:,]/, + }, + }, + }, + 'string': { + 'interpolation-string': [ + { + pattern: re( + /(^|[^\\])(?:\$@|@\$)"(?:""|\\[\s\S]|\{\{|<<0>>|[^\\{"])*"/.source, + [mInterpolation] + ), + lookbehind: true, + greedy: true, + inside: createInterpolationInside(mInterpolation, mInterpolationRound), + }, + { + pattern: re(/(^|[^@\\])\$"(?:\\.|\{\{|<<0>>|[^\\"{])*"/.source, [ + sInterpolation, + ]), + lookbehind: true, + greedy: true, + inside: createInterpolationInside(sInterpolation, sInterpolationRound), + }, + ], + 'char': { + pattern: RegExp(character), + greedy: true, + }, + }, + 'comment': { + 'doc-comment': /** @type {import('../types.d.ts').GrammarTokens} */ ( + getOptionalLanguage('xml-doc') + )?.slash, + }, + }, }; }, }; diff --git a/src/languages/css.js b/src/languages/css.js index c053c21da..c70e8c1de 100644 --- a/src/languages/css.js +++ b/src/languages/css.js @@ -89,7 +89,11 @@ export default { const extras = getOptionalLanguage('css-extras'); if (extras) { - insertBefore(css, 'function', extras); + insertBefore( + css, + 'function', + /** @type {import('../types.d.ts').GrammarTokens} */ (extras) + ); } return css; diff --git a/src/languages/flow.js b/src/languages/flow.js index e77ed7365..92dcd603b 100644 --- a/src/languages/flow.js +++ b/src/languages/flow.js @@ -1,5 +1,4 @@ import { toArray } from '../util/iterables.js'; -import { insertBefore } from '../util/language-util.js'; import javascript from './javascript.js'; /** @type {import('../types.d.ts').LanguageProto<'flow'>} */ @@ -7,29 +6,12 @@ export default { id: 'flow', base: javascript, grammar ({ base }) { - insertBefore(base, 'keyword', { - 'type': { - pattern: - /\b(?:[Bb]oolean|Function|[Nn]umber|[Ss]tring|[Ss]ymbol|any|mixed|null|void)\b/, - alias: 'class-name', - }, - }); - - insertBefore(base, 'operator', { - 'flow-punctuation': { - pattern: /\{\||\|\}/, - alias: 'punctuation', - }, - }); - const fnVariable = /** @type {import('../types.d.ts').GrammarToken} */ ( base['function-variable'] ); fnVariable.pattern = /(?!\s)[_$a-z\xA0-\uFFFF](?:(?!\s)[$\w\xA0-\uFFFF])*(?=\s*=\s*(?:function\b|(?:\([^()]*\)(?:\s*:\s*\w+)?|(?!\s)[_$a-z\xA0-\uFFFF](?:(?!\s)[$\w\xA0-\uFFFF])*)\s*=>))/i; - delete base['parameter']; - return { 'keyword': [ { @@ -41,8 +23,24 @@ export default { /(^|[^$]\B)\$(?:Diff|Enum|Exact|Keys|ObjMap|PropertyType|Record|Shape|Subtype|Supertype|await)\b(?!\$)/, lookbehind: true, }, - ...toArray(base.keyword), + ...toArray(/** @type {import('../types.d.ts').GrammarTokens} */ (base).keyword), ], + $insertBefore: { + 'keyword': { + 'type': { + pattern: + /\b(?:[Bb]oolean|Function|[Nn]umber|[Ss]tring|[Ss]ymbol|any|mixed|null|void)\b/, + alias: 'class-name', + }, + }, + 'operator': { + 'flow-punctuation': { + pattern: /\{\||\|\}/, + alias: 'punctuation', + }, + }, + }, + $delete: ['parameter'], }; }, }; diff --git a/src/languages/fsharp.js b/src/languages/fsharp.js index af094960d..4b45e634a 100644 --- a/src/languages/fsharp.js +++ b/src/languages/fsharp.js @@ -1,4 +1,3 @@ -import { insertBefore } from '../util/language-util.js'; import clike from './clike.js'; /** @type {import('../types.d.ts').LanguageProto<'fsharp'>} */ @@ -6,54 +5,7 @@ export default { id: 'fsharp', base: clike, optional: 'xml-doc', - grammar ({ base, getOptionalLanguage }) { - insertBefore(base, 'keyword', { - 'preprocessor': { - pattern: /(^[\t ]*)#.*/m, - lookbehind: true, - alias: 'property', - inside: { - 'directive': { - pattern: /(^#)\b(?:else|endif|if|light|line|nowarn)\b/, - lookbehind: true, - alias: 'keyword', - }, - }, - }, - }); - insertBefore(base, 'punctuation', { - 'computation-expression': { - pattern: /\b[_a-z]\w*(?=\s*\{)/i, - alias: 'keyword', - }, - }); - insertBefore(base, 'string', { - 'annotation': { - pattern: /\[<.+?>\]/, - greedy: true, - inside: { - 'punctuation': /^\[<|>\]$/, - 'class-name': { - pattern: /^\w+$|(^|;\s*)[A-Z]\w*(?=\()/, - lookbehind: true, - }, - 'annotation-content': { - pattern: /[\s\S]+/, - inside: 'fsharp', - }, - }, - }, - 'char': { - pattern: - /'(?:[^\\']|\\(?:.|\d{3}|x[a-fA-F\d]{2}|u[a-fA-F\d]{4}|U[a-fA-F\d]{8}))'B?/, - greedy: true, - }, - }); - - insertBefore(base, 'comment', { - 'doc-comment': getOptionalLanguage('xml-doc')?.slash, - }); - + grammar ({ getOptionalLanguage }) { return { 'comment': [ { @@ -90,6 +42,55 @@ export default { ], 'operator': /([<>~&^])\1\1|([*.:<>&])\2|<-|->|[!=:]=|?|\??(?:<=|>=|<>|[-+*/%=<>])\??|[!?^&]|~[+~-]|:>|:\?>?/, + $insert: { + 'preprocessor': { + $before: 'keyword', + pattern: /(^[\t ]*)#.*/m, + lookbehind: true, + alias: 'property', + inside: { + 'directive': { + pattern: /(^#)\b(?:else|endif|if|light|line|nowarn)\b/, + lookbehind: true, + alias: 'keyword', + }, + }, + }, + 'computation-expression': { + $before: 'punctuation', + pattern: /\b[_a-z]\w*(?=\s*\{)/i, + alias: 'keyword', + }, + 'annotation': { + $before: 'string', + pattern: /\[<.+?>\]/, + greedy: true, + inside: { + 'punctuation': /^\[<|>\]$/, + 'class-name': { + pattern: /^\w+$|(^|;\s*)[A-Z]\w*(?=\()/, + lookbehind: true, + }, + 'annotation-content': { + pattern: /[\s\S]+/, + inside: 'fsharp', + }, + }, + }, + 'char': { + $before: 'string', + pattern: + /'(?:[^\\']|\\(?:.|\d{3}|x[a-fA-F\d]{2}|u[a-fA-F\d]{4}|U[a-fA-F\d]{8}))'B?/, + greedy: true, + }, + }, + $insertBefore: { + 'comment': { + 'doc-comment': /** @type {import('../types.d.ts').GrammarTokens} */ ( + getOptionalLanguage('xml-doc') + )?.slash, + }, + }, }; }, }; diff --git a/src/languages/haml.js b/src/languages/haml.js index 1495ffe33..63b375fcf 100644 --- a/src/languages/haml.js +++ b/src/languages/haml.js @@ -1,4 +1,3 @@ -import { insertBefore } from '../util/language-util.js'; import ruby from './ruby.js'; /** @type {import('../types.d.ts').LanguageProto<'haml'>} */ @@ -13,7 +12,43 @@ export default { code | */ - const haml = { + const filter_pattern = + '((?:^|\\r?\\n|\\r)([\\t ]*)):{{filter_name}}(?:(?:\\r?\\n|\\r)(?:\\2[\\t ].+|\\s*?(?=\\r?\\n|\\r)))+'; + + // Non exhaustive list of available filters and associated languages + const filters = [ + 'css', + { filter: 'coffee', language: 'coffeescript' }, + 'erb', + 'javascript', + 'less', + 'markdown', + 'ruby', + 'scss', + 'textile', + ]; + + const all_filters = /** @type {import('../types.d.ts').GrammarTokens} */ ({}); + for (const f of filters) { + const { filter, language } = typeof f === 'string' ? { filter: f, language: f } : f; + all_filters['filter-' + filter] = { + pattern: RegExp(filter_pattern.replace('{{filter_name}}', () => filter)), + lookbehind: true, + inside: { + 'filter-name': { + pattern: /^:[\w-]+/, + alias: 'symbol', + }, + 'text': { + pattern: /[\s\S]+/, + alias: [language, 'language-' + language], + inside: language, + }, + }, + }; + } + + return { // Multiline stuff should appear before the rest 'multiline-comment': { @@ -115,47 +150,9 @@ export default { pattern: /((?:^|\r?\n|\r)[\t ]*)[~=\-&!]+/, lookbehind: true, }, + $insertBefore: { + 'filter': all_filters, + }, }; - - const filter_pattern = - '((?:^|\\r?\\n|\\r)([\\t ]*)):{{filter_name}}(?:(?:\\r?\\n|\\r)(?:\\2[\\t ].+|\\s*?(?=\\r?\\n|\\r)))+'; - - // Non exhaustive list of available filters and associated languages - const filters = [ - 'css', - { filter: 'coffee', language: 'coffeescript' }, - 'erb', - 'javascript', - 'less', - 'markdown', - 'ruby', - 'scss', - 'textile', - ]; - - /** @type {import('../types.d.ts').Grammar} */ - const all_filters = {}; - for (const f of filters) { - const { filter, language } = typeof f === 'string' ? { filter: f, language: f } : f; - all_filters['filter-' + filter] = { - pattern: RegExp(filter_pattern.replace('{{filter_name}}', () => filter)), - lookbehind: true, - inside: { - 'filter-name': { - pattern: /^:[\w-]+/, - alias: 'symbol', - }, - 'text': { - pattern: /[\s\S]+/, - alias: [language, 'language-' + language], - inside: language, - }, - }, - }; - } - - insertBefore(haml, 'filter', all_filters); - - return haml; }, }; diff --git a/src/languages/hlsl.js b/src/languages/hlsl.js index 16b90ba47..9396a7d6e 100644 --- a/src/languages/hlsl.js +++ b/src/languages/hlsl.js @@ -12,7 +12,9 @@ export default { // https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-appendix-keywords // https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-appendix-reserved-words 'class-name': [ - ...toArray(base['class-name']), + ...toArray( + /** @type {import('../types.d.ts').GrammarTokens} */ (base)['class-name'] + ), /\b(?:AppendStructuredBuffer|BlendState|Buffer|ByteAddressBuffer|CompileShader|ComputeShader|ConsumeStructuredBuffer|DepthStencilState|DepthStencilView|DomainShader|GeometryShader|Hullshader|InputPatch|LineStream|OutputPatch|PixelShader|PointStream|RWBuffer|RWByteAddressBuffer|RWStructuredBuffer|RWTexture(?:1D|1DArray|2D|2DArray|3D)|RasterizerState|RenderTargetView|SamplerComparisonState|SamplerState|StructuredBuffer|Texture(?:1D|1DArray|2D|2DArray|2DMS|2DMSArray|3D|Cube|CubeArray)|TriangleStream|VertexShader)\b/, ], 'keyword': [ diff --git a/src/languages/http.js b/src/languages/http.js index cadd2f965..97d636ded 100644 --- a/src/languages/http.js +++ b/src/languages/http.js @@ -1,5 +1,3 @@ -import { insertBefore } from '../util/language-util.js'; - /** @type {import('../types.d.ts').LanguageProto<'http'>} */ export default { id: 'http', @@ -13,7 +11,64 @@ export default { return RegExp('(^(?:' + name + '):[ \t]*(?![ \t]))[^]+', 'i'); } - const http = { + // Create a mapping of Content-Type headers to language definitions + const httpLanguages = { + 'application/javascript': 'javascript', + 'application/json': getOptionalLanguage('json') || 'javascript', + 'application/xml': 'xml', + 'text/xml': 'xml', + 'text/html': 'html', + 'text/css': 'css', + 'text/plain': 'plain', + }; + + // Declare which types can also be suffixes + const suffixTypes = { + 'application/json': true, + 'application/xml': true, + }; + + /** + * Returns a pattern for the given content type which matches it and any type which has it as a suffix. + * + * @param {string} contentType + * @returns {string} + */ + function getSuffixPattern (contentType) { + const suffix = contentType.replace(/^[a-z]+\//, ''); + const suffixPattern = '\\w+/(?:[\\w.-]+\\+)+' + suffix + '(?![+\\w.-])'; + return '(?:' + contentType + '|' + suffixPattern + ')'; + } + + // Insert each content type parser that has its associated language + // currently loaded. + + const options = /** @type {import('../types.d.ts').GrammarTokens} */ ({}); + for (const key in httpLanguages) { + const contentType = key; + + const pattern = suffixTypes[contentType] ? getSuffixPattern(contentType) : contentType; + options[contentType.replace(/\//g, '-')] = { + pattern: RegExp( + '(' + + /content-type:\s*/.source + + pattern + + /(?:(?:\r\n?|\n)[\w-].*)*(?:\r(?:\n|(?!\n))|\n)/.source + + ')' + + // This is a little interesting: + // The HTTP format spec required 1 empty line before the body to make everything unambiguous. + // However, when writing code by hand (e.g. to display on a website) people can forget about this, + // so we want to be liberal here. We will allow the empty line to be omitted if the first line of + // the body does not start with a [\w-] character (as headers do). + /[^ \t\w-][\s\S]*/.source, + 'i' + ), + lookbehind: true, + inside: httpLanguages[contentType], + }; + } + + return { 'request-line': { pattern: /^(?:CONNECT|DELETE|GET|HEAD|OPTIONS|PATCH|POST|PRI|PUT|SEARCH|TRACE)\s(?:https?:\/\/|\/)\S*\sHTTP\/[\d.]+/m, @@ -94,66 +149,9 @@ export default { 'punctuation': /^:/, }, }, + $insertBefore: { + 'header': options, + }, }; - - // Create a mapping of Content-Type headers to language definitions - const httpLanguages = { - 'application/javascript': 'javascript', - 'application/json': getOptionalLanguage('json') || 'javascript', - 'application/xml': 'xml', - 'text/xml': 'xml', - 'text/html': 'html', - 'text/css': 'css', - 'text/plain': 'plain', - }; - - // Declare which types can also be suffixes - const suffixTypes = { - 'application/json': true, - 'application/xml': true, - }; - - /** - * Returns a pattern for the given content type which matches it and any type which has it as a suffix. - * - * @param {string} contentType - * @returns {string} - */ - function getSuffixPattern (contentType) { - const suffix = contentType.replace(/^[a-z]+\//, ''); - const suffixPattern = '\\w+/(?:[\\w.-]+\\+)+' + suffix + '(?![+\\w.-])'; - return '(?:' + contentType + '|' + suffixPattern + ')'; - } - - // Insert each content type parser that has its associated language - // currently loaded. - /** @type {import('../types.d.ts').Grammar} */ - const options = {}; - for (const key in httpLanguages) { - const contentType = key; - - const pattern = suffixTypes[contentType] ? getSuffixPattern(contentType) : contentType; - options[contentType.replace(/\//g, '-')] = { - pattern: RegExp( - '(' + - /content-type:\s*/.source + - pattern + - /(?:(?:\r\n?|\n)[\w-].*)*(?:\r(?:\n|(?!\n))|\n)/.source + - ')' + - // This is a little interesting: - // The HTTP format spec required 1 empty line before the body to make everything unambiguous. - // However, when writing code by hand (e.g. to display on a website) people can forget about this, - // so we want to be liberal here. We will allow the empty line to be omitted if the first line of - // the body does not start with a [\w-] character (as headers do). - /[^ \t\w-][\s\S]*/.source, - 'i' - ), - lookbehind: true, - inside: httpLanguages[contentType], - }; - } - insertBefore(http, 'header', options); - - return http; }, }; diff --git a/src/languages/java.js b/src/languages/java.js index 197eddc50..7642db53d 100644 --- a/src/languages/java.js +++ b/src/languages/java.js @@ -1,5 +1,4 @@ import { toArray } from '../util/iterables.js'; -import { insertBefore } from '../util/language-util.js'; import clike from './clike.js'; /** @type {import('../types.d.ts').LanguageProto<'java'>} */ @@ -30,88 +29,6 @@ export default { }, }; - insertBefore(base, 'comment', { - 'doc-comment': { - pattern: /\/\*\*(?!\/)[\s\S]*?(?:\*\/|$)/, - greedy: true, - alias: 'comment', - inside: 'javadoc', - }, - }); - - insertBefore(base, 'string', { - 'triple-quoted-string': { - // http://openjdk.java.net/jeps/355#Description - pattern: /"""[ \t]*[\r\n](?:(?:"|"")?(?:\\.|[^"\\]))*"""/, - greedy: true, - alias: 'string', - }, - 'char': { - pattern: /'(?:\\.|[^'\\\r\n]){1,6}'/, - greedy: true, - }, - }); - - insertBefore(base, 'class-name', { - 'annotation': { - pattern: /(^|[^.])@\w+(?:\s*\.\s*\w+)*/, - lookbehind: true, - alias: 'punctuation', - }, - 'generics': { - pattern: - /<(?:[\w\s,.?]|&(?!&)|<(?:[\w\s,.?]|&(?!&)|<(?:[\w\s,.?]|&(?!&)|<(?:[\w\s,.?]|&(?!&))*>)*>)*>)*>/, - inside: { - 'class-name': className, - 'keyword': keywords, - 'punctuation': /[<>(),.:]/, - 'operator': /[?&|]/, - }, - }, - 'import': [ - { - pattern: RegExp( - /(\bimport\s+)/.source + classNamePrefix + /(?:[A-Z]\w*|\*)(?=\s*;)/.source - ), - lookbehind: true, - inside: { - 'namespace': className.inside.namespace, - 'punctuation': /\./, - 'operator': /\*/, - 'class-name': /\w+/, - }, - }, - { - pattern: RegExp( - /(\bimport\s+static\s+)/.source + - classNamePrefix + - /(?:\w+|\*)(?=\s*;)/.source - ), - lookbehind: true, - alias: 'static', - inside: { - 'namespace': className.inside.namespace, - 'static': /\b\w+$/, - 'punctuation': /\./, - 'operator': /\*/, - 'class-name': /\w+/, - }, - }, - ], - 'namespace': { - pattern: RegExp( - /(\b(?:exports|import(?:\s+static)?|module|open|opens|package|provides|requires|to|transitive|uses|with)\s+)(?!)[a-z]\w*(?:\.[a-z]\w*)*\.?/.source.replace( - //g, - () => keywords.source - ) - ), - lookbehind: true, - inside: { - 'punctuation': /\./, - }, - }, - }); - return { 'string': { pattern: /(^|[^\\])"(?:\\.|[^"\\\r\n])*"/, @@ -146,7 +63,7 @@ export default { ], 'keyword': keywords, 'function': [ - ...toArray(base.function), + ...toArray(/** @type {import('../types.d.ts').GrammarTokens} */ (base).function), { pattern: /(::\s*)[a-z_]\w*/, lookbehind: true, @@ -159,6 +76,89 @@ export default { lookbehind: true, }, 'constant': /\b[A-Z][A-Z_\d]+\b/, + $insertBefore: { + 'comment': { + 'doc-comment': { + pattern: /\/\*\*(?!\/)[\s\S]*?(?:\*\/|$)/, + greedy: true, + alias: 'comment', + inside: 'javadoc', + }, + }, + 'string': { + 'triple-quoted-string': { + // http://openjdk.java.net/jeps/355#Description + pattern: /"""[ \t]*[\r\n](?:(?:"|"")?(?:\\.|[^"\\]))*"""/, + greedy: true, + alias: 'string', + }, + 'char': { + pattern: /'(?:\\.|[^'\\\r\n]){1,6}'/, + greedy: true, + }, + }, + 'class-name': { + 'annotation': { + pattern: /(^|[^.])@\w+(?:\s*\.\s*\w+)*/, + lookbehind: true, + alias: 'punctuation', + }, + 'generics': { + pattern: + /<(?:[\w\s,.?]|&(?!&)|<(?:[\w\s,.?]|&(?!&)|<(?:[\w\s,.?]|&(?!&)|<(?:[\w\s,.?]|&(?!&))*>)*>)*>)*>/, + inside: { + 'class-name': className, + 'keyword': keywords, + 'punctuation': /[<>(),.:]/, + 'operator': /[?&|]/, + }, + }, + 'import': [ + { + pattern: RegExp( + /(\bimport\s+)/.source + + classNamePrefix + + /(?:[A-Z]\w*|\*)(?=\s*;)/.source + ), + lookbehind: true, + inside: { + 'namespace': className.inside.namespace, + 'punctuation': /\./, + 'operator': /\*/, + 'class-name': /\w+/, + }, + }, + { + pattern: RegExp( + /(\bimport\s+static\s+)/.source + + classNamePrefix + + /(?:\w+|\*)(?=\s*;)/.source + ), + lookbehind: true, + alias: 'static', + inside: { + 'namespace': className.inside.namespace, + 'static': /\b\w+$/, + 'punctuation': /\./, + 'operator': /\*/, + 'class-name': /\w+/, + }, + }, + ], + 'namespace': { + pattern: RegExp( + /(\b(?:exports|import(?:\s+static)?|module|open|opens|package|provides|requires|to|transitive|uses|with)\s+)(?!)[a-z]\w*(?:\.[a-z]\w*)*\.?/.source.replace( + //g, + () => keywords.source + ) + ), + lookbehind: true, + inside: { + 'punctuation': /\./, + }, + }, + }, + }, }; }, }; diff --git a/src/languages/javascript.js b/src/languages/javascript.js index 645c9692b..737edae8b 100644 --- a/src/languages/javascript.js +++ b/src/languages/javascript.js @@ -1,6 +1,5 @@ import { JS_TEMPLATE, JS_TEMPLATE_INTERPOLATION } from '../shared/languages/patterns.js'; import { toArray } from '../util/iterables.js'; -import { insertBefore } from '../util/language-util.js'; import clike from './clike.js'; /** @type {import('../types.d.ts').LanguageProto<'javascript'>} */ @@ -9,135 +8,10 @@ export default { base: clike, optional: 'js-templates', alias: 'js', - grammar ({ base, getOptionalLanguage }) { - insertBefore(base, 'comment', { - 'doc-comment': { - pattern: /\/\*\*(?!\/)[\s\S]*?(?:\*\/|$)/, - greedy: true, - inside: 'jsdoc', - }, - }); - - insertBefore(base, 'keyword', { - 'regex': { - pattern: RegExp( - // lookbehind - // eslint-disable-next-line regexp/no-dupe-characters-character-class - /((?:^|[^$\w\xA0-\uFFFF."'\])\s]|\b(?:return|yield))\s*)/.source + - // Regex pattern: - // There are 2 regex patterns here. The RegExp set notation proposal added support for nested character - // classes if the `v` flag is present. Unfortunately, nested CCs are both context-free and incompatible - // with the only syntax, so we have to define 2 different regex patterns. - /\//.source + - '(?:' + - /(?:\[(?:[^\]\\\r\n]|\\.)*\]|\\.|[^/\\\[\r\n])+\/[dgimyus]{0,7}/.source + - '|' + - // `v` flag syntax. This supports 3 levels of nested character classes. - /(?:\[(?:[^[\]\\\r\n]|\\.|\[(?:[^[\]\\\r\n]|\\.|\[(?:[^[\]\\\r\n]|\\.)*\])*\])*\]|\\.|[^/\\\[\r\n])+\/[dgimyus]{0,7}v[dgimyus]{0,7}/ - .source + - ')' + - // lookahead - /(?=(?:\s|\/\*(?:[^*]|\*(?!\/))*\*\/)*(?:$|[\r\n,.;:})\]]|\/\/))/.source - ), - lookbehind: true, - greedy: true, - inside: { - 'regex-source': { - pattern: /^(\/)[\s\S]+(?=\/[a-z]*$)/, - lookbehind: true, - alias: 'language-regex', - inside: 'regex', - }, - 'regex-delimiter': /^\/|\/$/, - 'regex-flags': /^[a-z]+$/, - }, - }, - // This must be declared before keyword because we use "function" inside the look-forward - 'function-variable': { - pattern: - /#?(?!\s)[_$a-zA-Z\xA0-\uFFFF](?:(?!\s)[$\w\xA0-\uFFFF])*(?=\s*[=:]\s*(?:async\s*)?(?:\bfunction\b|(?:\((?:[^()]|\([^()]*\))*\)|(?!\s)[_$a-zA-Z\xA0-\uFFFF](?:(?!\s)[$\w\xA0-\uFFFF])*)\s*=>))/, - alias: 'function', - }, - 'parameter': [ - { - pattern: - /(function(?:\s+(?!\s)[_$a-zA-Z\xA0-\uFFFF](?:(?!\s)[$\w\xA0-\uFFFF])*)?\s*\(\s*)(?!\s)(?:[^()\s]|\s+(?![\s)])|\([^()]*\))+(?=\s*\))/, - lookbehind: true, - inside: 'javascript', - }, - { - pattern: - /(^|[^$\w\xA0-\uFFFF])(?!\s)[_$a-z\xA0-\uFFFF](?:(?!\s)[$\w\xA0-\uFFFF])*(?=\s*=>)/i, - lookbehind: true, - inside: 'javascript', - }, - { - pattern: /(\(\s*)(?!\s)(?:[^()\s]|\s+(?![\s)])|\([^()]*\))+(?=\s*\)\s*=>)/, - lookbehind: true, - inside: 'javascript', - }, - { - pattern: - /((?:\b|\s|^)(?!(?:as|async|await|break|case|catch|class|const|continue|debugger|default|delete|do|else|enum|export|extends|finally|for|from|function|get|if|implements|import|in|instanceof|interface|let|new|null|of|package|private|protected|public|return|set|static|super|switch|this|throw|try|typeof|undefined|var|void|while|with|yield)(?![$\w\xA0-\uFFFF]))(?:(?!\s)[_$a-zA-Z\xA0-\uFFFF](?:(?!\s)[$\w\xA0-\uFFFF])*\s*)\(\s*|\]\s*\(\s*)(?!\s)(?:[^()\s]|\s+(?![\s)])|\([^()]*\))+(?=\s*\)\s*\{)/, - lookbehind: true, - inside: 'javascript', - }, - ], - 'constant': /\b[A-Z](?:[A-Z_]|\dx?)*\b/, - }); - - const jsTemplates = getOptionalLanguage('js-templates')?.['template-string']; - - insertBefore(base, 'string', { - 'hashbang': { - pattern: /^#!.*/, - greedy: true, - alias: 'comment', - }, - 'template-string': [ - ...toArray(jsTemplates), - { - pattern: JS_TEMPLATE, - greedy: true, - inside: /** @type {Grammar} */ ({ - 'template-punctuation': { - pattern: /^`|`$/, - alias: 'string', - }, - 'interpolation': { - pattern: RegExp( - /((?:^|[^\\])(?:\\{2})*)/.source + JS_TEMPLATE_INTERPOLATION.source - ), - lookbehind: true, - inside: { - 'interpolation-punctuation': { - pattern: /^\$\{|\}$/, - alias: 'punctuation', - }, - $rest: /** @type {Grammar['$rest']} */ ('javascript'), - }, - }, - 'string': /[\s\S]+/, - }), - }, - ], - 'string-property': { - pattern: - /((?:^|[,{])[ \t]*)(["'])(?:\\(?:\r\n|[\s\S])|(?!\2)[^\\\r\n])*\2(?=\s*:)/m, - lookbehind: true, - greedy: true, - alias: 'property', - }, - }); - - insertBefore(base, 'operator', { - 'literal-property': { - pattern: - /((?:^|[,{])[ \t]*)(?!\s)[_$a-zA-Z\xA0-\uFFFF](?:(?!\s)[$\w\xA0-\uFFFF])*(?=\s*:)/m, - lookbehind: true, - alias: 'property', - }, - }); + grammar ({ getOptionalLanguage }) { + const jsTemplates = /** @type {GrammarTokens} */ (getOptionalLanguage('js-templates'))?.[ + 'template-string' + ]; return { 'class-name': [ @@ -210,10 +84,140 @@ export default { }, 'operator': /--|\+\+|\*\*=?|=>|&&=?|\|\|=?|[!=]==|<<=?|>>>?=?|[-+*/%&|^!=<>]=?|\.{3}|\?\?=?|\?\.?|[~:]/, + $insertBefore: { + 'comment': { + 'doc-comment': { + pattern: /\/\*\*(?!\/)[\s\S]*?(?:\*\/|$)/, + greedy: true, + inside: 'jsdoc', + }, + }, + 'keyword': { + 'regex': { + pattern: RegExp( + // lookbehind + // eslint-disable-next-line regexp/no-dupe-characters-character-class + /((?:^|[^$\w\xA0-\uFFFF."'\])\s]|\b(?:return|yield))\s*)/.source + + // Regex pattern: + // There are 2 regex patterns here. The RegExp set notation proposal added support for nested character + // classes if the `v` flag is present. Unfortunately, nested CCs are both context-free and incompatible + // with the only syntax, so we have to define 2 different regex patterns. + /\//.source + + '(?:' + + /(?:\[(?:[^\]\\\r\n]|\\.)*\]|\\.|[^/\\\[\r\n])+\/[dgimyus]{0,7}/ + .source + + '|' + + // `v` flag syntax. This supports 3 levels of nested character classes. + /(?:\[(?:[^[\]\\\r\n]|\\.|\[(?:[^[\]\\\r\n]|\\.|\[(?:[^[\]\\\r\n]|\\.)*\])*\])*\]|\\.|[^/\\\[\r\n])+\/[dgimyus]{0,7}v[dgimyus]{0,7}/ + .source + + ')' + + // lookahead + /(?=(?:\s|\/\*(?:[^*]|\*(?!\/))*\*\/)*(?:$|[\r\n,.;:})\]]|\/\/))/ + .source + ), + lookbehind: true, + greedy: true, + inside: { + 'regex-source': { + pattern: /^(\/)[\s\S]+(?=\/[a-z]*$)/, + lookbehind: true, + alias: 'language-regex', + inside: 'regex', + }, + 'regex-delimiter': /^\/|\/$/, + 'regex-flags': /^[a-z]+$/, + }, + }, + // This must be declared before keyword because we use "function" inside the look-forward + 'function-variable': { + pattern: + /#?(?!\s)[_$a-zA-Z\xA0-\uFFFF](?:(?!\s)[$\w\xA0-\uFFFF])*(?=\s*[=:]\s*(?:async\s*)?(?:\bfunction\b|(?:\((?:[^()]|\([^()]*\))*\)|(?!\s)[_$a-zA-Z\xA0-\uFFFF](?:(?!\s)[$\w\xA0-\uFFFF])*)\s*=>))/, + alias: 'function', + }, + 'parameter': [ + { + pattern: + /(function(?:\s+(?!\s)[_$a-zA-Z\xA0-\uFFFF](?:(?!\s)[$\w\xA0-\uFFFF])*)?\s*\(\s*)(?!\s)(?:[^()\s]|\s+(?![\s)])|\([^()]*\))+(?=\s*\))/, + lookbehind: true, + inside: 'javascript', + }, + { + pattern: + /(^|[^$\w\xA0-\uFFFF])(?!\s)[_$a-z\xA0-\uFFFF](?:(?!\s)[$\w\xA0-\uFFFF])*(?=\s*=>)/i, + lookbehind: true, + inside: 'javascript', + }, + { + pattern: + /(\(\s*)(?!\s)(?:[^()\s]|\s+(?![\s)])|\([^()]*\))+(?=\s*\)\s*=>)/, + lookbehind: true, + inside: 'javascript', + }, + { + pattern: + /((?:\b|\s|^)(?!(?:as|async|await|break|case|catch|class|const|continue|debugger|default|delete|do|else|enum|export|extends|finally|for|from|function|get|if|implements|import|in|instanceof|interface|let|new|null|of|package|private|protected|public|return|set|static|super|switch|this|throw|try|typeof|undefined|var|void|while|with|yield)(?![$\w\xA0-\uFFFF]))(?:(?!\s)[_$a-zA-Z\xA0-\uFFFF](?:(?!\s)[$\w\xA0-\uFFFF])*\s*)\(\s*|\]\s*\(\s*)(?!\s)(?:[^()\s]|\s+(?![\s)])|\([^()]*\))+(?=\s*\)\s*\{)/, + lookbehind: true, + inside: 'javascript', + }, + ], + 'constant': /\b[A-Z](?:[A-Z_]|\dx?)*\b/, + }, + 'string': { + 'hashbang': { + pattern: /^#!.*/, + greedy: true, + alias: 'comment', + }, + 'template-string': [ + ...toArray(jsTemplates), + { + pattern: JS_TEMPLATE, + greedy: true, + inside: /** @type {Grammar} */ ({ + 'template-punctuation': { + pattern: /^`|`$/, + alias: 'string', + }, + 'interpolation': { + pattern: RegExp( + /((?:^|[^\\])(?:\\{2})*)/.source + + JS_TEMPLATE_INTERPOLATION.source + ), + lookbehind: true, + inside: { + 'interpolation-punctuation': { + pattern: /^\$\{|\}$/, + alias: 'punctuation', + }, + $rest: /** @type {Grammar['$rest']} */ ('javascript'), + }, + }, + 'string': /[\s\S]+/, + }), + }, + ], + 'string-property': { + pattern: + /((?:^|[,{])[ \t]*)(["'])(?:\\(?:\r\n|[\s\S])|(?!\2)[^\\\r\n])*\2(?=\s*:)/m, + lookbehind: true, + greedy: true, + alias: 'property', + }, + }, + 'operator': { + 'literal-property': { + pattern: + /((?:^|[,{])[ \t]*)(?!\s)[_$a-zA-Z\xA0-\uFFFF](?:(?!\s)[$\w\xA0-\uFFFF])*(?=\s*:)/m, + lookbehind: true, + alias: 'property', + }, + }, + }, }; }, }; /** * @typedef {import('../types.d.ts').Grammar} Grammar + * @typedef {import('../types.d.ts').GrammarTokens} GrammarTokens */ diff --git a/src/languages/jsdoc.js b/src/languages/jsdoc.js index 5c302c61f..e251128a5 100644 --- a/src/languages/jsdoc.js +++ b/src/languages/jsdoc.js @@ -1,4 +1,3 @@ -import { insertBefore } from '../util/language-util.js'; import javadoclike from './javadoclike.js'; import javascript from './javascript.js'; import typescript from './typescript.js'; @@ -8,85 +7,88 @@ export default { id: 'jsdoc', base: javadoclike, require: [javascript, typescript], - grammar ({ base, languages }) { + grammar ({ languages }) { const { javascript, typescript } = languages; const type = /\{(?:[^{}]|\{(?:[^{}]|\{[^{}]*\})*\})+\}/.source; const parameterPrefix = '(@(?:arg|argument|param|property)\\s+(?:' + type + '\\s+)?)'; - insertBefore(base, 'keyword', { - 'optional-parameter': { - // @param {string} [baz.foo="bar"] foo bar - pattern: RegExp( - parameterPrefix + /\[(?:(?!\s)[$\w\xA0-\uFFFF.])+(?:=[^[\]]+)?\](?=\s|$)/.source - ), + return { + 'parameter': { + // @param {string} foo - foo bar + pattern: RegExp(parameterPrefix + /(?:(?!\s)[$\w\xA0-\uFFFF.])+(?=\s|$)/.source), lookbehind: true, inside: { - 'parameter': { - pattern: /(^\[)[$\w\xA0-\uFFFF\.]+/, + 'punctuation': /\./, + }, + }, + $insertBefore: { + 'keyword': { + 'optional-parameter': { + // @param {string} [baz.foo="bar"] foo bar + pattern: RegExp( + parameterPrefix + + /\[(?:(?!\s)[$\w\xA0-\uFFFF.])+(?:=[^[\]]+)?\](?=\s|$)/.source + ), lookbehind: true, inside: { - 'punctuation': /\./, + 'parameter': { + pattern: /(^\[)[$\w\xA0-\uFFFF\.]+/, + lookbehind: true, + inside: { + 'punctuation': /\./, + }, + }, + 'code': { + pattern: /(=)[\s\S]*(?=\]$)/, + lookbehind: true, + alias: 'language-javascript', + inside: 'javascript', + }, + 'punctuation': /[=[\]]/, }, }, - 'code': { - pattern: /(=)[\s\S]*(?=\]$)/, - lookbehind: true, - alias: 'language-javascript', - inside: 'javascript', - }, - 'punctuation': /[=[\]]/, - }, - }, - 'class-name': [ - { - pattern: RegExp( - /(@(?:augments|class|extends|interface|memberof!?|template|this|typedef)\s+(?:\s+)?)[A-Z]\w*(?:\.[A-Z]\w*)*/.source.replace( - //g, - () => type - ) - ), - lookbehind: true, - inside: { - 'punctuation': /\./, - }, - }, - { - pattern: RegExp('(@[a-z]+\\s+)' + type), - lookbehind: true, - inside: { - 'string': javascript.string, - 'number': javascript.number, - 'boolean': javascript.boolean, - 'keyword': typescript.keyword, - 'operator': /=>|\.\.\.|[&|?:*]/, - 'punctuation': /[.,;=<>{}()[\]]/, - }, - }, - ], - 'example': { - pattern: /(@example\s+(?!\s))(?:[^@\s]|\s+(?!\s))+?(?=\s*(?:\*\s*)?(?:@\w|\*\/))/, - lookbehind: true, - inside: { - 'code': { - pattern: /^([\t ]*(?:\*\s*)?)\S.*$/m, + 'class-name': [ + { + pattern: RegExp( + /(@(?:augments|class|extends|interface|memberof!?|template|this|typedef)\s+(?:\s+)?)[A-Z]\w*(?:\.[A-Z]\w*)*/.source.replace( + //g, + () => type + ) + ), + lookbehind: true, + inside: { + 'punctuation': /\./, + }, + }, + { + pattern: RegExp('(@[a-z]+\\s+)' + type), + lookbehind: true, + inside: { + 'string': javascript.string, + 'number': javascript.number, + 'boolean': javascript.boolean, + 'keyword': typescript.keyword, + 'operator': /=>|\.\.\.|[&|?:*]/, + 'punctuation': /[.,;=<>{}()[\]]/, + }, + }, + ], + 'example': { + pattern: + /(@example\s+(?!\s))(?:[^@\s]|\s+(?!\s))+?(?=\s*(?:\*\s*)?(?:@\w|\*\/))/, lookbehind: true, - alias: 'language-javascript', - inside: 'javascript', + inside: { + 'code': { + pattern: /^([\t ]*(?:\*\s*)?)\S.*$/m, + lookbehind: true, + alias: 'language-javascript', + inside: 'javascript', + }, + }, }, }, }, - }); - - return { - 'parameter': { - // @param {string} foo - foo bar - pattern: RegExp(parameterPrefix + /(?:(?!\s)[$\w\xA0-\uFFFF.])+(?=\s|$)/.source), - lookbehind: true, - inside: { - 'punctuation': /\./, - }, - }, }; }, }; diff --git a/src/languages/php.js b/src/languages/php.js index daadd32f6..736d85dde 100644 --- a/src/languages/php.js +++ b/src/languages/php.js @@ -352,7 +352,7 @@ export default { const extras = getOptionalLanguage('php-extras'); if (extras) { - insertBefore(php, 'variable', extras); + insertBefore(php, 'variable', /** @type {GrammarTokens} */ (extras)); } const embedded = embeddedIn('markup'); @@ -376,4 +376,5 @@ export default { /** * @typedef {import('../types.d.ts').Grammar} Grammar + * @typedef {import('../types.d.ts').GrammarTokens} GrammarTokens */ diff --git a/src/languages/ruby.js b/src/languages/ruby.js index 1a4986ce8..0ca0266b4 100644 --- a/src/languages/ruby.js +++ b/src/languages/ruby.js @@ -1,4 +1,3 @@ -import { insertBefore } from '../util/language-util.js'; import clike from './clike.js'; /** @type {import('../types.d.ts').LanguageProto<'ruby'>} */ @@ -6,19 +5,13 @@ export default { id: 'ruby', base: clike, alias: 'rb', - grammar ({ base }) { + grammar () { /** * Original by Samuel Flores * * Adds the following new token classes: * constant, builtin, variable, symbol, regex */ - insertBefore(base, 'operator', { - 'double-colon': { - pattern: /::/, - alias: 'punctuation', - }, - }); const interpolation = { pattern: /((?:^|[^\\])(?:\\{2})*)#\{(?:[^{}]|\{[^{}]*\})*\}/, @@ -36,8 +29,6 @@ export default { }, }; - delete base.function; - const percentExpression = '(?:' + [ @@ -52,153 +43,162 @@ export default { const symbolName = /(?:"(?:\\.|[^"\\\r\n])*"|(?:\b[a-zA-Z_]\w*|[^\s\0-\x7F]+)[?!]?|\$.)/ .source; - insertBefore(base, 'keyword', { - 'regex-literal': [ - { - pattern: RegExp(/%r/.source + percentExpression + /[egimnosux]{0,6}/.source), - greedy: true, - inside: { - 'interpolation': interpolation, - 'regex': /[\s\S]+/, - }, - }, - { - pattern: - /(^|[^/])\/(?!\/)(?:\[[^\r\n\]]+\]|\\.|[^[/\\\r\n])+\/[egimnosux]{0,6}(?=\s*(?:$|[\r\n,.;})#]))/, - lookbehind: true, - greedy: true, - inside: { - 'interpolation': interpolation, - 'regex': /[\s\S]+/, - }, - }, - ], - 'variable': /[@$]+[a-zA-Z_]\w*(?:[?!]|\b)/, - 'symbol': [ - { - pattern: RegExp(/(^|[^:]):/.source + symbolName), - lookbehind: true, - greedy: true, - }, - { - pattern: RegExp(/([\r\n{(,][ \t]*)/.source + symbolName + /(?=:(?!:))/.source), - lookbehind: true, - greedy: true, - }, - ], - 'method-definition': { - pattern: /(\bdef\s+)\w+(?:\s*\.\s*\w+)?/, + return { + 'comment': { + pattern: /#.*|^=begin\s[\s\S]*?^=end/m, + greedy: true, + }, + 'class-name': { + pattern: + /(\b(?:class|module)\s+|\bcatch\s+\()[\w.\\]+|\b[A-Z_]\w*(?=\s*\.\s*new\b)/, lookbehind: true, inside: { - 'function': /\b\w+$/, - 'keyword': /^self\b/, - 'class-name': /^\w+/, - 'punctuation': /\./, + 'punctuation': /[.\\]/, }, }, - }); - - insertBefore(base, 'string', { - 'string-literal': [ - { - pattern: RegExp(/%[qQiIwWs]?/.source + percentExpression), - greedy: true, - inside: { - 'interpolation': interpolation, - 'string': /[\s\S]+/, - }, - }, - { - pattern: /("|')(?:#\{[^}]+\}|#(?!\{)|\\(?:\r\n|[\s\S])|(?!\1)[^\\#\r\n])*\1/, - greedy: true, - inside: { - 'interpolation': interpolation, - 'string': /[\s\S]+/, + 'keyword': + /\b(?:BEGIN|END|alias|and|begin|break|case|class|def|define_method|defined|do|each|else|elsif|end|ensure|extend|for|if|in|include|module|new|next|nil|not|or|prepend|private|protected|public|raise|redo|require|rescue|retry|return|self|super|then|throw|undef|unless|until|when|while|yield)\b/, + 'operator': /\.{2,3}|&\.|===||[!=]?~|(?:&&|\|\||<<|>>|\*\*|[+\-*/%<>!^&|=])=?|[?:]/, + 'punctuation': /[(){}[\].,;]/, + $insertBefore: { + 'operator': { + 'double-colon': { + pattern: /::/, + alias: 'punctuation', }, }, - { - pattern: /<<[-~]?([a-z_]\w*)[\r\n](?:.*[\r\n])*?[\t ]*\1/i, - alias: 'heredoc-string', - greedy: true, - inside: { - 'delimiter': { - pattern: /^<<[-~]?[a-z_]\w*|\b[a-z_]\w*$/i, + 'keyword': { + 'regex-literal': [ + { + pattern: RegExp( + /%r/.source + percentExpression + /[egimnosux]{0,6}/.source + ), + greedy: true, inside: { - 'symbol': /\b\w+/, - 'punctuation': /^<<[-~]?/, + 'interpolation': interpolation, + 'regex': /[\s\S]+/, }, }, - 'interpolation': interpolation, - 'string': /[\s\S]+/, - }, - }, - { - pattern: /<<[-~]?'([a-z_]\w*)'[\r\n](?:.*[\r\n])*?[\t ]*\1/i, - alias: 'heredoc-string', - greedy: true, - inside: { - 'delimiter': { - pattern: /^<<[-~]?'[a-z_]\w*'|\b[a-z_]\w*$/i, + { + pattern: + /(^|[^/])\/(?!\/)(?:\[[^\r\n\]]+\]|\\.|[^[/\\\r\n])+\/[egimnosux]{0,6}(?=\s*(?:$|[\r\n,.;})#]))/, + lookbehind: true, + greedy: true, inside: { - 'symbol': /\b\w+/, - 'punctuation': /^<<[-~]?'|'$/, + 'interpolation': interpolation, + 'regex': /[\s\S]+/, }, }, - 'string': /[\s\S]+/, - }, - }, - ], - 'command-literal': [ - { - pattern: RegExp(/%x/.source + percentExpression), - greedy: true, - inside: { - 'interpolation': interpolation, - 'command': { - pattern: /[\s\S]+/, - alias: 'string', + ], + 'variable': /[@$]+[a-zA-Z_]\w*(?:[?!]|\b)/, + 'symbol': [ + { + pattern: RegExp(/(^|[^:]):/.source + symbolName), + lookbehind: true, + greedy: true, + }, + { + pattern: RegExp( + /([\r\n{(,][ \t]*)/.source + symbolName + /(?=:(?!:))/.source + ), + lookbehind: true, + greedy: true, + }, + ], + 'method-definition': { + pattern: /(\bdef\s+)\w+(?:\s*\.\s*\w+)?/, + lookbehind: true, + inside: { + 'function': /\b\w+$/, + 'keyword': /^self\b/, + 'class-name': /^\w+/, + 'punctuation': /\./, }, }, }, - { - pattern: /`(?:#\{[^}]+\}|#(?!\{)|\\(?:\r\n|[\s\S])|[^\\`#\r\n])*`/, - greedy: true, - inside: { - 'interpolation': interpolation, - 'command': { - pattern: /[\s\S]+/, - alias: 'string', + 'string': { + 'string-literal': [ + { + pattern: RegExp(/%[qQiIwWs]?/.source + percentExpression), + greedy: true, + inside: { + 'interpolation': interpolation, + 'string': /[\s\S]+/, + }, }, - }, + { + pattern: + /("|')(?:#\{[^}]+\}|#(?!\{)|\\(?:\r\n|[\s\S])|(?!\1)[^\\#\r\n])*\1/, + greedy: true, + inside: { + 'interpolation': interpolation, + 'string': /[\s\S]+/, + }, + }, + { + pattern: /<<[-~]?([a-z_]\w*)[\r\n](?:.*[\r\n])*?[\t ]*\1/i, + alias: 'heredoc-string', + greedy: true, + inside: { + 'delimiter': { + pattern: /^<<[-~]?[a-z_]\w*|\b[a-z_]\w*$/i, + inside: { + 'symbol': /\b\w+/, + 'punctuation': /^<<[-~]?/, + }, + }, + 'interpolation': interpolation, + 'string': /[\s\S]+/, + }, + }, + { + pattern: /<<[-~]?'([a-z_]\w*)'[\r\n](?:.*[\r\n])*?[\t ]*\1/i, + alias: 'heredoc-string', + greedy: true, + inside: { + 'delimiter': { + pattern: /^<<[-~]?'[a-z_]\w*'|\b[a-z_]\w*$/i, + inside: { + 'symbol': /\b\w+/, + 'punctuation': /^<<[-~]?'|'$/, + }, + }, + 'string': /[\s\S]+/, + }, + }, + ], + 'command-literal': [ + { + pattern: RegExp(/%x/.source + percentExpression), + greedy: true, + inside: { + 'interpolation': interpolation, + 'command': { + pattern: /[\s\S]+/, + alias: 'string', + }, + }, + }, + { + pattern: /`(?:#\{[^}]+\}|#(?!\{)|\\(?:\r\n|[\s\S])|[^\\`#\r\n])*`/, + greedy: true, + inside: { + 'interpolation': interpolation, + 'command': { + pattern: /[\s\S]+/, + alias: 'string', + }, + }, + }, + ], }, - ], - }); - - delete base.string; - - insertBefore(base, 'number', { - 'builtin': - /\b(?:Array|Bignum|Binding|Class|Continuation|Dir|Exception|FalseClass|File|Fixnum|Float|Hash|IO|Integer|MatchData|Method|Module|NilClass|Numeric|Object|Proc|Range|Regexp|Stat|String|Struct|Symbol|TMS|Thread|ThreadGroup|Time|TrueClass)\b/, - 'constant': /\b[A-Z][A-Z0-9_]*(?:[?!]|\b)/, - }); - - return { - 'comment': { - pattern: /#.*|^=begin\s[\s\S]*?^=end/m, - greedy: true, - }, - 'class-name': { - pattern: - /(\b(?:class|module)\s+|\bcatch\s+\()[\w.\\]+|\b[A-Z_]\w*(?=\s*\.\s*new\b)/, - lookbehind: true, - inside: { - 'punctuation': /[.\\]/, + 'number': { + 'builtin': + /\b(?:Array|Bignum|Binding|Class|Continuation|Dir|Exception|FalseClass|File|Fixnum|Float|Hash|IO|Integer|MatchData|Method|Module|NilClass|Numeric|Object|Proc|Range|Regexp|Stat|String|Struct|Symbol|TMS|Thread|ThreadGroup|Time|TrueClass)\b/, + 'constant': /\b[A-Z][A-Z0-9_]*(?:[?!]|\b)/, }, }, - 'keyword': - /\b(?:BEGIN|END|alias|and|begin|break|case|class|def|define_method|defined|do|each|else|elsif|end|ensure|extend|for|if|in|include|module|new|next|nil|not|or|prepend|private|protected|public|raise|redo|require|rescue|retry|return|self|super|then|throw|undef|unless|until|when|while|yield)\b/, - 'operator': /\.{2,3}|&\.|===||[!=]?~|(?:&&|\|\||<<|>>|\*\*|[+\-*/%<>!^&|=])=?|[?:]/, - 'punctuation': /[(){}[\].,;]/, + $delete: ['function', 'string'], }; }, }; diff --git a/src/languages/squirrel.js b/src/languages/squirrel.js index 4c2cb3ab6..6c19d0eec 100644 --- a/src/languages/squirrel.js +++ b/src/languages/squirrel.js @@ -1,5 +1,4 @@ import { toArray } from '../util/iterables.js'; -import { insertBefore } from '../util/language-util.js'; import clike from './clike.js'; /** @type {import('../types.d.ts').LanguageProto<'squirrel'>} */ @@ -7,28 +6,9 @@ export default { id: 'squirrel', base: clike, grammar ({ base }) { - insertBefore(base, 'string', { - 'char': { - pattern: /(^|[^\\"'])'(?:[^\\']|\\(?:[xuU][0-9a-fA-F]{0,8}|[\s\S]))'/, - lookbehind: true, - greedy: true, - }, - }); - - insertBefore(base, 'operator', { - 'attribute-punctuation': { - pattern: /<\/|\/>/, - alias: 'important', - }, - 'lambda': { - pattern: /@(?=\()/, - alias: 'operator', - }, - }); - return { 'comment': [ - ...toArray(base.comment), + ...toArray(/** @type {import('../types.d.ts').GrammarTokens} */ (base).comment), { pattern: /#.*/, greedy: true, @@ -53,6 +33,25 @@ export default { 'number': /\b(?:0x[0-9a-fA-F]+|\d+(?:\.(?:\d+|[eE][+-]?\d+))?)\b/, 'operator': /\+\+|--|<=>|<[-<]|>>>?|&&?|\|\|?|[-+*/%!=<>]=?|[~^]|::?/, 'punctuation': /[(){}\[\],;.]/, + $insertBefore: { + 'string': { + 'char': { + pattern: /(^|[^\\"'])'(?:[^\\']|\\(?:[xuU][0-9a-fA-F]{0,8}|[\s\S]))'/, + lookbehind: true, + greedy: true, + }, + }, + 'operator': { + 'attribute-punctuation': { + pattern: /<\/|\/>/, + alias: 'important', + }, + 'lambda': { + pattern: /@(?=\()/, + alias: 'operator', + }, + }, + }, }; }, }; diff --git a/src/languages/typescript.js b/src/languages/typescript.js index 69a7e721a..206709362 100644 --- a/src/languages/typescript.js +++ b/src/languages/typescript.js @@ -8,8 +8,7 @@ export default { require: javascript, alias: 'ts', grammar ({ extend }) { - /** @type {import('../types.d.ts').Grammar} */ - const typeInside = {}; + const typeInside = /** @type {import('../types.d.ts').Grammar} */ ({}); const typescript = extend('javascript', { 'class-name': { @@ -24,7 +23,7 @@ export default { }); typescript.keyword = [ - ...toArray(typescript.keyword), + ...toArray(/** @type {import('../types.d.ts').GrammarTokens} */ (typescript).keyword), // The keywords TypeScript adds to JavaScript /\b(?:abstract|declare|is|keyof|out|readonly|require|satisfies)\b/, diff --git a/src/languages/vbnet.js b/src/languages/vbnet.js index e617b6fdb..d554d3a4c 100644 --- a/src/languages/vbnet.js +++ b/src/languages/vbnet.js @@ -1,4 +1,3 @@ -import { insertBefore } from '../util/language-util.js'; import basic from './basic.js'; /** @type {import('../types.d.ts').LanguageProto<'vbnet'>} */ @@ -6,11 +5,7 @@ export default { id: 'vbnet', base: basic, optional: 'xml-doc', - grammar ({ base, getOptionalLanguage }) { - insertBefore(base, 'comment', { - 'doc-comment': getOptionalLanguage('xml-doc')?.tick, - }); - + grammar ({ getOptionalLanguage }) { return { 'comment': [ { @@ -33,6 +28,13 @@ export default { 'keyword': /(?:\b(?:ADDHANDLER|ADDRESSOF|ALIAS|AND|ANDALSO|AS|BEEP|BLOAD|BOOLEAN|BSAVE|BYREF|BYTE|BYVAL|CALL(?: ABSOLUTE)?|CASE|CATCH|CBOOL|CBYTE|CCHAR|CDATE|CDBL|CDEC|CHAIN|CHAR|CHDIR|CINT|CLASS|CLEAR|CLNG|CLOSE|CLS|COBJ|COM|COMMON|CONST|CONTINUE|CSBYTE|CSHORT|CSNG|CSTR|CTYPE|CUINT|CULNG|CUSHORT|DATA|DATE|DECIMAL|DECLARE|DEF(?: FN| SEG|DBL|INT|LNG|SNG|STR)|DEFAULT|DELEGATE|DIM|DIRECTCAST|DO|DOUBLE|ELSE|ELSEIF|END|ENUM|ENVIRON|ERASE|ERROR|EVENT|EXIT|FALSE|FIELD|FILES|FINALLY|FOR(?: EACH)?|FRIEND|FUNCTION|GET|GETTYPE|GETXMLNAMESPACE|GLOBAL|GOSUB|GOTO|HANDLES|IF|IMPLEMENTS|IMPORTS|IN|INHERITS|INPUT|INTEGER|INTERFACE|IOCTL|IS|ISNOT|KEY|KILL|LET|LIB|LIKE|LINE INPUT|LOCATE|LOCK|LONG|LOOP|LSET|ME|MKDIR|MOD|MODULE|MUSTINHERIT|MUSTOVERRIDE|MYBASE|MYCLASS|NAME|NAMESPACE|NARROWING|NEW|NEXT|NOT|NOTHING|NOTINHERITABLE|NOTOVERRIDABLE|OBJECT|OF|OFF|ON(?: COM| ERROR| KEY| TIMER)?|OPEN|OPERATOR|OPTION(?: BASE)?|OPTIONAL|OR|ORELSE|OUT|OVERLOADS|OVERRIDABLE|OVERRIDES|PARAMARRAY|PARTIAL|POKE|PRIVATE|PROPERTY|PROTECTED|PUBLIC|PUT|RAISEEVENT|READ|READONLY|REDIM|REM|REMOVEHANDLER|RESTORE|RESUME|RETURN|RMDIR|RSET|RUN|SBYTE|SELECT(?: CASE)?|SET|SHADOWS|SHARED|SHELL|SHORT|SINGLE|SLEEP|STATIC|STEP|STOP|STRING|STRUCTURE|SUB|SWAP|SYNCLOCK|SYSTEM|THEN|THROW|TIMER|TO|TROFF|TRON|TRUE|TRY|TRYCAST|TYPE|TYPEOF|UINTEGER|ULONG|UNLOCK|UNTIL|USHORT|USING|VIEW PRINT|WAIT|WEND|WHEN|WHILE|WIDENING|WITH|WITHEVENTS|WRITE|WRITEONLY|XOR)|\B(?:#CONST|#ELSE|#ELSEIF|#END|#IF))(?:\$|\b)/i, 'punctuation': /[,;:(){}]/, + $insertBefore: { + 'comment': { + 'doc-comment': /** @type {import('../types.d.ts').GrammarTokens} */ ( + getOptionalLanguage('xml-doc') + )?.tick, + }, + }, }; }, }; diff --git a/src/types.d.ts b/src/types.d.ts index 7958dd68a..b9f84c4b1 100644 --- a/src/types.d.ts +++ b/src/types.d.ts @@ -198,7 +198,33 @@ export type GrammarSpecial = { $tokenize?: (code: string, grammar: Grammar, Prism: Prism) => TokenStream; }; -export type Grammar = GrammarTokens & GrammarSpecial; +/** + * Tokens within $insert + */ +export type InsertableToken = (RegExpLike | GrammarToken | (RegExpLike | GrammarToken)[]) & { + $before?: TokenName | TokenName[]; + $after?: TokenName | TokenName[]; +}; + +/** + * A grammar that is defined as its delta from another grammar. + */ +export type GrammarPatch = { + $insert?: Partial>; + $insertBefore?: Partial>; + $insertAfter?: Partial>; + $delete?: TokenName[]; + $merge?: GrammarTokens; +}; + +export interface Grammar extends GrammarSpecial, GrammarPatch { + [token: string]: + | RegExpLike + | GrammarToken + | (RegExpLike | GrammarToken)[] + | GrammarSpecial[keyof GrammarSpecial] + | GrammarPatch[keyof GrammarPatch]; +} export interface PlainObject { [key: string]: unknown; diff --git a/src/util/extend.js b/src/util/extend.js index fb369f263..3412d8368 100644 --- a/src/util/extend.js +++ b/src/util/extend.js @@ -1,3 +1,5 @@ +import { betterAssign, deepClone } from './objects.js'; + /** * Creates a deep copy of the language with the given id and appends the given tokens. * @@ -13,9 +15,8 @@ * Therefore, it is encouraged to order overwriting tokens according to the positions of the overwritten tokens. * Furthermore, all non-overwriting tokens should be placed after the overwriting ones. * - * @param {Grammar} grammar The grammar of the language to extend. - * @param {string} id The id of the language to extend. - * @param {Grammar} reDef The new tokens to append. + * @param {Grammar} base The grammar of the language to extend. + * @param {Grammar} grammar The new tokens to append. * @returns {Grammar} The new language created. * @example * Prism.languages['css-with-colors'] = Prism.languages.extend('css', { @@ -26,120 +27,67 @@ * 'color': /\b(?:red|green|blue)\b/ * }); */ -export function extend (grammar, id, reDef) { - const lang = cloneGrammar(grammar, id); +export function extend (base, grammar) { + const lang = deepClone(base); + + for (const key in grammar) { + if (typeof key !== 'string' || key.startsWith('$')) { + // ignore special keys + continue; + } - for (const key in reDef) { - lang[key] = reDef[key]; + lang[key] = grammar[key]; } - return lang; -} + if (grammar.$insertBefore) { + lang.$insertBefore = betterAssign(lang.$insertBefore ?? {}, grammar.$insertBefore); + } -/** - * @param {Grammar} grammar - * @param {string} id - * @returns {Grammar} - */ -export function cloneGrammar (grammar, id) { - /** @type {Grammar} */ - const result = {}; + if (grammar.$insertAfter) { + lang.$insertAfter = betterAssign(lang.$insertAfter ?? {}, grammar.$insertAfter); + } - /** @type {Map} */ - const visited = new Map(); + if (grammar.$insert) { + // Syntactic sugar for $insertBefore/$insertAfter + for (const tokenName in grammar.$insert) { + const def = grammar.$insert[tokenName]; + const { $before, $after, ...token } = def; + const relToken = $before || $after; + const all = $before ? '$insertBefore' : '$insertAfter'; + lang[all] ??= {}; - /** - * @param {GrammarToken | RegExpLike} value - */ - function cloneToken (value) { - if (!value.pattern) { - return value; - } - else { - /** @type {GrammarToken} */ - const copy = { pattern: value.pattern }; - if (value.lookbehind) { - copy.lookbehind = value.lookbehind; + if (Array.isArray(relToken)) { + // Insert in multiple places + for (const t of relToken) { + lang[all][t][tokenName] = token; + } } - if (value.greedy) { - copy.greedy = value.greedy; + else if (relToken) { + (lang[all][relToken] ??= {})[tokenName] = token; } - if (value.alias) { - copy.alias = Array.isArray(value.alias) ? [...value.alias] : value.alias; + else { + lang[tokenName] = token; } - if (value.inside) { - copy.inside = cloneRef(value.inside); - } - return copy; } } - /** - * @param {GrammarTokens['string']} value - */ - function cloneTokens (value) { - if (!value) { - return undefined; - } - else if (Array.isArray(value)) { - return value.map(cloneToken); + if (grammar.$delete) { + if (lang.$delete) { + // base also had $delete + lang.$delete.push(...grammar.$delete); } else { - return cloneToken(value); + lang.$delete = [...grammar.$delete]; } } - /** - * @param {string | Grammar} ref - */ - function cloneRef (ref) { - if (ref === id) { - // self ref - return result; - } - else if (typeof ref === 'string') { - return ref; - } - else { - return clone(ref); - } + if (grammar.$merge) { + lang.$merge = betterAssign(lang.$merge ?? {}, grammar.$merge); } - /** - * @param {Grammar} value - */ - function clone (value) { - let mapped = visited.get(value); - if (mapped === undefined) { - mapped = value === grammar ? result : {}; - visited.set(value, mapped); - - // tokens - for (const [key, tokens] of Object.entries(value)) { - mapped[key] = cloneTokens(/** @type {GrammarToken[]} */ (tokens)); - } - - // rest - const r = value.$rest; - if (r != null) { - mapped.$rest = cloneRef(r); - } - - // tokenize - const t = value.$tokenize; - if (t) { - mapped.$tokenize = t; - } - } - return mapped; - } - - return clone(grammar); + return lang; } /** * @typedef {import('../types.d.ts').Grammar} Grammar - * @typedef {import('../types.d.ts').GrammarToken} GrammarToken - * @typedef {import('../types.d.ts').GrammarTokens} GrammarTokens - * @typedef {import('../types.d.ts').RegExpLike} RegExpLike */ diff --git a/src/util/grammar-patch.js b/src/util/grammar-patch.js new file mode 100644 index 000000000..27392cdf9 --- /dev/null +++ b/src/util/grammar-patch.js @@ -0,0 +1,92 @@ +import { insertAfter, insertBefore } from './insert.js'; +import { deepMerge } from './objects.js'; + +/** + * Apply a patch to a grammar to modify it. + * The patch and the grammar may be the same object. + * + * @param {Grammar} grammar + * @param {Grammar} [patch=grammar] + * @returns {Grammar} + */ +export function grammarPatch (grammar, patch = grammar) { + if (patch.$insertBefore) { + for (const key in patch.$insertBefore) { + const tokens = patch.$insertBefore[key]; + + if (key?.includes('/')) { + // Deep key + let path = key.split('/'); + const lastKey = path.pop(); + path = path.flatMap(key => [key, 'inside']); // add `inside` after each key + // @ts-ignore + const obj = path.reduce((acc, key) => acc?.[key], grammar); + + if (obj) { + // @ts-ignore + insertBefore(obj, lastKey, tokens); + } + } + else if (tokens) { + // @ts-ignore + insertBefore(grammar, key, tokens); + } + } + delete grammar.$insertBefore; + } + + if (patch.$insertAfter) { + for (const key in patch.$insertAfter) { + const tokens = patch.$insertAfter[key]; + + if (key?.includes('/')) { + // Deep key + let path = key.split('/'); + const lastKey = path.pop(); + path = path.flatMap(key => [key, 'inside']); // add `inside` after each key + // @ts-ignore + const obj = path.reduce((acc, key) => acc?.[key], grammar); + + if (obj) { + // @ts-ignore + insertAfter(obj, lastKey, tokens); + } + } + else if (tokens) { + // @ts-ignore + insertAfter(grammar, key, tokens); + } + } + delete grammar.$insertAfter; + } + + if (patch.$delete) { + // @ts-ignore + for (const key of patch.$delete) { + // TODO support deep keys + delete grammar[key]; + } + delete grammar.$delete; + } + + if (patch.$merge) { + for (const key in patch.$merge) { + const tokens = patch.$merge[key]; + + if (grammar[key]) { + deepMerge(grammar[key], tokens); + } + else { + grammar[key] = tokens; + } + } + + delete grammar.$merge; + } + + return grammar; +} + +/** + * @typedef {import('../types.d.ts').Grammar} Grammar + */ diff --git a/src/util/insert.js b/src/util/insert.js index 229d0f656..1764c4bf7 100644 --- a/src/util/insert.js +++ b/src/util/insert.js @@ -1,3 +1,5 @@ +import { betterAssign } from './objects.js'; + /** * Inserts tokens _before_ another token in the given grammar. * @@ -26,48 +28,59 @@ * }); * ``` * - * ## Special cases - * - * If the grammars of `grammar` and `insert` have tokens with the same name, the tokens in `grammar`'s grammar - * will be ignored. - * - * This behavior can be used to insert tokens after `before`: + * @param {Grammar} grammar The grammar to be modified. + * @param {string} beforeKey The key to insert before. + * @param {GrammarTokens} tokens An object containing the key-value pairs to be inserted. + */ +export function insertBefore (grammar, beforeKey, tokens) { + insert(grammar, beforeKey, tokens, 'before'); +} + +/** * - * ```js - * insertBefore(markup, 'comment', { - * 'comment': markup.comment, - * // tokens after 'comment' - * }); - * ``` + * @param {Grammar} grammar + * @param {string} afterKey + * @param {GrammarTokens} tokens + */ +export function insertAfter (grammar, afterKey, tokens) { + insert(grammar, afterKey, tokens); +} + +/** * - * @param {Grammar} grammar The grammar to be modified. - * @param {string} before The key to insert before. - * @param {GrammarTokens} insert An object containing the key-value pairs to be inserted. - * @returns {void} + * @param {Grammar} grammar + * @param {string} atKey + * @param {GrammarTokens} insert + * @param {'before' | 'after'} [position='after'] */ -export function insertBefore (grammar, before, insert) { - if (!(before in grammar)) { - throw new Error(`"${before}" has to be a key of grammar.`); +export function insert (grammar, atKey, insert, position = 'after') { + if (!(atKey in grammar)) { + // TODO support deep keys + throw new Error(`"${atKey}" has to be a key of grammar.`); } - const grammarEntries = Object.entries(grammar); + const descriptors = Object.getOwnPropertyDescriptors(grammar); // delete all keys in `grammar` - for (const [key] of grammarEntries) { - delete grammar[key]; + for (const key in descriptors) { + if (Object.hasOwn(descriptors, key)) { + delete grammar[key]; + } } // insert keys again - for (const [key, value] of grammarEntries) { - if (key === before) { - for (const insertKey of Object.keys(insert)) { - grammar[insertKey] = insert[insertKey]; - } + for (const key in descriptors) { + if (position === 'before' && key === atKey) { + betterAssign(grammar, insert); } // Do not insert tokens which also occur in `insert`. See #1525 - if (!insert.hasOwnProperty(key)) { - grammar[key] = /** @type {GrammarToken} */ (value); + if (!Object.hasOwn(insert, key)) { + Object.defineProperty(grammar, key, descriptors[key]); + } + + if (position === 'after' && key === atKey) { + betterAssign(grammar, insert); } } } diff --git a/src/util/objects.js b/src/util/objects.js index 76bde2ee3..49532eeb0 100644 --- a/src/util/objects.js +++ b/src/util/objects.js @@ -1,3 +1,5 @@ +import { toArray } from './iterables.js'; + /** * @template {Record} T * @template {keyof T} K @@ -43,3 +45,140 @@ export function defineSimpleProperty (obj, key, value) { configurable: false, }); } + +/** + * + * @param {any} obj + * @param {string} type + * @returns {boolean} + */ +export function isObject (obj, type) { + if (!obj || typeof obj !== 'object') { + return false; + } + + const proto = Object.getPrototypeOf(obj); + return proto.constructor?.name === type; +} + +/** + * @param {any} obj + * @returns {boolean} + */ +export function isPlainObject (obj) { + return isObject(obj, 'Object'); +} + +/** + * @typedef {object} MergeOptions + * @property {any[]} [emptyValues] + * @property {string[]} [containers] + * @property {(value: any, key?: Property, parent?: any) => boolean} [isContainer] + * @property {boolean} [mergeArrays] + */ + +/** @typedef {string | number | symbol} Property */ + +/** + * + * @param {any} target + * @param {any} source + * @param {MergeOptions} [options={}] + * @returns + */ +export function deepMerge (target, source, options = {}) { + const { + emptyValues = [undefined], + containers = ['Object', 'EventTarget'], + isContainer = value => containers.some(type => isObject(value, type)), + mergeArrays = false, + } = options; + + if (mergeArrays && (Array.isArray(target) || Array.isArray(source))) { + target = toArray(target); + source = toArray(source); + return target.concat(source); + } + + if (isContainer(target) && isContainer(source)) { + for (const key in source) { + target[key] = deepMerge(target[key], source[key], options); + } + + return target; + } + + if (emptyValues.includes(target)) { + return source; + } + + return target ?? source; +} + +/** + * @typedef {object} CloneOptions + * + * Used internally to store clones of objects, + * both for performance but mainly to avoid getting tripped up in circular references + * @property {WeakMap} [_clones] + */ + +/** + * @param {any} obj + * @param {CloneOptions} options + */ +export function deepClone (obj, options = {}) { + if (!obj || typeof obj !== 'object') { + return obj; + } + + options._clones ??= new WeakMap(); + const { _clones } = options; + + if (_clones.has(obj)) { + return _clones.get(obj); + } + + let ret = obj; + + if (Array.isArray(obj)) { + ret = []; + _clones.set(obj, ret); + + for (const item of obj) { + ret.push(deepClone(item, options)); + } + } + else if (isPlainObject(obj)) { + ret = { ...obj }; + _clones.set(obj, ret); + + for (const key in obj) { + ret[key] = deepClone(obj[key], options); + } + } + + return ret; +} + +/** + * Like Object.assign() but preserves accessors. + * + * @param {Record} target + * @param {Record[]} sources + */ +export function betterAssign (target, ...sources) { + for (const source of sources) { + const descriptors = Object.getOwnPropertyDescriptors(source); + for (const key in descriptors) { + if (Object.hasOwn(target, key)) { + continue; + } + + const descriptor = descriptors[key]; + Object.defineProperty(target, key, descriptor); + } + } + + return target; +} diff --git a/tsconfig.json b/tsconfig.json index 238dafeae..2b52d6120 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -11,7 +11,7 @@ // "disableReferencedProjectLoad": true, /* Reduce the number of projects loaded automatically by TypeScript. */ /* Language and Environment */ - "target": "es2020" /* Set the JavaScript language version for emitted JavaScript and include compatible library declarations. */, + "target": "ES2022" /* Set the JavaScript language version for emitted JavaScript and include compatible library declarations. */, // "lib": [], /* Specify a set of bundled library declaration files that describe the target runtime environment. */ // "jsx": "preserve", /* Specify what JSX code is generated. */ // "experimentalDecorators": true, /* Enable experimental support for TC39 stage 2 draft decorators. */