
Commit 5f74bb1

Introduce grammar patch from the simplify branch
Authored by: Lea Verou <[email protected]>
1 parent: 9579fc1 · commit: 5f74bb1


8 files changed (+364, -135 lines)


src/core/registry.js

Lines changed: 7 additions & 7 deletions
@@ -1,8 +1,8 @@
 import { kebabToCamelCase } from '../shared/util.js';
-import { cloneGrammar } from '../util/extend.js';
+import { extend } from '../util/extend.js';
+import { grammarPatch } from '../util/grammar-patch.js';
 import { forEach, toArray } from '../util/iterables.js';
-import { extend } from '../util/language-util.js';
-import { defineLazyProperty } from '../util/objects.js';
+import { deepClone, defineLazyProperty } from '../util/objects.js';
 
 /**
  * TODO: docs
@@ -221,7 +221,7 @@ export class Registry {
 
 		const base = entry?.proto.base;
 		// We need this so that any code modifying the base grammar doesn't affect other instances
-		const baseGrammar = base && cloneGrammar(required(base.id), base.id);
+		const baseGrammar = base && deepClone(required(base.id));
 
 		const requiredLanguages = toArray(
 			/** @type {LanguageProto | LanguageProto[] | undefined} */ (entry?.proto.require)
@@ -240,7 +240,7 @@ export class Registry {
 		else {
 			const options = {
 				getOptionalLanguage: id => this.getLanguage(id),
-				extend: (id, ref) => extend(required(id), id, ref),
+				extend: (id, ref) => extend(required(id), ref),
 				...(baseGrammar && { base: baseGrammar }),
 				...(requiredLanguages.length && { languages }),
 			};
@@ -249,10 +249,10 @@ export class Registry {
 			}
 
 		if (baseGrammar) {
-			evaluatedGrammar = extend(baseGrammar, base.id, evaluatedGrammar);
+			evaluatedGrammar = extend(baseGrammar, evaluatedGrammar);
 		}
 
-		return (entry.evaluatedGrammar = evaluatedGrammar);
+		return (entry.evaluatedGrammar = grammarPatch(evaluatedGrammar));
 	}
 }
 

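Taken together, the registry changes settle on a three-step evaluation order: deep-clone the base grammar, layer the language's own grammar on top with extend, and resolve the accumulated patch keys with grammarPatch. A minimal sketch of that order (not the Registry code itself; `base` and `language` stand in for whatever the Registry resolves from its language protos):

import { extend } from '../util/extend.js';
import { grammarPatch } from '../util/grammar-patch.js';

// Sketch only: mirrors the order used above, with hypothetical inputs.
function evaluate (base, language) {
	// `extend` deep-clones `base` internally, so the evaluated grammar stays isolated
	const grammar = base ? extend(base, language) : language;
	// resolve the accumulated $insert / $insertBefore / $insertAfter / $delete / $merge keys
	return grammarPatch(grammar);
}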
src/core/tokenize/tokenize.js

Lines changed: 9 additions & 1 deletion
@@ -42,13 +42,21 @@ export function tokenize (text, grammar) {
 	const tokenList = new LinkedList();
 	tokenList.addAfter(tokenList.head, text);
 
-	_matchGrammar.call(prism, text, tokenList, grammar, tokenList.head, 0);
+	_matchGrammar.call(
+		prism,
+		text,
+		tokenList,
+		/** @type {GrammarTokens} */ (grammar),
+		tokenList.head,
+		0
+	);
 
 	return tokenList.toArray();
 }
 
 /**
  * @typedef {import('../../types.d.ts').TokenStream} TokenStream
  * @typedef {import('../../types.d.ts').Grammar} Grammar
+ * @typedef {import('../../types.d.ts').GrammarTokens} GrammarTokens
  * @typedef {import('../prism.js').Prism} Prism
  */

src/types.d.ts

Lines changed: 29 additions & 1 deletion
@@ -198,7 +198,35 @@ export type GrammarSpecial = {
 	$tokenize?: (code: string, grammar: Grammar, Prism: Prism) => TokenStream;
 };
 
-export type Grammar = GrammarTokens & GrammarSpecial;
+/**
+ * Tokens within $insert
+ */
+export type InsertableToken = (RegExpLike | GrammarToken | (RegExpLike | GrammarToken)[]) & {
+	$before?: TokenName | TokenName[];
+	$after?: TokenName | TokenName[];
+};
+
+/**
+ * A grammar that is defined as its delta from another grammar.
+ */
+export type GrammarPatch = {
+	$insert?: Partial<Record<TokenName, InsertableToken>>;
+	$insertBefore?: Partial<Record<TokenName, GrammarTokens>>;
+	$insertAfter?: Partial<Record<TokenName, GrammarTokens>>;
+	$delete?: TokenName[];
+	$merge?: Partial<
+		Record<TokenName, Partial<Omit<GrammarToken, 'pattern'>> & { pattern?: RegExpLike }>
+	>;
+};
+
+export interface Grammar extends GrammarSpecial, GrammarPatch {
+	[token: string]:
+		| RegExpLike
+		| GrammarToken
+		| (RegExpLike | GrammarToken)[]
+		| GrammarSpecial[keyof GrammarSpecial]
+		| GrammarPatch[keyof GrammarPatch];
+}
 
 export interface PlainObject {
 	[key: string]: unknown;

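Since Grammar now extends GrammarPatch, a grammar object may carry delta keys alongside ordinary tokens. A hypothetical patch exercising each new key (token names and patterns are illustrative only, not taken from the commit):

/** @type {import('./types.d.ts').Grammar} */
const delta = {
	// InsertableToken: a token plus a $before/$after anchor
	$insert: {
		'color': { pattern: /\b(?:red|green|blue)\b/, $before: 'keyword' },
	},
	// long-hand form, keyed by the token to insert after
	$insertAfter: {
		'comment': { 'todo': /\bTODO\b/ },
	},
	// remove tokens inherited from the base grammar
	$delete: ['atrule'],
	// shallow-patch properties of an existing token
	$merge: {
		'string': { greedy: true },
	},
};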
src/util/extend.js

Lines changed: 44 additions & 96 deletions
@@ -1,3 +1,5 @@
+import { betterAssign, deepClone } from './objects.js';
+
 /**
  * Creates a deep copy of the language with the given id and appends the given tokens.
  *
@@ -13,9 +15,8 @@
  * Therefore, it is encouraged to order overwriting tokens according to the positions of the overwritten tokens.
  * Furthermore, all non-overwriting tokens should be placed after the overwriting ones.
  *
- * @param {Grammar} grammar The grammar of the language to extend.
- * @param {string} id The id of the language to extend.
- * @param {Grammar} reDef The new tokens to append.
+ * @param {Grammar} base The grammar of the language to extend.
+ * @param {Grammar} grammar The new tokens to append.
  * @returns {Grammar} The new language created.
  * @example
  * Prism.languages['css-with-colors'] = Prism.languages.extend('css', {
@@ -26,120 +27,67 @@
  * 'color': /\b(?:red|green|blue)\b/
  * });
  */
-export function extend (grammar, id, reDef) {
-	const lang = cloneGrammar(grammar, id);
+export function extend (base, grammar) {
+	const lang = deepClone(base);
+
+	for (const key in grammar) {
+		if (typeof key !== 'string' || key.startsWith('$')) {
+			// ignore special keys
+			continue;
+		}
 
-	for (const key in reDef) {
-		lang[key] = reDef[key];
+		lang[key] = grammar[key];
 	}
 
-	return lang;
-}
+	if (grammar.$insertBefore) {
+		lang.$insertBefore = betterAssign(lang.$insertBefore ?? {}, grammar.$insertBefore);
+	}
 
-/**
- * @param {Grammar} grammar
- * @param {string} id
- * @returns {Grammar}
- */
-export function cloneGrammar (grammar, id) {
-	/** @type {Grammar} */
-	const result = {};
+	if (grammar.$insertAfter) {
+		lang.$insertAfter = betterAssign(lang.$insertAfter ?? {}, grammar.$insertAfter);
+	}
 
-	/** @type {Map<Grammar, Grammar>} */
-	const visited = new Map();
+	if (grammar.$insert) {
+		// Syntactic sugar for $insertBefore/$insertAfter
+		for (const tokenName in grammar.$insert) {
+			const def = grammar.$insert[tokenName];
+			const { $before, $after, ...token } = def;
+			const relToken = $before || $after;
+			const all = $before ? '$insertBefore' : '$insertAfter';
+			lang[all] ??= {};
 
-	/**
-	 * @param {GrammarToken | RegExpLike} value
-	 */
-	function cloneToken (value) {
-		if (!value.pattern) {
-			return value;
-		}
-		else {
-			/** @type {GrammarToken} */
-			const copy = { pattern: value.pattern };
-			if (value.lookbehind) {
-				copy.lookbehind = value.lookbehind;
+			if (Array.isArray(relToken)) {
+				// Insert in multiple places
+				for (const t of relToken) {
+					lang[all][t][tokenName] = token;
+				}
 			}
-			if (value.greedy) {
-				copy.greedy = value.greedy;
+			else if (relToken) {
+				(lang[all][relToken] ??= {})[tokenName] = token;
 			}
-			if (value.alias) {
-				copy.alias = Array.isArray(value.alias) ? [...value.alias] : value.alias;
+			else {
+				lang[tokenName] = token;
 			}
-			if (value.inside) {
-				copy.inside = cloneRef(value.inside);
-			}
-			return copy;
 		}
 	}
 
-	/**
-	 * @param {GrammarTokens['string']} value
-	 */
-	function cloneTokens (value) {
-		if (!value) {
-			return undefined;
-		}
-		else if (Array.isArray(value)) {
-			return value.map(cloneToken);
+	if (grammar.$delete) {
+		if (lang.$delete) {
+			// base also had $delete
+			lang.$delete.push(...grammar.$delete);
 		}
 		else {
-			return cloneToken(value);
+			lang.$delete = [...grammar.$delete];
 		}
 	}
 
-	/**
-	 * @param {string | Grammar} ref
-	 */
-	function cloneRef (ref) {
-		if (ref === id) {
-			// self ref
-			return result;
-		}
-		else if (typeof ref === 'string') {
-			return ref;
-		}
-		else {
-			return clone(ref);
-		}
+	if (grammar.$merge) {
+		lang.$merge = betterAssign(lang.$merge ?? {}, grammar.$merge);
 	}
 
-	/**
-	 * @param {Grammar} value
-	 */
-	function clone (value) {
-		let mapped = visited.get(value);
-		if (mapped === undefined) {
-			mapped = value === grammar ? result : {};
-			visited.set(value, mapped);
-
-			// tokens
-			for (const [key, tokens] of Object.entries(value)) {
-				mapped[key] = cloneTokens(/** @type {GrammarToken[]} */ (tokens));
-			}
-
-			// rest
-			const r = value.$rest;
-			if (r != null) {
-				mapped.$rest = cloneRef(r);
-			}
-
-			// tokenize
-			const t = value.$tokenize;
-			if (t) {
-				mapped.$tokenize = t;
-			}
-		}
-		return mapped;
-	}
-
-	return clone(grammar);
+	return lang;
 }
 
 /**
  * @typedef {import('../types.d.ts').Grammar} Grammar
- * @typedef {import('../types.d.ts').GrammarToken} GrammarToken
- * @typedef {import('../types.d.ts').GrammarTokens} GrammarTokens
- * @typedef {import('../types.d.ts').RegExpLike} RegExpLike
  */

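With the new signature, extend(base, grammar) replaces the old extend(grammar, id, reDef)/cloneGrammar pair: plain keys are copied onto a deep clone of the base, while the $-prefixed patch keys are merged and kept for grammarPatch to apply later. A hedged usage sketch (the grammars below are made up for illustration):

import { extend } from './extend.js';

// hypothetical base and delta grammars
const base = {
	'keyword': /\b(?:if|else)\b/,
	'string': /"[^"]*"/,
};

const extended = extend(base, {
	// plain keys overwrite or append tokens, as before
	'number': /\b\d+\b/,
	// $-keys are accumulated on the result rather than applied immediately
	$insertBefore: { 'string': { 'char': /'[^']'/ } },
	$delete: ['keyword'],
});
// `extended` still carries $insertBefore and $delete until grammarPatch consumes them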
src/util/grammar-patch.js

Lines changed: 93 additions & 0 deletions
@@ -0,0 +1,93 @@
+import { insertAfter, insertBefore } from './insert.js';
+import { deepMerge } from './objects.js';
+
+/**
+ * Apply a patch to a grammar to modify it.
+ * The patch and the grammar may be the same object.
+ *
+ * @param {Grammar} grammar
+ * @param {Grammar} [patch=grammar]
+ * @returns {Grammar}
+ */
+export function grammarPatch (grammar, patch = grammar) {
+	if (patch.$insertBefore) {
+		for (const key in patch.$insertBefore) {
+			const tokens = patch.$insertBefore[key];
+
+			if (key?.includes('/')) {
+				// Deep key
+				let path = key.split('/');
+				const lastKey = path.pop();
+				path = path.flatMap(key => [key, 'inside']); // add `inside` after each key
+				// @ts-ignore
+				const obj = path.reduce((acc, key) => acc?.[key], grammar);
+
+				if (obj) {
+					// @ts-ignore
+					insertBefore(obj, lastKey, tokens);
+				}
+			}
+			else if (tokens) {
+				// @ts-ignore
+				insertBefore(grammar, key, tokens);
+			}
+		}
+		delete grammar.$insertBefore;
+	}
+
+	if (patch.$insertAfter) {
+		for (const key in patch.$insertAfter) {
+			const tokens = patch.$insertAfter[key];
+
+			if (key?.includes('/')) {
+				// Deep key
+				let path = key.split('/');
+				const lastKey = path.pop();
+				path = path.flatMap(key => [key, 'inside']); // add `inside` after each key
+				// @ts-ignore
+				const obj = path.reduce((acc, key) => acc?.[key], grammar);
+
+				if (obj) {
+					// @ts-ignore
+					insertAfter(obj, lastKey, tokens);
+				}
+			}
+			else if (tokens) {
+				// @ts-ignore
+				insertAfter(grammar, key, tokens);
+			}
+		}
+		delete grammar.$insertAfter;
+	}
+
+	if (patch.$delete) {
+		// @ts-ignore
+		for (const key of patch.$delete) {
+			// TODO support deep keys
+			delete grammar[key];
+		}
+		delete grammar.$delete;
+	}
+
+	if (patch.$merge) {
+		for (const key in patch.$merge) {
+			const tokens = patch.$merge[key];
+
+			if (grammar[key]) {
+				deepMerge(grammar[key], tokens);
+			}
+			else {
+				grammar[key] = /** @type {GrammarTokens} */ (tokens);
+			}
+		}
+
+		delete grammar.$merge;
+	}
+
+	return grammar;
+}
+
+/**
+ * @typedef {import('../types.d.ts').Grammar} Grammar
+ * @typedef {import('../types.d.ts').GrammarTokens} GrammarTokens
+ */

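grammarPatch mutates the grammar in place and deletes each $-key once it has been applied; keys containing '/' are treated as deep paths that descend through each token's inside grammar. A small sketch under those assumptions (it relies on insertBefore from ./insert.js, which this commit does not show; the grammar is made up):

import { grammarPatch } from './grammar-patch.js';

// hypothetical grammar with a nested `inside` grammar
const grammar = {
	'string': { pattern: /"[^"]*"/, inside: { 'escape': /\\./ } },
	'comment': /\/\/.*/,
	$insertBefore: {
		// top-level key: insert `char` before `comment`
		'comment': { 'char': /'[^']'/ },
		// deep key: insert `interpolation` before `escape` inside `string`
		'string/escape': { 'interpolation': /\$\{[^}]*\}/ },
	},
	$merge: {
		'string': { greedy: true },
	},
};

grammarPatch(grammar);
// afterwards $insertBefore and $merge are gone; the inserted and merged
// tokens now live directly on `grammar` (and on grammar.string.inside)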