Skip to content

Commit 128d64f

Browse files
committed
Add guide for multi mode lexing
1 parent a320631 commit 128d64f

File tree

1 file changed

+116
-0
lines changed

1 file changed

+116
-0
lines changed
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
---
2+
title: "Multi-Mode Lexing"
3+
weight: 400
4+
---
5+
6+
Many modern programming languages such as [JavaScript](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Template_literals) or [C#](https://learn.microsoft.com/en-us/dotnet/csharp/language-reference/tokens/interpolated) support template literals.
7+
They make it easy to concatenate or interpolate string values while keeping the code readable.
8+
This guide will show you how to support template literals in Langium.
9+
10+
```antlr
11+
// A template literal collects its text chunks and embedded expressions in `content`.
TemplateLiteral:
    // Plain form: a single token spans the whole literal
    content+=TemplateContent
    // Interpolated form: start chunk, then (middle chunk)*, then end chunk,
    // each chunk optionally followed by an expression
    | (
        content+=TemplateContentStart content+=Expression?
        (content+=TemplateContentMiddle content+=Expression?)*
        content+=TemplateContentEnd
    );

// The four wrapper rules below all produce a TextLiteral node; they differ only
// in which terminal supplies `value`.
TemplateContent returns TextLiteral: value=RICH_TEXT;

TemplateContentStart returns TextLiteral: value=RICH_TEXT_START;

TemplateContentMiddle returns TextLiteral: value=RICH_TEXT_INBETWEEN;

TemplateContentEnd returns TextLiteral: value=RICH_TEXT_END;

// Backtick ... backtick: a literal without any embedded expression
terminal RICH_TEXT: '`' IN_RICH_TEXT* '`';

// Backtick ... '{': the opening chunk, up to the first embedded expression
terminal RICH_TEXT_START: '`' IN_RICH_TEXT* '{';

// '}' ... '{': a chunk between two embedded expressions
terminal RICH_TEXT_INBETWEEN: '}' IN_RICH_TEXT* '{';

// '}' ... backtick: the closing chunk, after the last embedded expression
terminal RICH_TEXT_END: '}' IN_RICH_TEXT* '`';

// One unit of literal text: any character other than '{' or a backtick,
// or a doubled '{{' / doubled backtick (presumably the escape forms - confirm).
terminal fragment IN_RICH_TEXT: /[^{`]|{{|``/;
51+
```
52+
53+
```ts
54+
import { DefaultTokenBuilder, Grammar, isTokenTypeArray, Keyword, TerminalRule } from "langium";
55+
import { IMultiModeLexerDefinition, TokenType, TokenVocabulary } from "chevrotain";
56+
57+
/** Name of the lexer mode that is active by default. */
const REGULAR_MODE = 'regular_mode';
/** Name of the lexer mode entered by RICH_TEXT_START and left by RICH_TEXT_END. */
const TEMPLATE_MODE = 'template_mode';

/**
 * Token builder that splits the tokens produced by the default builder into
 * two lexer modes: a regular mode and a template mode.
 */
export class CustomTokenBuilder extends DefaultTokenBuilder {

    /**
     * Builds the default token list and wraps it in a multi-mode lexer definition.
     *
     * @param grammar Grammar to build the tokens for; forwarded to the default builder.
     * @param options Build options; forwarded to the default builder.
     * @returns A multi-mode lexer definition whose default mode is the regular mode.
     * @throws Error if the default builder does not return a plain token type array.
     */
    override buildTokens(grammar: Grammar, options?: { caseInsensitive?: boolean }): TokenVocabulary {
        const allTokens = super.buildTokens(grammar, options);

        // Guard first: only a flat token array can be split into modes.
        if (!isTokenTypeArray(allTokens)) {
            throw new Error('Invalid token vocabulary received from DefaultTokenBuilder!');
        }

        // Regular mode: everything except the rich text middle & end tokens.
        const hiddenInRegularMode = ['RICH_TEXT_INBETWEEN', 'RICH_TEXT_END'];
        const regularModeTokens = allTokens.filter(({ name }) => !hiddenInRegularMode.includes(name));

        // Template mode: everything except the '}' keyword, which would
        // otherwise cause confusion while lexing.
        const templateModeTokens = allTokens.filter(({ name }) => name !== '}');

        const lexerDefinition: IMultiModeLexerDefinition = {
            modes: {
                [REGULAR_MODE]: regularModeTokens,
                [TEMPLATE_MODE]: templateModeTokens
            },
            defaultMode: REGULAR_MODE
        };
        return lexerDefinition;
    }

    /**
     * Builds a keyword token via the default builder and strips the
     * `LONGER_ALT` entry from the '}' keyword.
     *
     * @param keyword Keyword to build the token for.
     * @param terminalTokens Terminal tokens; forwarded to the default builder.
     * @param caseInsensitive Case sensitivity flag; forwarded to the default builder.
     * @returns The keyword token, without `LONGER_ALT` if it is the '}' token.
     */
    protected override buildKeywordToken(
        keyword: Keyword,
        terminalTokens: TokenType[],
        caseInsensitive: boolean
    ): TokenType {
        const keywordToken = super.buildKeywordToken(keyword, terminalTokens, caseInsensitive);

        if (keywordToken.name === '}') {
            // By default the '}' token gets [RICH_TEXT_INBETWEEN, RICH_TEXT_END]
            // as longer alternatives. Those are not valid in the regular lexer
            // mode, so the property is removed.
            delete keywordToken.LONGER_ALT;
        }

        return keywordToken;
    }

    /**
     * Builds a terminal token via the default builder and marks the tokens
     * that enter and exit the template mode.
     *
     * @param terminal Terminal rule to build the token for.
     * @returns The terminal token, with `PUSH_MODE` / `POP_MODE` set where applicable.
     */
    protected override buildTerminalToken(terminal: TerminalRule): TokenType {
        const terminalToken = super.buildTerminalToken(terminal);

        switch (terminalToken.name) {
            case 'RICH_TEXT_START':
                // Start of a template literal: enter template mode.
                terminalToken.PUSH_MODE = TEMPLATE_MODE;
                break;
            case 'RICH_TEXT_END':
                // End of a template literal: exit template mode.
                terminalToken.POP_MODE = true;
                break;
            default:
                break;
        }

        return terminalToken;
    }

}
116+
```

0 commit comments

Comments
 (0)