Commit ab87f37

feat: add rawContentTags options (#32)
* feat: add customTags options
* refactor
* add docs
1 parent 9f327bb commit ab87f37

13 files changed: +326, -34 lines changed


README.md

Lines changed: 1 addition & 0 deletions

@@ -68,6 +68,7 @@ parse(html: string, options?: Options): ParseResult;
 - `html`: HTML string to parse.
 - `options (optional)`
   - `tokenAdapter`: The adapter option for changing tokens information.
+  - `rawContentTags` (string[]): Specifies tag names whose child contents should be treated as raw text, meaning the parser will not interpret characters like < and > as HTML syntax inside these tags.
 
 **Returns**
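
To make the new option concrete, here is a minimal usage sketch. The relative import path is taken from this commit's file layout and would differ for a published package; the HTML snippet is an invented example.

// Usage sketch: `parse` is imported via the repository-relative path shown in this commit.
import { parse } from "./src/parser/parse";

// Inside <markdown>, "<" and ">" stay plain characters instead of tag syntax.
const html = `<markdown>
if (a < b && b > c) { console.log("still raw text"); }
</markdown>`;

// With rawContentTags, everything up to </markdown> is tokenized as a single text node.
const { ast } = parse(html, { rawContentTags: ["markdown"] });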

src/constants/tokenizer-context-types.ts

Lines changed: 1 addition & 0 deletions

@@ -18,4 +18,5 @@ export enum TokenizerContextTypes {
   CommentOpen = "CommentOpen",
   CommentContent = "CommentContent",
   CommentClose = "CommentClose",
+  CustomTagRawContent = "CustomTagRawContent",
 }
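
The new CustomTagRawContent context is the tokenizer state entered between the opening and closing tags of a configured raw-content tag. The state handler itself is not part of the excerpt shown here, so the function below is only a generic illustration of the raw-content idea (with a hypothetical name), not this repository's implementation.

// Generic illustration: in a raw-content state, the scanner only searches for the
// matching close tag and treats everything before it as plain text.
function readRawContent(html: string, from: number, tagName: string): string {
  const closeTag = `</${tagName}>`;
  const end = html.indexOf(closeTag, from);
  return end === -1 ? html.slice(from) : html.slice(from, end);
}

// readRawContent('<markdown>a < b</markdown>', "<markdown>".length, "markdown")
// returns "a < b"; the "<" is never interpreted as the start of a tag.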

src/parser/parse.ts

Lines changed: 1 addition & 1 deletion

@@ -7,7 +7,7 @@ import { Options } from "../types/parse";
 
 export function parse(html: string, options?: Options): ParseResult {
   const tokenAdapter = (options && options.tokenAdapter) || defaultTokenAdapter;
-  const { tokens } = tokenize(html, tokenAdapter, options?.templateInfos);
+  const { tokens } = tokenize(html, tokenAdapter, options);
   const { ast } = constructTree(tokens);
   return {
     ast: clearParent(ast),
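
The change above forwards the whole options object to `tokenize` instead of only `options?.templateInfos`, so the tokenizer also sees `rawContentTags`. Based on the updated spec further down, a direct call to the tokenizer now takes roughly this shape (the input string is an invented example; the import paths are repository-relative):

import { tokenize } from "./src/tokenizer/tokenize";
import { defaultTokenAdapter } from "./src/token-adapter";

// The third argument is a TokenizeOptions object, as in the updated spec below.
const { tokens } = tokenize("<markdown>1 < 2</markdown>", defaultTokenAdapter, {
  rawContentTags: ["markdown"],
});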

src/tokenizer/__tests__/__input__/custom-tag-raw-content.html

Lines changed: 17 additions & 0 deletions

@@ -0,0 +1,17 @@
+<markdown attr="1">
+# Hello, world!
+
+```cpp{4-6,9}
+#include <iostream>
+
+class Example {
+    Example() {
+        std::cout << "Hello, world!" << std::endl;
+    }
+
+    Example(std::string name) {
+        std::cout << "Hello, " << name << std::endl;
+    }
+};
+```
+</markdown>

src/tokenizer/__tests__/__output__/custom-tag-raw-content.ts

Lines changed: 161 additions & 0 deletions

@@ -0,0 +1,161 @@
+import { TokenTypes } from "../../../constants";
+import { AnyToken } from "../../../types";
+
+const OUTPUT: AnyToken[] = [
+  {
+    type: TokenTypes.OpenTagStart,
+    value: "<markdown",
+    range: [0, 9],
+    loc: {
+      start: {
+        column: 0,
+        line: 1,
+      },
+      end: {
+        line: 1,
+        column: 9,
+      },
+    },
+  },
+  {
+    type: TokenTypes.AttributeKey,
+    value: "attr",
+    loc: {
+      start: {
+        column: 10,
+        line: 1,
+      },
+      end: {
+        line: 1,
+        column: 14,
+      },
+    },
+    range: [10, 14],
+    parts: [],
+  },
+  {
+    type: TokenTypes.AttributeAssignment,
+    value: "=",
+    range: [14, 15],
+    loc: {
+      start: {
+        column: 14,
+        line: 1,
+      },
+      end: {
+        line: 1,
+        column: 15,
+      },
+    },
+  },
+  {
+    type: TokenTypes.AttributeValueWrapperStart,
+    value: '"',
+    range: [15, 16],
+    loc: {
+      start: {
+        column: 15,
+        line: 1,
+      },
+      end: {
+        line: 1,
+        column: 16,
+      },
+    },
+  },
+  {
+    type: TokenTypes.AttributeValue,
+    value: "1",
+    range: [16, 17],
+    loc: {
+      start: {
+        column: 16,
+        line: 1,
+      },
+      end: {
+        line: 1,
+        column: 17,
+      },
+    },
+    parts: [],
+  },
+  {
+    type: TokenTypes.AttributeValueWrapperEnd,
+    value: '"',
+    range: [17, 18],
+    loc: {
+      start: {
+        column: 17,
+        line: 1,
+      },
+      end: {
+        line: 1,
+        column: 18,
+      },
+    },
+  },
+  {
+    type: TokenTypes.OpenTagEnd,
+    value: ">",
+    range: [18, 19],
+    loc: {
+      start: {
+        column: 18,
+        line: 1,
+      },
+      end: {
+        line: 1,
+        column: 19,
+      },
+    },
+  },
+  {
+    type: TokenTypes.Text,
+    value: `
+# Hello, world!
+
+\`\`\`cpp{4-6,9}
+#include <iostream>
+
+class Example {
+    Example() {
+        std::cout << "Hello, world!" << std::endl;
+    }
+
+    Example(std::string name) {
+        std::cout << "Hello, " << name << std::endl;
+    }
+};
+\`\`\`
+`,
+    range: [19, 260],
+    loc: {
+      start: {
+        column: 19,
+        line: 1,
+      },
+      end: {
+        line: 17,
+        column: 0,
+      },
+    },
+    parts: [],
+  },
+  {
+    type: TokenTypes.CloseTag,
+    value: "</markdown>",
+    range: [260, 271],
+    loc: {
+      start: {
+        column: 0,
+        line: 17,
+      },
+      end: {
+        line: 17,
+        column: 11,
+      },
+    },
+  },
+];
+
+export default OUTPUT;

src/tokenizer/__tests__/tokenize.spec.ts

Lines changed: 51 additions & 26 deletions

@@ -1,6 +1,6 @@
 import * as fs from "fs";
 import * as path from "path";
-import { tokenize } from "../tokenize";
+import { tokenize, TokenizeOptions } from "../tokenize";
 import OPENING_CLOSING_TEXT from "./__output__/opening-closing-text";
 import NESTED_TAGS from "./__output__/nested-tags";
 import COMMENTS from "./__output__/comments";
@@ -30,7 +30,7 @@ import TEMPLATE_COMMENT from "./__output__/templates-comment";
 import TEMPLATE_SCRIPT_CONTENT from "./__output__/templates-script-content";
 import TEMPLATE_STYLE_CONTENT from "./__output__/templates-style-content";
 import TEMPLATE_CONTENT_END from "./__output__/templates-content-end";
-
+import CUSTOM_TAG_RAW_CONTENT from "./__output__/custom-tag-raw-content";
 import { defaultTokenAdapter } from "../../token-adapter";
 import { Range, TemplateInfo } from "../../types";
 
@@ -98,78 +98,107 @@ describe("tokenize", () => {
       "templates-attributes-key.html",
       TEMPLATE_ATTRIBUTES_KEY,
       null,
-      [[5, 11]] as Range[],
+      {
+        templateInfos: [[5, 11]] as Range[],
+      },
     ],
     [
       "Template Attributes Key (wrapper)",
       "templates-attributes-key.html",
       TEMPLATE_ATTRIBUTES_KEY_WRAPPER,
       null,
-      [
-        {
-          open: [5, 7],
-          close: [10, 11],
-        },
-      ] as TemplateInfo[],
+      {
+        templateInfos: [
+          {
+            open: [5, 7],
+            close: [10, 11],
+          },
+        ] as TemplateInfo[],
+      },
     ],
     [
       "Template Attributes Value Bare",
       "templates-attributes-value-bare.html",
       TEMPLATE_ATTRIBUTES_VALUE_BARE,
       null,
-      [[8, 13]] as Range[],
+      {
+        templateInfos: [[8, 13]] as Range[],
+      },
     ],
     [
       "Template Attributes Value Wrapped",
      "templates-attributes-value-wrapped.html",
      TEMPLATE_ATTRIBUTES_VALUE_WRAPPED,
      null,
-      [[9, 14]] as Range[],
+      {
+        templateInfos: [[9, 14]] as Range[],
+      },
     ],
     [
       "Template Attributes Value Wrapped 2",
       "templates-attributes-value-wrapped-2.html",
       TEMPLATE_ATTRIBUTES_VALUE_WRAPPED_2,
       null,
-      [
-        [16, 22],
-        [23, 31],
-      ] as Range[],
+      {
+        templateInfos: [
+          [16, 22],
+          [23, 31],
+        ] as Range[],
+      },
     ],
     [
       "Templates Data",
       "templates-data.html",
       TEMPLATE_DATA,
       null,
-      [[5, 16]] as Range[],
+      {
+        templateInfos: [[5, 16]] as Range[],
+      },
     ],
     [
       "Templates Comment",
       "templates-comment.html",
       TEMPLATE_COMMENT,
       null,
-      [[4, 14]] as Range[],
+      {
+        templateInfos: [[4, 14]] as Range[],
+      },
     ],
     [
       "Templates Script Content",
       "templates-script-content.html",
       TEMPLATE_SCRIPT_CONTENT,
       null,
-      [[8, 18]] as Range[],
+      {
+        templateInfos: [[8, 18]] as Range[],
+      },
     ],
     [
       "Templates Style Content",
       "templates-style-content.html",
       TEMPLATE_STYLE_CONTENT,
       null,
-      [[7, 17]] as Range[],
+      {
+        templateInfos: [[7, 17]] as Range[],
+      },
     ],
     [
       "Templates Content End",
       "templates-content-end.html",
       TEMPLATE_CONTENT_END,
       null,
-      [[0, 10]] as Range[],
+      {
+        templateInfos: [[0, 10]] as Range[],
+      },
+    ],
+    [
+      "Custom Tag Raw Content",
+      "custom-tag-raw-content.html",
+      CUSTOM_TAG_RAW_CONTENT,
+      null,
+      {
+        rawContentTags: ["markdown"],
+      },
     ],
   ])(
     "%s",
@@ -178,18 +207,14 @@ describe("tokenize", () => {
       input,
       output,
       process: null | ((html: string) => string) = null,
-      ranges: null | TemplateInfo[]
+      options: TokenizeOptions | null = null
     ) => {
       const inputPath = path.join(__dirname, "__input__", input);
       let html = fs.readFileSync(inputPath, "utf-8");
       if (process) {
         html = process(html);
       }
-      const { tokens } = tokenize(
-        html,
-        defaultTokenAdapter,
-        ranges ?? undefined
-      );
+      const { tokens } = tokenize(html, defaultTokenAdapter, options || {});
       expect(tokens).toEqual(output);
     }
   );
