1 change: 1 addition & 0 deletions README.md
@@ -68,6 +68,7 @@ parse(html: string, options?: Options): ParseResult;
- `html`: HTML string to parse.
- `options (optional)`
- `tokenAdapter`: An adapter for customizing token information.
- `rawContentTags` (string[]): Tag names whose child content should be treated as raw text; the parser will not interpret characters such as `<` and `>` as HTML syntax inside these tags (see the usage sketch below).

**Returns**

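For context, a minimal usage sketch of the new option. The relative import path is an assumption; point it at the package's actual entry point or local build output.

```ts
// Sketch only; the import path is an assumption, not the published entry point.
import { parse } from "../src/parser/parse";

// The <markdown> body contains "<iostream>" and "<<", which must not be
// read as HTML syntax.
const html = [
  '<markdown attr="1">',
  "#include <iostream>",
  'std::cout << "Hello, world!" << std::endl;',
  "</markdown>",
].join("\n");

// With rawContentTags, everything between <markdown ...> and </markdown>
// is tokenized as one raw text token instead of nested tags.
const { ast } = parse(html, { rawContentTags: ["markdown"] });
```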
1 change: 1 addition & 0 deletions src/constants/tokenizer-context-types.ts
@@ -18,4 +18,5 @@ export enum TokenizerContextTypes {
CommentOpen = "CommentOpen",
CommentContent = "CommentContent",
CommentClose = "CommentClose",
CustomTagRawContent = "CustomTagRawContent",
}
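The tokenizer logic that drives this new context is not part of the hunks shown here. Purely as an illustration of what a raw-content context has to do — consume everything up to the matching close tag without interpreting it — here is a self-contained sketch; the function name and return shape are hypothetical and not this library's internal API.

```ts
// Hypothetical illustration, not the library's actual CustomTagRawContent handler.
// Given the source and the index right after an opening <tag ...>, find the
// matching "</tag>" and return the raw text plus where the close tag begins.
function scanRawContent(
  source: string,
  tagName: string,
  contentStart: number
): { rawText: string; closeTagStart: number } | null {
  const closeTag = `</${tagName}>`;
  const closeTagStart = source
    .toLowerCase()
    .indexOf(closeTag.toLowerCase(), contentStart);
  if (closeTagStart === -1) {
    return null; // unterminated raw-content element
  }
  return { rawText: source.slice(contentStart, closeTagStart), closeTagStart };
}

// "<iostream>" and "<<" come back verbatim; they are never treated as markup.
scanRawContent("<markdown>#include <iostream></markdown>", "markdown", 10);
```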
2 changes: 1 addition & 1 deletion src/parser/parse.ts
@@ -7,7 +7,7 @@ import { Options } from "../types/parse";

export function parse(html: string, options?: Options): ParseResult {
const tokenAdapter = (options && options.tokenAdapter) || defaultTokenAdapter;
const { tokens } = tokenize(html, tokenAdapter, options?.templateInfos);
const { tokens } = tokenize(html, tokenAdapter, options);
const { ast } = constructTree(tokens);
return {
ast: clearParent(ast),
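With this change, `tokenize` receives the whole options object rather than only `templateInfos`. The `TokenizeOptions` interface itself is not visible in these hunks; inferred from how the test cases below construct it, a plausible shape would be:

```ts
// Inferred from the test fixtures in tokenize.spec.ts; the real definitions
// live in src/types and src/tokenizer/tokenize.ts and may differ.
type Range = [number, number];
type TemplateInfo = Range | { open: Range; close: Range };

interface TokenizeOptions {
  templateInfos?: TemplateInfo[];
  rawContentTags?: string[];
}
```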
17 changes: 17 additions & 0 deletions src/tokenizer/__tests__/__input__/custom-tag-raw-content.html
@@ -0,0 +1,17 @@
<markdown attr="1">
# Hello, world!

```cpp{4-6,9}
#include <iostream>

class Example {
Example() {
std::cout << "Hello, world!" << std::endl;
}

Example(std::string name) {
std::cout << "Hello, " << name << std::endl;
}
};
```
</markdown>
161 changes: 161 additions & 0 deletions src/tokenizer/__tests__/__output__/custom-tag-raw-content.ts
@@ -0,0 +1,161 @@
import { TokenTypes } from "../../../constants";
import { AnyToken } from "../../../types";

const OUTPUT: AnyToken[] = [
{
type: TokenTypes.OpenTagStart,
value: "<markdown",
range: [0, 9],
loc: {
start: {
column: 0,
line: 1,
},
end: {
line: 1,
column: 9,
},
},
},
{
type: TokenTypes.AttributeKey,
value: "attr",
loc: {
start: {
column: 10,
line: 1,
},
end: {
line: 1,
column: 14,
},
},
range: [10, 14],
parts: [],
},
{
type: TokenTypes.AttributeAssignment,
value: "=",
range: [14, 15],
loc: {
start: {
column: 14,
line: 1,
},
end: {
line: 1,
column: 15,
},
},
},
{
type: TokenTypes.AttributeValueWrapperStart,
value: '"',
range: [15, 16],
loc: {
start: {
column: 15,
line: 1,
},
end: {
line: 1,
column: 16,
},
},
},
{
type: TokenTypes.AttributeValue,
value: "1",
range: [16, 17],
loc: {
start: {
column: 16,
line: 1,
},
end: {
line: 1,
column: 17,
},
},
parts: [],
},
{
type: TokenTypes.AttributeValueWrapperEnd,
value: '"',
range: [17, 18],
loc: {
start: {
column: 17,
line: 1,
},
end: {
line: 1,
column: 18,
},
},
},
{
type: TokenTypes.OpenTagEnd,
value: ">",
range: [18, 19],
loc: {
start: {
column: 18,
line: 1,
},
end: {
line: 1,
column: 19,
},
},
},
{
type: TokenTypes.Text,
value: `
# Hello, world!

\`\`\`cpp{4-6,9}
#include <iostream>

class Example {
Example() {
std::cout << "Hello, world!" << std::endl;
}

Example(std::string name) {
std::cout << "Hello, " << name << std::endl;
}
};
\`\`\`
`,
range: [19, 260],
loc: {
start: {
column: 19,
line: 1,
},
end: {
line: 17,
column: 0,
},
},
parts: [],
},
{
type: TokenTypes.CloseTag,
value: "</markdown>",
range: [260, 271],
loc: {
start: {
column: 0,
line: 17,
},
end: {
line: 17,
column: 11,
},
},
},
];

export default OUTPUT;
77 changes: 51 additions & 26 deletions src/tokenizer/__tests__/tokenize.spec.ts
@@ -1,6 +1,6 @@
import * as fs from "fs";
import * as path from "path";
import { tokenize } from "../tokenize";
import { tokenize, TokenizeOptions } from "../tokenize";
import OPENING_CLOSING_TEXT from "./__output__/opening-closing-text";
import NESTED_TAGS from "./__output__/nested-tags";
import COMMENTS from "./__output__/comments";
@@ -30,7 +30,7 @@ import TEMPLATE_COMMENT from "./__output__/templates-comment";
import TEMPLATE_SCRIPT_CONTENT from "./__output__/templates-script-content";
import TEMPLATE_STYLE_CONTENT from "./__output__/templates-style-content";
import TEMPLATE_CONTENT_END from "./__output__/templates-content-end";

import CUSTOM_TAG_RAW_CONTENT from "./__output__/custom-tag-raw-content";
import { defaultTokenAdapter } from "../../token-adapter";
import { Range, TemplateInfo } from "../../types";

@@ -98,78 +98,107 @@ describe("tokenize", () => {
"templates-attributes-key.html",
TEMPLATE_ATTRIBUTES_KEY,
null,
[[5, 11]] as Range[],
{
templateInfos: [[5, 11]] as Range[],
},
],
[
"Template Attributes Key (wrapper)",
"templates-attributes-key.html",
TEMPLATE_ATTRIBUTES_KEY_WRAPPER,
null,
[
{
open: [5, 7],
close: [10, 11],
},
] as TemplateInfo[],
{
templateInfos: [
{
open: [5, 7],
close: [10, 11],
},
] as TemplateInfo[],
},
],
[
"Template Attributes Value Bare",
"templates-attributes-value-bare.html",
TEMPLATE_ATTRIBUTES_VALUE_BARE,
null,
[[8, 13]] as Range[],
{
templateInfos: [[8, 13]] as Range[],
},
],
[
"Template Attributes Value Wrapped",
"templates-attributes-value-wrapped.html",
TEMPLATE_ATTRIBUTES_VALUE_WRAPPED,
null,
[[9, 14]] as Range[],
{
templateInfos: [[9, 14]] as Range[],
},
],
[
"Template Attributes Value Wrapped 2",
"templates-attributes-value-wrapped-2.html",
TEMPLATE_ATTRIBUTES_VALUE_WRAPPED_2,
null,
[
[16, 22],
[23, 31],
] as Range[],
{
templateInfos: [
[16, 22],
[23, 31],
] as Range[],
},
],
[
"Templates Data",
"templates-data.html",
TEMPLATE_DATA,
null,
[[5, 16]] as Range[],
{
templateInfos: [[5, 16]] as Range[],
},
],
[
"Templates Comment",
"templates-comment.html",
TEMPLATE_COMMENT,
null,
[[4, 14]] as Range[],
{
templateInfos: [[4, 14]] as Range[],
},
],
[
"Templates Script Content",
"templates-script-content.html",
TEMPLATE_SCRIPT_CONTENT,
null,
[[8, 18]] as Range[],
{
templateInfos: [[8, 18]] as Range[],
},
],
[
"Templates Style Content",
"templates-style-content.html",
TEMPLATE_STYLE_CONTENT,
null,
[[7, 17]] as Range[],
{
templateInfos: [[7, 17]] as Range[],
},
],
[
"Templates Content End",
"templates-content-end.html",
TEMPLATE_CONTENT_END,
null,
[[0, 10]] as Range[],
{
templateInfos: [[0, 10]] as Range[],
},
],
[
"Custom Tag Raw Content",
"custom-tag-raw-content.html",
CUSTOM_TAG_RAW_CONTENT,
null,
{
rawContentTags: ["markdown"],
},
],
])(
"%s",
@@ -178,18 +207,14 @@ (
input,
output,
process: null | ((html: string) => string) = null,
ranges: null | TemplateInfo[]
options: TokenizeOptions | null = null
) => {
const inputPath = path.join(__dirname, "__input__", input);
let html = fs.readFileSync(inputPath, "utf-8");
if (process) {
html = process(html);
}
const { tokens } = tokenize(
html,
defaultTokenAdapter,
ranges ?? undefined
);
const { tokens } = tokenize(html, defaultTokenAdapter, options || {});
expect(tokens).toEqual(output);
}
);
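Outside the test table, the same options object is what callers now pass as the third argument to `tokenize`; a direct-call sketch, assuming the relative import paths used by this spec:

```ts
// Sketch of a direct call; import paths mirror the spec's relative imports.
import { tokenize } from "../tokenize";
import { defaultTokenAdapter } from "../../token-adapter";

// templateInfos and rawContentTags now travel together in one options object
// instead of templateInfos being a bare third argument.
const { tokens } = tokenize(
  "<markdown><b>not parsed</b></markdown>",
  defaultTokenAdapter,
  { rawContentTags: ["markdown"] }
);
```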