diff --git a/README.md b/README.md
index 0ac087e..9dd304d 100644
--- a/README.md
+++ b/README.md
@@ -68,6 +68,7 @@ parse(html: string, options?: Options): ParseResult;
 - `html`: HTML string to parse.
 - `options (optional)`
   - `tokenAdapter`: An adapter for customizing token position information.
+  - `rawContentTags` (`string[]`): Tag names whose contents are treated as raw text; inside these tags the parser does not interpret characters such as `<` and `>` as HTML syntax.
 
 **Returns**
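
A minimal usage sketch of the new option (the import name is assumed from this package's public API; any tag name listed in `rawContentTags` behaves the same way):

```ts
import { parse } from "es-html-parser";

// Content between <markdown> and </markdown> comes back as one raw text node;
// the "<" and ">" inside it are not interpreted as HTML syntax.
const { ast } = parse("<markdown>const ok = 1 < 2;</markdown>", {
  rawContentTags: ["markdown"],
});
```
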
diff --git a/src/constants/tokenizer-context-types.ts b/src/constants/tokenizer-context-types.ts
index 23f0ca7..2031e9b 100644
--- a/src/constants/tokenizer-context-types.ts
+++ b/src/constants/tokenizer-context-types.ts
@@ -18,4 +18,5 @@ export enum TokenizerContextTypes {
   CommentOpen = "CommentOpen",
   CommentContent = "CommentContent",
   CommentClose = "CommentClose",
+  CustomTagRawContent = "CustomTagRawContent",
 }
diff --git a/src/parser/parse.ts b/src/parser/parse.ts
index 35b9491..e487cbe 100644
--- a/src/parser/parse.ts
+++ b/src/parser/parse.ts
@@ -7,7 +7,7 @@ import { Options } from "../types/parse";
 export function parse(html: string, options?: Options): ParseResult {
   const tokenAdapter = (options && options.tokenAdapter) || defaultTokenAdapter;
-  const { tokens } = tokenize(html, tokenAdapter, options?.templateInfos);
+  const { tokens } = tokenize(html, tokenAdapter, options);
   const { ast } = constructTree(tokens);
   return {
     ast: clearParent(ast),
diff --git a/src/tokenizer/__tests__/__input__/custom-tag-raw-content.html b/src/tokenizer/__tests__/__input__/custom-tag-raw-content.html
new file mode 100644
index 0000000..2addf7f
--- /dev/null
+++ b/src/tokenizer/__tests__/__input__/custom-tag-raw-content.html
@@ -0,0 +1,17 @@
+
+# Hello, world!
+
+```cpp{4-6,9}
+#include <iostream>
+
+class Example {
+    Example() {
+        std::cout << "Hello, world!" << std::endl;
+    }
+
+    Example(std::string name) {
+        std::cout << "Hello, " << name << std::endl;
+    }
+};
+```
+</markdown>
\ No newline at end of file
diff --git a/src/tokenizer/__tests__/__output__/custom-tag-raw-content.ts b/src/tokenizer/__tests__/__output__/custom-tag-raw-content.ts
new file mode 100644
index 0000000..6264a5b
--- /dev/null
+++ b/src/tokenizer/__tests__/__output__/custom-tag-raw-content.ts
@@ -0,0 +1,161 @@
+import { TokenTypes } from "../../../constants";
+import { AnyToken } from "../../../types";
+
+const OUTPUT: AnyToken[] = [
+  {
+    type: TokenTypes.OpenTagEnd,
+    value: ">",
+    range: [18, 19],
+    loc: {
+      start: {
+        column: 18,
+        line: 1,
+      },
+      end: {
+        line: 1,
+        column: 19,
+      },
+    },
+  },
+  {
+    type: TokenTypes.Text,
+    value: `
+# Hello, world!
+
+\`\`\`cpp{4-6,9}
+#include <iostream>
+
+class Example {
+    Example() {
+        std::cout << "Hello, world!" << std::endl;
+    }
+
+    Example(std::string name) {
+        std::cout << "Hello, " << name << std::endl;
+    }
+};
+\`\`\`
+`,
+    range: [19, 260],
+    loc: {
+      start: {
+        column: 19,
+        line: 1,
+      },
+      end: {
+        line: 17,
+        column: 0,
+      },
+    },
+    parts: [],
+  },
+  {
+    type: TokenTypes.CloseTag,
+    value: "</markdown>",
+    range: [260, 271],
+    loc: {
+      start: {
+        column: 0,
+        line: 17,
+      },
+      end: {
+        line: 17,
+        column: 11,
+      },
+    },
+  },
+];
+
+export default OUTPUT;
diff --git a/src/tokenizer/__tests__/tokenize.spec.ts b/src/tokenizer/__tests__/tokenize.spec.ts
index 853a8bb..91e02a2 100644
--- a/src/tokenizer/__tests__/tokenize.spec.ts
+++ b/src/tokenizer/__tests__/tokenize.spec.ts
@@ -1,6 +1,6 @@
import * as fs from "fs";
import * as path from "path";
-import { tokenize } from "../tokenize";
+import { tokenize, TokenizeOptions } from "../tokenize";
import OPENING_CLOSING_TEXT from "./__output__/opening-closing-text";
import NESTED_TAGS from "./__output__/nested-tags";
import COMMENTS from "./__output__/comments";
@@ -30,7 +30,7 @@ import TEMPLATE_COMMENT from "./__output__/templates-comment";
import TEMPLATE_SCRIPT_CONTENT from "./__output__/templates-script-content";
import TEMPLATE_STYLE_CONTENT from "./__output__/templates-style-content";
import TEMPLATE_CONTENT_END from "./__output__/templates-content-end";
-
+import CUSTOM_TAG_RAW_CONTENT from "./__output__/custom-tag-raw-content";
import { defaultTokenAdapter } from "../../token-adapter";
import { Range, TemplateInfo } from "../../types";
@@ -98,78 +98,107 @@ describe("tokenize", () => {
"templates-attributes-key.html",
TEMPLATE_ATTRIBUTES_KEY,
null,
- [[5, 11]] as Range[],
+ {
+ templateInfos: [[5, 11]] as Range[],
+ },
],
[
"Template Attributes Key (wrapper)",
"templates-attributes-key.html",
TEMPLATE_ATTRIBUTES_KEY_WRAPPER,
null,
- [
- {
- open: [5, 7],
- close: [10, 11],
- },
- ] as TemplateInfo[],
+ {
+ templateInfos: [
+ {
+ open: [5, 7],
+ close: [10, 11],
+ },
+ ] as TemplateInfo[],
+ },
],
[
"Template Attributes Value Bare",
"templates-attributes-value-bare.html",
TEMPLATE_ATTRIBUTES_VALUE_BARE,
null,
- [[8, 13]] as Range[],
+ {
+ templateInfos: [[8, 13]] as Range[],
+ },
],
[
"Template Attributes Value Wrapped",
"templates-attributes-value-wrapped.html",
TEMPLATE_ATTRIBUTES_VALUE_WRAPPED,
null,
- [[9, 14]] as Range[],
+ {
+ templateInfos: [[9, 14]] as Range[],
+ },
],
[
"Template Attributes Value Wrapped 2",
"templates-attributes-value-wrapped-2.html",
TEMPLATE_ATTRIBUTES_VALUE_WRAPPED_2,
null,
- [
- [16, 22],
- [23, 31],
- ] as Range[],
+ {
+ templateInfos: [
+ [16, 22],
+ [23, 31],
+ ] as Range[],
+ },
],
[
"Templates Data",
"templates-data.html",
TEMPLATE_DATA,
null,
- [[5, 16]] as Range[],
+ {
+ templateInfos: [[5, 16]] as Range[],
+ },
],
[
"Templates Comment",
"templates-comment.html",
TEMPLATE_COMMENT,
null,
- [[4, 14]] as Range[],
+ {
+ templateInfos: [[4, 14]] as Range[],
+ },
],
[
"Templates Script Content",
"templates-script-content.html",
TEMPLATE_SCRIPT_CONTENT,
null,
- [[8, 18]] as Range[],
+ {
+ templateInfos: [[8, 18]] as Range[],
+ },
],
[
"Templates Style Content",
"templates-style-content.html",
TEMPLATE_STYLE_CONTENT,
null,
- [[7, 17]] as Range[],
+ {
+ templateInfos: [[7, 17]] as Range[],
+ },
],
[
"Templates Content End",
"templates-content-end.html",
TEMPLATE_CONTENT_END,
null,
- [[0, 10]] as Range[],
+ {
+ templateInfos: [[0, 10]] as Range[],
+ },
+ ],
+ [
+ "Custom Tag Raw Content",
+ "custom-tag-raw-content.html",
+ CUSTOM_TAG_RAW_CONTENT,
+ null,
+ {
+ rawContentTags: ["markdown"],
+ },
],
])(
"%s",
@@ -178,18 +207,14 @@ describe("tokenize", () => {
       input,
       output,
       process: null | ((html: string) => string) = null,
-      ranges: null | TemplateInfo[]
+      options: TokenizeOptions | null = null
     ) => {
       const inputPath = path.join(__dirname, "__input__", input);
       let html = fs.readFileSync(inputPath, "utf-8");
       if (process) {
         html = process(html);
       }
-      const { tokens } = tokenize(
-        html,
-        defaultTokenAdapter,
-        ranges ?? undefined
-      );
+      const { tokens } = tokenize(html, defaultTokenAdapter, options || {});
       expect(tokens).toEqual(output);
     }
   );
diff --git a/src/tokenizer/handlers/custom-tag-raw-content.ts b/src/tokenizer/handlers/custom-tag-raw-content.ts
new file mode 100644
index 0000000..9ad3dea
--- /dev/null
+++ b/src/tokenizer/handlers/custom-tag-raw-content.ts
@@ -0,0 +1,63 @@
+import {
+  TokenizerContextTypes,
+  INCOMPLETE_CLOSING_TAG_PATTERN,
+  TokenTypes,
+} from "../../constants";
+import { calculateTokenPosition, createParts } from "../../utils";
+import { Range, TokenizerState } from "../../types";
+import { CharsBuffer } from "../chars-buffer";
+
+export function parse(chars: CharsBuffer, state: TokenizerState) {
+  // While the buffer could still grow into a closing tag ("<", "</", "</mark"),
+  // keep reading without committing the buffered chars to the text content.
+  if (
+    chars.value() === "<" ||
+    chars.value() === "</" ||
+    INCOMPLETE_CLOSING_TAG_PATTERN.test(chars.value())
+  ) {
+    state.sourceCode.next();
+    return;
+  }
+
+  // Matches the closing tag of the current raw-content element,
+  // e.g. /<\/markdown\s*>/ for tagName "markdown".
+  const regex = new RegExp(
+    "</" +
+      state.contextParams[TokenizerContextTypes.CustomTagRawContent]?.tagName +
+      "\\s*>"
+  );
+
+  if (regex.test(chars.value())) {
+    return parseClosingCustomTag(state);
+  }
+
+  state.accumulatedContent.concatBuffer(state.decisionBuffer);
+  state.decisionBuffer.clear();
+  state.sourceCode.next();
+}
+
+function parseClosingCustomTag(state: TokenizerState) {
+  // Flush any accumulated raw content as a single text token first.
+  if (state.accumulatedContent.value() !== "") {
+    const position = calculateTokenPosition(state, { keepBuffer: false });
+    state.tokens.push({
+      type: TokenTypes.Text,
+      value: state.accumulatedContent.value(),
+      range: position.range,
+      loc: position.loc,
+      parts: createParts(state, TokenTypes.Text),
+    });
+  }
+
+  // The decision buffer now holds the entire closing tag, e.g. "</markdown>".
+  const range: Range = [
+    state.sourceCode.index() - (state.decisionBuffer.length() - 1),
+    state.sourceCode.index() + 1,
+  ];
+
+  state.tokens.push({
+    type: TokenTypes.CloseTag,
+    value: state.decisionBuffer.value(),
+    range,
+    loc: state.sourceCode.getLocationOf(range),
+  });
+
+  // Reset buffers and return to the regular data context.
+  state.accumulatedContent.clear();
+  state.decisionBuffer.clear();
+  state.contextParams[TokenizerContextTypes.CustomTagRawContent] = undefined;
+  state.currentContext = TokenizerContextTypes.Data;
+  state.sourceCode.next();
+}
diff --git a/src/tokenizer/handlers/index.ts b/src/tokenizer/handlers/index.ts
index cbd182b..5156b86 100644
--- a/src/tokenizer/handlers/index.ts
+++ b/src/tokenizer/handlers/index.ts
@@ -17,6 +17,7 @@ export * as openTagEnd from "./open-tag-end";
export * as openTagStart from "./open-tag-start";
export * as scriptTagContent from "./script-tag-content";
export * as styleTagContent from "./style-tag-content";
+export * as customTagRawContent from "./custom-tag-raw-content";
 
 export const noop: TokenizeHandler = {
   parse: () => void 0,
 };
diff --git a/src/tokenizer/handlers/open-tag-end.ts b/src/tokenizer/handlers/open-tag-end.ts
index 2744b6c..deae96f 100644
--- a/src/tokenizer/handlers/open-tag-end.ts
+++ b/src/tokenizer/handlers/open-tag-end.ts
@@ -39,8 +39,16 @@ function parseClosingCornerBrace(state: TokenizerState) {
   state.accumulatedContent.clear();
   state.decisionBuffer.clear();
-  state.currentContext =
-    contextsMap[tagName || "default"] || contextsMap["default"];
+  // Tags listed in rawContentTags switch the tokenizer into the
+  // CustomTagRawContent context instead of the default per-tag contexts.
+  if (state.rawContentTags?.includes(tagName)) {
+    state.contextParams[TokenizerContextTypes.CustomTagRawContent] = {
+      tagName,
+    };
+    state.currentContext = TokenizerContextTypes.CustomTagRawContent;
+  } else {
+    state.currentContext =
+      contextsMap[tagName || "default"] || contextsMap["default"];
+  }
+
   state.sourceCode.next();
   state.contextParams[TokenizerContextTypes.OpenTagEnd] = undefined;
diff --git a/src/tokenizer/tokenize.ts b/src/tokenizer/tokenize.ts
index d7bc264..27c88e3 100644
--- a/src/tokenizer/tokenize.ts
+++ b/src/tokenizer/tokenize.ts
@@ -18,6 +18,7 @@ import {
   styleTagContent,
   attributeValue,
   attributeValueWrapped,
+  customTagRawContent,
   noop,
 } from "./handlers";
 import { TokenizeHandler } from "../types";
@@ -42,6 +43,7 @@ const contextHandlers: Record<TokenizerContextTypes, TokenizeHandler> = {
   [TokenizerContextTypes.DoctypeAttributeWrapped]: doctypeAttributeWrapped,
   [TokenizerContextTypes.DoctypeAttributeBare]: doctypeAttributeBare,
   [TokenizerContextTypes.CommentContent]: commentContent,
+  [TokenizerContextTypes.CustomTagRawContent]: customTagRawContent,
   [TokenizerContextTypes.CommentOpen]: noop,
   [TokenizerContextTypes.CommentClose]: noop,
 };
@@ -61,21 +63,30 @@ function tokenizeChars(state: TokenizerState) {
   }
 }
 
+export type TokenizeOptions = {
+  templateInfos?: TemplateInfo[];
+  rawContentTags?: string[];
+};
+
 export function tokenize(
   source = "",
   tokenAdapter: TokenAdapter,
-  templateInfos?: TemplateInfo[]
+  options: TokenizeOptions = {}
 ): { state: TokenizerState; tokens: AnyToken[] } {
   const tokens: AnyToken[] = [];
   const state: TokenizerState = {
     currentContext: TokenizerContextTypes.Data,
     contextParams: {},
-    mode: templateInfos ? "template" : "default",
-    templateInfos: templateInfos || [],
+    mode: options.templateInfos ? "template" : "default",
+    templateInfos: options.templateInfos || [],
     decisionBuffer: new CharsBuffer(),
     accumulatedContent: new CharsBuffer(),
     tokenAdapter,
-    sourceCode: new SourceCode(source, templateInfos || []),
+    rawContentTags: options.rawContentTags,
+    sourceCode: new SourceCode(source, options.templateInfos || []),
     tokens: {
       push(token: AnyToken) {
         tokens.push({
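
Calling the tokenizer directly with the new options object (a sketch using the module paths from this diff; both fields are optional, so existing callers are unaffected):

```ts
import { tokenize, TokenizeOptions } from "./tokenize";
import { defaultTokenAdapter } from "../token-adapter";

const options: TokenizeOptions = { rawContentTags: ["markdown"] };

// The "a < b" inside <markdown> is emitted as raw text, not parsed as a tag.
const { tokens } = tokenize(
  "<markdown>a < b</markdown>",
  defaultTokenAdapter,
  options
);
```
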
diff --git a/src/tree-constructor/__tests__/construct-tree.spec.ts b/src/tree-constructor/__tests__/construct-tree.spec.ts
index e9f8d0b..49b34cd 100644
--- a/src/tree-constructor/__tests__/construct-tree.spec.ts
+++ b/src/tree-constructor/__tests__/construct-tree.spec.ts
@@ -43,7 +43,6 @@ import TEMPLATE_COMMENT from "../../tokenizer/__tests__/__output__/templates-com
import TEMPLATE_SCRIPT_CONTENT from "../../tokenizer/__tests__/__output__/templates-script-content";
import TEMPLATE_STYLE_CONTENT from "../../tokenizer/__tests__/__output__/templates-style-content";
import TEMPLATE_CONTENT_END from "../../tokenizer/__tests__/__output__/templates-content-end";
-
import { clearParent } from "../../utils";
import { toMatchFile } from "jest-file-snapshot";
diff --git a/src/types/parse.ts b/src/types/parse.ts
index d0925f1..8a71546 100644
--- a/src/types/parse.ts
+++ b/src/types/parse.ts
@@ -10,5 +10,6 @@ export type ParseResult = {
 export type Options = {
   tokenAdapter?: TokenAdapter;
+  rawContentTags?: string[];
   templateInfos?: TemplateInfo[];
 };
diff --git a/src/types/tokenizer-state.ts b/src/types/tokenizer-state.ts
index 68f87ab..ded3b82 100644
--- a/src/types/tokenizer-state.ts
+++ b/src/types/tokenizer-state.ts
@@ -18,6 +18,9 @@ type ContextParams = {
   [TokenizerContextTypes.DoctypeAttributeWrapped]?: {
     wrapper: string;
   };
+  [TokenizerContextTypes.CustomTagRawContent]?: {
+    tagName: string;
+  };
 };
 
 export type TokenizerState = {
@@ -27,6 +30,7 @@ export type TokenizerState = {
   decisionBuffer: CharsBuffer;
   accumulatedContent: CharsBuffer;
   templateInfos: TemplateInfo[];
+  rawContentTags?: string[];
   tokenAdapter: TokenAdapter;
   sourceCode: SourceCode;
   tokens: {