Skip to content

Commit 801b1bb

Browse files
committed
feat(chat): Remove semi-markdown, sanitize markdown to display unknown tags
1 parent 20efd91 commit 801b1bb

File tree

3 files changed

+93
-44
lines changed

3 files changed

+93
-44
lines changed

js/chat/chat.ts

Lines changed: 70 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ import { unsafeHTML } from "lit-html/directives/unsafe-html.js";
33
import { property } from "lit/decorators.js";
44

55
import ClipboardJS from "clipboard";
6-
import { sanitize } from "dompurify";
6+
import DOMPurify from "dompurify";
77
import hljs from "highlight.js/lib/common";
88
import { Renderer, parse } from "marked";
99

@@ -85,26 +85,75 @@ const requestScroll = (el: HTMLElement, cancelIfScrolledUp = false) => {
8585
// because it's confusing if the user is using tag-like syntax to demarcate parts of
8686
// their prompt for other reasons (like <User>/<Assistant> for providing examples to the
8787
// chat model), and those tags simply vanish.
88-
const rendererEscapeHTML = new Renderer();
89-
rendererEscapeHTML.html = (html: string) =>
90-
html
91-
.replaceAll("&", "&amp;")
92-
.replaceAll("<", "&lt;")
93-
.replaceAll(">", "&gt;")
94-
.replaceAll('"', "&quot;")
95-
.replaceAll("'", "&#039;");
96-
const markedEscapeOpts = { renderer: rendererEscapeHTML };
97-
98-
function contentToHTML(
99-
content: string,
100-
content_type: ContentType | "semi-markdown"
101-
) {
102-
if (content_type === "markdown") {
103-
return unsafeHTML(sanitize(parse(content) as string));
104-
} else if (content_type === "semi-markdown") {
105-
return unsafeHTML(sanitize(parse(content, markedEscapeOpts) as string));
88+
// Dedicated DOMPurify instance for markdown content. The hook below is
// registered on this instance only; the "html" content type is sanitized via
// the imported `DOMPurify` object directly.
const markdownDOMPurify = DOMPurify();

// Before attributes are sanitized, rewrite every "&" inside a text node as the
// literal characters "&amp;". Non-text nodes and text nodes with empty content
// are left untouched.
// NOTE(review): presumably this keeps entity-like text visible verbatim after
// serialization — confirm it does not double-escape ordinary ampersands.
markdownDOMPurify.addHook("beforeSanitizeAttributes", (node) => {
  const isTextNode = node.nodeName === "#text";
  if (!isTextNode || !node.textContent) {
    return;
  }
  node.textContent = node.textContent.replaceAll("&", "&amp;");
});
95+
96+
// Element names that are passed through to the sanitizer as real markup.
// Any other tag-like token is escaped so it is displayed literally.
// From https://github.com/bevacqua/insane#defaults
const allowedTags = [
  "a",
  "article",
  "b",
  "blockquote",
  "br",
  "caption",
  "code",
  "del",
  "details",
  "div",
  "em",
  "h1",
  "h2",
  "h3",
  "h4",
  "h5",
  "h6",
  "hr",
  "i",
  "img",
  "ins",
  "kbd",
  "li",
  "main",
  "ol",
  "p",
  "pre",
  "section",
  "span",
  "strike",
  "strong",
  "sub",
  "summary",
  "sup",
  "table",
  "tbody",
  "td",
  "th",
  "thead",
  "tr",
  "u",
  "ul",
];

// Set view of `allowedTags` so each tag lookup is a constant-time check.
const allowedTagSet: ReadonlySet<string> = new Set(allowedTags);

/**
 * Escapes the angle brackets of every tag-like token whose element name is
 * not in `allowedTags`, so the token shows up as visible text instead of
 * being parsed as markup. Tokens for allowed elements are returned unchanged.
 *
 * The element name is the part of the token before the first whitespace
 * character or "/", compared case-insensitively. This means `<br/>`, `<hr/>`
 * and tags whose attributes start on a new line are recognised as their real
 * element rather than being treated as unknown.
 *
 * @param html - HTML string to scan.
 * @returns The input with unknown tags rewritten as `&lt;...&gt;`.
 */
const escapeUnknownTags = (html: string): string =>
  html.replace(
    /<(\/?)([^ >]+)([^>]*)>/g,
    (_match: string, slash: string, tag: string, extra: string) => {
      // `tag` can carry a trailing "/" or newline/tab-separated attributes,
      // because the pattern only stops at a space or ">". Cut it down to the
      // bare element name before consulting the allow-list.
      const name = (tag.split(/[\s/]/, 1)[0] ?? "").toLowerCase();
      const inner = slash + tag + extra;
      return allowedTagSet.has(name) ? `<${inner}>` : `&lt;${inner}&gt;`;
    }
  );
149+
150+
function contentToHTML(content: string, content_type: ContentType) {
151+
if (content_type == "markdown") {
152+
return unsafeHTML(
153+
markdownDOMPurify.sanitize(escapeUnknownTags(parse(content) as string))
154+
);
106155
} else if (content_type === "html") {
107-
return unsafeHTML(sanitize(content));
156+
return unsafeHTML(DOMPurify.sanitize(content));
108157
} else if (content_type === "text") {
109158
return content;
110159
} else {
@@ -188,7 +237,7 @@ class ChatMessage extends LightElement {
188237

189238
class ChatUserMessage extends LightElement {
190239
@property() content = "...";
191-
@property() content_type: ContentType | "semi-markdown" = "semi-markdown";
240+
@property() content_type: ContentType = "markdown";
192241

193242
render(): ReturnType<LitElement["render"]> {
194243
return contentToHTML(this.content, this.content_type);

0 commit comments

Comments
 (0)