@@ -3,7 +3,7 @@ import { unsafeHTML } from "lit-html/directives/unsafe-html.js";
33import { property } from "lit/decorators.js" ;
44
55import ClipboardJS from "clipboard" ;
6- import { sanitize } from "dompurify" ;
6+ import DOMPurify from "dompurify" ;
77import hljs from "highlight.js/lib/common" ;
88import { Renderer , parse } from "marked" ;
99
@@ -85,26 +85,75 @@ const requestScroll = (el: HTMLElement, cancelIfScrolledUp = false) => {
8585// because it's confusing if the user is using tag-like syntax to demarcate parts of
8686// their prompt for other reasons (like <User>/<Assistant> for providing examples to the
8787// chat model), and those tags simply vanish.
// Marked renderer whose `html` hook returns raw HTML with its special
// characters replaced by entities, so tag-like text in the message is shown
// literally instead of being interpreted as markup and vanishing.
const rendererEscapeHTML = new Renderer();
rendererEscapeHTML.html = (html: string) =>
  html
    // `&` must be replaced first; otherwise the `&` of the entities produced
    // by the later replacements would be escaped a second time.
    .replaceAll("&", "&amp;")
    .replaceAll("<", "&lt;")
    .replaceAll(">", "&gt;")
    .replaceAll('"', "&quot;")
    .replaceAll("'", "&#39;");
// Options object that hands the escaping renderer to marked's `parse`.
const markedEscapeOpts = { renderer: rendererEscapeHTML };
97-
98- function contentToHTML (
99- content : string ,
100- content_type : ContentType | "semi-markdown"
101- ) {
102- if ( content_type === "markdown" ) {
103- return unsafeHTML ( sanitize ( parse ( content ) as string ) ) ;
104- } else if ( content_type === "semi-markdown" ) {
105- return unsafeHTML ( sanitize ( parse ( content , markedEscapeOpts ) as string ) ) ;
// Dedicated DOMPurify instance for markdown content, so the hook registered
// below does not affect calls made through the shared `DOMPurify` import.
const markdownDOMPurify = DOMPurify();
markdownDOMPurify.addHook("beforeSanitizeAttributes", (node) => {
  // Escape & in text content. Only non-empty text nodes ("#text") are
  // rewritten; every other node is left untouched.
  // NOTE(review): the replacement string must be the entity "&amp;" --
  // replacing "&" with itself is a no-op. Presumably this keeps entities the
  // user typed visible verbatim after serialization; confirm the intended
  // rendering.
  if (node.nodeName === "#text" && node.textContent) {
    node.textContent = node.textContent.replaceAll("&", "&amp;");
  }
});
95+
// Tags that are allowed through as real markup.
// From https://github.com/bevacqua/insane#defaults
const allowedTags = [
  "a",
  "article",
  "b",
  "blockquote",
  "br",
  "caption",
  "code",
  "del",
  "details",
  "div",
  "em",
  "h1",
  "h2",
  "h3",
  "h4",
  "h5",
  "h6",
  "hr",
  "i",
  "img",
  "ins",
  "kbd",
  "li",
  "main",
  "ol",
  "p",
  "pre",
  "section",
  "span",
  "strike",
  "strong",
  "sub",
  "summary",
  "sup",
  "table",
  "tbody",
  "td",
  "th",
  "thead",
  "tr",
  "u",
  "ul",
];

/**
 * Escapes every tag whose name is not in `allowedTags`, so it is displayed as
 * literal text instead of being interpreted as markup.
 *
 * Allowed tags (matched case-insensitively) are passed through unchanged,
 * including their attributes; any other `<tag ...>` or `</tag>` has its angle
 * brackets replaced by `&lt;` / `&gt;`.
 *
 * @param html - HTML string to scan.
 * @returns The string with unknown tags entity-escaped.
 */
const escapeUnknownTags = (html: string): string =>
  html.replace(
    // Group 1: optional closing slash. Group 2: tag name, which stops at
    // whitespace, "/" or ">" so attributes and a self-closing slash land in
    // group 3 rather than becoming part of the name.
    /<(\/?)([^\s/>]+)([^>]*)>/g,
    (_match: string, slash: string = "", tag: string = "", extra: string = "") =>
      allowedTags.includes(tag.toLowerCase())
        ? `<${slash + tag + extra}>`
        : `&lt;${slash + tag + extra}&gt;`
  );
149+
150+ function contentToHTML ( content : string , content_type : ContentType ) {
151+ if ( content_type == "markdown" ) {
152+ return unsafeHTML (
153+ markdownDOMPurify . sanitize ( escapeUnknownTags ( parse ( content ) as string ) )
154+ ) ;
106155 } else if ( content_type === "html" ) {
107- return unsafeHTML ( sanitize ( content ) ) ;
156+ return unsafeHTML ( DOMPurify . sanitize ( content ) ) ;
108157 } else if ( content_type === "text" ) {
109158 return content ;
110159 } else {
@@ -188,7 +237,7 @@ class ChatMessage extends LightElement {
188237
189238class ChatUserMessage extends LightElement {
190239 @property ( ) content = "..." ;
191- @property ( ) content_type : ContentType | "semi-markdown" = "semi- markdown" ;
240+ @property ( ) content_type : ContentType = "markdown" ;
192241
193242 render ( ) : ReturnType < LitElement [ "render" ] > {
194243 return contentToHTML ( this . content , this . content_type ) ;
0 commit comments