@@ -47,21 +47,65 @@ export function mdSerialize(model: EditorModel) {
4747
4848export function htmlSerializeIfNeeded ( model : EditorModel , { forceHTML = false } = { } ) {
4949 let md = mdSerialize ( model ) ;
50+ // copy of raw input to remove unwanted math later
51+ const orig = md ;
5052
5153 if ( SettingsStore . getValue ( "feature_latex_maths" ) ) {
52- const displayPattern = ( SdkConfig . get ( ) [ 'latex_maths_delims' ] || { } ) [ 'display_pattern' ] ||
53- "\\$\\$(([^$]|\\\\\\$)*)\\$\\$" ;
54- const inlinePattern = ( SdkConfig . get ( ) [ 'latex_maths_delims' ] || { } ) [ 'inline_pattern' ] ||
55- "\\$(([^$]|\\\\\\$)*)\\$" ;
56-
57- md = md . replace ( RegExp ( displayPattern , "gm" ) , function ( m , p1 ) {
58- const p1e = AllHtmlEntities . encode ( p1 ) ;
59- return `<div data-mx-maths="${ p1e } ">\n\n</div>\n\n` ;
60- } ) ;
61-
62- md = md . replace ( RegExp ( inlinePattern , "gm" ) , function ( m , p1 ) {
63- const p1e = AllHtmlEntities . encode ( p1 ) ;
64- return `<span data-mx-maths="${ p1e } "></span>` ;
54+ const patternNames = [ 'tex' , 'latex' ] ;
55+ const patternTypes = [ 'display' , 'inline' ] ;
56+ const patternDefaults = {
57+ "tex" : {
58+ // detect math with tex delimiters, inline: $...$, display $$...$$
59+ // preferably use negative lookbehinds, not supported in all major browsers:
60+ // const displayPattern = "^(?<!\\\\)\\$\\$(?![ \\t])(([^$]|\\\\\\$)+?)\\$\\$$";
61+ // const inlinePattern = "(?:^|\\s)(?<!\\\\)\\$(?!\\s)(([^$]|\\\\\\$)+?)(?<!\\\\|\\s)\\$";
62+
63+ // conditions for display math detection $$...$$:
64+ // - pattern starts at beginning of line or is not prefixed with backslash or dollar
65+ // - left delimiter ($$) is not escaped by backslash
66+ "display" : "(^|[^\\\\$])\\$\\$(([^$]|\\\\\\$)+?)\\$\\$" ,
67+
68+ // conditions for inline math detection $...$:
69+ // - pattern starts at beginning of line, follows whitespace character or punctuation
70+ // - pattern is on a single line
71+ // - left and right delimiters ($) are not escaped by backslashes
72+ // - left delimiter is not followed by whitespace character
73+ // - right delimiter is not prefixed with whitespace character
74+ "inline" :
75+ "(^|\\s|[.,!?:;])(?!\\\\)\\$(?!\\s)(([^$\\n]|\\\\\\$)*([^\\\\\\s\\$]|\\\\\\$)(?:\\\\\\$)?)\\$" ,
76+ } ,
77+ "latex" : {
78+ // detect math with latex delimiters, inline: \(...\), display \[...\]
79+
80+ // conditions for display math detection \[...\]:
81+ // - pattern starts at beginning of line or is not prefixed with backslash
82+ // - pattern is not empty
83+ "display" : "(^|[^\\\\])\\\\\\[(?!\\\\\\])(.*?)\\\\\\]" ,
84+
85+ // conditions for inline math detection \(...\):
86+ // - pattern starts at beginning of line or is not prefixed with backslash
87+ // - pattern is not empty
88+ "inline" : "(^|[^\\\\])\\\\\\((?!\\\\\\))(.*?)\\\\\\)" ,
89+ } ,
90+ } ;
91+
92+ patternNames . forEach ( function ( patternName ) {
93+ patternTypes . forEach ( function ( patternType ) {
94+ // get the regex replace pattern from config or use the default
95+ const pattern = ( ( ( SdkConfig . get ( ) [ "latex_maths_delims" ] ||
96+ { } ) [ patternType ] || { } ) [ "pattern" ] || { } ) [ patternName ] ||
97+ patternDefaults [ patternName ] [ patternType ] ;
98+
99+ md = md . replace ( RegExp ( pattern , "gms" ) , function ( m , p1 , p2 ) {
100+ const p2e = AllHtmlEntities . encode ( p2 ) ;
101+ switch ( patternType ) {
102+ case "display" :
103+ return `${ p1 } <div data-mx-maths="${ p2e } ">\n\n</div>\n\n` ;
104+ case "inline" :
105+ return `${ p1 } <span data-mx-maths="${ p2e } "></span>` ;
106+ }
107+ } ) ;
108+ } ) ;
65109 } ) ;
66110
67111 // make sure div tags always start on a new line, otherwise it will confuse
@@ -73,15 +117,29 @@ export function htmlSerializeIfNeeded(model: EditorModel, {forceHTML = false} =
73117 if ( ! parser . isPlainText ( ) || forceHTML ) {
74118 // feed Markdown output to HTML parser
75119 const phtml = cheerio . load ( parser . toHTML ( ) ,
76- { _useHtmlParser2 : true , decodeEntities : false } )
77-
78- // add fallback output for latex math, which should not be interpreted as markdown
79- phtml ( 'div, span' ) . each ( function ( i , e ) {
80- const tex = phtml ( e ) . attr ( 'data-mx-maths' )
81- if ( tex ) {
82- phtml ( e ) . html ( `<code>${ tex } </code>` )
83- }
84- } ) ;
120+ { _useHtmlParser2 : true , decodeEntities : false } ) ;
121+
122+ if ( SettingsStore . getValue ( "feature_latex_maths" ) ) {
123+ // original Markdown without LaTeX replacements
124+ const parserOrig = new Markdown ( orig ) ;
125+ const phtmlOrig = cheerio . load ( parserOrig . toHTML ( ) ,
126+ { _useHtmlParser2 : true , decodeEntities : false } ) ;
127+
128+ // since maths delimiters are handled before Markdown,
129+ // code blocks could contain mangled content.
130+ // replace code blocks with original content
131+ phtmlOrig ( 'code' ) . each ( function ( i ) {
132+ phtml ( 'code' ) . eq ( i ) . text ( phtmlOrig ( 'code' ) . eq ( i ) . text ( ) ) ;
133+ } ) ;
134+
135+ // add fallback output for latex math, which should not be interpreted as markdown
136+ phtml ( 'div, span' ) . each ( function ( i , e ) {
137+ const tex = phtml ( e ) . attr ( 'data-mx-maths' )
138+ if ( tex ) {
139+ phtml ( e ) . html ( `<code>${ tex } </code>` )
140+ }
141+ } ) ;
142+ }
85143 return phtml . html ( ) ;
86144 }
87145 // ensure removal of escape backslashes in non-Markdown messages
0 commit comments