1+ import {
2+ CODE_BLOCK_REGEXP ,
3+ LATEX_MATH_AND_CODE_PATTERN ,
4+ MHCHEM_PATTERN_MAP
5+ } from '$lib/constants/latex-protection' ;
6+
17/**
28 * Replaces inline LaTeX expressions enclosed in `$...$` with placeholders, avoiding dollar signs
39 * that appear to be part of monetary values or identifiers.
@@ -43,7 +49,8 @@ export function maskInlineLaTeX(content: string, latexExpressions: string[]): st
4349
4450 const charBeforeOpen = openDollarIndex > 0 ? line [ openDollarIndex - 1 ] : '' ;
4551 const charAfterOpen = line [ openDollarIndex + 1 ] ;
46- const charBeforeClose = openDollarIndex + 1 < closeDollarIndex ? line [ closeDollarIndex - 1 ] : '' ;
52+ const charBeforeClose =
53+ openDollarIndex + 1 < closeDollarIndex ? line [ closeDollarIndex - 1 ] : '' ;
4754 const charAfterClose = closeDollarIndex + 1 < line . length ? line [ closeDollarIndex + 1 ] : '' ;
4855
4956 let shouldSkipAsNonLatex = false ;
@@ -87,14 +94,8 @@ export function maskInlineLaTeX(content: string, latexExpressions: string[]): st
8794}
8895
8996function escapeBrackets ( text : string ) : string {
90- // Using the look‑behind pattern `(?<!\\)` we skip matches
91- // that are preceded by a backslash, e.g.
92- // `Definitions\\(also called macros)` (title of chapter 20 in The TeXbook)
93- // or `\\[4pt]`.
94- const pattern = / ( ` ` ` [ \S \s ] * ?` ` ` | ` .* ?` ) | (?< ! \\ ) \\ \[ ( [ \S \s ] * ?[ ^ \\ ] ) \\ ] | (?< ! \\ ) \\ \( ( .* ?) \\ \) / g;
95-
9697 return text . replace (
97- pattern ,
98+ LATEX_MATH_AND_CODE_PATTERN ,
9899 (
99100 match : string ,
100101 codeBlock : string | undefined ,
@@ -116,14 +117,12 @@ function escapeBrackets(text: string): string {
116117
/**
 * Runs every mhchem rewrite rule over the given text.
 *
 * `MHCHEM_PATTERN_MAP` supplies `[pattern, replacement]` pairs. They are applied
 * in array order, each one operating on the output of the previous one.
 * NOTE(review): the pairs presumably escape the backslash of `$\ce{` / `$\pu{`
 * openers — confirm against `$lib/constants/latex-protection`.
 *
 * @param text - Content to rewrite.
 * @returns The text after all pairs have been applied.
 */
function escapeMhchem(text: string): string {
	let escaped = text;

	for (const [pattern, replacement] of MHCHEM_PATTERN_MAP) {
		escaped = escaped.replace(pattern, replacement);
	}

	return escaped;
}
121124
122- // See also:
123- // https://github.com/danny-avila/LibreChat/blob/main/client/src/utils/latex.ts
124-
125- // Protect code blocks: ```...``` and `...`
126- const codeBlockRegex = / ( ` ` ` [ \s \S ] * ?` ` ` | ` [ ^ ` \n ] + ` ) / g;
// Switch for the mhchem escaping step: preprocessLaTeX only calls escapeMhchem
// when this is true, so with `false` content containing `\ce{` / `\pu{` is left
// untouched by that step.
125+ const doEscapeMhchem = false ;
127126
128127/**
129128 * Preprocesses markdown content to safely handle LaTeX math expressions while protecting
@@ -145,10 +144,13 @@ const codeBlockRegex = /(```[\s\S]*?```|`[^`\n]+`)/g;
145144 * // → "Price: $10. The equation is $x^2$."
146145 */
147146export function preprocessLaTeX ( content : string ) : string {
147+ // See also:
148+ // https://github.com/danny-avila/LibreChat/blob/main/client/src/utils/latex.ts
149+
148150 // Step 1: Protect code blocks
149151 const codeBlocks : string [ ] = [ ] ;
150152
151- content = content . replace ( codeBlockRegex , ( match ) => {
153+ content = content . replace ( CODE_BLOCK_REGEXP , ( match ) => {
152154 codeBlocks . push ( match ) ;
153155
154156 return `<<CODE_BLOCK_${ codeBlocks . length - 1 } >>` ;
@@ -207,7 +209,7 @@ export function preprocessLaTeX(content: string): string {
207209 // Step 6: Apply additional escaping functions (brackets and mhchem)
208210 content = escapeBrackets ( content ) ;
209211
210- if ( content . includes ( '\\ce{' ) || content . includes ( '\\pu{' ) ) {
212+ if ( doEscapeMhchem && ( content . includes ( '\\ce{' ) || content . includes ( '\\pu{' ) ) ) {
211213 content = escapeMhchem ( content ) ;
212214 }
213215
0 commit comments