|
154 | 154 | return mutated ? tempDiv.innerHTML : html; |
155 | 155 | } |
156 | 156 |
|
| 157 | + // See also: |
| 158 | + // https://github.com/danny-avila/LibreChat/blob/main/client/src/utils/latex.ts |
| 159 | +
|
// Matches fenced code blocks (```...```) and single-line inline code (`...`).
const codeBlockRegex = /(```[\s\S]*?```|`[^`\n]+`)/g;

/**
 * Normalizes LaTeX delimiters in markdown text before it is handed to the
 * markdown processor.
 *
 * Pipeline:
 *  1. Hide code blocks / inline code behind `<<CODE_BLOCK_n>>` placeholders.
 *  2. Hide existing math (`$$...$$`, `\[...\]`, `\(...\)` and inline `$...$`
 *     that does not look like a currency amount) behind `<<LATEX_n>>`.
 *  3. Escape the remaining `$` that directly precede a digit (`$5` → `\$5`).
 *  4. Restore the math expressions.
 *  5. Convert `\(...\)` → `$...$` and `\[...\]` → `$$...$$`, and apply the
 *     mhchem escaping when `\ce{` / `\pu{` is present.
 *  6. Restore the code blocks.
 *
 * Code blocks are restored last so that none of the rewriting steps can
 * touch text inside code.
 *
 * @param content Raw markdown text.
 * @returns The text with currency dollars escaped and math delimiters
 *          normalized to `$` / `$$`.
 */
export function preprocessLaTeX(content: string): string {
  // Step 1: Protect code blocks
  const codeBlocks: string[] = [];
  content = content.replace(codeBlockRegex, (match) => {
    codeBlocks.push(match);
    return `<<CODE_BLOCK_${codeBlocks.length - 1}>>`;
  });

  // Step 2: Protect existing LaTeX expressions
  const latexExpressions: string[] = [];

  // Match \(...\), \[...\], $$...$$ and protect them
  content = content.replace(/(\$\$[\s\S]*?\$\$|\\\[[\s\S]*?\\\]|\\\(.*?\\\))/g, (match) => {
    latexExpressions.push(match);
    return `<<LATEX_${latexExpressions.length - 1}>>`;
  });

  // Protect inline $...$ but NOT if it looks like money (e.g., $10, $3.99)
  content = protectLaTeXButNotMoney(content, latexExpressions);

  // Step 3: Escape standalone $ before digits (currency like $5 → \$5).
  // Inline math is hidden at this point, so only unprotected dollars are hit.
  content = content.replace(/\$(?=\d)/g, '\\$');

  // Step 4: Restore protected LaTeX expressions (they are valid).
  // Placeholder-looking text with an unknown index is left as-is instead of
  // being replaced by the string "undefined".
  content = content.replace(
    /<<LATEX_(\d+)>>/g,
    (placeholder, index: string) => latexExpressions[Number(index)] ?? placeholder
  );

  // Step 5: Apply additional escaping functions (brackets and mhchem).
  // Code is still hidden behind placeholders, so it cannot be modified here.
  content = escapeBrackets(content);
  if (content.includes('\\ce{') || content.includes('\\pu{')) {
    content = escapeMhchem(content);
  }

  // Final pass: Convert any remaining \(...\) → $...$, \[...\] → $$...$$.
  // Function replacers are used on purpose: in a replacement *string*, `$$`
  // is an escape for a single `$`, which makes dollar-heavy patterns easy to
  // get wrong (e.g. '$$$$1$$' yields the literal text `$$1$`).
  content = content
    .replace(/\\\((.+?)\\\)/g, (_, body: string) => `$${body}$`) // inline
    .replace(/\\\[(.+?)\\\]/g, (_, body: string) => `$$${body}$$`); // display

  // Step 6: Restore code blocks last, untouched.
  return content.replace(
    /<<CODE_BLOCK_(\d+)>>/g,
    (placeholder, index: string) => codeBlocks[Number(index)] ?? placeholder
  );
}
| 210 | +
|
/**
 * Replaces inline `$...$` math with `<<LATEX_n>>` placeholders, line by line,
 * while leaving dollar signs that look like currency untouched.
 *
 * A `$` is NOT treated as a math opener when
 *  - the character right before it is a letter, digit, `_`, `$` or `-`, or
 *  - it is followed by a digit and the character after the next `$` is a
 *    letter, digit, `_`, `$` or `-` (e.g. `$10 and $20`).
 *
 * @param content          Text to scan; math never spans a line break here.
 * @param latexExpressions Output list. Every protected expression (including
 *                         its `$` delimiters) is appended, and its position in
 *                         this array is the `n` of the placeholder.
 * @returns The text with inline math replaced by placeholders. All other
 *          characters are preserved.
 */
function protectLaTeXButNotMoney(content: string, latexExpressions: string[]): string {
  if (!content.includes('$')) {
    return content;
  }

  const blocksOpener = /[A-Za-z0-9_$-]/;
  const digit = /[0-9]/;

  return content
    .split('\n')
    .map((line) => {
      if (!line.includes('$')) {
        return line;
      }
      let result = '';
      let index = 0;
      // A math span needs at least three characters (`$x$`), so scanning can
      // stop once fewer than that remain.
      while (index + 2 < line.length) {
        const openIndex = line.indexOf('$', index);
        if (openIndex === -1) {
          break;
        }

        // Is there a next $-sign?
        const nextIndex = line.indexOf('$', openIndex + 1);
        if (nextIndex === -1) {
          break;
        }

        // charAt returns '' outside the string, which matches neither regex.
        const beforeOpenChar = line.charAt(openIndex - 1);
        const afterOpenChar = line.charAt(openIndex + 1);
        const afterCloseChar = line.charAt(nextIndex + 1);

        const glued = blocksOpener.test(beforeOpenChar);
        const looksLikeAmount = digit.test(afterOpenChar) && blocksOpener.test(afterCloseChar);
        if (glued || looksLikeAmount) {
          // Not TeX: copy up to and including this '$' and keep scanning.
          result += line.slice(index, openIndex + 1);
          index = openIndex + 1;
          continue;
        }

        // Treat as LaTeX
        result += line.slice(index, openIndex);
        latexExpressions.push(line.slice(openIndex, nextIndex + 1));
        result += `<<LATEX_${latexExpressions.length - 1}>>`;
        index = nextIndex + 1;
      }
      // Always keep whatever was not consumed. Without this, text after the
      // last math span (and any line shorter than three characters) is lost.
      return result + line.slice(index);
    })
    .join('\n');
}
| 264 | +
|
/**
 * Rewrites bracket-style math delimiters to dollar-style ones:
 * `\[ ... \]` becomes `$$ ... $$` and `\( ... \)` becomes `$ ... $`.
 * Fenced code blocks and inline code spans are matched first and returned
 * unchanged, so delimiters inside code are not rewritten.
 *
 * @param text Markdown text to convert.
 * @returns The text with bracket delimiters replaced outside of code.
 */
function escapeBrackets(text: string): string {
  // Alternatives, in order: code (group 1), display math (group 2),
  // inline math (group 3).
  const delimiters = /(```[\S\s]*?```|`.*?`)|\\\[([\S\s]*?[^\\])\\]|\\\((.*?)\\\)/g;

  const rewrite = (whole: string, code?: string, display?: string, inline?: string): string => {
    if (code != null) {
      return code;
    }
    if (display != null) {
      return `$$${display}$$`;
    }
    if (inline != null) {
      return `$${inline}$`;
    }
    return whole;
  };

  return text.replace(delimiters, rewrite);
}
| 286 | +
|
/**
 * Doubles the backslash of mhchem commands that directly follow a `$`:
 * `$\ce{` becomes `$\\ce{` and `$\pu{` becomes `$\\pu{`.
 * Occurrences not immediately preceded by `$` are left alone.
 */
function escapeMhchem(text: string): string {
  const doubleBackslash = (input: string, command: string): string =>
    input.split('$\\' + command + '{').join('$\\\\' + command + '{');

  return doubleBackslash(doubleBackslash(text, 'ce'), 'pu');
}
| 291 | +
|
157 | 292 | async function processMarkdown(text: string): Promise<string> { |
158 | 293 | try { |
159 | | - const result = await processor().process(text); |
| 294 | + const processedText = preprocessLaTeX(text); |
| 295 | +
|
| 296 | + const result = await processor().process(processedText); |
160 | 297 | const html = String(result); |
161 | 298 | const enhancedLinks = enhanceLinks(html); |
162 | 299 |
|
|
0 commit comments