Skip to content

Commit 69ab37c

Browse files
committed
webui: Moved constants to lib/constants.
1 parent 3191b63 commit 69ab37c

File tree

4 files changed

+77
-18
lines changed

4 files changed

+77
-18
lines changed
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
/**
2+
* Matches common Markdown code blocks to exclude them from further processing (e.g. LaTeX).
3+
* - Fenced: ```...```
4+
* - Inline: `...` (does NOT support nested backticks or multi-backtick syntax)
5+
*
6+
* Note: This pattern does not handle advanced cases like:
7+
* `` `code with `backticks` `` or \\``...\\``
8+
*/
9+
export const CODE_BLOCK_REGEXP = /(```[\s\S]*?```|`[^`\n]+`)/g;
10+
11+
/**
12+
* Matches LaTeX math delimiters \(...\) and \[...\] only when not preceded by a backslash (i.e., not escaped),
13+
* while also capturing code blocks (```, `...`) so they can be skipped during processing.
14+
*
15+
* Uses the negative lookbehind `(?<!\\)` to avoid matching \\( or \\[,
16+
* i.e. it skips delimiters that are themselves
17+
* preceded by a backslash, e.g.
18+
* `Definitions\\(also called macros)` (title of chapter 20 in The TeXbook)
19+
* or `\\[4pt]` (LaTeX line-break).
20+
*
21+
* group 1: code-block
22+
* group 2: square-bracket
23+
* group 3: round-bracket
24+
*/
25+
export const LATEX_MATH_AND_CODE_PATTERN =
26+
/(```[\S\s]*?```|`.*?`)|(?<!\\)\\\[([\S\s]*?[^\\])\\]|(?<!\\)\\\((.*?)\\\)/g;
27+
28+
/** Maps each mhchem regexp to its replacement string. */
29+
export const MHCHEM_PATTERN_MAP: readonly [RegExp, string][] = [
30+
[/(\s)\$\\ce{/g, '$1$\\\\ce{'],
31+
[/(\s)\$\\pu{/g, '$1$\\\\pu{']
32+
] as const;

tools/server/webui/src/lib/utils/latex-protection.test.ts

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -276,8 +276,7 @@ $$`
276276
const input = 'Chemical reaction: \\( \\ce{H2O} \\) and $\\ce{CO2}$';
277277
const output = preprocessLaTeX(input);
278278

279-
expect(output).toBe('Chemical reaction: $ \\ce{H2O} $ and $\\\\ce{CO2}$');
280-
// Note: \\ce{...} remains, but $\\ce{...} → $\\\\ce{...} via escapeMhchem
279+
expect(output).toBe('Chemical reaction: $ \\ce{H2O} $ and $\\ce{CO2}$');
281280
});
282281

283282
test('preserves code blocks', () => {
@@ -286,4 +285,20 @@ $$`
286285

287286
expect(output).toBe(input); // Code blocks prevent misinterpretation
288287
});
288+
289+
test('escape backslash in mchem ce', () => {
290+
const input = 'mchem ce:\n$\\ce{2H2(g) + O2(g) -> 2H2O(l)}$';
291+
const output = preprocessLaTeX(input);
292+
293+
// mhchem-escape would insert a backslash here.
294+
expect(output).toBe('mchem ce:\n$\\ce{2H2(g) + O2(g) -> 2H2O(l)}$');
295+
});
296+
297+
test('escape backslash in mchem pu', () => {
298+
const input = 'mchem pu:\n$\\pu{-572 kJ mol^{-1}}$';
299+
const output = preprocessLaTeX(input);
300+
301+
// mhchem-escape would insert a backslash here.
302+
expect(output).toBe('mchem pu:\n$\\pu{-572 kJ mol^{-1}}$');
303+
});
289304
});

tools/server/webui/src/lib/utils/latex-protection.ts

Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
import {
2+
CODE_BLOCK_REGEXP,
3+
LATEX_MATH_AND_CODE_PATTERN,
4+
MHCHEM_PATTERN_MAP
5+
} from '$lib/constants/latex-protection';
6+
17
/**
28
* Replaces inline LaTeX expressions enclosed in `$...$` with placeholders, avoiding dollar signs
39
* that appear to be part of monetary values or identifiers.
@@ -43,7 +49,8 @@ export function maskInlineLaTeX(content: string, latexExpressions: string[]): st
4349

4450
const charBeforeOpen = openDollarIndex > 0 ? line[openDollarIndex - 1] : '';
4551
const charAfterOpen = line[openDollarIndex + 1];
46-
const charBeforeClose = openDollarIndex + 1 < closeDollarIndex ? line[closeDollarIndex - 1] : '';
52+
const charBeforeClose =
53+
openDollarIndex + 1 < closeDollarIndex ? line[closeDollarIndex - 1] : '';
4754
const charAfterClose = closeDollarIndex + 1 < line.length ? line[closeDollarIndex + 1] : '';
4855

4956
let shouldSkipAsNonLatex = false;
@@ -87,14 +94,8 @@ export function maskInlineLaTeX(content: string, latexExpressions: string[]): st
8794
}
8895

8996
function escapeBrackets(text: string): string {
90-
// Using the look‑behind pattern `(?<!\\)` we skip matches
91-
// that are preceded by a backslash, e.g.
92-
// `Definitions\\(also called macros)` (title of chapter 20 in The TeXbook)
93-
// or `\\[4pt]`.
94-
const pattern = /(```[\S\s]*?```|`.*?`)|(?<!\\)\\\[([\S\s]*?[^\\])\\]|(?<!\\)\\\((.*?)\\\)/g;
95-
9697
return text.replace(
97-
pattern,
98+
LATEX_MATH_AND_CODE_PATTERN,
9899
(
99100
match: string,
100101
codeBlock: string | undefined,
@@ -116,14 +117,12 @@ function escapeBrackets(text: string): string {
116117

117118
// Escape $\ce{...} → $\\ce{...} (and likewise $\pu{...}) by doubling the backslash
118119
function escapeMhchem(text: string): string {
119-
return text.replaceAll('$\\ce{', '$\\\\ce{').replaceAll('$\\pu{', '$\\\\pu{');
120+
return MHCHEM_PATTERN_MAP.reduce((result, [pattern, replacement]) => {
121+
return result.replace(pattern, replacement);
122+
}, text);
120123
}
121124

122-
// See also:
123-
// https://github.com/danny-avila/LibreChat/blob/main/client/src/utils/latex.ts
124-
125-
// Protect code blocks: ```...``` and `...`
126-
const codeBlockRegex = /(```[\s\S]*?```|`[^`\n]+`)/g;
125+
const doEscapeMhchem = false;
127126

128127
/**
129128
* Preprocesses markdown content to safely handle LaTeX math expressions while protecting
@@ -145,10 +144,13 @@ const codeBlockRegex = /(```[\s\S]*?```|`[^`\n]+`)/g;
145144
* // → "Price: $10. The equation is $x^2$."
146145
*/
147146
export function preprocessLaTeX(content: string): string {
147+
// See also:
148+
// https://github.com/danny-avila/LibreChat/blob/main/client/src/utils/latex.ts
149+
148150
// Step 1: Protect code blocks
149151
const codeBlocks: string[] = [];
150152

151-
content = content.replace(codeBlockRegex, (match) => {
153+
content = content.replace(CODE_BLOCK_REGEXP, (match) => {
152154
codeBlocks.push(match);
153155

154156
return `<<CODE_BLOCK_${codeBlocks.length - 1}>>`;
@@ -207,7 +209,7 @@ export function preprocessLaTeX(content: string): string {
207209
// Step 6: Apply additional escaping functions (brackets and mhchem)
208210
content = escapeBrackets(content);
209211

210-
if (content.includes('\\ce{') || content.includes('\\pu{')) {
212+
if (doEscapeMhchem && (content.includes('\\ce{') || content.includes('\\pu{'))) {
211213
content = escapeMhchem(content);
212214
}
213215

tools/server/webui/src/stories/fixtures/math-formulas.ts

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,16 @@ x = \frac{-b \pm \sqrt{\,b^{2}-4ac\,}}{2a}
199199
| | \(\displaystyle \prod_{k=1}^{n}k = n! \) | Factorial definition |
200200
| **Geometry** | \( \mathbf{a}\cdot \mathbf{b} = \|\mathbf{a}\|\,\|\mathbf{b}\|\,\cos\theta \) | Dot product & angle |
201201
202+
## No math (but chemical)
203+
204+
Balanced chemical reaction with states:
205+
206+
\[
207+
\ce{2H2(g) + O2(g) -> 2H2O(l)}
208+
\]
209+
210+
The standard enthalpy change for the reaction is: $\Delta H^\circ = \pu{-572 kJ mol^{-1}}$.
211+
202212
---
203213
204214
*This document showcases various mathematical notation and formulas that can be rendered in markdown using LaTeX syntax.*

0 commit comments

Comments
 (0)