diff --git a/tools/server/public/index.html.gz b/tools/server/public/index.html.gz index c026f36c4844d..8a58654b33f2d 100644 Binary files a/tools/server/public/index.html.gz and b/tools/server/public/index.html.gz differ diff --git a/tools/server/webui/src/lib/components/app/misc/MarkdownContent.svelte b/tools/server/webui/src/lib/components/app/misc/MarkdownContent.svelte index 24d29c2b3e51e..a4be13168dd3f 100644 --- a/tools/server/webui/src/lib/components/app/misc/MarkdownContent.svelte +++ b/tools/server/webui/src/lib/components/app/misc/MarkdownContent.svelte @@ -8,6 +8,7 @@ import rehypeKatex from 'rehype-katex'; import rehypeStringify from 'rehype-stringify'; import { copyCodeToClipboard } from '$lib/utils/copy'; + import { preprocessLaTeX } from '$lib/utils/latex-protection'; import { browser } from '$app/environment'; import 'katex/dist/katex.min.css'; @@ -156,7 +157,9 @@ async function processMarkdown(text: string): Promise { try { - const result = await processor().process(text); + const processedText = preprocessLaTeX(text); + + const result = await processor().process(processedText); const html = String(result); const enhancedLinks = enhanceLinks(html); diff --git a/tools/server/webui/src/lib/utils/latex-protection.test.ts b/tools/server/webui/src/lib/utils/latex-protection.test.ts new file mode 100644 index 0000000000000..9e2c4727798a7 --- /dev/null +++ b/tools/server/webui/src/lib/utils/latex-protection.test.ts @@ -0,0 +1,236 @@ +/* eslint-disable no-irregular-whitespace */ +import { describe, it, expect, test } from 'vitest'; +import { maskInlineLaTeX, preprocessLaTeX } from './latex-protection'; + +describe('maskInlineLaTeX', () => { + it('should protect LaTeX $x + y$ but not money $3.99', () => { + const latexExpressions: string[] = []; + const input = 'I have $10, $3.99 and $x + y$ and $100x$. The amount is $2,000.'; + const output = maskInlineLaTeX(input, latexExpressions); + + expect(output).toBe('I have $10, $3.99 and <> and <>. 
The amount is $2,000.'); + expect(latexExpressions).toEqual(['$x + y$', '$100x$']); + }); + + it('should ignore money like $5 and $12.99', () => { + const latexExpressions: string[] = []; + const input = 'Prices are $12.99 and $5. Tax?'; + const output = maskInlineLaTeX(input, latexExpressions); + + expect(output).toBe('Prices are $12.99 and $5. Tax?'); + expect(latexExpressions).toEqual([]); + }); + + it('should protect inline math $a^2 + b^2$ even after text', () => { + const latexExpressions: string[] = []; + const input = 'Pythagorean: $a^2 + b^2 = c^2$.'; + const output = maskInlineLaTeX(input, latexExpressions); + + expect(output).toBe('Pythagorean: <>.'); + expect(latexExpressions).toEqual(['$a^2 + b^2 = c^2$']); + }); + + it('should not protect math that has letter after closing $ (e.g. units)', () => { + const latexExpressions: string[] = []; + const input = 'The cost is $99 and change.'; + const output = maskInlineLaTeX(input, latexExpressions); + + expect(output).toBe('The cost is $99 and change.'); + expect(latexExpressions).toEqual([]); + }); + + it('should allow $x$ followed by punctuation', () => { + const latexExpressions: string[] = []; + const input = 'We know $x$, right?'; + const output = maskInlineLaTeX(input, latexExpressions); + + expect(output).toBe('We know <>, right?'); + expect(latexExpressions).toEqual(['$x$']); + }); + + it('should work across multiple lines', () => { + const latexExpressions: string[] = []; + const input = `Emma buys cupcakes for $3 each.\nHow much is $x + y$?`; + const output = maskInlineLaTeX(input, latexExpressions); + + expect(output).toBe(`Emma buys cupcakes for $3 each.\nHow much is <>?`); + expect(latexExpressions).toEqual(['$x + y$']); + }); + + it('should not protect $100 but protect $matrix$', () => { + const latexExpressions: string[] = []; + const input = '$100 and $\\mathrm{GL}_2(\\mathbb{F}_7)$ are different.'; + const output = maskInlineLaTeX(input, latexExpressions); + + expect(output).toBe('$100 and <> 
are different.'); + expect(latexExpressions).toEqual(['$\\mathrm{GL}_2(\\mathbb{F}_7)$']); + }); + + it('should skip if $ is followed by digit and alphanumeric after close (money)', () => { + const latexExpressions: string[] = []; + const input = 'I paid $5 quickly.'; + const output = maskInlineLaTeX(input, latexExpressions); + + expect(output).toBe('I paid $5 quickly.'); + expect(latexExpressions).toEqual([]); + }); + + it('should protect LaTeX even with special chars inside', () => { + const latexExpressions: string[] = []; + const input = 'Consider $\\alpha_1 + \\beta_2$ now.'; + const output = maskInlineLaTeX(input, latexExpressions); + + expect(output).toBe('Consider <> now.'); + expect(latexExpressions).toEqual(['$\\alpha_1 + \\beta_2$']); + }); + + it('short text', () => { + const latexExpressions: string[] = ['$0$']; + const input = '$a$\n$a$ and $b$'; + const output = maskInlineLaTeX(input, latexExpressions); + + expect(output).toBe('<>\n<> and <>'); + expect(latexExpressions).toEqual(['$0$', '$a$', '$a$', '$b$']); + }); + + it('empty text', () => { + const latexExpressions: string[] = []; + const input = '$\n$$\n'; + const output = maskInlineLaTeX(input, latexExpressions); + + expect(output).toBe('$\n$$\n'); + expect(latexExpressions).toEqual([]); + }); +}); + +describe('preprocessLaTeX', () => { + test('converts inline \\( ... \\) to $...$', () => { + const input = + '\\( \\mathrm{GL}_2(\\mathbb{F}_7) \\): Group of invertible matrices with entries in \\(\\mathbb{F}_7\\).'; + const output = preprocessLaTeX(input); + expect(output).toBe( + '$ \\mathrm{GL}_2(\\mathbb{F}_7) $: Group of invertible matrices with entries in $\\mathbb{F}_7$.' + ); + }); + + test('preserves display math \\[ ... 
\\] and protects adjacent text', () => { + const input = `Some kernel of \\(\\mathrm{SL}_2(\\mathbb{F}_7)\\): + \\[ + \\left\\{ \\begin{pmatrix} 1 & 0 \\\\ 0 & 1 \\end{pmatrix}, \\begin{pmatrix} -1 & 0 \\\\ 0 & -1 \\end{pmatrix} \\right\\} = \\{\\pm I\\} + \\]`; + const output = preprocessLaTeX(input); + + expect(output).toBe(`Some kernel of $\\mathrm{SL}_2(\\mathbb{F}_7)$: + $$ + \\left\\{ \\begin{pmatrix} 1 & 0 \\\\ 0 & 1 \\end{pmatrix}, \\begin{pmatrix} -1 & 0 \\\\ 0 & -1 \\end{pmatrix} \\right\\} = \\{\\pm I\\} + $$`); + }); + + test('handles standalone display math equation', () => { + const input = `Algebra: +\\[ +x = \\frac{-b \\pm \\sqrt{\\,b^{2}-4ac\\,}}{2a} +\\]`; + const output = preprocessLaTeX(input); + + expect(output).toBe(`Algebra: +$$ +x = \\frac{-b \\pm \\sqrt{\\,b^{2}-4ac\\,}}{2a} +$$`); + }); + + test('does not interpret currency values as LaTeX', () => { + const input = 'I have $10, $3.99 and $x + y$ and $100x$. The amount is $2,000.'; + const output = preprocessLaTeX(input); + + expect(output).toBe('I have \\$10, \\$3.99 and $x + y$ and $100x$. The amount is \\$2,000.'); + }); + + test('ignores dollar signs followed by digits (money), but keeps valid math $x + y$', () => { + const input = 'I have $10, $3.99 and $x + y$ and $100x$. The amount is $2,000.'; + const output = preprocessLaTeX(input); + + expect(output).toBe('I have \\$10, \\$3.99 and $x + y$ and $100x$. The amount is \\$2,000.'); + }); + + test('handles real-world word problems with amounts and no math delimiters', () => { + const input = + 'Emma buys 2 cupcakes for $3 each and 1 cookie for $1.50. How much money does she spend in total?'; + const output = preprocessLaTeX(input); + + expect(output).toBe( + 'Emma buys 2 cupcakes for \\$3 each and 1 cookie for \\$1.50. How much money does she spend in total?' + ); + }); + + test('handles decimal amounts in word problem correctly', () => { + const input = + 'Maria has $20. She buys a notebook for $4.75 and a pack of pencils for $3.25. 
How much change does she receive?'; + const output = preprocessLaTeX(input); + + expect(output).toBe( + 'Maria has \\$20. She buys a notebook for \\$4.75 and a pack of pencils for \\$3.25. How much change does she receive?' + ); + }); + + test('preserves display math with surrounding non-ASCII text', () => { + const input = `1 kg の質量は + \\[ + E = (1\\ \\text{kg}) \\times (3.0 \\times 10^8\\ \\text{m/s})^2 \\approx 9.0 \\times 10^{16}\\ \\text{J} + \\] + というエネルギーに相当します。これは約 21 百万トンの TNT が爆発したときのエネルギーに匹敵します。`; + const output = preprocessLaTeX(input); + + expect(output).toBe( + `1 kg の質量は + $$ + E = (1\\ \\text{kg}) \\times (3.0 \\times 10^8\\ \\text{m/s})^2 \\approx 9.0 \\times 10^{16}\\ \\text{J} + $$ + というエネルギーに相当します。これは約 21 百万トンの TNT が爆発したときのエネルギーに匹敵します。` + ); + }); + + test('converts \\[ ... \\] even when preceded by text without space', () => { + const input = 'Some line ...\nAlgebra: \\[x = \\frac{-b \\pm \\sqrt{\\,b^{2}-4ac\\,}}{2a}\\]'; + const output = preprocessLaTeX(input); + + expect(output).toBe( + 'Some line ...\nAlgebra: \n$$x = \\frac{-b \\pm \\sqrt{\\,b^{2}-4ac\\,}}{2a}$$\n' + ); + }); + + test('converts \\[ ... \\] in table-cells', () => { + const input = `| ID | Expression |\n| #1 | \\[ + x = \\frac{-b \\pm \\sqrt{\\,b^{2}-4ac\\,}}{2a} +\\] |`; + const output = preprocessLaTeX(input); + + expect(output).toBe( + '| ID | Expression |\n| #1 | $x = \\frac{-b \\pm \\sqrt{\\,b^{2}-4ac\\,}}{2a}$ |' + ); + }); + + test('escapes isolated $ before digits ($5 → \\$5), but not valid math', () => { + const input = 'This costs $5 and this is math $x^2$. $100 is money.'; + const output = preprocessLaTeX(input); + + expect(output).toBe('This costs \\$5 and this is math $x^2$. \\$100 is money.'); + // Note: Since $x^2$ is detected as valid LaTeX, it's preserved. + // $5 becomes \$5 only *after* real math is masked — but here it's correct because the masking logic avoids treating $5 as math. 
+ }); + + test('handles mhchem notation safely if present', () => { + const input = 'Chemical reaction: \\( \\ce{H2O} \\) and $\\ce{CO2}$'; + const output = preprocessLaTeX(input); + + expect(output).toBe('Chemical reaction: $ \\ce{H2O} $ and $\\\\ce{CO2}$'); + // Note: \\ce{...} remains, but $\\ce{...} → $\\\\ce{...} via escapeMhchem + }); + + test('preserves code blocks', () => { + const input = 'Inline code: `sum $total` and block:\n```\ndollar $amount\n```\nEnd.'; + const output = preprocessLaTeX(input); + + expect(output).toBe(input); // Code blocks prevent misinterpretation + }); +}); diff --git a/tools/server/webui/src/lib/utils/latex-protection.ts b/tools/server/webui/src/lib/utils/latex-protection.ts new file mode 100644 index 0000000000000..8a9f7648ae242 --- /dev/null +++ b/tools/server/webui/src/lib/utils/latex-protection.ts @@ -0,0 +1,191 @@ +/** + * Replaces inline LaTeX expressions enclosed in `$...$` with placeholders, avoiding dollar signs + * that appear to be part of monetary values or identifiers. + * + * This function processes the input line by line and skips `$` sequences that are likely + * part of money amounts (e.g., `$5`, `$100.99`) or code-like tokens (e.g., `var$`, `$var`). + * Valid LaTeX inline math is replaced with a placeholder like `<>`, and the + * actual LaTeX content is stored in the provided `latexExpressions` array. + * + * @param content - The input text potentially containing LaTeX expressions. + * @param latexExpressions - An array used to collect extracted LaTeX expressions. + * @returns The processed string with LaTeX replaced by placeholders. 
// NOTE(review): the placeholder tokens were stripped in the reviewed diff view (shown as `<>`).
// They are reconstructed here as `<<LATEX_n>>` and `<<CODE_BLOCK_n>>` — confirm against the
// expectations in latex-protection.test.ts.

/** Matches a character that glues a `$` to an identifier or amount: letter, digit, `_`, `$` or `-`. */
const WORD_LIKE_CHAR = /[A-Za-z0-9_$-]/;

/** Matches a single ASCII digit. */
const DIGIT_CHAR = /[0-9]/;

/**
 * Masks the inline `$...$` expressions of a single line (the caller guarantees there is no `\n`).
 *
 * @param line - One line of text.
 * @param latexExpressions - Collector; every masked expression is appended to it.
 * @returns The line with each accepted `$...$` span replaced by `<<LATEX_n>>`.
 */
function maskInlineLaTeXInLine(line: string, latexExpressions: string[]): string {
	if (!line.includes('$')) {
		return line;
	}

	let result = '';
	let index = 0;
	while (index < line.length) {
		const openIndex = line.indexOf('$', index);
		// A candidate needs an opening AND a closing `$` on the same line.
		const closeIndex = openIndex === -1 ? -1 : line.indexOf('$', openIndex + 1);
		if (closeIndex === -1) {
			result += line.slice(index);
			break;
		}

		// `charAt` yields '' when out of range, which never matches the character classes below.
		const beforeOpenChar = line.charAt(openIndex - 1);
		const afterOpenChar = line.charAt(openIndex + 1);
		const beforeCloseChar = openIndex + 1 < closeIndex ? line.charAt(closeIndex - 1) : '';
		const afterCloseChar = line.charAt(closeIndex + 1);

		// `$$` with nothing in between. Measured from the opening `$` (not from the scan start),
		// so an empty pair is skipped wherever it occurs in the line.
		const isEmptyPair = closeIndex === openIndex + 1;
		// Letter, digit, `$`, `_` or `-` right before the opening `$`: identifier-like, not TeX.
		const isGluedToWord = WORD_LIKE_CHAR.test(beforeOpenChar);
		// `$` followed by a digit whose "closing" `$` either starts another token or is preceded
		// by a space: the opening `$` belongs to an amount.
		const looksLikeAmount =
			DIGIT_CHAR.test(afterOpenChar) &&
			(WORD_LIKE_CHAR.test(afterCloseChar) || beforeCloseChar === ' ');

		if (isEmptyPair || isGluedToWord || looksLikeAmount) {
			// Keep this `$` verbatim and retry from the character after it.
			result += line.slice(index, openIndex + 1);
			index = openIndex + 1;
			continue;
		}

		// Treat as LaTeX: store the expression including both delimiters.
		result += line.slice(index, openIndex);
		latexExpressions.push(line.slice(openIndex, closeIndex + 1));
		result += `<<LATEX_${latexExpressions.length - 1}>>`;
		index = closeIndex + 1;
	}
	return result;
}

/**
 * Replaces inline LaTeX expressions enclosed in `$...$` with placeholders, avoiding dollar signs
 * that appear to be part of monetary values or identifiers.
 *
 * The input is processed line by line, so an expression never spans a line break. `$` sequences
 * that look like money (e.g. `$5`, `$100.99`), code-like tokens (e.g. `var$`) or an empty `$$`
 * pair are left untouched. Accepted expressions are replaced with `<<LATEX_n>>`, where `n` is the
 * index at which the expression (delimiters included) was appended to `latexExpressions`.
 *
 * @param content - The input text potentially containing LaTeX expressions.
 * @param latexExpressions - An array used to collect extracted LaTeX expressions (mutated).
 * @returns The processed string with LaTeX replaced by placeholders.
 */
export function maskInlineLaTeX(content: string, latexExpressions: string[]): string {
	if (!content.includes('$')) {
		return content;
	}
	return content
		.split('\n')
		.map((line) => maskInlineLaTeXInLine(line, latexExpressions))
		.join('\n');
}

/**
 * Converts `\[...\]` to `$$...$$` and `\(...\)` to `$...$`, leaving fenced and inline code as is.
 *
 * The display form requires a non-backslash character right before the closing `\]`, so a LaTeX
 * line break (`\\`) directly followed by `]` is not taken as the closing delimiter. The inline
 * form does not span line breaks.
 */
function escapeBrackets(text: string): string {
	const pattern = /(```[\S\s]*?```|`.*?`)|\\\[([\S\s]*?[^\\])\\]|\\\((.*?)\\\)/g;
	return text.replace(
		pattern,
		(
			match: string,
			codeBlock: string | undefined,
			squareBracket: string | undefined,
			roundBracket: string | undefined
		): string => {
			if (codeBlock != null) {
				return codeBlock;
			}
			if (squareBracket != null) {
				return `$$${squareBracket}$$`;
			}
			if (roundBracket != null) {
				return `$${roundBracket}$`;
			}
			return match;
		}
	);
}

/**
 * Doubles the backslash of `\ce{` / `\pu{` when it directly follows a `$`
 * (`$\ce{` → `$\\ce{`, `$\pu{` → `$\\pu{`).
 *
 * Uses split/join instead of `replaceAll`: same result, no ES2021 lib requirement and no
 * `$`-pattern interpretation of the replacement text.
 */
function escapeMhchem(text: string): string {
	return text.split('$\\ce{').join('$\\\\ce{').split('$\\pu{').join('$\\\\pu{');
}

// See also:
// https://github.com/danny-avila/LibreChat/blob/main/client/src/utils/latex.ts

// Protect code blocks: ```...``` and `...`
const codeBlockRegex = /(```[\s\S]*?```|`[^`\n]+`)/g;

/**
 * Puts masked LaTeX expressions back in place of their `<<LATEX_n>>` placeholders.
 *
 * An expression masked later may contain placeholders of expressions masked earlier (for example
 * an inline `$...$` span that swallowed an already protected `\(...\)`), so restoration recurses.
 * Only indices below `limit` are substituted: that bounds the recursion and leaves
 * placeholder-looking text with an unknown index untouched instead of emitting "undefined".
 */
function restoreLaTeX(
	text: string,
	latexExpressions: readonly string[],
	limit: number = latexExpressions.length
): string {
	return text.replace(/<<LATEX_(\d+)>>/g, (placeholder: string, digits: string) => {
		const slot = Number.parseInt(digits, 10);
		return slot < limit
			? restoreLaTeX(latexExpressions[slot], latexExpressions, slot)
			: placeholder;
	});
}

/**
 * Preprocesses markdown content to safely handle LaTeX math expressions while protecting
 * against false positives (e.g., dollar amounts like $5.99) and ensuring proper rendering.
 *
 * This function:
 * - Protects code blocks (```) and inline code (`...`) for the whole duration of the processing
 * - Safeguards block and inline LaTeX: \(...\), \[...\], $$...$$, and selective $...$
 * - Escapes standalone dollar signs before numbers (e.g., $5 → \$5); dollars that are already
 *   backslash-escaped are left alone
 * - Restores protected LaTeX
 * - Converts \(...\) → $...$ and \[...\] → $$...$$ for compatibility with math renderers
 * - Applies additional escaping for mhchem syntax if needed
 * - Restores code blocks last, so none of the rewrites above can touch code
 *
 * @param content - The raw text (e.g., markdown) that may contain LaTeX or code blocks.
 * @returns The preprocessed string with properly escaped and normalized LaTeX.
 *
 * @example
 * preprocessLaTeX("Price: $10. The equation is \\(x^2\\).")
 * // → "Price: \\$10. The equation is $x^2$."
 */
export function preprocessLaTeX(content: string): string {
	// Step 1: Protect code blocks
	const codeBlocks: string[] = [];
	content = content.replace(codeBlockRegex, (match) => {
		codeBlocks.push(match);
		return `<<CODE_BLOCK_${codeBlocks.length - 1}>>`;
	});

	// Step 2: Protect existing LaTeX expressions
	const latexExpressions: string[] = [];
	const protect = (expression: string): string => {
		latexExpressions.push(expression);
		return `<<LATEX_${latexExpressions.length - 1}>>`;
	};

	// Match \S...\[...\] (display math preceded by text on the same line), protect it and insert
	// a line-break. Skipped entirely when there is no `\[`, because the lazy prefix scan is
	// expensive on long lines.
	if (content.includes('\\[')) {
		content = content.replace(
			/([\S].*?)\\\[([\s\S]*?)\\\](.*)/g,
			(_match: string, prefix: string, formula: string, suffix: string) => {
				// Characters following the formula (display-formula in a table-cell?)
				if (/\S/.test(suffix)) {
					// Convert into inline so the surrounding line (e.g. a table row) stays intact.
					return `${prefix}${protect(`\\(${formula.trim()}\\)`)}${suffix}`;
				}
				return `${prefix}\n${protect(`\\[${formula}\\]`)}\n${suffix}`;
			}
		);
	}

	// Match \(...\), \[...\], $$...$$ and protect them
	content = content.replace(/(\$\$[\s\S]*?\$\$|\\\[[\s\S]*?\\\]|\\\(.*?\\\))/g, (match) =>
		protect(match)
	);

	// Protect inline $...$ but NOT if it looks like money (e.g., $10, $3.99)
	content = maskInlineLaTeX(content, latexExpressions);

	// Step 3: Escape standalone $ before digits (currency like $5 → \$5).
	// Inline math is masked at this point, so only unprotected dollars are seen. A `$` preceded
	// by an odd number of backslashes is already escaped and must not be escaped twice.
	content = content.replace(/(\\*)\$(?=\d)/g, (match: string, backslashes: string) =>
		backslashes.length % 2 === 1 ? match : `${backslashes}\\$`
	);

	// Step 4: Restore protected LaTeX expressions (they are valid)
	content = restoreLaTeX(content, latexExpressions);

	// Step 5: Normalize delimiters and apply mhchem escaping while code is still masked.
	content = escapeBrackets(content);
	if (content.includes('\\ce{') || content.includes('\\pu{')) {
		content = escapeMhchem(content);
	}

	// Final pass for single-line leftovers: \(...\) → $...$, \[...\] → $$...$$.
	// Function replacers are used so that `$` is never interpreted as a replacement pattern.
	content = content
		.replace(/\\\((.+?)\\\)/g, (_match: string, body: string) => `$${body}$`) // inline
		.replace(/\\\[(.+?)\\\]/g, (_match: string, body: string) => `$$${body}$$`); // display

	// Step 6: Restore code blocks. Placeholder-looking text with an unknown index is kept as is.
	content = content.replace(
		/<<CODE_BLOCK_(\d+)>>/g,
		(placeholder: string, digits: string) =>
			codeBlocks[Number.parseInt(digits, 10)] ?? placeholder
	);

	return content;
}
\frac{x}{n}\right)^n = e^x$$ +## Further Bracket Styles and Amounts + +- \( \mathrm{GL}_2(\mathbb{F}_7) \): Group of invertible matrices with entries in \(\mathbb{F}_7\). +- Some kernel of \(\mathrm{SL}_2(\mathbb{F}_7)\): + \[ + \left\{ \begin{pmatrix} 1 & 0 \\ 0 & 1 \end{pmatrix}, \begin{pmatrix} -1 & 0 \\ 0 & -1 \end{pmatrix} \right\} = \{\pm I\} + \] +- Algebra: +\[ +x = \frac{-b \pm \sqrt{\,b^{2}-4ac\,}}{2a} +\] +- $100 and $12.99 are amounts, not LaTeX. +- I have $10, $3.99 and $x + y$ and $100x$. The amount is $2,000. +- Emma buys 2 cupcakes for $3 each and 1 cookie for $1.50. How much money does she spend in total? +- Maria has $20. She buys a notebook for $4.75 and a pack of pencils for $3.25. How much change does she receive? +- 1 kg の質量は + \[ + E = (1\ \text{kg}) \times (3.0 \times 10^8\ \text{m/s})^2 \approx 9.0 \times 10^{16}\ \text{J} + \] + というエネルギーに相当します。これは約 21 百万トンの TNT が爆発したときのエネルギーに匹敵します。 +- Algebra: \[ +x = \frac{-b \pm \sqrt{\,b^{2}-4ac\,}}{2a} +\] + +## Formulas in a Table + +| Area | Expression | Comment | +|------|------------|---------| +| **Algebra** | \[ +x = \frac{-b \pm \sqrt{\,b^{2}-4ac\,}}{2a} +\] | Quadratic formula | +| | \[ +(a+b)^{n} = \sum_{k=0}^{n}\binom{n}{k}\,a^{\,n-k}\,b^{\,k} +\] | Binomial theorem | +| | \(\displaystyle \prod_{k=1}^{n}k = n! \) | Factorial definition | +| **Geometry** | \( \mathbf{a}\cdot \mathbf{b} = \|\mathbf{a}\|\,\|\mathbf{b}\|\,\cos\theta \) | Dot product & angle | + --- *This document showcases various mathematical notation and formulas that can be rendered in markdown using LaTeX syntax.*