Skip to content

Commit ea5140f

Browse files
authored
🧮 feat: Improve LaTeX rendering consistency (danny-avila#3763)
* refactor: simplify LaTeX pre-processing for more consistent rendering, disables `singleDollarTextMath`
* refactor: disable singleDollarTextMath in all markdown components
* wip: first pass
* refactor: preserve code blocks and convert rather than preserve LaTeX delimiters
* refactor: remove unused escapeDollarNumber function from latex.ts
1 parent 967e8a1 commit ea5140f

File tree

3 files changed

+156
-3
lines changed

3 files changed

+156
-3
lines changed

client/src/components/Chat/Messages/Content/Markdown.tsx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ import { useRecoilValue } from 'recoil';
88
import ReactMarkdown from 'react-markdown';
99
import type { PluggableList } from 'unified';
1010
import rehypeHighlight from 'rehype-highlight';
11-
import { cn, langSubset, validateIframe, processLaTeX, handleDoubleClick } from '~/utils';
11+
import { langSubset, validateIframe, preprocessLaTeX, handleDoubleClick } from '~/utils';
1212
import CodeBlock from '~/components/Messages/Content/CodeBlock';
1313
import { useFileDownload } from '~/data-provider';
1414
import useLocalize from '~/hooks/useLocalize';
@@ -123,7 +123,7 @@ const Markdown = memo(({ content = '', isEdited, showCursor, isLatestMessage }:
123123
let currentContent = content;
124124
if (!isInitializing) {
125125
currentContent = currentContent.replace('z-index: 1;', '') || '';
126-
currentContent = LaTeXParsing ? processLaTeX(currentContent) : currentContent;
126+
currentContent = LaTeXParsing ? preprocessLaTeX(currentContent) : currentContent;
127127
}
128128

129129
const rehypePlugins: PluggableList = [

client/src/utils/latex.spec.ts

Lines changed: 91 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
import { processLaTeX } from './latex';
1+
/* eslint-disable no-useless-escape */
2+
import { processLaTeX, preprocessLaTeX } from './latex';
23

34
describe('processLaTeX', () => {
45
test('returns the same string if no LaTeX patterns are found', () => {
@@ -103,3 +104,92 @@ describe('processLaTeX', () => {
103104
});
104105
});
105106
});
107+
108+
describe('preprocessLaTeX', () => {
  /**
   * One entry per scenario. `expected` is left out when the input must come
   * back from `preprocessLaTeX` unchanged.
   */
  const cases: Array<{ name: string; input: string; expected?: string }> = [
    {
      name: 'returns the same string if no LaTeX patterns are found',
      input: 'This is a test string without LaTeX',
    },
    {
      name: 'escapes dollar signs followed by digits',
      input: 'Price is $50 and $100',
      expected: 'Price is \\$50 and \\$100',
    },
    {
      name: 'does not escape dollar signs not followed by digits',
      input: 'This $variable is not escaped',
    },
    {
      name: 'preserves existing LaTeX expressions',
      input: 'Inline $x^2 + y^2 = z^2$ and block $$E = mc^2$$',
    },
    {
      name: 'handles mixed LaTeX and currency',
      input: 'LaTeX $x^2$ and price $50',
      expected: 'LaTeX $x^2$ and price \\$50',
    },
    {
      name: 'converts LaTeX delimiters',
      input: 'Brackets \\[x^2\\] and parentheses \\(y^2\\)',
      expected: 'Brackets $$x^2$$ and parentheses $y^2$',
    },
    {
      name: 'escapes mhchem commands',
      input: '$\\ce{H2O}$ and $\\pu{123 J}$',
      expected: '$\\\\ce{H2O}$ and $\\\\pu{123 J}$',
    },
    {
      name: 'handles complex mixed content',
      input: `
LaTeX inline $x^2$ and block $$y^2$$
Currency $100 and $200
Chemical $\\ce{H2O}$
Brackets \\[z^2\\]
`,
      expected: `
LaTeX inline $x^2$ and block $$y^2$$
Currency \\$100 and \\$200
Chemical $\\\\ce{H2O}$
Brackets $$z^2$$
`,
    },
    {
      name: 'handles empty string',
      input: '',
    },
    {
      name: 'preserves code blocks',
      input: '```\n$100\n```\nOutside $200',
      expected: '```\n$100\n```\nOutside \\$200',
    },
    {
      name: 'handles multiple currency values in a sentence',
      input: 'I have $50 in my wallet and $100 in the bank.',
      expected: 'I have \\$50 in my wallet and \\$100 in the bank.',
    },
    {
      name: 'preserves LaTeX expressions with numbers',
      input: 'The equation is $f(x) = 2x + 3$ where x is a variable.',
    },
    {
      name: 'handles currency values with commas',
      input: 'The price is $1,000,000 for this item.',
      expected: 'The price is \\$1,000,000 for this item.',
    },
    {
      name: 'preserves LaTeX expressions with special characters',
      input: 'The set is defined as $\\{x | x > 0\\}$.',
    },
  ];

  // Register every scenario as its own test so failures are reported per case.
  for (const { name, input, expected } of cases) {
    test(name, () => {
      expect(preprocessLaTeX(input)).toBe(expected ?? input);
    });
  }
});

client/src/utils/latex.ts

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,3 +40,66 @@ export const processLaTeX = (_content: string) => {
4040
// Restore code blocks
4141
return restoreCodeBlocks(processedContent, codeBlocks);
4242
};
43+
44+
/**
 * Preprocesses Markdown content so LaTeX renders consistently.
 *
 * Processing order:
 * 1. Fenced (```` ``` ````) and inline (`` ` ``) code is swapped for placeholders so that
 *    nothing below can modify it. Code stays hidden until the very last step.
 * 2. Existing `$$…$$`, `\[…\]` and `\(…\)` expressions are swapped for placeholders.
 * 3. Remaining `$` characters directly followed by a digit are treated as currency and
 *    escaped as `\$`. A `$` that is already preceded by a backslash is left alone.
 * 4. LaTeX expressions are restored.
 * 5. `escapeBrackets` converts `\[…\]` / `\(…\)` to `$$…$$` / `$…$`, and `escapeMhchem`
 *    doubles the backslash of `$\ce{` / `$\pu{`.
 * 6. Code is restored verbatim.
 *
 * @param content The input string containing LaTeX expressions.
 * @returns The processed string with replaced delimiters and escaped characters.
 */
export function preprocessLaTeX(content: string): string {
  // Step 1: Protect code blocks. Text that already looks like one of our placeholders is
  // protected the same way, so it round-trips verbatim instead of being mistaken for a
  // real placeholder (and replaced with the wrong fragment) during the restore steps.
  const codeBlocks: string[] = [];
  content = content.replace(
    /```[\s\S]*?```|`[^`\n]+`|<<(?:CODE_BLOCK|LATEX)_\d+>>/g,
    (match) => {
      codeBlocks.push(match);
      return `<<CODE_BLOCK_${codeBlocks.length - 1}>>`;
    },
  );

  // Step 2: Protect existing LaTeX expressions from the currency escaping below.
  const latexExpressions: string[] = [];
  content = content.replace(/\$\$[\s\S]*?\$\$|\\\[[\s\S]*?\\\]|\\\(.*?\\\)/g, (match) => {
    latexExpressions.push(match);
    return `<<LATEX_${latexExpressions.length - 1}>>`;
  });

  // Step 3: Escape dollar signs that are likely currency indicators. The lookbehind
  // skips dollars that are already escaped, so `\$50` does not become `\\$50`.
  content = content.replace(/(?<!\\)\$(?=\d)/g, '\\$');

  // Step 4: Restore LaTeX expressions. Falling back to the placeholder itself guarantees
  // the literal string "undefined" can never be written into the output.
  content = content.replace(
    /<<LATEX_(\d+)>>/g,
    (placeholder: string, index: string) =>
      latexExpressions[Number.parseInt(index, 10)] ?? placeholder,
  );

  // Step 5: Convert delimiters and escape mhchem commands while code is still hidden,
  // so neither helper can alter the contents of a code block or inline code span.
  content = escapeBrackets(content);
  content = escapeMhchem(content);

  // Step 6: Restore code blocks last so they come back exactly as they were written.
  content = content.replace(
    /<<CODE_BLOCK_(\d+)>>/g,
    (placeholder: string, index: string) => codeBlocks[Number.parseInt(index, 10)] ?? placeholder,
  );

  return content;
}
80+
81+
/**
 * Converts backslash-style LaTeX delimiters to dollar-style delimiters.
 *
 * - `\[ … \]` (may span lines) becomes `$$ … $$`
 * - `\( … \)` (single line) becomes `$ … $`
 *
 * Fenced code blocks and inline code spans are matched first and returned untouched,
 * so delimiters inside them are not converted.
 *
 * @param text The text to convert.
 * @returns The text with `\[ \]` and `\( \)` delimiters replaced.
 */
export function escapeBrackets(text: string): string {
  // Group 1: code (kept as-is) · group 2: body of \[ … \] · group 3: body of \( … \)
  const delimiterPattern = /(```[\S\s]*?```|`.*?`)|\\\[([\S\s]*?[^\\])\\]|\\\((.*?)\\\)/g;

  const convert = (whole: string, code?: string, display?: string, inline?: string): string => {
    if (code != null) {
      return code;
    }
    if (display != null) {
      return '$$' + display + '$$';
    }
    return inline != null ? '$' + inline + '$' : whole;
  };

  return text.replace(delimiterPattern, convert);
}
102+
103+
/**
 * Doubles the backslash of the mhchem commands `\ce{` and `\pu{` when they directly
 * follow a `$`, turning `$\ce{` into `$\\ce{` and `$\pu{` into `$\\pu{`.
 *
 * Only the exact sequences `$\ce{` and `$\pu{` are rewritten; text that is already
 * doubled (`$\\ce{`) or not preceded by `$` is left unchanged.
 *
 * @param text The text to process.
 * @returns The text with the matching mhchem commands escaped.
 */
export function escapeMhchem(text: string): string {
  // A single global regex pass covers both commands and avoids String.prototype.replaceAll,
  // which needs the ES2021 lib. A replacer function is used so the leading `$` is never
  // interpreted as a replacement pattern.
  return text.replace(/\$\\(ce|pu)\{/g, (_match: string, command: string) => `$\\\\${command}{`);
}

0 commit comments

Comments
 (0)