Skip to content

Commit 323ca04

Browse files
brichet and dlqqq
authored
Allow $ to literally denote quantities of USD in chat (#95)
Co-authored-by: david qiu <[email protected]>
1 parent 2819cee commit 323ca04

File tree

1 file changed

+72
-5
lines changed

1 file changed

+72
-5
lines changed

packages/jupyter-chat/src/components/rendermime-markdown.tsx

Lines changed: 72 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,14 +24,74 @@ type RendermimeMarkdownProps = {
2424
};
2525

2626
/**
 * Escapes backslashes in LaTeX delimiters such that they appear in the DOM
 * after the initial Markdown render. For example, this function takes `\(` and
 * returns `\\(`.
 *
 * Required for proper rendering of Markdown + LaTeX markup in the chat by
 * `ILatexTypesetter`.
 *
 * @param text - Raw Markdown source that may contain `\(`, `\)`, `\[` or `\]`.
 * @returns The text with the backslash of every such delimiter doubled.
 */
function escapeLatexDelimiters(text: string): string {
  // A real RegExp with the `g` flag is required here. A string pattern such as
  // '\\(/g' is matched literally (the "/g" is part of the search text), so it
  // never matches a delimiter; and a plain string pattern without it would
  // only replace the first occurrence.
  //
  // The capture group holds whichever of `(`, `)`, `[`, `]` followed the
  // backslash; `$1` re-emits it after the doubled backslash.
  return text.replace(/\\([()[\]])/g, '\\\\$1');
}
41+
42+
/**
 * Type guard that narrows a (possibly null) DOM `Node` to `Text` by comparing
 * its `nodeType` against `Node.TEXT_NODE`.
 *
 * @param node - The node to inspect; `null` yields `false`.
 * @returns `true` only when `node` is present and is a text node.
 */
function isTextNode(node: Node | null): node is Text {
  if (node == null) {
    return false;
  }
  return node.nodeType === Node.TEXT_NODE;
}
49+
50+
/**
 * Backslash-escapes lone `$` characters in every text node under `el`, leaving
 * text inside `pre`, `code`, `samp` and `kbd` elements untouched.
 *
 * Escaping stops a single `$` from acting as an inline math delimiter, so that
 * `$` can be written literally to denote quantities of USD. Elements that show
 * their contents literally (such as code elements) are skipped. This overrides
 * JupyterLab's default rendering of Markdown w/ LaTeX.
 *
 * The Jupyter AI system prompt should explicitly request that the LLM not use
 * `$` as an inline math delimiter. This is the default behavior.
 *
 * @param el - Root element whose descendant text nodes are rewritten in place.
 */
function escapeDollarSymbols(el: HTMLElement) {
  // Ancestors whose text is displayed literally and must not be rewritten.
  const literalContainers = 'pre, code, samp, kbd';

  // A `$` that is neither preceded by `$` or `\` nor followed by `$`:
  // - `$10 - $5`     => `\$10 - \$5`   (escaped)
  // - `$$ \infty $$` => `$$ \infty $$` (unchanged)
  // - `\$10`         => `\$10`         (unchanged, already escaped)
  const loneDollar = /(?<![$\\])\$(?!\$)/g;

  // Visit text nodes only, skipping any that sit inside a literal container.
  const walker = document.createTreeWalker(el, NodeFilter.SHOW_TEXT, {
    acceptNode(candidate: Node) {
      if (candidate.parentElement?.closest(literalContainers)) {
        return NodeFilter.FILTER_SKIP;
      }
      return NodeFilter.FILTER_ACCEPT;
    }
  });

  // The accepted text nodes are collected before any of them is modified.
  const isText = (n: Node): n is Text => n.nodeType === Node.TEXT_NODE;
  const targets: Text[] = [];
  for (let visited = walker.nextNode(); visited; visited = walker.nextNode()) {
    if (isText(visited)) {
      targets.push(visited);
    }
  }

  // Rewrite each non-empty text node in place.
  for (const textNode of targets) {
    const before = textNode.textContent;
    if (before) {
      textNode.textContent = before.replace(loneDollar, '\\$');
    }
  }
}
3696

3797
function RendermimeMarkdownBase(props: RendermimeMarkdownProps): JSX.Element {
@@ -47,12 +107,15 @@ function RendermimeMarkdownBase(props: RendermimeMarkdownProps): JSX.Element {
47107

48108
useEffect(() => {
49109
const renderContent = async () => {
110+
// initialize mime model
50111
const mdStr = escapeLatexDelimiters(props.markdownStr);
51112
const model = props.rmRegistry.createModel({
52113
data: { [MD_MIME_TYPE]: mdStr }
53114
});
54115

55116
const renderer = props.rmRegistry.createRenderer(MD_MIME_TYPE);
117+
118+
// step 1: render markdown
56119
await renderer.renderModel(model);
57120
props.rmRegistry.latexTypesetter?.typeset(renderer.node);
58121
if (!renderer.node) {
@@ -61,6 +124,10 @@ function RendermimeMarkdownBase(props: RendermimeMarkdownProps): JSX.Element {
61124
);
62125
}
63126

127+
// step 2: render LaTeX via MathJax, while escaping single dollar symbols.
128+
escapeDollarSymbols(renderer.node);
129+
props.rmRegistry.latexTypesetter?.typeset(renderer.node);
130+
64131
const newCodeToolbarDefns: [HTMLDivElement, CodeToolbarProps][] = [];
65132

66133
// Attach CodeToolbar root element to each <pre> block

0 commit comments

Comments
 (0)