Skip to content

Commit fa999f7

Browse files
committed
Fix problem with $$ not being put around the correct items in a LaTeX equation (#8707)
* First try * Add news entry and fix multiples * Add a bunch of comments
1 parent e157ced commit fa999f7

File tree

3 files changed

+128
-30
lines changed

3 files changed

+128
-30
lines changed

news/2 Fixes/8673.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Some LaTeX equations do not print in notebooks or the interactive window.
Lines changed: 70 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,38 +1,81 @@
11
// Copyright (c) Microsoft Corporation. All rights reserved.
22
// Licensed under the MIT License.
3+
4+
// tslint:disable-next-line:no-require-imports no-var-requires
5+
const _escapeRegExp = require('lodash/escapeRegExp') as typeof import('lodash/escapeRegExp');
6+
37
// Adds '$$' to latex formulas that don't have a '$', allowing users to input the formula directly.
8+
//
9+
// The general algorithm here is:
10+
// Search for either $$ or $ or a \begin{name} item.
11+
// If a $$ or $ is found, output up to the next dollar sign
12+
// If a \begin{name} is found, find the matching \end{name}, wrap the section in $$ and output up to the \end.
13+
//
14+
// LaTeX seems to follow the pattern of \begin{name} or is escaped with $$ or $. See here for a bunch of examples:
15+
// https://jupyter-notebook.readthedocs.io/en/stable/examples/Notebook/Typesetting%20Equations.html
416
export function fixLatexEquations(input: string): string {
5-
const block = '\n$$\n';
17+
const output: string[] = [];
618

7-
const beginIndexes = getAllIndexesOfRegex(input, /\\begin\{[a-z]*\*?\}/g);
8-
const endIndexes = getAllIndexesOfRegex(input, /\\end\{[a-z]*\*?\}/g);
19+
// Search for begin/end pairs, outputting as we go
20+
let start = 0;
921

10-
if (beginIndexes.length === endIndexes.length) {
11-
for (let i = 0; i < beginIndexes.length; i += 1) {
12-
const endOfEnd = input.indexOf('}', endIndexes[i] + 1 + 8 * i);
22+
// Loop until we run out of string
23+
while (start < input.length) {
24+
// Check $$, $ and begin
25+
const dollars = /\$\$/.exec(input.substr(start));
26+
const dollar = /\$/.exec(input.substr(start));
27+
const begin = /\\begin\{([a-z,\*]+)\}/.exec(input.substr(start));
28+
let endRegex = /\$\$/;
29+
let endRegexLength = 2;
1330

14-
// Edge case, if the input starts with the latex formula we add the block at the beginning.
15-
if (beginIndexes[i] === 0 && input[beginIndexes[i]] === '\\') {
16-
input = block + input.slice(0, endOfEnd + 1) + block + input.slice(endOfEnd + 1, input.length);
17-
// Normal case, if the latex formula starts with a '$' we don't do anything.
18-
// Otherwise, we insert the block at the beginning and ending of the latex formula.
19-
} else if (input[beginIndexes[i] - 1] !== '$') {
20-
input = input.slice(0, beginIndexes[i] + block.length * 2 * i) + block + input.slice(beginIndexes[i] + block.length * 2 * i, endOfEnd + 1) + block + input.slice(endOfEnd + 1, input.length);
21-
}
31+
// Pick the first that matches
32+
let match = dollars;
33+
let isBeginMatch = false;
34+
if (!match || (dollar && dollar.index < match.index)) {
35+
match = dollar;
36+
endRegex = /\$/;
37+
endRegexLength = 1;
38+
}
39+
if (!match || (begin && begin.index < match.index)) {
40+
match = begin;
41+
endRegex = begin ? new RegExp(`\\\\end\\{${_escapeRegExp(begin[1])}\\}`) : /\$/;
42+
endRegexLength = begin ? `\\end{${begin[1]}}`.length : 1;
43+
isBeginMatch = true;
2244
}
23-
}
24-
25-
return input;
26-
}
27-
28-
function getAllIndexesOfRegex(arr: string, value: RegExp): number[] {
29-
const indexes = [];
30-
let result;
3145

32-
// tslint:disable-next-line: no-conditional-assignment
33-
while ((result = value.exec(arr)) !== null) {
34-
indexes.push(result.index);
46+
// Output this match
47+
if (match) {
48+
if (isBeginMatch) {
49+
// Begin match is a little more complicated.
50+
const offset = match.index + start;
51+
const end = endRegex.exec(input.substr(start));
52+
if (end) {
53+
const prefix = input.substr(start, match.index);
54+
const wrapped = input.substr(offset, endRegexLength + end.index - match.index);
55+
output.push(`${prefix}\n$$\n${wrapped}\n$$\n`);
56+
start = start + prefix.length + wrapped.length;
57+
} else {
58+
// Invalid, just return
59+
return input;
60+
}
61+
} else {
62+
// Output till the next $ or $$
63+
const offset = match.index + 1 + start;
64+
const endDollar = endRegex.exec(input.substr(offset));
65+
if (endDollar) {
66+
const length = endDollar.index + 1 + offset;
67+
output.push(input.substr(start, length));
68+
start = start + length;
69+
} else {
70+
// Invalid, just return
71+
return input;
72+
}
73+
}
74+
} else {
75+
// No more matches
76+
output.push(input.substr(start));
77+
start = input.length;
78+
}
3579
}
36-
37-
return indexes;
80+
return output.join('');
3881
}

src/test/datascience/latexManipulation.unit.test.ts

Lines changed: 57 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -85,18 +85,72 @@ $$
8585
$$
8686
`;
8787

88-
test('Latex - Equations don\'t have $$', () => {
88+
const markdown4 = `
89+
$$
90+
\\begin{equation*}
91+
\\mathbf{V}_1 \\times \\mathbf{V}_2 = \\begin{vmatrix}
92+
\\mathbf{i} & \\mathbf{j} & \\mathbf{k} \\
93+
\\frac{\partial X}{\\partial u} & \\frac{\\partial Y}{\\partial u} & 0 \\\\
94+
\\frac{\partial X}{\\partial v} & \\frac{\\partial Y}{\\partial v} & 0
95+
\\end{vmatrix}
96+
\\end{equation*}
97+
$$
98+
`;
99+
100+
const markdown5 = `
101+
\\begin{equation*}
102+
P(E) = {n \\choose k} p^k (1-p)^{ n-k}
103+
\\end{equation*}
104+
105+
This expression $\\sqrt{3x-1}+(1+x)^2$ is an example of a TeX inline equation in a [Markdown-formatted](https://daringfireball.net/projects/markdown/) sentence.
106+
`;
107+
const output5 = `
108+
109+
$$
110+
\\begin{equation*}
111+
P(E) = {n \\choose k} p^k (1-p)^{ n-k}
112+
\\end{equation*}
113+
$$
114+
115+
116+
This expression $\\sqrt{3x-1}+(1+x)^2$ is an example of a TeX inline equation in a [Markdown-formatted](https://daringfireball.net/projects/markdown/) sentence.
117+
`;
118+
119+
test('Latex - Equations don\'t have \$\$', () => {
89120
const result = fixLatexEquations(markdown1);
90121
expect(result).to.be.equal(output1, 'Result is incorrect');
91122
});
92123

93-
test('Latex - Equations have $', () => {
124+
test('Latex - Equations have \$', () => {
94125
const result = fixLatexEquations(markdown2);
95126
expect(result).to.be.equal(markdown2, 'Result is incorrect');
96127
});
97128

98-
test('Latex - Multiple equations don\'t have $$', () => {
129+
test('Latex - Multiple equations don\'t have \$\$', () => {
99130
const result = fixLatexEquations(markdown3);
100131
expect(result).to.be.equal(output3, 'Result is incorrect');
101132
});
133+
134+
test('Latex - All on the same line', () => {
135+
const line = '\\begin{matrix}1 & 0\\0 & 1\\end{matrix}';
136+
const after = '\n$$\n\\begin{matrix}1 & 0\\0 & 1\\end{matrix}\n$$\n';
137+
const result = fixLatexEquations(line);
138+
expect(result).to.be.equal(after, 'Result is incorrect');
139+
});
140+
141+
test('Latex - Invalid', () => {
142+
const invalid = '\n\\begin{eq*}do stuff\\end{eq}';
143+
const result = fixLatexEquations(invalid);
144+
expect(result).to.be.equal(invalid, 'Result should not have changed');
145+
});
146+
147+
test('Latex - \$\$ already present', () => {
148+
const result = fixLatexEquations(markdown4);
149+
expect(result).to.be.equal(markdown4, 'Result should not have changed');
150+
});
151+
152+
test('Latex - Multiple types', () => {
153+
const result = fixLatexEquations(markdown5);
154+
expect(result).to.be.equal(output5, 'Result is incorrect');
155+
});
102156
});

0 commit comments

Comments
 (0)