Fix problem with $$ not being put around the correct items in a LaTeX equation (#8707)

rchiodo · rchiodo · commit fa999f7e19be · 2019-11-21T09:28:42.000-08:00
* First try

* Add news entry and fix multiples

* Add a bunch of comments
diff --git a/news/2 Fixes/8673.md b/news/2 Fixes/8673.md
@@ -0,0 +1 @@
+Some LaTeX equations do not print in notebooks or the interactive window.
diff --git a/src/datascience-ui/interactive-common/latexManipulation.ts b/src/datascience-ui/interactive-common/latexManipulation.ts
@@ -1,38 +1,81 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
+
+// tslint:disable-next-line:no-require-imports no-var-requires
+const _escapeRegExp = require('lodash/escapeRegExp') as typeof import('lodash/escapeRegExp');
+
 // Adds '$$' to latex formulas that don't have a '$', allowing users to input the formula directly.
+//
+// The general algorithm here is:
+// Search for either $$ or $ or a \begin{name} item.
+// If a $$ or $ is found, output up to the next dollar sign
+// If a \begin{name} is found, find the matching \end{name}, wrap the section in $$ and output up to the \end.
+//
+// LaTeX seems to follow the pattern of \begin{name} or is escaped with $$ or $. See here for a bunch of examples:
+// https://jupyter-notebook.readthedocs.io/en/stable/examples/Notebook/Typesetting%20Equations.html
 export function fixLatexEquations(input: string): string {
-    const block = '\n$$\n';
+    const output: string[] = [];
 
-    const beginIndexes = getAllIndexesOfRegex(input, /\\begin\{[a-z]*\*?\}/g);
-    const endIndexes = getAllIndexesOfRegex(input, /\\end\{[a-z]*\*?\}/g);
+    // Search for begin/end pairs, outputting as we go
+    let start = 0;
 
-    if (beginIndexes.length === endIndexes.length) {
-        for (let i = 0; i < beginIndexes.length; i += 1) {
-            const endOfEnd = input.indexOf('}', endIndexes[i] + 1 + 8 * i);
+    // Loop until we run out string
+    while (start < input.length) {
+        // Check $$, $ and begin
+        const dollars = /\$\$/.exec(input.substr(start));
+        const dollar = /\$/.exec(input.substr(start));
+        const begin = /\\begin\{([a-z,\*]+)\}/.exec(input.substr(start));
+        let endRegex = /\$\$/;
+        let endRegexLength = 2;
 
-            // Edge case, if the input starts with the latex formula we add the block at the beggining.
-            if (beginIndexes[i] === 0 && input[beginIndexes[i]] === '\\') {
-                input = block + input.slice(0, endOfEnd + 1) + block + input.slice(endOfEnd + 1, input.length);
-                // Normal case, if the latex formula starts with a '$' we don't do anything.
-                // Otherwise, we insert the block at the beginning and ending of the latex formula.
-            } else if (input[beginIndexes[i] - 1] !== '$') {
-                input = input.slice(0, beginIndexes[i] + block.length * 2 * i) + block + input.slice(beginIndexes[i] + block.length * 2 * i, endOfEnd + 1) + block + input.slice(endOfEnd + 1, input.length);
-            }
+        // Pick the first that matches
+        let match = dollars;
+        let isBeginMatch = false;
+        if (!match || (dollar && dollar.index < match.index)) {
+            match = dollar;
+            endRegex = /\$/;
+            endRegexLength = 1;
+        }
+        if (!match || (begin && begin.index < match.index)) {
+            match = begin;
+            endRegex = begin ? new RegExp(`\\\\end\\{${_escapeRegExp(begin[1])}\\}`) : /\$/;
+            endRegexLength = begin ? `\\end{${begin[1]}}`.length : 1;
+            isBeginMatch = true;
         }
-    }
-
-    return input;
-}
-
-function getAllIndexesOfRegex(arr: string, value: RegExp): number[] {
-    const indexes = [];
-    let result;
 
-    // tslint:disable-next-line: no-conditional-assignment
-    while ((result = value.exec(arr)) !== null) {
-        indexes.push(result.index);
+        // Output this match
+        if (match) {
+            if (isBeginMatch) {
+                // Begin match is a little more complicated.
+                const offset = match.index + start;
+                const end = endRegex.exec(input.substr(start));
+                if (end) {
+                    const prefix = input.substr(start, match.index);
+                    const wrapped = input.substr(offset, endRegexLength + end.index - match.index);
+                    output.push(`${prefix}\n$$\n${wrapped}\n$$\n`);
+                    start = start + prefix.length + wrapped.length;
+                } else {
+                    // Invalid, just return
+                    return input;
+                }
+            } else {
+                // Output till the next $ or $$
+                const offset = match.index + 1 + start;
+                const endDollar = endRegex.exec(input.substr(offset));
+                if (endDollar) {
+                    const length = endDollar.index + 1 + offset;
+                    output.push(input.substr(start, length));
+                    start = start + length;
+                } else {
+                    // Invalid, just return
+                    return input;
+                }
+            }
+        } else {
+            // No more matches
+            output.push(input.substr(start));
+            start = input.length;
+        }
     }
-
-    return indexes;
+    return output.join('');
 }
diff --git a/src/test/datascience/latexManipulation.unit.test.ts b/src/test/datascience/latexManipulation.unit.test.ts
@@ -85,18 +85,72 @@ $$
 $$
 `;
 
-    test('Latex - Equations don\'t have $$', () => {
+    const markdown4 = `
+$$
+\\begin{equation*}
+\\mathbf{V}_1 \\times \\mathbf{V}_2 = \\begin{vmatrix}
+\\mathbf{i} & \\mathbf{j} & \\mathbf{k} \\
+\\frac{\partial X}{\\partial u} & \\frac{\\partial Y}{\\partial u} & 0 \\\\
+\\frac{\partial X}{\\partial v} & \\frac{\\partial Y}{\\partial v} & 0
+\\end{vmatrix}
+\\end{equation*}
+$$
+`;
+
+    const markdown5 = `
+\\begin{equation*}
+P(E)   = {n \\choose k} p^k (1-p)^{ n-k}
+\\end{equation*}
+
+This expression $\\sqrt{3x-1}+(1+x)^2$ is an example of a TeX inline equation in a [Markdown-formatted](https://daringfireball.net/projects/markdown/) sentence.
+`;
+    const output5 = `
+
+$$
+\\begin{equation*}
+P(E)   = {n \\choose k} p^k (1-p)^{ n-k}
+\\end{equation*}
+$$
+
+
+This expression $\\sqrt{3x-1}+(1+x)^2$ is an example of a TeX inline equation in a [Markdown-formatted](https://daringfireball.net/projects/markdown/) sentence.
+`;
+
+    test('Latex - Equations don\'t have \$\$', () => {
         const result = fixLatexEquations(markdown1);
         expect(result).to.be.equal(output1, 'Result is incorrect');
     });
 
-    test('Latex - Equations have $', () => {
+    test('Latex - Equations have \$', () => {
         const result = fixLatexEquations(markdown2);
         expect(result).to.be.equal(markdown2, 'Result is incorrect');
     });
 
-    test('Latex - Multiple equations don\'t have $$', () => {
+    test('Latex - Multiple equations don\'t have \$\$', () => {
         const result = fixLatexEquations(markdown3);
         expect(result).to.be.equal(output3, 'Result is incorrect');
     });
+
+    test('Latex - All on the same line', () => {
+        const line = '\\begin{matrix}1 & 0\\0 & 1\\end{matrix}';
+        const after = '\n$$\n\\begin{matrix}1 & 0\\0 & 1\\end{matrix}\n$$\n';
+        const result = fixLatexEquations(line);
+        expect(result).to.be.equal(after, 'Result is incorrect');
+    });
+
+    test('Latex - Invalid', () => {
+        const invalid = '\n\\begin{eq*}do stuff\\end{eq}';
+        const result = fixLatexEquations(invalid);
+        expect(result).to.be.equal(invalid, 'Result should not have changed');
+    });
+
+    test('Latex - \$\$ already present', () => {
+        const result = fixLatexEquations(markdown4);
+        expect(result).to.be.equal(markdown4, 'Result should not have changed');
+    });
+
+    test('Latex - Multiple types', () => {
+        const result = fixLatexEquations(markdown5);
+        expect(result).to.be.equal(output5, 'Result is incorrect');
+    });
 });

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+Some LaTeX equations do not print in notebooks or the interactive window.`