|
1 | 1 | // Copyright (c) Microsoft Corporation. All rights reserved.
|
2 | 2 | // Licensed under the MIT License.
|
| 3 | + |
| 4 | +// tslint:disable-next-line:no-require-imports no-var-requires |
| 5 | +const _escapeRegExp = require('lodash/escapeRegExp') as typeof import('lodash/escapeRegExp'); |
| 6 | + |
// Adds '$$' to latex formulas that don't have a '$', allowing users to input the formula directly.
//
// The general algorithm here is:
// Search for either $$ or $ or a \begin{name} item, whichever comes first.
// If a $$ or $ is found, output everything up to and including the matching closing $$ or $ unchanged.
// If a \begin{name} is found, find the first \end{name} that follows it, wrap the section in $$ and output up to the \end.
// If an opening $, $$ or \begin{name} has no closing partner, the input is treated as invalid and returned untouched.
//
// Environments of the same name are not balanced: the first \end{name} after a \begin{name} closes it.
//
// LaTeX seems to follow the pattern of \begin{name} or is escaped with $$ or $. See here for a bunch of examples:
// https://jupyter-notebook.readthedocs.io/en/stable/examples/Notebook/Typesetting%20Equations.html
export function fixLatexEquations(input: string): string {
    const block = '\n$$\n';
    const output: string[] = [];

    // Global flag so that lastIndex can be used to search from a position without copying the string.
    const beginRegex = /\\begin\{([a-z,\*]+)\}/g;

    // Everything before 'start' has already been written to the output.
    let start = 0;

    // Loop until we run out of string
    while (start < input.length) {
        const dollarIndex = input.indexOf('$', start);
        beginRegex.lastIndex = start;
        const begin = beginRegex.exec(input);

        if (begin && (dollarIndex === -1 || begin.index < dollarIndex)) {
            // A bare \begin{name} comes first. Look for its \end{name} strictly after the \begin{name}
            // so that a stray \end{name} earlier in the text cannot be mistaken for the terminator.
            const endRegex = new RegExp(`\\\\end\\{${_escapeRegExp(begin[1])}\\}`, 'g');
            endRegex.lastIndex = begin.index + begin[0].length;
            const end = endRegex.exec(input);
            if (!end) {
                // Invalid, just return
                return input;
            }
            const next = end.index + end[0].length;
            output.push(input.slice(start, begin.index), block, input.slice(begin.index, next), block);
            start = next;
        } else if (dollarIndex !== -1) {
            // A $ or $$ comes first. The user already escaped this section, so copy it through verbatim.
            const delimiter = input.charAt(dollarIndex + 1) === '$' ? '$$' : '$';
            // Skip the whole opening delimiter before searching, and consume the whole closing one.
            const closeIndex = input.indexOf(delimiter, dollarIndex + delimiter.length);
            if (closeIndex === -1) {
                // Invalid, just return
                return input;
            }
            const next = closeIndex + delimiter.length;
            output.push(input.slice(start, next));
            start = next;
        } else {
            // No more matches
            output.push(input.slice(start));
            start = input.length;
        }
    }

    return output.join('');
}
|
0 commit comments