Skip to content
This repository was archived by the owner on Sep 11, 2024. It is now read-only.

Commit ae2082b

Browse files
authored
Merge pull request #5515 from rda0/maths-parsing-latex
Use LaTeX and TeX delimiters by default
2 parents: cf06948 + 31e85ec; commit: ae2082b

File tree

2 files changed: 84 additions (+84) and 26 deletions (-26)

src/editor/deserialize.ts

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -143,11 +143,11 @@ function parseElement(n: HTMLElement, partCreator: PartCreator, lastNode: HTMLEl
143143
// math nodes are translated back into delimited latex strings
144144
if (n.hasAttribute("data-mx-maths")) {
145145
const delimLeft = (n.nodeName == "SPAN") ?
146-
(SdkConfig.get()['latex_maths_delims'] || {})['inline_left'] || "$" :
147-
(SdkConfig.get()['latex_maths_delims'] || {})['display_left'] || "$$";
146+
((SdkConfig.get()['latex_maths_delims'] || {})['inline'] || {})['left'] || "\\(" :
147+
((SdkConfig.get()['latex_maths_delims'] || {})['display'] || {})['left'] || "\\[";
148148
const delimRight = (n.nodeName == "SPAN") ?
149-
(SdkConfig.get()['latex_maths_delims'] || {})['inline_right'] || "$" :
150-
(SdkConfig.get()['latex_maths_delims'] || {})['display_right'] || "$$";
149+
((SdkConfig.get()['latex_maths_delims'] || {})['inline'] || {})['right'] || "\\)" :
150+
((SdkConfig.get()['latex_maths_delims'] || {})['display'] || {})['right'] || "\\]";
151151
const tex = n.getAttribute("data-mx-maths");
152152
return partCreator.plain(delimLeft + tex + delimRight);
153153
} else if (!checkDescendInto(n)) {

src/editor/serialize.ts

Lines changed: 80 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -47,21 +47,65 @@ export function mdSerialize(model: EditorModel) {
4747

4848
export function htmlSerializeIfNeeded(model: EditorModel, {forceHTML = false} = {}) {
4949
let md = mdSerialize(model);
50+
// copy of raw input to remove unwanted math later
51+
const orig = md;
5052

5153
if (SettingsStore.getValue("feature_latex_maths")) {
52-
const displayPattern = (SdkConfig.get()['latex_maths_delims'] || {})['display_pattern'] ||
53-
"\\$\\$(([^$]|\\\\\\$)*)\\$\\$";
54-
const inlinePattern = (SdkConfig.get()['latex_maths_delims'] || {})['inline_pattern'] ||
55-
"\\$(([^$]|\\\\\\$)*)\\$";
56-
57-
md = md.replace(RegExp(displayPattern, "gm"), function(m, p1) {
58-
const p1e = AllHtmlEntities.encode(p1);
59-
return `<div data-mx-maths="${p1e}">\n\n</div>\n\n`;
60-
});
61-
62-
md = md.replace(RegExp(inlinePattern, "gm"), function(m, p1) {
63-
const p1e = AllHtmlEntities.encode(p1);
64-
return `<span data-mx-maths="${p1e}"></span>`;
54+
const patternNames = ['tex', 'latex'];
55+
const patternTypes = ['display', 'inline'];
56+
const patternDefaults = {
57+
"tex": {
58+
// detect math with tex delimiters, inline: $...$, display $$...$$
59+
// preferably use negative lookbehinds, not supported in all major browsers:
60+
// const displayPattern = "^(?<!\\\\)\\$\\$(?![ \\t])(([^$]|\\\\\\$)+?)\\$\\$$";
61+
// const inlinePattern = "(?:^|\\s)(?<!\\\\)\\$(?!\\s)(([^$]|\\\\\\$)+?)(?<!\\\\|\\s)\\$";
62+
63+
// conditions for display math detection $$...$$:
64+
// - pattern starts at beginning of line or is not prefixed with backslash or dollar
65+
// - left delimiter ($$) is not escaped by backslash
66+
"display": "(^|[^\\\\$])\\$\\$(([^$]|\\\\\\$)+?)\\$\\$",
67+
68+
// conditions for inline math detection $...$:
69+
// - pattern starts at beginning of line, follows whitespace character or punctuation
70+
// - pattern is on a single line
71+
// - left and right delimiters ($) are not escaped by backslashes
72+
// - left delimiter is not followed by whitespace character
73+
// - right delimiter is not prefixed with whitespace character
74+
"inline":
75+
"(^|\\s|[.,!?:;])(?!\\\\)\\$(?!\\s)(([^$\\n]|\\\\\\$)*([^\\\\\\s\\$]|\\\\\\$)(?:\\\\\\$)?)\\$",
76+
},
77+
"latex": {
78+
// detect math with latex delimiters, inline: \(...\), display \[...\]
79+
80+
// conditions for display math detection \[...\]:
81+
// - pattern starts at beginning of line or is not prefixed with backslash
82+
// - pattern is not empty
83+
"display": "(^|[^\\\\])\\\\\\[(?!\\\\\\])(.*?)\\\\\\]",
84+
85+
// conditions for inline math detection \(...\):
86+
// - pattern starts at beginning of line or is not prefixed with backslash
87+
// - pattern is not empty
88+
"inline": "(^|[^\\\\])\\\\\\((?!\\\\\\))(.*?)\\\\\\)",
89+
},
90+
};
91+
92+
patternNames.forEach(function(patternName) {
93+
patternTypes.forEach(function(patternType) {
94+
// get the regex replace pattern from config or use the default
95+
const pattern = (((SdkConfig.get()["latex_maths_delims"] ||
96+
{})[patternType] || {})["pattern"] || {})[patternName] ||
97+
patternDefaults[patternName][patternType];
98+
99+
md = md.replace(RegExp(pattern, "gms"), function(m, p1, p2) {
100+
const p2e = AllHtmlEntities.encode(p2);
101+
switch (patternType) {
102+
case "display":
103+
return `${p1}<div data-mx-maths="${p2e}">\n\n</div>\n\n`;
104+
case "inline":
105+
return `${p1}<span data-mx-maths="${p2e}"></span>`;
106+
}
107+
});
108+
});
65109
});
66110

67111
// make sure div tags always start on a new line, otherwise it will confuse
@@ -73,15 +117,29 @@ export function htmlSerializeIfNeeded(model: EditorModel, {forceHTML = false} =
73117
if (!parser.isPlainText() || forceHTML) {
74118
// feed Markdown output to HTML parser
75119
const phtml = cheerio.load(parser.toHTML(),
76-
{ _useHtmlParser2: true, decodeEntities: false })
77-
78-
// add fallback output for latex math, which should not be interpreted as markdown
79-
phtml('div, span').each(function(i, e) {
80-
const tex = phtml(e).attr('data-mx-maths')
81-
if (tex) {
82-
phtml(e).html(`<code>${tex}</code>`)
83-
}
84-
});
120+
{ _useHtmlParser2: true, decodeEntities: false });
121+
122+
if (SettingsStore.getValue("feature_latex_maths")) {
123+
// original Markdown without LaTeX replacements
124+
const parserOrig = new Markdown(orig);
125+
const phtmlOrig = cheerio.load(parserOrig.toHTML(),
126+
{ _useHtmlParser2: true, decodeEntities: false });
127+
128+
// since maths delimiters are handled before Markdown,
129+
// code blocks could contain mangled content.
130+
// replace code blocks with original content
131+
phtmlOrig('code').each(function(i) {
132+
phtml('code').eq(i).text(phtmlOrig('code').eq(i).text());
133+
});
134+
135+
// add fallback output for latex math, which should not be interpreted as markdown
136+
phtml('div, span').each(function(i, e) {
137+
const tex = phtml(e).attr('data-mx-maths')
138+
if (tex) {
139+
phtml(e).html(`<code>${tex}</code>`)
140+
}
141+
});
142+
}
85143
return phtml.html();
86144
}
87145
// ensure removal of escape backslashes in non-Markdown messages

0 commit comments

Comments
 (0)