Skip to content
This repository was archived by the owner on Sep 11, 2024. It is now read-only.

Commit 853c89d

Browse files
committed
Fix spurious html tags like <shrug>
* Only render HTML tags in markdown if they're del tags * Consider non-allowed HTML tags as plain text nodes, so a message of just '<shrug>' doesn't need to be sent as HTML * Consequently rewrite isPlaintext to just look at the parse tree rather than making and gutting a renderer to walk the tree (now we're using a library that actually produces a meaningfgul parse tree). * Tweak when we put \n on text output to avoid putting \n on the end of messages. Fixes element-hq/element-web#3065
1 parent 63e47d8 commit 853c89d

File tree

1 file changed

+79
-40
lines changed

1 file changed

+79
-40
lines changed

src/Markdown.js

Lines changed: 79 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,45 @@ limitations under the License.
1515
*/
1616

1717
import commonmark from 'commonmark';
18+
import escape from 'lodash/escape';
19+
20+
const ALLOWED_HTML_TAGS = ['del'];
21+
22+
// These types of node are definitely text
23+
const TEXT_NODES = ['text', 'softbreak', 'linebreak', 'paragraph', 'document'];
24+
25+
function is_allowed_html_tag(node) {
26+
// Regex won't work for tags with attrs, but we only
27+
// allow <del> anyway.
28+
const matches = /^<\/?(.*)>$/.exec(node.literal);
29+
if (matches && matches.length == 2) {
30+
const tag = matches[1];
31+
return ALLOWED_HTML_TAGS.indexOf(tag) > -1;
32+
}
33+
return false;
34+
}
35+
36+
function html_if_tag_allowed(node) {
37+
if (is_allowed_html_tag(node)) {
38+
this.lit(node.literal);
39+
return;
40+
} else {
41+
this.lit(escape(node.literal));
42+
}
43+
}
44+
45+
/*
46+
* Returns true if the parse output containing the node
47+
* comprises multiple block level elements (ie. lines),
48+
* or false if it is only a single line.
49+
*/
50+
function is_multi_line(node) {
51+
var par = node;
52+
while (par.parent) {
53+
par = par.parent;
54+
}
55+
return par.firstChild != par.lastChild;
56+
}
1857

1958
/**
2059
* Class that wraps commonmark, adding the ability to see whether
@@ -30,29 +69,26 @@ export default class Markdown {
3069
}
3170

3271
isPlainText() {
33-
// we determine if the message requires markdown by
34-
// running the parser on the tokens with a dummy
35-
// rendered and seeing if any of the renderer's
36-
// functions are called other than those noted below.
37-
// TODO: can't we just examine the output of the parser?
38-
let is_plain = true;
39-
40-
function setNotPlain() {
41-
is_plain = false;
42-
}
43-
44-
const dummy_renderer = new commonmark.HtmlRenderer();
45-
for (const k of Object.keys(commonmark.HtmlRenderer.prototype)) {
46-
dummy_renderer[k] = setNotPlain;
72+
const walker = this.parsed.walker();
73+
74+
let ev;
75+
while ( (ev = walker.next()) ) {
76+
const node = ev.node;
77+
if (TEXT_NODES.indexOf(node.type) > -1) {
78+
// definitely text
79+
continue;
80+
} else if (node.type == 'html_inline' || node.type == 'html_block') {
81+
// if it's an allowed html tag, we need to render it and therefore
82+
// we will need to use HTML. If it's not allowed, it's not HTML since
83+
// we'll just be treating it as text.
84+
if (is_allowed_html_tag(node)) {
85+
return false;
86+
}
87+
} else {
88+
return false;
89+
}
4790
}
48-
// text and paragraph are just text
49-
dummy_renderer.text = function(t) { return t; };
50-
dummy_renderer.softbreak = function(t) { return t; };
51-
dummy_renderer.paragraph = function(t) { return t; };
52-
53-
dummy_renderer.render(this.parsed);
54-
55-
return is_plain;
91+
return true;
5692
}
5793

5894
toHTML() {
@@ -65,20 +101,27 @@ export default class Markdown {
65101
// 'inline', rather than unnecessarily wrapped in its own
66102
// p tag. If, however, we have multiple nodes, each gets
67103
// its own p tag to keep them as separate paragraphs.
68-
var par = node;
69-
while (par.parent) {
70-
par = par.parent;
71-
}
72-
if (par.firstChild != par.lastChild) {
104+
if (is_multi_line(node)) {
73105
real_paragraph.call(this, node, entering);
74106
}
75107
};
76108

109+
renderer.html_inline = html_if_tag_allowed;
110+
renderer.html_block = function(node) {
111+
// as with `paragraph`, we only insert line breaks
112+
// if there are multiple lines in the markdown.
113+
const isMultiLine = is_multi_line(node);
114+
115+
if (isMultiLine) this.cr();
116+
html_if_tag_allowed.call(this, node);
117+
if (isMultiLine) this.cr();
118+
}
119+
77120
return renderer.render(this.parsed);
78121
}
79122

80123
/*
81-
* Render the mrkdown message to plain text. That is, essentially
124+
* Render the markdown message to plain text. That is, essentially
82125
* just remove any backslashes escaping what would otherwise be
83126
* markdown syntax
84127
* (to fix https://github.com/vector-im/riot-web/issues/2870)
@@ -96,22 +139,18 @@ export default class Markdown {
96139
};
97140

98141
renderer.paragraph = function(node, entering) {
99-
// If there is only one top level node, just return the
100-
// bare text: it's a single line of text and so should be
101-
// 'inline', rather than unnecessarily wrapped in its own
102-
// p tag. If, however, we have multiple nodes, each gets
103-
// its own p tag to keep them as separate paragraphs.
104-
var par = node;
105-
while (par.parent) {
106-
node = par;
107-
par = par.parent;
108-
}
109-
if (node != par.lastChild) {
110-
if (!entering) {
142+
// as with toHTML, only append lines to paragraphs if there are
143+
// multiple paragraphs
144+
if (is_multi_line(node)) {
145+
if (!entering && node.next) {
111146
this.lit('\n\n');
112147
}
113148
}
114149
};
150+
renderer.html_block = function(node) {
151+
this.lit(node.literal);
152+
if (is_multi_line(node) && node.next) this.lit('\n\n');
153+
}
115154

116155
return renderer.render(this.parsed);
117156
}

0 commit comments

Comments
 (0)