@@ -15,110 +15,143 @@ limitations under the License.
15
15
*/
16
16
17
17
import commonmark from 'commonmark' ;
18
+ import escape from 'lodash/escape' ;
19
+
20
// Names of the HTML tags that may be emitted unescaped in rendered output.
const ALLOWED_HTML_TAGS = ['del'];

// These types of node are definitely text
const TEXT_NODES = ['text', 'softbreak', 'linebreak', 'paragraph', 'document'];
24
+
25
/*
 * Returns true if the node's literal is a bare opening or closing
 * tag (no attributes, nothing before or after it) whose name is
 * listed in ALLOWED_HTML_TAGS, or false otherwise.
 */
function is_allowed_html_tag(node) {
    // Regex won't work for tags with attrs, but we only
    // allow <del> anyway.
    const matches = /^<\/?(.*)>$/.exec(node.literal);
    if (matches === null) {
        return false;
    }
    // The single capture group holds everything between the angle
    // brackets, so anything other than a bare tag name is rejected here.
    return ALLOWED_HTML_TAGS.indexOf(matches[1]) > -1;
}
35
+
36
/*
 * Renderer callback for raw HTML nodes; must be invoked with the
 * renderer as `this`. Writes the node's literal to the output as-is
 * if it is an allowed tag, or passes it through `escape` first if not,
 * so that disallowed HTML is shown as text rather than interpreted.
 */
function html_if_tag_allowed(node) {
    const output = is_allowed_html_tag(node) ? node.literal : escape(node.literal);
    this.lit(output);
}
44
+
45
/*
 * Returns true if the parse output containing the node
 * comprises multiple block level elements (ie. lines),
 * or false if it is only a single line.
 */
function is_multi_line(node) {
    // Climb to the root of the tree the node belongs to.
    let root = node;
    while (root.parent) {
        root = root.parent;
    }
    // More than one top level child means more than one line.
    return root.firstChild !== root.lastChild;
}
18
57
19
58
/**
 * Class that wraps commonmark, adding the ability to see whether
 * a given message actually uses any markdown syntax or whether
 * it's plain text.
 */
export default class Markdown {
    /**
     * Parses the input once; every other method works from the
     * resulting tree.
     *
     * @param {string} input The raw message text.
     */
    constructor(input) {
        this.input = input;

        const parser = new commonmark.Parser();
        this.parsed = parser.parse(this.input);
    }

    /**
     * @returns {boolean} true if the parsed message contains only
     *     text-like nodes (and HTML that will be escaped anyway),
     *     false if rendering it requires HTML.
     */
    isPlainText() {
        const walker = this.parsed.walker();

        let ev;
        while ( (ev = walker.next()) ) {
            const node = ev.node;
            if (TEXT_NODES.indexOf(node.type) > -1) {
                // definitely text
                continue;
            }
            if (node.type !== 'html_inline' && node.type !== 'html_block') {
                // any other kind of node is markdown syntax
                return false;
            }
            // if it's an allowed html tag, we need to render it and therefore
            // we will need to use HTML. If it's not allowed, it's not HTML since
            // we'll just be treating it as text.
            if (is_allowed_html_tag(node)) {
                return false;
            }
        }
        return true;
    }

    /**
     * @returns {string} The message rendered to HTML, with any HTML
     *     tag that is not allowed escaped.
     */
    toHTML() {
        const renderer = new commonmark.HtmlRenderer({safe: false});
        const real_paragraph = renderer.paragraph;

        renderer.paragraph = function(node, entering) {
            // If there is only one top level node, just return the
            // bare text: it's a single line of text and so should be
            // 'inline', rather than unnecessarily wrapped in its own
            // p tag. If, however, we have multiple nodes, each gets
            // its own p tag to keep them as separate paragraphs.
            if (is_multi_line(node)) {
                real_paragraph.call(this, node, entering);
            }
        };

        renderer.html_inline = html_if_tag_allowed;
        renderer.html_block = function(node) {
            // as with `paragraph`, we only insert line breaks
            // if there are multiple lines in the markdown.
            const isMultiLine = is_multi_line(node);

            if (isMultiLine) this.cr();
            html_if_tag_allowed.call(this, node);
            if (isMultiLine) this.cr();
        };

        return renderer.render(this.parsed);
    }

    /*
     * Render the markdown message to plain text. That is, essentially
     * just remove any backslashes escaping what would otherwise be
     * markdown syntax
     * (to fix https://github.com/vector-im/riot-web/issues/2870)
     */
    toPlaintext() {
        const renderer = new commonmark.HtmlRenderer({safe: false});

        // The default `out` function only sends the input through an XML
        // escaping function, which causes messages to be entity encoded,
        // which we don't want in this case.
        renderer.out = function(s) {
            // The `lit` function adds a string literal to the output buffer.
            this.lit(s);
        };

        renderer.paragraph = function(node, entering) {
            // as with toHTML, only append lines to paragraphs if there are
            // multiple paragraphs
            if (is_multi_line(node) && !entering && node.next) {
                this.lit('\n\n');
            }
        };
        renderer.html_block = function(node) {
            // raw HTML is emitted verbatim here: the output is plain text
            this.lit(node.literal);
            if (is_multi_line(node) && node.next) this.lit('\n\n');
        };

        return renderer.render(this.parsed);
    }
}
0 commit comments