22// for details. All rights reserved. Use of this source code is governed by a
33// BSD-style license that can be found in the LICENSE file.
44
5- import 'package:html/parser.dart' show parse;
5+ import 'package:html/dom.dart' as dom;
6+ import 'package:html/parser.dart' show parseFragment;
7+
68import 'package:markdown/markdown.dart' as md;
79import 'package:meta/meta.dart' ;
810
/// Converts parsed markdown nodes into a [DocumentationRenderResult].
abstract class DocumentationRenderer {
  /// Renders [nodes] and returns the outcome.
  ///
  /// Both named flags must be supplied by the caller. In the HTML renderer
  /// declared in this file, [processFullDocs] decides whether the full HTML
  /// string is produced (it stays empty otherwise) and [sanitizeHtml] decides
  /// whether the parsed fragment is run through the sanitizer before it is
  /// serialized.
  DocumentationRenderResult render(
    List<md.Node> nodes, {
    @required bool processFullDocs,
    @required bool sanitizeHtml,
  });
}
1518
@@ -20,16 +23,16 @@ class DocumentationRendererHtml implements DocumentationRenderer {
2023 DocumentationRenderResult render (
2124 List <md.Node > nodes, {
2225 @required bool processFullDocs,
26+ @required bool sanitizeHtml,
2327 }) {
2428 if (nodes.isEmpty) {
2529 return DocumentationRenderResult .empty;
2630 }
31+
2732 var rawHtml = md.HtmlRenderer ().render (nodes);
28- var asHtmlDocument = parse (rawHtml);
29- for (var s in asHtmlDocument.querySelectorAll ('script' )) {
30- s.remove ();
31- }
32- for (var pre in asHtmlDocument.querySelectorAll ('pre' )) {
33+ var asHtmlFragment = parseFragment (rawHtml);
34+
35+ for (var pre in asHtmlFragment.querySelectorAll ('pre' )) {
3336 if (pre.children.length > 1 && pre.children.first.localName != 'code' ) {
3437 continue ;
3538 }
@@ -44,16 +47,21 @@ class DocumentationRendererHtml implements DocumentationRenderer {
4447 // Assume the user intended Dart if there are no other classes present.
4548 if (! specifiesLanguage) pre.classes.add ('language-dart' );
4649 }
50+
51+ if (sanitizeHtml) {
52+ _sanitize (asHtmlFragment);
53+ }
54+
4755 var asHtml = '' ;
4856
4957 if (processFullDocs) {
5058 // `trim` fixes an issue with line ending differences between Mac and
5159 // Windows.
52- asHtml = asHtmlDocument.body.innerHtml ? .trim ();
60+ asHtml = asHtmlFragment.outerHtml .trim ();
5361 }
54- var asOneLiner = asHtmlDocument.body .children.isEmpty
62+ var asOneLiner = asHtmlFragment .children.isEmpty
5563 ? ''
56- : asHtmlDocument.body .children.first.innerHtml;
64+ : asHtmlFragment .children.first.innerHtml;
5765
5866 return DocumentationRenderResult (asHtml: asHtml, asOneLiner: asOneLiner);
5967 }
@@ -68,3 +76,253 @@ class DocumentationRenderResult {
6876 const DocumentationRenderResult (
6977 {@required this .asHtml, @required this .asOneLiner});
7078}
79+
/// Whether [className] may stay on a sanitized element.
///
/// Only the literal `deprecated` class and classes beginning with `language-`
/// are kept.
bool _allowClassName(String className) {
  if (className.startsWith('language-')) {
    return true;
  }
  return className == 'deprecated';
}
82+
/// Returns the `rel` tokens that should be set on a link whose target is [uri].
///
/// A target that names a host yields `['ugc']`. A target without a host (a
/// relative path, a fragment, a `mailto:` address) or one that cannot be
/// parsed as a URI yields an empty list.
Iterable<String> _addLinkRel(String uri) {
  final u = Uri.tryParse(uri);
  // `Uri.tryParse` returns null for malformed input; treat that the same as a
  // host-less link instead of dereferencing null.
  if (u != null && u.host.isNotEmpty) {
    // TODO(jonasfj): Consider allowing non-ugc links for trusted sites.
    return ['ugc'];
  }
  return [];
}
91+
/// Recursively strips disallowed markup from [node] and its descendants.
///
/// For an element:
///  * if its upper-cased tag name is not in [_allowedElements], the element is
///    removed from its parent and nothing below it is visited;
///  * otherwise every attribute rejected by [_isAttributeAllowed] is dropped,
///    and the `class` attribute is reduced to the names accepted by
///    [_allowClassName] (and dropped entirely when none remain);
///  * an `<a>` that still has an `href` gets its `rel` attribute overwritten
///    with whatever [_addLinkRel] returns, when that is non-empty.
///
/// Nodes that are not elements are left as they are; only their children are
/// visited.
void _sanitize(dom.Node node) {
  if (node is dom.Element) {
    final upperTag = node.localName.toUpperCase();
    if (!_allowedElements.contains(upperTag)) {
      node.remove();
      return;
    }

    node.attributes.removeWhere((key, value) {
      final name = key.toString();
      if (name != 'class') {
        return !_isAttributeAllowed(upperTag, name, value);
      }
      // The class list is filtered from inside this callback, i.e. while the
      // attribute map is being walked; the attribute itself goes away only
      // when no class name survives.
      node.classes.removeWhere((candidate) => !_allowClassName(candidate));
      return node.classes.isEmpty;
    });

    final href = upperTag == 'A' ? node.attributes['href'] : null;
    if (href != null) {
      final rels = _addLinkRel(href);
      if (rels != null && rels.isNotEmpty) {
        node.attributes['rel'] = rels.join(' ');
      }
    }
  }

  if (!node.hasChildNodes()) {
    return;
  }
  // Walk a last-to-first snapshot of the children: a child may detach itself
  // while being sanitized, and that must not cause a sibling to be skipped.
  for (final child in node.nodes.reversed.toList()) {
    _sanitize(child);
  }
}
125+
/// Whether attribute [attrName] with [value] may stay on a [tagName] element.
///
/// Names listed in [_alwaysAllowedAttributes] pass unconditionally. Any other
/// name passes only if [_elementAttributeValidators] holds a validator for
/// this tag/attribute pair (href/src/cite and similar) and that validator
/// accepts [value].
bool _isAttributeAllowed(String tagName, String attrName, String value) {
  if (_alwaysAllowedAttributes.contains(attrName)) {
    return true;
  }

  final validatorsForTag = _elementAttributeValidators[tagName];
  final validate = validatorsForTag == null ? null : validatorsForTag[attrName];
  return validate != null && validate(value);
}
142+
/// Upper-case tag names that [_sanitize] leaves in the tree; an element whose
/// tag is not listed here is removed.
///
/// Inspired by the set of HTML tags allowed in GFM.
final _allowedElements = <String>{
  'H1', 'H2', 'H3', 'H4', 'H5', 'H6', 'H7', 'H8',
  'BR', 'B', 'I', 'STRONG', 'EM', 'A', 'PRE', 'CODE', 'IMG', 'TT', 'DIV',
  'INS', 'DEL', 'SUP', 'SUB', 'P', 'OL', 'UL',
  'TABLE', 'THEAD', 'TBODY', 'TFOOT', 'BLOCKQUOTE',
  'DL', 'DT', 'DD', 'KBD', 'Q', 'SAMP', 'VAR', 'HR',
  'RUBY', 'RT', 'RP', 'LI', 'TR', 'TD', 'TH', 'S', 'STRIKE',
  'SUMMARY', 'DETAILS', 'CAPTION', 'FIGURE', 'FIGCAPTION',
  'ABBR', 'BDO', 'CITE', 'DFN', 'MARK', 'SMALL', 'SPAN', 'TIME', 'WBR'
};
208+
/// Attribute names that [_isAttributeAllowed] accepts on any element without
/// looking at the attribute's value.
///
/// Inspired by the set of HTML attributes allowed in GFM.
final _alwaysAllowedAttributes = <String>{
  'abbr', 'accept', 'accept-charset', 'accesskey', 'action', 'align', 'alt',
  'aria-describedby', 'aria-hidden', 'aria-label', 'aria-labelledby',
  'axis', 'border', 'cellpadding', 'cellspacing', 'char', 'charoff',
  'charset', 'checked', 'clear', 'cols', 'colspan', 'color', 'compact',
  'coords', 'datetime', 'dir', 'disabled', 'enctype', 'for', 'frame',
  'headers', 'height', 'hreflang', 'hspace', 'ismap', 'label', 'lang',
  'maxlength', 'media', 'method', 'multiple', 'name', 'nohref', 'noshade',
  'nowrap', 'open', 'prompt', 'readonly', 'rel', 'rev', 'rows', 'rowspan',
  'rules', 'scope', 'selected', 'shape', 'size', 'span', 'start', 'summary',
  'tabindex', 'target', 'title', 'type', 'usemap', 'valign', 'value',
  'vspace', 'width', 'itemprop'
};
283+
/// Attribute validator that accepts every value.
bool _alwaysAllowed(String _) {
  return true;
}
285+
/// Whether [url] is acceptable as a link target.
///
/// Accepts anything that parses as a URI and either carries no scheme at all
/// or uses `https`, `http` or `mailto`. Text that does not parse as a URI is
/// rejected.
bool _validLink(String url) {
  final parsed = Uri.tryParse(url);
  if (parsed == null) {
    return false;
  }
  if (!parsed.hasScheme) {
    return true;
  }
  return const ['https', 'http', 'mailto'].any(parsed.isScheme);
}
297+
/// Whether [url] is acceptable as a resource reference (`src`, `longdesc`,
/// `cite`).
///
/// Accepts anything that parses as a URI and either carries no scheme at all
/// or uses `https` or `http`. Text that does not parse as a URI is rejected.
bool _validUrl(String url) {
  final parsed = Uri.tryParse(url);
  if (parsed == null) {
    return false;
  }
  return !parsed.hasScheme ||
      parsed.isScheme('http') ||
      parsed.isScheme('https');
}
306+
/// Validator table shared by every element that may carry a `cite` attribute:
/// the value has to pass [_validUrl].
final _citeAttributeValidator = <String, bool Function(String)>{'cite': _validUrl};
310+
/// Per-element attribute validators, keyed by upper-case tag name and then by
/// attribute name.
///
/// [_isAttributeAllowed] consults this table for attributes that are not in
/// [_alwaysAllowedAttributes]; an attribute is kept only when a validator is
/// registered here for its element and that validator accepts the value.
final _elementAttributeValidators =
    <String, Map<String, bool Function(String)>>{
  'A': {'href': _validLink},
  'IMG': {'src': _validUrl, 'longdesc': _validUrl},
  'DIV': {'itemscope': _alwaysAllowed, 'itemtype': _alwaysAllowed},
  // Every element below accepts `cite` under the same rule.
  for (final tag in const ['BLOCKQUOTE', 'DEL', 'INS', 'Q'])
    tag: _citeAttributeValidator,
};
0 commit comments