Skip to content

Commit 4244c43

Browse files
committed
Switch to HtmlSanitizer and SvgSanitizer
1 parent ed44495 commit 4244c43

File tree

3 files changed

+319
-78
lines changed

3 files changed

+319
-78
lines changed

composer.json

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,9 @@
2727
"symfony/yaml": "^6.0",
2828
"twig/twig": "^3.21",
2929
"league/csv": "~9.1",
30-
"laravel/tinker": "~2.0"
30+
"laravel/tinker": "~2.0",
31+
"symfony/html-sanitizer": "^6.1|^7.0",
32+
"enshrined/svg-sanitize": "^0.15|^0.16|^0.17|^0.18|^0.19|^0.20|^0.21|^0.22"
3133
},
3234
"require-dev": {
3335
"laravel/framework": "^12.0",

src/Html/HtmlBuilder.php

Lines changed: 19 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22

33
use October\Rain\Support\Str;
44
use Illuminate\Routing\UrlGenerator;
5+
use Symfony\Component\HtmlSanitizer\HtmlSanitizer;
6+
use Symfony\Component\HtmlSanitizer\HtmlSanitizerConfig;
7+
use enshrined\svgSanitize\Sanitizer as SvgSanitizer;
58

69
/**
710
* HtmlBuilder builds HTML elements
@@ -505,80 +508,34 @@ public static function minify($html)
505508
}
506509

507510
/**
508-
* clean HTML to prevent most XSS attacks.
509-
* @todo shift to external library
510-
* @param string $html
511-
* @return string
511+
* clean HTML to prevent XSS attacks using DOM-based sanitization.
512512
*/
513-
public static function clean($html)
513+
public static function clean(string $html): string
514514
{
515-
do {
516-
$oldHtml = $html;
517-
518-
// Fix &entity\n;
519-
$html = str_replace(['&amp;','&lt;','&gt;'], ['&amp;amp;','&amp;lt;','&amp;gt;'], $html);
520-
$html = preg_replace('#(&\#*\w+)[\x00-\x20]+;#u', "$1;", $html);
521-
$html = preg_replace('#(&\#x*)([0-9A-F]+);*#iu', "$1$2;", $html);
522-
$html = html_entity_decode($html, ENT_COMPAT, 'UTF-8');
523-
524-
// Remove any attribute starting with "on" or xmlns
525-
$html = preg_replace('#(<[^>]+[\x00-\x20\"\'\/])(on|xmlns)[^>]*>#iUu', "$1>", $html);
526-
527-
// Remove javascript: and vbscript: protocols
528-
$html = preg_replace('#([a-z]*)[\x00-\x20\/]*=[\x00-\x20\/]*([\`\'\"]*)[\x00-\x20\/|(&\#\d+;)]*j[\x00-\x20]*a[\x00-\x20]*v[\x00-\x20]*a[\x00-\x20]*s[\x00-\x20]*c[\x00-\x20]*r[\x00-\x20]*i[\x00-\x20]*p[\x00-\x20]*t[\x00-\x20]*:#iUu', '$1=$2nojavascript...', $html);
529-
$html = preg_replace('#([a-z]*)[\x00-\x20\/]*=[\x00-\x20\/]*([\`\'\"]*)[\x00-\x20\/|(&\#\d+;)]*v[\x00-\x20]*b[\x00-\x20]*s[\x00-\x20]*c[\x00-\x20]*r[\x00-\x20]*i[\x00-\x20]*p[\x00-\x20]*t[\x00-\x20]*:#iUu', '$1=$2novbscript...', $html);
530-
$html = preg_replace('#([a-z]*)[\x00-\x20\/]*=[\x00-\x20\/]*([\`\'\"]*)[\x00-\x20\/|(&\#\d+;)]*-moz-binding[\x00-\x20]*:#Uu', '$1=$2nomozbinding...', $html);
531-
$html = preg_replace('#([a-z]*)[\x00-\x20\/]*=[\x00-\x20\/]*([\`\'\"]*)[\x00-\x20\/|(&\#\d+;)]*data[\x00-\x20]*:#Uu', '$1=$2nodata...', $html);
532-
533-
// Only works in IE: <span style="width: expression(alert('Ping!'));"></span>
534-
$html = preg_replace('#(<[^>]+[\x00-\x20\"\'\/])style[^>]*>#iUu', "$1>", $html);
535-
536-
// Remove namespaced elements (we do not need them)
537-
$html = preg_replace('#</*\w+:\w[^>]*>#i', "", $html);
515+
$config = (new HtmlSanitizerConfig())
516+
->allowSafeElements()
517+
->allowRelativeLinks()
518+
->allowRelativeMedias();
538519

539-
// Remove really unwanted tags
540-
$html = preg_replace('#</*(applet|meta|xml|blink|link|style|script|embed|object|iframe|frame|frameset|ilayer|layer|bgsound|title|base)[^>]*>#i', "", $html);
541-
}
542-
while ($oldHtml !== $html);
520+
$sanitizer = new HtmlSanitizer($config);
543521

544-
return $html;
522+
return $sanitizer->sanitize($html);
545523
}
546524

547525
/**
548-
* clean XML to prevent most XSS attacks in vector files (SVGs). Same as clean except:
549-
* - allowed tags: xml, title, style
550-
* - allowed attributes: xmlns, style
551-
* @todo shift to external library
526+
* cleanVector sanitizes XML/SVG content to prevent XSS attacks using DOM-based sanitization.
527+
* Uses enshrined/svg-sanitize library which is ported from DOMPurify.
552528
*/
553529
public static function cleanVector(string $html): string
554530
{
555-
do {
556-
$oldHtml = $html;
557-
558-
// Fix &entity\n;
559-
$html = str_replace(['&amp;','&lt;','&gt;'], ['&amp;amp;','&amp;lt;','&amp;gt;'], $html);
560-
$html = preg_replace('#(&\#*\w+)[\x00-\x20]+;#u', "$1;", $html);
561-
$html = preg_replace('#(&\#x*)([0-9A-F]+);*#iu', "$1$2;", $html);
562-
$html = html_entity_decode($html, ENT_COMPAT, 'UTF-8');
563-
564-
// Remove any attribute starting with "on" or xmlns
565-
$html = preg_replace('#(<[^>]+[\x00-\x20\"\'\/])(on)[^>]*>#iUu', "$1>", $html);
531+
$sanitizer = new SvgSanitizer();
532+
$sanitizer->minify(false);
533+
$sanitizer->removeRemoteReferences(true);
534+
$sanitizer->removeXMLTag(true);
566535

567-
// Remove javascript: and vbscript: protocols
568-
$html = preg_replace('#([a-z]*)[\x00-\x20\/]*=[\x00-\x20\/]*([\`\'\"]*)[\x00-\x20\/|(&\#\d+;)]*j[\x00-\x20]*a[\x00-\x20]*v[\x00-\x20]*a[\x00-\x20]*s[\x00-\x20]*c[\x00-\x20]*r[\x00-\x20]*i[\x00-\x20]*p[\x00-\x20]*t[\x00-\x20]*:#iUu', '$1=$2nojavascript...', $html);
569-
$html = preg_replace('#([a-z]*)[\x00-\x20\/]*=[\x00-\x20\/]*([\`\'\"]*)[\x00-\x20\/|(&\#\d+;)]*v[\x00-\x20]*b[\x00-\x20]*s[\x00-\x20]*c[\x00-\x20]*r[\x00-\x20]*i[\x00-\x20]*p[\x00-\x20]*t[\x00-\x20]*:#iUu', '$1=$2novbscript...', $html);
570-
$html = preg_replace('#([a-z]*)[\x00-\x20\/]*=[\x00-\x20\/]*([\`\'\"]*)[\x00-\x20\/|(&\#\d+;)]*-moz-binding[\x00-\x20]*:#Uu', '$1=$2nomozbinding...', $html);
571-
$html = preg_replace('#([a-z]*)[\x00-\x20\/]*=[\x00-\x20\/]*([\`\'\"]*)[\x00-\x20\/|(&\#\d+;)]*data[\x00-\x20]*:#Uu', '$1=$2nodata...', $html);
572-
573-
// Remove namespaced elements (we do not need them)
574-
$html = preg_replace('#</*\w+:\w[^>]*>#i', "", $html);
575-
576-
// Remove really unwanted tags
577-
$html = preg_replace('#</*(applet|meta|blink|link|script|embed|object|iframe|frame|frameset|ilayer|layer|bgsound|base)[^>]*>#i', "", $html);
578-
}
579-
while ($oldHtml !== $html);
536+
$clean = $sanitizer->sanitize($html);
580537

581-
return $html;
538+
return $clean !== false ? $clean : '';
582539
}
583540

584541
/**

0 commit comments

Comments
 (0)