|
2 | 2 |
|
3 | 3 | use October\Rain\Support\Str; |
4 | 4 | use Illuminate\Routing\UrlGenerator; |
| 5 | +use Symfony\Component\HtmlSanitizer\HtmlSanitizer; |
| 6 | +use Symfony\Component\HtmlSanitizer\HtmlSanitizerConfig; |
| 7 | +use enshrined\svgSanitize\Sanitizer as SvgSanitizer; |
5 | 8 |
|
6 | 9 | /** |
7 | 10 | * HtmlBuilder builds HTML elements |
@@ -505,80 +508,34 @@ public static function minify($html) |
505 | 508 | } |
506 | 509 |
|
507 | 510 | /** |
508 | | - * clean HTML to prevent most XSS attacks. |
509 | | - * @todo shift to external library |
510 | | - * @param string $html |
511 | | - * @return string |
| 511 | + * clean sanitizes HTML to prevent XSS attacks using Symfony's DOM-based HtmlSanitizer, allowing safe elements plus relative links and media. |
512 | 512 | */ |
513 | | - public static function clean($html) |
| 513 | + public static function clean(string $html): string |
514 | 514 | { |
515 | | - do { |
516 | | - $oldHtml = $html; |
517 | | - |
518 | | - // Fix &entity\n; |
519 | | - $html = str_replace(['&','<','>'], ['&amp;','&lt;','&gt;'], $html); |
520 | | - $html = preg_replace('#(&\#*\w+)[\x00-\x20]+;#u', "$1;", $html); |
521 | | - $html = preg_replace('#(&\#x*)([0-9A-F]+);*#iu', "$1$2;", $html); |
522 | | - $html = html_entity_decode($html, ENT_COMPAT, 'UTF-8'); |
523 | | - |
524 | | - // Remove any attribute starting with "on" or xmlns |
525 | | - $html = preg_replace('#(<[^>]+[\x00-\x20\"\'\/])(on|xmlns)[^>]*>#iUu', "$1>", $html); |
526 | | - |
527 | | - // Remove javascript: and vbscript: protocols |
528 | | - $html = preg_replace('#([a-z]*)[\x00-\x20\/]*=[\x00-\x20\/]*([\`\'\"]*)[\x00-\x20\/|(&\#\d+;)]*j[\x00-\x20]*a[\x00-\x20]*v[\x00-\x20]*a[\x00-\x20]*s[\x00-\x20]*c[\x00-\x20]*r[\x00-\x20]*i[\x00-\x20]*p[\x00-\x20]*t[\x00-\x20]*:#iUu', '$1=$2nojavascript...', $html); |
529 | | - $html = preg_replace('#([a-z]*)[\x00-\x20\/]*=[\x00-\x20\/]*([\`\'\"]*)[\x00-\x20\/|(&\#\d+;)]*v[\x00-\x20]*b[\x00-\x20]*s[\x00-\x20]*c[\x00-\x20]*r[\x00-\x20]*i[\x00-\x20]*p[\x00-\x20]*t[\x00-\x20]*:#iUu', '$1=$2novbscript...', $html); |
530 | | - $html = preg_replace('#([a-z]*)[\x00-\x20\/]*=[\x00-\x20\/]*([\`\'\"]*)[\x00-\x20\/|(&\#\d+;)]*-moz-binding[\x00-\x20]*:#Uu', '$1=$2nomozbinding...', $html); |
531 | | - $html = preg_replace('#([a-z]*)[\x00-\x20\/]*=[\x00-\x20\/]*([\`\'\"]*)[\x00-\x20\/|(&\#\d+;)]*data[\x00-\x20]*:#Uu', '$1=$2nodata...', $html); |
532 | | - |
533 | | - // Only works in IE: <span style="width: expression(alert('Ping!'));"></span> |
534 | | - $html = preg_replace('#(<[^>]+[\x00-\x20\"\'\/])style[^>]*>#iUu', "$1>", $html); |
535 | | - |
536 | | - // Remove namespaced elements (we do not need them) |
537 | | - $html = preg_replace('#</*\w+:\w[^>]*>#i', "", $html); |
| 515 | + $config = (new HtmlSanitizerConfig()) |
| 516 | + ->allowSafeElements() |
| 517 | + ->allowRelativeLinks() |
| 518 | + ->allowRelativeMedias(); |
538 | 519 |
|
539 | | - // Remove really unwanted tags |
540 | | - $html = preg_replace('#</*(applet|meta|xml|blink|link|style|script|embed|object|iframe|frame|frameset|ilayer|layer|bgsound|title|base)[^>]*>#i', "", $html); |
541 | | - } |
542 | | - while ($oldHtml !== $html); |
| 520 | + $sanitizer = new HtmlSanitizer($config); |
543 | 521 |
|
544 | | - return $html; |
| 522 | + return $sanitizer->sanitize($html); |
545 | 523 | } |
546 | 524 |
|
547 | 525 | /** |
548 | | - * clean XML to prevent most XSS attacks in vector files (SVGs). Same as clean except: |
549 | | - * - allowed tags: xml, title, style |
550 | | - * - allowed attributes: xmlns, style |
551 | | - * @todo shift to external library |
| 526 | + * cleanVector sanitizes XML/SVG content to prevent XSS attacks using DOM-based sanitization; returns an empty string when the sanitizer reports failure. |
| 527 | + * Uses the enshrined/svg-sanitize library, which is loosely based on DOMPurify. |
552 | 528 | */ |
553 | 529 | public static function cleanVector(string $html): string |
554 | 530 | { |
555 | | - do { |
556 | | - $oldHtml = $html; |
557 | | - |
558 | | - // Fix &entity\n; |
559 | | - $html = str_replace(['&','<','>'], ['&amp;','&lt;','&gt;'], $html); |
560 | | - $html = preg_replace('#(&\#*\w+)[\x00-\x20]+;#u', "$1;", $html); |
561 | | - $html = preg_replace('#(&\#x*)([0-9A-F]+);*#iu', "$1$2;", $html); |
562 | | - $html = html_entity_decode($html, ENT_COMPAT, 'UTF-8'); |
563 | | - |
564 | | - // Remove any attribute starting with "on" or xmlns |
565 | | - $html = preg_replace('#(<[^>]+[\x00-\x20\"\'\/])(on)[^>]*>#iUu', "$1>", $html); |
| 531 | + $sanitizer = new SvgSanitizer(); |
| 532 | + $sanitizer->minify(false); |
| 533 | + $sanitizer->removeRemoteReferences(true); |
| 534 | + $sanitizer->removeXMLTag(true); |
566 | 535 |
|
567 | | - // Remove javascript: and vbscript: protocols |
568 | | - $html = preg_replace('#([a-z]*)[\x00-\x20\/]*=[\x00-\x20\/]*([\`\'\"]*)[\x00-\x20\/|(&\#\d+;)]*j[\x00-\x20]*a[\x00-\x20]*v[\x00-\x20]*a[\x00-\x20]*s[\x00-\x20]*c[\x00-\x20]*r[\x00-\x20]*i[\x00-\x20]*p[\x00-\x20]*t[\x00-\x20]*:#iUu', '$1=$2nojavascript...', $html); |
569 | | - $html = preg_replace('#([a-z]*)[\x00-\x20\/]*=[\x00-\x20\/]*([\`\'\"]*)[\x00-\x20\/|(&\#\d+;)]*v[\x00-\x20]*b[\x00-\x20]*s[\x00-\x20]*c[\x00-\x20]*r[\x00-\x20]*i[\x00-\x20]*p[\x00-\x20]*t[\x00-\x20]*:#iUu', '$1=$2novbscript...', $html); |
570 | | - $html = preg_replace('#([a-z]*)[\x00-\x20\/]*=[\x00-\x20\/]*([\`\'\"]*)[\x00-\x20\/|(&\#\d+;)]*-moz-binding[\x00-\x20]*:#Uu', '$1=$2nomozbinding...', $html); |
571 | | - $html = preg_replace('#([a-z]*)[\x00-\x20\/]*=[\x00-\x20\/]*([\`\'\"]*)[\x00-\x20\/|(&\#\d+;)]*data[\x00-\x20]*:#Uu', '$1=$2nodata...', $html); |
572 | | - |
573 | | - // Remove namespaced elements (we do not need them) |
574 | | - $html = preg_replace('#</*\w+:\w[^>]*>#i', "", $html); |
575 | | - |
576 | | - // Remove really unwanted tags |
577 | | - $html = preg_replace('#</*(applet|meta|blink|link|script|embed|object|iframe|frame|frameset|ilayer|layer|bgsound|base)[^>]*>#i', "", $html); |
578 | | - } |
579 | | - while ($oldHtml !== $html); |
| 536 | + $clean = $sanitizer->sanitize($html); |
580 | 537 |
|
581 | | - return $html; |
| 538 | + return $clean !== false ? $clean : ''; |
582 | 539 | } |
583 | 540 |
|
584 | 541 | /** |
|
0 commit comments