2 files changed: 36 additions and 14 deletions.
lines changed Original file line number Diff line number Diff line change @@ -581,22 +581,27 @@ def _has_sneaky_javascript(self, style):
581581 that and remove only the Javascript from the style; this catches
582582 more sneaky attempts.
583583 """
584- style = self ._substitute_comments ('' , style )
585- style = style .replace ('\\ ' , '' )
586584 style = _substitute_whitespace ('' , style )
587585 style = style .lower ()
588- if _has_javascript_scheme (style ):
589- return True
590- if 'expression(' in style :
591- return True
592- if '@import' in style :
593- return True
594- if '</noscript' in style :
595- # e.g. '<noscript><style><a title="</noscript><img src=x onerror=alert(1)>">'
596- return True
597- if _looks_like_tag_content (style ):
598- # e.g. '<math><style><img src=x onerror=alert(1)></style></math>'
599- return True
586+
587+ for with_comments in True , False :
588+ if not with_comments :
589+ style = self ._substitute_comments ('' , style )
590+
591+ style = style .replace ('\\ ' , '' )
592+
593+ if _has_javascript_scheme (style ):
594+ return True
595+ if 'expression(' in style :
596+ return True
597+ if '@import' in style :
598+ return True
599+ if '</noscript' in style :
600+ # e.g. '<noscript><style><a title="</noscript><img src=x onerror=alert(1)>">'
601+ return True
602+ if _looks_like_tag_content (style ):
603+ # e.g. '<math><style><img src=x onerror=alert(1)></style></math>'
604+ return True
600605 return False
601606
602607 def clean_html (self , html ):
@@ -127,6 +127,23 @@ def test_sneaky_js_in_math_style(self):
127127 b'<math><style>/* deleted */</style></math>' ,
128128 lxml .html .tostring (clean_html (s )))
129129
130+ def test_sneaky_js_in_style_comment_math_svg (self ):
131+ for tag in "svg" , "math" :
132+ html = f'<{ tag } ><style>/*<img src onerror=alert(origin)>*/'
133+ s = lxml .html .fragment_fromstring (html )
134+
135+ self .assertEqual (
136+ f'<{ tag } ><style>/* deleted */</style></{ tag } >' .encode (),
137+ lxml .html .tostring (clean_html (s )))
138+
139+ def test_sneaky_js_in_style_comment_noscript (self ):
140+ html = '<noscript><style>/*</noscript><img src onerror=alert(origin)>*/'
141+ s = lxml .html .fragment_fromstring (html )
142+
143+ self .assertEqual (
144+ b'<noscript><style>/* deleted */</style></noscript>' ,
145+ lxml .html .tostring (clean_html (s )))
146+
130147 def test_sneaky_import_in_style (self ):
131148 # Prevent "@@importimport" -> "@import" replacement etc.
132149 style_codes = [
You can’t perform that action at this time.
0 commit comments