Skip to content

Commit dde13ce

Browse files
authored
Fixed false positives and negatives captured in privacy plugin (#8542)
1 parent 291012d commit dde13ce

File tree

1 file changed

+4
-4
lines changed

1 file changed

+4
-4
lines changed

src/plugins/privacy/plugin.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -70,8 +70,8 @@ def on_config(self, config):
7070
self.assets = Files([])
7171
self.assets_done: list[File] = []
7272
self.assets_expr_map = {
73-
".css": r"url\(\s*([\"']?)(?P<url>http?[^)'\"]+)\1\s*\)",
74-
".js": r"[\"'](?P<url>http[^\"']+\.(?:css|js(?:on)?))[\"']",
73+
".css": r"url\(\s*([\"']?)(?P<url>(?:https?:)?//[^)'\"]+)\1\s*\)",
74+
".js": r"[\"'](?P<url>(?:https?:)?//[^\"']+\.(?:css|js(?:on)?))[\"']",
7575
**self.config.assets_expr_map
7676
}
7777

@@ -156,7 +156,7 @@ def on_page_content(self, html, *, page, config, files):
156156

157157
# Find all external images and download them if not excluded
158158
for match in re.findall(
159-
r"<img[^>]+src=['\"]?http[^>]+>",
159+
r"<img[^>]+src=['\"]?(?:https?:)?//[^>]+>",
160160
html, flags = re.I | re.M
161161
):
162162
el = self._parse_fragment(match)
@@ -407,7 +407,7 @@ def replace(match: Match):
407407

408408
# Find and replace all external asset URLs in current page
409409
return re.sub(
410-
r"<(?:(?:a|link|image)[^>]+href|(?:script|img)[^>]+src)=['\"]?http[^>]+>",
410+
r"<(?:(?:a|link|image)[^>]+href|(?:script|img)[^>]+src)=['\"]?(?:https?:)?//[^>]+>",
411411
replace, output, flags = re.I | re.M
412412
)
413413

0 commit comments

Comments
 (0)