diff --git a/src/plugins/privacy/plugin.py b/src/plugins/privacy/plugin.py index 7a3c395ecba..eb91f22f00c 100644 --- a/src/plugins/privacy/plugin.py +++ b/src/plugins/privacy/plugin.py @@ -70,8 +70,8 @@ def on_config(self, config): self.assets = Files([]) self.assets_done: list[File] = [] self.assets_expr_map = { - ".css": r"url\(\s*([\"']?)(?Phttp?[^)'\"]+)\1\s*\)", - ".js": r"[\"'](?Phttp[^\"']+\.(?:css|js(?:on)?))[\"']", + ".css": r"url\(\s*([\"']?)(?P(?:https?:)?//[^)'\"]+)\1\s*\)", + ".js": r"[\"'](?P(?:https?:)?//[^\"']+\.(?:css|js(?:on)?))[\"']", **self.config.assets_expr_map } @@ -156,7 +156,7 @@ def on_page_content(self, html, *, page, config, files): # Find all external images and download them if not excluded for match in re.findall( - r"]+src=['\"]?http[^>]+>", + r"]+src=['\"]?(?:https?:)?//[^>]+>", html, flags = re.I | re.M ): el = self._parse_fragment(match) @@ -407,7 +407,7 @@ def replace(match: Match): # Find and replace all external asset URLs in current page return re.sub( - r"<(?:(?:a|link|image)[^>]+href|(?:script|img)[^>]+src)=['\"]?http[^>]+>", + r"<(?:(?:a|link|image)[^>]+href|(?:script|img)[^>]+src)=['\"]?(?:https?:)?//[^>]+>", replace, output, flags = re.I | re.M )