Skip to content

Commit fd1c033

Browse files
committed
Update sourcemap paths when concatenating source files.
When building a package from source files, the built source files get concatenated together before being post-processed by Django. Prior to Django 4.0, the post-processing step would normalize `url(...)` entries in CSS by looking each one up in storage and replacing the path with the hashed version. Starting in Django 4.0, post-processing does the same for sourcemaps. This can break when concatenating either CSS or JavaScript files, since Pipeline may produce a built package file that's in a different directory from one or more built source files. Django would then fail to find the referenced sourcemap file and raise an error. We now include sourcemap normalization as part of the concatenation process. This uses a similar approach to `url(...)` normalization, but is now consolidated into the `Compressor.concatenate()` function. That function has been updated to take arguments controlling the concatenation process, such as a regex for capturing paths to normalize. The regex for capturing sourcemap lines is built to be spec-compliant, and is currently broader than what Django looks for during post-processing. This will help avoid potential issues as Django makes changes to its process. The old function (`concatenate_and_rewrite()`) and the old default behavior have been left intact, but with runtime deprecation warnings, so that any code specializing Pipeline will continue to work. This helps ensure this change is API-compatible and non-breaking. See issue #808 for more details on the problem and the solution.
1 parent 2018c11 commit fd1c033

File tree

5 files changed

+508
-41
lines changed

5 files changed

+508
-41
lines changed

pipeline/compressors/__init__.py

Lines changed: 187 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,13 @@
1+
from __future__ import annotations
2+
13
import base64
24
import os
35
import posixpath
46
import re
57
import subprocess
8+
import warnings
69
from itertools import takewhile
10+
from typing import Iterator, Optional, Sequence
711

812
from django.contrib.staticfiles.storage import staticfiles_storage
913
from django.utils.encoding import force_str, smart_bytes
@@ -12,8 +16,58 @@
1216
from pipeline.exceptions import CompressorError
1317
from pipeline.utils import relpath, set_std_streams_blocking, to_class
1418

15-
URL_DETECTOR = r"""url\((['"]?)\s*(.*?)\1\)"""
16-
URL_REPLACER = r"""url\(__EMBED__(.+?)(\?\d+)?\)"""
19+
20+
# Regex matching url(...), url('...'), and url("...") patterns.
21+
#
22+
# Replacements will preserve the quotes and any whitespace contained within
23+
# the pattern, transforming only the filename.
24+
#
25+
# Verbose and documented, to ease future maintenance.
26+
_CSS_URL_REWRITE_PATH_RE_STR = r"""
27+
(?P<url_prefix>
28+
url\( # The opening `url(`.
29+
(?P<url_quote>['"]?) # Optional quote (' or ").
30+
\s*
31+
)
32+
(?P<url_path>.*?) # The path to capture.
33+
(?P<url_suffix>
34+
(?P=url_quote) # The quote found earlier, if any.
35+
\s*
36+
\) # The end `)`, completing `url(...)`.
37+
)
38+
"""
39+
40+
41+
# Regex matching `//@ sourceMappingURL=...` and variants.
42+
#
43+
# This will capture sourceMappingURL and sourceURL keywords, both
44+
# `//@` and `//#` variants, and both `//` and `/* ... */` comment types.
45+
#
46+
# Verbose and documented, to ease future maintenance.
47+
_SOURCEMAP_REWRITE_PATH_RE_STR = r"""
48+
(?P<sourcemap_prefix>
49+
/(?:/|(?P<sourcemap_mlcomment>\*)) # Opening comment (`//#`, `//@`,
50+
[#@]\s+ # `/*@`, `/*#`).
51+
source(?:Mapping)?URL= # The sourcemap indicator.
52+
\s*
53+
)
54+
(?P<sourcemap_path>.*?) # The path to capture.
55+
(?P<sourcemap_suffix>
56+
\s*
57+
(?(sourcemap_mlcomment)\*/\s*) # End comment (`*/`)
58+
)
59+
$ # The line should now end.
60+
"""
61+
62+
63+
# Implementation of the above regexes, for CSS and JavaScript.
64+
CSS_REWRITE_PATH_RE = re.compile(
65+
f"{_CSS_URL_REWRITE_PATH_RE_STR}|{_SOURCEMAP_REWRITE_PATH_RE_STR}", re.X | re.M
66+
)
67+
JS_REWRITE_PATH_RE = re.compile(_SOURCEMAP_REWRITE_PATH_RE_STR, re.X | re.M)
68+
69+
70+
URL_REPLACER = re.compile(r"""url\(__EMBED__(.+?)(\?\d+)?\)""")
1771
NON_REWRITABLE_URL = re.compile(r"^(#|http:|https:|data:|//)")
1872

1973
DEFAULT_TEMPLATE_FUNC = "template"
@@ -51,9 +105,27 @@ def js_compressor(self):
51105
def css_compressor(self):
52106
return to_class(settings.CSS_COMPRESSOR)
53107

54-
def compress_js(self, paths, templates=None, **kwargs):
108+
def compress_js(
109+
self,
110+
paths: Sequence[str],
111+
templates: Optional[Sequence[str]] = None,
112+
*,
113+
output_filename: Optional[str] = None,
114+
**kwargs,
115+
) -> str:
55116
"""Concatenate and compress JS files"""
56-
js = self.concatenate(paths)
117+
# Note how a semicolon is added between the two files to make sure that
118+
# their behavior is not changed. '(expression1)\n(expression2)' calls
119+
# `expression1` with `expression2` as an argument! Superfluous
120+
# semicolons are valid in JavaScript and will be removed by the
121+
# minifier.
122+
js = self.concatenate(
123+
paths,
124+
file_sep=";",
125+
output_filename=output_filename,
126+
rewrite_path_re=JS_REWRITE_PATH_RE,
127+
)
128+
57129
if templates:
58130
js = js + self.compile_templates(templates)
59131

@@ -68,7 +140,13 @@ def compress_js(self, paths, templates=None, **kwargs):
68140

69141
def compress_css(self, paths, output_filename, variant=None, **kwargs):
70142
"""Concatenate and compress CSS files"""
71-
css = self.concatenate_and_rewrite(paths, output_filename, variant)
143+
css = self.concatenate(
144+
paths,
145+
file_sep="",
146+
rewrite_path_re=CSS_REWRITE_PATH_RE,
147+
output_filename=output_filename,
148+
variant=variant,
149+
)
72150
compressor = self.css_compressor
73151
if compressor:
74152
css = getattr(compressor(verbose=self.verbose), "compress_css")(css)
@@ -131,38 +209,116 @@ def template_name(self, path, base):
131209

132210
def concatenate_and_rewrite(self, paths, output_filename, variant=None):
133211
"""Concatenate together files and rewrite urls"""
134-
stylesheets = []
135-
for path in paths:
212+
warnings.warn(
213+
"Compressor.concatenate_and_rewrite() is deprecated. Please "
214+
"call concatenate() instead.",
215+
DeprecationWarning,
216+
stacklevel=2,
217+
)
218+
219+
return self.concatenate(
220+
paths=paths,
221+
file_sep="",
222+
rewrite_path_re=CSS_REWRITE_PATH_RE,
223+
output_filename=output_filename,
224+
variant=variant,
225+
)
136226

137-
def reconstruct(match):
138-
quote = match.group(1) or ""
139-
asset_path = match.group(2)
140-
if NON_REWRITABLE_URL.match(asset_path):
141-
return f"url({quote}{asset_path}{quote})"
142-
asset_url = self.construct_asset_path(
143-
asset_path, path, output_filename, variant
227+
def concatenate(
228+
self,
229+
paths: Sequence[str],
230+
*,
231+
file_sep: Optional[str] = None,
232+
output_filename: Optional[str] = None,
233+
rewrite_path_re: Optional[re.Pattern] = None,
234+
variant: Optional[str] = None,
235+
) -> str:
236+
"""Concatenate together a list of files.
237+
238+
The caller can specify a delimiter between files and any regexes
239+
used to normalize relative paths. Path normalization is important for
240+
ensuring that local resources or sourcemaps can be updated in time
241+
for Django's static media post-processing phase.
242+
"""
243+
244+
def _reconstruct(
245+
m: re.Match,
246+
source_path: str,
247+
) -> str:
248+
groups = m.groupdict()
249+
asset_path: Optional[str] = None
250+
prefix = ""
251+
suffix = ""
252+
253+
for prefix in ("sourcemap", "url"):
254+
asset_path = groups.get(f"{prefix}_path")
255+
256+
if asset_path is not None:
257+
asset_path = asset_path.strip()
258+
prefix, suffix = m.group(f"{prefix}_prefix", f"{prefix}_suffix")
259+
break
260+
261+
if asset_path is None:
262+
# This is empty. Return the whole match as-is.
263+
return m.group()
264+
265+
if asset_path and not NON_REWRITABLE_URL.match(asset_path):
266+
asset_path = self.construct_asset_path(
267+
asset_path=asset_path,
268+
source_path=source_path,
269+
output_filename=output_filename,
270+
variant=variant,
271+
)
272+
273+
return f"{prefix}{asset_path}{suffix}"
274+
275+
def _iter_files() -> Iterator[str]:
276+
if not output_filename or not rewrite_path_re:
277+
# This is a legacy call, which does not support sourcemap-aware
278+
# asset rewriting. Pipeline itself won't invoke this outside
279+
# of tests, but it may be important for third parties who
280+
# are specializing these classes.
281+
warnings.warn(
282+
"Compressor.concatenate() was called without passing "
283+
"rewrite_path_re_= or output_filename=. If you are "
284+
"specializing Compressor, please update your call "
285+
"to remain compatible with future changes.",
286+
DeprecationWarning,
287+
stacklevel=3,
144288
)
145-
return f"url({asset_url})"
146289

147-
content = self.read_text(path)
148-
# content needs to be unicode to avoid explosions with non-ascii chars
149-
content = re.sub(URL_DETECTOR, reconstruct, content)
150-
stylesheets.append(content)
151-
return "\n".join(stylesheets)
290+
return (self.read_text(path) for path in paths)
152291

153-
def concatenate(self, paths):
154-
"""Concatenate together a list of files"""
155-
# Note how a semicolon is added between the two files to make sure that
156-
# their behavior is not changed. '(expression1)\n(expression2)' calls
157-
# `expression1` with `expression2` as an argument! Superfluos semicolons
158-
# are valid in JavaScript and will be removed by the minifier.
159-
return "\n;".join([self.read_text(path) for path in paths])
292+
# Now we can attempt the modern support for concatenating
293+
# files, handling rewriting of relative assets in the process.
294+
return (
295+
rewrite_path_re.sub(
296+
lambda m: _reconstruct(m, path), self.read_text(path)
297+
)
298+
for path in paths
299+
)
300+
301+
if file_sep is None:
302+
warnings.warn(
303+
"Compressor.concatenate() was called without passing "
304+
"file_sep=. If you are specializing Compressor, please "
305+
"update your call to remain compatible with future changes. "
306+
"Defaulting to JavaScript behavior for "
307+
"backwards-compatibility.",
308+
DeprecationWarning,
309+
stacklevel=2,
310+
)
311+
file_sep = ";"
312+
313+
return f"\n{file_sep}".join(_iter_files())
160314

161-
def construct_asset_path(self, asset_path, css_path, output_filename, variant=None):
162-
"""Return a rewritten asset URL for a stylesheet"""
315+
def construct_asset_path(
316+
self, asset_path, source_path, output_filename, variant=None
317+
):
318+
"""Return a rewritten asset URL for a stylesheet or JavaScript file."""
163319
public_path = self.absolute_path(
164320
asset_path,
165-
os.path.dirname(css_path).replace("\\", "/"),
321+
os.path.dirname(source_path).replace("\\", "/"),
166322
)
167323
if self.embeddable(public_path, variant):
168324
return "__EMBED__%s" % public_path
@@ -196,7 +352,7 @@ def datauri(match):
196352
data = self.encoded_content(path)
197353
return f'url("data:{mime_type};charset=utf-8;base64,{data}")'
198354

199-
return re.sub(URL_REPLACER, datauri, css)
355+
return URL_REPLACER.sub(datauri, css)
200356

201357
def encoded_content(self, path):
202358
"""Return the base64 encoded contents"""

pipeline/packager.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,7 @@ def pack_javascripts(self, package, **kwargs):
152152
package,
153153
self.compressor.compress_js,
154154
js_compressed,
155+
output_filename=package.output_filename,
155156
templates=package.templates,
156157
**kwargs,
157158
)

tests/assets/css/sourcemap.css

Lines changed: 24 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/assets/js/sourcemap.js

Lines changed: 18 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)