Skip to content

Commit a5720e0

Browse files
committed
refactor(@angular/build): Auto-CSP support as an index file transformation.
Auto-CSP is a feature to rewrite the `<script>` tags in an index.html file to either hash their contents or rewrite them as a dynamic loader script that can be hashed. These hashes will be placed in a CSP inside a `<meta>` tag inside the `<head>` of the document to ensure that the scripts running on the page are those known during the compile-time of the client-side rendered application.
1 parent 29855bf commit a5720e0

File tree

5 files changed

+450
-1
lines changed

5 files changed

+450
-1
lines changed

package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,7 @@
174174
"ora": "5.4.1",
175175
"pacote": "19.0.0",
176176
"parse5-html-rewriting-stream": "7.0.0",
177+
"parse5-sax-parser": "7.0.0",
177178
"picomatch": "4.0.2",
178179
"piscina": "4.7.0",
179180
"postcss": "8.4.47",

packages/angular/build/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ ts_library(
8989
"@npm//magic-string",
9090
"@npm//mrmime",
9191
"@npm//parse5-html-rewriting-stream",
92+
"@npm//parse5-sax-parser",
9293
"@npm//picomatch",
9394
"@npm//piscina",
9495
"@npm//postcss",
Lines changed: 322 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,322 @@
1+
/**
2+
* @license
3+
* Copyright Google LLC All Rights Reserved.
4+
*
5+
* Use of this source code is governed by an MIT-style license that can be
6+
* found in the LICENSE file at https://angular.dev/license
7+
*/
8+
9+
import { htmlRewritingStream } from './html-rewriting-stream';
10+
import { StartTag } from 'parse5-sax-parser';
11+
import * as crypto from 'crypto';
12+
import { RewritingStream } from 'parse5-html-rewriting-stream';
13+
14+
/**
15+
* The hash function to use for hash directives to use in the CSP.
16+
*/
17+
// Used both as the algorithm name passed to `crypto.createHash` and as the prefix of each emitted CSP hash source.
const HASH_FUNCTION = 'sha256';
18+
19+
/**
 * MIME type essences that denote JavaScript: the current `text/javascript` plus the legacy
 * aliases listed at
 * https://developer.mozilla.org/en-US/docs/Web/HTTP/MIME_types#textjavascript
 */
const JS_MIME_TYPES: Set<string> = new Set<string>([
  // Current standard type.
  'text/javascript',
  // Legacy `application/*` aliases.
  'application/javascript',
  'application/ecmascript',
  'application/x-ecmascript',
  'application/x-javascript',
  // Legacy `text/*` aliases, including the versioned `text/javascript1.x` forms.
  'text/ecmascript',
  'text/javascript1.0',
  'text/javascript1.1',
  'text/javascript1.2',
  'text/javascript1.3',
  'text/javascript1.4',
  'text/javascript1.5',
  'text/jscript',
  'text/livescript',
  'text/x-ecmascript',
  'text/x-javascript',
]);
41+
42+
/**
 * The attributes of a `<script src="...">` tag that are recorded so the tag can be recreated
 * by the generated loader script.
 */
interface SrcScriptTag {
  /** Discriminator marking this entry as a sourced (external) script. */
  scriptType: 'src';
  /** Value of the tag's `src` attribute. */
  src: string;
  /** Value of the tag's `type` attribute, when it had one. */
  type?: string;
  /** Whether the tag carried an `async` attribute. */
  async: boolean;
  /** Whether the tag carried a `defer` attribute. */
  defer: boolean;
}
52+
53+
/**
54+
* Get the specified attribute or return undefined if the tag doesn't have that attribute.
55+
*
56+
* @param tag StartTag of the <script>
57+
* @returns
58+
*/
59+
function getScriptAttributeValue(tag: StartTag, attrName: string): string | undefined {
60+
return tag.attrs.find((attr) => attr.name === attrName)?.value;
61+
}
62+
63+
/**
 * Checks whether a particular string is a MIME type associated with JavaScript, according to
 * https://developer.mozilla.org/en-US/docs/Web/HTTP/MIME_types#textjavascript
 *
 * Only the part before the first `;` is considered, so parameters such as `charset` are
 * ignored. The comparison ignores ASCII case and surrounding whitespace, because browsers
 * match the `type` attribute of a `<script>` that way; a tag written as
 * `type="Text/JavaScript"` is still executed as JavaScript and therefore has to be recognized.
 *
 * @param mimeType a string that may be a MIME type
 * @returns whether the string is a MIME type that is associated with JavaScript
 */
function isJavascriptMimeType(mimeType: string): boolean {
  // `split` always yields at least one element; the default only satisfies strict index checks.
  const [essence = ''] = mimeType.split(';');

  return JS_MIME_TYPES.has(essence.trim().toLowerCase());
}
73+
74+
/**
 * Decides, from the `type` attribute alone, whether a sourced `<script>` is something a
 * browser would execute and that should therefore be moved into the dynamic loader script.
 * Based on https://developer.mozilla.org/en-US/docs/Web/HTML/Element/script/type
 *
 * A missing or empty `type`, the `module` keyword, and JavaScript MIME types qualify. The
 * `module` keyword is compared ignoring ASCII case and surrounding whitespace, matching how
 * browsers interpret the attribute.
 *
 * @param scriptType the `type` attribute on the `<script>` tag under question
 * @returns whether to add the script tag to the dynamically loaded script tag
 */
function shouldDynamicallyLoadScriptTagBasedOnType(scriptType: string | undefined): boolean {
  // No type, or an empty one, means a classic script.
  if (scriptType === undefined || scriptType === '') {
    return true;
  }

  return scriptType.trim().toLowerCase() === 'module' || isJavascriptMimeType(scriptType);
}
88+
89+
/**
 * Computes the CSP hash source for the body of an inline script.
 *
 * @param scriptText The exact text between the opening and closing script tag. Whitespace
 *   and newlines are significant and must be included unchanged.
 * @returns The quoted hash source, `'<algorithm>-<base64 digest>'`, ready to be placed in a
 *   CSP directive.
 */
export function hashScriptText(scriptText: string): string {
  const hasher = crypto.createHash(HASH_FUNCTION);
  hasher.update(scriptText, 'utf-8');
  const digest = hasher.digest('base64');

  return "'" + HASH_FUNCTION + '-' + digest + "'";
}
99+
100+
/**
 * Builds the dynamic loader script for a run of sourced scripts, records its hash for the
 * `<meta>` tag CSP, and writes the loader into the output as an inline `<script>`.
 *
 * @param scriptContent The current streak of <script src="..."> tags
 * @param hashes The array of hashes to include in the final CSP; the loader's hash is appended
 * @param rewriter Where to emit tags to
 */
function emitLoaderScript(
  scriptContent: SrcScriptTag[],
  hashes: string[],
  rewriter: RewritingStream,
): void {
  const loaderSource = createLoaderScript(scriptContent);
  const loaderHash = hashScriptText(loaderSource);

  hashes.push(loaderHash);
  rewriter.emitRaw('<script>' + loaderSource + '</script>');
}
117+
118+
/**
 * Finds all `<script>` tags and creates a dynamic script loading block for consecutive `<script>` with `src` attributes.
 * Hashes all scripts, both inline and generated dynamic script loading blocks.
 * Inserts a `<meta>` tag directly after the opening `<head>` tag of the document with the
 * generated hash-based CSP. If the markup has no `<head>` start tag, no `<meta>` tag is added.
 *
 * @param html Markup that should be processed.
 * @returns The transformed HTML that contains the `<meta>` tag CSP and dynamic loader scripts.
 */
export async function autoCsp(html: string): Promise<string> {
  const { rewriter, transformedContent } = await htmlRewritingStream(html);

  // The `<script>` start tag whose end tag has not been seen yet, if any.
  let openedScriptTag: StartTag | undefined = undefined;
  // Text collected so far for the currently open inline script. Stays `undefined` until a
  // `text` event arrives, so an empty inline script contributes no hash.
  let inlineScriptText: string | undefined = undefined;
  // The current streak of consecutive <script src="..."> tags awaiting a loader script.
  let scriptContent: SrcScriptTag[] = [];
  const hashes: string[] = [];

  /** Emits a loader script for the pending streak of sourced scripts, if there is one. */
  const flushLoaderScript = (): void => {
    if (scriptContent.length > 0) {
      emitLoaderScript(scriptContent, hashes, rewriter);
      scriptContent = [];
    }
  };

  rewriter.on('startTag', (tag) => {
    if (tag.tagName === 'script') {
      openedScriptTag = tag;
      inlineScriptText = undefined;
      const src = getScriptAttributeValue(tag, 'src');

      if (src) {
        // If there are any interesting attributes, note them down.
        const scriptType = getScriptAttributeValue(tag, 'type');
        if (shouldDynamicallyLoadScriptTagBasedOnType(scriptType)) {
          scriptContent.push({
            scriptType: 'src',
            src,
            type: scriptType,
            async: getScriptAttributeValue(tag, 'async') !== undefined,
            defer: getScriptAttributeValue(tag, 'defer') !== undefined,
          });

          return; // Skip writing this script tag; the loader script replaces it.
        }
      }
    }
    // We are encountering the first start tag that's not <script src="..."> after a string of
    // consecutive <script src="...">. This is the first point at which the streak is known to
    // be over, so the dynamic loader script is inserted before writing this tag.
    // (Streaks that are not followed by another start tag are flushed by the end tag handler.)
    flushLoaderScript();
    rewriter.emitStartTag(tag);
  });

  rewriter.on('text', (text, rawHtml) => {
    const opened = openedScriptTag;
    if (opened && !getScriptAttributeValue(opened, 'src')) {
      // Accumulate instead of hashing right away: the hash must cover the complete script
      // body, and that body may be delivered in more than one `text` event.
      inlineScriptText = (inlineScriptText ?? '') + rawHtml;
    }
    rewriter.emitText(text);
  });

  rewriter.on('endTag', (tag) => {
    const opened = openedScriptTag;
    // A stray `</script>` without a matching start tag has no opened tag to inspect; it is
    // passed through unchanged rather than dereferencing `undefined`.
    if (tag.tagName === 'script' && opened) {
      const src = getScriptAttributeValue(opened, 'src');
      const scriptType = getScriptAttributeValue(opened, 'type');
      const scriptText = inlineScriptText;
      openedScriptTag = undefined;
      inlineScriptText = undefined;

      // Return early to avoid writing the closing </script> tag if it's a part of the
      // dynamic loader script.
      if (src && shouldDynamicallyLoadScriptTagBasedOnType(scriptType)) {
        return;
      }

      // Text is only collected for scripts without `src`, i.e. inline scripts.
      if (scriptText !== undefined) {
        hashes.push(hashScriptText(scriptText));
      }
    }

    if (tag.tagName === 'head' || tag.tagName === 'body' || tag.tagName === 'html') {
      // Write the loader script if a string of <script>s were the last opening tags of the
      // section being closed, so the loader stays inside the element that held the scripts.
      flushLoaderScript();
    }
    rewriter.emitEndTag(tag);
  });

  const rewritten = await transformedContent();

  // Second pass to add the CSP <meta> tag; all hashes are only known after the first pass.
  const secondPass = await htmlRewritingStream(rewritten);
  secondPass.rewriter.on('startTag', (tag) => {
    secondPass.rewriter.emitStartTag(tag);
    if (tag.tagName === 'head') {
      // Emitted right after <head> so that the policy precedes every script in the document.
      secondPass.rewriter.emitRaw(
        `<meta http-equiv="Content-Security-Policy" content="${getStrictCsp(hashes)}">`,
      );
    }
  });

  return secondPass.transformedContent();
}
210+
211+
/**
 * Returns a strict Content Security Policy for mitigating XSS.
 * For more details read csp.withgoogle.com.
 * If you modify this CSP, make sure it has not become trivially bypassable by
 * checking the policy using csp-evaluator.withgoogle.com.
 *
 * Options that are omitted from `cspOptions` fall back to their defaults individually, so
 * passing a partial object does not silently disable the other options.
 *
 * @param hashes A list of sha-256 hashes of trusted inline scripts, each already quoted in
 *   CSP hash-source form.
 * @param cspOptions Optional switches:
 *   - `enableBrowserFallbacks` (default `true`): add fallbacks for older browsers. This will
 *     not weaken the policy as modern browsers will ignore the fallbacks.
 *   - `enableTrustedTypes` (default `false`): require Trusted Types for scripts.
 *   - `enableUnsafeEval` (default `false`): if you cannot remove all uses of eval(), you can
 *     still set a strict CSP, but you will have to use the 'unsafe-eval' keyword which will
 *     make your policy slightly less secure.
 * @returns The policy as a string of `directive value...;` entries.
 */
function getStrictCsp(
  hashes?: string[],
  // default CSP options
  cspOptions: {
    enableBrowserFallbacks?: boolean;
    enableTrustedTypes?: boolean;
    enableUnsafeEval?: boolean;
  } = {},
): string {
  const {
    enableBrowserFallbacks = true,
    enableTrustedTypes = false,
    enableUnsafeEval = false,
  } = cspOptions;
  const trustedHashes = hashes ?? [];

  // 'strict-dynamic' allows hashed scripts to create new scripts.
  const scriptSrc: string[] = [`'strict-dynamic'`, ...trustedHashes];

  // Adds fallbacks for browsers not compatible to CSP3 and CSP2.
  // These fallbacks are ignored by modern browsers in presence of hashes,
  // and 'strict-dynamic'.
  if (enableBrowserFallbacks) {
    // Fallback for Safari. All modern browsers supporting strict-dynamic will
    // ignore the 'https:' fallback.
    scriptSrc.push('https:');
    // 'unsafe-inline' is only ignored in presence of a hash or nonce.
    if (trustedHashes.length > 0) {
      scriptSrc.push(`'unsafe-inline'`);
    }
  }

  // If enabled, `eval()`-calls will be allowed, making the policy slightly
  // less secure.
  if (enableUnsafeEval) {
    scriptSrc.push(`'unsafe-eval'`);
  }

  const directives: [string, string[]][] = [
    ['script-src', scriptSrc],
    // Restricts `object-src` to disable dangerous plugins like Flash.
    ['object-src', [`'none'`]],
    // Restricts `base-uri` to block the injection of `<base>` tags. This
    // prevents attackers from changing the locations of scripts loaded from
    // relative URLs.
    ['base-uri', [`'self'`]],
  ];

  // If enabled, dangerous DOM sinks will only accept typed objects instead of
  // strings. The sink group is a quoted keyword in CSP syntax.
  if (enableTrustedTypes) {
    directives.push(['require-trusted-types-for', [`'script'`]]);
  }

  return directives.map(([directive, values]) => `${directive} ${values.join(' ')};`).join('');
}
282+
283+
/**
 * Returns JS code for dynamically loading sourced (external) scripts.
 *
 * The generated code creates one `<script>` element per entry, copies over `src`, `type`
 * (only when the original tag had one), `async` and `defer`, and appends the element to
 * `document.body`, or to the document element when the body does not exist yet (for example
 * when the loader runs inside `<head>`).
 *
 * @param srcList A list of sourced script tags that should be loaded.
 * @param enableTrustedTypes Whether to route each URL through a Trusted Types policy that
 *   only accepts the URLs contained in `srcList`.
 * @returns The source text of the loader script.
 * @throws Error if `srcList` is empty.
 */
function createLoaderScript(srcList: SrcScriptTag[], enableTrustedTypes = false): string {
  if (!srcList.length) {
    throw new Error('Cannot create a loader script with no scripts to load.');
  }

  // `encodeURI` leaves `'` untouched, so it has to be escaped separately to keep the value
  // inside its single-quoted string literal.
  const toStringLiteral = (value: string): string =>
    "'" + encodeURI(value).replace(/'/g, "\\'") + "'";

  const srcListFormatted = srcList
    .map((s) => {
      const typeLiteral = s.type ? toStringLiteral(s.type) : 'undefined';

      return `[${toStringLiteral(s.src)}, ${typeLiteral}, ${s.async ? 'true' : 'false'}, ${s.defer ? 'true' : 'false'}]`;
    })
    .join(',');

  // `scripts` holds [src, type, async, defer] tuples, so the policy has to compare the URL
  // against the first element of each tuple.
  const policySetup = enableTrustedTypes
    ? `
  var policy = self.trustedTypes && self.trustedTypes.createPolicy ?
    self.trustedTypes.createPolicy('angular#auto-csp', {createScriptURL: function(u) {
      return scripts.some(function(entry) { return entry[0] === u; }) ? u : null;
    }}) : { createScriptURL: function(u) { return u; } };`
    : '';
  const srcExpression = enableTrustedTypes ? 'policy.createScriptURL(scriptUrl[0])' : 'scriptUrl[0]';

  // Assigning `undefined` to `s.type` would set the type to the string "undefined" and stop
  // the script from executing, hence the guard around that assignment.
  return `
  var scripts = [${srcListFormatted}];${policySetup}
  scripts.forEach(function(scriptUrl) {
    var s = document.createElement('script');
    s.src = ${srcExpression};
    if (scriptUrl[1] !== undefined) {
      s.type = scriptUrl[1];
    }
    s.async = !!scriptUrl[2];
    s.defer = !!scriptUrl[3];
    (document.body || document.documentElement).appendChild(s);
  });\n`;
}

0 commit comments

Comments
 (0)