Skip to content

Commit 772344d

Browse files
authored
Merge pull request #16886 from aegilops/aegilops/polyfill-io-compromised-script
2 parents 1de2943 + de5ec1f commit 772344d

18 files changed

+470
-153
lines changed

javascript/ql/lib/qlpack.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,4 +16,5 @@ dependencies:
1616
dataExtensions:
1717
- semmle/javascript/frameworks/**/model.yml
1818
- semmle/javascript/frameworks/**/*.model.yml
19+
- semmle/javascript/security/domains/**/*.model.yml
1920
warnOnImplicitThis: true
Lines changed: 208 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,208 @@
1+
/**
2+
* Provides classes for finding functionality that is loaded from untrusted sources and used in script or frame elements.
3+
*/
4+
5+
import javascript
6+
7+
/** A location that adds a reference to an untrusted source. */
8+
abstract class AddsUntrustedUrl extends Locatable {
9+
/** Gets an explanation why this source is untrusted. */
10+
abstract string getProblem();
11+
12+
/** Gets the URL of the untrusted source. */
13+
abstract string getUrl();
14+
}
15+
16+
/** Looks for static creation of an element and source. */
17+
module StaticCreation {
18+
/** Holds if `host` (a URL's host, optional port, and path) starts with an alias of localhost; dots match literally and the port after `[::1]` is optional. */
19+
bindingset[host]
20+
predicate isLocalhostPrefix(string host) {
21+
host.toLowerCase()
22+
.regexpMatch([
23+
"(?i)localhost(:[0-9]+)?/.*", "127\\.0\\.0\\.1(:[0-9]+)?/.*", "::1/.*", "\\[::1\\](:[0-9]+)?/.*"
24+
])
25+
}
26+
27+
/** Holds if `url` is a url that is vulnerable to a MITM attack. */
28+
bindingset[url]
29+
predicate isUntrustedSourceUrl(string url) {
30+
exists(string hostPath | hostPath = url.regexpCapture("(?i)http://(.*)", 1) |
31+
not isLocalhostPrefix(hostPath)
32+
)
33+
}
34+
35+
/** Holds if `url` refers to a CDN that needs an integrity check - even with https. */
36+
bindingset[url]
37+
predicate isCdnUrlWithCheckingRequired(string url) {
38+
// Some CDN URLs are required to have an integrity attribute. We only add CDNs to that list
39+
// that recommend integrity-checking. Hostnames are case-insensitive, so compare them lower-cased.
40+
exists(string hostname, string requiredCheckingHostname |
41+
hostname = url.regexpCapture("(?i)^(?:https?:)?//([^/]+)/.*\\.js$", 1) and
42+
isCdnDomainWithCheckingRequired(requiredCheckingHostname) and
43+
hostname.toLowerCase() = requiredCheckingHostname.toLowerCase()
44+
)
45+
}
46+
47+
/** A script element that refers to untrusted content. */
48+
class ScriptElementWithUntrustedContent extends AddsUntrustedUrl instanceof HTML::ScriptElement {
49+
ScriptElementWithUntrustedContent() {
50+
not exists(string digest | not digest = "" | super.getIntegrityDigest() = digest) and
51+
isUntrustedSourceUrl(super.getSourcePath())
52+
}
53+
54+
override string getUrl() { result = super.getSourcePath() }
55+
56+
override string getProblem() { result = "Script loaded using unencrypted connection." }
57+
}
58+
59+
/** A script element without an integrity digest whose source is a CDN that requires integrity checking, or an untrusted domain. */
60+
class CdnScriptElementWithUntrustedContent extends AddsUntrustedUrl, HTML::ScriptElement {
61+
CdnScriptElementWithUntrustedContent() {
62+
not exists(string digest | not digest = "" | this.getIntegrityDigest() = digest) and
63+
(
64+
isCdnUrlWithCheckingRequired(this.getSourcePath())
65+
or
66+
isUrlWithUntrustedDomain(super.getSourcePath())
67+
)
68+
}
69+
70+
override string getUrl() { result = this.getSourcePath() }
71+
72+
override string getProblem() {
73+
result = "Script loaded from content delivery network with no integrity check."
74+
}
75+
}
76+
77+
/** An iframe element that includes untrusted content. */
78+
class IframeElementWithUntrustedContent extends AddsUntrustedUrl instanceof HTML::IframeElement {
79+
IframeElementWithUntrustedContent() { isUntrustedSourceUrl(super.getSourcePath()) }
80+
81+
override string getUrl() { result = super.getSourcePath() }
82+
83+
override string getProblem() { result = "Iframe loaded using unencrypted connection." }
84+
}
85+
}
86+
87+
/** Holds if `url` refers to a URL that uses an untrusted domain. */
88+
bindingset[url]
89+
predicate isUrlWithUntrustedDomain(string url) {
90+
exists(string hostname |
91+
hostname = url.regexpCapture("(?i)^(?:https?:)?//([^/]+)/.*", 1) and
92+
isUntrustedHostname(hostname)
93+
)
94+
}
95+
96+
/** Holds if `hostname` refers to a domain or subdomain that is untrusted; the comparison is case-insensitive. */
97+
bindingset[hostname]
98+
predicate isUntrustedHostname(string hostname) {
99+
exists(string domain |
100+
(hostname.toLowerCase() = domain.toLowerCase() or hostname.toLowerCase().matches("%." + domain.toLowerCase())) and
101+
isUntrustedDomain(domain)
102+
)
103+
}
104+
105+
// The following predicates are extended in data extensions under javascript/ql/lib/semmle/javascript/security/domains/
106+
// and can be extended with custom model packs as necessary.
107+
/** Holds for hostnames defined in data extensions */
108+
extensible predicate isCdnDomainWithCheckingRequired(string hostname);
109+
110+
/** Holds for domains defined in data extensions */
111+
extensible predicate isUntrustedDomain(string domain);
112+
113+
/** Looks for dynamic creation of an element and source. */
114+
module DynamicCreation {
115+
/** Holds if `call` creates a tag of kind `name`. */
116+
predicate isCreateElementNode(DataFlow::CallNode call, string name) {
117+
call = DataFlow::globalVarRef("document").getAMethodCall("createElement") and
118+
call.getArgument(0).getStringValue().toLowerCase() = name
119+
}
120+
121+
/** Gets the right-hand side of an assignment to the attribute `name` of the element created by `createCall`. */
122+
DataFlow::Node getAttributeAssignmentRhs(DataFlow::CallNode createCall, string name) {
123+
result = createCall.getAPropertyWrite(name).getRhs()
124+
or
125+
exists(DataFlow::InvokeNode inv | inv = createCall.getAMemberInvocation("setAttribute") |
126+
inv.getArgument(0).getStringValue() = name and
127+
result = inv.getArgument(1)
128+
)
129+
}
130+
131+
/**
132+
* Holds if `createCall` creates a `<script ../>` element which never
133+
* has its `integrity` attribute set locally.
134+
*/
135+
predicate isCreateScriptNodeWoIntegrityCheck(DataFlow::CallNode createCall) {
136+
isCreateElementNode(createCall, "script") and
137+
not exists(getAttributeAssignmentRhs(createCall, "integrity"))
138+
}
139+
140+
/** Gets a node, tracked by `t`, that a string value starting with `http:` may flow to. */
141+
DataFlow::Node urlTrackedFromUnsafeSourceLiteral(DataFlow::TypeTracker t) {
142+
t.start() and result.getStringValue().regexpMatch("(?i)http:.*")
143+
or
144+
exists(DataFlow::TypeTracker t2, DataFlow::Node prev |
145+
prev = urlTrackedFromUnsafeSourceLiteral(t2)
146+
|
147+
not exists(string httpsUrl | httpsUrl.toLowerCase() = "https:" + any(string rest) |
148+
// when the result may have a string value starting with https,
149+
// we're most likely with an assignment like:
150+
// e.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js'
151+
// these assignments, we don't want to fix - once the browser is using http,
152+
// MITM attacks are possible anyway.
153+
result.mayHaveStringValue(httpsUrl)
154+
) and
155+
(
156+
t2 = t.smallstep(prev, result)
157+
or
158+
TaintTracking::sharedTaintStep(prev, result) and
159+
t = t2
160+
)
161+
)
162+
}
163+
164+
/** Gets a data flow node that is tracked from an untrusted source. */
165+
DataFlow::Node urlTrackedFromUnsafeSourceLiteral() {
166+
result = urlTrackedFromUnsafeSourceLiteral(DataFlow::TypeTracker::end())
167+
}
168+
169+
/** Holds if `sink` is assigned to the `src` attribute of a dynamically created element of kind `name` (a `script` with no `integrity` attribute set, or an `iframe`). */
170+
predicate isAssignedToSrcAttribute(string name, DataFlow::Node sink) {
171+
exists(DataFlow::CallNode createElementCall |
172+
sink = getAttributeAssignmentRhs(createElementCall, "src") and
173+
(
174+
name = "script" and
175+
isCreateScriptNodeWoIntegrityCheck(createElementCall)
176+
or
177+
name = "iframe" and
178+
isCreateElementNode(createElementCall, "iframe")
179+
)
180+
)
181+
}
182+
183+
/** A script or iframe element that refers to untrusted content. */
184+
class IframeOrScriptSrcAssignment extends AddsUntrustedUrl {
185+
string name;
186+
187+
IframeOrScriptSrcAssignment() {
188+
name = ["script", "iframe"] and
189+
exists(DataFlow::Node n | n.asExpr() = this |
190+
isAssignedToSrcAttribute(name, n) and
191+
n = urlTrackedFromUnsafeSourceLiteral()
192+
)
193+
}
194+
195+
override string getUrl() {
196+
exists(DataFlow::Node n | n.asExpr() = this |
197+
isAssignedToSrcAttribute(name, n) and
198+
result = n.getStringValue()
199+
)
200+
}
201+
202+
override string getProblem() {
203+
name = "script" and result = "Script loaded using unencrypted connection."
204+
or
205+
name = "iframe" and result = "Iframe loaded using unencrypted connection."
206+
}
207+
}
208+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
extensions:
2+
- addsTo:
3+
pack: codeql/javascript-all
4+
extensible: isCdnDomainWithCheckingRequired
5+
data:
6+
- ["code.jquery.com"]
7+
- ["cdnjs.cloudflare.com"]
8+
- ["cdnjs.com"]
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
extensions:
2+
- addsTo:
3+
pack: codeql/javascript-all
4+
extensible: isUntrustedDomain
5+
data:
6+
- ["polyfill.io"]
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
extensions:
2+
- addsTo:
3+
pack: codeql/javascript-all
4+
extensible: isUntrustedDomain
5+
data:
6+
# new location of the polyfill.io CDN, which was used to serve malware. See: https://www.cside.dev/blog/the-polyfill-attack-explained
7+
- ["polyfill.com"]
8+
- ["polyfillcache.com"]
9+
10+
# domains operated by the same owner as polyfill.io, which was used to serve malware. See: https://www.cside.dev/blog/the-polyfill-attack-explained
11+
- ["bootcdn.net"]
12+
- ["bootcss.com"]
13+
- ["staticfile.net"]
14+
- ["staticfile.org"]
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
# Extending the library list of untrusted sources and domains
2+
3+
You can expand the list of untrusted domains in the CodeQL library used by the `js/functionality-from-untrusted-source` and `js/functionality-from-untrusted-domain` queries using [CodeQL data extensions](https://codeql.github.com/docs/codeql-language-guides/customizing-library-models-for-javascript/).
4+
5+
This allows you to add additional domains to warn users about and to require Subresource Integrity (SRI) checks on specific content delivery network (CDN) hostnames.
6+
7+
For example, this YAML model can be used inside a CodeQL model pack to alert on uses of `example.com` in imported functionality, extending the `js/functionality-from-untrusted-domain` query:
8+
9+
```yaml
10+
extensions:
11+
- addsTo:
12+
pack: codeql/javascript-all
13+
extensible: isUntrustedDomain
14+
data:
15+
- ["example.com"]
16+
```
17+
18+
To add new hostnames that always require SRI checking, this YAML model can be used to require SRI on `cdn.example.com`, extending the `js/functionality-from-untrusted-source` query:
19+
20+
```yaml
21+
extensions:
22+
- addsTo:
23+
pack: codeql/javascript-all
24+
extensible: isCdnDomainWithCheckingRequired
25+
data:
26+
- ["cdn.example.com"]
27+
```
28+
29+
You would create a model pack with this information using metadata similar to that in the example below:
30+
31+
```yaml
32+
name: my-org/javascript-untrusted-functionality-model-pack
33+
version: 1.0.0
34+
extensionTargets:
35+
codeql/javascript-all: '*'
36+
dataExtensions:
37+
- models/**/*.yml
38+
```
39+
40+
## References
41+
42+
- [Customizing library models for JavaScript](https://codeql.github.com/docs/codeql-language-guides/customizing-library-models-for-javascript/)
43+
- [Creating and working with CodeQL packs](https://docs.github.com/en/code-security/codeql-cli/using-the-advanced-functionality-of-the-codeql-cli/creating-and-working-with-codeql-packs#creating-a-codeql-model-pack)
Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
<!DOCTYPE qhelp PUBLIC
2+
"-//Semmle//qhelp//EN"
3+
"qhelp.dtd">
4+
<qhelp>
5+
<overview>
6+
<p>
7+
Content Delivery Networks (CDNs) are used to deliver content to users quickly and efficiently.
8+
9+
However, they can change hands or be operated by untrustworthy owners, risking the security of the sites that use them.
10+
11+
Some CDN domains are operated by entities that have used CDNs to deliver malware, which this query identifies.
12+
</p>
13+
14+
<p>
15+
For example, <code>polyfill.io</code> was a popular JavaScript CDN,
16+
used to support new web browser standards on older browsers.
17+
18+
In February 2024 the domain was sold, and in June 2024 it was publicised that the domain
19+
had been used to serve malicious scripts. It was taken down later in that month, leaving a window
20+
where sites that used the service could have been compromised.
21+
22+
The same operator runs several other CDNs, undermining trust in those too.
23+
</p>
24+
25+
<p>
26+
Including a resource from an untrusted source or using an untrusted channel may
27+
allow an attacker to include arbitrary code in the response.
28+
When including an external resource (for example, a <code>script</code> element) on a page,
29+
it is important to ensure that the received data is not malicious.
30+
</p>
31+
32+
<p>
33+
Even when <code>https</code> is used, an untrustworthy operator might deliver malware.
34+
</p>
35+
36+
<p>
37+
See the <a href="https://github.com/github/codeql/blob/main/javascript/ql/src/Security/CWE-830/CUSTOMIZING.md">CUSTOMIZING.md</a> file in the source code for this query for information on how to extend the list of untrusted domains used by this query.
38+
</p>
39+
</overview>
40+
41+
<recommendation>
42+
<p>
43+
Carefully research the ownership of a Content Delivery Network (CDN) before using it in your application.
44+
</p>
45+
46+
<p>
47+
If you find code that originated from an untrusted domain in your application, you should review your logs to check for compromise.
48+
</p>
49+
50+
<p>
51+
To help mitigate the risk of including a script that could be compromised in the future, consider whether you need to
52+
use polyfill or another library at all. Modern browsers do not require a polyfill, and other popular libraries were made redundant by enhancements to HTML 5.
53+
</p>
54+
55+
<p>
56+
If you do need a polyfill service or library, move to using a CDN that you trust.
57+
</p>
58+
59+
<p>
60+
When you use a <code>script</code> or <code>link</code> element,
61+
you should check for <a href="https://developer.mozilla.org/en-US/docs/Web/Security/Subresource_Integrity">subresource integrity (SRI)</a>,
62+
and pin to a hash of a version of the service that you can trust (for example, because you have audited it for security and unwanted features).
63+
64+
A dynamic service cannot be easily used with SRI. Nevertheless,
65+
it is possible to list multiple acceptable SHA hashes in the <code>integrity</code> attribute,
66+
such as hashes for the content required for the major browsers used by your users.
67+
</p>
68+
69+
<p>
70+
You can also choose to self-host an uncompromised version of the service or library.
71+
</p>
72+
</recommendation>
73+
74+
<example>
75+
<p>
76+
The following example loads the Polyfill.io library from the <code>polyfill.io</code> CDN. This use was open to malicious scripts being served by the CDN.
77+
</p>
78+
79+
<sample src="polyfill-compromised.html" />
80+
81+
<p>
82+
Instead, load the Polyfill library from a trusted CDN, as in the next example:
83+
</p>
84+
85+
<sample src="polyfill-trusted.html" />
86+
87+
<p>
88+
If you know which browsers are used by the majority of your users, you can list the hashes of the polyfills for those browsers:
89+
</p>
90+
91+
<sample src="polyfill-sri.html" />
92+
93+
</example>
94+
95+
<references>
96+
<li>Sansec: <a href="https://sansec.io/research/polyfill-supply-chain-attack">Polyfill supply chain attack hits 100K+ sites</a></li>
97+
<li>Cloudflare: <a href="https://cdnjs.cloudflare.com/polyfill">Upgrade the web. Automatically. Delivers only the polyfills required by the user's web browser.</a></li>
98+
<li>Fastly: <a href="https://community.fastly.com/t/new-options-for-polyfill-io-users/2540">New options for Polyfill.io users</a></li>
99+
<li>Wikipedia: <a href="https://en.wikipedia.org/wiki/Polyfill_(programming)">Polyfill (programming)</a></li>
100+
<li>MDN Web Docs: <a href="https://developer.mozilla.org/en-US/docs/Web/Security/Subresource_Integrity">Subresource Integrity</a></li>
101+
</references>
102+
</qhelp>

0 commit comments

Comments
 (0)