Skip to content

Commit 747c7f6

Browse files
committed
JS/Ruby: share implementation of IncompleteUrlSubstringSanitization query
1 parent 49b4fe7 commit 747c7f6

7 files changed

+143
-115
lines changed

config/identical-files.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -508,5 +508,9 @@
508508
"java/ql/lib/semmle/code/java/dataflow/internal/AccessPathSyntax.qll",
509509
"javascript/ql/lib/semmle/javascript/frameworks/data/internal/AccessPathSyntax.qll",
510510
"ruby/ql/lib/codeql/ruby/dataflow/internal/AccessPathSyntax.qll"
511+
],
512+
"IncompleteUrlSubstringSanitization": [
513+
"javascript/ql/src/Security/CWE-020/IncompleteUrlSubstringSanitization.qll",
514+
"ruby/ql/src/queries/security/cwe-020/IncompleteUrlSubstringSanitization.qll"
511515
]
512516
}

javascript/ql/src/Security/CWE-020/IncompleteUrlSubstringSanitization.ql

Lines changed: 1 addition & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -11,60 +11,4 @@
1111
* external/cwe/cwe-020
1212
*/
1313

14-
import javascript
15-
private import semmle.javascript.dataflow.InferredTypes
16-
17-
/**
18-
* A check on a string for whether it contains a given substring, possibly with restrictions on the location of the substring.
19-
*/
20-
class SomeSubstringCheck extends DataFlow::Node {
21-
DataFlow::Node substring;
22-
23-
SomeSubstringCheck() {
24-
this.(StringOps::StartsWith).getSubstring() = substring or
25-
this.(StringOps::Includes).getSubstring() = substring or
26-
this.(StringOps::EndsWith).getSubstring() = substring
27-
}
28-
29-
/**
30-
* Gets the substring.
31-
*/
32-
DataFlow::Node getSubstring() { result = substring }
33-
}
34-
35-
from SomeSubstringCheck check, DataFlow::Node substring, string target, string msg
36-
where
37-
substring = check.getSubstring() and
38-
substring.mayHaveStringValue(target) and
39-
(
40-
// target contains a domain on a common TLD, and perhaps some other URL components
41-
target
42-
.regexpMatch("(?i)([a-z]*:?//)?\\.?([a-z0-9-]+\\.)+" + RegExpPatterns::commonTLD() +
43-
"(:[0-9]+)?/?")
44-
or
45-
// target is a HTTP URL to a domain on any TLD
46-
target.regexpMatch("(?i)https?://([a-z0-9-]+\\.)+([a-z]+)(:[0-9]+)?/?")
47-
or
48-
// target is a HTTP URL to a domain on any TLD with path elements, and the check is an includes check
49-
check instanceof StringOps::Includes and
50-
target.regexpMatch("(?i)https?://([a-z0-9-]+\\.)+([a-z]+)(:[0-9]+)?/[a-z0-9/_-]+")
51-
) and
52-
(
53-
if check instanceof StringOps::StartsWith
54-
then msg = "may be followed by an arbitrary host name"
55-
else
56-
if check instanceof StringOps::EndsWith
57-
then msg = "may be preceded by an arbitrary host name"
58-
else msg = "can be anywhere in the URL, and arbitrary hosts may come before or after it"
59-
) and
60-
// whitelist
61-
not (
62-
// the leading dot in a subdomain sequence makes the suffix-check safe (if it is performed on the host of the url)
63-
check instanceof StringOps::EndsWith and
64-
target.regexpMatch("(?i)\\.([a-z0-9-]+)(\\.[a-z0-9-]+)+")
65-
or
66-
// the trailing port or slash makes the prefix-check safe
67-
check instanceof StringOps::StartsWith and
68-
target.regexpMatch(".*(:[0-9]+|/)")
69-
)
70-
select check, "'$@' " + msg + ".", substring, target
14+
import IncompleteUrlSubstringSanitization
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
/**
2+
* Incomplete URL substring sanitization
3+
*/
4+
5+
private import IncompleteUrlSubstringSanitizationSpecific
6+
7+
/**
8+
* A check on a string for whether it contains a given substring, possibly with restrictions on the location of the substring.
9+
*/
10+
class SomeSubstringCheck extends DataFlow::Node {
11+
DataFlow::Node substring;
12+
13+
SomeSubstringCheck() {
14+
this.(StringOps::StartsWith).getSubstring() = substring or
15+
this.(StringOps::Includes).getSubstring() = substring or
16+
this.(StringOps::EndsWith).getSubstring() = substring
17+
}
18+
19+
/**
20+
* Gets the substring.
21+
*/
22+
DataFlow::Node getSubstring() { result = substring }
23+
}
24+
25+
/** Holds if there is an incomplete URL substring sanitization problem */
26+
query predicate problems(
27+
SomeSubstringCheck check, string msg, DataFlow::Node substring, string target
28+
) {
29+
substring = check.getSubstring() and
30+
mayHaveStringValue(substring, target) and
31+
(
32+
// target contains a domain on a common TLD, and perhaps some other URL components
33+
target
34+
.regexpMatch("(?i)([a-z]*:?//)?\\.?([a-z0-9-]+\\.)+" + RegExpPatterns::commonTLD() +
35+
"(:[0-9]+)?/?")
36+
or
37+
// target is a HTTP URL to a domain on any TLD
38+
target.regexpMatch("(?i)https?://([a-z0-9-]+\\.)+([a-z]+)(:[0-9]+)?/?")
39+
or
40+
// target is a HTTP URL to a domain on any TLD with path elements, and the check is an includes check
41+
check instanceof StringOps::Includes and
42+
target.regexpMatch("(?i)https?://([a-z0-9-]+\\.)+([a-z]+)(:[0-9]+)?/[a-z0-9/_-]+")
43+
) and
44+
(
45+
if check instanceof StringOps::StartsWith
46+
then msg = "'$@' may be followed by an arbitrary host name."
47+
else
48+
if check instanceof StringOps::EndsWith
49+
then msg = "'$@' may be preceded by an arbitrary host name."
50+
else msg = "'$@' can be anywhere in the URL, and arbitrary hosts may come before or after it."
51+
) and
52+
// whitelist
53+
not (
54+
// the leading dot in a subdomain sequence makes the suffix-check safe (if it is performed on the host of the url)
55+
check instanceof StringOps::EndsWith and
56+
target.regexpMatch("(?i)\\.([a-z0-9-]+)(\\.[a-z0-9-]+)+")
57+
or
58+
// the trailing port or slash makes the prefix-check safe
59+
check instanceof StringOps::StartsWith and
60+
target.regexpMatch(".*(:[0-9]+|/)")
61+
)
62+
}
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
import javascript
2+
import semmle.javascript.dataflow.InferredTypes
3+
4+
/** Holds if `node` may evaluate to `value` */
5+
predicate mayHaveStringValue(DataFlow::Node node, string value) { node.mayHaveStringValue(value) }

ruby/ql/src/queries/security/cwe-020/IncompleteUrlSubstringSanitization.ql

Lines changed: 1 addition & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -11,61 +11,4 @@
1111
* external/cwe/cwe-020
1212
*/
1313

14-
import codeql.ruby.DataFlow
15-
import codeql.ruby.StringOps
16-
import codeql.ruby.security.performance.RegExpTreeView::RegExpPatterns as RegExpPatterns
17-
18-
/**
19-
* A check on a string for whether it contains a given substring, possibly with restrictions on the location of the substring.
20-
*/
21-
class SomeSubstringCheck extends DataFlow::Node {
22-
DataFlow::Node substring;
23-
24-
SomeSubstringCheck() {
25-
this.(StringOps::StartsWith).getSubstring() = substring or
26-
this.(StringOps::Includes).getSubstring() = substring or
27-
this.(StringOps::EndsWith).getSubstring() = substring
28-
}
29-
30-
/**
31-
* Gets the substring.
32-
*/
33-
DataFlow::Node getSubstring() { result = substring }
34-
}
35-
36-
from SomeSubstringCheck check, DataFlow::Node substring, string target, string msg
37-
where
38-
substring = check.getSubstring() and
39-
substring.asExpr().getExpr().getConstantValue().getString() = target and
40-
(
41-
// target contains a domain on a common TLD, and perhaps some other URL components
42-
target
43-
.regexpMatch("(?i)([a-z]*:?//)?\\.?([a-z0-9-]+\\.)+" + RegExpPatterns::commonTLD() +
44-
"(:[0-9]+)?/?")
45-
or
46-
// target is a HTTP URL to a domain on any TLD
47-
target.regexpMatch("(?i)https?://([a-z0-9-]+\\.)+([a-z]+)(:[0-9]+)?/?")
48-
or
49-
// target is a HTTP URL to a domain on any TLD with path elements, and the check is an includes check
50-
check instanceof StringOps::Includes and
51-
target.regexpMatch("(?i)https?://([a-z0-9-]+\\.)+([a-z]+)(:[0-9]+)?/[a-z0-9/_-]+")
52-
) and
53-
(
54-
if check instanceof StringOps::StartsWith
55-
then msg = "may be followed by an arbitrary host name"
56-
else
57-
if check instanceof StringOps::EndsWith
58-
then msg = "may be preceded by an arbitrary host name"
59-
else msg = "can be anywhere in the URL, and arbitrary hosts may come before or after it"
60-
) and
61-
// whitelist
62-
not (
63-
// the leading dot in a subdomain sequence makes the suffix-check safe (if it is performed on the host of the url)
64-
check instanceof StringOps::EndsWith and
65-
target.regexpMatch("(?i)\\.([a-z0-9-]+)(\\.[a-z0-9-]+)+")
66-
or
67-
// the trailing port or slash makes the prefix-check safe
68-
check instanceof StringOps::StartsWith and
69-
target.regexpMatch(".*(:[0-9]+|/)")
70-
)
71-
select check, "'$@' " + msg + ".", substring, target
14+
import IncompleteUrlSubstringSanitization
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
/**
2+
* Incomplete URL substring sanitization
3+
*/
4+
5+
private import IncompleteUrlSubstringSanitizationSpecific
6+
7+
/**
8+
* A check on a string for whether it contains a given substring, possibly with restrictions on the location of the substring.
9+
*/
10+
class SomeSubstringCheck extends DataFlow::Node {
11+
DataFlow::Node substring;
12+
13+
SomeSubstringCheck() {
14+
this.(StringOps::StartsWith).getSubstring() = substring or
15+
this.(StringOps::Includes).getSubstring() = substring or
16+
this.(StringOps::EndsWith).getSubstring() = substring
17+
}
18+
19+
/**
20+
* Gets the substring.
21+
*/
22+
DataFlow::Node getSubstring() { result = substring }
23+
}
24+
25+
/** Holds if there is an incomplete URL substring sanitization problem */
26+
query predicate problems(
27+
SomeSubstringCheck check, string msg, DataFlow::Node substring, string target
28+
) {
29+
substring = check.getSubstring() and
30+
mayHaveStringValue(substring, target) and
31+
(
32+
// target contains a domain on a common TLD, and perhaps some other URL components
33+
target
34+
.regexpMatch("(?i)([a-z]*:?//)?\\.?([a-z0-9-]+\\.)+" + RegExpPatterns::commonTLD() +
35+
"(:[0-9]+)?/?")
36+
or
37+
// target is a HTTP URL to a domain on any TLD
38+
target.regexpMatch("(?i)https?://([a-z0-9-]+\\.)+([a-z]+)(:[0-9]+)?/?")
39+
or
40+
// target is a HTTP URL to a domain on any TLD with path elements, and the check is an includes check
41+
check instanceof StringOps::Includes and
42+
target.regexpMatch("(?i)https?://([a-z0-9-]+\\.)+([a-z]+)(:[0-9]+)?/[a-z0-9/_-]+")
43+
) and
44+
(
45+
if check instanceof StringOps::StartsWith
46+
then msg = "'$@' may be followed by an arbitrary host name."
47+
else
48+
if check instanceof StringOps::EndsWith
49+
then msg = "'$@' may be preceded by an arbitrary host name."
50+
else msg = "'$@' can be anywhere in the URL, and arbitrary hosts may come before or after it."
51+
) and
52+
// whitelist
53+
not (
54+
// the leading dot in a subdomain sequence makes the suffix-check safe (if it is performed on the host of the url)
55+
check instanceof StringOps::EndsWith and
56+
target.regexpMatch("(?i)\\.([a-z0-9-]+)(\\.[a-z0-9-]+)+")
57+
or
58+
// the trailing port or slash makes the prefix-check safe
59+
check instanceof StringOps::StartsWith and
60+
target.regexpMatch(".*(:[0-9]+|/)")
61+
)
62+
}
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
import codeql.ruby.DataFlow
2+
import codeql.ruby.StringOps
3+
import codeql.ruby.security.performance.RegExpTreeView::RegExpPatterns as RegExpPatterns
4+
5+
/** Holds if `node` may evaluate to `value` */
6+
predicate mayHaveStringValue(DataFlow::Node node, string value) {
7+
node.asExpr().getExpr().getConstantValue().getString() = value
8+
}

0 commit comments

Comments
 (0)