Skip to content

Commit 35e8d6a

Browse files
committed
move getACommonTld into a utility module without parameters
1 parent ba7321a commit 35e8d6a

File tree

5 files changed

+43
-38
lines changed

5 files changed

+43
-38
lines changed

javascript/ql/src/Security/CWE-020/IncompleteUrlSubstringSanitization.qll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
*/
44

55
private import IncompleteUrlSubstringSanitizationSpecific
6+
private import codeql.regex.HostnameRegexp::Utils
67

78
/**
89
* A check on a string for whether it contains a given substring, possibly with restrictions on the location of the substring.
@@ -30,9 +31,7 @@ query predicate problems(
3031
mayHaveStringValue(substring, target) and
3132
(
3233
// target contains a domain on a common TLD, and perhaps some other URL components
33-
target
34-
.regexpMatch("(?i)([a-z]*:?//)?\\.?([a-z0-9-]+\\.)+" + HostnameRegexp::getACommonTld() +
35-
"(:[0-9]+)?/?")
34+
target.regexpMatch("(?i)([a-z]*:?//)?\\.?([a-z0-9-]+\\.)+" + getACommonTld() + "(:[0-9]+)?/?")
3635
or
3736
// target is a HTTP URL to a domain on any TLD
3837
target.regexpMatch("(?i)https?://([a-z0-9-]+\\.)+([a-z]+)(:[0-9]+)?/?")

javascript/ql/src/Security/CWE-020/IncompleteUrlSubstringSanitizationSpecific.qll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,4 @@ import semmle.javascript.dataflow.InferredTypes
44
/** Holds if `node` may evaluate to `value` */
55
predicate mayHaveStringValue(DataFlow::Node node, string value) { node.mayHaveStringValue(value) }
66

7-
import semmle.javascript.security.regexp.HostnameRegexp as HostnameRegexp
7+
import codeql.regex.HostnameRegexp::Utils

ruby/ql/src/queries/security/cwe-020/IncompleteUrlSubstringSanitization.qll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
*/
44

55
private import IncompleteUrlSubstringSanitizationSpecific
6+
private import codeql.regex.HostnameRegexp::Utils
67

78
/**
89
* A check on a string for whether it contains a given substring, possibly with restrictions on the location of the substring.
@@ -30,9 +31,7 @@ query predicate problems(
3031
mayHaveStringValue(substring, target) and
3132
(
3233
// target contains a domain on a common TLD, and perhaps some other URL components
33-
target
34-
.regexpMatch("(?i)([a-z]*:?//)?\\.?([a-z0-9-]+\\.)+" + HostnameRegexp::getACommonTld() +
35-
"(:[0-9]+)?/?")
34+
target.regexpMatch("(?i)([a-z]*:?//)?\\.?([a-z0-9-]+\\.)+" + getACommonTld() + "(:[0-9]+)?/?")
3635
or
3736
// target is a HTTP URL to a domain on any TLD
3837
target.regexpMatch("(?i)https?://([a-z0-9-]+\\.)+([a-z]+)(:[0-9]+)?/?")

ruby/ql/src/queries/security/cwe-020/IncompleteUrlSubstringSanitizationSpecific.qll

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,5 +5,3 @@ import codeql.ruby.StringOps
55
predicate mayHaveStringValue(DataFlow::Node node, string value) {
66
node.asExpr().getConstantValue().getString() = value
77
}
8-
9-
import codeql.ruby.security.regexp.HostnameRegexp as HostnameRegexp

shared/regex/codeql/regex/HostnameRegexp.qll

Lines changed: 38 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,10 @@ private import RegexTreeView
1111
*/
1212
signature module HostnameRegexpSig<RegexTreeViewSig TreeImpl> {
1313
/** A node in the data-flow graph. */
14-
class DataFlowNode;
14+
class DataFlowNode {
15+
/** Gets a string representation of this node. */
16+
string toString();
17+
}
1518

1619
/** A node in the data-flow graph that represents a regular expression pattern. */
1720
class RegExpPatternSource extends DataFlowNode {
@@ -28,12 +31,26 @@ signature module HostnameRegexpSig<RegexTreeViewSig TreeImpl> {
2831
}
2932
}
3033

34+
/**
35+
* Utility predicates and classes that doesn't depend on any signature.
36+
*/
37+
module Utils {
38+
/**
39+
* Gets a pattern that matches common top-level domain names in lower case.
40+
*/
41+
string getACommonTld() {
42+
// according to ranking by http://google.com/search?q=site:.<<TLD>>
43+
result = "(?:com|org|edu|gov|uk|net|io)(?![a-z0-9])"
44+
}
45+
}
46+
3147
/**
3248
* Classes and predicates implementing an analysis on regular expressions
3349
* that match URLs and hostname patterns.
3450
*/
3551
module Make<RegexTreeViewSig TreeImpl, HostnameRegexpSig<TreeImpl> Specific> {
3652
private import TreeImpl
53+
import Utils
3754

3855
/**
3956
* Holds if the given constant is unlikely to occur in the origin part of a URL.
@@ -213,30 +230,6 @@ module Make<RegexTreeViewSig TreeImpl, HostnameRegexpSig<TreeImpl> Specific> {
213230
)
214231
}
215232

216-
/**
217-
* Holds if `regexp` is a regular expression that is likely to match a hostname,
218-
* but the pattern is incomplete and may match more hosts than intended.
219-
*/
220-
predicate incompleteHostnameRegExp(
221-
RegExpSequence hostSequence, string message, Specific::DataFlowNode aux, string label
222-
) {
223-
exists(Specific::RegExpPatternSource re, RegExpTerm regexp, string msg, string kind |
224-
regexp = re.getRegExpTerm() and
225-
isIncompleteHostNameRegExpPattern(regexp, hostSequence, msg) and
226-
(
227-
if re.getAParse() != re
228-
then (
229-
kind = "string, which is used as a regular expression $@," and
230-
aux = re.getAParse()
231-
) else (
232-
kind = "regular expression" and aux = re
233-
)
234-
)
235-
|
236-
message = "This " + kind + " " + msg and label = "here"
237-
)
238-
}
239-
240233
/** Holds if `term` is one of the transitive left children of a regexp. */
241234
predicate isLeftArmTerm(RegExpTerm term) {
242235
term.isRootTerm()
@@ -262,10 +255,26 @@ module Make<RegexTreeViewSig TreeImpl, HostnameRegexpSig<TreeImpl> Specific> {
262255
}
263256

264257
/**
265-
* Gets a pattern that matches common top-level domain names in lower case.
258+
* Holds if `regexp` is a regular expression that is likely to match a hostname,
259+
* but the pattern is incomplete and may match more hosts than intended.
266260
*/
267-
string getACommonTld() {
268-
// according to ranking by http://google.com/search?q=site:.<<TLD>>
269-
result = "(?:com|org|edu|gov|uk|net|io)(?![a-z0-9])"
261+
predicate incompleteHostnameRegExp(
262+
RegExpSequence hostSequence, string message, Specific::DataFlowNode aux, string label
263+
) {
264+
exists(Specific::RegExpPatternSource re, RegExpTerm regexp, string msg, string kind |
265+
regexp = re.getRegExpTerm() and
266+
isIncompleteHostNameRegExpPattern(regexp, hostSequence, msg) and
267+
(
268+
if re.getAParse() != re
269+
then (
270+
kind = "string, which is used as a regular expression $@," and
271+
aux = re.getAParse()
272+
) else (
273+
kind = "regular expression" and aux = re
274+
)
275+
)
276+
|
277+
message = "This " + kind + " " + msg and label = "here"
278+
)
270279
}
271280
}

0 commit comments

Comments
 (0)