Skip to content

Commit 372f099

Browse files
authored
Merge pull request #7323 from adityasharad/atm/perf-debugging-std-lib
JS: Performance improvements to libraries using regex matching
2 parents c0b61d7 + 1857de1 commit 372f099

File tree

3 files changed

+61
-27
lines changed

3 files changed

+61
-27
lines changed

javascript/ql/lib/semmle/javascript/GeneratedCode.qll

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -49,14 +49,14 @@ private predicate codeGeneratorMarkerComment(Comment c, string tool) {
4949
*/
5050
private class GenericGeneratedCodeMarkerComment extends GeneratedCodeMarkerComment {
5151
GenericGeneratedCodeMarkerComment() {
52-
exists(string line | line = getLine(_) |
53-
exists(string entity, string was, string automatically |
54-
entity = "code|file|class|interface|art[ei]fact|module|script" and
55-
was = "was|is|has been" and
56-
automatically = "automatically |mechanically |auto[- ]?" and
57-
line.regexpMatch("(?i).*\\b(This|The following) (" + entity + ") (" + was + ") (" +
58-
automatically + ")?gener(e?)ated\\b.*")
59-
)
52+
exists(string entity, string was, string automatically |
53+
entity = "code|file|class|interface|art[ei]fact|module|script" and
54+
was = "was|is|has been" and
55+
automatically = "automatically |mechanically |auto[- ]?" and
56+
// Look for this pattern in each line of the comment.
57+
this.getText()
58+
.regexpMatch("(?im)^.*\\b(This|The following) (" + entity + ") (" + was + ") (" +
59+
automatically + ")?gener(e?)ated\\b.*$")
6060
)
6161
}
6262
}
@@ -66,9 +66,14 @@ private class GenericGeneratedCodeMarkerComment extends GeneratedCodeMarkerComme
6666
*/
6767
private class DontModifyMarkerComment extends GeneratedCodeMarkerComment {
6868
DontModifyMarkerComment() {
69-
exists(string line | line = getLine(_) |
70-
line.regexpMatch("(?i).*\\bGenerated by\\b.*\\bDo not edit\\b.*") or
71-
line.regexpMatch("(?i).*\\bAny modifications to this file will be lost\\b.*")
69+
exists(string pattern |
70+
// Look for these patterns in each line of the comment.
71+
this.getText().regexpMatch(pattern) and
72+
pattern =
73+
[
74+
"(?im)^.*\\bGenerated by\\b.*\\bDo not edit\\b.*$",
75+
"(?im)^.*\\bAny modifications to this file will be lost\\b.*$"
76+
]
7277
)
7378
}
7479
}

javascript/ql/lib/semmle/javascript/dependencies/FrameworkLibraries.qll

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -299,12 +299,15 @@ private class JQuery extends FrameworkLibraryWithGenericURL {
299299
private predicate jQueryMarkerComment(Comment c, TopLevel tl, string version) {
300300
tl = c.getTopLevel() and
301301
exists(string txt | txt = c.getText() |
302-
// more recent versions use this format
302+
// More recent versions use this format:
303+
// "(?s).*jQuery (?:JavaScript Library )?v(" + versionRegex() + ").*",
304+
// Earlier versions used this format:
305+
// "(?s).*jQuery (" + versionRegex() + ") - New Wave Javascript.*"
306+
// For efficiency, construct a single regex that matches both,
307+
// at the cost of being slightly more permissive.
303308
version =
304-
txt.regexpCapture("(?s).*jQuery (?:JavaScript Library )?v(" + versionRegex() + ").*", 1)
305-
or
306-
// earlier versions used this format
307-
version = txt.regexpCapture("(?s).*jQuery (" + versionRegex() + ") - New Wave Javascript.*", 1)
309+
txt.regexpCapture("(?s).*jQuery (?:JavaScript Library )?v?(" + versionRegex() +
310+
")(?: - New Wave Javascript)?.*", 1)
308311
or
309312
// 1.0.0 and 1.0.1 have the same marker comment; we call them both "1.0"
310313
txt.matches("%jQuery - New Wave Javascript%") and version = "1.0"

javascript/ql/lib/semmle/javascript/security/SensitiveActions.qll

Lines changed: 37 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -81,22 +81,48 @@ abstract class SensitiveVariableAccess extends SensitiveExpr {
8181
/** A write to a location that might contain sensitive data. */
8282
abstract class SensitiveWrite extends DataFlow::Node { }
8383

84+
/**
85+
* Holds if `node` is a write to a variable or property named `name`.
86+
*
87+
* Helper predicate factored out for performance,
88+
* to filter `name` as much as possible before using it in
89+
* regex matching.
90+
*/
91+
pragma[nomagic]
92+
private predicate writesProperty(DataFlow::Node node, string name) {
93+
exists(DataFlow::PropWrite pwn |
94+
pwn.getPropertyName() = name and
95+
pwn.getRhs() = node
96+
)
97+
or
98+
exists(VarDef v | v.getAVariable().getName() = name |
99+
if exists(v.getSource())
100+
then v.getSource() = node.asExpr()
101+
else node = DataFlow::ssaDefinitionNode(SSA::definition(v))
102+
)
103+
}
104+
84105
/** A write to a variable or property that might contain sensitive data. */
85106
private class BasicSensitiveWrite extends SensitiveWrite {
86107
SensitiveDataClassification classification;
87108

88109
BasicSensitiveWrite() {
89-
exists(string name | nameIndicatesSensitiveData(name, classification) |
90-
exists(DataFlow::PropWrite pwn |
91-
pwn.getPropertyName() = name and
92-
pwn.getRhs() = this
93-
)
94-
or
95-
exists(VarDef v | v.getAVariable().getName() = name |
96-
if exists(v.getSource())
97-
then v.getSource() = this.asExpr()
98-
else this = DataFlow::ssaDefinitionNode(SSA::definition(v))
99-
)
110+
exists(string name |
111+
/*
112+
* PERFORMANCE OPTIMISATION:
113+
* `nameIndicatesSensitiveData` performs a `regexpMatch` on `name`.
114+
* To carry out a regex match, we must first compute the Cartesian product
115+
* of all possible `name`s and regexes, then match.
116+
* To keep this product as small as possible,
117+
* we want to filter `name` as much as possible before the product.
118+
*
119+
* Do this by factoring out a helper predicate containing the filtering
120+
* logic that restricts `name`. This helper predicate will get picked first
121+
* in the join order, since it is the only call here that binds `name`.
122+
*/
123+
124+
writesProperty(this, name) and
125+
nameIndicatesSensitiveData(name, classification)
100126
)
101127
}
102128

0 commit comments

Comments
 (0)