Skip to content

Commit 2a3b5fc

Browse files
committed
JS: Performance optimisation for matching framework libraries with their marker comments
The `matchMarkerComment` predicate performs badly on any codebase with a moderately large number of comments, because the current implementation has to first compute the Cartesian product between the set of comments and the set of framework library comment regexes. Instead, match first against a single regex: the union of all framework library comment regexes. This computes a more benign Cartesian product, the same size as the set of comments. See inline comments for more details.
1 parent d0840af commit 2a3b5fc

File tree

1 file changed

+64
-6
lines changed

1 file changed

+64
-6
lines changed

javascript/ql/lib/semmle/javascript/dependencies/FrameworkLibraries.qll

Lines changed: 64 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -92,14 +92,26 @@ abstract class FrameworkLibraryWithMarkerComment extends FrameworkLibrary {
9292

9393
/**
9494
* Gets a regular expression that can be used to identify an instance of
95-
* this framework library.
95+
* this framework library, with `<VERSION>` as a placeholder for version
96+
* numbers.
9697
*
9798
* The first capture group of this regular expression should match
9899
* the version number. Any occurrences of the string `<VERSION>` in
99-
* the regular expression are replaced by `versionRegex()` before
100+
* the regular expression will be replaced by `versionRegex()` before
100101
* matching.
101102
*/
102103
abstract string getAMarkerCommentRegex();
104+
105+
/**
106+
* Gets a regular expression that can be used to identify an instance of
107+
* this framework library.
108+
*
109+
* The first capture group of this regular expression is intended to match
110+
* the version number.
111+
*/
112+
final string getAMarkerCommentRegexWithoutPlaceholders() {
113+
result = this.getAMarkerCommentRegex().replaceAll("<VERSION>", versionRegex())
114+
}
103115
}
104116

105117
/**
@@ -182,18 +194,64 @@ class FrameworkLibraryInstanceWithMarkerComment extends FrameworkLibraryInstance
182194
override predicate info(FrameworkLibrary fl, string v) { matchMarkerComment(_, this, fl, v) }
183195
}
184196

197+
/** A marker comment that indicates a framework library. */
198+
private class MarkerComment extends Comment {
199+
MarkerComment() {
200+
/*
201+
* PERFORMANCE OPTIMISATION:
202+
*
203+
* Each framework library has a regular expression describing its marker comments.
204+
* We want to find the set of marker comments and the framework regexes they match.
205+
* In order to perform such regex matching, CodeQL needs to compute the
206+
* Cartesian product of possible receiver strings and regexes first,
207+
* containing `num_receivers * num_regexes` tuples.
208+
*
209+
* A straightforward attempt to match marker comments with individual
210+
* framework regexes will compute the Cartesian product between
211+
* the set of comments and the set of framework regexes.
212+
* Total: `num_comments * num_frameworks` tuples.
213+
*
214+
* Instead, create a single regex that matches *all* frameworks.
215+
* This is the regex union of the individual framework regexes
216+
* i.e. `(regex_1)|(regex_2)|...|(regex_n)`
217+
* This approach will compute the Cartesian product between
218+
* the set of comments and the singleton set of this union regex.
219+
* Total: `num_comments * 1` tuples.
220+
*
221+
* To identify the individual frameworks and extract the version number from capture groups,
222+
* use the member predicate `matchesFramework` *after* this predicate has been computed.
223+
*/
224+
225+
exists(string unionRegex |
226+
unionRegex =
227+
concat(FrameworkLibraryWithMarkerComment fl |
228+
|
229+
"(" + fl.getAMarkerCommentRegexWithoutPlaceholders() + ")", "|"
230+
)
231+
|
232+
this.getText().regexpMatch(unionRegex)
233+
)
234+
}
235+
236+
/**
237+
* Holds if this marker comment indicates an instance of the framework `fl`
238+
* with version number `version`.
239+
*/
240+
predicate matchesFramework(FrameworkLibraryWithMarkerComment fl, string version) {
241+
fl.getAMarkerCommentRegexWithoutPlaceholders().regexpCapture(this.getText(), 1) = version
242+
}
243+
}
244+
185245
/**
186246
* Holds if comment `c` in toplevel `tl` matches the marker comment of library
187247
* `fl` at `version`.
188248
*/
189249
cached
190250
private predicate matchMarkerComment(
191-
Comment c, TopLevel tl, FrameworkLibraryWithMarkerComment fl, string version
251+
MarkerComment c, TopLevel tl, FrameworkLibraryWithMarkerComment fl, string version
192252
) {
193253
c.getTopLevel() = tl and
194-
exists(string r | r = fl.getAMarkerCommentRegex().replaceAll("<VERSION>", versionRegex()) |
195-
version = c.getText().regexpCapture(r, 1)
196-
)
254+
c.matchesFramework(fl, version)
197255
}
198256

199257
/**

0 commit comments

Comments
 (0)