Skip to content

Commit c186453

Browse files
committed
JS: Push FeaturizationConfig context into more predicates
1 parent 383437c commit c186453

File tree

1 file changed

+64
-49
lines changed
  • javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling

1 file changed

+64
-49
lines changed

javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointFeatures.qll

Lines changed: 64 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -8,57 +8,74 @@ import javascript
88
import CodeToFeatures
99
private import EndpointScoring
1010

11+
/**
12+
* A configuration that defines which endpoints should be featurized.
13+
*
14+
* This is a performance optimization: feature extraction is restricted to the endpoints returned by
15+
* `getAnEndpointToFeaturize`, so that only those endpoints are featurized.
16+
*/
17+
abstract class FeaturizationConfig extends string {
18+
// The characteristic predicate body is `any()`, so it places no constraint on `this`;
// `bindingset[this]` declares that `this` must already be bound wherever the class is used.
bindingset[this]
19+
FeaturizationConfig() { any() }
20+
21+
/** Gets an endpoint that this configuration asks to have featurized. */
abstract DataFlow::Node getAnEndpointToFeaturize();
22+
}
23+
1124
/**
1225
* Gets the value of the token-based feature named `featureName` for the endpoint `endpoint`.
1326
*
1427
* This is a single string containing a space-separated list of tokens.
1528
*/
1629
private string getTokenFeature(DataFlow::Node endpoint, string featureName) {
17-
// Features for endpoints that are contained within a function.
18-
exists(DatabaseFeatures::Entity entity | entity = getRepresentativeEntityForEndpoint(endpoint) |
19-
// The name of the function that encloses the endpoint.
20-
featureName = "enclosingFunctionName" and result = entity.getName()
21-
or
22-
// A feature containing natural language tokens from the function that encloses the endpoint in
23-
// the order that they appear in the source code.
24-
featureName = "enclosingFunctionBody" and
25-
result = unique(string x | x = FunctionBodies::getBodyTokenFeatureForEntity(entity))
26-
)
27-
or
28-
result =
29-
strictconcat(DataFlow::CallNode call, string component |
30-
component = getACallBasedTokenFeatureComponent(endpoint, call, featureName)
31-
|
32-
component, " "
30+
// Performance optimization: Restrict feature extraction to endpoints we've explicitly asked to featurize.
31+
endpoint = any(FeaturizationConfig cfg).getAnEndpointToFeaturize() and
32+
(
33+
// Features for endpoints that are contained within a function.
34+
exists(DatabaseFeatures::Entity entity | entity = getRepresentativeEntityForEndpoint(endpoint) |
35+
// The name of the function that encloses the endpoint.
36+
featureName = "enclosingFunctionName" and result = entity.getName()
37+
or
38+
// A feature containing natural language tokens from the function that encloses the endpoint in
39+
// the order that they appear in the source code.
40+
featureName = "enclosingFunctionBody" and
41+
result = unique(string x | x = FunctionBodies::getBodyTokenFeatureForEntity(entity))
3342
)
34-
or
35-
// The access path of the function being called, both with and without structural info, if the
36-
// function being called originates from an external API. For example, the endpoint here:
37-
//
38-
// ```js
39-
// const mongoose = require('mongoose'),
40-
// User = mongoose.model('User', null);
41-
// User.findOne(ENDPOINT);
42-
// ```
43-
//
44-
// would have a callee access path with structural info of
45-
// `mongoose member model instanceorreturn member findOne instanceorreturn`, and a callee access
46-
// path without structural info of `mongoose model findOne`.
47-
//
48-
// These features indicate that the callee comes from (reading the access path backwards) an
49-
// instance of the `findOne` member of an instance of the `model` member of the `mongoose`
50-
// external library.
51-
exists(AccessPaths::Boolean includeStructuralInfo |
52-
featureName =
53-
"calleeAccessPath" +
54-
any(string x | if includeStructuralInfo = true then x = "WithStructuralInfo" else x = "") and
43+
or
5544
result =
56-
concat(API::Node node, string accessPath |
57-
node.getInducingNode().(DataFlow::CallNode).getAnArgument() = endpoint and
58-
AccessPaths::accessPaths(node, includeStructuralInfo, accessPath, _)
45+
strictconcat(DataFlow::CallNode call, string component |
46+
component = getACallBasedTokenFeatureComponent(endpoint, call, featureName)
5947
|
60-
accessPath, " "
48+
component, " "
6149
)
50+
or
51+
// The access path of the function being called, both with and without structural info, if the
52+
// function being called originates from an external API. For example, the endpoint here:
53+
//
54+
// ```js
55+
// const mongoose = require('mongoose'),
56+
// User = mongoose.model('User', null);
57+
// User.findOne(ENDPOINT);
58+
// ```
59+
//
60+
// would have a callee access path with structural info of
61+
// `mongoose member model instanceorreturn member findOne instanceorreturn`, and a callee access
62+
// path without structural info of `mongoose model findOne`.
63+
//
64+
// These features indicate that the callee comes from (reading the access path backwards) an
65+
// instance of the `findOne` member of an instance of the `model` member of the `mongoose`
66+
// external library.
67+
exists(AccessPaths::Boolean includeStructuralInfo |
68+
featureName =
69+
"calleeAccessPath" +
70+
any(string x | if includeStructuralInfo = true then x = "WithStructuralInfo" else x = "") and
71+
result =
72+
concat(API::Node node, string accessPath |
73+
node.getInducingNode().(DataFlow::CallNode).getAnArgument() = endpoint and
74+
AccessPaths::accessPaths(node, includeStructuralInfo, accessPath, _)
75+
|
76+
accessPath, " "
77+
)
78+
)
6279
)
6380
}
6481

@@ -77,6 +94,8 @@ private string getTokenFeature(DataFlow::Node endpoint, string featureName) {
7794
private string getACallBasedTokenFeatureComponent(
7895
DataFlow::Node endpoint, DataFlow::CallNode call, string featureName
7996
) {
97+
// Performance optimization: Restrict feature extraction to endpoints we've explicitly asked to featurize.
98+
endpoint = any(FeaturizationConfig cfg).getAnEndpointToFeaturize() and
8099
// Features for endpoints that are an argument to a function call.
81100
endpoint = call.getAnArgument() and
82101
(
@@ -111,6 +130,9 @@ private string getACallBasedTokenFeatureComponent(
111130
module FunctionBodies {
112131
/** Holds if `location` is the location of an AST node within the entity `entity` and `token` is a node attribute associated with that AST node. */
113132
private predicate bodyTokens(DatabaseFeatures::Entity entity, Location location, string token) {
133+
// Performance optimization: Restrict the set of entities to those containing an endpoint to featurize.
134+
entity =
135+
getRepresentativeEntityForEndpoint(any(FeaturizationConfig cfg).getAnEndpointToFeaturize()) and
114136
exists(DatabaseFeatures::AstNode node |
115137
DatabaseFeatures::astNodes(entity, _, _, node, _) and
116138
token = unique(string t | DatabaseFeatures::nodeAttributes(node, t)) and
@@ -269,21 +291,14 @@ private string getASupportedFeatureName() {
269291
]
270292
}
271293

272-
/** A configuration that defines which endpoints should be featurized. */
273-
abstract class FeaturizationConfig extends string {
274-
bindingset[this]
275-
FeaturizationConfig() { any() }
276-
277-
abstract DataFlow::Node getAnEndpointToFeaturize();
278-
}
279-
280294
/**
281295
* Generic token-based features for ATM.
282296
*
283297
* This predicate holds if the generic token-based feature named `featureName` has the value
284298
* `featureValue` for the endpoint `endpoint`.
285299
*/
286300
predicate tokenFeatures(DataFlow::Node endpoint, string featureName, string featureValue) {
301+
// Performance optimization: Restrict feature extraction to endpoints we've explicitly asked to featurize.
287302
endpoint = any(FeaturizationConfig cfg).getAnEndpointToFeaturize() and
288303
(
289304
if strictcount(getTokenFeature(endpoint, featureName)) = 1

0 commit comments

Comments
 (0)