@@ -8,57 +8,74 @@ import javascript
8
8
import CodeToFeatures
9
9
private import EndpointScoring
10
10
11
+ /**
12
+ * A configuration that defines which endpoints should be featurized.
13
+ *
14
+ * This is used as a performance optimization to ensure that we only featurize the endpoints we need
15
+ * to featurize.
+ *
+ * The feature-extraction predicates visible in this file restrict themselves to endpoints returned
+ * by `getAnEndpointToFeaturize()` of some configuration, so subclasses of this class decide what
+ * gets featurized.
16
+ */
17
+ abstract class FeaturizationConfig extends string {
18
+ bindingset [ this ]
19
+ FeaturizationConfig ( ) { any ( ) } // `any()` always holds, so the characteristic predicate itself excludes no string; `bindingset[this]` requires `this` to be bound by the user.
20
+
21
+ abstract DataFlow:: Node getAnEndpointToFeaturize ( ) ; // Gets an endpoint that this configuration wants featurized; implemented by each subclass.
22
+ }
23
+
11
24
/**
12
25
* Gets the value of the token-based feature named `featureName` for the endpoint `endpoint`.
13
26
*
14
27
* This is a single string containing a space-separated list of tokens.
*
* In the added (`+`) version of the body, this only has a result for endpoints returned by some
* `FeaturizationConfig`'s `getAnEndpointToFeaturize()`.
*
* The feature names handled directly in this body are `enclosingFunctionName`,
* `enclosingFunctionBody`, `calleeAccessPath` and `calleeAccessPathWithStructuralInfo`; any other
* feature name is delegated to `getACallBasedTokenFeatureComponent`, whose components are joined
* with single spaces.
15
28
*/
16
29
private string getTokenFeature ( DataFlow:: Node endpoint , string featureName ) {
17
- // Features for endpoints that are contained within a function.
18
- exists ( DatabaseFeatures:: Entity entity | entity = getRepresentativeEntityForEndpoint ( endpoint ) |
19
- // The name of the function that encloses the endpoint.
20
- featureName = "enclosingFunctionName" and result = entity .getName ( )
21
- or
22
- // A feature containing natural language tokens from the function that encloses the endpoint in
23
- // the order that they appear in the source code.
24
- featureName = "enclosingFunctionBody" and
25
- result = unique( string x | x = FunctionBodies:: getBodyTokenFeatureForEntity ( entity ) )
26
- )
27
- or
28
- result =
29
- strictconcat ( DataFlow:: CallNode call , string component |
30
- component = getACallBasedTokenFeatureComponent ( endpoint , call , featureName )
31
- |
32
- component , " "
30
+ // Performance optimization: Restrict feature extraction to endpoints we've explicitly asked to featurize.
31
+ endpoint = any ( FeaturizationConfig cfg ) .getAnEndpointToFeaturize ( ) and
32
+ (
33
+ // Features for endpoints that are contained within a function.
34
+ exists ( DatabaseFeatures:: Entity entity | entity = getRepresentativeEntityForEndpoint ( endpoint ) |
35
+ // The name of the function that encloses the endpoint.
36
+ featureName = "enclosingFunctionName" and result = entity .getName ( )
37
+ or
38
+ // A feature containing natural language tokens from the function that encloses the endpoint in
39
+ // the order that they appear in the source code.
40
+ featureName = "enclosingFunctionBody" and
41
+ result = unique( string x | x = FunctionBodies:: getBodyTokenFeatureForEntity ( entity ) )
33
42
)
34
- or
35
- // The access path of the function being called, both with and without structural info, if the
36
- // function being called originates from an external API. For example, the endpoint here:
37
- //
38
- // ```js
39
- // const mongoose = require('mongoose'),
40
- // User = mongoose.model('User', null);
41
- // User.findOne(ENDPOINT);
42
- // ```
43
- //
44
- // would have a callee access path with structural info of
45
- // `mongoose member model instanceorreturn member findOne instanceorreturn`, and a callee access
46
- // path without structural info of `mongoose model findOne`.
47
- //
48
- // These features indicate that the callee comes from (reading the access path backwards) an
49
- // instance of the `findOne` member of an instance of the `model` member of the `mongoose`
50
- // external library.
51
- exists ( AccessPaths:: Boolean includeStructuralInfo |
52
- featureName =
53
- "calleeAccessPath" +
54
- any ( string x | if includeStructuralInfo = true then x = "WithStructuralInfo" else x = "" ) and
43
+ or
55
44
result =
56
- concat ( API:: Node node , string accessPath |
57
- node .getInducingNode ( ) .( DataFlow:: CallNode ) .getAnArgument ( ) = endpoint and
58
- AccessPaths:: accessPaths ( node , includeStructuralInfo , accessPath , _)
45
+ strictconcat ( DataFlow:: CallNode call , string component | // `strictconcat` has no result when there are no components, so this disjunct contributes nothing in that case.
46
+ component = getACallBasedTokenFeatureComponent ( endpoint , call , featureName )
59
47
|
60
- accessPath , " "
48
+ component , " "
61
49
)
50
+ or
51
+ // The access path of the function being called, both with and without structural info, if the
52
+ // function being called originates from an external API. For example, the endpoint here:
53
+ //
54
+ // ```js
55
+ // const mongoose = require('mongoose'),
56
+ // User = mongoose.model('User', null);
57
+ // User.findOne(ENDPOINT);
58
+ // ```
59
+ //
60
+ // would have a callee access path with structural info of
61
+ // `mongoose member model instanceorreturn member findOne instanceorreturn`, and a callee access
62
+ // path without structural info of `mongoose model findOne`.
63
+ //
64
+ // These features indicate that the callee comes from (reading the access path backwards) an
65
+ // instance of the `findOne` member of an instance of the `model` member of the `mongoose`
66
+ // external library.
67
+ exists ( AccessPaths:: Boolean includeStructuralInfo |
68
+ featureName =
69
+ "calleeAccessPath" +
70
+ any ( string x | if includeStructuralInfo = true then x = "WithStructuralInfo" else x = "" ) and
71
+ result =
72
+ concat ( API:: Node node , string accessPath | // Unlike `strictconcat`, `concat` yields the empty string when no access path matches.
73
+ node .getInducingNode ( ) .( DataFlow:: CallNode ) .getAnArgument ( ) = endpoint and
74
+ AccessPaths:: accessPaths ( node , includeStructuralInfo , accessPath , _)
75
+ |
76
+ accessPath , " "
77
+ )
78
+ )
62
79
)
63
80
}
64
81
@@ -77,6 +94,8 @@ private string getTokenFeature(DataFlow::Node endpoint, string featureName) {
77
94
private string getACallBasedTokenFeatureComponent (
78
95
DataFlow:: Node endpoint , DataFlow:: CallNode call , string featureName
79
96
) {
97
+ // Performance optimization: Restrict feature extraction to endpoints we've explicitly asked to featurize.
98
+ endpoint = any ( FeaturizationConfig cfg ) .getAnEndpointToFeaturize ( ) and
80
99
// Features for endpoints that are an argument to a function call.
81
100
endpoint = call .getAnArgument ( ) and
82
101
(
@@ -111,6 +130,9 @@ private string getACallBasedTokenFeatureComponent(
111
130
module FunctionBodies {
112
131
/** Holds if `location` is the location of an AST node within the entity `entity` and `token` is a node attribute associated with that AST node. */
113
132
private predicate bodyTokens ( DatabaseFeatures:: Entity entity , Location location , string token ) {
133
+ // Performance optimization: Restrict the set of entities to those containing an endpoint to featurize.
134
+ entity =
135
+ getRepresentativeEntityForEndpoint ( any ( FeaturizationConfig cfg ) .getAnEndpointToFeaturize ( ) ) and
114
136
exists ( DatabaseFeatures:: AstNode node |
115
137
DatabaseFeatures:: astNodes ( entity , _, _, node , _) and
116
138
token = unique( string t | DatabaseFeatures:: nodeAttributes ( node , t ) ) and
@@ -269,21 +291,14 @@ private string getASupportedFeatureName() {
269
291
]
270
292
}
271
293
272
- /** A configuration that defines which endpoints should be featurized. */
273
- abstract class FeaturizationConfig extends string {
274
- bindingset [ this ]
275
- FeaturizationConfig ( ) { any ( ) }
276
-
277
- abstract DataFlow:: Node getAnEndpointToFeaturize ( ) ;
278
- }
279
-
280
294
/**
281
295
* Generic token-based features for ATM.
282
296
*
283
297
* This predicate holds if the generic token-based feature named `featureName` has the value
284
298
* `featureValue` for the endpoint `endpoint`.
285
299
*/
286
300
predicate tokenFeatures ( DataFlow:: Node endpoint , string featureName , string featureValue ) {
301
+ // Performance optimization: Restrict feature extraction to endpoints we've explicitly asked to featurize.
287
302
endpoint = any ( FeaturizationConfig cfg ) .getAnEndpointToFeaturize ( ) and
288
303
(
289
304
if strictcount ( getTokenFeature ( endpoint , featureName ) ) = 1
0 commit comments