Skip to content

Commit 865fb5d

Browse files
committed
Migrate representative entity -> representative function
1 parent 0e5b493 commit 865fb5d

File tree

3 files changed

+54
-98
lines changed

3 files changed

+54
-98
lines changed

javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/BaseScoring.qll

Lines changed: 7 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -15,69 +15,18 @@ external predicate availableMlModels(
1515
ATMConfig getCfg() { any() }
1616

1717
/**
18-
* This module provides functionality that takes an endpoint and provides an entity that encloses that
19-
* endpoint and is suitable for similarity analysis.
18+
* This module provides functionality that takes an endpoint and provides a function that encloses
19+
* that endpoint.
2020
*/
21-
module EndpointToEntity {
21+
module EndpointToFunction {
2222
private import CodeToFeatures
2323

2424
/**
25-
* Get an entity enclosing the endpoint that is suitable for similarity analysis. In general,
26-
* this may associate multiple entities to a single endpoint.
25+
* Get a function containing the endpoint that is suitable for featurization. In general,
26+
* this associates an endpoint to multiple functions, since there may be more than one function enclosing a single endpoint.
2727
*/
28-
DatabaseFeatures::Entity getAnEntityForEndpoint(DataFlow::Node endpoint) {
29-
DatabaseFeatures::entities(result, _, _, _, _, _, _, _, _) and
30-
result.getDefinedFunction() = endpoint.getContainer().getEnclosingContainer*()
31-
}
32-
}
33-
34-
/**
35-
* This module provides functionality that takes an entity and provides effective endpoints within
36-
* that entity.
37-
*
38-
* We use the following terminology to describe endpoints:
39-
*
40-
* - The *candidate* endpoints are the set of data flow nodes that should be passed to the
41-
* appropriate endpoint filter to produce the set of effective endpoints.
42-
* When we have a model that beats the performance of the baseline, we will likely define the
43-
* candidate endpoints based on the most confident predictions of the model.
44-
* - An *effective* endpoint is a candidate endpoint which passes through the endpoint filter.
45-
* In other words, it is a candidate endpoint for which the `isEffectiveSink` (or
46-
* `isEffectiveSource`) predicate defined in the `ATMConfig` instance in scope holds.
47-
*/
48-
module EntityToEffectiveEndpoint {
49-
private import CodeToFeatures
50-
51-
/**
52-
* Returns endpoint candidates within the specified entities.
53-
*
54-
* The baseline implementation of this is that a candidate endpoint is any data flow node that is
55-
* enclosed within the specified entity.
56-
*/
57-
private DataFlow::Node getABaselineEndpointCandidate(DatabaseFeatures::Entity entity) {
58-
result.getContainer().getEnclosingContainer*() = entity.getDefinedFunction()
59-
}
60-
61-
/**
62-
* Get an effective source enclosed by the specified entity.
63-
*
64-
* N.B. This is _not_ an inverse of `EndpointToEntity::getAnEntityForEndpoint`: the effective
65-
* source may occur in a function defined within the specified entity.
66-
*/
67-
DataFlow::Node getAnEffectiveSource(DatabaseFeatures::Entity entity) {
68-
result = getABaselineEndpointCandidate(entity) and
69-
getCfg().isEffectiveSource(result)
70-
}
71-
72-
/**
73-
* Get an effective sink enclosed by the specified entity.
74-
*
75-
* N.B. This is _not_ an inverse of `EndpointToEntity::getAnEntityForEndpoint`: the effective
76-
* sink may occur in a function defined within the specified entity.
77-
*/
78-
DataFlow::Node getAnEffectiveSink(DatabaseFeatures::Entity entity) {
79-
result = getABaselineEndpointCandidate(entity) and
80-
getCfg().isEffectiveSink(result)
28+
Function getAFunctionForEndpoint(DataFlow::Node endpoint) {
29+
result = endpoint.getContainer().getEnclosingContainer*()
8130
}
8231
}
8332

javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointFeatures.qll

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,9 @@ private string getTokenFeature(DataFlow::Node endpoint, string featureName) {
3131
endpoint = any(FeaturizationConfig cfg).getAnEndpointToFeaturize() and
3232
(
3333
// Features for endpoints that are contained within a function.
34-
exists(DatabaseFeatures::Entity entity | entity = getRepresentativeEntityForEndpoint(endpoint) |
34+
exists(DatabaseFeatures::Entity entity |
35+
entity.getDefinedFunction() = getRepresentativeFunctionForEndpoint(endpoint)
36+
|
3537
// The name of the function that encloses the endpoint.
3638
featureName = "enclosingFunctionName" and result = entity.getName()
3739
or
@@ -147,24 +149,27 @@ module FunctionBodies {
147149
result = node.(TemplateElement).getRawValue()
148150
}
149151

152+
/** Returns an AST node within the function `f` that we should featurize. */
153+
pragma[inline]
154+
ASTNode getAnASTNodeToFeaturize(Function f) {
155+
result.getParent*() = f and
156+
not result = f.getIdentifier() and
157+
exists(getTokenizedAstNode(result))
158+
}
159+
150160
/** Holds if `location` is the location of an AST node within the entity `entity` and `token` is a node attribute associated with that AST node. */
151161
private predicate bodyTokens(DatabaseFeatures::Entity entity, Location location, string token) {
152162
// Performance optimization: Restrict the set of entities to those containing an endpoint to featurize.
153-
entity =
154-
getRepresentativeEntityForEndpoint(any(FeaturizationConfig cfg).getAnEndpointToFeaturize()) and
163+
entity.getDefinedFunction() =
164+
getRepresentativeFunctionForEndpoint(any(FeaturizationConfig cfg).getAnEndpointToFeaturize()) and
155165
// Performance optimization: If a function has more than 256 body subtokens, then featurize it as absent. This
156166
// approximates the behavior of the classifier on non-generic body features where large body
157167
// features are replaced by the absent token.
158168
//
159169
// We count nodes instead of tokens because tokens are often not unique.
160-
strictcount(ASTNode node |
161-
node.getParent*() = entity.getDefinedFunction() and
162-
not node = entity.getDefinedFunction().getIdentifier() and
163-
exists(getTokenizedAstNode(node))
164-
) <= 256 and
170+
strictcount(getAnASTNodeToFeaturize(entity.getDefinedFunction())) <= 256 and
165171
exists(ASTNode node |
166-
node.getParent*() = entity.getDefinedFunction() and
167-
not node = entity.getDefinedFunction().getIdentifier() and
172+
node = getAnASTNodeToFeaturize(entity.getDefinedFunction()) and
168173
token = getTokenizedAstNode(node) and
169174
location = node.getLocation()
170175
)

javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointScoring.qll

Lines changed: 32 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@ private string getACompatibleModelChecksum() {
1515
}
1616

1717
/**
18-
* The maximum number of AST nodes an entity containing an endpoint should have before we should
19-
* choose a smaller entity to represent the endpoint.
18+
* The maximum number of AST nodes a function containing an endpoint should have before we should
19+
* choose a smaller function to represent the endpoint.
2020
*
2121
* This is intended to represent a balance in terms of the amount of context we provide to the
2222
* model: we don't want the function to be too small, because then it doesn't contain very much
@@ -26,54 +26,56 @@ private string getACompatibleModelChecksum() {
2626
private int getMaxNumAstNodes() { result = 1024 }
2727

2828
/**
29-
* Returns the number of AST nodes contained within the specified entity.
29+
* Returns the number of AST nodes contained within the specified function.
3030
*/
31-
private int getNumAstNodesInEntity(DatabaseFeatures::Entity entity) {
32-
// Restrict the values `entity` can take on
33-
entity = EndpointToEntity::getAnEntityForEndpoint(_) and
34-
result =
35-
count(DatabaseFeatures::AstNode astNode | DatabaseFeatures::astNodes(entity, _, _, astNode, _))
31+
private int getNumAstNodesInFunction(Function function) {
32+
// Restrict the values `function` can take on
33+
function = EndpointToFunction::getAFunctionForEndpoint(_) and
34+
result = count(EndpointFeatures::FunctionBodies::getAnASTNodeToFeaturize(function))
3635
}
3736

3837
/**
39-
* Get a single entity to use as the representative entity for the endpoint.
38+
* Get the enclosing function for an endpoint.
39+
*
40+
* This is used to compute the `enclosingFunctionBody` and `enclosingFunctionName` features.
4041
*
41-
* We try to use the largest entity containing the endpoint that's below the AST node limit defined
42-
* in `getMaxNumAstNodes`. In the event of a tie, we use the entity that appears first within the
43-
* source archive.
42+
* We try to use the largest function containing the endpoint that's below the AST node limit
43+
* defined in `getMaxNumAstNodes`. In the event of a tie, we use the function that appears first
44+
* within the source code.
4445
*
45-
* If no entities are smaller than the AST node limit, then we use the smallest entity containing
46+
* If no functions are smaller than the AST node limit, then we use the smallest function containing
4647
* the endpoint.
4748
*/
48-
DatabaseFeatures::Entity getRepresentativeEntityForEndpoint(DataFlow::Node endpoint) {
49-
// Check whether there's an entity containing the endpoint that's smaller than the AST node limit.
49+
Function getRepresentativeFunctionForEndpoint(DataFlow::Node endpoint) {
50+
// Check whether there's a function containing the endpoint that's smaller than the AST node
51+
// limit.
5052
if
51-
getNumAstNodesInEntity(EndpointToEntity::getAnEntityForEndpoint(endpoint)) <=
53+
getNumAstNodesInFunction(EndpointToFunction::getAFunctionForEndpoint(endpoint)) <=
5254
getMaxNumAstNodes()
5355
then
54-
// Use the largest entity smaller than the AST node limit, resolving ties using the entity that
55-
// appears first in the source archive.
56+
// Use the largest function smaller than the AST node limit, resolving ties using the function
57+
// that appears first in the source code.
5658
result =
57-
min(DatabaseFeatures::Entity entity, int numAstNodes, Location l |
58-
entity = EndpointToEntity::getAnEntityForEndpoint(endpoint) and
59-
numAstNodes = getNumAstNodesInEntity(entity) and
59+
min(Function function, int numAstNodes, Location l |
60+
function = EndpointToFunction::getAFunctionForEndpoint(endpoint) and
61+
numAstNodes = getNumAstNodesInFunction(function) and
6062
numAstNodes <= getMaxNumAstNodes() and
61-
l = entity.getLocation()
63+
l = function.getLocation()
6264
|
63-
entity
65+
function
6466
order by
6567
numAstNodes desc, l.getStartLine(), l.getStartColumn(), l.getEndLine(), l.getEndColumn()
6668
)
6769
else
68-
// Use the smallest entity, resolving ties using the entity that
69-
// appears first in the source archive.
70+
// Use the smallest function, resolving ties using the function that appears first in the source
71+
// code.
7072
result =
71-
min(DatabaseFeatures::Entity entity, int numAstNodes, Location l |
72-
entity = EndpointToEntity::getAnEntityForEndpoint(endpoint) and
73-
numAstNodes = getNumAstNodesInEntity(entity) and
74-
l = entity.getLocation()
73+
min(Function function, int numAstNodes, Location l |
74+
function = EndpointToFunction::getAFunctionForEndpoint(endpoint) and
75+
numAstNodes = getNumAstNodesInFunction(function) and
76+
l = function.getLocation()
7577
|
76-
entity
78+
function
7779
order by
7880
numAstNodes, l.getStartLine(), l.getStartColumn(), l.getEndLine(), l.getEndColumn()
7981
)

0 commit comments

Comments
 (0)