Skip to content

Commit 387829b

Browse files
committed
Extract body tokens from the JS AST, not the CodeToFeatures AST
1 parent 3ef6976 commit 387829b

File tree

1 file changed

+30
-9
lines changed
  • javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling

1 file changed

+30
-9
lines changed

javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointFeatures.qll

Lines changed: 30 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -128,23 +128,44 @@ private string getACallBasedTokenFeatureComponent(
128128

129129
/** This module provides functionality for getting the function body feature associated with a particular entity. */
130130
module FunctionBodies {
131+
string getTokenizedAstNode(ASTNode node) {
132+
// NB: Unary and binary operator expressions e.g. -a, a + b and compound
133+
// assignments e.g. a += b can be identified by the expression type.
134+
result = node.(Identifier).getName()
135+
or
136+
// Computed property accesses for which we can predetermine the property being accessed.
137+
// NB: May alias with operators e.g. could have '+' as a property name.
138+
result = node.(IndexExpr).getPropertyName()
139+
or
140+
// We use `getRawValue` to give us distinct representations for `0xa`, `0xA`, and `10`.
141+
result = node.(NumberLiteral).getRawValue()
142+
or
143+
// We use `getValue` rather than `getRawValue` so we assign `"a"` and `'a'` the same representation.
144+
not node instanceof NumberLiteral and
145+
result = node.(Literal).getValue()
146+
or
147+
result = node.(TemplateElement).getRawValue()
148+
}
149+
131150
/** Holds if `location` is the location of an AST node within the entity `entity` and `token` is a node attribute associated with that AST node. */
132151
private predicate bodyTokens(DatabaseFeatures::Entity entity, Location location, string token) {
133152
// Performance optimization: Restrict the set of entities to those containing an endpoint to featurize.
134153
entity =
135154
getRepresentativeEntityForEndpoint(any(FeaturizationConfig cfg).getAnEndpointToFeaturize()) and
136-
// Performance optimization: If a function has more than 256 body tokens, then featurize it as
137-
// absent. This approximates the behavior of the classifier on non-generic body features where
138-
// large body features are replaced by the absent token.
155+
// Performance optimization: If a function has more than 256 body subtokens, then featurize it as absent. This
156+
// approximates the behavior of the classifier on non-generic body features where large body
157+
// features are replaced by the absent token.
139158
//
140159
// We count nodes instead of tokens because tokens are often not unique.
141-
strictcount(DatabaseFeatures::AstNode node |
142-
DatabaseFeatures::astNodes(entity, _, _, node, _) and
143-
exists(string t | DatabaseFeatures::nodeAttributes(node, t))
160+
strictcount(ASTNode node |
161+
node.getParent*() = entity.getDefinedFunction() and
162+
not node = entity.getDefinedFunction().getIdentifier() and
163+
exists(getTokenizedAstNode(node))
144164
) <= 256 and
145-
exists(DatabaseFeatures::AstNode node |
146-
DatabaseFeatures::astNodes(entity, _, _, node, _) and
147-
token = unique(string t | DatabaseFeatures::nodeAttributes(node, t)) and
165+
exists(ASTNode node |
166+
node.getParent*() = entity.getDefinedFunction() and
167+
not node = entity.getDefinedFunction().getIdentifier() and
168+
token = getTokenizedAstNode(node) and
148169
location = node.getLocation()
149170
)
150171
}

0 commit comments

Comments
 (0)