Skip to content

Commit 016727d

Browse files
committed
JS: Fix occasional duplicate body tokens
Commit 0e31439 introduced occasional duplicate tokens caused by duplicate AST node attributes. The long-term fix is to update `CodeToFeatures.qll`, but for the short term, we update the concatenation so that it operates on unique (location, token) pairs.
1 parent f68a40f commit 016727d

File tree

1 file changed

+19
-20
lines changed
  • javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling

1 file changed

+19
-20
lines changed

javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointFeatures.qll

Lines changed: 19 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -109,12 +109,13 @@ private string getACallBasedTokenFeatureComponent(
109109

110110
/** This module provides functionality for getting the function body feature associated with a particular entity. */
111111
module FunctionBodies {
112-
/** Holds if `node` is an AST node within the entity `entity` and `token` is a node attribute associated with `node`. */
113-
private predicate bodyTokens(
114-
DatabaseFeatures::Entity entity, DatabaseFeatures::AstNode node, string token
115-
) {
116-
DatabaseFeatures::astNodes(entity, _, _, node, _) and
117-
token = unique(string t | DatabaseFeatures::nodeAttributes(node, t))
112+
/** Holds if `location` is the location of an AST node within the entity `entity` and `token` is a node attribute associated with that AST node. */
113+
private predicate bodyTokens(DatabaseFeatures::Entity entity, Location location, string token) {
114+
exists(DatabaseFeatures::AstNode node |
115+
DatabaseFeatures::astNodes(entity, _, _, node, _) and
116+
token = unique(string t | DatabaseFeatures::nodeAttributes(node, t)) and
117+
location = node.getLocation()
118+
)
118119
}
119120

120121
/**
@@ -126,20 +127,18 @@ module FunctionBodies {
126127
// If a function has more than 256 body subtokens, then featurize it as absent. This
127128
// approximates the behavior of the classifier on non-generic body features where large body
128129
// features are replaced by the absent token.
129-
if
130-
strictcount(DatabaseFeatures::AstNode node, string token | bodyTokens(entity, node, token)) >
131-
256
132-
then result = ""
133-
else
134-
result =
135-
strictconcat(DatabaseFeatures::AstNode node, string token, Location l |
136-
bodyTokens(entity, node, token) and l = node.getLocation()
137-
|
138-
token, " "
139-
order by
140-
l.getFile().getAbsolutePath(), l.getStartLine(), l.getStartColumn(), l.getEndLine(),
141-
l.getEndColumn(), token
142-
)
130+
//
131+
// We count locations instead of tokens because tokens are often not unique.
132+
strictcount(Location l | bodyTokens(entity, l, _)) <= 256 and
133+
result =
134+
strictconcat(string token, Location l |
135+
bodyTokens(entity, l, token)
136+
|
137+
token, " "
138+
order by
139+
l.getFile().getAbsolutePath(), l.getStartLine(), l.getStartColumn(), l.getEndLine(),
140+
l.getEndColumn(), token
141+
)
143142
}
144143
}
145144

0 commit comments

Comments
 (0)