Skip to content

Commit 6c3daf4

Browse files
authored
Merge pull request github#7785 from github/z80coder/impose-length-restriction
Restrict AST nodes according to string length
2 parents bb1e89d + be5e8da commit 6c3daf4

File tree

1 file changed

+15
-0
lines changed

1 file changed

+15
-0
lines changed

javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/FunctionBodyFeatures.qll

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,18 @@ ASTNode getAnASTNodeWithAFeature(Function f) {
127127
result = getAnASTNodeToFeaturize(f)
128128
}
129129

130+
/**
 * Gets the sum, over every AST node returned by `getAnASTNodeWithAFeature(f)`, of the
 * `length()` of that node's `getTokenizedAstNode` value.
 *
 * Because `strictsum` is used, this predicate has no result when `f` has no such node.
 * NOTE(review): this is the length of the tokenized form of each node, which may differ
 * from the raw source-character count of `f` - confirm against `getTokenizedAstNode`.
 */
131+
int getNumCharsInFunction(Function f) {
132+
result =
133+
strictsum(ASTNode node | node = getAnASTNodeWithAFeature(f) | getTokenizedAstNode(node).length())
134+
}
135+
136+
/**
137+
* Gets the maximum number of characters a feature may contain (1000000).
138+
* The evaluator string limit is 5395415 characters, so this limit is deliberately set below it.
139+
*/
140+
private int getMaxChars() { result = 1000000 }
141+
130142
/**
131143
* Returns a featurized representation of the function that can be used to populate the
132144
* `enclosingFunctionBody` feature for an endpoint.
@@ -141,6 +153,9 @@ string getBodyTokensFeature(Function function) {
141153
node = getAnASTNodeToFeaturize(function) and
142154
exists(getTokenizedAstNode(node))
143155
) <= 256 and
156+
// Performance optimization: If a function has more than getMaxChars() characters in its body subtokens,
157+
// then featurize it as absent.
158+
getNumCharsInFunction(function) <= getMaxChars() and
144159
result =
145160
strictconcat(Location l, string token |
146161
// The use of a nested exists here allows us to avoid duplicates due to two AST nodes in the

0 commit comments

Comments
 (0)