@@ -128,23 +128,44 @@ private string getACallBasedTokenFeatureComponent(
128
128
129
129
/** This module provides functionality for getting the function body feature associated with a particular entity. */
130
130
module FunctionBodies {
131
/**
 * Gets the string token used to represent the AST node `node`.
 *
 * There is no result for a node that matches none of the cases below.
 */
string getTokenizedAstNode(ASTNode node) {
  // Identifiers are represented by their name.
  // NB: Unary and binary operator expressions (e.g. `-a`, `a + b`) and compound assignments
  // (e.g. `a += b`) can be identified by the expression type.
  exists(Identifier id | id = node | result = id.getName())
  or
  // Computed property accesses for which the accessed property can be predetermined.
  // NB: This may alias with operators, e.g. a property could be named '+'.
  exists(IndexExpr access | access = node | result = access.getPropertyName())
  or
  // Number literals use `getRawValue`, which gives distinct representations for `0xa`, `0xA`
  // and `10`.
  exists(NumberLiteral num | num = node | result = num.getRawValue())
  or
  // Every other literal uses `getValue` rather than `getRawValue`, so that `"a"` and `'a'` are
  // assigned the same representation.
  exists(Literal lit | lit = node and not lit instanceof NumberLiteral | result = lit.getValue())
  or
  // Template elements are represented by their raw value.
  exists(TemplateElement elem | elem = node | result = elem.getRawValue())
}
149
+
131
150
/**
 * Gets an AST node that lies within the function defined by `entity` (the function node itself
 * included, via the reflexive-transitive parent relation), is not that function's identifier,
 * and for which `getTokenizedAstNode` has a result.
 */
private ASTNode getATokenizedBodyNode(DatabaseFeatures::Entity entity) {
  result.getParent*() = entity.getDefinedFunction() and
  // Deliberately a negated equality rather than `!=`: the negation also holds when
  // `getIdentifier()` has no result at all.
  not result = entity.getDefinedFunction().getIdentifier() and
  exists(getTokenizedAstNode(result))
}

/**
 * Holds if `location` is the location of an AST node within the entity `entity` and `token` is
 * a node attribute associated with that AST node.
 */
private predicate bodyTokens(DatabaseFeatures::Entity entity, Location location, string token) {
  // Performance optimization: only consider entities that contain an endpoint to featurize.
  entity =
    getRepresentativeEntityForEndpoint(any(FeaturizationConfig cfg).getAnEndpointToFeaturize()) and
  // Performance optimization: a function with more than 256 body subtokens is featurized as
  // absent. This approximates the behavior of the classifier on non-generic body features,
  // where large body features are replaced by the absent token.
  //
  // Nodes are counted rather than tokens because tokens are often not unique.
  // `strictcount` has no result when there are no such nodes; in that case there is nothing
  // for this predicate to report anyway.
  strictcount(ASTNode bodyNode | bodyNode = getATokenizedBodyNode(entity)) <= 256 and
  exists(ASTNode bodyNode | bodyNode = getATokenizedBodyNode(entity) |
    token = getTokenizedAstNode(bodyNode) and
    location = bodyNode.getLocation()
  )
}
0 commit comments