@@ -192,7 +192,6 @@ private module FunctionNames {
192
192
193
193
/** Gets a name of a supported generic token-based feature. */
194
194
string getASupportedFeatureName ( ) {
195
- // allowlist of vetted features that are permitted in production
196
195
result = any ( EndpointFeature f ) .getName ( )
197
196
}
198
197
@@ -226,18 +225,18 @@ private newtype TEndpointFeature =
226
225
TStringConcatenatedWith ( )
227
226
228
227
/**
229
- * An implementation of an endpoint feature: produces feature names and values for use in ML.
228
+ * An implementation of an endpoint feature: defines feature-name/value tuples for use in ML.
230
229
*/
231
230
abstract class EndpointFeature extends TEndpointFeature {
232
231
/**
233
232
* Gets the name of the feature. Used by the ML model.
234
- * Changes to the name of a feature requires training the model again .
233
+ * Names are coupled to models: changing the name of a feature requires retraining the model.
235
234
*/
236
235
abstract string getName ( ) ;
237
236
238
237
/**
239
238
* Gets the value of the feature. Used by the ML model.
240
- * Changes to the value of a feature requires training the model again .
239
+ * Models are trained based on feature values, so changing the value of a feature requires retraining the model.
241
240
*/
242
241
abstract string getValue ( DataFlow:: Node endpoint ) ;
243
242
@@ -275,7 +274,7 @@ class ReceiverName extends EndpointFeature, TReceiverName {
275
274
276
275
/**
277
276
* The feature for the natural language tokens from the function that encloses the endpoint in
278
- * the order that they appear in the source code.
277
+ * the order that they appear in the source code.
279
278
*/
280
279
class EnclosingFunctionBody extends EndpointFeature , TEnclosingFunctionBody {
281
280
override string getName ( ) { result = "enclosingFunctionBody" }
@@ -322,6 +321,8 @@ class FileImports extends EndpointFeature, TFileImports {
322
321
* }
323
322
* ```
324
323
* In the above example, the feature for the marked endpoint has value '(a, b)\n(c, d)'.
324
+ * The line breaks act as a separator between the parameters of different functions but
325
+ * will be treated by tokenization as if they were spaces.
325
326
*/
326
327
class ContextSurroundingFunctionParameters extends EndpointFeature ,
327
328
TContextSurroundingFunctionParameters {
@@ -345,12 +346,14 @@ class ContextSurroundingFunctionParameters extends EndpointFeature,
345
346
}
346
347
347
348
/**
348
- * The feature that gives the name an endpoint is assigned to (if any).
349
+ * The feature that gives the name of any properties an endpoint is assigned to (if any).
349
350
*
350
351
* ### Example
351
352
* ```javascript
352
353
* const div = document.createElement('div');
353
354
* div.innerHTML = endpoint; // feature value is 'innerHTML'
355
+ *
356
+ * foo({x: endpoint}); // feature value is 'x'
354
357
* ```
355
358
*/
356
359
class AssignedToPropName extends EndpointFeature , TAssignedToPropName {
@@ -364,12 +367,13 @@ class AssignedToPropName extends EndpointFeature, TAssignedToPropName {
364
367
}
365
368
366
369
/**
367
- * The feature that shows the text an endpoint is being concatenated with.class
370
+ * The feature that shows the text an endpoint is being concatenated with.
368
371
*
369
372
* ### Example
370
373
*
371
374
* ```javascript
372
- * const x = 'foo' + endpoint + 'bar'; // feature value is `'foo' -endpoint- 'bar'`
375
+ * const x = 'foo' + endpoint + 'bar'; // feature value is `'foo' -endpoint- 'bar'`
376
+ * ```
373
377
*/
374
378
class StringConcatenatedWith extends EndpointFeature , TStringConcatenatedWith {
375
379
override string getName ( ) { result = "stringConcatenatedWith" }
@@ -456,8 +460,6 @@ class CalleeImports extends EndpointFeature, TCalleeImports {
456
460
* ...
457
461
* }
458
462
* ```
459
- *
460
- * The feature value for the marked endpoint will be `f(a, b, c)\ng(x, y, z)\nh(u, v)`.
461
463
*/
462
464
class ContextFunctionInterfaces extends EndpointFeature , TContextFunctionInterfaces {
463
465
override string getName ( ) { result = "contextFunctionInterfaces" }
@@ -471,6 +473,10 @@ class ContextFunctionInterfaces extends EndpointFeature, TContextFunctionInterfa
471
473
* Syntactic utilities for feature value computation.
472
474
*/
473
475
private module SyntacticUtilities {
476
+ /**
477
+ * Renders an operand in a string concatenation by surrounding a constant in quotes, and
478
+ * by using `getSimpleAccessPath` for everything else.
479
+ */
474
480
string renderStringConcatOperand ( DataFlow:: Node operand ) {
475
481
if exists ( unique( string v | operand .mayHaveStringValue ( v ) ) )
476
482
then result = "'" + any ( string v | operand .mayHaveStringValue ( v ) ) + "'"
@@ -555,7 +561,7 @@ private module SyntacticUtilities {
555
561
* - direct arguments
556
562
* - properties of (nested) objects that are arguments
557
563
*
558
- * Unknown cases and property names results in `?`.
564
+ * Unknown cases and property names result in `?`.
559
565
*/
560
566
string getSimpleParameterAccessPath ( DataFlow:: Node node ) {
561
567
if exists ( DataFlow:: CallNode call | node = call .getArgument ( _) )
@@ -569,7 +575,7 @@ private module SyntacticUtilities {
569
575
* Supports:
570
576
* - properties of (nested) objects
571
577
*
572
- * Unknown cases and property names results in `?`.
578
+ * Unknown cases and property names result in `?`.
573
579
*/
574
580
string getSimplePropertyAccessPath ( DataFlow:: Node node ) {
575
581
if exists ( ObjectExpr o | o .getAProperty ( ) .getInit ( ) .getUnderlyingValue ( ) = node .asExpr ( ) )
@@ -617,6 +623,17 @@ private module SyntacticUtilities {
617
623
* - invocations
618
624
*
619
625
* Unknown cases and property names result in `?`.
626
+ *
627
+ * # Examples
628
+ *
629
+ * - The node `x.foo` will have the simple access path `x.foo`.
630
+ * - In the following file, the simple access path will be `import("./foo").bar.baz`:
631
+ *
632
+ * ```javascript
633
+ * import * as lib from "./foo"
634
+ * console.log(lib.bar.baz());
635
+ * // ^^^^^^^^^^^ node
636
+ * ```
620
637
*/
621
638
string getSimpleAccessPath ( DataFlow:: Node node ) {
622
639
exists ( Expr e | e = node .asExpr ( ) .getUnderlyingValue ( ) |
@@ -661,7 +678,16 @@ private module SyntacticUtilities {
661
678
if exists ( i .getImportedPath ( ) .getValue ( ) )
662
679
then
663
680
exists ( string p | p = i .getImportedPath ( ) .getValue ( ) |
664
- if p .matches ( ".%" ) then result = "\"p\"" else result = "!" // hide absolute imports from the ML training
681
+ // Hide absolute imports from ML training data.
682
+ // ============================================
683
+ // There is the hypothesis that exposing absolute imports to the model
684
+ // might lead to bad generalization. For example, the model might learn
685
+ // to strongly associate a specific database client with sinks and no
686
+ // longer be able to flag sinks when data flow is broken.
687
+ // Placing this logic so deeply within the feature extraction code is
688
+ // perhaps a bit of a hack and it is a use case to consider when refactoring
689
+ // endpoint filters/data extraction.
690
+ if p .matches ( ".%" ) then result = "\"p\"" else result = "!"
665
691
)
666
692
else result = getUnknownSymbol ( )
667
693
}
@@ -688,8 +714,6 @@ private module SyntacticUtilities {
688
714
*
689
715
* "Containment" is syntactic, and currently means that the endpoint is an argument to the call, or that the endpoint is a (nested) property value of an argument.
690
716
*
691
- * This feature is intended as a superior version of the many `Callee*` features.
692
- *
693
717
* Examples:
694
718
* ```
695
719
* foo(endpoint); // -> foo
@@ -746,8 +770,6 @@ class InputAccessPathFromCallee extends EndpointFeature, TInputAccessPathFromCal
746
770
*
747
771
* "Containment" is syntactic, and currently means that the endpoint is an argument to the call, or that the endpoint is a (nested) property value of an argument.
748
772
*
749
- * This feature is intended as a superior version of the `ArgumentIndexFeature`.
750
- *
751
773
* Examples:
752
774
* ```
753
775
* foo(endpoint); // -> 0
0 commit comments