Skip to content

Commit caf3959

Browse files
author
Stephan Brandauer
committed
better documentation
1 parent b9cb60c commit caf3959

File tree

2 files changed

+40
-18
lines changed
  • javascript/ql/experimental/adaptivethreatmodeling

2 files changed

+40
-18
lines changed

javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointFeatures.qll

Lines changed: 39 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,6 @@ private module FunctionNames {
192192

193193
/** Get a name of a supported generic token-based feature. */
194194
string getASupportedFeatureName() {
195-
// allowlist of vetted features that are permitted in production
196195
result = any(EndpointFeature f).getName()
197196
}
198197

@@ -226,18 +225,18 @@ private newtype TEndpointFeature =
226225
TStringConcatenatedWith()
227226

228227
/**
229-
* An implementation of an endpoint feature: produces feature names and values for use in ML.
228+
* An implementation of an endpoint feature: defines feature-name/value tuples for use in ML.
230229
*/
231230
abstract class EndpointFeature extends TEndpointFeature {
232231
/**
233232
* Gets the name of the feature. Used by the ML model.
234-
* Changes to the name of a feature requires training the model again.
233+
* Names are coupled to models: changing the name of a feature requires retraining the model.
235234
*/
236235
abstract string getName();
237236

238237
/**
239238
* Gets the value of the feature. Used by the ML model.
240-
* Changes to the value of a feature requires training the model again.
239+
* Models are trained based on feature values, so changing the value of a feature requires retraining the model.
241240
*/
242241
abstract string getValue(DataFlow::Node endpoint);
243242

@@ -275,7 +274,7 @@ class ReceiverName extends EndpointFeature, TReceiverName {
275274

276275
/**
277276
* The feature for the natural language tokens from the function that encloses the endpoint in
278-
* the order that they appear in the source code.
277+
* the order that they appear in the source code.
279278
*/
280279
class EnclosingFunctionBody extends EndpointFeature, TEnclosingFunctionBody {
281280
override string getName() { result = "enclosingFunctionBody" }
@@ -322,6 +321,8 @@ class FileImports extends EndpointFeature, TFileImports {
322321
* }
323322
* ```
324323
* In the above example, the feature for the marked endpoint has value '(a, b)\n(c, d)'.
324+
* The line breaks act as a separator between the parameters of different functions but
325+
* will be treated by tokenization as if they were spaces.
325326
*/
326327
class ContextSurroundingFunctionParameters extends EndpointFeature,
327328
TContextSurroundingFunctionParameters {
@@ -345,12 +346,14 @@ class ContextSurroundingFunctionParameters extends EndpointFeature,
345346
}
346347

347348
/**
348-
* The feature that gives the name an endpoint is assigned to (if any).
349+
* The feature that gives the name of the property an endpoint is assigned to (if any).
349350
*
350351
* ### Example
351352
* ```javascript
352353
* const div = document.createElement('div');
353354
* div.innerHTML = endpoint; // feature value is 'innerHTML'
355+
*
356+
* foo({x: endpoint}); // feature value is 'x'
354357
* ```
355358
*/
356359
class AssignedToPropName extends EndpointFeature, TAssignedToPropName {
@@ -364,12 +367,13 @@ class AssignedToPropName extends EndpointFeature, TAssignedToPropName {
364367
}
365368

366369
/**
367-
* The feature that shows the text an endpoint is being concatenated with.class
370+
* The feature that shows the text an endpoint is being concatenated with.
368371
*
369372
* ### Example
370373
*
371374
* ```javascript
372-
* const x = 'foo' + endpoint + 'bar'; // feature value is `'foo' -endpoint- 'bar'`
375+
* const x = 'foo' + endpoint + 'bar'; // feature value is `'foo' -endpoint- 'bar'`
376+
* ```
373377
*/
374378
class StringConcatenatedWith extends EndpointFeature, TStringConcatenatedWith {
375379
override string getName() { result = "stringConcatenatedWith" }
@@ -456,8 +460,6 @@ class CalleeImports extends EndpointFeature, TCalleeImports {
456460
* ...
457461
* }
458462
* ```
459-
*
460-
* The feature value for the marked endpoint will be `f(a, b, c)\ng(x, y, z)\nh(u, v)`.
461463
*/
462464
class ContextFunctionInterfaces extends EndpointFeature, TContextFunctionInterfaces {
463465
override string getName() { result = "contextFunctionInterfaces" }
@@ -471,6 +473,10 @@ class ContextFunctionInterfaces extends EndpointFeature, TContextFunctionInterfa
471473
* Syntactic utilities for feature value computation.
472474
*/
473475
private module SyntacticUtilities {
476+
/**
477+
* Renders an operand in a string concatenation by surrounding a constant in quotes, and
478+
* by using `getSimpleAccessPath` for everything else.
479+
*/
474480
string renderStringConcatOperand(DataFlow::Node operand) {
475481
if exists(unique(string v | operand.mayHaveStringValue(v)))
476482
then result = "'" + any(string v | operand.mayHaveStringValue(v)) + "'"
@@ -555,7 +561,7 @@ private module SyntacticUtilities {
555561
* - direct arguments
556562
* - properties of (nested) objects that are arguments
557563
*
558-
* Unknown cases and property names results in `?`.
564+
* Unknown cases and property names result in `?`.
559565
*/
560566
string getSimpleParameterAccessPath(DataFlow::Node node) {
561567
if exists(DataFlow::CallNode call | node = call.getArgument(_))
@@ -569,7 +575,7 @@ private module SyntacticUtilities {
569575
* Supports:
570576
* - properties of (nested) objects
571577
*
572-
* Unknown cases and property names results in `?`.
578+
* Unknown cases and property names result in `?`.
573579
*/
574580
string getSimplePropertyAccessPath(DataFlow::Node node) {
575581
if exists(ObjectExpr o | o.getAProperty().getInit().getUnderlyingValue() = node.asExpr())
@@ -617,6 +623,17 @@ private module SyntacticUtilities {
617623
* - invocations
618624
*
619625
* Unknown cases and property names result in `?`.
626+
*
627+
* # Examples
628+
*
629+
* - The node `x.foo` will have the simple access path `x.foo`.
630+
* - In the following file, the simple access path will be `import("./foo").bar.baz`:
631+
*
632+
* ```javascript
633+
* import * as lib from "./foo"
634+
* console.log(lib.bar.baz());
635+
* // ^^^^^^^^^^^ node
636+
* ```
620637
*/
621638
string getSimpleAccessPath(DataFlow::Node node) {
622639
exists(Expr e | e = node.asExpr().getUnderlyingValue() |
@@ -661,7 +678,16 @@ private module SyntacticUtilities {
661678
if exists(i.getImportedPath().getValue())
662679
then
663680
exists(string p | p = i.getImportedPath().getValue() |
664-
if p.matches(".%") then result = "\"p\"" else result = "!" // hide absolute imports from the ML training
681+
// Hide absolute imports from ML training data.
682+
// ============================================
683+
// There is the hypothesis that exposing absolute imports to the model
684+
// might lead to bad generalization. For example, the model might learn
685+
// to strongly associate a specific database client with sinks and no
686+
// longer be able to flag sinks when data flow is broken.
687+
// Placing this logic so deeply within the feature extraction code is
688+
// perhaps a bit of a hack and it is a use case to consider when refactoring
689+
// endpoint filters/data extraction.
690+
if p.matches(".%") then result = "\"p\"" else result = "!"
665691
)
666692
else result = getUnknownSymbol()
667693
}
@@ -688,8 +714,6 @@ private module SyntacticUtilities {
688714
*
689715
* "Containment" is syntactic, and currently means that the endpoint is an argument to the call, or that the endpoint is a (nested) property value of an argument.
690716
*
691-
* This feature is intended as a superior version of the many `Callee*` features.
692-
*
693717
* Examples:
694718
* ```
695719
* foo(endpoint); // -> foo
@@ -746,8 +770,6 @@ class InputAccessPathFromCallee extends EndpointFeature, TInputAccessPathFromCal
746770
*
747771
* "Containment" is syntactic, and currently means that the endpoint is an argument to the call, or that the endpoint is a (nested) property value of an argument.
748772
*
749-
* This feature is intended as a superior version of the `ArgumentIndexFeature`.
750-
*
751773
* Examples:
752774
* ```
753775
* foo(endpoint); // -> 0
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
<div class="form-group">
22
<input (change)="restoreBackup($event.target.files.item(endpoint))" />
3-
</div>
3+
</div>

0 commit comments

Comments
 (0)