jorgectf
diff --git a/‎python/ql/src/experimental/typetracking/TypeTracker.qll
Lines changed: 393 additions & 0 deletions b/‎python/ql/src/experimental/typetracking/TypeTracker.qll
Lines changed: 393 additions & 0 deletions
@@ -0,0 +1,393 @@
+/** Step Summaries and Type Tracking */
+
+private import TypeTrackerPrivate
+
+/**
+ * A string that can occur as the name of a piece of content.
+ *
+ * The members of this class are exactly the values of `getPossibleContentName()`.
+ */
+class ContentName extends string {
+  ContentName() { getPossibleContentName() = this }
+}
+
+/**
+ * An optional content name: the empty string (standing for "no content"),
+ * or any `ContentName`.
+ */
+class OptionalContentName extends string {
+  OptionalContentName() {
+    this = ""
+    or
+    this instanceof ContentName
+  }
+}
+
+/**
+ * The kinds of step that may occur on an inter-procedural data flow path.
+ *
+ * Store and load steps additionally carry the name of the content involved.
+ */
+private newtype TStepSummary =
+  LevelStep() or
+  CallStep() or
+  ReturnStep() or
+  StoreStep(ContentName name) or
+  LoadStep(ContentName name)
+
+/**
+ * INTERNAL: Use `TypeTracker` or `TypeBackTracker` instead.
+ *
+ * Describes a single step on an inter-procedural data flow path.
+ */
+class StepSummary extends TStepSummary {
+  /**
+   * Gets a textual representation of this step summary: the kind of step,
+   * followed by the content name for store and load steps.
+   */
+  string toString() {
+    this = LevelStep() and result = "level"
+    or
+    this = CallStep() and result = "call"
+    or
+    this = ReturnStep() and result = "return"
+    or
+    exists(string name |
+      this = StoreStep(name) and result = "store " + name
+      or
+      this = LoadStep(name) and result = "load " + name
+    )
+  }
+}
+
+/** Provides predicates for computing step summaries (`StepSummary`s) between nodes. */
+module StepSummary {
+  /**
+   * Holds if `summary` describes a forwards heap and/or inter-procedural step
+   * from `nodeFrom` to `nodeTo`.
+   *
+   * The step is composed of `nodeFrom.flowsTo(..)` to some intermediate node,
+   * followed by a single `smallstep` from that node to `nodeTo`.
+   */
+  cached
+  predicate step(LocalSourceNode nodeFrom, LocalSourceNode nodeTo, StepSummary summary) {
+    exists(Node reached |
+      smallstep(reached, nodeTo, summary) and
+      nodeFrom.flowsTo(reached)
+    )
+  }
+
+  /**
+   * Holds if `summary` describes a forwards local, heap and/or inter-procedural
+   * step from `nodeFrom` to `nodeTo`.
+   *
+   * Unlike `StepSummary::step`, this predicate does not compress
+   * type-preserving steps: each such step is reported individually as a
+   * `LevelStep()`.
+   */
+  predicate smallstep(Node nodeFrom, Node nodeTo, StepSummary summary) {
+    summary = LevelStep() and
+    typePreservingStep(nodeFrom, nodeTo)
+    or
+    summary = CallStep() and
+    callStep(nodeFrom, nodeTo)
+    or
+    summary = ReturnStep() and
+    returnStep(nodeFrom, nodeTo)
+    or
+    exists(string name |
+      summary = StoreStep(name) and
+      basicStoreStep(nodeFrom, nodeTo, name)
+    )
+    or
+    exists(string name |
+      summary = LoadStep(name) and
+      basicLoadStep(nodeFrom, nodeTo, name)
+    )
+  }
+}
+
+/**
+ * Holds if the data flow step from `nodeFrom` to `nodeTo` can reasonably be
+ * expected to preserve types, that is, if it is either a jump step or a simple
+ * local flow step.
+ */
+private predicate typePreservingStep(Node nodeFrom, Node nodeTo) {
+  jumpStep(nodeFrom, nodeTo)
+  or
+  simpleLocalFlowStep(nodeFrom, nodeTo)
+}
+
+/**
+ * A utility class with the same values as `boolean` (`true` and `false`)
+ * that does not require type joining.
+ */
+private class Boolean extends boolean {
+  Boolean() {
+    this = false
+    or
+    this = true
+  }
+}
+
+/**
+ * The representation behind `TypeTracker`: a boolean `hasCall` flag paired
+ * with an optional content name.
+ */
+private newtype TTypeTracker =
+  MkTypeTracker(Boolean hasCall, OptionalContentName content)
+
+/**
+ * Summary of the steps needed to track a value to a given dataflow node.
+ *
+ * This can be used to track objects that implement a certain API in order to
+ * recognize calls to that API. Note that type-tracking does not by itself provide a
+ * source/sink relation, that is, it may determine that a node has a given type,
+ * but it won't determine where that type came from.
+ *
+ * It is recommended that all uses of this type are written in the following form,
+ * for tracking some type `myType`:
+ * ```
+ * DataFlow::LocalSourceNode myType(DataFlow::TypeTracker t) {
+ *   t.start() and
+ *   result = < source of myType >
+ *   or
+ *   exists (DataFlow::TypeTracker t2 |
+ *     result = myType(t2).track(t2, t)
+ *   )
+ * }
+ *
+ * DataFlow::Node myType() { myType(DataFlow::TypeTracker::end()).flowsTo(result) }
+ * ```
+ *
+ * Instead of `result = myType(t2).track(t2, t)`, you can also use the equivalent
+ * `t = t2.step(myType(t2), result)`. If you additionally want to track individual
+ * intra-procedural steps, use `t = t2.smallstep(myType(t2), result)`.
+ */
+class TypeTracker extends TTypeTracker {
+  /** Whether a call step has been appended to this summary (see `append`). */
+  Boolean hasCall;
+  /** The name of the content the tracked value is stored in, or `""` for no content. */
+  OptionalContentName content;
+
+  TypeTracker() { this = MkTypeTracker(hasCall, content) }
+
+  /**
+   * Gets the summary resulting from appending `step` to this type-tracking summary.
+   *
+   * There is no result when `step` cannot follow this summary: a return step
+   * when `hasCall` is `true`, a load step for a content name other than `content`,
+   * or a store step when `content` is already non-empty.
+   */
+  cached
+  TypeTracker append(StepSummary step) {
+    // A level step leaves the summary unchanged.
+    step = LevelStep() and result = this
+    or
+    // A call step sets `hasCall` and keeps the content.
+    step = CallStep() and result = MkTypeTracker(true, content)
+    or
+    // A return step is only allowed while no call step has been taken.
+    step = ReturnStep() and hasCall = false and result = this
+    or
+    // A load step must read exactly the tracked content; afterwards no content remains.
+    step = LoadStep(content) and result = MkTypeTracker(hasCall, "")
+    or
+    // A store step is only allowed when no content is tracked yet; it records content `p`.
+    exists(string p | step = StoreStep(p) and content = "" and result = MkTypeTracker(hasCall, p))
+  }
+
+  /**
+   * Gets a textual representation of this summary, stating whether call steps
+   * have been taken and, if non-empty, the tracked content name.
+   */
+  string toString() {
+    exists(string withCall, string withContent |
+      (if hasCall = true then withCall = "with" else withCall = "without") and
+      (if content != "" then withContent = " with content " + content else withContent = "") and
+      result = "type tracker " + withCall + " call steps" + withContent
+    )
+  }
+
+  /**
+   * Holds if this is the starting point of type tracking, that is, no call
+   * step has been taken and no content is tracked.
+   */
+  predicate start() { hasCall = false and content = "" }
+
+  /**
+   * Holds if this is the starting point of type tracking, and the value starts in the content named `contentName`.
+   * The type tracking only ends after the content has been loaded.
+   */
+  predicate startInContent(ContentName contentName) { hasCall = false and content = contentName }
+
+  /**
+   * Holds if this is the starting point of type tracking
+   * when tracking a parameter into a call, but not out of it.
+   */
+  predicate call() { hasCall = true and content = "" }
+
+  /**
+   * Holds if this is the end point of type tracking, that is, no content is
+   * tracked. The value of `hasCall` is not constrained.
+   */
+  predicate end() { content = "" }
+
+  /**
+   * INTERNAL. DO NOT USE.
+   *
+   * Gets the value of the `hasCall` flag: `true` if this type has been tracked
+   * into a call, `false` otherwise.
+   */
+  boolean hasCall() { result = hasCall }
+
+  /**
+   * INTERNAL. DO NOT USE.
+   *
+   * Gets the content associated with this type tracker (`""` if there is none).
+   */
+  string getContent() { result = content }
+
+  /**
+   * Gets a type tracker that starts where this one has left off to allow continued
+   * tracking.
+   *
+   * This predicate is only defined if the type is not associated to a piece of content.
+   */
+  TypeTracker continue() { content = "" and result = this }
+
+  /**
+   * Gets the summary that corresponds to having taken a forwards
+   * heap and/or inter-procedural step from `nodeFrom` to `nodeTo`,
+   * starting from this summary.
+   */
+  pragma[inline]
+  TypeTracker step(LocalSourceNode nodeFrom, Node nodeTo) {
+    exists(StepSummary summary |
+      StepSummary::step(nodeFrom, nodeTo, summary) and
+      result = this.append(summary)
+    )
+  }
+
+  /**
+   * Gets the summary that corresponds to having taken a forwards
+   * local, heap and/or inter-procedural step from `nodeFrom` to `nodeTo`,
+   * starting from this summary.
+   *
+   * Unlike `TypeTracker::step`, this predicate exposes all edges
+   * in the flow graph, and not just the edges between `LocalSourceNode`s.
+   * It may therefore be less performant.
+   *
+   * Type tracking predicates using small steps typically take the following form:
+   * ```ql
+   * DataFlow::Node myType(DataFlow::TypeTracker t) {
+   *   t.start() and
+   *   result = < source of myType >
+   *   or
+   *   exists (DataFlow::TypeTracker t2 |
+   *     t = t2.smallstep(myType(t2), result)
+   *   )
+   * }
+   *
+   * DataFlow::Node myType() {
+   *   result = myType(DataFlow::TypeTracker::end())
+   * }
+   * ```
+   */
+  pragma[inline]
+  TypeTracker smallstep(Node nodeFrom, Node nodeTo) {
+    exists(StepSummary summary |
+      StepSummary::smallstep(nodeFrom, nodeTo, summary) and
+      result = this.append(summary)
+    )
+    or
+    // NOTE(review): this disjunct looks subsumed by the one above, since
+    // `StepSummary::smallstep` already reports type-preserving steps as
+    // `LevelStep()` and `append(LevelStep())` yields `this` - confirm before removing.
+    typePreservingStep(nodeFrom, nodeTo) and
+    result = this
+  }
+}
+
+/** Provides helper predicates for writing custom `TypeTracker`-based predicates. */
+module TypeTracker {
+  /**
+   * Gets a type tracker that is a valid end point of type tracking,
+   * that is, one for which `TypeTracker.end()` holds.
+   */
+  TypeTracker end() { exists(TypeTracker t | t.end() and result = t) }
+}
+
+/**
+ * The representation behind `TypeBackTracker`: a boolean `hasReturn` flag paired
+ * with an optional content name.
+ */
+private newtype TTypeBackTracker =
+  MkTypeBackTracker(Boolean hasReturn, OptionalContentName content)
+
+/**
+ * Summary of the steps needed to back-track a use of a value to a given dataflow node.
+ *
+ * This can for example be used to track callbacks that are passed to a certain API,
+ * so we can model specific parameters of that callback as having a certain type.
+ *
+ * Note that type back-tracking does not provide a source/sink relation, that is,
+ * it may determine that a node will be used in an API call somewhere, but it won't
+ * determine exactly where that use was, or the path that led to the use.
+ *
+ * It is recommended that all uses of this type are written in the following form,
+ * for back-tracking some callback type `myCallback`:
+ *
+ * ```
+ * DataFlow::LocalSourceNode myCallback(DataFlow::TypeBackTracker t) {
+ *   t.start() and
+ *   result = (< some API call >).getArgument(< n >).getALocalSource()
+ *   or
+ *   exists (DataFlow::TypeBackTracker t2 |
+ *     result = myCallback(t2).backtrack(t2, t)
+ *   )
+ * }
+ *
+ * DataFlow::LocalSourceNode myCallback() { result = myCallback(DataFlow::TypeBackTracker::end()) }
+ * ```
+ *
+ * Instead of `result = myCallback(t2).backtrack(t2, t)`, you can also use the equivalent
+ * `t2 = t.step(result, myCallback(t2))`. If you additionally want to track individual
+ * intra-procedural steps, use `t2 = t.smallstep(result, myCallback(t2))`.
+ */
+class TypeBackTracker extends TTypeBackTracker {
+  /** Whether a return step has been prepended to this summary (see `prepend`). */
+  Boolean hasReturn;
+  /**
+   * The name of the content that still has to be stored, or `""` for no content.
+   *
+   * Typed as `OptionalContentName` to match the `MkTypeBackTracker` branch and
+   * the corresponding field of `TypeTracker`.
+   */
+  OptionalContentName content;
+
+  TypeBackTracker() { this = MkTypeBackTracker(hasReturn, content) }
+
+  /**
+   * Gets the summary resulting from prepending `step` to this type-tracking summary.
+   *
+   * There is no result when `step` cannot precede this summary: a call step
+   * when `hasReturn` is `true`, a load step when `content` is already non-empty,
+   * or a store step for a content name other than `content`.
+   */
+  TypeBackTracker prepend(StepSummary step) {
+    // A level step leaves the summary unchanged.
+    step = LevelStep() and result = this
+    or
+    // A call step is only allowed while no return step has been taken.
+    step = CallStep() and hasReturn = false and result = this
+    or
+    // A return step sets `hasReturn` and keeps the content.
+    step = ReturnStep() and result = MkTypeBackTracker(true, content)
+    or
+    // A load step is only allowed when no content is tracked yet; it records content `p`.
+    exists(string p |
+      step = LoadStep(p) and content = "" and result = MkTypeBackTracker(hasReturn, p)
+    )
+    or
+    // A store step must write exactly the tracked content; afterwards no content remains.
+    step = StoreStep(content) and result = MkTypeBackTracker(hasReturn, "")
+  }
+
+  /**
+   * Gets a textual representation of this summary, stating whether return steps
+   * have been taken and, if non-empty, the tracked content name.
+   */
+  string toString() {
+    exists(string withReturn, string withContent |
+      (if hasReturn = true then withReturn = "with" else withReturn = "without") and
+      (if content != "" then withContent = " with content " + content else withContent = "") and
+      result = "type back-tracker " + withReturn + " return steps" + withContent
+    )
+  }
+
+  /**
+   * Holds if this is the starting point of type back-tracking, that is, no
+   * return step has been taken and no content is tracked.
+   */
+  predicate start() { hasReturn = false and content = "" }
+
+  /**
+   * Holds if this is the end point of type back-tracking, that is, no content
+   * is tracked. The value of `hasReturn` is not constrained.
+   */
+  predicate end() { content = "" }
+
+  /**
+   * INTERNAL. DO NOT USE.
+   *
+   * Gets the value of the `hasReturn` flag: `true` if this type has been
+   * back-tracked into a call through a return edge, `false` otherwise.
+   */
+  boolean hasReturn() { result = hasReturn }
+
+  /**
+   * Gets a type back-tracker that starts where this one has left off to allow
+   * continued tracking.
+   *
+   * This predicate is only defined if the type has not been tracked into a piece of content.
+   */
+  TypeBackTracker continue() { content = "" and result = this }
+
+  /**
+   * Gets the summary that corresponds to having taken a backwards
+   * heap and/or inter-procedural step from `nodeTo` to `nodeFrom`.
+   *
+   * Note the direction: `this` is the summary at `nodeFrom`, and the result is
+   * the summary at `nodeTo` from which `this` is obtained by prepending the step.
+   */
+  pragma[inline]
+  TypeBackTracker step(LocalSourceNode nodeFrom, LocalSourceNode nodeTo) {
+    exists(StepSummary summary |
+      StepSummary::step(nodeFrom, nodeTo, summary) and
+      this = result.prepend(summary)
+    )
+  }
+
+  /**
+   * Gets the summary that corresponds to having taken a backwards
+   * local, heap and/or inter-procedural step from `nodeTo` to `nodeFrom`.
+   *
+   * As with `step`, `this` is the summary at `nodeFrom`, and the result is
+   * the summary at `nodeTo` from which `this` is obtained by prepending the step.
+   *
+   * Unlike `TypeBackTracker::step`, this predicate exposes all edges
+   * in the flowgraph, and not just the edges between
+   * `LocalSourceNode`s. It may therefore be less performant.
+   *
+   * Type tracking predicates using small steps typically take the following form:
+   * ```ql
+   * DataFlow::Node myType(DataFlow::TypeBackTracker t) {
+   *   t.start() and
+   *   result = < some API call >.getArgument(< n >)
+   *   or
+   *   exists (DataFlow::TypeBackTracker t2 |
+   *     t2 = t.smallstep(result, myType(t2))
+   *   )
+   * }
+   *
+   * DataFlow::Node myType() {
+   *   result = myType(DataFlow::TypeBackTracker::end())
+   * }
+   * ```
+   */
+  pragma[inline]
+  TypeBackTracker smallstep(Node nodeFrom, Node nodeTo) {
+    exists(StepSummary summary |
+      StepSummary::smallstep(nodeFrom, nodeTo, summary) and
+      this = result.prepend(summary)
+    )
+    or
+    // NOTE(review): this disjunct looks subsumed by the one above, since
+    // `StepSummary::smallstep` already reports type-preserving steps as
+    // `LevelStep()` and `prepend(LevelStep())` yields `this` - confirm before removing.
+    typePreservingStep(nodeFrom, nodeTo) and
+    this = result
+  }
+}
+
+/** Provides helper predicates for writing custom `TypeBackTracker`-based predicates. */
+module TypeBackTracker {
+  /**
+   * Gets a type back-tracker that is a valid end point of type back-tracking,
+   * that is, one for which `TypeBackTracker.end()` holds.
+   */
+  TypeBackTracker end() { exists(TypeBackTracker t | t.end() and result = t) }
+}