@@ -62,9 +62,12 @@ module Pydantic {
6262 * A step from an instance of a `pydantic.BaseModel` subclass, that might result in
6363 * an instance of a `pydantic.BaseModel` subclass.
6464 *
65- * NOTE: We currently overapproximate, and treat all attributes as containing another
66- * pydantic model. For the code below, we _could_ limit this to `main_foo` and
67- * members of `other_foos`.
65+ * NOTE: We currently overapproximate, and treat all attributes as containing
66+ * another pydantic model. For the code below, we _could_ limit this to `main_foo`
67+ * and members of `other_foos`. IF THIS IS CHANGED, YOU MUST CHANGE THE ADDITIONAL
68+ * TAINT STEPS BELOW, SUCH THAT SIMPLE ACCESS OF SOMETHIGN LIKE `str` IS STILL
69+ * TAINTED.
70+ *
6871 *
6972 * ```py
7073 * class MyComplexModel(BaseModel):
@@ -78,8 +81,15 @@ module Pydantic {
7881 nodeFrom = instance ( ) and
7982 nodeTo .( DataFlow:: AttrRead ) .getObject ( ) = nodeFrom
8083 or
81- // subscripts on attributes (such as `model.foo[0]`)
82- nodeFrom .( DataFlow:: AttrRead ) .getObject ( ) = instance ( ) and
84+ // subscripts on attributes (such as `model.foo[0]`). This needs to handle nested
85+ // lists (such as `model.foo[0][0]`), and access being split into multiple
86+ // statements (such as `xs = model.foo; xs[0]`).
87+ //
88+ // To handle this we overapproximate which things are a Pydantic model, by
89+ // treating any subscript on anything that originates on a Pydantic model to also
90+ // be a Pydantic model. So `model[0]` will be an overapproximation, but should not
91+ // really cause problems (since we don't expect real code to contain such accesses)
92+ nodeFrom = instance ( ) and
8393 nodeTo .asCfgNode ( ) .( SubscriptNode ) .getObject ( ) = nodeFrom .asCfgNode ( )
8494 }
8595
@@ -88,13 +98,10 @@ module Pydantic {
8898 */
8999 private class AdditionalTaintStep extends TaintTracking:: AdditionalTaintStep {
90100 override predicate step ( DataFlow:: Node nodeFrom , DataFlow:: Node nodeTo ) {
91- // attributes (such as `model.foo`)
92- nodeFrom = instance ( ) and
93- nodeTo .( DataFlow:: AttrRead ) .getObject ( ) = nodeFrom
94- or
95- // subscripts on attributes (such as `model.foo[0]`)
96- nodeFrom .( DataFlow:: AttrRead ) .getObject ( ) = instance ( ) and
97- nodeTo .asCfgNode ( ) .( SubscriptNode ) .getObject ( ) = nodeFrom .asCfgNode ( )
101+ // NOTE: if `instanceStepToPydanticModel` is changed to be more precise, these
102+ // taint steps should be expanded, such that a field that has type `str` is
103+ // still tainted.
104+ instanceStepToPydanticModel ( nodeFrom , nodeTo )
98105 }
99106 }
100107 }
0 commit comments