@@ -62,9 +62,12 @@ module Pydantic {
62
62
* A step from an instance of a `pydantic.BaseModel` subclass, that might result in
63
63
* an instance of a `pydantic.BaseModel` subclass.
64
64
*
65
- * NOTE: We currently overapproximate, and treat all attributes as containing another
66
- * pydantic model. For the code below, we _could_ limit this to `main_foo` and
67
- * members of `other_foos`.
65
+ * NOTE: We currently overapproximate, and treat all attributes as containing
66
+ * another pydantic model. For the code below, we _could_ limit this to `main_foo`
67
+ * and members of `other_foos`. IF THIS IS CHANGED, YOU MUST CHANGE THE ADDITIONAL
68
+ * TAINT STEPS BELOW, SUCH THAT SIMPLE ACCESS OF SOMETHING LIKE `str` IS STILL
69
+ * TAINTED.
70
+ *
68
71
*
69
72
* ```py
70
73
* class MyComplexModel(BaseModel):
@@ -78,8 +81,15 @@ module Pydantic {
78
81
nodeFrom = instance ( ) and
79
82
nodeTo .( DataFlow:: AttrRead ) .getObject ( ) = nodeFrom
80
83
or
81
- // subscripts on attributes (such as `model.foo[0]`)
82
- nodeFrom .( DataFlow:: AttrRead ) .getObject ( ) = instance ( ) and
84
+ // subscripts on attributes (such as `model.foo[0]`). This needs to handle nested
85
+ // lists (such as `model.foo[0][0]`), and access being split into multiple
86
+ // statements (such as `xs = model.foo; xs[0]`).
87
+ //
88
+ // To handle this we overapproximate which things are a Pydantic model, by
89
+ // treating any subscript on anything that originates from a Pydantic model to also
90
+ // be a Pydantic model. So `model[0]` will be an overapproximation, but should not
91
+ // really cause problems (since we don't expect real code to contain such accesses).
92
+ nodeFrom = instance ( ) and
83
93
nodeTo .asCfgNode ( ) .( SubscriptNode ) .getObject ( ) = nodeFrom .asCfgNode ( )
84
94
}
85
95
@@ -88,13 +98,10 @@ module Pydantic {
88
98
*/
89
99
private class AdditionalTaintStep extends TaintTracking:: AdditionalTaintStep {
90
100
override predicate step ( DataFlow:: Node nodeFrom , DataFlow:: Node nodeTo ) {
91
- // attributes (such as `model.foo`)
92
- nodeFrom = instance ( ) and
93
- nodeTo .( DataFlow:: AttrRead ) .getObject ( ) = nodeFrom
94
- or
95
- // subscripts on attributes (such as `model.foo[0]`)
96
- nodeFrom .( DataFlow:: AttrRead ) .getObject ( ) = instance ( ) and
97
- nodeTo .asCfgNode ( ) .( SubscriptNode ) .getObject ( ) = nodeFrom .asCfgNode ( )
101
+ // NOTE: if `instanceStepToPydanticModel` is changed to be more precise, these
102
+ // taint steps should be expanded, such that a field that has type `str` is
103
+ // still tainted.
104
+ instanceStepToPydanticModel ( nodeFrom , nodeTo )
98
105
}
99
106
}
100
107
}
0 commit comments