Skip to content

Commit b7b9120

Browse files
committed
Python: Better handling of Pydantic models
1 parent c207580 commit b7b9120

File tree

2 files changed

+21
-14
lines changed

2 files changed

+21
-14
lines changed

python/ql/lib/semmle/python/frameworks/Pydantic.qll

Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -62,9 +62,12 @@ module Pydantic {
6262
* A step from an instance of a `pydantic.BaseModel` subclass, that might result in
6363
* an instance of a `pydantic.BaseModel` subclass.
6464
*
65-
* NOTE: We currently overapproximate, and treat all attributes as containing another
66-
* pydantic model. For the code below, we _could_ limit this to `main_foo` and
67-
* members of `other_foos`.
65+
* NOTE: We currently overapproximate, and treat all attributes as containing
66+
* another pydantic model. For the code below, we _could_ limit this to `main_foo`
67+
* and members of `other_foos`. IF THIS IS CHANGED, YOU MUST CHANGE THE ADDITIONAL
68+
* TAINT STEPS BELOW, SUCH THAT SIMPLE ACCESS OF SOMETHIGN LIKE `str` IS STILL
69+
* TAINTED.
70+
*
6871
*
6972
* ```py
7073
* class MyComplexModel(BaseModel):
@@ -78,8 +81,15 @@ module Pydantic {
7881
nodeFrom = instance() and
7982
nodeTo.(DataFlow::AttrRead).getObject() = nodeFrom
8083
or
81-
// subscripts on attributes (such as `model.foo[0]`)
82-
nodeFrom.(DataFlow::AttrRead).getObject() = instance() and
84+
// subscripts on attributes (such as `model.foo[0]`). This needs to handle nested
85+
// lists (such as `model.foo[0][0]`), and access being split into multiple
86+
// statements (such as `xs = model.foo; xs[0]`).
87+
//
88+
// To handle this we overapproximate which things are a Pydantic model, by
89+
// treating any subscript on anything that originates on a Pydantic model to also
90+
// be a Pydantic model. So `model[0]` will be an overapproximation, but should not
91+
// really cause problems (since we don't expect real code to contain such accesses)
92+
nodeFrom = instance() and
8393
nodeTo.asCfgNode().(SubscriptNode).getObject() = nodeFrom.asCfgNode()
8494
}
8595

@@ -88,13 +98,10 @@ module Pydantic {
8898
*/
8999
private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
90100
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
91-
// attributes (such as `model.foo`)
92-
nodeFrom = instance() and
93-
nodeTo.(DataFlow::AttrRead).getObject() = nodeFrom
94-
or
95-
// subscripts on attributes (such as `model.foo[0]`)
96-
nodeFrom.(DataFlow::AttrRead).getObject() = instance() and
97-
nodeTo.asCfgNode().(SubscriptNode).getObject() = nodeFrom.asCfgNode()
101+
// NOTE: if `instanceStepToPydanticModel` is changed to be more precise, these
102+
// taint steps should be expanded, such that a field that has type `str` is
103+
// still tainted.
104+
instanceStepToPydanticModel(nodeFrom, nodeTo)
98105
}
99106
}
100107
}

python/ql/test/library-tests/frameworks/fastapi/taint_test.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,15 +43,15 @@ async def test_taint(name : str, number : int, also_input: MyComplexModel): # $
4343
also_input.nested_foos, # $ tainted
4444
also_input.nested_foos[0], # $ tainted
4545
also_input.nested_foos[0][0], # $ tainted
46-
also_input.nested_foos[0][0].foo, # $ MISSING: tainted
46+
also_input.nested_foos[0][0].foo, # $ tainted
4747
)
4848

4949
other_foos = also_input.other_foos
5050

5151
ensure_tainted(
5252
other_foos, # $ tainted
5353
other_foos[0], # $ tainted
54-
other_foos[0].foo, # $ MISSING: tainted
54+
other_foos[0].foo, # $ tainted
5555
[f.foo for f in other_foos], # $ MISSING: tainted
5656
)
5757

0 commit comments

Comments
 (0)