Skip to content

Commit 1048cf7

Browse files
authored
Merge pull request #15711 from RasmusWL/tt-content
Python: Add type tracking for content
2 parents 44fba68 + a22b994 commit 1048cf7

File tree

13 files changed

+359
-83
lines changed

13 files changed

+359
-83
lines changed

python/ql/consistency-queries/TypeTrackingConsistency.ql

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,15 +27,18 @@ private module ConsistencyChecksInput implements ConsistencyChecksInputSig {
2727
TypeTrackingInput::simpleLocalSmallStep*(m, n)
2828
)
2929
or
30-
// TODO: when adding support for proper content, handle iterable unpacking better
31-
// such as `for k,v in items:`, or `a, (b,c) = ...`
32-
n instanceof DataFlow::IterableSequenceNode
33-
or
3430
// We have missing use-use flow in
3531
// https://github.com/python/cpython/blob/0fb18b02c8ad56299d6a2910be0bab8ad601ef24/Lib/socketserver.py#L276-L303
3632
// which I couldn't just fix. We ignore the problems here, and instead rely on the
3733
// test-case added in https://github.com/github/codeql/pull/15841
3834
n.getLocation().getFile().getAbsolutePath().matches("%/socketserver.py")
35+
or
36+
// for iterable unpacking like `a,b = some_list`, we currently don't want to allow
37+
// type-tracking... however, in the future when we allow tracking list indexes
38+
// precisely (that is, move away from ListElementContent), we should ensure we have
39+
// proper flow to the synthetic `IterableElementNode`.
40+
exists(DataFlow::ListElementContent c) and
41+
n instanceof DataFlow::IterableElementNode
3942
}
4043
}
4144

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
category: minorAnalysis
3+
---
4+
* Improved the type-tracking capabilities (and therefore also API graphs) to allow tracking items in tuples and dictionaries.

python/ql/lib/semmle/python/dataflow/new/TypeTracking.qll

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,14 @@
55

66
private import internal.TypeTrackingImpl as Impl
77
import Impl::Shared::TypeTracking<Impl::TypeTrackingInput>
8+
private import semmle.python.dataflow.new.internal.DataFlowPublic as DataFlowPublic
89

9-
/** A string that may appear as the name of an attribute or access path. */
10-
class AttributeName = Impl::TypeTrackingInput::Content;
10+
/**
11+
* DEPRECATED.
12+
*
13+
* A string that may appear as the name of an attribute or access path.
14+
*/
15+
deprecated class AttributeName = Impl::TypeTrackingInput::Content;
1116

1217
/**
1318
* A summary of the steps needed to track a value to a given dataflow node.
@@ -40,17 +45,20 @@ class TypeTracker extends Impl::TypeTracker {
4045
* Holds if this is the starting point of type tracking, and the value starts in the attribute named `attrName`.
4146
* The type tracking only ends after the attribute has been loaded.
4247
*/
43-
predicate startInAttr(string attrName) { this.startInContent(attrName) }
48+
predicate startInAttr(string attrName) {
49+
exists(DataFlowPublic::AttributeContent content | content.getAttribute() = attrName |
50+
this.startInContent(content)
51+
)
52+
}
4453

4554
/**
4655
* INTERNAL. DO NOT USE.
4756
*
4857
* Gets the attribute associated with this type tracker.
4958
*/
5059
string getAttr() {
51-
result = this.getContent().asSome()
52-
or
53-
this.getContent().isNone() and
54-
result = ""
60+
if this.getContent().asSome() instanceof DataFlowPublic::AttributeContent
61+
then result = this.getContent().asSome().(DataFlowPublic::AttributeContent).getAttribute()
62+
else result = ""
5563
}
5664
}

python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll

Lines changed: 31 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -641,25 +641,39 @@ predicate jumpStepNotSharedWithTypeTracker(Node nodeFrom, Node nodeTo) {
641641
//--------
642642
// Field flow
643643
//--------
644+
/**
645+
* Subset of `storeStep` that should be shared with type-tracking.
646+
*
647+
* NOTE: This does not include attributeStoreStep right now, since it has its own
648+
* modeling in the type-tracking library (which is slightly different due to
649+
* PostUpdateNodes).
650+
*
651+
* As of 2024-04-02 the type-tracking library only supports precise content, so there is
652+
* no reason to include steps for list content right now.
653+
*/
654+
predicate storeStepCommon(Node nodeFrom, ContentSet c, Node nodeTo) {
655+
tupleStoreStep(nodeFrom, c, nodeTo)
656+
or
657+
dictStoreStep(nodeFrom, c, nodeTo)
658+
or
659+
moreDictStoreSteps(nodeFrom, c, nodeTo)
660+
or
661+
iterableUnpackingStoreStep(nodeFrom, c, nodeTo)
662+
}
663+
644664
/**
645665
* Holds if data can flow from `nodeFrom` to `nodeTo` via an assignment to
646666
* content `c`.
647667
*/
648668
predicate storeStep(Node nodeFrom, ContentSet c, Node nodeTo) {
669+
storeStepCommon(nodeFrom, c, nodeTo)
670+
or
649671
listStoreStep(nodeFrom, c, nodeTo)
650672
or
651673
setStoreStep(nodeFrom, c, nodeTo)
652674
or
653-
tupleStoreStep(nodeFrom, c, nodeTo)
654-
or
655-
dictStoreStep(nodeFrom, c, nodeTo)
656-
or
657-
moreDictStoreSteps(nodeFrom, c, nodeTo)
658-
or
659675
comprehensionStoreStep(nodeFrom, c, nodeTo)
660676
or
661-
iterableUnpackingStoreStep(nodeFrom, c, nodeTo)
662-
or
663677
attributeStoreStep(nodeFrom, c, nodeTo)
664678
or
665679
matchStoreStep(nodeFrom, c, nodeTo)
@@ -892,12 +906,19 @@ predicate attributeStoreStep(Node nodeFrom, AttributeContent c, Node nodeTo) {
892906
}
893907

894908
/**
895-
* Holds if data can flow from `nodeFrom` to `nodeTo` via a read of content `c`.
909+
* Subset of `readStep` that should be shared with type-tracking.
896910
*/
897-
predicate readStep(Node nodeFrom, ContentSet c, Node nodeTo) {
911+
predicate readStepCommon(Node nodeFrom, ContentSet c, Node nodeTo) {
898912
subscriptReadStep(nodeFrom, c, nodeTo)
899913
or
900914
iterableUnpackingReadStep(nodeFrom, c, nodeTo)
915+
}
916+
917+
/**
918+
* Holds if data can flow from `nodeFrom` to `nodeTo` via a read of content `c`.
919+
*/
920+
predicate readStep(Node nodeFrom, ContentSet c, Node nodeTo) {
921+
readStepCommon(nodeFrom, c, nodeTo)
901922
or
902923
matchReadStep(nodeFrom, c, nodeTo)
903924
or

python/ql/lib/semmle/python/dataflow/new/internal/TypeTracker.qll

Lines changed: 36 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
/** Step Summaries and Type Tracking */
22

33
private import TypeTrackerSpecific
4+
private import semmle.python.dataflow.new.internal.DataFlowPublic as DataFlowPublic
45

56
cached
67
private module Cached {
@@ -12,10 +13,22 @@ private module Cached {
1213
LevelStep() or
1314
CallStep() or
1415
ReturnStep() or
15-
deprecated StoreStep(TypeTrackerContent content) { basicStoreStep(_, _, content) } or
16-
deprecated LoadStep(TypeTrackerContent content) { basicLoadStep(_, _, content) } or
16+
deprecated StoreStep(TypeTrackerContent content) {
17+
exists(DataFlowPublic::AttributeContent dfc | dfc.getAttribute() = content |
18+
basicStoreStep(_, _, dfc)
19+
)
20+
} or
21+
deprecated LoadStep(TypeTrackerContent content) {
22+
exists(DataFlowPublic::AttributeContent dfc | dfc.getAttribute() = content |
23+
basicLoadStep(_, _, dfc)
24+
)
25+
} or
1726
deprecated LoadStoreStep(TypeTrackerContent load, TypeTrackerContent store) {
18-
basicLoadStoreStep(_, _, load, store)
27+
exists(DataFlowPublic::AttributeContent dfcLoad, DataFlowPublic::AttributeContent dfcStore |
28+
dfcLoad.getAttribute() = load and dfcStore.getAttribute() = store
29+
|
30+
basicLoadStoreStep(_, _, dfcLoad, dfcStore)
31+
)
1932
} or
2033
deprecated WithContent(ContentFilter filter) { basicWithContentStep(_, _, filter) } or
2134
deprecated WithoutContent(ContentFilter filter) { basicWithoutContentStep(_, _, filter) } or
@@ -29,13 +42,13 @@ private module Cached {
2942
// Restrict `content` to those that might eventually match a load.
3043
// We can't rely on `basicStoreStep` since `startInContent` might be used with
3144
// a content that has no corresponding store.
32-
exists(TypeTrackerContent loadContents |
45+
exists(DataFlowPublic::AttributeContent loadContents |
3346
(
3447
basicLoadStep(_, _, loadContents)
3548
or
3649
basicLoadStoreStep(_, _, loadContents, _)
3750
) and
38-
compatibleContents(content, loadContents)
51+
compatibleContents(content, loadContents.getAttribute())
3952
)
4053
}
4154

@@ -45,13 +58,13 @@ private module Cached {
4558
content = noContent()
4659
or
4760
// As in MkTypeTracker, restrict `content` to those that might eventually match a store.
48-
exists(TypeTrackerContent storeContent |
61+
exists(DataFlowPublic::AttributeContent storeContent |
4962
(
5063
basicStoreStep(_, _, storeContent)
5164
or
5265
basicLoadStoreStep(_, _, _, storeContent)
5366
) and
54-
compatibleContents(storeContent, content)
67+
compatibleContents(storeContent.getAttribute(), content)
5568
)
5669
}
5770

@@ -198,7 +211,10 @@ private module Cached {
198211
flowsToStoreStep(nodeFrom, nodeTo, content) and
199212
summary = StoreStep(content)
200213
or
201-
basicLoadStep(nodeFrom, nodeTo, content) and summary = LoadStep(content)
214+
exists(DataFlowPublic::AttributeContent dfc | dfc.getAttribute() = content |
215+
basicLoadStep(nodeFrom, nodeTo, dfc)
216+
) and
217+
summary = LoadStep(content)
202218
)
203219
or
204220
exists(TypeTrackerContent loadContent, TypeTrackerContent storeContent |
@@ -281,7 +297,12 @@ deprecated private predicate smallstepProj(Node nodeFrom, StepSummary summary) {
281297
deprecated private predicate flowsToStoreStep(
282298
Node nodeFrom, TypeTrackingNode nodeTo, TypeTrackerContent content
283299
) {
284-
exists(Node obj | nodeTo.flowsTo(obj) and basicStoreStep(nodeFrom, obj, content))
300+
exists(Node obj |
301+
nodeTo.flowsTo(obj) and
302+
exists(DataFlowPublic::AttributeContent dfc | dfc.getAttribute() = content |
303+
basicStoreStep(nodeFrom, obj, dfc)
304+
)
305+
)
285306
}
286307

287308
/**
@@ -292,7 +313,12 @@ deprecated private predicate flowsToLoadStoreStep(
292313
TypeTrackerContent storeContent
293314
) {
294315
exists(Node obj |
295-
nodeTo.flowsTo(obj) and basicLoadStoreStep(nodeFrom, obj, loadContent, storeContent)
316+
nodeTo.flowsTo(obj) and
317+
exists(DataFlowPublic::AttributeContent loadDfc, DataFlowPublic::AttributeContent storeDfc |
318+
loadDfc.getAttribute() = loadContent and storeDfc.getAttribute() = storeContent
319+
|
320+
basicLoadStoreStep(nodeFrom, obj, loadDfc, storeDfc)
321+
)
296322
)
297323
}
298324

python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ deprecated class OptionalTypeTrackerContent extends string {
1515
OptionalTypeTrackerContent() {
1616
this = ""
1717
or
18-
this instanceof TypeTrackingImpl::TypeTrackingInput::Content
18+
this = any(DataFlowPublic::AttributeContent dfc).getAttribute()
1919
}
2020
}
2121

0 commit comments

Comments
 (0)