Skip to content

Commit d4e0cb2

Browse files
authored
Merge pull request github#17767 from github/tausbn/python-3.13-model-flow-in-replace
Python: Model `copy.replace`
2 parents 886c7f1 + 3b60d83 commit d4e0cb2

File tree

5 files changed

+80
-16
lines changed

5 files changed

+80
-16
lines changed
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
category: minorAnalysis
3+
---
4+
5+
- Added partial support for the `copy.replace` function, [introduced](https://docs.python.org/3.13/library/copy.html#copy.replace) in Python 3.13.

python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -687,16 +687,23 @@ newtype TContent =
687687
class Content extends TContent {
688688
/** Gets a textual representation of this element. */
689689
string toString() { result = "Content" }
690+
691+
/**
 * Gets the Models-as-Data representation of this content (if any).
 *
 * The base implementation has no result; subclasses whose content can be
 * written as a Models-as-Data string override this predicate with that string.
 */
692+
string getMaDRepresentation() { none() }
690693
}
691694

692695
/** An element of a list. */
693696
class ListElementContent extends TListElementContent, Content {
694697
override string toString() { result = "List element" }
698+
699+
// Models-as-Data spelling of list-element content.
override string getMaDRepresentation() { result = "ListElement" }
695700
}
696701

697702
/** An element of a set. */
698703
class SetElementContent extends TSetElementContent, Content {
699704
override string toString() { result = "Set element" }
705+
706+
// Models-as-Data spelling of set-element content.
override string getMaDRepresentation() { result = "SetElement" }
700707
}
701708

702709
/** An element of a tuple at a specific index. */
@@ -709,6 +716,8 @@ class TupleElementContent extends TTupleElementContent, Content {
709716
int getIndex() { result = index }
710717

711718
override string toString() { result = "Tuple element at index " + index.toString() }
719+
720+
override string getMaDRepresentation() { result = "TupleElement[" + index + "]" }
712721
}
713722

714723
/** An element of a dictionary under a specific key. */
@@ -721,11 +730,15 @@ class DictionaryElementContent extends TDictionaryElementContent, Content {
721730
string getKey() { result = key }
722731

723732
override string toString() { result = "Dictionary element at key " + key }
733+
734+
override string getMaDRepresentation() { result = "DictionaryElement[" + key + "]" }
724735
}
725736

726737
/** An element of a dictionary under any key. */
727738
class DictionaryElementAnyContent extends TDictionaryElementAnyContent, Content {
728739
override string toString() { result = "Any dictionary element" }
740+
741+
// Models-as-Data spelling of "any dictionary element" content (no key is included).
override string getMaDRepresentation() { result = "DictionaryElementAny" }
729742
}
730743

731744
/** An object attribute. */
@@ -738,6 +751,8 @@ class AttributeContent extends TAttributeContent, Content {
738751
string getAttribute() { result = attr }
739752

740753
override string toString() { result = "Attribute " + attr }
754+
755+
override string getMaDRepresentation() { result = "Attribute[" + attr + "]" }
741756
}
742757

743758
/** A captured variable. */
@@ -750,6 +765,8 @@ class CapturedVariableContent extends Content, TCapturedVariableContent {
750765
VariableCapture::CapturedVariable getVariable() { result = v }
751766

752767
override string toString() { result = "captured " + v }
768+
769+
override string getMaDRepresentation() { none() }
753770
}
754771

755772
/**

python/ql/lib/semmle/python/frameworks/Stdlib.model.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ extensions:
4545
# See https://docs.python.org/3/library/contextlib.html#contextlib.ExitStack
4646
- ["contextlib.ExitStack", "Member[enter_context]", "Argument[0,cm:]", "ReturnValue", "taint"]
4747
# See https://docs.python.org/3/library/copy.html#copy.deepcopy
48-
- ["copy", "Member[copy,deepcopy]", "Argument[0,x:]", "ReturnValue", "value"]
48+
- ["copy", "Member[copy,deepcopy,replace]", "Argument[0,x:]", "ReturnValue", "value"]
4949
# See
5050
# - https://docs.python.org/3/library/ctypes.html#ctypes.create_string_buffer
5151
# - https://docs.python.org/3/library/ctypes.html#ctypes.create_unicode_buffer

python/ql/lib/semmle/python/frameworks/Stdlib.qll

Lines changed: 29 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -4537,21 +4537,9 @@ module StdlibPrivate {
45374537
override DataFlow::ArgumentNode getACallback() { none() }
45384538

45394539
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
4540-
exists(string content |
4541-
content = "ListElement"
4542-
or
4543-
content = "SetElement"
4544-
or
4545-
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
4546-
content = "TupleElement[" + i.toString() + "]"
4547-
)
4548-
or
4549-
exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() |
4550-
content = "DictionaryElement[" + key + "]"
4551-
)
4552-
|
4553-
input = "Argument[self]." + content and
4554-
output = "ReturnValue." + content and
4540+
exists(DataFlow::Content c |
4541+
input = "Argument[self]." + c.getMaDRepresentation() and
4542+
output = "ReturnValue." + c.getMaDRepresentation() and
45554543
preservesValue = true
45564544
)
45574545
or
@@ -4561,6 +4549,32 @@ module StdlibPrivate {
45614549
}
45624550
}
45634551

4552+
/**
 * A flow summary for `copy.replace`.
 *
 * For every keyword argument `name`, this summary propagates the value of that
 * keyword argument to the attribute `name` of the return value.
 */
4553+
class ReplaceSummary extends SummarizedCallable {
4554+
ReplaceSummary() { this = "copy.replace" }
4555+
4556+
// Calls to `copy.replace`, resolved through the API graph.
override DataFlow::CallCfgNode getACall() {
4557+
result = API::moduleImport("copy").getMember("replace").getACall()
4558+
}
4559+
4560+
// Argument nodes that hold a reference to `copy.replace` itself
// (presumably for the case where the function is passed as a callback).
override DataFlow::ArgumentNode getACallback() {
4561+
result = API::moduleImport("copy").getMember("replace").getAValueReachableFromSource()
4562+
}
4563+
4564+
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
// The set of keyword names is not known up front, so it is collected from the
// code base: `c` is any call whose callee is syntactically named `replace`
// (a plain name or an attribute access), and `name` is a keyword used at such a call.
// NOTE(review): this match is purely by name and is not restricted to `copy.replace`;
// it is only used to enumerate candidate keyword names.
4565+
exists(CallNode c, string name, ControlFlowNode n, DataFlow::AttributeContent ac |
4566+
c.getFunction().(NameNode).getId() = "replace" or
4567+
c.getFunction().(AttrNode).getName() = "replace"
4568+
|
4569+
n = c.getArgByName(name) and
// Only keyword names for which an attribute content with the same name exists are used.
4570+
ac.getAttribute() = name and
// Flow goes from the keyword argument `name` ...
4571+
input = "Argument[" + name + ":]" and
// ... to the attribute `name` on the returned object.
4572+
output = "ReturnValue." + ac.getMaDRepresentation() and
4573+
preservesValue = true
4574+
)
4575+
}
4576+
}
4577+
45644578
/**
45654579
* A flow summary for `pop` either for list or set.
45664580
* This ignores the index if given, since content is

python/ql/test/library-tests/dataflow/tainttracking/defaultAdditionalTaintStep/test_collections.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,34 @@ def test_copy_2():
166166
copy.deepcopy(TAINTED_LIST), # $ tainted
167167
)
168168

169+
def test_replace():
# Exercises taint flow through `copy.replace`: attributes not named in the call
# should keep the taint state they had on the original object, while attributes
# passed as keyword arguments should take the taint state of the new value.
170+
from copy import replace
171+
172+
# Helper class with four attributes, each named after the taint state it has
# before and after the `replace` call below.
class C:
173+
def __init__(self, always_tainted, tainted_to_safe, safe_to_tainted, always_safe):
174+
self.always_tainted = always_tainted
175+
self.tainted_to_safe = tainted_to_safe
176+
self.safe_to_tainted = safe_to_tainted
177+
self.always_safe = always_safe
178+
179+
# Original object: the first two attributes are tainted, the last two are not.
c = C(always_tainted=TAINTED_STRING,
180+
tainted_to_safe=TAINTED_STRING,
181+
safe_to_tainted=NOT_TAINTED,
182+
always_safe=NOT_TAINTED)
183+
184+
# Copy with two attributes overwritten: one tainted value is replaced by a safe
# value, and one safe value is replaced by a tainted value.
d = replace(c, tainted_to_safe=NOT_TAINTED, safe_to_tainted=TAINTED_STRING)
185+
186+
ensure_tainted(d.always_tainted) # $ tainted
187+
ensure_tainted(d.safe_to_tainted) # $ tainted
188+
ensure_not_tainted(d.always_safe)
189+
190+
# Currently, we have no way of stopping the value in the tainted_to_safe field (which gets
191+
# overwritten) from flowing through the replace call, which means we get a spurious result.
192+
193+
ensure_not_tainted(d.tainted_to_safe) # $ SPURIOUS: tainted
194+
195+
196+
169197

170198
def list_index_assign():
171199
tainted_string = TAINTED_STRING

0 commit comments

Comments
 (0)