Skip to content

Commit 0a76eaf

Browse files
fix(iast): potential string id collisions [backport 1.20] (#7328)
Backport 77540c1 from #7318 to 1-20. Fix potential string id collisions that could cause false positives with non tainted objects being marked as tainted. ## Checklist - [x] Change(s) are motivated and described in the PR description. - [x] Testing strategy is described if automated tests are not included in the PR. - [x] Risk is outlined (performance impact, potential for breakage, maintainability, etc). - [x] Change is maintainable (easy to change, telemetry, documentation). - [x] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) are followed. If no release note is required, add label `changelog/no-changelog`. - [x] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/)). - [x] Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [x] Title is accurate. - [x] No unnecessary changes are introduced. - [x] Description motivates each change. - [x] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes unless absolutely necessary. - [x] Testing strategy adequately addresses listed risk(s). - [x] Change is maintainable (easy to change, telemetry, documentation). - [x] Release note makes sense to a user of the library. - [x] Reviewer has explicitly acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment. - [x] Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) - [x] If this PR touches code that signs or publishes builds or packages, or handles credentials of any kind, I've requested a review from `@DataDog/security-design-and-guidance`. - [x] This PR doesn't touch any of that. Co-authored-by: Alberto Vara <[email protected]>
1 parent ecae68d commit 0a76eaf

File tree

5 files changed

+83
-8
lines changed

5 files changed

+83
-8
lines changed

ddtrace/appsec/iast/_taint_tracking/Aspects/AspectExtend.cpp

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,17 +19,21 @@ api_extend_aspect(PyObject* self, PyObject* const* args, Py_ssize_t nargs)
1919
if (!PyByteArray_Check(to_add) and !PyBytes_Check(to_add)) {
2020
return nullptr;
2121
}
22+
23+
auto ctx_map = initializer->get_tainting_map();
24+
const auto& to_candidate = get_tainted_object(candidate_text, ctx_map);
25+
auto to_result = initializer->allocate_tainted_object_copy(to_candidate);
26+
const auto& to_toadd = get_tainted_object(to_add, ctx_map);
27+
28+
// Ensure no returns are done before this method call
2229
auto method_name = PyUnicode_FromString("extend");
2330
PyObject_CallMethodObjArgs(candidate_text, method_name, to_add, nullptr);
2431
Py_DECREF(method_name);
25-
auto ctx_map = initializer->get_tainting_map();
26-
if (not ctx_map or ctx_map->empty()) {
32+
33+
if (not ctx_map or ctx_map->empty() or to_result == nullptr) {
2734
Py_RETURN_NONE;
2835
}
2936

30-
const auto& to_candidate = get_tainted_object(candidate_text, ctx_map);
31-
auto to_result = initializer->allocate_tainted_object(to_candidate);
32-
const auto& to_toadd = get_tainted_object(to_add, ctx_map);
3337
if (to_toadd) {
3438
to_result->add_ranges_shifted(to_toadd, (long)len_candidate_text);
3539
}

ddtrace/appsec/iast/_taint_tracking/TaintTracking/TaintRange.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,13 @@ using TaintedObjectPtr = TaintedObject*;
2323
using TaintRangeMapType = absl::node_hash_map<uintptr_t, TaintedObjectPtr>;
2424

2525
inline static uintptr_t
26-
get_unique_id(const PyObject* str)
26+
get_unique_id(PyObject* str)
2727
{
28-
return uintptr_t(str);
28+
if ((((PyASCIIObject*)str)->hash) == -1) {
29+
Py_hash_t result = PyObject_Hash(str);
30+
}
31+
auto res = uintptr_t(str) + ((PyASCIIObject*)str)->hash;
32+
return hash<uintptr_t>{}(res);
2933
}
3034

3135
struct TaintRange
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
fixes:
3+
- |
4+
Vulnerability Management for Code-level (IAST): Fix potential string id collisions that could cause false
5+
positives with non tainted objects being marked as tainted.

tests/appsec/iast/aspects/test_bytearray_extend_aspect.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ def test_first_and_second_tainted(self):
8686
pyobject=bytearray(b"123"), source_name="test1", source_value="foo", source_origin=OriginType.PARAMETER
8787
)
8888
ba2 = taint_pyobject(
89-
pyobject=bytearray(b"456"), source_name="test2", source_value="foo", source_origin=OriginType.PARAMETER
89+
pyobject=bytearray(b"456"), source_name="test2", source_value="bar", source_origin=OriginType.BODY
9090
)
9191
result = mod.do_bytearray_extend(ba1, ba2)
9292
assert result == bytearray(b"123456")

tests/appsec/iast/taint_tracking/test_native_taint_range.py

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323
from ddtrace.appsec.iast._taint_tracking import shift_taint_range
2424
from ddtrace.appsec.iast._taint_tracking import shift_taint_ranges
2525
from ddtrace.appsec.iast._taint_tracking import taint_pyobject
26+
from ddtrace.appsec.iast._taint_tracking.aspects import add_aspect
27+
from ddtrace.appsec.iast._taint_tracking.aspects import join_aspect
2628
except (ImportError, AttributeError):
2729
pytest.skip("IAST not supported for this Python version", allow_module_level=True)
2830

@@ -86,6 +88,66 @@ def test_unicode_fast_tainting():
8688
assert not is_notinterned_notfasttainted_unicode(c)
8789

8890

91+
@pytest.mark.skipif(sys.version_info < (3, 0, 0), reason="Collisions works only in Python 3")
92+
def test_collisions():
93+
not_tainted = []
94+
tainted = []
95+
mixed_tainted_ids = []
96+
mixed_nottainted = []
97+
mixed_tainted_and_nottainted = []
98+
99+
# Generate untainted strings
100+
for i in range(10):
101+
not_tainted.append("N%04d" % i)
102+
103+
# Generate tainted strings
104+
for i in range(10000):
105+
t = taint_pyobject(
106+
"T%04d" % i, source_name="request_body", source_value="hello", source_origin=OriginType.PARAMETER
107+
)
108+
tainted.append(t)
109+
110+
for t in tainted:
111+
assert len(get_ranges(t)) > 0
112+
113+
for n in not_tainted:
114+
assert get_ranges(n) == []
115+
116+
# Do join and format operations mixing tainted and untainted strings, store in mixed_tainted_and_nottainted
117+
for _ in range(10000):
118+
_ = add_aspect(add_aspect("_", random.choice(not_tainted)), "{}_")
119+
120+
t2 = random.choice(tainted)
121+
122+
n3 = random.choice(not_tainted)
123+
124+
t4 = random.choice(tainted)
125+
mixed_tainted_ids.append(id(t4))
126+
127+
t6 = join_aspect(t4, [t2, n3])
128+
mixed_tainted_ids.append(id(t6))
129+
130+
for t in mixed_tainted_and_nottainted:
131+
assert len(get_ranges(t)) == 2
132+
133+
# Do join and format operations with only untainted strings, store in mixed_nottainted
134+
for i in range(10):
135+
_ = add_aspect("===", not_tainted[i])
136+
137+
n2 = random.choice(not_tainted)
138+
139+
n3 = random.choice(not_tainted)
140+
141+
n4 = random.choice(not_tainted)
142+
143+
n6 = join_aspect(n4, [n2, n3])
144+
145+
mixed_nottainted.append(n6)
146+
147+
for n in mixed_nottainted:
148+
assert len(get_ranges(n)) == 0
149+
150+
89151
def test_set_get_ranges_str():
90152
s1 = "abcde😁"
91153
s2 = "defg"

0 commit comments

Comments
 (0)