Skip to content

Commit 94c38cf

Browse files
authored
chore(asm): move iast taint tracking dict to asm context (#5397)
IAST: Move the taint tracking information to the asm context so its scope is the current request. This way we resolve the problem of the memory leaked from tainted objects. For details, compare appsec-iast-latest vs appsec-iast-staging in reliability environment (staging being this branch) ## Checklist - [x] Change(s) are motivated and described in the PR description. - [x] Testing strategy is described if automated tests are not included in the PR. - [x] Risk is outlined (performance impact, potential for breakage, maintainability, etc). - [x] Change is maintainable (easy to change, telemetry, documentation). - [x] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/contributing.html#Release-Note-Guidelines) are followed. - [x] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/)). - [x] Author is aware of the performance implications of this PR as reported in the benchmarks PR comment. ## Reviewer Checklist - [x] Title is accurate. - [x] No unnecessary changes are introduced. - [x] Description motivates each change. - [x] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes unless absolutely necessary. - [x] Testing strategy adequately addresses listed risk(s). - [x] Change is maintainable (easy to change, telemetry, documentation). - [x] Release note makes sense to a user of the library. - [x] Reviewer is aware of, and discussed the performance implications of this PR as reported in the benchmarks PR comment.
1 parent a206d39 commit 94c38cf

File tree

13 files changed

+157
-188
lines changed

13 files changed

+157
-188
lines changed

ddtrace/appsec/_asm_request_context.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424
thread will have a different context.
2525
"""
2626

27-
2827
# FIXME: remove these and use the new context API once implemented and allowing
2928
# contexts without spans
3029

@@ -37,6 +36,7 @@
3736
_DD_WAF_CALLBACK = contextvars.ContextVar("datadog_early_waf_callback", default=None)
3837
_DD_WAF_RESULTS = contextvars.ContextVar("datadog_early_waf_results", default=([[], [], []]))
3938
_DD_WAF_SENT = contextvars.ContextVar("datadog_waf_adress_sent", default=None)
39+
_DD_IAST_TAINT_DICT = contextvars.ContextVar("datadog_iast_taint_dict", default={})
4040

4141

4242
def reset(): # type: () -> None
@@ -45,6 +45,7 @@ def reset(): # type: () -> None
4545
_DD_EARLY_HEADERS_CASE_SENSITIVE_CONTEXTVAR.set(False)
4646
_DD_BLOCK_REQUEST_CALLABLE.set(None)
4747
_DD_WAF_SENT.set(set())
48+
_DD_IAST_TAINT_DICT.set({})
4849

4950

5051
def set_ip(ip): # type: (Optional[str]) -> None
@@ -55,6 +56,14 @@ def get_ip(): # type: () -> Optional[str]
5556
return _DD_EARLY_IP_CONTEXTVAR.get()
5657

5758

59+
def set_taint_dict(taint_dict):  # type: (dict) -> None
    """Bind ``taint_dict`` as the IAST taint mapping of the current context.

    The value is stored in the ``_DD_IAST_TAINT_DICT`` context variable, so it
    is only visible through ``get_taint_dict()`` from the same context.
    """
    _DD_IAST_TAINT_DICT.set(taint_dict)
61+
62+
63+
def get_taint_dict():  # type: () -> dict
    """Return the IAST taint mapping bound to the current context.

    When nothing has been set in the current context, the default of the
    ``_DD_IAST_TAINT_DICT`` context variable is returned.

    NOTE(review): that default is a single dict object created when the
    variable is declared, so in-place mutations of it are shared by every
    context that has not yet called ``set_taint_dict()`` or ``reset()``.
    """
    current_taint_dict = _DD_IAST_TAINT_DICT.get()
    return current_taint_dict
65+
66+
5867
# Note: get/set headers use Any since we just carry the headers here without changing or using them
5968
# and different frameworks use different types that we don't want to force it into a Mapping at the
6069
# early point set_headers is usually called

ddtrace/appsec/iast/_ast/aspects/__init__.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,11 @@
44
import codecs
55

66
from ddtrace.appsec.iast._input_info import Input_info
7-
from ddtrace.appsec.iast._taint_tracking import add_taint_pyobject # type: ignore[attr-defined]
8-
from ddtrace.appsec.iast._taint_tracking import get_tainted_ranges # type: ignore[attr-defined]
9-
from ddtrace.appsec.iast._taint_tracking import is_pyobject_tainted # type: ignore[attr-defined]
10-
from ddtrace.appsec.iast._taint_tracking import set_tainted_ranges # type: ignore[attr-defined]
11-
from ddtrace.appsec.iast._taint_tracking import taint_pyobject # type: ignore[attr-defined]
7+
from ddtrace.appsec.iast._taint_tracking import add_taint_pyobject
8+
from ddtrace.appsec.iast._taint_tracking import get_tainted_ranges
9+
from ddtrace.appsec.iast._taint_tracking import is_pyobject_tainted
10+
from ddtrace.appsec.iast._taint_tracking import set_tainted_ranges
11+
from ddtrace.appsec.iast._taint_tracking import taint_pyobject
1212

1313

1414
def str_aspect(*args, **kwargs):
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
#!/usr/bin/env python3
2+
3+
from typing import TYPE_CHECKING
4+
5+
from ddtrace.appsec._asm_request_context import get_taint_dict
6+
from ddtrace.appsec._asm_request_context import set_taint_dict
7+
from ddtrace.appsec.iast._taint_tracking._native import new_pyobject_id
8+
from ddtrace.appsec.iast._taint_tracking._native import setup # noqa: F401
9+
10+
11+
if TYPE_CHECKING:
12+
from typing import Any
13+
14+
from ddtrace.appsec.iast._input_info import Input_info
15+
16+
17+
def add_taint_pyobject(pyobject, op1, op2):  # type: (Any, Any, Any) -> Any
    """Propagate the taint ranges of ``op1`` and ``op2`` onto ``pyobject``.

    If neither operand is tainted, ``pyobject`` is returned untouched.
    Otherwise the object returned by ``new_pyobject_id`` is registered in the
    current taint mapping with the ranges of ``op1`` followed by the ranges of
    ``op2``; the start of every ``op2`` range is shifted by ``len(op1)``
    (presumably because ``pyobject`` is ``op1`` followed by ``op2`` -- confirm
    against the callers).

    :param pyobject: result object that should inherit the taint.
    :param op1: first operand.
    :param op2: second operand.
    :return: the object that was registered as tainted, or ``pyobject`` itself
        when no operand is tainted.
    """
    op1_tainted = is_pyobject_tainted(op1)
    op2_tainted = is_pyobject_tainted(op2)
    if not op1_tainted and not op2_tainted:
        return pyobject

    result = new_pyobject_id(pyobject, len(pyobject))
    ranges_by_id = get_taint_dict()

    merged = list(ranges_by_id[id(op1)]) if op1_tainted else []
    if op2_tainted:
        shift = len(op1)
        merged.extend(
            (source, start + shift, size) for source, start, size in ranges_by_id[id(op2)]
        )

    # The mapping is keyed by object identity, not by value.
    ranges_by_id[id(result)] = tuple(merged)
    set_taint_dict(ranges_by_id)
    return result
34+
35+
36+
def taint_pyobject(pyobject, input_info):  # type: (Any, Input_info) -> Any
    """Mark the whole of ``pyobject`` as tainted by ``input_info``.

    Falsy (e.g. empty) objects are returned unchanged and are not registered.
    Otherwise the object returned by ``new_pyobject_id`` is stored in the
    current taint mapping with a single range ``(input_info, 0, len(pyobject))``.

    :param pyobject: object to taint; it must support ``len()``.
    :param input_info: origin of the taint; must not be ``None``.
    :return: the object that was registered as tainted.
    """
    if not pyobject:  # len(pyobject) < 1
        return pyobject
    assert input_info is not None

    length = len(pyobject)
    tainted = new_pyobject_id(pyobject, length)
    whole_object_range = (input_info, 0, length)

    taint_dict = get_taint_dict()
    taint_dict[id(tainted)] = (whole_object_range,)
    set_taint_dict(taint_dict)
    return tainted
46+
47+
48+
def is_pyobject_tainted(pyobject):  # type: (Any) -> bool
    """Tell whether ``id(pyobject)`` has an entry in the current taint mapping."""
    tracked_ids = get_taint_dict()
    return id(pyobject) in tracked_ids
50+
51+
52+
def set_tainted_ranges(pyobject, ranges):  # type: (Any, tuple) -> None
    """Register ``ranges`` as the taint ranges of ``pyobject``.

    The entry is stored under ``id(pyobject)`` in the current taint mapping.

    :param pyobject: object whose identity is used as the key.
    :param ranges: taint ranges to associate with the object.
    :raises AssertionError: if the object already has an entry in the mapping
        (only when assertions are enabled).
    """
    taint_dict = get_taint_dict()
    pyobject_id = id(pyobject)
    # Entries are keyed by id(), so the membership test has to use the id too;
    # the object itself is never a key and may be unhashable (e.g. bytearray).
    assert pyobject_id not in taint_dict
    taint_dict[pyobject_id] = ranges
    set_taint_dict(taint_dict)
57+
58+
59+
def get_tainted_ranges(pyobject):  # type: (Any) -> tuple
    """Return the taint ranges stored for ``id(pyobject)``.

    An empty tuple is returned when the object has no entry in the current
    taint mapping.
    """
    no_ranges = ()
    return get_taint_dict().get(id(pyobject), no_ranges)
61+
62+
63+
def clear_taint_mapping():  # type: () -> None
    """Replace the current context's taint mapping with a new, empty dict."""
    empty_mapping = {}  # type: dict
    set_taint_dict(empty_mapping)
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
#include <Python.h>
2+
#include <iostream>
3+
#include <tuple>
4+
5+
PyObject *bytes_join = NULL;
6+
PyObject *bytearray_join = NULL;
7+
PyObject *empty_bytes = NULL;
8+
PyObject *empty_bytearray = NULL;
9+
PyObject *empty_unicode = NULL;
10+
11+
static PyObject *setup(PyObject *Py_UNUSED(module), PyObject *args) {
12+
PyArg_ParseTuple(args, "OO", &bytes_join, &bytearray_join);
13+
empty_bytes = PyBytes_FromString("");
14+
empty_bytearray = PyByteArray_FromObject(empty_bytes);
15+
empty_unicode = PyUnicode_New(0, 127);
16+
Py_RETURN_NONE;
17+
}
18+
19+
// new_pyobject_id(tainted_object, object_length) -> object
//
// Returns a new reference to an object with the same value as
// `tainted_object`:
//   * str that is not interned           -> the same object;
//   * non-str with object_length > 1     -> the same object;
//   * interned str                       -> result of joining
//                                           (tainted_object, "") with "";
//   * bytes with object_length <= 1      -> bytes_join(b"", (obj, b""));
//   * anything else (treated as a
//     bytearray) with object_length <= 1 -> bytearray_join(bytearray(),
//                                           (obj, bytearray())).
//
// Returns NULL with an exception set when the arguments cannot be parsed,
// when setup() has not been called yet, or when the join itself fails.
static PyObject *new_pyobject_id(PyObject *Py_UNUSED(module), PyObject *args) {
  PyObject *tainted_object = NULL;
  Py_ssize_t object_length = 0;
  if (!PyArg_ParseTuple(args, "On", &tainted_object, &object_length)) {
    return NULL;
  }

  PyObject *separator = NULL;
  PyObject *join_callable = NULL; // stays NULL for str: PyUnicode_Join is used
  bool is_unicode = false;

  if (PyUnicode_Check(tainted_object)) {
    if (PyUnicode_CHECK_INTERNED(tainted_object) == 0) { // SSTATE_NOT_INTERNED
      Py_INCREF(tainted_object);
      return tainted_object;
    }
    is_unicode = true;
    separator = empty_unicode;
  } else if (object_length > 1) {
    // Bytes and bytearrays with length > 1 are not interned
    Py_INCREF(tainted_object);
    return tainted_object;
  } else if (PyBytes_Check(tainted_object)) {
    separator = empty_bytes;
    join_callable = bytes_join;
  } else {
    separator = empty_bytearray;
    join_callable = bytearray_join;
  }

  // A NULL separator would silently terminate the NULL-terminated argument
  // list of PyObject_CallFunctionObjArgs, so fail loudly instead.
  if (separator == NULL || (!is_unicode && join_callable == NULL)) {
    PyErr_SetString(PyExc_RuntimeError,
                    "setup() must be called before new_pyobject_id()");
    return NULL;
  }

  // Py_BuildValue returns a new reference that this function owns and has to
  // release once the join is done.
  PyObject *parts = Py_BuildValue("(OO)", tainted_object, separator);
  if (parts == NULL) {
    return NULL;
  }

  PyObject *copy =
      is_unicode
          ? PyUnicode_Join(separator, parts)
          : PyObject_CallFunctionObjArgs(join_callable, separator, parts, NULL);
  Py_DECREF(parts);
  return copy;
}
44+
45+
// Functions exported by the module.
// METH_VARARGS is used because compatibility with python 3.5 and 3.6 is
// needed; METH_FASTCALL could be used instead for python >= 3.7.
static PyMethodDef TaintTrackingMethods[] = {
    {"setup", (PyCFunction)setup, METH_VARARGS, "setup tainting module"},
    {"new_pyobject_id", (PyCFunction)new_pyobject_id, METH_VARARGS,
     "new_pyobject_id"},
    // Sentinel entry that terminates the table.
    {NULL, NULL, 0, NULL},
};
53+
54+
// Definition of the ddtrace.appsec.iast._taint_tracking._native module.
static struct PyModuleDef taint_tracking = {
    PyModuleDef_HEAD_INIT,
    "ddtrace.appsec.iast._taint_tracking._native", // m_name
    "taint tracking module",                       // m_doc
    -1,                   // m_size: no per-module state is allocated
    TaintTrackingMethods, // m_methods
};
57+
58+
// Module initialisation entry point: creates the module described by
// `taint_tracking`.
PyMODINIT_FUNC PyInit__native(void) {
  // PyModule_Create returns NULL on failure, which is also what this
  // function returns in that case, so its result is passed through as is.
  return PyModule_Create(&taint_tracking);
}

ddtrace/appsec/iast/_taint_tracking/_taint_tracking.cpp

Lines changed: 0 additions & 168 deletions
This file was deleted.

ddtrace/appsec/iast/_taint_utils.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#!/usr/bin/env python3
22
from ddtrace.appsec.iast._input_info import Input_info
3-
from ddtrace.appsec.iast._taint_tracking import is_pyobject_tainted # type: ignore[attr-defined]
4-
from ddtrace.appsec.iast._taint_tracking import taint_pyobject # type: ignore[attr-defined]
3+
from ddtrace.appsec.iast._taint_tracking import is_pyobject_tainted
4+
from ddtrace.appsec.iast._taint_tracking import taint_pyobject
55

66

77
DBAPI_INTEGRATIONS = ("sqlite", "psycopg", "mysql", "mariadb")

ddtrace/bootstrap/sitecustomize.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,7 @@ def drop(module_name):
178178

179179
from ddtrace.appsec.iast._ast.ast_patching import _should_iast_patch
180180
from ddtrace.appsec.iast._loader import _exec_iast_patched_module
181-
from ddtrace.appsec.iast._taint_tracking import setup # type:ignore[attr-defined]
181+
from ddtrace.appsec.iast._taint_tracking import setup
182182
from ddtrace.internal.module import ModuleWatchdog
183183

184184
setup(bytes.join, bytearray.join)

ddtrace/contrib/flask/patch.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ def taint_request_init(wrapped, instance, args, kwargs):
109109
wrapped(*args, **kwargs)
110110
if _is_iast_enabled():
111111
from ddtrace.appsec.iast._input_info import Input_info
112-
from ddtrace.appsec.iast._taint_tracking import taint_pyobject # type: ignore[attr-defined]
112+
from ddtrace.appsec.iast._taint_tracking import taint_pyobject
113113

114114
taint_pyobject(
115115
instance.query_string,

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -308,7 +308,7 @@ def get_exts_for(name):
308308
if sys.version_info >= (3, 6, 0):
309309
ext_modules.append(
310310
Extension(
311-
"ddtrace.appsec.iast._taint_tracking",
311+
"ddtrace.appsec.iast._taint_tracking._native",
312312
# Sort source files for reproducibility
313313
sources=sorted(
314314
glob.glob(

0 commit comments

Comments
 (0)