Skip to content

Commit c698a10

Browse files
avara1986, gnufede, P403n1x87, juanjux
authored
chore(iast): taint tracking propagation, source (#6108)
Update IAST propagation:

- IAST compliance: rename "Input info" to "Source".
- Migrate Source to C++.
- Add a new dependency to the setup: CMake. We need this dependency to build the IAST binary.
- Update the setup to use the IAST CMakeLists file.
- Vendorize [pybind11](https://pybind11.readthedocs.io/en/stable/).

This PR is part of a bigger one: #6283

## Migrate Source to C++

We're working in parallel to migrate all IAST code to C++. That's why this PR changes many things to work with C++. We migrate Source first to validate that C++ is working; the main features come later.

## Update setup

Extend the `build_ext` command to build C++ binaries with CMake, following the [pybind11 recommendations](https://github.com/pybind/).

## Pybind11

We vendorize pybind11 to fix the build of the wheel files. Why do we use this approach?

- If we add pybind11 as a dependency, we need to find the Python binary in our CMakeLists file. With that, the macOS ARM wheel build fails because CMake finds the wrong Python version:
  ```
  ld: warning: ignoring file /Library/Frameworks/Python.framework/Versions/3.8/lib/libpython3.8.dylib, building for macOS-arm64 but attempting to link with a file built for macOS-x86_64
  ```
- Our first approach was to add pybind11 in `ddtrace/vendor` and link it in `ddtrace/appsec/iast`, but it raised problems with Windows and Alpine.
- Our second approach was to add it as a git submodule, but this change forced us to update all CI steps. If we want to install ddtrace with `pip install git@github...`, it raises errors with submodules.

## Checklist

- [x] Change(s) are motivated and described in the PR description.
- [x] Testing strategy is described if automated tests are not included in the PR.
- [x] Risk is outlined (performance impact, potential for breakage, maintainability, etc).
- [x] Change is maintainable (easy to change, telemetry, documentation).
- [x] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/contributing.html#Release-Note-Guidelines) are followed.
- [x] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/)).
- [x] Backport labels are set (if [applicable](../docs/contributing.rst#release-branch-maintenance))

## Reviewer Checklist

- [x] Title is accurate.
- [x] No unnecessary changes are introduced.
- [x] Description motivates each change.
- [x] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes unless absolutely necessary.
- [x] Testing strategy adequately addresses listed risk(s).
- [x] Change is maintainable (easy to change, telemetry, documentation).
- [x] Release note makes sense to a user of the library.
- [x] Reviewer has explicitly acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment.
- [x] Backport labels are set in a manner that is consistent with the [release branch maintenance policy](../docs/contributing.rst#release-branch-maintenance)

---------

Co-authored-by: Federico Mon <[email protected]>
Co-authored-by: Gabriele N. Tornetta <[email protected]>
Co-authored-by: Juanjo Alvarez Martinez <[email protected]>
1 parent 905bd0f commit c698a10

28 files changed

+499
-183
lines changed

ddtrace/appsec/_constants.py

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,6 @@
44
import six
55

66
from ddtrace.internal.constants import HTTP_REQUEST_BLOCKED
7-
from ddtrace.internal.constants import HTTP_REQUEST_BODY
8-
from ddtrace.internal.constants import HTTP_REQUEST_COOKIE_NAME
9-
from ddtrace.internal.constants import HTTP_REQUEST_COOKIE_VALUE
10-
from ddtrace.internal.constants import HTTP_REQUEST_HEADER
11-
from ddtrace.internal.constants import HTTP_REQUEST_HEADER_NAME
12-
from ddtrace.internal.constants import HTTP_REQUEST_PARAMETER
13-
from ddtrace.internal.constants import HTTP_REQUEST_PATH
14-
from ddtrace.internal.constants import HTTP_REQUEST_PATH_PARAMETER
15-
from ddtrace.internal.constants import HTTP_REQUEST_QUERY
167
from ddtrace.internal.constants import REQUEST_PATH_PARAMS
178
from ddtrace.internal.constants import RESPONSE_HEADERS
189

@@ -89,15 +80,6 @@ class IAST(object):
8980
PATCH_MODULES = "_DD_IAST_PATCH_MODULES"
9081
DENY_MODULES = "_DD_IAST_DENY_MODULES"
9182
SEP_MODULES = ","
92-
HTTP_REQUEST_BODY = HTTP_REQUEST_BODY
93-
HTTP_REQUEST_HEADER = HTTP_REQUEST_HEADER
94-
HTTP_REQUEST_HEADER_NAME = HTTP_REQUEST_HEADER_NAME
95-
HTTP_REQUEST_PARAMETER = HTTP_REQUEST_PARAMETER
96-
HTTP_REQUEST_PATH = HTTP_REQUEST_PATH
97-
HTTP_REQUEST_PATH_PARAMETER = HTTP_REQUEST_PATH_PARAMETER
98-
HTTP_REQUEST_QUERYSTRING = HTTP_REQUEST_QUERY
99-
HTTP_REQUEST_COOKIE_NAME = HTTP_REQUEST_COOKIE_NAME
100-
HTTP_REQUEST_COOKIE_VALUE = HTTP_REQUEST_COOKIE_VALUE
10183

10284

10385
@six.add_metaclass(Constant_Class) # required for python2/3 compatibility

ddtrace/appsec/iast/_ast/aspects/__init__.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from builtins import str as builtin_str
44
import codecs
55

6-
from ddtrace.appsec.iast._input_info import Input_info
6+
from ddtrace.appsec.iast._taint_tracking import OriginType
77
from ddtrace.appsec.iast._taint_tracking import add_taint_pyobject
88
from ddtrace.appsec.iast._taint_tracking import get_tainted_ranges
99
from ddtrace.appsec.iast._taint_tracking import is_pyobject_tainted
@@ -14,7 +14,9 @@
1414
def str_aspect(*args, **kwargs):
1515
result = builtin_str(*args, **kwargs)
1616
if isinstance(args[0], (str, bytes, bytearray)) and is_pyobject_tainted(args[0]):
17-
result = taint_pyobject(result, Input_info("str_aspect", result, 0))
17+
result = taint_pyobject(
18+
result, source_name="str_aspect", source_value=result, source_origin=OriginType.PARAMETER
19+
)
1820

1921
return result
2022

ddtrace/appsec/iast/_patch.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
import sys
44
from typing import TYPE_CHECKING
55

6-
from ddtrace.appsec.iast._input_info import Input_info
76
from ddtrace.appsec.iast._util import _is_iast_enabled
87
from ddtrace.internal.logger import get_logger
98
from ddtrace.vendor.wrapt import FunctionWrapper
@@ -144,7 +143,7 @@ def if_iast_taint_returned_object_for(origin, wrapped, instance, args, kwargs):
144143

145144
if not is_pyobject_tainted(value):
146145
name = str(args[0]) if len(args) else "http.request.body"
147-
return taint_pyobject(value, Input_info(name, value, origin))
146+
return taint_pyobject(pyobject=value, source_name=name, source_value=value, source_origin=origin)
148147
except Exception:
149148
log.debug("Unexpected exception while tainting pyobject", exc_info=True)
150149
return value
@@ -155,9 +154,8 @@ def if_iast_taint_yield_tuple_for(origins, wrapped, instance, args, kwargs):
155154
from ddtrace.appsec.iast._taint_tracking import taint_pyobject
156155

157156
for key, value in wrapped(*args, **kwargs):
158-
new_key = taint_pyobject(key, Input_info(key, key, origins[0]))
159-
new_value = taint_pyobject(value, Input_info(key, value, origins[1]))
160-
157+
new_key = taint_pyobject(pyobject=key, source_name=key, source_value=key, source_origin=origins[0])
158+
new_value = taint_pyobject(pyobject=value, source_name=key, source_value=value, source_origin=origins[1])
161159
yield new_key, new_value
162160

163161
else:

ddtrace/appsec/iast/_taint_dict.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,12 @@
77
from typing import Dict
88
from typing import Tuple
99

10-
from ddtrace.appsec.iast._input_info import Input_info
10+
from ddtrace.appsec.iast._taint_tracking import Source
1111

12-
_IAST_TAINT_DICT = {} # type: Dict[int, Tuple[Tuple[Input_info, int, int],...]]
12+
_IAST_TAINT_DICT = {} # type: Dict[int, Tuple[Tuple[Source, int, int],...]]
1313

1414

15-
def get_taint_dict(): # type: () -> Dict[int, Tuple[Tuple[Input_info, int, int],...]]
15+
def get_taint_dict(): # type: () -> Dict[int, Tuple[Tuple[Source, int, int],...]]
1616
return _IAST_TAINT_DICT
1717

1818

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
#pragma once
2+
#define PY_MODULE_NAME "ddtrace.appsec.iast._taint_tracking._native"
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
#include <pybind11/pybind11.h>
2+
3+
#include <sstream>
4+
#include <string>
5+
#include <utility>
6+
7+
#include "TaintTracking/Source.h"
8+
9+
using namespace std;
10+
namespace py = pybind11;
11+
using namespace pybind11::literals;
12+
13+
Source::Source(string name, string value, OriginType origin)
14+
: name(move(name))
15+
, value(move(value))
16+
, origin(origin)
17+
{}
18+
19+
Source::Source(int name, string value, OriginType origin)
20+
: name(origin_to_str(OriginType{ name }))
21+
, value(move(value))
22+
, origin(origin)
23+
{}
24+
25+
string
26+
Source::toString() const
27+
{
28+
ostringstream ret;
29+
ret << "Source at " << this << " "
30+
<< "[name=" << name << ", value=" << string(value) << ", origin=" << origin_to_str(origin) << "]";
31+
return ret.str();
32+
}
33+
34+
Source::operator std::string() const
35+
{
36+
return toString();
37+
}
38+
39+
// Note: don't use size_t or long, if the hash is bigger than an int, Python
40+
// will re-hash it!
41+
int
42+
Source::get_hash() const
43+
{
44+
return std::hash<size_t>()(std::hash<string>()(name) ^ (long)origin ^ std::hash<string>()(value));
45+
};
46+
47+
void
48+
pyexport_source(py::module& m)
49+
{
50+
m.def("origin_to_str", &origin_to_str, "origin"_a);
51+
py::enum_<TagMappingMode>(m, "TagMappingMode")
52+
.value("Normal", TagMappingMode::Normal)
53+
.value("Mapper", TagMappingMode::Mapper)
54+
.value("Mapper_Replace", TagMappingMode::Mapper_Replace)
55+
.export_values();
56+
57+
py::enum_<OriginType>(m, "OriginType")
58+
.value("PARAMETER", OriginType::PARAMETER)
59+
.value("PARAMETER_NAME", OriginType::PARAMETER_NAME)
60+
.value("HEADER", OriginType::HEADER)
61+
.value("HEADER_NAME", OriginType::HEADER_NAME)
62+
.value("PATH", OriginType::PATH)
63+
.value("BODY", OriginType::BODY)
64+
.value("QUERY", OriginType::QUERY)
65+
.value("PATH_PARAMETER", OriginType::PATH_PARAMETER)
66+
.value("COOKIE", OriginType::COOKIE)
67+
.value("COOKIE_NAME", OriginType::COOKIE_NAME)
68+
.export_values();
69+
70+
py::class_<Source>(m, "Source")
71+
.def(py::init<string, string, const OriginType>(), "name"_a = "", "value"_a = "", "origin"_a = OriginType())
72+
.def(py::init<int, string, const OriginType>(), "name"_a = "", "value"_a = "", "origin"_a = OriginType())
73+
.def_readonly("name", &Source::name)
74+
.def_readonly("origin", &Source::origin)
75+
.def_readonly("value", &Source::value)
76+
.def("to_string", &Source::toString)
77+
.def("__hash__",
78+
[](const Source& self) { return hash<string>{}(self.name + self.value) * (33 + int(self.origin)); })
79+
.def("__str__", &Source::toString)
80+
.def("__repr__", &Source::toString)
81+
.def("__eq__", [](const Source* self, const Source* other) {
82+
if (other == nullptr)
83+
return false;
84+
return self->name == other->name && self->origin == other->origin && self->value == other->value;
85+
});
86+
}
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
#pragma once
2+
#include "structmember.h"
3+
#include <Python.h>
4+
#include <iostream>
5+
#include <pybind11/pybind11.h>
6+
#include <sstream>
7+
#include <string.h>
8+
9+
using namespace std;
10+
namespace py = pybind11;
11+
12+
enum class OriginType
13+
{
14+
PARAMETER = 0,
15+
PARAMETER_NAME,
16+
HEADER,
17+
HEADER_NAME,
18+
PATH,
19+
BODY,
20+
QUERY,
21+
PATH_PARAMETER,
22+
COOKIE,
23+
COOKIE_NAME
24+
};
25+
26+
enum class TagMappingMode
27+
{
28+
Normal,
29+
Mapper,
30+
Mapper_Replace
31+
};
32+
33+
struct Source
34+
{
35+
Source(string, string, OriginType);
36+
Source(int, string, OriginType);
37+
Source() = default;
38+
string name;
39+
string value;
40+
OriginType origin;
41+
int refcount = 0;
42+
43+
[[nodiscard]] string toString() const;
44+
45+
inline void set_values(string name = "", string value = "", OriginType origin = OriginType())
46+
{
47+
this->name = move(name);
48+
this->value = move(value);
49+
this->origin = origin;
50+
}
51+
52+
[[nodiscard]] int get_hash() const;
53+
54+
static inline size_t hash(const string& name, const string& value, const OriginType origin)
55+
{
56+
return std::hash<size_t>()(std::hash<string>()(name + value) ^ (int)origin);
57+
};
58+
59+
explicit operator std::string() const;
60+
};
61+
62+
using SourcePtr = Source*;
63+
64+
inline string
65+
origin_to_str(OriginType origin_type)
66+
{
67+
switch (origin_type) {
68+
case OriginType::PARAMETER:
69+
return "http.request.parameter";
70+
case OriginType::PARAMETER_NAME:
71+
return "http.request.parameter.name";
72+
case OriginType::HEADER:
73+
return "http.request.header";
74+
case OriginType::HEADER_NAME:
75+
return "http.request.header.name";
76+
case OriginType::PATH:
77+
return "http.request.path";
78+
case OriginType::BODY:
79+
return "http.request.body";
80+
case OriginType::QUERY:
81+
return "http.request.query";
82+
case OriginType::PATH_PARAMETER:
83+
return "http.request.path.parameter";
84+
case OriginType::COOKIE_NAME:
85+
return "http.request.cookie.name";
86+
case OriginType::COOKIE:
87+
return "http.request.cookie.value";
88+
default:
89+
return "";
90+
}
91+
}
92+
93+
using SourcePtr = Source*;
94+
95+
void
96+
pyexport_source(py::module& m);
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
#pragma once
2+
#include <pybind11/pybind11.h>
3+
4+
#include "TaintTracking/Source.h"
5+
#include "TaintedObject/TaintedObject.h"
6+
7+
inline void
8+
pyexport_m_taint_tracking(py::module& m)
9+
{
10+
py::module m_taint_tracking = m.def_submodule("taint_tracking", "Taint Tracking");
11+
pyexport_source(m_taint_tracking);
12+
}

ddtrace/appsec/iast/_taint_tracking/__init__.py

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
#!/usr/bin/env python3
2-
32
from typing import TYPE_CHECKING
43

54
from ddtrace.appsec.iast import oce
65
from ddtrace.appsec.iast._metrics import _set_metric_iast_executed_source
76
from ddtrace.appsec.iast._taint_dict import get_taint_dict
87
from ddtrace.appsec.iast._taint_tracking._native import ops # noqa: F401
8+
from ddtrace.appsec.iast._taint_tracking._native.taint_tracking import OriginType # noqa: F401
9+
from ddtrace.appsec.iast._taint_tracking._native.taint_tracking import Source # noqa: F401
910

1011

1112
setup = ops.setup
@@ -15,11 +16,10 @@
1516
from typing import Any
1617
from typing import Dict
1718
from typing import List
19+
from typing import Optional
1820
from typing import Tuple
1921
from typing import Union
2022

21-
from ddtrace.appsec.iast._input_info import Input_info
22-
2323

2424
def add_taint_pyobject(pyobject, op1, op2): # type: (Any, Any, Any) -> Any
2525
if not (is_pyobject_tainted(op1) or is_pyobject_tainted(op2)):
@@ -39,7 +39,8 @@ def add_taint_pyobject(pyobject, op1, op2): # type: (Any, Any, Any) -> Any
3939
return pyobject
4040

4141

42-
def taint_pyobject(pyobject, input_info): # type: (Any, Input_info) -> Any
42+
def taint_pyobject(pyobject, source_name=None, source_value=None, source_origin=None, start=0, len_pyobject=0):
43+
# type: (Any, Optional[str], Optional[str], Optional[OriginType], int, int) -> Any
4344
# Request is not analyzed
4445
if not oce.request_has_quota:
4546
return pyobject
@@ -48,14 +49,19 @@ def taint_pyobject(pyobject, input_info): # type: (Any, Input_info) -> Any
4849
if not pyobject or not isinstance(pyobject, (str, bytes, bytearray)):
4950
return pyobject
5051

51-
if input_info is None:
52-
return pyobject
53-
54-
len_pyobject = len(pyobject)
52+
if len_pyobject is None:
53+
len_pyobject = len(pyobject)
5554
pyobject = new_pyobject_id(pyobject, len_pyobject)
55+
if isinstance(source_name, (bytes, bytearray)):
56+
source_name = str(source_name, encoding="utf8")
57+
if isinstance(source_value, (bytes, bytearray)):
58+
source_value = str(source_value, encoding="utf8")
59+
if source_origin is None:
60+
source_origin = OriginType.PARAMETER
61+
source = Source(source_name, source_value, source_origin)
5662
taint_dict = get_taint_dict()
57-
taint_dict[id(pyobject)] = ((input_info, 0, len_pyobject),)
58-
_set_metric_iast_executed_source(input_info.origin)
63+
taint_dict[id(pyobject)] = ((source, 0, len_pyobject),)
64+
_set_metric_iast_executed_source(source.origin)
5965
return pyobject
6066

6167

@@ -73,7 +79,7 @@ def get_tainted_ranges(pyobject): # type: (Any) -> tuple
7379
return get_taint_dict().get(id(pyobject), tuple())
7480

7581

76-
def taint_ranges_as_evidence_info(pyobject): # type: (Any) -> Tuple[List[Dict[str, Union[Any, int]]], list[Input_info]]
82+
def taint_ranges_as_evidence_info(pyobject): # type: (Any) -> Tuple[List[Dict[str, Union[Any, int]]], list[Source]]
7783
value_parts = []
7884
sources = []
7985
current_pos = 0

0 commit comments

Comments
 (0)