Skip to content

Commit f6339fc

Browse files
authored
fix(iast): fix memory leak error [backport #7788 to 1.20] (#7820)
This backports below backports to 1.20. - fix(iast): iast leak for python 3.11: #7788 - chore(iast): add propagation for str aspect join: #6940 - chore(iast): add collision tests for bytes and bytearray: #7367 - chore(iast): store hash and id separately in tx_taint_map: #7340 ## Checklist - [x] Change(s) are motivated and described in the PR description. - [x] Testing strategy is described if automated tests are not included in the PR. - [x] Risk is outlined (performance impact, potential for breakage, maintainability, etc). - [x] Change is maintainable (easy to change, telemetry, documentation). - [x] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) are followed. If no release note is required, add label `changelog/no-changelog`. - [x] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/)). - [x] Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [x] Title is accurate. - [x] No unnecessary changes are introduced. - [x] Description motivates each change. - [x] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes unless absolutely necessary. - [x] Testing strategy adequately addresses listed risk(s). - [x] Change is maintainable (easy to change, telemetry, documentation). - [x] Release note makes sense to a user of the library. - [x] Reviewer has explicitly acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment. - [x] Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) - [x] If this PR touches code that signs or publishes builds or packages, or handles credentials of any kind, I've requested a review from `@DataDog/security-design-and-guidance`. - [x] This PR doesn't touch any of that.
1 parent 2eee5cd commit f6339fc

35 files changed

+1086
-560
lines changed

ddtrace/appsec/iast/_stacktrace.c

Lines changed: 28 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -17,18 +17,22 @@
1717
#define GET_LINENO(frame) PyFrame_GetLineNumber((PyFrameObject*)frame)
1818
#define GET_FRAME(tstate) PyThreadState_GetFrame(tstate)
1919
#define GET_PREVIOUS(frame) PyFrame_GetBack(frame)
20-
#define FRAME_DECREF(frame) Py_DECREF(frame)
20+
#define FRAME_DECREF(frame) Py_DecRef(frame)
2121
#define FRAME_XDECREF(frame) Py_XDECREF(frame)
22-
#define FILENAME_DECREF(filename) Py_DECREF(filename)
23-
#define FILENAME_XDECREF(filename) Py_XDECREF(filename)
22+
#define FILENAME_DECREF(filename) Py_DecRef(filename)
23+
#define FILENAME_XDECREF(filename) \
24+
if (filename) \
25+
Py_DecRef(filename)
2426
static inline PyObject*
2527
GET_FILENAME(PyFrameObject* frame)
2628
{
2729
PyCodeObject* code = PyFrame_GetCode(frame);
2830
if (!code) {
2931
return NULL;
3032
}
31-
return PyObject_GetAttrString((PyObject*)code, "co_filename");
33+
PyObject* filename = PyObject_GetAttrString((PyObject*)code, "co_filename");
34+
Py_DecRef(code);
35+
return filename;
3236
}
3337
#else
3438
#define GET_FRAME(tstate) tstate->frame
@@ -59,7 +63,7 @@ get_file_and_line(PyObject* Py_UNUSED(module), PyObject* cwd_obj)
5963
{
6064
PyThreadState* tstate = PyThreadState_Get();
6165
if (!tstate) {
62-
return NULL;
66+
goto exit_0;
6367
}
6468

6569
int line;
@@ -69,18 +73,16 @@ get_file_and_line(PyObject* Py_UNUSED(module), PyObject* cwd_obj)
6973
char* cwd = NULL;
7074

7175
if (!PyUnicode_FSConverter(cwd_obj, &cwd_bytes)) {
72-
return NULL;
76+
goto exit_0;
7377
}
7478
cwd = PyBytes_AsString(cwd_bytes);
7579
if (!cwd) {
76-
Py_DECREF(cwd_bytes);
77-
return NULL;
80+
goto exit_0;
7881
}
7982

8083
PyFrameObject* frame = GET_FRAME(tstate);
8184
if (!frame) {
82-
Py_DECREF(cwd_bytes);
83-
return NULL;
85+
goto exit_0;
8486
}
8587

8688
while (NULL != frame) {
@@ -109,10 +111,25 @@ get_file_and_line(PyObject* Py_UNUSED(module), PyObject* cwd_obj)
109111
result = PyTuple_Pack(2, filename_o, line_obj);
110112
break;
111113
}
114+
if (result == NULL) {
115+
goto exit_0;
116+
}
117+
112118
exit:
113-
Py_DECREF(cwd_bytes);
119+
Py_DecRef(cwd_bytes);
120+
FRAME_XDECREF(frame);
121+
FILENAME_XDECREF(filename_o);
122+
return result;
123+
124+
exit_0:; // fix: "a label can only be part of a statement and a declaration is not a statement" error
125+
// Return "", 0
126+
PyObject* line_obj = Py_BuildValue("i", 0);
127+
filename_o = PyUnicode_FromString("");
128+
result = PyTuple_Pack(2, filename_o, line_obj);
129+
Py_DecRef(cwd_bytes);
114130
FRAME_XDECREF(frame);
115131
FILENAME_XDECREF(filename_o);
132+
Py_DecRef(line_obj);
116133
return result;
117134
}
118135

ddtrace/appsec/iast/_taint_tracking/Aspects/AspectExtend.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
#include "AspectExtend.h"
2-
#include "Initializer/Initializer.h"
32

43
PyObject*
54
api_extend_aspect(PyObject* self, PyObject* const* args, Py_ssize_t nargs)
@@ -28,7 +27,7 @@ api_extend_aspect(PyObject* self, PyObject* const* args, Py_ssize_t nargs)
2827
// Ensure no returns are done before this method call
2928
auto method_name = PyUnicode_FromString("extend");
3029
PyObject_CallMethodObjArgs(candidate_text, method_name, to_add, nullptr);
31-
Py_DECREF(method_name);
30+
Py_DecRef(method_name);
3231

3332
if (not ctx_map or ctx_map->empty() or to_result == nullptr) {
3433
Py_RETURN_NONE;

ddtrace/appsec/iast/_taint_tracking/Aspects/AspectExtend.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#pragma once
22

3+
#include "Initializer/Initializer.h"
34
#include <Python.h>
45

56
PyObject*
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
#pragma once
2+
#include "Aspects/AspectFormat.h"
3+
4+
template<class StrType>
5+
StrType
6+
api_format_aspect(StrType& candidate_text,
7+
const py::tuple& parameter_list,
8+
const py::args& args,
9+
const py::kwargs& kwargs)
10+
{
11+
auto [ranges_orig, candidate_text_ranges] = are_all_text_all_ranges(candidate_text.ptr(), parameter_list);
12+
13+
if (!ranges_orig.empty() or !candidate_text_ranges.empty()) {
14+
auto new_template =
15+
_int_as_formatted_evidence<StrType>(candidate_text, candidate_text_ranges, TagMappingMode::Mapper);
16+
17+
py::list new_args;
18+
py::dict new_kwargs;
19+
for (const auto arg : args) {
20+
auto str_arg = py::cast<py::str>(arg);
21+
auto n_arg = _all_as_formatted_evidence<py::str>(str_arg, TagMappingMode::Mapper);
22+
new_args.append(n_arg);
23+
}
24+
for (auto [key, value] : new_kwargs) {
25+
auto str_value = py::cast<py::str>(value);
26+
auto n_value = _all_as_formatted_evidence<py::str>(str_value, TagMappingMode::Mapper);
27+
new_kwargs[key] = n_value;
28+
}
29+
30+
StrType new_template_format =
31+
py::getattr(new_template, "format")(*(py::cast<py::tuple>(new_args)), **new_kwargs);
32+
std::tuple result = _convert_escaped_text_to_taint_text<StrType>(new_template_format, ranges_orig);
33+
StrType result_text = get<0>(result);
34+
TaintRangeRefs result_ranges = get<1>(result);
35+
PyObject* new_result = new_pyobject_id(result_text.ptr());
36+
set_ranges(new_result, result_ranges);
37+
return py::reinterpret_steal<StrType>(new_result);
38+
}
39+
return py::getattr(candidate_text, "format")(*args, **kwargs);
40+
}
41+
42+
void
43+
pyexport_format_aspect(py::module& m)
44+
{
45+
m.def("_format_aspect", &api_format_aspect<py::str>, "candidate_text"_a, "parameter_list"_a);
46+
}
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
#pragma once
2+
#include "Aspects/Helpers.h"
3+
#include "Initializer/Initializer.h"
4+
#include "TaintTracking/Source.h"
5+
#include "TaintTracking/TaintRange.h"
6+
#include "TaintTracking/TaintedObject.h"
7+
8+
template<class StrType>
9+
StrType
10+
api_format_aspect(StrType& candidate_text,
11+
const py::tuple& parameter_list,
12+
const py::args& args,
13+
const py::kwargs& kwargs);
14+
15+
void
16+
pyexport_format_aspect(py::module& m);

ddtrace/appsec/iast/_taint_tracking/Aspects/AspectJoin.cpp

Lines changed: 68 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,62 @@
11
#include "AspectJoin.h"
22

3+
PyObject*
4+
aspect_join_str(PyObject* sep,
5+
PyObject* result,
6+
PyObject* iterable_str,
7+
size_t len_iterable,
8+
TaintRangeMapType* tx_taint_map)
9+
{
10+
// This is the special case for unicode str and unicode iterable_str.
11+
// The iterable elements string will be split into 1 char-length strings.
12+
const auto& to_iterable_str = get_tainted_object(iterable_str, tx_taint_map);
13+
const auto& to_joiner = get_tainted_object(sep, tx_taint_map);
14+
15+
if (to_joiner == nullptr and to_iterable_str == nullptr) {
16+
// No taints at all: return original result
17+
return result;
18+
}
19+
20+
const size_t& len_sep = PyUnicode_GET_LENGTH(sep);
21+
const size_t& element_len = 1;
22+
unsigned long current_pos{ 0L };
23+
TaintedObjectPtr result_to;
24+
25+
if (len_sep == 0 and to_iterable_str) {
26+
// Empty separator: the result is identical to iterable_str
27+
result_to = initializer->allocate_tainted_object_copy(to_iterable_str);
28+
} else if (len_sep > 0 and to_joiner and to_iterable_str == nullptr) {
29+
// Iterable str is not tainted, only add n-times the joiner taints
30+
result_to = initializer->allocate_tainted_object();
31+
for (size_t i = 0; i < len_iterable - 1; i++) {
32+
current_pos += element_len;
33+
result_to->add_ranges_shifted(to_joiner, current_pos);
34+
current_pos += len_sep;
35+
}
36+
} else {
37+
// General case, iterable and joiner may be tainted
38+
result_to = initializer->allocate_tainted_object();
39+
for (size_t i = 0; i < len_iterable; i++) {
40+
if (to_iterable_str) {
41+
result_to->add_ranges_shifted(to_iterable_str, current_pos, element_len, i);
42+
}
43+
44+
current_pos += element_len;
45+
if (i < len_iterable - 1) {
46+
if (to_joiner) {
47+
result_to->add_ranges_shifted(to_joiner, current_pos);
48+
}
49+
current_pos += len_sep;
50+
}
51+
}
52+
}
53+
54+
PyObject* new_result{ new_pyobject_id(result) };
55+
set_tainted_object(new_result, result_to, tx_taint_map);
56+
Py_DecRef(result);
57+
return new_result;
58+
}
59+
360
PyObject*
461
aspect_join(PyObject* sep, PyObject* result, PyObject* iterable_elements, TaintRangeMapType* tx_taint_map)
562
{
@@ -13,6 +70,12 @@ aspect_join(PyObject* sep, PyObject* result, PyObject* iterable_elements, TaintR
1370
} else if (PyTuple_Check(iterable_elements)) {
1471
len_iterable = PyTuple_Size(iterable_elements);
1572
GetElement = PyTuple_GetItem;
73+
} else if (PyUnicode_Check(sep) and PyUnicode_Check(iterable_elements)) {
74+
len_iterable = PyUnicode_GET_LENGTH(iterable_elements);
75+
if (len_iterable)
76+
return aspect_join_str(sep, result, iterable_elements, len_iterable, tx_taint_map);
77+
else
78+
return result; // Empty string is returned if empty iterable, so no tainted
1679
}
1780

1881
unsigned long current_pos{ 0L };
@@ -69,9 +132,9 @@ aspect_join(PyObject* sep, PyObject* result, PyObject* iterable_elements, TaintR
69132
}
70133
}
71134

72-
PyObject* new_result{ new_pyobject_id(result, get_pyobject_size(result)) };
135+
PyObject* new_result{ new_pyobject_id(result) };
73136
set_tainted_object(new_result, result_to, tx_taint_map);
74-
Py_DECREF(result);
137+
Py_DecRef(result);
75138
return new_result;
76139
}
77140

@@ -97,7 +160,7 @@ api_join_aspect(PyObject* self, PyObject* const* args, Py_ssize_t nargs)
97160
PyList_Append(list_aux, item);
98161
}
99162
arg0 = list_aux;
100-
Py_DECREF(iterator);
163+
Py_DecRef(iterator);
101164
decref_arg0 = true;
102165
}
103166
}
@@ -118,7 +181,7 @@ api_join_aspect(PyObject* self, PyObject* const* args, Py_ssize_t nargs)
118181
if (get_pyobject_size(result) == 0) {
119182
// Empty result cannot have taint ranges
120183
if (decref_arg0) {
121-
Py_DECREF(arg0);
184+
Py_DecRef(arg0);
122185
}
123186
return result;
124187
}
@@ -129,7 +192,7 @@ api_join_aspect(PyObject* self, PyObject* const* args, Py_ssize_t nargs)
129192
}
130193
auto res = aspect_join(sep, result, arg0, ctx_map);
131194
if (decref_arg0) {
132-
Py_DECREF(arg0);
195+
Py_DecRef(arg0);
133196
}
134197
return res;
135198
}

ddtrace/appsec/iast/_taint_tracking/Aspects/AspectJoin.h

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,7 @@
11
#pragma once
2-
#include "Aspects/Helpers.h"
32
#include "Initializer/Initializer.h"
4-
#include "TaintTracking//TaintedObject.h"
53
#include "TaintTracking/TaintRange.h"
6-
#include "TaintedOps/TaintedOps.h"
7-
#include <Python.h>
8-
#include <pybind11/pybind11.h>
4+
#include "TaintTracking/TaintedObject.h"
95

106
namespace py = pybind11;
117

ddtrace/appsec/iast/_taint_tracking/Aspects/AspectOperatorAdd.cpp

Lines changed: 32 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,20 @@
11
#include "AspectOperatorAdd.h"
22

3+
/**
4+
* This function updates result_o object with taint information of candidate_text and/or text_to_add
5+
*
6+
* @param result_o The result object to which the aspect will be added.
7+
* @param candidate_text The candidate text object to which the aspect will be added.
8+
* @param text_to_add The text aspect to be added.
9+
* @param tx_taint_map The taint range map that stores taint information.
10+
*
11+
* @return A new result object with the taint information.
12+
*/
313
PyObject*
414
add_aspect(PyObject* result_o, PyObject* candidate_text, PyObject* text_to_add, TaintRangeMapType* tx_taint_map)
515
{
616
size_t len_candidate_text{ get_pyobject_size(candidate_text) };
717
size_t len_text_to_add{ get_pyobject_size(text_to_add) };
8-
size_t len_result_o{ get_pyobject_size(result_o) };
918

1019
if (len_text_to_add == 0 and len_candidate_text > 0) {
1120
return candidate_text;
@@ -16,8 +25,8 @@ add_aspect(PyObject* result_o, PyObject* candidate_text, PyObject* text_to_add,
1625

1726
const auto& to_candidate_text = get_tainted_object(candidate_text, tx_taint_map);
1827
if (to_candidate_text and to_candidate_text->get_ranges().size() >= TaintedObject::TAINT_RANGE_LIMIT) {
19-
const auto& res_new_id = new_pyobject_id(result_o, len_result_o);
20-
Py_DECREF(result_o);
28+
const auto& res_new_id = new_pyobject_id(result_o);
29+
Py_DecRef(result_o);
2130
// If left side is already at the maximum taint ranges, we just reuse its
2231
// ranges, we don't need to look at left side.
2332
set_tainted_object(res_new_id, to_candidate_text, tx_taint_map);
@@ -29,21 +38,35 @@ add_aspect(PyObject* result_o, PyObject* candidate_text, PyObject* text_to_add,
2938
return result_o;
3039
}
3140
if (!to_text_to_add) {
32-
const auto& res_new_id = new_pyobject_id(result_o, len_result_o);
33-
Py_DECREF(result_o);
41+
const auto& res_new_id = new_pyobject_id(result_o);
42+
Py_DecRef(result_o);
3443
set_tainted_object(res_new_id, to_candidate_text, tx_taint_map);
3544
return res_new_id;
3645
}
3746

38-
auto tainted = initializer->allocate_tainted_object(to_candidate_text);
39-
tainted->add_ranges_shifted(to_text_to_add, (long)len_candidate_text);
40-
const auto& res_new_id = new_pyobject_id(result_o, len_result_o);
41-
Py_DECREF(result_o);
47+
auto tainted = initializer->allocate_tainted_object_copy(to_candidate_text);
48+
tainted->add_ranges_shifted(to_text_to_add, (RANGE_START)len_candidate_text);
49+
const auto res_new_id = new_pyobject_id(result_o);
50+
Py_DecRef(result_o);
4251
set_tainted_object(res_new_id, tainted, tx_taint_map);
4352

4453
return res_new_id;
4554
}
4655

56+
/**
57+
* Adds aspect, override all python Add operations.
58+
*
59+
* The AST Visitor (ddtrace/appsec/_iast/_ast/visitor.py) replaces all "+" operations in Python code with this function.
60+
* This function takes 2 arguments. If the operation is 'a = b + c', this function should be 'a = api_add_aspect(b, c)'.
61+
* This function connects Python with the C++ function 'add_aspect'.
62+
*
63+
* @param self The Python extension module.
64+
* @param args An array of Python objects containing the candidate text and text aspect.
65+
* @param nargs The number of arguments in the 'args' array.
66+
*
67+
* @return A new Python object representing the result of adding the aspect to the candidate text, considering taint
68+
* information.
69+
*/
4770
PyObject*
4871
api_add_aspect(PyObject* self, PyObject* const* args, Py_ssize_t nargs)
4972
{
Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,7 @@
1-
#ifndef _NATIVE_ASPECTOPERATORADD_H
2-
#define _NATIVE_ASPECTOPERATORADD_H
3-
#include "Aspects/Helpers.h"
1+
#pragma once
42
#include "Initializer/Initializer.h"
53
#include "TaintTracking/TaintRange.h"
64
#include "TaintTracking/TaintedObject.h"
7-
#include "TaintedOps/TaintedOps.h"
8-
#include <pybind11/pybind11.h>
95

106
PyObject*
117
api_add_aspect(PyObject* self, PyObject* const* args, Py_ssize_t nargs);
12-
13-
#endif //_NATIVE_ASPECTOPERATORADD_H

0 commit comments

Comments
 (0)