Skip to content

Commit 1edcac0

Browse files
authored
chore(iast): refactor open/read wrapping [backport 2.21] (#12776)
Backport 36fa1ee from #12757 to 2.21. In this PR, we're migrating the way we propagate the `open` and `read` functions. Until now, we were using `wrapt`, but it's too invasive since these functions are used in many core operations. That's why we've migrated it to AST, ensuring that only the functions actually used in the application get replaced. To achieve this, we've added a new import in the AST visitor. In addition to aspects and sink points, we now define "sources" as well. ## Checklist - [x] PR author has checked that all the criteria below are met - The PR description includes an overview of the change - The PR description articulates the motivation for the change - The change includes tests OR the PR description describes a testing strategy - The PR description notes risks associated with the change, if any - Newly-added code is easy to change - The change follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) - The change includes or references documentation updates if necessary - Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [x] Reviewer has checked that all the criteria below are met - Title is accurate - All changes are related to the pull request's stated goal - Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - Testing strategy adequately addresses listed risks - Newly-added code is easy to change - Release note makes sense to a user of the library - If necessary, author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)
1 parent c1eddea commit 1edcac0

File tree

13 files changed

+178
-127
lines changed

13 files changed

+178
-127
lines changed

ddtrace/appsec/_common_module_patches.py

Lines changed: 0 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -45,14 +45,7 @@ def patch_common_modules():
4545

4646
try_wrap_function_wrapper("builtins", "open", wrapped_open_CFDDB7ABBA9081B6)
4747
try_wrap_function_wrapper("urllib.request", "OpenerDirector.open", wrapped_open_ED4CF71136E15EBF)
48-
try_wrap_function_wrapper("_io", "BytesIO.read", wrapped_read_F3E51D71B4EC16EF)
49-
try_wrap_function_wrapper("_io", "StringIO.read", wrapped_read_F3E51D71B4EC16EF)
5048
core.on("asm.block.dbapi.execute", execute_4C9BAC8E228EB347)
51-
if asm_config._iast_enabled:
52-
from ddtrace.appsec._iast._metrics import _set_metric_iast_instrumented_sink
53-
from ddtrace.appsec._iast.constants import VULN_PATH_TRAVERSAL
54-
55-
_set_metric_iast_instrumented_sink(VULN_PATH_TRAVERSAL)
5649
_is_patched = True
5750

5851

@@ -69,29 +62,6 @@ def unpatch_common_modules():
6962
_is_patched = False
7063

7164

72-
def wrapped_read_F3E51D71B4EC16EF(original_read_callable, instance, args, kwargs):
73-
"""
74-
wrapper for _io.BytesIO and _io.StringIO read function
75-
"""
76-
result = original_read_callable(*args, **kwargs)
77-
if asm_config._iast_enabled and asm_config.is_iast_request_enabled:
78-
from ddtrace.appsec._iast._taint_tracking import OriginType
79-
from ddtrace.appsec._iast._taint_tracking import Source
80-
from ddtrace.appsec._iast._taint_tracking._taint_objects import get_tainted_ranges
81-
from ddtrace.appsec._iast._taint_tracking._taint_objects import taint_pyobject
82-
83-
ranges = get_tainted_ranges(instance)
84-
if len(ranges) > 0:
85-
source = ranges[0].source if ranges[0].source else Source(name="_io", value=result, origin=OriginType.EMPTY)
86-
result = taint_pyobject(
87-
pyobject=result,
88-
source_name=source.name,
89-
source_value=source.value,
90-
source_origin=source.origin,
91-
)
92-
return result
93-
94-
9565
def _must_block(actions: Iterable[str]) -> bool:
9666
return any(action in (WAF_ACTIONS.BLOCK_ACTION, WAF_ACTIONS.REDIRECT_ACTION) for action in actions)
9767

@@ -100,15 +70,6 @@ def wrapped_open_CFDDB7ABBA9081B6(original_open_callable, instance, args, kwargs
10070
"""
10171
wrapper for open file function
10272
"""
103-
if asm_config._iast_enabled and asm_config.is_iast_request_enabled:
104-
try:
105-
from ddtrace.appsec._iast.taint_sinks.path_traversal import check_and_report_path_traversal
106-
107-
check_and_report_path_traversal(*args, **kwargs)
108-
except ImportError:
109-
# open is used during module initialization
110-
# and shouldn't be changed at that time
111-
return original_open_callable(*args, **kwargs)
11273
if (
11374
asm_config._asm_enabled
11475
and asm_config._ep_enabled

ddtrace/appsec/_iast/_ast/visitor.py

Lines changed: 42 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from ..._constants import IAST
1616
from .._metrics import _set_metric_iast_instrumented_propagation
1717
from ..constants import DEFAULT_PATH_TRAVERSAL_FUNCTIONS
18+
from ..constants import DEFAULT_SOURCE_IO_FUNCTIONS
1819
from ..constants import DEFAULT_WEAK_RANDOMNESS_FUNCTIONS
1920

2021

@@ -29,6 +30,7 @@
2930
CODE_TYPE_SITE_PACKAGES = "site_packages"
3031
CODE_TYPE_STDLIB = "stdlib"
3132
TAINT_SINK_FUNCTION_REPLACEMENT = _PREFIX + "taint_sinks.ast_function"
33+
SOURCES_FUNCTION_REPLACEMENT = _PREFIX + "sources.ast_function"
3234

3335

3436
def _mark_avoid_convert_recursively(node):
@@ -132,14 +134,6 @@ def _mark_avoid_convert_recursively(node):
132134
"taint_sinks": {
133135
"weak_randomness": DEFAULT_WEAK_RANDOMNESS_FUNCTIONS,
134136
"path_traversal": DEFAULT_PATH_TRAVERSAL_FUNCTIONS,
135-
"other": {
136-
"load",
137-
"run",
138-
"path",
139-
"exit",
140-
"sleep",
141-
"socket",
142-
},
143137
# These explicitly WON'T be replaced by taint_sink_function:
144138
"disabled": {
145139
"__new__",
@@ -149,6 +143,7 @@ def _mark_avoid_convert_recursively(node):
149143
"super",
150144
},
151145
},
146+
"sources": {"io": DEFAULT_SOURCE_IO_FUNCTIONS, "disabled": {}},
152147
}
153148

154149

@@ -168,9 +163,11 @@ def __init__(
168163
self._sinkpoints_spec = {
169164
"definitions_module": "ddtrace.appsec._iast.taint_sinks",
170165
"alias_module": _PREFIX + "taint_sinks",
171-
"functions": {},
172166
}
173-
self._sinkpoints_functions = self._sinkpoints_spec["functions"]
167+
self._source_spec = {
168+
"definitions_module": "ddtrace.appsec._iast.sources",
169+
"alias_module": _PREFIX + "sources",
170+
}
174171

175172
self._aspect_index = _ASPECTS_SPEC["slices"]["index"]
176173
self._aspect_slice = _ASPECTS_SPEC["slices"]["slice"]
@@ -182,11 +179,14 @@ def __init__(
182179
self._aspect_build_string = _ASPECTS_SPEC["operators"]["BUILD_STRING"]
183180

184181
# Sink points
185-
self._taint_sink_replace_any = self._merge_taint_sinks(
186-
_ASPECTS_SPEC["taint_sinks"]["other"],
182+
self._taint_sink_replace_any = self._merge_dicts(
187183
_ASPECTS_SPEC["taint_sinks"]["weak_randomness"],
188184
*[functions for module, functions in _ASPECTS_SPEC["taint_sinks"]["path_traversal"].items()],
189185
)
186+
self._source_replace_any = self._merge_dicts(
187+
*[functions for module, functions in _ASPECTS_SPEC["sources"]["io"].items()],
188+
)
189+
190190
self._taint_sink_replace_disabled = _ASPECTS_SPEC["taint_sinks"]["disabled"]
191191

192192
self.update_location(filename, module_name)
@@ -219,7 +219,7 @@ def update_location(self, filename: str = "", module_name: str = ""):
219219
self.codetype = CODE_TYPE_STDLIB
220220

221221
@staticmethod
222-
def _merge_taint_sinks(*args_functions: Set[str]) -> Set[str]:
222+
def _merge_dicts(*args_functions: Set[str]) -> Set[str]:
223223
merged_set = set()
224224

225225
for functions in args_functions:
@@ -284,6 +284,11 @@ def _should_replace_with_taint_sink(self, call_node: ast.Call, is_function: bool
284284

285285
return function_name in self._taint_sink_replace_any
286286

287+
def _should_replace_with_source(self, call_node: ast.Call, is_function: bool) -> bool:
288+
function_name = self._get_function_name(call_node, is_function)
289+
290+
return function_name in self._source_replace_any
291+
287292
def _add_original_function_as_arg(self, call_node: ast.Call, is_function: bool) -> Any:
288293
"""
289294
Creates the arguments for the original function
@@ -452,6 +457,21 @@ def visit_Module(self, module_node: ast.Module) -> Any:
452457
],
453458
)
454459
module_node.body.insert(insert_position, replacements_import)
460+
461+
definitions_module = self._source_spec["definitions_module"]
462+
replacements_import = self._node(
463+
ast.Import,
464+
module_node,
465+
names=[
466+
ast.alias(
467+
lineno=1,
468+
col_offset=0,
469+
name=definitions_module,
470+
asname=self._source_spec["alias_module"],
471+
)
472+
],
473+
)
474+
module_node.body.insert(insert_position, replacements_import)
455475
# Must be called here instead of the start so the line offset is already
456476
# processed
457477
self.generic_visit(module_node)
@@ -519,11 +539,7 @@ def visit_Call(self, call_node: ast.Call) -> Any:
519539
# Substitute function call
520540
call_node.func = self._attr_node(call_node, aspect)
521541
self.ast_modified = call_modified = True
522-
else:
523-
sink_point = self._sinkpoints_functions.get(func_name_node)
524-
if sink_point:
525-
call_node.func = self._attr_node(call_node, sink_point)
526-
self.ast_modified = call_modified = True
542+
527543
# Call [attr] -> Attribute [value]-> Attribute [value]-> Attribute
528544
# a.b.c.method()
529545
# replaced_method(a.b.c)
@@ -593,6 +609,14 @@ def visit_Call(self, call_node: ast.Call) -> Any:
593609
# Create a new Name node for the replacement and set it as node.func
594610
call_node.func = self._attr_node(call_node, aspect)
595611
self.ast_modified = call_modified = True
612+
else:
613+
aspect = self._should_replace_with_source(call_node, False)
614+
if aspect:
615+
# Send 0 as flag_added_args value
616+
call_node.args.insert(0, self._int_constant(call_node, 0))
617+
call_node.args = self._add_original_function_as_arg(call_node, False)
618+
call_node.func = self._attr_node(call_node, SOURCES_FUNCTION_REPLACEMENT)
619+
self.ast_modified = call_modified = True
596620

597621
if self.codetype == CODE_TYPE_FIRST_PARTY:
598622
# Function replacement case

ddtrace/appsec/_iast/constants.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@
5555
}
5656

5757
DEFAULT_PATH_TRAVERSAL_FUNCTIONS = {
58+
"_io": {"open"},
59+
"io": {"open"},
5860
"glob": {"glob"},
5961
"os": {
6062
"mkdir",
@@ -86,3 +88,7 @@
8688
DBAPI_MYSQL = "mysql"
8789
DBAPI_MARIADB = "mariadb"
8890
DBAPI_INTEGRATIONS = (DBAPI_SQLITE, DBAPI_PSYCOPG, DBAPI_MYSQL, DBAPI_MARIADB)
91+
92+
DEFAULT_SOURCE_IO_FUNCTIONS = {
93+
"_io": {"read"},
94+
}
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
from .ast_taint import ast_function
2+
3+
4+
__all__ = [
5+
"ast_function",
6+
]
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
from typing import Any
2+
from typing import Callable
3+
4+
from ddtrace.appsec._iast._taint_tracking import OriginType
5+
from ddtrace.appsec._iast._taint_tracking import Source
6+
from ddtrace.appsec._iast._taint_tracking._taint_objects import get_tainted_ranges
7+
from ddtrace.appsec._iast._taint_tracking._taint_objects import taint_pyobject
8+
from ddtrace.settings.asm import config as asm_config
9+
10+
from ..constants import DEFAULT_SOURCE_IO_FUNCTIONS
11+
12+
13+
def ast_function(
14+
func: Callable,
15+
flag_added_args: Any,
16+
*args: Any,
17+
**kwargs: Any,
18+
) -> Any:
19+
instance = getattr(func, "__self__", None)
20+
func_name = getattr(func, "__name__", None)
21+
cls_name = ""
22+
if instance is not None and func_name:
23+
try:
24+
cls_name = instance.__class__.__name__
25+
except AttributeError:
26+
pass
27+
28+
if flag_added_args > 0:
29+
args = args[flag_added_args:]
30+
31+
module_name = instance.__class__.__module__
32+
result = func(*args, **kwargs)
33+
if (
34+
module_name == "_io"
35+
and cls_name in ("BytesIO", "StringIO")
36+
and func_name in DEFAULT_SOURCE_IO_FUNCTIONS[module_name]
37+
):
38+
if asm_config._iast_enabled and asm_config.is_iast_request_enabled:
39+
ranges = get_tainted_ranges(instance)
40+
if len(ranges) > 0:
41+
source = (
42+
ranges[0].source if ranges[0].source else Source(name="_io", value=result, origin=OriginType.EMPTY)
43+
)
44+
result = taint_pyobject(
45+
pyobject=result,
46+
source_name=source.name,
47+
source_value=source.value,
48+
source_origin=source.origin,
49+
)
50+
return result

ddtrace/appsec/_iast/taint_sinks/ast_taint.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
from typing import Any
22
from typing import Callable
33

4+
from ddtrace.settings.asm import config as asm_config
5+
46
from ..._constants import IAST_SPAN_TAGS
57
from .._metrics import _set_metric_iast_executed_sink
68
from .._metrics import increment_iast_span_metric
@@ -34,10 +36,11 @@ def ast_function(
3436
and cls_name == "Random"
3537
and func_name in DEFAULT_WEAK_RANDOMNESS_FUNCTIONS
3638
):
37-
# Weak, run the analyzer
38-
increment_iast_span_metric(IAST_SPAN_TAGS.TELEMETRY_EXECUTED_SINK, WeakRandomness.vulnerability_type)
39-
_set_metric_iast_executed_sink(WeakRandomness.vulnerability_type)
40-
WeakRandomness.report(evidence_value=cls_name + "." + func_name)
39+
if asm_config._iast_enabled and asm_config.is_iast_request_enabled:
40+
# Weak, run the analyzer
41+
increment_iast_span_metric(IAST_SPAN_TAGS.TELEMETRY_EXECUTED_SINK, WeakRandomness.vulnerability_type)
42+
_set_metric_iast_executed_sink(WeakRandomness.vulnerability_type)
43+
WeakRandomness.report(evidence_value=cls_name + "." + func_name)
4144
elif hasattr(func, "__module__") and DEFAULT_PATH_TRAVERSAL_FUNCTIONS.get(func.__module__):
4245
if func_name in DEFAULT_PATH_TRAVERSAL_FUNCTIONS[func.__module__]:
4346
check_and_report_path_traversal(*args, **kwargs)

ddtrace/appsec/_iast/taint_sinks/path_traversal.py

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from ddtrace.appsec._constants import IAST_SPAN_TAGS
44
from ddtrace.appsec._iast import oce
55
from ddtrace.appsec._iast._metrics import _set_metric_iast_executed_sink
6+
from ddtrace.appsec._iast._metrics import _set_metric_iast_instrumented_sink
67
from ddtrace.appsec._iast._metrics import increment_iast_span_metric
78
from ddtrace.appsec._iast._taint_tracking._taint_objects import is_pyobject_tainted
89
from ddtrace.appsec._iast.constants import VULN_PATH_TRAVERSAL
@@ -20,19 +21,18 @@ class PathTraversal(VulnerabilityBase):
2021
vulnerability_type = VULN_PATH_TRAVERSAL
2122

2223

24+
IS_REPORTED_INTRUMENTED_SINK = False
25+
26+
2327
def check_and_report_path_traversal(*args: Any, **kwargs: Any) -> None:
28+
global IS_REPORTED_INTRUMENTED_SINK
29+
if not IS_REPORTED_INTRUMENTED_SINK:
30+
_set_metric_iast_instrumented_sink(VULN_PATH_TRAVERSAL)
31+
IS_REPORTED_INTRUMENTED_SINK = True
32+
2433
if asm_config.is_iast_request_enabled and PathTraversal.has_quota():
25-
try:
26-
increment_iast_span_metric(IAST_SPAN_TAGS.TELEMETRY_EXECUTED_SINK, PathTraversal.vulnerability_type)
27-
_set_metric_iast_executed_sink(PathTraversal.vulnerability_type)
28-
filename_arg = args[0] if args else kwargs.get("file", None)
29-
if is_pyobject_tainted(filename_arg):
30-
PathTraversal.report(evidence_value=filename_arg)
31-
except Exception: # nosec
32-
# FIXME: see below
33-
# log.debug("Unexpected exception while reporting vulnerability", exc_info=True)
34-
pass
35-
# FIXME: this is commented out because logs can execute _open which can runs the check_and_report_path_traversal
36-
# entering an infinite loop
37-
# else:
38-
# log.debug("IAST: no vulnerability quota to analyze more sink points")
34+
filename_arg = args[0] if args else kwargs.get("file", None)
35+
if is_pyobject_tainted(filename_arg):
36+
PathTraversal.report(evidence_value=filename_arg)
37+
increment_iast_span_metric(IAST_SPAN_TAGS.TELEMETRY_EXECUTED_SINK, PathTraversal.vulnerability_type)
38+
_set_metric_iast_executed_sink(PathTraversal.vulnerability_type)

tests/appsec/iast/_ast/test_ast_patching.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ def test_astpatch_module_changed(module_name):
7878
assert ("", None) != (module_path, new_ast)
7979
new_code = astunparse.unparse(new_ast)
8080
assert new_code.startswith(
81+
f"\nimport ddtrace.appsec._iast.sources as {_PREFIX}sources"
8182
f"\nimport ddtrace.appsec._iast.taint_sinks as {_PREFIX}taint_sinks"
8283
f"\nimport ddtrace.appsec._iast._taint_tracking.aspects as {_PREFIX}aspects"
8384
)
@@ -95,6 +96,7 @@ def test_astpatch_module_changed_add_operator(module_name):
9596
assert ("", None) != (module_path, new_ast)
9697
new_code = astunparse.unparse(new_ast)
9798
assert new_code.startswith(
99+
f"\nimport ddtrace.appsec._iast.sources as {_PREFIX}sources"
98100
f"\nimport ddtrace.appsec._iast.taint_sinks as {_PREFIX}taint_sinks"
99101
f"\nimport ddtrace.appsec._iast._taint_tracking.aspects as {_PREFIX}aspects"
100102
)
@@ -112,6 +114,7 @@ def test_astpatch_module_changed_add_inplace_operator(module_name):
112114
assert ("", None) != (module_path, new_ast)
113115
new_code = astunparse.unparse(new_ast)
114116
assert new_code.startswith(
117+
f"\nimport ddtrace.appsec._iast.sources as {_PREFIX}sources"
115118
f"\nimport ddtrace.appsec._iast.taint_sinks as {_PREFIX}taint_sinks"
116119
f"\nimport ddtrace.appsec._iast._taint_tracking.aspects as {_PREFIX}aspects"
117120
)
@@ -136,6 +139,7 @@ def test_astpatch_source_changed_with_future_imports(module_name):
136139
from __future__ import division
137140
from __future__ import print_function
138141
from __future__ import unicode_literals
142+
import ddtrace.appsec._iast.sources as {_PREFIX}sources
139143
import ddtrace.appsec._iast.taint_sinks as {_PREFIX}taint_sinks
140144
import ddtrace.appsec._iast._taint_tracking.aspects as {_PREFIX}aspects
141145
import html"""
@@ -229,6 +233,7 @@ def test_astpatch_stringio_module_changed(module_name):
229233
assert ("", None) != (module_path, new_ast)
230234
new_code = astunparse.unparse(new_ast)
231235
assert new_code.startswith(
236+
f"\nimport ddtrace.appsec._iast.sources as {_PREFIX}sources"
232237
f"\nimport ddtrace.appsec._iast.taint_sinks as {_PREFIX}taint_sinks"
233238
f"\nimport ddtrace.appsec._iast._taint_tracking.aspects as {_PREFIX}aspects"
234239
)
@@ -247,6 +252,7 @@ def test_astpatch_bytesio_module_changed(module_name):
247252
assert ("", None) != (module_path, new_ast)
248253
new_code = astunparse.unparse(new_ast)
249254
assert new_code.startswith(
255+
f"\nimport ddtrace.appsec._iast.sources as {_PREFIX}sources"
250256
f"\nimport ddtrace.appsec._iast.taint_sinks as {_PREFIX}taint_sinks"
251257
f"\nimport ddtrace.appsec._iast._taint_tracking.aspects as {_PREFIX}aspects"
252258
)

0 commit comments

Comments
 (0)