Skip to content

Commit e38aad9

Browse files
authored
feat(iast): vulnerability deduplication (#7582)
IAST vulnerability deduplication: If we report and attach a vulnerability to a span, we don't report it again until 1 hour later to reduce the number of spans marked as "manually keep." Example of this feature on Java Tracer: DataDog/dd-trace-java#4855 ## Checklist - [x] Change(s) are motivated and described in the PR description. - [x] Testing strategy is described if automated tests are not included in the PR. - [x] Risk is outlined (performance impact, potential for breakage, maintainability, etc). - [x] Change is maintainable (easy to change, telemetry, documentation). - [x] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) are followed. If no release note is required, add label `changelog/no-changelog`. - [x] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/)). - [x] Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [x] Title is accurate. - [x] No unnecessary changes are introduced. - [x] Description motivates each change. - [x] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes unless absolutely necessary. - [x] Testing strategy adequately addresses listed risk(s). - [x] Change is maintainable (easy to change, telemetry, documentation). - [x] Release note makes sense to a user of the library. - [x] Reviewer has explicitly acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment. - [x] Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) - [x] If this PR touches code that signs or publishes builds or packages, or handles credentials of any kind, I've requested a review from `@DataDog/security-design-and-guidance`. - [x] This PR doesn't touch any of that.
1 parent c68ae58 commit e38aad9

File tree

11 files changed

+294
-52
lines changed

11 files changed

+294
-52
lines changed

ddtrace/appsec/_iast/_overhead_control_engine.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,17 @@ def acquire_quota(cls):
5555
cls._lock.release()
5656
return result
5757

58+
@classmethod
59+
def increment_quota(cls):
60+
# type: () -> bool
61+
cls._lock.acquire()
62+
result = False
63+
if cls._vulnerability_quota < MAX_VULNERABILITIES_PER_REQUEST:
64+
cls._vulnerability_quota += 1
65+
result = True
66+
cls._lock.release()
67+
return result
68+
5869
@classmethod
5970
def has_quota(cls):
6071
# type: () -> bool

ddtrace/appsec/_iast/taint_sinks/_base.py

Lines changed: 64 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import os
2+
import time
23
from typing import TYPE_CHECKING
34
from typing import cast
45

@@ -10,6 +11,7 @@
1011
from ddtrace.internal.utils.cache import LFUCache
1112
from ddtrace.settings.asm import config as asm_config
1213

14+
from ..._deduplications import deduplication
1315
from .. import oce
1416
from .._overhead_control_engine import Operation
1517
from .._utils import _has_to_scrub
@@ -44,6 +46,21 @@
4446
CWD = os.path.abspath(os.getcwd())
4547

4648

49+
class taint_sink_deduplication(deduplication):
50+
def __call__(self, *args, **kwargs):
51+
# we skip 0, 1 and last position because its the cls, span and sources respectively
52+
result = None
53+
if self.is_deduplication_enabled() is False:
54+
result = self.func(*args, **kwargs)
55+
else:
56+
raw_log_hash = hash("".join([str(arg) for arg in args[2:-1]]))
57+
last_reported_timestamp = self.get_last_time_reported(raw_log_hash)
58+
if time.time() > last_reported_timestamp:
59+
result = self.func(*args, **kwargs)
60+
self.reported_logs[raw_log_hash] = time.time() + self._time_lapse
61+
return result
62+
63+
4764
def _check_positions_contained(needle, container):
4865
needle_start, needle_end = needle
4966
container_start, container_end = container
@@ -81,13 +98,51 @@ def wrapper(wrapped, instance, args, kwargs):
8198

8299
return wrapper
83100

101+
@classmethod
102+
@taint_sink_deduplication
103+
def _prepare_report(cls, span, vulnerability_type, evidence, file_name, line_number, sources):
104+
report = core.get_item(IAST.CONTEXT_KEY, span=span)
105+
if report:
106+
report.vulnerabilities.add(
107+
Vulnerability(
108+
type=vulnerability_type,
109+
evidence=evidence,
110+
location=Location(path=file_name, line=line_number, spanId=span.span_id),
111+
)
112+
)
113+
114+
else:
115+
report = IastSpanReporter(
116+
vulnerabilities={
117+
Vulnerability(
118+
type=vulnerability_type,
119+
evidence=evidence,
120+
location=Location(path=file_name, line=line_number, spanId=span.span_id),
121+
)
122+
}
123+
)
124+
if sources:
125+
126+
def cast_value(value):
127+
if isinstance(value, (bytes, bytearray)):
128+
value_decoded = value.decode("utf-8")
129+
else:
130+
value_decoded = value
131+
return value_decoded
132+
133+
report.sources = [Source(origin=x.origin, name=x.name, value=cast_value(x.value)) for x in sources]
134+
135+
redacted_report = cls._redacted_report_cache.get(
136+
hash(report), lambda x: cls._redact_report(cast(IastSpanReporter, report))
137+
)
138+
core.set_item(IAST.CONTEXT_KEY, redacted_report, span=span)
139+
140+
return True
141+
84142
@classmethod
85143
def report(cls, evidence_value="", sources=None):
86144
# type: (Union[Text|List[Dict[str, Any]]], Optional[List[Source]]) -> None
87-
"""Build a IastSpanReporter instance to report it in the `AppSecIastSpanProcessor` as a string JSON
88-
89-
TODO: check deduplications if DD_IAST_DEDUPLICATION_ENABLED is true
90-
"""
145+
"""Build a IastSpanReporter instance to report it in the `AppSecIastSpanProcessor` as a string JSON"""
91146

92147
if cls.acquire_quota():
93148
if not tracer or not hasattr(tracer, "current_root_span"):
@@ -131,41 +186,11 @@ def report(cls, evidence_value="", sources=None):
131186
log.debug("Unexpected evidence_value type: %s", type(evidence_value))
132187
evidence = Evidence(value="")
133188

134-
report = core.get_item(IAST.CONTEXT_KEY, span=span)
135-
if report:
136-
report.vulnerabilities.add(
137-
Vulnerability(
138-
type=cls.vulnerability_type,
139-
evidence=evidence,
140-
location=Location(path=file_name, line=line_number, spanId=span.span_id),
141-
)
142-
)
143-
144-
else:
145-
report = IastSpanReporter(
146-
vulnerabilities={
147-
Vulnerability(
148-
type=cls.vulnerability_type,
149-
evidence=evidence,
150-
location=Location(path=file_name, line=line_number, spanId=span.span_id),
151-
)
152-
}
153-
)
154-
if sources:
155-
156-
def cast_value(value):
157-
if isinstance(value, (bytes, bytearray)):
158-
value_decoded = value.decode("utf-8")
159-
else:
160-
value_decoded = value
161-
return value_decoded
162-
163-
report.sources = [Source(origin=x.origin, name=x.name, value=cast_value(x.value)) for x in sources]
164-
165-
redacted_report = cls._redacted_report_cache.get(
166-
hash(report), lambda x: cls._redact_report(cast(IastSpanReporter, report))
167-
)
168-
core.set_item(IAST.CONTEXT_KEY, redacted_report, span=span)
189+
result = cls._prepare_report(span, cls.vulnerability_type, evidence, file_name, line_number, sources)
190+
# If result is None that's mean deduplication raises and no vulnerability wasn't reported, with that,
191+
# we need to restore the quota
192+
if not result:
193+
cls.increment_quota()
169194

170195
@classmethod
171196
def _extract_sensitive_tokens(cls, report):

riotfile.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,7 @@ def select_pys(min_version=MIN_PYTHON_VERSION, max_version=MAX_PYTHON_VERSION):
147147
},
148148
env={
149149
"DD_IAST_REQUEST_SAMPLING": "100", # Override default 30% to analyze all IAST requests
150+
"_DD_APPSEC_DEDUPLICATION_ENABLED": "false",
150151
},
151152
),
152153
Venv(

tests/appsec/iast/taint_sinks/test_command_injection.py

Lines changed: 35 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from ddtrace.appsec._iast.taint_sinks.command_injection import unpatch
1313
from ddtrace.internal import core
1414
from tests.appsec.iast.iast_utils import get_line_and_hash
15+
from tests.utils import override_env
1516
from tests.utils import override_global_config
1617

1718

@@ -39,7 +40,7 @@ def setup():
3940

4041

4142
def test_ossystem(tracer, iast_span_defaults):
42-
with override_global_config(dict(_asm_enabled=True, _iast_enabled=True)):
43+
with override_global_config(dict(_iast_enabled=True)):
4344
patch()
4445
_BAD_DIR = "forbidden_dir/"
4546
_BAD_DIR = taint_pyobject(
@@ -78,7 +79,7 @@ def test_ossystem(tracer, iast_span_defaults):
7879

7980

8081
def test_communicate(tracer, iast_span_defaults):
81-
with override_global_config(dict(_asm_enabled=True, _iast_enabled=True)):
82+
with override_global_config(dict(_iast_enabled=True)):
8283
patch()
8384
_BAD_DIR = "forbidden_dir/"
8485
_BAD_DIR = taint_pyobject(
@@ -118,7 +119,7 @@ def test_communicate(tracer, iast_span_defaults):
118119

119120

120121
def test_run(tracer, iast_span_defaults):
121-
with override_global_config(dict(_asm_enabled=True, _iast_enabled=True)):
122+
with override_global_config(dict(_iast_enabled=True)):
122123
patch()
123124
_BAD_DIR = "forbidden_dir/"
124125
_BAD_DIR = taint_pyobject(
@@ -156,7 +157,7 @@ def test_run(tracer, iast_span_defaults):
156157

157158

158159
def test_popen_wait(tracer, iast_span_defaults):
159-
with override_global_config(dict(_asm_enabled=True, _iast_enabled=True)):
160+
with override_global_config(dict(_iast_enabled=True)):
160161
patch()
161162
_BAD_DIR = "forbidden_dir/"
162163
_BAD_DIR = taint_pyobject(
@@ -195,7 +196,7 @@ def test_popen_wait(tracer, iast_span_defaults):
195196

196197

197198
def test_popen_wait_shell_true(tracer, iast_span_defaults):
198-
with override_global_config(dict(_asm_enabled=True, _iast_enabled=True)):
199+
with override_global_config(dict(_iast_enabled=True)):
199200
patch()
200201
_BAD_DIR = "forbidden_dir/"
201202
_BAD_DIR = taint_pyobject(
@@ -248,7 +249,7 @@ def test_popen_wait_shell_true(tracer, iast_span_defaults):
248249
],
249250
)
250251
def test_osspawn_variants(tracer, iast_span_defaults, function, mode, arguments, tag):
251-
with override_global_config(dict(_asm_enabled=True, _iast_enabled=True)):
252+
with override_global_config(dict(_iast_enabled=True)):
252253
patch()
253254
_BAD_DIR = "forbidden_dir/"
254255
_BAD_DIR = taint_pyobject(
@@ -296,7 +297,7 @@ def test_osspawn_variants(tracer, iast_span_defaults, function, mode, arguments,
296297

297298
@pytest.mark.skipif(sys.platform != "linux", reason="Only for Linux")
298299
def test_multiple_cmdi(tracer, iast_span_defaults):
299-
with override_global_config(dict(_asm_enabled=True, _iast_enabled=True)):
300+
with override_global_config(dict(_iast_enabled=True)):
300301
patch()
301302
_BAD_DIR = taint_pyobject(
302303
pyobject="forbidden_dir/",
@@ -318,3 +319,30 @@ def test_multiple_cmdi(tracer, iast_span_defaults):
318319
assert span_report
319320

320321
assert len(list(span_report.vulnerabilities)) == 2
322+
323+
324+
@pytest.mark.parametrize("num_vuln_expected", [1, 0, 0])
325+
def test_cmdi_deduplication(num_vuln_expected, tracer, iast_span_defaults):
326+
with override_global_config(dict(_iast_enabled=True)), override_env(dict(_DD_APPSEC_DEDUPLICATION_ENABLED="true")):
327+
patch()
328+
_BAD_DIR = "forbidden_dir/"
329+
_BAD_DIR = taint_pyobject(
330+
pyobject=_BAD_DIR,
331+
source_name="test_ossystem",
332+
source_value=_BAD_DIR,
333+
source_origin=OriginType.PARAMETER,
334+
)
335+
assert is_pyobject_tainted(_BAD_DIR)
336+
for _ in range(0, 5):
337+
with tracer.trace("ossystem_test"):
338+
# label test_ossystem
339+
os.system(add_aspect("dir -l ", _BAD_DIR))
340+
341+
span_report = core.get_item(IAST.CONTEXT_KEY, span=iast_span_defaults)
342+
343+
if num_vuln_expected == 0:
344+
assert span_report is None
345+
else:
346+
assert span_report
347+
348+
assert len(span_report.vulnerabilities) == num_vuln_expected

tests/appsec/iast/taint_sinks/test_insecure_cookie.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from ddtrace.appsec._iast.constants import VULN_NO_SAMESITE_COOKIE
99
from ddtrace.appsec._iast.taint_sinks.insecure_cookie import asm_check_cookies
1010
from ddtrace.internal import core
11+
from tests.utils import override_env
1112

1213

1314
def test_insecure_cookies(iast_span_defaults):
@@ -104,3 +105,18 @@ def test_nosamesite_cookies_strict_no_error(iast_span_defaults):
104105
asm_check_cookies(cookies)
105106
span_report = core.get_item(IAST.CONTEXT_KEY, span=iast_span_defaults)
106107
assert not span_report
108+
109+
110+
@pytest.mark.parametrize("num_vuln_expected", [3, 0, 0])
111+
def test_insecure_cookies_deduplication(num_vuln_expected, iast_span_defaults):
112+
with override_env(dict(_DD_APPSEC_DEDUPLICATION_ENABLED="true")):
113+
cookies = {"foo": "bar"}
114+
asm_check_cookies(cookies)
115+
span_report = core.get_item(IAST.CONTEXT_KEY, span=iast_span_defaults)
116+
117+
if num_vuln_expected == 0:
118+
assert span_report is None
119+
else:
120+
assert span_report
121+
122+
assert len(span_report.vulnerabilities) == num_vuln_expected

tests/appsec/iast/taint_sinks/test_path_traversal.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from ddtrace.internal import core
1010
from tests.appsec.iast.aspects.conftest import _iast_patched_module
1111
from tests.appsec.iast.iast_utils import get_line_and_hash
12+
from tests.utils import override_env
1213

1314

1415
FIXTURES_PATH = "tests/appsec/iast/fixtures/taint_sinks/path_traversal.py"
@@ -106,3 +107,30 @@ def test_path_traversal(module, function, iast_span_defaults):
106107
assert vulnerability.evidence.value is None
107108
assert vulnerability.evidence.pattern is None
108109
assert vulnerability.evidence.redacted is None
110+
111+
112+
@pytest.mark.skipif(not python_supported_by_iast(), reason="Python version not supported by IAST")
113+
@pytest.mark.parametrize("num_vuln_expected", [1, 0, 0])
114+
def test_path_traversal_deduplication(num_vuln_expected, iast_span_defaults):
115+
from ddtrace.appsec._iast._taint_tracking import OriginType
116+
from ddtrace.appsec._iast._taint_tracking import taint_pyobject
117+
118+
mod = _iast_patched_module("tests.appsec.iast.fixtures.taint_sinks.path_traversal")
119+
file_path = os.path.join(ROOT_DIR, "../fixtures", "taint_sinks", "not_exists.txt")
120+
121+
with override_env(dict(_DD_APPSEC_DEDUPLICATION_ENABLED="true")):
122+
tainted_string = taint_pyobject(
123+
file_path, source_name="path", source_value=file_path, source_origin=OriginType.PATH
124+
)
125+
126+
for _ in range(0, 5):
127+
mod.pt_open(tainted_string)
128+
129+
span_report = core.get_item(IAST.CONTEXT_KEY, span=iast_span_defaults)
130+
131+
if num_vuln_expected == 0:
132+
assert span_report is None
133+
else:
134+
assert span_report
135+
136+
assert len(span_report.vulnerabilities) == num_vuln_expected

tests/appsec/iast/taint_sinks/test_sql_injection.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from ddtrace.internal import core
77
from tests.appsec.iast.aspects.conftest import _iast_patched_module
88
from tests.appsec.iast.iast_utils import get_line_and_hash
9+
from tests.utils import override_env
910

1011

1112
try:
@@ -34,6 +35,7 @@ def test_sql_injection(iast_span_defaults):
3435
span_report = core.get_item(IAST.CONTEXT_KEY, span=iast_span_defaults)
3536
assert span_report
3637

38+
assert len(span_report.vulnerabilities) == 1
3739
vulnerability = list(span_report.vulnerabilities)[0]
3840
source = span_report.sources[0]
3941
assert vulnerability.type == VULN_SQL_INJECTION
@@ -49,3 +51,28 @@ def test_sql_injection(iast_span_defaults):
4951
assert vulnerability.location.line == line
5052
assert vulnerability.location.path == FIXTURES_PATH
5153
assert vulnerability.hash == hash_value
54+
55+
56+
@pytest.mark.skipif(not python_supported_by_iast(), reason="Python version not supported by IAST")
57+
@pytest.mark.parametrize("num_vuln_expected", [1, 0, 0])
58+
def test_sql_injection_deduplication(num_vuln_expected, iast_span_defaults):
59+
mod = _iast_patched_module("tests.appsec.iast.fixtures.taint_sinks.sql_injection")
60+
with override_env(dict(_DD_APPSEC_DEDUPLICATION_ENABLED="true")):
61+
table = taint_pyobject(
62+
pyobject="students",
63+
source_name="test_ossystem",
64+
source_value="students",
65+
source_origin=OriginType.PARAMETER,
66+
)
67+
assert is_pyobject_tainted(table)
68+
for _ in range(0, 5):
69+
mod.sqli_simple(table)
70+
71+
span_report = core.get_item(IAST.CONTEXT_KEY, span=iast_span_defaults)
72+
73+
if num_vuln_expected == 0:
74+
assert span_report is None
75+
else:
76+
assert span_report
77+
78+
assert len(span_report.vulnerabilities) == num_vuln_expected

0 commit comments

Comments
 (0)