DataDog
diff --git a/ddtrace/appsec/_iast/_overhead_control_engine.py b/ddtrace/appsec/_iast/_overhead_control_engine.py
Lines changed: 11 additions & 0 deletions
diff --git a/ddtrace/appsec/_iast/taint_sinks/_base.py b/ddtrace/appsec/_iast/taint_sinks/_base.py
Lines changed: 64 additions & 39 deletions
diff --git a/riotfile.py b/riotfile.py
Lines changed: 1 addition & 0 deletions
diff --git a/tests/appsec/iast/taint_sinks/test_command_injection.py b/tests/appsec/iast/taint_sinks/test_command_injection.py
Lines changed: 35 additions & 7 deletions
diff --git a/tests/appsec/iast/taint_sinks/test_insecure_cookie.py b/tests/appsec/iast/taint_sinks/test_insecure_cookie.py
Lines changed: 16 additions & 0 deletions
diff --git a/tests/appsec/iast/taint_sinks/test_path_traversal.py b/tests/appsec/iast/taint_sinks/test_path_traversal.py
Lines changed: 28 additions & 0 deletions
diff --git a/tests/appsec/iast/taint_sinks/test_sql_injection.py b/tests/appsec/iast/taint_sinks/test_sql_injection.py
Lines changed: 27 additions & 0 deletions
@@ -55,6 +55,17 @@ def acquire_quota(cls):
         cls._lock.release()
         return result
 
+    @classmethod
+    def increment_quota(cls):
+        # type: () -> bool
+        """Give one unit of vulnerability quota back, without exceeding the per-request maximum.
+
+        Returns:
+            True if ``_vulnerability_quota`` was incremented, False if it was already at
+            ``MAX_VULNERABILITIES_PER_REQUEST``.
+        """
+        cls._lock.acquire()
+        try:
+            if cls._vulnerability_quota < MAX_VULNERABILITIES_PER_REQUEST:
+                cls._vulnerability_quota += 1
+                return True
+            return False
+        finally:
+            # Always release the lock, even if the body raises, so other callers cannot deadlock.
+            cls._lock.release()
+
     @classmethod
     def has_quota(cls):
         # type: () -> bool
 
@@ -1,4 +1,5 @@
 import os
+import time
 from typing import TYPE_CHECKING
 from typing import cast
 
@@ -10,6 +11,7 @@
 from ddtrace.internal.utils.cache import LFUCache
 from ddtrace.settings.asm import config as asm_config
 
+from ..._deduplications import deduplication
 from .. import oce
 from .._overhead_control_engine import Operation
 from .._utils import _has_to_scrub
@@ -44,6 +46,21 @@
 CWD = os.path.abspath(os.getcwd())
 
 
+class taint_sink_deduplication(deduplication):
+    """Deduplication decorator for taint sink reports.
+
+    The deduplication key is built from the positional arguments located between the second
+    and the last one. A call whose key is still inside its deduplication window is skipped and
+    returns None; otherwise the wrapped function runs and its result is returned.
+    """
+
+    def __call__(self, *args, **kwargs):
+        if self.is_deduplication_enabled() is False:
+            return self.func(*args, **kwargs)
+
+        # Positions 0, 1 and the last one are skipped because they are the cls, the span and the
+        # sources respectively. The key is a tuple of strings rather than one concatenated string
+        # so adjacent values cannot run together and collide (e.g. "a.py1" + "2" vs "a.py" + "12").
+        raw_log_hash = hash(tuple(str(arg) for arg in args[2:-1]))
+        last_reported_timestamp = self.get_last_time_reported(raw_log_hash)
+
+        result = None
+        if time.time() > last_reported_timestamp:
+            result = self.func(*args, **kwargs)
+            # Remember until when this key must be considered a duplicate.
+            self.reported_logs[raw_log_hash] = time.time() + self._time_lapse
+        return result
+
+
 def _check_positions_contained(needle, container):
     needle_start, needle_end = needle
     container_start, container_end = container
@@ -81,13 +98,51 @@ def wrapper(wrapped, instance, args, kwargs):
 
         return wrapper
 
+    @classmethod
+    @taint_sink_deduplication
+    def _prepare_report(cls, span, vulnerability_type, evidence, file_name, line_number, sources):
+        """Add a vulnerability to the IAST report stored for ``span`` and store the redacted result.
+
+        An existing report found under ``IAST.CONTEXT_KEY`` is extended; otherwise a new
+        ``IastSpanReporter`` is created. When ``sources`` is given, the report sources are replaced
+        with copies whose bytes-like values are decoded as UTF-8.
+
+        Returns:
+            True once the redacted report has been stored on the span.
+        """
+        report = core.get_item(IAST.CONTEXT_KEY, span=span)
+        vulnerability = Vulnerability(
+            type=vulnerability_type,
+            evidence=evidence,
+            location=Location(path=file_name, line=line_number, spanId=span.span_id),
+        )
+        if report:
+            report.vulnerabilities.add(vulnerability)
+        else:
+            report = IastSpanReporter(vulnerabilities={vulnerability})
+
+        if sources:
+
+            def _as_text(raw):
+                # bytes-like values are decoded as UTF-8; anything else is passed through untouched
+                return raw.decode("utf-8") if isinstance(raw, (bytes, bytearray)) else raw
+
+            report.sources = [
+                Source(origin=src.origin, name=src.name, value=_as_text(src.value)) for src in sources
+            ]
+
+        # The cache key is the report hash; the factory ignores its argument and redacts ``report``.
+        redacted_report = cls._redacted_report_cache.get(
+            hash(report), lambda _key: cls._redact_report(cast(IastSpanReporter, report))
+        )
+        core.set_item(IAST.CONTEXT_KEY, redacted_report, span=span)
+
+        return True
+
     @classmethod
     def report(cls, evidence_value="", sources=None):
         # type: (Union[Text|List[Dict[str, Any]]], Optional[List[Source]]) -> None
-        """Build a IastSpanReporter instance to report it in the `AppSecIastSpanProcessor` as a string JSON
-
-        TODO: check deduplications if DD_IAST_DEDUPLICATION_ENABLED is true
-        """
+        """Build a IastSpanReporter instance to report it in the `AppSecIastSpanProcessor` as a string JSON"""
 
         if cls.acquire_quota():
             if not tracer or not hasattr(tracer, "current_root_span"):
@@ -131,41 +186,11 @@ def report(cls, evidence_value="", sources=None):
                 log.debug("Unexpected evidence_value type: %s", type(evidence_value))
                 evidence = Evidence(value="")
 
-            report = core.get_item(IAST.CONTEXT_KEY, span=span)
-            if report:
-                report.vulnerabilities.add(
-                    Vulnerability(
-                        type=cls.vulnerability_type,
-                        evidence=evidence,
-                        location=Location(path=file_name, line=line_number, spanId=span.span_id),
-                    )
-                )
-
-            else:
-                report = IastSpanReporter(
-                    vulnerabilities={
-                        Vulnerability(
-                            type=cls.vulnerability_type,
-                            evidence=evidence,
-                            location=Location(path=file_name, line=line_number, spanId=span.span_id),
-                        )
-                    }
-                )
-            if sources:
-
-                def cast_value(value):
-                    if isinstance(value, (bytes, bytearray)):
-                        value_decoded = value.decode("utf-8")
-                    else:
-                        value_decoded = value
-                    return value_decoded
-
-                report.sources = [Source(origin=x.origin, name=x.name, value=cast_value(x.value)) for x in sources]
-
-            redacted_report = cls._redacted_report_cache.get(
-                hash(report), lambda x: cls._redact_report(cast(IastSpanReporter, report))
-            )
-            core.set_item(IAST.CONTEXT_KEY, redacted_report, span=span)
+            result = cls._prepare_report(span, cls.vulnerability_type, evidence, file_name, line_number, sources)
+            # If result is None, the deduplication filter skipped this report and no vulnerability was
+            # recorded, so we need to restore the quota consumed by acquire_quota() above.
+            if not result:
+                cls.increment_quota()
 
     @classmethod
     def _extract_sensitive_tokens(cls, report):
 
@@ -147,6 +147,7 @@ def select_pys(min_version=MIN_PYTHON_VERSION, max_version=MAX_PYTHON_VERSION):
             },
             env={
                 "DD_IAST_REQUEST_SAMPLING": "100",  # Override default 30% to analyze all IAST requests
+                "_DD_APPSEC_DEDUPLICATION_ENABLED": "false",
             },
         ),
         Venv(
 
@@ -12,6 +12,7 @@
 from ddtrace.appsec._iast.taint_sinks.command_injection import unpatch
 from ddtrace.internal import core
 from tests.appsec.iast.iast_utils import get_line_and_hash
+from tests.utils import override_env
 from tests.utils import override_global_config
 
 
@@ -39,7 +40,7 @@ def setup():
 
 
 def test_ossystem(tracer, iast_span_defaults):
-    with override_global_config(dict(_asm_enabled=True, _iast_enabled=True)):
+    with override_global_config(dict(_iast_enabled=True)):
         patch()
         _BAD_DIR = "forbidden_dir/"
         _BAD_DIR = taint_pyobject(
@@ -78,7 +79,7 @@ def test_ossystem(tracer, iast_span_defaults):
 
 
 def test_communicate(tracer, iast_span_defaults):
-    with override_global_config(dict(_asm_enabled=True, _iast_enabled=True)):
+    with override_global_config(dict(_iast_enabled=True)):
         patch()
         _BAD_DIR = "forbidden_dir/"
         _BAD_DIR = taint_pyobject(
@@ -118,7 +119,7 @@ def test_communicate(tracer, iast_span_defaults):
 
 
 def test_run(tracer, iast_span_defaults):
-    with override_global_config(dict(_asm_enabled=True, _iast_enabled=True)):
+    with override_global_config(dict(_iast_enabled=True)):
         patch()
         _BAD_DIR = "forbidden_dir/"
         _BAD_DIR = taint_pyobject(
@@ -156,7 +157,7 @@ def test_run(tracer, iast_span_defaults):
 
 
 def test_popen_wait(tracer, iast_span_defaults):
-    with override_global_config(dict(_asm_enabled=True, _iast_enabled=True)):
+    with override_global_config(dict(_iast_enabled=True)):
         patch()
         _BAD_DIR = "forbidden_dir/"
         _BAD_DIR = taint_pyobject(
@@ -195,7 +196,7 @@ def test_popen_wait(tracer, iast_span_defaults):
 
 
 def test_popen_wait_shell_true(tracer, iast_span_defaults):
-    with override_global_config(dict(_asm_enabled=True, _iast_enabled=True)):
+    with override_global_config(dict(_iast_enabled=True)):
         patch()
         _BAD_DIR = "forbidden_dir/"
         _BAD_DIR = taint_pyobject(
@@ -248,7 +249,7 @@ def test_popen_wait_shell_true(tracer, iast_span_defaults):
     ],
 )
 def test_osspawn_variants(tracer, iast_span_defaults, function, mode, arguments, tag):
-    with override_global_config(dict(_asm_enabled=True, _iast_enabled=True)):
+    with override_global_config(dict(_iast_enabled=True)):
         patch()
         _BAD_DIR = "forbidden_dir/"
         _BAD_DIR = taint_pyobject(
@@ -296,7 +297,7 @@ def test_osspawn_variants(tracer, iast_span_defaults, function, mode, arguments,
 
 @pytest.mark.skipif(sys.platform != "linux", reason="Only for Linux")
 def test_multiple_cmdi(tracer, iast_span_defaults):
-    with override_global_config(dict(_asm_enabled=True, _iast_enabled=True)):
+    with override_global_config(dict(_iast_enabled=True)):
         patch()
         _BAD_DIR = taint_pyobject(
             pyobject="forbidden_dir/",
@@ -318,3 +319,30 @@ def test_multiple_cmdi(tracer, iast_span_defaults):
         assert span_report
 
         assert len(list(span_report.vulnerabilities)) == 2
+
+
+@pytest.mark.parametrize("num_vuln_expected", [1, 0, 0])
+def test_cmdi_deduplication(num_vuln_expected, tracer, iast_span_defaults):
+    """Run the same tainted ``os.system`` call five times with deduplication enabled.
+
+    The first parametrized run expects exactly one vulnerability; the later runs expect no report.
+    """
+    with override_global_config({"_iast_enabled": True}), override_env({"_DD_APPSEC_DEDUPLICATION_ENABLED": "true"}):
+        patch()
+        raw_dir = "forbidden_dir/"
+        tainted_dir = taint_pyobject(
+            pyobject=raw_dir,
+            source_name="test_ossystem",
+            source_value=raw_dir,
+            source_origin=OriginType.PARAMETER,
+        )
+        assert is_pyobject_tainted(tainted_dir)
+        for _ in range(5):
+            with tracer.trace("ossystem_test"):
+                # label test_ossystem
+                os.system(add_aspect("dir -l ", tainted_dir))
+
+        span_report = core.get_item(IAST.CONTEXT_KEY, span=iast_span_defaults)
+
+        if num_vuln_expected != 0:
+            assert span_report
+
+            assert len(span_report.vulnerabilities) == num_vuln_expected
+        else:
+            assert span_report is None
@@ -8,6 +8,7 @@
 from ddtrace.appsec._iast.constants import VULN_NO_SAMESITE_COOKIE
 from ddtrace.appsec._iast.taint_sinks.insecure_cookie import asm_check_cookies
 from ddtrace.internal import core
+from tests.utils import override_env
 
 
 def test_insecure_cookies(iast_span_defaults):
@@ -104,3 +105,18 @@ def test_nosamesite_cookies_strict_no_error(iast_span_defaults):
     asm_check_cookies(cookies)
     span_report = core.get_item(IAST.CONTEXT_KEY, span=iast_span_defaults)
     assert not span_report
+
+
+@pytest.mark.parametrize("num_vuln_expected", [3, 0, 0])
+def test_insecure_cookies_deduplication(num_vuln_expected, iast_span_defaults):
+    """Check the same cookie with deduplication enabled.
+
+    The first parametrized run expects three vulnerabilities; the later runs expect no report.
+    """
+    with override_env({"_DD_APPSEC_DEDUPLICATION_ENABLED": "true"}):
+        asm_check_cookies({"foo": "bar"})
+        span_report = core.get_item(IAST.CONTEXT_KEY, span=iast_span_defaults)
+
+        if num_vuln_expected != 0:
+            assert span_report
+
+            assert len(span_report.vulnerabilities) == num_vuln_expected
+        else:
+            assert span_report is None
@@ -9,6 +9,7 @@
 from ddtrace.internal import core
 from tests.appsec.iast.aspects.conftest import _iast_patched_module
 from tests.appsec.iast.iast_utils import get_line_and_hash
+from tests.utils import override_env
 
 
 FIXTURES_PATH = "tests/appsec/iast/fixtures/taint_sinks/path_traversal.py"
@@ -106,3 +107,30 @@ def test_path_traversal(module, function, iast_span_defaults):
     assert vulnerability.evidence.value is None
     assert vulnerability.evidence.pattern is None
     assert vulnerability.evidence.redacted is None
+
+
+@pytest.mark.skipif(not python_supported_by_iast(), reason="Python version not supported by IAST")
+@pytest.mark.parametrize("num_vuln_expected", [1, 0, 0])
+def test_path_traversal_deduplication(num_vuln_expected, iast_span_defaults):
+    """Open the same tainted path five times with deduplication enabled.
+
+    The first parametrized run expects exactly one vulnerability; the later runs expect no report.
+    """
+    from ddtrace.appsec._iast._taint_tracking import OriginType
+    from ddtrace.appsec._iast._taint_tracking import taint_pyobject
+
+    mod = _iast_patched_module("tests.appsec.iast.fixtures.taint_sinks.path_traversal")
+    file_path = os.path.join(ROOT_DIR, "../fixtures", "taint_sinks", "not_exists.txt")
+
+    with override_env({"_DD_APPSEC_DEDUPLICATION_ENABLED": "true"}):
+        tainted_path = taint_pyobject(
+            file_path, source_name="path", source_value=file_path, source_origin=OriginType.PATH
+        )
+
+        for _ in range(5):
+            mod.pt_open(tainted_path)
+
+        span_report = core.get_item(IAST.CONTEXT_KEY, span=iast_span_defaults)
+
+        if num_vuln_expected != 0:
+            assert span_report
+
+            assert len(span_report.vulnerabilities) == num_vuln_expected
+        else:
+            assert span_report is None
@@ -6,6 +6,7 @@
 from ddtrace.internal import core
 from tests.appsec.iast.aspects.conftest import _iast_patched_module
 from tests.appsec.iast.iast_utils import get_line_and_hash
+from tests.utils import override_env
 
 
 try:
@@ -34,6 +35,7 @@ def test_sql_injection(iast_span_defaults):
     span_report = core.get_item(IAST.CONTEXT_KEY, span=iast_span_defaults)
     assert span_report
 
+    assert len(span_report.vulnerabilities) == 1
     vulnerability = list(span_report.vulnerabilities)[0]
     source = span_report.sources[0]
     assert vulnerability.type == VULN_SQL_INJECTION
@@ -49,3 +51,28 @@ def test_sql_injection(iast_span_defaults):
     assert vulnerability.location.line == line
     assert vulnerability.location.path == FIXTURES_PATH
     assert vulnerability.hash == hash_value
+
+
+@pytest.mark.skipif(not python_supported_by_iast(), reason="Python version not supported by IAST")
+@pytest.mark.parametrize("num_vuln_expected", [1, 0, 0])
+def test_sql_injection_deduplication(num_vuln_expected, iast_span_defaults):
+    """Run the same tainted SQL query five times with deduplication enabled.
+
+    The first parametrized run expects exactly one vulnerability; the later runs expect no report.
+    """
+    mod = _iast_patched_module("tests.appsec.iast.fixtures.taint_sinks.sql_injection")
+    with override_env({"_DD_APPSEC_DEDUPLICATION_ENABLED": "true"}):
+        tainted_table = taint_pyobject(
+            pyobject="students",
+            source_name="test_ossystem",
+            source_value="students",
+            source_origin=OriginType.PARAMETER,
+        )
+        assert is_pyobject_tainted(tainted_table)
+        for _ in range(5):
+            mod.sqli_simple(tainted_table)
+
+        span_report = core.get_item(IAST.CONTEXT_KEY, span=iast_span_defaults)
+
+        if num_vuln_expected != 0:
+            assert span_report
+
+            assert len(span_report.vulnerabilities) == num_vuln_expected
+        else:
+            assert span_report is None