Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 11 additions & 5 deletions ddtrace/_trace/processor/resource_renaming.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from urllib.parse import urlparse

from ddtrace._trace.processor import SpanProcessor
from ddtrace._trace.span import Span
from ddtrace.ext import SpanTypes
from ddtrace.ext import http
from ddtrace.internal.logger import get_logger
Expand All @@ -13,7 +14,7 @@
log = get_logger(__name__)


class ResourceRenamingProcessor(SpanProcessor):
class SimplifiedEndpointComputer:
def __init__(self):
self._INT_RE = re.compile(r"^[1-9][0-9]+$")
self._INT_ID_RE = re.compile(r"^(?=.*[0-9].*)[0-9._-]{3,}$")
Expand All @@ -35,7 +36,7 @@ def _compute_simplified_endpoint_path_element(self, elem: str) -> str:
return "{param:str}"
return elem

def _compute_simplified_endpoint(self, url: Optional[str]) -> str:
def from_url(self, url: Optional[str]) -> str:
"""Extracts and simplifies the path from an HTTP URL."""
if not url:
return "/"
Expand All @@ -62,16 +63,21 @@ def _compute_simplified_endpoint(self, url: Optional[str]) -> str:
elements = [self._compute_simplified_endpoint_path_element(elem) for elem in elements]
return "/" + "/".join(elements)

def on_span_start(self, span):

class ResourceRenamingProcessor(SpanProcessor):
def __init__(self):
self.simplified_endpoint_computer = SimplifiedEndpointComputer()

def on_span_start(self, span: Span):
pass

def on_span_finish(self, span):
def on_span_finish(self, span: Span):
if not span._is_top_level or span.span_type not in (SpanTypes.WEB, SpanTypes.HTTP, SpanTypes.SERVERLESS):
return

route = span.get_tag(http.ROUTE)

if not route or config._trace_resource_renaming_always_simplified_endpoint:
url = span.get_tag(http.URL)
endpoint = self._compute_simplified_endpoint(url)
endpoint = self.simplified_endpoint_computer.from_url(url)
span.set_tag_str(http.ENDPOINT, endpoint)
15 changes: 13 additions & 2 deletions ddtrace/appsec/_api_security/api_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,13 @@
from typing import Optional

from ddtrace._trace._limits import MAX_SPAN_META_VALUE_LEN
from ddtrace._trace.processor.resource_renaming import SimplifiedEndpointComputer
from ddtrace.appsec._asm_request_context import ASM_Environment
from ddtrace.appsec._constants import API_SECURITY
from ddtrace.appsec._constants import SPAN_DATA_NAMES
from ddtrace.appsec._trace_utils import _asm_manual_keep
import ddtrace.constants as constants
from ddtrace.ext import http
from ddtrace.internal import logger as ddlogger
from ddtrace.internal.service import Service
from ddtrace.settings.asm import config as asm_config
Expand Down Expand Up @@ -77,6 +80,7 @@ def __init__(self) -> None:

log.debug("%s initialized", self.__class__.__name__)
self._hashtable: collections.OrderedDict[int, float] = collections.OrderedDict()
self.simplified_endpoint_computer = SimplifiedEndpointComputer()

import ddtrace.appsec._asm_request_context as _asm_request_context
import ddtrace.appsec._metrics as _metrics
Expand All @@ -91,7 +95,7 @@ def _stop_service(self) -> None:
def _start_service(self) -> None:
self._asm_context.add_context_callback(self._schema_callback, global_callback=True)

def _should_collect_schema(self, env, priority: int) -> Optional[bool]:
def _should_collect_schema(self, env: ASM_Environment, priority: int) -> Optional[bool]:
"""
Rate limit per route.

Expand All @@ -103,8 +107,15 @@ def _should_collect_schema(self, env, priority: int) -> Optional[bool]:
if priority <= 0 and asm_config._apm_tracing_enabled:
return False

method = env.waf_addresses.get(SPAN_DATA_NAMES.REQUEST_METHOD)
route = env.waf_addresses.get(SPAN_DATA_NAMES.REQUEST_ROUTE)
if route is None:
endpoint = env.entry_span.get_tag(http.ENDPOINT)
if endpoint is None:
url = env.entry_span.get_tag(http.URL)
endpoint = self.simplified_endpoint_computer.from_url(url)
route = endpoint

method = env.waf_addresses.get(SPAN_DATA_NAMES.REQUEST_METHOD)
status = env.waf_addresses.get(SPAN_DATA_NAMES.RESPONSE_STATUS)
# Framework is not fully supported
if method is None or route is None or status is None:
Expand Down
58 changes: 58 additions & 0 deletions tests/appsec/appsec/api_security/test_api_security_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from ddtrace.constants import AUTO_REJECT
from ddtrace.constants import USER_KEEP
from ddtrace.constants import USER_REJECT
from ddtrace.ext import http
from tests.utils import override_global_config


Expand Down Expand Up @@ -251,3 +252,60 @@ def test_schema_callback_parse_response_body_disabled(self, api_manager, mock_en

assert len(mock_environment.entry_span._meta) == 0
api_manager._metrics._report_api_security.assert_called_with(True, 0)

def test_should_collect_schema_route_fallbacks_to_endpoint(self, mock_environment):
"""Test that _should_collect_schema falls back to endpoint tags when route is missing.

The WAF route address is None and the entry span only answers for the
``http.ENDPOINT`` tag. The same request is submitted twice: the first call
is expected to be truthy and the second falsy.
"""
with override_global_config(
values=dict(
_asm_enabled=True,
_api_security_enabled=True,
_apm_tracing_enabled=True,
_api_security_parse_response_body=True,
)
):
manager = APIManager()
# Swap the manager's collaborators for mocks.
manager._appsec_processor = MagicMock()
manager._asm_context = MagicMock()
manager._metrics = MagicMock()

# The entry span returns a value for http.ENDPOINT only; every other tag lookup yields None.
mock_environment.entry_span.get_tag = lambda name: "/span-endpoint" if name == http.ENDPOINT else None
# Route is explicitly None while method and status are present.
mock_environment.waf_addresses = {
SPAN_DATA_NAMES.REQUEST_ROUTE: None,
SPAN_DATA_NAMES.REQUEST_METHOD: "GET",
SPAN_DATA_NAMES.RESPONSE_STATUS: 200,
}

# First request should collect
assert manager._should_collect_schema(mock_environment, USER_KEEP)
# Second one should be discarded
assert not manager._should_collect_schema(mock_environment, USER_KEEP)

def test_should_collect_schema_route_missing_computes_endpoint(self, mock_environment):
"""Test that _should_collect_schema computes the endpoint value when route and endpoint tags are missing.

The WAF route address is None and the entry span only answers for the
``http.URL`` tag (no ``http.ENDPOINT``). The same request is submitted
twice: the first call is expected to be truthy and the second falsy.
"""
with override_global_config(
values=dict(
_asm_enabled=True,
_api_security_enabled=True,
_apm_tracing_enabled=True,
_api_security_parse_response_body=True,
)
):
manager = APIManager()
# Swap the manager's collaborators for mocks.
manager._appsec_processor = MagicMock()
manager._asm_context = MagicMock()
manager._metrics = MagicMock()

# Stand-in for the span's tag lookup: only http.URL has a value, every other tag yields None.
def get_tag(name):
return "https://ddtrace.dog/span-endpoint" if name == http.URL else None

mock_environment.entry_span.get_tag = get_tag
# Route is explicitly None while method and status are present.
mock_environment.waf_addresses = {
SPAN_DATA_NAMES.REQUEST_ROUTE: None,
SPAN_DATA_NAMES.REQUEST_METHOD: "GET",
SPAN_DATA_NAMES.RESPONSE_STATUS: 200,
}

# First request should collect
assert manager._should_collect_schema(mock_environment, USER_KEEP)
# Second one should be discarded
assert not manager._should_collect_schema(mock_environment, USER_KEEP)
7 changes: 3 additions & 4 deletions tests/tracer/test_resource_renaming.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import pytest

from ddtrace._trace.processor.resource_renaming import ResourceRenamingProcessor
from ddtrace._trace.processor.resource_renaming import SimplifiedEndpointComputer
from ddtrace.ext import SpanTypes
from ddtrace.ext import http
from ddtrace.trace import Context
Expand Down Expand Up @@ -44,8 +45,7 @@ class TestResourceRenaming:
],
)
def test_compute_simplified_endpoint_path_element(self, elem, expected):
processor = ResourceRenamingProcessor()
result = processor._compute_simplified_endpoint_path_element(elem)
result = SimplifiedEndpointComputer()._compute_simplified_endpoint_path_element(elem)
assert result == expected

@pytest.mark.parametrize(
Expand Down Expand Up @@ -89,8 +89,7 @@ def test_compute_simplified_endpoint_path_element(self, elem, expected):
],
)
def test_compute_simplified_endpoint(self, url, expected):
processor = ResourceRenamingProcessor()
result = processor._compute_simplified_endpoint(url)
result = SimplifiedEndpointComputer().from_url(url)
assert result == expected

def test_processor_with_route(self):
Expand Down
Loading