Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions docs/edit/add-new-test.md
Original file line number Diff line number Diff line change
Expand Up @@ -100,9 +100,10 @@ def test_waf_attack_detection(self):
interfaces.library.assert_waf_attack(r, rule="sqli-detection")

# Ensure AppSec data reaches agent
def appsec_validator(data, payload, chunk, span, appsec_data):
def appsec_validator(span, appsec_data):
return "triggers" in appsec_data
interfaces.agent.validate_appsec(r, appsec_validator)

interfaces.library.validate_one_appsec(r, appsec_validator)
```

#### Custom Validation with Validators
Expand Down
1 change: 0 additions & 1 deletion docs/internals/agent-interface-validation-methods.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ class MyTestClass:
### Data Retrieval Methods
- `get_spans(request=None)` - Get spans submitted to backend, optionally filtered by request
- `get_spans_list(request)` - Get spans for a specific request as a list
- `get_appsec_data(request)` - Get AppSec data from spans for a specific request
- `get_telemetry_data(*, flatten_message_batches=True)` - Get telemetry data sent to backend
- `get_profiling_data()` - Get profiling data from `/api/v2/profile` endpoint
- `get_metrics()` - Get metrics submitted to `/api/v2/series` endpoint
Expand Down
19 changes: 19 additions & 0 deletions utils/dd_types/_datadog_agent_trace.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from abc import ABC, abstractmethod
from enum import StrEnum
from typing import Any
from ._utils import get_rid_from_span_data


class AgentTraceFormat(StrEnum):
Expand Down Expand Up @@ -124,6 +125,16 @@ def __getitem__(self, key: str):
def meta(self) -> dict[str, Any]:
pass

def get_rid(self) -> str | None:
    """Return the request ID generated by the weblog for this span.

    The span type, ``meta`` and ``metrics`` of this span are forwarded to
    ``get_rid_from_span_data``; ``None`` means the trace is not linked to
    any HTTP request.
    """
    span_type = self.get_span_type()
    return get_rid_from_span_data(span_type, self.meta, self.metrics)

@property
@abstractmethod
def metrics(self) -> dict[str, Any]:
pass

@abstractmethod
def get_span_type(self) -> str:
pass
Expand Down Expand Up @@ -156,6 +167,10 @@ def __getitem__(self, key: str):
def meta(self) -> dict[str, Any]:
return self.raw_span["meta"]

@property
def metrics(self) -> dict[str, Any]:
return self.raw_span.get("metrics", {})

def get_span_type(self) -> str:
return self.raw_span.get("type", "")

Expand Down Expand Up @@ -183,6 +198,10 @@ def __getitem__(self, key: str):
def meta(self) -> dict[str, Any]:
return self.raw_span["attributes"]

@property
def metrics(self) -> dict[str, Any]:
return self.raw_span["attributes"]

def get_span_type(self) -> str:
return self.raw_span.get("typeRef", "")

Expand Down
21 changes: 21 additions & 0 deletions utils/dd_types/_datadog_library_trace.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
from enum import StrEnum
from typing import Any

from ._utils import get_rid_from_span_data


class LibraryTraceFormat(StrEnum):
"""Describe which format is used to carry trace payloads from the library to the agent
Expand Down Expand Up @@ -141,6 +143,16 @@ def __getitem__(self, key: str):
def meta(self) -> dict[str, Any]:
pass

@property
@abstractmethod
def metrics(self) -> dict[str, Any]:
pass

def get_rid(self) -> str | None:
    """Return the request ID generated by the weblog for this span.

    The raw span's ``type`` field (empty string when absent), together with
    ``meta`` and ``metrics``, is forwarded to ``get_rid_from_span_data``;
    ``None`` means the trace is not linked to any HTTP request.
    """
    span_type = self.raw_span.get("type", "")
    return get_rid_from_span_data(span_type, self.meta, self.metrics)


class DataDogLibrarySpanLegacy(DataDogLibrarySpan):
def get(self, key: str, default: Any = None): # noqa: ANN401
Expand All @@ -154,6 +166,10 @@ def meta(self) -> dict[str, Any]:
assert "meta" in self.raw_span
return self.raw_span["meta"]

@property
def metrics(self) -> dict[str, Any]:
return self.raw_span.get("metrics", {})


class DataDogLibrarySpanV1(DataDogLibrarySpan):
def __contains__(self, key: str) -> bool:
Expand Down Expand Up @@ -187,3 +203,8 @@ def __getitem__(self, key: str):
def meta(self) -> dict[str, Any]:
assert "attributes" in self.raw_span
return self.raw_span["attributes"]

@property
def metrics(self) -> dict[str, Any]:
assert "attributes" in self.raw_span
return self.raw_span["attributes"]
31 changes: 31 additions & 0 deletions utils/dd_types/_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from utils.tools import get_rid_from_user_agent


def get_rid_from_span_data(span_type: str, meta: dict, metrics: dict) -> str | None:
    """Look up a span's user agent and convert it to a request ID.

    The user agent is searched under several tag names of ``meta``, in
    priority order; the first truthy value found (or ``None``) is passed to
    ``get_rid_from_user_agent`` and its result is returned.
    """
    # rpc spans expose the user agent in a dedicated gRPC metadata tag
    # (java does not fill this tag; it uses the normal http tags)
    candidate = meta.get("grpc.metadata.user-agent") if span_type == "rpc" else None

    # The top level span (aka root span) is marked via the _dd.top_level tag by the tracers
    if not candidate and metrics.get("_dd.top_level") == 1.0:
        candidate = meta.get("http.request.headers.user-agent")

    # Remaining tag names, tried in order until one yields a value
    fallback_tags = (
        "http_request_headers_user-agent",  # try something for .NET
        "http_user_agent",  # cpp tracer
        "http.useragent",  # last hope
        "user_agent.original",  # java opentelemetry autoinstrumentation
        "http.user_agent",  # python opentelemetry autoinstrumentation
    )
    for tag in fallback_tags:
        if candidate:
            break
        candidate = meta.get(tag)

    return get_rid_from_user_agent(candidate)
36 changes: 2 additions & 34 deletions utils/interfaces/_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
import threading

from utils.dd_types import DataDogAgentTrace, DataDogAgentSpan, AgentTraceFormat
from utils.tools import get_rid_from_span
from utils._logger import logger
from utils.interfaces._core import ProxyBasedInterfaceValidator
from utils.interfaces._misc_validators import HeadersPresenceValidator
Expand All @@ -27,40 +26,9 @@ def ingest_file(self, src_path: str):
self.ready.set()
return super().ingest_file(src_path)

def get_appsec_data(self, request: HttpResponse):
rid = request.get_rid()

for data in self.get_data(path_filters="/api/v0.2/traces"):
if "tracerPayloads" not in data["request"]["content"]:
continue

content = data["request"]["content"]["tracerPayloads"]

for payload in content:
for chunk in payload["chunks"]:
for span in chunk["spans"]:
appsec_data = span.get("meta", {}).get("_dd.appsec.json", None) or span.get(
"meta_struct", {}
).get("appsec", None)
if appsec_data is None:
continue

if rid is None:
yield data, payload, chunk, span, appsec_data
elif get_rid_from_span(span) == rid:
logger.debug(f"Found span with rid={rid} in {data['log_filename']}")
yield data, payload, chunk, span, appsec_data

def get_profiling_data(self):
yield from self.get_data(path_filters="/api/v2/profile")

def validate_appsec(self, request: HttpResponse, validator: Callable):
for data, payload, chunk, span, appsec_data in self.get_appsec_data(request=request):
if validator(data, payload, chunk, span, appsec_data):
return

raise ValueError("No data validate this test")

def get_telemetry_data(self, *, flatten_message_batches: bool = True):
all_data = self.get_data(path_filters="/api/v2/apmtelemetry")
if flatten_message_batches:
Expand Down Expand Up @@ -129,7 +97,7 @@ def get_traces(self, request: HttpResponse | None = None) -> Generator[tuple[dic
yield data, trace
else:
for span in trace.spans:
if get_rid_from_span(span) == rid:
if span.get_rid() == rid:
logger.debug(f"Found a span in {trace.log_filename}")
yield data, trace
break
Expand All @@ -149,7 +117,7 @@ def get_spans(self, request: HttpResponse | None = None) -> Generator[tuple[dict

for data, trace in self.get_traces(request=request):
for span in trace.spans:
if rid is None or get_rid_from_span(span) == rid:
if rid is None or span.get_rid() == rid:
yield data, span

@staticmethod
Expand Down
3 changes: 1 addition & 2 deletions utils/interfaces/_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
from requests.exceptions import JSONDecodeError
from utils.interfaces._core import ProxyBasedInterfaceValidator
from utils.interfaces._library.core import LibraryInterfaceValidator
from utils.tools import get_rid_from_span
from utils._logger import logger
from utils._weblog import HttpResponse

Expand Down Expand Up @@ -69,7 +68,7 @@ def load_data_from_logs(self):
def _init_rid_to_library_trace_ids(self):
# Map each request ID to the spans created and submitted during that request call.
for _, span in self.library_interface.get_root_spans():
rid = get_rid_from_span(span)
rid = span.get_rid()

if not self.rid_to_library_trace_ids.get(rid):
self.rid_to_library_trace_ids[rid] = [span["trace_id"]]
Expand Down
2 changes: 1 addition & 1 deletion utils/interfaces/_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ def validate_all(
if not allow_no_data and data_is_missing:
raise ValueError(f"No data has been observed on {path_filters}")

def wait_for(self, wait_for_function: Callable, timeout: int):
def wait_for(self, wait_for_function: Callable[[dict], bool], timeout: int) -> None:
if self.replay:
return

Expand Down
17 changes: 0 additions & 17 deletions utils/interfaces/_library/_utils.py

This file was deleted.

30 changes: 16 additions & 14 deletions utils/interfaces/_library/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import json
import threading

from utils.tools import get_rid_from_user_agent, get_rid_from_span
from utils.tools import get_rid_from_user_agent
from utils._logger import logger
from utils.dd_constants import RemoteConfigApplyState, Capabilities
from utils.dd_types import DataDogLibrarySpan, DataDogLibraryTrace
Expand Down Expand Up @@ -80,31 +80,33 @@ def get_traces(

if data["path"] in ("/v0.4/traces", "/v0.5/traces"):
for trace in content:
result = DataDogLibraryTrace.from_legacy(data, trace)
if rid is None:
trace_found = True
yield data, DataDogLibraryTrace.from_legacy(data, trace)
yield data, result
else:
for span in trace:
if rid == get_rid_from_span(span):
logger.debug(f"Found a trace in {data['log_filename']}")
for span in result.spans:
if rid == span.get_rid():
logger.debug(f"Found a trace in {result.log_filename}")
trace_found = True
yield data, DataDogLibraryTrace.from_legacy(data, trace)
yield data, result
break

elif data["path"] == "/v1.0/traces":
if not content.get("chunks"):
continue

for trace in content.get("chunks"):
result = DataDogLibraryTrace.from_v1(data, trace)
if rid is None:
trace_found = True
yield data, DataDogLibraryTrace.from_v1(data, trace)
yield data, result
else:
for span in trace.get("spans"):
if rid == get_rid_from_span(span):
logger.debug(f"Found a trace in {data['log_filename']}")
for span in result.spans:
if rid == span.get_rid():
logger.debug(f"Found a trace in {result.log_filename}")
trace_found = True
yield data, DataDogLibraryTrace.from_v1(data, trace)
yield data, result
break

else:
Expand All @@ -124,11 +126,11 @@ def get_spans(self, request: HttpResponse | None = None, *, full_trace: bool = F
rid = request.get_rid() if request else None

for data, trace in self.get_traces(request=request):
for span in trace:
for span in trace.spans:
if rid is None or full_trace:
yield data, trace, span
elif rid == get_rid_from_span(span):
logger.debug(f"Found a span in {data['log_filename']}")
elif rid == span.get_rid():
logger.debug(f"Found a span in {trace.log_filename}")
yield data, trace, span

def get_root_spans(
Expand Down
43 changes: 41 additions & 2 deletions utils/interfaces/_test_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from utils.interfaces._core import InterfaceValidator
from utils._logger import logger
from utils._weblog import HttpResponse
from utils.tools import get_rid_from_span
from utils.tools import get_rid_from_user_agent


class _TestAgentInterfaceValidator(InterfaceValidator):
Expand Down Expand Up @@ -42,7 +42,7 @@ def get_traces(self, request: HttpResponse | None = None):

for trace in self._data_traces_list:
for span in trace:
if rid == get_rid_from_span(span):
if rid == _get_rid_from_span(span):
return span
return None

Expand Down Expand Up @@ -163,3 +163,42 @@ def get_telemetry_configurations(self, service_name: str | None = None, runtime_
for config in config_list:
configurations[config["name"]] = config
return configurations


def _get_rid_from_span(span: dict) -> str | None:
    """Look up a raw span's user agent and convert it to a request ID.

    The user agent is searched under several tag names, in priority order;
    the first truthy value found (or ``None``) is passed to
    ``get_rid_from_user_agent`` and its result is returned.
    """
    attributes = span.get("attributes")
    if attributes is not None:
        # This is a v1 span so it won't have a meta or metrics field:
        # the attributes stand in for both so the lookups below can be reused
        tags = attributes
        numeric_tags = attributes
    else:
        tags = span.get("meta", {})
        numeric_tags = span.get("metrics", {})

    # rpc spans expose the user agent in a dedicated gRPC metadata tag
    # (java does not fill this tag; it uses the normal http tags)
    candidate = tags.get("grpc.metadata.user-agent") if span.get("type") == "rpc" else None

    # The top level span (aka root span) is marked via the _dd.top_level tag by the tracers
    if not candidate and numeric_tags.get("_dd.top_level") == 1.0:
        candidate = tags.get("http.request.headers.user-agent")

    # Remaining tag names, tried in order until one yields a value
    fallback_tags = (
        "http_request_headers_user-agent",  # try something for .NET
        "http_user_agent",  # cpp tracer
        "http.useragent",  # last hope
        "user_agent.original",  # java opentelemetry autoinstrumentation
        "http.user_agent",  # python opentelemetry autoinstrumentation
    )
    for tag in fallback_tags:
        if candidate:
            break
        candidate = tags.get(tag)

    return get_rid_from_user_agent(candidate)
Loading
Loading