Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
81 commits
Select commit Hold shift + click to select a range
7ea10df
add TRACING_SAMPLING_PROBABILITY to TracingSettings
bisgaard-itis Sep 25, 2025
246ed0d
replace collector sampling strategy
bisgaard-itis Sep 25, 2025
1fc5a30
add sampling probability to traefik
bisgaard-itis Sep 25, 2025
29c5cbc
improve test
bisgaard-itis Sep 25, 2025
8e3a187
add sampling based strategy to fast api tracing instrumentation
bisgaard-itis Sep 25, 2025
c3a7962
add test and sampling strategy to aiohttp tracing instrumentation
bisgaard-itis Sep 25, 2025
5c2ce1b
clean up tests
bisgaard-itis Sep 25, 2025
bbff3e4
correct env var value
bisgaard-itis Sep 25, 2025
641ca63
Merge branch 'master' into 1090-implement-sampling-tracing-strategy
bisgaard-itis Sep 26, 2025
fa76bef
Merge branch 'master' into 1090-implement-sampling-tracing-strategy
bisgaard-itis Sep 29, 2025
cf09512
create TracingData dataclass for carrying around tracing data
bisgaard-itis Sep 29, 2025
1e4583e
propagate fastapi instrumentation
bisgaard-itis Sep 29, 2025
604c818
small changes
bisgaard-itis Sep 29, 2025
f9200ae
Merge branch 'master' into 1090-implement-sampling-tracing-strategy
bisgaard-itis Sep 29, 2025
33e2023
propagate tracing-data throughout fastapi apps
bisgaard-itis Sep 29, 2025
405fa86
further propagate tracing_data
bisgaard-itis Sep 29, 2025
bbc5d12
further cleanup
bisgaard-itis Sep 29, 2025
d1aba9b
propagate tracing_data
bisgaard-itis Sep 29, 2025
8f711f8
fix pylint ignore
bisgaard-itis Sep 29, 2025
8e5605a
attach tracing_data to app in webserver
bisgaard-itis Sep 29, 2025
dbb4e64
fix test
bisgaard-itis Sep 29, 2025
0897ada
bugfix for aiohttp
bisgaard-itis Sep 29, 2025
2a00fff
Merge branch 'master' into 1090-implement-sampling-tracing-strategy
bisgaard-itis Sep 30, 2025
869c2a7
simplify tests
bisgaard-itis Sep 30, 2025
4883380
fix test for profiles span
bisgaard-itis Sep 30, 2025
7dd9918
remove profiled spans from codebase
bisgaard-itis Sep 30, 2025
40cc94a
implement aiohttp middleware to support TracerProvider
bisgaard-itis Oct 1, 2025
dc8ea56
further changes
bisgaard-itis Oct 1, 2025
709af0c
Merge branch 'master' into 1090-implement-sampling-tracing-strategy
bisgaard-itis Oct 1, 2025
63ac26a
start porting the log instrumentation
bisgaard-itis Oct 1, 2025
11953ff
Merge branch 'master' into 1090-implement-sampling-tracing-strategy
bisgaard-itis Oct 6, 2025
3674785
Merge branch 'master' into 1090-implement-sampling-tracing-strategy
bisgaard-itis Oct 6, 2025
b4718a2
start factoring out TracingData
bisgaard-itis Oct 6, 2025
972eb59
refactor tracing_data creation
bisgaard-itis Oct 6, 2025
d9aff45
fix lifespan creator in fastapi
bisgaard-itis Oct 6, 2025
874a26f
refactor tracing_data creation in aiohttp case
bisgaard-itis Oct 6, 2025
dd1180d
fix which was not committed yesterday
bisgaard-itis Oct 7, 2025
4f2ffe7
Merge branch 'master' into 1090-implement-sampling-tracing-strategy
bisgaard-itis Oct 7, 2025
209b01a
make TracingData frozen
bisgaard-itis Oct 7, 2025
a63d628
several fixes and cleanups
bisgaard-itis Oct 7, 2025
598f1b2
ensure settings are not passed when getting tracing data
bisgaard-itis Oct 7, 2025
c6f1c5f
several fixes
bisgaard-itis Oct 7, 2025
d2ed79c
Merge branch 'master' into 1090-implement-sampling-tracing-strategy
bisgaard-itis Oct 7, 2025
d32f87e
minor fixes to be able to generate OAS
bisgaard-itis Oct 7, 2025
6ebc7a8
fix import - make pylint happy
bisgaard-itis Oct 7, 2025
193e35a
fix
bisgaard-itis Oct 7, 2025
e6d5303
fix in dask sidecar
bisgaard-itis Oct 7, 2025
d2d21c8
Fix calls to create_application in webserver tests
bisgaard-itis Oct 7, 2025
4482f1d
Merge branch 'master' into 1090-implement-sampling-tracing-strategy
bisgaard-itis Oct 7, 2025
884c07d
fix import in webserver
bisgaard-itis Oct 7, 2025
2bf0901
fix clusters keeper tests
bisgaard-itis Oct 7, 2025
2dc15df
fix imports and typechecks in aiohttp tracing
bisgaard-itis Oct 7, 2025
41f68a8
fix various test fixtures
bisgaard-itis Oct 7, 2025
2aa10fc
disable tracing in tests
bisgaard-itis Oct 7, 2025
a340f98
fix sidecars_client fixture
bisgaard-itis Oct 7, 2025
84049f3
fix initialized_app fixture in autoscaling
bisgaard-itis Oct 7, 2025
4f3a5ac
Merge branch 'master' into 1090-implement-sampling-tracing-strategy
bisgaard-itis Oct 7, 2025
eea9fe9
fix test fixture in RUT
bisgaard-itis Oct 7, 2025
efc359d
fix more tests in RUT
bisgaard-itis Oct 7, 2025
ef7c3d5
fix service lib tests
bisgaard-itis Oct 7, 2025
e8f90fb
minor fix in servicelib tracing
bisgaard-itis Oct 7, 2025
6a0cce0
several fixes
bisgaard-itis Oct 7, 2025
3cddbc2
fix aiohttp tracing tests
bisgaard-itis Oct 7, 2025
e70a02b
Merge branch 'master' into 1090-implement-sampling-tracing-strategy
bisgaard-itis Oct 7, 2025
19057ae
Merge branch 'master' into 1090-implement-sampling-tracing-strategy
bisgaard-itis Oct 8, 2025
e4c11e8
TracingData -> TracingConfig @GitHK
bisgaard-itis Oct 8, 2025
dfd4042
Merge branch 'master' into 1090-implement-sampling-tracing-strategy
bisgaard-itis Oct 8, 2025
965c5c5
fix in dirv2 tracing_data -> tracing_config
bisgaard-itis Oct 8, 2025
435d2e2
cleanup in all the applications
bisgaard-itis Oct 8, 2025
02e1652
Follow up changes
bisgaard-itis Oct 8, 2025
9148c5e
@GitHK fixes
bisgaard-itis Oct 8, 2025
f680ac1
Merge branch 'master' into 1090-implement-sampling-tracing-strategy
bisgaard-itis Oct 8, 2025
df734a4
Merge branch 'master' into 1090-implement-sampling-tracing-strategy
bisgaard-itis Oct 9, 2025
933c752
Merge branch 'master' into 1090-implement-sampling-tracing-strategy
mrnicegyu11 Oct 9, 2025
8b26424
clean up dynamic scheduler tracing
bisgaard-itis Oct 9, 2025
e125632
TRACING_SAMPLING_PROBABILITY -> TRACING_OPENTELEMETRY_SAMPLING_PROBAB…
bisgaard-itis Oct 9, 2025
15afd8e
add fixme
bisgaard-itis Oct 9, 2025
92815f2
fix typo
bisgaard-itis Oct 9, 2025
d41d64d
make pylint happy
bisgaard-itis Oct 9, 2025
00e643a
Merge branch 'master' into 1090-implement-sampling-tracing-strategy
bisgaard-itis Oct 9, 2025
009d00c
Merge branch 'master' into 1090-implement-sampling-tracing-strategy
bisgaard-itis Oct 9, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .env-devel
Original file line number Diff line number Diff line change
Expand Up @@ -377,7 +377,7 @@ TRACING_OPENTELEMETRY_COLLECTOR_BATCH_SIZE=2
TRACING_OPENTELEMETRY_COLLECTOR_ENDPOINT=http://opentelemetry-collector
TRACING_OPENTELEMETRY_COLLECTOR_EXPORTER_ENDPOINT=http://jaeger:4318
TRACING_OPENTELEMETRY_COLLECTOR_PORT=4318
TRACING_OPENTELEMETRY_COLLECTOR_SAMPLING_PERCENTAGE=100
TRACING_OPENTELEMETRY_SAMPLING_PROBABILITY=1.0
TRAEFIK_SIMCORE_ZONE=internal_simcore_stack
TRASH_RETENTION_DAYS=7
TWILIO_ACCOUNT_SID=DUMMY
Expand Down
104 changes: 85 additions & 19 deletions packages/service-library/src/servicelib/aiohttp/tracing.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Adds aiohttp middleware for tracing using opentelemetry instrumentation."""

import logging
import time
from collections.abc import AsyncIterator, Callable
from typing import Final

Expand All @@ -13,18 +14,28 @@
AioHttpClientInstrumentor,
)
from opentelemetry.instrumentation.aiohttp_server import (
middleware as aiohttp_server_opentelemetry_middleware, # pylint:disable=no-name-in-module
_parse_active_request_count_attrs,
_parse_duration_attrs,
collect_request_attributes,
get_default_span_details,
getter,
meter,
set_status_code,
)
from opentelemetry.sdk.resources import Resource
from opentelemetry.propagate import extract
from opentelemetry.sdk.trace import SpanProcessor, TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor
from opentelemetry.semconv.metrics import MetricInstruments
from settings_library.tracing import TracingSettings
from yarl import URL

from ..logging_utils import log_context
from ..tracing import get_trace_id_header
from ..tracing import TracingConfig, get_trace_id_header

_logger = logging.getLogger(__name__)

TRACING_CONFIG_KEY: Final[str] = "tracing_config"

try:
from opentelemetry.instrumentation.botocore import ( # type: ignore[import-not-found]
BotocoreInstrumentor,
Expand Down Expand Up @@ -65,6 +76,57 @@
)


@web.middleware
async def aiohttp_server_opentelemetry_middleware(request: web.Request, handler):
    """Trace and meter one inbound aiohttp request using the app-scoped tracer provider.

    Adapted from the upstream OpenTelemetry aiohttp-server instrumentation:
    https://github.com/open-telemetry/opentelemetry-python-contrib/blob/main/instrumentation/opentelemetry-instrumentation-aiohttp-server/src/opentelemetry/instrumentation/aiohttp_server/__init__.py
    The original function is licensed under
    https://github.com/open-telemetry/opentelemetry-python-contrib/blob/main/LICENSE.

    Unlike the upstream middleware, the tracer is obtained from the
    ``TracingConfig`` stored on the application under ``TRACING_CONFIG_KEY``
    rather than from the global tracer provider.

    FIXME: this limitation is recorded upstream in
    https://github.com/open-telemetry/opentelemetry-python-contrib/issues/3801
    and a fix is planned there.

    Arguments:
        request: the incoming request; ``request.app`` must hold a
            ``TracingConfig`` with a tracer provider under ``TRACING_CONFIG_KEY``.
        handler: the next handler in the aiohttp middleware chain.

    Returns:
        The response produced by ``handler``.

    Raises:
        web.HTTPException: re-raised from ``handler`` after its status code
            has been recorded on the span.
    """
    span_name, additional_attributes = get_default_span_details(request)

    # Metric attributes are derived from the request attributes up-front,
    # before the span is opened.
    req_attrs = collect_request_attributes(request)
    duration_attrs = _parse_duration_attrs(req_attrs)
    active_requests_count_attrs = _parse_active_request_count_attrs(req_attrs)

    duration_histogram = meter.create_histogram(
        name=MetricInstruments.HTTP_SERVER_DURATION,
        unit="ms",
        description="Measures the duration of inbound HTTP requests.",
    )
    active_requests_counter = meter.create_up_down_counter(
        name=MetricInstruments.HTTP_SERVER_ACTIVE_REQUESTS,
        unit="requests",
        description="measures the number of concurrent HTTP requests those are currently in flight",
    )

    # The tracer provider travels with the application instead of being global.
    tracing_config = request.app[TRACING_CONFIG_KEY]
    assert isinstance(tracing_config, TracingConfig)  # nosec
    assert tracing_config.tracer_provider  # nosec
    tracer = tracing_config.tracer_provider.get_tracer(__name__)

    with tracer.start_as_current_span(
        span_name,
        context=extract(request, getter=getter),
        kind=trace.SpanKind.SERVER,
    ) as span:
        span_attributes = collect_request_attributes(request)
        span_attributes.update(additional_attributes)
        span.set_attributes(span_attributes)

        started_at = time.perf_counter()
        active_requests_counter.add(1, active_requests_count_attrs)
        try:
            response = await handler(request)
            set_status_code(span, response.status)
        except web.HTTPException as http_error:
            # aiohttp signals HTTP error responses via exceptions: record the
            # status on the span, then let the exception propagate.
            set_status_code(span, http_error.status_code)
            raise
        finally:
            # Metrics are recorded on every exit path (success or failure).
            elapsed_ms = max((time.perf_counter() - started_at) * 1000, 0)
            duration_histogram.record(elapsed_ms, duration_attrs)
            active_requests_counter.add(-1, active_requests_count_attrs)

    return response


def _create_span_processor(tracing_destination: str) -> SpanProcessor:
otlp_exporter = OTLPSpanExporterHTTP(
endpoint=tracing_destination,
Expand All @@ -77,12 +139,12 @@ def _startup(
app: web.Application,
tracing_settings: TracingSettings,
service_name: str,
tracer_provider: TracerProvider,
add_response_trace_id_header: bool = False,
) -> None:
"""
Sets up this service for a distributed tracing system (opentelemetry)
"""
_ = app
opentelemetry_collector_endpoint = (
f"{tracing_settings.TRACING_OPENTELEMETRY_COLLECTOR_ENDPOINT}"
)
Expand All @@ -99,9 +161,6 @@ def _startup(
"unset. Provide both or remove both."
)
raise RuntimeError(msg)
resource = Resource(attributes={"service.name": service_name})
trace.set_tracer_provider(TracerProvider(resource=resource))
tracer_provider: trace.TracerProvider = trace.get_tracer_provider()

tracing_destination: str = (
f"{URL(opentelemetry_collector_endpoint).with_port(opentelemetry_collector_port).with_path('/v1/traces')}"
Expand All @@ -114,7 +173,7 @@ def _startup(
)

# Add the span processor to the tracer provider
tracer_provider.add_span_processor(_create_span_processor(tracing_destination)) # type: ignore[attr-defined] # https://github.com/open-telemetry/opentelemetry-python/issues/3713
tracer_provider.add_span_processor(_create_span_processor(tracing_destination))
# Instrument aiohttp server
# Explanation for custom middleware call DK 10/2024:
# OpenTelemetry Aiohttp autoinstrumentation is meant to be used by only calling `AioHttpServerInstrumentor().instrument()`
Expand All @@ -135,43 +194,43 @@ def _startup(
# - opentelemetry-instrumentation==0.48b0

# Instrument aiohttp client
AioHttpClientInstrumentor().instrument()
AioHttpClientInstrumentor().instrument(tracer_provider=tracer_provider)
if HAS_AIOPG:
with log_context(
_logger,
logging.INFO,
msg="Attempting to add aio-pg opentelemetry autoinstrumentation...",
):
AiopgInstrumentor().instrument()
AiopgInstrumentor().instrument(tracer_provider=tracer_provider)
if HAS_ASYNCPG:
with log_context(
_logger,
logging.INFO,
msg="Attempting to add asyncpg opentelemetry autoinstrumentation...",
):
AsyncPGInstrumentor().instrument()
AsyncPGInstrumentor().instrument(tracer_provider=tracer_provider)
if HAS_BOTOCORE:
with log_context(
_logger,
logging.INFO,
msg="Attempting to add botocore opentelemetry autoinstrumentation...",
):
BotocoreInstrumentor().instrument()
BotocoreInstrumentor().instrument(tracer_provider=tracer_provider)
if HAS_REQUESTS:
with log_context(
_logger,
logging.INFO,
msg="Attempting to add requests opentelemetry autoinstrumentation...",
):
RequestsInstrumentor().instrument()
RequestsInstrumentor().instrument(tracer_provider=tracer_provider)

if HAS_AIO_PIKA:
with log_context(
_logger,
logging.INFO,
msg="Attempting to add aio_pika opentelemetry autoinstrumentation...",
):
AioPikaInstrumentor().instrument()
AioPikaInstrumentor().instrument(tracer_provider=tracer_provider)


@web.middleware
Expand Down Expand Up @@ -222,17 +281,24 @@ def _shutdown() -> None:
_logger.exception("Failed to uninstrument AioPikaInstrumentor")


def get_tracing_lifespan(
def setup_tracing(
*,
app: web.Application,
tracing_settings: TracingSettings,
service_name: str,
tracing_config: TracingConfig,
add_response_trace_id_header: bool = False,
) -> Callable[[web.Application], AsyncIterator]:

if tracing_config.tracing_enabled is False:
msg = "Tracing is not enabled"
raise ValueError(msg)
assert tracing_config.tracer_provider # nosec
assert tracing_config.tracing_settings # nosec

_startup(
app=app,
tracing_settings=tracing_settings,
service_name=service_name,
tracing_settings=tracing_config.tracing_settings,
tracer_provider=tracing_config.tracer_provider,
service_name=tracing_config.service_name,
add_response_trace_id_header=add_response_trace_id_header,
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import httpx
from fastapi import FastAPI
from settings_library.tracing import TracingSettings
from servicelib.tracing import TracingConfig

from .tracing import setup_httpx_client_tracing

Expand All @@ -12,16 +12,16 @@ def setup_client_session(
*,
default_timeout: datetime.timedelta = datetime.timedelta(seconds=20),
max_keepalive_connections: int = 20,
tracing_settings: TracingSettings | None,
tracing_config: TracingConfig | None
) -> None:
async def on_startup() -> None:
session = httpx.AsyncClient(
transport=httpx.AsyncHTTPTransport(http2=True),
limits=httpx.Limits(max_keepalive_connections=max_keepalive_connections),
timeout=default_timeout.total_seconds(),
)
if tracing_settings:
setup_httpx_client_tracing(session)
if tracing_config:
setup_httpx_client_tracing(session, tracing_config=tracing_config)
app.state.aiohttp_client_session = session

async def on_shutdown() -> None:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from common_library.errors_classes import OsparcErrorMixin
from httpx import AsyncClient, ConnectError, HTTPError, PoolTimeout, Response
from httpx._types import TimeoutTypes, URLTypes
from settings_library.tracing import TracingSettings
from servicelib.tracing import TracingConfig
from tenacity import RetryCallState
from tenacity.asyncio import AsyncRetrying
from tenacity.before_sleep import before_sleep_log
Expand Down Expand Up @@ -200,7 +200,7 @@ def __init__(
self,
*,
total_retry_interval: float,
tracing_settings: TracingSettings | None,
tracing_config: TracingConfig,
base_url: URLTypes | None = None,
default_http_client_timeout: TimeoutTypes | None = None,
extra_allowed_method_names: set[str] | None = None,
Expand All @@ -224,8 +224,8 @@ def __init__(
client_args["timeout"] = default_http_client_timeout

client = AsyncClient(**client_args)
if tracing_settings:
setup_httpx_client_tracing(client)
if tracing_config.tracing_enabled:
setup_httpx_client_tracing(client, tracing_config=tracing_config)
super().__init__(client=client)

async def __aenter__(self):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from common_library.logging.logging_utils_filtering import LoggerName, MessageSubstring
from fastapi import FastAPI
from settings_library.tracing import TracingSettings
from servicelib.tracing import TracingConfig

from ..logging_utils import (
LogLevelInt,
Expand All @@ -20,7 +20,7 @@ def create_logging_lifespan(
*,
log_format_local_dev_enabled: bool,
logger_filter_mapping: dict[LoggerName, list[MessageSubstring]],
tracing_settings: TracingSettings | None,
tracing_config: TracingConfig,
log_base_level: LogLevelInt,
noisy_loggers: tuple[str, ...] | None,
) -> Lifespan:
Expand All @@ -32,7 +32,7 @@ def create_logging_lifespan(
noisy_loggers=noisy_loggers,
log_format_local_dev_enabled=log_format_local_dev_enabled,
logger_filter_mapping=logger_filter_mapping,
tracing_settings=tracing_settings,
tracing_config=tracing_config,
)
)

Expand All @@ -49,7 +49,7 @@ def create_logging_shutdown_event(
*,
log_format_local_dev_enabled: bool,
logger_filter_mapping: dict[LoggerName, list[MessageSubstring]],
tracing_settings: TracingSettings | None,
tracing_config: TracingConfig,
log_base_level: LogLevelInt,
noisy_loggers: tuple[str, ...] | None,
) -> Callable[[], Awaitable[None]]:
Expand All @@ -67,7 +67,7 @@ def create_logging_shutdown_event(
noisy_loggers=noisy_loggers,
log_format_local_dev_enabled=log_format_local_dev_enabled,
logger_filter_mapping=logger_filter_mapping,
tracing_settings=tracing_settings,
tracing_config=tracing_config,
)
)

Expand Down
Loading
Loading