Skip to content

Commit 8c651d9

Browse files
authored
[ml] Fix missing parent ids in trace logs (#34405)
* Revert change to acs_connection_id type * Implement working POC for trace logging * Refactor to take ops_logger as argument * Remove debug statements and update tests * Add check for OpsLogger type * Add check for OpsLogger type in monitor_with_telemetry_mixing * Update tests * Address mypy errors * Address mypy errors * Suppress mypy error * Refactor log_record_to_telemetry and save trace_id, span_id, and activity_id * Remove name from span * Remove whitespace to eliminate pylint error
1 parent b88ec4c commit 8c651d9

28 files changed

+255
-238
lines changed

sdk/ml/azure-ai-ml/azure/ai/ml/_ml_client.py

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from functools import singledispatch
1111
from itertools import product
1212
from pathlib import Path
13-
from typing import Any, Optional, Tuple, TypeVar, Union
13+
from typing import Any, Dict, Optional, Tuple, TypeVar, Union
1414

1515
from azure.ai.ml._azure_environments import (
1616
CloudArgumentKeys,
@@ -269,12 +269,12 @@ def __init__(
269269

270270
user_agent = kwargs.get("user_agent", None)
271271

272-
app_insights_handler = get_appinsights_log_handler(
272+
app_insights_handler: Tuple = get_appinsights_log_handler(
273273
user_agent,
274274
**{"properties": properties},
275275
enable_telemetry=self._operation_config.enable_telemetry,
276276
)
277-
app_insights_handler_kwargs = {"app_insights_handler": app_insights_handler}
277+
app_insights_handler_kwargs: Dict[str, Tuple] = {"app_insights_handler": app_insights_handler}
278278

279279
base_url = _get_base_url_from_metadata(cloud_name=cloud_name, is_local_mfe=True)
280280
self._base_url = base_url
@@ -463,7 +463,7 @@ def __init__(
463463
self._service_client_10_2022_preview,
464464
self._operation_container,
465465
self._credential,
466-
**app_insights_handler_kwargs,
466+
**app_insights_handler_kwargs, # type: ignore[arg-type]
467467
)
468468
self._operation_container.add(AzureMLResourceType.REGISTRY, self._registries) # type: ignore[arg-type]
469469

@@ -484,15 +484,15 @@ def __init__(
484484
self._operation_scope,
485485
self._operation_config,
486486
self._service_client_08_2023_preview,
487-
**app_insights_handler_kwargs,
487+
**app_insights_handler_kwargs, # type: ignore[arg-type]
488488
)
489489
self._operation_container.add(AzureMLResourceType.COMPUTE, self._compute)
490490
self._datastores = DatastoreOperations(
491491
operation_scope=self._operation_scope,
492492
operation_config=self._operation_config,
493493
serviceclient_2023_04_01_preview=self._service_client_04_2023_preview,
494494
serviceclient_2024_01_01_preview=self._service_client_01_2024_preview,
495-
**ops_kwargs,
495+
**ops_kwargs, # type: ignore[arg-type]
496496
)
497497
self._operation_container.add(AzureMLResourceType.DATASTORE, self._datastores)
498498
self._models = ModelOperations(
@@ -510,23 +510,23 @@ def __init__(
510510
workspace_rg=self._ws_rg,
511511
workspace_sub=self._ws_sub,
512512
registry_reference=registry_reference,
513-
**app_insights_handler_kwargs,
513+
**app_insights_handler_kwargs, # type: ignore[arg-type]
514514
)
515515
self._operation_container.add(AzureMLResourceType.MODEL, self._models)
516516
self._code = CodeOperations(
517517
self._ws_operation_scope if registry_reference else self._operation_scope,
518518
self._operation_config,
519519
self._service_client_10_2021_dataplanepreview if registry_name else self._service_client_04_2023,
520520
self._datastores,
521-
**ops_kwargs,
521+
**ops_kwargs, # type: ignore[arg-type]
522522
)
523523
self._operation_container.add(AzureMLResourceType.CODE, self._code)
524524
self._environments = EnvironmentOperations(
525525
self._ws_operation_scope if registry_reference else self._operation_scope,
526526
self._operation_config,
527527
self._service_client_10_2021_dataplanepreview if registry_name else self._service_client_04_2023_preview,
528528
self._operation_container,
529-
**ops_kwargs,
529+
**ops_kwargs, # type: ignore[arg-type]
530530
)
531531
self._operation_container.add(AzureMLResourceType.ENVIRONMENT, self._environments)
532532
self._local_endpoint_helper = _LocalEndpointHelper(requests_pipeline=self._requests_pipeline)
@@ -539,7 +539,7 @@ def __init__(
539539
self._local_endpoint_helper,
540540
self._credential,
541541
requests_pipeline=self._requests_pipeline,
542-
**ops_kwargs,
542+
**ops_kwargs, # type: ignore[arg-type]
543543
)
544544
self._batch_endpoints = BatchEndpointOperations(
545545
self._operation_scope,
@@ -549,7 +549,7 @@ def __init__(
549549
self._credential,
550550
requests_pipeline=self._requests_pipeline,
551551
service_client_09_2020_dataplanepreview=self._service_client_09_2020_dataplanepreview,
552-
**ops_kwargs,
552+
**ops_kwargs, # type: ignore[arg-type]
553553
)
554554
self._operation_container.add(AzureMLResourceType.BATCH_ENDPOINT, self._batch_endpoints)
555555
self._operation_container.add(AzureMLResourceType.ONLINE_ENDPOINT, self._online_endpoints)
@@ -560,7 +560,7 @@ def __init__(
560560
self._operation_container,
561561
self._local_deployment_helper,
562562
self._credential,
563-
**ops_kwargs,
563+
**ops_kwargs, # type: ignore[arg-type]
564564
)
565565
self._batch_deployments = BatchDeploymentOperations(
566566
self._operation_scope,
@@ -592,7 +592,7 @@ def __init__(
592592
self._service_client_10_2021_dataplanepreview if registry_name else self._service_client_01_2024_preview,
593593
self._operation_container,
594594
self._preflight,
595-
**ops_kwargs,
595+
**ops_kwargs, # type: ignore[arg-type]
596596
)
597597
self._operation_container.add(AzureMLResourceType.COMPONENT, self._components)
598598
self._jobs = JobOperations(
@@ -626,7 +626,7 @@ def __init__(
626626
self._operation_scope,
627627
self._credential,
628628
_service_client_kwargs=kwargs,
629-
**ops_kwargs,
629+
**ops_kwargs, # type: ignore[arg-type]
630630
)
631631
self._operation_container.add(
632632
AzureMLResourceType.VIRTUALCLUSTER, self._virtual_clusters # type: ignore[arg-type]
@@ -639,7 +639,7 @@ def __init__(
639639
self._service_client_08_2023_preview,
640640
self._operation_container,
641641
self._credential,
642-
**app_insights_handler_kwargs,
642+
**app_insights_handler_kwargs, # type: ignore[arg-type]
643643
)
644644

645645
self._featuresets = FeatureSetOperations(
@@ -648,22 +648,22 @@ def __init__(
648648
self._service_client_10_2023,
649649
self._service_client_08_2023_preview,
650650
self._datastores,
651-
**ops_kwargs,
651+
**ops_kwargs, # type: ignore[arg-type]
652652
)
653653

654654
self._featurestoreentities = FeatureStoreEntityOperations(
655655
self._operation_scope,
656656
self._operation_config,
657657
self._service_client_10_2023,
658-
**ops_kwargs,
658+
**ops_kwargs, # type: ignore[arg-type]
659659
)
660660

661661
self._workspace_hubs = WorkspaceHubOperations(
662662
self._operation_scope,
663663
self._service_client_08_2023_preview,
664664
self._operation_container,
665665
self._credential,
666-
**app_insights_handler_kwargs,
666+
**app_insights_handler_kwargs, # type: ignore[arg-type]
667667
)
668668
self._operation_container.add(AzureMLResourceType.WORKSPACE_HUB, self._workspace_hubs) # type: ignore[arg-type]
669669

sdk/ml/azure-ai-ml/azure/ai/ml/_telemetry/activity.py

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323

2424
from marshmallow import ValidationError
2525

26+
from azure.ai.ml._utils._logger_utils import OpsLogger
2627
from azure.ai.ml._utils.utils import _is_user_error_from_exception_type, _is_user_error_from_status_code, _str_to_bool
2728
from azure.ai.ml.exceptions import ErrorCategory, MlException
2829
from azure.core.exceptions import HttpResponseError
@@ -260,8 +261,8 @@ def monitor_with_activity(
260261
To monitor, use the ``@monitor_with_activity`` decorator. As an alternative, you can also wrap the
261262
logical block of code with the ``log_activity()`` method.
262263
263-
:param logger: The logger adapter.
264-
:type logger: logging.LoggerAdapter
264+
:param logger: The operations logging class, containing loggers and tracer for the package and module
265+
:type logger: ~azure.ai.ml._utils._logger_utils.OpsLogger
265266
:param activity_name: The name of the activity. The name should be unique per the wrapped logical code block.
266267
:type activity_name: str
267268
:param activity_type: One of PUBLICAPI, INTERNALCALL, or CLIENTPROXY which represent an incoming API call,
@@ -275,8 +276,16 @@ def monitor_with_activity(
275276
def monitor(f):
276277
@functools.wraps(f)
277278
def wrapper(*args, **kwargs):
278-
with log_activity(logger, activity_name or f.__name__, activity_type, custom_dimensions):
279-
return f(*args, **kwargs)
279+
tracer = logger.package_tracer if isinstance(logger, OpsLogger) else None
280+
if tracer:
281+
with tracer.span():
282+
with log_activity(
283+
logger.package_logger, activity_name or f.__name__, activity_type, custom_dimensions
284+
):
285+
return f(*args, **kwargs)
286+
else:
287+
with log_activity(logger.package_logger, activity_name or f.__name__, activity_type, custom_dimensions):
288+
return f(*args, **kwargs)
280289

281290
return wrapper
282291

@@ -300,7 +309,7 @@ def monitor_with_telemetry_mixin(
300309
will collect from return value.
301310
To monitor, use the ``@monitor_with_telemetry_mixin`` decorator.
302311
303-
:param logger: The logger adapter.
312+
:param logger: The operations logging class, containing loggers and tracer for the package and module
304313
:type logger: logging.LoggerAdapter
305314
:param activity_name: The name of the activity. The name should be unique per the wrapped logical code block.
306315
:type activity_name: str
@@ -315,6 +324,8 @@ def monitor_with_telemetry_mixin(
315324
:return:
316325
"""
317326

327+
logger = logger.package_logger if isinstance(logger, OpsLogger) else logger
328+
318329
def monitor(f):
319330
def _collect_from_parameters(f, args, kwargs, extra_keys):
320331
dimensions = {}

sdk/ml/azure-ai-ml/azure/ai/ml/_telemetry/logging_handler.py

Lines changed: 29 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,15 @@
99
import logging
1010
import platform
1111
import traceback
12+
from typing import Optional, Tuple, Union
1213

1314
from opencensus.ext.azure.common import utils
1415
from opencensus.ext.azure.common.protocol import Data, Envelope, ExceptionData, Message
1516
from opencensus.ext.azure.log_exporter import AzureLogHandler
17+
from opencensus.ext.azure.trace_exporter import AzureExporter
1618
from opencensus.trace import config_integration
19+
from opencensus.trace.samplers import ProbabilitySampler
20+
from opencensus.trace.tracer import Tracer
1721

1822
from azure.ai.ml._user_agent import USER_AGENT
1923

@@ -87,7 +91,7 @@ def get_appinsights_log_handler(
8791
component_name=None,
8892
enable_telemetry=True,
8993
**kwargs,
90-
):
94+
) -> Tuple[Union["AzureMLSDKLogHandler", logging.NullHandler], Optional[Tracer]]:
9195
"""Enable the OpenCensus logging handler for specified logger and instrumentation key to send info to AppInsights.
9296
9397
:param user_agent: Information about the user's browser.
@@ -102,22 +106,23 @@ def get_appinsights_log_handler(
102106
:paramtype enable_telemetry: bool
103107
:keyword kwargs: Optional keyword arguments for adding additional information to messages.
104108
:paramtype kwargs: dict
105-
:return: The logging handler.
106-
:rtype: AzureMLSDKLogHandler
109+
:return: The logging handler and tracer.
110+
:rtype: Tuple[Union[AzureMLSDKLogHandler, logging.NullHandler], Optional[opencensus.trace.tracer.Tracer]]
107111
"""
108112
try:
109113
if instrumentation_key is None:
110114
instrumentation_key = INSTRUMENTATION_KEY
111115

112116
if not in_jupyter_notebook() or not enable_telemetry:
113-
return logging.NullHandler()
117+
return (logging.NullHandler(), None)
114118

115119
if not user_agent or not user_agent.lower() == USER_AGENT.lower():
116-
return logging.NullHandler()
120+
return (logging.NullHandler(), None)
117121

118-
if "properties" in kwargs and "subscription_id" in kwargs.get("properties"):
119-
if kwargs.get("properties")["subscription_id"] in test_subscriptions:
120-
return logging.NullHandler()
122+
if kwargs:
123+
if "properties" in kwargs and "subscription_id" in kwargs.get("properties"): # type: ignore[operator]
124+
if kwargs.get("properties")["subscription_id"] in test_subscriptions: # type: ignore[index]
125+
return (logging.NullHandler(), None)
121126

122127
child_namespace = component_name or __name__
123128
current_logger = logging.getLogger(AML_INTERNAL_LOGGER_NAMESPACE).getChild(child_namespace)
@@ -135,10 +140,15 @@ def get_appinsights_log_handler(
135140
)
136141
current_logger.addHandler(handler)
137142

138-
return handler
143+
tracer = Tracer(
144+
exporter=AzureExporter(connection_string=f"InstrumentationKey={instrumentation_key}"),
145+
sampler=ProbabilitySampler(1.0),
146+
)
147+
148+
return (handler, tracer)
139149
except Exception: # pylint: disable=broad-except
140150
# ignore any exceptions, telemetry collection errors shouldn't block an operation
141-
return logging.NullHandler()
151+
return (logging.NullHandler(), None)
142152

143153

144154
# cspell:ignore AzureMLSDKLogHandler
@@ -173,14 +183,20 @@ def log_record_to_envelope(self, record):
173183
if self._is_telemetry_collection_disabled:
174184
return None
175185

176-
envelope = create_envelope(self.options.instrumentation_key, record)
186+
envelope = Envelope(
187+
iKey=self.options.instrumentation_key,
188+
tags=dict(utils.azure_monitor_context),
189+
time=utils.timestamp_to_iso_str(record.created),
190+
)
177191

178192
properties = {
179193
"process": record.processName,
180194
"module": record.module,
181195
"level": record.levelname,
182-
"operation_id": envelope.tags.get("ai.ml.operation.id"),
183-
"operation_parent_id": envelope.tags.get("ai.ml.operation.parentId"),
196+
"activity_id": record.properties.get("activity_id", "00000000-0000-0000-0000-000000000000"),
197+
"client-request-id": record.properties.get("client_request_id", "00000000-0000-0000-0000-000000000000"),
198+
"span_id": record.spanId,
199+
"trace_id": record.traceId,
184200
}
185201

186202
if hasattr(record, "custom_dimensions") and isinstance(record.custom_dimensions, dict):
@@ -237,21 +253,3 @@ def log_record_to_envelope(self, record):
237253
)
238254
envelope.data = Data(baseData=data, baseType="MessageData")
239255
return envelope
240-
241-
242-
def create_envelope(instrumentation_key, record):
243-
envelope = Envelope(
244-
iKey=instrumentation_key,
245-
tags=dict(utils.azure_monitor_context),
246-
time=utils.timestamp_to_iso_str(record.created),
247-
)
248-
envelope.tags["ai.ml.operation.id"] = getattr(
249-
record,
250-
"traceId",
251-
"00000000000000000000000000000000",
252-
)
253-
envelope.tags[
254-
"ai.ml.operation.parentId"
255-
] = f"|{envelope.tags.get('ai.ml.operation.id')}.{getattr(record, 'spanId', '0000000000000000')}"
256-
257-
return envelope

sdk/ml/azure-ai-ml/azure/ai/ml/_utils/_logger_utils.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
import logging
66
import sys
7+
from typing import Dict
78

89
from azure.ai.ml._telemetry.logging_handler import AML_INTERNAL_LOGGER_NAMESPACE
910

@@ -22,9 +23,12 @@ class OpsLogger:
2223
def __init__(self, name: str):
2324
self.package_logger: logging.Logger = logging.getLogger(AML_INTERNAL_LOGGER_NAMESPACE + name)
2425
self.package_logger.propagate = False
26+
self.package_tracer = None
2527
self.module_logger = logging.getLogger(name)
2628
self.custom_dimensions = {}
2729

28-
def update_info(self, data: dict) -> None:
30+
def update_info(self, data: Dict) -> None:
2931
if "app_insights_handler" in data:
30-
self.package_logger.addHandler(data.pop("app_insights_handler"))
32+
logger, tracer = data.pop("app_insights_handler")
33+
self.package_logger.addHandler(logger)
34+
self.package_tracer = tracer

0 commit comments

Comments
 (0)