Skip to content

Commit 24bb839

Browse files
author
Tom Augspurger
authored
[dataset]: Added tracing to create_item (#185)
* [dataset]: Added tracing to create_item This adds some tracing to the create_item task to monitor item creation time. After an item is created, we'll log a message to an Application Insights instance with some custom dimensions: * `type`: `pctasks.create_item` for filtering * `collection_id`: The collection ID, for filtering * `asset_uri`: unique(ish) identifier for what assets were cataloged * `duration_seconds`: The time it took to create the item
1 parent 834f4a3 commit 24bb839

File tree

5 files changed

+118
-19
lines changed

5 files changed

+118
-19
lines changed

deployment/terraform/resources/keyvault.tf

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,12 @@ resource "azurerm_key_vault_secret" "task-client-secret" {
4141
key_vault_id = data.azurerm_key_vault.pctasks.id
4242
}
4343

44+
# Application Insights connection string for tasks, stored as a secret in the pctasks Key Vault
resource "azurerm_key_vault_secret" "task-application-insights-connection-string" {
45+
name = "task-application-insights-connection-string"
46+
value = azurerm_application_insights.pctasks.connection_string
47+
key_vault_id = data.azurerm_key_vault.pctasks.id
48+
}
49+
4450
# API Management access key
4551

4652
data "azurerm_key_vault" "deploy_secrets" {

pctasks/dataset/pctasks/dataset/items/task.py

Lines changed: 67 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
1+
import contextlib
12
import logging
23
import os
34
import time
4-
from typing import Callable, List, Union
5+
from typing import Callable, Iterator, List, Optional, Union
56

67
import orjson
78
import pystac
9+
from opencensus.ext.azure.log_exporter import AzureLogHandler
810

911
from pctasks.core.models.task import FailedTaskResult, WaitTaskResult
1012
from pctasks.core.storage import StorageFactory
@@ -14,6 +16,9 @@
1416
from pctasks.task.task import Task
1517

1618
logger = logging.getLogger(__name__)
19+
azlogger = logging.getLogger("monitor.pctasks.dataset.items.task")
20+
azlogger.setLevel(logging.INFO)
21+
azhandler = None # initialized later in `_init_azlogger`
1722

1823

1924
class CreateItemsError(Exception):
@@ -34,6 +39,61 @@ def asset_chunk_id_to_ndjson_chunk_id(asset_chunk_id: str) -> str:
3439
return os.path.join(folder_name, "items.ndjson")
3540

3641

42+
def _init_azlogger() -> None:
    """Attach an ``AzureLogHandler`` to ``azlogger`` on first use.

    The result is cached in the module-level ``azhandler`` so the (slow)
    handler construction is attempted only once:

    * ``None``  -- initialization has not been attempted yet.
    * ``False`` -- construction raised ``ValueError`` (missing
      instrumentation key); it is not retried.
    * handler   -- the handler that was attached to ``azlogger``.
    """
    global azhandler

    # Anything other than None means a previous call already decided.
    if azhandler is not None:
        return

    logger.debug("Initializing AzureLogHandler")
    try:
        azhandler = AzureLogHandler()
    except ValueError:
        # missing instrumentation key
        azhandler = False
        logger.warning("Unable to initialize AzureLogHandler")
        return

    azhandler.setLevel(logging.INFO)
    azlogger.addHandler(azhandler)
58+
59+
60+
@contextlib.contextmanager
def traced_create_item(
    asset_uri: str,
    collection_id: Optional[str],
    i: Optional[int] = None,
    asset_count: Optional[int] = None,
) -> Iterator[None]:
    """Time the wrapped ``with`` body and log how long it took.

    After the body finishes, an INFO message with the elapsed time goes to
    the module ``logger`` and a "Created item" record with
    ``custom_dimensions`` goes to ``azlogger``. There is no ``try`` around
    the ``yield``, so nothing is logged when the body raises.

    Args:
        asset_uri: Identifier of the asset being processed; included in
            both log messages.
        collection_id: Collection ID recorded in the custom dimensions.
        i: Zero-based position of this asset within a chunk, if any.
        asset_count: Total number of assets in the chunk, if any.

    When both ``i`` and ``asset_count`` are given, the INFO message is a
    progress line; otherwise it is a plain "Created items from ..." line.
    """
    _init_azlogger()
    began = time.monotonic()
    yield
    elapsed = time.monotonic() - began

    if i is None or asset_count is None:
        # asset_uri case
        message = f"Created items from {asset_uri} in {elapsed:.2f}s"
    else:
        # asset_chunk_info case
        position = i + 1
        message = (
            f"({(position/asset_count)*100:06.2f}%) "
            f"[{elapsed:.2f}s] "
            f" - {asset_uri} "
            f"({position} of {asset_count})"
        )
    logger.info(message)

    dimensions = {
        "type": "pctasks.create_item",
        "collection_id": collection_id,
        "asset_uri": asset_uri,
        "duration_seconds": elapsed,
    }
    azlogger.info("Created item", extra={"custom_dimensions": dimensions})
95+
96+
3797
class CreateItemsTask(Task[CreateItemsInput, CreateItemsOutput]):
3898
_input_model = CreateItemsInput
3999
_output_model = CreateItemsOutput
@@ -83,13 +143,8 @@ def _ensure_collection(items: List[pystac.Item]) -> None:
83143

84144
if args.asset_uri:
85145
try:
86-
start_time = time.monotonic()
87-
result = self._create_item(args.asset_uri, storage_factory)
88-
end_time = time.monotonic()
89-
logger.info(
90-
f"Created items from {args.asset_uri} in "
91-
f"{end_time - start_time:.2f}s"
92-
)
146+
with traced_create_item(args.asset_uri, args.collection_id):
147+
result = self._create_item(args.asset_uri, storage_factory)
93148
except Exception as e:
94149
raise CreateItemsError(
95150
f"Failed to create item from {args.asset_uri}"
@@ -113,15 +168,10 @@ def _ensure_collection(items: List[pystac.Item]) -> None:
113168
chunk_lines = chunk_lines[: args.options.limit]
114169
for i, asset_uri in enumerate(chunk_lines):
115170
try:
116-
start_time = time.monotonic()
117-
result = self._create_item(asset_uri, storage_factory)
118-
end_time = time.monotonic()
119-
logger.info(
120-
f"({((i+1)/asset_count)*100:06.2f}%) "
121-
f"[{end_time - start_time:.2f}s] "
122-
f" - {asset_uri} "
123-
f"({i+1} of {asset_count})"
124-
)
171+
with traced_create_item(
172+
asset_uri, args.collection_id, i=i, asset_count=asset_count
173+
):
174+
result = self._create_item(asset_uri, storage_factory)
125175
except Exception as e:
126176
raise CreateItemsError(
127177
f"Failed to create item from {asset_uri}"

pctasks/dataset/setup.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@
1515
"dev": [
1616
"pytest",
1717
"pytest-cov",
18-
"pre-commit"
18+
"pre-commit",
19+
"responses",
1920
],
2021
"docs": ["mkdocs", "mkdocs-material", "pdocs"],
2122
}

pctasks/dataset/tests/items/test_task.py

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,25 @@
11
import json
2+
import logging
23
from pathlib import Path
34
from tempfile import TemporaryDirectory
45
from typing import List, Union
56

67
import pystac
8+
import responses
79
from pystac.utils import str_to_datetime
810

11+
import pctasks.dataset.items.task
912
from pctasks.core.models.task import CompletedTaskResult, WaitTaskResult
1013
from pctasks.core.storage import StorageFactory
1114
from pctasks.core.storage.local import LocalStorage
1215
from pctasks.core.utils.stac import validate_stac
1316
from pctasks.dataset.chunks.models import ChunkInfo
1417
from pctasks.dataset.items.models import CreateItemsOutput
15-
from pctasks.dataset.items.task import CreateItemsInput, CreateItemsTask
18+
from pctasks.dataset.items.task import (
19+
CreateItemsInput,
20+
CreateItemsTask,
21+
traced_create_item,
22+
)
1623
from pctasks.dev.test_utils import run_test_task
1724
from pctasks.task.utils import get_task_path
1825

@@ -89,3 +96,37 @@ def test_wait_for_assets():
8996

9097
task_result = run_test_task(args.dict(), TASK_PATH)
9198
assert isinstance(task_result, WaitTaskResult)
99+
100+
101+
@responses.activate
def test_log_to_monitor(monkeypatch, caplog):
    """``traced_create_item`` logs one record carrying the custom dimensions."""
    # Fake but well-formed connection string so AzureLogHandler can be built.
    ingestion_endpoint = "https://westeurope-5.in.applicationinsights.azure.com/"
    monkeypatch.setenv(
        "APPLICATIONINSIGHTS_CONNECTION_STRING",
        "InstrumentationKey=00000000-0000-0000-0000-000000000000;"
        f"IngestionEndpoint={ingestion_endpoint};"
        "LiveEndpoint=https://westeurope.livediagnostics.monitor.azure.com/",
    )
    # opencensus will log an error about the instrumentation key being invalid
    opencensus_logger = logging.getLogger("opencensus.ext.azure")
    opencensus_logger.setLevel(logging.CRITICAL)

    # Mock the track URL on the same ingestion endpoint that the connection
    # string names, so the two cannot drift apart.
    # NOTE(review): the resulting double slash assumes the exporter appends
    # "/v2.1/track" to the endpoint verbatim -- confirm against opencensus.
    responses.post(f"{ingestion_endpoint}/v2.1/track")

    # Ensure that any previous tests initializing logging
    # (without an instrumentation key) didn't mess up our handler
    monkeypatch.setattr(pctasks.dataset.items.task, "azhandler", None)

    with caplog.at_level(logging.INFO):
        with traced_create_item("blob://test/test/asset.tif", "test-collection"):
            pass

    # Select the monitoring record by its extra attribute rather than by
    # position, so unrelated log records cannot shift the index.
    monitor_records = [r for r in caplog.records if hasattr(r, "custom_dimensions")]
    assert len(monitor_records) == 1

    dimensions = dict(monitor_records[0].custom_dimensions)
    duration = dimensions.pop("duration_seconds")
    # An empty ``with`` body can legitimately measure 0.0 seconds on a
    # coarse monotonic clock, so check type and sign instead of truthiness.
    assert isinstance(duration, float)
    assert duration >= 0
    assert dimensions == {
        "asset_uri": "blob://test/test/asset.tif",
        "collection_id": "test-collection",
        "type": "pctasks.create_item",
    }

    azlogger = logging.getLogger("monitor.pctasks.dataset.items.task")
    assert len(azlogger.handlers) == 1

requirements-dev.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ pystac[validation]==1.*
1010

1111
azure-functions
1212
azure-functions-durable
13+
responses
1314

1415
# Mypy stubs
1516

0 commit comments

Comments
 (0)