1+ import contextlib
12import logging
23import os
34import time
4- from typing import Callable , List , Union
5+ from typing import Callable , Iterator , List , Optional , Union
56
67import orjson
78import pystac
9+ from opencensus .ext .azure .log_exporter import AzureLogHandler
810
911from pctasks .core .models .task import FailedTaskResult , WaitTaskResult
1012from pctasks .core .storage import StorageFactory
1416from pctasks .task .task import Task
1517
# Regular module logger for human-readable progress messages.
1618logger = logging .getLogger (__name__ )
# Separate logger for structured records; `_init_azlogger` attaches an
# AzureLogHandler to it on first use.
19+ azlogger = logging .getLogger ("monitor.pctasks.dataset.items.task" )
20+ azlogger .setLevel (logging .INFO )
# Tri-state: None = initialization not attempted yet, an AzureLogHandler
# instance = attached, False = initialization failed (not retried).
21+ azhandler = None # initialized later in `_init_azlogger`
1722
1823
1924class CreateItemsError (Exception ):
@@ -34,6 +39,61 @@ def asset_chunk_id_to_ndjson_chunk_id(asset_chunk_id: str) -> str:
3439 return os .path .join (folder_name , "items.ndjson" )
3540
3641
def _init_azlogger() -> None:
    """Attach an AzureLogHandler to ``azlogger``, at most once per process.

    AzureLogHandler is slow to initialize, so the outcome of the first
    attempt is cached in the module-level ``azhandler``: it stays ``None``
    until the first call, then holds either the handler instance or
    ``False`` when construction failed. Later calls return immediately.
    """
    global azhandler

    # Anything other than None means an attempt has already been made.
    if azhandler is not None:
        return

    logger.debug("Initializing AzureLogHandler")
    try:
        handler = AzureLogHandler()
    except ValueError:
        # Missing instrumentation key: remember the failure so that we do
        # not retry on every call.
        azhandler = False
        logger.warning("Unable to initialize AzureLogHandler")
        return

    azhandler = handler
    handler.setLevel(logging.INFO)
    azlogger.addHandler(handler)
58+
59+
@contextlib.contextmanager
def traced_create_item(
    asset_uri: str,
    collection_id: Optional[str],
    i: Optional[int] = None,
    asset_count: Optional[int] = None,
) -> Iterator[None]:
    """Time the wrapped block and log the creation of one item.

    When the ``with`` body finishes normally, the elapsed time is written to
    the module logger and a "Created item" record carrying custom dimensions
    is emitted on ``azlogger``. If the body raises, the exception propagates
    unchanged and nothing is logged.

    Args:
        asset_uri: URI of the asset; included in both log records.
        collection_id: Collection ID reported in the custom dimensions.
        i: Index of the asset within its chunk (as produced by
            ``enumerate``), or ``None`` when a single asset is processed.
        asset_count: Total number of assets in the chunk, or ``None`` when
            a single asset is processed.
    """
    # Set up the Azure handler before starting the clock so that its slow
    # first-time initialization is not counted as item-creation time.
    _init_azlogger()
    start_time = time.monotonic()
    yield
    duration = time.monotonic() - start_time

    # Truthiness (not just `is not None`) on asset_count: a count of 0 would
    # otherwise raise ZeroDivisionError here, after the wrapped work already
    # succeeded, and callers would report a successful item as failed.
    if i is not None and asset_count:
        # asset_chunk_info case
        logger.info(
            f"({((i + 1) / asset_count) * 100:06.2f}%) "
            f"[{duration:.2f}s] "
            f" - {asset_uri} "
            f"({i + 1} of {asset_count})"
        )
    else:
        # asset_uri case
        logger.info(f"Created items from {asset_uri} in {duration:.2f}s")

    properties = {
        "custom_dimensions": {
            "type": "pctasks.create_item",
            "collection_id": collection_id,
            "asset_uri": asset_uri,
            "duration_seconds": duration,
        }
    }
    azlogger.info("Created item", extra=properties)
95+
96+
3797class CreateItemsTask (Task [CreateItemsInput , CreateItemsOutput ]):
3898 _input_model = CreateItemsInput
3999 _output_model = CreateItemsOutput
@@ -83,13 +143,8 @@ def _ensure_collection(items: List[pystac.Item]) -> None:
83143
84144 if args .asset_uri :
85145 try :
86- start_time = time .monotonic ()
87- result = self ._create_item (args .asset_uri , storage_factory )
88- end_time = time .monotonic ()
89- logger .info (
90- f"Created items from { args .asset_uri } in "
91- f"{ end_time - start_time :.2f} s"
92- )
146+ with traced_create_item (args .asset_uri , args .collection_id ):
147+ result = self ._create_item (args .asset_uri , storage_factory )
93148 except Exception as e :
94149 raise CreateItemsError (
95150 f"Failed to create item from { args .asset_uri } "
@@ -113,15 +168,10 @@ def _ensure_collection(items: List[pystac.Item]) -> None:
113168 chunk_lines = chunk_lines [: args .options .limit ]
114169 for i , asset_uri in enumerate (chunk_lines ):
115170 try :
116- start_time = time .monotonic ()
117- result = self ._create_item (asset_uri , storage_factory )
118- end_time = time .monotonic ()
119- logger .info (
120- f"({ ((i + 1 )/ asset_count )* 100 :06.2f} %) "
121- f"[{ end_time - start_time :.2f} s] "
122- f" - { asset_uri } "
123- f"({ i + 1 } of { asset_count } )"
124- )
171+ with traced_create_item (
172+ asset_uri , args .collection_id , i = i , asset_count = asset_count
173+ ):
174+ result = self ._create_item (asset_uri , storage_factory )
125175 except Exception as e :
126176 raise CreateItemsError (
127177 f"Failed to create item from { asset_uri } "
0 commit comments