
Commit 8bb3108

chore: improve logs in batch functions (#1201)
1 parent 87a7896 commit 8bb3108

7 files changed: +109 additions, -52 deletions

api/src/shared/common/logging_utils.py

Lines changed: 1 addition & 1 deletion
@@ -6,7 +6,7 @@ def get_env_logging_level():
     """
     Get the logging level from the environment via OS variable LOGGING_LEVEL. Returns INFO if not set.
     """
-    return os.getenv("LOGGING_LEVEL", "INFO")
+    return logging.getLevelName(os.getenv("LOGGING_LEVEL", "INFO"))
 
 
 class Logger:
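
Note: logging.getLevelName resolves a level name such as "INFO" to its numeric value (20), so get_env_logging_level now returns something that can be passed directly to the standard logging configuration. A minimal sketch of how the returned value is typically consumed (the basicConfig call below is illustrative, not part of this commit):

    import logging
    import os

    # "INFO" -> 20, "DEBUG" -> 10, etc.; unknown names fall through as strings.
    level = logging.getLevelName(os.getenv("LOGGING_LEVEL", "INFO"))
    logging.basicConfig(level=level)
    logging.getLogger(__name__).info("Logging configured at level %s", level)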

functions-python/batch_datasets/src/main.py

Lines changed: 7 additions & 7 deletions
@@ -26,12 +26,13 @@
 from google.cloud.pubsub_v1.futures import Future
 from sqlalchemy import or_
 from sqlalchemy.orm import Session
+
 from shared.database_gen.sqlacodegen_models import Gtfsfeed, Gtfsdataset
 from shared.dataset_service.main import BatchExecutionService, BatchExecution
 from shared.database.database import with_db_session
-from shared.helpers.logger import Logger
+from shared.helpers.logger import init_logger
 
-logging.basicConfig(level=logging.INFO)
+init_logger()
 pubsub_topic_name = os.getenv("PUBSUB_TOPIC_NAME")
 project_id = os.getenv("PROJECT_ID")
 
@@ -91,8 +92,6 @@ def get_non_deprecated_feeds(session: Session):
     limit = os.getenv("FEEDS_LIMIT")
     query = query.limit(10 if limit is None else int(limit))
     results = query.all()
-    logging.info(f"Retrieved {len(results)} feeds.")
-
     return results
 
 
@@ -109,7 +108,6 @@ def batch_datasets(request, db_session: Session):
     :param db_session: database session object
     :return: HTTP response object
     """
-    Logger.init_logger()
     try:
         feeds = get_non_deprecated_feeds(db_session)
     except Exception as error:
@@ -139,7 +137,7 @@ def batch_datasets(request, db_session: Session):
             "api_key_parameter_name": feed.api_key_parameter_name,
         }
         data_str = json.dumps(payload)
-        logging.info(f"Publishing {data_str} to {topic_path}.")
+        logging.debug(f"Publishing {data_str} to {topic_path}.")
         future = publish(publisher, topic_path, data_str.encode("utf-8"))
         future.add_done_callback(
             lambda _: publish_callback(future, feed.stable_id, topic_path)
@@ -151,4 +149,6 @@ def batch_datasets(request, db_session: Session):
                 timestamp=timestamp,
             )
         )
-    return f"Publish completed. Published {len(feeds)} feeds to {pubsub_topic_name}."
+    message = f"Publish completed. Published {len(feeds)} feeds to {pubsub_topic_name}."
+    logging.info(message)
+    return message
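
Note: the implementation of init_logger is not included in this diff; the sketch below only illustrates the pattern the new call site relies on (configure the root logger once at import time from the LOGGING_LEVEL environment variable). The body and format string are assumptions, not the actual shared.helpers.logger code.

    import logging
    import os

    def init_logger():
        # Hypothetical sketch: one-time root logger setup driven by LOGGING_LEVEL.
        logging.basicConfig(
            level=logging.getLevelName(os.getenv("LOGGING_LEVEL", "INFO")),
            format="%(asctime)s %(levelname)s %(name)s - %(message)s",
        )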

functions-python/batch_process_dataset/main_local_debug.py

Lines changed: 1 addition & 1 deletion
@@ -18,7 +18,7 @@
 # - python batch_process_dataset/main_local_debug.py
 
 from dotenv import load_dotenv
-from batch_process_dataset.src.main import process_dataset
+from main import process_dataset
 
 # Load environment variables from .env.local
 load_dotenv(dotenv_path=".env.local")
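
Note: switching to from main import process_dataset presumably assumes the function's src directory is on the Python path when the debug script runs locally, mirroring how the deployed function resolves its own main module.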

functions-python/batch_process_dataset/src/main.py

Lines changed: 42 additions & 33 deletions
@@ -34,10 +34,12 @@
 from shared.database.database import with_db_session, refresh_materialized_view
 import logging
 
-from shared.helpers.logger import Logger
+from shared.helpers.logger import init_logger, get_logger
 from shared.helpers.utils import download_and_get_hash
 from sqlalchemy.orm import Session
 
+init_logger()
+
 
 @dataclass
 class DatasetFile:
@@ -63,6 +65,7 @@ def __init__(
         api_key_parameter_name,
         public_hosted_datasets_url,
     ):
+        self.logger = get_logger(DatasetProcessor.__name__, feed_stable_id)
         self.producer_url = producer_url
         self.bucket_name = bucket_name
         self.latest_hash = latest_hash
@@ -73,7 +76,7 @@ def __init__(
         self.api_key_parameter_name = api_key_parameter_name
         self.date = datetime.now().strftime("%Y%m%d%H%M")
         if self.authentication_type != 0:
-            logging.info(f"Getting feed credentials for feed {self.feed_stable_id}")
+            self.logger.info(f"Getting feed credentials for feed {self.feed_stable_id}")
             self.feed_credentials = self.get_feed_credentials(self.feed_stable_id)
             if self.feed_credentials is None:
                 raise Exception(
@@ -95,7 +98,9 @@ def get_feed_credentials(feed_stable_id) -> str | None:
             feeds_credentials = json.loads(os.getenv("FEEDS_CREDENTIALS", "{}"))
             return feeds_credentials.get(feed_stable_id, None)
         except Exception as e:
-            logging.error(f"Error getting feed credentials: {e}")
+            get_logger(DatasetProcessor.__name__, feed_stable_id).error(
+                f"Error getting feed credentials: {e}"
+            )
             return None
 
     @staticmethod
@@ -141,23 +146,25 @@ def upload_dataset(self) -> DatasetFile or None:
         :return: the file hash and the hosted url as a tuple or None if no upload is required
         """
         try:
-            logging.info(f"[{self.feed_stable_id}] - Accessing URL {self.producer_url}")
+            self.logger.info("Accessing URL %s", self.producer_url)
             temp_file_path = self.generate_temp_filename()
             file_sha256_hash, is_zip = self.download_content(temp_file_path)
             if not is_zip:
-                logging.error(
+                self.logger.error(
                     f"[{self.feed_stable_id}] The downloaded file from {self.producer_url} is not a valid ZIP file."
                 )
                 return None
 
-            logging.info(f"[{self.feed_stable_id}] File hash is {file_sha256_hash}.")
+            self.logger.info(
+                f"[{self.feed_stable_id}] File hash is {file_sha256_hash}."
+            )
 
             if self.latest_hash != file_sha256_hash:
-                logging.info(
+                self.logger.info(
                     f"[{self.feed_stable_id}] Dataset has changed (hash {self.latest_hash}"
                     f"-> {file_sha256_hash}). Uploading new version."
                 )
-                logging.info(
+                self.logger.info(
                     f"Creating file {self.feed_stable_id}/latest.zip in bucket {self.bucket_name}"
                 )
                 self.upload_file_to_storage(
@@ -170,7 +177,7 @@ def upload_dataset(self) -> DatasetFile or None:
                 dataset_full_path = (
                     f"{self.feed_stable_id}/{dataset_stable_id}/{dataset_stable_id}.zip"
                 )
-                logging.info(
+                self.logger.info(
                     f"Creating file: {dataset_full_path}"
                     f" in bucket {self.bucket_name}"
                 )
@@ -185,7 +192,7 @@ def upload_dataset(self) -> DatasetFile or None:
                     hosted_url=f"{self.public_hosted_datasets_url}/{dataset_full_path}",
                 )
 
-            logging.info(
+            self.logger.info(
                 f"[{self.feed_stable_id}] Datasets hash has not changed (hash {self.latest_hash} "
                 f"-> {file_sha256_hash}). Not uploading it."
             )
@@ -216,11 +223,11 @@ def create_dataset(self, dataset_file: DatasetFile, db_session: Session):
                 .one_or_none()
             )
             if not latest_dataset:
-                logging.info(
+                self.logger.info(
                     f"[{self.feed_stable_id}] No latest dataset found for feed."
                 )
 
-            logging.info(
+            self.logger.info(
                 f"[{self.feed_stable_id}] Creating new dataset for feed with stable id {dataset_file.stable_id}."
             )
             new_dataset = Gtfsdataset(
@@ -239,10 +246,10 @@ def create_dataset(self, dataset_file: DatasetFile, db_session: Session):
             db_session.add(latest_dataset)
             db_session.add(new_dataset)
             db_session.commit()
-            logging.info(f"[{self.feed_stable_id}] Dataset created successfully.")
+            self.logger.info(f"[{self.feed_stable_id}] Dataset created successfully.")
 
             refresh_materialized_view(db_session, t_feedsearch.name)
-            logging.info(
+            self.logger.info(
                 f"[{self.feed_stable_id}] Materialized view refresh event triggered successfully."
             )
         except Exception as e:
@@ -256,7 +263,7 @@ def process(self) -> DatasetFile or None:
         dataset_file = self.upload_dataset()
 
         if dataset_file is None:
-            logging.info(f"[{self.feed_stable_id}] No database update required.")
+            self.logger.info(f"[{self.feed_stable_id}] No database update required.")
             return None
         self.create_dataset(dataset_file)
         return dataset_file
@@ -268,8 +275,8 @@ def record_trace(
     """
     Record the trace in the datastore
     """
-    logging.info(
-        f"[{stable_id}] Recording trace in execution: [{execution_id}] with status: [{status}]"
+    get_logger("record_trace", stable_id if stable_id else "UNKNOWN").info(
+        f"Recording trace in execution: [{execution_id}] with status: [{status}]"
     )
     trace = DatasetTrace(
         trace_id=None,
@@ -306,7 +313,6 @@ def process_dataset(cloud_event: CloudEvent):
         }
     }
     """
-    Logger.init_logger()
     logging.info("Function Started")
     stable_id = "UNKNOWN"
     bucket_name = os.getenv("DATASETS_BUCKET_NAME")
@@ -336,27 +342,27 @@ def process_dataset(cloud_event: CloudEvent):
         # Extract data from message
         data = base64.b64decode(cloud_event.data["message"]["data"]).decode()
         json_payload = json.loads(data)
-        logging.info(
-            f"[{json_payload['feed_stable_id']}] JSON Payload: {json.dumps(json_payload)}"
-        )
         stable_id = json_payload["feed_stable_id"]
+        logger = get_logger("process_dataset", stable_id)
+        logger.info(f"JSON Payload: {json.dumps(json_payload)}")
+
         execution_id = json_payload["execution_id"]
         trace_service = DatasetTraceService()
         trace = trace_service.get_by_execution_and_stable_ids(execution_id, stable_id)
-        logging.info(f"[{stable_id}] Dataset trace: {trace}")
+        logger.info(f"Dataset trace: {trace}")
         executions = len(trace) if trace else 0
-        logging.info(
-            f"[{stable_id}] Dataset executed times={executions}/{maximum_executions} "
+        logger.info(
+            f"Dataset executed times={executions}/{maximum_executions} "
             f"in execution=[{execution_id}] "
         )
 
         if executions > 0:
             if executions >= maximum_executions:
                 error_message = (
-                    f"[{stable_id}] Function already executed maximum times "
+                    f"Function already executed maximum times "
                     f"in execution: [{execution_id}]"
                 )
-                logging.error(error_message)
+                logger.error(error_message)
                 return error_message
 
         processor = DatasetProcessor(
@@ -372,11 +378,14 @@ def process_dataset(cloud_event: CloudEvent):
         )
         dataset_file = processor.process()
     except Exception as e:
-        logging.error(e)
-        error_message = f"[{stable_id}] Error execution: [{execution_id}] error: [{e}]"
-        logging.error(error_message)
-        logging.error(f"Function completed with error:{error_message}")
+        # This makes sure the logger is initialized
+        logger = get_logger("process_dataset", stable_id if stable_id else "UNKNOWN")
+        logger.error(e)
+        error_message = f"Error execution: [{execution_id}] error: [{e}]"
+        logger.error(error_message)
+        logger.error(f"Function completed with error:{error_message}")
     finally:
+        logger = get_logger("process_dataset", stable_id if stable_id else "UNKNOWN")
         if stable_id and execution_id:
             status = (
                 Status.PUBLISHED if dataset_file is not None else Status.NOT_PUBLISHED
@@ -392,12 +401,12 @@ def process_dataset(cloud_event: CloudEvent):
                 trace_service,
             )
         else:
-            logging.error(
+            logger.error(
                 f"Function completed with errors, missing stable={stable_id} or execution_id={execution_id}"
            )
             return f"Function completed with errors, missing stable={stable_id} or execution_id={execution_id}"
-    logging.info(
-        f"[{stable_id}] Function %s in execution: [{execution_id}]",
+    logger.info(
+        f"Function %s in execution: [{execution_id}]",
         "successfully completed" if not error_message else "Failed",
     )
     return "Completed." if error_message is None else error_message

functions-python/batch_process_dataset/tests/test_batch_process_dataset_main.py

Lines changed: 5 additions & 6 deletions
@@ -356,11 +356,10 @@ def test_process_no_change(self):
         self.assertIsNone(result)
         processor.create_dataset.assert_not_called()
 
-    @patch("main.Logger")
     @patch("main.DatasetTraceService")
     @patch("main.DatasetProcessor")
     def test_process_dataset_normal_execution(
-        self, mock_dataset_processor, mock_dataset_trace, _
+        self, mock_dataset_processor, mock_dataset_trace
     ):
         db_url = os.getenv("TEST_FEEDS_DATABASE_URL", default=default_db_url)
         os.environ["FEEDS_DATABASE_URL"] = db_url
@@ -392,11 +391,12 @@ def test_process_dataset_normal_execution(
         mock_dataset_processor.assert_called_once()
         mock_dataset_processor_instance.process.assert_called_once()
 
-    @patch("main.Logger")
     @patch("main.DatasetTraceService")
     @patch("main.DatasetProcessor")
     def test_process_dataset_exception_caught(
-        self, mock_dataset_processor, mock_dataset_trace, _
+        self,
+        mock_dataset_processor,
+        mock_dataset_trace,
     ):
         db_url = os.getenv("TEST_FEEDS_DATABASE_URL", default=default_db_url)
         os.environ["FEEDS_DATABASE_URL"] = db_url
@@ -416,9 +416,8 @@ def test_process_dataset_exception_caught(
         # Call the function
         process_dataset(cloud_event)
 
-    @patch("main.Logger")
     @patch("main.DatasetTraceService")
-    def test_process_dataset_missing_stable_id(self, mock_dataset_trace, _):
+    def test_process_dataset_missing_stable_id(self, mock_dataset_trace):
         db_url = os.getenv("TEST_FEEDS_DATABASE_URL", default=default_db_url)
         os.environ["FEEDS_DATABASE_URL"] = db_url
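
Note: the @patch("main.Logger") decorators are removed because main.py no longer imports Logger; it calls init_logger() at import time instead, so there is no main.Logger attribute left to patch, and the unused "_" mock parameter disappears with it.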

functions-python/big_query_ingestion/function_config.json

Lines changed: 1 addition & 1 deletion
@@ -6,7 +6,7 @@
   "memory": "8Gi",
   "trigger_http": false,
   "include_folders": ["helpers"],
-  "include_api_folders": ["database_gen"],
+  "include_api_folders": ["database_gen", "common"],
   "environment_variables": [],
   "secret_environment_variables": [
     {
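
Note: adding "common" to include_api_folders presumably bundles api/src/shared/common (where the updated logging_utils.py lives) into this function's deployment package so the shared logging helpers can import it at runtime.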
