Skip to content

Commit c53ec19

Browse files
Implement TRACE-level logging utilities and safe serialization for sensitive data
1 parent 8f3ca2b commit c53ec19

File tree

8 files changed

+404
-47
lines changed

8 files changed

+404
-47
lines changed

src/event_gate_lambda.py

Lines changed: 8 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -22,32 +22,24 @@
2222
import sys
2323
from typing import Any, Dict
2424

25-
import urllib3
26-
2725
import boto3
2826
import jwt
2927
import requests
28+
import urllib3
29+
from cryptography.exceptions import UnsupportedAlgorithm
3030
from cryptography.hazmat.primitives import serialization
3131
from jsonschema import validate
3232
from jsonschema.exceptions import ValidationError
3333

34-
# Added explicit import for serialization-related exceptions
35-
try: # pragma: no cover - import guard
36-
from cryptography.exceptions import UnsupportedAlgorithm # type: ignore
37-
except Exception: # pragma: no cover - very defensive
38-
UnsupportedAlgorithm = Exception # type: ignore
39-
4034
# Import writer modules with explicit ImportError fallback
4135
try:
42-
from . import writer_eventbridge, writer_kafka, writer_postgres
36+
from . import writer_eventbridge
37+
from . import writer_kafka
38+
from . import writer_postgres
4339
except ImportError: # fallback when executed outside package context
44-
import writer_eventbridge, writer_kafka, writer_postgres # type: ignore[no-redef]
45-
46-
# Register custom TRACE level before using LOG_LEVEL env var
47-
try:
48-
from .logging_levels import TRACE_LEVEL # noqa: F401
49-
except Exception: # pragma: no cover - defensive
50-
TRACE_LEVEL = 5 # type: ignore
40+
import writer_eventbridge # type: ignore[no-redef]
41+
import writer_kafka # type: ignore[no-redef]
42+
import writer_postgres # type: ignore[no-redef]
5143

5244
# Import configuration directory symbols with explicit ImportError fallback
5345
try:

src/logging_levels.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,14 @@
1313
logging.addLevelName(TRACE_LEVEL, "TRACE")
1414

1515
def trace(self: logging.Logger, message: str, *args, **kws):  # type: ignore[override]
    """Emit ``message`` on this logger at the custom TRACE level.

    Nothing is logged when the logger is not enabled for ``TRACE_LEVEL``.

    Args:
        self: Logger instance the record is emitted on.
        message: Log message format string.
        *args: Positional arguments used for message formatting.
        **kws: Keyword arguments forwarded unchanged to ``Logger._log``.
    """
    # Guard first so the record is only built when TRACE is actually enabled.
    if not self.isEnabledFor(TRACE_LEVEL):
        return
    self._log(TRACE_LEVEL, message, args, **kws)  # pylint: disable=protected-access
1826

src/safe_serialization.py

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
#
2+
# Copyright 2025 ABSA Group Limited
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
#
16+
17+
"""Safe serialization utilities for logging.
18+
19+
Provides PII-safe, size-bounded JSON serialization for TRACE logging.
20+
"""
21+
22+
import json
23+
import os
24+
from typing import Any, List, Set
25+
26+
27+
def _redact_sensitive_keys(obj: Any, redact_keys: Set[str]) -> Any:
28+
"""Recursively redact sensitive keys from nested structures.
29+
30+
Args:
31+
obj: Object to redact (dict, list, or scalar).
32+
redact_keys: Set of key names to redact (case-insensitive).
33+
34+
Returns:
35+
Copy of obj with sensitive values replaced by "***REDACTED***".
36+
"""
37+
if isinstance(obj, dict):
38+
return {
39+
k: "***REDACTED***" if k.lower() in redact_keys else _redact_sensitive_keys(v, redact_keys)
40+
for k, v in obj.items()
41+
}
42+
if isinstance(obj, list):
43+
return [_redact_sensitive_keys(item, redact_keys) for item in obj]
44+
return obj
45+
46+
47+
def safe_serialize_for_log(message: Any, redact_keys: List[str] | None = None, max_bytes: int | None = None) -> str:
48+
"""Safely serialize a message for logging with redaction and size capping.
49+
50+
Args:
51+
message: Object to serialize (typically a dict).
52+
redact_keys: List of key names to redact (case-insensitive). If None, uses env TRACE_REDACT_KEYS.
53+
max_bytes: Maximum serialized output size in bytes. If None, uses env TRACE_MAX_BYTES (default 10000).
54+
55+
Returns:
56+
JSON string (redacted and truncated if needed), or empty string on serialization error.
57+
"""
58+
# Apply configuration defaults
59+
if redact_keys is None:
60+
redact_keys_str = os.environ.get("TRACE_REDACT_KEYS", "password,secret,token,key,apikey,api_key")
61+
redact_keys = [k.strip() for k in redact_keys_str.split(",") if k.strip()]
62+
if max_bytes is None:
63+
max_bytes = int(os.environ.get("TRACE_MAX_BYTES", "10000"))
64+
65+
# Normalize to case-insensitive set
66+
redact_set = {k.lower() for k in redact_keys}
67+
68+
try:
69+
# Redact sensitive keys
70+
redacted = _redact_sensitive_keys(message, redact_set)
71+
# Serialize with minimal whitespace
72+
serialized = json.dumps(redacted, separators=(",", ":"))
73+
# Truncate if needed
74+
if len(serialized.encode("utf-8")) > max_bytes:
75+
# Binary truncate to max_bytes and append marker
76+
truncated_bytes = serialized.encode("utf-8")[:max_bytes]
77+
# Ensure we don't break mid-multibyte character
78+
try:
79+
return truncated_bytes.decode("utf-8", errors="ignore") + "..."
80+
except UnicodeDecodeError: # pragma: no cover - defensive
81+
return ""
82+
return serialized
83+
except (TypeError, ValueError, OverflowError): # pragma: no cover - catch serialization errors
84+
return ""
85+
86+
87+
__all__ = ["safe_serialize_for_log"]

src/trace_logging.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
#
2+
# Copyright 2025 ABSA Group Limited
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
#
16+
17+
"""Trace-level logging utilities.
18+
19+
Provides reusable TRACE-level payload logging for writer modules.
20+
"""
21+
22+
import logging
23+
from typing import Any, Dict
24+
25+
from .logging_levels import TRACE_LEVEL
26+
from .safe_serialization import safe_serialize_for_log
27+
28+
29+
def log_payload_at_trace(logger: logging.Logger, writer_name: str, topic_name: str, message: Dict[str, Any]) -> None:
    """Log message payload at TRACE level with safe serialization.

    Does nothing unless the logger is enabled for TRACE_LEVEL. The payload is
    passed through safe_serialize_for_log before it is logged. When no
    serialized payload can be produced, an "<unserializable>" marker is logged
    instead so the write attempt is still visible in the trace output.

    Args:
        logger: Logger instance to use for logging.
        writer_name: Name of the writer (e.g., "EventBridge", "Kafka", "Postgres").
        topic_name: Topic name being written to.
        message: Message payload to log.
    """
    if not logger.isEnabledFor(TRACE_LEVEL):
        return

    try:
        safe_payload = safe_serialize_for_log(message)
    except (TypeError, ValueError, AttributeError, RecursionError):  # pragma: no cover - defensive
        # Diagnostics must never break the write path; treat any serializer failure as "no payload".
        safe_payload = ""

    if safe_payload:
        logger.trace(  # type: ignore[attr-defined]
            "%s payload topic=%s payload=%s", writer_name, topic_name, safe_payload
        )
    else:
        # The serializer signals failure with an empty string rather than an exception.
        logger.trace("%s payload topic=%s <unserializable>", writer_name, topic_name)  # type: ignore[attr-defined]
49+
50+
51+
__all__ = ["log_payload_at_trace"]

src/writer_eventbridge.py

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,7 @@
2626
import boto3
2727
from botocore.exceptions import BotoCoreError, ClientError
2828

29-
# Ensure TRACE level is registered
30-
from . import logging_levels # noqa: F401
31-
from .logging_levels import TRACE_LEVEL
29+
from .trace_logging import log_payload_at_trace
3230

3331
STATE: Dict[str, Any] = {"logger": logging.getLogger(__name__), "event_bus_arn": "", "client": None}
3432

@@ -72,14 +70,7 @@ def write(topic_name: str, message: Dict[str, Any]) -> Tuple[bool, Optional[str]
7270
logger.debug("EventBridge client not initialized - skipping")
7371
return True, None
7472

75-
# TRACE-level payload logging
76-
if logger.isEnabledFor(TRACE_LEVEL):
77-
try:
78-
logger.trace( # type: ignore[attr-defined]
79-
"EventBridge payload topic=%s payload=%s", topic_name, json.dumps(message, separators=(",", ":"))
80-
)
81-
except Exception: # pragma: no cover - defensive serialization guard
82-
logger.trace("EventBridge payload topic=%s <unserializable>", topic_name) # type: ignore[attr-defined]
73+
log_payload_at_trace(logger, "EventBridge", topic_name, message)
8374

8475
try:
8576
logger.debug("Sending to eventBridge %s", topic_name)

src/writer_kafka.py

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,7 @@
2626

2727
from confluent_kafka import Producer
2828

29-
# Add TRACE level import
30-
from .logging_levels import TRACE_LEVEL # type: ignore
29+
from .trace_logging import log_payload_at_trace
3130

3231
try: # KafkaException may not exist in stubbed test module
3332
from confluent_kafka import KafkaException # type: ignore
@@ -94,14 +93,7 @@ def write(topic_name: str, message: Dict[str, Any]) -> Tuple[bool, Optional[str]
9493
logger.debug("Kafka producer not initialized - skipping")
9594
return True, None
9695

97-
# TRACE-level payload logging prior to produce
98-
if logger.isEnabledFor(TRACE_LEVEL):
99-
try:
100-
logger.trace( # type: ignore[attr-defined]
101-
"Kafka payload topic=%s payload=%s", topic_name, json.dumps(message, separators=(",", ":"))
102-
)
103-
except Exception: # pragma: no cover - defensive
104-
logger.trace("Kafka payload topic=%s <unserializable>", topic_name) # type: ignore[attr-defined]
96+
log_payload_at_trace(logger, "Kafka", topic_name, message)
10597

10698
errors: list[Any] = []
10799
try:

src/writer_postgres.py

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,7 @@
3131
except ImportError: # pragma: no cover - environment without psycopg2
3232
psycopg2 = None # type: ignore
3333

34-
# Ensure TRACE level is registered
35-
from .logging_levels import TRACE_LEVEL # type: ignore
34+
from .trace_logging import log_payload_at_trace
3635

3736
# Define a unified psycopg2 error base for safe exception handling even if psycopg2 missing
3837
if psycopg2 is not None: # type: ignore
@@ -274,14 +273,7 @@ def write(topic_name: str, message: Dict[str, Any]) -> Tuple[bool, Optional[str]
274273
_logger.debug("psycopg2 not available - skipping actual Postgres write")
275274
return True, None
276275

277-
# TRACE-level payload logging (only when we intend to write)
278-
if _logger.isEnabledFor(TRACE_LEVEL):
279-
try:
280-
_logger.trace( # type: ignore[attr-defined]
281-
"Postgres payload topic=%s payload=%s", topic_name, json.dumps(message, separators=(",", ":"))
282-
)
283-
except Exception: # pragma: no cover - defensive
284-
_logger.trace("Postgres payload topic=%s <unserializable>", topic_name) # type: ignore[attr-defined]
276+
log_payload_at_trace(_logger, "Postgres", topic_name, message)
285277

286278
with psycopg2.connect( # type: ignore[attr-defined]
287279
database=POSTGRES["database"],

0 commit comments

Comments
 (0)