40 changes: 5 additions & 35 deletions packages/service-library/src/servicelib/logging_utils.py
@@ -63,6 +63,7 @@
class LogExtra(TypedDict):
log_uid: NotRequired[str]
log_oec: NotRequired[str]
log_trace_id: NotRequired[str]


def get_log_record_extra(
@@ -113,62 +114,32 @@ def format(self, record) -> str:

# SEE https://docs.python.org/3/library/logging.html#logrecord-attributes
_DEFAULT_FORMATTING: Final[str] = " | ".join(
[
"log_level=%(levelname)s",
"log_timestamp=%(asctime)s",
"log_source=%(name)s:%(funcName)s(%(lineno)d)",
"log_uid=%(log_uid)s",
"log_oec=%(log_oec)s",
"log_msg=%(message)s",
]
)

_LOCAL_FORMATTING: Final[str] = (
"%(levelname)s: [%(asctime)s/%(processName)s] [%(name)s:%(funcName)s(%(lineno)d)] - %(message)s"
)

# Tracing format strings
_TRACING_FORMATTING: Final[str] = " | ".join(
[
"log_level=%(levelname)s",
"log_timestamp=%(asctime)s",
"log_source=%(name)s:%(funcName)s(%(lineno)d)",
"log_uid=%(log_uid)s",
"log_oec=%(log_oec)s",
"log_trace_id=%(otelTraceID)s",
"log_span_id=%(otelSpanID)s",
"log_resource.service.name=%(otelServiceName)s",
"log_trace_sampled=%(otelTraceSampled)s",
"log_msg=%(message)s",
]
)

_LOCAL_TRACING_FORMATTING: Final[str] = (
_LOCAL_FORMATTING: Final[str] = (
"%(levelname)s: [%(asctime)s/%(processName)s] "
"[log_trace_id=%(otelTraceID)s log_span_id=%(otelSpanID)s "
"log_resource.service.name=%(otelServiceName)s log_trace_sampled=%(otelTraceSampled)s] "
"[log_trace_id=%(otelTraceID)s] "
"[%(name)s:%(funcName)s(%(lineno)d)] - %(message)s"
)

# Graylog Grok pattern extractor:
# log_level=%{WORD:log_level} \| log_timestamp=%{TIMESTAMP_ISO8601:log_timestamp} \| log_source=%{DATA:log_source} \| (log_uid=%{WORD:log_uid} \| )?log_msg=%{GREEDYDATA:log_msg}
# log_level=%{WORD:log_level} \| log_timestamp=%{TIMESTAMP_ISO8601:log_timestamp} \| log_source=%{NOTSPACE:log_source} \| log_uid=%{NOTSPACE:log_uid} \| log_oec=%{NOTSPACE:log_oec} \| log_trace_id=%{NOTSPACE:log_trace_id} \| log_msg=%{GREEDYDATA:log_msg}
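# For reference, a log line in the shape this Grok pattern is expected to match
# (the field values below are illustrative only, not taken from a real service):
# log_level=INFO | log_timestamp=2025-01-01 12:00:00,123 | log_source=simcore.module:handler(42) | log_uid=12345 | log_oec=OEC001 | log_trace_id=1234567890abcdef1234567890abcdef | log_msg=user request handled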


def _setup_logging_formatter(
*,
tracing_settings: TracingSettings | None,
log_format_local_dev_enabled: bool,
) -> logging.Formatter:
if log_format_local_dev_enabled:
fmt = (
_LOCAL_TRACING_FORMATTING
if tracing_settings is not None
else _LOCAL_FORMATTING
)
else:
fmt = (
_TRACING_FORMATTING if tracing_settings is not None else _DEFAULT_FORMATTING
)
fmt = _LOCAL_FORMATTING if log_format_local_dev_enabled else _DEFAULT_FORMATTING

return CustomFormatter(
fmt, log_format_local_dev_enabled=log_format_local_dev_enabled
@@ -234,7 +205,6 @@ def _configure_common_logging_settings(
setup_log_tracing(tracing_settings=tracing_settings)

return _setup_logging_formatter(
tracing_settings=tracing_settings,
log_format_local_dev_enabled=log_format_local_dev_enabled,
)

169 changes: 169 additions & 0 deletions packages/service-library/tests/test_logging_utils.py
@@ -1,7 +1,9 @@
# pylint:disable=redefined-outer-name
# pylint:disable=unused-argument

import io
import logging
import re
from collections.abc import Iterable
from contextlib import suppress
from pathlib import Path
@@ -10,6 +12,8 @@
import pytest
from faker import Faker
from servicelib.logging_utils import (
_DEFAULT_FORMATTING,
CustomFormatter,
LogExtra,
LogLevelInt,
LogMessageStr,
@@ -573,3 +577,168 @@ def _raise_test_exception():

# Check that the message was logged and cleanup happened
assert "Message before exception" in caplog.text


def _create_grok_regex_pattern() -> str:
"""Convert Grok pattern to regex for testing."""
# The Grok pattern from the comment:
# log_level=%{WORD:log_level} \| log_timestamp=%{TIMESTAMP_ISO8601:log_timestamp} \| log_source=%{NOTSPACE:log_source} \| log_uid=%{NOTSPACE:log_uid} \| log_oec=%{NOTSPACE:log_oec} \| log_trace_id=%{NOTSPACE:log_trace_id} \| log_msg=%{GREEDYDATA:log_msg}

grok_to_regex = {
r"%{WORD:log_level}": r"(?P<log_level>\w+)",
r"%{TIMESTAMP_ISO8601:log_timestamp}": r"(?P<log_timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3})",
r"%{NOTSPACE:log_source}": r"(?P<log_source>\S+)",
r"%{NOTSPACE:log_uid}": r"(?P<log_uid>\S+)",
r"%{NOTSPACE:log_oec}": r"(?P<log_oec>\S+)",
r"%{NOTSPACE:log_trace_id}": r"(?P<log_trace_id>\S+)",
r"%{GREEDYDATA:log_msg}": r"(?P<log_msg>.*)",
}

grok_pattern = r"log_level=%{WORD:log_level} \| log_timestamp=%{TIMESTAMP_ISO8601:log_timestamp} \| log_source=%{NOTSPACE:log_source} \| log_uid=%{NOTSPACE:log_uid} \| log_oec=%{NOTSPACE:log_oec} \| log_trace_id=%{NOTSPACE:log_trace_id} \| log_msg=%{GREEDYDATA:log_msg}"

# Convert to regex
regex_pattern = grok_pattern
for grok, regex in grok_to_regex.items():
regex_pattern = regex_pattern.replace(grok, regex)

return regex_pattern


def _create_test_log_record(
name: str,
level: int,
func_name: str,
lineno: int,
message: str,
*,
user_id: int | str | None = None,
error_code: str | None = None,
trace_id: str | None = None,
) -> logging.LogRecord:
"""Create a test LogRecord with optional extra fields."""
from servicelib.logging_utils import get_log_record_extra

record = logging.LogRecord(
name=name,
level=level,
pathname="/path/to/file.py",
lineno=lineno,
msg=message,
args=(),
exc_info=None,
func=func_name,
)

# Add extra fields if provided
extra = get_log_record_extra(user_id=user_id, error_code=error_code)
if extra:
for key, value in extra.items():
setattr(record, key, value)

# Add OpenTelemetry trace ID
record.otelTraceID = trace_id # type: ignore[attr-defined]

return record


def test_grok_pattern_parsing(caplog: pytest.LogCaptureFixture) -> None:
"""
Test that the Graylog Grok pattern correctly parses logs formatted with _DEFAULT_FORMATTING.

This test validates that the Grok pattern defined in the comment can correctly
parse logs formatted with _DEFAULT_FORMATTING.

WARNING: If log formatting changes, the Grok pattern in Graylog must be updated accordingly.
"""

# Create a custom handler with the default formatter
log_stream = io.StringIO()
handler = logging.StreamHandler(log_stream)
formatter = CustomFormatter(_DEFAULT_FORMATTING, log_format_local_dev_enabled=False)
handler.setFormatter(formatter)

# Create test log record with all fields populated
test_message = (
"This is a test log message with special chars: []{} and new line\nembedded"
)
record = _create_test_log_record(
name="test.module.submodule",
level=logging.INFO,
func_name="test_function",
lineno=42,
message=test_message,
user_id=12345,
error_code="OEC001",
trace_id="1234567890abcdef1234567890abcdef",
)

# Format the record
formatted_log = formatter.format(record)

# Test that the formatted log matches the Grok pattern
regex_pattern = _create_grok_regex_pattern()
match = re.match(regex_pattern, formatted_log)

assert (
match is not None
), f"Grok pattern did not match formatted log. Log: {formatted_log!r}"

# Verify extracted fields match expected values
groups = match.groupdict()

assert groups["log_level"] == "INFO"
assert groups["log_source"] == "test.module.submodule:test_function(42)"
assert groups["log_uid"] == "12345"
assert groups["log_oec"] == "OEC001"
assert groups["log_trace_id"] == "1234567890abcdef1234567890abcdef"

# Verify the message is correctly escaped (newlines become \\n)
expected_message = test_message.replace("\n", "\\n")
assert groups["log_msg"] == expected_message

# Verify timestamp format is ISO8601-like (as expected by Python logging)
timestamp_pattern = r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3}"
assert re.match(timestamp_pattern, groups["log_timestamp"])


def test_grok_pattern_parsing_with_none_values(
caplog: pytest.LogCaptureFixture,
) -> None:
"""
Test Grok pattern parsing when optional fields are None.

WARNING: If log formatting changes, the Grok pattern in Graylog must be updated accordingly.
"""

# Create a custom handler with the default formatter
handler = logging.StreamHandler(io.StringIO())
formatter = CustomFormatter(_DEFAULT_FORMATTING, log_format_local_dev_enabled=False)
handler.setFormatter(formatter)

# Create test log record with None values for optional fields
record = _create_test_log_record(
name="test.module",
level=logging.ERROR,
func_name="error_function",
lineno=100,
message="Error message",
user_id=None,
error_code=None,
trace_id=None,
)

formatted_log = formatter.format(record)
regex_pattern = _create_grok_regex_pattern()
match = re.match(regex_pattern, formatted_log)

assert (
match is not None
), f"Grok pattern did not match log with None values. Log: {formatted_log!r}"

groups = match.groupdict()
assert groups["log_level"] == "ERROR"
assert groups["log_source"] == "test.module:error_function(100)"
assert groups["log_uid"] == "None"
assert groups["log_oec"] == "None"
assert groups["log_trace_id"] == "None"
assert groups["log_msg"] == "Error message"