diff --git a/packages/service-library/src/servicelib/logging_utils.py b/packages/service-library/src/servicelib/logging_utils.py index d335fc059897..5cead04bffaa 100644 --- a/packages/service-library/src/servicelib/logging_utils.py +++ b/packages/service-library/src/servicelib/logging_utils.py @@ -97,7 +97,10 @@ def format(self, record) -> str: if hasattr(record, "file_name_override"): record.filename = record.file_name_override - for name in LogExtra.__optional_keys__: # pylint: disable=no-member + optional_keys = LogExtra.__optional_keys__ | frozenset( # pylint: disable=no-member + ["otelTraceID", "otelSpanID"] + ) + for name in optional_keys: if not hasattr(record, name): setattr(record, name, None) @@ -113,22 +116,6 @@ def format(self, record) -> str: # SEE https://docs.python.org/3/library/logging.html#logrecord-attributes _DEFAULT_FORMATTING: Final[str] = " | ".join( - [ - "log_level=%(levelname)s", - "log_timestamp=%(asctime)s", - "log_source=%(name)s:%(funcName)s(%(lineno)d)", - "log_uid=%(log_uid)s", - "log_oec=%(log_oec)s", - "log_msg=%(message)s", - ] -) - -_LOCAL_FORMATTING: Final[str] = ( - "%(levelname)s: [%(asctime)s/%(processName)s] [%(name)s:%(funcName)s(%(lineno)d)] - %(message)s" -) - -# Tracing format strings -_TRACING_FORMATTING: Final[str] = " | ".join( [ "log_level=%(levelname)s", "log_timestamp=%(asctime)s", @@ -137,38 +124,25 @@ def format(self, record) -> str: "log_oec=%(log_oec)s", "log_trace_id=%(otelTraceID)s", "log_span_id=%(otelSpanID)s", - "log_resource.service.name=%(otelServiceName)s", - "log_trace_sampled=%(otelTraceSampled)s", "log_msg=%(message)s", ] ) -_LOCAL_TRACING_FORMATTING: Final[str] = ( +_LOCAL_FORMATTING: Final[str] = ( "%(levelname)s: [%(asctime)s/%(processName)s] " - "[log_trace_id=%(otelTraceID)s log_span_id=%(otelSpanID)s " - "log_resource.service.name=%(otelServiceName)s log_trace_sampled=%(otelTraceSampled)s] " + "[log_trace_id=%(otelTraceID)s|log_span_id=%(otelSpanID)s] " "[%(name)s:%(funcName)s(%(lineno)d)] - 
%(message)s" ) # Graylog Grok pattern extractor: -# log_level=%{WORD:log_level} \| log_timestamp=%{TIMESTAMP_ISO8601:log_timestamp} \| log_source=%{DATA:log_source} \| (log_uid=%{WORD:log_uid} \| )?log_msg=%{GREEDYDATA:log_msg} +# log_level=%{WORD:log_level} \| log_timestamp=%{TIMESTAMP_ISO8601:log_timestamp} \| log_source=%{NOTSPACE:log_source} \| log_uid=%{NOTSPACE:log_uid} \| log_oec=%{NOTSPACE:log_oec} \| log_trace_id=%{NOTSPACE:log_trace_id} \| log_span_id=%{NOTSPACE:log_span_id} \| log_msg=%{GREEDYDATA:log_msg} def _setup_logging_formatter( *, - tracing_settings: TracingSettings | None, log_format_local_dev_enabled: bool, ) -> logging.Formatter: - if log_format_local_dev_enabled: - fmt = ( - _LOCAL_TRACING_FORMATTING - if tracing_settings is not None - else _LOCAL_FORMATTING - ) - else: - fmt = ( - _TRACING_FORMATTING if tracing_settings is not None else _DEFAULT_FORMATTING - ) + fmt = _LOCAL_FORMATTING if log_format_local_dev_enabled else _DEFAULT_FORMATTING return CustomFormatter( fmt, log_format_local_dev_enabled=log_format_local_dev_enabled @@ -234,7 +208,6 @@ def _configure_common_logging_settings( setup_log_tracing(tracing_settings=tracing_settings) return _setup_logging_formatter( - tracing_settings=tracing_settings, log_format_local_dev_enabled=log_format_local_dev_enabled, ) diff --git a/packages/service-library/tests/test_logging_utils.py b/packages/service-library/tests/test_logging_utils.py index 287487c17c64..60085b687358 100644 --- a/packages/service-library/tests/test_logging_utils.py +++ b/packages/service-library/tests/test_logging_utils.py @@ -1,7 +1,9 @@ # pylint:disable=redefined-outer-name # pylint:disable=unused-argument +import io import logging +import re from collections.abc import Iterable from contextlib import suppress from pathlib import Path @@ -10,6 +12,8 @@ import pytest from faker import Faker from servicelib.logging_utils import ( + _DEFAULT_FORMATTING, + CustomFormatter, LogExtra, LogLevelInt, LogMessageStr, @@ -573,3 
+577,175 @@ def _raise_test_exception():
 
     # Check that the message was logged and cleanup happened
     assert "Message before exception" in caplog.text
+
+
+def _create_grok_regex_pattern() -> str:
+    """Convert Grok pattern to regex for testing."""
+    # The Grok pattern from the comment:
+    # log_level=%{WORD:log_level} \| log_timestamp=%{TIMESTAMP_ISO8601:log_timestamp} \| log_source=%{NOTSPACE:log_source} \| log_uid=%{NOTSPACE:log_uid} \| log_oec=%{NOTSPACE:log_oec} \| log_trace_id=%{NOTSPACE:log_trace_id} \| log_span_id=%{NOTSPACE:log_span_id} \| log_msg=%{GREEDYDATA:log_msg}
+
+    grok_to_regex = {
+        r"%{WORD:log_level}": r"(?P<log_level>\w+)",
+        r"%{TIMESTAMP_ISO8601:log_timestamp}": r"(?P<log_timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3})",
+        r"%{NOTSPACE:log_source}": r"(?P<log_source>\S+)",
+        r"%{NOTSPACE:log_uid}": r"(?P<log_uid>\S+)",
+        r"%{NOTSPACE:log_oec}": r"(?P<log_oec>\S+)",
+        r"%{NOTSPACE:log_trace_id}": r"(?P<log_trace_id>\S+)",
+        r"%{NOTSPACE:log_span_id}": r"(?P<log_span_id>\S+)",
+        r"%{GREEDYDATA:log_msg}": r"(?P<log_msg>.*)",
+    }
+
+    grok_pattern = r"log_level=%{WORD:log_level} \| log_timestamp=%{TIMESTAMP_ISO8601:log_timestamp} \| log_source=%{NOTSPACE:log_source} \| log_uid=%{NOTSPACE:log_uid} \| log_oec=%{NOTSPACE:log_oec} \| log_trace_id=%{NOTSPACE:log_trace_id} \| log_span_id=%{NOTSPACE:log_span_id} \| log_msg=%{GREEDYDATA:log_msg}"
+
+    # Convert to regex
+    regex_pattern = grok_pattern
+    for grok, regex in grok_to_regex.items():
+        regex_pattern = regex_pattern.replace(grok, regex)
+
+    return regex_pattern
+
+
+def _create_test_log_record(
+    name: str,
+    level: int,
+    func_name: str,
+    lineno: int,
+    message: str,
+    *,
+    user_id: int | str | None = None,
+    error_code: str | None = None,
+    trace_id: str | None = None,
+    span_id: str | None = None,
+) -> logging.LogRecord:
+    """Create a test LogRecord with optional extra fields."""
+    from servicelib.logging_utils import get_log_record_extra
+
+    record = logging.LogRecord(
+        name=name,
+        level=level,
+        pathname="/path/to/file.py",
+        lineno=lineno,
+        msg=message,
+        args=(),
+        
exc_info=None, + func=func_name, + ) + + # Add extra fields if provided + extra = get_log_record_extra(user_id=user_id, error_code=error_code) + if extra: + for key, value in extra.items(): + setattr(record, key, value) + + # Add OpenTelemetry trace ID + record.otelTraceID = trace_id # type: ignore[attr-defined] + record.otelSpanID = span_id # type: ignore[attr-defined] + + return record + + +def test_grok_pattern_parsing(caplog: pytest.LogCaptureFixture) -> None: + """ + Test that the Graylog Grok pattern correctly parses logs formatted with _DEFAULT_FORMATTING. + + This test validates that the Grok pattern defined in the comment can correctly + parse logs formatted with _DEFAULT_FORMATTING. + + WARNING: If log formatting changes, the Grok pattern in Graylog must be updated accordingly. + """ + + # Create a custom handler with the default formatter + log_stream = io.StringIO() + handler = logging.StreamHandler(log_stream) + formatter = CustomFormatter(_DEFAULT_FORMATTING, log_format_local_dev_enabled=False) + handler.setFormatter(formatter) + + # Create test log record with all fields populated + test_message = ( + "This is a test log message with special chars: []{} and new line\nembedded" + ) + record = _create_test_log_record( + name="test.module.submodule", + level=logging.INFO, + func_name="test_function", + lineno=42, + message=test_message, + user_id=12345, + error_code="OEC001", + trace_id="1234567890abcdef1234567890abcdef", + span_id="987654321", + ) + + # Format the record + formatted_log = formatter.format(record) + + # Test that the formatted log matches the Grok pattern + regex_pattern = _create_grok_regex_pattern() + match = re.match(regex_pattern, formatted_log) + + assert ( + match is not None + ), f"Grok pattern did not match formatted log. 
Log: {formatted_log!r}" + + # Verify extracted fields match expected values + groups = match.groupdict() + + assert groups["log_level"] == "INFO" + assert groups["log_source"] == "test.module.submodule:test_function(42)" + assert groups["log_uid"] == "12345" + assert groups["log_oec"] == "OEC001" + assert groups["log_trace_id"] == "1234567890abcdef1234567890abcdef" + assert groups["log_span_id"] == "987654321" + + # Verify the message is correctly escaped (newlines become \\n) + expected_message = test_message.replace("\n", "\\n") + assert groups["log_msg"] == expected_message + + # Verify timestamp format is ISO8601-like (as expected by Python logging) + timestamp_pattern = r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3}" + assert re.match(timestamp_pattern, groups["log_timestamp"]) + + +def test_grok_pattern_parsing_with_none_values( + caplog: pytest.LogCaptureFixture, +) -> None: + """ + Test Grok pattern parsing when optional fields are None. + + WARNING: If log formatting changes, the Grok pattern in Graylog must be updated accordingly. + """ + + # Create a custom handler with the default formatter + handler = logging.StreamHandler(io.StringIO()) + formatter = CustomFormatter(_DEFAULT_FORMATTING, log_format_local_dev_enabled=False) + handler.setFormatter(formatter) + + # Create test log record with None values for optional fields + record = _create_test_log_record( + name="test.module", + level=logging.ERROR, + func_name="error_function", + lineno=100, + message="Error message", + user_id=None, + error_code=None, + trace_id=None, + span_id=None, + ) + + formatted_log = formatter.format(record) + regex_pattern = _create_grok_regex_pattern() + match = re.match(regex_pattern, formatted_log) + + assert ( + match is not None + ), f"Grok pattern did not match log with None values. 
Log: {formatted_log!r}" + + groups = match.groupdict() + assert groups["log_level"] == "ERROR" + assert groups["log_source"] == "test.module:error_function(100)" + assert groups["log_uid"] == "None" + assert groups["log_oec"] == "None" + assert groups["log_trace_id"] == "None" + assert groups["log_span_id"] == "None" + assert groups["log_msg"] == "Error message"