Skip to content

Commit 478df52

Browse files
fix Graylog parsing issue
1 parent 1ba2817 commit 478df52

File tree

2 files changed

+180
-33
lines changed

2 files changed

+180
-33
lines changed

packages/service-library/src/servicelib/logging_utils.py

Lines changed: 7 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@
6363
class LogExtra(TypedDict):
6464
log_uid: NotRequired[str]
6565
log_oec: NotRequired[str]
66+
log_trace_id: NotRequired[str]
6667

6768

6869
def get_log_record_extra(
@@ -113,62 +114,35 @@ def format(self, record) -> str:
113114

114115
# SEE https://docs.python.org/3/library/logging.html#logrecord-attributes
115116
_DEFAULT_FORMATTING: Final[str] = " | ".join(
116-
[
117-
"log_level=%(levelname)s",
118-
"log_timestamp=%(asctime)s",
119-
"log_source=%(name)s:%(funcName)s(%(lineno)d)",
120-
"log_uid=%(log_uid)s",
121-
"log_oec=%(log_oec)s",
122-
"log_msg=%(message)s",
123-
]
124-
)
125-
126-
_LOCAL_FORMATTING: Final[str] = (
127-
"%(levelname)s: [%(asctime)s/%(processName)s] [%(name)s:%(funcName)s(%(lineno)d)] - %(message)s"
128-
)
129-
130-
# Tracing format strings
131-
_TRACING_FORMATTING: Final[str] = " | ".join(
132117
[
133118
"log_level=%(levelname)s",
134119
"log_timestamp=%(asctime)s",
135120
"log_source=%(name)s:%(funcName)s(%(lineno)d)",
136121
"log_uid=%(log_uid)s",
137122
"log_oec=%(log_oec)s",
138123
"log_trace_id=%(otelTraceID)s",
139-
"log_span_id=%(otelSpanID)s",
140-
"log_resource.service.name=%(otelServiceName)s",
141-
"log_trace_sampled=%(otelTraceSampled)s",
142124
"log_msg=%(message)s",
143125
]
144126
)
145127

146-
_LOCAL_TRACING_FORMATTING: Final[str] = (
128+
_LOCAL_FORMATTING: Final[str] = (
147129
"%(levelname)s: [%(asctime)s/%(processName)s] "
148-
"[log_trace_id=%(otelTraceID)s log_span_id=%(otelSpanID)s "
149-
"log_resource.service.name=%(otelServiceName)s log_trace_sampled=%(otelTraceSampled)s] "
130+
"[log_trace_id=%(otelTraceID)s] "
150131
"[%(name)s:%(funcName)s(%(lineno)d)] - %(message)s"
151132
)
152133

153134
# Graylog Grok pattern extractor:
154-
# log_level=%{WORD:log_level} \| log_timestamp=%{TIMESTAMP_ISO8601:log_timestamp} \| log_source=%{DATA:log_source} \| (log_uid=%{WORD:log_uid} \| )?log_msg=%{GREEDYDATA:log_msg}
135+
# log_level=%{WORD:log_level} \| log_timestamp=%{TIMESTAMP_ISO8601:log_timestamp} \| log_source=%{NOTSPACE:log_source} \| log_uid=%{NOTSPACE:log_uid} \| log_oec=%{NOTSPACE:log_oec} \| log_trace_id=%{NOTSPACE:log_trace_id} \| log_msg=%{GREEDYDATA:log_msg}
155136

156137

157138
def _setup_logging_formatter(
158139
*,
159140
tracing_settings: TracingSettings | None,
160141
log_format_local_dev_enabled: bool,
161142
) -> logging.Formatter:
162-
if log_format_local_dev_enabled:
163-
fmt = (
164-
_LOCAL_TRACING_FORMATTING
165-
if tracing_settings is not None
166-
else _LOCAL_FORMATTING
167-
)
168-
else:
169-
fmt = (
170-
_TRACING_FORMATTING if tracing_settings is not None else _DEFAULT_FORMATTING
171-
)
143+
assert tracing_settings # nosec
144+
145+
fmt = _LOCAL_FORMATTING if log_format_local_dev_enabled else _DEFAULT_FORMATTING
172146

173147
return CustomFormatter(
174148
fmt, log_format_local_dev_enabled=log_format_local_dev_enabled

packages/service-library/tests/test_logging_utils.py

Lines changed: 173 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -573,3 +573,176 @@ def _raise_test_exception():
573573

574574
# Check that the message was logged and cleanup happened
575575
assert "Message before exception" in caplog.text
576+
577+
578+
def _create_grok_regex_pattern() -> str:
579+
"""Convert Grok pattern to regex for testing."""
580+
# The Grok pattern from the comment:
581+
# log_level=%{WORD:log_level} \| log_timestamp=%{TIMESTAMP_ISO8601:log_timestamp} \| log_source=%{NOTSPACE:log_source} \| log_uid=%{NOTSPACE:log_uid} \| log_oec=%{NOTSPACE:log_oec} \| log_trace_id=%{NOTSPACE:log_trace_id} \| log_msg=%{GREEDYDATA:log_msg}
582+
583+
grok_to_regex = {
584+
r"%{WORD:log_level}": r"(?P<log_level>\w+)",
585+
r"%{TIMESTAMP_ISO8601:log_timestamp}": r"(?P<log_timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3})",
586+
r"%{NOTSPACE:log_source}": r"(?P<log_source>\S+)",
587+
r"%{NOTSPACE:log_uid}": r"(?P<log_uid>\S+)",
588+
r"%{NOTSPACE:log_oec}": r"(?P<log_oec>\S+)",
589+
r"%{NOTSPACE:log_trace_id}": r"(?P<log_trace_id>\S+)",
590+
r"%{GREEDYDATA:log_msg}": r"(?P<log_msg>.*)",
591+
}
592+
593+
grok_pattern = r"log_level=%{WORD:log_level} \| log_timestamp=%{TIMESTAMP_ISO8601:log_timestamp} \| log_source=%{NOTSPACE:log_source} \| log_uid=%{NOTSPACE:log_uid} \| log_oec=%{NOTSPACE:log_oec} \| log_trace_id=%{NOTSPACE:log_trace_id} \| log_msg=%{GREEDYDATA:log_msg}"
594+
595+
# Convert to regex
596+
regex_pattern = grok_pattern
597+
for grok, regex in grok_to_regex.items():
598+
regex_pattern = regex_pattern.replace(grok, regex)
599+
600+
return regex_pattern
601+
602+
603+
def _create_test_log_record(
604+
name: str,
605+
level: int,
606+
func_name: str,
607+
lineno: int,
608+
message: str,
609+
*,
610+
user_id: int | str | None = None,
611+
error_code: str | None = None,
612+
trace_id: str | None = None,
613+
) -> logging.LogRecord:
614+
"""Create a test LogRecord with optional extra fields."""
615+
from servicelib.logging_utils import get_log_record_extra
616+
617+
record = logging.LogRecord(
618+
name=name,
619+
level=level,
620+
pathname="/path/to/file.py",
621+
lineno=lineno,
622+
msg=message,
623+
args=(),
624+
exc_info=None,
625+
func=func_name,
626+
)
627+
628+
# Add extra fields if provided
629+
extra = get_log_record_extra(user_id=user_id, error_code=error_code)
630+
if extra:
631+
for key, value in extra.items():
632+
setattr(record, key, value)
633+
634+
# Add OpenTelemetry trace ID
635+
record.otelTraceID = trace_id # type: ignore[attr-defined]
636+
637+
return record
638+
639+
640+
def test_grok_pattern_parsing(caplog: pytest.LogCaptureFixture) -> None:
641+
"""
642+
Test that the Graylog Grok pattern correctly parses logs formatted with _DEFAULT_FORMATTING.
643+
644+
This test validates that the Grok pattern defined in the comment can correctly
645+
parse logs formatted with _DEFAULT_FORMATTING.
646+
647+
WARNING: If log formatting changes, the Grok pattern in Graylog must be updated accordingly.
648+
"""
649+
import io
650+
import re
651+
652+
from servicelib.logging_utils import _DEFAULT_FORMATTING, CustomFormatter
653+
654+
# Create a custom handler with the default formatter
655+
log_stream = io.StringIO()
656+
handler = logging.StreamHandler(log_stream)
657+
formatter = CustomFormatter(_DEFAULT_FORMATTING, log_format_local_dev_enabled=False)
658+
handler.setFormatter(formatter)
659+
660+
# Create test log record with all fields populated
661+
test_message = (
662+
"This is a test log message with special chars: []{} and new line\nembedded"
663+
)
664+
record = _create_test_log_record(
665+
name="test.module.submodule",
666+
level=logging.INFO,
667+
func_name="test_function",
668+
lineno=42,
669+
message=test_message,
670+
user_id=12345,
671+
error_code="OEC001",
672+
trace_id="1234567890abcdef1234567890abcdef",
673+
)
674+
675+
# Format the record
676+
formatted_log = formatter.format(record)
677+
678+
# Test that the formatted log matches the Grok pattern
679+
regex_pattern = _create_grok_regex_pattern()
680+
match = re.match(regex_pattern, formatted_log)
681+
682+
assert (
683+
match is not None
684+
), f"Grok pattern did not match formatted log. Log: {formatted_log!r}"
685+
686+
# Verify extracted fields match expected values
687+
groups = match.groupdict()
688+
689+
assert groups["log_level"] == "INFO"
690+
assert groups["log_source"] == "test.module.submodule:test_function(42)"
691+
assert groups["log_uid"] == "12345"
692+
assert groups["log_oec"] == "OEC001"
693+
assert groups["log_trace_id"] == "1234567890abcdef1234567890abcdef"
694+
695+
# Verify the message is correctly escaped (newlines become \\n)
696+
expected_message = test_message.replace("\n", "\\n")
697+
assert groups["log_msg"] == expected_message
698+
699+
# Verify timestamp format is ISO8601-like (as expected by Python logging)
700+
timestamp_pattern = r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3}"
701+
assert re.match(timestamp_pattern, groups["log_timestamp"])
702+
703+
704+
def test_grok_pattern_parsing_with_none_values(
705+
caplog: pytest.LogCaptureFixture,
706+
) -> None:
707+
"""
708+
Test Grok pattern parsing when optional fields are None.
709+
710+
WARNING: If log formatting changes, the Grok pattern in Graylog must be updated accordingly.
711+
"""
712+
import io
713+
import re
714+
715+
from servicelib.logging_utils import _DEFAULT_FORMATTING, CustomFormatter
716+
717+
# Create a custom handler with the default formatter
718+
handler = logging.StreamHandler(io.StringIO())
719+
formatter = CustomFormatter(_DEFAULT_FORMATTING, log_format_local_dev_enabled=False)
720+
handler.setFormatter(formatter)
721+
722+
# Create test log record with None values for optional fields
723+
record = _create_test_log_record(
724+
name="test.module",
725+
level=logging.ERROR,
726+
func_name="error_function",
727+
lineno=100,
728+
message="Error message",
729+
user_id=None,
730+
error_code=None,
731+
trace_id=None,
732+
)
733+
734+
formatted_log = formatter.format(record)
735+
regex_pattern = _create_grok_regex_pattern()
736+
match = re.match(regex_pattern, formatted_log)
737+
738+
assert (
739+
match is not None
740+
), f"Grok pattern did not match log with None values. Log: {formatted_log!r}"
741+
742+
groups = match.groupdict()
743+
assert groups["log_level"] == "ERROR"
744+
assert groups["log_source"] == "test.module:error_function(100)"
745+
assert groups["log_uid"] == "None"
746+
assert groups["log_oec"] == "None"
747+
assert groups["log_trace_id"] == "None"
748+
assert groups["log_msg"] == "Error message"

0 commit comments

Comments
 (0)