Skip to content

Commit 2e0459d

Browse files
authored
feat: Centralize logging setup with structlog integration (#179)
1 parent 398c83c commit 2e0459d

File tree

15 files changed

+1134
-197
lines changed

15 files changed

+1134
-197
lines changed

pyproject.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,10 @@ optional-dependencies = { test-tools = [
1919
"prometheus-client (>=0.0.16)",
2020
"psycopg2-binary (>=2.9,<3)",
2121
"requests",
22+
"sentry-sdk (>=2.0.0,<3.0.0)",
2223
"simplejson (>=3,<4)",
24+
"structlog (>=24.4,<26)",
25+
"typing_extensions",
2326
], task-processor = [
2427
"backoff (>=2.2.1,<3.0.0)",
2528
"django (>4,<6)",

src/common/core/constants.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,3 @@
11
DEFAULT_PROMETHEUS_MULTIPROC_DIR_NAME = "flagsmith-prometheus"
2+
3+
LOGGING_DEFAULT_ROOT_LOG_LEVEL = "WARNING"

src/common/core/logging.py

Lines changed: 227 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,40 @@
11
import json
22
import logging
3+
import logging.config
4+
import os
5+
import sys
6+
import threading
37
from typing import Any
48

9+
import structlog
10+
import structlog.contextvars
11+
import structlog.dev
12+
import structlog.processors
13+
import structlog.stdlib
14+
from structlog.typing import EventDict, Processor, WrappedLogger
15+
from typing_extensions import TypedDict
16+
17+
from common.core.constants import LOGGING_DEFAULT_ROOT_LOG_LEVEL
18+
19+
logger = logging.getLogger(__name__)
20+
21+
22+
class JsonRecord(TypedDict, extra_items=Any, total=False): # type: ignore[call-arg] # TODO https://github.com/python/mypy/issues/18176
23+
levelname: str
24+
message: str
25+
timestamp: str
26+
logger_name: str
27+
pid: int | None
28+
thread_name: str | None
29+
exc_info: str
30+
531

632
class JsonFormatter(logging.Formatter):
733
"""Custom formatter for json logs."""
834

9-
def get_json_record(self, record: logging.LogRecord) -> dict[str, Any]:
35+
def get_json_record(self, record: logging.LogRecord) -> JsonRecord:
1036
formatted_message = record.getMessage()
11-
json_record = {
37+
json_record: JsonRecord = {
1238
"levelname": record.levelname,
1339
"message": formatted_message,
1440
"timestamp": self.formatTime(record, self.datefmt),
@@ -22,3 +48,202 @@ def get_json_record(self, record: logging.LogRecord) -> dict[str, Any]:
2248

2349
def format(self, record: logging.LogRecord) -> str:
2450
return json.dumps(self.get_json_record(record))
51+
52+
53+
def setup_logging(
    log_level: str = "INFO",
    log_format: str = "generic",
    logging_configuration_file: str | None = None,
    application_loggers: list[str] | None = None,
    extra_foreign_processors: list[Processor] | None = None,
) -> None:
    """
    Set up logging for the application.

    This should be called early, before Django settings are loaded, to ensure
    that all log output is properly formatted from the start.

    Args:
        log_level: The log level for application code (e.g. "DEBUG", "INFO").
        log_format: Either "generic" or "json".
        logging_configuration_file: Path to a JSON logging config file.
            If provided, this takes precedence over other format options.
        application_loggers: Top-level logger names for application packages.
            These loggers run at ``log_level`` while the root logger is kept
            at the default root level to suppress third-party noise — unless
            ``log_level`` is below INFO (i.e. DEBUG), in which case the whole
            process logs at that level.
        extra_foreign_processors: Additional structlog processors to run in
            the ``foreign_pre_chain`` for stdlib log records (e.g. the
            Gunicorn access log field extractor).
    """
    if logging_configuration_file:
        # An explicit config file wins over all other options.
        with open(logging_configuration_file) as config_file:
            logging.config.dictConfig(json.load(config_file))
    else:
        level_by_name = logging.getLevelNamesMapping()
        requested_level = level_by_name[log_level.upper()]
        default_root_level = level_by_name[LOGGING_DEFAULT_ROOT_LOG_LEVEL]

        # Third-party loggers are capped at the default root level, but a
        # sub-INFO request (DEBUG) is honoured for the entire process.
        if requested_level < logging.INFO:
            root_level = requested_level
        else:
            root_level = default_root_level

        # Application packages propagate to root but are opened up to the
        # requested level so they aren't silenced by the root cap.
        app_logger_config = {
            name: {"level": log_level, "handlers": [], "propagate": True}
            for name in (application_loggers or [])
        }

        logging.config.dictConfig(
            {
                "version": 1,
                "disable_existing_loggers": False,
                "handlers": {
                    "console": {
                        "class": "logging.StreamHandler",
                        "stream": "ext://sys.stdout",
                        "level": log_level,
                    },
                },
                "root": {
                    "level": root_level,
                    "handlers": ["console"],
                },
                "loggers": app_logger_config,
            }
        )

    setup_structlog(
        log_format=log_format,
        extra_foreign_processors=extra_foreign_processors,
    )
116+
117+
118+
def map_event_to_json_record(
    logger: WrappedLogger,
    method_name: str,
    event_dict: EventDict,
) -> EventDict:
    """Map structlog fields to match :class:`JsonFormatter` output schema.

    Mutates ``event_dict`` in place: the structlog-native keys (``event``,
    ``level``, ``logger``) are popped and re-added under the stdlib-style
    names, alongside process/thread identifiers. Any remaining keys (e.g.
    values bound via ``bind()``) are left untouched so they still appear in
    the rendered JSON.
    """
    canonical: JsonRecord = {
        "message": event_dict.pop("event", ""),
        "levelname": event_dict.pop("level", "").upper(),
        "logger_name": event_dict.pop("logger", ""),
        "pid": os.getpid(),
        "thread_name": threading.current_thread().name,
    }

    # A rendered traceback, when present, moves to the stdlib-style key.
    try:
        canonical["exc_info"] = event_dict.pop("exception")
    except KeyError:
        pass

    # Fold the canonical fields back in so extras and schema keys coexist.
    event_dict.update(canonical)
    return event_dict
137+
138+
139+
class _SentryFriendlyProcessorFormatter(structlog.stdlib.ProcessorFormatter):
    """Preserves ``record.msg`` and ``record.args`` across formatting.

    Sentry's ``LoggingIntegration`` reads these fields *after* handlers run;
    structlog's ``ProcessorFormatter`` replaces them with rendered output, breaking event
    grouping. We snapshot before and restore after so Sentry sees the originals
    and avoids failed event deduplication.
    """

    def __init__(
        self,
        log_format: str = "generic",
        extra_foreign_processors: list[Processor] | None = None,
        **kwargs: Any,
    ) -> None:
        """Build the formatter's processor chains.

        Args:
            log_format: ``"json"`` selects the JSON record mapper + renderer;
                any other value falls back to structlog's console renderer.
            extra_foreign_processors: Appended to the ``foreign_pre_chain``
                run on stdlib-originated records (e.g. Gunicorn access logs).
            **kwargs: Forwarded verbatim to ``ProcessorFormatter.__init__``.
        """
        if log_format == "json":
            renderer_processors: list[Processor] = [
                map_event_to_json_record,
                structlog.processors.JSONRenderer(),
            ]
        else:
            # NOTE(review): structlog.dev._has_colors is a private attribute —
            # confirm it exists across the supported structlog range (>=24.4,<26).
            colors = sys.stdout.isatty() and structlog.dev._has_colors
            renderer_processors = [
                structlog.dev.ConsoleRenderer(colors=colors),
            ]

        # Pre-chain applied to records created via stdlib logging (not via
        # structlog loggers); drop_internal_keys runs after ExtraAdder so the
        # stashed _original_args attribute never leaks into output.
        foreign_pre_chain: list[Processor] = [
            structlog.contextvars.merge_contextvars,
            structlog.stdlib.add_logger_name,
            structlog.stdlib.add_log_level,
            structlog.processors.TimeStamper(fmt="iso"),
            structlog.stdlib.ExtraAdder(),
            self.drop_internal_keys,
            *(extra_foreign_processors or []),
        ]

        super().__init__(
            processors=[
                structlog.stdlib.ProcessorFormatter.remove_processors_meta,
                *renderer_processors,
            ],
            foreign_pre_chain=foreign_pre_chain,
            **kwargs,
        )

    def format(self, record: logging.LogRecord) -> str:
        """Format ``record``, restoring its original msg/args afterwards.

        The snapshot/restore ordering here is load-bearing — see the class
        docstring for why Sentry needs the originals back.
        """
        # Snapshot the original fields before ProcessorFormatter
        # replaces them with rendered output.
        original_msg = record.msg
        original_args = record.args

        # Stash original args on the record so foreign_pre_chain
        # processors (e.g. the Gunicorn access log extractor) can
        # access them — ProcessorFormatter clears record.args to ()
        # before running the chain.
        record._original_args = original_args

        formatted = super().format(record)

        # Restore so Sentry (and any other post-handler hook) sees
        # the original message template and substitution args.
        record.msg = original_msg
        record.args = original_args
        del record._original_args  # type: ignore[attr-defined]

        return formatted

    @staticmethod
    def drop_internal_keys(
        _: WrappedLogger, __: str, event_dict: EventDict
    ) -> EventDict:
        """Remove internal attributes that leak via ``ExtraAdder``."""
        event_dict.pop("_original_args", None)
        return event_dict
213+
214+
215+
def setup_structlog(
    log_format: str,
    extra_foreign_processors: list[Processor] | None = None,
) -> None:
    """Configure structlog to route through stdlib logging."""
    # NOTE: imported locally rather than at module level — presumably to keep
    # sentry_sdk out of this module's import time; confirm before moving it.
    from common.core.sentry import sentry_processor

    shared_formatter = _SentryFriendlyProcessorFormatter(
        log_format=log_format,
        extra_foreign_processors=extra_foreign_processors,
    )

    # Swap the ProcessorFormatter onto every handler already attached to root.
    for root_handler in logging.getLogger().handlers:
        root_handler.setFormatter(shared_formatter)

    # Processor pipeline for events emitted through structlog loggers; the
    # final wrap hands the event off to the stdlib handler/formatter machinery.
    pipeline: list[Processor] = [
        structlog.contextvars.merge_contextvars,
        structlog.stdlib.filter_by_level,
        structlog.stdlib.add_logger_name,
        structlog.stdlib.add_log_level,
        structlog.stdlib.PositionalArgumentsFormatter(),
        structlog.processors.StackInfoRenderer(),
        structlog.processors.UnicodeDecoder(),
        structlog.processors.format_exc_info,
        structlog.processors.TimeStamper(fmt="iso"),
        sentry_processor,
        structlog.stdlib.ProcessorFormatter.wrap_for_formatter,
    ]

    structlog.configure(
        processors=pipeline,
        wrapper_class=structlog.stdlib.BoundLogger,
        context_class=dict,
        logger_factory=structlog.stdlib.LoggerFactory(),
        cache_logger_on_first_use=True,
    )

src/common/core/main.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,13 @@
88
from django.core.management import (
99
execute_from_command_line as django_execute_from_command_line,
1010
)
11+
from environs import Env
1112

1213
from common.core.cli import healthcheck
14+
from common.core.logging import setup_logging
15+
from common.gunicorn.processors import make_gunicorn_access_processor
16+
17+
env = Env()
1318

1419
logger = logging.getLogger(__name__)
1520

@@ -32,7 +37,18 @@ def ensure_cli_env() -> typing.Generator[None, None, None]:
3237
"""
3338
ctx = contextlib.ExitStack()
3439

35-
# TODO @khvn26 Move logging setup to here
40+
# Set up logging early, before Django settings are loaded.
41+
setup_logging(
42+
log_level=env.str("LOG_LEVEL", "INFO"),
43+
log_format=env.str("LOG_FORMAT", "generic"),
44+
logging_configuration_file=env.str("LOGGING_CONFIGURATION_FILE", None),
45+
application_loggers=env.list("APPLICATION_LOGGERS", []) or None,
46+
extra_foreign_processors=[
47+
make_gunicorn_access_processor(
48+
env.list("ACCESS_LOG_EXTRA_ITEMS", []) or None,
49+
),
50+
],
51+
)
3652

3753
# Prometheus multiproc support
3854
if not os.environ.get("PROMETHEUS_MULTIPROC_DIR"):

src/common/core/sentry.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
import sentry_sdk
2+
from structlog.typing import EventDict, WrappedLogger
3+
4+
_SKIP_CONTEXT_FIELDS = frozenset({"event", "level", "timestamp", "_record"})
5+
6+
7+
def sentry_processor(
    logger: WrappedLogger,
    method_name: str,
    event_dict: EventDict,
) -> EventDict:
    """
    Structlog processor that enriches Sentry with structured context and tags.

    Since structlog routes through stdlib, Sentry's LoggingIntegration
    automatically captures ERROR+ logs as Sentry events. This processor
    adds structured context on top of that.
    """
    # Everything except the bookkeeping fields becomes Sentry context.
    context: dict = {}
    for key, value in event_dict.items():
        if key in _SKIP_CONTEXT_FIELDS:
            continue
        context[key] = value

    sentry_sdk.set_context("structlog", context)

    return event_dict

0 commit comments

Comments
 (0)