Skip to content

Commit 1e4209c

Browse files
SkylarKeltyclaude
andcommitted
Add structured logging with request ID correlation
- Add LOG_FORMAT setting ("text" or "json", default: text) - Add request ID middleware: reads x-request-id from upstream (nginx/LiteLLM) or generates UUID, echoes in response header - Store request ID in contextvars.ContextVar for async propagation - Custom logging.Filter injects request_id into every log record - JSON formatter for production log aggregation - Text format now includes [request_id] field - Add PLAYWRIGHT_CONTEXT_RECYCLE_PAGES and PLAYWRIGHT_MAX_HTML_BYTES config settings (used in upcoming Playwright hardening commit) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent d0d7f52 commit 1e4209c

File tree

5 files changed

+124
-6
lines changed

5 files changed

+124
-6
lines changed

artemis/config.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,28 @@ def _parse_log_level(name: str, default: str = "INFO") -> str:
221221
return raw_value
222222

223223

224+
_LOG_FORMATS = {"text", "json"}
225+
226+
227+
def _parse_log_format(name: str, default: str = "text") -> str:
228+
"""Parse log format from environment variable.
229+
230+
Args:
231+
name: Environment variable name
232+
default: Default format if variable is not set (default: text)
233+
234+
Returns:
235+
Validated log format string ("text" or "json")
236+
237+
Raises:
238+
ConfigError: If the value is not a valid log format
239+
"""
240+
raw_value = os.getenv(name, default).strip().lower()
241+
if raw_value not in _LOG_FORMATS:
242+
raise ConfigError(f"{name} must be one of: text, json.")
243+
return raw_value
244+
245+
224246
@dataclass(frozen=True)
225247
class Settings:
226248
"""Immutable configuration container for Artemis.
@@ -263,6 +285,9 @@ class Settings:
263285
cache_max_entries: Maximum entries per cache before oldest are evicted
264286
embedding_model: Model for query embeddings (enables semantic dedup when set)
265287
semantic_similarity_threshold: Cosine similarity threshold for semantic cache hits
288+
log_format: Log output format ("text" or "json")
289+
playwright_context_recycle_pages: Recycle Playwright context after this many pages
290+
playwright_max_html_bytes: Maximum HTML bytes to extract per page
266291
"""
267292

268293
searxng_api_base: str
@@ -298,6 +323,9 @@ class Settings:
298323
cache_max_entries: int
299324
embedding_model: str | None
300325
semantic_similarity_threshold: float
326+
log_format: str
327+
playwright_context_recycle_pages: int
328+
playwright_max_html_bytes: int
301329

302330

303331
@lru_cache(maxsize=1)
@@ -402,6 +430,13 @@ def get_settings() -> Settings:
402430
semantic_similarity_threshold=_parse_float(
403431
"SEMANTIC_SIMILARITY_THRESHOLD", 0.92, minimum=0.5, maximum=1.0
404432
),
433+
log_format=_parse_log_format("LOG_FORMAT"),
434+
playwright_context_recycle_pages=_parse_int(
435+
"PLAYWRIGHT_CONTEXT_RECYCLE_PAGES", 50, minimum=1, maximum=10000
436+
),
437+
playwright_max_html_bytes=_parse_int(
438+
"PLAYWRIGHT_MAX_HTML_BYTES", 5 * 1024 * 1024, minimum=65536, maximum=50 * 1024 * 1024
439+
),
405440
)
406441

407442

artemis/main.py

Lines changed: 68 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,16 @@
1414

1515
from dataclasses import dataclass
1616
import asyncio
17+
import contextvars
18+
import json as _json
1719
import logging
1820
import secrets
1921
import time
2022
import uuid
2123
from collections.abc import AsyncGenerator
2224
from contextlib import asynccontextmanager
2325

24-
from fastapi import Depends, FastAPI, HTTPException, status
26+
from fastapi import Depends, FastAPI, HTTPException, Request, status
2527
from fastapi.middleware.cors import CORSMiddleware
2628
from fastapi.responses import JSONResponse, StreamingResponse
2729
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
@@ -50,14 +52,61 @@
5052
from artemis.searcher import close_client as close_searxng_client
5153
from artemis.summarizer import summarize_results
5254

55+
# ---------------------------------------------------------------------------
56+
# Request ID context variable — set by middleware, read by log formatter
57+
# ---------------------------------------------------------------------------
58+
request_id_ctx: contextvars.ContextVar[str] = contextvars.ContextVar(
59+
"request_id", default="-"
60+
)
61+
62+
63+
class _RequestIdFilter(logging.Filter):
64+
"""Inject request_id from context into every log record."""
65+
66+
def filter(self, record: logging.LogRecord) -> bool:
67+
record.request_id = request_id_ctx.get("-") # type: ignore[attr-defined]
68+
return True
69+
70+
71+
class _JsonFormatter(logging.Formatter):
72+
"""Emit log records as single-line JSON objects."""
73+
74+
def format(self, record: logging.LogRecord) -> str:
75+
obj = {
76+
"ts": self.formatTime(record),
77+
"level": record.levelname,
78+
"logger": record.name,
79+
"request_id": getattr(record, "request_id", "-"),
80+
"msg": record.getMessage(),
81+
}
82+
if record.exc_info and record.exc_info[1]:
83+
obj["exception"] = self.formatException(record.exc_info)
84+
return _json.dumps(obj)
85+
86+
87+
def _configure_logging(settings_obj: Settings) -> None:
88+
"""Configure root logger with request ID support."""
89+
root = logging.getLogger()
90+
root.setLevel(getattr(logging, settings_obj.log_level, logging.INFO))
91+
root.handlers.clear()
92+
93+
handler = logging.StreamHandler()
94+
handler.addFilter(_RequestIdFilter())
95+
96+
if settings_obj.log_format == "json":
97+
handler.setFormatter(_JsonFormatter())
98+
else:
99+
handler.setFormatter(logging.Formatter(
100+
"%(asctime)s %(levelname)s [%(name)s] [%(request_id)s] %(message)s"
101+
))
102+
root.addHandler(handler)
103+
104+
53105
# Load settings at module import time to fail fast on config errors
54106
initial_settings = get_settings()
55107

56-
# Configure logging based on settings
57-
logging.basicConfig(
58-
level=getattr(logging, initial_settings.log_level, logging.INFO),
59-
format="%(asctime)s %(levelname)s [%(name)s] %(message)s",
60-
)
108+
# Configure structured logging
109+
_configure_logging(initial_settings)
61110
logger = logging.getLogger(__name__)
62111

63112
# Log security warnings at startup
@@ -157,6 +206,19 @@ def _research_preset_config(settings: Settings, preset: str) -> ResearchPresetCo
157206
)
158207

159208

209+
@app.middleware("http")
210+
async def request_id_middleware(request: Request, call_next):
211+
"""Attach a unique request ID to every request for log correlation."""
212+
rid = request.headers.get("x-request-id") or str(uuid.uuid4())
213+
token = request_id_ctx.set(rid)
214+
try:
215+
response = await call_next(request)
216+
response.headers["x-request-id"] = rid
217+
return response
218+
finally:
219+
request_id_ctx.reset(token)
220+
221+
160222
@app.exception_handler(UpstreamServiceError)
161223
async def upstream_service_error_handler(_, exc: UpstreamServiceError) -> JSONResponse:
162224
logger.error("Upstream service failure: %s", exc)

tests/test_api.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,9 @@ def test_search_requires_api_key_when_configured(
6666
cache_max_entries=1000,
6767
embedding_model=None,
6868
semantic_similarity_threshold=0.92,
69+
log_format="text",
70+
playwright_context_recycle_pages=50,
71+
playwright_max_html_bytes=5242880,
6972
),
7073
):
7174
unauthorized = self.client.post("/search", json={"query": "fastapi"})
@@ -130,6 +133,9 @@ def test_deep_research_uses_requested_max_steps(
130133
cache_max_entries=1000,
131134
embedding_model=None,
132135
semantic_similarity_threshold=0.92,
136+
log_format="text",
137+
playwright_context_recycle_pages=50,
138+
playwright_max_html_bytes=5242880,
133139
)
134140

135141
with patch("artemis.main.get_settings", return_value=settings):
@@ -201,6 +207,9 @@ def test_shallow_research_uses_shallow_settings(
201207
cache_max_entries=1000,
202208
embedding_model=None,
203209
semantic_similarity_threshold=0.92,
210+
log_format="text",
211+
playwright_context_recycle_pages=50,
212+
playwright_max_html_bytes=5242880,
204213
),
205214
):
206215
response = self.client.post(

tests/test_api_extended.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,9 @@ def _default_settings(**overrides) -> Settings:
5858
cache_max_entries=1000,
5959
embedding_model=None,
6060
semantic_similarity_threshold=0.92,
61+
log_format="text",
62+
playwright_context_recycle_pages=50,
63+
playwright_max_html_bytes=5242880,
6164
)
6265
defaults.update(overrides)
6366
return Settings(**defaults)

tests/test_cache.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -457,6 +457,9 @@ async def test_semantic_hit_skips_search(
457457
cache_max_entries=1000,
458458
embedding_model="text-embedding-3-small",
459459
semantic_similarity_threshold=0.90,
460+
log_format="text",
461+
playwright_context_recycle_pages=50,
462+
playwright_max_html_bytes=5242880,
460463
)
461464
mock_settings.return_value = settings
462465

@@ -528,6 +531,9 @@ async def test_dissimilar_queries_no_semantic_hit(
528531
cache_max_entries=1000,
529532
embedding_model="text-embedding-3-small",
530533
semantic_similarity_threshold=0.90,
534+
log_format="text",
535+
playwright_context_recycle_pages=50,
536+
playwright_max_html_bytes=5242880,
531537
)
532538
mock_settings.return_value = settings
533539

@@ -593,6 +599,9 @@ async def test_no_embedding_model_skips_semantic(
593599
cache_max_entries=1000,
594600
embedding_model=None, # Disabled
595601
semantic_similarity_threshold=0.92,
602+
log_format="text",
603+
playwright_context_recycle_pages=50,
604+
playwright_max_html_bytes=5242880,
596605
)
597606
mock_settings.return_value = settings
598607

0 commit comments

Comments
 (0)