Skip to content

Commit 619e3c5

Browse files
authored
Merge pull request #291 from cmu-delphi/development
Improved logging
2 parents a768d84 + ab0fbfa commit 619e3c5

File tree

2 files changed

+179
-0
lines changed

2 files changed

+179
-0
lines changed

src/epiportal/middleware.py

Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
"""
2+
Request logging middleware that captures comprehensive data from all HTTP requests.
3+
"""
4+
5+
import json
6+
import logging
7+
import time
8+
import uuid
9+
from typing import Any
10+
11+
from django.utils.deprecation import MiddlewareMixin
12+
13+
logger = logging.getLogger("epiportal.requests")
14+
15+
# Headers that may contain sensitive data - values will be redacted
16+
SENSITIVE_HEADERS = frozenset(
17+
name.lower()
18+
for name in (
19+
"authorization",
20+
"cookie",
21+
"x-api-key",
22+
"x-auth-token",
23+
"proxy-authorization",
24+
"x-csrf-token",
25+
"x-session-id",
26+
)
27+
)
28+
29+
# Maximum bytes of request/response body to log (prevents log bloat)
30+
MAX_BODY_LOG_SIZE = 4096
31+
32+
# Path segments to exclude from request logging (matched anywhere in path)
33+
LOG_EXCLUDE_PATH_PATTERNS = (
34+
"get_table_stats_info",
35+
"get_related_indicators",
36+
"get_available_geos",
37+
)
38+
39+
40+
def _should_log_request(request) -> bool:
41+
"""Return False if this request should be excluded from logging."""
42+
path = request.path
43+
return not any(pattern in path for pattern in LOG_EXCLUDE_PATH_PATTERNS)
44+
45+
46+
def _get_client_ip(request) -> str:
47+
"""Extract client IP, respecting X-Forwarded-For when behind proxies."""
48+
x_forwarded_for = request.META.get("HTTP_X_FORWARDED_FOR")
49+
if x_forwarded_for:
50+
# Take the leftmost (original client) IP
51+
return x_forwarded_for.split(",")[0].strip()
52+
return request.META.get("REMOTE_ADDR", "")
53+
54+
55+
def _sanitize_headers(meta: dict) -> dict[str, str]:
56+
"""Extract HTTP headers from META, redacting sensitive ones."""
57+
headers = {}
58+
for key, value in meta.items():
59+
if key.startswith("HTTP_"):
60+
header_name = key[5:].replace("_", "-").lower()
61+
if header_name in SENSITIVE_HEADERS:
62+
headers[header_name] = "[REDACTED]"
63+
elif value:
64+
headers[header_name] = str(value)
65+
return headers
66+
67+
68+
def _safe_body_preview(body: bytes | None, max_size: int = MAX_BODY_LOG_SIZE) -> str | None:
69+
"""Return a safe preview of request/response body, truncated if large."""
70+
if body is None or len(body) == 0:
71+
return None
72+
try:
73+
decoded = body.decode("utf-8", errors="replace")
74+
if len(decoded) > max_size:
75+
return decoded[:max_size] + f"... [truncated, total {len(body)} bytes]"
76+
return decoded
77+
except Exception:
78+
return f"[binary, {len(body)} bytes]"
79+
80+
81+
class RequestLoggingMiddleware(MiddlewareMixin):
82+
"""
83+
Middleware that logs every HTTP request with comprehensive request and response data.
84+
"""
85+
86+
def process_request(self, request):
87+
request._request_start_time = time.monotonic()
88+
request._request_id = (
89+
request.META.get("HTTP_X_REQUEST_ID") or str(uuid.uuid4())
90+
)
91+
return None
92+
93+
def process_response(self, request, response):
94+
if not _should_log_request(request):
95+
if hasattr(request, "_request_id"):
96+
response["X-Request-ID"] = request._request_id
97+
return response
98+
99+
try:
100+
duration_ms = (
101+
(time.monotonic() - request._request_start_time) * 1000
102+
if hasattr(request, "_request_start_time")
103+
else None
104+
)
105+
106+
# Build comprehensive log data
107+
log_data: dict[str, Any] = {
108+
"request_id": getattr(request, "_request_id", None),
109+
"method": request.method,
110+
"path": request.path,
111+
"full_uri": request.build_absolute_uri(),
112+
"query_string": request.META.get("QUERY_STRING") or "",
113+
"query_params": dict(request.GET) if request.GET else {},
114+
"client_ip": _get_client_ip(request),
115+
"referer": request.META.get("HTTP_REFERER", ""),
116+
"user_agent": request.META.get("HTTP_USER_AGENT", ""),
117+
"content_type": request.content_type or "",
118+
"content_length": request.META.get("CONTENT_LENGTH"),
119+
"headers": _sanitize_headers(request.META),
120+
"response_status": response.status_code,
121+
"response_content_type": response.get("Content-Type", ""),
122+
}
123+
124+
# Authentication / user info
125+
if hasattr(request, "user") and request.user.is_authenticated:
126+
log_data["user_id"] = getattr(request.user, "pk", None)
127+
log_data["username"] = getattr(request.user, "username", str(request.user))
128+
else:
129+
log_data["user"] = "anonymous"
130+
131+
# Request body (for methods that typically have one)
132+
if request.method in ("POST", "PUT", "PATCH"):
133+
try:
134+
body = getattr(request, "_body", None) or request.body
135+
log_data["request_body_preview"] = _safe_body_preview(body)
136+
except Exception as e:
137+
log_data["request_body_error"] = str(e)
138+
139+
# Response body (for API responses, avoid huge HTML)
140+
if (
141+
"application/json" in (response.get("Content-Type") or "")
142+
and hasattr(response, "content")
143+
):
144+
log_data["response_body_preview"] = _safe_body_preview(response.content)
145+
146+
if duration_ms is not None:
147+
log_data["duration_ms"] = round(duration_ms, 2)
148+
149+
logger.info(
150+
"%s %s %s | %s",
151+
request.method,
152+
request.path,
153+
response.status_code,
154+
json.dumps(log_data, default=str),
155+
)
156+
157+
except Exception as e:
158+
logger.exception("Error in request logging middleware: %s", e)
159+
160+
# Add request ID to response for client-side correlation
161+
if hasattr(request, "_request_id"):
162+
response["X-Request-ID"] = request._request_id
163+
164+
return response

src/epiportal/settings.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@
135135
INSTALLED_APPS: list[str] = DEFAULT_APPS + EXTERNAL_APPS + LOCAL_APPS
136136

137137
MIDDLEWARE = [
138+
'epiportal.middleware.RequestLoggingMiddleware',
138139
'django.middleware.security.SecurityMiddleware',
139140
'django.contrib.sessions.middleware.SessionMiddleware',
140141
'django.middleware.common.CommonMiddleware',
@@ -232,20 +233,34 @@
232233
'format': '[%(asctime)s] %(levelname)s | %(name)s | %(message)s',
233234
'datefmt': '%Y-%m-%d %H:%M:%S',
234235
},
236+
'request_verbose': {
237+
'format': '[%(asctime)s] %(levelname)s | epiportal.requests | %(message)s',
238+
'datefmt': '%Y-%m-%d %H:%M:%S',
239+
},
235240
},
236241
'handlers': {
237242
'console': {
238243
'class': 'logging.StreamHandler',
239244
'formatter': 'simple',
240245
'stream': sys.stdout,
241246
},
247+
'request_console': {
248+
'class': 'logging.StreamHandler',
249+
'formatter': 'request_verbose',
250+
'stream': sys.stdout,
251+
},
242252
},
243253
'loggers': {
244254
'django': {
245255
'handlers': ['console'],
246256
'level': 'INFO',
247257
'propagate': True,
248258
},
259+
'epiportal.requests': {
260+
'handlers': ['request_console'],
261+
'level': 'INFO',
262+
'propagate': False,
263+
},
249264
},
250265
}
251266

0 commit comments

Comments
 (0)